Import 5.0 code.

This commit is contained in:
osku 2005-10-27 07:29:40 +00:00
parent eae9b3ec18
commit c307820962
310 changed files with 163246 additions and 0 deletions

30
Makefile.am Normal file
View file

@ -0,0 +1,30 @@
# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
# & Innobase Oy
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
# Process this file with automake to create Makefile.in
AUTOMAKE_OPTIONS = foreign
TAR = gtar
noinst_HEADERS = ib_config.h
SUBDIRS = os ut btr buf data dict dyn eval fil fsp fut \
ha ibuf include lock log mach mem mtr page \
pars que read rem row srv sync thr trx usr
# Don't update the files from bitkeeper
%::SCCS/s.%

24
btr/Makefile.am Normal file
View file

@ -0,0 +1,24 @@
# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
# & Innobase Oy
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
include ../include/Makefile.i
noinst_LIBRARIES = libbtr.a
libbtr_a_SOURCES = btr0btr.c btr0cur.c btr0pcur.c btr0sea.c
EXTRA_PROGRAMS =

2988
btr/btr0btr.c Normal file

File diff suppressed because it is too large Load diff

3819
btr/btr0cur.c Normal file

File diff suppressed because it is too large Load diff

564
btr/btr0pcur.c Normal file
View file

@ -0,0 +1,564 @@
/******************************************************
The index tree persistent cursor
(c) 1996 Innobase Oy
Created 2/23/1996 Heikki Tuuri
*******************************************************/
#include "btr0pcur.h"
#ifdef UNIV_NONINL
#include "btr0pcur.ic"
#endif
#include "ut0byte.h"
#include "rem0cmp.h"
#include "trx0trx.h"
/******************************************************************
Allocates memory for a persistent cursor object and initializes the cursor. */
btr_pcur_t*
btr_pcur_create_for_mysql(void)
/*============================*/
/* out, own: persistent cursor */
{
btr_pcur_t* pcur;
pcur = mem_alloc(sizeof(btr_pcur_t));
pcur->btr_cur.index = NULL;
btr_pcur_init(pcur);
return(pcur);
}
/******************************************************************
Frees the memory for a persistent cursor object. */
void
btr_pcur_free_for_mysql(
/*====================*/
btr_pcur_t* cursor) /* in, own: persistent cursor */
{
if (cursor->old_rec_buf != NULL) {
mem_free(cursor->old_rec_buf);
cursor->old_rec_buf = NULL;
}
cursor->btr_cur.page_cur.rec = NULL;
cursor->old_rec = NULL;
cursor->old_n_fields = 0;
cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
cursor->latch_mode = BTR_NO_LATCHES;
cursor->pos_state = BTR_PCUR_NOT_POSITIONED;
mem_free(cursor);
}
/******************************************************************
The position of the cursor is stored by taking an initial segment of the
record the cursor is positioned on, before, or after, and copying it to the
cursor data structure, or just setting a flag if the cursor id before the
first in an EMPTY tree, or after the last in an EMPTY tree. NOTE that the
page where the cursor is positioned must not be empty if the index tree is
not totally empty! */
void
btr_pcur_store_position(
/*====================*/
btr_pcur_t* cursor, /* in: persistent cursor */
mtr_t* mtr) /* in: mtr */
{
page_cur_t* page_cursor;
rec_t* rec;
dict_tree_t* tree;
page_t* page;
ulint offs;
ut_a(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
tree = btr_cur_get_tree(btr_pcur_get_btr_cur(cursor));
page_cursor = btr_pcur_get_page_cur(cursor);
rec = page_cur_get_rec(page_cursor);
page = ut_align_down(rec, UNIV_PAGE_SIZE);
offs = ut_align_offset(rec, UNIV_PAGE_SIZE);
ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
MTR_MEMO_PAGE_S_FIX)
|| mtr_memo_contains(mtr, buf_block_align(page),
MTR_MEMO_PAGE_X_FIX));
ut_a(cursor->latch_mode != BTR_NO_LATCHES);
if (UNIV_UNLIKELY(page_get_n_recs(page) == 0)) {
/* It must be an empty index tree; NOTE that in this case
we do not store the modify_clock, but always do a search
if we restore the cursor position */
ut_a(btr_page_get_next(page, mtr) == FIL_NULL);
ut_a(btr_page_get_prev(page, mtr) == FIL_NULL);
cursor->old_stored = BTR_PCUR_OLD_STORED;
if (page_rec_is_supremum_low(offs)) {
cursor->rel_pos = BTR_PCUR_AFTER_LAST_IN_TREE;
} else {
cursor->rel_pos = BTR_PCUR_BEFORE_FIRST_IN_TREE;
}
return;
}
if (page_rec_is_supremum_low(offs)) {
rec = page_rec_get_prev(rec);
cursor->rel_pos = BTR_PCUR_AFTER;
} else if (page_rec_is_infimum_low(offs)) {
rec = page_rec_get_next(rec);
cursor->rel_pos = BTR_PCUR_BEFORE;
} else {
cursor->rel_pos = BTR_PCUR_ON;
}
cursor->old_stored = BTR_PCUR_OLD_STORED;
cursor->old_rec = dict_tree_copy_rec_order_prefix(tree, rec,
&cursor->old_n_fields,
&cursor->old_rec_buf,
&cursor->buf_size);
cursor->block_when_stored = buf_block_align(page);
cursor->modify_clock = buf_block_get_modify_clock(
cursor->block_when_stored);
}
/******************************************************************
Copies the stored position of a pcur to another pcur. */
void
btr_pcur_copy_stored_position(
/*==========================*/
btr_pcur_t* pcur_receive, /* in: pcur which will receive the
position info */
btr_pcur_t* pcur_donate) /* in: pcur from which the info is
copied */
{
if (pcur_receive->old_rec_buf) {
mem_free(pcur_receive->old_rec_buf);
}
ut_memcpy((byte*)pcur_receive, (byte*)pcur_donate, sizeof(btr_pcur_t));
if (pcur_donate->old_rec_buf) {
pcur_receive->old_rec_buf = mem_alloc(pcur_donate->buf_size);
ut_memcpy(pcur_receive->old_rec_buf, pcur_donate->old_rec_buf,
pcur_donate->buf_size);
pcur_receive->old_rec = pcur_receive->old_rec_buf
+ (pcur_donate->old_rec - pcur_donate->old_rec_buf);
}
pcur_receive->old_n_fields = pcur_donate->old_n_fields;
}
/******************************************************************
Restores the stored position of a persistent cursor bufferfixing the page and
obtaining the specified latches. If the cursor position was saved when the
(1) cursor was positioned on a user record: this function restores the position
to the last record LESS OR EQUAL to the stored record;
(2) cursor was positioned on a page infimum record: restores the position to
the last record LESS than the user record which was the successor of the page
infimum;
(3) cursor was positioned on the page supremum: restores to the first record
GREATER than the user record which was the predecessor of the supremum.
(4) cursor was positioned before the first or after the last in an empty tree:
restores to before first or after the last in the tree. */
ibool
btr_pcur_restore_position(
/*======================*/
/* out: TRUE if the cursor position
was stored when it was on a user record
and it can be restored on a user record
whose ordering fields are identical to
the ones of the original user record */
ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... */
btr_pcur_t* cursor, /* in: detached persistent cursor */
mtr_t* mtr) /* in: mtr */
{
dict_tree_t* tree;
page_t* page;
dtuple_t* tuple;
ulint mode;
ulint old_mode;
mem_heap_t* heap;
if (UNIV_UNLIKELY(cursor->old_stored != BTR_PCUR_OLD_STORED)
|| UNIV_UNLIKELY(cursor->pos_state != BTR_PCUR_WAS_POSITIONED
&& cursor->pos_state != BTR_PCUR_IS_POSITIONED)) {
ut_print_buf(stderr, (const byte*)cursor, sizeof(btr_pcur_t));
if (cursor->trx_if_known) {
trx_print(stderr, cursor->trx_if_known, 0);
}
ut_error;
}
if (UNIV_UNLIKELY(cursor->rel_pos == BTR_PCUR_AFTER_LAST_IN_TREE
|| cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE)) {
/* In these cases we do not try an optimistic restoration,
but always do a search */
btr_cur_open_at_index_side(
cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE,
btr_pcur_get_btr_cur(cursor)->index, latch_mode,
btr_pcur_get_btr_cur(cursor), mtr);
cursor->block_when_stored =
buf_block_align(btr_pcur_get_page(cursor));
return(FALSE);
}
ut_a(cursor->old_rec);
ut_a(cursor->old_n_fields);
page = btr_cur_get_page(btr_pcur_get_btr_cur(cursor));
if (UNIV_LIKELY(latch_mode == BTR_SEARCH_LEAF)
|| UNIV_LIKELY(latch_mode == BTR_MODIFY_LEAF)) {
/* Try optimistic restoration */
if (UNIV_LIKELY(buf_page_optimistic_get(latch_mode,
cursor->block_when_stored, page,
cursor->modify_clock, mtr))) {
cursor->pos_state = BTR_PCUR_IS_POSITIONED;
#ifdef UNIV_SYNC_DEBUG
buf_page_dbg_add_level(page, SYNC_TREE_NODE);
#endif /* UNIV_SYNC_DEBUG */
if (cursor->rel_pos == BTR_PCUR_ON) {
#ifdef UNIV_DEBUG
rec_t* rec;
ulint* offsets1;
ulint* offsets2;
dict_index_t* index;
#endif /* UNIV_DEBUG */
cursor->latch_mode = latch_mode;
#ifdef UNIV_DEBUG
rec = btr_pcur_get_rec(cursor);
index = dict_tree_find_index(
btr_cur_get_tree(
btr_pcur_get_btr_cur(cursor)),
rec);
heap = mem_heap_create(256);
offsets1 = rec_get_offsets(cursor->old_rec,
index, NULL,
cursor->old_n_fields, &heap);
offsets2 = rec_get_offsets(rec, index, NULL,
cursor->old_n_fields, &heap);
ut_ad(cmp_rec_rec(cursor->old_rec,
rec, offsets1, offsets2, index) == 0);
mem_heap_free(heap);
#endif /* UNIV_DEBUG */
return(TRUE);
}
return(FALSE);
}
}
/* If optimistic restoration did not succeed, open the cursor anew */
heap = mem_heap_create(256);
tree = btr_cur_get_tree(btr_pcur_get_btr_cur(cursor));
tuple = dict_tree_build_data_tuple(tree, cursor->old_rec,
cursor->old_n_fields, heap);
/* Save the old search mode of the cursor */
old_mode = cursor->search_mode;
if (UNIV_LIKELY(cursor->rel_pos == BTR_PCUR_ON)) {
mode = PAGE_CUR_LE;
} else if (cursor->rel_pos == BTR_PCUR_AFTER) {
mode = PAGE_CUR_G;
} else {
ut_ad(cursor->rel_pos == BTR_PCUR_BEFORE);
mode = PAGE_CUR_L;
}
btr_pcur_open_with_no_init(btr_pcur_get_btr_cur(cursor)->index, tuple,
mode, latch_mode, cursor, 0, mtr);
/* Restore the old search mode */
cursor->search_mode = old_mode;
if (cursor->rel_pos == BTR_PCUR_ON
&& btr_pcur_is_on_user_rec(cursor, mtr)
&& 0 == cmp_dtuple_rec(tuple, btr_pcur_get_rec(cursor),
rec_get_offsets(btr_pcur_get_rec(cursor),
btr_pcur_get_btr_cur(cursor)->index,
NULL, ULINT_UNDEFINED, &heap))) {
/* We have to store the NEW value for the modify clock, since
the cursor can now be on a different page! But we can retain
the value of old_rec */
cursor->block_when_stored =
buf_block_align(btr_pcur_get_page(cursor));
cursor->modify_clock =
buf_block_get_modify_clock(cursor->block_when_stored);
cursor->old_stored = BTR_PCUR_OLD_STORED;
mem_heap_free(heap);
return(TRUE);
}
mem_heap_free(heap);
/* We have to store new position information, modify_clock etc.,
to the cursor because it can now be on a different page, the record
under it may have been removed, etc. */
btr_pcur_store_position(cursor, mtr);
return(FALSE);
}
/******************************************************************
If the latch mode of the cursor is BTR_LEAF_SEARCH or BTR_LEAF_MODIFY,
releases the page latch and bufferfix reserved by the cursor.
NOTE! In the case of BTR_LEAF_MODIFY, there should not exist changes
made by the current mini-transaction to the data protected by the
cursor latch, as then the latch must not be released until mtr_commit. */
void
btr_pcur_release_leaf(
/*==================*/
btr_pcur_t* cursor, /* in: persistent cursor */
mtr_t* mtr) /* in: mtr */
{
page_t* page;
ut_a(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
page = btr_cur_get_page(btr_pcur_get_btr_cur(cursor));
btr_leaf_page_release(page, cursor->latch_mode, mtr);
cursor->latch_mode = BTR_NO_LATCHES;
cursor->pos_state = BTR_PCUR_WAS_POSITIONED;
}
/*************************************************************
Moves the persistent cursor to the first record on the next page. Releases the
latch on the current page, and bufferunfixes it. Note that there must not be
modifications on the current page, as then the x-latch can be released only in
mtr_commit. */
void
btr_pcur_move_to_next_page(
/*=======================*/
btr_pcur_t* cursor, /* in: persistent cursor; must be on the
last record of the current page */
mtr_t* mtr) /* in: mtr */
{
ulint next_page_no;
ulint space;
page_t* page;
page_t* next_page;
ut_a(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
ut_ad(btr_pcur_is_after_last_on_page(cursor, mtr));
cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
page = btr_pcur_get_page(cursor);
next_page_no = btr_page_get_next(page, mtr);
space = buf_frame_get_space_id(page);
ut_ad(next_page_no != FIL_NULL);
next_page = btr_page_get(space, next_page_no, cursor->latch_mode, mtr);
ut_a(page_is_comp(next_page) == page_is_comp(page));
buf_block_align(next_page)->check_index_page_at_flush = TRUE;
btr_leaf_page_release(page, cursor->latch_mode, mtr);
page_cur_set_before_first(next_page, btr_pcur_get_page_cur(cursor));
page_check_dir(next_page);
}
/*************************************************************
Moves the persistent cursor backward if it is on the first record of the page.
Commits mtr. Note that to prevent a possible deadlock, the operation
first stores the position of the cursor, commits mtr, acquires the necessary
latches and restores the cursor position again before returning. The
alphabetical position of the cursor is guaranteed to be sensible on
return, but it may happen that the cursor is not positioned on the last
record of any page, because the structure of the tree may have changed
during the time when the cursor had no latches. */
void
btr_pcur_move_backward_from_page(
/*=============================*/
btr_pcur_t* cursor, /* in: persistent cursor, must be on the first
record of the current page */
mtr_t* mtr) /* in: mtr */
{
ulint prev_page_no;
ulint space;
page_t* page;
page_t* prev_page;
ulint latch_mode;
ulint latch_mode2;
ut_a(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
ut_ad(btr_pcur_is_before_first_on_page(cursor, mtr));
ut_ad(!btr_pcur_is_before_first_in_tree(cursor, mtr));
latch_mode = cursor->latch_mode;
if (latch_mode == BTR_SEARCH_LEAF) {
latch_mode2 = BTR_SEARCH_PREV;
} else if (latch_mode == BTR_MODIFY_LEAF) {
latch_mode2 = BTR_MODIFY_PREV;
} else {
latch_mode2 = 0; /* To eliminate compiler warning */
ut_error;
}
btr_pcur_store_position(cursor, mtr);
mtr_commit(mtr);
mtr_start(mtr);
btr_pcur_restore_position(latch_mode2, cursor, mtr);
page = btr_pcur_get_page(cursor);
prev_page_no = btr_page_get_prev(page, mtr);
space = buf_frame_get_space_id(page);
if (btr_pcur_is_before_first_on_page(cursor, mtr)
&& (prev_page_no != FIL_NULL)) {
prev_page = btr_pcur_get_btr_cur(cursor)->left_page;
btr_leaf_page_release(page, latch_mode, mtr);
page_cur_set_after_last(prev_page,
btr_pcur_get_page_cur(cursor));
} else if (prev_page_no != FIL_NULL) {
/* The repositioned cursor did not end on an infimum record on
a page. Cursor repositioning acquired a latch also on the
previous page, but we do not need the latch: release it. */
prev_page = btr_pcur_get_btr_cur(cursor)->left_page;
btr_leaf_page_release(prev_page, latch_mode, mtr);
}
cursor->latch_mode = latch_mode;
cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
}
/*************************************************************
Moves the persistent cursor to the previous record in the tree. If no records
are left, the cursor stays 'before first in tree'. */
ibool
btr_pcur_move_to_prev(
/*==================*/
/* out: TRUE if the cursor was not before first
in tree */
btr_pcur_t* cursor, /* in: persistent cursor; NOTE that the
function may release the page latch */
mtr_t* mtr) /* in: mtr */
{
ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
if (btr_pcur_is_before_first_on_page(cursor, mtr)) {
if (btr_pcur_is_before_first_in_tree(cursor, mtr)) {
return(FALSE);
}
btr_pcur_move_backward_from_page(cursor, mtr);
return(TRUE);
}
btr_pcur_move_to_prev_on_page(cursor, mtr);
return(TRUE);
}
/******************************************************************
If mode is PAGE_CUR_G or PAGE_CUR_GE, opens a persistent cursor on the first
user record satisfying the search condition, in the case PAGE_CUR_L or
PAGE_CUR_LE, on the last user record. If no such user record exists, then
in the first case sets the cursor after last in tree, and in the latter case
before first in tree. The latching mode must be BTR_SEARCH_LEAF or
BTR_MODIFY_LEAF. */
void
btr_pcur_open_on_user_rec(
/*======================*/
dict_index_t* index, /* in: index */
dtuple_t* tuple, /* in: tuple on which search done */
ulint mode, /* in: PAGE_CUR_L, ... */
ulint latch_mode, /* in: BTR_SEARCH_LEAF or
BTR_MODIFY_LEAF */
btr_pcur_t* cursor, /* in: memory buffer for persistent
cursor */
mtr_t* mtr) /* in: mtr */
{
btr_pcur_open(index, tuple, mode, latch_mode, cursor, mtr);
if ((mode == PAGE_CUR_GE) || (mode == PAGE_CUR_G)) {
if (btr_pcur_is_after_last_on_page(cursor, mtr)) {
btr_pcur_move_to_next_user_rec(cursor, mtr);
}
} else {
ut_ad((mode == PAGE_CUR_LE) || (mode == PAGE_CUR_L));
/* Not implemented yet */
ut_error;
}
}

1648
btr/btr0sea.c Normal file

File diff suppressed because it is too large Load diff

24
buf/Makefile.am Normal file
View file

@ -0,0 +1,24 @@
# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
# & Innobase Oy
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
include ../include/Makefile.i
noinst_LIBRARIES = libbuf.a
libbuf_a_SOURCES = buf0buf.c buf0flu.c buf0lru.c buf0rea.c
EXTRA_PROGRAMS =

2410
buf/buf0buf.c Normal file

File diff suppressed because it is too large Load diff

1065
buf/buf0flu.c Normal file

File diff suppressed because it is too large Load diff

1056
buf/buf0lru.c Normal file

File diff suppressed because it is too large Load diff

726
buf/buf0rea.c Normal file
View file

@ -0,0 +1,726 @@
/******************************************************
The database buffer read
(c) 1995 Innobase Oy
Created 11/5/1995 Heikki Tuuri
*******************************************************/
#include "buf0rea.h"
#include "fil0fil.h"
#include "mtr0mtr.h"
#include "buf0buf.h"
#include "buf0flu.h"
#include "buf0lru.h"
#include "ibuf0ibuf.h"
#include "log0recv.h"
#include "trx0sys.h"
#include "os0file.h"
#include "srv0start.h"
extern ulint srv_read_ahead_rnd;
extern ulint srv_read_ahead_seq;
extern ulint srv_buf_pool_reads;
/* The size in blocks of the area where the random read-ahead algorithm counts
the accessed pages when deciding whether to read-ahead */
#define BUF_READ_AHEAD_RANDOM_AREA BUF_READ_AHEAD_AREA
/* There must be at least this many pages in buf_pool in the area to start
a random read-ahead */
#define BUF_READ_AHEAD_RANDOM_THRESHOLD (5 + BUF_READ_AHEAD_RANDOM_AREA / 8)
/* The linear read-ahead area size */
#define BUF_READ_AHEAD_LINEAR_AREA BUF_READ_AHEAD_AREA
/* The linear read-ahead threshold */
#define BUF_READ_AHEAD_LINEAR_THRESHOLD (3 * BUF_READ_AHEAD_LINEAR_AREA / 8)
/* If there are buf_pool->curr_size per the number below pending reads, then
read-ahead is not done: this is to prevent flooding the buffer pool with
i/o-fixed buffer blocks */
#define BUF_READ_AHEAD_PEND_LIMIT 2
/************************************************************************
Low-level function which reads a page asynchronously from a file to the
buffer buf_pool if it is not already there, in which case does nothing.
Sets the io_fix flag and sets an exclusive lock on the buffer frame. The
flag is cleared and the x-lock released by an i/o-handler thread. */
static
ulint
buf_read_page_low(
/*==============*/
/* out: 1 if a read request was queued, 0 if the page
already resided in buf_pool, or if the page is in
the doublewrite buffer blocks in which case it is never
read into the pool, or if the tablespace does not
exist or is being dropped */
ulint* err, /* out: DB_SUCCESS or DB_TABLESPACE_DELETED if we are
trying to read from a non-existent tablespace, or a
tablespace which is just now being dropped */
ibool sync, /* in: TRUE if synchronous aio is desired */
ulint mode, /* in: BUF_READ_IBUF_PAGES_ONLY, ...,
ORed to OS_AIO_SIMULATED_WAKE_LATER (see below
at read-ahead functions) */
ulint space, /* in: space id */
ib_longlong tablespace_version, /* in: if the space memory object has
this timestamp different from what we are giving here,
treat the tablespace as dropped; this is a timestamp we
use to stop dangling page reads from a tablespace
which we have DISCARDed + IMPORTed back */
ulint offset) /* in: page number */
{
buf_block_t* block;
ulint wake_later;
*err = DB_SUCCESS;
wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER;
mode = mode & ~OS_AIO_SIMULATED_WAKE_LATER;
if (trx_doublewrite && space == TRX_SYS_SPACE
&& ( (offset >= trx_doublewrite->block1
&& offset < trx_doublewrite->block1
+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
|| (offset >= trx_doublewrite->block2
&& offset < trx_doublewrite->block2
+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE))) {
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Warning: trying to read doublewrite buffer page %lu\n",
(ulong) offset);
return(0);
}
#ifdef UNIV_LOG_DEBUG
if (space % 2 == 1) {
/* We are updating a replicate space while holding the
log mutex: the read must be handled before other reads
which might incur ibuf operations and thus write to the log */
fputs("Log debug: reading replicate page in sync mode\n",
stderr);
sync = TRUE;
}
#endif
if (ibuf_bitmap_page(offset) || trx_sys_hdr_page(space, offset)) {
/* Trx sys header is so low in the latching order that we play
safe and do not leave the i/o-completion to an asynchronous
i/o-thread. Ibuf bitmap pages must always be read with
syncronous i/o, to make sure they do not get involved in
thread deadlocks. */
sync = TRUE;
}
/* The following call will also check if the tablespace does not exist
or is being dropped; if we succeed in initing the page in the buffer
pool for read, then DISCARD cannot proceed until the read has
completed */
block = buf_page_init_for_read(err, mode, space, tablespace_version,
offset);
if (block == NULL) {
return(0);
}
#ifdef UNIV_DEBUG
if (buf_debug_prints) {
fprintf(stderr,
"Posting read request for page %lu, sync %lu\n",
(ulong) offset,
(ulong) sync);
}
#endif
ut_a(block->state == BUF_BLOCK_FILE_PAGE);
*err = fil_io(OS_FILE_READ | wake_later,
sync, space,
offset, 0, UNIV_PAGE_SIZE,
(void*)block->frame, (void*)block);
ut_a(*err == DB_SUCCESS);
if (sync) {
/* The i/o is already completed when we arrive from
fil_read */
buf_page_io_complete(block);
}
return(1);
}
/************************************************************************
Applies a random read-ahead in buf_pool if there are at least a threshold
value of accessed pages from the random read-ahead area. Does not read any
page, not even the one at the position (space, offset), if the read-ahead
mechanism is not activated. NOTE 1: the calling thread may own latches on
pages: to avoid deadlocks this function must be written such that it cannot
end up waiting for these latches! NOTE 2: the calling thread must want
access to the page given: this rule is set to prevent unintended read-aheads
performed by ibuf routines, a situation which could result in a deadlock if
the OS does not support asynchronous i/o. */
static
ulint
buf_read_ahead_random(
/*==================*/
/* out: number of page read requests issued; NOTE
that if we read ibuf pages, it may happen that
the page at the given page number does not get
read even if we return a value > 0! */
ulint space, /* in: space id */
ulint offset) /* in: page number of a page which the current thread
wants to access */
{
ib_longlong tablespace_version;
buf_block_t* block;
ulint recent_blocks = 0;
ulint count;
ulint LRU_recent_limit;
ulint ibuf_mode;
ulint low, high;
ulint err;
ulint i;
if (srv_startup_is_before_trx_rollback_phase) {
/* No read-ahead to avoid thread deadlocks */
return(0);
}
if (ibuf_bitmap_page(offset) || trx_sys_hdr_page(space, offset)) {
/* If it is an ibuf bitmap page or trx sys hdr, we do
no read-ahead, as that could break the ibuf page access
order */
return(0);
}
/* Remember the tablespace version before we ask te tablespace size
below: if DISCARD + IMPORT changes the actual .ibd file meanwhile, we
do not try to read outside the bounds of the tablespace! */
tablespace_version = fil_space_get_version(space);
low = (offset / BUF_READ_AHEAD_RANDOM_AREA)
* BUF_READ_AHEAD_RANDOM_AREA;
high = (offset / BUF_READ_AHEAD_RANDOM_AREA + 1)
* BUF_READ_AHEAD_RANDOM_AREA;
if (high > fil_space_get_size(space)) {
high = fil_space_get_size(space);
}
/* Get the minimum LRU_position field value for an initial segment
of the LRU list, to determine which blocks have recently been added
to the start of the list. */
LRU_recent_limit = buf_LRU_get_recent_limit();
mutex_enter(&(buf_pool->mutex));
if (buf_pool->n_pend_reads >
buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
mutex_exit(&(buf_pool->mutex));
return(0);
}
/* Count how many blocks in the area have been recently accessed,
that is, reside near the start of the LRU list. */
for (i = low; i < high; i++) {
block = buf_page_hash_get(space, i);
if ((block)
&& (block->LRU_position > LRU_recent_limit)
&& block->accessed) {
recent_blocks++;
}
}
mutex_exit(&(buf_pool->mutex));
if (recent_blocks < BUF_READ_AHEAD_RANDOM_THRESHOLD) {
/* Do nothing */
return(0);
}
/* Read all the suitable blocks within the area */
if (ibuf_inside()) {
ibuf_mode = BUF_READ_IBUF_PAGES_ONLY;
} else {
ibuf_mode = BUF_READ_ANY_PAGE;
}
count = 0;
for (i = low; i < high; i++) {
/* It is only sensible to do read-ahead in the non-sync aio
mode: hence FALSE as the first parameter */
if (!ibuf_bitmap_page(i)) {
count += buf_read_page_low(&err, FALSE, ibuf_mode
| OS_AIO_SIMULATED_WAKE_LATER,
space, tablespace_version, i);
if (err == DB_TABLESPACE_DELETED) {
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Warning: in random readahead trying to access tablespace\n"
"InnoDB: %lu page no. %lu,\n"
"InnoDB: but the tablespace does not exist or is just being dropped.\n",
(ulong) space, (ulong) i);
}
}
}
/* In simulated aio we wake the aio handler threads only after
queuing all aio requests, in native aio the following call does
nothing: */
os_aio_simulated_wake_handler_threads();
#ifdef UNIV_DEBUG
if (buf_debug_prints && (count > 0)) {
fprintf(stderr,
"Random read-ahead space %lu offset %lu pages %lu\n",
(ulong) space, (ulong) offset,
(ulong) count);
}
#endif /* UNIV_DEBUG */
++srv_read_ahead_rnd;
return(count);
}
/************************************************************************
High-level function which reads a page asynchronously from a file to the
buffer buf_pool if it is not already there. Sets the io_fix flag and sets
an exclusive lock on the buffer frame. The flag is cleared and the x-lock
released by the i/o-handler thread. Does a random read-ahead if it seems
sensible. */
ulint
buf_read_page(
/*==========*/
/* out: number of page read requests issued: this can
be > 1 if read-ahead occurred */
ulint space, /* in: space id */
ulint offset) /* in: page number */
{
ib_longlong tablespace_version;
ulint count;
ulint count2;
ulint err;
tablespace_version = fil_space_get_version(space);
count = buf_read_ahead_random(space, offset);
/* We do the i/o in the synchronous aio mode to save thread
switches: hence TRUE */
count2 = buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
tablespace_version, offset);
srv_buf_pool_reads+= count2;
if (err == DB_TABLESPACE_DELETED) {
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Error: trying to access tablespace %lu page no. %lu,\n"
"InnoDB: but the tablespace does not exist or is just being dropped.\n",
(ulong) space, (ulong) offset);
}
/* Flush pages from the end of the LRU list if necessary */
buf_flush_free_margin();
return(count + count2);
}
/************************************************************************
Applies linear read-ahead if in the buf_pool the page is a border page of
a linear read-ahead area and all the pages in the area have been accessed.
Does not read any page if the read-ahead mechanism is not activated. Note
that the the algorithm looks at the 'natural' adjacent successor and
predecessor of the page, which on the leaf level of a B-tree are the next
and previous page in the chain of leaves. To know these, the page specified
in (space, offset) must already be present in the buf_pool. Thus, the
natural way to use this function is to call it when a page in the buf_pool
is accessed the first time, calling this function just after it has been
bufferfixed.
NOTE 1: as this function looks at the natural predecessor and successor
fields on the page, what happens, if these are not initialized to any
sensible value? No problem, before applying read-ahead we check that the
area to read is within the span of the space, if not, read-ahead is not
applied. An uninitialized value may result in a useless read operation, but
only very improbably.
NOTE 2: the calling thread may own latches on pages: to avoid deadlocks this
function must be written such that it cannot end up waiting for these
latches!
NOTE 3: the calling thread must want access to the page given: this rule is
set to prevent unintended read-aheads performed by ibuf routines, a situation
which could result in a deadlock if the OS does not support asynchronous io. */
ulint
buf_read_ahead_linear(
/*==================*/
/* out: number of page read requests issued */
ulint space, /* in: space id */
ulint offset) /* in: page number of a page; NOTE: the current thread
must want access to this page (see NOTE 3 above) */
{
ib_longlong tablespace_version;
buf_block_t* block;
buf_frame_t* frame;
buf_block_t* pred_block = NULL;
ulint pred_offset;
ulint succ_offset;
ulint count;
int asc_or_desc;
ulint new_offset;
ulint fail_count;
ulint ibuf_mode;
ulint low, high;
ulint err;
ulint i;
if (srv_startup_is_before_trx_rollback_phase) {
/* No read-ahead to avoid thread deadlocks */
return(0);
}
if (ibuf_bitmap_page(offset) || trx_sys_hdr_page(space, offset)) {
/* If it is an ibuf bitmap page or trx sys hdr, we do
no read-ahead, as that could break the ibuf page access
order */
return(0);
}
low = (offset / BUF_READ_AHEAD_LINEAR_AREA)
* BUF_READ_AHEAD_LINEAR_AREA;
high = (offset / BUF_READ_AHEAD_LINEAR_AREA + 1)
* BUF_READ_AHEAD_LINEAR_AREA;
if ((offset != low) && (offset != high - 1)) {
/* This is not a border page of the area: return */
return(0);
}
/* Remember the tablespace version before we ask te tablespace size
below: if DISCARD + IMPORT changes the actual .ibd file meanwhile, we
do not try to read outside the bounds of the tablespace! */
tablespace_version = fil_space_get_version(space);
mutex_enter(&(buf_pool->mutex));
if (high > fil_space_get_size(space)) {
mutex_exit(&(buf_pool->mutex));
/* The area is not whole, return */
return(0);
}
if (buf_pool->n_pend_reads >
buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
mutex_exit(&(buf_pool->mutex));
return(0);
}
/* Check that almost all pages in the area have been accessed; if
offset == low, the accesses must be in a descending order, otherwise,
in an ascending order. */
asc_or_desc = 1;
if (offset == low) {
asc_or_desc = -1;
}
fail_count = 0;
for (i = low; i < high; i++) {
block = buf_page_hash_get(space, i);
if ((block == NULL) || !block->accessed) {
/* Not accessed */
fail_count++;
} else if (pred_block && (ut_ulint_cmp(block->LRU_position,
pred_block->LRU_position)
!= asc_or_desc)) {
/* Accesses not in the right order */
fail_count++;
pred_block = block;
}
}
if (fail_count > BUF_READ_AHEAD_LINEAR_AREA -
BUF_READ_AHEAD_LINEAR_THRESHOLD) {
/* Too many failures: return */
mutex_exit(&(buf_pool->mutex));
return(0);
}
/* If we got this far, we know that enough pages in the area have
been accessed in the right order: linear read-ahead can be sensible */
block = buf_page_hash_get(space, offset);
if (block == NULL) {
mutex_exit(&(buf_pool->mutex));
return(0);
}
frame = block->frame;
/* Read the natural predecessor and successor page addresses from
the page; NOTE that because the calling thread may have an x-latch
on the page, we do not acquire an s-latch on the page, this is to
prevent deadlocks. Even if we read values which are nonsense, the
algorithm will work. */
pred_offset = fil_page_get_prev(frame);
succ_offset = fil_page_get_next(frame);
mutex_exit(&(buf_pool->mutex));
if ((offset == low) && (succ_offset == offset + 1)) {
/* This is ok, we can continue */
new_offset = pred_offset;
} else if ((offset == high - 1) && (pred_offset == offset - 1)) {
/* This is ok, we can continue */
new_offset = succ_offset;
} else {
/* Successor or predecessor not in the right order */
return(0);
}
low = (new_offset / BUF_READ_AHEAD_LINEAR_AREA)
* BUF_READ_AHEAD_LINEAR_AREA;
high = (new_offset / BUF_READ_AHEAD_LINEAR_AREA + 1)
* BUF_READ_AHEAD_LINEAR_AREA;
if ((new_offset != low) && (new_offset != high - 1)) {
/* This is not a border page of the area: return */
return(0);
}
if (high > fil_space_get_size(space)) {
/* The area is not whole, return */
return(0);
}
/* If we got this far, read-ahead can be sensible: do it */
if (ibuf_inside()) {
ibuf_mode = BUF_READ_IBUF_PAGES_ONLY;
} else {
ibuf_mode = BUF_READ_ANY_PAGE;
}
count = 0;
/* Since Windows XP seems to schedule the i/o handler thread
very eagerly, and consequently it does not wait for the
full read batch to be posted, we use special heuristics here */
os_aio_simulated_put_read_threads_to_sleep();
for (i = low; i < high; i++) {
/* It is only sensible to do read-ahead in the non-sync
aio mode: hence FALSE as the first parameter */
if (!ibuf_bitmap_page(i)) {
count += buf_read_page_low(&err, FALSE, ibuf_mode
| OS_AIO_SIMULATED_WAKE_LATER,
space, tablespace_version, i);
if (err == DB_TABLESPACE_DELETED) {
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Warning: in linear readahead trying to access tablespace\n"
"InnoDB: %lu page no. %lu,\n"
"InnoDB: but the tablespace does not exist or is just being dropped.\n",
(ulong) space, (ulong) i);
}
}
}
/* In simulated aio we wake the aio handler threads only after
queuing all aio requests, in native aio the following call does
nothing: */
os_aio_simulated_wake_handler_threads();
/* Flush pages from the end of the LRU list if necessary */
buf_flush_free_margin();
#ifdef UNIV_DEBUG
if (buf_debug_prints && (count > 0)) {
fprintf(stderr,
"LINEAR read-ahead space %lu offset %lu pages %lu\n",
(ulong) space, (ulong) offset, (ulong) count);
}
#endif /* UNIV_DEBUG */
++srv_read_ahead_seq;
return(count);
}
/************************************************************************
Issues read requests for pages which the ibuf module wants to read in, in
order to contract the insert buffer tree. Technically, this function is like
a read-ahead function. */
void
buf_read_ibuf_merge_pages(
/*======================*/
ibool sync, /* in: TRUE if the caller wants this function
to wait for the highest address page to get
read in, before this function returns */
ulint* space_ids, /* in: array of space ids */
ib_longlong* space_versions,/* in: the spaces must have this version
number (timestamp), otherwise we discard the
read; we use this to cancel reads if
DISCARD + IMPORT may have changed the
tablespace size */
ulint* page_nos, /* in: array of page numbers to read, with the
highest page number the last in the array */
ulint n_stored) /* in: number of page numbers in the array */
{
ulint err;
ulint i;
ut_ad(!ibuf_inside());
#ifdef UNIV_IBUF_DEBUG
ut_a(n_stored < UNIV_PAGE_SIZE);
#endif
while (buf_pool->n_pend_reads >
buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
os_thread_sleep(500000);
}
for (i = 0; i < n_stored; i++) {
if ((i + 1 == n_stored) && sync) {
buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE,
space_ids[i], space_versions[i], page_nos[i]);
} else {
buf_read_page_low(&err, FALSE, BUF_READ_ANY_PAGE,
space_ids[i], space_versions[i], page_nos[i]);
}
if (err == DB_TABLESPACE_DELETED) {
/* We have deleted or are deleting the single-table
tablespace: remove the entries for that page */
ibuf_merge_or_delete_for_page(NULL, space_ids[i],
page_nos[i], FALSE);
}
}
os_aio_simulated_wake_handler_threads();
/* Flush pages from the end of the LRU list if necessary */
buf_flush_free_margin();
#ifdef UNIV_DEBUG
if (buf_debug_prints) {
fprintf(stderr,
"Ibuf merge read-ahead space %lu pages %lu\n",
(ulong) space_ids[0], (ulong) n_stored);
}
#endif /* UNIV_DEBUG */
}
/************************************************************************
Issues read requests for pages which recovery wants to read in. */
void
buf_read_recv_pages(
/*================*/
ibool sync, /* in: TRUE if the caller wants this function
to wait for the highest address page to get
read in, before this function returns */
ulint space, /* in: space id */
ulint* page_nos, /* in: array of page numbers to read, with the
highest page number the last in the array */
ulint n_stored) /* in: number of page numbers in the array */
{
ib_longlong tablespace_version;
ulint count;
ulint err;
ulint i;
tablespace_version = fil_space_get_version(space);
for (i = 0; i < n_stored; i++) {
count = 0;
os_aio_print_debug = FALSE;
while (buf_pool->n_pend_reads >= recv_n_pool_free_frames / 2) {
os_aio_simulated_wake_handler_threads();
os_thread_sleep(500000);
count++;
if (count > 100) {
fprintf(stderr,
"InnoDB: Error: InnoDB has waited for 50 seconds for pending\n"
"InnoDB: reads to the buffer pool to be finished.\n"
"InnoDB: Number of pending reads %lu, pending pread calls %lu\n",
(ulong) buf_pool->n_pend_reads,
(ulong)os_file_n_pending_preads);
os_aio_print_debug = TRUE;
}
}
os_aio_print_debug = FALSE;
if ((i + 1 == n_stored) && sync) {
buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
tablespace_version, page_nos[i]);
} else {
buf_read_page_low(&err, FALSE, BUF_READ_ANY_PAGE
| OS_AIO_SIMULATED_WAKE_LATER,
space, tablespace_version, page_nos[i]);
}
}
os_aio_simulated_wake_handler_threads();
/* Flush pages from the end of the LRU list if necessary */
buf_flush_free_margin();
#ifdef UNIV_DEBUG
if (buf_debug_prints) {
fprintf(stderr,
"Recovery applies read-ahead pages %lu\n", (ulong) n_stored);
}
#endif /* UNIV_DEBUG */
}

137
configure.in Normal file
View file

@ -0,0 +1,137 @@
# Process this file with autoconf to produce a configure script
AC_INIT
AC_CANONICAL_SYSTEM
AM_MAINTAINER_MODE
AM_CONFIG_HEADER(ib_config.h)
AM_INIT_AUTOMAKE(ib, 0.90)
# This is need before AC_PROG_CC
#
if test "x${CFLAGS-}" = x ; then
cflags_is_set=no
else
cflags_is_set=yes
fi
if test "x${CPPFLAGS-}" = x ; then
cppflags_is_set=no
else
cppflags_is_set=yes
fi
if test "x${LDFLAGS-}" = x ; then
ldflags_is_set=no
else
ldflags_is_set=yes
fi
# The following hack should ensure that configure doesn't add optimizing
# or debugging flags to CFLAGS or CXXFLAGS
CFLAGS="$CFLAGS "
CXXFLAGS="$CXXFLAGS "
AC_PROG_CC
AC_PROG_RANLIB
AC_PROG_INSTALL
AC_PROG_LIBTOOL
AC_CHECK_HEADERS(aio.h sched.h)
AC_CHECK_SIZEOF(int, 4)
AC_CHECK_SIZEOF(long, 4)
AC_CHECK_SIZEOF(void*, 4)
AC_CHECK_FUNCS(sched_yield)
AC_CHECK_FUNCS(fdatasync)
AC_CHECK_FUNCS(localtime_r)
#AC_CHECK_FUNCS(readdir_r) MySQL checks that it has also the right args.
# Some versions of Unix only take 2 arguments.
#AC_C_INLINE Already checked in MySQL
AC_C_BIGENDIAN
# Build optimized or debug version ?
# First check for gcc and g++
if test "$ac_cv_prog_gcc" = "yes"
then
DEBUG_CFLAGS="-g"
DEBUG_OPTIMIZE_CC="-O"
OPTIMIZE_CFLAGS="$MAX_C_OPTIMIZE"
else
DEBUG_CFLAGS="-g"
DEBUG_OPTIMIZE_CC=""
OPTIMIZE_CFLAGS="-O"
fi
if test "$ac_cv_prog_cxx_g" = "yes"
then
DEBUG_CXXFLAGS="-g"
DEBUG_OPTIMIZE_CXX="-O"
OPTIMIZE_CXXFLAGS="-O3"
else
DEBUG_CXXFLAGS="-g"
DEBUG_OPTIMIZE_CXX=""
OPTIMIZE_CXXFLAGS="-O"
fi
AC_ARG_WITH(debug,
[ --without-debug Build a production version without debugging code],
[with_debug=$withval],
[with_debug=no])
if test "$with_debug" = "yes"
then
# Medium debug.
CFLAGS="$DEBUG_CFLAGS $DEBUG_OPTIMIZE_CC -DDBUG_ON -DSAFE_MUTEX $CFLAGS"
CXXFLAGS="$DEBUG_CXXFLAGS $DEBUG_OPTIMIZE_CXX -DSAFE_MUTEX $CXXFLAGS"
elif test "$with_debug" = "full"
then
# Full debug. Very slow in some cases
CFLAGS="$DEBUG_CFLAGS -DDBUG_ON -DSAFE_MUTEX -DSAFEMALLOC $CFLAGS"
CXXFLAGS="$DEBUG_CXXFLAGS -DSAFE_MUTEX -DSAFEMALLOC $CXXFLAGS"
else
# Optimized version. No debug
CFLAGS="$OPTIMIZE_CFLAGS -DDBUG_OFF $CFLAGS -DDEBUG_OFF"
CXXFLAGS="$OPTIMIZE_CXXFLAGS -DDBUG_OFF $CXXFLAGS -DDEBUG_OFF"
fi
case "$target_os" in
lin*)
CFLAGS="$CFLAGS -DUNIV_LINUX";;
hpux10*)
CFLAGS="$CFLAGS -DUNIV_MUST_NOT_INLINE -DUNIV_HPUX -DUNIV_HPUX10";;
hp*)
CFLAGS="$CFLAGS -DUNIV_MUST_NOT_INLINE -DUNIV_HPUX";;
aix*)
CFLAGS="$CFLAGS -DUNIV_AIX";;
irix*)
CFLAGS="$CFLAGS -DUNIV_MUST_NOT_INLINE";;
osf*)
CFLAGS="$CFLAGS -DUNIV_MUST_NOT_INLINE";;
sysv5uw7*)
# Problem when linking on SCO
CFLAGS="$CFLAGS -DUNIV_MUST_NOT_INLINE";;
openbsd*)
CFLAGS="$CFLAGS -DUNIV_MUST_NOT_INLINE";;
esac
case "$target" in
i[[4567]]86-*-*)
CFLAGS="$CFLAGS -DUNIV_INTEL_X86";;
# The compiler on Linux/S390 does not seem to have inlining
s390-*-*)
CFLAGS="$CFLAGS -DUNIV_MUST_NOT_INLINE";;
esac
# must go in pair with AR as set by MYSQL_CHECK_AR
if test -z "$ARFLAGS"
then
ARFLAGS="cru"
fi
AC_SUBST(ARFLAGS)
AC_OUTPUT(Makefile os/Makefile ut/Makefile btr/Makefile dnl
buf/Makefile data/Makefile dnl
dict/Makefile dyn/Makefile dnl
eval/Makefile fil/Makefile fsp/Makefile fut/Makefile dnl
ha/Makefile ibuf/Makefile include/Makefile dnl
lock/Makefile log/Makefile dnl
mach/Makefile mem/Makefile mtr/Makefile dnl
page/Makefile pars/Makefile que/Makefile dnl
read/Makefile rem/Makefile row/Makefile dnl
srv/Makefile sync/Makefile thr/Makefile trx/Makefile dnl
usr/Makefile)

24
data/Makefile.am Normal file
View file

@ -0,0 +1,24 @@
# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
# & Innobase Oy
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
include ../include/Makefile.i
noinst_LIBRARIES = libdata.a
libdata_a_SOURCES = data0data.c data0type.c
EXTRA_PROGRAMS =

662
data/data0data.c Normal file
View file

@ -0,0 +1,662 @@
/************************************************************************
SQL data field and tuple
(c) 1994-1996 Innobase Oy
Created 5/30/1994 Heikki Tuuri
*************************************************************************/
#include "data0data.h"
#ifdef UNIV_NONINL
#include "data0data.ic"
#endif
#include "rem0rec.h"
#include "rem0cmp.h"
#include "page0page.h"
#include "dict0dict.h"
#include "btr0cur.h"
byte data_error; /* data pointers of tuple fields are initialized
to point here for error checking */
#ifdef UNIV_DEBUG
ulint data_dummy; /* this is used to fool the compiler in
dtuple_validate */
#endif /* UNIV_DEBUG */
/* Some non-inlined functions used in the MySQL interface: */
void
dfield_set_data_noninline(
dfield_t* field, /* in: field */
void* data, /* in: data */
ulint len) /* in: length or UNIV_SQL_NULL */
{
dfield_set_data(field, data, len);
}
void*
dfield_get_data_noninline(
dfield_t* field) /* in: field */
{
return(dfield_get_data(field));
}
ulint
dfield_get_len_noninline(
dfield_t* field) /* in: field */
{
return(dfield_get_len(field));
}
ulint
dtuple_get_n_fields_noninline(
dtuple_t* tuple) /* in: tuple */
{
return(dtuple_get_n_fields(tuple));
}
dfield_t*
dtuple_get_nth_field_noninline(
dtuple_t* tuple, /* in: tuple */
ulint n) /* in: index of field */
{
return(dtuple_get_nth_field(tuple, n));
}
/*************************************************************************
Tests if dfield data length and content is equal to the given. */
ibool
dfield_data_is_binary_equal(
/*========================*/
/* out: TRUE if equal */
dfield_t* field, /* in: field */
ulint len, /* in: data length or UNIV_SQL_NULL */
byte* data) /* in: data */
{
if (len != field->len) {
return(FALSE);
}
if (len == UNIV_SQL_NULL) {
return(TRUE);
}
if (0 != ut_memcmp(field->data, data, len)) {
return(FALSE);
}
return(TRUE);
}
/****************************************************************
Returns TRUE if lengths of two dtuples are equal and respective data fields
in them are equal when compared with collation in char fields (not as binary
strings). */
ibool
dtuple_datas_are_ordering_equal(
/*============================*/
/* out: TRUE if length and fieds are equal
when compared with cmp_data_data:
NOTE: in character type fields some letters
are identified with others! (collation) */
dtuple_t* tuple1, /* in: tuple 1 */
dtuple_t* tuple2) /* in: tuple 2 */
{
dfield_t* field1;
dfield_t* field2;
ulint n_fields;
ulint i;
ut_ad(tuple1 && tuple2);
ut_ad(tuple1->magic_n == DATA_TUPLE_MAGIC_N);
ut_ad(tuple2->magic_n == DATA_TUPLE_MAGIC_N);
ut_ad(dtuple_check_typed(tuple1));
ut_ad(dtuple_check_typed(tuple2));
n_fields = dtuple_get_n_fields(tuple1);
if (n_fields != dtuple_get_n_fields(tuple2)) {
return(FALSE);
}
for (i = 0; i < n_fields; i++) {
field1 = dtuple_get_nth_field(tuple1, i);
field2 = dtuple_get_nth_field(tuple2, i);
if (0 != cmp_dfield_dfield(field1, field2)) {
return(FALSE);
}
}
return(TRUE);
}
/*************************************************************************
Creates a dtuple for use in MySQL. */
dtuple_t*
dtuple_create_for_mysql(
/*====================*/
/* out, own created dtuple */
void** heap, /* out: created memory heap */
ulint n_fields) /* in: number of fields */
{
*heap = (void*)mem_heap_create(500);
return(dtuple_create(*((mem_heap_t**)heap), n_fields));
}
/*************************************************************************
Frees a dtuple used in MySQL. */
void
dtuple_free_for_mysql(
/*==================*/
void* heap) /* in: memory heap where tuple was created */
{
mem_heap_free((mem_heap_t*)heap);
}
/*************************************************************************
Sets number of fields used in a tuple. Normally this is set in
dtuple_create, but if you want later to set it smaller, you can use this. */
void
dtuple_set_n_fields(
/*================*/
dtuple_t* tuple, /* in: tuple */
ulint n_fields) /* in: number of fields */
{
ut_ad(tuple);
tuple->n_fields = n_fields;
tuple->n_fields_cmp = n_fields;
}
/**************************************************************
Checks that a data field is typed. */
static
ibool
dfield_check_typed_no_assert(
/*=========================*/
/* out: TRUE if ok */
dfield_t* field) /* in: data field */
{
if (dfield_get_type(field)->mtype > DATA_MYSQL
|| dfield_get_type(field)->mtype < DATA_VARCHAR) {
fprintf(stderr,
"InnoDB: Error: data field type %lu, len %lu\n",
(ulong) dfield_get_type(field)->mtype,
(ulong) dfield_get_len(field));
return(FALSE);
}
return(TRUE);
}
/**************************************************************
Checks that a data tuple is typed. */
ibool
dtuple_check_typed_no_assert(
/*=========================*/
/* out: TRUE if ok */
dtuple_t* tuple) /* in: tuple */
{
dfield_t* field;
ulint i;
if (dtuple_get_n_fields(tuple) > REC_MAX_N_FIELDS) {
fprintf(stderr,
"InnoDB: Error: index entry has %lu fields\n",
(ulong) dtuple_get_n_fields(tuple));
dump:
fputs("InnoDB: Tuple contents: ", stderr);
dtuple_print(stderr, tuple);
putc('\n', stderr);
return(FALSE);
}
for (i = 0; i < dtuple_get_n_fields(tuple); i++) {
field = dtuple_get_nth_field(tuple, i);
if (!dfield_check_typed_no_assert(field)) {
goto dump;
}
}
return(TRUE);
}
/**************************************************************
Checks that a data field is typed. Asserts an error if not. */
ibool
dfield_check_typed(
/*===============*/
/* out: TRUE if ok */
dfield_t* field) /* in: data field */
{
if (dfield_get_type(field)->mtype > DATA_MYSQL
|| dfield_get_type(field)->mtype < DATA_VARCHAR) {
fprintf(stderr,
"InnoDB: Error: data field type %lu, len %lu\n",
(ulong) dfield_get_type(field)->mtype,
(ulong) dfield_get_len(field));
ut_error;
}
return(TRUE);
}
/**************************************************************
Checks that a data tuple is typed. Asserts an error if not. */
ibool
dtuple_check_typed(
/*===============*/
/* out: TRUE if ok */
dtuple_t* tuple) /* in: tuple */
{
dfield_t* field;
ulint i;
for (i = 0; i < dtuple_get_n_fields(tuple); i++) {
field = dtuple_get_nth_field(tuple, i);
ut_a(dfield_check_typed(field));
}
return(TRUE);
}
#ifdef UNIV_DEBUG
/**************************************************************
Validates the consistency of a tuple which must be complete, i.e,
all fields must have been set. */
ibool
dtuple_validate(
/*============*/
/* out: TRUE if ok */
dtuple_t* tuple) /* in: tuple */
{
dfield_t* field;
byte* data;
ulint n_fields;
ulint len;
ulint i;
ulint j;
ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N);
n_fields = dtuple_get_n_fields(tuple);
/* We dereference all the data of each field to test
for memory traps */
for (i = 0; i < n_fields; i++) {
field = dtuple_get_nth_field(tuple, i);
len = dfield_get_len(field);
if (len != UNIV_SQL_NULL) {
data = field->data;
for (j = 0; j < len; j++) {
data_dummy += *data; /* fool the compiler not
to optimize out this
code */
data++;
}
}
}
ut_a(dtuple_check_typed(tuple));
return(TRUE);
}
#endif /* UNIV_DEBUG */
/*****************************************************************
Pretty prints a dfield value according to its data type. */
void
dfield_print(
/*=========*/
dfield_t* dfield) /* in: dfield */
{
byte* data;
ulint len;
ulint mtype;
ulint i;
len = dfield_get_len(dfield);
data = dfield_get_data(dfield);
if (len == UNIV_SQL_NULL) {
fputs("NULL", stderr);
return;
}
mtype = dtype_get_mtype(dfield_get_type(dfield));
if ((mtype == DATA_CHAR) || (mtype == DATA_VARCHAR)) {
for (i = 0; i < len; i++) {
int c = *data++;
putc(isprint(c) ? c : ' ', stderr);
}
} else if (mtype == DATA_INT) {
ut_a(len == 4); /* only works for 32-bit integers */
fprintf(stderr, "%d", (int)mach_read_from_4(data));
} else {
ut_error;
}
}
/*****************************************************************
Pretty prints a dfield value according to its data type. Also the hex string
is printed if a string contains non-printable characters. */
void
dfield_print_also_hex(
/*==================*/
dfield_t* dfield) /* in: dfield */
{
byte* data;
ulint len;
ulint mtype;
ulint i;
ibool print_also_hex;
len = dfield_get_len(dfield);
data = dfield_get_data(dfield);
if (len == UNIV_SQL_NULL) {
fputs("NULL", stderr);
return;
}
mtype = dtype_get_mtype(dfield_get_type(dfield));
if ((mtype == DATA_CHAR) || (mtype == DATA_VARCHAR)) {
print_also_hex = FALSE;
for (i = 0; i < len; i++) {
int c = *data++;
if (!isprint(c)) {
print_also_hex = TRUE;
c = ' ';
}
putc(c, stderr);
}
if (!print_also_hex) {
return;
}
fputs(" Hex: ", stderr);
data = dfield_get_data(dfield);
for (i = 0; i < len; i++) {
fprintf(stderr, "%02lx", (ulint)*data);
data++;
}
} else if (mtype == DATA_INT) {
ut_a(len == 4); /* only works for 32-bit integers */
fprintf(stderr, "%d", (int)mach_read_from_4(data));
} else {
ut_error;
}
}
/**************************************************************
The following function prints the contents of a tuple. */
void
dtuple_print(
/*=========*/
FILE* f, /* in: output stream */
dtuple_t* tuple) /* in: tuple */
{
dfield_t* field;
ulint n_fields;
ulint i;
n_fields = dtuple_get_n_fields(tuple);
fprintf(f, "DATA TUPLE: %lu fields;\n", (ulong) n_fields);
for (i = 0; i < n_fields; i++) {
fprintf(f, " %lu:", (ulong) i);
field = dtuple_get_nth_field(tuple, i);
if (field->len != UNIV_SQL_NULL) {
ut_print_buf(f, field->data, field->len);
} else {
fputs(" SQL NULL", f);
}
putc(';', f);
}
putc('\n', f);
ut_ad(dtuple_validate(tuple));
}
/******************************************************************
Moves parts of long fields in entry to the big record vector so that
the size of tuple drops below the maximum record size allowed in the
database. Moves data only from those fields which are not necessary
to determine uniquely the insertion place of the tuple in the index. */
big_rec_t*
dtuple_convert_big_rec(
/*===================*/
/* out, own: created big record vector,
NULL if we are not able to shorten
the entry enough, i.e., if there are
too many short fields in entry */
dict_index_t* index, /* in: index */
dtuple_t* entry, /* in: index entry */
ulint* ext_vec,/* in: array of externally stored fields,
or NULL: if a field already is externally
stored, then we cannot move it to the vector
this function returns */
ulint n_ext_vec)/* in: number of elements is ext_vec */
{
mem_heap_t* heap;
big_rec_t* vector;
dfield_t* dfield;
ulint size;
ulint n_fields;
ulint longest;
ulint longest_i = ULINT_MAX;
ibool is_externally_stored;
ulint i;
ulint j;
ut_a(dtuple_check_typed_no_assert(entry));
size = rec_get_converted_size(index, entry);
if (UNIV_UNLIKELY(size > 1000000000)) {
fprintf(stderr,
"InnoDB: Warning: tuple size very big: %lu\n", (ulong) size);
fputs("InnoDB: Tuple contents: ", stderr);
dtuple_print(stderr, entry);
putc('\n', stderr);
}
heap = mem_heap_create(size + dtuple_get_n_fields(entry)
* sizeof(big_rec_field_t) + 1000);
vector = mem_heap_alloc(heap, sizeof(big_rec_t));
vector->heap = heap;
vector->fields = mem_heap_alloc(heap, dtuple_get_n_fields(entry)
* sizeof(big_rec_field_t));
/* Decide which fields to shorten: the algorithm is to look for
the longest field whose type is DATA_BLOB */
n_fields = 0;
while (rec_get_converted_size(index, entry)
>= ut_min(page_get_free_space_of_empty(
index->table->comp) / 2,
REC_MAX_DATA_SIZE)) {
longest = 0;
for (i = dict_index_get_n_unique_in_tree(index);
i < dtuple_get_n_fields(entry); i++) {
/* Skip over fields which already are externally
stored */
is_externally_stored = FALSE;
if (ext_vec) {
for (j = 0; j < n_ext_vec; j++) {
if (ext_vec[j] == i) {
is_externally_stored = TRUE;
}
}
}
if (!is_externally_stored) {
dfield = dtuple_get_nth_field(entry, i);
if (dfield->len != UNIV_SQL_NULL &&
dfield->len > longest) {
longest = dfield->len;
longest_i = i;
}
}
}
/* We do not store externally fields which are smaller than
DICT_MAX_INDEX_COL_LEN */
ut_a(DICT_MAX_INDEX_COL_LEN > REC_1BYTE_OFFS_LIMIT);
if (longest < BTR_EXTERN_FIELD_REF_SIZE + 10
+ DICT_MAX_INDEX_COL_LEN) {
/* Cannot shorten more */
mem_heap_free(heap);
return(NULL);
}
/* Move data from field longest_i to big rec vector;
we do not let data size of the remaining entry
drop below 128 which is the limit for the 2-byte
offset storage format in a physical record. This
we accomplish by storing 128 bytes of data in entry
itself, and only the remaining part to big rec vec.
We store the first bytes locally to the record. Then
we can calculate all ordering fields in all indexes
from locally stored data. */
dfield = dtuple_get_nth_field(entry, longest_i);
vector->fields[n_fields].field_no = longest_i;
ut_a(dfield->len > DICT_MAX_INDEX_COL_LEN);
vector->fields[n_fields].len = dfield->len
- DICT_MAX_INDEX_COL_LEN;
vector->fields[n_fields].data = mem_heap_alloc(heap,
vector->fields[n_fields].len);
/* Copy data (from the end of field) to big rec vector */
ut_memcpy(vector->fields[n_fields].data,
((byte*)dfield->data) + dfield->len
- vector->fields[n_fields].len,
vector->fields[n_fields].len);
dfield->len = dfield->len - vector->fields[n_fields].len
+ BTR_EXTERN_FIELD_REF_SIZE;
/* Set the extern field reference in dfield to zero */
memset(((byte*)dfield->data)
+ dfield->len - BTR_EXTERN_FIELD_REF_SIZE,
0, BTR_EXTERN_FIELD_REF_SIZE);
n_fields++;
}
vector->n_fields = n_fields;
return(vector);
}
/******************************************************************
Puts back to entry the data stored in vector. Note that to ensure the
fields in entry can accommodate the data, vector must have been created
from entry with dtuple_convert_big_rec. */
void
dtuple_convert_back_big_rec(
/*========================*/
dict_index_t* index __attribute__((unused)), /* in: index */
dtuple_t* entry, /* in: entry whose data was put to vector */
big_rec_t* vector) /* in, own: big rec vector; it is
freed in this function */
{
dfield_t* dfield;
ulint i;
for (i = 0; i < vector->n_fields; i++) {
dfield = dtuple_get_nth_field(entry,
vector->fields[i].field_no);
/* Copy data from big rec vector */
ut_memcpy(((byte*)dfield->data)
+ dfield->len - BTR_EXTERN_FIELD_REF_SIZE,
vector->fields[i].data,
vector->fields[i].len);
dfield->len = dfield->len + vector->fields[i].len
- BTR_EXTERN_FIELD_REF_SIZE;
}
mem_heap_free(vector->heap);
}
/******************************************************************
Frees the memory in a big rec vector. */
void
dtuple_big_rec_free(
/*================*/
big_rec_t* vector) /* in, own: big rec vector; it is
freed in this function */
{
mem_heap_free(vector->heap);
}

260
data/data0type.c Normal file
View file

@ -0,0 +1,260 @@
/******************************************************
Data types
(c) 1996 Innobase Oy
Created 1/16/1996 Heikki Tuuri
*******************************************************/
#include "data0type.h"
#ifdef UNIV_NONINL
#include "data0type.ic"
#endif
/**********************************************************************
This function is used to find the storage length in bytes of the first n
characters for prefix indexes using a multibyte character set. The function
finds charset information and returns length of prefix_len characters in the
index field in bytes.
NOTE: the prototype of this function is copied from ha_innodb.cc! If you change
this function, you MUST change also the prototype here! */
ulint
innobase_get_at_most_n_mbchars(
/*===========================*/
/* out: number of bytes occupied by the first
n characters */
ulint charset_id, /* in: character set id */
ulint prefix_len, /* in: prefix length in bytes of the index
(this has to be divided by mbmaxlen to get the
number of CHARACTERS n in the prefix) */
ulint data_len, /* in: length of the string in bytes */
const char* str); /* in: character string */
/* At the database startup we store the default-charset collation number of
this MySQL installation to this global variable. If we have < 4.1.2 format
column definitions, or records in the insert buffer, we use this
charset-collation code for them. */
ulint data_mysql_default_charset_coll = 99999999;
dtype_t dtype_binary_val = {DATA_BINARY, 0, 0, 0, 0, 0};
dtype_t* dtype_binary = &dtype_binary_val;
/*************************************************************************
Determine how many bytes the first n characters of the given string occupy.
If the string is shorter than n characters, returns the number of bytes
the characters in the string occupy. */
ulint
dtype_get_at_most_n_mbchars(
/*========================*/
/* out: length of the prefix,
in bytes */
const dtype_t* dtype, /* in: data type */
ulint prefix_len, /* in: length of the requested
prefix, in characters, multiplied by
dtype_get_mbmaxlen(dtype) */
ulint data_len, /* in: length of str (in bytes) */
const char* str) /* in: the string whose prefix
length is being determined */
{
#ifndef UNIV_HOTBACKUP
ut_a(data_len != UNIV_SQL_NULL);
ut_ad(!dtype->mbmaxlen || !(prefix_len % dtype->mbmaxlen));
if (dtype->mbminlen != dtype->mbmaxlen) {
ut_a(!(prefix_len % dtype->mbmaxlen));
return(innobase_get_at_most_n_mbchars(
dtype_get_charset_coll(dtype->prtype),
prefix_len, data_len, str));
}
if (prefix_len < data_len) {
return(prefix_len);
}
return(data_len);
#else /* UNIV_HOTBACKUP */
/* This function depends on MySQL code that is not included in
InnoDB Hot Backup builds. Besides, this function should never
be called in InnoDB Hot Backup. */
ut_error;
#endif /* UNIV_HOTBACKUP */
}
/*************************************************************************
Checks if a data main type is a string type. Also a BLOB is considered a
string type. */
ibool
dtype_is_string_type(
/*=================*/
/* out: TRUE if string type */
ulint mtype) /* in: InnoDB main data type code: DATA_CHAR, ... */
{
if (mtype <= DATA_BLOB
|| mtype == DATA_MYSQL
|| mtype == DATA_VARMYSQL) {
return(TRUE);
}
return(FALSE);
}
/*************************************************************************
Checks if a type is a binary string type. Note that for tables created with
< 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column. For
those DATA_BLOB columns this function currently returns FALSE. */
ibool
dtype_is_binary_string_type(
/*========================*/
/* out: TRUE if binary string type */
ulint mtype, /* in: main data type */
ulint prtype) /* in: precise type */
{
if ((mtype == DATA_FIXBINARY)
|| (mtype == DATA_BINARY)
|| (mtype == DATA_BLOB && (prtype & DATA_BINARY_TYPE))) {
return(TRUE);
}
return(FALSE);
}
/*************************************************************************
Checks if a type is a non-binary string type. That is, dtype_is_string_type is
TRUE and dtype_is_binary_string_type is FALSE. Note that for tables created
with < 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column.
For those DATA_BLOB columns this function currently returns TRUE. */
ibool
dtype_is_non_binary_string_type(
/*============================*/
/* out: TRUE if non-binary string type */
ulint mtype, /* in: main data type */
ulint prtype) /* in: precise type */
{
if (dtype_is_string_type(mtype) == TRUE
&& dtype_is_binary_string_type(mtype, prtype) == FALSE) {
return(TRUE);
}
return(FALSE);
}
/*************************************************************************
Gets the MySQL charset-collation code for MySQL string types. */
ulint
dtype_get_charset_coll_noninline(
/*=============================*/
ulint prtype) /* in: precise data type */
{
return(dtype_get_charset_coll(prtype));
}
/*************************************************************************
Forms a precise type from the < 4.1.2 format precise type plus the
charset-collation code. */
ulint
dtype_form_prtype(
/*==============*/
ulint old_prtype, /* in: the MySQL type code and the flags
DATA_BINARY_TYPE etc. */
ulint charset_coll) /* in: MySQL charset-collation code */
{
ut_a(old_prtype < 256 * 256);
ut_a(charset_coll < 256);
return(old_prtype + (charset_coll << 16));
}
/*************************************************************************
Validates a data type structure. */
ibool
dtype_validate(
/*===========*/
/* out: TRUE if ok */
dtype_t* type) /* in: type struct to validate */
{
ut_a(type);
ut_a((type->mtype >= DATA_VARCHAR) && (type->mtype <= DATA_MYSQL));
if (type->mtype == DATA_SYS) {
ut_a((type->prtype & DATA_MYSQL_TYPE_MASK) < DATA_N_SYS_COLS);
}
ut_a(type->mbminlen <= type->mbmaxlen);
return(TRUE);
}
/*************************************************************************
Prints a data type structure. */
void
dtype_print(
/*========*/
dtype_t* type) /* in: type */
{
ulint mtype;
ulint prtype;
ulint len;
ut_a(type);
mtype = type->mtype;
prtype = type->prtype;
if (mtype == DATA_VARCHAR) {
fputs("DATA_VARCHAR", stderr);
} else if (mtype == DATA_CHAR) {
fputs("DATA_CHAR", stderr);
} else if (mtype == DATA_BINARY) {
fputs("DATA_BINARY", stderr);
} else if (mtype == DATA_INT) {
fputs("DATA_INT", stderr);
} else if (mtype == DATA_MYSQL) {
fputs("DATA_MYSQL", stderr);
} else if (mtype == DATA_SYS) {
fputs("DATA_SYS", stderr);
} else {
fprintf(stderr, "type %lu", (ulong) mtype);
}
len = type->len;
if ((type->mtype == DATA_SYS)
|| (type->mtype == DATA_VARCHAR)
|| (type->mtype == DATA_CHAR)) {
putc(' ', stderr);
if (prtype == DATA_ROW_ID) {
fputs("DATA_ROW_ID", stderr);
len = DATA_ROW_ID_LEN;
} else if (prtype == DATA_ROLL_PTR) {
fputs("DATA_ROLL_PTR", stderr);
len = DATA_ROLL_PTR_LEN;
} else if (prtype == DATA_TRX_ID) {
fputs("DATA_TRX_ID", stderr);
len = DATA_TRX_ID_LEN;
} else if (prtype == DATA_MIX_ID) {
fputs("DATA_MIX_ID", stderr);
} else if (prtype == DATA_ENGLISH) {
fputs("DATA_ENGLISH", stderr);
} else {
fprintf(stderr, "prtype %lu", (ulong) mtype);
}
}
fprintf(stderr, " len %lu prec %lu", (ulong) len, (ulong) type->prec);
}

44
db/db0err.h Normal file
View file

@ -0,0 +1,44 @@
/******************************************************
Global error codes for the database
(c) 1996 Innobase Oy
Created 5/24/1996 Heikki Tuuri
*******************************************************/
#ifndef db0err_h
#define db0err_h
#define DB_SUCCESS 10
/* The following are error codes */
#define DB_ERROR 11
#define DB_OUT_OF_MEMORY 12
#define DB_OUT_OF_FILE_SPACE 13
#define DB_LOCK_WAIT 14
#define DB_DEADLOCK 15
#define DB_ROLLBACK 16
#define DB_DUPLICATE_KEY 17
#define DB_QUE_THR_SUSPENDED 18
#define DB_MISSING_HISTORY 19 /* required history data has been
deleted due to lack of space in
rollback segment */
#define DB_CLUSTER_NOT_FOUND 30
#define DB_TABLE_NOT_FOUND 31
#define DB_MUST_GET_MORE_FILE_SPACE 32 /* the database has to be stopped
and restrated with more file space */
#define DB_TABLE_IS_BEING_USED 33
#define DB_TOO_BIG_RECORD 34 /* a record in an index would become
bigger than 1/2 free space in a page
frame */
/* The following are partial failure codes */
#define DB_FAIL 1000
#define DB_OVERFLOW 1001
#define DB_UNDERFLOW 1002
#define DB_STRONG_FAIL 1003
#define DB_RECORD_NOT_FOUND 1500
#define DB_END_OF_INDEX 1501
#endif

25
dict/Makefile.am Normal file
View file

@ -0,0 +1,25 @@
# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
# & Innobase Oy
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
include ../include/Makefile.i
noinst_LIBRARIES = libdict.a
libdict_a_SOURCES = dict0boot.c dict0crea.c dict0dict.c dict0load.c\
dict0mem.c
EXTRA_PROGRAMS =

414
dict/dict0boot.c Normal file
View file

@ -0,0 +1,414 @@
/******************************************************
Data dictionary creation and booting
(c) 1996 Innobase Oy
Created 4/18/1996 Heikki Tuuri
*******************************************************/
#include "dict0boot.h"
#ifdef UNIV_NONINL
#include "dict0boot.ic"
#endif
#include "dict0crea.h"
#include "btr0btr.h"
#include "dict0load.h"
#include "dict0load.h"
#include "trx0trx.h"
#include "srv0srv.h"
#include "ibuf0ibuf.h"
#include "buf0flu.h"
#include "log0recv.h"
#include "os0file.h"
/**************************************************************************
Gets a pointer to the dictionary header and x-latches its page. */
dict_hdr_t*
dict_hdr_get(
/*=========*/
/* out: pointer to the dictionary header,
page x-latched */
mtr_t* mtr) /* in: mtr */
{
dict_hdr_t* header;
ut_ad(mtr);
header = DICT_HDR + buf_page_get(DICT_HDR_SPACE, DICT_HDR_PAGE_NO,
RW_X_LATCH, mtr);
#ifdef UNIV_SYNC_DEBUG
buf_page_dbg_add_level(header, SYNC_DICT_HEADER);
#endif /* UNIV_SYNC_DEBUG */
return(header);
}
/**************************************************************************
Returns a new table, index, or tree id. */
dulint
dict_hdr_get_new_id(
/*================*/
/* out: the new id */
ulint type) /* in: DICT_HDR_ROW_ID, ... */
{
dict_hdr_t* dict_hdr;
dulint id;
mtr_t mtr;
ut_ad((type == DICT_HDR_TABLE_ID) || (type == DICT_HDR_INDEX_ID)
|| (type == DICT_HDR_MIX_ID));
mtr_start(&mtr);
dict_hdr = dict_hdr_get(&mtr);
id = mtr_read_dulint(dict_hdr + type, &mtr);
id = ut_dulint_add(id, 1);
mlog_write_dulint(dict_hdr + type, id, &mtr);
mtr_commit(&mtr);
return(id);
}
/**************************************************************************
Writes the current value of the row id counter to the dictionary header file
page. */
void
dict_hdr_flush_row_id(void)
/*=======================*/
{
dict_hdr_t* dict_hdr;
dulint id;
mtr_t mtr;
#ifdef UNIV_SYNC_DEBUG
ut_ad(mutex_own(&(dict_sys->mutex)));
#endif /* UNIV_SYNC_DEBUG */
id = dict_sys->row_id;
mtr_start(&mtr);
dict_hdr = dict_hdr_get(&mtr);
mlog_write_dulint(dict_hdr + DICT_HDR_ROW_ID, id, &mtr);
mtr_commit(&mtr);
}
/*********************************************************************
Creates the file page for the dictionary header. This function is
called only at the database creation. */
static
ibool
dict_hdr_create(
/*============*/
/* out: TRUE if succeed */
mtr_t* mtr) /* in: mtr */
{
dict_hdr_t* dict_header;
ulint hdr_page_no;
ulint root_page_no;
page_t* page;
ut_ad(mtr);
/* Create the dictionary header file block in a new, allocated file
segment in the system tablespace */
page = fseg_create(DICT_HDR_SPACE, 0,
DICT_HDR + DICT_HDR_FSEG_HEADER, mtr);
hdr_page_no = buf_frame_get_page_no(page);
ut_a(DICT_HDR_PAGE_NO == hdr_page_no);
dict_header = dict_hdr_get(mtr);
/* Start counting row, table, index, and tree ids from
DICT_HDR_FIRST_ID */
mlog_write_dulint(dict_header + DICT_HDR_ROW_ID,
ut_dulint_create(0, DICT_HDR_FIRST_ID), mtr);
mlog_write_dulint(dict_header + DICT_HDR_TABLE_ID,
ut_dulint_create(0, DICT_HDR_FIRST_ID), mtr);
mlog_write_dulint(dict_header + DICT_HDR_INDEX_ID,
ut_dulint_create(0, DICT_HDR_FIRST_ID), mtr);
mlog_write_dulint(dict_header + DICT_HDR_MIX_ID,
ut_dulint_create(0, DICT_HDR_FIRST_ID), mtr);
/* Create the B-tree roots for the clustered indexes of the basic
system tables */
/*--------------------------*/
root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
DICT_HDR_SPACE, DICT_TABLES_ID, FALSE, mtr);
if (root_page_no == FIL_NULL) {
return(FALSE);
}
mlog_write_ulint(dict_header + DICT_HDR_TABLES, root_page_no,
MLOG_4BYTES, mtr);
/*--------------------------*/
root_page_no = btr_create(DICT_UNIQUE, DICT_HDR_SPACE,
DICT_TABLE_IDS_ID, FALSE, mtr);
if (root_page_no == FIL_NULL) {
return(FALSE);
}
mlog_write_ulint(dict_header + DICT_HDR_TABLE_IDS, root_page_no,
MLOG_4BYTES, mtr);
/*--------------------------*/
root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
DICT_HDR_SPACE, DICT_COLUMNS_ID, FALSE, mtr);
if (root_page_no == FIL_NULL) {
return(FALSE);
}
mlog_write_ulint(dict_header + DICT_HDR_COLUMNS, root_page_no,
MLOG_4BYTES, mtr);
/*--------------------------*/
root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
DICT_HDR_SPACE, DICT_INDEXES_ID, FALSE, mtr);
if (root_page_no == FIL_NULL) {
return(FALSE);
}
mlog_write_ulint(dict_header + DICT_HDR_INDEXES, root_page_no,
MLOG_4BYTES, mtr);
/*--------------------------*/
root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
DICT_HDR_SPACE, DICT_FIELDS_ID, FALSE, mtr);
if (root_page_no == FIL_NULL) {
return(FALSE);
}
mlog_write_ulint(dict_header + DICT_HDR_FIELDS, root_page_no,
MLOG_4BYTES, mtr);
/*--------------------------*/
return(TRUE);
}
/*********************************************************************
Initializes the data dictionary memory structures when the database is
started. This function is also called when the data dictionary is created. */
void
dict_boot(void)
/*===========*/
{
dict_table_t* table;
dict_index_t* index;
dict_hdr_t* dict_hdr;
mtr_t mtr;
ibool success;
mtr_start(&mtr);
/* Create the hash tables etc. */
dict_init();
mutex_enter(&(dict_sys->mutex));
/* Get the dictionary header */
dict_hdr = dict_hdr_get(&mtr);
/* Because we only write new row ids to disk-based data structure
(dictionary header) when it is divisible by
DICT_HDR_ROW_ID_WRITE_MARGIN, in recovery we will not recover
the latest value of the row id counter. Therefore we advance
the counter at the database startup to avoid overlapping values.
Note that when a user after database startup first time asks for
a new row id, then because the counter is now divisible by
..._MARGIN, it will immediately be updated to the disk-based
header. */
dict_sys->row_id = ut_dulint_add(
ut_dulint_align_up(
mtr_read_dulint(dict_hdr + DICT_HDR_ROW_ID,
&mtr),
DICT_HDR_ROW_ID_WRITE_MARGIN),
DICT_HDR_ROW_ID_WRITE_MARGIN);
/* Insert into the dictionary cache the descriptions of the basic
system tables */
/*-------------------------*/
table = dict_mem_table_create("SYS_TABLES", DICT_HDR_SPACE, 8, FALSE);
dict_mem_table_add_col(table, "NAME", DATA_BINARY, 0, 0, 0);
dict_mem_table_add_col(table, "ID", DATA_BINARY, 0, 0, 0);
dict_mem_table_add_col(table, "N_COLS", DATA_INT, 0, 4, 0);
dict_mem_table_add_col(table, "TYPE", DATA_INT, 0, 4, 0);
dict_mem_table_add_col(table, "MIX_ID", DATA_BINARY, 0, 0, 0);
dict_mem_table_add_col(table, "MIX_LEN", DATA_INT, 0, 4, 0);
dict_mem_table_add_col(table, "CLUSTER_NAME", DATA_BINARY, 0, 0, 0);
dict_mem_table_add_col(table, "SPACE", DATA_INT, 0, 4, 0);
table->id = DICT_TABLES_ID;
dict_table_add_to_cache(table);
dict_sys->sys_tables = table;
index = dict_mem_index_create("SYS_TABLES", "CLUST_IND",
DICT_HDR_SPACE, DICT_UNIQUE | DICT_CLUSTERED, 1);
dict_mem_index_add_field(index, "NAME", 0, 0);
index->id = DICT_TABLES_ID;
success = dict_index_add_to_cache(table, index, mtr_read_ulint(
dict_hdr + DICT_HDR_TABLES, MLOG_4BYTES, &mtr));
ut_a(success);
/*-------------------------*/
index = dict_mem_index_create("SYS_TABLES", "ID_IND",
DICT_HDR_SPACE, DICT_UNIQUE, 1);
dict_mem_index_add_field(index, "ID", 0, 0);
index->id = DICT_TABLE_IDS_ID;
success = dict_index_add_to_cache(table, index, mtr_read_ulint(
dict_hdr + DICT_HDR_TABLE_IDS, MLOG_4BYTES, &mtr));
ut_a(success);
/*-------------------------*/
table = dict_mem_table_create("SYS_COLUMNS", DICT_HDR_SPACE, 7, FALSE);
dict_mem_table_add_col(table, "TABLE_ID", DATA_BINARY,0,0,0);
dict_mem_table_add_col(table, "POS", DATA_INT, 0, 4, 0);
dict_mem_table_add_col(table, "NAME", DATA_BINARY, 0, 0, 0);
dict_mem_table_add_col(table, "MTYPE", DATA_INT, 0, 4, 0);
dict_mem_table_add_col(table, "PRTYPE", DATA_INT, 0, 4, 0);
dict_mem_table_add_col(table, "LEN", DATA_INT, 0, 4, 0);
dict_mem_table_add_col(table, "PREC", DATA_INT, 0, 4, 0);
table->id = DICT_COLUMNS_ID;
dict_table_add_to_cache(table);
dict_sys->sys_columns = table;
index = dict_mem_index_create("SYS_COLUMNS", "CLUST_IND",
DICT_HDR_SPACE, DICT_UNIQUE | DICT_CLUSTERED, 2);
dict_mem_index_add_field(index, "TABLE_ID", 0, 0);
dict_mem_index_add_field(index, "POS", 0, 0);
index->id = DICT_COLUMNS_ID;
success = dict_index_add_to_cache(table, index, mtr_read_ulint(
dict_hdr + DICT_HDR_COLUMNS, MLOG_4BYTES, &mtr));
ut_a(success);
/*-------------------------*/
table = dict_mem_table_create("SYS_INDEXES", DICT_HDR_SPACE, 7, FALSE);
dict_mem_table_add_col(table, "TABLE_ID", DATA_BINARY, 0,0,0);
dict_mem_table_add_col(table, "ID", DATA_BINARY, 0, 0, 0);
dict_mem_table_add_col(table, "NAME", DATA_BINARY, 0, 0, 0);
dict_mem_table_add_col(table, "N_FIELDS", DATA_INT, 0, 4, 0);
dict_mem_table_add_col(table, "TYPE", DATA_INT, 0, 4, 0);
dict_mem_table_add_col(table, "SPACE", DATA_INT, 0, 4, 0);
dict_mem_table_add_col(table, "PAGE_NO", DATA_INT, 0, 4, 0);
/* The '+ 2' below comes from the 2 system fields */
#if DICT_SYS_INDEXES_PAGE_NO_FIELD != 6 + 2
#error "DICT_SYS_INDEXES_PAGE_NO_FIELD != 6 + 2"
#endif
#if DICT_SYS_INDEXES_SPACE_NO_FIELD != 5 + 2
#error "DICT_SYS_INDEXES_SPACE_NO_FIELD != 5 + 2"
#endif
#if DICT_SYS_INDEXES_TYPE_FIELD != 4 + 2
#error "DICT_SYS_INDEXES_TYPE_FIELD != 4 + 2"
#endif
table->id = DICT_INDEXES_ID;
dict_table_add_to_cache(table);
dict_sys->sys_indexes = table;
index = dict_mem_index_create("SYS_INDEXES", "CLUST_IND",
DICT_HDR_SPACE, DICT_UNIQUE | DICT_CLUSTERED, 2);
dict_mem_index_add_field(index, "TABLE_ID", 0, 0);
dict_mem_index_add_field(index, "ID", 0, 0);
index->id = DICT_INDEXES_ID;
success = dict_index_add_to_cache(table, index, mtr_read_ulint(
dict_hdr + DICT_HDR_INDEXES, MLOG_4BYTES, &mtr));
ut_a(success);
/*-------------------------*/
table = dict_mem_table_create("SYS_FIELDS", DICT_HDR_SPACE, 3, FALSE);
dict_mem_table_add_col(table, "INDEX_ID", DATA_BINARY, 0,0,0);
dict_mem_table_add_col(table, "POS", DATA_INT, 0, 4, 0);
dict_mem_table_add_col(table, "COL_NAME", DATA_BINARY, 0,0,0);
table->id = DICT_FIELDS_ID;
dict_table_add_to_cache(table);
dict_sys->sys_fields = table;
index = dict_mem_index_create("SYS_FIELDS", "CLUST_IND",
DICT_HDR_SPACE, DICT_UNIQUE | DICT_CLUSTERED, 2);
dict_mem_index_add_field(index, "INDEX_ID", 0, 0);
dict_mem_index_add_field(index, "POS", 0, 0);
index->id = DICT_FIELDS_ID;
success = dict_index_add_to_cache(table, index, mtr_read_ulint(
dict_hdr + DICT_HDR_FIELDS, MLOG_4BYTES, &mtr));
ut_a(success);
mtr_commit(&mtr);
/*-------------------------*/
/* Initialize the insert buffer table and index for each tablespace */
ibuf_init_at_db_start();
/* Load definitions of other indexes on system tables */
dict_load_sys_table(dict_sys->sys_tables);
dict_load_sys_table(dict_sys->sys_columns);
dict_load_sys_table(dict_sys->sys_indexes);
dict_load_sys_table(dict_sys->sys_fields);
mutex_exit(&(dict_sys->mutex));
}
/*********************************************************************
Inserts the basic system table data into themselves in the database
creation. */
static
void
dict_insert_initial_data(void)
/*==========================*/
{
/* Does nothing yet */
}
/*********************************************************************
Creates and initializes the data dictionary at the database creation. */
void
dict_create(void)
/*=============*/
{
mtr_t mtr;
mtr_start(&mtr);
dict_hdr_create(&mtr);
mtr_commit(&mtr);
dict_boot();
dict_insert_initial_data();
}

1468
dict/dict0crea.c Normal file

File diff suppressed because it is too large Load diff

4533
dict/dict0dict.c Normal file

File diff suppressed because it is too large Load diff

1354
dict/dict0load.c Normal file

File diff suppressed because it is too large Load diff

294
dict/dict0mem.c Normal file
View file

@ -0,0 +1,294 @@
/**********************************************************************
Data dictionary memory object creation
(c) 1996 Innobase Oy
Created 1/8/1996 Heikki Tuuri
***********************************************************************/
#include "dict0mem.h"
#ifdef UNIV_NONINL
#include "dict0mem.ic"
#endif
#include "rem0rec.h"
#include "data0type.h"
#include "mach0data.h"
#include "dict0dict.h"
#include "que0que.h"
#include "pars0pars.h"
#include "lock0lock.h"
#define DICT_HEAP_SIZE 100 /* initial memory heap size when
creating a table or index object */
/**************************************************************************
Creates a table memory object. */
dict_table_t*
dict_mem_table_create(
/*==================*/
/* out, own: table object */
const char* name, /* in: table name */
ulint space, /* in: space where the clustered index of
the table is placed; this parameter is
ignored if the table is made a member of
a cluster */
ulint n_cols, /* in: number of columns */
ibool comp) /* in: TRUE=compact page format */
{
dict_table_t* table;
mem_heap_t* heap;
ut_ad(name);
ut_ad(comp == FALSE || comp == TRUE);
heap = mem_heap_create(DICT_HEAP_SIZE);
table = mem_heap_alloc(heap, sizeof(dict_table_t));
table->heap = heap;
table->type = DICT_TABLE_ORDINARY;
table->name = mem_heap_strdup(heap, name);
table->dir_path_of_temp_table = NULL;
table->space = space;
table->ibd_file_missing = FALSE;
table->tablespace_discarded = FALSE;
table->comp = comp;
table->n_def = 0;
table->n_cols = n_cols + DATA_N_SYS_COLS;
table->mem_fix = 0;
table->n_mysql_handles_opened = 0;
table->n_foreign_key_checks_running = 0;
table->cached = FALSE;
table->mix_id = ut_dulint_zero;
table->mix_len = 0;
table->cols = mem_heap_alloc(heap, (n_cols + DATA_N_SYS_COLS)
* sizeof(dict_col_t));
UT_LIST_INIT(table->indexes);
table->auto_inc_lock = mem_heap_alloc(heap, lock_get_size());
table->query_cache_inv_trx_id = ut_dulint_zero;
UT_LIST_INIT(table->locks);
UT_LIST_INIT(table->foreign_list);
UT_LIST_INIT(table->referenced_list);
table->does_not_fit_in_memory = FALSE;
table->stat_initialized = FALSE;
table->stat_modified_counter = 0;
mutex_create(&(table->autoinc_mutex));
mutex_set_level(&(table->autoinc_mutex), SYNC_DICT_AUTOINC_MUTEX);
table->autoinc_inited = FALSE;
table->magic_n = DICT_TABLE_MAGIC_N;
return(table);
}
/**************************************************************************
Creates a cluster memory object. */
dict_table_t*
dict_mem_cluster_create(
/*====================*/
/* out, own: cluster object */
const char* name, /* in: cluster name */
ulint space, /* in: space where the clustered indexes
of the member tables are placed */
ulint n_cols, /* in: number of columns */
ulint mix_len)/* in: length of the common key prefix in the
cluster */
{
dict_table_t* cluster;
/* Clustered tables cannot work with the compact record format. */
cluster = dict_mem_table_create(name, space, n_cols, FALSE);
cluster->type = DICT_TABLE_CLUSTER;
cluster->mix_len = mix_len;
return(cluster);
}
/**************************************************************************
Declares a non-published table as a member in a cluster. */
void
dict_mem_table_make_cluster_member(
/*===============================*/
dict_table_t* table, /* in: non-published table */
const char* cluster_name) /* in: cluster name */
{
table->type = DICT_TABLE_CLUSTER_MEMBER;
table->cluster_name = cluster_name;
}
/**************************************************************************
Adds a column definition to a table. */
void
dict_mem_table_add_col(
/*===================*/
dict_table_t* table, /* in: table */
const char* name, /* in: column name */
ulint mtype, /* in: main datatype */
ulint prtype, /* in: precise type */
ulint len, /* in: length */
ulint prec) /* in: precision */
{
dict_col_t* col;
dtype_t* type;
ut_ad(table && name);
ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
table->n_def++;
col = dict_table_get_nth_col(table, table->n_def - 1);
col->ind = table->n_def - 1;
col->name = mem_heap_strdup(table->heap, name);
col->table = table;
col->ord_part = 0;
col->clust_pos = ULINT_UNDEFINED;
type = dict_col_get_type(col);
dtype_set(type, mtype, prtype, len, prec);
}
/**************************************************************************
Creates an index memory object. */
dict_index_t*
dict_mem_index_create(
/*==================*/
/* out, own: index object */
const char* table_name, /* in: table name */
const char* index_name, /* in: index name */
ulint space, /* in: space where the index tree is
placed, ignored if the index is of
the clustered type */
ulint type, /* in: DICT_UNIQUE,
DICT_CLUSTERED, ... ORed */
ulint n_fields) /* in: number of fields */
{
dict_index_t* index;
mem_heap_t* heap;
ut_ad(table_name && index_name);
heap = mem_heap_create(DICT_HEAP_SIZE);
index = mem_heap_alloc(heap, sizeof(dict_index_t));
index->heap = heap;
index->type = type;
index->space = space;
index->name = mem_heap_strdup(heap, index_name);
index->table_name = table_name;
index->table = NULL;
index->n_def = index->n_nullable = 0;
index->n_fields = n_fields;
index->fields = mem_heap_alloc(heap, 1 + n_fields
* sizeof(dict_field_t));
/* The '1 +' above prevents allocation
of an empty mem block */
index->stat_n_diff_key_vals = NULL;
index->cached = FALSE;
index->magic_n = DICT_INDEX_MAGIC_N;
return(index);
}
/**************************************************************************
Creates and initializes a foreign constraint memory object. */
dict_foreign_t*
dict_mem_foreign_create(void)
/*=========================*/
/* out, own: foreign constraint struct */
{
dict_foreign_t* foreign;
mem_heap_t* heap;
heap = mem_heap_create(100);
foreign = mem_heap_alloc(heap, sizeof(dict_foreign_t));
foreign->heap = heap;
foreign->id = NULL;
foreign->type = 0;
foreign->foreign_table_name = NULL;
foreign->foreign_table = NULL;
foreign->foreign_col_names = NULL;
foreign->referenced_table_name = NULL;
foreign->referenced_table = NULL;
foreign->referenced_col_names = NULL;
foreign->n_fields = 0;
foreign->foreign_index = NULL;
foreign->referenced_index = NULL;
return(foreign);
}
/**************************************************************************
Adds a field definition to an index. NOTE: does not take a copy
of the column name if the field is a column. The memory occupied
by the column name may be released only after publishing the index. */
void
dict_mem_index_add_field(
/*=====================*/
dict_index_t* index, /* in: index */
const char* name, /* in: column name */
ulint order, /* in: order criterion; 0 means an
ascending order */
ulint prefix_len) /* in: 0 or the column prefix length
in a MySQL index like
INDEX (textcol(25)) */
{
dict_field_t* field;
ut_ad(index && name);
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
index->n_def++;
field = dict_index_get_nth_field(index, index->n_def - 1);
field->name = name;
field->order = order;
field->prefix_len = prefix_len;
}
/**************************************************************************
Frees an index memory object. */
void
dict_mem_index_free(
/*================*/
dict_index_t* index) /* in: index */
{
mem_heap_free(index->heap);
}

24
dyn/Makefile.am Normal file
View file

@ -0,0 +1,24 @@
# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
# & Innobase Oy
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
include ../include/Makefile.i
noinst_LIBRARIES = libdyn.a
libdyn_a_SOURCES = dyn0dyn.c
EXTRA_PROGRAMS =

48
dyn/dyn0dyn.c Normal file
View file

@ -0,0 +1,48 @@
/******************************************************
The dynamically allocated array
(c) 1996 Innobase Oy
Created 2/5/1996 Heikki Tuuri
*******************************************************/
#include "dyn0dyn.h"
#ifdef UNIV_NONINL
#include "dyn0dyn.ic"
#endif
/****************************************************************
Adds a new block to a dyn array. */
dyn_block_t*
dyn_array_add_block(
/*================*/
/* out: created block */
dyn_array_t* arr) /* in: dyn array */
{
mem_heap_t* heap;
dyn_block_t* block;
ut_ad(arr);
ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
if (arr->heap == NULL) {
UT_LIST_INIT(arr->base);
UT_LIST_ADD_FIRST(list, arr->base, arr);
arr->heap = mem_heap_create(sizeof(dyn_block_t));
}
block = dyn_array_get_last_block(arr);
block->used = block->used | DYN_BLOCK_FULL_FLAG;
heap = arr->heap;
block = mem_heap_alloc(heap, sizeof(dyn_block_t));
block->used = 0;
UT_LIST_ADD_LAST(list, arr->base, block);
return(block);
}

24
eval/Makefile.am Normal file
View file

@ -0,0 +1,24 @@
# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
# & Innobase Oy
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
include ../include/Makefile.i
noinst_LIBRARIES = libeval.a
libeval_a_SOURCES = eval0eval.c eval0proc.c
EXTRA_PROGRAMS =

835
eval/eval0eval.c Normal file
View file

@ -0,0 +1,835 @@
/******************************************************
SQL evaluator: evaluates simple data structures, like expressions, in
a query graph
(c) 1997 Innobase Oy
Created 12/29/1997 Heikki Tuuri
*******************************************************/
#include "eval0eval.h"
#ifdef UNIV_NONINL
#include "eval0eval.ic"
#endif
#include "data0data.h"
#include "row0sel.h"
/* The RND function seed */
ulint eval_rnd = 128367121;
/* Dummy adress used when we should allocate a buffer of size 0 in
the function below */
byte eval_dummy;
/*********************************************************************
Allocate a buffer from global dynamic memory for a value of a que_node.
NOTE that this memory must be explicitly freed when the query graph is
freed. If the node already has an allocated buffer, that buffer is freed
here. NOTE that this is the only function where dynamic memory should be
allocated for a query node val field. */
byte*
eval_node_alloc_val_buf(
/*====================*/
/* out: pointer to allocated buffer */
que_node_t* node, /* in: query graph node; sets the val field
data field to point to the new buffer, and
len field equal to size */
ulint size) /* in: buffer size */
{
dfield_t* dfield;
byte* data;
ut_ad(que_node_get_type(node) == QUE_NODE_SYMBOL
|| que_node_get_type(node) == QUE_NODE_FUNC);
dfield = que_node_get_val(node);
data = dfield_get_data(dfield);
if (data && data != &eval_dummy) {
mem_free(data);
}
if (size == 0) {
data = &eval_dummy;
} else {
data = mem_alloc(size);
}
que_node_set_val_buf_size(node, size);
dfield_set_data(dfield, data, size);
return(data);
}
/*********************************************************************
Free the buffer from global dynamic memory for a value of a que_node,
if it has been allocated in the above function. The freeing for pushed
column values is done in sel_col_prefetch_buf_free. */
void
eval_node_free_val_buf(
/*===================*/
que_node_t* node) /* in: query graph node */
{
dfield_t* dfield;
byte* data;
ut_ad(que_node_get_type(node) == QUE_NODE_SYMBOL
|| que_node_get_type(node) == QUE_NODE_FUNC);
dfield = que_node_get_val(node);
data = dfield_get_data(dfield);
if (que_node_get_val_buf_size(node) > 0) {
ut_a(data);
mem_free(data);
}
}
/*********************************************************************
Evaluates a comparison node. */
ibool
eval_cmp(
/*=====*/
/* out: the result of the comparison */
func_node_t* cmp_node) /* in: comparison node */
{
que_node_t* arg1;
que_node_t* arg2;
int res;
ibool val;
int func;
ut_ad(que_node_get_type(cmp_node) == QUE_NODE_FUNC);
arg1 = cmp_node->args;
arg2 = que_node_get_next(arg1);
res = cmp_dfield_dfield(que_node_get_val(arg1),
que_node_get_val(arg2));
val = TRUE;
func = cmp_node->func;
if (func == '=') {
if (res != 0) {
val = FALSE;
}
} else if (func == '<') {
if (res != -1) {
val = FALSE;
}
} else if (func == PARS_LE_TOKEN) {
if (res == 1) {
val = FALSE;
}
} else if (func == PARS_NE_TOKEN) {
if (res == 0) {
val = FALSE;
}
} else if (func == PARS_GE_TOKEN) {
if (res == -1) {
val = FALSE;
}
} else {
ut_ad(func == '>');
if (res != 1) {
val = FALSE;
}
}
eval_node_set_ibool_val(cmp_node, val);
return(val);
}
/*********************************************************************
Evaluates a logical operation node. */
UNIV_INLINE
void
eval_logical(
/*=========*/
func_node_t* logical_node) /* in: logical operation node */
{
que_node_t* arg1;
que_node_t* arg2;
ibool val1;
ibool val2 = 0; /* remove warning */
ibool val = 0; /* remove warning */
int func;
ut_ad(que_node_get_type(logical_node) == QUE_NODE_FUNC);
arg1 = logical_node->args;
arg2 = que_node_get_next(arg1); /* arg2 is NULL if func is 'NOT' */
val1 = eval_node_get_ibool_val(arg1);
if (arg2) {
val2 = eval_node_get_ibool_val(arg2);
}
func = logical_node->func;
if (func == PARS_AND_TOKEN) {
val = val1 & val2;
} else if (func == PARS_OR_TOKEN) {
val = val1 | val2;
} else if (func == PARS_NOT_TOKEN) {
val = TRUE - val1;
} else {
ut_error;
}
eval_node_set_ibool_val(logical_node, val);
}
/*********************************************************************
Evaluates an arithmetic operation node. */
UNIV_INLINE
void
eval_arith(
/*=======*/
func_node_t* arith_node) /* in: arithmetic operation node */
{
que_node_t* arg1;
que_node_t* arg2;
lint val1;
lint val2 = 0; /* remove warning */
lint val;
int func;
ut_ad(que_node_get_type(arith_node) == QUE_NODE_FUNC);
arg1 = arith_node->args;
arg2 = que_node_get_next(arg1); /* arg2 is NULL if func is unary '-' */
val1 = eval_node_get_int_val(arg1);
if (arg2) {
val2 = eval_node_get_int_val(arg2);
}
func = arith_node->func;
if (func == '+') {
val = val1 + val2;
} else if ((func == '-') && arg2) {
val = val1 - val2;
} else if (func == '-') {
val = -val1;
} else if (func == '*') {
val = val1 * val2;
} else {
ut_ad(func == '/');
val = val1 / val2;
}
eval_node_set_int_val(arith_node, val);
}
/*********************************************************************
Evaluates an aggregate operation node. */
UNIV_INLINE
void
eval_aggregate(
/*===========*/
func_node_t* node) /* in: aggregate operation node */
{
que_node_t* arg;
lint val;
lint arg_val;
int func;
ut_ad(que_node_get_type(node) == QUE_NODE_FUNC);
val = eval_node_get_int_val(node);
func = node->func;
if (func == PARS_COUNT_TOKEN) {
val = val + 1;
} else {
ut_ad(func == PARS_SUM_TOKEN);
arg = node->args;
arg_val = eval_node_get_int_val(arg);
val = val + arg_val;
}
eval_node_set_int_val(node, val);
}
/*********************************************************************
Evaluates a predefined function node where the function is not relevant
in benchmarks. */
static
void
eval_predefined_2(
/*==============*/
func_node_t* func_node) /* in: predefined function node */
{
que_node_t* arg;
que_node_t* arg1;
que_node_t* arg2 = 0; /* remove warning (??? bug ???) */
lint int_val;
byte* data;
ulint len1;
ulint len2;
int func;
ulint i;
ut_ad(que_node_get_type(func_node) == QUE_NODE_FUNC);
arg1 = func_node->args;
if (arg1) {
arg2 = que_node_get_next(arg1);
}
func = func_node->func;
if (func == PARS_PRINTF_TOKEN) {
arg = arg1;
while (arg) {
dfield_print(que_node_get_val(arg));
arg = que_node_get_next(arg);
}
putc('\n', stderr);
} else if (func == PARS_ASSERT_TOKEN) {
if (!eval_node_get_ibool_val(arg1)) {
fputs("SQL assertion fails in a stored procedure!\n",
stderr);
}
ut_a(eval_node_get_ibool_val(arg1));
/* This function, or more precisely, a debug procedure,
returns no value */
} else if (func == PARS_RND_TOKEN) {
len1 = (ulint)eval_node_get_int_val(arg1);
len2 = (ulint)eval_node_get_int_val(arg2);
ut_ad(len2 >= len1);
if (len2 > len1) {
int_val = (lint)(len1 +
(eval_rnd % (len2 - len1 + 1)));
} else {
int_val = (lint)len1;
}
eval_rnd = ut_rnd_gen_next_ulint(eval_rnd);
eval_node_set_int_val(func_node, int_val);
} else if (func == PARS_RND_STR_TOKEN) {
len1 = (ulint)eval_node_get_int_val(arg1);
data = eval_node_ensure_val_buf(func_node, len1);
for (i = 0; i < len1; i++) {
data[i] = (byte)(97 + (eval_rnd % 3));
eval_rnd = ut_rnd_gen_next_ulint(eval_rnd);
}
} else {
ut_error;
}
}
/*********************************************************************
Evaluates a notfound-function node. */
UNIV_INLINE
void
eval_notfound(
/*==========*/
func_node_t* func_node) /* in: function node */
{
que_node_t* arg1;
que_node_t* arg2;
sym_node_t* cursor;
sel_node_t* sel_node;
ibool ibool_val;
arg1 = func_node->args;
arg2 = que_node_get_next(arg1);
ut_ad(func_node->func == PARS_NOTFOUND_TOKEN);
cursor = arg1;
ut_ad(que_node_get_type(cursor) == QUE_NODE_SYMBOL);
if (cursor->token_type == SYM_LIT) {
ut_ad(ut_memcmp(dfield_get_data(que_node_get_val(cursor)),
"SQL", 3) == 0);
sel_node = cursor->sym_table->query_graph->last_sel_node;
} else {
sel_node = cursor->alias->cursor_def;
}
if (sel_node->state == SEL_NODE_NO_MORE_ROWS) {
ibool_val = TRUE;
} else {
ibool_val = FALSE;
}
eval_node_set_ibool_val(func_node, ibool_val);
}
/*********************************************************************
Evaluates a substr-function node. */
UNIV_INLINE
void
eval_substr(
/*========*/
func_node_t* func_node) /* in: function node */
{
que_node_t* arg1;
que_node_t* arg2;
que_node_t* arg3;
dfield_t* dfield;
byte* str1;
ulint len1;
ulint len2;
arg1 = func_node->args;
arg2 = que_node_get_next(arg1);
ut_ad(func_node->func == PARS_SUBSTR_TOKEN);
arg3 = que_node_get_next(arg2);
str1 = dfield_get_data(que_node_get_val(arg1));
len1 = (ulint)eval_node_get_int_val(arg2);
len2 = (ulint)eval_node_get_int_val(arg3);
dfield = que_node_get_val(func_node);
dfield_set_data(dfield, str1 + len1, len2);
}
/*********************************************************************
Evaluates a replstr-procedure node. */
static
void
eval_replstr(
/*=========*/
func_node_t* func_node) /* in: function node */
{
que_node_t* arg1;
que_node_t* arg2;
que_node_t* arg3;
que_node_t* arg4;
byte* str1;
byte* str2;
ulint len1;
ulint len2;
arg1 = func_node->args;
arg2 = que_node_get_next(arg1);
ut_ad(que_node_get_type(arg1) == QUE_NODE_SYMBOL);
arg3 = que_node_get_next(arg2);
arg4 = que_node_get_next(arg3);
str1 = dfield_get_data(que_node_get_val(arg1));
str2 = dfield_get_data(que_node_get_val(arg2));
len1 = (ulint)eval_node_get_int_val(arg3);
len2 = (ulint)eval_node_get_int_val(arg4);
if ((dfield_get_len(que_node_get_val(arg1)) < len1 + len2)
|| (dfield_get_len(que_node_get_val(arg2)) < len2)) {
ut_error;
}
ut_memcpy(str1 + len1, str2, len2);
}
/*********************************************************************
Evaluates an instr-function node. */
static
void
eval_instr(
/*=======*/
func_node_t* func_node) /* in: function node */
{
que_node_t* arg1;
que_node_t* arg2;
dfield_t* dfield1;
dfield_t* dfield2;
lint int_val;
byte* str1;
byte* str2;
byte match_char;
ulint len1;
ulint len2;
ulint i;
ulint j;
arg1 = func_node->args;
arg2 = que_node_get_next(arg1);
dfield1 = que_node_get_val(arg1);
dfield2 = que_node_get_val(arg2);
str1 = dfield_get_data(dfield1);
str2 = dfield_get_data(dfield2);
len1 = dfield_get_len(dfield1);
len2 = dfield_get_len(dfield2);
if (len2 == 0) {
ut_error;
}
match_char = str2[0];
for (i = 0; i < len1; i++) {
/* In this outer loop, the number of matched characters is 0 */
if (str1[i] == match_char) {
if (i + len2 > len1) {
break;
}
for (j = 1;; j++) {
/* We have already matched j characters */
if (j == len2) {
int_val = i + 1;
goto match_found;
}
if (str1[i + j] != str2[j]) {
break;
}
}
}
}
int_val = 0;
match_found:
eval_node_set_int_val(func_node, int_val);
}
/*********************************************************************
Evaluates a predefined function node. */
UNIV_INLINE
void
eval_binary_to_number(
/*==================*/
func_node_t* func_node) /* in: function node */
{
que_node_t* arg1;
dfield_t* dfield;
byte* str1;
byte* str2;
ulint len1;
ulint int_val;
arg1 = func_node->args;
dfield = que_node_get_val(arg1);
str1 = dfield_get_data(dfield);
len1 = dfield_get_len(dfield);
if (len1 > 4) {
ut_error;
}
if (len1 == 4) {
str2 = str1;
} else {
int_val = 0;
str2 = (byte*)&int_val;
ut_memcpy(str2 + (4 - len1), str1, len1);
}
eval_node_copy_and_alloc_val(func_node, str2, 4);
}
/*********************************************************************
Evaluates a predefined function node. */
static
void
eval_concat(
/*========*/
func_node_t* func_node) /* in: function node */
{
que_node_t* arg;
dfield_t* dfield;
byte* data;
ulint len;
ulint len1;
arg = func_node->args;
len = 0;
while (arg) {
len1 = dfield_get_len(que_node_get_val(arg));
len += len1;
arg = que_node_get_next(arg);
}
data = eval_node_ensure_val_buf(func_node, len);
arg = func_node->args;
len = 0;
while (arg) {
dfield = que_node_get_val(arg);
len1 = dfield_get_len(dfield);
ut_memcpy(data + len, dfield_get_data(dfield), len1);
len += len1;
arg = que_node_get_next(arg);
}
}
/*********************************************************************
Evaluates a predefined function node. If the first argument is an integer,
this function looks at the second argument which is the integer length in
bytes, and converts the integer to a VARCHAR.
If the first argument is of some other type, this function converts it to
BINARY. */
UNIV_INLINE
void
eval_to_binary(
/*===========*/
func_node_t* func_node) /* in: function node */
{
que_node_t* arg1;
que_node_t* arg2;
dfield_t* dfield;
byte* str1;
ulint len;
ulint len1;
arg1 = func_node->args;
str1 = dfield_get_data(que_node_get_val(arg1));
if (dtype_get_mtype(que_node_get_data_type(arg1)) != DATA_INT) {
len = dfield_get_len(que_node_get_val(arg1));
dfield = que_node_get_val(func_node);
dfield_set_data(dfield, str1, len);
return;
}
arg2 = que_node_get_next(arg1);
len1 = (ulint)eval_node_get_int_val(arg2);
if (len1 > 4) {
ut_error;
}
dfield = que_node_get_val(func_node);
dfield_set_data(dfield, str1 + (4 - len1), len1);
}
/*********************************************************************
Evaluates a predefined function node. */
UNIV_INLINE
void
eval_predefined(
/*============*/
func_node_t* func_node) /* in: function node */
{
que_node_t* arg1;
lint int_val;
byte* data;
int func;
func = func_node->func;
arg1 = func_node->args;
if (func == PARS_LENGTH_TOKEN) {
int_val = (lint)dfield_get_len(que_node_get_val(arg1));
} else if (func == PARS_TO_CHAR_TOKEN) {
/* Convert number to character string as a
signed decimal integer. */
ulint uint_val;
int int_len;
int_val = eval_node_get_int_val(arg1);
/* Determine the length of the string. */
if (int_val == 0) {
int_len = 1; /* the number 0 occupies 1 byte */
} else {
int_len = 0;
if (int_val < 0) {
uint_val = ((ulint) -int_val - 1) + 1;
int_len++; /* reserve space for minus sign */
} else {
uint_val = (ulint) int_val;
}
for (; uint_val > 0; int_len++) {
uint_val /= 10;
}
}
/* allocate the string */
data = eval_node_ensure_val_buf(func_node, int_len + 1);
/* add terminating NUL character */
data[int_len] = 0;
/* convert the number */
if (int_val == 0) {
data[0] = '0';
} else {
int tmp;
if (int_val < 0) {
data[0] = '-'; /* preceding minus sign */
uint_val = ((ulint) -int_val - 1) + 1;
} else {
uint_val = (ulint) int_val;
}
for (tmp = int_len; uint_val > 0; uint_val /= 10) {
data[--tmp] = (byte) ('0' + (byte)(uint_val % 10));
}
}
dfield_set_len((dfield_t*) que_node_get_val(func_node),
int_len);
return;
} else if (func == PARS_TO_NUMBER_TOKEN) {
int_val = atoi((char*)
dfield_get_data(que_node_get_val(arg1)));
} else if (func == PARS_SYSDATE_TOKEN) {
int_val = (lint)ut_time();
} else {
eval_predefined_2(func_node);
return;
}
eval_node_set_int_val(func_node, int_val);
}
/*********************************************************************
Evaluates a function node. */
void
eval_func(
/*======*/
func_node_t* func_node) /* in: function node */
{
que_node_t* arg;
ulint class;
ulint func;
ut_ad(que_node_get_type(func_node) == QUE_NODE_FUNC);
class = func_node->class;
func = func_node->func;
arg = func_node->args;
/* Evaluate first the argument list */
while (arg) {
eval_exp(arg);
/* The functions are not defined for SQL null argument
values, except for eval_cmp and notfound */
if ((dfield_get_len(que_node_get_val(arg)) == UNIV_SQL_NULL)
&& (class != PARS_FUNC_CMP)
&& (func != PARS_NOTFOUND_TOKEN)
&& (func != PARS_PRINTF_TOKEN)) {
ut_error;
}
arg = que_node_get_next(arg);
}
if (class == PARS_FUNC_CMP) {
eval_cmp(func_node);
} else if (class == PARS_FUNC_ARITH) {
eval_arith(func_node);
} else if (class == PARS_FUNC_AGGREGATE) {
eval_aggregate(func_node);
} else if (class == PARS_FUNC_PREDEFINED) {
if (func == PARS_NOTFOUND_TOKEN) {
eval_notfound(func_node);
} else if (func == PARS_SUBSTR_TOKEN) {
eval_substr(func_node);
} else if (func == PARS_REPLSTR_TOKEN) {
eval_replstr(func_node);
} else if (func == PARS_INSTR_TOKEN) {
eval_instr(func_node);
} else if (func == PARS_BINARY_TO_NUMBER_TOKEN) {
eval_binary_to_number(func_node);
} else if (func == PARS_CONCAT_TOKEN) {
eval_concat(func_node);
} else if (func == PARS_TO_BINARY_TOKEN) {
eval_to_binary(func_node);
} else {
eval_predefined(func_node);
}
} else {
ut_ad(class == PARS_FUNC_LOGICAL);
eval_logical(func_node);
}
}

245
eval/eval0proc.c Normal file
View file

@ -0,0 +1,245 @@
/******************************************************
Executes SQL stored procedures and their control structures
(c) 1998 Innobase Oy
Created 1/20/1998 Heikki Tuuri
*******************************************************/
#include "eval0proc.h"
#ifdef UNIV_NONINL
#include "eval0proc.ic"
#endif
/**************************************************************************
Performs an execution step of an if-statement node. */
que_thr_t*
if_step(
/*====*/
/* out: query thread to run next or NULL */
que_thr_t* thr) /* in: query thread */
{
if_node_t* node;
elsif_node_t* elsif_node;
ut_ad(thr);
node = thr->run_node;
ut_ad(que_node_get_type(node) == QUE_NODE_IF);
if (thr->prev_node == que_node_get_parent(node)) {
/* Evaluate the condition */
eval_exp(node->cond);
if (eval_node_get_ibool_val(node->cond)) {
/* The condition evaluated to TRUE: start execution
from the first statement in the statement list */
thr->run_node = node->stat_list;
} else if (node->else_part) {
thr->run_node = node->else_part;
} else if (node->elsif_list) {
elsif_node = node->elsif_list;
for (;;) {
eval_exp(elsif_node->cond);
if (eval_node_get_ibool_val(elsif_node->cond)) {
/* The condition evaluated to TRUE:
start execution from the first
statement in the statement list */
thr->run_node = elsif_node->stat_list;
break;
}
elsif_node = que_node_get_next(elsif_node);
if (elsif_node == NULL) {
thr->run_node = NULL;
break;
}
}
} else {
thr->run_node = NULL;
}
} else {
/* Move to the next statement */
ut_ad(que_node_get_next(thr->prev_node) == NULL);
thr->run_node = NULL;
}
if (thr->run_node == NULL) {
thr->run_node = que_node_get_parent(node);
}
return(thr);
}
/**************************************************************************
Performs an execution step of a while-statement node. */
que_thr_t*
while_step(
/*=======*/
/* out: query thread to run next or NULL */
que_thr_t* thr) /* in: query thread */
{
while_node_t* node;
ut_ad(thr);
node = thr->run_node;
ut_ad(que_node_get_type(node) == QUE_NODE_WHILE);
ut_ad((thr->prev_node == que_node_get_parent(node))
|| (que_node_get_next(thr->prev_node) == NULL));
/* Evaluate the condition */
eval_exp(node->cond);
if (eval_node_get_ibool_val(node->cond)) {
/* The condition evaluated to TRUE: start execution
from the first statement in the statement list */
thr->run_node = node->stat_list;
} else {
thr->run_node = que_node_get_parent(node);
}
return(thr);
}
/**************************************************************************
Performs an execution step of an assignment statement node. */
que_thr_t*
assign_step(
/*========*/
/* out: query thread to run next or NULL */
que_thr_t* thr) /* in: query thread */
{
assign_node_t* node;
ut_ad(thr);
node = thr->run_node;
ut_ad(que_node_get_type(node) == QUE_NODE_ASSIGNMENT);
/* Evaluate the value to assign */
eval_exp(node->val);
eval_node_copy_val(node->var->alias, node->val);
thr->run_node = que_node_get_parent(node);
return(thr);
}
/**************************************************************************
Performs an execution step of a for-loop node. */
que_thr_t*
for_step(
/*=====*/
/* out: query thread to run next or NULL */
que_thr_t* thr) /* in: query thread */
{
for_node_t* node;
que_node_t* parent;
lint loop_var_value;
ut_ad(thr);
node = thr->run_node;
ut_ad(que_node_get_type(node) == QUE_NODE_FOR);
parent = que_node_get_parent(node);
if (thr->prev_node != parent) {
/* Move to the next statement */
thr->run_node = que_node_get_next(thr->prev_node);
if (thr->run_node != NULL) {
return(thr);
}
/* Increment the value of loop_var */
loop_var_value = 1 + eval_node_get_int_val(node->loop_var);
} else {
/* Initialize the loop */
eval_exp(node->loop_start_limit);
eval_exp(node->loop_end_limit);
loop_var_value = eval_node_get_int_val(node->loop_start_limit);
node->loop_end_value = eval_node_get_int_val(
node->loop_end_limit);
}
/* Check if we should do another loop */
if (loop_var_value > node->loop_end_value) {
/* Enough loops done */
thr->run_node = parent;
} else {
eval_node_set_int_val(node->loop_var, loop_var_value);
thr->run_node = node->stat_list;
}
return(thr);
}
/**************************************************************************
Performs an execution step of a return-statement node. */
que_thr_t*
return_step(
/*========*/
/* out: query thread to run next or NULL */
que_thr_t* thr) /* in: query thread */
{
return_node_t* node;
que_node_t* parent;
ut_ad(thr);
node = thr->run_node;
ut_ad(que_node_get_type(node) == QUE_NODE_RETURN);
parent = node;
while (que_node_get_type(parent) != QUE_NODE_PROC) {
parent = que_node_get_parent(parent);
}
ut_a(parent);
thr->run_node = que_node_get_parent(parent);
return(thr);
}

23
export.sh Executable file
View file

@ -0,0 +1,23 @@
#!/bin/bash
#
# export current working directory in a format suitable for sending to
# MySQL as a snapshot.
rm -rf to-mysql
svn export . to-mysql
cd to-mysql
mkdir innobase
mv * innobase
mkdir -p sql mysql-test/t mysql-test/r mysql-test/include
cd innobase
mv handler/* ../sql
rmdir handler
mv mysql-test/*.test mysql-test/*.opt ../mysql-test/t
mv mysql-test/*.result ../mysql-test/r
mv mysql-test/*.inc ../mysql-test/include
rmdir mysql-test
rm setup.sh export.sh

24
fil/Makefile.am Normal file
View file

@ -0,0 +1,24 @@
# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
# & Innobase Oy
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
include ../include/Makefile.i
noinst_LIBRARIES = libfil.a
libfil_a_SOURCES = fil0fil.c
EXTRA_PROGRAMS =

4337
fil/fil0fil.c Normal file

File diff suppressed because it is too large Load diff

25
fsp/Makefile.am Normal file
View file

@ -0,0 +1,25 @@
# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
# & Innobase Oy
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
include ../include/Makefile.i
noinst_LIBRARIES = libfsp.a
libfsp_a_SOURCES = fsp0fsp.c
EXTRA_PROGRAMS =

3945
fsp/fsp0fsp.c Normal file

File diff suppressed because it is too large Load diff

24
fut/Makefile.am Normal file
View file

@ -0,0 +1,24 @@
# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
# & Innobase Oy
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
include ../include/Makefile.i
noinst_LIBRARIES = libfut.a
libfut_a_SOURCES = fut0fut.c fut0lst.c
EXTRA_PROGRAMS =

14
fut/fut0fut.c Normal file
View file

@ -0,0 +1,14 @@
/**********************************************************************
File-based utilities
(c) 1995 Innobase Oy
Created 12/13/1995 Heikki Tuuri
***********************************************************************/
#include "fut0fut.h"
#ifdef UNIV_NONINL
#include "fut0fut.ic"
#endif

518
fut/fut0lst.c Normal file
View file

@ -0,0 +1,518 @@
/**********************************************************************
File-based list utilities
(c) 1995 Innobase Oy
Created 11/28/1995 Heikki Tuuri
***********************************************************************/
#include "fut0lst.h"
#ifdef UNIV_NONINL
#include "fut0lst.ic"
#endif
#include "buf0buf.h"
/************************************************************************
Adds a node to an empty list. */
static
void
flst_add_to_empty(
/*==============*/
flst_base_node_t* base, /* in: pointer to base node of
empty list */
flst_node_t* node, /* in: node to add */
mtr_t* mtr) /* in: mini-transaction handle */
{
ulint space;
fil_addr_t node_addr;
ulint len;
ut_ad(mtr && base && node);
ut_ad(base != node);
ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
MTR_MEMO_PAGE_X_FIX));
ut_ad(mtr_memo_contains(mtr, buf_block_align(node),
MTR_MEMO_PAGE_X_FIX));
len = flst_get_len(base, mtr);
ut_a(len == 0);
buf_ptr_get_fsp_addr(node, &space, &node_addr);
/* Update first and last fields of base node */
flst_write_addr(base + FLST_FIRST, node_addr, mtr);
flst_write_addr(base + FLST_LAST, node_addr, mtr);
/* Set prev and next fields of node to add */
flst_write_addr(node + FLST_PREV, fil_addr_null, mtr);
flst_write_addr(node + FLST_NEXT, fil_addr_null, mtr);
/* Update len of base node */
mlog_write_ulint(base + FLST_LEN, len + 1, MLOG_4BYTES, mtr);
}
/************************************************************************
Adds a node as the last node in a list. */
void
flst_add_last(
/*==========*/
flst_base_node_t* base, /* in: pointer to base node of list */
flst_node_t* node, /* in: node to add */
mtr_t* mtr) /* in: mini-transaction handle */
{
ulint space;
fil_addr_t node_addr;
ulint len;
fil_addr_t last_addr;
flst_node_t* last_node;
ut_ad(mtr && base && node);
ut_ad(base != node);
ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
MTR_MEMO_PAGE_X_FIX));
ut_ad(mtr_memo_contains(mtr, buf_block_align(node),
MTR_MEMO_PAGE_X_FIX));
len = flst_get_len(base, mtr);
last_addr = flst_get_last(base, mtr);
buf_ptr_get_fsp_addr(node, &space, &node_addr);
/* If the list is not empty, call flst_insert_after */
if (len != 0) {
if (last_addr.page == node_addr.page) {
last_node = buf_frame_align(node) + last_addr.boffset;
} else {
last_node = fut_get_ptr(space, last_addr, RW_X_LATCH,
mtr);
}
flst_insert_after(base, last_node, node, mtr);
} else {
/* else call flst_add_to_empty */
flst_add_to_empty(base, node, mtr);
}
}
/************************************************************************
Adds a node as the first node in a list. */
void
flst_add_first(
/*===========*/
flst_base_node_t* base, /* in: pointer to base node of list */
flst_node_t* node, /* in: node to add */
mtr_t* mtr) /* in: mini-transaction handle */
{
ulint space;
fil_addr_t node_addr;
ulint len;
fil_addr_t first_addr;
flst_node_t* first_node;
ut_ad(mtr && base && node);
ut_ad(base != node);
ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
MTR_MEMO_PAGE_X_FIX));
ut_ad(mtr_memo_contains(mtr, buf_block_align(node),
MTR_MEMO_PAGE_X_FIX));
len = flst_get_len(base, mtr);
first_addr = flst_get_first(base, mtr);
buf_ptr_get_fsp_addr(node, &space, &node_addr);
/* If the list is not empty, call flst_insert_before */
if (len != 0) {
if (first_addr.page == node_addr.page) {
first_node = buf_frame_align(node)
+ first_addr.boffset;
} else {
first_node = fut_get_ptr(space, first_addr,
RW_X_LATCH, mtr);
}
flst_insert_before(base, node, first_node, mtr);
} else {
/* else call flst_add_to_empty */
flst_add_to_empty(base, node, mtr);
}
}
/************************************************************************
Inserts a node after another in a list. */
void
flst_insert_after(
/*==============*/
flst_base_node_t* base, /* in: pointer to base node of list */
flst_node_t* node1, /* in: node to insert after */
flst_node_t* node2, /* in: node to add */
mtr_t* mtr) /* in: mini-transaction handle */
{
ulint space;
fil_addr_t node1_addr;
fil_addr_t node2_addr;
flst_node_t* node3;
fil_addr_t node3_addr;
ulint len;
ut_ad(mtr && node1 && node2 && base);
ut_ad(base != node1);
ut_ad(base != node2);
ut_ad(node2 != node1);
ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
MTR_MEMO_PAGE_X_FIX));
ut_ad(mtr_memo_contains(mtr, buf_block_align(node1),
MTR_MEMO_PAGE_X_FIX));
ut_ad(mtr_memo_contains(mtr, buf_block_align(node2),
MTR_MEMO_PAGE_X_FIX));
buf_ptr_get_fsp_addr(node1, &space, &node1_addr);
buf_ptr_get_fsp_addr(node2, &space, &node2_addr);
node3_addr = flst_get_next_addr(node1, mtr);
/* Set prev and next fields of node2 */
flst_write_addr(node2 + FLST_PREV, node1_addr, mtr);
flst_write_addr(node2 + FLST_NEXT, node3_addr, mtr);
if (!fil_addr_is_null(node3_addr)) {
/* Update prev field of node3 */
node3 = fut_get_ptr(space, node3_addr, RW_X_LATCH, mtr);
flst_write_addr(node3 + FLST_PREV, node2_addr, mtr);
} else {
/* node1 was last in list: update last field in base */
flst_write_addr(base + FLST_LAST, node2_addr, mtr);
}
/* Set next field of node1 */
flst_write_addr(node1 + FLST_NEXT, node2_addr, mtr);
/* Update len of base node */
len = flst_get_len(base, mtr);
mlog_write_ulint(base + FLST_LEN, len + 1, MLOG_4BYTES, mtr);
}
/************************************************************************
Inserts a node before another in a list. */
void
flst_insert_before(
/*===============*/
flst_base_node_t* base, /* in: pointer to base node of list */
flst_node_t* node2, /* in: node to insert */
flst_node_t* node3, /* in: node to insert before */
mtr_t* mtr) /* in: mini-transaction handle */
{
ulint space;
flst_node_t* node1;
fil_addr_t node1_addr;
fil_addr_t node2_addr;
fil_addr_t node3_addr;
ulint len;
ut_ad(mtr && node2 && node3 && base);
ut_ad(base != node2);
ut_ad(base != node3);
ut_ad(node2 != node3);
ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
MTR_MEMO_PAGE_X_FIX));
ut_ad(mtr_memo_contains(mtr, buf_block_align(node2),
MTR_MEMO_PAGE_X_FIX));
ut_ad(mtr_memo_contains(mtr, buf_block_align(node3),
MTR_MEMO_PAGE_X_FIX));
buf_ptr_get_fsp_addr(node2, &space, &node2_addr);
buf_ptr_get_fsp_addr(node3, &space, &node3_addr);
node1_addr = flst_get_prev_addr(node3, mtr);
/* Set prev and next fields of node2 */
flst_write_addr(node2 + FLST_PREV, node1_addr, mtr);
flst_write_addr(node2 + FLST_NEXT, node3_addr, mtr);
if (!fil_addr_is_null(node1_addr)) {
/* Update next field of node1 */
node1 = fut_get_ptr(space, node1_addr, RW_X_LATCH, mtr);
flst_write_addr(node1 + FLST_NEXT, node2_addr, mtr);
} else {
/* node3 was first in list: update first field in base */
flst_write_addr(base + FLST_FIRST, node2_addr, mtr);
}
/* Set prev field of node3 */
flst_write_addr(node3 + FLST_PREV, node2_addr, mtr);
/* Update len of base node */
len = flst_get_len(base, mtr);
mlog_write_ulint(base + FLST_LEN, len + 1, MLOG_4BYTES, mtr);
}
/************************************************************************
Removes a node. */
void
flst_remove(
/*========*/
flst_base_node_t* base, /* in: pointer to base node of list */
flst_node_t* node2, /* in: node to remove */
mtr_t* mtr) /* in: mini-transaction handle */
{
ulint space;
flst_node_t* node1;
fil_addr_t node1_addr;
fil_addr_t node2_addr;
flst_node_t* node3;
fil_addr_t node3_addr;
ulint len;
ut_ad(mtr && node2 && base);
ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
MTR_MEMO_PAGE_X_FIX));
ut_ad(mtr_memo_contains(mtr, buf_block_align(node2),
MTR_MEMO_PAGE_X_FIX));
buf_ptr_get_fsp_addr(node2, &space, &node2_addr);
node1_addr = flst_get_prev_addr(node2, mtr);
node3_addr = flst_get_next_addr(node2, mtr);
if (!fil_addr_is_null(node1_addr)) {
/* Update next field of node1 */
if (node1_addr.page == node2_addr.page) {
node1 = buf_frame_align(node2) + node1_addr.boffset;
} else {
node1 = fut_get_ptr(space, node1_addr, RW_X_LATCH,
mtr);
}
ut_ad(node1 != node2);
flst_write_addr(node1 + FLST_NEXT, node3_addr, mtr);
} else {
/* node2 was first in list: update first field in base */
flst_write_addr(base + FLST_FIRST, node3_addr, mtr);
}
if (!fil_addr_is_null(node3_addr)) {
/* Update prev field of node3 */
if (node3_addr.page == node2_addr.page) {
node3 = buf_frame_align(node2) + node3_addr.boffset;
} else {
node3 = fut_get_ptr(space, node3_addr, RW_X_LATCH,
mtr);
}
ut_ad(node2 != node3);
flst_write_addr(node3 + FLST_PREV, node1_addr, mtr);
} else {
/* node2 was last in list: update last field in base */
flst_write_addr(base + FLST_LAST, node1_addr, mtr);
}
/* Update len of base node */
len = flst_get_len(base, mtr);
ut_ad(len > 0);
mlog_write_ulint(base + FLST_LEN, len - 1, MLOG_4BYTES, mtr);
}
/************************************************************************
Cuts off the tail of the list, including the node given. The number of
nodes which will be removed must be provided by the caller, as this function
does not measure the length of the tail. */
void
flst_cut_end(
/*=========*/
flst_base_node_t* base, /* in: pointer to base node of list */
flst_node_t* node2, /* in: first node to remove */
ulint n_nodes,/* in: number of nodes to remove,
must be >= 1 */
mtr_t* mtr) /* in: mini-transaction handle */
{
ulint space;
flst_node_t* node1;
fil_addr_t node1_addr;
fil_addr_t node2_addr;
ulint len;
ut_ad(mtr && node2 && base);
ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
MTR_MEMO_PAGE_X_FIX));
ut_ad(mtr_memo_contains(mtr, buf_block_align(node2),
MTR_MEMO_PAGE_X_FIX));
ut_ad(n_nodes > 0);
buf_ptr_get_fsp_addr(node2, &space, &node2_addr);
node1_addr = flst_get_prev_addr(node2, mtr);
if (!fil_addr_is_null(node1_addr)) {
/* Update next field of node1 */
if (node1_addr.page == node2_addr.page) {
node1 = buf_frame_align(node2) + node1_addr.boffset;
} else {
node1 = fut_get_ptr(space, node1_addr, RW_X_LATCH,
mtr);
}
flst_write_addr(node1 + FLST_NEXT, fil_addr_null, mtr);
} else {
/* node2 was first in list: update the field in base */
flst_write_addr(base + FLST_FIRST, fil_addr_null, mtr);
}
flst_write_addr(base + FLST_LAST, node1_addr, mtr);
/* Update len of base node */
len = flst_get_len(base, mtr);
ut_ad(len >= n_nodes);
mlog_write_ulint(base + FLST_LEN, len - n_nodes, MLOG_4BYTES, mtr);
}
/************************************************************************
Cuts off the tail of the list, not including the given node. The number of
nodes which will be removed must be provided by the caller, as this function
does not measure the length of the tail. */
void
flst_truncate_end(
/*==============*/
flst_base_node_t* base, /* in: pointer to base node of list */
flst_node_t* node2, /* in: first node not to remove */
ulint n_nodes,/* in: number of nodes to remove */
mtr_t* mtr) /* in: mini-transaction handle */
{
fil_addr_t node2_addr;
ulint len;
ulint space;
ut_ad(mtr && node2 && base);
ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
MTR_MEMO_PAGE_X_FIX));
ut_ad(mtr_memo_contains(mtr, buf_block_align(node2),
MTR_MEMO_PAGE_X_FIX));
if (n_nodes == 0) {
ut_ad(fil_addr_is_null(flst_get_next_addr(node2, mtr)));
return;
}
buf_ptr_get_fsp_addr(node2, &space, &node2_addr);
/* Update next field of node2 */
flst_write_addr(node2 + FLST_NEXT, fil_addr_null, mtr);
flst_write_addr(base + FLST_LAST, node2_addr, mtr);
/* Update len of base node */
len = flst_get_len(base, mtr);
ut_ad(len >= n_nodes);
mlog_write_ulint(base + FLST_LEN, len - n_nodes, MLOG_4BYTES, mtr);
}
/************************************************************************
Validates a file-based list. */
ibool
flst_validate(
/*==========*/
/* out: TRUE if ok */
flst_base_node_t* base, /* in: pointer to base node of list */
mtr_t* mtr1) /* in: mtr */
{
ulint space;
flst_node_t* node;
fil_addr_t node_addr;
fil_addr_t base_addr;
ulint len;
ulint i;
mtr_t mtr2;
ut_ad(base);
ut_ad(mtr_memo_contains(mtr1, buf_block_align(base),
MTR_MEMO_PAGE_X_FIX));
/* We use two mini-transaction handles: the first is used to
lock the base node, and prevent other threads from modifying the
list. The second is used to traverse the list. We cannot run the
second mtr without committing it at times, because if the list
is long, then the x-locked pages could fill the buffer resulting
in a deadlock. */
/* Find out the space id */
buf_ptr_get_fsp_addr(base, &space, &base_addr);
len = flst_get_len(base, mtr1);
node_addr = flst_get_first(base, mtr1);
for (i = 0; i < len; i++) {
mtr_start(&mtr2);
node = fut_get_ptr(space, node_addr, RW_X_LATCH, &mtr2);
node_addr = flst_get_next_addr(node, &mtr2);
mtr_commit(&mtr2); /* Commit mtr2 each round to prevent buffer
becoming full */
}
ut_a(fil_addr_is_null(node_addr));
node_addr = flst_get_last(base, mtr1);
for (i = 0; i < len; i++) {
mtr_start(&mtr2);
node = fut_get_ptr(space, node_addr, RW_X_LATCH, &mtr2);
node_addr = flst_get_prev_addr(node, &mtr2);
mtr_commit(&mtr2); /* Commit mtr2 each round to prevent buffer
becoming full */
}
ut_a(fil_addr_is_null(node_addr));
return(TRUE);
}
/************************************************************************
Prints info of a file-based list. */
void
flst_print(
/*=======*/
flst_base_node_t* base, /* in: pointer to base node of list */
mtr_t* mtr) /* in: mtr */
{
buf_frame_t* frame;
ulint len;
ut_ad(base && mtr);
ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
MTR_MEMO_PAGE_X_FIX));
frame = buf_frame_align(base);
len = flst_get_len(base, mtr);
fprintf(stderr,
"FILE-BASED LIST:\n"
"Base node in space %lu page %lu byte offset %lu; len %lu\n",
(ulong) buf_frame_get_space_id(frame),
(ulong) buf_frame_get_page_no(frame),
(ulong) (base - frame), (ulong) len);
}

24
ha/Makefile.am Normal file
View file

@ -0,0 +1,24 @@
# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
# & Innobase Oy
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
include ../include/Makefile.i
noinst_LIBRARIES = libha.a
libha_a_SOURCES = ha0ha.c hash0hash.c
EXTRA_PROGRAMS =

357
ha/ha0ha.c Normal file
View file

@ -0,0 +1,357 @@
/************************************************************************
The hash table with external chains
(c) 1994-1997 Innobase Oy
Created 8/22/1994 Heikki Tuuri
*************************************************************************/
#include "ha0ha.h"
#ifdef UNIV_NONINL
#include "ha0ha.ic"
#endif
#include "buf0buf.h"
/*****************************************************************
Creates a hash table with >= n array cells. The actual number of cells is
chosen to be a prime number slightly bigger than n. */
hash_table_t*
ha_create(
/*======*/
/* out, own: created table */
ibool in_btr_search, /* in: TRUE if the hash table is used in
the btr_search module */
ulint n, /* in: number of array cells */
ulint n_mutexes, /* in: number of mutexes to protect the
hash table: must be a power of 2, or 0 */
ulint mutex_level) /* in: level of the mutexes in the latching
order: this is used in the debug version */
{
hash_table_t* table;
ulint i;
table = hash_create(n);
if (in_btr_search) {
table->adaptive = TRUE;
} else {
table->adaptive = FALSE;
}
if (n_mutexes == 0) {
if (in_btr_search) {
table->heap = mem_heap_create_in_btr_search(4096);
} else {
table->heap = mem_heap_create_in_buffer(4096);
}
return(table);
}
hash_create_mutexes(table, n_mutexes, mutex_level);
table->heaps = mem_alloc(n_mutexes * sizeof(void*));
for (i = 0; i < n_mutexes; i++) {
if (in_btr_search) {
table->heaps[i] = mem_heap_create_in_btr_search(4096);
} else {
table->heaps[i] = mem_heap_create_in_buffer(4096);
}
}
return(table);
}
/*****************************************************************
Inserts an entry into a hash table. If an entry with the same fold number
is found, its node is updated to point to the new data, and no new node
is inserted. */
ibool
ha_insert_for_fold(
/*===============*/
/* out: TRUE if succeed, FALSE if no more
memory could be allocated */
hash_table_t* table, /* in: hash table */
ulint fold, /* in: folded value of data; if a node with
the same fold value already exists, it is
updated to point to the same data, and no new
node is created! */
void* data) /* in: data, must not be NULL */
{
hash_cell_t* cell;
ha_node_t* node;
ha_node_t* prev_node;
buf_block_t* prev_block;
ulint hash;
ut_ad(table && data);
#ifdef UNIV_SYNC_DEBUG
ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
#endif /* UNIV_SYNC_DEBUG */
hash = hash_calc_hash(fold, table);
cell = hash_get_nth_cell(table, hash);
prev_node = cell->node;
while (prev_node != NULL) {
if (prev_node->fold == fold) {
if (table->adaptive) {
prev_block = buf_block_align(prev_node->data);
ut_a(prev_block->n_pointers > 0);
prev_block->n_pointers--;
buf_block_align(data)->n_pointers++;
}
prev_node->data = data;
return(TRUE);
}
prev_node = prev_node->next;
}
/* We have to allocate a new chain node */
node = mem_heap_alloc(hash_get_heap(table, fold), sizeof(ha_node_t));
if (node == NULL) {
/* It was a btr search type memory heap and at the moment
no more memory could be allocated: return */
ut_ad(hash_get_heap(table, fold)->type & MEM_HEAP_BTR_SEARCH);
return(FALSE);
}
ha_node_set_data(node, data);
if (table->adaptive) {
buf_block_align(data)->n_pointers++;
}
node->fold = fold;
node->next = NULL;
prev_node = cell->node;
if (prev_node == NULL) {
cell->node = node;
return(TRUE);
}
while (prev_node->next != NULL) {
prev_node = prev_node->next;
}
prev_node->next = node;
return(TRUE);
}
/***************************************************************
Deletes a hash node. */
void
ha_delete_hash_node(
/*================*/
hash_table_t* table, /* in: hash table */
ha_node_t* del_node) /* in: node to be deleted */
{
if (table->adaptive) {
ut_a(buf_block_align(del_node->data)->n_pointers > 0);
buf_block_align(del_node->data)->n_pointers--;
}
HASH_DELETE_AND_COMPACT(ha_node_t, next, table, del_node);
}
/*****************************************************************
Deletes an entry from a hash table. */
void
ha_delete(
/*======*/
hash_table_t* table, /* in: hash table */
ulint fold, /* in: folded value of data */
void* data) /* in: data, must not be NULL and must exist
in the hash table */
{
ha_node_t* node;
#ifdef UNIV_SYNC_DEBUG
ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
#endif /* UNIV_SYNC_DEBUG */
node = ha_search_with_data(table, fold, data);
ut_a(node);
ha_delete_hash_node(table, node);
}
/*************************************************************
Looks for an element when we know the pointer to the data, and updates
the pointer to data, if found. */
void
ha_search_and_update_if_found(
/*==========================*/
hash_table_t* table, /* in: hash table */
ulint fold, /* in: folded value of the searched data */
void* data, /* in: pointer to the data */
void* new_data)/* in: new pointer to the data */
{
ha_node_t* node;
#ifdef UNIV_SYNC_DEBUG
ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
#endif /* UNIV_SYNC_DEBUG */
node = ha_search_with_data(table, fold, data);
if (node) {
if (table->adaptive) {
ut_a(buf_block_align(node->data)->n_pointers > 0);
buf_block_align(node->data)->n_pointers--;
buf_block_align(new_data)->n_pointers++;
}
node->data = new_data;
}
}
/*********************************************************************
Removes from the chain determined by fold all nodes whose data pointer
points to the page given. */
void
ha_remove_all_nodes_to_page(
/*========================*/
hash_table_t* table, /* in: hash table */
ulint fold, /* in: fold value */
page_t* page) /* in: buffer page */
{
ha_node_t* node;
#ifdef UNIV_SYNC_DEBUG
ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
#endif /* UNIV_SYNC_DEBUG */
node = ha_chain_get_first(table, fold);
while (node) {
if (buf_frame_align(ha_node_get_data(node)) == page) {
/* Remove the hash node */
ha_delete_hash_node(table, node);
/* Start again from the first node in the chain
because the deletion may compact the heap of
nodes and move other nodes! */
node = ha_chain_get_first(table, fold);
} else {
node = ha_chain_get_next(node);
}
}
#ifdef UNIV_DEBUG
/* Check that all nodes really got deleted */
node = ha_chain_get_first(table, fold);
while (node) {
ut_a(buf_frame_align(ha_node_get_data(node)) != page);
node = ha_chain_get_next(node);
}
#endif
}
/*****************************************************************
Validates a hash table. */
ibool
ha_validate(
/*========*/
/* out: TRUE if ok */
hash_table_t* table) /* in: hash table */
{
hash_cell_t* cell;
ha_node_t* node;
ibool ok = TRUE;
ulint i;
for (i = 0; i < hash_get_n_cells(table); i++) {
cell = hash_get_nth_cell(table, i);
node = cell->node;
while (node) {
if (hash_calc_hash(node->fold, table) != i) {
ut_print_timestamp(stderr);
fprintf(stderr,
"InnoDB: Error: hash table node fold value %lu does not\n"
"InnoDB: match with the cell number %lu.\n",
(ulong) node->fold, (ulong) i);
ok = FALSE;
}
node = node->next;
}
}
return(ok);
}
/*****************************************************************
Prints info of a hash table. */
void
ha_print_info(
/*==========*/
FILE* file, /* in: file where to print */
hash_table_t* table) /* in: hash table */
{
hash_cell_t* cell;
ulint cells = 0;
ulint n_bufs;
ulint i;
for (i = 0; i < hash_get_n_cells(table); i++) {
cell = hash_get_nth_cell(table, i);
if (cell->node) {
cells++;
}
}
fprintf(file,
"Hash table size %lu, used cells %lu",
(ulong) hash_get_n_cells(table), (ulong) cells);
if (table->heaps == NULL && table->heap != NULL) {
/* This calculation is intended for the adaptive hash
index: how many buffer frames we have reserved? */
n_bufs = UT_LIST_GET_LEN(table->heap->base) - 1;
if (table->heap->free_block) {
n_bufs++;
}
fprintf(file, ", node heap has %lu buffer(s)\n", (ulong) n_bufs);
}
}

153
ha/hash0hash.c Normal file
View file

@ -0,0 +1,153 @@
/******************************************************
The simple hash table utility
(c) 1997 Innobase Oy
Created 5/20/1997 Heikki Tuuri
*******************************************************/
#include "hash0hash.h"
#ifdef UNIV_NONINL
#include "hash0hash.ic"
#endif
#include "mem0mem.h"
/****************************************************************
Reserves the mutex for a fold value in a hash table. */
void
hash_mutex_enter(
/*=============*/
hash_table_t* table, /* in: hash table */
ulint fold) /* in: fold */
{
mutex_enter(hash_get_mutex(table, fold));
}
/****************************************************************
Releases the mutex for a fold value in a hash table. */
void
hash_mutex_exit(
/*============*/
hash_table_t* table, /* in: hash table */
ulint fold) /* in: fold */
{
mutex_exit(hash_get_mutex(table, fold));
}
/****************************************************************
Reserves all the mutexes of a hash table, in an ascending order. */
void
hash_mutex_enter_all(
/*=================*/
hash_table_t* table) /* in: hash table */
{
ulint i;
for (i = 0; i < table->n_mutexes; i++) {
mutex_enter(table->mutexes + i);
}
}
/****************************************************************
Releases all the mutexes of a hash table. */
void
hash_mutex_exit_all(
/*================*/
hash_table_t* table) /* in: hash table */
{
ulint i;
for (i = 0; i < table->n_mutexes; i++) {
mutex_exit(table->mutexes + i);
}
}
/*****************************************************************
Creates a hash table with >= n array cells. The actual number of cells is
chosen to be a prime number slightly bigger than n. */
hash_table_t*
hash_create(
/*========*/
/* out, own: created table */
ulint n) /* in: number of array cells */
{
hash_cell_t* array;
ulint prime;
hash_table_t* table;
ulint i;
hash_cell_t* cell;
prime = ut_find_prime(n);
table = mem_alloc(sizeof(hash_table_t));
array = ut_malloc(sizeof(hash_cell_t) * prime);
table->adaptive = FALSE;
table->array = array;
table->n_cells = prime;
table->n_mutexes = 0;
table->mutexes = NULL;
table->heaps = NULL;
table->heap = NULL;
table->magic_n = HASH_TABLE_MAGIC_N;
/* Initialize the cell array */
for (i = 0; i < prime; i++) {
cell = hash_get_nth_cell(table, i);
cell->node = NULL;
}
return(table);
}
/*****************************************************************
Frees a hash table. */
void
hash_table_free(
/*============*/
hash_table_t* table) /* in, own: hash table */
{
ut_a(table->mutexes == NULL);
ut_free(table->array);
mem_free(table);
}
/*****************************************************************
Creates a mutex array to protect a hash table. */
void
hash_create_mutexes(
/*================*/
hash_table_t* table, /* in: hash table */
ulint n_mutexes, /* in: number of mutexes, must be a
power of 2 */
ulint sync_level) /* in: latching order level of the
mutexes: used in the debug version */
{
ulint i;
ut_a(n_mutexes == ut_2_power_up(n_mutexes));
table->mutexes = mem_alloc(n_mutexes * sizeof(mutex_t));
for (i = 0; i < n_mutexes; i++) {
mutex_create(table->mutexes + i);
mutex_set_level(table->mutexes + i, sync_level);
}
table->n_mutexes = n_mutexes;
}

7451
handler/ha_innodb.cc Normal file

File diff suppressed because it is too large Load diff

349
handler/ha_innodb.h Normal file
View file

@ -0,0 +1,349 @@
/* Copyright (C) 2000-2005 MySQL AB && Innobase Oy
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
/*
This file is based on ha_berkeley.h of MySQL distribution
This file defines the Innodb handler: the interface between MySQL and
Innodb
*/
#ifdef USE_PRAGMA_INTERFACE
#pragma interface /* gcc class implementation */
#endif
typedef struct st_innobase_share {
THR_LOCK lock;
pthread_mutex_t mutex;
char *table_name;
uint table_name_length,use_count;
} INNOBASE_SHARE;
my_bool innobase_query_caching_of_table_permitted(THD* thd, char* full_name,
uint full_name_len,
ulonglong *unused);
/* The class defining a handle to an Innodb table */
class ha_innobase: public handler
{
void* innobase_prebuilt;/* (row_prebuilt_t*) prebuilt
struct in InnoDB, used to save
CPU time with prebuilt data
structures*/
THD* user_thd; /* the thread handle of the user
currently using the handle; this is
set in external_lock function */
query_id_t last_query_id; /* the latest query id where the
handle was used */
THR_LOCK_DATA lock;
INNOBASE_SHARE *share;
gptr alloc_ptr;
byte* upd_buff; /* buffer used in updates */
byte* key_val_buff; /* buffer used in converting
search key values from MySQL format
to Innodb format */
ulong upd_and_key_val_buff_len;
/* the length of each of the previous
two buffers */
ulong int_table_flags;
uint primary_key;
uint last_dup_key;
ulong start_of_scan; /* this is set to 1 when we are
starting a table scan but have not
yet fetched any row, else 0 */
uint last_match_mode;/* match mode of the latest search:
ROW_SEL_EXACT, ROW_SEL_EXACT_PREFIX,
or undefined */
uint num_write_row; /* number of write_row() calls */
ulong max_supported_row_length(const byte *buf);
uint store_key_val_for_row(uint keynr, char* buff, uint buff_len,
const byte* record);
int update_thd(THD* thd);
int change_active_index(uint keynr);
int general_fetch(byte* buf, uint direction, uint match_mode);
int innobase_read_and_init_auto_inc(longlong* ret);
/* Init values for the class: */
public:
ha_innobase(TABLE *table_arg);
~ha_innobase() {}
/*
Get the row type from the storage engine. If this method returns
ROW_TYPE_NOT_USED, the information in HA_CREATE_INFO should be used.
*/
enum row_type get_row_type() const;
const char* table_type() const { return("InnoDB");}
const char *index_type(uint key_number) { return "BTREE"; }
const char** bas_ext() const;
ulong table_flags() const { return int_table_flags; }
ulong index_flags(uint idx, uint part, bool all_parts) const
{
return (HA_READ_NEXT |
HA_READ_PREV |
HA_READ_ORDER |
HA_READ_RANGE |
HA_KEYREAD_ONLY);
}
uint max_supported_keys() const { return MAX_KEY; }
/* An InnoDB page must store >= 2 keys;
a secondary key record must also contain the
primary key value:
max key length is therefore set to slightly
less than 1 / 4 of page size which is 16 kB;
but currently MySQL does not work with keys
whose size is > MAX_KEY_LENGTH */
uint max_supported_key_length() const { return 3500; }
uint max_supported_key_part_length() const { return 3500; }
const key_map *keys_to_use_for_scanning() { return &key_map_full; }
bool has_transactions() { return 1;}
int open(const char *name, int mode, uint test_if_locked);
int close(void);
double scan_time();
double read_time(uint index, uint ranges, ha_rows rows);
int write_row(byte * buf);
int update_row(const byte * old_data, byte * new_data);
int delete_row(const byte * buf);
void unlock_row();
int index_init(uint index);
int index_end();
int index_read(byte * buf, const byte * key,
uint key_len, enum ha_rkey_function find_flag);
int index_read_idx(byte * buf, uint index, const byte * key,
uint key_len, enum ha_rkey_function find_flag);
int index_read_last(byte * buf, const byte * key, uint key_len);
int index_next(byte * buf);
int index_next_same(byte * buf, const byte *key, uint keylen);
int index_prev(byte * buf);
int index_first(byte * buf);
int index_last(byte * buf);
int rnd_init(bool scan);
int rnd_end();
int rnd_next(byte *buf);
int rnd_pos(byte * buf, byte *pos);
void position(const byte *record);
void info(uint);
int analyze(THD* thd,HA_CHECK_OPT* check_opt);
int optimize(THD* thd,HA_CHECK_OPT* check_opt);
int discard_or_import_tablespace(my_bool discard);
int extra(enum ha_extra_function operation);
int external_lock(THD *thd, int lock_type);
int transactional_table_lock(THD *thd, int lock_type);
int start_stmt(THD *thd, thr_lock_type lock_type);
void position(byte *record);
ha_rows records_in_range(uint inx, key_range *min_key, key_range
*max_key);
ha_rows estimate_rows_upper_bound();
int create(const char *name, register TABLE *form,
HA_CREATE_INFO *create_info);
int delete_all_rows();
int delete_table(const char *name);
int rename_table(const char* from, const char* to);
int check(THD* thd, HA_CHECK_OPT* check_opt);
char* update_table_comment(const char* comment);
char* get_foreign_key_create_info();
int get_foreign_key_list(THD *thd, List<FOREIGN_KEY_INFO> *f_key_list);
bool can_switch_engines();
uint referenced_by_foreign_key();
void free_foreign_key_create_info(char* str);
THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to,
enum thr_lock_type lock_type);
void init_table_handle_for_HANDLER();
ulonglong get_auto_increment();
int reset_auto_increment(ulonglong value);
virtual bool get_error_message(int error, String *buf);
uint8 table_cache_type() { return HA_CACHE_TBL_ASKTRANSACT; }
/*
ask handler about permission to cache table during query registration
*/
my_bool register_query_cache_table(THD *thd, char *table_key,
uint key_length,
qc_engine_callback *call_back,
ulonglong *engine_data)
{
*call_back= innobase_query_caching_of_table_permitted;
*engine_data= 0;
return innobase_query_caching_of_table_permitted(thd, table_key,
key_length,
engine_data);
}
static char *get_mysql_bin_log_name();
static ulonglong get_mysql_bin_log_pos();
bool primary_key_is_clustered() { return true; }
int cmp_ref(const byte *ref1, const byte *ref2);
};
extern struct show_var_st innodb_status_variables[];
extern uint innobase_init_flags, innobase_lock_type;
extern uint innobase_flush_log_at_trx_commit;
extern ulong innobase_cache_size, innobase_fast_shutdown;
extern ulong innobase_large_page_size;
extern char *innobase_home, *innobase_tmpdir, *innobase_logdir;
extern long innobase_lock_scan_time;
extern long innobase_mirrored_log_groups, innobase_log_files_in_group;
extern long innobase_log_file_size, innobase_log_buffer_size;
extern long innobase_buffer_pool_size, innobase_additional_mem_pool_size;
extern long innobase_buffer_pool_awe_mem_mb;
extern long innobase_file_io_threads, innobase_lock_wait_timeout;
extern long innobase_force_recovery;
extern long innobase_open_files;
extern char *innobase_data_home_dir, *innobase_data_file_path;
extern char *innobase_log_group_home_dir, *innobase_log_arch_dir;
extern char *innobase_unix_file_flush_method;
/* The following variables have to be my_bool for SHOW VARIABLES to work */
extern my_bool innobase_log_archive,
innobase_use_doublewrite,
innobase_use_checksums,
innobase_use_large_pages,
innobase_use_native_aio,
innobase_file_per_table, innobase_locks_unsafe_for_binlog,
innobase_create_status_file;
extern my_bool innobase_very_fast_shutdown; /* set this to 1 just before
calling innobase_end() if you want
InnoDB to shut down without
flushing the buffer pool: this
is equivalent to a 'crash' */
extern "C" {
extern ulong srv_max_buf_pool_modified_pct;
extern ulong srv_max_purge_lag;
extern ulong srv_auto_extend_increment;
extern ulong srv_n_spin_wait_rounds;
extern ulong srv_n_free_tickets_to_enter;
extern ulong srv_thread_sleep_delay;
extern ulong srv_thread_concurrency;
extern ulong srv_commit_concurrency;
}
extern TYPELIB innobase_lock_typelib;
bool innobase_init(void);
bool innobase_end(void);
bool innobase_flush_logs(void);
uint innobase_get_free_space(void);
/*
don't delete it - it may be re-enabled later
as an optimization for the most common case InnoDB+binlog
*/
#if 0
int innobase_report_binlog_offset_and_commit(
THD* thd,
void* trx_handle,
char* log_file_name,
my_off_t end_offset);
int innobase_commit_complete(void* trx_handle);
void innobase_store_binlog_offset_and_flush_log(char *binlog_name,longlong offset);
#endif
int innobase_drop_database(char *path);
bool innodb_show_status(THD* thd);
bool innodb_mutex_show_status(THD* thd);
void innodb_export_status(void);
void innobase_release_temporary_latches(THD *thd);
void innobase_store_binlog_offset_and_flush_log(char *binlog_name,longlong offset);
int innobase_start_trx_and_assign_read_view(THD* thd);
/***********************************************************************
This function is used to prepare X/Open XA distributed transaction */
int innobase_xa_prepare(
/*====================*/
/* out: 0 or error number */
THD* thd, /* in: handle to the MySQL thread of the user
whose XA transaction should be prepared */
bool all); /* in: TRUE - commit transaction
FALSE - the current SQL statement ended */
/***********************************************************************
This function is used to recover X/Open XA distributed transactions */
int innobase_xa_recover(
/*====================*/
/* out: number of prepared transactions
stored in xid_list */
XID* xid_list, /* in/out: prepared transactions */
uint len); /* in: number of slots in xid_list */
/***********************************************************************
This function is used to commit one X/Open XA distributed transaction
which is in the prepared state */
int innobase_commit_by_xid(
/*=======================*/
/* out: 0 or error number */
XID* xid); /* in : X/Open XA Transaction Identification */
/***********************************************************************
This function is used to rollback one X/Open XA distributed transaction
which is in the prepared state */
int innobase_rollback_by_xid(
/* out: 0 or error number */
XID *xid); /* in : X/Open XA Transaction Identification */
int innobase_xa_end(THD *thd);
int innobase_repl_report_sent_binlog(THD *thd, char *log_file_name,
my_off_t end_offset);
/***********************************************************************
Create a consistent view for a cursor based on current transaction
which is created if the corresponding MySQL thread still lacks one.
This consistent view is then used inside of MySQL when accessing records
using a cursor. */
void*
innobase_create_cursor_view(void);
/*=============================*/
/* out: Pointer to cursor view or NULL */
/***********************************************************************
Close the given consistent cursor view of a transaction and restore
global read view to a transaction read view. Transaction is created if the
corresponding MySQL thread still lacks one. */
void
innobase_close_cursor_view(
/*=======================*/
void* curview); /* in: Consistent read view to be closed */
/***********************************************************************
Set the given consistent cursor view to a transaction which is created
if the corresponding MySQL thread still lacks one. If the given
consistent cursor view is NULL global read view of a transaction is
restored to a transaction read view. */
void
innobase_set_cursor_view(
/*=====================*/
void* curview); /* in: Consistent read view to be set */

27
ibuf/Makefile.am Normal file
View file

@ -0,0 +1,27 @@
# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
# & Innobase Oy
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
include ../include/Makefile.i
noinst_LIBRARIES = libibuf.a
libibuf_a_SOURCES = ibuf0ibuf.c
EXTRA_PROGRAMS =
# Don't update the files from bitkeeper
%::SCCS/s.%

3541
ibuf/ibuf0ibuf.c Normal file

File diff suppressed because it is too large Load diff

60
include/Makefile.am Normal file
View file

@ -0,0 +1,60 @@
# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
# & Innobase Oy
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
noinst_HEADERS = btr0btr.h btr0btr.ic btr0cur.h btr0cur.ic \
btr0pcur.h btr0pcur.ic btr0sea.h btr0sea.ic btr0types.h \
buf0buf.h buf0buf.ic buf0flu.h buf0flu.ic buf0lru.h \
buf0lru.ic buf0rea.h buf0types.h data0data.h data0data.ic data0type.h \
data0type.ic data0types.h db0err.h dict0boot.h \
dict0boot.ic dict0crea.h dict0crea.ic dict0dict.h \
dict0dict.ic dict0load.h dict0load.ic dict0mem.h \
dict0mem.ic dict0types.h dyn0dyn.h dyn0dyn.ic eval0eval.h \
eval0eval.ic eval0proc.h eval0proc.ic fil0fil.h fsp0fsp.h \
fsp0fsp.ic fut0fut.h fut0fut.ic fut0lst.h fut0lst.ic \
ha0ha.h ha0ha.ic hash0hash.h hash0hash.ic \
ibuf0ibuf.h ibuf0ibuf.ic ibuf0types.h lock0lock.h \
lock0lock.ic lock0types.h log0log.h log0log.ic log0recv.h \
log0recv.ic mach0data.h mach0data.ic makefilewin.i \
mem0dbg.h mem0dbg.ic mem0mem.h mem0mem.ic mem0pool.h \
mem0pool.ic mtr0log.h mtr0log.ic mtr0mtr.h mtr0mtr.ic \
mtr0types.h os0file.h os0proc.h os0proc.ic \
os0sync.h os0sync.ic os0thread.h \
os0thread.ic page0cur.h page0cur.ic page0page.h \
page0page.ic page0types.h pars0grm.h pars0opt.h \
pars0opt.ic pars0pars.h pars0pars.ic pars0sym.h \
pars0sym.ic pars0types.h que0que.h que0que.ic que0types.h \
read0read.h read0read.ic read0types.h rem0cmp.h \
rem0cmp.ic rem0rec.h rem0rec.ic rem0types.h row0ins.h \
row0ins.ic row0mysql.h row0mysql.ic row0purge.h \
row0purge.ic row0row.h row0row.ic row0sel.h row0sel.ic \
row0types.h row0uins.h row0uins.ic row0umod.h row0umod.ic \
row0undo.h row0undo.ic row0upd.h row0upd.ic row0vers.h \
row0vers.ic srv0que.h srv0srv.h srv0srv.ic srv0start.h \
sync0arr.h sync0arr.ic sync0rw.h \
sync0rw.ic sync0sync.h sync0sync.ic sync0types.h \
thr0loc.h thr0loc.ic trx0purge.h trx0purge.ic trx0rec.h \
trx0rec.ic trx0roll.h trx0roll.ic trx0rseg.h trx0rseg.ic \
trx0sys.h trx0sys.ic trx0trx.h trx0trx.ic trx0types.h \
trx0undo.h trx0undo.ic trx0xa.h univ.i \
usr0sess.h usr0sess.ic usr0types.h ut0byte.h ut0byte.ic \
ut0dbg.h ut0lst.h ut0mem.h ut0mem.ic ut0rnd.h ut0rnd.ic \
ut0sort.h ut0ut.h ut0ut.ic
EXTRA_DIST = Makefile.i
# Don't update the files from bitkeeper
%::SCCS/s.%

6
include/Makefile.i Normal file
View file

@ -0,0 +1,6 @@
# Makefile included in Makefile.am in every subdirectory
INCLUDES = -I$(srcdir)/../include -I$(srcdir)/../../include -I../../include
# Don't update the files from bitkeeper
%::SCCS/s.%

439
include/btr0btr.h Normal file
View file

@ -0,0 +1,439 @@
/******************************************************
The B-tree
(c) 1994-1996 Innobase Oy
Created 6/2/1994 Heikki Tuuri
*******************************************************/
#ifndef btr0btr_h
#define btr0btr_h
#include "univ.i"
#include "dict0dict.h"
#include "data0data.h"
#include "page0cur.h"
#include "rem0rec.h"
#include "mtr0mtr.h"
#include "btr0types.h"
/* Maximum record size which can be stored on a page, without using the
special big record storage structure */
#define BTR_PAGE_MAX_REC_SIZE (UNIV_PAGE_SIZE / 2 - 200)
/* Latching modes for the search function (in btr0cur.*) */
#define BTR_SEARCH_LEAF RW_S_LATCH
#define BTR_MODIFY_LEAF RW_X_LATCH
#define BTR_NO_LATCHES RW_NO_LATCH
#define BTR_MODIFY_TREE 33
#define BTR_CONT_MODIFY_TREE 34
#define BTR_SEARCH_PREV 35
#define BTR_MODIFY_PREV 36
/* If this is ORed to the latch mode, it means that the search tuple will be
inserted to the index, at the searched position */
#define BTR_INSERT 512
/* This flag ORed to latch mode says that we do the search in query
optimization */
#define BTR_ESTIMATE 1024
/* This flag ORed to latch mode says that we can ignore possible
UNIQUE definition on secondary indexes when we decide if we can use the
insert buffer to speed up inserts */
#define BTR_IGNORE_SEC_UNIQUE 2048
/******************************************************************
Gets the root node of a tree and x-latches it. */
page_t*
btr_root_get(
/*=========*/
/* out: root page, x-latched */
dict_tree_t* tree, /* in: index tree */
mtr_t* mtr); /* in: mtr */
/******************************************************************
Gets a buffer page and declares its latching order level. */
UNIV_INLINE
page_t*
btr_page_get(
/*=========*/
ulint space, /* in: space id */
ulint page_no, /* in: page number */
ulint mode, /* in: latch mode */
mtr_t* mtr); /* in: mtr */
/******************************************************************
Gets the index id field of a page. */
UNIV_INLINE
dulint
btr_page_get_index_id(
/*==================*/
/* out: index id */
page_t* page); /* in: index page */
/************************************************************
Gets the node level field in an index page. */
UNIV_INLINE
ulint
btr_page_get_level_low(
/*===================*/
/* out: level, leaf level == 0 */
page_t* page); /* in: index page */
/************************************************************
Gets the node level field in an index page. */
UNIV_INLINE
ulint
btr_page_get_level(
/*===============*/
/* out: level, leaf level == 0 */
page_t* page, /* in: index page */
mtr_t* mtr); /* in: mini-transaction handle */
/************************************************************
Gets the next index page number. */
UNIV_INLINE
ulint
btr_page_get_next(
/*==============*/
/* out: next page number */
page_t* page, /* in: index page */
mtr_t* mtr); /* in: mini-transaction handle */
/************************************************************
Gets the previous index page number. */
UNIV_INLINE
ulint
btr_page_get_prev(
/*==============*/
/* out: prev page number */
page_t* page, /* in: index page */
mtr_t* mtr); /* in: mini-transaction handle */
/*****************************************************************
Gets pointer to the previous user record in the tree. It is assumed
that the caller has appropriate latches on the page and its neighbor. */
rec_t*
btr_get_prev_user_rec(
/*==================*/
/* out: previous user record, NULL if there is none */
rec_t* rec, /* in: record on leaf level */
mtr_t* mtr); /* in: mtr holding a latch on the page, and if
needed, also to the previous page */
/*****************************************************************
Gets pointer to the next user record in the tree. It is assumed
that the caller has appropriate latches on the page and its neighbor. */
rec_t*
btr_get_next_user_rec(
/*==================*/
/* out: next user record, NULL if there is none */
rec_t* rec, /* in: record on leaf level */
mtr_t* mtr); /* in: mtr holding a latch on the page, and if
needed, also to the next page */
/******************************************************************
Releases the latch on a leaf page and bufferunfixes it. */
UNIV_INLINE
void
btr_leaf_page_release(
/*==================*/
page_t* page, /* in: page */
ulint latch_mode, /* in: BTR_SEARCH_LEAF or BTR_MODIFY_LEAF */
mtr_t* mtr); /* in: mtr */
/******************************************************************
Gets the child node file address in a node pointer. */
UNIV_INLINE
ulint
btr_node_ptr_get_child_page_no(
/*===========================*/
/* out: child node address */
rec_t* rec, /* in: node pointer record */
const ulint* offsets);/* in: array returned by rec_get_offsets() */
/****************************************************************
Creates the root node for a new index tree. */
ulint
btr_create(
/*=======*/
/* out: page number of the created root, FIL_NULL if
did not succeed */
ulint type, /* in: type of the index */
ulint space, /* in: space where created */
dulint index_id,/* in: index id */
ulint comp, /* in: nonzero=compact page format */
mtr_t* mtr); /* in: mini-transaction handle */
/****************************************************************
Frees a B-tree except the root page, which MUST be freed after this
by calling btr_free_root. */
void
btr_free_but_not_root(
/*==================*/
ulint space, /* in: space where created */
ulint root_page_no); /* in: root page number */
/****************************************************************
Frees the B-tree root page. Other tree MUST already have been freed. */
void
btr_free_root(
/*==========*/
ulint space, /* in: space where created */
ulint root_page_no, /* in: root page number */
mtr_t* mtr); /* in: a mini-transaction which has already
been started */
/*****************************************************************
Makes tree one level higher by splitting the root, and inserts
the tuple. It is assumed that mtr contains an x-latch on the tree.
NOTE that the operation of this function must always succeed,
we cannot reverse it: therefore enough free disk space must be
guaranteed to be available before this function is called. */
rec_t*
btr_root_raise_and_insert(
/*======================*/
/* out: inserted record */
btr_cur_t* cursor, /* in: cursor at which to insert: must be
on the root page; when the function returns,
the cursor is positioned on the predecessor
of the inserted record */
dtuple_t* tuple, /* in: tuple to insert */
mtr_t* mtr); /* in: mtr */
/*****************************************************************
Reorganizes an index page. */
void
btr_page_reorganize(
/*================*/
page_t* page, /* in: page to be reorganized */
dict_index_t* index, /* in: record descriptor */
mtr_t* mtr); /* in: mtr */
/*****************************************************************
Decides if the page should be split at the convergence point of
inserts converging to left. */
ibool
btr_page_get_split_rec_to_left(
/*===========================*/
/* out: TRUE if split recommended */
btr_cur_t* cursor, /* in: cursor at which to insert */
rec_t** split_rec);/* out: if split recommended,
the first record on upper half page,
or NULL if tuple should be first */
/*****************************************************************
Decides if the page should be split at the convergence point of
inserts converging to right. */
ibool
btr_page_get_split_rec_to_right(
/*============================*/
/* out: TRUE if split recommended */
btr_cur_t* cursor, /* in: cursor at which to insert */
rec_t** split_rec);/* out: if split recommended,
the first record on upper half page,
or NULL if tuple should be first */
/*****************************************************************
Splits an index page to halves and inserts the tuple. It is assumed
that mtr holds an x-latch to the index tree. NOTE: the tree x-latch
is released within this function! NOTE that the operation of this
function must always succeed, we cannot reverse it: therefore
enough free disk space must be guaranteed to be available before
this function is called. */
rec_t*
btr_page_split_and_insert(
/*======================*/
/* out: inserted record; NOTE: the tree
x-latch is released! NOTE: 2 free disk
pages must be available! */
btr_cur_t* cursor, /* in: cursor at which to insert; when the
function returns, the cursor is positioned
on the predecessor of the inserted record */
dtuple_t* tuple, /* in: tuple to insert */
mtr_t* mtr); /* in: mtr */
/***********************************************************
Inserts a data tuple to a tree on a non-leaf level. It is assumed
that mtr holds an x-latch on the tree. */
void
btr_insert_on_non_leaf_level(
/*=========================*/
dict_tree_t* tree, /* in: tree */
ulint level, /* in: level, must be > 0 */
dtuple_t* tuple, /* in: the record to be inserted */
mtr_t* mtr); /* in: mtr */
/********************************************************************
Sets a record as the predefined minimum record. */
void
btr_set_min_rec_mark(
/*=================*/
rec_t* rec, /* in: record */
ulint comp, /* in: nonzero=compact page format */
mtr_t* mtr); /* in: mtr */
/*****************************************************************
Deletes on the upper level the node pointer to a page. */
void
btr_node_ptr_delete(
/*================*/
dict_tree_t* tree, /* in: index tree */
page_t* page, /* in: page whose node pointer is deleted */
mtr_t* mtr); /* in: mtr */
/****************************************************************
Checks that the node pointer to a page is appropriate. */
ibool
btr_check_node_ptr(
/*===============*/
/* out: TRUE */
dict_tree_t* tree, /* in: index tree */
page_t* page, /* in: index page */
mtr_t* mtr); /* in: mtr */
/*****************************************************************
Tries to merge the page first to the left immediate brother if such a
brother exists, and the node pointers to the current page and to the
brother reside on the same page. If the left brother does not satisfy these
conditions, looks at the right brother. If the page is the only one on that
level lifts the records of the page to the father page, thus reducing the
tree height. It is assumed that mtr holds an x-latch on the tree and on the
page. If cursor is on the leaf level, mtr must also hold x-latches to
the brothers, if they exist. NOTE: it is assumed that the caller has reserved
enough free extents so that the compression will always succeed if done! */
void
btr_compress(
/*=========*/
btr_cur_t* cursor, /* in: cursor on the page to merge or lift;
the page must not be empty: in record delete
use btr_discard_page if the page would become
empty */
mtr_t* mtr); /* in: mtr */
/*****************************************************************
Discards a page from a B-tree. This is used to remove the last record from
a B-tree page: the whole page must be removed at the same time. This cannot
be used for the root page, which is allowed to be empty. */
void
btr_discard_page(
/*=============*/
btr_cur_t* cursor, /* in: cursor on the page to discard: not on
the root page */
mtr_t* mtr); /* in: mtr */
/********************************************************************
Parses the redo log record for setting an index record as the predefined
minimum record. */
byte*
btr_parse_set_min_rec_mark(
/*=======================*/
/* out: end of log record or NULL */
byte* ptr, /* in: buffer */
byte* end_ptr,/* in: buffer end */
ulint comp, /* in: nonzero=compact page format */
page_t* page, /* in: page or NULL */
mtr_t* mtr); /* in: mtr or NULL */
/***************************************************************
Parses a redo log record of reorganizing a page. */
byte*
btr_parse_page_reorganize(
/*======================*/
/* out: end of log record or NULL */
byte* ptr, /* in: buffer */
byte* end_ptr,/* in: buffer end */
dict_index_t* index, /* in: record descriptor */
page_t* page, /* in: page or NULL */
mtr_t* mtr); /* in: mtr or NULL */
/******************************************************************
Gets the number of pages in a B-tree. */
ulint
btr_get_size(
/*=========*/
/* out: number of pages */
dict_index_t* index, /* in: index */
ulint flag); /* in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */
/******************************************************************
Allocates a new file page to be used in an index tree. NOTE: we assume
that the caller has made the reservation for free extents! */
page_t*
btr_page_alloc(
/*===========*/
/* out: new allocated page, x-latched;
NULL if out of space */
dict_tree_t* tree, /* in: index tree */
ulint hint_page_no, /* in: hint of a good page */
byte file_direction, /* in: direction where a possible
page split is made */
ulint level, /* in: level where the page is placed
in the tree */
mtr_t* mtr); /* in: mtr */
/******************************************************************
Frees a file page used in an index tree. NOTE: cannot free field external
storage pages because the page must contain info on its level. */
void
btr_page_free(
/*==========*/
dict_tree_t* tree, /* in: index tree */
page_t* page, /* in: page to be freed, x-latched */
mtr_t* mtr); /* in: mtr */
/******************************************************************
Frees a file page used in an index tree. Can be used also to BLOB
external storage pages, because the page level 0 can be given as an
argument. */
void
btr_page_free_low(
/*==============*/
dict_tree_t* tree, /* in: index tree */
page_t* page, /* in: page to be freed, x-latched */
ulint level, /* in: page level */
mtr_t* mtr); /* in: mtr */
#ifdef UNIV_BTR_PRINT
/*****************************************************************
Prints size info of a B-tree. */
void
btr_print_size(
/*===========*/
dict_tree_t* tree); /* in: index tree */
/******************************************************************
Prints directories and other info of all nodes in the tree. */
void
btr_print_tree(
/*===========*/
dict_tree_t* tree, /* in: tree */
ulint width); /* in: print this many entries from start
and end */
#endif /* UNIV_BTR_PRINT */
/****************************************************************
Checks the size and number of fields in a record based on the definition of
the index. */
ibool
btr_index_rec_validate(
/*====================*/
/* out: TRUE if ok */
rec_t* rec, /* in: index record */
dict_index_t* index, /* in: index */
ibool dump_on_error); /* in: TRUE if the function
should print hex dump of record
and page on error */
/******************************************************************
Checks the consistency of an index tree. */
ibool
btr_validate_tree(
/*==============*/
/* out: TRUE if ok */
dict_tree_t* tree, /* in: tree */
trx_t* trx); /* in: transaction or NULL */
#define BTR_N_LEAF_PAGES 1
#define BTR_TOTAL_SIZE 2
#ifndef UNIV_NONINL
#include "btr0btr.ic"
#endif
#endif

233
include/btr0btr.ic Normal file
View file

@ -0,0 +1,233 @@
/******************************************************
The B-tree
(c) 1994-1996 Innobase Oy
Created 6/2/1994 Heikki Tuuri
*******************************************************/
#include "mach0data.h"
#include "mtr0mtr.h"
#include "mtr0log.h"
#define BTR_MAX_NODE_LEVEL 50 /* used in debug checking */
/******************************************************************
Gets a buffer page and declares its latching order level. */
UNIV_INLINE
page_t*
btr_page_get(
/*=========*/
ulint space, /* in: space id */
ulint page_no, /* in: page number */
ulint mode, /* in: latch mode */
mtr_t* mtr) /* in: mtr */
{
page_t* page;
page = buf_page_get(space, page_no, mode, mtr);
#ifdef UNIV_SYNC_DEBUG
if (mode != RW_NO_LATCH) {
buf_page_dbg_add_level(page, SYNC_TREE_NODE);
}
#endif
return(page);
}
/******************************************************************
Sets the index id field of a page. */
UNIV_INLINE
void
btr_page_set_index_id(
/*==================*/
page_t* page, /* in: page to be created */
dulint id, /* in: index id */
mtr_t* mtr) /* in: mtr */
{
mlog_write_dulint(page + PAGE_HEADER + PAGE_INDEX_ID, id, mtr);
}
/******************************************************************
Gets the index id field of a page. */
UNIV_INLINE
dulint
btr_page_get_index_id(
/*==================*/
/* out: index id */
page_t* page) /* in: index page */
{
return(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID));
}
/************************************************************
Gets the node level field in an index page. */
UNIV_INLINE
ulint
btr_page_get_level_low(
/*===================*/
/* out: level, leaf level == 0 */
page_t* page) /* in: index page */
{
ulint level;
ut_ad(page);
level = mach_read_from_2(page + PAGE_HEADER + PAGE_LEVEL);
ut_ad(level <= BTR_MAX_NODE_LEVEL);
return(level);
}
/************************************************************
Gets the node level field in an index page. */
UNIV_INLINE
ulint
btr_page_get_level(
/*===============*/
/* out: level, leaf level == 0 */
page_t* page, /* in: index page */
mtr_t* mtr __attribute__((unused))) /* in: mini-transaction handle */
{
ut_ad(page && mtr);
return(btr_page_get_level_low(page));
}
/************************************************************
Sets the node level field in an index page. */
UNIV_INLINE
void
btr_page_set_level(
/*===============*/
page_t* page, /* in: index page */
ulint level, /* in: level, leaf level == 0 */
mtr_t* mtr) /* in: mini-transaction handle */
{
ut_ad(page && mtr);
ut_ad(level <= BTR_MAX_NODE_LEVEL);
mlog_write_ulint(page + PAGE_HEADER + PAGE_LEVEL, level,
MLOG_2BYTES, mtr);
}
/************************************************************
Gets the next index page number. */
UNIV_INLINE
ulint
btr_page_get_next(
/*==============*/
/* out: next page number */
page_t* page, /* in: index page */
mtr_t* mtr __attribute__((unused))) /* in: mini-transaction handle */
{
ut_ad(page && mtr);
ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
MTR_MEMO_PAGE_X_FIX)
|| mtr_memo_contains(mtr, buf_block_align(page),
MTR_MEMO_PAGE_S_FIX));
return(mach_read_from_4(page + FIL_PAGE_NEXT));
}
/************************************************************
Sets the next index page field. */
UNIV_INLINE
void
btr_page_set_next(
/*==============*/
page_t* page, /* in: index page */
ulint next, /* in: next page number */
mtr_t* mtr) /* in: mini-transaction handle */
{
ut_ad(page && mtr);
mlog_write_ulint(page + FIL_PAGE_NEXT, next, MLOG_4BYTES, mtr);
}
/************************************************************
Gets the previous index page number. */
UNIV_INLINE
ulint
btr_page_get_prev(
/*==============*/
/* out: prev page number */
page_t* page, /* in: index page */
mtr_t* mtr __attribute__((unused))) /* in: mini-transaction handle */
{
ut_ad(page && mtr);
return(mach_read_from_4(page + FIL_PAGE_PREV));
}
/************************************************************
Sets the previous index page field. */
UNIV_INLINE
void
btr_page_set_prev(
/*==============*/
page_t* page, /* in: index page */
ulint prev, /* in: previous page number */
mtr_t* mtr) /* in: mini-transaction handle */
{
ut_ad(page && mtr);
mlog_write_ulint(page + FIL_PAGE_PREV, prev, MLOG_4BYTES, mtr);
}
/******************************************************************
Gets the child node file address in a node pointer. */
UNIV_INLINE
ulint
btr_node_ptr_get_child_page_no(
/*===========================*/
/* out: child node address */
rec_t* rec, /* in: node pointer record */
const ulint* offsets)/* in: array returned by rec_get_offsets() */
{
byte* field;
ulint len;
ulint page_no;
ut_ad(!rec_offs_comp(offsets) || rec_get_node_ptr_flag(rec));
/* The child address is in the last field */
field = rec_get_nth_field(rec, offsets,
rec_offs_n_fields(offsets) - 1, &len);
ut_ad(len == 4);
page_no = mach_read_from_4(field);
if (UNIV_UNLIKELY(page_no == 0)) {
fprintf(stderr,
"InnoDB: a nonsensical page number 0 in a node ptr record at offset %lu\n",
(ulong) ut_align_offset(rec, UNIV_PAGE_SIZE));
buf_page_print(buf_frame_align(rec));
}
return(page_no);
}
/******************************************************************
Releases the latches on a leaf page and bufferunfixes it. */
UNIV_INLINE
void
btr_leaf_page_release(
/*==================*/
page_t* page, /* in: page */
ulint latch_mode, /* in: BTR_SEARCH_LEAF or BTR_MODIFY_LEAF */
mtr_t* mtr) /* in: mtr */
{
ut_ad(!mtr_memo_contains(mtr, buf_block_align(page),
MTR_MEMO_MODIFY));
if (latch_mode == BTR_SEARCH_LEAF) {
mtr_memo_release(mtr, buf_block_align(page),
MTR_MEMO_PAGE_S_FIX);
} else {
ut_ad(latch_mode == BTR_MODIFY_LEAF);
mtr_memo_release(mtr, buf_block_align(page),
MTR_MEMO_PAGE_X_FIX);
}
}

701
include/btr0cur.h Normal file
View file

@ -0,0 +1,701 @@
/******************************************************
The index tree cursor
(c) 1994-1996 Innobase Oy
Created 10/16/1994 Heikki Tuuri
*******************************************************/
#ifndef btr0cur_h
#define btr0cur_h
#include "univ.i"
#include "dict0dict.h"
#include "data0data.h"
#include "page0cur.h"
#include "btr0types.h"
#include "que0types.h"
#include "row0types.h"
#include "ha0ha.h"
/* Mode flags for btr_cur operations; these can be ORed */
#define BTR_NO_UNDO_LOG_FLAG 1 /* do no undo logging */
#define BTR_NO_LOCKING_FLAG 2 /* do no record lock checking */
#define BTR_KEEP_SYS_FLAG 4 /* sys fields will be found from the
update vector or inserted entry */
#define BTR_CUR_ADAPT
#define BTR_CUR_HASH_ADAPT
/*************************************************************
Returns the page cursor component of a tree cursor. */
UNIV_INLINE
page_cur_t*
btr_cur_get_page_cur(
/*=================*/
/* out: pointer to page cursor component */
btr_cur_t* cursor);/* in: tree cursor */
/*************************************************************
Returns the record pointer of a tree cursor. */
UNIV_INLINE
rec_t*
btr_cur_get_rec(
/*============*/
/* out: pointer to record */
btr_cur_t* cursor);/* in: tree cursor */
/*************************************************************
Invalidates a tree cursor by setting record pointer to NULL. */
UNIV_INLINE
void
btr_cur_invalidate(
/*===============*/
btr_cur_t* cursor);/* in: tree cursor */
/*************************************************************
Returns the page of a tree cursor. */
UNIV_INLINE
page_t*
btr_cur_get_page(
/*=============*/
/* out: pointer to page */
btr_cur_t* cursor);/* in: tree cursor */
/*************************************************************
Returns the tree of a cursor. */
UNIV_INLINE
dict_tree_t*
btr_cur_get_tree(
/*=============*/
/* out: tree */
btr_cur_t* cursor);/* in: tree cursor */
/*************************************************************
Positions a tree cursor at a given record. */
UNIV_INLINE
void
btr_cur_position(
/*=============*/
dict_index_t* index, /* in: index */
rec_t* rec, /* in: record in tree */
btr_cur_t* cursor);/* in: cursor */
/************************************************************************
Searches an index tree and positions a tree cursor on a given level.
NOTE: n_fields_cmp in tuple must be set so that it cannot be compared
to node pointer page number fields on the upper levels of the tree!
Note that if mode is PAGE_CUR_LE, which is used in inserts, then
cursor->up_match and cursor->low_match both will have sensible values.
If mode is PAGE_CUR_GE, then up_match will a have a sensible value. */
void
btr_cur_search_to_nth_level(
/*========================*/
dict_index_t* index, /* in: index */
ulint level, /* in: the tree level of search */
dtuple_t* tuple, /* in: data tuple; NOTE: n_fields_cmp in
tuple must be set so that it cannot get
compared to the node ptr page number field! */
ulint mode, /* in: PAGE_CUR_L, ...;
NOTE that if the search is made using a unique
prefix of a record, mode should be PAGE_CUR_LE,
not PAGE_CUR_GE, as the latter may end up on
the previous page of the record! Inserts
should always be made using PAGE_CUR_LE to
search the position! */
ulint latch_mode, /* in: BTR_SEARCH_LEAF, ..., ORed with
BTR_INSERT and BTR_ESTIMATE;
cursor->left_page is used to store a pointer
to the left neighbor page, in the cases
BTR_SEARCH_PREV and BTR_MODIFY_PREV;
NOTE that if has_search_latch
is != 0, we maybe do not have a latch set
on the cursor page, we assume
the caller uses his search latch
to protect the record! */
btr_cur_t* cursor, /* in/out: tree cursor; the cursor page is
s- or x-latched, but see also above! */
ulint has_search_latch,/* in: latch mode the caller
currently has on btr_search_latch:
RW_S_LATCH, or 0 */
mtr_t* mtr); /* in: mtr */
/*********************************************************************
Opens a cursor at either end of an index. */
void
btr_cur_open_at_index_side(
/*=======================*/
ibool from_left, /* in: TRUE if open to the low end,
FALSE if to the high end */
dict_index_t* index, /* in: index */
ulint latch_mode, /* in: latch mode */
btr_cur_t* cursor, /* in: cursor */
mtr_t* mtr); /* in: mtr */
/**************************************************************************
Positions a cursor at a randomly chosen position within a B-tree. */
void
btr_cur_open_at_rnd_pos(
/*====================*/
dict_index_t* index, /* in: index */
ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... */
btr_cur_t* cursor, /* in/out: B-tree cursor */
mtr_t* mtr); /* in: mtr */
/*****************************************************************
Tries to perform an insert to a page in an index tree, next to cursor.
It is assumed that mtr holds an x-latch on the page. The operation does
not succeed if there is too little space on the page. If there is just
one record on the page, the insert will always succeed; this is to
prevent trying to split a page with just one record. */
ulint
btr_cur_optimistic_insert(
/*======================*/
/* out: DB_SUCCESS, DB_WAIT_LOCK,
DB_FAIL, or error number */
ulint flags, /* in: undo logging and locking flags: if not
zero, the parameters index and thr should be
specified */
btr_cur_t* cursor, /* in: cursor on page after which to insert;
cursor stays valid */
dtuple_t* entry, /* in: entry to insert */
rec_t** rec, /* out: pointer to inserted record if
succeed */
big_rec_t** big_rec,/* out: big rec vector whose fields have to
be stored externally by the caller, or
NULL */
que_thr_t* thr, /* in: query thread or NULL */
mtr_t* mtr); /* in: mtr */
/*****************************************************************
Performs an insert on a page of an index tree. It is assumed that mtr
holds an x-latch on the tree and on the cursor page. If the insert is
made on the leaf level, to avoid deadlocks, mtr must also own x-latches
to brothers of page, if those brothers exist. */
ulint
btr_cur_pessimistic_insert(
/*=======================*/
/* out: DB_SUCCESS or error number */
ulint flags, /* in: undo logging and locking flags: if not
zero, the parameter thr should be
specified; if no undo logging is specified,
then the caller must have reserved enough
free extents in the file space so that the
insertion will certainly succeed */
btr_cur_t* cursor, /* in: cursor after which to insert;
cursor stays valid */
dtuple_t* entry, /* in: entry to insert */
rec_t** rec, /* out: pointer to inserted record if
succeed */
big_rec_t** big_rec,/* out: big rec vector whose fields have to
be stored externally by the caller, or
NULL */
que_thr_t* thr, /* in: query thread or NULL */
mtr_t* mtr); /* in: mtr */
/*****************************************************************
Updates a record when the update causes no size changes in its fields. */
ulint
btr_cur_update_in_place(
/*====================*/
/* out: DB_SUCCESS or error number */
ulint flags, /* in: undo logging and locking flags */
btr_cur_t* cursor, /* in: cursor on the record to update;
cursor stays valid and positioned on the
same record */
upd_t* update, /* in: update vector */
ulint cmpl_info,/* in: compiler info on secondary index
updates */
que_thr_t* thr, /* in: query thread */
mtr_t* mtr); /* in: mtr */
/*****************************************************************
Tries to update a record on a page in an index tree. It is assumed that mtr
holds an x-latch on the page. The operation does not succeed if there is too
little space on the page or if the update would result in too empty a page,
so that tree compression is recommended. */
ulint
btr_cur_optimistic_update(
/*======================*/
/* out: DB_SUCCESS, or DB_OVERFLOW if the
updated record does not fit, DB_UNDERFLOW
if the page would become too empty */
ulint flags, /* in: undo logging and locking flags */
btr_cur_t* cursor, /* in: cursor on the record to update;
cursor stays valid and positioned on the
same record */
upd_t* update, /* in: update vector; this must also
contain trx id and roll ptr fields */
ulint cmpl_info,/* in: compiler info on secondary index
updates */
que_thr_t* thr, /* in: query thread */
mtr_t* mtr); /* in: mtr */
/*****************************************************************
Performs an update of a record on a page of a tree. It is assumed
that mtr holds an x-latch on the tree and on the cursor page. If the
update is made on the leaf level, to avoid deadlocks, mtr must also
own x-latches to brothers of page, if those brothers exist. */
ulint
btr_cur_pessimistic_update(
/*=======================*/
/* out: DB_SUCCESS or error code */
ulint flags, /* in: undo logging, locking, and rollback
flags */
btr_cur_t* cursor, /* in: cursor on the record to update */
big_rec_t** big_rec,/* out: big rec vector whose fields have to
be stored externally by the caller, or NULL */
upd_t* update, /* in: update vector; this is allowed also
contain trx id and roll ptr fields, but
the values in update vector have no effect */
ulint cmpl_info,/* in: compiler info on secondary index
updates */
que_thr_t* thr, /* in: query thread */
mtr_t* mtr); /* in: mtr */
/***************************************************************
Marks a clustered index record deleted. Writes an undo log record to
undo log on this delete marking. Writes in the trx id field the id
of the deleting transaction, and in the roll ptr field pointer to the
undo log record created. */
ulint
btr_cur_del_mark_set_clust_rec(
/*===========================*/
/* out: DB_SUCCESS, DB_LOCK_WAIT, or error
number */
ulint flags, /* in: undo logging and locking flags */
btr_cur_t* cursor, /* in: cursor */
ibool val, /* in: value to set */
que_thr_t* thr, /* in: query thread */
mtr_t* mtr); /* in: mtr */
/***************************************************************
Sets a secondary index record delete mark to TRUE or FALSE. */
ulint
btr_cur_del_mark_set_sec_rec(
/*=========================*/
/* out: DB_SUCCESS, DB_LOCK_WAIT, or error
number */
ulint flags, /* in: locking flag */
btr_cur_t* cursor, /* in: cursor */
ibool val, /* in: value to set */
que_thr_t* thr, /* in: query thread */
mtr_t* mtr); /* in: mtr */
/***************************************************************
Sets a secondary index record delete mark to FALSE. This function is
only used by the insert buffer insert merge mechanism. */
void
btr_cur_del_unmark_for_ibuf(
/*========================*/
rec_t* rec, /* in: record to delete unmark */
mtr_t* mtr); /* in: mtr */
/*****************************************************************
Tries to compress a page of the tree on the leaf level. It is assumed
that mtr holds an x-latch on the tree and on the cursor page. To avoid
deadlocks, mtr must also own x-latches to brothers of page, if those
brothers exist. NOTE: it is assumed that the caller has reserved enough
free extents so that the compression will always succeed if done! */
void
btr_cur_compress(
/*=============*/
btr_cur_t* cursor, /* in: cursor on the page to compress;
cursor does not stay valid */
mtr_t* mtr); /* in: mtr */
/*****************************************************************
Tries to compress a page of the tree if it seems useful. It is assumed
that mtr holds an x-latch on the tree and on the cursor page. To avoid
deadlocks, mtr must also own x-latches to brothers of page, if those
brothers exist. NOTE: it is assumed that the caller has reserved enough
free extents so that the compression will always succeed if done! */
ibool
btr_cur_compress_if_useful(
/*=======================*/
/* out: TRUE if compression occurred */
btr_cur_t* cursor, /* in: cursor on the page to compress;
cursor does not stay valid if compression
occurs */
mtr_t* mtr); /* in: mtr */
/***********************************************************
Removes the record on which the tree cursor is positioned. It is assumed
that the mtr has an x-latch on the page where the cursor is positioned,
but no latch on the whole tree. */
ibool
btr_cur_optimistic_delete(
/*======================*/
/* out: TRUE if success, i.e., the page
did not become too empty */
btr_cur_t* cursor, /* in: cursor on the record to delete;
cursor stays valid: if deletion succeeds,
on function exit it points to the successor
of the deleted record */
mtr_t* mtr); /* in: mtr */
/*****************************************************************
Removes the record on which the tree cursor is positioned. Tries
to compress the page if its fillfactor drops below a threshold
or if it is the only page on the level. It is assumed that mtr holds
an x-latch on the tree and on the cursor page. To avoid deadlocks,
mtr must also own x-latches to brothers of page, if those brothers
exist. */
ibool
btr_cur_pessimistic_delete(
/*=======================*/
/* out: TRUE if compression occurred */
ulint* err, /* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE;
the latter may occur because we may have
to update node pointers on upper levels,
and in the case of variable length keys
these may actually grow in size */
ibool has_reserved_extents, /* in: TRUE if the
caller has already reserved enough free
extents so that he knows that the operation
will succeed */
btr_cur_t* cursor, /* in: cursor on the record to delete;
if compression does not occur, the cursor
stays valid: it points to successor of
deleted record on function exit */
ibool in_rollback,/* in: TRUE if called in rollback */
mtr_t* mtr); /* in: mtr */
/***************************************************************
Parses a redo log record of updating a record in-place. */
byte*
btr_cur_parse_update_in_place(
/*==========================*/
/* out: end of log record or NULL */
byte* ptr, /* in: buffer */
byte* end_ptr,/* in: buffer end */
page_t* page, /* in: page or NULL */
dict_index_t* index); /* in: index corresponding to page */
/********************************************************************
Parses the redo log record for delete marking or unmarking of a clustered
index record. */
byte*
btr_cur_parse_del_mark_set_clust_rec(
/*=================================*/
/* out: end of log record or NULL */
byte* ptr, /* in: buffer */
byte* end_ptr,/* in: buffer end */
dict_index_t* index, /* in: index corresponding to page */
page_t* page); /* in: page or NULL */
/********************************************************************
Parses the redo log record for delete marking or unmarking of a secondary
index record. */
byte*
btr_cur_parse_del_mark_set_sec_rec(
/*===============================*/
/* out: end of log record or NULL */
byte* ptr, /* in: buffer */
byte* end_ptr,/* in: buffer end */
page_t* page); /* in: page or NULL */
/***********************************************************************
Estimates the number of rows in a given index range. */
ib_longlong
btr_estimate_n_rows_in_range(
/*=========================*/
/* out: estimated number of rows */
dict_index_t* index, /* in: index */
dtuple_t* tuple1, /* in: range start, may also be empty tuple */
ulint mode1, /* in: search mode for range start */
dtuple_t* tuple2, /* in: range end, may also be empty tuple */
ulint mode2); /* in: search mode for range end */
/***********************************************************************
Estimates the number of different key values in a given index, for
each n-column prefix of the index where n <= dict_index_get_n_unique(index).
The estimates are stored in the array index->stat_n_diff_key_vals. */
void
btr_estimate_number_of_different_key_vals(
/*======================================*/
dict_index_t* index); /* in: index */
/***********************************************************************
Marks not updated extern fields as not-owned by this record. The ownership
is transferred to the updated record which is inserted elsewhere in the
index tree. In purge only the owner of externally stored field is allowed
to free the field. */
void
btr_cur_mark_extern_inherited_fields(
/*=================================*/
rec_t* rec, /* in: record in a clustered index */
const ulint* offsets,/* in: array returned by rec_get_offsets() */
upd_t* update, /* in: update vector */
mtr_t* mtr); /* in: mtr */
/***********************************************************************
The complement of the previous function: in an update entry may inherit
some externally stored fields from a record. We must mark them as inherited
in entry, so that they are not freed in a rollback. */
void
btr_cur_mark_dtuple_inherited_extern(
/*=================================*/
dtuple_t* entry, /* in: updated entry to be inserted to
clustered index */
ulint* ext_vec, /* in: array of extern fields in the
original record */
ulint n_ext_vec, /* in: number of elements in ext_vec */
upd_t* update); /* in: update vector */
/***********************************************************************
Marks all extern fields in a dtuple as owned by the record. */
void
btr_cur_unmark_dtuple_extern_fields(
/*================================*/
dtuple_t* entry, /* in: clustered index entry */
ulint* ext_vec, /* in: array of numbers of fields
which have been stored externally */
ulint n_ext_vec); /* in: number of elements in ext_vec */
/***********************************************************************
Stores the fields in big_rec_vec to the tablespace and puts pointers to
them in rec. The fields are stored on pages allocated from leaf node
file segment of the index tree. */
ulint
btr_store_big_rec_extern_fields(
/*============================*/
/* out: DB_SUCCESS or error */
dict_index_t* index, /* in: index of rec; the index tree
MUST be X-latched */
rec_t* rec, /* in: record */
const ulint* offsets, /* in: rec_get_offsets(rec, index) */
big_rec_t* big_rec_vec, /* in: vector containing fields
to be stored externally */
mtr_t* local_mtr); /* in: mtr containing the latch to
rec and to the tree */
/***********************************************************************
Frees the space in an externally stored field to the file space
management if the field in data is owned the externally stored field,
in a rollback we may have the additional condition that the field must
not be inherited. */
void
btr_free_externally_stored_field(
/*=============================*/
dict_index_t* index, /* in: index of the data, the index
tree MUST be X-latched; if the tree
height is 1, then also the root page
must be X-latched! (this is relevant
in the case this function is called
from purge where 'data' is located on
an undo log page, not an index
page) */
byte* data, /* in: internally stored data
+ reference to the externally
stored part */
ulint local_len, /* in: length of data */
ibool do_not_free_inherited,/* in: TRUE if called in a
rollback and we do not want to free
inherited fields */
mtr_t* local_mtr); /* in: mtr containing the latch to
data an an X-latch to the index
tree */
/***************************************************************
Frees the externally stored fields for a record. */
void
btr_rec_free_externally_stored_fields(
/*==================================*/
dict_index_t* index, /* in: index of the data, the index
tree MUST be X-latched */
rec_t* rec, /* in: record */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
ibool do_not_free_inherited,/* in: TRUE if called in a
rollback and we do not want to free
inherited fields */
mtr_t* mtr); /* in: mini-transaction handle which contains
an X-latch to record page and to the index
tree */
/***********************************************************************
Copies an externally stored field of a record to mem heap. */
byte*
btr_rec_copy_externally_stored_field(
/*=================================*/
/* out: the field copied to heap */
rec_t* rec, /* in: record */
const ulint* offsets,/* in: array returned by rec_get_offsets() */
ulint no, /* in: field number */
ulint* len, /* out: length of the field */
mem_heap_t* heap); /* in: mem heap */
/***********************************************************************
Copies an externally stored field of a record to mem heap. Parameter
data contains a pointer to 'internally' stored part of the field:
possibly some data, and the reference to the externally stored part in
the last 20 bytes of data. */
byte*
btr_copy_externally_stored_field(
/*=============================*/
/* out: the whole field copied to heap */
ulint* len, /* out: length of the whole field */
byte* data, /* in: 'internally' stored part of the
field containing also the reference to
the external part */
ulint local_len,/* in: length of data */
mem_heap_t* heap); /* in: mem heap */
/***********************************************************************
Stores the positions of the fields marked as extern storage in the update
vector, and also those fields who are marked as extern storage in rec
and not mentioned in updated fields. We use this function to remember
which fields we must mark as extern storage in a record inserted for an
update. */
ulint
btr_push_update_extern_fields(
/*==========================*/
/* out: number of values stored in ext_vect */
ulint* ext_vect,/* in: array of ulints, must be preallocated
to have space for all fields in rec */
const ulint* offsets,/* in: array returned by rec_get_offsets() */
upd_t* update);/* in: update vector or NULL */
/*######################################################################*/
/* In the pessimistic delete, if the page data size drops below this
limit, merging it to a neighbor is tried */
#define BTR_CUR_PAGE_COMPRESS_LIMIT (UNIV_PAGE_SIZE / 2)
/* A slot in the path array. We store here info on a search path down the
tree. Each slot contains data on a single level of the tree. */
typedef struct btr_path_struct btr_path_t;
struct btr_path_struct{
ulint nth_rec; /* index of the record
where the page cursor stopped on
this level (index in alphabetical
order); value ULINT_UNDEFINED
denotes array end */
ulint n_recs; /* number of records on the page */
};
#define BTR_PATH_ARRAY_N_SLOTS 250 /* size of path array (in slots) */
/* The tree cursor: the definition appears here only for the compiler
to know struct size! */
struct btr_cur_struct {
dict_index_t* index; /* index where positioned */
page_cur_t page_cur; /* page cursor */
page_t* left_page; /* this field is used to store a pointer
to the left neighbor page, in the cases
BTR_SEARCH_PREV and BTR_MODIFY_PREV */
/*------------------------------*/
que_thr_t* thr; /* this field is only used when
btr_cur_search_... is called for an
index entry insertion: the calling
query thread is passed here to be
used in the insert buffer */
/*------------------------------*/
/* The following fields are used in btr_cur_search... to pass
information: */
ulint flag; /* BTR_CUR_HASH, BTR_CUR_HASH_FAIL,
BTR_CUR_BINARY, or
BTR_CUR_INSERT_TO_IBUF */
ulint tree_height; /* Tree height if the search is done
for a pessimistic insert or update
operation */
ulint up_match; /* If the search mode was PAGE_CUR_LE,
the number of matched fields to the
the first user record to the right of
the cursor record after
btr_cur_search_...;
for the mode PAGE_CUR_GE, the matched
fields to the first user record AT THE
CURSOR or to the right of it;
NOTE that the up_match and low_match
values may exceed the correct values
for comparison to the adjacent user
record if that record is on a
different leaf page! (See the note in
row_ins_duplicate_key.) */
ulint up_bytes; /* number of matched bytes to the
right at the time cursor positioned;
only used internally in searches: not
defined after the search */
ulint low_match; /* if search mode was PAGE_CUR_LE,
the number of matched fields to the
first user record AT THE CURSOR or
to the left of it after
btr_cur_search_...;
NOT defined for PAGE_CUR_GE or any
other search modes; see also the NOTE
in up_match! */
ulint low_bytes; /* number of matched bytes to the
right at the time cursor positioned;
only used internally in searches: not
defined after the search */
ulint n_fields; /* prefix length used in a hash
search if hash_node != NULL */
ulint n_bytes; /* hash prefix bytes if hash_node !=
NULL */
ulint fold; /* fold value used in the search if
flag is BTR_CUR_HASH */
/*------------------------------*/
btr_path_t* path_arr; /* in estimating the number of
rows in range, we store in this array
information of the path through
the tree */
};
/* Values for the flag documenting the used search method */
#define BTR_CUR_HASH 1 /* successful shortcut using the hash
index */
#define BTR_CUR_HASH_FAIL 2 /* failure using hash, success using
binary search: the misleading hash
reference is stored in the field
hash_node, and might be necessary to
update */
#define BTR_CUR_BINARY 3 /* success using the binary search */
#define BTR_CUR_INSERT_TO_IBUF 4 /* performed the intended insert to
the insert buffer */
/* If pessimistic delete fails because of lack of file space,
there is still a good change of success a little later: try this many times,
and sleep this many microseconds in between */
#define BTR_CUR_RETRY_DELETE_N_TIMES 100
#define BTR_CUR_RETRY_SLEEP_TIME 50000
/* The reference in a field for which data is stored on a different page.
The reference is at the end of the 'locally' stored part of the field.
'Locally' means storage in the index record.
We store locally a long enough prefix of each column so that we can determine
the ordering parts of each index record without looking into the externally
stored part. */
/*--------------------------------------*/
#define BTR_EXTERN_SPACE_ID 0 /* space id where stored */
#define BTR_EXTERN_PAGE_NO 4 /* page no where stored */
#define BTR_EXTERN_OFFSET 8 /* offset of BLOB header
on that page */
#define BTR_EXTERN_LEN 12 /* 8 bytes containing the
length of the externally
stored part of the BLOB.
The 2 highest bits are
reserved to the flags below. */
/*--------------------------------------*/
#define BTR_EXTERN_FIELD_REF_SIZE 20
/* The highest bit of BTR_EXTERN_LEN (i.e., the highest bit of the byte
at lowest address) is set to 1 if this field does not 'own' the externally
stored field; only the owner field is allowed to free the field in purge!
If the 2nd highest bit is 1 then it means that the externally stored field
was inherited from an earlier version of the row. In rollback we are not
allowed to free an inherited external field. */
#define BTR_EXTERN_OWNER_FLAG 128
#define BTR_EXTERN_INHERITED_FLAG 64
extern ulint btr_cur_n_non_sea;
extern ulint btr_cur_n_sea;
extern ulint btr_cur_n_non_sea_old;
extern ulint btr_cur_n_sea_old;
#ifndef UNIV_NONINL
#include "btr0cur.ic"
#endif
#endif

172
include/btr0cur.ic Normal file
View file

@ -0,0 +1,172 @@
/******************************************************
The index tree cursor
(c) 1994-1996 Innobase Oy
Created 10/16/1994 Heikki Tuuri
*******************************************************/
#include "btr0btr.h"
/*************************************************************
Returns the page cursor component of a tree cursor. */
UNIV_INLINE
page_cur_t*
btr_cur_get_page_cur(
/*=================*/
/* out: pointer to page cursor component */
btr_cur_t* cursor) /* in: tree cursor */
{
return(&(cursor->page_cur));
}
/*************************************************************
Returns the record pointer of a tree cursor. */
UNIV_INLINE
rec_t*
btr_cur_get_rec(
/*============*/
/* out: pointer to record */
btr_cur_t* cursor) /* in: tree cursor */
{
return(page_cur_get_rec(&(cursor->page_cur)));
}
/*************************************************************
Invalidates a tree cursor by setting record pointer to NULL. */
UNIV_INLINE
void
btr_cur_invalidate(
/*===============*/
btr_cur_t* cursor) /* in: tree cursor */
{
page_cur_invalidate(&(cursor->page_cur));
}
/*************************************************************
Returns the page of a tree cursor. */
UNIV_INLINE
page_t*
btr_cur_get_page(
/*=============*/
/* out: pointer to page */
btr_cur_t* cursor) /* in: tree cursor */
{
page_t* page = buf_frame_align(page_cur_get_rec(&(cursor->page_cur)));
ut_ad(!!page_is_comp(page) == cursor->index->table->comp);
return(page);
}
/*************************************************************
Returns the tree of a cursor. */
UNIV_INLINE
dict_tree_t*
btr_cur_get_tree(
/*=============*/
/* out: tree */
btr_cur_t* cursor) /* in: tree cursor */
{
return((cursor->index)->tree);
}
/*************************************************************
Positions a tree cursor at a given record. */
UNIV_INLINE
void
btr_cur_position(
/*=============*/
dict_index_t* index, /* in: index */
rec_t* rec, /* in: record in tree */
btr_cur_t* cursor) /* in: cursor */
{
page_cur_position(rec, btr_cur_get_page_cur(cursor));
cursor->index = index;
}
/*************************************************************************
Checks if compressing an index page where a btr cursor is placed makes
sense. */
UNIV_INLINE
ibool
btr_cur_compress_recommendation(
/*============================*/
/* out: TRUE if compression is recommended */
btr_cur_t* cursor, /* in: btr cursor */
mtr_t* mtr) /* in: mtr */
{
page_t* page;
ut_ad(mtr_memo_contains(mtr, buf_block_align(
btr_cur_get_page(cursor)),
MTR_MEMO_PAGE_X_FIX));
page = btr_cur_get_page(cursor);
if ((page_get_data_size(page) < BTR_CUR_PAGE_COMPRESS_LIMIT)
|| ((btr_page_get_next(page, mtr) == FIL_NULL)
&& (btr_page_get_prev(page, mtr) == FIL_NULL))) {
/* The page fillfactor has dropped below a predefined
minimum value OR the level in the B-tree contains just
one page: we recommend compression if this is not the
root page. */
if (dict_tree_get_page((cursor->index)->tree)
== buf_frame_get_page_no(page)) {
/* It is the root page */
return(FALSE);
}
return(TRUE);
}
return(FALSE);
}
/*************************************************************************
Checks if the record on which the cursor is placed can be deleted without
making tree compression necessary (or, recommended). */
UNIV_INLINE
ibool
btr_cur_can_delete_without_compress(
/*================================*/
/* out: TRUE if can be deleted without
recommended compression */
btr_cur_t* cursor, /* in: btr cursor */
ulint rec_size,/* in: rec_get_size(btr_cur_get_rec(cursor))*/
mtr_t* mtr) /* in: mtr */
{
page_t* page;
ut_ad(mtr_memo_contains(mtr, buf_block_align(
btr_cur_get_page(cursor)),
MTR_MEMO_PAGE_X_FIX));
page = btr_cur_get_page(cursor);
if ((page_get_data_size(page) - rec_size < BTR_CUR_PAGE_COMPRESS_LIMIT)
|| ((btr_page_get_next(page, mtr) == FIL_NULL)
&& (btr_page_get_prev(page, mtr) == FIL_NULL))
|| (page_get_n_recs(page) < 2)) {
/* The page fillfactor will drop below a predefined
minimum value, OR the level in the B-tree contains just
one page, OR the page will become empty: we recommend
compression if this is not the root page. */
if (dict_tree_get_page((cursor->index)->tree)
== buf_frame_get_page_no(page)) {
/* It is the root page */
return(TRUE);
}
return(FALSE);
}
return(TRUE);
}

516
include/btr0pcur.h Normal file
View file

@ -0,0 +1,516 @@
/******************************************************
The index tree persistent cursor
(c) 1996 Innobase Oy
Created 2/23/1996 Heikki Tuuri
*******************************************************/
#ifndef btr0pcur_h
#define btr0pcur_h
#include "univ.i"
#include "dict0dict.h"
#include "data0data.h"
#include "mtr0mtr.h"
#include "page0cur.h"
#include "btr0cur.h"
#include "btr0btr.h"
#include "btr0types.h"
/* Relative positions for a stored cursor position */
#define BTR_PCUR_ON 1
#define BTR_PCUR_BEFORE 2
#define BTR_PCUR_AFTER 3
/* Note that if the tree is not empty, btr_pcur_store_position does not
use the following, but only uses the above three alternatives, where the
position is stored relative to a specific record: this makes implementation
of a scroll cursor easier */
#define BTR_PCUR_BEFORE_FIRST_IN_TREE 4 /* in an empty tree */
#define BTR_PCUR_AFTER_LAST_IN_TREE 5 /* in an empty tree */
/******************************************************************
Allocates memory for a persistent cursor object and initializes the cursor. */
btr_pcur_t*
btr_pcur_create_for_mysql(void);
/*============================*/
/* out, own: persistent cursor */
/******************************************************************
Frees the memory for a persistent cursor object. */
void
btr_pcur_free_for_mysql(
/*====================*/
btr_pcur_t* cursor); /* in, own: persistent cursor */
/******************************************************************
Copies the stored position of a pcur to another pcur. */
void
btr_pcur_copy_stored_position(
/*==========================*/
btr_pcur_t* pcur_receive, /* in: pcur which will receive the
position info */
btr_pcur_t* pcur_donate); /* in: pcur from which the info is
copied */
/******************************************************************
Sets the old_rec_buf field to NULL. */
UNIV_INLINE
void
btr_pcur_init(
/*==========*/
btr_pcur_t* pcur); /* in: persistent cursor */
/******************************************************************
Initializes and opens a persistent cursor to an index tree. It should be
closed with btr_pcur_close. */
UNIV_INLINE
void
btr_pcur_open(
/*==========*/
dict_index_t* index, /* in: index */
dtuple_t* tuple, /* in: tuple on which search done */
ulint mode, /* in: PAGE_CUR_L, ...;
NOTE that if the search is made using a unique
prefix of a record, mode should be
PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
may end up on the previous page from the
record! */
ulint latch_mode,/* in: BTR_SEARCH_LEAF, ... */
btr_pcur_t* cursor, /* in: memory buffer for persistent cursor */
mtr_t* mtr); /* in: mtr */
/******************************************************************
Opens an persistent cursor to an index tree without initializing the
cursor. */
UNIV_INLINE
void
btr_pcur_open_with_no_init(
/*=======================*/
dict_index_t* index, /* in: index */
dtuple_t* tuple, /* in: tuple on which search done */
ulint mode, /* in: PAGE_CUR_L, ...;
NOTE that if the search is made using a unique
prefix of a record, mode should be
PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
may end up on the previous page of the
record! */
ulint latch_mode,/* in: BTR_SEARCH_LEAF, ...;
NOTE that if has_search_latch != 0 then
we maybe do not acquire a latch on the cursor
page, but assume that the caller uses his
btr search latch to protect the record! */
btr_pcur_t* cursor, /* in: memory buffer for persistent cursor */
ulint has_search_latch,/* in: latch mode the caller
currently has on btr_search_latch:
RW_S_LATCH, or 0 */
mtr_t* mtr); /* in: mtr */
/*********************************************************************
Opens a persistent cursor at either end of an index. */
UNIV_INLINE
void
btr_pcur_open_at_index_side(
/*========================*/
ibool from_left, /* in: TRUE if open to the low end,
FALSE if to the high end */
dict_index_t* index, /* in: index */
ulint latch_mode, /* in: latch mode */
btr_pcur_t* pcur, /* in: cursor */
ibool do_init, /* in: TRUE if should be initialized */
mtr_t* mtr); /* in: mtr */
/******************************************************************
Gets the up_match value for a pcur after a search. */
UNIV_INLINE
ulint
btr_pcur_get_up_match(
/*==================*/
/* out: number of matched fields at the cursor
or to the right if search mode was PAGE_CUR_GE,
otherwise undefined */
btr_pcur_t* cursor); /* in: memory buffer for persistent cursor */
/******************************************************************
Gets the low_match value for a pcur after a search. */
UNIV_INLINE
ulint
btr_pcur_get_low_match(
/*===================*/
/* out: number of matched fields at the cursor
or to the right if search mode was PAGE_CUR_LE,
otherwise undefined */
btr_pcur_t* cursor); /* in: memory buffer for persistent cursor */
/******************************************************************
If mode is PAGE_CUR_G or PAGE_CUR_GE, opens a persistent cursor on the first
user record satisfying the search condition, in the case PAGE_CUR_L or
PAGE_CUR_LE, on the last user record. If no such user record exists, then
in the first case sets the cursor after last in tree, and in the latter case
before first in tree. The latching mode must be BTR_SEARCH_LEAF or
BTR_MODIFY_LEAF. */
void
btr_pcur_open_on_user_rec(
/*======================*/
dict_index_t* index, /* in: index */
dtuple_t* tuple, /* in: tuple on which search done */
ulint mode, /* in: PAGE_CUR_L, ... */
ulint latch_mode, /* in: BTR_SEARCH_LEAF or
BTR_MODIFY_LEAF */
btr_pcur_t* cursor, /* in: memory buffer for persistent
cursor */
mtr_t* mtr); /* in: mtr */
/**************************************************************************
Positions a cursor at a randomly chosen position within a B-tree. */
UNIV_INLINE
void
btr_pcur_open_at_rnd_pos(
/*=====================*/
dict_index_t* index, /* in: index */
ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... */
btr_pcur_t* cursor, /* in/out: B-tree pcur */
mtr_t* mtr); /* in: mtr */
/******************************************************************
Frees the possible old_rec_buf buffer of a persistent cursor and sets the
latch mode of the persistent cursor to BTR_NO_LATCHES. */
UNIV_INLINE
void
btr_pcur_close(
/*===========*/
btr_pcur_t* cursor); /* in: persistent cursor */
/******************************************************************
The position of the cursor is stored by taking an initial segment of the
record the cursor is positioned on, before, or after, and copying it to the
cursor data structure, or just setting a flag if the cursor id before the
first in an EMPTY tree, or after the last in an EMPTY tree. NOTE that the
page where the cursor is positioned must not be empty if the index tree is
not totally empty! */
void
btr_pcur_store_position(
/*====================*/
btr_pcur_t* cursor, /* in: persistent cursor */
mtr_t* mtr); /* in: mtr */
/******************************************************************
Restores the stored position of a persistent cursor bufferfixing the page and
obtaining the specified latches. If the cursor position was saved when the
(1) cursor was positioned on a user record: this function restores the position
to the last record LESS OR EQUAL to the stored record;
(2) cursor was positioned on a page infimum record: restores the position to
the last record LESS than the user record which was the successor of the page
infimum;
(3) cursor was positioned on the page supremum: restores to the first record
GREATER than the user record which was the predecessor of the supremum.
(4) cursor was positioned before the first or after the last in an empty tree:
restores to before first or after the last in the tree. */
ibool
btr_pcur_restore_position(
/*======================*/
/* out: TRUE if the cursor position
was stored when it was on a user record
and it can be restored on a user record
whose ordering fields are identical to
the ones of the original user record */
ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... */
btr_pcur_t* cursor, /* in: detached persistent cursor */
mtr_t* mtr); /* in: mtr */
/******************************************************************
If the latch mode of the cursor is BTR_LEAF_SEARCH or BTR_LEAF_MODIFY,
releases the page latch and bufferfix reserved by the cursor.
NOTE! In the case of BTR_LEAF_MODIFY, there should not exist changes
made by the current mini-transaction to the data protected by the
cursor latch, as then the latch must not be released until mtr_commit. */
void
btr_pcur_release_leaf(
/*==================*/
btr_pcur_t* cursor, /* in: persistent cursor */
mtr_t* mtr); /* in: mtr */
/*************************************************************
Gets the rel_pos field for a cursor whose position has been stored. */
UNIV_INLINE
ulint
btr_pcur_get_rel_pos(
/*=================*/
/* out: BTR_PCUR_ON, ... */
btr_pcur_t* cursor);/* in: persistent cursor */
/*************************************************************
Sets the mtr field for a pcur. */
UNIV_INLINE
void
btr_pcur_set_mtr(
/*=============*/
btr_pcur_t* cursor, /* in: persistent cursor */
mtr_t* mtr); /* in, own: mtr */
/*************************************************************
Gets the mtr field for a pcur. */
UNIV_INLINE
mtr_t*
btr_pcur_get_mtr(
/*=============*/
/* out: mtr */
btr_pcur_t* cursor); /* in: persistent cursor */
/******************************************************************
Commits the pcur mtr and sets the pcur latch mode to BTR_NO_LATCHES,
that is, the cursor becomes detached. If there have been modifications
to the page where pcur is positioned, this can be used instead of
btr_pcur_release_leaf. Function btr_pcur_store_position should be used
before calling this, if restoration of cursor is wanted later. */
UNIV_INLINE
void
btr_pcur_commit(
/*============*/
btr_pcur_t* pcur); /* in: persistent cursor */
/******************************************************************
Differs from btr_pcur_commit in that we can specify the mtr to commit. */
UNIV_INLINE
void
btr_pcur_commit_specify_mtr(
/*========================*/
btr_pcur_t* pcur, /* in: persistent cursor */
mtr_t* mtr); /* in: mtr to commit */
/******************************************************************
Tests if a cursor is detached: that is the latch mode is BTR_NO_LATCHES. */
UNIV_INLINE
ibool
btr_pcur_is_detached(
/*=================*/
/* out: TRUE if detached */
btr_pcur_t* pcur); /* in: persistent cursor */
/*************************************************************
Moves the persistent cursor to the next record in the tree. If no records are
left, the cursor stays 'after last in tree'. */
UNIV_INLINE
ibool
btr_pcur_move_to_next(
/*==================*/
/* out: TRUE if the cursor was not after last
in tree */
btr_pcur_t* cursor, /* in: persistent cursor; NOTE that the
function may release the page latch */
mtr_t* mtr); /* in: mtr */
/*************************************************************
Moves the persistent cursor to the previous record in the tree. If no records
are left, the cursor stays 'before first in tree'. */
ibool
btr_pcur_move_to_prev(
/*==================*/
/* out: TRUE if the cursor was not before first
in tree */
btr_pcur_t* cursor, /* in: persistent cursor; NOTE that the
function may release the page latch */
mtr_t* mtr); /* in: mtr */
/*************************************************************
Moves the persistent cursor to the last record on the same page. */
UNIV_INLINE
void
btr_pcur_move_to_last_on_page(
/*==========================*/
btr_pcur_t* cursor, /* in: persistent cursor */
mtr_t* mtr); /* in: mtr */
/*************************************************************
Moves the persistent cursor to the next user record in the tree. If no user
records are left, the cursor ends up 'after last in tree'. */
UNIV_INLINE
ibool
btr_pcur_move_to_next_user_rec(
/*===========================*/
/* out: TRUE if the cursor moved forward,
ending on a user record */
btr_pcur_t* cursor, /* in: persistent cursor; NOTE that the
function may release the page latch */
mtr_t* mtr); /* in: mtr */
/*************************************************************
Moves the persistent cursor to the first record on the next page.
Releases the latch on the current page, and bufferunfixes it.
Note that there must not be modifications on the current page,
as then the x-latch can be released only in mtr_commit. */
void
btr_pcur_move_to_next_page(
/*=======================*/
btr_pcur_t* cursor, /* in: persistent cursor; must be on the
last record of the current page */
mtr_t* mtr); /* in: mtr */
/*************************************************************
Moves the persistent cursor backward if it is on the first record
of the page. Releases the latch on the current page, and bufferunfixes
it. Note that to prevent a possible deadlock, the operation first
stores the position of the cursor, releases the leaf latch, acquires
necessary latches and restores the cursor position again before returning.
The alphabetical position of the cursor is guaranteed to be sensible
on return, but it may happen that the cursor is not positioned on the
last record of any page, because the structure of the tree may have
changed while the cursor had no latches. */
void
btr_pcur_move_backward_from_page(
/*=============================*/
btr_pcur_t* cursor, /* in: persistent cursor, must be on the
first record of the current page */
mtr_t* mtr); /* in: mtr */
/*************************************************************
Returns the btr cursor component of a persistent cursor. */
UNIV_INLINE
btr_cur_t*
btr_pcur_get_btr_cur(
/*=================*/
/* out: pointer to btr cursor component */
btr_pcur_t* cursor); /* in: persistent cursor */
/*************************************************************
Returns the page cursor component of a persistent cursor. */
UNIV_INLINE
page_cur_t*
btr_pcur_get_page_cur(
/*==================*/
/* out: pointer to page cursor component */
btr_pcur_t* cursor); /* in: persistent cursor */
/*************************************************************
Returns the page of a persistent cursor. */
UNIV_INLINE
page_t*
btr_pcur_get_page(
/*==============*/
/* out: pointer to the page */
btr_pcur_t* cursor);/* in: persistent cursor */
/*************************************************************
Returns the record of a persistent cursor. */
UNIV_INLINE
rec_t*
btr_pcur_get_rec(
/*=============*/
/* out: pointer to the record */
btr_pcur_t* cursor);/* in: persistent cursor */
/*************************************************************
Checks if the persistent cursor is on a user record. */
UNIV_INLINE
ibool
btr_pcur_is_on_user_rec(
/*====================*/
btr_pcur_t* cursor, /* in: persistent cursor */
mtr_t* mtr); /* in: mtr */
/*************************************************************
Checks if the persistent cursor is after the last user record on
a page. */
UNIV_INLINE
ibool
btr_pcur_is_after_last_on_page(
/*===========================*/
btr_pcur_t* cursor, /* in: persistent cursor */
mtr_t* mtr); /* in: mtr */
/*************************************************************
Checks if the persistent cursor is before the first user record on
a page. */
UNIV_INLINE
ibool
btr_pcur_is_before_first_on_page(
/*=============================*/
btr_pcur_t* cursor, /* in: persistent cursor */
mtr_t* mtr); /* in: mtr */
/*************************************************************
Checks if the persistent cursor is before the first user record in
the index tree. */
UNIV_INLINE
ibool
btr_pcur_is_before_first_in_tree(
/*=============================*/
btr_pcur_t* cursor, /* in: persistent cursor */
mtr_t* mtr); /* in: mtr */
/*************************************************************
Checks if the persistent cursor is after the last user record in
the index tree. */
UNIV_INLINE
ibool
btr_pcur_is_after_last_in_tree(
/*===========================*/
btr_pcur_t* cursor, /* in: persistent cursor */
mtr_t* mtr); /* in: mtr */
/*************************************************************
Moves the persistent cursor to the next record on the same page. */
UNIV_INLINE
void
btr_pcur_move_to_next_on_page(
/*==========================*/
btr_pcur_t* cursor, /* in: persistent cursor */
mtr_t* mtr); /* in: mtr */
/*************************************************************
Moves the persistent cursor to the previous record on the same page. */
UNIV_INLINE
void
btr_pcur_move_to_prev_on_page(
/*==========================*/
btr_pcur_t* cursor, /* in: persistent cursor */
mtr_t* mtr); /* in: mtr */
/* The persistent B-tree cursor structure. This is used mainly for SQL
selects, updates, and deletes. */
struct btr_pcur_struct{
btr_cur_t btr_cur; /* a B-tree cursor */
ulint latch_mode; /* see FIXME note below!
BTR_SEARCH_LEAF, BTR_MODIFY_LEAF,
BTR_MODIFY_TREE, or BTR_NO_LATCHES,
depending on the latching state of
the page and tree where the cursor is
positioned; the last value means that
the cursor is not currently positioned:
we say then that the cursor is
detached; it can be restored to
attached if the old position was
stored in old_rec */
ulint old_stored; /* BTR_PCUR_OLD_STORED
or BTR_PCUR_OLD_NOT_STORED */
rec_t* old_rec; /* if cursor position is stored,
contains an initial segment of the
latest record cursor was positioned
either on, before, or after */
ulint old_n_fields; /* number of fields in old_rec */
ulint rel_pos; /* BTR_PCUR_ON, BTR_PCUR_BEFORE, or
BTR_PCUR_AFTER, depending on whether
cursor was on, before, or after the
old_rec record */
buf_block_t* block_when_stored;/* buffer block when the position was
stored; note that if AWE is on, frames
may move */
dulint modify_clock; /* the modify clock value of the
buffer block when the cursor position
was stored */
ulint pos_state; /* see FIXME note below!
BTR_PCUR_IS_POSITIONED,
BTR_PCUR_WAS_POSITIONED,
BTR_PCUR_NOT_POSITIONED */
ulint search_mode; /* PAGE_CUR_G, ... */
trx_t* trx_if_known; /* the transaction, if we know it;
otherwise this field is not defined;
can ONLY BE USED in error prints in
fatal assertion failures! */
/*-----------------------------*/
/* NOTE that the following fields may possess dynamically allocated
memory which should be freed if not needed anymore! */
mtr_t* mtr; /* NULL, or this field may contain
a mini-transaction which holds the
latch on the cursor page */
byte* old_rec_buf; /* NULL, or a dynamically allocated
buffer for old_rec */
ulint buf_size; /* old_rec_buf size if old_rec_buf
is not NULL */
};
#define BTR_PCUR_IS_POSITIONED 1997660512 /* FIXME: currently, the state
can be BTR_PCUR_IS_POSITIONED,
though it really should be
BTR_PCUR_WAS_POSITIONED,
because we have no obligation
to commit the cursor with
mtr; similarly latch_mode may
be out of date */
#define BTR_PCUR_WAS_POSITIONED 1187549791
#define BTR_PCUR_NOT_POSITIONED 1328997689
#define BTR_PCUR_OLD_STORED 908467085
#define BTR_PCUR_OLD_NOT_STORED 122766467
#ifndef UNIV_NONINL
#include "btr0pcur.ic"
#endif
#endif

630
include/btr0pcur.ic Normal file
View file

@ -0,0 +1,630 @@
/******************************************************
The index tree persistent cursor
(c) 1996 Innobase Oy
Created 2/23/1996 Heikki Tuuri
*******************************************************/
/*************************************************************
Gets the rel_pos field for a cursor whose position has been stored. */
UNIV_INLINE
ulint
btr_pcur_get_rel_pos(
/*=================*/
/* out: BTR_PCUR_ON, ... */
btr_pcur_t* cursor) /* in: persistent cursor */
{
ut_ad(cursor);
ut_ad(cursor->old_rec);
ut_ad(cursor->old_stored == BTR_PCUR_OLD_STORED);
ut_ad(cursor->pos_state == BTR_PCUR_WAS_POSITIONED
|| cursor->pos_state == BTR_PCUR_IS_POSITIONED);
return(cursor->rel_pos);
}
/*************************************************************
Sets the mtr field for a pcur. */
UNIV_INLINE
void
btr_pcur_set_mtr(
/*=============*/
btr_pcur_t* cursor, /* in: persistent cursor */
mtr_t* mtr) /* in, own: mtr */
{
ut_ad(cursor);
cursor->mtr = mtr;
}
/*************************************************************
Gets the mtr field for a pcur. */
UNIV_INLINE
mtr_t*
btr_pcur_get_mtr(
/*=============*/
/* out: mtr */
btr_pcur_t* cursor) /* in: persistent cursor */
{
ut_ad(cursor);
return(cursor->mtr);
}
/*************************************************************
Returns the btr cursor component of a persistent cursor. */
UNIV_INLINE
btr_cur_t*
btr_pcur_get_btr_cur(
/*=================*/
/* out: pointer to btr cursor component */
btr_pcur_t* cursor) /* in: persistent cursor */
{
return(&(cursor->btr_cur));
}
/*************************************************************
Returns the page cursor component of a persistent cursor. */
UNIV_INLINE
page_cur_t*
btr_pcur_get_page_cur(
/*==================*/
/* out: pointer to page cursor component */
btr_pcur_t* cursor) /* in: persistent cursor */
{
return(btr_cur_get_page_cur(&(cursor->btr_cur)));
}
/*************************************************************
Returns the page of a persistent cursor. */
UNIV_INLINE
page_t*
btr_pcur_get_page(
/*==============*/
/* out: pointer to the page */
btr_pcur_t* cursor) /* in: persistent cursor */
{
ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
return(page_cur_get_page(btr_pcur_get_page_cur(cursor)));
}
/*************************************************************
Returns the record of a persistent cursor. */
UNIV_INLINE
rec_t*
btr_pcur_get_rec(
/*=============*/
/* out: pointer to the record */
btr_pcur_t* cursor) /* in: persistent cursor */
{
ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
return(page_cur_get_rec(btr_pcur_get_page_cur(cursor)));
}
/******************************************************************
Gets the up_match value for a pcur after a search. */
UNIV_INLINE
ulint
btr_pcur_get_up_match(
/*==================*/
/* out: number of matched fields at the cursor
or to the right if search mode was PAGE_CUR_GE,
otherwise undefined */
btr_pcur_t* cursor) /* in: memory buffer for persistent cursor */
{
btr_cur_t* btr_cursor;
ut_ad((cursor->pos_state == BTR_PCUR_WAS_POSITIONED)
|| (cursor->pos_state == BTR_PCUR_IS_POSITIONED));
btr_cursor = btr_pcur_get_btr_cur(cursor);
ut_ad(btr_cursor->up_match != ULINT_UNDEFINED);
return(btr_cursor->up_match);
}
/******************************************************************
Gets the low_match value for a pcur after a search. */
UNIV_INLINE
ulint
btr_pcur_get_low_match(
/*===================*/
/* out: number of matched fields at the cursor
or to the right if search mode was PAGE_CUR_LE,
otherwise undefined */
btr_pcur_t* cursor) /* in: memory buffer for persistent cursor */
{
btr_cur_t* btr_cursor;
ut_ad((cursor->pos_state == BTR_PCUR_WAS_POSITIONED)
|| (cursor->pos_state == BTR_PCUR_IS_POSITIONED));
btr_cursor = btr_pcur_get_btr_cur(cursor);
ut_ad(btr_cursor->low_match != ULINT_UNDEFINED);
return(btr_cursor->low_match);
}
/*************************************************************
Checks if the persistent cursor is after the last user record on
a page. */
UNIV_INLINE
ibool
btr_pcur_is_after_last_on_page(
/*===========================*/
btr_pcur_t* cursor, /* in: persistent cursor */
mtr_t* mtr) /* in: mtr */
{
UT_NOT_USED(mtr);
ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
return(page_cur_is_after_last(btr_pcur_get_page_cur(cursor)));
}
/*************************************************************
Checks if the persistent cursor is before the first user record on
a page. */
UNIV_INLINE
ibool
btr_pcur_is_before_first_on_page(
/*=============================*/
btr_pcur_t* cursor, /* in: persistent cursor */
mtr_t* mtr) /* in: mtr */
{
UT_NOT_USED(mtr);
ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
return(page_cur_is_before_first(btr_pcur_get_page_cur(cursor)));
}
/*************************************************************
Checks if the persistent cursor is on a user record. */
UNIV_INLINE
ibool
btr_pcur_is_on_user_rec(
/*====================*/
btr_pcur_t* cursor, /* in: persistent cursor */
mtr_t* mtr) /* in: mtr */
{
ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
if ((btr_pcur_is_before_first_on_page(cursor, mtr))
|| (btr_pcur_is_after_last_on_page(cursor, mtr))) {
return(FALSE);
}
return(TRUE);
}
/*************************************************************
Checks if the persistent cursor is before the first user record in
the index tree. */
UNIV_INLINE
ibool
btr_pcur_is_before_first_in_tree(
/*=============================*/
btr_pcur_t* cursor, /* in: persistent cursor */
mtr_t* mtr) /* in: mtr */
{
ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
if (btr_page_get_prev(btr_pcur_get_page(cursor), mtr) != FIL_NULL) {
return(FALSE);
}
return(page_cur_is_before_first(btr_pcur_get_page_cur(cursor)));
}
/*************************************************************
Checks if the persistent cursor is after the last user record in
the index tree. */
UNIV_INLINE
ibool
btr_pcur_is_after_last_in_tree(
/*===========================*/
btr_pcur_t* cursor, /* in: persistent cursor */
mtr_t* mtr) /* in: mtr */
{
ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
if (btr_page_get_next(btr_pcur_get_page(cursor), mtr) != FIL_NULL) {
return(FALSE);
}
return(page_cur_is_after_last(btr_pcur_get_page_cur(cursor)));
}
/*************************************************************
Moves the persistent cursor to the next record on the same page. */
UNIV_INLINE
void
btr_pcur_move_to_next_on_page(
/*==========================*/
btr_pcur_t* cursor, /* in: persistent cursor */
mtr_t* mtr) /* in: mtr */
{
UT_NOT_USED(mtr);
ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
page_cur_move_to_next(btr_pcur_get_page_cur(cursor));
cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
}
/*************************************************************
Moves the persistent cursor to the previous record on the same page. */
UNIV_INLINE
void
btr_pcur_move_to_prev_on_page(
/*==========================*/
btr_pcur_t* cursor, /* in: persistent cursor */
mtr_t* mtr) /* in: mtr */
{
UT_NOT_USED(mtr);
ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
page_cur_move_to_prev(btr_pcur_get_page_cur(cursor));
cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
}
/*************************************************************
Moves the persistent cursor to the last record on the same page. */
UNIV_INLINE
void
btr_pcur_move_to_last_on_page(
/*==========================*/
btr_pcur_t* cursor, /* in: persistent cursor */
mtr_t* mtr) /* in: mtr */
{
UT_NOT_USED(mtr);
ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
page_cur_set_after_last(buf_frame_align(btr_pcur_get_rec(cursor)),
btr_pcur_get_page_cur(cursor));
cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
}
/*************************************************************
Moves the persistent cursor to the next user record in the tree. If no user
records are left, the cursor ends up 'after last in tree'. */
UNIV_INLINE
ibool
btr_pcur_move_to_next_user_rec(
/*===========================*/
/* out: TRUE if the cursor moved forward,
ending on a user record */
btr_pcur_t* cursor, /* in: persistent cursor; NOTE that the
function may release the page latch */
mtr_t* mtr) /* in: mtr */
{
ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
loop:
if (btr_pcur_is_after_last_on_page(cursor, mtr)) {
if (btr_pcur_is_after_last_in_tree(cursor, mtr)) {
return(FALSE);
}
btr_pcur_move_to_next_page(cursor, mtr);
} else {
btr_pcur_move_to_next_on_page(cursor, mtr);
}
if (btr_pcur_is_on_user_rec(cursor, mtr)) {
return(TRUE);
}
goto loop;
}
/*************************************************************
Moves the persistent cursor to the next record in the tree. If no records are
left, the cursor stays 'after last in tree'. */
UNIV_INLINE
ibool
btr_pcur_move_to_next(
/*==================*/
/* out: TRUE if the cursor was not after last
in tree */
btr_pcur_t* cursor, /* in: persistent cursor; NOTE that the
function may release the page latch */
mtr_t* mtr) /* in: mtr */
{
ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
if (btr_pcur_is_after_last_on_page(cursor, mtr)) {
if (btr_pcur_is_after_last_in_tree(cursor, mtr)) {
return(FALSE);
}
btr_pcur_move_to_next_page(cursor, mtr);
return(TRUE);
}
btr_pcur_move_to_next_on_page(cursor, mtr);
return(TRUE);
}
/******************************************************************
Commits the pcur mtr and sets the pcur latch mode to BTR_NO_LATCHES,
that is, the cursor becomes detached. If there have been modifications
to the page where pcur is positioned, this can be used instead of
btr_pcur_release_leaf. Function btr_pcur_store_position should be used
before calling this, if restoration of cursor is wanted later. */
UNIV_INLINE
void
btr_pcur_commit(
/*============*/
btr_pcur_t* pcur) /* in: persistent cursor */
{
ut_a(pcur->pos_state == BTR_PCUR_IS_POSITIONED);
pcur->latch_mode = BTR_NO_LATCHES;
mtr_commit(pcur->mtr);
pcur->pos_state = BTR_PCUR_WAS_POSITIONED;
}
/******************************************************************
Differs from btr_pcur_commit in that we can specify the mtr to commit. */
UNIV_INLINE
void
btr_pcur_commit_specify_mtr(
/*========================*/
btr_pcur_t* pcur, /* in: persistent cursor */
mtr_t* mtr) /* in: mtr to commit */
{
ut_a(pcur->pos_state == BTR_PCUR_IS_POSITIONED);
pcur->latch_mode = BTR_NO_LATCHES;
mtr_commit(mtr);
pcur->pos_state = BTR_PCUR_WAS_POSITIONED;
}
/******************************************************************
Sets the pcur latch mode to BTR_NO_LATCHES. */
UNIV_INLINE
void
btr_pcur_detach(
/*============*/
btr_pcur_t* pcur) /* in: persistent cursor */
{
ut_a(pcur->pos_state == BTR_PCUR_IS_POSITIONED);
pcur->latch_mode = BTR_NO_LATCHES;
pcur->pos_state = BTR_PCUR_WAS_POSITIONED;
}
/******************************************************************
Tests if a cursor is detached: that is the latch mode is BTR_NO_LATCHES. */
UNIV_INLINE
ibool
btr_pcur_is_detached(
/*=================*/
/* out: TRUE if detached */
btr_pcur_t* pcur) /* in: persistent cursor */
{
if (pcur->latch_mode == BTR_NO_LATCHES) {
return(TRUE);
}
return(FALSE);
}
/******************************************************************
Sets the old_rec_buf field to NULL. */
UNIV_INLINE
void
btr_pcur_init(
/*==========*/
btr_pcur_t* pcur) /* in: persistent cursor */
{
pcur->old_stored = BTR_PCUR_OLD_NOT_STORED;
pcur->old_rec_buf = NULL;
pcur->old_rec = NULL;
}
/******************************************************************
Initializes and opens a persistent cursor to an index tree. It should be
closed with btr_pcur_close. */
UNIV_INLINE
void
btr_pcur_open(
/*==========*/
dict_index_t* index, /* in: index */
dtuple_t* tuple, /* in: tuple on which search done */
ulint mode, /* in: PAGE_CUR_L, ...;
NOTE that if the search is made using a unique
prefix of a record, mode should be
PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
may end up on the previous page from the
record! */
ulint latch_mode,/* in: BTR_SEARCH_LEAF, ... */
btr_pcur_t* cursor, /* in: memory buffer for persistent cursor */
mtr_t* mtr) /* in: mtr */
{
btr_cur_t* btr_cursor;
/* Initialize the cursor */
btr_pcur_init(cursor);
cursor->latch_mode = latch_mode;
cursor->search_mode = mode;
/* Search with the tree cursor */
btr_cursor = btr_pcur_get_btr_cur(cursor);
btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode,
btr_cursor, 0, mtr);
cursor->pos_state = BTR_PCUR_IS_POSITIONED;
cursor->trx_if_known = NULL;
}
/******************************************************************
Opens an persistent cursor to an index tree without initializing the
cursor. */
UNIV_INLINE
void
btr_pcur_open_with_no_init(
/*=======================*/
dict_index_t* index, /* in: index */
dtuple_t* tuple, /* in: tuple on which search done */
ulint mode, /* in: PAGE_CUR_L, ...;
NOTE that if the search is made using a unique
prefix of a record, mode should be
PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
may end up on the previous page of the
record! */
ulint latch_mode,/* in: BTR_SEARCH_LEAF, ...;
NOTE that if has_search_latch != 0 then
we maybe do not acquire a latch on the cursor
page, but assume that the caller uses his
btr search latch to protect the record! */
btr_pcur_t* cursor, /* in: memory buffer for persistent cursor */
ulint has_search_latch,/* in: latch mode the caller
currently has on btr_search_latch:
RW_S_LATCH, or 0 */
mtr_t* mtr) /* in: mtr */
{
btr_cur_t* btr_cursor;
cursor->latch_mode = latch_mode;
cursor->search_mode = mode;
/* Search with the tree cursor */
btr_cursor = btr_pcur_get_btr_cur(cursor);
btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode,
btr_cursor, has_search_latch, mtr);
cursor->pos_state = BTR_PCUR_IS_POSITIONED;
cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
cursor->trx_if_known = NULL;
}
/*********************************************************************
Opens a persistent cursor at either end of an index. */
UNIV_INLINE
void
btr_pcur_open_at_index_side(
/*========================*/
ibool from_left, /* in: TRUE if open to the low end,
FALSE if to the high end */
dict_index_t* index, /* in: index */
ulint latch_mode, /* in: latch mode */
btr_pcur_t* pcur, /* in: cursor */
ibool do_init, /* in: TRUE if should be initialized */
mtr_t* mtr) /* in: mtr */
{
pcur->latch_mode = latch_mode;
if (from_left) {
pcur->search_mode = PAGE_CUR_G;
} else {
pcur->search_mode = PAGE_CUR_L;
}
if (do_init) {
btr_pcur_init(pcur);
}
btr_cur_open_at_index_side(from_left, index, latch_mode,
btr_pcur_get_btr_cur(pcur), mtr);
pcur->pos_state = BTR_PCUR_IS_POSITIONED;
pcur->old_stored = BTR_PCUR_OLD_NOT_STORED;
pcur->trx_if_known = NULL;
}
/**************************************************************************
Positions a cursor at a randomly chosen position within a B-tree. */
UNIV_INLINE
void
btr_pcur_open_at_rnd_pos(
/*=====================*/
dict_index_t* index, /* in: index */
ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... */
btr_pcur_t* cursor, /* in/out: B-tree pcur */
mtr_t* mtr) /* in: mtr */
{
/* Initialize the cursor */
cursor->latch_mode = latch_mode;
cursor->search_mode = PAGE_CUR_G;
btr_pcur_init(cursor);
btr_cur_open_at_rnd_pos(index, latch_mode,
btr_pcur_get_btr_cur(cursor), mtr);
cursor->pos_state = BTR_PCUR_IS_POSITIONED;
cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
cursor->trx_if_known = NULL;
}
/******************************************************************
Frees the possible memory heap of a persistent cursor and sets the latch
mode of the persistent cursor to BTR_NO_LATCHES. */
UNIV_INLINE
void
btr_pcur_close(
/*===========*/
btr_pcur_t* cursor) /* in: persistent cursor */
{
if (cursor->old_rec_buf != NULL) {
mem_free(cursor->old_rec_buf);
cursor->old_rec = NULL;
cursor->old_rec_buf = NULL;
}
cursor->btr_cur.page_cur.rec = NULL;
cursor->old_rec = NULL;
cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
cursor->latch_mode = BTR_NO_LATCHES;
cursor->pos_state = BTR_PCUR_NOT_POSITIONED;
cursor->trx_if_known = NULL;
}

253
include/btr0sea.h Normal file
View file

@ -0,0 +1,253 @@
/************************************************************************
The index tree adaptive search
(c) 1996 Innobase Oy
Created 2/17/1996 Heikki Tuuri
*************************************************************************/
#ifndef btr0sea_h
#define btr0sea_h
#include "univ.i"
#include "rem0rec.h"
#include "dict0dict.h"
#include "btr0types.h"
#include "mtr0mtr.h"
#include "ha0ha.h"
/*********************************************************************
Creates and initializes the adaptive search system at a database start. */
void
btr_search_sys_create(
/*==================*/
ulint hash_size); /* in: hash index hash table size */
/************************************************************************
Returns search info for an index. */
UNIV_INLINE
btr_search_t*
btr_search_get_info(
/*================*/
/* out: search info; search mutex reserved */
dict_index_t* index); /* in: index */
/*********************************************************************
Creates and initializes a search info struct. */
btr_search_t*
btr_search_info_create(
/*===================*/
/* out, own: search info struct */
mem_heap_t* heap); /* in: heap where created */
/*************************************************************************
Updates the search info. */
UNIV_INLINE
void
btr_search_info_update(
/*===================*/
dict_index_t* index, /* in: index of the cursor */
btr_cur_t* cursor);/* in: cursor which was just positioned */
/**********************************************************************
Tries to guess the right search position based on the hash search info
of the index. Note that if mode is PAGE_CUR_LE, which is used in inserts,
and the function returns TRUE, then cursor->up_match and cursor->low_match
both have sensible values. */
ibool
btr_search_guess_on_hash(
/*=====================*/
/* out: TRUE if succeeded */
dict_index_t* index, /* in: index */
btr_search_t* info, /* in: index search info */
dtuple_t* tuple, /* in: logical record */
ulint mode, /* in: PAGE_CUR_L, ... */
ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... */
btr_cur_t* cursor, /* out: tree cursor */
ulint has_search_latch,/* in: latch mode the caller
currently has on btr_search_latch:
RW_S_LATCH, RW_X_LATCH, or 0 */
mtr_t* mtr); /* in: mtr */
/************************************************************************
Moves or deletes hash entries for moved records. If new_page is already hashed,
then the hash index for page, if any, is dropped. If new_page is not hashed,
and page is hashed, then a new hash index is built to new_page with the same
parameters as page (this often happens when a page is split). */
void
btr_search_move_or_delete_hash_entries(
/*===================================*/
page_t* new_page, /* in: records are copied
to this page */
page_t* page, /* in: index page */
dict_index_t* index); /* in: record descriptor */
/************************************************************************
Drops a page hash index. */
void
btr_search_drop_page_hash_index(
/*============================*/
page_t* page); /* in: index page, s- or x-latched */
/************************************************************************
Drops a page hash index when a page is freed from a fseg to the file system.
Drops possible hash index if the page happens to be in the buffer pool. */
void
btr_search_drop_page_hash_when_freed(
/*=================================*/
ulint space, /* in: space id */
ulint page_no); /* in: page number */
/************************************************************************
Updates the page hash index when a single record is inserted on a page. */
void
btr_search_update_hash_node_on_insert(
/*==================================*/
btr_cur_t* cursor);/* in: cursor which was positioned to the
place to insert using btr_cur_search_...,
and the new record has been inserted next
to the cursor */
/************************************************************************
Updates the page hash index when a single record is inserted on a page. */
void
btr_search_update_hash_on_insert(
/*=============================*/
btr_cur_t* cursor);/* in: cursor which was positioned to the
place to insert using btr_cur_search_...,
and the new record has been inserted next
to the cursor */
/************************************************************************
Updates the page hash index when a single record is deleted from a page. */
void
btr_search_update_hash_on_delete(
/*=============================*/
btr_cur_t* cursor);/* in: cursor which was positioned on the
record to delete using btr_cur_search_...,
the record is not yet deleted */
/************************************************************************
Validates the search system. */
ibool
btr_search_validate(void);
/*======================*/
/* out: TRUE if ok */
/* Search info directions */
#define BTR_SEA_NO_DIRECTION 1
#define BTR_SEA_LEFT 2
#define BTR_SEA_RIGHT 3
#define BTR_SEA_SAME_REC 4
/* The search info struct in an index */
struct btr_search_struct{
ulint magic_n; /* magic number */
/* The following 4 fields are currently not used: */
rec_t* last_search; /* pointer to the lower limit record of the
previous search; NULL if not known */
ulint n_direction; /* number of consecutive searches in the
same direction */
ulint direction; /* BTR_SEA_NO_DIRECTION, BTR_SEA_LEFT,
BTR_SEA_RIGHT, BTR_SEA_SAME_REC,
or BTR_SEA_SAME_PAGE */
dulint modify_clock; /* value of modify clock at the time
last_search was stored */
/*----------------------*/
/* The following 4 fields are not protected by any latch: */
page_t* root_guess; /* the root page frame when it was last time
fetched, or NULL */
ulint hash_analysis; /* when this exceeds a certain value, the
hash analysis starts; this is reset if no
success noticed */
ibool last_hash_succ; /* TRUE if the last search would have
succeeded, or did succeed, using the hash
index; NOTE that the value here is not exact:
it is not calculated for every search, and the
calculation itself is not always accurate! */
ulint n_hash_potential;/* number of consecutive searches which would
have succeeded, or did succeed, using the hash
index */
/*----------------------*/
ulint n_fields; /* recommended prefix length for hash search:
number of full fields */
ulint n_bytes; /* recommended prefix: number of bytes in
an incomplete field */
ulint side; /* BTR_SEARCH_LEFT_SIDE or
BTR_SEARCH_RIGHT_SIDE, depending on whether
the leftmost record of several records with
the same prefix should be indexed in the
hash index */
/*----------------------*/
ulint n_hash_succ; /* number of successful hash searches thus
far */
ulint n_hash_fail; /* number of failed hash searches */
ulint n_patt_succ; /* number of successful pattern searches thus
far */
ulint n_searches; /* number of searches */
};
#define BTR_SEARCH_MAGIC_N 1112765
/* The hash index system */
typedef struct btr_search_sys_struct btr_search_sys_t;
struct btr_search_sys_struct{
hash_table_t* hash_index;
};
extern btr_search_sys_t* btr_search_sys;
/* The latch protecting the adaptive search system: this latch protects the
(1) hash index;
(2) columns of a record to which we have a pointer in the hash index;
but does NOT protect:
(3) next record offset field in a record;
(4) next or previous records on the same page.
Bear in mind (3) and (4) when using the hash index.
*/
extern rw_lock_t* btr_search_latch_temp;
#define btr_search_latch (*btr_search_latch_temp)
#ifdef UNIV_SEARCH_PERF_STAT
extern ulint btr_search_n_succ;
#endif /* UNIV_SEARCH_PERF_STAT */
extern ulint btr_search_n_hash_fail;
/* After change in n_fields or n_bytes in info, this many rounds are waited
before starting the hash analysis again: this is to save CPU time when there
is no hope in building a hash index. */
#define BTR_SEARCH_HASH_ANALYSIS 17
#define BTR_SEARCH_LEFT_SIDE 1
#define BTR_SEARCH_RIGHT_SIDE 2
/* Limit of consecutive searches for trying a search shortcut on the search
pattern */
#define BTR_SEARCH_ON_PATTERN_LIMIT 3
/* Limit of consecutive searches for trying a search shortcut using the hash
index */
#define BTR_SEARCH_ON_HASH_LIMIT 3
/* We do this many searches before trying to keep the search latch over calls
from MySQL. If we notice someone waiting for the latch, we again set this
much timeout. This is to reduce contention. */
#define BTR_SEA_TIMEOUT 10000
#ifndef UNIV_NONINL
#include "btr0sea.ic"
#endif
#endif

67
include/btr0sea.ic Normal file
View file

@ -0,0 +1,67 @@
/************************************************************************
The index tree adaptive search
(c) 1996 Innobase Oy
Created 2/17/1996 Heikki Tuuri
*************************************************************************/
#include "dict0mem.h"
#include "btr0cur.h"
#include "buf0buf.h"
/*************************************************************************
Updates the search info. */
void
btr_search_info_update_slow(
/*========================*/
btr_search_t* info, /* in: search info */
btr_cur_t* cursor);/* in: cursor which was just positioned */
/************************************************************************
Returns search info for an index. */
UNIV_INLINE
btr_search_t*
btr_search_get_info(
/*================*/
/* out: search info; search mutex reserved */
dict_index_t* index) /* in: index */
{
ut_ad(index);
return(index->search_info);
}
/*************************************************************************
Updates the search info. */
UNIV_INLINE
void
btr_search_info_update(
/*===================*/
dict_index_t* index, /* in: index of the cursor */
btr_cur_t* cursor) /* in: cursor which was just positioned */
{
btr_search_t* info;
#ifdef UNIV_SYNC_DEBUG
ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
info = btr_search_get_info(index);
info->hash_analysis++;
if (info->hash_analysis < BTR_SEARCH_HASH_ANALYSIS) {
/* Do nothing */
return;
}
ut_ad(cursor->flag != BTR_CUR_HASH);
btr_search_info_update_slow(info, cursor);
}

21
include/btr0types.h Normal file
View file

@ -0,0 +1,21 @@
/************************************************************************
The index tree general types
(c) 1996 Innobase Oy
Created 2/17/1996 Heikki Tuuri
*************************************************************************/
#ifndef btr0types_h
#define btr0types_h
#include "univ.i"
#include "rem0types.h"
#include "page0types.h"
typedef struct btr_pcur_struct btr_pcur_t;
typedef struct btr_cur_struct btr_cur_t;
typedef struct btr_search_struct btr_search_t;
#endif

1052
include/buf0buf.h Normal file

File diff suppressed because it is too large Load diff

663
include/buf0buf.ic Normal file
View file

@ -0,0 +1,663 @@
/******************************************************
The database buffer buf_pool
(c) 1995 Innobase Oy
Created 11/5/1995 Heikki Tuuri
*******************************************************/
#include "buf0flu.h"
#include "buf0lru.h"
#include "buf0rea.h"
#include "mtr0mtr.h"
#ifdef UNIV_DEBUG
extern ulint buf_dbg_counter; /* This is used to insert validation
operations in execution in the
debug version */
#endif /* UNIV_DEBUG */
/************************************************************************
Recommends a move of a block to the start of the LRU list if there is danger
of dropping from the buffer pool. NOTE: does not reserve the buffer pool
mutex. */
UNIV_INLINE
ibool
buf_block_peek_if_too_old(
/*======================*/
/* out: TRUE if should be made younger */
buf_block_t* block) /* in: block to make younger */
{
return(buf_pool->freed_page_clock >= block->freed_page_clock
+ 1 + (buf_pool->curr_size / 1024));
}
/*************************************************************************
Gets the current size of buffer buf_pool in bytes. In the case of AWE, the
size of AWE window (= the frames). */
UNIV_INLINE
ulint
buf_pool_get_curr_size(void)
/*========================*/
/* out: size in bytes */
{
return((buf_pool->n_frames) * UNIV_PAGE_SIZE);
}
/*************************************************************************
Gets the maximum size of buffer buf_pool in bytes. In the case of AWE, the
size of AWE window (= the frames). */
UNIV_INLINE
ulint
buf_pool_get_max_size(void)
/*=======================*/
/* out: size in bytes */
{
return((buf_pool->n_frames) * UNIV_PAGE_SIZE);
}
/***********************************************************************
Accessor function for block array. */
UNIV_INLINE
buf_block_t*
buf_pool_get_nth_block(
/*===================*/
/* out: pointer to block */
buf_pool_t* buf_pool,/* in: buf_pool */
ulint i) /* in: index of the block */
{
ut_ad(buf_pool);
ut_ad(i < buf_pool->max_size);
return(i + buf_pool->blocks);
}
/***********************************************************************
Checks if a pointer points to the block array of the buffer pool (blocks, not
the frames). */
UNIV_INLINE
ibool
buf_pool_is_block(
/*==============*/
/* out: TRUE if pointer to block */
void* ptr) /* in: pointer to memory */
{
if ((buf_pool->blocks <= (buf_block_t*)ptr)
&& ((buf_block_t*)ptr < buf_pool->blocks + buf_pool->max_size)) {
return(TRUE);
}
return(FALSE);
}
/************************************************************************
Gets the smallest oldest_modification lsn for any page in the pool. Returns
ut_dulint_zero if all modified pages have been flushed to disk. */
UNIV_INLINE
dulint
buf_pool_get_oldest_modification(void)
/*==================================*/
/* out: oldest modification in pool,
ut_dulint_zero if none */
{
buf_block_t* block;
dulint lsn;
mutex_enter(&(buf_pool->mutex));
block = UT_LIST_GET_LAST(buf_pool->flush_list);
if (block == NULL) {
lsn = ut_dulint_zero;
} else {
lsn = block->oldest_modification;
}
mutex_exit(&(buf_pool->mutex));
return(lsn);
}
/***********************************************************************
Increments the buf_pool clock by one and returns its new value. Remember
that in the 32 bit version the clock wraps around at 4 billion! */
UNIV_INLINE
ulint
buf_pool_clock_tic(void)
/*====================*/
/* out: new clock value */
{
#ifdef UNIV_SYNC_DEBUG
ut_ad(mutex_own(&(buf_pool->mutex)));
#endif /* UNIV_SYNC_DEBUG */
buf_pool->ulint_clock++;
return(buf_pool->ulint_clock);
}
/*************************************************************************
Gets a pointer to the memory frame of a block. */
UNIV_INLINE
buf_frame_t*
buf_block_get_frame(
/*================*/
/* out: pointer to the frame */
buf_block_t* block) /* in: pointer to the control block */
{
ut_ad(block);
ut_ad(block >= buf_pool->blocks);
ut_ad(block < buf_pool->blocks + buf_pool->max_size);
ut_ad(block->state != BUF_BLOCK_NOT_USED);
ut_ad((block->state != BUF_BLOCK_FILE_PAGE)
|| (block->buf_fix_count > 0));
return(block->frame);
}
/*************************************************************************
Gets the space id of a block. */
UNIV_INLINE
ulint
buf_block_get_space(
/*================*/
/* out: space id */
buf_block_t* block) /* in: pointer to the control block */
{
ut_ad(block);
ut_ad(block >= buf_pool->blocks);
ut_ad(block < buf_pool->blocks + buf_pool->max_size);
ut_a(block->state == BUF_BLOCK_FILE_PAGE);
ut_ad(block->buf_fix_count > 0);
return(block->space);
}
/*************************************************************************
Gets the page number of a block. */
UNIV_INLINE
ulint
buf_block_get_page_no(
/*==================*/
/* out: page number */
buf_block_t* block) /* in: pointer to the control block */
{
ut_ad(block);
ut_ad(block >= buf_pool->blocks);
ut_ad(block < buf_pool->blocks + buf_pool->max_size);
ut_a(block->state == BUF_BLOCK_FILE_PAGE);
ut_ad(block->buf_fix_count > 0);
return(block->offset);
}
/***********************************************************************
Gets the block to whose frame the pointer is pointing to. */
UNIV_INLINE
buf_block_t*
buf_block_align(
/*============*/
/* out: pointer to block */
byte* ptr) /* in: pointer to a frame */
{
buf_block_t* block;
buf_frame_t* frame_zero;
ut_ad(ptr);
frame_zero = buf_pool->frame_zero;
if (UNIV_UNLIKELY((ulint)ptr < (ulint)frame_zero)
|| UNIV_UNLIKELY((ulint)ptr > (ulint)(buf_pool->high_end))) {
ut_print_timestamp(stderr);
fprintf(stderr,
"InnoDB: Error: trying to access a stray pointer %p\n"
"InnoDB: buf pool start is at %p, end at %p\n"
"InnoDB: Probable reason is database corruption or memory\n"
"InnoDB: corruption. If this happens in an InnoDB database recovery,\n"
"InnoDB: you can look from section 6.1 at http://www.innodb.com/ibman.html\n"
"InnoDB: how to force recovery.\n",
ptr, frame_zero,
buf_pool->high_end);
ut_error;
}
block = *(buf_pool->blocks_of_frames + (((ulint)(ptr - frame_zero))
>> UNIV_PAGE_SIZE_SHIFT));
return(block);
}
/***********************************************************************
Gets the frame the pointer is pointing to. */
UNIV_INLINE
buf_frame_t*
buf_frame_align(
/*============*/
/* out: pointer to frame */
byte* ptr) /* in: pointer to a frame */
{
buf_frame_t* frame;
ut_ad(ptr);
frame = ut_align_down(ptr, UNIV_PAGE_SIZE);
if (UNIV_UNLIKELY((ulint)frame < (ulint)(buf_pool->frame_zero))
|| UNIV_UNLIKELY((ulint)frame >= (ulint)(buf_pool->high_end))) {
ut_print_timestamp(stderr);
fprintf(stderr,
"InnoDB: Error: trying to access a stray pointer %p\n"
"InnoDB: buf pool start is at %p, end at %p\n"
"InnoDB: Probable reason is database corruption or memory\n"
"InnoDB: corruption. If this happens in an InnoDB database recovery,\n"
"InnoDB: you can look from section 6.1 at http://www.innodb.com/ibman.html\n"
"InnoDB: how to force recovery.\n",
ptr, buf_pool->frame_zero,
buf_pool->high_end);
ut_error;
}
return(frame);
}
/**************************************************************************
Gets the page number of a pointer pointing within a buffer frame containing
a file page. */
UNIV_INLINE
ulint
buf_frame_get_page_no(
/*==================*/
/* out: page number */
byte* ptr) /* in: pointer to within a buffer frame */
{
return(buf_block_get_page_no(buf_block_align(ptr)));
}
/**************************************************************************
Gets the space id of a pointer pointing within a buffer frame containing a
file page. */
UNIV_INLINE
ulint
buf_frame_get_space_id(
/*===================*/
/* out: space id */
byte* ptr) /* in: pointer to within a buffer frame */
{
return(buf_block_get_space(buf_block_align(ptr)));
}
/**************************************************************************
Gets the space id, page offset, and byte offset within page of a
pointer pointing to a buffer frame containing a file page. */
UNIV_INLINE
void
buf_ptr_get_fsp_addr(
/*=================*/
byte* ptr, /* in: pointer to a buffer frame */
ulint* space, /* out: space id */
fil_addr_t* addr) /* out: page offset and byte offset */
{
buf_block_t* block;
block = buf_block_align(ptr);
*space = buf_block_get_space(block);
addr->page = buf_block_get_page_no(block);
addr->boffset = ptr - buf_frame_align(ptr);
}
/**************************************************************************
Gets the hash value of the page the pointer is pointing to. This can be used
in searches in the lock hash table. */
UNIV_INLINE
ulint
buf_frame_get_lock_hash_val(
/*========================*/
/* out: lock hash value */
byte* ptr) /* in: pointer to within a buffer frame */
{
buf_block_t* block;
block = buf_block_align(ptr);
return(block->lock_hash_val);
}
/**************************************************************************
Gets the mutex number protecting the page record lock hash chain in the lock
table. */
UNIV_INLINE
mutex_t*
buf_frame_get_lock_mutex(
/*=====================*/
/* out: mutex */
byte* ptr) /* in: pointer to within a buffer frame */
{
buf_block_t* block;
block = buf_block_align(ptr);
return(block->lock_mutex);
}
/*************************************************************************
Copies contents of a buffer frame to a given buffer. */
UNIV_INLINE
byte*
buf_frame_copy(
/*===========*/
/* out: buf */
byte* buf, /* in: buffer to copy to */
buf_frame_t* frame) /* in: buffer frame */
{
ut_ad(buf && frame);
ut_memcpy(buf, frame, UNIV_PAGE_SIZE);
return(buf);
}
/************************************************************************
Calculates a folded value of a file page address to use in the page hash
table. */
UNIV_INLINE
ulint
buf_page_address_fold(
/*==================*/
/* out: the folded value */
ulint space, /* in: space id */
ulint offset) /* in: offset of the page within space */
{
return((space << 20) + space + offset);
}
/************************************************************************
This function is used to get info if there is an io operation
going on on a buffer page. */
UNIV_INLINE
ibool
buf_page_io_query(
/*==============*/
/* out: TRUE if io going on */
buf_block_t* block) /* in: buf_pool block, must be bufferfixed */
{
mutex_enter(&(buf_pool->mutex));
ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
ut_ad(block->buf_fix_count > 0);
if (block->io_fix != 0) {
mutex_exit(&(buf_pool->mutex));
return(TRUE);
}
mutex_exit(&(buf_pool->mutex));
return(FALSE);
}
/************************************************************************
Gets the youngest modification log sequence number for a frame. Returns zero
if not a file page or no modification occurred yet. */
UNIV_INLINE
dulint
buf_frame_get_newest_modification(
/*==============================*/
/* out: newest modification to the page */
buf_frame_t* frame) /* in: pointer to a frame */
{
buf_block_t* block;
dulint lsn;
ut_ad(frame);
block = buf_block_align(frame);
mutex_enter(&(buf_pool->mutex));
if (block->state == BUF_BLOCK_FILE_PAGE) {
lsn = block->newest_modification;
} else {
lsn = ut_dulint_zero;
}
mutex_exit(&(buf_pool->mutex));
return(lsn);
}
/************************************************************************
Increments the modify clock of a frame by 1. The caller must (1) own the
buf_pool mutex and block bufferfix count has to be zero, (2) or own an x-lock
on the block. */
UNIV_INLINE
dulint
buf_frame_modify_clock_inc(
/*=======================*/
/* out: new value */
buf_frame_t* frame) /* in: pointer to a frame */
{
buf_block_t* block;
ut_ad(frame);
block = buf_block_align(frame);
#ifdef UNIV_SYNC_DEBUG
ut_ad((mutex_own(&(buf_pool->mutex)) && (block->buf_fix_count == 0))
|| rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
#endif /*UNIV_SYNC_DEBUG */
UT_DULINT_INC(block->modify_clock);
return(block->modify_clock);
}
/************************************************************************
Increments the modify clock of a frame by 1. The caller must (1) own the
buf_pool mutex and block bufferfix count has to be zero, (2) or own an x-lock
on the block. */
UNIV_INLINE
dulint
buf_block_modify_clock_inc(
/*=======================*/
/* out: new value */
buf_block_t* block) /* in: block */
{
#ifdef UNIV_SYNC_DEBUG
ut_ad((mutex_own(&(buf_pool->mutex)) && (block->buf_fix_count == 0))
|| rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
#endif /* UNIV_SYNC_DEBUG */
UT_DULINT_INC(block->modify_clock);
return(block->modify_clock);
}
/************************************************************************
Returns the value of the modify clock. The caller must have an s-lock
or x-lock on the block. */
UNIV_INLINE
dulint
buf_block_get_modify_clock(
/*=======================*/
/* out: value */
buf_block_t* block) /* in: block */
{
#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
|| rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
#endif /* UNIV_SYNC_DEBUG */
return(block->modify_clock);
}
#ifdef UNIV_SYNC_DEBUG
/***********************************************************************
Increments the bufferfix count. */
UNIV_INLINE
void
buf_block_buf_fix_inc_debug(
/*========================*/
buf_block_t* block, /* in: block to bufferfix */
const char* file __attribute__ ((unused)), /* in: file name */
ulint line __attribute__ ((unused))) /* in: line */
{
#ifdef UNIV_SYNC_DEBUG
ibool ret;
ret = rw_lock_s_lock_func_nowait(&(block->debug_latch), file, line);
ut_ad(ret == TRUE);
#endif
block->buf_fix_count++;
}
#else /* UNIV_SYNC_DEBUG */
/***********************************************************************
Increments the bufferfix count. */
UNIV_INLINE
void
buf_block_buf_fix_inc(
/*==================*/
buf_block_t* block) /* in: block to bufferfix */
{
block->buf_fix_count++;
}
#endif /* UNIV_SYNC_DEBUG */
/**********************************************************************
Returns the control block of a file page, NULL if not found. */
UNIV_INLINE
buf_block_t*
buf_page_hash_get(
/*==============*/
/* out: block, NULL if not found */
ulint space, /* in: space id */
ulint offset) /* in: offset of the page within space */
{
buf_block_t* block;
ulint fold;
ut_ad(buf_pool);
#ifdef UNIV_SYNC_DEBUG
ut_ad(mutex_own(&(buf_pool->mutex)));
#endif /* UNIV_SYNC_DEBUG */
/* Look for the page in the hash table */
fold = buf_page_address_fold(space, offset);
HASH_SEARCH(hash, buf_pool->page_hash, fold, block,
(block->space == space) && (block->offset == offset));
ut_a(block == NULL || block->state == BUF_BLOCK_FILE_PAGE);
return(block);
}
/************************************************************************
Tries to get the page, but if file io is required, releases all latches
in mtr down to the given savepoint. If io is required, this function
retrieves the page to buffer buf_pool, but does not bufferfix it or latch
it. */
UNIV_INLINE
buf_frame_t*
buf_page_get_release_on_io(
/*=======================*/
/* out: pointer to the frame, or NULL
if not in buffer buf_pool */
ulint space, /* in: space id */
ulint offset, /* in: offset of the page within space
in units of a page */
buf_frame_t* guess, /* in: guessed frame or NULL */
ulint rw_latch, /* in: RW_X_LATCH, RW_S_LATCH,
or RW_NO_LATCH */
ulint savepoint, /* in: mtr savepoint */
mtr_t* mtr) /* in: mtr */
{
buf_frame_t* frame;
frame = buf_page_get_gen(space, offset, rw_latch, guess,
BUF_GET_IF_IN_POOL,
__FILE__, __LINE__,
mtr);
if (frame != NULL) {
return(frame);
}
/* The page was not in the buffer buf_pool: release the latches
down to the savepoint */
mtr_rollback_to_savepoint(mtr, savepoint);
buf_page_get(space, offset, RW_S_LATCH, mtr);
/* When we get here, the page is in buffer, but we release
the latches again down to the savepoint, before returning */
mtr_rollback_to_savepoint(mtr, savepoint);
return(NULL);
}
/************************************************************************
Decrements the bufferfix count of a buffer control block and releases
a latch, if specified. */
UNIV_INLINE
void
buf_page_release(
/*=============*/
buf_block_t* block, /* in: buffer block */
ulint rw_latch, /* in: RW_S_LATCH, RW_X_LATCH,
RW_NO_LATCH */
mtr_t* mtr) /* in: mtr */
{
ulint buf_fix_count;
ut_ad(block);
mutex_enter_fast(&(buf_pool->mutex));
ut_a(block->state == BUF_BLOCK_FILE_PAGE);
ut_a(block->buf_fix_count > 0);
if (rw_latch == RW_X_LATCH && mtr->modifications) {
buf_flush_note_modification(block, mtr);
}
#ifdef UNIV_SYNC_DEBUG
rw_lock_s_unlock(&(block->debug_latch));
#endif
buf_fix_count = block->buf_fix_count;
block->buf_fix_count = buf_fix_count - 1;
mutex_exit(&(buf_pool->mutex));
if (rw_latch == RW_S_LATCH) {
rw_lock_s_unlock(&(block->lock));
} else if (rw_latch == RW_X_LATCH) {
rw_lock_x_unlock(&(block->lock));
}
}
#ifdef UNIV_SYNC_DEBUG
/*************************************************************************
Adds latch level info for the rw-lock protecting the buffer frame. This
should be called in the debug version after a successful latching of a
page if we know the latching order level of the acquired latch. If
UNIV_SYNC_DEBUG is not defined, compiles to an empty function. */
UNIV_INLINE
void
buf_page_dbg_add_level(
/*===================*/
buf_frame_t* frame __attribute__((unused)), /* in: buffer page
where we have acquired latch */
ulint level __attribute__((unused))) /* in: latching order
level */
{
sync_thread_add_level(&(buf_block_align(frame)->lock), level);
}
#endif /* UNIV_SYNC_DEBUG */

120
include/buf0flu.h Normal file
View file

@ -0,0 +1,120 @@
/******************************************************
The database buffer pool flush algorithm
(c) 1995 Innobase Oy
Created 11/5/1995 Heikki Tuuri
*******************************************************/
#ifndef buf0flu_h
#define buf0flu_h
#include "univ.i"
#include "buf0types.h"
#include "ut0byte.h"
#include "mtr0types.h"
/************************************************************************
Updates the flush system data structures when a write is completed. */
void
buf_flush_write_complete(
/*=====================*/
buf_block_t* block); /* in: pointer to the block in question */
/*************************************************************************
Flushes pages from the end of the LRU list if there is too small
a margin of replaceable pages there. */
void
buf_flush_free_margin(void);
/*=======================*/
/************************************************************************
Initializes a page for writing to the tablespace. */
void
buf_flush_init_for_writing(
/*=======================*/
byte* page, /* in: page */
dulint newest_lsn, /* in: newest modification lsn to the page */
ulint space, /* in: space id */
ulint page_no); /* in: page number */
/***********************************************************************
This utility flushes dirty blocks from the end of the LRU list or flush_list.
NOTE 1: in the case of an LRU flush the calling thread may own latches to
pages: to avoid deadlocks, this function must be written so that it cannot
end up waiting for these latches! NOTE 2: in the case of a flush list flush,
the calling thread is not allowed to own any latches on pages! */
ulint
buf_flush_batch(
/*============*/
/* out: number of blocks for which the write
request was queued */
ulint flush_type, /* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST; if
BUF_FLUSH_LIST, then the caller must not own
any latches on pages */
ulint min_n, /* in: wished minimum mumber of blocks flushed
(it is not guaranteed that the actual number
is that big, though) */
dulint lsn_limit); /* in the case BUF_FLUSH_LIST all blocks whose
oldest_modification is smaller than this
should be flushed (if their number does not
exceed min_n), otherwise ignored */
/**********************************************************************
Waits until a flush batch of the given type ends */
void
buf_flush_wait_batch_end(
/*=====================*/
ulint type); /* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
/************************************************************************
This function should be called at a mini-transaction commit, if a page was
modified in it. Puts the block to the list of modified blocks, if it not
already in it. */
UNIV_INLINE
void
buf_flush_note_modification(
/*========================*/
buf_block_t* block, /* in: block which is modified */
mtr_t* mtr); /* in: mtr */
/************************************************************************
This function should be called when recovery has modified a buffer page. */
UNIV_INLINE
void
buf_flush_recv_note_modification(
/*=============================*/
buf_block_t* block, /* in: block which is modified */
dulint start_lsn, /* in: start lsn of the first mtr in a
set of mtr's */
dulint end_lsn); /* in: end lsn of the last mtr in the
set of mtr's */
/************************************************************************
Returns TRUE if the file page block is immediately suitable for replacement,
i.e., transition FILE_PAGE => NOT_USED allowed. */
ibool
buf_flush_ready_for_replace(
/*========================*/
/* out: TRUE if can replace immediately */
buf_block_t* block); /* in: buffer control block, must be in state
BUF_BLOCK_FILE_PAGE and in the LRU list */
/**********************************************************************
Validates the flush list. */
ibool
buf_flush_validate(void);
/*====================*/
/* out: TRUE if ok */
/* When buf_flush_free_margin is called, it tries to make this many blocks
available to replacement in the free list and at the end of the LRU list (to
make sure that a read-ahead batch can be read efficiently in a single
sweep). */
#define BUF_FLUSH_FREE_BLOCK_MARGIN (5 + BUF_READ_AHEAD_AREA)
#define BUF_FLUSH_EXTRA_MARGIN (BUF_FLUSH_FREE_BLOCK_MARGIN / 4 + 100)
#ifndef UNIV_NONINL
#include "buf0flu.ic"
#endif
#endif

106
include/buf0flu.ic Normal file
View file

@ -0,0 +1,106 @@
/******************************************************
The database buffer pool flush algorithm
(c) 1995 Innobase Oy
Created 11/5/1995 Heikki Tuuri
*******************************************************/
#include "buf0buf.h"
#include "mtr0mtr.h"
/************************************************************************
Inserts a modified block into the flush list. */
void
buf_flush_insert_into_flush_list(
/*=============================*/
buf_block_t* block); /* in: block which is modified */
/************************************************************************
Inserts a modified block into the flush list in the right sorted position.
This function is used by recovery, because there the modifications do not
necessarily come in the order of lsn's. */
void
buf_flush_insert_sorted_into_flush_list(
/*====================================*/
buf_block_t* block); /* in: block which is modified */
/************************************************************************
This function should be called at a mini-transaction commit, if a page was
modified in it. Puts the block to the list of modified blocks, if it is not
already in it. */
UNIV_INLINE
void
buf_flush_note_modification(
/*========================*/
buf_block_t* block, /* in: block which is modified */
mtr_t* mtr) /* in: mtr */
{
ut_ad(block);
ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
ut_ad(block->buf_fix_count > 0);
#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
ut_ad(mutex_own(&(buf_pool->mutex)));
#endif /* UNIV_SYNC_DEBUG */
ut_ad(ut_dulint_cmp(mtr->start_lsn, ut_dulint_zero) != 0);
ut_ad(mtr->modifications);
ut_ad(ut_dulint_cmp(block->newest_modification, mtr->end_lsn) <= 0);
block->newest_modification = mtr->end_lsn;
if (ut_dulint_is_zero(block->oldest_modification)) {
block->oldest_modification = mtr->start_lsn;
ut_ad(!ut_dulint_is_zero(block->oldest_modification));
buf_flush_insert_into_flush_list(block);
} else {
ut_ad(ut_dulint_cmp(block->oldest_modification,
mtr->start_lsn) <= 0);
}
++srv_buf_pool_write_requests;
}
/************************************************************************
This function should be called when recovery has modified a buffer page. */
UNIV_INLINE
void
buf_flush_recv_note_modification(
/*=============================*/
buf_block_t* block, /* in: block which is modified */
dulint start_lsn, /* in: start lsn of the first mtr in a
set of mtr's */
dulint end_lsn) /* in: end lsn of the last mtr in the
set of mtr's */
{
ut_ad(block);
ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
ut_ad(block->buf_fix_count > 0);
#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
mutex_enter(&(buf_pool->mutex));
ut_ad(ut_dulint_cmp(block->newest_modification, end_lsn) <= 0);
block->newest_modification = end_lsn;
if (ut_dulint_is_zero(block->oldest_modification)) {
block->oldest_modification = start_lsn;
ut_ad(!ut_dulint_is_zero(block->oldest_modification));
buf_flush_insert_sorted_into_flush_list(block);
} else {
ut_ad(ut_dulint_cmp(block->oldest_modification,
start_lsn) <= 0);
}
mutex_exit(&(buf_pool->mutex));
}

144
include/buf0lru.h Normal file
View file

@ -0,0 +1,144 @@
/******************************************************
The database buffer pool LRU replacement algorithm
(c) 1995 Innobase Oy
Created 11/5/1995 Heikki Tuuri
*******************************************************/
#ifndef buf0lru_h
#define buf0lru_h
#include "univ.i"
#include "ut0byte.h"
#include "buf0types.h"
/**********************************************************************
Tries to remove LRU flushed blocks from the end of the LRU list and put them
to the free list. This is beneficial for the efficiency of the insert buffer
operation, as flushed pages from non-unique non-clustered indexes are here
taken out of the buffer pool, and their inserts redirected to the insert
buffer. Otherwise, the flushed blocks could get modified again before read
operations need new buffer blocks, and the i/o work done in flushing would be
wasted. */
void
buf_LRU_try_free_flushed_blocks(void);
/*==================================*/
/**********************************************************************
Returns TRUE if less than 15 % of the buffer pool is available. This can be
used in heuristics to prevent huge transactions eating up the whole buffer
pool for their locks. */
ibool
buf_LRU_buf_pool_running_out(void);
/*==============================*/
/* out: TRUE if less than 15 % of buffer pool
left */
/*#######################################################################
These are low-level functions
#########################################################################*/
/* Minimum LRU list length for which the LRU_old pointer is defined */
#define BUF_LRU_OLD_MIN_LEN 80
#define BUF_LRU_FREE_SEARCH_LEN (5 + 2 * BUF_READ_AHEAD_AREA)
/**********************************************************************
Invalidates all pages belonging to a given tablespace when we are deleting
the data file(s) of that tablespace. A PROBLEM: if readahead is being started,
what guarantees that it will not try to read in pages after this operation has
completed? */
void
buf_LRU_invalidate_tablespace(
/*==========================*/
ulint id); /* in: space id */
/**********************************************************************
Gets the minimum LRU_position field for the blocks in an initial segment
(determined by BUF_LRU_INITIAL_RATIO) of the LRU list. The limit is not
guaranteed to be precise, because the ulint_clock may wrap around. */
ulint
buf_LRU_get_recent_limit(void);
/*==========================*/
/* out: the limit; zero if could not determine it */
/**********************************************************************
Look for a replaceable block from the end of the LRU list and put it to
the free list if found. */
ibool
buf_LRU_search_and_free_block(
/*==========================*/
/* out: TRUE if freed */
ulint n_iterations); /* in: how many times this has been called
repeatedly without result: a high value means
that we should search farther; if value is
k < 10, then we only search k/10 * number
of pages in the buffer pool from the end
of the LRU list */
/**********************************************************************
Returns a free block from the buf_pool. The block is taken off the
free list. If it is empty, blocks are moved from the end of the
LRU list to the free list. */
buf_block_t*
buf_LRU_get_free_block(void);
/*=========================*/
/* out: the free control block; also if AWE is
used, it is guaranteed that the block has its
page mapped to a frame when we return */
/**********************************************************************
Puts a block back to the free list. */
void
buf_LRU_block_free_non_file_page(
/*=============================*/
buf_block_t* block); /* in: block, must not contain a file page */
/**********************************************************************
Adds a block to the LRU list. */
void
buf_LRU_add_block(
/*==============*/
buf_block_t* block, /* in: control block */
ibool old); /* in: TRUE if should be put to the old
blocks in the LRU list, else put to the
start; if the LRU list is very short, added to
the start regardless of this parameter */
/**********************************************************************
Moves a block to the start of the LRU list. */
void
buf_LRU_make_block_young(
/*=====================*/
buf_block_t* block); /* in: control block */
/**********************************************************************
Moves a block to the end of the LRU list. */
void
buf_LRU_make_block_old(
/*===================*/
buf_block_t* block); /* in: control block */
#ifdef UNIV_DEBUG
/**************************************************************************
Validates the LRU list. */
ibool
buf_LRU_validate(void);
/*==================*/
/**************************************************************************
Prints the LRU list. */
void
buf_LRU_print(void);
/*===============*/
#endif /* UNIV_DEBUG */
#ifndef UNIV_NONINL
#include "buf0lru.ic"
#endif
#endif

8
include/buf0lru.ic Normal file
View file

@ -0,0 +1,8 @@
/******************************************************
The database buffer replacement algorithm
(c) 1995 Innobase Oy
Created 11/5/1995 Heikki Tuuri
*******************************************************/

103
include/buf0rea.h Normal file
View file

@ -0,0 +1,103 @@
/******************************************************
The database buffer read
(c) 1995 Innobase Oy
Created 11/5/1995 Heikki Tuuri
*******************************************************/
#ifndef buf0rea_h
#define buf0rea_h
#include "univ.i"
#include "buf0types.h"
/************************************************************************
High-level function which reads a page asynchronously from a file to the
buffer buf_pool if it is not already there. Sets the io_fix flag and sets
an exclusive lock on the buffer frame. The flag is cleared and the x-lock
released by the i/o-handler thread. Does a random read-ahead if it seems
sensible. */
ulint
buf_read_page(
/*==========*/
/* out: number of page read requests issued: this can
be > 1 if read-ahead occurred */
ulint space, /* in: space id */
ulint offset);/* in: page number */
/************************************************************************
Applies linear read-ahead if in the buf_pool the page is a border page of
a linear read-ahead area and all the pages in the area have been accessed.
Does not read any page if the read-ahead mechanism is not activated. Note
that the the algorithm looks at the 'natural' adjacent successor and
predecessor of the page, which on the leaf level of a B-tree are the next
and previous page in the chain of leaves. To know these, the page specified
in (space, offset) must already be present in the buf_pool. Thus, the
natural way to use this function is to call it when a page in the buf_pool
is accessed the first time, calling this function just after it has been
bufferfixed.
NOTE 1: as this function looks at the natural predecessor and successor
fields on the page, what happens, if these are not initialized to any
sensible value? No problem, before applying read-ahead we check that the
area to read is within the span of the space, if not, read-ahead is not
applied. An uninitialized value may result in a useless read operation, but
only very improbably.
NOTE 2: the calling thread may own latches on pages: to avoid deadlocks this
function must be written such that it cannot end up waiting for these
latches!
NOTE 3: the calling thread must want access to the page given: this rule is
set to prevent unintended read-aheads performed by ibuf routines, a situation
which could result in a deadlock if the OS does not support asynchronous io. */
ulint
buf_read_ahead_linear(
/*==================*/
/* out: number of page read requests issued */
ulint space, /* in: space id */
ulint offset);/* in: page number of a page; NOTE: the current thread
must want access to this page (see NOTE 3 above) */
/************************************************************************
Issues read requests for pages which the ibuf module wants to read in, in
order to contract the insert buffer tree. Technically, this function is like
a read-ahead function. */
void
buf_read_ibuf_merge_pages(
/*======================*/
ibool sync, /* in: TRUE if the caller wants this function
to wait for the highest address page to get
read in, before this function returns */
ulint* space_ids, /* in: array of space ids */
ib_longlong* space_versions,/* in: the spaces must have this version
number (timestamp), otherwise we discard the
read; we use this to cancel reads if
DISCARD + IMPORT may have changed the
tablespace size */
ulint* page_nos, /* in: array of page numbers to read, with the
highest page number the last in the array */
ulint n_stored); /* in: number of page numbers in the array */
/************************************************************************
Issues read requests for pages which recovery wants to read in. */
void
buf_read_recv_pages(
/*================*/
ibool sync, /* in: TRUE if the caller wants this function
to wait for the highest address page to get
read in, before this function returns */
ulint space, /* in: space id */
ulint* page_nos, /* in: array of page numbers to read, with the
highest page number the last in the array */
ulint n_stored); /* in: number of page numbers in the array */
/* The size in pages of the area which the read-ahead algorithms read if
invoked */
#define BUF_READ_AHEAD_AREA ut_min(64, ut_2_power_up(buf_pool->curr_size / 32))
/* Modes used in read-ahead */
#define BUF_READ_IBUF_PAGES_ONLY 131
#define BUF_READ_ANY_PAGE 132
#endif

20
include/buf0types.h Normal file
View file

@ -0,0 +1,20 @@
/******************************************************
The database buffer pool global types for the directory
(c) 1995 Innobase Oy
Created 11/17/1995 Heikki Tuuri
*******************************************************/
#ifndef buf0types_h
#define buf0types_h
typedef struct buf_block_struct buf_block_t;
typedef struct buf_pool_struct buf_pool_t;
/* The 'type' used of a buffer frame */
typedef byte buf_frame_t;
#endif

424
include/data0data.h Normal file
View file

@ -0,0 +1,424 @@
/************************************************************************
SQL data field and tuple
(c) 1994-1996 Innobase Oy
Created 5/30/1994 Heikki Tuuri
*************************************************************************/
#ifndef data0data_h
#define data0data_h
#include "univ.i"
#include "data0types.h"
#include "data0type.h"
#include "mem0mem.h"
#include "dict0types.h"
typedef struct big_rec_struct big_rec_t;
/* Some non-inlined functions used in the MySQL interface: */
void
dfield_set_data_noninline(
dfield_t* field, /* in: field */
void* data, /* in: data */
ulint len); /* in: length or UNIV_SQL_NULL */
void*
dfield_get_data_noninline(
dfield_t* field); /* in: field */
ulint
dfield_get_len_noninline(
dfield_t* field); /* in: field */
ulint
dtuple_get_n_fields_noninline(
dtuple_t* tuple); /* in: tuple */
dfield_t*
dtuple_get_nth_field_noninline(
dtuple_t* tuple, /* in: tuple */
ulint n); /* in: index of field */
/*************************************************************************
Gets pointer to the type struct of SQL data field. */
UNIV_INLINE
dtype_t*
dfield_get_type(
/*============*/
/* out: pointer to the type struct */
dfield_t* field); /* in: SQL data field */
/*************************************************************************
Sets the type struct of SQL data field. */
UNIV_INLINE
void
dfield_set_type(
/*============*/
dfield_t* field, /* in: SQL data field */
dtype_t* type); /* in: pointer to data type struct */
/*************************************************************************
Gets pointer to the data in a field. */
UNIV_INLINE
void*
dfield_get_data(
/*============*/
/* out: pointer to data */
dfield_t* field); /* in: field */
/*************************************************************************
Gets length of field data. */
UNIV_INLINE
ulint
dfield_get_len(
/*===========*/
/* out: length of data; UNIV_SQL_NULL if
SQL null data */
dfield_t* field); /* in: field */
/*************************************************************************
Sets length in a field. */
UNIV_INLINE
void
dfield_set_len(
/*===========*/
dfield_t* field, /* in: field */
ulint len); /* in: length or UNIV_SQL_NULL */
/*************************************************************************
Sets pointer to the data and length in a field. */
UNIV_INLINE
void
dfield_set_data(
/*============*/
dfield_t* field, /* in: field */
const void* data, /* in: data */
ulint len); /* in: length or UNIV_SQL_NULL */
/**************************************************************************
Writes an SQL null field full of zeros. */
UNIV_INLINE
void
data_write_sql_null(
/*================*/
byte* data, /* in: pointer to a buffer of size len */
ulint len); /* in: SQL null size in bytes */
/*************************************************************************
Copies the data and len fields. */
UNIV_INLINE
void
dfield_copy_data(
/*=============*/
dfield_t* field1, /* in: field to copy to */
dfield_t* field2);/* in: field to copy from */
/*************************************************************************
Copies a data field to another. */
UNIV_INLINE
void
dfield_copy(
/*========*/
dfield_t* field1, /* in: field to copy to */
dfield_t* field2);/* in: field to copy from */
/*************************************************************************
Tests if data length and content is equal for two dfields. */
UNIV_INLINE
ibool
dfield_datas_are_binary_equal(
/*==========================*/
/* out: TRUE if equal */
dfield_t* field1, /* in: field */
dfield_t* field2);/* in: field */
/*************************************************************************
Tests if dfield data length and content is equal to the given. */
ibool
dfield_data_is_binary_equal(
/*========================*/
/* out: TRUE if equal */
dfield_t* field, /* in: field */
ulint len, /* in: data length or UNIV_SQL_NULL */
byte* data); /* in: data */
/*************************************************************************
Gets number of fields in a data tuple. */
UNIV_INLINE
ulint
dtuple_get_n_fields(
/*================*/
/* out: number of fields */
dtuple_t* tuple); /* in: tuple */
/*************************************************************************
Gets nth field of a tuple. */
UNIV_INLINE
dfield_t*
dtuple_get_nth_field(
/*=================*/
/* out: nth field */
dtuple_t* tuple, /* in: tuple */
ulint n); /* in: index of field */
/*************************************************************************
Gets info bits in a data tuple. */
UNIV_INLINE
ulint
dtuple_get_info_bits(
/*=================*/
/* out: info bits */
dtuple_t* tuple); /* in: tuple */
/*************************************************************************
Sets info bits in a data tuple. */
UNIV_INLINE
void
dtuple_set_info_bits(
/*=================*/
dtuple_t* tuple, /* in: tuple */
ulint info_bits); /* in: info bits */
/*************************************************************************
Gets number of fields used in record comparisons. */
UNIV_INLINE
ulint
dtuple_get_n_fields_cmp(
/*====================*/
/* out: number of fields used in comparisons
in rem0cmp.* */
dtuple_t* tuple); /* in: tuple */
/*************************************************************************
Gets number of fields used in record comparisons. */
UNIV_INLINE
void
dtuple_set_n_fields_cmp(
/*====================*/
dtuple_t* tuple, /* in: tuple */
ulint n_fields_cmp); /* in: number of fields used in
comparisons in rem0cmp.* */
/**************************************************************
Creates a data tuple to a memory heap. The default value for number
of fields used in record comparisons for this tuple is n_fields. */
UNIV_INLINE
dtuple_t*
dtuple_create(
/*==========*/
/* out, own: created tuple */
mem_heap_t* heap, /* in: memory heap where the tuple
is created */
ulint n_fields); /* in: number of fields */
/*************************************************************************
Creates a dtuple for use in MySQL. */
dtuple_t*
dtuple_create_for_mysql(
/*====================*/
/* out, own created dtuple */
void** heap, /* out: created memory heap */
ulint n_fields); /* in: number of fields */
/*************************************************************************
Frees a dtuple used in MySQL. */
void
dtuple_free_for_mysql(
/*==================*/
void* heap);
/*************************************************************************
Sets number of fields used in a tuple. Normally this is set in
dtuple_create, but if you want later to set it smaller, you can use this. */
void
dtuple_set_n_fields(
/*================*/
dtuple_t* tuple, /* in: tuple */
ulint n_fields); /* in: number of fields */
/**************************************************************
The following function returns the sum of data lengths of a tuple. The space
occupied by the field structs or the tuple struct is not counted. */
UNIV_INLINE
ulint
dtuple_get_data_size(
/*=================*/
/* out: sum of data lens */
dtuple_t* tuple); /* in: typed data tuple */
/****************************************************************
Returns TRUE if lengths of two dtuples are equal and respective data fields
in them are equal when compared with collation in char fields (not as binary
strings). */
ibool
dtuple_datas_are_ordering_equal(
/*============================*/
/* out: TRUE if length and fieds are equal
when compared with cmp_data_data:
NOTE: in character type fields some letters
are identified with others! (collation) */
dtuple_t* tuple1, /* in: tuple 1 */
dtuple_t* tuple2);/* in: tuple 2 */
/****************************************************************
Folds a prefix given as the number of fields of a tuple. */
UNIV_INLINE
ulint
dtuple_fold(
/*========*/
/* out: the folded value */
dtuple_t* tuple, /* in: the tuple */
ulint n_fields,/* in: number of complete fields to fold */
ulint n_bytes,/* in: number of bytes to fold in an
incomplete last field */
dulint tree_id);/* in: index tree id */
/***********************************************************************
Sets types of fields binary in a tuple. */
UNIV_INLINE
void
dtuple_set_types_binary(
/*====================*/
dtuple_t* tuple, /* in: data tuple */
ulint n); /* in: number of fields to set */
/**************************************************************************
Checks if a dtuple contains an SQL null value. */
UNIV_INLINE
ibool
dtuple_contains_null(
/*=================*/
/* out: TRUE if some field is SQL null */
dtuple_t* tuple); /* in: dtuple */
/**************************************************************
Checks that a data field is typed. Asserts an error if not. */
ibool
dfield_check_typed(
/*===============*/
/* out: TRUE if ok */
dfield_t* field); /* in: data field */
/**************************************************************
Checks that a data tuple is typed. Asserts an error if not. */
ibool
dtuple_check_typed(
/*===============*/
/* out: TRUE if ok */
dtuple_t* tuple); /* in: tuple */
/**************************************************************
Checks that a data tuple is typed. */
ibool
dtuple_check_typed_no_assert(
/*=========================*/
/* out: TRUE if ok */
dtuple_t* tuple); /* in: tuple */
#ifdef UNIV_DEBUG
/**************************************************************
Validates the consistency of a tuple which must be complete, i.e,
all fields must have been set. */
ibool
dtuple_validate(
/*============*/
/* out: TRUE if ok */
dtuple_t* tuple); /* in: tuple */
#endif /* UNIV_DEBUG */
/*****************************************************************
Pretty prints a dfield value according to its data type. */
void
dfield_print(
/*=========*/
dfield_t* dfield);/* in: dfield */
/*****************************************************************
Pretty prints a dfield value according to its data type. Also the hex string
is printed if a string contains non-printable characters. */
void
dfield_print_also_hex(
/*==================*/
dfield_t* dfield); /* in: dfield */
/**************************************************************
The following function prints the contents of a tuple. */
void
dtuple_print(
/*=========*/
FILE* f, /* in: output stream */
dtuple_t* tuple); /* in: tuple */
/******************************************************************
Moves parts of long fields in entry to the big record vector so that
the size of tuple drops below the maximum record size allowed in the
database. Moves data only from those fields which are not necessary
to determine uniquely the insertion place of the tuple in the index. */
big_rec_t*
dtuple_convert_big_rec(
/*===================*/
/* out, own: created big record vector,
NULL if we are not able to shorten
the entry enough, i.e., if there are
too many short fields in entry */
dict_index_t* index, /* in: index */
dtuple_t* entry, /* in: index entry */
ulint* ext_vec,/* in: array of externally stored fields,
or NULL: if a field already is externally
stored, then we cannot move it to the vector
this function returns */
ulint n_ext_vec);/* in: number of elements is ext_vec */
/******************************************************************
Puts back to entry the data stored in vector. Note that to ensure the
fields in entry can accommodate the data, vector must have been created
from entry with dtuple_convert_big_rec. */
void
dtuple_convert_back_big_rec(
/*========================*/
dict_index_t* index, /* in: index */
dtuple_t* entry, /* in: entry whose data was put to vector */
big_rec_t* vector);/* in, own: big rec vector; it is
freed in this function */
/******************************************************************
Frees the memory in a big rec vector. */
void
dtuple_big_rec_free(
/*================*/
big_rec_t* vector); /* in, own: big rec vector; it is
freed in this function */
/*######################################################################*/
/* Structure for an SQL data field */
struct dfield_struct{
void* data; /* pointer to data */
ulint len; /* data length; UNIV_SQL_NULL if SQL null; */
dtype_t type; /* type of data */
};
struct dtuple_struct {
ulint info_bits; /* info bits of an index record:
the default is 0; this field is used
if an index record is built from
a data tuple */
ulint n_fields; /* number of fields in dtuple */
ulint n_fields_cmp; /* number of fields which should
be used in comparison services
of rem0cmp.*; the index search
is performed by comparing only these
fields, others are ignored; the
default value in dtuple creation is
the same value as n_fields */
dfield_t* fields; /* fields */
UT_LIST_NODE_T(dtuple_t) tuple_list;
/* data tuples can be linked into a
list using this field */
ulint magic_n;
};
#define DATA_TUPLE_MAGIC_N 65478679
/* A slot for a field in a big rec vector */
typedef struct big_rec_field_struct big_rec_field_t;
struct big_rec_field_struct {
ulint field_no; /* field number in record */
ulint len; /* stored data len */
byte* data; /* stored data */
};
/* Storage format for overflow data in a big record, that is, a record
which needs external storage of data fields */
struct big_rec_struct {
mem_heap_t* heap; /* memory heap from which allocated */
ulint n_fields; /* number of stored fields */
big_rec_field_t* fields; /* stored fields */
};
#ifndef UNIV_NONINL
#include "data0data.ic"
#endif
#endif

433
include/data0data.ic Normal file
View file

@ -0,0 +1,433 @@
/************************************************************************
SQL data field and tuple
(c) 1994-1996 Innobase Oy
Created 5/30/1994 Heikki Tuuri
*************************************************************************/
#include "mem0mem.h"
#include "ut0rnd.h"
extern byte data_error;
/*************************************************************************
Gets pointer to the type struct of SQL data field. */
UNIV_INLINE
dtype_t*
dfield_get_type(
/*============*/
/* out: pointer to the type struct */
dfield_t* field) /* in: SQL data field */
{
ut_ad(field);
return(&(field->type));
}
/*************************************************************************
Sets the type struct of SQL data field. */
UNIV_INLINE
void
dfield_set_type(
/*============*/
dfield_t* field, /* in: SQL data field */
dtype_t* type) /* in: pointer to data type struct */
{
ut_ad(field && type);
field->type = *type;
}
/*************************************************************************
Gets pointer to the data in a field. */
UNIV_INLINE
void*
dfield_get_data(
/*============*/
/* out: pointer to data */
dfield_t* field) /* in: field */
{
ut_ad(field);
ut_ad((field->len == UNIV_SQL_NULL)
|| (field->data != &data_error));
return(field->data);
}
/*************************************************************************
Gets length of field data. */
UNIV_INLINE
ulint
dfield_get_len(
/*===========*/
/* out: length of data; UNIV_SQL_NULL if
SQL null data */
dfield_t* field) /* in: field */
{
ut_ad(field);
ut_ad((field->len == UNIV_SQL_NULL)
|| (field->data != &data_error));
return(field->len);
}
/*************************************************************************
Sets length in a field. */
UNIV_INLINE
void
dfield_set_len(
/*===========*/
dfield_t* field, /* in: field */
ulint len) /* in: length or UNIV_SQL_NULL */
{
ut_ad(field);
field->len = len;
}
/*************************************************************************
Sets pointer to the data and length in a field. */
UNIV_INLINE
void
dfield_set_data(
/*============*/
dfield_t* field, /* in: field */
const void* data, /* in: data */
ulint len) /* in: length or UNIV_SQL_NULL */
{
ut_ad(field);
field->data = (void*) data;
field->len = len;
}
/*************************************************************************
Copies the data and len fields. */
UNIV_INLINE
void
dfield_copy_data(
/*=============*/
dfield_t* field1, /* in: field to copy to */
dfield_t* field2) /* in: field to copy from */
{
ut_ad(field1 && field2);
field1->data = field2->data;
field1->len = field2->len;
}
/*************************************************************************
Copies a data field to another. */
UNIV_INLINE
void
dfield_copy(
/*========*/
dfield_t* field1, /* in: field to copy to */
dfield_t* field2) /* in: field to copy from */
{
*field1 = *field2;
}
/*************************************************************************
Tests if data length and content is equal for two dfields. */
UNIV_INLINE
ibool
dfield_datas_are_binary_equal(
/*==========================*/
/* out: TRUE if equal */
dfield_t* field1, /* in: field */
dfield_t* field2) /* in: field */
{
ulint len;
len = field1->len;
if ((len != field2->len)
|| ((len != UNIV_SQL_NULL)
&& (0 != ut_memcmp(field1->data, field2->data, len)))) {
return(FALSE);
}
return(TRUE);
}
/*************************************************************************
Gets info bits in a data tuple. */
UNIV_INLINE
ulint
dtuple_get_info_bits(
/*=================*/
/* out: info bits */
dtuple_t* tuple) /* in: tuple */
{
ut_ad(tuple);
return(tuple->info_bits);
}
/*************************************************************************
Sets info bits in a data tuple. */
UNIV_INLINE
void
dtuple_set_info_bits(
/*=================*/
dtuple_t* tuple, /* in: tuple */
ulint info_bits) /* in: info bits */
{
ut_ad(tuple);
tuple->info_bits = info_bits;
}
/*************************************************************************
Gets number of fields used in record comparisons. */
UNIV_INLINE
ulint
dtuple_get_n_fields_cmp(
/*====================*/
/* out: number of fields used in comparisons
in rem0cmp.* */
dtuple_t* tuple) /* in: tuple */
{
ut_ad(tuple);
return(tuple->n_fields_cmp);
}
/*************************************************************************
Sets number of fields used in record comparisons. */
UNIV_INLINE
void
dtuple_set_n_fields_cmp(
/*====================*/
dtuple_t* tuple, /* in: tuple */
ulint n_fields_cmp) /* in: number of fields used in
comparisons in rem0cmp.* */
{
ut_ad(tuple);
ut_ad(n_fields_cmp <= tuple->n_fields);
tuple->n_fields_cmp = n_fields_cmp;
}
/*************************************************************************
Gets number of fields in a data tuple. */
UNIV_INLINE
ulint
dtuple_get_n_fields(
/*================*/
/* out: number of fields */
dtuple_t* tuple) /* in: tuple */
{
ut_ad(tuple);
return(tuple->n_fields);
}
/*************************************************************************
Gets nth field of a tuple. */
UNIV_INLINE
dfield_t*
dtuple_get_nth_field(
/*=================*/
/* out: nth field */
dtuple_t* tuple, /* in: tuple */
ulint n) /* in: index of field */
{
ut_ad(tuple);
ut_ad(n < tuple->n_fields);
return(tuple->fields + n);
}
/**************************************************************
Creates a data tuple to a memory heap. The default value for number
of fields used in record comparisons for this tuple is n_fields. */
UNIV_INLINE
dtuple_t*
dtuple_create(
/*==========*/
/* out, own: created tuple */
mem_heap_t* heap, /* in: memory heap where the tuple
is created */
ulint n_fields) /* in: number of fields */
{
dtuple_t* tuple;
ut_ad(heap);
tuple = (dtuple_t*) mem_heap_alloc(heap, sizeof(dtuple_t)
+ n_fields * sizeof(dfield_t));
tuple->info_bits = 0;
tuple->n_fields = n_fields;
tuple->n_fields_cmp = n_fields;
tuple->fields = (dfield_t*)(((byte*)tuple) + sizeof(dtuple_t));
#ifdef UNIV_DEBUG
tuple->magic_n = DATA_TUPLE_MAGIC_N;
{ /* In the debug version, initialize fields to an error value */
ulint i;
for (i = 0; i < n_fields; i++) {
(tuple->fields + i)->data = &data_error;
dfield_get_type(tuple->fields + i)->mtype = DATA_ERROR;
}
}
#endif
return(tuple);
}
/**************************************************************
The following function returns the sum of data lengths of a tuple. The space
occupied by the field structs or the tuple struct is not counted. Neither
is possible space in externally stored parts of the field. */
UNIV_INLINE
ulint
dtuple_get_data_size(
/*=================*/
/* out: sum of data lengths */
dtuple_t* tuple) /* in: typed data tuple */
{
dfield_t* field;
ulint n_fields;
ulint len;
ulint i;
ulint sum = 0;
ut_ad(tuple);
ut_ad(dtuple_check_typed(tuple));
ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N);
n_fields = tuple->n_fields;
for (i = 0; i < n_fields; i++) {
field = dtuple_get_nth_field(tuple, i);
len = dfield_get_len(field);
if (len == UNIV_SQL_NULL) {
len = dtype_get_sql_null_size(dfield_get_type(field));
}
sum += len;
}
return(sum);
}
/***********************************************************************
Sets types of fields binary in a tuple. */
UNIV_INLINE
void
dtuple_set_types_binary(
/*====================*/
dtuple_t* tuple, /* in: data tuple */
ulint n) /* in: number of fields to set */
{
dtype_t* dfield_type;
ulint i;
for (i = 0; i < n; i++) {
dfield_type = dfield_get_type(dtuple_get_nth_field(tuple, i));
dtype_set(dfield_type, DATA_BINARY, 0, 0, 0);
}
}
/****************************************************************
Folds a prefix given as the number of fields of a tuple. */
UNIV_INLINE
ulint
dtuple_fold(
/*========*/
/* out: the folded value */
dtuple_t* tuple, /* in: the tuple */
ulint n_fields,/* in: number of complete fields to fold */
ulint n_bytes,/* in: number of bytes to fold in an
incomplete last field */
dulint tree_id)/* in: index tree id */
{
dfield_t* field;
ulint i;
byte* data;
ulint len;
ulint fold;
ut_ad(tuple);
ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N);
ut_ad(dtuple_check_typed(tuple));
fold = ut_fold_dulint(tree_id);
for (i = 0; i < n_fields; i++) {
field = dtuple_get_nth_field(tuple, i);
data = (byte*) dfield_get_data(field);
len = dfield_get_len(field);
if (len != UNIV_SQL_NULL) {
fold = ut_fold_ulint_pair(fold,
ut_fold_binary(data, len));
}
}
if (n_bytes > 0) {
field = dtuple_get_nth_field(tuple, i);
data = (byte*) dfield_get_data(field);
len = dfield_get_len(field);
if (len != UNIV_SQL_NULL) {
if (len > n_bytes) {
len = n_bytes;
}
fold = ut_fold_ulint_pair(fold,
ut_fold_binary(data, len));
}
}
return(fold);
}
/**************************************************************************
Writes an SQL null field full of zeros. */
UNIV_INLINE
void
data_write_sql_null(
/*================*/
byte* data, /* in: pointer to a buffer of size len */
ulint len) /* in: SQL null size in bytes */
{
ulint j;
for (j = 0; j < len; j++) {
data[j] = '\0';
}
}
/**************************************************************************
Checks if a dtuple contains an SQL null value. */
UNIV_INLINE
ibool
dtuple_contains_null(
/*=================*/
/* out: TRUE if some field is SQL null */
dtuple_t* tuple) /* in: dtuple */
{
ulint n;
ulint i;
n = dtuple_get_n_fields(tuple);
for (i = 0; i < n; i++) {
if (dfield_get_len(dtuple_get_nth_field(tuple, i))
== UNIV_SQL_NULL) {
return(TRUE);
}
}
return(FALSE);
}

430
include/data0type.h Normal file
View file

@ -0,0 +1,430 @@
/******************************************************
Data types
(c) 1996 Innobase Oy
Created 1/16/1996 Heikki Tuuri
*******************************************************/
#ifndef data0type_h
#define data0type_h
#include "univ.i"
extern ulint data_mysql_default_charset_coll;
#define DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL 8
/* SQL data type struct */
typedef struct dtype_struct dtype_t;
/* This variable is initialized as the standard binary variable length
data type */
extern dtype_t* dtype_binary;
/*-------------------------------------------*/
/* The 'MAIN TYPE' of a column */
#define DATA_VARCHAR 1 /* character varying of the
latin1_swedish_ci charset-collation; note
that the MySQL format for this, DATA_BINARY,
DATA_VARMYSQL, is also affected by whether the
'precise type' contains
DATA_MYSQL_TRUE_VARCHAR */
#define DATA_CHAR 2 /* fixed length character of the
latin1_swedish_ci charset-collation */
#define DATA_FIXBINARY 3 /* binary string of fixed length */
#define DATA_BINARY 4 /* binary string */
#define DATA_BLOB 5 /* binary large object, or a TEXT type;
if prtype & DATA_BINARY_TYPE == 0, then this is
actually a TEXT column (or a BLOB created
with < 4.0.14; since column prefix indexes
came only in 4.0.14, the missing flag in BLOBs
created before that does not cause any harm) */
#define DATA_INT 6 /* integer: can be any size 1 - 8 bytes */
#define DATA_SYS_CHILD 7 /* address of the child page in node pointer */
#define DATA_SYS 8 /* system column */
/* Data types >= DATA_FLOAT must be compared using the whole field, not as
binary strings */
#define DATA_FLOAT 9
#define DATA_DOUBLE 10
#define DATA_DECIMAL 11 /* decimal number stored as an ASCII string */
#define DATA_VARMYSQL 12 /* any charset varying length char */
#define DATA_MYSQL 13 /* any charset fixed length char */
/* NOTE that 4.1.1 used DATA_MYSQL and
DATA_VARMYSQL for all character sets, and the
charset-collation for tables created with it
can also be latin1_swedish_ci */
#define DATA_MTYPE_MAX 63 /* dtype_store_for_order_and_null_size()
requires the values are <= 63 */
/*-------------------------------------------*/
/* The 'PRECISE TYPE' of a column */
/*
Tables created by a MySQL user have the following convention:
- In the least significant byte in the precise type we store the MySQL type
code (not applicable for system columns).
- In the second least significant byte we OR flags DATA_NOT_NULL,
DATA_UNSIGNED, DATA_BINARY_TYPE.
- In the third least significant byte of the precise type of string types we
store the MySQL charset-collation code. In DATA_BLOB columns created with
< 4.0.14 we do not actually know if it is a BLOB or a TEXT column. Since there
are no indexes on prefixes of BLOB or TEXT columns in < 4.0.14, this is no
problem, though.
Note that versions < 4.1.2 or < 5.0.1 did not store the charset code to the
precise type, since the charset was always the default charset of the MySQL
installation. If the stored charset code is 0 in the system table SYS_COLUMNS
of InnoDB, that means that the default charset of this MySQL installation
should be used.
When loading a table definition from the system tables to the InnoDB data
dictionary cache in main memory, InnoDB versions >= 4.1.2 and >= 5.0.1 check
if the stored charset-collation is 0, and if that is the case and the type is
a non-binary string, replace that 0 by the default charset-collation code of
this MySQL installation. In short, in old tables, the charset-collation code
in the system tables on disk can be 0, but in in-memory data structures
(dtype_t), the charset-collation code is always != 0 for non-binary string
types.
In new tables, in binary string types, the charset-collation code is the
MySQL code for the 'binary charset', that is, != 0.
For binary string types and for DATA_CHAR, DATA_VARCHAR, and for those
DATA_BLOB which are binary or have the charset-collation latin1_swedish_ci,
InnoDB performs all comparisons internally, without resorting to the MySQL
comparison functions. This is to save CPU time.
InnoDB's own internal system tables have different precise types for their
columns, and for them the precise type is usually not used at all.
*/
#define DATA_ENGLISH 4 /* English language character string: this
is a relic from pre-MySQL time and only used
for InnoDB's own system tables */
#define DATA_ERROR 111 /* another relic from pre-MySQL time */
#define DATA_MYSQL_TYPE_MASK 255 /* AND with this mask to extract the MySQL
type from the precise type */
#define DATA_MYSQL_TRUE_VARCHAR 15 /* MySQL type code for the >= 5.0.3
format true VARCHAR */
/* Precise data types for system columns and the length of those columns;
NOTE: the values must run from 0 up in the order given! All codes must
be less than 256 */
#define DATA_ROW_ID 0 /* row id: a dulint */
#define DATA_ROW_ID_LEN 6 /* stored length for row id */
#define DATA_TRX_ID 1 /* transaction id: 6 bytes */
#define DATA_TRX_ID_LEN 6
#define DATA_ROLL_PTR 2 /* rollback data pointer: 7 bytes */
#define DATA_ROLL_PTR_LEN 7
#define DATA_MIX_ID 3 /* mixed index label: a dulint, stored in
a row in a compressed form */
#define DATA_MIX_ID_LEN 9 /* maximum stored length for mix id (in a
compressed dulint form) */
#define DATA_N_SYS_COLS 4 /* number of system columns defined above */
/* Flags ORed to the precise data type */
#define DATA_NOT_NULL 256 /* this is ORed to the precise type when
the column is declared as NOT NULL */
#define DATA_UNSIGNED 512 /* this id ORed to the precise type when
we have an unsigned integer type */
#define DATA_BINARY_TYPE 1024 /* if the data type is a binary character
string, this is ORed to the precise type:
this only holds for tables created with
>= MySQL-4.0.14 */
/* #define DATA_NONLATIN1 2048 This is a relic from < 4.1.2 and < 5.0.1.
In earlier versions this was set for some
BLOB columns.
*/
#define DATA_LONG_TRUE_VARCHAR 4096 /* this is ORed to the precise data
type when the column is true VARCHAR where
MySQL uses 2 bytes to store the data len;
for shorter VARCHARs MySQL uses only 1 byte */
/*-------------------------------------------*/
/* This many bytes we need to store the type information affecting the
alphabetical order for a single field and decide the storage size of an
SQL null*/
#define DATA_ORDER_NULL_TYPE_BUF_SIZE 4
/* In the >= 4.1.x storage format we add 2 bytes more so that we can also
store the charset-collation number; one byte is left unused, though */
#define DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE 6
/*************************************************************************
Gets the MySQL type code from a dtype. */
UNIV_INLINE
ulint
dtype_get_mysql_type(
/*=================*/
/* out: MySQL type code; this is NOT an InnoDB
type code! */
dtype_t* type); /* in: type struct */
/*************************************************************************
Determine how many bytes the first n characters of the given string occupy.
If the string is shorter than n characters, returns the number of bytes
the characters in the string occupy. */
ulint
dtype_get_at_most_n_mbchars(
/*========================*/
/* out: length of the prefix,
in bytes */
const dtype_t* dtype, /* in: data type */
ulint prefix_len, /* in: length of the requested
prefix, in characters, multiplied by
dtype_get_mbmaxlen(dtype) */
ulint data_len, /* in: length of str (in bytes) */
const char* str); /* in: the string whose prefix
length is being determined */
/*************************************************************************
Checks if a data main type is a string type. Also a BLOB is considered a
string type. */
ibool
dtype_is_string_type(
/*=================*/
/* out: TRUE if string type */
ulint mtype); /* in: InnoDB main data type code: DATA_CHAR, ... */
/*************************************************************************
Checks if a type is a binary string type. Note that for tables created with
< 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column. For
those DATA_BLOB columns this function currently returns FALSE. */
ibool
dtype_is_binary_string_type(
/*========================*/
/* out: TRUE if binary string type */
ulint mtype, /* in: main data type */
ulint prtype);/* in: precise type */
/*************************************************************************
Checks if a type is a non-binary string type. That is, dtype_is_string_type is
TRUE and dtype_is_binary_string_type is FALSE. Note that for tables created
with < 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column.
For those DATA_BLOB columns this function currently returns TRUE. */
ibool
dtype_is_non_binary_string_type(
/*============================*/
/* out: TRUE if non-binary string type */
ulint mtype, /* in: main data type */
ulint prtype);/* in: precise type */
/*************************************************************************
Sets a data type structure. */
UNIV_INLINE
void
dtype_set(
/*======*/
dtype_t* type, /* in: type struct to init */
ulint mtype, /* in: main data type */
ulint prtype, /* in: precise type */
ulint len, /* in: length of type */
ulint prec); /* in: precision of type */
/*************************************************************************
Copies a data type structure. */
UNIV_INLINE
void
dtype_copy(
/*=======*/
dtype_t* type1, /* in: type struct to copy to */
dtype_t* type2); /* in: type struct to copy from */
/*************************************************************************
Gets the SQL main data type. */
UNIV_INLINE
ulint
dtype_get_mtype(
/*============*/
dtype_t* type);
/*************************************************************************
Gets the precise data type. */
UNIV_INLINE
ulint
dtype_get_prtype(
/*=============*/
dtype_t* type);
/*************************************************************************
Gets the MySQL charset-collation code for MySQL string types. */
ulint
dtype_get_charset_coll_noninline(
/*=============================*/
ulint prtype);/* in: precise data type */
/*************************************************************************
Gets the MySQL charset-collation code for MySQL string types. */
UNIV_INLINE
ulint
dtype_get_charset_coll(
/*===================*/
ulint prtype);/* in: precise data type */
/*************************************************************************
Forms a precise type from the < 4.1.2 format precise type plus the
charset-collation code. */
ulint
dtype_form_prtype(
/*==============*/
ulint old_prtype, /* in: the MySQL type code and the flags
DATA_BINARY_TYPE etc. */
ulint charset_coll); /* in: MySQL charset-collation code */
/*************************************************************************
Gets the type length. */
UNIV_INLINE
ulint
dtype_get_len(
/*==========*/
dtype_t* type);
/*************************************************************************
Gets the type precision. */
UNIV_INLINE
ulint
dtype_get_prec(
/*===========*/
dtype_t* type);
/*************************************************************************
Gets the minimum length of a character, in bytes. */
UNIV_INLINE
ulint
dtype_get_mbminlen(
/*===============*/
/* out: minimum length of a char, in bytes,
or 0 if this is not a character type */
const dtype_t* type); /* in: type */
/*************************************************************************
Gets the maximum length of a character, in bytes. */
UNIV_INLINE
ulint
dtype_get_mbmaxlen(
/*===============*/
/* out: maximum length of a char, in bytes,
or 0 if this is not a character type */
const dtype_t* type); /* in: type */
/*************************************************************************
Gets the padding character code for the type. */
UNIV_INLINE
ulint
dtype_get_pad_char(
/*===============*/
/* out: padding character code, or
ULINT_UNDEFINED if no padding specified */
dtype_t* type); /* in: type */
/***************************************************************************
Returns the size of a fixed size data type, 0 if not a fixed size type. */
UNIV_INLINE
ulint
dtype_get_fixed_size(
/*=================*/
/* out: fixed size, or 0 */
dtype_t* type); /* in: type */
/***************************************************************************
Returns the minimum size of a data type. */
UNIV_INLINE
ulint
dtype_get_min_size(
/*===============*/
/* out: minimum size */
const dtype_t* type); /* in: type */
/***************************************************************************
Returns a stored SQL NULL size for a type. For fixed length types it is
the fixed length of the type, otherwise 0. */
UNIV_INLINE
ulint
dtype_get_sql_null_size(
/*====================*/
/* out: SQL null storage size */
dtype_t* type); /* in: type */
/***************************************************************************
Returns TRUE if a type is of a fixed size. */
UNIV_INLINE
ibool
dtype_is_fixed_size(
/*================*/
/* out: TRUE if fixed size */
dtype_t* type); /* in: type */
/**************************************************************************
Reads to a type the stored information which determines its alphabetical
ordering and the storage size of an SQL NULL value. */
UNIV_INLINE
void
dtype_read_for_order_and_null_size(
/*===============================*/
dtype_t* type, /* in: type struct */
byte* buf); /* in: buffer for the stored order info */
/**************************************************************************
Stores for a type the information which determines its alphabetical ordering
and the storage size of an SQL NULL value. This is the >= 4.1.x storage
format. */
UNIV_INLINE
void
dtype_new_store_for_order_and_null_size(
/*====================================*/
byte* buf, /* in: buffer for
DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
bytes where we store the info */
dtype_t* type); /* in: type struct */
/**************************************************************************
Reads to a type the stored information which determines its alphabetical
ordering and the storage size of an SQL NULL value. This is the 4.1.x storage
format. */
UNIV_INLINE
void
dtype_new_read_for_order_and_null_size(
/*===================================*/
dtype_t* type, /* in: type struct */
byte* buf); /* in: buffer for stored type order info */
/*************************************************************************
Validates a data type structure. */
ibool
dtype_validate(
/*===========*/
/* out: TRUE if ok */
dtype_t* type); /* in: type struct to validate */
/*************************************************************************
Prints a data type structure. */
void
dtype_print(
/*========*/
dtype_t* type); /* in: type */
/* Structure for an SQL data type.
If you add fields to this structure, be sure to initialize them everywhere.
This structure is initialized in the following functions:
dtype_set()
dtype_read_for_order_and_null_size()
dtype_new_read_for_order_and_null_size()
sym_tab_add_null_lit() */
struct dtype_struct{
ulint mtype; /* main data type */
ulint prtype; /* precise type; MySQL data type, charset code,
flags to indicate nullability, signedness,
whether this is a binary string, whether this
is a true VARCHAR where MySQL uses 2 bytes to
store the length */
/* the remaining fields do not affect alphabetical ordering: */
ulint len; /* length; for MySQL data this is
field->pack_length(), except that for a
>= 5.0.3 type true VARCHAR this is the
maximum byte length of the string data
(in addition to the string, MySQL uses 1 or
2 bytes to store the string length) */
ulint prec; /* precision */
ulint mbminlen; /* minimum length of a character, in bytes */
ulint mbmaxlen; /* maximum length of a character, in bytes */
};
#ifndef UNIV_NONINL
#include "data0type.ic"
#endif
#endif

512
include/data0type.ic Normal file
View file

@ -0,0 +1,512 @@
/******************************************************
Data types
(c) 1996 Innobase Oy
Created 1/16/1996 Heikki Tuuri
*******************************************************/
#include "mach0data.h"
/**********************************************************************
Get the variable length bounds of the given character set.
NOTE: the prototype of this function is copied from ha_innodb.cc! If you change
this function, you MUST change also the prototype here! */
extern
void
innobase_get_cset_width(
/*====================*/
ulint cset, /* in: MySQL charset-collation code */
ulint* mbminlen, /* out: minimum length of a char (in bytes) */
ulint* mbmaxlen); /* out: maximum length of a char (in bytes) */
/*************************************************************************
Gets the MySQL charset-collation code for MySQL string types. */
UNIV_INLINE
ulint
dtype_get_charset_coll(
/*===================*/
ulint prtype) /* in: precise data type */
{
return((prtype >> 16) & 0xFFUL);
}
/*************************************************************************
Gets the MySQL type code from a dtype. */
UNIV_INLINE
ulint
dtype_get_mysql_type(
/*=================*/
/* out: MySQL type code; this is NOT an InnoDB
type code! */
dtype_t* type) /* in: type struct */
{
return(type->prtype & 0xFFUL);
}
/*************************************************************************
Sets the mbminlen and mbmaxlen members of a data type structure. */
UNIV_INLINE
void
dtype_set_mblen(
/*============*/
dtype_t* type) /* in/out: type struct */
{
ut_ad(type);
if (dtype_is_string_type(type->mtype)) {
innobase_get_cset_width(dtype_get_charset_coll(type->prtype),
&type->mbminlen, &type->mbmaxlen);
ut_ad(type->mbminlen <= type->mbmaxlen);
} else {
type->mbminlen = type->mbmaxlen = 0;
}
}
/*************************************************************************
Sets a data type structure. */
UNIV_INLINE
void
dtype_set(
/*======*/
dtype_t* type, /* in: type struct to init */
ulint mtype, /* in: main data type */
ulint prtype, /* in: precise type */
ulint len, /* in: length of type */
ulint prec) /* in: precision of type */
{
ut_ad(type);
ut_ad(mtype <= DATA_MTYPE_MAX);
type->mtype = mtype;
type->prtype = prtype;
type->len = len;
type->prec = prec;
dtype_set_mblen(type);
ut_ad(dtype_validate(type));
}
/*************************************************************************
Copies a data type structure. */
UNIV_INLINE
void
dtype_copy(
/*=======*/
dtype_t* type1, /* in: type struct to copy to */
dtype_t* type2) /* in: type struct to copy from */
{
*type1 = *type2;
ut_ad(dtype_validate(type1));
}
/*************************************************************************
Gets the SQL main data type. */
UNIV_INLINE
ulint
dtype_get_mtype(
/*============*/
dtype_t* type)
{
ut_ad(type);
return(type->mtype);
}
/*************************************************************************
Gets the precise data type. */
UNIV_INLINE
ulint
dtype_get_prtype(
/*=============*/
dtype_t* type)
{
ut_ad(type);
return(type->prtype);
}
/*************************************************************************
Gets the type length. */
UNIV_INLINE
ulint
dtype_get_len(
/*==========*/
dtype_t* type)
{
ut_ad(type);
return(type->len);
}
/*************************************************************************
Gets the type precision. */
UNIV_INLINE
ulint
dtype_get_prec(
/*===========*/
dtype_t* type)
{
ut_ad(type);
return(type->prec);
}
/*************************************************************************
Gets the minimum length of a character, in bytes. */
UNIV_INLINE
ulint
dtype_get_mbminlen(
/*===============*/
/* out: minimum length of a char, in bytes,
or 0 if this is not a character type */
const dtype_t* type) /* in: type */
{
ut_ad(type);
return(type->mbminlen);
}
/*************************************************************************
Gets the maximum length of a character, in bytes. */
UNIV_INLINE
ulint
dtype_get_mbmaxlen(
/*===============*/
/* out: maximum length of a char, in bytes,
or 0 if this is not a character type */
const dtype_t* type) /* in: type */
{
ut_ad(type);
return(type->mbmaxlen);
}
/*************************************************************************
Gets the padding character code for the type. */
UNIV_INLINE
ulint
dtype_get_pad_char(
/*===============*/
/* out: padding character code, or
ULINT_UNDEFINED if no padding specified */
dtype_t* type) /* in: type */
{
if (type->mtype == DATA_CHAR
|| type->mtype == DATA_VARCHAR
|| type->mtype == DATA_BINARY
|| type->mtype == DATA_FIXBINARY
|| type->mtype == DATA_MYSQL
|| type->mtype == DATA_VARMYSQL
|| (type->mtype == DATA_BLOB
&& (type->prtype & DATA_BINARY_TYPE) == 0)) {
/* Space is the padding character for all char and binary
strings, and starting from 5.0.3, also for TEXT strings. */
return((ulint)' ');
}
/* No padding specified */
return(ULINT_UNDEFINED);
}
/**************************************************************************
Stores for a type the information which determines its alphabetical ordering
and the storage size of an SQL NULL value. This is the >= 4.1.x storage
format. */
UNIV_INLINE
void
dtype_new_store_for_order_and_null_size(
/*====================================*/
byte* buf, /* in: buffer for
DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
bytes where we store the info */
dtype_t* type) /* in: type struct */
{
#if 6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
#error "6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE"
#endif
buf[0] = (byte)(type->mtype & 0xFFUL);
if (type->prtype & DATA_BINARY_TYPE) {
buf[0] = buf[0] | 128;
}
/* In versions < 4.1.2 we had: if (type->prtype & DATA_NONLATIN1) {
buf[0] = buf[0] | 64;
}
*/
buf[1] = (byte)(type->prtype & 0xFFUL);
mach_write_to_2(buf + 2, type->len & 0xFFFFUL);
ut_ad(dtype_get_charset_coll(type->prtype) < 256);
mach_write_to_2(buf + 4, dtype_get_charset_coll(type->prtype));
if (type->prtype & DATA_NOT_NULL) {
buf[4] |= 128;
}
}
/**************************************************************************
Reads to a type the stored information which determines its alphabetical
ordering and the storage size of an SQL NULL value. This is the < 4.1.x
storage format. */
UNIV_INLINE
void
dtype_read_for_order_and_null_size(
/*===============================*/
dtype_t* type, /* in: type struct */
byte* buf) /* in: buffer for stored type order info */
{
ut_ad(4 == DATA_ORDER_NULL_TYPE_BUF_SIZE);
type->mtype = buf[0] & 63;
type->prtype = buf[1];
if (buf[0] & 128) {
type->prtype = type->prtype | DATA_BINARY_TYPE;
}
type->len = mach_read_from_2(buf + 2);
type->prtype = dtype_form_prtype(type->prtype,
data_mysql_default_charset_coll);
dtype_set_mblen(type);
}
/**************************************************************************
Reads to a type the stored information which determines its alphabetical
ordering and the storage size of an SQL NULL value. This is the >= 4.1.x
storage format. */
UNIV_INLINE
void
dtype_new_read_for_order_and_null_size(
/*===================================*/
dtype_t* type, /* in: type struct */
byte* buf) /* in: buffer for stored type order info */
{
ulint charset_coll;
#if 6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
#error "6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE"
#endif
type->mtype = buf[0] & 63;
type->prtype = buf[1];
if (buf[0] & 128) {
type->prtype |= DATA_BINARY_TYPE;
}
if (buf[4] & 128) {
type->prtype |= DATA_NOT_NULL;
}
type->len = mach_read_from_2(buf + 2);
mach_read_from_2(buf + 4);
charset_coll = mach_read_from_2(buf + 4) & 0x7fff;
if (dtype_is_string_type(type->mtype)) {
ut_a(charset_coll < 256);
if (charset_coll == 0) {
/* This insert buffer record was inserted with MySQL
version < 4.1.2, and the charset-collation code was not
explicitly stored to dtype->prtype at that time. It
must be the default charset-collation of this MySQL
installation. */
charset_coll = data_mysql_default_charset_coll;
}
type->prtype = dtype_form_prtype(type->prtype, charset_coll);
}
dtype_set_mblen(type);
}
#ifndef UNIV_HOTBACKUP
/***************************************************************************
Returns the size of a fixed size data type, 0 if not a fixed size type. */
UNIV_INLINE
ulint
dtype_get_fixed_size(
/*=================*/
/* out: fixed size, or 0 */
dtype_t* type) /* in: type */
{
ulint mtype;
mtype = dtype_get_mtype(type);
switch (mtype) {
case DATA_SYS:
#ifdef UNIV_DEBUG
switch (type->prtype & DATA_MYSQL_TYPE_MASK) {
default:
ut_ad(0);
return(0);
case DATA_ROW_ID:
ut_ad(type->len == DATA_ROW_ID_LEN);
break;
case DATA_TRX_ID:
ut_ad(type->len == DATA_TRX_ID_LEN);
break;
case DATA_ROLL_PTR:
ut_ad(type->len == DATA_ROLL_PTR_LEN);
break;
case DATA_MIX_ID:
ut_ad(type->len == DATA_MIX_ID_LEN);
break;
}
#endif /* UNIV_DEBUG */
case DATA_CHAR:
case DATA_FIXBINARY:
case DATA_INT:
case DATA_FLOAT:
case DATA_DOUBLE:
return(dtype_get_len(type));
case DATA_MYSQL:
if (type->prtype & DATA_BINARY_TYPE) {
return(dtype_get_len(type));
} else {
/* We play it safe here and ask MySQL for
mbminlen and mbmaxlen. Although
type->mbminlen and type->mbmaxlen are
initialized if and only if type->prtype
is (in one of the 3 functions in this file),
it could be that none of these functions
has been called. */
ulint mbminlen, mbmaxlen;
innobase_get_cset_width(
dtype_get_charset_coll(type->prtype),
&mbminlen, &mbmaxlen);
if (UNIV_UNLIKELY(type->mbminlen != mbminlen)
|| UNIV_UNLIKELY(type->mbmaxlen != mbmaxlen)) {
ut_print_timestamp(stderr);
fprintf(stderr, " InnoDB: "
"mbminlen=%lu, "
"mbmaxlen=%lu, "
"type->mbminlen=%lu, "
"type->mbmaxlen=%lu\n",
(ulong) mbminlen,
(ulong) mbmaxlen,
(ulong) type->mbminlen,
(ulong) type->mbmaxlen);
}
if (mbminlen == mbmaxlen) {
return(dtype_get_len(type));
}
}
/* fall through for variable-length charsets */
case DATA_VARCHAR:
case DATA_BINARY:
case DATA_DECIMAL:
case DATA_VARMYSQL:
case DATA_BLOB:
return(0);
default: ut_error;
}
return(0);
}
/***************************************************************************
Returns the minimum size of a data type. */
UNIV_INLINE
ulint
dtype_get_min_size(
/*===============*/
/* out: minimum size */
const dtype_t* type) /* in: type */
{
switch (type->mtype) {
case DATA_SYS:
#ifdef UNIV_DEBUG
switch (type->prtype & DATA_MYSQL_TYPE_MASK) {
default:
ut_ad(0);
return(0);
case DATA_ROW_ID:
ut_ad(type->len == DATA_ROW_ID_LEN);
break;
case DATA_TRX_ID:
ut_ad(type->len == DATA_TRX_ID_LEN);
break;
case DATA_ROLL_PTR:
ut_ad(type->len == DATA_ROLL_PTR_LEN);
break;
case DATA_MIX_ID:
ut_ad(type->len == DATA_MIX_ID_LEN);
break;
}
#endif /* UNIV_DEBUG */
case DATA_CHAR:
case DATA_FIXBINARY:
case DATA_INT:
case DATA_FLOAT:
case DATA_DOUBLE:
return(type->len);
case DATA_MYSQL:
if ((type->prtype & DATA_BINARY_TYPE)
|| type->mbminlen == type->mbmaxlen) {
return(type->len);
}
/* this is a variable-length character set */
ut_a(type->mbminlen > 0);
ut_a(type->mbmaxlen > type->mbminlen);
ut_a(type->len % type->mbmaxlen == 0);
return(type->len * type->mbminlen / type->mbmaxlen);
case DATA_VARCHAR:
case DATA_BINARY:
case DATA_DECIMAL:
case DATA_VARMYSQL:
case DATA_BLOB:
return(0);
default: ut_error;
}
return(0);
}
#endif /* !UNIV_HOTBACKUP */
/***************************************************************************
Returns a stored SQL NULL size for a type. For fixed length types it is
the fixed length of the type, otherwise 0. */
UNIV_INLINE
ulint
dtype_get_sql_null_size(
/*====================*/
/* out: SQL null storage size */
dtype_t* type) /* in: type */
{
return(dtype_get_fixed_size(type));
}
/***************************************************************************
Returns TRUE if a type is of a fixed size. */
UNIV_INLINE
ibool
dtype_is_fixed_size(
/*================*/
/* out: TRUE if fixed size */
dtype_t* type) /* in: type */
{
ulint size;
size = dtype_get_fixed_size(type);
if (size) {
return(TRUE);
}
return(FALSE);
}

19
include/data0types.h Normal file
View file

@ -0,0 +1,19 @@
/************************************************************************
Some type definitions
(c) 1994-2000 Innobase Oy
Created 9/21/2000 Heikki Tuuri
*************************************************************************/
#ifndef data0types_h
#define data0types_h
/* SQL data field struct */
typedef struct dfield_struct dfield_t;
/* SQL data tuple struct */
typedef struct dtuple_struct dtuple_t;
#endif

69
include/db0err.h Normal file
View file

@ -0,0 +1,69 @@
/******************************************************
Global error codes for the database
(c) 1996 Innobase Oy
Created 5/24/1996 Heikki Tuuri
*******************************************************/
#ifndef db0err_h
#define db0err_h
#define DB_SUCCESS 10
/* The following are error codes */
#define DB_ERROR 11
#define DB_OUT_OF_MEMORY 12
#define DB_OUT_OF_FILE_SPACE 13
#define DB_LOCK_WAIT 14
#define DB_DEADLOCK 15
#define DB_ROLLBACK 16
#define DB_DUPLICATE_KEY 17
#define DB_QUE_THR_SUSPENDED 18
#define DB_MISSING_HISTORY 19 /* required history data has been
deleted due to lack of space in
rollback segment */
#define DB_CLUSTER_NOT_FOUND 30
#define DB_TABLE_NOT_FOUND 31
#define DB_MUST_GET_MORE_FILE_SPACE 32 /* the database has to be stopped
and restarted with more file space */
#define DB_TABLE_IS_BEING_USED 33
#define DB_TOO_BIG_RECORD 34 /* a record in an index would become
bigger than 1/2 free space in a page
frame */
#define DB_LOCK_WAIT_TIMEOUT 35 /* lock wait lasted too long */
#define DB_NO_REFERENCED_ROW 36 /* referenced key value not found
for a foreign key in an insert or
update of a row */
#define DB_ROW_IS_REFERENCED 37 /* cannot delete or update a row
because it contains a key value
which is referenced */
#define DB_CANNOT_ADD_CONSTRAINT 38 /* adding a foreign key constraint
to a table failed */
#define DB_CORRUPTION 39 /* data structure corruption noticed */
#define DB_COL_APPEARS_TWICE_IN_INDEX 40 /* InnoDB cannot handle an index
where same column appears twice */
#define DB_CANNOT_DROP_CONSTRAINT 41 /* dropping a foreign key constraint
from a table failed */
#define DB_NO_SAVEPOINT 42 /* no savepoint exists with the given
name */
#define DB_TABLESPACE_ALREADY_EXISTS 43 /* we cannot create a new single-table
tablespace because a file of the same
name already exists */
#define DB_TABLESPACE_DELETED 44 /* tablespace does not exist or is
being dropped right now */
#define DB_LOCK_TABLE_FULL 45 /* lock structs have exhausted the
buffer pool (for big transactions,
InnoDB stores the lock structs in the
buffer pool) */
/* The following are partial failure codes */
#define DB_FAIL 1000
#define DB_OVERFLOW 1001
#define DB_UNDERFLOW 1002
#define DB_STRONG_FAIL 1003
#define DB_RECORD_NOT_FOUND 1500
#define DB_END_OF_INDEX 1501
#endif

133
include/dict0boot.h Normal file
View file

@ -0,0 +1,133 @@
/******************************************************
Data dictionary creation and booting
(c) 1996 Innobase Oy
Created 4/18/1996 Heikki Tuuri
*******************************************************/
#ifndef dict0boot_h
#define dict0boot_h
#include "univ.i"
#include "mtr0mtr.h"
#include "mtr0log.h"
#include "ut0byte.h"
#include "buf0buf.h"
#include "fsp0fsp.h"
#include "dict0dict.h"
typedef byte dict_hdr_t;
/**************************************************************************
Gets a pointer to the dictionary header and x-latches its page. */
dict_hdr_t*
dict_hdr_get(
/*=========*/
/* out: pointer to the dictionary header,
page x-latched */
mtr_t* mtr); /* in: mtr */
/**************************************************************************
Returns a new row, table, index, or tree id. */
dulint
dict_hdr_get_new_id(
/*================*/
/* out: the new id */
ulint type); /* in: DICT_HDR_ROW_ID, ... */
/**************************************************************************
Returns a new row id. */
UNIV_INLINE
dulint
dict_sys_get_new_row_id(void);
/*=========================*/
/* out: the new id */
/**************************************************************************
Reads a row id from a record or other 6-byte stored form. */
UNIV_INLINE
dulint
dict_sys_read_row_id(
/*=================*/
/* out: row id */
byte* field); /* in: record field */
/**************************************************************************
Writes a row id to a record or other 6-byte stored form. */
UNIV_INLINE
void
dict_sys_write_row_id(
/*==================*/
byte* field, /* in: record field */
dulint row_id);/* in: row id */
/*********************************************************************
Initializes the data dictionary memory structures when the database is
started. This function is also called when the data dictionary is created. */
void
dict_boot(void);
/*===========*/
/*********************************************************************
Creates and initializes the data dictionary at the database creation. */
void
dict_create(void);
/*=============*/
/* Space id and page no where the dictionary header resides */
#define DICT_HDR_SPACE 0 /* the SYSTEM tablespace */
#define DICT_HDR_PAGE_NO FSP_DICT_HDR_PAGE_NO
/* The ids for the basic system tables and their indexes */
#define DICT_TABLES_ID ut_dulint_create(0, 1)
#define DICT_COLUMNS_ID ut_dulint_create(0, 2)
#define DICT_INDEXES_ID ut_dulint_create(0, 3)
#define DICT_FIELDS_ID ut_dulint_create(0, 4)
/* The following is a secondary index on SYS_TABLES */
#define DICT_TABLE_IDS_ID ut_dulint_create(0, 5)
#define DICT_HDR_FIRST_ID 10 /* the ids for tables etc. start
from this number, except for basic
system tables and their above defined
indexes; ibuf tables and indexes are
assigned as the id the number
DICT_IBUF_ID_MIN plus the space id */
#define DICT_IBUF_ID_MIN ut_dulint_create(0xFFFFFFFFUL, 0)
/* The offset of the dictionary header on the page */
#define DICT_HDR FSEG_PAGE_DATA
/*-------------------------------------------------------------*/
/* Dictionary header offsets */
#define DICT_HDR_ROW_ID 0 /* The latest assigned row id */
#define DICT_HDR_TABLE_ID 8 /* The latest assigned table id */
#define DICT_HDR_INDEX_ID 16 /* The latest assigned index id */
#define DICT_HDR_MIX_ID 24 /* The latest assigned mix id */
#define DICT_HDR_TABLES 32 /* Root of the table index tree */
#define DICT_HDR_TABLE_IDS 36 /* Root of the table index tree */
#define DICT_HDR_COLUMNS 40 /* Root of the column index tree */
#define DICT_HDR_INDEXES 44 /* Root of the index index tree */
#define DICT_HDR_FIELDS 48 /* Root of the index field index tree */
#define DICT_HDR_FSEG_HEADER 56 /* Segment header for the tablespace
segment into which the dictionary
header is created */
/*-------------------------------------------------------------*/
/* The field number of the page number field in the sys_indexes table
clustered index */
#define DICT_SYS_INDEXES_PAGE_NO_FIELD 8
#define DICT_SYS_INDEXES_SPACE_NO_FIELD 7
#define DICT_SYS_INDEXES_TYPE_FIELD 6
/* When a row id which is zero modulo this number (which must be a power of
two) is assigned, the field DICT_HDR_ROW_ID on the dictionary header page is
updated */
#define DICT_HDR_ROW_ID_WRITE_MARGIN 256
#ifndef UNIV_NONINL
#include "dict0boot.ic"
#endif
#endif

72
include/dict0boot.ic Normal file
View file

@ -0,0 +1,72 @@
/******************************************************
Data dictionary creation and booting
(c) 1996 Innobase Oy
Created 4/18/1996 Heikki Tuuri
*******************************************************/
/**************************************************************************
Writes the current value of the row id counter to the dictionary header file
page. */
void
dict_hdr_flush_row_id(void);
/*=======================*/
/**************************************************************************
Returns a new row id. */
UNIV_INLINE
dulint
dict_sys_get_new_row_id(void)
/*=========================*/
/* out: the new id */
{
dulint id;
mutex_enter(&(dict_sys->mutex));
id = dict_sys->row_id;
if (0 == (ut_dulint_get_low(id) % DICT_HDR_ROW_ID_WRITE_MARGIN)) {
dict_hdr_flush_row_id();
}
UT_DULINT_INC(dict_sys->row_id);
mutex_exit(&(dict_sys->mutex));
return(id);
}
/**************************************************************************
Reads a row id from a record or other 6-byte stored form. */
UNIV_INLINE
dulint
dict_sys_read_row_id(
/*=================*/
/* out: row id */
byte* field) /* in: record field */
{
ut_ad(DATA_ROW_ID_LEN == 6);
return(mach_read_from_6(field));
}
/**************************************************************************
Writes a row id to a record or other 6-byte stored form. */
UNIV_INLINE
void
dict_sys_write_row_id(
/*==================*/
byte* field, /* in: record field */
dulint row_id) /* in: row id */
{
ut_ad(DATA_ROW_ID_LEN == 6);
mach_write_to_6(field, row_id);
}

177
include/dict0crea.h Normal file
View file

@ -0,0 +1,177 @@
/******************************************************
Database object creation
(c) 1996 Innobase Oy
Created 1/8/1996 Heikki Tuuri
*******************************************************/
#ifndef dict0crea_h
#define dict0crea_h
#include "univ.i"
#include "dict0types.h"
#include "dict0dict.h"
#include "que0types.h"
#include "row0types.h"
#include "mtr0mtr.h"
/*************************************************************************
Creates a table create graph. */
tab_node_t*
tab_create_graph_create(
/*====================*/
/* out, own: table create node */
dict_table_t* table, /* in: table to create, built as a memory data
structure */
mem_heap_t* heap); /* in: heap where created */
/*************************************************************************
Creates an index create graph. */
ind_node_t*
ind_create_graph_create(
/*====================*/
/* out, own: index create node */
dict_index_t* index, /* in: index to create, built as a memory data
structure */
mem_heap_t* heap); /* in: heap where created */
/***************************************************************
Creates a table. This is a high-level function used in SQL execution graphs. */
que_thr_t*
dict_create_table_step(
/*===================*/
/* out: query thread to run next or NULL */
que_thr_t* thr); /* in: query thread */
/***************************************************************
Creates an index. This is a high-level function used in SQL execution
graphs. */
que_thr_t*
dict_create_index_step(
/*===================*/
/* out: query thread to run next or NULL */
que_thr_t* thr); /* in: query thread */
/***********************************************************************
Truncates the index tree associated with a row in SYS_INDEXES table. */
ulint
dict_truncate_index_tree(
/*=====================*/
/* out: new root page number, or
FIL_NULL on failure */
dict_table_t* table, /* in: the table the index belongs to */
rec_t* rec, /* in: record in the clustered index of
SYS_INDEXES table */
mtr_t* mtr); /* in: mtr having the latch
on the record page. The mtr may be
committed and restarted in this call. */
/***********************************************************************
Drops the index tree associated with a row in SYS_INDEXES table. */
void
dict_drop_index_tree(
/*=================*/
rec_t* rec, /* in: record in the clustered index of SYS_INDEXES
table */
mtr_t* mtr); /* in: mtr having the latch on the record page */
/********************************************************************
Creates the foreign key constraints system tables inside InnoDB
at database creation or database start if they are not found or are
not of the right form. */
ulint
dict_create_or_check_foreign_constraint_tables(void);
/*================================================*/
/* out: DB_SUCCESS or error code */
/************************************************************************
Adds foreign key definitions to data dictionary tables in the database. We
look at table->foreign_list, and also generate names to constraints that were
not named by the user. A generated constraint has a name of the format
databasename/tablename_ibfk_<number>, where the numbers start from 1, and are
given locally for this table, that is, the number is not global, as in the
old format constraints < 4.0.18 it used to be. */
ulint
dict_create_add_foreigns_to_dictionary(
/*===================================*/
/* out: error code or DB_SUCCESS */
ulint start_id,/* in: if we are actually doing ALTER TABLE
ADD CONSTRAINT, we want to generate constraint
numbers which are bigger than in the table so
far; we number the constraints from
start_id + 1 up; start_id should be set to 0 if
we are creating a new table, or if the table
so far has no constraints for which the name
was generated here */
dict_table_t* table, /* in: table */
trx_t* trx); /* in: transaction */
/* Table create node structure */
struct tab_node_struct{
que_common_t common; /* node type: QUE_NODE_TABLE_CREATE */
dict_table_t* table; /* table to create, built as a memory data
structure with dict_mem_... functions */
ins_node_t* tab_def; /* child node which does the insert of
the table definition; the row to be inserted
is built by the parent node */
ins_node_t* col_def; /* child node which does the inserts of
the column definitions; the row to be inserted
is built by the parent node */
commit_node_t* commit_node;
/* child node which performs a commit after
a successful table creation */
/*----------------------*/
/* Local storage for this graph node */
ulint state; /* node execution state */
ulint col_no; /* next column definition to insert */
mem_heap_t* heap; /* memory heap used as auxiliary storage */
};
/* Table create node states */
#define TABLE_BUILD_TABLE_DEF 1
#define TABLE_BUILD_COL_DEF 2
#define TABLE_COMMIT_WORK 3
#define TABLE_ADD_TO_CACHE 4
#define TABLE_COMPLETED 5
/* Index create node struct */
struct ind_node_struct{
que_common_t common; /* node type: QUE_NODE_INDEX_CREATE */
dict_index_t* index; /* index to create, built as a memory data
structure with dict_mem_... functions */
ins_node_t* ind_def; /* child node which does the insert of
the index definition; the row to be inserted
is built by the parent node */
ins_node_t* field_def; /* child node which does the inserts of
the field definitions; the row to be inserted
is built by the parent node */
commit_node_t* commit_node;
/* child node which performs a commit after
a successful index creation */
/*----------------------*/
/* Local storage for this graph node */
ulint state; /* node execution state */
ulint page_no;/* root page number of the index */
dict_table_t* table; /* table which owns the index */
dtuple_t* ind_row;/* index definition row built */
ulint field_no;/* next field definition to insert */
mem_heap_t* heap; /* memory heap used as auxiliary storage */
};
/* Index create node states */
#define INDEX_BUILD_INDEX_DEF 1
#define INDEX_BUILD_FIELD_DEF 2
#define INDEX_CREATE_INDEX_TREE 3
#define INDEX_COMMIT_WORK 4
#define INDEX_ADD_TO_CACHE 5
#ifndef UNIV_NONINL
#include "dict0crea.ic"
#endif
#endif

8
include/dict0crea.ic Normal file
View file

@ -0,0 +1,8 @@
/******************************************************
Database object creation
(c) 1996 Innobase Oy
Created 1/8/1996 Heikki Tuuri
*******************************************************/

963
include/dict0dict.h Normal file
View file

@ -0,0 +1,963 @@
/******************************************************
Data dictionary system
(c) 1996 Innobase Oy
Created 1/8/1996 Heikki Tuuri
*******************************************************/
#ifndef dict0dict_h
#define dict0dict_h
#include "univ.i"
#include "dict0types.h"
#include "dict0mem.h"
#include "data0type.h"
#include "data0data.h"
#include "sync0sync.h"
#include "sync0rw.h"
#include "mem0mem.h"
#include "rem0types.h"
#include "btr0types.h"
#include "ut0mem.h"
#include "ut0lst.h"
#include "hash0hash.h"
#include "ut0rnd.h"
#include "ut0byte.h"
#include "trx0types.h"
/**********************************************************************
Makes all characters in a NUL-terminated UTF-8 string lower case. */
void
dict_casedn_str(
/*============*/
char* a); /* in/out: string to put in lower case */
/************************************************************************
Get the database name length in a table name. */
ulint
dict_get_db_name_len(
/*=================*/
/* out: database name length */
const char* name); /* in: table name in the form
dbname '/' tablename */
/*************************************************************************
Accepts a specified string. Comparisons are case-insensitive. */
const char*
dict_accept(
/*========*/
/* out: if string was accepted, the pointer
is moved after that, else ptr is returned */
const char* ptr, /* in: scan from this */
const char* string, /* in: accept only this string as the next
non-whitespace string */
ibool* success);/* out: TRUE if accepted */
/************************************************************************
Decrements the count of open MySQL handles to a table. */
void
dict_table_decrement_handle_count(
/*==============================*/
dict_table_t* table); /* in: table */
/**************************************************************************
Inits the data dictionary module. */
void
dict_init(void);
/*===========*/
/************************************************************************
Gets the space id of every table of the data dictionary and makes a linear
list and a hash table of them to the data dictionary cache. This function
can be called at database startup if we did not need to do a crash recovery.
In crash recovery we must scan the space id's from the .ibd files in MySQL
database directories. */
void
dict_load_space_id_list(void);
/*=========================*/
/*************************************************************************
Gets the column data type. */
UNIV_INLINE
dtype_t*
dict_col_get_type(
/*==============*/
dict_col_t* col);
/*************************************************************************
Gets the column number. */
UNIV_INLINE
ulint
dict_col_get_no(
/*============*/
dict_col_t* col);
/*************************************************************************
Gets the column position in the clustered index. */
UNIV_INLINE
ulint
dict_col_get_clust_pos(
/*===================*/
dict_col_t* col);
/************************************************************************
Initializes the autoinc counter. It is not an error to initialize an already
initialized counter. */
void
dict_table_autoinc_initialize(
/*==========================*/
dict_table_t* table, /* in: table */
ib_longlong value); /* in: next value to assign to a row */
/************************************************************************
Gets the next autoinc value (== autoinc counter value), 0 if not yet
initialized. If initialized, increments the counter by 1. */
ib_longlong
dict_table_autoinc_get(
/*===================*/
/* out: value for a new row, or 0 */
dict_table_t* table); /* in: table */
/************************************************************************
Decrements the autoinc counter value by 1. */
void
dict_table_autoinc_decrement(
/*=========================*/
dict_table_t* table); /* in: table */
/************************************************************************
Reads the next autoinc value (== autoinc counter value), 0 if not yet
initialized. */
ib_longlong
dict_table_autoinc_read(
/*====================*/
/* out: value for a new row, or 0 */
dict_table_t* table); /* in: table */
/************************************************************************
Peeks the autoinc counter value, 0 if not yet initialized. Does not
increment the counter. The read not protected by any mutex! */
ib_longlong
dict_table_autoinc_peek(
/*====================*/
/* out: value of the counter */
dict_table_t* table); /* in: table */
/************************************************************************
Updates the autoinc counter if the value supplied is equal or bigger than the
current value. If not inited, does nothing. */
void
dict_table_autoinc_update(
/*======================*/
dict_table_t* table, /* in: table */
ib_longlong value); /* in: value which was assigned to a row */
/**************************************************************************
Adds a table object to the dictionary cache. */
void
dict_table_add_to_cache(
/*====================*/
dict_table_t* table); /* in: table */
/**************************************************************************
Removes a table object from the dictionary cache. */
void
dict_table_remove_from_cache(
/*=========================*/
dict_table_t* table); /* in, own: table */
/**************************************************************************
Renames a table object. */
ibool
dict_table_rename_in_cache(
/*=======================*/
/* out: TRUE if success */
dict_table_t* table, /* in: table */
const char* new_name, /* in: new name */
ibool rename_also_foreigns);/* in: in ALTER TABLE we want
to preserve the original table name
in constraints which reference it */
/**************************************************************************
Change the id of a table object in the dictionary cache. This is used in
DISCARD TABLESPACE. */
void
dict_table_change_id_in_cache(
/*==========================*/
dict_table_t* table, /* in: table object already in cache */
dulint new_id);/* in: new id to set */
/**************************************************************************
Adds a foreign key constraint object to the dictionary cache. May free
the object if there already is an object with the same identifier in.
At least one of foreign table or referenced table must already be in
the dictionary cache! */
ulint
dict_foreign_add_to_cache(
/*======================*/
/* out: DB_SUCCESS or error code */
dict_foreign_t* foreign, /* in, own: foreign key constraint */
ibool check_types); /* in: TRUE=check type compatibility */
/*************************************************************************
Checks if a table is referenced by foreign keys. */
ibool
dict_table_referenced_by_foreign_key(
/*=================================*/
/* out: TRUE if table is referenced by a
foreign key */
dict_table_t* table); /* in: InnoDB table */
/*************************************************************************
Scans a table create SQL string and adds to the data dictionary
the foreign key constraints declared in the string. This function
should be called after the indexes for a table have been created.
Each foreign key constraint must be accompanied with indexes in
bot participating tables. The indexes are allowed to contain more
fields than mentioned in the constraint. */
ulint
dict_create_foreign_constraints(
/*============================*/
/* out: error code or DB_SUCCESS */
trx_t* trx, /* in: transaction */
const char* sql_string, /* in: table create statement where
foreign keys are declared like:
FOREIGN KEY (a, b) REFERENCES
table2(c, d), table2 can be written
also with the database
name before it: test.table2; the
default database id the database of
parameter name */
const char* name, /* in: table full name in the
normalized form
database_name/table_name */
ibool reject_fks); /* in: if TRUE, fail with error
code DB_CANNOT_ADD_CONSTRAINT if
any foreign keys are found. */
/**************************************************************************
Parses the CONSTRAINT id's to be dropped in an ALTER TABLE statement. */
ulint
dict_foreign_parse_drop_constraints(
/*================================*/
/* out: DB_SUCCESS or
DB_CANNOT_DROP_CONSTRAINT if
syntax error or the constraint
id does not match */
mem_heap_t* heap, /* in: heap from which we can
allocate memory */
trx_t* trx, /* in: transaction */
dict_table_t* table, /* in: table */
ulint* n, /* out: number of constraints
to drop */
const char*** constraints_to_drop); /* out: id's of the
constraints to drop */
/**************************************************************************
Returns a table object and memoryfixes it. NOTE! This is a high-level
function to be used mainly from outside the 'dict' directory. Inside this
directory dict_table_get_low is usually the appropriate function. */
dict_table_t*
dict_table_get(
/*===========*/
/* out: table, NULL if
does not exist */
const char* table_name, /* in: table name */
trx_t* trx); /* in: transaction handle */
/**************************************************************************
Returns a table object and increments MySQL open handle count on the table.
*/
dict_table_t*
dict_table_get_and_increment_handle_count(
/*======================================*/
/* out: table, NULL if
does not exist */
const char* table_name, /* in: table name */
trx_t* trx); /* in: transaction handle or NULL */
/**************************************************************************
Returns a table object, based on table id, and memoryfixes it. */
dict_table_t*
dict_table_get_on_id(
/*=================*/
/* out: table, NULL if does not exist */
dulint table_id, /* in: table id */
trx_t* trx); /* in: transaction handle */
/**************************************************************************
Returns a table object, based on table id, and memoryfixes it. */
UNIV_INLINE
dict_table_t*
dict_table_get_on_id_low(
/*=====================*/
/* out: table, NULL if does not exist */
dulint table_id, /* in: table id */
trx_t* trx); /* in: transaction handle */
/**************************************************************************
Releases a table from being memoryfixed. Currently this has no relevance. */
UNIV_INLINE
void
dict_table_release(
/*===============*/
dict_table_t* table); /* in: table to be released */
/**************************************************************************
Checks if a table is in the dictionary cache. */
UNIV_INLINE
dict_table_t*
dict_table_check_if_in_cache_low(
/*==============================*/
/* out: table, NULL if not found */
const char* table_name); /* in: table name */
/**************************************************************************
Gets a table; loads it to the dictionary cache if necessary. A low-level
function. */
UNIV_INLINE
dict_table_t*
dict_table_get_low(
/*===============*/
/* out: table, NULL if not found */
const char* table_name); /* in: table name */
/**************************************************************************
Returns an index object. */
UNIV_INLINE
dict_index_t*
dict_table_get_index(
/*=================*/
/* out: index, NULL if does not exist */
dict_table_t* table, /* in: table */
const char* name); /* in: index name */
/**************************************************************************
Returns an index object. */
dict_index_t*
dict_table_get_index_noninline(
/*===========================*/
/* out: index, NULL if does not exist */
dict_table_t* table, /* in: table */
const char* name); /* in: index name */
/**************************************************************************
Prints a table definition. */
void
dict_table_print(
/*=============*/
dict_table_t* table); /* in: table */
/**************************************************************************
Prints a table data. */
void
dict_table_print_low(
/*=================*/
dict_table_t* table); /* in: table */
/**************************************************************************
Prints a table data when we know the table name. */
void
dict_table_print_by_name(
/*=====================*/
const char* name);
/**************************************************************************
Outputs info on foreign keys of a table. */
void
dict_print_info_on_foreign_keys(
/*============================*/
ibool create_table_format, /* in: if TRUE then print in
a format suitable to be inserted into
a CREATE TABLE, otherwise in the format
of SHOW TABLE STATUS */
FILE* file, /* in: file where to print */
trx_t* trx, /* in: transaction */
dict_table_t* table); /* in: table */
/**************************************************************************
Outputs info on a foreign key of a table in a format suitable for
CREATE TABLE. */
void
dict_print_info_on_foreign_key_in_create_format(
/*============================================*/
FILE* file, /* in: file where to print */
trx_t* trx, /* in: transaction */
dict_foreign_t* foreign, /* in: foreign key constraint */
ibool add_newline); /* in: whether to add a newline */
/************************************************************************
Displays the names of the index and the table. */
void
dict_index_name_print(
/*==================*/
FILE* file, /* in: output stream */
trx_t* trx, /* in: transaction */
const dict_index_t* index); /* in: index to print */
/************************************************************************
Gets the first index on the table (the clustered index). */
UNIV_INLINE
dict_index_t*
dict_table_get_first_index(
/*=======================*/
/* out: index, NULL if none exists */
dict_table_t* table); /* in: table */
/************************************************************************
Gets the first index on the table (the clustered index). */
dict_index_t*
dict_table_get_first_index_noninline(
/*=================================*/
/* out: index, NULL if none exists */
dict_table_t* table); /* in: table */
/************************************************************************
Gets the next index on the table. */
UNIV_INLINE
dict_index_t*
dict_table_get_next_index(
/*======================*/
/* out: index, NULL if none left */
dict_index_t* index); /* in: index */
/************************************************************************
Gets the next index on the table. */
dict_index_t*
dict_table_get_next_index_noninline(
/*================================*/
/* out: index, NULL if none left */
dict_index_t* index); /* in: index */
/************************************************************************
Gets the number of user-defined columns in a table in the dictionary
cache. */
UNIV_INLINE
ulint
dict_table_get_n_user_cols(
/*=======================*/
/* out: number of user-defined (e.g., not
ROW_ID) columns of a table */
dict_table_t* table); /* in: table */
/************************************************************************
Gets the number of system columns in a table in the dictionary cache. */
UNIV_INLINE
ulint
dict_table_get_n_sys_cols(
/*======================*/
/* out: number of system (e.g.,
ROW_ID) columns of a table */
dict_table_t* table); /* in: table */
/************************************************************************
Gets the number of all columns (also system) in a table in the dictionary
cache. */
UNIV_INLINE
ulint
dict_table_get_n_cols(
/*==================*/
/* out: number of columns of a table */
dict_table_t* table); /* in: table */
/************************************************************************
Gets the nth column of a table. */
UNIV_INLINE
dict_col_t*
dict_table_get_nth_col(
/*===================*/
/* out: pointer to column object */
dict_table_t* table, /* in: table */
ulint pos); /* in: position of column */
/************************************************************************
Gets the nth column of a table. */
dict_col_t*
dict_table_get_nth_col_noninline(
/*=============================*/
/* out: pointer to column object */
dict_table_t* table, /* in: table */
ulint pos); /* in: position of column */
/************************************************************************
Gets the given system column of a table. */
UNIV_INLINE
dict_col_t*
dict_table_get_sys_col(
/*===================*/
/* out: pointer to column object */
dict_table_t* table, /* in: table */
ulint sys); /* in: DATA_ROW_ID, ... */
/************************************************************************
Gets the given system column number of a table. */
UNIV_INLINE
ulint
dict_table_get_sys_col_no(
/*======================*/
/* out: column number */
dict_table_t* table, /* in: table */
ulint sys); /* in: DATA_ROW_ID, ... */
/************************************************************************
Checks if a column is in the ordering columns of the clustered index of a
table. Column prefixes are treated like whole columns. */
ibool
dict_table_col_in_clustered_key(
/*============================*/
/* out: TRUE if the column, or its prefix, is
in the clustered key */
dict_table_t* table, /* in: table */
ulint n); /* in: column number */
/***********************************************************************
Copies types of columns contained in table to tuple. */
void
dict_table_copy_types(
/*==================*/
dtuple_t* tuple, /* in: data tuple */
dict_table_t* table); /* in: index */
/**************************************************************************
Looks for an index with the given id. NOTE that we do not reserve
the dictionary mutex: this function is for emergency purposes like
printing info of a corrupt database page! */
dict_index_t*
dict_index_find_on_id_low(
/*======================*/
/* out: index or NULL if not found from cache */
dulint id); /* in: index id */
/**************************************************************************
Adds an index to dictionary cache. */
ibool
dict_index_add_to_cache(
/*====================*/
/* out: TRUE if success */
dict_table_t* table, /* in: table on which the index is */
dict_index_t* index, /* in, own: index; NOTE! The index memory
object is freed in this function! */
ulint page_no);/* in: root page number of the index */
/************************************************************************
Gets the number of fields in the internal representation of an index,
including fields added by the dictionary system. */
UNIV_INLINE
ulint
dict_index_get_n_fields(
/*====================*/
/* out: number of fields */
dict_index_t* index); /* in: an internal representation of index
(in the dictionary cache) */
/************************************************************************
Gets the number of fields in the internal representation of an index
that uniquely determine the position of an index entry in the index, if
we do not take multiversioning into account: in the B-tree use the value
returned by dict_index_get_n_unique_in_tree. */
UNIV_INLINE
ulint
dict_index_get_n_unique(
/*====================*/
/* out: number of fields */
dict_index_t* index); /* in: an internal representation of index
(in the dictionary cache) */
/************************************************************************
Gets the number of fields in the internal representation of an index
which uniquely determine the position of an index entry in the index, if
we also take multiversioning into account. */
UNIV_INLINE
ulint
dict_index_get_n_unique_in_tree(
/*============================*/
/* out: number of fields */
dict_index_t* index); /* in: an internal representation of index
(in the dictionary cache) */
/************************************************************************
Gets the number of user-defined ordering fields in the index. In the internal
representation we add the row id to the ordering fields to make all indexes
unique, but this function returns the number of fields the user defined
in the index as ordering fields. */
UNIV_INLINE
ulint
dict_index_get_n_ordering_defined_by_user(
/*======================================*/
/* out: number of fields */
dict_index_t* index); /* in: an internal representation of index
(in the dictionary cache) */
/************************************************************************
Gets the nth field of an index. */
UNIV_INLINE
dict_field_t*
dict_index_get_nth_field(
/*=====================*/
/* out: pointer to field object */
dict_index_t* index, /* in: index */
ulint pos); /* in: position of field */
/************************************************************************
Gets pointer to the nth field data type in an index. */
UNIV_INLINE
dtype_t*
dict_index_get_nth_type(
/*====================*/
/* out: data type */
dict_index_t* index, /* in: index */
ulint pos); /* in: position of the field */
/************************************************************************
Gets the column number of the nth field in an index. */
UNIV_INLINE
ulint
dict_index_get_nth_col_no(
/*======================*/
/* out: column number */
dict_index_t* index, /* in: index */
ulint pos); /* in: position of the field */
/************************************************************************
Looks for column n in an index. */
ulint
dict_index_get_nth_col_pos(
/*=======================*/
/* out: position in internal representation
of the index; if not contained, returns
ULINT_UNDEFINED */
dict_index_t* index, /* in: index */
ulint n); /* in: column number */
/************************************************************************
Returns TRUE if the index contains a column or a prefix of that column. */
ibool
dict_index_contains_col_or_prefix(
/*==============================*/
/* out: TRUE if contains the column or its
prefix */
dict_index_t* index, /* in: index */
ulint n); /* in: column number */
/************************************************************************
Looks for a matching field in an index. The column has to be the same. The
column in index must be complete, or must contain a prefix longer than the
column in index2. That is, we must be able to construct the prefix in index2
from the prefix in index. */
ulint
dict_index_get_nth_field_pos(
/*=========================*/
/* out: position in internal representation
of the index; if not contained, returns
ULINT_UNDEFINED */
dict_index_t* index, /* in: index from which to search */
dict_index_t* index2, /* in: index */
ulint n); /* in: field number in index2 */
/************************************************************************
Looks for column n position in the clustered index. */
ulint
dict_table_get_nth_col_pos(
/*=======================*/
/* out: position in internal representation
of the clustered index */
dict_table_t* table, /* in: table */
ulint n); /* in: column number */
/************************************************************************
Returns the position of a system column in an index. */
UNIV_INLINE
ulint
dict_index_get_sys_col_pos(
/*=======================*/
/* out: position, ULINT_UNDEFINED if not
contained */
dict_index_t* index, /* in: index */
ulint type); /* in: DATA_ROW_ID, ... */
/***********************************************************************
Adds a column to index. */
void
dict_index_add_col(
/*===============*/
dict_index_t* index, /* in: index */
dict_col_t* col, /* in: column */
ulint order, /* in: order criterion */
ulint prefix_len); /* in: column prefix length */
/***********************************************************************
Copies types of fields contained in index to tuple. */
void
dict_index_copy_types(
/*==================*/
dtuple_t* tuple, /* in: data tuple */
dict_index_t* index, /* in: index */
ulint n_fields); /* in: number of field types to copy */
/*************************************************************************
Gets the index tree where the index is stored. */
UNIV_INLINE
dict_tree_t*
dict_index_get_tree(
/*================*/
/* out: index tree */
dict_index_t* index); /* in: index */
/*************************************************************************
Gets the field order criterion. */
UNIV_INLINE
ulint
dict_field_get_order(
/*=================*/
dict_field_t* field);
/*************************************************************************
Gets the field column. */
UNIV_INLINE
dict_col_t*
dict_field_get_col(
/*===============*/
dict_field_t* field);
/**************************************************************************
Creates an index tree struct. */
dict_tree_t*
dict_tree_create(
/*=============*/
/* out, own: created tree */
dict_index_t* index, /* in: the index for which to create: in the
case of a mixed tree, this should be the
index of the cluster object */
ulint page_no);/* in: root page number of the index */
/**************************************************************************
Frees an index tree struct. */
void
dict_tree_free(
/*===========*/
dict_tree_t* tree); /* in, own: index tree */
/**************************************************************************
In an index tree, finds the index corresponding to a record in the tree. */
dict_index_t*
dict_tree_find_index(
/*=================*/
/* out: index */
dict_tree_t* tree, /* in: index tree */
rec_t* rec); /* in: record for which to find correct index */
/**************************************************************************
In an index tree, finds the index corresponding to a dtuple which is used
in a search to a tree. */
dict_index_t*
dict_tree_find_index_for_tuple(
/*===========================*/
/* out: index; NULL if the tuple does not
contain the mix id field in a mixed tree */
dict_tree_t* tree, /* in: index tree */
dtuple_t* tuple); /* in: tuple for which to find index */
/***********************************************************************
Checks if a table which is a mixed cluster member owns a record. */
ibool
dict_is_mixed_table_rec(
/*====================*/
/* out: TRUE if the record belongs to this
table */
dict_table_t* table, /* in: table in a mixed cluster */
rec_t* rec); /* in: user record in the clustered index */
/**************************************************************************
Returns an index object if it is found in the dictionary cache. */
dict_index_t*
dict_index_get_if_in_cache(
/*=======================*/
/* out: index, NULL if not found */
dulint index_id); /* in: index id */
/**************************************************************************
Checks that a tuple has n_fields_cmp value in a sensible range, so that
no comparison can occur with the page number field in a node pointer. */
ibool
dict_tree_check_search_tuple(
/*=========================*/
/* out: TRUE if ok */
dict_tree_t* tree, /* in: index tree */
dtuple_t* tuple); /* in: tuple used in a search */
/**************************************************************************
Builds a node pointer out of a physical record and a page number. */
dtuple_t*
dict_tree_build_node_ptr(
/*=====================*/
/* out, own: node pointer */
dict_tree_t* tree, /* in: index tree */
rec_t* rec, /* in: record for which to build node
pointer */
ulint page_no,/* in: page number to put in node pointer */
mem_heap_t* heap, /* in: memory heap where pointer created */
ulint level); /* in: level of rec in tree: 0 means leaf
level */
/**************************************************************************
Copies an initial segment of a physical record, long enough to specify an
index entry uniquely. */
rec_t*
dict_tree_copy_rec_order_prefix(
/*============================*/
/* out: pointer to the prefix record */
dict_tree_t* tree, /* in: index tree */
rec_t* rec, /* in: record for which to copy prefix */
ulint* n_fields,/* out: number of fields copied */
byte** buf, /* in/out: memory buffer for the copied prefix,
or NULL */
ulint* buf_size);/* in/out: buffer size */
/**************************************************************************
Builds a typed data tuple out of a physical record. */
dtuple_t*
dict_tree_build_data_tuple(
/*=======================*/
/* out, own: data tuple */
dict_tree_t* tree, /* in: index tree */
rec_t* rec, /* in: record for which to build data tuple */
ulint n_fields,/* in: number of data fields */
mem_heap_t* heap); /* in: memory heap where tuple created */
/*************************************************************************
Gets the space id of the root of the index tree. */
UNIV_INLINE
ulint
dict_tree_get_space(
/*================*/
/* out: space id */
dict_tree_t* tree); /* in: tree */
/*************************************************************************
Sets the space id of the root of the index tree. */
UNIV_INLINE
void
dict_tree_set_space(
/*================*/
dict_tree_t* tree, /* in: tree */
ulint space); /* in: space id */
/*************************************************************************
Gets the page number of the root of the index tree. */
UNIV_INLINE
ulint
dict_tree_get_page(
/*===============*/
/* out: page number */
dict_tree_t* tree); /* in: tree */
/*************************************************************************
Sets the page number of the root of index tree. */
UNIV_INLINE
void
dict_tree_set_page(
/*===============*/
dict_tree_t* tree, /* in: tree */
ulint page); /* in: page number */
/*************************************************************************
Gets the type of the index tree. */
UNIV_INLINE
ulint
dict_tree_get_type(
/*===============*/
/* out: type */
dict_tree_t* tree); /* in: tree */
/*************************************************************************
Gets the read-write lock of the index tree. */
UNIV_INLINE
rw_lock_t*
dict_tree_get_lock(
/*===============*/
/* out: read-write lock */
dict_tree_t* tree); /* in: tree */
/************************************************************************
Returns free space reserved for future updates of records. This is
relevant only in the case of many consecutive inserts, as updates
which make the records bigger might fragment the index. */
UNIV_INLINE
ulint
dict_tree_get_space_reserve(
/*========================*/
/* out: number of free bytes on page,
reserved for updates */
dict_tree_t* tree); /* in: a tree */
/*************************************************************************
Calculates the minimum record length in an index. */
ulint
dict_index_calc_min_rec_len(
/*========================*/
dict_index_t* index); /* in: index */
/*************************************************************************
Calculates new estimates for table and index statistics. The statistics
are used in query optimization. */
void
dict_update_statistics_low(
/*=======================*/
dict_table_t* table, /* in: table */
ibool has_dict_mutex);/* in: TRUE if the caller has the
dictionary mutex */
/*************************************************************************
Calculates new estimates for table and index statistics. The statistics
are used in query optimization. */
void
dict_update_statistics(
/*===================*/
dict_table_t* table); /* in: table */
/************************************************************************
Reserves the dictionary system mutex for MySQL. */
void
dict_mutex_enter_for_mysql(void);
/*============================*/
/************************************************************************
Releases the dictionary system mutex for MySQL. */
void
dict_mutex_exit_for_mysql(void);
/*===========================*/
/************************************************************************
Checks if the database name in two table names is the same. */
ibool
dict_tables_have_same_db(
/*=====================*/
/* out: TRUE if same db name */
const char* name1, /* in: table name in the form
dbname '/' tablename */
const char* name2); /* in: table name in the form
dbname '/' tablename */
/*************************************************************************
Scans from pointer onwards. Stops if is at the start of a copy of
'string' where characters are compared without case sensitivity. Stops
also at '\0'. */
const char*
dict_scan_to(
/*=========*/
/* out: scanned up to this */
const char* ptr, /* in: scan from */
const char* string);/* in: look for this */
/* Buffers for storing detailed information about the latest foreign key
and unique key errors */
extern FILE* dict_foreign_err_file;
extern mutex_t dict_foreign_err_mutex; /* mutex protecting the buffers */
extern dict_sys_t* dict_sys; /* the dictionary system */
extern rw_lock_t dict_operation_lock;
/* Dictionary system struct */
struct dict_sys_struct{
mutex_t mutex; /* mutex protecting the data
dictionary; protects also the
disk-based dictionary system tables;
this mutex serializes CREATE TABLE
and DROP TABLE, as well as reading
the dictionary data for a table from
system tables */
dulint row_id; /* the next row id to assign;
NOTE that at a checkpoint this
must be written to the dict system
header and flushed to a file; in
recovery this must be derived from
the log records */
hash_table_t* table_hash; /* hash table of the tables, based
on name */
hash_table_t* table_id_hash; /* hash table of the tables, based
on id */
hash_table_t* col_hash; /* hash table of the columns */
UT_LIST_BASE_NODE_T(dict_table_t)
table_LRU; /* LRU list of tables */
ulint size; /* varying space in bytes occupied
by the data dictionary table and
index objects */
dict_table_t* sys_tables; /* SYS_TABLES table */
dict_table_t* sys_columns; /* SYS_COLUMNS table */
dict_table_t* sys_indexes; /* SYS_INDEXES table */
dict_table_t* sys_fields; /* SYS_FIELDS table */
};
#ifndef UNIV_NONINL
#include "dict0dict.ic"
#endif
#endif

620
include/dict0dict.ic Normal file
View file

@ -0,0 +1,620 @@
/**********************************************************************
Data dictionary system
(c) 1996 Innobase Oy
Created 1/8/1996 Heikki Tuuri
***********************************************************************/
#include "dict0load.h"
#include "trx0undo.h"
#include "trx0sys.h"
/*************************************************************************
Gets the column data type. */
UNIV_INLINE
dtype_t*
dict_col_get_type(
/*==============*/
dict_col_t* col)
{
ut_ad(col);
return(&col->type);
}
/*************************************************************************
Gets the column number. */
UNIV_INLINE
ulint
dict_col_get_no(
/*============*/
dict_col_t* col)
{
ut_ad(col);
return(col->ind);
}
/*************************************************************************
Gets the column position in the clustered index. */
UNIV_INLINE
ulint
dict_col_get_clust_pos(
/*===================*/
dict_col_t* col)
{
ut_ad(col);
return(col->clust_pos);
}
/************************************************************************
Gets the first index on the table (the clustered index). */
UNIV_INLINE
dict_index_t*
dict_table_get_first_index(
/*=======================*/
/* out: index, NULL if none exists */
dict_table_t* table) /* in: table */
{
ut_ad(table);
ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
return(UT_LIST_GET_FIRST(table->indexes));
}
/************************************************************************
Gets the next index on the table. */
UNIV_INLINE
dict_index_t*
dict_table_get_next_index(
/*======================*/
/* out: index, NULL if none left */
dict_index_t* index) /* in: index */
{
ut_ad(index);
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
return(UT_LIST_GET_NEXT(indexes, index));
}
/************************************************************************
Gets the number of user-defined columns in a table in the dictionary
cache. */
UNIV_INLINE
ulint
dict_table_get_n_user_cols(
/*=======================*/
/* out: number of user-defined (e.g., not
ROW_ID) columns of a table */
dict_table_t* table) /* in: table */
{
ut_ad(table);
ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
ut_ad(table->cached);
return(table->n_cols - DATA_N_SYS_COLS);
}
/************************************************************************
Gets the number of system columns in a table in the dictionary cache. */
UNIV_INLINE
ulint
dict_table_get_n_sys_cols(
/*======================*/
/* out: number of system (e.g.,
ROW_ID) columns of a table */
dict_table_t* table __attribute__((unused))) /* in: table */
{
ut_ad(table);
ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
ut_ad(table->cached);
return(DATA_N_SYS_COLS);
}
/************************************************************************
Gets the number of all columns (also system) in a table in the dictionary
cache. */
UNIV_INLINE
ulint
dict_table_get_n_cols(
/*==================*/
/* out: number of columns of a table */
dict_table_t* table) /* in: table */
{
ut_ad(table);
ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
ut_ad(table->cached);
return(table->n_cols);
}
/************************************************************************
Gets the nth column of a table. */
UNIV_INLINE
dict_col_t*
dict_table_get_nth_col(
/*===================*/
/* out: pointer to column object */
dict_table_t* table, /* in: table */
ulint pos) /* in: position of column */
{
ut_ad(table);
ut_ad(pos < table->n_def);
ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
return((table->cols) + pos);
}
/************************************************************************
Gets the given system column of a table. */
UNIV_INLINE
dict_col_t*
dict_table_get_sys_col(
/*===================*/
/* out: pointer to column object */
dict_table_t* table, /* in: table */
ulint sys) /* in: DATA_ROW_ID, ... */
{
dict_col_t* col;
ut_ad(table);
ut_ad(sys < DATA_N_SYS_COLS);
ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
col = dict_table_get_nth_col(table, table->n_cols
- DATA_N_SYS_COLS + sys);
ut_ad(col->type.mtype == DATA_SYS);
ut_ad(col->type.prtype == (sys | DATA_NOT_NULL));
return(col);
}
/************************************************************************
Gets the given system column number of a table. */
UNIV_INLINE
ulint
dict_table_get_sys_col_no(
/*======================*/
/* out: column number */
dict_table_t* table, /* in: table */
ulint sys) /* in: DATA_ROW_ID, ... */
{
ut_ad(table);
ut_ad(sys < DATA_N_SYS_COLS);
ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
return(table->n_cols - DATA_N_SYS_COLS + sys);
}
/************************************************************************
Gets the number of fields in the internal representation of an index,
including fields added by the dictionary system. */
UNIV_INLINE
ulint
dict_index_get_n_fields(
/*====================*/
/* out: number of fields */
dict_index_t* index) /* in: an internal representation of index
(in the dictionary cache) */
{
ut_ad(index);
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
return(index->n_fields);
}
/************************************************************************
Gets the number of fields in the internal representation of an index
that uniquely determine the position of an index entry in the index, if
we do not take multiversioning into account: in the B-tree use the value
returned by dict_index_get_n_unique_in_tree. */
UNIV_INLINE
ulint
dict_index_get_n_unique(
/*====================*/
/* out: number of fields */
dict_index_t* index) /* in: an internal representation of index
(in the dictionary cache) */
{
ut_ad(index);
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
ut_ad(index->cached);
return(index->n_uniq);
}
/************************************************************************
Gets the number of fields in the internal representation of an index
which uniquely determine the position of an index entry in the index, if
we also take multiversioning into account. */
UNIV_INLINE
ulint
dict_index_get_n_unique_in_tree(
/*============================*/
/* out: number of fields */
dict_index_t* index) /* in: an internal representation of index
(in the dictionary cache) */
{
ut_ad(index);
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
ut_ad(index->cached);
if (index->type & DICT_CLUSTERED) {
return(dict_index_get_n_unique(index));
}
return(dict_index_get_n_fields(index));
}
/************************************************************************
Gets the number of user-defined ordering fields in the index. In the internal
representation of clustered indexes we add the row id to the ordering fields
to make a clustered index unique, but this function returns the number of
fields the user defined in the index as ordering fields. */
UNIV_INLINE
ulint
dict_index_get_n_ordering_defined_by_user(
/*======================================*/
/* out: number of fields */
dict_index_t* index) /* in: an internal representation of index
(in the dictionary cache) */
{
return(index->n_user_defined_cols);
}
/************************************************************************
Gets the nth field of an index. */
UNIV_INLINE
dict_field_t*
dict_index_get_nth_field(
/*=====================*/
/* out: pointer to field object */
dict_index_t* index, /* in: index */
ulint pos) /* in: position of field */
{
ut_ad(index);
ut_ad(pos < index->n_def);
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
return((index->fields) + pos);
}
/************************************************************************
Returns the position of a system column in an index. */
UNIV_INLINE
ulint
dict_index_get_sys_col_pos(
/*=======================*/
/* out: position, ULINT_UNDEFINED if not
contained */
dict_index_t* index, /* in: index */
ulint type) /* in: DATA_ROW_ID, ... */
{
dict_col_t* col;
ut_ad(index);
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
ut_ad(!(index->type & DICT_UNIVERSAL));
col = dict_table_get_sys_col(index->table, type);
if (index->type & DICT_CLUSTERED) {
return(col->clust_pos);
}
return(dict_index_get_nth_col_pos(index,
dict_table_get_sys_col_no(index->table, type)));
}
/*************************************************************************
Gets the index tree where the index is stored. */
UNIV_INLINE
dict_tree_t*
dict_index_get_tree(
/*================*/
/* out: index tree */
dict_index_t* index) /* in: index */
{
ut_ad(index);
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
return(index->tree);
}
/*************************************************************************
Gets the field order criterion. */
UNIV_INLINE
ulint
dict_field_get_order(
/*=================*/
dict_field_t* field)
{
ut_ad(field);
return(field->order);
}
/*************************************************************************
Gets the field column. */
UNIV_INLINE
dict_col_t*
dict_field_get_col(
/*===============*/
dict_field_t* field)
{
ut_ad(field);
return(field->col);
}
/************************************************************************
Gets pointer to the nth field data type in an index. */
UNIV_INLINE
dtype_t*
dict_index_get_nth_type(
/*====================*/
/* out: data type */
dict_index_t* index, /* in: index */
ulint pos) /* in: position of the field */
{
return(dict_col_get_type(dict_field_get_col(
dict_index_get_nth_field(index, pos))));
}
/************************************************************************
Gets the column number the nth field in an index. */
UNIV_INLINE
ulint
dict_index_get_nth_col_no(
/*======================*/
/* out: column number */
dict_index_t* index, /* in: index */
ulint pos) /* in: position of the field */
{
return(dict_col_get_no(dict_field_get_col(
dict_index_get_nth_field(index, pos))));
}
/*************************************************************************
Gets the space id of the root of the index tree. */
UNIV_INLINE
ulint
dict_tree_get_space(
/*================*/
/* out: space id */
dict_tree_t* tree) /* in: tree */
{
ut_ad(tree);
ut_ad(tree->magic_n == DICT_TREE_MAGIC_N);
return(tree->space);
}
/*************************************************************************
Sets the space id of the root of the index tree. */
UNIV_INLINE
void
dict_tree_set_space(
/*================*/
dict_tree_t* tree, /* in: tree */
ulint space) /* in: space id */
{
ut_ad(tree);
ut_ad(tree->magic_n == DICT_TREE_MAGIC_N);
tree->space = space;
}
/*************************************************************************
Gets the page number of the root of the index tree. */
UNIV_INLINE
ulint
dict_tree_get_page(
/*===============*/
/* out: page number */
dict_tree_t* tree) /* in: tree */
{
ut_ad(tree);
ut_ad(tree->magic_n == DICT_TREE_MAGIC_N);
return(tree->page);
}
/*************************************************************************
Sets the page number of the root of index tree. */
UNIV_INLINE
void
dict_tree_set_page(
/*===============*/
dict_tree_t* tree, /* in: tree */
ulint page) /* in: page number */
{
ut_ad(tree);
ut_ad(tree->magic_n == DICT_TREE_MAGIC_N);
tree->page = page;
}
/*************************************************************************
Gets the type of the index tree. */
UNIV_INLINE
ulint
dict_tree_get_type(
/*===============*/
/* out: type */
dict_tree_t* tree) /* in: tree */
{
ut_ad(tree);
ut_ad(tree->magic_n == DICT_TREE_MAGIC_N);
return(tree->type);
}
/*************************************************************************
Gets the read-write lock of the index tree. */
UNIV_INLINE
rw_lock_t*
dict_tree_get_lock(
/*===============*/
/* out: read-write lock */
dict_tree_t* tree) /* in: tree */
{
ut_ad(tree);
ut_ad(tree->magic_n == DICT_TREE_MAGIC_N);
return(&(tree->lock));
}
/************************************************************************
Returns free space reserved for future updates of records. This is
relevant only in the case of many consecutive inserts, as updates
which make the records bigger might fragment the index. */
UNIV_INLINE
ulint
dict_tree_get_space_reserve(
/*========================*/
/* out: number of free bytes on page,
reserved for updates */
dict_tree_t* tree) /* in: a tree */
{
ut_ad(tree);
UT_NOT_USED(tree);
return(UNIV_PAGE_SIZE / 16);
}
/**************************************************************************
Checks if a table is in the dictionary cache. */
UNIV_INLINE
dict_table_t*
dict_table_check_if_in_cache_low(
/*==============================*/
/* out: table, NULL if not found */
const char* table_name) /* in: table name */
{
dict_table_t* table;
ulint table_fold;
ut_ad(table_name);
#ifdef UNIV_SYNC_DEBUG
ut_ad(mutex_own(&(dict_sys->mutex)));
#endif /* UNIV_SYNC_DEBUG */
/* Look for the table name in the hash table */
table_fold = ut_fold_string(table_name);
HASH_SEARCH(name_hash, dict_sys->table_hash, table_fold, table,
ut_strcmp(table->name, table_name) == 0);
return(table);
}
/**************************************************************************
Gets a table; loads it to the dictionary cache if necessary. A low-level
function. */
UNIV_INLINE
dict_table_t*
dict_table_get_low(
/*===============*/
/* out: table, NULL if not found */
const char* table_name) /* in: table name */
{
dict_table_t* table;
ut_ad(table_name);
#ifdef UNIV_SYNC_DEBUG
ut_ad(mutex_own(&(dict_sys->mutex)));
#endif /* UNIV_SYNC_DEBUG */
table = dict_table_check_if_in_cache_low(table_name);
if (table == NULL) {
table = dict_load_table(table_name);
}
return(table);
}
/**************************************************************************
Returns a table object, based on table id, and memoryfixes it. */
UNIV_INLINE
dict_table_t*
dict_table_get_on_id_low(
/*=====================*/
/* out: table, NULL if does not exist */
dulint table_id, /* in: table id */
trx_t* trx) /* in: transaction handle */
{
dict_table_t* table;
ulint fold;
#ifdef UNIV_SYNC_DEBUG
ut_ad(mutex_own(&(dict_sys->mutex)));
#endif /* UNIV_SYNC_DEBUG */
UT_NOT_USED(trx);
/* Look for the table name in the hash table */
fold = ut_fold_dulint(table_id);
HASH_SEARCH(id_hash, dict_sys->table_id_hash, fold, table,
ut_dulint_cmp(table->id, table_id) == 0);
if (table == NULL) {
table = dict_load_table_on_id(table_id);
}
if (table != NULL) {
table->mem_fix++;
/* lock_push(trx, table, LOCK_DICT_MEM_FIX) */
}
/* TODO: should get the type information from MySQL */
return(table);
}
/**************************************************************************
Releases a table from being memoryfixed. Currently this has no relevance. */
UNIV_INLINE
void
dict_table_release(
/*===============*/
dict_table_t* table) /* in: table to be released */
{
mutex_enter(&(dict_sys->mutex));
table->mem_fix--;
mutex_exit(&(dict_sys->mutex));
}
/**************************************************************************
Returns an index object. */
UNIV_INLINE
dict_index_t*
dict_table_get_index(
/*=================*/
/* out: index, NULL if does not exist */
dict_table_t* table, /* in: table */
const char* name) /* in: index name */
{
dict_index_t* index = NULL;
index = dict_table_get_first_index(table);
while (index != NULL) {
if (ut_strcmp(name, index->name) == 0) {
break;
}
index = dict_table_get_next_index(index);
}
return(index);
}

99
include/dict0load.h Normal file
View file

@ -0,0 +1,99 @@
/******************************************************
Loads to the memory cache database object definitions
from dictionary tables
(c) 1996 Innobase Oy
Created 4/24/1996 Heikki Tuuri
*******************************************************/
#ifndef dict0load_h
#define dict0load_h
#include "univ.i"
#include "dict0types.h"
#include "ut0byte.h"
/************************************************************************
In a crash recovery we already have all the tablespace objects created.
This function compares the space id information in the InnoDB data dictionary
to what we already read with fil_load_single_table_tablespaces().
In a normal startup, we create the tablespace objects for every table in
InnoDB's data dictionary, if the corresponding .ibd file exists.
We also scan the biggest space id, and store it to fil_system. */
void
dict_check_tablespaces_and_store_max_id(
/*====================================*/
ibool in_crash_recovery); /* in: are we doing a crash recovery */
/************************************************************************
Finds the first table name in the given database. */
char*
dict_get_first_table_name_in_db(
/*============================*/
/* out, own: table name, NULL if
does not exist; the caller must free
the memory in the string! */
const char* name); /* in: database name which ends to '/' */
/************************************************************************
Loads a table definition and also all its index definitions, and also
the cluster definition if the table is a member in a cluster. Also loads
all foreign key constraints where the foreign key is in the table or where
a foreign key references columns in this table. */
dict_table_t*
dict_load_table(
/*============*/
/* out: table, NULL if does not exist;
if the table is stored in an .ibd file,
but the file does not exist,
then we set the ibd_file_missing flag TRUE
in the table object we return */
const char* name); /* in: table name in the
databasename/tablename format */
/***************************************************************************
Loads a table object based on the table id. */
dict_table_t*
dict_load_table_on_id(
/*==================*/
/* out: table; NULL if table does not exist */
dulint table_id); /* in: table id */
/************************************************************************
This function is called when the database is booted.
Loads system table index definitions except for the clustered index which
is added to the dictionary cache at booting before calling this function. */
void
dict_load_sys_table(
/*================*/
dict_table_t* table); /* in: system table */
/***************************************************************************
Loads foreign key constraints where the table is either the foreign key
holder or where the table is referenced by a foreign key. Adds these
constraints to the data dictionary. Note that we know that the dictionary
cache already contains all constraints where the other relevant table is
already in the dictionary cache. */
ulint
dict_load_foreigns(
/*===============*/
/* out: DB_SUCCESS or error code */
const char* table_name, /* in: table name */
ibool check_types); /* in: TRUE=check type compatibility */
/************************************************************************
Prints to the standard output information on all tables found in the data
dictionary system table. */
void
dict_print(void);
/*============*/
#ifndef UNIV_NONINL
#include "dict0load.ic"
#endif
#endif

9
include/dict0load.ic Normal file
View file

@ -0,0 +1,9 @@
/******************************************************
Loads to the memory cache database object definitions
from dictionary tables
(c) 1996 Innobase Oy
Created 4/24/1996 Heikki Tuuri
*******************************************************/

444
include/dict0mem.h Normal file
View file

@ -0,0 +1,444 @@
/******************************************************
Data dictionary memory object creation
(c) 1996 Innobase Oy
Created 1/8/1996 Heikki Tuuri
*******************************************************/
#ifndef dict0mem_h
#define dict0mem_h
#include "univ.i"
#include "dict0types.h"
#include "data0type.h"
#include "data0data.h"
#include "mem0mem.h"
#include "rem0types.h"
#include "btr0types.h"
#include "ut0mem.h"
#include "ut0lst.h"
#include "ut0rnd.h"
#include "ut0byte.h"
#include "sync0rw.h"
#include "lock0types.h"
#include "hash0hash.h"
#include "que0types.h"
/* Type flags of an index: OR'ing of the flags is allowed to define a
combination of types */
#define DICT_CLUSTERED 1 /* clustered index */
#define DICT_UNIQUE 2 /* unique index */
#define DICT_UNIVERSAL 4 /* index which can contain records from any
other index */
#define DICT_IBUF 8 /* insert buffer tree */
/* Flags for ordering an index field: OR'ing of the flags allowed */
#define DICT_DESCEND 1 /* in descending order (default ascending) */
/* Types for a table object */
#define DICT_TABLE_ORDINARY 1
#define DICT_TABLE_CLUSTER_MEMBER 2
#define DICT_TABLE_CLUSTER 3 /* this means that the table is
really a cluster definition */
/**************************************************************************
Creates a table memory object. */
dict_table_t*
dict_mem_table_create(
/*==================*/
/* out, own: table object */
const char* name, /* in: table name */
ulint space, /* in: space where the clustered index
of the table is placed; this parameter
is ignored if the table is made
a member of a cluster */
ulint n_cols, /* in: number of columns */
ibool comp); /* in: TRUE=compact page format */
/**************************************************************************
Creates a cluster memory object. */
dict_cluster_t*
dict_mem_cluster_create(
/*====================*/
/* out, own: cluster object (where the
type dict_cluster_t == dict_table_t) */
const char* name, /* in: cluster name */
ulint space, /* in: space where the clustered
indexes of the member tables are
placed */
ulint n_cols, /* in: number of columns */
ulint mix_len); /* in: length of the common key prefix
in the cluster */
/**************************************************************************
Declares a non-published table as a member in a cluster. */
void
dict_mem_table_make_cluster_member(
/*===============================*/
dict_table_t* table, /* in: non-published table */
const char* cluster_name); /* in: cluster name */
/**************************************************************************
Adds a column definition to a table. */
void
dict_mem_table_add_col(
/*===================*/
dict_table_t* table, /* in: table */
const char* name, /* in: column name */
ulint mtype, /* in: main datatype */
ulint prtype, /* in: precise type */
ulint len, /* in: length */
ulint prec); /* in: precision */
/**************************************************************************
Creates an index memory object. */
dict_index_t*
dict_mem_index_create(
/*==================*/
/* out, own: index object */
const char* table_name, /* in: table name */
const char* index_name, /* in: index name */
ulint space, /* in: space where the index tree is
placed, ignored if the index is of
the clustered type */
ulint type, /* in: DICT_UNIQUE,
DICT_CLUSTERED, ... ORed */
ulint n_fields); /* in: number of fields */
/**************************************************************************
Adds a field definition to an index. NOTE: does not take a copy
of the column name if the field is a column. The memory occupied
by the column name may be released only after publishing the index. */
void
dict_mem_index_add_field(
/*=====================*/
dict_index_t* index, /* in: index */
const char* name, /* in: column name */
ulint order, /* in: order criterion; 0 means an
ascending order */
ulint prefix_len); /* in: 0 or the column prefix length
in a MySQL index like
INDEX (textcol(25)) */
/**************************************************************************
Frees an index memory object. */
void
dict_mem_index_free(
/*================*/
dict_index_t* index); /* in: index */
/**************************************************************************
Creates and initializes a foreign constraint memory object. */
dict_foreign_t*
dict_mem_foreign_create(void);
/*=========================*/
/* out, own: foreign constraint struct */
/* Data structure for a column in a table */
struct dict_col_struct{
hash_node_t hash; /* hash chain node */
ulint ind; /* table column position (they are numbered
starting from 0) */
ulint clust_pos;/* position of the column in the
clustered index */
ulint ord_part;/* count of how many times this column
appears in ordering fields of an index */
const char* name; /* name */
dtype_t type; /* data type */
dict_table_t* table; /* back pointer to table of this column */
ulint aux; /* this is used as an auxiliary variable
in some of the functions below */
};
/* DICT_MAX_INDEX_COL_LEN is measured in bytes and is the max index column
length + 1. Starting from 4.1.6, we set it to < 3 * 256, so that one can
create a column prefix index on 255 characters of a TEXT field also in the
UTF-8 charset. In that charset, a character may take at most 3 bytes. */
#define DICT_MAX_INDEX_COL_LEN 768
/* Data structure for a field in an index */
struct dict_field_struct{
dict_col_t* col; /* pointer to the table column */
const char* name; /* name of the column */
ulint order; /* flags for ordering this field:
DICT_DESCEND, ... */
ulint prefix_len; /* 0 or the length of the column
prefix in bytes in a MySQL index of
type, e.g., INDEX (textcol(25));
must be smaller than
DICT_MAX_INDEX_COL_LEN; NOTE that
in the UTF-8 charset, MySQL sets this
to 3 * the prefix len in UTF-8 chars */
ulint fixed_len; /* 0 or the fixed length of the
column if smaller than
DICT_MAX_INDEX_COL_LEN */
ulint fixed_offs; /* offset to the field, or
ULINT_UNDEFINED if it is not fixed
within the record (due to preceding
variable-length fields) */
};
/* Data structure for an index tree */
struct dict_tree_struct{
ulint type; /* tree type */
dulint id; /* id of the index stored in the tree, in the
case of a mixed index, the id of the clustered
index of the cluster table */
ulint space; /* space of index tree */
ulint page; /* index tree root page number */
byte pad[64];/* Padding to prevent other memory hotspots on
the same memory cache line */
rw_lock_t lock; /* read-write lock protecting the upper levels
of the index tree */
ulint mem_fix;/* count of how many times this tree
struct has been memoryfixed (by mini-
transactions wanting to access the index
tree) */
UT_LIST_BASE_NODE_T(dict_index_t)
tree_indexes; /* list of indexes stored in the
index tree: if the tree is not of the
mixed type there is only one index in
the list; if the tree is of the mixed
type, the first index in the list is the
index of the cluster which owns the tree */
ulint magic_n;/* magic number */
};
#define DICT_TREE_MAGIC_N 7545676
/* Data structure for an index */
struct dict_index_struct{
dulint id; /* id of the index */
mem_heap_t* heap; /* memory heap */
ulint type; /* index type */
const char* name; /* index name */
const char* table_name; /* table name */
dict_table_t* table; /* back pointer to table */
ulint space; /* space where the index tree is placed */
ulint trx_id_offset;/* position of the the trx id column
in a clustered index record, if the fields
before it are known to be of a fixed size,
0 otherwise */
ulint n_user_defined_cols;
/* number of columns the user defined to
be in the index: in the internal
representation we add more columns */
ulint n_uniq; /* number of fields from the beginning
which are enough to determine an index
entry uniquely */
ulint n_def; /* number of fields defined so far */
ulint n_fields;/* number of fields in the index */
dict_field_t* fields; /* array of field descriptions */
ulint n_nullable;/* number of nullable fields */
UT_LIST_NODE_T(dict_index_t)
indexes;/* list of indexes of the table */
dict_tree_t* tree; /* index tree struct */
UT_LIST_NODE_T(dict_index_t)
tree_indexes; /* list of indexes of the same index
tree */
ibool cached; /* TRUE if the index object is in the
dictionary cache */
btr_search_t* search_info; /* info used in optimistic searches */
/*----------------------*/
ib_longlong* stat_n_diff_key_vals;
/* approximate number of different key values
for this index, for each n-column prefix
where n <= dict_get_n_unique(index); we
periodically calculate new estimates */
ulint stat_index_size;
/* approximate index size in database pages */
ulint stat_n_leaf_pages;
/* approximate number of leaf pages in the
index tree */
ulint magic_n;/* magic number */
};
/* Data structure for a foreign key constraint; an example:
FOREIGN KEY (A, B) REFERENCES TABLE2 (C, D) */
struct dict_foreign_struct{
mem_heap_t* heap; /* this object is allocated from
this memory heap */
char* id; /* id of the constraint as a
null-terminated string */
ulint type; /* 0 or DICT_FOREIGN_ON_DELETE_CASCADE
or DICT_FOREIGN_ON_DELETE_SET_NULL */
char* foreign_table_name;/* foreign table name */
dict_table_t* foreign_table; /* table where the foreign key is */
const char** foreign_col_names;/* names of the columns in the
foreign key */
char* referenced_table_name;/* referenced table name */
dict_table_t* referenced_table;/* table where the referenced key
is */
const char** referenced_col_names;/* names of the referenced
columns in the referenced table */
ulint n_fields; /* number of indexes' first fields
for which the the foreign key
constraint is defined: we allow the
indexes to contain more fields than
mentioned in the constraint, as long
as the first fields are as mentioned */
dict_index_t* foreign_index; /* foreign index; we require that
both tables contain explicitly defined
indexes for the constraint: InnoDB
does not generate new indexes
implicitly */
dict_index_t* referenced_index;/* referenced index */
UT_LIST_NODE_T(dict_foreign_t)
foreign_list; /* list node for foreign keys of the
table */
UT_LIST_NODE_T(dict_foreign_t)
referenced_list;/* list node for referenced keys of the
table */
};
/* The flags for ON_UPDATE and ON_DELETE can be ORed; the default is that
a foreign key constraint is enforced, therefore RESTRICT just means no flag */
#define DICT_FOREIGN_ON_DELETE_CASCADE 1
#define DICT_FOREIGN_ON_DELETE_SET_NULL 2
#define DICT_FOREIGN_ON_UPDATE_CASCADE 4
#define DICT_FOREIGN_ON_UPDATE_SET_NULL 8
#define DICT_FOREIGN_ON_DELETE_NO_ACTION 16
#define DICT_FOREIGN_ON_UPDATE_NO_ACTION 32
#define DICT_INDEX_MAGIC_N 76789786
/* Data structure for a database table */
struct dict_table_struct{
dulint id; /* id of the table or cluster */
ulint type; /* DICT_TABLE_ORDINARY, ... */
mem_heap_t* heap; /* memory heap */
const char* name; /* table name */
const char* dir_path_of_temp_table;/* NULL or the directory path
where a TEMPORARY table that was explicitly
created by a user should be placed if
innodb_file_per_table is defined in my.cnf;
in Unix this is usually /tmp/..., in Windows
\temp\... */
ulint space; /* space where the clustered index of the
table is placed */
ibool ibd_file_missing;/* TRUE if this is in a single-table
tablespace and the .ibd file is missing; then
we must return in ha_innodb.cc an error if the
user tries to query such an orphaned table */
ibool tablespace_discarded;/* this flag is set TRUE when the
user calls DISCARD TABLESPACE on this table,
and reset to FALSE in IMPORT TABLESPACE */
ibool comp; /* flag: TRUE=compact page format */
hash_node_t name_hash; /* hash chain node */
hash_node_t id_hash; /* hash chain node */
ulint n_def; /* number of columns defined so far */
ulint n_cols; /* number of columns */
dict_col_t* cols; /* array of column descriptions */
UT_LIST_BASE_NODE_T(dict_index_t)
indexes; /* list of indexes of the table */
UT_LIST_BASE_NODE_T(dict_foreign_t)
foreign_list;/* list of foreign key constraints
in the table; these refer to columns
in other tables */
UT_LIST_BASE_NODE_T(dict_foreign_t)
referenced_list;/* list of foreign key constraints
which refer to this table */
UT_LIST_NODE_T(dict_table_t)
table_LRU; /* node of the LRU list of tables */
ulint mem_fix;/* count of how many times the table
and its indexes has been fixed in memory;
currently NOT used */
ulint n_mysql_handles_opened;
/* count of how many handles MySQL has opened
to this table; dropping of the table is
NOT allowed until this count gets to zero;
MySQL does NOT itself check the number of
open handles at drop */
ulint n_foreign_key_checks_running;
/* count of how many foreign key check
operations are currently being performed
on the table: we cannot drop the table while
there are foreign key checks running on
it! */
ibool cached; /* TRUE if the table object has been added
to the dictionary cache */
lock_t* auto_inc_lock;/* a buffer for an auto-inc lock
for this table: we allocate the memory here
so that individual transactions can get it
and release it without a need to allocate
space from the lock heap of the trx:
otherwise the lock heap would grow rapidly
if we do a large insert from a select */
dulint query_cache_inv_trx_id;
/* transactions whose trx id < than this
number are not allowed to store to the MySQL
query cache or retrieve from it; when a trx
with undo logs commits, it sets this to the
value of the trx id counter for the tables it
had an IX lock on */
UT_LIST_BASE_NODE_T(lock_t)
locks; /* list of locks on the table */
/*----------------------*/
dulint mix_id; /* if the table is a member in a cluster,
this is its mix id */
ulint mix_len;/* if the table is a cluster or a member
this is the common key prefix lenght */
ulint mix_id_len;/* mix id length in a compressed form */
byte mix_id_buf[12];
/* mix id of a mixed table written in
a compressed form */
const char* cluster_name; /* if the table is a member in a
cluster, this is the name of the cluster */
/*----------------------*/
ibool does_not_fit_in_memory;
/* this field is used to specify in simulations
tables which are so big that disk should be
accessed: disk access is simulated by
putting the thread to sleep for a while;
NOTE that this flag is not stored to the data
dictionary on disk, and the database will
forget about value TRUE if it has to reload
the table definition from disk */
/*----------------------*/
ib_longlong stat_n_rows;
/* approximate number of rows in the table;
we periodically calculate new estimates */
ulint stat_clustered_index_size;
/* approximate clustered index size in
database pages */
ulint stat_sum_of_other_index_sizes;
/* other indexes in database pages */
ibool stat_initialized; /* TRUE if statistics have
been calculated the first time
after database startup or table creation */
ulint stat_modified_counter;
/* when a row is inserted, updated, or deleted,
we add 1 to this number; we calculate new
estimates for the stat_... values for the
table and the indexes at an interval of 2 GB
or when about 1 / 16 of table has been
modified; also when the estimate operation is
called for MySQL SHOW TABLE STATUS; the
counter is reset to zero at statistics
calculation; this counter is not protected by
any latch, because this is only used for
heuristics */
/*----------------------*/
mutex_t autoinc_mutex;
/* mutex protecting the autoincrement
counter */
ibool autoinc_inited;
/* TRUE if the autoinc counter has been
inited; MySQL gets the init value by executing
SELECT MAX(auto inc column) */
ib_longlong autoinc;/* autoinc counter value to give to the
next inserted row */
ulint magic_n;/* magic number */
};
#define DICT_TABLE_MAGIC_N 76333786
#ifndef UNIV_NONINL
#include "dict0mem.ic"
#endif
#endif

9
include/dict0mem.ic Normal file
View file

@ -0,0 +1,9 @@
/**********************************************************************
Data dictionary memory object creation
(c) 1996 Innobase Oy
Created 1/8/1996 Heikki Tuuri
***********************************************************************/

28
include/dict0types.h Normal file
View file

@ -0,0 +1,28 @@
/******************************************************
Data dictionary global types
(c) 1996 Innobase Oy
Created 1/8/1996 Heikki Tuuri
*******************************************************/
#ifndef dict0types_h
#define dict0types_h
typedef struct dict_sys_struct dict_sys_t;
typedef struct dict_col_struct dict_col_t;
typedef struct dict_field_struct dict_field_t;
typedef struct dict_index_struct dict_index_t;
typedef struct dict_tree_struct dict_tree_t;
typedef struct dict_table_struct dict_table_t;
typedef struct dict_foreign_struct dict_foreign_t;
/* A cluster object is a table object with the type field set to
DICT_CLUSTERED */
typedef dict_table_t dict_cluster_t;
typedef struct ind_node_struct ind_node_t;
typedef struct tab_node_struct tab_node_t;
#endif

166
include/dyn0dyn.h Normal file
View file

@ -0,0 +1,166 @@
/******************************************************
The dynamically allocated array
(c) 1996 Innobase Oy
Created 2/5/1996 Heikki Tuuri
*******************************************************/
#ifndef dyn0dyn_h
#define dyn0dyn_h
#include "univ.i"
#include "ut0lst.h"
#include "mem0mem.h"
typedef struct dyn_block_struct dyn_block_t;
typedef dyn_block_t dyn_array_t;
/* This is the initial 'payload' size of a dynamic array;
this must be > MLOG_BUF_MARGIN + 30! */
#define DYN_ARRAY_DATA_SIZE 512
/*************************************************************************
Initializes a dynamic array. */
UNIV_INLINE
dyn_array_t*
dyn_array_create(
/*=============*/
/* out: initialized dyn array */
dyn_array_t* arr); /* in: pointer to a memory buffer of
size sizeof(dyn_array_t) */
/****************************************************************
Frees a dynamic array. */
UNIV_INLINE
void
dyn_array_free(
/*===========*/
dyn_array_t* arr); /* in: dyn array */
/*************************************************************************
Makes room on top of a dyn array and returns a pointer to a buffer in it.
After copying the elements, the caller must close the buffer using
dyn_array_close. */
UNIV_INLINE
byte*
dyn_array_open(
/*===========*/
/* out: pointer to the buffer */
dyn_array_t* arr, /* in: dynamic array */
ulint size); /* in: size in bytes of the buffer; MUST be
smaller than DYN_ARRAY_DATA_SIZE! */
/*************************************************************************
Closes the buffer returned by dyn_array_open. */
UNIV_INLINE
void
dyn_array_close(
/*============*/
dyn_array_t* arr, /* in: dynamic array */
byte* ptr); /* in: buffer space from ptr up was not used */
/*************************************************************************
Makes room on top of a dyn array and returns a pointer to
the added element. The caller must copy the element to
the pointer returned. */
UNIV_INLINE
void*
dyn_array_push(
/*===========*/
/* out: pointer to the element */
dyn_array_t* arr, /* in: dynamic array */
ulint size); /* in: size in bytes of the element */
/****************************************************************
Returns pointer to an element in dyn array. */
UNIV_INLINE
void*
dyn_array_get_element(
/*==================*/
/* out: pointer to element */
dyn_array_t* arr, /* in: dyn array */
ulint pos); /* in: position of element as bytes
from array start */
/****************************************************************
Returns the size of stored data in a dyn array. */
UNIV_INLINE
ulint
dyn_array_get_data_size(
/*====================*/
/* out: data size in bytes */
dyn_array_t* arr); /* in: dyn array */
/****************************************************************
Gets the first block in a dyn array. */
UNIV_INLINE
dyn_block_t*
dyn_array_get_first_block(
/*======================*/
dyn_array_t* arr); /* in: dyn array */
/****************************************************************
Gets the last block in a dyn array. */
UNIV_INLINE
dyn_block_t*
dyn_array_get_last_block(
/*=====================*/
dyn_array_t* arr); /* in: dyn array */
/************************************************************************
Gets the next block in a dyn array. */
UNIV_INLINE
dyn_block_t*
dyn_array_get_next_block(
/*=====================*/
/* out: pointer to next, NULL if end of list */
dyn_array_t* arr, /* in: dyn array */
dyn_block_t* block); /* in: dyn array block */
/************************************************************************
Gets the number of used bytes in a dyn array block. */
UNIV_INLINE
ulint
dyn_block_get_used(
/*===============*/
/* out: number of bytes used */
dyn_block_t* block); /* in: dyn array block */
/************************************************************************
Gets pointer to the start of data in a dyn array block. */
UNIV_INLINE
byte*
dyn_block_get_data(
/*===============*/
/* out: pointer to data */
dyn_block_t* block); /* in: dyn array block */
/************************************************************
Pushes n bytes to a dyn array. */
UNIV_INLINE
void
dyn_push_string(
/*============*/
dyn_array_t* arr, /* in: dyn array */
const byte* str, /* in: string to write */
ulint len); /* in: string length */
/*#################################################################*/
/* NOTE! Do not use the fields of the struct directly: the definition
appears here only for the compiler to know its size! */
struct dyn_block_struct{
mem_heap_t* heap; /* in the first block this is != NULL
if dynamic allocation has been needed */
ulint used; /* number of data bytes used in this block */
byte data[DYN_ARRAY_DATA_SIZE];
/* storage for array elements */
UT_LIST_BASE_NODE_T(dyn_block_t) base;
/* linear list of dyn blocks: this node is
used only in the first block */
UT_LIST_NODE_T(dyn_block_t) list;
/* linear list node: used in all blocks */
#ifdef UNIV_DEBUG
ulint buf_end;/* only in the debug version: if dyn array is
opened, this is the buffer end offset, else
this is 0 */
ulint magic_n;
#endif
};
#ifndef UNIV_NONINL
#include "dyn0dyn.ic"
#endif
#endif

344
include/dyn0dyn.ic Normal file
View file

@ -0,0 +1,344 @@
/******************************************************
The dynamically allocated array
(c) 1996 Innobase Oy
Created 2/5/1996 Heikki Tuuri
*******************************************************/
#define DYN_BLOCK_MAGIC_N 375767
#define DYN_BLOCK_FULL_FLAG 0x1000000UL
/****************************************************************
Adds a new block to a dyn array. */
dyn_block_t*
dyn_array_add_block(
/*================*/
/* out: created block */
dyn_array_t* arr); /* in: dyn array */
/****************************************************************
Gets the first block in a dyn array. */
UNIV_INLINE
dyn_block_t*
dyn_array_get_first_block(
/*======================*/
dyn_array_t* arr) /* in: dyn array */
{
return(arr);
}
/****************************************************************
Gets the last block in a dyn array. */
UNIV_INLINE
dyn_block_t*
dyn_array_get_last_block(
/*=====================*/
dyn_array_t* arr) /* in: dyn array */
{
if (arr->heap == NULL) {
return(arr);
}
return(UT_LIST_GET_LAST(arr->base));
}
/************************************************************************
Gets the next block in a dyn array. */
UNIV_INLINE
dyn_block_t*
dyn_array_get_next_block(
/*=====================*/
/* out: pointer to next, NULL if end of list */
dyn_array_t* arr, /* in: dyn array */
dyn_block_t* block) /* in: dyn array block */
{
ut_ad(arr && block);
if (arr->heap == NULL) {
ut_ad(arr == block);
return(NULL);
}
return(UT_LIST_GET_NEXT(list, block));
}
/************************************************************************
Gets the number of used bytes in a dyn array block. */
UNIV_INLINE
ulint
dyn_block_get_used(
/*===============*/
/* out: number of bytes used */
dyn_block_t* block) /* in: dyn array block */
{
ut_ad(block);
return((block->used) & ~DYN_BLOCK_FULL_FLAG);
}
/************************************************************************
Gets pointer to the start of data in a dyn array block. */
UNIV_INLINE
byte*
dyn_block_get_data(
/*===============*/
/* out: pointer to data */
dyn_block_t* block) /* in: dyn array block */
{
ut_ad(block);
return(block->data);
}
/*************************************************************************
Initializes a dynamic array. */
UNIV_INLINE
dyn_array_t*
dyn_array_create(
/*=============*/
/* out: initialized dyn array */
dyn_array_t* arr) /* in: pointer to a memory buffer of
size sizeof(dyn_array_t) */
{
ut_ad(arr);
ut_ad(DYN_ARRAY_DATA_SIZE < DYN_BLOCK_FULL_FLAG);
arr->heap = NULL;
arr->used = 0;
#ifdef UNIV_DEBUG
arr->buf_end = 0;
arr->magic_n = DYN_BLOCK_MAGIC_N;
#endif
return(arr);
}
/****************************************************************
Frees a dynamic array. */
UNIV_INLINE
void
dyn_array_free(
/*===========*/
dyn_array_t* arr) /* in: dyn array */
{
if (arr->heap != NULL) {
mem_heap_free(arr->heap);
}
#ifdef UNIV_DEBUG
arr->magic_n = 0;
#endif
}
/*************************************************************************
Makes room on top of a dyn array and returns a pointer to the added element.
The caller must copy the element to the pointer returned. */
UNIV_INLINE
void*
dyn_array_push(
/*===========*/
/* out: pointer to the element */
dyn_array_t* arr, /* in: dynamic array */
ulint size) /* in: size in bytes of the element */
{
dyn_block_t* block;
ulint used;
ut_ad(arr);
ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
ut_ad(size <= DYN_ARRAY_DATA_SIZE);
ut_ad(size);
block = arr;
used = block->used;
if (used + size > DYN_ARRAY_DATA_SIZE) {
/* Get the last array block */
block = dyn_array_get_last_block(arr);
used = block->used;
if (used + size > DYN_ARRAY_DATA_SIZE) {
block = dyn_array_add_block(arr);
used = block->used;
}
}
block->used = used + size;
ut_ad(block->used <= DYN_ARRAY_DATA_SIZE);
return((block->data) + used);
}
/*************************************************************************
Makes room on top of a dyn array and returns a pointer to a buffer in it.
After copying the elements, the caller must close the buffer using
dyn_array_close. */
UNIV_INLINE
byte*
dyn_array_open(
/*===========*/
/* out: pointer to the buffer */
dyn_array_t* arr, /* in: dynamic array */
ulint size) /* in: size in bytes of the buffer; MUST be
smaller than DYN_ARRAY_DATA_SIZE! */
{
dyn_block_t* block;
ulint used;
ut_ad(arr);
ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
ut_ad(size <= DYN_ARRAY_DATA_SIZE);
ut_ad(size);
block = arr;
used = block->used;
if (used + size > DYN_ARRAY_DATA_SIZE) {
/* Get the last array block */
block = dyn_array_get_last_block(arr);
used = block->used;
if (used + size > DYN_ARRAY_DATA_SIZE) {
block = dyn_array_add_block(arr);
used = block->used;
ut_a(size <= DYN_ARRAY_DATA_SIZE);
}
}
ut_ad(block->used <= DYN_ARRAY_DATA_SIZE);
#ifdef UNIV_DEBUG
ut_ad(arr->buf_end == 0);
arr->buf_end = used + size;
#endif
return((block->data) + used);
}
/*************************************************************************
Closes the buffer returned by dyn_array_open. */
UNIV_INLINE
void
dyn_array_close(
/*============*/
dyn_array_t* arr, /* in: dynamic array */
byte* ptr) /* in: buffer space from ptr up was not used */
{
dyn_block_t* block;
ut_ad(arr);
ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
block = dyn_array_get_last_block(arr);
ut_ad(arr->buf_end + block->data >= ptr);
block->used = ptr - block->data;
ut_ad(block->used <= DYN_ARRAY_DATA_SIZE);
#ifdef UNIV_DEBUG
arr->buf_end = 0;
#endif
}
/****************************************************************
Returns pointer to an element in dyn array. */
UNIV_INLINE
void*
dyn_array_get_element(
/*==================*/
/* out: pointer to element */
dyn_array_t* arr, /* in: dyn array */
ulint pos) /* in: position of element as bytes
from array start */
{
dyn_block_t* block;
ulint used;
ut_ad(arr);
ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
/* Get the first array block */
block = dyn_array_get_first_block(arr);
if (arr->heap != NULL) {
used = dyn_block_get_used(block);
while (pos >= used) {
pos -= used;
block = UT_LIST_GET_NEXT(list, block);
ut_ad(block);
used = dyn_block_get_used(block);
}
}
ut_ad(block);
ut_ad(dyn_block_get_used(block) >= pos);
return(block->data + pos);
}
/****************************************************************
Returns the size of stored data in a dyn array. */
UNIV_INLINE
ulint
dyn_array_get_data_size(
/*====================*/
/* out: data size in bytes */
dyn_array_t* arr) /* in: dyn array */
{
dyn_block_t* block;
ulint sum = 0;
ut_ad(arr);
ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
if (arr->heap == NULL) {
return(arr->used);
}
/* Get the first array block */
block = dyn_array_get_first_block(arr);
while (block != NULL) {
sum += dyn_block_get_used(block);
block = dyn_array_get_next_block(arr, block);
}
return(sum);
}
/************************************************************
Pushes n bytes to a dyn array. */
UNIV_INLINE
void
dyn_push_string(
/*============*/
dyn_array_t* arr, /* in: dyn array */
const byte* str, /* in: string to write */
ulint len) /* in: string length */
{
ulint n_copied;
while (len > 0) {
if (len > DYN_ARRAY_DATA_SIZE) {
n_copied = DYN_ARRAY_DATA_SIZE;
} else {
n_copied = len;
}
memcpy(dyn_array_push(arr, n_copied), str, n_copied);
str += n_copied;
len -= n_copied;
}
}

97
include/eval0eval.h Normal file
View file

@ -0,0 +1,97 @@
/******************************************************
SQL evaluator: evaluates simple data structures, like expressions, in
a query graph
(c) 1997 Innobase Oy
Created 12/29/1997 Heikki Tuuri
*******************************************************/
#ifndef eval0eval_h
#define eval0eval_h
#include "univ.i"
#include "que0types.h"
#include "pars0sym.h"
#include "pars0pars.h"
/*********************************************************************
Free the buffer from global dynamic memory for a value of a que_node,
if it has been allocated in the above function. The freeing for pushed
column values is done in sel_col_prefetch_buf_free. */
void
eval_node_free_val_buf(
/*===================*/
que_node_t* node); /* in: query graph node */
/*********************************************************************
Evaluates a symbol table symbol. */
UNIV_INLINE
void
eval_sym(
/*=====*/
sym_node_t* sym_node); /* in: symbol table node */
/*********************************************************************
Evaluates an expression. */
UNIV_INLINE
void
eval_exp(
/*=====*/
que_node_t* exp_node); /* in: expression */
/*********************************************************************
Sets an integer value as the value of an expression node. */
UNIV_INLINE
void
eval_node_set_int_val(
/*==================*/
que_node_t* node, /* in: expression node */
lint val); /* in: value to set */
/*********************************************************************
Gets an integer value from an expression node. */
UNIV_INLINE
lint
eval_node_get_int_val(
/*==================*/
/* out: integer value */
que_node_t* node); /* in: expression node */
/*********************************************************************
Copies a binary string value as the value of a query graph node. Allocates a
new buffer if necessary. */
UNIV_INLINE
void
eval_node_copy_and_alloc_val(
/*=========================*/
que_node_t* node, /* in: query graph node */
byte* str, /* in: binary string */
ulint len); /* in: string length or UNIV_SQL_NULL */
/*********************************************************************
Copies a query node value to another node. */
UNIV_INLINE
void
eval_node_copy_val(
/*===============*/
que_node_t* node1, /* in: node to copy to */
que_node_t* node2); /* in: node to copy from */
/*********************************************************************
Gets a iboolean value from a query node. */
UNIV_INLINE
ibool
eval_node_get_ibool_val(
/*===================*/
/* out: iboolean value */
que_node_t* node); /* in: query graph node */
/*********************************************************************
Evaluates a comparison node. */
ibool
eval_cmp(
/*=====*/
/* out: the result of the comparison */
func_node_t* cmp_node); /* in: comparison node */
#ifndef UNIV_NONINL
#include "eval0eval.ic"
#endif
#endif

234
include/eval0eval.ic Normal file
View file

@ -0,0 +1,234 @@
/******************************************************
SQL evaluator: evaluates simple data structures, like expressions, in
a query graph
(c) 1997 Innobase Oy
Created 12/29/1997 Heikki Tuuri
*******************************************************/
#include "que0que.h"
#include "rem0cmp.h"
#include "pars0grm.h"
/*********************************************************************
Evaluates a function node. */
void
eval_func(
/*======*/
func_node_t* func_node); /* in: function node */
/*********************************************************************
Allocate a buffer from global dynamic memory for a value of a que_node.
NOTE that this memory must be explicitly freed when the query graph is
freed. If the node already has allocated buffer, that buffer is freed
here. NOTE that this is the only function where dynamic memory should be
allocated for a query node val field. */
byte*
eval_node_alloc_val_buf(
/*====================*/
/* out: pointer to allocated buffer */
que_node_t* node, /* in: query graph node; sets the val field
data field to point to the new buffer, and
len field equal to size */
ulint size); /* in: buffer size */
/*********************************************************************
Allocates a new buffer if needed. */
UNIV_INLINE
byte*
eval_node_ensure_val_buf(
/*=====================*/
/* out: pointer to buffer */
que_node_t* node, /* in: query graph node; sets the val field
data field to point to the new buffer, and
len field equal to size */
ulint size) /* in: buffer size */
{
dfield_t* dfield;
byte* data;
dfield = que_node_get_val(node);
dfield_set_len(dfield, size);
data = dfield_get_data(dfield);
if (!data || que_node_get_val_buf_size(node) < size) {
data = eval_node_alloc_val_buf(node, size);
}
return(data);
}
/*********************************************************************
Evaluates a symbol table symbol. */
UNIV_INLINE
void
eval_sym(
/*=====*/
sym_node_t* sym_node) /* in: symbol table node */
{
ut_ad(que_node_get_type(sym_node) == QUE_NODE_SYMBOL);
if (sym_node->indirection) {
/* The symbol table node is an alias for a variable or a
column */
dfield_copy_data(que_node_get_val(sym_node),
que_node_get_val(sym_node->indirection));
}
}
/*********************************************************************
Evaluates an expression. */
UNIV_INLINE
void
eval_exp(
/*=====*/
que_node_t* exp_node) /* in: expression */
{
if (que_node_get_type(exp_node) == QUE_NODE_SYMBOL) {
eval_sym((sym_node_t*)exp_node);
return;
}
eval_func(exp_node);
}
/*********************************************************************
Sets an integer value as the value of an expression node. */
UNIV_INLINE
void
eval_node_set_int_val(
/*==================*/
que_node_t* node, /* in: expression node */
lint val) /* in: value to set */
{
dfield_t* dfield;
byte* data;
dfield = que_node_get_val(node);
data = dfield_get_data(dfield);
if (data == NULL) {
data = eval_node_alloc_val_buf(node, 4);
}
ut_ad(dfield_get_len(dfield) == 4);
mach_write_to_4(data, (ulint)val);
}
/*********************************************************************
Gets an integer non-SQL null value from an expression node. */
UNIV_INLINE
lint
eval_node_get_int_val(
/*==================*/
/* out: integer value */
que_node_t* node) /* in: expression node */
{
dfield_t* dfield;
dfield = que_node_get_val(node);
ut_ad(dfield_get_len(dfield) == 4);
return((int)mach_read_from_4(dfield_get_data(dfield)));
}
/*********************************************************************
Gets a iboolean value from a query node. */
UNIV_INLINE
ibool
eval_node_get_ibool_val(
/*===================*/
/* out: iboolean value */
que_node_t* node) /* in: query graph node */
{
dfield_t* dfield;
byte* data;
dfield = que_node_get_val(node);
data = dfield_get_data(dfield);
ut_ad(data != NULL);
return(mach_read_from_1(data));
}
/*********************************************************************
Sets a iboolean value as the value of a function node. */
UNIV_INLINE
void
eval_node_set_ibool_val(
/*===================*/
func_node_t* func_node, /* in: function node */
ibool val) /* in: value to set */
{
dfield_t* dfield;
byte* data;
dfield = que_node_get_val(func_node);
data = dfield_get_data(dfield);
if (data == NULL) {
/* Allocate 1 byte to hold the value */
data = eval_node_alloc_val_buf(func_node, 1);
}
ut_ad(dfield_get_len(dfield) == 1);
mach_write_to_1(data, val);
}
/*********************************************************************
Copies a binary string value as the value of a query graph node. Allocates a
new buffer if necessary. */
UNIV_INLINE
void
eval_node_copy_and_alloc_val(
/*=========================*/
que_node_t* node, /* in: query graph node */
byte* str, /* in: binary string */
ulint len) /* in: string length or UNIV_SQL_NULL */
{
byte* data;
if (len == UNIV_SQL_NULL) {
dfield_set_len(que_node_get_val(node), len);
return;
}
data = eval_node_ensure_val_buf(node, len);
ut_memcpy(data, str, len);
}
/*********************************************************************
Copies a query node value to another node. */
UNIV_INLINE
void
eval_node_copy_val(
/*===============*/
que_node_t* node1, /* in: node to copy to */
que_node_t* node2) /* in: node to copy from */
{
dfield_t* dfield2;
dfield2 = que_node_get_val(node2);
eval_node_copy_and_alloc_val(node1, dfield_get_data(dfield2),
dfield_get_len(dfield2));
}

79
include/eval0proc.h Normal file
View file

@ -0,0 +1,79 @@
/******************************************************
Executes SQL stored procedures and their control structures
(c) 1998 Innobase Oy
Created 1/20/1998 Heikki Tuuri
*******************************************************/
#ifndef eval0proc_h
#define eval0proc_h
#include "univ.i"
#include "que0types.h"
#include "pars0sym.h"
#include "pars0pars.h"
/**************************************************************************
Performs an execution step of a procedure node. */
UNIV_INLINE
que_thr_t*
proc_step(
/*======*/
/* out: query thread to run next or NULL */
que_thr_t* thr); /* in: query thread */
/**************************************************************************
Performs an execution step of an if-statement node. */
que_thr_t*
if_step(
/*====*/
/* out: query thread to run next or NULL */
que_thr_t* thr); /* in: query thread */
/**************************************************************************
Performs an execution step of a while-statement node. */
que_thr_t*
while_step(
/*=======*/
/* out: query thread to run next or NULL */
que_thr_t* thr); /* in: query thread */
/**************************************************************************
Performs an execution step of a for-loop node. */
que_thr_t*
for_step(
/*=====*/
/* out: query thread to run next or NULL */
que_thr_t* thr); /* in: query thread */
/**************************************************************************
Performs an execution step of an assignment statement node. */
que_thr_t*
assign_step(
/*========*/
/* out: query thread to run next or NULL */
que_thr_t* thr); /* in: query thread */
/**************************************************************************
Performs an execution step of a procedure call node. */
UNIV_INLINE
que_thr_t*
proc_eval_step(
/*===========*/
/* out: query thread to run next or NULL */
que_thr_t* thr); /* in: query thread */
/**************************************************************************
Performs an execution step of a return-statement node. */
que_thr_t*
return_step(
/*========*/
/* out: query thread to run next or NULL */
que_thr_t* thr); /* in: query thread */
#ifndef UNIV_NONINL
#include "eval0proc.ic"
#endif
#endif

71
include/eval0proc.ic Normal file
View file

@ -0,0 +1,71 @@
/******************************************************
Executes SQL stored procedures and their control structures
(c) 1998 Innobase Oy
Created 1/20/1998 Heikki Tuuri
*******************************************************/
#include "pars0pars.h"
#include "que0que.h"
#include "eval0eval.h"
/**************************************************************************
Performs an execution step of a procedure node. */
UNIV_INLINE
que_thr_t*
proc_step(
/*======*/
/* out: query thread to run next or NULL */
que_thr_t* thr) /* in: query thread */
{
proc_node_t* node;
ut_ad(thr);
node = thr->run_node;
ut_ad(que_node_get_type(node) == QUE_NODE_PROC);
if (thr->prev_node == que_node_get_parent(node)) {
/* Start execution from the first statement in the statement
list */
thr->run_node = node->stat_list;
} else {
/* Move to the next statement */
ut_ad(que_node_get_next(thr->prev_node) == NULL);
thr->run_node = NULL;
}
if (thr->run_node == NULL) {
thr->run_node = que_node_get_parent(node);
}
return(thr);
}
/**************************************************************************
Performs an execution step of a procedure call node. */
UNIV_INLINE
que_thr_t*
proc_eval_step(
/*===========*/
/* out: query thread to run next or NULL */
que_thr_t* thr) /* in: query thread */
{
func_node_t* node;
ut_ad(thr);
node = thr->run_node;
ut_ad(que_node_get_type(node) == QUE_NODE_FUNC);
/* Evaluate the procedure */
eval_exp(node);
thr->run_node = que_node_get_parent(node);
return(thr);
}

683
include/fil0fil.h Normal file
View file

@ -0,0 +1,683 @@
/******************************************************
The low-level file system
(c) 1995 Innobase Oy
Created 10/25/1995 Heikki Tuuri
*******************************************************/
#ifndef fil0fil_h
#define fil0fil_h
#include "univ.i"
#include "sync0rw.h"
#include "dict0types.h"
#include "ibuf0types.h"
#include "ut0byte.h"
#include "os0file.h"
/* When mysqld is run, the default directory "." is the mysqld datadir, but in
ibbackup we must set it explicitly; the patgh must NOT contain the trailing
'/' or '\' */
extern const char* fil_path_to_mysql_datadir;
/* Initial size of a single-table tablespace in pages */
#define FIL_IBD_FILE_INITIAL_SIZE 4
/* 'null' (undefined) page offset in the context of file spaces */
#define FIL_NULL ULINT32_UNDEFINED
/* Space address data type; this is intended to be used when
addresses accurate to a byte are stored in file pages. If the page part
of the address is FIL_NULL, the address is considered undefined. */
typedef byte fil_faddr_t; /* 'type' definition in C: an address
stored in a file page is a string of bytes */
#define FIL_ADDR_PAGE 0 /* first in address is the page offset */
#define FIL_ADDR_BYTE 4 /* then comes 2-byte byte offset within page*/
#define FIL_ADDR_SIZE 6 /* address size is 6 bytes */
/* A struct for storing a space address FIL_ADDR, when it is used
in C program data structures. */
typedef struct fil_addr_struct fil_addr_t;
struct fil_addr_struct{
ulint page; /* page number within a space */
ulint boffset; /* byte offset within the page */
};
/* Null file address */
extern fil_addr_t fil_addr_null;
/* The byte offsets on a file page for various variables */
#define FIL_PAGE_SPACE_OR_CHKSUM 0 /* in < MySQL-4.0.14 space id the
page belongs to (== 0) but in later
versions the 'new' checksum of the
page */
#define FIL_PAGE_OFFSET 4 /* page offset inside space */
#define FIL_PAGE_PREV 8 /* if there is a 'natural' predecessor
of the page, its offset */
#define FIL_PAGE_NEXT 12 /* if there is a 'natural' successor
of the page, its offset */
#define FIL_PAGE_LSN 16 /* lsn of the end of the newest
modification log record to the page */
#define FIL_PAGE_TYPE 24 /* file page type: FIL_PAGE_INDEX,...,
2 bytes */
#define FIL_PAGE_FILE_FLUSH_LSN 26 /* this is only defined for the
first page in a data file: the file
has been flushed to disk at least up
to this lsn */
#define FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID 34 /* starting from 4.1.x this
contains the space id of the page */
#define FIL_PAGE_DATA 38 /* start of the data on the page */
/* File page trailer */
#define FIL_PAGE_END_LSN_OLD_CHKSUM 8 /* the low 4 bytes of this are used
to store the page checksum, the
last 4 bytes should be identical
to the last 4 bytes of FIL_PAGE_LSN */
#define FIL_PAGE_DATA_END 8
/* File page types */
#define FIL_PAGE_INDEX 17855
#define FIL_PAGE_UNDO_LOG 2
#define FIL_PAGE_INODE 3
#define FIL_PAGE_IBUF_FREE_LIST 4
/* Space types */
#define FIL_TABLESPACE 501
#define FIL_LOG 502
extern ulint fil_n_log_flushes;
extern ulint fil_n_pending_log_flushes;
extern ulint fil_n_pending_tablespace_flushes;
/***********************************************************************
Returns the version number of a tablespace, -1 if not found. */
ib_longlong
fil_space_get_version(
/*==================*/
/* out: version number, -1 if the tablespace does not
exist in the memory cache */
ulint id); /* in: space id */
/***********************************************************************
Returns the latch of a file space. */
rw_lock_t*
fil_space_get_latch(
/*================*/
/* out: latch protecting storage allocation */
ulint id); /* in: space id */
/***********************************************************************
Returns the type of a file space. */
ulint
fil_space_get_type(
/*===============*/
/* out: FIL_TABLESPACE or FIL_LOG */
ulint id); /* in: space id */
/***********************************************************************
Returns the ibuf data of a file space. */
ibuf_data_t*
fil_space_get_ibuf_data(
/*====================*/
/* out: ibuf data for this space */
ulint id); /* in: space id */
/***********************************************************************
Appends a new file to the chain of files of a space. File must be closed. */
void
fil_node_create(
/*============*/
const char* name, /* in: file name (file must be closed) */
ulint size, /* in: file size in database blocks, rounded
downwards to an integer */
ulint id, /* in: space id where to append */
ibool is_raw);/* in: TRUE if a raw device or
a raw disk partition */
/********************************************************************
Drops files from the start of a file space, so that its size is cut by
the amount given. */
void
fil_space_truncate_start(
/*=====================*/
ulint id, /* in: space id */
ulint trunc_len); /* in: truncate by this much; it is an error
if this does not equal to the combined size of
some initial files in the space */
/***********************************************************************
Creates a space memory object and puts it to the 'fil system' hash table. If
there is an error, prints an error message to the .err log. */
ibool
fil_space_create(
/*=============*/
/* out: TRUE if success */
const char* name, /* in: space name */
ulint id, /* in: space id */
ulint purpose);/* in: FIL_TABLESPACE, or FIL_LOG if log */
/***********************************************************************
Frees a space object from a the tablespace memory cache. Closes the files in
the chain but does not delete them. */
ibool
fil_space_free(
/*===========*/
/* out: TRUE if success */
ulint id); /* in: space id */
/***********************************************************************
Returns the size of the space in pages. The tablespace must be cached in the
memory cache. */
ulint
fil_space_get_size(
/*===============*/
/* out: space size, 0 if space not found */
ulint id); /* in: space id */
/***********************************************************************
Checks if the pair space, page_no refers to an existing page in a tablespace
file space. The tablespace must be cached in the memory cache. */
ibool
fil_check_adress_in_tablespace(
/*===========================*/
/* out: TRUE if the address is meaningful */
ulint id, /* in: space id */
ulint page_no);/* in: page number */
/********************************************************************
Initializes the tablespace memory cache. */
void
fil_init(
/*=====*/
ulint max_n_open); /* in: max number of open files */
/***********************************************************************
Opens all log files and system tablespace data files. They stay open until the
database server shutdown. This should be called at a server startup after the
space objects for the log and the system tablespace have been created. The
purpose of this operation is to make sure we never run out of file descriptors
if we need to read from the insert buffer or to write to the log. */
void
fil_open_log_and_system_tablespace_files(void);
/*==========================================*/
/***********************************************************************
Closes all open files. There must not be any pending i/o's or not flushed
modifications in the files. */
void
fil_close_all_files(void);
/*=====================*/
/***********************************************************************
Sets the max tablespace id counter if the given number is bigger than the
previous value. */
void
fil_set_max_space_id_if_bigger(
/*===========================*/
ulint max_id);/* in: maximum known id */
/********************************************************************
Initializes the ibuf data structure for space 0 == the system tablespace.
This can be called after the file space headers have been created and the
dictionary system has been initialized. */
void
fil_ibuf_init_at_db_start(void);
/*===========================*/
/********************************************************************
Writes the flushed lsn and the latest archived log number to the page
header of the first page of each data file in the system tablespace. */
ulint
fil_write_flushed_lsn_to_data_files(
/*================================*/
/* out: DB_SUCCESS or error number */
dulint lsn, /* in: lsn to write */
ulint arch_log_no); /* in: latest archived log file number */
/***********************************************************************
Reads the flushed lsn and arch no fields from a data file at database
startup. */
void
fil_read_flushed_lsn_and_arch_log_no(
/*=================================*/
os_file_t data_file, /* in: open data file */
ibool one_read_already, /* in: TRUE if min and max parameters
below already contain sensible data */
#ifdef UNIV_LOG_ARCHIVE
ulint* min_arch_log_no, /* in/out: */
ulint* max_arch_log_no, /* in/out: */
#endif /* UNIV_LOG_ARCHIVE */
dulint* min_flushed_lsn, /* in/out: */
dulint* max_flushed_lsn); /* in/out: */
/***********************************************************************
Increments the count of pending insert buffer page merges, if space is not
being deleted. */
ibool
fil_inc_pending_ibuf_merges(
/*========================*/
/* out: TRUE if being deleted, and ibuf merges should
be skipped */
ulint id); /* in: space id */
/***********************************************************************
Decrements the count of pending insert buffer page merges. */
void
fil_decr_pending_ibuf_merges(
/*========================*/
ulint id); /* in: space id */
/***********************************************************************
Parses the body of a log record written about an .ibd file operation. That is,
the log record part after the standard (type, space id, page no) header of the
log record.
If desired, also replays the delete or rename operation if the .ibd file
exists and the space id in it matches. Replays the create operation if a file
at that path does not exist yet. If the database directory for the file to be
created does not exist, then we create the directory, too.
Note that ibbackup --apply-log sets fil_path_to_mysql_datadir to point to the
datadir that we should use in replaying the file operations. */
byte*
fil_op_log_parse_or_replay(
/*=======================*/
/* out: end of log record, or NULL if the
record was not completely contained between
ptr and end_ptr */
byte* ptr, /* in: buffer containing the log record body,
or an initial segment of it, if the record does
not fir completely between ptr and end_ptr */
byte* end_ptr, /* in: buffer end */
ulint type, /* in: the type of this log record */
ibool do_replay, /* in: TRUE if we want to replay the
operation, and not just parse the log record */
ulint space_id); /* in: if do_replay is TRUE, the space id of
the tablespace in question; otherwise
ignored */
/***********************************************************************
Deletes a single-table tablespace. The tablespace must be cached in the
memory cache. */
ibool
fil_delete_tablespace(
/*==================*/
/* out: TRUE if success */
ulint id); /* in: space id */
/***********************************************************************
Discards a single-table tablespace. The tablespace must be cached in the
memory cache. Discarding is like deleting a tablespace, but
1) we do not drop the table from the data dictionary;
2) we remove all insert buffer entries for the tablespace immediately; in DROP
TABLE they are only removed gradually in the background;
3) when the user does IMPORT TABLESPACE, the tablespace will have the same id
as it originally had. */
ibool
fil_discard_tablespace(
/*===================*/
/* out: TRUE if success */
ulint id); /* in: space id */
/***********************************************************************
Renames a single-table tablespace. The tablespace must be cached in the
tablespace memory cache. */
ibool
fil_rename_tablespace(
/*==================*/
/* out: TRUE if success */
const char* old_name, /* in: old table name in the standard
databasename/tablename format of
InnoDB, or NULL if we do the rename
based on the space id only */
ulint id, /* in: space id */
const char* new_name); /* in: new table name in the standard
databasename/tablename format
of InnoDB */
/***********************************************************************
Creates a new single-table tablespace to a database directory of MySQL.
Database directories are under the 'datadir' of MySQL. The datadir is the
directory of a running mysqld program. We can refer to it by simply the
path '.'. Tables created with CREATE TEMPORARY TABLE we place in the temp
dir of the mysqld server. */
ulint
fil_create_new_single_table_tablespace(
/*===================================*/
/* out: DB_SUCCESS or error code */
ulint* space_id, /* in/out: space id; if this is != 0,
then this is an input parameter,
otherwise output */
const char* tablename, /* in: the table name in the usual
databasename/tablename format
of InnoDB, or a dir path to a temp
table */
ibool is_temp, /* in: TRUE if a table created with
CREATE TEMPORARY TABLE */
ulint size); /* in: the initial size of the
tablespace file in pages,
must be >= FIL_IBD_FILE_INITIAL_SIZE */
/************************************************************************
Tries to open a single-table tablespace and optionally checks the space id is
right in it. If does not succeed, prints an error message to the .err log. This
function is used to open a tablespace when we start up mysqld, and also in
IMPORT TABLESPACE.
NOTE that we assume this operation is used either at the database startup
or under the protection of the dictionary mutex, so that two users cannot
race here. This operation does not leave the file associated with the
tablespace open, but closes it after we have looked at the space id in it. */
ibool
fil_open_single_table_tablespace(
/*=============================*/
/* out: TRUE if success */
ibool check_space_id, /* in: should we check that the space
id in the file is right; we assume
that this function runs much faster
if no check is made, since accessing
the file inode probably is much
faster (the OS caches them) than
accessing the first page of the file */
ulint id, /* in: space id */
const char* name); /* in: table name in the
databasename/tablename format */
/************************************************************************
It is possible, though very improbable, that the lsn's in the tablespace to be
imported have risen above the current system lsn, if a lengthy purge, ibuf
merge, or rollback was performed on a backup taken with ibbackup. If that is
the case, reset page lsn's in the file. We assume that mysqld was shut down
after it performed these cleanup operations on the .ibd file, so that it at
the shutdown stamped the latest lsn to the FIL_PAGE_FILE_FLUSH_LSN in the
first page of the .ibd file, and we can determine whether we need to reset the
lsn's just by looking at that flush lsn. */
ibool
fil_reset_too_high_lsns(
/*====================*/
/* out: TRUE if success */
const char* name, /* in: table name in the
databasename/tablename format */
dulint current_lsn); /* in: reset lsn's if the lsn stamped
to FIL_PAGE_FILE_FLUSH_LSN in the
first page is too high */
/************************************************************************
At the server startup, if we need crash recovery, scans the database
directories under the MySQL datadir, looking for .ibd files. Those files are
single-table tablespaces. We need to know the space id in each of them so that
we know into which file we should look to check the contents of a page stored
in the doublewrite buffer, also to know where to apply log records where the
space id is != 0. */
ulint
fil_load_single_table_tablespaces(void);
/*===================================*/
/* out: DB_SUCCESS or error number */
/************************************************************************
If we need crash recovery, and we have called
fil_load_single_table_tablespaces() and dict_load_single_table_tablespaces(),
we can call this function to print an error message of orphaned .ibd files
for which there is not a data dictionary entry with a matching table name
and space id. */
void
fil_print_orphaned_tablespaces(void);
/*================================*/
/***********************************************************************
Returns TRUE if a single-table tablespace does not exist in the memory cache,
or is being deleted there. */
ibool
fil_tablespace_deleted_or_being_deleted_in_mem(
/*===========================================*/
/* out: TRUE if does not exist or is being\
deleted */
ulint id, /* in: space id */
ib_longlong version);/* in: tablespace_version should be this; if
you pass -1 as the value of this, then this
parameter is ignored */
/***********************************************************************
Returns TRUE if a single-table tablespace exists in the memory cache. */
ibool
fil_tablespace_exists_in_mem(
/*=========================*/
/* out: TRUE if exists */
ulint id); /* in: space id */
/***********************************************************************
Returns TRUE if a matching tablespace exists in the InnoDB tablespace memory
cache. Note that if we have not done a crash recovery at the database startup,
there may be many tablespaces which are not yet in the memory cache. */
ibool
fil_space_for_table_exists_in_mem(
/*==============================*/
/* out: TRUE if a matching tablespace
exists in the memory cache */
ulint id, /* in: space id */
const char* name, /* in: table name in the standard
'databasename/tablename' format or
the dir path to a temp table */
ibool is_temp, /* in: TRUE if created with CREATE
TEMPORARY TABLE */
ibool mark_space, /* in: in crash recovery, at database
startup we mark all spaces which have
an associated table in the InnoDB
data dictionary, so that
we can print a warning about orphaned
tablespaces */
ibool print_error_if_does_not_exist);
/* in: print detailed error
information to the .err log if a
matching tablespace is not found from
memory */
/**************************************************************************
Tries to extend a data file so that it would accommodate the number of pages
given. The tablespace must be cached in the memory cache. If the space is big
enough already, does nothing. */
ibool
fil_extend_space_to_desired_size(
/*=============================*/
/* out: TRUE if success */
ulint* actual_size, /* out: size of the space after extension;
if we ran out of disk space this may be lower
than the desired size */
ulint space_id, /* in: space id */
ulint size_after_extend);/* in: desired size in pages after the
extension; if the current space size is bigger
than this already, the function does nothing */
#ifdef UNIV_HOTBACKUP
/************************************************************************
Extends all tablespaces to the size stored in the space header. During the
ibbackup --apply-log phase we extended the spaces on-demand so that log records
could be appllied, but that may have left spaces still too small compared to
the size stored in the space header. */
void
fil_extend_tablespaces_to_stored_len(void);
/*======================================*/
#endif
/***********************************************************************
Tries to reserve free extents in a file space. */
ibool
fil_space_reserve_free_extents(
/*===========================*/
/* out: TRUE if succeed */
ulint id, /* in: space id */
ulint n_free_now, /* in: number of free extents now */
ulint n_to_reserve); /* in: how many one wants to reserve */
/***********************************************************************
Releases free extents in a file space. */
void
fil_space_release_free_extents(
/*===========================*/
ulint id, /* in: space id */
ulint n_reserved); /* in: how many one reserved */
/***********************************************************************
Gets the number of reserved extents. If the database is silent, this number
should be zero. */
ulint
fil_space_get_n_reserved_extents(
/*=============================*/
ulint id); /* in: space id */
/************************************************************************
Reads or writes data. This operation is asynchronous (aio). */
ulint
fil_io(
/*===*/
/* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
if we are trying to do i/o on a tablespace
which does not exist */
ulint type, /* in: OS_FILE_READ or OS_FILE_WRITE,
ORed to OS_FILE_LOG, if a log i/o
and ORed to OS_AIO_SIMULATED_WAKE_LATER
if simulated aio and we want to post a
batch of i/os; NOTE that a simulated batch
may introduce hidden chances of deadlocks,
because i/os are not actually handled until
all have been posted: use with great
caution! */
ibool sync, /* in: TRUE if synchronous aio is desired */
ulint space_id, /* in: space id */
ulint block_offset, /* in: offset in number of blocks */
ulint byte_offset, /* in: remainder of offset in bytes; in
aio this must be divisible by the OS block
size */
ulint len, /* in: how many bytes to read or write; this
must not cross a file boundary; in aio this
must be a block size multiple */
void* buf, /* in/out: buffer where to store read data
or from where to write; in aio this must be
appropriately aligned */
void* message); /* in: message for aio handler if non-sync
aio used, else ignored */
/************************************************************************
Reads data from a space to a buffer. Remember that the possible incomplete
blocks at the end of file are ignored: they are not taken into account when
calculating the byte offset within a space. */
ulint
fil_read(
/*=====*/
/* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
if we are trying to do i/o on a tablespace
which does not exist */
ibool sync, /* in: TRUE if synchronous aio is desired */
ulint space_id, /* in: space id */
ulint block_offset, /* in: offset in number of blocks */
ulint byte_offset, /* in: remainder of offset in bytes; in aio
this must be divisible by the OS block size */
ulint len, /* in: how many bytes to read; this must not
cross a file boundary; in aio this must be a
block size multiple */
void* buf, /* in/out: buffer where to store data read;
in aio this must be appropriately aligned */
void* message); /* in: message for aio handler if non-sync
aio used, else ignored */
/************************************************************************
Writes data to a space from a buffer. Remember that the possible incomplete
blocks at the end of file are ignored: they are not taken into account when
calculating the byte offset within a space. */
ulint
fil_write(
/*======*/
/* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
if we are trying to do i/o on a tablespace
which does not exist */
ibool sync, /* in: TRUE if synchronous aio is desired */
ulint space_id, /* in: space id */
ulint block_offset, /* in: offset in number of blocks */
ulint byte_offset, /* in: remainder of offset in bytes; in aio
this must be divisible by the OS block size */
ulint len, /* in: how many bytes to write; this must
not cross a file boundary; in aio this must
be a block size multiple */
void* buf, /* in: buffer from which to write; in aio
this must be appropriately aligned */
void* message); /* in: message for aio handler if non-sync
aio used, else ignored */
/**************************************************************************
Waits for an aio operation to complete. This function is used to write the
handler for completed requests. The aio array of pending requests is divided
into segments (see os0file.c for more info). The thread specifies which
segment it wants to wait for. */
void
fil_aio_wait(
/*=========*/
ulint segment); /* in: the number of the segment in the aio
array to wait for */
/**************************************************************************
Flushes to disk possible writes cached by the OS. If the space does not exist
or is being dropped, does not do anything. */
void
fil_flush(
/*======*/
ulint space_id); /* in: file space id (this can be a group of
log files or a tablespace of the database) */
/**************************************************************************
Flushes to disk writes in file spaces of the given type possibly cached by
the OS. */
void
fil_flush_file_spaces(
/*==================*/
ulint purpose); /* in: FIL_TABLESPACE, FIL_LOG */
/**********************************************************************
Checks the consistency of the tablespace cache. */
ibool
fil_validate(void);
/*==============*/
/* out: TRUE if ok */
/************************************************************************
Returns TRUE if file address is undefined. */
ibool
fil_addr_is_null(
/*=============*/
/* out: TRUE if undefined */
fil_addr_t addr); /* in: address */
/************************************************************************
Accessor functions for a file page */
ulint
fil_page_get_prev(byte* page);
ulint
fil_page_get_next(byte* page);
/*************************************************************************
Sets the file page type. */
void
fil_page_set_type(
/*==============*/
byte* page, /* in: file page */
ulint type); /* in: type */
/*************************************************************************
Gets the file page type. */
ulint
fil_page_get_type(
/*==============*/
/* out: type; NOTE that if the type has not been
written to page, the return value not defined */
byte* page); /* in: file page */
typedef struct fil_space_struct fil_space_t;
#endif

387
include/fsp0fsp.h Normal file
View file

@ -0,0 +1,387 @@
/******************************************************
File space management
(c) 1995 Innobase Oy
Created 12/18/1995 Heikki Tuuri
*******************************************************/
#ifndef fsp0fsp_h
#define fsp0fsp_h
#include "univ.i"
#include "mtr0mtr.h"
#include "fut0lst.h"
#include "ut0byte.h"
#include "page0types.h"
/* If records are inserted in order, there are the following
flags to tell this (their type is made byte for the compiler
to warn if direction and hint parameters are switched in
fseg_alloc_free_page): */
#define FSP_UP ((byte)111) /* alphabetically upwards */
#define FSP_DOWN ((byte)112) /* alphabetically downwards */
#define FSP_NO_DIR ((byte)113) /* no order */
/* File space extent size in pages */
#define FSP_EXTENT_SIZE 64
/* On a page of any file segment, data may be put starting from this offset: */
#define FSEG_PAGE_DATA FIL_PAGE_DATA
/* File segment header which points to the inode describing the file segment */
typedef byte fseg_header_t;
#define FSEG_HDR_SPACE 0 /* space id of the inode */
#define FSEG_HDR_PAGE_NO 4 /* page number of the inode */
#define FSEG_HDR_OFFSET 8 /* byte offset of the inode */
#define FSEG_HEADER_SIZE 10
/**************************************************************************
Initializes the file space system. */
void
fsp_init(void);
/*==========*/
/**************************************************************************
Gets the current free limit of a tablespace. The free limit means the
place of the first page which has never been put to the the free list
for allocation. The space above that address is initialized to zero.
Sets also the global variable log_fsp_current_free_limit. */
ulint
fsp_header_get_free_limit(
/*======================*/
/* out: free limit in megabytes */
ulint space); /* in: space id, must be 0 */
/**************************************************************************
Gets the size of the tablespace from the tablespace header. If we do not
have an auto-extending data file, this should be equal to the size of the
data files. If there is an auto-extending data file, this can be smaller. */
ulint
fsp_header_get_tablespace_size(
/*===========================*/
/* out: size in pages */
ulint space); /* in: space id, must be 0 */
/**************************************************************************
Reads the file space size stored in the header page. */
ulint
fsp_get_size_low(
/*=============*/
/* out: tablespace size stored in the space header */
page_t* page); /* in: header page (page 0 in the tablespace) */
/**************************************************************************
Reads the space id from the first page of a tablespace. */
ulint
fsp_header_get_space_id(
/*====================*/
/* out: space id, ULINT UNDEFINED if error */
page_t* page); /* in: first page of a tablespace */
/**************************************************************************
Writes the space id to a tablespace header. This function is used past the
buffer pool when we in fil0fil.c create a new single-table tablespace. */
void
fsp_header_write_space_id(
/*======================*/
page_t* page, /* in: first page in the space */
ulint space_id); /* in: space id */
/**************************************************************************
Initializes the space header of a new created space and creates also the
insert buffer tree root if space == 0. */
void
fsp_header_init(
/*============*/
ulint space, /* in: space id */
ulint size, /* in: current size in blocks */
mtr_t* mtr); /* in: mini-transaction handle */
/**************************************************************************
Increases the space size field of a space. */
void
fsp_header_inc_size(
/*================*/
ulint space, /* in: space id */
ulint size_inc,/* in: size increment in pages */
mtr_t* mtr); /* in: mini-transaction handle */
/**************************************************************************
Creates a new segment. */
page_t*
fseg_create(
/*========*/
/* out: the page where the segment header is placed,
x-latched, NULL if could not create segment
because of lack of space */
ulint space, /* in: space id */
ulint page, /* in: page where the segment header is placed: if
this is != 0, the page must belong to another segment,
if this is 0, a new page will be allocated and it
will belong to the created segment */
ulint byte_offset, /* in: byte offset of the created segment header
on the page */
mtr_t* mtr); /* in: mtr */
/**************************************************************************
Creates a new segment. */
page_t*
fseg_create_general(
/*================*/
/* out: the page where the segment header is placed,
x-latched, NULL if could not create segment
because of lack of space */
ulint space, /* in: space id */
ulint page, /* in: page where the segment header is placed: if
this is != 0, the page must belong to another segment,
if this is 0, a new page will be allocated and it
will belong to the created segment */
ulint byte_offset, /* in: byte offset of the created segment header
on the page */
ibool has_done_reservation, /* in: TRUE if the caller has already
done the reservation for the pages with
fsp_reserve_free_extents (at least 2 extents: one for
the inode and the other for the segment) then there is
no need to do the check for this individual
operation */
mtr_t* mtr); /* in: mtr */
/**************************************************************************
Calculates the number of pages reserved by a segment, and how many pages are
currently used. */
ulint
fseg_n_reserved_pages(
/*==================*/
/* out: number of reserved pages */
fseg_header_t* header, /* in: segment header */
ulint* used, /* out: number of pages used (<= reserved) */
mtr_t* mtr); /* in: mtr handle */
/**************************************************************************
Allocates a single free page from a segment. This function implements
the intelligent allocation strategy which tries to minimize
file space fragmentation. */
ulint
fseg_alloc_free_page(
/*=================*/
/* out: the allocated page offset
FIL_NULL if no page could be allocated */
fseg_header_t* seg_header, /* in: segment header */
ulint hint, /* in: hint of which page would be desirable */
byte direction, /* in: if the new page is needed because
of an index page split, and records are
inserted there in order, into which
direction they go alphabetically: FSP_DOWN,
FSP_UP, FSP_NO_DIR */
mtr_t* mtr); /* in: mtr handle */
/**************************************************************************
Allocates a single free page from a segment. This function implements
the intelligent allocation strategy which tries to minimize file space
fragmentation. */
ulint
fseg_alloc_free_page_general(
/*=========================*/
/* out: allocated page offset, FIL_NULL if no
page could be allocated */
fseg_header_t* seg_header,/* in: segment header */
ulint hint, /* in: hint of which page would be desirable */
byte direction,/* in: if the new page is needed because
of an index page split, and records are
inserted there in order, into which
direction they go alphabetically: FSP_DOWN,
FSP_UP, FSP_NO_DIR */
ibool has_done_reservation, /* in: TRUE if the caller has
already done the reservation for the page
with fsp_reserve_free_extents, then there
is no need to do the check for this individual
page */
mtr_t* mtr); /* in: mtr handle */
/**************************************************************************
Reserves free pages from a tablespace. All mini-transactions which may
use several pages from the tablespace should call this function beforehand
and reserve enough free extents so that they certainly will be able
to do their operation, like a B-tree page split, fully. Reservations
must be released with function fil_space_release_free_extents!
The alloc_type below has the following meaning: FSP_NORMAL means an
operation which will probably result in more space usage, like an
insert in a B-tree; FSP_UNDO means allocation to undo logs: if we are
deleting rows, then this allocation will in the long run result in
less space usage (after a purge); FSP_CLEANING means allocation done
in a physical record delete (like in a purge) or other cleaning operation
which will result in less space usage in the long run. We prefer the latter
two types of allocation: when space is scarce, FSP_NORMAL allocations
will not succeed, but the latter two allocations will succeed, if possible.
The purpose is to avoid dead end where the database is full but the
user cannot free any space because these freeing operations temporarily
reserve some space.
Single-table tablespaces whose size is < 32 pages are a special case. In this
function we would liberally reserve several 64 page extents for every page
split or merge in a B-tree. But we do not want to waste disk space if the table
only occupies < 32 pages. That is why we apply different rules in that special
case, just ensuring that there are 3 free pages available. */
ibool
fsp_reserve_free_extents(
/*=====================*/
/* out: TRUE if we were able to make the reservation */
ulint* n_reserved,/* out: number of extents actually reserved; if we
return TRUE and the tablespace size is < 64 pages,
then this can be 0, otherwise it is n_ext */
ulint space, /* in: space id */
ulint n_ext, /* in: number of extents to reserve */
ulint alloc_type,/* in: FSP_NORMAL, FSP_UNDO, or FSP_CLEANING */
mtr_t* mtr); /* in: mtr */
/**************************************************************************
This function should be used to get information on how much we still
will be able to insert new data to the database without running out the
tablespace. Only free extents are taken into account and we also subtract
the safety margin required by the above function fsp_reserve_free_extents. */
ulint
fsp_get_available_space_in_free_extents(
/*====================================*/
/* out: available space in kB */
ulint space); /* in: space id */
/**************************************************************************
Frees a single page of a segment. */
void
fseg_free_page(
/*===========*/
fseg_header_t* seg_header, /* in: segment header */
ulint space, /* in: space id */
ulint page, /* in: page offset */
mtr_t* mtr); /* in: mtr handle */
/***********************************************************************
Frees a segment. The freeing is performed in several mini-transactions,
so that there is no danger of bufferfixing too many buffer pages. */
void
fseg_free(
/*======*/
ulint space, /* in: space id */
ulint page_no,/* in: page number where the segment header is
placed */
ulint offset);/* in: byte offset of the segment header on that
page */
/**************************************************************************
Frees part of a segment. This function can be used to free a segment
by repeatedly calling this function in different mini-transactions.
Doing the freeing in a single mini-transaction might result in
too big a mini-transaction. */
ibool
fseg_free_step(
/*===========*/
/* out: TRUE if freeing completed */
fseg_header_t* header, /* in, own: segment header; NOTE: if the header
resides on the first page of the frag list
of the segment, this pointer becomes obsolete
after the last freeing step */
mtr_t* mtr); /* in: mtr */
/**************************************************************************
Frees part of a segment. Differs from fseg_free_step because this function
leaves the header page unfreed. */
ibool
fseg_free_step_not_header(
/*======================*/
/* out: TRUE if freeing completed, except the
header page */
fseg_header_t* header, /* in: segment header which must reside on
the first fragment page of the segment */
mtr_t* mtr); /* in: mtr */
/***************************************************************************
Checks if a page address is an extent descriptor page address. */
UNIV_INLINE
ibool
fsp_descr_page(
/*===========*/
/* out: TRUE if a descriptor page */
ulint page_no);/* in: page number */
/***************************************************************
Parses a redo log record of a file page init. */
byte*
fsp_parse_init_file_page(
/*=====================*/
/* out: end of log record or NULL */
byte* ptr, /* in: buffer */
byte* end_ptr,/* in: buffer end */
page_t* page); /* in: page or NULL */
/***********************************************************************
Validates the file space system and its segments. */
ibool
fsp_validate(
/*=========*/
/* out: TRUE if ok */
ulint space); /* in: space id */
/***********************************************************************
Prints info of a file space. */
void
fsp_print(
/*======*/
ulint space); /* in: space id */
/***********************************************************************
Validates a segment. */
ibool
fseg_validate(
/*==========*/
/* out: TRUE if ok */
fseg_header_t* header, /* in: segment header */
mtr_t* mtr2); /* in: mtr */
/***********************************************************************
Writes info of a segment. */
void
fseg_print(
/*=======*/
fseg_header_t* header, /* in: segment header */
mtr_t* mtr); /* in: mtr */
/* Flags for fsp_reserve_free_extents */
#define FSP_NORMAL 1000000
#define FSP_UNDO 2000000
#define FSP_CLEANING 3000000
/* Number of pages described in a single descriptor page: currently each page
description takes less than 1 byte; a descriptor page is repeated every
this many file pages */
#define XDES_DESCRIBED_PER_PAGE UNIV_PAGE_SIZE
/* The space low address page map, and also offsets for extent descriptor and
bitmap pages which are repeated always after XDES_DESCRIBED_PER_PAGE more
pages: */
/*--------------------------------------*/
#define FSP_XDES_OFFSET 0
#define FSP_IBUF_BITMAP_OFFSET 1
/* The ibuf bitmap pages are the ones whose
page number is the number above plus a
multiple of XDES_DESCRIBED_PER_PAGE */
#define FSP_FIRST_INODE_PAGE_NO 2
#define FSP_IBUF_HEADER_PAGE_NO 3
#define FSP_IBUF_TREE_ROOT_PAGE_NO 4
/* The ibuf tree root page number in
tablespace 0; its fseg inode is on the page
number FSP_FIRST_INODE_PAGE_NO */
#define FSP_TRX_SYS_PAGE_NO 5
#define FSP_FIRST_RSEG_PAGE_NO 6
#define FSP_DICT_HDR_PAGE_NO 7
/*--------------------------------------*/
#ifndef UNIV_NONINL
#include "fsp0fsp.ic"
#endif
#endif

24
include/fsp0fsp.ic Normal file
View file

@ -0,0 +1,24 @@
/******************************************************
File space management
(c) 1995 Innobase Oy
Created 12/18/1995 Heikki Tuuri
*******************************************************/
/***************************************************************************
Checks if a page address is an extent descriptor page address. */
UNIV_INLINE
ibool
fsp_descr_page(
/*===========*/
/* out: TRUE if a descriptor page */
ulint page_no)/* in: page number */
{
if (page_no % XDES_DESCRIBED_PER_PAGE == FSP_XDES_OFFSET) {
return(TRUE);
}
return(FALSE);
}

36
include/fut0fut.h Normal file
View file

@ -0,0 +1,36 @@
/**********************************************************************
File-based utilities
(c) 1995 Innobase Oy
Created 12/13/1995 Heikki Tuuri
***********************************************************************/
#ifndef fut0fut_h
#define fut0fut_h
#include "univ.i"
#include "fil0fil.h"
#include "mtr0mtr.h"
/************************************************************************
Gets a pointer to a file address and latches the page. */
UNIV_INLINE
byte*
fut_get_ptr(
/*========*/
/* out: pointer to a byte in a frame; the file
page in the frame is bufferfixed and latched */
ulint space, /* in: space id */
fil_addr_t addr, /* in: file address */
ulint rw_latch, /* in: RW_S_LATCH, RW_X_LATCH */
mtr_t* mtr); /* in: mtr handle */
#ifndef UNIV_NONINL
#include "fut0fut.ic"
#endif
#endif

38
include/fut0fut.ic Normal file
View file

@ -0,0 +1,38 @@
/**********************************************************************
File-based utilities
(c) 1995 Innobase Oy
Created 12/13/1995 Heikki Tuuri
***********************************************************************/
#include "sync0rw.h"
#include "buf0buf.h"
/************************************************************************
Gets a pointer to a file address and latches the page. */
UNIV_INLINE
byte*
fut_get_ptr(
/*========*/
/* out: pointer to a byte in a frame; the file
page in the frame is bufferfixed and latched */
ulint space, /* in: space id */
fil_addr_t addr, /* in: file address */
ulint rw_latch, /* in: RW_S_LATCH, RW_X_LATCH */
mtr_t* mtr) /* in: mtr handle */
{
byte* ptr;
ut_ad(mtr);
ut_ad(addr.boffset < UNIV_PAGE_SIZE);
ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
ptr = buf_page_get(space, addr.page, rw_latch, mtr) + addr.boffset;
#ifdef UNIV_SYNC_DEBUG
buf_page_dbg_add_level(ptr, SYNC_NO_ORDER_CHECK);
#endif /* UNIV_SYNC_DEBUG */
return(ptr);
}

198
include/fut0lst.h Normal file
View file

@ -0,0 +1,198 @@
/**********************************************************************
File-based list utilities
(c) 1995 Innobase Oy
Created 11/28/1995 Heikki Tuuri
***********************************************************************/
#ifndef fut0lst_h
#define fut0lst_h
#include "univ.i"
#include "fil0fil.h"
#include "mtr0mtr.h"
/* The C 'types' of base node and list node: these should be used to
write self-documenting code. Of course, the sizeof macro cannot be
applied to these types! */
typedef byte flst_base_node_t;
typedef byte flst_node_t;
/* The physical size of a list base node in bytes */
#define FLST_BASE_NODE_SIZE (4 + 2 * FIL_ADDR_SIZE)
/* The physical size of a list node in bytes */
#define FLST_NODE_SIZE (2 * FIL_ADDR_SIZE)
/************************************************************************
Initializes a list base node. */
UNIV_INLINE
void
flst_init(
/*======*/
flst_base_node_t* base, /* in: pointer to base node */
mtr_t* mtr); /* in: mini-transaction handle */
/************************************************************************
Adds a node as the last node in a list. */
void
flst_add_last(
/*==========*/
flst_base_node_t* base, /* in: pointer to base node of list */
flst_node_t* node, /* in: node to add */
mtr_t* mtr); /* in: mini-transaction handle */
/************************************************************************
Adds a node as the first node in a list. */
void
flst_add_first(
/*===========*/
flst_base_node_t* base, /* in: pointer to base node of list */
flst_node_t* node, /* in: node to add */
mtr_t* mtr); /* in: mini-transaction handle */
/************************************************************************
Inserts a node after another in a list. */
void
flst_insert_after(
/*==============*/
flst_base_node_t* base, /* in: pointer to base node of list */
flst_node_t* node1, /* in: node to insert after */
flst_node_t* node2, /* in: node to add */
mtr_t* mtr); /* in: mini-transaction handle */
/************************************************************************
Inserts a node before another in a list. */
void
flst_insert_before(
/*===============*/
flst_base_node_t* base, /* in: pointer to base node of list */
flst_node_t* node2, /* in: node to insert */
flst_node_t* node3, /* in: node to insert before */
mtr_t* mtr); /* in: mini-transaction handle */
/************************************************************************
Removes a node. */
void
flst_remove(
/*========*/
flst_base_node_t* base, /* in: pointer to base node of list */
flst_node_t* node2, /* in: node to remove */
mtr_t* mtr); /* in: mini-transaction handle */
/************************************************************************
Cuts off the tail of the list, including the node given. The number of
nodes which will be removed must be provided by the caller, as this function
does not measure the length of the tail. */
void
flst_cut_end(
/*=========*/
flst_base_node_t* base, /* in: pointer to base node of list */
flst_node_t* node2, /* in: first node to remove */
ulint n_nodes,/* in: number of nodes to remove,
must be >= 1 */
mtr_t* mtr); /* in: mini-transaction handle */
/************************************************************************
Cuts off the tail of the list, not including the given node. The number of
nodes which will be removed must be provided by the caller, as this function
does not measure the length of the tail. */
void
flst_truncate_end(
/*==============*/
flst_base_node_t* base, /* in: pointer to base node of list */
flst_node_t* node2, /* in: first node not to remove */
ulint n_nodes,/* in: number of nodes to remove */
mtr_t* mtr); /* in: mini-transaction handle */
/************************************************************************
Gets list length. */
UNIV_INLINE
ulint
flst_get_len(
/*=========*/
/* out: length */
flst_base_node_t* base, /* in: pointer to base node */
mtr_t* mtr); /* in: mini-transaction handle */
/************************************************************************
Gets list first node address. */
UNIV_INLINE
fil_addr_t
flst_get_first(
/*===========*/
/* out: file address */
flst_base_node_t* base, /* in: pointer to base node */
mtr_t* mtr); /* in: mini-transaction handle */
/************************************************************************
Gets list last node address. */
UNIV_INLINE
fil_addr_t
flst_get_last(
/*==========*/
/* out: file address */
flst_base_node_t* base, /* in: pointer to base node */
mtr_t* mtr); /* in: mini-transaction handle */
/************************************************************************
Gets list next node address. */
UNIV_INLINE
fil_addr_t
flst_get_next_addr(
/*===============*/
/* out: file address */
flst_node_t* node, /* in: pointer to node */
mtr_t* mtr); /* in: mini-transaction handle */
/************************************************************************
Gets list prev node address. */
UNIV_INLINE
fil_addr_t
flst_get_prev_addr(
/*===============*/
/* out: file address */
flst_node_t* node, /* in: pointer to node */
mtr_t* mtr); /* in: mini-transaction handle */
/************************************************************************
Writes a file address. */
UNIV_INLINE
void
flst_write_addr(
/*============*/
fil_faddr_t* faddr, /* in: pointer to file faddress */
fil_addr_t addr, /* in: file address */
mtr_t* mtr); /* in: mini-transaction handle */
/************************************************************************
Reads a file address. */
UNIV_INLINE
fil_addr_t
flst_read_addr(
/*===========*/
/* out: file address */
fil_faddr_t* faddr, /* in: pointer to file faddress */
mtr_t* mtr); /* in: mini-transaction handle */
/************************************************************************
Validates a file-based list. */
ibool
flst_validate(
/*==========*/
/* out: TRUE if ok */
flst_base_node_t* base, /* in: pointer to base node of list */
mtr_t* mtr1); /* in: mtr */
/************************************************************************
Prints info of a file-based list. */
void
flst_print(
/*=======*/
flst_base_node_t* base, /* in: pointer to base node of list */
mtr_t* mtr); /* in: mtr */
#ifndef UNIV_NONINL
#include "fut0lst.ic"
#endif
#endif

147
include/fut0lst.ic Normal file
View file

@ -0,0 +1,147 @@
/**********************************************************************
File-based list utilities
(c) 1995 Innobase Oy
Created 11/28/1995 Heikki Tuuri
***********************************************************************/
#include "fut0fut.h"
#include "mtr0log.h"
#include "buf0buf.h"
/* We define the field offsets of a node for the list */
#define FLST_PREV 0 /* 6-byte address of the previous list element;
the page part of address is FIL_NULL, if no
previous element */
#define FLST_NEXT FIL_ADDR_SIZE /* 6-byte address of the next
list element; the page part of address
is FIL_NULL, if no next element */
/* We define the field offsets of a base node for the list */
#define FLST_LEN 0 /* 32-bit list length field */
#define FLST_FIRST 4 /* 6-byte address of the first element
of the list; undefined if empty list */
#define FLST_LAST (4 + FIL_ADDR_SIZE) /* 6-byte address of the
last element of the list; undefined
if empty list */
/************************************************************************
Writes a file address. */
UNIV_INLINE
void
flst_write_addr(
/*============*/
fil_faddr_t* faddr, /* in: pointer to file faddress */
fil_addr_t addr, /* in: file address */
mtr_t* mtr) /* in: mini-transaction handle */
{
ut_ad(faddr && mtr);
ut_ad(mtr_memo_contains(mtr, buf_block_align(faddr),
MTR_MEMO_PAGE_X_FIX));
mlog_write_ulint(faddr + FIL_ADDR_PAGE, addr.page, MLOG_4BYTES, mtr);
mlog_write_ulint(faddr + FIL_ADDR_BYTE, addr.boffset,
MLOG_2BYTES, mtr);
}
/************************************************************************
Reads a file address. */
UNIV_INLINE
fil_addr_t
flst_read_addr(
/*===========*/
/* out: file address */
fil_faddr_t* faddr, /* in: pointer to file faddress */
mtr_t* mtr) /* in: mini-transaction handle */
{
fil_addr_t addr;
ut_ad(faddr && mtr);
addr.page = mtr_read_ulint(faddr + FIL_ADDR_PAGE, MLOG_4BYTES, mtr);
addr.boffset = mtr_read_ulint(faddr + FIL_ADDR_BYTE, MLOG_2BYTES,
mtr);
return(addr);
}
/************************************************************************
Initializes a list base node. */
UNIV_INLINE
void
flst_init(
/*======*/
flst_base_node_t* base, /* in: pointer to base node */
mtr_t* mtr) /* in: mini-transaction handle */
{
ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
MTR_MEMO_PAGE_X_FIX));
mlog_write_ulint(base + FLST_LEN, 0, MLOG_4BYTES, mtr);
flst_write_addr(base + FLST_FIRST, fil_addr_null, mtr);
flst_write_addr(base + FLST_LAST, fil_addr_null, mtr);
}
/************************************************************************
Gets list length. */
UNIV_INLINE
ulint
flst_get_len(
/*=========*/
/* out: length */
flst_base_node_t* base, /* in: pointer to base node */
mtr_t* mtr) /* in: mini-transaction handle */
{
return(mtr_read_ulint(base + FLST_LEN, MLOG_4BYTES, mtr));
}
/************************************************************************
Gets list first node address. */
UNIV_INLINE
fil_addr_t
flst_get_first(
/*===========*/
/* out: file address */
flst_base_node_t* base, /* in: pointer to base node */
mtr_t* mtr) /* in: mini-transaction handle */
{
return(flst_read_addr(base + FLST_FIRST, mtr));
}
/************************************************************************
Gets list last node address. */
UNIV_INLINE
fil_addr_t
flst_get_last(
/*==========*/
/* out: file address */
flst_base_node_t* base, /* in: pointer to base node */
mtr_t* mtr) /* in: mini-transaction handle */
{
return(flst_read_addr(base + FLST_LAST, mtr));
}
/************************************************************************
Gets list next node address. */
UNIV_INLINE
fil_addr_t
flst_get_next_addr(
/*===============*/
/* out: file address */
flst_node_t* node, /* in: pointer to node */
mtr_t* mtr) /* in: mini-transaction handle */
{
return(flst_read_addr(node + FLST_NEXT, mtr));
}
/************************************************************************
Gets list prev node address. */
UNIV_INLINE
fil_addr_t
flst_get_prev_addr(
/*===============*/
/* out: file address */
flst_node_t* node, /* in: pointer to node */
mtr_t* mtr) /* in: mini-transaction handle */
{
return(flst_read_addr(node + FLST_PREV, mtr));
}

146
include/ha0ha.h Normal file
View file

@ -0,0 +1,146 @@
/******************************************************
The hash table with external chains
(c) 1994-1997 Innobase Oy
Created 8/18/1994 Heikki Tuuri
*******************************************************/
#ifndef ha0ha_h
#define ha0ha_h
#include "univ.i"
#include "hash0hash.h"
#include "page0types.h"
/*****************************************************************
Looks for an element in a hash table. */
UNIV_INLINE
void*
ha_search_and_get_data(
/*===================*/
/* out: pointer to the data of the first hash
table node in chain having the fold number,
NULL if not found */
hash_table_t* table, /* in: hash table */
ulint fold); /* in: folded value of the searched data */
/*************************************************************
Looks for an element when we know the pointer to the data and updates
the pointer to data if found. */
void
ha_search_and_update_if_found(
/*==========================*/
hash_table_t* table, /* in: hash table */
ulint fold, /* in: folded value of the searched data */
void* data, /* in: pointer to the data */
void* new_data);/* in: new pointer to the data */
/*****************************************************************
Creates a hash table with >= n array cells. The actual number of cells is
chosen to be a prime number slightly bigger than n. */
hash_table_t*
ha_create(
/*======*/
/* out, own: created table */
ibool in_btr_search, /* in: TRUE if the hash table is used in
the btr_search module */
ulint n, /* in: number of array cells */
ulint n_mutexes, /* in: number of mutexes to protect the
hash table: must be a power of 2 */
ulint mutex_level); /* in: level of the mutexes in the latching
order: this is used in the debug version */
/*****************************************************************
Inserts an entry into a hash table. If an entry with the same fold number
is found, its node is updated to point to the new data, and no new node
is inserted. */
ibool
ha_insert_for_fold(
/*===============*/
/* out: TRUE if succeed, FALSE if no more
memory could be allocated */
hash_table_t* table, /* in: hash table */
ulint fold, /* in: folded value of data; if a node with
the same fold value already exists, it is
updated to point to the same data, and no new
node is created! */
void* data); /* in: data, must not be NULL */
/*****************************************************************
Reserves the necessary hash table mutex and inserts an entry into the hash
table. */
UNIV_INLINE
ibool
ha_insert_for_fold_mutex(
/*=====================*/
/* out: TRUE if succeed, FALSE if no more
memory could be allocated */
hash_table_t* table, /* in: hash table */
ulint fold, /* in: folded value of data; if a node with
the same fold value already exists, it is
updated to point to the same data, and no new
node is created! */
void* data); /* in: data, must not be NULL */
/*****************************************************************
Deletes an entry from a hash table. */
void
ha_delete(
/*======*/
hash_table_t* table, /* in: hash table */
ulint fold, /* in: folded value of data */
void* data); /* in: data, must not be NULL and must exist
in the hash table */
/*************************************************************
Looks for an element when we know the pointer to the data and deletes
it from the hash table if found. */
UNIV_INLINE
ibool
ha_search_and_delete_if_found(
/*==========================*/
/* out: TRUE if found */
hash_table_t* table, /* in: hash table */
ulint fold, /* in: folded value of the searched data */
void* data); /* in: pointer to the data */
/*********************************************************************
Removes from the chain determined by fold all nodes whose data pointer
points to the page given. */
void
ha_remove_all_nodes_to_page(
/*========================*/
hash_table_t* table, /* in: hash table */
ulint fold, /* in: fold value */
page_t* page); /* in: buffer page */
/*****************************************************************
Validates a hash table. */
ibool
ha_validate(
/*========*/
/* out: TRUE if ok */
hash_table_t* table); /* in: hash table */
/*****************************************************************
Prints info of a hash table. */
void
ha_print_info(
/*==========*/
FILE* file, /* in: file where to print */
hash_table_t* table); /* in: hash table */
/* The hash table external chain node */
typedef struct ha_node_struct ha_node_t;
struct ha_node_struct {
ha_node_t* next; /* next chain node or NULL if none */
void* data; /* pointer to the data */
ulint fold; /* fold value for the data */
};
#ifndef UNIV_NONINL
#include "ha0ha.ic"
#endif
#endif

220
include/ha0ha.ic Normal file
View file

@ -0,0 +1,220 @@
/************************************************************************
The hash table with external chains
(c) 1994-1997 Innobase Oy
Created 8/18/1994 Heikki Tuuri
*************************************************************************/
#include "ut0rnd.h"
#include "mem0mem.h"
/***************************************************************
Deletes a hash node. */
void
ha_delete_hash_node(
/*================*/
hash_table_t* table, /* in: hash table */
ha_node_t* del_node); /* in: node to be deleted */
/**********************************************************************
Gets a hash node data. */
UNIV_INLINE
void*
ha_node_get_data(
/*=============*/
/* out: pointer to the data */
ha_node_t* node) /* in: hash chain node */
{
return(node->data);
}
/**********************************************************************
Sets hash node data. */
UNIV_INLINE
void
ha_node_set_data(
/*=============*/
ha_node_t* node, /* in: hash chain node */
void* data) /* in: pointer to the data */
{
node->data = data;
}
/**********************************************************************
Gets the next node in a hash chain. */
UNIV_INLINE
ha_node_t*
ha_chain_get_next(
/*==============*/
/* out: next node, NULL if none */
ha_node_t* node) /* in: hash chain node */
{
return(node->next);
}
/**********************************************************************
Gets the first node in a hash chain. */
UNIV_INLINE
ha_node_t*
ha_chain_get_first(
/*===============*/
/* out: first node, NULL if none */
hash_table_t* table, /* in: hash table */
ulint fold) /* in: fold value determining the chain */
{
return(hash_get_nth_cell(table, hash_calc_hash(fold, table))->node);
}
/*****************************************************************
Looks for an element in a hash table. */
UNIV_INLINE
ha_node_t*
ha_search(
/*======*/
/* out: pointer to the first hash table node
in chain having the fold number, NULL if not
found */
hash_table_t* table, /* in: hash table */
ulint fold) /* in: folded value of the searched data */
{
ha_node_t* node;
#ifdef UNIV_SYNC_DEBUG
ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
#endif /* UNIV_SYNC_DEBUG */
node = ha_chain_get_first(table, fold);
while (node) {
if (node->fold == fold) {
return(node);
}
node = ha_chain_get_next(node);
}
return(NULL);
}
/*****************************************************************
Looks for an element in a hash table. */
UNIV_INLINE
void*
ha_search_and_get_data(
/*===================*/
/* out: pointer to the data of the first hash
table node in chain having the fold number,
NULL if not found */
hash_table_t* table, /* in: hash table */
ulint fold) /* in: folded value of the searched data */
{
ha_node_t* node;
#ifdef UNIV_SYNC_DEBUG
ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
#endif /* UNIV_SYNC_DEBUG */
node = ha_chain_get_first(table, fold);
while (node) {
if (node->fold == fold) {
return(node->data);
}
node = ha_chain_get_next(node);
}
return(NULL);
}
/*************************************************************
Looks for an element when we know the pointer to the data. */
UNIV_INLINE
ha_node_t*
ha_search_with_data(
/*================*/
/* out: pointer to the hash table node, NULL
if not found in the table */
hash_table_t* table, /* in: hash table */
ulint fold, /* in: folded value of the searched data */
void* data) /* in: pointer to the data */
{
ha_node_t* node;
#ifdef UNIV_SYNC_DEBUG
ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
#endif /* UNIV_SYNC_DEBUG */
node = ha_chain_get_first(table, fold);
while (node) {
if (node->data == data) {
return(node);
}
node = ha_chain_get_next(node);
}
return(NULL);
}
/*************************************************************
Looks for an element when we know the pointer to the data, and deletes
it from the hash table, if found. */
UNIV_INLINE
ibool
ha_search_and_delete_if_found(
/*==========================*/
/* out: TRUE if found */
hash_table_t* table, /* in: hash table */
ulint fold, /* in: folded value of the searched data */
void* data) /* in: pointer to the data */
{
ha_node_t* node;
#ifdef UNIV_SYNC_DEBUG
ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
#endif /* UNIV_SYNC_DEBUG */
node = ha_search_with_data(table, fold, data);
if (node) {
ha_delete_hash_node(table, node);
return(TRUE);
}
return(FALSE);
}
/*****************************************************************
Reserves the necessary hash table mutex and inserts an entry into the hash
table. */
UNIV_INLINE
ibool
ha_insert_for_fold_mutex(
/*=====================*/
/* out: TRUE if succeed, FALSE if no more
memory could be allocated */
hash_table_t* table, /* in: hash table */
ulint fold, /* in: folded value of data; if a node with
the same fold value already exists, it is
updated to point to the same data, and no new
node is created! */
void* data) /* in: data, must not be NULL */
{
ibool ret;
hash_mutex_enter(table, fold);
ret = ha_insert_for_fold(table, fold, data);
hash_mutex_exit(table, fold);
return(ret);
}

330
include/hash0hash.h Normal file
View file

@ -0,0 +1,330 @@
/******************************************************
The simple hash table utility
(c) 1997 Innobase Oy
Created 5/20/1997 Heikki Tuuri
*******************************************************/
#ifndef hash0hash_h
#define hash0hash_h
#include "univ.i"
#include "mem0mem.h"
#include "sync0sync.h"
typedef struct hash_table_struct hash_table_t;
typedef struct hash_cell_struct hash_cell_t;
typedef void* hash_node_t;
/*****************************************************************
Creates a hash table with >= n array cells. The actual number
of cells is chosen to be a prime number slightly bigger than n. */
hash_table_t*
hash_create(
/*========*/
/* out, own: created table */
ulint n); /* in: number of array cells */
/*****************************************************************
Creates a mutex array to protect a hash table. */
void
hash_create_mutexes(
/*================*/
hash_table_t* table, /* in: hash table */
ulint n_mutexes, /* in: number of mutexes */
ulint sync_level); /* in: latching order level of the
mutexes: used in the debug version */
/*****************************************************************
Frees a hash table. */
void
hash_table_free(
/*============*/
hash_table_t* table); /* in, own: hash table */
/******************************************************************
Calculates the hash value from a folded value. */
UNIV_INLINE
ulint
hash_calc_hash(
/*===========*/
/* out: hashed value */
ulint fold, /* in: folded value */
hash_table_t* table); /* in: hash table */
/************************************************************************
Assert that the mutex for the table in a hash operation is owned. */
#ifdef UNIV_SYNC_DEBUG
# define HASH_ASSERT_OWNED(TABLE, FOLD) \
ut_ad(!(TABLE)->mutexes || mutex_own(hash_get_mutex(TABLE, FOLD)));
#else
# define HASH_ASSERT_OWNED(TABLE, FOLD)
#endif
/***********************************************************************
Inserts a struct to a hash table. */
#define HASH_INSERT(TYPE, NAME, TABLE, FOLD, DATA)\
do {\
hash_cell_t* cell3333;\
TYPE* struct3333;\
\
HASH_ASSERT_OWNED(TABLE, FOLD)\
\
(DATA)->NAME = NULL;\
\
cell3333 = hash_get_nth_cell(TABLE, hash_calc_hash(FOLD, TABLE));\
\
if (cell3333->node == NULL) {\
cell3333->node = DATA;\
} else {\
struct3333 = cell3333->node;\
\
while (struct3333->NAME != NULL) {\
\
struct3333 = struct3333->NAME;\
}\
\
struct3333->NAME = DATA;\
}\
} while (0)
/***********************************************************************
Deletes a struct from a hash table. */
#define HASH_DELETE(TYPE, NAME, TABLE, FOLD, DATA)\
do {\
hash_cell_t* cell3333;\
TYPE* struct3333;\
\
HASH_ASSERT_OWNED(TABLE, FOLD)\
\
cell3333 = hash_get_nth_cell(TABLE, hash_calc_hash(FOLD, TABLE));\
\
if (cell3333->node == DATA) {\
cell3333->node = DATA->NAME;\
} else {\
struct3333 = cell3333->node;\
\
while (struct3333->NAME != DATA) {\
\
ut_a(struct3333);\
struct3333 = struct3333->NAME;\
}\
\
struct3333->NAME = DATA->NAME;\
}\
} while (0)
/***********************************************************************
Gets the first struct in a hash chain, NULL if none. */
#define HASH_GET_FIRST(TABLE, HASH_VAL)\
(hash_get_nth_cell(TABLE, HASH_VAL)->node)
/***********************************************************************
Gets the next struct in a hash chain, NULL if none. */
#define HASH_GET_NEXT(NAME, DATA) ((DATA)->NAME)
/************************************************************************
Looks for a struct in a hash table. */
#define HASH_SEARCH(NAME, TABLE, FOLD, DATA, TEST)\
{\
\
HASH_ASSERT_OWNED(TABLE, FOLD)\
\
(DATA) = HASH_GET_FIRST(TABLE, hash_calc_hash(FOLD, TABLE));\
\
while ((DATA) != NULL) {\
if (TEST) {\
break;\
} else {\
(DATA) = HASH_GET_NEXT(NAME, DATA);\
}\
}\
}
/****************************************************************
Gets the nth cell in a hash table. */
UNIV_INLINE
hash_cell_t*
hash_get_nth_cell(
/*==============*/
/* out: pointer to cell */
hash_table_t* table, /* in: hash table */
ulint n); /* in: cell index */
/*****************************************************************
Returns the number of cells in a hash table. */
UNIV_INLINE
ulint
hash_get_n_cells(
/*=============*/
/* out: number of cells */
hash_table_t* table); /* in: table */
/***********************************************************************
Deletes a struct which is stored in the heap of the hash table, and compacts
the heap. The fold value must be stored in the struct NODE in a field named
'fold'. */
#define HASH_DELETE_AND_COMPACT(TYPE, NAME, TABLE, NODE)\
do {\
TYPE* node111;\
TYPE* top_node111;\
hash_cell_t* cell111;\
ulint fold111;\
\
fold111 = (NODE)->fold;\
\
HASH_DELETE(TYPE, NAME, TABLE, fold111, NODE);\
\
top_node111 = (TYPE*)mem_heap_get_top(\
hash_get_heap(TABLE, fold111),\
sizeof(TYPE));\
\
/* If the node to remove is not the top node in the heap, compact the\
heap of nodes by moving the top node in the place of NODE. */\
\
if (NODE != top_node111) {\
\
/* Copy the top node in place of NODE */\
\
*(NODE) = *top_node111;\
\
cell111 = hash_get_nth_cell(TABLE,\
hash_calc_hash(top_node111->fold, TABLE));\
\
/* Look for the pointer to the top node, to update it */\
\
if (cell111->node == top_node111) {\
/* The top node is the first in the chain */\
\
cell111->node = NODE;\
} else {\
/* We have to look for the predecessor of the top\
node */\
node111 = cell111->node;\
\
while (top_node111 != HASH_GET_NEXT(NAME, node111)) {\
\
node111 = HASH_GET_NEXT(NAME, node111);\
}\
\
/* Now we have the predecessor node */\
\
node111->NAME = NODE;\
}\
}\
\
/* Free the space occupied by the top node */\
\
mem_heap_free_top(hash_get_heap(TABLE, fold111), sizeof(TYPE));\
} while (0)
/****************************************************************
Gets the mutex index for a fold value in a hash table. */
UNIV_INLINE
ulint
hash_get_mutex_no(
/*==============*/
/* out: mutex number */
hash_table_t* table, /* in: hash table */
ulint fold); /* in: fold */
/****************************************************************
Gets the nth heap in a hash table. */
UNIV_INLINE
mem_heap_t*
hash_get_nth_heap(
/*==============*/
/* out: mem heap */
hash_table_t* table, /* in: hash table */
ulint i); /* in: index of the heap */
/****************************************************************
Gets the heap for a fold value in a hash table. */
UNIV_INLINE
mem_heap_t*
hash_get_heap(
/*==========*/
/* out: mem heap */
hash_table_t* table, /* in: hash table */
ulint fold); /* in: fold */
/****************************************************************
Gets the nth mutex in a hash table. */
UNIV_INLINE
mutex_t*
hash_get_nth_mutex(
/*===============*/
/* out: mutex */
hash_table_t* table, /* in: hash table */
ulint i); /* in: index of the mutex */
/****************************************************************
Gets the mutex for a fold value in a hash table. */
UNIV_INLINE
mutex_t*
hash_get_mutex(
/*===========*/
/* out: mutex */
hash_table_t* table, /* in: hash table */
ulint fold); /* in: fold */
/****************************************************************
Reserves the mutex for a fold value in a hash table. */
void
hash_mutex_enter(
/*=============*/
hash_table_t* table, /* in: hash table */
ulint fold); /* in: fold */
/****************************************************************
Releases the mutex for a fold value in a hash table. */
void
hash_mutex_exit(
/*============*/
hash_table_t* table, /* in: hash table */
ulint fold); /* in: fold */
/****************************************************************
Reserves all the mutexes of a hash table, in an ascending order. */
void
hash_mutex_enter_all(
/*=================*/
hash_table_t* table); /* in: hash table */
/****************************************************************
Releases all the mutexes of a hash table. */
void
hash_mutex_exit_all(
/*================*/
hash_table_t* table); /* in: hash table */
struct hash_cell_struct{
void* node; /* hash chain node, NULL if none */
};
/* The hash table structure */
struct hash_table_struct {
ibool adaptive;/* TRUE if this is the hash table of the
adaptive hash index */
ulint n_cells;/* number of cells in the hash table */
hash_cell_t* array; /* pointer to cell array */
ulint n_mutexes;/* if mutexes != NULL, then the number of
mutexes, must be a power of 2 */
mutex_t* mutexes;/* NULL, or an array of mutexes used to
protect segments of the hash table */
mem_heap_t** heaps; /* if this is non-NULL, hash chain nodes for
external chaining can be allocated from these
memory heaps; there are then n_mutexes many of
these heaps */
mem_heap_t* heap;
ulint magic_n;
};
#define HASH_TABLE_MAGIC_N 76561114
#ifndef UNIV_NONINL
#include "hash0hash.ic"
#endif
#endif

130
include/hash0hash.ic Normal file
View file

@ -0,0 +1,130 @@
/******************************************************
The simple hash table utility
(c) 1997 Innobase Oy
Created 5/20/1997 Heikki Tuuri
*******************************************************/
#include "ut0rnd.h"
/****************************************************************
Gets the nth cell in a hash table. */
UNIV_INLINE
hash_cell_t*
hash_get_nth_cell(
/*==============*/
/* out: pointer to cell */
hash_table_t* table, /* in: hash table */
ulint n) /* in: cell index */
{
ut_ad(n < table->n_cells);
return(table->array + n);
}
/*****************************************************************
Returns the number of cells in a hash table. */
UNIV_INLINE
ulint
hash_get_n_cells(
/*=============*/
/* out: number of cells */
hash_table_t* table) /* in: table */
{
return(table->n_cells);
}
/******************************************************************
Calculates the hash value from a folded value. */
UNIV_INLINE
ulint
hash_calc_hash(
/*===========*/
/* out: hashed value */
ulint fold, /* in: folded value */
hash_table_t* table) /* in: hash table */
{
return(ut_hash_ulint(fold, table->n_cells));
}
/****************************************************************
Gets the mutex index for a fold value in a hash table. */
UNIV_INLINE
ulint
hash_get_mutex_no(
/*==============*/
/* out: mutex number */
hash_table_t* table, /* in: hash table */
ulint fold) /* in: fold */
{
return(ut_2pow_remainder(fold, table->n_mutexes));
}
/****************************************************************
Gets the nth heap in a hash table. */
UNIV_INLINE
mem_heap_t*
hash_get_nth_heap(
/*==============*/
/* out: mem heap */
hash_table_t* table, /* in: hash table */
ulint i) /* in: index of the heap */
{
ut_ad(i < table->n_mutexes);
return(table->heaps[i]);
}
/****************************************************************
Gets the heap for a fold value in a hash table. */
UNIV_INLINE
mem_heap_t*
hash_get_heap(
/*==========*/
/* out: mem heap */
hash_table_t* table, /* in: hash table */
ulint fold) /* in: fold */
{
ulint i;
if (table->heap) {
return(table->heap);
}
i = hash_get_mutex_no(table, fold);
return(hash_get_nth_heap(table, i));
}
/****************************************************************
Gets the nth mutex in a hash table. */
UNIV_INLINE
mutex_t*
hash_get_nth_mutex(
/*===============*/
/* out: mutex */
hash_table_t* table, /* in: hash table */
ulint i) /* in: index of the mutex */
{
ut_ad(i < table->n_mutexes);
return(table->mutexes + i);
}
/****************************************************************
Gets the mutex for a fold value in a hash table. */
UNIV_INLINE
mutex_t*
hash_get_mutex(
/*===========*/
/* out: mutex */
hash_table_t* table, /* in: hash table */
ulint fold) /* in: fold */
{
ulint i;
i = hash_get_mutex_no(table, fold);
return(hash_get_nth_mutex(table, i));
}

307
include/ibuf0ibuf.h Normal file
View file

@ -0,0 +1,307 @@
/******************************************************
Insert buffer
(c) 1997 Innobase Oy
Created 7/19/1997 Heikki Tuuri
*******************************************************/
#ifndef ibuf0ibuf_h
#define ibuf0ibuf_h
#include "univ.i"
#include "dict0mem.h"
#include "dict0dict.h"
#include "mtr0mtr.h"
#include "que0types.h"
#include "ibuf0types.h"
#include "fsp0fsp.h"
extern ibuf_t* ibuf;
/**********************************************************************
Creates the insert buffer data struct for a single tablespace. Reads the
root page of the insert buffer tree in the tablespace. This function can
be called only after the dictionary system has been initialized, as this
creates also the insert buffer table and index for this tablespace. */
ibuf_data_t*
ibuf_data_init_for_space(
/*=====================*/
/* out, own: ibuf data struct, linked to the list
in ibuf control structure. */
ulint space); /* in: space id */
/**********************************************************************
Creates the insert buffer data structure at a database startup and
initializes the data structures for the insert buffer of each tablespace. */
void
ibuf_init_at_db_start(void);
/*=======================*/
/*************************************************************************
Reads the biggest tablespace id from the high end of the insert buffer
tree and updates the counter in fil_system. */
void
ibuf_update_max_tablespace_id(void);
/*===============================*/
/*************************************************************************
Initializes an ibuf bitmap page. */
void
ibuf_bitmap_page_init(
/*==================*/
page_t* page, /* in: bitmap page */
mtr_t* mtr); /* in: mtr */
/****************************************************************************
Resets the free bits of the page in the ibuf bitmap. This is done in a
separate mini-transaction, hence this operation does not restrict further
work to only ibuf bitmap operations, which would result if the latch to the
bitmap page were kept. */
void
ibuf_reset_free_bits_with_type(
/*===========================*/
ulint type, /* in: index type */
page_t* page); /* in: index page; free bits are set to 0 if the index
is non-clustered and non-unique and the page level is
0 */
/****************************************************************************
Resets the free bits of the page in the ibuf bitmap. This is done in a
separate mini-transaction, hence this operation does not restrict further
work to solely ibuf bitmap operations, which would result if the latch to
the bitmap page were kept. */
void
ibuf_reset_free_bits(
/*=================*/
dict_index_t* index, /* in: index */
page_t* page); /* in: index page; free bits are set to 0 if
the index is non-clustered and non-unique and
the page level is 0 */
/****************************************************************************
Updates the free bits of the page in the ibuf bitmap if there is not enough
free on the page any more. This is done in a separate mini-transaction, hence
this operation does not restrict further work to only ibuf bitmap operations,
which would result if the latch to the bitmap page were kept. */
UNIV_INLINE
void
ibuf_update_free_bits_if_full(
/*==========================*/
dict_index_t* index, /* in: index */
page_t* page, /* in: index page to which we have added new
records; the free bits are updated if the
index is non-clustered and non-unique and
the page level is 0, and the page becomes
fuller */
ulint max_ins_size,/* in: value of maximum insert size with
reorganize before the latest operation
performed to the page */
ulint increase);/* in: upper limit for the additional space
used in the latest operation, if known, or
ULINT_UNDEFINED */
/**************************************************************************
Updates the free bits for the page to reflect the present state. Does this
in the mtr given, which means that the latching order rules virtually
prevent any further operations for this OS thread until mtr is committed. */
void
ibuf_update_free_bits_low(
/*======================*/
dict_index_t* index, /* in: index */
page_t* page, /* in: index page */
ulint max_ins_size, /* in: value of maximum insert size
with reorganize before the latest
operation performed to the page */
mtr_t* mtr); /* in: mtr */
/**************************************************************************
Updates the free bits for the two pages to reflect the present state. Does
this in the mtr given, which means that the latching order rules virtually
prevent any further operations until mtr is committed. */
void
ibuf_update_free_bits_for_two_pages_low(
/*====================================*/
dict_index_t* index, /* in: index */
page_t* page1, /* in: index page */
page_t* page2, /* in: index page */
mtr_t* mtr); /* in: mtr */
/**************************************************************************
A basic partial test if an insert to the insert buffer could be possible and
recommended. */
UNIV_INLINE
ibool
ibuf_should_try(
/*============*/
dict_index_t* index, /* in: index where to insert */
ulint ignore_sec_unique); /* in: if != 0, we should
ignore UNIQUE constraint on
a secondary index when we
decide */
/**********************************************************************
Returns TRUE if the current OS thread is performing an insert buffer
routine. */
ibool
ibuf_inside(void);
/*=============*/
/* out: TRUE if inside an insert buffer routine: for instance,
a read-ahead of non-ibuf pages is then forbidden */
/***************************************************************************
Checks if a page address is an ibuf bitmap page (level 3 page) address. */
UNIV_INLINE
ibool
ibuf_bitmap_page(
/*=============*/
/* out: TRUE if a bitmap page */
ulint page_no);/* in: page number */
/***************************************************************************
Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages. */
ibool
ibuf_page(
/*======*/
/* out: TRUE if level 2 or level 3 page */
ulint space, /* in: space id */
ulint page_no);/* in: page number */
/***************************************************************************
Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages. */
ibool
ibuf_page_low(
/*==========*/
/* out: TRUE if level 2 or level 3 page */
ulint space, /* in: space id */
ulint page_no,/* in: page number */
mtr_t* mtr); /* in: mtr which will contain an x-latch to the
bitmap page if the page is not one of the fixed
address ibuf pages */
/***************************************************************************
Frees excess pages from the ibuf free list. This function is called when an OS
thread calls fsp services to allocate a new file segment, or a new page to a
file segment, and the thread did not own the fsp latch before this call. */
void
ibuf_free_excess_pages(
/*===================*/
ulint space); /* in: space id */
/*************************************************************************
Makes an index insert to the insert buffer, instead of directly to the disk
page, if this is possible. Does not do insert if the index is clustered
or unique. */
ibool
ibuf_insert(
/*========*/
/* out: TRUE if success */
dtuple_t* entry, /* in: index entry to insert */
dict_index_t* index, /* in: index where to insert */
ulint space, /* in: space id where to insert */
ulint page_no,/* in: page number where to insert */
que_thr_t* thr); /* in: query thread */
/*************************************************************************
When an index page is read from a disk to the buffer pool, this function
inserts to the page the possible index entries buffered in the insert buffer.
The entries are deleted from the insert buffer. If the page is not read, but
created in the buffer pool, this function deletes its buffered entries from
the insert buffer; there can exist entries for such a page if the page
belonged to an index which subsequently was dropped. */
void
ibuf_merge_or_delete_for_page(
/*==========================*/
page_t* page, /* in: if page has been read from disk, pointer to
the page x-latched, else NULL */
ulint space, /* in: space id of the index page */
ulint page_no,/* in: page number of the index page */
ibool update_ibuf_bitmap);/* in: normally this is set to TRUE, but if
we have deleted or are deleting the tablespace, then we
naturally do not want to update a non-existent bitmap
page */
/*************************************************************************
Deletes all entries in the insert buffer for a given space id. This is used
in DISCARD TABLESPACE and IMPORT TABLESPACE.
NOTE: this does not update the page free bitmaps in the space. The space will
become CORRUPT when you call this function! */
void
ibuf_delete_for_discarded_space(
/*============================*/
ulint space); /* in: space id */
/*************************************************************************
Contracts insert buffer trees by reading pages to the buffer pool. */
ulint
ibuf_contract(
/*==========*/
/* out: a lower limit for the combined size in bytes
of entries which will be merged from ibuf trees to the
pages read, 0 if ibuf is empty */
ibool sync); /* in: TRUE if the caller wants to wait for the
issued read with the highest tablespace address
to complete */
/*************************************************************************
Contracts insert buffer trees by reading pages to the buffer pool. */
ulint
ibuf_contract_for_n_pages(
/*======================*/
/* out: a lower limit for the combined size in bytes
of entries which will be merged from ibuf trees to the
pages read, 0 if ibuf is empty */
ibool sync, /* in: TRUE if the caller wants to wait for the
issued read with the highest tablespace address
to complete */
ulint n_pages);/* in: try to read at least this many pages to
the buffer pool and merge the ibuf contents to
them */
/*************************************************************************
Parses a redo log record of an ibuf bitmap page init. */
byte*
ibuf_parse_bitmap_init(
/*===================*/
/* out: end of log record or NULL */
byte* ptr, /* in: buffer */
byte* end_ptr,/* in: buffer end */
page_t* page, /* in: page or NULL */
mtr_t* mtr); /* in: mtr or NULL */
/**********************************************************************
Gets the ibuf count for a given page. */
ulint
ibuf_count_get(
/*===========*/
/* out: number of entries in the insert buffer
currently buffered for this page */
ulint space, /* in: space id */
ulint page_no);/* in: page number */
/**********************************************************************
Looks if the insert buffer is empty. */
ibool
ibuf_is_empty(void);
/*===============*/
/* out: TRUE if empty */
/**********************************************************************
Prints info of ibuf. */
void
ibuf_print(
/*=======*/
FILE* file); /* in: file where to print */
#define IBUF_HEADER_PAGE_NO FSP_IBUF_HEADER_PAGE_NO
#define IBUF_TREE_ROOT_PAGE_NO FSP_IBUF_TREE_ROOT_PAGE_NO
/* The ibuf header page currently contains only the file segment header
for the file segment from which the pages for the ibuf tree are allocated */
#define IBUF_HEADER PAGE_DATA
#define IBUF_TREE_SEG_HEADER 0 /* fseg header for ibuf tree */
#ifndef UNIV_NONINL
#include "ibuf0ibuf.ic"
#endif
#endif

232
include/ibuf0ibuf.ic Normal file
View file

@ -0,0 +1,232 @@
/******************************************************
Insert buffer
(c) 1997 Innobase Oy
Created 7/19/1997 Heikki Tuuri
*******************************************************/
#include "buf0lru.h"
#include "page0page.h"
extern ulint ibuf_flush_count;
/* If this number is n, an index page must contain at least the page size
per n bytes of free space for ibuf to try to buffer inserts to this page.
If there is this much of free space, the corresponding bits are set in the
ibuf bitmap. */
#define IBUF_PAGE_SIZE_PER_FREE_SPACE 32
/* Insert buffer data struct for a single tablespace */
struct ibuf_data_struct{
ulint space; /* space id */
ulint seg_size;/* allocated pages if the file segment
containing ibuf header and tree */
ulint size; /* size of the insert buffer tree in pages */
ibool empty; /* after an insert to the ibuf tree is
performed, this is set to FALSE, and if a
contract operation finds the tree empty, this
is set to TRUE */
ulint free_list_len;
/* length of the free list */
ulint height; /* tree height */
dict_index_t* index; /* insert buffer index */
UT_LIST_NODE_T(ibuf_data_t) data_list;
/* list of ibuf data structs */
ulint n_inserts;/* number of inserts made to the insert
buffer */
ulint n_merges;/* number of pages merged */
ulint n_merged_recs;/* number of records merged */
};
/* If the ibuf meter exceeds this value, then the suitable inserts are made to
the insert buffer instead of directly to the disk page */
#define IBUF_THRESHOLD 50
struct ibuf_struct{
ulint size; /* current size of the ibuf index
trees in pages */
ulint max_size; /* recommended maximum size in pages
for the ibuf index tree */
ulint meter; /* heuristic meter which measures
desirability of doing inserts to the
insert buffer instead of directly to
the disk page */
UT_LIST_BASE_NODE_T(ibuf_data_t) data_list;
/* list of ibuf data structs for
each tablespace */
};
/****************************************************************************
Sets the free bit of the page in the ibuf bitmap. This is done in a separate
mini-transaction, hence this operation does not restrict further work to only
ibuf bitmap operations, which would result if the latch to the bitmap page
were kept. */
void
ibuf_set_free_bits(
/*===============*/
ulint type, /* in: index type */
page_t* page, /* in: index page; free bit is reset if the index is
a non-clustered non-unique, and page level is 0 */
ulint val, /* in: value to set: < 4 */
ulint max_val);/* in: ULINT_UNDEFINED or a maximum value which
the bits must have before setting; this is for
debugging */
/**************************************************************************
A basic partial test if an insert to the insert buffer could be possible and
recommended. */
UNIV_INLINE
ibool
ibuf_should_try(
/*============*/
dict_index_t* index, /* in: index where to insert */
ulint ignore_sec_unique) /* in: if != 0, we should
ignore UNIQUE constraint on
a secondary index when we
decide */
{
if (!(index->type & DICT_CLUSTERED)
&& (ignore_sec_unique || !(index->type & DICT_UNIQUE))
&& ibuf->meter > IBUF_THRESHOLD) {
ibuf_flush_count++;
if (ibuf_flush_count % 8 == 0) {
buf_LRU_try_free_flushed_blocks();
}
return(TRUE);
}
return(FALSE);
}
/***************************************************************************
Checks if a page address is an ibuf bitmap page address. */
UNIV_INLINE
ibool
ibuf_bitmap_page(
/*=============*/
/* out: TRUE if a bitmap page */
ulint page_no)/* in: page number */
{
if (page_no % XDES_DESCRIBED_PER_PAGE == FSP_IBUF_BITMAP_OFFSET) {
return(TRUE);
}
return(FALSE);
}
/*************************************************************************
Translates the free space on a page to a value in the ibuf bitmap.*/
UNIV_INLINE
ulint
ibuf_index_page_calc_free_bits(
/*===========================*/
/* out: value for ibuf bitmap bits */
ulint max_ins_size) /* in: maximum insert size after reorganize
for the page */
{
ulint n;
n = max_ins_size / (UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE);
if (n == 3) {
n = 2;
}
if (n > 3) {
n = 3;
}
return(n);
}
/*************************************************************************
Translates the ibuf free bits to the free space on a page in bytes. */
UNIV_INLINE
ulint
ibuf_index_page_calc_free_from_bits(
/*================================*/
/* out: maximum insert size after reorganize for the
page */
ulint bits) /* in: value for ibuf bitmap bits */
{
ut_ad(bits < 4);
if (bits == 3) {
return(4 * UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE);
}
return(bits * UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE);
}
/*************************************************************************
Translates the free space on a page to a value in the ibuf bitmap.*/
UNIV_INLINE
ulint
ibuf_index_page_calc_free(
/*======================*/
/* out: value for ibuf bitmap bits */
page_t* page) /* in: non-unique secondary index page */
{
return(ibuf_index_page_calc_free_bits(
page_get_max_insert_size_after_reorganize(page, 1)));
}
/****************************************************************************
Updates the free bits of the page in the ibuf bitmap if there is not enough
free on the page any more. This is done in a separate mini-transaction, hence
this operation does not restrict further work to only ibuf bitmap operations,
which would result if the latch to the bitmap page were kept. */
UNIV_INLINE
void
ibuf_update_free_bits_if_full(
/*==========================*/
dict_index_t* index, /* in: index */
page_t* page, /* in: index page to which we have added new
records; the free bits are updated if the
index is non-clustered and non-unique and
the page level is 0, and the page becomes
fuller */
ulint max_ins_size,/* in: value of maximum insert size with
reorganize before the latest operation
performed to the page */
ulint increase)/* in: upper limit for the additional space
used in the latest operation, if known, or
ULINT_UNDEFINED */
{
ulint before;
ulint after;
before = ibuf_index_page_calc_free_bits(max_ins_size);
if (max_ins_size >= increase) {
ut_ad(ULINT_UNDEFINED > UNIV_PAGE_SIZE);
after = ibuf_index_page_calc_free_bits(max_ins_size
- increase);
#ifdef UNIV_IBUF_DEBUG
ut_a(after <= ibuf_index_page_calc_free(page));
#endif
} else {
after = ibuf_index_page_calc_free(page);
}
if (after == 0) {
/* We move the page to the front of the buffer pool LRU list:
the purpose of this is to prevent those pages to which we
cannot make inserts using the insert buffer from slipping
out of the buffer pool */
buf_page_make_young(page);
}
if (before > after) {
ibuf_set_free_bits(index->type, page, after, before);
}
}

15
include/ibuf0types.h Normal file
View file

@ -0,0 +1,15 @@
/******************************************************
Insert buffer global types
(c) 1997 Innobase Oy
Created 7/29/1997 Heikki Tuuri
*******************************************************/
#ifndef ibuf0types_h
#define ibuf0types_h
typedef struct ibuf_data_struct ibuf_data_t;
typedef struct ibuf_struct ibuf_t;
#endif

677
include/lock0lock.h Normal file
View file

@ -0,0 +1,677 @@
/******************************************************
The transaction lock system
(c) 1996 Innobase Oy
Created 5/7/1996 Heikki Tuuri
*******************************************************/
#ifndef lock0lock_h
#define lock0lock_h
#include "univ.i"
#include "trx0types.h"
#include "rem0types.h"
#include "dict0types.h"
#include "que0types.h"
#include "page0types.h"
#include "lock0types.h"
#include "read0types.h"
#include "hash0hash.h"
#ifdef UNIV_DEBUG
extern ibool lock_print_waits;
#endif /* UNIV_DEBUG */
/* Buffer for storing information about the most recent deadlock error */
extern FILE* lock_latest_err_file;
/*************************************************************************
Gets the size of a lock struct. */
ulint
lock_get_size(void);
/*===============*/
/* out: size in bytes */
/*************************************************************************
Creates the lock system at database start. */
void
lock_sys_create(
/*============*/
ulint n_cells); /* in: number of slots in lock hash table */
/*************************************************************************
Checks if some transaction has an implicit x-lock on a record in a secondary
index. */
trx_t*
lock_sec_rec_some_has_impl_off_kernel(
/*==================================*/
/* out: transaction which has the x-lock, or
NULL */
rec_t* rec, /* in: user record */
dict_index_t* index, /* in: secondary index */
const ulint* offsets);/* in: rec_get_offsets(rec, index) */
/*************************************************************************
Checks if some transaction has an implicit x-lock on a record in a clustered
index. */
UNIV_INLINE
trx_t*
lock_clust_rec_some_has_impl(
/*=========================*/
/* out: transaction which has the x-lock, or
NULL */
rec_t* rec, /* in: user record */
dict_index_t* index, /* in: clustered index */
const ulint* offsets);/* in: rec_get_offsets(rec, index) */
/*****************************************************************
Resets the lock bits for a single record. Releases transactions
waiting for lock requests here. */
void
lock_rec_reset_and_release_wait(
/*============================*/
rec_t* rec); /* in: record whose locks bits should be reset */
/*****************************************************************
Makes a record to inherit the locks of another record as gap type
locks, but does not reset the lock bits of the other record. Also
waiting lock requests on rec are inherited as GRANTED gap locks. */
void
lock_rec_inherit_to_gap(
/*====================*/
rec_t* heir, /* in: record which inherits */
rec_t* rec); /* in: record from which inherited; does NOT reset
the locks on this record */
/*****************************************************************
Updates the lock table when we have reorganized a page. NOTE: we copy
also the locks set on the infimum of the page; the infimum may carry
locks if an update of a record is occurring on the page, and its locks
were temporarily stored on the infimum. */
void
lock_move_reorganize_page(
/*======================*/
page_t* page, /* in: old index page */
page_t* new_page); /* in: reorganized page */
/*****************************************************************
Moves the explicit locks on user records to another page if a record
list end is moved to another page. */
void
lock_move_rec_list_end(
/*===================*/
page_t* new_page, /* in: index page to move to */
page_t* page, /* in: index page */
rec_t* rec); /* in: record on page: this is the
first record moved */
/*****************************************************************
Moves the explicit locks on user records to another page if a record
list start is moved to another page. */
void
lock_move_rec_list_start(
/*=====================*/
page_t* new_page, /* in: index page to move to */
page_t* page, /* in: index page */
rec_t* rec, /* in: record on page: this is the
first record NOT copied */
rec_t* old_end); /* in: old previous-to-last record on
new_page before the records were copied */
/*****************************************************************
Updates the lock table when a page is split to the right. */
void
lock_update_split_right(
/*====================*/
page_t* right_page, /* in: right page */
page_t* left_page); /* in: left page */
/*****************************************************************
Updates the lock table when a page is merged to the right. */
void
lock_update_merge_right(
/*====================*/
rec_t* orig_succ, /* in: original successor of infimum
on the right page before merge */
page_t* left_page); /* in: merged index page which will be
discarded */
/*****************************************************************
Updates the lock table when the root page is copied to another in
btr_root_raise_and_insert. Note that we leave lock structs on the
root page, even though they do not make sense on other than leaf
pages: the reason is that in a pessimistic update the infimum record
of the root page will act as a dummy carrier of the locks of the record
to be updated. */
void
lock_update_root_raise(
/*===================*/
page_t* new_page, /* in: index page to which copied */
page_t* root); /* in: root page */
/*****************************************************************
Updates the lock table when a page is copied to another and the original page
is removed from the chain of leaf pages, except if page is the root! */
void
lock_update_copy_and_discard(
/*=========================*/
page_t* new_page, /* in: index page to which copied */
page_t* page); /* in: index page; NOT the root! */
/*****************************************************************
Updates the lock table when a page is split to the left. */
void
lock_update_split_left(
/*===================*/
page_t* right_page, /* in: right page */
page_t* left_page); /* in: left page */
/*****************************************************************
Updates the lock table when a page is merged to the left. */
void
lock_update_merge_left(
/*===================*/
page_t* left_page, /* in: left page to which merged */
rec_t* orig_pred, /* in: original predecessor of supremum
on the left page before merge */
page_t* right_page); /* in: merged index page which will be
discarded */
/*****************************************************************
Resets the original locks on heir and replaces them with gap type locks
inherited from rec. */
void
lock_rec_reset_and_inherit_gap_locks(
/*=================================*/
rec_t* heir, /* in: heir record */
rec_t* rec); /* in: record */
/*****************************************************************
Updates the lock table when a page is discarded. */
void
lock_update_discard(
/*================*/
rec_t* heir, /* in: record which will inherit the locks */
page_t* page); /* in: index page which will be discarded */
/*****************************************************************
Updates the lock table when a new user record is inserted. */
void
lock_update_insert(
/*===============*/
rec_t* rec); /* in: the inserted record */
/*****************************************************************
Updates the lock table when a record is removed. */
void
lock_update_delete(
/*===============*/
rec_t* rec); /* in: the record to be removed */
/*************************************************************************
Stores on the page infimum record the explicit locks of another record.
This function is used to store the lock state of a record when it is
updated and the size of the record changes in the update. The record
is in such an update moved, perhaps to another page. The infimum record
acts as a dummy carrier record, taking care of lock releases while the
actual record is being moved. */
void
lock_rec_store_on_page_infimum(
/*===========================*/
page_t* page, /* in: page containing the record */
rec_t* rec); /* in: record whose lock state is stored
on the infimum record of the same page; lock
bits are reset on the record */
/*************************************************************************
Restores the state of explicit lock requests on a single record, where the
state was stored on the infimum of the page. */
void
lock_rec_restore_from_page_infimum(
/*===============================*/
rec_t* rec, /* in: record whose lock state is restored */
page_t* page); /* in: page (rec is not necessarily on this page)
whose infimum stored the lock state; lock bits are
reset on the infimum */
/*************************************************************************
Returns TRUE if there are explicit record locks on a page. */
ibool
lock_rec_expl_exist_on_page(
/*========================*/
/* out: TRUE if there are explicit record locks on
the page */
ulint space, /* in: space id */
ulint page_no);/* in: page number */
/*************************************************************************
Checks if locks of other transactions prevent an immediate insert of
a record. If they do, first tests if the query thread should anyway
be suspended for some reason; if not, then puts the transaction and
the query thread to the lock wait state and inserts a waiting request
for a gap x-lock to the lock queue. */
ulint
lock_rec_insert_check_and_lock(
/*===========================*/
/* out: DB_SUCCESS, DB_LOCK_WAIT,
DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
does nothing */
rec_t* rec, /* in: record after which to insert */
dict_index_t* index, /* in: index */
que_thr_t* thr, /* in: query thread */
ibool* inherit);/* out: set to TRUE if the new inserted
record maybe should inherit LOCK_GAP type
locks from the successor record */
/*************************************************************************
Checks if locks of other transactions prevent an immediate modify (update,
delete mark, or delete unmark) of a clustered index record. If they do,
first tests if the query thread should anyway be suspended for some
reason; if not, then puts the transaction and the query thread to the
lock wait state and inserts a waiting request for a record x-lock to the
lock queue. */
ulint
lock_clust_rec_modify_check_and_lock(
/*=================================*/
/* out: DB_SUCCESS, DB_LOCK_WAIT,
DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
does nothing */
rec_t* rec, /* in: record which should be modified */
dict_index_t* index, /* in: clustered index */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
que_thr_t* thr); /* in: query thread */
/*************************************************************************
Checks if locks of other transactions prevent an immediate modify
(delete mark or delete unmark) of a secondary index record. */
ulint
lock_sec_rec_modify_check_and_lock(
/*===============================*/
/* out: DB_SUCCESS, DB_LOCK_WAIT,
DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
does nothing */
rec_t* rec, /* in: record which should be modified;
NOTE: as this is a secondary index, we
always have to modify the clustered index
record first: see the comment below */
dict_index_t* index, /* in: secondary index */
que_thr_t* thr); /* in: query thread */
/*************************************************************************
Like the counterpart for a clustered index below, but now we read a
secondary index record. */
ulint
lock_sec_rec_read_check_and_lock(
/*=============================*/
/* out: DB_SUCCESS, DB_LOCK_WAIT,
DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
does nothing */
rec_t* rec, /* in: user record or page supremum record
which should be read or passed over by a read
cursor */
dict_index_t* index, /* in: secondary index */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
ulint mode, /* in: mode of the lock which the read cursor
should set on records: LOCK_S or LOCK_X; the
latter is possible in SELECT FOR UPDATE */
ulint gap_mode,/* in: LOCK_ORDINARY, LOCK_GAP, or
LOCK_REC_NOT_GAP */
que_thr_t* thr); /* in: query thread */
/*************************************************************************
Checks if locks of other transactions prevent an immediate read, or passing
over by a read cursor, of a clustered index record. If they do, first tests
if the query thread should anyway be suspended for some reason; if not, then
puts the transaction and the query thread to the lock wait state and inserts a
waiting request for a record lock to the lock queue. Sets the requested mode
lock on the record. */
ulint
lock_clust_rec_read_check_and_lock(
/*===============================*/
/* out: DB_SUCCESS, DB_LOCK_WAIT,
DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
does nothing */
rec_t* rec, /* in: user record or page supremum record
which should be read or passed over by a read
cursor */
dict_index_t* index, /* in: clustered index */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
ulint mode, /* in: mode of the lock which the read cursor
should set on records: LOCK_S or LOCK_X; the
latter is possible in SELECT FOR UPDATE */
ulint gap_mode,/* in: LOCK_ORDINARY, LOCK_GAP, or
LOCK_REC_NOT_GAP */
que_thr_t* thr); /* in: query thread */
/*************************************************************************
Checks if locks of other transactions prevent an immediate read, or passing
over by a read cursor, of a clustered index record. If they do, first tests
if the query thread should anyway be suspended for some reason; if not, then
puts the transaction and the query thread to the lock wait state and inserts a
waiting request for a record lock to the lock queue. Sets the requested mode
lock on the record. This is an alternative version of
lock_clust_rec_read_check_and_lock() that does not require the parameter
"offsets". */
ulint
lock_clust_rec_read_check_and_lock_alt(
/*===================================*/
/* out: DB_SUCCESS, DB_LOCK_WAIT,
DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
does nothing */
rec_t* rec, /* in: user record or page supremum record
which should be read or passed over by a read
cursor */
dict_index_t* index, /* in: clustered index */
ulint mode, /* in: mode of the lock which the read cursor
should set on records: LOCK_S or LOCK_X; the
latter is possible in SELECT FOR UPDATE */
ulint gap_mode,/* in: LOCK_ORDINARY, LOCK_GAP, or
LOCK_REC_NOT_GAP */
que_thr_t* thr); /* in: query thread */
/*************************************************************************
Checks that a record is seen in a consistent read. */
ibool
lock_clust_rec_cons_read_sees(
/*==========================*/
/* out: TRUE if sees, or FALSE if an earlier
version of the record should be retrieved */
rec_t* rec, /* in: user record which should be read or
passed over by a read cursor */
dict_index_t* index, /* in: clustered index */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
read_view_t* view); /* in: consistent read view */
/*************************************************************************
Checks that a non-clustered index record is seen in a consistent read. */
ulint
lock_sec_rec_cons_read_sees(
/*========================*/
/* out: TRUE if certainly sees, or FALSE if an
earlier version of the clustered index record
might be needed: NOTE that a non-clustered
index page contains so little information on
its modifications that also in the case FALSE,
the present version of rec may be the right,
but we must check this from the clustered
index record */
rec_t* rec, /* in: user record which should be read or
passed over by a read cursor */
dict_index_t* index, /* in: non-clustered index */
read_view_t* view); /* in: consistent read view */
/*************************************************************************
Locks the specified database table in the mode given. If the lock cannot
be granted immediately, the query thread is put to wait. */
ulint
lock_table(
/*=======*/
/* out: DB_SUCCESS, DB_LOCK_WAIT,
DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
does nothing */
dict_table_t* table, /* in: database table in dictionary cache */
ulint mode, /* in: lock mode */
que_thr_t* thr); /* in: query thread */
/*************************************************************************
Checks if there are any locks set on the table. */
ibool
lock_is_on_table(
/*=============*/
/* out: TRUE if there are lock(s) */
dict_table_t* table); /* in: database table in dictionary cache */
/*************************************************************************
Releases a table lock.
Releases possible other transactions waiting for this lock. */
void
lock_table_unlock(
/*==============*/
lock_t* lock); /* in: lock */
/*************************************************************************
Releases an auto-inc lock a transaction possibly has on a table.
Releases possible other transactions waiting for this lock. */
void
lock_table_unlock_auto_inc(
/*=======================*/
trx_t* trx); /* in: transaction */
/*************************************************************************
Releases transaction locks, and releases possible other transactions waiting
because of these locks. */
void
lock_release_off_kernel(
/*====================*/
trx_t* trx); /* in: transaction */
/*************************************************************************
Cancels a waiting lock request and releases possible other transactions
waiting behind it. */
void
lock_cancel_waiting_and_release(
/*============================*/
lock_t* lock); /* in: waiting lock request */
/*************************************************************************
Resets all locks, both table and record locks, on a table to be dropped.
No lock is allowed to be a wait lock. */
void
lock_reset_all_on_table(
/*====================*/
dict_table_t* table); /* in: table to be dropped */
/*************************************************************************
Calculates the fold value of a page file address: used in inserting or
searching for a lock in the hash table. */
UNIV_INLINE
ulint
lock_rec_fold(
/*===========*/
/* out: folded value */
ulint space, /* in: space */
ulint page_no);/* in: page number */
/*************************************************************************
Calculates the hash value of a page file address: used in inserting or
searching for a lock in the hash table. */
UNIV_INLINE
ulint
lock_rec_hash(
/*==========*/
/* out: hashed value */
ulint space, /* in: space */
ulint page_no);/* in: page number */
/*************************************************************************
Gets the source table of an ALTER TABLE transaction. The table must be
covered by an IX or IS table lock. */
dict_table_t*
lock_get_src_table(
/*===============*/
/* out: the source table of transaction,
if it is covered by an IX or IS table lock;
dest if there is no source table, and
NULL if the transaction is locking more than
two tables or an inconsistency is found */
trx_t* trx, /* in: transaction */
dict_table_t* dest, /* in: destination of ALTER TABLE */
ulint* mode); /* out: lock mode of the source table */
/*************************************************************************
Determine if the given table is exclusively "owned" by the given
transaction, i.e., transaction holds LOCK_IX and possibly LOCK_AUTO_INC
on the table. */
ibool
lock_is_table_exclusive(
/*====================*/
/* out: TRUE if table is only locked by trx,
with LOCK_IX, and possibly LOCK_AUTO_INC */
dict_table_t* table, /* in: table */
trx_t* trx); /* in: transaction */
/*************************************************************************
Checks that a transaction id is sensible, i.e., not in the future. */
ibool
lock_check_trx_id_sanity(
/*=====================*/
/* out: TRUE if ok */
dulint trx_id, /* in: trx id */
rec_t* rec, /* in: user record */
dict_index_t* index, /* in: clustered index */
const ulint* offsets, /* in: rec_get_offsets(rec, index) */
ibool has_kernel_mutex);/* in: TRUE if the caller owns the
kernel mutex */
/*************************************************************************
Validates the lock queue on a single record. */
ibool
lock_rec_queue_validate(
/*====================*/
/* out: TRUE if ok */
rec_t* rec, /* in: record to look at */
dict_index_t* index, /* in: index, or NULL if not known */
const ulint* offsets);/* in: rec_get_offsets(rec, index) */
/*************************************************************************
Prints info of a table lock. */
void
lock_table_print(
/*=============*/
FILE* file, /* in: file where to print */
lock_t* lock); /* in: table type lock */
/*************************************************************************
Prints info of a record lock. */
void
lock_rec_print(
/*===========*/
FILE* file, /* in: file where to print */
lock_t* lock); /* in: record type lock */
/*************************************************************************
Prints info of locks for all transactions. */
void
lock_print_info_summary(
/*====================*/
FILE* file); /* in: file where to print */
/*************************************************************************
Prints info of locks for each transaction. */
void
lock_print_info_all_transactions(
/*=============================*/
FILE* file); /* in: file where to print */
/*************************************************************************
Validates the lock queue on a table. */
ibool
lock_table_queue_validate(
/*======================*/
/* out: TRUE if ok */
dict_table_t* table); /* in: table */
/*************************************************************************
Validates the record lock queues on a page. */
ibool
lock_rec_validate_page(
/*===================*/
/* out: TRUE if ok */
ulint space, /* in: space id */
ulint page_no);/* in: page number */
/*************************************************************************
Validates the lock system. */
ibool
lock_validate(void);
/*===============*/
/* out: TRUE if ok */
/* The lock system */
extern lock_sys_t* lock_sys;
/* Lock modes and types */
/* Basic modes */
#define LOCK_NONE 0 /* this flag is used elsewhere to note
consistent read */
#define LOCK_IS 2 /* intention shared */
#define LOCK_IX 3 /* intention exclusive */
#define LOCK_S 4 /* shared */
#define LOCK_X 5 /* exclusive */
#define LOCK_AUTO_INC 6 /* locks the auto-inc counter of a table
in an exclusive mode */
#define LOCK_MODE_MASK 0xFUL /* mask used to extract mode from the
type_mode field in a lock */
/* Lock types */
#define LOCK_TABLE 16 /* these type values should be so high that */
#define LOCK_REC 32 /* they can be ORed to the lock mode */
#define LOCK_TYPE_MASK 0xF0UL /* mask used to extract lock type from the
type_mode field in a lock */
/* Waiting lock flag */
#define LOCK_WAIT 256 /* this wait bit should be so high that
it can be ORed to the lock mode and type;
when this bit is set, it means that the
lock has not yet been granted, it is just
waiting for its turn in the wait queue */
/* Precise modes */
#define LOCK_ORDINARY 0 /* this flag denotes an ordinary next-key lock
in contrast to LOCK_GAP or LOCK_REC_NOT_GAP */
#define LOCK_GAP 512 /* this gap bit should be so high that
it can be ORed to the other flags;
when this bit is set, it means that the
lock holds only on the gap before the record;
for instance, an x-lock on the gap does not
give permission to modify the record on which
the bit is set; locks of this type are created
when records are removed from the index chain
of records */
#define LOCK_REC_NOT_GAP 1024 /* this bit means that the lock is only on
the index record and does NOT block inserts
to the gap before the index record; this is
used in the case when we retrieve a record
with a unique key, and is also used in
locking plain SELECTs (not part of UPDATE
or DELETE) when the user has set the READ
COMMITTED isolation level */
#define LOCK_INSERT_INTENTION 2048 /* this bit is set when we place a waiting
gap type record lock request in order to let
an insert of an index record to wait until
there are no conflicting locks by other
transactions on the gap; note that this flag
remains set when the waiting lock is granted,
or if the lock is inherited to a neighboring
record */
/* When lock bits are reset, the following flags are available: */
#define LOCK_RELEASE_WAIT 1
#define LOCK_NOT_RELEASE_WAIT 2
/* Lock operation struct */
typedef struct lock_op_struct lock_op_t;
struct lock_op_struct{
dict_table_t* table; /* table to be locked */
ulint mode; /* lock mode */
};
#define LOCK_OP_START 1
#define LOCK_OP_COMPLETE 2
/* The lock system struct */
struct lock_sys_struct{
hash_table_t* rec_hash; /* hash table of the record locks */
};
/* The lock system */
extern lock_sys_t* lock_sys;
#ifndef UNIV_NONINL
#include "lock0lock.ic"
#endif
#endif

83
include/lock0lock.ic Normal file
View file

@ -0,0 +1,83 @@
/******************************************************
The transaction lock system
(c) 1996 Innobase Oy
Created 5/7/1996 Heikki Tuuri
*******************************************************/
#include "sync0sync.h"
#include "srv0srv.h"
#include "dict0dict.h"
#include "row0row.h"
#include "trx0sys.h"
#include "trx0trx.h"
#include "buf0buf.h"
#include "page0page.h"
#include "page0cur.h"
#include "row0vers.h"
#include "que0que.h"
#include "btr0cur.h"
#include "read0read.h"
#include "log0recv.h"
/*************************************************************************
Calculates the fold value of a page file address: used in inserting or
searching for a lock in the hash table. */
UNIV_INLINE
ulint
lock_rec_fold(
/*==========*/
/* out: folded value */
ulint space, /* in: space */
ulint page_no)/* in: page number */
{
return(ut_fold_ulint_pair(space, page_no));
}
/*************************************************************************
Calculates the hash value of a page file address: used in inserting or
searching for a lock in the hash table. */
UNIV_INLINE
ulint
lock_rec_hash(
/*==========*/
/* out: hashed value */
ulint space, /* in: space */
ulint page_no)/* in: page number */
{
return(hash_calc_hash(lock_rec_fold(space, page_no),
lock_sys->rec_hash));
}
/*************************************************************************
Checks if some transaction has an implicit x-lock on a record in a clustered
index. */
UNIV_INLINE
trx_t*
lock_clust_rec_some_has_impl(
/*=========================*/
/* out: transaction which has the x-lock, or
NULL */
rec_t* rec, /* in: user record */
dict_index_t* index, /* in: clustered index */
const ulint* offsets)/* in: rec_get_offsets(rec, index) */
{
dulint trx_id;
#ifdef UNIV_SYNC_DEBUG
ut_ad(mutex_own(&kernel_mutex));
#endif /* UNIV_SYNC_DEBUG */
ut_ad(index->type & DICT_CLUSTERED);
ut_ad(page_rec_is_user_rec(rec));
trx_id = row_get_rec_trx_id(rec, index, offsets);
if (trx_is_active(trx_id)) {
/* The modifying or inserting transaction is active */
return(trx_get_on_id(trx_id));
}
return(NULL);
}

Some files were not shown because too many files have changed in this diff Show more