Import 5.0 code.

2025-01-31 19:11:46 +01:00 · 2005-10-27 07:29:40 +00:00 · 2005-10-27 07:29:40 +00:00 · c307820962
commit c307820962
parent eae9b3ec18
310 changed files with 163246 additions and 0 deletions
--- a/Makefile.am
+++ b/Makefile.am
@ -0,0 +1,30 @@
+# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+# & Innobase Oy
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+# Process this file with automake to create Makefile.in
+
+# Automake option list and the tar program name used by this directory
+AUTOMAKE_OPTIONS =	foreign
+TAR =			gtar
+
+# ib_config.h is listed under the noinst_ prefix
+noinst_HEADERS = ib_config.h
+
+# Subdirectories of the recursive build, one per InnoDB module
+SUBDIRS =		os ut btr buf data dict dyn eval fil fsp fut \
+			ha ibuf include lock log mach mem mtr page \
+			pars que read rem row srv sync thr trx usr
+
+# Don't update the files from bitkeeper: the pattern rule below has no
+# recipe (presumably it cancels make's built-in SCCS checkout rule --
+# TODO confirm against the GNU make manual)
+%::SCCS/s.%
--- a/btr/Makefile.am
+++ b/btr/Makefile.am
@ -0,0 +1,24 @@
+# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+# & Innobase Oy
+# 
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+# Shared make fragment of the InnoDB tree (contents not shown here)
+include ../include/Makefile.i
+
+# Library for the btr module, listed under the noinst_ prefix
+noinst_LIBRARIES =	libbtr.a
+
+# C sources that make up libbtr.a
+libbtr_a_SOURCES =	btr0btr.c btr0cur.c btr0pcur.c btr0sea.c
+
+# No extra programs are declared for this directory
+EXTRA_PROGRAMS =	
--- a/btr/btr0btr.c
+++ b/btr/btr0btr.c
--- a/btr/btr0cur.c
+++ b/btr/btr0cur.c
--- a/btr/btr0pcur.c
+++ b/btr/btr0pcur.c
@ -0,0 +1,564 @@
+/******************************************************
+The index tree persistent cursor
+
+(c) 1996 Innobase Oy
+
+Created 2/23/1996 Heikki Tuuri
+*******************************************************/
+
+#include "btr0pcur.h"
+
+#ifdef UNIV_NONINL
+#include "btr0pcur.ic"
+#endif
+
+#include "ut0byte.h"
+#include "rem0cmp.h"
+#include "trx0trx.h"
+
+/******************************************************************
+Allocates a new persistent cursor object and brings it to its initial
+state. The returned cursor belongs to the caller. */
+
+btr_pcur_t*
+btr_pcur_create_for_mysql(void)
+/*============================*/
+				/* out, own: newly allocated persistent
+				cursor */
+{
+	btr_pcur_t*	new_cursor = mem_alloc(sizeof(btr_pcur_t));
+
+	/* The index pointer is cleared by hand before the generic
+	initialization routine is run on the cursor */
+	new_cursor->btr_cur.index = NULL;
+	btr_pcur_init(new_cursor);
+
+	return(new_cursor);
+}
+
+/******************************************************************
+Releases a persistent cursor object together with the stored record buffer
+it may own. */
+
+void
+btr_pcur_free_for_mysql(
+/*====================*/
+	btr_pcur_t*	cursor)	/* in, own: persistent cursor; its memory is
+				freed by this call */
+{
+	/* Drop the buffer holding the stored record copy, if there is one */
+	if (cursor->old_rec_buf) {
+		mem_free(cursor->old_rec_buf);
+		cursor->old_rec_buf = NULL;
+	}
+
+	/* Reset the cursor to a not-positioned, nothing-stored state before
+	its memory is handed back */
+	cursor->pos_state = BTR_PCUR_NOT_POSITIONED;
+	cursor->latch_mode = BTR_NO_LATCHES;
+
+	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+	cursor->old_n_fields = 0;
+	cursor->old_rec = NULL;
+	cursor->btr_cur.page_cur.rec = NULL;
+
+	mem_free(cursor);
+}
+
+/******************************************************************
+The position of the cursor is stored by taking an initial segment of the
+record the cursor is positioned on, before, or after, and copying it to the
+cursor data structure, or just setting a flag if the cursor is before the
+first in an EMPTY tree, or after the last in an EMPTY tree. NOTE that the
+page where the cursor is positioned must not be empty if the index tree is
+not totally empty! In the non-empty case the buffer block of the page and
+its modify clock value at the time of the call are saved in the cursor as
+well. */
+
+void
+btr_pcur_store_position(
+/*====================*/
+	btr_pcur_t*	cursor, /* in: persistent cursor; must be positioned
+				and its latch mode must not be
+				BTR_NO_LATCHES */
+	mtr_t*		mtr)	/* in: mtr; checked with ut_ad to hold an S-
+				or X-fix on the block of the cursor page */
+{
+	page_cur_t*	page_cursor;
+	rec_t*		rec;
+	dict_tree_t*	tree;
+	page_t*		page;
+	ulint		offs;
+	
+	ut_a(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+
+	tree = btr_cur_get_tree(btr_pcur_get_btr_cur(cursor));
+
+	page_cursor = btr_pcur_get_page_cur(cursor);
+	/* From the address of the record under the page cursor derive the
+	enclosing page (address aligned down to UNIV_PAGE_SIZE) and the
+	offset of the record within that page */
+	rec = page_cur_get_rec(page_cursor);
+	page = ut_align_down(rec, UNIV_PAGE_SIZE);
+	offs = ut_align_offset(rec, UNIV_PAGE_SIZE);
+
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
+							MTR_MEMO_PAGE_S_FIX)
+	      || mtr_memo_contains(mtr, buf_block_align(page),
+							MTR_MEMO_PAGE_X_FIX));
+	ut_a(cursor->latch_mode != BTR_NO_LATCHES);
+
+	if (UNIV_UNLIKELY(page_get_n_recs(page) == 0)) {
+		/* It must be an empty index tree; NOTE that in this case
+		we do not store the modify_clock, but always do a search
+		if we restore the cursor position */
+
+		ut_a(btr_page_get_next(page, mtr) == FIL_NULL);
+		ut_a(btr_page_get_prev(page, mtr) == FIL_NULL);
+
+		cursor->old_stored = BTR_PCUR_OLD_STORED;
+
+		if (page_rec_is_supremum_low(offs)) {
+
+			cursor->rel_pos = BTR_PCUR_AFTER_LAST_IN_TREE;
+		} else {
+			cursor->rel_pos = BTR_PCUR_BEFORE_FIRST_IN_TREE;
+		}
+
+		return;
+	} 
+	/* Non-empty page: when the cursor is on the supremum or the infimum,
+	the neighboring record (previous resp. next) is the one that gets
+	copied, and rel_pos tells on which side of it the cursor was */
+	if (page_rec_is_supremum_low(offs)) {
+
+		rec = page_rec_get_prev(rec);
+
+		cursor->rel_pos = BTR_PCUR_AFTER;
+
+	} else if (page_rec_is_infimum_low(offs)) {
+
+		rec = page_rec_get_next(rec);
+
+		cursor->rel_pos = BTR_PCUR_BEFORE;
+	} else {
+		cursor->rel_pos = BTR_PCUR_ON;
+	}
+	/* Copy the record prefix; the field count, the buffer pointer and
+	the buffer size of the cursor are passed by address to the copy
+	routine */
+	cursor->old_stored = BTR_PCUR_OLD_STORED;
+	cursor->old_rec = dict_tree_copy_rec_order_prefix(tree, rec,
+						&cursor->old_n_fields,
+						&cursor->old_rec_buf,
+						&cursor->buf_size);
+	/* Remember the block of the page and its modify clock value */
+	cursor->block_when_stored = buf_block_align(page);	
+	cursor->modify_clock = buf_block_get_modify_clock(
+				cursor->block_when_stored);
+}
+
+/******************************************************************
+Copies the stored position of a pcur to another pcur. The whole cursor
+struct is copied bytewise, after which the receiver is given a private copy
+of the stored record buffer, with old_rec pointing to the same offset within
+it. Copying a cursor onto itself does nothing. */
+
+void
+btr_pcur_copy_stored_position(
+/*==========================*/
+	btr_pcur_t*	pcur_receive,	/* in: pcur which will receive the
+					position info */
+	btr_pcur_t*	pcur_donate)	/* in: pcur from which the info is
+					copied */
+{
+	if (pcur_receive == pcur_donate) {
+		/* Nothing to copy. Without this check the record buffer
+		would be freed below and then read as the copy source, and
+		the struct copy would run on fully overlapping memory */
+
+		return;
+	}
+
+	if (pcur_receive->old_rec_buf) {
+		mem_free(pcur_receive->old_rec_buf);
+	}
+
+	/* After this bytewise copy old_rec_buf and old_rec of the receiver
+	point into the buffer of the donor; this is corrected below */
+
+	ut_memcpy((byte*)pcur_receive, (byte*)pcur_donate, sizeof(btr_pcur_t));
+
+	if (pcur_donate->old_rec_buf) {
+
+		pcur_receive->old_rec_buf = mem_alloc(pcur_donate->buf_size);
+	
+		ut_memcpy(pcur_receive->old_rec_buf, pcur_donate->old_rec_buf,
+						pcur_donate->buf_size);
+
+		/* Keep old_rec at the same offset inside the new buffer */
+		pcur_receive->old_rec = pcur_receive->old_rec_buf
+			+ (pcur_donate->old_rec - pcur_donate->old_rec_buf);
+	}	
+
+	pcur_receive->old_n_fields = pcur_donate->old_n_fields;
+}
+
+/******************************************************************
+Restores the stored position of a persistent cursor bufferfixing the page and
+obtaining the specified latches. If the cursor position was saved when the
+(1) cursor was positioned on a user record: this function restores the position
+to the last record LESS OR EQUAL to the stored record;
+(2) cursor was positioned on a page infimum record: restores the position to
+the last record LESS than the user record which was the successor of the page
+infimum;
+(3) cursor was positioned on the page supremum: restores to the first record
+GREATER than the user record which was the predecessor of the supremum.
+(4) cursor was positioned before the first or after the last in an empty tree:
+restores to before first or after the last in the tree.
+For the latch modes BTR_SEARCH_LEAF and BTR_MODIFY_LEAF an optimistic
+restoration with the stored block and modify clock is tried first; in other
+modes, or if that fails, the tree is searched anew with a tuple built from
+the stored record prefix. */
+
+ibool
+btr_pcur_restore_position(
+/*======================*/
+					/* out: TRUE if the cursor position
+					was stored when it was on a user record
+					and it can be restored on a user record
+					whose ordering fields are identical to
+					the ones of the original user record */
+	ulint		latch_mode,	/* in: BTR_SEARCH_LEAF, ... */
+	btr_pcur_t*	cursor, 	/* in: detached persistent cursor */
+	mtr_t*		mtr)		/* in: mtr */
+{
+	dict_tree_t*	tree;
+	page_t*		page;
+	dtuple_t*	tuple;
+	ulint		mode;
+	ulint		old_mode;
+	mem_heap_t*	heap;
+	/* A cursor with no stored position, or one that is neither
+	positioned nor was positioned, cannot be restored: print the cursor
+	bytes (and the transaction, if known) to stderr and raise ut_error */
+	if (UNIV_UNLIKELY(cursor->old_stored != BTR_PCUR_OLD_STORED)
+	    || UNIV_UNLIKELY(cursor->pos_state != BTR_PCUR_WAS_POSITIONED
+			     && cursor->pos_state != BTR_PCUR_IS_POSITIONED)) {
+		ut_print_buf(stderr, (const byte*)cursor, sizeof(btr_pcur_t));
+		if (cursor->trx_if_known) {
+			trx_print(stderr, cursor->trx_if_known, 0);
+		}
+		
+		ut_error;
+	}
+
+	if (UNIV_UNLIKELY(cursor->rel_pos == BTR_PCUR_AFTER_LAST_IN_TREE
+			|| cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE)) {
+
+	    	/* In these cases we do not try an optimistic restoration,
+	    	but always do a search */
+
+		btr_cur_open_at_index_side(
+			cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE,
+			btr_pcur_get_btr_cur(cursor)->index, latch_mode,
+					btr_pcur_get_btr_cur(cursor), mtr);
+
+		cursor->block_when_stored =
+				buf_block_align(btr_pcur_get_page(cursor));
+
+		return(FALSE);
+	}
+	/* From here on a record prefix must have been stored */
+	ut_a(cursor->old_rec);
+	ut_a(cursor->old_n_fields);
+
+	page = btr_cur_get_page(btr_pcur_get_btr_cur(cursor));
+
+	if (UNIV_LIKELY(latch_mode == BTR_SEARCH_LEAF)
+			|| UNIV_LIKELY(latch_mode == BTR_MODIFY_LEAF)) {
+		/* Try optimistic restoration */
+	    
+		if (UNIV_LIKELY(buf_page_optimistic_get(latch_mode,
+					    cursor->block_when_stored, page,
+					    cursor->modify_clock, mtr))) {
+			cursor->pos_state = BTR_PCUR_IS_POSITIONED;
+#ifdef UNIV_SYNC_DEBUG
+			buf_page_dbg_add_level(page, SYNC_TREE_NODE);
+#endif /* UNIV_SYNC_DEBUG */
+			if (cursor->rel_pos == BTR_PCUR_ON) {
+#ifdef UNIV_DEBUG
+				rec_t*		rec;
+				ulint*		offsets1;
+				ulint*		offsets2;
+				dict_index_t*	index;
+#endif /* UNIV_DEBUG */
+				cursor->latch_mode = latch_mode;
+#ifdef UNIV_DEBUG
+				rec = btr_pcur_get_rec(cursor);
+				index = dict_tree_find_index(
+					btr_cur_get_tree(
+						btr_pcur_get_btr_cur(cursor)),
+					rec);
+
+				heap = mem_heap_create(256);
+				offsets1 = rec_get_offsets(cursor->old_rec,
+						index, NULL,
+						cursor->old_n_fields, &heap);
+				offsets2 = rec_get_offsets(rec, index, NULL,
+						cursor->old_n_fields, &heap);
+
+				ut_ad(cmp_rec_rec(cursor->old_rec,
+					rec, offsets1, offsets2, index) == 0);
+				mem_heap_free(heap);
+#endif /* UNIV_DEBUG */
+				return(TRUE);
+			}
+			/* The optimistic get succeeded, but the position was
+			stored before or after a record, not on one.
+			NOTE(review): cursor->latch_mode is assigned only in
+			the BTR_PCUR_ON branch above -- confirm that callers
+			do not rely on it after this FALSE return */
+			return(FALSE);
+		}
+	}
+
+	/* If optimistic restoration did not succeed, open the cursor anew */
+
+	heap = mem_heap_create(256);
+	
+	tree = btr_cur_get_tree(btr_pcur_get_btr_cur(cursor));
+	tuple = dict_tree_build_data_tuple(tree, cursor->old_rec,
+					cursor->old_n_fields, heap);
+
+	/* Save the old search mode of the cursor */
+	old_mode = cursor->search_mode;
+	/* Map the stored relative position to a page cursor search mode:
+	ON -> PAGE_CUR_LE, AFTER -> PAGE_CUR_G, BEFORE -> PAGE_CUR_L */
+	if (UNIV_LIKELY(cursor->rel_pos == BTR_PCUR_ON)) {
+		mode = PAGE_CUR_LE;
+	} else if (cursor->rel_pos == BTR_PCUR_AFTER) {
+		mode = PAGE_CUR_G;
+	} else {
+		ut_ad(cursor->rel_pos == BTR_PCUR_BEFORE);
+		mode = PAGE_CUR_L;
+	}
+
+	btr_pcur_open_with_no_init(btr_pcur_get_btr_cur(cursor)->index, tuple,
+					mode, latch_mode, cursor, 0, mtr);
+	
+	/* Restore the old search mode */
+	cursor->search_mode = old_mode;
+	/* The restoration counts as exact only if the position was stored ON
+	a record, the cursor is now on a user record, and that record
+	compares equal to the tuple built from the stored prefix */
+	if (cursor->rel_pos == BTR_PCUR_ON
+	    && btr_pcur_is_on_user_rec(cursor, mtr)
+	    && 0 == cmp_dtuple_rec(tuple, btr_pcur_get_rec(cursor),
+			rec_get_offsets(btr_pcur_get_rec(cursor),
+				btr_pcur_get_btr_cur(cursor)->index,
+				NULL, ULINT_UNDEFINED, &heap))) {
+
+		/* We have to store the NEW value for the modify clock, since
+		the cursor can now be on a different page! But we can retain
+		the value of old_rec */
+
+		cursor->block_when_stored =
+			buf_block_align(btr_pcur_get_page(cursor));
+		cursor->modify_clock =
+			buf_block_get_modify_clock(cursor->block_when_stored);
+		cursor->old_stored = BTR_PCUR_OLD_STORED;
+
+		mem_heap_free(heap);
+
+		return(TRUE);
+	}
+
+	mem_heap_free(heap);
+
+	/* We have to store new position information, modify_clock etc.,
+	to the cursor because it can now be on a different page, the record
+	under it may have been removed, etc. */
+	
+	btr_pcur_store_position(cursor, mtr);
+
+	return(FALSE);
+}
+
+/******************************************************************
+Gives up the leaf page latch and bufferfix held through the cursor when its
+latch mode is BTR_SEARCH_LEAF or BTR_MODIFY_LEAF, and leaves the cursor in
+the state BTR_PCUR_WAS_POSITIONED with latch mode BTR_NO_LATCHES.
+NOTE! With BTR_MODIFY_LEAF the current mini-transaction must not have
+changed the data protected by the cursor latch, because then the latch
+must not be released until mtr_commit. */
+
+void
+btr_pcur_release_leaf(
+/*==================*/
+	btr_pcur_t*	cursor, /* in: persistent cursor; must be positioned
+				and hold a latch */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	ut_a(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+
+	/* Release the page the tree cursor currently points to, using the
+	latch mode the cursor holds it in */
+	btr_leaf_page_release(btr_cur_get_page(btr_pcur_get_btr_cur(cursor)),
+					cursor->latch_mode, mtr);
+
+	cursor->pos_state = BTR_PCUR_WAS_POSITIONED;
+	cursor->latch_mode = BTR_NO_LATCHES;
+}
+
+/*************************************************************
+Moves the persistent cursor to the first record on the next page. Releases the
+latch on the current page, and bufferunfixes it. Note that there must not be
+modifications on the current page, as then the x-latch can be released only in
+mtr_commit. The cursor is left before the first record of the next page and
+its stored position is marked as not stored. */
+
+void
+btr_pcur_move_to_next_page(
+/*=======================*/
+	btr_pcur_t*	cursor,	/* in: persistent cursor; must be on the
+				last record of the current page */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	ulint	next_page_no;
+	ulint	space;
+	page_t*	page;
+	page_t*	next_page;
+
+	ut_a(cursor->pos_state == BTR_PCUR_IS_POSITIONED);	
+	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+	ut_ad(btr_pcur_is_after_last_on_page(cursor, mtr));	
+	/* The move makes any previously stored position obsolete */
+	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+	
+	page = btr_pcur_get_page(cursor);
+
+	next_page_no = btr_page_get_next(page, mtr);
+	space = buf_frame_get_space_id(page);
+
+	ut_ad(next_page_no != FIL_NULL);	
+	/* The next page is fetched in the latch mode of the cursor first;
+	only after that is the current page released */
+	next_page = btr_page_get(space, next_page_no, cursor->latch_mode, mtr);
+	ut_a(page_is_comp(next_page) == page_is_comp(page));
+	buf_block_align(next_page)->check_index_page_at_flush = TRUE;
+
+	btr_leaf_page_release(page, cursor->latch_mode, mtr);
+	
+	page_cur_set_before_first(next_page, btr_pcur_get_page_cur(cursor));
+
+	page_check_dir(next_page);
+}
+
+/*************************************************************
+Moves the persistent cursor backward if it is on the first record of the page.
+Commits mtr. Note that to prevent a possible deadlock, the operation
+first stores the position of the cursor, commits mtr, acquires the necessary
+latches and restores the cursor position again before returning. The
+alphabetical position of the cursor is guaranteed to be sensible on
+return, but it may happen that the cursor is not positioned on the last
+record of any page, because the structure of the tree may have changed
+during the time when the cursor had no latches. The mtr is started again
+before the position is restored, so it is active on return. */
+
+void
+btr_pcur_move_backward_from_page(
+/*=============================*/
+	btr_pcur_t*	cursor,	/* in: persistent cursor, must be on the first
+				record of the current page */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	ulint	prev_page_no;
+	ulint	space;
+	page_t*	page;
+	page_t*	prev_page;
+	ulint	latch_mode;
+	ulint	latch_mode2;
+
+	ut_a(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+	ut_ad(btr_pcur_is_before_first_on_page(cursor, mtr));	
+	ut_ad(!btr_pcur_is_before_first_in_tree(cursor, mtr));	
+	
+	latch_mode = cursor->latch_mode;
+	/* Choose the *_PREV counterpart of the current latch mode for the
+	restoration; any other latch mode raises ut_error */
+	if (latch_mode == BTR_SEARCH_LEAF) {
+
+		latch_mode2 = BTR_SEARCH_PREV;
+
+	} else if (latch_mode == BTR_MODIFY_LEAF) {
+
+		latch_mode2 = BTR_MODIFY_PREV;
+	} else {
+		latch_mode2 = 0; /* To eliminate compiler warning */
+		ut_error;
+	}
+	/* Store the position, commit and restart the mtr, then restore the
+	position in the *_PREV latch mode */
+	btr_pcur_store_position(cursor, mtr);
+
+	mtr_commit(mtr);
+
+	mtr_start(mtr);
+
+	btr_pcur_restore_position(latch_mode2, cursor, mtr);	
+
+	page = btr_pcur_get_page(cursor);
+
+	prev_page_no = btr_page_get_prev(page, mtr);
+	space = buf_frame_get_space_id(page);
+	/* If the cursor is still before the first record of its page and a
+	previous page exists, release the current page and place the cursor
+	after the last record of the left page */
+	if (btr_pcur_is_before_first_on_page(cursor, mtr)
+					&& (prev_page_no != FIL_NULL)) {	
+
+		prev_page = btr_pcur_get_btr_cur(cursor)->left_page;
+
+		btr_leaf_page_release(page, latch_mode, mtr);
+
+		page_cur_set_after_last(prev_page,
+						btr_pcur_get_page_cur(cursor));
+	} else if (prev_page_no != FIL_NULL) {
+		
+		/* The repositioned cursor did not end on an infimum record on
+		a page. Cursor repositioning acquired a latch also on the
+		previous page, but we do not need the latch: release it. */
+	
+		prev_page = btr_pcur_get_btr_cur(cursor)->left_page;
+
+		btr_leaf_page_release(prev_page, latch_mode, mtr);
+	}
+	/* Put back the original latch mode of the caller and mark the
+	position as not stored */
+	cursor->latch_mode = latch_mode;
+
+	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+}
+
+/*************************************************************
+Steps the persistent cursor one record backward in the tree. If the cursor
+is already before the first record of the whole tree, it is left there. */
+
+ibool
+btr_pcur_move_to_prev(
+/*==================*/
+				/* out: FALSE if the cursor was already
+				before first in tree, TRUE otherwise */
+	btr_pcur_t*	cursor,	/* in: persistent cursor; NOTE that the
+				function may release the page latch */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+
+	/* Whatever happens below, a stored position is no longer valid */
+	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+
+	if (!btr_pcur_is_before_first_on_page(cursor, mtr)) {
+		/* There is a predecessor on the same page */
+
+		btr_pcur_move_to_prev_on_page(cursor, mtr);
+
+		return(TRUE);
+	}
+
+	if (btr_pcur_is_before_first_in_tree(cursor, mtr)) {
+		/* Nothing precedes the cursor in the tree */
+
+		return(FALSE);
+	}
+
+	/* The predecessor, if any, is on another page */
+	btr_pcur_move_backward_from_page(cursor, mtr);
+
+	return(TRUE);
+}
+
+/******************************************************************
+Opens a persistent cursor with the given search mode. For the modes
+PAGE_CUR_G and PAGE_CUR_GE, if the search ends after the last record on a
+page, the cursor is moved on to the next user record. The modes PAGE_CUR_L
+and PAGE_CUR_LE are not implemented yet and raise ut_error. The latching
+mode must be BTR_SEARCH_LEAF or BTR_MODIFY_LEAF. */
+
+void
+btr_pcur_open_on_user_rec(
+/*======================*/
+	dict_index_t*	index,		/* in: index */
+	dtuple_t*	tuple,		/* in: tuple on which search done */
+	ulint		mode,		/* in: PAGE_CUR_L, ... */
+	ulint		latch_mode,	/* in: BTR_SEARCH_LEAF or
+					BTR_MODIFY_LEAF */
+	btr_pcur_t*	cursor, 	/* in: memory buffer for persistent
+					cursor */
+	mtr_t*		mtr)		/* in: mtr */
+{
+	btr_pcur_open(index, tuple, mode, latch_mode, cursor, mtr);
+
+	if (mode != PAGE_CUR_GE && mode != PAGE_CUR_G) {
+		ut_ad((mode == PAGE_CUR_LE) || (mode == PAGE_CUR_L));
+
+		/* Not implemented yet */
+
+		ut_error;
+
+		return;
+	}
+
+	/* Forward search: step off the page end onto the next user record */
+	if (btr_pcur_is_after_last_on_page(cursor, mtr)) {
+
+		btr_pcur_move_to_next_user_rec(cursor, mtr);
+	}
+}
--- a/btr/btr0sea.c
+++ b/btr/btr0sea.c
--- a/buf/Makefile.am
+++ b/buf/Makefile.am
@ -0,0 +1,24 @@
+# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+# & Innobase Oy
+# 
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+# Shared make fragment of the InnoDB tree (contents not shown here)
+include ../include/Makefile.i
+
+# Library for the buf module, listed under the noinst_ prefix
+noinst_LIBRARIES =	libbuf.a
+
+# C sources that make up libbuf.a
+libbuf_a_SOURCES =	buf0buf.c buf0flu.c buf0lru.c buf0rea.c
+
+# No extra programs are declared for this directory
+EXTRA_PROGRAMS =	
--- a/buf/buf0buf.c
+++ b/buf/buf0buf.c
--- a/buf/buf0flu.c
+++ b/buf/buf0flu.c
--- a/buf/buf0lru.c
+++ b/buf/buf0lru.c
--- a/buf/buf0rea.c
+++ b/buf/buf0rea.c
@ -0,0 +1,726 @@
+/******************************************************
+The database buffer read
+
+(c) 1995 Innobase Oy
+
+Created 11/5/1995 Heikki Tuuri
+*******************************************************/
+
+#include "buf0rea.h"
+
+#include "fil0fil.h"
+#include "mtr0mtr.h"
+
+#include "buf0buf.h"
+#include "buf0flu.h"
+#include "buf0lru.h"
+#include "ibuf0ibuf.h"
+#include "log0recv.h"
+#include "trx0sys.h"
+#include "os0file.h"
+#include "srv0start.h"
+
+extern ulint srv_read_ahead_rnd;
+extern ulint srv_read_ahead_seq;
+extern ulint srv_buf_pool_reads;
+
+/* The size in blocks of the area where the random read-ahead algorithm counts
+the accessed pages when deciding whether to read-ahead */
+#define	BUF_READ_AHEAD_RANDOM_AREA	BUF_READ_AHEAD_AREA
+
+/* There must be at least this many pages in buf_pool in the area to start
+a random read-ahead */
+#define BUF_READ_AHEAD_RANDOM_THRESHOLD	(5 + BUF_READ_AHEAD_RANDOM_AREA / 8)
+
+/* The linear read-ahead area size */
+#define	BUF_READ_AHEAD_LINEAR_AREA	BUF_READ_AHEAD_AREA
+
+/* The linear read-ahead threshold */
+#define BUF_READ_AHEAD_LINEAR_THRESHOLD	(3 * BUF_READ_AHEAD_LINEAR_AREA / 8)
+
+/* If the number of pending reads exceeds buf_pool->curr_size divided by the
+number below, read-ahead is not done: this is to prevent flooding the buffer
+pool with i/o-fixed buffer blocks */
+#define BUF_READ_AHEAD_PEND_LIMIT	2
+
+/************************************************************************
+Low-level function which reads a page asynchronously from a file to the
+buffer buf_pool if it is not already there, in which case does nothing.
+Sets the io_fix flag and sets an exclusive lock on the buffer frame. The
+flag is cleared and the x-lock released by an i/o-handler thread. If sync
+is TRUE, or is forced to TRUE inside the function, the i/o is completed by
+the calling thread before returning. */
+static
+ulint
+buf_read_page_low(
+/*==============*/
+			/* out: 1 if a read request was queued, 0 if the page
+			already resided in buf_pool, or if the page is in
+			the doublewrite buffer blocks in which case it is never
+			read into the pool, or if the tablespace does not
+			exist or is being dropped */
+	ulint*	err,	/* out: DB_SUCCESS or DB_TABLESPACE_DELETED if we are
+			trying to read from a non-existent tablespace, or a
+			tablespace which is just now being dropped */
+	ibool	sync,	/* in: TRUE if synchronous aio is desired */
+	ulint	mode,	/* in: BUF_READ_IBUF_PAGES_ONLY, ...,
+			ORed to OS_AIO_SIMULATED_WAKE_LATER (see below
+			at read-ahead functions) */
+	ulint	space,	/* in: space id */
+	ib_longlong tablespace_version, /* in: if the space memory object has
+			this timestamp different from what we are giving here,
+			treat the tablespace as dropped; this is a timestamp we
+			use to stop dangling page reads from a tablespace
+			which we have DISCARDed + IMPORTed back */
+	ulint	offset)	/* in: page number */
+{
+	buf_block_t*	block;
+	ulint		wake_later;
+
+	*err = DB_SUCCESS;
+	/* Split the wake-later flag off mode: the flag is passed on to
+	fil_io below, the remaining mode to buf_page_init_for_read */
+	wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER;
+	mode = mode & ~OS_AIO_SIMULATED_WAKE_LATER;
+	/* A page inside either of the two doublewrite buffer blocks of the
+	system tablespace is not read: print a warning and report that
+	nothing was queued */
+	if (trx_doublewrite && space == TRX_SYS_SPACE
+		&& (   (offset >= trx_doublewrite->block1
+		        && offset < trx_doublewrite->block1
+		     		+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
+		    || (offset >= trx_doublewrite->block2
+		        && offset < trx_doublewrite->block2
+		     		+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE))) {
+		ut_print_timestamp(stderr);
+		fprintf(stderr,
+"  InnoDB: Warning: trying to read doublewrite buffer page %lu\n",
+			(ulong) offset);
+
+		return(0);
+	}
+
+#ifdef UNIV_LOG_DEBUG
+	if (space % 2 == 1) {
+		/* We are updating a replicate space while holding the
+		log mutex: the read must be handled before other reads
+		which might incur ibuf operations and thus write to the log */
+
+		fputs("Log debug: reading replicate page in sync mode\n",
+			stderr);
+
+		sync = TRUE;
+	}
+#endif
+	if (ibuf_bitmap_page(offset) || trx_sys_hdr_page(space, offset)) {
+
+		/* Trx sys header is so low in the latching order that we play
+		safe and do not leave the i/o-completion to an asynchronous
+		i/o-thread. Ibuf bitmap pages must always be read with
+		synchronous i/o, to make sure they do not get involved in
+		thread deadlocks. */
+		
+		sync = TRUE;
+	}
+
+	/* The following call will also check if the tablespace does not exist
+	or is being dropped; if we succeed in initing the page in the buffer
+	pool for read, then DISCARD cannot proceed until the read has
+	completed */
+	block = buf_page_init_for_read(err, mode, space, tablespace_version,
+								offset);
+	if (block == NULL) {
+		
+		return(0);
+	}
+
+#ifdef UNIV_DEBUG
+	if (buf_debug_prints) {
+		fprintf(stderr,
+                        "Posting read request for page %lu, sync %lu\n",
+							   (ulong) offset,
+		       					   (ulong) sync);
+	}
+#endif
+
+	ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+	/* Read UNIV_PAGE_SIZE bytes of page 'offset' into the block frame;
+	any result other than DB_SUCCESS fails the assertion below */
+	*err = fil_io(OS_FILE_READ | wake_later,
+			sync, space,
+			offset, 0, UNIV_PAGE_SIZE,
+			(void*)block->frame, (void*)block);
+	ut_a(*err == DB_SUCCESS);
+
+	if (sync) {
+		/* The i/o is already completed when we arrive from
+		fil_io */
+		buf_page_io_complete(block);
+	}
+		
+	return(1);
+}	
+
+/************************************************************************
+Applies a random read-ahead in buf_pool if there are at least a threshold
+value of accessed pages from the random read-ahead area. Does not read any
+page, not even the one at the position (space, offset), if the read-ahead
+mechanism is not activated. NOTE 1: the calling thread may own latches on
+pages: to avoid deadlocks this function must be written such that it cannot
+end up waiting for these latches! NOTE 2: the calling thread must want
+access to the page given: this rule is set to prevent unintended read-aheads
+performed by ibuf routines, a situation which could result in a deadlock if
+the OS does not support asynchronous i/o. */
+static
+ulint
+buf_read_ahead_random(
+/*==================*/
+			/* out: number of page read requests issued; NOTE
+			that if we read ibuf pages, it may happen that
+			the page at the given page number does not get
+			read even if we return a value > 0! */
+	ulint	space,	/* in: space id */
+	ulint	offset)	/* in: page number of a page which the current thread
+			wants to access */
+{
+	ib_longlong	tablespace_version;
+	buf_block_t*	block;
+	ulint		recent_blocks	= 0;
+	ulint		count;
+	ulint		LRU_recent_limit;
+	ulint		ibuf_mode;
+	ulint		low, high;
+	ulint		err;
+	ulint		i;
+
+	if (srv_startup_is_before_trx_rollback_phase) {
+	        /* No read-ahead to avoid thread deadlocks */
+	        return(0);
+	}
+
+	if (ibuf_bitmap_page(offset) || trx_sys_hdr_page(space, offset)) {
+
+		/* If it is an ibuf bitmap page or trx sys hdr, we do
+                no read-ahead, as that could break the ibuf page access
+                order */
+
+		return(0);
+	}
+
+	/* Remember the tablespace version before we ask the tablespace size
+	below: if DISCARD + IMPORT changes the actual .ibd file meanwhile, we
+	do not try to read outside the bounds of the tablespace! */
+
+	tablespace_version = fil_space_get_version(space);
+	/* [low, high) is the read-ahead area which contains offset; high is
+	clipped to the size of the tablespace */
+	low  = (offset / BUF_READ_AHEAD_RANDOM_AREA)
+					* BUF_READ_AHEAD_RANDOM_AREA;
+	high = (offset / BUF_READ_AHEAD_RANDOM_AREA + 1)
+					* BUF_READ_AHEAD_RANDOM_AREA;
+	if (high > fil_space_get_size(space)) {
+
+		high = fil_space_get_size(space);
+	}
+
+	/* Get the minimum LRU_position field value for an initial segment
+	of the LRU list, to determine which blocks have recently been added
+	to the start of the list. */
+	
+	LRU_recent_limit = buf_LRU_get_recent_limit();
+
+	mutex_enter(&(buf_pool->mutex));
+	/* Too many reads are pending already: do no read-ahead */
+	if (buf_pool->n_pend_reads >
+			buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
+		mutex_exit(&(buf_pool->mutex));
+
+		return(0);
+	}	
+
+	/* Count how many blocks in the area have been recently accessed,
+	that is, reside near the start of the LRU list. */
+
+	for (i = low; i < high; i++) {
+		block = buf_page_hash_get(space, i);
+
+		if ((block)
+		    && (block->LRU_position > LRU_recent_limit)
+		    && block->accessed) {
+
+			recent_blocks++;
+		}
+	}
+
+	mutex_exit(&(buf_pool->mutex));
+	
+	if (recent_blocks < BUF_READ_AHEAD_RANDOM_THRESHOLD) {
+		/* Do nothing */
+
+		return(0);
+	}
+
+	/* Read all the suitable blocks within the area */
+
+	if (ibuf_inside()) {
+		ibuf_mode = BUF_READ_IBUF_PAGES_ONLY;
+	} else {
+		ibuf_mode = BUF_READ_ANY_PAGE;
+	}
+
+	count = 0;
+
+	for (i = low; i < high; i++) {
+		/* It is only sensible to do read-ahead in the non-sync aio
+		mode: hence FALSE as the first parameter */
+
+		if (!ibuf_bitmap_page(i)) {
+			count += buf_read_page_low(&err, FALSE, ibuf_mode
+					| OS_AIO_SIMULATED_WAKE_LATER,
+				        space, tablespace_version, i);
+			if (err == DB_TABLESPACE_DELETED) {
+				ut_print_timestamp(stderr);
+				fprintf(stderr,
+"  InnoDB: Warning: in random readahead trying to access tablespace\n"
+"InnoDB: %lu page no. %lu,\n"
+"InnoDB: but the tablespace does not exist or is just being dropped.\n",
+					(ulong) space, (ulong) i);
+			}
+		}
+	}
+
+	/* In simulated aio we wake the aio handler threads only after
+	queuing all aio requests, in native aio the following call does
+	nothing: */
+	
+	os_aio_simulated_wake_handler_threads();
+
+#ifdef UNIV_DEBUG
+	if (buf_debug_prints && (count > 0)) {
+		fprintf(stderr,
+			"Random read-ahead space %lu offset %lu pages %lu\n",
+						(ulong) space, (ulong) offset,
+		       				(ulong) count);
+	}
+#endif /* UNIV_DEBUG */
+	/* NOTE(review): the counter below is incremented whenever the
+	threshold was reached, also when count is 0 -- confirm this is the
+	intended meaning of the statistic */
+        ++srv_read_ahead_rnd;
+	return(count);
+}
+
+/************************************************************************
+High-level page read: first gives the random read-ahead a chance to queue
+reads around the page, then reads the page itself from the file to the
+buffer buf_pool if it is not already there. Sets the io_fix flag and sets
+an exclusive lock on the buffer frame. The flag is cleared and the x-lock
+released by the i/o-handler thread. */
+
+ulint
+buf_read_page(
+/*==========*/
+			/* out: number of page read requests issued,
+			read-ahead requests included: this can thus
+			be > 1 */
+	ulint	space,	/* in: space id */
+	ulint	offset)	/* in: page number */
+{
+	ib_longlong	version;
+	ulint		n_ahead;
+	ulint		n_direct;
+	ulint		err;
+
+	/* The version is fetched before the read-ahead is attempted */
+	version = fil_space_get_version(space);
+
+	n_ahead = buf_read_ahead_random(space, offset);
+
+	/* The requested page itself is read in the synchronous aio mode
+	(TRUE) to save thread switches */
+
+	n_direct = buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
+							version, offset);
+	srv_buf_pool_reads += n_direct;
+
+	if (err == DB_TABLESPACE_DELETED) {
+		ut_print_timestamp(stderr);
+		fprintf(stderr,
+"  InnoDB: Error: trying to access tablespace %lu page no. %lu,\n"
+"InnoDB: but the tablespace does not exist or is just being dropped.\n",
+				 (ulong) space, (ulong) offset);
+	}
+
+	/* Flush pages from the end of the LRU list if necessary */
+	buf_flush_free_margin();
+
+	return(n_ahead + n_direct);
+}
+
+/************************************************************************
+Applies linear read-ahead if in the buf_pool the page is a border page of
+a linear read-ahead area and all the pages in the area have been accessed.
+Does not read any page if the read-ahead mechanism is not activated. Note
+that the algorithm looks at the 'natural' adjacent successor and
+predecessor of the page, which on the leaf level of a B-tree are the next
+and previous page in the chain of leaves. To know these, the page specified
+in (space, offset) must already be present in the buf_pool. Thus, the
+natural way to use this function is to call it when a page in the buf_pool
+is accessed the first time, calling this function just after it has been
+bufferfixed.
+NOTE 1: as this function looks at the natural predecessor and successor
+fields on the page, what happens, if these are not initialized to any
+sensible value? No problem, before applying read-ahead we check that the
+area to read is within the span of the space, if not, read-ahead is not
+applied. An uninitialized value may result in a useless read operation, but
+only very improbably.
+NOTE 2: the calling thread may own latches on pages: to avoid deadlocks this
+function must be written such that it cannot end up waiting for these
+latches!
+NOTE 3: the calling thread must want access to the page given: this rule is
+set to prevent unintended read-aheads performed by ibuf routines, a situation
+which could result in a deadlock if the OS does not support asynchronous io. */
+
+ulint
+buf_read_ahead_linear(
+/*==================*/
+			/* out: number of page read requests issued */
+	ulint	space,	/* in: space id */
+	ulint	offset)	/* in: page number of a page; NOTE: the current thread
+			must want access to this page (see NOTE 3 above) */
+{
+	ib_longlong	tablespace_version;
+	buf_block_t*	block;
+	buf_frame_t*	frame;
+	buf_block_t*	pred_block	= NULL;
+	ulint		pred_offset;
+	ulint		succ_offset;
+	ulint		count;
+	int		asc_or_desc;
+	ulint		new_offset;
+	ulint		fail_count;
+	ulint		ibuf_mode;
+	ulint		low, high;
+	ulint		err;
+	ulint		i;
+	
+	if (srv_startup_is_before_trx_rollback_phase) {
+	        /* No read-ahead to avoid thread deadlocks */
+	        return(0);
+	}
+
+	if (ibuf_bitmap_page(offset) || trx_sys_hdr_page(space, offset)) {
+
+		/* If it is an ibuf bitmap page or trx sys hdr, we do
+                no read-ahead, as that could break the ibuf page access
+                order */
+
+		return(0);
+	}
+
+	low  = (offset / BUF_READ_AHEAD_LINEAR_AREA)
+					* BUF_READ_AHEAD_LINEAR_AREA;
+	high = (offset / BUF_READ_AHEAD_LINEAR_AREA + 1)
+					* BUF_READ_AHEAD_LINEAR_AREA;
+
+	if ((offset != low) && (offset != high - 1)) {
+		/* This is not a border page of the area: return */
+
+		return(0);
+	}
+
+	/* Remember the tablespace version before we ask te tablespace size
+	below: if DISCARD + IMPORT changes the actual .ibd file meanwhile, we
+	do not try to read outside the bounds of the tablespace! */
+
+	tablespace_version = fil_space_get_version(space);
+
+	mutex_enter(&(buf_pool->mutex));
+
+	if (high > fil_space_get_size(space)) {
+		mutex_exit(&(buf_pool->mutex));
+		/* The area is not whole, return */
+
+		return(0);
+	}
+
+	if (buf_pool->n_pend_reads >
+			buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
+		mutex_exit(&(buf_pool->mutex));
+
+		return(0);
+	}	
+
+	/* Check that almost all pages in the area have been accessed; if
+	offset == low, the accesses must be in a descending order, otherwise,
+	in an ascending order. */
+
+	asc_or_desc = 1;
+
+	if (offset == low) {
+		asc_or_desc = -1;
+	}
+
+	fail_count = 0;
+
+	for (i = low; i < high; i++) {
+		block = buf_page_hash_get(space, i);
+		
+		if ((block == NULL) || !block->accessed) {
+			/* Not accessed */
+			fail_count++;
+
+		} else if (pred_block && (ut_ulint_cmp(block->LRU_position,
+				      		    pred_block->LRU_position)
+			       		  != asc_or_desc)) {
+			/* Accesses not in the right order */
+
+			fail_count++;
+			pred_block = block;
+		}
+	}
+
+	if (fail_count > BUF_READ_AHEAD_LINEAR_AREA -
+			 BUF_READ_AHEAD_LINEAR_THRESHOLD) {
+		/* Too many failures: return */
+
+		mutex_exit(&(buf_pool->mutex));
+
+		return(0);
+	}
+
+	/* If we got this far, we know that enough pages in the area have
+	been accessed in the right order: linear read-ahead can be sensible */
+
+	block = buf_page_hash_get(space, offset);
+
+	if (block == NULL) {
+		mutex_exit(&(buf_pool->mutex));
+
+		return(0);
+	}
+
+	frame = block->frame;
+	
+	/* Read the natural predecessor and successor page addresses from
+	the page; NOTE that because the calling thread may have an x-latch
+	on the page, we do not acquire an s-latch on the page, this is to
+	prevent deadlocks. Even if we read values which are nonsense, the
+	algorithm will work. */ 
+
+	pred_offset = fil_page_get_prev(frame);
+	succ_offset = fil_page_get_next(frame);
+
+	mutex_exit(&(buf_pool->mutex));
+	
+	if ((offset == low) && (succ_offset == offset + 1)) {
+
+	    	/* This is ok, we can continue */
+	    	new_offset = pred_offset;
+
+	} else if ((offset == high - 1) && (pred_offset == offset - 1)) {
+
+	    	/* This is ok, we can continue */
+	    	new_offset = succ_offset;
+	} else {
+		/* Successor or predecessor not in the right order */
+
+		return(0);
+	}
+
+	low  = (new_offset / BUF_READ_AHEAD_LINEAR_AREA)
+					* BUF_READ_AHEAD_LINEAR_AREA;
+	high = (new_offset / BUF_READ_AHEAD_LINEAR_AREA + 1)
+					* BUF_READ_AHEAD_LINEAR_AREA;
+
+	if ((new_offset != low) && (new_offset != high - 1)) {
+		/* This is not a border page of the area: return */
+
+		return(0);
+	}
+
+	if (high > fil_space_get_size(space)) {
+		/* The area is not whole, return */
+
+		return(0);
+	}
+
+	/* If we got this far, read-ahead can be sensible: do it */
+
+	if (ibuf_inside()) {
+		ibuf_mode = BUF_READ_IBUF_PAGES_ONLY;
+	} else {
+		ibuf_mode = BUF_READ_ANY_PAGE;
+	}
+
+	count = 0;
+
+	/* Since Windows XP seems to schedule the i/o handler thread
+	very eagerly, and consequently it does not wait for the
+	full read batch to be posted, we use special heuristics here */
+
+	os_aio_simulated_put_read_threads_to_sleep();
+	
+	for (i = low; i < high; i++) {
+		/* It is only sensible to do read-ahead in the non-sync
+		aio mode: hence FALSE as the first parameter */
+
+		if (!ibuf_bitmap_page(i)) {
+			count += buf_read_page_low(&err, FALSE, ibuf_mode
+					| OS_AIO_SIMULATED_WAKE_LATER,
+					space, 	tablespace_version, i);
+			if (err == DB_TABLESPACE_DELETED) {
+				ut_print_timestamp(stderr);
+				fprintf(stderr,
+"  InnoDB: Warning: in linear readahead trying to access tablespace\n"
+"InnoDB: %lu page no. %lu,\n"
+"InnoDB: but the tablespace does not exist or is just being dropped.\n",
+				 (ulong) space, (ulong) i);
+			}
+		}
+	}
+
+	/* In simulated aio we wake the aio handler threads only after
+	queuing all aio requests, in native aio the following call does
+	nothing: */
+	
+	os_aio_simulated_wake_handler_threads();
+
+	/* Flush pages from the end of the LRU list if necessary */
+	buf_flush_free_margin();
+
+#ifdef UNIV_DEBUG
+	if (buf_debug_prints && (count > 0)) {
+		fprintf(stderr,
+		"LINEAR read-ahead space %lu offset %lu pages %lu\n",
+		(ulong) space, (ulong) offset, (ulong) count);
+	}
+#endif /* UNIV_DEBUG */
+
+        ++srv_read_ahead_seq;
+	return(count);
+}
+
+/************************************************************************
+Posts read requests for the pages which the ibuf module wants to have in the
+buffer pool in order to contract the insert buffer tree. Technically, this
+function is like a read-ahead function. Before posting anything it sleeps in
+0.5 second steps as long as the number of pending reads exceeds
+curr_size / BUF_READ_AHEAD_PEND_LIMIT. If a read reports
+DB_TABLESPACE_DELETED, the ibuf entries for that page are removed. */
+
+void
+buf_read_ibuf_merge_pages(
+/*======================*/
+	ibool	sync,		/* in: TRUE if the caller wants this function
+				to wait for the highest address page to get
+				read in, before this function returns */
+	ulint*	space_ids,	/* in: array of space ids */
+	ib_longlong* space_versions,/* in: the spaces must have this version
+				number (timestamp), otherwise we discard the
+				read; we use this to cancel reads if
+				DISCARD + IMPORT may have changed the
+				tablespace size */
+	ulint*	page_nos,	/* in: array of page numbers to read, with the
+				highest page number the last in the array */
+	ulint	n_stored)	/* in: number of page numbers in the array */
+{
+	ibool	wait_for_read;
+	ulint	err;
+	ulint	i;
+
+	ut_ad(!ibuf_inside());
+#ifdef UNIV_IBUF_DEBUG
+	ut_a(n_stored < UNIV_PAGE_SIZE);
+#endif	
+	while (buf_pool->n_pend_reads >
+			buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
+		os_thread_sleep(500000);
+	}	
+
+	for (i = 0; i < n_stored; i++) {
+		/* Only the last page of the array is read in the sync mode,
+		and only if the caller asked for that */
+
+		wait_for_read = ((i + 1 == n_stored) && sync) ? TRUE : FALSE;
+
+		buf_read_page_low(&err, wait_for_read, BUF_READ_ANY_PAGE,
+				space_ids[i], space_versions[i], page_nos[i]);
+
+		if (err != DB_TABLESPACE_DELETED) {
+
+			continue;
+		}
+
+		/* We have deleted or are deleting the single-table
+		tablespace: remove the entries for that page */
+
+		ibuf_merge_or_delete_for_page(NULL, space_ids[i],
+							page_nos[i], FALSE);
+	}
+	
+	os_aio_simulated_wake_handler_threads();
+
+	/* Flush pages from the end of the LRU list if necessary */
+	buf_flush_free_margin();
+
+#ifdef UNIV_DEBUG
+	if (buf_debug_prints) {
+		fprintf(stderr,
+			"Ibuf merge read-ahead space %lu pages %lu\n",
+				(ulong) space_ids[0], (ulong) n_stored);
+	}
+#endif /* UNIV_DEBUG */
+}
+
+/************************************************************************
+Posts read requests for the pages which recovery wants to have in the buffer
+pool. Before each request the function waits, in 0.5 second steps, until the
+number of pending reads is below recv_n_pool_free_frames / 2; once it has
+waited more than 100 steps for one page it prints an error on every further
+step and switches os_aio_print_debug on until the wait ends. */
+
+void
+buf_read_recv_pages(
+/*================*/
+	ibool	sync,		/* in: TRUE if the caller wants this function
+				to wait for the highest address page to get
+				read in, before this function returns */
+	ulint	space,		/* in: space id */
+	ulint*	page_nos,	/* in: array of page numbers to read, with the
+				highest page number the last in the array */
+	ulint	n_stored)	/* in: number of page numbers in the array */
+{
+	ib_longlong	version	= fil_space_get_version(space);
+	ibool		wait_for_read;
+	ulint		mode;
+	ulint		n_waits;
+	ulint		err;
+	ulint		i;
+
+	for (i = 0; i < n_stored; i++) {
+
+		n_waits = 0;
+
+		os_aio_print_debug = FALSE;
+
+		while (buf_pool->n_pend_reads >= recv_n_pool_free_frames / 2) {
+
+			os_aio_simulated_wake_handler_threads();
+			os_thread_sleep(500000);
+
+			if (++n_waits > 100) {
+				fprintf(stderr,
+"InnoDB: Error: InnoDB has waited for 50 seconds for pending\n"
+"InnoDB: reads to the buffer pool to be finished.\n"
+"InnoDB: Number of pending reads %lu, pending pread calls %lu\n",
+				(ulong) buf_pool->n_pend_reads,
+				(ulong)os_file_n_pending_preads);
+
+				os_aio_print_debug = TRUE;
+			}
+		}
+
+		os_aio_print_debug = FALSE;
+
+		/* Only the last page is read in the sync mode, and only if
+		the caller asked for that; for all other requests the waking
+		of the simulated aio handler threads is postponed */
+
+		if ((i + 1 == n_stored) && sync) {
+			wait_for_read = TRUE;
+			mode = BUF_READ_ANY_PAGE;
+		} else {
+			wait_for_read = FALSE;
+			mode = BUF_READ_ANY_PAGE
+					| OS_AIO_SIMULATED_WAKE_LATER;
+		}
+
+		buf_read_page_low(&err, wait_for_read, mode, space,
+						version, page_nos[i]);
+	}
+	
+	os_aio_simulated_wake_handler_threads();
+
+	/* Flush pages from the end of the LRU list if necessary */
+	buf_flush_free_margin();
+
+#ifdef UNIV_DEBUG
+	if (buf_debug_prints) {
+		fprintf(stderr,
+			"Recovery applies read-ahead pages %lu\n", (ulong) n_stored);
+	}
+#endif /* UNIV_DEBUG */
+}
--- a/configure.in
+++ b/configure.in
@ -0,0 +1,137 @@
+# Process this file with autoconf to produce a configure script
+AC_INIT
+AC_CANONICAL_SYSTEM
+AM_MAINTAINER_MODE
+AM_CONFIG_HEADER(ib_config.h)
+AM_INIT_AUTOMAKE(ib, 0.90)
+
+# This is needed before AC_PROG_CC
+#
+
+if test "x${CFLAGS-}" = x ; then
+  cflags_is_set=no
+else
+  cflags_is_set=yes
+fi
+
+if test "x${CPPFLAGS-}" = x ; then
+  cppflags_is_set=no
+else
+  cppflags_is_set=yes
+fi
+
+if test "x${LDFLAGS-}" = x ; then
+  ldflags_is_set=no
+else
+  ldflags_is_set=yes
+fi
+
+# The following hack should ensure that configure doesn't add optimizing
+# or debugging flags to CFLAGS or CXXFLAGS
+CFLAGS="$CFLAGS "
+CXXFLAGS="$CXXFLAGS "
+
+AC_PROG_CC
+AC_PROG_RANLIB
+AC_PROG_INSTALL
+AC_PROG_LIBTOOL
+AC_CHECK_HEADERS(aio.h sched.h)
+AC_CHECK_SIZEOF(int, 4)
+AC_CHECK_SIZEOF(long, 4)
+AC_CHECK_SIZEOF(void*, 4)
+AC_CHECK_FUNCS(sched_yield)
+AC_CHECK_FUNCS(fdatasync)
+AC_CHECK_FUNCS(localtime_r)
+#AC_CHECK_FUNCS(readdir_r) MySQL also checks that it takes the right args.
+# Some versions of Unix only take 2 arguments.
+#AC_C_INLINE  Already checked in MySQL
+AC_C_BIGENDIAN
+
+# Build optimized or debug version ?
+# First check for gcc and g++
+if test "$ac_cv_prog_gcc" = "yes"
+then
+  DEBUG_CFLAGS="-g"
+  DEBUG_OPTIMIZE_CC="-O"
+  OPTIMIZE_CFLAGS="$MAX_C_OPTIMIZE"
+else
+  DEBUG_CFLAGS="-g"
+  DEBUG_OPTIMIZE_CC=""
+  OPTIMIZE_CFLAGS="-O"
+fi
+if test "$ac_cv_prog_cxx_g" = "yes"
+then
+  DEBUG_CXXFLAGS="-g"
+  DEBUG_OPTIMIZE_CXX="-O"
+  OPTIMIZE_CXXFLAGS="-O3"
+else
+  DEBUG_CXXFLAGS="-g"
+  DEBUG_OPTIMIZE_CXX=""
+  OPTIMIZE_CXXFLAGS="-O"
+fi
+AC_ARG_WITH(debug,
+    [  --without-debug         Build a production version without debugging code],
+    [with_debug=$withval],
+    [with_debug=no])
+if test "$with_debug" = "yes"
+then
+  # Medium debug.
+  CFLAGS="$DEBUG_CFLAGS $DEBUG_OPTIMIZE_CC -DDBUG_ON -DSAFE_MUTEX $CFLAGS"
+  CXXFLAGS="$DEBUG_CXXFLAGS $DEBUG_OPTIMIZE_CXX -DSAFE_MUTEX $CXXFLAGS"
+elif test "$with_debug" = "full"
+then
+  # Full debug. Very slow in some cases
+  CFLAGS="$DEBUG_CFLAGS -DDBUG_ON -DSAFE_MUTEX -DSAFEMALLOC $CFLAGS"
+  CXXFLAGS="$DEBUG_CXXFLAGS -DSAFE_MUTEX -DSAFEMALLOC $CXXFLAGS"
+else
+  # Optimized version. No debug
+  CFLAGS="$OPTIMIZE_CFLAGS -DDBUG_OFF $CFLAGS -DDEBUG_OFF"
+  CXXFLAGS="$OPTIMIZE_CXXFLAGS -DDBUG_OFF $CXXFLAGS -DDEBUG_OFF"
+fi
+
+case "$target_os" in
+       lin*)
+	 CFLAGS="$CFLAGS -DUNIV_LINUX";;
+       hpux10*)
+	 CFLAGS="$CFLAGS -DUNIV_MUST_NOT_INLINE -DUNIV_HPUX -DUNIV_HPUX10";;
+       hp*)
+	 CFLAGS="$CFLAGS -DUNIV_MUST_NOT_INLINE -DUNIV_HPUX";;
+       aix*)
+         CFLAGS="$CFLAGS -DUNIV_AIX";;
+       irix*)
+	 CFLAGS="$CFLAGS -DUNIV_MUST_NOT_INLINE";;
+       osf*)
+	 CFLAGS="$CFLAGS -DUNIV_MUST_NOT_INLINE";;
+       sysv5uw7*)
+	 # Problem when linking on SCO
+	 CFLAGS="$CFLAGS -DUNIV_MUST_NOT_INLINE";;
+       openbsd*)
+         CFLAGS="$CFLAGS -DUNIV_MUST_NOT_INLINE";;
+esac
+
+case "$target" in
+       i[[4567]]86-*-*)
+	 CFLAGS="$CFLAGS -DUNIV_INTEL_X86";;
+	 # The compiler on Linux/S390 does not seem to have inlining
+       s390-*-*)
+	 CFLAGS="$CFLAGS -DUNIV_MUST_NOT_INLINE";;
+esac
+
+# must go in pair with AR as set by MYSQL_CHECK_AR
+if test -z "$ARFLAGS"
+then
+  ARFLAGS="cru"
+fi
+AC_SUBST(ARFLAGS)
+
+AC_OUTPUT(Makefile os/Makefile ut/Makefile btr/Makefile dnl
+		buf/Makefile data/Makefile dnl
+		dict/Makefile dyn/Makefile dnl
+		eval/Makefile fil/Makefile fsp/Makefile fut/Makefile dnl
+		ha/Makefile ibuf/Makefile include/Makefile dnl
+		lock/Makefile log/Makefile dnl
+		mach/Makefile mem/Makefile mtr/Makefile dnl
+		page/Makefile pars/Makefile que/Makefile dnl
+		read/Makefile rem/Makefile row/Makefile dnl
+		srv/Makefile sync/Makefile thr/Makefile trx/Makefile dnl
+		usr/Makefile)
--- a/data/Makefile.am
+++ b/data/Makefile.am
@ -0,0 +1,24 @@
+# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+# & Innobase Oy
+# 
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+include ../include/Makefile.i
+
+noinst_LIBRARIES =	libdata.a
+
+libdata_a_SOURCES =	data0data.c data0type.c
+
+EXTRA_PROGRAMS =	
--- a/data/data0data.c
+++ b/data/data0data.c
@ -0,0 +1,662 @@
+/************************************************************************
+SQL data field and tuple
+
+(c) 1994-1996 Innobase Oy
+
+Created 5/30/1994 Heikki Tuuri
+*************************************************************************/
+
+#include "data0data.h"
+
+#ifdef UNIV_NONINL
+#include "data0data.ic"
+#endif
+
+#include "rem0rec.h"
+#include "rem0cmp.h"
+#include "page0page.h"
+#include "dict0dict.h"
+#include "btr0cur.h"
+
+byte	data_error;	/* data pointers of tuple fields are initialized
+			to point here for error checking */
+
+#ifdef UNIV_DEBUG
+ulint	data_dummy;	/* this is used to fool the compiler in
+			dtuple_validate */
+#endif /* UNIV_DEBUG */
+
+/* Some non-inlined functions used in the MySQL interface: each of them
+only forwards its arguments to the function of the same name without the
+_noninline suffix and passes back the result, if any. */
+void 
+dfield_set_data_noninline(
+	dfield_t* 	field,	/* in: field */
+	void*		data,	/* in: data */
+	ulint		len)	/* in: length or UNIV_SQL_NULL */
+{
+	dfield_set_data(field, data, len);
+}
+/* Non-inlined wrapper: passes back what dfield_get_data() returns. */
+void* 
+dfield_get_data_noninline(
+				/* out: result of dfield_get_data() */
+	dfield_t* field)	/* in: field */
+{
+	void*	data;
+
+	data = dfield_get_data(field);
+
+	return(data);
+}
+/* Non-inlined wrapper: passes back what dfield_get_len() returns. */
+ulint
+dfield_get_len_noninline(
+				/* out: result of dfield_get_len() */
+	dfield_t* field)	/* in: field */
+{
+	ulint	len;
+
+	len = dfield_get_len(field);
+
+	return(len);
+}
+/* Non-inlined wrapper: passes back what dtuple_get_n_fields() returns. */
+ulint 
+dtuple_get_n_fields_noninline(
+				/* out: result of dtuple_get_n_fields() */
+	dtuple_t* 	tuple)	/* in: tuple */
+{
+	ulint	n_fields;
+
+	n_fields = dtuple_get_n_fields(tuple);
+
+	return(n_fields);
+}
+/* Non-inlined wrapper: passes back what dtuple_get_nth_field() returns. */
+dfield_t* 
+dtuple_get_nth_field_noninline(
+				/* out: result of dtuple_get_nth_field() */
+	dtuple_t* 	tuple,	/* in: tuple */
+	ulint		n)	/* in: index of field */
+{
+	dfield_t*	nth_field;
+
+	nth_field = dtuple_get_nth_field(tuple, n);
+
+	return(nth_field);
+}
+
+/*************************************************************************
+Compares the length and the bytes of the data in a field to the given
+length and data. Two SQL NULLs (length UNIV_SQL_NULL) are equal, and in that
+case the data pointers are not looked at. */
+
+ibool
+dfield_data_is_binary_equal(
+/*========================*/
+				/* out: TRUE if equal */
+	dfield_t*	field,	/* in: field */
+	ulint		len,	/* in: data length or UNIV_SQL_NULL */
+	byte*		data)	/* in: data */
+{
+	if (field->len != len) {
+		/* Different lengths, or only one of the two is SQL NULL */
+
+		return(FALSE);
+	}
+
+	if (len == UNIV_SQL_NULL) {
+		/* Both are SQL NULL: there are no bytes to compare */
+
+		return(TRUE);
+	}
+
+	return((ut_memcmp(field->data, data, len) == 0) ? TRUE : FALSE);
+}
+
+/****************************************************************
+Returns TRUE if two dtuples have the same number of fields and
+cmp_dfield_dfield reports every pair of respective fields as equal, i.e.,
+char fields are compared with collation, not as binary strings. */
+
+ibool
+dtuple_datas_are_ordering_equal(
+/*============================*/
+				/* out: TRUE if length and fields are equal
+				when compared with cmp_data_data:
+				NOTE: in character type fields some letters
+				are identified with others! (collation) */
+	dtuple_t*	tuple1,	/* in: tuple 1 */
+	dtuple_t*	tuple2)	/* in: tuple 2 */
+{
+	ulint	n_fields;
+	ulint	pos;
+
+	ut_ad(tuple1 && tuple2);
+	ut_ad(tuple1->magic_n == DATA_TUPLE_MAGIC_N);
+	ut_ad(tuple2->magic_n == DATA_TUPLE_MAGIC_N);
+	ut_ad(dtuple_check_typed(tuple1));
+	ut_ad(dtuple_check_typed(tuple2));
+
+	n_fields = dtuple_get_n_fields(tuple1);
+
+	if (n_fields != dtuple_get_n_fields(tuple2)) {
+
+		return(FALSE);
+	}
+
+	pos = 0;
+
+	while (pos < n_fields) {
+		/* The first pair of fields that differs decides */
+
+		if (cmp_dfield_dfield(dtuple_get_nth_field(tuple1, pos),
+				dtuple_get_nth_field(tuple2, pos)) != 0) {
+
+			return(FALSE);
+		}
+
+		pos++;
+	}
+
+	return(TRUE);
+}
+
+/*************************************************************************
+Creates a memory heap and a dtuple in it for use in MySQL. The heap is
+passed back to the caller through the heap parameter. */
+
+dtuple_t*
+dtuple_create_for_mysql(
+/*====================*/
+				/* out, own created dtuple */
+	void** 	heap,    	/* out: created memory heap */
+	ulint 	n_fields) 	/* in: number of fields */
+{
+	mem_heap_t*	tuple_heap;
+
+	tuple_heap = mem_heap_create(500);
+
+	*heap = (void*) tuple_heap;
+
+	return(dtuple_create(tuple_heap, n_fields));
+}
+
+/*************************************************************************
+Frees a dtuple used in MySQL by freeing the memory heap it was created in. */
+
+void
+dtuple_free_for_mysql(
+/*==================*/
+	void*	heap) /* in: memory heap where tuple was created */
+{
+	mem_heap_t*	tuple_heap = (mem_heap_t*) heap;
+
+	mem_heap_free(tuple_heap);
+}
+
+/*************************************************************************
+Sets the number of fields used in a tuple; the number of fields used in
+comparisons is set to the same value. Normally the number is set in
+dtuple_create, but if you want later to set it smaller, you can use this. */
+
+void
+dtuple_set_n_fields(
+/*================*/
+	dtuple_t*	tuple,		/* in: tuple */
+	ulint		n_fields)	/* in: number of fields */
+{
+	ut_ad(tuple);
+
+	/* The two counters are always changed together here */
+
+	tuple->n_fields_cmp = n_fields;
+	tuple->n_fields = n_fields;
+}
+
+/**************************************************************
+Checks that the main type of a data field lies in the range
+DATA_VARCHAR ... DATA_MYSQL. Prints an error to stderr if it does not. */
+static
+ibool
+dfield_check_typed_no_assert(
+/*=========================*/
+				/* out: TRUE if ok */
+	dfield_t*	field)	/* in: data field */
+{
+	ulint	mtype = dfield_get_type(field)->mtype;
+
+	if (mtype >= DATA_VARCHAR && mtype <= DATA_MYSQL) {
+
+		return(TRUE);
+	}
+
+	fprintf(stderr,
+"InnoDB: Error: data field type %lu, len %lu\n",
+		(ulong) mtype,
+		(ulong) dfield_get_len(field));
+
+	return(FALSE);
+}
+
+/**************************************************************
+Checks that a data tuple is typed: it must not have more than
+REC_MAX_N_FIELDS fields and every field must pass
+dfield_check_typed_no_assert. On failure the tuple contents are printed to
+stderr; no assertion is raised. */
+
+ibool
+dtuple_check_typed_no_assert(
+/*=========================*/
+				/* out: TRUE if ok */
+	dtuple_t*	tuple)	/* in: tuple */
+{
+	ulint	n_fields	= dtuple_get_n_fields(tuple);
+	ibool	ok		= TRUE;
+	ulint	i;
+
+	if (n_fields > REC_MAX_N_FIELDS) {
+		fprintf(stderr,
+"InnoDB: Error: index entry has %lu fields\n",
+			(ulong) n_fields);
+
+		ok = FALSE;
+	}
+
+	/* The fields are looked at only if the field count was acceptable,
+	and the scan stops at the first field that fails */
+
+	for (i = 0; ok && i < n_fields; i++) {
+
+		ok = dfield_check_typed_no_assert(
+					dtuple_get_nth_field(tuple, i));
+	}
+
+	if (!ok) {
+		fputs("InnoDB: Tuple contents: ", stderr);
+		dtuple_print(stderr, tuple);
+		putc('\n', stderr);
+	}
+
+	return(ok);
+}
+
+/**************************************************************
+Checks that the main type of a data field lies in the range
+DATA_VARCHAR ... DATA_MYSQL. If it does not, prints an error to stderr and
+asserts an error. */
+
+ibool
+dfield_check_typed(
+/*===============*/
+				/* out: TRUE if ok */
+	dfield_t*	field)	/* in: data field */
+{
+	ulint	mtype = dfield_get_type(field)->mtype;
+
+	if (mtype >= DATA_VARCHAR && mtype <= DATA_MYSQL) {
+
+		return(TRUE);
+	}
+
+	fprintf(stderr,
+"InnoDB: Error: data field type %lu, len %lu\n",
+		(ulong) mtype,
+		(ulong) dfield_get_len(field));
+
+	ut_error;
+
+	return(TRUE);
+}
+
+/**************************************************************
+Checks with dfield_check_typed that every field of a data tuple is typed.
+Asserts an error if not. */
+
+ibool
+dtuple_check_typed(
+/*===============*/
+				/* out: TRUE if ok */
+	dtuple_t*	tuple)	/* in: tuple */
+{
+	ulint	n_fields = dtuple_get_n_fields(tuple);
+	ulint	pos;
+
+	for (pos = 0; pos < n_fields; pos++) {
+
+		ut_a(dfield_check_typed(dtuple_get_nth_field(tuple, pos)));
+	}
+
+	return(TRUE);
+}
+
+#ifdef UNIV_DEBUG
+/**************************************************************
+Validates the consistency of a tuple which must be complete, i.e,
+all fields must have been set. Every data byte of every non-NULL field is
+read, and the tuple is checked with dtuple_check_typed. */
+
+ibool
+dtuple_validate(
+/*============*/
+				/* out: TRUE if ok */
+	dtuple_t*	tuple)	/* in: tuple */
+{
+	dfield_t*	field;
+	byte*	 	bytes;
+	ulint	 	n_fields;
+	ulint	 	len;
+	ulint	 	field_no;
+	ulint	 	pos;
+
+	ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N);
+
+	n_fields = dtuple_get_n_fields(tuple);
+
+	/* We dereference all the data of each field to test
+	for memory traps */
+
+	for (field_no = 0; field_no < n_fields; field_no++) {
+
+		field = dtuple_get_nth_field(tuple, field_no);
+		len = dfield_get_len(field);
+
+		if (len == UNIV_SQL_NULL) {
+			/* An SQL NULL field has no data to read */
+
+			continue;
+		}
+
+		bytes = field->data;
+
+		for (pos = 0; pos < len; pos++) {
+			/* Adding to a global fools the compiler not to
+			optimize out this code */
+
+			data_dummy += bytes[pos];
+		}
+	}
+
+	ut_a(dtuple_check_typed(tuple));
+
+	return(TRUE);
+}
+#endif /* UNIV_DEBUG */
+
+/*****************************************************************
+Pretty prints a dfield value to stderr according to its data type. Only
+SQL NULL, DATA_CHAR, DATA_VARCHAR and 4-byte DATA_INT values are supported:
+any other value asserts an error. Non-printable characters of a string are
+printed as spaces. */
+
+void
+dfield_print(
+/*=========*/
+	dfield_t*	dfield)	 /* in: dfield */
+{
+	ulint	len	= dfield_get_len(dfield);
+	byte*	data	= dfield_get_data(dfield);
+	ulint	pos;
+	int	c;
+
+	if (len == UNIV_SQL_NULL) {
+		fputs("NULL", stderr);
+
+		return;
+	}
+
+	switch (dtype_get_mtype(dfield_get_type(dfield))) {
+	case DATA_CHAR:
+	case DATA_VARCHAR:
+		for (pos = 0; pos < len; pos++) {
+			c = data[pos];
+			putc(isprint(c) ? c : ' ', stderr);
+		}
+		break;
+
+	case DATA_INT:
+		ut_a(len == 4); /* only works for 32-bit integers */
+		fprintf(stderr, "%d", (int)mach_read_from_4(data));
+		break;
+
+	default:
+		ut_error;
+	}
+}
+
+/*****************************************************************
+Pretty prints a dfield value to stderr according to its data type. Also the
+hex string is printed if a string contains non-printable characters. Only
+SQL NULL, DATA_CHAR, DATA_VARCHAR and 4-byte DATA_INT values are supported:
+any other value asserts an error. */ 
+
+void
+dfield_print_also_hex(
+/*==================*/
+	dfield_t*	dfield)	 /* in: dfield */
+{
+	byte*	data;
+	ulint	len;
+	ulint	mtype;
+	ulint	i;
+	ibool	print_also_hex;
+
+	len = dfield_get_len(dfield);
+	data = dfield_get_data(dfield);
+
+	if (len == UNIV_SQL_NULL) {
+		fputs("NULL", stderr);
+
+		return;
+	}
+
+	mtype = dtype_get_mtype(dfield_get_type(dfield));
+
+	if ((mtype == DATA_CHAR) || (mtype == DATA_VARCHAR)) {
+
+		print_also_hex = FALSE;
+	
+		/* First pass: print the string with every non-printable
+		character replaced by a space */
+
+		for (i = 0; i < len; i++) {
+			int c = *data++;
+			if (!isprint(c)) {
+				print_also_hex = TRUE;
+				c = ' ';
+			}
+			putc(c, stderr);
+		}
+
+		if (!print_also_hex) {
+
+			return;
+		}
+
+		fputs(" Hex: ", stderr);
+		
+		/* Second pass: start again from the first byte */
+
+		data = dfield_get_data(dfield);
+		
+		for (i = 0; i < len; i++) {
+			/* The l length modifier needs an unsigned long
+			argument: cast to ulong like the other prints in
+			this file, not to ulint */
+
+			fprintf(stderr, "%02lx", (ulong) *data);
+
+			data++;
+		}
+	} else if (mtype == DATA_INT) {
+		ut_a(len == 4); /* only works for 32-bit integers */
+		fprintf(stderr, "%d", (int)mach_read_from_4(data));
+	} else {
+		ut_error;
+	}
+}
+
+/**************************************************************
+Prints the contents of a tuple to the given stream: first the number of
+fields, then for each field its index followed by the data printed with
+ut_print_buf, or by " SQL NULL". */
+
+void
+dtuple_print(
+/*=========*/
+	FILE*		f,	/* in: output stream */
+	dtuple_t*	tuple)	/* in: tuple */
+{
+	ulint		n_fields = dtuple_get_n_fields(tuple);
+	dfield_t*	field;
+	ulint		field_no;
+
+	fprintf(f, "DATA TUPLE: %lu fields;\n", (ulong) n_fields);
+
+	for (field_no = 0; field_no < n_fields; field_no++) {
+		fprintf(f, " %lu:", (ulong) field_no);
+
+		field = dtuple_get_nth_field(tuple, field_no);
+
+		if (field->len == UNIV_SQL_NULL) {
+			fputs(" SQL NULL", f);
+		} else {
+			ut_print_buf(f, field->data, field->len);
+		}
+
+		putc(';', f);
+	}
+
+	putc('\n', f);
+	ut_ad(dtuple_validate(tuple));
+}
+
+/******************************************************************
+Moves parts of long fields in entry to the big record vector so that
+the size of tuple drops below the maximum record size allowed in the
+database. Moves data only from those fields which are not necessary
+to determine uniquely the insertion place of the tuple in the index.
+The entry is modified in place: of each shortened field the first
+DICT_MAX_INDEX_COL_LEN bytes stay in entry, followed by a zero-filled
+extern field reference of BTR_EXTERN_FIELD_REF_SIZE bytes, and the rest of
+the data is copied to the returned vector.
+NOTE(review): if a round after the first one finds no field long enough to
+shorten, NULL is returned although fields of entry have already been
+truncated and the heap holding their moved data has been freed; it looks
+like entry cannot be restored in that case - confirm that callers discard
+entry when they get NULL. */
+
+big_rec_t*
+dtuple_convert_big_rec(
+/*===================*/
+				/* out, own: created big record vector,
+				NULL if we are not able to shorten
+				the entry enough, i.e., if there are
+				too many short fields in entry */
+	dict_index_t*	index,	/* in: index */
+	dtuple_t*	entry,	/* in: index entry */
+	ulint*		ext_vec,/* in: array of externally stored fields,
+				or NULL: if a field already is externally
+				stored, then we cannot move it to the vector
+				this function returns */
+	ulint		n_ext_vec)/* in: number of elements in ext_vec */
+{
+	mem_heap_t*	heap;
+	big_rec_t*	vector;
+	dfield_t*	dfield;
+	ulint		size;
+	ulint		n_fields;
+	ulint		longest;
+	ulint		longest_i		= ULINT_MAX;
+	ibool		is_externally_stored;
+	ulint		i;
+	ulint		j;
+	
+	ut_a(dtuple_check_typed_no_assert(entry));
+
+	size = rec_get_converted_size(index, entry);
+
+	if (UNIV_UNLIKELY(size > 1000000000)) {
+		fprintf(stderr,
+"InnoDB: Warning: tuple size very big: %lu\n", (ulong) size);
+		fputs("InnoDB: Tuple contents: ", stderr);
+		dtuple_print(stderr, entry);
+		putc('\n', stderr);
+	}
+
+	heap = mem_heap_create(size + dtuple_get_n_fields(entry)
+					* sizeof(big_rec_field_t) + 1000);
+
+	vector = mem_heap_alloc(heap, sizeof(big_rec_t));
+
+	vector->heap = heap;
+	vector->fields = mem_heap_alloc(heap, dtuple_get_n_fields(entry)
+					* sizeof(big_rec_field_t));
+
+	/* Decide which fields to shorten: on each round we look for the
+	longest non-NULL field among the fields which come after the first
+	dict_index_get_n_unique_in_tree(index) fields and which are not
+	listed in ext_vec. NOTE(review): this comment used to say that the
+	type of the field must be DATA_BLOB, but the type is not checked
+	here. From here on n_fields counts the fields moved to vector. */
+
+	n_fields = 0;
+
+	while (rec_get_converted_size(index, entry)
+			>= ut_min(page_get_free_space_of_empty(
+					index->table->comp) / 2,
+					REC_MAX_DATA_SIZE)) {
+
+		longest = 0;
+		for (i = dict_index_get_n_unique_in_tree(index);
+				i < dtuple_get_n_fields(entry); i++) {
+
+			/* Skip over fields which already are externally
+			stored */
+
+			is_externally_stored = FALSE;
+
+			if (ext_vec) {
+				for (j = 0; j < n_ext_vec; j++) {
+					if (ext_vec[j] == i) {
+						is_externally_stored = TRUE;
+					}
+				}
+			}
+				
+			if (!is_externally_stored) {
+
+				dfield = dtuple_get_nth_field(entry, i);
+
+				if (dfield->len != UNIV_SQL_NULL &&
+			        		dfield->len > longest) {
+
+			        	longest = dfield->len;
+
+			        	longest_i = i;
+				}
+			}
+		}
+	
+		/* We do not store externally fields which are smaller than
+		DICT_MAX_INDEX_COL_LEN */
+
+		ut_a(DICT_MAX_INDEX_COL_LEN > REC_1BYTE_OFFS_LIMIT);
+
+		if (longest < BTR_EXTERN_FIELD_REF_SIZE + 10
+						+ DICT_MAX_INDEX_COL_LEN) {
+			/* Cannot shorten more: this also covers the case
+			where no candidate field was found at all, because
+			longest is then still 0 */
+
+			mem_heap_free(heap);
+
+			return(NULL);
+		}
+
+		/* Move data from field longest_i to big rec vector;
+		we do not let data size of the remaining entry
+		drop below 128 which is the limit for the 2-byte
+		offset storage format in a physical record. This
+		we accomplish by storing 128 bytes of data in entry
+		itself, and only the remaining part to big rec vec.
+		NOTE(review): in the code below the locally stored
+		part is DICT_MAX_INDEX_COL_LEN bytes, which is asserted
+		above to be bigger than REC_1BYTE_OFFS_LIMIT; the figure
+		128 is not used directly.
+
+		We store the first bytes locally to the record. Then
+		we can calculate all ordering fields in all indexes
+		from locally stored data. */
+
+		dfield = dtuple_get_nth_field(entry, longest_i);
+		vector->fields[n_fields].field_no = longest_i;
+
+		ut_a(dfield->len > DICT_MAX_INDEX_COL_LEN);
+		
+		vector->fields[n_fields].len = dfield->len
+						- DICT_MAX_INDEX_COL_LEN;
+
+		vector->fields[n_fields].data = mem_heap_alloc(heap,
+						vector->fields[n_fields].len);
+
+		/* Copy data (from the end of field) to big rec vector */
+
+		ut_memcpy(vector->fields[n_fields].data,
+				((byte*)dfield->data) + dfield->len
+						- vector->fields[n_fields].len,
+				vector->fields[n_fields].len);
+		dfield->len = dfield->len - vector->fields[n_fields].len
+						+ BTR_EXTERN_FIELD_REF_SIZE;
+
+		/* Set the extern field reference in dfield to zero */
+		memset(((byte*)dfield->data)
+			+ dfield->len - BTR_EXTERN_FIELD_REF_SIZE,
+					0, BTR_EXTERN_FIELD_REF_SIZE);
+		n_fields++;
+	}	
+
+	vector->n_fields = n_fields;
+	return(vector);
+}
+
+/******************************************************************
+Puts back to entry the data stored in vector: for each field in vector the
+stored data is copied over the last BTR_EXTERN_FIELD_REF_SIZE bytes of the
+field in entry and the bytes after them, and the field length is adjusted.
+Note that to ensure the fields in entry can accommodate the data, vector
+must have been created from entry with dtuple_convert_big_rec. */
+
+void
+dtuple_convert_back_big_rec(
+/*========================*/
+	dict_index_t*	index __attribute__((unused)),	/* in: index */
+	dtuple_t*	entry,	/* in: entry whose data was put to vector */
+	big_rec_t*	vector)	/* in, own: big rec vector; it is
+				freed in this function */
+{
+	big_rec_field_t*	moved;
+	dfield_t*		dfield;
+	ulint			local_len;
+	ulint			n;
+
+	for (n = 0; n < vector->n_fields; n++) {
+
+		moved = &(vector->fields[n]);
+		dfield = dtuple_get_nth_field(entry, moved->field_no);
+
+		/* The length of the part of the field which precedes the
+		extern field reference */
+
+		local_len = dfield->len - BTR_EXTERN_FIELD_REF_SIZE;
+
+		/* Copy data from big rec vector */
+
+		ut_memcpy(((byte*)dfield->data) + local_len,
+						moved->data, moved->len);
+
+		dfield->len = local_len + moved->len;
+	}
+
+	mem_heap_free(vector->heap);
+}
+
+/******************************************************************
+Frees the memory in a big rec vector by freeing the heap stored in it. */
+
+void
+dtuple_big_rec_free(
+/*================*/
+	big_rec_t*	vector)	/* in, own: big rec vector; it is
+				freed in this function */
+{
+	mem_heap_t*	vector_heap = vector->heap;
+
+	mem_heap_free(vector_heap);
+}
--- a/data/data0type.c
+++ b/data/data0type.c
@ -0,0 +1,260 @@
+/******************************************************
+Data types
+
+(c) 1996 Innobase Oy
+
+Created 1/16/1996 Heikki Tuuri
+*******************************************************/
+
+#include "data0type.h"
+
+#ifdef UNIV_NONINL
+#include "data0type.ic"
+#endif
+
+/**********************************************************************
+This function is used to find the storage length in bytes of the first n
+characters for prefix indexes using a multibyte character set. The function
+finds charset information and returns length of prefix_len characters in the
+index field in bytes.
+
+NOTE: the prototype of this function is copied from ha_innodb.cc! If you change
+this function, you MUST change also the prototype here! */
+
+/* Declaration only: this file calls the function from
+dtype_get_at_most_n_mbchars() but contains no definition of it. */
+ulint
+innobase_get_at_most_n_mbchars(
+/*===========================*/
+				/* out: number of bytes occupied by the first
+				n characters */
+	ulint charset_id,	/* in: character set id */
+	ulint prefix_len,	/* in: prefix length in bytes of the index
+				(this has to be divided by mbmaxlen to get the
+				number of CHARACTERS n in the prefix) */
+	ulint data_len,         /* in: length of the string in bytes */
+	const char* str);	/* in: character string */
+
+/* At the database startup we store the default-charset collation number of
+this MySQL installation to this global variable. If we have < 4.1.2 format
+column definitions, or records in the insert buffer, we use this
+charset-collation code for them.
+NOTE(review): the initializer 99999999 is presumably a placeholder that is
+replaced at startup - confirm against the code that assigns this variable. */
+
+ulint	data_mysql_default_charset_coll		= 99999999;
+
+/* A global type object whose first member is initialized to DATA_BINARY and
+whose remaining members are zero, and a global pointer to that object */
+dtype_t		dtype_binary_val = {DATA_BINARY, 0, 0, 0, 0, 0};
+dtype_t* 	dtype_binary 	= &dtype_binary_val;
+
+/*************************************************************************
+Computes the number of bytes taken by the first n characters of a string,
+where n is given as prefix_len divided by the maximum character length of
+the type. A string that holds fewer than n characters yields the number of
+bytes of the characters it does hold. */
+
+ulint
+dtype_get_at_most_n_mbchars(
+/*========================*/
+					/* out: length of the prefix,
+					in bytes */
+	const dtype_t*	dtype,		/* in: data type */
+	ulint		prefix_len,	/* in: length of the requested
+					prefix, in characters, multiplied by
+					dtype_get_mbmaxlen(dtype) */
+	ulint		data_len,	/* in: length of str (in bytes) */
+	const char*	str)		/* in: the string whose prefix
+					length is being determined */
+{
+#ifndef UNIV_HOTBACKUP
+	ut_a(data_len != UNIV_SQL_NULL);
+	ut_ad(!dtype->mbmaxlen || !(prefix_len % dtype->mbmaxlen));
+
+	if (dtype->mbminlen == dtype->mbmaxlen) {
+		/* Minimum and maximum character lengths agree: the
+		answer is the smaller of the two byte lengths */
+
+		return((prefix_len < data_len) ? prefix_len : data_len);
+	}
+
+	/* Character lengths vary: the charset-aware routine declared
+	earlier in this file does the counting */
+	ut_a(!(prefix_len % dtype->mbmaxlen));
+
+	return(innobase_get_at_most_n_mbchars(
+			dtype_get_charset_coll(dtype->prtype),
+			prefix_len, data_len, str));
+#else /* UNIV_HOTBACKUP */
+	/* This function depends on MySQL code that is not included in
+	InnoDB Hot Backup builds.  Besides, this function should never
+	be called in InnoDB Hot Backup. */
+	ut_error;
+#endif /* UNIV_HOTBACKUP */
+}
+
+/*************************************************************************
+Tells whether a main data type code denotes a string type. Every code up
+to and including DATA_BLOB counts as a string type, and so do DATA_MYSQL
+and DATA_VARMYSQL. */
+
+ibool
+dtype_is_string_type(
+/*=================*/
+			/* out: TRUE if string type */
+	ulint	mtype)	/* in: InnoDB main data type code: DATA_CHAR, ... */
+{
+	if (mtype <= DATA_BLOB) {
+
+		return(TRUE);
+	}
+
+	if (mtype == DATA_MYSQL) {
+
+		return(TRUE);
+	}
+
+	return((mtype == DATA_VARMYSQL) ? TRUE : FALSE);
+}
+
+/*************************************************************************
+Tells whether a type is a binary string type: DATA_FIXBINARY, DATA_BINARY,
+or a DATA_BLOB whose precise type has the DATA_BINARY_TYPE flag set. Note
+that for tables created with < 4.0.14 we do not know whether a DATA_BLOB
+column is a BLOB or a TEXT column; for those DATA_BLOB columns this function
+currently returns FALSE. */
+
+ibool
+dtype_is_binary_string_type(
+/*========================*/
+			/* out: TRUE if binary string type */
+	ulint	mtype,	/* in: main data type */
+	ulint	prtype)	/* in: precise type */
+{
+	if (mtype == DATA_FIXBINARY) {
+
+		return(TRUE);
+	}
+
+	if (mtype == DATA_BINARY) {
+
+		return(TRUE);
+	}
+
+	/* A BLOB is binary only when its precise type says so */
+	if (mtype == DATA_BLOB && (prtype & DATA_BINARY_TYPE)) {
+
+		return(TRUE);
+	}
+
+	return(FALSE);
+}
+
+/*************************************************************************
+Tells whether a type is a string type that is not a binary string type,
+i.e., dtype_is_string_type gives TRUE and dtype_is_binary_string_type gives
+FALSE. Note that for tables created with < 4.0.14 we do not know whether a
+DATA_BLOB column is a BLOB or a TEXT column; for those DATA_BLOB columns
+this function currently returns TRUE. */
+
+ibool
+dtype_is_non_binary_string_type(
+/*============================*/
+			/* out: TRUE if non-binary string type */
+	ulint	mtype,	/* in: main data type */
+	ulint	prtype)	/* in: precise type */
+{
+	if (dtype_is_string_type(mtype) != TRUE) {
+		/* Not a string type at all */
+
+		return(FALSE);
+	}
+
+	if (dtype_is_binary_string_type(mtype, prtype) != FALSE) {
+		/* A string type, but a binary one */
+
+		return(FALSE);
+	}
+
+	return(TRUE);
+}
+
+/*************************************************************************
+Returns the MySQL charset-collation code of a precise type. This is a
+wrapper that simply forwards to dtype_get_charset_coll. */
+
+ulint
+dtype_get_charset_coll_noninline(
+/*=============================*/
+	ulint	prtype)	/* in: precise data type */
+{
+	ulint	charset_coll;
+
+	charset_coll = dtype_get_charset_coll(prtype);
+
+	return(charset_coll);
+}
+
+/*************************************************************************
+Builds a precise type by combining a < 4.1.2 format precise type with a
+charset-collation code. The old precise type must be below 256 * 256 and
+the charset-collation code below 256; the code is added in shifted left
+by 16 bits. */
+
+ulint
+dtype_form_prtype(
+/*==============*/
+	ulint	old_prtype,	/* in: the MySQL type code and the flags
+				DATA_BINARY_TYPE etc. */
+	ulint	charset_coll)	/* in: MySQL charset-collation code */
+{
+	ulint	coll_bits;
+
+	ut_a(old_prtype < 256 * 256);
+	ut_a(charset_coll < 256);
+
+	/* Move the collation code above the 16 bits of the old type */
+	coll_bits = charset_coll << 16;
+
+	return(old_prtype + coll_bits);
+}
+
+/*************************************************************************
+Validates a data type structure. Every check is made with the ut_a
+assertion macro; the function itself only ever returns TRUE. */
+
+ibool
+dtype_validate(
+/*===========*/
+				/* out: TRUE if ok */
+	dtype_t*	type)	/* in: type struct to validate */
+{
+	ut_a(type);
+	/* The main type must lie in the range DATA_VARCHAR ... DATA_MYSQL */
+	ut_a((type->mtype >= DATA_VARCHAR) && (type->mtype <= DATA_MYSQL));
+	
+	if (type->mtype == DATA_SYS) {
+		/* For a system column the masked precise type must be
+		below the number of system columns */
+		ut_a((type->prtype & DATA_MYSQL_TYPE_MASK) < DATA_N_SYS_COLS);
+	}
+
+	/* The minimum character length may not exceed the maximum */
+	ut_a(type->mbminlen <= type->mbmaxlen);
+
+	return(TRUE);
+}
+
+/*************************************************************************
+Prints a data type structure to stderr. The output consists of the name of
+the main type (or "type <n>" for a main type that has no name here), then,
+for DATA_SYS, DATA_VARCHAR and DATA_CHAR main types, the name of the precise
+type (or "prtype <n>" for a precise type that has no name here), and finally
+the length and the precision. No newline is printed. */
+
+void
+dtype_print(
+/*========*/
+	dtype_t*	type)	/* in: type */
+{
+	ulint	mtype;
+	ulint	prtype;
+	ulint	len;
+
+	ut_a(type);
+
+	mtype = type->mtype;
+	prtype = type->prtype;
+
+	if (mtype == DATA_VARCHAR) {
+		fputs("DATA_VARCHAR", stderr);
+	} else if (mtype == DATA_CHAR) {
+		fputs("DATA_CHAR", stderr);
+	} else if (mtype == DATA_BINARY) {
+		fputs("DATA_BINARY", stderr);
+	} else if (mtype == DATA_INT) {
+		fputs("DATA_INT", stderr);
+	} else if (mtype == DATA_MYSQL) {
+		fputs("DATA_MYSQL", stderr);
+	} else if (mtype == DATA_SYS) {
+		fputs("DATA_SYS", stderr);
+	} else {
+		fprintf(stderr, "type %lu", (ulong) mtype);
+	}
+
+	len = type->len;
+
+	if ((mtype == DATA_SYS)
+	    || (mtype == DATA_VARCHAR)
+	    || (mtype == DATA_CHAR)) {
+		putc(' ', stderr);
+
+		/* For the row id, roll pointer and trx id the printed
+		length is the fixed length of that system column instead
+		of the length stored in the type struct */
+		if (prtype == DATA_ROW_ID) {
+			fputs("DATA_ROW_ID", stderr);
+			len = DATA_ROW_ID_LEN;
+		} else if (prtype == DATA_ROLL_PTR) {
+			fputs("DATA_ROLL_PTR", stderr);
+			len = DATA_ROLL_PTR_LEN;
+		} else if (prtype == DATA_TRX_ID) {
+			fputs("DATA_TRX_ID", stderr);
+			len = DATA_TRX_ID_LEN;
+		} else if (prtype == DATA_MIX_ID) {
+			fputs("DATA_MIX_ID", stderr);
+		} else if (prtype == DATA_ENGLISH) {
+			fputs("DATA_ENGLISH", stderr);
+		} else {
+			/* Print the precise type here: the label says
+			"prtype", so the value must not be the main type */
+			fprintf(stderr, "prtype %lu", (ulong) prtype);
+		}
+	}
+
+	fprintf(stderr, " len %lu prec %lu", (ulong) len, (ulong) type->prec);
+}
--- a/db/db0err.h
+++ b/db/db0err.h
@ -0,0 +1,44 @@
+/******************************************************
+Global error codes for the database
+
+(c) 1996 Innobase Oy
+
+Created 5/24/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef db0err_h
+#define db0err_h
+
+
+#define DB_SUCCESS		10
+
+/* The following are error codes */
+#define	DB_ERROR		11
+#define DB_OUT_OF_MEMORY	12
+#define DB_OUT_OF_FILE_SPACE	13
+#define DB_LOCK_WAIT		14
+#define DB_DEADLOCK		15
+#define DB_ROLLBACK		16
+#define DB_DUPLICATE_KEY	17
+#define DB_QUE_THR_SUSPENDED	18
+#define DB_MISSING_HISTORY	19	/* required history data has been
+					deleted due to lack of space in
+					rollback segment */
+#define DB_CLUSTER_NOT_FOUND	30
+#define DB_TABLE_NOT_FOUND	31
+#define DB_MUST_GET_MORE_FILE_SPACE 32	/* the database has to be stopped
+					and restarted with more file space */
+#define DB_TABLE_IS_BEING_USED	33
+#define DB_TOO_BIG_RECORD	34	/* a record in an index would become
+					bigger than 1/2 free space in a page
+					frame */
+					
+/* The following are partial failure codes */
+#define DB_FAIL 		1000
+#define DB_OVERFLOW 		1001
+#define DB_UNDERFLOW 		1002
+#define DB_STRONG_FAIL		1003
+#define DB_RECORD_NOT_FOUND	1500
+#define DB_END_OF_INDEX		1501
+
+#endif 
--- a/dict/Makefile.am
+++ b/dict/Makefile.am
@ -0,0 +1,25 @@
+# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+# & Innobase Oy
+# 
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+include ../include/Makefile.i
+
+noinst_LIBRARIES =	libdict.a
+
+libdict_a_SOURCES =	dict0boot.c dict0crea.c dict0dict.c dict0load.c\
+			dict0mem.c
+
+EXTRA_PROGRAMS =	
--- a/dict/dict0boot.c
+++ b/dict/dict0boot.c
@ -0,0 +1,414 @@
+/******************************************************
+Data dictionary creation and booting
+
+(c) 1996 Innobase Oy
+
+Created 4/18/1996 Heikki Tuuri
+*******************************************************/
+
+#include "dict0boot.h"
+
+#ifdef UNIV_NONINL
+#include "dict0boot.ic"
+#endif
+
+#include "dict0crea.h"
+#include "btr0btr.h"
+#include "dict0load.h"
+#include "dict0load.h"
+#include "trx0trx.h"
+#include "srv0srv.h"
+#include "ibuf0ibuf.h"
+#include "buf0flu.h"
+#include "log0recv.h"
+#include "os0file.h"
+
+/**************************************************************************
+Fetches the page that holds the dictionary header with an x-latch and
+returns a pointer to the header within that page. */
+
+dict_hdr_t*
+dict_hdr_get(
+/*=========*/
+			/* out: pointer to the dictionary header, 
+			page x-latched */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	dict_hdr_t*	hdr;
+
+	ut_ad(mtr);
+
+	/* The header is at offset DICT_HDR of page DICT_HDR_PAGE_NO in
+	space DICT_HDR_SPACE */
+	hdr = DICT_HDR + buf_page_get(DICT_HDR_SPACE, DICT_HDR_PAGE_NO,
+							RW_X_LATCH, mtr);
+#ifdef UNIV_SYNC_DEBUG
+	buf_page_dbg_add_level(hdr, SYNC_DICT_HEADER);
+#endif /* UNIV_SYNC_DEBUG */
+
+	return(hdr);
+}
+
+/**************************************************************************
+Allocates a new table, index, or tree id: the counter of the requested
+kind is read from the dictionary header, incremented by one, written back
+within a mini-transaction of its own, and the incremented value is
+returned. */
+
+dulint
+dict_hdr_get_new_id(
+/*================*/
+			/* out: the new id */
+	ulint	type)	/* in: DICT_HDR_ROW_ID, ... */
+{
+	mtr_t		mtr;
+	dict_hdr_t*	id_field;
+	dulint		new_id;
+
+	ut_ad((type == DICT_HDR_TABLE_ID) || (type == DICT_HDR_INDEX_ID)
+	      || (type == DICT_HDR_MIX_ID));
+
+	mtr_start(&mtr);
+
+	/* type is the offset of the counter within the header */
+	id_field = dict_hdr_get(&mtr) + type;
+
+	new_id = mtr_read_dulint(id_field, &mtr);
+	new_id = ut_dulint_add(new_id, 1);
+
+	mlog_write_dulint(id_field, new_id, &mtr);
+
+	mtr_commit(&mtr);
+
+	return(new_id);
+}
+
+/**************************************************************************
+Stores the current value of the in-memory row id counter (dict_sys->row_id)
+to the row id field of the dictionary header page, using a mini-transaction
+of its own. */
+
+void
+dict_hdr_flush_row_id(void)
+/*=======================*/
+{
+	mtr_t		mtr;
+	dict_hdr_t*	row_id_field;
+	dulint		row_id;
+
+#ifdef UNIV_SYNC_DEBUG
+	ut_ad(mutex_own(&(dict_sys->mutex)));
+#endif /* UNIV_SYNC_DEBUG */
+
+	/* Take the value before the mini-transaction is started */
+	row_id = dict_sys->row_id;
+
+	mtr_start(&mtr);
+
+	row_id_field = dict_hdr_get(&mtr) + DICT_HDR_ROW_ID;
+
+	mlog_write_dulint(row_id_field, row_id, &mtr);
+
+	mtr_commit(&mtr);
+}
+
+/*********************************************************************
+Creates the file page for the dictionary header. This function is
+called only at the database creation. It initializes the four id counters
+in the header and creates the B-tree roots of five system indexes, storing
+each root page number in the header. If creating any root fails, the
+function returns FALSE immediately and the remaining roots are not
+created. */
+static
+ibool
+dict_hdr_create(
+/*============*/
+			/* out: TRUE if succeed */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	dict_hdr_t*	dict_header;
+	ulint		hdr_page_no;
+	ulint		root_page_no;
+	page_t*		page;
+	
+	ut_ad(mtr);
+
+	/* Create the dictionary header file block in a new, allocated file
+	segment in the system tablespace */
+	page = fseg_create(DICT_HDR_SPACE, 0,
+				  DICT_HDR + DICT_HDR_FSEG_HEADER, mtr);
+
+	hdr_page_no = buf_frame_get_page_no(page);
+	
+	/* The created page must be the page where dict_hdr_get() looks
+	for the header */
+	ut_a(DICT_HDR_PAGE_NO == hdr_page_no);
+
+	dict_header = dict_hdr_get(mtr);
+
+	/* Start counting row, table, index, and tree ids from
+	DICT_HDR_FIRST_ID */
+	mlog_write_dulint(dict_header + DICT_HDR_ROW_ID,
+				ut_dulint_create(0, DICT_HDR_FIRST_ID), mtr);
+
+	mlog_write_dulint(dict_header + DICT_HDR_TABLE_ID,
+				ut_dulint_create(0, DICT_HDR_FIRST_ID), mtr);
+
+	mlog_write_dulint(dict_header + DICT_HDR_INDEX_ID,
+				ut_dulint_create(0, DICT_HDR_FIRST_ID), mtr);
+
+	mlog_write_dulint(dict_header + DICT_HDR_MIX_ID,
+				ut_dulint_create(0, DICT_HDR_FIRST_ID), mtr);
+
+	/* Create the B-tree roots for the clustered indexes of the basic
+	system tables */
+
+	/*--------------------------*/	
+	/* Root with tree id DICT_TABLES_ID (clustered, unique) */
+	root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
+				DICT_HDR_SPACE, DICT_TABLES_ID, FALSE, mtr);
+	if (root_page_no == FIL_NULL) {
+
+		return(FALSE);
+	}
+
+	mlog_write_ulint(dict_header + DICT_HDR_TABLES, root_page_no,
+							MLOG_4BYTES, mtr);
+	/*--------------------------*/	
+	/* Root with tree id DICT_TABLE_IDS_ID (unique, not clustered) */
+	root_page_no = btr_create(DICT_UNIQUE, DICT_HDR_SPACE,
+						DICT_TABLE_IDS_ID, FALSE, mtr);
+	if (root_page_no == FIL_NULL) {
+
+		return(FALSE);
+	}
+
+	mlog_write_ulint(dict_header + DICT_HDR_TABLE_IDS, root_page_no,
+							MLOG_4BYTES, mtr);
+	/*--------------------------*/	
+	/* Root with tree id DICT_COLUMNS_ID (clustered, unique) */
+	root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
+				DICT_HDR_SPACE, DICT_COLUMNS_ID, FALSE, mtr);
+	if (root_page_no == FIL_NULL) {
+
+		return(FALSE);
+	}
+
+	mlog_write_ulint(dict_header + DICT_HDR_COLUMNS, root_page_no,
+							MLOG_4BYTES, mtr);
+	/*--------------------------*/	
+	/* Root with tree id DICT_INDEXES_ID (clustered, unique) */
+	root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
+				DICT_HDR_SPACE, DICT_INDEXES_ID, FALSE, mtr);
+	if (root_page_no == FIL_NULL) {
+
+		return(FALSE);
+	}
+
+	mlog_write_ulint(dict_header + DICT_HDR_INDEXES, root_page_no,
+							MLOG_4BYTES, mtr);
+	/*--------------------------*/	
+	/* Root with tree id DICT_FIELDS_ID (clustered, unique) */
+	root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
+				DICT_HDR_SPACE, DICT_FIELDS_ID, FALSE, mtr);
+	if (root_page_no == FIL_NULL) {
+
+		return(FALSE);
+	}
+
+	mlog_write_ulint(dict_header + DICT_HDR_FIELDS, root_page_no,
+							MLOG_4BYTES, mtr);
+	/*--------------------------*/	
+
+	return(TRUE);
+}
+
+/*********************************************************************
+Initializes the data dictionary memory structures when the database is
+started. This function is also called when the data dictionary is created.
+It sets the in-memory row id counter from the dictionary header, builds the
+memory objects of the tables SYS_TABLES, SYS_COLUMNS, SYS_INDEXES and
+SYS_FIELDS together with their indexes (root page numbers are read from the
+dictionary header), initializes the insert buffer, and finally loads the
+remaining index definitions of the system tables. The dictionary mutex is
+held from just after dict_init() until the end of the function. */
+
+void
+dict_boot(void)
+/*===========*/
+{
+	dict_table_t*	table;
+	dict_index_t*	index;
+	dict_hdr_t*	dict_hdr;
+	mtr_t		mtr;
+	ibool		success;
+
+	mtr_start(&mtr);
+	
+	/* Create the hash tables etc. */
+	dict_init();
+
+	mutex_enter(&(dict_sys->mutex));
+	
+	/* Get the dictionary header */
+	dict_hdr = dict_hdr_get(&mtr);
+
+	/* Because we only write new row ids to disk-based data structure
+	(dictionary header) when it is divisible by
+	DICT_HDR_ROW_ID_WRITE_MARGIN, in recovery we will not recover
+	the latest value of the row id counter. Therefore we advance
+	the counter at the database startup to avoid overlapping values.
+	Note that when a user after database startup first time asks for
+	a new row id, then because the counter is now divisible by
+	..._MARGIN, it will immediately be updated to the disk-based
+	header. */
+
+	dict_sys->row_id = ut_dulint_add(
+			     ut_dulint_align_up(
+				mtr_read_dulint(dict_hdr + DICT_HDR_ROW_ID,
+							&mtr),
+				DICT_HDR_ROW_ID_WRITE_MARGIN),
+			     DICT_HDR_ROW_ID_WRITE_MARGIN);
+
+	/* Insert into the dictionary cache the descriptions of the basic
+	system tables */
+	/*-------------------------*/
+	/* SYS_TABLES: 8 columns */
+	table = dict_mem_table_create("SYS_TABLES", DICT_HDR_SPACE, 8, FALSE);
+
+	dict_mem_table_add_col(table, "NAME", DATA_BINARY, 0, 0, 0);
+	dict_mem_table_add_col(table, "ID", DATA_BINARY, 0, 0, 0);
+	dict_mem_table_add_col(table, "N_COLS", DATA_INT, 0, 4, 0);
+	dict_mem_table_add_col(table, "TYPE", DATA_INT, 0, 4, 0);
+	dict_mem_table_add_col(table, "MIX_ID", DATA_BINARY, 0, 0, 0);
+	dict_mem_table_add_col(table, "MIX_LEN", DATA_INT, 0, 4, 0);
+	dict_mem_table_add_col(table, "CLUSTER_NAME", DATA_BINARY, 0, 0, 0);
+	dict_mem_table_add_col(table, "SPACE", DATA_INT, 0, 4, 0);
+
+	table->id = DICT_TABLES_ID;
+	
+	dict_table_add_to_cache(table);
+	dict_sys->sys_tables = table;
+	
+	/* Clustered index of SYS_TABLES on NAME */
+	index = dict_mem_index_create("SYS_TABLES", "CLUST_IND",
+			DICT_HDR_SPACE, DICT_UNIQUE | DICT_CLUSTERED, 1);
+
+	dict_mem_index_add_field(index, "NAME", 0, 0);
+
+	index->id = DICT_TABLES_ID;
+
+	success = dict_index_add_to_cache(table, index, mtr_read_ulint(
+			dict_hdr + DICT_HDR_TABLES, MLOG_4BYTES, &mtr));
+	ut_a(success);
+	/*-------------------------*/
+	/* Unique index of SYS_TABLES on ID */
+	index = dict_mem_index_create("SYS_TABLES", "ID_IND",
+			DICT_HDR_SPACE, DICT_UNIQUE, 1);
+	dict_mem_index_add_field(index, "ID", 0, 0);
+
+	index->id = DICT_TABLE_IDS_ID;
+	success = dict_index_add_to_cache(table, index, mtr_read_ulint(
+			dict_hdr + DICT_HDR_TABLE_IDS, MLOG_4BYTES, &mtr));
+	ut_a(success);
+	/*-------------------------*/
+	/* SYS_COLUMNS: 7 columns, clustered index on (TABLE_ID, POS) */
+	table = dict_mem_table_create("SYS_COLUMNS", DICT_HDR_SPACE, 7, FALSE);
+
+	dict_mem_table_add_col(table, "TABLE_ID", DATA_BINARY,0,0,0);
+	dict_mem_table_add_col(table, "POS", DATA_INT, 0, 4, 0);
+	dict_mem_table_add_col(table, "NAME", DATA_BINARY, 0, 0, 0);
+	dict_mem_table_add_col(table, "MTYPE", DATA_INT, 0, 4, 0);
+	dict_mem_table_add_col(table, "PRTYPE", DATA_INT, 0, 4, 0);
+	dict_mem_table_add_col(table, "LEN", DATA_INT, 0, 4, 0);
+	dict_mem_table_add_col(table, "PREC", DATA_INT, 0, 4, 0);
+	
+	table->id = DICT_COLUMNS_ID;
+
+	dict_table_add_to_cache(table);
+	dict_sys->sys_columns = table;
+
+	index = dict_mem_index_create("SYS_COLUMNS", "CLUST_IND",
+			DICT_HDR_SPACE, DICT_UNIQUE | DICT_CLUSTERED, 2);
+
+	dict_mem_index_add_field(index, "TABLE_ID", 0, 0);
+	dict_mem_index_add_field(index, "POS", 0, 0);
+
+	index->id = DICT_COLUMNS_ID;
+	success = dict_index_add_to_cache(table, index, mtr_read_ulint(
+			dict_hdr + DICT_HDR_COLUMNS, MLOG_4BYTES, &mtr));
+	ut_a(success);
+	/*-------------------------*/
+	/* SYS_INDEXES: 7 columns, clustered index on (TABLE_ID, ID) */
+	table = dict_mem_table_create("SYS_INDEXES", DICT_HDR_SPACE, 7, FALSE);
+
+	dict_mem_table_add_col(table, "TABLE_ID", DATA_BINARY, 0,0,0);
+	dict_mem_table_add_col(table, "ID", DATA_BINARY, 0, 0, 0);
+	dict_mem_table_add_col(table, "NAME", DATA_BINARY, 0, 0, 0);
+	dict_mem_table_add_col(table, "N_FIELDS", DATA_INT, 0, 4, 0);
+	dict_mem_table_add_col(table, "TYPE", DATA_INT, 0, 4, 0);
+	dict_mem_table_add_col(table, "SPACE", DATA_INT, 0, 4, 0);
+	dict_mem_table_add_col(table, "PAGE_NO", DATA_INT, 0, 4, 0);
+
+	/* The '+ 2' below comes from the 2 system fields */
+#if DICT_SYS_INDEXES_PAGE_NO_FIELD != 6 + 2
+#error "DICT_SYS_INDEXES_PAGE_NO_FIELD != 6 + 2"
+#endif
+#if DICT_SYS_INDEXES_SPACE_NO_FIELD != 5 + 2
+#error "DICT_SYS_INDEXES_SPACE_NO_FIELD != 5 + 2"
+#endif
+#if DICT_SYS_INDEXES_TYPE_FIELD != 4 + 2
+#error "DICT_SYS_INDEXES_TYPE_FIELD != 4 + 2"
+#endif
+
+	table->id = DICT_INDEXES_ID;
+	dict_table_add_to_cache(table);
+	dict_sys->sys_indexes = table;
+
+	index = dict_mem_index_create("SYS_INDEXES", "CLUST_IND",
+			DICT_HDR_SPACE, DICT_UNIQUE | DICT_CLUSTERED, 2);
+
+	dict_mem_index_add_field(index, "TABLE_ID", 0, 0);
+	dict_mem_index_add_field(index, "ID", 0, 0);
+
+	index->id = DICT_INDEXES_ID;
+	success = dict_index_add_to_cache(table, index, mtr_read_ulint(
+			dict_hdr + DICT_HDR_INDEXES, MLOG_4BYTES, &mtr));
+	ut_a(success);
+	/*-------------------------*/
+	/* SYS_FIELDS: 3 columns, clustered index on (INDEX_ID, POS) */
+	table = dict_mem_table_create("SYS_FIELDS", DICT_HDR_SPACE, 3, FALSE);
+
+	dict_mem_table_add_col(table, "INDEX_ID", DATA_BINARY, 0,0,0);
+	dict_mem_table_add_col(table, "POS", DATA_INT, 0, 4, 0);
+	dict_mem_table_add_col(table, "COL_NAME", DATA_BINARY, 0,0,0);
+
+	table->id = DICT_FIELDS_ID;
+	dict_table_add_to_cache(table);
+	dict_sys->sys_fields = table;
+
+	index = dict_mem_index_create("SYS_FIELDS", "CLUST_IND",
+			DICT_HDR_SPACE, DICT_UNIQUE | DICT_CLUSTERED, 2);
+
+	dict_mem_index_add_field(index, "INDEX_ID", 0, 0);
+	dict_mem_index_add_field(index, "POS", 0, 0);
+
+	index->id = DICT_FIELDS_ID;
+	success = dict_index_add_to_cache(table, index, mtr_read_ulint(
+			dict_hdr + DICT_HDR_FIELDS, MLOG_4BYTES, &mtr));
+	ut_a(success);
+
+	/* The dictionary header is not read after this point */
+	mtr_commit(&mtr);
+	/*-------------------------*/
+
+	/* Initialize the insert buffer table and index for each tablespace */
+
+	ibuf_init_at_db_start();
+
+	/* Load definitions of other indexes on system tables */
+
+	dict_load_sys_table(dict_sys->sys_tables);
+	dict_load_sys_table(dict_sys->sys_columns);
+	dict_load_sys_table(dict_sys->sys_indexes);
+	dict_load_sys_table(dict_sys->sys_fields);
+	
+	mutex_exit(&(dict_sys->mutex));
+}
+
+/*********************************************************************
+Inserts the basic system table data into themselves in the database
+creation. The body is currently empty: the function is a placeholder that
+dict_create() calls after dict_boot(). */
+static
+void
+dict_insert_initial_data(void)
+/*==========================*/
+{
+	/* Does nothing yet */
+}
+
+/*********************************************************************
+Creates and initializes the data dictionary at the database creation:
+the dictionary header is created in a mini-transaction of its own, after
+which the dictionary is booted and the (currently empty) initial data
+insertion is run. */
+
+void
+dict_create(void)
+/*=============*/
+{
+	mtr_t	mtr;
+
+	mtr_start(&mtr);
+
+	/* NOTE(review): dict_hdr_create() returns FALSE if a B-tree root
+	could not be created; that result is discarded here */
+	dict_hdr_create(&mtr);
+
+	mtr_commit(&mtr);
+	
+	dict_boot();
+
+	dict_insert_initial_data();
+}
--- a/dict/dict0crea.c
+++ b/dict/dict0crea.c
--- a/dict/dict0dict.c
+++ b/dict/dict0dict.c
--- a/dict/dict0load.c
+++ b/dict/dict0load.c
--- a/dict/dict0mem.c
+++ b/dict/dict0mem.c
@ -0,0 +1,294 @@
+/**********************************************************************
+Data dictionary memory object creation
+
+(c) 1996 Innobase Oy
+
+Created 1/8/1996 Heikki Tuuri
+***********************************************************************/
+
+#include "dict0mem.h"
+
+#ifdef UNIV_NONINL
+#include "dict0mem.ic"
+#endif
+
+#include "rem0rec.h"
+#include "data0type.h"
+#include "mach0data.h"
+#include "dict0dict.h"
+#include "que0que.h"
+#include "pars0pars.h"
+#include "lock0lock.h"
+
+#define	DICT_HEAP_SIZE		100	/* initial memory heap size when
+					creating a table or index object */
+
+/**************************************************************************
+Creates a table memory object. The object, the copy of its name, its
+column array and its auto-inc lock struct are all allocated from a memory
+heap that is created here and stored in the object. The column array has
+room for n_cols columns plus DATA_N_SYS_COLS system columns. */
+
+dict_table_t*
+dict_mem_table_create(
+/*==================*/
+				/* out, own: table object */
+	const char*	name,	/* in: table name */
+	ulint		space,	/* in: space where the clustered index of
+				the table is placed; this parameter is
+				ignored if the table is made a member of
+				a cluster */
+	ulint		n_cols,	/* in: number of columns */
+	ibool		comp)	/* in: TRUE=compact page format */
+{
+	mem_heap_t*	heap;
+	dict_table_t*	table;
+	ulint		n_total_cols;
+
+	ut_ad(name);
+	ut_ad(comp == FALSE || comp == TRUE);
+
+	/* User columns plus the system columns */
+	n_total_cols = n_cols + DATA_N_SYS_COLS;
+
+	heap = mem_heap_create(DICT_HEAP_SIZE);
+	table = mem_heap_alloc(heap, sizeof(dict_table_t));
+
+	/* Identity and placement */
+	table->heap = heap;
+	table->type = DICT_TABLE_ORDINARY;
+	table->name = mem_heap_strdup(heap, name);
+	table->dir_path_of_temp_table = NULL;
+	table->space = space;
+	table->ibd_file_missing = FALSE;
+	table->tablespace_discarded = FALSE;
+	table->comp = comp;
+
+	/* Column bookkeeping: nothing is defined yet */
+	table->n_def = 0;
+	table->n_cols = n_total_cols;
+
+	/* Usage counters and cache state */
+	table->mem_fix = 0;
+	table->n_mysql_handles_opened = 0;
+	table->n_foreign_key_checks_running = 0;
+	table->cached = FALSE;
+
+	/* Cluster related fields */
+	table->mix_id = ut_dulint_zero;
+	table->mix_len = 0;
+
+	table->cols = mem_heap_alloc(heap,
+					n_total_cols * sizeof(dict_col_t));
+	UT_LIST_INIT(table->indexes);
+
+	table->auto_inc_lock = mem_heap_alloc(heap, lock_get_size());
+
+	table->query_cache_inv_trx_id = ut_dulint_zero;
+
+	UT_LIST_INIT(table->locks);
+	UT_LIST_INIT(table->foreign_list);
+	UT_LIST_INIT(table->referenced_list);
+
+	/* Statistics */
+	table->does_not_fit_in_memory = FALSE;
+	table->stat_initialized = FALSE;
+	table->stat_modified_counter = 0;
+
+	/* Auto-increment support */
+	mutex_create(&(table->autoinc_mutex));
+	mutex_set_level(&(table->autoinc_mutex), SYNC_DICT_AUTOINC_MUTEX);
+	table->autoinc_inited = FALSE;
+
+	table->magic_n = DICT_TABLE_MAGIC_N;
+
+	return(table);
+}
+
+/**************************************************************************
+Creates a cluster memory object: a table memory object in the non-compact
+format whose type is DICT_TABLE_CLUSTER and whose mix_len is set to the
+given value. */
+
+dict_table_t*
+dict_mem_cluster_create(
+/*====================*/
+				/* out, own: cluster object */
+	const char*	name,	/* in: cluster name */
+	ulint		space,	/* in: space where the clustered indexes
+				of the member tables are placed */
+	ulint		n_cols,	/* in: number of columns */
+	ulint		mix_len)/* in: length of the common key prefix in the
+				cluster */
+{
+	dict_table_t*	new_cluster;
+
+	/* Clustered tables cannot work with the compact record format,
+	hence comp == FALSE */
+	new_cluster = dict_mem_table_create(name, space, n_cols, FALSE);
+
+	new_cluster->mix_len = mix_len;
+	new_cluster->type = DICT_TABLE_CLUSTER;
+
+	return(new_cluster);
+}
+
+/**************************************************************************
+Marks a non-published table as a member of a cluster. NOTE: only the
+pointer to the cluster name is stored in the table object; the string is
+not copied. */
+
+void
+dict_mem_table_make_cluster_member(
+/*===============================*/
+	dict_table_t*	table,		/* in: non-published table */
+	const char*	cluster_name)	/* in: cluster name */
+{
+	table->cluster_name = cluster_name;
+	table->type = DICT_TABLE_CLUSTER_MEMBER;
+}
+
+/**************************************************************************
+Appends a column definition to a table: the next free slot of the column
+array is taken, the column name is copied to the heap of the table, and
+the data type of the column is set from the given parameters. */
+
+void
+dict_mem_table_add_col(
+/*===================*/
+	dict_table_t*	table,	/* in: table */
+	const char*	name,	/* in: column name */
+	ulint		mtype,	/* in: main datatype */
+	ulint		prtype,	/* in: precise type */
+	ulint		len,	/* in: length */
+	ulint		prec)	/* in: precision */
+{
+	dict_col_t*	new_col;
+	ulint		pos;
+
+	ut_ad(table && name);
+	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+
+	/* Count the new column in first, then address its slot */
+	table->n_def++;
+	pos = table->n_def - 1;
+
+	new_col = dict_table_get_nth_col(table, pos);
+
+	new_col->ind = pos;
+	new_col->name = mem_heap_strdup(table->heap, name);
+	new_col->table = table;
+	new_col->ord_part = 0;
+	new_col->clust_pos = ULINT_UNDEFINED;
+
+	dtype_set(dict_col_get_type(new_col), mtype, prtype, len, prec);
+}
+
+/**************************************************************************
+Creates an index memory object. The object, the copy of the index name and
+the field array are allocated from a memory heap that is created here and
+stored in the object. NOTE: only the pointer to the table name is stored;
+the table name string is not copied. */
+
+dict_index_t*
+dict_mem_index_create(
+/*==================*/
+					/* out, own: index object */
+	const char*	table_name,	/* in: table name */
+	const char*	index_name,	/* in: index name */
+	ulint		space,		/* in: space where the index tree is
+					placed, ignored if the index is of
+					the clustered type */
+	ulint		type,		/* in: DICT_UNIQUE,
+					DICT_CLUSTERED, ... ORed */
+	ulint		n_fields)	/* in: number of fields */
+{
+	mem_heap_t*	heap;
+	dict_index_t*	new_index;
+
+	ut_ad(table_name && index_name);
+
+	heap = mem_heap_create(DICT_HEAP_SIZE);
+	new_index = mem_heap_alloc(heap, sizeof(dict_index_t));
+
+	new_index->heap = heap;
+
+	new_index->type = type;
+	new_index->space = space;
+	new_index->name = mem_heap_strdup(heap, index_name);
+	new_index->table_name = table_name;
+	new_index->table = NULL;
+
+	/* No fields have been defined yet */
+	new_index->n_nullable = 0;
+	new_index->n_def = 0;
+	new_index->n_fields = n_fields;
+
+	/* The '1 +' below prevents allocation of an empty mem block */
+	new_index->fields = mem_heap_alloc(heap, 1 + n_fields
+						* sizeof(dict_field_t));
+
+	new_index->stat_n_diff_key_vals = NULL;
+	new_index->cached = FALSE;
+	new_index->magic_n = DICT_INDEX_MAGIC_N;
+
+	return(new_index);
+}
+
+/**************************************************************************
+Creates a foreign constraint memory object in a memory heap of its own and
+initializes it: all pointer fields are set to NULL, and the type and the
+number of fields are set to zero. */
+
+dict_foreign_t*
+dict_mem_foreign_create(void)
+/*=========================*/
+				/* out, own: foreign constraint struct */
+{
+	mem_heap_t*	heap;
+	dict_foreign_t*	new_foreign;
+
+	heap = mem_heap_create(100);
+	new_foreign = mem_heap_alloc(heap, sizeof(dict_foreign_t));
+
+	new_foreign->heap = heap;
+	new_foreign->id = NULL;
+	new_foreign->type = 0;
+	new_foreign->n_fields = 0;
+
+	/* The referencing (foreign) side */
+	new_foreign->foreign_table_name = NULL;
+	new_foreign->foreign_table = NULL;
+	new_foreign->foreign_col_names = NULL;
+	new_foreign->foreign_index = NULL;
+
+	/* The referenced side */
+	new_foreign->referenced_table_name = NULL;
+	new_foreign->referenced_table = NULL;
+	new_foreign->referenced_col_names = NULL;
+	new_foreign->referenced_index = NULL;
+
+	return(new_foreign);
+}
+
+/**************************************************************************
+Appends a field definition to an index by filling in the next free slot of
+the field array. NOTE: does not take a copy of the column name if the field
+is a column. The memory occupied by the column name may be released only
+after publishing the index. */
+
+void
+dict_mem_index_add_field(
+/*=====================*/
+	dict_index_t*	index,		/* in: index */
+	const char*	name,		/* in: column name */
+	ulint		order,		/* in: order criterion; 0 means an
+					ascending order */
+	ulint		prefix_len)	/* in: 0 or the column prefix length
+					in a MySQL index like
+					INDEX (textcol(25)) */
+{
+	dict_field_t*	new_field;
+	ulint		pos;
+
+	ut_ad(index && name);
+	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+
+	/* Count the new field in first, then address its slot */
+	index->n_def++;
+	pos = index->n_def - 1;
+
+	new_field = dict_index_get_nth_field(index, pos);
+
+	new_field->name = name;
+	new_field->order = order;
+	new_field->prefix_len = prefix_len;
+}
+
+/**************************************************************************
+Frees an index memory object by freeing the memory heap it refers to. */
+
+void
+dict_mem_index_free(
+/*================*/
+	dict_index_t*	index)	/* in: index */
+{
+	mem_heap_t*	index_heap = index->heap;
+
+	mem_heap_free(index_heap);
+}
--- a/dyn/Makefile.am
+++ b/dyn/Makefile.am
@ -0,0 +1,24 @@
+# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+# & Innobase Oy
+# 
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+include ../include/Makefile.i
+
+noinst_LIBRARIES =	libdyn.a
+
+libdyn_a_SOURCES =	dyn0dyn.c
+
+EXTRA_PROGRAMS =	
--- a/dyn/dyn0dyn.c
+++ b/dyn/dyn0dyn.c
@ -0,0 +1,48 @@
+/******************************************************
+The dynamically allocated array
+
+(c) 1996 Innobase Oy
+
+Created 2/5/1996 Heikki Tuuri
+*******************************************************/
+
+#include "dyn0dyn.h"
+#ifdef UNIV_NONINL
+#include "dyn0dyn.ic"
+#endif
+
+/****************************************************************
+Appends a new, empty block to a dyn array. On the first call for an array
+(its heap is still NULL) the block list is initialized with the array
+itself as the first block and a memory heap is created. The block that was
+last so far is flagged as full before the new block is linked in. */
+
+dyn_block_t*
+dyn_array_add_block(
+/*================*/
+				/* out: created block */
+	dyn_array_t*	arr)	/* in: dyn array */
+{
+	dyn_block_t*	last_block;
+	dyn_block_t*	new_block;
+
+	ut_ad(arr);
+	ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
+
+	if (arr->heap == NULL) {
+		/* First extension of this array */
+		UT_LIST_INIT(arr->base);
+		UT_LIST_ADD_FIRST(list, arr->base, arr);
+
+		arr->heap = mem_heap_create(sizeof(dyn_block_t));
+	}
+
+	last_block = dyn_array_get_last_block(arr);
+	last_block->used |= DYN_BLOCK_FULL_FLAG;
+
+	new_block = mem_heap_alloc(arr->heap, sizeof(dyn_block_t));
+	new_block->used = 0;
+
+	UT_LIST_ADD_LAST(list, arr->base, new_block);
+
+	return(new_block);
+}
--- a/eval/Makefile.am
+++ b/eval/Makefile.am
@ -0,0 +1,24 @@
+# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+# & Innobase Oy
+# 
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+include ../include/Makefile.i
+
+noinst_LIBRARIES =	libeval.a
+
+libeval_a_SOURCES =	eval0eval.c eval0proc.c
+
+EXTRA_PROGRAMS =	
--- a/eval/eval0eval.c
+++ b/eval/eval0eval.c
@ -0,0 +1,835 @@
+/******************************************************
+SQL evaluator: evaluates simple data structures, like expressions, in
+a query graph
+
+(c) 1997 Innobase Oy
+
+Created 12/29/1997 Heikki Tuuri
+*******************************************************/
+
+#include "eval0eval.h"
+
+#ifdef UNIV_NONINL
+#include "eval0eval.ic"
+#endif
+
+#include "data0data.h"
+#include "row0sel.h"
+
+/* The RND function seed */
+ulint	eval_rnd 	= 128367121;
+
+/* Dummy address used when we should allocate a buffer of size 0 in
+the function below */
+
+byte	eval_dummy;
+
+/*********************************************************************
+Allocates a value buffer of the requested size from global dynamic
+memory for a que_node and attaches it to the node's val field. A buffer
+already attached to the node is freed first, unless it is the shared
+zero-size placeholder eval_dummy. A request of size 0 does not allocate:
+the val field is pointed at eval_dummy instead.
+NOTE that this memory must be explicitly freed when the query graph is
+freed, and that this is the only function where dynamic memory should be
+allocated for a query node val field. */
+
+byte*
+eval_node_alloc_val_buf(
+/*====================*/
+				/* out: pointer to allocated buffer */
+	que_node_t*	node,	/* in: query graph node; sets the val field
+				data field to point to the new buffer, and
+				len field equal to size */
+	ulint		size)	/* in: buffer size */
+{
+	dfield_t*	val;
+	byte*		old_buf;
+	byte*		new_buf;
+
+	ut_ad(que_node_get_type(node) == QUE_NODE_SYMBOL
+	      || que_node_get_type(node) == QUE_NODE_FUNC);
+
+	val = que_node_get_val(node);
+	old_buf = dfield_get_data(val);
+
+	/* The placeholder is a static object and must never be freed */
+	if (old_buf != NULL && old_buf != &eval_dummy) {
+		mem_free(old_buf);
+	}
+
+	if (size > 0) {
+		new_buf = mem_alloc(size);
+	} else {
+		new_buf = &eval_dummy;
+	}
+
+	que_node_set_val_buf_size(node, size);
+	dfield_set_data(val, new_buf, size);
+
+	return(new_buf);
+}
+
+/*********************************************************************
+Frees the value buffer of a que_node if the node's recorded buffer size
+is nonzero, i.e., if a real buffer was attached to it. Nodes with a
+recorded size of 0 are left alone. The freeing for pushed column values
+is done in sel_col_prefetch_buf_free.
+NOTE(review): the val field and the recorded buffer size are not reset
+here, so the node still refers to the freed buffer afterwards; confirm
+that callers never touch or free the node again. */
+
+void
+eval_node_free_val_buf(
+/*===================*/
+	que_node_t*	node)	/* in: query graph node */
+{
+	dfield_t*	val;
+	byte*		buf;
+
+	ut_ad(que_node_get_type(node) == QUE_NODE_SYMBOL
+	      || que_node_get_type(node) == QUE_NODE_FUNC);
+
+	val = que_node_get_val(node);
+	buf = dfield_get_data(val);
+
+	if (que_node_get_val_buf_size(node) == 0) {
+		/* Nothing was allocated for this node */
+
+		return;
+	}
+
+	ut_a(buf);
+
+	mem_free(buf);
+}
+
+/*********************************************************************
+Evaluates a comparison node: compares the values of its two arguments
+with cmp_dfield_dfield and maps the three-way result (-1, 0 or 1) to a
+boolean according to the comparison operator of the node. The boolean
+is stored as the value of the node and also returned. */
+
+ibool
+eval_cmp(
+/*=====*/
+					/* out: the result of the comparison */
+	func_node_t*	cmp_node)	/* in: comparison node */
+{
+	que_node_t*	lhs;
+	que_node_t*	rhs;
+	int		res;
+	int		func;
+	ibool		val;
+
+	ut_ad(que_node_get_type(cmp_node) == QUE_NODE_FUNC);
+
+	lhs = cmp_node->args;
+	rhs = que_node_get_next(lhs);
+
+	res = cmp_dfield_dfield(que_node_get_val(lhs), que_node_get_val(rhs));
+
+	func = cmp_node->func;
+
+	if (func == '=') {
+		val = (res == 0) ? TRUE : FALSE;
+	} else if (func == '<') {
+		val = (res == -1) ? TRUE : FALSE;
+	} else if (func == PARS_LE_TOKEN) {
+		val = (res == 1) ? FALSE : TRUE;
+	} else if (func == PARS_NE_TOKEN) {
+		val = (res == 0) ? FALSE : TRUE;
+	} else if (func == PARS_GE_TOKEN) {
+		val = (res == -1) ? FALSE : TRUE;
+	} else {
+		/* The only remaining comparison operator */
+		ut_ad(func == '>');
+
+		val = (res == 1) ? TRUE : FALSE;
+	}
+
+	eval_node_set_ibool_val(cmp_node, val);
+
+	return(val);
+}
+
+/*********************************************************************
+Evaluates a logical operation node (AND, OR or NOT) from the boolean
+values of its already evaluated arguments and stores the result as the
+value of the node. Any other operator is a fatal error. */
+UNIV_INLINE
+void
+eval_logical(
+/*=========*/
+	func_node_t*	logical_node)	/* in: logical operation node */
+{
+	que_node_t*	first;
+	que_node_t*	second;
+	ibool		lhs;
+	ibool		rhs = 0;	/* stays 0 for the unary NOT */
+	ibool		result = 0;	/* remove warning */
+	int		op;
+
+	ut_ad(que_node_get_type(logical_node) == QUE_NODE_FUNC);
+
+	first = logical_node->args;
+	second = que_node_get_next(first); /* NULL if the operator is NOT */
+
+	lhs = eval_node_get_ibool_val(first);
+
+	if (second != NULL) {
+		rhs = eval_node_get_ibool_val(second);
+	}
+
+	op = logical_node->func;
+
+	if (op == PARS_AND_TOKEN) {
+		result = lhs & rhs;
+	} else if (op == PARS_OR_TOKEN) {
+		result = lhs | rhs;
+	} else if (op == PARS_NOT_TOKEN) {
+		result = TRUE - lhs;
+	} else {
+		ut_error;
+	}
+
+	eval_node_set_ibool_val(logical_node, result);
+}
+
+/*********************************************************************
+Evaluates an arithmetic operation node ('+', binary or unary '-', '*'
+or '/') from the integer values of its already evaluated arguments and
+stores the result as the integer value of the node. A division by zero
+is a fatal error. */
+UNIV_INLINE
+void
+eval_arith(
+/*=======*/
+	func_node_t*	arith_node)	/* in: arithmetic operation node */
+{
+	que_node_t*	arg1;
+	que_node_t*	arg2;
+	lint		val1;
+	lint		val2 = 0; /* remove warning */
+	lint		val;
+	int		func;
+
+	ut_ad(que_node_get_type(arith_node) == QUE_NODE_FUNC);
+
+	arg1 = arith_node->args;
+	arg2 = que_node_get_next(arg1); /* arg2 is NULL if func is unary '-' */
+
+	val1 = eval_node_get_int_val(arg1);
+
+	if (arg2) {
+		val2 = eval_node_get_int_val(arg2);
+	}
+
+	func = arith_node->func;
+
+	if (func == '+') {
+		val = val1 + val2;
+	} else if ((func == '-') && arg2) {
+		val = val1 - val2;
+	} else if (func == '-') {
+		val = -val1;
+	} else if (func == '*') {
+		val = val1 * val2;
+	} else {
+		ut_ad(func == '/');
+
+		/* Integer division by zero is undefined behaviour in C:
+		stop in a controlled way instead */
+
+		if (val2 == 0) {
+			ut_error;
+		}
+
+		val = val1 / val2;
+	}
+
+	eval_node_set_int_val(arith_node, val);
+}
+
+/*********************************************************************
+Evaluates an aggregate operation node by accumulating into the integer
+value the node already holds: COUNT adds one, and the other aggregate,
+SUM, adds the integer value of the argument. */
+UNIV_INLINE
+void
+eval_aggregate(
+/*===========*/
+	func_node_t*	node)	/* in: aggregate operation node */
+{
+	lint	acc;
+	lint	increment;
+
+	ut_ad(que_node_get_type(node) == QUE_NODE_FUNC);
+
+	acc = eval_node_get_int_val(node);
+
+	if (node->func == PARS_COUNT_TOKEN) {
+		increment = 1;
+	} else {
+		ut_ad(node->func == PARS_SUM_TOKEN);
+
+		increment = eval_node_get_int_val(node->args);
+	}
+
+	eval_node_set_int_val(node, acc + increment);
+}
+
+/*********************************************************************
+Evaluates a predefined function node where the function is not relevant
+in benchmarks. Handles:
+PRINTF: prints every argument with dfield_print and then a newline to
+stderr;
+ASSERT: prints a message to stderr and fails an assertion if the first
+argument is false;
+RND: sets the node value to an integer in the closed range [arg1, arg2]
+derived from the seed eval_rnd, and advances the seed;
+RND_STR: fills the node value buffer with arg1 bytes, each of them 97,
+98 or 99 ('a', 'b' or 'c' in ASCII), advancing the seed for each byte.
+Any other function is a fatal error. */
+static
+void
+eval_predefined_2(
+/*==============*/
+	func_node_t*	func_node)	/* in: predefined function node */
+{
+	que_node_t*	arg;
+	que_node_t*	arg1;
+	que_node_t*	arg2 = 0; /* remove warning (??? bug ???) */
+	lint		result;
+	byte*		buf;
+	ulint		low;
+	ulint		high;
+	ulint		pos;
+	int		func;
+
+	ut_ad(que_node_get_type(func_node) == QUE_NODE_FUNC);
+
+	arg1 = func_node->args;
+
+	if (arg1 != NULL) {
+		arg2 = que_node_get_next(arg1);
+	}
+
+	func = func_node->func;
+
+	if (func == PARS_PRINTF_TOKEN) {
+
+		for (arg = arg1; arg != NULL; arg = que_node_get_next(arg)) {
+			dfield_print(que_node_get_val(arg));
+		}
+
+		putc('\n', stderr);
+
+	} else if (func == PARS_ASSERT_TOKEN) {
+
+		if (!eval_node_get_ibool_val(arg1)) {
+			fputs("SQL assertion fails in a stored procedure!\n",
+				stderr);
+		}
+
+		ut_a(eval_node_get_ibool_val(arg1));
+
+		/* This function, or more precisely, a debug procedure,
+		returns no value */
+
+	} else if (func == PARS_RND_TOKEN) {
+
+		low = (ulint)eval_node_get_int_val(arg1);
+		high = (ulint)eval_node_get_int_val(arg2);
+
+		ut_ad(high >= low);
+
+		result = (lint)low;
+
+		if (high > low) {
+			/* Map the seed to the range low ... high */
+			result = (lint)(low + (eval_rnd % (high - low + 1)));
+		}
+
+		eval_rnd = ut_rnd_gen_next_ulint(eval_rnd);
+
+		eval_node_set_int_val(func_node, result);
+
+	} else if (func == PARS_RND_STR_TOKEN) {
+
+		low = (ulint)eval_node_get_int_val(arg1);
+
+		buf = eval_node_ensure_val_buf(func_node, low);
+
+		for (pos = 0; pos < low; pos++) {
+			buf[pos] = (byte)(97 + (eval_rnd % 3));
+
+			eval_rnd = ut_rnd_gen_next_ulint(eval_rnd);
+		}
+	} else {
+		ut_error;
+	}
+}
+
+/*********************************************************************
+Evaluates a notfound-function node. The argument is a cursor symbol: if
+it is a literal (asserted in debug builds to start with "SQL"), the
+select node examined is the last select node of the query graph the
+symbol belongs to; otherwise it is the cursor definition of the symbol's
+alias. The value of the function node is set to TRUE if that select node
+is in the state SEL_NODE_NO_MORE_ROWS, and to FALSE otherwise. */
+UNIV_INLINE
+void
+eval_notfound(
+/*==========*/
+	func_node_t*	func_node)	/* in: function node */
+{
+	sym_node_t*	cursor;
+	sel_node_t*	sel_node;
+	ibool		ibool_val;
+
+	ut_ad(func_node->func == PARS_NOTFOUND_TOKEN);
+
+	/* The function takes a single argument: the cursor */
+	cursor = func_node->args;
+
+	ut_ad(que_node_get_type(cursor) == QUE_NODE_SYMBOL);
+
+	if (cursor->token_type == SYM_LIT) {
+
+		ut_ad(ut_memcmp(dfield_get_data(que_node_get_val(cursor)),
+							"SQL", 3) == 0);
+
+		sel_node = cursor->sym_table->query_graph->last_sel_node;
+	} else {
+		sel_node = cursor->alias->cursor_def;
+	}
+
+	if (sel_node->state == SEL_NODE_NO_MORE_ROWS) {
+		ibool_val = TRUE;
+	} else {
+		ibool_val = FALSE;
+	}
+
+	eval_node_set_ibool_val(func_node, ibool_val);
+}
+
+/*********************************************************************
+Evaluates a substr-function node with the arguments (string, offset,
+length). No bytes are copied: the value of the function node is made to
+point into the data of the first argument, starting at the offset and
+spanning length bytes.
+NOTE(review): neither the offset nor the length is checked against the
+length of the first argument here (eval_replstr does check its range);
+confirm that callers guarantee the range is valid. */
+UNIV_INLINE
+void
+eval_substr(
+/*========*/
+	func_node_t*	func_node)	/* in: function node */
+{
+	que_node_t*	str_arg;
+	que_node_t*	offset_arg;
+	que_node_t*	length_arg;
+	byte*		str;
+	ulint		offset;
+	ulint		length;
+
+	str_arg = func_node->args;
+	offset_arg = que_node_get_next(str_arg);
+
+	ut_ad(func_node->func == PARS_SUBSTR_TOKEN);
+
+	length_arg = que_node_get_next(offset_arg);
+
+	str = dfield_get_data(que_node_get_val(str_arg));
+
+	offset = (ulint)eval_node_get_int_val(offset_arg);
+	length = (ulint)eval_node_get_int_val(length_arg);
+
+	dfield_set_data(que_node_get_val(func_node), str + offset, length);
+}
+
+/*********************************************************************
+Evaluates a replstr-procedure node with the arguments (target, source,
+offset, length): overwrites length bytes of the target value, starting
+at offset, with the first length bytes of the source value. It is a
+fatal error if the target is shorter than offset + length or the source
+is shorter than length. */
+static
+void
+eval_replstr(
+/*=========*/
+	func_node_t*	func_node)	/* in: function node */
+{
+	que_node_t*	target_arg;
+	que_node_t*	source_arg;
+	que_node_t*	offset_arg;
+	que_node_t*	length_arg;
+	byte*		target;
+	byte*		source;
+	ulint		offset;
+	ulint		length;
+
+	target_arg = func_node->args;
+	source_arg = que_node_get_next(target_arg);
+
+	ut_ad(que_node_get_type(target_arg) == QUE_NODE_SYMBOL);
+
+	offset_arg = que_node_get_next(source_arg);
+	length_arg = que_node_get_next(offset_arg);
+
+	target = dfield_get_data(que_node_get_val(target_arg));
+	source = dfield_get_data(que_node_get_val(source_arg));
+
+	offset = (ulint)eval_node_get_int_val(offset_arg);
+	length = (ulint)eval_node_get_int_val(length_arg);
+
+	/* The replaced range must fit in the target ... */
+	if (dfield_get_len(que_node_get_val(target_arg)) < offset + length) {
+
+		ut_error;
+	}
+
+	/* ... and the source must contain enough bytes */
+	if (dfield_get_len(que_node_get_val(source_arg)) < length) {
+
+		ut_error;
+	}
+
+	ut_memcpy(target + offset, source, length);
+}
+		
+/*********************************************************************
+Evaluates an instr-function node: searches the value of the first
+argument for the first occurrence of the value of the second argument.
+The integer value of the function node is set to the 1-based position
+of the match, or to 0 if there is no match. An empty search string is a
+fatal error. */
+static
+void
+eval_instr(
+/*=======*/
+	func_node_t*	func_node)	/* in: function node */
+{
+	que_node_t*	haystack_arg;
+	que_node_t*	needle_arg;
+	dfield_t*	haystack_field;
+	dfield_t*	needle_field;
+	byte*		haystack;
+	byte*		needle;
+	byte		first_char;
+	ulint		haystack_len;
+	ulint		needle_len;
+	ulint		start;
+	ulint		matched;
+	lint		int_val;
+
+	haystack_arg = func_node->args;
+	needle_arg = que_node_get_next(haystack_arg);
+
+	haystack_field = que_node_get_val(haystack_arg);
+	needle_field = que_node_get_val(needle_arg);
+
+	haystack = dfield_get_data(haystack_field);
+	needle = dfield_get_data(needle_field);
+
+	haystack_len = dfield_get_len(haystack_field);
+	needle_len = dfield_get_len(needle_field);
+
+	if (needle_len == 0) {
+		ut_error;
+	}
+
+	first_char = needle[0];
+
+	/* The value reported when no match is found */
+	int_val = 0;
+
+	for (start = 0; start < haystack_len; start++) {
+
+		if (haystack[start] != first_char) {
+
+			continue;
+		}
+
+		if (start + needle_len > haystack_len) {
+			/* The rest of the string is too short for a match */
+
+			break;
+		}
+
+		/* The first character matched: compare the remaining ones */
+
+		matched = 1;
+
+		while (matched < needle_len
+			&& haystack[start + matched] == needle[matched]) {
+
+			matched++;
+		}
+
+		if (matched == needle_len) {
+			int_val = start + 1;
+
+			break;
+		}
+	}
+
+	eval_node_set_int_val(func_node, int_val);
+}
+
+/*********************************************************************
+Evaluates a binary_to_number-function node: takes the value of the
+argument, which may be at most 4 bytes long, pads it on the left with
+zero bytes to exactly 4 bytes, and copies the 4 bytes to the value of
+the function node. An argument longer than 4 bytes is a fatal error. */
+UNIV_INLINE
+void
+eval_binary_to_number(
+/*==================*/
+	func_node_t*	func_node)	/* in: function node */
+{
+	dfield_t*	arg_val;
+	byte*		src;
+	byte*		four_bytes;
+	ulint		src_len;
+	byte		padded[4] = {0, 0, 0, 0};
+
+	arg_val = que_node_get_val(func_node->args);
+
+	src = dfield_get_data(arg_val);
+	src_len = dfield_get_len(arg_val);
+
+	if (src_len > 4) {
+		ut_error;
+	}
+
+	if (src_len == 4) {
+		/* Already of full width: use the argument data directly */
+		four_bytes = src;
+	} else {
+		/* Right-align the bytes in a zero-filled 4-byte buffer */
+		ut_memcpy(padded + (4 - src_len), src, src_len);
+
+		four_bytes = padded;
+	}
+
+	eval_node_copy_and_alloc_val(func_node, four_bytes, 4);
+}
+		
+/*********************************************************************
+Evaluates a concat-function node: makes sure the value buffer of the
+function node can hold the combined length of all the argument values,
+and copies the argument values into it one after another, in argument
+order. */
+static
+void
+eval_concat(
+/*========*/
+	func_node_t*	func_node)	/* in: function node */
+{
+	que_node_t*	arg;
+	dfield_t*	piece;
+	byte*		buf;
+	ulint		total_len;
+	ulint		offset;
+	ulint		piece_len;
+
+	/* First pass: compute the length of the result */
+
+	total_len = 0;
+
+	for (arg = func_node->args; arg != NULL;
+					arg = que_node_get_next(arg)) {
+
+		total_len += dfield_get_len(que_node_get_val(arg));
+	}
+
+	buf = eval_node_ensure_val_buf(func_node, total_len);
+
+	/* Second pass: copy the pieces to their places */
+
+	offset = 0;
+
+	for (arg = func_node->args; arg != NULL;
+					arg = que_node_get_next(arg)) {
+
+		piece = que_node_get_val(arg);
+		piece_len = dfield_get_len(piece);
+
+		ut_memcpy(buf + offset, dfield_get_data(piece), piece_len);
+
+		offset += piece_len;
+	}
+}
+
+/*********************************************************************
+Evaluates a to_binary-function node. No bytes are copied: the value of
+the function node is made to point into the data of the first argument.
+If the first argument is not of the main type DATA_INT, the result
+covers the whole value of the argument. If it is an integer, the second
+argument gives the wanted length in bytes, at most 4, and the result
+covers the last that many bytes of the first 4 bytes of the integer
+value. A wanted length greater than 4 is a fatal error. */
+UNIV_INLINE
+void
+eval_to_binary(
+/*===========*/
+	func_node_t*	func_node)	/* in: function node */
+{
+	que_node_t*	value_arg;
+	que_node_t*	width_arg;
+	dfield_t*	result;
+	byte*		src;
+	ulint		width;
+
+	value_arg = func_node->args;
+
+	src = dfield_get_data(que_node_get_val(value_arg));
+
+	result = que_node_get_val(func_node);
+
+	if (dtype_get_mtype(que_node_get_data_type(value_arg)) != DATA_INT) {
+		/* Not an integer: expose the argument value as such */
+
+		dfield_set_data(result, src,
+				dfield_get_len(que_node_get_val(value_arg)));
+
+		return;
+	}
+
+	width_arg = que_node_get_next(value_arg);
+
+	width = (ulint)eval_node_get_int_val(width_arg);
+
+	if (width > 4) {
+
+		ut_error;
+	}
+
+	/* Skip the leading bytes which do not fit in the wanted width */
+	dfield_set_data(result, src + (4 - width), width);
+}
+
+/*********************************************************************
+Evaluates a predefined function node. Handles here:
+LENGTH: the length of the value of the argument;
+TO_CHAR: the argument as a signed decimal integer string, stored in the
+value buffer of the function node and followed by a NUL byte which is
+not counted in the length of the value;
+TO_NUMBER: the argument string converted to an integer with atoi();
+SYSDATE: the value of ut_time().
+All other functions are passed on to eval_predefined_2. */
+UNIV_INLINE
+void
+eval_predefined(
+/*============*/
+	func_node_t*	func_node)	/* in: function node */
+{
+	que_node_t*	arg1;
+	lint		int_val;
+	byte*		data;
+	int		func;
+
+	func = func_node->func;
+
+	arg1 = func_node->args;
+
+	if (func == PARS_LENGTH_TOKEN) {
+
+		int_val = (lint)dfield_get_len(que_node_get_val(arg1));
+
+	} else if (func == PARS_TO_CHAR_TOKEN) {
+
+		/* Convert number to character string as a
+		signed decimal integer. */
+
+		ulint	magnitude;
+		ulint	rest;
+		int	int_len;
+		int	pos;
+
+		int_val = eval_node_get_int_val(arg1);
+
+		/* Compute the absolute value as an unsigned number. For a
+		negative value, 1 is added before negating and added back
+		in unsigned arithmetic afterwards: negating the most
+		negative lint directly would overflow. */
+
+		if (int_val < 0) {
+			magnitude = ((ulint) -(int_val + 1)) + 1;
+		} else {
+			magnitude = (ulint) int_val;
+		}
+
+		/* Determine the length of the string: an optional minus
+		sign and at least one digit (the number 0 occupies 1 byte) */
+
+		int_len = (int_val < 0) ? 1 : 0;
+		rest = magnitude;
+
+		do {
+			int_len++;
+			rest /= 10;
+		} while (rest > 0);
+
+		/* allocate the string */
+		data = eval_node_ensure_val_buf(func_node, int_len + 1);
+
+		/* add terminating NUL character */
+		data[int_len] = 0;
+
+		/* convert the number, the least significant digit first,
+		filling the buffer from the end */
+
+		pos = int_len;
+		rest = magnitude;
+
+		do {
+			data[--pos] = (byte) ('0' + (byte)(rest % 10));
+			rest /= 10;
+		} while (rest > 0);
+
+		if (int_val < 0) {
+			data[0] = '-'; /* preceding minus sign */
+		}
+
+		dfield_set_len((dfield_t*) que_node_get_val(func_node),
+			int_len);
+
+		return;
+
+	} else if (func == PARS_TO_NUMBER_TOKEN) {
+
+		/* NOTE(review): atoi() reads up to a terminating NUL byte
+		and ignores the length of the field; confirm that the
+		argument data is always NUL-terminated here */
+
+		int_val = atoi((char*)
+			dfield_get_data(que_node_get_val(arg1)));
+
+	} else if (func == PARS_SYSDATE_TOKEN) {
+		int_val = (lint)ut_time();
+	} else {
+		eval_predefined_2(func_node);
+
+		return;
+	}
+
+	eval_node_set_int_val(func_node, int_val);
+}
+
+/*********************************************************************
+Evaluates a function node: first evaluates every argument expression
+and then dispatches on the class of the function, and for predefined
+functions on the function itself. An argument evaluating to SQL NULL is
+a fatal error, except for comparisons, notfound and printf. */
+
+void
+eval_func(
+/*======*/
+	func_node_t*	func_node)	/* in: function node */
+{
+	que_node_t*	arg;
+	ulint		func_class;
+	ulint		func;
+	ibool		null_allowed;
+
+	ut_ad(que_node_get_type(func_node) == QUE_NODE_FUNC);
+
+	func_class = func_node->class;
+	func = func_node->func;
+
+	/* The functions are not defined for SQL null argument
+	values, except for eval_cmp, notfound and printf */
+
+	null_allowed = (func_class == PARS_FUNC_CMP)
+			|| (func == PARS_NOTFOUND_TOKEN)
+			|| (func == PARS_PRINTF_TOKEN);
+
+	/* Evaluate first the argument list */
+
+	for (arg = func_node->args; arg != NULL;
+					arg = que_node_get_next(arg)) {
+		eval_exp(arg);
+
+		if ((dfield_get_len(que_node_get_val(arg)) == UNIV_SQL_NULL)
+							&& !null_allowed) {
+			ut_error;
+		}
+	}
+
+	if (func_class == PARS_FUNC_CMP) {
+		eval_cmp(func_node);
+	} else if (func_class == PARS_FUNC_ARITH) {
+		eval_arith(func_node);
+	} else if (func_class == PARS_FUNC_AGGREGATE) {
+		eval_aggregate(func_node);
+	} else if (func_class != PARS_FUNC_PREDEFINED) {
+		ut_ad(func_class == PARS_FUNC_LOGICAL);
+
+		eval_logical(func_node);
+	} else if (func == PARS_NOTFOUND_TOKEN) {
+		eval_notfound(func_node);
+	} else if (func == PARS_SUBSTR_TOKEN) {
+		eval_substr(func_node);
+	} else if (func == PARS_REPLSTR_TOKEN) {
+		eval_replstr(func_node);
+	} else if (func == PARS_INSTR_TOKEN) {
+		eval_instr(func_node);
+	} else if (func == PARS_BINARY_TO_NUMBER_TOKEN) {
+		eval_binary_to_number(func_node);
+	} else if (func == PARS_CONCAT_TOKEN) {
+		eval_concat(func_node);
+	} else if (func == PARS_TO_BINARY_TOKEN) {
+		eval_to_binary(func_node);
+	} else {
+		eval_predefined(func_node);
+	}
+}
--- a/eval/eval0proc.c
+++ b/eval/eval0proc.c
@ -0,0 +1,245 @@
+/******************************************************
+Executes SQL stored procedures and their control structures
+
+(c) 1998 Innobase Oy
+
+Created 1/20/1998 Heikki Tuuri
+*******************************************************/
+
+#include "eval0proc.h"
+
+#ifdef UNIV_NONINL
+#include "eval0proc.ic"
+#endif
+
+/**************************************************************************
+Performs an execution step of an if-statement node. When the node is
+entered from its parent, the condition is evaluated and the node to run
+next is chosen: the statement list if the condition holds, otherwise the
+else part if there is one, otherwise the statement list of the first
+elsif branch whose condition holds. When control comes back from a
+branch, or no branch was chosen, execution continues from the parent of
+the if node. */
+
+que_thr_t*
+if_step(
+/*====*/
+				/* out: query thread to run next or NULL */
+	que_thr_t*	thr)	/* in: query thread */
+{
+	if_node_t*	node;
+	elsif_node_t*	elsif_node;
+	que_node_t*	next_node = NULL;
+
+	ut_ad(thr);
+
+	node = thr->run_node;
+	ut_ad(que_node_get_type(node) == QUE_NODE_IF);
+
+	if (thr->prev_node != que_node_get_parent(node)) {
+
+		/* We return from the last statement of a branch: move to
+		the next statement after the if */
+
+		ut_ad(que_node_get_next(thr->prev_node) == NULL);
+
+	} else {
+		/* Evaluate the condition */
+
+		eval_exp(node->cond);
+
+		if (eval_node_get_ibool_val(node->cond)) {
+
+			/* The condition evaluated to TRUE: start execution
+			from the first statement in the statement list */
+
+			next_node = node->stat_list;
+
+		} else if (node->else_part) {
+			next_node = node->else_part;
+
+		} else {
+			/* Look for the first elsif branch, if any, whose
+			condition evaluates to TRUE */
+
+			for (elsif_node = node->elsif_list;
+				elsif_node != NULL;
+				elsif_node = que_node_get_next(elsif_node)) {
+
+				eval_exp(elsif_node->cond);
+
+				if (eval_node_get_ibool_val(
+						elsif_node->cond)) {
+
+					next_node = elsif_node->stat_list;
+
+					break;
+				}
+			}
+		}
+	}
+
+	if (next_node == NULL) {
+		/* Nothing to run inside the if statement */
+
+		next_node = que_node_get_parent(node);
+	}
+
+	thr->run_node = next_node;
+
+	return(thr);
+}
+
+/**************************************************************************
+Performs an execution step of a while-statement node: evaluates the
+loop condition and continues either from the first statement of the
+loop body, if the condition holds, or from the parent of the while node,
+if it does not. */
+
+que_thr_t*
+while_step(
+/*=======*/
+				/* out: query thread to run next or NULL */
+	que_thr_t*	thr)	/* in: query thread */
+{
+	while_node_t*	node;
+	que_node_t*	next_node;
+
+	ut_ad(thr);
+
+	node = thr->run_node;
+	ut_ad(que_node_get_type(node) == QUE_NODE_WHILE);
+
+	/* We come here either from the parent or from the last statement
+	of the loop body */
+
+	ut_ad((thr->prev_node == que_node_get_parent(node))
+			|| (que_node_get_next(thr->prev_node) == NULL));
+
+	/* Evaluate the condition */
+
+	eval_exp(node->cond);
+
+	if (!eval_node_get_ibool_val(node->cond)) {
+
+		/* The loop is finished */
+
+		next_node = que_node_get_parent(node);
+	} else {
+		/* The condition evaluated to TRUE: start execution
+		from the first statement in the statement list */
+
+		next_node = node->stat_list;
+	}
+
+	thr->run_node = next_node;
+
+	return(thr);
+}
+
+/**************************************************************************
+Performs an execution step of an assignment statement node: evaluates
+the expression on the right side, copies its value to the alias of the
+assigned variable, and continues from the parent of the assignment
+node. */
+
+que_thr_t*
+assign_step(
+/*========*/
+				/* out: query thread to run next or NULL */
+	que_thr_t*	thr)	/* in: query thread */
+{
+	assign_node_t*	assignment;
+
+	ut_ad(thr);
+
+	assignment = thr->run_node;
+	ut_ad(que_node_get_type(assignment) == QUE_NODE_ASSIGNMENT);
+
+	/* Evaluate the value to assign and store it */
+
+	eval_exp(assignment->val);
+	eval_node_copy_val(assignment->var->alias, assignment->val);
+
+	thr->run_node = que_node_get_parent(assignment);
+
+	return(thr);
+}
+
+/**************************************************************************
+Performs an execution step of a for-loop node. When the node is entered
+from its parent, both loop limits are evaluated, the end value is stored
+in the node and the loop variable starts from the start limit. When
+control comes from a statement of the loop body, execution moves to the
+next statement of the body; after the last one the loop variable is
+incremented by one. The body is run as long as the loop variable does
+not exceed the end value; after that execution continues from the
+parent. */
+
+que_thr_t*
+for_step(
+/*=====*/
+				/* out: query thread to run next or NULL */
+	que_thr_t*	thr)	/* in: query thread */
+{
+	for_node_t*	node;
+	que_node_t*	parent;
+	lint		counter;
+
+	ut_ad(thr);
+
+	node = thr->run_node;
+
+	ut_ad(que_node_get_type(node) == QUE_NODE_FOR);
+
+	parent = que_node_get_parent(node);
+
+	if (thr->prev_node == parent) {
+		/* Initialize the loop */
+
+		eval_exp(node->loop_start_limit);
+		eval_exp(node->loop_end_limit);
+
+		counter = eval_node_get_int_val(node->loop_start_limit);
+
+		node->loop_end_value = eval_node_get_int_val(
+							node->loop_end_limit);
+	} else {
+		/* Move to the next statement */
+		thr->run_node = que_node_get_next(thr->prev_node);
+
+		if (thr->run_node != NULL) {
+
+			return(thr);
+		}
+
+		/* The body was run to its end: increment the value of
+		loop_var */
+
+		counter = 1 + eval_node_get_int_val(node->loop_var);
+	}
+
+	/* Check if we should do another loop */
+
+	if (counter <= node->loop_end_value) {
+		eval_node_set_int_val(node->loop_var, counter);
+
+		thr->run_node = node->stat_list;
+	} else {
+		/* Enough loops done */
+
+		thr->run_node = parent;
+	}
+
+	return(thr);
+}
+
+/**************************************************************************
+Performs an execution step of a return-statement node: climbs up from
+the return node to the enclosing procedure node and continues execution
+from the parent of the procedure node. It is a fatal error if there is
+no enclosing procedure node. */
+
+que_thr_t*
+return_step(
+/*========*/
+				/* out: query thread to run next or NULL */
+	que_thr_t*	thr)	/* in: query thread */
+{
+	return_node_t*	node;
+	que_node_t*	parent;
+
+	ut_ad(thr);
+
+	node = thr->run_node;
+
+	ut_ad(que_node_get_type(node) == QUE_NODE_RETURN);
+
+	parent = node;
+
+	while (que_node_get_type(parent) != QUE_NODE_PROC) {
+
+		parent = que_node_get_parent(parent);
+
+		/* Check the ancestor before its type is read on the next
+		round: a return statement outside a procedure must fail
+		here and not in a NULL pointer dereference */
+
+		ut_a(parent);
+	}
+
+	thr->run_node = que_node_get_parent(parent);
+
+	return(thr);
+}
--- a/export.sh
+++ b/export.sh
@ -0,0 +1,23 @@
+#!/bin/bash
+#
+# export current working directory in a format suitable for sending to
+# MySQL as a snapshot.
+#
+# The snapshot is built in ./to-mysql: the exported tree is moved into
+# to-mysql/innobase, the handler files into to-mysql/sql and the test
+# files into to-mysql/mysql-test/{t,r,include}.
+
+rm -rf to-mysql
+
+# Everything below moves and removes files relative to the current
+# directory, so stop at once if the export or a directory change fails:
+# otherwise the commands would run in the live working copy.
+svn export . to-mysql || exit 1
+cd to-mysql || exit 1
+
+mkdir innobase
+mv * innobase
+mkdir -p sql mysql-test/t mysql-test/r mysql-test/include
+cd innobase || exit 1
+
+mv handler/* ../sql
+rmdir handler
+
+mv mysql-test/*.test mysql-test/*.opt ../mysql-test/t
+mv mysql-test/*.result ../mysql-test/r
+mv mysql-test/*.inc ../mysql-test/include
+rmdir mysql-test
+
+# The helper scripts are not part of the snapshot
+rm setup.sh export.sh
--- a/fil/Makefile.am
+++ b/fil/Makefile.am
@ -0,0 +1,24 @@
+# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+# & Innobase Oy
+# 
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+include ../include/Makefile.i
+
+noinst_LIBRARIES =	libfil.a
+
+libfil_a_SOURCES =	fil0fil.c
+
+EXTRA_PROGRAMS =	
--- a/fil/fil0fil.c
+++ b/fil/fil0fil.c
--- a/fsp/Makefile.am
+++ b/fsp/Makefile.am
@ -0,0 +1,25 @@
+# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+# & Innobase Oy
+# 
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+
+include ../include/Makefile.i
+
+noinst_LIBRARIES =	libfsp.a
+
+libfsp_a_SOURCES =	fsp0fsp.c
+
+EXTRA_PROGRAMS =	
--- a/fsp/fsp0fsp.c
+++ b/fsp/fsp0fsp.c
--- a/fut/Makefile.am
+++ b/fut/Makefile.am
@ -0,0 +1,24 @@
+# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+# & Innobase Oy
+# 
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+include ../include/Makefile.i
+
+noinst_LIBRARIES =	libfut.a
+
+libfut_a_SOURCES =	fut0fut.c fut0lst.c
+
+EXTRA_PROGRAMS =	
--- a/fut/fut0fut.c
+++ b/fut/fut0fut.c
@ -0,0 +1,14 @@
+/**********************************************************************
+File-based utilities
+
+(c) 1995 Innobase Oy
+
+Created 12/13/1995 Heikki Tuuri
+***********************************************************************/
+
+#include "fut0fut.h"
+
+#ifdef UNIV_NONINL
+#include "fut0fut.ic"
+#endif
+
--- a/fut/fut0lst.c
+++ b/fut/fut0lst.c
@ -0,0 +1,518 @@
+/**********************************************************************
+File-based list utilities
+
+(c) 1995 Innobase Oy
+
+Created 11/28/1995 Heikki Tuuri
+***********************************************************************/
+
+#include "fut0lst.h"
+
+#ifdef UNIV_NONINL
+#include "fut0lst.ic"
+#endif
+
+#include "buf0buf.h"
+
+
+/************************************************************************
+Makes a node the sole member of a list which is currently empty. The node
+becomes both the first and the last element, its own prev and next links
+are set to null, and the stored list length goes from 0 to 1. */
+static
+void
+flst_add_to_empty(
+/*==============*/
+	flst_base_node_t*	base,	/* in: pointer to base node of
+					empty list */
+	flst_node_t*		node,	/* in: node to add */
+	mtr_t*			mtr)	/* in: mini-transaction handle */
+{
+	fil_addr_t	addr_of_node;
+	ulint		space_id;
+	ulint		len;
+
+	ut_ad(mtr && base && node);
+	ut_ad(base != node);
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
+						MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(node),
+						MTR_MEMO_PAGE_X_FIX));
+
+	/* This function may only be used on a list with no members */
+	len = flst_get_len(base, mtr);
+	ut_a(len == 0);
+
+	/* Only the file address is used below; space_id is filled in by
+	the call but not needed afterwards */
+	buf_ptr_get_fsp_addr(node, &space_id, &addr_of_node);
+
+	/* The base node points to the new node from both ends */
+	flst_write_addr(base + FLST_FIRST, addr_of_node, mtr);
+	flst_write_addr(base + FLST_LAST, addr_of_node, mtr);
+
+	/* The new node has no neighbours */
+	flst_write_addr(node + FLST_PREV, fil_addr_null, mtr);
+	flst_write_addr(node + FLST_NEXT, fil_addr_null, mtr);
+
+	/* The list now holds exactly one node */
+	mlog_write_ulint(base + FLST_LEN, len + 1, MLOG_4BYTES, mtr);
+}
+
+/************************************************************************
+Appends a node to the end of a list. An empty list is handled by
+flst_add_to_empty; otherwise the node is inserted after the current last
+node with flst_insert_after. */
+
+void
+flst_add_last(
+/*==========*/
+	flst_base_node_t*	base,	/* in: pointer to base node of list */
+	flst_node_t*		node,	/* in: node to add */
+	mtr_t*			mtr)	/* in: mini-transaction handle */
+{
+	ulint		space_id;
+	ulint		n_nodes;
+	fil_addr_t	new_addr;
+	fil_addr_t	tail_addr;
+	flst_node_t*	tail;
+
+	ut_ad(mtr && base && node);
+	ut_ad(base != node);
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
+						MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(node),
+						MTR_MEMO_PAGE_X_FIX));
+
+	n_nodes = flst_get_len(base, mtr);
+	tail_addr = flst_get_last(base, mtr);
+
+	buf_ptr_get_fsp_addr(node, &space_id, &new_addr);
+
+	if (n_nodes == 0) {
+		/* Nothing to link after: the node becomes the only member */
+		flst_add_to_empty(base, node, mtr);
+
+		return;
+	}
+
+	/* Locate the current last node. When it resides on the same page
+	as the new node we address it directly within that page frame
+	instead of fetching the page again through fut_get_ptr */
+	if (tail_addr.page != new_addr.page) {
+		tail = fut_get_ptr(space_id, tail_addr, RW_X_LATCH, mtr);
+	} else {
+		tail = buf_frame_align(node) + tail_addr.boffset;
+	}
+
+	flst_insert_after(base, tail, node, mtr);
+}
+
+/************************************************************************
+Prepends a node to the beginning of a list. An empty list is handled by
+flst_add_to_empty; otherwise the node is inserted before the current first
+node with flst_insert_before. */
+
+void
+flst_add_first(
+/*===========*/
+	flst_base_node_t*	base,	/* in: pointer to base node of list */
+	flst_node_t*		node,	/* in: node to add */
+	mtr_t*			mtr)	/* in: mini-transaction handle */
+{
+	ulint		space_id;
+	ulint		n_nodes;
+	fil_addr_t	new_addr;
+	fil_addr_t	head_addr;
+	flst_node_t*	head;
+
+	ut_ad(mtr && base && node);
+	ut_ad(base != node);
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
+						MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(node),
+						MTR_MEMO_PAGE_X_FIX));
+
+	n_nodes = flst_get_len(base, mtr);
+	head_addr = flst_get_first(base, mtr);
+
+	buf_ptr_get_fsp_addr(node, &space_id, &new_addr);
+
+	if (n_nodes == 0) {
+		/* Nothing to link before: the node becomes the only member */
+		flst_add_to_empty(base, node, mtr);
+
+		return;
+	}
+
+	/* Locate the current first node. When it resides on the same page
+	as the new node we address it directly within that page frame
+	instead of fetching the page again through fut_get_ptr */
+	if (head_addr.page != new_addr.page) {
+		head = fut_get_ptr(space_id, head_addr, RW_X_LATCH, mtr);
+	} else {
+		head = buf_frame_align(node) + head_addr.boffset;
+	}
+
+	flst_insert_before(base, node, head, mtr);
+}
+
+/************************************************************************
+Links node2 into a list directly behind node1. If node1 had a successor,
+the prev field of that successor is redirected to node2; otherwise node2
+becomes the new last node recorded in the base node. The list length
+stored in the base node is incremented by one. */
+
+void
+flst_insert_after(
+/*==============*/
+	flst_base_node_t*	base,	/* in: pointer to base node of list */
+	flst_node_t*		node1,	/* in: node to insert after */
+	flst_node_t*		node2,	/* in: node to add */
+	mtr_t*			mtr)	/* in: mini-transaction handle */
+{
+	ulint		space_id;
+	fil_addr_t	pred_addr;	/* address of node1 */
+	fil_addr_t	new_addr;	/* address of node2 */
+	fil_addr_t	succ_addr;	/* address of the old successor
+					of node1, or null */
+	flst_node_t*	succ;
+
+	ut_ad(mtr && node1 && node2 && base);
+	ut_ad(base != node1);
+	ut_ad(base != node2);
+	ut_ad(node2 != node1);
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
+						MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(node1),
+						MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(node2),
+						MTR_MEMO_PAGE_X_FIX));
+
+	/* space_id ends up holding the value reported for node2; the same
+	space id is used below to fetch the successor */
+	buf_ptr_get_fsp_addr(node1, &space_id, &pred_addr);
+	buf_ptr_get_fsp_addr(node2, &space_id, &new_addr);
+
+	succ_addr = flst_get_next_addr(node1, mtr);
+
+	/* Point the new node at both of its neighbours */
+	flst_write_addr(node2 + FLST_PREV, pred_addr, mtr);
+	flst_write_addr(node2 + FLST_NEXT, succ_addr, mtr);
+
+	if (fil_addr_is_null(succ_addr)) {
+		/* node1 was the last node: node2 takes over that role */
+		flst_write_addr(base + FLST_LAST, new_addr, mtr);
+	} else {
+		/* Make the old successor point back to the new node */
+		succ = fut_get_ptr(space_id, succ_addr, RW_X_LATCH, mtr);
+		flst_write_addr(succ + FLST_PREV, new_addr, mtr);
+	}
+
+	/* Finally make node1 point forward to the new node */
+	flst_write_addr(node1 + FLST_NEXT, new_addr, mtr);
+
+	/* One more node in the list */
+	mlog_write_ulint(base + FLST_LEN, flst_get_len(base, mtr) + 1,
+							MLOG_4BYTES, mtr);
+}
+
+/************************************************************************
+Links node2 into a list directly in front of node3. If node3 had a
+predecessor, the next field of that predecessor is redirected to node2;
+otherwise node2 becomes the new first node recorded in the base node. The
+list length stored in the base node is incremented by one. */
+
+void
+flst_insert_before(
+/*===============*/
+	flst_base_node_t*	base,	/* in: pointer to base node of list */
+	flst_node_t*		node2,	/* in: node to insert */
+	flst_node_t*		node3,	/* in: node to insert before */
+	mtr_t*			mtr)	/* in: mini-transaction handle */
+{
+	ulint		space_id;
+	fil_addr_t	new_addr;	/* address of node2 */
+	fil_addr_t	succ_addr;	/* address of node3 */
+	fil_addr_t	pred_addr;	/* address of the old predecessor
+					of node3, or null */
+	flst_node_t*	pred;
+
+	ut_ad(mtr && node2 && node3 && base);
+	ut_ad(base != node2);
+	ut_ad(base != node3);
+	ut_ad(node2 != node3);
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
+						MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(node2),
+						MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(node3),
+						MTR_MEMO_PAGE_X_FIX));
+
+	/* space_id ends up holding the value reported for node3; the same
+	space id is used below to fetch the predecessor */
+	buf_ptr_get_fsp_addr(node2, &space_id, &new_addr);
+	buf_ptr_get_fsp_addr(node3, &space_id, &succ_addr);
+
+	pred_addr = flst_get_prev_addr(node3, mtr);
+
+	/* Point the new node at both of its neighbours */
+	flst_write_addr(node2 + FLST_PREV, pred_addr, mtr);
+	flst_write_addr(node2 + FLST_NEXT, succ_addr, mtr);
+
+	if (fil_addr_is_null(pred_addr)) {
+		/* node3 was the first node: node2 takes over that role */
+		flst_write_addr(base + FLST_FIRST, new_addr, mtr);
+	} else {
+		/* Make the old predecessor point forward to the new node */
+		pred = fut_get_ptr(space_id, pred_addr, RW_X_LATCH, mtr);
+		flst_write_addr(pred + FLST_NEXT, new_addr, mtr);
+	}
+
+	/* Finally make node3 point back to the new node */
+	flst_write_addr(node3 + FLST_PREV, new_addr, mtr);
+
+	/* One more node in the list */
+	mlog_write_ulint(base + FLST_LEN, flst_get_len(base, mtr) + 1,
+							MLOG_4BYTES, mtr);
+}
+
+/************************************************************************
+Removes a node from a list. The neighbours of the removed node (or the
+first/last fields of the base node, when there is no neighbour on that
+side) are relinked around it and the list length stored in the base node
+is decremented by one. The prev and next fields of the removed node itself
+are not modified. */
+
+void
+flst_remove(
+/*========*/
+	flst_base_node_t*	base,	/* in: pointer to base node of list */
+	flst_node_t*		node2,	/* in: node to remove */
+	mtr_t*			mtr)	/* in: mini-transaction handle */
+{
+	ulint		space;
+	flst_node_t*	node1;		/* predecessor of node2, if any */
+	fil_addr_t	node1_addr;
+	fil_addr_t	node2_addr;
+	flst_node_t*	node3;		/* successor of node2, if any */
+	fil_addr_t	node3_addr;
+	ulint		len;
+	
+	ut_ad(mtr && node2 && base);
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
+						MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(node2),
+						MTR_MEMO_PAGE_X_FIX));
+
+	/* The space id obtained here is reused when fetching neighbours
+	which reside on another page */
+	buf_ptr_get_fsp_addr(node2, &space, &node2_addr);
+
+	node1_addr = flst_get_prev_addr(node2, mtr);
+	node3_addr = flst_get_next_addr(node2, mtr);
+
+	if (!fil_addr_is_null(node1_addr)) {
+
+		/* Update next field of node1 */
+		
+		if (node1_addr.page == node2_addr.page) {
+
+			/* Same page as node2: address node1 directly
+			within the frame instead of fetching the page */
+			node1 = buf_frame_align(node2) + node1_addr.boffset;
+		} else {
+			node1 = fut_get_ptr(space, node1_addr, RW_X_LATCH,
+									mtr);
+		}
+		
+		ut_ad(node1 != node2);
+
+		flst_write_addr(node1 + FLST_NEXT, node3_addr, mtr);
+	} else {
+		/* node2 was first in list: update first field in base */
+		flst_write_addr(base + FLST_FIRST, node3_addr, mtr);
+	}
+
+	if (!fil_addr_is_null(node3_addr)) {
+		/* Update prev field of node3 */
+
+		if (node3_addr.page == node2_addr.page) {
+
+			/* Same page as node2: address node3 directly
+			within the frame instead of fetching the page */
+			node3 = buf_frame_align(node2) + node3_addr.boffset;
+		} else {
+			node3 = fut_get_ptr(space, node3_addr, RW_X_LATCH,
+									mtr);
+		}
+		
+		ut_ad(node2 != node3);
+
+		flst_write_addr(node3 + FLST_PREV, node1_addr, mtr);
+	} else {
+		/* node2 was last in list: update last field in base */
+		flst_write_addr(base + FLST_LAST, node1_addr, mtr);
+	}
+		
+	/* Update len of base node */
+	len = flst_get_len(base, mtr);
+	/* NOTE(review): the emptiness check below is a ut_ad, which is
+	presumably compiled out of non-debug builds; in that case len - 1
+	would wrap around if the stored length were 0 -- confirm */
+	ut_ad(len > 0);
+
+	mlog_write_ulint(base + FLST_LEN, len - 1, MLOG_4BYTES, mtr); 
+}
+
+/************************************************************************
+Cuts off the tail of a list starting from, and including, the given node.
+The predecessor of the given node becomes the new last node; if there is
+no predecessor the list becomes empty. The caller must tell how many nodes
+the tail contains, because this function does not walk the tail to count
+them: the stored list length is simply reduced by that number. */
+
+void
+flst_cut_end(
+/*=========*/
+	flst_base_node_t*	base,	/* in: pointer to base node of list */
+	flst_node_t*		node2,	/* in: first node to remove */
+	ulint			n_nodes,/* in: number of nodes to remove,
+					must be >= 1 */
+	mtr_t*			mtr)	/* in: mini-transaction handle */
+{
+	ulint		space_id;
+	ulint		len;
+	fil_addr_t	cut_addr;	/* address of node2 */
+	fil_addr_t	keep_addr;	/* address of the node preceding
+					node2, or null */
+	flst_node_t*	keep;
+
+	ut_ad(mtr && node2 && base);
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
+						MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(node2),
+						MTR_MEMO_PAGE_X_FIX));
+	ut_ad(n_nodes > 0);
+
+	buf_ptr_get_fsp_addr(node2, &space_id, &cut_addr);
+
+	keep_addr = flst_get_prev_addr(node2, mtr);
+
+	if (fil_addr_is_null(keep_addr)) {
+		/* node2 was the first node: nothing remains in the list */
+		flst_write_addr(base + FLST_FIRST, fil_addr_null, mtr);
+	} else {
+		/* Locate the surviving predecessor; when it is on the same
+		page as node2 we address it directly within the frame */
+		if (keep_addr.page != cut_addr.page) {
+			keep = fut_get_ptr(space_id, keep_addr, RW_X_LATCH,
+									mtr);
+		} else {
+			keep = buf_frame_align(node2) + keep_addr.boffset;
+		}
+
+		/* The predecessor no longer has a successor */
+		flst_write_addr(keep + FLST_NEXT, fil_addr_null, mtr);
+	}
+
+	/* The predecessor (possibly null) is the new last node */
+	flst_write_addr(base + FLST_LAST, keep_addr, mtr);
+
+	/* Reduce the stored length by the caller-supplied count */
+	len = flst_get_len(base, mtr);
+	ut_ad(len >= n_nodes);
+
+	mlog_write_ulint(base + FLST_LEN, len - n_nodes, MLOG_4BYTES, mtr);
+}
+
+/************************************************************************
+Cuts off the tail of a list after the given node; the given node itself
+stays in the list and becomes the new last node. The caller must tell how
+many nodes are removed, because this function does not walk the tail to
+count them: the stored list length is simply reduced by that number. When
+the count is zero nothing is written. */
+
+void
+flst_truncate_end(
+/*==============*/
+	flst_base_node_t*	base,	/* in: pointer to base node of list */
+	flst_node_t*		node2,	/* in: first node not to remove */
+	ulint			n_nodes,/* in: number of nodes to remove */
+	mtr_t*			mtr)	/* in: mini-transaction handle */
+{
+	ulint		space_id;
+	ulint		len;
+	fil_addr_t	last_kept_addr;
+
+	ut_ad(mtr && node2 && base);
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
+						MTR_MEMO_PAGE_X_FIX));
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(node2),
+						MTR_MEMO_PAGE_X_FIX));
+
+	if (n_nodes != 0) {
+		buf_ptr_get_fsp_addr(node2, &space_id, &last_kept_addr);
+
+		/* node2 no longer has a successor ... */
+		flst_write_addr(node2 + FLST_NEXT, fil_addr_null, mtr);
+
+		/* ... and is therefore the last node of the list */
+		flst_write_addr(base + FLST_LAST, last_kept_addr, mtr);
+
+		/* Reduce the stored length by the caller-supplied count */
+		len = flst_get_len(base, mtr);
+		ut_ad(len >= n_nodes);
+
+		mlog_write_ulint(base + FLST_LEN, len - n_nodes,
+							MLOG_4BYTES, mtr);
+		return;
+	}
+
+	/* Nothing to remove: node2 is expected to be the last node already */
+	ut_ad(fil_addr_is_null(flst_get_next_addr(node2, mtr)));
+}
+
+/************************************************************************
+Validates a file-based list by walking it once forwards and once backwards
+for exactly as many steps as the length stored in the base node, and
+asserting that each walk ends at a null address. A failed check trips an
+assertion; if the function returns at all, it returns TRUE. */
+
+ibool
+flst_validate(
+/*==========*/
+					/* out: TRUE if ok */
+	flst_base_node_t*	base,	/* in: pointer to base node of list */
+	mtr_t*			mtr1)	/* in: mtr */
+{
+	mtr_t		mtr2;
+	fil_addr_t	base_addr;
+	fil_addr_t	node_addr;
+	flst_node_t*	node;
+	ulint		space;
+	ulint		len;
+	ulint		n_left;
+
+	ut_ad(base);
+	ut_ad(mtr_memo_contains(mtr1, buf_block_align(base),
+							MTR_MEMO_PAGE_X_FIX));
+
+	/* Two mini-transactions are involved. mtr1, owned by the caller,
+	keeps the base node latched so that other threads cannot modify
+	the list. mtr2 is used for visiting the nodes and is committed
+	after every node: on a long list, keeping all visited pages
+	x-latched could fill the buffer and end in a deadlock. */
+
+	/* Only the space id is needed from this call */
+	buf_ptr_get_fsp_addr(base, &space, &base_addr);
+
+	len = flst_get_len(base, mtr1);
+
+	/* Forward pass: follow the next links from the first node */
+	node_addr = flst_get_first(base, mtr1);
+
+	for (n_left = len; n_left > 0; n_left--) {
+		mtr_start(&mtr2);
+
+		node = fut_get_ptr(space, node_addr, RW_X_LATCH, &mtr2);
+		node_addr = flst_get_next_addr(node, &mtr2);
+
+		mtr_commit(&mtr2);
+	}
+
+	/* After len steps we must have run off the end of the list */
+	ut_a(fil_addr_is_null(node_addr));
+
+	/* Backward pass: follow the prev links from the last node */
+	node_addr = flst_get_last(base, mtr1);
+
+	for (n_left = len; n_left > 0; n_left--) {
+		mtr_start(&mtr2);
+
+		node = fut_get_ptr(space, node_addr, RW_X_LATCH, &mtr2);
+		node_addr = flst_get_prev_addr(node, &mtr2);
+
+		mtr_commit(&mtr2);
+	}
+
+	/* After len steps we must have run off the start of the list */
+	ut_a(fil_addr_is_null(node_addr));
+
+	return(TRUE);
+}
+
+/************************************************************************
+Prints a short description of a file-based list to stderr: the space id,
+page number and byte offset of the base node, and the list length stored
+in it. The individual nodes are not printed. */
+
+void
+flst_print(
+/*=======*/
+	flst_base_node_t*	base,	/* in: pointer to base node of list */
+	mtr_t*			mtr)	/* in: mtr */
+{
+	buf_frame_t*	page_frame;
+	ulint		n_nodes;
+
+	ut_ad(base && mtr);
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
+							MTR_MEMO_PAGE_X_FIX));
+
+	/* The frame which contains the base node identifies the page */
+	page_frame = buf_frame_align(base);
+
+	n_nodes = flst_get_len(base, mtr);
+
+	fprintf(stderr,
+		"FILE-BASED LIST:\n"
+		"Base node in space %lu page %lu byte offset %lu; len %lu\n",
+		(ulong) buf_frame_get_space_id(page_frame),
+		(ulong) buf_frame_get_page_no(page_frame),
+		(ulong) (base - page_frame),
+		(ulong) n_nodes);
+}
--- a/ha/Makefile.am
+++ b/ha/Makefile.am
@ -0,0 +1,24 @@
+# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+# & Innobase Oy
+# 
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+# Pull in the build settings kept in include/Makefile.i (its contents are
+# not visible here -- presumably flags shared by all subdirectories; verify).
+include ../include/Makefile.i
+
+# Static library built from this directory; the noinst_ prefix means it is
+# built but not installed.
+noinst_LIBRARIES =	libha.a
+
+# Source files compiled into libha.a.
+libha_a_SOURCES =	ha0ha.c hash0hash.c
+
+# No optional programs are declared for this directory.
+EXTRA_PROGRAMS =	
--- a/ha/ha0ha.c
+++ b/ha/ha0ha.c
@ -0,0 +1,357 @@
+/************************************************************************
+The hash table with external chains
+
+(c) 1994-1997 Innobase Oy
+
+Created 8/22/1994 Heikki Tuuri
+*************************************************************************/
+
+#include "ha0ha.h"
+#ifdef UNIV_NONINL
+#include "ha0ha.ic"
+#endif
+
+#include "buf0buf.h"
+
+/*****************************************************************
+Creates a hash table with >= n array cells. The actual number of cells is
+chosen to be a prime number slightly bigger than n. The memory heaps for
+the chain nodes are created here too: a single heap if no mutexes are
+requested, otherwise one heap per mutex. */
+
+hash_table_t*
+ha_create(
+/*======*/
+				/* out, own: created table */
+	ibool	in_btr_search,	/* in: TRUE if the hash table is used in
+				the btr_search module */
+	ulint	n,		/* in: number of array cells */
+	ulint	n_mutexes,	/* in: number of mutexes to protect the
+				hash table: must be a power of 2, or 0 */
+	ulint	mutex_level)	/* in: level of the mutexes in the latching
+				order: this is used in the debug version */
+{
+	hash_table_t*	table;
+	ulint		heap_no;
+
+	table = hash_create(n);
+
+	/* Remember whether this table belongs to the btr_search module */
+	table->adaptive = in_btr_search ? TRUE : FALSE;
+
+	if (n_mutexes != 0) {
+		/* Partitioned table: a mutex array and a matching array
+		of node heaps */
+		hash_create_mutexes(table, n_mutexes, mutex_level);
+
+		table->heaps = mem_alloc(n_mutexes * sizeof(void*));
+
+		for (heap_no = 0; heap_no < n_mutexes; heap_no++) {
+			if (in_btr_search) {
+				table->heaps[heap_no] =
+					mem_heap_create_in_btr_search(4096);
+			} else {
+				table->heaps[heap_no] =
+					mem_heap_create_in_buffer(4096);
+			}
+		}
+	} else if (in_btr_search) {
+		/* No mutexes: one heap serves the whole table */
+		table->heap = mem_heap_create_in_btr_search(4096);
+	} else {
+		table->heap = mem_heap_create_in_buffer(4096);
+	}
+
+	return(table);
+}
+
+/*****************************************************************
+Inserts an entry into a hash table. If the chain already contains a node
+with the same fold value, that node is redirected to the new data and no
+new node is allocated. Otherwise a new node is allocated from the heap of
+the table and appended to the end of the chain. For an adaptive table the
+n_pointers counters of the affected buffer blocks are kept up to date. */
+
+ibool
+ha_insert_for_fold(
+/*===============*/
+				/* out: TRUE if succeed, FALSE if no more
+				memory could be allocated */
+	hash_table_t*	table,	/* in: hash table */
+	ulint		fold,	/* in: folded value of data; if a node with
+				the same fold value already exists, it is
+				updated to point to the new data, and no new
+				node is created! */
+	void*		data)	/* in: data, must not be NULL */
+{
+	hash_cell_t*	cell;
+	ha_node_t*	cur;
+	ha_node_t*	new_node;
+	ha_node_t*	tail;
+	buf_block_t*	prev_block;
+
+	ut_ad(table && data);
+#ifdef UNIV_SYNC_DEBUG
+	ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
+#endif /* UNIV_SYNC_DEBUG */
+
+	cell = hash_get_nth_cell(table, hash_calc_hash(fold, table));
+
+	/* First look for an existing node with this fold value */
+
+	for (cur = cell->node; cur != NULL; cur = cur->next) {
+		if (cur->fold != fold) {
+
+			continue;
+		}
+
+		if (table->adaptive) {
+			/* Move one pointer count from the block of the
+			old data to the block of the new data */
+			prev_block = buf_block_align(cur->data);
+			ut_a(prev_block->n_pointers > 0);
+			prev_block->n_pointers--;
+			buf_block_align(data)->n_pointers++;
+		}
+
+		cur->data = data;
+
+		return(TRUE);
+	}
+
+	/* Not found: a new chain node is needed */
+
+	new_node = mem_heap_alloc(hash_get_heap(table, fold),
+							sizeof(ha_node_t));
+	if (new_node == NULL) {
+		/* Only a btr search type heap is expected to be able to
+		run out of memory like this: give up */
+
+		ut_ad(hash_get_heap(table, fold)->type & MEM_HEAP_BTR_SEARCH);
+
+		return(FALSE);
+	}
+
+	ha_node_set_data(new_node, data);
+
+	if (table->adaptive) {
+		buf_block_align(data)->n_pointers++;
+	}
+
+	new_node->fold = fold;
+	new_node->next = NULL;
+
+	/* Append the new node to the end of the chain */
+
+	tail = cell->node;
+
+	if (tail == NULL) {
+		/* The chain was empty */
+		cell->node = new_node;
+
+		return(TRUE);
+	}
+
+	for (; tail->next != NULL; tail = tail->next) {
+		/* just advance to the last node */
+	}
+
+	tail->next = new_node;
+
+	return(TRUE);
+}	
+
+/***************************************************************
+Deletes a hash node. For an adaptive table the n_pointers counter of the
+buffer block containing the node's data is decremented first; the node is
+then removed with HASH_DELETE_AND_COMPACT. */
+
+void
+ha_delete_hash_node(
+/*================*/
+	hash_table_t*	table,		/* in: hash table */
+	ha_node_t*	del_node)	/* in: node to be deleted */
+{
+	if (table->adaptive) {
+		/* The block must have at least this one pointer to it */
+		ut_a(buf_block_align(del_node->data)->n_pointers > 0);
+		buf_block_align(del_node->data)->n_pointers--;
+	}
+
+	/* NOTE(review): judging by the macro name and by the comment in
+	ha_remove_all_nodes_to_page, this may move another node of the
+	heap into the freed slot, so node pointers held by the caller
+	should be treated as stale afterwards -- confirm in hash0hash.h */
+	HASH_DELETE_AND_COMPACT(ha_node_t, next, table, del_node);
+}
+
+/*****************************************************************
+Deletes an entry from a hash table. The entry is looked up by its fold
+value and data pointer; it is an error (assertion failure) if no such
+entry exists. */
+
+void
+ha_delete(
+/*======*/
+	hash_table_t*	table,	/* in: hash table */
+	ulint		fold,	/* in: folded value of data */
+	void*		data)	/* in: data, must not be NULL and must exist
+				in the hash table */
+{
+	ha_node_t*	node;
+
+#ifdef UNIV_SYNC_DEBUG
+	ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
+#endif /* UNIV_SYNC_DEBUG */
+
+	/* Find the node which refers to exactly this data pointer */
+	node = ha_search_with_data(table, fold, data);
+
+	/* The caller has promised that the entry is in the table */
+	ut_a(node);
+
+	ha_delete_hash_node(table, node);
+}	
+
+/*************************************************************
+Looks for the node which refers to the given data pointer and, if there is
+one, makes it refer to new_data instead. Nothing happens if no such node
+is found. For an adaptive table the n_pointers counters of the old and the
+new buffer block are adjusted accordingly. */
+
+void
+ha_search_and_update_if_found(
+/*==========================*/
+	hash_table_t*	table,	/* in: hash table */
+	ulint		fold,	/* in: folded value of the searched data */
+	void*		data,	/* in: pointer to the data */
+	void*		new_data)/* in: new pointer to the data */
+{
+	ha_node_t*	node;
+
+#ifdef UNIV_SYNC_DEBUG
+	ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
+#endif /* UNIV_SYNC_DEBUG */
+
+	node = ha_search_with_data(table, fold, data);
+
+	if (!node) {
+		/* No such entry: nothing to update */
+
+		return;
+	}
+
+	if (table->adaptive) {
+		/* Move one pointer count from the old block to the new */
+		ut_a(buf_block_align(node->data)->n_pointers > 0);
+		buf_block_align(node->data)->n_pointers--;
+		buf_block_align(new_data)->n_pointers++;
+	}
+
+	node->data = new_data;
+}
+
+/*********************************************************************
+Removes from the chain determined by fold all nodes whose data pointer
+points to the page given. Only the single chain selected by fold is
+examined. In a UNIV_DEBUG build the chain is scanned once more afterwards
+to assert that no such node is left. */
+
+void
+ha_remove_all_nodes_to_page(
+/*========================*/
+	hash_table_t*	table,	/* in: hash table */
+	ulint		fold,	/* in: fold value */
+	page_t*		page)	/* in: buffer page */
+{
+	ha_node_t*	node;
+
+#ifdef UNIV_SYNC_DEBUG
+	ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
+#endif /* UNIV_SYNC_DEBUG */
+	node = ha_chain_get_first(table, fold);
+
+	while (node) {
+		/* A node refers to the page if the frame containing its
+		data pointer is the page itself */
+		if (buf_frame_align(ha_node_get_data(node)) == page) {
+
+			/* Remove the hash node */
+
+			ha_delete_hash_node(table, node);
+
+			/* Start again from the first node in the chain
+			because the deletion may compact the heap of
+			nodes and move other nodes! */
+
+			node = ha_chain_get_first(table, fold);
+		} else {
+			node = ha_chain_get_next(node);
+		}
+	}
+#ifdef UNIV_DEBUG
+	/* Check that all nodes really got deleted */
+	
+	node = ha_chain_get_first(table, fold);
+
+	while (node) {
+		ut_a(buf_frame_align(ha_node_get_data(node)) != page);
+
+		node = ha_chain_get_next(node);
+	}
+#endif
+}
+
+/*****************************************************************
+Validates a hash table: checks for every chain node that the fold value
+stored in the node hashes to the cell in whose chain the node is. Every
+mismatch is reported to stderr; the scan continues after a mismatch. */
+
+ibool
+ha_validate(
+/*========*/
+				/* out: TRUE if ok */
+	hash_table_t*	table)	/* in: hash table */
+{
+	ha_node_t*	node;
+	ulint		n_cells;
+	ulint		i;
+	ibool		ok	= TRUE;
+
+	n_cells = hash_get_n_cells(table);
+
+	for (i = 0; i < n_cells; i++) {
+
+		for (node = hash_get_nth_cell(table, i)->node;
+		     node != NULL;
+		     node = node->next) {
+
+			if (hash_calc_hash(node->fold, table) == i) {
+				/* The node is in the right chain */
+
+				continue;
+			}
+
+			ut_print_timestamp(stderr);
+			fprintf(stderr,
+"InnoDB: Error: hash table node fold value %lu does not\n"
+"InnoDB: match with the cell number %lu.\n",
+				(ulong) node->fold, (ulong) i);
+
+			ok = FALSE;
+		}
+	}
+
+	return(ok);
+}	
+
+/*****************************************************************
+Prints info of a hash table: the size of the cell array, the number of
+cells which have a non-empty chain and, when the table uses a single node
+heap, the number of buffers reserved for that heap. The output is one line
+which is always terminated with a newline. */
+
+void
+ha_print_info(
+/*==========*/
+	FILE*		file,	/* in: file where to print */
+	hash_table_t*	table)	/* in: hash table */
+{
+	hash_cell_t*	cell;
+	ulint		cells	= 0;
+	ulint		n_bufs;
+	ulint		i;
+
+	/* Count the cells whose chain is non-empty */
+
+	for (i = 0; i < hash_get_n_cells(table); i++) {
+
+		cell = hash_get_nth_cell(table, i);
+
+		if (cell->node) {
+
+			cells++;
+		}
+	}
+
+	fprintf(file,
+		"Hash table size %lu, used cells %lu",
+		(ulong) hash_get_n_cells(table), (ulong) cells);
+
+	if (table->heaps == NULL && table->heap != NULL) {
+
+		/* This calculation is intended for the adaptive hash
+		index: how many buffer frames we have reserved? */
+
+		n_bufs = UT_LIST_GET_LEN(table->heap->base) - 1;
+
+		if (table->heap->free_block) {
+			n_bufs++;
+		}
+
+		fprintf(file, ", node heap has %lu buffer(s)\n", (ulong) n_bufs);
+	} else {
+		/* There is no single node heap to report on. The line
+		started above must still be finished: previously it was
+		left without a newline in this case, so whatever was
+		printed next to the file got glued onto it. */
+
+		fprintf(file, "\n");
+	}
+}	
--- a/ha/hash0hash.c
+++ b/ha/hash0hash.c
@ -0,0 +1,153 @@
+/******************************************************
+The simple hash table utility
+
+(c) 1997 Innobase Oy
+
+Created 5/20/1997 Heikki Tuuri
+*******************************************************/
+
+#include "hash0hash.h"
+#ifdef UNIV_NONINL
+#include "hash0hash.ic"
+#endif
+
+#include "mem0mem.h"
+
+/****************************************************************
+Reserves the mutex which protects the given fold value in a hash table.
+The mutex is the one returned by hash_get_mutex for this table and fold. */
+
+void
+hash_mutex_enter(
+/*=============*/
+	hash_table_t* 	table,	/* in: hash table */
+	ulint 		fold)	/* in: fold */
+{
+	mutex_t*	fold_mutex;
+
+	fold_mutex = hash_get_mutex(table, fold);
+
+	mutex_enter(fold_mutex);
+}
+
+/****************************************************************
+Releases the mutex which protects the given fold value in a hash table.
+The mutex is the one returned by hash_get_mutex for this table and fold. */
+
+void
+hash_mutex_exit(
+/*============*/
+	hash_table_t* 	table,	/* in: hash table */
+	ulint 		fold)	/* in: fold */
+{
+	mutex_t*	fold_mutex;
+
+	fold_mutex = hash_get_mutex(table, fold);
+
+	mutex_exit(fold_mutex);
+}
+
+/****************************************************************
+Reserves every mutex of a hash table, one after another in ascending
+array order. Does nothing for a table which has no mutexes. */
+
+void
+hash_mutex_enter_all(
+/*=================*/
+	hash_table_t* 	table)	/* in: hash table */
+{
+	ulint	mutex_no;
+
+	for (mutex_no = 0; mutex_no < table->n_mutexes; mutex_no++) {
+
+		mutex_enter(&table->mutexes[mutex_no]);
+	}
+}
+
+/****************************************************************
+Releases every mutex of a hash table, one after another in ascending
+array order. Does nothing for a table which has no mutexes. */
+
+void
+hash_mutex_exit_all(
+/*================*/
+	hash_table_t* 	table)	/* in: hash table */
+{
+	ulint	mutex_no;
+
+	for (mutex_no = 0; mutex_no < table->n_mutexes; mutex_no++) {
+
+		mutex_exit(&table->mutexes[mutex_no]);
+	}
+}
+
+/*****************************************************************
+Creates a hash table with >= n array cells. The actual number of cells is
+chosen to be a prime number slightly bigger than n. The new table has all
+its cells empty, and has no mutexes and no node heaps: those are added
+separately by the caller if needed. */
+
+hash_table_t*
+hash_create(
+/*========*/
+			/* out, own: created table */
+	ulint	n)	/* in: number of array cells */
+{
+	hash_table_t*	table;
+	hash_cell_t*	cells;
+	ulint		n_cells;
+	ulint		i;
+
+	n_cells = ut_find_prime(n);
+
+	table = mem_alloc(sizeof(hash_table_t));
+
+	cells = ut_malloc(sizeof(hash_cell_t) * n_cells);
+
+	/* Fill in the table header */
+
+	table->magic_n = HASH_TABLE_MAGIC_N;
+	table->adaptive = FALSE;
+	table->n_cells = n_cells;
+	table->array = cells;
+	table->n_mutexes = 0;
+	table->mutexes = NULL;
+	table->heaps = NULL;
+	table->heap = NULL;
+
+	/* Every chain starts out empty */
+
+	for (i = 0; i < n_cells; i++) {
+
+		hash_get_nth_cell(table, i)->node = NULL;
+	}
+
+	return(table);
+}
+
+/*****************************************************************
+Frees a hash table: the cell array and the table struct itself. The table
+must not have a mutex array (this is asserted). Node heaps, if any, are
+not touched by this function. */
+
+void
+hash_table_free(
+/*============*/
+	hash_table_t*	table)	/* in, own: hash table */
+{
+	hash_cell_t*	cells;
+
+	ut_a(table->mutexes == NULL);
+
+	/* Release the cell array first, then the struct which owns it */
+	cells = table->array;
+
+	ut_free(cells);
+	mem_free(table);
+}
+
+/*****************************************************************
+Creates a mutex array to protect a hash table. Each mutex is created and
+assigned the given latching order level; the mutex count is stored in the
+table only after all the mutexes have been set up. */
+
+void
+hash_create_mutexes(
+/*================*/
+	hash_table_t*	table,		/* in: hash table */
+	ulint		n_mutexes,	/* in: number of mutexes, must be a
+					power of 2 */
+	ulint		sync_level)	/* in: latching order level of the
+					mutexes: used in the debug version */
+{
+	ulint	i;
+
+	/* Enforce the power-of-2 requirement stated for n_mutexes.
+	NOTE(review): this relies on ut_2_power_up returning its argument
+	unchanged exactly when it is a power of 2 -- confirm in ut0ut */
+	ut_a(n_mutexes == ut_2_power_up(n_mutexes));
+
+	table->mutexes = mem_alloc(n_mutexes * sizeof(mutex_t));
+
+	for (i = 0; i < n_mutexes; i++) {
+		mutex_create(table->mutexes + i);
+
+		mutex_set_level(table->mutexes + i, sync_level);
+	}
+
+	table->n_mutexes = n_mutexes;
+}
--- a/handler/ha_innodb.cc
+++ b/handler/ha_innodb.cc
--- a/handler/ha_innodb.h
+++ b/handler/ha_innodb.h
@ -0,0 +1,349 @@
+/* Copyright (C) 2000-2005 MySQL AB && Innobase Oy
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+
+/*
+  This file is based on ha_berkeley.h of MySQL distribution
+
+  This file defines the Innodb handler: the interface between MySQL and
+  Innodb
+*/
+
+#ifdef USE_PRAGMA_INTERFACE
+#pragma interface			/* gcc class implementation */
+#endif
+
+/* Share structure referenced by ha_innobase through its 'share' member.
+NOTE(review): presumably one instance exists per open table and is shared
+by all handles on that table, with use_count counting those handles and
+mutex protecting the struct -- confirm against ha_innodb.cc. */
+typedef struct st_innobase_share {
+  THR_LOCK lock;
+  pthread_mutex_t mutex;
+  char *table_name;
+  /* length of table_name, and the usage counter of this share */
+  uint table_name_length,use_count;
+} INNOBASE_SHARE;
+
+
+/* Query cache callback of InnoDB: ha_innobase::register_query_cache_table()
+both hands this function to the caller as the engine callback and calls it
+directly with the table key and its length.
+NOTE(review): presumably returns nonzero when the query cache may be used
+for the table named by full_name -- confirm against the definition. The
+last parameter is named 'unused' in this declaration. */
+my_bool innobase_query_caching_of_table_permitted(THD* thd, char* full_name,
+                                                  uint full_name_len,
+                                                  ulonglong *unused);
+
+/* The class defining a handle to an InnoDB table. It derives from the MySQL
+handler class. Everything declared before the 'public:' label (the data
+members and the helper methods) is private, since that is the default
+access of a class. */
+class ha_innobase: public handler
+{
+	void*		innobase_prebuilt;/* (row_prebuilt_t*) prebuilt
+					struct in InnoDB, used to save
+					CPU time with prebuilt data
+					structures*/
+	THD*		user_thd;	/* the thread handle of the user
+					currently using the handle; this is
+					set in external_lock function */
+	query_id_t      last_query_id;  /* the latest query id where the
+					handle was used */
+	/* NOTE(review): 'lock' is presumably the MySQL table lock data
+	handed out by store_lock(), and 'share' the INNOBASE_SHARE of the
+	table this handle is open on -- confirm in ha_innodb.cc */
+  	THR_LOCK_DATA 	lock;
+	INNOBASE_SHARE  *share;
+
+	/* NOTE(review): alloc_ptr is presumably the allocation that backs
+	upd_buff and key_val_buff -- confirm in ha_innodb.cc */
+  	gptr 		alloc_ptr;
+  	byte*		upd_buff;	/* buffer used in updates */
+  	byte*		key_val_buff;	/* buffer used in converting
+  					search key values from MySQL format
+  					to Innodb format */
+	ulong		upd_and_key_val_buff_len;
+					/* the length of each of the previous
+					two buffers */
+	/* the value returned by table_flags() */
+  	ulong 		int_table_flags;
+  	uint 		primary_key;
+	uint		last_dup_key;
+	ulong		start_of_scan;	/* this is set to 1 when we are
+					starting a table scan but have not
+					yet fetched any row, else 0 */
+	uint		last_match_mode;/* match mode of the latest search:
+					ROW_SEL_EXACT, ROW_SEL_EXACT_PREFIX,
+					or undefined */
+	uint		num_write_row;	/* number of write_row() calls */
+	ulong max_supported_row_length(const byte *buf);
+
+	uint store_key_val_for_row(uint keynr, char* buff, uint buff_len,
+					       const byte* record);
+	int update_thd(THD* thd);
+	int change_active_index(uint keynr);
+	int general_fetch(byte* buf, uint direction, uint match_mode);
+	int innobase_read_and_init_auto_inc(longlong* ret);
+
+	/* Init values for the class: */
+ public:
+  	ha_innobase(TABLE *table_arg);
+  	~ha_innobase() {}
+	/*
+	  Get the row type from the storage engine.  If this method returns
+	  ROW_TYPE_NOT_USED, the information in HA_CREATE_INFO should be used.
+	*/
+	enum row_type get_row_type() const;
+
+	/* Fixed descriptive strings: the table type is always "InnoDB" and
+	every index is reported as "BTREE" regardless of key_number */
+  	const char* table_type() const { return("InnoDB");}
+	const char *index_type(uint key_number) { return "BTREE"; }
+  	const char** bas_ext() const;
+ 	ulong table_flags() const { return int_table_flags; }
+	/* The same capability flags are returned for every index and key
+	part: the arguments are ignored */
+	ulong index_flags(uint idx, uint part, bool all_parts) const
+	{
+	  return (HA_READ_NEXT |
+		  HA_READ_PREV |
+		  HA_READ_ORDER |
+		  HA_READ_RANGE |
+                  HA_KEYREAD_ONLY);
+	}
+  	uint max_supported_keys()          const { return MAX_KEY; }
+				/* An InnoDB page must store >= 2 keys;
+				a secondary key record must also contain the
+				primary key value:
+				max key length is therefore set to slightly
+				less than 1 / 4 of page size which is 16 kB;
+				but currently MySQL does not work with keys
+				whose size is > MAX_KEY_LENGTH */
+  	uint max_supported_key_length() const { return 3500; }
+  	uint max_supported_key_part_length() const { return 3500; }
+	const key_map *keys_to_use_for_scanning() { return &key_map_full; }
+  	bool has_transactions()  { return 1;}
+
+  	int open(const char *name, int mode, uint test_if_locked);
+  	int close(void);
+  	double scan_time();
+	double read_time(uint index, uint ranges, ha_rows rows);
+
+  	int write_row(byte * buf);
+  	int update_row(const byte * old_data, byte * new_data);
+  	int delete_row(const byte * buf);
+	void unlock_row();
+
+  	int index_init(uint index);
+  	int index_end();
+  	int index_read(byte * buf, const byte * key,
+		       uint key_len, enum ha_rkey_function find_flag);
+  	int index_read_idx(byte * buf, uint index, const byte * key,
+			   uint key_len, enum ha_rkey_function find_flag);
+	int index_read_last(byte * buf, const byte * key, uint key_len);
+  	int index_next(byte * buf);
+  	int index_next_same(byte * buf, const byte *key, uint keylen);
+  	int index_prev(byte * buf);
+  	int index_first(byte * buf);
+  	int index_last(byte * buf);
+
+  	int rnd_init(bool scan);
+  	int rnd_end();
+  	int rnd_next(byte *buf);
+  	int rnd_pos(byte * buf, byte *pos);
+
+  	void position(const byte *record);
+  	void info(uint);
+        int analyze(THD* thd,HA_CHECK_OPT* check_opt);
+        int optimize(THD* thd,HA_CHECK_OPT* check_opt);
+	int discard_or_import_tablespace(my_bool discard);
+  	int extra(enum ha_extra_function operation);
+  	int external_lock(THD *thd, int lock_type);
+	int transactional_table_lock(THD *thd, int lock_type);
+        int start_stmt(THD *thd, thr_lock_type lock_type);
+
+	/* NOTE(review): this overloads position(const byte*) declared
+	above; confirm that both overloads are defined and that the
+	non-const one is intended */
+  	void position(byte *record);
+  	ha_rows records_in_range(uint inx, key_range *min_key, key_range
+								*max_key);
+	ha_rows estimate_rows_upper_bound();
+
+  	int create(const char *name, register TABLE *form,
+					HA_CREATE_INFO *create_info);
+	int delete_all_rows();
+  	int delete_table(const char *name);
+	int rename_table(const char* from, const char* to);
+	int check(THD* thd, HA_CHECK_OPT* check_opt);
+        char* update_table_comment(const char* comment);
+	char* get_foreign_key_create_info();
+        int get_foreign_key_list(THD *thd, List<FOREIGN_KEY_INFO> *f_key_list);
+	bool can_switch_engines();
+  	uint referenced_by_foreign_key();
+	void free_foreign_key_create_info(char* str);	
+  	THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to,
+			     		enum thr_lock_type lock_type);
+	void init_table_handle_for_HANDLER(); 
+	ulonglong get_auto_increment();
+	int reset_auto_increment(ulonglong value);
+
+	virtual bool get_error_message(int error, String *buf);
+	
+        uint8 table_cache_type() { return HA_CACHE_TBL_ASKTRANSACT; }
+        /*
+          Ask the handler about permission to cache the table during query
+          registration. Stores innobase_query_caching_of_table_permitted in
+          *call_back, sets *engine_data to 0, and returns the result of
+          calling that same function for table_key/key_length.
+        */
+        my_bool register_query_cache_table(THD *thd, char *table_key,
+					   uint key_length,
+					   qc_engine_callback *call_back,
+					   ulonglong *engine_data)
+        {
+          *call_back= innobase_query_caching_of_table_permitted;
+          *engine_data= 0;
+          return innobase_query_caching_of_table_permitted(thd, table_key,
+                                                           key_length,
+                                                           engine_data);
+        }
+        static char *get_mysql_bin_log_name();
+        static ulonglong get_mysql_bin_log_pos();
+        bool primary_key_is_clustered() { return true; }
+        int cmp_ref(const byte *ref1, const byte *ref2);
+};
+
+extern struct show_var_st innodb_status_variables[];
+extern uint innobase_init_flags, innobase_lock_type;
+extern uint innobase_flush_log_at_trx_commit;
+extern ulong innobase_cache_size, innobase_fast_shutdown;
+extern ulong innobase_large_page_size;
+extern char *innobase_home, *innobase_tmpdir, *innobase_logdir;
+extern long innobase_lock_scan_time;
+extern long innobase_mirrored_log_groups, innobase_log_files_in_group;
+extern long innobase_log_file_size, innobase_log_buffer_size;
+extern long innobase_buffer_pool_size, innobase_additional_mem_pool_size;
+extern long innobase_buffer_pool_awe_mem_mb;
+extern long innobase_file_io_threads, innobase_lock_wait_timeout;
+extern long innobase_force_recovery;
+extern long innobase_open_files;
+extern char *innobase_data_home_dir, *innobase_data_file_path;
+extern char *innobase_log_group_home_dir, *innobase_log_arch_dir;
+extern char *innobase_unix_file_flush_method;
+/* The following variables have to be my_bool for SHOW VARIABLES to work */
+extern my_bool innobase_log_archive,
+               innobase_use_doublewrite,
+               innobase_use_checksums,
+               innobase_use_large_pages,
+               innobase_use_native_aio,
+	       innobase_file_per_table, innobase_locks_unsafe_for_binlog,
+               innobase_create_status_file;
+extern my_bool innobase_very_fast_shutdown; /* set this to 1 just before
+					    calling innobase_end() if you want
+					    InnoDB to shut down without
+					    flushing the buffer pool: this
+					    is equivalent to a 'crash' */
+extern "C" {
+extern ulong srv_max_buf_pool_modified_pct;
+extern ulong srv_max_purge_lag;
+extern ulong srv_auto_extend_increment;
+extern ulong srv_n_spin_wait_rounds;
+extern ulong srv_n_free_tickets_to_enter;
+extern ulong srv_thread_sleep_delay;
+extern ulong srv_thread_concurrency;
+extern ulong srv_commit_concurrency;
+}
+
+extern TYPELIB innobase_lock_typelib;
+
+bool innobase_init(void);
+bool innobase_end(void);
+bool innobase_flush_logs(void);
+uint innobase_get_free_space(void);
+
+/*
+  don't delete it - it may be re-enabled later
+  as an optimization for the most common case InnoDB+binlog
+*/
+#if 0
+int innobase_report_binlog_offset_and_commit(
+        THD*    thd,
+	void*	trx_handle,
+        char*   log_file_name,
+        my_off_t end_offset);
+int innobase_commit_complete(void* trx_handle);
+void innobase_store_binlog_offset_and_flush_log(char *binlog_name,longlong offset);
+#endif
+
+int innobase_drop_database(char *path);
+bool innodb_show_status(THD* thd);
+bool innodb_mutex_show_status(THD* thd);
+void innodb_export_status(void);
+
+void innobase_release_temporary_latches(THD *thd);
+
+void innobase_store_binlog_offset_and_flush_log(char *binlog_name,longlong offset);
+
+int innobase_start_trx_and_assign_read_view(THD* thd);
+
+/***********************************************************************
+This function is used to prepare an X/Open XA distributed transaction */
+
+int innobase_xa_prepare(
+/*====================*/
+			/* out: 0 or error number */
+	THD*	thd,	/* in: handle to the MySQL thread of the user
+			whose XA transaction should be prepared */
+	bool	all);	/* in: TRUE - commit transaction
+			FALSE - the current SQL statement ended */
+
+/***********************************************************************
+This function is used to recover X/Open XA distributed transactions   */
+
+int innobase_xa_recover(
+/*====================*/
+				/* out: number of prepared transactions 
+				stored in xid_list */
+	XID*    xid_list, 	/* in/out: prepared transactions */
+	uint	len);		/* in: number of slots in xid_list */
+
+/***********************************************************************
+This function is used to commit one X/Open XA distributed transaction
+which is in the prepared state */
+
+int innobase_commit_by_xid(
+/*=======================*/
+			/* out: 0 or error number */
+	XID*	xid);	/* in : X/Open XA Transaction Identification */
+
+/***********************************************************************
+This function is used to roll back one X/Open XA distributed transaction
+which is in the prepared state */
+
+int innobase_rollback_by_xid(
+			/* out: 0 or error number */
+	XID	*xid);	/* in : X/Open XA Transaction Identification */
+
+
+int innobase_xa_end(THD *thd);
+
+
+int innobase_repl_report_sent_binlog(THD *thd, char *log_file_name,
+                               my_off_t end_offset);
+
+/***********************************************************************
+Create a consistent view for a cursor based on current transaction
+which is created if the corresponding MySQL thread still lacks one.
+This consistent view is then used inside of MySQL when accessing records 
+using a cursor. */
+
+void*
+innobase_create_cursor_view(void);
+/*=============================*/
+				/* out: Pointer to cursor view or NULL */
+
+/***********************************************************************
+Close the given consistent cursor view of a transaction and restore
+global read view to a transaction read view. Transaction is created if the 
+corresponding MySQL thread still lacks one. */
+
+void
+innobase_close_cursor_view(
+/*=======================*/
+	void*	curview);	/* in: Consistent read view to be closed */
+
+/***********************************************************************
+Set the given consistent cursor view to a transaction which is created
+if the corresponding MySQL thread still lacks one. If the given
+consistent cursor view is NULL, the global read view of a transaction is
+restored to a transaction read view. */
+
+void
+innobase_set_cursor_view(
+/*=====================*/
+	void*	curview);	/* in: Consistent read view to be set */
--- a/ibuf/Makefile.am
+++ b/ibuf/Makefile.am
@ -0,0 +1,27 @@
+# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+# & Innobase Oy
+# 
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+include ../include/Makefile.i
+
+noinst_LIBRARIES =	libibuf.a
+
+libibuf_a_SOURCES =	ibuf0ibuf.c
+
+EXTRA_PROGRAMS =	
+
+# Don't update the files from bitkeeper
+%::SCCS/s.%
--- a/ibuf/ibuf0ibuf.c
+++ b/ibuf/ibuf0ibuf.c
--- a/include/Makefile.am
+++ b/include/Makefile.am
@ -0,0 +1,60 @@
+# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
+# & Innobase Oy
+# 
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+noinst_HEADERS = btr0btr.h btr0btr.ic btr0cur.h btr0cur.ic \
+        btr0pcur.h btr0pcur.ic btr0sea.h btr0sea.ic btr0types.h \
+        buf0buf.h buf0buf.ic buf0flu.h buf0flu.ic buf0lru.h \
+        buf0lru.ic buf0rea.h buf0types.h data0data.h data0data.ic data0type.h \
+        data0type.ic data0types.h db0err.h dict0boot.h \
+        dict0boot.ic dict0crea.h dict0crea.ic dict0dict.h \
+        dict0dict.ic dict0load.h dict0load.ic dict0mem.h \
+        dict0mem.ic dict0types.h dyn0dyn.h dyn0dyn.ic eval0eval.h \
+        eval0eval.ic eval0proc.h eval0proc.ic fil0fil.h fsp0fsp.h \
+        fsp0fsp.ic fut0fut.h fut0fut.ic fut0lst.h fut0lst.ic \
+        ha0ha.h ha0ha.ic hash0hash.h hash0hash.ic \
+        ibuf0ibuf.h ibuf0ibuf.ic ibuf0types.h lock0lock.h \
+        lock0lock.ic lock0types.h log0log.h log0log.ic log0recv.h \
+        log0recv.ic mach0data.h mach0data.ic makefilewin.i \
+        mem0dbg.h mem0dbg.ic mem0mem.h mem0mem.ic mem0pool.h \
+        mem0pool.ic mtr0log.h mtr0log.ic mtr0mtr.h mtr0mtr.ic \
+        mtr0types.h os0file.h os0proc.h os0proc.ic \
+        os0sync.h os0sync.ic os0thread.h \
+        os0thread.ic page0cur.h page0cur.ic page0page.h \
+        page0page.ic page0types.h pars0grm.h pars0opt.h \
+        pars0opt.ic pars0pars.h pars0pars.ic pars0sym.h \
+        pars0sym.ic pars0types.h que0que.h que0que.ic que0types.h \
+        read0read.h read0read.ic read0types.h rem0cmp.h \
+        rem0cmp.ic rem0rec.h rem0rec.ic rem0types.h row0ins.h \
+        row0ins.ic row0mysql.h row0mysql.ic row0purge.h \
+        row0purge.ic row0row.h row0row.ic row0sel.h row0sel.ic \
+        row0types.h row0uins.h row0uins.ic row0umod.h row0umod.ic \
+        row0undo.h row0undo.ic row0upd.h row0upd.ic row0vers.h \
+        row0vers.ic srv0que.h srv0srv.h srv0srv.ic srv0start.h \
+        sync0arr.h sync0arr.ic sync0rw.h \
+        sync0rw.ic sync0sync.h sync0sync.ic sync0types.h \
+        thr0loc.h thr0loc.ic trx0purge.h trx0purge.ic trx0rec.h \
+        trx0rec.ic trx0roll.h trx0roll.ic trx0rseg.h trx0rseg.ic \
+        trx0sys.h trx0sys.ic trx0trx.h trx0trx.ic trx0types.h \
+        trx0undo.h trx0undo.ic trx0xa.h univ.i \
+        usr0sess.h usr0sess.ic usr0types.h ut0byte.h ut0byte.ic \
+        ut0dbg.h ut0lst.h ut0mem.h ut0mem.ic ut0rnd.h ut0rnd.ic \
+        ut0sort.h ut0ut.h ut0ut.ic
+
+EXTRA_DIST = Makefile.i
+
+# Don't update the files from bitkeeper
+%::SCCS/s.%
--- a/include/Makefile.i
+++ b/include/Makefile.i
@ -0,0 +1,6 @@
+# Makefile included in Makefile.am in every subdirectory
+
+INCLUDES =		-I$(srcdir)/../include -I$(srcdir)/../../include -I../../include
+
+# Don't update the files from bitkeeper
+%::SCCS/s.%
--- a/include/btr0btr.h
+++ b/include/btr0btr.h
@ -0,0 +1,439 @@
+/******************************************************
+The B-tree
+
+(c) 1994-1996 Innobase Oy
+
+Created 6/2/1994 Heikki Tuuri
+*******************************************************/
+
+#ifndef btr0btr_h
+#define btr0btr_h
+
+#include "univ.i"
+
+#include "dict0dict.h"
+#include "data0data.h"
+#include "page0cur.h"
+#include "rem0rec.h"
+#include "mtr0mtr.h"
+#include "btr0types.h"
+
+/* Maximum record size which can be stored on a page, without using the
+special big record storage structure */
+
+#define	BTR_PAGE_MAX_REC_SIZE	(UNIV_PAGE_SIZE / 2 - 200)
+
+/* Latching modes for the search function (in btr0cur.*) */
+#define BTR_SEARCH_LEAF		RW_S_LATCH
+#define BTR_MODIFY_LEAF		RW_X_LATCH
+#define BTR_NO_LATCHES		RW_NO_LATCH
+#define	BTR_MODIFY_TREE		33
+#define	BTR_CONT_MODIFY_TREE	34
+#define	BTR_SEARCH_PREV		35
+#define	BTR_MODIFY_PREV		36
+
+/* If this is ORed to the latch mode, it means that the search tuple will be
+inserted to the index, at the searched position */
+#define BTR_INSERT		512
+
+/* This flag ORed to latch mode says that we do the search in query
+optimization */
+#define BTR_ESTIMATE		1024
+
+/* This flag ORed to latch mode says that we can ignore possible
+UNIQUE definition on secondary indexes when we decide if we can use the
+insert buffer to speed up inserts */
+#define BTR_IGNORE_SEC_UNIQUE	2048	
+
+/******************************************************************
+Gets the root node of a tree and x-latches it. */
+
+page_t*
+btr_root_get(
+/*=========*/
+				/* out: root page, x-latched */
+	dict_tree_t*	tree,	/* in: index tree */
+	mtr_t*		mtr);	/* in: mtr */
+/******************************************************************
+Gets a buffer page and declares its latching order level. */
+UNIV_INLINE
+page_t*
+btr_page_get(
+/*=========*/
+	ulint	space,		/* in: space id */
+	ulint	page_no,	/* in: page number */
+	ulint	mode,		/* in: latch mode */
+	mtr_t*	mtr);		/* in: mtr */
+/******************************************************************
+Gets the index id field of a page. */
+UNIV_INLINE
+dulint
+btr_page_get_index_id(
+/*==================*/
+				/* out: index id */
+	page_t*		page);	/* in: index page */
+/************************************************************
+Gets the node level field in an index page. */
+UNIV_INLINE
+ulint
+btr_page_get_level_low(
+/*===================*/
+			/* out: level, leaf level == 0 */
+	page_t*	page);	/* in: index page */
+/************************************************************
+Gets the node level field in an index page. */
+UNIV_INLINE
+ulint
+btr_page_get_level(
+/*===============*/
+			/* out: level, leaf level == 0 */
+	page_t*	page,	/* in: index page */
+	mtr_t*	mtr);	/* in: mini-transaction handle */
+/************************************************************
+Gets the next index page number. */
+UNIV_INLINE
+ulint
+btr_page_get_next(
+/*==============*/
+			/* out: next page number */
+	page_t*	page,	/* in: index page */
+	mtr_t*	mtr);	/* in: mini-transaction handle */
+/************************************************************
+Gets the previous index page number. */
+UNIV_INLINE
+ulint
+btr_page_get_prev(
+/*==============*/
+			/* out: prev page number */
+	page_t*	page,	/* in: index page */
+	mtr_t*	mtr);	/* in: mini-transaction handle */
+/*****************************************************************
+Gets pointer to the previous user record in the tree. It is assumed
+that the caller has appropriate latches on the page and its neighbor. */
+
+rec_t*
+btr_get_prev_user_rec(
+/*==================*/
+			/* out: previous user record, NULL if there is none */
+	rec_t*	rec,	/* in: record on leaf level */
+	mtr_t*	mtr);	/* in: mtr holding a latch on the page, and if
+			needed, also to the previous page */
+/*****************************************************************
+Gets pointer to the next user record in the tree. It is assumed
+that the caller has appropriate latches on the page and its neighbor. */
+
+rec_t*
+btr_get_next_user_rec(
+/*==================*/
+			/* out: next user record, NULL if there is none */
+	rec_t*	rec,	/* in: record on leaf level */
+	mtr_t*	mtr);	/* in: mtr holding a latch on the page, and if
+			needed, also to the next page */
+/******************************************************************
+Releases the latch on a leaf page and buffer-unfixes it. */
+UNIV_INLINE
+void
+btr_leaf_page_release(
+/*==================*/
+	page_t*	page,		/* in: page */
+	ulint	latch_mode,	/* in: BTR_SEARCH_LEAF or BTR_MODIFY_LEAF */
+	mtr_t*	mtr);		/* in: mtr */
+/******************************************************************
+Gets the child node file address in a node pointer. */
+UNIV_INLINE
+ulint
+btr_node_ptr_get_child_page_no(
+/*===========================*/
+			   	/* out: child node address */
+	rec_t*		rec,	/* in: node pointer record */
+	const ulint*	offsets);/* in: array returned by rec_get_offsets() */
+/****************************************************************
+Creates the root node for a new index tree. */
+
+ulint
+btr_create(
+/*=======*/
+			/* out: page number of the created root, FIL_NULL if
+			did not succeed */
+	ulint	type,	/* in: type of the index */
+	ulint	space,	/* in: space where created */
+	dulint	index_id,/* in: index id */
+	ulint	comp,	/* in: nonzero=compact page format */
+	mtr_t*	mtr);	/* in: mini-transaction handle */
+/****************************************************************
+Frees a B-tree except the root page, which MUST be freed after this
+by calling btr_free_root. */
+
+void
+btr_free_but_not_root(
+/*==================*/
+	ulint	space,		/* in: space where created */
+	ulint	root_page_no);	/* in: root page number */
+/****************************************************************
+Frees the B-tree root page. The rest of the tree MUST already have been
+freed. */
+
+void
+btr_free_root(
+/*==========*/
+	ulint	space,		/* in: space where created */
+	ulint	root_page_no,	/* in: root page number */
+	mtr_t*	mtr);		/* in: a mini-transaction which has already
+				been started */
+/*****************************************************************
+Makes tree one level higher by splitting the root, and inserts
+the tuple. It is assumed that mtr contains an x-latch on the tree.
+NOTE that the operation of this function must always succeed,
+we cannot reverse it: therefore enough free disk space must be
+guaranteed to be available before this function is called. */
+
+rec_t*
+btr_root_raise_and_insert(
+/*======================*/
+				/* out: inserted record */
+	btr_cur_t*	cursor,	/* in: cursor at which to insert: must be
+				on the root page; when the function returns,
+				the cursor is positioned on the predecessor
+				of the inserted record */
+	dtuple_t*	tuple,	/* in: tuple to insert */
+	mtr_t*		mtr);	/* in: mtr */
+/*****************************************************************
+Reorganizes an index page. */
+
+void
+btr_page_reorganize(
+/*================*/
+	page_t*		page,	/* in: page to be reorganized */
+	dict_index_t*	index,	/* in: record descriptor */
+	mtr_t*		mtr);	/* in: mtr */
+/*****************************************************************
+Decides if the page should be split at the convergence point of
+inserts converging to left. */
+
+ibool
+btr_page_get_split_rec_to_left(
+/*===========================*/
+				/* out: TRUE if split recommended */
+	btr_cur_t*	cursor,	/* in: cursor at which to insert */
+	rec_t**		split_rec);/* out: if split recommended,
+				the first record on upper half page,
+				or NULL if tuple should be first */
+/*****************************************************************
+Decides if the page should be split at the convergence point of
+inserts converging to right. */
+
+ibool
+btr_page_get_split_rec_to_right(
+/*============================*/
+				/* out: TRUE if split recommended */
+	btr_cur_t*	cursor,	/* in: cursor at which to insert */
+	rec_t**		split_rec);/* out: if split recommended,
+				the first record on upper half page,
+				or NULL if tuple should be first */
+/*****************************************************************
+Splits an index page to halves and inserts the tuple. It is assumed
+that mtr holds an x-latch to the index tree. NOTE: the tree x-latch
+is released within this function! NOTE that the operation of this
+function must always succeed, we cannot reverse it: therefore
+enough free disk space must be guaranteed to be available before
+this function is called. */
+
+rec_t*
+btr_page_split_and_insert(
+/*======================*/
+				/* out: inserted record; NOTE: the tree
+				x-latch is released! NOTE: 2 free disk
+				pages must be available! */
+	btr_cur_t*	cursor,	/* in: cursor at which to insert; when the
+				function returns, the cursor is positioned
+				on the predecessor of the inserted record */
+	dtuple_t*	tuple,	/* in: tuple to insert */
+	mtr_t*		mtr);	/* in: mtr */
+/***********************************************************
+Inserts a data tuple to a tree on a non-leaf level. It is assumed
+that mtr holds an x-latch on the tree. */
+
+void
+btr_insert_on_non_leaf_level(
+/*=========================*/
+	dict_tree_t*	tree,	/* in: tree */
+	ulint		level,	/* in: level, must be > 0 */
+	dtuple_t*	tuple,	/* in: the record to be inserted */
+	mtr_t*		mtr);	/* in: mtr */
+/********************************************************************
+Sets a record as the predefined minimum record. */
+
+void
+btr_set_min_rec_mark(
+/*=================*/
+	rec_t*	rec,	/* in: record */
+	ulint	comp,	/* in: nonzero=compact page format */
+	mtr_t*	mtr);	/* in: mtr */
+/*****************************************************************
+Deletes on the upper level the node pointer to a page. */
+
+void
+btr_node_ptr_delete(
+/*================*/
+	dict_tree_t*	tree,	/* in: index tree */
+	page_t*		page,	/* in: page whose node pointer is deleted */
+	mtr_t*		mtr);	/* in: mtr */
+/****************************************************************
+Checks that the node pointer to a page is appropriate. */
+
+ibool
+btr_check_node_ptr(
+/*===============*/
+				/* out: TRUE */
+	dict_tree_t*	tree,	/* in: index tree */
+	page_t*		page,	/* in: index page */
+	mtr_t*		mtr);	/* in: mtr */
+/*****************************************************************
+Tries to merge the page first to the left immediate brother if such a
+brother exists, and the node pointers to the current page and to the
+brother reside on the same page. If the left brother does not satisfy these
+conditions, looks at the right brother. If the page is the only one on that
+level lifts the records of the page to the father page, thus reducing the
+tree height. It is assumed that mtr holds an x-latch on the tree and on the
+page. If cursor is on the leaf level, mtr must also hold x-latches to
+the brothers, if they exist. NOTE: it is assumed that the caller has reserved
+enough free extents so that the compression will always succeed if done! */
+void
+btr_compress(
+/*=========*/
+	btr_cur_t*	cursor,	/* in: cursor on the page to merge or lift;
+				the page must not be empty: in record delete
+				use btr_discard_page if the page would become
+				empty */
+	mtr_t*		mtr);	/* in: mtr */
+/*****************************************************************
+Discards a page from a B-tree. This is used to remove the last record from
+a B-tree page: the whole page must be removed at the same time. This cannot
+be used for the root page, which is allowed to be empty. */
+
+void
+btr_discard_page(
+/*=============*/
+	btr_cur_t*	cursor,	/* in: cursor on the page to discard: not on
+				the root page */
+	mtr_t*		mtr);	/* in: mtr */
+/********************************************************************
+Parses the redo log record for setting an index record as the predefined
+minimum record. */
+
+byte*
+btr_parse_set_min_rec_mark(
+/*=======================*/
+			/* out: end of log record or NULL */
+	byte*	ptr,	/* in: buffer */
+	byte*	end_ptr,/* in: buffer end */
+	ulint	comp,	/* in: nonzero=compact page format */
+	page_t*	page,	/* in: page or NULL */
+	mtr_t*	mtr);	/* in: mtr or NULL */
+/***************************************************************
+Parses a redo log record of reorganizing a page. */
+
+byte*
+btr_parse_page_reorganize(
+/*======================*/
+				/* out: end of log record or NULL */
+	byte*		ptr,	/* in: buffer */
+	byte*		end_ptr,/* in: buffer end */
+	dict_index_t*	index,	/* in: record descriptor */
+	page_t*		page,	/* in: page or NULL */
+	mtr_t*		mtr);	/* in: mtr or NULL */
+/******************************************************************
+Gets the number of pages in a B-tree. */
+
+ulint
+btr_get_size(
+/*=========*/
+				/* out: number of pages */
+	dict_index_t*	index,	/* in: index */
+	ulint		flag);	/* in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */
+/******************************************************************
+Allocates a new file page to be used in an index tree. NOTE: we assume
+that the caller has made the reservation for free extents! */
+
+page_t*
+btr_page_alloc(
+/*===========*/
+					/* out: new allocated page, x-latched;
+					NULL if out of space */
+	dict_tree_t*	tree,		/* in: index tree */
+	ulint		hint_page_no,	/* in: hint of a good page */
+	byte		file_direction,	/* in: direction where a possible
+					page split is made */
+	ulint		level,		/* in: level where the page is placed
+					in the tree */
+	mtr_t*		mtr);		/* in: mtr */
+/******************************************************************
+Frees a file page used in an index tree. NOTE: cannot free field external
+storage pages because the page must contain info on its level. */
+
+void
+btr_page_free(
+/*==========*/
+	dict_tree_t*	tree,	/* in: index tree */
+	page_t*		page,	/* in: page to be freed, x-latched */	
+	mtr_t*		mtr);	/* in: mtr */
+/******************************************************************
+Frees a file page used in an index tree. Can also be used to free BLOB
+external storage pages, because the page level 0 can be given as an
+argument. */
+
+void
+btr_page_free_low(
+/*==============*/
+	dict_tree_t*	tree,	/* in: index tree */
+	page_t*		page,	/* in: page to be freed, x-latched */	
+	ulint		level,	/* in: page level */
+	mtr_t*		mtr);	/* in: mtr */
+#ifdef UNIV_BTR_PRINT
+/*****************************************************************
+Prints size info of a B-tree. */
+
+void
+btr_print_size(
+/*===========*/
+	dict_tree_t*	tree);	/* in: index tree */
+/******************************************************************
+Prints directories and other info of all nodes in the tree. */
+
+void
+btr_print_tree(
+/*===========*/
+	dict_tree_t*	tree,	/* in: tree */
+	ulint		width);	/* in: print this many entries from start
+				and end */
+#endif /* UNIV_BTR_PRINT */
+/****************************************************************
+Checks the size and number of fields in a record based on the definition of
+the index. */
+
+ibool
+btr_index_rec_validate(
+/*====================*/
+					/* out: TRUE if ok */
+	rec_t*		rec,		/* in: index record */
+	dict_index_t*	index,		/* in: index */
+	ibool		dump_on_error);	/* in: TRUE if the function
+					should print hex dump of record
+					and page on error */
+/******************************************************************
+Checks the consistency of an index tree. */
+
+ibool
+btr_validate_tree(
+/*==============*/
+				/* out: TRUE if ok */
+	dict_tree_t*	tree,	/* in: tree */
+	trx_t*		trx);	/* in: transaction or NULL */
+
+#define BTR_N_LEAF_PAGES 	1
+#define BTR_TOTAL_SIZE		2
+
+#ifndef UNIV_NONINL
+#include "btr0btr.ic"
+#endif
+
+#endif 
--- a/include/btr0btr.ic
+++ b/include/btr0btr.ic
@ -0,0 +1,233 @@
+/******************************************************
+The B-tree
+
+(c) 1994-1996 Innobase Oy
+
+Created 6/2/1994 Heikki Tuuri
+*******************************************************/
+
+#include "mach0data.h"
+#include "mtr0mtr.h"
+#include "mtr0log.h"
+
+#define BTR_MAX_NODE_LEVEL	50	/* used in debug checking */
+
+/******************************************************************
+Gets a page with buf_page_get() in the requested latch mode. If
+UNIV_SYNC_DEBUG is defined and a latch was requested (mode is not
+RW_NO_LATCH), the page is also given the latching order level
+SYNC_TREE_NODE. */
+UNIV_INLINE
+page_t*
+btr_page_get(
+/*=========*/
+				/* out: page returned by buf_page_get() */
+	ulint	space,		/* in: space id */
+	ulint	page_no,	/* in: page number */
+	ulint	mode,		/* in: latch mode */
+	mtr_t*	mtr)		/* in: mtr */
+{
+	page_t*	frame;
+
+	frame = buf_page_get(space, page_no, mode, mtr);
+
+#ifdef UNIV_SYNC_DEBUG
+	if (mode == RW_NO_LATCH) {
+		/* No latch was requested: no latching level to declare */
+
+		return(frame);
+	}
+
+	buf_page_dbg_add_level(frame, SYNC_TREE_NODE);
+#endif
+	return(frame);
+}
+
+/******************************************************************
+Stores the index id in the PAGE_INDEX_ID field of the page header. The
+write is made with mlog_write_dulint() using the given mtr. */
+UNIV_INLINE
+void
+btr_page_set_index_id(
+/*==================*/
+	page_t*		page,	/* in: page whose index id field is written */
+	dulint		id,	/* in: index id */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	ulint	id_offset = PAGE_HEADER + PAGE_INDEX_ID;
+
+	mlog_write_dulint(page + id_offset, id, mtr);
+}
+
+/******************************************************************
+Reads the 8-byte index id stored in the PAGE_INDEX_ID field of the
+page header. */
+UNIV_INLINE
+dulint
+btr_page_get_index_id(
+/*==================*/
+				/* out: index id */
+	page_t*		page)	/* in: index page */
+{
+	dulint	index_id;
+
+	index_id = mach_read_from_8(page + (PAGE_HEADER + PAGE_INDEX_ID));
+
+	return(index_id);
+}
+
+/************************************************************
+Reads the 2-byte node level stored in the PAGE_LEVEL field of the page
+header. In debug builds the value is asserted not to exceed
+BTR_MAX_NODE_LEVEL. */
+UNIV_INLINE
+ulint
+btr_page_get_level_low(
+/*===================*/
+			/* out: level, leaf level == 0 */
+	page_t*	page)	/* in: index page */
+{
+	ulint	node_level;
+
+	ut_ad(page);
+
+	node_level = mach_read_from_2(page + (PAGE_HEADER + PAGE_LEVEL));
+
+	ut_ad(node_level <= BTR_MAX_NODE_LEVEL);
+
+	return(node_level);
+}
+
+/************************************************************
+Returns the node level of an index page by calling
+btr_page_get_level_low(). The mtr is only looked at by the debug
+assertion. */
+UNIV_INLINE
+ulint
+btr_page_get_level(
+/*===============*/
+			/* out: level, leaf level == 0 */
+	page_t*	page,	/* in: index page */
+	mtr_t*	mtr __attribute__((unused))) /* in: mini-transaction handle */
+{
+	ulint	node_level;
+
+	ut_ad(page && mtr);
+
+	node_level = btr_page_get_level_low(page);
+
+	return(node_level);
+}
+
+/************************************************************
+Stores the node level in the 2-byte PAGE_LEVEL field of the page
+header. The write is made with mlog_write_ulint() using the given mtr. */
+UNIV_INLINE
+void
+btr_page_set_level(
+/*===============*/
+	page_t*	page,	/* in: index page */
+	ulint	level,	/* in: level, leaf level == 0 */
+	mtr_t*	mtr)	/* in: mini-transaction handle */
+{
+	ulint	level_offset = PAGE_HEADER + PAGE_LEVEL;
+
+	ut_ad(page && mtr);
+	ut_ad(level <= BTR_MAX_NODE_LEVEL);
+
+	mlog_write_ulint(page + level_offset, level, MLOG_2BYTES, mtr);
+}
+
+/************************************************************
+Reads the 4-byte page number stored in the FIL_PAGE_NEXT field of an
+index page. In debug builds it is asserted that the mtr memo contains
+an x-fix or an s-fix on the page. */
+UNIV_INLINE
+ulint
+btr_page_get_next(
+/*==============*/
+			/* out: next page number */
+	page_t*	page,	/* in: index page */
+	mtr_t*	mtr __attribute__((unused))) /* in: mini-transaction handle */
+{
+	ulint	next_page_no;
+
+	ut_ad(page && mtr);
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
+				MTR_MEMO_PAGE_X_FIX)
+	      || mtr_memo_contains(mtr, buf_block_align(page),
+				   MTR_MEMO_PAGE_S_FIX));
+
+	next_page_no = mach_read_from_4(page + FIL_PAGE_NEXT);
+
+	return(next_page_no);
+}
+
+/************************************************************
+Stores a page number in the 4-byte FIL_PAGE_NEXT field of an index
+page. The write is made with mlog_write_ulint() using the given mtr. */
+UNIV_INLINE
+void
+btr_page_set_next(
+/*==============*/
+	page_t*	page,	/* in: index page */
+	ulint	next,	/* in: next page number */
+	mtr_t*	mtr)	/* in: mini-transaction handle */
+{
+	ut_ad(page && mtr);
+
+	mlog_write_ulint(page + FIL_PAGE_NEXT,
+			 next,
+			 MLOG_4BYTES,
+			 mtr);
+}
+
+/************************************************************
+Reads the 4-byte page number stored in the FIL_PAGE_PREV field of an
+index page. The mtr is only looked at by the debug assertion. */
+UNIV_INLINE
+ulint
+btr_page_get_prev(
+/*==============*/
+			/* out: prev page number */
+	page_t*	page,	/* in: index page */
+	mtr_t*	mtr __attribute__((unused))) /* in: mini-transaction handle */
+{
+	ulint	prev_page_no;
+
+	ut_ad(page && mtr);
+
+	prev_page_no = mach_read_from_4(page + FIL_PAGE_PREV);
+
+	return(prev_page_no);
+}
+
+/************************************************************
+Stores a page number in the 4-byte FIL_PAGE_PREV field of an index
+page. The write is made with mlog_write_ulint() using the given mtr. */
+UNIV_INLINE
+void
+btr_page_set_prev(
+/*==============*/
+	page_t*	page,	/* in: index page */
+	ulint	prev,	/* in: previous page number */
+	mtr_t*	mtr)	/* in: mini-transaction handle */
+{
+	ut_ad(page && mtr);
+
+	mlog_write_ulint(page + FIL_PAGE_PREV,
+			 prev,
+			 MLOG_4BYTES,
+			 mtr);
+}
+
+/******************************************************************
+Reads the child page number from a node pointer record. The number is
+taken from the last field of the record, which is asserted to be 4
+bytes long. If the number read is 0, a diagnostic message is written
+to stderr and the page containing the record is printed; the value 0
+is still returned to the caller. */
+UNIV_INLINE
+ulint
+btr_node_ptr_get_child_page_no(
+/*===========================*/
+			   	/* out: child node address */
+	rec_t*		rec,	/* in: node pointer record */
+	const ulint*	offsets)/* in: array returned by rec_get_offsets() */
+{
+	ulint	last_field_no;
+	ulint	field_len;
+	byte*	child_ref;
+	ulint	child_page_no;
+
+	ut_ad(!rec_offs_comp(offsets) || rec_get_node_ptr_flag(rec));
+
+	/* The child address is stored in the last field of the record */
+	last_field_no = rec_offs_n_fields(offsets) - 1;
+
+	child_ref = rec_get_nth_field(rec, offsets, last_field_no,
+				      &field_len);
+
+	ut_ad(field_len == 4);
+
+	child_page_no = mach_read_from_4(child_ref);
+
+	if (UNIV_UNLIKELY(child_page_no == 0)) {
+		/* Report the suspicious value, but do not stop here */
+		fprintf(stderr,
+"InnoDB: a nonsensical page number 0 in a node ptr record at offset %lu\n",
+			(ulong) ut_align_offset(rec, UNIV_PAGE_SIZE));
+		buf_page_print(buf_frame_align(rec));
+	}
+
+	return(child_page_no);
+}
+
+/******************************************************************
+Releases from the mtr memo the fix the mtr holds on a leaf page: an
+s-fix if latch_mode is BTR_SEARCH_LEAF, otherwise an x-fix (latch_mode
+is then asserted to be BTR_MODIFY_LEAF). In debug builds it is also
+asserted that the memo does not contain a MTR_MEMO_MODIFY entry for
+the page. */
+UNIV_INLINE
+void
+btr_leaf_page_release(
+/*==================*/
+	page_t*	page,		/* in: page */
+	ulint	latch_mode,	/* in: BTR_SEARCH_LEAF or BTR_MODIFY_LEAF */
+	mtr_t*	mtr)		/* in: mtr */
+{
+	ulint	fix_type;
+
+	ut_ad(!mtr_memo_contains(mtr, buf_block_align(page),
+				 MTR_MEMO_MODIFY));
+
+	if (latch_mode == BTR_SEARCH_LEAF) {
+		fix_type = MTR_MEMO_PAGE_S_FIX;
+	} else {
+		ut_ad(latch_mode == BTR_MODIFY_LEAF);
+		fix_type = MTR_MEMO_PAGE_X_FIX;
+	}
+
+	mtr_memo_release(mtr, buf_block_align(page), fix_type);
+}
--- a/include/btr0cur.h
+++ b/include/btr0cur.h
@ -0,0 +1,701 @@
+/******************************************************
+The index tree cursor
+
+(c) 1994-1996 Innobase Oy
+
+Created 10/16/1994 Heikki Tuuri
+*******************************************************/
+
+#ifndef btr0cur_h
+#define btr0cur_h
+
+#include "univ.i"
+#include "dict0dict.h"
+#include "data0data.h"
+#include "page0cur.h"
+#include "btr0types.h"
+#include "que0types.h"
+#include "row0types.h"
+#include "ha0ha.h"
+
+/* Mode flags for btr_cur operations; these can be ORed */
+#define BTR_NO_UNDO_LOG_FLAG	1	/* do no undo logging */
+#define BTR_NO_LOCKING_FLAG	2	/* do no record lock checking */
+#define BTR_KEEP_SYS_FLAG	4	/* sys fields will be found from the
+					update vector or inserted entry */
+
+#define BTR_CUR_ADAPT
+#define BTR_CUR_HASH_ADAPT
+
+/*************************************************************
+Returns the page cursor component of a tree cursor. */
+UNIV_INLINE
+page_cur_t*
+btr_cur_get_page_cur(
+/*=================*/
+				/* out: pointer to page cursor component */
+	btr_cur_t*	cursor);/* in: tree cursor */
+/*************************************************************
+Returns the record pointer of a tree cursor. */
+UNIV_INLINE
+rec_t*
+btr_cur_get_rec(
+/*============*/
+				/* out: pointer to record */
+	btr_cur_t*	cursor);/* in: tree cursor */
+/*************************************************************
+Invalidates a tree cursor by setting record pointer to NULL. */
+UNIV_INLINE
+void
+btr_cur_invalidate(
+/*===============*/
+	btr_cur_t*	cursor);/* in: tree cursor */
+/*************************************************************
+Returns the page of a tree cursor. */
+UNIV_INLINE
+page_t*
+btr_cur_get_page(
+/*=============*/
+				/* out: pointer to page */
+	btr_cur_t*	cursor);/* in: tree cursor */
+/*************************************************************
+Returns the tree of a cursor. */
+UNIV_INLINE
+dict_tree_t*
+btr_cur_get_tree(
+/*=============*/
+				/* out: tree */
+	btr_cur_t*	cursor);/* in: tree cursor */
+/*************************************************************
+Positions a tree cursor at a given record. */
+UNIV_INLINE
+void
+btr_cur_position(
+/*=============*/
+	dict_index_t*	index, 	/* in: index */
+	rec_t*		rec,	/* in: record in tree */
+	btr_cur_t*	cursor);/* in: cursor */
+/************************************************************************
+Searches an index tree and positions a tree cursor on a given level.
+NOTE: n_fields_cmp in tuple must be set so that it cannot be compared
+to node pointer page number fields on the upper levels of the tree!
+Note that if mode is PAGE_CUR_LE, which is used in inserts, then
+cursor->up_match and cursor->low_match both will have sensible values.
+If mode is PAGE_CUR_GE, then up_match will have a sensible value. */
+
+void
+btr_cur_search_to_nth_level(
+/*========================*/
+	dict_index_t*	index,	/* in: index */
+	ulint		level,	/* in: the tree level of search */
+	dtuple_t*	tuple,	/* in: data tuple; NOTE: n_fields_cmp in
+				tuple must be set so that it cannot get
+				compared to the node ptr page number field! */
+	ulint		mode,	/* in: PAGE_CUR_L, ...;
+				NOTE that if the search is made using a unique
+				prefix of a record, mode should be PAGE_CUR_LE,
+				not PAGE_CUR_GE, as the latter may end up on
+				the previous page of the record! Inserts
+				should always be made using PAGE_CUR_LE to
+				search the position! */
+	ulint		latch_mode, /* in: BTR_SEARCH_LEAF, ..., ORed with
+				BTR_INSERT and BTR_ESTIMATE;
+				cursor->left_page is used to store a pointer
+				to the left neighbor page, in the cases
+				BTR_SEARCH_PREV and BTR_MODIFY_PREV;
+				NOTE that if has_search_latch
+				is != 0, we maybe do not have a latch set
+				on the cursor page, we assume
+				the caller uses his search latch
+				to protect the record! */
+	btr_cur_t*	cursor, /* in/out: tree cursor; the cursor page is
+				s- or x-latched, but see also above! */
+	ulint		has_search_latch,/* in: latch mode the caller
+				currently has on btr_search_latch:
+				RW_S_LATCH, or 0 */
+	mtr_t*		mtr);	/* in: mtr */
+/*********************************************************************
+Opens a cursor at either end of an index. */
+
+void
+btr_cur_open_at_index_side(
+/*=======================*/
+	ibool		from_left,	/* in: TRUE if open to the low end,
+					FALSE if to the high end */
+	dict_index_t*	index,		/* in: index */
+	ulint		latch_mode,	/* in: latch mode */
+	btr_cur_t*	cursor,		/* in: cursor */
+	mtr_t*		mtr);		/* in: mtr */
+/**************************************************************************
+Positions a cursor at a randomly chosen position within a B-tree. */
+
+void
+btr_cur_open_at_rnd_pos(
+/*====================*/
+	dict_index_t*	index,		/* in: index */
+	ulint		latch_mode,	/* in: BTR_SEARCH_LEAF, ... */
+	btr_cur_t*	cursor,		/* in/out: B-tree cursor */
+	mtr_t*		mtr);		/* in: mtr */
+/*****************************************************************
+Tries to perform an insert to a page in an index tree, next to cursor.
+It is assumed that mtr holds an x-latch on the page. The operation does
+not succeed if there is too little space on the page. If there is just
+one record on the page, the insert will always succeed; this is to
+prevent trying to split a page with just one record. */
+
+ulint
+btr_cur_optimistic_insert(
+/*======================*/
+				/* out: DB_SUCCESS, DB_LOCK_WAIT,
+				DB_FAIL, or error number */
+	ulint		flags,	/* in: undo logging and locking flags: if not
+				zero, the parameters index and thr should be
+				specified */
+	btr_cur_t*	cursor,	/* in: cursor on page after which to insert;
+				cursor stays valid */
+	dtuple_t*	entry,	/* in: entry to insert */
+	rec_t**		rec,	/* out: pointer to inserted record if
+				succeed */
+	big_rec_t**	big_rec,/* out: big rec vector whose fields have to
+				be stored externally by the caller, or
+				NULL */
+	que_thr_t*	thr,	/* in: query thread or NULL */
+	mtr_t*		mtr);	/* in: mtr */
+/*****************************************************************
+Performs an insert on a page of an index tree. It is assumed that mtr
+holds an x-latch on the tree and on the cursor page. If the insert is
+made on the leaf level, to avoid deadlocks, mtr must also own x-latches
+to brothers of page, if those brothers exist. */
+
+ulint
+btr_cur_pessimistic_insert(
+/*=======================*/
+				/* out: DB_SUCCESS or error number */
+	ulint		flags,	/* in: undo logging and locking flags: if not
+				zero, the parameter thr should be
+				specified; if no undo logging is specified,
+				then the caller must have reserved enough
+				free extents in the file space so that the
+				insertion will certainly succeed */
+	btr_cur_t*	cursor,	/* in: cursor after which to insert;
+				cursor stays valid */
+	dtuple_t*	entry,	/* in: entry to insert */
+	rec_t**		rec,	/* out: pointer to inserted record if
+				succeed */
+	big_rec_t**	big_rec,/* out: big rec vector whose fields have to
+				be stored externally by the caller, or
+				NULL */
+	que_thr_t*	thr,	/* in: query thread or NULL */
+	mtr_t*		mtr);	/* in: mtr */
+/*****************************************************************
+Updates a record when the update causes no size changes in its fields. */
+
+ulint
+btr_cur_update_in_place(
+/*====================*/
+				/* out: DB_SUCCESS or error number */
+	ulint		flags,	/* in: undo logging and locking flags */
+	btr_cur_t*	cursor,	/* in: cursor on the record to update;
+				cursor stays valid and positioned on the
+				same record */
+	upd_t*		update,	/* in: update vector */
+	ulint		cmpl_info,/* in: compiler info on secondary index
+				updates */
+	que_thr_t*	thr,	/* in: query thread */
+	mtr_t*		mtr);	/* in: mtr */
+/*****************************************************************
+Tries to update a record on a page in an index tree. It is assumed that mtr
+holds an x-latch on the page. The operation does not succeed if there is too
+little space on the page or if the update would result in too empty a page,
+so that tree compression is recommended. */
+
+ulint
+btr_cur_optimistic_update(
+/*======================*/
+				/* out: DB_SUCCESS, or DB_OVERFLOW if the
+				updated record does not fit, DB_UNDERFLOW
+				if the page would become too empty */
+	ulint		flags,	/* in: undo logging and locking flags */
+	btr_cur_t*	cursor,	/* in: cursor on the record to update;
+				cursor stays valid and positioned on the
+				same record */
+	upd_t*		update,	/* in: update vector; this must also
+				contain trx id and roll ptr fields */
+	ulint		cmpl_info,/* in: compiler info on secondary index
+				updates */
+	que_thr_t*	thr,	/* in: query thread */
+	mtr_t*		mtr);	/* in: mtr */
+/*****************************************************************
+Performs an update of a record on a page of a tree. It is assumed
+that mtr holds an x-latch on the tree and on the cursor page. If the
+update is made on the leaf level, to avoid deadlocks, mtr must also
+own x-latches to brothers of page, if those brothers exist. */
+
+ulint
+btr_cur_pessimistic_update(
+/*=======================*/
+				/* out: DB_SUCCESS or error code */
+	ulint		flags,	/* in: undo logging, locking, and rollback
+				flags */
+	btr_cur_t*	cursor,	/* in: cursor on the record to update */
+	big_rec_t**	big_rec,/* out: big rec vector whose fields have to
+				be stored externally by the caller, or NULL */
+	upd_t*		update,	/* in: update vector; this is allowed also
+				to contain trx id and roll ptr fields, but
+				the values in update vector have no effect */
+	ulint		cmpl_info,/* in: compiler info on secondary index
+				updates */
+	que_thr_t*	thr,	/* in: query thread */
+	mtr_t*		mtr);	/* in: mtr */
+/***************************************************************
+Marks a clustered index record deleted. Writes an undo log record to
+undo log on this delete marking. Writes in the trx id field the id
+of the deleting transaction, and in the roll ptr field pointer to the
+undo log record created. */
+
+ulint
+btr_cur_del_mark_set_clust_rec(
+/*===========================*/
+				/* out: DB_SUCCESS, DB_LOCK_WAIT, or error
+				number */
+	ulint		flags,	/* in: undo logging and locking flags */
+	btr_cur_t*	cursor,	/* in: cursor */
+	ibool		val,	/* in: value to set */
+	que_thr_t*	thr,	/* in: query thread */
+	mtr_t*		mtr);	/* in: mtr */
+/***************************************************************
+Sets a secondary index record delete mark to TRUE or FALSE. */
+
+ulint
+btr_cur_del_mark_set_sec_rec(
+/*=========================*/
+				/* out: DB_SUCCESS, DB_LOCK_WAIT, or error
+				number */
+	ulint		flags,	/* in: locking flag */
+	btr_cur_t*	cursor,	/* in: cursor */
+	ibool		val,	/* in: value to set */
+	que_thr_t*	thr,	/* in: query thread */
+	mtr_t*		mtr);	/* in: mtr */
+/***************************************************************
+Sets a secondary index record delete mark to FALSE. This function is
+only used by the insert buffer insert merge mechanism. */
+
+void
+btr_cur_del_unmark_for_ibuf(
+/*========================*/
+	rec_t*		rec,	/* in: record to delete unmark */
+	mtr_t*		mtr);	/* in: mtr */
+/*****************************************************************
+Tries to compress a page of the tree on the leaf level. It is assumed
+that mtr holds an x-latch on the tree and on the cursor page. To avoid
+deadlocks, mtr must also own x-latches to brothers of page, if those
+brothers exist. NOTE: it is assumed that the caller has reserved enough
+free extents so that the compression will always succeed if done! */
+
+void
+btr_cur_compress(
+/*=============*/
+	btr_cur_t*	cursor,	/* in: cursor on the page to compress;
+				cursor does not stay valid */
+	mtr_t*		mtr);	/* in: mtr */
+/*****************************************************************
+Tries to compress a page of the tree if it seems useful. It is assumed
+that mtr holds an x-latch on the tree and on the cursor page. To avoid
+deadlocks, mtr must also own x-latches to brothers of page, if those
+brothers exist. NOTE: it is assumed that the caller has reserved enough
+free extents so that the compression will always succeed if done! */
+
+ibool
+btr_cur_compress_if_useful(
+/*=======================*/
+				/* out: TRUE if compression occurred */
+	btr_cur_t*	cursor,	/* in: cursor on the page to compress;
+				cursor does not stay valid if compression
+				occurs */
+	mtr_t*		mtr);	/* in: mtr */
+/***********************************************************
+Removes the record on which the tree cursor is positioned. It is assumed
+that the mtr has an x-latch on the page where the cursor is positioned,
+but no latch on the whole tree. */
+
+ibool
+btr_cur_optimistic_delete(
+/*======================*/
+				/* out: TRUE if success, i.e., the page
+				did not become too empty */
+	btr_cur_t*	cursor,	/* in: cursor on the record to delete;
+				cursor stays valid: if deletion succeeds,
+				on function exit it points to the successor
+				of the deleted record */
+	mtr_t*		mtr);	/* in: mtr */
+/*****************************************************************
+Removes the record on which the tree cursor is positioned. Tries
+to compress the page if its fillfactor drops below a threshold
+or if it is the only page on the level. It is assumed that mtr holds
+an x-latch on the tree and on the cursor page. To avoid deadlocks,
+mtr must also own x-latches to brothers of page, if those brothers
+exist. */
+
+ibool
+btr_cur_pessimistic_delete(
+/*=======================*/
+				/* out: TRUE if compression occurred */
+	ulint*		err,	/* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE;
+				the latter may occur because we may have
+				to update node pointers on upper levels,
+				and in the case of variable length keys
+				these may actually grow in size */
+	ibool		has_reserved_extents, /* in: TRUE if the
+				caller has already reserved enough free
+				extents so that he knows that the operation
+				will succeed */
+	btr_cur_t*	cursor,	/* in: cursor on the record to delete;
+				if compression does not occur, the cursor
+				stays valid: it points to successor of
+				deleted record on function exit */
+	ibool		in_rollback,/* in: TRUE if called in rollback */
+	mtr_t*		mtr);	/* in: mtr */
+/***************************************************************
+Parses a redo log record of updating a record in-place. */
+
+byte*
+btr_cur_parse_update_in_place(
+/*==========================*/
+				/* out: end of log record or NULL */
+	byte*		ptr,	/* in: buffer */
+	byte*		end_ptr,/* in: buffer end */
+	page_t*		page,	/* in: page or NULL */
+	dict_index_t*	index);	/* in: index corresponding to page */
+/********************************************************************
+Parses the redo log record for delete marking or unmarking of a clustered
+index record. */
+
+byte*
+btr_cur_parse_del_mark_set_clust_rec(
+/*=================================*/
+				/* out: end of log record or NULL */
+	byte*		ptr,	/* in: buffer */
+	byte*		end_ptr,/* in: buffer end */
+	dict_index_t*	index,	/* in: index corresponding to page */
+	page_t*		page);	/* in: page or NULL */
+/********************************************************************
+Parses the redo log record for delete marking or unmarking of a secondary
+index record. */
+
+byte*
+btr_cur_parse_del_mark_set_sec_rec(
+/*===============================*/
+				/* out: end of log record or NULL */
+	byte*		ptr,	/* in: buffer */
+	byte*		end_ptr,/* in: buffer end */
+	page_t*		page);	/* in: page or NULL */
+/***********************************************************************
+Estimates the number of rows in a given index range. */
+
+ib_longlong
+btr_estimate_n_rows_in_range(
+/*=========================*/
+				/* out: estimated number of rows */
+	dict_index_t*	index,	/* in: index */
+	dtuple_t*	tuple1,	/* in: range start, may also be empty tuple */
+	ulint		mode1,	/* in: search mode for range start */
+	dtuple_t*	tuple2,	/* in: range end, may also be empty tuple */
+	ulint		mode2);	/* in: search mode for range end */
+/***********************************************************************
+Estimates the number of different key values in a given index, for
+each n-column prefix of the index where n <= dict_index_get_n_unique(index).
+The estimates are stored in the array index->stat_n_diff_key_vals. */
+
+void
+btr_estimate_number_of_different_key_vals(
+/*======================================*/
+	dict_index_t*	index);	/* in: index */
+/***********************************************************************
+Marks not updated extern fields as not-owned by this record. The ownership
+is transferred to the updated record which is inserted elsewhere in the
+index tree. In purge only the owner of externally stored field is allowed
+to free the field. */
+
+void
+btr_cur_mark_extern_inherited_fields(
+/*=================================*/
+	rec_t*		rec,	/* in: record in a clustered index */
+	const ulint*	offsets,/* in: array returned by rec_get_offsets() */
+	upd_t*		update,	/* in: update vector */
+	mtr_t*		mtr);	/* in: mtr */
+/***********************************************************************
+The complement of the previous function: in an update entry may inherit
+some externally stored fields from a record. We must mark them as inherited
+in entry, so that they are not freed in a rollback. */
+
+void
+btr_cur_mark_dtuple_inherited_extern(
+/*=================================*/
+	dtuple_t*	entry,		/* in: updated entry to be inserted to
+					clustered index */
+	ulint*		ext_vec,	/* in: array of extern fields in the
+					original record */
+	ulint		n_ext_vec,	/* in: number of elements in ext_vec */
+	upd_t*		update);	/* in: update vector */
+/***********************************************************************
+Marks all extern fields in a dtuple as owned by the record. */
+
+void
+btr_cur_unmark_dtuple_extern_fields(
+/*================================*/
+	dtuple_t*	entry,		/* in: clustered index entry */
+	ulint*		ext_vec,	/* in: array of numbers of fields
+					which have been stored externally */
+	ulint		n_ext_vec);	/* in: number of elements in ext_vec */
+/***********************************************************************
+Stores the fields in big_rec_vec to the tablespace and puts pointers to
+them in rec. The fields are stored on pages allocated from leaf node
+file segment of the index tree. */
+
+ulint
+btr_store_big_rec_extern_fields(
+/*============================*/
+					/* out: DB_SUCCESS or error */
+	dict_index_t*	index,		/* in: index of rec; the index tree
+					MUST be X-latched */
+	rec_t*		rec,		/* in: record */
+	const ulint*	offsets,	/* in: rec_get_offsets(rec, index) */
+	big_rec_t*	big_rec_vec,	/* in: vector containing fields
+					to be stored externally */
+	mtr_t*		local_mtr);	/* in: mtr containing the latch to
+					rec and to the tree */
+/***********************************************************************
+Frees the space in an externally stored field to the file space
+management if the field in data owns the externally stored field;
+in a rollback we may have the additional condition that the field must
+not be inherited. */
+
+void
+btr_free_externally_stored_field(
+/*=============================*/
+	dict_index_t*	index,		/* in: index of the data, the index
+					tree MUST be X-latched; if the tree
+					height is 1, then also the root page
+					must be X-latched! (this is relevant
+					in the case this function is called
+					from purge where 'data' is located on
+					an undo log page, not an index
+					page) */
+	byte*		data,		/* in: internally stored data
+					+ reference to the externally
+					stored part */
+	ulint		local_len,	/* in: length of data */
+	ibool		do_not_free_inherited,/* in: TRUE if called in a
+					rollback and we do not want to free
+					inherited fields */
+	mtr_t*		local_mtr);	/* in: mtr containing the latch to
+					data and an X-latch to the index
+					tree */
+/***************************************************************
+Frees the externally stored fields for a record. */
+
+void
+btr_rec_free_externally_stored_fields(
+/*==================================*/
+	dict_index_t*	index,	/* in: index of the data, the index
+				tree MUST be X-latched */
+	rec_t*		rec,	/* in: record */
+	const ulint*	offsets,/* in: rec_get_offsets(rec, index) */
+	ibool		do_not_free_inherited,/* in: TRUE if called in a
+				rollback and we do not want to free
+				inherited fields */
+	mtr_t*		mtr);	/* in: mini-transaction handle which contains
+				an X-latch to record page and to the index
+				tree */
+/***********************************************************************
+Copies an externally stored field of a record to mem heap. */
+
+byte*
+btr_rec_copy_externally_stored_field(
+/*=================================*/
+				/* out: the field copied to heap */
+	rec_t*		rec,	/* in: record */
+	const ulint*	offsets,/* in: array returned by rec_get_offsets() */
+	ulint		no,	/* in: field number */
+	ulint*		len,	/* out: length of the field */
+	mem_heap_t*	heap);	/* in: mem heap */
+/***********************************************************************
+Copies an externally stored field of a record to mem heap. Parameter
+data contains a pointer to 'internally' stored part of the field:
+possibly some data, and the reference to the externally stored part in
+the last 20 bytes of data. */
+
+byte*
+btr_copy_externally_stored_field(
+/*=============================*/
+				/* out: the whole field copied to heap */
+	ulint*		len,	/* out: length of the whole field */
+	byte*		data,	/* in: 'internally' stored part of the
+				field containing also the reference to
+				the external part */
+	ulint		local_len,/* in: length of data */
+	mem_heap_t*	heap);	/* in: mem heap */
+/***********************************************************************
+Stores the positions of the fields marked as extern storage in the update
+vector, and also those fields which are marked as extern storage in rec
+and not mentioned in updated fields. We use this function to remember
+which fields we must mark as extern storage in a record inserted for an
+update. */
+
+ulint
+btr_push_update_extern_fields(
+/*==========================*/
+				/* out: number of values stored in ext_vect */
+	ulint*		ext_vect,/* in: array of ulints, must be preallocated
+				to have space for all fields in rec */
+	const ulint*	offsets,/* in: array returned by rec_get_offsets() */
+	upd_t*		update);/* in: update vector or NULL */
+	
+
+/*######################################################################*/
+
+/* In the pessimistic delete, if the page data size drops below this
+limit, merging it to a neighbor is tried */
+
+#define BTR_CUR_PAGE_COMPRESS_LIMIT	(UNIV_PAGE_SIZE / 2)
+
+/* A slot in the path array. We store here info on a search path down the
+tree. Each slot contains data on a single level of the tree. The end of
+the array is marked by a slot whose nth_rec is ULINT_UNDEFINED. */
+
+typedef struct btr_path_struct	btr_path_t;
+struct btr_path_struct{
+	ulint	nth_rec;	/* index of the record
+				where the page cursor stopped on
+				this level (index in alphabetical
+				order); value ULINT_UNDEFINED
+				denotes array end */
+	ulint	n_recs;		/* number of records on the page */
+};
+
+#define BTR_PATH_ARRAY_N_SLOTS	250	/* size of path array (in slots) */
+
+/* The tree cursor: the definition appears here only so that the compiler
+knows the struct size! */
+
+struct btr_cur_struct {
+	dict_index_t*	index;		/* index where positioned */
+	page_cur_t	page_cur;	/* page cursor */
+	page_t*		left_page;	/* this field is used to store a pointer
+					to the left neighbor page, in the cases
+					BTR_SEARCH_PREV and BTR_MODIFY_PREV */
+	/*------------------------------*/
+	que_thr_t*	thr;		/* this field is only used when
+					btr_cur_search_... is called for an
+					index entry insertion: the calling
+					query thread is passed here to be
+					used in the insert buffer */
+	/*------------------------------*/
+	/* The following fields are used in btr_cur_search... to pass
+	information: */
+	ulint		flag;		/* BTR_CUR_HASH, BTR_CUR_HASH_FAIL,
+					BTR_CUR_BINARY, or
+					BTR_CUR_INSERT_TO_IBUF */
+	ulint		tree_height;	/* Tree height if the search is done
+					for a pessimistic insert or update
+					operation */
+	ulint		up_match;	/* If the search mode was PAGE_CUR_LE,
+					the number of matched fields to the
+					first user record to the right of
+					the cursor record after
+					btr_cur_search_...;
+					for the mode PAGE_CUR_GE, the matched
+					fields to the first user record AT THE
+					CURSOR or to the right of it;
+					NOTE that the up_match and low_match
+					values may exceed the correct values
+					for comparison to the adjacent user
+					record if that record is on a
+					different leaf page! (See the note in
+					row_ins_duplicate_key.) */
+	ulint		up_bytes;	/* number of matched bytes to the
+					right at the time cursor positioned;
+					only used internally in searches: not
+					defined after the search */
+	ulint		low_match;	/* if search mode was PAGE_CUR_LE,
+					the number of matched fields to the
+					first user record AT THE CURSOR or
+					to the left of it after
+					btr_cur_search_...;
+					NOT defined for PAGE_CUR_GE or any
+					other search modes; see also the NOTE
+					in up_match! */
+	ulint		low_bytes;	/* number of matched bytes to the
+					right at the time cursor positioned;
+					only used internally in searches: not
+					defined after the search;
+					NOTE(review): this wording is the same
+					as for up_bytes; presumably this is
+					the byte count belonging to low_match
+					- confirm */
+	ulint		n_fields;	/* prefix length used in a hash
+					search if hash_node != NULL;
+					NOTE(review): this struct has no
+					hash_node field, so the reference
+					looks stale - confirm */
+	ulint		n_bytes;	/* hash prefix bytes if hash_node !=
+					NULL */
+	ulint		fold;		/* fold value used in the search if
+					flag is BTR_CUR_HASH */
+	/*------------------------------*/
+	btr_path_t*	path_arr;	/* in estimating the number of
+					rows in range, we store in this array
+					information of the path through
+					the tree */
+};
+
+/* Values for the flag documenting the used search method */
+#define BTR_CUR_HASH		1	/* successful shortcut using the hash
+					index */
+#define BTR_CUR_HASH_FAIL	2	/* failure using hash, success using
+					binary search: the misleading hash
+					reference is stored in the field
+					hash_node, and might be necessary to
+					update */
+#define BTR_CUR_BINARY		3	/* success using the binary search */
+#define BTR_CUR_INSERT_TO_IBUF	4	/* performed the intended insert to
+					the insert buffer */
+
+/* If pessimistic delete fails because of lack of file space,
+there is still a good chance of success a little later: try this many times,
+and sleep this many microseconds in between */
+#define BTR_CUR_RETRY_DELETE_N_TIMES	100
+#define BTR_CUR_RETRY_SLEEP_TIME	50000
+
+/* The reference in a field for which data is stored on a different page.
+The reference is at the end of the 'locally' stored part of the field.
+'Locally' means storage in the index record.
+We store locally a long enough prefix of each column so that we can determine
+the ordering parts of each index record without looking into the externally
+stored part. */
+
+/*--------------------------------------*/
+#define BTR_EXTERN_SPACE_ID		0	/* space id where stored */
+#define BTR_EXTERN_PAGE_NO		4	/* page no where stored */
+#define BTR_EXTERN_OFFSET		8	/* offset of BLOB header
+						on that page */
+#define BTR_EXTERN_LEN			12	/* 8 bytes containing the
+						length of the externally
+						stored part of the BLOB.
+						The 2 highest bits are
+						reserved to the flags below. */
+/*--------------------------------------*/
+#define BTR_EXTERN_FIELD_REF_SIZE	20
+
+/* The highest bit of BTR_EXTERN_LEN (i.e., the highest bit of the byte
+at lowest address) is set to 1 if this field does not 'own' the externally
+stored field; only the owner field is allowed to free the field in purge!
+If the 2nd highest bit is 1 then it means that the externally stored field
+was inherited from an earlier version of the row. In rollback we are not
+allowed to free an inherited external field. */
+
+#define BTR_EXTERN_OWNER_FLAG		128
+#define BTR_EXTERN_INHERITED_FLAG	64
+
+extern ulint	btr_cur_n_non_sea;
+extern ulint	btr_cur_n_sea;
+extern ulint	btr_cur_n_non_sea_old;
+extern ulint	btr_cur_n_sea_old;
+
+#ifndef UNIV_NONINL
+#include "btr0cur.ic"
+#endif
+				
+#endif
--- a/include/btr0cur.ic
+++ b/include/btr0cur.ic
@ -0,0 +1,172 @@
+/******************************************************
+The index tree cursor
+
+(c) 1994-1996 Innobase Oy
+
+Created 10/16/1994 Heikki Tuuri
+*******************************************************/
+
+#include "btr0btr.h"
+
+/*************************************************************
+Gives access to the page cursor embedded in a tree cursor. */
+UNIV_INLINE
+page_cur_t*
+btr_cur_get_page_cur(
+/*=================*/
+				/* out: address of the page_cur member */
+	btr_cur_t*	cursor)	/* in: tree cursor */
+{
+	return(&cursor->page_cur);
+}
+
+/*************************************************************
+Fetches the record on which the page cursor of a tree cursor stands. */
+UNIV_INLINE
+rec_t*
+btr_cur_get_rec(
+/*============*/
+				/* out: pointer to record */
+	btr_cur_t*	cursor)	/* in: tree cursor */
+{
+	return(page_cur_get_rec(btr_cur_get_page_cur(cursor)));
+}
+
+/*************************************************************
+Invalidates a tree cursor: its page cursor record pointer is set to NULL. */
+UNIV_INLINE
+void
+btr_cur_invalidate(
+/*===============*/
+	btr_cur_t*	cursor)	/* in: tree cursor */
+{
+	page_cur_invalidate(btr_cur_get_page_cur(cursor));
+}
+
+/*************************************************************
+Returns the page on which the record of a tree cursor resides. */
+UNIV_INLINE
+page_t*
+btr_cur_get_page(
+/*=============*/
+				/* out: pointer to page */
+	btr_cur_t*	cursor)	/* in: tree cursor */
+{
+	page_t*	frame = buf_frame_align(btr_cur_get_rec(cursor));
+	ut_ad(cursor->index->table->comp == !!page_is_comp(frame));
+	return(frame);
+}
+
+/*************************************************************
+Returns the tree of the index that a cursor is attached to. */
+UNIV_INLINE
+dict_tree_t*
+btr_cur_get_tree(
+/*=============*/
+				/* out: tree of cursor->index */
+	btr_cur_t*	cursor)	/* in: tree cursor */
+{
+	return(cursor->index->tree);
+}
+
+/*************************************************************
+Places a tree cursor on a given record and attaches it to a given index. */
+UNIV_INLINE
+void
+btr_cur_position(
+/*=============*/
+	dict_index_t*	index,	/* in: index */
+	rec_t*		rec,	/* in: record in tree */
+	btr_cur_t*	cursor)	/* in: cursor */
+{
+	page_cur_t*	page_cursor = btr_cur_get_page_cur(cursor);
+	page_cur_position(rec, page_cursor);
+	cursor->index = index;
+}
+
+/*************************************************************************
+Decides whether it makes sense to compress the index page on which a btr
+cursor is placed. */
+UNIV_INLINE
+ibool
+btr_cur_compress_recommendation(
+/*============================*/
+				/* out: TRUE if compression is recommended */
+	btr_cur_t*	cursor,	/* in: btr cursor */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	page_t*		page;
+
+	page = btr_cur_get_page(cursor);
+
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
+				MTR_MEMO_PAGE_X_FIX));
+
+	if ((page_get_data_size(page) >= BTR_CUR_PAGE_COMPRESS_LIMIT)
+	    && ((btr_page_get_next(page, mtr) != FIL_NULL)
+		|| (btr_page_get_prev(page, mtr) != FIL_NULL))) {
+
+		/* The page is still filled at least to the predefined
+		minimum AND it has a neighbor page on its level of the
+		B-tree: nothing to gain from compression. */
+
+		return(FALSE);
+	}
+
+	/* Either the fill factor is too low or the page is alone on its
+	level: recommend compression, except for the root page. */
+
+	if (dict_tree_get_page((cursor->index)->tree)
+	    == buf_frame_get_page_no(page)) {
+
+		/* It is the root page */
+		return(FALSE);
+	}
+
+	return(TRUE);
+}
+
+/*************************************************************************
+Tells whether the record under the cursor can be removed without the delete
+making tree compression necessary (or, recommended). */
+UNIV_INLINE
+ibool
+btr_cur_can_delete_without_compress(
+/*================================*/
+				/* out: TRUE if can be deleted without
+				recommended compression */
+	btr_cur_t*	cursor,	/* in: btr cursor */
+	ulint		rec_size,/* in: rec_get_size(btr_cur_get_rec(cursor))*/
+	mtr_t*		mtr)	/* in: mtr */
+{
+	page_t*		page;
+
+	page = btr_cur_get_page(cursor);
+
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
+				MTR_MEMO_PAGE_X_FIX));
+
+	if ((page_get_data_size(page) - rec_size >= BTR_CUR_PAGE_COMPRESS_LIMIT)
+	    && ((btr_page_get_next(page, mtr) != FIL_NULL)
+		|| (btr_page_get_prev(page, mtr) != FIL_NULL))
+	    && (page_get_n_recs(page) >= 2)) {
+
+		/* After the delete the page stays filled at least to the
+		predefined minimum, it has a neighbor page on its level,
+		AND it will not become empty: no compression needed. */
+
+		return(TRUE);
+	}
+
+	/* The fill factor would drop too low, OR the page is alone on its
+	level, OR the page would become empty: only on the root page can
+	the delete then be done without recommended compression. */
+
+	if (dict_tree_get_page((cursor->index)->tree)
+	    == buf_frame_get_page_no(page)) {
+		/* It is the root page */
+		return(TRUE);
+	}
+
+	return(FALSE);
+}
--- a/include/btr0pcur.h
+++ b/include/btr0pcur.h
@ -0,0 +1,516 @@
+/******************************************************
+The index tree persistent cursor
+
+(c) 1996 Innobase Oy
+
+Created 2/23/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef btr0pcur_h
+#define btr0pcur_h
+
+#include "univ.i"
+#include "dict0dict.h"
+#include "data0data.h"
+#include "mtr0mtr.h"
+#include "page0cur.h"
+#include "btr0cur.h"
+#include "btr0btr.h"
+#include "btr0types.h"
+
+/* Relative positions for a stored cursor position */
+#define BTR_PCUR_ON			1
+#define BTR_PCUR_BEFORE			2
+#define BTR_PCUR_AFTER			3
+/* Note that if the tree is not empty, btr_pcur_store_position does not
+use the following, but only uses the above three alternatives, where the
+position is stored relative to a specific record: this makes implementation
+of a scroll cursor easier */
+#define BTR_PCUR_BEFORE_FIRST_IN_TREE	4	/* in an empty tree */
+#define BTR_PCUR_AFTER_LAST_IN_TREE	5	/* in an empty tree */
+
+/******************************************************************
+Allocates memory for a persistent cursor object and initializes the cursor. */
+
+btr_pcur_t*
+btr_pcur_create_for_mysql(void);
+/*============================*/
+				/* out, own: persistent cursor */
+/******************************************************************
+Frees the memory for a persistent cursor object. */
+
+void
+btr_pcur_free_for_mysql(
+/*====================*/
+	btr_pcur_t*	cursor);	/* in, own: persistent cursor */
+/******************************************************************
+Copies the stored position of a pcur to another pcur. */
+
+void
+btr_pcur_copy_stored_position(
+/*==========================*/
+	btr_pcur_t*	pcur_receive,	/* in: pcur which will receive the
+					position info */
+	btr_pcur_t*	pcur_donate);	/* in: pcur from which the info is
+					copied */
+/******************************************************************
+Sets the old_rec_buf field to NULL. */
+UNIV_INLINE
+void
+btr_pcur_init(
+/*==========*/
+	btr_pcur_t*	pcur);	/* in: persistent cursor */
+/******************************************************************
+Initializes and opens a persistent cursor to an index tree. It should be
+closed with btr_pcur_close. */
+UNIV_INLINE
+void
+btr_pcur_open(
+/*==========*/
+	dict_index_t*	index,	/* in: index */
+	dtuple_t*	tuple,	/* in: tuple on which search done */
+	ulint		mode,	/* in: PAGE_CUR_L, ...;
+				NOTE that if the search is made using a unique
+				prefix of a record, mode should be
+				PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
+				may end up on the previous page from the
+				record! */
+	ulint		latch_mode,/* in: BTR_SEARCH_LEAF, ... */
+	btr_pcur_t*	cursor, /* in: memory buffer for persistent cursor */
+	mtr_t*		mtr);	/* in: mtr */
+/******************************************************************
+Opens a persistent cursor to an index tree without initializing the
+cursor. */
+UNIV_INLINE
+void
+btr_pcur_open_with_no_init(
+/*=======================*/
+	dict_index_t*	index,	/* in: index */
+	dtuple_t*	tuple,	/* in: tuple on which search done */
+	ulint		mode,	/* in: PAGE_CUR_L, ...;
+				NOTE that if the search is made using a unique
+				prefix of a record, mode should be
+				PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
+				may end up on the previous page of the
+				record! */
+	ulint		latch_mode,/* in: BTR_SEARCH_LEAF, ...;
+				NOTE that if has_search_latch != 0 then
+				we maybe do not acquire a latch on the cursor
+				page, but assume that the caller uses his
+				btr search latch to protect the record! */
+	btr_pcur_t*	cursor, /* in: memory buffer for persistent cursor */
+	ulint		has_search_latch,/* in: latch mode the caller
+				currently has on btr_search_latch:
+				RW_S_LATCH, or 0 */
+	mtr_t*		mtr);	/* in: mtr */
+/*********************************************************************
+Opens a persistent cursor at either end of an index. */
+UNIV_INLINE
+void
+btr_pcur_open_at_index_side(
+/*========================*/
+	ibool		from_left,	/* in: TRUE if open to the low end,
+					FALSE if to the high end */
+	dict_index_t*	index,		/* in: index */
+	ulint		latch_mode,	/* in: latch mode */
+	btr_pcur_t*	pcur,		/* in: cursor */
+	ibool		do_init,	/* in: TRUE if should be initialized */
+	mtr_t*		mtr);		/* in: mtr */
+/******************************************************************
+Gets the up_match value for a pcur after a search. */
+UNIV_INLINE
+ulint
+btr_pcur_get_up_match(
+/*==================*/
+				/* out: number of matched fields at the cursor
+				or to the right if search mode was PAGE_CUR_GE,
+				otherwise undefined */
+	btr_pcur_t*	cursor); /* in: memory buffer for persistent cursor */
+/******************************************************************
+Gets the low_match value for a pcur after a search. */
+UNIV_INLINE
+ulint
+btr_pcur_get_low_match(
+/*===================*/
+				/* out: number of matched fields at the cursor
+				or to the right if search mode was PAGE_CUR_LE,
+				otherwise undefined */
+	btr_pcur_t*	cursor); /* in: memory buffer for persistent cursor */
+/******************************************************************
+If mode is PAGE_CUR_G or PAGE_CUR_GE, opens a persistent cursor on the first
+user record satisfying the search condition, in the case PAGE_CUR_L or
+PAGE_CUR_LE, on the last user record. If no such user record exists, then
+in the first case sets the cursor after last in tree, and in the latter case
+before first in tree. The latching mode must be BTR_SEARCH_LEAF or
+BTR_MODIFY_LEAF. */
+
+void
+btr_pcur_open_on_user_rec(
+/*======================*/
+	dict_index_t*	index,		/* in: index */
+	dtuple_t*	tuple,		/* in: tuple on which search done */
+	ulint		mode,		/* in: PAGE_CUR_L, ... */
+	ulint		latch_mode,	/* in: BTR_SEARCH_LEAF or
+					BTR_MODIFY_LEAF */
+	btr_pcur_t*	cursor, 	/* in: memory buffer for persistent
+					cursor */
+	mtr_t*		mtr);		/* in: mtr */
+/**************************************************************************
+Positions a cursor at a randomly chosen position within a B-tree. */
+UNIV_INLINE
+void
+btr_pcur_open_at_rnd_pos(
+/*=====================*/
+	dict_index_t*	index,		/* in: index */
+	ulint		latch_mode,	/* in: BTR_SEARCH_LEAF, ... */
+	btr_pcur_t*	cursor,		/* in/out: B-tree pcur */
+	mtr_t*		mtr);		/* in: mtr */
+/******************************************************************
+Frees the possible old_rec_buf buffer of a persistent cursor and sets the
+latch mode of the persistent cursor to BTR_NO_LATCHES. */
+UNIV_INLINE
+void
+btr_pcur_close(
+/*===========*/
+	btr_pcur_t*	cursor);	/* in: persistent cursor */
+/******************************************************************
+The position of the cursor is stored by taking an initial segment of the
+record the cursor is positioned on, before, or after, and copying it to the
+cursor data structure, or just setting a flag if the cursor is before the
+first in an EMPTY tree, or after the last in an EMPTY tree. NOTE that the
+page where the cursor is positioned must not be empty if the index tree is
+not totally empty! */
+
+void
+btr_pcur_store_position(
+/*====================*/
+	btr_pcur_t*	cursor, /* in: persistent cursor */
+	mtr_t*		mtr);	/* in: mtr */
+/******************************************************************
+Restores the stored position of a persistent cursor bufferfixing the page and
+obtaining the specified latches. If the cursor position was saved when the
+(1) cursor was positioned on a user record: this function restores the position
+to the last record LESS OR EQUAL to the stored record;
+(2) cursor was positioned on a page infimum record: restores the position to
+the last record LESS than the user record which was the successor of the page
+infimum;
+(3) cursor was positioned on the page supremum: restores to the first record
+GREATER than the user record which was the predecessor of the supremum.
+(4) cursor was positioned before the first or after the last in an empty tree:
+restores to before first or after the last in the tree. */
+
+ibool
+btr_pcur_restore_position(
+/*======================*/
+					/* out: TRUE if the cursor position
+					was stored when it was on a user record
+					and it can be restored on a user record
+					whose ordering fields are identical to
+					the ones of the original user record */
+	ulint		latch_mode,	/* in: BTR_SEARCH_LEAF, ... */
+	btr_pcur_t*	cursor, 	/* in: detached persistent cursor */
+	mtr_t*		mtr);		/* in: mtr */
+/******************************************************************
+If the latch mode of the cursor is BTR_SEARCH_LEAF or BTR_MODIFY_LEAF,
+releases the page latch and bufferfix reserved by the cursor.
+NOTE! In the case of BTR_MODIFY_LEAF, there should not exist changes
+made by the current mini-transaction to the data protected by the
+cursor latch, as then the latch must not be released until mtr_commit. */
+
+void
+btr_pcur_release_leaf(
+/*==================*/
+	btr_pcur_t*	cursor, /* in: persistent cursor */
+	mtr_t*		mtr);	/* in: mtr */
+/*************************************************************
+Gets the rel_pos field for a cursor whose position has been stored. */
+UNIV_INLINE
+ulint
+btr_pcur_get_rel_pos(
+/*=================*/
+				/* out: BTR_PCUR_ON, ... */
+	btr_pcur_t*	cursor);/* in: persistent cursor */
+/*************************************************************
+Sets the mtr field for a pcur. */
+UNIV_INLINE
+void
+btr_pcur_set_mtr(
+/*=============*/
+	btr_pcur_t*	cursor,	/* in: persistent cursor */
+	mtr_t*		mtr);	/* in, own: mtr */
+/*************************************************************
+Gets the mtr field for a pcur. */
+UNIV_INLINE
+mtr_t*
+btr_pcur_get_mtr(
+/*=============*/
+				/* out: mtr */
+	btr_pcur_t*	cursor);	/* in: persistent cursor */
+/******************************************************************
+Commits the pcur mtr and sets the pcur latch mode to BTR_NO_LATCHES,
+that is, the cursor becomes detached. If there have been modifications
+to the page where pcur is positioned, this can be used instead of
+btr_pcur_release_leaf. Function btr_pcur_store_position should be used
+before calling this, if restoration of cursor is wanted later. */
+UNIV_INLINE
+void
+btr_pcur_commit(
+/*============*/
+	btr_pcur_t*	pcur);	/* in: persistent cursor */
+/******************************************************************
+Differs from btr_pcur_commit in that we can specify the mtr to commit. */
+UNIV_INLINE
+void
+btr_pcur_commit_specify_mtr(
+/*========================*/
+	btr_pcur_t*	pcur,	/* in: persistent cursor */
+	mtr_t*		mtr);	/* in: mtr to commit */
+/******************************************************************
+Tests if a cursor is detached: that is the latch mode is BTR_NO_LATCHES. */
+UNIV_INLINE
+ibool
+btr_pcur_is_detached(
+/*=================*/
+				/* out: TRUE if detached */
+	btr_pcur_t*	pcur);	/* in: persistent cursor */
+/*************************************************************
+Moves the persistent cursor to the next record in the tree. If no records are
+left, the cursor stays 'after last in tree'. */
+UNIV_INLINE
+ibool
+btr_pcur_move_to_next(
+/*==================*/
+				/* out: TRUE if the cursor was not after last
+				in tree */
+	btr_pcur_t*	cursor,	/* in: persistent cursor; NOTE that the
+				function may release the page latch */
+	mtr_t*		mtr);	/* in: mtr */
+/*************************************************************
+Moves the persistent cursor to the previous record in the tree. If no records
+are left, the cursor stays 'before first in tree'. */
+
+ibool
+btr_pcur_move_to_prev(
+/*==================*/
+				/* out: TRUE if the cursor was not before first
+				in tree */
+	btr_pcur_t*	cursor,	/* in: persistent cursor; NOTE that the
+				function may release the page latch */
+	mtr_t*		mtr);	/* in: mtr */
+/*************************************************************
+Moves the persistent cursor to the last record on the same page. */
+UNIV_INLINE
+void
+btr_pcur_move_to_last_on_page(
+/*==========================*/
+	btr_pcur_t*	cursor,	/* in: persistent cursor */
+	mtr_t*		mtr);	/* in: mtr */
+/*************************************************************
+Moves the persistent cursor to the next user record in the tree. If no user
+records are left, the cursor ends up 'after last in tree'. */
+UNIV_INLINE
+ibool
+btr_pcur_move_to_next_user_rec(
+/*===========================*/
+				/* out: TRUE if the cursor moved forward,
+				ending on a user record */
+	btr_pcur_t*	cursor,	/* in: persistent cursor; NOTE that the
+				function may release the page latch */
+	mtr_t*		mtr);	/* in: mtr */
+/*************************************************************
+Moves the persistent cursor to the first record on the next page.
+Releases the latch on the current page, and bufferunfixes it.
+Note that there must not be modifications on the current page,
+as then the x-latch can be released only in mtr_commit. */
+
+void
+btr_pcur_move_to_next_page(
+/*=======================*/
+	btr_pcur_t*	cursor,	/* in: persistent cursor; must be on the
+				last record of the current page */
+	mtr_t*		mtr);	/* in: mtr */
+/*************************************************************
+Moves the persistent cursor backward if it is on the first record
+of the page. Releases the latch on the current page, and bufferunfixes
+it. Note that to prevent a possible deadlock, the operation first
+stores the position of the cursor, releases the leaf latch, acquires
+necessary latches and restores the cursor position again before returning.
+The alphabetical position of the cursor is guaranteed to be sensible
+on return, but it may happen that the cursor is not positioned on the
+last record of any page, because the structure of the tree may have
+changed while the cursor had no latches. */
+
+void
+btr_pcur_move_backward_from_page(
+/*=============================*/
+	btr_pcur_t*	cursor,	/* in: persistent cursor, must be on the
+				first record of the current page */
+	mtr_t*		mtr);	/* in: mtr */
+/*************************************************************
+Returns the btr cursor component of a persistent cursor. */
+UNIV_INLINE
+btr_cur_t*
+btr_pcur_get_btr_cur(
+/*=================*/
+				/* out: pointer to btr cursor component */
+	btr_pcur_t*	cursor);	/* in: persistent cursor */
+/*************************************************************
+Returns the page cursor component of a persistent cursor. */
+UNIV_INLINE
+page_cur_t*
+btr_pcur_get_page_cur(
+/*==================*/
+				/* out: pointer to page cursor component */
+	btr_pcur_t*	cursor);	/* in: persistent cursor */
+/*************************************************************
+Returns the page of a persistent cursor. */
+UNIV_INLINE
+page_t*
+btr_pcur_get_page(
+/*==============*/
+				/* out: pointer to the page */
+	btr_pcur_t*	cursor);/* in: persistent cursor */
+/*************************************************************
+Returns the record of a persistent cursor. */
+UNIV_INLINE
+rec_t*
+btr_pcur_get_rec(
+/*=============*/
+				/* out: pointer to the record */
+	btr_pcur_t*	cursor);/* in: persistent cursor */
+/*************************************************************
+Checks if the persistent cursor is on a user record. */
+UNIV_INLINE
+ibool
+btr_pcur_is_on_user_rec(
+/*====================*/
+	btr_pcur_t*	cursor,	/* in: persistent cursor */
+	mtr_t*		mtr);	/* in: mtr */
+/*************************************************************
+Checks if the persistent cursor is after the last user record on 
+a page. */
+UNIV_INLINE
+ibool
+btr_pcur_is_after_last_on_page(
+/*===========================*/
+	btr_pcur_t*	cursor,	/* in: persistent cursor */
+	mtr_t*		mtr);	/* in: mtr */
+/*************************************************************
+Checks if the persistent cursor is before the first user record on 
+a page. */
+UNIV_INLINE
+ibool
+btr_pcur_is_before_first_on_page(
+/*=============================*/
+	btr_pcur_t*	cursor,	/* in: persistent cursor */
+	mtr_t*		mtr);	/* in: mtr */
+/*************************************************************
+Checks if the persistent cursor is before the first user record in
+the index tree. */
+UNIV_INLINE
+ibool
+btr_pcur_is_before_first_in_tree(
+/*=============================*/
+	btr_pcur_t*	cursor,	/* in: persistent cursor */
+	mtr_t*		mtr);	/* in: mtr */
+/*************************************************************
+Checks if the persistent cursor is after the last user record in
+the index tree. */
+UNIV_INLINE
+ibool
+btr_pcur_is_after_last_in_tree(
+/*===========================*/
+	btr_pcur_t*	cursor,	/* in: persistent cursor */
+	mtr_t*		mtr);	/* in: mtr */
+/*************************************************************
+Moves the persistent cursor to the next record on the same page. */
+UNIV_INLINE
+void
+btr_pcur_move_to_next_on_page(
+/*==========================*/
+	btr_pcur_t*	cursor,	/* in: persistent cursor */
+	mtr_t*		mtr);	/* in: mtr */
+/*************************************************************
+Moves the persistent cursor to the previous record on the same page. */
+UNIV_INLINE
+void
+btr_pcur_move_to_prev_on_page(
+/*==========================*/
+	btr_pcur_t*	cursor,	/* in: persistent cursor */
+	mtr_t*		mtr);	/* in: mtr */
+
+
+/* The persistent B-tree cursor structure. This is used mainly for SQL
+selects, updates, and deletes. */
+
+struct btr_pcur_struct{
+	btr_cur_t	btr_cur;	/* a B-tree cursor */
+	ulint		latch_mode;	/* see FIXME note below!
+					BTR_SEARCH_LEAF, BTR_MODIFY_LEAF,
+					BTR_MODIFY_TREE, or BTR_NO_LATCHES,
+					depending on the latching state of
+					the page and tree where the cursor is
+					positioned; the last value means that
+					the cursor is not currently positioned:
+					we say then that the cursor is
+					detached; it can be restored to
+					attached if the old position was
+					stored in old_rec */
+	ulint		old_stored;	/* BTR_PCUR_OLD_STORED
+					or BTR_PCUR_OLD_NOT_STORED */
+	rec_t*		old_rec;	/* if cursor position is stored,
+					contains an initial segment of the
+					latest record cursor was positioned
+					either on, before, or after */
+	ulint		old_n_fields;	/* number of fields in old_rec */
+	ulint		rel_pos;	/* BTR_PCUR_ON, BTR_PCUR_BEFORE, or
+					BTR_PCUR_AFTER relative to old_rec;
+					presumably one of the BTR_PCUR_..._IN_TREE
+					values if the tree was empty */
+	buf_block_t*	block_when_stored;/* buffer block when the position was
+					stored; note that if AWE is on, frames
+					may move */
+	dulint		modify_clock;	/* the modify clock value of the
+					buffer block when the cursor position
+					was stored */
+	ulint		pos_state;	/* see FIXME note below!
+					BTR_PCUR_IS_POSITIONED,
+					BTR_PCUR_WAS_POSITIONED,
+					BTR_PCUR_NOT_POSITIONED */
+	ulint		search_mode;	/* PAGE_CUR_G, ... */
+	trx_t*		trx_if_known;	/* the transaction, if we know it;
+					otherwise this field is not defined;
+					can ONLY BE USED in error prints in
+					fatal assertion failures! */
+	/*-----------------------------*/
+	/* NOTE that the following fields may possess dynamically allocated
+	memory which should be freed if not needed anymore! */
+
+	mtr_t*		mtr;		/* NULL, or this field may contain
+					a mini-transaction which holds the
+					latch on the cursor page */
+	byte*		old_rec_buf;	/* NULL, or a dynamically allocated
+					buffer for old_rec */
+	ulint		buf_size;	/* old_rec_buf size if old_rec_buf
+					is not NULL */
+};
+
+#define BTR_PCUR_IS_POSITIONED	1997660512	/* FIXME: currently, the state
+						can be BTR_PCUR_IS_POSITIONED,
+						though it really should be
+						BTR_PCUR_WAS_POSITIONED,
+						because we have no obligation
+						to commit the cursor with
+						mtr; similarly latch_mode may
+						be out of date */
+#define BTR_PCUR_WAS_POSITIONED	1187549791
+#define BTR_PCUR_NOT_POSITIONED 1328997689
+
+#define BTR_PCUR_OLD_STORED	908467085
+#define BTR_PCUR_OLD_NOT_STORED	122766467
+
+#ifndef UNIV_NONINL
+#include "btr0pcur.ic"
+#endif
+				
+#endif
--- a/include/btr0pcur.ic
+++ b/include/btr0pcur.ic
@ -0,0 +1,630 @@
+/******************************************************
+The index tree persistent cursor
+
+(c) 1996 Innobase Oy
+
+Created 2/23/1996 Heikki Tuuri
+*******************************************************/
+
+
+/*************************************************************
+Reads the rel_pos value saved with the stored position of a cursor. */
+UNIV_INLINE
+ulint
+btr_pcur_get_rel_pos(
+/*=================*/
+				/* out: BTR_PCUR_ON, ... */
+	btr_pcur_t*	cursor)	/* in: persistent cursor */
+{
+	ut_ad(cursor);
+	ut_ad(cursor->old_rec);
+	ut_ad(cursor->old_stored == BTR_PCUR_OLD_STORED);
+	ut_ad((cursor->pos_state == BTR_PCUR_IS_POSITIONED)
+			|| (cursor->pos_state == BTR_PCUR_WAS_POSITIONED));
+
+	return(cursor->rel_pos);
+}
+
+/*************************************************************
+Stores the given mini-transaction pointer in the mtr field of a pcur. */
+UNIV_INLINE
+void
+btr_pcur_set_mtr(
+/*=============*/
+	btr_pcur_t*	cursor,	/* in: persistent cursor; must not be NULL */
+	mtr_t*		mtr)	/* in, own: mtr */
+{
+	ut_ad(cursor);
+
+	cursor->mtr = mtr;
+}
+
+/*************************************************************
+Returns the mini-transaction pointer kept in the mtr field of a pcur. */
+UNIV_INLINE
+mtr_t*
+btr_pcur_get_mtr(
+/*=============*/
+				/* out: mtr; the field's current value */
+	btr_pcur_t*	cursor)	/* in: persistent cursor; must not be NULL */
+{
+	ut_ad(cursor);
+
+	return(cursor->mtr);
+}
+
+/*************************************************************
+Gives access to the btr cursor embedded in a persistent cursor. */
+UNIV_INLINE
+btr_cur_t*
+btr_pcur_get_btr_cur(
+/*=================*/
+				/* out: address of the btr_cur member */
+	btr_pcur_t*	cursor)	/* in: persistent cursor */
+{
+	return(&cursor->btr_cur);
+}
+
+/*************************************************************
+Gives access to the page cursor nested inside a persistent cursor. */
+UNIV_INLINE
+page_cur_t*
+btr_pcur_get_page_cur(
+/*==================*/
+				/* out: pointer to page cursor component */
+	btr_pcur_t*	cursor)	/* in: persistent cursor */
+{
+	return(btr_cur_get_page_cur(btr_pcur_get_btr_cur(cursor)));
+}
+
+/*************************************************************
+Returns the page of a persistent cursor that is currently positioned. */
+UNIV_INLINE
+page_t*
+btr_pcur_get_page(
+/*==============*/
+				/* out: pointer to the page */
+	btr_pcur_t*	cursor)	/* in: persistent cursor */
+{
+	page_cur_t*	page_cursor = btr_pcur_get_page_cur(cursor);
+	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+	return(page_cur_get_page(page_cursor));
+}
+
+/*************************************************************
+Returns the record of a positioned persistent cursor that holds a latch. */
+UNIV_INLINE
+rec_t*
+btr_pcur_get_rec(
+/*=============*/
+				/* out: pointer to the record */
+	btr_pcur_t*	cursor)	/* in: persistent cursor */
+{
+	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+
+	return(btr_cur_get_rec(btr_pcur_get_btr_cur(cursor)));
+}
+
+/******************************************************************
+Reads the up_match value left in the btr cursor of a pcur by a search. */
+UNIV_INLINE
+ulint
+btr_pcur_get_up_match(
+/*==================*/
+				/* out: number of matched fields at the cursor
+				or to the right if search mode was PAGE_CUR_GE,
+				otherwise undefined */
+	btr_pcur_t*	cursor) /* in: memory buffer for persistent cursor */
+{
+	ulint	n_matched;
+
+	ut_ad((cursor->pos_state == BTR_PCUR_IS_POSITIONED)
+			|| (cursor->pos_state == BTR_PCUR_WAS_POSITIONED));
+
+	n_matched = btr_pcur_get_btr_cur(cursor)->up_match;
+
+	ut_ad(n_matched != ULINT_UNDEFINED);
+
+	return(n_matched);
+}
+
+/******************************************************************
+Reads the low_match value left in the btr cursor of a pcur by a search. */
+UNIV_INLINE
+ulint
+btr_pcur_get_low_match(
+/*===================*/
+				/* out: number of matched fields at the cursor
+				or to the right if search mode was PAGE_CUR_LE,
+				otherwise undefined */
+	btr_pcur_t*	cursor) /* in: memory buffer for persistent cursor */
+{
+	ulint	n_matched;
+
+	ut_ad((cursor->pos_state == BTR_PCUR_IS_POSITIONED)
+			|| (cursor->pos_state == BTR_PCUR_WAS_POSITIONED));
+
+	n_matched = btr_pcur_get_btr_cur(cursor)->low_match;
+	ut_ad(n_matched != ULINT_UNDEFINED);
+
+	return(n_matched);
+}
+
+/*************************************************************
+Tells whether the persistent cursor stands after the last user record on
+its page; gives the result of page_cur_is_after_last. mtr is not used. */
+UNIV_INLINE
+ibool
+btr_pcur_is_after_last_on_page(
+/*===========================*/
+	btr_pcur_t*	cursor,	/* in: persistent cursor */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	UT_NOT_USED(mtr);
+	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+	return(page_cur_is_after_last(
+			btr_cur_get_page_cur(btr_pcur_get_btr_cur(cursor))));
+}
+
+/*************************************************************
+Tells whether the persistent cursor stands before the first user record on
+its page; gives the result of page_cur_is_before_first. mtr is not used. */
+UNIV_INLINE
+ibool
+btr_pcur_is_before_first_on_page(
+/*=============================*/
+	btr_pcur_t*	cursor,	/* in: persistent cursor */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	UT_NOT_USED(mtr);
+	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+	return(page_cur_is_before_first(
+			btr_cur_get_page_cur(btr_pcur_get_btr_cur(cursor))));
+}
+
+/*************************************************************
+Tells whether the cursor is neither before first nor after last on page. */
+UNIV_INLINE
+ibool
+btr_pcur_is_on_user_rec(
+/*====================*/
+	btr_pcur_t*	cursor,	/* in: persistent cursor */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+
+	if (!btr_pcur_is_before_first_on_page(cursor, mtr)
+	    && !btr_pcur_is_after_last_on_page(cursor, mtr)) {
+
+		return(TRUE);
+	}
+
+	return(FALSE);
+}
+
+/*************************************************************
+Tells whether the cursor is before the first user record of the whole tree:
+its page has no previous page and the cursor is before the first on it. */
+UNIV_INLINE
+ibool
+btr_pcur_is_before_first_in_tree(
+/*=============================*/
+	btr_pcur_t*	cursor,	/* in: persistent cursor */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+
+	if (btr_page_get_prev(btr_pcur_get_page(cursor), mtr) == FIL_NULL) {
+		return(page_cur_is_before_first(
+				btr_pcur_get_page_cur(cursor)));
+	}
+
+	return(FALSE);
+}
+
+/*************************************************************
+Tells whether the cursor is after the last user record of the whole tree:
+its page has no next page and the cursor is after the last on it. */
+UNIV_INLINE
+ibool
+btr_pcur_is_after_last_in_tree(
+/*===========================*/
+	btr_pcur_t*	cursor,	/* in: persistent cursor */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+
+	if (btr_page_get_next(btr_pcur_get_page(cursor), mtr) == FIL_NULL) {
+		return(page_cur_is_after_last(
+				btr_pcur_get_page_cur(cursor)));
+	}
+
+	return(FALSE);
+}
+
+/*************************************************************
+Steps the cursor to the next record on its page; old_stored is reset. */
+UNIV_INLINE
+void
+btr_pcur_move_to_next_on_page(
+/*==========================*/
+	btr_pcur_t*	cursor,	/* in: persistent cursor */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	page_cur_t*	page_cursor = btr_pcur_get_page_cur(cursor);
+
+	UT_NOT_USED(mtr);
+	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+	page_cur_move_to_next(page_cursor);
+	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+}
+
+/*************************************************************
+Moves the persistent cursor to the previous record on the same page. The
+cursor must be positioned and its latch mode must not be BTR_NO_LATCHES
+(both are checked with ut_ad). */
+UNIV_INLINE
+void
+btr_pcur_move_to_prev_on_page(
+/*==========================*/
+	btr_pcur_t*	cursor,	/* in: persistent cursor; its page cursor is
+				moved back and old_stored is reset */
+	mtr_t*		mtr)	/* in: mtr; not used by this function */
+{
+	UT_NOT_USED(mtr);
+	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+	
+	page_cur_move_to_prev(btr_pcur_get_page_cur(cursor));
+
+	/* After the move the old position is marked as not stored */
+	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+}
+
+/*************************************************************
+Moves the persistent cursor to the last record on the same page.
+NOTE(review): the body positions the page cursor with
+page_cur_set_after_last(), i.e. the call used is 'set after last', not
+'set on last' - confirm which position callers expect. */
+UNIV_INLINE
+void
+btr_pcur_move_to_last_on_page(
+/*==========================*/
+	btr_pcur_t*	cursor,	/* in: persistent cursor; its latch mode must
+				not be BTR_NO_LATCHES (checked with ut_ad) */
+	mtr_t*		mtr)	/* in: mtr; not used by this function */
+{
+	UT_NOT_USED(mtr);
+	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+	
+	/* The page frame is found by aligning the current record pointer */
+	page_cur_set_after_last(buf_frame_align(btr_pcur_get_rec(cursor)),
+				btr_pcur_get_page_cur(cursor));
+
+	/* After the move the old position is marked as not stored */
+	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+}
+
+/*************************************************************
+Advances the persistent cursor until it stands on a user record. When the
+cursor is at the end of a page it moves on to the next page; if there is no
+next page the cursor is left 'after last in tree'. */
+UNIV_INLINE
+ibool
+btr_pcur_move_to_next_user_rec(
+/*===========================*/
+				/* out: TRUE if the cursor moved forward,
+				ending on a user record; FALSE if it was
+				or became after last in tree */
+	btr_pcur_t*	cursor,	/* in: persistent cursor; NOTE that the
+				function may release the page latch */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+
+	for (;;) {
+		if (btr_pcur_is_after_last_on_page(cursor, mtr)) {
+
+			if (btr_pcur_is_after_last_in_tree(cursor, mtr)) {
+
+				/* Nothing left to step to */
+
+				return(FALSE);
+			}
+
+			btr_pcur_move_to_next_page(cursor, mtr);
+		} else {
+			btr_pcur_move_to_next_on_page(cursor, mtr);
+		}
+
+		/* One step was taken: stop as soon as the new position
+		is a user record, otherwise step again. */
+
+		if (btr_pcur_is_on_user_rec(cursor, mtr)) {
+
+			return(TRUE);
+		}
+	}
+}
+
+/*************************************************************
+Moves the persistent cursor one step forward in the tree: to the next
+record on the page, or to the next page if the cursor is at the end of its
+page. If no records are left, the cursor stays 'after last in tree'. */
+UNIV_INLINE
+ibool
+btr_pcur_move_to_next(
+/*==================*/
+				/* out: TRUE if the cursor was not after last
+				in tree */
+	btr_pcur_t*	cursor,	/* in: persistent cursor; NOTE that the
+				function may release the page latch */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
+	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
+
+	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+
+	if (!btr_pcur_is_after_last_on_page(cursor, mtr)) {
+
+		/* Common case: there is still a position on this page */
+
+		btr_pcur_move_to_next_on_page(cursor, mtr);
+
+		return(TRUE);
+	}
+
+	if (btr_pcur_is_after_last_in_tree(cursor, mtr)) {
+
+		return(FALSE);
+	}
+
+	btr_pcur_move_to_next_page(cursor, mtr);
+
+	return(TRUE);
+}
+
+/******************************************************************
+Commits the pcur mtr and sets the pcur latch mode to BTR_NO_LATCHES,
+that is, the cursor becomes detached. If there have been modifications
+to the page where pcur is positioned, this can be used instead of
+btr_pcur_release_leaf. Function btr_pcur_store_position should be used
+before calling this, if restoration of cursor is wanted later.
+The mtr committed is the one stored in pcur->mtr. On return pos_state is
+BTR_PCUR_WAS_POSITIONED. */
+UNIV_INLINE
+void
+btr_pcur_commit(
+/*============*/
+	btr_pcur_t*	pcur)	/* in: persistent cursor; must be in state
+				BTR_PCUR_IS_POSITIONED (checked with ut_a) */
+{
+	ut_a(pcur->pos_state == BTR_PCUR_IS_POSITIONED);
+
+	pcur->latch_mode = BTR_NO_LATCHES;	
+
+	mtr_commit(pcur->mtr);
+
+	pcur->pos_state = BTR_PCUR_WAS_POSITIONED;
+}	
+
+/******************************************************************
+Differs from btr_pcur_commit in that we can specify the mtr to commit.
+The cursor's latch mode is set to BTR_NO_LATCHES and, after the commit,
+its pos_state to BTR_PCUR_WAS_POSITIONED. */
+UNIV_INLINE
+void
+btr_pcur_commit_specify_mtr(
+/*========================*/
+	btr_pcur_t*	pcur,	/* in: persistent cursor; must be in state
+				BTR_PCUR_IS_POSITIONED (checked with ut_a) */
+	mtr_t*		mtr)	/* in: mtr to commit */
+{
+	ut_a(pcur->pos_state == BTR_PCUR_IS_POSITIONED);
+
+	pcur->latch_mode = BTR_NO_LATCHES;	
+
+	mtr_commit(mtr);
+
+	pcur->pos_state = BTR_PCUR_WAS_POSITIONED;
+}	
+
+/******************************************************************
+Sets the pcur latch mode to BTR_NO_LATCHES and the pos_state to
+BTR_PCUR_WAS_POSITIONED. No mtr is committed by this function. */
+UNIV_INLINE
+void
+btr_pcur_detach(
+/*============*/
+	btr_pcur_t*	pcur)	/* in: persistent cursor; must be in state
+				BTR_PCUR_IS_POSITIONED (checked with ut_a) */
+{
+	ut_a(pcur->pos_state == BTR_PCUR_IS_POSITIONED);
+	
+	pcur->latch_mode = BTR_NO_LATCHES;
+
+	pcur->pos_state = BTR_PCUR_WAS_POSITIONED;
+}
+
+/******************************************************************
+Tells whether a cursor is detached, which is defined here as its latch
+mode being BTR_NO_LATCHES. Only the latch_mode field is read. */
+UNIV_INLINE
+ibool
+btr_pcur_is_detached(
+/*=================*/
+				/* out: TRUE if detached */
+	btr_pcur_t*	pcur)	/* in: persistent cursor */
+{
+	return((pcur->latch_mode == BTR_NO_LATCHES) ? TRUE : FALSE);
+}
+
+/******************************************************************
+Resets the stored-position fields of a persistent cursor: old_rec and
+old_rec_buf are set to NULL and old_stored to BTR_PCUR_OLD_NOT_STORED.
+No other field of the cursor is touched, and nothing is freed. */
+UNIV_INLINE
+void
+btr_pcur_init(
+/*==========*/
+	btr_pcur_t*	pcur)	/* in: persistent cursor */
+{
+	pcur->old_rec = NULL;
+	pcur->old_rec_buf = NULL;
+	pcur->old_stored = BTR_PCUR_OLD_NOT_STORED;
+}
+
+/******************************************************************
+Initializes a persistent cursor with btr_pcur_init and positions it in an
+index tree by a search on level 0. The cursor should be closed with
+btr_pcur_close. */
+UNIV_INLINE
+void
+btr_pcur_open(
+/*==========*/
+	dict_index_t*	index,	/* in: index */
+	dtuple_t*	tuple,	/* in: tuple on which search done */
+	ulint		mode,	/* in: PAGE_CUR_L, ...;
+				NOTE that if the search is made using a unique
+				prefix of a record, mode should be
+				PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
+				may end up on the previous page from the
+				record! */
+	ulint		latch_mode,/* in: BTR_SEARCH_LEAF, ... */
+	btr_pcur_t*	cursor, /* in: memory buffer for persistent cursor */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	/* Reset the stored-position fields, then record how the cursor
+	is being opened */
+
+	btr_pcur_init(cursor);
+
+	cursor->latch_mode = latch_mode;
+	cursor->search_mode = mode;
+
+	/* Search with the embedded tree cursor; 0 is passed as the
+	has_search_latch argument */
+
+	btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode,
+				btr_pcur_get_btr_cur(cursor), 0, mtr);
+
+	cursor->pos_state = BTR_PCUR_IS_POSITIONED;
+	cursor->trx_if_known = NULL;
+}
+
+/******************************************************************
+Opens a persistent cursor to an index tree without calling btr_pcur_init
+on it first. Of the stored-position fields only old_stored is reset here;
+old_rec and old_rec_buf are left as they were. */
+UNIV_INLINE
+void
+btr_pcur_open_with_no_init(
+/*=======================*/
+	dict_index_t*	index,	/* in: index */
+	dtuple_t*	tuple,	/* in: tuple on which search done */
+	ulint		mode,	/* in: PAGE_CUR_L, ...;
+				NOTE that if the search is made using a unique
+				prefix of a record, mode should be
+				PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
+				may end up on the previous page of the
+				record! */
+	ulint		latch_mode,/* in: BTR_SEARCH_LEAF, ...;
+				NOTE that if has_search_latch != 0 then
+				we maybe do not acquire a latch on the cursor
+				page, but assume that the caller uses his
+				btr search latch to protect the record! */
+	btr_pcur_t*	cursor, /* in: memory buffer for persistent cursor */
+	ulint		has_search_latch,/* in: latch mode the caller
+				currently has on btr_search_latch:
+				RW_S_LATCH, or 0 */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	cursor->latch_mode = latch_mode;
+	cursor->search_mode = mode;
+
+	/* Search on level 0 with the embedded tree cursor, passing on
+	the caller's search latch information */
+
+	btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode,
+				btr_pcur_get_btr_cur(cursor),
+				has_search_latch, mtr);
+
+	cursor->pos_state = BTR_PCUR_IS_POSITIONED;
+	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+	cursor->trx_if_known = NULL;
+}
+
+/*********************************************************************
+Opens a persistent cursor at either end of an index. The search mode
+recorded in the cursor is PAGE_CUR_G when opening from the left and
+PAGE_CUR_L when opening from the right. */
+UNIV_INLINE
+void
+btr_pcur_open_at_index_side(
+/*========================*/
+	ibool		from_left,	/* in: TRUE if open to the low end,
+					FALSE if to the high end */
+	dict_index_t*	index,		/* in: index */
+	ulint		latch_mode,	/* in: latch mode */
+	btr_pcur_t*	pcur,		/* in: cursor */
+	ibool		do_init,	/* in: TRUE if btr_pcur_init should
+					be called on the cursor */
+	mtr_t*		mtr)		/* in: mtr */
+{
+	pcur->latch_mode = latch_mode;
+	pcur->search_mode = from_left ? PAGE_CUR_G : PAGE_CUR_L;
+
+	if (do_init) {
+		btr_pcur_init(pcur);
+	}
+
+	btr_cur_open_at_index_side(from_left, index, latch_mode,
+					btr_pcur_get_btr_cur(pcur), mtr);
+
+	pcur->pos_state = BTR_PCUR_IS_POSITIONED;
+	pcur->old_stored = BTR_PCUR_OLD_NOT_STORED;
+	pcur->trx_if_known = NULL;
+}
+
+/**************************************************************************
+Initializes a persistent cursor and positions it at a randomly chosen
+position within a B-tree. The search mode recorded in the cursor is
+PAGE_CUR_G. */
+UNIV_INLINE
+void
+btr_pcur_open_at_rnd_pos(
+/*=====================*/
+	dict_index_t*	index,		/* in: index */
+	ulint		latch_mode,	/* in: BTR_SEARCH_LEAF, ... */
+	btr_pcur_t*	cursor,		/* in/out: B-tree pcur */
+	mtr_t*		mtr)		/* in: mtr */
+{
+	/* Initialize the cursor; btr_pcur_init only touches the
+	stored-position fields, so it is independent of the two
+	assignments that follow it */
+
+	btr_pcur_init(cursor);
+
+	cursor->search_mode = PAGE_CUR_G;
+	cursor->latch_mode = latch_mode;
+
+	btr_cur_open_at_rnd_pos(index, latch_mode,
+					btr_pcur_get_btr_cur(cursor), mtr);
+
+	cursor->pos_state = BTR_PCUR_IS_POSITIONED;
+	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+	cursor->trx_if_known = NULL;
+}
+	
+/******************************************************************
+Closes a persistent cursor: frees old_rec_buf if one is allocated, clears
+the record pointers, and sets the latch mode to BTR_NO_LATCHES and the
+position state to BTR_PCUR_NOT_POSITIONED. */
+UNIV_INLINE
+void
+btr_pcur_close(
+/*===========*/
+	btr_pcur_t*	cursor)	/* in: persistent cursor */
+{
+	if (cursor->old_rec_buf != NULL) {
+
+		mem_free(cursor->old_rec_buf);
+
+		cursor->old_rec_buf = NULL;
+	}
+
+	/* old_rec is cleared whether or not a buffer was freed */
+
+	cursor->old_rec = NULL;
+	cursor->btr_cur.page_cur.rec = NULL;
+	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
+
+	cursor->pos_state = BTR_PCUR_NOT_POSITIONED;
+	cursor->latch_mode = BTR_NO_LATCHES;
+
+	cursor->trx_if_known = NULL;
+}
--- a/include/btr0sea.h
+++ b/include/btr0sea.h
@ -0,0 +1,253 @@
+/************************************************************************
+The index tree adaptive search
+
+(c) 1996 Innobase Oy
+
+Created 2/17/1996 Heikki Tuuri
+*************************************************************************/
+
+#ifndef btr0sea_h
+#define btr0sea_h
+
+#include "univ.i"
+
+#include "rem0rec.h"
+#include "dict0dict.h"
+#include "btr0types.h"
+#include "mtr0mtr.h"
+#include "ha0ha.h"
+
+/*********************************************************************
+Creates and initializes the adaptive search system at a database start. */
+
+void
+btr_search_sys_create(
+/*==================*/
+	ulint	hash_size);	/* in: hash index hash table size */
+/************************************************************************
+Returns search info for an index. */
+UNIV_INLINE
+btr_search_t*
+btr_search_get_info(
+/*================*/
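+				/* out: search info; search mutex reserved.
+				NOTE(review): the inline definition only
+				reads index->search_info and acquires no
+				mutex or latch itself - confirm what
+				'search mutex reserved' requires of the
+				caller */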
+	dict_index_t*	index);	/* in: index */
+/*********************************************************************
+Creates and initializes a search info struct. */
+
+btr_search_t*
+btr_search_info_create(
+/*===================*/
+				/* out, own: search info struct */
+	mem_heap_t*	heap);	/* in: heap where created */
+/*************************************************************************
+Updates the search info. */
+UNIV_INLINE
+void
+btr_search_info_update(
+/*===================*/
+	dict_index_t*	index,	/* in: index of the cursor */
+	btr_cur_t*	cursor);/* in: cursor which was just positioned */
+/**********************************************************************
+Tries to guess the right search position based on the hash search info
+of the index. Note that if mode is PAGE_CUR_LE, which is used in inserts,
+and the function returns TRUE, then cursor->up_match and cursor->low_match
+both have sensible values. */
+
+ibool
+btr_search_guess_on_hash(
+/*=====================*/
+					/* out: TRUE if succeeded */	
+	dict_index_t*	index,		/* in: index */
+	btr_search_t*	info,		/* in: index search info */
+	dtuple_t*	tuple,		/* in: logical record */
+	ulint		mode,		/* in: PAGE_CUR_L, ... */
+	ulint		latch_mode, 	/* in: BTR_SEARCH_LEAF, ... */
+	btr_cur_t*	cursor, 	/* out: tree cursor */
+	ulint		has_search_latch,/* in: latch mode the caller
+					currently has on btr_search_latch:
+					RW_S_LATCH, RW_X_LATCH, or 0 */
+	mtr_t*		mtr);		/* in: mtr */
+/************************************************************************
+Moves or deletes hash entries for moved records. If new_page is already hashed,
+then the hash index for page, if any, is dropped. If new_page is not hashed,
+and page is hashed, then a new hash index is built to new_page with the same
+parameters as page (this often happens when a page is split). */
+
+void
+btr_search_move_or_delete_hash_entries(
+/*===================================*/
+	page_t*		new_page,	/* in: records are copied
+					to this page */
+	page_t*		page,		/* in: index page */
+	dict_index_t*	index);		/* in: record descriptor */
+/************************************************************************
+Drops a page hash index. */
+
+void
+btr_search_drop_page_hash_index(
+/*============================*/
+	page_t*	page);	/* in: index page, s- or x-latched */
+/************************************************************************
+Drops a page hash index when a page is freed from a fseg to the file system.
+Drops possible hash index if the page happens to be in the buffer pool. */
+
+void
+btr_search_drop_page_hash_when_freed(
+/*=================================*/
+	ulint	space,		/* in: space id */
+	ulint	page_no);	/* in: page number */
+/************************************************************************
+Updates the page hash index when a single record is inserted on a page. */
+
+void
+btr_search_update_hash_node_on_insert(
+/*==================================*/
+	btr_cur_t*	cursor);/* in: cursor which was positioned to the
+				place to insert using btr_cur_search_...,
+				and the new record has been inserted next
+				to the cursor */
+/************************************************************************
+Updates the page hash index when a single record is inserted on a page. */
+
+void
+btr_search_update_hash_on_insert(
+/*=============================*/
+	btr_cur_t*	cursor);/* in: cursor which was positioned to the
+				place to insert using btr_cur_search_...,
+				and the new record has been inserted next
+				to the cursor */
+/************************************************************************
+Updates the page hash index when a single record is deleted from a page. */
+
+void
+btr_search_update_hash_on_delete(
+/*=============================*/
+	btr_cur_t*	cursor);/* in: cursor which was positioned on the
+				record to delete using btr_cur_search_...,
+				the record is not yet deleted */
+/************************************************************************
+Validates the search system. */
+
+ibool
+btr_search_validate(void);
+/*======================*/
+				/* out: TRUE if ok */
+
+/* Search info directions */
+#define BTR_SEA_NO_DIRECTION	1
+#define BTR_SEA_LEFT		2
+#define BTR_SEA_RIGHT		3
+#define BTR_SEA_SAME_REC	4
+
+/* The search info struct in an index; btr_search_get_info() returns the
+instance stored in index->search_info */
+
+struct btr_search_struct{
+	ulint	magic_n;	/* magic number; presumably
+				BTR_SEARCH_MAGIC_N - TODO confirm */
+	/* The following 4 fields are currently not used: */
+	rec_t*	last_search;	/* pointer to the lower limit record of the
+				previous search; NULL if not known */
+	ulint	n_direction;	/* number of consecutive searches in the
+				same direction */
+	ulint	direction;	/* BTR_SEA_NO_DIRECTION, BTR_SEA_LEFT,
+				BTR_SEA_RIGHT, BTR_SEA_SAME_REC,
+				or BTR_SEA_SAME_PAGE;
+				NOTE(review): no BTR_SEA_SAME_PAGE is defined
+				next to the other direction constants in
+				this header - confirm */
+	dulint	modify_clock;	/* value of modify clock at the time
+				last_search was stored */
+	/*----------------------*/
+	/* The following 4 fields are not protected by any latch: */
+	page_t*	root_guess;	/* the root page frame when it was last time
+				fetched, or NULL */
+	ulint	hash_analysis;	/* when this exceeds a certain value, the
+				hash analysis starts; this is reset if no
+				success noticed; btr_search_info_update()
+				increments this on every call and compares
+				it with BTR_SEARCH_HASH_ANALYSIS */
+	ibool	last_hash_succ;	/* TRUE if the last search would have
+				succeeded, or did succeed, using the hash
+				index; NOTE that the value here is not exact:
+				it is not calculated for every search, and the
+				calculation itself is not always accurate! */
+	ulint	n_hash_potential;/* number of consecutive searches which would
+				have succeeded, or did succeed, using the hash
+				index */
+	/*----------------------*/			
+	ulint	n_fields;	/* recommended prefix length for hash search:
+				number of full fields */
+	ulint	n_bytes;	/* recommended prefix: number of bytes in
+				an incomplete field */
+	ulint	side;		/* BTR_SEARCH_LEFT_SIDE or
+				BTR_SEARCH_RIGHT_SIDE, depending on whether
+				the leftmost record of several records with
+				the same prefix should be indexed in the
+				hash index */
+	/*----------------------*/
+	ulint	n_hash_succ;	/* number of successful hash searches thus
+				far */
+	ulint	n_hash_fail;	/* number of failed hash searches */
+	ulint	n_patt_succ;	/* number of successful pattern searches thus
+				far */
+	ulint	n_searches;	/* number of searches */
+};
+
+#define BTR_SEARCH_MAGIC_N	1112765
+
+/* The hash index system; the single instance is reached through the
+btr_search_sys pointer declared below */
+
+typedef struct btr_search_sys_struct	btr_search_sys_t;
+
+struct btr_search_sys_struct{
+	hash_table_t*	hash_index;	/* the hash index table; presumably
+					the one sized by the hash_size
+					argument of btr_search_sys_create()
+					- TODO confirm */
+};
+
+extern btr_search_sys_t*	btr_search_sys;
+
+/* The latch protecting the adaptive search system: this latch protects the
+(1) hash index;
+(2) columns of a record to which we have a pointer in the hash index;
+
+but does NOT protect:
+
+(3) next record offset field in a record;
+(4) next or previous records on the same page.
+
+Bear in mind (3) and (4) when using the hash index.
+*/
+
+extern rw_lock_t*	btr_search_latch_temp;
+
+#define btr_search_latch	(*btr_search_latch_temp)
+
+#ifdef UNIV_SEARCH_PERF_STAT
+extern ulint	btr_search_n_succ;
+#endif /* UNIV_SEARCH_PERF_STAT */
+extern ulint	btr_search_n_hash_fail;
+
+/* After change in n_fields or n_bytes in info, this many rounds are waited
+before starting the hash analysis again: this is to save CPU time when there
+is no hope in building a hash index. */
+
+#define BTR_SEARCH_HASH_ANALYSIS	17
+
+#define BTR_SEARCH_LEFT_SIDE	1
+#define BTR_SEARCH_RIGHT_SIDE	2
+
+/* Limit of consecutive searches for trying a search shortcut on the search
+pattern */
+
+#define BTR_SEARCH_ON_PATTERN_LIMIT	3
+
+/* Limit of consecutive searches for trying a search shortcut using the hash
+index */
+
+#define BTR_SEARCH_ON_HASH_LIMIT	3
+
+/* We do this many searches before trying to keep the search latch over calls
+from MySQL. If we notice someone waiting for the latch, we again set this
+much timeout. This is to reduce contention. */
+
+#define BTR_SEA_TIMEOUT			10000
+
+#ifndef UNIV_NONINL
+#include "btr0sea.ic"
+#endif
+
+#endif 
--- a/include/btr0sea.ic
+++ b/include/btr0sea.ic
@ -0,0 +1,67 @@
+/************************************************************************
+The index tree adaptive search
+
+(c) 1996 Innobase Oy
+
+Created 2/17/1996 Heikki Tuuri
+*************************************************************************/
+
+#include "dict0mem.h"
+#include "btr0cur.h"
+#include "buf0buf.h"
+
+/*************************************************************************
+Updates the search info. */
+
+void
+btr_search_info_update_slow(
+/*========================*/
+	btr_search_t*	info,	/* in: search info */
+	btr_cur_t*	cursor);/* in: cursor which was just positioned */
+
+/************************************************************************
+Returns the search info of an index, that is, index->search_info. The
+function only reads the field.
+NOTE(review): earlier documentation said 'search mutex reserved', but no
+mutex or latch is acquired here - confirm what callers must hold. */
+UNIV_INLINE
+btr_search_t*
+btr_search_get_info(
+/*================*/
+				/* out: search info */
+	dict_index_t*	index)	/* in: index; must not be NULL (checked
+				with ut_ad) */
+{
+	btr_search_t*	info;
+
+	ut_ad(index);
+
+	info = index->search_info;
+
+	return(info);
+}
+
+/*************************************************************************
+Updates the search info. Every call increments info->hash_analysis; the
+slow update routine btr_search_info_update_slow is only called once that
+counter has reached BTR_SEARCH_HASH_ANALYSIS. */
+UNIV_INLINE
+void
+btr_search_info_update(
+/*===================*/
+	dict_index_t*	index,	/* in: index of the cursor */
+	btr_cur_t*	cursor)	/* in: cursor which was just positioned */
+{
+	btr_search_t*	info;
+
+#ifdef UNIV_SYNC_DEBUG
+	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
+	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+
+	info = btr_search_get_info(index);
+
+	info->hash_analysis++;
+
+	if (info->hash_analysis >= BTR_SEARCH_HASH_ANALYSIS) {
+
+		/* Enough calls have accumulated: do the real work */
+
+		ut_ad(cursor->flag != BTR_CUR_HASH);
+
+		btr_search_info_update_slow(info, cursor);
+	}
+}
--- a/include/btr0types.h
+++ b/include/btr0types.h
@ -0,0 +1,21 @@
+/************************************************************************
+The index tree general types
+
+(c) 1996 Innobase Oy
+
+Created 2/17/1996 Heikki Tuuri
+*************************************************************************/
+
+#ifndef btr0types_h
+#define btr0types_h
+
+#include "univ.i"
+
+#include "rem0types.h"
+#include "page0types.h"
+
+/* Persistent cursor */
+typedef struct btr_pcur_struct		btr_pcur_t;
+/* Tree cursor */
+typedef struct btr_cur_struct 		btr_cur_t;
+/* Search info struct of an index */
+typedef struct btr_search_struct	btr_search_t;
+
+#endif 
--- a/include/buf0buf.h
+++ b/include/buf0buf.h
--- a/include/buf0buf.ic
+++ b/include/buf0buf.ic
@ -0,0 +1,663 @@
+/******************************************************
+The database buffer buf_pool
+
+(c) 1995 Innobase Oy
+
+Created 11/5/1995 Heikki Tuuri
+*******************************************************/
+
+#include "buf0flu.h"
+#include "buf0lru.h"
+#include "buf0rea.h"
+#include "mtr0mtr.h"
+
+#ifdef UNIV_DEBUG
+extern ulint		buf_dbg_counter; /* This is used to insert validation
+					operations in execution in the
+					debug version */
+#endif /* UNIV_DEBUG */
+/************************************************************************
+Recommends a move of a block to the start of the LRU list if there is danger
+of dropping from the buffer pool. The block is considered too old when the
+pool's freed_page_clock has advanced at least 1 + curr_size / 1024 past the
+block's own freed_page_clock. NOTE: does not reserve the buffer pool
+mutex. */
+UNIV_INLINE
+ibool
+buf_block_peek_if_too_old(
+/*======================*/
+				/* out: TRUE if should be made younger */
+	buf_block_t*	block)	/* in: block to make younger */
+{
+	return(block->freed_page_clock + 1 + (buf_pool->curr_size / 1024)
+					<= buf_pool->freed_page_clock);
+}
+
+/*************************************************************************
+Gets the current size of buffer buf_pool in bytes, computed as the number
+of frames times UNIV_PAGE_SIZE. In the case of AWE, the size of AWE window
+(= the frames). */
+UNIV_INLINE
+ulint
+buf_pool_get_curr_size(void)
+/*========================*/
+			/* out: size in bytes */
+{
+	return(buf_pool->n_frames * UNIV_PAGE_SIZE);
+}
+
+/*************************************************************************
+Gets the maximum size of buffer buf_pool in bytes. In the case of AWE, the
+size of AWE window (= the frames). NOTE: the value is computed from
+n_frames exactly as in buf_pool_get_curr_size, so the two functions
+return the same number. */
+UNIV_INLINE
+ulint
+buf_pool_get_max_size(void)
+/*=======================*/
+			/* out: size in bytes */
+{
+	return(buf_pool->n_frames * UNIV_PAGE_SIZE);
+}
+
+/***********************************************************************
+Accessor function for block array: returns the address of element i of
+buf_pool->blocks. */
+UNIV_INLINE
+buf_block_t*
+buf_pool_get_nth_block(
+/*===================*/
+				/* out: pointer to block */
+	buf_pool_t*	buf_pool,/* in: buf_pool; must not be NULL */
+	ulint		i)	/* in: index of the block; must be less
+				than buf_pool->max_size */
+{
+	ut_ad(buf_pool);
+	ut_ad(i < buf_pool->max_size);
+
+	return(&(buf_pool->blocks[i]));
+}
+
+/***********************************************************************
+Checks if a pointer points to the block array of the buffer pool (blocks, not
+the frames), i.e. lies in [blocks, blocks + max_size). */
+UNIV_INLINE
+ibool
+buf_pool_is_block(
+/*==============*/
+			/* out: TRUE if pointer to block */
+	void*	ptr)	/* in: pointer to memory */
+{
+	buf_block_t*	candidate = (buf_block_t*)ptr;
+
+	if (candidate < buf_pool->blocks) {
+
+		return(FALSE);
+	}
+
+	if (candidate >= buf_pool->blocks + buf_pool->max_size) {
+
+		return(FALSE);
+	}
+
+	return(TRUE);
+}
+
+/************************************************************************
+Gets the smallest oldest_modification lsn for any page in the pool. The
+value is read, under the buffer pool mutex, from the last block of the
+flush list. Returns ut_dulint_zero if the flush list is empty, i.e. all
+modified pages have been flushed to disk. */
+UNIV_INLINE
+dulint
+buf_pool_get_oldest_modification(void)
+/*==================================*/
+				/* out: oldest modification in pool,
+				ut_dulint_zero if none */
+{
+	buf_block_t*	last_block;
+	dulint		oldest_lsn;
+
+	mutex_enter(&(buf_pool->mutex));
+
+	last_block = UT_LIST_GET_LAST(buf_pool->flush_list);
+
+	if (last_block != NULL) {
+		oldest_lsn = last_block->oldest_modification;
+	} else {
+		oldest_lsn = ut_dulint_zero;
+	}
+
+	mutex_exit(&(buf_pool->mutex));
+
+	return(oldest_lsn);
+}
+
+/***********************************************************************
+Increments the buf_pool clock by one and returns its new value. Remember
+that in the 32 bit version the clock wraps around at 4 billion! Under
+UNIV_SYNC_DEBUG the caller is asserted to own the buffer pool mutex. */
+UNIV_INLINE
+ulint
+buf_pool_clock_tic(void)
+/*====================*/
+			/* out: new clock value */
+{
+#ifdef UNIV_SYNC_DEBUG
+	ut_ad(mutex_own(&(buf_pool->mutex)));
+#endif /* UNIV_SYNC_DEBUG */
+
+	return(++buf_pool->ulint_clock);
+}
+
+/*************************************************************************
+Gets a pointer to the memory frame of a block. The ut_ad assertions require
+that the block lies within the buf_pool->blocks array, that its state is not
+BUF_BLOCK_NOT_USED, and that a block in state BUF_BLOCK_FILE_PAGE is
+bufferfixed (buf_fix_count > 0). */
+UNIV_INLINE
+buf_frame_t*
+buf_block_get_frame(
+/*================*/
+				/* out: pointer to the frame */
+	buf_block_t*	block)	/* in: pointer to the control block */
+{
+	ut_ad(block);
+	ut_ad(block >= buf_pool->blocks);
+	ut_ad(block < buf_pool->blocks + buf_pool->max_size);
+	ut_ad(block->state != BUF_BLOCK_NOT_USED); 
+	ut_ad((block->state != BUF_BLOCK_FILE_PAGE) 
+	      || (block->buf_fix_count > 0));
+	
+	return(block->frame);
+}	
+
+/*************************************************************************
+Gets the space id of a block. The block must be in state
+BUF_BLOCK_FILE_PAGE (checked with ut_a); the ut_ad assertions further
+require that it lies within the buf_pool->blocks array and is bufferfixed. */
+UNIV_INLINE
+ulint
+buf_block_get_space(
+/*================*/
+				/* out: space id */
+	buf_block_t*	block)	/* in: pointer to the control block */
+{
+	ut_ad(block);
+	ut_ad(block >= buf_pool->blocks);
+	ut_ad(block < buf_pool->blocks + buf_pool->max_size);
+	ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+	ut_ad(block->buf_fix_count > 0);
+	
+	return(block->space);
+}	
+
+/*************************************************************************
+Gets the page number of a block, which is stored in the offset field of the
+control block. The block must be in state BUF_BLOCK_FILE_PAGE (checked with
+ut_a); the ut_ad assertions further require that it lies within the
+buf_pool->blocks array and is bufferfixed. */
+UNIV_INLINE
+ulint
+buf_block_get_page_no(
+/*==================*/
+				/* out: page number */
+	buf_block_t*	block)	/* in: pointer to the control block */
+{
+	ut_ad(block);
+	ut_ad(block >= buf_pool->blocks);
+	ut_ad(block < buf_pool->blocks + buf_pool->max_size);
+	ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+	ut_ad(block->buf_fix_count > 0);
+	
+	return(block->offset);
+}	
+
+/***********************************************************************
+Gets the control block of the frame into which the pointer points. The
+block is looked up in buf_pool->blocks_of_frames by the page-sized offset
+of the pointer from buf_pool->frame_zero. If the pointer lies outside the
+range [frame_zero, high_end), an error message is printed to stderr and
+ut_error is invoked. */
+UNIV_INLINE
+buf_block_t*
+buf_block_align(
+/*============*/
+			/* out: pointer to block */
+	byte*	ptr)	/* in: pointer to within a buffer frame */
+{
+	buf_block_t*	block;
+	buf_frame_t*	frame_zero;
+
+	ut_ad(ptr);
+
+	frame_zero = buf_pool->frame_zero;
+
+	/* high_end is treated as an exclusive upper bound, in the same
+	way as in buf_frame_align: a pointer equal to high_end lies in no
+	frame, and accepting it would make the lookup below read the slot
+	for a frame starting at high_end (presumably one past the last
+	valid entry of blocks_of_frames). */
+
+	if (UNIV_UNLIKELY((ulint)ptr < (ulint)frame_zero)
+	    || UNIV_UNLIKELY((ulint)ptr >= (ulint)(buf_pool->high_end))) {
+
+		ut_print_timestamp(stderr);
+		fprintf(stderr,
+"InnoDB: Error: trying to access a stray pointer %p\n"
+"InnoDB: buf pool start is at %p, end at %p\n"
+"InnoDB: Probable reason is database corruption or memory\n"
+"InnoDB: corruption. If this happens in an InnoDB database recovery,\n"
+"InnoDB: you can look from section 6.1 at http://www.innodb.com/ibman.html\n"
+"InnoDB: how to force recovery.\n",
+			ptr, frame_zero,
+			buf_pool->high_end);
+		ut_error;
+	}
+
+	block = *(buf_pool->blocks_of_frames + (((ulint)(ptr - frame_zero))
+						>> UNIV_PAGE_SIZE_SHIFT));
+	return(block);
+}
+
+/***********************************************************************
+Gets the frame the pointer is pointing to: the pointer is rounded down with
+ut_align_down to a multiple of UNIV_PAGE_SIZE. If the resulting frame
+address lies outside the range [frame_zero, high_end) of the buffer pool,
+an error message is printed to stderr and ut_error is invoked. */
+UNIV_INLINE
+buf_frame_t*
+buf_frame_align(
+/*============*/
+			/* out: pointer to frame */
+	byte*	ptr)	/* in: pointer to a frame */
+{
+	buf_frame_t*	frame;
+
+	ut_ad(ptr);
+
+	frame = ut_align_down(ptr, UNIV_PAGE_SIZE);
+
+	/* The range check is made on the aligned frame address; the
+	error message below reports the original pointer */
+	if (UNIV_UNLIKELY((ulint)frame < (ulint)(buf_pool->frame_zero))
+	    || UNIV_UNLIKELY((ulint)frame >= (ulint)(buf_pool->high_end))) {
+
+		ut_print_timestamp(stderr);	
+		fprintf(stderr,
+"InnoDB: Error: trying to access a stray pointer %p\n"
+"InnoDB: buf pool start is at %p, end at %p\n"
+"InnoDB: Probable reason is database corruption or memory\n"
+"InnoDB: corruption. If this happens in an InnoDB database recovery,\n"
+"InnoDB: you can look from section 6.1 at http://www.innodb.com/ibman.html\n"
+"InnoDB: how to force recovery.\n",
+ 			ptr, buf_pool->frame_zero,
+			buf_pool->high_end);
+		ut_error;
+	}
+
+	return(frame);
+}
+
+/**************************************************************************
+Gets the page number of a pointer pointing within a buffer frame containing
+a file page. The control block is found with buf_block_align and the
+number read with buf_block_get_page_no. */
+UNIV_INLINE
+ulint
+buf_frame_get_page_no(
+/*==================*/
+			/* out: page number */
+	byte*	ptr)	/* in: pointer to within a buffer frame */
+{
+	buf_block_t*	block;
+
+	block = buf_block_align(ptr);
+
+	return(buf_block_get_page_no(block));
+}
+
+/**************************************************************************
+Gets the space id of a pointer pointing within a buffer frame containing a
+file page. The control block is found with buf_block_align and the id read
+with buf_block_get_space. */
+UNIV_INLINE
+ulint
+buf_frame_get_space_id(
+/*===================*/
+			/* out: space id */
+	byte*	ptr)	/* in: pointer to within a buffer frame */
+{
+	buf_block_t*	block;
+
+	block = buf_block_align(ptr);
+
+	return(buf_block_get_space(block));
+}
+
+/**************************************************************************
+Gets the space id, page offset, and byte offset within page of a
+pointer pointing to a buffer frame containing a file page. The byte offset
+is the distance of the pointer from the start of its frame. */
+UNIV_INLINE
+void
+buf_ptr_get_fsp_addr(
+/*=================*/
+	byte*		ptr,	/* in: pointer to a buffer frame */
+	ulint*		space,	/* out: space id */
+	fil_addr_t*	addr)	/* out: page offset and byte offset */
+{
+	buf_block_t*	block;
+	buf_frame_t*	frame;
+
+	block = buf_block_align(ptr);
+
+	*space = buf_block_get_space(block);
+	addr->page = buf_block_get_page_no(block);
+
+	frame = buf_frame_align(ptr);
+	addr->boffset = ptr - frame;
+}
+
+/**************************************************************************
+Gets the hash value of the page the pointer is pointing to. This can be used
+in searches in the lock hash table. The value is read from the
+lock_hash_val field of the control block found with buf_block_align. */
+UNIV_INLINE
+ulint
+buf_frame_get_lock_hash_val(
+/*========================*/
+			/* out: lock hash value */
+	byte*	ptr)	/* in: pointer to within a buffer frame */
+{
+	return(buf_block_align(ptr)->lock_hash_val);
+}
+
+/**************************************************************************
+Gets the mutex protecting the page record lock hash chain in the lock
+table. The mutex pointer is read from the lock_mutex field of the control
+block found with buf_block_align. */
+UNIV_INLINE
+mutex_t*
+buf_frame_get_lock_mutex(
+/*=====================*/
+			/* out: mutex */
+	byte*	ptr)	/* in: pointer to within a buffer frame */
+{
+	return(buf_block_align(ptr)->lock_mutex);
+}
+
+/*************************************************************************
+Copies contents of a buffer frame to a given buffer. Exactly UNIV_PAGE_SIZE
+bytes are copied with ut_memcpy. */
+UNIV_INLINE
+byte*
+buf_frame_copy(
+/*===========*/
+				/* out: buf */
+	byte*		buf,	/* in: buffer to copy to; must not be NULL,
+				and UNIV_PAGE_SIZE bytes are written to it */
+	buf_frame_t*	frame)	/* in: buffer frame; must not be NULL */
+{
+	ut_ad(buf && frame);
+
+	ut_memcpy(buf, frame, UNIV_PAGE_SIZE);
+
+	return(buf);
+}
+
+/************************************************************************
+Calculates a folded value of a file page address to use in the page hash
+table. The value is (space << 20) + space + offset. */
+UNIV_INLINE
+ulint
+buf_page_address_fold(
+/*==================*/
+			/* out: the folded value */
+	ulint	space,	/* in: space id */
+	ulint	offset)	/* in: offset of the page within space */
+{
+	ulint	fold;
+
+	fold = space << 20;
+	fold += space;
+	fold += offset;
+
+	return(fold);
+}
+
+/************************************************************************
+Tells whether an io operation is going on on a buffer page. The io_fix
+field of the block is inspected while holding the buffer pool mutex. */
+UNIV_INLINE
+ibool
+buf_page_io_query(
+/*==============*/
+				/* out: TRUE if io going on */
+	buf_block_t*	block)	/* in: buf_pool block, must be bufferfixed */
+{
+	ibool	io_going_on;
+
+	mutex_enter(&(buf_pool->mutex));
+
+	ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
+	ut_ad(block->buf_fix_count > 0);
+
+	/* Take a snapshot of the answer under the mutex, so that there is
+	a single place where the mutex is released */
+	io_going_on = (block->io_fix != 0) ? TRUE : FALSE;
+
+	mutex_exit(&(buf_pool->mutex));
+
+	return(io_going_on);
+}
+
+/************************************************************************
+Gets the youngest modification log sequence number for a frame. The value
+is read from the control block under the buffer pool mutex. Zero is
+returned if the block is not in the state BUF_BLOCK_FILE_PAGE. */
+UNIV_INLINE
+dulint
+buf_frame_get_newest_modification(
+/*==============================*/
+				/* out: newest modification to the page */
+	buf_frame_t*	frame)	/* in: pointer to a frame */
+{
+	buf_block_t*	block;
+	dulint		lsn;
+
+	ut_ad(frame);
+
+	block = buf_block_align(frame);
+
+	mutex_enter(&(buf_pool->mutex));
+
+	/* Start from the 'not a file page' answer and override it only
+	when the block really holds a file page */
+	lsn = ut_dulint_zero;
+
+	if (block->state == BUF_BLOCK_FILE_PAGE) {
+		lsn = block->newest_modification;
+	}
+
+	mutex_exit(&(buf_pool->mutex));
+
+	return(lsn);
+}
+
+/************************************************************************
+Increments the modify clock of the block that controls the given frame by 1
+and returns the new value. The caller must (1) own the buf_pool mutex and
+the block bufferfix count has to be zero, (2) or own an x-lock on the
+block. */
+UNIV_INLINE
+dulint
+buf_frame_modify_clock_inc(
+/*=======================*/
+				/* out: new value */
+	buf_frame_t*	frame)	/* in: pointer to a frame */
+{
+	buf_block_t*	block;
+
+	ut_ad(frame);
+
+	/* The modify clock lives in the control block, not in the frame */
+	block = buf_block_align(frame);
+
+#ifdef UNIV_SYNC_DEBUG
+	/* Check the locking precondition stated in the function comment */
+	ut_ad((mutex_own(&(buf_pool->mutex)) && (block->buf_fix_count == 0))
+	      || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
+#endif /* UNIV_SYNC_DEBUG */
+
+	UT_DULINT_INC(block->modify_clock);
+
+	return(block->modify_clock);
+}
+
+/************************************************************************
+Increments the modify clock of a block by 1 and returns the new value. The
+caller must (1) own the buf_pool mutex and the block bufferfix count has to
+be zero, (2) or own an x-lock on the block. */
+UNIV_INLINE
+dulint
+buf_block_modify_clock_inc(
+/*=======================*/
+				/* out: new value */
+	buf_block_t*	block)	/* in: block */
+{
+#ifdef UNIV_SYNC_DEBUG
+	/* Check the locking precondition stated in the function comment */
+	ut_ad((mutex_own(&(buf_pool->mutex)) && (block->buf_fix_count == 0))
+	      || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
+#endif /* UNIV_SYNC_DEBUG */
+
+	UT_DULINT_INC(block->modify_clock);
+
+	return(block->modify_clock);
+}
+
+/************************************************************************
+Returns the current value of the modify clock of a block. The caller must
+have an s-lock or x-lock on the block. */
+UNIV_INLINE
+dulint
+buf_block_get_modify_clock(
+/*=======================*/
+				/* out: value */
+	buf_block_t*	block)	/* in: block */
+{
+#ifdef UNIV_SYNC_DEBUG
+	/* Either latch mode on the block satisfies the precondition */
+	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
+	      || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
+#endif /* UNIV_SYNC_DEBUG */
+
+	return(block->modify_clock);
+}
+
+#ifdef UNIV_SYNC_DEBUG
+/***********************************************************************
+Increments the bufferfix count. This variant is compiled when
+UNIV_SYNC_DEBUG is defined: in addition to incrementing the count it takes
+an s-lock on the debug latch of the block without waiting. The debug latch
+is released again in buf_page_release(). */
+UNIV_INLINE
+void
+buf_block_buf_fix_inc_debug(
+/*========================*/
+	buf_block_t*	block,	/* in: block to bufferfix */
+	const char*	file,	/* in: file name */
+	ulint		line)	/* in: line */
+{
+	ibool	ret;
+
+	/* file and line tell where the debug latch was requested */
+	ret = rw_lock_s_lock_func_nowait(&(block->debug_latch), file, line);
+
+	ut_ad(ret == TRUE);
+
+	block->buf_fix_count++;
+}
+#else /* UNIV_SYNC_DEBUG */
+/***********************************************************************
+Increments the bufferfix count. This variant is compiled when
+UNIV_SYNC_DEBUG is not defined and does nothing but the increment. */
+UNIV_INLINE
+void
+buf_block_buf_fix_inc(
+/*==================*/
+	buf_block_t*	block)	/* in: block to bufferfix */
+{
+	block->buf_fix_count++;
+}
+#endif /* UNIV_SYNC_DEBUG */
+/**********************************************************************
+Returns the control block of a file page, NULL if not found. The page is
+looked up in the page hash table of the buffer pool by (space, offset). */
+UNIV_INLINE
+buf_block_t*
+buf_page_hash_get(
+/*==============*/
+			/* out: block, NULL if not found */
+	ulint	space,	/* in: space id */
+	ulint	offset)	/* in: offset of the page within space */
+{
+	buf_block_t*	block;
+	ulint		fold;
+	
+	ut_ad(buf_pool);
+#ifdef UNIV_SYNC_DEBUG
+	/* The caller must hold the buffer pool mutex */
+	ut_ad(mutex_own(&(buf_pool->mutex)));
+#endif /* UNIV_SYNC_DEBUG */
+
+	/* Look for the page in the hash table */
+
+	fold = buf_page_address_fold(space, offset);
+
+	/* HASH_SEARCH leaves its result in block; the last argument is the
+	match condition applied to the candidate blocks */
+	HASH_SEARCH(hash, buf_pool->page_hash, fold, block,
+			(block->space == space) && (block->offset == offset));
+	/* A block found through the page hash must be in the file page
+	state */
+	ut_a(block == NULL || block->state == BUF_BLOCK_FILE_PAGE);
+	
+	return(block);
+}
+
+/************************************************************************
+Tries to get the page from the buffer pool. If the page is not there and
+file io is required, releases all latches in mtr down to the given
+savepoint, retrieves the page to the buffer pool, and releases the latches
+down to the savepoint once more, so that the page is left neither
+bufferfixed nor latched. In that case NULL is returned. */
+UNIV_INLINE
+buf_frame_t*
+buf_page_get_release_on_io(
+/*=======================*/
+				/* out: pointer to the frame, or NULL
+				if not in buffer buf_pool */
+	ulint	space,		/* in: space id */
+	ulint	offset,		/* in: offset of the page within space
+				in units of a page */
+	buf_frame_t* guess,	/* in: guessed frame or NULL */
+	ulint	rw_latch,	/* in: RW_X_LATCH, RW_S_LATCH,
+				or RW_NO_LATCH */
+	ulint	savepoint,	/* in: mtr savepoint */
+	mtr_t*	mtr)		/* in: mtr */
+{
+	buf_frame_t*	frame;
+
+	frame = buf_page_get_gen(space, offset, rw_latch, guess,
+				BUF_GET_IF_IN_POOL,
+				__FILE__, __LINE__,
+				mtr);
+
+	if (frame == NULL) {
+		/* The page was not in the buffer pool: release the
+		latches down to the savepoint before the read */
+
+		mtr_rollback_to_savepoint(mtr, savepoint);
+
+		buf_page_get(space, offset, RW_S_LATCH, mtr);
+
+		/* Now the page is in the buffer pool; the latch just
+		taken is released as well, by rolling back to the
+		savepoint again */
+
+		mtr_rollback_to_savepoint(mtr, savepoint);
+	}
+
+	/* frame is NULL here exactly when io was required */
+
+	return(frame);
+}
+
+/************************************************************************
+Releases a bufferfixed page: decrements the bufferfix count of the buffer
+control block while holding the buffer pool mutex, and after that releases
+the latch on the block, if one is specified. If the latch is an x-latch
+and the mtr has made modifications, the modification is noted in the flush
+system before the count is decremented. */
+UNIV_INLINE
+void
+buf_page_release(
+/*=============*/
+	buf_block_t*	block,		/* in: buffer block */
+	ulint		rw_latch,	/* in: RW_S_LATCH, RW_X_LATCH,
+					RW_NO_LATCH */
+	mtr_t*		mtr)		/* in: mtr */
+{
+	ut_ad(block);
+
+	mutex_enter_fast(&(buf_pool->mutex));
+
+	ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+	ut_a(block->buf_fix_count > 0);
+
+	if (rw_latch == RW_X_LATCH && mtr->modifications) {
+
+		buf_flush_note_modification(block, mtr);
+	}
+
+#ifdef UNIV_SYNC_DEBUG
+	rw_lock_s_unlock(&(block->debug_latch));
+#endif
+	block->buf_fix_count--;
+
+	mutex_exit(&(buf_pool->mutex));
+
+	/* The page latch is released only after the buffer pool mutex;
+	with RW_NO_LATCH there is nothing to release */
+
+	if (rw_latch == RW_X_LATCH) {
+		rw_lock_x_unlock(&(block->lock));
+	} else if (rw_latch == RW_S_LATCH) {
+		rw_lock_s_unlock(&(block->lock));
+	}
+}
+
+#ifdef UNIV_SYNC_DEBUG
+/*************************************************************************
+Adds latch level info for the rw-lock protecting the buffer frame. This
+should be called in the debug version after a successful latching of a
+page if we know the latching order level of the acquired latch. This
+definition is compiled only if UNIV_SYNC_DEBUG is defined.
+NOTE(review): builds without UNIV_SYNC_DEBUG presumably get a no-op
+replacement from the header; that is not visible in this file, confirm. */
+UNIV_INLINE
+void
+buf_page_dbg_add_level(
+/*===================*/
+	buf_frame_t*	frame,	/* in: buffer page where we have acquired
+				latch */
+	ulint		level)	/* in: latching order level */
+{
+	/* The latch is the rw-lock in the control block of the frame */
+	sync_thread_add_level(&(buf_block_align(frame)->lock), level);
+}
+#endif /* UNIV_SYNC_DEBUG */
--- a/include/buf0flu.h
+++ b/include/buf0flu.h
@ -0,0 +1,120 @@
+/******************************************************
+The database buffer pool flush algorithm
+
+(c) 1995 Innobase Oy
+
+Created 11/5/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef buf0flu_h
+#define buf0flu_h
+
+#include "univ.i"
+#include "buf0types.h"
+#include "ut0byte.h"
+#include "mtr0types.h"
+
+/************************************************************************
+Updates the flush system data structures when a write is completed. */
+
+void
+buf_flush_write_complete(
+/*=====================*/
+	buf_block_t*	block);	/* in: pointer to the block in question */
+/*************************************************************************
+Flushes pages from the end of the LRU list if there is too small
+a margin of replaceable pages there. */
+
+void
+buf_flush_free_margin(void);
+/*=======================*/
+/************************************************************************
+Initializes a page for writing to the tablespace. */
+
+void
+buf_flush_init_for_writing(
+/*=======================*/
+	byte*	page,		/* in: page */
+	dulint	newest_lsn,	/* in: newest modification lsn to the page */
+	ulint	space,		/* in: space id */
+	ulint	page_no);	/* in: page number */
+/***********************************************************************
+This utility flushes dirty blocks from the end of the LRU list or flush_list.
+NOTE 1: in the case of an LRU flush the calling thread may own latches to
+pages: to avoid deadlocks, this function must be written so that it cannot
+end up waiting for these latches! NOTE 2: in the case of a flush list flush,
+the calling thread is not allowed to own any latches on pages! */
+
+ulint
+buf_flush_batch(
+/*============*/
+				/* out: number of blocks for which the write
+				request was queued */
+	ulint	flush_type,	/* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST; if
+				BUF_FLUSH_LIST, then the caller must not own
+				any latches on pages */
+	ulint	min_n,		/* in: wished minimum number of blocks flushed
+				(it is not guaranteed that the actual number
+				is that big, though) */
+	dulint	lsn_limit);	/* in: in the case BUF_FLUSH_LIST all blocks whose
+				oldest_modification is smaller than this
+				should be flushed (if their number does not
+				exceed min_n), otherwise ignored */
+/**********************************************************************
+Waits until a flush batch of the given type ends */
+
+void
+buf_flush_wait_batch_end(
+/*=====================*/
+	ulint	type);	/* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
+/************************************************************************
+This function should be called at a mini-transaction commit, if a page was
+modified in it. Puts the block to the list of modified blocks, if it is not
+already in it. */
+UNIV_INLINE
+void
+buf_flush_note_modification(
+/*========================*/
+	buf_block_t*	block,	/* in: block which is modified */
+	mtr_t*		mtr);	/* in: mtr */
+/************************************************************************
+This function should be called when recovery has modified a buffer page. */
+UNIV_INLINE
+void
+buf_flush_recv_note_modification(
+/*=============================*/
+	buf_block_t*	block,		/* in: block which is modified */
+	dulint		start_lsn,	/* in: start lsn of the first mtr in a
+					set of mtr's */
+	dulint		end_lsn);	/* in: end lsn of the last mtr in the
+					set of mtr's */
+/************************************************************************
+Returns TRUE if the file page block is immediately suitable for replacement,
+i.e., transition FILE_PAGE => NOT_USED allowed. */
+ibool
+buf_flush_ready_for_replace(
+/*========================*/
+				/* out: TRUE if can replace immediately */
+	buf_block_t*	block);	/* in: buffer control block, must be in state
+				BUF_BLOCK_FILE_PAGE and in the LRU list */
+/**********************************************************************
+Validates the flush list. */
+
+ibool
+buf_flush_validate(void);
+/*====================*/
+		/* out: TRUE if ok */
+
+/* When buf_flush_free_margin is called, it tries to make this many blocks
+available to replacement in the free list and at the end of the LRU list (to
+make sure that a read-ahead batch can be read efficiently in a single
+sweep). */
+
+#define BUF_FLUSH_FREE_BLOCK_MARGIN 	(5 + BUF_READ_AHEAD_AREA)
+#define BUF_FLUSH_EXTRA_MARGIN 		(BUF_FLUSH_FREE_BLOCK_MARGIN / 4 + 100)
+
+#ifndef UNIV_NONINL
+#include "buf0flu.ic"
+#endif
+				
+#endif
--- a/include/buf0flu.ic
+++ b/include/buf0flu.ic
@ -0,0 +1,106 @@
+/******************************************************
+The database buffer pool flush algorithm
+
+(c) 1995 Innobase Oy
+
+Created 11/5/1995 Heikki Tuuri
+*******************************************************/
+
+#include "buf0buf.h"
+#include "mtr0mtr.h"
+
+/************************************************************************
+Inserts a modified block into the flush list. */
+
+void
+buf_flush_insert_into_flush_list(
+/*=============================*/
+	buf_block_t*	block);	/* in: block which is modified */
+/************************************************************************
+Inserts a modified block into the flush list in the right sorted position.
+This function is used by recovery, because there the modifications do not
+necessarily come in the order of lsn's. */
+
+void
+buf_flush_insert_sorted_into_flush_list(
+/*====================================*/
+	buf_block_t*	block);	/* in: block which is modified */
+
+/************************************************************************
+This function should be called at a mini-transaction commit, if a page was
+modified in it. Sets the newest modification lsn of the block to the end
+lsn of the mtr. If the block has no oldest modification lsn yet (it is
+zero), sets it to the start lsn of the mtr and puts the block to the list
+of modified blocks. The caller must hold the buffer pool mutex and an
+x-latch on the block; no latching is done here. */
+UNIV_INLINE
+void
+buf_flush_note_modification(
+/*========================*/
+	buf_block_t*	block,	/* in: block which is modified */
+	mtr_t*		mtr)	/* in: mtr */
+{
+	ut_ad(block);
+	ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
+	ut_ad(block->buf_fix_count > 0);
+#ifdef UNIV_SYNC_DEBUG
+	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
+	ut_ad(mutex_own(&(buf_pool->mutex)));
+#endif /* UNIV_SYNC_DEBUG */
+
+	ut_ad(ut_dulint_cmp(mtr->start_lsn, ut_dulint_zero) != 0);
+	ut_ad(mtr->modifications);
+	ut_ad(ut_dulint_cmp(block->newest_modification, mtr->end_lsn) <= 0);
+
+	block->newest_modification = mtr->end_lsn;
+
+	if (!ut_dulint_is_zero(block->oldest_modification)) {
+		/* An oldest modification is already recorded: it must not
+		be younger than the start of this mtr */
+		ut_ad(ut_dulint_cmp(block->oldest_modification,
+							mtr->start_lsn) <= 0);
+	} else {
+		/* This is the first modification recorded for the block */
+		block->oldest_modification = mtr->start_lsn;
+		ut_ad(!ut_dulint_is_zero(block->oldest_modification));
+
+		buf_flush_insert_into_flush_list(block);
+	}
+
+	srv_buf_pool_write_requests++;
+}
+
+/************************************************************************
+This function should be called when recovery has modified a buffer page.
+Sets the newest modification lsn of the block to end_lsn. If the block has
+no oldest modification lsn yet (it is zero), sets it to start_lsn and
+inserts the block into the flush list in the sorted position. The buffer
+pool mutex is acquired and released inside this function. */
+UNIV_INLINE
+void
+buf_flush_recv_note_modification(
+/*=============================*/
+	buf_block_t*	block,		/* in: block which is modified */
+	dulint		start_lsn,	/* in: start lsn of the first mtr in a
+					set of mtr's */
+	dulint		end_lsn)	/* in: end lsn of the last mtr in the
+					set of mtr's */
+{
+	ut_ad(block);
+	ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
+	ut_ad(block->buf_fix_count > 0);
+#ifdef UNIV_SYNC_DEBUG
+	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+
+	mutex_enter(&(buf_pool->mutex));
+
+	ut_ad(ut_dulint_cmp(block->newest_modification, end_lsn) <= 0);
+
+	block->newest_modification = end_lsn;
+
+	if (!ut_dulint_is_zero(block->oldest_modification)) {
+		/* An oldest modification is already recorded: it must not
+		be younger than start_lsn */
+		ut_ad(ut_dulint_cmp(block->oldest_modification,
+							start_lsn) <= 0);
+	} else {
+		/* This is the first modification recorded for the block */
+		block->oldest_modification = start_lsn;
+
+		ut_ad(!ut_dulint_is_zero(block->oldest_modification));
+
+		buf_flush_insert_sorted_into_flush_list(block);
+	}
+
+	mutex_exit(&(buf_pool->mutex));
+}
--- a/include/buf0lru.h
+++ b/include/buf0lru.h
@ -0,0 +1,144 @@
+/******************************************************
+The database buffer pool LRU replacement algorithm
+
+(c) 1995 Innobase Oy
+
+Created 11/5/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef buf0lru_h
+#define buf0lru_h
+
+#include "univ.i"
+#include "ut0byte.h"
+#include "buf0types.h"
+
+/**********************************************************************
+Tries to remove LRU flushed blocks from the end of the LRU list and put them
+to the free list. This is beneficial for the efficiency of the insert buffer
+operation, as flushed pages from non-unique non-clustered indexes are here
+taken out of the buffer pool, and their inserts redirected to the insert
+buffer. Otherwise, the flushed blocks could get modified again before read
+operations need new buffer blocks, and the i/o work done in flushing would be
+wasted. */
+
+void
+buf_LRU_try_free_flushed_blocks(void);
+/*==================================*/
+/**********************************************************************
+Returns TRUE if less than 15 % of the buffer pool is available. This can be
+used in heuristics to prevent huge transactions eating up the whole buffer
+pool for their locks. */
+
+ibool
+buf_LRU_buf_pool_running_out(void);
+/*==============================*/
+				/* out: TRUE if less than 15 % of buffer pool
+				left */
+
+/*#######################################################################
+These are low-level functions
+#########################################################################*/
+
+/* Minimum LRU list length for which the LRU_old pointer is defined */
+
+#define BUF_LRU_OLD_MIN_LEN	80
+
+#define BUF_LRU_FREE_SEARCH_LEN		(5 + 2 * BUF_READ_AHEAD_AREA)
+
+/**********************************************************************
+Invalidates all pages belonging to a given tablespace when we are deleting
+the data file(s) of that tablespace. A PROBLEM: if readahead is being started,
+what guarantees that it will not try to read in pages after this operation has
+completed? */
+
+void
+buf_LRU_invalidate_tablespace(
+/*==========================*/
+	ulint	id);	/* in: space id */
+/**********************************************************************
+Gets the minimum LRU_position field for the blocks in an initial segment
+(determined by BUF_LRU_INITIAL_RATIO) of the LRU list. The limit is not
+guaranteed to be precise, because the ulint_clock may wrap around. */
+
+ulint
+buf_LRU_get_recent_limit(void);
+/*==========================*/
+			/* out: the limit; zero if could not determine it */
+/**********************************************************************
+Look for a replaceable block from the end of the LRU list and put it to
+the free list if found. */
+
+ibool
+buf_LRU_search_and_free_block(
+/*==========================*/
+				/* out: TRUE if freed */
+	ulint	n_iterations);   /* in: how many times this has been called
+				repeatedly without result: a high value means
+				that we should search farther; if value is
+				k < 10, then we only search k/10 * number
+				of pages in the buffer pool from the end
+				of the LRU list */
+/**********************************************************************
+Returns a free block from the buf_pool. The block is taken off the
+free list. If it is empty, blocks are moved from the end of the
+LRU list to the free list. */
+
+buf_block_t*
+buf_LRU_get_free_block(void);
+/*=========================*/
+				/* out: the free control block; also if AWE is
+				used, it is guaranteed that the block has its
+				page mapped to a frame when we return */
+/**********************************************************************
+Puts a block back to the free list. */
+
+void
+buf_LRU_block_free_non_file_page(
+/*=============================*/
+	buf_block_t*	block);	/* in: block, must not contain a file page */
+/**********************************************************************
+Adds a block to the LRU list. */
+
+void
+buf_LRU_add_block(
+/*==============*/
+	buf_block_t*	block,	/* in: control block */
+	ibool		old);	/* in: TRUE if should be put to the old
+				blocks in the LRU list, else put to the
+				start; if the LRU list is very short, added to
+				the start regardless of this parameter */
+/**********************************************************************
+Moves a block to the start of the LRU list. */
+
+void
+buf_LRU_make_block_young(
+/*=====================*/
+	buf_block_t*	block);	/* in: control block */
+/**********************************************************************
+Moves a block to the end of the LRU list. */
+
+void
+buf_LRU_make_block_old(
+/*===================*/
+	buf_block_t*	block);	/* in: control block */
+#ifdef UNIV_DEBUG
+/**************************************************************************
+Validates the LRU list. */
+
+ibool
+buf_LRU_validate(void);
+/*==================*/
+/**************************************************************************
+Prints the LRU list. */
+
+void
+buf_LRU_print(void);
+/*===============*/
+#endif /* UNIV_DEBUG */
+
+#ifndef UNIV_NONINL
+#include "buf0lru.ic"
+#endif
+
+#endif
--- a/include/buf0lru.ic
+++ b/include/buf0lru.ic
@ -0,0 +1,8 @@
+/******************************************************
+The database buffer replacement algorithm
+
+(c) 1995 Innobase Oy
+
+Created 11/5/1995 Heikki Tuuri
+*******************************************************/
+
--- a/include/buf0rea.h
+++ b/include/buf0rea.h
@ -0,0 +1,103 @@
+/******************************************************
+The database buffer read
+
+(c) 1995 Innobase Oy
+
+Created 11/5/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef buf0rea_h
+#define buf0rea_h
+
+#include "univ.i"
+#include "buf0types.h"
+
+/************************************************************************
+High-level function which reads a page asynchronously from a file to the
+buffer buf_pool if it is not already there. Sets the io_fix flag and sets
+an exclusive lock on the buffer frame. The flag is cleared and the x-lock
+released by the i/o-handler thread. Does a random read-ahead if it seems
+sensible. */
+
+ulint
+buf_read_page(
+/*==========*/
+			/* out: number of page read requests issued: this can
+			be > 1 if read-ahead occurred */
+	ulint	space,	/* in: space id */
+	ulint	offset);/* in: page number */
+/************************************************************************
+Applies linear read-ahead if in the buf_pool the page is a border page of
+a linear read-ahead area and all the pages in the area have been accessed.
+Does not read any page if the read-ahead mechanism is not activated. Note
+that the algorithm looks at the 'natural' adjacent successor and
+predecessor of the page, which on the leaf level of a B-tree are the next
+and previous page in the chain of leaves. To know these, the page specified
+in (space, offset) must already be present in the buf_pool. Thus, the
+natural way to use this function is to call it when a page in the buf_pool
+is accessed the first time, calling this function just after it has been
+bufferfixed.
+NOTE 1: as this function looks at the natural predecessor and successor
+fields on the page, what happens, if these are not initialized to any
+sensible value? No problem, before applying read-ahead we check that the
+area to read is within the span of the space, if not, read-ahead is not
+applied. An uninitialized value may result in a useless read operation, but
+only very improbably.
+NOTE 2: the calling thread may own latches on pages: to avoid deadlocks this
+function must be written such that it cannot end up waiting for these
+latches!
+NOTE 3: the calling thread must want access to the page given: this rule is
+set to prevent unintended read-aheads performed by ibuf routines, a situation
+which could result in a deadlock if the OS does not support asynchronous io. */
+
+ulint
+buf_read_ahead_linear(
+/*==================*/
+			/* out: number of page read requests issued */
+	ulint	space,	/* in: space id */
+	ulint	offset);/* in: page number of a page; NOTE: the current thread
+			must want access to this page (see NOTE 3 above) */
+/************************************************************************
+Issues read requests for pages which the ibuf module wants to read in, in
+order to contract the insert buffer tree. Technically, this function is like
+a read-ahead function. */
+
+void
+buf_read_ibuf_merge_pages(
+/*======================*/
+	ibool	sync,		/* in: TRUE if the caller wants this function
+				to wait for the highest address page to get
+				read in, before this function returns */
+	ulint*	space_ids,	/* in: array of space ids */
+	ib_longlong* space_versions,/* in: the spaces must have this version
+				number (timestamp), otherwise we discard the
+				read; we use this to cancel reads if
+				DISCARD + IMPORT may have changed the
+				tablespace size */
+	ulint*	page_nos,	/* in: array of page numbers to read, with the
+				highest page number the last in the array */
+	ulint	n_stored);	/* in: number of page numbers in the array */
+/************************************************************************
+Issues read requests for pages which recovery wants to read in. */
+
+void
+buf_read_recv_pages(
+/*================*/
+	ibool	sync,		/* in: TRUE if the caller wants this function
+				to wait for the highest address page to get
+				read in, before this function returns */
+	ulint	space,		/* in: space id */
+	ulint*	page_nos,	/* in: array of page numbers to read, with the
+				highest page number the last in the array */
+	ulint	n_stored);	/* in: number of page numbers in the array */
+
+/* The size in pages of the area which the read-ahead algorithms read if
+invoked */
+
+#define	BUF_READ_AHEAD_AREA	ut_min(64, ut_2_power_up(buf_pool->curr_size / 32))
+
+/* Modes used in read-ahead */
+#define BUF_READ_IBUF_PAGES_ONLY	131
+#define BUF_READ_ANY_PAGE		132
+
+#endif
--- a/include/buf0types.h
+++ b/include/buf0types.h
@ -0,0 +1,20 @@
+/******************************************************
+The database buffer pool global types for the directory
+
+(c) 1995 Innobase Oy
+
+Created 11/17/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef buf0types_h
+#define buf0types_h
+
+typedef	struct buf_block_struct		buf_block_t;
+typedef	struct buf_pool_struct		buf_pool_t;
+
+/* The type used for a buffer frame */
+typedef	byte	buf_frame_t;
+
+
+#endif
+
--- a/include/data0data.h
+++ b/include/data0data.h
@ -0,0 +1,424 @@
+/************************************************************************
+SQL data field and tuple
+
+(c) 1994-1996 Innobase Oy
+
+Created 5/30/1994 Heikki Tuuri
+*************************************************************************/
+
+#ifndef data0data_h
+#define data0data_h
+
+#include "univ.i"
+
+#include "data0types.h"
+#include "data0type.h"
+#include "mem0mem.h"
+#include "dict0types.h"
+
+typedef struct big_rec_struct		big_rec_t;
+
+/* Some non-inlined functions used in the MySQL interface: */
+void 
+dfield_set_data_noninline(
+	dfield_t* 	field,	/* in: field */
+	void*		data,	/* in: data */
+	ulint		len);	/* in: length or UNIV_SQL_NULL */
+void* 
+dfield_get_data_noninline(
+	dfield_t* field);	/* in: field */
+ulint
+dfield_get_len_noninline(
+	dfield_t* field);	/* in: field */
+ulint 
+dtuple_get_n_fields_noninline(
+	dtuple_t* 	tuple);	/* in: tuple */
+dfield_t* 
+dtuple_get_nth_field_noninline(
+	dtuple_t* 	tuple,	/* in: tuple */
+	ulint		n);	/* in: index of field */
+
+/*************************************************************************
+Gets pointer to the type struct of SQL data field. */
+UNIV_INLINE
+dtype_t*
+dfield_get_type(
+/*============*/
+				/* out: pointer to the type struct */
+	dfield_t*	field);	/* in: SQL data field */
+/*************************************************************************
+Sets the type struct of SQL data field. */
+UNIV_INLINE
+void
+dfield_set_type(
+/*============*/
+	dfield_t*	field,	/* in: SQL data field */
+	dtype_t*	type);	/* in: pointer to data type struct */
+/*************************************************************************
+Gets pointer to the data in a field. */
+UNIV_INLINE
+void* 
+dfield_get_data(
+/*============*/
+				/* out: pointer to data */
+	dfield_t* field);	/* in: field */
+/*************************************************************************
+Gets length of field data. */
+UNIV_INLINE
+ulint
+dfield_get_len(
+/*===========*/
+				/* out: length of data; UNIV_SQL_NULL if 
+				SQL null data */
+	dfield_t* field);	/* in: field */
+/*************************************************************************
+Sets length in a field. */
+UNIV_INLINE
+void 
+dfield_set_len(
+/*===========*/
+	dfield_t* 	field,	/* in: field */
+	ulint		len);	/* in: length or UNIV_SQL_NULL */
+/*************************************************************************
+Sets pointer to the data and length in a field. */
+UNIV_INLINE
+void 
+dfield_set_data(
+/*============*/
+	dfield_t* 	field,	/* in: field */
+	const void*	data,	/* in: data */
+	ulint		len);	/* in: length or UNIV_SQL_NULL */
+/**************************************************************************
+Writes an SQL null field full of zeros. */
+UNIV_INLINE
+void
+data_write_sql_null(
+/*================*/
+	byte*	data,	/* in: pointer to a buffer of size len */
+	ulint	len);	/* in: SQL null size in bytes */
+/*************************************************************************
+Copies the data and len fields. */
+UNIV_INLINE
+void 
+dfield_copy_data(
+/*=============*/
+	dfield_t* 	field1,	/* in: field to copy to */
+	dfield_t*	field2);/* in: field to copy from */
+/*************************************************************************
+Copies a data field to another. */
+UNIV_INLINE
+void
+dfield_copy(
+/*========*/
+	dfield_t*	field1,	/* in: field to copy to */
+	dfield_t*	field2);/* in: field to copy from */
+/*************************************************************************
+Tests if data length and content is equal for two dfields. */
+UNIV_INLINE
+ibool
+dfield_datas_are_binary_equal(
+/*==========================*/
+				/* out: TRUE if equal */
+	dfield_t*	field1,	/* in: field */
+	dfield_t*	field2);/* in: field */
+/*************************************************************************
+Tests if dfield data length and content is equal to the given. */
+
+ibool
+dfield_data_is_binary_equal(
+/*========================*/
+				/* out: TRUE if equal */
+	dfield_t*	field,	/* in: field */
+	ulint		len,	/* in: data length or UNIV_SQL_NULL */
+	byte*		data);	/* in: data */
+/*************************************************************************
+Gets number of fields in a data tuple. */
+UNIV_INLINE
+ulint 
+dtuple_get_n_fields(
+/*================*/
+				/* out: number of fields */
+	dtuple_t* 	tuple);	/* in: tuple */
+/*************************************************************************
+Gets nth field of a tuple. */
+UNIV_INLINE
+dfield_t* 
+dtuple_get_nth_field(
+/*=================*/
+				/* out: nth field */
+	dtuple_t* 	tuple,	/* in: tuple */
+	ulint		n);	/* in: index of field */
+/*************************************************************************
+Gets info bits in a data tuple. */
+UNIV_INLINE
+ulint
+dtuple_get_info_bits(
+/*=================*/
+				/* out: info bits */
+	dtuple_t* 	tuple);	/* in: tuple */
+/*************************************************************************
+Sets info bits in a data tuple. */
+UNIV_INLINE
+void
+dtuple_set_info_bits(
+/*=================*/
+	dtuple_t* 	tuple,		/* in: tuple */
+	ulint		info_bits);	/* in: info bits */
+/*************************************************************************
+Gets number of fields used in record comparisons. */
+UNIV_INLINE
+ulint
+dtuple_get_n_fields_cmp(
+/*====================*/
+				/* out: number of fields used in comparisons
+				in rem0cmp.* */
+	dtuple_t*	tuple);	/* in: tuple */
+/*************************************************************************
+Sets number of fields used in record comparisons. */
+UNIV_INLINE
+void
+dtuple_set_n_fields_cmp(
+/*====================*/
+	dtuple_t*	tuple,		/* in: tuple */
+	ulint		n_fields_cmp);	/* in: number of fields used in
+					comparisons in rem0cmp.* */
+/**************************************************************
+Creates a data tuple to a memory heap. The default value for number
+of fields used in record comparisons for this tuple is n_fields. */
+UNIV_INLINE
+dtuple_t*
+dtuple_create(
+/*==========*/
+	 	 		/* out, own: created tuple */
+	mem_heap_t*	heap,	/* in: memory heap where the tuple
+				is created */
+	ulint		n_fields); /* in: number of fields */	
+
+/*************************************************************************
+Creates a dtuple for use in MySQL. */
+
+dtuple_t*
+dtuple_create_for_mysql(
+/*====================*/
+			/* out, own: created dtuple */
+	void** heap,    /* out: created memory heap */
+	ulint n_fields); /* in: number of fields */
+/*************************************************************************
+Frees a dtuple used in MySQL. */
+
+void
+dtuple_free_for_mysql(
+/*==================*/
+	void* heap);	/* in: memory heap; presumably the heap output by
+			dtuple_create_for_mysql - TODO confirm */
+/*************************************************************************
+Sets number of fields used in a tuple. Normally this is set in
+dtuple_create, but if you want later to set it smaller, you can use this. */ 
+
+void
+dtuple_set_n_fields(
+/*================*/
+	dtuple_t*	tuple,		/* in: tuple */
+	ulint		n_fields);	/* in: number of fields */
+/**************************************************************
+The following function returns the sum of data lengths of a tuple. The space
+occupied by the field structs or the tuple struct is not counted. */
+UNIV_INLINE
+ulint
+dtuple_get_data_size(
+/*=================*/
+				/* out: sum of data lens */
+	dtuple_t*	tuple);	/* in: typed data tuple */
+/****************************************************************
+Returns TRUE if lengths of two dtuples are equal and respective data fields
+in them are equal when compared with collation in char fields (not as binary
+strings). */
+
+ibool
+dtuple_datas_are_ordering_equal(
+/*============================*/
+				/* out: TRUE if length and fields are equal
+				when compared with cmp_data_data:
+				NOTE: in character type fields some letters
+				are identified with others! (collation) */
+	dtuple_t*	tuple1,	/* in: tuple 1 */
+	dtuple_t*	tuple2);/* in: tuple 2 */
+/****************************************************************
+Folds a prefix given as the number of fields of a tuple. */
+UNIV_INLINE
+ulint
+dtuple_fold(
+/*========*/
+				/* out: the folded value */
+	dtuple_t*	tuple,	/* in: the tuple */
+	ulint		n_fields,/* in: number of complete fields to fold */
+	ulint		n_bytes,/* in: number of bytes to fold in an
+				incomplete last field */
+	dulint		tree_id);/* in: index tree id */
+/***********************************************************************
+Sets types of fields binary in a tuple. */
+UNIV_INLINE
+void
+dtuple_set_types_binary(
+/*====================*/
+	dtuple_t*	tuple,	/* in: data tuple */
+	ulint		n);	/* in: number of fields to set */
+/**************************************************************************
+Checks if a dtuple contains an SQL null value. */
+UNIV_INLINE
+ibool
+dtuple_contains_null(
+/*=================*/
+				/* out: TRUE if some field is SQL null */
+	dtuple_t*	tuple);	/* in: dtuple */
+/**************************************************************
+Checks that a data field is typed. Asserts an error if not. */
+
+ibool
+dfield_check_typed(
+/*===============*/
+				/* out: TRUE if ok */
+	dfield_t*	field);	/* in: data field */
+/**************************************************************
+Checks that a data tuple is typed. Asserts an error if not. */
+
+ibool
+dtuple_check_typed(
+/*===============*/
+				/* out: TRUE if ok */
+	dtuple_t*	tuple);	/* in: tuple */
+/**************************************************************
+Checks that a data tuple is typed. */
+
+ibool
+dtuple_check_typed_no_assert(
+/*=========================*/
+				/* out: TRUE if ok */
+	dtuple_t*	tuple);	/* in: tuple */
+#ifdef UNIV_DEBUG
+/**************************************************************
+Validates the consistency of a tuple which must be complete, i.e.,
+all fields must have been set. */
+
+ibool
+dtuple_validate(
+/*============*/
+				/* out: TRUE if ok */
+	dtuple_t*	tuple);	/* in: tuple */
+#endif /* UNIV_DEBUG */
+/*****************************************************************
+Pretty prints a dfield value according to its data type. */
+
+void
+dfield_print(
+/*=========*/
+	dfield_t*	dfield);/* in: dfield */
+/*****************************************************************
+Pretty prints a dfield value according to its data type. Also the hex string
+is printed if a string contains non-printable characters. */ 
+
+void
+dfield_print_also_hex(
+/*==================*/
+	dfield_t*	dfield);	 /* in: dfield */
+/**************************************************************
+The following function prints the contents of a tuple. */
+
+void
+dtuple_print(
+/*=========*/
+	FILE*		f,	/* in: output stream */
+	dtuple_t*	tuple);	/* in: tuple */
+/******************************************************************
+Moves parts of long fields in entry to the big record vector so that
+the size of tuple drops below the maximum record size allowed in the
+database. Moves data only from those fields which are not necessary
+to determine uniquely the insertion place of the tuple in the index. */
+
+big_rec_t*
+dtuple_convert_big_rec(
+/*===================*/
+				/* out, own: created big record vector,
+				NULL if we are not able to shorten
+				the entry enough, i.e., if there are
+				too many short fields in entry */
+	dict_index_t*	index,	/* in: index */
+	dtuple_t*	entry,	/* in: index entry */
+	ulint*		ext_vec,/* in: array of externally stored fields,
+				or NULL: if a field already is externally
+				stored, then we cannot move it to the vector
+				this function returns */
+	ulint		n_ext_vec);/* in: number of elements in ext_vec */
+/******************************************************************
+Puts back to entry the data stored in vector. Note that to ensure the
+fields in entry can accommodate the data, vector must have been created
+from entry with dtuple_convert_big_rec. */
+
+void
+dtuple_convert_back_big_rec(
+/*========================*/
+	dict_index_t*	index,	/* in: index */
+	dtuple_t*	entry,	/* in: entry whose data was put to vector */
+	big_rec_t*	vector);/* in, own: big rec vector; it is
+				freed in this function */
+/******************************************************************
+Frees the memory in a big rec vector. */
+
+void
+dtuple_big_rec_free(
+/*================*/
+	big_rec_t*	vector);	/* in, own: big rec vector; it is
+				freed in this function */
+
+/*######################################################################*/
+
+/* Structure for an SQL data field */
+struct dfield_struct{
+	void*		data;	/* pointer to data */
+	ulint		len;	/* data length; UNIV_SQL_NULL if SQL null */
+	dtype_t		type;	/* type of data */
+};
+
+/* Structure for an SQL data tuple: a header followed by an array of fields */
+struct dtuple_struct {
+	ulint		info_bits;	/* info bits of an index record:
+					the default is 0; this field is used
+					if an index record is built from
+					a data tuple */
+	ulint		n_fields;	/* number of fields in dtuple */
+	ulint		n_fields_cmp;	/* number of fields which should
+					be used in comparison services
+					of rem0cmp.*; the index search
+					is performed by comparing only these
+					fields, others are ignored; the
+					default value in dtuple creation is
+					the same value as n_fields */
+	dfield_t*	fields;		/* fields */
+	UT_LIST_NODE_T(dtuple_t) tuple_list;
+					/* data tuples can be linked into a
+					list using this field */
+	ulint		magic_n;	/* DATA_TUPLE_MAGIC_N; dtuple_create
+					sets this only when UNIV_DEBUG is
+					defined, and it is checked in debug
+					assertions */
+};
+#define	DATA_TUPLE_MAGIC_N	65478679
+
+/* A slot for a field in a big rec vector */
+
+typedef struct big_rec_field_struct 	big_rec_field_t;
+struct big_rec_field_struct {
+	ulint		field_no;	/* field number in record */
+	ulint		len;		/* stored data len */
+	byte*		data;		/* stored data */
+};
+
+/* Storage format for overflow data in a big record, that is, a record
+which needs external storage of data fields */
+
+struct big_rec_struct {
+	mem_heap_t*	heap;		/* memory heap from which allocated */
+	ulint		n_fields;	/* number of stored fields */
+	big_rec_field_t* fields;	/* stored fields */
+};
+	
+#ifndef UNIV_NONINL
+#include "data0data.ic"
+#endif
+
+#endif
--- a/include/data0data.ic
+++ b/include/data0data.ic
@ -0,0 +1,433 @@
+/************************************************************************
+SQL data field and tuple
+
+(c) 1994-1996 Innobase Oy
+
+Created 5/30/1994 Heikki Tuuri
+*************************************************************************/
+
+#include "mem0mem.h"
+#include "ut0rnd.h"
+
+extern byte data_error;
+
+/*************************************************************************
+Returns the address of the type struct embedded in an SQL data field. */
+UNIV_INLINE
+dtype_t*
+dfield_get_type(
+/*============*/
+				/* out: address of field->type */
+	dfield_t*	field)	/* in: SQL data field, checked non-NULL
+				with ut_ad */
+{
+	ut_ad(field);
+
+	return(&field->type);
+}
+
+/*************************************************************************
+Copies the given type struct by value into an SQL data field. */
+UNIV_INLINE
+void
+dfield_set_type(
+/*============*/
+	dfield_t*	field,	/* in: SQL data field to modify */
+	dtype_t*	type)	/* in: type struct to copy from */
+{
+	ut_ad(field);
+	ut_ad(type);
+
+	field->type = *type;
+}
+
+/*************************************************************************
+Returns the data pointer stored in a field. */
+UNIV_INLINE
+void*
+dfield_get_data(
+/*============*/
+				/* out: field->data */
+	dfield_t* field)	/* in: field, checked non-NULL with ut_ad */
+{
+	ut_ad(field);
+	/* a field that is not SQL NULL must not point at data_error */
+	ut_ad(!(field->len != UNIV_SQL_NULL && field->data == &data_error));
+
+	return(field->data);
+}
+
+/*************************************************************************
+Returns the length stored in a field. */
+UNIV_INLINE
+ulint
+dfield_get_len(
+/*===========*/
+				/* out: length of data; UNIV_SQL_NULL if
+				SQL null data */
+	dfield_t* field)	/* in: field, checked non-NULL with ut_ad */
+{
+	ut_ad(field);
+	/* a field that is not SQL NULL must not point at data_error */
+	ut_ad(!(field->len != UNIV_SQL_NULL && field->data == &data_error));
+
+	return(field->len);
+}
+
+/*************************************************************************
+Sets length in a field. Only the len member is written; the data pointer
+and the type struct are left as they are. */
+UNIV_INLINE
+void 
+dfield_set_len(
+/*===========*/
+	dfield_t* 	field,	/* in: field, checked non-NULL with ut_ad */
+	ulint		len)	/* in: length or UNIV_SQL_NULL */
+{
+	ut_ad(field);
+
+	field->len = len;
+}
+
+/*************************************************************************
+Points a field at the given data buffer and records its length. The data
+is not copied; the const qualifier of the pointer is cast away. */
+UNIV_INLINE
+void
+dfield_set_data(
+/*============*/
+	dfield_t* 	field,	/* in: field, checked non-NULL with ut_ad */
+	const void*	data,	/* in: data */
+	ulint		len)	/* in: length or UNIV_SQL_NULL */
+{
+	ut_ad(field);
+
+	field->len = len;
+	field->data = (void*) data;
+}
+
+/*************************************************************************
+Copies the data pointer and the length from one field to another. The type
+struct of the target is not written and the data itself is not duplicated. */
+UNIV_INLINE
+void
+dfield_copy_data(
+/*=============*/
+	dfield_t* 	field1,	/* in: field to copy to */
+	dfield_t*	field2)	/* in: field to copy from */
+{
+	ut_ad(field1);
+	ut_ad(field2);
+
+	field1->len = field2->len;
+	field1->data = field2->data;
+}
+
+/*************************************************************************
+Copies a data field to another. The whole struct (data pointer, length and
+type) is copied by assignment; the data buffer itself is not duplicated. */
+UNIV_INLINE
+void
+dfield_copy(
+/*========*/
+	dfield_t*	field1,	/* in: field to copy to */
+	dfield_t*	field2)	/* in: field to copy from */
+{
+	/* debug-check both pointers before dereferencing them, in the same
+	way as dfield_copy_data does */
+	ut_ad(field1 && field2);
+
+	*field1 = *field2;
+}
+
+/*************************************************************************
+Tests if data length and content is equal for two dfields. The types of
+the fields are not compared. */
+UNIV_INLINE
+ibool
+dfield_datas_are_binary_equal(
+/*==========================*/
+				/* out: TRUE if equal */
+	dfield_t*	field1,	/* in: field */
+	dfield_t*	field2)	/* in: field */
+{
+	ulint	len1	= field1->len;
+
+	if (len1 != field2->len) {
+		/* different lengths, or only one of the two is SQL NULL */
+
+		return(FALSE);
+	}
+
+	if (len1 == UNIV_SQL_NULL) {
+		/* both are SQL NULL: the data pointers are not looked at */
+
+		return(TRUE);
+	}
+
+	if (0 != ut_memcmp(field1->data, field2->data, len1)) {
+
+		return(FALSE);
+	}
+
+	return(TRUE);
+}
+
+/*************************************************************************
+Gets info bits in a data tuple. */
+UNIV_INLINE
+ulint
+dtuple_get_info_bits(
+/*=================*/
+				/* out: info bits (tuple->info_bits) */
+	dtuple_t* 	tuple)	/* in: tuple, checked non-NULL with ut_ad */
+{
+	ut_ad(tuple);
+
+	return(tuple->info_bits);
+}
+
+/*************************************************************************
+Sets info bits in a data tuple. The value is stored as given; no
+validation of the bits is done here. */
+UNIV_INLINE
+void
+dtuple_set_info_bits(
+/*=================*/
+	dtuple_t* 	tuple,		/* in: tuple, checked non-NULL with
+					ut_ad */
+	ulint		info_bits)	/* in: info bits */
+{
+	ut_ad(tuple);
+
+	tuple->info_bits = info_bits;
+}
+
+/*************************************************************************
+Gets number of fields used in record comparisons. */
+UNIV_INLINE
+ulint
+dtuple_get_n_fields_cmp(
+/*====================*/
+				/* out: number of fields used in comparisons
+				in rem0cmp.* (tuple->n_fields_cmp) */
+	dtuple_t*	tuple)	/* in: tuple, checked non-NULL with ut_ad */
+{
+	ut_ad(tuple);
+
+	return(tuple->n_fields_cmp);
+}
+
+/*************************************************************************
+Sets number of fields used in record comparisons. */
+UNIV_INLINE
+void
+dtuple_set_n_fields_cmp(
+/*====================*/
+	dtuple_t*	tuple,		/* in: tuple, checked non-NULL with
+					ut_ad */
+	ulint		n_fields_cmp)	/* in: number of fields used in
+					comparisons in rem0cmp.*; checked
+					with ut_ad not to exceed
+					tuple->n_fields */
+{
+	ut_ad(tuple);
+	ut_ad(tuple->n_fields >= n_fields_cmp);
+
+	tuple->n_fields_cmp = n_fields_cmp;
+}
+
+/*************************************************************************
+Gets number of fields in a data tuple. */
+UNIV_INLINE
+ulint
+dtuple_get_n_fields(
+/*================*/
+				/* out: number of fields (tuple->n_fields) */
+	dtuple_t* 	tuple)	/* in: tuple, checked non-NULL with ut_ad */
+{
+	ut_ad(tuple);
+
+	return(tuple->n_fields);
+}
+
+/*************************************************************************
+Returns the address of the nth field in the field array of a tuple. */
+UNIV_INLINE
+dfield_t*
+dtuple_get_nth_field(
+/*=================*/
+				/* out: nth field */
+	dtuple_t* 	tuple,	/* in: tuple, checked non-NULL with ut_ad */
+	ulint		n)	/* in: index of field; checked with ut_ad
+				to be less than tuple->n_fields */
+{
+	ut_ad(tuple);
+	ut_ad(tuple->n_fields > n);
+
+	return(&tuple->fields[n]);
+}
+
+/**************************************************************
+Allocates a data tuple together with its field array from a memory heap.
+The number of fields used in record comparisons for this tuple is
+initialized to n_fields. */
+UNIV_INLINE
+dtuple_t*
+dtuple_create(
+/*==========*/
+	 	 		/* out, own: created tuple */
+	mem_heap_t*	heap,	/* in: memory heap where the tuple
+				is created */
+	ulint		n_fields) /* in: number of fields */
+{
+	dtuple_t*	tuple;
+	ulint		size;
+
+	ut_ad(heap);
+
+	/* a single allocation holds the tuple struct and, right after it,
+	the array of n_fields field structs */
+	size = sizeof(dtuple_t) + n_fields * sizeof(dfield_t);
+	tuple = (dtuple_t*) mem_heap_alloc(heap, size);
+
+	tuple->fields = (dfield_t*)(tuple + 1);
+	tuple->n_fields_cmp = n_fields;
+	tuple->n_fields = n_fields;
+	tuple->info_bits = 0;
+
+#ifdef UNIV_DEBUG
+	tuple->magic_n = DATA_TUPLE_MAGIC_N;
+
+	{	/* In the debug version, initialize fields to an error value */
+		dfield_t*	field	= tuple->fields;
+		dfield_t*	end	= field + n_fields;
+
+		for (; field < end; field++) {
+			field->data = &data_error;
+			dfield_get_type(field)->mtype = DATA_ERROR;
+		}
+	}
+#endif
+	return(tuple);
+}
+
+/**************************************************************
+Sums the data lengths of all fields of a tuple. The space occupied by the
+field structs or the tuple struct is not counted. Neither is possible space
+in externally stored parts of the field. */
+UNIV_INLINE
+ulint
+dtuple_get_data_size(
+/*=================*/
+				/* out: sum of data lengths */
+	dtuple_t*	tuple)	/* in: typed data tuple */
+{
+	ulint	total	= 0;
+	ulint	n;
+	ulint	i;
+
+	ut_ad(tuple);
+	ut_ad(dtuple_check_typed(tuple));
+	ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N);
+
+	n = tuple->n_fields;
+
+	for (i = 0; i < n; i++) {
+		dfield_t*	field	= dtuple_get_nth_field(tuple, i);
+		ulint		len	= dfield_get_len(field);
+
+		/* an SQL NULL field is counted with the SQL null size
+		of its type */
+		total += (len == UNIV_SQL_NULL)
+			? dtype_get_sql_null_size(dfield_get_type(field))
+			: len;
+	}
+
+	return(total);
+}
+
+/***********************************************************************
+Sets the types of the first n fields of a tuple to DATA_BINARY, with the
+remaining dtype_set arguments all zero. */
+UNIV_INLINE
+void
+dtuple_set_types_binary(
+/*====================*/
+	dtuple_t*	tuple,	/* in: data tuple */
+	ulint		n)	/* in: number of fields to set */
+{
+	ulint	i	= 0;
+
+	while (i < n) {
+		dfield_t*	field	= dtuple_get_nth_field(tuple, i);
+
+		dtype_set(dfield_get_type(field), DATA_BINARY, 0, 0, 0);
+		i++;
+	}
+}
+
+/****************************************************************
+Folds a prefix of a tuple, given as a number of complete fields plus a
+number of bytes of the field that follows them. The fold is seeded with
+the folded index tree id. */
+UNIV_INLINE
+ulint
+dtuple_fold(
+/*========*/
+				/* out: the folded value */
+	dtuple_t*	tuple,	/* in: the tuple */
+	ulint		n_fields,/* in: number of complete fields to fold */
+	ulint		n_bytes,/* in: number of bytes to fold in an
+				incomplete last field */
+	dulint		tree_id)/* in: index tree id */
+{
+	dfield_t*	field;
+	ulint		fold;
+	ulint		len;
+	ulint		i;
+
+	ut_ad(tuple);
+	ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N);
+	ut_ad(dtuple_check_typed(tuple));
+
+	fold = ut_fold_dulint(tree_id);
+
+	/* the complete fields; SQL NULL fields do not change the fold */
+	for (i = 0; i < n_fields; i++) {
+		field = dtuple_get_nth_field(tuple, i);
+		len = dfield_get_len(field);
+
+		if (len == UNIV_SQL_NULL) {
+
+			continue;
+		}
+
+		fold = ut_fold_ulint_pair(fold, ut_fold_binary(
+					(byte*) dfield_get_data(field), len));
+	}
+
+	if (n_bytes == 0) {
+
+		return(fold);
+	}
+
+	/* the incomplete field is the one right after the complete ones;
+	at most n_bytes bytes of it are folded */
+	field = dtuple_get_nth_field(tuple, n_fields);
+	len = dfield_get_len(field);
+
+	if (len != UNIV_SQL_NULL) {
+		if (len > n_bytes) {
+			len = n_bytes;
+		}
+
+		fold = ut_fold_ulint_pair(fold, ut_fold_binary(
+					(byte*) dfield_get_data(field), len));
+	}
+
+	return(fold);
+}
+
+/**************************************************************************
+Fills a buffer with zero bytes, which is how an SQL null field is written. */
+UNIV_INLINE
+void
+data_write_sql_null(
+/*================*/
+	byte*	data,	/* in: pointer to a buffer of size len */
+	ulint	len)	/* in: SQL null size in bytes */
+{
+	byte*	ptr	= data;
+	ulint	left;
+
+	for (left = len; left > 0; left--) {
+		*ptr++ = '\0';
+	}
+}
+
+/**************************************************************************
+Checks if any field of a dtuple has the length UNIV_SQL_NULL. */
+UNIV_INLINE
+ibool
+dtuple_contains_null(
+/*=================*/
+				/* out: TRUE if some field is SQL null */
+	dtuple_t*	tuple)	/* in: dtuple */
+{
+	ulint	n_fields	= dtuple_get_n_fields(tuple);
+	ulint	i		= 0;
+
+	while (i < n_fields) {
+		dfield_t*	field	= dtuple_get_nth_field(tuple, i);
+
+		if (UNIV_SQL_NULL == dfield_get_len(field)) {
+
+			return(TRUE);
+		}
+
+		i++;
+	}
+
+	return(FALSE);
+}
--- a/include/data0type.h
+++ b/include/data0type.h
@ -0,0 +1,430 @@
+/******************************************************
+Data types
+
+(c) 1996 Innobase Oy
+
+Created 1/16/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef data0type_h
+#define data0type_h
+
+#include "univ.i"
+
+extern ulint	data_mysql_default_charset_coll;
+#define DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL 8
+
+/* SQL data type struct */
+typedef struct dtype_struct		dtype_t;
+
+/* This variable is initialized as the standard binary variable length
+data type */
+extern dtype_t* 	dtype_binary;
+
+/*-------------------------------------------*/
+/* The 'MAIN TYPE' of a column */
+#define	DATA_VARCHAR	1	/* character varying of the
+				latin1_swedish_ci charset-collation; note
+				that the MySQL format for this, DATA_BINARY,
+				DATA_VARMYSQL, is also affected by whether the
+				'precise type' contains
+				DATA_MYSQL_TRUE_VARCHAR */
+#define DATA_CHAR	2	/* fixed length character of the
+				latin1_swedish_ci charset-collation */
+#define DATA_FIXBINARY	3	/* binary string of fixed length */
+#define DATA_BINARY	4	/* binary string */
+#define DATA_BLOB	5	/* binary large object, or a TEXT type;
+				if prtype & DATA_BINARY_TYPE == 0, then this is
+				actually a TEXT column (or a BLOB created
+				with < 4.0.14; since column prefix indexes
+				came only in 4.0.14, the missing flag in BLOBs
+				created before that does not cause any harm) */
+#define	DATA_INT	6	/* integer: can be any size 1 - 8 bytes */
+#define	DATA_SYS_CHILD	7	/* address of the child page in node pointer */
+#define	DATA_SYS	8	/* system column */
+
+/* Data types >= DATA_FLOAT must be compared using the whole field, not as
+binary strings */
+
+#define DATA_FLOAT	9
+#define DATA_DOUBLE	10
+#define DATA_DECIMAL	11	/* decimal number stored as an ASCII string */
+#define	DATA_VARMYSQL	12	/* any charset varying length char */
+#define	DATA_MYSQL	13	/* any charset fixed length char */
+				/* NOTE that 4.1.1 used DATA_MYSQL and
+				DATA_VARMYSQL for all character sets, and the
+				charset-collation for tables created with it
+				can also be latin1_swedish_ci */
+#define DATA_MTYPE_MAX	63	/* dtype_store_for_order_and_null_size()
+				requires the values are <= 63 */
+/*-------------------------------------------*/
+/* The 'PRECISE TYPE' of a column */
+/*
+Tables created by a MySQL user have the following convention:
+
+- In the least significant byte in the precise type we store the MySQL type
+code (not applicable for system columns).
+
+- In the second least significant byte we OR flags DATA_NOT_NULL,
+DATA_UNSIGNED, DATA_BINARY_TYPE.
+
+- In the third least significant byte of the precise type of string types we
+store the MySQL charset-collation code. In DATA_BLOB columns created with
+< 4.0.14 we do not actually know if it is a BLOB or a TEXT column. Since there
+are no indexes on prefixes of BLOB or TEXT columns in < 4.0.14, this is no
+problem, though.
+
+Note that versions < 4.1.2 or < 5.0.1 did not store the charset code to the
+precise type, since the charset was always the default charset of the MySQL
+installation. If the stored charset code is 0 in the system table SYS_COLUMNS
+of InnoDB, that means that the default charset of this MySQL installation
+should be used.
+
+When loading a table definition from the system tables to the InnoDB data
+dictionary cache in main memory, InnoDB versions >= 4.1.2 and >= 5.0.1 check
+if the stored charset-collation is 0, and if that is the case and the type is
+a non-binary string, replace that 0 by the default charset-collation code of
+this MySQL installation. In short, in old tables, the charset-collation code
+in the system tables on disk can be 0, but in in-memory data structures
+(dtype_t), the charset-collation code is always != 0 for non-binary string
+types.
+
+In new tables, in binary string types, the charset-collation code is the
+MySQL code for the 'binary charset', that is, != 0.
+
+For binary string types and for DATA_CHAR, DATA_VARCHAR, and for those
+DATA_BLOB which are binary or have the charset-collation latin1_swedish_ci,
+InnoDB performs all comparisons internally, without resorting to the MySQL
+comparison functions. This is to save CPU time.
+
+InnoDB's own internal system tables have different precise types for their
+columns, and for them the precise type is usually not used at all.
+*/
+
+#define DATA_ENGLISH    4       /* English language character string: this
+				is a relic from pre-MySQL time and only used
+				for InnoDB's own system tables */
+#define DATA_ERROR	111	/* another relic from pre-MySQL time */
+
+#define DATA_MYSQL_TYPE_MASK 255 /* AND with this mask to extract the MySQL
+				 type from the precise type */
+#define DATA_MYSQL_TRUE_VARCHAR 15 /* MySQL type code for the >= 5.0.3
+				   format true VARCHAR */
+
+/* Precise data types for system columns and the length of those columns;
+NOTE: the values must run from 0 up in the order given! All codes must
+be less than 256 */
+#define	DATA_ROW_ID	0	/* row id: a dulint */
+#define DATA_ROW_ID_LEN	6	/* stored length for row id */
+
+#define DATA_TRX_ID	1	/* transaction id: 6 bytes */
+#define DATA_TRX_ID_LEN	6
+
+#define	DATA_ROLL_PTR	2	/* rollback data pointer: 7 bytes */
+#define DATA_ROLL_PTR_LEN 7
+
+#define DATA_MIX_ID	3	/* mixed index label: a dulint, stored in
+				a row in a compressed form */
+#define DATA_MIX_ID_LEN	9	/* maximum stored length for mix id (in a
+				compressed dulint form) */
+#define	DATA_N_SYS_COLS 4 	/* number of system columns defined above */
+
+/* Flags ORed to the precise data type */
+#define DATA_NOT_NULL	256	/* this is ORed to the precise type when
+				the column is declared as NOT NULL */
+#define DATA_UNSIGNED	512	/* this is ORed to the precise type when
+				we have an unsigned integer type */
+#define	DATA_BINARY_TYPE 1024	/* if the data type is a binary character
+				string, this is ORed to the precise type:
+				this only holds for tables created with
+				>= MySQL-4.0.14 */
+/* #define	DATA_NONLATIN1	2048 This is a relic from < 4.1.2 and < 5.0.1.
+				In earlier versions this was set for some
+				BLOB columns.
+*/
+#define	DATA_LONG_TRUE_VARCHAR 4096	/* this is ORed to the precise data
+				type when the column is true VARCHAR where
+				MySQL uses 2 bytes to store the data len;
+				for shorter VARCHARs MySQL uses only 1 byte */
+/*-------------------------------------------*/
+
+/* This many bytes we need to store the type information affecting the
+alphabetical order for a single field and decide the storage size of an
+SQL null*/
+#define DATA_ORDER_NULL_TYPE_BUF_SIZE		4
+/* In the >= 4.1.x storage format we add 2 bytes more so that we can also
+store the charset-collation number; one byte is left unused, though */
+#define DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE	6
+
+/*************************************************************************
+Gets the MySQL type code from a dtype. */
+UNIV_INLINE
+ulint
+dtype_get_mysql_type(
+/*=================*/
+				/* out: MySQL type code; this is NOT an InnoDB
+				type code! */
+	dtype_t*	type);	/* in: type struct */
+/*************************************************************************
+Determine how many bytes the first n characters of the given string occupy.
+If the string is shorter than n characters, returns the number of bytes
+the characters in the string occupy. */
+
+ulint
+dtype_get_at_most_n_mbchars(
+/*========================*/
+					/* out: length of the prefix,
+					in bytes */
+	const dtype_t*	dtype,		/* in: data type */
+	ulint		prefix_len,	/* in: length of the requested
+					prefix, in characters, multiplied by
+					dtype_get_mbmaxlen(dtype) */
+	ulint		data_len,	/* in: length of str (in bytes) */
+	const char*	str);		/* in: the string whose prefix
+					length is being determined */
+/*************************************************************************
+Checks if a data main type is a string type. Also a BLOB is considered a
+string type. */
+
+ibool
+dtype_is_string_type(
+/*=================*/
+			/* out: TRUE if string type */
+	ulint	mtype);	/* in: InnoDB main data type code: DATA_CHAR, ... */
+/*************************************************************************
+Checks if a type is a binary string type. Note that for tables created with
+< 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column. For
+those DATA_BLOB columns this function currently returns FALSE. */
+
+ibool
+dtype_is_binary_string_type(
+/*========================*/
+			/* out: TRUE if binary string type */
+	ulint	mtype,	/* in: main data type */
+	ulint	prtype);/* in: precise type */
+/*************************************************************************
+Checks if a type is a non-binary string type. That is, dtype_is_string_type is
+TRUE and dtype_is_binary_string_type is FALSE. Note that for tables created
+with < 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column.
+For those DATA_BLOB columns this function currently returns TRUE. */
+
+ibool
+dtype_is_non_binary_string_type(
+/*============================*/
+			/* out: TRUE if non-binary string type */
+	ulint	mtype,	/* in: main data type */
+	ulint	prtype);/* in: precise type */
+/*************************************************************************
+Sets a data type structure. */
+UNIV_INLINE
+void
+dtype_set(
+/*======*/
+	dtype_t*	type,	/* in: type struct to init */
+	ulint		mtype,	/* in: main data type */
+	ulint		prtype,	/* in: precise type */
+	ulint		len,	/* in: length of type */
+	ulint		prec);	/* in: precision of type */
+/*************************************************************************
+Copies a data type structure. */
+UNIV_INLINE
+void
+dtype_copy(
+/*=======*/
+	dtype_t*	type1,	/* in: type struct to copy to */
+	dtype_t*	type2);	/* in: type struct to copy from */
+/*************************************************************************
+Gets the SQL main data type. */
+UNIV_INLINE
+ulint
+dtype_get_mtype(
+/*============*/
+				/* out: main data type code, i.e. the
+				mtype member of the type struct */
+	dtype_t*	type);	/* in: type struct */
+/*************************************************************************
+Gets the precise data type. */
+UNIV_INLINE
+ulint
+dtype_get_prtype(
+/*=============*/
+				/* out: precise type, i.e. the prtype
+				member of the type struct */
+	dtype_t*	type);	/* in: type struct */
+/*************************************************************************
+Gets the MySQL charset-collation code for MySQL string types. */
+
+ulint
+dtype_get_charset_coll_noninline(
+/*=============================*/
+	ulint	prtype);/* in: precise data type */
+/*************************************************************************
+Gets the MySQL charset-collation code for MySQL string types. */
+UNIV_INLINE
+ulint
+dtype_get_charset_coll(
+/*===================*/
+			/* out: charset-collation code, extracted from
+			bits 16..23 of the precise type */
+	ulint	prtype);/* in: precise data type */
+/*************************************************************************
+Forms a precise type from the < 4.1.2 format precise type plus the
+charset-collation code. */
+
+ulint
+dtype_form_prtype(
+/*==============*/
+	ulint	old_prtype,	/* in: the MySQL type code and the flags
+				DATA_BINARY_TYPE etc. */
+	ulint	charset_coll);	/* in: MySQL charset-collation code */
+/*************************************************************************
+Gets the type length. */
+UNIV_INLINE
+ulint
+dtype_get_len(
+/*==========*/
+				/* out: length, i.e. the len member of
+				the type struct */
+	dtype_t*	type);	/* in: type struct */
+/*************************************************************************
+Gets the type precision. */
+UNIV_INLINE
+ulint
+dtype_get_prec(
+/*===========*/
+				/* out: precision, i.e. the prec member
+				of the type struct */
+	dtype_t*	type);	/* in: type struct */
+/*************************************************************************
+Gets the minimum length of a character, in bytes. */
+UNIV_INLINE
+ulint
+dtype_get_mbminlen(
+/*===============*/
+				/* out: minimum length of a char, in bytes,
+				or 0 if this is not a character type */
+	const dtype_t*	type);	/* in: type */
+/*************************************************************************
+Gets the maximum length of a character, in bytes. */
+UNIV_INLINE
+ulint
+dtype_get_mbmaxlen(
+/*===============*/
+				/* out: maximum length of a char, in bytes,
+				or 0 if this is not a character type */
+	const dtype_t*	type);	/* in: type */
+/*************************************************************************
+Gets the padding character code for the type. */
+UNIV_INLINE
+ulint
+dtype_get_pad_char(
+/*===============*/
+				/* out: padding character code, or
+				ULINT_UNDEFINED if no padding specified */
+	dtype_t*	type);	/* in: type */
+/***************************************************************************
+Returns the size of a fixed size data type, 0 if not a fixed size type. */
+UNIV_INLINE
+ulint
+dtype_get_fixed_size(
+/*=================*/
+				/* out: fixed size, or 0 */
+	dtype_t*	type);	/* in: type */
+/***************************************************************************
+Returns the minimum size of a data type. */
+UNIV_INLINE
+ulint
+dtype_get_min_size(
+/*===============*/
+				/* out: minimum size */
+	const dtype_t*	type);	/* in: type */
+/***************************************************************************
+Returns a stored SQL NULL size for a type. For fixed length types it is
+the fixed length of the type, otherwise 0. */
+UNIV_INLINE
+ulint
+dtype_get_sql_null_size(
+/*====================*/
+				/* out: SQL null storage size */
+	dtype_t*	type);	/* in: type */
+/***************************************************************************
+Returns TRUE if a type is of a fixed size. */
+UNIV_INLINE
+ibool
+dtype_is_fixed_size(
+/*================*/
+				/* out: TRUE if fixed size */
+	dtype_t*	type);	/* in: type */
+/**************************************************************************
+Reads to a type the stored information which determines its alphabetical
+ordering and the storage size of an SQL NULL value. This is the < 4.1.x
+storage format. */
+UNIV_INLINE
+void
+dtype_read_for_order_and_null_size(
+/*===============================*/
+	dtype_t*	type,	/* in/out: type struct; its mtype, prtype,
+				len, mbminlen and mbmaxlen members are
+				overwritten */
+	byte*		buf);	/* in: buffer of
+				DATA_ORDER_NULL_TYPE_BUF_SIZE bytes holding
+				the stored order info */
+/**************************************************************************
+Stores for a type the information which determines its alphabetical ordering
+and the storage size of an SQL NULL value. This is the >= 4.1.x storage
+format. */
+UNIV_INLINE
+void
+dtype_new_store_for_order_and_null_size(
+/*====================================*/
+	byte*		buf,	/* in: buffer for
+				DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
+				bytes where we store the info */
+	dtype_t*	type);	/* in: type struct */
+/**************************************************************************
+Reads to a type the stored information which determines its alphabetical
+ordering and the storage size of an SQL NULL value. This is the >= 4.1.x
+storage format. */
+UNIV_INLINE
+void
+dtype_new_read_for_order_and_null_size(
+/*===================================*/
+	dtype_t*	type,	/* in/out: type struct; its mtype, prtype,
+				len, mbminlen and mbmaxlen members are
+				overwritten */
+	byte*		buf);	/* in: buffer of
+				DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE bytes
+				holding the stored type order info */
+
+/*************************************************************************
+Validates a data type structure. */
+
+ibool
+dtype_validate(
+/*===========*/
+				/* out: TRUE if ok */
+	dtype_t*	type);	/* in: type struct to validate */
+/*************************************************************************
+Prints a data type structure. */
+
+void
+dtype_print(
+/*========*/
+	dtype_t*	type);	/* in: type */
+
+/* Structure for an SQL data type.
+If you add fields to this structure, be sure to initialize them everywhere.
+This structure is initialized in the following functions:
+dtype_set()
+dtype_read_for_order_and_null_size()
+dtype_new_read_for_order_and_null_size()
+sym_tab_add_null_lit() */
+
+struct dtype_struct{
+	ulint	mtype;		/* main data type: one of the DATA_VARCHAR,
+				DATA_CHAR, ... codes; dtype_set() asserts
+				that it is <= DATA_MTYPE_MAX */
+	ulint	prtype;		/* precise type; MySQL data type, charset code,
+				flags to indicate nullability, signedness,
+				whether this is a binary string, whether this
+				is a true VARCHAR where MySQL uses 2 bytes to
+				store the length */
+
+	/* the remaining fields do not affect alphabetical ordering: */
+
+	ulint	len;		/* length; for MySQL data this is
+				field->pack_length(), except that for a
+				>= 5.0.3 type true VARCHAR this is the
+				maximum byte length of the string data
+				(in addition to the string, MySQL uses 1 or
+				2 bytes to store the string length) */
+	ulint	prec;		/* precision */
+
+	ulint	mbminlen;	/* minimum length of a character, in bytes;
+				0 if mtype is not a string type */
+	ulint	mbmaxlen;	/* maximum length of a character, in bytes;
+				0 if mtype is not a string type */
+};
+
+#ifndef UNIV_NONINL
+#include "data0type.ic"
+#endif
+
+#endif
--- a/include/data0type.ic
+++ b/include/data0type.ic
@ -0,0 +1,512 @@
+/******************************************************
+Data types
+
+(c) 1996 Innobase Oy
+
+Created 1/16/1996 Heikki Tuuri
+*******************************************************/
+
+#include "mach0data.h"
+
+/**********************************************************************
+Get the variable length bounds of the given character set.
+
+NOTE: the prototype of this function is copied from ha_innodb.cc! If you change
+this function, you MUST change also the prototype here! */
+extern
+void
+innobase_get_cset_width(
+/*====================*/
+	ulint	cset,		/* in: MySQL charset-collation code */
+	ulint*	mbminlen,	/* out: minimum length of a char (in bytes) */
+	ulint*	mbmaxlen);	/* out: maximum length of a char (in bytes) */
+
+/*************************************************************************
+Extracts the MySQL charset-collation code stored in a precise type. */
+UNIV_INLINE
+ulint
+dtype_get_charset_coll(
+/*===================*/
+			/* out: charset-collation code: bits 16..23 of
+			prtype, that is, a value in the range 0..255 */
+	ulint	prtype)	/* in: precise data type */
+{
+	ulint	charset_coll;
+
+	/* Drop the MySQL type code and the flag byte, then keep one byte */
+	charset_coll = prtype >> 16;
+	charset_coll &= 0xFFUL;
+
+	return(charset_coll);
+}
+
+/*************************************************************************
+Extracts the MySQL type code from the precise type of a dtype. */
+UNIV_INLINE
+ulint
+dtype_get_mysql_type(
+/*=================*/
+				/* out: MySQL type code (the least
+				significant byte of the precise type); this
+				is NOT an InnoDB type code! */
+	dtype_t*	type)	/* in: type struct */
+{
+	ulint	mysql_type;
+
+	mysql_type = type->prtype & DATA_MYSQL_TYPE_MASK;
+
+	return(mysql_type);
+}
+
+/*************************************************************************
+Derives the mbminlen and mbmaxlen members of a data type structure from
+its mtype and prtype members. */
+UNIV_INLINE
+void
+dtype_set_mblen(
+/*============*/
+	dtype_t*	type)	/* in/out: type struct; mtype and prtype are
+				read, mbminlen and mbmaxlen are written */
+{
+	ut_ad(type);
+
+	if (!dtype_is_string_type(type->mtype)) {
+		/* Character widths are meaningless for non-string types */
+		type->mbmaxlen = 0;
+		type->mbminlen = 0;
+
+		return;
+	}
+
+	/* Ask MySQL for the character width bounds of the charset-collation
+	encoded in the precise type */
+	innobase_get_cset_width(dtype_get_charset_coll(type->prtype),
+				&type->mbminlen, &type->mbmaxlen);
+
+	ut_ad(type->mbminlen <= type->mbmaxlen);
+}
+
+/*************************************************************************
+Initializes a data type structure from its component values. */
+UNIV_INLINE
+void
+dtype_set(
+/*======*/
+	dtype_t*	type,	/* out: type struct to initialize */
+	ulint		mtype,	/* in: main data type; asserted to be
+				<= DATA_MTYPE_MAX in debug builds */
+	ulint		prtype,	/* in: precise type */
+	ulint		len,	/* in: length of type */
+	ulint		prec)	/* in: precision of type */
+{
+	ut_ad(type);
+	ut_ad(mtype <= DATA_MTYPE_MAX);
+
+	type->prec = prec;
+	type->len = len;
+	type->prtype = prtype;
+	type->mtype = mtype;
+
+	/* mbminlen and mbmaxlen are computed from mtype and prtype, so
+	this must come after the assignments above */
+	dtype_set_mblen(type);
+
+	ut_ad(dtype_validate(type));
+}
+
+/*************************************************************************
+Copies a data type structure. All members are copied with a single struct
+assignment; in debug builds the result is then checked with
+dtype_validate(). */
+UNIV_INLINE
+void
+dtype_copy(
+/*=======*/
+	dtype_t*	type1,	/* out: type struct to copy to */
+	dtype_t*	type2)	/* in: type struct to copy from */
+{
+	*type1 = *type2;
+
+	ut_ad(dtype_validate(type1));
+}
+
+/*************************************************************************
+Returns the SQL main data type of a type struct. */
+UNIV_INLINE
+ulint
+dtype_get_mtype(
+/*============*/
+				/* out: main data type code */
+	dtype_t*	type)	/* in: type struct */
+{
+	ulint	main_type;
+
+	ut_ad(type);
+
+	main_type = type->mtype;
+
+	return(main_type);
+}
+
+/*************************************************************************
+Returns the precise data type of a type struct. */
+UNIV_INLINE
+ulint
+dtype_get_prtype(
+/*=============*/
+				/* out: precise type */
+	dtype_t*	type)	/* in: type struct */
+{
+	ulint	precise_type;
+
+	ut_ad(type);
+
+	precise_type = type->prtype;
+
+	return(precise_type);
+}
+
+/*************************************************************************
+Returns the length stored in a type struct. */
+UNIV_INLINE
+ulint
+dtype_get_len(
+/*==========*/
+				/* out: length of the type */
+	dtype_t*	type)	/* in: type struct */
+{
+	ulint	type_len;
+
+	ut_ad(type);
+
+	type_len = type->len;
+
+	return(type_len);
+}
+
+/*************************************************************************
+Returns the precision stored in a type struct. */
+UNIV_INLINE
+ulint
+dtype_get_prec(
+/*===========*/
+				/* out: precision of the type */
+	dtype_t*	type)	/* in: type struct */
+{
+	ulint	precision;
+
+	ut_ad(type);
+
+	precision = type->prec;
+
+	return(precision);
+}
+
+/*************************************************************************
+Returns the smallest number of bytes one character of the type can take. */
+UNIV_INLINE
+ulint
+dtype_get_mbminlen(
+/*===============*/
+				/* out: minimum length of a char, in bytes,
+				or 0 if this is not a character type */
+	const dtype_t*	type)	/* in: type */
+{
+	ulint	min_char_len;
+
+	ut_ad(type);
+
+	min_char_len = type->mbminlen;
+
+	return(min_char_len);
+}
+/*************************************************************************
+Returns the largest number of bytes one character of the type can take. */
+UNIV_INLINE
+ulint
+dtype_get_mbmaxlen(
+/*===============*/
+				/* out: maximum length of a char, in bytes,
+				or 0 if this is not a character type */
+	const dtype_t*	type)	/* in: type */
+{
+	ulint	max_char_len;
+
+	ut_ad(type);
+
+	max_char_len = type->mbmaxlen;
+
+	return(max_char_len);
+}
+
+/*************************************************************************
+Returns the character code used to pad values of the type. */
+UNIV_INLINE
+ulint
+dtype_get_pad_char(
+/*===============*/
+				/* out: padding character code, or
+				ULINT_UNDEFINED if no padding specified */
+	dtype_t*	type)	/* in: type */
+{
+	switch (type->mtype) {
+	case DATA_CHAR:
+	case DATA_VARCHAR:
+	case DATA_BINARY:
+	case DATA_FIXBINARY:
+	case DATA_MYSQL:
+	case DATA_VARMYSQL:
+		/* Space is the padding character for all char and binary
+		strings */
+
+		return((ulint)' ');
+
+	case DATA_BLOB:
+		if ((type->prtype & DATA_BINARY_TYPE) == 0) {
+			/* A DATA_BLOB without the binary flag is a TEXT
+			column: starting from 5.0.3, TEXT strings are
+			padded with space, too */
+
+			return((ulint)' ');
+		}
+
+		break;
+
+	default:
+		break;
+	}
+
+	/* No padding specified */
+
+	return(ULINT_UNDEFINED);
+}
+
+/**************************************************************************
+Serializes the parts of a type which determine its alphabetical ordering
+and the storage size of an SQL NULL value. This is the >= 4.1.x storage
+format:
+	buf[0]		low bits of mtype; bit 128 is set if the precise
+			type has DATA_BINARY_TYPE
+	buf[1]		least significant byte of prtype (MySQL type code)
+	buf + 2		low 16 bits of len, written with mach_write_to_2()
+	buf + 4		charset-collation code, written with
+			mach_write_to_2(); bit 128 of buf[4] is set if the
+			precise type has DATA_NOT_NULL */
+UNIV_INLINE
+void
+dtype_new_store_for_order_and_null_size(
+/*====================================*/
+	byte*		buf,	/* out: buffer for
+				DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
+				bytes where we store the info */
+	dtype_t*	type)	/* in: type struct */
+{
+	ulint	charset_coll;
+	byte	mtype_byte;
+
+#if 6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
+#error "6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE"
+#endif
+
+	mtype_byte = (byte)(type->mtype & 0xFFUL);
+
+	if (type->prtype & DATA_BINARY_TYPE) {
+		mtype_byte |= 128;
+	}
+
+	/* Versions < 4.1.2 also set bit 64 here when the precise type
+	had the DATA_NONLATIN1 flag; that flag no longer exists. */
+
+	buf[0] = mtype_byte;
+	buf[1] = (byte)(type->prtype & 0xFFUL);
+
+	mach_write_to_2(buf + 2, type->len & 0xFFFFUL);
+
+	charset_coll = dtype_get_charset_coll(type->prtype);
+
+	ut_ad(charset_coll < 256);
+	mach_write_to_2(buf + 4, charset_coll);
+
+	/* The NOT NULL flag shares the 2-byte charset-collation field */
+	if (type->prtype & DATA_NOT_NULL) {
+		buf[4] |= 128;
+	}
+}
+
+/**************************************************************************
+Reads to a type the stored information which determines its alphabetical
+ordering and the storage size of an SQL NULL value. This is the < 4.1.x
+storage format. */
+UNIV_INLINE
+void
+dtype_read_for_order_and_null_size(
+/*===============================*/
+	dtype_t*	type,	/* in: type struct */
+	byte*		buf)	/* in: buffer for stored type order info */
+{
+	ut_ad(4 == DATA_ORDER_NULL_TYPE_BUF_SIZE);
+	
+	type->mtype = buf[0] & 63;
+	type->prtype = buf[1];
+
+	if (buf[0] & 128) {
+	        type->prtype = type->prtype | DATA_BINARY_TYPE;
+	}
+
+	type->len = mach_read_from_2(buf + 2);
+		
+	type->prtype = dtype_form_prtype(type->prtype,
+					data_mysql_default_charset_coll);
+	dtype_set_mblen(type);
+}	
+
+/**************************************************************************
+Reads to a type the stored information which determines its alphabetical
+ordering and the storage size of an SQL NULL value. This is the >= 4.1.x
+storage format. */
+UNIV_INLINE
+void
+dtype_new_read_for_order_and_null_size(
+/*===================================*/
+	dtype_t*	type,	/* in/out: type struct; mtype, prtype, len,
+				mbminlen and mbmaxlen are overwritten.
+				NOTE(review): prec is not assigned here */
+	byte*		buf)	/* in: buffer of
+				DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE bytes
+				holding the stored type order info */
+{
+	ulint	charset_coll;
+
+#if 6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
+#error "6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE"
+#endif
+
+	/* The low 6 bits of the first byte are the main type; bit 128
+	flags a binary string type */
+	type->mtype = buf[0] & 63;
+	type->prtype = buf[1];
+
+	if (buf[0] & 128) {
+		type->prtype |= DATA_BINARY_TYPE;
+	}
+
+	if (buf[4] & 128) {
+		type->prtype |= DATA_NOT_NULL;
+	}
+
+	type->len = mach_read_from_2(buf + 2);
+
+	/* The mask drops the bit that carries the NOT NULL flag, which
+	was tested above through buf[4] */
+	charset_coll = mach_read_from_2(buf + 4) & 0x7fff;
+
+	if (dtype_is_string_type(type->mtype)) {
+		ut_a(charset_coll < 256);
+
+		if (charset_coll == 0) {
+			/* This insert buffer record was inserted with MySQL
+			version < 4.1.2, and the charset-collation code was not
+			explicitly stored to dtype->prtype at that time. It
+			must be the default charset-collation of this MySQL
+			installation. */
+
+			charset_coll = data_mysql_default_charset_coll;
+		}
+
+		type->prtype = dtype_form_prtype(type->prtype, charset_coll);
+	}
+
+	dtype_set_mblen(type);
+}
+
+#ifndef UNIV_HOTBACKUP
+/***************************************************************************
+Returns the size of a fixed size data type, 0 if not a fixed size type.
+DATA_SYS, DATA_CHAR, DATA_FIXBINARY, DATA_INT, DATA_FLOAT and DATA_DOUBLE
+are always fixed size. DATA_MYSQL is fixed size if the precise type has
+DATA_BINARY_TYPE, or if innobase_get_cset_width() reports equal minimum and
+maximum character lengths for its charset-collation. An unknown main type
+hits ut_error. */
+UNIV_INLINE
+ulint
+dtype_get_fixed_size(
+/*=================*/
+				/* out: fixed size, or 0 */
+	dtype_t*	type)	/* in: type */
+{
+	ulint	mtype;
+
+	mtype = dtype_get_mtype(type);
+
+	switch (mtype) {
+	case DATA_SYS:
+#ifdef UNIV_DEBUG
+			/* Debug builds only: check that the stored length
+			matches the system column kind */
+			switch (type->prtype & DATA_MYSQL_TYPE_MASK) {
+			default:
+				ut_ad(0);
+				return(0);
+			case DATA_ROW_ID:
+				ut_ad(type->len == DATA_ROW_ID_LEN);
+				break;
+			case DATA_TRX_ID:
+				ut_ad(type->len == DATA_TRX_ID_LEN);
+				break;
+			case DATA_ROLL_PTR:
+				ut_ad(type->len == DATA_ROLL_PTR_LEN);
+				break;
+			case DATA_MIX_ID:
+				ut_ad(type->len == DATA_MIX_ID_LEN);
+				break;
+			}
+#endif /* UNIV_DEBUG */
+			/* fall through: a system column is returned as
+			fixed size, like the types below */
+	case DATA_CHAR:
+	case DATA_FIXBINARY:
+	case DATA_INT:
+	case DATA_FLOAT:
+	case DATA_DOUBLE:
+			return(dtype_get_len(type));
+	case DATA_MYSQL:
+			if (type->prtype & DATA_BINARY_TYPE) {
+				return(dtype_get_len(type));
+			} else {
+				/* We play it safe here and ask MySQL for
+				mbminlen and mbmaxlen.  Although
+				type->mbminlen and type->mbmaxlen are
+				initialized if and only if type->prtype
+				is (in one of the 3 functions in this file),
+				it could be that none of these functions
+				has been called. */
+
+				ulint	mbminlen, mbmaxlen;
+
+				innobase_get_cset_width(
+					dtype_get_charset_coll(type->prtype),
+					&mbminlen, &mbmaxlen);
+
+				/* A mismatch with the cached values is only
+				reported to stderr; the values just obtained
+				from MySQL are used for the decision below */
+				if (UNIV_UNLIKELY(type->mbminlen != mbminlen)
+				|| UNIV_UNLIKELY(type->mbmaxlen != mbmaxlen)) {
+
+					ut_print_timestamp(stderr);
+					fprintf(stderr, "  InnoDB: "
+						"mbminlen=%lu, "
+						"mbmaxlen=%lu, "
+						"type->mbminlen=%lu, "
+						"type->mbmaxlen=%lu\n",
+						(ulong) mbminlen,
+						(ulong) mbmaxlen,
+						(ulong) type->mbminlen,
+						(ulong) type->mbmaxlen);
+				}
+				if (mbminlen == mbmaxlen) {
+					return(dtype_get_len(type));
+				}
+			}
+			/* fall through for variable-length charsets */
+	case DATA_VARCHAR:
+	case DATA_BINARY:
+	case DATA_DECIMAL:
+	case DATA_VARMYSQL:
+	case DATA_BLOB:
+			return(0); 
+	default:	ut_error;
+	}
+
+	return(0);
+}
+
+/***************************************************************************
+Returns the minimum size of a data type. For DATA_SYS, DATA_CHAR,
+DATA_FIXBINARY, DATA_INT, DATA_FLOAT and DATA_DOUBLE this is the type
+length. For DATA_MYSQL it is the type length if the precise type has
+DATA_BINARY_TYPE or mbminlen == mbmaxlen, and otherwise the length scaled
+by mbminlen / mbmaxlen. For the variable-length main types it is 0. An
+unknown main type hits ut_error. */
+UNIV_INLINE
+ulint
+dtype_get_min_size(
+/*===============*/
+				/* out: minimum size */
+	const dtype_t*	type)	/* in: type */
+{
+	switch (type->mtype) {
+	case DATA_SYS:
+#ifdef UNIV_DEBUG
+			/* Debug builds only: check that the stored length
+			matches the system column kind */
+			switch (type->prtype & DATA_MYSQL_TYPE_MASK) {
+			default:
+				ut_ad(0);
+				return(0);
+			case DATA_ROW_ID:
+				ut_ad(type->len == DATA_ROW_ID_LEN);
+				break;
+			case DATA_TRX_ID:
+				ut_ad(type->len == DATA_TRX_ID_LEN);
+				break;
+			case DATA_ROLL_PTR:
+				ut_ad(type->len == DATA_ROLL_PTR_LEN);
+				break;
+			case DATA_MIX_ID:
+				ut_ad(type->len == DATA_MIX_ID_LEN);
+				break;
+			}
+#endif /* UNIV_DEBUG */
+			/* fall through: the minimum size of a system column
+			is its length, like for the types below */
+	case DATA_CHAR:
+	case DATA_FIXBINARY:
+	case DATA_INT:
+	case DATA_FLOAT:
+	case DATA_DOUBLE:
+			return(type->len);
+	case DATA_MYSQL:
+			if ((type->prtype & DATA_BINARY_TYPE)
+					|| type->mbminlen == type->mbmaxlen) {
+				return(type->len);
+			}
+			/* this is a variable-length character set */
+			ut_a(type->mbminlen > 0);
+			ut_a(type->mbmaxlen > type->mbminlen);
+			ut_a(type->len % type->mbmaxlen == 0);
+			/* len is asserted above to be a multiple of
+			mbmaxlen, so the division below is exact */
+			return(type->len * type->mbminlen / type->mbmaxlen);
+	case DATA_VARCHAR:
+	case DATA_BINARY:
+	case DATA_DECIMAL:
+	case DATA_VARMYSQL:
+	case DATA_BLOB:
+			return(0); 
+	default:	ut_error;
+	}
+
+	return(0);
+}
+#endif /* !UNIV_HOTBACKUP */
+
+/***************************************************************************
+Returns the number of bytes an SQL NULL value of the type occupies when
+stored: the fixed size for fixed length types, otherwise 0. */
+UNIV_INLINE
+ulint
+dtype_get_sql_null_size(
+/*====================*/
+				/* out: SQL null storage size */
+	dtype_t*	type)	/* in: type */
+{
+	ulint	null_size;
+
+	/* An SQL NULL takes exactly as much room as a fixed size value */
+	null_size = dtype_get_fixed_size(type);
+
+	return(null_size);
+}
+
+/***************************************************************************
+Tells whether a type is of a fixed size, that is, whether
+dtype_get_fixed_size() returns a nonzero size for it. */
+UNIV_INLINE
+ibool
+dtype_is_fixed_size(
+/*================*/
+				/* out: TRUE if fixed size */
+	dtype_t*	type)	/* in: type */
+{
+	if (dtype_get_fixed_size(type) == 0) {
+
+		return(FALSE);
+	}
+
+	return(TRUE);
+}
--- a/include/data0types.h
+++ b/include/data0types.h
@ -0,0 +1,19 @@
+/************************************************************************
+Some type definitions
+
+(c) 1994-2000 Innobase Oy
+
+Created 9/21/2000 Heikki Tuuri
+*************************************************************************/
+
+#ifndef data0types_h
+#define data0types_h
+
+/* SQL data field struct */
+typedef struct dfield_struct	dfield_t;
+
+/* SQL data tuple struct */
+typedef struct dtuple_struct	dtuple_t;
+
+#endif
+
--- a/include/db0err.h
+++ b/include/db0err.h
@ -0,0 +1,69 @@
+/******************************************************
+Global error codes for the database
+
+(c) 1996 Innobase Oy
+
+Created 5/24/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef db0err_h
+#define db0err_h
+
+
+#define DB_SUCCESS		10
+
+/* The following are error codes */
+#define	DB_ERROR		11
+#define DB_OUT_OF_MEMORY	12
+#define DB_OUT_OF_FILE_SPACE	13
+#define DB_LOCK_WAIT		14
+#define DB_DEADLOCK		15
+#define DB_ROLLBACK		16
+#define DB_DUPLICATE_KEY	17
+#define DB_QUE_THR_SUSPENDED	18
+#define DB_MISSING_HISTORY	19	/* required history data has been
+					deleted due to lack of space in
+					rollback segment */
+#define DB_CLUSTER_NOT_FOUND	30
+#define DB_TABLE_NOT_FOUND	31
+#define DB_MUST_GET_MORE_FILE_SPACE 32	/* the database has to be stopped
+					and restarted with more file space */
+#define DB_TABLE_IS_BEING_USED	33
+#define DB_TOO_BIG_RECORD	34	/* a record in an index would become
+					bigger than 1/2 free space in a page
+					frame */
+#define DB_LOCK_WAIT_TIMEOUT	35	/* lock wait lasted too long */
+#define DB_NO_REFERENCED_ROW	36	/* referenced key value not found
+					for a foreign key in an insert or
+					update of a row */
+#define DB_ROW_IS_REFERENCED	37	/* cannot delete or update a row
+					because it contains a key value
+					which is referenced */
+#define DB_CANNOT_ADD_CONSTRAINT 38	/* adding a foreign key constraint
+					to a table failed */
+#define DB_CORRUPTION		39	/* data structure corruption noticed */
+#define DB_COL_APPEARS_TWICE_IN_INDEX 40 /* InnoDB cannot handle an index
+					    where same column appears twice */
+#define DB_CANNOT_DROP_CONSTRAINT 41	/* dropping a foreign key constraint
+					from a table failed */
+#define DB_NO_SAVEPOINT		42	/* no savepoint exists with the given
+					name */
+#define	DB_TABLESPACE_ALREADY_EXISTS 43 /* we cannot create a new single-table
+				        tablespace because a file of the same
+					name already exists */
+#define DB_TABLESPACE_DELETED	44	/* tablespace does not exist or is
+					being dropped right now */
+#define	DB_LOCK_TABLE_FULL	45	/* lock structs have exhausted the
+					buffer pool (for big transactions,
+					InnoDB stores the lock structs in the
+					buffer pool) */
+
+/* The following are partial failure codes */
+#define DB_FAIL 		1000
+#define DB_OVERFLOW 		1001
+#define DB_UNDERFLOW 		1002
+#define DB_STRONG_FAIL		1003
+#define DB_RECORD_NOT_FOUND	1500
+#define DB_END_OF_INDEX		1501
+
+#endif 
--- a/include/dict0boot.h
+++ b/include/dict0boot.h
@ -0,0 +1,133 @@
+/******************************************************
+Data dictionary creation and booting
+
+(c) 1996 Innobase Oy
+
+Created 4/18/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef dict0boot_h
+#define dict0boot_h
+
+#include "univ.i"
+
+#include "mtr0mtr.h"
+#include "mtr0log.h"
+#include "ut0byte.h"
+#include "buf0buf.h"
+#include "fsp0fsp.h"
+#include "dict0dict.h"
+
+typedef	byte	dict_hdr_t;
+
+/**************************************************************************
+Gets a pointer to the dictionary header and x-latches its page. */
+
+dict_hdr_t*
+dict_hdr_get(
+/*=========*/
+			/* out: pointer to the dictionary header, 
+			page x-latched */
+	mtr_t*	mtr);	/* in: mtr */
+/**************************************************************************
+Returns a new row, table, index, or tree id. */
+
+dulint
+dict_hdr_get_new_id(
+/*================*/
+			/* out: the new id */
+	ulint	type);	/* in: DICT_HDR_ROW_ID, ... */
+/**************************************************************************
+Returns a new row id. */
+UNIV_INLINE
+dulint
+dict_sys_get_new_row_id(void);
+/*=========================*/
+			/* out: the new id */
+/**************************************************************************
+Reads a row id from a record or other 6-byte stored form. */
+UNIV_INLINE
+dulint
+dict_sys_read_row_id(
+/*=================*/
+			/* out: row id */
+	byte*	field);	/* in: record field */
+/**************************************************************************
+Writes a row id to a record or other 6-byte stored form. */
+UNIV_INLINE
+void
+dict_sys_write_row_id(
+/*==================*/
+	byte*	field,	/* in: record field */
+	dulint	row_id);/* in: row id */
+/*********************************************************************
+Initializes the data dictionary memory structures when the database is
+started. This function is also called when the data dictionary is created. */
+
+void
+dict_boot(void);
+/*===========*/
+/*********************************************************************
+Creates and initializes the data dictionary at the database creation. */
+
+void
+dict_create(void);
+/*=============*/
+
+
+/* Space id and page no where the dictionary header resides */
+#define	DICT_HDR_SPACE		0	/* the SYSTEM tablespace */
+#define	DICT_HDR_PAGE_NO	FSP_DICT_HDR_PAGE_NO
+
+/* The ids for the basic system tables and their indexes */
+#define DICT_TABLES_ID		ut_dulint_create(0, 1)
+#define DICT_COLUMNS_ID		ut_dulint_create(0, 2)
+#define DICT_INDEXES_ID		ut_dulint_create(0, 3)
+#define DICT_FIELDS_ID		ut_dulint_create(0, 4)
+/* The following is a secondary index on SYS_TABLES */
+#define DICT_TABLE_IDS_ID	ut_dulint_create(0, 5)
+
+#define	DICT_HDR_FIRST_ID	10	/* the ids for tables etc. start
+					from this number, except for basic
+					system tables and their above defined
+					indexes; ibuf tables and indexes are
+					assigned as the id the number
+					DICT_IBUF_ID_MIN plus the space id */
+#define DICT_IBUF_ID_MIN	ut_dulint_create(0xFFFFFFFFUL, 0)
+					
+/* The offset of the dictionary header on the page */
+#define	DICT_HDR		FSEG_PAGE_DATA
+
+/*-------------------------------------------------------------*/
+/* Dictionary header offsets (presumably byte offsets counted from the start
+of the dictionary header, DICT_HDR -- TODO confirm) */
+#define DICT_HDR_ROW_ID		0	/* The latest assigned row id */
+#define	DICT_HDR_TABLE_ID	8	/* The latest assigned table id */
+#define	DICT_HDR_INDEX_ID	16	/* The latest assigned index id */
+#define	DICT_HDR_MIX_ID		24	/* The latest assigned mix id */
+#define	DICT_HDR_TABLES		32	/* Root of the table index tree */
+#define	DICT_HDR_TABLE_IDS	36	/* Root of the table id index tree
+					(presumably the secondary index on
+					SYS_TABLES identified by
+					DICT_TABLE_IDS_ID -- TODO confirm) */
+#define	DICT_HDR_COLUMNS	40	/* Root of the column index tree */
+#define	DICT_HDR_INDEXES	44	/* Root of the index index tree */
+#define	DICT_HDR_FIELDS		48	/* Root of the index field index tree */
+
+#define DICT_HDR_FSEG_HEADER	56	/* Segment header for the tablespace
+					segment into which the dictionary
+					header is created */
+/*-------------------------------------------------------------*/
+
+/* The field numbers of the page number, space number and type fields in
+the sys_indexes table clustered index */
+#define DICT_SYS_INDEXES_PAGE_NO_FIELD	 8
+#define DICT_SYS_INDEXES_SPACE_NO_FIELD	 7
+#define DICT_SYS_INDEXES_TYPE_FIELD	 6
+					
+/* When a row id which is zero modulo this number (which must be a power of
+two) is assigned, the field DICT_HDR_ROW_ID on the dictionary header page is
+updated */
+#define DICT_HDR_ROW_ID_WRITE_MARGIN	256
+
+#ifndef UNIV_NONINL
+#include "dict0boot.ic"
+#endif
+
+#endif 
--- a/include/dict0boot.ic
+++ b/include/dict0boot.ic
@ -0,0 +1,72 @@
+/******************************************************
+Data dictionary creation and booting
+
+(c) 1996 Innobase Oy
+
+Created 4/18/1996 Heikki Tuuri
+*******************************************************/
+
+/**************************************************************************
+Writes the current value of the row id counter to the dictionary header file
+page. */
+
+void
+dict_hdr_flush_row_id(void);
+/*=======================*/
+
+
+/**************************************************************************
+Hands out the current value of the row id counter of the dictionary system
+and then advances the counter by one. The counter is read and incremented
+while holding dict_sys->mutex. */
+UNIV_INLINE
+dulint
+dict_sys_get_new_row_id(void)
+/*=========================*/
+			/* out: the new id */
+{
+	dulint	new_id;
+
+	mutex_enter(&(dict_sys->mutex));
+
+	new_id = dict_sys->row_id;
+
+	/* Whenever the low word of the id about to be handed out is zero
+	modulo the write margin, write the counter value to the dictionary
+	header page before incrementing it */
+	if ((ut_dulint_get_low(new_id) % DICT_HDR_ROW_ID_WRITE_MARGIN) == 0) {
+		dict_hdr_flush_row_id();
+	}
+
+	UT_DULINT_INC(dict_sys->row_id);
+
+	mutex_exit(&(dict_sys->mutex));
+
+	return(new_id);
+}
+
+/**************************************************************************
+Decodes a row id from its 6-byte stored form, e.g., a record field. */
+UNIV_INLINE
+dulint
+dict_sys_read_row_id(
+/*=================*/
+			/* out: row id */
+	byte*	field)	/* in: record field holding the stored row id */
+{
+	dulint	row_id;
+
+	/* The stored form handled here is exactly 6 bytes long */
+	ut_ad(6 == DATA_ROW_ID_LEN);
+
+	row_id = mach_read_from_6(field);
+
+	return(row_id);
+}
+
+/**************************************************************************
+Stores a row id in its 6-byte stored form, e.g., into a record field. */
+UNIV_INLINE
+void
+dict_sys_write_row_id(
+/*==================*/
+	byte*	field,	/* in: record field where the row id is written */
+	dulint	row_id)	/* in: row id */
+{
+	/* The stored form handled here is exactly 6 bytes long */
+	ut_ad(6 == DATA_ROW_ID_LEN);
+
+	mach_write_to_6(field, row_id);
+}
+
+
--- a/include/dict0crea.h
+++ b/include/dict0crea.h
@ -0,0 +1,177 @@
+/******************************************************
+Database object creation
+
+(c) 1996 Innobase Oy
+
+Created 1/8/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef dict0crea_h
+#define dict0crea_h
+
+#include "univ.i"
+#include "dict0types.h"
+#include "dict0dict.h"
+#include "que0types.h"
+#include "row0types.h"
+#include "mtr0mtr.h"
+					
+/*************************************************************************
+Creates a table create graph. */
+
+tab_node_t*
+tab_create_graph_create(
+/*====================*/
+				/* out, own: table create node */
+	dict_table_t*	table,	/* in: table to create, built as a memory data
+				structure */
+	mem_heap_t*	heap);	/* in: heap where created */
+/*************************************************************************
+Creates an index create graph. */
+
+ind_node_t*
+ind_create_graph_create(
+/*====================*/
+				/* out, own: index create node */
+	dict_index_t*	index,	/* in: index to create, built as a memory data
+				structure */
+	mem_heap_t*	heap);	/* in: heap where created */
+/***************************************************************
+Creates a table. This is a high-level function used in SQL execution graphs. */
+
+que_thr_t*
+dict_create_table_step(
+/*===================*/
+				/* out: query thread to run next or NULL */
+	que_thr_t*	thr);	/* in: query thread */
+/***************************************************************
+Creates an index. This is a high-level function used in SQL execution
+graphs. */
+
+que_thr_t*
+dict_create_index_step(
+/*===================*/
+				/* out: query thread to run next or NULL */
+	que_thr_t*	thr);	/* in: query thread */
+/***********************************************************************
+Truncates the index tree associated with a row in SYS_INDEXES table. */
+
+ulint
+dict_truncate_index_tree(
+/*=====================*/
+				/* out: new root page number, or
+				FIL_NULL on failure */
+	dict_table_t*	table,	/* in: the table the index belongs to */
+	rec_t*		rec,	/* in: record in the clustered index of
+				SYS_INDEXES table */
+	mtr_t*		mtr);	/* in: mtr having the latch
+				on the record page. The mtr may be
+				committed and restarted in this call. */
+/***********************************************************************
+Drops the index tree associated with a row in SYS_INDEXES table. */
+
+void
+dict_drop_index_tree(
+/*=================*/
+	rec_t*	rec,	/* in: record in the clustered index of SYS_INDEXES
+			table */
+	mtr_t*	mtr);	/* in: mtr having the latch on the record page */
+/********************************************************************
+Creates the foreign key constraints system tables inside InnoDB
+at database creation or database start if they are not found or are
+not of the right form. */
+
+ulint
+dict_create_or_check_foreign_constraint_tables(void);
+/*================================================*/
+				/* out: DB_SUCCESS or error code */
+/************************************************************************
+Adds foreign key definitions to data dictionary tables in the database. We
+look at table->foreign_list, and also generate names to constraints that were
+not named by the user. A generated constraint has a name of the format
+databasename/tablename_ibfk_<number>, where the numbers start from 1, and are
+given locally for this table, that is, the number is not global, as in the
+old format constraints < 4.0.18 it used to be. */
+
+ulint
+dict_create_add_foreigns_to_dictionary(
+/*===================================*/
+				/* out: error code or DB_SUCCESS */
+	ulint		start_id,/* in: if we are actually doing ALTER TABLE
+				ADD CONSTRAINT, we want to generate constraint
+				numbers which are bigger than in the table so
+				far; we number the constraints from
+				start_id + 1 up; start_id should be set to 0 if
+				we are creating a new table, or if the table
+				so far has no constraints for which the name
+				was generated here */
+	dict_table_t*	table,	/* in: table */
+	trx_t*		trx);	/* in: transaction */
+
+
+/* Table create node structure: a query graph node describing the creation
+of one table. NOTE(review): presumably this is the struct behind the
+tab_node_t returned by tab_create_graph_create() -- confirm in dict0types.h */
+
+struct tab_node_struct{
+	que_common_t	common;	/* node type: QUE_NODE_TABLE_CREATE */
+	dict_table_t*	table;	/* table to create, built as a memory data
+				structure with dict_mem_... functions */
+	ins_node_t*	tab_def; /* child node which does the insert of
+				the table definition; the row to be inserted
+				is built by the parent node  */
+	ins_node_t*	col_def; /* child node which does the inserts of
+				the column definitions; the row to be inserted
+				is built by the parent node  */
+	commit_node_t*	commit_node;
+				/* child node which performs a commit after
+				a successful table creation */
+	/*----------------------*/
+	/* Local storage for this graph node */
+	ulint		state;	/* node execution state; presumably one of
+				the TABLE_... table create node states */
+	ulint		col_no;	/* next column definition to insert */
+	mem_heap_t*	heap;	/* memory heap used as auxiliary storage */
+};
+
+/* Table create node states; presumably the values taken by the state field
+of tab_node_struct. NOTE(review): the numbering suggests the order in which
+the steps are run, but the actual transitions are not visible in this
+header -- confirm in the implementation */
+#define	TABLE_BUILD_TABLE_DEF	1
+#define	TABLE_BUILD_COL_DEF	2
+#define	TABLE_COMMIT_WORK	3
+#define	TABLE_ADD_TO_CACHE	4
+#define	TABLE_COMPLETED		5
+
+/* Index create node struct: a query graph node describing the creation of
+one index. NOTE(review): presumably this is the struct behind the ind_node_t
+returned by ind_create_graph_create() -- confirm in dict0types.h */
+
+struct ind_node_struct{
+	que_common_t	common;	/* node type: QUE_NODE_INDEX_CREATE */
+	dict_index_t*	index;	/* index to create, built as a memory data
+				structure with dict_mem_... functions */
+	ins_node_t*	ind_def; /* child node which does the insert of
+				the index definition; the row to be inserted
+				is built by the parent node  */
+	ins_node_t*	field_def; /* child node which does the inserts of
+				the field definitions; the row to be inserted
+				is built by the parent node  */
+	commit_node_t*	commit_node;
+				/* child node which performs a commit after
+				a successful index creation */
+	/*----------------------*/
+	/* Local storage for this graph node */
+	ulint		state;	/* node execution state; presumably one of
+				the INDEX_... index create node states */
+	ulint		page_no;/* root page number of the index */
+	dict_table_t*	table;	/* table which owns the index */
+	dtuple_t*	ind_row;/* index definition row built */
+	ulint		field_no;/* next field definition to insert */
+	mem_heap_t*	heap;	/* memory heap used as auxiliary storage */
+};
+
+/* Index create node states; presumably the values taken by the state field
+of ind_node_struct. NOTE(review): the numbering suggests the order in which
+the steps are run, but the actual transitions are not visible in this
+header -- confirm in the implementation */
+#define	INDEX_BUILD_INDEX_DEF	1
+#define	INDEX_BUILD_FIELD_DEF	2
+#define	INDEX_CREATE_INDEX_TREE	3
+#define	INDEX_COMMIT_WORK	4
+#define	INDEX_ADD_TO_CACHE	5
+
+#ifndef UNIV_NONINL
+#include "dict0crea.ic"
+#endif
+
+#endif
--- a/include/dict0crea.ic
+++ b/include/dict0crea.ic
@ -0,0 +1,8 @@
+/******************************************************
+Database object creation
+
+(c) 1996 Innobase Oy
+
+Created 1/8/1996 Heikki Tuuri
+*******************************************************/
+
--- a/include/dict0dict.h
+++ b/include/dict0dict.h
@ -0,0 +1,963 @@
+/******************************************************
+Data dictionary system
+
+(c) 1996 Innobase Oy
+
+Created 1/8/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef dict0dict_h
+#define dict0dict_h
+
+#include "univ.i"
+#include "dict0types.h"
+#include "dict0mem.h"
+#include "data0type.h"
+#include "data0data.h"
+#include "sync0sync.h"
+#include "sync0rw.h"
+#include "mem0mem.h"
+#include "rem0types.h"
+#include "btr0types.h"
+#include "ut0mem.h"
+#include "ut0lst.h"
+#include "hash0hash.h"
+#include "ut0rnd.h"
+#include "ut0byte.h"
+#include "trx0types.h"
+
+/**********************************************************************
+Makes all characters in a NUL-terminated UTF-8 string lower case. */
+
+void
+dict_casedn_str(
+/*============*/
+	char*	a);	/* in/out: string to put in lower case */
+/************************************************************************
+Get the database name length in a table name. */
+
+ulint
+dict_get_db_name_len(
+/*=================*/
+				/* out: database name length */
+	const char*	name);	/* in: table name in the form
+				dbname '/' tablename */
+/*************************************************************************
+Accepts a specified string. Comparisons are case-insensitive. */
+
+const char*
+dict_accept(
+/*========*/
+				/* out: if string was accepted, the pointer
+				is moved after that, else ptr is returned */
+	const char*	ptr,	/* in: scan from this */
+	const char*	string,	/* in: accept only this string as the next
+				non-whitespace string */
+	ibool*		success);/* out: TRUE if accepted */
+/************************************************************************
+Decrements the count of open MySQL handles to a table. */
+
+void
+dict_table_decrement_handle_count(
+/*==============================*/
+	dict_table_t*	table);	/* in: table */
+/**************************************************************************
+Inits the data dictionary module. */
+
+void
+dict_init(void);
+/*===========*/
+/************************************************************************
+Gets the space id of every table of the data dictionary and makes a linear
+list and a hash table of them to the data dictionary cache. This function
+can be called at database startup if we did not need to do a crash recovery.
+In crash recovery we must scan the space id's from the .ibd files in MySQL
+database directories. */
+
+void
+dict_load_space_id_list(void);
+/*=========================*/
+/*************************************************************************
+Gets the column data type. */
+UNIV_INLINE
+dtype_t*
+dict_col_get_type(
+/*==============*/
+				/* out: pointer to the data type of the
+				column */
+	dict_col_t*	col);	/* in: column */
+/*************************************************************************
+Gets the column number. */
+UNIV_INLINE
+ulint
+dict_col_get_no(
+/*============*/
+				/* out: column number */
+	dict_col_t*	col);	/* in: column */
+/*************************************************************************
+Gets the column position in the clustered index. */
+UNIV_INLINE
+ulint
+dict_col_get_clust_pos(
+/*===================*/
+				/* out: position of the column in the
+				clustered index */
+	dict_col_t*	col);	/* in: column */
+/************************************************************************
+Initializes the autoinc counter. It is not an error to initialize an already
+initialized counter. */
+
+void
+dict_table_autoinc_initialize(
+/*==========================*/
+	dict_table_t*	table,	/* in: table */
+	ib_longlong	value);	/* in: next value to assign to a row */
+/************************************************************************
+Gets the next autoinc value (== autoinc counter value), 0 if not yet
+initialized. If initialized, increments the counter by 1. */
+
+ib_longlong
+dict_table_autoinc_get(
+/*===================*/
+				/* out: value for a new row, or 0 */
+	dict_table_t*	table);	/* in: table */
+/************************************************************************
+Decrements the autoinc counter value by 1. */
+
+void
+dict_table_autoinc_decrement(
+/*=========================*/
+	dict_table_t*	table);	/* in: table */
+/************************************************************************
+Reads the next autoinc value (== autoinc counter value), 0 if not yet
+initialized. */
+
+ib_longlong
+dict_table_autoinc_read(
+/*====================*/
+				/* out: value for a new row, or 0 */
+	dict_table_t*	table);	/* in: table */
+/************************************************************************
+Peeks the autoinc counter value, 0 if not yet initialized. Does not
+increment the counter. The read is not protected by any mutex! */
+
+ib_longlong
+dict_table_autoinc_peek(
+/*====================*/
+				/* out: value of the counter */
+	dict_table_t*	table);	/* in: table */
+/************************************************************************
+Updates the autoinc counter if the value supplied is equal to or bigger than
+the current value. If the counter is not initialized, does nothing. */
+
+void
+dict_table_autoinc_update(
+/*======================*/
+
+	dict_table_t*	table,	/* in: table */
+	ib_longlong	value);	/* in: value which was assigned to a row */
+/**************************************************************************
+Adds a table object to the dictionary cache. */
+
+void
+dict_table_add_to_cache(
+/*====================*/
+	dict_table_t*	table);	/* in: table */
+/**************************************************************************
+Removes a table object from the dictionary cache. */
+
+void
+dict_table_remove_from_cache(
+/*=========================*/
+	dict_table_t*	table);	/* in, own: table */
+/**************************************************************************
+Renames a table object. */
+
+ibool
+dict_table_rename_in_cache(
+/*=======================*/
+					/* out: TRUE if success */
+	dict_table_t*	table,		/* in: table */
+	const char*	new_name,	/* in: new name */
+	ibool		rename_also_foreigns);/* in: in ALTER TABLE we want
+					to preserve the original table name
+					in constraints which reference it */
+/**************************************************************************
+Change the id of a table object in the dictionary cache. This is used in
+DISCARD TABLESPACE. */
+
+void
+dict_table_change_id_in_cache(
+/*==========================*/
+	dict_table_t*	table,	/* in: table object already in cache */
+	dulint		new_id);/* in: new id to set */
+/**************************************************************************
+Adds a foreign key constraint object to the dictionary cache. May free
+the object if there already is an object with the same identifier in the
+cache. At least one of the foreign table or the referenced table must
+already be in the dictionary cache! */
+
+ulint
+dict_foreign_add_to_cache(
+/*======================*/
+					/* out: DB_SUCCESS or error code */
+	dict_foreign_t*	foreign,	/* in, own: foreign key constraint */
+	ibool		check_types);	/* in: TRUE=check type compatibility */
+/*************************************************************************
+Checks if a table is referenced by foreign keys. */
+
+ibool
+dict_table_referenced_by_foreign_key(
+/*=================================*/
+				/* out: TRUE if table is referenced by a
+				foreign key */
+	dict_table_t*	table);	/* in: InnoDB table */
+/*************************************************************************
+Scans a table create SQL string and adds to the data dictionary
+the foreign key constraints declared in the string. This function
+should be called after the indexes for a table have been created.
+Each foreign key constraint must be accompanied with indexes in
+both participating tables. The indexes are allowed to contain more
+fields than mentioned in the constraint. */
+
+ulint
+dict_create_foreign_constraints(
+/*============================*/
+					/* out: error code or DB_SUCCESS */
+	trx_t*		trx,		/* in: transaction */
+	const char*	sql_string,	/* in: table create statement where
+					foreign keys are declared like:
+					FOREIGN KEY (a, b) REFERENCES
+					table2(c, d), table2 can be written
+					also with the database
+					name before it: test.table2; the
+					default database is the database of
+					parameter name */
+	const char*	name,		/* in: table full name in the
+					normalized form
+					database_name/table_name */
+	ibool		reject_fks);	/* in: if TRUE, fail with error
+					code DB_CANNOT_ADD_CONSTRAINT if
+					any foreign keys are found. */
+/**************************************************************************
+Parses the CONSTRAINT id's to be dropped in an ALTER TABLE statement. */
+
+ulint
+dict_foreign_parse_drop_constraints(
+/*================================*/
+						/* out: DB_SUCCESS or
+						DB_CANNOT_DROP_CONSTRAINT if
+						syntax error or the constraint
+						id does not match */
+	mem_heap_t*	heap,			/* in: heap from which we can
+						allocate memory */
+	trx_t*		trx,			/* in: transaction */
+	dict_table_t*	table,			/* in: table */
+	ulint*		n,			/* out: number of constraints
+						to drop */
+	const char***	constraints_to_drop);	/* out: id's of the
+						constraints to drop */
+/**************************************************************************
+Returns a table object and memoryfixes it. NOTE! This is a high-level
+function to be used mainly from outside the 'dict' directory. Inside this
+directory dict_table_get_low is usually the appropriate function. */
+
+dict_table_t*
+dict_table_get(
+/*===========*/
+					/* out: table, NULL if
+					does not exist */
+	const char*	table_name,	/* in: table name */
+	trx_t*		trx);		/* in: transaction handle */
+/**************************************************************************
+Returns a table object and increments MySQL open handle count on the table.
+*/
+
+dict_table_t*
+dict_table_get_and_increment_handle_count(
+/*======================================*/
+					/* out: table, NULL if
+					does not exist */
+	const char*	table_name,	/* in: table name */
+	trx_t*		trx);		/* in: transaction handle or NULL */
+/**************************************************************************
+Returns a table object, based on table id, and memoryfixes it. */
+
+dict_table_t*
+dict_table_get_on_id(
+/*=================*/
+				/* out: table, NULL if does not exist */
+	dulint	table_id,	/* in: table id */
+	trx_t*	trx);		/* in: transaction handle */
+/**************************************************************************
+Returns a table object, based on table id, and memoryfixes it. */
+UNIV_INLINE
+dict_table_t*
+dict_table_get_on_id_low(
+/*=====================*/
+				/* out: table, NULL if does not exist */
+	dulint	table_id,	/* in: table id */
+	trx_t*	trx);		/* in: transaction handle */
+/**************************************************************************
+Releases a table from being memoryfixed. Currently this has no relevance. */
+UNIV_INLINE
+void
+dict_table_release(
+/*===============*/
+	dict_table_t*	table);	/* in: table to be released */
+/**************************************************************************
+Checks if a table is in the dictionary cache. */
+UNIV_INLINE
+dict_table_t*
+dict_table_check_if_in_cache_low(
+/*==============================*/
+					/* out: table, NULL if not found */
+	const char*	table_name);	/* in: table name */
+/**************************************************************************
+Gets a table; loads it to the dictionary cache if necessary. A low-level
+function. */
+UNIV_INLINE
+dict_table_t*
+dict_table_get_low(
+/*===============*/
+					/* out: table, NULL if not found */
+	const char*	table_name);	/* in: table name */
+/**************************************************************************
+Returns an index object. */
+UNIV_INLINE
+dict_index_t*
+dict_table_get_index(
+/*=================*/
+				/* out: index, NULL if does not exist */
+	dict_table_t*	table,	/* in: table */
+	const char*	name);	/* in: index name */
+/**************************************************************************
+Returns an index object. */
+
+dict_index_t*
+dict_table_get_index_noninline(
+/*===========================*/
+				/* out: index, NULL if does not exist */
+	dict_table_t*	table,	/* in: table */
+	const char*	name);	/* in: index name */
+/**************************************************************************
+Prints a table definition. */
+
+void
+dict_table_print(
+/*=============*/
+	dict_table_t*	table);	/* in: table */
+/**************************************************************************
+Prints a table data. */
+
+void
+dict_table_print_low(
+/*=================*/
+	dict_table_t*	table);	/* in: table */
+/**************************************************************************
+Prints table data when we know the table name. */
+
+void
+dict_table_print_by_name(
+/*=====================*/
+	const char*	name);	/* in: table name */
+/**************************************************************************
+Outputs info on foreign keys of a table. */
+
+void
+dict_print_info_on_foreign_keys(
+/*============================*/
+	ibool		create_table_format, /* in: if TRUE then print in
+				a format suitable to be inserted into
+				a CREATE TABLE, otherwise in the format
+				of SHOW TABLE STATUS */
+	FILE*		file,	/* in: file where to print */
+	trx_t*		trx,	/* in: transaction */
+	dict_table_t*	table);	/* in: table */
+/**************************************************************************
+Outputs info on a foreign key of a table in a format suitable for
+CREATE TABLE. */
+void
+dict_print_info_on_foreign_key_in_create_format(
+/*============================================*/
+	FILE*		file,		/* in: file where to print */
+	trx_t*		trx,		/* in: transaction */
+	dict_foreign_t*	foreign,	/* in: foreign key constraint */
+	ibool		add_newline);	/* in: whether to add a newline */
+/************************************************************************
+Displays the names of the index and the table. */
+void
+dict_index_name_print(
+/*==================*/
+	FILE*			file,	/* in: output stream */
+	trx_t*			trx,	/* in: transaction */
+	const dict_index_t*	index);	/* in: index to print */
+/************************************************************************
+Gets the first index on the table (the clustered index). */
+UNIV_INLINE
+dict_index_t*
+dict_table_get_first_index(
+/*=======================*/
+				/* out: index, NULL if none exists */
+	dict_table_t*	table);	/* in: table */
+/************************************************************************
+Gets the first index on the table (the clustered index). */
+
+dict_index_t*
+dict_table_get_first_index_noninline(
+/*=================================*/
+				/* out: index, NULL if none exists */
+	dict_table_t*	table);	/* in: table */
+/************************************************************************
+Gets the next index on the table. */
+UNIV_INLINE
+dict_index_t*
+dict_table_get_next_index(
+/*======================*/
+				/* out: index, NULL if none left */
+	dict_index_t*	index);	/* in: index */
+/************************************************************************
+Gets the next index on the table. */
+
+dict_index_t*
+dict_table_get_next_index_noninline(
+/*================================*/
+				/* out: index, NULL if none left */
+	dict_index_t*	index);	/* in: index */
+/************************************************************************
+Gets the number of user-defined columns in a table in the dictionary
+cache. */
+UNIV_INLINE
+ulint
+dict_table_get_n_user_cols(
+/*=======================*/
+				/* out: number of user-defined (e.g., not
+				ROW_ID) columns of a table */
+	dict_table_t*	table);	/* in: table */
+/************************************************************************
+Gets the number of system columns in a table in the dictionary cache. */
+UNIV_INLINE
+ulint
+dict_table_get_n_sys_cols(
+/*======================*/
+				/* out: number of system (e.g.,
+				ROW_ID) columns of a table */
+	dict_table_t*	table);	/* in: table */
+/************************************************************************
+Gets the number of all columns (also system) in a table in the dictionary
+cache. */
+UNIV_INLINE
+ulint
+dict_table_get_n_cols(
+/*==================*/
+				/* out: number of columns of a table */
+	dict_table_t*	table);	/* in: table */
+/************************************************************************
+Gets the nth column of a table. */
+UNIV_INLINE
+dict_col_t*
+dict_table_get_nth_col(
+/*===================*/
+				/* out: pointer to column object */
+	dict_table_t*	table,	/* in: table */
+	ulint		pos);	/* in: position of column */
+/************************************************************************
+Gets the nth column of a table. */
+
+dict_col_t*
+dict_table_get_nth_col_noninline(
+/*=============================*/
+				/* out: pointer to column object */
+	dict_table_t*	table,	/* in: table */
+	ulint		pos);	/* in: position of column */
+/************************************************************************
+Gets the given system column of a table. */
+UNIV_INLINE
+dict_col_t*
+dict_table_get_sys_col(
+/*===================*/
+				/* out: pointer to column object */
+	dict_table_t*	table,	/* in: table */
+	ulint		sys);	/* in: DATA_ROW_ID, ... */
+/************************************************************************
+Gets the given system column number of a table. */
+UNIV_INLINE
+ulint
+dict_table_get_sys_col_no(
+/*======================*/
+				/* out: column number */
+	dict_table_t*	table,	/* in: table */
+	ulint		sys);	/* in: DATA_ROW_ID, ... */
+/************************************************************************
+Checks if a column is in the ordering columns of the clustered index of a
+table. Column prefixes are treated like whole columns. */
+
+ibool
+dict_table_col_in_clustered_key(
+/*============================*/
+				/* out: TRUE if the column, or its prefix, is
+				in the clustered key */
+	dict_table_t*	table,	/* in: table */
+	ulint		n);	/* in: column number */
+/***********************************************************************
+Copies types of columns contained in table to tuple. */
+
+void
+dict_table_copy_types(
+/*==================*/
+	dtuple_t*	tuple,	/* in: data tuple */
+	dict_table_t*	table);	/* in: table */
+/**************************************************************************
+Looks for an index with the given id. NOTE that we do not reserve
+the dictionary mutex: this function is for emergency purposes like
+printing info of a corrupt database page! */
+
+dict_index_t*
+dict_index_find_on_id_low(
+/*======================*/
+			/* out: index or NULL if not found from cache */
+	dulint	id);	/* in: index id */
+/**************************************************************************
+Adds an index to dictionary cache. */
+
+ibool
+dict_index_add_to_cache(
+/*====================*/
+				/* out: TRUE if success */
+	dict_table_t*	table,	/* in: table on which the index is */
+	dict_index_t*	index,	/* in, own: index; NOTE! The index memory
+				object is freed in this function! */
+	ulint		page_no);/* in: root page number of the index */
+/************************************************************************
+Gets the number of fields in the internal representation of an index,
+including fields added by the dictionary system. */
+UNIV_INLINE
+ulint
+dict_index_get_n_fields(
+/*====================*/
+				/* out: number of fields */
+	dict_index_t*	index);	/* in: an internal representation of index
+				(in the dictionary cache) */
+/************************************************************************
+Gets the number of fields in the internal representation of an index
+that uniquely determine the position of an index entry in the index, if
+we do not take multiversioning into account: in the B-tree use the value
+returned by dict_index_get_n_unique_in_tree. */
+UNIV_INLINE
+ulint
+dict_index_get_n_unique(
+/*====================*/
+				/* out: number of fields */
+	dict_index_t*	index);	/* in: an internal representation of index
+				(in the dictionary cache) */
+/************************************************************************
+Gets the number of fields in the internal representation of an index
+which uniquely determine the position of an index entry in the index, if
+we also take multiversioning into account. */
+UNIV_INLINE
+ulint
+dict_index_get_n_unique_in_tree(
+/*============================*/
+				/* out: number of fields */
+	dict_index_t*	index);	/* in: an internal representation of index
+				(in the dictionary cache) */
+/************************************************************************
+Gets the number of user-defined ordering fields in the index. In the internal
+representation we add the row id to the ordering fields to make all indexes
+unique, but this function returns the number of fields the user defined
+in the index as ordering fields. */
+UNIV_INLINE
+ulint
+dict_index_get_n_ordering_defined_by_user(
+/*======================================*/
+				/* out: number of fields */
+	dict_index_t*	index);	/* in: an internal representation of index
+				(in the dictionary cache) */
+/************************************************************************
+Gets the nth field of an index. */
+UNIV_INLINE
+dict_field_t*
+dict_index_get_nth_field(
+/*=====================*/
+				/* out: pointer to field object */
+	dict_index_t*	index,	/* in: index */
+	ulint		pos);	/* in: position of field */
+/************************************************************************
+Gets pointer to the nth field data type in an index. */
+UNIV_INLINE
+dtype_t*
+dict_index_get_nth_type(
+/*====================*/
+				/* out: data type */
+	dict_index_t*	index,	/* in: index */
+	ulint		pos);	/* in: position of the field */
+/************************************************************************
+Gets the column number of the nth field in an index. */
+UNIV_INLINE
+ulint
+dict_index_get_nth_col_no(
+/*======================*/
+				/* out: column number */
+	dict_index_t*	index,	/* in: index */
+	ulint		pos);	/* in: position of the field */
+/************************************************************************
+Looks for column n in an index. */
+
+ulint
+dict_index_get_nth_col_pos(
+/*=======================*/
+				/* out: position in internal representation
+				of the index; if not contained, returns
+				ULINT_UNDEFINED */
+	dict_index_t*	index,	/* in: index */
+	ulint		n);	/* in: column number */
+/************************************************************************
+Returns TRUE if the index contains a column or a prefix of that column. */
+
+ibool
+dict_index_contains_col_or_prefix(
+/*==============================*/
+				/* out: TRUE if contains the column or its
+				prefix */
+	dict_index_t*	index,	/* in: index */
+	ulint		n);	/* in: column number */
+/************************************************************************
+Looks for a matching field in an index. The column has to be the same. The
+column in index must be complete, or must contain a prefix longer than the
+column in index2. That is, we must be able to construct the prefix in index2
+from the prefix in index. */
+
+ulint
+dict_index_get_nth_field_pos(
+/*=========================*/
+				/* out: position in internal representation
+				of the index; if not contained, returns
+				ULINT_UNDEFINED */
+	dict_index_t*	index,	/* in: index from which to search */
+	dict_index_t*	index2,	/* in: index */
+	ulint		n);	/* in: field number in index2 */
+/************************************************************************
+Looks for column n position in the clustered index. */
+
+ulint
+dict_table_get_nth_col_pos(
+/*=======================*/
+				/* out: position in internal representation
+				of the clustered index */
+	dict_table_t*	table,	/* in: table */
+	ulint		n);	/* in: column number */
+/************************************************************************
+Returns the position of a system column in an index. */
+UNIV_INLINE
+ulint
+dict_index_get_sys_col_pos(
+/*=======================*/
+				/* out: position, ULINT_UNDEFINED if not
+				contained */
+	dict_index_t*	index,	/* in: index */
+	ulint		type);	/* in: DATA_ROW_ID, ... */
+/***********************************************************************
+Adds a column to index. */
+
+void
+dict_index_add_col(
+/*===============*/
+	dict_index_t*	index,		/* in: index */
+	dict_col_t*	col,		/* in: column */
+	ulint		order,		/* in: order criterion */
+	ulint		prefix_len);	/* in: column prefix length */
+/***********************************************************************
+Copies types of fields contained in index to tuple. */
+
+void
+dict_index_copy_types(
+/*==================*/
+	dtuple_t*	tuple,		/* in: data tuple */
+	dict_index_t*	index,		/* in: index */
+	ulint		n_fields);	/* in: number of field types to copy */
+/*************************************************************************
+Gets the index tree where the index is stored. */
+UNIV_INLINE
+dict_tree_t*
+dict_index_get_tree(
+/*================*/
+				/* out: index tree */
+	dict_index_t*	index);	/* in: index */
+/*************************************************************************
+Gets the field order criterion. */
+UNIV_INLINE
+ulint
+dict_field_get_order(
+/*=================*/
+	dict_field_t*	field);
+/*************************************************************************
+Gets the field column. */
+UNIV_INLINE
+dict_col_t*
+dict_field_get_col(
+/*===============*/
+	dict_field_t*	field);
+/**************************************************************************
+Creates an index tree struct. */
+
+dict_tree_t*
+dict_tree_create(
+/*=============*/
+				/* out, own: created tree */
+	dict_index_t*	index,	/* in: the index for which to create: in the
+				case of a mixed tree, this should be the
+				index of the cluster object */
+	ulint		page_no);/* in: root page number of the index */
+/**************************************************************************
+Frees an index tree struct. */
+
+void
+dict_tree_free(
+/*===========*/
+	dict_tree_t*	tree);	/* in, own: index tree */
+/**************************************************************************
+In an index tree, finds the index corresponding to a record in the tree. */
+
+dict_index_t*
+dict_tree_find_index(
+/*=================*/
+				/* out: index */
+	dict_tree_t*	tree,	/* in: index tree */
+	rec_t*		rec);	/* in: record for which to find correct index */
+/**************************************************************************
+In an index tree, finds the index corresponding to a dtuple which is used
+in a search to a tree. */
+
+dict_index_t*
+dict_tree_find_index_for_tuple(
+/*===========================*/
+				/* out: index; NULL if the tuple does not
+				contain the mix id field in a mixed tree */
+	dict_tree_t*	tree,	/* in: index tree */
+	dtuple_t*	tuple);	/* in: tuple for which to find index */
+/***********************************************************************
+Checks if a table which is a mixed cluster member owns a record. */
+
+ibool
+dict_is_mixed_table_rec(
+/*====================*/
+				/* out: TRUE if the record belongs to this
+				table */
+	dict_table_t*	table,	/* in: table in a mixed cluster */
+	rec_t*		rec);	/* in: user record in the clustered index */
+/**************************************************************************
+Returns an index object if it is found in the dictionary cache. */
+
+dict_index_t*
+dict_index_get_if_in_cache(
+/*=======================*/
+				/* out: index, NULL if not found */
+	dulint	index_id);	/* in: index id */
+/**************************************************************************
+Checks that a tuple has n_fields_cmp value in a sensible range, so that
+no comparison can occur with the page number field in a node pointer. */
+
+ibool
+dict_tree_check_search_tuple(
+/*=========================*/
+				/* out: TRUE if ok */
+	dict_tree_t*	tree,	/* in: index tree */
+	dtuple_t*	tuple);	/* in: tuple used in a search */
+/**************************************************************************
+Builds a node pointer out of a physical record and a page number. */
+
+dtuple_t*
+dict_tree_build_node_ptr(
+/*=====================*/
+				/* out, own: node pointer */
+	dict_tree_t*	tree,	/* in: index tree */
+	rec_t*		rec,	/* in: record for which to build node
+				pointer */
+	ulint		page_no,/* in: page number to put in node pointer */
+	mem_heap_t*	heap,	/* in: memory heap where pointer created */
+	ulint           level);  /* in: level of rec in tree: 0 means leaf
+				level */
+/**************************************************************************
+Copies an initial segment of a physical record, long enough to specify an
+index entry uniquely. */
+
+rec_t*
+dict_tree_copy_rec_order_prefix(
+/*============================*/
+				/* out: pointer to the prefix record */
+	dict_tree_t*	tree,	/* in: index tree */
+	rec_t*		rec,	/* in: record for which to copy prefix */
+	ulint*		n_fields,/* out: number of fields copied */
+	byte**		buf,	/* in/out: memory buffer for the copied prefix,
+				or NULL */
+	ulint*		buf_size);/* in/out: buffer size */
+/**************************************************************************
+Builds a typed data tuple out of a physical record. */
+
+dtuple_t*
+dict_tree_build_data_tuple(
+/*=======================*/
+				/* out, own: data tuple */
+	dict_tree_t*	tree,	/* in: index tree */
+	rec_t*		rec,	/* in: record for which to build data tuple */
+	ulint		n_fields,/* in: number of data fields */
+	mem_heap_t*	heap);	/* in: memory heap where tuple created */
+/*************************************************************************
+Gets the space id of the root of the index tree. */
+UNIV_INLINE
+ulint
+dict_tree_get_space(
+/*================*/
+				/* out: space id */
+	dict_tree_t*	tree);	/* in: tree */
+/*************************************************************************
+Sets the space id of the root of the index tree. */
+UNIV_INLINE
+void
+dict_tree_set_space(
+/*================*/
+	dict_tree_t*	tree,	/* in: tree */
+	ulint		space);	/* in: space id */
+/*************************************************************************
+Gets the page number of the root of the index tree. */
+UNIV_INLINE
+ulint
+dict_tree_get_page(
+/*===============*/
+				/* out: page number */
+	dict_tree_t*	tree);	/* in: tree */
+/*************************************************************************
+Sets the page number of the root of index tree. */
+UNIV_INLINE
+void
+dict_tree_set_page(
+/*===============*/
+	dict_tree_t*	tree,	/* in: tree */
+	ulint		page);	/* in: page number */
+/*************************************************************************
+Gets the type of the index tree. */
+UNIV_INLINE
+ulint
+dict_tree_get_type(
+/*===============*/
+				/* out: type */
+	dict_tree_t*	tree);	/* in: tree */
+/*************************************************************************
+Gets the read-write lock of the index tree. */
+UNIV_INLINE
+rw_lock_t*
+dict_tree_get_lock(
+/*===============*/
+				/* out: read-write lock */
+	dict_tree_t*	tree);	/* in: tree */
+/************************************************************************
+Returns free space reserved for future updates of records. This is
+relevant only in the case of many consecutive inserts, as updates
+which make the records bigger might fragment the index. */
+UNIV_INLINE
+ulint
+dict_tree_get_space_reserve(
+/*========================*/
+				/* out: number of free bytes on page,
+				reserved for updates */
+	dict_tree_t*	tree);	/* in: a tree */
+/*************************************************************************
+Calculates the minimum record length in an index. */
+
+ulint
+dict_index_calc_min_rec_len(
+/*========================*/
+	dict_index_t*	index);	/* in: index */
+/*************************************************************************
+Calculates new estimates for table and index statistics. The statistics
+are used in query optimization. */
+
+void
+dict_update_statistics_low(
+/*=======================*/
+	dict_table_t*	table,		/* in: table */
+	ibool		has_dict_mutex);/* in: TRUE if the caller has the
+					dictionary mutex */	
+/*************************************************************************
+Calculates new estimates for table and index statistics. The statistics
+are used in query optimization. */
+
+void
+dict_update_statistics(
+/*===================*/
+	dict_table_t*	table);	/* in: table */
+/************************************************************************
+Reserves the dictionary system mutex for MySQL. */
+
+void
+dict_mutex_enter_for_mysql(void);
+/*============================*/
+/************************************************************************
+Releases the dictionary system mutex for MySQL. */
+
+void
+dict_mutex_exit_for_mysql(void);
+/*===========================*/
+/************************************************************************
+Checks if the database name in two table names is the same. */
+
+ibool
+dict_tables_have_same_db(
+/*=====================*/
+				/* out: TRUE if same db name */
+	const char*	name1,	/* in: table name in the form
+				dbname '/' tablename */
+	const char*	name2);	/* in: table name in the form
+				dbname '/' tablename */
+
+/*************************************************************************
+Scans from pointer onwards. Stops if it is at the start of a copy of
+'string', where characters are compared without case sensitivity. Also
+stops at '\0'. */
+
+const char*
+dict_scan_to(
+/*=========*/
+				/* out: scanned up to this */
+	const char*	ptr,	/* in: scan from */
+	const char*	string);/* in: look for this */
+
+/* Buffers for storing detailed information about the latest foreign key
+and unique key errors */
+extern FILE*	dict_foreign_err_file;
+extern mutex_t	dict_foreign_err_mutex; /* mutex protecting the buffers */
+
+extern dict_sys_t*	dict_sys;	/* the dictionary system */
+extern rw_lock_t	dict_operation_lock;
+
+/* Dictionary system struct: holds the mutex, the lookup hash tables, the
+table LRU list and the pointers to the system tables of the data dictionary */
+struct dict_sys_struct{
+	mutex_t		mutex;		/* mutex protecting the data
+					dictionary; protects also the
+					disk-based dictionary system tables;
+					this mutex serializes CREATE TABLE
+					and DROP TABLE, as well as reading
+					the dictionary data for a table from
+					system tables */
+	dulint		row_id;		/* the next row id to assign;
+					NOTE that at a checkpoint this
+					must be written to the dict system
+					header and flushed to a file; in
+					recovery this must be derived from
+					the log records */
+	hash_table_t* 	table_hash;	/* hash table of the tables, based
+					on name */
+	hash_table_t* 	table_id_hash;	/* hash table of the tables, based
+					on id */
+	hash_table_t* 	col_hash;	/* hash table of the columns */
+	UT_LIST_BASE_NODE_T(dict_table_t)
+			table_LRU; 	/* base node of the LRU list of
+					tables */
+	ulint		size;		/* varying space in bytes occupied
+					by the data dictionary table and
+					index objects */
+	dict_table_t*	sys_tables;	/* SYS_TABLES table */
+	dict_table_t*	sys_columns;	/* SYS_COLUMNS table */
+	dict_table_t*	sys_indexes;	/* SYS_INDEXES table */
+	dict_table_t*	sys_fields;	/* SYS_FIELDS table */
+};
+
+#ifndef UNIV_NONINL
+#include "dict0dict.ic"
+#endif
+
+#endif
--- a/include/dict0dict.ic
+++ b/include/dict0dict.ic
@ -0,0 +1,620 @@
+/**********************************************************************
+Data dictionary system
+
+(c) 1996 Innobase Oy
+
+Created 1/8/1996 Heikki Tuuri
+***********************************************************************/
+
+#include "dict0load.h"
+#include "trx0undo.h"
+#include "trx0sys.h"
+
+/*************************************************************************
+Returns a pointer to the data type descriptor stored inside a column
+object. */
+UNIV_INLINE
+dtype_t*
+dict_col_get_type(
+/*==============*/
+				/* out: pointer to the type member of col */
+	dict_col_t*	col)	/* in: column; asserted to be non-NULL */
+{
+	dtype_t*	type;
+
+	ut_ad(col);
+
+	type = &col->type;
+
+	return(type);
+}
+
+/*************************************************************************
+Returns the number of a column, as stored in the ind member of the column
+object. */
+UNIV_INLINE
+ulint
+dict_col_get_no(
+/*============*/
+				/* out: column number (col->ind) */
+	dict_col_t*	col)	/* in: column; asserted to be non-NULL */
+{
+	ulint	col_no;
+
+	ut_ad(col);
+
+	col_no = col->ind;
+
+	return(col_no);
+}
+
+/*************************************************************************
+Returns the position of a column in the clustered index, as stored in the
+clust_pos member of the column object. */
+UNIV_INLINE
+ulint
+dict_col_get_clust_pos(
+/*===================*/
+				/* out: position (col->clust_pos) */
+	dict_col_t*	col)	/* in: column; asserted to be non-NULL */
+{
+	ulint	pos;
+
+	ut_ad(col);
+
+	pos = col->clust_pos;
+
+	return(pos);
+}
+
+/************************************************************************
+Returns the first element of the index list of a table (the clustered
+index). */
+UNIV_INLINE
+dict_index_t*
+dict_table_get_first_index(
+/*=======================*/
+				/* out: index, NULL if none exists */
+	dict_table_t*	table)	/* in: table */
+{
+	dict_index_t*	first;
+
+	ut_ad(table);
+	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+
+	first = UT_LIST_GET_FIRST(table->indexes);
+
+	return(first);
+}
+
+/************************************************************************
+Returns the index that follows the given one in the index list of its
+table. */
+UNIV_INLINE
+dict_index_t*
+dict_table_get_next_index(
+/*======================*/
+				/* out: index, NULL if none left */
+	dict_index_t*	index)	/* in: index */
+{
+	dict_index_t*	next;
+
+	ut_ad(index);
+	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+
+	next = UT_LIST_GET_NEXT(indexes, index);
+
+	return(next);
+}
+
+/************************************************************************
+Returns how many user-defined columns a table in the dictionary cache has:
+the total column count minus the DATA_N_SYS_COLS system columns. */
+UNIV_INLINE
+ulint
+dict_table_get_n_user_cols(
+/*=======================*/
+				/* out: number of user-defined (e.g., not
+				ROW_ID) columns of a table */
+	dict_table_t*	table)	/* in: table; asserted to be cached */
+{
+	ulint	n_user;
+
+	ut_ad(table);
+	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+	ut_ad(table->cached);
+
+	n_user = table->n_cols - DATA_N_SYS_COLS;
+
+	return(n_user);
+}
+
+/************************************************************************
+Returns how many system columns a table in the dictionary cache has. The
+result is always DATA_N_SYS_COLS; the table argument is only used in the
+sanity assertions. */
+UNIV_INLINE
+ulint
+dict_table_get_n_sys_cols(
+/*======================*/
+				/* out: number of system (e.g.,
+				ROW_ID) columns of a table */
+	dict_table_t*	table __attribute__((unused)))	/* in: table */
+{
+	ulint	n_sys	= DATA_N_SYS_COLS;
+
+	ut_ad(table);
+	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+	ut_ad(table->cached);
+
+	return(n_sys);
+}
+
+/************************************************************************
+Returns the total number of columns (user-defined and system) of a table
+in the dictionary cache. */
+UNIV_INLINE
+ulint
+dict_table_get_n_cols(
+/*==================*/
+				/* out: number of columns of a table */
+	dict_table_t*	table)	/* in: table; asserted to be cached */
+{
+	ulint	n_all;
+
+	ut_ad(table);
+	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+	ut_ad(table->cached);
+
+	n_all = table->n_cols;
+
+	return(n_all);
+}
+
+/************************************************************************
+Returns a pointer to the column at the given position in the column array
+of a table. */
+UNIV_INLINE
+dict_col_t*
+dict_table_get_nth_col(
+/*===================*/
+				/* out: pointer to column object */
+	dict_table_t*	table,	/* in: table */
+	ulint		pos)	/* in: position of column; asserted to be
+				less than table->n_def */
+{
+	dict_col_t*	col;
+
+	ut_ad(table);
+	ut_ad(pos < table->n_def);
+	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+
+	col = &table->cols[pos];
+
+	return(col);
+}
+
+/************************************************************************
+Returns the requested system column of a table. The system columns occupy
+the last DATA_N_SYS_COLS positions of the column array. */
+UNIV_INLINE
+dict_col_t*
+dict_table_get_sys_col(
+/*===================*/
+				/* out: pointer to column object */
+	dict_table_t*	table,	/* in: table */
+	ulint		sys)	/* in: DATA_ROW_ID, ...; asserted to be
+				less than DATA_N_SYS_COLS */
+{
+	dict_col_t*	col;
+	ulint		col_no;
+
+	ut_ad(table);
+	ut_ad(sys < DATA_N_SYS_COLS);
+	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+
+	col_no = table->n_cols - DATA_N_SYS_COLS + sys;
+
+	col = dict_table_get_nth_col(table, col_no);
+
+	/* Check that what we found really is the requested system column */
+	ut_ad(col->type.mtype == DATA_SYS);
+	ut_ad(col->type.prtype == (sys | DATA_NOT_NULL));
+
+	return(col);
+}
+
+/************************************************************************
+Returns the column number of the requested system column of a table. The
+system columns are numbered after all the other columns. */
+UNIV_INLINE
+ulint
+dict_table_get_sys_col_no(
+/*======================*/
+				/* out: column number */
+	dict_table_t*	table,	/* in: table */
+	ulint		sys)	/* in: DATA_ROW_ID, ...; asserted to be
+				less than DATA_N_SYS_COLS */
+{
+	ulint	col_no;
+
+	ut_ad(table);
+	ut_ad(sys < DATA_N_SYS_COLS);
+	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+
+	col_no = table->n_cols - DATA_N_SYS_COLS + sys;
+
+	return(col_no);
+}
+
+/************************************************************************
+Returns the number of fields in the internal representation of an index.
+The count includes the fields added by the dictionary system. */
+UNIV_INLINE
+ulint
+dict_index_get_n_fields(
+/*====================*/
+				/* out: number of fields */
+	dict_index_t*	index)	/* in: an internal representation of index
+				(in the dictionary cache) */
+{
+	ulint	n_fields;
+
+	ut_ad(index);
+	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+
+	n_fields = index->n_fields;
+
+	return(n_fields);
+}
+
+/************************************************************************
+Returns the number of fields in the internal representation of an index
+that uniquely determine the position of an index entry in the index, when
+multiversioning is not taken into account. In the B-tree, use the value
+returned by dict_index_get_n_unique_in_tree instead. */
+UNIV_INLINE
+ulint
+dict_index_get_n_unique(
+/*====================*/
+				/* out: number of fields */
+	dict_index_t*	index)	/* in: an internal representation of index
+				(in the dictionary cache) */
+{
+	ulint	n_uniq;
+
+	ut_ad(index);
+	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+	ut_ad(index->cached);
+
+	n_uniq = index->n_uniq;
+
+	return(n_uniq);
+}
+
+/************************************************************************
+Returns the number of fields in the internal representation of an index
+which uniquely determine the position of an index entry in the index, when
+multiversioning is also taken into account. */
+UNIV_INLINE
+ulint
+dict_index_get_n_unique_in_tree(
+/*============================*/
+				/* out: number of fields */
+	dict_index_t*	index)	/* in: an internal representation of index
+				(in the dictionary cache) */
+{
+	ulint	n;
+
+	ut_ad(index);
+	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+	ut_ad(index->cached);
+
+	if (index->type & DICT_CLUSTERED) {
+		/* Clustered index: same count as dict_index_get_n_unique */
+		n = dict_index_get_n_unique(index);
+	} else {
+		/* Any other index: all fields are counted */
+		n = dict_index_get_n_fields(index);
+	}
+
+	return(n);
+}
+
+/************************************************************************
+Gets the number of user-defined ordering fields in the index. In the internal
+representation of clustered indexes we add the row id to the ordering fields
+to make a clustered index unique, but this function returns the number of
+fields the user defined in the index as ordering fields. */
+UNIV_INLINE
+ulint
+dict_index_get_n_ordering_defined_by_user(
+/*======================================*/
+				/* out: number of fields
+				(index->n_user_defined_cols) */
+	dict_index_t*	index)	/* in: an internal representation of index
+				(in the dictionary cache) */
+{
+	/* Same sanity checks as in the other index accessors of this file */
+	ut_ad(index);
+	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+
+	return(index->n_user_defined_cols);
+}
+
+/************************************************************************
+Returns a pointer to the field at the given position in the field array
+of an index. */
+UNIV_INLINE
+dict_field_t*
+dict_index_get_nth_field(
+/*=====================*/
+				/* out: pointer to field object */
+	dict_index_t*	index,	/* in: index */
+	ulint		pos)	/* in: position of field; asserted to be
+				less than index->n_def */
+{
+	dict_field_t*	field;
+
+	ut_ad(index);
+	ut_ad(pos < index->n_def);
+	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+
+	field = &index->fields[pos];
+
+	return(field);
+}
+
+/************************************************************************
+Returns the position of a system column in an index. For a clustered index
+the position is read from the column object; for other indexes it is looked
+up with dict_index_get_nth_col_pos. */
+UNIV_INLINE
+ulint
+dict_index_get_sys_col_pos(
+/*=======================*/
+				/* out: position, ULINT_UNDEFINED if not
+				contained */
+	dict_index_t*	index,	/* in: index; asserted not to be of type
+				DICT_UNIVERSAL */
+	ulint		type)	/* in: DATA_ROW_ID, ... */
+{
+	dict_col_t*	col;
+	ulint		pos;
+
+	ut_ad(index);
+	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+	ut_ad(!(index->type & DICT_UNIVERSAL));
+
+	/* Fetched for both index kinds, so that the assertions inside
+	dict_table_get_sys_col are always run */
+	col = dict_table_get_sys_col(index->table, type);
+
+	if (index->type & DICT_CLUSTERED) {
+		pos = col->clust_pos;
+	} else {
+		pos = dict_index_get_nth_col_pos(index,
+			dict_table_get_sys_col_no(index->table, type));
+	}
+
+	return(pos);
+}
+
+/*************************************************************************
+Returns the index tree in which the index is stored. */
+UNIV_INLINE
+dict_tree_t*
+dict_index_get_tree(
+/*================*/
+				/* out: index tree (index->tree) */
+	dict_index_t*	index)	/* in: index */
+{
+	dict_tree_t*	tree;
+
+	ut_ad(index);
+	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+
+	tree = index->tree;
+
+	return(tree);
+}
+
+/*************************************************************************
+Returns the order criterion of an index field. */
+UNIV_INLINE
+ulint
+dict_field_get_order(
+/*=================*/
+				/* out: order criterion (field->order) */
+	dict_field_t*	field)	/* in: field; asserted to be non-NULL */
+{
+	ulint	order;
+
+	ut_ad(field);
+
+	order = field->order;
+
+	return(order);
+}
+
+/*************************************************************************
+Returns the column of an index field. */
+UNIV_INLINE
+dict_col_t*
+dict_field_get_col(
+/*===============*/
+				/* out: column (field->col) */
+	dict_field_t*	field)	/* in: field; asserted to be non-NULL */
+{
+	dict_col_t*	col;
+
+	ut_ad(field);
+
+	col = field->col;
+
+	return(col);
+}
+
+/************************************************************************
+Returns a pointer to the data type of the nth field in an index: the type
+of the column that the field refers to. */
+UNIV_INLINE
+dtype_t*
+dict_index_get_nth_type(
+/*====================*/
+				/* out: data type */
+	dict_index_t*	index,	/* in: index */
+	ulint		pos)	/* in: position of the field */
+{
+	dict_field_t*	field;
+	dict_col_t*	col;
+
+	field = dict_index_get_nth_field(index, pos);
+	col = dict_field_get_col(field);
+
+	return(dict_col_get_type(col));
+}
+
+/************************************************************************
+Returns the column number of the nth field in an index: the number of the
+column that the field refers to. */
+UNIV_INLINE
+ulint
+dict_index_get_nth_col_no(
+/*======================*/
+				/* out: column number */
+	dict_index_t*	index,	/* in: index */
+	ulint		pos)	/* in: position of the field */
+{
+	dict_field_t*	field;
+	dict_col_t*	col;
+
+	field = dict_index_get_nth_field(index, pos);
+	col = dict_field_get_col(field);
+
+	return(dict_col_get_no(col));
+}
+
+/*************************************************************************
+Returns the space id of the root of the index tree. */
+UNIV_INLINE
+ulint
+dict_tree_get_space(
+/*================*/
+				/* out: space id (tree->space) */
+	dict_tree_t*	tree)	/* in: tree */
+{
+	ulint	space_id;
+
+	ut_ad(tree);
+	ut_ad(tree->magic_n == DICT_TREE_MAGIC_N);
+
+	space_id = tree->space;
+
+	return(space_id);
+}
+
+/*************************************************************************
+Sets the space id of the root of the index tree. Only the space member of
+the tree struct is written. */
+UNIV_INLINE
+void
+dict_tree_set_space(
+/*================*/
+	dict_tree_t*	tree,	/* in: tree; its space member is updated */
+	ulint		space)	/* in: space id */
+{
+	ut_ad(tree);
+	ut_ad(tree->magic_n == DICT_TREE_MAGIC_N);
+
+	tree->space = space;
+}
+
+/*************************************************************************
+Returns the page number of the root of the index tree. */
+UNIV_INLINE
+ulint
+dict_tree_get_page(
+/*===============*/
+				/* out: page number (tree->page) */
+	dict_tree_t*	tree)	/* in: tree */
+{
+	ulint	page_no;
+
+	ut_ad(tree);
+	ut_ad(tree->magic_n == DICT_TREE_MAGIC_N);
+
+	page_no = tree->page;
+
+	return(page_no);
+}
+
+/*************************************************************************
+Sets the page number of the root of index tree. Only the page member of
+the tree struct is written. */
+UNIV_INLINE
+void
+dict_tree_set_page(
+/*===============*/
+	dict_tree_t*	tree,	/* in: tree; its page member is updated */
+	ulint		page)	/* in: page number */
+{
+	ut_ad(tree);
+	ut_ad(tree->magic_n == DICT_TREE_MAGIC_N);
+
+	tree->page = page;
+}
+
+/*************************************************************************
+Returns the type of the index tree. */
+UNIV_INLINE
+ulint
+dict_tree_get_type(
+/*===============*/
+				/* out: type (tree->type) */
+	dict_tree_t*	tree)	/* in: tree */
+{
+	ulint	type;
+
+	ut_ad(tree);
+	ut_ad(tree->magic_n == DICT_TREE_MAGIC_N);
+
+	type = tree->type;
+
+	return(type);
+}
+
+/*************************************************************************
+Returns a pointer to the read-write lock embedded in the index tree
+struct. */
+UNIV_INLINE
+rw_lock_t*
+dict_tree_get_lock(
+/*===============*/
+				/* out: read-write lock */
+	dict_tree_t*	tree)	/* in: tree */
+{
+	rw_lock_t*	lock;
+
+	ut_ad(tree);
+	ut_ad(tree->magic_n == DICT_TREE_MAGIC_N);
+
+	lock = &tree->lock;
+
+	return(lock);
+}
+
+/************************************************************************
+Returns the amount of free space reserved on a page for future updates of
+records. This matters only when there are many consecutive inserts, since
+updates which make the records bigger might fragment the index. The result
+is always UNIV_PAGE_SIZE / 16 and does not depend on the tree. */
+UNIV_INLINE
+ulint
+dict_tree_get_space_reserve(
+/*========================*/
+				/* out: number of free bytes on page,
+				reserved for updates */
+	dict_tree_t*	tree)	/* in: a tree */
+{
+	ulint	reserve	= UNIV_PAGE_SIZE / 16;
+
+	ut_ad(tree);
+
+	UT_NOT_USED(tree);
+
+	return(reserve);
+}
+
+/**************************************************************************
+Checks if a table is in the dictionary cache. Only the name-based hash
+table of dict_sys is searched; nothing is loaded here if the table is not
+found. */
+UNIV_INLINE
+dict_table_t*
+dict_table_check_if_in_cache_low(
+/*==============================*/
+					/* out: table, NULL if not found */
+	const char*	table_name)	/* in: table name */
+{
+	dict_table_t*	table;
+	ulint		table_fold;
+	
+	ut_ad(table_name);
+#ifdef UNIV_SYNC_DEBUG
+	/* The caller is expected to own the dictionary mutex already */
+	ut_ad(mutex_own(&(dict_sys->mutex)));
+#endif /* UNIV_SYNC_DEBUG */
+
+	/* Look for the table name in the hash table */
+	table_fold = ut_fold_string(table_name);
+
+	/* HASH_SEARCH leaves its result in 'table'; an entry matches when
+	its name compares equal to table_name */
+	HASH_SEARCH(name_hash, dict_sys->table_hash, table_fold, table,
+				ut_strcmp(table->name, table_name) == 0);
+	return(table);
+}
+
+/**************************************************************************
+Returns a table object by name. The dictionary cache is consulted first;
+on a miss the table is loaded with dict_load_table. A low-level
+function. */
+UNIV_INLINE
+dict_table_t*
+dict_table_get_low(
+/*===============*/
+					/* out: table, NULL if not found */
+	const char*	table_name)	/* in: table name */
+{
+	dict_table_t*	cached_table;
+
+	ut_ad(table_name);
+#ifdef UNIV_SYNC_DEBUG
+	ut_ad(mutex_own(&(dict_sys->mutex)));
+#endif /* UNIV_SYNC_DEBUG */
+
+	cached_table = dict_table_check_if_in_cache_low(table_name);
+
+	if (cached_table != NULL) {
+
+		return(cached_table);
+	}
+
+	/* Not in the cache: try to load the definition */
+	return(dict_load_table(table_name));
+}
+
+/**************************************************************************
+Returns a table object, based on table id, and memoryfixes it. The table is
+searched for in the id-based hash table of dict_sys; if it is not found
+there, dict_load_table_on_id is called. Memoryfixing means incrementing
+the mem_fix counter of the table. */
+UNIV_INLINE
+dict_table_t*
+dict_table_get_on_id_low(
+/*=====================*/
+				/* out: table, NULL if does not exist */
+	dulint	table_id,	/* in: table id */
+	trx_t*	trx)		/* in: transaction handle; currently not
+				used by this function */
+{
+	dict_table_t*	table;
+	ulint		fold;
+
+#ifdef UNIV_SYNC_DEBUG
+	/* The caller is expected to own the dictionary mutex already */
+	ut_ad(mutex_own(&(dict_sys->mutex)));
+#endif /* UNIV_SYNC_DEBUG */
+	UT_NOT_USED(trx);
+	
+	/* Look for the table id in the id hash table */
+	fold = ut_fold_dulint(table_id);
+
+	HASH_SEARCH(id_hash, dict_sys->table_id_hash, fold, table,
+				ut_dulint_cmp(table->id, table_id) == 0);
+	if (table == NULL) {
+		/* Not in the cache: try to load the table by its id */
+		table = dict_load_table_on_id(table_id);
+	}
+
+	if (table != NULL) {
+		/* Memoryfix the table; dict_table_release decrements this
+		counter again */
+		table->mem_fix++;
+
+		/* lock_push(trx, table, LOCK_DICT_MEM_FIX) */
+	}
+	
+	/* TODO: should get the type information from MySQL */
+	
+	return(table);
+}
+
+/**************************************************************************
+Releases a table from being memoryfixed: decrements the mem_fix counter of
+the table while holding the dictionary mutex. Currently this has no
+relevance. */
+UNIV_INLINE
+void
+dict_table_release(
+/*===============*/
+	dict_table_t*	table)	/* in: table to be released */
+{
+	/* NOTE(review): the mutex is acquired here, so presumably the caller
+	must not already hold dict_sys->mutex - confirm */
+	mutex_enter(&(dict_sys->mutex));
+
+	table->mem_fix -= 1;
+
+	mutex_exit(&(dict_sys->mutex));
+}
+
+/**************************************************************************
+Returns the index of a table that has the given name. The index list of
+the table is scanned from the first index onwards and the first index whose
+name compares equal is returned. */
+UNIV_INLINE
+dict_index_t*
+dict_table_get_index(
+/*=================*/
+				/* out: index, NULL if does not exist */
+	dict_table_t*	table,	/* in: table */
+	const char*	name)	/* in: index name */
+{
+	dict_index_t*	index;
+
+	for (index = dict_table_get_first_index(table);
+	     index != NULL;
+	     index = dict_table_get_next_index(index)) {
+
+		if (ut_strcmp(name, index->name) == 0) {
+
+			return(index);
+		}
+	}
+
+	/* The list was exhausted without a match */
+	return(NULL);
+}
--- a/include/dict0load.h
+++ b/include/dict0load.h
@ -0,0 +1,99 @@
+/******************************************************
+Loads to the memory cache database object definitions
+from dictionary tables
+
+(c) 1996 Innobase Oy
+
+Created 4/24/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef dict0load_h
+#define dict0load_h
+
+#include "univ.i"
+#include "dict0types.h"
+#include "ut0byte.h"
+
+/************************************************************************
+In a crash recovery we already have all the tablespace objects created.
+This function compares the space id information in the InnoDB data dictionary
+to what we already read with fil_load_single_table_tablespaces().
+
+In a normal startup, we create the tablespace objects for every table in
+InnoDB's data dictionary, if the corresponding .ibd file exists.
+We also scan the biggest space id, and store it to fil_system. */
+
+void
+dict_check_tablespaces_and_store_max_id(
+/*====================================*/
+	ibool	in_crash_recovery);	/* in: are we doing a crash recovery */
+/************************************************************************
+Finds the first table name in the given database. */
+
+char*
+dict_get_first_table_name_in_db(
+/*============================*/
+				/* out, own: table name, NULL if
+				does not exist; the caller must free
+				the memory in the string! */
+	const char*	name);	/* in: database name which ends to '/' */
+/************************************************************************
+Loads a table definition and also all its index definitions, and also
+the cluster definition if the table is a member in a cluster. Also loads
+all foreign key constraints where the foreign key is in the table or where
+a foreign key references columns in this table. */
+
+dict_table_t*
+dict_load_table(
+/*============*/
+				/* out: table, NULL if does not exist;
+				if the table is stored in an .ibd file,
+				but the file does not exist,
+				then we set the ibd_file_missing flag TRUE
+				in the table object we return */
+	const char*	name);	/* in: table name in the
+				databasename/tablename format */
+/***************************************************************************
+Loads a table object based on the table id. */
+
+dict_table_t*
+dict_load_table_on_id(
+/*==================*/
+				/* out: table; NULL if table does not exist */
+	dulint	table_id);	/* in: table id */	
+/************************************************************************
+This function is called when the database is booted.
+Loads system table index definitions except for the clustered index which
+is added to the dictionary cache at booting before calling this function. */
+
+void
+dict_load_sys_table(
+/*================*/
+	dict_table_t*	table);	/* in: system table */
+/***************************************************************************
+Loads foreign key constraints where the table is either the foreign key
+holder or where the table is referenced by a foreign key. Adds these
+constraints to the data dictionary. Note that we know that the dictionary
+cache already contains all constraints where the other relevant table is
+already in the dictionary cache. */
+
+ulint
+dict_load_foreigns(
+/*===============*/
+					/* out: DB_SUCCESS or error code */
+	const char*	table_name,	/* in: table name */
+	ibool		check_types);	/* in: TRUE=check type compatibility */
+/************************************************************************
+Prints to the standard output information on all tables found in the data
+dictionary system table. */
+
+void
+dict_print(void);
+/*============*/
+
+
+#ifndef UNIV_NONINL
+#include "dict0load.ic"
+#endif
+
+#endif
--- a/include/dict0load.ic
+++ b/include/dict0load.ic
@ -0,0 +1,9 @@
+/******************************************************
+Loads to the memory cache database object definitions
+from dictionary tables
+
+(c) 1996 Innobase Oy
+
+Created 4/24/1996 Heikki Tuuri
+*******************************************************/
+
--- a/include/dict0mem.h
+++ b/include/dict0mem.h
@ -0,0 +1,444 @@
+/******************************************************
+Data dictionary memory object creation
+
+(c) 1996 Innobase Oy
+
+Created 1/8/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef dict0mem_h
+#define dict0mem_h
+
+#include "univ.i"
+#include "dict0types.h"
+#include "data0type.h"
+#include "data0data.h"
+#include "mem0mem.h"
+#include "rem0types.h"
+#include "btr0types.h"
+#include "ut0mem.h"
+#include "ut0lst.h"
+#include "ut0rnd.h"
+#include "ut0byte.h"
+#include "sync0rw.h"
+#include "lock0types.h"
+#include "hash0hash.h"
+#include "que0types.h"
+
+/* Type flags of an index: OR'ing of the flags is allowed to define a
+combination of types */
+#define DICT_CLUSTERED	1	/* clustered index */
+#define DICT_UNIQUE	2	/* unique index */
+#define	DICT_UNIVERSAL 	4	/* index which can contain records from any
+				other index */
+#define	DICT_IBUF 	8	/* insert buffer tree */
+				
+/* Flags for ordering an index field: OR'ing of the flags allowed */
+#define	DICT_DESCEND	1	/* in descending order (default ascending) */
+
+/* Types for a table object */
+#define DICT_TABLE_ORDINARY		1
+#define	DICT_TABLE_CLUSTER_MEMBER	2
+#define	DICT_TABLE_CLUSTER		3 /* this means that the table is
+					  really a cluster definition */
+
+/**************************************************************************
+Creates a table memory object. */
+
+dict_table_t*
+dict_mem_table_create(
+/*==================*/
+					/* out, own: table object */
+	const char*	name,		/* in: table name */
+	ulint		space,		/* in: space where the clustered index
+					of the table is placed; this parameter
+					is ignored if the table is made
+					a member of a cluster */
+	ulint		n_cols,		/* in: number of columns */
+	ibool		comp);		/* in: TRUE=compact page format */
+/**************************************************************************
+Creates a cluster memory object. */
+
+dict_cluster_t*
+dict_mem_cluster_create(
+/*====================*/
+					/* out, own: cluster object (where the
+					type dict_cluster_t == dict_table_t) */
+	const char*	name,		/* in: cluster name */
+	ulint		space,		/* in: space where the clustered
+					indexes of the member tables are
+					placed */
+	ulint		n_cols,		/* in: number of columns */
+	ulint		mix_len);	/* in: length of the common key prefix
+					in the cluster */
+/**************************************************************************
+Declares a non-published table as a member in a cluster. */
+
+void
+dict_mem_table_make_cluster_member(
+/*===============================*/
+	dict_table_t*	table,		/* in: non-published table */
+	const char*	cluster_name);	/* in: cluster name */
+/**************************************************************************
+Adds a column definition to a table. */
+
+void
+dict_mem_table_add_col(
+/*===================*/
+	dict_table_t*	table,	/* in: table */
+	const char*	name,	/* in: column name */
+	ulint		mtype,	/* in: main datatype */
+	ulint		prtype,	/* in: precise type */
+	ulint		len,	/* in: length */
+	ulint		prec);	/* in: precision */
+/**************************************************************************
+Creates an index memory object. */
+
+dict_index_t*
+dict_mem_index_create(
+/*==================*/
+					/* out, own: index object */
+	const char*	table_name,	/* in: table name */
+	const char*	index_name,	/* in: index name */
+	ulint		space,		/* in: space where the index tree is
+					placed, ignored if the index is of
+					the clustered type */
+	ulint		type,		/* in: DICT_UNIQUE,
+					DICT_CLUSTERED, ... ORed */
+	ulint		n_fields);	/* in: number of fields */
+/**************************************************************************
+Adds a field definition to an index. NOTE: does not take a copy
+of the column name if the field is a column. The memory occupied
+by the column name may be released only after publishing the index. */
+
+void
+dict_mem_index_add_field(
+/*=====================*/
+	dict_index_t*	index,		/* in: index */
+	const char*	name,		/* in: column name */
+	ulint		order,		/* in: order criterion; 0 means an
+					ascending order */
+	ulint		prefix_len);	/* in: 0 or the column prefix length
+					in a MySQL index like
+					INDEX (textcol(25)) */
+/**************************************************************************
+Frees an index memory object. */
+
+void
+dict_mem_index_free(
+/*================*/
+	dict_index_t*	index);	/* in: index */
+/**************************************************************************
+Creates and initializes a foreign constraint memory object. */
+
+dict_foreign_t*
+dict_mem_foreign_create(void);
+/*=========================*/
+				/* out, own: foreign constraint struct */
+
+/* Data structure for a column in a table; dict_table_struct keeps its
+columns in an array of these (the cols field) */
+struct dict_col_struct{
+	hash_node_t	hash;	/* hash chain node */
+	ulint		ind;	/* table column position (they are numbered
+				starting from 0) */
+	ulint		clust_pos;/* position of the column in the
+				clustered index */
+	ulint		ord_part;/* count of how many times this column
+				appears in ordering fields of an index */
+	const char*	name;	/* name */
+	dtype_t		type;	/* data type */
+	dict_table_t*	table;	/* back pointer to table of this column */
+	ulint		aux;	/* this is used as an auxiliary variable
+				in some functions; its meaning is defined
+				by the function using it */
+};
+
+/* DICT_MAX_INDEX_COL_LEN is measured in bytes and is the max index column
+length + 1. Starting from 4.1.6, we set it to < 3 * 256, so that one can
+create a column prefix index on 255 characters of a TEXT field also in the
+UTF-8 charset. In that charset, a character may take at most 3 bytes. */
+
+#define DICT_MAX_INDEX_COL_LEN		768
+
+/* Data structure for a field in an index; dict_index_struct keeps its
+fields in an array of these (the fields field) */
+struct dict_field_struct{
+	dict_col_t*	col;		/* pointer to the table column */
+	const char*	name;		/* name of the column */
+	ulint		order;		/* flags for ordering this field:
+					DICT_DESCEND, ... */
+	ulint		prefix_len;	/* 0 or the length of the column
+					prefix in bytes in a MySQL index of
+					type, e.g., INDEX (textcol(25));
+					must be smaller than
+					DICT_MAX_INDEX_COL_LEN; NOTE that
+					in the UTF-8 charset, MySQL sets this
+					to 3 * the prefix len in UTF-8 chars */
+	ulint		fixed_len;	/* 0 or the fixed length of the
+					column in bytes, if it is smaller
+					than DICT_MAX_INDEX_COL_LEN */
+	ulint		fixed_offs;	/* offset to the field, or
+					ULINT_UNDEFINED if it is not fixed
+					within the record (due to preceding
+					variable-length fields) */
+};
+
+/* Data structure for an index tree; a tree holds one index, or several
+indexes if it is of the mixed type (see tree_indexes) */
+struct dict_tree_struct{
+	ulint		type;	/* tree type */
+	dulint		id;	/* id of the index stored in the tree; in the
+				case of a mixed index, the id of the clustered
+				index of the cluster table */
+	ulint		space;	/* space of index tree */
+	ulint		page;	/* index tree root page number */
+	byte		pad[64];/* Padding to prevent other memory hotspots on
+				the same memory cache line */
+	rw_lock_t	lock;	/* read-write lock protecting the upper levels
+				of the index tree */
+	ulint		mem_fix;/* count of how many times this tree
+				struct has been memoryfixed (by mini-
+				transactions wanting to access the index
+				tree) */
+	UT_LIST_BASE_NODE_T(dict_index_t)
+			tree_indexes; /* list of indexes stored in the
+				index tree: if the tree is not of the
+				mixed type there is only one index in
+				the list; if the tree is of the mixed
+				type, the first index in the list is the
+				index of the cluster which owns the tree */
+	ulint		magic_n;/* magic number; presumably
+				DICT_TREE_MAGIC_N in a valid struct --
+				TODO confirm against the creating code */
+};
+
+#define	DICT_TREE_MAGIC_N	7545676
+
+/* Data structure for an index */
+struct dict_index_struct{
+	dulint		id;	/* id of the index */
+	mem_heap_t*	heap;	/* memory heap */
+	ulint		type;	/* index type; see the type flags of an
+				index: DICT_CLUSTERED, DICT_UNIQUE, ... */
+	const char*	name;	/* index name */
+	const char*	table_name; /* table name */
+	dict_table_t*	table;	/* back pointer to table */
+	ulint		space;	/* space where the index tree is placed */
+	ulint		trx_id_offset;/* position of the trx id column
+				in a clustered index record, if the fields
+				before it are known to be of a fixed size,
+				0 otherwise */
+	ulint		n_user_defined_cols;
+				/* number of columns the user defined to
+				be in the index: in the internal
+				representation we add more columns */
+	ulint		n_uniq;	/* number of fields from the beginning
+				which are enough to determine an index
+				entry uniquely */
+	ulint		n_def;	/* number of fields defined so far */
+	ulint		n_fields;/* number of fields in the index */
+	dict_field_t*	fields;	/* array of field descriptions */
+	ulint		n_nullable;/* number of nullable fields */
+	UT_LIST_NODE_T(dict_index_t)
+			indexes;/* list of indexes of the table */
+	dict_tree_t*	tree;	/* index tree struct */
+	UT_LIST_NODE_T(dict_index_t)
+			tree_indexes; /* list of indexes of the same index
+				tree */
+	ibool		cached;	/* TRUE if the index object is in the
+				dictionary cache */
+	btr_search_t*	search_info; /* info used in optimistic searches */
+	/*----------------------*/
+	ib_longlong*	stat_n_diff_key_vals;
+				/* approximate number of different key values
+				for this index, for each n-column prefix
+				where n <= dict_get_n_unique(index); we
+				periodically calculate new estimates */
+	ulint		stat_index_size;
+				/* approximate index size in database pages */
+	ulint		stat_n_leaf_pages;
+				/* approximate number of leaf pages in the
+				index tree */
+	ulint		magic_n;/* magic number; presumably
+				DICT_INDEX_MAGIC_N in a valid struct --
+				TODO confirm against the creating code */
+};
+
+/* Data structure for a foreign key constraint; an example:
+FOREIGN KEY (A, B) REFERENCES TABLE2 (C, D) */
+
+struct dict_foreign_struct{
+	mem_heap_t*	heap;		/* this object is allocated from
+					this memory heap */
+	char*		id;		/* id of the constraint as a
+					null-terminated string */
+	ulint		type;		/* 0 or an OR of the
+					DICT_FOREIGN_ON_... flags, e.g.,
+					DICT_FOREIGN_ON_DELETE_CASCADE or
+					DICT_FOREIGN_ON_DELETE_SET_NULL */
+	char*		foreign_table_name;/* foreign table name */
+	dict_table_t*	foreign_table;	/* table where the foreign key is */
+	const char**	foreign_col_names;/* names of the columns in the
+					foreign key */
+	char*		referenced_table_name;/* referenced table name */
+	dict_table_t*	referenced_table;/* table where the referenced key
+					is */
+	const char**	referenced_col_names;/* names of the referenced
+					columns in the referenced table */
+	ulint		n_fields;	/* number of indexes' first fields
+					for which the foreign key
+					constraint is defined: we allow the
+					indexes to contain more fields than
+					mentioned in the constraint, as long
+					as the first fields are as mentioned */
+	dict_index_t*	foreign_index;	/* foreign index; we require that
+					both tables contain explicitly defined
+					indexes for the constraint: InnoDB
+					does not generate new indexes
+					implicitly */
+	dict_index_t*	referenced_index;/* referenced index */
+	UT_LIST_NODE_T(dict_foreign_t)
+			foreign_list;	/* list node for foreign keys of the
+					table */
+	UT_LIST_NODE_T(dict_foreign_t)
+			referenced_list;/* list node for referenced keys of the
+					table */
+};
+
+/* The flags for ON_UPDATE and ON_DELETE can be ORed; the default is that
+a foreign key constraint is enforced, therefore RESTRICT just means no flag */
+#define DICT_FOREIGN_ON_DELETE_CASCADE	1
+#define DICT_FOREIGN_ON_DELETE_SET_NULL	2
+#define DICT_FOREIGN_ON_UPDATE_CASCADE	4
+#define DICT_FOREIGN_ON_UPDATE_SET_NULL	8
+#define DICT_FOREIGN_ON_DELETE_NO_ACTION 16
+#define DICT_FOREIGN_ON_UPDATE_NO_ACTION 32
+
+
+#define	DICT_INDEX_MAGIC_N	76789786
+
+/* Data structure for a database table; the same struct is also used for
+a cluster definition (dict_cluster_t is a typedef of dict_table_t) */
+struct dict_table_struct{
+	dulint		id;	/* id of the table or cluster */
+	ulint		type;	/* DICT_TABLE_ORDINARY, ... */
+	mem_heap_t*	heap;	/* memory heap */
+	const char*	name;	/* table name */
+	const char*	dir_path_of_temp_table;/* NULL or the directory path
+				where a TEMPORARY table that was explicitly
+				created by a user should be placed if
+				innodb_file_per_table is defined in my.cnf;
+				in Unix this is usually /tmp/..., in Windows
+				\temp\... */
+	ulint		space;	/* space where the clustered index of the
+				table is placed */
+	ibool		ibd_file_missing;/* TRUE if this is in a single-table
+				tablespace and the .ibd file is missing; then
+				we must return in ha_innodb.cc an error if the
+				user tries to query such an orphaned table */
+	ibool		tablespace_discarded;/* this flag is set TRUE when the
+				user calls DISCARD TABLESPACE on this table,
+				and reset to FALSE in IMPORT TABLESPACE */
+	ibool		comp;	/* flag: TRUE=compact page format */
+	hash_node_t	name_hash; /* hash chain node */
+	hash_node_t	id_hash; /* hash chain node */
+	ulint		n_def;	/* number of columns defined so far */
+	ulint		n_cols;	/* number of columns */
+	dict_col_t*	cols;	/* array of column descriptions */
+	UT_LIST_BASE_NODE_T(dict_index_t)
+			indexes; /* list of indexes of the table */
+	UT_LIST_BASE_NODE_T(dict_foreign_t)
+			foreign_list;/* list of foreign key constraints
+				in the table; these refer to columns
+				in other tables */
+	UT_LIST_BASE_NODE_T(dict_foreign_t)
+			referenced_list;/* list of foreign key constraints
+				which refer to this table */
+	UT_LIST_NODE_T(dict_table_t)
+			table_LRU; /* node of the LRU list of tables */
+	ulint		mem_fix;/* count of how many times the table
+				and its indexes have been fixed in memory;
+				currently NOT used */
+	ulint		n_mysql_handles_opened;
+				/* count of how many handles MySQL has opened
+				to this table; dropping of the table is
+				NOT allowed until this count gets to zero;
+				MySQL does NOT itself check the number of
+				open handles at drop */
+	ulint		n_foreign_key_checks_running;
+				/* count of how many foreign key check
+				operations are currently being performed
+				on the table: we cannot drop the table while
+				there are foreign key checks running on
+				it! */
+	ibool		cached;	/* TRUE if the table object has been added
+				to the dictionary cache */
+	lock_t*		auto_inc_lock;/* a buffer for an auto-inc lock
+				for this table: we allocate the memory here
+				so that individual transactions can get it
+				and release it without a need to allocate
+				space from the lock heap of the trx:
+				otherwise the lock heap would grow rapidly
+				if we do a large insert from a select */
+	dulint		query_cache_inv_trx_id;
+				/* transactions whose trx id is smaller than
+				this number are not allowed to store to the
+				MySQL query cache or retrieve from it; when a
+				trx with undo logs commits, it sets this to
+				the value of the trx id counter for the tables
+				it had an IX lock on */
+	UT_LIST_BASE_NODE_T(lock_t)
+			locks; /* list of locks on the table */
+	/*----------------------*/
+	dulint		mix_id;	/* if the table is a member in a cluster,
+				this is its mix id */
+	ulint		mix_len;/* if the table is a cluster or a member
+				this is the common key prefix length */
+	ulint		mix_id_len;/* mix id length in a compressed form */
+	byte		mix_id_buf[12];
+				/* mix id of a mixed table written in
+				a compressed form */
+	const char*	cluster_name; /* if the table is a member in a
+				cluster, this is the name of the cluster */
+	/*----------------------*/
+	ibool		does_not_fit_in_memory;
+				/* this field is used to specify in simulations
+				tables which are so big that disk should be
+				accessed: disk access is simulated by
+				putting the thread to sleep for a while;
+				NOTE that this flag is not stored to the data
+				dictionary on disk, and the database will
+				forget about value TRUE if it has to reload
+				the table definition from disk */
+	/*----------------------*/
+	ib_longlong	stat_n_rows;
+				/* approximate number of rows in the table;
+				we periodically calculate new estimates */
+	ulint		stat_clustered_index_size;
+				/* approximate clustered index size in
+				database pages */
+	ulint		stat_sum_of_other_index_sizes;
+				/* other indexes in database pages */
+	ibool           stat_initialized; /* TRUE if statistics have
+				been calculated the first time
+				after database startup or table creation */
+	ulint		stat_modified_counter;
+				/* when a row is inserted, updated, or deleted,
+				we add 1 to this number; we calculate new
+				estimates for the stat_... values for the
+				table and the indexes at an interval of 2 GB
+				or when about 1 / 16 of table has been
+				modified; also when the estimate operation is
+				called for MySQL SHOW TABLE STATUS; the
+				counter is reset to zero at statistics
+				calculation; this counter is not protected by
+				any latch, because this is only used for
+				heuristics */
+	/*----------------------*/
+	mutex_t		autoinc_mutex;
+				/* mutex protecting the autoincrement
+				counter */
+	ibool		autoinc_inited;
+				/* TRUE if the autoinc counter has been
+				inited; MySQL gets the init value by executing
+				SELECT MAX(auto inc column) */
+	ib_longlong	autoinc;/* autoinc counter value to give to the
+				next inserted row */
+	ulint		magic_n;/* magic number; presumably
+				DICT_TABLE_MAGIC_N in a valid struct --
+				TODO confirm against the creating code */
+};
+#define	DICT_TABLE_MAGIC_N	76333786
+					
+#ifndef UNIV_NONINL
+#include "dict0mem.ic"
+#endif
+
+#endif
--- a/include/dict0mem.ic
+++ b/include/dict0mem.ic
@ -0,0 +1,9 @@
+/**********************************************************************
+Data dictionary memory object creation
+
+(c) 1996 Innobase Oy
+
+Created 1/8/1996 Heikki Tuuri
+***********************************************************************/
+
+
--- a/include/dict0types.h
+++ b/include/dict0types.h
@ -0,0 +1,28 @@
+/******************************************************
+Data dictionary global types
+
+(c) 1996 Innobase Oy
+
+Created 1/8/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef dict0types_h
+#define dict0types_h
+
+typedef struct dict_sys_struct		dict_sys_t;
+typedef struct dict_col_struct		dict_col_t;
+typedef struct dict_field_struct	dict_field_t;
+typedef struct dict_index_struct	dict_index_t;
+typedef struct dict_tree_struct		dict_tree_t;
+typedef struct dict_table_struct	dict_table_t;
+typedef struct dict_foreign_struct	dict_foreign_t;
+
+/* A cluster object is a table object with the type field set to
+DICT_TABLE_CLUSTER */
+
+typedef dict_table_t			dict_cluster_t;
+
+typedef struct ind_node_struct		ind_node_t;
+typedef struct tab_node_struct		tab_node_t;
+
+#endif
--- a/include/dyn0dyn.h
+++ b/include/dyn0dyn.h
@ -0,0 +1,166 @@
+/******************************************************
+The dynamically allocated array
+
+(c) 1996 Innobase Oy
+
+Created 2/5/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef dyn0dyn_h
+#define dyn0dyn_h
+
+#include "univ.i"
+#include "ut0lst.h"
+#include "mem0mem.h"
+
+typedef struct dyn_block_struct		dyn_block_t;
+typedef dyn_block_t			dyn_array_t;
+
+
+/* This is the initial 'payload' size of a dynamic array;
+this must be > MLOG_BUF_MARGIN + 30! */
+#define	DYN_ARRAY_DATA_SIZE	512
+
+/*************************************************************************
+Initializes a dynamic array. */
+UNIV_INLINE
+dyn_array_t*
+dyn_array_create(
+/*=============*/
+				/* out: initialized dyn array */
+	dyn_array_t*	arr);	/* in: pointer to a memory buffer of
+				size sizeof(dyn_array_t) */
+/****************************************************************
+Frees a dynamic array. */
+UNIV_INLINE
+void
+dyn_array_free(
+/*===========*/
+	dyn_array_t*	arr);	/* in: dyn array */
+/*************************************************************************
+Makes room on top of a dyn array and returns a pointer to a buffer in it.
+After copying the elements, the caller must close the buffer using
+dyn_array_close. */
+UNIV_INLINE
+byte*
+dyn_array_open(
+/*===========*/
+				/* out: pointer to the buffer */
+	dyn_array_t*	arr,	/* in: dynamic array */
+	ulint		size);	/* in: size in bytes of the buffer; MUST be
+				smaller than DYN_ARRAY_DATA_SIZE! */
+/*************************************************************************
+Closes the buffer returned by dyn_array_open. */
+UNIV_INLINE
+void
+dyn_array_close(
+/*============*/
+	dyn_array_t*	arr,	/* in: dynamic array */
+	byte*		ptr);	/* in: buffer space from ptr up was not used */
+/*************************************************************************
+Makes room on top of a dyn array and returns a pointer to
+the added element. The caller must copy the element to
+the pointer returned. */
+UNIV_INLINE
+void*
+dyn_array_push(
+/*===========*/
+				/* out: pointer to the element */
+	dyn_array_t*	arr,	/* in: dynamic array */
+	ulint		size);	/* in: size in bytes of the element */
+/****************************************************************
+Returns pointer to an element in dyn array. */
+UNIV_INLINE
+void*
+dyn_array_get_element(
+/*==================*/
+				/* out: pointer to element */
+	dyn_array_t*	arr,	/* in: dyn array */
+	ulint		pos);	/* in: position of element as bytes 
+				from array start */
+/****************************************************************
+Returns the size of stored data in a dyn array. */
+UNIV_INLINE
+ulint
+dyn_array_get_data_size(
+/*====================*/
+				/* out: data size in bytes */
+	dyn_array_t*	arr);	/* in: dyn array */
+/****************************************************************
+Gets the first block in a dyn array. */
+UNIV_INLINE
+dyn_block_t*
+dyn_array_get_first_block(
+/*======================*/
+	dyn_array_t*	arr);	/* in: dyn array */
+/****************************************************************
+Gets the last block in a dyn array. */
+UNIV_INLINE
+dyn_block_t*
+dyn_array_get_last_block(
+/*=====================*/
+	dyn_array_t*	arr);	/* in: dyn array */
+/************************************************************************
+Gets the next block in a dyn array. */
+UNIV_INLINE
+dyn_block_t*
+dyn_array_get_next_block(
+/*=====================*/
+				/* out: pointer to next, NULL if end of list */
+	dyn_array_t*	arr,	/* in: dyn array */
+	dyn_block_t*	block);	/* in: dyn array block */
+/************************************************************************
+Gets the number of used bytes in a dyn array block. */
+UNIV_INLINE
+ulint
+dyn_block_get_used(
+/*===============*/
+				/* out: number of bytes used */
+	dyn_block_t*	block);	/* in: dyn array block */
+/************************************************************************
+Gets pointer to the start of data in a dyn array block. */
+UNIV_INLINE
+byte*
+dyn_block_get_data(
+/*===============*/
+				/* out: pointer to data */
+	dyn_block_t*	block);	/* in: dyn array block */
+/************************************************************
+Pushes n bytes to a dyn array. */
+UNIV_INLINE
+void
+dyn_push_string(
+/*============*/
+	dyn_array_t*	arr,	/* in: dyn array */
+	const byte*	str,	/* in: string to write */
+	ulint		len);	/* in: string length */
+
+/*#################################################################*/
+
+/* NOTE! Do not use the fields of the struct directly: the definition
+appears here only for the compiler to know its size! The same struct serves
+both as a block of the array and as the array handle itself (dyn_array_t is
+a typedef of dyn_block_t). */
+struct dyn_block_struct{
+	mem_heap_t*	heap;	/* in the first block this is != NULL 
+				if dynamic allocation has been needed */
+	ulint		used;	/* number of data bytes used in this block;
+				read it with dyn_block_get_used(), which
+				masks off DYN_BLOCK_FULL_FLAG */
+	byte		data[DYN_ARRAY_DATA_SIZE];
+				/* storage for array elements */	
+	UT_LIST_BASE_NODE_T(dyn_block_t) base;
+				/* linear list of dyn blocks: this node is
+				used only in the first block */
+	UT_LIST_NODE_T(dyn_block_t) list;
+				/* linear list node: used in all blocks */
+#ifdef UNIV_DEBUG
+	ulint		buf_end;/* only in the debug version: if dyn array is
+				opened, this is the buffer end offset, else
+				this is 0 */
+	ulint		magic_n;/* only in the debug version: set to
+				DYN_BLOCK_MAGIC_N by dyn_array_create() and
+				to 0 by dyn_array_free() */
+#endif
+};
+
+
+#ifndef UNIV_NONINL
+#include "dyn0dyn.ic"
+#endif
+
+#endif 
--- a/include/dyn0dyn.ic
+++ b/include/dyn0dyn.ic
@ -0,0 +1,344 @@
+/******************************************************
+The dynamically allocated array
+
+(c) 1996 Innobase Oy
+
+Created 2/5/1996 Heikki Tuuri
+*******************************************************/
+
+#define DYN_BLOCK_MAGIC_N	375767
+#define DYN_BLOCK_FULL_FLAG	0x1000000UL
+
+/****************************************************************
+Adds a new block to a dyn array. */
+
+dyn_block_t*
+dyn_array_add_block(
+/*================*/
+				/* out: created block */
+	dyn_array_t*	arr);	/* in: dyn array */
+
+
+/****************************************************************
+Gets the first block in a dyn array. The array handle is itself the first
+block (dyn_array_t is a typedef of dyn_block_t), so it is returned as is. */
+UNIV_INLINE
+dyn_block_t*
+dyn_array_get_first_block(
+/*======================*/
+				/* out: the first block, i.e., arr */
+	dyn_array_t*	arr)	/* in: dyn array */
+{
+	dyn_block_t*	first	= arr;
+
+	return(first);
+}
+
+/****************************************************************
+Gets the last block in a dyn array. While no heap has been created for the
+array (arr->heap == NULL) the array itself is returned; otherwise the last
+node of the block list based in the first block is returned. */
+UNIV_INLINE
+dyn_block_t*
+dyn_array_get_last_block(
+/*=====================*/
+				/* out: the last block */
+	dyn_array_t*	arr)	/* in: dyn array */
+{
+	if (arr->heap != NULL) {
+
+		return(UT_LIST_GET_LAST(arr->base));
+	}
+
+	return(arr);
+}
+
+/************************************************************************
+Gets the block following the given block in a dyn array. While no heap has
+been created for the array (arr->heap == NULL) there is no next block. */
+UNIV_INLINE
+dyn_block_t*
+dyn_array_get_next_block(
+/*=====================*/
+				/* out: pointer to next, NULL if end of list */
+	dyn_array_t*	arr,	/* in: dyn array */
+	dyn_block_t*	block)	/* in: dyn array block */
+{
+	ut_ad(arr && block);
+
+	if (arr->heap != NULL) {
+
+		return(UT_LIST_GET_NEXT(list, block));
+	}
+
+	/* Without a heap the only valid block is the array itself */
+	ut_ad(arr == block);
+
+	return(NULL);
+}
+
+/************************************************************************
+Gets the number of used bytes in a dyn array block: the used field of the
+block with the DYN_BLOCK_FULL_FLAG bit masked off. */
+UNIV_INLINE
+ulint
+dyn_block_get_used(
+/*===============*/
+				/* out: number of bytes used */
+	dyn_block_t*	block)	/* in: dyn array block */
+{
+	ulint	n_used;
+
+	ut_ad(block);
+
+	n_used = block->used & ~DYN_BLOCK_FULL_FLAG;
+
+	return(n_used);
+}
+
+/************************************************************************
+Gets a pointer to the first byte of the data buffer of a dyn array block. */
+UNIV_INLINE
+byte*
+dyn_block_get_data(
+/*===============*/
+				/* out: pointer to data */
+	dyn_block_t*	block)	/* in: dyn array block */
+{
+	ut_ad(block);
+
+	return(&block->data[0]);
+}
+
+/*************************************************************************
+Initializes a dynamic array in a caller-supplied buffer: the array starts
+with no heap and no data bytes used. In the debug version the buffer end
+offset is also cleared and the magic number is set. */
+UNIV_INLINE
+dyn_array_t*
+dyn_array_create(
+/*=============*/
+				/* out: initialized dyn array, i.e., arr */
+	dyn_array_t*	arr)	/* in: pointer to a memory buffer of
+				size sizeof(dyn_array_t) */
+{
+	ut_ad(arr);
+	/* The full flag must not collide with a valid used count */
+	ut_ad(DYN_ARRAY_DATA_SIZE < DYN_BLOCK_FULL_FLAG);
+
+#ifdef UNIV_DEBUG
+	arr->magic_n = DYN_BLOCK_MAGIC_N;
+	arr->buf_end = 0;
+#endif
+	arr->used = 0;
+	arr->heap = NULL;
+
+	return(arr);
+}
+
+/****************************************************************
+Frees a dynamic array: releases the memory heap of the array, if one has
+been created. The array struct itself is not freed. In the debug version
+the magic number is cleared. */
+UNIV_INLINE
+void
+dyn_array_free(
+/*===========*/
+	dyn_array_t*	arr)	/* in: dyn array */
+{
+	if (NULL != arr->heap) {
+
+		mem_heap_free(arr->heap);
+	}
+
+#ifdef UNIV_DEBUG
+	arr->magic_n = 0;
+#endif
+}
+
+/*************************************************************************
+Reserves size bytes on top of a dyn array and returns a pointer to the
+reserved space. The first block is tried first, then the last block, and
+if neither has room a new block is added with dyn_array_add_block().
+The caller must copy the element to the pointer returned. */
+UNIV_INLINE
+void*
+dyn_array_push(
+/*===========*/
+				/* out: pointer to the element */
+	dyn_array_t*	arr,	/* in: dynamic array */
+	ulint		size)	/* in: size in bytes of the element */
+{
+	dyn_block_t*	dest;
+	ulint		offset;
+
+	ut_ad(arr);
+	ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
+	ut_ad(size <= DYN_ARRAY_DATA_SIZE);
+	ut_ad(size);
+
+	dest = arr;
+
+	if (dest->used + size > DYN_ARRAY_DATA_SIZE) {
+		/* No room in the first block: try the last array block */
+
+		dest = dyn_array_get_last_block(arr);
+
+		if (dest->used + size > DYN_ARRAY_DATA_SIZE) {
+			dest = dyn_array_add_block(arr);
+		}
+	}
+
+	/* The element starts where the used data of the block ends */
+	offset = dest->used;
+
+	dest->used = offset + size;
+	ut_ad(dest->used <= DYN_ARRAY_DATA_SIZE);
+
+	return(dest->data + offset);
+}
+
+/*************************************************************************
+Finds a block with room for size bytes on top of a dyn array, adding a new
+block if necessary, and returns a pointer to the free space in it. The used
+count of the block is not changed here: after copying the elements, the
+caller must close the buffer using dyn_array_close. */
+UNIV_INLINE
+byte*
+dyn_array_open(
+/*===========*/
+				/* out: pointer to the buffer */
+	dyn_array_t*	arr,	/* in: dynamic array */
+	ulint		size)	/* in: size in bytes of the buffer; the
+				assertions require size <=
+				DYN_ARRAY_DATA_SIZE! */
+{
+	dyn_block_t*	dest;
+	ulint		offset;
+
+	ut_ad(arr);
+	ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
+	ut_ad(size <= DYN_ARRAY_DATA_SIZE);
+	ut_ad(size);
+
+	dest = arr;
+
+	if (dest->used + size > DYN_ARRAY_DATA_SIZE) {
+		/* No room in the first block: try the last array block */
+
+		dest = dyn_array_get_last_block(arr);
+
+		if (dest->used + size > DYN_ARRAY_DATA_SIZE) {
+			dest = dyn_array_add_block(arr);
+			ut_a(size <= DYN_ARRAY_DATA_SIZE);
+		}
+	}
+
+	offset = dest->used;
+
+	ut_ad(dest->used <= DYN_ARRAY_DATA_SIZE);
+#ifdef UNIV_DEBUG
+	/* The array must not be open already */
+	ut_ad(arr->buf_end == 0);
+
+	arr->buf_end = offset + size;
+#endif
+	return(dest->data + offset);
+}
+
+/*************************************************************************
+Closes the buffer returned by dyn_array_open: sets the used count of the
+last block of the array so that the used data ends at ptr. */
+UNIV_INLINE
+void
+dyn_array_close(
+/*============*/
+	dyn_array_t*	arr,	/* in: dynamic array */
+	byte*		ptr)	/* in: buffer space from ptr up was not used */
+{
+	dyn_block_t*	last;
+
+	ut_ad(arr);
+	ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
+
+	last = dyn_array_get_last_block(arr);
+
+	/* ptr must not lie beyond the end of the opened buffer */
+	ut_ad(ptr <= last->data + arr->buf_end);
+
+	last->used = ptr - last->data;
+
+	ut_ad(last->used <= DYN_ARRAY_DATA_SIZE);
+
+#ifdef UNIV_DEBUG
+	arr->buf_end = 0;
+#endif
+}
+
+/****************************************************************
+Returns a pointer to the byte at the given position in a dyn array. If the
+array has a heap, the block list is walked from the first block, reducing
+pos by the used byte count of every block that is passed over. */
+UNIV_INLINE
+void*
+dyn_array_get_element(
+/*==================*/
+				/* out: pointer to element */
+	dyn_array_t*	arr,	/* in: dyn array */
+	ulint		pos)	/* in: position of element as bytes 
+				from array start */
+{
+	dyn_block_t*	cur;
+	ulint		n_used;
+
+	ut_ad(arr);
+	ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
+
+	/* Start from the first array block */
+	cur = dyn_array_get_first_block(arr);
+
+	if (arr->heap != NULL) {
+		for (;;) {
+			n_used = dyn_block_get_used(cur);
+
+			if (pos < n_used) {
+				/* The position is within this block */
+
+				break;
+			}
+
+			pos -= n_used;
+			cur = UT_LIST_GET_NEXT(list, cur);
+			ut_ad(cur);
+		}
+	}
+
+	ut_ad(cur);
+	ut_ad(dyn_block_get_used(cur) >= pos);
+
+	return(cur->data + pos);
+}
+
+/****************************************************************
+Returns the size of stored data in a dyn array. Without a heap this is the
+used count of the array itself; otherwise it is the sum of the used byte
+counts of all the blocks of the array. */
+UNIV_INLINE
+ulint
+dyn_array_get_data_size(
+/*====================*/
+				/* out: data size in bytes */
+	dyn_array_t*	arr)	/* in: dyn array */
+{
+	dyn_block_t*	cur;
+	ulint		total;
+
+	ut_ad(arr);
+	ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
+
+	if (arr->heap == NULL) {
+
+		return(arr->used);
+	}
+
+	total = 0;
+
+	/* Add up the blocks, starting from the first array block */
+	for (cur = dyn_array_get_first_block(arr);
+	     cur != NULL;
+	     cur = dyn_array_get_next_block(arr, cur)) {
+
+		total += dyn_block_get_used(cur);
+	}
+
+	return(total);
+}
+
+/************************************************************
+Pushes len bytes to a dyn array. The string is copied in pieces of at most
+DYN_ARRAY_DATA_SIZE bytes, each piece to space reserved with
+dyn_array_push. */
+UNIV_INLINE
+void
+dyn_push_string(
+/*============*/
+	dyn_array_t*	arr,	/* in: dyn array */
+	const byte*	str,	/* in: string to write */
+	ulint		len)	/* in: string length */
+{
+	ulint	chunk;
+
+	for (; len > 0; len -= chunk, str += chunk) {
+		/* One push can take at most DYN_ARRAY_DATA_SIZE bytes */
+		chunk = (len > DYN_ARRAY_DATA_SIZE)
+			? DYN_ARRAY_DATA_SIZE : len;
+
+		memcpy(dyn_array_push(arr, chunk), str, chunk);
+	}
+}
--- a/include/eval0eval.h
+++ b/include/eval0eval.h
@ -0,0 +1,97 @@
+/******************************************************
+SQL evaluator: evaluates simple data structures, like expressions, in
+a query graph
+
+(c) 1997 Innobase Oy
+
+Created 12/29/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef eval0eval_h
+#define eval0eval_h
+
+#include "univ.i"
+#include "que0types.h"
+#include "pars0sym.h"
+#include "pars0pars.h"
+
+/*********************************************************************
+Frees the buffer in global dynamic memory for a value of a que_node,
+if it has been allocated with eval_node_alloc_val_buf. The freeing for
+pushed column values is done in sel_col_prefetch_buf_free. */
+
+void
+eval_node_free_val_buf(
+/*===================*/
+	que_node_t*	node);	/* in: query graph node */
+/*********************************************************************
+Evaluates a symbol table symbol. */
+UNIV_INLINE
+void
+eval_sym(
+/*=====*/
+	sym_node_t*	sym_node);	/* in: symbol table node */
+/*********************************************************************
+Evaluates an expression. */
+UNIV_INLINE
+void
+eval_exp(
+/*=====*/
+	que_node_t*	exp_node);	/* in: expression */
+/*********************************************************************
+Sets an integer value as the value of an expression node. */
+UNIV_INLINE
+void
+eval_node_set_int_val(
+/*==================*/
+	que_node_t*	node,	/* in: expression node */
+	lint		val);	/* in: value to set */
+/*********************************************************************
+Gets an integer value from an expression node. */
+UNIV_INLINE
+lint
+eval_node_get_int_val(
+/*==================*/
+				/* out: integer value */
+	que_node_t*	node);	/* in: expression node */
+/*********************************************************************
+Copies a binary string value as the value of a query graph node. Allocates a
+new buffer if necessary. */
+UNIV_INLINE
+void
+eval_node_copy_and_alloc_val(
+/*=========================*/
+	que_node_t*	node,	/* in: query graph node */
+	byte*		str,	/* in: binary string */
+	ulint		len);	/* in: string length or UNIV_SQL_NULL */
+/*********************************************************************
+Copies a query node value to another node. */
+UNIV_INLINE
+void
+eval_node_copy_val(
+/*===============*/
+	que_node_t*	node1,	/* in: node to copy to */
+	que_node_t*	node2);	/* in: node to copy from */
+/*********************************************************************
+Gets an iboolean value from a query node. */
+UNIV_INLINE
+ibool
+eval_node_get_ibool_val(
+/*===================*/
+				/* out: iboolean value */
+	que_node_t*	node);	/* in: query graph node */
+/*********************************************************************
+Evaluates a comparison node. */
+
+ibool
+eval_cmp(
+/*=====*/
+					/* out: the result of the comparison */
+	func_node_t*	cmp_node);	/* in: comparison node */
+
+
+#ifndef UNIV_NONINL
+#include "eval0eval.ic"
+#endif
+
+#endif 
--- a/include/eval0eval.ic
+++ b/include/eval0eval.ic
@ -0,0 +1,234 @@
+/******************************************************
+SQL evaluator: evaluates simple data structures, like expressions, in
+a query graph
+
+(c) 1997 Innobase Oy
+
+Created 12/29/1997 Heikki Tuuri
+*******************************************************/
+
+#include "que0que.h"
+#include "rem0cmp.h"
+#include "pars0grm.h"
+
+/*********************************************************************
+Evaluates a function node. */
+
+void
+eval_func(
+/*======*/
+	func_node_t*	func_node);	/* in: function node */
+/*********************************************************************
+Allocate a buffer from global dynamic memory for a value of a que_node.
+NOTE that this memory must be explicitly freed when the query graph is
+freed. If the node already has allocated buffer, that buffer is freed
+here. NOTE that this is the only function where dynamic memory should be
+allocated for a query node val field. */
+
+byte*
+eval_node_alloc_val_buf(
+/*====================*/
+				/* out: pointer to allocated buffer */
+	que_node_t*	node,	/* in: query graph node; sets the val field
+				data field to point to the new buffer, and
+				len field equal to size */
+	ulint		size);	/* in: buffer size */
+
+
+/*********************************************************************
+Allocates a new buffer if needed. */
+UNIV_INLINE
+byte*
+eval_node_ensure_val_buf(
+/*=====================*/
+				/* out: pointer to buffer */
+	que_node_t*	node,	/* in: query graph node; the len of its
+				val field is set to size, and a new buffer
+				is allocated for it if one is needed */
+	ulint		size)	/* in: buffer size */
+{
+	dfield_t*	val;
+	byte*		buf;
+
+	val = que_node_get_val(node);
+	/* The length is recorded first, whether or not we reallocate */
+	dfield_set_len(val, size);
+	buf = dfield_get_data(val);
+
+	if (buf != NULL && que_node_get_val_buf_size(node) >= size) {
+		/* The current buffer is already big enough */
+		return(buf);
+	}
+
+	return(eval_node_alloc_val_buf(node, size));
+}
+
+/*********************************************************************
+Evaluates a symbol table symbol. */
+UNIV_INLINE
+void
+eval_sym(
+/*=====*/
+	sym_node_t*	sym_node)	/* in: symbol table node */
+{
+	ut_ad(que_node_get_type(sym_node) == QUE_NODE_SYMBOL);
+
+	if (!sym_node->indirection) {
+		/* Not an alias: there is nothing to copy */
+		return;
+	}
+
+	/* The node is an alias for a variable or a column: copy its data */
+	dfield_copy_data(que_node_get_val(sym_node),
+			 que_node_get_val(sym_node->indirection));
+}
+
+/*********************************************************************
+Evaluates an expression. */
+UNIV_INLINE
+void
+eval_exp(
+/*=====*/
+	que_node_t*	exp_node)	/* in: expression */
+{
+	/* Symbols have their own evaluator; any other node type is
+	passed on to eval_func */
+	if (que_node_get_type(exp_node) != QUE_NODE_SYMBOL) {
+
+		eval_func(exp_node);
+	} else {
+		eval_sym((sym_node_t*)exp_node);
+	}
+}
+
+/*********************************************************************
+Sets an integer value as the value of an expression node. */
+UNIV_INLINE
+void
+eval_node_set_int_val(
+/*==================*/
+	que_node_t*	node,	/* in: expression node */
+	lint		val)	/* in: value to set */
+{
+	dfield_t*	val_field;
+	byte*		buf;
+
+	val_field = que_node_get_val(node);
+	buf = dfield_get_data(val_field);
+
+	if (!buf) {
+		/* No buffer yet: get one for the 4-byte value */
+		buf = eval_node_alloc_val_buf(node, 4);
+	}
+
+	/* The value is always stored in exactly 4 bytes */
+	ut_ad(dfield_get_len(val_field) == 4);
+	mach_write_to_4(buf, (ulint)val);
+}
+
+/*********************************************************************
+Gets an integer non-SQL null value from an expression node. */
+UNIV_INLINE
+lint
+eval_node_get_int_val(
+/*==================*/
+				/* out: integer value */
+	que_node_t*	node)	/* in: expression node */
+{
+	dfield_t*	val_field;
+	byte*		buf;
+
+	val_field = que_node_get_val(node);
+	ut_ad(dfield_get_len(val_field) == 4);
+	buf = dfield_get_data(val_field);
+	return((int)mach_read_from_4(buf));
+}
+
+/*********************************************************************
+Gets an iboolean value from a query node. */
+UNIV_INLINE
+ibool
+eval_node_get_ibool_val(
+/*===================*/
+				/* out: iboolean value */
+	que_node_t*	node)	/* in: query graph node */
+{
+	byte*	buf;
+
+	/* Fetch the data pointer of the node's val field; it must
+	have been set before this is called */
+	buf = dfield_get_data(que_node_get_val(node));
+
+	ut_ad(buf != NULL);
+
+	/* Decode the stored value with mach_read_from_1 */
+	return(mach_read_from_1(buf));
+}
+
+/*********************************************************************
+Sets an iboolean value as the value of a function node. */
+UNIV_INLINE
+void
+eval_node_set_ibool_val(
+/*===================*/
+	func_node_t*	func_node,	/* in: function node */
+	ibool		val)		/* in: value to set */
+{
+	dfield_t*	val_field;
+	byte*		buf;
+
+	val_field = que_node_get_val(func_node);
+	buf = dfield_get_data(val_field);
+
+	if (!buf) {
+		/* No buffer yet: allocate 1 byte to hold the value */
+		buf = eval_node_alloc_val_buf(func_node, 1);
+	}
+
+	/* Whether the buffer is new or reused, the field length must
+	be 1 at this point */
+	ut_ad(dfield_get_len(val_field) == 1);
+
+	mach_write_to_1(buf, val);
+}
+
+/*********************************************************************
+Copies a binary string value as the value of a query graph node. Allocates a
+new buffer if necessary. */
+UNIV_INLINE
+void
+eval_node_copy_and_alloc_val(
+/*=========================*/
+	que_node_t*	node,	/* in: query graph node */
+	byte*		str,	/* in: binary string */
+	ulint		len)	/* in: string length or UNIV_SQL_NULL */
+{
+	byte*	buf;
+
+	if (len != UNIV_SQL_NULL) {
+		/* Make sure the node has a buffer that can take len
+		bytes, then copy the string into it */
+		buf = eval_node_ensure_val_buf(node, len);
+		ut_memcpy(buf, str, len);
+	} else {
+		/* SQL NULL: record the length only; str is not read */
+		dfield_set_len(que_node_get_val(node), len);
+	}
+}
+
+/*********************************************************************
+Copies a query node value to another node. */
+UNIV_INLINE
+void
+eval_node_copy_val(
+/*===============*/
+	que_node_t*	node1,	/* in: node to copy to */
+	que_node_t*	node2)	/* in: node to copy from */
+{
+	dfield_t*	src_val = que_node_get_val(node2);
+	byte*		src_data = dfield_get_data(src_val);
+
+	/* The length is handed over as stored in the source field */
+	eval_node_copy_and_alloc_val(node1, src_data,
+					dfield_get_len(src_val));
+}
--- a/include/eval0proc.h
+++ b/include/eval0proc.h
@ -0,0 +1,79 @@
+/******************************************************
+Executes SQL stored procedures and their control structures
+
+(c) 1998 Innobase Oy
+
+Created 1/20/1998 Heikki Tuuri
+*******************************************************/
+
+#ifndef eval0proc_h
+#define eval0proc_h
+
+#include "univ.i"
+#include "que0types.h"
+#include "pars0sym.h"
+#include "pars0pars.h"
+
+/**************************************************************************
+Performs an execution step of a procedure node. */
+UNIV_INLINE
+que_thr_t*
+proc_step(
+/*======*/
+				/* out: query thread to run next or NULL */
+	que_thr_t*	thr);	/* in: query thread */
+/**************************************************************************
+Performs an execution step of an if-statement node. */
+
+que_thr_t*
+if_step(
+/*====*/
+				/* out: query thread to run next or NULL */
+	que_thr_t*	thr);	/* in: query thread */
+/**************************************************************************
+Performs an execution step of a while-statement node. */
+
+que_thr_t*
+while_step(
+/*=======*/
+				/* out: query thread to run next or NULL */
+	que_thr_t*	thr);	/* in: query thread */
+/**************************************************************************
+Performs an execution step of a for-loop node. */
+
+que_thr_t*
+for_step(
+/*=====*/
+				/* out: query thread to run next or NULL */
+	que_thr_t*	thr);	/* in: query thread */
+/**************************************************************************
+Performs an execution step of an assignment statement node. */
+
+que_thr_t*
+assign_step(
+/*========*/
+				/* out: query thread to run next or NULL */
+	que_thr_t*	thr);	/* in: query thread */
+/**************************************************************************
+Performs an execution step of a procedure call node. */
+UNIV_INLINE
+que_thr_t*
+proc_eval_step(
+/*===========*/
+				/* out: query thread to run next or NULL */
+	que_thr_t*	thr);	/* in: query thread */
+/**************************************************************************
+Performs an execution step of a return-statement node. */
+
+que_thr_t*
+return_step(
+/*========*/
+				/* out: query thread to run next or NULL */
+	que_thr_t*	thr);	/* in: query thread */
+
+
+#ifndef UNIV_NONINL
+#include "eval0proc.ic"
+#endif
+
+#endif 
--- a/include/eval0proc.ic
+++ b/include/eval0proc.ic
@ -0,0 +1,71 @@
+/******************************************************
+Executes SQL stored procedures and their control structures
+
+(c) 1998 Innobase Oy
+
+Created 1/20/1998 Heikki Tuuri
+*******************************************************/
+
+#include "pars0pars.h"
+#include "que0que.h"
+#include "eval0eval.h"
+
+/**************************************************************************
+Performs an execution step of a procedure node. */
+UNIV_INLINE
+que_thr_t*
+proc_step(
+/*======*/
+				/* out: the same query thread, thr */
+	que_thr_t*	thr)	/* in: query thread */
+{
+	proc_node_t*	proc_node;
+	que_node_t*	next_node;
+
+	ut_ad(thr);
+	proc_node = thr->run_node;
+	ut_ad(que_node_get_type(proc_node) == QUE_NODE_PROC);
+
+	if (thr->prev_node != que_node_get_parent(proc_node)) {
+		/* We did not arrive from the parent; the previous node
+		is asserted to have no successor, so nothing remains */
+		ut_ad(que_node_get_next(thr->prev_node) == NULL);
+		next_node = NULL;
+	} else {
+		/* Entered from the parent: start at the first statement */
+		next_node = proc_node->stat_list;
+	}
+
+	if (next_node == NULL) {
+		/* Nothing (more) to run here: hand control to the parent */
+		next_node = que_node_get_parent(proc_node);
+	}
+
+	thr->run_node = next_node;
+	return(thr);
+}
+
+/**************************************************************************
+Performs an execution step of a procedure call node. */
+UNIV_INLINE
+que_thr_t*
+proc_eval_step(
+/*===========*/
+				/* out: the same query thread, thr */
+	que_thr_t*	thr)	/* in: query thread */
+{
+	func_node_t*	call_node;
+
+	ut_ad(thr);
+
+	call_node = thr->run_node;
+	ut_ad(que_node_get_type(call_node) == QUE_NODE_FUNC);
+
+	/* The call is asserted to be a function node, so the expression
+	evaluator does all the work */
+	eval_exp(call_node);
+
+	/* Execution then continues from the parent of the call node */
+	thr->run_node = que_node_get_parent(call_node);
+	return(thr);
+}
--- a/include/fil0fil.h
+++ b/include/fil0fil.h
@ -0,0 +1,683 @@
+/******************************************************
+The low-level file system
+
+(c) 1995 Innobase Oy
+
+Created 10/25/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef fil0fil_h
+#define fil0fil_h
+
+#include "univ.i"
+#include "sync0rw.h"
+#include "dict0types.h"
+#include "ibuf0types.h"
+#include "ut0byte.h"
+#include "os0file.h"
+
+/* When mysqld is run, the default directory "." is the mysqld datadir, but in
+ibbackup we must set it explicitly; the path must NOT contain the trailing
+'/' or '\' */
+extern const char*	fil_path_to_mysql_datadir;
+
+/* Initial size of a single-table tablespace in pages */
+#define FIL_IBD_FILE_INITIAL_SIZE	4
+
+/* 'null' (undefined) page offset in the context of file spaces */
+#define	FIL_NULL	ULINT32_UNDEFINED
+
+/* Space address data type; this is intended to be used when
+addresses accurate to a byte are stored in file pages. If the page part
+of the address is FIL_NULL, the address is considered undefined. */
+
+typedef	byte	fil_faddr_t;	/* 'type' definition in C: an address
+				stored in a file page is a string of bytes */
+#define FIL_ADDR_PAGE	0	/* first in address is the page offset */
+#define	FIL_ADDR_BYTE	4	/* then comes 2-byte byte offset within page*/
+
+#define	FIL_ADDR_SIZE	6	/* address size is 6 bytes */
+
+/* In-memory form of a space address (FIL_ADDR): the page number and the
+byte offset as separate ulints, for use in C program data structures. */
+
+typedef struct fil_addr_struct	fil_addr_t;
+struct fil_addr_struct{
+	ulint	page;		/* page number within a space */
+	ulint	boffset;	/* byte offset within the page */
+};
+
+/* Null file address */
+extern fil_addr_t	fil_addr_null;
+
+/* The byte offsets on a file page for various variables */
+#define FIL_PAGE_SPACE_OR_CHKSUM 0	/* in < MySQL-4.0.14 space id the
+					page belongs to (== 0) but in later
+					versions the 'new' checksum of the
+					page */
+#define FIL_PAGE_OFFSET		4	/* page offset inside space */
+#define FIL_PAGE_PREV		8	/* if there is a 'natural' predecessor
+					of the page, its offset */
+#define FIL_PAGE_NEXT		12	/* if there is a 'natural' successor
+					of the page, its offset */
+#define FIL_PAGE_LSN		16	/* lsn of the end of the newest
+					modification log record to the page */
+#define	FIL_PAGE_TYPE		24	/* file page type: FIL_PAGE_INDEX,...,
+					2 bytes */
+#define FIL_PAGE_FILE_FLUSH_LSN	26	/* this is only defined for the
+					first page in a data file: the file
+					has been flushed to disk at least up
+					to this lsn */
+#define FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID  34 /* starting from 4.1.x this
+					contains the space id of the page */
+#define FIL_PAGE_DATA		38	/* start of the data on the page */
+
+/* File page trailer */
+#define FIL_PAGE_END_LSN_OLD_CHKSUM 8	/* the low 4 bytes of this are used
+					to store the page checksum, the
+					last 4 bytes should be identical
+					to the last 4 bytes of FIL_PAGE_LSN */
+#define FIL_PAGE_DATA_END	8
+
+/* File page types */
+#define FIL_PAGE_INDEX		17855
+#define FIL_PAGE_UNDO_LOG	2
+#define FIL_PAGE_INODE		3
+#define FIL_PAGE_IBUF_FREE_LIST	4
+
+/* Space types */
+#define FIL_TABLESPACE 		501
+#define FIL_LOG			502
+
+extern ulint	fil_n_log_flushes;
+
+extern ulint	fil_n_pending_log_flushes;
+extern ulint	fil_n_pending_tablespace_flushes;
+
+
+/***********************************************************************
+Returns the version number of a tablespace, -1 if not found. */
+
+ib_longlong
+fil_space_get_version(
+/*==================*/
+			/* out: version number, -1 if the tablespace does not
+			exist in the memory cache */
+	ulint	id);	/* in: space id */
+/***********************************************************************
+Returns the latch of a file space. */
+
+rw_lock_t*
+fil_space_get_latch(
+/*================*/
+			/* out: latch protecting storage allocation */
+	ulint	id);	/* in: space id */
+/***********************************************************************
+Returns the type of a file space. */
+
+ulint
+fil_space_get_type(
+/*===============*/
+			/* out: FIL_TABLESPACE or FIL_LOG */
+	ulint	id);	/* in: space id */
+/***********************************************************************
+Returns the ibuf data of a file space. */
+
+ibuf_data_t*
+fil_space_get_ibuf_data(
+/*====================*/
+			/* out: ibuf data for this space */
+	ulint	id);	/* in: space id */
+/***********************************************************************
+Appends a new file to the chain of files of a space. File must be closed. */
+
+void
+fil_node_create(
+/*============*/
+	const char*	name,	/* in: file name (file must be closed) */
+	ulint		size,	/* in: file size in database blocks, rounded
+				downwards to an integer */
+	ulint		id,	/* in: space id where to append */
+	ibool		is_raw);/* in: TRUE if a raw device or
+				a raw disk partition */
+/********************************************************************
+Drops files from the start of a file space, so that its size is cut by
+the amount given. */
+
+void
+fil_space_truncate_start(
+/*=====================*/
+	ulint	id,		/* in: space id */
+	ulint	trunc_len);	/* in: truncate by this much; it is an error
+				if this does not equal the combined size of
+				some initial files in the space */
+/***********************************************************************
+Creates a space memory object and puts it to the 'fil system' hash table. If
+there is an error, prints an error message to the .err log. */
+
+ibool
+fil_space_create(
+/*=============*/
+				/* out: TRUE if success */
+	const char*	name,	/* in: space name */
+	ulint		id,	/* in: space id */
+	ulint		purpose);/* in: FIL_TABLESPACE, or FIL_LOG if log */
+/***********************************************************************
+Frees a space object from the tablespace memory cache. Closes the files in
+the chain but does not delete them. */
+
+ibool
+fil_space_free(
+/*===========*/
+			/* out: TRUE if success */
+	ulint	id);	/* in: space id */
+/***********************************************************************
+Returns the size of the space in pages. The tablespace must be cached in the
+memory cache. */
+
+ulint
+fil_space_get_size(
+/*===============*/
+			/* out: space size, 0 if space not found */
+	ulint	id);	/* in: space id */
+/***********************************************************************
+Checks if the pair space, page_no refers to an existing page in a tablespace
+file space. The tablespace must be cached in the memory cache. */
+
+ibool
+fil_check_adress_in_tablespace(
+/*===========================*/
+			/* out: TRUE if the address is meaningful */
+	ulint	id,	/* in: space id */
+	ulint	page_no);/* in: page number */
+/********************************************************************
+Initializes the tablespace memory cache. */
+
+void
+fil_init(
+/*=====*/
+	ulint	max_n_open);	/* in: max number of open files */
+/***********************************************************************
+Opens all log files and system tablespace data files. They stay open until the
+database server shutdown. This should be called at a server startup after the
+space objects for the log and the system tablespace have been created. The
+purpose of this operation is to make sure we never run out of file descriptors
+if we need to read from the insert buffer or to write to the log. */
+
+void
+fil_open_log_and_system_tablespace_files(void);
+/*==========================================*/
+/***********************************************************************
+Closes all open files. There must not be any pending i/o's or not flushed
+modifications in the files. */
+
+void
+fil_close_all_files(void);
+/*=====================*/
+/***********************************************************************
+Sets the max tablespace id counter if the given number is bigger than the
+previous value. */
+
+void
+fil_set_max_space_id_if_bigger(
+/*===========================*/
+	ulint	max_id);/* in: maximum known id */
+/********************************************************************
+Initializes the ibuf data structure for space 0 == the system tablespace.
+This can be called after the file space headers have been created and the
+dictionary system has been initialized. */
+
+void
+fil_ibuf_init_at_db_start(void);
+/*===========================*/
+/********************************************************************
+Writes the flushed lsn and the latest archived log number to the page
+header of the first page of each data file in the system tablespace. */
+
+ulint
+fil_write_flushed_lsn_to_data_files(
+/*================================*/
+				/* out: DB_SUCCESS or error number */
+	dulint	lsn,		/* in: lsn to write */
+	ulint	arch_log_no);	/* in: latest archived log file number */
+/***********************************************************************
+Reads the flushed lsn and arch no fields from a data file at database
+startup. */
+
+void
+fil_read_flushed_lsn_and_arch_log_no(
+/*=================================*/
+	os_file_t data_file,		/* in: open data file */
+	ibool	one_read_already,	/* in: TRUE if min and max parameters
+					below already contain sensible data */
+#ifdef UNIV_LOG_ARCHIVE
+	ulint*	min_arch_log_no,	/* in/out: */
+	ulint*	max_arch_log_no,	/* in/out: */
+#endif /* UNIV_LOG_ARCHIVE */
+	dulint*	min_flushed_lsn,	/* in/out: */
+	dulint*	max_flushed_lsn);	/* in/out: */
+/***********************************************************************
+Increments the count of pending insert buffer page merges, if space is not
+being deleted. */
+
+ibool
+fil_inc_pending_ibuf_merges(
+/*========================*/
+			/* out: TRUE if being deleted, and ibuf merges should
+			be skipped */
+	ulint	id);	/* in: space id */
+/***********************************************************************
+Decrements the count of pending insert buffer page merges. */
+
+void
+fil_decr_pending_ibuf_merges(
+/*========================*/
+	ulint	id);	/* in: space id */
+/***********************************************************************
+Parses the body of a log record written about an .ibd file operation. That is,
+the log record part after the standard (type, space id, page no) header of the
+log record.
+
+If desired, also replays the delete or rename operation if the .ibd file
+exists and the space id in it matches. Replays the create operation if a file
+at that path does not exist yet. If the database directory for the file to be
+created does not exist, then we create the directory, too.
+
+Note that ibbackup --apply-log sets fil_path_to_mysql_datadir to point to the
+datadir that we should use in replaying the file operations. */
+
+byte*
+fil_op_log_parse_or_replay(
+/*=======================*/
+                        	/* out: end of log record, or NULL if the
+				record was not completely contained between
+				ptr and end_ptr */
+        byte*   ptr,    	/* in: buffer containing the log record body,
+				or an initial segment of it, if the record does
+				not fit completely between ptr and end_ptr */
+        byte*   end_ptr,	/* in: buffer end */
+	ulint	type,		/* in: the type of this log record */
+	ibool	do_replay,	/* in: TRUE if we want to replay the
+				operation, and not just parse the log record */
+	ulint	space_id);	/* in: if do_replay is TRUE, the space id of
+				the tablespace in question; otherwise
+				ignored */
+/***********************************************************************
+Deletes a single-table tablespace. The tablespace must be cached in the
+memory cache. */
+
+ibool
+fil_delete_tablespace(
+/*==================*/
+			/* out: TRUE if success */
+	ulint	id);	/* in: space id */
+/***********************************************************************
+Discards a single-table tablespace. The tablespace must be cached in the
+memory cache. Discarding is like deleting a tablespace, but
+1) we do not drop the table from the data dictionary;
+2) we remove all insert buffer entries for the tablespace immediately; in DROP
+TABLE they are only removed gradually in the background;
+3) when the user does IMPORT TABLESPACE, the tablespace will have the same id
+as it originally had. */
+
+ibool
+fil_discard_tablespace(
+/*===================*/
+			/* out: TRUE if success */
+	ulint	id);	/* in: space id */
+/***********************************************************************
+Renames a single-table tablespace. The tablespace must be cached in the
+tablespace memory cache. */
+
+ibool
+fil_rename_tablespace(
+/*==================*/
+					/* out: TRUE if success */
+	const char*	old_name,	/* in: old table name in the standard
+					databasename/tablename format of
+					InnoDB, or NULL if we do the rename
+					based on the space id only */
+	ulint		id,		/* in: space id */
+	const char*	new_name);	/* in: new table name in the standard
+					databasename/tablename format
+					of InnoDB */
+
+/***********************************************************************
+Creates a new single-table tablespace to a database directory of MySQL.
+Database directories are under the 'datadir' of MySQL. The datadir is the
+directory of a running mysqld program. We can refer to it by simply the
+path '.'. Tables created with CREATE TEMPORARY TABLE we place in the temp
+dir of the mysqld server. */
+
+ulint
+fil_create_new_single_table_tablespace(
+/*===================================*/
+					/* out: DB_SUCCESS or error code */
+	ulint*		space_id,	/* in/out: space id; if this is != 0,
+					then this is an input parameter,
+					otherwise output */
+	const char*	tablename,	/* in: the table name in the usual
+					databasename/tablename format
+					of InnoDB, or a dir path to a temp
+					table */
+	ibool		is_temp,	/* in: TRUE if a table created with
+					CREATE TEMPORARY TABLE */
+	ulint		size);		/* in: the initial size of the
+					tablespace file in pages,
+					must be >= FIL_IBD_FILE_INITIAL_SIZE */
+/************************************************************************
+Tries to open a single-table tablespace and optionally checks the space id is
+right in it. If does not succeed, prints an error message to the .err log. This
+function is used to open a tablespace when we start up mysqld, and also in
+IMPORT TABLESPACE.
+NOTE that we assume this operation is used either at the database startup
+or under the protection of the dictionary mutex, so that two users cannot
+race here. This operation does not leave the file associated with the
+tablespace open, but closes it after we have looked at the space id in it. */
+
+ibool
+fil_open_single_table_tablespace(
+/*=============================*/
+					/* out: TRUE if success */
+	ibool		check_space_id,	/* in: should we check that the space
+					id in the file is right; we assume
+					that this function runs much faster
+					if no check is made, since accessing
+					the file inode probably is much
+					faster (the OS caches them) than
+					accessing the first page of the file */
+	ulint		id,		/* in: space id */
+	const char*	name);		/* in: table name in the
+					databasename/tablename format */
+/************************************************************************
+It is possible, though very improbable, that the lsn's in the tablespace to be
+imported have risen above the current system lsn, if a lengthy purge, ibuf
+merge, or rollback was performed on a backup taken with ibbackup. If that is
+the case, reset page lsn's in the file. We assume that mysqld was shut down
+after it performed these cleanup operations on the .ibd file, so that it at
+the shutdown stamped the latest lsn to the FIL_PAGE_FILE_FLUSH_LSN in the
+first page of the .ibd file, and we can determine whether we need to reset the
+lsn's just by looking at that flush lsn. */
+
+ibool
+fil_reset_too_high_lsns(
+/*====================*/
+					/* out: TRUE if success */
+	const char*	name,		/* in: table name in the
+					databasename/tablename format */
+	dulint		current_lsn);	/* in: reset lsn's if the lsn stamped
+					to FIL_PAGE_FILE_FLUSH_LSN in the
+					first page is too high */
+/************************************************************************
+At the server startup, if we need crash recovery, scans the database
+directories under the MySQL datadir, looking for .ibd files. Those files are
+single-table tablespaces. We need to know the space id in each of them so that
+we know into which file we should look to check the contents of a page stored
+in the doublewrite buffer, also to know where to apply log records where the
+space id is != 0. */
+
+ulint
+fil_load_single_table_tablespaces(void);
+/*===================================*/
+			/* out: DB_SUCCESS or error number */
+/************************************************************************
+If we need crash recovery, and we have called
+fil_load_single_table_tablespaces() and dict_load_single_table_tablespaces(),
+we can call this function to print an error message of orphaned .ibd files
+for which there is not a data dictionary entry with a matching table name
+and space id. */
+
+void
+fil_print_orphaned_tablespaces(void);
+/*================================*/
+/***********************************************************************
+Returns TRUE if a single-table tablespace does not exist in the memory cache,
+or is being deleted there. */
+
+ibool
+fil_tablespace_deleted_or_being_deleted_in_mem(
+/*===========================================*/
+				/* out: TRUE if does not exist or is being
+				deleted */
+	ulint		id,	/* in: space id */
+	ib_longlong	version);/* in: tablespace_version should be this; if
+				you pass -1 as the value of this, then this
+				parameter is ignored */
+/***********************************************************************
+Returns TRUE if a single-table tablespace exists in the memory cache. */
+
+ibool
+fil_tablespace_exists_in_mem(
+/*=========================*/
+			/* out: TRUE if exists */
+	ulint	id);	/* in: space id */
+/***********************************************************************
+Returns TRUE if a matching tablespace exists in the InnoDB tablespace memory
+cache. Note that if we have not done a crash recovery at the database startup,
+there may be many tablespaces which are not yet in the memory cache. */
+
+ibool
+fil_space_for_table_exists_in_mem(
+/*==============================*/
+					/* out: TRUE if a matching tablespace
+					exists in the memory cache */
+	ulint		id,		/* in: space id */
+	const char*	name,		/* in: table name in the standard
+					'databasename/tablename' format or
+					the dir path to a temp table */
+	ibool		is_temp,	/* in: TRUE if created with CREATE
+					TEMPORARY TABLE */
+	ibool		mark_space,	/* in: in crash recovery, at database
+					startup we mark all spaces which have
+					an associated table in the InnoDB
+					data dictionary, so that
+					we can print a warning about orphaned
+					tablespaces */
+	ibool		print_error_if_does_not_exist);
+					/* in: print detailed error
+					information to the .err log if a
+					matching tablespace is not found from
+					memory */
+/**************************************************************************
+Tries to extend a data file so that it would accommodate the number of pages
+given. The tablespace must be cached in the memory cache. If the space is big
+enough already, does nothing. */
+
+ibool
+fil_extend_space_to_desired_size(
+/*=============================*/
+				/* out: TRUE if success */
+	ulint*	actual_size,	/* out: size of the space after extension;
+				if we ran out of disk space this may be lower
+				than the desired size */
+	ulint	space_id,	/* in: space id */
+	ulint	size_after_extend);/* in: desired size in pages after the
+				extension; if the current space size is bigger
+				than this already, the function does nothing */
+#ifdef UNIV_HOTBACKUP
+/************************************************************************
+Extends all tablespaces to the size stored in the space header. During the
+ibbackup --apply-log phase we extended the spaces on-demand so that log records
+could be applied, but that may have left spaces still too small compared to
+the size stored in the space header. */
+
+void
+fil_extend_tablespaces_to_stored_len(void);
+/*======================================*/
+#endif
+/***********************************************************************
+Tries to reserve free extents in a file space. */
+
+ibool
+fil_space_reserve_free_extents(
+/*===========================*/
+				/* out: TRUE if succeed */
+	ulint	id,		/* in: space id */
+	ulint	n_free_now,	/* in: number of free extents now */
+	ulint	n_to_reserve);	/* in: how many one wants to reserve */
+/***********************************************************************
+Releases free extents in a file space. */
+
+void
+fil_space_release_free_extents(
+/*===========================*/
+	ulint	id,		/* in: space id */
+	ulint	n_reserved);	/* in: how many one reserved */
+/***********************************************************************
+Gets the number of reserved extents. If the database is silent, this number
+should be zero. */
+
+ulint
+fil_space_get_n_reserved_extents(
+/*=============================*/
+	ulint	id);		/* in: space id */
+/************************************************************************
+Reads or writes data. This operation is asynchronous (aio). */
+
+ulint
+fil_io(
+/*===*/
+				/* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
+				if we are trying to do i/o on a tablespace
+				which does not exist */
+	ulint	type,		/* in: OS_FILE_READ or OS_FILE_WRITE,
+				ORed to OS_FILE_LOG, if a log i/o
+				and ORed to OS_AIO_SIMULATED_WAKE_LATER
+				if simulated aio and we want to post a
+				batch of i/os; NOTE that a simulated batch
+				may introduce hidden chances of deadlocks,
+				because i/os are not actually handled until
+				all have been posted: use with great
+				caution! */
+	ibool	sync,		/* in: TRUE if synchronous aio is desired */
+	ulint	space_id,	/* in: space id */
+	ulint	block_offset,	/* in: offset in number of blocks */
+	ulint	byte_offset,	/* in: remainder of offset in bytes; in
+				aio this must be divisible by the OS block
+				size */
+	ulint	len,		/* in: how many bytes to read or write; this
+				must not cross a file boundary; in aio this
+				must be a block size multiple */
+	void*	buf,		/* in/out: buffer where to store read data
+				or from where to write; in aio this must be
+				appropriately aligned */
+	void*	message);	/* in: message for aio handler if non-sync
+				aio used, else ignored */
+/************************************************************************
+Reads data from a space to a buffer. Remember that the possible incomplete
+blocks at the end of file are ignored: they are not taken into account when
+calculating the byte offset within a space. */
+
+ulint
+fil_read(
+/*=====*/
+				/* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
+				if we are trying to do i/o on a tablespace
+				which does not exist */
+	ibool	sync,		/* in: TRUE if synchronous aio is desired */
+	ulint	space_id,	/* in: space id */
+	ulint	block_offset,	/* in: offset in number of blocks */
+	ulint	byte_offset,	/* in: remainder of offset in bytes; in aio
+				this must be divisible by the OS block size */
+	ulint	len,		/* in: how many bytes to read; this must not
+				cross a file boundary; in aio this must be a
+				block size multiple */
+	void*	buf,		/* in/out: buffer where to store data read;
+				in aio this must be appropriately aligned */
+	void*	message);	/* in: message for aio handler if non-sync
+				aio used, else ignored */
+/************************************************************************
+Writes data to a space from a buffer. Remember that the possible incomplete
+blocks at the end of file are ignored: they are not taken into account when
+calculating the byte offset within a space. */
+
+ulint
+fil_write(
+/*======*/
+				/* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
+				if we are trying to do i/o on a tablespace
+				which does not exist */
+	ibool	sync,		/* in: TRUE if synchronous aio is desired */
+	ulint	space_id,	/* in: space id */
+	ulint	block_offset,	/* in: offset in number of blocks */
+	ulint	byte_offset,	/* in: remainder of offset in bytes; in aio
+				this must be divisible by the OS block size */
+	ulint	len,		/* in: how many bytes to write; this must
+				not cross a file boundary; in aio this must
+				be a block size multiple */
+	void*	buf,		/* in: buffer from which to write; in aio
+				this must be appropriately aligned */
+	void*	message);	/* in: message for aio handler if non-sync
+				aio used, else ignored */
+/**************************************************************************
+Waits for an aio operation to complete. This function is used to write the
+handler for completed requests. The aio array of pending requests is divided
+into segments (see os0file.c for more info). The thread specifies which
+segment it wants to wait for. */
+
+void
+fil_aio_wait(
+/*=========*/
+	ulint	segment);	/* in: the number of the segment in the aio
+				array to wait for */ 
+/**************************************************************************
+Flushes to disk possible writes cached by the OS. If the space does not exist
+or is being dropped, does not do anything. */
+
+void
+fil_flush(
+/*======*/
+	ulint	space_id);	/* in: file space id (this can be a group of
+				log files or a tablespace of the database) */
+/**************************************************************************
+Flushes to disk writes in file spaces of the given type possibly cached by
+the OS. */
+
+void
+fil_flush_file_spaces(
+/*==================*/
+	ulint	purpose);	/* in: FIL_TABLESPACE, FIL_LOG */
+/**********************************************************************
+Checks the consistency of the tablespace cache. */
+
+ibool
+fil_validate(void);
+/*==============*/
+			/* out: TRUE if ok */
+/************************************************************************
+Returns TRUE if file address is undefined. */
+
+ibool
+fil_addr_is_null(
+/*=============*/
+				/* out: TRUE if undefined */
+	fil_addr_t	addr);	/* in: address */
+/************************************************************************
+Accessor functions for a file page. NOTE(review): judging by the names,
+these presumably return the page numbers of the previous and the next page
+as stored on the given page; confirm against the function definitions. */
+
+ulint
+fil_page_get_prev(byte*	page);	/* in: file page */
+ulint
+fil_page_get_next(byte*	page);	/* in: file page */
+/*************************************************************************
+Sets the file page type. */
+
+void
+fil_page_set_type(
+/*==============*/
+	byte* 	page,	/* in: file page */
+	ulint	type);	/* in: type */
+/*************************************************************************
+Gets the file page type. */
+
+ulint
+fil_page_get_type(
+/*==============*/
+			/* out: type; NOTE that if the type has not been
+			written to page, the return value is not defined */
+	byte* 	page);	/* in: file page */
+
+
+typedef	struct fil_space_struct	fil_space_t;
+
+#endif
--- a/include/fsp0fsp.h
+++ b/include/fsp0fsp.h
@ -0,0 +1,387 @@
+/******************************************************
+File space management
+
+(c) 1995 Innobase Oy
+
+Created 12/18/1995 Heikki Tuuri
+*******************************************************/
+
+#ifndef fsp0fsp_h
+#define fsp0fsp_h
+
+#include "univ.i"
+
+#include "mtr0mtr.h"
+#include "fut0lst.h"
+#include "ut0byte.h"
+#include "page0types.h"
+
+/* If records are inserted in order, there are the following
+flags to tell this (their type is made byte for the compiler
+to warn if direction and hint parameters are switched in
+fseg_alloc_free_page): */
+#define	FSP_UP		((byte)111)	/* alphabetically upwards */
+#define	FSP_DOWN	((byte)112)	/* alphabetically downwards */
+#define	FSP_NO_DIR	((byte)113)	/* no order */
+
+/* File space extent size in pages */
+#define	FSP_EXTENT_SIZE		64
+
+/* On a page of any file segment, data may be put starting from this offset: */
+#define FSEG_PAGE_DATA		FIL_PAGE_DATA
+
+/* File segment header which points to the inode describing the file segment */
+typedef	byte	fseg_header_t;
+
+#define FSEG_HDR_SPACE		0	/* space id of the inode */
+#define FSEG_HDR_PAGE_NO	4	/* page number of the inode */
+#define FSEG_HDR_OFFSET		8	/* byte offset of the inode */
+
+#define FSEG_HEADER_SIZE	10
+
+/**************************************************************************
+Initializes the file space system. */
+
+void
+fsp_init(void);
+/*==========*/
+/**************************************************************************
+Gets the current free limit of a tablespace. The free limit means the
+place of the first page which has never been put to the free list
+for allocation. The space above that address is initialized to zero.
+Sets also the global variable log_fsp_current_free_limit. */
+
+ulint
+fsp_header_get_free_limit(
+/*======================*/
+			/* out: free limit in megabytes */
+	ulint	space);	/* in: space id, must be 0 */
+/**************************************************************************
+Gets the size of the tablespace from the tablespace header. If we do not
+have an auto-extending data file, this should be equal to the size of the
+data files. If there is an auto-extending data file, this can be smaller. */
+
+ulint
+fsp_header_get_tablespace_size(
+/*===========================*/
+			/* out: size in pages */
+	ulint	space);	/* in: space id, must be 0 */
+/**************************************************************************
+Reads the file space size stored in the header page. */
+
+ulint
+fsp_get_size_low(
+/*=============*/
+			/* out: tablespace size stored in the space header */
+	page_t*	page);	/* in: header page (page 0 in the tablespace) */
+/**************************************************************************
+Reads the space id from the first page of a tablespace. */
+
+ulint
+fsp_header_get_space_id(
+/*====================*/
+                        /* out: space id, ULINT_UNDEFINED if error */
+        page_t* page);   /* in: first page of a tablespace */
+/**************************************************************************
+Writes the space id to a tablespace header. This function is used past the
+buffer pool when we in fil0fil.c create a new single-table tablespace. */
+
+void
+fsp_header_write_space_id(
+/*======================*/
+	page_t*	page,		/* in: first page in the space */
+	ulint	space_id);	/* in: space id */
+/**************************************************************************
+Initializes the space header of a new created space and creates also the
+insert buffer tree root if space == 0. */
+
+void
+fsp_header_init(
+/*============*/
+	ulint	space,	/* in: space id */
+	ulint	size,	/* in: current size in blocks */
+	mtr_t*	mtr);	/* in: mini-transaction handle */	
+/**************************************************************************
+Increases the space size field of a space. */
+
+void
+fsp_header_inc_size(
+/*================*/
+	ulint	space,	/* in: space id */
+	ulint	size_inc,/* in: size increment in pages */
+	mtr_t*	mtr);	/* in: mini-transaction handle */	
+/**************************************************************************
+Creates a new segment. */
+
+page_t*
+fseg_create(
+/*========*/
+			/* out: the page where the segment header is placed,
+			x-latched, NULL if could not create segment
+			because of lack of space */
+	ulint	space,	/* in: space id */
+	ulint	page,	/* in: page where the segment header is placed: if
+			this is != 0, the page must belong to another segment,
+			if this is 0, a new page will be allocated and it
+			will belong to the created segment */
+	ulint	byte_offset, /* in: byte offset of the created segment header
+			on the page */
+	mtr_t*	mtr);	/* in: mtr */
+/**************************************************************************
+Creates a new segment. Takes the same arguments as fseg_create plus
+has_done_reservation, with which the caller can tell that it has already
+reserved the needed extents with fsp_reserve_free_extents. */
+
+page_t*
+fseg_create_general(
+/*================*/
+			/* out: the page where the segment header is placed,
+			x-latched, NULL if could not create segment
+			because of lack of space */
+	ulint	space,	/* in: space id */
+	ulint	page,	/* in: page where the segment header is placed: if
+			this is != 0, the page must belong to another segment,
+			if this is 0, a new page will be allocated and it
+			will belong to the created segment */
+	ulint	byte_offset, /* in: byte offset of the created segment header
+			on the page */
+	ibool	has_done_reservation, /* in: TRUE if the caller has already
+			done the reservation for the pages with
+			fsp_reserve_free_extents (at least 2 extents: one for
+			the inode and the other for the segment); then there
+			is no need to do the check for this individual
+			operation */
+	mtr_t*	mtr);	/* in: mtr */
+/**************************************************************************
+Calculates the number of pages reserved by a segment, and how many pages are
+currently used. */
+
+ulint
+fseg_n_reserved_pages(
+/*==================*/
+				/* out: number of reserved pages */
+	fseg_header_t* 	header,	/* in: segment header */
+	ulint*		used,	/* out: number of pages used (<= reserved) */
+	mtr_t*		mtr);	/* in: mtr handle */
+/**************************************************************************
+Allocates a single free page from a segment. This function implements
+the intelligent allocation strategy which tries to minimize
+file space fragmentation. */
+
+ulint
+fseg_alloc_free_page(
+/*=================*/
+				/* out: the allocated page offset
+				FIL_NULL if no page could be allocated */
+	fseg_header_t*	seg_header, /* in: segment header */
+	ulint		hint,	/* in: hint of which page would be desirable */
+	byte		direction, /* in: if the new page is needed because
+				of an index page split, and records are
+				inserted there in order, into which
+				direction they go alphabetically: FSP_DOWN,
+				FSP_UP, FSP_NO_DIR */
+	mtr_t*		mtr);	/* in: mtr handle */
+/**************************************************************************
+Allocates a single free page from a segment. This function implements
+the intelligent allocation strategy which tries to minimize file space
+fragmentation. Takes the same arguments as fseg_alloc_free_page plus
+has_done_reservation, with which the caller can tell that it has already
+reserved space for the page with fsp_reserve_free_extents. */
+
+ulint
+fseg_alloc_free_page_general(
+/*=========================*/
+				/* out: allocated page offset, FIL_NULL if no
+				page could be allocated */
+	fseg_header_t*	seg_header,/* in: segment header */
+	ulint		hint,	/* in: hint of which page would be desirable */
+	byte		direction,/* in: if the new page is needed because
+				of an index page split, and records are
+				inserted there in order, into which
+				direction they go alphabetically: FSP_DOWN,
+				FSP_UP, FSP_NO_DIR */
+	ibool		has_done_reservation, /* in: TRUE if the caller has
+				already done the reservation for the page
+				with fsp_reserve_free_extents; then there
+				is no need to do the check for this individual
+				page */
+	mtr_t*		mtr);	/* in: mtr handle */
+/**************************************************************************
+Reserves free pages from a tablespace. All mini-transactions which may
+use several pages from the tablespace should call this function beforehand
+and reserve enough free extents so that they certainly will be able
+to do their operation, like a B-tree page split, fully. Reservations
+must be released with function fil_space_release_free_extents!
+
+The alloc_type below has the following meaning: FSP_NORMAL means an
+operation which will probably result in more space usage, like an
+insert in a B-tree; FSP_UNDO means allocation to undo logs: if we are
+deleting rows, then this allocation will in the long run result in
+less space usage (after a purge); FSP_CLEANING means allocation done
+in a physical record delete (like in a purge) or other cleaning operation
+which will result in less space usage in the long run. We prefer the latter
+two types of allocation: when space is scarce, FSP_NORMAL allocations
+will not succeed, but the latter two allocations will succeed, if possible.
+The purpose is to avoid dead end where the database is full but the
+user cannot free any space because these freeing operations temporarily
+reserve some space.
+
+Single-table tablespaces whose size is < 32 pages are a special case. In this
+function we would liberally reserve several 64 page extents for every page
+split or merge in a B-tree. But we do not want to waste disk space if the table
+only occupies < 32 pages. That is why we apply different rules in that special
+case, just ensuring that there are 3 free pages available. */
+
+ibool
+fsp_reserve_free_extents(
+/*=====================*/
+			/* out: TRUE if we were able to make the reservation */
+        ulint*  n_reserved,/* out: number of extents actually reserved; if we
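+                        return TRUE and the tablespace size is < 64 pages,
+                        then this can be 0, otherwise it is n_ext;
+                        NOTE(review): the function description above gives
+                        the small-tablespace limit as < 32 pages, confirm
+                        which figure is the right one */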
+	ulint	space,	/* in: space id */
+	ulint	n_ext,	/* in: number of extents to reserve */
+	ulint	alloc_type,/* in: FSP_NORMAL, FSP_UNDO, or FSP_CLEANING */
+	mtr_t*	mtr);	/* in: mtr */
+/**************************************************************************
+This function should be used to get information on how much new data we
+will still be able to insert into the database without running out of
+tablespace. Only free extents are taken into account and we also subtract
+the safety margin required by the above function fsp_reserve_free_extents. */
+
+ulint
+fsp_get_available_space_in_free_extents(
+/*====================================*/
+			/* out: available space in kB */
+	ulint	space);	/* in: space id */
+/**************************************************************************
+Frees a single page of a segment. */
+
+void
+fseg_free_page(
+/*===========*/
+	fseg_header_t*	seg_header, /* in: segment header */
+	ulint		space,	/* in: space id */
+	ulint		page,	/* in: page offset */
+	mtr_t*		mtr);	/* in: mtr handle */
+/***********************************************************************
+Frees a segment. The freeing is performed in several mini-transactions,
+so that there is no danger of bufferfixing too many buffer pages. */
+
+void
+fseg_free(
+/*======*/
+	ulint	space,	/* in: space id */
+	ulint	page_no,/* in: page number where the segment header is
+			placed */
+	ulint	offset);/* in: byte offset of the segment header on that
+			page */
+/**************************************************************************
+Frees part of a segment. This function can be used to free a segment
+by repeatedly calling this function in different mini-transactions.
+Doing the freeing in a single mini-transaction might result in
+too big a mini-transaction. */
+
+ibool
+fseg_free_step(
+/*===========*/
+				/* out: TRUE if freeing completed */
+	fseg_header_t*	header,	/* in, own: segment header; NOTE: if the header
+				resides on the first page of the frag list
+				of the segment, this pointer becomes obsolete
+				after the last freeing step */
+	mtr_t*		mtr);	/* in: mtr */
+/**************************************************************************
+Frees part of a segment. Differs from fseg_free_step because this function
+leaves the header page unfreed. */
+
+ibool
+fseg_free_step_not_header(
+/*======================*/
+				/* out: TRUE if freeing completed, except the
+				header page */
+	fseg_header_t*	header,	/* in: segment header which must reside on
+				the first fragment page of the segment */
+	mtr_t*		mtr);	/* in: mtr */
+/***************************************************************************
+Checks if a page address is an extent descriptor page address. */
+UNIV_INLINE
+ibool
+fsp_descr_page(
+/*===========*/
+			/* out: TRUE if a descriptor page */
+	ulint	page_no);/* in: page number */
+/***************************************************************
+Parses a redo log record of a file page init. */
+
+byte*
+fsp_parse_init_file_page(
+/*=====================*/
+			/* out: end of log record or NULL */
+	byte*	ptr,	/* in: buffer */
+	byte*	end_ptr,/* in: buffer end */
+	page_t*	page);	/* in: page or NULL */
+/***********************************************************************
+Validates the file space system and its segments. */
+
+ibool
+fsp_validate(
+/*=========*/
+			/* out: TRUE if ok */
+	ulint	space);	/* in: space id */
+/***********************************************************************
+Prints info of a file space. */
+
+void
+fsp_print(
+/*======*/
+	ulint	space);	/* in: space id */
+/***********************************************************************
+Validates a segment. */
+
+ibool
+fseg_validate(
+/*==========*/
+				/* out: TRUE if ok */
+	fseg_header_t*	header, /* in: segment header */
+	mtr_t*		mtr2);	/* in: mtr */
+/***********************************************************************
+Writes info of a segment. */
+
+void
+fseg_print(
+/*=======*/
+	fseg_header_t*	header, /* in: segment header */
+	mtr_t*		mtr);	/* in: mtr */
+
+/* Flags for fsp_reserve_free_extents */
+#define FSP_NORMAL	1000000
+#define	FSP_UNDO	2000000
+#define FSP_CLEANING	3000000
+
+/* Number of pages described in a single descriptor page: currently each page
+description takes less than 1 byte; a descriptor page is repeated every
+this many file pages */
+#define XDES_DESCRIBED_PER_PAGE		UNIV_PAGE_SIZE
+
+/* The space low address page map, and also offsets for extent descriptor and
+bitmap pages which are repeated always after XDES_DESCRIBED_PER_PAGE more
+pages: */
+/*--------------------------------------*/
+#define FSP_XDES_OFFSET			0
+#define FSP_IBUF_BITMAP_OFFSET		1
+				/* The ibuf bitmap pages are the ones whose
+				page number is the number above plus a
+				multiple of XDES_DESCRIBED_PER_PAGE */
+#define FSP_FIRST_INODE_PAGE_NO		2
+#define FSP_IBUF_HEADER_PAGE_NO		3
+#define FSP_IBUF_TREE_ROOT_PAGE_NO	4
+				/* The ibuf tree root page number in
+				tablespace 0; its fseg inode is on the page
+				number FSP_FIRST_INODE_PAGE_NO */
+#define FSP_TRX_SYS_PAGE_NO		5
+#define	FSP_FIRST_RSEG_PAGE_NO		6
+#define FSP_DICT_HDR_PAGE_NO		7
+/*--------------------------------------*/
+
+#ifndef UNIV_NONINL
+#include "fsp0fsp.ic"
+#endif
+
+#endif
--- a/include/fsp0fsp.ic
+++ b/include/fsp0fsp.ic
@ -0,0 +1,24 @@
+/******************************************************
+File space management
+
+(c) 1995 Innobase Oy
+
+Created 12/18/1995 Heikki Tuuri
+*******************************************************/
+
+/***************************************************************************
+Tells whether a page number is that of an extent descriptor page, i.e.,
+whether its remainder modulo XDES_DESCRIBED_PER_PAGE is FSP_XDES_OFFSET. */
+UNIV_INLINE
+ibool
+fsp_descr_page(
+/*===========*/
+			/* out: TRUE if a descriptor page, else FALSE */
+	ulint	page_no)/* in: page number to test */
+{
+	ulint	pos_in_cycle;
+
+	/* Position of the page within the repeating descriptor cycle */
+	pos_in_cycle = page_no % XDES_DESCRIBED_PER_PAGE;
+
+	return((pos_in_cycle == FSP_XDES_OFFSET) ? TRUE : FALSE);
+}
--- a/include/fut0fut.h
+++ b/include/fut0fut.h
@ -0,0 +1,36 @@
+/**********************************************************************
+File-based utilities
+
+(c) 1995 Innobase Oy
+
+Created 12/13/1995 Heikki Tuuri
+***********************************************************************/
+
+
+#ifndef fut0fut_h
+#define fut0fut_h
+
+#include "univ.i"
+
+#include "fil0fil.h"
+#include "mtr0mtr.h"
+
+/************************************************************************
+Gets a pointer to a file address and latches the page. */
+UNIV_INLINE
+byte*
+fut_get_ptr(
+/*========*/
+				/* out: pointer to a byte in a frame; the file
+				page in the frame is bufferfixed and latched */
+	ulint		space,	/* in: space id */
+	fil_addr_t	addr,	/* in: file address */
+	ulint		rw_latch, /* in: RW_S_LATCH, RW_X_LATCH */
+	mtr_t*		mtr);	/* in: mtr handle */
+
+#ifndef UNIV_NONINL
+#include "fut0fut.ic"
+#endif
+
+#endif
+
--- a/include/fut0fut.ic
+++ b/include/fut0fut.ic
@ -0,0 +1,38 @@
+/**********************************************************************
+File-based utilities
+
+(c) 1995 Innobase Oy
+
+Created 12/13/1995 Heikki Tuuri
+***********************************************************************/
+
+#include "sync0rw.h"
+#include "buf0buf.h"
+
+/************************************************************************
+Fetches the page named by a file address with the requested latch and
+returns a pointer to the addressed byte within that page. */
+UNIV_INLINE
+byte*
+fut_get_ptr(
+/*========*/
+				/* out: pointer to a byte in a frame; the file
+				page in the frame is bufferfixed and latched */
+	ulint		space,	/* in: space id */
+	fil_addr_t	addr,	/* in: file address (page and byte offset) */
+	ulint		rw_latch, /* in: RW_S_LATCH or RW_X_LATCH */
+	mtr_t*		mtr)	/* in: mtr handle */
+{
+	byte*	target;
+
+	ut_ad(mtr);
+	ut_ad(addr.boffset < UNIV_PAGE_SIZE);
+	ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
+
+	/* Get the page first, then step to the byte offset within it */
+	target = buf_page_get(space, addr.page, rw_latch, mtr);
+	target += addr.boffset;
+
+#ifdef UNIV_SYNC_DEBUG
+	buf_page_dbg_add_level(target, SYNC_NO_ORDER_CHECK);
+#endif /* UNIV_SYNC_DEBUG */
+
+	return(target);
+}
--- a/include/fut0lst.h
+++ b/include/fut0lst.h
@ -0,0 +1,198 @@
+/**********************************************************************
+File-based list utilities
+
+(c) 1995 Innobase Oy
+
+Created 11/28/1995 Heikki Tuuri
+***********************************************************************/
+
+#ifndef fut0lst_h
+#define fut0lst_h
+
+#include "univ.i"
+
+#include "fil0fil.h"
+#include "mtr0mtr.h"
+
+
+/* The C 'types' of base node and list node: these should be used to
+write self-documenting code. Of course, the sizeof macro cannot be
+applied to these types! */
+
+typedef	byte	flst_base_node_t;
+typedef	byte	flst_node_t;
+
+/* The physical size of a list base node in bytes */
+#define	FLST_BASE_NODE_SIZE	(4 + 2 * FIL_ADDR_SIZE)
+
+/* The physical size of a list node in bytes */
+#define	FLST_NODE_SIZE		(2 * FIL_ADDR_SIZE)
+
+
+/************************************************************************
+Initializes a list base node. */
+UNIV_INLINE
+void
+flst_init(
+/*======*/
+	flst_base_node_t*	base,	/* in: pointer to base node */
+	mtr_t*			mtr);	/* in: mini-transaction handle */
+/************************************************************************
+Adds a node as the last node in a list. */
+
+void
+flst_add_last(
+/*==========*/
+	flst_base_node_t*	base,	/* in: pointer to base node of list */
+	flst_node_t*		node,	/* in: node to add */
+	mtr_t*			mtr);	/* in: mini-transaction handle */
+/************************************************************************
+Adds a node as the first node in a list. */
+
+void
+flst_add_first(
+/*===========*/
+	flst_base_node_t*	base,	/* in: pointer to base node of list */
+	flst_node_t*		node,	/* in: node to add */
+	mtr_t*			mtr);	/* in: mini-transaction handle */
+/************************************************************************
+Inserts a node after another in a list. */
+
+void
+flst_insert_after(
+/*==============*/
+	flst_base_node_t*	base,	/* in: pointer to base node of list */
+	flst_node_t*		node1,	/* in: node to insert after */
+	flst_node_t*		node2,	/* in: node to add */
+	mtr_t*			mtr);	/* in: mini-transaction handle */
+/************************************************************************
+Inserts a node before another in a list. */
+
+void
+flst_insert_before(
+/*===============*/
+	flst_base_node_t*	base,	/* in: pointer to base node of list */
+	flst_node_t*		node2,	/* in: node to insert */
+	flst_node_t*		node3,	/* in: node to insert before */
+	mtr_t*			mtr);	/* in: mini-transaction handle */
+/************************************************************************
+Removes a node. */
+
+void
+flst_remove(
+/*========*/
+	flst_base_node_t*	base,	/* in: pointer to base node of list */
+	flst_node_t*		node2,	/* in: node to remove */
+	mtr_t*			mtr);	/* in: mini-transaction handle */
+/************************************************************************
+Cuts off the tail of the list, including the node given. The number of
+nodes which will be removed must be provided by the caller, as this function
+does not measure the length of the tail. */
+
+void
+flst_cut_end(
+/*=========*/
+	flst_base_node_t*	base,	/* in: pointer to base node of list */
+	flst_node_t*		node2,	/* in: first node to remove */
+	ulint			n_nodes,/* in: number of nodes to remove,
+					must be >= 1 */
+	mtr_t*			mtr);	/* in: mini-transaction handle */
+/************************************************************************
+Cuts off the tail of the list, not including the given node. The number of
+nodes which will be removed must be provided by the caller, as this function
+does not measure the length of the tail. */
+
+void
+flst_truncate_end(
+/*==============*/
+	flst_base_node_t*	base,	/* in: pointer to base node of list */
+	flst_node_t*		node2,	/* in: first node not to remove */
+	ulint			n_nodes,/* in: number of nodes to remove */
+	mtr_t*			mtr);	/* in: mini-transaction handle */
+/************************************************************************
+Gets list length. */
+UNIV_INLINE
+ulint
+flst_get_len(
+/*=========*/
+					/* out: length */
+	flst_base_node_t*	base,	/* in: pointer to base node */
+	mtr_t*			mtr);	/* in: mini-transaction handle */
+/************************************************************************
+Gets list first node address. */
+UNIV_INLINE
+fil_addr_t
+flst_get_first(
+/*===========*/
+					/* out: file address */
+	flst_base_node_t*	base,	/* in: pointer to base node */
+	mtr_t*			mtr);	/* in: mini-transaction handle */
+/************************************************************************
+Gets list last node address. */
+UNIV_INLINE
+fil_addr_t
+flst_get_last(
+/*==========*/
+					/* out: file address */
+	flst_base_node_t*	base,	/* in: pointer to base node */
+	mtr_t*			mtr);	/* in: mini-transaction handle */
+/************************************************************************
+Gets list next node address. */
+UNIV_INLINE
+fil_addr_t
+flst_get_next_addr(
+/*===============*/
+				/* out: file address */
+	flst_node_t*	node,	/* in: pointer to node */
+	mtr_t*		mtr);	/* in: mini-transaction handle */
+/************************************************************************
+Gets list prev node address. */
+UNIV_INLINE
+fil_addr_t
+flst_get_prev_addr(
+/*===============*/
+				/* out: file address */
+	flst_node_t*	node,	/* in: pointer to node */
+	mtr_t*		mtr);	/* in: mini-transaction handle */
+/************************************************************************
+Writes a file address (page number and byte offset) to the given location. */
+UNIV_INLINE
+void
+flst_write_addr(
+/*============*/
+	fil_faddr_t*	faddr,	/* in: pointer to file address */
+	fil_addr_t	addr,	/* in: file address */
+	mtr_t*		mtr);	/* in: mini-transaction handle */
+/************************************************************************
+Reads a file address from the given location. */
+UNIV_INLINE
+fil_addr_t
+flst_read_addr(
+/*===========*/
+				/* out: file address */
+	fil_faddr_t*	faddr,	/* in: pointer to file address */
+	mtr_t*		mtr);	/* in: mini-transaction handle */
+/************************************************************************
+Validates a file-based list. */
+
+ibool
+flst_validate(
+/*==========*/
+					/* out: TRUE if ok */
+	flst_base_node_t*	base,	/* in: pointer to base node of list */
+	mtr_t*			mtr1);	/* in: mtr */
+/************************************************************************
+Prints info of a file-based list. */
+
+void
+flst_print(
+/*=======*/
+	flst_base_node_t*	base,	/* in: pointer to base node of list */
+	mtr_t*			mtr);	/* in: mtr */
+
+
+#ifndef UNIV_NONINL
+#include "fut0lst.ic"
+#endif
+
+#endif
--- a/include/fut0lst.ic
+++ b/include/fut0lst.ic
@ -0,0 +1,147 @@
+/**********************************************************************
+File-based list utilities
+
+(c) 1995 Innobase Oy
+
+Created 11/28/1995 Heikki Tuuri
+***********************************************************************/
+
+#include "fut0fut.h"
+#include "mtr0log.h"
+#include "buf0buf.h"
+
+/* Byte offsets of the fields of a list node; they are added to a node
+pointer in flst_get_prev_addr() and flst_get_next_addr() */
+#define FLST_PREV	0	/* 6-byte address of the previous list element;
+				the page part of address is FIL_NULL, if no
+				previous element */
+#define FLST_NEXT	FIL_ADDR_SIZE	/* 6-byte address of the next
+				list element; the page part of address
+				is FIL_NULL, if no next element */
+
+/* Byte offsets of the fields of a list base node; they are added to a
+base node pointer in flst_init(), flst_get_len(), flst_get_first() and
+flst_get_last() */
+#define FLST_LEN	0	/* 32-bit list length field */
+#define	FLST_FIRST	4	/* 6-byte address of the first element
+				of the list; undefined if empty list
+				(flst_init() stores fil_addr_null here) */
+#define	FLST_LAST	(4 + FIL_ADDR_SIZE) /* 6-byte address of the
+				last element of the list; undefined
+				if empty list (flst_init() stores
+				fil_addr_null here) */
+
+/************************************************************************
+Stores a file address into a file address field. The page number is
+written as 4 bytes at offset FIL_ADDR_PAGE and the byte offset as 2 bytes
+at offset FIL_ADDR_BYTE, both through mlog_write_ulint(). The function
+checks with ut_ad that mtr holds an X-fix on the block containing faddr. */
+UNIV_INLINE
+void
+flst_write_addr(
+/*============*/
+	fil_faddr_t*	faddr,	/* in: pointer to file address field */
+	fil_addr_t	addr,	/* in: file address to store */
+	mtr_t*		mtr)	/* in: mini-transaction handle */
+{
+	fil_faddr_t*	page_no_field;
+	fil_faddr_t*	offset_field;
+
+	ut_ad(faddr && mtr);
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(faddr),
+							MTR_MEMO_PAGE_X_FIX));
+
+	page_no_field = faddr + FIL_ADDR_PAGE;
+	offset_field = faddr + FIL_ADDR_BYTE;
+
+	/* The page number is logged first, then the byte offset */
+
+	mlog_write_ulint(page_no_field, addr.page, MLOG_4BYTES, mtr);
+	mlog_write_ulint(offset_field, addr.boffset, MLOG_2BYTES, mtr);
+}
+
+/************************************************************************
+Fetches a file address from a file address field. The page number is read
+as 4 bytes from offset FIL_ADDR_PAGE and the byte offset as 2 bytes from
+offset FIL_ADDR_BYTE, both through mtr_read_ulint(). */
+UNIV_INLINE
+fil_addr_t
+flst_read_addr(
+/*===========*/
+				/* out: file address */
+	fil_faddr_t*	faddr,	/* in: pointer to file address field */
+	mtr_t*		mtr)	/* in: mini-transaction handle */
+{
+	fil_addr_t	result;
+	fil_faddr_t*	page_no_field;
+	fil_faddr_t*	offset_field;
+
+	ut_ad(faddr && mtr);
+
+	page_no_field = faddr + FIL_ADDR_PAGE;
+	offset_field = faddr + FIL_ADDR_BYTE;
+
+	result.page = mtr_read_ulint(page_no_field, MLOG_4BYTES, mtr);
+	result.boffset = mtr_read_ulint(offset_field, MLOG_2BYTES, mtr);
+
+	return(result);
+}
+
+/************************************************************************
+Initializes a list base node to represent an empty list: the length field
+is set to 0 and both the first and the last address fields are set to
+fil_addr_null. The function checks with ut_ad that mtr holds an X-fix on
+the block containing the base node. */
+UNIV_INLINE
+void
+flst_init(
+/*======*/
+	flst_base_node_t*	base,	/* in: pointer to base node */
+	mtr_t*			mtr)	/* in: mini-transaction handle */
+{
+	flst_base_node_t*	len_field;
+	flst_base_node_t*	first_field;
+	flst_base_node_t*	last_field;
+
+	ut_ad(mtr_memo_contains(mtr, buf_block_align(base),
+							MTR_MEMO_PAGE_X_FIX));
+
+	len_field = base + FLST_LEN;
+	first_field = base + FLST_FIRST;
+	last_field = base + FLST_LAST;
+
+	/* The writes are issued in the order length, first, last */
+
+	mlog_write_ulint(len_field, 0, MLOG_4BYTES, mtr);
+	flst_write_addr(first_field, fil_addr_null, mtr);
+	flst_write_addr(last_field, fil_addr_null, mtr);
+}
+
+/************************************************************************
+Returns the list length stored in the 4-byte FLST_LEN field of the base
+node. */
+UNIV_INLINE
+ulint
+flst_get_len(
+/*=========*/
+					/* out: length */
+	flst_base_node_t*	base,	/* in: pointer to base node */
+	mtr_t*			mtr)	/* in: mini-transaction handle */
+{
+	ulint	len;
+
+	len = mtr_read_ulint(base + FLST_LEN, MLOG_4BYTES, mtr);
+
+	return(len);
+}
+
+/************************************************************************
+Returns the file address stored in the FLST_FIRST field of the base node,
+i.e., the address of the first node of the list. */
+UNIV_INLINE
+fil_addr_t
+flst_get_first(
+/*===========*/
+					/* out: file address */
+	flst_base_node_t*	base,	/* in: pointer to base node */
+	mtr_t*			mtr)	/* in: mini-transaction handle */
+{
+	fil_addr_t	first;
+
+	first = flst_read_addr(base + FLST_FIRST, mtr);
+
+	return(first);
+}
+
+/************************************************************************
+Returns the file address stored in the FLST_LAST field of the base node,
+i.e., the address of the last node of the list. */
+UNIV_INLINE
+fil_addr_t
+flst_get_last(
+/*==========*/
+					/* out: file address */
+	flst_base_node_t*	base,	/* in: pointer to base node */
+	mtr_t*			mtr)	/* in: mini-transaction handle */
+{
+	fil_addr_t	last;
+
+	last = flst_read_addr(base + FLST_LAST, mtr);
+
+	return(last);
+}
+
+/************************************************************************
+Returns the file address stored in the FLST_NEXT field of a list node,
+i.e., the address of the node following it in the list. */
+UNIV_INLINE
+fil_addr_t
+flst_get_next_addr(
+/*===============*/
+				/* out: file address */
+	flst_node_t*	node,	/* in: pointer to node */
+	mtr_t*		mtr)	/* in: mini-transaction handle */
+{
+	fil_addr_t	next;
+
+	next = flst_read_addr(node + FLST_NEXT, mtr);
+
+	return(next);
+}
+
+/************************************************************************
+Returns the file address stored in the FLST_PREV field of a list node,
+i.e., the address of the node preceding it in the list. */
+UNIV_INLINE
+fil_addr_t
+flst_get_prev_addr(
+/*===============*/
+				/* out: file address */
+	flst_node_t*	node,	/* in: pointer to node */
+	mtr_t*		mtr)	/* in: mini-transaction handle */
+{
+	fil_addr_t	prev;
+
+	prev = flst_read_addr(node + FLST_PREV, mtr);
+
+	return(prev);
+}
--- a/include/ha0ha.h
+++ b/include/ha0ha.h
@ -0,0 +1,146 @@
+/******************************************************
+The hash table with external chains
+
+(c) 1994-1997 Innobase Oy
+
+Created 8/18/1994 Heikki Tuuri
+*******************************************************/
+
+#ifndef ha0ha_h
+#define ha0ha_h
+
+#include "univ.i"
+
+#include "hash0hash.h"
+#include "page0types.h"
+
+/*****************************************************************
+Looks for an element in a hash table. */
+UNIV_INLINE
+void*
+ha_search_and_get_data(
+/*===================*/
+				/* out: pointer to the data of the first hash
+				table node in chain having the fold number,
+				NULL if not found */
+	hash_table_t*	table,	/* in: hash table */
+	ulint		fold);	/* in: folded value of the searched data */
+/*************************************************************
+Looks for an element when we know the pointer to the data and updates
+the pointer to data if found. */
+
+void
+ha_search_and_update_if_found(
+/*==========================*/
+	hash_table_t*	table,	/* in: hash table */
+	ulint		fold,	/* in: folded value of the searched data */
+	void*		data,	/* in: pointer to the data */
+	void*		new_data);/* in: new pointer to the data */
+/*****************************************************************
+Creates a hash table with >= n array cells. The actual number of cells is
+chosen to be a prime number slightly bigger than n. */
+
+hash_table_t*
+ha_create(
+/*======*/
+				/* out, own: created table */
+	ibool	in_btr_search,	/* in: TRUE if the hash table is used in
+				the btr_search module */
+	ulint	n,		/* in: number of array cells */
+	ulint	n_mutexes,	/* in: number of mutexes to protect the
+				hash table: must be a power of 2 */
+	ulint	mutex_level);	/* in: level of the mutexes in the latching
+				order: this is used in the debug version */
+/*****************************************************************
+Inserts an entry into a hash table. If an entry with the same fold number
+is found, its node is updated to point to the new data, and no new node
+is inserted. */
+
+ibool
+ha_insert_for_fold(
+/*===============*/
+				/* out: TRUE on success, FALSE if no more
+				memory could be allocated */
+	hash_table_t*	table,	/* in: hash table */
+	ulint		fold,	/* in: folded value of data; if a node with
+				the same fold value already exists, it is
+				updated to point to the data passed in the
+				parameter 'data', and no new node is
+				created! */
+	void*		data);	/* in: data, must not be NULL */
+/*****************************************************************
+Reserves the necessary hash table mutex and inserts an entry into the hash
+table. The mutex is released before returning. */
+UNIV_INLINE
+ibool
+ha_insert_for_fold_mutex(
+/*=====================*/
+				/* out: TRUE on success, FALSE if no more
+				memory could be allocated */
+	hash_table_t*	table,	/* in: hash table */
+	ulint		fold,	/* in: folded value of data; if a node with
+				the same fold value already exists, it is
+				updated to point to the data passed in the
+				parameter 'data', and no new node is
+				created! */
+	void*		data);	/* in: data, must not be NULL */
+/*****************************************************************
+Deletes an entry from a hash table. */
+
+void
+ha_delete(
+/*======*/
+	hash_table_t*	table,	/* in: hash table */
+	ulint		fold,	/* in: folded value of data */
+	void*		data);	/* in: data, must not be NULL and must exist
+				in the hash table */
+/*************************************************************
+Looks for an element when we know the pointer to the data and deletes
+it from the hash table if found. */
+UNIV_INLINE
+ibool
+ha_search_and_delete_if_found(
+/*==========================*/
+				/* out: TRUE if found */
+	hash_table_t*	table,	/* in: hash table */
+	ulint		fold,	/* in: folded value of the searched data */
+	void*		data);	/* in: pointer to the data */
+/*********************************************************************
+Removes from the chain determined by fold all nodes whose data pointer
+points to the page given. */
+
+void
+ha_remove_all_nodes_to_page(
+/*========================*/
+	hash_table_t*	table,	/* in: hash table */
+	ulint		fold,	/* in: fold value */
+	page_t*		page);	/* in: buffer page */
+/*****************************************************************
+Validates a hash table. */
+
+ibool
+ha_validate(
+/*========*/
+				/* out: TRUE if ok */
+	hash_table_t*	table);	/* in: hash table */
+/*****************************************************************
+Prints info of a hash table. */
+
+void
+ha_print_info(
+/*==========*/
+	FILE*		file,	/* in: file where to print */
+	hash_table_t*	table);	/* in: hash table */
+
+/* The hash table external chain node. The nodes of one hash cell form a
+singly linked list through the field 'next'. */
+
+typedef struct ha_node_struct ha_node_t;
+struct ha_node_struct {
+	ha_node_t* next; /* next chain node or NULL if none; read by
+			ha_chain_get_next() */
+	void*	data;	/* pointer to the data; accessed through
+			ha_node_get_data() and ha_node_set_data() */
+	ulint	fold;	/* fold value for the data; ha_search() compares
+			this to the searched fold value */
+};
+
+#ifndef UNIV_NONINL
+#include "ha0ha.ic"
+#endif
+
+#endif 
--- a/include/ha0ha.ic
+++ b/include/ha0ha.ic
@ -0,0 +1,220 @@
+/************************************************************************
+The hash table with external chains
+
+(c) 1994-1997 Innobase Oy
+
+Created 8/18/1994 Heikki Tuuri
+*************************************************************************/
+
+#include "ut0rnd.h"
+#include "mem0mem.h"
+
+/***************************************************************
+Deletes a hash node. */
+
+void
+ha_delete_hash_node(
+/*================*/
+	hash_table_t*	table,		/* in: hash table */
+	ha_node_t*	del_node);	/* in: node to be deleted */
+
+/**********************************************************************
+Returns the data pointer stored in a hash chain node. */
+UNIV_INLINE
+void*
+ha_node_get_data(
+/*=============*/
+				/* out: pointer to the data */
+	ha_node_t*	node)	/* in: hash chain node */
+{
+	void*	data;
+
+	data = node->data;
+
+	return(data);
+}
+
+/**********************************************************************
+Stores a new data pointer in a hash chain node. The other fields of the
+node are left as they are. */
+UNIV_INLINE
+void
+ha_node_set_data(
+/*=============*/
+	ha_node_t*	node,	/* in: hash chain node to modify */
+	void*		data)	/* in: new pointer to the data */
+{
+	node->data = data;
+
+	return;
+}
+
+/**********************************************************************
+Returns the successor of a node in its hash chain. */
+UNIV_INLINE
+ha_node_t*
+ha_chain_get_next(
+/*==============*/
+				/* out: next node, NULL if none */
+	ha_node_t*	node)	/* in: hash chain node */
+{
+	ha_node_t*	successor;
+
+	successor = node->next;
+
+	return(successor);
+}
+
+/**********************************************************************
+Returns the head of the hash chain that a fold value maps to: the fold is
+hashed to a cell index with hash_calc_hash() and the node pointer of that
+cell is returned. */
+UNIV_INLINE
+ha_node_t*
+ha_chain_get_first(
+/*===============*/
+				/* out: first node, NULL if none */
+	hash_table_t*	table,	/* in: hash table */
+	ulint		fold)	/* in: fold value determining the chain */
+{
+	ulint		cell_no;
+	hash_cell_t*	cell;
+
+	cell_no = hash_calc_hash(fold, table);
+	cell = hash_get_nth_cell(table, cell_no);
+
+	return(cell->node);
+}
+
+/*****************************************************************
+Scans the hash chain determined by fold and returns the first node whose
+stored fold value equals fold. If the table has mutexes, the function
+checks under UNIV_SYNC_DEBUG that the caller owns the mutex of the fold. */
+UNIV_INLINE
+ha_node_t*
+ha_search(
+/*======*/
+				/* out: pointer to the first hash table node
+				in chain having the fold number, NULL if not
+				found */
+	hash_table_t*	table,	/* in: hash table */
+	ulint		fold)	/* in: folded value of the searched data */
+{
+	ha_node_t*	cur;
+
+#ifdef UNIV_SYNC_DEBUG
+	ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
+#endif /* UNIV_SYNC_DEBUG */
+
+	for (cur = ha_chain_get_first(table, fold);
+	     cur != NULL;
+	     cur = ha_chain_get_next(cur)) {
+
+		if (cur->fold == fold) {
+
+			break;
+		}
+	}
+
+	/* cur is the matching node, or NULL if the chain was exhausted */
+
+	return(cur);
+}
+
+/*****************************************************************
+Looks up the first node in the chain of fold whose stored fold value equals
+fold, and returns the data pointer of that node. The chain scan and the
+mutex ownership check are done by ha_search(). */
+UNIV_INLINE
+void*
+ha_search_and_get_data(
+/*===================*/
+				/* out: pointer to the data of the first hash
+				table node in chain having the fold number,
+				NULL if not found */
+	hash_table_t*	table,	/* in: hash table */
+	ulint		fold)	/* in: folded value of the searched data */
+{
+	ha_node_t*	hit;
+
+	hit = ha_search(table, fold);
+
+	if (hit == NULL) {
+
+		return(NULL);
+	}
+
+	return(hit->data);
+}
+
+/*************************************************************
+Scans the hash chain determined by fold and returns the first node whose
+data pointer equals data. Only the data pointers are compared; the fold
+values stored in the nodes are not. */
+UNIV_INLINE
+ha_node_t*
+ha_search_with_data(
+/*================*/
+				/* out: pointer to the hash table node, NULL
+				if not found in the table */
+	hash_table_t*	table,	/* in: hash table */
+	ulint		fold,	/* in: folded value of the searched data */
+	void*		data)	/* in: pointer to the data */
+{
+	ha_node_t*	cur;
+
+#ifdef UNIV_SYNC_DEBUG
+	ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
+#endif /* UNIV_SYNC_DEBUG */
+
+	cur = ha_chain_get_first(table, fold);
+
+	while (cur != NULL && cur->data != data) {
+
+		cur = ha_chain_get_next(cur);
+	}
+
+	/* cur is the matching node, or NULL if the chain was exhausted */
+
+	return(cur);
+}
+
+/*************************************************************
+Searches the chain of fold for a node whose data pointer equals data and,
+if such a node exists, removes it with ha_delete_hash_node(). */
+UNIV_INLINE
+ibool
+ha_search_and_delete_if_found(
+/*==========================*/
+				/* out: TRUE if found */
+	hash_table_t*	table,	/* in: hash table */
+	ulint		fold,	/* in: folded value of the searched data */
+	void*		data)	/* in: pointer to the data */
+{
+	ha_node_t*	victim;
+
+#ifdef UNIV_SYNC_DEBUG
+	ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
+#endif /* UNIV_SYNC_DEBUG */
+
+	victim = ha_search_with_data(table, fold, data);
+
+	if (victim == NULL) {
+		/* Nothing to delete */
+
+		return(FALSE);
+	}
+
+	ha_delete_hash_node(table, victim);
+
+	return(TRUE);
+}
+
+/*****************************************************************
+Wraps ha_insert_for_fold() in hash_mutex_enter() / hash_mutex_exit() for
+the given fold value, so that the insert is done while holding the hash
+table mutex of the fold. */
+UNIV_INLINE
+ibool
+ha_insert_for_fold_mutex(
+/*=====================*/
+				/* out: the return value of
+				ha_insert_for_fold(): TRUE on success, FALSE
+				if no more memory could be allocated */
+	hash_table_t*	table,	/* in: hash table */
+	ulint		fold,	/* in: folded value of data; if a node with
+				the same fold value already exists, it is
+				updated to point to the data passed in the
+				parameter 'data', and no new node is
+				created! */
+	void*		data)	/* in: data, must not be NULL */
+{
+	ibool	inserted;
+
+	hash_mutex_enter(table, fold);
+	inserted = ha_insert_for_fold(table, fold, data);
+	hash_mutex_exit(table, fold);
+
+	return(inserted);
+}
--- a/include/hash0hash.h
+++ b/include/hash0hash.h
@ -0,0 +1,330 @@
+/******************************************************
+The simple hash table utility
+
+(c) 1997 Innobase Oy
+
+Created 5/20/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef hash0hash_h
+#define hash0hash_h
+
+#include "univ.i"
+#include "mem0mem.h"
+#include "sync0sync.h"
+
+typedef struct hash_table_struct hash_table_t;
+typedef struct hash_cell_struct hash_cell_t;
+
+typedef void*	hash_node_t;
+
+/*****************************************************************
+Creates a hash table with >= n array cells. The actual number
+of cells is chosen to be a prime number slightly bigger than n. */
+
+hash_table_t*
+hash_create(
+/*========*/
+			/* out, own: created table */
+	ulint	n);	/* in: number of array cells */
+/*****************************************************************
+Creates a mutex array to protect a hash table. */
+
+void
+hash_create_mutexes(
+/*================*/
+	hash_table_t*	table,		/* in: hash table */
+	ulint		n_mutexes,	/* in: number of mutexes */
+	ulint		sync_level);	/* in: latching order level of the
+					mutexes: used in the debug version */
+/*****************************************************************
+Frees a hash table. */
+
+void
+hash_table_free(
+/*============*/
+	hash_table_t*	table);	/* in, own: hash table */
+/******************************************************************
+Calculates the hash value from a folded value. */
+UNIV_INLINE
+ulint
+hash_calc_hash(
+/*===========*/
+				/* out: hashed value */
+	ulint		fold,	/* in: folded value */
+	hash_table_t*	table);	/* in: hash table */
+/************************************************************************
+Assert that the mutex for the table in a hash operation is owned. The check
+is skipped if the table has no mutexes. NOTE that under UNIV_SYNC_DEBUG the
+expansion already ends in a semicolon, and otherwise the macro expands to
+nothing: the macros in this file invoke it without a trailing semicolon. */
+#ifdef UNIV_SYNC_DEBUG
+# define HASH_ASSERT_OWNED(TABLE, FOLD) \
+ut_ad(!(TABLE)->mutexes || mutex_own(hash_get_mutex(TABLE, FOLD)));
+#else
+# define HASH_ASSERT_OWNED(TABLE, FOLD)
+#endif
+
+/***********************************************************************
+Inserts a struct to a hash table. TYPE is the type of the struct, NAME the
+name of its chain link field, and DATA a pointer to the struct. The link
+field of DATA is set to NULL and DATA is appended to the END of the chain
+of the cell that FOLD hashes to, which requires walking the whole chain.
+No check for duplicates is made. DATA, TABLE and FOLD are evaluated more
+than once, so they must not have side effects. */
+
+#define HASH_INSERT(TYPE, NAME, TABLE, FOLD, DATA)\
+do {\
+	hash_cell_t*	cell3333;\
+	TYPE*		struct3333;\
+\
+	HASH_ASSERT_OWNED(TABLE, FOLD)\
+\
+	(DATA)->NAME = NULL;\
+\
+	cell3333 = hash_get_nth_cell(TABLE, hash_calc_hash(FOLD, TABLE));\
+\
+	if (cell3333->node == NULL) {\
+		cell3333->node = DATA;\
+	} else {\
+		struct3333 = cell3333->node;\
+\
+		while (struct3333->NAME != NULL) {\
+\
+			struct3333 = struct3333->NAME;\
+		}\
+\
+		struct3333->NAME = DATA;\
+	}\
+} while (0)
+
+/***********************************************************************
+Deletes a struct from a hash table. TYPE is the type of the struct, NAME the
+name of its chain link field, and DATA a pointer to the struct to unlink.
+DATA must be in the chain of the cell that FOLD hashes to: if the end of
+the chain is reached without finding it, the ut_a assertion fails. The
+memory of DATA is not freed and its link field is left unchanged. DATA,
+TABLE and FOLD are evaluated more than once, so they must not have side
+effects. */
+
+#define HASH_DELETE(TYPE, NAME, TABLE, FOLD, DATA)\
+do {\
+	hash_cell_t*	cell3333;\
+	TYPE*		struct3333;\
+\
+	HASH_ASSERT_OWNED(TABLE, FOLD)\
+\
+	cell3333 = hash_get_nth_cell(TABLE, hash_calc_hash(FOLD, TABLE));\
+\
+	if (cell3333->node == (DATA)) {\
+		cell3333->node = (DATA)->NAME;\
+	} else {\
+		struct3333 = cell3333->node;\
+\
+		/* The pointer must be checked BEFORE the loop condition\
+		dereferences it: first for an empty chain, and then after\
+		each step to the next node */\
+\
+		ut_a(struct3333);\
+\
+		while (struct3333->NAME != (DATA)) {\
+\
+			struct3333 = struct3333->NAME;\
+			ut_a(struct3333);\
+		}\
+\
+		struct3333->NAME = (DATA)->NAME;\
+	}\
+} while (0)
+
+/***********************************************************************
+Gets the first struct in a hash chain, NULL if none. NOTE that HASH_VAL is
+the index of the cell, i.e., a value returned by hash_calc_hash(), not a
+fold value. */
+
+#define HASH_GET_FIRST(TABLE, HASH_VAL)\
+	(hash_get_nth_cell(TABLE, HASH_VAL)->node)
+
+/***********************************************************************
+Gets the next struct in a hash chain, NULL if none. NAME is the name of the
+chain link field in the struct and DATA a non-NULL pointer to the struct. */
+
+#define HASH_GET_NEXT(NAME, DATA)	((DATA)->NAME)
+
+/************************************************************************
+Looks for a struct in a hash table. DATA must be an lvalue: it is assigned
+each struct in the chain of FOLD in turn, and TEST is evaluated after each
+assignment, so TEST can refer to the current struct through DATA. When the
+macro finishes, DATA points to the first struct for which TEST was true, or
+is NULL if there was none. NOTE that the macro expands to a plain { ... }
+block, not to do { ... } while (0): a semicolon after the invocation is an
+extra empty statement, which matters, e.g., between an if and its else. */
+#define HASH_SEARCH(NAME, TABLE, FOLD, DATA, TEST)\
+{\
+\
+	HASH_ASSERT_OWNED(TABLE, FOLD)\
+\
+	(DATA) = HASH_GET_FIRST(TABLE, hash_calc_hash(FOLD, TABLE));\
+\
+	while ((DATA) != NULL) {\
+		if (TEST) {\
+			break;\
+		} else {\
+			(DATA) = HASH_GET_NEXT(NAME, DATA);\
+		}\
+	}\
+}
+
+/****************************************************************
+Gets the nth cell in a hash table. */
+UNIV_INLINE
+hash_cell_t*
+hash_get_nth_cell(
+/*==============*/
+				/* out: pointer to cell */
+	hash_table_t* 	table,	/* in: hash table */
+	ulint 		n);	/* in: cell index */
+/*****************************************************************
+Returns the number of cells in a hash table. */
+UNIV_INLINE
+ulint
+hash_get_n_cells(
+/*=============*/
+				/* out: number of cells */
+	hash_table_t*	table);	/* in: table */
+/***********************************************************************
+Deletes a struct which is stored in the heap of the hash table, and compacts
+the heap. The fold value must be stored in the struct NODE in a field named
+'fold'. TYPE is the type of the struct and NAME the name of its chain link
+field. The macro works in three steps: (1) NODE is unlinked from its chain
+with HASH_DELETE; (2) if NODE is not the topmost struct in the heap given by
+hash_get_heap(), the topmost struct is copied over NODE and the chain
+pointer that pointed to the topmost struct is redirected to NODE; (3) the
+topmost sizeof(TYPE) bytes of the heap are freed. NOTE that after step (2)
+the memory at NODE holds a DIFFERENT, still live struct, and that the old
+address of the moved struct is no longer valid after step (3). NODE and
+TABLE are evaluated more than once, so they must not have side effects. */
+
+#define HASH_DELETE_AND_COMPACT(TYPE, NAME, TABLE, NODE)\
+do {\
+	TYPE*		node111;\
+	TYPE*		top_node111;\
+	hash_cell_t*	cell111;\
+	ulint		fold111;\
+\
+	fold111 = (NODE)->fold;\
+\
+	HASH_DELETE(TYPE, NAME, TABLE, fold111, NODE);\
+\
+	top_node111 = (TYPE*)mem_heap_get_top(\
+				hash_get_heap(TABLE, fold111),\
+							sizeof(TYPE));\
+\
+	/* If the node to remove is not the top node in the heap, compact the\
+	heap of nodes by moving the top node in the place of NODE. */\
+\
+	if (NODE != top_node111) {\
+\
+		/* Copy the top node in place of NODE */\
+\
+		*(NODE) = *top_node111;\
+\
+		cell111 = hash_get_nth_cell(TABLE,\
+				hash_calc_hash(top_node111->fold, TABLE));\
+\
+		/* Look for the pointer to the top node, to update it */\
+\
+		if (cell111->node == top_node111) {\
+			/* The top node is the first in the chain */\
+\
+			cell111->node = NODE;\
+		} else {\
+			/* We have to look for the predecessor of the top\
+			node; the loop has no NULL check, it relies on the\
+			top node being in the chain of its own fold value */\
+			node111 = cell111->node;\
+\
+			while (top_node111 != HASH_GET_NEXT(NAME, node111)) {\
+\
+				node111 = HASH_GET_NEXT(NAME, node111);\
+			}\
+\
+			/* Now we have the predecessor node */\
+\
+			node111->NAME = NODE;\
+		}\
+	}\
+\
+	/* Free the space occupied by the top node */\
+\
+	mem_heap_free_top(hash_get_heap(TABLE, fold111), sizeof(TYPE));\
+} while (0)
+
+/****************************************************************
+Gets the mutex index for a fold value in a hash table. */
+UNIV_INLINE
+ulint
+hash_get_mutex_no(
+/*==============*/
+				/* out: mutex number */
+	hash_table_t* 	table,	/* in: hash table */
+	ulint 		fold);	/* in: fold */
+/****************************************************************
+Gets the nth heap in a hash table. */
+UNIV_INLINE
+mem_heap_t*
+hash_get_nth_heap(
+/*==============*/
+				/* out: mem heap */
+	hash_table_t* 	table,	/* in: hash table */
+	ulint 		i);	/* in: index of the heap */
+/****************************************************************
+Gets the heap for a fold value in a hash table. */
+UNIV_INLINE
+mem_heap_t*
+hash_get_heap(
+/*==========*/
+				/* out: mem heap */
+	hash_table_t* 	table,	/* in: hash table */
+	ulint 		fold);	/* in: fold */
+/****************************************************************
+Gets the nth mutex in a hash table. */
+UNIV_INLINE
+mutex_t*
+hash_get_nth_mutex(
+/*===============*/
+				/* out: mutex */
+	hash_table_t* 	table,	/* in: hash table */
+	ulint 		i);	/* in: index of the mutex */
+/****************************************************************
+Gets the mutex for a fold value in a hash table. */
+UNIV_INLINE
+mutex_t*
+hash_get_mutex(
+/*===========*/
+				/* out: mutex */
+	hash_table_t* 	table,	/* in: hash table */
+	ulint 		fold);	/* in: fold */
+/****************************************************************
+Reserves the mutex for a fold value in a hash table. */
+
+void
+hash_mutex_enter(
+/*=============*/
+	hash_table_t* 	table,	/* in: hash table */
+	ulint 		fold);	/* in: fold */
+/****************************************************************
+Releases the mutex for a fold value in a hash table. */
+
+void
+hash_mutex_exit(
+/*============*/
+	hash_table_t* 	table,	/* in: hash table */
+	ulint 		fold);	/* in: fold */
+/****************************************************************
+Reserves all the mutexes of a hash table, in an ascending order. */
+
+void
+hash_mutex_enter_all(
+/*=================*/
+	hash_table_t* 	table);	/* in: hash table */
+/****************************************************************
+Releases all the mutexes of a hash table. */
+
+void
+hash_mutex_exit_all(
+/*================*/
+	hash_table_t* 	table);	/* in: hash table */
+
+
+struct hash_cell_struct{
+	void*	node;	/* hash chain node, NULL if none */
+};
+
+/* The hash table structure */
+struct hash_table_struct {
+	ibool		adaptive;/* TRUE if this is the hash table of the
+				adaptive hash index */
+	ulint		n_cells;/* number of cells in the hash table */
+	hash_cell_t*	array;	/* pointer to cell array */
+	ulint		n_mutexes;/* if mutexes != NULL, then the number of
+				mutexes, must be a power of 2 */
+	mutex_t*	mutexes;/* NULL, or an array of mutexes used to
+				protect segments of the hash table */
+	mem_heap_t**	heaps;	/* if this is non-NULL, hash chain nodes for
+				external chaining can be allocated from these
+				memory heaps; there are then n_mutexes many of
+				these heaps */
+	mem_heap_t*	heap;	/* if this is non-NULL, hash_get_heap()
+				returns this heap for every fold value and
+				the array heaps is not consulted */
+	ulint		magic_n;/* NOTE(review): presumably set to
+				HASH_TABLE_MAGIC_N for a validity check;
+				confirm against the code that creates the
+				table */
+};
+
+#define HASH_TABLE_MAGIC_N	76561114
+
+#ifndef UNIV_NONINL
+#include "hash0hash.ic"
+#endif
+
+#endif
--- a/include/hash0hash.ic
+++ b/include/hash0hash.ic
@ -0,0 +1,130 @@
+/******************************************************
+The simple hash table utility
+
+(c) 1997 Innobase Oy
+
+Created 5/20/1997 Heikki Tuuri
+*******************************************************/
+
+#include "ut0rnd.h"
+
+/****************************************************************
+Returns a pointer to the cell with index n in the cell array of a hash
+table. The function checks with ut_ad that n < n_cells. */
+UNIV_INLINE
+hash_cell_t*
+hash_get_nth_cell(
+/*==============*/
+				/* out: pointer to cell */
+	hash_table_t* 	table,	/* in: hash table */
+	ulint 		n)	/* in: cell index */
+{
+	hash_cell_t*	cell;
+
+	ut_ad(n < table->n_cells);
+
+	cell = &(table->array[n]);
+
+	return(cell);
+}
+
+/*****************************************************************
+Returns the value of the n_cells field of a hash table, i.e., the number
+of cells in its cell array. */
+UNIV_INLINE
+ulint
+hash_get_n_cells(
+/*=============*/
+				/* out: number of cells */
+	hash_table_t*	table)	/* in: table */
+{
+	ulint	n_cells;
+
+	n_cells = table->n_cells;
+
+	return(n_cells);
+}
+
+/******************************************************************
+Maps a folded value to a hash value by passing it, together with the number
+of cells in the table, to ut_hash_ulint(). The callers in this module use
+the result as a cell index for hash_get_nth_cell(). */
+UNIV_INLINE
+ulint
+hash_calc_hash(
+/*===========*/
+				/* out: hashed value */
+	ulint		fold,	/* in: folded value */
+	hash_table_t*	table)	/* in: hash table */
+{
+	ulint	n_cells;
+
+	n_cells = table->n_cells;
+
+	return(ut_hash_ulint(fold, n_cells));
+}
+
+/****************************************************************
+Maps a fold value to the index of a mutex (and of a heap) of a hash table
+by passing the fold and n_mutexes to ut_2pow_remainder(). */
+UNIV_INLINE
+ulint
+hash_get_mutex_no(
+/*==============*/
+				/* out: mutex number */
+	hash_table_t* 	table,	/* in: hash table */
+	ulint 		fold)	/* in: fold */
+{
+	ulint	n_mutexes;
+
+	n_mutexes = table->n_mutexes;
+
+	return(ut_2pow_remainder(fold, n_mutexes));
+}
+
+/****************************************************************
+Returns the memory heap with index i from the array heaps of a hash table.
+The function checks with ut_ad that i < n_mutexes. */
+UNIV_INLINE
+mem_heap_t*
+hash_get_nth_heap(
+/*==============*/
+				/* out: mem heap */
+	hash_table_t* 	table,	/* in: hash table */
+	ulint 		i)	/* in: index of the heap */
+{
+	mem_heap_t**	slot;
+
+	ut_ad(i < table->n_mutexes);
+
+	slot = table->heaps + i;
+
+	return(*slot);
+}
+
+/****************************************************************
+Returns the memory heap to use for a fold value. If the field heap of the
+table is non-NULL, it is returned for every fold value; otherwise the heap
+is picked from the array heaps with the index given by
+hash_get_mutex_no(). */
+UNIV_INLINE
+mem_heap_t*
+hash_get_heap(
+/*==========*/
+				/* out: mem heap */
+	hash_table_t* 	table,	/* in: hash table */
+	ulint 		fold)	/* in: fold */
+{
+	if (table->heap == NULL) {
+		/* No single heap: select one by the fold value */
+
+		return(hash_get_nth_heap(table,
+					hash_get_mutex_no(table, fold)));
+	}
+
+	return(table->heap);
+}
+
+/****************************************************************
+Returns a pointer to the mutex with index i in the mutex array of a hash
+table. The function checks with ut_ad that i < n_mutexes. */
+UNIV_INLINE
+mutex_t*
+hash_get_nth_mutex(
+/*===============*/
+				/* out: mutex */
+	hash_table_t* 	table,	/* in: hash table */
+	ulint 		i)	/* in: index of the mutex */
+{
+	mutex_t*	mutex;
+
+	ut_ad(i < table->n_mutexes);
+
+	mutex = &(table->mutexes[i]);
+
+	return(mutex);
+}
+
+/****************************************************************
+Returns the mutex that protects a fold value: the mutex index is obtained
+from hash_get_mutex_no() and the mutex itself from hash_get_nth_mutex(). */
+UNIV_INLINE
+mutex_t*
+hash_get_mutex(
+/*===========*/
+				/* out: mutex */
+	hash_table_t* 	table,	/* in: hash table */
+	ulint 		fold)	/* in: fold */
+{
+	return(hash_get_nth_mutex(table, hash_get_mutex_no(table, fold)));
+}
--- a/include/ibuf0ibuf.h
+++ b/include/ibuf0ibuf.h
@ -0,0 +1,307 @@
+/******************************************************
+Insert buffer
+
+(c) 1997 Innobase Oy
+
+Created 7/19/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef ibuf0ibuf_h
+#define ibuf0ibuf_h
+
+#include "univ.i"
+
+#include "dict0mem.h"
+#include "dict0dict.h"
+#include "mtr0mtr.h"
+#include "que0types.h"
+#include "ibuf0types.h"
+#include "fsp0fsp.h"
+
+extern ibuf_t*	ibuf;
+
+/**********************************************************************
+Creates the insert buffer data struct for a single tablespace. Reads the
+root page of the insert buffer tree in the tablespace. This function can
+be called only after the dictionary system has been initialized, as this
+creates also the insert buffer table and index for this tablespace. */
+
+ibuf_data_t*
+ibuf_data_init_for_space(
+/*=====================*/
+			/* out, own: ibuf data struct, linked to the list
+			in ibuf control structure. */
+	ulint	space);	/* in: space id */
+/**********************************************************************
+Creates the insert buffer data structure at a database startup and
+initializes the data structures for the insert buffer of each tablespace. */
+
+void
+ibuf_init_at_db_start(void);
+/*=======================*/
+/*************************************************************************
+Reads the biggest tablespace id from the high end of the insert buffer
+tree and updates the counter in fil_system. */
+
+void
+ibuf_update_max_tablespace_id(void);
+/*===============================*/
+/*************************************************************************
+Initializes an ibuf bitmap page. */
+
+void
+ibuf_bitmap_page_init(
+/*==================*/
+	page_t*	page,	/* in: bitmap page */
+	mtr_t*	mtr);	/* in: mtr */
+/****************************************************************************
+Resets the free bits of the page in the ibuf bitmap. This is done in a
+separate mini-transaction, hence this operation does not restrict further
+work to only ibuf bitmap operations, which would result if the latch to the
+bitmap page were kept. */
+
+void
+ibuf_reset_free_bits_with_type(
+/*===========================*/
+	ulint	type,	/* in: index type */
+	page_t*	page);	/* in: index page; free bits are set to 0 if the index
+			is non-clustered and non-unique and the page level is
+			0 */
+/****************************************************************************
+Resets the free bits of the page in the ibuf bitmap. This is done in a
+separate mini-transaction, hence this operation does not restrict further
+work to solely ibuf bitmap operations, which would result if the latch to
+the bitmap page were kept. */
+
+void
+ibuf_reset_free_bits(
+/*=================*/
+	dict_index_t*	index,	/* in: index */
+	page_t*		page);	/* in: index page; free bits are set to 0 if
+				the index is non-clustered and non-unique and
+				the page level is 0 */
+/****************************************************************************
+Updates the free bits of the page in the ibuf bitmap if there is not enough
+free space on the page any more. This is done in a separate mini-transaction,
+hence this operation does not restrict further work to only ibuf bitmap
+operations, which would result if the latch to the bitmap page were kept. */
+UNIV_INLINE
+void
+ibuf_update_free_bits_if_full(
+/*==========================*/
+	dict_index_t*	index,	/* in: index */
+	page_t*		page,	/* in: index page to which we have added new
+				records; the free bits are updated if the
+				index is non-clustered and non-unique and
+				the page level is 0, and the page becomes
+				fuller */
+	ulint		max_ins_size,/* in: value of maximum insert size with
+				reorganize before the latest operation
+				performed to the page */
+	ulint		increase);/* in: upper limit for the additional space
+				used in the latest operation, if known, or
+				ULINT_UNDEFINED */
+/**************************************************************************
+Updates the free bits for the page to reflect the present state. Does this
+in the mtr given, which means that the latching order rules virtually
+prevent any further operations for this OS thread until mtr is committed. */
+
+void
+ibuf_update_free_bits_low(
+/*======================*/
+	dict_index_t*	index,		/* in: index */
+	page_t*		page,		/* in: index page */
+	ulint		max_ins_size,	/* in: value of maximum insert size
+					with reorganize before the latest
+					operation performed to the page */
+	mtr_t*		mtr);		/* in: mtr */
+/**************************************************************************
+Updates the free bits for the two pages to reflect the present state. Does
+this in the mtr given, which means that the latching order rules virtually
+prevent any further operations until mtr is committed. */
+
+void
+ibuf_update_free_bits_for_two_pages_low(
+/*====================================*/
+	dict_index_t*	index,	/* in: index */
+	page_t*		page1,	/* in: index page */
+	page_t*		page2,	/* in: index page */
+	mtr_t*		mtr);	/* in: mtr */
+/**************************************************************************
+A basic partial test if an insert to the insert buffer could be possible and
+recommended. */
+UNIV_INLINE
+ibool
+ibuf_should_try(
+/*============*/
+	dict_index_t*	index,			/* in: index where to insert */
+	ulint		ignore_sec_unique);	/* in: if != 0, we should
+						ignore UNIQUE constraint on
+						a secondary index when we
+						decide */
+/**********************************************************************
+Returns TRUE if the current OS thread is performing an insert buffer
+routine. */
+
+ibool
+ibuf_inside(void);
+/*=============*/
+		/* out: TRUE if inside an insert buffer routine: for instance,
+		a read-ahead of non-ibuf pages is then forbidden */
+/***************************************************************************
+Checks if a page address is an ibuf bitmap page (level 3 page) address. */
+UNIV_INLINE
+ibool
+ibuf_bitmap_page(
+/*=============*/
+			/* out: TRUE if a bitmap page */
+	ulint	page_no);/* in: page number */
+/***************************************************************************
+Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages. */
+
+ibool
+ibuf_page(
+/*======*/
+			/* out: TRUE if level 2 or level 3 page */
+	ulint	space,	/* in: space id */
+	ulint	page_no);/* in: page number */
+/***************************************************************************
+Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages.
+Unlike ibuf_page(), this variant takes the mtr to use from the caller. */
+
+ibool
+ibuf_page_low(
+/*==========*/
+			/* out: TRUE if level 2 or level 3 page */
+	ulint	space,	/* in: space id */
+	ulint	page_no,/* in: page number */
+	mtr_t*	mtr);	/* in: mtr which will contain an x-latch to the
+			bitmap page if the page is not one of the fixed
+			address ibuf pages */
+/***************************************************************************
+Frees excess pages from the ibuf free list. This function is called when an OS
+thread calls fsp services to allocate a new file segment, or a new page to a
+file segment, and the thread did not own the fsp latch before this call. */ 
+
+void
+ibuf_free_excess_pages(
+/*===================*/
+	ulint	space);	/* in: space id */
+/*************************************************************************
+Makes an index insert to the insert buffer, instead of directly to the disk
+page, if this is possible. Does not do insert if the index is clustered
+or unique. */
+
+ibool
+ibuf_insert(
+/*========*/
+				/* out: TRUE if success */
+	dtuple_t*	entry,	/* in: index entry to insert */
+	dict_index_t*	index,	/* in: index where to insert */
+	ulint		space,	/* in: space id where to insert */
+	ulint		page_no,/* in: page number where to insert */
+	que_thr_t*	thr);	/* in: query thread */
+/*************************************************************************
+When an index page is read from a disk to the buffer pool, this function
+inserts to the page the possible index entries buffered in the insert buffer.
+The entries are deleted from the insert buffer. If the page is not read, but
+created in the buffer pool, this function deletes its buffered entries from
+the insert buffer; there can exist entries for such a page if the page
+belonged to an index which subsequently was dropped. */
+
+void
+ibuf_merge_or_delete_for_page(
+/*==========================*/
+	page_t*	page,	/* in: if page has been read from disk, pointer to
+			the page x-latched, else NULL */
+	ulint	space,	/* in: space id of the index page */
+	ulint	page_no,/* in: page number of the index page */
+	ibool	update_ibuf_bitmap);/* in: normally this is set to TRUE, but if
+			we have deleted or are deleting the tablespace, then we
+			naturally do not want to update a non-existent bitmap
+			page */
+/*************************************************************************
+Deletes all entries in the insert buffer for a given space id. This is used
+in DISCARD TABLESPACE and IMPORT TABLESPACE.
+NOTE: this does not update the page free bitmaps in the space. The space will
+become CORRUPT when you call this function! */
+
+void
+ibuf_delete_for_discarded_space(
+/*============================*/
+	ulint	space);	/* in: space id */
+/*************************************************************************
+Contracts insert buffer trees by reading pages to the buffer pool. */
+
+ulint
+ibuf_contract(
+/*==========*/
+			/* out: a lower limit for the combined size in bytes
+			of entries which will be merged from ibuf trees to the
+			pages read, 0 if ibuf is empty */
+	ibool	sync);	/* in: TRUE if the caller wants to wait for the
+			issued read with the highest tablespace address
+			to complete */
+/*************************************************************************
+Contracts insert buffer trees by reading pages to the buffer pool. Unlike
+ibuf_contract(), the caller gives the number of pages to try to read. */
+
+ulint
+ibuf_contract_for_n_pages(
+/*======================*/
+			/* out: a lower limit for the combined size in bytes
+			of entries which will be merged from ibuf trees to the
+			pages read, 0 if ibuf is empty */
+	ibool	sync,	/* in: TRUE if the caller wants to wait for the
+			issued read with the highest tablespace address
+			to complete */
+	ulint	n_pages);/* in: try to read at least this many pages to
+			the buffer pool and merge the ibuf contents to
+			them */
+/*************************************************************************
+Parses a redo log record of an ibuf bitmap page init. */
+
+byte*
+ibuf_parse_bitmap_init(
+/*===================*/
+			/* out: end of log record or NULL */
+	byte*	ptr,	/* in: buffer */
+	byte*	end_ptr,/* in: buffer end */
+	page_t*	page,	/* in: page or NULL */
+	mtr_t*	mtr);	/* in: mtr or NULL */
+/**********************************************************************
+Gets the ibuf count for a given page. */
+
+ulint
+ibuf_count_get(
+/*===========*/
+			/* out: number of entries in the insert buffer
+			currently buffered for this page */
+	ulint	space,	/* in: space id */
+	ulint	page_no);/* in: page number */
+/**********************************************************************
+Looks if the insert buffer is empty. */
+
+ibool
+ibuf_is_empty(void);
+/*===============*/
+			/* out: TRUE if empty */
+/**********************************************************************
+Prints info of ibuf. */
+
+void
+ibuf_print(
+/*=======*/
+	FILE*	file);	/* in: file where to print */
+
+#define IBUF_HEADER_PAGE_NO	FSP_IBUF_HEADER_PAGE_NO
+#define IBUF_TREE_ROOT_PAGE_NO	FSP_IBUF_TREE_ROOT_PAGE_NO
+
+/* The ibuf header page currently contains only the file segment header
+for the file segment from which the pages for the ibuf tree are allocated */
+#define IBUF_HEADER		PAGE_DATA
+#define	IBUF_TREE_SEG_HEADER	0	/* fseg header for ibuf tree */
+
+#ifndef UNIV_NONINL
+#include "ibuf0ibuf.ic"
+#endif
+
+#endif 
--- a/include/ibuf0ibuf.ic
+++ b/include/ibuf0ibuf.ic
@ -0,0 +1,232 @@
+/******************************************************
+Insert buffer
+
+(c) 1997 Innobase Oy
+
+Created 7/19/1997 Heikki Tuuri
+*******************************************************/
+
+#include "buf0lru.h"
+#include "page0page.h"
+
+extern ulint	ibuf_flush_count;
+
+/* If this number is n, an index page must contain at least
+(page size / n) bytes of free space for ibuf to try to buffer inserts to
+this page. If there is this much free space, the corresponding bits are set
+in the ibuf bitmap. */
+#define IBUF_PAGE_SIZE_PER_FREE_SPACE	32
+
+/* Insert buffer data struct for a single tablespace */
+struct ibuf_data_struct{
+	ulint		space;	/* space id */
+	ulint		seg_size;/* allocated pages of the file segment
+				containing ibuf header and tree */
+	ulint		size;	/* size of the insert buffer tree in pages */
+	ibool		empty;	/* after an insert to the ibuf tree is
+				performed, this is set to FALSE, and if a
+				contract operation finds the tree empty, this
+				is set to TRUE */
+	ulint		free_list_len;
+				/* length of the free list */
+	ulint		height;	/* tree height */
+	dict_index_t*	index;	/* insert buffer index */
+	UT_LIST_NODE_T(ibuf_data_t) data_list;
+				/* list node linking this struct to the
+				list of ibuf data structs */
+	ulint		n_inserts;/* number of inserts made to the insert
+				buffer */
+	ulint		n_merges;/* number of pages merged */
+	ulint		n_merged_recs;/* number of records merged */
+};
+
+/* If the ibuf meter exceeds this value, then the suitable inserts are made to
+the insert buffer instead of directly to the disk page */
+#define IBUF_THRESHOLD	50	
+
+/* Insert buffer control struct: holds the size figures and the meter for
+the insert buffer, and the base node of the list of the ibuf data structs
+of the tablespaces */
+struct ibuf_struct{
+	ulint		size;		/* current size of the ibuf index
+					trees in pages */
+	ulint		max_size;	/* recommended maximum size in pages
+					for the ibuf index tree */
+	ulint		meter;		/* heuristic meter which measures
+					desirability of doing inserts to the
+					insert buffer instead of directly to
+					the disk page */
+	UT_LIST_BASE_NODE_T(ibuf_data_t) data_list;
+					/* list of ibuf data structs for
+					each tablespace */
+};
+
+/****************************************************************************
+Sets the free bit of the page in the ibuf bitmap. This is done in a separate
+mini-transaction, hence this operation does not restrict further work to only
+ibuf bitmap operations, which would result if the latch to the bitmap page
+were kept. */
+
+void
+ibuf_set_free_bits(
+/*===============*/
+	ulint	type,	/* in: index type */
+	page_t*	page,	/* in: index page; free bit is reset if the index is
+			non-clustered and non-unique, and page level is 0 */
+	ulint	val,	/* in: value to set: < 4 */
+	ulint	max_val);/* in: ULINT_UNDEFINED or a maximum value which
+			the bits must have before setting; this is for
+			debugging */
+
+/**************************************************************************
+A basic partial test of whether an insert to the insert buffer could be
+possible and recommended. The answer is TRUE only for a non-clustered index
+which is non-unique (or whose uniqueness the caller asks to ignore), and
+only while ibuf->meter exceeds IBUF_THRESHOLD. Each TRUE answer increments
+ibuf_flush_count, and on every 8th increment
+buf_LRU_try_free_flushed_blocks() is called. */
+UNIV_INLINE
+ibool
+ibuf_should_try(
+/*============*/
+	dict_index_t*	index,			/* in: index where to insert */
+	ulint		ignore_sec_unique)	/* in: if != 0, we should
+						ignore UNIQUE constraint on
+						a secondary index when we
+						decide */
+{
+	if (index->type & DICT_CLUSTERED) {
+
+		return(FALSE);
+	}
+
+	if (!ignore_sec_unique && (index->type & DICT_UNIQUE)) {
+
+		return(FALSE);
+	}
+
+	if (ibuf->meter <= IBUF_THRESHOLD) {
+
+		return(FALSE);
+	}
+
+	ibuf_flush_count++;
+
+	if (ibuf_flush_count % 8 == 0) {
+
+		buf_LRU_try_free_flushed_blocks();
+	}
+
+	return(TRUE);
+}
+
+/***************************************************************************
+Checks if a page address is an ibuf bitmap page address: this is the case
+when the page number modulo XDES_DESCRIBED_PER_PAGE equals
+FSP_IBUF_BITMAP_OFFSET. */
+UNIV_INLINE
+ibool
+ibuf_bitmap_page(
+/*=============*/
+			/* out: TRUE if a bitmap page */
+	ulint	page_no)/* in: page number */
+{
+	ulint	offset;
+
+	offset = page_no % XDES_DESCRIBED_PER_PAGE;
+
+	return((offset == FSP_IBUF_BITMAP_OFFSET) ? TRUE : FALSE);
+}
+
+/*************************************************************************
+Translates the free space on a page to a value in the ibuf bitmap. The
+free space is counted in units of UNIV_PAGE_SIZE
+/ IBUF_PAGE_SIZE_PER_FREE_SPACE bytes: 0, 1 and 2 units map to themselves,
+3 units map to 2, and 4 or more units map to 3. */
+UNIV_INLINE
+ulint
+ibuf_index_page_calc_free_bits(
+/*===========================*/
+				/* out: value for ibuf bitmap bits */
+	ulint	max_ins_size)	/* in: maximum insert size after reorganize
+				for the page */
+{
+	ulint	unit_size;
+	ulint	n_units;
+
+	unit_size = UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE;
+	n_units = max_ins_size / unit_size;
+
+	if (n_units > 3) {
+
+		return(3);
+	}
+
+	if (n_units == 3) {
+
+		return(2);
+	}
+
+	return(n_units);
+}
+
+/*************************************************************************
+Translates the ibuf free bits to the free space on a page in bytes. The
+bit values 0, 1 and 2 stand for that many units of UNIV_PAGE_SIZE
+/ IBUF_PAGE_SIZE_PER_FREE_SPACE bytes; the bit value 3 stands for 4 units. */
+UNIV_INLINE
+ulint
+ibuf_index_page_calc_free_from_bits(
+/*================================*/
+			/* out: maximum insert size after reorganize for the
+			page */
+	ulint	bits)	/* in: value for ibuf bitmap bits */
+{
+	ulint	n_units;
+
+	ut_ad(bits < 4);
+
+	n_units = (bits == 3) ? 4 : bits;
+
+	return(n_units * UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE);
+}
+
+/*************************************************************************
+Calculates the ibuf bitmap value for the free space of a page: asks
+page_get_max_insert_size_after_reorganize() for the maximum insert size
+and translates the result to bitmap bits. */
+UNIV_INLINE
+ulint
+ibuf_index_page_calc_free(
+/*======================*/
+			/* out: value for ibuf bitmap bits */
+	page_t*	page)	/* in: non-unique secondary index page */
+{
+	ulint	max_ins_size;
+
+	max_ins_size = page_get_max_insert_size_after_reorganize(page, 1);
+
+	return(ibuf_index_page_calc_free_bits(max_ins_size));
+}
+
+/****************************************************************************
+Updates the free bits of the page in the ibuf bitmap if there is not enough
+free space on the page any more. This is done in a separate mini-transaction,
+hence this operation does not restrict further work to only ibuf bitmap
+operations, which would result if the latch to the bitmap page were kept. */
+UNIV_INLINE
+void
+ibuf_update_free_bits_if_full(
+/*==========================*/
+	dict_index_t*	index,	/* in: index */
+	page_t*		page,	/* in: index page to which we have added new
+				records; the free bits are updated if the
+				index is non-clustered and non-unique and
+				the page level is 0, and the page becomes
+				fuller */
+	ulint		max_ins_size,/* in: value of maximum insert size with
+				reorganize before the latest operation
+				performed to the page */
+	ulint		increase)/* in: upper limit for the additional space
+				used in the latest operation, if known, or
+				ULINT_UNDEFINED */
+{
+	ulint	bits_before;
+	ulint	bits_after;
+
+	bits_before = ibuf_index_page_calc_free_bits(max_ins_size);
+
+	if (max_ins_size < increase) {
+		/* The given upper limit cannot be subtracted from the old
+		insert size: calculate the bits from the page itself.
+		NOTE(review): presumably this is also the branch taken when
+		increase is ULINT_UNDEFINED, see the ut_ad below - confirm */
+
+		bits_after = ibuf_index_page_calc_free(page);
+	} else {
+		ut_ad(ULINT_UNDEFINED > UNIV_PAGE_SIZE);
+
+		bits_after = ibuf_index_page_calc_free_bits(
+						max_ins_size - increase);
+#ifdef UNIV_IBUF_DEBUG
+		ut_a(bits_after <= ibuf_index_page_calc_free(page));
+#endif
+	}
+
+	if (bits_after == 0) {
+		/* The page goes to the front of the buffer pool LRU list,
+		so that pages to which we cannot make inserts using the
+		insert buffer do not slip out of the buffer pool */
+
+		buf_page_make_young(page);
+	}
+
+	if (bits_after < bits_before) {
+		ibuf_set_free_bits(index->type, page, bits_after,
+							bits_before);
+	}
+}
--- a/include/ibuf0types.h
+++ b/include/ibuf0types.h
@ -0,0 +1,15 @@
+/******************************************************
+Insert buffer global types
+
+(c) 1997 Innobase Oy
+
+Created 7/29/1997 Heikki Tuuri
+*******************************************************/
+
+#ifndef ibuf0types_h
+#define ibuf0types_h
+
+typedef struct ibuf_data_struct	ibuf_data_t;	/* per-tablespace ibuf data */
+typedef	struct ibuf_struct	ibuf_t;		/* ibuf control struct */
+
+#endif
--- a/include/lock0lock.h
+++ b/include/lock0lock.h
@ -0,0 +1,677 @@
+/******************************************************
+The transaction lock system
+
+(c) 1996 Innobase Oy
+
+Created 5/7/1996 Heikki Tuuri
+*******************************************************/
+
+#ifndef lock0lock_h
+#define lock0lock_h
+
+#include "univ.i"
+#include "trx0types.h"
+#include "rem0types.h"
+#include "dict0types.h"
+#include "que0types.h"
+#include "page0types.h"
+#include "lock0types.h"
+#include "read0types.h"
+#include "hash0hash.h"
+
+#ifdef UNIV_DEBUG
+extern ibool	lock_print_waits;
+#endif /* UNIV_DEBUG */
+/* Buffer for storing information about the most recent deadlock error */
+extern FILE*	lock_latest_err_file;
+
+/*************************************************************************
+Gets the size of a lock struct. */
+
+ulint
+lock_get_size(void);
+/*===============*/
+			/* out: size in bytes */
+/*************************************************************************
+Creates the lock system at database start. */
+
+void
+lock_sys_create(
+/*============*/
+	ulint	n_cells);	/* in: number of slots in lock hash table */
+/*************************************************************************
+Checks if some transaction has an implicit x-lock on a record in a secondary
+index. */
+
+trx_t*
+lock_sec_rec_some_has_impl_off_kernel(
+/*==================================*/
+				/* out: transaction which has the x-lock, or
+				NULL */
+	rec_t*		rec,	/* in: user record */
+	dict_index_t*	index,	/* in: secondary index */
+	const ulint*	offsets);/* in: rec_get_offsets(rec, index) */
+/*************************************************************************
+Checks if some transaction has an implicit x-lock on a record in a clustered
+index. */
+UNIV_INLINE
+trx_t*
+lock_clust_rec_some_has_impl(
+/*=========================*/
+				/* out: transaction which has the x-lock, or
+				NULL */
+	rec_t*		rec,	/* in: user record */
+	dict_index_t*	index,	/* in: clustered index */
+	const ulint*	offsets);/* in: rec_get_offsets(rec, index) */
+/*****************************************************************
+Resets the lock bits for a single record. Releases transactions
+waiting for lock requests here. */
+
+void
+lock_rec_reset_and_release_wait(
+/*============================*/
+	rec_t*	rec);	/* in: record whose lock bits should be reset */
+/*****************************************************************
+Makes a record inherit the locks of another record as gap type
+locks, but does not reset the lock bits of the other record. Also
+waiting lock requests on rec are inherited as GRANTED gap locks. */
+
+void
+lock_rec_inherit_to_gap(
+/*====================*/
+	rec_t*	heir,	/* in: record which inherits */
+	rec_t*	rec);	/* in: record from which inherited; does NOT reset
+			the locks on this record */
+/*****************************************************************
+Updates the lock table when we have reorganized a page. NOTE: we copy
+also the locks set on the infimum of the page; the infimum may carry
+locks if an update of a record is occurring on the page, and its locks
+were temporarily stored on the infimum. */
+
+void
+lock_move_reorganize_page(
+/*======================*/
+	page_t*	page,		/* in: old index page */
+	page_t*	new_page);	/* in: reorganized page */
+/*****************************************************************
+Moves the explicit locks on user records to another page if a record
+list end is moved to another page. */
+
+void
+lock_move_rec_list_end(
+/*===================*/
+	page_t*	new_page,	/* in: index page to move to */
+	page_t*	page,		/* in: index page */
+	rec_t*	rec);		/* in: record on page: this is the
+				first record moved */
+/*****************************************************************
+Moves the explicit locks on user records to another page if a record
+list start is moved to another page. */
+
+void
+lock_move_rec_list_start(
+/*=====================*/
+	page_t*	new_page,	/* in: index page to move to */
+	page_t*	page,		/* in: index page */
+	rec_t*	rec,		/* in: record on page: this is the
+				first record NOT copied */
+	rec_t*	old_end);	/* in: old previous-to-last record on
+				new_page before the records were copied */
+/*****************************************************************
+Updates the lock table when a page is split to the right. */
+
+void
+lock_update_split_right(
+/*====================*/
+	page_t*	right_page,	/* in: right page */
+	page_t*	left_page);	/* in: left page */
+/*****************************************************************
+Updates the lock table when a page is merged to the right. */
+
+void
+lock_update_merge_right(
+/*====================*/
+	rec_t*	orig_succ,	/* in: original successor of infimum
+				on the right page before merge */
+	page_t*	left_page);	/* in: merged index page which will be
+				discarded */
+/*****************************************************************
+Updates the lock table when the root page is copied to another in
+btr_root_raise_and_insert. Note that we leave lock structs on the
+root page, even though they do not make sense on other than leaf
+pages: the reason is that in a pessimistic update the infimum record
+of the root page will act as a dummy carrier of the locks of the record
+to be updated. */
+
+void
+lock_update_root_raise(
+/*===================*/
+	page_t*	new_page,	/* in: index page to which copied */
+	page_t*	root);		/* in: root page */
+/*****************************************************************
+Updates the lock table when a page is copied to another and the original page
+is removed from the chain of leaf pages, except if page is the root! */
+
+void
+lock_update_copy_and_discard(
+/*=========================*/
+	page_t*	new_page,	/* in: index page to which copied */
+	page_t*	page);		/* in: index page; NOT the root! */
+/*****************************************************************
+Updates the lock table when a page is split to the left. */
+
+void
+lock_update_split_left(
+/*===================*/
+	page_t*	right_page,	/* in: right page */
+	page_t*	left_page);	/* in: left page */
+/*****************************************************************
+Updates the lock table when a page is merged to the left. */
+
+void
+lock_update_merge_left(
+/*===================*/
+	page_t*	left_page,	/* in: left page to which merged */
+	rec_t*	orig_pred,	/* in: original predecessor of supremum
+				on the left page before merge */
+	page_t*	right_page);	/* in: merged index page which will be
+				discarded */
+/*****************************************************************
+Resets the original locks on heir and replaces them with gap type locks
+inherited from rec. */
+
+void
+lock_rec_reset_and_inherit_gap_locks(
+/*=================================*/
+	rec_t*	heir,	/* in: heir record */
+	rec_t*	rec);	/* in: record */
+/*****************************************************************
+Updates the lock table when a page is discarded. */
+
+void
+lock_update_discard(
+/*================*/
+	rec_t*	heir,	/* in: record which will inherit the locks */
+	page_t*	page);	/* in: index page which will be discarded */
+/*****************************************************************
+Updates the lock table when a new user record is inserted. */
+
+void
+lock_update_insert(
+/*===============*/
+	rec_t*	rec);	/* in: the inserted record */
+/*****************************************************************
+Updates the lock table when a record is removed. */
+
+void
+lock_update_delete(
+/*===============*/
+	rec_t*	rec);	/* in: the record to be removed */
+/*************************************************************************
+Stores on the page infimum record the explicit locks of another record.
+This function is used to store the lock state of a record when it is
+updated and the size of the record changes in the update. The record
+is in such an update moved, perhaps to another page. The infimum record
+acts as a dummy carrier record, taking care of lock releases while the
+actual record is being moved. */
+
+void
+lock_rec_store_on_page_infimum(
+/*===========================*/
+	page_t*	page,	/* in: page containing the record */
+	rec_t*	rec);	/* in: record whose lock state is stored
+			on the infimum record of the same page; lock
+			bits are reset on the record */
+/*************************************************************************
+Restores the state of explicit lock requests on a single record, where the
+state was stored on the infimum of the page. */
+
+void
+lock_rec_restore_from_page_infimum(
+/*===============================*/
+	rec_t*	rec,	/* in: record whose lock state is restored */
+	page_t*	page);	/* in: page (rec is not necessarily on this page)
+			whose infimum stored the lock state; lock bits are
+			reset on the infimum */ 
+/*************************************************************************
+Returns TRUE if there are explicit record locks on a page. */
+
+ibool
+lock_rec_expl_exist_on_page(
+/*========================*/
+			/* out: TRUE if there are explicit record locks on
+			the page */
+	ulint	space,	/* in: space id */
+	ulint	page_no);/* in: page number */
+/*************************************************************************
+Checks if locks of other transactions prevent an immediate insert of
+a record. If they do, first tests if the query thread should anyway
+be suspended for some reason; if not, then puts the transaction and
+the query thread to the lock wait state and inserts a waiting request
+for a gap x-lock to the lock queue. */
+
+ulint
+lock_rec_insert_check_and_lock(
+/*===========================*/
+				/* out: DB_SUCCESS, DB_LOCK_WAIT,
+				DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+	ulint		flags,	/* in: if BTR_NO_LOCKING_FLAG bit is set,
+				does nothing */
+	rec_t*		rec,	/* in: record after which to insert */
+	dict_index_t*	index,	/* in: index */
+	que_thr_t*	thr,	/* in: query thread */
+	ibool*		inherit);/* out: set to TRUE if the newly inserted
+				record may need to inherit LOCK_GAP type
+				locks from the successor record */
+/*************************************************************************
+Checks if locks of other transactions prevent an immediate modify (update,
+delete mark, or delete unmark) of a clustered index record. If they do,
+first tests if the query thread should anyway be suspended for some
+reason; if not, then puts the transaction and the query thread to the
+lock wait state and inserts a waiting request for a record x-lock to the
+lock queue. */
+
+ulint
+lock_clust_rec_modify_check_and_lock(
+/*=================================*/
+				/* out: DB_SUCCESS, DB_LOCK_WAIT,
+				DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+	ulint		flags,	/* in: if BTR_NO_LOCKING_FLAG bit is set,
+				does nothing */
+	rec_t*		rec,	/* in: record which should be modified */
+	dict_index_t*	index,	/* in: clustered index */
+	const ulint*	offsets,/* in: rec_get_offsets(rec, index) */
+	que_thr_t*	thr);	/* in: query thread */
+/*************************************************************************
+Checks if locks of other transactions prevent an immediate modify
+(delete mark or delete unmark) of a secondary index record. */
+
+ulint
+lock_sec_rec_modify_check_and_lock(
+/*===============================*/
+				/* out: DB_SUCCESS, DB_LOCK_WAIT,
+				DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+	ulint		flags,	/* in: if BTR_NO_LOCKING_FLAG bit is set,
+				does nothing */
+	rec_t*		rec,	/* in: record which should be modified;
+				NOTE: as this is a secondary index, we
+				always have to modify the clustered index
+				record first: see the comment below */
+	dict_index_t*	index,	/* in: secondary index */
+	que_thr_t*	thr);	/* in: query thread */
+/*************************************************************************
+Like the counterpart for a clustered index below, but now we read a
+secondary index record. */
+
+ulint
+lock_sec_rec_read_check_and_lock(
+/*=============================*/
+				/* out: DB_SUCCESS, DB_LOCK_WAIT,
+				DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+	ulint		flags,	/* in: if BTR_NO_LOCKING_FLAG bit is set,
+				does nothing */
+	rec_t*		rec,	/* in: user record or page supremum record
+				which should be read or passed over by a read
+				cursor */
+	dict_index_t*	index,	/* in: secondary index */
+	const ulint*	offsets,/* in: rec_get_offsets(rec, index) */
+	ulint		mode,	/* in: mode of the lock which the read cursor
+				should set on records: LOCK_S or LOCK_X; the
+				latter is possible in SELECT FOR UPDATE */
+	ulint		gap_mode,/* in: LOCK_ORDINARY, LOCK_GAP, or
+				LOCK_REC_NOT_GAP */
+	que_thr_t*	thr);	/* in: query thread */
+/*************************************************************************
+Checks if locks of other transactions prevent an immediate read, or passing
+over by a read cursor, of a clustered index record. If they do, first tests
+if the query thread should anyway be suspended for some reason; if not, then
+puts the transaction and the query thread to the lock wait state and inserts a
+waiting request for a record lock to the lock queue. Sets the requested mode
+lock on the record. */
+
+ulint
+lock_clust_rec_read_check_and_lock(
+/*===============================*/
+				/* out: DB_SUCCESS, DB_LOCK_WAIT,
+				DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+	ulint		flags,	/* in: if BTR_NO_LOCKING_FLAG bit is set,
+				does nothing */
+	rec_t*		rec,	/* in: user record or page supremum record
+				which should be read or passed over by a read
+				cursor */
+	dict_index_t*	index,	/* in: clustered index */
+	const ulint*	offsets,/* in: rec_get_offsets(rec, index) */
+	ulint		mode,	/* in: mode of the lock which the read cursor
+				should set on records: LOCK_S or LOCK_X; the
+				latter is possible in SELECT FOR UPDATE */
+	ulint		gap_mode,/* in: LOCK_ORDINARY, LOCK_GAP, or
+				LOCK_REC_NOT_GAP */
+	que_thr_t*	thr);	/* in: query thread */
+/*************************************************************************
+Checks if locks of other transactions prevent an immediate read, or passing
+over by a read cursor, of a clustered index record. If they do, first tests
+if the query thread should anyway be suspended for some reason; if not, then
+puts the transaction and the query thread to the lock wait state and inserts a
+waiting request for a record lock to the lock queue. Sets the requested mode
+lock on the record. This is an alternative version of
+lock_clust_rec_read_check_and_lock() that does not require the parameter
+"offsets". */
+
+ulint
+lock_clust_rec_read_check_and_lock_alt(
+/*===================================*/
+				/* out: DB_SUCCESS, DB_LOCK_WAIT,
+				DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+	ulint		flags,	/* in: if BTR_NO_LOCKING_FLAG bit is set,
+				does nothing */
+	rec_t*		rec,	/* in: user record or page supremum record
+				which should be read or passed over by a read
+				cursor */
+	dict_index_t*	index,	/* in: clustered index */
+	ulint		mode,	/* in: mode of the lock which the read cursor
+				should set on records: LOCK_S or LOCK_X; the
+				latter is possible in SELECT FOR UPDATE */
+	ulint		gap_mode,/* in: LOCK_ORDINARY, LOCK_GAP, or
+				LOCK_REC_NOT_GAP */
+	que_thr_t*	thr);	/* in: query thread */
+/*************************************************************************
+Checks that a record is seen in a consistent read. */
+
+ibool
+lock_clust_rec_cons_read_sees(
+/*==========================*/
+				/* out: TRUE if sees, or FALSE if an earlier
+				version of the record should be retrieved */
+	rec_t*		rec,	/* in: user record which should be read or
+				passed over by a read cursor */
+	dict_index_t*	index,	/* in: clustered index */
+	const ulint*	offsets,/* in: rec_get_offsets(rec, index) */
+	read_view_t*	view);	/* in: consistent read view */
+/*************************************************************************
+Checks that a non-clustered index record is seen in a consistent read.
+NOTE(review): the return value is documented as TRUE/FALSE but declared as
+ulint, whereas lock_clust_rec_cons_read_sees() is declared as ibool; confirm
+whether the two declarations are meant to agree. */
+
+ulint
+lock_sec_rec_cons_read_sees(
+/*========================*/
+				/* out: TRUE if certainly sees, or FALSE if an
+				earlier version of the clustered index record
+				might be needed: NOTE that a non-clustered
+				index page contains so little information on
+				its modifications that even when FALSE is
+				returned, the present version of rec may be
+				the right one, but we must check this from
+				the clustered index record */
+	rec_t*		rec,	/* in: user record which should be read or
+				passed over by a read cursor */
+	dict_index_t*	index,	/* in: non-clustered index */
+	read_view_t*	view);	/* in: consistent read view */
+/*************************************************************************
+Locks the specified database table in the mode given. If the lock cannot
+be granted immediately, the query thread is put to wait. */
+
+ulint
+lock_table(
+/*=======*/
+				/* out: DB_SUCCESS, DB_LOCK_WAIT,
+				DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+	ulint		flags,	/* in: if BTR_NO_LOCKING_FLAG bit is set,
+				does nothing */
+	dict_table_t*	table,	/* in: database table in dictionary cache */
+	ulint		mode,	/* in: lock mode */
+	que_thr_t*	thr);	/* in: query thread */
+/*************************************************************************
+Checks if there are any locks set on the table. */
+
+ibool
+lock_is_on_table(
+/*=============*/
+				/* out: TRUE if there are lock(s) */
+	dict_table_t*	table);	/* in: database table in dictionary cache */
+/*************************************************************************
+Releases a table lock.
+Releases possible other transactions waiting for this lock. */
+
+void
+lock_table_unlock(
+/*==============*/
+	lock_t*	lock);	/* in: lock */
+/*************************************************************************
+Releases an auto-inc lock a transaction possibly has on a table.
+Releases possible other transactions waiting for this lock. */
+
+void
+lock_table_unlock_auto_inc(
+/*=======================*/
+	trx_t*	trx);	/* in: transaction */
+/*************************************************************************
+Releases transaction locks, and releases possible other transactions waiting
+because of these locks. */
+
+void
+lock_release_off_kernel(
+/*====================*/
+	trx_t*	trx);	/* in: transaction */
+/*************************************************************************
+Cancels a waiting lock request and releases possible other transactions
+waiting behind it. */
+
+void
+lock_cancel_waiting_and_release(
+/*============================*/
+	lock_t*	lock);	/* in: waiting lock request */
+/*************************************************************************
+Resets all locks, both table and record locks, on a table to be dropped.
+No lock is allowed to be a wait lock. */
+
+void
+lock_reset_all_on_table(
+/*====================*/
+	dict_table_t*	table);	/* in: table to be dropped */
+/*************************************************************************
+Calculates the fold value of a page file address: used in inserting or
+searching for a lock in the hash table. */
+UNIV_INLINE
+ulint
+lock_rec_fold(
+/*===========*/
+			/* out: folded value */
+	ulint	space,	/* in: space */
+	ulint	page_no);/* in: page number */
+/*************************************************************************
+Calculates the hash value of a page file address: used in inserting or
+searching for a lock in the hash table. */
+UNIV_INLINE
+ulint
+lock_rec_hash(
+/*==========*/
+			/* out: hashed value */
+	ulint	space,	/* in: space */
+	ulint	page_no);/* in: page number */
+/*************************************************************************
+Gets the source table of an ALTER TABLE transaction.  The table must be
+covered by an IX or IS table lock. */
+
+dict_table_t*
+lock_get_src_table(
+/*===============*/
+				/* out: the source table of transaction,
+				if it is covered by an IX or IS table lock;
+				dest if there is no source table, and
+				NULL if the transaction is locking more than
+				two tables or an inconsistency is found */
+	trx_t*		trx,	/* in: transaction */
+	dict_table_t*	dest,	/* in: destination of ALTER TABLE */
+	ulint*		mode);	/* out: lock mode of the source table */
+/*************************************************************************
+Determine if the given table is exclusively "owned" by the given
+transaction, i.e., transaction holds LOCK_IX and possibly LOCK_AUTO_INC
+on the table. */
+
+ibool
+lock_is_table_exclusive(
+/*====================*/
+				/* out: TRUE if table is only locked by trx,
+				with LOCK_IX, and possibly LOCK_AUTO_INC */
+	dict_table_t*	table,	/* in: table */
+	trx_t*		trx);	/* in: transaction */
+/*************************************************************************
+Checks that a transaction id is sensible, i.e., not in the future. */
+
+ibool
+lock_check_trx_id_sanity(
+/*=====================*/
+					/* out: TRUE if ok */
+	dulint		trx_id,		/* in: trx id */
+	rec_t*		rec,		/* in: user record */
+	dict_index_t*	index,		/* in: clustered index */
+	const ulint*	offsets,	/* in: rec_get_offsets(rec, index) */
+	ibool		has_kernel_mutex);/* in: TRUE if the caller owns the
+					kernel mutex */
+/*************************************************************************
+Validates the lock queue on a single record. */
+
+ibool
+lock_rec_queue_validate(
+/*====================*/
+				/* out: TRUE if ok */
+	rec_t*		rec,	/* in: record to look at */
+	dict_index_t*	index,	/* in: index, or NULL if not known */
+	const ulint*	offsets);/* in: rec_get_offsets(rec, index) */
+/*************************************************************************
+Prints info of a table lock. */
+
+void
+lock_table_print(
+/*=============*/
+	FILE*	file,	/* in: file where to print */
+	lock_t*	lock);	/* in: table type lock */
+/*************************************************************************
+Prints info of a record lock. */
+
+void
+lock_rec_print(
+/*===========*/
+	FILE*	file,	/* in: file where to print */
+	lock_t*	lock);	/* in: record type lock */
+/*************************************************************************
+Prints info of locks for all transactions. */
+
+void
+lock_print_info_summary(
+/*====================*/
+	FILE*	file);	/* in: file where to print */
+/*************************************************************************
+Prints info of locks for each transaction. */
+
+void
+lock_print_info_all_transactions(
+/*=============================*/
+	FILE*	file);	/* in: file where to print */
+/*************************************************************************
+Validates the lock queue on a table. */
+
+ibool
+lock_table_queue_validate(
+/*======================*/
+				/* out: TRUE if ok */
+	dict_table_t*	table);	/* in: table */
+/*************************************************************************
+Validates the record lock queues on a page. */
+
+ibool
+lock_rec_validate_page(
+/*===================*/
+			/* out: TRUE if ok */
+	ulint	space,	/* in: space id */
+	ulint	page_no);/* in: page number */
+/*************************************************************************
+Validates the lock system. */
+
+ibool
+lock_validate(void);
+/*===============*/
+			/* out: TRUE if ok */
+
+/* The lock system */
+extern lock_sys_t*	lock_sys;
+
+/* Lock modes and types */
+/* Basic modes */
+#define	LOCK_NONE	0	/* this value is used elsewhere to denote a
+				consistent read */
+#define	LOCK_IS		2	/* intention shared */
+#define	LOCK_IX		3	/* intention exclusive */
+#define	LOCK_S		4	/* shared */
+#define	LOCK_X		5	/* exclusive */
+#define	LOCK_AUTO_INC	6	/* locks the auto-inc counter of a table
+				in an exclusive mode */
+#define LOCK_MODE_MASK	0xFUL	/* mask used to extract mode from the
+				type_mode field in a lock */
+/* Lock types */
+#define LOCK_TABLE	16	/* these type values should be so high that */
+#define	LOCK_REC	32	/* they can be ORed to the lock mode */
+#define LOCK_TYPE_MASK	0xF0UL	/* mask used to extract lock type from the
+				type_mode field in a lock */
+/* Waiting lock flag */
+#define LOCK_WAIT	256	/* this wait bit should be so high that
+				it can be ORed to the lock mode and type;
+				when this bit is set, it means that the
+				lock has not yet been granted, it is just
+				waiting for its turn in the wait queue */
+/* Precise modes */
+#define LOCK_ORDINARY	0	/* this flag denotes an ordinary next-key lock
+				in contrast to LOCK_GAP or LOCK_REC_NOT_GAP */
+#define LOCK_GAP	512	/* this gap bit should be so high that
+				it can be ORed to the other flags;
+				when this bit is set, it means that the
+				lock holds only on the gap before the record;
+				for instance, an x-lock on the gap does not
+				give permission to modify the record on which
+				the bit is set; locks of this type are created
+				when records are removed from the index chain
+				of records */
+#define LOCK_REC_NOT_GAP 1024 	/* this bit means that the lock is only on
+				the index record and does NOT block inserts
+				to the gap before the index record; this is
+				used in the case when we retrieve a record
+				with a unique key, and is also used in
+				locking plain SELECTs (not part of UPDATE
+				or DELETE) when the user has set the READ
+				COMMITTED isolation level */
+#define LOCK_INSERT_INTENTION 2048 /* this bit is set when we place a waiting
+				gap type record lock request in order to make
+				an insert of an index record wait until
+				there are no conflicting locks by other
+				transactions on the gap; note that this flag
+				remains set when the waiting lock is granted,
+				or if the lock is inherited by a neighboring
+				record */
+
+/* When lock bits are reset, the following flags are available: */
+#define LOCK_RELEASE_WAIT	1
+#define LOCK_NOT_RELEASE_WAIT	2
+
+/* Lock operation struct: names a table together with the mode in which
+it is to be locked */
+typedef struct lock_op_struct	lock_op_t;
+struct lock_op_struct{
+	dict_table_t*	table;	/* table to be locked */
+	ulint		mode;	/* lock mode */
+};
+
+#define LOCK_OP_START		1
+#define LOCK_OP_COMPLETE	2
+
+/* The lock system struct; it is accessed through the global lock_sys
+pointer, which is declared once, after the function prototypes earlier in
+this header (a second, identical extern declaration that used to follow
+this struct was redundant and has been dropped) */
+struct lock_sys_struct{
+	hash_table_t*	rec_hash;	/* hash table of the record locks */
+};
+
+
+#ifndef UNIV_NONINL
+#include "lock0lock.ic"
+#endif
+
+#endif 
--- a/include/lock0lock.ic
+++ b/include/lock0lock.ic
@ -0,0 +1,83 @@
+/******************************************************
+The transaction lock system
+
+(c) 1996 Innobase Oy
+
+Created 5/7/1996 Heikki Tuuri
+*******************************************************/
+
+#include "sync0sync.h"
+#include "srv0srv.h"
+#include "dict0dict.h"
+#include "row0row.h"
+#include "trx0sys.h"
+#include "trx0trx.h"
+#include "buf0buf.h"
+#include "page0page.h"
+#include "page0cur.h"
+#include "row0vers.h"
+#include "que0que.h"
+#include "btr0cur.h"
+#include "read0read.h"
+#include "log0recv.h"
+
+/*************************************************************************
+Folds the space id and page number of a page into a single ulint. The
+result serves as the fold value when a lock is inserted to or looked up
+from the hash table. */
+UNIV_INLINE
+ulint
+lock_rec_fold(
+/*==========*/
+			/* out: folded value */
+	ulint	space,	/* in: space */
+	ulint	page_no)/* in: page number */
+{
+	ulint	fold;
+
+	fold = ut_fold_ulint_pair(space, page_no);
+
+	return(fold);
+}
+
+/*************************************************************************
+Maps the space id and page number of a page to its hash value in the
+record lock hash table (lock_sys->rec_hash). Used when a lock is inserted
+to or looked up from that table. */
+UNIV_INLINE
+ulint
+lock_rec_hash(
+/*==========*/
+			/* out: hashed value */
+	ulint	space,	/* in: space */
+	ulint	page_no)/* in: page number */
+{
+	ulint	fold;
+
+	/* Fold the page address first; hash_calc_hash() then turns the
+	fold into the hash value for lock_sys->rec_hash */
+	fold = lock_rec_fold(space, page_no);
+
+	return(hash_calc_hash(fold, lock_sys->rec_hash));
+}
+
+/*************************************************************************
+Looks for a transaction holding an implicit x-lock on a clustered index
+record: reads the trx id of the record and, if that transaction is still
+active, returns it. */
+UNIV_INLINE
+trx_t*
+lock_clust_rec_some_has_impl(
+/*=========================*/
+				/* out: transaction which has the x-lock, or
+				NULL */
+	rec_t*		rec,	/* in: user record */
+	dict_index_t*	index,	/* in: clustered index */
+	const ulint*	offsets)/* in: rec_get_offsets(rec, index) */
+{
+	dulint	rec_trx_id;
+
+#ifdef UNIV_SYNC_DEBUG
+	ut_ad(mutex_own(&kernel_mutex));
+#endif /* UNIV_SYNC_DEBUG */
+	ut_ad(index->type & DICT_CLUSTERED);
+	ut_ad(page_rec_is_user_rec(rec));
+
+	rec_trx_id = row_get_rec_trx_id(rec, index, offsets);
+
+	if (!trx_is_active(rec_trx_id)) {
+		/* The modifying or inserting transaction is no longer
+		active: nobody holds an implicit lock */
+
+		return(NULL);
+	}
+
+	/* The modifying or inserting transaction is active */
+
+	return(trx_get_on_id(rec_trx_id));
+}
--- a/Show more
+++ b/Show more