Import branches/innodb+ from SVN on top of storage/innobase.

This commit is contained in:
Vasil Dimov 2010-04-12 18:20:41 +03:00
commit c877ff39bc
165 changed files with 13060 additions and 2981 deletions

View file

@ -22,13 +22,21 @@ INCLUDE(CheckCSourceRuns)
# OS tests
IF(UNIX)
IF(CMAKE_SYSTEM_NAME STREQUAL "Linux")
CHECK_INCLUDE_FILES (libaio.h HAVE_LIBAIO_H)
CHECK_LIBRARY_EXISTS(aio io_queue_init "" HAVE_LIBAIO)
ADD_DEFINITIONS("-DUNIV_LINUX -D_GNU_SOURCE=1")
IF(HAVE_LIBAIO_H AND HAVE_LIBAIO)
ADD_DEFINITIONS(-DLINUX_NATIVE_AIO=1)
LINK_LIBRARIES(aio)
ENDIF()
ELSEIF(CMAKE_SYSTEM_NAME MATCHES "HP*")
ADD_DEFINITIONS("-DUNIV_HPUX -DUNIV_MUST_NOT_INLINE")
ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "AIX")
ADD_DEFINITIONS("-DUNIV_AIX -DUNIX_MUST_NOT_INLINE")
ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "SunOS")
ADD_DEFINITIONS("-DUNIV_SOLARIS")
ELSE()
ADD_DEFINITIONS("-DUNIV_MUST_NOT_INLINE")
ENDIF()
ENDIF()
@ -230,8 +238,12 @@ SET(INNOBASE_SOURCES btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c btr/btr0sea.c
trx/trx0i_s.c trx/trx0purge.c trx/trx0rec.c trx/trx0roll.c trx/trx0rseg.c
trx/trx0sys.c trx/trx0trx.c trx/trx0undo.c
usr/usr0sess.c
ut/ut0byte.c ut/ut0dbg.c ut/ut0mem.c ut/ut0rnd.c ut/ut0ut.c ut/ut0vec.c
ut/ut0list.c ut/ut0wqueue.c)
ut/ut0byte.c ut/ut0dbg.c ut/ut0list.c ut/ut0mem.c ut/ut0rbt.c ut/ut0rnd.c
ut/ut0ut.c ut/ut0vec.c ut/ut0wqueue.c)
# Windows atomics do not perform well. Disable Windows atomics by default.
# See bug#52102 for details.
#ADD_DEFINITIONS(-DHAVE_WINDOWS_ATOMICS -DINNODB_RW_LOCKS_USE_ATOMICS -DHAVE_IB_PAUSE_INSTRUCTION)
ADD_DEFINITIONS(-DHAVE_IB_PAUSE_INSTRUCTION)
IF(WITH_INNODB)
# Legacy option

View file

@ -1,3 +1,200 @@
2010-03-31 The InnoDB Team
* mysql-test/innodb_bug51920.test, mysql-test/innodb_bug51920.result,
srv/srv0srv.c:
Fix Bug#51920 InnoDB connections in row lock wait ignore KILL
until lock wait timeout
2010-03-31 The InnoDB Team
* mysql-test/innodb_bug38231.test:
Remove non-determinism in the test case.
2010-03-18 The InnoDB Team
* CMakeLists.txt:
Fix Bug#52102 InnoDB Plugin shows performance drop compared to
InnoDB (Windows)
2010-03-18 The InnoDB Team
* buf0buf.ic:
When comparing the time of the first access to a block against
innodb_old_blocks_time, use 32-bit arithmetics. The comparison was
incorrect on 64-bit systems.
2010-03-11 The InnoDB Team
* buf0buf.h, buf0buf.ic:
Fix and clarify the latching of some buf_block_t members.
Note that check_index_page_at_flush is not protected by any mutex.
Note and assert that lock_hash_val is protected by the rw-latch.
2010-03-10 The InnoDB Team
* trx/trx0sys.c:
Fix Bug#51653 outdated reference to set-variable
2010-03-10 The InnoDB Team
* handler/ha_innodb.cc, mysql-test/innodb_bug21704.result,
mysql-test/innodb_bug47621.result, mysql-test/innodb_bug47621.test:
Fix Bug#47621 MySQL and InnoDB data dictionaries will become out of
sync when renaming columns
2010-03-10 The InnoDB Team
* handler/ha_innodb.cc:
Fix Bug#51356 Many Valgrind errors in error messages
with concurrent DDL
2010-03-10 The InnoDB Team
* handler/ha_innodb.cc, handler/handler0alter.cc,
mysql-test/innodb_bug51378.result, mysql-test/innodb_bug51378.test:
Fix Bug#51378 Init 'ref_length' to correct value, in case an out
of bound MySQL primary_key
2010-03-10 The InnoDB Team
* log/log0recv.c:
Remove a bogus assertion about page numbers exceeding 0x90000000
in the redo log. Abort when encountering a corrupted redo log
record, unless innodb_force_recovery is set.
2010-03-09 The InnoDB Team
* handler/ha_innodb.cc:
Make SHOW ENGINE INNODB MUTEX STATUS display SUM(os_waits)
for the buffer pool block mutexes and locks.
2010-03-08 The InnoDB Team
* fil/fil0fil.c:
Fix ALTER TABLE ... IMPORT TABLESPACE of compressed tables.
2010-03-03 The InnoDB Team
* handler/handler0alter.cc, innodb-index.result, innodb-index.test,
innodb.result, innodb.test:
Disallow a duplicate index name when creating an index.
2010-02-11 The InnoDB Team
* include/mem0mem.h, include/mem0mem.ic, mem/mem0mem.c:
Fix Bug#49535 Available memory check slows down crash
recovery tens of times
2010-02-09 The InnoDB Team
* buf/buf0buf.c:
Fix Bug#38901 InnoDB logs error repeatedly when trying to load
page into buffer pool
2010-02-09 The InnoDB Team
* srv/srv0srv.c:
Let the master thread sleep if the amount of work to be done is
calibrated as taking less than a second.
2010-02-04 The InnoDB Team
* btr/btr0btr.c, btr/btr0cur.c, btr/btr0pcur.c, buf/buf0buf.c,
include/btr0btr.h, include/btr0cur.h, include/btr0pcur.h,
include/btr0pcur.ic, include/buf0buf.h, row/row0ins.c, row/row0sel.c:
Pass the file name and line number of the caller of the
b-tree cursor functions to the buffer pool requests, in order
to make the latch diagnostics more accurate.
2010-02-03 The InnoDB Team
* lock/lock0lock.c:
Fix Bug#49001 SHOW INNODB STATUS deadlock info incorrect
when deadlock detection aborts
2010-02-03 The InnoDB Team
* buf/buf0lru.c:
Fix Bug#35077 Very slow DROP TABLE (ALTER TABLE, OPTIMIZE TABLE)
on compressed tables
2010-02-03 The InnoDB Team
* handler/ha_innodb.cc, include/row0mysql.h, row/row0mysql.c:
Clean up CHECK TABLE error handling.
2010-02-01 The InnoDB Team
* handler/ha_innodb.cc, mysql-test/innodb-autoinc.test,
mysql-test/innodb-autoinc.result,
mysql-test/innodb-autoinc-44030.test,
mysql-test/innodb-autoinc-44030.result:
Fix Bug#49497 Error 1467 (ER_AUTOINC_READ_FAILED) on inserting
a negative value
2010-01-27 The InnoDB Team
* include/row0mysql.h, log/log0recv.c, row/row0mysql.c:
Drop temporary tables at startup.
This addresses the third aspect of
Bug#41609 Crash recovery does not work for InnoDB temporary tables.
2010-01-21 The InnoDB Team
* buf/buf0buf.c:
Do not merge buffered inserts to compressed pages before
the redo log has been applied in crash recovery.
2010-01-13 The InnoDB Team
* row/row0sel.c:
On the READ UNCOMMITTED isolation level, do not attempt to access
a clustered index record that has been marked for deletion. The
built-in InnoDB in MySQL 5.1 and earlier would attempt to retrieve
a previous version of the record in this case.
2010-01-13 The InnoDB Team
* buf/buf0buf.c:
When disabling the adaptive hash index, check the block state
before checking block->is_hashed, because the latter may be
uninitialized right after server startup.
2010-01-12 The InnoDB Team
* handler/ha_innodb.cc, handler/ha_innodb.h:
Fix Bug#46193 crash when accessing tables after enabling
innodb_force_recovery option
2010-01-12 The InnoDB Team
* row/row0mysql.c:
Fix Bug#49238 Creating/Dropping a temporary table while at 1023
transactions will cause assert.
2009-12-02 The InnoDB Team
* srv/srv0start.c:
Display the zlib version number at startup.
InnoDB compressed tables use zlib, and the implementation depends
on the zlib function compressBound(), whose definition was slightly
changed in zlib version 1.2.3.1 in 2006. MySQL bundles zlib 1.2.3
from 2005, but some installations use a more recent zlib.
2009-11-30 The InnoDB Team
* dict/dict0crea.c, dict/dict0mem.c, dict/dict0load.c,
dict/dict0boot.c, fil/fil0fil.c, handler/ha_innodb.cc,
include/dict0mem.h, row/row0mysql.c:
Fix the bogus warning messages for non-existing temporary
tables that were reported in
Bug#41609 Crash recovery does not work for InnoDB temporary tables.
The actual crash recovery bug was corrected on 2009-04-29.
2009-11-27 The InnoDB Team
InnoDB Plugin 1.0.6 released
2009-11-20 The InnoDB Team
* handler/ha_innodb.cc:
@ -79,8 +276,8 @@
sync/sync0arr.c, sync/sync0sync.c, thr/thr0loc.c, trx/trx0i_s.c,
trx/trx0purge.c, trx/trx0rseg.c, trx/trx0sys.c, trx/trx0undo.c,
usr/usr0sess.c, ut/ut0mem.c:
Fix Bug #45992 innodb memory not freed after shutdown
Fix Bug #46656 InnoDB plugin: memory leaks (Valgrind)
Fix Bug#45992 innodb memory not freed after shutdown
Fix Bug#46656 InnoDB plugin: memory leaks (Valgrind)
2009-10-29 The InnoDB Team
@ -422,7 +619,7 @@
* dict/dict0dict.c:
When an index column cannot be found in the table during index
creation, display additional diagnostic before an assertion failure.
This does NOT fix Bug #44571 InnoDB Plugin crashes on ADD INDEX,
This does NOT fix Bug#44571 InnoDB Plugin crashes on ADD INDEX,
but it helps understand the reason of the crash.
2009-06-17 The InnoDB Team
@ -535,6 +732,12 @@
Fix Bug#44320 InnoDB: missing DB_ROLL_PTR in Table Monitor COLUMNS
output
2009-04-29 The InnoDB Team
* fil/fil0fil.c, include/fil0fil.h, include/mtr0mtr.h,
log/log0recv.c:
Fix Bug#41609 Crash recovery does not work for InnoDB temporary tables
2009-04-23 The InnoDB Team
* row/row0mysql.c:

View file

@ -28,7 +28,6 @@ INCLUDES= -I$(top_srcdir)/include -I$(top_builddir)/include \
DEFS= @DEFS@
noinst_HEADERS= \
handler/ha_innodb.h \
handler/i_s.h \
@ -118,6 +117,7 @@ noinst_HEADERS= \
include/mtr0types.h \
include/mysql_addons.h \
include/os0file.h \
include/os0file.ic \
include/os0proc.h \
include/os0proc.ic \
include/os0sync.h \
@ -217,6 +217,7 @@ noinst_HEADERS= \
include/ut0lst.h \
include/ut0mem.h \
include/ut0mem.ic \
include/ut0rbt.h \
include/ut0rnd.h \
include/ut0rnd.ic \
include/ut0sort.h \
@ -318,6 +319,7 @@ libinnobase_a_SOURCES= \
ut/ut0dbg.c \
ut/ut0list.c \
ut/ut0mem.c \
ut/ut0rbt.c \
ut/ut0rnd.c \
ut/ut0ut.c \
ut/ut0vec.c \

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -592,15 +592,18 @@ an x-latch on the tree.
@return rec_get_offsets() of the node pointer record */
static
ulint*
btr_page_get_father_node_ptr(
/*=========================*/
btr_page_get_father_node_ptr_func(
/*==============================*/
ulint* offsets,/*!< in: work area for the return value */
mem_heap_t* heap, /*!< in: memory heap to use */
btr_cur_t* cursor, /*!< in: cursor pointing to user record,
out: cursor on node pointer record,
its page x-latched */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
mtr_t* mtr) /*!< in: mtr */
{
page_t* page;
dtuple_t* tuple;
rec_t* user_rec;
rec_t* node_ptr;
@ -617,12 +620,15 @@ btr_page_get_father_node_ptr(
ut_ad(dict_index_get_page(index) != page_no);
level = btr_page_get_level(btr_cur_get_page(cursor), mtr);
page = btr_cur_get_page(cursor);
user_rec = btr_cur_get_rec(cursor);
ut_a(page_rec_is_user_rec(user_rec));
tuple = dict_index_build_node_ptr(index, user_rec, 0, heap, level);
btr_cur_search_to_nth_level(index, level + 1, tuple, PAGE_CUR_LE,
BTR_CONT_MODIFY_TREE, cursor, 0, mtr);
BTR_CONT_MODIFY_TREE, cursor, 0,
file, line, mtr);
node_ptr = btr_cur_get_rec(cursor);
ut_ad(!page_rec_is_comp(node_ptr)
@ -670,6 +676,9 @@ btr_page_get_father_node_ptr(
return(offsets);
}
#define btr_page_get_father_node_ptr(of,heap,cur,mtr) \
btr_page_get_father_node_ptr_func(of,heap,cur,__FILE__,__LINE__,mtr)
/************************************************************//**
Returns the upper level node pointer to a page. It is assumed that mtr holds
an x-latch on the tree.
@ -1662,11 +1671,13 @@ Inserts a data tuple to a tree on a non-leaf level. It is assumed
that mtr holds an x-latch on the tree. */
UNIV_INTERN
void
btr_insert_on_non_leaf_level(
/*=========================*/
btr_insert_on_non_leaf_level_func(
/*==============================*/
dict_index_t* index, /*!< in: index */
ulint level, /*!< in: level, must be > 0 */
dtuple_t* tuple, /*!< in: the record to be inserted */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
mtr_t* mtr) /*!< in: mtr */
{
big_rec_t* dummy_big_rec;
@ -1678,7 +1689,7 @@ btr_insert_on_non_leaf_level(
btr_cur_search_to_nth_level(index, level, tuple, PAGE_CUR_LE,
BTR_CONT_MODIFY_TREE,
&cursor, 0, mtr);
&cursor, 0, file, line, mtr);
err = btr_cur_pessimistic_insert(BTR_NO_LOCKING_FLAG
| BTR_KEEP_SYS_FLAG

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@ -57,6 +57,8 @@ Created 10/16/1994 Heikki Tuuri
#include "buf0lru.h"
#include "btr0btr.h"
#include "btr0sea.h"
#include "row0purge.h"
#include "row0upd.h"
#include "trx0rec.h"
#include "trx0roll.h" /* trx_is_recv() */
#include "que0que.h"
@ -66,6 +68,15 @@ Created 10/16/1994 Heikki Tuuri
#include "lock0lock.h"
#include "zlib.h"
/** Buffered B-tree operation types, introduced as part of delete buffering. */
typedef enum btr_op_enum {
BTR_NO_OP = 0, /*!< Not buffered */
BTR_INSERT_OP, /*!< Insert, do not ignore UNIQUE */
BTR_INSERT_IGNORE_UNIQUE_OP, /*!< Insert, ignoring UNIQUE */
BTR_DELETE_OP, /*!< Purge a delete-marked record */
BTR_DELMARK_OP /*!< Mark a record for deletion */
} btr_op_t;
#ifdef UNIV_DEBUG
/** If the following is set to TRUE, this module prints a lot of
trace information of individual record operations */
@ -328,7 +339,8 @@ btr_cur_search_to_nth_level(
Inserts should always be made using
PAGE_CUR_LE to search the position! */
ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ..., ORed with
BTR_INSERT and BTR_ESTIMATE;
at most one of BTR_INSERT, BTR_DELETE_MARK,
BTR_DELETE, or BTR_ESTIMATE;
cursor->left_block is used to store a pointer
to the left neighbor page, in the cases
BTR_SEARCH_PREV and BTR_MODIFY_PREV;
@ -342,25 +354,30 @@ btr_cur_search_to_nth_level(
ulint has_search_latch,/*!< in: info on the latch mode the
caller currently has on btr_search_latch:
RW_S_LATCH, or 0 */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
mtr_t* mtr) /*!< in: mtr */
{
page_cur_t* page_cursor;
page_t* page;
buf_block_t* guess;
rec_t* node_ptr;
ulint page_no;
buf_block_t* block;
ulint space;
buf_block_t* guess;
ulint height;
ulint page_no;
ulint up_match;
ulint up_bytes;
ulint low_match;
ulint low_bytes;
ulint height;
ulint savepoint;
ulint rw_latch;
ulint page_mode;
ulint insert_planned;
ulint buf_mode;
ulint estimate;
ulint ignore_sec_unique;
ulint zip_size;
page_cur_t* page_cursor;
btr_op_t btr_op;
ulint root_height = 0; /* remove warning */
#ifdef BTR_CUR_ADAPT
btr_search_t* info;
#endif
@ -380,17 +397,53 @@ btr_cur_search_to_nth_level(
cursor->up_match = ULINT_UNDEFINED;
cursor->low_match = ULINT_UNDEFINED;
#endif
insert_planned = latch_mode & BTR_INSERT;
estimate = latch_mode & BTR_ESTIMATE;
ignore_sec_unique = latch_mode & BTR_IGNORE_SEC_UNIQUE;
latch_mode = latch_mode & ~(BTR_INSERT | BTR_ESTIMATE
| BTR_IGNORE_SEC_UNIQUE);
ut_ad(!insert_planned || (mode == PAGE_CUR_LE));
/* These flags are mutually exclusive, they are lumped together
with the latch mode for historical reasons. It's possible for
none of the flags to be set. */
switch (UNIV_EXPECT(latch_mode
& (BTR_INSERT | BTR_DELETE | BTR_DELETE_MARK),
0)) {
case 0:
btr_op = BTR_NO_OP;
break;
case BTR_INSERT:
btr_op = (latch_mode & BTR_IGNORE_SEC_UNIQUE)
? BTR_INSERT_IGNORE_UNIQUE_OP
: BTR_INSERT_OP;
break;
case BTR_DELETE:
btr_op = BTR_DELETE_OP;
ut_a(cursor->purge_node);
break;
case BTR_DELETE_MARK:
btr_op = BTR_DELMARK_OP;
break;
default:
/* only one of BTR_INSERT, BTR_DELETE, BTR_DELETE_MARK
should be specified at a time */
ut_error;
}
/* Operations on the insert buffer tree cannot be buffered. */
ut_ad(btr_op == BTR_NO_OP || !dict_index_is_ibuf(index));
/* Operations on the clustered index cannot be buffered. */
ut_ad(btr_op == BTR_NO_OP || !dict_index_is_clust(index));
estimate = latch_mode & BTR_ESTIMATE;
/* Turn the flags unrelated to the latch mode off. */
latch_mode &= ~(BTR_INSERT
| BTR_DELETE_MARK
| BTR_DELETE
| BTR_ESTIMATE
| BTR_IGNORE_SEC_UNIQUE);
cursor->flag = BTR_CUR_BINARY;
cursor->index = index;
cursor->ibuf_cnt = ULINT_UNDEFINED;
#ifndef BTR_CUR_ADAPT
guess = NULL;
#else
@ -404,7 +457,8 @@ btr_cur_search_to_nth_level(
info->n_searches++;
#endif
if (rw_lock_get_writer(&btr_search_latch) == RW_LOCK_NOT_LOCKED
&& latch_mode <= BTR_MODIFY_LEAF && info->last_hash_succ
&& latch_mode <= BTR_MODIFY_LEAF
&& info->last_hash_succ
&& !estimate
#ifdef PAGE_CUR_LE_OR_EXTENDS
&& mode != PAGE_CUR_LE_OR_EXTENDS
@ -493,156 +547,219 @@ btr_cur_search_to_nth_level(
/* Loop and search until we arrive at the desired level */
for (;;) {
ulint zip_size;
buf_block_t* block;
ulint rw_latch;
ulint buf_mode;
search_loop:
buf_mode = BUF_GET;
rw_latch = RW_NO_LATCH;
zip_size = dict_table_zip_size(index->table);
rw_latch = RW_NO_LATCH;
buf_mode = BUF_GET;
if (height != 0) {
/* We are about to fetch the root or a non-leaf page. */
} else if (latch_mode <= BTR_MODIFY_LEAF) {
rw_latch = latch_mode;
if (height == 0 && latch_mode <= BTR_MODIFY_LEAF) {
if (btr_op != BTR_NO_OP
&& ibuf_should_try(index, btr_op != BTR_INSERT_OP)) {
rw_latch = latch_mode;
/* Try to buffer the operation if the leaf
page is not in the buffer pool. */
if (insert_planned
&& ibuf_should_try(index, ignore_sec_unique)) {
/* Try insert to the insert buffer if the
page is not in the buffer pool */
buf_mode = BUF_GET_IF_IN_POOL;
}
buf_mode = btr_op == BTR_DELETE_OP
? BUF_GET_IF_IN_POOL_OR_WATCH
: BUF_GET_IF_IN_POOL;
}
}
zip_size = dict_table_zip_size(index->table);
retry_page_get:
block = buf_page_get_gen(space, zip_size, page_no,
rw_latch, guess, buf_mode,
__FILE__, __LINE__, mtr);
if (block == NULL) {
/* This must be a search to perform an insert;
try insert to the insert buffer */
block = buf_page_get_gen(
space, zip_size, page_no, rw_latch, guess, buf_mode,
file, line, mtr);
if (block == NULL) {
/* This must be a search to perform an insert/delete
mark/ delete; try using the insert/delete buffer */
ut_ad(height == 0);
ut_ad(cursor->thr);
switch (btr_op) {
case BTR_INSERT_OP:
case BTR_INSERT_IGNORE_UNIQUE_OP:
ut_ad(buf_mode == BUF_GET_IF_IN_POOL);
ut_ad(insert_planned);
ut_ad(cursor->thr);
if (ibuf_insert(tuple, index, space, zip_size,
page_no, cursor->thr)) {
/* Insertion to the insert buffer succeeded */
if (ibuf_insert(IBUF_OP_INSERT, tuple, index,
space, zip_size, page_no,
cursor->thr)) {
cursor->flag = BTR_CUR_INSERT_TO_IBUF;
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
goto func_exit;
}
break;
case BTR_DELMARK_OP:
ut_ad(buf_mode == BUF_GET_IF_IN_POOL);
if (ibuf_insert(IBUF_OP_DELETE_MARK, tuple,
index, space, zip_size,
page_no, cursor->thr)) {
cursor->flag = BTR_CUR_DEL_MARK_IBUF;
goto func_exit;
}
/* Insert to the insert buffer did not succeed:
retry page get */
break;
buf_mode = BUF_GET;
case BTR_DELETE_OP:
ut_ad(buf_mode == BUF_GET_IF_IN_POOL_OR_WATCH);
goto retry_page_get;
if (!row_purge_poss_sec(cursor->purge_node,
index, tuple)) {
/* The record cannot be purged yet. */
cursor->flag = BTR_CUR_DELETE_REF;
} else if (ibuf_insert(IBUF_OP_DELETE, tuple,
index, space, zip_size,
page_no,
cursor->thr)) {
/* The purge was buffered. */
cursor->flag = BTR_CUR_DELETE_IBUF;
} else {
/* The purge could not be buffered. */
buf_pool_watch_unset(space, page_no);
break;
}
buf_pool_watch_unset(space, page_no);
goto func_exit;
default:
ut_error;
}
page = buf_block_get_frame(block);
/* Insert to the insert/delete buffer did not succeed, we
must read the page from disk. */
block->check_index_page_at_flush = TRUE;
buf_mode = BUF_GET;
if (rw_latch != RW_NO_LATCH) {
goto retry_page_get;
}
block->check_index_page_at_flush = TRUE;
page = buf_block_get_frame(block);
if (rw_latch != RW_NO_LATCH) {
#ifdef UNIV_ZIP_DEBUG
const page_zip_des_t* page_zip
= buf_block_get_page_zip(block);
ut_a(!page_zip || page_zip_validate(page_zip, page));
const page_zip_des_t* page_zip
= buf_block_get_page_zip(block);
ut_a(!page_zip || page_zip_validate(page_zip, page));
#endif /* UNIV_ZIP_DEBUG */
buf_block_dbg_add_level(block, SYNC_TREE_NODE);
}
buf_block_dbg_add_level(block, SYNC_TREE_NODE);
}
ut_ad(0 == ut_dulint_cmp(index->id,
btr_page_get_index_id(page)));
ut_ad(0 == ut_dulint_cmp(index->id, btr_page_get_index_id(page)));
if (UNIV_UNLIKELY(height == ULINT_UNDEFINED)) {
/* We are in the root node */
if (UNIV_UNLIKELY(height == ULINT_UNDEFINED)) {
/* We are in the root node */
height = btr_page_get_level(page, mtr);
root_height = height;
cursor->tree_height = root_height + 1;
height = btr_page_get_level(page, mtr);
root_height = height;
cursor->tree_height = root_height + 1;
#ifdef BTR_CUR_ADAPT
if (block != guess) {
info->root_guess = block;
}
if (block != guess) {
info->root_guess = block;
}
#endif
}
if (height == 0) {
if (rw_latch == RW_NO_LATCH) {
btr_cur_latch_leaves(
page, space, zip_size, page_no, latch_mode,
cursor, mtr);
}
if (height == 0) {
if (rw_latch == RW_NO_LATCH) {
if (latch_mode != BTR_MODIFY_TREE
&& latch_mode != BTR_CONT_MODIFY_TREE) {
btr_cur_latch_leaves(page, space, zip_size,
page_no, latch_mode,
cursor, mtr);
}
/* Release the tree s-latch */
if ((latch_mode != BTR_MODIFY_TREE)
&& (latch_mode != BTR_CONT_MODIFY_TREE)) {
/* Release the tree s-latch */
mtr_release_s_latch_at_savepoint(
mtr, savepoint,
dict_index_get_lock(index));
}
page_mode = mode;
mtr_release_s_latch_at_savepoint(
mtr, savepoint, dict_index_get_lock(index));
}
page_cur_search_with_match(block, index, tuple, page_mode,
&up_match, &up_bytes,
&low_match, &low_bytes,
page_cursor);
page_mode = mode;
}
if (estimate) {
btr_cur_add_path_info(cursor, height, root_height);
}
page_cur_search_with_match(
block, index, tuple, page_mode, &up_match, &up_bytes,
&low_match, &low_bytes, page_cursor);
/* If this is the desired level, leave the loop */
if (estimate) {
btr_cur_add_path_info(cursor, height, root_height);
}
ut_ad(height == btr_page_get_level(
page_cur_get_page(page_cursor), mtr));
/* If this is the desired level, leave the loop */
if (level == height) {
ut_ad(height == btr_page_get_level(page_cur_get_page(page_cursor),
mtr));
if (level > 0) {
/* x-latch the page */
page = btr_page_get(space, zip_size,
page_no, RW_X_LATCH, mtr);
ut_a((ibool)!!page_is_comp(page)
== dict_table_is_comp(index->table));
}
break;
}
if (level != height) {
const rec_t* node_ptr;
ut_ad(height > 0);
height--;
guess = NULL;
node_ptr = page_cur_get_rec(page_cursor);
offsets = rec_get_offsets(node_ptr, cursor->index, offsets,
ULINT_UNDEFINED, &heap);
offsets = rec_get_offsets(
node_ptr, index, offsets, ULINT_UNDEFINED, &heap);
/* Go to the child node */
page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets);
if (UNIV_UNLIKELY(height == 0 && dict_index_is_ibuf(index))) {
/* We're doing a search on an ibuf tree and we're one
level above the leaf page. */
ulint is_min_rec;
ut_ad(level == 0);
is_min_rec = rec_get_info_bits(node_ptr, 0)
& REC_INFO_MIN_REC_FLAG;
if (!is_min_rec) {
cursor->ibuf_cnt
= ibuf_rec_get_counter(node_ptr);
ut_a(cursor->ibuf_cnt <= 0xFFFF
|| cursor->ibuf_cnt == ULINT_UNDEFINED);
}
buf_mode = BUF_GET;
rw_latch = RW_NO_LATCH;
goto retry_page_get;
}
goto search_loop;
}
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
if (level != 0) {
/* x-latch the page */
page = btr_page_get(
space, zip_size, page_no, RW_X_LATCH, mtr);
if (level == 0) {
ut_a((ibool)!!page_is_comp(page)
== dict_table_is_comp(index->table));
} else {
cursor->low_match = low_match;
cursor->low_bytes = low_bytes;
cursor->up_match = up_match;
@ -667,6 +784,11 @@ retry_page_get:
}
func_exit:
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
if (has_search_latch) {
rw_lock_s_lock(&btr_search_latch);
@ -677,13 +799,15 @@ func_exit:
Opens a cursor at either end of an index. */
UNIV_INTERN
void
btr_cur_open_at_index_side(
/*=======================*/
btr_cur_open_at_index_side_func(
/*============================*/
ibool from_left, /*!< in: TRUE if open to the low end,
FALSE if to the high end */
dict_index_t* index, /*!< in: index */
ulint latch_mode, /*!< in: latch mode */
btr_cur_t* cursor, /*!< in: cursor */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
mtr_t* mtr) /*!< in: mtr */
{
page_cur_t* page_cursor;
@ -728,7 +852,7 @@ btr_cur_open_at_index_side(
page_t* page;
block = buf_page_get_gen(space, zip_size, page_no,
RW_NO_LATCH, NULL, BUF_GET,
__FILE__, __LINE__, mtr);
file, line, mtr);
page = buf_block_get_frame(block);
ut_ad(0 == ut_dulint_cmp(index->id,
btr_page_get_index_id(page)));
@ -808,11 +932,13 @@ btr_cur_open_at_index_side(
Positions a cursor at a randomly chosen position within a B-tree. */
UNIV_INTERN
void
btr_cur_open_at_rnd_pos(
/*====================*/
btr_cur_open_at_rnd_pos_func(
/*=========================*/
dict_index_t* index, /*!< in: index */
ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
btr_cur_t* cursor, /*!< in/out: B-tree cursor */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
mtr_t* mtr) /*!< in: mtr */
{
page_cur_t* page_cursor;
@ -847,7 +973,7 @@ btr_cur_open_at_rnd_pos(
block = buf_page_get_gen(space, zip_size, page_no,
RW_NO_LATCH, NULL, BUF_GET,
__FILE__, __LINE__, mtr);
file, line, mtr);
page = buf_block_get_frame(block);
ut_ad(0 == ut_dulint_cmp(index->id,
btr_page_get_index_id(page)));
@ -2729,25 +2855,26 @@ btr_cur_del_mark_set_sec_rec(
}
/***********************************************************//**
Clear a secondary index record's delete mark. This function is only
used by the insert buffer insert merge mechanism. */
Sets a secondary index record's delete mark to the given value. This
function is only used by the insert buffer merge mechanism. */
UNIV_INTERN
void
btr_cur_del_unmark_for_ibuf(
/*========================*/
rec_t* rec, /*!< in/out: record to delete unmark */
btr_cur_set_deleted_flag_for_ibuf(
/*==============================*/
rec_t* rec, /*!< in/out: record */
page_zip_des_t* page_zip, /*!< in/out: compressed page
corresponding to rec, or NULL
when the tablespace is
uncompressed */
ibool val, /*!< in: value to set */
mtr_t* mtr) /*!< in: mtr */
{
/* We do not need to reserve btr_search_latch, as the page has just
been read to the buffer pool and there cannot be a hash index to it. */
btr_rec_set_deleted_flag(rec, page_zip, FALSE);
btr_rec_set_deleted_flag(rec, page_zip, val);
btr_cur_del_mark_set_sec_rec_log(rec, FALSE, mtr);
btr_cur_del_mark_set_sec_rec_log(rec, val, mtr);
}
/*==================== B-TREE RECORD REMOVE =========================*/
@ -3100,7 +3227,8 @@ btr_estimate_n_rows_in_range(
btr_cur_search_to_nth_level(index, 0, tuple1, mode1,
BTR_SEARCH_LEAF | BTR_ESTIMATE,
&cursor, 0, &mtr);
&cursor, 0,
__FILE__, __LINE__, &mtr);
} else {
btr_cur_open_at_index_side(TRUE, index,
BTR_SEARCH_LEAF | BTR_ESTIMATE,
@ -3117,7 +3245,8 @@ btr_estimate_n_rows_in_range(
btr_cur_search_to_nth_level(index, 0, tuple2, mode2,
BTR_SEARCH_LEAF | BTR_ESTIMATE,
&cursor, 0, &mtr);
&cursor, 0,
__FILE__, __LINE__, &mtr);
} else {
btr_cur_open_at_index_side(FALSE, index,
BTR_SEARCH_LEAF | BTR_ESTIMATE,
@ -4252,7 +4381,7 @@ btr_free_externally_stored_field(
/* In the rollback of uncommitted transactions, we may
encounter a clustered index record whose BLOBs have
not been written. There is nothing to free then. */
ut_a(rb_ctx == RB_RECOVERY);
ut_a(rb_ctx == RB_RECOVERY || rb_ctx == RB_RECOVERY_PURGE_REC);
return;
}
@ -4298,7 +4427,7 @@ btr_free_externally_stored_field(
|| (mach_read_from_1(field_ref + BTR_EXTERN_LEN)
& BTR_EXTERN_OWNER_FLAG)
/* Rollback and inherited field */
|| (rb_ctx != RB_NONE
|| ((rb_ctx == RB_NORMAL || rb_ctx == RB_RECOVERY)
&& (mach_read_from_1(field_ref + BTR_EXTERN_LEN)
& BTR_EXTERN_INHERITED_FLAG))) {

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -205,10 +205,12 @@ record and it can be restored on a user record whose ordering fields
are identical to the ones of the original user record */
UNIV_INTERN
ibool
btr_pcur_restore_position(
/*======================*/
btr_pcur_restore_position_func(
/*===========================*/
ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
btr_pcur_t* cursor, /*!< in: detached persistent cursor */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
mtr_t* mtr) /*!< in: mtr */
{
dict_index_t* index;
@ -217,6 +219,9 @@ btr_pcur_restore_position(
ulint old_mode;
mem_heap_t* heap;
ut_ad(mtr);
ut_ad(mtr->state == MTR_ACTIVE);
index = btr_cur_get_index(btr_pcur_get_btr_cur(cursor));
if (UNIV_UNLIKELY(cursor->old_stored != BTR_PCUR_OLD_STORED)
@ -257,7 +262,8 @@ btr_pcur_restore_position(
if (UNIV_LIKELY(buf_page_optimistic_get(
latch_mode,
cursor->block_when_stored,
cursor->modify_clock, mtr))) {
cursor->modify_clock,
file, line, mtr))) {
cursor->pos_state = BTR_PCUR_IS_POSITIONED;
buf_block_dbg_add_level(btr_pcur_get_block(cursor),
@ -312,8 +318,8 @@ btr_pcur_restore_position(
mode = PAGE_CUR_L;
}
btr_pcur_open_with_no_init(index, tuple, mode, latch_mode,
cursor, 0, mtr);
btr_pcur_open_with_no_init_func(index, tuple, mode, latch_mode,
cursor, 0, file, line, mtr);
/* Restore the old search mode */
cursor->search_mode = old_mode;
@ -553,8 +559,8 @@ before first in tree. The latching mode must be BTR_SEARCH_LEAF or
BTR_MODIFY_LEAF. */
UNIV_INTERN
void
btr_pcur_open_on_user_rec(
/*======================*/
btr_pcur_open_on_user_rec_func(
/*===========================*/
dict_index_t* index, /*!< in: index */
const dtuple_t* tuple, /*!< in: tuple on which search done */
ulint mode, /*!< in: PAGE_CUR_L, ... */
@ -562,9 +568,12 @@ btr_pcur_open_on_user_rec(
BTR_MODIFY_LEAF */
btr_pcur_t* cursor, /*!< in: memory buffer for persistent
cursor */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
mtr_t* mtr) /*!< in: mtr */
{
btr_pcur_open(index, tuple, mode, latch_mode, cursor, mtr);
btr_pcur_open_func(index, tuple, mode, latch_mode, cursor,
file, line, mtr);
if ((mode == PAGE_CUR_GE) || (mode == PAGE_CUR_G)) {

View file

@ -50,6 +50,11 @@ UNIV_INTERN char btr_search_enabled = TRUE;
/** Mutex protecting btr_search_enabled */
static mutex_t btr_search_enabled_mutex;
#ifdef UNIV_PFS_MUTEX
/* Key to register btr_search_enabled_mutex with performance schema */
UNIV_INTERN mysql_pfs_key_t btr_search_enabled_mutex_key;
#endif /* UNIV_PFS_MUTEX */
/** A dummy variable to fool the compiler */
UNIV_INTERN ulint btr_search_this_is_zero = 0;
@ -82,6 +87,11 @@ UNIV_INTERN byte btr_sea_pad2[64];
/** The adaptive hash index */
UNIV_INTERN btr_search_sys_t* btr_search_sys;
#ifdef UNIV_PFS_RWLOCK
/* Key to register btr_search_sys with performance schema */
UNIV_INTERN mysql_pfs_key_t btr_search_latch_key;
#endif /* UNIV_PFS_RWLOCK */
/** If the number of records on the page divided by this parameter
would have been successfully accessed using a hash index, the index
is then built on the page, assuming the global limit has been reached */
@ -167,8 +177,10 @@ btr_search_sys_create(
btr_search_latch_temp = mem_alloc(sizeof(rw_lock_t));
rw_lock_create(&btr_search_latch, SYNC_SEARCH_SYS);
mutex_create(&btr_search_enabled_mutex, SYNC_SEARCH_SYS_CONF);
rw_lock_create(btr_search_latch_key, &btr_search_latch,
SYNC_SEARCH_SYS);
mutex_create(btr_search_enabled_mutex_key,
&btr_search_enabled_mutex, SYNC_SEARCH_SYS_CONF);
btr_search_sys = mem_alloc(sizeof(btr_search_sys_t));

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 2006, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -391,6 +391,8 @@ buf_buddy_relocate_block(
UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, dpage);
}
UNIV_MEM_INVALID(bpage, sizeof *bpage);
mutex_exit(&buf_pool_zip_mutex);
return(TRUE);
}
@ -455,6 +457,8 @@ buf_buddy_relocate(
return(FALSE);
}
ut_ad(!buf_pool_watch_is(bpage));
if (page_zip_get_size(&bpage->zip) != size) {
/* The block is of different size. We would
have to relocate all blocks covered by src.

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@ -153,12 +153,12 @@ list. We also keep a pointer to near the end of the LRU list,
which we can use when we want to artificially age a page in the
buf_pool. This is used if we know that some page is not needed
again for some time: we insert the block right after the pointer,
causing it to be replaced sooner than would noramlly be the case.
causing it to be replaced sooner than would normally be the case.
Currently this aging mechanism is used for read-ahead mechanism
of pages, and it can also be used when there is a scan of a full
table which cannot fit in the memory. Putting the pages near the
of the LRU list, we make sure that most of the buf_pool stays in the
main memory, undisturbed.
end of the LRU list, we make sure that most of the buf_pool stays
in the main memory, undisturbed.
The unzip_LRU list contains a subset of the common LRU list. The
blocks on the unzip_LRU list hold a compressed file page and the
@ -172,6 +172,7 @@ The chain of modified blocks (buf_pool->flush_list) contains the blocks
holding file pages that have been modified in the memory
but not written to disk yet. The block with the oldest modification
which has not yet been written to disk is at the end of the chain.
The access to this list is protected by flush_list_mutex.
The chain of unmodified compressed blocks (buf_pool->zip_clean)
contains the control blocks (buf_page_t) of those compressed pages
@ -242,6 +243,8 @@ the read requests for the whole area.
#ifndef UNIV_HOTBACKUP
/** Value in microseconds */
static const int WAIT_FOR_READ = 5000;
/** Number of attemtps made to read in a page in the buffer pool */
static const ulint BUF_PAGE_READ_MAX_RETRIES = 100;
/** The buffer buf_pool of the database */
UNIV_INTERN buf_pool_t* buf_pool = NULL;
@ -267,6 +270,41 @@ read-ahead or flush occurs */
UNIV_INTERN ibool buf_debug_prints = FALSE;
#endif /* UNIV_DEBUG */
#ifdef UNIV_PFS_RWLOCK
/* Keys to register buffer block related rwlocks and mutexes with
performance schema */
UNIV_INTERN mysql_pfs_key_t buf_block_lock_key;
# ifdef UNIV_SYNC_DEBUG
UNIV_INTERN mysql_pfs_key_t buf_block_debug_latch_key;
# endif /* UNIV_SYNC_DEBUG */
#endif /* UNIV_PFS_RWLOCK */
#ifdef UNIV_PFS_MUTEX
UNIV_INTERN mysql_pfs_key_t buffer_block_mutex_key;
UNIV_INTERN mysql_pfs_key_t buf_pool_mutex_key;
UNIV_INTERN mysql_pfs_key_t buf_pool_zip_mutex_key;
UNIV_INTERN mysql_pfs_key_t flush_list_mutex_key;
UNIV_INTERN mysql_pfs_key_t flush_order_mutex_key;
#endif /* UNIV_PFS_MUTEX */
#if defined UNIV_PFS_MUTEX || defined UNIV_PFS_RWLOCK
# ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK
/* Buffer block mutexes and rwlocks can be registered
in one group rather than individually. If PFS_GROUP_BUFFER_SYNC
is defined, register buffer block mutex and rwlock
in one group after their initialization. */
# define PFS_GROUP_BUFFER_SYNC
/* This define caps the number of mutexes/rwlocks can
be registered with performance schema. Developers can
modify this define if necessary. Please note, this would
be effective only if PFS_GROUP_BUFFER_SYNC is defined. */
# define PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER ULINT_MAX
# endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
#endif /* UNIV_PFS_MUTEX || UNIV_PFS_RWLOCK */
/** A chunk of buffers. The buffer pool is allocated in chunks. */
struct buf_chunk_struct{
ulint mem_size; /*!< allocated size of the chunk */
@ -636,6 +674,53 @@ buf_page_print(
}
#ifndef UNIV_HOTBACKUP
# ifdef PFS_GROUP_BUFFER_SYNC
/********************************************************************//**
This function registers mutexes and rwlocks in buffer blocks with
performance schema. If PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER is
defined to be a value less than chunk->size, then only mutexes
and rwlocks in the first PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER
blocks are registered. */
static
void
pfs_register_buffer_block(
/*======================*/
buf_chunk_t* chunk) /*!< in/out: chunk of buffers */
{
ulint i;
ulint num_to_register;
buf_block_t* block;
block = chunk->blocks;
num_to_register = ut_min(chunk->size,
PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER);
for (i = 0; i < num_to_register; i++) {
mutex_t* mutex;
rw_lock_t* rwlock;
# ifdef UNIV_PFS_MUTEX
mutex = &block->mutex;
ut_a(!mutex->pfs_psi);
mutex->pfs_psi = (PSI_server)
? PSI_server->init_mutex(buffer_block_mutex_key, mutex)
: NULL;
# endif /* UNIV_PFS_MUTEX */
# ifdef UNIV_PFS_RWLOCK
rwlock = &block->lock;
ut_a(!rwlock->pfs_psi);
rwlock->pfs_psi = (PSI_server)
? PSI_server->init_rwlock(buf_block_lock_key, rwlock)
: NULL;
# endif /* UNIV_PFS_RWLOCK */
block++;
}
}
# endif /* PFS_GROUP_BUFFER_SYNC */
/********************************************************************//**
Initializes a buffer control block when the buf_pool is created. */
static
@ -675,13 +760,25 @@ buf_block_init(
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
page_zip_des_init(&block->page.zip);
mutex_create(&block->mutex, SYNC_BUF_BLOCK);
#if defined PFS_SKIP_BUFFER_MUTEX_RWLOCK || defined PFS_GROUP_BUFFER_SYNC
/* If PFS_SKIP_BUFFER_MUTEX_RWLOCK is defined, skip registration
of buffer block mutex/rwlock with performance schema. If
PFS_GROUP_BUFFER_SYNC is defined, skip the registration
since buffer block mutex/rwlock will be registered later in
pfs_register_buffer_block() */
mutex_create(PFS_NOT_INSTRUMENTED, &block->mutex, SYNC_BUF_BLOCK);
rw_lock_create(PFS_NOT_INSTRUMENTED, &block->lock, SYNC_LEVEL_VARYING);
#else /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */
mutex_create(buffer_block_mutex_key, &block->mutex, SYNC_BUF_BLOCK);
rw_lock_create(buf_block_lock_key, &block->lock, SYNC_LEVEL_VARYING);
#endif /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */
rw_lock_create(&block->lock, SYNC_LEVEL_VARYING);
ut_ad(rw_lock_validate(&(block->lock)));
#ifdef UNIV_SYNC_DEBUG
rw_lock_create(&block->debug_latch, SYNC_NO_ORDER_CHECK);
rw_lock_create(buf_block_debug_latch_key,
&block->debug_latch, SYNC_NO_ORDER_CHECK);
#endif /* UNIV_SYNC_DEBUG */
}
@ -761,6 +858,9 @@ buf_chunk_init(
frame += UNIV_PAGE_SIZE;
}
#ifdef PFS_GROUP_BUFFER_SYNC
pfs_register_buffer_block(chunk);
#endif
return(chunk);
}
@ -952,8 +1052,10 @@ buf_pool_init(void)
/* 1. Initialize general fields
------------------------------- */
mutex_create(&buf_pool_mutex, SYNC_BUF_POOL);
mutex_create(&buf_pool_zip_mutex, SYNC_BUF_BLOCK);
mutex_create(buf_pool_mutex_key,
&buf_pool_mutex, SYNC_BUF_POOL);
mutex_create(buf_pool_zip_mutex_key,
&buf_pool_zip_mutex, SYNC_BUF_BLOCK);
buf_pool_mutex_enter();
@ -981,6 +1083,11 @@ buf_pool_init(void)
/* 2. Initialize flushing fields
-------------------------------- */
mutex_create(flush_list_mutex_key, &buf_pool->flush_list_mutex,
SYNC_BUF_FLUSH_LIST);
mutex_create(flush_order_mutex_key, &buf_pool->flush_order_mutex,
SYNC_BUF_FLUSH_ORDER);
for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {
buf_pool->no_flush[i] = os_event_create(NULL);
}
@ -1058,7 +1165,9 @@ buf_pool_drop_hash_index(void)
when we have an x-latch on btr_search_latch;
see the comment in buf0buf.h */
if (!block->is_hashed) {
if (buf_block_get_state(block)
!= BUF_BLOCK_FILE_PAGE
|| !block->is_hashed) {
continue;
}
@ -1132,6 +1241,7 @@ buf_relocate(
ut_ad(!bpage->in_zip_hash);
ut_ad(bpage->in_page_hash);
ut_ad(bpage == buf_page_hash_get(bpage->space, bpage->offset));
ut_ad(!buf_pool_watch_is(bpage));
#ifdef UNIV_DEBUG
switch (buf_page_get_state(bpage)) {
case BUF_BLOCK_ZIP_FREE:
@ -1187,8 +1297,6 @@ buf_relocate(
HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, bpage);
HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, dpage);
UNIV_MEM_INVALID(bpage, sizeof *bpage);
}
/********************************************************************//**
@ -1409,6 +1517,7 @@ buf_pool_page_hash_rebuild(void)
buf_page_address_fold(b->space, b->offset), b);
}
buf_flush_list_mutex_enter();
for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
b = UT_LIST_GET_NEXT(list, b)) {
ut_ad(b->in_flush_list);
@ -1436,6 +1545,7 @@ buf_pool_page_hash_rebuild(void)
}
}
buf_flush_list_mutex_exit();
buf_pool_mutex_exit();
}
@ -1496,6 +1606,199 @@ buf_pool_resize(void)
buf_pool_page_hash_rebuild();
}
/** Maximum number of concurrent buffer pool watches */
#define BUF_POOL_WATCH_SIZE 1
/** Sentinel records for buffer pool watches. Protected by buf_pool_mutex. */
static buf_page_t buf_pool_watch[BUF_POOL_WATCH_SIZE];
/********************************************************************
Determine if a block is a sentinel for a buffer pool watch.
@return TRUE if a sentinel for a buffer pool watch, FALSE if not */
UNIV_INTERN
ibool
buf_pool_watch_is(
/*==============*/
const buf_page_t* bpage) /*!< in: block */
{
ut_ad(buf_page_in_file(bpage));
if (UNIV_LIKELY(bpage < &buf_pool_watch[0]
|| bpage >= &buf_pool_watch[BUF_POOL_WATCH_SIZE])) {
ut_ad(buf_page_get_state(bpage) != BUF_BLOCK_ZIP_PAGE
|| bpage->zip.data != NULL);
return(FALSE);
}
ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE);
ut_ad(!bpage->in_zip_hash);
ut_ad(bpage->in_page_hash);
ut_ad(bpage->zip.data == NULL);
ut_ad(bpage->buf_fix_count > 0);
return(TRUE);
}
/****************************************************************//**
Add watch for the given page to be read in. Caller must have the buffer pool
mutex reserved.
@return NULL if watch set, block if the page is in the buffer pool */
UNIV_INTERN
buf_page_t*
buf_pool_watch_set(
/*===============*/
ulint space, /*!< in: space id */
ulint offset, /*!< in: page number */
ulint fold) /*!< in: buf_page_address_fold(space, offset) */
{
buf_page_t* bpage;
ulint i;
ut_ad(buf_pool_mutex_own());
bpage = buf_page_hash_get_low(space, offset, fold);
if (UNIV_LIKELY_NULL(bpage)) {
if (!buf_pool_watch_is(bpage)) {
/* The page was loaded meanwhile. */
return(bpage);
}
/* Add to an existing watch. */
bpage->buf_fix_count++;
return(NULL);
}
for (i = 0; i < BUF_POOL_WATCH_SIZE; i++) {
bpage = &buf_pool_watch[i];
ut_ad(bpage->access_time == 0);
ut_ad(bpage->newest_modification == 0);
ut_ad(bpage->oldest_modification == 0);
ut_ad(bpage->zip.data == NULL);
ut_ad(!bpage->in_zip_hash);
switch (bpage->state) {
case BUF_BLOCK_POOL_WATCH:
ut_ad(!bpage->in_page_hash);
ut_ad(bpage->buf_fix_count == 0);
/* bpage is pointing to buf_pool_watch[],
which is protected by buf_pool_mutex.
Normally, buf_page_t objects are protected by
buf_block_t::mutex or buf_pool_zip_mutex or both. */
bpage->state = BUF_BLOCK_ZIP_PAGE;
bpage->space = space;
bpage->offset = offset;
bpage->buf_fix_count = 1;
ut_d(bpage->in_page_hash = TRUE);
HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
fold, bpage);
return(NULL);
case BUF_BLOCK_ZIP_PAGE:
ut_ad(bpage->in_page_hash);
ut_ad(bpage->buf_fix_count > 0);
break;
default:
ut_error;
}
}
/* Allocation failed. Either the maximum number of purge
threads should never exceed BUF_POOL_WATCH_SIZE, or this code
should be modified to return a special non-NULL value and the
caller should purge the record directly. */
ut_error;
/* Fix compiler warning */
return(NULL);
}
/****************************************************************//**
Remove the sentinel block for the watch before replacing it with a real block.
buf_page_watch_clear() or buf_page_watch_occurred() will notice that
the block has been replaced with the real block.
@return reference count, to be added to the replacement block */
static
void
buf_pool_watch_remove(
/*==================*/
ulint fold, /*!< in: buf_page_address_fold(space, offset) */
buf_page_t* watch) /*!< in/out: sentinel for watch */
{
ut_ad(buf_pool_mutex_own());
HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, watch);
ut_d(watch->in_page_hash = FALSE);
watch->buf_fix_count = 0;
watch->state = BUF_BLOCK_POOL_WATCH;
}
/****************************************************************//**
Stop watching if the page has been read in.
buf_pool_watch_set(space,offset) must have returned NULL before. */
UNIV_INTERN
void
buf_pool_watch_unset(
/*=================*/
ulint space, /*!< in: space id */
ulint offset) /*!< in: page number */
{
buf_page_t* bpage;
ulint fold = buf_page_address_fold(space, offset);
buf_pool_mutex_enter();
bpage = buf_page_hash_get_low(space, offset, fold);
/* The page must exist because buf_pool_watch_set()
increments buf_fix_count. */
ut_a(bpage);
if (UNIV_UNLIKELY(!buf_pool_watch_is(bpage))) {
mutex_t* mutex = buf_page_get_mutex(bpage);
mutex_enter(mutex);
ut_a(bpage->buf_fix_count > 0);
bpage->buf_fix_count--;
mutex_exit(mutex);
} else {
ut_a(bpage->buf_fix_count > 0);
if (UNIV_LIKELY(!--bpage->buf_fix_count)) {
buf_pool_watch_remove(fold, bpage);
}
}
buf_pool_mutex_exit();
}
/****************************************************************//**
Check if the page has been read in.
This may only be called after buf_pool_watch_set(space,offset)
has returned NULL and before invoking buf_pool_watch_unset(space,offset).
@return FALSE if the given page was not read in, TRUE if it was */
UNIV_INTERN
ibool
buf_pool_watch_occurred(
/*====================*/
ulint space, /*!< in: space id */
ulint offset) /*!< in: page number */
{
buf_page_t* bpage;
ulint fold = buf_page_address_fold(space, offset);
ibool ret;
buf_pool_mutex_enter();
bpage = buf_page_hash_get_low(space, offset, fold);
/* The page must exist because buf_pool_watch_set()
increments buf_fix_count. */
ut_a(bpage);
ret = !buf_pool_watch_is(bpage);
buf_pool_mutex_exit();
return(ret);
}
/********************************************************************//**
Moves a page to the start of the buffer pool LRU list. This high-level
function can be used to prevent an important page from slipping out of
@ -1562,6 +1865,7 @@ buf_reset_check_index_page_at_flush(
block = (buf_block_t*) buf_page_hash_get(space, offset);
if (block && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE) {
ut_ad(!buf_pool_watch_is(&block->page));
block->check_index_page_at_flush = FALSE;
}
@ -1590,6 +1894,7 @@ buf_page_peek_if_search_hashed(
if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
is_hashed = FALSE;
} else {
ut_ad(!buf_pool_watch_is(&block->page));
is_hashed = block->is_hashed;
}
@ -1619,6 +1924,7 @@ buf_page_set_file_page_was_freed(
bpage = buf_page_hash_get(space, offset);
if (bpage) {
ut_ad(!buf_pool_watch_is(bpage));
bpage->file_page_was_freed = TRUE;
}
@ -1647,6 +1953,7 @@ buf_page_reset_file_page_was_freed(
bpage = buf_page_hash_get(space, offset);
if (bpage) {
ut_ad(!buf_pool_watch_is(bpage));
bpage->file_page_was_freed = FALSE;
}
@ -1688,6 +1995,7 @@ buf_page_get_zip(
lookup:
bpage = buf_page_hash_get(space, offset);
if (bpage) {
ut_ad(!buf_pool_watch_is(bpage));
break;
}
@ -1709,6 +2017,8 @@ err_exit:
return(NULL);
}
ut_ad(!buf_pool_watch_is(bpage));
switch (buf_page_get_state(bpage)) {
case BUF_BLOCK_NOT_USED:
case BUF_BLOCK_READY_FOR_USE:
@ -2025,29 +2335,36 @@ buf_page_get_gen(
ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
buf_block_t* guess, /*!< in: guessed block or NULL */
ulint mode, /*!< in: BUF_GET, BUF_GET_IF_IN_POOL,
BUF_GET_NO_LATCH */
BUF_GET_NO_LATCH, or
BUF_GET_IF_IN_POOL_OR_WATCH */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
mtr_t* mtr) /*!< in: mini-transaction */
{
buf_block_t* block;
ulint fold;
unsigned access_time;
ulint fix_type;
ibool must_read;
ulint retries = 0;
ut_ad(mtr);
ut_ad(mtr->state == MTR_ACTIVE);
ut_ad((rw_latch == RW_S_LATCH)
|| (rw_latch == RW_X_LATCH)
|| (rw_latch == RW_NO_LATCH));
ut_ad((mode != BUF_GET_NO_LATCH) || (rw_latch == RW_NO_LATCH));
ut_ad((mode == BUF_GET) || (mode == BUF_GET_IF_IN_POOL)
|| (mode == BUF_GET_NO_LATCH));
ut_ad(mode == BUF_GET
|| mode == BUF_GET_IF_IN_POOL
|| mode == BUF_GET_NO_LATCH
|| mode == BUF_GET_IF_IN_POOL_OR_WATCH);
ut_ad(zip_size == fil_space_get_zip_size(space));
ut_ad(ut_is_2pow(zip_size));
#ifndef UNIV_LOG_DEBUG
ut_ad(!ibuf_inside() || ibuf_page(space, zip_size, offset, NULL));
#endif
buf_pool->stat.n_page_gets++;
fold = buf_page_address_fold(space, offset);
loop:
block = guess;
buf_pool_mutex_enter();
@ -2074,21 +2391,59 @@ loop:
}
if (block == NULL) {
block = (buf_block_t*) buf_page_hash_get(space, offset);
block = (buf_block_t*) buf_page_hash_get_low(space, offset,
fold);
}
loop2:
if (block && buf_pool_watch_is(&block->page)) {
block = NULL;
}
if (block == NULL) {
/* Page not in buf_pool: needs to be read from file */
if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
block = (buf_block_t*) buf_pool_watch_set(
space, offset, fold);
if (UNIV_LIKELY_NULL(block)) {
goto got_block;
}
}
buf_pool_mutex_exit();
if (mode == BUF_GET_IF_IN_POOL) {
if (mode == BUF_GET_IF_IN_POOL
|| mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
return(NULL);
}
buf_read_page(space, zip_size, offset);
if (buf_read_page(space, zip_size, offset)) {
retries = 0;
} else if (retries < BUF_PAGE_READ_MAX_RETRIES) {
++retries;
} else {
fprintf(stderr, "InnoDB: Error: Unable"
" to read tablespace %lu page no"
" %lu into the buffer pool after"
" %lu attempts\n"
"InnoDB: The most probable cause"
" of this error may be that the"
" table has been corrupted.\n"
"InnoDB: You can try to fix this"
" problem by using"
" innodb_force_recovery.\n"
"InnoDB: Please see reference manual"
" for more details.\n"
"InnoDB: Aborting...\n",
space, offset,
BUF_PAGE_READ_MAX_RETRIES);
ut_error;
}
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(++buf_dbg_counter % 37 || buf_validate());
@ -2096,12 +2451,16 @@ loop2:
goto loop;
}
got_block:
ut_ad(page_zip_get_size(&block->page.zip) == zip_size);
must_read = buf_block_get_io_fix(block) == BUF_IO_READ;
if (must_read && mode == BUF_GET_IF_IN_POOL) {
/* The page is only being read to buffer */
/* The page is being read to buffer pool,
but we cannot wait around for the read to
complete. */
buf_pool_mutex_exit();
return(NULL);
@ -2147,7 +2506,7 @@ wait_until_unfixed:
{
buf_page_t* hash_bpage
= buf_page_hash_get(space, offset);
= buf_page_hash_get_low(space, offset, fold);
if (UNIV_UNLIKELY(bpage != hash_bpage)) {
/* The buf_pool->page_hash was modified
@ -2196,22 +2555,8 @@ wait_until_unfixed:
ut_ad(!block->page.in_flush_list);
} else {
/* Relocate buf_pool->flush_list. */
buf_page_t* b;
b = UT_LIST_GET_PREV(list, &block->page);
ut_ad(block->page.in_flush_list);
UT_LIST_REMOVE(list, buf_pool->flush_list,
&block->page);
if (b) {
UT_LIST_INSERT_AFTER(
list, buf_pool->flush_list, b,
&block->page);
} else {
UT_LIST_ADD_FIRST(
list, buf_pool->flush_list,
&block->page);
}
buf_flush_relocate_on_flush_list(bpage,
&block->page);
}
/* Buffer-fix, I/O-fix, and X-latch the block
@ -2225,6 +2570,9 @@ wait_until_unfixed:
block->page.buf_fix_count = 1;
buf_block_set_io_fix(block, BUF_IO_READ);
rw_lock_x_lock(&block->lock);
UNIV_MEM_INVALID(bpage, sizeof *bpage);
mutex_exit(&block->mutex);
mutex_exit(&buf_pool_zip_mutex);
buf_pool->n_pend_unzip++;
@ -2237,7 +2585,7 @@ wait_until_unfixed:
while not holding buf_pool_mutex or block->mutex. */
success = buf_zip_decompress(block, srv_use_checksums);
if (UNIV_LIKELY(success)) {
if (UNIV_LIKELY(success && !recv_no_ibuf_operations)) {
ibuf_merge_or_delete_for_page(block, space, offset,
zip_size, TRUE);
}
@ -2356,8 +2704,8 @@ page.
@return TRUE if success */
UNIV_INTERN
ibool
buf_page_optimistic_get_func(
/*=========================*/
buf_page_optimistic_get(
/*====================*/
ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
buf_block_t* block, /*!< in: guessed buffer block */
ib_uint64_t modify_clock,/*!< in: modify clock value if mode is
@ -2370,7 +2718,9 @@ buf_page_optimistic_get_func(
ibool success;
ulint fix_type;
ut_ad(mtr && block);
ut_ad(block);
ut_ad(mtr);
ut_ad(mtr->state == MTR_ACTIVE);
ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
mutex_enter(&block->mutex);
@ -2482,6 +2832,7 @@ buf_page_get_known_nowait(
ulint fix_type;
ut_ad(mtr);
ut_ad(mtr->state == MTR_ACTIVE);
ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
mutex_enter(&block->mutex);
@ -2581,14 +2932,19 @@ buf_page_try_get_func(
ibool success;
ulint fix_type;
ut_ad(mtr);
ut_ad(mtr->state == MTR_ACTIVE);
buf_pool_mutex_enter();
block = buf_block_hash_get(space_id, page_no);
if (!block) {
if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
buf_pool_mutex_exit();
return(NULL);
}
ut_ad(!buf_pool_watch_is(&block->page));
mutex_enter(&block->mutex);
buf_pool_mutex_exit();
@ -2673,6 +3029,7 @@ buf_page_init(
ulint space, /*!< in: space id */
ulint offset, /*!< in: offset of the page within space
in units of a page */
ulint fold, /*!< in: buf_page_address_fold(space,offset) */
buf_block_t* block) /*!< in: block to init */
{
buf_page_t* hash_page;
@ -2697,11 +3054,20 @@ buf_page_init(
block->lock_hash_val = lock_rec_hash(space, offset);
buf_page_init_low(&block->page);
/* Insert into the hash table of file pages */
hash_page = buf_page_hash_get(space, offset);
hash_page = buf_page_hash_get_low(space, offset, fold);
if (UNIV_LIKELY_NULL(hash_page)) {
if (UNIV_LIKELY(!hash_page)) {
} else if (UNIV_LIKELY(buf_pool_watch_is(hash_page))) {
/* Preserve the reference count. */
ulint buf_fix_count = hash_page->buf_fix_count;
ut_a(buf_fix_count > 0);
block->page.buf_fix_count += buf_fix_count;
buf_pool_watch_remove(fold, hash_page);
} else {
fprintf(stderr,
"InnoDB: Error: page %lu %lu already found"
" in the hash table: %p, %p\n",
@ -2719,13 +3085,11 @@ buf_page_init(
ut_error;
}
buf_page_init_low(&block->page);
ut_ad(!block->page.in_zip_hash);
ut_ad(!block->page.in_page_hash);
ut_d(block->page.in_page_hash = TRUE);
HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
buf_page_address_fold(space, offset), &block->page);
fold, &block->page);
}
/********************************************************************//**
@ -2753,8 +3117,10 @@ buf_page_init_for_read(
ulint offset) /*!< in: page number */
{
buf_block_t* block;
buf_page_t* bpage;
buf_page_t* bpage = NULL;
buf_page_t* watch_page;
mtr_t mtr;
ulint fold;
ibool lru = FALSE;
void* data;
@ -2789,10 +3155,14 @@ buf_page_init_for_read(
ut_ad(block);
}
fold = buf_page_address_fold(space, offset);
buf_pool_mutex_enter();
if (buf_page_hash_get(space, offset)) {
watch_page = buf_page_hash_get_low(space, offset, fold);
if (watch_page && !buf_pool_watch_is(watch_page)) {
/* The page is already in the buffer pool. */
watch_page = NULL;
err_exit:
if (block) {
mutex_enter(&block->mutex);
@ -2816,7 +3186,8 @@ err_exit:
if (block) {
bpage = &block->page;
mutex_enter(&block->mutex);
buf_page_init(space, offset, block);
buf_page_init(space, offset, fold, block);
/* The block must be put to the LRU list, to the old blocks */
buf_LRU_add_block(bpage, TRUE/* to old blocks */);
@ -2875,15 +3246,19 @@ err_exit:
/* If buf_buddy_alloc() allocated storage from the LRU list,
it released and reacquired buf_pool_mutex. Thus, we must
check the page_hash again, as it may have been modified. */
if (UNIV_UNLIKELY(lru)
&& UNIV_LIKELY_NULL(buf_page_hash_get(space, offset))) {
if (UNIV_UNLIKELY(lru)) {
watch_page = buf_page_hash_get_low(space, offset, fold);
if (UNIV_UNLIKELY
(watch_page && !buf_pool_watch_is(watch_page))) {
/* The block was added by some other thread. */
buf_buddy_free(bpage, sizeof *bpage);
buf_buddy_free(data, zip_size);
/* The block was added by some other thread. */
watch_page = NULL;
buf_buddy_free(bpage, sizeof *bpage);
buf_buddy_free(data, zip_size);
bpage = NULL;
goto func_exit;
bpage = NULL;
goto func_exit;
}
}
page_zip_des_init(&bpage->zip);
@ -2893,11 +3268,14 @@ err_exit:
mutex_enter(&buf_pool_zip_mutex);
UNIV_MEM_DESC(bpage->zip.data,
page_zip_get_size(&bpage->zip), bpage);
buf_page_init_low(bpage);
bpage->state = BUF_BLOCK_ZIP_PAGE;
bpage->space = space;
bpage->offset = offset;
#ifdef UNIV_DEBUG
bpage->in_page_hash = FALSE;
bpage->in_zip_hash = FALSE;
@ -2907,8 +3285,18 @@ err_exit:
#endif /* UNIV_DEBUG */
ut_d(bpage->in_page_hash = TRUE);
HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
buf_page_address_fold(space, offset), bpage);
if (UNIV_LIKELY_NULL(watch_page)) {
/* Preserve the reference count. */
ulint buf_fix_count = watch_page->buf_fix_count;
ut_a(buf_fix_count > 0);
bpage->buf_fix_count += buf_fix_count;
ut_ad(buf_pool_watch_is(watch_page));
buf_pool_watch_remove(fold, watch_page);
}
HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold,
bpage);
/* The block must be put to the LRU list, to the old blocks */
buf_LRU_add_block(bpage, TRUE/* to old blocks */);
@ -2952,17 +3340,22 @@ buf_page_create(
buf_block_t* block;
buf_block_t* free_block = NULL;
ulint time_ms = ut_time_ms();
ulint fold;
ut_ad(mtr);
ut_ad(mtr->state == MTR_ACTIVE);
ut_ad(space || !zip_size);
free_block = buf_LRU_get_free_block(0);
fold = buf_page_address_fold(space, offset);
buf_pool_mutex_enter();
block = (buf_block_t*) buf_page_hash_get(space, offset);
block = (buf_block_t*) buf_page_hash_get_low(space, offset, fold);
if (block && buf_page_in_file(&block->page)) {
if (block && buf_page_in_file(&block->page)
&& !buf_pool_watch_is(&block->page)) {
#ifdef UNIV_IBUF_COUNT_DEBUG
ut_a(ibuf_count_get(space, offset) == 0);
#endif
@ -2992,7 +3385,7 @@ buf_page_create(
mutex_enter(&block->mutex);
buf_page_init(space, offset, block);
buf_page_init(space, offset, fold, block);
/* The block must be put to the LRU list */
buf_LRU_add_block(&block->page, FALSE);
@ -3441,11 +3834,6 @@ buf_validate(void)
}
n_lru++;
if (block->page.oldest_modification > 0) {
n_flush++;
}
break;
case BUF_BLOCK_NOT_USED:
@ -3484,6 +3872,10 @@ buf_validate(void)
ut_error;
break;
}
/* It is OK to read oldest_modification here because
we have acquired buf_pool_zip_mutex above which acts
as the 'block->mutex' for these bpages. */
ut_a(!b->oldest_modification);
ut_a(buf_page_hash_get(b->space, b->offset) == b);
@ -3491,23 +3883,23 @@ buf_validate(void)
n_zip++;
}
/* Check dirty compressed-only blocks. */
/* Check dirty blocks. */
buf_flush_list_mutex_enter();
for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
b = UT_LIST_GET_NEXT(list, b)) {
ut_ad(b->in_flush_list);
ut_a(b->oldest_modification);
n_flush++;
switch (buf_page_get_state(b)) {
case BUF_BLOCK_ZIP_DIRTY:
ut_a(b->oldest_modification);
n_lru++;
n_flush++;
n_zip++;
switch (buf_page_get_io_fix(b)) {
case BUF_IO_NONE:
case BUF_IO_READ:
break;
case BUF_IO_WRITE:
switch (buf_page_get_flush_type(b)) {
case BUF_FLUSH_LRU:
@ -3540,6 +3932,10 @@ buf_validate(void)
ut_a(buf_page_hash_get(b->space, b->offset) == b);
}
ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == n_flush);
buf_flush_list_mutex_exit();
mutex_exit(&buf_pool_zip_mutex);
if (n_lru + n_free > buf_pool->curr_size + n_zip) {
@ -3556,7 +3952,6 @@ buf_validate(void)
(ulong) n_free);
ut_error;
}
ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == n_flush);
ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
@ -3597,6 +3992,7 @@ buf_print(void)
counts = mem_alloc(sizeof(ulint) * size);
buf_pool_mutex_enter();
buf_flush_list_mutex_enter();
fprintf(stderr,
"buf_pool size %lu\n"
@ -3623,6 +4019,8 @@ buf_print(void)
(ulong) buf_pool->stat.n_pages_created,
(ulong) buf_pool->stat.n_pages_written);
buf_flush_list_mutex_exit();
/* Count the number of blocks belonging to each index in the buffer */
n_found = 0;
@ -3746,6 +4144,7 @@ buf_get_latched_pages_number(void)
}
}
buf_flush_list_mutex_enter();
for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
b = UT_LIST_GET_NEXT(list, b)) {
ut_ad(b->in_flush_list);
@ -3771,6 +4170,7 @@ buf_get_latched_pages_number(void)
}
}
buf_flush_list_mutex_exit();
mutex_exit(&buf_pool_zip_mutex);
buf_pool_mutex_exit();
@ -3803,16 +4203,13 @@ buf_get_modified_ratio_pct(void)
{
ulint ratio;
buf_pool_mutex_enter();
/* This is for heuristics. No need to grab any mutex here. */
ratio = (100 * UT_LIST_GET_LEN(buf_pool->flush_list))
/ (1 + UT_LIST_GET_LEN(buf_pool->LRU)
+ UT_LIST_GET_LEN(buf_pool->free));
/* 1 + is there to avoid division by zero */
buf_pool_mutex_exit();
return(ratio);
}
@ -3831,6 +4228,7 @@ buf_print_io(
ut_ad(buf_pool);
buf_pool_mutex_enter();
buf_flush_list_mutex_enter();
fprintf(file,
"Buffer pool size %lu\n"
@ -3852,6 +4250,8 @@ buf_print_io(
+ buf_pool->init_flush[BUF_FLUSH_LIST],
(ulong) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]);
buf_flush_list_mutex_exit();
current_time = time(NULL);
time_elapsed = 0.001 + difftime(current_time,
buf_pool->last_printout_time);

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -87,30 +87,176 @@ buf_flush_validate_low(void);
/*========================*/
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
/******************************************************************//**
Insert a block in the flush_rbt and returns a pointer to its
predecessor or NULL if no predecessor. The ordering is maintained
on the basis of the <oldest_modification, space, offset> key.
@return pointer to the predecessor or NULL if no predecessor. */
static
buf_page_t*
buf_flush_insert_in_flush_rbt(
/*==========================*/
buf_page_t* bpage) /*!< in: bpage to be inserted. */
{
buf_page_t* prev = NULL;
const ib_rbt_node_t* c_node;
const ib_rbt_node_t* p_node;
ut_ad(buf_flush_list_mutex_own());
/* Insert this buffer into the rbt. */
c_node = rbt_insert(buf_pool->flush_rbt, &bpage, &bpage);
ut_a(c_node != NULL);
/* Get the predecessor. */
p_node = rbt_prev(buf_pool->flush_rbt, c_node);
if (p_node != NULL) {
prev = *rbt_value(buf_page_t*, p_node);
ut_a(prev != NULL);
}
return(prev);
}
/*********************************************************//**
Delete a bpage from the flush_rbt. */
static
void
buf_flush_delete_from_flush_rbt(
/*============================*/
buf_page_t* bpage) /*!< in: bpage to be removed. */
{
ibool ret = FALSE;
ut_ad(buf_flush_list_mutex_own());
ret = rbt_delete(buf_pool->flush_rbt, &bpage);
ut_ad(ret);
}
/*****************************************************************//**
Compare two modified blocks in the buffer pool. The key for comparison
is:
key = <oldest_modification, space, offset>
This comparison is used to maintian ordering of blocks in the
buf_pool->flush_rbt.
Note that for the purpose of flush_rbt, we only need to order blocks
on the oldest_modification. The other two fields are used to uniquely
identify the blocks.
@return < 0 if b2 < b1, 0 if b2 == b1, > 0 if b2 > b1 */
static
int
buf_flush_block_cmp(
/*================*/
const void* p1, /*!< in: block1 */
const void* p2) /*!< in: block2 */
{
int ret;
const buf_page_t* b1 = *(const buf_page_t**) p1;
const buf_page_t* b2 = *(const buf_page_t**) p2;
ut_ad(b1 != NULL);
ut_ad(b2 != NULL);
ut_ad(buf_flush_list_mutex_own());
ut_ad(b1->in_flush_list);
ut_ad(b2->in_flush_list);
if (b2->oldest_modification
> b1->oldest_modification) {
return(1);
}
if (b2->oldest_modification
< b1->oldest_modification) {
return(-1);
}
/* If oldest_modification is same then decide on the space. */
ret = (int)(b2->space - b1->space);
/* Or else decide ordering on the offset field. */
return(ret ? ret : (int)(b2->offset - b1->offset));
}
/********************************************************************//**
Initialize the red-black tree to speed up insertions into the flush_list
during recovery process. Should be called at the start of recovery
process before any page has been read/written. */
UNIV_INTERN
void
buf_flush_init_flush_rbt(void)
/*==========================*/
{
buf_flush_list_mutex_enter();
/* Create red black tree for speedy insertions in flush list. */
buf_pool->flush_rbt = rbt_create(sizeof(buf_page_t*),
buf_flush_block_cmp);
buf_flush_list_mutex_exit();
}
/********************************************************************//**
Frees up the red-black tree. */
UNIV_INTERN
void
buf_flush_free_flush_rbt(void)
/*==========================*/
{
buf_flush_list_mutex_enter();
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(buf_flush_validate_low());
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
rbt_free(buf_pool->flush_rbt);
buf_pool->flush_rbt = NULL;
buf_flush_list_mutex_exit();
}
/********************************************************************//**
Inserts a modified block into the flush list. */
UNIV_INTERN
void
buf_flush_insert_into_flush_list(
/*=============================*/
buf_block_t* block) /*!< in/out: block which is modified */
buf_block_t* block, /*!< in/out: block which is modified */
ib_uint64_t lsn) /*!< in: oldest modification */
{
ut_ad(buf_pool_mutex_own());
ut_ad(!buf_pool_mutex_own());
ut_ad(buf_flush_order_mutex_own());
ut_ad(mutex_own(&block->mutex));
buf_flush_list_mutex_enter();
ut_ad((UT_LIST_GET_FIRST(buf_pool->flush_list) == NULL)
|| (UT_LIST_GET_FIRST(buf_pool->flush_list)->oldest_modification
<= block->page.oldest_modification));
<= lsn));
/* If we are in the recovery then we need to update the flush
red-black tree as well. */
if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
buf_flush_list_mutex_exit();
buf_flush_insert_sorted_into_flush_list(block, lsn);
return;
}
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
ut_ad(block->page.in_LRU_list);
ut_ad(block->page.in_page_hash);
ut_ad(!block->page.in_zip_hash);
ut_ad(!block->page.in_flush_list);
ut_d(block->page.in_flush_list = TRUE);
block->page.oldest_modification = lsn;
UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(buf_flush_validate_low());
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
buf_flush_list_mutex_exit();
}
/********************************************************************//**
@ -121,27 +267,61 @@ UNIV_INTERN
void
buf_flush_insert_sorted_into_flush_list(
/*====================================*/
buf_block_t* block) /*!< in/out: block which is modified */
buf_block_t* block, /*!< in/out: block which is modified */
ib_uint64_t lsn) /*!< in: oldest modification */
{
buf_page_t* prev_b;
buf_page_t* b;
ut_ad(buf_pool_mutex_own());
ut_ad(!buf_pool_mutex_own());
ut_ad(buf_flush_order_mutex_own());
ut_ad(mutex_own(&block->mutex));
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
buf_flush_list_mutex_enter();
/* The field in_LRU_list is protected by buf_pool_mutex, which
we are not holding. However, while a block is in the flush
list, it is dirty and cannot be discarded, not from the
page_hash or from the LRU list. At most, the uncompressed
page frame of a compressed block may be discarded or created
(copying the block->page to or from a buf_page_t that is
dynamically allocated from buf_buddy_alloc()). Because those
transitions hold block->mutex and the flush list mutex (via
buf_flush_relocate_on_flush_list()), there is no possibility
of a race condition in the assertions below. */
ut_ad(block->page.in_LRU_list);
ut_ad(block->page.in_page_hash);
/* buf_buddy_block_register() will take a block in the
BUF_BLOCK_MEMORY state, not a file page. */
ut_ad(!block->page.in_zip_hash);
ut_ad(!block->page.in_flush_list);
ut_d(block->page.in_flush_list = TRUE);
block->page.oldest_modification = lsn;
prev_b = NULL;
b = UT_LIST_GET_FIRST(buf_pool->flush_list);
while (b && b->oldest_modification > block->page.oldest_modification) {
ut_ad(b->in_flush_list);
prev_b = b;
b = UT_LIST_GET_NEXT(list, b);
/* For the most part when this function is called the flush_rbt
should not be NULL. In a very rare boundary case it is possible
that the flush_rbt has already been freed by the recovery thread
before the last page was hooked up in the flush_list by the
io-handler thread. In that case we'll just do a simple
linear search in the else block. */
if (buf_pool->flush_rbt) {
prev_b = buf_flush_insert_in_flush_rbt(&block->page);
} else {
b = UT_LIST_GET_FIRST(buf_pool->flush_list);
while (b && b->oldest_modification
> block->page.oldest_modification) {
ut_ad(b->in_flush_list);
prev_b = b;
b = UT_LIST_GET_NEXT(list, b);
}
}
if (prev_b == NULL) {
@ -154,6 +334,8 @@ buf_flush_insert_sorted_into_flush_list(
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(buf_flush_validate_low());
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
buf_flush_list_mutex_exit();
}
/********************************************************************//**
@ -237,7 +419,8 @@ buf_flush_remove(
ut_ad(buf_pool_mutex_own());
ut_ad(mutex_own(buf_page_get_mutex(bpage)));
ut_ad(bpage->in_flush_list);
ut_d(bpage->in_flush_list = FALSE);
buf_flush_list_mutex_enter();
switch (buf_page_get_state(bpage)) {
case BUF_BLOCK_ZIP_PAGE:
@ -259,10 +442,97 @@ buf_flush_remove(
break;
}
/* If the flush_rbt is active then delete from it as well. */
if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
buf_flush_delete_from_flush_rbt(bpage);
}
/* Must be done after we have removed it from the flush_rbt
because we assert on in_flush_list in comparison function. */
ut_d(bpage->in_flush_list = FALSE);
bpage->oldest_modification = 0;
ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
ut_ad(ut_list_node_313->in_flush_list)));
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(buf_flush_validate_low());
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
buf_flush_list_mutex_exit();
}
/*******************************************************************//**
Relocates a buffer control block on the flush_list.
Note that it is assumed that the contents of bpage has already been
copied to dpage.
IMPORTANT: When this function is called bpage and dpage are not
exact copy of each other. For example, they both will have different
::state. Also the ::list pointers in dpage may be stale. We need to
use the current list node (bpage) to do the list manipulation because
the list pointers could have changed between the time that we copied
the contents of bpage to the dpage and the flush list manipulation
below. */
UNIV_INTERN
void
buf_flush_relocate_on_flush_list(
/*=============================*/
buf_page_t* bpage, /*!< in/out: control block being moved */
buf_page_t* dpage) /*!< in/out: destination block */
{
buf_page_t* prev;
buf_page_t* prev_b = NULL;
ut_ad(buf_pool_mutex_own());
ut_ad(mutex_own(buf_page_get_mutex(bpage)));
buf_flush_list_mutex_enter();
/* FIXME: At this point we have both buf_pool and flush_list
mutexes. Theoratically removal of a block from flush list is
only covered by flush_list mutex but currently we do
have buf_pool mutex in buf_flush_remove() therefore this block
is guaranteed to be in the flush list. We need to check if
this will work without the assumption of block removing code
having the buf_pool mutex. */
ut_ad(bpage->in_flush_list);
ut_ad(dpage->in_flush_list);
/* If recovery is active we must swap the control blocks in
the flush_rbt as well. */
if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
buf_flush_delete_from_flush_rbt(bpage);
prev_b = buf_flush_insert_in_flush_rbt(dpage);
}
/* Must be done after we have removed it from the flush_rbt
because we assert on in_flush_list in comparison function. */
ut_d(bpage->in_flush_list = FALSE);
prev = UT_LIST_GET_PREV(list, bpage);
UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
if (prev) {
ut_ad(prev->in_flush_list);
UT_LIST_INSERT_AFTER(
list,
buf_pool->flush_list,
prev, dpage);
} else {
UT_LIST_ADD_FIRST(
list,
buf_pool->flush_list,
dpage);
}
/* Just an extra check. Previous in flush_list
should be the same control block as in flush_rbt. */
ut_a(!buf_pool->flush_rbt || prev_b == prev);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(buf_flush_validate_low());
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
buf_flush_list_mutex_exit();
}
/********************************************************************//**
@ -723,6 +993,7 @@ buf_flush_write_block_low(
relocated in the buffer pool or removed from flush_list or
LRU_list. */
ut_ad(!buf_pool_mutex_own());
ut_ad(!buf_flush_list_mutex_own());
ut_ad(!mutex_own(buf_page_get_mutex(bpage)));
ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_WRITE);
ut_ad(bpage->oldest_modification != 0);
@ -918,17 +1189,19 @@ buf_flush_try_neighbors(
ulint count = 0;
ulint i;
ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
ut_ad(flush_type == BUF_FLUSH_LRU
|| flush_type == BUF_FLUSH_LIST);
if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) {
/* If there is little space, it is better not to flush any
block except from the end of the LRU list */
/* If there is little space, it is better not to flush
any block except from the end of the LRU list */
low = offset;
high = offset + 1;
} else {
/* When flushed, dirty blocks are searched in neighborhoods of
this size, and flushed along with the original page. */
/* When flushed, dirty blocks are searched in
neighborhoods of this size, and flushed along with the
original page. */
ulint buf_flush_area = ut_min(BUF_READ_AHEAD_AREA,
buf_pool->curr_size / 16);
@ -969,11 +1242,12 @@ buf_flush_try_neighbors(
if (buf_flush_ready_for_flush(bpage, flush_type)
&& (i == offset || !bpage->buf_fix_count)) {
/* We only try to flush those
neighbors != offset where the buf fix count is
zero, as we then know that we probably can
latch the page without a semaphore wait.
Semaphore waits are expensive because we must
flush the doublewrite buffer before we start
neighbors != offset where the buf fix
count is zero, as we then know that we
probably can latch the page without a
semaphore wait. Semaphore waits are
expensive because we must flush the
doublewrite buffer before we start
waiting. */
buf_flush_page(bpage, flush_type);
@ -992,6 +1266,206 @@ buf_flush_try_neighbors(
return(count);
}
/********************************************************************//**
Check if the block is modified and ready for flushing. If the the block
is ready to flush then flush the page and try o flush its neighbors.
@return TRUE if buf_pool mutex was not released during this function.
This does not guarantee that some pages were written as well.
Number of pages written are incremented to the count. */
static
ibool
buf_flush_page_and_try_neighbors(
/*=============================*/
buf_page_t* bpage, /*!< in: buffer control block,
must be
buf_page_in_file(bpage) */
enum buf_flush flush_type, /*!< in: BUF_FLUSH_LRU
or BUF_FLUSH_LIST */
ulint* count) /*!< in/out: number of pages
flushed */
{
ibool flushed = FALSE;
mutex_t* block_mutex;
ut_ad(buf_pool_mutex_own());
block_mutex = buf_page_get_mutex(bpage);
mutex_enter(block_mutex);
ut_a(buf_page_in_file(bpage));
if (buf_flush_ready_for_flush(bpage, flush_type)) {
ulint space;
ulint offset;
buf_pool_mutex_exit();
/* These fields are protected by both the
buffer pool mutex and block mutex. */
space = buf_page_get_space(bpage);
offset = buf_page_get_page_no(bpage);
mutex_exit(block_mutex);
/* Try to flush also all the neighbors */
*count += buf_flush_try_neighbors(space, offset,
flush_type);
buf_pool_mutex_enter();
flushed = TRUE;
} else {
mutex_exit(block_mutex);
}
ut_ad(buf_pool_mutex_own());
return(flushed);
}
/*******************************************************************//**
This utility flushes dirty blocks from the end of the LRU list.
In the case of an LRU flush the calling thread may own latches to
pages: to avoid deadlocks, this function must be written so that it
cannot end up waiting for these latches!
@return number of blocks for which the write request was queued. */
static
ulint
buf_flush_LRU_list_batch(
/*=====================*/
ulint max) /*!< in: max of blocks to flush */
{
buf_page_t* bpage;
ulint count = 0;
ut_ad(buf_pool_mutex_own());
do {
/* Start from the end of the list looking for a
suitable block to be flushed. */
bpage = UT_LIST_GET_LAST(buf_pool->LRU);
/* Iterate backwards over the flush list till we find
a page that isn't ready for flushing. */
while (bpage != NULL
&& !buf_flush_page_and_try_neighbors(
bpage, BUF_FLUSH_LRU, &count)) {
bpage = UT_LIST_GET_PREV(LRU, bpage);
}
} while (bpage != NULL && count < max);
/* We keep track of all flushes happening as part of LRU
flush. When estimating the desired rate at which flush_list
should be flushed, we factor in this value. */
buf_lru_flush_page_count += count;
ut_ad(buf_pool_mutex_own());
return(count);
}
/*******************************************************************//**
This utility flushes dirty blocks from the end of the flush_list.
the calling thread is not allowed to own any latches on pages!
@return number of blocks for which the write request was queued;
ULINT_UNDEFINED if there was a flush of the same type already
running */
static
ulint
buf_flush_flush_list_batch(
/*=======================*/
ulint min_n, /*!< in: wished minimum mumber
of blocks flushed (it is not
guaranteed that the actual
number is that big, though) */
ib_uint64_t lsn_limit) /*!< all blocks whose
oldest_modification is smaller
than this should be flushed (if
their number does not exceed
min_n) */
{
ulint len;
buf_page_t* bpage;
ulint count = 0;
ut_ad(buf_pool_mutex_own());
/* If we have flushed enough, leave the loop */
do {
/* Start from the end of the list looking for a suitable
block to be flushed. */
buf_flush_list_mutex_enter();
/* We use len here because theoratically insertions can
happen in the flush_list below while we are traversing
it for a suitable candidate for flushing. We'd like to
set a limit on how farther we are willing to traverse
the list. */
len = UT_LIST_GET_LEN(buf_pool->flush_list);
bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
if (bpage) {
ut_a(bpage->oldest_modification > 0);
}
if (!bpage || bpage->oldest_modification >= lsn_limit) {
/* We have flushed enough */
buf_flush_list_mutex_exit();
break;
}
ut_a(bpage->oldest_modification > 0);
ut_ad(bpage->in_flush_list);
buf_flush_list_mutex_exit();
/* The list may change during the flushing and we cannot
safely preserve within this function a pointer to a
block in the list! */
while (bpage != NULL
&& len > 0
&& !buf_flush_page_and_try_neighbors(
bpage, BUF_FLUSH_LIST, &count)) {
buf_flush_list_mutex_enter();
/* If we are here that means that buf_pool
mutex was not released in
buf_flush_page_and_try_neighbors() above and
this guarantees that bpage didn't get
relocated since we released the flush_list
mutex above. There is a chance, however, that
the bpage got removed from flush_list (not
currently possible because flush_list_remove()
also obtains buf_pool mutex but that may change
in future). To avoid this scenario we check
the oldest_modification and if it is zero
we start all over again. */
if (bpage->oldest_modification == 0) {
buf_flush_list_mutex_exit();
break;
}
bpage = UT_LIST_GET_PREV(list, bpage);
ut_ad(!bpage || bpage->in_flush_list);
buf_flush_list_mutex_exit();
--len;
}
} while (count < min_n && bpage != NULL && len > 0);
ut_ad(buf_pool_mutex_own());
return(count);
}
/*******************************************************************//**
This utility flushes dirty blocks from the end of the LRU list or flush_list.
NOTE 1: in the case of an LRU flush the calling thread may own latches to
@ -1017,22 +1491,18 @@ buf_flush_batch(
(if their number does not exceed
min_n), otherwise ignored */
{
buf_page_t* bpage;
ulint page_count = 0;
ulint old_page_count;
ulint space;
ulint offset;
ulint count = 0;
ut_ad((flush_type == BUF_FLUSH_LRU)
|| (flush_type == BUF_FLUSH_LIST));
ut_ad(flush_type == BUF_FLUSH_LRU
|| flush_type == BUF_FLUSH_LIST);
#ifdef UNIV_SYNC_DEBUG
ut_ad((flush_type != BUF_FLUSH_LIST)
|| sync_thread_levels_empty_gen(TRUE));
#endif /* UNIV_SYNC_DEBUG */
buf_pool_mutex_enter();
if ((buf_pool->n_flush[flush_type] > 0)
|| (buf_pool->init_flush[flush_type] == TRUE)) {
if (buf_pool->n_flush[flush_type] > 0
|| buf_pool->init_flush[flush_type] == TRUE) {
/* There is already a flush batch of the same type running */
@ -1043,82 +1513,21 @@ buf_flush_batch(
buf_pool->init_flush[flush_type] = TRUE;
for (;;) {
flush_next:
/* If we have flushed enough, leave the loop */
if (page_count >= min_n) {
break;
}
/* Start from the end of the list looking for a suitable
block to be flushed. */
if (flush_type == BUF_FLUSH_LRU) {
bpage = UT_LIST_GET_LAST(buf_pool->LRU);
} else {
ut_ad(flush_type == BUF_FLUSH_LIST);
bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
if (!bpage
|| bpage->oldest_modification >= lsn_limit) {
/* We have flushed enough */
break;
}
ut_ad(bpage->in_flush_list);
}
/* Note that after finding a single flushable page, we try to
flush also all its neighbors, and after that start from the
END of the LRU list or flush list again: the list may change
during the flushing and we cannot safely preserve within this
function a pointer to a block in the list! */
do {
mutex_t*block_mutex = buf_page_get_mutex(bpage);
ibool ready;
ut_a(buf_page_in_file(bpage));
mutex_enter(block_mutex);
ready = buf_flush_ready_for_flush(bpage, flush_type);
mutex_exit(block_mutex);
if (ready) {
space = buf_page_get_space(bpage);
offset = buf_page_get_page_no(bpage);
buf_pool_mutex_exit();
old_page_count = page_count;
/* Try to flush also all the neighbors */
page_count += buf_flush_try_neighbors(
space, offset, flush_type);
/* fprintf(stderr,
"Flush type %lu, page no %lu, neighb %lu\n",
flush_type, offset,
page_count - old_page_count); */
buf_pool_mutex_enter();
goto flush_next;
} else if (flush_type == BUF_FLUSH_LRU) {
bpage = UT_LIST_GET_PREV(LRU, bpage);
} else {
ut_ad(flush_type == BUF_FLUSH_LIST);
bpage = UT_LIST_GET_PREV(list, bpage);
ut_ad(!bpage || bpage->in_flush_list);
}
} while (bpage != NULL);
/* If we could not find anything to flush, leave the loop */
/* Note: The buffer pool mutex is released and reacquired within
the flush functions. */
switch(flush_type) {
case BUF_FLUSH_LRU:
count = buf_flush_LRU_list_batch(min_n);
break;
case BUF_FLUSH_LIST:
count = buf_flush_flush_list_batch(min_n, lsn_limit);
break;
default:
ut_error;
}
ut_ad(buf_pool_mutex_own());
buf_pool->init_flush[flush_type] = FALSE;
if (buf_pool->n_flush[flush_type] == 0) {
@ -1133,26 +1542,17 @@ flush_next:
buf_flush_buffered_writes();
#ifdef UNIV_DEBUG
if (buf_debug_prints && page_count > 0) {
ut_a(flush_type == BUF_FLUSH_LRU
|| flush_type == BUF_FLUSH_LIST);
if (buf_debug_prints && count > 0) {
fprintf(stderr, flush_type == BUF_FLUSH_LRU
? "Flushed %lu pages in LRU flush\n"
: "Flushed %lu pages in flush list flush\n",
(ulong) page_count);
(ulong) count);
}
#endif /* UNIV_DEBUG */
srv_buf_pool_flushed += page_count;
srv_buf_pool_flushed += count;
/* We keep track of all flushes happening as part of LRU
flush. When estimating the desired rate at which flush_list
should be flushed we factor in this value. */
if (flush_type == BUF_FLUSH_LRU) {
buf_lru_flush_page_count += page_count;
}
return(page_count);
return(count);
}
/******************************************************************//**
@ -1367,24 +1767,56 @@ ibool
buf_flush_validate_low(void)
/*========================*/
{
buf_page_t* bpage;
buf_page_t* bpage;
const ib_rbt_node_t* rnode = NULL;
ut_ad(buf_flush_list_mutex_own());
UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
ut_ad(ut_list_node_313->in_flush_list));
bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
/* If we are in recovery mode i.e.: flush_rbt != NULL
then each block in the flush_list must also be present
in the flush_rbt. */
if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
rnode = rbt_first(buf_pool->flush_rbt);
}
while (bpage != NULL) {
const ib_uint64_t om = bpage->oldest_modification;
ut_ad(bpage->in_flush_list);
ut_a(buf_page_in_file(bpage));
/* A page in flush_list can be in BUF_BLOCK_REMOVE_HASH
state. This happens when a page is in the middle of
being relocated. In that case the original descriptor
can have this state and still be in the flush list
waiting to acquire the flush_list_mutex to complete
the relocation. */
ut_a(buf_page_in_file(bpage)
|| buf_page_get_state(bpage)
== BUF_BLOCK_REMOVE_HASH);
ut_a(om > 0);
if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
ut_a(rnode);
buf_page_t* rpage = *rbt_value(buf_page_t*,
rnode);
ut_a(rpage);
ut_a(rpage == bpage);
rnode = rbt_next(buf_pool->flush_rbt, rnode);
}
bpage = UT_LIST_GET_NEXT(list, bpage);
ut_a(!bpage || om >= bpage->oldest_modification);
}
/* By this time we must have exhausted the traversal of
flush_rbt (if active) as well. */
ut_a(rnode == NULL);
return(TRUE);
}
@ -1398,11 +1830,11 @@ buf_flush_validate(void)
{
ibool ret;
buf_pool_mutex_enter();
buf_flush_list_mutex_enter();
ret = buf_flush_validate_low();
buf_pool_mutex_exit();
buf_flush_list_mutex_exit();
return(ret);
}

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -350,17 +350,31 @@ scan_again:
bpage = UT_LIST_GET_LAST(buf_pool->LRU);
while (bpage != NULL) {
mutex_t* block_mutex = buf_page_get_mutex(bpage);
buf_page_t* prev_bpage;
ibool prev_bpage_buf_fix = FALSE;
ut_a(buf_page_in_file(bpage));
mutex_enter(block_mutex);
prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
if (buf_page_get_space(bpage) == id) {
if (bpage->buf_fix_count > 0
|| buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
/* bpage->space and bpage->io_fix are protected by
buf_pool_mutex and block_mutex. It is safe to check
them while holding buf_pool_mutex only. */
if (buf_page_get_space(bpage) != id) {
/* Skip this block, as it does not belong to
the space that is being invalidated. */
} else if (buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
/* We cannot remove this page during this scan
yet; maybe the system is currently reading it
in, or flushing the modifications to the file */
all_freed = FALSE;
} else {
mutex_t* block_mutex = buf_page_get_mutex(bpage);
mutex_enter(block_mutex);
if (bpage->buf_fix_count > 0) {
/* We cannot remove this page during
this scan yet; maybe the system is
@ -380,8 +394,40 @@ scan_again:
(ulong) buf_page_get_page_no(bpage));
}
#endif
if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE
&& ((buf_block_t*) bpage)->is_hashed) {
if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
/* This is a compressed-only block
descriptor. Ensure that prev_bpage
cannot be relocated when bpage is freed. */
if (UNIV_LIKELY(prev_bpage != NULL)) {
switch (buf_page_get_state(
prev_bpage)) {
case BUF_BLOCK_FILE_PAGE:
/* Descriptors of uncompressed
blocks will not be relocated,
because we are holding the
buf_pool_mutex. */
break;
case BUF_BLOCK_ZIP_PAGE:
case BUF_BLOCK_ZIP_DIRTY:
/* Descriptors of compressed-
only blocks can be relocated,
unless they are buffer-fixed.
Because both bpage and
prev_bpage are protected by
buf_pool_zip_mutex, it is
not necessary to acquire
further mutexes. */
ut_ad(&buf_pool_zip_mutex
== block_mutex);
ut_ad(mutex_own(block_mutex));
prev_bpage_buf_fix = TRUE;
prev_bpage->buf_fix_count++;
break;
default:
ut_error;
}
}
} else if (((buf_block_t*) bpage)->is_hashed) {
ulint page_no;
ulint zip_size;
@ -405,7 +451,8 @@ scan_again:
buf_flush_remove(bpage);
}
/* Remove from the LRU list */
/* Remove from the LRU list. */
if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
!= BUF_BLOCK_ZIP_FREE) {
buf_LRU_block_free_hashed_page((buf_block_t*)
@ -417,18 +464,27 @@ scan_again:
ut_ad(block_mutex == &buf_pool_zip_mutex);
ut_ad(!mutex_own(block_mutex));
/* The compressed block descriptor
(bpage) has been deallocated and
block_mutex released. Also,
buf_buddy_free() may have relocated
prev_bpage. Rescan the LRU list. */
if (prev_bpage_buf_fix) {
/* We temporarily buffer-fixed
prev_bpage, so that
buf_buddy_free() could not
relocate it, in case it was a
compressed-only block
descriptor. */
bpage = UT_LIST_GET_LAST(buf_pool->LRU);
continue;
mutex_enter(block_mutex);
ut_ad(prev_bpage->buf_fix_count > 0);
prev_bpage->buf_fix_count--;
mutex_exit(block_mutex);
}
goto next_page_no_mutex;
}
}
next_page:
mutex_exit(block_mutex);
mutex_exit(block_mutex);
}
next_page_no_mutex:
bpage = prev_bpage;
}
@ -1398,8 +1454,10 @@ alloc:
buf_page_t* prev_b = UT_LIST_GET_PREV(LRU, b);
const ulint fold = buf_page_address_fold(
bpage->space, bpage->offset);
buf_page_t* hash_b = buf_page_hash_get_low(
bpage->space, bpage->offset, fold);
ut_a(!buf_page_hash_get(bpage->space, bpage->offset));
ut_a(!hash_b);
b->state = b->oldest_modification
? BUF_BLOCK_ZIP_DIRTY
@ -1474,26 +1532,8 @@ alloc:
if (b->state == BUF_BLOCK_ZIP_PAGE) {
buf_LRU_insert_zip_clean(b);
} else {
buf_page_t* prev;
ut_ad(b->in_flush_list);
ut_d(bpage->in_flush_list = FALSE);
prev = UT_LIST_GET_PREV(list, b);
UT_LIST_REMOVE(list, buf_pool->flush_list, b);
if (prev) {
ut_ad(prev->in_flush_list);
UT_LIST_INSERT_AFTER(
list,
buf_pool->flush_list,
prev, b);
} else {
UT_LIST_ADD_FIRST(
list,
buf_pool->flush_list,
b);
}
/* Relocate on buf_pool->flush_list. */
buf_flush_relocate_on_flush_list(bpage, b);
}
bpage->zip.data = NULL;
@ -1642,6 +1682,7 @@ buf_LRU_block_remove_hashed_page(
ibool zip) /*!< in: TRUE if should remove also the
compressed page of an uncompressed page */
{
ulint fold;
const buf_page_t* hashed_bpage;
ut_ad(bpage);
ut_ad(buf_pool_mutex_own());
@ -1725,7 +1766,9 @@ buf_LRU_block_remove_hashed_page(
break;
}
hashed_bpage = buf_page_hash_get(bpage->space, bpage->offset);
fold = buf_page_address_fold(bpage->space, bpage->offset);
hashed_bpage = buf_page_hash_get_low(bpage->space, bpage->offset,
fold);
if (UNIV_UNLIKELY(bpage != hashed_bpage)) {
fprintf(stderr,
@ -1757,9 +1800,7 @@ buf_LRU_block_remove_hashed_page(
ut_ad(!bpage->in_zip_hash);
ut_ad(bpage->in_page_hash);
ut_d(bpage->in_page_hash = FALSE);
HASH_DELETE(buf_page_t, hash, buf_pool->page_hash,
buf_page_address_fold(bpage->space, bpage->offset),
bpage);
HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, bpage);
switch (buf_page_get_state(bpage)) {
case BUF_BLOCK_ZIP_PAGE:
ut_ad(!bpage->in_free_list);
@ -2036,6 +2077,7 @@ buf_LRU_print(void)
while (bpage != NULL) {
mutex_enter(buf_page_get_mutex(bpage));
fprintf(stderr, "BLOCK space %lu page %lu ",
(ulong) buf_page_get_space(bpage),
(ulong) buf_page_get_page_no(bpage));
@ -2084,6 +2126,7 @@ buf_LRU_print(void)
break;
}
mutex_exit(buf_page_get_mutex(bpage));
bpage = UT_LIST_GET_NEXT(LRU, bpage);
}

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -608,14 +608,14 @@ buf_read_recv_pages(
while (buf_pool->n_pend_reads >= recv_n_pool_free_frames / 2) {
os_aio_simulated_wake_handler_threads();
os_thread_sleep(500000);
os_thread_sleep(10000);
count++;
if (count > 100) {
if (count > 1000) {
fprintf(stderr,
"InnoDB: Error: InnoDB has waited for"
" 50 seconds for pending\n"
" 10 seconds for pending\n"
"InnoDB: reads to the buffer pool to"
" be finished.\n"
"InnoDB: Number of pending reads %lu,"

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -274,6 +274,9 @@ dict_boot(void)
and (TYPE & DICT_TF_FORMAT_MASK) are nonzero and TYPE = table->flags */
dict_mem_table_add_col(table, heap, "TYPE", DATA_INT, 0, 4);
dict_mem_table_add_col(table, heap, "MIX_ID", DATA_BINARY, 0, 0);
/* MIX_LEN may contain additional table flags when
ROW_FORMAT!=REDUNDANT. Currently, these flags include
DICT_TF2_TEMPORARY. */
dict_mem_table_add_col(table, heap, "MIX_LEN", DATA_INT, 0, 4);
dict_mem_table_add_col(table, heap, "CLUSTER_NAME", DATA_BINARY, 0, 0);
dict_mem_table_add_col(table, heap, "SPACE", DATA_INT, 0, 4);
@ -355,7 +358,7 @@ dict_boot(void)
dict_mem_table_add_col(table, heap, "SPACE", DATA_INT, 0, 4);
dict_mem_table_add_col(table, heap, "PAGE_NO", DATA_INT, 0, 4);
/* The '+ 2' below comes from the 2 system fields */
/* The '+ 2' below comes from the fields DB_TRX_ID, DB_ROLL_PTR */
#if DICT_SYS_INDEXES_PAGE_NO_FIELD != 6 + 2
#error "DICT_SYS_INDEXES_PAGE_NO_FIELD != 6 + 2"
#endif
@ -364,6 +367,9 @@ dict_boot(void)
#endif
#if DICT_SYS_INDEXES_TYPE_FIELD != 4 + 2
#error "DICT_SYS_INDEXES_TYPE_FIELD != 4 + 2"
#endif
#if DICT_SYS_INDEXES_NAME_FIELD != 1 + 2
#error "DICT_SYS_INDEXES_NAME_FIELD != 1 + 2"
#endif
table->id = DICT_INDEXES_ID;

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -51,16 +51,18 @@ static
dtuple_t*
dict_create_sys_tables_tuple(
/*=========================*/
dict_table_t* table, /*!< in: table */
mem_heap_t* heap) /*!< in: memory heap from which the memory for
the built tuple is allocated */
const dict_table_t* table, /*!< in: table */
mem_heap_t* heap) /*!< in: memory heap from
which the memory for the built
tuple is allocated */
{
dict_table_t* sys_tables;
dtuple_t* entry;
dfield_t* dfield;
byte* ptr;
ut_ad(table && heap);
ut_ad(table);
ut_ad(heap);
sys_tables = dict_sys->sys_tables;
@ -69,18 +71,18 @@ dict_create_sys_tables_tuple(
dict_table_copy_types(entry, sys_tables);
/* 0: NAME -----------------------------*/
dfield = dtuple_get_nth_field(entry, 0);
dfield = dtuple_get_nth_field(entry, 0/*NAME*/);
dfield_set_data(dfield, table->name, ut_strlen(table->name));
/* 3: ID -------------------------------*/
dfield = dtuple_get_nth_field(entry, 1);
dfield = dtuple_get_nth_field(entry, 1/*ID*/);
ptr = mem_heap_alloc(heap, 8);
mach_write_to_8(ptr, table->id);
dfield_set_data(dfield, ptr, 8);
/* 4: N_COLS ---------------------------*/
dfield = dtuple_get_nth_field(entry, 2);
dfield = dtuple_get_nth_field(entry, 2/*N_COLS*/);
#if DICT_TF_COMPACT != 1
#error
@ -91,40 +93,41 @@ dict_create_sys_tables_tuple(
| ((table->flags & DICT_TF_COMPACT) << 31));
dfield_set_data(dfield, ptr, 4);
/* 5: TYPE -----------------------------*/
dfield = dtuple_get_nth_field(entry, 3);
dfield = dtuple_get_nth_field(entry, 3/*TYPE*/);
ptr = mem_heap_alloc(heap, 4);
if (table->flags & ~DICT_TF_COMPACT) {
if (table->flags & (~DICT_TF_COMPACT & ~(~0 << DICT_TF_BITS))) {
ut_a(table->flags & DICT_TF_COMPACT);
ut_a(dict_table_get_format(table) >= DICT_TF_FORMAT_ZIP);
ut_a((table->flags & DICT_TF_ZSSIZE_MASK)
<= (DICT_TF_ZSSIZE_MAX << DICT_TF_ZSSIZE_SHIFT));
ut_a(!(table->flags & (~0 << DICT_TF_BITS)));
mach_write_to_4(ptr, table->flags);
ut_a(!(table->flags & (~0 << DICT_TF2_BITS)));
mach_write_to_4(ptr, table->flags & ~(~0 << DICT_TF_BITS));
} else {
mach_write_to_4(ptr, DICT_TABLE_ORDINARY);
}
dfield_set_data(dfield, ptr, 4);
/* 6: MIX_ID (obsolete) ---------------------------*/
dfield = dtuple_get_nth_field(entry, 4);
dfield = dtuple_get_nth_field(entry, 4/*MIX_ID*/);
ptr = mem_heap_zalloc(heap, 8);
dfield_set_data(dfield, ptr, 8);
/* 7: MIX_LEN (obsolete) --------------------------*/
/* 7: MIX_LEN (additional flags) --------------------------*/
dfield = dtuple_get_nth_field(entry, 5);
dfield = dtuple_get_nth_field(entry, 5/*MIX_LEN*/);
ptr = mem_heap_zalloc(heap, 4);
ptr = mem_heap_alloc(heap, 4);
mach_write_to_4(ptr, table->flags >> DICT_TF2_SHIFT);
dfield_set_data(dfield, ptr, 4);
/* 8: CLUSTER_NAME ---------------------*/
dfield = dtuple_get_nth_field(entry, 6);
dfield = dtuple_get_nth_field(entry, 6/*CLUSTER_NAME*/);
dfield_set_null(dfield); /* not supported */
/* 9: SPACE ----------------------------*/
dfield = dtuple_get_nth_field(entry, 7);
dfield = dtuple_get_nth_field(entry, 7/*SPACE*/);
ptr = mem_heap_alloc(heap, 4);
mach_write_to_4(ptr, table->space);
@ -143,19 +146,21 @@ static
dtuple_t*
dict_create_sys_columns_tuple(
/*==========================*/
dict_table_t* table, /*!< in: table */
ulint i, /*!< in: column number */
mem_heap_t* heap) /*!< in: memory heap from which the memory for
the built tuple is allocated */
const dict_table_t* table, /*!< in: table */
ulint i, /*!< in: column number */
mem_heap_t* heap) /*!< in: memory heap from
which the memory for the built
tuple is allocated */
{
dict_table_t* sys_columns;
dtuple_t* entry;
const dict_col_t* column;
dfield_t* dfield;
byte* ptr;
const char* col_name;
const char* col_name;
ut_ad(table && heap);
ut_ad(table);
ut_ad(heap);
column = dict_table_get_nth_col(table, i);
@ -166,47 +171,47 @@ dict_create_sys_columns_tuple(
dict_table_copy_types(entry, sys_columns);
/* 0: TABLE_ID -----------------------*/
dfield = dtuple_get_nth_field(entry, 0);
dfield = dtuple_get_nth_field(entry, 0/*TABLE_ID*/);
ptr = mem_heap_alloc(heap, 8);
mach_write_to_8(ptr, table->id);
dfield_set_data(dfield, ptr, 8);
/* 1: POS ----------------------------*/
dfield = dtuple_get_nth_field(entry, 1);
dfield = dtuple_get_nth_field(entry, 1/*POS*/);
ptr = mem_heap_alloc(heap, 4);
mach_write_to_4(ptr, i);
dfield_set_data(dfield, ptr, 4);
/* 4: NAME ---------------------------*/
dfield = dtuple_get_nth_field(entry, 2);
dfield = dtuple_get_nth_field(entry, 2/*NAME*/);
col_name = dict_table_get_col_name(table, i);
dfield_set_data(dfield, col_name, ut_strlen(col_name));
/* 5: MTYPE --------------------------*/
dfield = dtuple_get_nth_field(entry, 3);
dfield = dtuple_get_nth_field(entry, 3/*MTYPE*/);
ptr = mem_heap_alloc(heap, 4);
mach_write_to_4(ptr, column->mtype);
dfield_set_data(dfield, ptr, 4);
/* 6: PRTYPE -------------------------*/
dfield = dtuple_get_nth_field(entry, 4);
dfield = dtuple_get_nth_field(entry, 4/*PRTYPE*/);
ptr = mem_heap_alloc(heap, 4);
mach_write_to_4(ptr, column->prtype);
dfield_set_data(dfield, ptr, 4);
/* 7: LEN ----------------------------*/
dfield = dtuple_get_nth_field(entry, 5);
dfield = dtuple_get_nth_field(entry, 5/*LEN*/);
ptr = mem_heap_alloc(heap, 4);
mach_write_to_4(ptr, column->len);
dfield_set_data(dfield, ptr, 4);
/* 8: PREC ---------------------------*/
dfield = dtuple_get_nth_field(entry, 6);
dfield = dtuple_get_nth_field(entry, 6/*PREC*/);
ptr = mem_heap_alloc(heap, 4);
mach_write_to_4(ptr, 0/* unused */);
@ -230,6 +235,7 @@ dict_build_table_def_step(
dict_table_t* table;
dtuple_t* row;
ulint error;
ulint flags;
const char* path_or_name;
ibool is_path;
mtr_t mtr;
@ -268,9 +274,10 @@ dict_build_table_def_step(
ut_ad(!dict_table_zip_size(table)
|| dict_table_get_format(table) >= DICT_TF_FORMAT_ZIP);
flags = table->flags & ~(~0 << DICT_TF_BITS);
error = fil_create_new_single_table_tablespace(
&space, path_or_name, is_path,
table->flags == DICT_TF_COMPACT ? 0 : table->flags,
flags == DICT_TF_COMPACT ? 0 : flags,
FIL_IBD_FILE_INITIAL_SIZE);
table->space = (unsigned int) space;
@ -286,7 +293,7 @@ dict_build_table_def_step(
mtr_commit(&mtr);
} else {
/* Create in the system tablespace: disallow new features */
table->flags &= DICT_TF_COMPACT;
table->flags &= (~0 << DICT_TF_BITS) | DICT_TF_COMPACT;
}
row = dict_create_sys_tables_tuple(table, node->heap);
@ -322,9 +329,10 @@ static
dtuple_t*
dict_create_sys_indexes_tuple(
/*==========================*/
dict_index_t* index, /*!< in: index */
mem_heap_t* heap) /*!< in: memory heap from which the memory for
the built tuple is allocated */
const dict_index_t* index, /*!< in: index */
mem_heap_t* heap) /*!< in: memory heap from
which the memory for the built
tuple is allocated */
{
dict_table_t* sys_indexes;
dict_table_t* table;
@ -333,7 +341,8 @@ dict_create_sys_indexes_tuple(
byte* ptr;
ut_ad(mutex_own(&(dict_sys->mutex)));
ut_ad(index && heap);
ut_ad(index);
ut_ad(heap);
sys_indexes = dict_sys->sys_indexes;
@ -344,32 +353,32 @@ dict_create_sys_indexes_tuple(
dict_table_copy_types(entry, sys_indexes);
/* 0: TABLE_ID -----------------------*/
dfield = dtuple_get_nth_field(entry, 0);
dfield = dtuple_get_nth_field(entry, 0/*TABLE_ID*/);
ptr = mem_heap_alloc(heap, 8);
mach_write_to_8(ptr, table->id);
dfield_set_data(dfield, ptr, 8);
/* 1: ID ----------------------------*/
dfield = dtuple_get_nth_field(entry, 1);
dfield = dtuple_get_nth_field(entry, 1/*ID*/);
ptr = mem_heap_alloc(heap, 8);
mach_write_to_8(ptr, index->id);
dfield_set_data(dfield, ptr, 8);
/* 4: NAME --------------------------*/
dfield = dtuple_get_nth_field(entry, 2);
dfield = dtuple_get_nth_field(entry, 2/*NAME*/);
dfield_set_data(dfield, index->name, ut_strlen(index->name));
/* 5: N_FIELDS ----------------------*/
dfield = dtuple_get_nth_field(entry, 3);
dfield = dtuple_get_nth_field(entry, 3/*N_FIELDS*/);
ptr = mem_heap_alloc(heap, 4);
mach_write_to_4(ptr, index->n_fields);
dfield_set_data(dfield, ptr, 4);
/* 6: TYPE --------------------------*/
dfield = dtuple_get_nth_field(entry, 4);
dfield = dtuple_get_nth_field(entry, 4/*TYPE*/);
ptr = mem_heap_alloc(heap, 4);
mach_write_to_4(ptr, index->type);
@ -381,7 +390,7 @@ dict_create_sys_indexes_tuple(
#error "DICT_SYS_INDEXES_SPACE_NO_FIELD != 7"
#endif
dfield = dtuple_get_nth_field(entry, 5);
dfield = dtuple_get_nth_field(entry, 5/*SPACE*/);
ptr = mem_heap_alloc(heap, 4);
mach_write_to_4(ptr, index->space);
@ -393,7 +402,7 @@ dict_create_sys_indexes_tuple(
#error "DICT_SYS_INDEXES_PAGE_NO_FIELD != 8"
#endif
dfield = dtuple_get_nth_field(entry, 6);
dfield = dtuple_get_nth_field(entry, 6/*PAGE_NO*/);
ptr = mem_heap_alloc(heap, 4);
mach_write_to_4(ptr, FIL_NULL);
@ -412,10 +421,11 @@ static
dtuple_t*
dict_create_sys_fields_tuple(
/*=========================*/
dict_index_t* index, /*!< in: index */
ulint i, /*!< in: field number */
mem_heap_t* heap) /*!< in: memory heap from which the memory for
the built tuple is allocated */
const dict_index_t* index, /*!< in: index */
ulint i, /*!< in: field number */
mem_heap_t* heap) /*!< in: memory heap from
which the memory for the built
tuple is allocated */
{
dict_table_t* sys_fields;
dtuple_t* entry;
@ -425,7 +435,8 @@ dict_create_sys_fields_tuple(
ibool index_contains_column_prefix_field = FALSE;
ulint j;
ut_ad(index && heap);
ut_ad(index);
ut_ad(heap);
for (j = 0; j < index->n_fields; j++) {
if (dict_index_get_nth_field(index, j)->prefix_len > 0) {
@ -443,7 +454,7 @@ dict_create_sys_fields_tuple(
dict_table_copy_types(entry, sys_fields);
/* 0: INDEX_ID -----------------------*/
dfield = dtuple_get_nth_field(entry, 0);
dfield = dtuple_get_nth_field(entry, 0/*INDEX_ID*/);
ptr = mem_heap_alloc(heap, 8);
mach_write_to_8(ptr, index->id);
@ -451,7 +462,7 @@ dict_create_sys_fields_tuple(
dfield_set_data(dfield, ptr, 8);
/* 1: POS + PREFIX LENGTH ----------------------------*/
dfield = dtuple_get_nth_field(entry, 1);
dfield = dtuple_get_nth_field(entry, 1/*POS*/);
ptr = mem_heap_alloc(heap, 4);
@ -471,7 +482,7 @@ dict_create_sys_fields_tuple(
dfield_set_data(dfield, ptr, 4);
/* 4: COL_NAME -------------------------*/
dfield = dtuple_get_nth_field(entry, 2);
dfield = dtuple_get_nth_field(entry, 2/*COL_NAME*/);
dfield_set_data(dfield, field->name,
ut_strlen(field->name));
@ -602,6 +613,7 @@ dict_create_index_tree_step(
dict_table_t* sys_indexes;
dict_table_t* table;
dtuple_t* search_tuple;
ulint zip_size;
btr_pcur_t pcur;
mtr_t mtr;
@ -626,8 +638,9 @@ dict_create_index_tree_step(
btr_pcur_move_to_next_user_rec(&pcur, &mtr);
node->page_no = btr_create(index->type, index->space,
dict_table_zip_size(index->table),
zip_size = dict_table_zip_size(index->table);
node->page_no = btr_create(index->type, index->space, zip_size,
index->id, index, &mtr);
/* printf("Created a new index tree in space %lu root page %lu\n",
index->space, index->page_no); */

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -70,6 +70,17 @@ we need this; NOTE: a transaction which reserves this must keep book
on the mode in trx_struct::dict_operation_lock_mode */
UNIV_INTERN rw_lock_t dict_operation_lock;
/* Keys to register rwlocks and mutexes with performance schema */
#ifdef UNIV_PFS_RWLOCK
UNIV_INTERN mysql_pfs_key_t dict_operation_lock_key;
UNIV_INTERN mysql_pfs_key_t index_tree_rw_lock_key;
#endif /* UNIV_PFS_RWLOCK */
#ifdef UNIV_PFS_MUTEX
UNIV_INTERN mysql_pfs_key_t dict_sys_mutex_key;
UNIV_INTERN mysql_pfs_key_t dict_foreign_err_mutex_key;
#endif /* UNIV_PFS_MUTEX */
#define DICT_HEAP_SIZE 100 /*!< initial memory heap size when
creating a table or index object */
#define DICT_POOL_PER_TABLE_HASH 512 /*!< buffer pool max size per table
@ -140,7 +151,7 @@ static
void
dict_field_print_low(
/*=================*/
dict_field_t* field); /*!< in: field */
const dict_field_t* field); /*!< in: field */
/*********************************************************************//**
Frees a foreign key struct. */
static
@ -607,7 +618,7 @@ dict_init(void)
{
dict_sys = mem_alloc(sizeof(dict_sys_t));
mutex_create(&dict_sys->mutex, SYNC_DICT);
mutex_create(dict_sys_mutex_key, &dict_sys->mutex, SYNC_DICT);
dict_sys->table_hash = hash_create(buf_pool_get_curr_size()
/ (DICT_POOL_PER_TABLE_HASH
@ -619,12 +630,14 @@ dict_init(void)
UT_LIST_INIT(dict_sys->table_LRU);
rw_lock_create(&dict_operation_lock, SYNC_DICT_OPERATION);
rw_lock_create(dict_operation_lock_key,
&dict_operation_lock, SYNC_DICT_OPERATION);
dict_foreign_err_file = os_file_create_tmpfile();
ut_a(dict_foreign_err_file);
mutex_create(&dict_foreign_err_mutex, SYNC_ANY_LATCH);
mutex_create(dict_foreign_err_mutex_key,
&dict_foreign_err_mutex, SYNC_ANY_LATCH);
}
/**********************************************************************//**
@ -1460,6 +1473,7 @@ dict_index_add_to_cache(
if (!dict_index_find_cols(table, index)) {
dict_mem_index_free(index);
return(DB_CORRUPTION);
}
@ -1566,7 +1580,8 @@ undo_size_ok:
new_index->stat_n_leaf_pages = 1;
new_index->page = page_no;
rw_lock_create(&new_index->lock, SYNC_INDEX_TREE);
rw_lock_create(index_tree_rw_lock_key, &new_index->lock,
SYNC_INDEX_TREE);
if (!UNIV_UNLIKELY(new_index->type & DICT_UNIVERSAL)) {
@ -4402,7 +4417,7 @@ static
void
dict_field_print_low(
/*=================*/
dict_field_t* field) /*!< in: field */
const dict_field_t* field) /*!< in: field */
{
ut_ad(mutex_own(&(dict_sys->mutex)));
@ -4766,8 +4781,10 @@ UNIV_INTERN
void
dict_table_check_for_dup_indexes(
/*=============================*/
const dict_table_t* table) /*!< in: Check for dup indexes
const dict_table_t* table, /*!< in: Check for dup indexes
in this table */
ibool tmp_ok) /*!< in: TRUE=allow temporary
index names */
{
/* Check for duplicates, ignoring indexes that are marked
as to be dropped */
@ -4775,13 +4792,17 @@ dict_table_check_for_dup_indexes(
const dict_index_t* index1;
const dict_index_t* index2;
ut_ad(mutex_own(&dict_sys->mutex));
/* The primary index _must_ exist */
ut_a(UT_LIST_GET_LEN(table->indexes) > 0);
index1 = UT_LIST_GET_FIRST(table->indexes);
index2 = UT_LIST_GET_NEXT(indexes, index1);
while (index1 && index2) {
do {
ut_ad(tmp_ok || *index1->name != TEMP_INDEX_PREFIX);
index2 = UT_LIST_GET_NEXT(indexes, index1);
while (index2) {
@ -4793,8 +4814,7 @@ dict_table_check_for_dup_indexes(
}
index1 = UT_LIST_GET_NEXT(indexes, index1);
index2 = UT_LIST_GET_NEXT(indexes, index1);
}
} while (index1);
}
#endif /* UNIV_DEBUG */

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -260,7 +260,7 @@ dict_sys_tables_get_flags(
return(0);
}
field = rec_get_nth_field_old(rec, 4, &len);
field = rec_get_nth_field_old(rec, 4/*N_COLS*/, &len);
n_cols = mach_read_from_4(field);
if (UNIV_UNLIKELY(!(n_cols & 0x80000000UL))) {
@ -390,15 +390,35 @@ loop:
mtr_commit(&mtr);
if (space_id != 0 && in_crash_recovery) {
if (space_id == 0) {
/* The system tablespace always exists. */
} else if (in_crash_recovery) {
/* Check that the tablespace (the .ibd file) really
exists; print a warning to the .err log if not */
exists; print a warning to the .err log if not.
Do not print warnings for temporary tables. */
ibool is_temp;
fil_space_for_table_exists_in_mem(space_id, name,
FALSE, TRUE, TRUE);
}
field = rec_get_nth_field_old(rec, 4, &len);
if (0x80000000UL & mach_read_from_4(field)) {
/* ROW_FORMAT=COMPACT: read the is_temp
flag from SYS_TABLES.MIX_LEN. */
field = rec_get_nth_field_old(rec, 7, &len);
is_temp = mach_read_from_4(field)
& DICT_TF2_TEMPORARY;
} else {
/* For tables created with old versions
of InnoDB, SYS_TABLES.MIX_LEN may contain
garbage. Such tables would always be
in ROW_FORMAT=REDUNDANT. Pretend that
all such tables are non-temporary. That is,
do not suppress error printouts about
temporary tables not being found. */
is_temp = FALSE;
}
if (space_id != 0 && !in_crash_recovery) {
fil_space_for_table_exists_in_mem(
space_id, name, is_temp, TRUE, !is_temp);
} else {
/* It is a normal database startup: create the space
object and check that the .ibd file exists. */
@ -894,31 +914,6 @@ err_exit:
(ulong) flags);
goto err_exit;
}
if (fil_space_for_table_exists_in_mem(space, name, FALSE,
FALSE, FALSE)) {
/* Ok; (if we did a crash recovery then the tablespace
can already be in the memory cache) */
} else {
/* In >= 4.1.9, InnoDB scans the data dictionary also
at a normal mysqld startup. It is an error if the
space object does not exist in memory. */
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: error: space object of table %s,\n"
"InnoDB: space id %lu did not exist in memory."
" Retrying an open.\n",
name, (ulong)space);
/* Try to open the tablespace */
if (!fil_open_single_table_tablespace(
TRUE, space, flags, name)) {
/* We failed to find a sensible tablespace
file */
ibd_file_missing = TRUE;
}
}
} else {
flags = 0;
}
@ -928,9 +923,63 @@ err_exit:
field = rec_get_nth_field_old(rec, 4, &len);
n_cols = mach_read_from_4(field);
/* The high-order bit of N_COLS is the "compact format" flag. */
/* The high-order bit of N_COLS is the "compact format" flag.
For tables in that format, MIX_LEN may hold additional flags. */
if (n_cols & 0x80000000UL) {
ulint flags2;
flags |= DICT_TF_COMPACT;
ut_a(name_of_col_is(sys_tables, sys_index, 7, "MIX_LEN"));
field = rec_get_nth_field_old(rec, 7, &len);
flags2 = mach_read_from_4(field);
if (flags2 & (~0 << (DICT_TF2_BITS - DICT_TF2_SHIFT))) {
ut_print_timestamp(stderr);
fputs(" InnoDB: Warning: table ", stderr);
ut_print_filename(stderr, name);
fprintf(stderr, "\n"
"InnoDB: in InnoDB data dictionary"
" has unknown flags %lx.\n",
(ulong) flags2);
flags2 &= ~(~0 << (DICT_TF2_BITS - DICT_TF2_SHIFT));
}
flags |= flags2 << DICT_TF2_SHIFT;
}
/* See if the tablespace is available. */
if (space == 0) {
/* The system tablespace is always available. */
} else if (!fil_space_for_table_exists_in_mem(
space, name,
(flags >> DICT_TF2_SHIFT) & DICT_TF2_TEMPORARY,
FALSE, FALSE)) {
if ((flags >> DICT_TF2_SHIFT) & DICT_TF2_TEMPORARY) {
/* Do not bother to retry opening temporary tables. */
ibd_file_missing = TRUE;
} else {
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: error: space object of table");
ut_print_filename(stderr, name);
fprintf(stderr, ",\n"
"InnoDB: space id %lu did not exist in memory."
" Retrying an open.\n",
(ulong) space);
/* Try to open the tablespace */
if (!fil_open_single_table_tablespace(
TRUE, space,
flags & ~(~0 << DICT_TF_BITS), name)) {
/* We failed to find a sensible
tablespace file */
ibd_file_missing = TRUE;
}
}
}
table = dict_mem_table_create(name, space, n_cols & ~0x80000000UL,

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -40,6 +40,11 @@ Created 1/8/1996 Heikki Tuuri
#define DICT_HEAP_SIZE 100 /*!< initial memory heap size when
creating a table or index object */
#ifdef UNIV_PFS_MUTEX
/* Key to register autoinc_mutex with performance schema */
UNIV_INTERN mysql_pfs_key_t autoinc_mutex_key;
#endif /* UNIV_PFS_MUTEX */
/**********************************************************************//**
Creates a table memory object.
@return own: table object */
@ -59,7 +64,7 @@ dict_mem_table_create(
mem_heap_t* heap;
ut_ad(name);
ut_a(!(flags & (~0 << DICT_TF_BITS)));
ut_a(!(flags & (~0 << DICT_TF2_BITS)));
heap = mem_heap_create(DICT_HEAP_SIZE);
@ -78,7 +83,8 @@ dict_mem_table_create(
#ifndef UNIV_HOTBACKUP
table->autoinc_lock = mem_heap_alloc(heap, lock_get_size());
mutex_create(&table->autoinc_mutex, SYNC_DICT_AUTOINC_MUTEX);
mutex_create(autoinc_mutex_key,
&table->autoinc_mutex, SYNC_DICT_AUTOINC_MUTEX);
table->autoinc = 0;

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -38,6 +38,7 @@ Created 10/25/1995 Heikki Tuuri
#include "mtr0mtr.h"
#include "mtr0log.h"
#include "dict0dict.h"
#include "page0page.h"
#include "page0zip.h"
#ifndef UNIV_HOTBACKUP
# include "buf0lru.h"
@ -120,6 +121,16 @@ UNIV_INTERN ulint fil_n_pending_tablespace_flushes = 0;
/** The null file address */
UNIV_INTERN fil_addr_t fil_addr_null = {FIL_NULL, 0};
#ifdef UNIV_PFS_MUTEX
/* Key to register fil_system_mutex with performance schema */
UNIV_INTERN mysql_pfs_key_t fil_system_mutex_key;
#endif /* UNIV_PFS_MUTEX */
#ifdef UNIV_PFS_RWLOCK
/* Key to register file space latch with performance schema */
UNIV_INTERN mysql_pfs_key_t fil_space_latch_key;
#endif /* UNIV_PFS_RWLOCK */
/** File node of a tablespace or the log data space */
struct fil_node_struct {
fil_space_t* space; /*!< backpointer to the space where this node
@ -648,7 +659,8 @@ fil_node_open_file(
async I/O! */
node->handle = os_file_create_simple_no_error_handling(
node->name, OS_FILE_OPEN, OS_FILE_READ_ONLY, &success);
innodb_file_data_key, node->name, OS_FILE_OPEN,
OS_FILE_READ_ONLY, &success);
if (!success) {
/* The following call prints an error message */
os_file_get_last_error(TRUE);
@ -766,15 +778,21 @@ add_size:
os_file_create() to fall back to the normal file I/O mode. */
if (space->purpose == FIL_LOG) {
node->handle = os_file_create(node->name, OS_FILE_OPEN,
OS_FILE_AIO, OS_LOG_FILE, &ret);
node->handle = os_file_create(innodb_file_log_key,
node->name, OS_FILE_OPEN,
OS_FILE_AIO, OS_LOG_FILE,
&ret);
} else if (node->is_raw_disk) {
node->handle = os_file_create(node->name,
node->handle = os_file_create(innodb_file_data_key,
node->name,
OS_FILE_OPEN_RAW,
OS_FILE_AIO, OS_DATA_FILE, &ret);
OS_FILE_AIO, OS_DATA_FILE,
&ret);
} else {
node->handle = os_file_create(node->name, OS_FILE_OPEN,
OS_FILE_AIO, OS_DATA_FILE, &ret);
node->handle = os_file_create(innodb_file_data_key,
node->name, OS_FILE_OPEN,
OS_FILE_AIO, OS_DATA_FILE,
&ret);
}
ut_a(ret);
@ -1097,10 +1115,13 @@ fil_space_create(
fil_space_t* space;
/* The tablespace flags (FSP_SPACE_FLAGS) should be 0 for
ROW_FORMAT=COMPACT (table->flags == DICT_TF_COMPACT) and
ROW_FORMAT=COMPACT
((table->flags & ~(~0 << DICT_TF_BITS)) == DICT_TF_COMPACT) and
ROW_FORMAT=REDUNDANT (table->flags == 0). For any other
format, the tablespace flags should equal table->flags. */
format, the tablespace flags should equal
(table->flags & ~(~0 << DICT_TF_BITS)). */
ut_a(flags != DICT_TF_COMPACT);
ut_a(!(flags & (~0UL << DICT_TF_BITS)));
try_again:
/*printf(
@ -1208,7 +1229,7 @@ try_again:
UT_LIST_INIT(space->chain);
space->magic_n = FIL_SPACE_MAGIC_N;
rw_lock_create(&space->latch, SYNC_FSP);
rw_lock_create(fil_space_latch_key, &space->latch, SYNC_FSP);
HASH_INSERT(fil_space_t, hash, fil_system->spaces, id, space);
@ -1510,7 +1531,8 @@ fil_init(
fil_system = mem_alloc(sizeof(fil_system_t));
mutex_create(&fil_system->mutex, SYNC_ANY_LATCH);
mutex_create(fil_system_mutex_key,
&fil_system->mutex, SYNC_ANY_LATCH);
fil_system->spaces = hash_create(hash_size);
fil_system->name_hash = hash_create(hash_size);
@ -2515,7 +2537,7 @@ retry:
success = fil_rename_tablespace_in_mem(space, node, path);
if (success) {
success = os_file_rename(old_path, path);
success = os_file_rename(innodb_file_data_key, old_path, path);
if (!success) {
/* We have to revert the changes we made
@ -2582,14 +2604,18 @@ fil_create_new_single_table_tablespace(
ut_a(size >= FIL_IBD_FILE_INITIAL_SIZE);
/* The tablespace flags (FSP_SPACE_FLAGS) should be 0 for
ROW_FORMAT=COMPACT (table->flags == DICT_TF_COMPACT) and
ROW_FORMAT=COMPACT
((table->flags & ~(~0 << DICT_TF_BITS)) == DICT_TF_COMPACT) and
ROW_FORMAT=REDUNDANT (table->flags == 0). For any other
format, the tablespace flags should equal table->flags. */
format, the tablespace flags should equal
(table->flags & ~(~0 << DICT_TF_BITS)). */
ut_a(flags != DICT_TF_COMPACT);
ut_a(!(flags & (~0UL << DICT_TF_BITS)));
path = fil_make_ibd_name(tablename, is_temp);
file = os_file_create(path, OS_FILE_CREATE, OS_FILE_NORMAL,
file = os_file_create(innodb_file_data_key, path,
OS_FILE_CREATE, OS_FILE_NORMAL,
OS_DATA_FILE, &ret);
if (ret == FALSE) {
ut_print_timestamp(stderr);
@ -2786,11 +2812,13 @@ fil_reset_too_high_lsns(
ib_int64_t offset;
ulint zip_size;
ibool success;
page_zip_des_t page_zip;
filepath = fil_make_ibd_name(name, FALSE);
file = os_file_create_simple_no_error_handling(
filepath, OS_FILE_OPEN, OS_FILE_READ_WRITE, &success);
innodb_file_data_key, filepath, OS_FILE_OPEN,
OS_FILE_READ_WRITE, &success);
if (!success) {
/* The following call prints an error message */
os_file_get_last_error(TRUE);
@ -2833,6 +2861,12 @@ fil_reset_too_high_lsns(
space_id = fsp_header_get_space_id(page);
zip_size = fsp_header_get_zip_size(page);
page_zip_des_init(&page_zip);
page_zip_set_size(&page_zip, zip_size);
if (zip_size) {
page_zip.data = page + UNIV_PAGE_SIZE;
}
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Flush lsn in the tablespace file %lu"
@ -2867,20 +2901,23 @@ fil_reset_too_high_lsns(
/* We have to reset the lsn */
if (zip_size) {
memcpy(page + UNIV_PAGE_SIZE, page, zip_size);
memcpy(page_zip.data, page, zip_size);
buf_flush_init_for_writing(
page, page + UNIV_PAGE_SIZE,
current_lsn);
page, &page_zip, current_lsn);
success = os_file_write(
filepath, file, page_zip.data,
(ulint) offset & 0xFFFFFFFFUL,
(ulint) (offset >> 32), zip_size);
} else {
buf_flush_init_for_writing(
page, NULL, current_lsn);
success = os_file_write(
filepath, file, page,
(ulint)(offset & 0xFFFFFFFFUL),
(ulint)(offset >> 32),
UNIV_PAGE_SIZE);
}
success = os_file_write(filepath, file, page,
(ulint)(offset & 0xFFFFFFFFUL),
(ulint)(offset >> 32),
zip_size
? zip_size
: UNIV_PAGE_SIZE);
if (!success) {
goto func_exit;
@ -2956,13 +2993,17 @@ fil_open_single_table_tablespace(
filepath = fil_make_ibd_name(name, FALSE);
/* The tablespace flags (FSP_SPACE_FLAGS) should be 0 for
ROW_FORMAT=COMPACT (table->flags == DICT_TF_COMPACT) and
ROW_FORMAT=COMPACT
((table->flags & ~(~0 << DICT_TF_BITS)) == DICT_TF_COMPACT) and
ROW_FORMAT=REDUNDANT (table->flags == 0). For any other
format, the tablespace flags should equal table->flags. */
format, the tablespace flags should equal
(table->flags & ~(~0 << DICT_TF_BITS)). */
ut_a(flags != DICT_TF_COMPACT);
ut_a(!(flags & (~0UL << DICT_TF_BITS)));
file = os_file_create_simple_no_error_handling(
filepath, OS_FILE_OPEN, OS_FILE_READ_ONLY, &success);
innodb_file_data_key, filepath, OS_FILE_OPEN,
OS_FILE_READ_ONLY, &success);
if (!success) {
/* The following call prints an error message */
os_file_get_last_error(TRUE);
@ -3011,7 +3052,8 @@ fil_open_single_table_tablespace(
ut_free(buf2);
if (UNIV_UNLIKELY(space_id != id || space_flags != flags)) {
if (UNIV_UNLIKELY(space_id != id
|| space_flags != (flags & ~(~0 << DICT_TF_BITS)))) {
ut_print_timestamp(stderr);
fputs(" InnoDB: Error: tablespace id and flags in file ",
@ -3117,7 +3159,8 @@ fil_load_single_table_tablespace(
# endif /* !UNIV_HOTBACKUP */
#endif
file = os_file_create_simple_no_error_handling(
filepath, OS_FILE_OPEN, OS_FILE_READ_ONLY, &success);
innodb_file_data_key, filepath, OS_FILE_OPEN,
OS_FILE_READ_ONLY, &success);
if (!success) {
/* The following call prints an error message */
os_file_get_last_error(TRUE);
@ -3275,7 +3318,7 @@ fil_load_single_table_tablespace(
os_file_close(file);
new_path = fil_make_ibbackup_old_name(filepath);
ut_a(os_file_rename(filepath, new_path));
ut_a(os_file_rename(innodb_file_data_key, filepath, new_path));
ut_free(buf2);
mem_free(filepath);
@ -3313,7 +3356,7 @@ fil_load_single_table_tablespace(
mutex_exit(&fil_system->mutex);
ut_a(os_file_rename(filepath, new_path));
ut_a(os_file_rename(innodb_file_data_key, filepath, new_path));
ut_free(buf2);
mem_free(filepath);
@ -4435,11 +4478,14 @@ fil_aio_wait(
ut_ad(fil_validate());
if (os_aio_use_native_aio) {
if (srv_use_native_aio) {
srv_set_io_thread_op_info(segment, "native aio handle");
#ifdef WIN_ASYNC_IO
ret = os_aio_windows_handle(segment, 0, &fil_node,
&message, &type);
#elif defined(LINUX_NATIVE_AIO)
ret = os_aio_linux_handle(segment, &fil_node,
&message, &type);
#else
ret = 0; /* Eliminate compiler warning */
ut_error;
@ -4781,8 +4827,10 @@ void
fil_close(void)
/*===========*/
{
#ifndef UNIV_HOTBACKUP
/* The mutex should already have been freed. */
ut_ad(fil_system->mutex.magic_n == 0);
#endif /* !UNIV_HOTBACKUP */
hash_table_free(fil_system->spaces);

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -386,11 +386,11 @@ UNIV_INLINE
ibool
xdes_get_bit(
/*=========*/
xdes_t* descr, /*!< in: descriptor */
ulint bit, /*!< in: XDES_FREE_BIT or XDES_CLEAN_BIT */
ulint offset, /*!< in: page offset within extent:
0 ... FSP_EXTENT_SIZE - 1 */
mtr_t* mtr) /*!< in: mtr */
const xdes_t* descr, /*!< in: descriptor */
ulint bit, /*!< in: XDES_FREE_BIT or XDES_CLEAN_BIT */
ulint offset, /*!< in: page offset within extent:
0 ... FSP_EXTENT_SIZE - 1 */
mtr_t* mtr) /*!< in: mtr */
{
ulint index;
ulint byte_index;
@ -527,8 +527,8 @@ UNIV_INLINE
ulint
xdes_get_n_used(
/*============*/
xdes_t* descr, /*!< in: descriptor */
mtr_t* mtr) /*!< in: mtr */
const xdes_t* descr, /*!< in: descriptor */
mtr_t* mtr) /*!< in: mtr */
{
ulint i;
ulint count = 0;
@ -551,8 +551,8 @@ UNIV_INLINE
ibool
xdes_is_free(
/*=========*/
xdes_t* descr, /*!< in: descriptor */
mtr_t* mtr) /*!< in: mtr */
const xdes_t* descr, /*!< in: descriptor */
mtr_t* mtr) /*!< in: mtr */
{
if (0 == xdes_get_n_used(descr, mtr)) {
@ -569,8 +569,8 @@ UNIV_INLINE
ibool
xdes_is_full(
/*=========*/
xdes_t* descr, /*!< in: descriptor */
mtr_t* mtr) /*!< in: mtr */
const xdes_t* descr, /*!< in: descriptor */
mtr_t* mtr) /*!< in: mtr */
{
if (FSP_EXTENT_SIZE == xdes_get_n_used(descr, mtr)) {
@ -586,7 +586,7 @@ UNIV_INLINE
void
xdes_set_state(
/*===========*/
xdes_t* descr, /*!< in: descriptor */
xdes_t* descr, /*!< in/out: descriptor */
ulint state, /*!< in: state to set */
mtr_t* mtr) /*!< in: mtr handle */
{
@ -605,8 +605,8 @@ UNIV_INLINE
ulint
xdes_get_state(
/*===========*/
xdes_t* descr, /*!< in: descriptor */
mtr_t* mtr) /*!< in: mtr handle */
const xdes_t* descr, /*!< in: descriptor */
mtr_t* mtr) /*!< in: mtr handle */
{
ulint state;
@ -705,7 +705,7 @@ UNIV_INLINE
xdes_t*
xdes_get_descriptor_with_space_hdr(
/*===============================*/
fsp_header_t* sp_header,/*!< in: space header, x-latched */
fsp_header_t* sp_header,/*!< in/out: space header, x-latched */
ulint space, /*!< in: space id */
ulint offset, /*!< in: page offset;
if equal to the free limit,
@ -869,9 +869,7 @@ fsp_init_file_page_low(
return;
}
#ifdef UNIV_BASIC_LOG_DEBUG
memset(page, 0xff, UNIV_PAGE_SIZE);
#endif
UNIV_MEM_INVALID(page, UNIV_PAGE_SIZE);
mach_write_to_4(page + FIL_PAGE_OFFSET, buf_block_get_page_no(block));
memset(page + FIL_PAGE_LSN, 0, 8);
mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
@ -1342,7 +1340,7 @@ fsp_fill_free_list(
descriptor page and ibuf bitmap page;
then we do not allocate more extents */
ulint space, /*!< in: space */
fsp_header_t* header, /*!< in: space header */
fsp_header_t* header, /*!< in/out: space header */
mtr_t* mtr) /*!< in: mtr */
{
ulint limit;

View file

@ -101,6 +101,8 @@ ha_clear(
ulint i;
ulint n;
ut_ad(table);
ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EXCLUSIVE));
#endif /* UNIV_SYNC_DEBUG */
@ -146,7 +148,9 @@ ha_insert_for_fold_func(
ha_node_t* prev_node;
ulint hash;
ut_ad(table && data);
ut_ad(data);
ut_ad(table);
ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
ut_a(block->frame == page_align(data));
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
@ -237,6 +241,8 @@ ha_delete_hash_node(
hash_table_t* table, /*!< in: hash table */
ha_node_t* del_node) /*!< in: node to be deleted */
{
ut_ad(table);
ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
# ifndef UNIV_HOTBACKUP
if (table->adaptive) {
@ -267,6 +273,8 @@ ha_search_and_update_if_found_func(
{
ha_node_t* node;
ut_ad(table);
ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
ASSERT_HASH_MUTEX_OWN(table, fold);
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
ut_a(new_block->frame == page_align(new_data));
@ -304,6 +312,8 @@ ha_remove_all_nodes_to_page(
{
ha_node_t* node;
ut_ad(table);
ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
ASSERT_HASH_MUTEX_OWN(table, fold);
node = ha_chain_get_first(table, fold);
@ -353,6 +363,8 @@ ha_validate(
ibool ok = TRUE;
ulint i;
ut_ad(table);
ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
ut_a(start_index <= end_index);
ut_a(start_index < hash_get_n_cells(table));
ut_a(end_index < hash_get_n_cells(table));
@ -391,6 +403,8 @@ ha_print_info(
FILE* file, /*!< in: file where to print */
hash_table_t* table) /*!< in: hash table */
{
ut_ad(table);
ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
#ifdef UNIV_DEBUG
/* Some of the code here is disabled for performance reasons in production
builds, see http://bugs.mysql.com/36941 */

View file

@ -31,6 +31,11 @@ Created 5/20/1997 Heikki Tuuri
#include "mem0mem.h"
#ifndef UNIV_HOTBACKUP
# ifdef UNIV_PFS_MUTEX
UNIV_INTERN mysql_pfs_key_t hash_table_mutex_key;
# endif /* UNIV_PFS_MUTEX */
/************************************************************//**
Reserves the mutex for a fold value in a hash table. */
UNIV_INTERN
@ -119,7 +124,7 @@ hash_create(
table->heaps = NULL;
#endif /* !UNIV_HOTBACKUP */
table->heap = NULL;
table->magic_n = HASH_TABLE_MAGIC_N;
ut_d(table->magic_n = HASH_TABLE_MAGIC_N);
/* Initialize the cell array */
hash_table_clear(table);
@ -135,6 +140,8 @@ hash_table_free(
/*============*/
hash_table_t* table) /*!< in, own: hash table */
{
ut_ad(table);
ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
#ifndef UNIV_HOTBACKUP
ut_a(table->mutexes == NULL);
#endif /* !UNIV_HOTBACKUP */
@ -160,13 +167,16 @@ hash_create_mutexes_func(
{
ulint i;
ut_ad(table);
ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
ut_a(n_mutexes > 0);
ut_a(ut_is_2pow(n_mutexes));
table->mutexes = mem_alloc(n_mutexes * sizeof(mutex_t));
for (i = 0; i < n_mutexes; i++) {
mutex_create(table->mutexes + i, sync_level);
mutex_create(hash_table_mutex_key,
table->mutexes + i, sync_level);
}
table->n_mutexes = n_mutexes;

File diff suppressed because it is too large Load diff

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 2000, 2009, MySQL AB & Innobase Oy. All Rights Reserved.
Copyright (c) 2000, 2010, MySQL AB & Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -27,15 +27,31 @@ Place, Suite 330, Boston, MA 02111-1307 USA
#pragma interface /* gcc class implementation */
#endif
/* Structure defines translation table between mysql index and innodb
index structures */
typedef struct innodb_idx_translate_struct {
ulint index_count; /*!< number of valid index entries
in the index_mapping array */
ulint array_size; /*!< array size of index_mapping */
dict_index_t** index_mapping; /*!< index pointer array directly
maps to index in Innodb from MySQL
array index */
} innodb_idx_translate_t;
/** InnoDB table share */
typedef struct st_innobase_share {
THR_LOCK lock; /*!< MySQL lock protecting
this structure */
const char* table_name; /*!< InnoDB table name */
uint use_count; /*!< reference count,
incremented in get_share()
and decremented in free_share() */
void* table_name_hash;/*!< hash table chain node */
THR_LOCK lock; /*!< MySQL lock protecting
this structure */
const char* table_name; /*!< InnoDB table name */
uint use_count; /*!< reference count,
incremented in get_share()
and decremented in
free_share() */
void* table_name_hash;/*!< hash table chain node */
innodb_idx_translate_t idx_trans_tbl; /*!< index translation
table between MySQL and
Innodb */
} INNOBASE_SHARE;
@ -91,9 +107,8 @@ class ha_innobase: public handler
ulint innobase_reset_autoinc(ulonglong auto_inc);
ulint innobase_get_autoinc(ulonglong* value);
ulint innobase_update_autoinc(ulonglong auto_inc);
ulint innobase_initialize_autoinc();
void innobase_initialize_autoinc();
dict_index_t* innobase_get_index(uint keynr);
ulonglong innobase_get_int_col_max_value(const Field* field);
/* Init values for the class: */
public:

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 2005, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 2005, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -21,10 +21,8 @@ Place, Suite 330, Boston, MA 02111-1307 USA
Smart ALTER TABLE
*******************************************************/
#include <unireg.h>
#include <mysql_priv.h>
#include <mysqld_error.h>
#include <sql_lex.h> // SQLCOM_CREATE_INDEX
#include <mysql/innodb_priv.h>
extern "C" {
#include "log0log.h"
@ -231,9 +229,11 @@ static
int
innobase_check_index_keys(
/*======================*/
const KEY* key_info, /*!< in: Indexes to be created */
ulint num_of_keys) /*!< in: Number of indexes to
be created */
const KEY* key_info, /*!< in: Indexes to be
created */
ulint num_of_keys, /*!< in: Number of
indexes to be created */
const dict_table_t* table) /*!< in: Existing indexes */
{
ulint key_num;
@ -250,9 +250,22 @@ innobase_check_index_keys(
const KEY& key2 = key_info[i];
if (0 == strcmp(key.name, key2.name)) {
sql_print_error("InnoDB: key name `%s` appears"
" twice in CREATE INDEX\n",
key.name);
my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0),
key.name);
return(ER_WRONG_NAME_FOR_INDEX);
}
}
/* Check that the same index name does not already exist. */
for (const dict_index_t* index
= dict_table_get_first_index(table);
index; index = dict_table_get_next_index(index)) {
if (0 == strcmp(key.name, index->name)) {
my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0),
key.name);
return(ER_WRONG_NAME_FOR_INDEX);
}
@ -260,7 +273,7 @@ innobase_check_index_keys(
/* Check that MySQL does not try to create a column
prefix index field on an inappropriate data type and
that the same colum does not appear twice in the index. */
that the same column does not appear twice in the index. */
for (ulint i = 0; i < key.key_parts; i++) {
const KEY_PART_INFO& key_part1
@ -291,14 +304,8 @@ innobase_check_index_keys(
}
}
sql_print_error("InnoDB: MySQL is trying to"
" create a column prefix"
" index field on an"
" inappropriate data type."
" column `%s`,"
" index `%s`.\n",
field->field_name,
key.name);
my_error(ER_WRONG_KEY_COLUMN, MYF(0),
field->field_name);
return(ER_WRONG_KEY_COLUMN);
}
@ -311,11 +318,8 @@ innobase_check_index_keys(
continue;
}
sql_print_error("InnoDB: column `%s`"
" is not allowed to occur"
" twice in index `%s`.\n",
key_part1.field->field_name,
key.name);
my_error(ER_WRONG_KEY_COLUMN, MYF(0),
key_part1.field->field_name);
return(ER_WRONG_KEY_COLUMN);
}
}
@ -524,12 +528,14 @@ innobase_create_key_def(
key_info->name, "PRIMARY");
/* If there is a UNIQUE INDEX consisting entirely of NOT NULL
columns, MySQL will treat it as a PRIMARY KEY unless the
table already has one. */
columns and if the index does not contain column prefix(es)
(only prefix/part of the column is indexed), MySQL will treat the
index as a PRIMARY KEY unless the table already has one. */
if (!new_primary && (key_info->flags & HA_NOSAME)
&& (!(key_info->flags & HA_KEY_HAS_PART_KEY_SEG))
&& row_table_got_default_clust_index(table)) {
uint key_part = key_info->key_parts;
uint key_part = key_info->key_parts;
new_primary = TRUE;
@ -658,12 +664,18 @@ ha_innobase::add_index(
innodb_table = indexed_table
= dict_table_get(prebuilt->table->name, FALSE);
if (UNIV_UNLIKELY(!innodb_table)) {
error = HA_ERR_NO_SUCH_TABLE;
goto err_exit;
}
/* Check if the index name is reserved. */
if (innobase_index_name_is_reserved(trx, key_info, num_of_keys)) {
error = -1;
} else {
/* Check that index keys are sensible */
error = innobase_check_index_keys(key_info, num_of_keys);
error = innobase_check_index_keys(key_info, num_of_keys,
innodb_table);
}
if (UNIV_UNLIKELY(error)) {
@ -710,6 +722,8 @@ err_exit:
row_mysql_lock_data_dictionary(trx);
dict_locked = TRUE;
ut_d(dict_table_check_for_dup_indexes(innodb_table, FALSE));
/* If a new primary key is defined for the table we need
to drop the original table and rebuild all indexes. */
@ -742,6 +756,8 @@ err_exit:
user_thd);
}
ut_d(dict_table_check_for_dup_indexes(innodb_table,
FALSE));
row_mysql_unlock_data_dictionary(trx);
goto err_exit;
}
@ -766,6 +782,10 @@ err_exit:
ut_ad(error == DB_SUCCESS);
/* We will need to rebuild index translation table. Set
valid index entry count in the translation table to zero */
share->idx_trans_tbl.index_count = 0;
/* Commit the data dictionary transaction in order to release
the table locks on the system tables. This means that if
MySQL crashes while creating a new primary key inside
@ -801,18 +821,6 @@ err_exit:
index, num_of_idx, table);
error_handling:
#ifdef UNIV_DEBUG
/* TODO: At the moment we can't handle the following statement
in our debugging code below:
alter table t drop index b, add index (b);
The fix will have to parse the SQL and note that the index
being added has the same name as the one being dropped and
ignore that in the dup index check.*/
//dict_table_check_for_dup_indexes(prebuilt->table);
#endif
/* After an error, remove all those index definitions from the
dictionary which were defined. */
@ -824,6 +832,8 @@ error_handling:
row_mysql_lock_data_dictionary(trx);
dict_locked = TRUE;
ut_d(dict_table_check_for_dup_indexes(prebuilt->table, TRUE));
if (!new_primary) {
error = row_merge_rename_indexes(trx, indexed_table);
@ -910,6 +920,8 @@ convert_error:
trx_commit_for_mysql(prebuilt->trx);
}
ut_d(dict_table_check_for_dup_indexes(innodb_table, FALSE));
if (dict_locked) {
row_mysql_unlock_data_dictionary(trx);
}
@ -953,6 +965,7 @@ ha_innobase::prepare_drop_index(
/* Test and mark all the indexes to be dropped */
row_mysql_lock_data_dictionary(trx);
ut_d(dict_table_check_for_dup_indexes(prebuilt->table, FALSE));
/* Check that none of the indexes have previously been flagged
for deletion. */
@ -1118,6 +1131,7 @@ func_exit:
} while (index);
}
ut_d(dict_table_check_for_dup_indexes(prebuilt->table, FALSE));
row_mysql_unlock_data_dictionary(trx);
DBUG_RETURN(err);
@ -1164,6 +1178,7 @@ ha_innobase::final_drop_index(
prebuilt->table->flags, user_thd);
row_mysql_lock_data_dictionary(trx);
ut_d(dict_table_check_for_dup_indexes(prebuilt->table, FALSE));
if (UNIV_UNLIKELY(err)) {
@ -1200,11 +1215,12 @@ ha_innobase::final_drop_index(
ut_a(!index->to_be_dropped);
}
#ifdef UNIV_DEBUG
dict_table_check_for_dup_indexes(prebuilt->table);
#endif
/* We will need to rebuild index translation table. Set
valid index entry count in the translation table to zero */
share->idx_trans_tbl.index_count = 0;
func_exit:
ut_d(dict_table_check_for_dup_indexes(prebuilt->table, FALSE));
trx_commit_for_mysql(trx);
trx_commit_for_mysql(prebuilt->trx);
row_mysql_unlock_data_dictionary(trx);

View file

@ -23,8 +23,8 @@ InnoDB INFORMATION SCHEMA tables interface to MySQL.
Created July 18, 2007 Vasil Dimov
*******************************************************/
#include <mysql_priv.h>
#include <mysqld_error.h>
#include <sql_acl.h> // PROCESS_ACL
#include <m_ctype.h>
#include <hash.h>
@ -32,8 +32,7 @@ Created July 18, 2007 Vasil Dimov
#include <mysys_err.h>
#include <my_sys.h>
#include "i_s.h"
#include <sql_plugin.h>
#include <mysql/innodb_priv.h>
#include <mysql/plugin.h>
extern "C" {
#include "trx0i_s.h"

View file

@ -36,7 +36,7 @@ Created November 07, 2007 Vasil Dimov
#define MYSQL_SERVER
#endif /* MYSQL_SERVER */
#include <sql_priv.h>
#include <mysql_priv.h>
#include "mysql_addons.h"
#include "univ.i"

File diff suppressed because it is too large Load diff

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -68,19 +68,30 @@ enum btr_latch_mode {
BTR_MODIFY_PREV = 36
};
/* BTR_INSERT, BTR_DELETE and BTR_DELETE_MARK are mutually exclusive. */
/** If this is ORed to btr_latch_mode, it means that the search tuple
will be inserted to the index, at the searched position */
will be inserted to the index, at the searched position.
When the record is not in the buffer pool, try to use the insert buffer. */
#define BTR_INSERT 512
/** This flag ORed to btr_latch_mode says that we do the search in query
optimization */
#define BTR_ESTIMATE 1024
/** This flag ORed to btr_latch_mode says that we can ignore possible
/** This flag ORed to BTR_INSERT says that we can ignore possible
UNIQUE definition on secondary indexes when we decide if we can use
the insert buffer to speed up inserts */
#define BTR_IGNORE_SEC_UNIQUE 2048
/** Try to delete mark the record at the searched position using the
insert/delete buffer when the record is not in the buffer pool. */
#define BTR_DELETE_MARK 4096
/** Try to purge the record at the searched position using the insert/delete
buffer when the record is not in the buffer pool. */
#define BTR_DELETE 8192
/**************************************************************//**
Gets the root node of a tree and x-latches it.
@return root page, x-latched */
@ -193,6 +204,10 @@ btr_leaf_page_release(
mtr_t* mtr); /*!< in: mtr */
/**************************************************************//**
Gets the child node file address in a node pointer.
NOTE: the offsets array must contain all offsets for the record since
we read the last field according to offsets and assume that it contains
the child page number. In other words offsets must have been retrieved
with rec_get_offsets(n_fields=ULINT_UNDEFINED).
@return child node address */
UNIV_INLINE
ulint
@ -317,12 +332,16 @@ Inserts a data tuple to a tree on a non-leaf level. It is assumed
that mtr holds an x-latch on the tree. */
UNIV_INTERN
void
btr_insert_on_non_leaf_level(
/*=========================*/
btr_insert_on_non_leaf_level_func(
/*==============================*/
dict_index_t* index, /*!< in: index */
ulint level, /*!< in: level, must be > 0 */
dtuple_t* tuple, /*!< in: the record to be inserted */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
mtr_t* mtr); /*!< in: mtr */
# define btr_insert_on_non_leaf_level(i,l,t,m) \
btr_insert_on_non_leaf_level_func(i,l,t,__FILE__,__LINE__,m)
#endif /* !UNIV_HOTBACKUP */
/****************************************************************//**
Sets a record as the predefined minimum record. */

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -255,6 +255,10 @@ btr_page_set_prev(
/**************************************************************//**
Gets the child node file address in a node pointer.
NOTE: the offsets array must contain all offsets for the record since
we read the last field according to offsets and assume that it contains
the child page number. In other words offsets must have been retrieved
with rec_get_offsets(n_fields=ULINT_UNDEFINED).
@return child node address */
UNIV_INLINE
ulint

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -138,7 +138,8 @@ btr_cur_search_to_nth_level(
should always be made using PAGE_CUR_LE to
search the position! */
ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ..., ORed with
BTR_INSERT and BTR_ESTIMATE;
at most one of BTR_INSERT, BTR_DELETE_MARK,
BTR_DELETE, or BTR_ESTIMATE;
cursor->left_block is used to store a pointer
to the left neighbor page, in the cases
BTR_SEARCH_PREV and BTR_MODIFY_PREV;
@ -152,29 +153,39 @@ btr_cur_search_to_nth_level(
ulint has_search_latch,/*!< in: latch mode the caller
currently has on btr_search_latch:
RW_S_LATCH, or 0 */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
mtr_t* mtr); /*!< in: mtr */
/*****************************************************************//**
Opens a cursor at either end of an index. */
UNIV_INTERN
void
btr_cur_open_at_index_side(
/*=======================*/
btr_cur_open_at_index_side_func(
/*============================*/
ibool from_left, /*!< in: TRUE if open to the low end,
FALSE if to the high end */
dict_index_t* index, /*!< in: index */
ulint latch_mode, /*!< in: latch mode */
btr_cur_t* cursor, /*!< in: cursor */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
mtr_t* mtr); /*!< in: mtr */
#define btr_cur_open_at_index_side(f,i,l,c,m) \
btr_cur_open_at_index_side_func(f,i,l,c,__FILE__,__LINE__,m)
/**********************************************************************//**
Positions a cursor at a randomly chosen position within a B-tree. */
UNIV_INTERN
void
btr_cur_open_at_rnd_pos(
/*====================*/
btr_cur_open_at_rnd_pos_func(
/*=========================*/
dict_index_t* index, /*!< in: index */
ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
btr_cur_t* cursor, /*!< in/out: B-tree cursor */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
mtr_t* mtr); /*!< in: mtr */
#define btr_cur_open_at_rnd_pos(i,l,c,m) \
btr_cur_open_at_rnd_pos_func(i,l,c,__FILE__,__LINE__,m)
/*************************************************************//**
Tries to perform an insert to a page in an index tree, next to cursor.
It is assumed that mtr holds an x-latch on the page. The operation does
@ -322,19 +333,6 @@ btr_cur_del_mark_set_sec_rec(
ibool val, /*!< in: value to set */
que_thr_t* thr, /*!< in: query thread */
mtr_t* mtr); /*!< in: mtr */
/***********************************************************//**
Clear a secondary index record's delete mark. This function is only
used by the insert buffer insert merge mechanism. */
UNIV_INTERN
void
btr_cur_del_unmark_for_ibuf(
/*========================*/
rec_t* rec, /*!< in/out: record to delete unmark */
page_zip_des_t* page_zip, /*!< in/out: compressed page
corresponding to rec, or NULL
when the tablespace is
uncompressed */
mtr_t* mtr); /*!< in: mtr */
/*************************************************************//**
Tries to compress a page of the tree if it seems useful. It is assumed
that mtr holds an x-latch on the tree and on the cursor page. To avoid
@ -586,7 +584,20 @@ btr_push_update_extern_fields(
const upd_t* update, /*!< in: update vector */
mem_heap_t* heap) /*!< in: memory heap */
__attribute__((nonnull));
/***********************************************************//**
Sets a secondary index record's delete mark to the given value. This
function is only used by the insert buffer merge mechanism. */
UNIV_INTERN
void
btr_cur_set_deleted_flag_for_ibuf(
/*==============================*/
rec_t* rec, /*!< in/out: record */
page_zip_des_t* page_zip, /*!< in/out: compressed page
corresponding to rec, or NULL
when the tablespace is
uncompressed */
ibool val, /*!< in: value to set */
mtr_t* mtr); /*!< in: mtr */
/*######################################################################*/
/** In the pessimistic delete, if the page data size drops below this
@ -618,8 +629,13 @@ enum btr_cur_method {
hash_node, and might be necessary to
update */
BTR_CUR_BINARY, /*!< success using the binary search */
BTR_CUR_INSERT_TO_IBUF /*!< performed the intended insert to
BTR_CUR_INSERT_TO_IBUF, /*!< performed the intended insert to
the insert buffer */
BTR_CUR_DEL_MARK_IBUF, /*!< performed the intended delete
mark in the insert/delete buffer */
BTR_CUR_DELETE_IBUF, /*!< performed the intended delete in
the insert/delete buffer */
BTR_CUR_DELETE_REF /*!< row_purge_poss_sec() failed */
};
/** The tree cursor: the definition appears here only for the compiler
@ -627,6 +643,7 @@ to know struct size! */
struct btr_cur_struct {
dict_index_t* index; /*!< index where positioned */
page_cur_t page_cur; /*!< page cursor */
purge_node_t* purge_node; /*!< purge node, for BTR_DELETE */
buf_block_t* left_block; /*!< this field is used to store
a pointer to the left neighbor
page, in the cases
@ -683,6 +700,23 @@ struct btr_cur_struct {
NULL */
ulint fold; /*!< fold value used in the search if
flag is BTR_CUR_HASH */
/*----- Delete buffering -------*/
ulint ibuf_cnt; /* in searches done on insert buffer
trees, this contains the "counter"
value (the first two bytes of the
fourth field) extracted from the
page above the leaf page, from the
father node pointer that pointed to
the leaf page. in other words, it
contains the minimum counter value
for records to be inserted on the
chosen leaf page. If for some reason
this can't be read, or if the search
ended on the leftmost leaf page in
the tree (in which case the father
node pointer had the 'minimum
record' flag set), this is
ULINT_UNDEFINED. */
/*------------------------------*/
/* @} */
btr_path_t* path_arr; /*!< in estimating the number of

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -82,8 +82,8 @@ Initializes and opens a persistent cursor to an index tree. It should be
closed with btr_pcur_close. */
UNIV_INLINE
void
btr_pcur_open(
/*==========*/
btr_pcur_open_func(
/*===============*/
dict_index_t* index, /*!< in: index */
const dtuple_t* tuple, /*!< in: tuple on which search done */
ulint mode, /*!< in: PAGE_CUR_L, ...;
@ -94,14 +94,18 @@ btr_pcur_open(
record! */
ulint latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */
btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
mtr_t* mtr); /*!< in: mtr */
#define btr_pcur_open(i,t,md,l,c,m) \
btr_pcur_open_func(i,t,md,l,c,__FILE__,__LINE__,m)
/**************************************************************//**
Opens an persistent cursor to an index tree without initializing the
cursor. */
UNIV_INLINE
void
btr_pcur_open_with_no_init(
/*=======================*/
btr_pcur_open_with_no_init_func(
/*============================*/
dict_index_t* index, /*!< in: index */
const dtuple_t* tuple, /*!< in: tuple on which search done */
ulint mode, /*!< in: PAGE_CUR_L, ...;
@ -119,7 +123,12 @@ btr_pcur_open_with_no_init(
ulint has_search_latch,/*!< in: latch mode the caller
currently has on btr_search_latch:
RW_S_LATCH, or 0 */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
mtr_t* mtr); /*!< in: mtr */
#define btr_pcur_open_with_no_init(ix,t,md,l,cur,has,m) \
btr_pcur_open_with_no_init_func(ix,t,md,l,cur,has,__FILE__,__LINE__,m)
/*****************************************************************//**
Opens a persistent cursor at either end of an index. */
UNIV_INLINE
@ -160,8 +169,8 @@ before first in tree. The latching mode must be BTR_SEARCH_LEAF or
BTR_MODIFY_LEAF. */
UNIV_INTERN
void
btr_pcur_open_on_user_rec(
/*======================*/
btr_pcur_open_on_user_rec_func(
/*===========================*/
dict_index_t* index, /*!< in: index */
const dtuple_t* tuple, /*!< in: tuple on which search done */
ulint mode, /*!< in: PAGE_CUR_L, ... */
@ -169,17 +178,25 @@ btr_pcur_open_on_user_rec(
BTR_MODIFY_LEAF */
btr_pcur_t* cursor, /*!< in: memory buffer for persistent
cursor */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
mtr_t* mtr); /*!< in: mtr */
#define btr_pcur_open_on_user_rec(i,t,md,l,c,m) \
btr_pcur_open_on_user_rec_func(i,t,md,l,c,__FILE__,__LINE__,m)
/**********************************************************************//**
Positions a cursor at a randomly chosen position within a B-tree. */
UNIV_INLINE
void
btr_pcur_open_at_rnd_pos(
/*=====================*/
btr_pcur_open_at_rnd_pos_func(
/*==========================*/
dict_index_t* index, /*!< in: index */
ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
btr_pcur_t* cursor, /*!< in/out: B-tree pcur */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
mtr_t* mtr); /*!< in: mtr */
#define btr_pcur_open_at_rnd_pos(i,l,c,m) \
btr_pcur_open_at_rnd_pos_func(i,l,c,__FILE__,__LINE__,m)
/**************************************************************//**
Frees the possible old_rec_buf buffer of a persistent cursor and sets the
latch mode of the persistent cursor to BTR_NO_LATCHES. */
@ -218,11 +235,15 @@ record and it can be restored on a user record whose ordering fields
are identical to the ones of the original user record */
UNIV_INTERN
ibool
btr_pcur_restore_position(
/*======================*/
btr_pcur_restore_position_func(
/*===========================*/
ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
btr_pcur_t* cursor, /*!< in: detached persistent cursor */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
mtr_t* mtr); /*!< in: mtr */
#define btr_pcur_restore_position(l,cur,mtr) \
btr_pcur_restore_position_func(l,cur,__FILE__,__LINE__,mtr)
/**************************************************************//**
If the latch mode of the cursor is BTR_LEAF_SEARCH or BTR_LEAF_MODIFY,
releases the page latch and bufferfix reserved by the cursor.
@ -260,20 +281,13 @@ btr_pcur_get_mtr(
/*=============*/
btr_pcur_t* cursor); /*!< in: persistent cursor */
/**************************************************************//**
Commits the pcur mtr and sets the pcur latch mode to BTR_NO_LATCHES,
Commits the mtr and sets the pcur latch mode to BTR_NO_LATCHES,
that is, the cursor becomes detached. If there have been modifications
to the page where pcur is positioned, this can be used instead of
btr_pcur_release_leaf. Function btr_pcur_store_position should be used
before calling this, if restoration of cursor is wanted later. */
UNIV_INLINE
void
btr_pcur_commit(
/*============*/
btr_pcur_t* pcur); /*!< in: persistent cursor */
/**************************************************************//**
Differs from btr_pcur_commit in that we can specify the mtr to commit. */
UNIV_INLINE
void
btr_pcur_commit_specify_mtr(
/*========================*/
btr_pcur_t* pcur, /*!< in: persistent cursor */

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -395,30 +395,13 @@ btr_pcur_move_to_next(
}
/**************************************************************//**
Commits the pcur mtr and sets the pcur latch mode to BTR_NO_LATCHES,
Commits the mtr and sets the pcur latch mode to BTR_NO_LATCHES,
that is, the cursor becomes detached. If there have been modifications
to the page where pcur is positioned, this can be used instead of
btr_pcur_release_leaf. Function btr_pcur_store_position should be used
before calling this, if restoration of cursor is wanted later. */
UNIV_INLINE
void
btr_pcur_commit(
/*============*/
btr_pcur_t* pcur) /*!< in: persistent cursor */
{
ut_a(pcur->pos_state == BTR_PCUR_IS_POSITIONED);
pcur->latch_mode = BTR_NO_LATCHES;
mtr_commit(pcur->mtr);
pcur->pos_state = BTR_PCUR_WAS_POSITIONED;
}
/**************************************************************//**
Differs from btr_pcur_commit in that we can specify the mtr to commit. */
UNIV_INLINE
void
btr_pcur_commit_specify_mtr(
/*========================*/
btr_pcur_t* pcur, /*!< in: persistent cursor */
@ -483,8 +466,8 @@ Initializes and opens a persistent cursor to an index tree. It should be
closed with btr_pcur_close. */
UNIV_INLINE
void
btr_pcur_open(
/*==========*/
btr_pcur_open_func(
/*===============*/
dict_index_t* index, /*!< in: index */
const dtuple_t* tuple, /*!< in: tuple on which search done */
ulint mode, /*!< in: PAGE_CUR_L, ...;
@ -495,6 +478,8 @@ btr_pcur_open(
record! */
ulint latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */
btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
mtr_t* mtr) /*!< in: mtr */
{
btr_cur_t* btr_cursor;
@ -511,7 +496,7 @@ btr_pcur_open(
btr_cursor = btr_pcur_get_btr_cur(cursor);
btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode,
btr_cursor, 0, mtr);
btr_cursor, 0, file, line, mtr);
cursor->pos_state = BTR_PCUR_IS_POSITIONED;
cursor->trx_if_known = NULL;
@ -522,8 +507,8 @@ Opens an persistent cursor to an index tree without initializing the
cursor. */
UNIV_INLINE
void
btr_pcur_open_with_no_init(
/*=======================*/
btr_pcur_open_with_no_init_func(
/*============================*/
dict_index_t* index, /*!< in: index */
const dtuple_t* tuple, /*!< in: tuple on which search done */
ulint mode, /*!< in: PAGE_CUR_L, ...;
@ -541,6 +526,8 @@ btr_pcur_open_with_no_init(
ulint has_search_latch,/*!< in: latch mode the caller
currently has on btr_search_latch:
RW_S_LATCH, or 0 */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
mtr_t* mtr) /*!< in: mtr */
{
btr_cur_t* btr_cursor;
@ -553,7 +540,8 @@ btr_pcur_open_with_no_init(
btr_cursor = btr_pcur_get_btr_cur(cursor);
btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode,
btr_cursor, has_search_latch, mtr);
btr_cursor, has_search_latch,
file, line, mtr);
cursor->pos_state = BTR_PCUR_IS_POSITIONED;
cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
@ -600,11 +588,13 @@ btr_pcur_open_at_index_side(
Positions a cursor at a randomly chosen position within a B-tree. */
UNIV_INLINE
void
btr_pcur_open_at_rnd_pos(
/*=====================*/
btr_pcur_open_at_rnd_pos_func(
/*==========================*/
dict_index_t* index, /*!< in: index */
ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
btr_pcur_t* cursor, /*!< in/out: B-tree pcur */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
mtr_t* mtr) /*!< in: mtr */
{
/* Initialize the cursor */
@ -614,8 +604,9 @@ btr_pcur_open_at_rnd_pos(
btr_pcur_init(cursor);
btr_cur_open_at_rnd_pos(index, latch_mode,
btr_pcur_get_btr_cur(cursor), mtr);
btr_cur_open_at_rnd_pos_func(index, latch_mode,
btr_pcur_get_btr_cur(cursor),
file, line, mtr);
cursor->pos_state = BTR_PCUR_IS_POSITIONED;
cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -34,6 +34,7 @@ Created 11/5/1995 Heikki Tuuri
#include "ut0byte.h"
#include "page0types.h"
#ifndef UNIV_HOTBACKUP
#include "ut0rbt.h"
#include "os0proc.h"
/** @name Modes for buf_page_get_gen */
@ -46,6 +47,10 @@ Created 11/5/1995 Heikki Tuuri
it is error-prone programming
not to set a latch, and it
should be used with care */
#define BUF_GET_IF_IN_POOL_OR_WATCH 15
/*!< Get the page only if it's in the
buffer pool, if not then set a watch
on the page. */
/* @} */
/** @name Modes for buf_page_get_known_nowait */
/* @{ */
@ -81,6 +86,8 @@ The enumeration values must be 0..7. */
enum buf_page_state {
BUF_BLOCK_ZIP_FREE = 0, /*!< contains a free
compressed page */
BUF_BLOCK_POOL_WATCH = 0, /*!< a sentinel for the buffer pool
watch, element of buf_pool_watch[] */
BUF_BLOCK_ZIP_PAGE, /*!< contains a clean
compressed page */
BUF_BLOCK_ZIP_DIRTY, /*!< contains a compressed
@ -202,20 +209,14 @@ with care. */
#define buf_page_get_with_no_latch(SP, ZS, OF, MTR) buf_page_get_gen(\
SP, ZS, OF, RW_NO_LATCH, NULL,\
BUF_GET_NO_LATCH, __FILE__, __LINE__, MTR)
/**************************************************************//**
NOTE! The following macros should be used instead of
buf_page_optimistic_get_func, to improve debugging. Only values RW_S_LATCH and
RW_X_LATCH are allowed as LA! */
#define buf_page_optimistic_get(LA, BL, MC, MTR) \
buf_page_optimistic_get_func(LA, BL, MC, __FILE__, __LINE__, MTR)
/********************************************************************//**
This is the general function used to get optimistic access to a database
page.
@return TRUE if success */
UNIV_INTERN
ibool
buf_page_optimistic_get_func(
/*=========================*/
buf_page_optimistic_get(
/*====================*/
ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
buf_block_t* block, /*!< in: guessed block */
ib_uint64_t modify_clock,/*!< in: modify clock value if mode is
@ -291,7 +292,8 @@ buf_page_get_gen(
ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
buf_block_t* guess, /*!< in: guessed block or NULL */
ulint mode, /*!< in: BUF_GET, BUF_GET_IF_IN_POOL,
BUF_GET_NO_LATCH */
BUF_GET_NO_LATCH or
BUF_GET_IF_IN_POOL_OR_WATCH */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
mtr_t* mtr); /*!< in: mini-transaction */
@ -341,9 +343,8 @@ void
buf_page_release(
/*=============*/
buf_block_t* block, /*!< in: buffer block */
ulint rw_latch, /*!< in: RW_S_LATCH, RW_X_LATCH,
ulint rw_latch); /*!< in: RW_S_LATCH, RW_X_LATCH,
RW_NO_LATCH */
mtr_t* mtr); /*!< in: mtr */
/********************************************************************//**
Moves a page to the start of the buffer pool LRU list. This high-level
function can be used to prevent an important page from slipping out of
@ -995,6 +996,16 @@ Returns the control block of a file page, NULL if not found.
@return block, NULL if not found */
UNIV_INLINE
buf_page_t*
buf_page_hash_get_low(
/*==================*/
ulint space, /*!< in: space id */
ulint offset, /*!< in: offset of the page within space */
ulint fold); /*!< in: buf_page_address_fold(space, offset) */
/******************************************************************//**
Returns the control block of a file page, NULL if not found.
@return block, NULL if not found or not a real control block */
UNIV_INLINE
buf_page_t*
buf_page_hash_get(
/*==============*/
ulint space, /*!< in: space id */
@ -1016,8 +1027,49 @@ UNIV_INTERN
ulint
buf_get_free_list_len(void);
/*=======================*/
#endif /* !UNIV_HOTBACKUP */
/********************************************************************
Determine if a block is a sentinel for a buffer pool watch.
@return TRUE if a sentinel for a buffer pool watch, FALSE if not */
UNIV_INTERN
ibool
buf_pool_watch_is(
/*==============*/
const buf_page_t* bpage) /*!< in: block */
__attribute__((nonnull, warn_unused_result));
/****************************************************************//**
Add watch for the given page to be read in. Caller must have the buffer pool
@return NULL if watch set, block if the page is in the buffer pool */
UNIV_INTERN
buf_page_t*
buf_pool_watch_set(
/*===============*/
ulint space, /*!< in: space id */
ulint offset, /*!< in: page number */
ulint fold) /*!< in: buf_page_address_fold(space, offset) */
__attribute__((warn_unused_result));
/****************************************************************//**
Stop watching if the page has been read in.
buf_pool_watch_set(space,offset) must have returned NULL before. */
UNIV_INTERN
void
buf_pool_watch_unset(
/*=================*/
ulint space, /*!< in: space id */
ulint offset);/*!< in: page number */
/****************************************************************//**
Check if the page has been read in.
This may only be called after buf_pool_watch_set(space,offset)
has returned NULL and before invoking buf_pool_watch_unset(space,offset).
@return FALSE if the given page was not read in, TRUE if it was */
UNIV_INTERN
ibool
buf_pool_watch_occurred(
/*====================*/
ulint space, /*!< in: space id */
ulint offset) /*!< in: page number */
__attribute__((warn_unused_result));
#endif /* !UNIV_HOTBACKUP */
/** The common buffer control block structure
for compressed and uncompressed frames */
@ -1057,7 +1109,10 @@ struct buf_page_struct{
#endif /* !UNIV_HOTBACKUP */
page_zip_des_t zip; /*!< compressed page; zip.data
(but not the data it points to) is
also protected by buf_pool_mutex */
also protected by buf_pool_mutex;
state == BUF_BLOCK_ZIP_PAGE and
zip.data == NULL means an active
buf_pool_watch */
#ifndef UNIV_HOTBACKUP
buf_page_t* hash; /*!< node used in chaining to
buf_pool->page_hash or
@ -1073,8 +1128,9 @@ struct buf_page_struct{
UT_LIST_NODE_T(buf_page_t) list;
/*!< based on state, this is a
list node, protected only by
buf_pool_mutex, in one of the
list node, protected either by
buf_pool_mutex or by
flush_list_mutex, in one of the
following lists in buf_pool:
- BUF_BLOCK_NOT_USED: free
@ -1083,6 +1139,12 @@ struct buf_page_struct{
- BUF_BLOCK_ZIP_PAGE: zip_clean
- BUF_BLOCK_ZIP_FREE: zip_free[]
If bpage is part of flush_list
then the node pointers are
covered by flush_list_mutex.
Otherwise these pointers are
protected by buf_pool_mutex.
The contents of the list node
is undefined if !in_flush_list
&& state == BUF_BLOCK_FILE_PAGE,
@ -1093,10 +1155,15 @@ struct buf_page_struct{
#ifdef UNIV_DEBUG
ibool in_flush_list; /*!< TRUE if in buf_pool->flush_list;
when buf_pool_mutex is free, the
when flush_list_mutex is free, the
following should hold: in_flush_list
== (state == BUF_BLOCK_FILE_PAGE
|| state == BUF_BLOCK_ZIP_DIRTY) */
|| state == BUF_BLOCK_ZIP_DIRTY)
Writes to this field must be
covered by both block->mutex
and flush_list_mutex. Hence
reads can happen while holding
any one of the two mutexes */
ibool in_free_list; /*!< TRUE if in buf_pool->free; when
buf_pool_mutex is free, the following
should hold: in_free_list
@ -1106,7 +1173,8 @@ struct buf_page_struct{
/*!< log sequence number of
the youngest modification to
this block, zero if not
modified */
modified. Protected by block
mutex */
ib_uint64_t oldest_modification;
/*!< log sequence number of
the START of the log entry
@ -1114,7 +1182,12 @@ struct buf_page_struct{
modification to this block
which has not yet been flushed
on disk; zero if all
modifications are on disk */
modifications are on disk.
Writes to this field must be
covered by both block->mutex
and flush_list_mutex. Hence
reads can happen while holding
any one of the two mutexes */
/* @} */
/** @name LRU replacement algorithm fields
These fields are protected by buf_pool_mutex only (not
@ -1185,15 +1258,21 @@ struct buf_block_struct{
rw_lock_t lock; /*!< read-write lock of the buffer
frame */
unsigned lock_hash_val:32;/*!< hashed value of the page address
in the record lock hash table */
unsigned check_index_page_at_flush:1;
in the record lock hash table;
protected by buf_block_t::lock
(or buf_block_t::mutex, buf_pool_mutex
in buf_page_get_gen(),
buf_page_init_for_read()
and buf_page_create()) */
ibool check_index_page_at_flush;
/*!< TRUE if we know that this is
an index page, and want the database
to check its consistency before flush;
note that there may be pages in the
buffer pool which are index pages,
but this flag is not set because
we do not keep track of all pages */
we do not keep track of all pages;
NOT protected by any mutex */
/* @} */
/** @name Optimistic search field */
/* @{ */
@ -1346,6 +1425,21 @@ struct buf_pool_struct{
/* @{ */
mutex_t flush_list_mutex;/*!< mutex protecting the
flush list access. This mutex
protects flush_list, flush_rbt
and bpage::list pointers when
the bpage is on flush_list. It
also protects writes to
bpage::oldest_modification */
mutex_t flush_order_mutex;/*!< mutex to serialize access to
the flush list when we are putting
dirty blocks in the list. The idea
behind this mutex is to be able
to release log_sys->mutex during
mtr_commit and still ensure that
insertions in the flush_list happen
in the LSN order. */
UT_LIST_BASE_NODE_T(buf_page_t) flush_list;
/*!< base node of the modified block
list */
@ -1359,6 +1453,20 @@ struct buf_pool_struct{
/*!< this is in the set state
when there is no flush batch
of the given type running */
ib_rbt_t* flush_rbt; /*!< a red-black tree is used
exclusively during recovery to
speed up insertions in the
flush_list. This tree contains
blocks in order of
oldest_modification LSN and is
kept in sync with the
flush_list.
Each member of the tree MUST
also be on the flush_list.
This tree is relevant only in
recovery and is set to NULL
once the recovery is over.
Protected by flush_list_mutex */
ulint freed_page_clock;/*!< a sequence number used
to count the number of buffer
blocks removed from the end of
@ -1372,8 +1480,8 @@ struct buf_pool_struct{
this is incremented by one; this is
set to zero when a buffer block is
allocated */
/* @} */
/** @name LRU replacement algorithm fields */
/* @{ */
@ -1439,6 +1547,31 @@ Use these instead of accessing buf_pool_mutex directly. */
mutex_enter(&buf_pool_mutex); \
} while (0)
/** Test if flush list mutex is owned. */
#define buf_flush_list_mutex_own() mutex_own(&buf_pool->flush_list_mutex)
/** Acquire the flush list mutex. */
#define buf_flush_list_mutex_enter() do { \
mutex_enter(&buf_pool->flush_list_mutex); \
} while (0)
/** Release the flush list mutex. */
# define buf_flush_list_mutex_exit() do { \
mutex_exit(&buf_pool->flush_list_mutex); \
} while (0)
/** Test if flush order mutex is owned. */
#define buf_flush_order_mutex_own() mutex_own(&buf_pool->flush_order_mutex)
/** Acquire the flush order mutex. */
#define buf_flush_order_mutex_enter() do { \
mutex_enter(&buf_pool->flush_order_mutex); \
} while (0)
/** Release the flush order mutex. */
# define buf_flush_order_mutex_exit() do { \
mutex_exit(&buf_pool->flush_order_mutex); \
} while (0)
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/** Flag to forbid the release of the buffer pool mutex.
Protected by buf_pool_mutex. */

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@ -81,7 +81,7 @@ buf_page_peek_if_too_old(
unsigned access_time = buf_page_is_accessed(bpage);
if (access_time > 0
&& (ut_time_ms() - access_time)
&& ((ib_uint32_t) (ut_time_ms() - access_time))
>= buf_LRU_old_threshold_ms) {
return(TRUE);
}
@ -121,7 +121,7 @@ buf_pool_get_oldest_modification(void)
buf_page_t* bpage;
ib_uint64_t lsn;
buf_pool_mutex_enter();
buf_flush_list_mutex_enter();
bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
@ -132,7 +132,7 @@ buf_pool_get_oldest_modification(void)
lsn = bpage->oldest_modification;
}
buf_pool_mutex_exit();
buf_flush_list_mutex_exit();
/* The returned answer may be out of date: the flush_list can
change after the mutex has been released. */
@ -705,6 +705,12 @@ buf_block_get_lock_hash_val(
/*========================*/
const buf_block_t* block) /*!< in: block */
{
ut_ad(block);
ut_ad(buf_page_in_file(&block->page));
#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(&(((buf_block_t*) block)->lock), RW_LOCK_EXCLUSIVE)
|| rw_lock_own(&(((buf_block_t*) block)->lock), RW_LOCK_SHARED));
#endif /* UNIV_SYNC_DEBUG */
return(block->lock_hash_val);
}
@ -902,21 +908,20 @@ Returns the control block of a file page, NULL if not found.
@return block, NULL if not found */
UNIV_INLINE
buf_page_t*
buf_page_hash_get(
/*==============*/
buf_page_hash_get_low(
/*==================*/
ulint space, /*!< in: space id */
ulint offset) /*!< in: offset of the page within space */
ulint offset, /*!< in: offset of the page within space */
ulint fold) /*!< in: buf_page_address_fold(space, offset) */
{
buf_page_t* bpage;
ulint fold;
ut_ad(buf_pool);
ut_ad(buf_pool_mutex_own());
ut_ad(fold == buf_page_address_fold(space, offset));
/* Look for the page in the hash table */
fold = buf_page_address_fold(space, offset);
HASH_SEARCH(hash, buf_pool->page_hash, fold, buf_page_t*, bpage,
ut_ad(bpage->in_page_hash && !bpage->in_zip_hash
&& buf_page_in_file(bpage)),
@ -931,6 +936,26 @@ buf_page_hash_get(
return(bpage);
}
/******************************************************************//**
Returns the control block of a file page, NULL if not found.
@return block, NULL if not found or not a real control block */
UNIV_INLINE
buf_page_t*
buf_page_hash_get(
/*==============*/
ulint space, /*!< in: space id */
ulint offset) /*!< in: offset of the page within space */
{
ulint fold = buf_page_address_fold(space, offset);
buf_page_t* bpage = buf_page_hash_get_low(space, offset, fold);
if (bpage && UNIV_UNLIKELY(buf_pool_watch_is(bpage))) {
bpage = NULL;
}
return(bpage);
}
/******************************************************************//**
Returns the control block of a file page, NULL if not found
or an uncompressed page frame does not exist.
@ -942,7 +967,11 @@ buf_block_hash_get(
ulint space, /*!< in: space id */
ulint offset) /*!< in: offset of the page within space */
{
return(buf_page_get_block(buf_page_hash_get(space, offset)));
buf_block_t* block;
block = buf_page_get_block(buf_page_hash_get(space, offset));
return(block);
}
/********************************************************************//**
@ -1018,21 +1047,14 @@ void
buf_page_release(
/*=============*/
buf_block_t* block, /*!< in: buffer block */
ulint rw_latch, /*!< in: RW_S_LATCH, RW_X_LATCH,
ulint rw_latch) /*!< in: RW_S_LATCH, RW_X_LATCH,
RW_NO_LATCH */
mtr_t* mtr) /*!< in: mtr */
{
ut_ad(block);
ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
ut_a(block->page.buf_fix_count > 0);
if (rw_latch == RW_X_LATCH && mtr->modifications) {
buf_pool_mutex_enter();
buf_flush_note_modification(block, mtr);
buf_pool_mutex_exit();
}
mutex_enter(&block->mutex);
#ifdef UNIV_SYNC_DEBUG

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -39,6 +39,16 @@ void
buf_flush_remove(
/*=============*/
buf_page_t* bpage); /*!< in: pointer to the block in question */
/*******************************************************************//**
Relocates a buffer control block on the flush_list.
Note that it is assumed that the contents of bpage has already been
copied to dpage. */
UNIV_INTERN
void
buf_flush_relocate_on_flush_list(
/*=============================*/
buf_page_t* bpage, /*!< in/out: control block being moved */
buf_page_t* dpage); /*!< in/out: destination block */
/********************************************************************//**
Updates the flush system data structures when a write is completed. */
UNIV_INTERN
@ -175,6 +185,22 @@ buf_flush_validate(void);
/*====================*/
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
/********************************************************************//**
Initialize the red-black tree to speed up insertions into the flush_list
during recovery process. Should be called at the start of recovery
process before any page has been read/written. */
UNIV_INTERN
void
buf_flush_init_flush_rbt(void);
/*==========================*/
/********************************************************************//**
Frees up the red-black tree. */
UNIV_INTERN
void
buf_flush_free_flush_rbt(void);
/*==========================*/
/** When buf_flush_free_margin is called, it tries to make this many blocks
available to replacement in the free list and at the end of the LRU list (to
make sure that a read-ahead batch can be read efficiently in a single

View file

@ -33,7 +33,8 @@ UNIV_INTERN
void
buf_flush_insert_into_flush_list(
/*=============================*/
buf_block_t* block); /*!< in/out: block which is modified */
buf_block_t* block, /*!< in/out: block which is modified */
ib_uint64_t lsn); /*!< in: oldest modification */
/********************************************************************//**
Inserts a modified block into the flush list in the right sorted position.
This function is used by recovery, because there the modifications do not
@ -42,7 +43,8 @@ UNIV_INTERN
void
buf_flush_insert_sorted_into_flush_list(
/*====================================*/
buf_block_t* block); /*!< in/out: block which is modified */
buf_block_t* block, /*!< in/out: block which is modified */
ib_uint64_t lsn); /*!< in: oldest modification */
/********************************************************************//**
This function should be called at a mini-transaction commit, if a page was
@ -61,24 +63,27 @@ buf_flush_note_modification(
#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
ut_ad(buf_pool_mutex_own());
ut_ad(!buf_pool_mutex_own());
ut_ad(!buf_flush_list_mutex_own());
ut_ad(buf_flush_order_mutex_own());
ut_ad(mtr->start_lsn != 0);
ut_ad(mtr->modifications);
mutex_enter(&block->mutex);
ut_ad(block->page.newest_modification <= mtr->end_lsn);
block->page.newest_modification = mtr->end_lsn;
if (!block->page.oldest_modification) {
block->page.oldest_modification = mtr->start_lsn;
ut_ad(block->page.oldest_modification != 0);
buf_flush_insert_into_flush_list(block);
buf_flush_insert_into_flush_list(block, mtr->start_lsn);
} else {
ut_ad(block->page.oldest_modification <= mtr->start_lsn);
}
mutex_exit(&block->mutex);
++srv_buf_pool_write_requests;
}
@ -101,23 +106,23 @@ buf_flush_recv_note_modification(
ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
buf_pool_mutex_enter();
ut_ad(!buf_pool_mutex_own());
ut_ad(!buf_flush_list_mutex_own());
ut_ad(buf_flush_order_mutex_own());
ut_ad(start_lsn != 0);
ut_ad(block->page.newest_modification <= end_lsn);
mutex_enter(&block->mutex);
block->page.newest_modification = end_lsn;
if (!block->page.oldest_modification) {
block->page.oldest_modification = start_lsn;
ut_ad(block->page.oldest_modification != 0);
buf_flush_insert_sorted_into_flush_list(block);
buf_flush_insert_sorted_into_flush_list(block, start_lsn);
} else {
ut_ad(block->page.oldest_modification <= start_lsn);
}
buf_pool_mutex_exit();
mutex_exit(&block->mutex);
}
#endif /* !UNIV_HOTBACKUP */

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -285,6 +285,10 @@ dtype_new_store_for_order_and_null_size(
#endif
ulint len;
ut_ad(type);
ut_ad(type->mtype >= DATA_VARCHAR);
ut_ad(type->mtype <= DATA_MYSQL);
buf[0] = (byte)(type->mtype & 0xFFUL);
if (type->prtype & DATA_BINARY_TYPE) {

View file

@ -93,6 +93,13 @@ enum db_err {
DB_PRIMARY_KEY_IS_NULL, /* a column in the PRIMARY KEY
was found to be NULL */
DB_STATS_DO_NOT_EXIST, /* an operation that requires the
persistent storage, used for recording
table and index statistics, was
requested but this storage does not
exist itself or the stats for a given
table do not exist */
/* The following are partial failure codes */
DB_FAIL = 1000,
DB_OVERFLOW,

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -137,6 +137,7 @@ clustered index */
#define DICT_SYS_INDEXES_PAGE_NO_FIELD 8
#define DICT_SYS_INDEXES_SPACE_NO_FIELD 7
#define DICT_SYS_INDEXES_TYPE_FIELD 6
#define DICT_SYS_INDEXES_NAME_FIELD 3
/* When a row id which is zero modulo this number (which must be a power of
two) is assigned, the field DICT_HDR_ROW_ID on the dictionary header page is

View file

@ -928,9 +928,10 @@ UNIV_INTERN
void
dict_table_check_for_dup_indexes(
/*=============================*/
const dict_table_t* table); /*!< in: Check for dup indexes
const dict_table_t* table, /*!< in: Check for dup indexes
in this table */
ibool tmp_ok);/*!< in: TRUE=allow temporary
index names */
#endif /* UNIV_DEBUG */
/**********************************************************************//**
Builds a node pointer out of a physical record and a page number.

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -80,21 +80,39 @@ combination of types */
/** File format */
/* @{ */
#define DICT_TF_FORMAT_SHIFT 5 /* file format */
#define DICT_TF_FORMAT_MASK (127 << DICT_TF_FORMAT_SHIFT)
#define DICT_TF_FORMAT_MASK \
((~(~0 << (DICT_TF_BITS - DICT_TF_FORMAT_SHIFT))) << DICT_TF_FORMAT_SHIFT)
#define DICT_TF_FORMAT_51 0 /*!< InnoDB/MySQL up to 5.1 */
#define DICT_TF_FORMAT_ZIP 1 /*!< InnoDB plugin for 5.1:
compressed tables,
new BLOB treatment */
/** Maximum supported file format */
#define DICT_TF_FORMAT_MAX DICT_TF_FORMAT_ZIP
/* @} */
#define DICT_TF_BITS 6 /*!< number of flag bits */
#if (1 << (DICT_TF_BITS - DICT_TF_FORMAT_SHIFT)) <= DICT_TF_FORMAT_MAX
# error "DICT_TF_BITS is insufficient for DICT_TF_FORMAT_MAX"
#endif
/* @} */
/** @brief Additional table flags.
These flags will be stored in SYS_TABLES.MIX_LEN. All unused flags
will be written as 0. The column may contain garbage for tables
created with old versions of InnoDB that only implemented
ROW_FORMAT=REDUNDANT. */
/* @{ */
#define DICT_TF2_SHIFT DICT_TF_BITS
/*!< Shift value for
table->flags. */
#define DICT_TF2_TEMPORARY 1 /*!< TRUE for tables from
CREATE TEMPORARY TABLE. */
#define DICT_TF2_BITS (DICT_TF2_SHIFT + 1)
/*!< Total number of bits
in table->flags. */
/* @} */
/**********************************************************************//**
Creates a table memory object.
@return own: table object */
@ -374,7 +392,7 @@ struct dict_table_struct{
unsigned space:32;
/*!< space where the clustered index of the
table is placed */
unsigned flags:DICT_TF_BITS;/*!< DICT_TF_COMPACT, ... */
unsigned flags:DICT_TF2_BITS;/*!< DICT_TF_COMPACT, ... */
unsigned ibd_file_missing:1;
/*!< TRUE if this is in a single-table
tablespace and the .ibd file is missing; then

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -26,13 +26,13 @@ Created 10/25/1995 Heikki Tuuri
#ifndef fil0fil_h
#define fil0fil_h
#include "univ.i"
#ifndef UNIV_HOTBACKUP
#include "sync0rw.h"
#endif /* !UNIV_HOTBACKUP */
#include "dict0types.h"
#include "ut0byte.h"
#include "os0file.h"
#ifndef UNIV_HOTBACKUP
#include "sync0rw.h"
#include "ibuf0types.h"
#endif /* !UNIV_HOTBACKUP */
/** When mysqld is run, the default directory "." is the mysqld datadir,
but in the MySQL Embedded Server Library and ibbackup it is not the default
@ -110,9 +110,10 @@ extern fil_addr_t fil_addr_null;
contents of this field is valid
for all uncompressed pages. */
#define FIL_PAGE_FILE_FLUSH_LSN 26 /*!< this is only defined for the
first page in a data file: the file
has been flushed to disk at least up
to this lsn */
first page in a system tablespace
data file (ibdata*, not *.ibd):
the file has been flushed to disk
at least up to this lsn */
#define FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID 34 /*!< starting from 4.1.x this
contains the space id of the page */
#define FIL_PAGE_DATA 38 /*!< start of the data on the page */

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 2006, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 2005, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 2005, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software

View file

@ -434,11 +434,12 @@ struct hash_table_struct {
these heaps */
#endif /* !UNIV_HOTBACKUP */
mem_heap_t* heap;
#ifdef UNIV_DEBUG
ulint magic_n;
# define HASH_TABLE_MAGIC_N 76561114
#endif /* UNIV_DEBUG */
};
#define HASH_TABLE_MAGIC_N 76561114
#ifndef UNIV_NONINL
#include "hash0hash.ic"
#endif

View file

@ -35,6 +35,8 @@ hash_get_nth_cell(
hash_table_t* table, /*!< in: hash table */
ulint n) /*!< in: cell index */
{
ut_ad(table);
ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
ut_ad(n < table->n_cells);
return(table->array + n);
@ -48,6 +50,8 @@ hash_table_clear(
/*=============*/
hash_table_t* table) /*!< in/out: hash table */
{
ut_ad(table);
ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
memset(table->array, 0x0,
table->n_cells * sizeof(*table->array));
}
@ -61,6 +65,8 @@ hash_get_n_cells(
/*=============*/
hash_table_t* table) /*!< in: table */
{
ut_ad(table);
ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
return(table->n_cells);
}
@ -74,6 +80,8 @@ hash_calc_hash(
ulint fold, /*!< in: folded value */
hash_table_t* table) /*!< in: hash table */
{
ut_ad(table);
ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
return(ut_hash_ulint(fold, table->n_cells));
}
@ -88,6 +96,8 @@ hash_get_mutex_no(
hash_table_t* table, /*!< in: hash table */
ulint fold) /*!< in: fold */
{
ut_ad(table);
ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
ut_ad(ut_is_2pow(table->n_mutexes));
return(ut_2pow_remainder(hash_calc_hash(fold, table),
table->n_mutexes));
@ -103,6 +113,8 @@ hash_get_nth_heap(
hash_table_t* table, /*!< in: hash table */
ulint i) /*!< in: index of the heap */
{
ut_ad(table);
ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
ut_ad(i < table->n_mutexes);
return(table->heaps[i]);
@ -120,6 +132,9 @@ hash_get_heap(
{
ulint i;
ut_ad(table);
ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
if (table->heap) {
return(table->heap);
}
@ -139,6 +154,8 @@ hash_get_nth_mutex(
hash_table_t* table, /*!< in: hash table */
ulint i) /*!< in: index of the mutex */
{
ut_ad(table);
ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
ut_ad(i < table->n_mutexes);
return(table->mutexes + i);
@ -156,6 +173,9 @@ hash_get_mutex(
{
ulint i;
ut_ad(table);
ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
i = hash_get_mutex_no(table, fold);
return(hash_get_nth_mutex(table, i));

View file

@ -35,12 +35,27 @@ Created 7/19/1997 Heikki Tuuri
#ifndef UNIV_HOTBACKUP
# include "ibuf0types.h"
/* Possible operations buffered in the insert/whatever buffer. See
ibuf_insert(). DO NOT CHANGE THE VALUES OF THESE, THEY ARE STORED ON DISK. */
typedef enum {
IBUF_OP_INSERT = 0,
IBUF_OP_DELETE_MARK = 1,
IBUF_OP_DELETE = 2,
/* Number of different operation types. */
IBUF_OP_COUNT = 3,
} ibuf_op_t;
/** Combinations of operations that can be buffered. Because the enum
values are used for indexing innobase_change_buffering_values[], they
should start at 0 and there should not be any gaps. */
typedef enum {
IBUF_USE_NONE = 0,
IBUF_USE_INSERT, /* insert */
IBUF_USE_DELETE_MARK, /* delete */
IBUF_USE_INSERT_DELETE_MARK, /* insert+delete */
IBUF_USE_DELETE, /* delete+purge */
IBUF_USE_ALL, /* insert+delete+purge */
IBUF_USE_COUNT /* number of entries in ibuf_use_t */
} ibuf_use_t;
@ -72,8 +87,7 @@ separately committed mini-transaction, because in crash recovery, the
free bits could momentarily be set too high. */
/******************************************************************//**
Creates the insert buffer data structure at a database startup and
initializes the data structures for the insert buffer of each tablespace. */
Creates the insert buffer data structure at a database startup. */
UNIV_INTERN
void
ibuf_init_at_db_start(void);
@ -243,14 +257,15 @@ void
ibuf_free_excess_pages(void);
/*========================*/
/*********************************************************************//**
Makes an index insert to the insert buffer, instead of directly to the disk
page, if this is possible. Does not do insert if the index is clustered
or unique.
Buffer an operation in the insert/delete buffer, instead of doing it
directly to the disk page, if this is possible. Does not do it if the index
is clustered or unique.
@return TRUE if success */
UNIV_INTERN
ibool
ibuf_insert(
/*========*/
ibuf_op_t op, /*!< in: operation type */
const dtuple_t* entry, /*!< in: index entry to insert */
dict_index_t* index, /*!< in: index where to insert */
ulint space, /*!< in: space id where to insert */
@ -259,11 +274,11 @@ ibuf_insert(
que_thr_t* thr); /*!< in: query thread */
/*********************************************************************//**
When an index page is read from a disk to the buffer pool, this function
inserts to the page the possible index entries buffered in the insert buffer.
The entries are deleted from the insert buffer. If the page is not read, but
created in the buffer pool, this function deletes its buffered entries from
the insert buffer; there can exist entries for such a page if the page
belonged to an index which subsequently was dropped. */
applies any buffered operations to the page and deletes the entries from the
insert buffer. If the page is not read, but created in the buffer pool, this
function deletes its buffered entries from the insert buffer; there can
exist entries for such a page if the page belonged to an index which
subsequently was dropped. */
UNIV_INTERN
void
ibuf_merge_or_delete_for_page(
@ -356,6 +371,15 @@ void
ibuf_print(
/*=======*/
FILE* file); /*!< in: file where to print */
/********************************************************************
Read the first two bytes from a record's fourth field (counter field in new
records; something else in older records).
@return "counter" field, or ULINT_UNDEFINED if for some reason it can't be read */
UNIV_INTERN
ulint
ibuf_rec_get_counter(
/*=================*/
const rec_t* rec); /*!< in: ibuf record */
/******************************************************************//**
Closes insert buffer and frees the data structures. */
UNIV_INTERN

View file

@ -55,10 +55,15 @@ struct ibuf_struct{
ulint height; /*!< tree height */
dict_index_t* index; /*!< insert buffer index */
ulint n_inserts; /*!< number of inserts made to
the insert buffer */
ulint n_merges; /*!< number of pages merged */
ulint n_merged_recs; /*!< number of records merged */
ulint n_merged_ops[IBUF_OP_COUNT];
/*!< number of operations of each type
merged to index pages */
ulint n_discarded_ops[IBUF_OP_COUNT];
/*!< number of operations of each type
discarded without merging due to the
tablespace being deleted or the
index being dropped */
};
/************************************************************************//**

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -613,13 +613,16 @@ lock_rec_print(
FILE* file, /*!< in: file where to print */
const lock_t* lock); /*!< in: record type lock */
/*********************************************************************//**
Prints info of locks for all transactions. */
Prints info of locks for all transactions.
@return FALSE if not able to obtain kernel mutex
and exits without printing info */
UNIV_INTERN
void
ibool
lock_print_info_summary(
/*====================*/
FILE* file); /*!< in: file where to print */
/*********************************************************************//**
FILE* file, /*!< in: file where to print */
ibool nowait);/*!< in: whether to wait for the kernel mutex */
/*************************************************************************
Prints info of locks for each transaction. */
UNIV_INTERN
void

View file

@ -1,23 +1,6 @@
/*****************************************************************************
Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/*****************************************************************************
Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
Copyright (c) 2009, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@ -825,7 +808,17 @@ struct log_struct{
written to some log group; for this to
be advanced, it is enough that the
write i/o has been completed for all
log groups */
log groups.
Note that since InnoDB currently
has only one log group therefore
this value is redundant. Also it
is possible that this value
falls behind the
flushed_to_disk_lsn transiently.
It is appropriate to use either
flushed_to_disk_lsn or
write_lsn which are always
up-to-date and accurate. */
ib_uint64_t write_lsn; /*!< end lsn for the current running
write */
ulint write_end_offset;/*!< the data in buffer has

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -314,12 +314,15 @@ log_reserve_and_write_fast(
ulint data_len;
#ifdef UNIV_LOG_LSN_DEBUG
/* length of the LSN pseudo-record */
ulint lsn_len = 1
+ mach_get_compressed_size(log_sys->lsn >> 32)
+ mach_get_compressed_size(log_sys->lsn & 0xFFFFFFFFUL);
ulint lsn_len;
#endif /* UNIV_LOG_LSN_DEBUG */
mutex_enter(&log_sys->mutex);
#ifdef UNIV_LOG_LSN_DEBUG
lsn_len = 1
+ mach_get_compressed_size(log_sys->lsn >> 32)
+ mach_get_compressed_size(log_sys->lsn & 0xFFFFFFFFUL);
#endif /* UNIV_LOG_LSN_DEBUG */
data_len = len
#ifdef UNIV_LOG_LSN_DEBUG

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1997, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -176,6 +176,12 @@ UNIV_INTERN
void
recv_recovery_from_checkpoint_finish(void);
/*======================================*/
/********************************************************//**
Initiates the rollback of active transactions. */
UNIV_INTERN
void
recv_recovery_rollback_active(void);
/*===============================*/
/*******************************************************//**
Scans log from a buffer and stores new log data to the parsing buffer.
Parses and hashes the log records if new data found. Unless
@ -258,12 +264,14 @@ void
recv_sys_init(
/*==========*/
ulint available_memory); /*!< in: available memory in bytes */
#ifndef UNIV_HOTBACKUP
/********************************************************//**
Reset the state of the recovery system variables. */
UNIV_INTERN
void
recv_sys_var_init(void);
/*===================*/
#endif /* !UNIV_HOTBACKUP */
/*******************************************************************//**
Empties the hash table of stored log records, applying them to appropriate
pages. */

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -28,6 +28,13 @@ Created 6/9/1994 Heikki Tuuri
check fields whose sizes are given below */
#ifdef UNIV_MEM_DEBUG
# ifndef UNIV_HOTBACKUP
/* The mutex which protects in the debug version the hash table
containing the list of live memory heaps, and also the global
variables in mem0dbg.c. */
extern mutex_t mem_hash_mutex;
# endif /* !UNIV_HOTBACKUP */
#define MEM_FIELD_HEADER_SIZE ut_calc_align(2 * sizeof(ulint),\
UNIV_MEM_ALIGNMENT)
#define MEM_FIELD_TRAILER_SIZE sizeof(ulint)

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -25,9 +25,6 @@ Created 6/8/1994 Heikki Tuuri
*************************************************************************/
#ifdef UNIV_MEM_DEBUG
# ifndef UNIV_HOTBACKUP
extern mutex_t mem_hash_mutex;
# endif /* !UNIV_HOTBACKUP */
extern ulint mem_current_allocated_memory;
/******************************************************************//**

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -359,6 +359,9 @@ struct mem_block_info_struct {
to the heap is also the first block in this list,
though it also contains the base node of the list. */
ulint len; /*!< physical length of this block in bytes */
ulint total_size; /*!< physical length in bytes of all blocks
in the heap. This is defined only in the base
node and is set to ULINT_UNDEFINED in others. */
ulint type; /*!< type of heap: MEM_HEAP_DYNAMIC, or
MEM_HEAP_BUF possibly ORed to MEM_HEAP_BTR_SEARCH */
ulint free; /*!< offset in bytes of the first free position for

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -579,18 +579,12 @@ mem_heap_get_size(
/*==============*/
mem_heap_t* heap) /*!< in: heap */
{
mem_block_t* block;
ulint size = 0;
ut_ad(mem_heap_check(heap));
block = heap;
size = heap->total_size;
while (block != NULL) {
size += mem_block_get_len(block);
block = UT_LIST_GET_NEXT(list, block);
}
#ifndef UNIV_HOTBACKUP
if (heap->free_block) {
size += UNIV_PAGE_SIZE;

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -70,6 +70,7 @@ mtr_memo_push(
ut_ad(type <= MTR_MEMO_X_LOCK);
ut_ad(mtr);
ut_ad(mtr->magic_n == MTR_MAGIC_N);
ut_ad(mtr->state == MTR_ACTIVE);
memo = &(mtr->memo);
@ -92,6 +93,7 @@ mtr_set_savepoint(
ut_ad(mtr);
ut_ad(mtr->magic_n == MTR_MAGIC_N);
ut_ad(mtr->state == MTR_ACTIVE);
memo = &(mtr->memo);
@ -149,6 +151,7 @@ mtr_memo_contains(
ut_ad(mtr);
ut_ad(mtr->magic_n == MTR_MAGIC_N);
ut_ad(mtr->state == MTR_ACTIVE || mtr->state == MTR_COMMITTING);
memo = &(mtr->memo);

View file

@ -1,23 +1,6 @@
/*****************************************************************************
Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/***********************************************************************
Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
Copyright (c) 2009, Percona Inc.
Portions of this file contain modifications contributed and copyrighted
@ -93,29 +76,23 @@ extern ulint os_n_pending_writes;
#ifdef __WIN__
/** File handle */
#define os_file_t HANDLE
# define os_file_t HANDLE
/** Convert a C file descriptor to a native file handle
@param fd file descriptor
@return native file handle */
#define OS_FILE_FROM_FD(fd) (HANDLE) _get_osfhandle(fd)
# define OS_FILE_FROM_FD(fd) (HANDLE) _get_osfhandle(fd)
#else
/** File handle */
typedef int os_file_t;
/** Convert a C file descriptor to a native file handle
@param fd file descriptor
@return native file handle */
#define OS_FILE_FROM_FD(fd) fd
# define OS_FILE_FROM_FD(fd) fd
#endif
/** Umask for creating files */
extern ulint os_innodb_umask;
/** If this flag is TRUE, then we will use the native aio of the
OS (provided we compiled Innobase with it in), otherwise we will
use simulated aio we build below with threads */
extern ibool os_aio_use_native_aio;
/** The next value should be smaller or equal to the smallest sector size used
on any disk. A log block is required to be a portion of disk which is written
so that if the start and the end of a block get written to disk, then the
@ -158,7 +135,8 @@ log. */
#define OS_FILE_SHARING_VIOLATION 76
#define OS_FILE_ERROR_NOT_SPECIFIED 77
#define OS_FILE_INSUFFICIENT_RESOURCE 78
#define OS_FILE_OPERATION_ABORTED 79
#define OS_FILE_AIO_INTERRUPTED 79
#define OS_FILE_OPERATION_ABORTED 80
/* @} */
/** Types for aio operations @{ */
@ -204,6 +182,157 @@ extern ulint os_n_file_reads;
extern ulint os_n_file_writes;
extern ulint os_n_fsyncs;
#ifdef UNIV_PFS_IO
/* Keys to register InnoDB I/O with performance schema */
extern mysql_pfs_key_t innodb_file_data_key;
extern mysql_pfs_key_t innodb_file_log_key;
extern mysql_pfs_key_t innodb_file_temp_key;
/* Following four macros are instumentations to register
various file I/O operations with performance schema.
1) register_pfs_file_open_begin() and register_pfs_file_open_end() are
used to register file creation, opening, closing and renaming.
2) register_pfs_file_io_begin() and register_pfs_file_io_end() are
used to register actual file read, write and flush */
# define register_pfs_file_open_begin(locker, key, op, name, \
src_file, src_line) \
do { \
if (PSI_server) { \
locker = PSI_server->get_thread_file_name_locker( \
key, op, name, &locker); \
if (locker) { \
PSI_server->start_file_open_wait( \
locker, src_file, src_line); \
} \
} \
} while (0)
# define register_pfs_file_open_end(locker, file) \
do { \
if (locker) { \
PSI_server->end_file_open_wait_and_bind_to_descriptor( \
locker, file); \
} \
} while (0)
# define register_pfs_file_io_begin(locker, file, count, op, \
src_file, src_line) \
do { \
if (PSI_server) { \
locker = PSI_server->get_thread_file_descriptor_locker( \
file, op); \
if (locker) { \
PSI_server->start_file_wait( \
locker, count, src_file, src_line); \
} \
} \
} while (0)
# define register_pfs_file_io_end(locker, count) \
do { \
if (locker) { \
PSI_server->end_file_wait(locker, count); \
} \
} while (0)
#endif /* UNIV_PFS_IO */
/* Following macros/functions are file I/O APIs that would be performance
schema instrumented if "UNIV_PFS_IO" is defined. They would point to
wrapper functions with performance schema instrumentation in such case.
os_file_create
os_file_create_simple
os_file_create_simple_no_error_handling
os_file_close
os_file_rename
os_aio
os_file_read
os_file_read_no_error_handling
os_file_write
The wrapper functions have the prefix of "innodb_". */
#ifdef UNIV_PFS_IO
# define os_file_create(key, name, create, purpose, type, success) \
pfs_os_file_create_func(key, name, create, purpose, type, \
success, __FILE__, __LINE__)
# define os_file_create_simple(key, name, create, access, success) \
pfs_os_file_create_simple_func(key, name, create, access, \
success, __FILE__, __LINE__)
# define os_file_create_simple_no_error_handling( \
key, name, create_mode, access, success) \
pfs_os_file_create_simple_no_error_handling_func( \
key, name, create_mode, access, success, __FILE__, __LINE__)
# define os_file_close(file) \
pfs_os_file_close_func(file, __FILE__, __LINE__)
# define os_aio(type, mode, name, file, buf, offset, offset_high, \
n, message1, message2) \
pfs_os_aio_func(type, mode, name, file, buf, offset, \
offset_high, n, message1, message2, \
__FILE__, __LINE__)
# define os_file_read(file, buf, offset, offset_high, n) \
pfs_os_file_read_func(file, buf, offset, offset_high, n, \
__FILE__, __LINE__)
# define os_file_read_no_error_handling(file, buf, offset, \
offset_high, n) \
pfs_os_file_read_no_error_handling_func(file, buf, offset, \
offset_high, n, \
__FILE__, __LINE__)
# define os_file_write(name, file, buf, offset, offset_high, n) \
pfs_os_file_write_func(name, file, buf, offset, offset_high, \
n, __FILE__, __LINE__)
# define os_file_flush(file) \
pfs_os_file_flush_func(file, __FILE__, __LINE__)
# define os_file_rename(key, oldpath, newpath) \
pfs_os_file_rename_func(key, oldpath, newpath, __FILE__, __LINE__)
#else /* UNIV_PFS_IO */
/* If UNIV_PFS_IO is not defined, these I/O APIs point
to original un-instrumented file I/O APIs */
# define os_file_create(key, name, create, purpose, type, success) \
os_file_create_func(name, create, purpose, type, success)
# define os_file_create_simple(key, name, create, access, success) \
os_file_create_simple_func(name, create_mode, access, success)
# define os_file_create_simple_no_error_handling( \
key, name, create_mode, access, success) \
os_file_create_simple_no_error_handling_func( \
name, create_mode, access, success)
# define os_file_close(file) os_file_close_func(file)
# define os_aio(type, mode, name, file, buf, offset, offset_high, \
n, message1, message2) \
os_aio_func(type, mode, name, file, buf, offset, offset_high, n,\
message1, message2)
# define os_file_read(file, buf, offset, offset_high, n) \
os_file_read_func(file, buf, offset, offset_high, n)
# define os_file_read_no_error_handling(file, buf, offset, \
offset_high, n) \
os_file_read_no_error_handling_func(file, buf, offset, offset_high, n)
# define os_file_write(name, file, buf, offset, offset_high, n) \
os_file_write_func(name, file, buf, offset, offset_high, n)
# define os_file_flush(file) os_file_flush_func(file)
# define os_file_rename(key, oldpath, newpath) \
os_file_rename_func(oldpath, newpath)
#endif /* UNIV_PFS_IO */
/* File types for directory entry data type */
enum os_file_type_enum{
@ -313,13 +442,15 @@ os_file_create_directory(
ibool fail_if_exists);/*!< in: if TRUE, pre-existing directory
is treated as an error. */
/****************************************************************//**
NOTE! Use the corresponding macro os_file_create_simple(), not directly
this function!
A simple function to open or create a file.
@return own: handle to the file, not defined if error, error number
can be retrieved with os_file_get_last_error */
UNIV_INTERN
os_file_t
os_file_create_simple(
/*==================*/
os_file_create_simple_func(
/*=======================*/
const char* name, /*!< in: name of the file or path as a
null-terminated string */
ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file is
@ -333,13 +464,15 @@ os_file_create_simple(
OS_FILE_READ_WRITE */
ibool* success);/*!< out: TRUE if succeed, FALSE if error */
/****************************************************************//**
NOTE! Use the corresponding macro
os_file_create_simple_no_error_handling(), not directly this function!
A simple function to open or create a file.
@return own: handle to the file, not defined if error, error number
can be retrieved with os_file_get_last_error */
UNIV_INTERN
os_file_t
os_file_create_simple_no_error_handling(
/*====================================*/
os_file_create_simple_no_error_handling_func(
/*=========================================*/
const char* name, /*!< in: name of the file or path as a
null-terminated string */
ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file
@ -363,13 +496,15 @@ os_file_set_nocache(
const char* operation_name);/*!< in: "open" or "create"; used in the
diagnostic message */
/****************************************************************//**
NOTE! Use the corresponding macro os_file_create(), not directly
this function!
Opens an existing file or creates a new.
@return own: handle to the file, not defined if error, error number
can be retrieved with os_file_get_last_error */
UNIV_INTERN
os_file_t
os_file_create(
/*===========*/
os_file_create_func(
/*================*/
const char* name, /*!< in: name of the file or path as a
null-terminated string */
ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file
@ -407,25 +542,258 @@ os_file_delete_if_exists(
/*=====================*/
const char* name); /*!< in: file path as a null-terminated string */
/***********************************************************************//**
NOTE! Use the corresponding macro os_file_rename(), not directly
this function!
Renames a file (can also move it to another directory). It is safest that the
file is closed before calling this function.
@return TRUE if success */
UNIV_INTERN
ibool
os_file_rename(
/*===========*/
os_file_rename_func(
/*================*/
const char* oldpath, /*!< in: old file path as a
null-terminated string */
const char* newpath); /*!< in: new file path */
/***********************************************************************//**
NOTE! Use the corresponding macro os_file_close(), not directly this
function!
Closes a file handle. In case of error, error number can be retrieved with
os_file_get_last_error.
@return TRUE if success */
UNIV_INTERN
ibool
os_file_close(
/*==========*/
os_file_close_func(
/*===============*/
os_file_t file); /*!< in, own: handle to a file */
#ifdef UNIV_PFS_IO
/****************************************************************//**
NOTE! Please use the corresponding macro os_file_create_simple(),
not directly this function!
A performance schema instrumented wrapper function for
os_file_create_simple() which opens or creates a file.
@return own: handle to the file, not defined if error, error number
can be retrieved with os_file_get_last_error */
UNIV_INLINE
os_file_t
pfs_os_file_create_simple_func(
/*===========================*/
mysql_pfs_key_t key, /*!< in: Performance Schema Key */
const char* name, /*!< in: name of the file or path as a
null-terminated string */
ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file is
opened (if does not exist, error), or
OS_FILE_CREATE if a new file is created
(if exists, error), or
OS_FILE_CREATE_PATH if new file
(if exists, error) and subdirectories along
its path are created (if needed)*/
ulint access_type,/*!< in: OS_FILE_READ_ONLY or
OS_FILE_READ_WRITE */
ibool* success,/*!< out: TRUE if succeed, FALSE if error */
const char* src_file,/*!< in: file name where func invoked */
ulint src_line);/*!< in: line where the func invoked */
/****************************************************************//**
NOTE! Please use the corresponding macro
os_file_create_simple_no_error_handling(), not directly this function!
A performance schema instrumented wrapper function for
os_file_create_simple_no_error_handling(). Add instrumentation to
monitor file creation/open.
@return own: handle to the file, not defined if error, error number
can be retrieved with os_file_get_last_error */
UNIV_INLINE
os_file_t
pfs_os_file_create_simple_no_error_handling_func(
/*=============================================*/
mysql_pfs_key_t key, /*!< in: Performance Schema Key */
const char* name, /*!< in: name of the file or path as a
null-terminated string */
ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file
is opened (if does not exist, error), or
OS_FILE_CREATE if a new file is created
(if exists, error) */
ulint access_type,/*!< in: OS_FILE_READ_ONLY,
OS_FILE_READ_WRITE, or
OS_FILE_READ_ALLOW_DELETE; the last option is
used by a backup program reading the file */
ibool* success,/*!< out: TRUE if succeed, FALSE if error */
const char* src_file,/*!< in: file name where func invoked */
ulint src_line);/*!< in: line where the func invoked */
/****************************************************************//**
NOTE! Please use the corresponding macro os_file_create(), not directly
this function!
A performance schema wrapper function for os_file_create().
Add instrumentation to monitor file creation/open.
@return own: handle to the file, not defined if error, error number
can be retrieved with os_file_get_last_error */
UNIV_INLINE
os_file_t
pfs_os_file_create_func(
/*====================*/
mysql_pfs_key_t key, /*!< in: Performance Schema Key */
const char* name, /*!< in: name of the file or path as a
null-terminated string */
ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file
is opened (if does not exist, error), or
OS_FILE_CREATE if a new file is created
(if exists, error),
OS_FILE_OVERWRITE if a new file is created
or an old overwritten;
OS_FILE_OPEN_RAW, if a raw device or disk
partition should be opened */
ulint purpose,/*!< in: OS_FILE_AIO, if asynchronous,
non-buffered i/o is desired,
OS_FILE_NORMAL, if any normal file;
NOTE that it also depends on type, os_aio_..
and srv_.. variables whether we really use
async i/o or unbuffered i/o: look in the
function source code for the exact rules */
ulint type, /*!< in: OS_DATA_FILE or OS_LOG_FILE */
ibool* success,/*!< out: TRUE if succeed, FALSE if error */
const char* src_file,/*!< in: file name where func invoked */
ulint src_line);/*!< in: line where the func invoked */
/***********************************************************************//**
NOTE! Please use the corresponding macro os_file_close(), not directly
this function!
A performance schema instrumented wrapper function for os_file_close().
@return TRUE if success */
UNIV_INLINE
ibool
pfs_os_file_close_func(
/*===================*/
os_file_t file, /*!< in, own: handle to a file */
const char* src_file,/*!< in: file name where func invoked */
ulint src_line);/*!< in: line where the func invoked */
/*******************************************************************//**
NOTE! Please use the corresponding macro os_file_read(), not directly
this function!
This is the performance schema instrumented wrapper function for
os_file_read() which requests a synchronous read operation.
@return TRUE if request was successful, FALSE if fail */
UNIV_INLINE
ibool
pfs_os_file_read_func(
/*==================*/
os_file_t file, /*!< in: handle to a file */
void* buf, /*!< in: buffer where to read */
ulint offset, /*!< in: least significant 32 bits of file
offset where to read */
ulint offset_high,/*!< in: most significant 32 bits of
offset */
ulint n, /*!< in: number of bytes to read */
const char* src_file,/*!< in: file name where func invoked */
ulint src_line);/*!< in: line where the func invoked */
/*******************************************************************//**
NOTE! Please use the corresponding macro os_file_read_no_error_handling(),
not directly this function!
This is the performance schema instrumented wrapper function for
os_file_read_no_error_handling_func() which requests a synchronous
read operation.
@return TRUE if request was successful, FALSE if fail */
UNIV_INLINE
ibool
pfs_os_file_read_no_error_handling_func(
/*====================================*/
os_file_t file, /*!< in: handle to a file */
void* buf, /*!< in: buffer where to read */
ulint offset, /*!< in: least significant 32 bits of file
offset where to read */
ulint offset_high,/*!< in: most significant 32 bits of
offset */
ulint n, /*!< in: number of bytes to read */
const char* src_file,/*!< in: file name where func invoked */
ulint src_line);/*!< in: line where the func invoked */
/*******************************************************************//**
NOTE! Please use the corresponding macro os_aio(), not directly this
function!
Performance schema wrapper function of os_aio() which requests
an asynchronous i/o operation.
@return TRUE if request was queued successfully, FALSE if fail */
UNIV_INLINE
ibool
pfs_os_aio_func(
/*============*/
ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
ulint mode, /*!< in: OS_AIO_NORMAL etc. I/O mode */
const char* name, /*!< in: name of the file or path as a
null-terminated string */
os_file_t file, /*!< in: handle to a file */
void* buf, /*!< in: buffer where to read or from which
to write */
ulint offset, /*!< in: least significant 32 bits of file
offset where to read or write */
ulint offset_high,/*!< in: most significant 32 bits of
offset */
ulint n, /*!< in: number of bytes to read or write */
fil_node_t* message1,/*!< in: message for the aio handler
(can be used to identify a completed
aio operation); ignored if mode is
OS_AIO_SYNC */
void* message2,/*!< in: message for the aio handler
(can be used to identify a completed
aio operation); ignored if mode is
OS_AIO_SYNC */
const char* src_file,/*!< in: file name where func invoked */
ulint src_line);/*!< in: line where the func invoked */
/*******************************************************************//**
NOTE! Please use the corresponding macro os_file_write(), not directly
this function!
This is the performance schema instrumented wrapper function for
os_file_write() which requests a synchronous write operation.
@return TRUE if request was successful, FALSE if fail */
UNIV_INLINE
ibool
pfs_os_file_write_func(
/*===================*/
const char* name, /*!< in: name of the file or path as a
null-terminated string */
os_file_t file, /*!< in: handle to a file */
const void* buf, /*!< in: buffer from which to write */
ulint offset, /*!< in: least significant 32 bits of file
offset where to write */
ulint offset_high,/*!< in: most significant 32 bits of
offset */
ulint n, /*!< in: number of bytes to write */
const char* src_file,/*!< in: file name where func invoked */
ulint src_line);/*!< in: line where the func invoked */
/***********************************************************************//**
NOTE! Please use the corresponding macro os_file_flush(), not directly
this function!
This is the performance schema instrumented wrapper function for
os_file_flush() which flushes the write buffers of a given file to the disk.
Flushes the write buffers of a given file to the disk.
@return TRUE if success */
UNIV_INLINE
ibool
pfs_os_file_flush_func(
/*===================*/
os_file_t file, /*!< in, own: handle to a file */
const char* src_file,/*!< in: file name where func invoked */
ulint src_line);/*!< in: line where the func invoked */
/***********************************************************************//**
NOTE! Please use the corresponding macro os_file_rename(), not directly
this function!
This is the performance schema instrumented wrapper function for
os_file_rename()
@return TRUE if success */
UNIV_INLINE
ibool
pfs_os_file_rename_func(
/*====================*/
mysql_pfs_key_t key, /*!< in: Performance Schema Key */
const char* oldpath,/*!< in: old file path as a null-terminated
string */
const char* newpath,/*!< in: new file path */
const char* src_file,/*!< in: file name where func invoked */
ulint src_line);/*!< in: line where the func invoked */
#endif /* UNIV_PFS_IO */
#ifdef UNIV_HOTBACKUP
/***********************************************************************//**
Closes a file handle.
@ -477,12 +845,13 @@ os_file_set_eof(
/*============*/
FILE* file); /*!< in: file to be truncated */
/***********************************************************************//**
NOTE! Use the corresponding macro os_file_flush(), not directly this function!
Flushes the write buffers of a given file to the disk.
@return TRUE if success */
UNIV_INTERN
ibool
os_file_flush(
/*==========*/
os_file_flush_func(
/*===============*/
os_file_t file); /*!< in, own: handle to a file */
/***********************************************************************//**
Retrieves the last error number if an error occurs in a file io function.
@ -497,12 +866,13 @@ os_file_get_last_error(
ibool report_all_errors); /*!< in: TRUE if we want an error message
printed of all errors */
/*******************************************************************//**
NOTE! Use the corresponding macro os_file_read(), not directly this function!
Requests a synchronous read operation.
@return TRUE if request was successful, FALSE if fail */
UNIV_INTERN
ibool
os_file_read(
/*=========*/
os_file_read_func(
/*==============*/
os_file_t file, /*!< in: handle to a file */
void* buf, /*!< in: buffer where to read */
ulint offset, /*!< in: least significant 32 bits of file
@ -522,13 +892,15 @@ os_file_read_string(
char* str, /*!< in: buffer where to read */
ulint size); /*!< in: size of buffer */
/*******************************************************************//**
NOTE! Use the corresponding macro os_file_read_no_error_handling(),
not directly this function!
Requests a synchronous positioned read operation. This function does not do
any error handling. In case of error it returns FALSE.
@return TRUE if request was successful, FALSE if fail */
UNIV_INTERN
ibool
os_file_read_no_error_handling(
/*===========================*/
os_file_read_no_error_handling_func(
/*================================*/
os_file_t file, /*!< in: handle to a file */
void* buf, /*!< in: buffer where to read */
ulint offset, /*!< in: least significant 32 bits of file
@ -538,12 +910,14 @@ os_file_read_no_error_handling(
ulint n); /*!< in: number of bytes to read */
/*******************************************************************//**
NOTE! Use the corresponding macro os_file_write(), not directly this
function!
Requests a synchronous write operation.
@return TRUE if request was successful, FALSE if fail */
UNIV_INTERN
ibool
os_file_write(
/*==========*/
os_file_write_func(
/*===============*/
const char* name, /*!< in: name of the file or path as a
null-terminated string */
os_file_t file, /*!< in: handle to a file */
@ -612,7 +986,7 @@ respectively. The caller must create an i/o handler thread for each
segment in these arrays. This function also creates the sync array.
No i/o handler thread needs to be created for that */
UNIV_INTERN
void
ibool
os_aio_init(
/*========*/
ulint n_per_seg, /*<! in: maximum number of pending aio
@ -629,12 +1003,13 @@ os_aio_free(void);
/*=============*/
/*******************************************************************//**
NOTE! Use the corresponding macro os_aio(), not directly this function!
Requests an asynchronous i/o operation.
@return TRUE if request was queued successfully, FALSE if fail */
UNIV_INTERN
ibool
os_aio(
/*===*/
os_aio_func(
/*========*/
ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
ulint mode, /*!< in: OS_AIO_NORMAL, ..., possibly ORed
to OS_AIO_SIMULATED_WAKE_LATER: the
@ -802,4 +1177,36 @@ innobase_mysql_tmpfile(void);
/*========================*/
#endif /* !UNIV_HOTBACKUP && !__NETWARE__ */
#if defined(LINUX_NATIVE_AIO)
/**************************************************************************
This function is only used in Linux native asynchronous i/o.
Waits for an aio operation to complete. This function is used to wait the
for completed requests. The aio array of pending requests is divided
into segments. The thread specifies which segment or slot it wants to wait
for. NOTE: this function will also take care of freeing the aio slot,
therefore no other thread is allowed to do the freeing!
@return TRUE if the IO was successful */
UNIV_INTERN
ibool
os_aio_linux_handle(
/*================*/
ulint global_seg, /*!< in: segment number in the aio array
to wait for; segment 0 is the ibuf
i/o thread, segment 1 is log i/o thread,
then follow the non-ibuf read threads,
and the last are the non-ibuf write
threads. */
fil_node_t**message1, /*!< out: the messages passed with the */
void** message2, /*!< aio request; note that in case the
aio operation failed, these output
parameters are valid and can be used to
restart the operation. */
ulint* type); /*!< out: OS_FILE_WRITE or ..._READ */
#endif /* LINUX_NATIVE_AIO */
#ifndef UNIV_NONINL
#include "os0file.ic"
#endif
#endif

View file

@ -0,0 +1,408 @@
/*****************************************************************************
Copyright (c) 2010, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file include/os0file.ic
The interface to the operating system file io
Created 2/20/2010 Jimmy Yang
*******************************************************/
#include "univ.i"
#ifdef UNIV_PFS_IO
/****************************************************************//**
NOTE! Please use the corresponding macro os_file_create_simple(),
not directly this function!
A performance schema instrumented wrapper function for
os_file_create_simple() which opens or creates a file.
@return own: handle to the file, not defined if error, error number
can be retrieved with os_file_get_last_error */
UNIV_INLINE
os_file_t
pfs_os_file_create_simple_func(
/*===========================*/
mysql_pfs_key_t key, /*!< in: Performance Schema Key */
const char* name, /*!< in: name of the file or path as a
null-terminated string */
ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file is
opened (if does not exist, error), or
OS_FILE_CREATE if a new file is created
(if exists, error), or
OS_FILE_CREATE_PATH if new file
(if exists, error) and subdirectories along
its path are created (if needed)*/
ulint access_type,/*!< in: OS_FILE_READ_ONLY or
OS_FILE_READ_WRITE */
ibool* success,/*!< out: TRUE if succeed, FALSE if error */
const char* src_file,/*!< in: file name where func invoked */
ulint src_line)/*!< in: line where the func invoked */
{
os_file_t file;
struct PSI_file_locker* locker = NULL;
/* register a file open or creation depending on "create_mode" */
register_pfs_file_open_begin(locker, key,
((create_mode == OS_FILE_CREATE)
? PSI_FILE_CREATE
: PSI_FILE_OPEN),
name, src_file, src_line);
file = os_file_create_simple_func(name, create_mode,
access_type, success);
/* Regsiter the returning "file" value with the system */
register_pfs_file_open_end(locker, file);
return(file);
}
/****************************************************************//**
NOTE! Please use the corresponding macro
os_file_create_simple_no_error_handling(), not directly this function!
A performance schema instrumented wrapper function for
os_file_create_simple_no_error_handling(). Add instrumentation to
monitor file creation/open.
@return own: handle to the file, not defined if error, error number
can be retrieved with os_file_get_last_error */
UNIV_INLINE
os_file_t
pfs_os_file_create_simple_no_error_handling_func(
/*=============================================*/
mysql_pfs_key_t key, /*!< in: Performance Schema Key */
const char* name, /*!< in: name of the file or path as a
null-terminated string */
ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file
is opened (if does not exist, error), or
OS_FILE_CREATE if a new file is created
(if exists, error) */
ulint access_type,/*!< in: OS_FILE_READ_ONLY,
OS_FILE_READ_WRITE, or
OS_FILE_READ_ALLOW_DELETE; the last option is
used by a backup program reading the file */
ibool* success,/*!< out: TRUE if succeed, FALSE if error */
const char* src_file,/*!< in: file name where func invoked */
ulint src_line)/*!< in: line where the func invoked */
{
os_file_t file;
struct PSI_file_locker* locker = NULL;
/* register a file open or creation depending on "create_mode" */
register_pfs_file_open_begin(locker, key,
((create_mode == OS_FILE_CREATE)
? PSI_FILE_CREATE
: PSI_FILE_OPEN),
name, src_file, src_line);
file = os_file_create_simple_no_error_handling_func(
name, create_mode, access_type, success);
register_pfs_file_open_end(locker, file);
return(file);
}
/****************************************************************//**
NOTE! Please use the corresponding macro os_file_create(), not directly
this function!
A performance schema wrapper function for os_file_create().
Add instrumentation to monitor file creation/open.
@return own: handle to the file, not defined if error, error number
can be retrieved with os_file_get_last_error */
UNIV_INLINE
os_file_t
pfs_os_file_create_func(
/*====================*/
mysql_pfs_key_t key, /*!< in: Performance Schema Key */
const char* name, /*!< in: name of the file or path as a
null-terminated string */
ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file
is opened (if does not exist, error), or
OS_FILE_CREATE if a new file is created
(if exists, error),
OS_FILE_OVERWRITE if a new file is created
or an old overwritten;
OS_FILE_OPEN_RAW, if a raw device or disk
partition should be opened */
ulint purpose,/*!< in: OS_FILE_AIO, if asynchronous,
non-buffered i/o is desired,
OS_FILE_NORMAL, if any normal file;
NOTE that it also depends on type, os_aio_..
and srv_.. variables whether we really use
async i/o or unbuffered i/o: look in the
function source code for the exact rules */
ulint type, /*!< in: OS_DATA_FILE or OS_LOG_FILE */
ibool* success,/*!< out: TRUE if succeed, FALSE if error */
const char* src_file,/*!< in: file name where func invoked */
ulint src_line)/*!< in: line where the func invoked */
{
os_file_t file;
struct PSI_file_locker* locker = NULL;
/* register a file open or creation depending on "create_mode" */
register_pfs_file_open_begin(locker, key,
((create_mode == OS_FILE_CREATE)
? PSI_FILE_CREATE
: PSI_FILE_OPEN),
name, src_file, src_line);
file = os_file_create_func(name, create_mode, purpose, type, success);
register_pfs_file_open_end(locker, file);
return(file);
}
/***********************************************************************//**
NOTE! Please use the corresponding macro os_file_close(), not directly
this function!
A performance schema instrumented wrapper function for os_file_close().
@return TRUE if success */
UNIV_INLINE
ibool
pfs_os_file_close_func(
/*===================*/
os_file_t file, /*!< in, own: handle to a file */
const char* src_file,/*!< in: file name where func invoked */
ulint src_line)/*!< in: line where the func invoked */
{
ibool result;
struct PSI_file_locker* locker = NULL;
/* register the file close */
register_pfs_file_io_begin(locker, file, 0, PSI_FILE_CLOSE,
src_file, src_line);
result = os_file_close_func(file);
register_pfs_file_io_end(locker, 0);
return(result);
}
/*******************************************************************//**
NOTE! Please use the corresponding macro os_aio(), not directly this
function!
Performance schema instrumented wrapper function of os_aio() which
requests an asynchronous i/o operation.
@return TRUE if request was queued successfully, FALSE if fail */
UNIV_INLINE
ibool
pfs_os_aio_func(
/*============*/
ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
ulint mode, /*!< in: OS_AIO_NORMAL etc. I/O mode */
const char* name, /*!< in: name of the file or path as a
null-terminated string */
os_file_t file, /*!< in: handle to a file */
void* buf, /*!< in: buffer where to read or from which
to write */
ulint offset, /*!< in: least significant 32 bits of file
offset where to read or write */
ulint offset_high,/*!< in: most significant 32 bits of
offset */
ulint n, /*!< in: number of bytes to read or write */
fil_node_t* message1,/*!< in: message for the aio handler
(can be used to identify a completed
aio operation); ignored if mode is
OS_AIO_SYNC */
void* message2,/*!< in: message for the aio handler
(can be used to identify a completed
aio operation); ignored if mode is
OS_AIO_SYNC */
const char* src_file,/*!< in: file name where func invoked */
ulint src_line)/*!< in: line where the func invoked */
{
ibool result;
struct PSI_file_locker* locker = NULL;
/* Register the read or write I/O depending on "type" */
register_pfs_file_io_begin(locker, file, n,
(type == OS_FILE_WRITE)
? PSI_FILE_WRITE
: PSI_FILE_READ,
src_file, src_line);
result = os_aio_func(type, mode, name, file, buf, offset, offset_high,
n, message1, message2);
register_pfs_file_io_end(locker, n);
return(result);
}
/*******************************************************************//**
NOTE! Please use the corresponding macro os_file_read(), not directly
this function!
This is the performance schema instrumented wrapper function for
os_file_read() which requests a synchronous read operation.
@return TRUE if request was successful, FALSE if fail */
UNIV_INLINE
ibool
pfs_os_file_read_func(
/*==================*/
os_file_t file, /*!< in: handle to a file */
void* buf, /*!< in: buffer where to read */
ulint offset, /*!< in: least significant 32 bits of file
offset where to read */
ulint offset_high,/*!< in: most significant 32 bits of
offset */
ulint n, /*!< in: number of bytes to read */
const char* src_file,/*!< in: file name where func invoked */
ulint src_line)/*!< in: line where the func invoked */
{
ibool result;
struct PSI_file_locker* locker = NULL;
register_pfs_file_io_begin(locker, file, n, PSI_FILE_READ,
src_file, src_line);
result = os_file_read_func(file, buf, offset, offset_high, n);
register_pfs_file_io_end(locker, n);
return(result);
}
/*******************************************************************//**
NOTE! Please use the corresponding macro
os_file_read_no_error_handling(), not directly this function!
This is the performance schema instrumented wrapper function for
os_file_read_no_error_handling() which requests a synchronous
positioned read operation. This function does not do any error
handling. In case of error it returns FALSE.
@return TRUE if request was successful, FALSE if fail */
UNIV_INLINE
ibool
pfs_os_file_read_no_error_handling_func(
/*====================================*/
os_file_t file, /*!< in: handle to a file */
void* buf, /*!< in: buffer where to read */
ulint offset, /*!< in: least significant 32 bits of file
offset where to read */
ulint offset_high,/*!< in: most significant 32 bits of
offset */
ulint n, /*!< in: number of bytes to read */
const char* src_file,/*!< in: file name where func invoked */
ulint src_line)/*!< in: line where the func invoked */
{
ibool result;
struct PSI_file_locker* locker = NULL;
register_pfs_file_io_begin(locker, file, n, PSI_FILE_READ,
src_file, src_line);
result = os_file_read_no_error_handling_func(file, buf, offset,
offset_high, n);
register_pfs_file_io_end(locker, n);
return(result);
}
/*******************************************************************//**
NOTE! Please use the corresponding macro os_file_write(), not directly
this function!
This is the performance schema instrumented wrapper function for
os_file_write() which requests a synchronous write operation.
@return TRUE if request was successful, FALSE if fail */
UNIV_INLINE
ibool
pfs_os_file_write_func(
/*===================*/
const char* name, /*!< in: name of the file or path as a
null-terminated string */
os_file_t file, /*!< in: handle to a file */
const void* buf, /*!< in: buffer from which to write */
ulint offset, /*!< in: least significant 32 bits of file
offset where to write */
ulint offset_high,/*!< in: most significant 32 bits of
offset */
ulint n, /*!< in: number of bytes to write */
const char* src_file,/*!< in: file name where func invoked */
ulint src_line)/*!< in: line where the func invoked */
{
ibool result;
struct PSI_file_locker* locker = NULL;
register_pfs_file_io_begin(locker, file, n, PSI_FILE_WRITE,
src_file, src_line);
result = os_file_write_func(name, file, buf, offset, offset_high, n);
register_pfs_file_io_end(locker, n);
return(result);
}
/***********************************************************************//**
NOTE! Please use the corresponding macro os_file_flush(), not directly
this function!
This is the performance schema instrumented wrapper function for
os_file_flush() which flushes the write buffers of a given file to the disk.
@return TRUE if success */
UNIV_INLINE
ibool
pfs_os_file_flush_func(
/*===================*/
os_file_t file, /*!< in, own: handle to a file */
const char* src_file,/*!< in: file name where func invoked */
ulint src_line)/*!< in: line where the func invoked */
{
ibool result;
struct PSI_file_locker* locker = NULL;
register_pfs_file_io_begin(locker, file, 0, PSI_FILE_SYNC,
src_file, src_line);
result = os_file_flush_func(file);
register_pfs_file_io_end(locker, 0);
return(result);
}
/***********************************************************************//**
NOTE! Please use the corresponding macro os_file_rename(), not directly
this function!
This is the performance schema instrumented wrapper function for
os_file_rename()
@return TRUE if success */
UNIV_INLINE
ibool
pfs_os_file_rename_func(
/*====================*/
mysql_pfs_key_t key, /*!< in: Performance Schema Key */
const char* oldpath,/*!< in: old file path as a null-terminated
string */
const char* newpath,/*!< in: new file path */
const char* src_file,/*!< in: file name where func invoked */
ulint src_line)/*!< in: line where the func invoked */
{
ibool result;
struct PSI_file_locker* locker = NULL;
register_pfs_file_open_begin(locker, key, PSI_FILE_RENAME, newpath,
src_file, src_line);
result = os_file_rename_func(oldpath, newpath);
register_pfs_file_open_end(locker, 0);
return(result);
}
#endif /* UNIV_PFS_IO */

View file

@ -56,6 +56,11 @@ typedef os_thread_t os_thread_id_t; /*!< In Unix we use the thread
/* Define a function pointer type to use in a typecast */
typedef void* (*os_posix_f_t) (void*);
#ifdef HAVE_PSI_INTERFACE
/* Define for performance schema registration key */
typedef unsigned int mysql_pfs_key_t;
#endif
/***************************************************************//**
Compares two thread ids for equality.
@return TRUE if equal */
@ -86,7 +91,7 @@ os_thread_t
os_thread_create(
/*=============*/
#ifndef __WIN__
os_posix_f_t start_f,
os_posix_f_t start_f,
#else
ulint (*start_f)(void*), /*!< in: pointer to function
from which to start */

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -519,6 +519,23 @@ pars_info_add_int4_literal(
/****************************************************************//**
Equivalent to:
char buf[8];
mach_write_ull(buf, val);
pars_info_add_literal(info, name, buf, 8, DATA_INT, 0);
except that the buffer is dynamically allocated from the info struct's
heap. */
UNIV_INTERN
void
pars_info_add_uint64_literal(
/*=========================*/
pars_info_t* info, /*!< in: info struct */
const char* name, /*!< in: name */
ib_uint64_t val); /*!< in: value */
/****************************************************************//**
Equivalent to:
char buf[8];
mach_write_to_8(buf, val);
pars_info_add_literal(info, name, buf, 8, DATA_BINARY, 0);

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -30,6 +30,7 @@ Created 5/27/1996 Heikki Tuuri
#include "data0data.h"
#include "dict0types.h"
#include "trx0trx.h"
#include "trx0roll.h"
#include "srv0srv.h"
#include "usr0types.h"
#include "que0types.h"
@ -215,6 +216,16 @@ trx_t*
thr_get_trx(
/*========*/
que_thr_t* thr); /*!< in: query thread */
/*******************************************************************//**
Determines if this thread is rolling back an incomplete transaction
in crash recovery.
@return TRUE if thr is rolling back an incomplete transaction in crash
recovery */
UNIV_INLINE
ibool
thr_is_recv(
/*========*/
const que_thr_t* thr); /*!< in: query thread */
/***********************************************************************//**
Gets the type of a graph node. */
UNIV_INLINE

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -38,6 +38,20 @@ thr_get_trx(
return(thr->graph->trx);
}
/*******************************************************************//**
Determines if this thread is rolling back an incomplete transaction
in crash recovery.
@return TRUE if thr is rolling back an incomplete transaction in crash
recovery */
UNIV_INLINE
ibool
thr_is_recv(
/*========*/
const que_thr_t* thr) /*!< in: query thread */
{
return(trx_is_recv(thr->graph->trx));
}
/***********************************************************************//**
Gets the first thr in a fork. */
UNIV_INLINE

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 2005, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 2005, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 2000, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 2000, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -451,6 +451,12 @@ row_drop_table_for_mysql(
const char* name, /*!< in: table name */
trx_t* trx, /*!< in: transaction handle */
ibool drop_db);/*!< in: TRUE=dropping whole database */
/*********************************************************************//**
Drop all temporary tables during crash recovery. */
UNIV_INTERN
void
row_mysql_drop_temp_tables(void);
/*============================*/
/*********************************************************************//**
Discards the tablespace of a table which stored in an .ibd file. Discarding
@ -494,14 +500,19 @@ row_rename_table_for_mysql(
trx_t* trx, /*!< in: transaction handle */
ibool commit); /*!< in: if TRUE then commit trx */
/*********************************************************************//**
Checks a table for corruption.
@return DB_ERROR or DB_SUCCESS */
Checks that the index contains entries in an ascending order, unique
constraint is not broken, and calculates the number of index entries
in the read view of the current transaction.
@return DB_SUCCESS if ok */
UNIV_INTERN
ulint
row_check_table_for_mysql(
row_check_index_for_mysql(
/*======================*/
row_prebuilt_t* prebuilt); /*!< in: prebuilt struct in MySQL
handle */
row_prebuilt_t* prebuilt, /*!< in: prebuilt struct
in MySQL handle */
const dict_index_t* index, /*!< in: index */
ulint* n_rows); /*!< out: number of entries
seen in the consistent read */
/*********************************************************************//**
Determines if a table is a magic monitor table.

View file

@ -45,6 +45,28 @@ row_purge_node_create(
que_thr_t* parent, /*!< in: parent node, i.e., a thr node */
mem_heap_t* heap); /*!< in: memory heap where created */
/***********************************************************//**
Determines if it is possible to remove a secondary index entry.
Removal is possible if the secondary index entry does not refer to any
not delete marked version of a clustered index record where DB_TRX_ID
is newer than the purge view.
NOTE: This function should only be called by the purge thread, only
while holding a latch on the leaf page of the secondary index entry
(or keeping the buffer pool watch on the page). It is possible that
this function first returns TRUE and then FALSE, if a user transaction
inserts a record that the secondary index entry would refer to.
However, in that case, the user transaction would also re-insert the
secondary index entry after purge has removed it and released the leaf
page latch.
@return TRUE if the secondary index record can be purged */
UNIV_INTERN
ibool
row_purge_poss_sec(
/*===============*/
purge_node_t* node, /*!< in/out: row purge node */
dict_index_t* index, /*!< in: secondary index */
const dtuple_t* entry); /*!< in: secondary index entry */
/***************************************************************
Does the purge operation for a single undo log record. This is a high-level
function used in an SQL execution graph.
@return query thread to run next or NULL */

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -257,11 +257,25 @@ row_get_clust_rec(
dict_index_t* index, /*!< in: secondary index */
dict_index_t** clust_index,/*!< out: clustered index */
mtr_t* mtr); /*!< in: mtr */
/** Result of row_search_index_entry */
enum row_search_result {
ROW_FOUND = 0, /*!< the record was found */
ROW_NOT_FOUND, /*!< record not found */
ROW_BUFFERED, /*!< one of BTR_INSERT, BTR_DELETE, or
BTR_DELETE_MARK was specified, the
secondary index leaf page was not in
the buffer pool, and the operation was
enqueued in the insert/delete buffer */
ROW_NOT_DELETED_REF, /*!< BTR_DELETE was specified, and
row_purge_poss_sec() failed */
};
/***************************************************************//**
Searches an index record.
@return TRUE if found */
@return whether the record was found or buffered */
UNIV_INTERN
ibool
enum row_search_result
row_search_index_entry(
/*===================*/
dict_index_t* index, /*!< in: index */

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1997, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -105,17 +105,6 @@ row_fetch_print(
/*============*/
void* row, /*!< in: sel_node_t* */
void* user_arg); /*!< in: not used */
/****************************************************************//**
Callback function for fetch that stores an unsigned 4 byte integer to the
location pointed. The column's type must be DATA_INT, DATA_UNSIGNED, length
= 4.
@return always returns NULL */
UNIV_INTERN
void*
row_fetch_store_uint4(
/*==================*/
void* row, /*!< in: sel_node_t* */
void* user_arg); /*!< in: data pointer */
/***********************************************************//**
Prints a row in a select result.
@return query thread to run next or NULL */

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software

View file

@ -1,7 +1,8 @@
/*****************************************************************************
Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
Copyright (c) 2008, 2009, Google Inc.
Copyright (c) 2009, Percona Inc.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@ -9,6 +10,13 @@ briefly in the InnoDB documentation. The contributions by Google are
incorporated with their permission, and subject to the conditions contained in
the file COPYING.Google.
Portions of this file contain modifications contributed and copyrighted
by Percona Inc.. Those modifications are
gratefully acknowledged and are described briefly in the InnoDB
documentation. The contributions by Percona Inc. are incorporated with
their permission, and subject to the conditions contained in the file
COPYING.Percona.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
@ -22,32 +30,6 @@ this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/***********************************************************************
Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 2009, Percona Inc.
Portions of this file contain modifications contributed and copyrighted
by Percona Inc.. Those modifications are
gratefully acknowledged and are described briefly in the InnoDB
documentation. The contributions by Percona Inc. are incorporated with
their permission, and subject to the conditions contained in the file
COPYING.Percona.
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
***********************************************************************/
/**************************************************//**
@file include/srv0srv.h
@ -125,6 +107,11 @@ on duplicate key checking and foreign key checking */
extern ibool srv_locks_unsafe_for_binlog;
#endif /* !UNIV_HOTBACKUP */
/* If this flag is TRUE, then we will use the native aio of the
OS (provided we compiled Innobase with it in), otherwise we will
use simulated aio we build below with threads.
Currently we support native aio on windows and linux */
extern my_bool srv_use_native_aio;
extern ulint srv_n_data_files;
extern char** srv_data_file_names;
extern ulint* srv_data_file_sizes;
@ -224,7 +211,8 @@ extern ibool srv_print_innodb_tablespace_monitor;
extern ibool srv_print_verbose_log;
extern ibool srv_print_innodb_table_monitor;
extern ibool srv_lock_timeout_and_monitor_active;
extern ibool srv_lock_timeout_active;
extern ibool srv_monitor_active;
extern ibool srv_error_monitor_active;
extern ulong srv_n_spin_wait_rounds;
@ -283,6 +271,12 @@ extern ulint srv_os_log_pending_writes;
log buffer and have to flush it */
extern ulint srv_log_waits;
/* the number of purge threads to use from the worker pool (currently 0 or 1) */
extern ulint srv_n_purge_threads;
/* the number of records to purge in one batch */
extern ulint srv_purge_batch_size;
/* variable that counts amount of data read in total (in bytes) */
extern ulint srv_data_read;
@ -324,6 +318,37 @@ typedef struct srv_sys_struct srv_sys_t;
/** The server system */
extern srv_sys_t* srv_sys;
# ifdef UNIV_PFS_THREAD
/* Keys to register InnoDB threads with performance schema */
extern mysql_pfs_key_t trx_rollback_clean_thread_key;
extern mysql_pfs_key_t io_handler_thread_key;
extern mysql_pfs_key_t srv_lock_timeout_thread_key;
extern mysql_pfs_key_t srv_error_monitor_thread_key;
extern mysql_pfs_key_t srv_monitor_thread_key;
extern mysql_pfs_key_t srv_master_thread_key;
/* This macro register the current thread and its key with performance
schema */
# define pfs_register_thread(key) \
do { \
if (PSI_server) { \
struct PSI_thread* psi = PSI_server->new_thread(key, NULL, 0);\
if (psi) { \
PSI_server->set_thread(psi); \
} \
} \
} while (0)
/* This macro delist the current thread from performance schema */
# define pfs_delete_thread() \
do { \
if (PSI_server) { \
PSI_server->delete_current_thread(); \
} \
} while (0)
# endif /* UNIV_PFS_THREAD */
#endif /* !UNIV_HOTBACKUP */
/** Types of raw partitions in innodb_data_file_path */
@ -464,6 +489,12 @@ srv_master_thread(
void* arg); /*!< in: a dummy parameter required by
os_thread_create */
/*******************************************************************//**
Wakes up the purge thread if it's not already awake. */
UNIV_INTERN
void
srv_wake_purge_thread(void);
/*=======================*/
/*******************************************************************//**
Tells the Innobase server that there has been activity in the database
and wakes up the master thread if it is suspended (not sleeping). Used
in the MySQL interface. Note that there is a small chance that the master
@ -479,6 +510,16 @@ UNIV_INTERN
void
srv_wake_master_thread(void);
/*========================*/
/*******************************************************************//**
Tells the purge thread that there has been activity in the database
and wakes up the purge thread if it is suspended (not sleeping). Note
that there is a small chance that the purge thread stays suspended
(we do not protect our operation with the kernel mutex, for
performace reasons). */
UNIV_INTERN
void
srv_wake_purge_thread_if_not_active(void);
/*=====================================*/
/*********************************************************************//**
Puts an OS thread to wait if there are too many concurrent threads
(>= srv_thread_concurrency) inside InnoDB. The threads wait in a FIFO queue. */
@ -537,15 +578,23 @@ srv_release_mysql_thread_if_suspended(
MySQL OS thread */
/*********************************************************************//**
A thread which wakes up threads whose lock wait may have lasted too long.
This also prints the info output by various InnoDB monitors.
@return a dummy parameter */
UNIV_INTERN
os_thread_ret_t
srv_lock_timeout_and_monitor_thread(
/*================================*/
srv_lock_timeout_thread(
/*====================*/
void* arg); /*!< in: a dummy parameter required by
os_thread_create */
/*********************************************************************//**
A thread which prints the info output by various InnoDB monitors.
@return a dummy parameter */
UNIV_INTERN
os_thread_ret_t
srv_monitor_thread(
/*===============*/
void* arg); /*!< in: a dummy parameter required by
os_thread_create */
/*************************************************************************
A thread which prints warnings about semaphore waits which have lasted
too long. These can be used to track bugs which cause hangs.
@return a dummy parameter */
@ -556,12 +605,15 @@ srv_error_monitor_thread(
void* arg); /*!< in: a dummy parameter required by
os_thread_create */
/******************************************************************//**
Outputs to a file the output of the InnoDB Monitor. */
Outputs to a file the output of the InnoDB Monitor.
@return FALSE if not all information printed
due to failure to obtain necessary mutex */
UNIV_INTERN
void
ibool
srv_printf_innodb_monitor(
/*======================*/
FILE* file, /*!< in: output stream */
ibool nowait, /*!< in: whether to wait for kernel mutex */
ulint* trx_start, /*!< out: file position of the start of
the list of active transactions */
ulint* trx_end); /*!< out: file position of the end of
@ -574,6 +626,15 @@ void
srv_export_innodb_status(void);
/*==========================*/
/*********************************************************************//**
Asynchronous purge thread.
@return a dummy parameter */
UNIV_INTERN
os_thread_ret_t
srv_purge_thread(
/*=============*/
void* arg __attribute__((unused))); /*!< in: a dummy parameter
required by os_thread_create */
/** Thread slot in the thread table */
typedef struct srv_slot_struct srv_slot_t;
@ -643,8 +704,9 @@ struct srv_sys_struct{
extern ulint srv_n_threads_active[];
#else /* !UNIV_HOTBACKUP */
# define srv_use_checksums TRUE
# define srv_use_adaptive_hash_indexes FALSE
# define srv_use_checksums TRUE
# define srv_use_native_aio FALSE
# define srv_force_recovery 0UL
# define srv_set_io_thread_op_info(t,info) ((void) 0)
# define srv_is_being_started 0

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@ -105,23 +105,140 @@ extern ib_int64_t rw_x_os_wait_count;
set only when UNIV_SYNC_PERF_STAT is defined */
extern ib_int64_t rw_x_exit_count;
#ifdef UNIV_PFS_RWLOCK
/* Following are rwlock keys used to register with MySQL
performance schema */
# ifdef UNIV_LOG_ARCHIVE
extern mysql_pfs_key_t archive_lock_key;
# endif /* UNIV_LOG_ARCHIVE */
extern mysql_pfs_key_t btr_search_latch_key;
extern mysql_pfs_key_t buf_block_lock_key;
# ifdef UNIV_SYNC_DEBUG
extern mysql_pfs_key_t buf_block_debug_latch_key;
# endif /* UNIV_SYNC_DEBUG */
extern mysql_pfs_key_t dict_operation_lock_key;
extern mysql_pfs_key_t fil_space_latch_key;
extern mysql_pfs_key_t checkpoint_lock_key;
extern mysql_pfs_key_t trx_i_s_cache_lock_key;
extern mysql_pfs_key_t trx_purge_latch_key;
extern mysql_pfs_key_t index_tree_rw_lock_key;
#endif /* UNIV_PFS_RWLOCK */
#ifndef UNIV_PFS_RWLOCK
/******************************************************************//**
Creates, or rather, initializes an rw-lock object in a specified memory
location (which must be appropriately aligned). The rw-lock is initialized
to the non-locked state. Explicit freeing of the rw-lock with rw_lock_free
is necessary only if the memory block containing it is freed. */
#ifdef UNIV_DEBUG
# ifdef UNIV_SYNC_DEBUG
# define rw_lock_create(L, level) \
is necessary only if the memory block containing it is freed.
if MySQL performance schema is enabled and "UNIV_PFS_RWLOCK" is
defined, the rwlock are instrumented with performance schema probes. */
# ifdef UNIV_DEBUG
# ifdef UNIV_SYNC_DEBUG
# define rw_lock_create(K, L, level) \
rw_lock_create_func((L), (level), #L, __FILE__, __LINE__)
# else /* UNIV_SYNC_DEBUG */
# define rw_lock_create(L, level) \
# else /* UNIV_SYNC_DEBUG */
# define rw_lock_create(K, L, level) \
rw_lock_create_func((L), #L, __FILE__, __LINE__)
# endif /* UNIV_SYNC_DEBUG */
#else /* UNIV_DEBUG */
# define rw_lock_create(L, level) \
# endif/* UNIV_SYNC_DEBUG */
# else /* UNIV_DEBUG */
# define rw_lock_create(K, L, level) \
rw_lock_create_func((L), __FILE__, __LINE__)
#endif /* UNIV_DEBUG */
# endif /* UNIV_DEBUG */
/**************************************************************//**
NOTE! The following macros should be used in rw locking and
unlocking, not the corresponding function. */
# define rw_lock_s_lock(M) \
rw_lock_s_lock_func((M), 0, __FILE__, __LINE__)
# define rw_lock_s_lock_gen(M, P) \
rw_lock_s_lock_func((M), (P), __FILE__, __LINE__)
# define rw_lock_s_lock_nowait(M, F, L) \
rw_lock_s_lock_low((M), 0, (F), (L))
# ifdef UNIV_SYNC_DEBUG
# define rw_lock_s_unlock_gen(L, P) rw_lock_s_unlock_func(P, L)
# else
# define rw_lock_s_unlock_gen(L, P) rw_lock_s_unlock_func(L)
# endif
# define rw_lock_x_lock(M) \
rw_lock_x_lock_func((M), 0, __FILE__, __LINE__)
# define rw_lock_x_lock_gen(M, P) \
rw_lock_x_lock_func((M), (P), __FILE__, __LINE__)
# define rw_lock_x_lock_nowait(M) \
rw_lock_x_lock_func_nowait((M), __FILE__, __LINE__)
# ifdef UNIV_SYNC_DEBUG
# define rw_lock_x_unlock_gen(L, P) rw_lock_x_unlock_func(P, L)
# else
# define rw_lock_x_unlock_gen(L, P) rw_lock_x_unlock_func(L)
# endif
# define rw_lock_free(M) rw_lock_free_func(M)
#else /* !UNIV_PFS_RWLOCK */
/* Following macros point to Performance Schema instrumented functions. */
# ifdef UNIV_DEBUG
# ifdef UNIV_SYNC_DEBUG
# define rw_lock_create(K, L, level) \
pfs_rw_lock_create_func((K), (L), (level), #L, __FILE__, __LINE__)
# else /* UNIV_SYNC_DEBUG */
# define rw_lock_create(K, L, level) \
pfs_rw_lock_create_func((K), (L), #L, __FILE__, __LINE__)
# endif/* UNIV_SYNC_DEBUG */
# else /* UNIV_DEBUG */
# define rw_lock_create(K, L, level) \
pfs_rw_lock_create_func((K), (L), __FILE__, __LINE__)
# endif /* UNIV_DEBUG */
/******************************************************************
NOTE! The following macros should be used in rw locking and
unlocking, not the corresponding function. */
# define rw_lock_s_lock(M) \
pfs_rw_lock_s_lock_func((M), 0, __FILE__, __LINE__)
# define rw_lock_s_lock_gen(M, P) \
pfs_rw_lock_s_lock_func((M), (P), __FILE__, __LINE__)
# define rw_lock_s_lock_nowait(M, F, L) \
pfs_rw_lock_s_lock_low((M), 0, (F), (L))
# ifdef UNIV_SYNC_DEBUG
# define rw_lock_s_unlock_gen(L, P) pfs_rw_lock_s_unlock_func(P, L)
# else
# define rw_lock_s_unlock_gen(L, P) pfs_rw_lock_s_unlock_func(L)
# endif
# define rw_lock_x_lock(M) \
pfs_rw_lock_x_lock_func((M), 0, __FILE__, __LINE__)
# define rw_lock_x_lock_gen(M, P) \
pfs_rw_lock_x_lock_func((M), (P), __FILE__, __LINE__)
# define rw_lock_x_lock_nowait(M) \
pfs_rw_lock_x_lock_func_nowait((M), __FILE__, __LINE__)
# ifdef UNIV_SYNC_DEBUG
# define rw_lock_x_unlock_gen(L, P) pfs_rw_lock_x_unlock_func(P, L)
# else
# define rw_lock_x_unlock_gen(L, P) pfs_rw_lock_x_unlock_func(L)
# endif
# define rw_lock_free(M) pfs_rw_lock_free_func(M)
#endif /* UNIV_PFS_RWLOCK */
#define rw_lock_s_unlock(L) rw_lock_s_unlock_gen(L, 0)
#define rw_lock_x_unlock(L) rw_lock_x_unlock_gen(L, 0)
/******************************************************************//**
Creates, or rather, initializes an rw-lock object in a specified memory
@ -137,18 +254,18 @@ rw_lock_create_func(
# ifdef UNIV_SYNC_DEBUG
ulint level, /*!< in: level */
# endif /* UNIV_SYNC_DEBUG */
const char* cmutex_name, /*!< in: mutex name */
const char* cmutex_name, /*!< in: mutex name */
#endif /* UNIV_DEBUG */
const char* cfile_name, /*!< in: file name where created */
ulint cline); /*!< in: file line where created */
ulint cline); /*!< in: file line where created */
/******************************************************************//**
Calling this function is obligatory only if the memory buffer containing
the rw-lock is freed. Removes an rw-lock object from the global list. The
rw-lock is checked to be in the non-locked state. */
UNIV_INTERN
void
rw_lock_free(
/*=========*/
rw_lock_free_func(
/*==============*/
rw_lock_t* lock); /*!< in: rw-lock */
#ifdef UNIV_DEBUG
/******************************************************************//**
@ -161,24 +278,6 @@ rw_lock_validate(
/*=============*/
rw_lock_t* lock); /*!< in: rw-lock */
#endif /* UNIV_DEBUG */
/**************************************************************//**
NOTE! The following macros should be used in rw s-locking, not the
corresponding function. */
#define rw_lock_s_lock(M) rw_lock_s_lock_func(\
(M), 0, __FILE__, __LINE__)
/**************************************************************//**
NOTE! The following macros should be used in rw s-locking, not the
corresponding function. */
#define rw_lock_s_lock_gen(M, P) rw_lock_s_lock_func(\
(M), (P), __FILE__, __LINE__)
/**************************************************************//**
NOTE! The following macros should be used in rw s-locking, not the
corresponding function. */
#define rw_lock_s_lock_nowait(M, F, L) rw_lock_s_lock_low(\
(M), 0, (F), (L))
/******************************************************************//**
Low-level function which tries to lock an rw-lock in s-mode. Performs no
spinning.
@ -233,33 +332,6 @@ rw_lock_s_unlock_func(
#endif
rw_lock_t* lock); /*!< in/out: rw-lock */
#ifdef UNIV_SYNC_DEBUG
# define rw_lock_s_unlock_gen(L, P) rw_lock_s_unlock_func(P, L)
#else
# define rw_lock_s_unlock_gen(L, P) rw_lock_s_unlock_func(L)
#endif
/*******************************************************************//**
Releases a shared mode lock. */
#define rw_lock_s_unlock(L) rw_lock_s_unlock_gen(L, 0)
/**************************************************************//**
NOTE! The following macro should be used in rw x-locking, not the
corresponding function. */
#define rw_lock_x_lock(M) rw_lock_x_lock_func(\
(M), 0, __FILE__, __LINE__)
/**************************************************************//**
NOTE! The following macro should be used in rw x-locking, not the
corresponding function. */
#define rw_lock_x_lock_gen(M, P) rw_lock_x_lock_func(\
(M), (P), __FILE__, __LINE__)
/**************************************************************//**
NOTE! The following macros should be used in rw x-locking, not the
corresponding function. */
#define rw_lock_x_lock_nowait(M) rw_lock_x_lock_func_nowait(\
(M), __FILE__, __LINE__)
/******************************************************************//**
NOTE! Use the corresponding macro, not directly this function! Lock an
rw-lock in exclusive mode for the current thread. If the rw-lock is locked
@ -290,14 +362,6 @@ rw_lock_x_unlock_func(
#endif
rw_lock_t* lock); /*!< in/out: rw-lock */
#ifdef UNIV_SYNC_DEBUG
# define rw_lock_x_unlock_gen(L, P) rw_lock_x_unlock_func(P, L)
#else
# define rw_lock_x_unlock_gen(L, P) rw_lock_x_unlock_func(L)
#endif
/*******************************************************************//**
Releases an exclusive mode lock. */
#define rw_lock_x_unlock(L) rw_lock_x_unlock_gen(L, 0)
/******************************************************************//**
Low-level function which locks an rw-lock in s-mode when we know that it
@ -429,8 +493,9 @@ ibool
rw_lock_own(
/*========*/
rw_lock_t* lock, /*!< in: rw-lock */
ulint lock_type); /*!< in: lock type: RW_LOCK_SHARED,
ulint lock_type) /*!< in: lock type: RW_LOCK_SHARED,
RW_LOCK_EX */
__attribute__((warn_unused_result));
#endif /* UNIV_SYNC_DEBUG */
/******************************************************************//**
Checks if somebody has locked the rw-lock in the specified mode. */
@ -539,6 +604,9 @@ struct rw_lock_struct {
info list of the lock */
ulint level; /*!< Level in the global latching order. */
#endif /* UNIV_SYNC_DEBUG */
#ifdef UNIV_PFS_RWLOCK
struct PSI_rwlock *pfs_psi;/*!< The instrumentation hook */
#endif
ulint count_os_wait; /*!< Count of os_waits. May not be accurate */
const char* cfile_name;/*!< File name where lock created */
/* last s-lock file/line is not guaranteed to be correct */
@ -577,6 +645,160 @@ struct rw_lock_debug_struct {
};
#endif /* UNIV_SYNC_DEBUG */
/* For performance schema instrumentation, a new set of rwlock
wrap functions are created if "UNIV_PFS_RWLOCK" is defined.
The instrumentations are not planted directly into original
functions, so that we keep the underlying function as they
are. And in case, user wants to "take out" some rwlock from
instrumentation even if performance schema (UNIV_PFS_RWLOCK)
is defined, they can do so by reinstating APIs directly link to
original underlying functions.
The instrumented function names have prefix of "pfs_rw_lock_" vs.
original name prefix of "rw_lock_". Following are list of functions
that have been instrumented:
rw_lock_create()
rw_lock_x_lock()
rw_lock_x_lock_gen()
rw_lock_x_lock_nowait()
rw_lock_x_unlock_gen()
rw_lock_s_lock()
rw_lock_s_lock_gen()
rw_lock_s_lock_nowait()
rw_lock_s_unlock_gen()
rw_lock_free()
Two function APIs rw_lock_x_unlock_direct() and rw_lock_s_unlock_direct()
do not have any caller/user, they are not instrumented.
*/
#ifdef UNIV_PFS_RWLOCK
/******************************************************************//**
Performance schema instrumented wrap function for rw_lock_create_func()
NOTE! Please use the corresponding macro rw_lock_create(), not
directly this function! */
UNIV_INLINE
void
pfs_rw_lock_create_func(
/*====================*/
PSI_rwlock_key key, /*!< in: key registered with
performance schema */
rw_lock_t* lock, /*!< in: rw lock */
#ifdef UNIV_DEBUG
# ifdef UNIV_SYNC_DEBUG
ulint level, /*!< in: level */
# endif /* UNIV_SYNC_DEBUG */
const char* cmutex_name, /*!< in: mutex name */
#endif /* UNIV_DEBUG */
const char* cfile_name, /*!< in: file name where created */
ulint cline); /*!< in: file line where created */
/******************************************************************//**
Performance schema instrumented wrap function for rw_lock_x_lock_func()
NOTE! Please use the corresponding macro rw_lock_x_lock(), not
directly this function! */
UNIV_INLINE
void
pfs_rw_lock_x_lock_func(
/*====================*/
rw_lock_t* lock, /*!< in: pointer to rw-lock */
ulint pass, /*!< in: pass value; != 0, if the lock will
be passed to another thread to unlock */
const char* file_name,/*!< in: file name where lock requested */
ulint line); /*!< in: line where requested */
/******************************************************************//**
Performance schema instrumented wrap function for
rw_lock_x_lock_func_nowait()
NOTE! Please use the corresponding macro, not directly this function!
@return TRUE if success */
UNIV_INLINE
ibool
pfs_rw_lock_x_lock_func_nowait(
/*===========================*/
rw_lock_t* lock, /*!< in: pointer to rw-lock */
const char* file_name,/*!< in: file name where lock requested */
ulint line); /*!< in: line where requested */
/******************************************************************//**
Performance schema instrumented wrap function for rw_lock_s_lock_func()
NOTE! Please use the corresponding macro rw_lock_s_lock(), not directly
this function! */
UNIV_INLINE
void
pfs_rw_lock_s_lock_func(
/*====================*/
rw_lock_t* lock, /*!< in: pointer to rw-lock */
ulint pass, /*!< in: pass value; != 0, if the lock will
be passed to another thread to unlock */
const char* file_name,/*!< in: file name where lock requested */
ulint line); /*!< in: line where requested */
/******************************************************************//**
Performance schema instrumented wrap function for rw_lock_s_lock_func()
NOTE! Please use the corresponding macro rw_lock_s_lock(), not directly
this function!
@return TRUE if success */
UNIV_INLINE
ibool
pfs_rw_lock_s_lock_low(
/*===================*/
rw_lock_t* lock, /*!< in: pointer to rw-lock */
ulint pass, /*!< in: pass value; != 0, if the
lock will be passed to another
thread to unlock */
const char* file_name, /*!< in: file name where lock requested */
ulint line); /*!< in: line where requested */
/******************************************************************//**
Performance schema instrumented wrap function for rw_lock_x_lock_func()
NOTE! Please use the corresponding macro rw_lock_x_lock(), not directly
this function! */
UNIV_INLINE
void
pfs_rw_lock_x_lock_func(
/*====================*/
rw_lock_t* lock, /*!< in: pointer to rw-lock */
ulint pass, /*!< in: pass value; != 0, if the lock will
be passed to another thread to unlock */
const char* file_name,/*!< in: file name where lock requested */
ulint line); /*!< in: line where requested */
/******************************************************************//**
Performance schema instrumented wrap function for rw_lock_s_unlock_func()
NOTE! Please use the corresponding macro rw_lock_s_unlock(), not directly
this function! */
UNIV_INLINE
void
pfs_rw_lock_s_unlock_func(
/*======================*/
#ifdef UNIV_SYNC_DEBUG
ulint pass, /*!< in: pass value; != 0, if the
lock may have been passed to another
thread to unlock */
#endif
rw_lock_t* lock); /*!< in/out: rw-lock */
/******************************************************************//**
Performance schema instrumented wrap function for rw_lock_s_unlock_func()
NOTE! Please use the corresponding macro rw_lock_x_unlock(), not directly
this function! */
UNIV_INLINE
void
pfs_rw_lock_x_unlock_func(
/*======================*/
#ifdef UNIV_SYNC_DEBUG
ulint pass, /*!< in: pass value; != 0, if the
lock may have been passed to another
thread to unlock */
#endif
rw_lock_t* lock); /*!< in/out: rw-lock */
/******************************************************************//**
Performance schema instrumented wrap function for rw_lock_free_func()
NOTE! Please use the corresponding macro rw_lock_free(), not directly
this function! */
UNIV_INLINE
void
pfs_rw_lock_free_func(
/*==================*/
rw_lock_t* lock); /*!< in: rw-lock */
#endif /* UNIV_PFS_RWLOCK */
#ifndef UNIV_NONINL
#include "sync0rw.ic"
#endif

View file

@ -622,3 +622,265 @@ rw_lock_x_unlock_direct(
rw_x_exit_count++;
#endif
}
#ifdef UNIV_PFS_RWLOCK
/******************************************************************//**
Performance schema instrumented wrap function for rw_lock_create_func().
NOTE! Please use the corresponding macro rw_lock_create(), not directly
this function! */
UNIV_INLINE
void
pfs_rw_lock_create_func(
/*====================*/
mysql_pfs_key_t key, /*!< in: key registered with
performance schema */
rw_lock_t* lock, /*!< in: pointer to memory */
# ifdef UNIV_DEBUG
# ifdef UNIV_SYNC_DEBUG
ulint level, /*!< in: level */
# endif /* UNIV_SYNC_DEBUG */
const char* cmutex_name, /*!< in: mutex name */
# endif /* UNIV_DEBUG */
const char* cfile_name, /*!< in: file name where created */
ulint cline) /*!< in: file line where created */
{
/* Initialize the rwlock for performance schema */
lock->pfs_psi = (PSI_server && PFS_IS_INSTRUMENTED(key))
? PSI_server->init_rwlock(key, lock)
: NULL;
/* The actual function to initialize an rwlock */
rw_lock_create_func(lock,
# ifdef UNIV_DEBUG
# ifdef UNIV_SYNC_DEBUG
level,
# endif /* UNIV_SYNC_DEBUG */
cmutex_name,
# endif /* UNIV_DEBUG */
cfile_name,
cline);
}
/******************************************************************//**
Performance schema instrumented wrap function for rw_lock_x_lock_func()
NOTE! Please use the corresponding macro rw_lock_x_lock(), not directly
this function! */
UNIV_INLINE
void
pfs_rw_lock_x_lock_func(
/*====================*/
rw_lock_t* lock, /*!< in: pointer to rw-lock */
ulint pass, /*!< in: pass value; != 0, if the lock will
be passed to another thread to unlock */
const char* file_name,/*!< in: file name where lock requested */
ulint line) /*!< in: line where requested */
{
struct PSI_rwlock_locker* locker = NULL;
/* Record the entry of rw x lock request in performance schema */
if (UNIV_LIKELY(PSI_server && lock->pfs_psi)) {
locker = PSI_server->get_thread_rwlock_locker(
lock->pfs_psi, PSI_RWLOCK_WRITELOCK);
if (locker) {
PSI_server->start_rwlock_wrwait(locker,
file_name, line);
}
}
rw_lock_x_lock_func(lock, pass, file_name, line);
if (locker) {
PSI_server->end_rwlock_wrwait(locker, 0);
}
}
/******************************************************************//**
Performance schema instrumented wrap function for
rw_lock_x_lock_func_nowait()
NOTE! Please use the corresponding macro rw_lock_x_lock_func(),
not directly this function!
@return TRUE if success */
UNIV_INLINE
ibool
pfs_rw_lock_x_lock_func_nowait(
/*===========================*/
rw_lock_t* lock, /*!< in: pointer to rw-lock */
const char* file_name,/*!< in: file name where lock
requested */
ulint line) /*!< in: line where requested */
{
struct PSI_rwlock_locker* locker = NULL;
ibool ret;
/* Record the entry of rw x lock request in performance schema */
if (UNIV_LIKELY(PSI_server && lock->pfs_psi)) {
locker = PSI_server->get_thread_rwlock_locker(
lock->pfs_psi, PSI_RWLOCK_WRITELOCK);
if (locker) {
PSI_server->start_rwlock_wrwait(locker,
file_name, line);
}
}
ret = rw_lock_x_lock_func_nowait(lock, file_name, line);
if (locker) {
PSI_server->end_rwlock_wrwait(locker, 0);
}
return(ret);
}
/******************************************************************//**
Performance schema instrumented wrap function for rw_lock_free_func()
NOTE! Please use the corresponding macro rw_lock_free(), not directly
this function! */
UNIV_INLINE
void
pfs_rw_lock_free_func(
/*==================*/
rw_lock_t* lock) /*!< in: pointer to rw-lock */
{
if (UNIV_LIKELY(PSI_server && lock->pfs_psi)) {
PSI_server->destroy_rwlock(lock->pfs_psi);
lock->pfs_psi = NULL;
}
rw_lock_free_func(lock);
}
/******************************************************************//**
Performance schema instrumented wrap function for rw_lock_s_lock_func()
NOTE! Please use the corresponding macro rw_lock_s_lock(), not
directly this function! */
UNIV_INLINE
void
pfs_rw_lock_s_lock_func(
/*====================*/
rw_lock_t* lock, /*!< in: pointer to rw-lock */
ulint pass, /*!< in: pass value; != 0, if the
lock will be passed to another
thread to unlock */
const char* file_name,/*!< in: file name where lock
requested */
ulint line) /*!< in: line where requested */
{
struct PSI_rwlock_locker* locker = NULL;
/* Instrumented to inform we are aquiring a shared rwlock */
if (UNIV_LIKELY(PSI_server && lock->pfs_psi)) {
locker = PSI_server->get_thread_rwlock_locker(
lock->pfs_psi, PSI_RWLOCK_READLOCK);
if (locker) {
PSI_server->start_rwlock_rdwait(locker,
file_name, line);
}
}
rw_lock_s_lock_func(lock, pass, file_name, line);
if (locker) {
PSI_server->end_rwlock_rdwait(locker, 0);
}
}
/******************************************************************//**
Performance schema instrumented wrap function for rw_lock_s_lock_func()
NOTE! Please use the corresponding macro rw_lock_s_lock(), not
directly this function!
@return TRUE if success */
UNIV_INLINE
ibool
pfs_rw_lock_s_lock_low(
/*===================*/
rw_lock_t* lock, /*!< in: pointer to rw-lock */
ulint pass, /*!< in: pass value; != 0, if the
lock will be passed to another
thread to unlock */
const char* file_name, /*!< in: file name where lock requested */
ulint line) /*!< in: line where requested */
{
struct PSI_rwlock_locker* locker = NULL;
ibool ret;
/* Instrumented to inform we are aquiring a shared rwlock */
if (UNIV_LIKELY(PSI_server && lock->pfs_psi)) {
locker = PSI_server->get_thread_rwlock_locker(
lock->pfs_psi, PSI_RWLOCK_READLOCK);
if (locker) {
PSI_server->start_rwlock_rdwait(locker,
file_name, line);
}
}
ret = rw_lock_s_lock_low(lock, pass, file_name, line);
if (locker) {
PSI_server->end_rwlock_rdwait(locker, 0);
}
return(ret);
}
/******************************************************************//**
Performance schema instrumented wrap function for rw_lock_x_unlock_func()
NOTE! Please use the corresponding macro rw_lock_x_unlock(), not directly
this function! */
UNIV_INLINE
void
pfs_rw_lock_x_unlock_func(
/*======================*/
#ifdef UNIV_SYNC_DEBUG
ulint pass, /*!< in: pass value; != 0, if the
lock may have been passed to another
thread to unlock */
#endif
rw_lock_t* lock) /*!< in/out: rw-lock */
{
/* Inform performance schema we are unlocking the lock */
if (UNIV_LIKELY(PSI_server && lock->pfs_psi)) {
struct PSI_thread* thread;
thread = PSI_server->get_thread();
if (thread) {
PSI_server->unlock_rwlock(thread, lock->pfs_psi);
}
}
rw_lock_x_unlock_func(
#ifdef UNIV_SYNC_DEBUG
pass,
#endif
lock);
}
/******************************************************************//**
Performance schema instrumented wrap function for rw_lock_s_unlock_func()
NOTE! Please use the corresponding macro pfs_rw_lock_s_unlock(), not
directly this function! */
UNIV_INLINE
void
pfs_rw_lock_s_unlock_func(
/*======================*/
#ifdef UNIV_SYNC_DEBUG
ulint pass, /*!< in: pass value; != 0, if the
lock may have been passed to another
thread to unlock */
#endif
rw_lock_t* lock) /*!< in/out: rw-lock */
{
/* Inform performance schema we are unlocking the lock */
if (UNIV_LIKELY(PSI_server && lock->pfs_psi)) {
struct PSI_thread* thread;
thread = PSI_server->get_thread();
if (thread) {
PSI_server->unlock_rwlock(thread, lock->pfs_psi);
}
}
rw_lock_s_unlock_func(
#ifdef UNIV_SYNC_DEBUG
pass,
#endif
lock);
}
#endif /* UNIV_PFS_RWLOCK */

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@ -52,6 +52,69 @@ typedef LONG lock_word_t; /*!< On Windows, InterlockedExchange operates
typedef byte lock_word_t;
#endif
#if defined UNIV_PFS_MUTEX || defined UNIV_PFS_RWLOCK
/* There are mutexes/rwlocks that we want to exclude from
instrumentation even if their corresponding performance schema
define is set. And this PFS_NOT_INSTRUMENTED is used
as the key value to dentify those objects that would
be excluded from instrumentation. */
# define PFS_NOT_INSTRUMENTED ULINT32_UNDEFINED
# define PFS_IS_INSTRUMENTED(key) ((key) != PFS_NOT_INSTRUMENTED)
/* By default, buffer mutexes and rwlocks will be excluded from
instrumentation due to their large number of instances. */
# define PFS_SKIP_BUFFER_MUTEX_RWLOCK
#endif /* UNIV_PFS_MUTEX || UNIV_PFS_RWLOCK */
#ifdef UNIV_PFS_MUTEX
/* Key defines to register InnoDB mutexes with performance schema */
extern mysql_pfs_key_t autoinc_mutex_key;
extern mysql_pfs_key_t btr_search_enabled_mutex_key;
extern mysql_pfs_key_t buffer_block_mutex_key;
extern mysql_pfs_key_t buf_pool_mutex_key;
extern mysql_pfs_key_t buf_pool_zip_mutex_key;
extern mysql_pfs_key_t cache_last_read_mutex_key;
extern mysql_pfs_key_t dict_foreign_err_mutex_key;
extern mysql_pfs_key_t dict_sys_mutex_key;
extern mysql_pfs_key_t file_format_max_mutex_key;
extern mysql_pfs_key_t fil_system_mutex_key;
extern mysql_pfs_key_t flush_list_mutex_key;
extern mysql_pfs_key_t flush_order_mutex_key;
extern mysql_pfs_key_t hash_table_mutex_key;
extern mysql_pfs_key_t ibuf_bitmap_mutex_key;
extern mysql_pfs_key_t ibuf_mutex_key;
extern mysql_pfs_key_t ibuf_pessimistic_insert_mutex_key;
extern mysql_pfs_key_t ios_mutex_key;
extern mysql_pfs_key_t log_sys_mutex_key;
extern mysql_pfs_key_t kernel_mutex_key;
# ifdef UNIV_MEM_DEBUG
extern mysql_pfs_key_t mem_hash_mutex_key;
# endif /* UNIV_MEM_DEBUG */
extern mysql_pfs_key_t mem_pool_mutex_key;
extern mysql_pfs_key_t mutex_list_mutex_key;
extern mysql_pfs_key_t purge_sys_mutex_key;
extern mysql_pfs_key_t recv_sys_mutex_key;
extern mysql_pfs_key_t rseg_mutex_key;
# ifdef UNIV_SYNC_DEBUG
extern mysql_pfs_key_t rw_lock_debug_mutex_key;
# endif /* UNIV_SYNC_DEBUG */
extern mysql_pfs_key_t rw_lock_list_mutex_key;
extern mysql_pfs_key_t rw_lock_mutex_key;
extern mysql_pfs_key_t srv_dict_tmpfile_mutex_key;
extern mysql_pfs_key_t srv_innodb_monitor_mutex_key;
extern mysql_pfs_key_t srv_misc_tmpfile_mutex_key;
extern mysql_pfs_key_t srv_monitor_file_mutex_key;
extern mysql_pfs_key_t syn_arr_mutex_key;
# ifdef UNIV_SYNC_DEBUG
extern mysql_pfs_key_t sync_thread_mutex_key;
# endif /* UNIV_SYNC_DEBUG */
extern mysql_pfs_key_t trx_doublewrite_mutex_key;
extern mysql_pfs_key_t thr_local_mutex_key;
extern mysql_pfs_key_t trx_undo_mutex_key;
#endif /* UNIV_PFS_MUTEX */
/******************************************************************//**
Initializes the synchronization data structures. */
UNIV_INTERN
@ -64,24 +127,82 @@ UNIV_INTERN
void
sync_close(void);
/*===========*/
#undef mutex_free /* Fix for MacOS X */
#ifdef UNIV_PFS_MUTEX
/**********************************************************************
Following mutex APIs would be performance schema instrumented
if "UNIV_PFS_MUTEX" is defined:
mutex_create
mutex_enter
mutex_exit
mutex_enter_nowait
mutex_free
These mutex APIs will point to corresponding wrapper functions that contain
the performance schema instrumentation if "UNIV_PFS_MUTEX" is defined.
The instrumented wrapper functions have the prefix of "innodb_".
NOTE! The following macro should be used in mutex operation, not the
corresponding function. */
/******************************************************************//**
Creates, or rather, initializes a mutex object to a specified memory
location (which must be appropriately aligned). The mutex is initialized
in the reset state. Explicit freeing of the mutex with mutex_free is
necessary only if the memory block containing it is freed. */
#ifdef UNIV_DEBUG
# ifdef UNIV_SYNC_DEBUG
# define mutex_create(M, level) \
mutex_create_func((M), #M, (level), __FILE__, __LINE__)
# ifdef UNIV_DEBUG
# ifdef UNIV_SYNC_DEBUG
# define mutex_create(K, M, level) \
pfs_mutex_create_func((K), (M), #M, (level), __FILE__, __LINE__)
# else
# define mutex_create(K, M, level) \
pfs_mutex_create_func((K), (M), #M, __FILE__, __LINE__)
# endif/* UNIV_SYNC_DEBUG */
# else
# define mutex_create(M, level) \
# define mutex_create(K, M, level) \
pfs_mutex_create_func((K), (M), __FILE__, __LINE__)
# endif /* UNIV_DEBUG */
# define mutex_enter(M) \
pfs_mutex_enter_func((M), __FILE__, __LINE__)
# define mutex_enter_nowait(M) \
pfs_mutex_enter_nowait_func((M), __FILE__, __LINE__)
# define mutex_exit(M) pfs_mutex_exit_func(M)
# define mutex_free(M) pfs_mutex_free_func(M)
#else /* UNIV_PFS_MUTEX */
/* If "UNIV_PFS_MUTEX" is not defined, the mutex APIs point to
original non-instrumented functions */
# ifdef UNIV_DEBUG
# ifdef UNIV_SYNC_DEBUG
# define mutex_create(K, M, level) \
mutex_create_func((M), #M, (level), __FILE__, __LINE__)
# else /* UNIV_SYNC_DEBUG */
# define mutex_create(K, M, level) \
mutex_create_func((M), #M, __FILE__, __LINE__)
# endif
#else
# define mutex_create(M, level) \
# endif /* UNIV_SYNC_DEBUG */
# else /* UNIV_DEBUG */
# define mutex_create(K, M, level) \
mutex_create_func((M), __FILE__, __LINE__)
#endif
# endif /* UNIV_DEBUG */
# define mutex_enter(M) mutex_enter_func((M), __FILE__, __LINE__)
# define mutex_enter_nowait(M) \
mutex_enter_nowait_func((M), __FILE__, __LINE__)
# define mutex_exit(M) mutex_exit_func(M)
# define mutex_free(M) mutex_free_func(M)
#endif /* UNIV_PFS_MUTEX */
/******************************************************************//**
Creates, or rather, initializes a mutex object in a specified memory
@ -102,26 +223,20 @@ mutex_create_func(
const char* cfile_name, /*!< in: file name where created */
ulint cline); /*!< in: file line where created */
#undef mutex_free /* Fix for MacOS X */
/******************************************************************//**
NOTE! Use the corresponding macro mutex_free(), not directly this function!
Calling this function is obligatory only if the memory buffer containing
the mutex is freed. Removes a mutex object from the mutex list. The mutex
is checked to be in the reset state. */
UNIV_INTERN
void
mutex_free(
/*=======*/
mutex_free_func(
/*============*/
mutex_t* mutex); /*!< in: mutex */
/**************************************************************//**
NOTE! The following macro should be used in mutex locking, not the
corresponding function. */
#define mutex_enter(M) mutex_enter_func((M), __FILE__, __LINE__)
/**************************************************************//**
NOTE! The following macro should be used in mutex locking, not the
corresponding function. */
/* NOTE! currently same as mutex_enter! */
#define mutex_enter_fast(M) mutex_enter_func((M), __FILE__, __LINE__)
@ -137,12 +252,6 @@ mutex_enter_func(
mutex_t* mutex, /*!< in: pointer to mutex */
const char* file_name, /*!< in: file name where locked */
ulint line); /*!< in: line where locked */
/**************************************************************//**
NOTE! The following macro should be used in mutex locking, not the
corresponding function. */
#define mutex_enter_nowait(M) \
mutex_enter_nowait_func((M), __FILE__, __LINE__)
/********************************************************************//**
NOTE! Use the corresponding macro in the header file, not this function
directly. Tries to lock the mutex for the current thread. If the lock is not
@ -157,12 +266,86 @@ mutex_enter_nowait_func(
requested */
ulint line); /*!< in: line where requested */
/******************************************************************//**
NOTE! Use the corresponding macro mutex_exit(), not directly this function!
Unlocks a mutex owned by the current thread. */
UNIV_INLINE
void
mutex_exit(
/*=======*/
mutex_exit_func(
/*============*/
mutex_t* mutex); /*!< in: pointer to mutex */
#ifdef UNIV_PFS_MUTEX
/******************************************************************//**
NOTE! Please use the corresponding macro mutex_create(), not directly
this function!
A wrapper function for mutex_create_func(), registers the mutex
with peformance schema if "UNIV_PFS_MUTEX" is defined when
creating the mutex */
UNIV_INLINE
void
pfs_mutex_create_func(
/*==================*/
PSI_mutex_key key, /*!< in: Performance Schema key */
mutex_t* mutex, /*!< in: pointer to memory */
# ifdef UNIV_DEBUG
const char* cmutex_name, /*!< in: mutex name */
# ifdef UNIV_SYNC_DEBUG
ulint level, /*!< in: level */
# endif /* UNIV_SYNC_DEBUG */
# endif /* UNIV_DEBUG */
const char* cfile_name, /*!< in: file name where created */
ulint cline); /*!< in: file line where created */
/******************************************************************//**
NOTE! Please use the corresponding macro mutex_enter(), not directly
this function!
This is a performance schema instrumented wrapper function for
mutex_enter_func(). */
UNIV_INLINE
void
pfs_mutex_enter_func(
/*=================*/
mutex_t* mutex, /*!< in: pointer to mutex */
const char* file_name, /*!< in: file name where locked */
ulint line); /*!< in: line where locked */
/********************************************************************//**
NOTE! Please use the corresponding macro mutex_enter_nowait(), not directly
this function!
This is a performance schema instrumented wrapper function for
mutex_enter_nowait_func.
@return 0 if succeed, 1 if not */
UNIV_INLINE
ulint
pfs_mutex_enter_nowait_func(
/*========================*/
mutex_t* mutex, /*!< in: pointer to mutex */
const char* file_name, /*!< in: file name where mutex
requested */
ulint line); /*!< in: line where requested */
/******************************************************************//**
NOTE! Please use the corresponding macro mutex_exit(), not directly
this function!
A wrap function of mutex_exit_func() with peformance schema instrumentation.
Unlocks a mutex owned by the current thread. */
UNIV_INLINE
void
pfs_mutex_exit_func(
/*================*/
mutex_t* mutex); /*!< in: pointer to mutex */
/******************************************************************//**
NOTE! Please use the corresponding macro mutex_free(), not directly
this function!
Wrapper function for mutex_free_func(). Also destroys the performance
schema probes when freeing the mutex */
UNIV_INLINE
void
pfs_mutex_free_func(
/*================*/
mutex_t* mutex); /*!< in: mutex */
#endif /* UNIV_PFS_MUTEX */
#ifdef UNIV_SYNC_DEBUG
/******************************************************************//**
Returns TRUE if no mutex or rw-lock is currently locked.
@ -206,7 +389,8 @@ UNIV_INTERN
ibool
mutex_own(
/*======*/
const mutex_t* mutex); /*!< in: mutex */
const mutex_t* mutex) /*!< in: mutex */
__attribute__((warn_unused_result));
#endif /* UNIV_DEBUG */
#ifdef UNIV_SYNC_DEBUG
/******************************************************************//**
@ -238,16 +422,27 @@ ibool
sync_thread_levels_empty(void);
/*==========================*/
/******************************************************************//**
Checks that the level array for the current thread is empty.
@return TRUE if empty except the exceptions specified below */
Checks if the level array for the current thread contains a
mutex or rw-latch at the specified level.
@return a matching latch, or NULL if not found */
UNIV_INTERN
ibool
sync_thread_levels_empty_gen(
/*=========================*/
void*
sync_thread_levels_contains(
/*========================*/
ulint level); /*!< in: latching order level
(SYNC_DICT, ...)*/
/******************************************************************//**
Checks if the level array for the current thread is empty.
@return a latch, or NULL if empty except the exceptions specified below */
UNIV_INTERN
void*
sync_thread_levels_nonempty_gen(
/*============================*/
ibool dict_mutex_allowed); /*!< in: TRUE if dictionary mutex is
allowed to be owned by the thread,
also purge_is_running mutex is
allowed */
#define sync_thread_levels_empty_gen(d) (!sync_thread_levels_nonempty_gen(d))
/******************************************************************//**
Gets the debug information for a reserved mutex. */
UNIV_INTERN
@ -475,8 +670,10 @@ or row lock! */
SYNC_SEARCH_SYS, as memory allocation
can call routines there! Otherwise
the level is SYNC_MEM_HASH. */
#define SYNC_BUF_POOL 150
#define SYNC_BUF_BLOCK 149
#define SYNC_BUF_POOL 150 /* Buffer pool mutex */
#define SYNC_BUF_FLUSH_ORDER 147
#define SYNC_BUF_BLOCK 146 /* Block mutex */
#define SYNC_BUF_FLUSH_LIST 145 /* Buffer flush list mutex */
#define SYNC_DOUBLEWRITE 140
#define SYNC_ANY_LATCH 135
#define SYNC_THR_LOCAL 133
@ -538,6 +735,10 @@ struct mutex_struct {
const char* cmutex_name; /*!< mutex name */
ulint mutex_type; /*!< 0=usual mutex, 1=rw_lock mutex */
#endif /* UNIV_DEBUG */
#ifdef UNIV_PFS_MUTEX
struct PSI_mutex* pfs_psi; /*!< The performance schema
instrumentation hook */
#endif
};
/** The global array of wait cells for implementation of the databases own

View file

@ -152,11 +152,12 @@ mutex_get_waiters(
}
/******************************************************************//**
NOTE! Use the corresponding macro mutex_exit(), not directly this function!
Unlocks a mutex owned by the current thread. */
UNIV_INLINE
void
mutex_exit(
/*=======*/
mutex_exit_func(
/*============*/
mutex_t* mutex) /*!< in: pointer to mutex */
{
ut_ad(mutex_own(mutex));
@ -220,3 +221,148 @@ mutex_enter_func(
mutex_spin_wait(mutex, file_name, line);
}
#ifdef UNIV_PFS_MUTEX
/******************************************************************//**
NOTE! Please use the corresponding macro mutex_enter(), not directly
this function!
This is a performance schema instrumented wrapper function for
mutex_enter_func(). */
UNIV_INLINE
void
pfs_mutex_enter_func(
/*=================*/
mutex_t* mutex, /*!< in: pointer to mutex */
const char* file_name, /*!< in: file name where locked */
ulint line) /*!< in: line where locked */
{
struct PSI_mutex_locker* locker = NULL;
int result = 0;
if (UNIV_LIKELY(PSI_server && mutex->pfs_psi)) {
locker = PSI_server->get_thread_mutex_locker(
mutex->pfs_psi, PSI_MUTEX_LOCK);
if (locker) {
PSI_server->start_mutex_wait(locker, file_name, line);
}
}
mutex_enter_func(mutex, file_name, line);
if (locker) {
PSI_server->end_mutex_wait(locker, result);
}
}
/********************************************************************//**
NOTE! Please use the corresponding macro mutex_enter_nowait(), not directly
this function!
This is a performance schema instrumented wrapper function for
mutex_enter_nowait_func.
@return 0 if succeed, 1 if not */
UNIV_INLINE
ulint
pfs_mutex_enter_nowait_func(
/*========================*/
mutex_t* mutex, /*!< in: pointer to mutex */
const char* file_name, /*!< in: file name where mutex
requested */
ulint line) /*!< in: line where requested */
{
ulint ret;
struct PSI_mutex_locker* locker = NULL;
int result = 0;
if (UNIV_LIKELY(PSI_server && mutex->pfs_psi)) {
locker = PSI_server->get_thread_mutex_locker(
mutex->pfs_psi, PSI_MUTEX_LOCK);
if (locker) {
PSI_server->start_mutex_wait(locker, file_name, line);
}
}
ret = mutex_enter_nowait_func(mutex, file_name, line);
if (locker) {
PSI_server->end_mutex_wait(locker, result);
}
return(ret);
}
/******************************************************************//**
NOTE! Please use the corresponding macro mutex_exit(), not directly
this function!
A wrap function of mutex_exit_func() with performance schema instrumentation.
Unlocks a mutex owned by the current thread. */
UNIV_INLINE
void
pfs_mutex_exit_func(
/*================*/
mutex_t* mutex) /*!< in: pointer to mutex */
{
if (UNIV_LIKELY(PSI_server && mutex->pfs_psi)) {
struct PSI_thread* thread;
thread = PSI_server->get_thread();
if (thread) {
PSI_server->unlock_mutex(thread, mutex->pfs_psi);
}
}
mutex_exit_func(mutex);
}
/******************************************************************//**
NOTE! Please use the corresponding macro mutex_create(), not directly
this function!
A wrapper function for mutex_create_func(), registers the mutex
with performance schema if "UNIV_PFS_MUTEX" is defined when
creating the mutex */
UNIV_INLINE
void
pfs_mutex_create_func(
/*==================*/
mysql_pfs_key_t key, /*!< in: Performance Schema key */
mutex_t* mutex, /*!< in: pointer to memory */
# ifdef UNIV_DEBUG
const char* cmutex_name, /*!< in: mutex name */
# ifdef UNIV_SYNC_DEBUG
ulint level, /*!< in: level */
# endif /* UNIV_SYNC_DEBUG */
# endif /* UNIV_DEBUG */
const char* cfile_name, /*!< in: file name where created */
ulint cline) /*!< in: file line where created */
{
mutex->pfs_psi = (PSI_server && PFS_IS_INSTRUMENTED(key))
? PSI_server->init_mutex(key, mutex)
: NULL;
mutex_create_func(mutex,
# ifdef UNIV_DEBUG
cmutex_name,
# ifdef UNIV_SYNC_DEBUG
level,
# endif /* UNIV_SYNC_DEBUG */
# endif /* UNIV_DEBUG */
cfile_name,
cline);
}
/******************************************************************//**
NOTE! Please use the corresponding macro mutex_free(), not directly
this function!
Wrapper function for mutex_free_func(). Also destroys the performance
schema probes when freeing the mutex */
UNIV_INLINE
void
pfs_mutex_free_func(
/*===================*/
mutex_t* mutex) /*!< in: mutex */
{
if (UNIV_LIKELY(PSI_server && mutex->pfs_psi)) {
PSI_server->destroy_mutex(mutex->pfs_psi);
mutex->pfs_psi= NULL;
}
mutex_free_func(mutex);
}
#endif /* UNIV_PFS_MUTEX */

View file

@ -112,8 +112,10 @@ This function runs a purge batch.
@return number of undo log pages handled in the batch */
UNIV_INTERN
ulint
trx_purge(void);
/*===========*/
trx_purge(
/*======*/
ulint limit); /*!< in: the maximum number of records to
purge in one batch */
/******************************************************************//**
Prints information of the purge system to stderr. */
UNIV_INTERN

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -103,7 +103,7 @@ trx_rseg_header_create(
ulint zip_size, /*!< in: compressed page size in bytes
or 0 for uncompressed pages */
ulint max_size, /*!< in: max size in pages */
ulint* slot_no, /*!< out: rseg id == slot number in trx sys */
ulint rseg_slot_no, /*!< in: rseg id == slot number in trx sys */
mtr_t* mtr); /*!< in: mtr */
/*********************************************************************//**
Creates the memory copies for rollback segments and initializes the
@ -114,17 +114,6 @@ trx_rseg_list_and_array_init(
/*=========================*/
trx_sysf_t* sys_header, /*!< in: trx system header */
mtr_t* mtr); /*!< in: mtr */
/****************************************************************//**
Creates a new rollback segment to the database.
@return the created segment object, NULL if fail */
UNIV_INTERN
trx_rseg_t*
trx_rseg_create(
/*============*/
ulint space, /*!< in: space id */
ulint max_size, /*!< in: max size in pages */
ulint* id, /*!< out: rseg id */
mtr_t* mtr); /*!< in: mtr */
/***************************************************************************
Free's an instance of the rollback segment in memory. */
UNIV_INTERN
@ -133,6 +122,12 @@ trx_rseg_mem_free(
/*==============*/
trx_rseg_t* rseg); /* in, own: instance to free */
/*********************************************************************
Creates a rollback segment. */
UNIV_INTERN
trx_rseg_t*
trx_rseg_create(void);
/*==================*/
/* Number of undo log slots in a rollback segment file copy */
#define TRX_RSEG_N_SLOTS (UNIV_PAGE_SIZE / 16)

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -333,12 +333,14 @@ UNIV_INTERN
void
trx_sys_file_format_tag_init(void);
/*==============================*/
#ifndef UNIV_HOTBACKUP
/*****************************************************************//**
Shutdown/Close the transaction system. */
UNIV_INTERN
void
trx_sys_close(void);
/*===============*/
#endif /* !UNIV_HOTBACKUP */
/*****************************************************************//**
Get the name representation of the file format from its id.
@return pointer to the name */
@ -429,6 +431,14 @@ trx_sys_file_format_id_to_name(
const ulint id); /*!< in: id of the file format */
#endif /* !UNIV_HOTBACKUP */
/*********************************************************************
Creates the rollback segments */
UNIV_INTERN
void
trx_sys_create_rsegs(
/*=================*/
ulint n_rsegs); /*!< number of rollback segments to create */
/* The automatically created system rollback segment has this id */
#define TRX_SYS_SYSTEM_RSEG_ID 0
@ -463,11 +473,16 @@ trx_sys_file_format_id_to_name(
slots */
/*------------------------------------------------------------- @} */
/** Maximum number of rollback segments: the number of segment
specification slots in the transaction system array; rollback segment
id must fit in one byte, therefore 256; each slot is currently 8 bytes
in size */
#define TRX_SYS_N_RSEGS 256
/* Max number of rollback segments: the number of segment specification slots
in the transaction system array; rollback segment id must fit in one (signed)
byte, therefore 128; each slot is currently 8 bytes in size. If you want
to raise the level to 256 then you will need to fix some assertions that
impose the 7 bit restriction. e.g., mach_write_to_3() */
#define TRX_SYS_N_RSEGS 128
/* Originally, InnoDB defined TRX_SYS_N_RSEGS as 256 but created only one
rollback segment. It initialized some arrays with this number of entries.
We must remember this limit in order to keep file compatibility. */
#define TRX_SYS_OLD_N_RSEGS 256
/** Maximum length of MySQL binlog file name, in bytes.
@see trx_sys_mysql_master_log_name
@ -495,7 +510,6 @@ this contains the same fields as TRX_SYS_MYSQL_LOG_INFO below */
within that file */
#define TRX_SYS_MYSQL_LOG_NAME 12 /*!< MySQL log file name */
#ifndef UNIV_HOTBACKUP
/** Doublewrite buffer */
/* @{ */
/** The offset of the doublewrite buffer header on the trx system header page */
@ -547,6 +561,7 @@ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_NO. */
#define TRX_SYS_DOUBLEWRITE_BLOCK_SIZE FSP_EXTENT_SIZE
/* @} */
#ifndef UNIV_HOTBACKUP
/** File format tag */
/* @{ */
/** The offset of the file format tag on the trx system header page

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -349,7 +349,7 @@ trx_print(
use the default max length */
/** Type of data dictionary operation */
enum trx_dict_op {
typedef enum trx_dict_op {
/** The transaction is not modifying the data dictionary. */
TRX_DICT_OP_NONE = 0,
/** The transaction is creating a table or an index, or
@ -361,7 +361,7 @@ enum trx_dict_op {
existing table. In crash recovery, the data dictionary
must be locked, but the table must not be dropped. */
TRX_DICT_OP_INDEX = 2
};
} trx_dict_op_t;
/**********************************************************************//**
Determine if a transaction is a dictionary operation.
@ -463,69 +463,79 @@ rolling back after a database recovery */
struct trx_struct{
ulint magic_n;
/* All the next fields are protected by the kernel mutex, except the
undo logs which are protected by undo_mutex */
/* These fields are not protected by any mutex. */
const char* op_info; /*!< English text describing the
current operation, or an empty
string */
unsigned is_purge:1; /*!< 0=user transaction, 1=purge */
unsigned is_recovered:1; /*!< 0=normal transaction,
1=recovered, must be rolled back */
unsigned conc_state:2; /*!< state of the trx from the point
ulint conc_state; /*!< state of the trx from the point
of view of concurrency control:
TRX_ACTIVE, TRX_COMMITTED_IN_MEMORY,
... */
unsigned que_state:2; /*!< valid when conc_state == TRX_ACTIVE:
TRX_QUE_RUNNING, TRX_QUE_LOCK_WAIT,
... */
unsigned isolation_level:2;/* TRX_ISO_REPEATABLE_READ, ... */
unsigned check_foreigns:1;/* normally TRUE, but if the user
ulint isolation_level;/* TRX_ISO_REPEATABLE_READ, ... */
ulint check_foreigns; /* normally TRUE, but if the user
wants to suppress foreign key checks,
(in table imports, for example) we
set this FALSE */
unsigned check_unique_secondary:1;
ulint check_unique_secondary;
/* normally TRUE, but if the user
wants to speed up inserts by
suppressing unique key checks
for secondary indexes when we decide
if we can use the insert buffer for
them, we set this FALSE */
unsigned support_xa:1; /*!< normally we do the XA two-phase
ulint support_xa; /*!< normally we do the XA two-phase
commit steps, but by setting this to
FALSE, one can save CPU time and about
150 bytes in the undo log size as then
we skip XA steps */
unsigned flush_log_later:1;/* In 2PC, we hold the
ulint flush_log_later;/* In 2PC, we hold the
prepare_commit mutex across
both phases. In that case, we
defer flush of the logs to disk
until after we release the
mutex. */
unsigned must_flush_log_later:1;/* this flag is set to TRUE in
ulint must_flush_log_later;/* this flag is set to TRUE in
trx_commit_off_kernel() if
flush_log_later was TRUE, and there
were modifications by the transaction;
in that case we must flush the log
in trx_commit_complete_for_mysql() */
unsigned dict_operation:2;/**< @see enum trx_dict_op */
unsigned duplicates:2; /*!< TRX_DUP_IGNORE | TRX_DUP_REPLACE */
unsigned active_trans:2; /*!< 1 - if a transaction in MySQL
ulint duplicates; /*!< TRX_DUP_IGNORE | TRX_DUP_REPLACE */
ulint active_trans; /*!< 1 - if a transaction in MySQL
is active. 2 - if prepare_commit_mutex
was taken */
unsigned has_search_latch:1;
ulint has_search_latch;
/* TRUE if this trx has latched the
search system latch in S-mode */
unsigned declared_to_be_inside_innodb:1;
ulint deadlock_mark; /*!< a mark field used in deadlock
checking algorithm. */
trx_dict_op_t dict_operation; /**< @see enum trx_dict_op */
/* Fields protected by the srv_conc_mutex. */
ulint declared_to_be_inside_innodb;
/* this is TRUE if we have declared
this transaction in
srv_conc_enter_innodb to be inside the
InnoDB engine */
unsigned handling_signals:1;/* this is TRUE as long as the trx
is handling signals */
unsigned dict_operation_lock_mode:2;
/* 0, RW_S_LATCH, or RW_X_LATCH:
/* Fields protected by dict_operation_lock. The very latch
it is used to track. */
ulint dict_operation_lock_mode;
/*!< 0, RW_S_LATCH, or RW_X_LATCH:
the latch mode trx currently holds
on dict_operation_lock */
/* All the next fields are protected by the kernel mutex, except the
undo logs which are protected by undo_mutex */
ulint is_purge; /*!< 0=user transaction, 1=purge */
ulint is_recovered; /*!< 0=normal transaction,
1=recovered, must be rolled back */
ulint que_state; /*!< valid when conc_state
== TRX_ACTIVE: TRX_QUE_RUNNING,
TRX_QUE_LOCK_WAIT, ... */
ulint handling_signals;/* this is TRUE as long as the trx
is handling signals */
time_t start_time; /*!< time the trx object was created
or the state last time became
TRX_ACTIVE */
@ -640,11 +650,6 @@ struct trx_struct{
wait_thrs; /*!< query threads belonging to this
trx that are in the QUE_THR_LOCK_WAIT
state */
ulint deadlock_mark; /*!< a mark field used in deadlock
checking algorithm. This must be
in its own machine word, because
it can be changed by other
threads while holding kernel_mutex. */
/*------------------------------*/
mem_heap_t* lock_heap; /*!< memory heap for the locks of the
transaction */

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -70,6 +70,13 @@ typedef struct trx_named_savept_struct trx_named_savept_t;
enum trx_rb_ctx {
RB_NONE = 0, /*!< no rollback */
RB_NORMAL, /*!< normal rollback */
RB_RECOVERY_PURGE_REC,
/*!< rolling back an incomplete transaction,
in crash recovery, rolling back an
INSERT that was performed by updating a
delete-marked record; if the delete-marked record
no longer exists in an active read view, it will
be purged */
RB_RECOVERY /*!< rolling back an incomplete transaction,
in crash recovery */
};

View file

@ -42,7 +42,7 @@ trx_undo_build_roll_ptr(
#if DATA_ROLL_PTR_LEN != 7
# error "DATA_ROLL_PTR_LEN != 7"
#endif
ut_ad(rseg_id < 128);
ut_ad(rseg_id < TRX_SYS_N_RSEGS);
return(ut_dulint_create(is_insert * 128 * 256 * 256
+ rseg_id * 256 * 256

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Copyright (c) 2009, Sun Microsystems, Inc.
@ -45,8 +45,8 @@ Created 1/20/1994 Heikki Tuuri
#endif /* UNIV_HOTBACKUP */
#define INNODB_VERSION_MAJOR 1
#define INNODB_VERSION_MINOR 0
#define INNODB_VERSION_BUGFIX 6
#define INNODB_VERSION_MINOR 1
#define INNODB_VERSION_BUGFIX 0
/* The following is the InnoDB version as shown in
SELECT plugin_version FROM information_schema.plugins;
@ -144,6 +144,23 @@ Sun Studio */
#endif /* #if (defined(WIN32) || ... */
/* Following defines are to enable performance schema
instrumentation in each of four InnoDB modules if
HAVE_PSI_INTERFACE is defined. */
#ifdef HAVE_PSI_INTERFACE
# define UNIV_PFS_MUTEX
# define UNIV_PFS_RWLOCK
/* For I/O instrumentation, performance schema rely
on a native descriptor to identify the file, this
descriptor could conflict with our OS level descriptor.
Disable IO instrumentation on Windows until this is
resolved */
# ifndef __WIN__
# define UNIV_PFS_IO
# endif
# define UNIV_PFS_THREAD
#endif /* HAVE_PSI_INTERFACE */
/* DEBUG VERSION CONTROL
===================== */
@ -208,6 +225,9 @@ operations (very slow); also UNIV_DEBUG must be defined */
for compressed pages */
#define UNIV_ZIP_COPY /* call page_zip_copy_recs()
more often */
#define UNIV_AIO_DEBUG /* prints info about
submitted and reaped AIO
requests to the log. */
#endif
#define UNIV_BTR_DEBUG /* check B-tree links */
@ -229,11 +249,6 @@ by one. */
/* the above option prevents forcing of log to disk
at a buffer page write: it should be tested with this
option off; also some ibuf tests are suppressed */
/*
#define UNIV_BASIC_LOG_DEBUG
*/
/* the above option enables basic recovery debugging:
new allocated file pages are reset */
/* Linkage specifier for non-static InnoDB symbols (variables and functions)
that are only referenced from within InnoDB, not from MySQL */

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -110,7 +110,7 @@ Adds the node as the last element in a two-way linked list.
*/
#define UT_LIST_ADD_LAST(NAME, BASE, N)\
{\
ut_ad(N);\
ut_ad(N != NULL);\
((BASE).count)++;\
((N)->NAME).prev = (BASE).end;\
((N)->NAME).next = NULL;\

View file

@ -0,0 +1,293 @@
/******************************************************
Red-Black tree implementation.
(c) 2007 Oracle/Innobase Oy
Created 2007-03-20 Sunny Bains
*******************************************************/
#ifndef INNOBASE_UT0RBT_H
#define INNOBASE_UT0RBT_H
#if !defined(IB_RBT_TESTING)
#include "univ.i"
#include "ut0mem.h"
#else
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#define ut_malloc malloc
#define ut_free free
#define ulint unsigned long
#define ut_a(c) assert(c)
#define ut_error assert(0)
#define ibool unsigned int
#define TRUE 1
#define FALSE 0
#endif
/* Red black tree typedefs */
typedef struct ib_rbt_struct ib_rbt_t;
typedef struct ib_rbt_node_struct ib_rbt_node_t;
// FIXME: Iterator is a better name than _bound_
typedef struct ib_rbt_bound_struct ib_rbt_bound_t;
typedef void (*ib_rbt_print_node)(const ib_rbt_node_t* node);
typedef int (*ib_rbt_compare)(const void* p1, const void* p2);
/* Red black tree color types */
enum ib_rbt_color_enum {
IB_RBT_RED,
IB_RBT_BLACK
};
typedef enum ib_rbt_color_enum ib_rbt_color_t;
/* Red black tree node */
struct ib_rbt_node_struct {
ib_rbt_color_t color; /* color of this node */
ib_rbt_node_t* left; /* points left child */
ib_rbt_node_t* right; /* points right child */
ib_rbt_node_t* parent; /* points parent node */
char value[1]; /* Data value */
};
/* Red black tree instance.*/
struct ib_rbt_struct {
ib_rbt_node_t* nil; /* Black colored node that is
used as a sentinel. This is
pre-allocated too.*/
ib_rbt_node_t* root; /* Root of the tree, this is
pre-allocated and the first
data node is the left child.*/
ulint n_nodes; /* Total number of data nodes */
ib_rbt_compare compare; /* Fn. to use for comparison */
ulint sizeof_value; /* Sizeof the item in bytes */
};
/* The result of searching for a key in the tree, this is useful for
a speedy lookup and insert if key doesn't exist.*/
struct ib_rbt_bound_struct {
const ib_rbt_node_t*
last; /* Last node visited */
int result; /* Result of comparing with
the last non-nil node that
was visited */
};
/* Size in elements (t is an rb tree instance) */
#define rbt_size(t) (t->n_nodes)
/* Check whether the rb tree is empty (t is an rb tree instance) */
#define rbt_empty(t) (rbt_size(t) == 0)
/* Get data value (t is the data type, n is an rb tree node instance) */
#define rbt_value(t, n) ((t*) &n->value[0])
/* Compare a key with the node value (t is tree, k is key, n is node)*/
#define rbt_compare(t, k, n) (t->compare(k, n->value))
/************************************************************************
Free an instance of a red black tree */
UNIV_INTERN
void
rbt_free(
/*=====*/
ib_rbt_t* tree); /*!< in: rb tree to free */
/************************************************************************
Create an instance of a red black tree
@return rb tree instance */
UNIV_INTERN
ib_rbt_t*
rbt_create(
/*=======*/
size_t sizeof_value, /*!< in: size in bytes */
ib_rbt_compare compare); /*!< in: comparator */
/************************************************************************
Delete a node from the red black tree, identified by key */
UNIV_INTERN
ibool
rbt_delete(
/*=======*/
/* in: TRUE on success */
ib_rbt_t* tree, /* in: rb tree */
const void* key); /* in: key to delete */
/************************************************************************
Remove a node from the red black tree, NOTE: This function will not delete
the node instance, THAT IS THE CALLERS RESPONSIBILITY.
@return the deleted node with the const. */
UNIV_INTERN
ib_rbt_node_t*
rbt_remove_node(
/*============*/
ib_rbt_t* tree, /*!< in: rb tree */
const ib_rbt_node_t*
node); /*!< in: node to delete, this
is a fudge and declared const
because the caller has access
only to const nodes.*/
/************************************************************************
Return a node from the red black tree, identified by
key, NULL if not found
@return node if found else return NULL */
UNIV_INTERN
const ib_rbt_node_t*
rbt_lookup(
/*=======*/
const ib_rbt_t* tree, /*!< in: rb tree to search */
const void* key); /*!< in: key to lookup */
/************************************************************************
Add data to the red black tree, identified by key (no dups yet!)
@return inserted node */
UNIV_INTERN
const ib_rbt_node_t*
rbt_insert(
/*=======*/
ib_rbt_t* tree, /*!< in: rb tree */
const void* key, /*!< in: key for ordering */
const void* value); /*!< in: data that will be
copied to the node.*/
/************************************************************************
Add a new node to the tree, useful for data that is pre-sorted.
@return appended node */
UNIV_INTERN
const ib_rbt_node_t*
rbt_add_node(
/*=========*/
ib_rbt_t* tree, /*!< in: rb tree */
ib_rbt_bound_t* parent, /*!< in: parent */
const void* value); /*!< in: this value is copied
to the node */
/************************************************************************
Return the left most data node in the tree
@return left most node */
UNIV_INTERN
const ib_rbt_node_t*
rbt_first(
/*======*/
const ib_rbt_t* tree); /*!< in: rb tree */
/************************************************************************
Return the right most data node in the tree
@return right most node */
UNIV_INTERN
const ib_rbt_node_t*
rbt_last(
/*=====*/
const ib_rbt_t* tree); /*!< in: rb tree */
/************************************************************************
Return the next node from current.
@return successor node to current that is passed in. */
UNIV_INTERN
const ib_rbt_node_t*
rbt_next(
/*=====*/
const ib_rbt_t* tree, /*!< in: rb tree */
const ib_rbt_node_t* /* in: current node */
current);
/************************************************************************
Return the prev node from current.
@return precedessor node to current that is passed in */
UNIV_INTERN
const ib_rbt_node_t*
rbt_prev(
/*=====*/
const ib_rbt_t* tree, /*!< in: rb tree */
const ib_rbt_node_t* /* in: current node */
current);
/************************************************************************
Find the node that has the lowest key that is >= key.
@return node that satisfies the lower bound constraint or NULL */
UNIV_INTERN
const ib_rbt_node_t*
rbt_lower_bound(
/*============*/
const ib_rbt_t* tree, /*!< in: rb tree */
const void* key); /*!< in: key to search */
/************************************************************************
Find the node that has the greatest key that is <= key.
@return node that satisifies the upper bound constraint or NULL */
UNIV_INTERN
const ib_rbt_node_t*
rbt_upper_bound(
/*============*/
const ib_rbt_t* tree, /*!< in: rb tree */
const void* key); /*!< in: key to search */
/************************************************************************
Search for the key, a node will be retuned in parent.last, whether it
was found or not. If not found then parent.last will contain the
parent node for the possibly new key otherwise the matching node.
@return result of last comparison */
UNIV_INTERN
int
rbt_search(
/*=======*/
const ib_rbt_t* tree, /*!< in: rb tree */
ib_rbt_bound_t* parent, /*!< in: search bounds */
const void* key); /*!< in: key to search */
/************************************************************************
Search for the key, a node will be retuned in parent.last, whether it
was found or not. If not found then parent.last will contain the
parent node for the possibly new key otherwise the matching node.
@return result of last comparison */
UNIV_INTERN
int
rbt_search_cmp(
/*===========*/
const ib_rbt_t* tree, /*!< in: rb tree */
ib_rbt_bound_t* parent, /*!< in: search bounds */
const void* key, /*!< in: key to search */
ib_rbt_compare compare); /*!< in: comparator */
/************************************************************************
Clear the tree, deletes (and free's) all the nodes. */
UNIV_INTERN
void
rbt_clear(
/*======*/
ib_rbt_t* tree); /*!< in: rb tree */
/************************************************************************
Merge the node from dst into src. Return the number of nodes merged.
@return no. of recs merged */
UNIV_INTERN
ulint
rbt_merge_uniq(
/*===========*/
ib_rbt_t* dst, /*!< in: dst rb tree */
const ib_rbt_t* src); /*!< in: src rb tree */
/************************************************************************
Merge the node from dst into src. Return the number of nodes merged.
Delete the nodes from src after copying node to dst. As a side effect
the duplicates will be left untouched in the src, since we don't support
duplicates (yet). NOTE: src and dst must be similar, the function doesn't
check for this condition (yet).
@return no. of recs merged */
UNIV_INTERN
ulint
rbt_merge_uniq_destructive(
/*=======================*/
ib_rbt_t* dst, /*!< in: dst rb tree */
ib_rbt_t* src); /*!< in: src rb tree */
/************************************************************************
Verify the integrity of the RB tree. For debugging. 0 failure else height
of tree (in count of black nodes).
@return TRUE if OK FALSE if tree invalid. */
UNIV_INTERN
ibool
rbt_validate(
/*=========*/
const ib_rbt_t* tree); /*!< in: tree to validate */
/************************************************************************
Iterate over the tree in depth first order. */
UNIV_INTERN
void
rbt_print(
/*======*/
const ib_rbt_t* tree, /*!< in: tree to traverse */
ib_rbt_print_node print); /*!< in: print function */
#endif /* INNOBASE_UT0RBT_H */

View file

@ -152,6 +152,7 @@ ut_hash_ulint(
ulint key, /*!< in: value to be hashed */
ulint table_size) /*!< in: hash table size */
{
ut_ad(table_size);
key = key ^ UT_HASH_RANDOM_MASK2;
return(key % table_size);

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
Copyright (c) 2009, Sun Microsystems, Inc.
Portions of this file contain modifications contributed and copyrighted by
@ -35,6 +35,8 @@ Created 1/20/1994 Heikki Tuuri
#include "univ.i"
#include "db0err.h"
#ifndef UNIV_HOTBACKUP
# include "os0sync.h" /* for HAVE_ATOMIC_BUILTINS */
#endif /* UNIV_HOTBACKUP */
@ -395,6 +397,16 @@ a limited buffer. */
# define ut_snprintf snprintf
#endif /* __WIN__ */
/*************************************************************//**
Convert an error number to a human readable text message. The
returned string is static and should not be freed or modified.
@return string, describing the error */
UNIV_INTERN
const char*
ut_strerr(
/*======*/
enum db_err num); /*!< in: error number */
#ifndef UNIV_NONINL
#include "ut0ut.ic"
#endif

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -376,6 +376,7 @@ UNIV_INTERN FILE* lock_latest_err_file;
/* Flags for recursive deadlock search */
#define LOCK_VICTIM_IS_START 1
#define LOCK_VICTIM_IS_OTHER 2
#define LOCK_EXCEED_MAX_DEPTH 3
/********************************************************************//**
Checks if a lock request results in a deadlock.
@ -394,24 +395,25 @@ Looks recursively for a deadlock.
deadlock and we chose 'start' as the victim, LOCK_VICTIM_IS_OTHER if a
deadlock was found and we chose some other trx as a victim: we must do
the search again in this last case because there may be another
deadlock! */
deadlock!
LOCK_EXCEED_MAX_DEPTH if the lock search exceeds max steps or max depth. */
static
ulint
lock_deadlock_recursive(
/*====================*/
trx_t* start, /*!< in: recursion starting point */
trx_t* trx, /*!< in: a transaction waiting for a lock */
lock_t* wait_lock, /*!< in: the lock trx is waiting to be granted */
lock_t* wait_lock, /*!< in: lock that is waiting to be granted */
ulint* cost, /*!< in/out: number of calculation steps thus
far: if this exceeds LOCK_MAX_N_STEPS_...
we return LOCK_VICTIM_IS_START */
we return LOCK_EXCEED_MAX_DEPTH */
ulint depth); /*!< in: recursion depth: if this exceeds
LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK, we
return LOCK_VICTIM_IS_START */
return LOCK_EXCEED_MAX_DEPTH */
/*********************************************************************//**
Gets the nth bit of a record lock.
@return TRUE if bit set */
@return TRUE if bit set also if i == ULINT_UNDEFINED return FALSE*/
UNIV_INLINE
ibool
lock_rec_get_nth_bit(
@ -1222,7 +1224,7 @@ lock_rec_get_first_on_page(
/*********************************************************************//**
Gets the next explicit lock request on a record.
@return next lock, NULL if none exists */
@return next lock, NULL if none exists or if heap_no == ULINT_UNDEFINED */
UNIV_INLINE
lock_t*
lock_rec_get_next(
@ -3261,8 +3263,6 @@ lock_deadlock_occurs(
lock_t* lock, /*!< in: lock the transaction is requesting */
trx_t* trx) /*!< in: transaction */
{
dict_table_t* table;
dict_index_t* index;
trx_t* mark_trx;
ulint ret;
ulint cost = 0;
@ -3284,31 +3284,50 @@ retry:
ret = lock_deadlock_recursive(trx, trx, lock, &cost, 0);
if (ret == LOCK_VICTIM_IS_OTHER) {
switch (ret) {
case LOCK_VICTIM_IS_OTHER:
/* We chose some other trx as a victim: retry if there still
is a deadlock */
goto retry;
}
if (UNIV_UNLIKELY(ret == LOCK_VICTIM_IS_START)) {
if (lock_get_type_low(lock) & LOCK_TABLE) {
table = lock->un_member.tab_lock.table;
index = NULL;
} else {
index = lock->index;
table = index->table;
}
case LOCK_EXCEED_MAX_DEPTH:
/* If the lock search exceeds the max step
or the max depth, the current trx will be
the victim. Print its information. */
rewind(lock_latest_err_file);
ut_print_timestamp(lock_latest_err_file);
lock_deadlock_found = TRUE;
fputs("*** WE ROLL BACK TRANSACTION (2)\n",
fputs("TOO DEEP OR LONG SEARCH IN THE LOCK TABLE"
" WAITS-FOR GRAPH, WE WILL ROLL BACK"
" FOLLOWING TRANSACTION \n",
lock_latest_err_file);
return(TRUE);
fputs("\n*** TRANSACTION:\n", lock_latest_err_file);
trx_print(lock_latest_err_file, trx, 3000);
fputs("*** WAITING FOR THIS LOCK TO BE GRANTED:\n",
lock_latest_err_file);
if (lock_get_type(lock) == LOCK_REC) {
lock_rec_print(lock_latest_err_file, lock);
} else {
lock_table_print(lock_latest_err_file, lock);
}
break;
case LOCK_VICTIM_IS_START:
fputs("*** WE ROLL BACK TRANSACTION (2)\n",
lock_latest_err_file);
break;
default:
/* No deadlock detected*/
return(FALSE);
}
return(FALSE);
lock_deadlock_found = TRUE;
return(TRUE);
}
/********************************************************************//**
@ -3317,25 +3336,26 @@ Looks recursively for a deadlock.
deadlock and we chose 'start' as the victim, LOCK_VICTIM_IS_OTHER if a
deadlock was found and we chose some other trx as a victim: we must do
the search again in this last case because there may be another
deadlock! */
deadlock!
LOCK_EXCEED_MAX_DEPTH if the lock search exceeds max steps or max depth. */
static
ulint
lock_deadlock_recursive(
/*====================*/
trx_t* start, /*!< in: recursion starting point */
trx_t* trx, /*!< in: a transaction waiting for a lock */
lock_t* wait_lock, /*!< in: the lock trx is waiting to be granted */
lock_t* wait_lock, /*!< in: lock that is waiting to be granted */
ulint* cost, /*!< in/out: number of calculation steps thus
far: if this exceeds LOCK_MAX_N_STEPS_...
we return LOCK_VICTIM_IS_START */
we return LOCK_EXCEED_MAX_DEPTH */
ulint depth) /*!< in: recursion depth: if this exceeds
LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK, we
return LOCK_VICTIM_IS_START */
return LOCK_EXCEED_MAX_DEPTH */
{
lock_t* lock;
ulint bit_no = ULINT_UNDEFINED;
trx_t* lock_trx;
ulint ret;
lock_t* lock;
trx_t* lock_trx;
ulint heap_no = ULINT_UNDEFINED;
ut_a(trx);
ut_a(start);
@ -3351,27 +3371,44 @@ lock_deadlock_recursive(
*cost = *cost + 1;
lock = wait_lock;
if (lock_get_type_low(wait_lock) == LOCK_REC) {
ulint space;
ulint page_no;
bit_no = lock_rec_find_set_bit(wait_lock);
heap_no = lock_rec_find_set_bit(wait_lock);
ut_a(heap_no != ULINT_UNDEFINED);
ut_a(bit_no != ULINT_UNDEFINED);
space = wait_lock->un_member.rec_lock.space;
page_no = wait_lock->un_member.rec_lock.page_no;
lock = lock_rec_get_first_on_page_addr(space, page_no);
/* Position the iterator on the first matching record lock. */
while (lock != NULL
&& lock != wait_lock
&& !lock_rec_get_nth_bit(lock, heap_no)) {
lock = lock_rec_get_next_on_page(lock);
}
if (lock == wait_lock) {
lock = NULL;
}
ut_ad(lock == NULL || lock_rec_get_nth_bit(lock, heap_no));
} else {
lock = wait_lock;
}
/* Look at the locks ahead of wait_lock in the lock queue */
for (;;) {
if (lock_get_type_low(lock) & LOCK_TABLE) {
/* Get previous table lock. */
if (heap_no == ULINT_UNDEFINED) {
lock = UT_LIST_GET_PREV(un_member.tab_lock.locks,
lock);
} else {
ut_ad(lock_get_type_low(lock) == LOCK_REC);
ut_a(bit_no != ULINT_UNDEFINED);
lock = (lock_t*) lock_rec_get_prev(lock, bit_no);
lock = UT_LIST_GET_PREV(
un_member.tab_lock.locks, lock);
}
if (lock == NULL) {
@ -3389,7 +3426,7 @@ lock_deadlock_recursive(
lock_trx = lock->trx;
if (lock_trx == start || too_far) {
if (lock_trx == start) {
/* We came back to the recursion starting
point: a deadlock detected; or we have
@ -3436,19 +3473,10 @@ lock_deadlock_recursive(
}
#ifdef UNIV_DEBUG
if (lock_print_waits) {
fputs("Deadlock detected"
" or too long search\n",
fputs("Deadlock detected\n",
stderr);
}
#endif /* UNIV_DEBUG */
if (too_far) {
fputs("TOO DEEP OR LONG SEARCH"
" IN THE LOCK TABLE"
" WAITS-FOR GRAPH\n", ef);
return(LOCK_VICTIM_IS_START);
}
if (trx_weight_cmp(wait_lock->trx,
start) >= 0) {
@ -3484,6 +3512,21 @@ lock_deadlock_recursive(
return(LOCK_VICTIM_IS_OTHER);
}
if (too_far) {
#ifdef UNIV_DEBUG
if (lock_print_waits) {
fputs("Deadlock search exceeds"
" max steps or depth.\n",
stderr);
}
#endif /* UNIV_DEBUG */
/* The information about transaction/lock
to be rolled back is available in the top
level. Do not print anything here. */
return(LOCK_EXCEED_MAX_DEPTH);
}
if (lock_trx->que_state == TRX_QUE_LOCK_WAIT) {
/* Another trx ahead has requested lock in an
@ -3493,12 +3536,28 @@ lock_deadlock_recursive(
ret = lock_deadlock_recursive(
start, lock_trx,
lock_trx->wait_lock, cost, depth + 1);
if (ret != 0) {
return(ret);
}
}
}
/* Get the next record lock to check. */
if (heap_no != ULINT_UNDEFINED) {
ut_a(lock != NULL);
do {
lock = lock_rec_get_next_on_page(lock);
} while (lock != NULL
&& lock != wait_lock
&& !lock_rec_get_nth_bit(lock, heap_no));
if (lock == wait_lock) {
lock = NULL;
}
}
}/* end of the 'for (;;)'-loop */
}
@ -3694,9 +3753,10 @@ lock_table_enqueue_waiting(
/*********************************************************************//**
Checks if other transactions have an incompatible mode lock request in
the lock queue. */
the lock queue.
@return lock or NULL */
UNIV_INLINE
ibool
lock_t*
lock_table_other_has_incompatible(
/*==============================*/
trx_t* trx, /*!< in: transaction, or NULL if all
@ -3718,13 +3778,13 @@ lock_table_other_has_incompatible(
&& (!lock_mode_compatible(lock_get_mode(lock), mode))
&& (wait || !(lock_get_wait(lock)))) {
return(TRUE);
return(lock);
}
lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock);
}
return(FALSE);
return(NULL);
}
/*********************************************************************//**
@ -4249,28 +4309,29 @@ lock_rec_print(
block = buf_page_try_get(space, page_no, &mtr);
if (block) {
for (i = 0; i < lock_rec_get_n_bits(lock); i++) {
for (i = 0; i < lock_rec_get_n_bits(lock); ++i) {
if (lock_rec_get_nth_bit(lock, i)) {
const rec_t* rec
= page_find_rec_with_heap_no(
buf_block_get_frame(block), i);
offsets = rec_get_offsets(
rec, lock->index, offsets,
ULINT_UNDEFINED, &heap);
fprintf(file, "Record lock, heap no %lu ",
(ulong) i);
rec_print_new(file, rec, offsets);
putc('\n', file);
}
if (!lock_rec_get_nth_bit(lock, i)) {
continue;
}
} else {
for (i = 0; i < lock_rec_get_n_bits(lock); i++) {
fprintf(file, "Record lock, heap no %lu\n", (ulong) i);
fprintf(file, "Record lock, heap no %lu", (ulong) i);
if (block) {
const rec_t* rec;
rec = page_find_rec_with_heap_no(
buf_block_get_frame(block), i);
offsets = rec_get_offsets(
rec, lock->index, offsets,
ULINT_UNDEFINED, &heap);
putc(' ', file);
rec_print_new(file, rec, offsets);
}
putc('\n', file);
}
mtr_commit(&mtr);
@ -4317,14 +4378,26 @@ lock_get_n_rec_locks(void)
#endif /* PRINT_NUM_OF_LOCK_STRUCTS */
/*********************************************************************//**
Prints info of locks for all transactions. */
Prints info of locks for all transactions.
@return FALSE if not able to obtain kernel mutex
and exits without printing info */
UNIV_INTERN
void
ibool
lock_print_info_summary(
/*====================*/
FILE* file) /*!< in: file where to print */
FILE* file, /*!< in: file where to print */
ibool nowait) /*!< in: whether to wait for the kernel mutex */
{
lock_mutex_enter_kernel();
/* if nowait is FALSE, wait on the kernel mutex,
otherwise return immediately if fail to obtain the
mutex. */
if (!nowait) {
lock_mutex_enter_kernel();
} else if (mutex_enter_nowait(&kernel_mutex)) {
fputs("FAIL TO OBTAIN KERNEL MUTEX, "
"SKIP LOCK INFO PRINTING\n", file);
return(FALSE);
}
if (lock_deadlock_found) {
fputs("------------------------\n"
@ -4356,6 +4429,7 @@ lock_print_info_summary(
"Total number of lock structs in row lock hash table %lu\n",
(ulong) lock_get_n_rec_locks());
#endif /* PRINT_NUM_OF_LOCK_STRUCTS */
return(TRUE);
}
/*********************************************************************//**
@ -4753,6 +4827,13 @@ loop:
|| lock->trx->conc_state == TRX_PREPARED
|| lock->trx->conc_state == TRX_COMMITTED_IN_MEMORY);
# ifdef UNIV_SYNC_DEBUG
/* Only validate the record queues when this thread is not
holding a space->latch. Deadlocks are possible due to
latching order violation when UNIV_DEBUG is defined while
UNIV_SYNC_DEBUG is not. */
if (!sync_thread_levels_contains(SYNC_FSP))
# endif /* UNIV_SYNC_DEBUG */
for (i = nth_bit; i < lock_rec_get_n_bits(lock); i++) {
if (i == 1 || lock_rec_get_nth_bit(lock, i)) {
@ -4918,7 +4999,7 @@ lock_rec_insert_check_and_lock(
}
trx = thr_get_trx(thr);
next_rec = page_rec_get_next((rec_t*) rec);
next_rec = page_rec_get_next_const(rec);
next_rec_heap_no = page_rec_get_heap_no(next_rec);
lock_mutex_enter_kernel();

View file

@ -1,23 +1,6 @@
/*****************************************************************************
Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/*****************************************************************************
Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
Copyright (c) 2009, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@ -99,6 +82,17 @@ UNIV_INTERN ulint log_fsp_current_free_limit = 0;
/* Global log system variable */
UNIV_INTERN log_t* log_sys = NULL;
#ifdef UNIV_PFS_RWLOCK
UNIV_INTERN mysql_pfs_key_t checkpoint_lock_key;
# ifdef UNIV_LOG_ARCHIVE
UNIV_INTERN mysql_pfs_key_t archive_lock_key;
# endif
#endif /* UNIV_PFS_RWLOCK */
#ifdef UNIV_PFS_MUTEX
UNIV_INTERN mysql_pfs_key_t log_sys_mutex_key;
#endif /* UNIV_PFS_MUTEX */
#ifdef UNIV_DEBUG
UNIV_INTERN ibool log_do_write = TRUE;
#endif /* UNIV_DEBUG */
@ -773,7 +767,7 @@ log_init(void)
{
log_sys = mem_alloc(sizeof(log_t));
mutex_create(&log_sys->mutex, SYNC_LOG);
mutex_create(log_sys_mutex_key, &log_sys->mutex, SYNC_LOG);
mutex_enter(&(log_sys->mutex));
@ -829,7 +823,8 @@ log_init(void)
log_sys->last_checkpoint_lsn = log_sys->lsn;
log_sys->n_pending_checkpoint_writes = 0;
rw_lock_create(&log_sys->checkpoint_lock, SYNC_NO_ORDER_CHECK);
rw_lock_create(checkpoint_lock_key, &log_sys->checkpoint_lock,
SYNC_NO_ORDER_CHECK);
log_sys->checkpoint_buf_ptr = mem_alloc(2 * OS_FILE_LOG_BLOCK_SIZE);
log_sys->checkpoint_buf = ut_align(log_sys->checkpoint_buf_ptr,
@ -845,7 +840,8 @@ log_init(void)
log_sys->n_pending_archive_ios = 0;
rw_lock_create(&log_sys->archive_lock, SYNC_NO_ORDER_CHECK);
rw_lock_create(archive_lock_key, &log_sys->archive_lock,
SYNC_NO_ORDER_CHECK);
log_sys->archive_buf = NULL;
@ -2013,7 +2009,7 @@ log_checkpoint(
return(TRUE);
}
ut_ad(log_sys->written_to_all_lsn >= oldest_lsn);
ut_ad(log_sys->flushed_to_disk_lsn >= oldest_lsn);
if (log_sys->n_pending_checkpoint_writes > 0) {
/* A checkpoint write is running */
@ -2371,13 +2367,15 @@ loop:
log_archived_file_name_gen(name, group->id,
group->archived_file_no + n_files);
file_handle = os_file_create(name, open_mode, OS_FILE_AIO,
file_handle = os_file_create(innodb_file_log_key,
name, open_mode,
OS_FILE_AIO,
OS_DATA_FILE, &ret);
if (!ret && (open_mode == OS_FILE_CREATE)) {
file_handle = os_file_create(
name, OS_FILE_OPEN, OS_FILE_AIO,
OS_DATA_FILE, &ret);
innodb_file_log_key, name, OS_FILE_OPEN,
OS_FILE_AIO, OS_DATA_FILE, &ret);
}
if (!ret) {
@ -3095,7 +3093,7 @@ loop:
if (srv_fast_shutdown < 2
&& (srv_error_monitor_active
|| srv_lock_timeout_and_monitor_active)) {
|| srv_lock_timeout_active || srv_monitor_active)) {
mutex_exit(&kernel_mutex);

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1997, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -138,7 +138,9 @@ UNIV_INTERN ulint recv_max_parsed_page_no;
/** This many frames must be left free in the buffer pool when we scan
the log and store the scanned log records in the buffer pool: we will
use these free frames to read in pages when we start applying the
log records to the database. */
log records to the database.
This is the default value. If the actual size of the buffer pool is
larger than 10 MB we'll set this value to 512. */
UNIV_INTERN ulint recv_n_pool_free_frames;
/** The maximum lsn we see for a page during the recovery process. If this
@ -146,6 +148,14 @@ is bigger than the lsn we are able to scan up to, that is an indication that
the recovery failed and the database may be corrupt. */
UNIV_INTERN ib_uint64_t recv_max_page_lsn;
#ifdef UNIV_PFS_THREAD
UNIV_INTERN mysql_pfs_key_t trx_rollback_clean_thread_key;
#endif /* UNIV_PFS_THREAD */
#ifdef UNIV_PFS_MUTEX
UNIV_INTERN mysql_pfs_key_t recv_sys_mutex_key;
#endif /* UNIV_PFS_MUTEX */
/* prototypes */
#ifndef UNIV_HOTBACKUP
@ -173,7 +183,7 @@ recv_sys_create(void)
recv_sys = mem_alloc(sizeof(*recv_sys));
memset(recv_sys, 0x0, sizeof(*recv_sys));
mutex_create(&recv_sys->mutex, SYNC_RECV);
mutex_create(recv_sys_mutex_key, &recv_sys->mutex, SYNC_RECV);
recv_sys->heap = NULL;
recv_sys->addr_hash = NULL;
@ -239,6 +249,7 @@ recv_sys_mem_free(void)
}
}
#ifndef UNIV_HOTBACKUP
/************************************************************
Reset the state of the recovery system variables. */
UNIV_INTERN
@ -278,6 +289,7 @@ recv_sys_var_init(void)
recv_max_page_lsn = 0;
}
#endif /* !UNIV_HOTBACKUP */
/************************************************************
Inits the recovery system for a recovery operation. */
@ -292,6 +304,14 @@ recv_sys_init(
return;
}
#ifndef UNIV_HOTBACKUP
/* Initialize red-black tree for fast insertions into the
flush_list during recovery process.
As this initialization is done while holding the buffer pool
mutex we perform it before acquiring recv_sys->mutex. */
buf_flush_init_flush_rbt();
#endif /* !UNIV_HOTBACKUP */
mutex_enter(&(recv_sys->mutex));
#ifndef UNIV_HOTBACKUP
@ -301,6 +321,12 @@ recv_sys_init(
recv_is_from_backup = TRUE;
#endif /* !UNIV_HOTBACKUP */
/* Set appropriate value of recv_n_pool_free_frames. */
if (buf_pool_get_curr_size() >= (10 * 1024 * 1024)) {
/* Buffer pool of size greater than 10 MB. */
recv_n_pool_free_frames = 512;
}
recv_sys->buf = ut_malloc(RECV_PARSING_BUF_SIZE);
recv_sys->len = 0;
recv_sys->recovered_offset = 0;
@ -370,6 +396,9 @@ recv_sys_debug_free(void)
recv_sys->last_block_buf_start = NULL;
mutex_exit(&(recv_sys->mutex));
/* Free up the flush_rbt. */
buf_flush_free_flush_rbt();
}
# endif /* UNIV_LOG_DEBUG */
@ -1632,7 +1661,9 @@ recv_recover_page_func(
if (modification_to_page) {
ut_a(block);
buf_flush_order_mutex_enter();
buf_flush_recv_note_modification(block, start_lsn, end_lsn);
buf_flush_order_mutex_exit();
}
#endif /* !UNIV_HOTBACKUP */
@ -2050,15 +2081,6 @@ recv_parse_log_rec(
}
#endif /* UNIV_LOG_LSN_DEBUG */
/* Check that page_no is sensible */
if (UNIV_UNLIKELY(*page_no > 0x8FFFFFFFUL)) {
recv_sys->found_corrupt_log = TRUE;
return(0);
}
new_ptr = recv_parse_or_apply_log_rec_body(*type, new_ptr, end_ptr,
NULL, NULL);
if (UNIV_UNLIKELY(new_ptr == NULL)) {
@ -2167,6 +2189,14 @@ recv_report_corrupt_log(
putc('\n', stderr);
}
#ifndef UNIV_HOTBACKUP
if (!srv_force_recovery) {
fputs("InnoDB: Set innodb_force_recovery"
" to ignore this error.\n", stderr);
ut_error;
}
#endif /* !UNIV_HOTBACKUP */
fputs("InnoDB: WARNING: the log file may have been corrupt and it\n"
"InnoDB: is possible that the log scan did not proceed\n"
"InnoDB: far enough in recovery! Please run CHECK TABLE\n"
@ -2556,7 +2586,7 @@ recv_scan_log_recs(
ut_ad(start_lsn % OS_FILE_LOG_BLOCK_SIZE == 0);
ut_ad(len % OS_FILE_LOG_BLOCK_SIZE == 0);
ut_ad(len > 0);
ut_ad(len >= OS_FILE_LOG_BLOCK_SIZE);
ut_a(store_to_hash <= TRUE);
finished = FALSE;
@ -2681,6 +2711,16 @@ recv_scan_log_recs(
recv_sys->found_corrupt_log = TRUE;
#ifndef UNIV_HOTBACKUP
if (!srv_force_recovery) {
fputs("InnoDB: Set"
" innodb_force_recovery"
" to ignore this error.\n",
stderr);
ut_error;
}
#endif /* !UNIV_HOTBACKUP */
} else if (!recv_sys->found_corrupt_log) {
more_data = recv_sys_add_to_parsing_buf(
log_block, scanned_lsn);
@ -3210,8 +3250,6 @@ void
recv_recovery_from_checkpoint_finish(void)
/*======================================*/
{
int i;
/* Apply the hashed log records to the respective file pages */
if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
@ -3259,9 +3297,16 @@ recv_recovery_from_checkpoint_finish(void)
The data dictionary latch should guarantee that there is at
most one data dictionary transaction active at a time. */
trx_rollback_or_clean_recovered(FALSE);
}
/* Drop partially created indexes. */
row_merge_drop_temp_indexes();
/********************************************************//**
Initiates the rollback of active transactions. */
UNIV_INTERN
void
recv_recovery_rollback_active(void)
/*===============================*/
{
int i;
#ifdef UNIV_SYNC_DEBUG
/* Wait for a while so that created threads have time to suspend
@ -3271,6 +3316,11 @@ recv_recovery_from_checkpoint_finish(void)
/* Switch latching order checks on in sync0sync.c */
sync_order_checks_on = TRUE;
#endif
/* Drop partially created indexes. */
row_merge_drop_temp_indexes();
/* Drop temporary tables. */
row_mysql_drop_temp_tables();
if (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO) {
/* Rollback the uncommitted transactions which have no user
session */
@ -3386,8 +3436,10 @@ recv_reset_log_files_for_backup(
sprintf(name, "%s%s%lu", log_dir,
ib_logfile_basename, (ulong)i);
log_file = os_file_create_simple(name, OS_FILE_CREATE,
OS_FILE_READ_WRITE, &success);
log_file = os_file_create_simple(innodb_file_log_key,
name, OS_FILE_CREATE,
OS_FILE_READ_WRITE,
&success);
if (!success) {
fprintf(stderr,
"InnoDB: Cannot create %s. Check that"
@ -3426,7 +3478,8 @@ recv_reset_log_files_for_backup(
LOG_BLOCK_HDR_SIZE);
sprintf(name, "%s%s%lu", log_dir, ib_logfile_basename, (ulong)0);
log_file = os_file_create_simple(name, OS_FILE_OPEN,
log_file = os_file_create_simple(innodb_file_log_key,
name, OS_FILE_OPEN,
OS_FILE_READ_WRITE, &success);
if (!success) {
fprintf(stderr, "InnoDB: Cannot open %s.\n", name);
@ -3476,7 +3529,8 @@ try_open_again:
log_archived_file_name_gen(name, group->id, group->archived_file_no);
file_handle = os_file_create(name, OS_FILE_OPEN,
file_handle = os_file_create(innodb_file_log_key,
name, OS_FILE_OPEN,
OS_FILE_LOG, OS_FILE_AIO, &ret);
if (ret == FALSE) {

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -29,7 +29,13 @@ Created 6/9/1994 Heikki Tuuri
/* The mutex which protects in the debug version the hash table
containing the list of live memory heaps, and also the global
variables below. */
UNIV_INTERN mutex_t mem_hash_mutex;
UNIV_INTERN mutex_t mem_hash_mutex;
#ifdef UNIV_PFS_MUTEX
/* Key to register mem_hash_mutex with performance schema */
UNIV_INTERN mysql_pfs_key_t mem_hash_mutex_key;
#endif /* UNIV_PFS_MUTEX */
# endif /* !UNIV_HOTBACKUP */
/* The following variables contain information about the
@ -149,7 +155,7 @@ mem_init(
/* Initialize the hash table */
ut_a(FALSE == mem_hash_initialized);
mutex_create(&mem_hash_mutex, SYNC_MEM_HASH);
mutex_create(mem_hash_mutex_key, &mem_hash_mutex, SYNC_MEM_HASH);
for (i = 0; i < MEM_HASH_SIZE; i++) {
UT_LIST_INIT(*mem_hash_get_nth_cell(i));
@ -180,6 +186,10 @@ mem_close(void)
{
mem_pool_free(mem_comm_pool);
mem_comm_pool = NULL;
#ifdef UNIV_MEM_DEBUG
mutex_free(&mem_hash_mutex);
mem_hash_initialized = FALSE;
#endif /* UNIV_MEM_DEBUG */
}
#endif /* !UNIV_HOTBACKUP */

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -383,6 +383,20 @@ mem_heap_create_block(
mem_block_set_free(block, MEM_BLOCK_HEADER_SIZE);
mem_block_set_start(block, MEM_BLOCK_HEADER_SIZE);
if (UNIV_UNLIKELY(heap == NULL)) {
/* This is the first block of the heap. The field
total_size should be initialized here */
block->total_size = len;
} else {
/* Not the first allocation for the heap. This block's
total_length field should be set to undefined. */
ut_d(block->total_size = ULINT_UNDEFINED);
UNIV_MEM_INVALID(&block->total_size,
sizeof block->total_size);
heap->total_size += len;
}
ut_ad((ulint)MEM_BLOCK_HEADER_SIZE < len);
return(block);
@ -471,6 +485,10 @@ mem_heap_block_free(
mem_pool_mutex_exit();
#endif
ut_ad(heap->total_size >= block->len);
heap->total_size -= block->len;
type = heap->type;
len = block->len;
block->magic_n = MEM_FREED_BLOCK_MAGIC_N;

View file

@ -114,6 +114,11 @@ struct mem_pool_struct{
/** The common memory pool */
UNIV_INTERN mem_pool_t* mem_comm_pool = NULL;
#ifdef UNIV_PFS_MUTEX
/* Key to register mutex in mem_pool_struct with performance schema */
UNIV_INTERN mysql_pfs_key_t mem_pool_mutex_key;
#endif /* UNIV_PFS_MUTEX */
/* We use this counter to check that the mem pool mutex does not leak;
this is to track a strange assertion failure reported at
mysql@lists.mysql.com */
@ -219,7 +224,7 @@ mem_pool_create(
pool->buf = ut_malloc_low(size, FALSE, TRUE);
pool->size = size;
mutex_create(&pool->mutex, SYNC_MEM_POOL);
mutex_create(mem_pool_mutex_key, &pool->mutex, SYNC_MEM_POOL);
/* Initialize the free lists */

View file

@ -30,6 +30,7 @@ Created 11/26/1995 Heikki Tuuri
#endif
#include "buf0buf.h"
#include "buf0flu.h"
#include "page0types.h"
#include "mtr0log.h"
#include "log0log.h"
@ -38,7 +39,7 @@ Created 11/26/1995 Heikki Tuuri
# include "log0recv.h"
/*****************************************************************//**
Releases the item in the slot given. */
UNIV_INLINE
static
void
mtr_memo_slot_release(
/*==================*/
@ -48,14 +49,19 @@ mtr_memo_slot_release(
void* object;
ulint type;
ut_ad(mtr && slot);
ut_ad(mtr);
ut_ad(slot);
#ifndef UNIV_DEBUG
UT_NOT_USED(mtr);
#endif /* UNIV_DEBUG */
object = slot->object;
type = slot->type;
if (UNIV_LIKELY(object != NULL)) {
if (type <= MTR_MEMO_BUF_FIX) {
buf_page_release((buf_block_t*)object, type, mtr);
buf_page_release((buf_block_t*)object, type);
} else if (type == MTR_MEMO_S_LOCK) {
rw_lock_s_unlock((rw_lock_t*)object);
#ifdef UNIV_DEBUG
@ -73,13 +79,10 @@ mtr_memo_slot_release(
}
/**********************************************************//**
Releases the mlocks and other objects stored in an mtr memo. They are released
in the order opposite to which they were pushed to the memo. NOTE! It is
essential that the x-rw-lock on a modified buffer page is not released before
buf_page_note_modification is called for that page! Otherwise, some thread
might race to modify it, and the flush list sort order on lsn would be
destroyed. */
UNIV_INLINE
Releases the mlocks and other objects stored in an mtr memo.
They are released in the order opposite to which they were pushed
to the memo. */
static
void
mtr_memo_pop_all(
/*=============*/
@ -105,6 +108,59 @@ mtr_memo_pop_all(
}
}
/*****************************************************************//**
Releases the item in the slot given. */
static
void
mtr_memo_slot_note_modification(
/*============================*/
mtr_t* mtr, /*!< in: mtr */
mtr_memo_slot_t* slot) /*!< in: memo slot */
{
ut_ad(mtr);
ut_ad(mtr->magic_n == MTR_MAGIC_N);
ut_ad(mtr->modifications);
ut_ad(buf_flush_order_mutex_own());
if (slot->object != NULL && slot->type == MTR_MEMO_PAGE_X_FIX) {
buf_flush_note_modification((buf_block_t*) slot->object, mtr);
}
}
/**********************************************************//**
Add the modified pages to the buffer flush list. They are released
in the order opposite to which they were pushed to the memo. NOTE! It is
essential that the x-rw-lock on a modified buffer page is not released
before buf_page_note_modification is called for that page! Otherwise,
some thread might race to modify it, and the flush list sort order on
lsn would be destroyed. */
static
void
mtr_memo_note_modifications(
/*========================*/
mtr_t* mtr) /*!< in: mtr */
{
dyn_array_t* memo;
ulint offset;
ut_ad(mtr);
ut_ad(mtr->magic_n == MTR_MAGIC_N);
ut_ad(mtr->state == MTR_COMMITTING); /* Currently only used in
commit */
memo = &mtr->memo;
offset = dyn_array_get_data_size(memo);
while (offset > 0) {
mtr_memo_slot_t* slot;
offset -= sizeof(mtr_memo_slot_t);
slot = dyn_array_get_element(memo, offset);
mtr_memo_slot_note_modification(mtr, slot);
}
}
/************************************************************//**
Writes the contents of a mini-transaction log, if any, to the database log. */
static
@ -137,7 +193,9 @@ mtr_log_reserve_and_write(
&mtr->start_lsn);
if (mtr->end_lsn) {
return;
/* Success. We have the log mutex.
Add pages to flush list and exit */
goto func_exit;
}
}
@ -161,6 +219,18 @@ mtr_log_reserve_and_write(
}
mtr->end_lsn = log_close();
func_exit:
buf_flush_order_mutex_enter();
/* It is now safe to release the log mutex because the
flush_order mutex will ensure that we are the first one
to insert into the flush list. */
log_release();
if (mtr->modifications) {
mtr_memo_note_modifications(mtr);
}
buf_flush_order_mutex_exit();
}
#endif /* !UNIV_HOTBACKUP */
@ -172,10 +242,6 @@ mtr_commit(
/*=======*/
mtr_t* mtr) /*!< in: mini-transaction */
{
#ifndef UNIV_HOTBACKUP
ibool write_log;
#endif /* !UNIV_HOTBACKUP */
ut_ad(mtr);
ut_ad(mtr->magic_n == MTR_MAGIC_N);
ut_ad(mtr->state == MTR_ACTIVE);
@ -184,25 +250,12 @@ mtr_commit(
#ifndef UNIV_HOTBACKUP
/* This is a dirty read, for debugging. */
ut_ad(!recv_no_log_write);
write_log = mtr->modifications && mtr->n_log_recs;
if (write_log) {
if (mtr->modifications && mtr->n_log_recs) {
mtr_log_reserve_and_write(mtr);
}
/* We first update the modification info to buffer pages, and only
after that release the log mutex: this guarantees that when the log
mutex is free, all buffer pages contain an up-to-date info of their
modifications. This fact is used in making a checkpoint when we look
at the oldest modification of any page in the buffer pool. It is also
required when we insert modified buffer pages in to the flush list
which must be sorted on oldest_modification. */
mtr_memo_pop_all(mtr);
if (write_log) {
log_release();
}
#endif /* !UNIV_HOTBACKUP */
ut_d(mtr->state = MTR_COMMITTED);
@ -241,6 +294,10 @@ mtr_rollback_to_savepoint(
slot = dyn_array_get_element(memo, offset);
ut_ad(slot->type != MTR_MEMO_MODIFY);
/* We do not call mtr_memo_slot_note_modification()
because there MUST be no changes made to the buffer
pages after the savepoint */
mtr_memo_slot_release(mtr, slot);
}
}
@ -267,18 +324,23 @@ mtr_memo_release(
offset = dyn_array_get_data_size(memo);
buf_flush_order_mutex_enter();
while (offset > 0) {
offset -= sizeof(mtr_memo_slot_t);
slot = dyn_array_get_element(memo, offset);
if ((object == slot->object) && (type == slot->type)) {
if (object == slot->object && type == slot->type) {
if (mtr->modifications) {
mtr_memo_slot_note_modification(mtr, slot);
}
mtr_memo_slot_release(mtr, slot);
break;
}
}
buf_flush_order_mutex_exit();
}
#endif /* !UNIV_HOTBACKUP */

View file

@ -0,0 +1,30 @@
drop table if exists t1;
SET @@SESSION.AUTO_INCREMENT_INCREMENT=1, @@SESSION.AUTO_INCREMENT_OFFSET=1;
CREATE TABLE t1 (c1 INT PRIMARY KEY AUTO_INCREMENT) ENGINE=InnoDB;
INSERT INTO t1 VALUES (null);
INSERT INTO t1 VALUES (null);
ALTER TABLE t1 CHANGE c1 d1 INT NOT NULL AUTO_INCREMENT;
SELECT * FROM t1;
d1
1
2
SELECT * FROM t1;
d1
1
2
INSERT INTO t1 VALUES(null);
Got one of the listed errors
ALTER TABLE t1 AUTO_INCREMENT = 3;
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`d1` int(11) NOT NULL AUTO_INCREMENT,
PRIMARY KEY (`d1`)
) ENGINE=InnoDB AUTO_INCREMENT=3 DEFAULT CHARSET=latin1
INSERT INTO t1 VALUES(null);
SELECT * FROM t1;
d1
1
2
3
DROP TABLE t1;

View file

@ -0,0 +1,34 @@
-- source include/have_innodb.inc
# embedded server ignores 'delayed', so skip this
-- source include/not_embedded.inc
--disable_warnings
drop table if exists t1;
--enable_warnings
#
# 44030: Error: (1500) Couldn't read the MAX(ID) autoinc value from
# the index (PRIMARY)
# This test requires a restart of the server
SET @@SESSION.AUTO_INCREMENT_INCREMENT=1, @@SESSION.AUTO_INCREMENT_OFFSET=1;
CREATE TABLE t1 (c1 INT PRIMARY KEY AUTO_INCREMENT) ENGINE=InnoDB;
INSERT INTO t1 VALUES (null);
INSERT INTO t1 VALUES (null);
ALTER TABLE t1 CHANGE c1 d1 INT NOT NULL AUTO_INCREMENT;
SELECT * FROM t1;
# Restart the server
-- source include/restart_mysqld.inc
# The MySQL and InnoDB data dictionaries should now be out of sync.
# The select should print message to the error log
SELECT * FROM t1;
# MySQL have made a change (http://lists.mysql.com/commits/75268) that no
# longer results in the two data dictionaries being out of sync. If they
# revert their changes then this check for ER_AUTOINC_READ_FAILED will need
# to be enabled. Also, see http://bugs.mysql.com/bug.php?id=47621.
-- error ER_AUTOINC_READ_FAILED,1467
INSERT INTO t1 VALUES(null);
ALTER TABLE t1 AUTO_INCREMENT = 3;
SHOW CREATE TABLE t1;
INSERT INTO t1 VALUES(null);
SELECT * FROM t1;
DROP TABLE t1;

View file

@ -867,25 +867,380 @@ INSERT INTO t2 SELECT NULL FROM t1;
Got one of the listed errors
DROP TABLE t1;
DROP TABLE t2;
CREATE TABLE t1 (c1 INT PRIMARY KEY AUTO_INCREMENT) ENGINE=InnoDB;
INSERT INTO t1 VALUES (null);
INSERT INTO t1 VALUES (null);
ALTER TABLE t1 CHANGE c1 d1 INT NOT NULL AUTO_INCREMENT;
SET @@SESSION.AUTO_INCREMENT_INCREMENT=1, @@SESSION.AUTO_INCREMENT_OFFSET=1;
SHOW VARIABLES LIKE "%auto_inc%";
Variable_name Value
auto_increment_increment 1
auto_increment_offset 1
CREATE TABLE t1 (c1 TINYINT PRIMARY KEY AUTO_INCREMENT, c2 VARCHAR(10)) ENGINE=InnoDB;
INSERT INTO t1 VALUES (1, NULL);
INSERT INTO t1 VALUES (-1, 'innodb');
INSERT INTO t1 VALUES (-127, 'innodb');
INSERT INTO t1 VALUES (NULL, NULL);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`c1` tinyint(4) NOT NULL AUTO_INCREMENT,
`c2` varchar(10) DEFAULT NULL,
PRIMARY KEY (`c1`)
) ENGINE=InnoDB AUTO_INCREMENT=3 DEFAULT CHARSET=latin1
SELECT * FROM t1;
d1
1
3
c1 c2
-127 innodb
-1 innodb
1 NULL
2 NULL
DROP TABLE t1;
CREATE TABLE t1 (c1 TINYINT UNSIGNED PRIMARY KEY AUTO_INCREMENT, c2 VARCHAR(10)) ENGINE=InnoDB;
INSERT INTO t1 VALUES (1, NULL);
INSERT INTO t1 VALUES (-1, 'innodb');
Warnings:
Warning 1264 Out of range value for column 'c1' at row 1
INSERT INTO t1 VALUES (-127, 'innodb');
Warnings:
Warning 1264 Out of range value for column 'c1' at row 1
INSERT INTO t1 VALUES (NULL, NULL);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`c1` tinyint(3) unsigned NOT NULL AUTO_INCREMENT,
`c2` varchar(10) DEFAULT NULL,
PRIMARY KEY (`c1`)
) ENGINE=InnoDB AUTO_INCREMENT=5 DEFAULT CHARSET=latin1
SELECT * FROM t1;
d1
c1 c2
1 NULL
2 innodb
3 innodb
4 NULL
DROP TABLE t1;
CREATE TABLE t1 (c1 SMALLINT PRIMARY KEY AUTO_INCREMENT, c2 VARCHAR(10)) ENGINE=InnoDB;
INSERT INTO t1 VALUES (1, NULL);
INSERT INTO t1 VALUES (-1, 'innodb');
INSERT INTO t1 VALUES (-32767, 'innodb');
INSERT INTO t1 VALUES (NULL, NULL);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`c1` smallint(6) NOT NULL AUTO_INCREMENT,
`c2` varchar(10) DEFAULT NULL,
PRIMARY KEY (`c1`)
) ENGINE=InnoDB AUTO_INCREMENT=3 DEFAULT CHARSET=latin1
SELECT * FROM t1;
c1 c2
-32767 innodb
-1 innodb
1 NULL
2 NULL
DROP TABLE t1;
CREATE TABLE t1 (c1 SMALLINT UNSIGNED PRIMARY KEY AUTO_INCREMENT, c2 VARCHAR(10)) ENGINE=InnoDB;
INSERT INTO t1 VALUES (1, NULL);
INSERT INTO t1 VALUES (-1, 'innodb');
Warnings:
Warning 1264 Out of range value for column 'c1' at row 1
INSERT INTO t1 VALUES (-32757, 'innodb');
Warnings:
Warning 1264 Out of range value for column 'c1' at row 1
INSERT INTO t1 VALUES (NULL, NULL);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`c1` smallint(5) unsigned NOT NULL AUTO_INCREMENT,
`c2` varchar(10) DEFAULT NULL,
PRIMARY KEY (`c1`)
) ENGINE=InnoDB AUTO_INCREMENT=5 DEFAULT CHARSET=latin1
SELECT * FROM t1;
c1 c2
1 NULL
2 innodb
3 innodb
4 NULL
DROP TABLE t1;
CREATE TABLE t1 (c1 MEDIUMINT PRIMARY KEY AUTO_INCREMENT, c2 VARCHAR(10)) ENGINE=InnoDB;
INSERT INTO t1 VALUES (1, NULL);
INSERT INTO t1 VALUES (-1, 'innodb');
INSERT INTO t1 VALUES (-8388607, 'innodb');
INSERT INTO t1 VALUES (NULL, NULL);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`c1` mediumint(9) NOT NULL AUTO_INCREMENT,
`c2` varchar(10) DEFAULT NULL,
PRIMARY KEY (`c1`)
) ENGINE=InnoDB AUTO_INCREMENT=3 DEFAULT CHARSET=latin1
SELECT * FROM t1;
c1 c2
-8388607 innodb
-1 innodb
1 NULL
2 NULL
DROP TABLE t1;
CREATE TABLE t1 (c1 MEDIUMINT UNSIGNED PRIMARY KEY AUTO_INCREMENT, c2 VARCHAR(10)) ENGINE=InnoDB;
INSERT INTO t1 VALUES (1, NULL);
INSERT INTO t1 VALUES (-1, 'innodb');
Warnings:
Warning 1264 Out of range value for column 'c1' at row 1
INSERT INTO t1 VALUES (-8388607, 'innodb');
Warnings:
Warning 1264 Out of range value for column 'c1' at row 1
INSERT INTO t1 VALUES (NULL, NULL);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`c1` mediumint(8) unsigned NOT NULL AUTO_INCREMENT,
`c2` varchar(10) DEFAULT NULL,
PRIMARY KEY (`c1`)
) ENGINE=InnoDB AUTO_INCREMENT=5 DEFAULT CHARSET=latin1
SELECT * FROM t1;
c1 c2
1 NULL
2 innodb
3 innodb
4 NULL
DROP TABLE t1;
CREATE TABLE t1 (c1 INT PRIMARY KEY AUTO_INCREMENT, c2 VARCHAR(10)) ENGINE=InnoDB;
INSERT INTO t1 VALUES (1, NULL);
INSERT INTO t1 VALUES (-1, 'innodb');
INSERT INTO t1 VALUES (-2147483647, 'innodb');
INSERT INTO t1 VALUES (NULL, NULL);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`c1` int(11) NOT NULL AUTO_INCREMENT,
`c2` varchar(10) DEFAULT NULL,
PRIMARY KEY (`c1`)
) ENGINE=InnoDB AUTO_INCREMENT=3 DEFAULT CHARSET=latin1
SELECT * FROM t1;
c1 c2
-2147483647 innodb
-1 innodb
1 NULL
2 NULL
DROP TABLE t1;
CREATE TABLE t1 (c1 INT UNSIGNED PRIMARY KEY AUTO_INCREMENT, c2 VARCHAR(10)) ENGINE=InnoDB;
INSERT INTO t1 VALUES (1, NULL);
INSERT INTO t1 VALUES (-1, 'innodb');
Warnings:
Warning 1264 Out of range value for column 'c1' at row 1
INSERT INTO t1 VALUES (-2147483647, 'innodb');
Warnings:
Warning 1264 Out of range value for column 'c1' at row 1
INSERT INTO t1 VALUES (NULL, NULL);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`c1` int(10) unsigned NOT NULL AUTO_INCREMENT,
`c2` varchar(10) DEFAULT NULL,
PRIMARY KEY (`c1`)
) ENGINE=InnoDB AUTO_INCREMENT=5 DEFAULT CHARSET=latin1
SELECT * FROM t1;
c1 c2
1 NULL
2 innodb
3 innodb
4 NULL
DROP TABLE t1;
CREATE TABLE t1 (c1 BIGINT PRIMARY KEY AUTO_INCREMENT, c2 VARCHAR(10)) ENGINE=InnoDB;
INSERT INTO t1 VALUES (1, NULL);
INSERT INTO t1 VALUES (-1, 'innodb');
INSERT INTO t1 VALUES (-9223372036854775807, 'innodb');
INSERT INTO t1 VALUES (NULL, NULL);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`c1` bigint(20) NOT NULL AUTO_INCREMENT,
`c2` varchar(10) DEFAULT NULL,
PRIMARY KEY (`c1`)
) ENGINE=InnoDB AUTO_INCREMENT=3 DEFAULT CHARSET=latin1
SELECT * FROM t1;
c1 c2
-9223372036854775807 innodb
-1 innodb
1 NULL
2 NULL
DROP TABLE t1;
CREATE TABLE t1 (c1 BIGINT UNSIGNED PRIMARY KEY AUTO_INCREMENT, c2 VARCHAR(10)) ENGINE=InnoDB;
INSERT INTO t1 VALUES (1, NULL);
INSERT INTO t1 VALUES (-1, 'innodb');
Warnings:
Warning 1264 Out of range value for column 'c1' at row 1
INSERT INTO t1 VALUES (-9223372036854775807, 'innodb');
Warnings:
Warning 1264 Out of range value for column 'c1' at row 1
INSERT INTO t1 VALUES (NULL, NULL);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`c1` bigint(20) unsigned NOT NULL AUTO_INCREMENT,
`c2` varchar(10) DEFAULT NULL,
PRIMARY KEY (`c1`)
) ENGINE=InnoDB AUTO_INCREMENT=5 DEFAULT CHARSET=latin1
SELECT * FROM t1;
c1 c2
1 NULL
2 innodb
3 innodb
4 NULL
DROP TABLE t1;
CREATE TABLE t1 (c1 INT AUTO_INCREMENT, c2 INT, PRIMARY KEY(c1)) AUTO_INCREMENT=10 ENGINE=InnoDB;
CREATE INDEX i1 on t1(c2);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`c1` int(11) NOT NULL AUTO_INCREMENT,
`c2` int(11) DEFAULT NULL,
PRIMARY KEY (`c1`),
KEY `i1` (`c2`)
) ENGINE=InnoDB AUTO_INCREMENT=10 DEFAULT CHARSET=latin1
INSERT INTO t1 (c2) values (0);
SELECT * FROM t1;
c1 c2
10 0
DROP TABLE t1;
DROP TABLE IF EXISTS t1;
Warnings:
Note 1051 Unknown table 't1'
CREATE TABLE t1(C1 DOUBLE AUTO_INCREMENT KEY, C2 CHAR(10)) ENGINE=InnoDB;
INSERT INTO t1(C1, C2) VALUES (1, 'innodb'), (3, 'innodb');
INSERT INTO t1(C2) VALUES ('innodb');
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`C1` double NOT NULL AUTO_INCREMENT,
`C2` char(10) DEFAULT NULL,
PRIMARY KEY (`C1`)
) ENGINE=InnoDB AUTO_INCREMENT=5 DEFAULT CHARSET=latin1
DROP TABLE t1;
CREATE TABLE t1(C1 FLOAT AUTO_INCREMENT KEY, C2 CHAR(10)) ENGINE=InnoDB;
INSERT INTO t1(C1, C2) VALUES (1, 'innodb'), (3, 'innodb');
INSERT INTO t1(C2) VALUES ('innodb');
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`C1` float NOT NULL AUTO_INCREMENT,
`C2` char(10) DEFAULT NULL,
PRIMARY KEY (`C1`)
) ENGINE=InnoDB AUTO_INCREMENT=5 DEFAULT CHARSET=latin1
DROP TABLE t1;
DROP TABLE IF EXISTS t1;
Warnings:
Note 1051 Unknown table 't1'
CREATE TABLE t1 (c1 INT AUTO_INCREMENT PRIMARY KEY) ENGINE=InnoDB;
INSERT INTO t1 SET c1 = 1;
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`c1` int(11) NOT NULL AUTO_INCREMENT,
PRIMARY KEY (`c1`)
) ENGINE=InnoDB AUTO_INCREMENT=2 DEFAULT CHARSET=latin1
INSERT INTO t1 SET c1 = 2;
INSERT INTO t1 SET c1 = -1;
SELECT * FROM t1;
c1
-1
1
3
INSERT INTO t1 VALUES(null);
2
INSERT INTO t1 SET c1 = -1;
Got one of the listed errors
ALTER TABLE t1 AUTO_INCREMENT = 3;
INSERT INTO t1 VALUES(null);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`c1` int(11) NOT NULL AUTO_INCREMENT,
PRIMARY KEY (`c1`)
) ENGINE=InnoDB AUTO_INCREMENT=3 DEFAULT CHARSET=latin1
REPLACE INTO t1 VALUES (-1);
SELECT * FROM t1;
d1
c1
-1
1
2
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`c1` int(11) NOT NULL AUTO_INCREMENT,
PRIMARY KEY (`c1`)
) ENGINE=InnoDB AUTO_INCREMENT=3 DEFAULT CHARSET=latin1
DROP TABLE t1;
DROP TABLE IF EXISTS t1;
Warnings:
Note 1051 Unknown table 't1'
CREATE TABLE t1 (c1 INTEGER AUTO_INCREMENT, PRIMARY KEY (c1)) ENGINE=InnoDB;
INSERT INTO t1 VALUES (-685113344), (1), (NULL), (NULL);
SELECT * FROM t1;
c1
-685113344
1
2
3
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`c1` int(11) NOT NULL AUTO_INCREMENT,
PRIMARY KEY (`c1`)
) ENGINE=InnoDB AUTO_INCREMENT=6 DEFAULT CHARSET=latin1
DROP TABLE t1;
CREATE TABLE t1 (c1 INTEGER AUTO_INCREMENT, PRIMARY KEY (c1)) ENGINE=InnoDB;
INSERT INTO t1 VALUES (-685113344), (2), (NULL), (NULL);
SELECT * FROM t1;
c1
-685113344
2
3
4
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`c1` int(11) NOT NULL AUTO_INCREMENT,
PRIMARY KEY (`c1`)
) ENGINE=InnoDB AUTO_INCREMENT=7 DEFAULT CHARSET=latin1
DROP TABLE t1;
CREATE TABLE t1 (c1 INTEGER AUTO_INCREMENT, PRIMARY KEY (c1)) ENGINE=InnoDB;
INSERT INTO t1 VALUES (NULL), (2), (-685113344), (NULL);
INSERT INTO t1 VALUES (4), (5), (6), (NULL);
SELECT * FROM t1;
c1
-685113344
1
2
3
4
5
6
7
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`c1` int(11) NOT NULL AUTO_INCREMENT,
PRIMARY KEY (`c1`)
) ENGINE=InnoDB AUTO_INCREMENT=11 DEFAULT CHARSET=latin1
DROP TABLE t1;
CREATE TABLE t1 (c1 INTEGER AUTO_INCREMENT, PRIMARY KEY (c1)) ENGINE=InnoDB;
INSERT INTO t1 VALUES (NULL), (2), (-685113344), (5);
SELECT * FROM t1;
c1
-685113344
1
2
5
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`c1` int(11) NOT NULL AUTO_INCREMENT,
PRIMARY KEY (`c1`)
) ENGINE=InnoDB AUTO_INCREMENT=6 DEFAULT CHARSET=latin1
DROP TABLE t1;
CREATE TABLE t1 (c1 INTEGER AUTO_INCREMENT, PRIMARY KEY (c1)) ENGINE=InnoDB;
INSERT INTO t1 VALUES (1), (2), (-685113344), (NULL);
SELECT * FROM t1;
c1
-685113344
1
2
3
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`c1` int(11) NOT NULL AUTO_INCREMENT,
PRIMARY KEY (`c1`)
) ENGINE=InnoDB AUTO_INCREMENT=7 DEFAULT CHARSET=latin1
DROP TABLE t1;

View file

@ -478,23 +478,187 @@ INSERT INTO t2 SELECT c1 FROM t1;
INSERT INTO t2 SELECT NULL FROM t1;
DROP TABLE t1;
DROP TABLE t2;
#
# 44030: Error: (1500) Couldn't read the MAX(ID) autoinc value from
# the index (PRIMARY)
# This test requires a restart of the server
CREATE TABLE t1 (c1 INT PRIMARY KEY AUTO_INCREMENT) ENGINE=InnoDB;
INSERT INTO t1 VALUES (null);
INSERT INTO t1 VALUES (null);
ALTER TABLE t1 CHANGE c1 d1 INT NOT NULL AUTO_INCREMENT;
SELECT * FROM t1;
# Restart the server
-- source include/restart_mysqld.inc
# The MySQL and InnoDB data dictionaries should now be out of sync.
# The select should print message to the error log
SELECT * FROM t1;
-- error ER_AUTOINC_READ_FAILED,1467
INSERT INTO t1 VALUES(null);
ALTER TABLE t1 AUTO_INCREMENT = 3;
INSERT INTO t1 VALUES(null);
# If the user has specified negative values for an AUTOINC column then
# InnoDB should ignore those values when setting the table's max value.
SET @@SESSION.AUTO_INCREMENT_INCREMENT=1, @@SESSION.AUTO_INCREMENT_OFFSET=1;
SHOW VARIABLES LIKE "%auto_inc%";
# TINYINT
CREATE TABLE t1 (c1 TINYINT PRIMARY KEY AUTO_INCREMENT, c2 VARCHAR(10)) ENGINE=InnoDB;
INSERT INTO t1 VALUES (1, NULL);
INSERT INTO t1 VALUES (-1, 'innodb');
INSERT INTO t1 VALUES (-127, 'innodb');
INSERT INTO t1 VALUES (NULL, NULL);
SHOW CREATE TABLE t1;
SELECT * FROM t1;
DROP TABLE t1;
CREATE TABLE t1 (c1 TINYINT UNSIGNED PRIMARY KEY AUTO_INCREMENT, c2 VARCHAR(10)) ENGINE=InnoDB;
INSERT INTO t1 VALUES (1, NULL);
INSERT INTO t1 VALUES (-1, 'innodb');
INSERT INTO t1 VALUES (-127, 'innodb');
INSERT INTO t1 VALUES (NULL, NULL);
SHOW CREATE TABLE t1;
SELECT * FROM t1;
DROP TABLE t1;
#
# SMALLINT
#
CREATE TABLE t1 (c1 SMALLINT PRIMARY KEY AUTO_INCREMENT, c2 VARCHAR(10)) ENGINE=InnoDB;
INSERT INTO t1 VALUES (1, NULL);
INSERT INTO t1 VALUES (-1, 'innodb');
INSERT INTO t1 VALUES (-32767, 'innodb');
INSERT INTO t1 VALUES (NULL, NULL);
SHOW CREATE TABLE t1;
SELECT * FROM t1;
DROP TABLE t1;
CREATE TABLE t1 (c1 SMALLINT UNSIGNED PRIMARY KEY AUTO_INCREMENT, c2 VARCHAR(10)) ENGINE=InnoDB;
INSERT INTO t1 VALUES (1, NULL);
INSERT INTO t1 VALUES (-1, 'innodb');
INSERT INTO t1 VALUES (-32757, 'innodb');
INSERT INTO t1 VALUES (NULL, NULL);
SHOW CREATE TABLE t1;
SELECT * FROM t1;
DROP TABLE t1;
#
# MEDIUMINT
#
CREATE TABLE t1 (c1 MEDIUMINT PRIMARY KEY AUTO_INCREMENT, c2 VARCHAR(10)) ENGINE=InnoDB;
INSERT INTO t1 VALUES (1, NULL);
INSERT INTO t1 VALUES (-1, 'innodb');
INSERT INTO t1 VALUES (-8388607, 'innodb');
INSERT INTO t1 VALUES (NULL, NULL);
SHOW CREATE TABLE t1;
SELECT * FROM t1;
DROP TABLE t1;
CREATE TABLE t1 (c1 MEDIUMINT UNSIGNED PRIMARY KEY AUTO_INCREMENT, c2 VARCHAR(10)) ENGINE=InnoDB;
INSERT INTO t1 VALUES (1, NULL);
INSERT INTO t1 VALUES (-1, 'innodb');
INSERT INTO t1 VALUES (-8388607, 'innodb');
INSERT INTO t1 VALUES (NULL, NULL);
SHOW CREATE TABLE t1;
SELECT * FROM t1;
DROP TABLE t1;
#
# INT
#
CREATE TABLE t1 (c1 INT PRIMARY KEY AUTO_INCREMENT, c2 VARCHAR(10)) ENGINE=InnoDB;
INSERT INTO t1 VALUES (1, NULL);
INSERT INTO t1 VALUES (-1, 'innodb');
INSERT INTO t1 VALUES (-2147483647, 'innodb');
INSERT INTO t1 VALUES (NULL, NULL);
SHOW CREATE TABLE t1;
SELECT * FROM t1;
DROP TABLE t1;
CREATE TABLE t1 (c1 INT UNSIGNED PRIMARY KEY AUTO_INCREMENT, c2 VARCHAR(10)) ENGINE=InnoDB;
INSERT INTO t1 VALUES (1, NULL);
INSERT INTO t1 VALUES (-1, 'innodb');
INSERT INTO t1 VALUES (-2147483647, 'innodb');
INSERT INTO t1 VALUES (NULL, NULL);
SHOW CREATE TABLE t1;
SELECT * FROM t1;
DROP TABLE t1;
#
# BIGINT
#
CREATE TABLE t1 (c1 BIGINT PRIMARY KEY AUTO_INCREMENT, c2 VARCHAR(10)) ENGINE=InnoDB;
INSERT INTO t1 VALUES (1, NULL);
INSERT INTO t1 VALUES (-1, 'innodb');
INSERT INTO t1 VALUES (-9223372036854775807, 'innodb');
INSERT INTO t1 VALUES (NULL, NULL);
SHOW CREATE TABLE t1;
SELECT * FROM t1;
DROP TABLE t1;
CREATE TABLE t1 (c1 BIGINT UNSIGNED PRIMARY KEY AUTO_INCREMENT, c2 VARCHAR(10)) ENGINE=InnoDB;
INSERT INTO t1 VALUES (1, NULL);
INSERT INTO t1 VALUES (-1, 'innodb');
INSERT INTO t1 VALUES (-9223372036854775807, 'innodb');
INSERT INTO t1 VALUES (NULL, NULL);
SHOW CREATE TABLE t1;
SELECT * FROM t1;
DROP TABLE t1;
#
# End negative number check
##
# 47125: auto_increment start value is ignored if an index is created
# and engine=innodb
#
CREATE TABLE t1 (c1 INT AUTO_INCREMENT, c2 INT, PRIMARY KEY(c1)) AUTO_INCREMENT=10 ENGINE=InnoDB;
CREATE INDEX i1 on t1(c2);
SHOW CREATE TABLE t1;
INSERT INTO t1 (c2) values (0);
SELECT * FROM t1;
DROP TABLE t1;
##
# 49032: Use the correct function to read the AUTOINC column value
#
DROP TABLE IF EXISTS t1;
CREATE TABLE t1(C1 DOUBLE AUTO_INCREMENT KEY, C2 CHAR(10)) ENGINE=InnoDB;
INSERT INTO t1(C1, C2) VALUES (1, 'innodb'), (3, 'innodb');
# Restart the server
-- source include/restart_mysqld.inc
INSERT INTO t1(C2) VALUES ('innodb');
SHOW CREATE TABLE t1;
DROP TABLE t1;
CREATE TABLE t1(C1 FLOAT AUTO_INCREMENT KEY, C2 CHAR(10)) ENGINE=InnoDB;
INSERT INTO t1(C1, C2) VALUES (1, 'innodb'), (3, 'innodb');
# Restart the server
-- source include/restart_mysqld.inc
INSERT INTO t1(C2) VALUES ('innodb');
SHOW CREATE TABLE t1;
DROP TABLE t1;
##
# 47720: REPLACE INTO Autoincrement column with negative values
#
DROP TABLE IF EXISTS t1;
CREATE TABLE t1 (c1 INT AUTO_INCREMENT PRIMARY KEY) ENGINE=InnoDB;
INSERT INTO t1 SET c1 = 1;
SHOW CREATE TABLE t1;
INSERT INTO t1 SET c1 = 2;
INSERT INTO t1 SET c1 = -1;
SELECT * FROM t1;
-- error ER_DUP_ENTRY,1062
INSERT INTO t1 SET c1 = -1;
SHOW CREATE TABLE t1;
REPLACE INTO t1 VALUES (-1);
SELECT * FROM t1;
SHOW CREATE TABLE t1;
DROP TABLE t1;
##
# 49497: Error 1467 (ER_AUTOINC_READ_FAILED) on inserting a negative value
#
DROP TABLE IF EXISTS t1;
CREATE TABLE t1 (c1 INTEGER AUTO_INCREMENT, PRIMARY KEY (c1)) ENGINE=InnoDB;
INSERT INTO t1 VALUES (-685113344), (1), (NULL), (NULL);
SELECT * FROM t1;
SHOW CREATE TABLE t1;
DROP TABLE t1;
CREATE TABLE t1 (c1 INTEGER AUTO_INCREMENT, PRIMARY KEY (c1)) ENGINE=InnoDB;
INSERT INTO t1 VALUES (-685113344), (2), (NULL), (NULL);
SELECT * FROM t1;
SHOW CREATE TABLE t1;
DROP TABLE t1;
CREATE TABLE t1 (c1 INTEGER AUTO_INCREMENT, PRIMARY KEY (c1)) ENGINE=InnoDB;
INSERT INTO t1 VALUES (NULL), (2), (-685113344), (NULL);
INSERT INTO t1 VALUES (4), (5), (6), (NULL);
SELECT * FROM t1;
SHOW CREATE TABLE t1;
DROP TABLE t1;
CREATE TABLE t1 (c1 INTEGER AUTO_INCREMENT, PRIMARY KEY (c1)) ENGINE=InnoDB;
INSERT INTO t1 VALUES (NULL), (2), (-685113344), (5);
SELECT * FROM t1;
SHOW CREATE TABLE t1;
DROP TABLE t1;
CREATE TABLE t1 (c1 INTEGER AUTO_INCREMENT, PRIMARY KEY (c1)) ENGINE=InnoDB;
INSERT INTO t1 VALUES (1), (2), (-685113344), (NULL);
SELECT * FROM t1;
SHOW CREATE TABLE t1;
DROP TABLE t1;

View file

@ -1 +1 @@
--innodb_lock_wait_timeout=2
--loose-innodb_lock_wait_timeout=2

View file

@ -1,58 +1,58 @@
-- source include/not_embedded.inc
-- source include/have_innodb.inc
--disable_warnings
drop table if exists t1;
--enable_warnings
# REPLACE INTO ... SELECT and INSERT INTO ... SELECT should do
# a consistent read of the source table.
connect (a,localhost,root,,);
connect (b,localhost,root,,);
connection a;
set session transaction isolation level read committed;
create table t1(a int not null) engine=innodb DEFAULT CHARSET=latin1;
create table t2 like t1;
insert into t2 values (1),(2),(3),(4),(5),(6),(7);
set autocommit=0;
# REPLACE INTO ... SELECT case
begin;
# this should not result in any locks on t2.
replace into t1 select * from t2;
connection b;
set session transaction isolation level read committed;
set autocommit=0;
# should not cuase a lock wait.
delete from t2 where a=5;
commit;
delete from t2;
commit;
connection a;
commit;
# INSERT INTO ... SELECT case
begin;
# this should not result in any locks on t2.
insert into t1 select * from t2;
connection b;
set session transaction isolation level read committed;
set autocommit=0;
# should not cuase a lock wait.
delete from t2 where a=5;
commit;
delete from t2;
commit;
connection a;
commit;
select * from t1;
drop table t1;
drop table t2;
connection default;
disconnect a;
disconnect b;
-- source include/not_embedded.inc
-- source include/have_innodb.inc
--disable_warnings
drop table if exists t1;
--enable_warnings
# REPLACE INTO ... SELECT and INSERT INTO ... SELECT should do
# a consistent read of the source table.
connect (a,localhost,root,,);
connect (b,localhost,root,,);
connection a;
set session transaction isolation level read committed;
create table t1(a int not null) engine=innodb DEFAULT CHARSET=latin1;
create table t2 like t1;
insert into t2 values (1),(2),(3),(4),(5),(6),(7);
set autocommit=0;
# REPLACE INTO ... SELECT case
begin;
# this should not result in any locks on t2.
replace into t1 select * from t2;
connection b;
set session transaction isolation level read committed;
set autocommit=0;
# should not cause a lock wait.
delete from t2 where a=5;
commit;
delete from t2;
commit;
connection a;
commit;
# INSERT INTO ... SELECT case
begin;
# this should not result in any locks on t2.
insert into t1 select * from t2;
connection b;
set session transaction isolation level read committed;
set autocommit=0;
# should not cause a lock wait.
delete from t2 where a=5;
commit;
delete from t2;
commit;
connection a;
commit;
select * from t1;
drop table t1;
drop table t2;
connection default;
disconnect a;
disconnect b;

View file

@ -46,13 +46,6 @@ t1 CREATE TABLE `t1` (
KEY `d2` (`d`),
KEY `b` (`b`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1
CREATE TABLE `t1#1`(a INT PRIMARY KEY) ENGINE=InnoDB;
alter table t1 add unique index (c), add index (d);
ERROR HY000: Table 'test.t1#1' already exists
rename table `t1#1` to `t1#2`;
alter table t1 add unique index (c), add index (d);
ERROR HY000: Table 'test.t1#2' already exists
drop table `t1#2`;
alter table t1 add unique index (c), add index (d);
show create table t1;
Table Create Table
@ -441,6 +434,7 @@ t3 CREATE TABLE `t3` (
KEY `c` (`c`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1
alter table t2 drop index b, add index (b);
ERROR 42000: Incorrect index name 'b'
show create table t2;
Table Create Table
t2 CREATE TABLE `t2` (
@ -451,8 +445,8 @@ t2 CREATE TABLE `t2` (
`e` int(11) DEFAULT NULL,
PRIMARY KEY (`a`),
UNIQUE KEY `dc` (`d`,`c`),
KEY `c` (`c`),
KEY `b` (`b`),
KEY `c` (`c`),
CONSTRAINT `t2_ibfk_1` FOREIGN KEY (`b`) REFERENCES `t1` (`b`) ON DELETE CASCADE,
CONSTRAINT `t2_ibfk_2` FOREIGN KEY (`c`) REFERENCES `t3` (`c`),
CONSTRAINT `t2_ibfk_3` FOREIGN KEY (`d`) REFERENCES `t4` (`d`)
@ -968,6 +962,7 @@ create index t1u on t1 (u(1));
drop table t1;
set global innodb_file_per_table=0;
set global innodb_file_format=Antelope;
set global innodb_file_format_check=Antelope;
SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0;
SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0;
CREATE TABLE t1(

View file

@ -1,5 +1,9 @@
-- source include/have_innodb.inc
let $MYSQLD_DATADIR= `select @@datadir`;
let $innodb_file_format_check_orig=`select @@innodb_file_format_check`;
create table t1(a int not null, b int, c char(10) not null, d varchar(20)) engine = innodb;
insert into t1 values (5,5,'oo','oo'),(4,4,'tr','tr'),(3,4,'ad','ad'),(2,3,'ak','ak');
commit;
@ -17,16 +21,6 @@ show create table t1;
alter table t1 add index (b);
show create table t1;
# Check how existing tables interfere with temporary tables.
CREATE TABLE `t1#1`(a INT PRIMARY KEY) ENGINE=InnoDB;
--error 156
alter table t1 add unique index (c), add index (d);
rename table `t1#1` to `t1#2`;
--error 156
alter table t1 add unique index (c), add index (d);
drop table `t1#2`;
alter table t1 add unique index (c), add index (d);
show create table t1;
explain select * from t1 force index(c) order by c;
@ -137,6 +131,8 @@ show create table t4;
--error ER_CANT_CREATE_TABLE
alter table t3 add constraint dc foreign key (a) references t1(a);
show create table t3;
# this should be fixed by MySQL (see Bug #51451)
--error ER_WRONG_NAME_FOR_INDEX
alter table t2 drop index b, add index (b);
show create table t2;
--error ER_ROW_IS_REFERENCED_2
@ -144,7 +140,9 @@ delete from t1;
--error ER_CANT_DROP_FIELD_OR_KEY
drop index dc on t4;
# there is no foreign key dc on t3
--replace_regex /'\.\/test\/#sql2-[0-9a-f-]*'/'#sql2-temporary'/
--replace_regex /'[^']*test\/#sql2-[0-9a-f-]*'/'#sql2-temporary'/
# Embedded server doesn't chdir to data directory
--replace_result $MYSQLD_DATADIR ./ master-data/ ''
--error ER_ERROR_ON_RENAME
alter table t3 drop foreign key dc;
alter table t4 drop foreign key dc;
@ -398,6 +396,7 @@ create index t1u on t1 (u(1));
drop table t1;
eval set global innodb_file_per_table=$per_table;
eval set global innodb_file_format=$format;
eval set global innodb_file_format_check=$format;
#
# Test to check whether CREATE INDEX handles implicit foreign key
@ -532,3 +531,10 @@ disconnect a;
disconnect b;
DROP TABLE t1;
#
# restore environment to the state it was before this test execution
#
-- disable_query_log
eval SET GLOBAL innodb_file_format_check=$innodb_file_format_check_orig;

Some files were not shown because too many files have changed in this diff Show more