mariadb/storage/innodb_plugin/include/btr0btr.ic
Marko Mäkelä d5c3640064 Bug #11766591 59733: Possible deadlock when buffered changes are to be
discarded in buf_page_create()

This bug turned out to be a false alarm, a bug in the UNIV_SYNC_DEBUG
diagnostic code. Because of this, the patch was not backported to the
built-in InnoDB in MySQL 5.1. Furthermore, there is no test case for
InnoDB Plugin in MySQL 5.1, because the delete buffering in MySQL 5.5
makes triggering the failure much easier.

When a freed page for which there exist orphaned buffered changes is
allocated and reused for something else, buf_page_create() will discard
the buffered changes by invoking ibuf_merge_or_delete_for_page().
This would violate the InnoDB latching order.

Tweak the latching order as follows. Move SYNC_IBUF_MUTEX below
SYNC_FSP_PAGE, where it logically belongs, and assign new latching
levels for the ibuf->index->lock and the insert buffer B-tree pages:

#define SYNC_IBUF_MUTEX		370	/* ibuf_mutex */
#define SYNC_IBUF_INDEX_TREE	360
#define SYNC_IBUF_TREE_NODE_NEW	359
#define SYNC_IBUF_TREE_NODE	358

btr_block_get(), btr_page_get(): In UNIV_SYNC_DEBUG, add the parameter
"index" for determining the appropriate latching order
(SYNC_IBUF_TREE_NODE or SYNC_TREE_NODE).

btr_page_alloc_for_ibuf(), btr_create(): Use SYNC_IBUF_TREE_NODE_NEW
instead of SYNC_TREE_NODE_NEW for insert buffer pages.

btr_cur_search_to_nth_level(), btr_pcur_restore_position_func(): Use
SYNC_IBUF_TREE_NODE instead of SYNC_TREE_NODE for insert buffer pages.

btr_search_guess_on_hash(): Assert that the index is not an insert buffer tree.

dict_index_add_to_cache(): Use SYNC_IBUF_INDEX_TREE for the insert
buffer tree (ibuf->index->lock).

ibuf0ibuf.c: Use SYNC_IBUF_TREE_NODE or SYNC_IBUF_TREE_NODE_NEW for
all B-tree pages.

ibuf_merge_or_delete_for_page(): Assert that the user page is
BUF_IO_READ fixed. Only in this way it is OK to latch it as
SYNC_IBUF_TREE_NODE instead of the proper SYNC_TREE_NODE (which would
violate the changed latching order).

sync_thread_add_level(): Remove the special tweak for
SYNC_IBUF_MUTEX. Add rules for the added latching levels.

rb:591 approved by Jimmy Yang
2011-08-15 12:11:43 +03:00

306 lines
8.6 KiB
Text

/*****************************************************************************
Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file include/btr0btr.ic
The B-tree
Created 6/2/1994 Heikki Tuuri
*******************************************************/
#include "mach0data.h"
#ifndef UNIV_HOTBACKUP
#include "mtr0mtr.h"
#include "mtr0log.h"
#include "page0zip.h"
#define BTR_MAX_NODE_LEVEL 50 /*!< Maximum B-tree page level
(not really a hard limit).
Used in debug assertions
in btr_page_set_level and
btr_page_get_level_low */
/**************************************************************//**
Gets a buffer page and declares its latching order level. */
UNIV_INLINE
buf_block_t*
btr_block_get_func(
/*===============*/
ulint space, /*!< in: space id */
ulint zip_size, /*!< in: compressed page size in bytes
or 0 for uncompressed pages */
ulint page_no, /*!< in: page number */
ulint mode, /*!< in: latch mode */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
#ifdef UNIV_SYNC_DEBUG
const dict_index_t* index, /*!< in: index tree, may be NULL
if it is not an insert buffer tree */
#endif /* UNIV_SYNC_DEBUG */
mtr_t* mtr) /*!< in/out: mtr */
{
buf_block_t* block;
block = buf_page_get_gen(space, zip_size, page_no, mode,
NULL, BUF_GET, file, line, mtr);
if (mode != RW_NO_LATCH) {
buf_block_dbg_add_level(
block, index != NULL && dict_index_is_ibuf(index)
? SYNC_IBUF_TREE_NODE : SYNC_TREE_NODE);
}
return(block);
}
/**************************************************************//**
Sets the index id field of a page. */
UNIV_INLINE
void
btr_page_set_index_id(
/*==================*/
page_t* page, /*!< in: page to be created */
page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed
part will be updated, or NULL */
dulint id, /*!< in: index id */
mtr_t* mtr) /*!< in: mtr */
{
if (UNIV_LIKELY_NULL(page_zip)) {
mach_write_to_8(page + (PAGE_HEADER + PAGE_INDEX_ID), id);
page_zip_write_header(page_zip,
page + (PAGE_HEADER + PAGE_INDEX_ID),
8, mtr);
} else {
mlog_write_dulint(page + (PAGE_HEADER + PAGE_INDEX_ID),
id, mtr);
}
}
#endif /* !UNIV_HOTBACKUP */
/**************************************************************//**
Gets the index id field of a page.
@return index id */
UNIV_INLINE
dulint
btr_page_get_index_id(
/*==================*/
const page_t* page) /*!< in: index page */
{
return(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID));
}
#ifndef UNIV_HOTBACKUP
/********************************************************//**
Gets the node level field in an index page.
@return level, leaf level == 0 */
UNIV_INLINE
ulint
btr_page_get_level_low(
/*===================*/
const page_t* page) /*!< in: index page */
{
ulint level;
ut_ad(page);
level = mach_read_from_2(page + PAGE_HEADER + PAGE_LEVEL);
ut_ad(level <= BTR_MAX_NODE_LEVEL);
return(level);
}
/********************************************************//**
Gets the node level field in an index page.
@return level, leaf level == 0 */
UNIV_INLINE
ulint
btr_page_get_level(
/*===============*/
const page_t* page, /*!< in: index page */
mtr_t* mtr __attribute__((unused)))
/*!< in: mini-transaction handle */
{
ut_ad(page && mtr);
return(btr_page_get_level_low(page));
}
/********************************************************//**
Sets the node level field in an index page. */
UNIV_INLINE
void
btr_page_set_level(
/*===============*/
page_t* page, /*!< in: index page */
page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed
part will be updated, or NULL */
ulint level, /*!< in: level, leaf level == 0 */
mtr_t* mtr) /*!< in: mini-transaction handle */
{
ut_ad(page && mtr);
ut_ad(level <= BTR_MAX_NODE_LEVEL);
if (UNIV_LIKELY_NULL(page_zip)) {
mach_write_to_2(page + (PAGE_HEADER + PAGE_LEVEL), level);
page_zip_write_header(page_zip,
page + (PAGE_HEADER + PAGE_LEVEL),
2, mtr);
} else {
mlog_write_ulint(page + (PAGE_HEADER + PAGE_LEVEL), level,
MLOG_2BYTES, mtr);
}
}
/********************************************************//**
Gets the next index page number.
@return next page number */
UNIV_INLINE
ulint
btr_page_get_next(
/*==============*/
const page_t* page, /*!< in: index page */
mtr_t* mtr __attribute__((unused)))
/*!< in: mini-transaction handle */
{
ut_ad(page && mtr);
ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX)
|| mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_S_FIX));
return(mach_read_from_4(page + FIL_PAGE_NEXT));
}
/********************************************************//**
Sets the next index page field. */
UNIV_INLINE
void
btr_page_set_next(
/*==============*/
page_t* page, /*!< in: index page */
page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed
part will be updated, or NULL */
ulint next, /*!< in: next page number */
mtr_t* mtr) /*!< in: mini-transaction handle */
{
ut_ad(page && mtr);
if (UNIV_LIKELY_NULL(page_zip)) {
mach_write_to_4(page + FIL_PAGE_NEXT, next);
page_zip_write_header(page_zip, page + FIL_PAGE_NEXT, 4, mtr);
} else {
mlog_write_ulint(page + FIL_PAGE_NEXT, next, MLOG_4BYTES, mtr);
}
}
/********************************************************//**
Gets the previous index page number.
@return prev page number */
UNIV_INLINE
ulint
btr_page_get_prev(
/*==============*/
const page_t* page, /*!< in: index page */
mtr_t* mtr __attribute__((unused))) /*!< in: mini-transaction handle */
{
ut_ad(page && mtr);
return(mach_read_from_4(page + FIL_PAGE_PREV));
}
/********************************************************//**
Sets the previous index page field. */
UNIV_INLINE
void
btr_page_set_prev(
/*==============*/
page_t* page, /*!< in: index page */
page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed
part will be updated, or NULL */
ulint prev, /*!< in: previous page number */
mtr_t* mtr) /*!< in: mini-transaction handle */
{
ut_ad(page && mtr);
if (UNIV_LIKELY_NULL(page_zip)) {
mach_write_to_4(page + FIL_PAGE_PREV, prev);
page_zip_write_header(page_zip, page + FIL_PAGE_PREV, 4, mtr);
} else {
mlog_write_ulint(page + FIL_PAGE_PREV, prev, MLOG_4BYTES, mtr);
}
}
/**************************************************************//**
Gets the child node file address in a node pointer.
NOTE: the offsets array must contain all offsets for the record since
we read the last field according to offsets and assume that it contains
the child page number. In other words offsets must have been retrieved
with rec_get_offsets(n_fields=ULINT_UNDEFINED).
@return child node address */
UNIV_INLINE
ulint
btr_node_ptr_get_child_page_no(
/*===========================*/
const rec_t* rec, /*!< in: node pointer record */
const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
{
const byte* field;
ulint len;
ulint page_no;
ut_ad(!rec_offs_comp(offsets) || rec_get_node_ptr_flag(rec));
/* The child address is in the last field */
field = rec_get_nth_field(rec, offsets,
rec_offs_n_fields(offsets) - 1, &len);
ut_ad(len == 4);
page_no = mach_read_from_4(field);
if (UNIV_UNLIKELY(page_no == 0)) {
fprintf(stderr,
"InnoDB: a nonsensical page number 0"
" in a node ptr record at offset %lu\n",
(ulong) page_offset(rec));
buf_page_print(page_align(rec), 0);
}
return(page_no);
}
/**************************************************************//**
Releases the latches on a leaf page and bufferunfixes it. */
UNIV_INLINE
void
btr_leaf_page_release(
/*==================*/
buf_block_t* block, /*!< in: buffer block */
ulint latch_mode, /*!< in: BTR_SEARCH_LEAF or
BTR_MODIFY_LEAF */
mtr_t* mtr) /*!< in: mtr */
{
ut_ad(latch_mode == BTR_SEARCH_LEAF || latch_mode == BTR_MODIFY_LEAF);
ut_ad(!mtr_memo_contains(mtr, block, MTR_MEMO_MODIFY));
mtr_memo_release(mtr, block,
latch_mode == BTR_SEARCH_LEAF
? MTR_MEMO_PAGE_S_FIX
: MTR_MEMO_PAGE_X_FIX);
}
#endif /* !UNIV_HOTBACKUP */