mirror of
https://github.com/MariaDB/server.git
synced 2025-01-20 14:02:32 +01:00
branches/zip: This is a patch from Inaam that uses a red-black tree
to speed up insertions into the flush_list and thus the recovery process. The patch has been tested by Nokia.
This commit is contained in:
parent
d2b4c4f532
commit
38d926a8d0
12 changed files with 1880 additions and 52 deletions
|
@ -78,7 +78,7 @@ SET(INNOBASE_SOURCES btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c btr/btr0sea.c
|
|||
trx/trx0i_s.c trx/trx0purge.c trx/trx0rec.c trx/trx0roll.c trx/trx0rseg.c
|
||||
trx/trx0sys.c trx/trx0trx.c trx/trx0undo.c
|
||||
usr/usr0sess.c
|
||||
ut/ut0byte.c ut/ut0dbg.c ut/ut0mem.c ut/ut0rnd.c ut/ut0ut.c ut/ut0vec.c
|
||||
ut/ut0byte.c ut/ut0dbg.c ut/ut0mem.c ut/ut0rbt.c ut/ut0rnd.c ut/ut0ut.c ut/ut0vec.c
|
||||
ut/ut0list.c ut/ut0wqueue.c)
|
||||
# Windows atomics do not perform well. Disable Windows atomics by default.
|
||||
# See bug#52102 for details.
|
||||
|
|
|
@ -217,6 +217,7 @@ noinst_HEADERS= \
|
|||
include/ut0lst.h \
|
||||
include/ut0mem.h \
|
||||
include/ut0mem.ic \
|
||||
include/ut0rbt.h \
|
||||
include/ut0rnd.h \
|
||||
include/ut0rnd.ic \
|
||||
include/ut0sort.h \
|
||||
|
@ -318,6 +319,7 @@ libinnobase_a_SOURCES= \
|
|||
ut/ut0dbg.c \
|
||||
ut/ut0list.c \
|
||||
ut/ut0mem.c \
|
||||
ut/ut0rbt.c \
|
||||
ut/ut0rnd.c \
|
||||
ut/ut0ut.c \
|
||||
ut/ut0vec.c \
|
||||
|
|
|
@ -391,6 +391,8 @@ buf_buddy_relocate_block(
|
|||
UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, dpage);
|
||||
}
|
||||
|
||||
UNIV_MEM_INVALID(bpage, sizeof *bpage);
|
||||
|
||||
mutex_exit(&buf_pool_zip_mutex);
|
||||
return(TRUE);
|
||||
}
|
||||
|
|
|
@ -1191,8 +1191,6 @@ buf_relocate(
|
|||
|
||||
HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, bpage);
|
||||
HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, dpage);
|
||||
|
||||
UNIV_MEM_INVALID(bpage, sizeof *bpage);
|
||||
}
|
||||
|
||||
/********************************************************************//**
|
||||
|
@ -2224,22 +2222,8 @@ wait_until_unfixed:
|
|||
ut_ad(!block->page.in_flush_list);
|
||||
} else {
|
||||
/* Relocate buf_pool->flush_list. */
|
||||
buf_page_t* b;
|
||||
|
||||
b = UT_LIST_GET_PREV(list, &block->page);
|
||||
ut_ad(block->page.in_flush_list);
|
||||
UT_LIST_REMOVE(list, buf_pool->flush_list,
|
||||
&block->page);
|
||||
|
||||
if (b) {
|
||||
UT_LIST_INSERT_AFTER(
|
||||
list, buf_pool->flush_list, b,
|
||||
&block->page);
|
||||
} else {
|
||||
UT_LIST_ADD_FIRST(
|
||||
list, buf_pool->flush_list,
|
||||
&block->page);
|
||||
}
|
||||
buf_flush_relocate_on_flush_list(bpage,
|
||||
&block->page);
|
||||
}
|
||||
|
||||
/* Buffer-fix, I/O-fix, and X-latch the block
|
||||
|
@ -2253,6 +2237,9 @@ wait_until_unfixed:
|
|||
block->page.buf_fix_count = 1;
|
||||
buf_block_set_io_fix(block, BUF_IO_READ);
|
||||
rw_lock_x_lock(&block->lock);
|
||||
|
||||
UNIV_MEM_INVALID(bpage, sizeof *bpage);
|
||||
|
||||
mutex_exit(&block->mutex);
|
||||
mutex_exit(&buf_pool_zip_mutex);
|
||||
buf_pool->n_pend_unzip++;
|
||||
|
|
254
buf/buf0flu.c
254
buf/buf0flu.c
|
@ -87,6 +87,138 @@ buf_flush_validate_low(void);
|
|||
/*========================*/
|
||||
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
|
||||
|
||||
/********************************************************************//**
|
||||
Insert a block in the flush_rbt and returns a pointer to its
|
||||
predecessor or NULL if no predecessor. The ordering is maintained
|
||||
on the basis of the <oldest_modification, space, offset> key.
|
||||
@return pointer to the predecessor or NULL if no predecessor. */
|
||||
static
|
||||
buf_page_t*
|
||||
buf_flush_insert_in_flush_rbt(
|
||||
/*==========================*/
|
||||
buf_page_t* bpage) /*!< in: bpage to be inserted. */
|
||||
{
|
||||
buf_page_t* prev = NULL;
|
||||
const ib_rbt_node_t* c_node;
|
||||
const ib_rbt_node_t* p_node;
|
||||
|
||||
ut_ad(buf_pool_mutex_own());
|
||||
|
||||
/* Insert this buffer into the rbt. */
|
||||
c_node = rbt_insert(buf_pool->flush_rbt, &bpage, &bpage);
|
||||
ut_a(c_node != NULL);
|
||||
|
||||
/* Get the predecessor. */
|
||||
p_node = rbt_prev(buf_pool->flush_rbt, c_node);
|
||||
|
||||
if (p_node != NULL) {
|
||||
prev = *rbt_value(buf_page_t*, p_node);
|
||||
ut_a(prev != NULL);
|
||||
}
|
||||
|
||||
return(prev);
|
||||
}
|
||||
|
||||
/********************************************************************//**
|
||||
Delete a bpage from the flush_rbt. */
|
||||
static
|
||||
void
|
||||
buf_flush_delete_from_flush_rbt(
|
||||
/*============================*/
|
||||
buf_page_t* bpage) /*!< in: bpage to be removed. */
|
||||
{
|
||||
|
||||
ibool ret = FALSE;
|
||||
|
||||
ut_ad(buf_pool_mutex_own());
|
||||
ret = rbt_delete(buf_pool->flush_rbt, &bpage);
|
||||
ut_ad(ret);
|
||||
}
|
||||
|
||||
/********************************************************************//**
|
||||
Compare two modified blocks in the buffer pool. The key for comparison
|
||||
is:
|
||||
key = <oldest_modification, space, offset>
|
||||
This comparison is used to maintian ordering of blocks in the
|
||||
buf_pool->flush_rbt.
|
||||
Note that for the purpose of flush_rbt, we only need to order blocks
|
||||
on the oldest_modification. The other two fields are used to uniquely
|
||||
identify the blocks.
|
||||
@return < 0 if b2 < b1, 0 if b2 == b1, > 0 if b2 > b1 */
|
||||
static
|
||||
int
|
||||
buf_flush_block_cmp(
|
||||
/*================*/
|
||||
const void* p1, /*!< in: block1 */
|
||||
const void* p2) /*!< in: block2 */
|
||||
{
|
||||
int ret;
|
||||
|
||||
ut_ad(p1 != NULL);
|
||||
ut_ad(p2 != NULL);
|
||||
|
||||
const buf_page_t* b1 = *(const buf_page_t**) p1;
|
||||
const buf_page_t* b2 = *(const buf_page_t**) p2;
|
||||
|
||||
ut_ad(b1 != NULL);
|
||||
ut_ad(b2 != NULL);
|
||||
|
||||
ut_ad(b1->in_flush_list);
|
||||
ut_ad(b2->in_flush_list);
|
||||
|
||||
if (b2->oldest_modification
|
||||
> b1->oldest_modification) {
|
||||
return(1);
|
||||
}
|
||||
|
||||
if (b2->oldest_modification
|
||||
< b1->oldest_modification) {
|
||||
return(-1);
|
||||
}
|
||||
|
||||
/* If oldest_modification is same then decide on the space. */
|
||||
ret = (int)(b2->space - b1->space);
|
||||
|
||||
/* Or else decide ordering on the offset field. */
|
||||
return(ret ? ret : (int)(b2->offset - b1->offset));
|
||||
}
|
||||
|
||||
/********************************************************************//**
|
||||
Initialize the red-black tree to speed up insertions into the flush_list
|
||||
during recovery process. Should be called at the start of recovery
|
||||
process before any page has been read/written. */
|
||||
UNIV_INTERN
|
||||
void
|
||||
buf_flush_init_flush_rbt(void)
|
||||
/*==========================*/
|
||||
{
|
||||
buf_pool_mutex_enter();
|
||||
|
||||
/* Create red black tree for speedy insertions in flush list. */
|
||||
buf_pool->flush_rbt = rbt_create(sizeof(buf_page_t*),
|
||||
buf_flush_block_cmp);
|
||||
buf_pool_mutex_exit();
|
||||
}
|
||||
|
||||
/********************************************************************//**
|
||||
Frees up the red-black tree. */
|
||||
UNIV_INTERN
|
||||
void
|
||||
buf_flush_free_flush_rbt(void)
|
||||
/*==========================*/
|
||||
{
|
||||
buf_pool_mutex_enter();
|
||||
|
||||
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
|
||||
ut_a(buf_flush_validate_low());
|
||||
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
|
||||
|
||||
rbt_free(buf_pool->flush_rbt);
|
||||
buf_pool->flush_rbt = NULL;
|
||||
|
||||
buf_pool_mutex_exit();
|
||||
}
|
||||
|
||||
/********************************************************************//**
|
||||
Inserts a modified block into the flush list. */
|
||||
UNIV_INTERN
|
||||
|
@ -100,6 +232,13 @@ buf_flush_insert_into_flush_list(
|
|||
|| (UT_LIST_GET_FIRST(buf_pool->flush_list)->oldest_modification
|
||||
<= block->page.oldest_modification));
|
||||
|
||||
/* If we are in the recovery then we need to update the flush
|
||||
red-black tree as well. */
|
||||
if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
|
||||
buf_flush_insert_sorted_into_flush_list(block);
|
||||
return;
|
||||
}
|
||||
|
||||
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
|
||||
ut_ad(block->page.in_LRU_list);
|
||||
ut_ad(block->page.in_page_hash);
|
||||
|
@ -136,12 +275,27 @@ buf_flush_insert_sorted_into_flush_list(
|
|||
ut_d(block->page.in_flush_list = TRUE);
|
||||
|
||||
prev_b = NULL;
|
||||
b = UT_LIST_GET_FIRST(buf_pool->flush_list);
|
||||
|
||||
while (b && b->oldest_modification > block->page.oldest_modification) {
|
||||
ut_ad(b->in_flush_list);
|
||||
prev_b = b;
|
||||
b = UT_LIST_GET_NEXT(list, b);
|
||||
/* For the most part when this function is called the flush_rbt
|
||||
should not be NULL. In a very rare boundary case it is possible
|
||||
that the flush_rbt has already been freed by the recovery thread
|
||||
before the last page was hooked up in the flush_list by the
|
||||
io-handler thread. In that case we'll just do a simple
|
||||
linear search in the else block. */
|
||||
if (buf_pool->flush_rbt) {
|
||||
|
||||
prev_b = buf_flush_insert_in_flush_rbt(&block->page);
|
||||
|
||||
} else {
|
||||
|
||||
b = UT_LIST_GET_FIRST(buf_pool->flush_list);
|
||||
|
||||
while (b && b->oldest_modification
|
||||
> block->page.oldest_modification) {
|
||||
ut_ad(b->in_flush_list);
|
||||
prev_b = b;
|
||||
b = UT_LIST_GET_NEXT(list, b);
|
||||
}
|
||||
}
|
||||
|
||||
if (prev_b == NULL) {
|
||||
|
@ -237,7 +391,6 @@ buf_flush_remove(
|
|||
ut_ad(buf_pool_mutex_own());
|
||||
ut_ad(mutex_own(buf_page_get_mutex(bpage)));
|
||||
ut_ad(bpage->in_flush_list);
|
||||
ut_d(bpage->in_flush_list = FALSE);
|
||||
|
||||
switch (buf_page_get_state(bpage)) {
|
||||
case BUF_BLOCK_ZIP_PAGE:
|
||||
|
@ -259,12 +412,78 @@ buf_flush_remove(
|
|||
break;
|
||||
}
|
||||
|
||||
/* If the flush_rbt is active then delete from it as well. */
|
||||
if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
|
||||
buf_flush_delete_from_flush_rbt(bpage);
|
||||
}
|
||||
|
||||
/* Must be done after we have removed it from the flush_rbt
|
||||
because we assert on in_flush_list in comparison function. */
|
||||
ut_d(bpage->in_flush_list = FALSE);
|
||||
|
||||
bpage->oldest_modification = 0;
|
||||
|
||||
ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
|
||||
ut_ad(ut_list_node_313->in_flush_list)));
|
||||
}
|
||||
|
||||
/********************************************************************//**
|
||||
Relocates a buffer control block on the flush_list.
|
||||
Note that it is assumed that the contents of bpage has already been
|
||||
copied to dpage. */
|
||||
UNIV_INTERN
|
||||
void
|
||||
buf_flush_relocate_on_flush_list(
|
||||
/*=============================*/
|
||||
buf_page_t* bpage, /*!< in/out: control block being moved */
|
||||
buf_page_t* dpage) /*!< in/out: destination block */
|
||||
{
|
||||
buf_page_t* prev;
|
||||
buf_page_t* prev_b = NULL;
|
||||
|
||||
ut_ad(buf_pool_mutex_own());
|
||||
|
||||
ut_ad(mutex_own(buf_page_get_mutex(bpage)));
|
||||
|
||||
ut_ad(bpage->in_flush_list);
|
||||
ut_ad(dpage->in_flush_list);
|
||||
|
||||
/* If recovery is active we must swap the control blocks in
|
||||
the flush_rbt as well. */
|
||||
if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
|
||||
buf_flush_delete_from_flush_rbt(bpage);
|
||||
prev_b = buf_flush_insert_in_flush_rbt(dpage);
|
||||
}
|
||||
|
||||
/* Must be done after we have removed it from the flush_rbt
|
||||
because we assert on in_flush_list in comparison function. */
|
||||
ut_d(bpage->in_flush_list = FALSE);
|
||||
|
||||
prev = UT_LIST_GET_PREV(list, bpage);
|
||||
UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
|
||||
|
||||
if (prev) {
|
||||
ut_ad(prev->in_flush_list);
|
||||
UT_LIST_INSERT_AFTER(
|
||||
list,
|
||||
buf_pool->flush_list,
|
||||
prev, dpage);
|
||||
} else {
|
||||
UT_LIST_ADD_FIRST(
|
||||
list,
|
||||
buf_pool->flush_list,
|
||||
dpage);
|
||||
}
|
||||
|
||||
/* Just an extra check. Previous in flush_list
|
||||
should be the same control block as in flush_rbt. */
|
||||
ut_a(!buf_pool->flush_rbt || prev_b == prev);
|
||||
|
||||
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
|
||||
ut_a(buf_flush_validate_low());
|
||||
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
|
||||
}
|
||||
|
||||
/********************************************************************//**
|
||||
Updates the flush system data structures when a write is completed. */
|
||||
UNIV_INTERN
|
||||
|
@ -1367,24 +1586,45 @@ ibool
|
|||
buf_flush_validate_low(void)
|
||||
/*========================*/
|
||||
{
|
||||
buf_page_t* bpage;
|
||||
buf_page_t* bpage;
|
||||
const ib_rbt_node_t* rnode = NULL;
|
||||
|
||||
UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
|
||||
ut_ad(ut_list_node_313->in_flush_list));
|
||||
|
||||
bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
|
||||
|
||||
/* If we are in recovery mode i.e.: flush_rbt != NULL
|
||||
then each block in the flush_list must also be present
|
||||
in the flush_rbt. */
|
||||
if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
|
||||
rnode = rbt_first(buf_pool->flush_rbt);
|
||||
}
|
||||
|
||||
while (bpage != NULL) {
|
||||
const ib_uint64_t om = bpage->oldest_modification;
|
||||
ut_ad(bpage->in_flush_list);
|
||||
ut_a(buf_page_in_file(bpage));
|
||||
ut_a(om > 0);
|
||||
|
||||
if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
|
||||
ut_a(rnode);
|
||||
buf_page_t* rpage = *rbt_value(buf_page_t*,
|
||||
rnode);
|
||||
ut_a(rpage);
|
||||
ut_a(rpage == bpage);
|
||||
rnode = rbt_next(buf_pool->flush_rbt, rnode);
|
||||
}
|
||||
|
||||
bpage = UT_LIST_GET_NEXT(list, bpage);
|
||||
|
||||
ut_a(!bpage || om >= bpage->oldest_modification);
|
||||
}
|
||||
|
||||
/* By this time we must have exhausted the traversal of
|
||||
flush_rbt (if active) as well. */
|
||||
ut_a(rnode == NULL);
|
||||
|
||||
return(TRUE);
|
||||
}
|
||||
|
||||
|
|
|
@ -1530,26 +1530,8 @@ alloc:
|
|||
if (b->state == BUF_BLOCK_ZIP_PAGE) {
|
||||
buf_LRU_insert_zip_clean(b);
|
||||
} else {
|
||||
buf_page_t* prev;
|
||||
|
||||
ut_ad(b->in_flush_list);
|
||||
ut_d(bpage->in_flush_list = FALSE);
|
||||
|
||||
prev = UT_LIST_GET_PREV(list, b);
|
||||
UT_LIST_REMOVE(list, buf_pool->flush_list, b);
|
||||
|
||||
if (prev) {
|
||||
ut_ad(prev->in_flush_list);
|
||||
UT_LIST_INSERT_AFTER(
|
||||
list,
|
||||
buf_pool->flush_list,
|
||||
prev, b);
|
||||
} else {
|
||||
UT_LIST_ADD_FIRST(
|
||||
list,
|
||||
buf_pool->flush_list,
|
||||
b);
|
||||
}
|
||||
/* Relocate on buf_pool->flush_list. */
|
||||
buf_flush_relocate_on_flush_list(bpage, b);
|
||||
}
|
||||
|
||||
bpage->zip.data = NULL;
|
||||
|
|
|
@ -608,14 +608,14 @@ buf_read_recv_pages(
|
|||
while (buf_pool->n_pend_reads >= recv_n_pool_free_frames / 2) {
|
||||
|
||||
os_aio_simulated_wake_handler_threads();
|
||||
os_thread_sleep(500000);
|
||||
os_thread_sleep(10000);
|
||||
|
||||
count++;
|
||||
|
||||
if (count > 100) {
|
||||
if (count > 1000) {
|
||||
fprintf(stderr,
|
||||
"InnoDB: Error: InnoDB has waited for"
|
||||
" 50 seconds for pending\n"
|
||||
" 10 seconds for pending\n"
|
||||
"InnoDB: reads to the buffer pool to"
|
||||
" be finished.\n"
|
||||
"InnoDB: Number of pending reads %lu,"
|
||||
|
|
|
@ -33,6 +33,7 @@ Created 11/5/1995 Heikki Tuuri
|
|||
#include "hash0hash.h"
|
||||
#include "ut0byte.h"
|
||||
#include "page0types.h"
|
||||
#include "ut0rbt.h"
|
||||
#ifndef UNIV_HOTBACKUP
|
||||
#include "os0proc.h"
|
||||
|
||||
|
@ -1359,6 +1360,19 @@ struct buf_pool_struct{
|
|||
/*!< this is in the set state
|
||||
when there is no flush batch
|
||||
of the given type running */
|
||||
ib_rbt_t* flush_rbt; /*!< a red-black tree is used
|
||||
exclusively during recovery to
|
||||
speed up insertions in the
|
||||
flush_list. This tree contains
|
||||
blocks in order of
|
||||
oldest_modification LSN and is
|
||||
kept in sync with the
|
||||
flush_list.
|
||||
Each member of the tree MUST
|
||||
also be on the flush_list.
|
||||
This tree is relevant only in
|
||||
recovery and is set to NULL
|
||||
once the recovery is over. */
|
||||
ulint freed_page_clock;/*!< a sequence number used
|
||||
to count the number of buffer
|
||||
blocks removed from the end of
|
||||
|
|
|
@ -40,6 +40,16 @@ buf_flush_remove(
|
|||
/*=============*/
|
||||
buf_page_t* bpage); /*!< in: pointer to the block in question */
|
||||
/********************************************************************//**
|
||||
Relocates a buffer control block on the flush_list.
|
||||
Note that it is assumed that the contents of bpage has already been
|
||||
copied to dpage. */
|
||||
UNIV_INTERN
|
||||
void
|
||||
buf_flush_relocate_on_flush_list(
|
||||
/*=============================*/
|
||||
buf_page_t* bpage, /*!< in/out: control block being moved */
|
||||
buf_page_t* dpage); /*!< in/out: destination block */
|
||||
/********************************************************************//**
|
||||
Updates the flush system data structures when a write is completed. */
|
||||
UNIV_INTERN
|
||||
void
|
||||
|
@ -139,8 +149,8 @@ how much redo the workload is generating and at what rate. */
|
|||
|
||||
struct buf_flush_stat_struct
|
||||
{
|
||||
ib_uint64_t redo; /**< amount of redo generated. */
|
||||
ulint n_flushed; /**< number of pages flushed. */
|
||||
ib_uint64_t redo; /*!< amount of redo generated. */
|
||||
ulint n_flushed; /*!< number of pages flushed. */
|
||||
};
|
||||
|
||||
/** Statistics for selecting flush rate of dirty pages. */
|
||||
|
@ -175,6 +185,22 @@ buf_flush_validate(void);
|
|||
/*====================*/
|
||||
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
|
||||
|
||||
/******************************************************************//**
|
||||
Initialize the red-black tree to speed up insertions into the flush_list
|
||||
during recovery process. Should be called at the start of recovery
|
||||
process before any page has been read/written. */
|
||||
UNIV_INTERN
|
||||
void
|
||||
buf_flush_init_flush_rbt(void);
|
||||
/*==========================*/
|
||||
|
||||
/******************************************************************//**
|
||||
Frees up the red-black tree. */
|
||||
UNIV_INTERN
|
||||
void
|
||||
buf_flush_free_flush_rbt(void);
|
||||
/*==========================*/
|
||||
|
||||
/** When buf_flush_free_margin is called, it tries to make this many blocks
|
||||
available to replacement in the free list and at the end of the LRU list (to
|
||||
make sure that a read-ahead batch can be read efficiently in a single
|
||||
|
|
309
include/ut0rbt.h
Normal file
309
include/ut0rbt.h
Normal file
|
@ -0,0 +1,309 @@
|
|||
/*****************************************************************************
|
||||
Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free Software
|
||||
Foundation; version 2 of the License.
|
||||
|
||||
This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along with
|
||||
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
|
||||
Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
*****************************************************************************/
|
||||
|
||||
/*******************************************************************//**
|
||||
@file include/ut0rbt.h
|
||||
Red-Black tree implementation.
|
||||
|
||||
Created 2007-03-20 Sunny Bains
|
||||
************************************************************************/
|
||||
|
||||
#ifndef INNOBASE_UT0RBT_H
|
||||
#define INNOBASE_UT0RBT_H
|
||||
|
||||
#if !defined(IB_RBT_TESTING)
|
||||
#include "univ.i"
|
||||
#include "ut0mem.h"
|
||||
#else
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
#define ut_malloc malloc
|
||||
#define ut_free free
|
||||
#define ulint unsigned long
|
||||
#define ut_a(c) assert(c)
|
||||
#define ut_error assert(0)
|
||||
#define ibool unsigned int
|
||||
#define TRUE 1
|
||||
#define FALSE 0
|
||||
#endif
|
||||
|
||||
/* Red black tree typedefs */
|
||||
typedef struct ib_rbt_struct ib_rbt_t;
|
||||
typedef struct ib_rbt_node_struct ib_rbt_node_t;
|
||||
/* FIXME: Iterator is a better name than _bound_ */
|
||||
typedef struct ib_rbt_bound_struct ib_rbt_bound_t;
|
||||
typedef void (*ib_rbt_print_node)(const ib_rbt_node_t* node);
|
||||
typedef int (*ib_rbt_compare)(const void* p1, const void* p2);
|
||||
|
||||
/* The two colors a red black tree node can have */
typedef enum ib_rbt_color_enum {
	IB_RBT_RED = 0,
	IB_RBT_BLACK = 1
} ib_rbt_color_t;
|
||||
|
||||
/* Red black tree node */
|
||||
struct ib_rbt_node_struct {
|
||||
ib_rbt_color_t color; /* color of this node */
|
||||
|
||||
ib_rbt_node_t* left; /* points left child */
|
||||
ib_rbt_node_t* right; /* points right child */
|
||||
ib_rbt_node_t* parent; /* points parent node */
|
||||
|
||||
char value[1]; /* Data value */
|
||||
};
|
||||
|
||||
/* Red black tree instance.*/
|
||||
struct ib_rbt_struct {
|
||||
ib_rbt_node_t* nil; /* Black colored node that is
|
||||
used as a sentinel. This is
|
||||
pre-allocated too.*/
|
||||
|
||||
ib_rbt_node_t* root; /* Root of the tree, this is
|
||||
pre-allocated and the first
|
||||
data node is the left child.*/
|
||||
|
||||
ulint n_nodes; /* Total number of data nodes */
|
||||
|
||||
ib_rbt_compare compare; /* Fn. to use for comparison */
|
||||
ulint sizeof_value; /* Sizeof the item in bytes */
|
||||
};
|
||||
|
||||
/* The result of searching for a key in the tree, this is useful for
|
||||
a speedy lookup and insert if key doesn't exist.*/
|
||||
struct ib_rbt_bound_struct {
|
||||
const ib_rbt_node_t*
|
||||
last; /* Last node visited */
|
||||
|
||||
int result; /* Result of comparing with
|
||||
the last non-nil node that
|
||||
was visited */
|
||||
};
|
||||
|
||||
/* All macro arguments below are parenthesized in the expansion, so an
argument may be any expression (for example "trees + 1"), not only a
plain identifier. */

/* Size in elements (t is an rb tree instance) */
#define rbt_size(t)	((t)->n_nodes)

/* Check whether the rb tree is empty (t is an rb tree instance) */
#define rbt_empty(t)	(rbt_size(t) == 0)

/* Get data value (t is the data type, n is an rb tree node instance).
Yields a pointer of type t* to the start of the node's value; t is a
type name and therefore cannot be parenthesized. */
#define rbt_value(t, n)	((t*) &(n)->value[0])

/* Compare a key with the node value (t is tree, k is key, n is node) */
#define rbt_compare(t, k, n)	((t)->compare((k), (n)->value))
|
||||
|
||||
/****************************************************************//**
|
||||
Free an instance of a red black tree */
|
||||
UNIV_INTERN
|
||||
void
|
||||
rbt_free(
|
||||
/*=====*/
|
||||
ib_rbt_t* tree); /*!< in: rb tree to free */
|
||||
/****************************************************************//**
|
||||
Create an instance of a red black tree
|
||||
@return rb tree instance */
|
||||
UNIV_INTERN
|
||||
ib_rbt_t*
|
||||
rbt_create(
|
||||
/*=======*/
|
||||
size_t sizeof_value, /*!< in: size in bytes */
|
||||
ib_rbt_compare compare); /*!< in: comparator */
|
||||
/****************************************************************//**
|
||||
Delete a node from the red black tree, identified by key.
|
||||
@return TRUE if success FALSE if not found */
|
||||
UNIV_INTERN
|
||||
ibool
|
||||
rbt_delete(
|
||||
/*=======*/
|
||||
ib_rbt_t* tree, /*!< in: rb tree */
|
||||
const void* key); /*!< in: key to delete */
|
||||
/****************************************************************//**
|
||||
Remove a node from the rb tree, the node is not free'd, that is the
|
||||
callers responsibility.
|
||||
@return the deleted node with the const. */
|
||||
UNIV_INTERN
|
||||
ib_rbt_node_t*
|
||||
rbt_remove_node(
|
||||
/*============*/
|
||||
ib_rbt_t* tree, /*!< in: rb tree */
|
||||
const ib_rbt_node_t*
|
||||
node); /*!< in: node to delete, this
|
||||
is a fudge and declared const
|
||||
because the caller has access
|
||||
only to const nodes.*/
|
||||
/****************************************************************//**
|
||||
Find a matching node in the rb tree.
|
||||
@return node if found else return NULL */
|
||||
UNIV_INTERN
|
||||
const ib_rbt_node_t*
|
||||
rbt_lookup(
|
||||
/*=======*/
|
||||
const ib_rbt_t* tree, /*!< in: rb tree to search */
|
||||
const void* key); /*!< in: key to lookup */
|
||||
/****************************************************************//**
|
||||
Generic insert of a value in the rb tree.
|
||||
@return inserted node */
|
||||
UNIV_INTERN
|
||||
const ib_rbt_node_t*
|
||||
rbt_insert(
|
||||
/*=======*/
|
||||
ib_rbt_t* tree, /*!< in: rb tree */
|
||||
const void* key, /*!< in: key for ordering */
|
||||
const void* value); /*!< in: data that will be
|
||||
copied to the node.*/
|
||||
/****************************************************************//**
|
||||
Add a new node to the tree, useful for data that is pre-sorted.
|
||||
@return appended node */
|
||||
UNIV_INTERN
|
||||
const ib_rbt_node_t*
|
||||
rbt_add_node(
|
||||
/*=========*/
|
||||
ib_rbt_t* tree, /*!< in: rb tree */
|
||||
ib_rbt_bound_t* parent, /*!< in: parent */
|
||||
const void* value); /*!< in: this value is copied
|
||||
to the node */
|
||||
/****************************************************************//**
|
||||
Return the left most data node in the tree
|
||||
@return left most node */
|
||||
UNIV_INTERN
|
||||
const ib_rbt_node_t*
|
||||
rbt_first(
|
||||
/*======*/
|
||||
const ib_rbt_t* tree); /*!< in: rb tree */
|
||||
/****************************************************************//**
|
||||
Return the right most data node in the tree
|
||||
@return right most node */
|
||||
UNIV_INTERN
|
||||
const ib_rbt_node_t*
|
||||
rbt_last(
|
||||
/*=====*/
|
||||
const ib_rbt_t* tree); /*!< in: rb tree */
|
||||
/****************************************************************//**
|
||||
Return the next node from current.
|
||||
@return successor node to current that is passed in. */
|
||||
UNIV_INTERN
|
||||
const ib_rbt_node_t*
|
||||
rbt_next(
|
||||
/*=====*/
|
||||
const ib_rbt_t* tree, /*!< in: rb tree */
|
||||
const ib_rbt_node_t* /*!< in: current node */
|
||||
current);
|
||||
/****************************************************************//**
|
||||
Return the prev node from current.
|
||||
@return predecessor node to current that is passed in */
|
||||
UNIV_INTERN
|
||||
const ib_rbt_node_t*
|
||||
rbt_prev(
|
||||
/*=====*/
|
||||
const ib_rbt_t* tree, /*!< in: rb tree */
|
||||
const ib_rbt_node_t* /*!< in: current node */
|
||||
current);
|
||||
/****************************************************************//**
|
||||
Find the node that has the lowest key that is >= key.
|
||||
@return node that satisfies the lower bound constraint or NULL */
|
||||
UNIV_INTERN
|
||||
const ib_rbt_node_t*
|
||||
rbt_lower_bound(
|
||||
/*============*/
|
||||
const ib_rbt_t* tree, /*!< in: rb tree */
|
||||
const void* key); /*!< in: key to search */
|
||||
/****************************************************************//**
|
||||
Find the node that has the greatest key that is <= key.
|
||||
@return node that satisfies the upper bound constraint or NULL */
|
||||
UNIV_INTERN
|
||||
const ib_rbt_node_t*
|
||||
rbt_upper_bound(
|
||||
/*============*/
|
||||
const ib_rbt_t* tree, /*!< in: rb tree */
|
||||
const void* key); /*!< in: key to search */
|
||||
/****************************************************************//**
|
||||
Search for the key, a node will be returned in parent.last, whether it
|
||||
was found or not. If not found then parent.last will contain the
|
||||
parent node for the possibly new key otherwise the matching node.
|
||||
@return result of last comparison */
|
||||
UNIV_INTERN
|
||||
int
|
||||
rbt_search(
|
||||
/*=======*/
|
||||
const ib_rbt_t* tree, /*!< in: rb tree */
|
||||
ib_rbt_bound_t* parent, /*!< in: search bounds */
|
||||
const void* key); /*!< in: key to search */
|
||||
/****************************************************************//**
|
||||
Search for the key, a node will be returned in parent.last, whether it
|
||||
was found or not. If not found then parent.last will contain the
|
||||
parent node for the possibly new key otherwise the matching node.
|
||||
@return result of last comparison */
|
||||
UNIV_INTERN
|
||||
int
|
||||
rbt_search_cmp(
|
||||
/*===========*/
|
||||
const ib_rbt_t* tree, /*!< in: rb tree */
|
||||
ib_rbt_bound_t* parent, /*!< in: search bounds */
|
||||
const void* key, /*!< in: key to search */
|
||||
ib_rbt_compare compare); /*!< in: comparator */
|
||||
/****************************************************************//**
|
||||
Clear the tree, deletes (and free's) all the nodes. */
|
||||
UNIV_INTERN
|
||||
void
|
||||
rbt_clear(
|
||||
/*======*/
|
||||
ib_rbt_t* tree); /*!< in: rb tree */
|
||||
/****************************************************************//**
|
||||
Merge the nodes from src into dst. Return the number of nodes merged.
|
||||
@return no. of recs merged */
|
||||
UNIV_INTERN
|
||||
ulint
|
||||
rbt_merge_uniq(
|
||||
/*===========*/
|
||||
ib_rbt_t* dst, /*!< in: dst rb tree */
|
||||
const ib_rbt_t* src); /*!< in: src rb tree */
|
||||
/****************************************************************//**
|
||||
Merge the nodes from src into dst. Return the number of nodes merged.
|
||||
Delete the nodes from src after copying node to dst. As a side effect
|
||||
the duplicates will be left untouched in the src, since we don't support
|
||||
duplicates (yet). NOTE: src and dst must be similar, the function doesn't
|
||||
check for this condition (yet).
|
||||
@return no. of recs merged */
|
||||
UNIV_INTERN
|
||||
ulint
|
||||
rbt_merge_uniq_destructive(
|
||||
/*=======================*/
|
||||
ib_rbt_t* dst, /*!< in: dst rb tree */
|
||||
ib_rbt_t* src); /*!< in: src rb tree */
|
||||
/****************************************************************//**
|
||||
Verify the integrity of the RB tree. For debugging. 0 failure else height
|
||||
of tree (in count of black nodes).
|
||||
@return TRUE if OK FALSE if tree invalid. */
|
||||
UNIV_INTERN
|
||||
ibool
|
||||
rbt_validate(
|
||||
/*=========*/
|
||||
const ib_rbt_t* tree); /*!< in: tree to validate */
|
||||
/****************************************************************//**
|
||||
Iterate over the tree in depth first order. */
|
||||
UNIV_INTERN
|
||||
void
|
||||
rbt_print(
|
||||
/*======*/
|
||||
const ib_rbt_t* tree, /*!< in: tree to traverse */
|
||||
ib_rbt_print_node print); /*!< in: print function */
|
||||
|
||||
#endif /* INNOBASE_UT0RBT_H */
|
|
@ -138,7 +138,9 @@ UNIV_INTERN ulint recv_max_parsed_page_no;
|
|||
/** This many frames must be left free in the buffer pool when we scan
|
||||
the log and store the scanned log records in the buffer pool: we will
|
||||
use these free frames to read in pages when we start applying the
|
||||
log records to the database. */
|
||||
log records to the database.
|
||||
This is the default value. If the actual size of the buffer pool is
|
||||
at least 10 MB we'll set this value to 512. */
|
||||
UNIV_INTERN ulint recv_n_pool_free_frames;
|
||||
|
||||
/** The maximum lsn we see for a page during the recovery process. If this
|
||||
|
@ -294,6 +296,12 @@ recv_sys_init(
|
|||
return;
|
||||
}
|
||||
|
||||
/* Initialize red-black tree for fast insertions into the
|
||||
flush_list during recovery process.
|
||||
As this initialization is done while holding the buffer pool
|
||||
mutex we perform it before acquiring recv_sys->mutex. */
|
||||
buf_flush_init_flush_rbt();
|
||||
|
||||
mutex_enter(&(recv_sys->mutex));
|
||||
|
||||
#ifndef UNIV_HOTBACKUP
|
||||
|
@ -303,6 +311,12 @@ recv_sys_init(
|
|||
recv_is_from_backup = TRUE;
|
||||
#endif /* !UNIV_HOTBACKUP */
|
||||
|
||||
/* Set appropriate value of recv_n_pool_free_frames. */
|
||||
if (buf_pool_get_curr_size() >= (10 * 1024 * 1024)) {
|
||||
/* Buffer pool of size at least 10 MB. */
|
||||
recv_n_pool_free_frames = 512;
|
||||
}
|
||||
|
||||
recv_sys->buf = ut_malloc(RECV_PARSING_BUF_SIZE);
|
||||
recv_sys->len = 0;
|
||||
recv_sys->recovered_offset = 0;
|
||||
|
@ -372,6 +386,9 @@ recv_sys_debug_free(void)
|
|||
recv_sys->last_block_buf_start = NULL;
|
||||
|
||||
mutex_exit(&(recv_sys->mutex));
|
||||
|
||||
/* Free up the flush_rbt. */
|
||||
buf_flush_free_flush_rbt();
|
||||
}
|
||||
# endif /* UNIV_LOG_DEBUG */
|
||||
|
||||
|
|
1249
ut/ut0rbt.c
Normal file
1249
ut/ut0rbt.c
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Reference in a new issue