2009-05-27 15:15:59 +05:30
|
|
|
/*****************************************************************************
|
|
|
|
|
2010-04-01 17:01:22 +04:00
|
|
|
Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
|
2009-05-27 15:15:59 +05:30
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it under
|
|
|
|
the terms of the GNU General Public License as published by the Free Software
|
|
|
|
Foundation; version 2 of the License.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful, but WITHOUT
|
|
|
|
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
|
|
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License along with
|
|
|
|
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
|
|
|
|
Place, Suite 330, Boston, MA 02111-1307 USA
|
|
|
|
|
|
|
|
*****************************************************************************/
|
|
|
|
|
|
|
|
/**************************************************//**
|
|
|
|
@file buf/buf0rea.c
|
|
|
|
The database buffer read
|
|
|
|
|
|
|
|
Created 11/5/1995 Heikki Tuuri
|
|
|
|
*******************************************************/
|
|
|
|
|
|
|
|
#include "buf0rea.h"
|
|
|
|
|
|
|
|
#include "fil0fil.h"
|
|
|
|
#include "mtr0mtr.h"
|
|
|
|
|
|
|
|
#include "buf0buf.h"
|
|
|
|
#include "buf0flu.h"
|
|
|
|
#include "buf0lru.h"
|
|
|
|
#include "ibuf0ibuf.h"
|
|
|
|
#include "log0recv.h"
|
|
|
|
#include "trx0sys.h"
|
|
|
|
#include "os0file.h"
|
|
|
|
#include "srv0start.h"
|
|
|
|
#include "srv0srv.h"
|
|
|
|
|
|
|
|
/** The linear read-ahead area size */
|
|
|
|
#define BUF_READ_AHEAD_LINEAR_AREA BUF_READ_AHEAD_AREA
|
|
|
|
|
|
|
|
/** If pending reads amount to buf_pool->curr_size divided by the number below, then
|
|
|
|
read-ahead is not done: this is to prevent flooding the buffer pool with
|
|
|
|
i/o-fixed buffer blocks */
|
|
|
|
#define BUF_READ_AHEAD_PEND_LIMIT 2
|
|
|
|
|
|
|
|
/********************************************************************//**
|
|
|
|
Low-level function which reads a page asynchronously from a file to the
|
|
|
|
buffer buf_pool if it is not already there, in which case does nothing.
|
|
|
|
Sets the io_fix flag and sets an exclusive lock on the buffer frame. The
|
|
|
|
flag is cleared and the x-lock released by an i/o-handler thread.
|
|
|
|
@return 1 if a read request was queued, 0 if the page already resided
|
|
|
|
in buf_pool, or if the page is in the doublewrite buffer blocks in
|
|
|
|
which case it is never read into the pool, or if the tablespace does
|
Applying InnoDB Plugin 1.0.5 snapshot, part 4
From revision r5703 to r5716
Detailed revision comments:
r5703 | marko | 2009-08-27 02:25:00 -0500 (Thu, 27 Aug 2009) | 41 lines
branches/zip: Replace the constant 3/8 ratio that controls the LRU_old
size with the settable global variable innodb_old_blocks_pct. The
minimum and maximum values are 5 and 95 per cent, respectively. The
default is 100*3/8, in line with the old behavior.
ut_time_ms(): New utility function, to return the current time in
milliseconds. TODO: Is there a more efficient timestamp function, such
as rdtsc divided by a power of two?
buf_LRU_old_threshold_ms: New variable, corresponding to
innodb_old_blocks_time. The value 0 is the default behaviour: no
timeout before making blocks 'new'.
bpage->accessed, bpage->LRU_position, buf_pool->ulint_clock: Remove.
bpage->access_time: New field, replacing bpage->accessed. Protected by
buf_pool_mutex instead of bpage->mutex. Updated when a page is created
or accessed the first time in the buffer pool.
buf_LRU_old_ratio, innobase_old_blocks_pct: New variables,
corresponding to innodb_old_blocks_pct
buf_LRU_old_ratio_update(), innobase_old_blocks_pct_update(): Update
functions for buf_LRU_old_ratio, innobase_old_blocks_pct.
buf_page_peek_if_too_old(): Compare ut_time_ms() to bpage->access_time
if buf_LRU_old_threshold_ms && bpage->old. Else observe
buf_LRU_old_ratio and bpage->freed_page_clock.
buf_pool_t: Add n_pages_made_young, n_pages_not_made_young,
n_pages_made_young_old, n_pages_not_made_young, for statistics.
buf_print(): Display buf_pool->n_pages_made_young,
buf_pool->n_pages_not_made_young. This function is only for crash
diagnostics.
buf_print_io(): Display buf_pool->LRU_old_len and quantities derived
from buf_pool->n_pages_made_young, buf_pool->n_pages_not_made_young.
This function is invoked by SHOW ENGINE INNODB STATUS.
rb://129 approved by Heikki Tuuri. This addresses Bug #45015.
r5704 | marko | 2009-08-27 03:31:17 -0500 (Thu, 27 Aug 2009) | 32 lines
branches/zip: Fix a critical bug in fast index creation that could
corrupt the created indexes.
row_merge(): Make "half" an in/out parameter. Determine the offset of
half the output file. Copy the last blocks record-by-record instead of
block-by-block, so that the records can be counted. Check that the
input and output have matching n_rec.
row_merge_sort(): Do not assume that two blocks of size N are merged
into a block of size 2*N. The output block can be shorter than the
input if the last page of each input block is almost empty. Use an
accurate termination condition, based on the "half" computed by
row_merge().
row_merge_read(), row_merge_write(), row_merge_blocks(): Add debug output.
merge_file_t, row_merge_file_create(): Add n_rec, the number of records
in the merge file.
row_merge_read_clustered_index(): Update n_rec.
row_merge_blocks(): Update and check n_rec.
row_merge_blocks_copy(): New function, for copying the last blocks in
row_merge(). Update and check n_rec.
This bug was discovered with a user-supplied test case that creates an
index where the initial temporary file is 249 one-megabyte blocks and
the merged files become smaller. In the test, possible merge record
sizes are 10, 18, and 26 bytes.
rb://150 approved by Sunny Bains. This addresses Issue #320.
r5705 | marko | 2009-08-27 06:56:24 -0500 (Thu, 27 Aug 2009) | 11 lines
branches/zip: dict_index_find_cols(): On column name lookup failure,
return DB_CORRUPTION (HA_ERR_CRASHED) instead of abnormally
terminating the server. Also, disable the previously added diagnostic
output to the error log, because mysql-test-run does not like extra
output in the error log. (Bug #44571)
dict_index_add_to_cache(): Handle errors from dict_index_find_cols().
mysql-test/innodb_bug44571.test: A test case for triggering the bug.
rb://135 approved by Sunny Bains.
r5706 | inaam | 2009-08-27 11:00:27 -0500 (Thu, 27 Aug 2009) | 20 lines
branches/zip rb://147
Done away with following two status variables:
innodb_buffer_pool_read_ahead_rnd
innodb_buffer_pool_read_ahead_seq
Introduced two new status variables:
innodb_buffer_pool_read_ahead = number of pages read as part of
readahead since server startup
innodb_buffer_pool_read_ahead_evicted = number of pages that are read
in as readahead but were evicted before ever being accessed since
server startup i.e.: a measure of how badly our readahead is
performing
SHOW INNODB STATUS will show two extra numbers in buffer pool section:
pages read ahead/sec and pages evicted without access/sec
Approved by: Marko
r5707 | inaam | 2009-08-27 11:20:35 -0500 (Thu, 27 Aug 2009) | 6 lines
branches/zip
Remove unused macros as we erased the random readahead code in r5703.
Also fixed some comments.
r5708 | inaam | 2009-08-27 17:43:32 -0500 (Thu, 27 Aug 2009) | 4 lines
branches/zip
Remove redundant TRUE : FALSE from the return statement
r5709 | inaam | 2009-08-28 01:22:46 -0500 (Fri, 28 Aug 2009) | 5 lines
branches/zip rb://152
Disable display of deprecated parameter innodb_file_io_threads in
'show variables'.
r5714 | marko | 2009-08-31 01:10:10 -0500 (Mon, 31 Aug 2009) | 5 lines
branches/zip: buf_chunk_not_freed(): Do not acquire block->mutex unless
block->page.state == BUF_BLOCK_FILE_PAGE. Check that block->page.state
makes sense.
Approved by Sunny Bains over the IM.
r5716 | vasil | 2009-08-31 02:47:49 -0500 (Mon, 31 Aug 2009) | 9 lines
branches/zip:
Fix Bug#46718 InnoDB plugin incompatible with gcc 4.1 (at least: on PPC): "Undefined symbol"
by implementing our own check in plug.in instead of using the result from
the check from MySQL because it is insufficient.
Approved by: Marko (rb://154)
2009-10-08 16:58:37 +05:30
|
|
|
not exist or is being dropped
|
|
|
|
@return 1 if read request is issued. 0 if it is not */
|
2009-05-27 15:15:59 +05:30
|
|
|
static
|
|
|
|
ulint
|
|
|
|
buf_read_page_low(
|
|
|
|
/*==============*/
|
|
|
|
ulint* err, /*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED if we are
|
|
|
|
trying to read from a non-existent tablespace, or a
|
|
|
|
tablespace which is just now being dropped */
|
|
|
|
ibool sync, /*!< in: TRUE if synchronous aio is desired */
|
|
|
|
ulint mode, /*!< in: BUF_READ_IBUF_PAGES_ONLY, ...,
|
|
|
|
ORed to OS_AIO_SIMULATED_WAKE_LATER (see below
|
|
|
|
at read-ahead functions) */
|
|
|
|
ulint space, /*!< in: space id */
|
|
|
|
ulint zip_size,/*!< in: compressed page size, or 0 */
|
|
|
|
ibool unzip, /*!< in: TRUE=request uncompressed page */
|
|
|
|
ib_int64_t tablespace_version, /*!< in: if the space memory object has
|
|
|
|
this timestamp different from what we are giving here,
|
|
|
|
treat the tablespace as dropped; this is a timestamp we
|
|
|
|
use to stop dangling page reads from a tablespace
|
|
|
|
which we have DISCARDed + IMPORTed back */
|
|
|
|
ulint offset) /*!< in: page number */
|
|
|
|
{
|
|
|
|
buf_page_t* bpage;
|
|
|
|
ulint wake_later;
|
|
|
|
|
|
|
|
*err = DB_SUCCESS;
|
|
|
|
|
|
|
|
wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER;
|
|
|
|
mode = mode & ~OS_AIO_SIMULATED_WAKE_LATER;
|
|
|
|
|
|
|
|
if (trx_doublewrite && space == TRX_SYS_SPACE
|
|
|
|
&& ( (offset >= trx_doublewrite->block1
|
|
|
|
&& offset < trx_doublewrite->block1
|
|
|
|
+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
|
|
|
|
|| (offset >= trx_doublewrite->block2
|
|
|
|
&& offset < trx_doublewrite->block2
|
|
|
|
+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE))) {
|
|
|
|
ut_print_timestamp(stderr);
|
|
|
|
fprintf(stderr,
|
|
|
|
" InnoDB: Warning: trying to read"
|
|
|
|
" doublewrite buffer page %lu\n",
|
|
|
|
(ulong) offset);
|
|
|
|
|
|
|
|
return(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (ibuf_bitmap_page(zip_size, offset)
|
|
|
|
|| trx_sys_hdr_page(space, offset)) {
|
|
|
|
|
|
|
|
/* Trx sys header is so low in the latching order that we play
|
|
|
|
safe and do not leave the i/o-completion to an asynchronous
|
|
|
|
i/o-thread. Ibuf bitmap pages must always be read with
|
|
|
|
syncronous i/o, to make sure they do not get involved in
|
|
|
|
thread deadlocks. */
|
|
|
|
|
|
|
|
sync = TRUE;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* The following call will also check if the tablespace does not exist
|
|
|
|
or is being dropped; if we succeed in initing the page in the buffer
|
|
|
|
pool for read, then DISCARD cannot proceed until the read has
|
|
|
|
completed */
|
|
|
|
bpage = buf_page_init_for_read(err, mode, space, zip_size, unzip,
|
|
|
|
tablespace_version, offset);
|
|
|
|
if (bpage == NULL) {
|
|
|
|
|
|
|
|
return(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef UNIV_DEBUG
|
|
|
|
if (buf_debug_prints) {
|
|
|
|
fprintf(stderr,
|
|
|
|
"Posting read request for page %lu, sync %lu\n",
|
|
|
|
(ulong) offset,
|
|
|
|
(ulong) sync);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
ut_ad(buf_page_in_file(bpage));
|
|
|
|
|
|
|
|
if (zip_size) {
|
|
|
|
*err = fil_io(OS_FILE_READ | wake_later,
|
|
|
|
sync, space, zip_size, offset, 0, zip_size,
|
|
|
|
bpage->zip.data, bpage);
|
|
|
|
} else {
|
|
|
|
ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
|
|
|
|
|
|
|
|
*err = fil_io(OS_FILE_READ | wake_later,
|
|
|
|
sync, space, 0, offset, 0, UNIV_PAGE_SIZE,
|
|
|
|
((buf_block_t*) bpage)->frame, bpage);
|
|
|
|
}
|
|
|
|
ut_a(*err == DB_SUCCESS);
|
|
|
|
|
|
|
|
if (sync) {
|
|
|
|
/* The i/o is already completed when we arrive from
|
|
|
|
fil_read */
|
|
|
|
buf_page_io_complete(bpage);
|
|
|
|
}
|
|
|
|
|
|
|
|
return(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
/********************************************************************//**
|
|
|
|
High-level function which reads a page asynchronously from a file to the
|
|
|
|
buffer buf_pool if it is not already there. Sets the io_fix flag and sets
|
|
|
|
an exclusive lock on the buffer frame. The flag is cleared and the x-lock
|
Applying InnoDB Plugin 1.0.5 snapshot, part 4
From revision r5703 to r5716
Detailed revision comments:
r5703 | marko | 2009-08-27 02:25:00 -0500 (Thu, 27 Aug 2009) | 41 lines
branches/zip: Replace the constant 3/8 ratio that controls the LRU_old
size with the settable global variable innodb_old_blocks_pct. The
minimum and maximum values are 5 and 95 per cent, respectively. The
default is 100*3/8, in line with the old behavior.
ut_time_ms(): New utility function, to return the current time in
milliseconds. TODO: Is there a more efficient timestamp function, such
as rdtsc divided by a power of two?
buf_LRU_old_threshold_ms: New variable, corresponding to
innodb_old_blocks_time. The value 0 is the default behaviour: no
timeout before making blocks 'new'.
bpage->accessed, bpage->LRU_position, buf_pool->ulint_clock: Remove.
bpage->access_time: New field, replacing bpage->accessed. Protected by
buf_pool_mutex instead of bpage->mutex. Updated when a page is created
or accessed the first time in the buffer pool.
buf_LRU_old_ratio, innobase_old_blocks_pct: New variables,
corresponding to innodb_old_blocks_pct
buf_LRU_old_ratio_update(), innobase_old_blocks_pct_update(): Update
functions for buf_LRU_old_ratio, innobase_old_blocks_pct.
buf_page_peek_if_too_old(): Compare ut_time_ms() to bpage->access_time
if buf_LRU_old_threshold_ms && bpage->old. Else observe
buf_LRU_old_ratio and bpage->freed_page_clock.
buf_pool_t: Add n_pages_made_young, n_pages_not_made_young,
n_pages_made_young_old, n_pages_not_made_young, for statistics.
buf_print(): Display buf_pool->n_pages_made_young,
buf_pool->n_pages_not_made_young. This function is only for crash
diagnostics.
buf_print_io(): Display buf_pool->LRU_old_len and quantities derived
from buf_pool->n_pages_made_young, buf_pool->n_pages_not_made_young.
This function is invoked by SHOW ENGINE INNODB STATUS.
rb://129 approved by Heikki Tuuri. This addresses Bug #45015.
r5704 | marko | 2009-08-27 03:31:17 -0500 (Thu, 27 Aug 2009) | 32 lines
branches/zip: Fix a critical bug in fast index creation that could
corrupt the created indexes.
row_merge(): Make "half" an in/out parameter. Determine the offset of
half the output file. Copy the last blocks record-by-record instead of
block-by-block, so that the records can be counted. Check that the
input and output have matching n_rec.
row_merge_sort(): Do not assume that two blocks of size N are merged
into a block of size 2*N. The output block can be shorter than the
input if the last page of each input block is almost empty. Use an
accurate termination condition, based on the "half" computed by
row_merge().
row_merge_read(), row_merge_write(), row_merge_blocks(): Add debug output.
merge_file_t, row_merge_file_create(): Add n_rec, the number of records
in the merge file.
row_merge_read_clustered_index(): Update n_rec.
row_merge_blocks(): Update and check n_rec.
row_merge_blocks_copy(): New function, for copying the last blocks in
row_merge(). Update and check n_rec.
This bug was discovered with a user-supplied test case that creates an
index where the initial temporary file is 249 one-megabyte blocks and
the merged files become smaller. In the test, possible merge record
sizes are 10, 18, and 26 bytes.
rb://150 approved by Sunny Bains. This addresses Issue #320.
r5705 | marko | 2009-08-27 06:56:24 -0500 (Thu, 27 Aug 2009) | 11 lines
branches/zip: dict_index_find_cols(): On column name lookup failure,
return DB_CORRUPTION (HA_ERR_CRASHED) instead of abnormally
terminating the server. Also, disable the previously added diagnostic
output to the error log, because mysql-test-run does not like extra
output in the error log. (Bug #44571)
dict_index_add_to_cache(): Handle errors from dict_index_find_cols().
mysql-test/innodb_bug44571.test: A test case for triggering the bug.
rb://135 approved by Sunny Bains.
r5706 | inaam | 2009-08-27 11:00:27 -0500 (Thu, 27 Aug 2009) | 20 lines
branches/zip rb://147
Done away with following two status variables:
innodb_buffer_pool_read_ahead_rnd
innodb_buffer_pool_read_ahead_seq
Introduced two new status variables:
innodb_buffer_pool_read_ahead = number of pages read as part of
readahead since server startup
innodb_buffer_pool_read_ahead_evicted = number of pages that are read
in as readahead but were evicted before ever being accessed since
server startup i.e.: a measure of how badly our readahead is
performing
SHOW INNODB STATUS will show two extra numbers in buffer pool section:
pages read ahead/sec and pages evicted without access/sec
Approved by: Marko
r5707 | inaam | 2009-08-27 11:20:35 -0500 (Thu, 27 Aug 2009) | 6 lines
branches/zip
Remove unused macros as we erased the random readahead code in r5703.
Also fixed some comments.
r5708 | inaam | 2009-08-27 17:43:32 -0500 (Thu, 27 Aug 2009) | 4 lines
branches/zip
Remove redundant TRUE : FALSE from the return statement
r5709 | inaam | 2009-08-28 01:22:46 -0500 (Fri, 28 Aug 2009) | 5 lines
branches/zip rb://152
Disable display of deprecated parameter innodb_file_io_threads in
'show variables'.
r5714 | marko | 2009-08-31 01:10:10 -0500 (Mon, 31 Aug 2009) | 5 lines
branches/zip: buf_chunk_not_freed(): Do not acquire block->mutex unless
block->page.state == BUF_BLOCK_FILE_PAGE. Check that block->page.state
makes sense.
Approved by Sunny Bains over the IM.
r5716 | vasil | 2009-08-31 02:47:49 -0500 (Mon, 31 Aug 2009) | 9 lines
branches/zip:
Fix Bug#46718 InnoDB plugin incompatible with gcc 4.1 (at least: on PPC): "Undefined symbol"
by implementing our own check in plug.in instead of using the result from
the check from MySQL because it is insufficient.
Approved by: Marko (rb://154)
2009-10-08 16:58:37 +05:30
|
|
|
released by the i/o-handler thread.
|
|
|
|
@return TRUE if page has been read in, FALSE in case of failure */
|
2009-05-27 15:15:59 +05:30
|
|
|
UNIV_INTERN
|
Applying InnoDB Plugin 1.0.5 snapshot, part 4
From revision r5703 to r5716
Detailed revision comments:
r5703 | marko | 2009-08-27 02:25:00 -0500 (Thu, 27 Aug 2009) | 41 lines
branches/zip: Replace the constant 3/8 ratio that controls the LRU_old
size with the settable global variable innodb_old_blocks_pct. The
minimum and maximum values are 5 and 95 per cent, respectively. The
default is 100*3/8, in line with the old behavior.
ut_time_ms(): New utility function, to return the current time in
milliseconds. TODO: Is there a more efficient timestamp function, such
as rdtsc divided by a power of two?
buf_LRU_old_threshold_ms: New variable, corresponding to
innodb_old_blocks_time. The value 0 is the default behaviour: no
timeout before making blocks 'new'.
bpage->accessed, bpage->LRU_position, buf_pool->ulint_clock: Remove.
bpage->access_time: New field, replacing bpage->accessed. Protected by
buf_pool_mutex instead of bpage->mutex. Updated when a page is created
or accessed the first time in the buffer pool.
buf_LRU_old_ratio, innobase_old_blocks_pct: New variables,
corresponding to innodb_old_blocks_pct
buf_LRU_old_ratio_update(), innobase_old_blocks_pct_update(): Update
functions for buf_LRU_old_ratio, innobase_old_blocks_pct.
buf_page_peek_if_too_old(): Compare ut_time_ms() to bpage->access_time
if buf_LRU_old_threshold_ms && bpage->old. Else observe
buf_LRU_old_ratio and bpage->freed_page_clock.
buf_pool_t: Add n_pages_made_young, n_pages_not_made_young,
n_pages_made_young_old, n_pages_not_made_young, for statistics.
buf_print(): Display buf_pool->n_pages_made_young,
buf_pool->n_pages_not_made_young. This function is only for crash
diagnostics.
buf_print_io(): Display buf_pool->LRU_old_len and quantities derived
from buf_pool->n_pages_made_young, buf_pool->n_pages_not_made_young.
This function is invoked by SHOW ENGINE INNODB STATUS.
rb://129 approved by Heikki Tuuri. This addresses Bug #45015.
r5704 | marko | 2009-08-27 03:31:17 -0500 (Thu, 27 Aug 2009) | 32 lines
branches/zip: Fix a critical bug in fast index creation that could
corrupt the created indexes.
row_merge(): Make "half" an in/out parameter. Determine the offset of
half the output file. Copy the last blocks record-by-record instead of
block-by-block, so that the records can be counted. Check that the
input and output have matching n_rec.
row_merge_sort(): Do not assume that two blocks of size N are merged
into a block of size 2*N. The output block can be shorter than the
input if the last page of each input block is almost empty. Use an
accurate termination condition, based on the "half" computed by
row_merge().
row_merge_read(), row_merge_write(), row_merge_blocks(): Add debug output.
merge_file_t, row_merge_file_create(): Add n_rec, the number of records
in the merge file.
row_merge_read_clustered_index(): Update n_rec.
row_merge_blocks(): Update and check n_rec.
row_merge_blocks_copy(): New function, for copying the last blocks in
row_merge(). Update and check n_rec.
This bug was discovered with a user-supplied test case that creates an
index where the initial temporary file is 249 one-megabyte blocks and
the merged files become smaller. In the test, possible merge record
sizes are 10, 18, and 26 bytes.
rb://150 approved by Sunny Bains. This addresses Issue #320.
r5705 | marko | 2009-08-27 06:56:24 -0500 (Thu, 27 Aug 2009) | 11 lines
branches/zip: dict_index_find_cols(): On column name lookup failure,
return DB_CORRUPTION (HA_ERR_CRASHED) instead of abnormally
terminating the server. Also, disable the previously added diagnostic
output to the error log, because mysql-test-run does not like extra
output in the error log. (Bug #44571)
dict_index_add_to_cache(): Handle errors from dict_index_find_cols().
mysql-test/innodb_bug44571.test: A test case for triggering the bug.
rb://135 approved by Sunny Bains.
r5706 | inaam | 2009-08-27 11:00:27 -0500 (Thu, 27 Aug 2009) | 20 lines
branches/zip rb://147
Done away with following two status variables:
innodb_buffer_pool_read_ahead_rnd
innodb_buffer_pool_read_ahead_seq
Introduced two new status variables:
innodb_buffer_pool_read_ahead = number of pages read as part of
readahead since server startup
innodb_buffer_pool_read_ahead_evicted = number of pages that are read
in as readahead but were evicted before ever being accessed since
server startup i.e.: a measure of how badly our readahead is
performing
SHOW INNODB STATUS will show two extra numbers in buffer pool section:
pages read ahead/sec and pages evicted without access/sec
Approved by: Marko
r5707 | inaam | 2009-08-27 11:20:35 -0500 (Thu, 27 Aug 2009) | 6 lines
branches/zip
Remove unused macros as we erased the random readahead code in r5703.
Also fixed some comments.
r5708 | inaam | 2009-08-27 17:43:32 -0500 (Thu, 27 Aug 2009) | 4 lines
branches/zip
Remove redundant TRUE : FALSE from the return statement
r5709 | inaam | 2009-08-28 01:22:46 -0500 (Fri, 28 Aug 2009) | 5 lines
branches/zip rb://152
Disable display of deprecated parameter innodb_file_io_threads in
'show variables'.
r5714 | marko | 2009-08-31 01:10:10 -0500 (Mon, 31 Aug 2009) | 5 lines
branches/zip: buf_chunk_not_freed(): Do not acquire block->mutex unless
block->page.state == BUF_BLOCK_FILE_PAGE. Check that block->page.state
makes sense.
Approved by Sunny Bains over the IM.
r5716 | vasil | 2009-08-31 02:47:49 -0500 (Mon, 31 Aug 2009) | 9 lines
branches/zip:
Fix Bug#46718 InnoDB plugin incompatible with gcc 4.1 (at least: on PPC): "Undefined symbol"
by implementing our own check in plug.in instead of using the result from
the check from MySQL because it is insufficient.
Approved by: Marko (rb://154)
2009-10-08 16:58:37 +05:30
|
|
|
ibool
|
2009-05-27 15:15:59 +05:30
|
|
|
buf_read_page(
|
|
|
|
/*==========*/
|
|
|
|
ulint space, /*!< in: space id */
|
|
|
|
ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
|
|
|
|
ulint offset) /*!< in: page number */
|
|
|
|
{
|
|
|
|
ib_int64_t tablespace_version;
|
|
|
|
ulint count;
|
|
|
|
ulint err;
|
|
|
|
|
|
|
|
tablespace_version = fil_space_get_version(space);
|
|
|
|
|
|
|
|
/* We do the i/o in the synchronous aio mode to save thread
|
|
|
|
switches: hence TRUE */
|
|
|
|
|
Applying InnoDB Plugin 1.0.5 snapshot, part 4
From revision r5703 to r5716
Detailed revision comments:
r5703 | marko | 2009-08-27 02:25:00 -0500 (Thu, 27 Aug 2009) | 41 lines
branches/zip: Replace the constant 3/8 ratio that controls the LRU_old
size with the settable global variable innodb_old_blocks_pct. The
minimum and maximum values are 5 and 95 per cent, respectively. The
default is 100*3/8, in line with the old behavior.
ut_time_ms(): New utility function, to return the current time in
milliseconds. TODO: Is there a more efficient timestamp function, such
as rdtsc divided by a power of two?
buf_LRU_old_threshold_ms: New variable, corresponding to
innodb_old_blocks_time. The value 0 is the default behaviour: no
timeout before making blocks 'new'.
bpage->accessed, bpage->LRU_position, buf_pool->ulint_clock: Remove.
bpage->access_time: New field, replacing bpage->accessed. Protected by
buf_pool_mutex instead of bpage->mutex. Updated when a page is created
or accessed the first time in the buffer pool.
buf_LRU_old_ratio, innobase_old_blocks_pct: New variables,
corresponding to innodb_old_blocks_pct
buf_LRU_old_ratio_update(), innobase_old_blocks_pct_update(): Update
functions for buf_LRU_old_ratio, innobase_old_blocks_pct.
buf_page_peek_if_too_old(): Compare ut_time_ms() to bpage->access_time
if buf_LRU_old_threshold_ms && bpage->old. Else observe
buf_LRU_old_ratio and bpage->freed_page_clock.
buf_pool_t: Add n_pages_made_young, n_pages_not_made_young,
n_pages_made_young_old, n_pages_not_made_young, for statistics.
buf_print(): Display buf_pool->n_pages_made_young,
buf_pool->n_pages_not_made_young. This function is only for crash
diagnostics.
buf_print_io(): Display buf_pool->LRU_old_len and quantities derived
from buf_pool->n_pages_made_young, buf_pool->n_pages_not_made_young.
This function is invoked by SHOW ENGINE INNODB STATUS.
rb://129 approved by Heikki Tuuri. This addresses Bug #45015.
r5704 | marko | 2009-08-27 03:31:17 -0500 (Thu, 27 Aug 2009) | 32 lines
branches/zip: Fix a critical bug in fast index creation that could
corrupt the created indexes.
row_merge(): Make "half" an in/out parameter. Determine the offset of
half the output file. Copy the last blocks record-by-record instead of
block-by-block, so that the records can be counted. Check that the
input and output have matching n_rec.
row_merge_sort(): Do not assume that two blocks of size N are merged
into a block of size 2*N. The output block can be shorter than the
input if the last page of each input block is almost empty. Use an
accurate termination condition, based on the "half" computed by
row_merge().
row_merge_read(), row_merge_write(), row_merge_blocks(): Add debug output.
merge_file_t, row_merge_file_create(): Add n_rec, the number of records
in the merge file.
row_merge_read_clustered_index(): Update n_rec.
row_merge_blocks(): Update and check n_rec.
row_merge_blocks_copy(): New function, for copying the last blocks in
row_merge(). Update and check n_rec.
This bug was discovered with a user-supplied test case that creates an
index where the initial temporary file is 249 one-megabyte blocks and
the merged files become smaller. In the test, possible merge record
sizes are 10, 18, and 26 bytes.
rb://150 approved by Sunny Bains. This addresses Issue #320.
r5705 | marko | 2009-08-27 06:56:24 -0500 (Thu, 27 Aug 2009) | 11 lines
branches/zip: dict_index_find_cols(): On column name lookup failure,
return DB_CORRUPTION (HA_ERR_CRASHED) instead of abnormally
terminating the server. Also, disable the previously added diagnostic
output to the error log, because mysql-test-run does not like extra
output in the error log. (Bug #44571)
dict_index_add_to_cache(): Handle errors from dict_index_find_cols().
mysql-test/innodb_bug44571.test: A test case for triggering the bug.
rb://135 approved by Sunny Bains.
r5706 | inaam | 2009-08-27 11:00:27 -0500 (Thu, 27 Aug 2009) | 20 lines
branches/zip rb://147
Done away with following two status variables:
innodb_buffer_pool_read_ahead_rnd
innodb_buffer_pool_read_ahead_seq
Introduced two new status variables:
innodb_buffer_pool_read_ahead = number of pages read as part of
readahead since server startup
innodb_buffer_pool_read_ahead_evicted = number of pages that are read
in as readahead but were evicted before ever being accessed since
server startup i.e.: a measure of how badly our readahead is
performing
SHOW INNODB STATUS will show two extra numbers in buffer pool section:
pages read ahead/sec and pages evicted without access/sec
Approved by: Marko
r5707 | inaam | 2009-08-27 11:20:35 -0500 (Thu, 27 Aug 2009) | 6 lines
branches/zip
Remove unused macros as we erased the random readahead code in r5703.
Also fixed some comments.
r5708 | inaam | 2009-08-27 17:43:32 -0500 (Thu, 27 Aug 2009) | 4 lines
branches/zip
Remove redundant TRUE : FALSE from the return statement
r5709 | inaam | 2009-08-28 01:22:46 -0500 (Fri, 28 Aug 2009) | 5 lines
branches/zip rb://152
Disable display of deprecated parameter innodb_file_io_threads in
'show variables'.
r5714 | marko | 2009-08-31 01:10:10 -0500 (Mon, 31 Aug 2009) | 5 lines
branches/zip: buf_chunk_not_freed(): Do not acquire block->mutex unless
block->page.state == BUF_BLOCK_FILE_PAGE. Check that block->page.state
makes sense.
Approved by Sunny Bains over the IM.
r5716 | vasil | 2009-08-31 02:47:49 -0500 (Mon, 31 Aug 2009) | 9 lines
branches/zip:
Fix Bug#46718 InnoDB plugin incompatible with gcc 4.1 (at least: on PPC): "Undefined symbol"
by implementing our own check in plug.in instead of using the result from
the check from MySQL because it is insufficient.
Approved by: Marko (rb://154)
2009-10-08 16:58:37 +05:30
|
|
|
count = buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
|
|
|
|
zip_size, FALSE,
|
|
|
|
tablespace_version, offset);
|
|
|
|
srv_buf_pool_reads += count;
|
2009-05-27 15:15:59 +05:30
|
|
|
if (err == DB_TABLESPACE_DELETED) {
|
|
|
|
ut_print_timestamp(stderr);
|
|
|
|
fprintf(stderr,
|
|
|
|
" InnoDB: Error: trying to access"
|
|
|
|
" tablespace %lu page no. %lu,\n"
|
|
|
|
"InnoDB: but the tablespace does not exist"
|
|
|
|
" or is just being dropped.\n",
|
|
|
|
(ulong) space, (ulong) offset);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Flush pages from the end of the LRU list if necessary */
|
|
|
|
buf_flush_free_margin();
|
|
|
|
|
|
|
|
/* Increment number of I/O operations used for LRU policy. */
|
|
|
|
buf_LRU_stat_inc_io();
|
|
|
|
|
Applying InnoDB Plugin 1.0.5 snapshot, part 4
From revision r5703 to r5716
Detailed revision comments:
r5703 | marko | 2009-08-27 02:25:00 -0500 (Thu, 27 Aug 2009) | 41 lines
branches/zip: Replace the constant 3/8 ratio that controls the LRU_old
size with the settable global variable innodb_old_blocks_pct. The
minimum and maximum values are 5 and 95 per cent, respectively. The
default is 100*3/8, in line with the old behavior.
ut_time_ms(): New utility function, to return the current time in
milliseconds. TODO: Is there a more efficient timestamp function, such
as rdtsc divided by a power of two?
buf_LRU_old_threshold_ms: New variable, corresponding to
innodb_old_blocks_time. The value 0 is the default behaviour: no
timeout before making blocks 'new'.
bpage->accessed, bpage->LRU_position, buf_pool->ulint_clock: Remove.
bpage->access_time: New field, replacing bpage->accessed. Protected by
buf_pool_mutex instead of bpage->mutex. Updated when a page is created
or accessed the first time in the buffer pool.
buf_LRU_old_ratio, innobase_old_blocks_pct: New variables,
corresponding to innodb_old_blocks_pct
buf_LRU_old_ratio_update(), innobase_old_blocks_pct_update(): Update
functions for buf_LRU_old_ratio, innobase_old_blocks_pct.
buf_page_peek_if_too_old(): Compare ut_time_ms() to bpage->access_time
if buf_LRU_old_threshold_ms && bpage->old. Else observe
buf_LRU_old_ratio and bpage->freed_page_clock.
buf_pool_t: Add n_pages_made_young, n_pages_not_made_young,
n_pages_made_young_old, n_pages_not_made_young, for statistics.
buf_print(): Display buf_pool->n_pages_made_young,
buf_pool->n_pages_not_made_young. This function is only for crash
diagnostics.
buf_print_io(): Display buf_pool->LRU_old_len and quantities derived
from buf_pool->n_pages_made_young, buf_pool->n_pages_not_made_young.
This function is invoked by SHOW ENGINE INNODB STATUS.
rb://129 approved by Heikki Tuuri. This addresses Bug #45015.
r5704 | marko | 2009-08-27 03:31:17 -0500 (Thu, 27 Aug 2009) | 32 lines
branches/zip: Fix a critical bug in fast index creation that could
corrupt the created indexes.
row_merge(): Make "half" an in/out parameter. Determine the offset of
half the output file. Copy the last blocks record-by-record instead of
block-by-block, so that the records can be counted. Check that the
input and output have matching n_rec.
row_merge_sort(): Do not assume that two blocks of size N are merged
into a block of size 2*N. The output block can be shorter than the
input if the last page of each input block is almost empty. Use an
accurate termination condition, based on the "half" computed by
row_merge().
row_merge_read(), row_merge_write(), row_merge_blocks(): Add debug output.
merge_file_t, row_merge_file_create(): Add n_rec, the number of records
in the merge file.
row_merge_read_clustered_index(): Update n_rec.
row_merge_blocks(): Update and check n_rec.
row_merge_blocks_copy(): New function, for copying the last blocks in
row_merge(). Update and check n_rec.
This bug was discovered with a user-supplied test case that creates an
index where the initial temporary file is 249 one-megabyte blocks and
the merged files become smaller. In the test, possible merge record
sizes are 10, 18, and 26 bytes.
rb://150 approved by Sunny Bains. This addresses Issue #320.
r5705 | marko | 2009-08-27 06:56:24 -0500 (Thu, 27 Aug 2009) | 11 lines
branches/zip: dict_index_find_cols(): On column name lookup failure,
return DB_CORRUPTION (HA_ERR_CRASHED) instead of abnormally
terminating the server. Also, disable the previously added diagnostic
output to the error log, because mysql-test-run does not like extra
output in the error log. (Bug #44571)
dict_index_add_to_cache(): Handle errors from dict_index_find_cols().
mysql-test/innodb_bug44571.test: A test case for triggering the bug.
rb://135 approved by Sunny Bains.
r5706 | inaam | 2009-08-27 11:00:27 -0500 (Thu, 27 Aug 2009) | 20 lines
branches/zip rb://147
Done away with following two status variables:
innodb_buffer_pool_read_ahead_rnd
innodb_buffer_pool_read_ahead_seq
Introduced two new status variables:
innodb_buffer_pool_read_ahead = number of pages read as part of
readahead since server startup
innodb_buffer_pool_read_ahead_evicted = number of pages that are read
in as readahead but were evicted before ever being accessed since
server startup i.e.: a measure of how badly our readahead is
performing
SHOW INNODB STATUS will show two extra numbers in buffer pool section:
pages read ahead/sec and pages evicted without access/sec
Approved by: Marko
r5707 | inaam | 2009-08-27 11:20:35 -0500 (Thu, 27 Aug 2009) | 6 lines
branches/zip
Remove unused macros as we erased the random readahead code in r5703.
Also fixed some comments.
r5708 | inaam | 2009-08-27 17:43:32 -0500 (Thu, 27 Aug 2009) | 4 lines
branches/zip
Remove redundant TRUE : FALSE from the return statement
r5709 | inaam | 2009-08-28 01:22:46 -0500 (Fri, 28 Aug 2009) | 5 lines
branches/zip rb://152
Disable display of deprecated parameter innodb_file_io_threads in
'show variables'.
r5714 | marko | 2009-08-31 01:10:10 -0500 (Mon, 31 Aug 2009) | 5 lines
branches/zip: buf_chunk_not_freed(): Do not acquire block->mutex unless
block->page.state == BUF_BLOCK_FILE_PAGE. Check that block->page.state
makes sense.
Approved by Sunny Bains over the IM.
r5716 | vasil | 2009-08-31 02:47:49 -0500 (Mon, 31 Aug 2009) | 9 lines
branches/zip:
Fix Bug#46718 InnoDB plugin incompatible with gcc 4.1 (at least: on PPC): "Undefined symbol"
by implementing our own check in plug.in instead of using the result from
the check from MySQL because it is insufficient.
Approved by: Marko (rb://154)
2009-10-08 16:58:37 +05:30
|
|
|
return(count > 0);
|
2009-05-27 15:15:59 +05:30
|
|
|
}
|
|
|
|
|
|
|
|
/********************************************************************//**
|
|
|
|
Applies linear read-ahead if in the buf_pool the page is a border page of
|
|
|
|
a linear read-ahead area and all the pages in the area have been accessed.
|
|
|
|
Does not read any page if the read-ahead mechanism is not activated. Note
|
Applying InnoDB Plugin 1.0.5 snapshot, part 2
From r5639 to r5685
Detailed revision comments:
r5639 | marko | 2009-08-06 05:39:34 -0500 (Thu, 06 Aug 2009) | 3 lines
branches/zip: mem_heap_block_free(): If innodb_use_sys_malloc is set,
do not tell Valgrind that the memory is free, to avoid
a bogus warning in Valgrind's built-in free() hook.
r5642 | calvin | 2009-08-06 18:04:03 -0500 (Thu, 06 Aug 2009) | 2 lines
branches/zip: remove duplicate "the" in comments.
r5662 | marko | 2009-08-11 04:54:16 -0500 (Tue, 11 Aug 2009) | 1 line
branches/zip: Bump the version number to 1.0.5 after releasing 1.0.4.
r5663 | marko | 2009-08-11 06:42:37 -0500 (Tue, 11 Aug 2009) | 2 lines
branches/zip: trx_general_rollback_for_mysql(): Remove the redundant
parameter partial. If savept==NULL, partial==FALSE.
r5670 | marko | 2009-08-12 08:16:37 -0500 (Wed, 12 Aug 2009) | 2 lines
branches/zip: trx_undo_rec_copy(): Add const qualifier to undo_rec.
This is a non-functional change.
r5671 | marko | 2009-08-13 03:46:33 -0500 (Thu, 13 Aug 2009) | 5 lines
branches/zip: ha_innobase::add_index(): Fix Bug #46557:
after a successful operation, read innodb_table->flags from
the newly created table object, not from the old one that was just freed.
Approved by Sunny.
r5681 | sunny | 2009-08-14 01:16:24 -0500 (Fri, 14 Aug 2009) | 3 lines
branches/zip: When building HotBackup srv_use_sys_malloc is #ifdef out. We
move access to the this variable within a !UNIV_HOTBACKUP block.
r5684 | sunny | 2009-08-20 03:05:30 -0500 (Thu, 20 Aug 2009) | 10 lines
branches/zip: Fix bug# 46650: Innodb assertion autoinc_lock == lock in lock_table_remove_low on INSERT SELECT
We only store the autoinc locks that are granted in the transaction's autoinc
lock vector. A transacton, that has been rolled back due to a deadlock because
of an AUTOINC lock attempt, will not have added that lock to the vector. We
need to check for that when we remove that lock.
rb://145
Approved by Marko.
r5685 | sunny | 2009-08-20 03:18:29 -0500 (Thu, 20 Aug 2009) | 2 lines
branches/zip: Update the ChangeLog with r5684 change.
2009-10-08 15:30:49 +05:30
|
|
|
that the algorithm looks at the 'natural' adjacent successor and
|
2009-05-27 15:15:59 +05:30
|
|
|
predecessor of the page, which on the leaf level of a B-tree are the next
|
|
|
|
and previous page in the chain of leaves. To know these, the page specified
|
|
|
|
in (space, offset) must already be present in the buf_pool. Thus, the
|
|
|
|
natural way to use this function is to call it when a page in the buf_pool
|
|
|
|
is accessed the first time, calling this function just after it has been
|
|
|
|
bufferfixed.
|
|
|
|
NOTE 1: as this function looks at the natural predecessor and successor
|
|
|
|
fields on the page, what happens, if these are not initialized to any
|
|
|
|
sensible value? No problem, before applying read-ahead we check that the
|
|
|
|
area to read is within the span of the space, if not, read-ahead is not
|
|
|
|
applied. An uninitialized value may result in a useless read operation, but
|
|
|
|
only very improbably.
|
|
|
|
NOTE 2: the calling thread may own latches on pages: to avoid deadlocks this
|
|
|
|
function must be written such that it cannot end up waiting for these
|
|
|
|
latches!
|
|
|
|
NOTE 3: the calling thread must want access to the page given: this rule is
|
|
|
|
set to prevent unintended read-aheads performed by ibuf routines, a situation
|
|
|
|
which could result in a deadlock if the OS does not support asynchronous io.
|
|
|
|
@return number of page read requests issued */
|
|
|
|
UNIV_INTERN
|
|
|
|
ulint
|
|
|
|
buf_read_ahead_linear(
|
|
|
|
/*==================*/
|
|
|
|
ulint space, /*!< in: space id */
|
|
|
|
ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
|
|
|
|
ulint offset) /*!< in: page number of a page; NOTE: the current thread
|
|
|
|
must want access to this page (see NOTE 3 above) */
|
|
|
|
{
|
|
|
|
ib_int64_t tablespace_version;
|
|
|
|
buf_page_t* bpage;
|
|
|
|
buf_frame_t* frame;
|
|
|
|
buf_page_t* pred_bpage = NULL;
|
|
|
|
ulint pred_offset;
|
|
|
|
ulint succ_offset;
|
|
|
|
ulint count;
|
|
|
|
int asc_or_desc;
|
|
|
|
ulint new_offset;
|
|
|
|
ulint fail_count;
|
|
|
|
ulint ibuf_mode;
|
|
|
|
ulint low, high;
|
|
|
|
ulint err;
|
|
|
|
ulint i;
|
|
|
|
const ulint buf_read_ahead_linear_area
|
|
|
|
= BUF_READ_AHEAD_LINEAR_AREA;
|
2009-07-30 17:42:56 +05:00
|
|
|
ulint threshold;
|
2009-05-27 15:15:59 +05:30
|
|
|
|
|
|
|
if (UNIV_UNLIKELY(srv_startup_is_before_trx_rollback_phase)) {
|
|
|
|
/* No read-ahead to avoid thread deadlocks */
|
|
|
|
return(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
low = (offset / buf_read_ahead_linear_area)
|
|
|
|
* buf_read_ahead_linear_area;
|
|
|
|
high = (offset / buf_read_ahead_linear_area + 1)
|
|
|
|
* buf_read_ahead_linear_area;
|
|
|
|
|
|
|
|
if ((offset != low) && (offset != high - 1)) {
|
|
|
|
/* This is not a border page of the area: return */
|
|
|
|
|
|
|
|
return(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (ibuf_bitmap_page(zip_size, offset)
|
|
|
|
|| trx_sys_hdr_page(space, offset)) {
|
|
|
|
|
|
|
|
/* If it is an ibuf bitmap page or trx sys hdr, we do
|
|
|
|
no read-ahead, as that could break the ibuf page access
|
|
|
|
order */
|
|
|
|
|
|
|
|
return(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Remember the tablespace version before we ask te tablespace size
|
|
|
|
below: if DISCARD + IMPORT changes the actual .ibd file meanwhile, we
|
|
|
|
do not try to read outside the bounds of the tablespace! */
|
|
|
|
|
|
|
|
tablespace_version = fil_space_get_version(space);
|
|
|
|
|
|
|
|
buf_pool_mutex_enter();
|
|
|
|
|
|
|
|
if (high > fil_space_get_size(space)) {
|
|
|
|
buf_pool_mutex_exit();
|
|
|
|
/* The area is not whole, return */
|
|
|
|
|
|
|
|
return(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (buf_pool->n_pend_reads
|
|
|
|
> buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
|
|
|
|
buf_pool_mutex_exit();
|
|
|
|
|
|
|
|
return(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Check that almost all pages in the area have been accessed; if
|
|
|
|
offset == low, the accesses must be in a descending order, otherwise,
|
|
|
|
in an ascending order. */
|
|
|
|
|
|
|
|
asc_or_desc = 1;
|
|
|
|
|
|
|
|
if (offset == low) {
|
|
|
|
asc_or_desc = -1;
|
|
|
|
}
|
|
|
|
|
2009-07-30 17:42:56 +05:00
|
|
|
/* How many out of order accessed pages can we ignore
|
|
|
|
when working out the access pattern for linear readahead */
|
|
|
|
threshold = ut_min((64 - srv_read_ahead_threshold),
|
|
|
|
BUF_READ_AHEAD_AREA);
|
|
|
|
|
2009-05-27 15:15:59 +05:30
|
|
|
fail_count = 0;
|
|
|
|
|
|
|
|
for (i = low; i < high; i++) {
|
|
|
|
bpage = buf_page_hash_get(space, i);
|
|
|
|
|
|
|
|
if ((bpage == NULL) || !buf_page_is_accessed(bpage)) {
|
|
|
|
/* Not accessed */
|
|
|
|
fail_count++;
|
|
|
|
|
2009-07-30 17:42:56 +05:00
|
|
|
} else if (pred_bpage) {
|
Applying InnoDB Plugin 1.0.5 snapshot, part 4
From revision r5703 to r5716
Detailed revision comments:
r5703 | marko | 2009-08-27 02:25:00 -0500 (Thu, 27 Aug 2009) | 41 lines
branches/zip: Replace the constant 3/8 ratio that controls the LRU_old
size with the settable global variable innodb_old_blocks_pct. The
minimum and maximum values are 5 and 95 per cent, respectively. The
default is 100*3/8, in line with the old behavior.
ut_time_ms(): New utility function, to return the current time in
milliseconds. TODO: Is there a more efficient timestamp function, such
as rdtsc divided by a power of two?
buf_LRU_old_threshold_ms: New variable, corresponding to
innodb_old_blocks_time. The value 0 is the default behaviour: no
timeout before making blocks 'new'.
bpage->accessed, bpage->LRU_position, buf_pool->ulint_clock: Remove.
bpage->access_time: New field, replacing bpage->accessed. Protected by
buf_pool_mutex instead of bpage->mutex. Updated when a page is created
or accessed the first time in the buffer pool.
buf_LRU_old_ratio, innobase_old_blocks_pct: New variables,
corresponding to innodb_old_blocks_pct
buf_LRU_old_ratio_update(), innobase_old_blocks_pct_update(): Update
functions for buf_LRU_old_ratio, innobase_old_blocks_pct.
buf_page_peek_if_too_old(): Compare ut_time_ms() to bpage->access_time
if buf_LRU_old_threshold_ms && bpage->old. Else observe
buf_LRU_old_ratio and bpage->freed_page_clock.
buf_pool_t: Add n_pages_made_young, n_pages_not_made_young,
n_pages_made_young_old, n_pages_not_made_young, for statistics.
buf_print(): Display buf_pool->n_pages_made_young,
buf_pool->n_pages_not_made_young. This function is only for crash
diagnostics.
buf_print_io(): Display buf_pool->LRU_old_len and quantities derived
from buf_pool->n_pages_made_young, buf_pool->n_pages_not_made_young.
This function is invoked by SHOW ENGINE INNODB STATUS.
rb://129 approved by Heikki Tuuri. This addresses Bug #45015.
r5704 | marko | 2009-08-27 03:31:17 -0500 (Thu, 27 Aug 2009) | 32 lines
branches/zip: Fix a critical bug in fast index creation that could
corrupt the created indexes.
row_merge(): Make "half" an in/out parameter. Determine the offset of
half the output file. Copy the last blocks record-by-record instead of
block-by-block, so that the records can be counted. Check that the
input and output have matching n_rec.
row_merge_sort(): Do not assume that two blocks of size N are merged
into a block of size 2*N. The output block can be shorter than the
input if the last page of each input block is almost empty. Use an
accurate termination condition, based on the "half" computed by
row_merge().
row_merge_read(), row_merge_write(), row_merge_blocks(): Add debug output.
merge_file_t, row_merge_file_create(): Add n_rec, the number of records
in the merge file.
row_merge_read_clustered_index(): Update n_rec.
row_merge_blocks(): Update and check n_rec.
row_merge_blocks_copy(): New function, for copying the last blocks in
row_merge(). Update and check n_rec.
This bug was discovered with a user-supplied test case that creates an
index where the initial temporary file is 249 one-megabyte blocks and
the merged files become smaller. In the test, possible merge record
sizes are 10, 18, and 26 bytes.
rb://150 approved by Sunny Bains. This addresses Issue #320.
r5705 | marko | 2009-08-27 06:56:24 -0500 (Thu, 27 Aug 2009) | 11 lines
branches/zip: dict_index_find_cols(): On column name lookup failure,
return DB_CORRUPTION (HA_ERR_CRASHED) instead of abnormally
terminating the server. Also, disable the previously added diagnostic
output to the error log, because mysql-test-run does not like extra
output in the error log. (Bug #44571)
dict_index_add_to_cache(): Handle errors from dict_index_find_cols().
mysql-test/innodb_bug44571.test: A test case for triggering the bug.
rb://135 approved by Sunny Bains.
r5706 | inaam | 2009-08-27 11:00:27 -0500 (Thu, 27 Aug 2009) | 20 lines
branches/zip rb://147
Done away with following two status variables:
innodb_buffer_pool_read_ahead_rnd
innodb_buffer_pool_read_ahead_seq
Introduced two new status variables:
innodb_buffer_pool_read_ahead = number of pages read as part of
readahead since server startup
innodb_buffer_pool_read_ahead_evicted = number of pages that are read
in as readahead but were evicted before ever being accessed since
server startup i.e.: a measure of how badly our readahead is
performing
SHOW INNODB STATUS will show two extra numbers in buffer pool section:
pages read ahead/sec and pages evicted without access/sec
Approved by: Marko
r5707 | inaam | 2009-08-27 11:20:35 -0500 (Thu, 27 Aug 2009) | 6 lines
branches/zip
Remove unused macros as we erased the random readahead code in r5703.
Also fixed some comments.
r5708 | inaam | 2009-08-27 17:43:32 -0500 (Thu, 27 Aug 2009) | 4 lines
branches/zip
Remove redundant TRUE : FALSE from the return statement
r5709 | inaam | 2009-08-28 01:22:46 -0500 (Fri, 28 Aug 2009) | 5 lines
branches/zip rb://152
Disable display of deprecated parameter innodb_file_io_threads in
'show variables'.
r5714 | marko | 2009-08-31 01:10:10 -0500 (Mon, 31 Aug 2009) | 5 lines
branches/zip: buf_chunk_not_freed(): Do not acquire block->mutex unless
block->page.state == BUF_BLOCK_FILE_PAGE. Check that block->page.state
makes sense.
Approved by Sunny Bains over the IM.
r5716 | vasil | 2009-08-31 02:47:49 -0500 (Mon, 31 Aug 2009) | 9 lines
branches/zip:
Fix Bug#46718 InnoDB plugin incompatible with gcc 4.1 (at least: on PPC): "Undefined symbol"
by implementing our own check in plug.in instead of using the result from
the check from MySQL because it is insufficient.
Approved by: Marko (rb://154)
2009-10-08 16:58:37 +05:30
|
|
|
/* Note that buf_page_is_accessed() returns
|
|
|
|
the time of the first access. If some blocks
|
|
|
|
of the extent existed in the buffer pool at
|
|
|
|
the time of a linear access pattern, the first
|
|
|
|
access times may be nonmonotonic, even though
|
|
|
|
the latest access times were linear. The
|
|
|
|
threshold (srv_read_ahead_factor) should help
|
|
|
|
a little against this. */
|
|
|
|
int res = ut_ulint_cmp(
|
|
|
|
buf_page_is_accessed(bpage),
|
|
|
|
buf_page_is_accessed(pred_bpage));
|
2009-05-27 15:15:59 +05:30
|
|
|
/* Accesses not in the right order */
|
2009-07-30 17:42:56 +05:00
|
|
|
if (res != 0 && res != asc_or_desc) {
|
|
|
|
fail_count++;
|
|
|
|
}
|
2009-05-27 15:15:59 +05:30
|
|
|
}
|
|
|
|
|
2009-07-30 17:42:56 +05:00
|
|
|
if (fail_count > threshold) {
|
|
|
|
/* Too many failures: return */
|
|
|
|
buf_pool_mutex_exit();
|
|
|
|
return(0);
|
|
|
|
}
|
2009-05-27 15:15:59 +05:30
|
|
|
|
2009-07-30 17:42:56 +05:00
|
|
|
if (bpage && buf_page_is_accessed(bpage)) {
|
|
|
|
pred_bpage = bpage;
|
|
|
|
}
|
2009-05-27 15:15:59 +05:30
|
|
|
}
|
|
|
|
|
|
|
|
/* If we got this far, we know that enough pages in the area have
|
|
|
|
been accessed in the right order: linear read-ahead can be sensible */
|
|
|
|
|
|
|
|
bpage = buf_page_hash_get(space, offset);
|
|
|
|
|
|
|
|
if (bpage == NULL) {
|
|
|
|
buf_pool_mutex_exit();
|
|
|
|
|
|
|
|
return(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
switch (buf_page_get_state(bpage)) {
|
|
|
|
case BUF_BLOCK_ZIP_PAGE:
|
|
|
|
frame = bpage->zip.data;
|
|
|
|
break;
|
|
|
|
case BUF_BLOCK_FILE_PAGE:
|
|
|
|
frame = ((buf_block_t*) bpage)->frame;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
ut_error;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Read the natural predecessor and successor page addresses from
|
|
|
|
the page; NOTE that because the calling thread may have an x-latch
|
|
|
|
on the page, we do not acquire an s-latch on the page, this is to
|
|
|
|
prevent deadlocks. Even if we read values which are nonsense, the
|
|
|
|
algorithm will work. */
|
|
|
|
|
|
|
|
pred_offset = fil_page_get_prev(frame);
|
|
|
|
succ_offset = fil_page_get_next(frame);
|
|
|
|
|
|
|
|
buf_pool_mutex_exit();
|
|
|
|
|
|
|
|
if ((offset == low) && (succ_offset == offset + 1)) {
|
|
|
|
|
|
|
|
/* This is ok, we can continue */
|
|
|
|
new_offset = pred_offset;
|
|
|
|
|
|
|
|
} else if ((offset == high - 1) && (pred_offset == offset - 1)) {
|
|
|
|
|
|
|
|
/* This is ok, we can continue */
|
|
|
|
new_offset = succ_offset;
|
|
|
|
} else {
|
|
|
|
/* Successor or predecessor not in the right order */
|
|
|
|
|
|
|
|
return(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
low = (new_offset / buf_read_ahead_linear_area)
|
|
|
|
* buf_read_ahead_linear_area;
|
|
|
|
high = (new_offset / buf_read_ahead_linear_area + 1)
|
|
|
|
* buf_read_ahead_linear_area;
|
|
|
|
|
|
|
|
if ((new_offset != low) && (new_offset != high - 1)) {
|
|
|
|
/* This is not a border page of the area: return */
|
|
|
|
|
|
|
|
return(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (high > fil_space_get_size(space)) {
|
|
|
|
/* The area is not whole, return */
|
|
|
|
|
|
|
|
return(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* If we got this far, read-ahead can be sensible: do it */
|
|
|
|
|
|
|
|
if (ibuf_inside()) {
|
|
|
|
ibuf_mode = BUF_READ_IBUF_PAGES_ONLY;
|
|
|
|
} else {
|
|
|
|
ibuf_mode = BUF_READ_ANY_PAGE;
|
|
|
|
}
|
|
|
|
|
|
|
|
count = 0;
|
|
|
|
|
|
|
|
/* Since Windows XP seems to schedule the i/o handler thread
|
|
|
|
very eagerly, and consequently it does not wait for the
|
|
|
|
full read batch to be posted, we use special heuristics here */
|
|
|
|
|
|
|
|
os_aio_simulated_put_read_threads_to_sleep();
|
|
|
|
|
|
|
|
for (i = low; i < high; i++) {
|
|
|
|
/* It is only sensible to do read-ahead in the non-sync
|
|
|
|
aio mode: hence FALSE as the first parameter */
|
|
|
|
|
|
|
|
if (!ibuf_bitmap_page(zip_size, i)) {
|
|
|
|
count += buf_read_page_low(
|
|
|
|
&err, FALSE,
|
|
|
|
ibuf_mode | OS_AIO_SIMULATED_WAKE_LATER,
|
|
|
|
space, zip_size, FALSE, tablespace_version, i);
|
|
|
|
if (err == DB_TABLESPACE_DELETED) {
|
|
|
|
ut_print_timestamp(stderr);
|
|
|
|
fprintf(stderr,
|
|
|
|
" InnoDB: Warning: in"
|
|
|
|
" linear readahead trying to access\n"
|
|
|
|
"InnoDB: tablespace %lu page %lu,\n"
|
|
|
|
"InnoDB: but the tablespace does not"
|
|
|
|
" exist or is just being dropped.\n",
|
|
|
|
(ulong) space, (ulong) i);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* In simulated aio we wake the aio handler threads only after
|
|
|
|
queuing all aio requests, in native aio the following call does
|
|
|
|
nothing: */
|
|
|
|
|
|
|
|
os_aio_simulated_wake_handler_threads();
|
|
|
|
|
|
|
|
/* Flush pages from the end of the LRU list if necessary */
|
|
|
|
buf_flush_free_margin();
|
|
|
|
|
|
|
|
#ifdef UNIV_DEBUG
|
|
|
|
if (buf_debug_prints && (count > 0)) {
|
|
|
|
fprintf(stderr,
|
|
|
|
"LINEAR read-ahead space %lu offset %lu pages %lu\n",
|
|
|
|
(ulong) space, (ulong) offset, (ulong) count);
|
|
|
|
}
|
|
|
|
#endif /* UNIV_DEBUG */
|
|
|
|
|
|
|
|
/* Read ahead is considered one I/O operation for the purpose of
|
|
|
|
LRU policy decision. */
|
|
|
|
buf_LRU_stat_inc_io();
|
|
|
|
|
Applying InnoDB Plugin 1.0.5 snapshot, part 4
From revision r5703 to r5716
Detailed revision comments:
r5703 | marko | 2009-08-27 02:25:00 -0500 (Thu, 27 Aug 2009) | 41 lines
branches/zip: Replace the constant 3/8 ratio that controls the LRU_old
size with the settable global variable innodb_old_blocks_pct. The
minimum and maximum values are 5 and 95 per cent, respectively. The
default is 100*3/8, in line with the old behavior.
ut_time_ms(): New utility function, to return the current time in
milliseconds. TODO: Is there a more efficient timestamp function, such
as rdtsc divided by a power of two?
buf_LRU_old_threshold_ms: New variable, corresponding to
innodb_old_blocks_time. The value 0 is the default behaviour: no
timeout before making blocks 'new'.
bpage->accessed, bpage->LRU_position, buf_pool->ulint_clock: Remove.
bpage->access_time: New field, replacing bpage->accessed. Protected by
buf_pool_mutex instead of bpage->mutex. Updated when a page is created
or accessed the first time in the buffer pool.
buf_LRU_old_ratio, innobase_old_blocks_pct: New variables,
corresponding to innodb_old_blocks_pct
buf_LRU_old_ratio_update(), innobase_old_blocks_pct_update(): Update
functions for buf_LRU_old_ratio, innobase_old_blocks_pct.
buf_page_peek_if_too_old(): Compare ut_time_ms() to bpage->access_time
if buf_LRU_old_threshold_ms && bpage->old. Else observe
buf_LRU_old_ratio and bpage->freed_page_clock.
buf_pool_t: Add n_pages_made_young, n_pages_not_made_young,
n_pages_made_young_old, n_pages_not_made_young, for statistics.
buf_print(): Display buf_pool->n_pages_made_young,
buf_pool->n_pages_not_made_young. This function is only for crash
diagnostics.
buf_print_io(): Display buf_pool->LRU_old_len and quantities derived
from buf_pool->n_pages_made_young, buf_pool->n_pages_not_made_young.
This function is invoked by SHOW ENGINE INNODB STATUS.
rb://129 approved by Heikki Tuuri. This addresses Bug #45015.
r5704 | marko | 2009-08-27 03:31:17 -0500 (Thu, 27 Aug 2009) | 32 lines
branches/zip: Fix a critical bug in fast index creation that could
corrupt the created indexes.
row_merge(): Make "half" an in/out parameter. Determine the offset of
half the output file. Copy the last blocks record-by-record instead of
block-by-block, so that the records can be counted. Check that the
input and output have matching n_rec.
row_merge_sort(): Do not assume that two blocks of size N are merged
into a block of size 2*N. The output block can be shorter than the
input if the last page of each input block is almost empty. Use an
accurate termination condition, based on the "half" computed by
row_merge().
row_merge_read(), row_merge_write(), row_merge_blocks(): Add debug output.
merge_file_t, row_merge_file_create(): Add n_rec, the number of records
in the merge file.
row_merge_read_clustered_index(): Update n_rec.
row_merge_blocks(): Update and check n_rec.
row_merge_blocks_copy(): New function, for copying the last blocks in
row_merge(). Update and check n_rec.
This bug was discovered with a user-supplied test case that creates an
index where the initial temporary file is 249 one-megabyte blocks and
the merged files become smaller. In the test, possible merge record
sizes are 10, 18, and 26 bytes.
rb://150 approved by Sunny Bains. This addresses Issue #320.
r5705 | marko | 2009-08-27 06:56:24 -0500 (Thu, 27 Aug 2009) | 11 lines
branches/zip: dict_index_find_cols(): On column name lookup failure,
return DB_CORRUPTION (HA_ERR_CRASHED) instead of abnormally
terminating the server. Also, disable the previously added diagnostic
output to the error log, because mysql-test-run does not like extra
output in the error log. (Bug #44571)
dict_index_add_to_cache(): Handle errors from dict_index_find_cols().
mysql-test/innodb_bug44571.test: A test case for triggering the bug.
rb://135 approved by Sunny Bains.
r5706 | inaam | 2009-08-27 11:00:27 -0500 (Thu, 27 Aug 2009) | 20 lines
branches/zip rb://147
Done away with following two status variables:
innodb_buffer_pool_read_ahead_rnd
innodb_buffer_pool_read_ahead_seq
Introduced two new status variables:
innodb_buffer_pool_read_ahead = number of pages read as part of
readahead since server startup
innodb_buffer_pool_read_ahead_evicted = number of pages that are read
in as readahead but were evicted before ever being accessed since
server startup i.e.: a measure of how badly our readahead is
performing
SHOW INNODB STATUS will show two extra numbers in buffer pool section:
pages read ahead/sec and pages evicted without access/sec
Approved by: Marko
r5707 | inaam | 2009-08-27 11:20:35 -0500 (Thu, 27 Aug 2009) | 6 lines
branches/zip
Remove unused macros as we erased the random readahead code in r5703.
Also fixed some comments.
r5708 | inaam | 2009-08-27 17:43:32 -0500 (Thu, 27 Aug 2009) | 4 lines
branches/zip
Remove redundant TRUE : FALSE from the return statement
r5709 | inaam | 2009-08-28 01:22:46 -0500 (Fri, 28 Aug 2009) | 5 lines
branches/zip rb://152
Disable display of deprecated parameter innodb_file_io_threads in
'show variables'.
r5714 | marko | 2009-08-31 01:10:10 -0500 (Mon, 31 Aug 2009) | 5 lines
branches/zip: buf_chunk_not_freed(): Do not acquire block->mutex unless
block->page.state == BUF_BLOCK_FILE_PAGE. Check that block->page.state
makes sense.
Approved by Sunny Bains over the IM.
r5716 | vasil | 2009-08-31 02:47:49 -0500 (Mon, 31 Aug 2009) | 9 lines
branches/zip:
Fix Bug#46718 InnoDB plugin incompatible with gcc 4.1 (at least: on PPC): "Undefined symbol"
by implementing our own check in plug.in instead of using the result from
the check from MySQL because it is insufficient.
Approved by: Marko (rb://154)
2009-10-08 16:58:37 +05:30
|
|
|
buf_pool->stat.n_ra_pages_read += count;
|
2009-05-27 15:15:59 +05:30
|
|
|
return(count);
|
|
|
|
}
|
|
|
|
|
|
|
|
/********************************************************************//**
|
|
|
|
Issues read requests for pages which the ibuf module wants to read in, in
|
|
|
|
order to contract the insert buffer tree. Technically, this function is like
|
|
|
|
a read-ahead function. */
|
|
|
|
UNIV_INTERN
|
|
|
|
void
|
|
|
|
buf_read_ibuf_merge_pages(
|
|
|
|
/*======================*/
|
|
|
|
ibool sync, /*!< in: TRUE if the caller
|
|
|
|
wants this function to wait
|
|
|
|
for the highest address page
|
|
|
|
to get read in, before this
|
|
|
|
function returns */
|
|
|
|
const ulint* space_ids, /*!< in: array of space ids */
|
|
|
|
const ib_int64_t* space_versions,/*!< in: the spaces must have
|
|
|
|
this version number
|
|
|
|
(timestamp), otherwise we
|
|
|
|
discard the read; we use this
|
|
|
|
to cancel reads if DISCARD +
|
|
|
|
IMPORT may have changed the
|
|
|
|
tablespace size */
|
|
|
|
const ulint* page_nos, /*!< in: array of page numbers
|
|
|
|
to read, with the highest page
|
|
|
|
number the last in the
|
|
|
|
array */
|
|
|
|
ulint n_stored) /*!< in: number of elements
|
|
|
|
in the arrays */
|
|
|
|
{
|
|
|
|
ulint i;
|
|
|
|
|
|
|
|
ut_ad(!ibuf_inside());
|
|
|
|
#ifdef UNIV_IBUF_DEBUG
|
|
|
|
ut_a(n_stored < UNIV_PAGE_SIZE);
|
|
|
|
#endif
|
|
|
|
while (buf_pool->n_pend_reads
|
|
|
|
> buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
|
|
|
|
os_thread_sleep(500000);
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < n_stored; i++) {
|
|
|
|
ulint zip_size = fil_space_get_zip_size(space_ids[i]);
|
|
|
|
ulint err;
|
|
|
|
|
|
|
|
if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
|
|
|
|
|
|
|
|
goto tablespace_deleted;
|
|
|
|
}
|
|
|
|
|
|
|
|
buf_read_page_low(&err, sync && (i + 1 == n_stored),
|
|
|
|
BUF_READ_ANY_PAGE, space_ids[i],
|
|
|
|
zip_size, TRUE, space_versions[i],
|
|
|
|
page_nos[i]);
|
|
|
|
|
|
|
|
if (UNIV_UNLIKELY(err == DB_TABLESPACE_DELETED)) {
|
|
|
|
tablespace_deleted:
|
|
|
|
/* We have deleted or are deleting the single-table
|
|
|
|
tablespace: remove the entries for that page */
|
|
|
|
|
|
|
|
ibuf_merge_or_delete_for_page(NULL, space_ids[i],
|
|
|
|
page_nos[i],
|
|
|
|
zip_size, FALSE);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
os_aio_simulated_wake_handler_threads();
|
|
|
|
|
|
|
|
/* Flush pages from the end of the LRU list if necessary */
|
|
|
|
buf_flush_free_margin();
|
|
|
|
|
|
|
|
#ifdef UNIV_DEBUG
|
|
|
|
if (buf_debug_prints) {
|
|
|
|
fprintf(stderr,
|
|
|
|
"Ibuf merge read-ahead space %lu pages %lu\n",
|
|
|
|
(ulong) space_ids[0], (ulong) n_stored);
|
|
|
|
}
|
|
|
|
#endif /* UNIV_DEBUG */
|
|
|
|
}
|
|
|
|
|
|
|
|
/********************************************************************//**
|
|
|
|
Issues read requests for pages which recovery wants to read in. */
|
|
|
|
UNIV_INTERN
|
|
|
|
void
|
|
|
|
buf_read_recv_pages(
|
|
|
|
/*================*/
|
|
|
|
ibool sync, /*!< in: TRUE if the caller
|
|
|
|
wants this function to wait
|
|
|
|
for the highest address page
|
|
|
|
to get read in, before this
|
|
|
|
function returns */
|
|
|
|
ulint space, /*!< in: space id */
|
|
|
|
ulint zip_size, /*!< in: compressed page size in
|
|
|
|
bytes, or 0 */
|
|
|
|
const ulint* page_nos, /*!< in: array of page numbers
|
|
|
|
to read, with the highest page
|
|
|
|
number the last in the
|
|
|
|
array */
|
|
|
|
ulint n_stored) /*!< in: number of page numbers
|
|
|
|
in the array */
|
|
|
|
{
|
|
|
|
ib_int64_t tablespace_version;
|
|
|
|
ulint count;
|
|
|
|
ulint err;
|
|
|
|
ulint i;
|
|
|
|
|
|
|
|
zip_size = fil_space_get_zip_size(space);
|
2009-07-30 17:42:56 +05:00
|
|
|
|
|
|
|
if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
|
|
|
|
/* It is a single table tablespace and the .ibd file is
|
|
|
|
missing: do nothing */
|
|
|
|
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2009-05-27 15:15:59 +05:30
|
|
|
tablespace_version = fil_space_get_version(space);
|
|
|
|
|
|
|
|
for (i = 0; i < n_stored; i++) {
|
|
|
|
|
|
|
|
count = 0;
|
|
|
|
|
|
|
|
os_aio_print_debug = FALSE;
|
|
|
|
|
|
|
|
while (buf_pool->n_pend_reads >= recv_n_pool_free_frames / 2) {
|
|
|
|
|
|
|
|
os_aio_simulated_wake_handler_threads();
|
2010-04-01 16:56:22 +04:00
|
|
|
os_thread_sleep(10000);
|
2009-05-27 15:15:59 +05:30
|
|
|
|
|
|
|
count++;
|
|
|
|
|
2010-04-01 16:56:22 +04:00
|
|
|
if (count > 1000) {
|
2009-05-27 15:15:59 +05:30
|
|
|
fprintf(stderr,
|
|
|
|
"InnoDB: Error: InnoDB has waited for"
|
2010-04-01 16:56:22 +04:00
|
|
|
" 10 seconds for pending\n"
|
2009-05-27 15:15:59 +05:30
|
|
|
"InnoDB: reads to the buffer pool to"
|
|
|
|
" be finished.\n"
|
|
|
|
"InnoDB: Number of pending reads %lu,"
|
|
|
|
" pending pread calls %lu\n",
|
|
|
|
(ulong) buf_pool->n_pend_reads,
|
|
|
|
(ulong)os_file_n_pending_preads);
|
|
|
|
|
|
|
|
os_aio_print_debug = TRUE;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
os_aio_print_debug = FALSE;
|
|
|
|
|
|
|
|
if ((i + 1 == n_stored) && sync) {
|
|
|
|
buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
|
|
|
|
zip_size, TRUE, tablespace_version,
|
|
|
|
page_nos[i]);
|
|
|
|
} else {
|
|
|
|
buf_read_page_low(&err, FALSE, BUF_READ_ANY_PAGE
|
|
|
|
| OS_AIO_SIMULATED_WAKE_LATER,
|
|
|
|
space, zip_size, TRUE,
|
|
|
|
tablespace_version, page_nos[i]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
os_aio_simulated_wake_handler_threads();
|
|
|
|
|
|
|
|
/* Flush pages from the end of the LRU list if necessary */
|
|
|
|
buf_flush_free_margin();
|
|
|
|
|
|
|
|
#ifdef UNIV_DEBUG
|
|
|
|
if (buf_debug_prints) {
|
|
|
|
fprintf(stderr,
|
|
|
|
"Recovery applies read-ahead pages %lu\n",
|
|
|
|
(ulong) n_stored);
|
|
|
|
}
|
|
|
|
#endif /* UNIV_DEBUG */
|
|
|
|
}
|