mirror of
https://github.com/MariaDB/server.git
synced 2025-01-29 10:14:19 +01:00
Merge gbichot@bk-internal.mysql.com:/home/bk/mysql-maria
into gbichot4.local:/home/mysql_src/mysql-maria-monty
This commit is contained in:
commit
8e8362cc50
19 changed files with 476 additions and 185 deletions
29
mysql-test/r/maria-recovery-bitmap.result
Normal file
29
mysql-test/r/maria-recovery-bitmap.result
Normal file
|
@ -0,0 +1,29 @@
|
|||
drop database if exists mysqltest;
|
||||
create database mysqltest;
|
||||
use mysqltest;
|
||||
* shut down mysqld, removed logs, restarted it
|
||||
use mysqltest;
|
||||
create table t1 (a varchar(10000)) engine=maria;
|
||||
* TEST of over-allocated bitmap not flushed by checkpoint
|
||||
insert into t1 values ("bbbbbbb");
|
||||
flush table t1;
|
||||
* copied t1 for comparison
|
||||
insert into t1 values ("bbbbbbb");
|
||||
delete from t1 limit 1;
|
||||
set session debug="+d,info,enter,exit,maria_over_alloc_bitmap";
|
||||
insert into t1 values ("aaaaaaaaa");
|
||||
set global maria_checkpoint_interval=1;
|
||||
SET SESSION debug="+d,maria_crash";
|
||||
* crashing mysqld intentionally
|
||||
set global maria_checkpoint_interval=1;
|
||||
ERROR HY000: Lost connection to MySQL server during query
|
||||
* recovery happens
|
||||
check table t1 extended;
|
||||
Table Op Msg_type Msg_text
|
||||
mysqltest.t1 check status OK
|
||||
* testing that checksum after recovery is as expected
|
||||
Checksum-check
|
||||
ok
|
||||
use mysqltest;
|
||||
drop database mysqltest_for_comparison;
|
||||
drop database mysqltest;
|
|
@ -1,3 +1,4 @@
|
|||
set global maria_log_file_size=4294967296;
|
||||
drop database if exists mysqltest;
|
||||
create database mysqltest;
|
||||
use mysqltest;
|
||||
|
@ -118,6 +119,7 @@ a
|
|||
00000000
|
||||
00000000
|
||||
drop table t1;
|
||||
* TEST of two REDOs for same page in one REDO group
|
||||
* shut down mysqld, removed logs, restarted it
|
||||
use mysqltest;
|
||||
CREATE TABLE t1 (
|
||||
|
@ -150,6 +152,7 @@ SELECT LENGTH(b) FROM t1 WHERE i=3;
|
|||
LENGTH(b)
|
||||
5001
|
||||
drop table t1;
|
||||
* TEST of INSERT vs state.auto_increment
|
||||
* shut down mysqld, removed logs, restarted it
|
||||
use mysqltest;
|
||||
CREATE TABLE t1 (
|
||||
|
@ -184,6 +187,7 @@ t1 CREATE TABLE `t1` (
|
|||
PRIMARY KEY (`i`),
|
||||
KEY `c` (`c`)
|
||||
) ENGINE=MARIA AUTO_INCREMENT=5 DEFAULT CHARSET=latin1
|
||||
* TEST of UPDATE vs state.auto_increment
|
||||
* copied t1 for feeding_recovery
|
||||
update t1 set i=15 where c="a";
|
||||
flush table t1;
|
||||
|
|
2
mysql-test/t/maria-recovery-bitmap-master.opt
Normal file
2
mysql-test/t/maria-recovery-bitmap-master.opt
Normal file
|
@ -0,0 +1,2 @@
|
|||
--skip-stack-trace --skip-core-file
|
||||
|
79
mysql-test/t/maria-recovery-bitmap.test
Normal file
79
mysql-test/t/maria-recovery-bitmap.test
Normal file
|
@ -0,0 +1,79 @@
|
|||
# Tests of Maria's recovery of the bitmap pages
|
||||
|
||||
--source include/not_embedded.inc
|
||||
# Don't test this under valgrind, memory leaks will occur as we crash
|
||||
--source include/not_valgrind.inc
|
||||
# Binary must be compiled with debug for crash to occur
|
||||
--source include/have_debug.inc
|
||||
--source include/have_maria.inc
|
||||
|
||||
--disable_warnings
|
||||
drop database if exists mysqltest;
|
||||
--enable_warnings
|
||||
create database mysqltest;
|
||||
|
||||
# Include scripts can perform SQL. For it to not influence the main test
|
||||
# they use a separate connection. This way if they use a DDL it would
|
||||
# not autocommit in the main test.
|
||||
connect (admin, 127.0.0.1, root,,mysqltest,,);
|
||||
--enable_reconnect
|
||||
|
||||
connection default;
|
||||
use mysqltest;
|
||||
--enable_reconnect
|
||||
|
||||
-- source include/maria_empty_logs.inc
|
||||
let $mms_tables=1;
|
||||
create table t1 (a varchar(10000)) engine=maria;
|
||||
|
||||
# we want recovery to use the tables as they were at time of crash
|
||||
let $mvr_restore_old_snapshot=0;
|
||||
# UNDO phase prevents physical comparison, normally,
|
||||
# so we'll only use checksums to compare.
|
||||
let $mms_compare_physically=0;
|
||||
let $mvr_crash_statement= set global maria_checkpoint_interval=1;
|
||||
|
||||
--echo * TEST of over-allocated bitmap not flushed by checkpoint
|
||||
let $mvr_debug_option="+d,maria_crash";
|
||||
insert into t1 values ("bbbbbbb");
|
||||
-- source include/maria_make_snapshot_for_comparison.inc
|
||||
# make_snapshot_for_comparison closed the table, which lost its id.
|
||||
# So we make a null operation just to give a short id to the table so
|
||||
# that checkpoint includes table in checkpoint (otherwise nothing to
|
||||
# test).
|
||||
insert into t1 values ("bbbbbbb");
|
||||
delete from t1 limit 1;
|
||||
set session debug="+d,info,enter,exit,maria_over_alloc_bitmap";
|
||||
send insert into t1 values ("aaaaaaaaa");
|
||||
connection admin;
|
||||
# Leave time for INSERT to block after modifying bitmap;
|
||||
# in the future we should not use sleep but something like
|
||||
# debug_sync_point().
|
||||
sleep 5;
|
||||
# force a checkpoint, which could, if buggy, flush over-allocated
|
||||
# bitmap page; as REDO-UNDO was not written, bitmap and data page
|
||||
# would be inconsistent. Correct checkpoint will wait until UNDO is
|
||||
# written.
|
||||
set global maria_checkpoint_interval=1;
|
||||
-- source include/maria_verify_recovery.inc
|
||||
|
||||
# disabled until pagecache callback framework is coded at which point
|
||||
# we can add a get_lsn() callback for bitmaps, fixing the below bug.
|
||||
if (0)
|
||||
{
|
||||
--echo * TEST of bitmap flushed without REDO-UNDO in the log (WAL violation)
|
||||
# before crashing we'll flush the bitmap page
|
||||
let $mvr_debug_option="+d,maria_flush_bitmap,maria_crash";
|
||||
-- source include/maria_make_snapshot_for_comparison.inc
|
||||
lock tables t1 write;
|
||||
insert into t1 values (REPEAT('a', 6000));
|
||||
# bitmap of after-INSERT will be on disk, but data pages will not; if
|
||||
# log is not flushed the bitmap is inconsistent with the data.
|
||||
-- source include/maria_verify_recovery.inc
|
||||
drop table t1;
|
||||
}
|
||||
|
||||
# clean up everything
|
||||
let $mms_purpose=comparison;
|
||||
eval drop database mysqltest_for_$mms_purpose;
|
||||
drop database mysqltest;
|
|
@ -122,6 +122,7 @@ drop table t1;
|
|||
# the rewrite was ignored.
|
||||
#
|
||||
|
||||
--echo * TEST of two REDOs for same page in one REDO group
|
||||
-- source include/maria_empty_logs.inc
|
||||
let $mms_tables=1;
|
||||
CREATE TABLE t1 (
|
||||
|
@ -144,6 +145,7 @@ SELECT LENGTH(b) FROM t1 WHERE i=3;
|
|||
drop table t1;
|
||||
|
||||
# Test that INSERT's effect on auto-increment is recovered
|
||||
--echo * TEST of INSERT vs state.auto_increment
|
||||
-- source include/maria_empty_logs.inc
|
||||
let $mms_tables=1;
|
||||
CREATE TABLE t1 (
|
||||
|
@ -165,6 +167,7 @@ let $mvr_crash_statement= set global maria_checkpoint_interval=1;
|
|||
show create table t1;
|
||||
|
||||
# Test that UPDATE's effect on auto-increment is recovered
|
||||
--echo * TEST of UPDATE vs state.auto_increment
|
||||
-- source include/maria_make_snapshot_for_feeding_recovery.inc
|
||||
update t1 set i=15 where c="a";
|
||||
-- source include/maria_make_snapshot_for_comparison.inc
|
||||
|
|
|
@ -132,6 +132,8 @@ uchar maria_bitmap_marker[4]=
|
|||
{(uchar) 255, (uchar) 255, (uchar) 255, (uchar) 254};
|
||||
uchar maria_normal_page_marker[4]=
|
||||
{(uchar) 255, (uchar) 255, (uchar) 255, (uchar) 255};
|
||||
/*#define WRONG_BITMAP_FLUSH 1*/ /*define only for provoking bugs*/
|
||||
#undef WRONG_BITMAP_FLUSH
|
||||
|
||||
static my_bool _ma_read_bitmap_page(MARIA_SHARE *share,
|
||||
MARIA_FILE_BITMAP *bitmap,
|
||||
|
@ -143,14 +145,48 @@ static my_bool _ma_read_bitmap_page(MARIA_SHARE *share,
|
|||
static inline my_bool write_changed_bitmap(MARIA_SHARE *share,
|
||||
MARIA_FILE_BITMAP *bitmap)
|
||||
{
|
||||
DBUG_ENTER("write_changed_bitmap");
|
||||
DBUG_ASSERT(share->pagecache->block_size == bitmap->block_size);
|
||||
return (pagecache_write(share->pagecache,
|
||||
&bitmap->file, bitmap->page, 0,
|
||||
(uchar*) bitmap->map, PAGECACHE_PLAIN_PAGE,
|
||||
PAGECACHE_LOCK_LEFT_UNLOCKED,
|
||||
PAGECACHE_PIN_LEFT_UNPINNED,
|
||||
PAGECACHE_WRITE_DELAY, 0,
|
||||
LSN_IMPOSSIBLE));
|
||||
DBUG_PRINT("info", ("bitmap->flushable: %d", bitmap->flushable));
|
||||
if (bitmap->flushable
|
||||
#ifdef WRONG_BITMAP_FLUSH
|
||||
|| 1
|
||||
#endif
|
||||
)
|
||||
{
|
||||
my_bool res= pagecache_write(share->pagecache,
|
||||
&bitmap->file, bitmap->page, 0,
|
||||
(uchar*) bitmap->map, PAGECACHE_PLAIN_PAGE,
|
||||
PAGECACHE_LOCK_LEFT_UNLOCKED,
|
||||
PAGECACHE_PIN_LEFT_UNPINNED,
|
||||
PAGECACHE_WRITE_DELAY, 0, LSN_IMPOSSIBLE);
|
||||
DBUG_RETURN(res);
|
||||
}
|
||||
else
|
||||
{
|
||||
/**
|
||||
@todo RECOVERY BUG
|
||||
Not flushable: its content is not reflected by the log, to honour WAL we
|
||||
must keep the bitmap page pinned. Scenario of INSERT:
|
||||
REDO - UNDO (written to log but not forced)
|
||||
bitmap goes to page cache (because other INSERT needs to)
|
||||
and then to disk (pagecache eviction)
|
||||
crash: recovery will not find REDO-UNDO, table is corrupted.
|
||||
Solutions:
|
||||
give LSNs to bitmap pages or change pagecache to flush all log when
|
||||
flushing a bitmap page or keep bitmap page pinned until checkpoint.
|
||||
*/
|
||||
MARIA_PINNED_PAGE page_link;
|
||||
int res= pagecache_write(share->pagecache,
|
||||
&bitmap->file, bitmap->page, 0,
|
||||
(uchar*) bitmap->map, PAGECACHE_PLAIN_PAGE,
|
||||
PAGECACHE_LOCK_WRITE, PAGECACHE_PIN,
|
||||
PAGECACHE_WRITE_DELAY, &page_link.link,
|
||||
LSN_IMPOSSIBLE);
|
||||
page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK;
|
||||
push_dynamic(&bitmap->pinned_pages, (void*) &page_link);
|
||||
DBUG_RETURN(res);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -180,7 +216,9 @@ my_bool _ma_bitmap_init(MARIA_SHARE *share, File file)
|
|||
size*= 2;
|
||||
#endif
|
||||
|
||||
if (!(bitmap->map= (uchar*) my_malloc(size, MYF(MY_WME))))
|
||||
if (((bitmap->map= (uchar*) my_malloc(size, MYF(MY_WME))) == NULL) ||
|
||||
my_init_dynamic_array(&bitmap->pinned_pages,
|
||||
sizeof(MARIA_PINNED_PAGE), 1, 1))
|
||||
return 1;
|
||||
|
||||
bitmap->file.file= file;
|
||||
|
@ -193,6 +231,7 @@ my_bool _ma_bitmap_init(MARIA_SHARE *share, File file)
|
|||
The +1 is to add the bitmap page, as this doesn't have to be covered
|
||||
*/
|
||||
bitmap->pages_covered= aligned_bit_blocks * 16 + 1;
|
||||
bitmap->flushable= TRUE;
|
||||
|
||||
/* Update size for bits */
|
||||
/* TODO: Make this dependent on the row size */
|
||||
|
@ -207,6 +246,7 @@ my_bool _ma_bitmap_init(MARIA_SHARE *share, File file)
|
|||
bitmap->sizes[7]= 0;
|
||||
|
||||
pthread_mutex_init(&share->bitmap.bitmap_lock, MY_MUTEX_INIT_SLOW);
|
||||
pthread_cond_init(&share->bitmap.bitmap_cond, 0);
|
||||
|
||||
_ma_bitmap_reset_cache(share);
|
||||
|
||||
|
@ -231,6 +271,8 @@ my_bool _ma_bitmap_end(MARIA_SHARE *share)
|
|||
{
|
||||
my_bool res= _ma_bitmap_flush(share);
|
||||
pthread_mutex_destroy(&share->bitmap.bitmap_lock);
|
||||
pthread_cond_destroy(&share->bitmap.bitmap_cond);
|
||||
delete_dynamic(&share->bitmap.pinned_pages);
|
||||
my_free((uchar*) share->bitmap.map, MYF(MY_ALLOW_ZERO_PTR));
|
||||
share->bitmap.map= 0;
|
||||
return res;
|
||||
|
@ -273,6 +315,104 @@ my_bool _ma_bitmap_flush(MARIA_SHARE *share)
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
Dirty-page filtering criteria for bitmap pages
|
||||
|
||||
@param type Page's type
|
||||
@param pageno Page's number
|
||||
@param rec_lsn Page's rec_lsn
|
||||
@param arg pages_covered of bitmap
|
||||
*/
|
||||
|
||||
static enum pagecache_flush_filter_result
|
||||
filter_flush_bitmap_pages(enum pagecache_page_type type
|
||||
__attribute__ ((unused)),
|
||||
pgcache_page_no_t pageno,
|
||||
LSN rec_lsn __attribute__ ((unused)),
|
||||
void *arg)
|
||||
{
|
||||
return ((pageno % (*(ulong*)arg)) == 0);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
Flushes current bitmap page to the pagecache, and then all bitmap pages
|
||||
from pagecache to the file. Used by Checkpoint.
|
||||
|
||||
@param share Table's share
|
||||
*/
|
||||
|
||||
my_bool _ma_bitmap_flush_all(MARIA_SHARE *share)
|
||||
{
|
||||
my_bool res= 0;
|
||||
MARIA_FILE_BITMAP *bitmap= &share->bitmap;
|
||||
DBUG_ENTER("_ma_bitmap_flush_all");
|
||||
pthread_mutex_lock(&bitmap->bitmap_lock);
|
||||
if (bitmap->changed)
|
||||
{
|
||||
#ifndef WRONG_BITMAP_FLUSH
|
||||
while (!bitmap->flushable)
|
||||
{
|
||||
DBUG_PRINT("info", ("waiting for bitmap to be flushable"));
|
||||
pthread_cond_wait(&bitmap->bitmap_cond, &bitmap->bitmap_lock);
|
||||
}
|
||||
#endif
|
||||
/*
|
||||
Bitmap is in a flushable state: its contents in memory are reflected by
|
||||
log records (complete REDO-UNDO groups) and all bitmap pages are
|
||||
unpinned. We keep the mutex to preserve this situation, and flush to the
|
||||
file.
|
||||
*/
|
||||
res= write_changed_bitmap(share, bitmap);
|
||||
bitmap->changed= FALSE;
|
||||
/*
|
||||
We do NOT use FLUSH_KEEP_LAZY because we must be sure that bitmap
|
||||
pages have been flushed. That's a condition of correctness of
|
||||
Recovery: data pages may have been all flushed, if we write the
|
||||
checkpoint record Recovery will start from after their REDOs. If
|
||||
bitmap page was not flushed, as the REDOs about it will be skipped, it
|
||||
will wrongly not be recovered. If bitmap pages had a rec_lsn it would
|
||||
be different.
|
||||
There should be no pinned pages as bitmap->flushable is true.
|
||||
*/
|
||||
if (flush_pagecache_blocks_with_filter(share->pagecache,
|
||||
&bitmap->file, FLUSH_KEEP,
|
||||
filter_flush_bitmap_pages,
|
||||
&bitmap->pages_covered) &
|
||||
PCFLUSH_PINNED_AND_ERROR)
|
||||
res= TRUE;
|
||||
}
|
||||
pthread_mutex_unlock(&bitmap->bitmap_lock);
|
||||
DBUG_RETURN(res);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
@brief Unpin all pinned bitmap pages
|
||||
|
||||
@param share Table's share
|
||||
|
||||
@return Operation status
|
||||
@retval 0 ok
|
||||
*/
|
||||
|
||||
static void _ma_bitmap_unpin_all(MARIA_SHARE *share)
|
||||
{
|
||||
MARIA_FILE_BITMAP *bitmap= &share->bitmap;
|
||||
MARIA_PINNED_PAGE *page_link= ((MARIA_PINNED_PAGE*)
|
||||
dynamic_array_ptr(&bitmap->pinned_pages, 0));
|
||||
MARIA_PINNED_PAGE *pinned_page= page_link + bitmap->pinned_pages.elements;
|
||||
DBUG_ENTER("_ma_bitmap_unpin_all");
|
||||
DBUG_PRINT("info", ("pinned: %u", bitmap->pinned_pages.elements));
|
||||
while (pinned_page-- != page_link)
|
||||
pagecache_unlock_by_link(share->pagecache, pinned_page->link,
|
||||
pinned_page->unlock, PAGECACHE_UNPIN,
|
||||
LSN_IMPOSSIBLE, LSN_IMPOSSIBLE, TRUE);
|
||||
bitmap->pinned_pages.elements= 0;
|
||||
DBUG_VOID_RETURN;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Initialize bitmap in memory to a zero bitmap
|
||||
|
||||
|
@ -684,12 +824,6 @@ static my_bool _ma_change_bitmap_page(MARIA_HA *info,
|
|||
|
||||
if (bitmap->changed)
|
||||
{
|
||||
/**
|
||||
@todo RECOVERY BUG this is going to flush the bitmap page possibly to
|
||||
disk even though it could be over-allocated with not yet any REDO-UNDO
|
||||
complete group (WAL violation: no way to undo the over-allocation if
|
||||
crash). See also collect_tables().
|
||||
*/
|
||||
if (write_changed_bitmap(info->s, bitmap))
|
||||
DBUG_RETURN(1);
|
||||
bitmap->changed= 0;
|
||||
|
@ -1973,6 +2107,46 @@ my_bool _ma_bitmap_set_full_page_bits(MARIA_HA *info,
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
Make a transition of MARIA_FILE_BITMAP::flushable.
|
||||
If the bitmap becomes flushable, which requires that REDO-UNDO has been
|
||||
logged and all bitmap pages touched by the thread have a correct
|
||||
allocation, it unpins all bitmap pages, and if checkpoint is waiting, it
|
||||
wakes it up.
|
||||
If the bitmap becomes unflushable, it just records it.
|
||||
|
||||
@param share Table's share
|
||||
@param flushable New state
|
||||
*/
|
||||
|
||||
void _ma_bitmap_flushable(MARIA_SHARE *share, my_bool flushable)
|
||||
{
|
||||
MARIA_FILE_BITMAP *bitmap= &share->bitmap;
|
||||
if (flushable)
|
||||
{
|
||||
pthread_mutex_lock(&bitmap->bitmap_lock);
|
||||
_ma_bitmap_unpin_all(share);
|
||||
bitmap->flushable= TRUE;
|
||||
pthread_mutex_unlock(&bitmap->bitmap_lock);
|
||||
/*
|
||||
Ok to read in_checkpoint without mutex, as it is set before Checkpoint
|
||||
calls _ma_bitmap_flush_all().
|
||||
*/
|
||||
if (share->in_checkpoint)
|
||||
{
|
||||
DBUG_PRINT("info", ("bitmap ready waking up checkpoint"));
|
||||
pthread_cond_broadcast(&bitmap->bitmap_cond);
|
||||
}
|
||||
return;
|
||||
}
|
||||
/*
|
||||
Ok to set without mutex: we didn't touch the bitmap yet; when we touch it
|
||||
we will take the mutex.
|
||||
*/
|
||||
bitmap->flushable= FALSE;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Correct bitmap pages to reflect the true allocation
|
||||
|
||||
|
@ -2015,7 +2189,7 @@ my_bool _ma_bitmap_release_unused(MARIA_HA *info, MARIA_BITMAP_BLOCKS *blocks)
|
|||
*/
|
||||
current_bitmap_value= FULL_HEAD_PAGE;
|
||||
|
||||
pthread_mutex_lock(&info->s->bitmap.bitmap_lock);
|
||||
pthread_mutex_lock(&bitmap->bitmap_lock);
|
||||
|
||||
/* First handle head block */
|
||||
if (block->used & BLOCKUSED_USED)
|
||||
|
@ -2065,11 +2239,19 @@ my_bool _ma_bitmap_release_unused(MARIA_HA *info, MARIA_BITMAP_BLOCKS *blocks)
|
|||
block->page, page_count))
|
||||
goto err;
|
||||
}
|
||||
pthread_mutex_unlock(&info->s->bitmap.bitmap_lock);
|
||||
|
||||
_ma_bitmap_unpin_all(info->s);
|
||||
bitmap->flushable= TRUE;
|
||||
pthread_mutex_unlock(&bitmap->bitmap_lock);
|
||||
if (info->s->in_checkpoint)
|
||||
{
|
||||
DBUG_PRINT("info", ("bitmap ready waking up checkpoint"));
|
||||
pthread_cond_broadcast(&bitmap->bitmap_cond);
|
||||
}
|
||||
DBUG_RETURN(0);
|
||||
|
||||
err:
|
||||
pthread_mutex_unlock(&info->s->bitmap.bitmap_lock);
|
||||
pthread_mutex_unlock(&bitmap->bitmap_lock);
|
||||
DBUG_RETURN(1);
|
||||
}
|
||||
|
||||
|
|
|
@ -2692,32 +2692,21 @@ static my_bool allocate_and_write_block_record(MARIA_HA *info,
|
|||
MARIA_BITMAP_BLOCKS *blocks= &row->insert_blocks;
|
||||
DBUG_ENTER("allocate_and_write_block_record");
|
||||
|
||||
_ma_bitmap_flushable(info->s, FALSE);
|
||||
if (_ma_bitmap_find_place(info, row, blocks))
|
||||
DBUG_RETURN(1); /* Error reading bitmap */
|
||||
goto err; /* Error reading bitmap */
|
||||
|
||||
#ifdef RECOVERY_EXTRA_DEBUG
|
||||
/* Send this over-allocated bitmap to disk and crash, see if recovers */
|
||||
DBUG_EXECUTE_IF("maria_flush_bitmap",
|
||||
{
|
||||
DBUG_PRINT("maria_flush_bitmap", ("now"));
|
||||
_ma_bitmap_flush(info->s);
|
||||
_ma_flush_table_files(info, MARIA_FLUSH_DATA |
|
||||
MARIA_FLUSH_INDEX,
|
||||
FLUSH_KEEP, FLUSH_KEEP);
|
||||
});
|
||||
DBUG_EXECUTE_IF("maria_crash",
|
||||
{
|
||||
DBUG_PRINT("maria_crash", ("now"));
|
||||
fflush(DBUG_FILE);
|
||||
abort();
|
||||
});
|
||||
#endif
|
||||
/*
|
||||
Sleep; a checkpoint will happen and should not send this over-allocated
|
||||
bitmap to disk but rather wait.
|
||||
*/
|
||||
DBUG_EXECUTE_IF("maria_over_alloc_bitmap", sleep(10););
|
||||
|
||||
/* page will be pinned & locked by get_head_or_tail_page */
|
||||
if (get_head_or_tail_page(info, blocks->block, info->buff,
|
||||
row->space_on_head_page, HEAD_PAGE,
|
||||
PAGECACHE_LOCK_WRITE, &row_pos))
|
||||
DBUG_RETURN(1);
|
||||
goto err;
|
||||
row->lastpos= ma_recordpos(blocks->block->page, row_pos.rownr);
|
||||
if (info->s->calc_checksum)
|
||||
{
|
||||
|
@ -2732,11 +2721,17 @@ static my_bool allocate_and_write_block_record(MARIA_HA *info,
|
|||
if (write_block_record(info, (uchar*) 0, record, row,
|
||||
blocks, blocks->block->org_bitmap_value != 0,
|
||||
&row_pos, undo_lsn, 0))
|
||||
DBUG_RETURN(1); /* Error reading bitmap */
|
||||
goto err; /* Error reading bitmap */
|
||||
DBUG_PRINT("exit", ("Rowid: %lu (%lu:%u)", (ulong) row->lastpos,
|
||||
(ulong) ma_recordpos_to_page(row->lastpos),
|
||||
ma_recordpos_to_dir_entry(row->lastpos)));
|
||||
/* Now let checkpoint happen but don't commit */
|
||||
DBUG_EXECUTE_IF("maria_over_alloc_bitmap", sleep(1000););
|
||||
DBUG_RETURN(0);
|
||||
err:
|
||||
_ma_bitmap_flushable(info->s, TRUE);
|
||||
_ma_unpin_all_pages_and_finalize_row(info, LSN_IMPOSSIBLE);
|
||||
DBUG_RETURN(1);
|
||||
}
|
||||
|
||||
|
||||
|
@ -2806,6 +2801,7 @@ my_bool _ma_write_abort_block_record(MARIA_HA *info)
|
|||
MARIA_SHARE *share= info->s;
|
||||
DBUG_ENTER("_ma_write_abort_block_record");
|
||||
|
||||
_ma_bitmap_flushable(share, FALSE);
|
||||
if (delete_head_or_tail(info,
|
||||
ma_recordpos_to_page(info->cur_row.lastpos),
|
||||
ma_recordpos_to_dir_entry(info->cur_row.lastpos), 1,
|
||||
|
@ -2840,6 +2836,7 @@ my_bool _ma_write_abort_block_record(MARIA_HA *info)
|
|||
&lsn, (void*) 0))
|
||||
res= 1;
|
||||
}
|
||||
_ma_bitmap_flushable(share, TRUE);
|
||||
_ma_unpin_all_pages_and_finalize_row(info, lsn);
|
||||
DBUG_RETURN(res);
|
||||
}
|
||||
|
@ -2889,12 +2886,13 @@ static my_bool _ma_update_block_record2(MARIA_HA *info,
|
|||
calc_record_size(info, record, new_row);
|
||||
page= ma_recordpos_to_page(record_pos);
|
||||
|
||||
_ma_bitmap_flushable(share, FALSE);
|
||||
DBUG_ASSERT(share->pagecache->block_size == block_size);
|
||||
if (!(buff= pagecache_read(share->pagecache,
|
||||
&info->dfile, (pgcache_page_no_t) page, 0,
|
||||
info->buff, share->page_type,
|
||||
PAGECACHE_LOCK_WRITE, &page_link.link)))
|
||||
DBUG_RETURN(1);
|
||||
goto err;
|
||||
page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK;
|
||||
page_link.changed= 1;
|
||||
push_dynamic(&info->pinned_pages, (void*) &page_link);
|
||||
|
@ -2918,7 +2916,7 @@ static my_bool _ma_update_block_record2(MARIA_HA *info,
|
|||
if (extend_area_on_page(buff, dir, rownr, share->block_size,
|
||||
new_row->total_length, &org_empty_size,
|
||||
&rec_offset, &length))
|
||||
DBUG_RETURN(1);
|
||||
goto err;
|
||||
|
||||
row_pos.buff= buff;
|
||||
row_pos.rownr= rownr;
|
||||
|
@ -2980,6 +2978,7 @@ static my_bool _ma_update_block_record2(MARIA_HA *info,
|
|||
DBUG_RETURN(res);
|
||||
|
||||
err:
|
||||
_ma_bitmap_flushable(share, TRUE);
|
||||
_ma_unpin_all_pages_and_finalize_row(info, LSN_IMPOSSIBLE);
|
||||
DBUG_RETURN(1);
|
||||
}
|
||||
|
@ -3288,6 +3287,7 @@ my_bool _ma_delete_block_record(MARIA_HA *info, const uchar *record)
|
|||
DBUG_PRINT("enter", ("Rowid: %lu (%lu:%u)", (ulong) info->cur_row.lastpos,
|
||||
(ulong) page, record_number));
|
||||
|
||||
_ma_bitmap_flushable(share, FALSE);
|
||||
if (delete_head_or_tail(info, page, record_number, 1, 0) ||
|
||||
delete_tails(info, info->cur_row.tail_positions))
|
||||
goto err;
|
||||
|
@ -3334,10 +3334,12 @@ my_bool _ma_delete_block_record(MARIA_HA *info, const uchar *record)
|
|||
|
||||
}
|
||||
|
||||
_ma_bitmap_flushable(share, TRUE);
|
||||
_ma_unpin_all_pages_and_finalize_row(info, lsn);
|
||||
DBUG_RETURN(0);
|
||||
|
||||
err:
|
||||
_ma_bitmap_flushable(share, TRUE);
|
||||
_ma_unpin_all_pages_and_finalize_row(info, LSN_IMPOSSIBLE);
|
||||
DBUG_RETURN(1);
|
||||
}
|
||||
|
@ -5509,10 +5511,14 @@ uint _ma_apply_redo_insert_row_blobs(MARIA_HA *info,
|
|||
enum pagecache_page_pin unpin_method;
|
||||
uint length;
|
||||
|
||||
if ((page * info->s->block_size) > info->state->data_file_length)
|
||||
if (((page + 1) * info->s->block_size) >
|
||||
info->state->data_file_length)
|
||||
{
|
||||
/* New page or half written page at end of file */
|
||||
info->state->data_file_length= page * info->s->block_size;
|
||||
DBUG_PRINT("info", ("Enlarging data file from %lu to %lu",
|
||||
(ulong) info->state->data_file_length,
|
||||
(ulong) ((page + 1 ) * info->s->block_size)));
|
||||
info->state->data_file_length= (page + 1) * info->s->block_size;
|
||||
buff= info->keyread_buff;
|
||||
info->keyread_buff_used= 1;
|
||||
make_empty_page(info, buff, BLOB_PAGE);
|
||||
|
@ -5540,7 +5546,12 @@ uint _ma_apply_redo_insert_row_blobs(MARIA_HA *info,
|
|||
LSN_IMPOSSIBLE, 0);
|
||||
DBUG_RETURN(my_errno);
|
||||
}
|
||||
/* Physical file was too short; Create new page */
|
||||
/*
|
||||
Physical file was too short, create new page. It can be that
|
||||
recovery started with a file with N pages, wrote page N+2 into
|
||||
pagecache (increased data_file_length but not physical file
|
||||
length), now reads page N+1: the read fails.
|
||||
*/
|
||||
buff= info->keyread_buff;
|
||||
info->keyread_buff_used= 1;
|
||||
make_empty_page(info, buff, BLOB_PAGE);
|
||||
|
@ -5637,6 +5648,7 @@ my_bool _ma_apply_undo_row_insert(MARIA_HA *info, LSN undo_lsn,
|
|||
if (read_row_extent_info(info, buff, rownr))
|
||||
DBUG_RETURN(1);
|
||||
|
||||
_ma_bitmap_flushable(share, FALSE);
|
||||
if (delete_head_or_tail(info, page, rownr, 1, 1) ||
|
||||
delete_tails(info, info->cur_row.tail_positions))
|
||||
goto err;
|
||||
|
@ -5653,6 +5665,7 @@ my_bool _ma_apply_undo_row_insert(MARIA_HA *info, LSN undo_lsn,
|
|||
|
||||
res= 0;
|
||||
err:
|
||||
_ma_bitmap_flushable(share, TRUE);
|
||||
_ma_unpin_all_pages_and_finalize_row(info, lsn);
|
||||
DBUG_RETURN(res);
|
||||
}
|
||||
|
|
|
@ -171,6 +171,7 @@ my_bool _ma_compare_block_record(register MARIA_HA *info,
|
|||
my_bool _ma_bitmap_init(MARIA_SHARE *share, File file);
|
||||
my_bool _ma_bitmap_end(MARIA_SHARE *share);
|
||||
my_bool _ma_bitmap_flush(MARIA_SHARE *share);
|
||||
my_bool _ma_bitmap_flush_all(MARIA_SHARE *share);
|
||||
void _ma_bitmap_reset_cache(MARIA_SHARE *share);
|
||||
my_bool _ma_bitmap_find_place(MARIA_HA *info, MARIA_ROW *row,
|
||||
MARIA_BITMAP_BLOCKS *result_blocks);
|
||||
|
@ -198,6 +199,7 @@ my_bool _ma_check_if_right_bitmap_type(MARIA_HA *info,
|
|||
uint *bitmap_pattern);
|
||||
void _ma_bitmap_delete_all(MARIA_SHARE *share);
|
||||
int _ma_bitmap_create_first(MARIA_SHARE *share);
|
||||
void _ma_bitmap_flushable(MARIA_SHARE *share, my_bool flushable);
|
||||
#ifndef DBUG_OFF
|
||||
void _ma_print_bitmap(MARIA_FILE_BITMAP *bitmap, uchar *data,
|
||||
ulonglong page);
|
||||
|
|
|
@ -59,9 +59,7 @@ static uint checkpoints_total= 0, /**< all checkpoint requests made */
|
|||
|
||||
struct st_filter_param
|
||||
{
|
||||
my_bool is_data_file; /**< is the file about data or index */
|
||||
LSN up_to_lsn; /**< only pages with rec_lsn < this LSN */
|
||||
ulong pages_covered_by_bitmap; /**< to know which page is a bitmap page */
|
||||
uint max_pages; /**< stop after flushing this number pages */
|
||||
}; /**< information to determine which dirty pages should be flushed */
|
||||
|
||||
|
@ -74,10 +72,6 @@ filter_flush_file_full(enum pagecache_page_type type,
|
|||
pgcache_page_no_t page,
|
||||
LSN rec_lsn, void *arg);
|
||||
static enum pagecache_flush_filter_result
|
||||
filter_flush_file_indirect(enum pagecache_page_type type,
|
||||
pgcache_page_no_t page,
|
||||
LSN rec_lsn, void *arg);
|
||||
static enum pagecache_flush_filter_result
|
||||
filter_flush_file_evenly(enum pagecache_page_type type,
|
||||
pgcache_page_no_t pageno,
|
||||
LSN rec_lsn, void *arg);
|
||||
|
@ -264,8 +258,8 @@ static int really_execute_checkpoint(void)
|
|||
/* checkpoint succeeded */
|
||||
ptr= record_pieces[3].str;
|
||||
pages_to_flush_before_next_checkpoint= uint4korr(ptr);
|
||||
DBUG_PRINT("info",("%u pages to flush before next checkpoint",
|
||||
(uint)pages_to_flush_before_next_checkpoint));
|
||||
DBUG_PRINT("checkpoint",("%u pages to flush before next checkpoint",
|
||||
(uint)pages_to_flush_before_next_checkpoint));
|
||||
|
||||
/* compute log's low-water mark */
|
||||
TRANSLOG_ADDRESS log_low_water_mark= min_page_rec_lsn;
|
||||
|
@ -350,9 +344,11 @@ int ma_checkpoint_init(ulong interval)
|
|||
|
||||
@param what_to_flush 0: current bitmap and all data pages
|
||||
1: state
|
||||
2: all bitmap pages
|
||||
*/
|
||||
static void flush_all_tables(int what_to_flush)
|
||||
{
|
||||
int res= 0;
|
||||
LIST *pos; /**< to iterate over open tables */
|
||||
pthread_mutex_lock(&THR_LOCK_maria);
|
||||
for (pos= maria_open_list; pos; pos= pos->next)
|
||||
|
@ -363,17 +359,21 @@ static void flush_all_tables(int what_to_flush)
|
|||
switch (what_to_flush)
|
||||
{
|
||||
case 0:
|
||||
_ma_flush_table_files(info, MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,
|
||||
res= _ma_flush_table_files(info, MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,
|
||||
FLUSH_KEEP, FLUSH_KEEP);
|
||||
break;
|
||||
case 1:
|
||||
_ma_state_info_write(info->s, 1|4);
|
||||
res= _ma_state_info_write(info->s, 1|4);
|
||||
DBUG_PRINT("maria_flush_states",
|
||||
("is_of_horizon: LSN (%lu,0x%lx)",
|
||||
LSN_IN_PARTS(info->s->state.is_of_horizon)));
|
||||
break;
|
||||
case 2:
|
||||
res= _ma_bitmap_flush_all(info->s);
|
||||
break;
|
||||
}
|
||||
}
|
||||
DBUG_ASSERT(res == 0);
|
||||
}
|
||||
pthread_mutex_unlock(&THR_LOCK_maria);
|
||||
}
|
||||
|
@ -387,6 +387,11 @@ static void flush_all_tables(int what_to_flush)
|
|||
void ma_checkpoint_end(void)
|
||||
{
|
||||
DBUG_ENTER("ma_checkpoint_end");
|
||||
DBUG_EXECUTE_IF("maria_flush_bitmap",
|
||||
{
|
||||
DBUG_PRINT("maria_flush_bitmap", ("now"));
|
||||
flush_all_tables(2);
|
||||
});
|
||||
DBUG_EXECUTE_IF("maria_flush_whole_page_cache",
|
||||
{
|
||||
DBUG_PRINT("maria_flush_whole_page_cache", ("now"));
|
||||
|
@ -447,8 +452,8 @@ void ma_checkpoint_end(void)
|
|||
|
||||
We flush data/index pages which have been dirty since the previous
|
||||
checkpoint (this is the two-checkpoint rule: the REDO phase will not have
|
||||
to start from earlier than the next-to-last checkpoint), and all dirty
|
||||
bitmap pages.
|
||||
to start from earlier than the next-to-last checkpoint).
|
||||
Bitmap pages are handled by _ma_bitmap_flush_all().
|
||||
|
||||
@param type Page's type
|
||||
@param pageno Page's number
|
||||
|
@ -458,21 +463,20 @@ void ma_checkpoint_end(void)
|
|||
|
||||
static enum pagecache_flush_filter_result
|
||||
filter_flush_file_medium(enum pagecache_page_type type,
|
||||
pgcache_page_no_t pageno,
|
||||
pgcache_page_no_t pageno __attribute__ ((unused)),
|
||||
LSN rec_lsn, void *arg)
|
||||
{
|
||||
struct st_filter_param *param= (struct st_filter_param *)arg;
|
||||
return ((type == PAGECACHE_LSN_PAGE) &&
|
||||
(cmp_translog_addr(rec_lsn, param->up_to_lsn) <= 0)) ||
|
||||
(param->is_data_file &&
|
||||
((pageno % param->pages_covered_by_bitmap) == 0));
|
||||
return (type == PAGECACHE_LSN_PAGE) &&
|
||||
(cmp_translog_addr(rec_lsn, param->up_to_lsn) <= 0);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
@brief dirty-page filtering criteria for FULL checkpoint.
|
||||
|
||||
We flush all dirty data/index pages and all dirty bitmap pages.
|
||||
We flush all dirty data/index pages.
|
||||
Bitmap pages are handled by _ma_bitmap_flush_all().
|
||||
|
||||
@param type Page's type
|
||||
@param pageno Page's number
|
||||
|
@ -482,39 +486,11 @@ filter_flush_file_medium(enum pagecache_page_type type,
|
|||
|
||||
static enum pagecache_flush_filter_result
|
||||
filter_flush_file_full(enum pagecache_page_type type,
|
||||
pgcache_page_no_t pageno,
|
||||
pgcache_page_no_t pageno __attribute__ ((unused)),
|
||||
LSN rec_lsn __attribute__ ((unused)),
|
||||
void *arg)
|
||||
void *arg __attribute__ ((unused)))
|
||||
{
|
||||
struct st_filter_param *param= (struct st_filter_param *)arg;
|
||||
return (type == PAGECACHE_LSN_PAGE) ||
|
||||
(param->is_data_file &&
|
||||
((pageno % param->pages_covered_by_bitmap) == 0));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
@brief dirty-page filtering criteria for INDIRECT checkpoint.
|
||||
|
||||
We flush all dirty bitmap pages.
|
||||
|
||||
@param type Page's type
|
||||
@param pageno Page's number
|
||||
@param rec_lsn Page's rec_lsn
|
||||
@param arg filter_param
|
||||
*/
|
||||
|
||||
static enum pagecache_flush_filter_result
|
||||
filter_flush_file_indirect(enum pagecache_page_type type
|
||||
__attribute__ ((unused)),
|
||||
pgcache_page_no_t pageno,
|
||||
LSN rec_lsn __attribute__ ((unused)),
|
||||
void *arg)
|
||||
{
|
||||
struct st_filter_param *param= (struct st_filter_param *)arg;
|
||||
return
|
||||
(param->is_data_file &&
|
||||
((pageno % param->pages_covered_by_bitmap) == 0));
|
||||
return (type == PAGECACHE_LSN_PAGE);
|
||||
}
|
||||
|
||||
|
||||
|
@ -526,6 +502,8 @@ filter_flush_file_indirect(enum pagecache_page_type type
|
|||
to start from earlier than the next-to-last checkpoint), and no
|
||||
bitmap pages. But we flush no more than a certain number of pages (to have
|
||||
an even flushing, no write burst).
|
||||
The reason to not flush bitmap pages is that they may not be in a flushable
|
||||
state at this moment and we don't want to wait for them.
|
||||
|
||||
@param type Page's type
|
||||
@param pageno Page's number
|
||||
|
@ -574,9 +552,11 @@ pthread_handler_t ma_checkpoint_background(void *arg)
|
|||
about the interval's value when it started.
|
||||
*/
|
||||
const ulong interval= (ulong)arg;
|
||||
uint sleeps;
|
||||
TRANSLOG_ADDRESS log_horizon_at_last_checkpoint= LSN_IMPOSSIBLE;
|
||||
ulonglong pagecache_flushes_at_last_checkpoint= 0;
|
||||
uint sleeps, sleep_time;
|
||||
TRANSLOG_ADDRESS log_horizon_at_last_checkpoint=
|
||||
translog_get_horizon();
|
||||
ulonglong pagecache_flushes_at_last_checkpoint=
|
||||
maria_pagecache->global_cache_write;
|
||||
uint pages_bunch_size;
|
||||
struct st_filter_param filter_param;
|
||||
PAGECACHE_FILE *dfile; /**< data file currently being flushed */
|
||||
|
@ -602,7 +582,7 @@ pthread_handler_t ma_checkpoint_background(void *arg)
|
|||
sleeps=0;
|
||||
#endif
|
||||
struct timespec abstime;
|
||||
switch((sleeps++) % interval)
|
||||
switch (sleeps % interval)
|
||||
{
|
||||
case 0:
|
||||
/*
|
||||
|
@ -626,8 +606,10 @@ pthread_handler_t ma_checkpoint_background(void *arg)
|
|||
{
|
||||
/* don't take checkpoint, so don't know what to flush */
|
||||
pages_to_flush_before_next_checkpoint= 0;
|
||||
sleep_time= interval;
|
||||
break;
|
||||
}
|
||||
sleep_time= 1;
|
||||
ma_checkpoint_execute(CHECKPOINT_MEDIUM, TRUE);
|
||||
/*
|
||||
Snapshot this kind of "state" of the engine. Note that the value below
|
||||
|
@ -653,11 +635,11 @@ pthread_handler_t ma_checkpoint_background(void *arg)
|
|||
default:
|
||||
if (pages_bunch_size > 0)
|
||||
{
|
||||
DBUG_PRINT("info", ("Maria background checkpoint thread: %u pages",
|
||||
pages_bunch_size));
|
||||
DBUG_PRINT("checkpoint",
|
||||
("Maria background checkpoint thread: %u pages",
|
||||
pages_bunch_size));
|
||||
/* flush a bunch of dirty pages */
|
||||
filter_param.max_pages= pages_bunch_size;
|
||||
filter_param.is_data_file= TRUE;
|
||||
while (dfile != dfiles_end)
|
||||
{
|
||||
/*
|
||||
|
@ -683,7 +665,6 @@ pthread_handler_t ma_checkpoint_background(void *arg)
|
|||
we wrote enough pages.
|
||||
*/
|
||||
}
|
||||
filter_param.is_data_file= FALSE;
|
||||
while (kfile != kfiles_end)
|
||||
{
|
||||
int res=
|
||||
|
@ -697,6 +678,12 @@ pthread_handler_t ma_checkpoint_background(void *arg)
|
|||
break; /* and we will continue with the same file */
|
||||
kfile++; /* otherwise all this file is flushed, move to next file */
|
||||
}
|
||||
sleep_time= 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Can directly sleep until the next checkpoint moment */
|
||||
sleep_time= interval - (sleeps % interval);
|
||||
}
|
||||
}
|
||||
pthread_mutex_lock(&LOCK_checkpoint);
|
||||
|
@ -708,12 +695,14 @@ pthread_handler_t ma_checkpoint_background(void *arg)
|
|||
pthread_mutex_lock(&LOCK_checkpoint);
|
||||
#else
|
||||
/* To have a killable sleep, we use timedwait like our SQL GET_LOCK() */
|
||||
set_timespec(abstime, 1);
|
||||
DBUG_PRINT("info", ("sleeping %u seconds", sleep_time));
|
||||
set_timespec(abstime, sleep_time);
|
||||
pthread_cond_timedwait(&COND_checkpoint, &LOCK_checkpoint, &abstime);
|
||||
#endif
|
||||
if (checkpoint_thread_die == 1)
|
||||
break;
|
||||
pthread_mutex_unlock(&LOCK_checkpoint);
|
||||
sleeps+= sleep_time;
|
||||
}
|
||||
pthread_mutex_unlock(&LOCK_checkpoint);
|
||||
DBUG_PRINT("info",("Maria background checkpoint thread ends"));
|
||||
|
@ -855,7 +844,7 @@ static int collect_tables(LEX_STRING *str, LSN checkpoint_start_log_horizon)
|
|||
filter= &filter_flush_file_full;
|
||||
break;
|
||||
case CHECKPOINT_INDIRECT:
|
||||
filter= &filter_flush_file_indirect;
|
||||
filter= NULL;
|
||||
break;
|
||||
default:
|
||||
DBUG_ASSERT(0);
|
||||
|
@ -888,6 +877,7 @@ static int collect_tables(LEX_STRING *str, LSN checkpoint_start_log_horizon)
|
|||
{
|
||||
MARIA_SHARE *share= distinct_shares[i];
|
||||
PAGECACHE_FILE kfile, dfile;
|
||||
my_bool ignore_share;
|
||||
if (!(share->in_checkpoint & MARIA_CHECKPOINT_LOOKS_AT_ME))
|
||||
{
|
||||
/* No need for a mutex to read the above, only we can write this flag */
|
||||
|
@ -957,7 +947,6 @@ static int collect_tables(LEX_STRING *str, LSN checkpoint_start_log_horizon)
|
|||
for ( ; state_copy->index != i; state_copy++)
|
||||
DBUG_ASSERT(state_copy < state_copies_end);
|
||||
|
||||
filter_param.pages_covered_by_bitmap= share->bitmap.pages_covered;
|
||||
/* OS file descriptors are ints which we stored in 4 bytes */
|
||||
compile_time_assert(sizeof(int) <= 4);
|
||||
pthread_mutex_lock(&share->intern_lock);
|
||||
|
@ -978,7 +967,9 @@ static int collect_tables(LEX_STRING *str, LSN checkpoint_start_log_horizon)
|
|||
onto a newer one (assuming the table has been reopened with a different
|
||||
share but of course same physical index file).
|
||||
*/
|
||||
if ((share->id != 0) && (share->last_version != 0))
|
||||
ignore_share= (share->id == 0) | (share->last_version == 0);
|
||||
DBUG_PRINT("info", ("ignore_share: %d", ignore_share));
|
||||
if (!ignore_share)
|
||||
{
|
||||
/** @todo avoid strlen */
|
||||
uint open_file_name_len= strlen(share->open_file_name) + 1;
|
||||
|
@ -1061,14 +1052,12 @@ static int collect_tables(LEX_STRING *str, LSN checkpoint_start_log_horizon)
|
|||
each checkpoint if the table was once written and then not anymore.
|
||||
*/
|
||||
}
|
||||
/**
|
||||
@todo RECOVERY BUG this is going to flush the bitmap page possibly to
|
||||
disk even though it could be over-allocated with not yet any
|
||||
REDO-UNDO complete group (WAL violation: no way to undo the
|
||||
over-allocation if crash); see also _ma_change_bitmap_page().
|
||||
*/
|
||||
sync_error|=
|
||||
_ma_bitmap_flush(share); /* after that, all is in page cache */
|
||||
if (_ma_bitmap_flush_all(share))
|
||||
{
|
||||
sync_error= 1;
|
||||
/** @todo all write failures should mark table corrupted */
|
||||
ma_message_no_user(0, "checkpoint bitmap page flush failed");
|
||||
}
|
||||
DBUG_ASSERT(share->pagecache == maria_pagecache);
|
||||
}
|
||||
if (share->in_checkpoint & MARIA_CHECKPOINT_SHOULD_FREE_ME)
|
||||
|
@ -1135,37 +1124,33 @@ static int collect_tables(LEX_STRING *str, LSN checkpoint_start_log_horizon)
|
|||
the evicter will fail to write their page: corruption.
|
||||
*/
|
||||
|
||||
/*
|
||||
We do NOT use FLUSH_KEEP_LAZY because we must be sure that bitmap pages
|
||||
have been flushed. That's a condition of correctness of Recovery: data
|
||||
pages may have been all flushed, if we write the checkpoint record
|
||||
Recovery will start from after their REDOs. If bitmap page was not
|
||||
flushed, as the REDOs about it will be skipped, it will wrongly not be
|
||||
recovered. If bitmap pages had a rec_lsn it would be different.
|
||||
*/
|
||||
if ((filter_param.is_data_file= TRUE),
|
||||
(flush_pagecache_blocks_with_filter(maria_pagecache,
|
||||
&dfile, FLUSH_KEEP,
|
||||
filter, &filter_param) &
|
||||
PCFLUSH_ERROR))
|
||||
ma_message_no_user(0, "checkpoint data page flush failed");
|
||||
if ((filter_param.is_data_file= FALSE),
|
||||
(flush_pagecache_blocks_with_filter(maria_pagecache,
|
||||
&kfile, FLUSH_KEEP,
|
||||
filter, &filter_param) &
|
||||
PCFLUSH_ERROR))
|
||||
ma_message_no_user(0, "checkpoint index page flush failed");
|
||||
if (!ignore_share)
|
||||
{
|
||||
if (filter != NULL)
|
||||
{
|
||||
if ((flush_pagecache_blocks_with_filter(maria_pagecache,
|
||||
&dfile, FLUSH_KEEP_LAZY,
|
||||
filter, &filter_param) &
|
||||
PCFLUSH_ERROR))
|
||||
ma_message_no_user(0, "checkpoint data page flush failed");
|
||||
if ((flush_pagecache_blocks_with_filter(maria_pagecache,
|
||||
&kfile, FLUSH_KEEP_LAZY,
|
||||
filter, &filter_param) &
|
||||
PCFLUSH_ERROR))
|
||||
ma_message_no_user(0, "checkpoint index page flush failed");
|
||||
}
|
||||
/*
|
||||
fsyncs the fd, that's the loooong operation (e.g. max 150 fsync
|
||||
per second, so if you have touched 1000 files it's 7 seconds).
|
||||
*/
|
||||
sync_error|=
|
||||
my_sync(dfile.file, MYF(MY_WME | MY_IGNORE_BADFD)) |
|
||||
my_sync(kfile.file, MYF(MY_WME | MY_IGNORE_BADFD));
|
||||
/*
|
||||
in case of error, we continue because writing other tables to disk is
|
||||
still useful.
|
||||
*/
|
||||
sync_error|=
|
||||
my_sync(dfile.file, MYF(MY_WME | MY_IGNORE_BADFD)) |
|
||||
my_sync(kfile.file, MYF(MY_WME | MY_IGNORE_BADFD));
|
||||
/*
|
||||
in case of error, we continue because writing other tables to disk is
|
||||
still useful.
|
||||
*/
|
||||
}
|
||||
}
|
||||
|
||||
if (sync_error)
|
||||
|
|
|
@ -51,12 +51,6 @@ int ma_commit(TRN *trn)
|
|||
So we need to go the first way.
|
||||
*/
|
||||
|
||||
/**
|
||||
@todo RECOVERY share's state is written to disk only in
|
||||
maria_lock_database(), so COMMIT record is not the last record of the
|
||||
transaction! It is probably an issue. Recovery of the state is a problem
|
||||
not yet solved.
|
||||
*/
|
||||
/*
|
||||
We do not store "thd->transaction.xid_state.xid" for now, it will be
|
||||
needed only when we support XA.
|
||||
|
|
|
@ -175,7 +175,7 @@ my_bool write_hook_for_clr_end(enum translog_record_type type
|
|||
|
||||
|
||||
/**
|
||||
@brief write hook for undo key insert
|
||||
@brief write hook for undo key
|
||||
*/
|
||||
|
||||
my_bool write_hook_for_undo_key(enum translog_record_type type,
|
||||
|
|
|
@ -389,8 +389,6 @@ static LOG_DESC INIT_LOGREC_REDO_NOT_USED=
|
|||
{LOGRECTYPE_VARIABLE_LENGTH, 0, 8, NULL, write_hook_for_redo, NULL, 0,
|
||||
"redo_insert_row_blob", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
|
||||
|
||||
/** @todo RECOVERY BUG handle it in recovery */
|
||||
/*QQ:TODO:header???*/
|
||||
static LOG_DESC INIT_LOGREC_REDO_INSERT_ROW_BLOBS=
|
||||
{LOGRECTYPE_VARIABLE_LENGTH, 0, FILEID_STORE_SIZE, NULL,
|
||||
write_hook_for_redo, NULL, 0,
|
||||
|
|
|
@ -1100,7 +1100,6 @@ uint _ma_state_info_write(MARIA_SHARE *share, uint pWrite)
|
|||
|
||||
uint _ma_state_info_write_sub(File file, MARIA_STATE_INFO *state, uint pWrite)
|
||||
{
|
||||
/** @todo RECOVERY write it only at checkpoint time */
|
||||
uchar buff[MARIA_STATE_INFO_SIZE + MARIA_STATE_EXTRA_SIZE];
|
||||
uchar *ptr=buff;
|
||||
uint i, keys= (uint) state->header.keys;
|
||||
|
@ -1143,7 +1142,6 @@ uint _ma_state_info_write_sub(File file, MARIA_STATE_INFO *state, uint pWrite)
|
|||
{
|
||||
mi_sizestore(ptr,state->key_root[i]); ptr+= 8;
|
||||
}
|
||||
/** @todo RECOVERY BUG key_del is a problem for recovery */
|
||||
mi_sizestore(ptr,state->key_del); ptr+= 8;
|
||||
if (pWrite & 2) /* From maria_chk */
|
||||
{
|
||||
|
|
|
@ -601,6 +601,10 @@ static uint pagecache_fwrite(PAGECACHE *pagecache,
|
|||
{
|
||||
DBUG_ENTER("pagecache_fwrite");
|
||||
DBUG_ASSERT(type != PAGECACHE_READ_UNKNOWN_PAGE);
|
||||
/**
|
||||
@todo RECOVERY BUG Here, we should call a callback get_lsn(): it will use
|
||||
lsn_korr() for LSN pages, and translog_get_horizon() for bitmap pages.
|
||||
*/
|
||||
if (type == PAGECACHE_LSN_PAGE)
|
||||
{
|
||||
LSN lsn;
|
||||
|
@ -4185,18 +4189,8 @@ my_bool pagecache_collect_changed_blocks_with_lsn(PAGECACHE *pagecache,
|
|||
*/
|
||||
DBUG_ASSERT(block->hash_link != NULL);
|
||||
DBUG_ASSERT(block->status & PCBLOCK_CHANGED);
|
||||
/**
|
||||
@todo RECOVERY BUG
|
||||
REDO phase uses PAGECACHE_PLAIN_PAGE, so the lines below would
|
||||
confuse the indirect Checkpoint taken at the end of the REDO phase.
|
||||
So we below collect even dirty pages of temporary tables as a result
|
||||
:( Soon we should have the MARIA_SHARE accessible from the
|
||||
pagecache's block and then we can test born_transactional.
|
||||
*/
|
||||
#ifdef TRANS_TABLES_ALWAYS_USE_LSN_PAGE
|
||||
if (block->type != PAGECACHE_LSN_PAGE)
|
||||
continue; /* no need to store it */
|
||||
#endif
|
||||
stored_list_size++;
|
||||
}
|
||||
}
|
||||
|
@ -4221,10 +4215,8 @@ my_bool pagecache_collect_changed_blocks_with_lsn(PAGECACHE *pagecache,
|
|||
block;
|
||||
block= block->next_changed)
|
||||
{
|
||||
#ifdef TRANS_TABLES_ALWAYS_USE_LSN_PAGE
|
||||
if (block->type != PAGECACHE_LSN_PAGE)
|
||||
continue; /* no need to store it in the checkpoint record */
|
||||
#endif
|
||||
compile_time_assert(sizeof(block->hash_link->file.file) <= 4);
|
||||
compile_time_assert(sizeof(block->hash_link->pageno) <= 4);
|
||||
int4store(ptr, block->hash_link->file.file);
|
||||
|
|
|
@ -348,11 +348,14 @@ int maria_apply_log(LSN from_lsn, enum maria_apply_log_way apply,
|
|||
REDO phase does not fill blocks' rec_lsn, so a checkpoint now would be
|
||||
wrong: if a future recovery used it, the REDO phase would always
|
||||
start from the checkpoint and never from before, wrongly skipping REDOs
|
||||
(tested).
|
||||
(tested). Another problem is that the REDO phase uses
|
||||
PAGECACHE_PLAIN_PAGE, while Checkpoint only collects PAGECACHE_LSN_PAGE.
|
||||
|
||||
@todo fix this; pagecache_write() now can have a rec_lsn argument.
|
||||
@todo fix this. pagecache_write() now can have a rec_lsn argument. And we
|
||||
could make a function which goes through pages at end of REDO phase and
|
||||
changes their type.
|
||||
*/
|
||||
#if 0
|
||||
#ifdef FIX_AND_ENABLE_LATER
|
||||
if (take_checkpoints && checkpoint_useful)
|
||||
{
|
||||
/*
|
||||
|
@ -478,14 +481,11 @@ prototype_redo_exec_hook(LONG_TRANSACTION_ID)
|
|||
{
|
||||
uint16 sid= rec->short_trid;
|
||||
TrID long_trid= all_active_trans[sid].long_trid;
|
||||
/* abort group of this trn (must be from before a crash) */
|
||||
LSN gslsn= all_active_trans[sid].group_start_lsn;
|
||||
if (gslsn != LSN_IMPOSSIBLE)
|
||||
{
|
||||
tprint(tracef, "Group at LSN (%lu,0x%lx) short_trid %u incomplete\n",
|
||||
LSN_IN_PARTS(gslsn), sid);
|
||||
all_active_trans[sid].group_start_lsn= LSN_IMPOSSIBLE;
|
||||
}
|
||||
/*
|
||||
Any incomplete group should be of an old crash which already had a
|
||||
recovery and thus has logged INCOMPLETE_GROUP which we must have seen.
|
||||
*/
|
||||
DBUG_ASSERT(all_active_trans[sid].group_start_lsn == LSN_IMPOSSIBLE);
|
||||
if (long_trid != 0)
|
||||
{
|
||||
LSN ulsn= all_active_trans[sid].undo_lsn;
|
||||
|
@ -1160,6 +1160,7 @@ static int new_table(uint16 sid, const char *name,
|
|||
}
|
||||
if (maria_is_crashed(info))
|
||||
{
|
||||
/** @todo what should we do? how to continue recovery? */
|
||||
tprint(tracef, "Table is crashed, can't apply log records to it\n");
|
||||
goto end;
|
||||
}
|
||||
|
@ -1566,10 +1567,6 @@ prototype_redo_exec_hook(UNDO_ROW_INSERT)
|
|||
}
|
||||
share->state.state.checksum+= ha_checksum_korr(buff);
|
||||
}
|
||||
/**
|
||||
@todo some bits below will rather be set when executing UNDOs related
|
||||
to keys
|
||||
*/
|
||||
info->s->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED;
|
||||
}
|
||||
tprint(tracef, " rows' count %lu\n", (ulong)info->s->state.state.records);
|
||||
|
@ -1605,8 +1602,8 @@ prototype_redo_exec_hook(UNDO_ROW_DELETE)
|
|||
}
|
||||
share->state.state.checksum+= ha_checksum_korr(buff);
|
||||
}
|
||||
share->state.changed|= (STATE_CHANGED | STATE_NOT_ANALYZED |
|
||||
STATE_NOT_OPTIMIZED_ROWS);
|
||||
share->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED |
|
||||
STATE_NOT_OPTIMIZED_ROWS;
|
||||
}
|
||||
tprint(tracef, " rows' count %lu\n", (ulong)share->state.state.records);
|
||||
_ma_unpin_all_pages(info, rec->lsn);
|
||||
|
@ -1743,6 +1740,7 @@ prototype_redo_exec_hook(COMMIT)
|
|||
{
|
||||
tprint(tracef, "We don't know about transaction with short_trid %u;"
|
||||
"it probably committed long ago, forget it\n", sid);
|
||||
bzero(&all_active_trans[sid], sizeof(all_active_trans[sid]));
|
||||
return 0;
|
||||
}
|
||||
llstr(long_trid, llbuf);
|
||||
|
@ -1792,6 +1790,7 @@ prototype_redo_exec_hook(CLR_END)
|
|||
break;
|
||||
case LOGREC_UNDO_ROW_INSERT:
|
||||
share->state.state.records--;
|
||||
share->state.changed|= STATE_NOT_OPTIMIZED_ROWS;
|
||||
row_entry= 1;
|
||||
break;
|
||||
case LOGREC_UNDO_ROW_UPDATE:
|
||||
|
@ -1865,7 +1864,8 @@ prototype_undo_exec_hook(UNDO_ROW_INSERT)
|
|||
return 1;
|
||||
}
|
||||
share= info->s;
|
||||
share->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED;
|
||||
share->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED |
|
||||
STATE_NOT_OPTIMIZED_ROWS;
|
||||
|
||||
record_ptr= rec->header;
|
||||
if (share->calc_checksum)
|
||||
|
@ -2205,8 +2205,9 @@ static int run_redo_phase(LSN lsn, enum maria_apply_log_way apply)
|
|||
if (log_desc->record_in_group == LOGREC_IS_GROUP_ITSELF)
|
||||
{
|
||||
/*
|
||||
can happen if the transaction got a table write error, then
|
||||
unlocked tables thus wrote a COMMIT record.
|
||||
Can happen if the transaction got a table write error, then
|
||||
unlocked tables thus wrote a COMMIT record. Or can be an
|
||||
INCOMPLETE_GROUP record written by a previous recovery.
|
||||
*/
|
||||
tprint(tracef, "\nDiscarding incomplete group before this record\n");
|
||||
all_active_trans[sid].group_start_lsn= LSN_IMPOSSIBLE;
|
||||
|
@ -2677,6 +2678,8 @@ static LSN parse_checkpoint_record(LSN lsn)
|
|||
tprint(tracef, "%u active transactions\n", nb_active_transactions);
|
||||
LSN minimum_rec_lsn_of_active_transactions= lsn_korr(ptr);
|
||||
ptr+= LSN_STORE_SIZE;
|
||||
max_long_trid= transid_korr(ptr);
|
||||
ptr+= TRANSID_SIZE;
|
||||
|
||||
/*
|
||||
how much brain juice and discussions there was to come to writing this
|
||||
|
|
|
@ -104,8 +104,8 @@ int main(int argc, char **argv)
|
|||
maria_init();
|
||||
|
||||
/*
|
||||
If we are doing a repair and we have requested logging (on by default),
|
||||
enable transaction log handling.
|
||||
If we are doing a repair, the user may want to store this repair into the log
|
||||
so that the log has a complete history and can be used to replay.
|
||||
*/
|
||||
if (opt_transaction_logging && (check_param.testflag & T_REP_ANY) &&
|
||||
(ma_control_file_create_or_open() ||
|
||||
|
|
|
@ -217,16 +217,19 @@ typedef struct st_maria_file_bitmap
|
|||
ulonglong page; /* Page number for current bitmap */
|
||||
uint used_size; /* Size of bitmap head that is not 0 */
|
||||
my_bool changed; /* 1 if page needs to be flushed */
|
||||
my_bool flushable; /**< If bitmap and log are in sync */
|
||||
PAGECACHE_FILE file; /* datafile where bitmap is stored */
|
||||
|
||||
#ifdef THREAD
|
||||
pthread_mutex_t bitmap_lock;
|
||||
pthread_cond_t bitmap_cond; /**< When bitmap becomes flushable */
|
||||
#endif
|
||||
/* Constants, allocated when initiating bitmaps */
|
||||
uint sizes[8]; /* Size per bit combination */
|
||||
uint total_size; /* Total usable size of bitmap page */
|
||||
uint block_size; /* Block size of file */
|
||||
ulong pages_covered; /* Pages covered by bitmap + 1 */
|
||||
DYNAMIC_ARRAY pinned_pages; /**< not-yet-flushable bitmap pages */
|
||||
} MARIA_FILE_BITMAP;
|
||||
|
||||
#define MARIA_CHECKPOINT_LOOKS_AT_ME 1
|
||||
|
@ -511,7 +514,6 @@ struct st_maria_handler
|
|||
|
||||
#define USE_WHOLE_KEY 65535 /* Use whole key in _search() */
|
||||
#define F_EXTRA_LCK -1
|
||||
#define TRANSID_SIZE 6
|
||||
|
||||
/* bits in opt_flag */
|
||||
#define MEMMAP_USED 32
|
||||
|
|
|
@ -598,6 +598,7 @@ my_bool trnman_collect_transactions(LEX_STRING *str_act, LEX_STRING *str_com,
|
|||
pthread_mutex_lock(&LOCK_trn_list);
|
||||
str_act->length= 2 + /* number of active transactions */
|
||||
LSN_STORE_SIZE + /* minimum of their rec_lsn */
|
||||
TRANSID_SIZE + /* current TrID generator value */
|
||||
(2 + /* short id */
|
||||
6 + /* long id */
|
||||
LSN_STORE_SIZE + /* undo_lsn */
|
||||
|
@ -618,6 +619,8 @@ my_bool trnman_collect_transactions(LEX_STRING *str_act, LEX_STRING *str_com,
|
|||
goto err;
|
||||
/* First, the active transactions */
|
||||
ptr= str_act->str + 2 + LSN_STORE_SIZE;
|
||||
transid_store(ptr, global_trid_generator);
|
||||
ptr+= TRANSID_SIZE;
|
||||
for (trn= active_list_min.next; trn != &active_list_max; trn= trn->next)
|
||||
{
|
||||
/*
|
||||
|
|
|
@ -55,6 +55,8 @@ my_bool trnman_has_locked_tables(TRN *trn);
|
|||
void trnman_reset_locked_tables(TRN *trn);
|
||||
TRN *trnman_recreate_trn_from_recovery(uint16 shortid, TrID longid);
|
||||
TRN *trnman_get_any_trn();
|
||||
|
||||
#define TRANSID_SIZE 6
|
||||
#define transid_store(dst, id) int6store(dst,id)
|
||||
#define transid_korr(P) uint6korr(P)
|
||||
C_MODE_END
|
||||
#endif
|
||||
|
|
Loading…
Add table
Reference in a new issue