Fixes for redo/undo logging of key pages

New extendable format for maria_log_control file
Fixed some compiler warnings


include/maria.h:
  Added maria_disable_logging() and maria_enable_logging()
mysql-test/include/maria_verify_recovery.inc:
  Updated tests now that key redo/undo works
mysql-test/r/maria-recovery.result:
  Updated tests now that key redo/undo works
storage/maria/ma_blockrec.c:
  Use unified CLR code
  Added rec_lsn for full pages
  Moved clr write hook to ma_key_recover.c
  Changed REDO code to keep pages pinned until undo
  Mark page_link's as changed
storage/maria/ma_blockrec.h:
  Moved write_hook_for_clr_end() to ma_key_recover.c
storage/maria/ma_check.c:
  Changed key check code to use PAGECACHE_READ_UNKNOWN_PAGE
  Fixed wrong warning when checking files after maria_pack
  When unpacking files, we have to use new keypos_to_recpos method
  When doing repair, we can disregard index key file pages in page cache
storage/maria/ma_commit.c:
  Added simple enable/disable logging functions
  (Needed for recovery)
storage/maria/ma_control_file.c:
  Make maria control file extendable without having to make it incompatible for older versions
storage/maria/ma_control_file.h:
  New error messages
  Added CONTROL_FILE_VERSION
storage/maria/ma_delete.c:
  Added redo/undo for key pages
  change_length -> changed_length to make things similar
  More comments & more DBUG
storage/maria/ma_key_recover.c:
  Unified CLR method
  Moved here write_hook_for_clr_end() and common keypage log functions
  Changed REDO to keep pages pinned until undo
  Changed UNDO code to change key_root under log mutex
storage/maria/ma_key_recover.h:
  New structures and functions
storage/maria/ma_loghandler.c:
  Include needed files
storage/maria/ma_open.c:
  Change maria_open() to use pread() instead of read()
storage/maria/ma_page.c:
  Fixed bug in key_del handling
  Clear pages if IDENTICAL_PAGES_AFTER_RECOVERY is defined
storage/maria/ma_pagecache.c:
  Indentation and spelling fixes
  More DBUG
  Added helper function: pagecache_block_link_to_buffer()
storage/maria/ma_pagecache.h:
  Added pagecache_block_link_to_buffer()
storage/maria/ma_recovery.c:
  Fixed state.changed
  Fixed that REDO keeps pages pinned until UNDO
  Some bug fixes from previous commit
  Fixes for UNDO/REDO of key pages
storage/maria/ma_search.c:
  Fixed packing and storing of keys to provide more information to caller so
  that we can do efficient REDO logging of the changes.
storage/maria/ma_test1.c:
  Fixed bug with uninitialized variable
storage/maria/ma_test2.c:
  Removed unused code
storage/maria/ma_test_all.res:
  Updated results
storage/maria/ma_test_all.sh:
  Changed one test to test more
  Removed timing tests as not relevant here
storage/maria/ma_test_recovery.expected:
  Updated test result after redo/undo of key pages works
storage/maria/ma_test_recovery:
  Updated test after redo/undo of key pages works
storage/maria/ma_write.c:
  Moved some general log functions to ma_key_recover.c
  Fixed some bugs in undo
  Moved ma_log_split() to _ma_split_page()
  Small changes in some function arguments to be able to do redo logging
storage/maria/maria_chk.c:
  disable logging while doing repair table
storage/maria/maria_def.h:
  New function prototypes
  Move some structs and functions to ma_key_recover.c
storage/maria/unittest/ma_control_file-t.c:
  Updated with patch from Sanja
  NOTE: This is not complete and needs to be updated to the new control file format
storage/maria/unittest/ma_test_loghandler-t.c:
  Fixed compiler warning
This commit is contained in:
unknown 2007-11-20 17:42:16 +02:00
commit 6b3743f0aa
30 changed files with 1535 additions and 1904 deletions

View file

@ -306,6 +306,8 @@ extern int maria_delete_all_rows(MARIA_HA *info);
extern uint maria_get_pointer_length(ulonglong file_length, uint def);
extern int maria_commit(MARIA_HA *info);
extern int maria_begin(MARIA_HA *info);
extern void maria_disable_logging(MARIA_HA *info);
extern void maria_enable_logging(MARIA_HA *info);
/* this is used to pass to mysql_mariachk_table */

View file

@ -77,16 +77,15 @@ let $mms_purpose=comparison;
let $mms_compare_physically=$mms_compare_physically_save;
while ($mms_table_to_use)
{
# Todo: remove this REPAIR when we have index recovery working.
# It is a quick repair, so that it will fail if data file is corrupted.
--echo * rebuilding index (until we have recovery of index)
eval repair table t$mms_table_to_use quick;
eval check table t$mms_table_to_use extended;
--echo * testing that checksum after recovery is as expected
let $new_checksum=`CHECKSUM TABLE t$mms_table_to_use`;
let $old_checksum=`CHECKSUM TABLE mysqltest_for_$mms_purpose.t$mms_table_to_use`;
# the $ text variables above are of the form "db.tablename\tchecksum",
# as db differs, we use substring().
eval select if(substring("$new_checksum",instr("$new_checksum",".t1")) = substring("$old_checksum",instr("$old_checksum",".t1")),"ok","failure");
--disable_query_log
eval select if(substring("$new_checksum",instr("$new_checksum",".t1")) = substring("$old_checksum",instr("$old_checksum",".t1")),"ok","failure") as "Checksum-check";
--enable_query_log
# this script may compare physically or do nothing
-- source include/maria_make_snapshot.inc
dec $mms_table_to_use;

View file

@ -15,13 +15,11 @@ set global maria_checkpoint_interval=1;
ERROR HY000: Lost connection to MySQL server during query
* copied t1 back for feeding_recovery
* recovery happens
* rebuilding index (until we have recovery of index)
repair table t1 quick;
check table t1 extended;
Table Op Msg_type Msg_text
mysqltest.t1 repair status OK
mysqltest.t1 check status OK
* testing that checksum after recovery is as expected
select if(substring("mysqltest.t1 488070860",instr("mysqltest.t1 488070860",".t1")) = substring("mysqltest_for_comparison.t1 488070860",instr("mysqltest_for_comparison.t1 488070860",".t1")),"ok","failure");
if(substring("mysqltest.t1 488070860",instr("mysqltest.t1 488070860",".t1")) = substring("mysqltest_for_comparison.t1 488070860",instr("mysqltest_for_comparison.t1 488070860",".t1")),"ok","failure")
Checksum-check
ok
* compared t1 to old version
use mysqltest;
@ -39,13 +37,11 @@ SET SESSION debug="+d,maria_crash";
set global maria_checkpoint_interval=1;
ERROR HY000: Lost connection to MySQL server during query
* recovery happens
* rebuilding index (until we have recovery of index)
repair table t1 quick;
check table t1 extended;
Table Op Msg_type Msg_text
mysqltest.t1 repair status OK
mysqltest.t1 check status OK
* testing that checksum after recovery is as expected
select if(substring("mysqltest.t1 976141720",instr("mysqltest.t1 976141720",".t1")) = substring("mysqltest_for_comparison.t1 976141720",instr("mysqltest_for_comparison.t1 976141720",".t1")),"ok","failure");
if(substring("mysqltest.t1 976141720",instr("mysqltest.t1 976141720",".t1")) = substring("mysqltest_for_comparison.t1 976141720",instr("mysqltest_for_comparison.t1 976141720",".t1")),"ok","failure")
Checksum-check
ok
use mysqltest;
select * from t1;
@ -62,13 +58,11 @@ SET SESSION debug="+d,maria_flush_whole_page_cache,maria_crash";
set global maria_checkpoint_interval=1;
ERROR HY000: Lost connection to MySQL server during query
* recovery happens
* rebuilding index (until we have recovery of index)
repair table t1 quick;
check table t1 extended;
Table Op Msg_type Msg_text
mysqltest.t1 repair status OK
mysqltest.t1 check status OK
* testing that checksum after recovery is as expected
select if(substring("mysqltest.t1 1464212580",instr("mysqltest.t1 1464212580",".t1")) = substring("mysqltest_for_comparison.t1 1464212580",instr("mysqltest_for_comparison.t1 1464212580",".t1")),"ok","failure");
if(substring("mysqltest.t1 1464212580",instr("mysqltest.t1 1464212580",".t1")) = substring("mysqltest_for_comparison.t1 1464212580",instr("mysqltest_for_comparison.t1 1464212580",".t1")),"ok","failure")
Checksum-check
ok
use mysqltest;
select * from t1;
@ -86,13 +80,11 @@ SET SESSION debug="+d,maria_flush_states,maria_flush_whole_log,maria_crash";
set global maria_checkpoint_interval=1;
ERROR HY000: Lost connection to MySQL server during query
* recovery happens
* rebuilding index (until we have recovery of index)
repair table t1 quick;
check table t1 extended;
Table Op Msg_type Msg_text
mysqltest.t1 repair status OK
mysqltest.t1 check status OK
* testing that checksum after recovery is as expected
select if(substring("mysqltest.t1 1952283440",instr("mysqltest.t1 1952283440",".t1")) = substring("mysqltest_for_comparison.t1 1952283440",instr("mysqltest_for_comparison.t1 1952283440",".t1")),"ok","failure");
if(substring("mysqltest.t1 1952283440",instr("mysqltest.t1 1952283440",".t1")) = substring("mysqltest_for_comparison.t1 1952283440",instr("mysqltest_for_comparison.t1 1952283440",".t1")),"ok","failure")
Checksum-check
ok
use mysqltest;
select * from t1;
@ -111,13 +103,11 @@ SET SESSION debug="+d,maria_flush_whole_log,maria_crash";
set global maria_checkpoint_interval=1;
ERROR HY000: Lost connection to MySQL server during query
* recovery happens
* rebuilding index (until we have recovery of index)
repair table t1 quick;
check table t1 extended;
Table Op Msg_type Msg_text
mysqltest.t1 repair status OK
mysqltest.t1 check status OK
* testing that checksum after recovery is as expected
select if(substring("mysqltest.t1 2440354300",instr("mysqltest.t1 2440354300",".t1")) = substring("mysqltest_for_comparison.t1 2440354300",instr("mysqltest_for_comparison.t1 2440354300",".t1")),"ok","failure");
if(substring("mysqltest.t1 2440354300",instr("mysqltest.t1 2440354300",".t1")) = substring("mysqltest_for_comparison.t1 2440354300",instr("mysqltest_for_comparison.t1 2440354300",".t1")),"ok","failure")
Checksum-check
ok
use mysqltest;
select * from t1;
@ -149,13 +139,11 @@ set global maria_checkpoint_interval=1;
ERROR HY000: Lost connection to MySQL server during query
* copied t1 back for feeding_recovery
* recovery happens
* rebuilding index (until we have recovery of index)
repair table t1 quick;
check table t1 extended;
Table Op Msg_type Msg_text
mysqltest.t1 repair status OK
mysqltest.t1 check status OK
* testing that checksum after recovery is as expected
select if(substring("mysqltest.t1 3472399915",instr("mysqltest.t1 3472399915",".t1")) = substring("mysqltest_for_comparison.t1 3472399915",instr("mysqltest_for_comparison.t1 3472399915",".t1")),"ok","failure");
if(substring("mysqltest.t1 3472399915",instr("mysqltest.t1 3472399915",".t1")) = substring("mysqltest_for_comparison.t1 3472399915",instr("mysqltest_for_comparison.t1 3472399915",".t1")),"ok","failure")
Checksum-check
ok
use mysqltest;
SELECT LENGTH(b) FROM t1 WHERE i=3;

View file

@ -1549,14 +1549,13 @@ static my_bool write_full_pages(MARIA_HA *info,
KEYPAGE_CHECKSUM_SIZE, (uchar) 255);
DBUG_ASSERT(share->pagecache->block_size == block_size);
/** @todo RECOVERY BUG the page does not get a rec_lsn with this! */
if (pagecache_write(share->pagecache,
&info->dfile, page, 0,
buff, share->page_type,
PAGECACHE_LOCK_LEFT_UNLOCKED,
PAGECACHE_PIN_LEFT_UNPINNED,
PAGECACHE_WRITE_DELAY,
0, LSN_IMPOSSIBLE))
0, info->trn->rec_lsn))
DBUG_RETURN(1);
page++;
block->used= BLOCKUSED_USED;
@ -2491,34 +2490,17 @@ static my_bool write_block_record(MARIA_HA *info,
if (undo_lsn != LSN_ERROR)
{
uchar log_data[LSN_STORE_SIZE + FILEID_STORE_SIZE +
CLR_TYPE_STORE_SIZE + HA_CHECKSUM_STORE_SIZE];
struct st_msg_to_write_hook_for_clr_end msg;
/* undo_lsn must be first for compression to work */
lsn_store(log_data, undo_lsn);
/*
Store if this CLR is about UNDO_DELETE or UNDO_UPDATE;
in the first case, Recovery, when it sees the CLR_END in the
REDO phase, may decrement the records' count.
*/
log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data;
log_array[TRANSLOG_INTERNAL_PARTS + 0].length=
sizeof(log_data) - HA_CHECKSUM_STORE_SIZE;
msg.undone_record_type=
old_record ? LOGREC_UNDO_ROW_UPDATE : LOGREC_UNDO_ROW_DELETE;
clr_type_store(log_data + LSN_STORE_SIZE + FILEID_STORE_SIZE,
msg.undone_record_type);
msg.previous_undo_lsn= undo_lsn;
store_checksum_in_rec(share, msg.checksum_delta,
row->checksum - old_record_checksum,
log_data + LSN_STORE_SIZE + FILEID_STORE_SIZE +
CLR_TYPE_STORE_SIZE,
log_array[TRANSLOG_INTERNAL_PARTS + 0].length);
if (translog_write_record(&lsn, LOGREC_CLR_END,
info->trn, info,
log_array[TRANSLOG_INTERNAL_PARTS + 0].length,
TRANSLOG_INTERNAL_PARTS + 1, log_array,
log_data + LSN_STORE_SIZE, &msg))
if (_ma_write_clr(info, undo_lsn,
old_record ? LOGREC_UNDO_ROW_UPDATE :
LOGREC_UNDO_ROW_DELETE,
share->calc_checksum != 0,
row->checksum - old_record_checksum,
&lsn, (void*) 0))
goto disk_err;
}
else
@ -2804,29 +2786,11 @@ my_bool _ma_write_abort_block_record(MARIA_HA *info)
if (share->now_transactional)
{
LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1];
uchar log_data[LSN_STORE_SIZE + FILEID_STORE_SIZE +
CLR_TYPE_STORE_SIZE + HA_CHECKSUM_STORE_SIZE];
struct st_msg_to_write_hook_for_clr_end msg;
lsn_store(log_data, info->cur_row.orig_undo_lsn);
msg.previous_undo_lsn= info->cur_row.orig_undo_lsn;
msg.undone_record_type= LOGREC_UNDO_ROW_INSERT;
clr_type_store(log_data + LSN_STORE_SIZE + FILEID_STORE_SIZE,
LOGREC_UNDO_ROW_INSERT);
log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data;
log_array[TRANSLOG_INTERNAL_PARTS + 0].length=
sizeof(log_data) - HA_CHECKSUM_STORE_SIZE;
store_checksum_in_rec(share, msg.checksum_delta,
- info->cur_row.checksum,
log_data + LSN_STORE_SIZE + FILEID_STORE_SIZE +
CLR_TYPE_STORE_SIZE,
log_array[TRANSLOG_INTERNAL_PARTS + 0].length);
if (translog_write_record(&lsn, LOGREC_CLR_END,
info->trn, info,
log_array[TRANSLOG_INTERNAL_PARTS + 0].length,
TRANSLOG_INTERNAL_PARTS + 1, log_array,
log_data + LSN_STORE_SIZE, &msg))
if (_ma_write_clr(info, info->cur_row.orig_undo_lsn,
LOGREC_UNDO_ROW_INSERT,
share->calc_checksum != 0,
-info->cur_row.checksum,
&lsn, (void*) 0))
res= 1;
}
_ma_unpin_all_pages_and_finalize_row(info, lsn);
@ -4970,49 +4934,6 @@ my_bool write_hook_for_undo_row_update(enum translog_record_type type
}
/**
   @brief Write hook for CLR_END records: sets the transaction's undo_lsn
   (and first_undo_lsn if needed) and adjusts the table state.

   The transaction's undo_lsn is set to the previous undo LSN carried in
   the hook message. Depending on which kind of UNDO record was undone, the
   share's record count and/or checksum are adjusted. When undo_lsn becomes
   LSN_IMPOSSIBLE (the transaction has fully rolled back), first_undo_lsn
   is reduced to its flags part.

   @param  type      unused
   @param  trn       transaction being updated; its trid must not be 0
   @param  tbl_info  table whose share state is adjusted
   @param  lsn       unused
   @param  hook_arg  points to a struct st_msg_to_write_hook_for_clr_end

   @return Operation status, always 0 (success)
*/
my_bool write_hook_for_clr_end(enum translog_record_type type
                               __attribute__ ((unused)),
                               TRN *trn, MARIA_HA *tbl_info
                               __attribute__ ((unused)),
                               LSN *lsn __attribute__ ((unused)),
                               void *hook_arg)
{
  MARIA_SHARE *share= tbl_info->s;
  struct st_msg_to_write_hook_for_clr_end *clr_msg=
    (struct st_msg_to_write_hook_for_clr_end *) hook_arg;

  DBUG_ASSERT(trn->trid != 0);
  trn->undo_lsn= clr_msg->previous_undo_lsn;

  if (clr_msg->undone_record_type == LOGREC_UNDO_ROW_DELETE ||
      clr_msg->undone_record_type == LOGREC_UNDO_ROW_INSERT ||
      clr_msg->undone_record_type == LOGREC_UNDO_ROW_UPDATE)
  {
    /* Undoing a delete brings a row back, undoing an insert removes one */
    if (clr_msg->undone_record_type == LOGREC_UNDO_ROW_DELETE)
      share->state.state.records++;
    else if (clr_msg->undone_record_type == LOGREC_UNDO_ROW_INSERT)
      share->state.state.records--;
    /* All three row undos carry a checksum delta to apply */
    share->state.state.checksum+= clr_msg->checksum_delta;
  }
  else if (clr_msg->undone_record_type != LOGREC_UNDO_KEY_INSERT &&
           clr_msg->undone_record_type != LOGREC_UNDO_KEY_DELETE)
  {
    /* Only row and key undo types are expected here */
    DBUG_ASSERT(0);
  }

  if (trn->undo_lsn == LSN_IMPOSSIBLE) /* has fully rolled back */
    trn->first_undo_lsn= LSN_WITH_FLAGS_TO_FLAGS(trn->first_undo_lsn);
  return 0;
}
/**
@brief Updates table's lsn_of_file_id.
@ -5066,6 +4987,7 @@ uint _ma_apply_redo_insert_row_head_or_tail(MARIA_HA *info, LSN lsn,
uint block_size= share->block_size;
uint rec_offset;
uchar *buff, *dir;
uint result;
MARIA_PINNED_PAGE page_link;
enum pagecache_page_lock unlock_method;
enum pagecache_page_pin unpin_method;
@ -5202,25 +5124,29 @@ uint _ma_apply_redo_insert_row_head_or_tail(MARIA_HA *info, LSN lsn,
int2store(buff + EMPTY_SPACE_OFFSET, empty_space);
/*
Write modified page. We don't update its LSN, and keep it pinned. When we
have processed all REDOs for this page in the current REDO's group, we
will stamp page with UNDO's LSN (if we stamped it now, a next REDO, in
If page was not read before, write it but keep it pinned.
We don't update its LSN. When we have processed all REDOs for this page
in the current REDO's group, we will stamp page with UNDO's LSN
(if we stamped it now, a next REDO, in
this group, for this page, would be skipped) and unpin then.
*/
if (pagecache_write(share->pagecache,
result= 0;
if (unlock_method == PAGECACHE_LOCK_WRITE &&
pagecache_write(share->pagecache,
&info->dfile, page, 0,
buff, PAGECACHE_PLAIN_PAGE,
unlock_method, unpin_method,
PAGECACHE_WRITE_DELAY, &page_link.link,
LSN_IMPOSSIBLE))
DBUG_RETURN(my_errno);
result= my_errno;
page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK;
page_link.changed= 1;
push_dynamic(&info->pinned_pages, (void*) &page_link);
/* Fix bitmap */
if (_ma_bitmap_set(info, page, page_type == HEAD_PAGE, empty_space))
DBUG_RETURN(my_errno);
result= my_errno;
/*
Data page and bitmap page are in place, we can update data_file_length in
@ -5232,7 +5158,7 @@ uint _ma_apply_redo_insert_row_head_or_tail(MARIA_HA *info, LSN lsn,
set_if_bigger(info->state->data_file_length, end_of_page);
}
DBUG_RETURN(0);
DBUG_RETURN(result);
err:
if (unlock_method == PAGECACHE_LOCK_LEFT_WRITELOCKED)
@ -5322,23 +5248,14 @@ uint _ma_apply_redo_purge_row_head_or_tail(MARIA_HA *info, LSN lsn,
if (delete_dir_entry(buff, block_size, rownr, &empty_space) < 0)
goto err;
page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK;
page_link.changed= 1;
push_dynamic(&info->pinned_pages, (void*) &page_link);
result= 0;
if (pagecache_write(share->pagecache,
&info->dfile, page, 0,
buff, PAGECACHE_PLAIN_PAGE,
PAGECACHE_LOCK_LEFT_WRITELOCKED,
PAGECACHE_PIN_LEFT_PINNED,
PAGECACHE_WRITE_DELAY, 0,
LSN_IMPOSSIBLE))
/* This will work even if the page was marked as UNALLOCATED_PAGE */
if (_ma_bitmap_set(info, page, page_type == HEAD_PAGE, empty_space))
result= my_errno;
else
{
page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK;
push_dynamic(&info->pinned_pages, (void*) &page_link);
/* This will work even if the page was marked as UNALLOCATED_PAGE */
if (_ma_bitmap_set(info, page, page_type == HEAD_PAGE, empty_space))
result= my_errno;
}
DBUG_RETURN(result);
@ -5387,10 +5304,8 @@ uint _ma_apply_redo_free_blocks(MARIA_HA *info,
header+= PAGE_STORE_SIZE;
/* Page range may have this bit set to indicate a tail page */
page_range= pagerange_korr(header) & ~TAIL_BIT;
/** @todo RECOVERY BUG enable this assertion when newer tree pulled */
#if 0
DBUG_ASSERT(page_range > 0);
#endif
header+= PAGERANGE_STORE_SIZE;
DBUG_PRINT("info", ("page: %lu pages: %u", (long) page, page_range));
@ -5466,13 +5381,10 @@ uint _ma_apply_redo_free_head_or_tail(MARIA_HA *info, LSN lsn,
bzero(dir, number_of_records * DIR_ENTRY_SIZE);
}
#endif
lsn_store(buff, lsn);
if (pagecache_write(share->pagecache,
&info->dfile, page, 0,
buff, PAGECACHE_PLAIN_PAGE,
PAGECACHE_LOCK_WRITE_UNLOCK, PAGECACHE_UNPIN,
PAGECACHE_WRITE_DELAY, 0))
DBUG_RETURN(1);
page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK;
page_link.changed= 1;
push_dynamic(&info->pinned_pages, (void*) &page_link);
}
/** @todo leave bitmap lock to the bitmap code... */
pthread_mutex_lock(&share->bitmap.bitmap_lock);
@ -5591,6 +5503,11 @@ uint _ma_apply_redo_insert_row_blobs(MARIA_HA *info,
unlock_method= PAGECACHE_LOCK_WRITE_UNLOCK;
unpin_method= PAGECACHE_UNPIN;
}
/*
Blob pages are never updated twice in same redo-undo chain, so
it's safe to update lsn for them here
*/
lsn_store(buff, lsn);
buff[PAGE_TYPE_OFFSET]= BLOB_PAGE;
@ -5610,7 +5527,7 @@ uint _ma_apply_redo_insert_row_blobs(MARIA_HA *info,
&info->dfile, page, 0,
buff, PAGECACHE_PLAIN_PAGE,
unlock_method, unpin_method,
PAGECACHE_WRITE_DELAY, 0))
PAGECACHE_WRITE_DELAY, 0, LSN_IMPOSSIBLE))
DBUG_RETURN(my_errno);
}
/** @todo leave bitmap lock to the bitmap code... */
@ -5674,7 +5591,7 @@ my_bool _ma_apply_undo_row_insert(MARIA_HA *info, LSN undo_lsn,
if (share->calc_checksum)
checksum= -ha_checksum_korr(header);
if (_ma_write_clr(info, undo_lsn, LOGREC_UNDO_ROW_INSERT,
share->calc_checksum != 0, checksum, &lsn))
share->calc_checksum != 0, checksum, &lsn, (void*) 0))
goto err;
res= 0;

View file

@ -235,9 +235,6 @@ my_bool write_hook_for_undo_row_delete(enum translog_record_type type,
my_bool write_hook_for_undo_row_update(enum translog_record_type type,
TRN *trn, MARIA_HA *tbl_info,
LSN *lsn, void *hook_arg);
my_bool write_hook_for_clr_end(enum translog_record_type type,
TRN *trn, MARIA_HA *tbl_info, LSN *lsn,
void *hook_arg);
my_bool write_hook_for_file_id(enum translog_record_type type,
TRN *trn, MARIA_HA *tbl_info, LSN *lsn,
void *hook_arg);

View file

@ -305,7 +305,7 @@ static int check_k_link(HA_CHECK *param, register MARIA_HA *info,
&info->s->kfile, next_link/block_size,
DFLT_INIT_HITS,
(uchar*) info->buff,
PAGECACHE_PLAIN_PAGE,
PAGECACHE_READ_UNKNOWN_PAGE,
PAGECACHE_LOCK_LEFT_UNLOCKED, 0)))
{
/* purecov: begin tested */
@ -595,7 +595,7 @@ do_stat:
puts("");
}
if (param->key_file_blocks != info->state->key_file_length &&
param->keys_in_use != ~(ulonglong) 0)
share->state.key_map == ~(ulonglong) 0)
_ma_check_print_warning(param, "Some data are unreferenced in keyfile");
if (found_keys != full_text_keys)
param->record_checksum=old_record_checksum-init_checksum; /* Remove delete links */
@ -2123,8 +2123,7 @@ int maria_repair(HA_CHECK *param, register MARIA_HA *info,
info->s->state.dellink= HA_OFFSET_ERROR;
info->rec_cache.file= new_file;
if (share->data_file_type == BLOCK_RECORD ||
((param->testflag & T_UNPACK) &&
share->state.header.org_data_file_type == BLOCK_RECORD))
(param->testflag & T_UNPACK))
{
MARIA_HA *new_info;
/*
@ -2152,6 +2151,10 @@ int maria_repair(HA_CHECK *param, register MARIA_HA *info,
if (_ma_initialize_data_file(sort_info.new_info->s, new_file))
goto err;
block_record= 1;
/* Use new virtual functions for key generation */
info->s->keypos_to_recpos= new_info->s->keypos_to_recpos;
info->s->recpos_to_keypos= new_info->s->recpos_to_keypos;
}
}
@ -2901,8 +2904,8 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info,
if (info->s->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD))
param->testflag|=T_CALC_CHECKSUM;
if (_ma_flush_table_files(info, MARIA_FLUSH_DATA, FLUSH_FORCE_WRITE,
FLUSH_KEEP))
if (_ma_flush_table_files(info, MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,
FLUSH_FORCE_WRITE, FLUSH_IGNORE_CHANGED))
goto err;
if (!(sort_info.key_block=
@ -3328,8 +3331,8 @@ int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info,
if (info->s->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD))
param->testflag|=T_CALC_CHECKSUM;
if (_ma_flush_table_files(info, MARIA_FLUSH_DATA, FLUSH_FORCE_WRITE,
FLUSH_KEEP))
if (_ma_flush_table_files(info, MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,
FLUSH_FORCE_WRITE, FLUSH_IGNORE_CHANGED))
goto err;
/*
@ -5502,12 +5505,19 @@ set_data_file_type(MARIA_SORT_INFO *sort_info, MARIA_SHARE *share)
/*
  Switch the share back to its original (pre-compression) data file type.

  Clears HA_OPTION_COMPRESS_RECORD, stores the new options in the state
  header, copies org_data_file_type into both the header and the share,
  and resets the pack header length. The key <-> record position
  converters are then refreshed from a scratch copy of the share on which
  _ma_setup_functions() is run, so that only those two function pointers
  of the real share are replaced.
*/
static void restore_data_file_type(MARIA_SHARE *share)
{
  MARIA_SHARE scratch_share;

  share->options&= ~HA_OPTION_COMPRESS_RECORD;
  mi_int2store(share->state.header.options,share->options);
  share->data_file_type= share->state.header.data_file_type=
    share->state.header.org_data_file_type;
  share->pack.header_length= 0;

  /*
    Use new virtual functions for key generation: let
    _ma_setup_functions() work on a copy and pick out only the two
    position converters from it.
  */
  scratch_share= *share;
  _ma_setup_functions(&scratch_share);
  share->recpos_to_keypos= scratch_share.recpos_to_keypos;
  share->keypos_to_recpos= scratch_share.keypos_to_recpos;
}

View file

@ -122,3 +122,26 @@ int maria_begin(MARIA_HA *info)
}
DBUG_RETURN(0);
}
/**
  @brief Disable logging for this table

  Points the handler at the dummy transaction object, switches the share
  to plain (non-LSN) pages and marks it as not transactional for now.

  @param  info  Maria handler

  @note
    Mainly used during repair table, where we don't want to log all
    changes to index or rows
*/
void maria_disable_logging(MARIA_HA *info)
{
  info->trn= &dummy_transaction_object;
  info->s->page_type= PAGECACHE_PLAIN_PAGE;
  info->s->now_transactional= 0;
}
/**
  @brief Re-enable logging for this table

  Restores now_transactional from the value the table was created with
  (base.born_transactional). Only if that value is true is the page type
  switched to PAGECACHE_LSN_PAGE; otherwise the page type is left as is.
  info->trn is not touched here.

  @param  info  Maria handler
*/
void maria_enable_logging(MARIA_HA *info)
{
  info->s->now_transactional= info->s->base.born_transactional;
  if (info->s->now_transactional)
    info->s->page_type= PAGECACHE_LSN_PAGE;
}

View file

@ -1,4 +1,4 @@
/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
/* Copyright (C) 2007 MySQL AB & Guilhem Bichot & Michael Widenius
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -16,31 +16,79 @@
/*
WL#3234 Maria control file
First version written by Guilhem Bichot on 2006-04-27.
Does not compile yet.
*/
#include "maria_def.h"
/* Here is the implementation of this module */
/*
a control file contains 3 objects: magic string, LSN of last checkpoint,
number of last log.
A control file contains the following objects:
Start of create time variables (at start of file):
- Magic string (including version number of Maria control file)
- Uuid
- Size of create time part
- Size of dynamic part
- Maria block size
..... Here we can add new variables without changing format
- Checksum of create time part (last of block)
Start of changeable part:
- Checksum of changeable part
- LSN of last checkpoint
- Number of last log file
..... Here we can add new variables without changing format
The idea is that one can add new variables to the control file and still
use it with old program versions. If one needs to do an incompatible change
one should increment the control file version number.
*/
/* total size should be < sector size for atomic write operation */
#define CONTROL_FILE_MAGIC_STRING "\xfe\xfe\xc\1MACF"
#define CONTROL_FILE_MAGIC_STRING_OFFSET 0
#define CONTROL_FILE_MAGIC_STRING_SIZE (sizeof(CONTROL_FILE_MAGIC_STRING)-1)
#define CONTROL_FILE_UUID_OFFSET (CONTROL_FILE_MAGIC_STRING_OFFSET + CONTROL_FILE_MAGIC_STRING_SIZE)
#define CONTROL_FILE_UUID_SIZE MY_UUID_SIZE
#define CONTROL_FILE_CHECKSUM_OFFSET (CONTROL_FILE_UUID_OFFSET + CONTROL_FILE_UUID_SIZE)
#define CONTROL_FILE_CHECKSUM_SIZE 4
#define CONTROL_FILE_LSN_OFFSET (CONTROL_FILE_CHECKSUM_OFFSET + CONTROL_FILE_CHECKSUM_SIZE)
#define CONTROL_FILE_LSN_SIZE LSN_STORE_SIZE
#define CONTROL_FILE_FILENO_OFFSET (CONTROL_FILE_LSN_OFFSET + CONTROL_FILE_LSN_SIZE)
#define CONTROL_FILE_FILENO_SIZE 4
#define CONTROL_FILE_SIZE (CONTROL_FILE_FILENO_OFFSET + CONTROL_FILE_FILENO_SIZE)
/* Total size should be < sector size for atomic write operation */
#define CF_MAX_SIZE 512
/*
  Smallest size handled: the create time part up to and including the
  block size, two checksums (one per part), the LSN and the log file number.
*/
#define CF_MIN_SIZE (CF_BLOCKSIZE_OFFSET + CF_BLOCKSIZE_SIZE + \
CF_CHECKSUM_SIZE * 2 + CF_LSN_SIZE + CF_FILENO_SIZE)
/* Create time variables */
/* Magic string: 3 bytes at the start of the file */
#define CF_MAGIC_STRING "\xfe\xfe\xc"
#define CF_MAGIC_STRING_OFFSET 0
#define CF_MAGIC_STRING_SIZE (sizeof(CF_MAGIC_STRING)-1)
/* One byte control file version, directly after the magic string */
#define CF_VERSION_OFFSET (CF_MAGIC_STRING_OFFSET + CF_MAGIC_STRING_SIZE)
#define CF_VERSION_SIZE 1
/* Uuid of the control file */
#define CF_UUID_OFFSET (CF_VERSION_OFFSET + CF_VERSION_SIZE)
#define CF_UUID_SIZE MY_UUID_SIZE
/* Two 2-byte fields: size of create time part, then size of changeable part */
#define CF_CREATE_TIME_SIZE_OFFSET (CF_UUID_OFFSET + CF_UUID_SIZE)
#define CF_SIZE_SIZE 2
#define CF_CHANGEABLE_SIZE_OFFSET (CF_CREATE_TIME_SIZE_OFFSET + CF_SIZE_SIZE)
/* Maria block size, stored in 2 bytes */
#define CF_BLOCKSIZE_OFFSET (CF_CHANGEABLE_SIZE_OFFSET + CF_SIZE_SIZE)
#define CF_BLOCKSIZE_SIZE 2
/* Size of the create time part as written by this version, with checksum */
#define CF_CREATE_TIME_TOTAL_SIZE (CF_BLOCKSIZE_OFFSET + CF_BLOCKSIZE_SIZE + \
CF_CHECKSUM_SIZE)
/*
Start of the part that changes during execution
This is stored at offset uint2korr(file[CF_CHANGEABLE_SIZE])
NOTE(review): no CF_CHANGEABLE_SIZE macro is defined here; presumably the
start offset is the create time part size stored at
CF_CREATE_TIME_SIZE_OFFSET - confirm.
The offsets below are relative to the start of the changeable part.
*/
#define CF_CHECKSUM_OFFSET 0
#define CF_CHECKSUM_SIZE 4
#define CF_LSN_OFFSET (CF_CHECKSUM_OFFSET + CF_CHECKSUM_SIZE)
#define CF_LSN_SIZE LSN_STORE_SIZE
#define CF_FILENO_OFFSET (CF_LSN_OFFSET + CF_LSN_SIZE)
#define CF_FILENO_SIZE 4
/* Size of the changeable part as written by this version */
#define CF_CHANGEABLE_TOTAL_SIZE (CF_FILENO_OFFSET + CF_FILENO_SIZE)
/*
The following values should not be changed, except when changing version
number of the maria control file. These are the minimum sizes of the
parts the code can handle.
As defined here they expand to the same expressions as
CF_CREATE_TIME_TOTAL_SIZE and CF_CHANGEABLE_TOTAL_SIZE.
*/
#define CF_MIN_CREATE_TIME_TOTAL_SIZE \
(CF_BLOCKSIZE_OFFSET + CF_BLOCKSIZE_SIZE + CF_CHECKSUM_SIZE)
#define CF_MIN_CHANGEABLE_TOTAL_SIZE \
(CF_FILENO_OFFSET + CF_FILENO_SIZE)
/* This module owns these two vars. */
/**
@ -66,6 +114,77 @@ my_bool maria_in_recovery= FALSE;
*/
static int control_file_fd= -1;
static uint cf_create_time_size;
static uint cf_changeable_size;
/**
  @brief Create Maria control file

  Creates the file, generates a new uuid for it and writes the create time
  part (magic string, version, uuid, sizes of the two parts, block size and
  a checksum over all of these) at offset 0. The changeable part is then
  written with "undefined" values through ma_control_file_write_and_force().

  @param  name        path of the control file to create
  @param  open_flags  flags passed on to my_create()

  @return Operation status
    @retval CONTROL_FILE_MISSING        we are in recovery; nothing is created
    @retval CONTROL_FILE_UNKNOWN_ERROR  the file could not be created, or the
                                        create time part could not be written
    @retval other                       result of
                                        ma_control_file_write_and_force()

  @note NOTE(review): if writing the create time part fails,
  control_file_fd is left open here; confirm that the caller closes it.
*/
static CONTROL_FILE_ERROR create_control_file(const char *name,
                                              int open_flags)
{
  uint32 sum;
  uchar buffer[CF_CREATE_TIME_TOTAL_SIZE];
  DBUG_ENTER("maria_create_control_file");

  /* in a recovery, we expect to find a control file */
  if (maria_in_recovery)
    DBUG_RETURN(CONTROL_FILE_MISSING);
  if ((control_file_fd= my_create(name, 0,
                                  open_flags,
                                  MYF(MY_SYNC_DIR | MY_WME))) < 0)
    DBUG_RETURN(CONTROL_FILE_UNKNOWN_ERROR);

  /* Reset variables, as we are creating the file */
  cf_create_time_size= CF_CREATE_TIME_TOTAL_SIZE;
  cf_changeable_size=  CF_CHANGEABLE_TOTAL_SIZE;

  /* Create unique uuid for the control file */
  my_uuid_init((ulong) &buffer, (ulong) &maria_uuid);
  my_uuid(maria_uuid);

  /* Prepare and write the file header */
  memcpy(buffer, CF_MAGIC_STRING, CF_MAGIC_STRING_SIZE);
  buffer[CF_VERSION_OFFSET]= CONTROL_FILE_VERSION;
  memcpy(buffer + CF_UUID_OFFSET, maria_uuid, CF_UUID_SIZE);
  int2store(buffer + CF_CREATE_TIME_SIZE_OFFSET, cf_create_time_size);
  int2store(buffer + CF_CHANGEABLE_SIZE_OFFSET, cf_changeable_size);

  /* Write create time variables */
  int2store(buffer + CF_BLOCKSIZE_OFFSET, maria_block_size);

  /* Store checksum for create time parts (it occupies the last bytes) */
  sum= (uint32) my_checksum(0, buffer, cf_create_time_size -
                            CF_CHECKSUM_SIZE);
  int4store(buffer + cf_create_time_size - CF_CHECKSUM_SIZE, sum);

  /*
    Report a failed write with a real CONTROL_FILE_ERROR value, the same
    one used above when the file cannot be created, instead of a bare 1.
  */
  if (my_pwrite(control_file_fd, buffer, cf_create_time_size,
                0, MYF(MY_FNABP | MY_WME)))
    DBUG_RETURN(CONTROL_FILE_UNKNOWN_ERROR);

  /*
    To be safer we should make sure that there are no logs or data/index
    files around (indeed it could be that the control file alone was deleted
    or not restored, and we should not go on with life at this point).

    TODO: For now we trust (this is alpha version), but for beta it would
    be great to verify.

    We could have a tool which can rebuild the control file, by reading the
    directory of logs, finding the newest log, reading it to find last
    checkpoint... Slow but can save your db. For this to be possible, we
    must always write to the control file right after writing the checkpoint
    log record, and do nothing in between (i.e. the checkpoint must be
    usable as soon as it has been written to the log).
  */

  /* init the file with these "undefined" values */
  DBUG_RETURN(ma_control_file_write_and_force(LSN_IMPOSSIBLE,
                                              FILENO_IMPOSSIBLE,
                                              CONTROL_FILE_UPDATE_ALL));
}
/*
@brief Initialize control file subsystem
@ -75,12 +194,8 @@ static int control_file_fd= -1;
Called at engine's start.
@note
The format of the control file is:
4 bytes: magic string
4 bytes: checksum of the following bytes
4 bytes: number of log where last checkpoint is
4 bytes: offset in log where last checkpoint is
4 bytes: number of last log
The format of the control file is defined in the comments and defines
at the start of this file.
@note If in recovery, file is not created
@ -88,13 +203,14 @@ static int control_file_fd= -1;
@retval 0 OK
@retval 1 Error (in which case the file is left closed)
*/
CONTROL_FILE_ERROR ma_control_file_create_or_open()
{
char buffer[CONTROL_FILE_SIZE];
char name[FN_REFLEN];
uchar buffer[CF_MAX_SIZE];
char name[FN_REFLEN], errmsg_buff[256];
const char *errmsg;
MY_STAT stat_buff;
my_bool create_file;
uint new_cf_create_time_size, new_cf_changeable_size, new_block_size;
int open_flags= O_BINARY | /*O_DIRECT |*/ O_RDWR;
int error= CONTROL_FILE_UNKNOWN_ERROR;
DBUG_ENTER("ma_control_file_create_or_open");
@ -104,8 +220,8 @@ CONTROL_FILE_ERROR ma_control_file_create_or_open()
"*store" and "*korr" calls in this file, and can even create backward
compatibility problems. Beware!
*/
DBUG_ASSERT(CONTROL_FILE_LSN_SIZE == (3+4));
DBUG_ASSERT(CONTROL_FILE_FILENO_SIZE == 4);
DBUG_ASSERT(CF_LSN_SIZE == (3+4));
DBUG_ASSERT(CF_FILENO_SIZE == 4);
if (control_file_fd >= 0) /* already open */
DBUG_RETURN(0);
@ -114,43 +230,8 @@ CONTROL_FILE_ERROR ma_control_file_create_or_open()
maria_data_root, "", MYF(MY_WME)) == NullS)
DBUG_RETURN(CONTROL_FILE_UNKNOWN_ERROR);
create_file= test(my_access(name,F_OK));
if (create_file)
{
/* in a recovery, we expect to find a control file */
if (maria_in_recovery)
DBUG_RETURN(CONTROL_FILE_MISSING);
if ((control_file_fd= my_create(name, 0,
open_flags,
MYF(MY_SYNC_DIR | MY_WME))) < 0)
DBUG_RETURN(CONTROL_FILE_UNKNOWN_ERROR);
/* Create unique uuid for the control file */
my_uuid_init((ulong) &buffer, (ulong) &maria_uuid);
my_uuid(maria_uuid);
/*
To be safer we should make sure that there are no logs or data/index
files around (indeed it could be that the control file alone was deleted
or not restored, and we should not go on with life at this point).
TODO: For now we trust (this is alpha version), but for beta it would
be great to verify.
We could have a tool which can rebuild the control file, by reading the
directory of logs, finding the newest log, reading it to find last
checkpoint... Slow but can save your db. For this to be possible, we
must always write to the control file right after writing the checkpoint
log record, and do nothing in between (i.e. the checkpoint must be
usable as soon as it has been written to the log).
*/
/* init the file with these "undefined" values */
DBUG_RETURN(ma_control_file_write_and_force(LSN_IMPOSSIBLE,
FILENO_IMPOSSIBLE,
CONTROL_FILE_UPDATE_ALL));
}
if (my_access(name,F_OK))
DBUG_RETURN(create_control_file(name, open_flags));
/* Otherwise, file exists */
@ -166,7 +247,7 @@ CONTROL_FILE_ERROR ma_control_file_create_or_open()
goto err;
}
if ((uint)stat_buff.st_size < CONTROL_FILE_SIZE)
if ((uint) stat_buff.st_size < CF_MIN_SIZE)
{
/*
Given that normally we write only a sector and it's atomic, the only
@ -179,49 +260,89 @@ CONTROL_FILE_ERROR ma_control_file_create_or_open()
disk/filesystem has a problem.
So let's be rigid.
*/
/*
TODO: store a message "too small file" somewhere, so that it goes to
MySQL's error log at startup.
*/
error= CONTROL_FILE_TOO_SMALL;
errmsg= "File size to small";
errmsg= "Size of control file is smaller than expected";
goto err;
}
if ((uint)stat_buff.st_size > CONTROL_FILE_SIZE)
/* Check if control file is unexpectedly big */
if ((uint)stat_buff.st_size > CF_MAX_SIZE)
{
/* TODO: store "too big file" message */
error= CONTROL_FILE_TOO_BIG;
errmsg= "File size bigger than expected";
goto err;
}
if (my_read(control_file_fd, buffer, CONTROL_FILE_SIZE, MYF(MY_FNABP)))
if (my_read(control_file_fd, buffer, stat_buff.st_size, MYF(MY_FNABP)))
{
errmsg= "Can't read file";
goto err;
}
if (memcmp(buffer + CONTROL_FILE_MAGIC_STRING_OFFSET,
CONTROL_FILE_MAGIC_STRING, CONTROL_FILE_MAGIC_STRING_SIZE))
if (memcmp(buffer + CF_MAGIC_STRING_OFFSET,
CF_MAGIC_STRING, CF_MAGIC_STRING_SIZE))
{
/* TODO: store message "bad magic string" somewhere */
error= CONTROL_FILE_BAD_MAGIC_STRING;
errmsg= "Missing valid id at start of file";
errmsg= "Missing valid id at start of file. File is not a valid maria control file";
goto err;
}
memcpy(maria_uuid, buffer + CONTROL_FILE_UUID_OFFSET,
CONTROL_FILE_UUID_SIZE);
if (my_checksum(0, buffer + CONTROL_FILE_LSN_OFFSET,
CONTROL_FILE_SIZE - CONTROL_FILE_LSN_OFFSET) !=
uint4korr(buffer + CONTROL_FILE_CHECKSUM_OFFSET))
if (buffer[CF_VERSION_OFFSET] > CONTROL_FILE_VERSION)
{
error= CONTROL_FILE_BAD_VERSION;
sprintf(errmsg_buff, "File is from a future maria system: %d. Current version is: %d",
(int) buffer[CF_VERSION_OFFSET], CONTROL_FILE_VERSION);
errmsg= errmsg_buff;
goto err;
}
new_cf_create_time_size= uint2korr(buffer + CF_CREATE_TIME_SIZE_OFFSET);
new_cf_changeable_size= uint2korr(buffer + CF_CHANGEABLE_SIZE_OFFSET);
if (new_cf_create_time_size < CF_MIN_CREATE_TIME_TOTAL_SIZE ||
new_cf_changeable_size < CF_MIN_CHANGEABLE_TOTAL_SIZE ||
new_cf_create_time_size + new_cf_changeable_size !=
stat_buff.st_size)
{
error= CONTROL_FILE_INCONSISTENT_INFORMATION;
errmsg= "Sizes stored in control file are inconsistent";
goto err;
}
new_block_size= uint2korr(buffer + CF_BLOCKSIZE_OFFSET);
if (new_block_size != maria_block_size)
{
error= CONTROL_FILE_WRONG_BLOCKSIZE;
sprintf(errmsg_buff,
"Block size in control file (%u) is different than given maria_block_size: %u",
new_block_size, (uint) maria_block_size);
errmsg= errmsg_buff;
goto err;
}
if (my_checksum(0, buffer, new_cf_create_time_size - CF_CHECKSUM_SIZE) !=
uint4korr(buffer + new_cf_create_time_size - CF_CHECKSUM_SIZE))
{
error= CONTROL_FILE_BAD_HEAD_CHECKSUM;
errmsg= "Fixed part checksum mismatch";
goto err;
}
if (my_checksum(0, buffer + new_cf_create_time_size + CF_CHECKSUM_SIZE,
new_cf_changeable_size - CF_CHECKSUM_SIZE) !=
uint4korr(buffer + new_cf_create_time_size))
{
error= CONTROL_FILE_BAD_CHECKSUM;
errmsg= "Checksum missmatch";
errmsg= "Changeable part (end of control file) checksum missmatch";
goto err;
}
last_checkpoint_lsn= lsn_korr(buffer + CONTROL_FILE_LSN_OFFSET);
last_logno= uint4korr(buffer + CONTROL_FILE_FILENO_OFFSET);
memcpy(maria_uuid, buffer + CF_UUID_OFFSET, CF_UUID_SIZE);
cf_create_time_size= new_cf_create_time_size;
cf_changeable_size= new_cf_changeable_size;
last_checkpoint_lsn= lsn_korr(buffer + new_cf_create_time_size +
CF_LSN_OFFSET);
last_logno= uint4korr(buffer + new_cf_create_time_size + CF_FILENO_OFFSET);
DBUG_RETURN(0);
@ -247,12 +368,12 @@ err:
checkpoint_lsn LSN of last checkpoint
logno last log file number
objs_to_write which of the arguments should be used as new values
(for example, CONTROL_FILE_UPDATE_ONLY_LSN will not
(for example, CF_UPDATE_ONLY_LSN will not
write the logno argument to the control file and will
not update the last_logno global variable); can be:
CONTROL_FILE_UPDATE_ALL
CONTROL_FILE_UPDATE_ONLY_LSN
CONTROL_FILE_UPDATE_ONLY_LOGNO.
CF_UPDATE_ALL
CF_UPDATE_ONLY_LSN
CF_UPDATE_ONLY_LOGNO.
NOTE
We always want to do one single my_pwrite() here to be as atomic as
@ -266,8 +387,9 @@ err:
int ma_control_file_write_and_force(const LSN checkpoint_lsn, uint32 logno,
uint objs_to_write)
{
char buffer[CONTROL_FILE_SIZE];
char buffer[CF_MAX_SIZE];
my_bool update_checkpoint_lsn= FALSE, update_logno= FALSE;
uint32 sum;
DBUG_ENTER("ma_control_file_write_and_force");
DBUG_ASSERT(control_file_fd >= 0); /* must be open */
@ -276,11 +398,6 @@ int ma_control_file_write_and_force(const LSN checkpoint_lsn, uint32 logno,
translog_lock_assert_owner();
#endif
memcpy(buffer + CONTROL_FILE_MAGIC_STRING_OFFSET,
CONTROL_FILE_MAGIC_STRING, CONTROL_FILE_MAGIC_STRING_SIZE);
memcpy(buffer + CONTROL_FILE_UUID_OFFSET, maria_uuid,
CONTROL_FILE_UUID_SIZE);
if (objs_to_write == CONTROL_FILE_UPDATE_ONLY_LSN)
update_checkpoint_lsn= TRUE;
else if (objs_to_write == CONTROL_FILE_UPDATE_ONLY_LOGNO)
@ -291,24 +408,31 @@ int ma_control_file_write_and_force(const LSN checkpoint_lsn, uint32 logno,
DBUG_ASSERT(0);
if (update_checkpoint_lsn)
lsn_store(buffer + CONTROL_FILE_LSN_OFFSET, checkpoint_lsn);
lsn_store(buffer + CF_LSN_OFFSET, checkpoint_lsn);
else /* store old value == change nothing */
lsn_store(buffer + CONTROL_FILE_LSN_OFFSET, last_checkpoint_lsn);
lsn_store(buffer + CF_LSN_OFFSET, last_checkpoint_lsn);
if (update_logno)
int4store(buffer + CONTROL_FILE_FILENO_OFFSET, logno);
int4store(buffer + CF_FILENO_OFFSET, logno);
else
int4store(buffer + CONTROL_FILE_FILENO_OFFSET, last_logno);
int4store(buffer + CF_FILENO_OFFSET, last_logno);
{
uint32 sum= (uint32)
my_checksum(0, buffer + CONTROL_FILE_LSN_OFFSET,
CONTROL_FILE_SIZE - CONTROL_FILE_LSN_OFFSET);
int4store(buffer + CONTROL_FILE_CHECKSUM_OFFSET, sum);
}
/*
Clear unknown part of changeable part.
Other option would be to remember the original values in the file
and copy them here, but this should be safer.
*/
bzero(buffer + CF_CHANGEABLE_TOTAL_SIZE,
cf_changeable_size - CF_CHANGEABLE_TOTAL_SIZE);
if (my_pwrite(control_file_fd, buffer, sizeof(buffer),
0, MYF(MY_FNABP | MY_WME)) ||
/* Checksum is stored first */
compile_time_assert(CF_CHECKSUM_OFFSET == 0);
sum= my_checksum(0, buffer + CF_CHECKSUM_SIZE,
cf_changeable_size - CF_CHECKSUM_SIZE);
int4store(buffer, sum);
if (my_pwrite(control_file_fd, buffer, cf_changeable_size,
cf_create_time_size, MYF(MY_FNABP | MY_WME)) ||
my_sync(control_file_fd, MYF(MY_WME)))
DBUG_RETURN(1);

View file

@ -22,6 +22,12 @@
#define _ma_control_file_h
#define CONTROL_FILE_BASE_NAME "maria_log_control"
/*
Major version for control file. Should only be changed when making
big changes that make the new control file incompatible with all
older versions of Maria.
*/
#define CONTROL_FILE_VERSION 1
/* Here is the interface of this module */
@ -43,8 +49,12 @@ typedef enum enum_control_file_error {
CONTROL_FILE_TOO_SMALL,
CONTROL_FILE_TOO_BIG,
CONTROL_FILE_BAD_MAGIC_STRING,
CONTROL_FILE_BAD_VERSION,
CONTROL_FILE_BAD_CHECKSUM,
CONTROL_FILE_BAD_HEAD_CHECKSUM,
CONTROL_FILE_MISSING,
CONTROL_FILE_INCONSISTENT_INFORMATION,
CONTROL_FILE_WRONG_BLOCKSIZE,
CONTROL_FILE_UNKNOWN_ERROR /* any other error */
} CONTROL_FILE_ERROR;

View file

@ -13,8 +13,6 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
/* Remove a row from a MARIA table */
#include "ma_fulltext.h"
#include "ma_rt_index.h"
#include "trnman.h"
@ -25,19 +23,21 @@ static int d_search(MARIA_HA *info,MARIA_KEYDEF *keyinfo,uint comp_flag,
my_off_t page, uchar *anc_buff,
MARIA_PINNED_PAGE *anc_page_link);
static int del(MARIA_HA *info,MARIA_KEYDEF *keyinfo, uchar *key,
uchar *anc_buff, my_off_t leaf_page, uchar *leaf_buff,
MARIA_PINNED_PAGE *leaf_page_link, uchar *keypos,
my_off_t next_block, uchar *ret_key);
static int underflow(MARIA_HA *info,MARIA_KEYDEF *keyinfo,uchar *anc_buff,
my_off_t leaf_page,uchar *leaf_buff,
my_off_t anc_page, uchar *anc_buff, my_off_t leaf_page,
uchar *leaf_buff, MARIA_PINNED_PAGE *leaf_page_link,
uchar *keypos, my_off_t next_block, uchar *ret_key);
static int underflow(MARIA_HA *info,MARIA_KEYDEF *keyinfo,
my_off_t anc_page, uchar *anc_buff,
my_off_t leaf_page, uchar *leaf_buff,
MARIA_PINNED_PAGE *leaf_page_link, uchar *keypos);
static uint remove_key(MARIA_KEYDEF *keyinfo,uint nod_flag,uchar *keypos,
uchar *lastkey,uchar *page_end,
my_off_t *next_block, MARIA_KEY_PARAM *s_temp);
static my_bool _ma_log_delete(MARIA_HA *info, my_off_t page, uchar *buff,
uchar *key_pos, uint move_length,
uint change_length);
uchar *key_pos, uint changed_length,
uint move_length);
/* @brief Remove a row from a MARIA table */
int maria_delete(MARIA_HA *info,const uchar *record)
{
@ -48,8 +48,7 @@ int maria_delete(MARIA_HA *info,const uchar *record)
MARIA_SHARE *share=info->s;
DBUG_ENTER("maria_delete");
/* Test if record is in datafile */
/* Test if record is in datafile */
DBUG_EXECUTE_IF("maria_pretend_crashed_table_on_usage",
maria_print_error(share, HA_ERR_CRASHED);
DBUG_RETURN(my_errno= HA_ERR_CRASHED););
@ -157,8 +156,15 @@ int _ma_ck_delete(register MARIA_HA *info, uint keynr, uchar *key,
int res;
LSN lsn= LSN_IMPOSSIBLE;
my_off_t new_root= info->s->state.key_root[keynr];
uchar key_buff[HA_MAX_KEY_BUFF];
DBUG_ENTER("_ma_ck_delete");
if (info->s->now_transactional)
{
/* Save original value as the key may change */
memcpy(key_buff, key, key_length + info->s->rec_reflength);
}
res= _ma_ck_real_delete(info, info->s->keyinfo+keynr, key, key_length,
&new_root);
@ -184,9 +190,10 @@ int _ma_ck_delete(register MARIA_HA *info, uint keynr, uchar *key,
log_type= LOGREC_UNDO_KEY_DELETE_WITH_ROOT;
}
key_length+= info->s->rec_reflength;
log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data;
log_array[TRANSLOG_INTERNAL_PARTS + 0].length= (uint) (log_pos - log_data);
log_array[TRANSLOG_INTERNAL_PARTS + 1].str= (char*) key;
log_array[TRANSLOG_INTERNAL_PARTS + 1].str= (char*) key_buff;
log_array[TRANSLOG_INTERNAL_PARTS + 1].length= key_length;
msg.root= &info->s->state.key_root[keynr];
@ -278,16 +285,16 @@ err:
} /* _ma_ck_real_delete */
/*
@brief Remove key below key root
/**
@brief Remove key below key root
@param key Key to delete. Will contain new key if block was enlarged
@param key Key to delete. Will contain new key if block was enlarged
@return
@retval 0 ok (anc_page is not changed)
@retval 1 If data on page is too small; In this case anc_buff is not saved
@retval 2 If data on page is too big
@retval -1 On errors
@return
@retval 0 ok (anc_page is not changed)
@retval 1 If data on page is too small; In this case anc_buff is not saved
@retval 2 If data on page is too big
@retval -1 On errors
*/
static int d_search(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
@ -421,14 +428,15 @@ static int d_search(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
else
{ /* Found key */
uint tmp;
length= _ma_get_page_used(info, anc_buff);
if (!(tmp= remove_key(keyinfo,nod_flag,keypos,lastkey,anc_buff+length,
uint anc_buff_length= _ma_get_page_used(info, anc_buff);
if (!(tmp= remove_key(keyinfo,nod_flag,keypos,lastkey,
anc_buff + anc_buff_length,
&next_block, &s_temp)))
goto err;
anc_page_link->changed= 1;
length-= tmp;
_ma_store_page_used(info, anc_buff, length, nod_flag);
anc_buff_length-= tmp;
_ma_store_page_used(info, anc_buff, anc_buff_length, nod_flag);
/*
Log initial changes on pages
@ -437,14 +445,14 @@ static int d_search(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
*/
if (info->s->now_transactional &&
_ma_log_delete(info, anc_page, anc_buff, s_temp.key_pos,
s_temp.move_length, s_temp.changed_length))
s_temp.changed_length, s_temp.move_length))
DBUG_RETURN(-1);
if (!nod_flag)
{ /* On leaf page */
if (test(length <= (info->quick_mode ?
MARIA_MIN_KEYBLOCK_LENGTH :
(uint) keyinfo->underflow_block_length)))
if (test(anc_buff_length <= (info->quick_mode ?
MARIA_MIN_KEYBLOCK_LENGTH :
(uint) keyinfo->underflow_block_length)))
{
/* Page will be written by caller if we return 1 */
DBUG_RETURN(1);
@ -456,15 +464,16 @@ static int d_search(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
DBUG_RETURN(0);
}
save_flag=1; /* Mark that anc_buff is changed */
ret_value= del(info, keyinfo, key, anc_buff, leaf_page, leaf_buff,
leaf_page_link, keypos, next_block, lastkey);
ret_value= del(info, keyinfo, key, anc_page, anc_buff,
leaf_page, leaf_buff, leaf_page_link,
keypos, next_block, lastkey);
}
if (ret_value >0)
{
save_flag=1;
if (ret_value == 1)
ret_value= underflow(info, keyinfo, anc_buff, leaf_page, leaf_buff,
leaf_page_link, keypos);
ret_value= underflow(info, keyinfo, anc_page, anc_buff,
leaf_page, leaf_buff, leaf_page_link, keypos);
else
{ /* This happens only with packed keys */
DBUG_PRINT("test",("Enlarging of key when deleting"));
@ -478,8 +487,12 @@ static int d_search(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
if (ret_value == 0 && _ma_get_page_used(info, anc_buff) >
(uint) (keyinfo->block_length - KEYPAGE_CHECKSUM_SIZE))
{
/* parent buffer got too big ; We have to split the page */
save_flag=1;
ret_value= _ma_split_page(info,keyinfo,key,anc_buff,lastkey,0) | 2;
ret_value= _ma_split_page(info, keyinfo, key, anc_page, anc_buff,
(uint) (keyinfo->block_length -
KEYPAGE_CHECKSUM_SIZE),
(uchar*) 0, 0, 0, lastkey, 0) | 2;
}
if (save_flag && ret_value != 1)
{
@ -503,17 +516,39 @@ err:
} /* d_search */
/* Remove a key that has a page-reference */
/**
@brief Remove a key that has a page-reference
static int del(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
uchar *key, uchar *anc_buff, my_off_t leaf_page,
uchar *leaf_buff, MARIA_PINNED_PAGE *leaf_page_link,
uchar *keypos, /* Pos to where deleted key was */
my_off_t next_block,
uchar *ret_key) /* key before keypos in anc_buff */
@param info Maria handler
@param key Buffer for key to be inserted at upper level
@param anc_page Page address for page where deleted key was
@param anc_buff Page buffer (nod) where deleted key was
@param leaf_page Page address for nod before the deleted key
@param leaf_buff Buffer for leaf_page
@param leaf_page_link Pinned page link for leaf_buff
@param keypos Pos to where deleted key was on anc_buff
@param next_block Page address for nod after deleted key
@param ret_key Key before keypos in anc_buff
@notes
leaf_buff is written to disk
anc_buff is not updated on disk. Caller should do this
@return
@retval < 0 Error
@retval 0 OK
@retval 1 key contains key to upper level (from balance page)
@retval 2 key contains key to upper level (from split space)
*/
static int del(register MARIA_HA *info, MARIA_KEYDEF *keyinfo,
uchar *key, my_off_t anc_page, uchar *anc_buff,
my_off_t leaf_page, uchar *leaf_buff,
MARIA_PINNED_PAGE *leaf_page_link,
uchar *keypos, my_off_t next_block, uchar *ret_key)
{
int ret_value,length;
uint a_length, nod_flag, used_length, tmp;
uint a_length, nod_flag, leaf_length, tmp;
my_off_t next_page;
uchar keybuff[HA_MAX_KEY_BUFF],*endpos,*next_buff,*key_start, *prev_key;
MARIA_SHARE *share=info->s;
@ -523,10 +558,10 @@ static int del(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
DBUG_PRINT("enter",("leaf_page: %ld keypos: 0x%lx", (long) leaf_page,
(ulong) keypos));
_ma_get_used_and_nod(info, leaf_buff, used_length, nod_flag);
DBUG_DUMP("leaf_buff", leaf_buff, used_length);
_ma_get_used_and_nod(info, leaf_buff, leaf_length, nod_flag);
DBUG_DUMP("leaf_buff", leaf_buff, leaf_length);
endpos= leaf_buff + used_length;
endpos= leaf_buff + leaf_length;
if (!(key_start= _ma_get_last_key(info,keyinfo,leaf_buff,keybuff,endpos,
&tmp)))
DBUG_RETURN(-1);
@ -543,21 +578,26 @@ static int del(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
else
{
DBUG_DUMP("next_page", next_buff, _ma_get_page_used(info, next_buff));
if ((ret_value= del(info,keyinfo,key,anc_buff, next_page, next_buff,
next_page_link, keypos, next_block, ret_key)) >0)
if ((ret_value= del(info, keyinfo, key, anc_page, anc_buff, next_page,
next_buff, next_page_link, keypos, next_block,
ret_key)) >0)
{
/* Get new length after key was deleted */
endpos=leaf_buff+_ma_get_page_used(info, leaf_buff);
if (ret_value == 1)
{
ret_value= underflow(info, keyinfo, leaf_buff, next_page,
ret_value= underflow(info, keyinfo, leaf_page, leaf_buff, next_page,
next_buff, next_page_link, endpos);
if (ret_value == 0 &&
_ma_get_page_used(info, leaf_buff) >
(uint) (keyinfo->block_length - KEYPAGE_CHECKSUM_SIZE))
{
ret_value= (_ma_split_page(info,keyinfo,key,leaf_buff,ret_key,0) |
2);
ret_value= (_ma_split_page(info, keyinfo, key,
leaf_page, leaf_buff,
(uint) (keyinfo->block_length -
KEYPAGE_CHECKSUM_SIZE),
(uchar*) 0, 0, 0,
ret_key, 0) | 2);
}
}
else
@ -582,8 +622,14 @@ static int del(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
}
/* Remove last key from leaf page */
leaf_page_link->changed= 1;
_ma_store_page_used(info, leaf_buff, key_start-leaf_buff, nod_flag);
if (info->s->now_transactional &&
_ma_log_suffix(info, leaf_page, leaf_buff, leaf_length,
(uint) (key_start - leaf_buff)))
goto err;
leaf_page_link->changed= 1;
if (_ma_write_keypage(info, keyinfo, leaf_page,
PAGECACHE_LOCK_LEFT_WRITELOCKED, DFLT_INIT_HITS,
leaf_buff))
@ -607,6 +653,8 @@ static int del(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
else
bmove(keypos,keypos-length, (int) (endpos-keypos)+length);
(*keyinfo->store_key)(keyinfo,keypos,&s_temp);
key_start= keypos;
/* Save pointer to next leaf */
if (!(*keyinfo->get_key)(keyinfo,share->base.key_reflength,&keypos,ret_key))
goto err;
@ -614,6 +662,11 @@ static int del(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
_ma_store_page_used(info, anc_buff, a_length + length,
share->base.key_reflength);
if (info->s->now_transactional &&
_ma_log_add(info, anc_page, anc_buff, a_length + length,
key_start, s_temp.changed_length, s_temp.move_length, 1))
goto err;
DBUG_RETURN(_ma_get_page_used(info, leaf_buff) <=
(info->quick_mode ? MARIA_MIN_KEYBLOCK_LENGTH :
(uint) keyinfo->underflow_block_length));
@ -622,25 +675,44 @@ err:
} /* del */
/* Balances adjacent pages if underflow occurs */
/**
@brief Balances adjacent pages if underflow occurs
@fn underflow()
@param anc_buff Ancestor page data
@param leaf_page Page number of leaf page
@param leaf_buff Leaf page (page that underflowed)
@param leaf_page_link Pointer to pin information about leaf page
@param keypos Position after current key in anc_buff
@note
This function writes redo entries for all changes
Caller must save anc_buff
@return
@retval 0 ok
@retval 1 ok, but anc_buff did underflow
@retval -1 error
*/
static int underflow(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
uchar *anc_buff,
my_off_t leaf_page,/* Ancestor page and underflow page */
uchar *leaf_buff,
my_off_t anc_page, uchar *anc_buff,
my_off_t leaf_page, uchar *leaf_buff,
MARIA_PINNED_PAGE *leaf_page_link,
uchar *keypos) /* Position to pos after key */
uchar *keypos)
{
int t_length;
uint length,anc_length,buff_length,leaf_length,p_length,s_length,nod_flag,
key_reflength,key_length;
uint length,anc_length,buff_length,leaf_length,p_length,s_length,nod_flag;
uint next_buff_length, new_buff_length, key_reflength, key_length;
uint unchanged_leaf_length, new_leaf_length;
my_off_t next_page;
uchar anc_key[HA_MAX_KEY_BUFF],leaf_key[HA_MAX_KEY_BUFF];
uchar *buff,*endpos,*next_keypos,*anc_pos,*half_pos,*temp_pos,*prev_key;
uchar *after_key;
MARIA_KEY_PARAM s_temp;
uchar *buff,*endpos,*next_keypos,*anc_pos,*half_pos,*prev_key;
uchar *after_key, *anc_end_pos;
MARIA_KEY_PARAM key_deleted, key_inserted;
MARIA_SHARE *share=info->s;
MARIA_PINNED_PAGE *next_page_link;
my_bool first_key;
DBUG_ENTER("underflow");
DBUG_PRINT("enter",("leaf_page: %ld keypos: 0x%lx",(long) leaf_page,
(ulong) keypos));
@ -657,10 +729,12 @@ static int underflow(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
key_reflength=share->base.key_reflength;
if (info->s->keyinfo+info->lastinx == keyinfo)
info->page_changed=1;
first_key= keypos == anc_buff + info->s->keypage_header + key_reflength;
if ((keypos < anc_buff + anc_length && (info->state->records & 1)) ||
keypos == anc_buff + info->s->keypage_header + key_reflength)
{ /* Use page right of anc-page */
first_key)
{
/* Use page right of anc-page */
DBUG_PRINT("test",("use right page"));
if (keyinfo->flag & HA_BINARY_PACK_KEY)
@ -681,8 +755,8 @@ static int underflow(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
if (!_ma_fetch_keypage(info,keyinfo, next_page, PAGECACHE_LOCK_WRITE,
DFLT_INIT_HITS, buff, 0, &next_page_link))
goto err;
buff_length= _ma_get_page_used(info, buff);
DBUG_DUMP("next",buff,buff_length);
next_buff_length= _ma_get_page_used(info, buff);
DBUG_DUMP("next", buff, next_buff_length);
/* find keys to make a big key-page */
bmove(next_keypos-key_reflength, buff + info->s->keypage_header,
@ -696,67 +770,97 @@ static int underflow(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
prev_key=(leaf_length == p_length ? (uchar*) 0 : leaf_key);
t_length=(*keyinfo->pack_key)(keyinfo,nod_flag,buff+p_length,
prev_key, prev_key,
anc_key, &s_temp);
length=buff_length-p_length;
anc_key, &key_inserted);
length= next_buff_length - p_length;
endpos=buff+length+leaf_length+t_length;
/* buff will always be larger than before !*/
bmove_upp(endpos, buff+buff_length,length);
bmove_upp(endpos, buff + next_buff_length, length);
memcpy(buff, leaf_buff,(size_t) leaf_length);
(*keyinfo->store_key)(keyinfo,buff+leaf_length,&s_temp);
buff_length=(uint) (endpos-buff);
(*keyinfo->store_key)(keyinfo, buff+leaf_length, &key_inserted);
buff_length= (uint) (endpos-buff);
_ma_store_page_used(info, buff, buff_length, nod_flag);
/* remove key from anc_buff */
if (!(s_length=remove_key(keyinfo,key_reflength,keypos,anc_key,
anc_buff+anc_length,(my_off_t *) 0, &s_temp)))
anc_buff+anc_length,(my_off_t *) 0,
&key_deleted)))
goto err;
anc_length-=s_length;
_ma_store_page_used(info, anc_buff, anc_length, key_reflength);
if (buff_length <= (uint) (keyinfo->block_length - KEYPAGE_CHECKSUM_SIZE))
{ /* Keys in one page */
memcpy(leaf_buff,buff,(size_t) buff_length);
{
/* All keys fitted into one page */
next_page_link->changed= 1;
if (_ma_dispose(info, next_page, 0))
goto err;
memcpy(leaf_buff, buff, (size_t) buff_length);
if (info->s->now_transactional)
{
/* Log changes to parent page */
if (_ma_log_delete(info, anc_page, anc_buff, key_deleted.key_pos,
key_deleted.changed_length,
key_deleted.move_length))
goto err;
/*
Log changes to leaf page. Data for leaf page is in buff
which contains original leaf_buff, parting key and next_buff
*/
if (_ma_log_suffix(info, leaf_page, leaf_buff,
leaf_length, buff_length))
goto err;
}
}
else
{ /* Page is full */
endpos=anc_buff+anc_length;
DBUG_PRINT("test",("anc_buff: 0x%lx endpos: 0x%lx",
(long) anc_buff, (long) endpos));
if (keypos != anc_buff + info->s->keypage_header + key_reflength &&
{
/*
Balancing didn't free a page, so we have to split 'buff' into two
pages:
- Find key in middle of buffer
- Store everything before key in 'leaf_buff'
- Pack key into anc_buff at position of deleted key
Note that anc_buff may overflow! (is handled by caller)
- Store remaining keys in next_page (buff)
*/
MARIA_KEY_PARAM anc_key_inserted;
anc_end_pos= anc_buff + anc_length;
DBUG_PRINT("test",("anc_buff: 0x%lx anc_end_pos: 0x%lx",
(long) anc_buff, (long) anc_end_pos));
if (!first_key &&
!_ma_get_last_key(info,keyinfo,anc_buff,anc_key,keypos,&length))
goto err;
if (!(half_pos= _ma_find_half_pos(info, nod_flag, keyinfo, buff,
leaf_key, &key_length, &after_key)))
goto err;
length=(uint) (half_pos-buff);
memcpy(leaf_buff,buff,(size_t) length);
_ma_store_page_used(info, leaf_buff, length, nod_flag);
new_leaf_length= (uint) (half_pos-buff);
memcpy(leaf_buff, buff, (size_t) new_leaf_length);
_ma_store_page_used(info, leaf_buff, new_leaf_length, nod_flag);
/* Correct new keypointer to leaf_page */
half_pos=after_key;
_ma_kpointer(info,leaf_key+key_length,next_page);
/* Save key in anc_buff */
prev_key=(keypos == anc_buff + info->s->keypage_header + key_reflength ?
(uchar*) 0 : anc_key),
prev_key= (first_key ? (uchar*) 0 : anc_key);
t_length=(*keyinfo->pack_key)(keyinfo,key_reflength,
(keypos == endpos ? (uchar*) 0 :
(keypos == anc_end_pos ? (uchar*) 0 :
keypos),
prev_key, prev_key,
leaf_key, &s_temp);
leaf_key, &anc_key_inserted);
if (t_length >= 0)
bmove_upp(endpos+t_length, endpos, (uint) (endpos-keypos));
bmove_upp(anc_end_pos+t_length, anc_end_pos,
(uint) (anc_end_pos - keypos));
else
bmove(keypos,keypos-t_length,(uint) (endpos-keypos)+t_length);
(*keyinfo->store_key)(keyinfo,keypos,&s_temp);
bmove(keypos,keypos-t_length,(uint) (anc_end_pos-keypos)+t_length);
(*keyinfo->store_key)(keyinfo,keypos, &anc_key_inserted);
anc_length+= t_length;
_ma_store_page_used(info, anc_buff, anc_length, key_reflength);
/* Store key first in new page */
/* Store key first in new page */
if (nod_flag)
bmove(buff+info->s->keypage_header, half_pos-nod_flag,
(size_t) nod_flag);
@ -764,19 +868,73 @@ static int underflow(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
goto err;
t_length=(int) (*keyinfo->pack_key)(keyinfo, nod_flag, (uchar*) 0,
(uchar*) 0, (uchar*) 0,
leaf_key, &s_temp);
leaf_key, &key_inserted);
/* t_length will always be > 0 for a new page !*/
length=(uint) ((buff+_ma_get_page_used(info, buff))-half_pos);
length= (uint) ((buff + buff_length) - half_pos);
bmove(buff+p_length+t_length, half_pos, (size_t) length);
(*keyinfo->store_key)(keyinfo,buff+p_length,&s_temp);
_ma_store_page_used(info, buff, length + t_length + p_length, nod_flag);
(*keyinfo->store_key)(keyinfo,buff+p_length, &key_inserted);
new_buff_length= length + t_length + p_length;
_ma_store_page_used(info, buff, new_buff_length, nod_flag);
if (info->s->now_transactional)
{
/*
Log changes to parent page
This has one key deleted from it and one key inserted to it at
keypos
_ma_log_add() ensures that we don't log changes that are outside of
key block size, as the REDO code can't handle that
*/
if (_ma_log_add(info, anc_page, anc_buff, anc_length,
keypos,
anc_key_inserted.move_length +
min(anc_key_inserted.changed_length -
anc_key_inserted.move_length,
key_deleted.changed_length),
anc_key_inserted.move_length -
key_deleted.move_length, 1))
goto err;
/*
Log changes to leaf page.
This contains original data with new data added at end
*/
DBUG_ASSERT(leaf_length <= new_leaf_length);
if (_ma_log_suffix(info, leaf_page, leaf_buff, leaf_length,
new_leaf_length))
goto err;
/*
Log changes to next page
This contains original data with some prefix data deleted and
some compressed data at start possibly extended
Data in buff was originally:
org_leaf_buff [leaf_length]
separator_key [buff_key_inserted.move_length]
next_key_changes [buff_key_inserted.changed_length -move_length]
next_page_data [next_buff_length - p_length -
(buff_key_inserted.changed_length -move_length)]
After changes it's now:
unpacked_key [key_inserted.changed_length]
next_suffix [next_buff_length - key_inserted.changed_length]
*/
DBUG_ASSERT(new_buff_length <= next_buff_length);
if (_ma_log_prefix(info, next_page, buff,
key_inserted.changed_length,
(int) (new_buff_length - next_buff_length)))
goto err;
}
next_page_link->changed= 1;
if (_ma_write_keypage(info, keyinfo, next_page,
PAGECACHE_LOCK_LEFT_WRITELOCKED, DFLT_INIT_HITS,
buff))
goto err;
}
leaf_page_link->changed= 1;
if (_ma_write_keypage(info, keyinfo, leaf_page,
PAGECACHE_LOCK_LEFT_WRITELOCKED, DFLT_INIT_HITS,
@ -796,7 +954,7 @@ static int underflow(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
DFLT_INIT_HITS, buff, 0, &next_page_link))
goto err;
buff_length= _ma_get_page_used(info, buff);
endpos=buff+buff_length;
endpos= buff + buff_length;
DBUG_DUMP("prev",buff,buff_length);
/* find keys to make a big key-page */
@ -815,59 +973,96 @@ static int underflow(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
(leaf_length == p_length ?
(uchar*) 0 : leaf_buff+p_length),
prev_key, prev_key,
anc_key, &s_temp);
anc_key, &key_inserted);
if (t_length >= 0)
bmove(endpos+t_length, leaf_buff+p_length,
(size_t) (leaf_length-p_length));
else /* We gained space */
bmove(endpos,leaf_buff+((int) p_length-t_length),
(size_t) (leaf_length-p_length+t_length));
(*keyinfo->store_key)(keyinfo,endpos, &key_inserted);
(*keyinfo->store_key)(keyinfo,endpos,&s_temp);
buff_length=buff_length+leaf_length-p_length+t_length;
_ma_store_page_used(info, buff, buff_length, nod_flag);
/* Remember for logging how many bytes of leaf_buff that are not changed */
DBUG_ASSERT((int) key_inserted.changed_length >= key_inserted.move_length);
unchanged_leaf_length= leaf_length - (key_inserted.changed_length -
key_inserted.move_length);
new_buff_length= buff_length + leaf_length - p_length + t_length;
_ma_store_page_used(info, buff, new_buff_length, nod_flag);
/* remove key from anc_buff */
if (!(s_length= remove_key(keyinfo,key_reflength,keypos,anc_key,
anc_buff+anc_length,(my_off_t *) 0, &s_temp)))
anc_buff+anc_length,(my_off_t *) 0,
&key_deleted)))
goto err;
anc_length-=s_length;
_ma_store_page_used(info, anc_buff, anc_length, key_reflength);
if (buff_length <= (uint) (keyinfo->block_length - KEYPAGE_CHECKSUM_SIZE))
{ /* Keys in one page */
if (new_buff_length <= (uint) (keyinfo->block_length -
KEYPAGE_CHECKSUM_SIZE))
{
/* All keys fitted into one page */
leaf_page_link->changed= 1;
if (_ma_dispose(info, leaf_page, 0))
goto err;
if (info->s->now_transactional)
{
/* Log changes to parent page */
if (_ma_log_delete(info, anc_page, anc_buff, key_deleted.key_pos,
key_deleted.changed_length, key_deleted.move_length))
goto err;
/*
Log changes to next page. Data for leaf page is in buff
that contains original leaf_buff, parting key and next_buff
*/
if (_ma_log_suffix(info, next_page, buff,
buff_length, new_buff_length))
goto err;
}
}
else
{ /* Page is full */
if (keypos == anc_buff+ info->s->keypage_header + key_reflength)
anc_pos=0; /* First key */
{
/*
Balancing didn't free a page, so we have to split 'buff' into two
pages
- Find key in middle of buffer (buff)
- Pack key at half_buff into anc_buff at position of deleted key
Note that anc_buff may overflow! (is handled by caller)
- Move everything after middlekey to 'leaf_buff'
- Shorten buff at 'endpos'
*/
MARIA_KEY_PARAM anc_key_inserted;
if (first_key)
anc_pos= 0; /* First key */
else if (!_ma_get_last_key(info,keyinfo,anc_buff,anc_pos=anc_key,keypos,
&length))
goto err;
endpos= _ma_find_half_pos(info, nod_flag, keyinfo, buff, leaf_key,
&key_length, &half_pos);
if (!endpos)
if (!(endpos= _ma_find_half_pos(info, nod_flag, keyinfo, buff, leaf_key,
&key_length, &half_pos)))
goto err;
/* Correct new keypointer to leaf_page */
_ma_kpointer(info,leaf_key+key_length,leaf_page);
/* Save key in anc_buff */
DBUG_DUMP("anc_buff",anc_buff,anc_length);
DBUG_DUMP("key_to_anc",leaf_key,key_length);
temp_pos=anc_buff+anc_length;
anc_end_pos= anc_buff + anc_length;
t_length=(*keyinfo->pack_key)(keyinfo,key_reflength,
keypos == temp_pos ? (uchar*) 0
keypos == anc_end_pos ? (uchar*) 0
: keypos,
anc_pos, anc_pos,
leaf_key,&s_temp);
if (t_length > 0)
bmove_upp(temp_pos+t_length, temp_pos, (uint) (temp_pos-keypos));
leaf_key, &anc_key_inserted);
if (t_length >= 0)
bmove_upp(anc_end_pos+t_length, anc_end_pos,
(uint) (anc_end_pos-keypos));
else
bmove(keypos,keypos-t_length,(uint) (temp_pos-keypos)+t_length);
(*keyinfo->store_key)(keyinfo,keypos,&s_temp);
bmove(keypos,keypos-t_length,(uint) (anc_end_pos-keypos)+t_length);
(*keyinfo->store_key)(keyinfo,keypos, &anc_key_inserted);
anc_length+= t_length;
_ma_store_page_used(info, anc_buff, anc_length, key_reflength);
@ -879,20 +1074,63 @@ static int underflow(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
goto err;
DBUG_DUMP("key_to_leaf",leaf_key,length);
t_length=(*keyinfo->pack_key)(keyinfo,nod_flag, (uchar*) 0,
(uchar*) 0, (uchar*) 0, leaf_key, &s_temp);
length=(uint) ((buff+buff_length)-half_pos);
(uchar*) 0, (uchar*) 0, leaf_key,
&key_inserted);
/* t_length will always be > 0 for a new page !*/
length= (uint) ((buff + new_buff_length) - half_pos);
DBUG_PRINT("info",("t_length: %d length: %d",t_length,(int) length));
bmove(leaf_buff+p_length+t_length,half_pos,
(size_t) length);
(*keyinfo->store_key)(keyinfo,leaf_buff+p_length,&s_temp);
_ma_store_page_used(info, leaf_buff, length + t_length + p_length,
nod_flag);
bmove(leaf_buff+p_length+t_length, half_pos, (size_t) length);
(*keyinfo->store_key)(keyinfo,leaf_buff+p_length, &key_inserted);
new_leaf_length= length + t_length + p_length;
_ma_store_page_used(info, leaf_buff, new_leaf_length, nod_flag);
new_buff_length= (uint) (endpos - buff);
_ma_store_page_used(info, buff, new_buff_length, nod_flag);
if (info->s->now_transactional)
{
/*
Log changes to parent page
This has one key deleted from it and one key inserted to it at
keypos
ma_log_add() ensures that we don't log changes that are outside of
the key block size, as the REDO code can't handle that
*/
if (_ma_log_add(info, anc_page, anc_buff, anc_length,
keypos,
anc_key_inserted.move_length +
min(anc_key_inserted.changed_length -
anc_key_inserted.move_length,
key_deleted.changed_length),
anc_key_inserted.move_length -
key_deleted.move_length, 1))
goto err;
/*
Log changes to leaf page.
This contains original data with new data added first
*/
DBUG_ASSERT(leaf_length <= new_leaf_length);
if (_ma_log_prefix(info, leaf_page, leaf_buff,
new_leaf_length - unchanged_leaf_length,
(int) (new_leaf_length - leaf_length)))
goto err;
/*
Log changes to next page
This contains original data with some suffix data deleted
*/
DBUG_ASSERT(new_buff_length <= buff_length);
if (_ma_log_suffix(info, next_page, buff,
buff_length, new_buff_length))
goto err;
}
leaf_page_link->changed= 1;
if (_ma_write_keypage(info, keyinfo, leaf_page,
PAGECACHE_LOCK_LEFT_WRITELOCKED, DFLT_INIT_HITS,
leaf_buff))
goto err;
_ma_store_page_used(info, buff, (uint) (endpos - buff),nod_flag);
}
next_page_link->changed= 1;
if (_ma_write_keypage(info, keyinfo, next_page,
@ -906,7 +1144,7 @@ err:
} /* underflow */
/*
/**
@brief Remove a key from page
@fn remove_key()
@ -1067,13 +1305,22 @@ static uint remove_key(MARIA_KEYDEF *keyinfo, uint nod_flag,
Logging of redos
****************************************************************************/
/*
@brief log entry where some parts are deleted and some things are changed
/**
@brief log entry where some parts are deleted and some things are changed
@fn _ma_log_delete()
@param info Maria handler
@param page Pageaddress for changed page
@param buff Page buffer
@param key_pos Start of change area
@param changed_length How many bytes were changed at key_pos
@param move_length How many bytes were deleted at key_pos
*/
static my_bool _ma_log_delete(MARIA_HA *info, my_off_t page, uchar *buff,
uchar *key_pos, uint move_length,
uint change_length)
uchar *key_pos, uint changed_length,
uint move_length)
{
LSN lsn;
uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 9], *log_pos;
@ -1081,6 +1328,8 @@ static my_bool _ma_log_delete(MARIA_HA *info, my_off_t page, uchar *buff,
uint translog_parts;
uint offset= (uint) (key_pos - buff);
DBUG_ENTER("_ma_log_delete");
DBUG_PRINT("enter", ("page: %lu changed_length: %u move_length: %d",
(ulong) page, changed_length, move_length));
DBUG_ASSERT(info->s->now_transactional && move_length);
/* Store address of new root page */
@ -1093,14 +1342,14 @@ static my_bool _ma_log_delete(MARIA_HA *info, my_off_t page, uchar *buff,
int2store(log_pos+4, -(int) move_length);
log_pos+= 6;
translog_parts= 1;
if (change_length)
if (changed_length)
{
log_pos[0]= KEY_OP_CHANGE;
int2store(log_pos+1, change_length);
int2store(log_pos+1, changed_length);
log_pos+= 3;
translog_parts= 2;
log_array[TRANSLOG_INTERNAL_PARTS + 1].str= buff + offset;
log_array[TRANSLOG_INTERNAL_PARTS + 1].length= change_length;
log_array[TRANSLOG_INTERNAL_PARTS + 1].length= changed_length;
}
log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data;
@ -1109,7 +1358,7 @@ static my_bool _ma_log_delete(MARIA_HA *info, my_off_t page, uchar *buff,
if (translog_write_record(&lsn, LOGREC_REDO_INDEX,
info->trn, info,
log_array[TRANSLOG_INTERNAL_PARTS + 0].length +
change_length,
changed_length,
TRANSLOG_INTERNAL_PARTS + translog_parts,
log_array, log_data, NULL))
DBUG_RETURN(1);

View file

@ -24,7 +24,7 @@
Some helper functions used both by key page logging and block page logging
****************************************************************************/
/*
/**
@brief Unpin all pinned pages
@fn _ma_unpin_all_pages()
@ -50,7 +50,7 @@ void _ma_unpin_all_pages(MARIA_HA *info, LSN undo_lsn)
DBUG_PRINT("info", ("undo_lsn: %lu", (ulong) undo_lsn));
if (!info->s->now_transactional)
undo_lsn= LSN_IMPOSSIBLE; /* don't try to set a LSN on pages */
DBUG_ASSERT(undo_lsn == LSN_IMPOSSIBLE || maria_in_recovery);
while (pinned_page-- != page_link)
{
@ -70,7 +70,7 @@ void _ma_unpin_all_pages(MARIA_HA *info, LSN undo_lsn)
my_bool _ma_write_clr(MARIA_HA *info, LSN undo_lsn,
enum translog_record_type undo_type,
my_bool store_checksum, ha_checksum checksum,
LSN *res_lsn)
LSN *res_lsn, void *extra_msg)
{
uchar log_data[LSN_STORE_SIZE + FILEID_STORE_SIZE +
PAGE_STORE_SIZE + DIRPOS_STORE_SIZE +
@ -86,11 +86,16 @@ my_bool _ma_write_clr(MARIA_HA *info, LSN undo_lsn,
undo_type);
log_array[TRANSLOG_INTERNAL_PARTS + 0].length=
sizeof(log_data) - HA_CHECKSUM_STORE_SIZE;
/* Extra_msg is handled in write_hook_for_clr_end() */
msg.undone_record_type= undo_type;
msg.previous_undo_lsn= undo_lsn;
msg.extra_msg= extra_msg;
msg.checksum_delta= 0;
if (store_checksum)
{
msg.checksum_delta= checksum;
ha_checksum_store(log_data + LSN_STORE_SIZE + FILEID_STORE_SIZE +
CLR_TYPE_STORE_SIZE, checksum);
log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
@ -106,6 +111,293 @@ my_bool _ma_write_clr(MARIA_HA *info, LSN undo_lsn,
}
/**
  @brief Sets transaction's undo_lsn, first_undo_lsn if needed

  Hook that consumes the message built by _ma_write_clr(). It moves
  trn->undo_lsn back to the previous undo record and applies the state
  change that belongs to the undone record: row count and/or checksum for
  row undos, the new key root for key undos.

  @param  type      Record type (not used)
  @param  trn       Transaction whose undo chain is updated
  @param  tbl_info  Maria handler; the state of its share is updated
  @param  lsn       LSN of the record (not used)
  @param  hook_arg  Pointer to a struct st_msg_to_write_hook_for_clr_end

  @return Operation status, always 0 (success)
*/

my_bool write_hook_for_clr_end(enum translog_record_type type
                               __attribute__ ((unused)),
                               TRN *trn, MARIA_HA *tbl_info,
                               LSN *lsn __attribute__ ((unused)),
                               void *hook_arg)
{
  /* tbl_info is used here, so it must not be tagged as unused */
  MARIA_SHARE *share= tbl_info->s;
  struct st_msg_to_write_hook_for_clr_end *msg=
    (struct st_msg_to_write_hook_for_clr_end *)hook_arg;

  DBUG_ASSERT(trn->trid != 0);
  trn->undo_lsn= msg->previous_undo_lsn;

  switch (msg->undone_record_type) {
  case LOGREC_UNDO_ROW_DELETE:
    /* Undoing a delete brings the row back */
    share->state.state.records++;
    share->state.state.checksum+= msg->checksum_delta;
    break;
  case LOGREC_UNDO_ROW_INSERT:
    /* Undoing an insert removes the row */
    share->state.state.records--;
    share->state.state.checksum+= msg->checksum_delta;
    break;
  case LOGREC_UNDO_ROW_UPDATE:
    /* Row count is unchanged, only the checksum moves */
    share->state.state.checksum+= msg->checksum_delta;
    break;
  case LOGREC_UNDO_KEY_INSERT:
  case LOGREC_UNDO_KEY_DELETE:
  {
    /* Update key root */
    struct st_msg_to_write_hook_for_undo_key *extra_msg=
      (struct st_msg_to_write_hook_for_undo_key *) msg->extra_msg;
    *extra_msg->root= extra_msg->value;
    break;
  }
  default:
    /* Any other record type is not expected here */
    DBUG_ASSERT(0);
  }
  if (trn->undo_lsn == LSN_IMPOSSIBLE) /* has fully rolled back */
    trn->first_undo_lsn= LSN_WITH_FLAGS_TO_FLAGS(trn->first_undo_lsn);
  return 0;
}
/**
  @brief write hook for undo key insert

  Stores the new key root carried in the message, releases key_del and
  then continues with the generic undo hook.

  @param  type      Record type, passed on to write_hook_for_undo()
  @param  trn       Transaction, passed on to write_hook_for_undo()
  @param  tbl_info  Maria handler
  @param  lsn       LSN, passed on to write_hook_for_undo()
  @param  hook_arg  Pointer to a struct st_msg_to_write_hook_for_undo_key

  @return Result of write_hook_for_undo()
*/

my_bool write_hook_for_undo_key(enum translog_record_type type,
                                TRN *trn, MARIA_HA *tbl_info,
                                LSN *lsn, void *hook_arg)
{
  struct st_msg_to_write_hook_for_undo_key *root_update;

  root_update= (struct st_msg_to_write_hook_for_undo_key *) hook_arg;

  /* Install the new root position for the key */
  *root_update->root= root_update->value;

  _ma_fast_unlock_key_del(tbl_info);

  /* The generic undo hook gets no extra argument */
  return write_hook_for_undo(type, trn, tbl_info, lsn, 0);
}
/*****************************************************************************
Functions for logging of key page changes
*****************************************************************************/
/**
  @brief
  Write log entry for page that has got data added or deleted at start of page

  @param  info            Maria handler
  @param  page            Position of the page; divided by block_size to get
                          the page number that is stored in the log
  @param  buff            Page buffer; the logged data is read from
                          buff + keypage_header
  @param  changed_length  Number of bytes of page data to log
  @param  move_length     < 0: number of bytes deleted at the start of the page
                          >= 0: number of bytes added at the start of the page

  @return Result of translog_write_record()
*/

my_bool _ma_log_prefix(MARIA_HA *info, my_off_t page,
                       uchar *buff, uint changed_length,
                       int move_length)
{
  uint translog_parts;
  LSN lsn;
  uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 7], *log_pos;
  LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 2];
  DBUG_ENTER("_ma_log_prefix");
  DBUG_PRINT("enter", ("page: %lu  changed_length: %u  move_length: %d",
                       (ulong) page, changed_length, move_length));

  page/= info->s->block_size;
  log_pos= log_data + FILEID_STORE_SIZE;
  page_store(log_pos, page);
  log_pos+= PAGE_STORE_SIZE;

  if (move_length < 0)
  {
    /* Delete prefix */
    log_pos[0]= KEY_OP_DEL_PREFIX;
    int2store(log_pos+1, -move_length);
    log_pos+= 3;
    if (changed_length)
    {
      /*
        We don't need a KEY_OP_OFFSET as KEY_OP_DEL_PREFIX has an implicit
        offset
      */
      log_pos[0]= KEY_OP_CHANGE;
      int2store(log_pos+1, changed_length);
      log_pos+= 3;
    }
  }
  else
  {
    /* Add prefix */
    DBUG_ASSERT(changed_length >0 && (int) changed_length >= move_length);
    log_pos[0]= KEY_OP_ADD_PREFIX;
    int2store(log_pos+1, move_length);
    int2store(log_pos+3, changed_length);
    log_pos+= 5;
  }

  /*
    The header part is always written. Set the part count here so that it
    is defined for both branches, even if the assert above is compiled out.
  */
  translog_parts= 1;
  log_array[TRANSLOG_INTERNAL_PARTS + 0].str=    (char*) log_data;
  log_array[TRANSLOG_INTERNAL_PARTS + 0].length= (uint) (log_pos -
                                                         log_data);
  if (changed_length)
  {
    /* Changed data always starts directly after the key page header */
    log_array[TRANSLOG_INTERNAL_PARTS + 1].str=    ((char*) buff +
                                                    info->s->keypage_header);
    log_array[TRANSLOG_INTERNAL_PARTS + 1].length= changed_length;
    translog_parts= 2;
  }

  DBUG_RETURN(translog_write_record(&lsn, LOGREC_REDO_INDEX,
                                    info->trn, info,
                                    log_array[TRANSLOG_INTERNAL_PARTS +
                                              0].length + changed_length,
                                    TRANSLOG_INTERNAL_PARTS + translog_parts,
                                    log_array, log_data, NULL));
}
/**
  @brief
  Write log entry for page that has got data added or deleted at end of page

  @param  info        Maria handler
  @param  page        Position of the page; divided by block_size to get
                      the page number that is stored in the log
  @param  buff        Page buffer; added data is read from buff + org_length
  @param  org_length  Page length before the change
  @param  new_length  Page length after the change

  @return Result of translog_write_record()
*/

my_bool _ma_log_suffix(MARIA_HA *info, my_off_t page,
                       uchar *buff, uint org_length, uint new_length)
{
  LSN lsn;
  LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 2];
  uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 10], *log_pos;
  int length_change;
  uint parts_used, data_length;
  DBUG_ENTER("_ma_log_suffix");
  DBUG_PRINT("enter", ("page: %lu  org_length: %u  new_length: %u",
                       (ulong) page, org_length, new_length));

  page/= info->s->block_size;

  log_pos= log_data + FILEID_STORE_SIZE;
  page_store(log_pos, page);
  log_pos+= PAGE_STORE_SIZE;

  length_change= (int) (new_length - org_length);
  if (length_change >= 0)
  {
    /* Page grew (or is unchanged): log the bytes added after org_length */
    log_pos[0]= KEY_OP_ADD_SUFFIX;
    int2store(log_pos+1, length_change);
    data_length= (uint) length_change;
    log_array[TRANSLOG_INTERNAL_PARTS + 1].str=    (char*) buff + org_length;
    log_array[TRANSLOG_INTERNAL_PARTS + 1].length= data_length;
    parts_used= 2;
  }
  else
  {
    /* Page shrank: only the number of removed bytes is logged */
    log_pos[0]= KEY_OP_DEL_SUFFIX;
    int2store(log_pos+1, -length_change);
    data_length= 0;
    parts_used= 1;
  }
  log_pos+= 3;

  log_array[TRANSLOG_INTERNAL_PARTS + 0].str=    log_data;
  log_array[TRANSLOG_INTERNAL_PARTS + 0].length= (uint) (log_pos -
                                                         log_data);

  DBUG_RETURN(translog_write_record(&lsn, LOGREC_REDO_INDEX,
                                    info->trn, info,
                                    log_array[TRANSLOG_INTERNAL_PARTS +
                                              0].length + data_length,
                                    TRANSLOG_INTERNAL_PARTS + parts_used,
                                    log_array, log_data, NULL));
}
/**
  @brief Log that a key was added to the page

  @param  info             Maria handler
  @param  page             Position of the page; divided by block_size to
                           get the page number that is stored in the log
  @param  buff             Page buffer
  @param  buff_length      Length of the data in buff
  @param  key_pos          Position in buff where the change starts
  @param  changed_length   Number of bytes to log from key_pos
  @param  move_length      Number of bytes the data after key_pos is shifted
  @param  handle_overflow  Only checked by an assert: must be set if
                           buff_length + move_length can exceed the usable
                           page length

  @note
    If handle_overflow is set, then we have to protect against
    logging changes that are outside of the page.
    This may happen during underflow() handling where the buffer
    in memory temporarily contains more data than block_size

  @return Operation status
    @retval 0   OK
    @retval -1  translog_write_record() failed
*/

my_bool _ma_log_add(MARIA_HA *info, my_off_t page, uchar *buff,
                    uint buff_length, uchar *key_pos,
                    uint changed_length, int move_length,
                    my_bool handle_overflow __attribute__ ((unused)))
{
  LSN lsn;
  uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 3+ 3 + 3 + 3], *log_pos;
  LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 2];
  uint key_offset= (uint) (key_pos - buff);
  uint max_page_length= info->s->block_size - KEYPAGE_CHECKSUM_SIZE;
  DBUG_ENTER("_ma_log_add");
  DBUG_PRINT("enter", ("page: %lu  page_length: %u  changed_length: %u  "
                       "move_length: %d",
                       (ulong) page, buff_length, changed_length,
                       move_length));
  DBUG_ASSERT(info->s->now_transactional);

  /*
    Write REDO entry that contains the logical operations we need
    to do on the page
  */
  log_pos= log_data + FILEID_STORE_SIZE;
  page/= info->s->block_size;
  page_store(log_pos, page);
  log_pos+= PAGE_STORE_SIZE;

  if (buff_length + move_length > max_page_length)
  {
    /*
      Overflow. Cut either key or data from page end so that key fits
      The code that splits the too big page will ignore logging any
      data over page_length
    */
    DBUG_ASSERT(handle_overflow);
    if (key_offset + changed_length <= max_page_length)
    {
      /* The change itself fits: drop the overflowing tail first */
      uint cut_length= buff_length + move_length - max_page_length;
      log_pos[0]= KEY_OP_DEL_SUFFIX;
      int2store(log_pos+1, cut_length);
      log_pos+= 3;
      buff_length= max_page_length - move_length;
    }
    else
    {
      /* The change runs past the page end: log only the part that fits */
      changed_length= max_page_length - key_offset;
      move_length= 0;
    }
  }

  if (key_offset != buff_length)
  {
    /* Change inside the page: position, optional shift, then the data */
    log_pos[0]= KEY_OP_OFFSET;
    int2store(log_pos+1, key_offset);
    log_pos+= 3;
    if (move_length)
    {
      log_pos[0]= KEY_OP_SHIFT;
      int2store(log_pos+1, move_length);
      log_pos+= 3;
    }
    log_pos[0]= KEY_OP_CHANGE;
  }
  else
  {
    /* Change starts exactly at the end of the data: plain append */
    log_pos[0]= KEY_OP_ADD_SUFFIX;
  }
  /* Both KEY_OP_CHANGE and KEY_OP_ADD_SUFFIX are followed by the length */
  int2store(log_pos+1, changed_length);
  log_pos+= 3;

  log_array[TRANSLOG_INTERNAL_PARTS + 0].str=    log_data;
  log_array[TRANSLOG_INTERNAL_PARTS + 0].length= (uint) (log_pos -
                                                         log_data);
  log_array[TRANSLOG_INTERNAL_PARTS + 1].str=    key_pos;
  log_array[TRANSLOG_INTERNAL_PARTS + 1].length= changed_length;

  if (translog_write_record(&lsn, LOGREC_REDO_INDEX,
                            info->trn, info,
                            log_array[TRANSLOG_INTERNAL_PARTS +
                                      0].length + changed_length,
                            TRANSLOG_INTERNAL_PARTS + 2, log_array,
                            log_data, NULL))
    DBUG_RETURN(-1);
  DBUG_RETURN(0);
}
/****************************************************************************
Redo of key pages
****************************************************************************/
@ -136,6 +428,8 @@ uint _ma_apply_redo_index_new_page(MARIA_HA *info, LSN lsn,
uint result;
MARIA_SHARE *share= info->s;
DBUG_ENTER("_ma_apply_redo_index_new_page");
DBUG_PRINT("enter", ("root_page: %lu free_page: %lu",
(ulong) root_page, (ulong) free_page));
/* Set header to point at key data */
@ -156,7 +450,8 @@ uint _ma_apply_redo_index_new_page(MARIA_HA *info, LSN lsn,
file_size= (my_off_t) (root_page + 1) * share->block_size;
/* If root page */
if (page_type_flag)
if (page_type_flag &&
cmp_translog_addr(lsn, share->state.is_of_horizon) >= 0)
share->state.key_root[key_nr]= file_size - share->block_size;
if (file_size > info->state->key_file_length)
@ -164,8 +459,8 @@ uint _ma_apply_redo_index_new_page(MARIA_HA *info, LSN lsn,
info->state->key_file_length= file_size;
buff= info->keyread_buff;
info->keyread_buff_used= 1;
unlock_method= PAGECACHE_LOCK_LEFT_UNLOCKED;
unpin_method= PAGECACHE_PIN_LEFT_UNPINNED;
unlock_method= PAGECACHE_LOCK_WRITE;
unpin_method= PAGECACHE_PIN;
}
else
{
@ -183,24 +478,32 @@ uint _ma_apply_redo_index_new_page(MARIA_HA *info, LSN lsn,
result= 0;
goto err;
}
unlock_method= PAGECACHE_LOCK_WRITE_UNLOCK;
unpin_method= PAGECACHE_UNPIN;
unlock_method= PAGECACHE_LOCK_LEFT_WRITELOCKED;
unpin_method= PAGECACHE_PIN_LEFT_PINNED;
}
/* Write modified page */
lsn_store(buff, lsn);
memcpy(buff + LSN_STORE_SIZE, header, length);
bzero(buff + LSN_STORE_SIZE + length,
share->block_size - LSN_STORE_SIZE - KEYPAGE_CHECKSUM_SIZE - length);
bfill(buff + share->block_size - KEYPAGE_CHECKSUM_SIZE,
KEYPAGE_CHECKSUM_SIZE, (uchar) 255);
if (pagecache_write(share->pagecache,
result= 0;
if (unlock_method == PAGECACHE_LOCK_WRITE &&
pagecache_write(share->pagecache,
&share->kfile, root_page, 0,
buff, PAGECACHE_PLAIN_PAGE,
unlock_method, unpin_method,
PAGECACHE_WRITE_DELAY, 0))
DBUG_RETURN(my_errno);
DBUG_RETURN(0);
PAGECACHE_WRITE_DELAY, &page_link.link,
LSN_IMPOSSIBLE))
result= 1;
/* Mark page to be unlocked and written at _ma_unpin_all_pages() */
page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK;
page_link.changed= 1;
push_dynamic(&info->pinned_pages, (void*) &page_link);
DBUG_RETURN(result);
err:
pagecache_unlock_by_link(share->pagecache, page_link.link,
@ -234,14 +537,16 @@ uint _ma_apply_redo_index_free_page(MARIA_HA *info,
uchar *buff;
int result;
DBUG_ENTER("_ma_apply_redo_index_free_page");
DBUG_PRINT("enter", ("page: %lu free_page: %lu",
(ulong) page, (ulong) free_page));
share->state.changed|= (STATE_CHANGED | STATE_NOT_OPTIMIZED_KEYS |
STATE_NOT_SORTED_PAGES);
old_link= share->state.key_del;
share->state.key_del= ((free_page != IMPOSSIBLE_PAGE_NO) ?
(my_off_t) free_page * share->block_size :
HA_OFFSET_ERROR);
share->state.key_del= (my_off_t) page * share->block_size;
old_link= ((free_page != IMPOSSIBLE_PAGE_NO) ?
(my_off_t) free_page * share->block_size :
HA_OFFSET_ERROR);
if (!(buff= pagecache_read(share->pagecache, &info->s->kfile,
page, 0, 0,
PAGECACHE_PLAIN_PAGE, PAGECACHE_LOCK_WRITE,
@ -256,20 +561,24 @@ uint _ma_apply_redo_index_free_page(MARIA_HA *info,
result= 0;
goto err;
}
/* Write modified page */
lsn_store(buff, lsn);
/* Free page */
bzero(buff + LSN_STORE_SIZE, share->keypage_header - LSN_STORE_SIZE);
_ma_store_keynr(info, buff, (uchar) MARIA_DELETE_KEY_NR);
mi_sizestore(buff + share->keypage_header, old_link);
share->state.changed|= STATE_NOT_SORTED_PAGES;
if (pagecache_write(share->pagecache,
&info->s->kfile, page, 0,
buff, PAGECACHE_PLAIN_PAGE,
PAGECACHE_LOCK_WRITE_UNLOCK,
PAGECACHE_UNPIN,
PAGECACHE_WRITE_DELAY, 0))
DBUG_RETURN(my_errno);
#ifdef IDENTICAL_PAGES_AFTER_RECOVERY
{
bzero(buff + share->keypage_header + 8,
share->block_size - share->keypage_header - 8 -
KEYPAGE_CHECKSUM_SIZE);
}
#endif
/* Mark page to be unlocked and written at _ma_unpin_all_pages() */
page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK;
page_link.changed= 1;
push_dynamic(&info->pinned_pages, (void*) &page_link);
DBUG_RETURN(0);
err:
@ -303,7 +612,7 @@ err:
KEY_OP_DEL_PREFIX 2 length Bytes to be deleted at page start
KEY_OP_ADD_SUFFIX 2 length, data Add data to end of page
KEY_OP_DEL_SUFFIX 2 length Reduce page length with this
Sets position to start of page
@return Operation status
@retval 0 OK
@retval 1 Error
@ -322,6 +631,7 @@ uint _ma_apply_redo_index(MARIA_HA *info,
int result;
uint org_page_length;
DBUG_ENTER("_ma_apply_redo_index");
DBUG_PRINT("enter", ("page: %lu", (ulong) page));
/* Set header to point at key data */
header+= PAGE_STORE_SIZE;
@ -344,6 +654,7 @@ uint _ma_apply_redo_index(MARIA_HA *info,
_ma_get_used_and_nod(info, buff, page_length, nod_flag);
keypage_header= share->keypage_header;
org_page_length= page_length;
DBUG_PRINT("info", ("page_length: %u", page_length));
/* Apply modifications to page */
do
@ -358,7 +669,7 @@ uint _ma_apply_redo_index(MARIA_HA *info,
{
int length= sint2korr(header);
header+= 2;
DBUG_ASSERT(page_offset != 0 && page_offset < page_length &&
DBUG_ASSERT(page_offset != 0 && page_offset <= page_length &&
page_length + length < share->block_size);
if (length < 0)
@ -382,14 +693,14 @@ uint _ma_apply_redo_index(MARIA_HA *info,
case KEY_OP_ADD_PREFIX:
{
uint insert_length= uint2korr(header);
uint change_length= uint2korr(header+2);
DBUG_ASSERT(insert_length <= change_length &&
page_length + change_length <= share->block_size);
uint changed_length= uint2korr(header+2);
DBUG_ASSERT(insert_length <= changed_length &&
page_length + changed_length <= share->block_size);
bmove_upp(buff + page_length + insert_length, buff + page_length,
page_length - keypage_header);
memcpy(buff + keypage_header, header + 4 , change_length);
header+= 4 + change_length;
memcpy(buff + keypage_header, header + 4 , changed_length);
header+= 4 + changed_length;
page_length+= insert_length;
break;
}
@ -402,6 +713,8 @@ uint _ma_apply_redo_index(MARIA_HA *info,
bmove(buff + keypage_header, buff + keypage_header +
length, page_length - keypage_header - length);
page_length-= length;
page_offset= keypage_header; /* Prepare for change */
break;
}
case KEY_OP_ADD_SUFFIX:
@ -432,7 +745,6 @@ uint _ma_apply_redo_index(MARIA_HA *info,
DBUG_ASSERT(header == header_end);
/* Write modified page */
lsn_store(buff, lsn);
_ma_store_page_used(info, buff, page_length, nod_flag);
/*
@ -442,12 +754,19 @@ uint _ma_apply_redo_index(MARIA_HA *info,
if (page_length < org_page_length)
bzero(buff + page_length, org_page_length-page_length);
result= 0;
if (pagecache_write(share->pagecache,
&info->s->kfile, page, 0,
buff, PAGECACHE_PLAIN_PAGE,
PAGECACHE_LOCK_WRITE_UNLOCK, PAGECACHE_UNPIN,
PAGECACHE_WRITE_DELAY, 0))
DBUG_RETURN(my_errno);
PAGECACHE_LOCK_LEFT_WRITELOCKED,
PAGECACHE_PIN_LEFT_PINNED,
PAGECACHE_WRITE_DELAY, 0, LSN_IMPOSSIBLE))
result= 1;
/* Mark page to be unlocked and written at _ma_unpin_all_pages() */
page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK;
page_link.changed= 1;
push_dynamic(&info->pinned_pages, (void*) &page_link);
DBUG_RETURN(0);
err:
@ -463,7 +782,6 @@ err:
Undo of key block changes
****************************************************************************/
/**
@brief Undo of insert of key (ie, delete the inserted key)
*/
@ -476,6 +794,8 @@ my_bool _ma_apply_undo_key_insert(MARIA_HA *info, LSN undo_lsn,
uint keynr;
uchar key[HA_MAX_KEY_BUFF];
MARIA_SHARE *share= info->s;
my_off_t new_root;
struct st_msg_to_write_hook_for_undo_key msg;
DBUG_ENTER("_ma_apply_undo_key_insert");
share->state.changed|= (STATE_CHANGED | STATE_NOT_OPTIMIZED_KEYS |
@ -484,15 +804,20 @@ my_bool _ma_apply_undo_key_insert(MARIA_HA *info, LSN undo_lsn,
length-= KEY_NR_STORE_SIZE;
/* We have to copy key as _ma_ck_real_delete() may change it */
memcpy(key, header+ KEY_NR_STORE_SIZE, length);
memcpy(key, header + KEY_NR_STORE_SIZE, length);
DBUG_DUMP("key", key, length);
res= _ma_ck_real_delete(info, share->keyinfo+keynr, key, length,
&share->state.key_root[keynr]);
new_root= share->state.key_root[keynr];
res= _ma_ck_real_delete(info, share->keyinfo+keynr, key,
length - info->s->rec_reflength, &new_root);
if (_ma_write_clr(info, undo_lsn, LOGREC_UNDO_KEY_INSERT, 1, 0, &lsn))
msg.root= &share->state.key_root[keynr];
msg.value= new_root;
if (_ma_write_clr(info, undo_lsn, LOGREC_UNDO_KEY_INSERT, 1, 0, &lsn,
(void*) &msg))
res= 1;
_ma_fast_unlock_key_del(info);
_ma_unpin_all_pages_and_finalize_row(info, lsn);
DBUG_RETURN(res);
}
@ -510,6 +835,8 @@ my_bool _ma_apply_undo_key_delete(MARIA_HA *info, LSN undo_lsn,
uint keynr;
uchar key[HA_MAX_KEY_BUFF];
MARIA_SHARE *share= info->s;
my_off_t new_root;
struct st_msg_to_write_hook_for_undo_key msg;
DBUG_ENTER("_ma_apply_undo_key_delete");
share->state.changed|= (STATE_CHANGED | STATE_NOT_OPTIMIZED_KEYS |
@ -517,17 +844,22 @@ my_bool _ma_apply_undo_key_delete(MARIA_HA *info, LSN undo_lsn,
keynr= key_nr_korr(header);
length-= KEY_NR_STORE_SIZE;
/* We have to copy key as _ma_ck_real_delete() may change it */
memcpy(key, header+ KEY_NR_STORE_SIZE, length);
/* We have to copy key as _ma_ck_real_write_btree() may change it */
memcpy(key, header + KEY_NR_STORE_SIZE, length);
DBUG_DUMP("key", key, length);
res= _ma_ck_real_write_btree(info, share->keyinfo+keynr, key, length,
&share->state.key_root[keynr],
new_root= share->state.key_root[keynr];
res= _ma_ck_real_write_btree(info, share->keyinfo+keynr, key,
length - info->s->rec_reflength,
&new_root,
share->keyinfo[keynr].write_comp_flag);
if (_ma_write_clr(info, undo_lsn, LOGREC_UNDO_KEY_DELETE, 1, 0, &lsn))
msg.root= &share->state.key_root[keynr];
msg.value= new_root;
if (_ma_write_clr(info, undo_lsn, LOGREC_UNDO_KEY_DELETE, 1, 0, &lsn,
(void*) &msg))
res= 1;
_ma_fast_unlock_key_del(info);
_ma_unpin_all_pages_and_finalize_row(info, lsn);
DBUG_RETURN(res);
}
@ -537,7 +869,7 @@ my_bool _ma_apply_undo_key_delete(MARIA_HA *info, LSN undo_lsn,
Handle some local variables
****************************************************************************/
/*
/**
@brief lock key_del for other threads usage
@fn _ma_lock_key_del()
@ -573,13 +905,14 @@ my_bool _ma_lock_key_del(MARIA_HA *info, my_bool insert_at_end)
#endif
info->used_key_del= 1;
share->used_key_del= 1;
share->current_key_del= share->state.key_del;
pthread_mutex_unlock(&share->intern_lock);
}
return 0;
}
/*
/**
@brief copy changes to key_del and unlock it
*/

View file

@ -21,14 +21,48 @@
called).
*/
/*
  Struct for clr_end
  Message handed to write_hook_for_clr_end() through its hook_arg argument.
*/
struct st_msg_to_write_hook_for_clr_end
{
/* Value the hook stores in trn->undo_lsn */
LSN previous_undo_lsn;
/* Type of the undone record; decides which state the hook updates */
enum translog_record_type undone_record_type;
/* Added to the table checksum for row undos */
ha_checksum checksum_delta;
/*
  For key undos the hook reads this as a
  struct st_msg_to_write_hook_for_undo_key
*/
void *extra_msg;
};
/*
  Message used by the key undo hooks to update a key root:
  the hook does '*root= value'.
*/
struct st_msg_to_write_hook_for_undo_key
{
/* Where the new root position is to be stored */
my_off_t *root;
/* The new root position */
my_off_t value;
};
/* Function definitions for some redo functions */
my_bool _ma_write_clr(MARIA_HA *info, LSN undo_lsn,
enum translog_record_type undo_type,
my_bool store_checksum, ha_checksum checksum,
LSN *res_lsn);
LSN *res_lsn, void *extra_msg);
my_bool write_hook_for_clr_end(enum translog_record_type type,
TRN *trn, MARIA_HA *tbl_info, LSN *lsn,
void *hook_arg);
extern my_bool write_hook_for_undo_key(enum translog_record_type type,
TRN *trn, MARIA_HA *tbl_info,
LSN *lsn, void *hook_arg);
void _ma_unpin_all_pages(MARIA_HA *info, LSN undo_lsn);
my_bool _ma_log_prefix(MARIA_HA *info, my_off_t page,
uchar *buff, uint changed_length,
int move_length);
my_bool _ma_log_suffix(MARIA_HA *info, my_off_t page,
uchar *buff, uint org_length,
uint new_length);
my_bool _ma_log_add(MARIA_HA *info, my_off_t page, uchar *buff,
uint buff_length, uchar *key_pos,
uint changed_length, int move_length,
my_bool handle_overflow);
uint _ma_apply_redo_index_new_page(MARIA_HA *info, LSN lsn,
const uchar *header, uint length);
uint _ma_apply_redo_index_free_page(MARIA_HA *info, LSN lsn,

View file

@ -14,8 +14,9 @@
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
#include "maria_def.h"
#include "ma_blockrec.h" /* for some constants and in-write hooks */
#include "trnman.h"
#include "ma_blockrec.h" /* for some constants and in-write hooks */
#include "ma_key_recover.h" /* For some in-write hooks */
/**
@file

View file

@ -117,7 +117,7 @@ static MARIA_HA *maria_clone_internal(MARIA_SHARE *share, int mode,
&info.blobs,sizeof(MARIA_BLOB)*share->base.blobs,
&info.buff,(share->base.max_key_block_length*2+
share->base.max_key_length),
&info.lastkey,share->base.max_key_length*3+1,
&info.lastkey,share->base.max_key_length*2+1,
&info.first_mbr_key, share->base.max_key_length,
&info.maria_rtree_recursion_state,
share->have_rtree ? 1024 : 0,
@ -304,8 +304,8 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags)
}
share->mode=open_mode;
errpos= 1;
if (my_read(kfile,share->state.header.file_version, head_length,
MYF(MY_NABP)))
if (my_pread(kfile,share->state.header.file_version, head_length, 0,
MYF(MY_NABP)))
{
my_errno= HA_ERR_NOT_A_TABLE;
goto err;
@ -355,11 +355,8 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags)
goto err;
}
end_pos=disk_cache+info_length;
errpos= 2;
VOID(my_seek(kfile,0L,MY_SEEK_SET,MYF(0)));
errpos= 3;
if (my_read(kfile,disk_cache,info_length,MYF(MY_NABP)))
if (my_pread(kfile, disk_cache, info_length, 0L, MYF(MY_NABP)))
{
my_errno=HA_ERR_CRASHED;
goto err;
@ -418,8 +415,10 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags)
goto err;
}
/*
If page cache is not initialized, then assume we will create it
after the table is opened!
If page cache is not initialized, then assume we will create the
page_cache after the table is opened!
This is only used by maria_check to allow it to check/repair tables
with different block sizes.
*/
if (share->base.block_size != maria_block_size &&
share_buff.pagecache->inited != 0)
@ -1226,7 +1225,6 @@ static uchar *_ma_state_info_read(uchar *ptr, MARIA_STATE_INFO *state)
@param file file to read from
@param state state which will be filled
@param pRead if true, use my_pread(), otherwise my_read()
*/
uint _ma_state_info_read_dsk(File file, MARIA_STATE_INFO *state)

View file

@ -122,6 +122,7 @@ int _ma_write_keypage(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
#ifdef IDENTICAL_PAGES_AFTER_RECOVERY
{
uint length= _ma_get_page_used(info, buff);
DBUG_ASSERT(length <= block_size - KEYPAGE_CHECKSUM_SIZE);
bzero(buff + length, block_size - length);
}
#endif
@ -186,13 +187,14 @@ int _ma_dispose(register MARIA_HA *info, my_off_t pos, my_bool page_not_read)
(void) _ma_lock_key_del(info, 0);
old_link= share->state.key_del;
share->state.key_del= pos;
old_link= share->current_key_del;
share->current_key_del= pos;
page_no= pos / block_size;
bzero(buff, share->keypage_header);
_ma_store_keynr(info, buff, (uchar) MARIA_DELETE_KEY_NR);
mi_sizestore(buff + share->keypage_header, old_link);
share->state.changed|= STATE_NOT_SORTED_PAGES;
if (info->s->now_transactional)
{
LSN lsn;
@ -239,6 +241,14 @@ int _ma_dispose(register MARIA_HA *info, my_off_t pos, my_bool page_not_read)
0, share->keypage_header+8, 0, 0))
result= 1;
#ifdef IDENTICAL_PAGES_AFTER_RECOVERY
{
uchar *page_buff= pagecache_block_link_to_buffer(page_link.link);
bzero(page_buff + share->keypage_header + 8,
block_size - share->keypage_header - 8 - KEYPAGE_CHECKSUM_SIZE);
}
#endif
if (page_not_read)
{
/* It was not locked before, we have to unlock it when we unpin pages */
@ -295,7 +305,7 @@ my_off_t _ma_new(register MARIA_HA *info, int level,
TODO: replace PAGECACHE_PLAIN_PAGE with PAGECACHE_LSN_PAGE when
LSN on the pages will be implemented
*/
pos= info->s->state.key_del; /* Protected */
pos= share->current_key_del; /* Protected */
DBUG_ASSERT(share->pagecache->block_size == block_size);
if (!(buff= pagecache_read(share->pagecache,
&share->kfile, pos / block_size, level,
@ -312,7 +322,7 @@ my_off_t _ma_new(register MARIA_HA *info, int level,
(*page_link)->unlock= PAGECACHE_LOCK_WRITE_UNLOCK;
(*page_link)->write_lock= PAGECACHE_LOCK_WRITE;
(*page_link)->changed= 0;
push_dynamic(&info->pinned_pages, (void*) &page_link);
push_dynamic(&info->pinned_pages, (void*) *page_link);
*page_link= dynamic_element(&info->pinned_pages,
info->pinned_pages.elements-1,
MARIA_PINNED_PAGE *);

View file

@ -405,8 +405,7 @@ static my_bool info_check_lock(PAGECACHE_BLOCK_LINK *block,
(PAGECACHE_LOCK_INFO *) info_find((PAGECACHE_PIN_INFO *) block->lock_list,
thread);
DBUG_ENTER("info_check_lock");
switch(lock)
{
switch(lock) {
case PAGECACHE_LOCK_LEFT_UNLOCKED:
if (pin != PAGECACHE_PIN_LEFT_UNPINNED ||
info)
@ -1199,7 +1198,7 @@ static inline void link_to_changed_list(PAGECACHE *pagecache,
none
NOTES.
The LRU chain is represented by a curcular list of block structures.
The LRU chain is represented by a circular list of block structures.
The list is double-linked of the type (**prev,*next) type.
The LRU chain is divided into two parts - hot and warm.
There are two pointers to access the last blocks of these two
@ -1268,7 +1267,7 @@ static void link_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block,
}
#else /* THREAD */
KEYCACHE_DBUG_ASSERT(! (!hot && pagecache->waiting_for_block.last_thread));
/* Condition not transformed using DeMorgan, to keep the text identical */
/* Condition not transformed using DeMorgan, to keep the text identical */
#endif /* THREAD */
ptr_ins= hot ? &pagecache->used_ins : &pagecache->used_last;
ins= *ptr_ins;
@ -2730,10 +2729,10 @@ void pagecache_unlock_by_link(PAGECACHE *pagecache,
LSN lsn, my_bool was_changed)
{
DBUG_ENTER("pagecache_unlock_by_link");
DBUG_PRINT("enter", ("block: 0x%lx fd: %u page: %lu %s %s",
DBUG_PRINT("enter", ("block: 0x%lx fd: %u page: %lu changed: %d %s %s",
(ulong) block,
(uint) block->hash_link->file.file,
(ulong) block->hash_link->pageno,
(ulong) block->hash_link->pageno, was_changed,
page_cache_page_lock_str[lock],
page_cache_page_pin_str[pin]));
/*
@ -2799,7 +2798,6 @@ void pagecache_unlock_by_link(PAGECACHE *pagecache,
(ulong) block));
}
pagecache_set_block_rec_lsn(block, first_REDO_LSN_for_page);
if (make_lock_and_pin(pagecache, block, lock, pin, 0))
DBUG_ASSERT(0); /* should not happend */
@ -4246,6 +4244,11 @@ static void test_key_cache(PAGECACHE *pagecache __attribute__((unused)),
}
#endif
/**
  @brief Return the buffer of a page cache block

  @param  block  Page cache block link

  @return The 'buffer' member of the block
*/

uchar *pagecache_block_link_to_buffer(PAGECACHE_BLOCK_LINK *block)
{
  uchar *page_buffer= block->buffer;
  return page_buffer;
}
#if defined(PAGECACHE_TIMEOUT)
#define KEYCACHE_DUMP_FILE "pagecache_dump.txt"

View file

@ -269,6 +269,7 @@ extern my_bool pagecache_collect_changed_blocks_with_lsn(PAGECACHE *pagecache,
LEX_STRING *str,
LSN *min_lsn);
extern int reset_pagecache_counters(const char *name, PAGECACHE *pagecache);
extern uchar *pagecache_block_link_to_buffer(PAGECACHE_BLOCK_LINK *block);
/* Functions to handle multiple key caches */

View file

@ -1529,7 +1529,8 @@ prototype_redo_exec_hook(UNDO_ROW_DELETE)
}
share->state.state.checksum+= ha_checksum_korr(buff);
}
share->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED;
share->state.changed|= (STATE_CHANGED | STATE_NOT_ANALYZED |
STATE_NOT_OPTIMIZED_ROWS);
}
tprint(tracef, " rows' count %lu\n", (ulong)share->state.state.records);
_ma_unpin_all_pages(info, rec->lsn);
@ -1569,6 +1570,9 @@ prototype_redo_exec_hook(UNDO_ROW_UPDATE)
prototype_redo_exec_hook(UNDO_KEY_INSERT)
{
MARIA_HA *info;
if (!(info= get_MARIA_HA_from_UNDO_record(rec)))
return 0;
set_undo_lsn_for_active_trans(rec->short_trid, rec->lsn);
_ma_unpin_all_pages(info, rec->lsn);
return 0;
@ -1577,6 +1581,9 @@ prototype_redo_exec_hook(UNDO_KEY_INSERT)
prototype_redo_exec_hook(UNDO_KEY_DELETE)
{
MARIA_HA *info;
if (!(info= get_MARIA_HA_from_UNDO_record(rec)))
return 0;
set_undo_lsn_for_active_trans(rec->short_trid, rec->lsn);
_ma_unpin_all_pages(info, rec->lsn);
return 0;
@ -1595,9 +1602,9 @@ prototype_redo_exec_hook(UNDO_KEY_DELETE_WITH_ROOT)
{
uint key_nr;
my_off_t page;
page= page_korr(rec->header + LSN_STORE_SIZE + FILEID_STORE_SIZE);
key_nr= key_nr_korr(rec->header + LSN_STORE_SIZE + FILEID_STORE_SIZE +
PAGE_STORE_SIZE);
key_nr= key_nr_korr(rec->header + LSN_STORE_SIZE + FILEID_STORE_SIZE);
page= page_korr(rec->header + LSN_STORE_SIZE + FILEID_STORE_SIZE +
KEY_NR_STORE_SIZE);
share->state.key_root[key_nr]= (page == IMPOSSIBLE_PAGE_NO ?
HA_OFFSET_ERROR :
page * share->block_size);
@ -1653,6 +1660,7 @@ prototype_redo_exec_hook(CLR_END)
LSN previous_undo_lsn;
enum translog_record_type undone_record_type;
const LOG_DESC *log_desc;
my_bool row_entry= 0;
if (info == NULL)
return 0;
@ -1668,7 +1676,25 @@ prototype_redo_exec_hook(CLR_END)
if (cmp_translog_addr(rec->lsn, share->state.is_of_horizon) >= 0)
{
tprint(tracef, " state older than record, updating rows' count\n");
if (share->calc_checksum)
switch (undone_record_type) {
case LOGREC_UNDO_ROW_DELETE:
row_entry= 1;
share->state.state.records++;
break;
case LOGREC_UNDO_ROW_INSERT:
share->state.state.records--;
row_entry= 1;
break;
case LOGREC_UNDO_ROW_UPDATE:
row_entry= 1;
break;
case LOGREC_UNDO_KEY_INSERT:
case LOGREC_UNDO_KEY_DELETE:
break;
default:
DBUG_ASSERT(0);
}
if (row_entry && share->calc_checksum)
{
uchar buff[HA_CHECKSUM_STORE_SIZE];
if (translog_read_record(rec->lsn, LSN_STORE_SIZE + FILEID_STORE_SIZE +
@ -1680,21 +1706,10 @@ prototype_redo_exec_hook(CLR_END)
}
share->state.state.checksum+= ha_checksum_korr(buff);
}
switch (undone_record_type) {
case LOGREC_UNDO_ROW_DELETE:
share->state.state.records++;
break;
case LOGREC_UNDO_ROW_INSERT:
share->state.state.records--;
break;
case LOGREC_UNDO_ROW_UPDATE:
break;
default:
DBUG_ASSERT(0);
}
share->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED;
}
tprint(tracef, " rows' count %lu\n", (ulong)share->state.state.records);
if (row_entry)
tprint(tracef, " rows' count %lu\n", (ulong)share->state.state.records);
_ma_unpin_all_pages(info, rec->lsn);
return 0;
}
@ -2356,19 +2371,22 @@ static MARIA_HA *get_MARIA_HA_from_REDO_record(const
pgcache_page_no_t page;
MARIA_HA *info;
char llbuf[22];
my_bool index_page_redo_entry= 0;
print_redo_phase_progress(rec->lsn);
sid= fileid_korr(rec->header);
page= page_korr(rec->header + FILEID_STORE_SIZE);
switch(rec->type) {
switch (rec->type) {
/* not all REDO records have a page: */
case LOGREC_REDO_INDEX_NEW_PAGE:
case LOGREC_REDO_INDEX:
case LOGREC_REDO_INDEX_FREE_PAGE:
index_page_redo_entry= 1;
/* Fall through */
case LOGREC_REDO_INSERT_ROW_HEAD:
case LOGREC_REDO_INSERT_ROW_TAIL:
case LOGREC_REDO_PURGE_ROW_HEAD:
case LOGREC_REDO_PURGE_ROW_TAIL:
case LOGREC_REDO_INDEX_NEW_PAGE:
case LOGREC_REDO_INDEX:
case LOGREC_REDO_INDEX_FREE_PAGE:
llstr(page, llbuf);
tprint(tracef, " For page %s of table of short id %u", llbuf, sid);
break;
@ -2407,12 +2425,9 @@ static MARIA_HA *get_MARIA_HA_from_REDO_record(const
DBUG_ASSERT(info->s->last_version != 0);
if (cmp_translog_addr(rec->lsn, checkpoint_start) < 0)
{
/**
@todo RECOVERY BUG always assuming this is REDO for data file, but it
could soon be index file
*/
uint64 file_and_page_id=
(((uint64)all_tables[sid].org_dfile) << 32) | page;
(((uint64) (index_page_redo_entry ? all_tables[sid].org_kfile :
all_tables[sid].org_dfile)) << 32) | page;
struct st_dirty_page *dirty_page= (struct st_dirty_page *)
hash_search(&all_dirty_pages,
(uchar *)&file_and_page_id, sizeof(file_and_page_id));

View file

@ -1182,7 +1182,10 @@ static my_bool _ma_get_prev_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
/*
@brief Get last key from key-page
@brief Get last key from key-page before 'endpos'
@note
endpos may be either end of buffer or start of a key
@return
@retval pointer to where key starts
@ -1506,7 +1509,7 @@ _ma_calc_static_key_length(MARIA_KEYDEF *keyinfo,uint nod_flag,
const uchar *key, MARIA_KEY_PARAM *s_temp)
{
s_temp->key= key;
return (int) (s_temp->totlength=keyinfo->keylength+nod_flag);
return (int) (s_temp->move_length= keyinfo->keylength + nod_flag);
}
/* Variable length key */
@ -1519,26 +1522,28 @@ _ma_calc_var_key_length(MARIA_KEYDEF *keyinfo,uint nod_flag,
const uchar *key, MARIA_KEY_PARAM *s_temp)
{
s_temp->key= key;
return (int) (s_temp->totlength= _ma_keylength(keyinfo,key)+nod_flag);
return (int) (s_temp->move_length= _ma_keylength(keyinfo,key)+nod_flag);
}
/*
length of key with a variable length first segment which is prefix
compressed (maria_chk reports 'packed + stripped')
/**
@brief Calc length needed to store prefixed compressed keys
Keys are compressed the following way:
@info
Variable length first segment which is prefix compressed
(maria_chk reports 'packed + stripped')
If the max length of first key segment <= 127 bytes the prefix is
1 uchar else it's 2 byte
Keys are compressed the following way:
prefix byte(s) The high bit is set if this is a prefix for the prev key
length Packed length if the previous was a prefix byte
[length] data bytes ('length' bytes)
next-key-seg Next key segments
If the max length of first key segment <= 127 bytes the prefix is
1 uchar else it's 2 byte
If the first segment can have NULL:
The length is 0 for NULLS and 1+length for not null columns.
prefix byte(s) The high bit is set if this is a prefix for the prev key
length Packed length if the previous was a prefix byte
[length] data bytes ('length' bytes)
next-key-seg Next key segments
If the first segment can have NULL:
The length is 0 for NULLS and 1+length for not null columns.
*/
int
@ -1589,7 +1594,7 @@ _ma_calc_var_pack_key_length(MARIA_KEYDEF *keyinfo, uint nod_flag,
s_temp->key_length= 0;
s_temp->totlength= key_length-1+diff_flag;
s_temp->next_key_pos= 0; /* No next key */
return (s_temp->totlength);
return (s_temp->move_length= s_temp->totlength);
}
s_temp->store_not_null=1;
key_length--; /* We don't store NULL */
@ -1744,7 +1749,7 @@ _ma_calc_var_pack_key_length(MARIA_KEYDEF *keyinfo, uint nod_flag,
s_temp->n_ref_length=s_temp->n_length= org_key_length;
length+= org_key_length;
}
return (int) length;
return (s_temp->move_length= (int) length);
}
ref_length=n_length;
@ -1757,7 +1762,8 @@ _ma_calc_var_pack_key_length(MARIA_KEYDEF *keyinfo, uint nod_flag,
s_temp->part_of_prev_key= 0;
s_temp->prev_length= ref_length;
s_temp->n_ref_length= s_temp->n_length= n_length+ref_length;
return (int) length+ref_length-next_length_pack;
return s_temp->move_length= ((int) length+ref_length-
next_length_pack);
}
if (ref_length+pack_marker > new_ref_length)
{
@ -1768,7 +1774,7 @@ _ma_calc_var_pack_key_length(MARIA_KEYDEF *keyinfo, uint nod_flag,
s_temp->n_ref_length=s_temp->n_length=n_length + s_temp->prev_length;
s_temp->prev_key+= new_pack_length;
length-= (next_length_pack - get_pack_length(s_temp->n_length));
return (int) length + s_temp->prev_length;
return s_temp->move_length= ((int) length + s_temp->prev_length);
}
}
else
@ -1803,7 +1809,7 @@ _ma_calc_var_pack_key_length(MARIA_KEYDEF *keyinfo, uint nod_flag,
if (!(tmp_length=(uint) (key-start)))
{ /* Key can't be re-packed */
s_temp->next_key_pos=0;
return length;
return (s_temp->move_length= length);
}
ref_length+=tmp_length;
n_length-=tmp_length;
@ -1821,7 +1827,7 @@ _ma_calc_var_pack_key_length(MARIA_KEYDEF *keyinfo, uint nod_flag,
}
}
}
return length;
return (s_temp->move_length= length);
}
@ -1884,8 +1890,9 @@ int _ma_calc_bin_pack_key_length(MARIA_KEYDEF *keyinfo, uint nod_flag,
s_temp->n_ref_length= ref_length;
s_temp->prev_length= next_length-ref_length;
s_temp->prev_key+= ref_length;
return (int) (length+ s_temp->prev_length - next_length_pack +
get_pack_length(ref_length));
return s_temp->move_length= ((int) (length+ s_temp->prev_length -
next_length_pack +
get_pack_length(ref_length)));
}
/* Check how many characters are identical to next key */
key= s_temp->key+next_length;
@ -1893,14 +1900,15 @@ int _ma_calc_bin_pack_key_length(MARIA_KEYDEF *keyinfo, uint nod_flag,
if ((ref_length= (uint) (key - s_temp->key)-1) == next_length)
{
s_temp->next_key_pos=0;
return length; /* can't pack next key */
return (s_temp->move_length= length); /* Can't pack next key */
}
s_temp->prev_length=0;
s_temp->n_ref_length=ref_length;
return (int) (length-(ref_length - next_length) - next_length_pack +
get_pack_length(ref_length));
return s_temp->move_length= (int) (length-(ref_length - next_length) -
next_length_pack +
get_pack_length(ref_length));
}
return (int) length;
return (s_temp->move_length= (int) length);
}
@ -1914,8 +1922,8 @@ void _ma_store_static_key(MARIA_KEYDEF *keyinfo __attribute__((unused)),
register uchar *key_pos,
register MARIA_KEY_PARAM *s_temp)
{
memcpy(key_pos, s_temp->key,(size_t) s_temp->totlength);
s_temp->changed_length= s_temp->totlength;
memcpy(key_pos, s_temp->key,(size_t) s_temp->move_length);
s_temp->changed_length= s_temp->move_length;
}

View file

@ -97,7 +97,7 @@ int main(int argc,char *argv[])
static int run_test(const char *filename)
{
MARIA_HA *file;
int i,j,error,deleted,rec_length,uniques=0;
int i,j= 0,error,deleted,rec_length,uniques=0;
uint offset_to_key;
ha_rows found,row_count;
uchar record[MAX_REC_LENGTH],key[MAX_REC_LENGTH],read_record[MAX_REC_LENGTH];
@ -275,7 +275,7 @@ static int run_test(const char *filename)
{
if (!silent)
printf("- Checking unique constraint\n");
create_record(record,j);
create_record(record,j); /* Check last created row */
if (!maria_write(file,record) || my_errno != HA_ERR_FOUND_DUPP_UNIQUE)
{
printf("unique check failed\n");

View file

@ -880,15 +880,6 @@ int main(int argc, char *argv[])
goto err;
}
opt_delete++;
#if TO_BE_REMOVED
/
/*
179 is ok, 180 causes a difference between runtime and log-applying.
This is now fixed (we zero the last directory entry during
log-applying, just to eliminate this irrelevant difference).
*/
if (opt_delete==180) goto end;
#endif
}
else
found_parts++;

View file

@ -1,6 +1,7 @@
Running tests with dynamic row format
Running tests with static row format
Running tests with block row format
Running tests with block row format and transactions
ma_test2 -s -L -K -R1 -m2000 ; Should give error 135
Error: 135 in write at record: 1099
got error: 135 when using MARIA-database
@ -8,55 +9,6 @@ got error: 135 when using MARIA-database
maria_chk: MARIA file test2
maria_chk: warning: Datafile is almost full, 65516 of 65534 used
MARIA-table 'test2' is usable but should be fixed
real 0m0.808s
user 0m0.584s
sys 0m0.212s
real 0m0.780s
user 0m0.584s
sys 0m0.176s
real 0m0.809s
user 0m0.616s
sys 0m0.180s
real 0m1.356s
user 0m1.140s
sys 0m0.188s
real 0m0.783s
user 0m0.600s
sys 0m0.176s
real 0m1.390s
user 0m1.184s
sys 0m0.152s
real 0m1.875s
user 0m1.632s
sys 0m0.244s
real 0m1.313s
user 0m1.148s
sys 0m0.160s
real 0m1.846s
user 0m1.644s
sys 0m0.188s
real 0m1.875s
user 0m1.632s
sys 0m0.212s
real 0m1.819s
user 0m1.672s
sys 0m0.124s
real 0m2.117s
user 0m1.816s
sys 0m0.292s
real 0m1.871s
user 0m1.636s
sys 0m0.196s
MARIA RECOVERY TESTS
ALL RECOVERY TESTS OK
!!!!!!!! BUT REMEMBER to FIX this BLOB issue !!!!!!!

View file

@ -115,7 +115,7 @@ run_tests()
$maria_path/maria_chk$suffix -sm test2
$maria_path/ma_test2$suffix $silent -m10000 -e4096 -K $row_type
$maria_path/maria_chk$suffix -sm test2
$maria_path/ma_test2$suffix $silent -m10000 -e8192 -K $row_type
$maria_path/ma_test2$suffix $silent -m10000 -e8192 -K $row_type -P
$maria_path/maria_chk$suffix -sm test2
$maria_path/ma_test2$suffix $silent -m10000 -e16384 -E16384 -K -L $row_type
$maria_path/maria_chk$suffix -sm test2
@ -232,16 +232,16 @@ $maria_path/maria_chk$suffix -ssm test2
#
# Some timing tests
#
time $maria_path/ma_test2$suffix $silent
time $maria_path/ma_test2$suffix $silent -S
time $maria_path/ma_test2$suffix $silent -M
time $maria_path/ma_test2$suffix $silent -B
time $maria_path/ma_test2$suffix $silent -L
time $maria_path/ma_test2$suffix $silent -K
time $maria_path/ma_test2$suffix $silent -K -B
time $maria_path/ma_test2$suffix $silent -L -B
time $maria_path/ma_test2$suffix $silent -L -K -B
time $maria_path/ma_test2$suffix $silent -L -K -W -B
time $maria_path/ma_test2$suffix $silent -L -K -W -B -S
time $maria_path/ma_test2$suffix $silent -L -K -W -B -M
time $maria_path/ma_test2$suffix $silent -D -K -W -B -S
#time $maria_path/ma_test2$suffix $silent
#time $maria_path/ma_test2$suffix $silent -S
#time $maria_path/ma_test2$suffix $silent -M
#time $maria_path/ma_test2$suffix $silent -B
#time $maria_path/ma_test2$suffix $silent -L
#time $maria_path/ma_test2$suffix $silent -K
#time $maria_path/ma_test2$suffix $silent -K -B
#time $maria_path/ma_test2$suffix $silent -L -B
#time $maria_path/ma_test2$suffix $silent -L -K -B
#time $maria_path/ma_test2$suffix $silent -L -K -W -B
#time $maria_path/ma_test2$suffix $silent -L -K -W -B -S
#time $maria_path/ma_test2$suffix $silent -L -K -W -B -M
#time $maria_path/ma_test2$suffix $silent -D -K -W -B -S

View file

@ -87,9 +87,6 @@ apply_log()
(
# this message is to remember about the problem with -b (see @todo below)
echo "!!!!!!!! REMEMBER to FIX this BLOB issue !!!!!!!"
echo "Testing the REDO PHASE ALONE"
# runs a program inserting/deleting rows, then moves the resulting table
# elsewhere; applies the log and checks that the data file is
@ -111,10 +108,12 @@ do
mv $table.MAI $tmp/$table-good.MAI
apply_log "shouldnotchangelog"
cmp $table.MAD $tmp/$table-good.MAD
cmp $table.MAI $tmp/$table-good.MAI
check_table_is_same
echo "testing idempotency"
apply_log "shouldnotchangelog"
cmp $table.MAD $tmp/$table-good.MAD
cmp $table.MAI $tmp/$table-good.MAI
check_table_is_same
shift
done
@ -157,6 +156,12 @@ do
echo "TEST WITH $prog $abort_run_args$test_undo (additional aborted work)"
$maria_path/$prog $abort_run_args$test_undo
cp $table.MAD $tmp/$table.MAD.before_undo
# We have to copy and restore logs, as running maria_read_log will
# change the maria_control_file
rm -f $tmp/maria_log.* $tmp/maria_log_control
cp $maria_path/maria_log* $tmp
if [ $test_undo -lt 3 ]
then
apply_log "shouldchangelog" # should undo aborted work
@ -179,19 +184,9 @@ do
check_table_is_same
echo "testing applying of CLRs to recreate table"
rm $table.MA?
apply_log "shouldnotchangelog"
# the cmp below fails with ma_test1+blobs! @todo RECOVERY BUG why?
# It is probably serious; REDOs shouldn't place rows in different
# positions from what the run-time code did. Indeed it may lead to
# more or less free space...
# Execution of UNDO re-inserted rows at different positions than
# originally. This generated REDOs which do not insert at the same
# positions as the execution of UNDOs, but at the same positions
# as before the row was originally deleted.
if [ "$blobs" == "" ]
then
cmp $table.MAD $tmp/$table.MAD.after_undo
fi
cp $tmp/maria_log* $maria_path
apply_log "dontknow"
cmp $table.MAD $tmp/$table.MAD.after_undo
check_table_is_same
shift 3
done
@ -213,5 +208,3 @@ if [ "$diff_failed" == "1" ]
exit 1
fi
echo "ALL RECOVERY TESTS OK"
# this message is to remember about the problem with -b (see @todo above)
echo "!!!!!!!! BUT REMEMBER to FIX this BLOB issue !!!!!!!"

File diff suppressed because it is too large Load diff

View file

@ -47,9 +47,6 @@ static int _ma_ck_write_btree_with_log(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
my_off_t *root, uint comp_flag);
static my_bool _ma_log_new(MARIA_HA *info, my_off_t page, uchar *buff,
uint page_length, uint key_nr, my_bool root_page);
static my_bool _ma_log_add(MARIA_HA *info, my_off_t page, uchar *buff,
uchar *end_buff, uchar *key_pos,
uint changed_length, int move_length);
static my_bool _ma_log_change(MARIA_HA *info, my_off_t page, uchar *buff,
uchar *key_pos, uint length);
static my_bool _ma_log_split(MARIA_HA *info, my_off_t page, uchar *buff,
@ -58,7 +55,7 @@ static my_bool _ma_log_split(MARIA_HA *info, my_off_t page, uchar *buff,
uint key_length, int move_length,
enum en_key_op prefix_or_suffix,
uchar *data, uint data_length,
uint change_length);
uint changed_length);
static my_bool _ma_log_del_prefix(MARIA_HA *info, my_off_t page, uchar *buff,
uint org_length, uint new_length,
uchar *key_pos, uint key_length,
@ -70,12 +67,6 @@ static my_bool _ma_log_key_middle(MARIA_HA *info, my_off_t page, uchar *buff,
uint data_deleted_last,
uchar *key_pos,
uint key_length, int move_length);
static my_bool _ma_log_prefix(MARIA_HA *info, my_off_t page,
uchar *buff, uint changed_length,
int move_length);
static my_bool _ma_log_suffix(MARIA_HA *info, my_off_t page,
uchar *buff, uint org_length,
uint new_length);
/*
@brief Default handler for returning position to new row
@ -388,10 +379,17 @@ static int _ma_ck_write_btree_with_log(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
LSN lsn= LSN_IMPOSSIBLE;
int error;
my_off_t new_root= *root;
uchar key_buff[HA_MAX_KEY_BUFF];
#ifdef NOT_YET
DBUG_ENTER("_ma_ck_write_btree_with_log");
#endif
if (info->s->now_transactional)
{
/* Save original value as the key may change */
memcpy(key_buff, key, key_length + info->s->rec_reflength);
}
error= _ma_ck_real_write_btree(info, keyinfo, key, key_length, &new_root,
comp_flag);
if (!error && info->s->now_transactional)
@ -404,9 +402,10 @@ static int _ma_ck_write_btree_with_log(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
lsn_store(log_data, info->trn->undo_lsn);
key_nr_store(log_data + LSN_STORE_SIZE + FILEID_STORE_SIZE,
keyinfo->key_nr);
key_length+= info->s->rec_reflength;
log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data;
log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
log_array[TRANSLOG_INTERNAL_PARTS + 1].str= (char*) key;
log_array[TRANSLOG_INTERNAL_PARTS + 1].str= (char*) key_buff;
log_array[TRANSLOG_INTERNAL_PARTS + 1].length= key_length;
msg.root= root;
@ -460,23 +459,6 @@ int _ma_ck_real_write_btree(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
} /* _ma_ck_real_write_btree */
/**
  @brief Write hook for undo key insert

  Stores the value carried in the hook message through the message's
  'root' pointer, calls _ma_fast_unlock_key_del() on the table and then
  hands over to the generic write_hook_for_undo().

  @param type      Log record type; passed on to write_hook_for_undo()
  @param trn       Transaction; passed on to write_hook_for_undo()
  @param tbl_info  Table handler; used for _ma_fast_unlock_key_del() and
                   passed on to write_hook_for_undo()
  @param lsn       LSN pointer; passed on to write_hook_for_undo()
  @param hook_arg  Must point to a struct st_msg_to_write_hook_for_undo_key
                   whose 'root' and 'value' members are set by the caller

  @return Whatever write_hook_for_undo() returns
*/

my_bool write_hook_for_undo_key(enum translog_record_type type,
                                TRN *trn, MARIA_HA *tbl_info,
                                LSN *lsn, void *hook_arg)
{
  struct st_msg_to_write_hook_for_undo_key *undo_msg;

  undo_msg= (struct st_msg_to_write_hook_for_undo_key *) hook_arg;

  /* The root must be updated before _ma_fast_unlock_key_del() is called */
  *undo_msg->root= undo_msg->value;
  _ma_fast_unlock_key_del(tbl_info);

  /* The generic undo hook gets no hook argument of its own */
  return write_hook_for_undo(type, trn, tbl_info, lsn, 0);
}
/**
@brief Make a new root with key as only pointer
@ -675,6 +657,9 @@ err:
Insert new key at right of key_pos.
Note that caller must save anc_buff
This function writes log records for all changed pages
(Including anc_buff and father page)
RETURN
< 0 Error.
0 OK
@ -689,7 +674,7 @@ int _ma_insert(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
my_bool insert_last)
{
uint a_length, nod_flag, org_anc_length;
int t_length, res;
int t_length;
uchar *endpos, *prev_key;
MARIA_KEY_PARAM s_temp;
DBUG_ENTER("_ma_insert");
@ -804,8 +789,8 @@ int _ma_insert(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
else
{
if (info->s->now_transactional &&
_ma_log_add(info, anc_page, anc_buff, endpos, key_pos,
s_temp.changed_length, t_length))
_ma_log_add(info, anc_page, anc_buff, (uint) (endpos - anc_buff),
key_pos, s_temp.changed_length, t_length, 0))
DBUG_RETURN(-1);
}
DBUG_RETURN(0); /* There is room on page */
@ -817,44 +802,51 @@ int _ma_insert(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
father_buff && !insert_last)
{
s_temp.key_pos= key_pos;
s_temp.move_length= t_length;
father_page_link->changed= 1;
DBUG_RETURN(_ma_balance_page(info, keyinfo, key, anc_buff, anc_page,
father_page, father_buff, father_key_pos,
&s_temp));
}
res= _ma_split_page(info,keyinfo,key,anc_buff,key_buff, insert_last);
if (res < 0)
DBUG_RETURN(res); /* Error */
if (info->s->now_transactional)
{
if (_ma_log_split(info, anc_page, anc_buff, org_anc_length,
_ma_get_page_used(info, anc_buff),
key_pos,
s_temp.changed_length,
t_length, KEY_OP_NONE, (uchar*) 0, 0, 0))
res= -1;
}
DBUG_RETURN(res);
DBUG_RETURN(_ma_split_page(info, keyinfo, key, anc_page,
anc_buff, org_anc_length,
key_pos, s_temp.changed_length, t_length,
key_buff, insert_last));
} /* _ma_insert */
/**
@brief split a full page in two and assign emerging item to key
RETURN
@retval 0 ok
@fn _ma_split_page()
info Maria handler
keyinfo Key handler
key Buffer for middle key
split_page Address on disk for split_buff
split_buff Page buffer for page that should be split
org_split_length Original length of split_buff before key was inserted
inserted_key_pos Address in buffer where key was inserted
changed_length Number of bytes changed at 'inserted_key_pos'
move_length Number of bytes buffer was moved when key was inserted
key_buff Key buffer to use for temporary storage of key
insert_last_key If we are inserting key on rightmost key page
@note
split_buff is not stored on disk (caller has to do this)
@return
@retval 2 ok (Middle key up from _ma_insert())
@retval -1 error
*/
int _ma_split_page(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
uchar *key, uchar *buff, uchar *key_buff,
my_bool insert_last_key)
uchar *key, my_off_t split_page, uchar *split_buff,
uint org_split_length,
uchar *inserted_key_pos, uint changed_length,
int move_length,
uchar *key_buff, my_bool insert_last_key)
{
uint length,a_length,key_ref_length,t_length,nod_flag,key_length;
uint page_length;
uint page_length, split_length;
uchar *key_pos,*pos, *after_key, *new_buff;
my_off_t new_pos;
MARIA_KEY_PARAM s_temp;
@ -863,25 +855,26 @@ int _ma_split_page(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
DBUG_ENTER("maria_split_page");
LINT_INIT(after_key);
DBUG_DUMP("buff", buff, _ma_get_page_used(info, buff));
DBUG_DUMP("buff", split_buff, _ma_get_page_used(info, split_buff));
info->page_changed=1; /* Info->buff is used */
info->keyread_buff_used=1;
new_buff= info->buff;
nod_flag=_ma_test_if_nod(info, buff);
nod_flag=_ma_test_if_nod(info, split_buff);
key_ref_length= info->s->keypage_header + nod_flag;
if (insert_last_key)
key_pos= _ma_find_last_pos(info, keyinfo, buff, key_buff, &key_length,
key_pos= _ma_find_last_pos(info, keyinfo, split_buff,
key_buff, &key_length,
&after_key);
else
key_pos= _ma_find_half_pos(info, nod_flag, keyinfo, buff, key_buff,
key_pos= _ma_find_half_pos(info, nod_flag, keyinfo, split_buff, key_buff,
&key_length, &after_key);
if (!key_pos)
DBUG_RETURN(-1);
length=(uint) (key_pos-buff);
a_length= _ma_get_page_used(info, buff);
_ma_store_page_used(info, buff, length, nod_flag);
split_length= (uint) (key_pos - split_buff);
a_length= _ma_get_page_used(info, split_buff);
_ma_store_page_used(info, split_buff, split_length, nod_flag);
key_pos=after_key;
if (nod_flag)
@ -898,14 +891,14 @@ int _ma_split_page(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
DBUG_RETURN(-1);
_ma_kpointer(info, _ma_move_key(keyinfo,key,key_buff),new_pos);
/* Store new page */
/* Store new page */
if (!(*keyinfo->get_key)(keyinfo,nod_flag,&key_pos,key_buff))
DBUG_RETURN(-1);
t_length=(*keyinfo->pack_key)(keyinfo,nod_flag,(uchar *) 0,
(uchar*) 0, (uchar*) 0,
key_buff, &s_temp);
length=(uint) ((buff+a_length)-key_pos);
length=(uint) ((split_buff + a_length) - key_pos);
memcpy((uchar*) new_buff+key_ref_length+t_length,(uchar*) key_pos,
(size_t) length);
(*keyinfo->store_key)(keyinfo,new_buff+key_ref_length,&s_temp);
@ -916,8 +909,8 @@ int _ma_split_page(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
/* Copy key number */
new_buff[info->s->keypage_header - KEYPAGE_USED_SIZE - KEYPAGE_KEYID_SIZE -
KEYPAGE_FLAG_SIZE]=
buff[info->s->keypage_header - KEYPAGE_USED_SIZE - KEYPAGE_KEYID_SIZE -
KEYPAGE_FLAG_SIZE];
split_buff[info->s->keypage_header - KEYPAGE_USED_SIZE -
KEYPAGE_KEYID_SIZE - KEYPAGE_FLAG_SIZE];
res= 2; /* Middle key up */
if (info->s->now_transactional &&
@ -928,6 +921,15 @@ int _ma_split_page(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
if (_ma_write_keypage(info, keyinfo, new_pos, page_link->write_lock,
DFLT_INIT_HITS, new_buff))
res= -1;
/* Save changes to split pages */
if (info->s->now_transactional &&
_ma_log_split(info, split_page, split_buff, org_split_length,
split_length,
inserted_key_pos, changed_length, move_length,
KEY_OP_NONE, (uchar*) 0, 0, 0))
res= -1;
DBUG_DUMP("key",(uchar*) key, _ma_keylength(keyinfo,key));
DBUG_RETURN(res);
} /* _ma_split_page */
@ -1046,10 +1048,11 @@ static uchar *_ma_find_last_pos(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
@notes
Father_buff will always be changed
Caller must handle saving of curr_buff
@return
@retval 0 Balance was done
@retval 1 Middle key up
@retval 0 Balance was done (father buff is saved)
@retval 1 Middle key up (father buff is not saved)
@retval -1 Error
*/
@ -1360,7 +1363,7 @@ static int _ma_balance_page(register MARIA_HA *info, MARIA_KEYDEF *keyinfo,
Log changes to page on left (page shortened at end)
*/
if (_ma_log_split(info, curr_page, curr_buff,
left_length, new_left_length,
left_length - s_temp->move_length, new_left_length,
s_temp->key_pos, s_temp->changed_length,
s_temp->move_length,
KEY_OP_NONE, (uchar*) 0, 0, 0))
@ -1370,7 +1373,7 @@ static int _ma_balance_page(register MARIA_HA *info, MARIA_KEYDEF *keyinfo,
This contains the last 'extra_buff' from 'buff'
*/
if (_ma_log_prefix(info, next_page, extra_buff,
0, (int) (extra_length - right_length)))
0, (int) (extra_buff_length - right_length)))
goto err;
/*
@ -1652,68 +1655,6 @@ static my_bool _ma_log_new(MARIA_HA *info, my_off_t page, uchar *buff,
}
/**
  @brief Log that a key was added to the page

  Writes one LOGREC_REDO_INDEX record. After the page number, the record
  header holds either
    - KEY_OP_ADD_SUFFIX + changed_length, when key_pos == end_buff, or
    - KEY_OP_OFFSET + offset of key_pos in buff, an optional
      KEY_OP_SHIFT + move_length (only when move_length != 0), and
      KEY_OP_CHANGE + changed_length.
  The header is followed by 'changed_length' bytes taken from key_pos.

  @param info            Maria handler; info->s->now_transactional is
                         asserted
  @param page            Page address; divided by info->s->block_size
                         before being stored in the record
  @param buff            Start of the page buffer
  @param end_buff        Position compared against key_pos to detect a
                         change that is logged as a suffix
  @param key_pos         Position in buff of the changed data
  @param changed_length  Number of bytes logged from key_pos
  @param move_length     Shift to log before the change (0 = no shift)

  @return
  @retval 0    ok
  @retval -1   translog_write_record() reported an error
*/

static my_bool _ma_log_add(MARIA_HA *info, my_off_t page, uchar *buff,
                           uchar *end_buff, uchar *key_pos,
                           uint changed_length, int move_length)
{
  LSN record_lsn;
  LEX_STRING parts[TRANSLOG_INTERNAL_PARTS + 2];
  /* Room for file id, page number and up to three 3-byte operations */
  uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 3 + 3 + 3];
  uchar *cursor= log_data + FILEID_STORE_SIZE;
  DBUG_ENTER("_ma_log_add");
  DBUG_PRINT("enter", ("page: %lu", (ulong) page));
  DBUG_ASSERT(info->s->now_transactional);

  /*
    Write REDO entry that contains the logical operations we need
    to do the page.
    The first FILEID_STORE_SIZE bytes of log_data are not written here;
    log_data is handed to translog_write_record() below, which presumably
    fills them in (TODO confirm).
  */
  page/= info->s->block_size;
  page_store(cursor, page);
  cursor+= PAGE_STORE_SIZE;

  if (key_pos != end_buff)
  {
    /* Change inside the page: position, optional shift, then the data */
    uint offset= (uint) (key_pos - buff);
    cursor[0]= KEY_OP_OFFSET;
    int2store(cursor + 1, offset);
    cursor+= 3;
    if (move_length != 0)
    {
      cursor[0]= KEY_OP_SHIFT;
      int2store(cursor + 1, move_length);
      cursor+= 3;
    }
    cursor[0]= KEY_OP_CHANGE;
  }
  else
    cursor[0]= KEY_OP_ADD_SUFFIX;

  /* Both operation kinds are followed by the length of the data */
  int2store(cursor + 1, changed_length);
  cursor+= 3;

  parts[TRANSLOG_INTERNAL_PARTS + 0].str= log_data;
  parts[TRANSLOG_INTERNAL_PARTS + 0].length= (uint) (cursor - log_data);
  parts[TRANSLOG_INTERNAL_PARTS + 1].str= key_pos;
  parts[TRANSLOG_INTERNAL_PARTS + 1].length= changed_length;

  DBUG_RETURN(translog_write_record(&record_lsn, LOGREC_REDO_INDEX,
                                    info->trn, info,
                                    parts[TRANSLOG_INTERNAL_PARTS +
                                          0].length + changed_length,
                                    TRANSLOG_INTERNAL_PARTS + 2, parts,
                                    log_data, NULL) ? -1 : 0);
}
/**
@brief
Log when some part of the key page changes
@ -1755,12 +1696,14 @@ static my_bool _ma_log_change(MARIA_HA *info, my_off_t page, uchar *buff,
/**
@brief
Write log entry for page that has got a key added to the page under
one and only one of the following scenarios:
- Page is shortened from end
- Data is added to end of page
- Data added at front of page
@brief Write log entry for page splitting
@note
Write log entry for page that has got a key added to the page under
one and only one of the following scenarios:
- Page is shortened from end
- Data is added to end of page
- Data added at front of page
@param prefix_or_suffix KEY_OP_NONE Ignored
KEY_OP_ADD_PREFIX Add data to start of page
@ -1773,7 +1716,7 @@ static my_bool _ma_log_split(MARIA_HA *info, my_off_t page, uchar *buff,
uchar *key_pos, uint key_length, int move_length,
enum en_key_op prefix_or_suffix,
uchar *data, uint data_length,
uint change_length)
uint changed_length)
{
LSN lsn;
uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 3+3+3+3+3+2];
@ -1790,7 +1733,7 @@ static my_bool _ma_log_split(MARIA_HA *info, my_off_t page, uchar *buff,
page_store(log_pos, page);
log_pos+= PAGE_STORE_SIZE;
if (new_length <= offset)
if (new_length <= offset || !key_pos)
{
/*
Page was split before inserted key. Write redo entry where
@ -1859,9 +1802,9 @@ static my_bool _ma_log_split(MARIA_HA *info, my_off_t page, uchar *buff,
log_pos+= 3;
if (prefix_or_suffix == KEY_OP_ADD_PREFIX)
{
int2store(log_pos+1, change_length);
int2store(log_pos+1, changed_length);
log_pos+= 2;
data_length= change_length;
data_length= changed_length;
}
log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].str= (char*) data;
log_array[TRANSLOG_INTERNAL_PARTS + translog_parts].length= data_length;
@ -2089,118 +2032,6 @@ static my_bool _ma_log_key_middle(MARIA_HA *info, my_off_t page, uchar *buff,
}
/**
  @brief
  Write log entry for page that has got data added or deleted at start of page

  Writes one LOGREC_REDO_INDEX record. After the page number it holds
    - KEY_OP_DEL_PREFIX + (-move_length), when move_length < 0
      (changed_length is asserted to be 0), or
    - KEY_OP_ADD_PREFIX + move_length + changed_length, followed by
      'changed_length' bytes taken from buff + info->s->keypage_header.

  @param info            Maria handler
  @param page            Page address; divided by info->s->block_size
                         before being stored in the record
  @param buff            Page buffer; only read when a prefix is added
  @param changed_length  Number of bytes logged from the start of the key
                         area (must be 0 when deleting)
  @param move_length     < 0: number of bytes deleted (negated);
                         >= 0: number of bytes the page data was moved

  @return Result of translog_write_record()
*/

static my_bool _ma_log_prefix(MARIA_HA *info, my_off_t page,
                              uchar *buff, uint changed_length,
                              int move_length)
{
  LSN record_lsn;
  LEX_STRING parts[TRANSLOG_INTERNAL_PARTS + 2];
  uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 7];
  uchar *cursor= log_data + FILEID_STORE_SIZE;
  uint parts_used;
  DBUG_ENTER("_ma_log_prefix");
  DBUG_PRINT("enter", ("page: %lu change_length: %u move_length: %d",
                       (ulong) page, changed_length, move_length));

  page/= info->s->block_size;
  page_store(cursor, page);
  cursor+= PAGE_STORE_SIZE;

  if (move_length >= 0)
  {
    /* Add prefix */
    DBUG_ASSERT(changed_length >0 && (int) changed_length >= move_length);
    cursor[0]= KEY_OP_ADD_PREFIX;
    int2store(cursor + 1, move_length);
    int2store(cursor + 3, changed_length);
    cursor+= 5;
    /* The new data starts right after the key page header */
    parts[TRANSLOG_INTERNAL_PARTS + 1].str= ((char*) buff +
                                             info->s->keypage_header);
    parts[TRANSLOG_INTERNAL_PARTS + 1].length= changed_length;
    parts_used= 2;
  }
  else
  {
    /* Delete prefix */
    DBUG_ASSERT(changed_length == 0);
    cursor[0]= KEY_OP_DEL_PREFIX;
    int2store(cursor + 1, -move_length);
    cursor+= 3;
    parts_used= 1;
  }

  parts[TRANSLOG_INTERNAL_PARTS + 0].str= log_data;
  parts[TRANSLOG_INTERNAL_PARTS + 0].length= (uint) (cursor - log_data);

  DBUG_RETURN(translog_write_record(&record_lsn, LOGREC_REDO_INDEX,
                                    info->trn, info,
                                    parts[TRANSLOG_INTERNAL_PARTS +
                                          0].length + changed_length,
                                    TRANSLOG_INTERNAL_PARTS + parts_used,
                                    parts, log_data, NULL));
}
/**
  @brief
  Write log entry for page that has got data added or deleted at end of page

  Writes one LOGREC_REDO_INDEX record. With diff = new_length - org_length
  the record holds, after the page number,
    - KEY_OP_DEL_SUFFIX + (-diff), when diff < 0, or
    - KEY_OP_ADD_SUFFIX + diff, followed by 'diff' bytes taken from
      buff + org_length.

  @param info        Maria handler
  @param page        Page address; divided by info->s->block_size before
                     being stored in the record
  @param buff        Page buffer; only read when data was added
  @param org_length  Length of the page before the change
  @param new_length  Length of the page after the change

  @return Result of translog_write_record()
*/

static my_bool _ma_log_suffix(MARIA_HA *info, my_off_t page,
                              uchar *buff, uint org_length,
                              uint new_length)
{
  LSN record_lsn;
  LEX_STRING parts[TRANSLOG_INTERNAL_PARTS + 2];
  uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + 10];
  uchar *cursor= log_data + FILEID_STORE_SIZE;
  int diff;
  uint parts_used, data_length;
  DBUG_ENTER("_ma_log_suffix");
  DBUG_PRINT("enter", ("page: %lu org_length: %u new_length: %u",
                       (ulong) page, org_length, new_length));

  page/= info->s->block_size;
  page_store(cursor, page);
  cursor+= PAGE_STORE_SIZE;

  diff= (int) (new_length - org_length);
  if (diff >= 0)
  {
    /* Page grew: log the bytes that were appended after the old end */
    cursor[0]= KEY_OP_ADD_SUFFIX;
    int2store(cursor + 1, diff);
    cursor+= 3;
    parts[TRANSLOG_INTERNAL_PARTS + 1].str= (char*) buff + org_length;
    parts[TRANSLOG_INTERNAL_PARTS + 1].length= (uint) diff;
    parts_used= 2;
    data_length= (uint) diff;
  }
  else
  {
    /* Page shrank: only the number of removed bytes is logged */
    cursor[0]= KEY_OP_DEL_SUFFIX;
    int2store(cursor + 1, -diff);
    cursor+= 3;
    parts_used= 1;
    data_length= 0;
  }

  parts[TRANSLOG_INTERNAL_PARTS + 0].str= log_data;
  parts[TRANSLOG_INTERNAL_PARTS + 0].length= (uint) (cursor - log_data);

  DBUG_RETURN(translog_write_record(&record_lsn, LOGREC_REDO_INDEX,
                                    info->trn, info,
                                    parts[TRANSLOG_INTERNAL_PARTS +
                                          0].length + data_length,
                                    TRANSLOG_INTERNAL_PARTS + parts_used,
                                    parts, log_data, NULL));
}
#ifdef NOT_NEEDED
/**

View file

@ -1007,6 +1007,9 @@ static int maria_chk(HA_CHECK *param, char *filename)
if (param->testflag & (T_REP_ANY | T_SORT_RECORDS | T_SORT_INDEX))
{
/* Mark table as not transactional to avoid logging */
maria_disable_logging(info);
if (param->testflag & T_REP_ANY)
{
ulonglong tmp=share->state.key_map;
@ -1181,6 +1184,7 @@ static int maria_chk(HA_CHECK *param, char *filename)
((param->testflag & T_SORT_RECORDS) ?
UPDATE_SORT : 0)));
info->update&= ~HA_STATE_CHANGED;
maria_enable_logging(info);
maria_lock_database(info, F_UNLCK);
end2:

View file

@ -29,7 +29,9 @@
#include "ma_control_file.h"
/* For testing recovery */
#ifndef DBUG_OFF
#define IDENTICAL_PAGES_AFTER_RECOVERY 1
#endif
/* Do extra sanity checking */
#define SANITY_CHECKS 1
@ -718,17 +720,18 @@ extern int _ma_insert(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo,
extern int _ma_ck_real_write_btree(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
uchar *key, uint key_length,
MARIA_RECORD_POS *root, uint comp_flag);
extern int _ma_split_page(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
uchar *key, uchar *buff, uchar *key_buff,
my_bool insert_last);
extern int _ma_split_page(register MARIA_HA *info,
register MARIA_KEYDEF *keyinfo,
uchar *key, my_off_t split_page, uchar *split_buff,
uint org_split_length,
uchar *inserted_key_pos, uint changed_length,
int move_length,
uchar *key_buff, my_bool insert_last_key);
extern uchar *_ma_find_half_pos(MARIA_HA *info, uint nod_flag,
MARIA_KEYDEF *keyinfo,
uchar *page, uchar *key,
uint *return_key_length,
uchar ** after_key);
extern my_bool write_hook_for_undo_key(enum translog_record_type type,
TRN *trn, MARIA_HA *tbl_info,
LSN *lsn, void *hook_arg);
extern int _ma_calc_static_key_length(MARIA_KEYDEF *keyinfo, uint nod_flag,
uchar *key_pos, uchar *org_key,
uchar *key_buff, const uchar *key,
@ -912,21 +915,6 @@ typedef struct st_maria_block_info
#define SORT_BUFFER_INIT (2048L*1024L-MALLOC_OVERHEAD)
#define MIN_SORT_BUFFER (4096-MALLOC_OVERHEAD)
/*
  Struct for clr_end.
  NOTE(review): going by its name this is the message handed to the write
  hook for CLR_END log records; confirm against the hook implementation.
*/
struct st_msg_to_write_hook_for_clr_end
{
/* NOTE(review): presumably LSN of the previous UNDO record; verify */
LSN previous_undo_lsn;
/* Log record type of the operation that was undone (per the field name) */
enum translog_record_type undone_record_type;
/* NOTE(review): presumably checksum change caused by the undo; verify */
ha_checksum checksum_delta;
};
/*
  NOTE(review): going by its name this is the message handed to
  write_hook_for_undo_key(); confirm against the hook implementation.
*/
struct st_msg_to_write_hook_for_undo_key
{
/* NOTE(review): presumably points at the key root to update; confirm */
my_off_t *root;
/* NOTE(review): presumably the value to store through root; verify */
my_off_t value;
};
#define fast_ma_writeinfo(INFO) if (!(INFO)->s->tot_locks) (void) _ma_writeinfo((INFO),0)
#define fast_ma_readinfo(INFO) ((INFO)->lock_type == F_UNLCK) && _ma_readinfo((INFO),F_RDLCK,1)

View file

@ -60,6 +60,7 @@ static int test_start_stop();
static int test_2_open_and_2_close();
static int test_bad_magic_string();
static int test_bad_checksum();
static int test_bad_hchecksum();
static int test_bad_size();
/* Utility */
@ -85,7 +86,7 @@ int main(int argc,char *argv[])
MY_INIT(argv[0]);
maria_data_root= ".";
plan(9);
plan(10);
diag("Unit tests for control file");
@ -106,6 +107,7 @@ int main(int argc,char *argv[])
"test of two open and two close (strange call sequence)");
ok(0 == test_bad_magic_string(), "test of bad magic string");
ok(0 == test_bad_checksum(), "test of bad checksum");
ok(0 == test_bad_hchecksum(), "test of bad hchecksum");
ok(0 == test_bad_size(), "test of too small/big file");
return exit_status();
@ -264,18 +266,18 @@ static int test_binary_content()
future change/breakage.
*/
char buffer[23];
char buffer[43];
RET_ERR_UNLESS((fd= my_open(file_name,
O_BINARY | O_RDWR,
MYF(MY_WME))) >= 0);
RET_ERR_UNLESS(my_read(fd, buffer, 23, MYF(MY_FNABP | MY_WME)) == 0);
RET_ERR_UNLESS(my_read(fd, buffer, 43, MYF(MY_FNABP | MY_WME)) == 0);
RET_ERR_UNLESS(my_close(fd, MYF(MY_WME)) == 0);
RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK);
i= uint3korr(buffer+12);
i= uint3korr(buffer + 32 );
RET_ERR_UNLESS(i == LSN_FILE_NO(last_checkpoint_lsn));
i= uint4korr(buffer+15);
i= uint4korr(buffer + 35);
RET_ERR_UNLESS(i == LSN_OFFSET(last_checkpoint_lsn));
i= uint4korr(buffer+19);
i= uint4korr(buffer + 39);
RET_ERR_UNLESS(i == last_logno);
RET_ERR_UNLESS(close_file() == 0);
return 0;
@ -342,15 +344,42 @@ static int test_bad_checksum()
RET_ERR_UNLESS((fd= my_open(file_name,
O_BINARY | O_RDWR,
MYF(MY_WME))) >= 0);
RET_ERR_UNLESS(my_pread(fd, buffer, 1, 8, MYF(MY_FNABP | MY_WME)) == 0);
RET_ERR_UNLESS(my_pread(fd, buffer, 1, 28, MYF(MY_FNABP | MY_WME)) == 0);
buffer[0]+= 3; /* mangle checksum */
RET_ERR_UNLESS(my_pwrite(fd, buffer, 1, 8, MYF(MY_FNABP | MY_WME)) == 0);
RET_ERR_UNLESS(my_pwrite(fd, buffer, 1, 28, MYF(MY_FNABP | MY_WME)) == 0);
/* Check that control file module sees the problem */
RET_ERR_UNLESS(ma_control_file_create_or_open(TRUE) ==
CONTROL_FILE_BAD_CHECKSUM);
/* Restore checksum */
buffer[0]-= 3;
RET_ERR_UNLESS(my_pwrite(fd, buffer, 1, 4, MYF(MY_FNABP | MY_WME)) == 0);
RET_ERR_UNLESS(my_pwrite(fd, buffer, 1, 28, MYF(MY_FNABP | MY_WME)) == 0);
RET_ERR_UNLESS(my_close(fd, MYF(MY_WME)) == 0);
return 0;
}
static int test_bad_hchecksum()
{
char buffer[4];
int fd;
RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK);
RET_ERR_UNLESS(close_file() == 0);
/* Corrupt checksum */
RET_ERR_UNLESS((fd= my_open(file_name,
O_BINARY | O_RDWR,
MYF(MY_WME))) >= 0);
RET_ERR_UNLESS(my_pread(fd, buffer, 1, 24, MYF(MY_FNABP | MY_WME)) == 0);
buffer[0]+= 3; /* mangle checksum */
RET_ERR_UNLESS(my_pwrite(fd, buffer, 1, 24, MYF(MY_FNABP | MY_WME)) == 0);
/* Check that control file module sees the problem */
RET_ERR_UNLESS(ma_control_file_create_or_open(TRUE) ==
CONTROL_FILE_BAD_HEAD_CHECKSUM);
/* Restore checksum */
buffer[0]-= 3;
RET_ERR_UNLESS(my_pwrite(fd, buffer, 1, 24, MYF(MY_FNABP | MY_WME)) == 0);
RET_ERR_UNLESS(my_close(fd, MYF(MY_WME)) == 0);
return 0;
@ -359,7 +388,7 @@ static int test_bad_checksum()
static int test_bad_size()
{
char buffer[]="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
char buffer[]="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
int fd;
/* A too short file */
@ -371,7 +400,7 @@ static int test_bad_size()
/* Check that control file module sees the problem */
RET_ERR_UNLESS(ma_control_file_create_or_open(TRUE) ==
CONTROL_FILE_TOO_SMALL);
RET_ERR_UNLESS(my_write(fd, buffer, 30, MYF(MY_FNABP | MY_WME)) == 0);
RET_ERR_UNLESS(my_write(fd, buffer, 50, MYF(MY_FNABP | MY_WME)) == 0);
/* Check that control file module sees the problem */
RET_ERR_UNLESS(ma_control_file_create_or_open(TRUE) == CONTROL_FILE_TOO_BIG);
RET_ERR_UNLESS(my_close(fd, MYF(MY_WME)) == 0);

View file

@ -337,7 +337,7 @@ int main(int argc __attribute__((unused)), char *argv[])
if (translog_flush(translog_get_horizon()))
{
fprintf(stderr, "Can't flush up to horizon\n", (ulong) i);
fprintf(stderr, "Can't flush up to horizon\n");
translog_destroy();
ok(0, "flush");
exit(1);