From 91bc9cc9e66d55e003e90a206a1771c9f27d54b1 Mon Sep 17 00:00:00 2001 From: Guilhem Bichot Date: Fri, 5 Dec 2008 22:11:46 +0100 Subject: [PATCH] Fix for BUG#41037 "Maria: recovery failure (pushbuild2)" (checkpoint bug). mysql-test/suite/maria/r/maria-recovery3.result: result. mysql-test/suite/maria/t/maria-recovery3-master.opt: usual recovery test options mysql-test/suite/maria/t/maria-recovery3.test: Test for BUG#41037. Without the bugfix, the test would hang because Recovery would fail with "undo_key_insert got error 126": Recovery would believe INSERT of 2 is uncommitted, try to execute its UNDO_KEY_INSERT, fail to find the key to delete because DELETE deleted it. That failure of _ma_ck_real_delete() would mark table as corrupted (see in d_search()) which is error 126 (HA_ERR_CRASHED). storage/maria/ma_blockrec.c: set trn->rec_lsn to LSN of commit record, when writing it. storage/maria/ma_blockrec.h: new function storage/maria/ma_commit.c: when committing or rolling back, rec_lsn should be LSN_IMPOSSIBLE: assertion is moved here from trnman_end_trn(), because now ma_commit() can set rec_lsn and so when we reach trnman_end_trn() it's not LSN_IMPOSSIBLE anymore. Adding debug possibility to pause between COMMIT record write and trnman_commit_trn(), to be able to fire checkpoint in between. storage/maria/ma_loghandler.c: in-write hook for COMMIT records storage/maria/ma_recovery.c: More debugging info in maria_recovery.trace, to see how the starting point of REDO phase is determined storage/maria/trnman.c: assertion cannot be here anymore see ma_commit.c --- .../suite/maria/r/maria-recovery3.result | 29 ++++++++ .../suite/maria/t/maria-recovery3-master.opt | 1 + mysql-test/suite/maria/t/maria-recovery3.test | 71 +++++++++++++++++++ storage/maria/ma_blockrec.c | 40 ++++++++++- storage/maria/ma_blockrec.h | 3 + storage/maria/ma_commit.c | 11 ++- storage/maria/ma_loghandler.c | 4 +- storage/maria/ma_recovery.c | 14 +++- storage/maria/trnman.c | 1 - 9 files changed, 166 insertions(+), 8 deletions(-) create mode 100644 mysql-test/suite/maria/r/maria-recovery3.result create mode 100644 mysql-test/suite/maria/t/maria-recovery3-master.opt create mode 100644 mysql-test/suite/maria/t/maria-recovery3.test diff --git a/mysql-test/suite/maria/r/maria-recovery3.result b/mysql-test/suite/maria/r/maria-recovery3.result new file mode 100644 index 00000000000..c1acef57a5a --- /dev/null +++ b/mysql-test/suite/maria/r/maria-recovery3.result @@ -0,0 +1,29 @@ +set global maria_log_file_size=4294967295; +drop database if exists mysqltest; +create database mysqltest; +use mysqltest; +* shut down mysqld, removed logs, restarted it +* TEST of Checkpoint between writing the commit log record and committing in trnman +create table t1(a int primary key) engine=maria; +insert into t1 values(1); +flush table t1; +* copied t1 for comparison +set session debug="+d,maria_sleep_in_commit"; +insert into t1 values(2); +set global maria_checkpoint_interval=1000; +delete from t1 where a=2; +SET SESSION debug="+d,maria_flush_whole_log,maria_crash"; +* crashing mysqld intentionally +set global maria_checkpoint_interval=1; +ERROR HY000: Lost connection to MySQL server during query +* recovery happens +check table t1 extended; +Table Op Msg_type Msg_text +mysqltest.t1 check status OK +* testing that checksum after recovery is as expected +Checksum-check +ok +use mysqltest; +drop table t1; +drop database mysqltest_for_comparison; +drop database mysqltest; diff --git a/mysql-test/suite/maria/t/maria-recovery3-master.opt b/mysql-test/suite/maria/t/maria-recovery3-master.opt new file mode 100644 index 00000000000..9023fb74e8b --- /dev/null +++ b/mysql-test/suite/maria/t/maria-recovery3-master.opt @@ -0,0 +1 @@ +--skip-stack-trace --skip-core-file --loose-maria-log-dir-path=$MYSQLTEST_VARDIR/tmp diff --git a/mysql-test/suite/maria/t/maria-recovery3.test b/mysql-test/suite/maria/t/maria-recovery3.test new file mode 100644 index 00000000000..68cc67fd328 --- /dev/null +++ b/mysql-test/suite/maria/t/maria-recovery3.test @@ -0,0 +1,71 @@ +--source include/not_embedded.inc +# Don't test this under valgrind, memory leaks will occur as we crash +--source include/not_valgrind.inc +# Binary must be compiled with debug for crash to occur +--source include/have_debug.inc +--source include/have_maria.inc + +set global maria_log_file_size=4294967295; +let $MARIA_LOG=../tmp; + +--disable_warnings +drop database if exists mysqltest; +--enable_warnings +create database mysqltest; +let $mms_tname=t; + +# Include scripts can perform SQL. For it to not influence the main test +# they use a separate connection. This way if they use a DDL it would +# not autocommit in the main test. +connect (admin, 127.0.0.1, root,,mysqltest,,); +--enable_reconnect + +connection default; +use mysqltest; +--enable_reconnect + +let $mms_tables=1; +let $mvr_restore_old_snapshot=0; +let $mms_compare_physically=0; +let $mvr_debug_option="+d,maria_flush_whole_log,maria_crash"; +let $mvr_crash_statement= set global maria_checkpoint_interval=1; + +-- source include/maria_empty_logs.inc + +# Test for BUG#41037 (recovery failure) +--echo * TEST of Checkpoint between writing the commit log record and committing in trnman +# we want recovery to use the tables as they were at time of crash +let $mvr_restore_old_snapshot=0; +# UNDO phase prevents physical comparison, normally, +# so we'll only use checksums to compare. +let $mms_compare_physically=0; +let $mvr_crash_statement= set global maria_checkpoint_interval=1; +create table t1(a int primary key) engine=maria; +insert into t1 values(1); +-- source include/maria_make_snapshot_for_comparison.inc +set session debug="+d,maria_sleep_in_commit"; +send insert into t1 values(2); +sleep 1; +# Now the INSERT of 2 has written a commit record +# but not yet called trnman_commit(), so for checkpoint it's not +# committed. +connection admin; +set global maria_checkpoint_interval=1000; # force a checkpoint +connection default; +reap; # end of INSERT +delete from t1 where a=2; +# Bug was that: Recovery starts REDO scanning from too far: from +# Checkpoint record which says INSERT is not committed, so +# Recovery executes the INSERT's UNDO and finds no key to delete +# (as DELETE already deleted it), fails. +-- source include/maria_verify_recovery.inc +drop table t1; + +# Note that even if machine is loaded and thus INSERT is committed +# before checkpoint happens, test should still pass (though it won't +# reproduce the conditions of the bug). + +# clean up everything +let $mms_purpose=comparison; +eval drop database mysqltest_for_$mms_purpose; +drop database mysqltest; diff --git a/storage/maria/ma_blockrec.c b/storage/maria/ma_blockrec.c index ed4d98015b1..2709a2bba5c 100644 --- a/storage/maria/ma_blockrec.c +++ b/storage/maria/ma_blockrec.c @@ -5914,7 +5914,7 @@ my_bool write_hook_for_file_id(enum translog_record_type type TRN *trn __attribute__ ((unused)), MARIA_HA *tbl_info, - LSN *lsn __attribute__ ((unused)), + LSN *lsn, void *hook_arg __attribute__ ((unused))) { @@ -5923,6 +5923,44 @@ my_bool write_hook_for_file_id(enum translog_record_type type return 0; } + +/** + Updates transaction's rec_lsn when committing. + + A transaction writes its commit record before being committed in trnman, so + if Checkpoint happens just between the COMMIT record log write and the + commit in trnman, it will record that transaction is not committed. Assume + the transaction (trn1) did an INSERT; after the checkpoint, a second + transaction (trn2) does a DELETE of what trn1 has inserted. Then crash, + Checkpoint record says that trn1 was not committed, and REDO phase starts + from Checkpoint record's LSN. So it will not find the COMMIT record of + trn1, will want to roll back trn1, which will fail because the row/key + which it wants to delete does not exist anymore. + To avoid this, Checkpoint needs to know that the REDO phase must start + before this COMMIT record, so transaction sets its rec_lsn to the COMMIT's + record LSN, and as Checkpoint reads the transaction's rec_lsn, Checkpoint + will know. + + @note so after commit trn->rec_lsn is a "commit LSN", which could be of + use later. + + @return Operation status, always 0 (success) +*/ + +my_bool write_hook_for_commit(enum translog_record_type type + __attribute__ ((unused)), + TRN *trn, + MARIA_HA *tbl_info + __attribute__ ((unused)), + LSN *lsn, + void *hook_arg + __attribute__ ((unused))) +{ + trn->rec_lsn= *lsn; + return 0; +} + + /*************************************************************************** Applying of REDO log records ***************************************************************************/ diff --git a/storage/maria/ma_blockrec.h b/storage/maria/ma_blockrec.h index 34f6a3e3008..9b373d1d0ea 100644 --- a/storage/maria/ma_blockrec.h +++ b/storage/maria/ma_blockrec.h @@ -271,6 +271,9 @@ my_bool write_hook_for_undo_bulk_insert(enum translog_record_type type, my_bool write_hook_for_file_id(enum translog_record_type type, TRN *trn, MARIA_HA *tbl_info, LSN *lsn, void *hook_arg); +my_bool write_hook_for_commit(enum translog_record_type type, + TRN *trn, MARIA_HA *tbl_info, LSN *lsn, + void *hook_arg); void _ma_block_get_status(void* param, my_bool concurrent_insert); void _ma_block_update_status(void *param); void _ma_block_restore_status(void *param); diff --git a/storage/maria/ma_commit.c b/storage/maria/ma_commit.c index 1066eb15e11..951de1beeff 100644 --- a/storage/maria/ma_commit.c +++ b/storage/maria/ma_commit.c @@ -33,6 +33,7 @@ int ma_commit(TRN *trn) LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS]; DBUG_ENTER("ma_commit"); + DBUG_ASSERT(trn->rec_lsn == LSN_IMPOSSIBLE); if (trn->undo_lsn == 0) /* no work done, rollback (cheaper than commit) */ DBUG_RETURN(trnman_rollback_trn(trn)); /* @@ -61,8 +62,14 @@ int ma_commit(TRN *trn) trn, NULL, 0, sizeof(log_array)/sizeof(log_array[0]), log_array, NULL, NULL) | - translog_flush(commit_lsn) | - trnman_commit_trn(trn)); + translog_flush(commit_lsn)); + + DBUG_EXECUTE_IF("maria_sleep_in_commit", + { + DBUG_PRINT("info", ("maria_sleep_in_commit")); + sleep(3); + }); + res|= trnman_commit_trn(trn); /* diff --git a/storage/maria/ma_loghandler.c b/storage/maria/ma_loghandler.c index cc8c7898d84..93bb6e32418 100644 --- a/storage/maria/ma_loghandler.c +++ b/storage/maria/ma_loghandler.c @@ -618,11 +618,11 @@ static LOG_DESC INIT_LOGREC_PREPARE_WITH_UNDO_PURGE= static LOG_DESC INIT_LOGREC_COMMIT= {LOGRECTYPE_FIXEDLENGTH, 0, 0, NULL, - NULL, NULL, 0, "commit", LOGREC_IS_GROUP_ITSELF, NULL, + write_hook_for_commit, NULL, 0, "commit", LOGREC_IS_GROUP_ITSELF, NULL, NULL}; static LOG_DESC INIT_LOGREC_COMMIT_WITH_UNDO_PURGE= -{LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, NULL, NULL, NULL, 1, +{LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, NULL, write_hook_for_commit, NULL, 1, "commit_with_undo_purge", LOGREC_IS_GROUP_ITSELF, NULL, NULL}; static LOG_DESC INIT_LOGREC_CHECKPOINT= diff --git a/storage/maria/ma_recovery.c b/storage/maria/ma_recovery.c index 51eedf157c5..baebdcf2eb4 100644 --- a/storage/maria/ma_recovery.c +++ b/storage/maria/ma_recovery.c @@ -555,8 +555,9 @@ static void new_transaction(uint16 sid, TrID long_id, LSN undo_lsn, char llbuf[22]; all_active_trans[sid].long_trid= long_id; llstr(long_id, llbuf); - tprint(tracef, "Transaction long_trid %s short_trid %u starts\n", - llbuf, sid); + tprint(tracef, "Transaction long_trid %s short_trid %u starts," + " undo_lsn (%lu,0x%lx) first_undo_lsn (%lu,0x%lx)\n", + llbuf, sid, LSN_IN_PARTS(undo_lsn), LSN_IN_PARTS(first_undo_lsn)); all_active_trans[sid].undo_lsn= undo_lsn; all_active_trans[sid].first_undo_lsn= first_undo_lsn; set_if_bigger(max_long_trid, long_id); @@ -2968,6 +2969,8 @@ static LSN parse_checkpoint_record(LSN lsn) ptr= log_record_buffer.str; start_address= lsn_korr(ptr); ptr+= LSN_STORE_SIZE; + tprint(tracef, "Checkpoint record has start_horizon at (%lu,0x%lx)\n", + LSN_IN_PARTS(start_address)); /* transactions */ nb_active_transactions= uint2korr(ptr); @@ -2983,6 +2986,9 @@ static LSN parse_checkpoint_record(LSN lsn) line. It may make start_address slightly decrease (only by the time it takes to write one or a few rows, roughly). */ + tprint(tracef, "Checkpoint record has min_rec_lsn of active transactions" + " at (%lu,0x%lx)\n", + LSN_IN_PARTS(minimum_rec_lsn_of_active_transactions)); set_if_smaller(start_address, minimum_rec_lsn_of_active_transactions); for (i= 0; i < nb_active_transactions; i++) @@ -3086,6 +3092,8 @@ static LSN parse_checkpoint_record(LSN lsn) */ start_address= checkpoint_start= translog_next_LSN(start_address, LSN_IMPOSSIBLE); + tprint(tracef, "Checkpoint record start_horizon now adjusted to" + " LSN (%lu,0x%lx)\n", LSN_IN_PARTS(start_address)); if (checkpoint_start == LSN_IMPOSSIBLE) { /* @@ -3095,6 +3103,8 @@ static LSN parse_checkpoint_record(LSN lsn) return LSN_ERROR; } /* now, where the REDO phase should start reading log: */ + tprint(tracef, "Checkpoint has min_rec_lsn of dirty pages at" + " LSN (%lu,0x%lx)\n", LSN_IN_PARTS(minimum_rec_lsn_of_dirty_pages)); set_if_smaller(start_address, minimum_rec_lsn_of_dirty_pages); DBUG_PRINT("info", ("checkpoint_start: (%lu,0x%lx) start_address: (%lu,0x%lx)", diff --git a/storage/maria/trnman.c b/storage/maria/trnman.c index 538dc75b663..51940705952 100644 --- a/storage/maria/trnman.c +++ b/storage/maria/trnman.c @@ -389,7 +389,6 @@ my_bool trnman_end_trn(TRN *trn, my_bool commit) LF_PINS *pins= trn->pins; DBUG_ENTER("trnman_end_trn"); - DBUG_ASSERT(trn->rec_lsn == 0); /* if a rollback, all UNDO records should have been executed */ DBUG_ASSERT(commit || trn->undo_lsn == 0); DBUG_PRINT("info", ("pthread_mutex_lock LOCK_trn_list"));