MDEV-232: Remove one fsync() from commit phase.

Introduce a new storage engine API method commit_checkpoint_request().
This is used to replace the fsync() at the end of every storage engine
commit with a single fsync() when a binlog is rotated.

Binlog rotation is now done during group commit instead of being
delayed until unlog(), removing some server stall and avoiding an
expensive lock/unlock of LOCK_log inside unlog().
This commit is contained in:
unknown 2012-09-13 14:31:29 +02:00
parent 0697ee265f
commit 288eeb3a31
43 changed files with 1331 additions and 476 deletions

View file

@ -133,7 +133,7 @@ if (`SELECT @@global.binlog_format = 'STATEMENT'`)
{
#must show two INSERT DELAYED
--let $binlog_file= query_get_value(SHOW MASTER STATUS, File, 1)
--let $binlog_limit= 1,6
--let $binlog_limit= 2,6
--source include/show_binlog_events.inc
}
select * from t1;

View file

@ -892,6 +892,7 @@ DROP DATABASE test1;
FLUSH LOGS;
show binlog events in 'master-bin.000002' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000002 # Binlog_checkpoint # # master-bin.000002
master-bin.000002 # Query # # CREATE DATABASE test1
master-bin.000002 # Query # # use `test1`; CREATE TABLE t1(id int)
master-bin.000002 # Query # # DROP DATABASE test1

View file

@ -697,7 +697,6 @@ SET @@session.lc_time_names=0/*!*/;
SET @@session.collation_database=DEFAULT/*!*/;
BEGIN
/*!*/;
SET INSERT_ID=6/*!*/;
DELIMITER ;
# End of log file
ROLLBACK /* added by mysqlbinlog */;
@ -1483,17 +1482,6 @@ COMMIT
/*!*/;
DELIMITER ;
DELIMITER /*!*/;
SET TIMESTAMP=1579609943/*!*/;
SET @@session.pseudo_thread_id=999999999/*!*/;
SET @@session.foreign_key_checks=1, @@session.sql_auto_is_null=0, @@session.unique_checks=1, @@session.autocommit=1/*!*/;
SET @@session.sql_mode=0/*!*/;
SET @@session.auto_increment_increment=1, @@session.auto_increment_offset=1/*!*/;
/*!\C latin1 *//*!*/;
SET @@session.character_set_client=8,@@session.collation_connection=8,@@session.collation_server=8/*!*/;
SET @@session.lc_time_names=0/*!*/;
SET @@session.collation_database=DEFAULT/*!*/;
BEGIN
/*!*/;
DELIMITER ;
# End of log file
ROLLBACK /* added by mysqlbinlog */;

View file

@ -0,0 +1,88 @@
SET @old_max_binlog_size= @@global.max_binlog_size;
SET GLOBAL max_binlog_size= 4096;
SET @old_innodb_flush_log_at_trx_commit= @@global.innodb_flush_log_at_trx_commit;
SET GLOBAL innodb_flush_log_at_trx_commit= 1;
RESET MASTER;
CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb;
CREATE TABLE t2 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Myisam;
*** Test that RESET MASTER waits for pending commit checkpoints to complete.
SET DEBUG_SYNC= "commit_after_group_release_commit_ordered SIGNAL con1_ready WAIT_FOR con1_go";
INSERT INTO t1 VALUES (1, REPEAT("x", 4100));
SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
INSERT INTO t2 VALUES (1, REPEAT("x", 4100));
INSERT INTO t2 VALUES (2, REPEAT("x", 4100));
show binary logs;
Log_name File_size
master-bin.000001 #
master-bin.000002 #
master-bin.000003 #
master-bin.000004 #
show binlog events in 'master-bin.00000<binlog_start>' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.00000<binlog_start> # Format_desc # # SERVER_VERSION, BINLOG_VERSION
master-bin.00000<binlog_start> # Binlog_checkpoint # # master-bin.000001
SET DEBUG_SYNC= "execute_command_after_close_tables SIGNAL reset_master_done";
RESET MASTER;
This will timeout, as RESET MASTER is blocked
SET DEBUG_SYNC= "now WAIT_FOR reset_master_done TIMEOUT 1";
Warnings:
Warning 1639 debug sync point wait timed out
SET DEBUG_SYNC= "now SIGNAL con1_go";
show binary logs;
Log_name File_size
master-bin.000001 #
show binlog events in 'master-bin.000001' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000001 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
master-bin.000001 # Binlog_checkpoint # # master-bin.000001
*** Test that binlog N is active, and commit checkpoint for (N-1) is
*** done while there is still a pending commit checkpoint for (N-2).
SET DEBUG_SYNC= "commit_after_group_release_commit_ordered SIGNAL con1_ready WAIT_FOR con1_continue";
INSERT INTO t1 VALUES (20, REPEAT("x", 4100));
SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
SET DEBUG_SYNC= "commit_after_group_release_commit_ordered SIGNAL con2_ready WAIT_FOR con2_continue";
INSERT INTO t1 VALUES (21, REPEAT("x", 4100));
SET DEBUG_SYNC= "now WAIT_FOR con2_ready";
show binary logs;
Log_name File_size
master-bin.000001 #
master-bin.000002 #
master-bin.000003 #
show binlog events in 'master-bin.000001' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000001 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
master-bin.000001 # Binlog_checkpoint # # master-bin.000001
master-bin.000001 # Query # # BEGIN
master-bin.000001 # Table_map # # table_id: # (test.t1)
master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
master-bin.000001 # Xid # # COMMIT /* XID */
master-bin.000001 # Rotate # # master-bin.000002;pos=<binlog_start>
show binlog events in 'master-bin.000002' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000002 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
master-bin.000002 # Binlog_checkpoint # # master-bin.000001
master-bin.000002 # Query # # BEGIN
master-bin.000002 # Table_map # # table_id: # (test.t1)
master-bin.000002 # Write_rows # # table_id: # flags: STMT_END_F
master-bin.000002 # Xid # # COMMIT /* XID */
master-bin.000002 # Rotate # # master-bin.000003;pos=<binlog_start>
show binlog events in 'master-bin.000003' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000003 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
master-bin.000003 # Binlog_checkpoint # # master-bin.000001
SET DEBUG_SYNC= "now SIGNAL con2_continue";
con1 is still pending, no new binlog checkpoint should have been logged.
show binlog events in 'master-bin.000003' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000003 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
master-bin.000003 # Binlog_checkpoint # # master-bin.000001
SET DEBUG_SYNC= "now SIGNAL con1_continue";
No commit checkpoints are pending, a new binlog checkpoint should have been logged.
show binlog events in 'master-bin.000003' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000003 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
master-bin.000003 # Binlog_checkpoint # # master-bin.000001
master-bin.000003 # Binlog_checkpoint # # master-bin.000003
DROP TABLE t1, t2;
SET GLOBAL max_binlog_size= @old_max_binlog_size;
SET GLOBAL innodb_flush_log_at_trx_commit= @old_innodb_flush_log_at_trx_commit;

View file

@ -21,6 +21,7 @@ master-bin.000002 #
show binlog events in 'master-bin.000001' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000001 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
master-bin.000001 # Binlog_checkpoint # # master-bin.000001
master-bin.000001 # Query # # use `test`; CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb
master-bin.000001 # Query # # BEGIN
master-bin.000001 # Table_map # # table_id: # (test.t1)

View file

@ -234,6 +234,7 @@ master-bin.000001 # Xid # # COMMIT /* XID */
master-bin.000001 # Rotate # # master-bin.000002;pos=4
show binlog events in 'master-bin.000002' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000002 # Binlog_checkpoint # # master-bin.000002
master-bin.000002 # Query # # use `test`; DROP TABLE `t1` /* generated by server */
set @ac = @@autocommit;
set autocommit= 0;

View file

@ -34,8 +34,8 @@ DELIMITER /*!*/;
# at #
#010909 4:46:40 server id # end_log_pos # Start: binlog v 4, server v #.##.## created 010909 4:46:40 at startup
ROLLBACK/*!*/;
#010909 4:46:40 server id # end_log_pos # Binlog checkpoint master-bin.000001
# at #
#010909 4:46:40 server id # end_log_pos # Binlog checkpoint master-bin.000001
# at #
use `new_test1`/*!*/;
#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
@ -230,8 +230,8 @@ DELIMITER /*!*/;
# at #
#010909 4:46:40 server id # end_log_pos # Start: binlog v 4, server v #.##.## created 010909 4:46:40 at startup
ROLLBACK/*!*/;
#010909 4:46:40 server id # end_log_pos # Binlog checkpoint master-bin.000001
# at #
#010909 4:46:40 server id # end_log_pos # Binlog checkpoint master-bin.000001
# at #
use `new_test1`/*!*/;
#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0

View file

@ -145,6 +145,7 @@ master-bin.000001 # Xid # # COMMIT /* XID */
master-bin.000001 # Rotate # # master-bin.000002;pos=4
show binlog events in 'master-bin.000002' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000002 # Binlog_checkpoint # # master-bin.000002
master-bin.000002 # Query # # use `test`; DROP TABLE `t1` /* generated by server */
set @ac = @@autocommit;
set autocommit= 0;

View file

@ -1,175 +1,198 @@
SET GLOBAL max_binlog_size= 4096;
SET GLOBAL innodb_flush_log_at_trx_commit= 1;
RESET MASTER;
CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb;
CREATE TABLE t2 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Myisam;
SET @@global.debug_dbug='+d,skip_commit_ordered';
INSERT INTO t1 VALUES (0, REPEAT("x", 4100));
SET DEBUG_SYNC= "ha_commit_trans_after_log_and_order SIGNAL con1_ready WAIT_FOR _ever";
INSERT INTO t1 VALUES (100, REPEAT("x", 4100));
INSERT INTO t1 VALUES (101, REPEAT("x", 4100));
INSERT INTO t1 VALUES (102, REPEAT("x", 4100));
SET DEBUG_SYNC= "ha_commit_trans_before_log_and_order SIGNAL con1_wait WAIT_FOR con1_cont";
SET DEBUG_SYNC= "commit_after_group_release_commit_ordered SIGNAL con1_ready WAIT_FOR _ever";
INSERT INTO t1 VALUES (1, REPEAT("x", 4100));
SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
INSERT INTO t2 VALUES (1, "force binlog rotation");
SET DEBUG_SYNC= "ha_commit_trans_after_log_and_order SIGNAL con2_ready WAIT_FOR _ever";
SET DEBUG_SYNC= "now WAIT_FOR con1_wait";
SET DEBUG_SYNC= "ha_commit_trans_before_log_and_order SIGNAL con2_wait WAIT_FOR con2_cont";
SET DEBUG_SYNC= "commit_after_group_release_commit_ordered SIGNAL con2_ready WAIT_FOR _ever";
INSERT INTO t1 VALUES (2, NULL);
SET DEBUG_SYNC= "now WAIT_FOR con2_ready";
SET DEBUG_SYNC= "ha_commit_trans_after_log_and_order SIGNAL con3_ready WAIT_FOR _ever";
SET DEBUG_SYNC= "now WAIT_FOR con2_wait";
SET DEBUG_SYNC= "ha_commit_trans_before_log_and_order SIGNAL con3_wait WAIT_FOR con3_cont";
SET DEBUG_SYNC= "commit_after_group_release_commit_ordered SIGNAL con3_ready WAIT_FOR _ever";
INSERT INTO t1 VALUES (3, REPEAT("x", 4100));
SET DEBUG_SYNC= "now WAIT_FOR con3_wait";
SET DEBUG_SYNC= "ha_commit_trans_before_log_and_order SIGNAL con4_wait WAIT_FOR con4_cont";
SET SESSION debug_dbug="+d,crash_commit_after_log";
INSERT INTO t1 VALUES (4, NULL);
SET DEBUG_SYNC= "now WAIT_FOR con4_wait";
SET DEBUG_SYNC= "now SIGNAL con1_cont";
SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
SET DEBUG_SYNC= "now SIGNAL con2_cont";
SET DEBUG_SYNC= "now WAIT_FOR con2_ready";
SET DEBUG_SYNC= "now SIGNAL con3_cont";
SET DEBUG_SYNC= "now WAIT_FOR con3_ready";
INSERT INTO t2 VALUES (2, "force binlog rotation");
FLUSH TABLES t2;
show binary logs;
Log_name File_size
master-bin.000001 #
master-bin.000002 #
master-bin.000003 #
master-bin.000004 #
show binlog events in 'master-bin.000001' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000001 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
master-bin.000001 # Binlog_checkpoint # # master-bin.000001
master-bin.000001 # Query # # use `test`; CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb
master-bin.000001 # Query # # use `test`; CREATE TABLE t2 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Myisam
master-bin.000001 # Query # # BEGIN
master-bin.000001 # Table_map # # table_id: # (test.t1)
master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
master-bin.000001 # Xid # # COMMIT /* XID */
master-bin.000001 # Rotate # # master-bin.000002;pos=<binlog_start>
show binlog events in 'master-bin.000002' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000002 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
master-bin.000002 # Binlog_checkpoint # # master-bin.000002
master-bin.000002 # Query # # BEGIN
master-bin.000002 # Table_map # # table_id: # (test.t1)
master-bin.000002 # Write_rows # # table_id: # flags: STMT_END_F
master-bin.000002 # Xid # # COMMIT /* XID */
master-bin.000002 # Query # # BEGIN
master-bin.000002 # Table_map # # table_id: # (test.t2)
master-bin.000002 # Write_rows # # table_id: # flags: STMT_END_F
master-bin.000002 # Query # # COMMIT
master-bin.000002 # Rotate # # master-bin.000003;pos=<binlog_start>
show binlog events in 'master-bin.000003' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000003 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
master-bin.000003 # Binlog_checkpoint # # master-bin.000002
master-bin.000003 # Query # # BEGIN
master-bin.000003 # Table_map # # table_id: # (test.t1)
master-bin.000003 # Write_rows # # table_id: # flags: STMT_END_F
master-bin.000003 # Xid # # COMMIT /* XID */
master-bin.000003 # Query # # BEGIN
master-bin.000003 # Table_map # # table_id: # (test.t1)
master-bin.000003 # Write_rows # # table_id: # flags: STMT_END_F
master-bin.000003 # Xid # # COMMIT /* XID */
master-bin.000003 # Query # # BEGIN
master-bin.000003 # Table_map # # table_id: # (test.t2)
master-bin.000003 # Write_rows # # table_id: # flags: STMT_END_F
master-bin.000003 # Query # # COMMIT
master-bin.000003 # Rotate # # master-bin.00000<binlog_start>;pos=<binlog_start>
show binlog events in 'master-bin.00000<binlog_start>' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.00000<binlog_start> # Format_desc # # SERVER_VERSION, BINLOG_VERSION
master-bin.00000<binlog_start> # Binlog_checkpoint # # master-bin.000002
master-bin.00000<binlog_start> # Query # # use `test`; FLUSH TABLES t2
We should see only one entry here, a=0:
SELECT a FROM t1 ORDER BY a;
a
0
PURGE BINARY LOGS TO "master-bin.000004";
show binary logs;
Log_name File_size
master-bin.000002 #
master-bin.000003 #
master-bin.000004 #
SET SESSION debug_dbug="+d,crash_commit_after_log";
INSERT INTO t1 VALUES (4, NULL);
Got one of the listed errors
SELECT a FROM t1 ORDER BY a;
a
0
1
2
3
4
*** Test that RESET MASTER waits for pending XIDs to be unlogged.
SET @old_max_binlog_size= @@global.max_binlog_size;
SET GLOBAL max_binlog_size= 4096;
SET DEBUG_SYNC= "ha_commit_trans_after_log_and_order SIGNAL con10_ready WAIT_FOR con10_go";
INSERT INTO t1 VALUES (10, NULL);
SET DEBUG_SYNC= "now WAIT_FOR con10_ready";
INSERT INTO t2 VALUES (10, REPEAT("x", 4100));
INSERT INTO t2 VALUES (11, REPEAT("x", 4100));
show binary logs;
Log_name File_size
master-bin.000002 #
master-bin.000003 #
master-bin.000004 #
master-bin.000005 #
master-bin.000006 #
master-bin.000007 #
SET DEBUG_SYNC= "execute_command_after_close_tables SIGNAL reset_master_done";
RESET MASTER;
This will timeout, as RESET MASTER is blocked
SET DEBUG_SYNC= "now WAIT_FOR reset_master_done TIMEOUT 1";
Warnings:
Warning 1639 debug sync point wait timed out
SET DEBUG_SYNC= "now SIGNAL con10_go";
show binlog events in 'master-bin.000003' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000003 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
master-bin.000003 # Binlog_checkpoint # # master-bin.000002
master-bin.000003 # Binlog_checkpoint # # master-bin.000003
master-bin.000003 # Query # # BEGIN
master-bin.000003 # Table_map # # table_id: # (test.t1)
master-bin.000003 # Write_rows # # table_id: # flags: STMT_END_F
master-bin.000003 # Xid # # COMMIT /* XID */
master-bin.000003 # Rotate # # master-bin.00000<binlog_start>;pos=<binlog_start>
show binlog events in 'master-bin.00000<binlog_start>' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.00000<binlog_start> # Format_desc # # SERVER_VERSION, BINLOG_VERSION
master-bin.00000<binlog_start> # Binlog_checkpoint # # master-bin.000003
master-bin.00000<binlog_start> # Binlog_checkpoint # # master-bin.00000<binlog_start>
master-bin.00000<binlog_start> # Query # # BEGIN
master-bin.00000<binlog_start> # Table_map # # table_id: # (test.t1)
master-bin.00000<binlog_start> # Write_rows # # table_id: # flags: STMT_END_F
master-bin.00000<binlog_start> # Xid # # COMMIT /* XID */
master-bin.00000<binlog_start> # Rotate # # master-bin.000005;pos=<binlog_start>
show binlog events in 'master-bin.000005' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000005 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
master-bin.000005 # Binlog_checkpoint # # master-bin.00000<binlog_start>
master-bin.000005 # Query # # BEGIN
master-bin.000005 # Table_map # # table_id: # (test.t1)
master-bin.000005 # Write_rows # # table_id: # flags: STMT_END_F
master-bin.000005 # Xid # # COMMIT /* XID */
master-bin.000005 # Query # # BEGIN
master-bin.000005 # Table_map # # table_id: # (test.t1)
master-bin.000005 # Write_rows # # table_id: # flags: STMT_END_F
master-bin.000005 # Xid # # COMMIT /* XID */
master-bin.000005 # Rotate # # master-bin.000006;pos=<binlog_start>
show binlog events in 'master-bin.000006' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000006 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
master-bin.000006 # Binlog_checkpoint # # master-bin.00000<binlog_start>
PURGE BINARY LOGS TO "master-bin.000006";
show binary logs;
Log_name File_size
master-bin.000001 #
*** Test that binlog N is active, and last pending trx in (N-1) is
unlogged while there is still a pending trx in (N-2).
SET DEBUG_SYNC= "ha_commit_trans_after_log_and_order SIGNAL con10_ready WAIT_FOR con10_continue";
INSERT INTO t1 VALUES (20, REPEAT("x", 4100));
master-bin.000004 #
master-bin.000005 #
master-bin.000006 #
SET DEBUG_SYNC= "now SIGNAL con4_cont";
Got one of the listed errors
SELECT a FROM t1 ORDER BY a;
a
1
2
3
4
100
101
102
Test that with multiple binlog checkpoints, recovery starts from the last one.
SET GLOBAL max_binlog_size= 4096;
SET GLOBAL innodb_flush_log_at_trx_commit= 1;
RESET MASTER;
SET DEBUG_SYNC= "commit_after_group_release_commit_ordered SIGNAL con10_ready WAIT_FOR con10_cont";
INSERT INTO t1 VALUES (10, REPEAT("x", 4100));
SET DEBUG_SYNC= "now WAIT_FOR con10_ready";
INSERT INTO t2 VALUES (3, "force binlog rotation");
SET DEBUG_SYNC= "ha_commit_trans_after_log_and_order SIGNAL con11_ready WAIT_FOR con11_continue";
INSERT INTO t1 VALUES (21, REPEAT("x", 4100));
SET DEBUG_SYNC= "commit_after_group_release_commit_ordered SIGNAL con11_ready WAIT_FOR con11_cont";
INSERT INTO t1 VALUES (11, REPEAT("x", 4100));
SET DEBUG_SYNC= "now WAIT_FOR con11_ready";
INSERT INTO t2 VALUES (4, "force binlog rotation");
SET DEBUG_SYNC= "commit_after_group_release_commit_ordered SIGNAL con12_ready WAIT_FOR con12_cont";
INSERT INTO t1 VALUES (12, REPEAT("x", 4100));
SET DEBUG_SYNC= "now WAIT_FOR con12_ready";
INSERT INTO t1 VALUES (13, NULL);
show binary logs;
Log_name File_size
master-bin.000001 #
master-bin.000002 #
master-bin.000003 #
show binlog events in 'master-bin.000001' from <binlog_start>;
master-bin.000004 #
show binlog events in 'master-bin.00000<binlog_start>' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000001 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
master-bin.000001 # Binlog_checkpoint # # master-bin.000001
master-bin.000001 # Query # # BEGIN
master-bin.000001 # Table_map # # table_id: # (test.t1)
master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
master-bin.000001 # Xid # # COMMIT /* XID */
master-bin.000001 # Query # # BEGIN
master-bin.000001 # Table_map # # table_id: # (test.t2)
master-bin.000001 # Write_rows # # table_id: # flags: STMT_END_F
master-bin.000001 # Query # # COMMIT
master-bin.000001 # Rotate # # master-bin.000002;pos=<binlog_start>
show binlog events in 'master-bin.000002' from <binlog_start>;
master-bin.00000<binlog_start> # Format_desc # # SERVER_VERSION, BINLOG_VERSION
master-bin.00000<binlog_start> # Binlog_checkpoint # # master-bin.000001
master-bin.00000<binlog_start> # Query # # BEGIN
master-bin.00000<binlog_start> # Table_map # # table_id: # (test.t1)
master-bin.00000<binlog_start> # Write_rows # # table_id: # flags: STMT_END_F
master-bin.00000<binlog_start> # Xid # # COMMIT /* XID */
SET DEBUG_SYNC= "now SIGNAL con10_cont";
SET DEBUG_SYNC= "now SIGNAL con12_cont";
SET DEBUG_SYNC= "now SIGNAL con11_cont";
Checking that master-bin.000004 is the last binlog checkpoint
show binlog events in 'master-bin.00000<binlog_start>' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000002 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
master-bin.000002 # Binlog_checkpoint # # master-bin.000001
master-bin.000002 # Query # # BEGIN
master-bin.000002 # Table_map # # table_id: # (test.t1)
master-bin.000002 # Write_rows # # table_id: # flags: STMT_END_F
master-bin.000002 # Xid # # COMMIT /* XID */
master-bin.000002 # Query # # BEGIN
master-bin.000002 # Table_map # # table_id: # (test.t2)
master-bin.000002 # Write_rows # # table_id: # flags: STMT_END_F
master-bin.000002 # Query # # COMMIT
master-bin.000002 # Rotate # # master-bin.000003;pos=<binlog_start>
show binlog events in 'master-bin.000003' from <binlog_start>;
master-bin.00000<binlog_start> # Format_desc # # SERVER_VERSION, BINLOG_VERSION
master-bin.00000<binlog_start> # Binlog_checkpoint # # master-bin.000001
master-bin.00000<binlog_start> # Query # # BEGIN
master-bin.00000<binlog_start> # Table_map # # table_id: # (test.t1)
master-bin.00000<binlog_start> # Write_rows # # table_id: # flags: STMT_END_F
master-bin.00000<binlog_start> # Xid # # COMMIT /* XID */
master-bin.00000<binlog_start> # Binlog_checkpoint # # master-bin.000002
master-bin.00000<binlog_start> # Binlog_checkpoint # # master-bin.00000<binlog_start>
Now crash the server
SET SESSION debug_dbug="+d,crash_commit_after_log";
INSERT INTO t1 VALUES (14, NULL);
Got one of the listed errors
SELECT a FROM t1 ORDER BY a;
a
1
2
3
4
10
11
12
13
14
100
101
102
*** Check that recovery works if we crashed early during rotate, before
*** binlog checkpoint event could be written.
SET GLOBAL max_binlog_size= 4096;
SET GLOBAL innodb_flush_log_at_trx_commit= 1;
RESET MASTER;
INSERT INTO t1 VALUES (21, REPEAT("x", 4100));
INSERT INTO t1 VALUES (22, REPEAT("x", 4100));
INSERT INTO t1 VALUES (23, REPEAT("x", 4100));
SET SESSION debug_dbug="+d,crash_before_write_checkpoint_event";
INSERT INTO t1 VALUES (24, REPEAT("x", 4100));
Got one of the listed errors
SELECT a FROM t1 ORDER BY a;
a
1
2
3
4
10
11
12
13
14
21
22
23
24
100
101
102
show binary logs;
Log_name File_size
master-bin.000001 #
master-bin.000002 #
master-bin.000003 #
master-bin.000004 #
master-bin.000005 #
show binlog events in 'master-bin.00000<binlog_start>' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000003 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
master-bin.000003 # Binlog_checkpoint # # master-bin.000001
SET DEBUG_SYNC= "now SIGNAL con11_continue";
con10 is still pending, no new binlog checkpoint should have been logged.
show binlog events in 'master-bin.000003' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000003 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
master-bin.000003 # Binlog_checkpoint # # master-bin.000001
SET DEBUG_SYNC= "now SIGNAL con10_continue";
No XIDs are pending, a new binlog checkpoint should have been logged.
show binlog events in 'master-bin.000003' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000003 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
master-bin.000003 # Binlog_checkpoint # # master-bin.000001
master-bin.000003 # Binlog_checkpoint # # master-bin.000003
DROP TABLE t1, t2;
SET GLOBAL max_binlog_size= @old_max_binlog_size;
master-bin.00000<binlog_start> # Format_desc # # SERVER_VERSION, BINLOG_VERSION
master-bin.00000<binlog_start> # Binlog_checkpoint # # master-bin.000003
master-bin.00000<binlog_start> # Binlog_checkpoint # # master-bin.00000<binlog_start>
master-bin.00000<binlog_start> # Query # # BEGIN
master-bin.00000<binlog_start> # Table_map # # table_id: # (test.t1)
master-bin.00000<binlog_start> # Write_rows # # table_id: # flags: STMT_END_F
master-bin.00000<binlog_start> # Xid # # COMMIT /* XID */
master-bin.00000<binlog_start> # Rotate # # master-bin.000005;pos=<binlog_start>
DROP TABLE t1;

View file

@ -0,0 +1,108 @@
# Test of binlog checkpoints in combination with pending commit checkpoints:
#  1. RESET MASTER must wait until pending commit checkpoints have completed.
#  2. A new binlog checkpoint must not be logged while a commit checkpoint
#     for an older binlog file is still pending.
#
# The included prerequisite files below restrict the test to servers with
# InnoDB, debug code, the debug sync facility and row-based binlog format.
--source include/have_innodb.inc
--source include/have_debug.inc
--source include/have_debug_sync.inc
--source include/have_binlog_format_row.inc
# Remember the global settings changed by this test; they are restored in the
# cleanup section at the end.
SET @old_max_binlog_size= @@global.max_binlog_size;
# NOTE(review): presumably the small size makes each of the large
# (4100-character) inserts below trigger a binlog rotation - confirm against
# the recorded result file.
SET GLOBAL max_binlog_size= 4096;
SET @old_innodb_flush_log_at_trx_commit= @@global.innodb_flush_log_at_trx_commit;
SET GLOBAL innodb_flush_log_at_trx_commit= 1;
# Start from a fresh binlog, so that the binlog file names shown below are
# predictable.
RESET MASTER;
# t1 is InnoDB and is the table used by the transactions that are paused at a
# debug sync point; t2 is MyISAM and is only used for extra inserts.
CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb;
CREATE TABLE t2 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Myisam;
--echo *** Test that RESET MASTER waits for pending commit checkpoints to complete.
# con1 will hang before doing commit checkpoint, blocking RESET MASTER.
connect(con1,localhost,root,,);
SET DEBUG_SYNC= "commit_after_group_release_commit_ordered SIGNAL con1_ready WAIT_FOR con1_go";
# "send" issues the statement without waiting for its result; the result is
# collected later with "reap" on the same connection.
send INSERT INTO t1 VALUES (1, REPEAT("x", 4100));
connection default;
# Wait until con1 has reached its sync point and is now paused there.
SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
# Let's add a few binlog rotations just for good measure.
INSERT INTO t2 VALUES (1, REPEAT("x", 4100));
INSERT INTO t2 VALUES (2, REPEAT("x", 4100));
--source include/show_binary_logs.inc
# Show the contents of master-bin.000004, starting from position 4.
--let $binlog_file= master-bin.000004
--let $binlog_start= 4
--source include/show_binlog_events.inc
# Arrange for RESET MASTER to emit the signal reset_master_done when it
# reaches the sync point execute_command_after_close_tables.
SET DEBUG_SYNC= "execute_command_after_close_tables SIGNAL reset_master_done";
send RESET MASTER;
# The default connection and con1 both have a statement outstanding, so a
# further connection is used to observe the state and to release con1.
connect(con2,localhost,root,,);
--echo This will timeout, as RESET MASTER is blocked
# The timeout warning produced here is the expected outcome: it shows that
# RESET MASTER has not emitted its signal while con1 is still paused.
SET DEBUG_SYNC= "now WAIT_FOR reset_master_done TIMEOUT 1";
# Wake up transaction to allow RESET MASTER to complete.
SET DEBUG_SYNC= "now SIGNAL con1_go";
# Collect the result of the INSERT that was sent on con1 ...
connection con1;
reap;
# ... and the result of the RESET MASTER that was sent on the default
# connection.
connection default;
reap;
# Show the binary logs and the contents of master-bin.000001 after
# RESET MASTER has completed.
--source include/show_binary_logs.inc
--let $binlog_file= master-bin.000001
--let $binlog_start= 4
--source include/show_binlog_events.inc
--echo *** Test that binlog N is active, and commit checkpoint for (N-1) is
--echo *** done while there is still a pending commit checkpoint for (N-2).
# Pause an INSERT from con1 at the sync point, to be released with the signal
# con1_continue.
connection con1;
SET DEBUG_SYNC= "commit_after_group_release_commit_ordered SIGNAL con1_ready WAIT_FOR con1_continue";
send INSERT INTO t1 VALUES (20, REPEAT("x", 4100));
connection default;
SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
# Pause a second INSERT, this time from con2, at the same sync point, to be
# released with the signal con2_continue.
connection con2;
SET DEBUG_SYNC= "commit_after_group_release_commit_ordered SIGNAL con2_ready WAIT_FOR con2_continue";
send INSERT INTO t1 VALUES (21, REPEAT("x", 4100));
connection default;
SET DEBUG_SYNC= "now WAIT_FOR con2_ready";
# With both transactions paused, show the binary logs and the contents of
# each of master-bin.000001, master-bin.000002 and master-bin.000003.
--source include/show_binary_logs.inc
--let $binlog_file= master-bin.000001
--source include/show_binlog_events.inc
--let $binlog_file= master-bin.000002
--source include/show_binlog_events.inc
--let $binlog_file= master-bin.000003
--source include/show_binlog_events.inc
# Release con2 (the later of the two paused transactions) first, and collect
# the result of its INSERT.
SET DEBUG_SYNC= "now SIGNAL con2_continue";
connection con2;
reap;
connection default;
--echo con1 is still pending, no new binlog checkpoint should have been logged.
--let $binlog_file= master-bin.000003
--source include/show_binlog_events.inc
# Now release con1 as well, and collect the result of its INSERT.
SET DEBUG_SYNC= "now SIGNAL con1_continue";
connection con1;
reap;
connection default;
--echo No commit checkpoints are pending, a new binlog checkpoint should have been logged.
--let $binlog_file= master-bin.000003
# Wait for the master-bin.000003 binlog checkpoint to appear.
# NOTE(review): wait_show_condition.inc is not visible here; presumably it
# re-runs $show_statement until the $field column of some row satisfies
# $condition, with $wait_for_all= 0 meaning that a single matching row is
# enough - confirm against the include file.
--let $wait_for_all= 0
--let $show_statement= SHOW BINLOG EVENTS IN "$binlog_file"
--let $field= Info
--let $condition= = "master-bin.000003"
--source include/wait_show_condition.inc
--source include/show_binlog_events.inc
# Cleanup
# NOTE(review): con1 and con2 are not disconnected and DEBUG_SYNC is not reset
# here - confirm whether that is intentional.
connection default;
DROP TABLE t1, t2;
SET GLOBAL max_binlog_size= @old_max_binlog_size;
SET GLOBAL innodb_flush_log_at_trx_commit= @old_innodb_flush_log_at_trx_commit;

View file

@ -1 +1 @@
--skip-stack-trace --skip-core-file
--skip-stack-trace --skip-core-file --loose-debug-dbug=+d,xa_recover_expect_master_bin_000004

View file

@ -5,81 +5,191 @@
# Valgrind does not work well with test that crashes the server
--source include/not_valgrind.inc
# (We do not need to restore these settings, as we crash the server).
SET GLOBAL max_binlog_size= 4096;
SET GLOBAL innodb_flush_log_at_trx_commit= 1;
RESET MASTER;
CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb;
CREATE TABLE t2 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Myisam;
# Transactions are not guaranteed stored durably on disk in the engine until
# they are fsync()ed, which normally happens during commit(). But there is no
# guarantee that they will _not_ be durable, in particular losing results
# of a write(2) system call normally requires a kernel crash (as opposed to
# just mysqld crash), which is inconvenient to do in a test suite.
# So instead we do an error insert to prevent commit_ordered() from being
# called in the engine - so nothing will be written to disk at all, and crash
# recovery is sure to be needed.
SET @@global.debug_dbug='+d,skip_commit_ordered';
INSERT INTO t1 VALUES (0, REPEAT("x", 4100));
# Insert some data to force a couple binlog rotations (3), so we get some
# normal binlog checkpoints before starting the test.
INSERT INTO t1 VALUES (100, REPEAT("x", 4100));
INSERT INTO t1 VALUES (101, REPEAT("x", 4100));
INSERT INTO t1 VALUES (102, REPEAT("x", 4100));
# Now start a bunch of transactions that span multiple binlog
# files. Leave them in the state prepared-but-not-committed in the engine
# and crash the server. Check that crash recovery is able to recover all
# of them.
#
# We use debug_sync to get all the transactions into the prepared state before
# we commit any of them. This is because the prepare step flushes the InnoDB
# redo log - including any commits made before, so recovery would become
# unnecessary, decreasing the value of this test.
#
# We arrange to have con1 with a prepared transaction in master-bin.000004,
# con2 and con3 with a prepared transaction in master-bin.000005, and a new
# empty master-bin.000006. So the latest binlog checkpoint should be
# master-bin.000006.
connect(con1,localhost,root,,);
SET DEBUG_SYNC= "ha_commit_trans_after_log_and_order SIGNAL con1_ready WAIT_FOR _ever";
# First wait after prepare and before write to binlog.
SET DEBUG_SYNC= "ha_commit_trans_before_log_and_order SIGNAL con1_wait WAIT_FOR con1_cont";
# Then complete InnoDB commit in memory (but not commit checkpoint / write to
# disk), and hang until crash, leaving a transaction to be XA recovered.
SET DEBUG_SYNC= "commit_after_group_release_commit_ordered SIGNAL con1_ready WAIT_FOR _ever";
send INSERT INTO t1 VALUES (1, REPEAT("x", 4100));
connection default;
SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
INSERT INTO t2 VALUES (1, "force binlog rotation");
SET DEBUG_SYNC= "now WAIT_FOR con1_wait";
connect(con2,localhost,root,,);
SET DEBUG_SYNC= "ha_commit_trans_after_log_and_order SIGNAL con2_ready WAIT_FOR _ever";
SET DEBUG_SYNC= "ha_commit_trans_before_log_and_order SIGNAL con2_wait WAIT_FOR con2_cont";
SET DEBUG_SYNC= "commit_after_group_release_commit_ordered SIGNAL con2_ready WAIT_FOR _ever";
send INSERT INTO t1 VALUES (2, NULL);
connection default;
SET DEBUG_SYNC= "now WAIT_FOR con2_ready";
SET DEBUG_SYNC= "now WAIT_FOR con2_wait";
connect(con3,localhost,root,,);
SET DEBUG_SYNC= "ha_commit_trans_after_log_and_order SIGNAL con3_ready WAIT_FOR _ever";
SET DEBUG_SYNC= "ha_commit_trans_before_log_and_order SIGNAL con3_wait WAIT_FOR con3_cont";
SET DEBUG_SYNC= "commit_after_group_release_commit_ordered SIGNAL con3_ready WAIT_FOR _ever";
send INSERT INTO t1 VALUES (3, REPEAT("x", 4100));
connection default;
SET DEBUG_SYNC= "now WAIT_FOR con3_wait";
connect(con4,localhost,root,,);
SET DEBUG_SYNC= "ha_commit_trans_before_log_and_order SIGNAL con4_wait WAIT_FOR con4_cont";
SET SESSION debug_dbug="+d,crash_commit_after_log";
send INSERT INTO t1 VALUES (4, NULL);
connection default;
SET DEBUG_SYNC= "now WAIT_FOR con4_wait";
SET DEBUG_SYNC= "now SIGNAL con1_cont";
SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
SET DEBUG_SYNC= "now SIGNAL con2_cont";
SET DEBUG_SYNC= "now WAIT_FOR con2_ready";
SET DEBUG_SYNC= "now SIGNAL con3_cont";
SET DEBUG_SYNC= "now WAIT_FOR con3_ready";
INSERT INTO t2 VALUES (2, "force binlog rotation");
# So we won't get warnings about t2 being crashed.
FLUSH TABLES t2;
# Check that everything is committed in binary log.
--source include/show_binary_logs.inc
--let $binlog_file= master-bin.000001
--let $binlog_start= 4
--source include/show_binlog_events.inc
--let $binlog_file= master-bin.000002
--source include/show_binlog_events.inc
--let $binlog_file= master-bin.000003
--let $binlog_start= 4
--source include/show_binlog_events.inc
--let $binlog_file= master-bin.000004
--source include/show_binlog_events.inc
# Check that transactions really are not yet committed in engine.
# (This works because of debug_dbug='+d,skip_commit_ordered').
--echo We should see only one entry here, a=0:
SELECT a FROM t1 ORDER BY a;
--let $binlog_file= master-bin.000005
--source include/show_binlog_events.inc
--let $binlog_file= master-bin.000006
--source include/show_binlog_events.inc
# Check that server will not purge too much.
PURGE BINARY LOGS TO "master-bin.000004";
PURGE BINARY LOGS TO "master-bin.000006";
--source include/show_binary_logs.inc
# Now crash the server with one more transaction in prepared state.
system echo wait-binlog_xa_recover.test >> $MYSQLTEST_VARDIR/tmp/mysqld.1.expect;
--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
wait-binlog_xa_recover.test
EOF
SET DEBUG_SYNC= "now SIGNAL con4_cont";
connection con4;
--error 2006,2013
reap;
--remove_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
restart-group_commit_binlog_pos.test
EOF
connection default;
--enable_reconnect
--source include/wait_until_connected_again.inc
# Check that all transactions are recovered.
SELECT a FROM t1 ORDER BY a;
--echo Test that with multiple binlog checkpoints, recovery starts from the last one.
SET GLOBAL max_binlog_size= 4096;
SET GLOBAL innodb_flush_log_at_trx_commit= 1;
RESET MASTER;
# Rotate to binlog master-bin.000003 while delaying binlog checkpoints.
# So we get multiple binlog checkpoints in master-bin.000003.
# Then complete the checkpoints, crash, and check that we only scan
# the necessary binlog file (ie. that we use the _last_ checkpoint).
connect(con10,localhost,root,,);
SET DEBUG_SYNC= "commit_after_group_release_commit_ordered SIGNAL con10_ready WAIT_FOR con10_cont";
send INSERT INTO t1 VALUES (10, REPEAT("x", 4100));
connection default;
SET DEBUG_SYNC= "now WAIT_FOR con10_ready";
connect(con11,localhost,root,,);
SET DEBUG_SYNC= "commit_after_group_release_commit_ordered SIGNAL con11_ready WAIT_FOR con11_cont";
send INSERT INTO t1 VALUES (11, REPEAT("x", 4100));
connection default;
SET DEBUG_SYNC= "now WAIT_FOR con11_ready";
connect(con12,localhost,root,,);
SET DEBUG_SYNC= "commit_after_group_release_commit_ordered SIGNAL con12_ready WAIT_FOR con12_cont";
send INSERT INTO t1 VALUES (12, REPEAT("x", 4100));
connection default;
SET DEBUG_SYNC= "now WAIT_FOR con12_ready";
INSERT INTO t1 VALUES (13, NULL);
--source include/show_binary_logs.inc
--let $binlog_file= master-bin.000004
--let $binlog_start= 4
--source include/show_binlog_events.inc
SET DEBUG_SYNC= "now SIGNAL con10_cont";
connection con10;
reap;
connection default;
SET DEBUG_SYNC= "now SIGNAL con12_cont";
connection con12;
reap;
connection default;
SET DEBUG_SYNC= "now SIGNAL con11_cont";
connection con11;
reap;
connection default;
# Wait for the last (master-bin.000004) binlog checkpoint to appear.
--let $wait_for_all= 0
--let $show_statement= SHOW BINLOG EVENTS IN "master-bin.000004"
--let $field= Info
--let $condition= = "master-bin.000004"
--source include/wait_show_condition.inc
--echo Checking that master-bin.000004 is the last binlog checkpoint
--source include/show_binlog_events.inc
--echo Now crash the server
# It is not too easy to test XA recovery, as it runs early during server
# startup, before any connections can be made.
# What we do is set a DBUG error insert which will crash if XA recovery
# starts from any other binlog than master-bin.000004 (check the file
# binlog_xa_recover-master.opt). Then we will fail here if XA recovery
# would start from the wrong place.
--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
wait-binlog_xa_recover.test
EOF
SET SESSION debug_dbug="+d,crash_commit_after_log";
--error 2006,2013
INSERT INTO t1 VALUES (4, NULL);
INSERT INTO t1 VALUES (14, NULL);
system echo restart-group_commit_binlog_pos.test >> $MYSQLTEST_VARDIR/tmp/mysqld.1.expect;
--remove_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
restart-group_commit_binlog_pos.test
EOF
connection default;
--enable_reconnect
@ -89,86 +199,41 @@ connection default;
SELECT a FROM t1 ORDER BY a;
--echo *** Test that RESET MASTER waits for pending XIDs to be unlogged.
--echo *** Check that recovery works if we crashed early during rotate, before
--echo *** binlog checkpoint event could be written.
SET @old_max_binlog_size= @@global.max_binlog_size;
SET GLOBAL max_binlog_size= 4096;
# con10 will hang with a pending XID, blocking RESET MASTER.
connect(con10,localhost,root,,);
SET DEBUG_SYNC= "ha_commit_trans_after_log_and_order SIGNAL con10_ready WAIT_FOR con10_go";
send INSERT INTO t1 VALUES (10, NULL);
SET GLOBAL innodb_flush_log_at_trx_commit= 1;
RESET MASTER;
# We need some initial data to reach binlog master-bin.000004. Otherwise
# crash recovery fails due to the error insert used for previous test.
INSERT INTO t1 VALUES (21, REPEAT("x", 4100));
INSERT INTO t1 VALUES (22, REPEAT("x", 4100));
INSERT INTO t1 VALUES (23, REPEAT("x", 4100));
--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
wait-binlog_xa_recover.test
EOF
SET SESSION debug_dbug="+d,crash_before_write_checkpoint_event";
--error 2006,2013
INSERT INTO t1 VALUES (24, REPEAT("x", 4100));
--remove_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
restart-group_commit_binlog_pos.test
EOF
--enable_reconnect
--source include/wait_until_connected_again.inc
# Check that all transactions are recovered.
SELECT a FROM t1 ORDER BY a;
connection default;
SET DEBUG_SYNC= "now WAIT_FOR con10_ready";
# Let's add a few binlog rotations just for good measure.
INSERT INTO t2 VALUES (10, REPEAT("x", 4100));
INSERT INTO t2 VALUES (11, REPEAT("x", 4100));
--source include/show_binary_logs.inc
SET DEBUG_SYNC= "execute_command_after_close_tables SIGNAL reset_master_done";
send RESET MASTER;
connect(con11,localhost,root,,);
--echo This will timeout, as RESET MASTER is blocked
SET DEBUG_SYNC= "now WAIT_FOR reset_master_done TIMEOUT 1";
# Wake up transaction to allow RESET MASTER to complete.
SET DEBUG_SYNC= "now SIGNAL con10_go";
connection con10;
reap;
connection default;
reap;
--source include/show_binary_logs.inc
--echo *** Test that binlog N is active, and last pending trx in (N-1) is
--echo unlogged while there is still a pending trx in (N-2).
connection con10;
SET DEBUG_SYNC= "ha_commit_trans_after_log_and_order SIGNAL con10_ready WAIT_FOR con10_continue";
send INSERT INTO t1 VALUES (20, REPEAT("x", 4100));
connection default;
SET DEBUG_SYNC= "now WAIT_FOR con10_ready";
INSERT INTO t2 VALUES (3, "force binlog rotation");
connection con11;
SET DEBUG_SYNC= "ha_commit_trans_after_log_and_order SIGNAL con11_ready WAIT_FOR con11_continue";
send INSERT INTO t1 VALUES (21, REPEAT("x", 4100));
connection default;
SET DEBUG_SYNC= "now WAIT_FOR con11_ready";
INSERT INTO t2 VALUES (4, "force binlog rotation");
--source include/show_binary_logs.inc
--let $binlog_file= master-bin.000001
--let $binlog_file= master-bin.000004
--let $binlog_start= 4
--source include/show_binlog_events.inc
--let $binlog_file= master-bin.000002
--source include/show_binlog_events.inc
--let $binlog_file= master-bin.000003
--source include/show_binlog_events.inc
SET DEBUG_SYNC= "now SIGNAL con11_continue";
connection con11;
reap;
connection default;
--echo con10 is still pending, no new binlog checkpoint should have been logged.
--let $binlog_file= master-bin.000003
--source include/show_binlog_events.inc
SET DEBUG_SYNC= "now SIGNAL con10_continue";
connection con10;
reap;
connection default;
--echo No XIDs are pending, a new binlog checkpoint should have been logged.
--let $binlog_file= master-bin.000003
--source include/show_binlog_events.inc
# Cleanup
connection default;
DROP TABLE t1, t2;
SET GLOBAL max_binlog_size= @old_max_binlog_size;
DROP TABLE t1;

View file

@ -63,15 +63,15 @@ binlog_snapshot_file master-bin.000001
binlog_snapshot_position 945
SHOW MASTER STATUS;
File Position Binlog_Do_DB Binlog_Ignore_DB
master-bin.000002 286
master-bin.000002 326
COMMIT;
SHOW STATUS LIKE 'binlog_snapshot_%';
Variable_name Value
binlog_snapshot_file master-bin.000002
binlog_snapshot_position 286
binlog_snapshot_position 326
SHOW MASTER STATUS;
File Position Binlog_Do_DB Binlog_Ignore_DB
master-bin.000002 286
master-bin.000002 326
SHOW BINLOG EVENTS;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000001 4 Format_desc 1 246 Server ver: #, Binlog ver: #

View file

@ -1,3 +1,4 @@
SET GLOBAL innodb_flush_log_at_trx_commit=3;
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb;
INSERT INTO t1 VALUES (0);
SET DEBUG_SYNC= "commit_after_get_LOCK_log SIGNAL con1_waiting WAIT_FOR con3_queued";

View file

@ -1,3 +1,4 @@
SET GLOBAL innodb_flush_log_at_trx_commit=3;
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb;
INSERT INTO t1 VALUES (0);
SET DEBUG_SYNC= "commit_after_get_LOCK_log SIGNAL con1_waiting WAIT_FOR con3_queued";

View file

@ -17,6 +17,19 @@
# Test that we get the correct position when we group commit several
# transactions together.
# What we really want to test here is what happens when a group of
# transactions get written only partially to disk inside InnoDB before
# the crash. But that is hard to test in mysql-test-run automated
# tests. Instead, we use debug_sync to tightly control when each
# transaction is written to the redo log. And we set
# innodb_flush_log_at_trx_commit=3 so that we can write out
# transactions individually - as with
# innodb_flush_log_at_trx_commit=1, all commits are written together,
# as part of a commit_checkpoint.
# (Note that we do not have to restore innodb_flush_log_at_trx_commit, as
# we crash the server).
SET GLOBAL innodb_flush_log_at_trx_commit=3;
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb;
INSERT INTO t1 VALUES (0);

View file

@ -17,6 +17,19 @@
# Test that we get the correct position when we group commit several
# transactions together.
# What we really want to test here is what happens when a group of
# transactions get written only partially to disk inside InnoDB before
# the crash. But that is hard to test in mysql-test-run automated
# tests. Instead, we use debug_sync to tightly control when each
# transaction is written to the redo log. And we set
# innodb_flush_log_at_trx_commit=3 so that we can write out
# transactions individually - as with
# innodb_flush_log_at_trx_commit=1, all commits are written together,
# as part of a commit_checkpoint.
# (Note that we do not have to restore innodb_flush_log_at_trx_commit, as
# we crash the server).
SET GLOBAL innodb_flush_log_at_trx_commit=3;
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb;
INSERT INTO t1 VALUES (0);

View file

@ -71,7 +71,7 @@ insert into t1 values (1) /* will not be applied on slave due to simulation */;
set @@global.debug_dbug='d,simulate_slave_unaware_checksum';
start slave;
include/wait_for_slave_io_error.inc [errno=1236]
Last_IO_Error = 'Got fatal error 1236 from master when reading data from binary log: 'Slave can not handle replication events with the checksum that master is configured to log; the first event 'master-bin.000009' at 286, the last event read from 'master-bin.000010' at 246, the last byte read from 'master-bin.000010' at 246.''
Last_IO_Error = 'Got fatal error 1236 from master when reading data from binary log: 'Slave can not handle replication events with the checksum that master is configured to log; the first event 'master-bin.000009' at 326, the last event read from 'master-bin.000010' at 246, the last byte read from 'master-bin.000010' at 246.''
select count(*) as zero from t1;
zero
0

View file

@ -36,7 +36,7 @@
a
1
On slave
+show binlog events in 'slave-bin.000002' from <binlog_start> limit 1,6;
+show binlog events in 'slave-bin.000002' from <binlog_start> limit 2,6;
+Log_name Pos Event_type Server_id End_log_pos Info
+slave-bin.000002 # Query # # BEGIN
+slave-bin.000002 # Query # # use `test`; INSERT IGNORE INTO t1 VALUES(1)

View file

@ -54,7 +54,7 @@ master-bin.000002 # Query # # COMMIT
SELECT * FROM t1;
a
2
show relaylog events in 'slave-relay-bin.000005' from <binlog_start> limit 4,5;
show relaylog events in 'slave-relay-bin.000005' from <binlog_start> limit 5,5;
Log_name Pos Event_type Server_id End_log_pos Info
slave-relay-bin.000005 # Query # # BEGIN
slave-relay-bin.000005 # Query # # # Dummy ev

View file

@ -205,6 +205,7 @@ master-bin.000001 # Query # # COMMIT
master-bin.000001 # Rotate # # master-bin.000002;pos=4
show binlog events in 'master-bin.000002' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000002 # Binlog_checkpoint # # master-bin.000002
master-bin.000002 # Query # # use `test`; create table t3 (a int)ENGINE=MyISAM
master-bin.000002 # Query # # use `test`; create table t2 (n int)ENGINE=MyISAM
master-bin.000002 # Query # # BEGIN
@ -236,6 +237,7 @@ slave-bin.000001 # Query # # use `test`; create table t3 (a int)ENGINE=MyISAM
slave-bin.000001 # Rotate # # slave-bin.000002;pos=4
show binlog events in 'slave-bin.000002' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
slave-bin.000002 # Binlog_checkpoint # # slave-bin.000002
slave-bin.000002 # Query # # use `test`; create table t2 (n int)ENGINE=MyISAM
slave-bin.000002 # Query # # BEGIN
slave-bin.000002 # Table_map # # table_id: # (test.t2)

View file

@ -205,6 +205,7 @@ master-bin.000001 # Xid # # COMMIT /* XID */
master-bin.000001 # Rotate # # master-bin.000002;pos=4
show binlog events in 'master-bin.000002' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000002 # Binlog_checkpoint # # master-bin.000002
master-bin.000002 # Query # # use `test`; create table t3 (a int)ENGINE=InnoDB
master-bin.000002 # Query # # use `test`; create table t2 (n int)ENGINE=InnoDB
master-bin.000002 # Query # # BEGIN
@ -236,6 +237,7 @@ slave-bin.000001 # Query # # use `test`; create table t3 (a int)ENGINE=InnoDB
slave-bin.000001 # Rotate # # slave-bin.000002;pos=4
show binlog events in 'slave-bin.000002' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
slave-bin.000002 # Binlog_checkpoint # # slave-bin.000002
slave-bin.000002 # Query # # use `test`; create table t2 (n int)ENGINE=InnoDB
slave-bin.000002 # Query # # BEGIN
slave-bin.000002 # Table_map # # table_id: # (test.t2)

View file

@ -128,14 +128,16 @@ DROP TABLE t1;
******** [master] SHOW BINLOG EVENTS IN <FILE> ********
show binlog events in 'master-bin.000002' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000002 # Binlog_checkpoint # # master-bin.000002
master-bin.000002 # Query # # use `test`; DROP TABLE `t1` /* generated by server */
******** [master] SHOW BINLOG EVENTS IN <FILE> LIMIT 1 ********
show binlog events in 'master-bin.000002' from <binlog_start> limit 1;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000002 # Query # # use `test`; DROP TABLE `t1` /* generated by server */
master-bin.000002 # Binlog_checkpoint # # master-bin.000002
******** [master] SHOW BINLOG EVENTS IN <FILE> LIMIT 1,3 ********
show binlog events in 'master-bin.000002' from <binlog_start> limit 1,3;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000002 # Query # # use `test`; DROP TABLE `t1` /* generated by server */
******** [master] SHOW BINLOG EVENTS ********
show binlog events from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
@ -156,14 +158,16 @@ master-bin.000001 # Rotate # # master-bin.000002;pos=4
******** [slave] SHOW BINLOG EVENTS IN <FILE> ********
show binlog events in 'slave-bin.000002' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
slave-bin.000002 # Binlog_checkpoint # # slave-bin.000002
slave-bin.000002 # Query # # use `test`; DROP TABLE `t1` /* generated by server */
******** [slave] SHOW BINLOG EVENTS IN <FILE> LIMIT 1 ********
show binlog events in 'slave-bin.000002' from <binlog_start> limit 1;
Log_name Pos Event_type Server_id End_log_pos Info
slave-bin.000002 # Query # # use `test`; DROP TABLE `t1` /* generated by server */
slave-bin.000002 # Binlog_checkpoint # # slave-bin.000002
******** [slave] SHOW BINLOG EVENTS IN <FILE> LIMIT 1,3 ********
show binlog events in 'slave-bin.000002' from <binlog_start> limit 1,3;
Log_name Pos Event_type Server_id End_log_pos Info
slave-bin.000002 # Query # # use `test`; DROP TABLE `t1` /* generated by server */
******** [slave] SHOW BINLOG EVENTS ********
show binlog events from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
@ -186,6 +190,7 @@ show relaylog events in 'slave-relay-bin.000006' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
slave-relay-bin.000006 # Rotate # # master-bin.000002;pos=4
slave-relay-bin.000006 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
slave-relay-bin.000006 # Binlog_checkpoint # # master-bin.000001
slave-relay-bin.000006 # Binlog_checkpoint # # master-bin.000002
slave-relay-bin.000006 # Query # # use `test`; DROP TABLE `t1` /* generated by server */
******** [slave] SHOW RELAYLOG EVENTS IN <FILE> LIMIT 1 ********
@ -196,8 +201,8 @@ slave-relay-bin.000006 # Rotate # # master-bin.000002;pos=4
show relaylog events in 'slave-relay-bin.000006' from <binlog_start> limit 1,3;
Log_name Pos Event_type Server_id End_log_pos Info
slave-relay-bin.000006 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
slave-relay-bin.000006 # Binlog_checkpoint # # master-bin.000001
slave-relay-bin.000006 # Binlog_checkpoint # # master-bin.000002
slave-relay-bin.000006 # Query # # use `test`; DROP TABLE `t1` /* generated by server */
******** [slave] SHOW RELAYLOG EVENTS ********
show relaylog events from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info

View file

@ -205,6 +205,7 @@ master-bin.000001 # Query # # COMMIT
master-bin.000001 # Rotate # # master-bin.000002;pos=4
show binlog events in 'master-bin.000002' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000002 # Binlog_checkpoint # # master-bin.000002
master-bin.000002 # Query # # use `test`; create table t3 (a int)ENGINE=MyISAM
master-bin.000002 # Query # # use `test`; create table t2 (n int)ENGINE=MyISAM
master-bin.000002 # Query # # BEGIN
@ -235,6 +236,7 @@ slave-bin.000001 # Query # # use `test`; create table t3 (a int)ENGINE=MyISAM
slave-bin.000001 # Rotate # # slave-bin.000002;pos=4
show binlog events in 'slave-bin.000002' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
slave-bin.000002 # Binlog_checkpoint # # slave-bin.000002
slave-bin.000002 # Query # # use `test`; create table t2 (n int)ENGINE=MyISAM
slave-bin.000002 # Query # # BEGIN
slave-bin.000002 # Query # # use `test`; insert into t2 values (1)

View file

@ -113,14 +113,16 @@ DROP TABLE t1;
******** [master] SHOW BINLOG EVENTS IN <FILE> ********
show binlog events in 'master-bin.000002' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000002 # Binlog_checkpoint # # master-bin.000002
master-bin.000002 # Query # # use `test`; DROP TABLE `t1` /* generated by server */
******** [master] SHOW BINLOG EVENTS IN <FILE> LIMIT 1 ********
show binlog events in 'master-bin.000002' from <binlog_start> limit 1;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000002 # Query # # use `test`; DROP TABLE `t1` /* generated by server */
master-bin.000002 # Binlog_checkpoint # # master-bin.000002
******** [master] SHOW BINLOG EVENTS IN <FILE> LIMIT 1,3 ********
show binlog events in 'master-bin.000002' from <binlog_start> limit 1,3;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000002 # Query # # use `test`; DROP TABLE `t1` /* generated by server */
******** [master] SHOW BINLOG EVENTS ********
show binlog events from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
@ -138,14 +140,16 @@ master-bin.000001 # Rotate # # master-bin.000002;pos=4
******** [slave] SHOW BINLOG EVENTS IN <FILE> ********
show binlog events in 'slave-bin.000002' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
slave-bin.000002 # Binlog_checkpoint # # slave-bin.000002
slave-bin.000002 # Query # # use `test`; DROP TABLE `t1` /* generated by server */
******** [slave] SHOW BINLOG EVENTS IN <FILE> LIMIT 1 ********
show binlog events in 'slave-bin.000002' from <binlog_start> limit 1;
Log_name Pos Event_type Server_id End_log_pos Info
slave-bin.000002 # Query # # use `test`; DROP TABLE `t1` /* generated by server */
slave-bin.000002 # Binlog_checkpoint # # slave-bin.000002
******** [slave] SHOW BINLOG EVENTS IN <FILE> LIMIT 1,3 ********
show binlog events in 'slave-bin.000002' from <binlog_start> limit 1,3;
Log_name Pos Event_type Server_id End_log_pos Info
slave-bin.000002 # Query # # use `test`; DROP TABLE `t1` /* generated by server */
******** [slave] SHOW BINLOG EVENTS ********
show binlog events from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
@ -165,6 +169,7 @@ show relaylog events in 'slave-relay-bin.000006' from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info
slave-relay-bin.000006 # Rotate # # master-bin.000002;pos=4
slave-relay-bin.000006 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
slave-relay-bin.000006 # Binlog_checkpoint # # master-bin.000001
slave-relay-bin.000006 # Binlog_checkpoint # # master-bin.000002
slave-relay-bin.000006 # Query # # use `test`; DROP TABLE `t1` /* generated by server */
******** [slave] SHOW RELAYLOG EVENTS IN <FILE> LIMIT 1 ********
@ -175,8 +180,8 @@ slave-relay-bin.000006 # Rotate # # master-bin.000002;pos=4
show relaylog events in 'slave-relay-bin.000006' from <binlog_start> limit 1,3;
Log_name Pos Event_type Server_id End_log_pos Info
slave-relay-bin.000006 # Format_desc # # SERVER_VERSION, BINLOG_VERSION
slave-relay-bin.000006 # Binlog_checkpoint # # master-bin.000001
slave-relay-bin.000006 # Binlog_checkpoint # # master-bin.000002
slave-relay-bin.000006 # Query # # use `test`; DROP TABLE `t1` /* generated by server */
******** [slave] SHOW RELAYLOG EVENTS ********
show relaylog events from <binlog_start>;
Log_name Pos Event_type Server_id End_log_pos Info

View file

@ -61,7 +61,7 @@ connection slave;
SELECT * FROM t1;
let $binlog_file= query_get_value(SHOW SLAVE STATUS, Relay_Log_File, 1);
let $binlog_start= 0;
let $binlog_limit=4,5;
let $binlog_limit=5,5;
--source include/show_relaylog_events.inc
--echo # Test that slave which cannot tolerate holes in binlog stream but

View file

@ -50,7 +50,7 @@ Warnings:
Warning 1292 Truncated incorrect innodb_flush_log_at_trx_commit value: '1001'
SELECT @@global.innodb_flush_log_at_trx_commit;
@@global.innodb_flush_log_at_trx_commit
2
3
'#----------------------FN_DYNVARS_046_05------------------------#'
SELECT @@global.innodb_flush_log_at_trx_commit =
VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES
@ -60,22 +60,22 @@ VARIABLE_VALUE
1
SELECT @@global.innodb_flush_log_at_trx_commit;
@@global.innodb_flush_log_at_trx_commit
2
3
SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES
WHERE VARIABLE_NAME='innodb_flush_log_at_trx_commit';
VARIABLE_VALUE
2
3
'#---------------------FN_DYNVARS_046_06-------------------------#'
SET @@global.innodb_flush_log_at_trx_commit = OFF;
ERROR 42000: Incorrect argument type to variable 'innodb_flush_log_at_trx_commit'
SELECT @@global.innodb_flush_log_at_trx_commit;
@@global.innodb_flush_log_at_trx_commit
2
3
SET @@global.innodb_flush_log_at_trx_commit = ON;
ERROR 42000: Incorrect argument type to variable 'innodb_flush_log_at_trx_commit'
SELECT @@global.innodb_flush_log_at_trx_commit;
@@global.innodb_flush_log_at_trx_commit
2
3
'#---------------------FN_DYNVARS_046_07----------------------#'
SET @@global.innodb_flush_log_at_trx_commit = TRUE;
SELECT @@global.innodb_flush_log_at_trx_commit;

View file

@ -644,6 +644,43 @@ void ha_checkpoint_state(bool disable)
}
/*
  Argument bundle handed through plugin_foreach() to
  commit_checkpoint_request_handlerton() by ha_commit_checkpoint_request().
*/
struct st_commit_checkpoint_request {
/* Opaque value passed to pre_hook and to each engine's commit_checkpoint_request(). */
void *cookie;
/* Optional callback (may be NULL), called with cookie just before each engine is asked. */
void (*pre_hook)(void *);
};
/*
  plugin_foreach() callback used by ha_commit_checkpoint_request().

  Forwards one commit checkpoint request to the storage engine behind
  `plugin`. Engines whose state is not SHOW_OPTION_YES, or that leave the
  commit_checkpoint_request handlerton method unset, are skipped entirely
  (the pre_hook is not run for them either).

  @param unused1  Not used.
  @param plugin   Storage engine plugin being visited.
  @param data     Pointer to the st_commit_checkpoint_request describing the
                  cookie and optional pre_hook.

  @return Always FALSE.
          NOTE(review): presumably this tells plugin_foreach() to keep
          iterating over the remaining plugins - confirm.
*/
static my_bool commit_checkpoint_request_handlerton(THD *unused1, plugin_ref plugin,
                                                    void *data)
{
  handlerton *engine= plugin_data(plugin, handlerton *);

  /* Nothing to do for disabled engines or engines without the method. */
  if (engine->state != SHOW_OPTION_YES || !engine->commit_checkpoint_request)
    return FALSE;

  st_commit_checkpoint_request *request=
    static_cast<st_commit_checkpoint_request *>(data);
  void *token= request->cookie;

  /* Give the caller a chance to account for this engine before asking it. */
  if (request->pre_hook)
    request->pre_hook(token);
  engine->commit_checkpoint_request(engine, token);

  return FALSE;
}
/*
Invoke commit_checkpoint_request() in all storage engines that implement it.
If pre_hook is non-NULL, the hook will be called prior to each invocation.
*/
void
ha_commit_checkpoint_request(void *cookie, void (*pre_hook)(void *))
{
st_commit_checkpoint_request st;
st.cookie= cookie;
st.pre_hook= pre_hook;
plugin_foreach(NULL, commit_checkpoint_request_handlerton,
MYSQL_STORAGE_ENGINE_PLUGIN, &st);
}
static my_bool closecon_handlerton(THD *thd, plugin_ref plugin,
void *unused)
@ -1281,6 +1318,7 @@ int ha_commit_trans(THD *thd, bool all)
goto done;
}
DEBUG_SYNC(thd, "ha_commit_trans_before_log_and_order");
cookie= tc_log->log_and_order(thd, xid, all, need_prepare_ordered,
need_commit_ordered);
if (!cookie)
@ -1778,6 +1816,17 @@ bool mysql_xa_recover(THD *thd)
DBUG_RETURN(0);
}
/*
  Entry point for a storage engine to report to the transaction coordinator
  (TC) that a commit checkpoint has been reached. The handlerton method
  commit_checkpoint_request() documents the full protocol.

  @param hton    Handlerton of the reporting engine; not used by this function.
  @param cookie  Passed on unchanged to tc_log->commit_checkpoint_notify().
*/
void commit_checkpoint_notify_ha(handlerton *hton, void *cookie)
{
  tc_log->commit_checkpoint_notify(cookie);
}
/**
@details
This function should be called when MySQL sends rows of a SELECT result set

View file

@ -976,6 +976,46 @@ struct handlerton
int (*recover)(handlerton *hton, XID *xid_list, uint len);
int (*commit_by_xid)(handlerton *hton, XID *xid);
int (*rollback_by_xid)(handlerton *hton, XID *xid);
/*
The commit_checkpoint_request() handlerton method is used to checkpoint
the XA recovery process for storage engines that support two-phase
commit.
The method is optional - an engine that does not implement it is expected
to work the traditional way, where every commit() durably flushes the
transaction to disk in the engine before completion, so XA recovery will
no longer be needed for that transaction.
An engine that does implement commit_checkpoint_request() is also
expected to implement commit_ordered(), so that ordering of commits is
consistent between 2pc participants. Such engine is no longer required to
durably flush to disk transactions in commit(), provided that the
transaction has been successfully prepare()d and commit_ordered(); thus
potentially saving one fsync() call. (Engine must still durably flush
to disk in commit() when no prepare()/commit_ordered() steps took place,
at least if durable commits are wanted; this happens eg. if binlog is
disabled).
The TC will periodically (eg. once per binlog rotation) call
commit_checkpoint_request(). When this happens, the engine must arrange
for all transactions that have completed commit_ordered() to be durably
flushed to disk (this does not include transactions that might be in the
middle of executing commit_ordered()). When such flush has completed, the
engine must call commit_checkpoint_notify_ha(), passing back the opaque
"cookie".
The flush and call of commit_checkpoint_notify_ha() need not happen
immediately - it can be scheduled and performed asynchronously (ie. as
part of next prepare(), or sync every second, or whatever), but should
not be postponed indefinitely. It is however also permissible to do it
immediately, before returning from commit_checkpoint_request().
When commit_checkpoint_notify_ha() is called, the TC will know that the
transactions are durably committed, and thus no longer require XA
recovery. It uses that to reduce the work needed for any subsequent XA
recovery process.
*/
void (*commit_checkpoint_request)(handlerton *hton, void *cookie);
/*
"Disable or enable checkpointing internal to the storage engine. This is
used for FLUSH TABLES WITH READ LOCK AND DISABLE CHECKPOINT to ensure that
@ -2977,6 +3017,7 @@ void ha_close_connection(THD* thd);
bool ha_flush_logs(handlerton *db_type);
void ha_drop_database(char* path);
void ha_checkpoint_state(bool disable);
void ha_commit_checkpoint_request(void *cookie, void (*pre_hook)(void *));
int ha_create_table(THD *thd, const char *path,
const char *db, const char *table_name,
HA_CREATE_INFO *create_info,
@ -3057,6 +3098,7 @@ int ha_binlog_end(THD *thd);
const char *get_canonical_filename(handler *file, const char *path,
char *tmp_path);
bool mysql_xa_recover(THD *thd);
void commit_checkpoint_notify_ha(handlerton *hton, void *cookie);
inline const char *table_case_name(HA_CREATE_INFO *info, const char *name)
{

File diff suppressed because it is too large Load diff

View file

@ -49,6 +49,7 @@ class TC_LOG
bool need_prepare_ordered,
bool need_commit_ordered) = 0;
virtual int unlog(ulong cookie, my_xid xid)=0;
virtual void commit_checkpoint_notify(void *cookie)= 0;
protected:
/*
@ -98,8 +99,12 @@ public:
return 1;
}
int unlog(ulong cookie, my_xid xid) { return 0; }
void commit_checkpoint_notify(void *cookie) { DBUG_ASSERT(0); };
};
#define TC_LOG_PAGE_SIZE 8192
#define TC_LOG_MIN_SIZE (3*TC_LOG_PAGE_SIZE)
#ifdef HAVE_MMAP
class TC_LOG_MMAP: public TC_LOG
{
@ -110,6 +115,12 @@ class TC_LOG_MMAP: public TC_LOG
PS_DIRTY // new xids added since last sync
} PAGE_STATE;
/*
  A fixed-capacity batch of ulong cookies plus two counters.
  TC_LOG_MMAP keeps a pointer to one of these in its pending_checkpoint
  member.
  NOTE(review): the code that fills and drains this structure is not visible
  here; the field descriptions below are assumptions to be confirmed against
  the TC_LOG_MMAP implementation.
*/
struct pending_cookies {
/* Presumably the number of entries of cookies[] currently in use - confirm. */
uint count;
/*
  Presumably the number of outstanding checkpoint notifications still
  awaited for this batch - confirm.
*/
uint pending_count;
/* Storage for the batched cookies; capacity is TC_LOG_PAGE_SIZE entries. */
ulong cookies[TC_LOG_PAGE_SIZE];
};
private:
typedef struct st_page {
struct st_page *next; // page a linked in a fifo queue
@ -141,7 +152,7 @@ class TC_LOG_MMAP: public TC_LOG
one has to use active->lock.
Same for LOCK_pool and LOCK_sync
*/
mysql_mutex_t LOCK_active, LOCK_pool, LOCK_sync;
mysql_mutex_t LOCK_active, LOCK_pool, LOCK_sync, LOCK_pending_checkpoint;
mysql_cond_t COND_pool, COND_active;
/*
Queue of threads that need to call commit_ordered().
@ -163,14 +174,16 @@ class TC_LOG_MMAP: public TC_LOG
*/
mysql_cond_t COND_queue_busy;
my_bool commit_ordered_queue_busy;
pending_cookies* pending_checkpoint;
public:
TC_LOG_MMAP(): inited(0) {}
TC_LOG_MMAP(): inited(0), pending_checkpoint(0) {}
int open(const char *opt_name);
void close();
int log_and_order(THD *thd, my_xid xid, bool all,
bool need_prepare_ordered, bool need_commit_ordered);
int unlog(ulong cookie, my_xid xid);
void commit_checkpoint_notify(void *cookie);
int recover();
private:
@ -178,6 +191,7 @@ class TC_LOG_MMAP: public TC_LOG
void get_active_from_pool();
int sync();
int overflow();
int delete_entry(ulong cookie);
};
#else
#define TC_LOG_MMAP TC_LOG_DUMMY
@ -356,12 +370,32 @@ private:
/*
We assign each binlog file an internal ID, used to identify them for unlog().
Ids start from BINLOG_COOKIE_START; the value BINLOG_COOKIE_DUMMY is special
meaning "no binlog" (we cannot use zero as that is reserved for error return
from log_and_order).
*/
#define BINLOG_COOKIE_DUMMY 1
#define BINLOG_COOKIE_START 2
The IDs start from 0 and increment for each new binlog created.
In unlog() we need to know the ID of the binlog file that the corresponding
transaction was written into. We also need a special value for a corner
case where there is no corresponding binlog id (since nothing was logged).
And we need an error flag to mark that unlog() must return failure.
We use the following macros to pack all of this information into the single
ulong available with log_and_order() / unlog().
Note that we cannot use the value 0 for cookie, as that is reserved as error
return value from log_and_order().
*/
/* Cookie value 0 is reserved: it is the error return of log_and_order(). */
#define BINLOG_COOKIE_ERROR_RETURN 0
/* Reserved id meaning "no corresponding binlog" (nothing was logged). */
#define BINLOG_COOKIE_DUMMY_ID 1
/* Bias added to real binlog ids so they never collide with reserved values. */
#define BINLOG_COOKIE_BASE 2

/*
  Cookie layout: bit 0 carries the error flag, the remaining bits carry the
  id (either BINLOG_COOKIE_DUMMY_ID, or a binlog id plus BINLOG_COOKIE_BASE).
*/

/* Build the cookie for the "no binlog" case. */
#define BINLOG_COOKIE_DUMMY(err) \
  ( ((err) & 1) | (BINLOG_COOKIE_DUMMY_ID << 1) )

/* Build the cookie for a real binlog id. */
#define BINLOG_COOKIE_MAKE(binlog_id, err) \
  ( ((err) & 1) | (((binlog_id) + BINLOG_COOKIE_BASE) << 1) )

/* Extract the error flag (0 or 1) from a cookie. */
#define BINLOG_COOKIE_GET_ERROR_FLAG(cookie) ((cookie) & 1)

/* Extract the binlog id from a cookie built with BINLOG_COOKIE_MAKE(). */
#define BINLOG_COOKIE_GET_ID(cookie) \
  ( ((ulong)(cookie) >> 1) - BINLOG_COOKIE_BASE )

/* True if the cookie was built with BINLOG_COOKIE_DUMMY(). */
#define BINLOG_COOKIE_IS_DUMMY(cookie) \
  ( ((ulong)(cookie) >> 1) == BINLOG_COOKIE_DUMMY_ID )
void binlog_checkpoint_callback(void *cookie);
class binlog_cache_mngr;
class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
@ -401,11 +435,25 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
IO_CACHE *error_cache;
/* This is the `all' parameter for ha_commit_ordered(). */
bool all;
/*
True if we need to increment xid_count in trx_group_commit_leader() and
decrement in unlog() (this is needed if there is a participating engine
that does not implement the commit_checkpoint_request() handlerton
method).
*/
bool need_unlog;
/*
Fields used to pass the necessary information to the last thread in a
group commit, only used when opt_optimize_thread_scheduling is not set.
*/
bool check_purge;
ulong binlog_id;
};
/*
A list of struct xid_count_per_binlog is used to keep track of how many
XIDs are in prepared, but not committed, state in each binlog.
XIDs are in prepared, but not committed, state in each binlog. And how
many commit_checkpoint_request()'s are pending.
When count drops to zero in a binlog after rotation, it means that there
are no more XIDs in prepared state, so that binlog is no longer needed
@ -418,10 +466,10 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
char *binlog_name;
uint binlog_name_len;
ulong binlog_id;
/* Total prepared XIDs and pending checkpoint requests in this binlog. */
long xid_count;
xid_count_per_binlog(); /* Give link error if constructor used. */
};
ulong current_binlog_id;
I_List<xid_count_per_binlog> binlog_xid_count_list;
/*
When this is set, a RESET MASTER is in progress.
@ -432,6 +480,7 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
checkpoint arrives - when all have arrived, RESET MASTER will complete.
*/
bool reset_master_pending;
friend void binlog_checkpoint_callback(void *cookie);
/* LOCK_log and LOCK_index are inited by init_pthread_objects() */
mysql_mutex_t LOCK_index;
@ -464,15 +513,6 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
uint file_id;
uint open_count; // For replication
int readers_count;
bool need_start_event;
/*
no_auto_events means we don't want any of these automatic events :
Start/Rotate/Stop. That is, in 4.x when we rotate a relay log, we don't
want a Rotate_log event to be written to the relay log. When we start a
relay log etc. So in 4.x this is 1 for relay logs, 0 for binlogs.
In 5.0 it's 0 for relay logs too!
*/
bool no_auto_events;
/* Queue of transactions queued up to participate in group commit. */
group_commit_entry *group_commit_queue;
/*
@ -508,10 +548,12 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
*/
int new_file_without_locking();
int new_file_impl(bool need_lock);
void do_checkpoint_request(ulong binlog_id);
void purge();
int write_transaction_or_stmt(group_commit_entry *entry);
bool write_transaction_to_binlog_events(group_commit_entry *entry);
void trx_group_commit_leader(group_commit_entry *leader);
void mark_xid_done(ulong cookie);
void mark_xid_done(ulong cookie, bool write_checkpoint);
void mark_xids_active(ulong cookie, uint xid_count);
public:
@ -572,6 +614,7 @@ public:
*/
char last_commit_pos_file[FN_REFLEN];
my_off_t last_commit_pos_offset;
ulong current_binlog_id;
MYSQL_BIN_LOG(uint *sync_period);
/*
@ -600,6 +643,7 @@ public:
int log_and_order(THD *thd, my_xid xid, bool all,
bool need_prepare_ordered, bool need_commit_ordered);
int unlog(ulong cookie, my_xid xid);
void commit_checkpoint_notify(void *cookie);
int recover(LOG_INFO *linfo, const char *last_log_name, IO_CACHE *first_log,
Format_description_log_event *fdle);
#if !defined(MYSQL_CLIENT)
@ -629,15 +673,14 @@ public:
void signal_update();
void wait_for_update_relay_log(THD* thd);
int wait_for_update_bin_log(THD* thd, const struct timespec * timeout);
void set_need_start_event() { need_start_event = 1; }
void init(bool no_auto_events_arg, ulong max_size);
void init(ulong max_size);
void init_pthread_objects();
void cleanup();
bool open(const char *log_name,
enum_log_type log_type,
const char *new_name,
enum cache_type io_cache_type_arg,
bool no_auto_events_arg, ulong max_size,
ulong max_size,
bool null_created,
bool need_mutex);
bool open_index_file(const char *index_file_name_arg,
@ -674,7 +717,7 @@ public:
bool can_purge_log(const char *log_file_name);
int update_log_index(LOG_INFO* linfo, bool need_update_threads);
int rotate(bool force_rotate, bool* check_purge);
void purge();
void checkpoint_and_purge(ulong binlog_id);
int rotate_and_purge(bool force_rotate);
/**
Flush binlog cache and synchronize to disk.

View file

@ -5874,7 +5874,7 @@ Rotate_log_event::do_shall_skip(Relay_log_info *rli)
**************************************************************************/
#if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT)
void Binlog_checkpoint_log_event::pack_info(Protocol *protocol)
void Binlog_checkpoint_log_event::pack_info(THD *thd, Protocol *protocol)
{
protocol->store(binlog_file_name, binlog_file_len, &my_charset_bin);
}

View file

@ -2911,7 +2911,7 @@ public:
Binlog_checkpoint_log_event(const char *binlog_file_name_arg,
uint binlog_file_len_arg);
#ifdef HAVE_REPLICATION
void pack_info(Protocol *protocol);
void pack_info(THD *thd, Protocol *protocol);
#endif
#else
void print(FILE *file, PRINT_EVENT_INFO *print_event_info);

View file

@ -715,7 +715,8 @@ char **orig_argv;
#ifdef HAVE_PSI_INTERFACE
#ifdef HAVE_MMAP
PSI_mutex_key key_PAGE_lock, key_LOCK_sync, key_LOCK_active, key_LOCK_pool;
PSI_mutex_key key_PAGE_lock, key_LOCK_sync, key_LOCK_active, key_LOCK_pool,
key_LOCK_pending_checkpoint;
#endif /* HAVE_MMAP */
#ifdef HAVE_OPENSSL
@ -756,6 +757,7 @@ static PSI_mutex_info all_server_mutexes[]=
{ &key_LOCK_sync, "TC_LOG_MMAP::LOCK_sync", 0},
{ &key_LOCK_active, "TC_LOG_MMAP::LOCK_active", 0},
{ &key_LOCK_pool, "TC_LOG_MMAP::LOCK_pool", 0},
{ &key_LOCK_pending_checkpoint, "TC_LOG_MMAP::LOCK_pending_checkpoint", 0},
#endif /* HAVE_MMAP */
#ifdef HAVE_OPENSSL
@ -4418,7 +4420,7 @@ a file name for --log-bin-index option", opt_binlog_index_name);
}
if (opt_bin_log && mysql_bin_log.open(opt_bin_logname, LOG_BIN, 0,
WRITE_CACHE, 0, max_binlog_size, 0, TRUE))
WRITE_CACHE, max_binlog_size, 0, TRUE))
unireg_abort(1);
#ifdef HAVE_REPLICATION

View file

@ -218,7 +218,7 @@ extern pthread_key(MEM_ROOT**,THR_MALLOC);
#ifdef HAVE_PSI_INTERFACE
#ifdef HAVE_MMAP
extern PSI_mutex_key key_PAGE_lock, key_LOCK_sync, key_LOCK_active,
key_LOCK_pool;
key_LOCK_pool, key_LOCK_pending_checkpoint;
#endif /* HAVE_MMAP */
#ifdef HAVE_OPENSSL

View file

@ -213,7 +213,7 @@ a file name for --relay-log-index option", opt_relaylog_index_name);
but a destructor will take care of that
*/
if (rli->relay_log.open_index_file(opt_relaylog_index_name, ln, TRUE) ||
rli->relay_log.open(ln, LOG_BIN, 0, SEQ_READ_APPEND, 0,
rli->relay_log.open(ln, LOG_BIN, 0, SEQ_READ_APPEND,
(max_relay_log_size ? max_relay_log_size :
max_binlog_size), 1, TRUE))
{

View file

@ -1757,13 +1757,12 @@ past_checksum:
/* Announce MariaDB slave capabilities. */
DBUG_EXECUTE_IF("simulate_slave_capability_none", goto after_set_capability;);
{
const char *q=
DBUG_EVALUATE_IF("simulate_slave_capability_old_53",
"SET @mariadb_slave_capability="
STRINGIFY_ARG(MARIA_SLAVE_CAPABILITY_ANNOTATE),
"SET @mariadb_slave_capability="
STRINGIFY_ARG(MARIA_SLAVE_CAPABILITY_MINE));
if (mysql_real_query(mysql, q, strlen(q)))
int rc= DBUG_EVALUATE_IF("simulate_slave_capability_old_53",
mysql_real_query(mysql, STRING_WITH_LEN("SET @mariadb_slave_capability="
STRINGIFY_ARG(MARIA_SLAVE_CAPABILITY_ANNOTATE))),
mysql_real_query(mysql, STRING_WITH_LEN("SET @mariadb_slave_capability="
STRINGIFY_ARG(MARIA_SLAVE_CAPABILITY_MINE))));
if (rc)
{
err_code= mysql_errno(mysql);
if (is_network_error(err_code))

View file

@ -149,9 +149,6 @@ public:
};
#define TC_LOG_PAGE_SIZE 8192
#define TC_LOG_MIN_SIZE (3*TC_LOG_PAGE_SIZE)
#define TC_HEURISTIC_RECOVER_COMMIT 1
#define TC_HEURISTIC_RECOVER_ROLLBACK 2
extern ulong tc_heuristic_recover;

View file

@ -347,6 +347,7 @@ static int innobase_rollback_to_savepoint(handlerton *hton, THD* thd,
static int innobase_savepoint(handlerton *hton, THD* thd, void *savepoint);
static int innobase_release_savepoint(handlerton *hton, THD* thd,
void *savepoint);
static void innobase_checkpoint_request(handlerton *hton, void *cookie);
static handler *innobase_create_handler(handlerton *hton,
TABLE_SHARE *table,
MEM_ROOT *mem_root);
@ -2250,6 +2251,7 @@ innobase_init(
innobase_hton->recover=innobase_xa_recover;
innobase_hton->commit_by_xid=innobase_commit_by_xid;
innobase_hton->rollback_by_xid=innobase_rollback_by_xid;
innobase_hton->commit_checkpoint_request=innobase_checkpoint_request;
innobase_hton->create_cursor_read_view=innobase_create_cursor_view;
innobase_hton->set_cursor_read_view=innobase_set_cursor_view;
innobase_hton->close_cursor_read_view=innobase_close_cursor_view;
@ -3006,6 +3008,19 @@ innobase_rollback_trx(
DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL));
}
/*****************************************************************//**
Handle a commit checkpoint request from server layer.
This function is installed as the commit_checkpoint_request() handlerton
method. The request is served synchronously: the redo log buffer is flushed
to disk immediately, and commit_checkpoint_notify_ha() is then called before
returning.
hton is the handlerton the request was made on; cookie is an opaque value
supplied by the server layer. Both are passed unchanged to the notify call. */
static
void
innobase_checkpoint_request(
handlerton *hton,
void *cookie)
{
/* Flush first; the notification is only sent once the flush call has
returned. */
log_buffer_flush_to_disk();
commit_checkpoint_notify_ha(hton, cookie);
}
/*****************************************************************//**
Rolls back a transaction to a savepoint.
@return 0 if success, HA_ERR_NO_SAVEPOINT if no savepoint with the
@ -11460,10 +11475,17 @@ static MYSQL_SYSVAR_STR(file_format_max, innobase_file_format_max,
static MYSQL_SYSVAR_ULONG(flush_log_at_trx_commit, srv_flush_log_at_trx_commit,
PLUGIN_VAR_OPCMDARG,
"Set to 0 (write and flush once per second),"
" 1 (write and flush at each commit)"
" or 2 (write at commit, flush once per second).",
NULL, NULL, 1, 0, 2, 0);
"Controls the durability/speed trade-off for commits."
" Set to 0 (write and flush redo log to disk only once per second),"
" 1 (flush to disk at each commit),"
" 2 (write to log at commit but flush to disk only once per second)"
" or 3 (flush to disk at prepare and at commit, slower and usually redundant)."
" 1 and 3 guarantees that after a crash, committed transactions will"
" not be lost and will be consistent with the binlog and other transactional"
" engines. 2 can get inconsistent and lose transactions if there is a"
" power failure or kernel crash but not if mysqld crashes. 0 has no"
" guarantees in case of crash. 0 and 2 can be faster than 1 or 3.",
NULL, NULL, 1, 0, 3, 0);
static MYSQL_SYSVAR_STR(flush_method, innobase_file_flush_method,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,

View file

@ -1025,7 +1025,8 @@ trx_commit_off_kernel(
trx->must_flush_log_later = TRUE;
} else if (srv_flush_log_at_trx_commit == 0) {
/* Do nothing */
} else if (srv_flush_log_at_trx_commit == 1) {
} else if (srv_flush_log_at_trx_commit == 1 ||
srv_flush_log_at_trx_commit == 3) {
if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
/* Write the log but do not flush it to disk */
@ -1712,7 +1713,11 @@ trx_commit_complete_for_mysql(
/* Do nothing */
} else if (srv_flush_log_at_trx_commit == 0) {
/* Do nothing */
} else if (srv_flush_log_at_trx_commit == 1) {
} else if (srv_flush_log_at_trx_commit == 1 && trx->active_commit_ordered) {
/* Do nothing - we already flushed the prepare and binlog write
to disk, so transaction is durable (will be recovered from
binlog if necessary) */
} else if (srv_flush_log_at_trx_commit == 1 || srv_flush_log_at_trx_commit == 3) {
if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
/* Write the log but do not flush it to disk */
@ -1992,7 +1997,7 @@ trx_prepare_off_kernel(
if (srv_flush_log_at_trx_commit == 0) {
/* Do nothing */
} else if (srv_flush_log_at_trx_commit == 1) {
} else if (srv_flush_log_at_trx_commit == 1 || srv_flush_log_at_trx_commit == 3) {
if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
/* Write the log but do not flush it to disk */

View file

@ -383,6 +383,7 @@ static int innobase_rollback_to_savepoint(handlerton *hton, THD* thd,
static int innobase_savepoint(handlerton *hton, THD* thd, void *savepoint);
static int innobase_release_savepoint(handlerton *hton, THD* thd,
void *savepoint);
static void innobase_checkpoint_request(handlerton *hton, void *cookie);
static handler *innobase_create_handler(handlerton *hton,
TABLE_SHARE *table,
MEM_ROOT *mem_root);
@ -483,10 +484,17 @@ static MYSQL_THDVAR_ULONG(lock_wait_timeout, PLUGIN_VAR_RQCMDARG,
NULL, NULL, 50, 1, 1024 * 1024 * 1024, 0);
static MYSQL_THDVAR_ULONG(flush_log_at_trx_commit, PLUGIN_VAR_OPCMDARG,
"Set to 0 (write and flush once per second),"
" 1 (write and flush at each commit)"
" or 2 (write at commit, flush once per second).",
NULL, NULL, 1, 0, 2, 0);
"Controls the durability/speed trade-off for commits."
" Set to 0 (write and flush redo log to disk only once per second),"
" 1 (flush to disk at each commit),"
" 2 (write to log at commit but flush to disk only once per second)"
" or 3 (flush to disk at prepare and at commit, slower and usually redundant)."
" 1 and 3 guarantees that after a crash, committed transactions will"
" not be lost and will be consistent with the binlog and other transactional"
" engines. 2 can get inconsistent and lose transactions if there is a"
" power failure or kernel crash but not if mysqld crashes. 0 has no"
" guarantees in case of crash. 0 and 2 can be faster than 1 or 3.",
NULL, NULL, 1, 0, 3, 0);
static MYSQL_THDVAR_BOOL(fake_changes, PLUGIN_VAR_OPCMDARG,
"In the transaction after enabled, UPDATE, INSERT and DELETE only move the cursor to the records "
@ -2469,6 +2477,7 @@ innobase_init(
innobase_hton->recover=innobase_xa_recover;
innobase_hton->commit_by_xid=innobase_commit_by_xid;
innobase_hton->rollback_by_xid=innobase_rollback_by_xid;
innobase_hton->commit_checkpoint_request=innobase_checkpoint_request;
innobase_hton->checkpoint_state= innobase_checkpoint_state;
innobase_hton->create_cursor_read_view=innobase_create_cursor_view;
innobase_hton->set_cursor_read_view=innobase_set_cursor_view;
@ -3491,6 +3500,19 @@ innobase_rollback_trx(
DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL));
}
/*****************************************************************//**
Handle a commit checkpoint request from server layer.
This function is installed as the commit_checkpoint_request() handlerton
method. The request is served synchronously: the redo log buffer is flushed
to disk immediately, and commit_checkpoint_notify_ha() is then called before
returning.
hton is the handlerton the request was made on; cookie is an opaque value
supplied by the server layer. Both are passed unchanged to the notify call. */
static
void
innobase_checkpoint_request(
handlerton *hton,
void *cookie)
{
/* Flush first; the notification is only sent once the flush call has
returned. */
log_buffer_flush_to_disk();
commit_checkpoint_notify_ha(hton, cookie);
}
/*****************************************************************//**
Rolls back a transaction to a savepoint.
@return 0 if success, HA_ERR_NO_SAVEPOINT if no savepoint with the

View file

@ -494,7 +494,6 @@ struct trx_struct{
this is set to 1 then registered should
also be set to 1. This is used in the
XA code */
unsigned called_commit_ordered:1;/* 1 if innobase_commit_ordered has run. */
/*------------------------------*/
ulint isolation_level;/* TRX_ISO_REPEATABLE_READ, ... */
ulint check_foreigns; /* normally TRUE, but if the user

View file

@ -1099,7 +1099,8 @@ trx_commit_off_kernel(
trx->must_flush_log_later = TRUE;
} else if (flush_log_at_trx_commit == 0) {
/* Do nothing */
} else if (flush_log_at_trx_commit == 1) {
} else if (flush_log_at_trx_commit == 1 ||
flush_log_at_trx_commit == 3) {
if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
/* Write the log but do not flush it to disk */
@ -1809,7 +1810,11 @@ trx_commit_complete_for_mysql(
/* Do nothing */
} else if (flush_log_at_trx_commit == 0) {
/* Do nothing */
} else if (flush_log_at_trx_commit == 1) {
} else if (flush_log_at_trx_commit == 1 && trx->active_commit_ordered) {
/* Do nothing - we already flushed the prepare and binlog write
to disk, so transaction is durable (will be recovered from
binlog if necessary) */
} else if (flush_log_at_trx_commit == 1 || flush_log_at_trx_commit == 3) {
if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
/* Write the log but do not flush it to disk */
@ -2097,7 +2102,7 @@ trx_prepare_off_kernel(
if (flush_log_at_trx_commit == 0) {
/* Do nothing */
} else if (flush_log_at_trx_commit == 1) {
} else if (flush_log_at_trx_commit == 1 || flush_log_at_trx_commit == 3) {
if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
/* Write the log but do not flush it to disk */