diff --git a/mysql-test/suite/rpl/r/rpl_parallel.result b/mysql-test/suite/rpl/r/rpl_parallel.result index 7ceb5ee6622..666978afc98 100644 --- a/mysql-test/suite/rpl/r/rpl_parallel.result +++ b/mysql-test/suite/rpl/r/rpl_parallel.result @@ -1136,11 +1136,297 @@ SET GLOBAL debug_dbug=@old_dbug; SET GLOBAL slave_parallel_threads=0; SET GLOBAL slave_parallel_threads=10; include/start_slave.inc +*** MDEV-7847: "Slave worker thread retried transaction 10 time(s) in vain, giving up", followed by replication hanging *** +*** MDEV-7882: Excessive transaction retry in parallel replication *** +CREATE TABLE t7 (a int PRIMARY KEY, b INT) ENGINE=InnoDB; +CREATE TABLE t8 (a int PRIMARY KEY, b INT) ENGINE=InnoDB; +include/stop_slave.inc +SET GLOBAL slave_parallel_threads=40; +SELECT @old_retries:=@@GLOBAL.slave_transaction_retries; +@old_retries:=@@GLOBAL.slave_transaction_retries +10 +SET GLOBAL slave_transaction_retries= 5; +INSERT INTO t7 VALUES (1,1), (2,2), (3,3), (4,4), (5,5); +SET @old_dbug= @@SESSION.debug_dbug; +SET @commit_id= 42; +SET SESSION debug_dbug="+d,binlog_force_commit_id"; +INSERT INTO t8 VALUES (1,1); +INSERT INTO t8 VALUES (2,2); +INSERT INTO t8 VALUES (3,3); +INSERT INTO t8 VALUES (4,4); +INSERT INTO t8 VALUES (5,5); +INSERT INTO t8 VALUES (6,6); +INSERT INTO t8 VALUES (7,7); +INSERT INTO t8 VALUES (8,8); +UPDATE t7 SET b=9 WHERE a=3; +UPDATE t7 SET b=10 WHERE a=3; +UPDATE t7 SET b=11 WHERE a=3; +INSERT INTO t8 VALUES (12,12); +INSERT INTO t8 VALUES (13,13); +UPDATE t7 SET b=14 WHERE a=3; +UPDATE t7 SET b=15 WHERE a=3; +INSERT INTO t8 VALUES (16,16); +UPDATE t7 SET b=17 WHERE a=3; +INSERT INTO t8 VALUES (18,18); +INSERT INTO t8 VALUES (19,19); +UPDATE t7 SET b=20 WHERE a=3; +INSERT INTO t8 VALUES (21,21); +UPDATE t7 SET b=22 WHERE a=3; +INSERT INTO t8 VALUES (23,24); +INSERT INTO t8 VALUES (24,24); +UPDATE t7 SET b=25 WHERE a=3; +INSERT INTO t8 VALUES (26,26); +UPDATE t7 SET b=27 WHERE a=3; +BEGIN; +INSERT INTO t8 VALUES (28,28); +INSERT INTO t8 VALUES (29,28), (30,28); +INSERT INTO t8 VALUES (31,28); +INSERT INTO t8 VALUES (32,28); +INSERT INTO t8 VALUES (33,28); +INSERT INTO t8 VALUES (34,28); +INSERT INTO t8 VALUES (35,28); +INSERT INTO t8 VALUES (36,28); +INSERT INTO t8 VALUES (37,28); +INSERT INTO t8 VALUES (38,28); +INSERT INTO t8 VALUES (39,28); +INSERT INTO t8 VALUES (40,28); +INSERT INTO t8 VALUES (41,28); +INSERT INTO t8 VALUES (42,28); +COMMIT; +SET @commit_id=43; +INSERT INTO t8 VALUES (43,43); +INSERT INTO t8 VALUES (44,44); +UPDATE t7 SET b=45 WHERE a=3; +INSERT INTO t8 VALUES (46,46); +INSERT INTO t8 VALUES (47,47); +UPDATE t7 SET b=48 WHERE a=3; +INSERT INTO t8 VALUES (49,49); +INSERT INTO t8 VALUES (50,50); +SET @commit_id=44; +INSERT INTO t8 VALUES (51,51); +INSERT INTO t8 VALUES (52,52); +UPDATE t7 SET b=53 WHERE a=3; +INSERT INTO t8 VALUES (54,54); +INSERT INTO t8 VALUES (55,55); +UPDATE t7 SET b=56 WHERE a=3; +INSERT INTO t8 VALUES (57,57); +UPDATE t7 SET b=58 WHERE a=3; +INSERT INTO t8 VALUES (58,58); +INSERT INTO t8 VALUES (59,59); +INSERT INTO t8 VALUES (60,60); +INSERT INTO t8 VALUES (61,61); +UPDATE t7 SET b=62 WHERE a=3; +INSERT INTO t8 VALUES (63,63); +INSERT INTO t8 VALUES (64,64); +INSERT INTO t8 VALUES (65,65); +INSERT INTO t8 VALUES (66,66); +UPDATE t7 SET b=67 WHERE a=3; +INSERT INTO t8 VALUES (68,68); +UPDATE t7 SET b=69 WHERE a=3; +UPDATE t7 SET b=70 WHERE a=3; +UPDATE t7 SET b=71 WHERE a=3; +INSERT INTO t8 VALUES (72,72); +UPDATE t7 SET b=73 WHERE a=3; +UPDATE t7 SET b=74 WHERE a=3; +UPDATE t7 SET b=75 WHERE a=3; +UPDATE t7 SET b=76 WHERE a=3; +INSERT INTO t8 VALUES (77,77); +UPDATE t7 SET b=78 WHERE a=3; +INSERT INTO t8 VALUES (79,79); +UPDATE t7 SET b=80 WHERE a=3; +INSERT INTO t8 VALUES (81,81); +UPDATE t7 SET b=82 WHERE a=3; +INSERT INTO t8 VALUES (83,83); +UPDATE t7 SET b=84 WHERE a=3; +SET @commit_id=45; +INSERT INTO t8 VALUES (85,85); +UPDATE t7 SET b=86 WHERE a=3; +INSERT INTO t8 VALUES (87,87); +SET @commit_id=46; +INSERT INTO t8 VALUES (88,88); +INSERT INTO t8 VALUES (89,89); +INSERT INTO t8 VALUES (90,90); +SET SESSION debug_dbug=@old_dbug; +INSERT INTO t8 VALUES (91,91); +INSERT INTO t8 VALUES (92,92); +INSERT INTO t8 VALUES (93,93); +INSERT INTO t8 VALUES (94,94); +INSERT INTO t8 VALUES (95,95); +INSERT INTO t8 VALUES (96,96); +INSERT INTO t8 VALUES (97,97); +INSERT INTO t8 VALUES (98,98); +INSERT INTO t8 VALUES (99,99); +SELECT * FROM t7 ORDER BY a; +a b +1 1 +2 2 +3 86 +4 4 +5 5 +SELECT * FROM t8 ORDER BY a; +a b +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +12 12 +13 13 +16 16 +18 18 +19 19 +21 21 +23 24 +24 24 +26 26 +28 28 +29 28 +30 28 +31 28 +32 28 +33 28 +34 28 +35 28 +36 28 +37 28 +38 28 +39 28 +40 28 +41 28 +42 28 +43 43 +44 44 +46 46 +47 47 +49 49 +50 50 +51 51 +52 52 +54 54 +55 55 +57 57 +58 58 +59 59 +60 60 +61 61 +63 63 +64 64 +65 65 +66 66 +68 68 +72 72 +77 77 +79 79 +81 81 +83 83 +85 85 +87 87 +88 88 +89 89 +90 90 +91 91 +92 92 +93 93 +94 94 +95 95 +96 96 +97 97 +98 98 +99 99 +include/save_master_gtid.inc +include/start_slave.inc +include/sync_with_master_gtid.inc +SELECT * FROM t7 ORDER BY a; +a b +1 1 +2 2 +3 86 +4 4 +5 5 +SELECT * FROM t8 ORDER BY a; +a b +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +12 12 +13 13 +16 16 +18 18 +19 19 +21 21 +23 24 +24 24 +26 26 +28 28 +29 28 +30 28 +31 28 +32 28 +33 28 +34 28 +35 28 +36 28 +37 28 +38 28 +39 28 +40 28 +41 28 +42 28 +43 43 +44 44 +46 46 +47 47 +49 49 +50 50 +51 51 +52 52 +54 54 +55 55 +57 57 +58 58 +59 59 +60 60 +61 61 +63 63 +64 64 +65 65 +66 66 +68 68 +72 72 +77 77 +79 79 +81 81 +83 83 +85 85 +87 87 +88 88 +89 89 +90 90 +91 91 +92 92 +93 93 +94 94 +95 95 +96 96 +97 97 +98 98 +99 99 +include/stop_slave.inc +SET GLOBAL slave_transaction_retries= @old_retries; +include/start_slave.inc include/stop_slave.inc SET GLOBAL slave_parallel_threads=@old_parallel_threads; include/start_slave.inc SET DEBUG_SYNC= 'RESET'; DROP function foo; -DROP TABLE t1,t2,t3,t4,t5,t6; +DROP TABLE t1,t2,t3,t4,t5,t6,t7,t8; SET DEBUG_SYNC= 'RESET'; include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_parallel.test b/mysql-test/suite/rpl/t/rpl_parallel.test index d4b99d4b0f7..d6d39690f09 100644 --- a/mysql-test/suite/rpl/t/rpl_parallel.test +++ b/mysql-test/suite/rpl/t/rpl_parallel.test @@ -1843,6 +1843,208 @@ SET GLOBAL slave_parallel_threads=10; --source include/start_slave.inc +--echo *** MDEV-7847: "Slave worker thread retried transaction 10 time(s) in vain, giving up", followed by replication hanging *** +--echo *** MDEV-7882: Excessive transaction retry in parallel replication *** + +--connection server_1 +CREATE TABLE t7 (a int PRIMARY KEY, b INT) ENGINE=InnoDB; +CREATE TABLE t8 (a int PRIMARY KEY, b INT) ENGINE=InnoDB; +--save_master_pos + +--connection server_2 +--sync_with_master +--source include/stop_slave.inc +SET GLOBAL slave_parallel_threads=40; +SELECT @old_retries:=@@GLOBAL.slave_transaction_retries; +SET GLOBAL slave_transaction_retries= 5; + + +# Using dbug error injection, we artificially create event groups with a lot of +# conflicting transactions in each event group. The bugs were originally seen +# "in the wild" with transactions that did not conflict on the master, and only +# conflicted very rarely on the slave (maybe some edge case with InnoDB btree +# page splits or something like that). The event groups here loosely reflect +# the structure of the original failure's group commits. + + +--connection server_1 +INSERT INTO t7 VALUES (1,1), (2,2), (3,3), (4,4), (5,5); +SET @old_dbug= @@SESSION.debug_dbug; +SET @commit_id= 42; +SET SESSION debug_dbug="+d,binlog_force_commit_id"; +INSERT INTO t8 VALUES (1,1); +INSERT INTO t8 VALUES (2,2); +INSERT INTO t8 VALUES (3,3); +INSERT INTO t8 VALUES (4,4); +INSERT INTO t8 VALUES (5,5); +INSERT INTO t8 VALUES (6,6); +INSERT INTO t8 VALUES (7,7); +INSERT INTO t8 VALUES (8,8); + +UPDATE t7 SET b=9 WHERE a=3; +UPDATE t7 SET b=10 WHERE a=3; +UPDATE t7 SET b=11 WHERE a=3; + +INSERT INTO t8 VALUES (12,12); +INSERT INTO t8 VALUES (13,13); + +UPDATE t7 SET b=14 WHERE a=3; +UPDATE t7 SET b=15 WHERE a=3; + +INSERT INTO t8 VALUES (16,16); + +UPDATE t7 SET b=17 WHERE a=3; + +INSERT INTO t8 VALUES (18,18); +INSERT INTO t8 VALUES (19,19); + +UPDATE t7 SET b=20 WHERE a=3; + +INSERT INTO t8 VALUES (21,21); + +UPDATE t7 SET b=22 WHERE a=3; + +INSERT INTO t8 VALUES (23,24); +INSERT INTO t8 VALUES (24,24); + +UPDATE t7 SET b=25 WHERE a=3; + +INSERT INTO t8 VALUES (26,26); + +UPDATE t7 SET b=27 WHERE a=3; + +BEGIN; +INSERT INTO t8 VALUES (28,28); +INSERT INTO t8 VALUES (29,28), (30,28); +INSERT INTO t8 VALUES (31,28); +INSERT INTO t8 VALUES (32,28); +INSERT INTO t8 VALUES (33,28); +INSERT INTO t8 VALUES (34,28); +INSERT INTO t8 VALUES (35,28); +INSERT INTO t8 VALUES (36,28); +INSERT INTO t8 VALUES (37,28); +INSERT INTO t8 VALUES (38,28); +INSERT INTO t8 VALUES (39,28); +INSERT INTO t8 VALUES (40,28); +INSERT INTO t8 VALUES (41,28); +INSERT INTO t8 VALUES (42,28); +COMMIT; + + +SET @commit_id=43; +INSERT INTO t8 VALUES (43,43); +INSERT INTO t8 VALUES (44,44); + +UPDATE t7 SET b=45 WHERE a=3; + +INSERT INTO t8 VALUES (46,46); +INSERT INTO t8 VALUES (47,47); + +UPDATE t7 SET b=48 WHERE a=3; + +INSERT INTO t8 VALUES (49,49); +INSERT INTO t8 VALUES (50,50); + + +SET @commit_id=44; +INSERT INTO t8 VALUES (51,51); +INSERT INTO t8 VALUES (52,52); + +UPDATE t7 SET b=53 WHERE a=3; + +INSERT INTO t8 VALUES (54,54); +INSERT INTO t8 VALUES (55,55); + +UPDATE t7 SET b=56 WHERE a=3; + +INSERT INTO t8 VALUES (57,57); + +UPDATE t7 SET b=58 WHERE a=3; + +INSERT INTO t8 VALUES (58,58); +INSERT INTO t8 VALUES (59,59); +INSERT INTO t8 VALUES (60,60); +INSERT INTO t8 VALUES (61,61); + +UPDATE t7 SET b=62 WHERE a=3; + +INSERT INTO t8 VALUES (63,63); +INSERT INTO t8 VALUES (64,64); +INSERT INTO t8 VALUES (65,65); +INSERT INTO t8 VALUES (66,66); + +UPDATE t7 SET b=67 WHERE a=3; + +INSERT INTO t8 VALUES (68,68); + +UPDATE t7 SET b=69 WHERE a=3; +UPDATE t7 SET b=70 WHERE a=3; +UPDATE t7 SET b=71 WHERE a=3; + +INSERT INTO t8 VALUES (72,72); + +UPDATE t7 SET b=73 WHERE a=3; +UPDATE t7 SET b=74 WHERE a=3; +UPDATE t7 SET b=75 WHERE a=3; +UPDATE t7 SET b=76 WHERE a=3; + +INSERT INTO t8 VALUES (77,77); + +UPDATE t7 SET b=78 WHERE a=3; + +INSERT INTO t8 VALUES (79,79); + +UPDATE t7 SET b=80 WHERE a=3; + +INSERT INTO t8 VALUES (81,81); + +UPDATE t7 SET b=82 WHERE a=3; + +INSERT INTO t8 VALUES (83,83); + +UPDATE t7 SET b=84 WHERE a=3; + + +SET @commit_id=45; +INSERT INTO t8 VALUES (85,85); +UPDATE t7 SET b=86 WHERE a=3; +INSERT INTO t8 VALUES (87,87); + + +SET @commit_id=46; +INSERT INTO t8 VALUES (88,88); +INSERT INTO t8 VALUES (89,89); +INSERT INTO t8 VALUES (90,90); + +SET SESSION debug_dbug=@old_dbug; + +INSERT INTO t8 VALUES (91,91); +INSERT INTO t8 VALUES (92,92); +INSERT INTO t8 VALUES (93,93); +INSERT INTO t8 VALUES (94,94); +INSERT INTO t8 VALUES (95,95); +INSERT INTO t8 VALUES (96,96); +INSERT INTO t8 VALUES (97,97); +INSERT INTO t8 VALUES (98,98); +INSERT INTO t8 VALUES (99,99); + + +SELECT * FROM t7 ORDER BY a; +SELECT * FROM t8 ORDER BY a; +--source include/save_master_gtid.inc + + +--connection server_2 +--source include/start_slave.inc +--source include/sync_with_master_gtid.inc +SELECT * FROM t7 ORDER BY a; +SELECT * FROM t8 ORDER BY a; + +--source include/stop_slave.inc +SET GLOBAL slave_transaction_retries= @old_retries; +--source include/start_slave.inc + + # Clean up. --connection server_2 --source include/stop_slave.inc @@ -1852,7 +2054,7 @@ SET DEBUG_SYNC= 'RESET'; --connection server_1 DROP function foo; -DROP TABLE t1,t2,t3,t4,t5,t6; +DROP TABLE t1,t2,t3,t4,t5,t6,t7,t8; SET DEBUG_SYNC= 'RESET'; --source include/rpl_end.inc diff --git a/sql/log.cc b/sql/log.cc index 38fe1066896..5801b3a1141 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -7224,6 +7224,15 @@ MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader) if (likely(is_open())) // Should always be true { commit_id= (last_in_queue == leader ? 0 : (uint64)leader->thd->query_id); + DBUG_EXECUTE_IF("binlog_force_commit_id", + { + const LEX_STRING name= { C_STRING_WITH_LEN("commit_id") }; + bool null_value; + user_var_entry *entry= + (user_var_entry*) my_hash_search(&leader->thd->user_vars, + (uchar*) name.str, name.length); + commit_id= entry->val_int(&null_value); + }); /* Commit every transaction in the queue. diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index 3fbc5298fea..7df1ea3bd4b 100644 --- a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -113,6 +113,7 @@ finish_event_group(rpl_parallel_thread *rpt, uint64 sub_id, wait_for_commit *wfc= &rgi->commit_orderer; int err; + thd->get_stmt_da()->set_overwrite_status(true); /* Remove any left-over registration to wait for a prior commit to complete. Normally, such wait would already have been removed at @@ -129,14 +130,14 @@ finish_event_group(rpl_parallel_thread *rpt, uint64 sub_id, for us to complete and rely on this also ensuring that any other event in the group has completed. - But in the error case, we have to abort anyway, and it seems best - to just complete as quickly as possible with unregister. Anyone - waiting for us will in any case receive the error back from their - wait_for_prior_commit() call. + And in the error case, correct GCO lifetime relies on the fact that once + the last event group in the GCO has executed wait_for_prior_commit(), + all earlier event groups have also committed; this way no more + mark_start_commit() calls can be made and it is safe to de-allocate + the GCO. */ - if (rgi->worker_error) - wfc->unregister_wait_for_prior_commit(); - else if ((err= wfc->wait_for_prior_commit(thd))) + err= wfc->wait_for_prior_commit(thd); + if (unlikely(err) && !rgi->worker_error) signal_error_to_sql_driver_thread(thd, rgi, err); thd->wait_for_commit_ptr= NULL; @@ -193,6 +194,10 @@ finish_event_group(rpl_parallel_thread *rpt, uint64 sub_id, thd->clear_error(); thd->reset_killed(); + /* + Would do thd->get_stmt_da()->set_overwrite_status(false) here, but + reset_diagnostics_area() already does that. + */ thd->get_stmt_da()->reset_diagnostics_area(); wfc->wakeup_subsequent_commits(rgi->worker_error); } @@ -761,8 +766,7 @@ handle_rpl_parallel_thread(void *arg) if (unlikely(entry->stop_on_error_sub_id <= rgi->wait_commit_sub_id)) skip_event_group= true; - else - register_wait_for_prior_event_group_commit(rgi, entry); + register_wait_for_prior_event_group_commit(rgi, entry); unlock_or_exit_cond(thd, &entry->LOCK_parallel_entry, &did_enter_cond, &old_stage); @@ -849,7 +853,9 @@ handle_rpl_parallel_thread(void *arg) else { delete qev->ev; + thd->get_stmt_da()->set_overwrite_status(true); err= thd->wait_for_prior_commit(); + thd->get_stmt_da()->set_overwrite_status(false); } end_of_group= diff --git a/sql/rpl_parallel.h b/sql/rpl_parallel.h index 2604cd98527..256dffe9828 100644 --- a/sql/rpl_parallel.h +++ b/sql/rpl_parallel.h @@ -53,7 +53,7 @@ struct group_commit_orderer { group_commit_orderer *prev_gco; group_commit_orderer *next_gco; /* - The sub_id of last event group in this the previous GCO. + The sub_id of last event group in the previous GCO. Only valid if prev_gco != NULL. */ uint64 prior_sub_id;