From 567c0973591eb66797bb0f982f312b516f8fe82c Mon Sep 17 00:00:00 2001 From: Monty Date: Fri, 1 Mar 2024 18:16:33 +0200 Subject: [PATCH] MDEV-33582 Add more warnings to be able to better diagnose network issues Warnings are added to net_server.cc when global_system_variables.log_warnings >= 4. When the above condition holds then: - All communication errors from net_serv.cc is also written to the error log. - In case of a of not being able to read or write a packet, a more detailed error is given. Other things: - Added detection of slaves that has hangup to Ack_receiver::run() - vio_close() is now first marking the socket closed before closing it. The reason for this is to ensure that the connection that gets a read error can check if the reason was that the socket was closed. - Add a new state to vio to be able to detect if vio is acive, shutdown or closed. This is used to detect if socket is closed by another thread. - Testing of the new warnings is done in rpl_get_lock.test - Suppress some of the new warnings in mtr to allow one to run some of the tests with -mysqld=--log-warnings=4. All test in the 'rpl' suite can now be run with this option. - Ensure that global.log_warnings are restored at test end in a way that allows one to use mtr --mysqld=--log-warnings=4. Reviewed-by: , --- include/my_global.h | 2 + include/violite.h | 8 +++ mysql-test/mariadb-test-run.pl | 7 ++ .../suite/binlog_encryption/rpl_packet.result | 2 + .../rpl/include/rpl_extra_col_master.test | 4 ++ ...l_binlog_dump_slave_gtid_state_info.result | 3 +- .../rpl_domain_id_filter_master_crash.result | 3 - mysql-test/suite/rpl/r/rpl_get_lock.result | 3 +- mysql-test/suite/rpl/r/rpl_gtid_crash.result | 1 + .../suite/rpl/r/rpl_gtid_grouping.result | 1 - .../suite/rpl/r/rpl_heartbeat_debug.result | 2 - mysql-test/suite/rpl/r/rpl_packet.result | 2 + .../rpl/r/rpl_parallel_optimistic.result | 3 +- .../rpl/r/rpl_parallel_optimistic_xa.result | 1 - .../rpl_parallel_optimistic_xa_lsu_off.result | 1 - .../suite/rpl/r/rpl_row_find_row_debug.result | 1 - ..._semi_sync_slave_enabled_consistent.result | 2 + .../r/show_status_stop_slave_race-7126.result | 1 - ...rpl_binlog_dump_slave_gtid_state_info.test | 3 +- .../t/rpl_domain_id_filter_master_crash.test | 3 + .../rpl/t/rpl_dump_request_retry_warning.test | 4 ++ mysql-test/suite/rpl/t/rpl_get_lock.test | 14 ++++ mysql-test/suite/rpl/t/rpl_gtid_crash.test | 1 + .../suite/rpl/t/rpl_gtid_crash_myisam.test | 4 ++ mysql-test/suite/rpl/t/rpl_gtid_grouping.test | 1 - .../suite/rpl/t/rpl_heartbeat_debug.test | 11 ++- mysql-test/suite/rpl/t/rpl_packet.test | 3 + .../suite/rpl/t/rpl_parallel_optimistic.test | 3 +- .../rpl/t/rpl_parallel_optimistic_xa.test | 1 - .../suite/rpl/t/rpl_row_find_row_debug.test | 2 + .../suite/rpl/t/rpl_semi_sync_fail_over.test | 5 ++ .../t/rpl_semi_sync_shutdown_await_ack.cnf | 6 +- ...pl_semi_sync_slave_enabled_consistent.test | 3 +- .../t/show_status_stop_slave_race-7126.test | 3 + sql/net_serv.cc | 51 ++++++++++--- sql/semisync_master_ack_receiver.cc | 13 +++- sql/semisync_master_ack_receiver.h | 10 +++ sql/slave.cc | 2 + vio/vio.c | 2 + vio/viosocket.c | 72 +++++++++++++------ 40 files changed, 211 insertions(+), 53 deletions(-) diff --git a/include/my_global.h b/include/my_global.h index e89249ec890..d6af6e8b938 100644 --- a/include/my_global.h +++ b/include/my_global.h @@ -962,6 +962,7 @@ typedef struct st_mysql_lex_string LEX_STRING; #define SOCKET_ECONNRESET WSAECONNRESET #define SOCKET_ENFILE ENFILE #define SOCKET_EMFILE EMFILE +#define SOCKET_CLOSED EIO #else /* Unix */ #define socket_errno errno #define closesocket(A) close(A) @@ -971,6 +972,7 @@ typedef struct st_mysql_lex_string LEX_STRING; #define SOCKET_EADDRINUSE EADDRINUSE #define SOCKET_ETIMEDOUT ETIMEDOUT #define SOCKET_ECONNRESET ECONNRESET +#define SOCKET_CLOSED EIO #define SOCKET_ENFILE ENFILE #define SOCKET_EMFILE EMFILE #endif diff --git a/include/violite.h b/include/violite.h index b823e62b2e1..f1e5c95a648 100644 --- a/include/violite.h +++ b/include/violite.h @@ -41,6 +41,13 @@ enum enum_vio_type VIO_TYPE_SSL /* see also vio_type_names[] */ }; + +enum enum_vio_state +{ + VIO_STATE_NOT_INITIALIZED, VIO_STATE_ACTIVE, VIO_STATE_SHUTDOWN, + VIO_STATE_CLOSED +}; + #define FIRST_VIO_TYPE VIO_CLOSED #define LAST_VIO_TYPE VIO_TYPE_SSL @@ -244,6 +251,7 @@ struct st_vio struct sockaddr_storage local; /* Local internet address */ struct sockaddr_storage remote; /* Remote internet address */ enum enum_vio_type type; /* Type of connection */ + enum enum_vio_state state; /* State of the connection */ const char *desc; /* String description */ char *read_buffer; /* buffer for vio_read_buff */ char *read_pos; /* start of unfetched data in the diff --git a/mysql-test/mariadb-test-run.pl b/mysql-test/mariadb-test-run.pl index 4b3d323760d..d1124724e73 100755 --- a/mysql-test/mariadb-test-run.pl +++ b/mysql-test/mariadb-test-run.pl @@ -4486,6 +4486,13 @@ sub extract_warning_lines ($$) { qr/Slave I\/0: Master command COM_BINLOG_DUMP failed/, qr/Error reading packet/, qr/Lost connection to MariaDB server at 'reading initial communication packet'/, + qr/Could not read packet:.* state: [2-3] /, + qr/Could not read packet:.* errno: 104 /, + qr/Could not read packet:.* errno: 0 .* length: 0/, + qr/Could not write packet:.* errno: 32 /, + qr/Could not write packet:.* errno: 104 /, + qr/Semisync ack receiver got error 1158/, + qr/Connection was killed/, qr/Failed on request_dump/, qr/Slave: Can't drop database.* database doesn't exist/, qr/Slave: Operation DROP USER failed for 'create_rout_db'/, diff --git a/mysql-test/suite/binlog_encryption/rpl_packet.result b/mysql-test/suite/binlog_encryption/rpl_packet.result index 4a2a5d70d39..bb6269607fe 100644 --- a/mysql-test/suite/binlog_encryption/rpl_packet.result +++ b/mysql-test/suite/binlog_encryption/rpl_packet.result @@ -2,6 +2,8 @@ include/master-slave.inc [connection master] call mtr.add_suppression("Slave I/O: Got a packet bigger than 'slave_max_allowed_packet' bytes, .*error.* 1153"); call mtr.add_suppression("Log entry on master is longer than slave_max_allowed_packet"); +call mtr.add_suppression("Could not write packet:"); +call mtr.add_suppression("Got a packet bigger than 'max_allowed_packet' bytes"); drop database if exists DB_NAME_OF_MAX_LENGTH_AKA_NAME_LEN_64_BYTES_____________________; create database DB_NAME_OF_MAX_LENGTH_AKA_NAME_LEN_64_BYTES_____________________; connection master; diff --git a/mysql-test/suite/rpl/include/rpl_extra_col_master.test b/mysql-test/suite/rpl/include/rpl_extra_col_master.test index 02645a577f5..512118f91cd 100644 --- a/mysql-test/suite/rpl/include/rpl_extra_col_master.test +++ b/mysql-test/suite/rpl/include/rpl_extra_col_master.test @@ -59,6 +59,10 @@ #VARCHAR(M) # +--disable_query_log +call mtr.add_suppression("Could not read packet:.* errno: 11"); +--enable_query_log + --let $_saved_conn= $CURRENT_CONNECTION let $binformat = `SHOW VARIABLES LIKE '%binlog_format%'`; diff --git a/mysql-test/suite/rpl/r/rpl_binlog_dump_slave_gtid_state_info.result b/mysql-test/suite/rpl/r/rpl_binlog_dump_slave_gtid_state_info.result index 38a9afd4d02..a8c70ef4953 100644 --- a/mysql-test/suite/rpl/r/rpl_binlog_dump_slave_gtid_state_info.result +++ b/mysql-test/suite/rpl/r/rpl_binlog_dump_slave_gtid_state_info.result @@ -1,6 +1,7 @@ include/master-slave.inc [connection master] connection master; +SET @org_log_warnings=@@GLOBAL.LOG_WARNINGS; SET GLOBAL LOG_WARNINGS=2; connection slave; include/stop_slave.inc @@ -41,11 +42,11 @@ connection master; include/wait_for_pattern_in_file.inc FOUND 1 /using_gtid\(1\), gtid\(\'0-1-2,10-1-1\'\).*/ in mysqld.1.err "===== Clean up =====" +SET GLOBAL LOG_WARNINGS=@org_log_warnings; connection slave; include/stop_slave.inc CHANGE MASTER TO MASTER_USE_GTID=no; include/start_slave.inc connection master; DROP TABLE t; -SET GLOBAL LOG_WARNINGS=default; include/rpl_end.inc diff --git a/mysql-test/suite/rpl/r/rpl_domain_id_filter_master_crash.result b/mysql-test/suite/rpl/r/rpl_domain_id_filter_master_crash.result index a54ff99b591..cd13590b61f 100644 --- a/mysql-test/suite/rpl/r/rpl_domain_id_filter_master_crash.result +++ b/mysql-test/suite/rpl/r/rpl_domain_id_filter_master_crash.result @@ -1,9 +1,6 @@ include/master-slave.inc [connection master] connection master; -call mtr.add_suppression("mysqld: Table '.*gtid_slave_pos' is marked as crashed and should be repaired"); -call mtr.add_suppression("Checking table: './mysql/gtid_slave_pos'"); -call mtr.add_suppression("mysql.gtid_slave_pos: 1 client is using or hasn't closed the table properly"); SET @@session.gtid_domain_id= 0; create table ti (a int auto_increment primary key) engine=innodb; create table tm (a int auto_increment primary key) engine=myisam; diff --git a/mysql-test/suite/rpl/r/rpl_get_lock.result b/mysql-test/suite/rpl/r/rpl_get_lock.result index b852546e1bf..cbb02a32648 100644 --- a/mysql-test/suite/rpl/r/rpl_get_lock.result +++ b/mysql-test/suite/rpl/r/rpl_get_lock.result @@ -1,6 +1,6 @@ include/master-slave.inc [connection master] -CALL mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT"); +SET GLOBAL LOG_WARNINGS=4; create table t1(n int); insert into t1 values(get_lock("lock",2)); disconnect master; @@ -35,4 +35,5 @@ NULL connection master1; drop table t1; connection slave; +connection default; include/rpl_end.inc diff --git a/mysql-test/suite/rpl/r/rpl_gtid_crash.result b/mysql-test/suite/rpl/r/rpl_gtid_crash.result index 4e74a3b25ac..36b0452bc50 100644 --- a/mysql-test/suite/rpl/r/rpl_gtid_crash.result +++ b/mysql-test/suite/rpl/r/rpl_gtid_crash.result @@ -4,6 +4,7 @@ connection server_1; call mtr.add_suppression("Checking table:"); call mtr.add_suppression("client is using or hasn't closed the table properly"); call mtr.add_suppression("Table .* is marked as crashed and should be repaired"); +call mtr.add_suppression("Could not read packet:.* errno: 11"); flush tables; ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; CREATE TABLE t1 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB; diff --git a/mysql-test/suite/rpl/r/rpl_gtid_grouping.result b/mysql-test/suite/rpl/r/rpl_gtid_grouping.result index ad7d6116c49..1b4d86dd8e2 100644 --- a/mysql-test/suite/rpl/r/rpl_gtid_grouping.result +++ b/mysql-test/suite/rpl/r/rpl_gtid_grouping.result @@ -50,5 +50,4 @@ CHANGE MASTER TO MASTER_USE_GTID=no; include/start_slave.inc connection master; DROP TABLE t; -SET GLOBAL LOG_WARNINGS=default; include/rpl_end.inc diff --git a/mysql-test/suite/rpl/r/rpl_heartbeat_debug.result b/mysql-test/suite/rpl/r/rpl_heartbeat_debug.result index dc45c0b9ab3..a89010cd432 100644 --- a/mysql-test/suite/rpl/r/rpl_heartbeat_debug.result +++ b/mysql-test/suite/rpl/r/rpl_heartbeat_debug.result @@ -9,8 +9,6 @@ Variable_name Slave_heartbeat_period Value 60.000 SET @saved_dbug= @@GLOBAL.debug_dbug; SET GLOBAL debug_dbug="+d,simulate_slave_heartbeat_network_error"; -CALL mtr.add_suppression('SET @master_heartbeat_period to master failed with error'); -CALL mtr.add_suppression('Master command COM_REGISTER_SLAVE failed: failed registering on master, reconnecting to try again'); include/start_slave.inc connection master; drop table if exists t1; diff --git a/mysql-test/suite/rpl/r/rpl_packet.result b/mysql-test/suite/rpl/r/rpl_packet.result index 4a2a5d70d39..bb6269607fe 100644 --- a/mysql-test/suite/rpl/r/rpl_packet.result +++ b/mysql-test/suite/rpl/r/rpl_packet.result @@ -2,6 +2,8 @@ include/master-slave.inc [connection master] call mtr.add_suppression("Slave I/O: Got a packet bigger than 'slave_max_allowed_packet' bytes, .*error.* 1153"); call mtr.add_suppression("Log entry on master is longer than slave_max_allowed_packet"); +call mtr.add_suppression("Could not write packet:"); +call mtr.add_suppression("Got a packet bigger than 'max_allowed_packet' bytes"); drop database if exists DB_NAME_OF_MAX_LENGTH_AKA_NAME_LEN_64_BYTES_____________________; create database DB_NAME_OF_MAX_LENGTH_AKA_NAME_LEN_64_BYTES_____________________; connection master; diff --git a/mysql-test/suite/rpl/r/rpl_parallel_optimistic.result b/mysql-test/suite/rpl/r/rpl_parallel_optimistic.result index eff939512e2..0b317c4682b 100644 --- a/mysql-test/suite/rpl/r/rpl_parallel_optimistic.result +++ b/mysql-test/suite/rpl/r/rpl_parallel_optimistic.result @@ -626,6 +626,7 @@ include/save_master_gtid.inc connection server_2; include/sync_with_master_gtid.inc connection server_2; +SET @org_log_warnings=@@GLOBAL.LOG_WARNINGS; set global log_warnings=2; BEGIN; INSERT INTO t1 SET a=1; @@ -651,7 +652,7 @@ connection server_2; include/sync_with_master_gtid.inc connection server_2; include/stop_slave.inc -set global log_warnings=default; +set global log_warnings=@org_log_warnings; SET GLOBAL slave_parallel_mode=@old_parallel_mode; SET GLOBAL slave_parallel_threads=@old_parallel_threads; include/start_slave.inc diff --git a/mysql-test/suite/rpl/r/rpl_parallel_optimistic_xa.result b/mysql-test/suite/rpl/r/rpl_parallel_optimistic_xa.result index 4136f1885db..90c3e5db614 100644 --- a/mysql-test/suite/rpl/r/rpl_parallel_optimistic_xa.result +++ b/mysql-test/suite/rpl/r/rpl_parallel_optimistic_xa.result @@ -32,7 +32,6 @@ include/diff_tables.inc [master:t0, slave:t0] include/diff_tables.inc [master:t1, slave:t1] connection slave; include/stop_slave.inc -set global log_warnings=default; SET GLOBAL slave_parallel_mode=@old_parallel_mode; SET GLOBAL slave_parallel_threads=@old_parallel_threads; include/start_slave.inc diff --git a/mysql-test/suite/rpl/r/rpl_parallel_optimistic_xa_lsu_off.result b/mysql-test/suite/rpl/r/rpl_parallel_optimistic_xa_lsu_off.result index 4136f1885db..90c3e5db614 100644 --- a/mysql-test/suite/rpl/r/rpl_parallel_optimistic_xa_lsu_off.result +++ b/mysql-test/suite/rpl/r/rpl_parallel_optimistic_xa_lsu_off.result @@ -32,7 +32,6 @@ include/diff_tables.inc [master:t0, slave:t0] include/diff_tables.inc [master:t1, slave:t1] connection slave; include/stop_slave.inc -set global log_warnings=default; SET GLOBAL slave_parallel_mode=@old_parallel_mode; SET GLOBAL slave_parallel_threads=@old_parallel_threads; include/start_slave.inc diff --git a/mysql-test/suite/rpl/r/rpl_row_find_row_debug.result b/mysql-test/suite/rpl/r/rpl_row_find_row_debug.result index f1a0059a04f..c2bb256a750 100644 --- a/mysql-test/suite/rpl/r/rpl_row_find_row_debug.result +++ b/mysql-test/suite/rpl/r/rpl_row_find_row_debug.result @@ -20,6 +20,5 @@ FOUND 1 /The slave is applying a ROW event on behalf of an UPDATE statement on t FOUND 1 /The slave is applying a ROW event on behalf of a DELETE statement on table t1 and is currently taking a considerable amount/ in mysqld.2.err include/stop_slave.inc SET @@GLOBAL.debug_dbug = @saved_dbug; -SET GLOBAL log_warnings = 2; include/start_slave.inc include/rpl_end.inc diff --git a/mysql-test/suite/rpl/r/rpl_semi_sync_slave_enabled_consistent.result b/mysql-test/suite/rpl/r/rpl_semi_sync_slave_enabled_consistent.result index 99c3124957f..11cdb965dd6 100644 --- a/mysql-test/suite/rpl/r/rpl_semi_sync_slave_enabled_consistent.result +++ b/mysql-test/suite/rpl/r/rpl_semi_sync_slave_enabled_consistent.result @@ -2,6 +2,8 @@ include/master-slave.inc [connection master] call mtr.add_suppression("Replication event checksum verification failed"); call mtr.add_suppression("could not queue event from master"); +call mtr.add_suppression("Semisync ack receiver.*error reading communication packets"); +call mtr.add_suppression("Semisync ack receiver got hangup"); # # Set up a semisync connection connection master; diff --git a/mysql-test/suite/rpl/r/show_status_stop_slave_race-7126.result b/mysql-test/suite/rpl/r/show_status_stop_slave_race-7126.result index 999d9417b3f..35cb3fcdc13 100644 --- a/mysql-test/suite/rpl/r/show_status_stop_slave_race-7126.result +++ b/mysql-test/suite/rpl/r/show_status_stop_slave_race-7126.result @@ -1,6 +1,5 @@ include/master-slave.inc [connection master] -call mtr.add_suppression("Master is configured to log replication events"); connection slave; connection slave; include/wait_for_slave_to_stop.inc diff --git a/mysql-test/suite/rpl/t/rpl_binlog_dump_slave_gtid_state_info.test b/mysql-test/suite/rpl/t/rpl_binlog_dump_slave_gtid_state_info.test index 942a23576c4..b77aa7797e3 100644 --- a/mysql-test/suite/rpl/t/rpl_binlog_dump_slave_gtid_state_info.test +++ b/mysql-test/suite/rpl/t/rpl_binlog_dump_slave_gtid_state_info.test @@ -39,6 +39,7 @@ --source include/master-slave.inc --connection master +SET @org_log_warnings=@@GLOBAL.LOG_WARNINGS; SET GLOBAL LOG_WARNINGS=2; --connection slave @@ -110,6 +111,7 @@ CHANGE MASTER TO MASTER_USE_GTID=slave_pos; --source include/wait_for_pattern_in_file.inc --echo "===== Clean up =====" +SET GLOBAL LOG_WARNINGS=@org_log_warnings; --connection slave --source include/stop_slave.inc CHANGE MASTER TO MASTER_USE_GTID=no; @@ -117,5 +119,4 @@ CHANGE MASTER TO MASTER_USE_GTID=no; --connection master DROP TABLE t; -SET GLOBAL LOG_WARNINGS=default; --source include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_domain_id_filter_master_crash.test b/mysql-test/suite/rpl/t/rpl_domain_id_filter_master_crash.test index cdfdc098f5a..2ce273860fe 100644 --- a/mysql-test/suite/rpl/t/rpl_domain_id_filter_master_crash.test +++ b/mysql-test/suite/rpl/t/rpl_domain_id_filter_master_crash.test @@ -6,9 +6,12 @@ connection master; +--disable_query_log call mtr.add_suppression("mysqld: Table '.*gtid_slave_pos' is marked as crashed and should be repaired"); call mtr.add_suppression("Checking table: './mysql/gtid_slave_pos'"); call mtr.add_suppression("mysql.gtid_slave_pos: 1 client is using or hasn't closed the table properly"); +call mtr.add_suppression("Could not read packet:.* errno: 11"); +--enable_query_log SET @@session.gtid_domain_id= 0; create table ti (a int auto_increment primary key) engine=innodb; diff --git a/mysql-test/suite/rpl/t/rpl_dump_request_retry_warning.test b/mysql-test/suite/rpl/t/rpl_dump_request_retry_warning.test index 1ee043623ae..633071a89b3 100644 --- a/mysql-test/suite/rpl/t/rpl_dump_request_retry_warning.test +++ b/mysql-test/suite/rpl/t/rpl_dump_request_retry_warning.test @@ -30,6 +30,10 @@ --let $rpl_skip_start_slave=1 --source include/master-slave.inc +--disable_query_log +call mtr.add_suppression("Could not read packet:.* errno: 11"); +--enable_query_log + # Do an insert on master CREATE TABLE t1(a int); INSERT INTO t1 VALUES(1); diff --git a/mysql-test/suite/rpl/t/rpl_get_lock.test b/mysql-test/suite/rpl/t/rpl_get_lock.test index b5c08858055..f0a11e712f6 100644 --- a/mysql-test/suite/rpl/t/rpl_get_lock.test +++ b/mysql-test/suite/rpl/t/rpl_get_lock.test @@ -1,6 +1,16 @@ source include/master-slave.inc; +--disable_query_log CALL mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT"); +call mtr.add_suppression("Could not read packet:.* errno: 11 "); +# The following one comes from calling dirty_close on client side +call mtr.add_suppression("Could not read packet:.* errno: 2 "); +--enable_query_log + +let $org_log_warnings=`select @@global.log_warnings`; + +# Test extended warnings +SET GLOBAL LOG_WARNINGS=4; create table t1(n int); # Use of get_lock gives a warning for unsafeness if binlog_format=statement @@ -41,6 +51,10 @@ connection master1; drop table t1; sync_slave_with_master; +connection default; +--disable_query_log +--eval SET GLOBAL LOG_WARNINGS=$org_log_warnings; +--enable_query_log --source include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_gtid_crash.test b/mysql-test/suite/rpl/t/rpl_gtid_crash.test index f2167911101..84dd9aa65e0 100644 --- a/mysql-test/suite/rpl/t/rpl_gtid_crash.test +++ b/mysql-test/suite/rpl/t/rpl_gtid_crash.test @@ -12,6 +12,7 @@ call mtr.add_suppression("Checking table:"); call mtr.add_suppression("client is using or hasn't closed the table properly"); call mtr.add_suppression("Table .* is marked as crashed and should be repaired"); +call mtr.add_suppression("Could not read packet:.* errno: 11"); flush tables; ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; diff --git a/mysql-test/suite/rpl/t/rpl_gtid_crash_myisam.test b/mysql-test/suite/rpl/t/rpl_gtid_crash_myisam.test index faf388f5bed..e113e17b7ec 100644 --- a/mysql-test/suite/rpl/t/rpl_gtid_crash_myisam.test +++ b/mysql-test/suite/rpl/t/rpl_gtid_crash_myisam.test @@ -5,6 +5,10 @@ --let $rpl_topology=1->2 --source include/rpl_init.inc +--disable_query_log +call mtr.add_suppression("Could not read packet:.* errno: 11"); +--enable_query_log + --echo *** Test crashing master with InnoDB disabled, the binlog gtid state should still be correctly recovered. *** --connection server_1 diff --git a/mysql-test/suite/rpl/t/rpl_gtid_grouping.test b/mysql-test/suite/rpl/t/rpl_gtid_grouping.test index 66448c4f96c..bb1057a6e4f 100644 --- a/mysql-test/suite/rpl/t/rpl_gtid_grouping.test +++ b/mysql-test/suite/rpl/t/rpl_gtid_grouping.test @@ -93,5 +93,4 @@ CHANGE MASTER TO MASTER_USE_GTID=no; --connection master DROP TABLE t; -SET GLOBAL LOG_WARNINGS=default; --source include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_heartbeat_debug.test b/mysql-test/suite/rpl/t/rpl_heartbeat_debug.test index bd66a249ada..e593786655b 100644 --- a/mysql-test/suite/rpl/t/rpl_heartbeat_debug.test +++ b/mysql-test/suite/rpl/t/rpl_heartbeat_debug.test @@ -3,6 +3,12 @@ --source include/have_debug.inc --source include/master-slave.inc +--disable_query_log +CALL mtr.add_suppression('SET @master_heartbeat_period to master failed with error'); +CALL mtr.add_suppression('Master command COM_REGISTER_SLAVE failed: failed registering on master, reconnecting to try again'); +call mtr.add_suppression("Could not read packet:.* errno: 11"); +--enable_query_log + connection slave; --source include/stop_slave.inc set @restore_slave_net_timeout= @@global.slave_net_timeout; @@ -14,14 +20,13 @@ set @@global.slave_net_timeout= 10; ### Checking the range ### -# + # default period slave_net_timeout/2 # --query_vertical show status like 'Slave_heartbeat_period'; SET @saved_dbug= @@GLOBAL.debug_dbug; SET GLOBAL debug_dbug="+d,simulate_slave_heartbeat_network_error"; -CALL mtr.add_suppression('SET @master_heartbeat_period to master failed with error'); -CALL mtr.add_suppression('Master command COM_REGISTER_SLAVE failed: failed registering on master, reconnecting to try again'); + --source include/start_slave.inc diff --git a/mysql-test/suite/rpl/t/rpl_packet.test b/mysql-test/suite/rpl/t/rpl_packet.test index cbde486bcbb..f1814e61f9b 100644 --- a/mysql-test/suite/rpl/t/rpl_packet.test +++ b/mysql-test/suite/rpl/t/rpl_packet.test @@ -20,6 +20,9 @@ source include/master-slave.inc; call mtr.add_suppression("Slave I/O: Got a packet bigger than 'slave_max_allowed_packet' bytes, .*error.* 1153"); call mtr.add_suppression("Log entry on master is longer than slave_max_allowed_packet"); +call mtr.add_suppression("Could not write packet:"); +call mtr.add_suppression("Got a packet bigger than 'max_allowed_packet' bytes"); + let $db= DB_NAME_OF_MAX_LENGTH_AKA_NAME_LEN_64_BYTES_____________________; disable_warnings; eval drop database if exists $db; diff --git a/mysql-test/suite/rpl/t/rpl_parallel_optimistic.test b/mysql-test/suite/rpl/t/rpl_parallel_optimistic.test index f5e48282326..09da8c2df15 100644 --- a/mysql-test/suite/rpl/t/rpl_parallel_optimistic.test +++ b/mysql-test/suite/rpl/t/rpl_parallel_optimistic.test @@ -508,6 +508,7 @@ DELETE FROM t2; # The 1st of the following two trx:s a blocker on slave --connection server_2 +SET @org_log_warnings=@@GLOBAL.LOG_WARNINGS; set global log_warnings=2; BEGIN; INSERT INTO t1 SET a=1; @@ -555,7 +556,7 @@ DELETE FROM t2; # --connection server_2 --source include/stop_slave.inc -set global log_warnings=default; +set global log_warnings=@org_log_warnings; SET GLOBAL slave_parallel_mode=@old_parallel_mode; SET GLOBAL slave_parallel_threads=@old_parallel_threads; --source include/start_slave.inc diff --git a/mysql-test/suite/rpl/t/rpl_parallel_optimistic_xa.test b/mysql-test/suite/rpl/t/rpl_parallel_optimistic_xa.test index 35c22d1e92e..6e5cf4367a7 100644 --- a/mysql-test/suite/rpl/t/rpl_parallel_optimistic_xa.test +++ b/mysql-test/suite/rpl/t/rpl_parallel_optimistic_xa.test @@ -206,7 +206,6 @@ while($i > 0) # --connection slave --source include/stop_slave.inc -set global log_warnings=default; SET GLOBAL slave_parallel_mode=@old_parallel_mode; SET GLOBAL slave_parallel_threads=@old_parallel_threads; --source include/start_slave.inc diff --git a/mysql-test/suite/rpl/t/rpl_row_find_row_debug.test b/mysql-test/suite/rpl/t/rpl_row_find_row_debug.test index e3edabe239d..f06b2982ec5 100644 --- a/mysql-test/suite/rpl/t/rpl_row_find_row_debug.test +++ b/mysql-test/suite/rpl/t/rpl_row_find_row_debug.test @@ -51,7 +51,9 @@ DROP TABLE t1; # cleanup --source include/stop_slave.inc SET @@GLOBAL.debug_dbug = @saved_dbug; +--disable_query_log --eval SET GLOBAL log_warnings = $log_warnings_save +--enable_query_log --source include/start_slave.inc --source include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_semi_sync_fail_over.test b/mysql-test/suite/rpl/t/rpl_semi_sync_fail_over.test index 17d7b50d614..e1e7a4f1de3 100644 --- a/mysql-test/suite/rpl/t/rpl_semi_sync_fail_over.test +++ b/mysql-test/suite/rpl/t/rpl_semi_sync_fail_over.test @@ -10,6 +10,11 @@ --source include/have_binlog_format_row.inc --source include/master-slave.inc +--disable_query_log +call mtr.add_suppression("Could not read packet:.* errno: 11"); +flush tables; +--enable_query_log + # Initial slave --connection server_2 --source include/stop_slave.inc diff --git a/mysql-test/suite/rpl/t/rpl_semi_sync_shutdown_await_ack.cnf b/mysql-test/suite/rpl/t/rpl_semi_sync_shutdown_await_ack.cnf index 2cf1b1786bd..5f69b557188 100644 --- a/mysql-test/suite/rpl/t/rpl_semi_sync_shutdown_await_ack.cnf +++ b/mysql-test/suite/rpl/t/rpl_semi_sync_shutdown_await_ack.cnf @@ -1,13 +1,13 @@ !include ../my.cnf [mysqld.1] -log_warnings=9 +log_warnings=3 [mysqld.2] -log_warnings=9 +log_warnings=3 [mysqld.3] -log_warnings=9 +log_warnings=3 [ENV] SERVER_MYPORT_3= @mysqld.3.port diff --git a/mysql-test/suite/rpl/t/rpl_semi_sync_slave_enabled_consistent.test b/mysql-test/suite/rpl/t/rpl_semi_sync_slave_enabled_consistent.test index 9e388ab4419..ed78eff52ea 100644 --- a/mysql-test/suite/rpl/t/rpl_semi_sync_slave_enabled_consistent.test +++ b/mysql-test/suite/rpl/t/rpl_semi_sync_slave_enabled_consistent.test @@ -14,7 +14,8 @@ call mtr.add_suppression("Replication event checksum verification failed"); call mtr.add_suppression("could not queue event from master"); - +call mtr.add_suppression("Semisync ack receiver.*error reading communication packets"); +call mtr.add_suppression("Semisync ack receiver got hangup"); --echo # --echo # Set up a semisync connection --connection master diff --git a/mysql-test/suite/rpl/t/show_status_stop_slave_race-7126.test b/mysql-test/suite/rpl/t/show_status_stop_slave_race-7126.test index cd0f8aad106..815b2537e93 100644 --- a/mysql-test/suite/rpl/t/show_status_stop_slave_race-7126.test +++ b/mysql-test/suite/rpl/t/show_status_stop_slave_race-7126.test @@ -3,7 +3,10 @@ # --source include/master-slave.inc +--disable_query_log call mtr.add_suppression("Master is configured to log replication events"); +call mtr.add_suppression("Could not read packet:.* errno: 11"); +--enable_query_log --connection slave diff --git a/sql/net_serv.cc b/sql/net_serv.cc index 3dff8442c6a..b1009b6552b 100644 --- a/sql/net_serv.cc +++ b/sql/net_serv.cc @@ -74,7 +74,13 @@ static int inline EXTRA_DEBUG_fflush(...) { return 0; } #ifdef MYSQL_SERVER #include #include -#define MYSQL_SERVER_my_error my_error + +static void inline MYSQL_SERVER_my_error(uint error, myf flags) +{ + my_error(error, + flags | MYF(global_system_variables.log_warnings > 3 ? ME_ERROR_LOG : 0)); +} + #else static void inline MYSQL_SERVER_my_error(...) {} #endif @@ -766,7 +772,19 @@ net_real_write(NET *net,const uchar *packet, size_t len) #endif /* !defined(MYSQL_SERVER) */ net->error= 2; /* Close socket */ net->last_errno= (interrupted ? ER_NET_WRITE_INTERRUPTED : - ER_NET_ERROR_ON_WRITE); + ER_NET_ERROR_ON_WRITE); +#ifdef MYSQL_SERVER + if (global_system_variables.log_warnings > 3) + { + my_printf_error(net->last_errno, + "Could not write packet: fd: %lld state: %d " + "errno: %d vio_errno: %d length: %ld", + MYF(ME_ERROR_LOG), + (longlong) vio_fd(net->vio), (int) net->vio->state, + vio_errno(net->vio), net->last_errno, (ulong) (end-pos)); + break; + } +#endif MYSQL_SERVER_my_error(net->last_errno, MYF(0)); break; } @@ -1064,20 +1082,34 @@ retry: (longlong) vio_fd(net->vio)); } #ifndef MYSQL_SERVER - if (length != 0 && vio_errno(net->vio) == SOCKET_EINTR) + if (length != 0 && vio_should_retry(net->vio)) { DBUG_PRINT("warning",("Interrupted read. Retrying...")); continue; } #endif - DBUG_PRINT("error",("Couldn't read packet: remain: %u errno: %d length: %ld", + DBUG_PRINT("error",("Could not read packet: remain: %u errno: %d length: %ld", remain, vio_errno(net->vio), (long) length)); len= packet_error; net->error= 2; /* Close socket */ net->last_errno= (vio_was_timeout(net->vio) ? - ER_NET_READ_INTERRUPTED : - ER_NET_READ_ERROR); - MYSQL_SERVER_my_error(net->last_errno, MYF(0)); + ER_NET_READ_INTERRUPTED : + ER_NET_READ_ERROR); +#ifdef MYSQL_SERVER + if (global_system_variables.log_warnings > 3) + { + my_printf_error(net->last_errno, + "Could not read packet: fd: %lld state: %d " + "remain: %u errno: %d vio_errno: %d " + "length: %lld", + MYF(ME_ERROR_LOG), + (longlong) vio_fd(net->vio), (int) net->vio->state, + remain, vio_errno(net->vio), net->last_errno, + (longlong) length); + } + else + my_error(net->last_errno, MYF(0)); +#endif /* MYSQL_SERVER */ goto end; } remain -= (uint32) length; @@ -1282,7 +1314,10 @@ ulong my_net_read_packet(NET *net, my_bool read_from_server) { ulong reallen = 0; - return my_net_read_packet_reallen(net, read_from_server, &reallen); + ulong length; + DBUG_ENTER("my_net_read_packet"); + length= my_net_read_packet_reallen(net, read_from_server, &reallen); + DBUG_RETURN(length); } diff --git a/sql/semisync_master_ack_receiver.cc b/sql/semisync_master_ack_receiver.cc index 9320c7fc3e6..bcc5bda8220 100644 --- a/sql/semisync_master_ack_receiver.cc +++ b/sql/semisync_master_ack_receiver.cc @@ -149,7 +149,7 @@ bool Ack_receiver::add_slave(THD *thd) slave->thd= thd; slave->vio= *thd->net.vio; slave->vio.mysql_socket.m_psi= NULL; - slave->vio.read_timeout= 1; + slave->vio.read_timeout= 1; // 1 ms mysql_mutex_lock(&m_mutex); @@ -337,6 +337,17 @@ void Ack_receiver::run() */ net.compress= slave->thd->net.compress; + if (unlikely(listener.is_socket_hangup(slave))) + { + if (global_system_variables.log_warnings > 2) + sql_print_warning("Semisync ack receiver got hangup " + "from slave server-id %d", + slave->server_id()); + it.remove(); + m_slaves_changed= true; + continue; + } + len= my_net_read(&net); if (likely(len != packet_error)) { diff --git a/sql/semisync_master_ack_receiver.h b/sql/semisync_master_ack_receiver.h index 817df513069..6b3ff3695d0 100644 --- a/sql/semisync_master_ack_receiver.h +++ b/sql/semisync_master_ack_receiver.h @@ -228,6 +228,11 @@ public: return m_fds[slave->m_fds_index].revents & POLLIN; } + bool is_socket_hangup(const Slave *slave) + { + return m_fds[slave->m_fds_index].revents & POLLHUP; + } + void clear_socket_info(const Slave *slave) { m_fds[slave->m_fds_index].fd= -1; @@ -296,6 +301,11 @@ public: return FD_ISSET(slave->sock_fd(), &m_fds); } + bool is_socket_hangup(const Slave *slave) + { + return 0; + } + bool has_signal_data() override { return FD_ISSET(local_read_signal, &m_fds); diff --git a/sql/slave.cc b/sql/slave.cc index 61c638c9fdf..b5558868cac 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -1340,6 +1340,8 @@ static bool io_slave_killed(Master_info* mi) DBUG_ENTER("io_slave_killed"); DBUG_ASSERT(mi->slave_running); // tracking buffer overrun + if (mi->abort_slave || mi->io_thd->killed) + DBUG_PRINT("info", ("killed")); DBUG_RETURN(mi->abort_slave || mi->io_thd->killed); } diff --git a/vio/vio.c b/vio/vio.c index 7a98eb2af7b..bf1e79ae36b 100644 --- a/vio/vio.c +++ b/vio/vio.c @@ -79,6 +79,7 @@ static my_bool has_no_data(Vio *vio __attribute__((unused))) int vio_pipe_shutdown(Vio *vio, int how) { vio->shutdown_flag= how; + vio->state= VIO_STATE_SHUTDOWN; return CancelIoEx(vio->hPipe, NULL); } #endif @@ -98,6 +99,7 @@ static void vio_init(Vio *vio, enum enum_vio_type type, #endif memset(vio, 0, sizeof(*vio)); vio->type= type; + vio->state= VIO_STATE_ACTIVE; vio->mysql_socket= MYSQL_INVALID_SOCKET; mysql_socket_setfd(&vio->mysql_socket, sd); vio->localhost= flags & VIO_LOCALHOST; diff --git a/vio/viosocket.c b/vio/viosocket.c index 002ff274b74..bb6baac571e 100644 --- a/vio/viosocket.c +++ b/vio/viosocket.c @@ -288,12 +288,18 @@ size_t vio_write(Vio *vio, const uchar* buf, size_t size) int vio_socket_shutdown(Vio *vio, int how) { - int ret= shutdown(mysql_socket_getfd(vio->mysql_socket), how); + int ret; + DBUG_ENTER("vio_socket_shutdown"); + DBUG_PRINT("enter", ("sd: %d", (int)mysql_socket_getfd(vio->mysql_socket))); + + vio->state= VIO_STATE_SHUTDOWN; + ret= shutdown(mysql_socket_getfd(vio->mysql_socket), how); + #ifdef _WIN32 /* Cancel possible IO in progress (shutdown does not do that on Windows). */ (void) CancelIoEx((HANDLE)mysql_socket_getfd(vio->mysql_socket), NULL); #endif - return ret; + DBUG_RETURN(ret); } @@ -552,8 +558,10 @@ my_bool vio_should_retry(Vio *vio) { DBUG_ENTER("vio_should_retry"); - DBUG_PRINT("info", ("vio_errno: %d", vio_errno(vio))); - DBUG_RETURN(vio_errno(vio) == SOCKET_EINTR); + DBUG_PRINT("info", ("vio_errno: %d state: %d", + vio_errno(vio), (int) vio->state)); + DBUG_RETURN(vio_errno(vio) == SOCKET_EINTR && + vio->state < VIO_STATE_SHUTDOWN); } @@ -576,28 +584,30 @@ vio_was_timeout(Vio *vio) int vio_close(Vio *vio) { - int r=0; DBUG_ENTER("vio_close"); DBUG_PRINT("enter", ("sd: %d", (int)mysql_socket_getfd(vio->mysql_socket))); if (vio->type != VIO_CLOSED) { + MYSQL_SOCKET mysql_socket= vio->mysql_socket; DBUG_ASSERT(vio->type == VIO_TYPE_TCPIP || - vio->type == VIO_TYPE_SOCKET || - vio->type == VIO_TYPE_SSL); + vio->type == VIO_TYPE_SOCKET || + vio->type == VIO_TYPE_SSL); - DBUG_ASSERT(mysql_socket_getfd(vio->mysql_socket) >= 0); - if (mysql_socket_close(vio->mysql_socket)) - r= -1; + + vio->type= VIO_CLOSED; + vio->state= VIO_STATE_CLOSED; + vio->mysql_socket= MYSQL_INVALID_SOCKET; + + DBUG_ASSERT(mysql_socket_getfd(mysql_socket) >= 0); + if (mysql_socket_close(mysql_socket)) + { + DBUG_PRINT("vio_error", ("close() failed, error: %d",socket_errno)); + /* FIXME: error handling (not critical for MySQL) */ + DBUG_RETURN(-1); + } } - if (r) - { - DBUG_PRINT("vio_error", ("close() failed, error: %d",socket_errno)); - /* FIXME: error handling (not critical for MySQL) */ - } - vio->type= VIO_CLOSED; - vio->mysql_socket= MYSQL_INVALID_SOCKET; - DBUG_RETURN(r); + DBUG_RETURN(0); } @@ -917,7 +927,17 @@ int vio_io_wait(Vio *vio, enum enum_vio_io_event event, int timeout) my_socket sd= mysql_socket_getfd(vio->mysql_socket); MYSQL_SOCKET_WAIT_VARIABLES(locker, state) /* no ';' */ DBUG_ENTER("vio_io_wait"); - DBUG_PRINT("enter", ("timeout: %d", timeout)); + DBUG_PRINT("enter", ("sd: %d timeout: %d", + (int) mysql_socket_getfd(vio->mysql_socket), + timeout)); + + if (vio->state >= VIO_STATE_SHUTDOWN) + { + DBUG_PRINT("info", ("vio deactivated. state: %ld", (int) vio->state)); + /* Socket is shutdown or closed */ + errno= EIO; + DBUG_RETURN(-1); + } memset(&pfd, 0, sizeof(pfd)); @@ -948,7 +968,7 @@ int vio_io_wait(Vio *vio, enum enum_vio_io_event event, int timeout) switch ((ret= poll(&pfd, 1, timeout))) { case -1: - DBUG_PRINT("error", ("poll returned -1")); + DBUG_PRINT("error", ("poll returned -1 errno: %d", vio_errno(vio))); /* On error, -1 is returned. */ break; case 0: @@ -980,6 +1000,14 @@ int vio_io_wait(Vio *vio, enum enum_vio_io_event event, int timeout) MYSQL_SOCKET_WAIT_VARIABLES(locker, state) /* no ';' */ DBUG_ENTER("vio_io_wait"); + if (vio->state >= VIO_STATE_SHUTDOWN) + { + DBUG_PRINT("info", ("vio deactive. state: %ld", (int) vio->state)); + /* Socket is shutdown or closed */ + errno= EIO; + DBUG_RETURN(-1); + } + /* Convert the timeout, in milliseconds, to seconds and microseconds. */ if (timeout >= 0) { @@ -1153,12 +1181,16 @@ my_bool vio_is_connected(Vio *vio) uint bytes= 0; DBUG_ENTER("vio_is_connected"); + if (vio->state >= VIO_STATE_SHUTDOWN) + DBUG_RETURN(FALSE); + /* The first step of detecting an EOF condition is verifying whether there is data to read. Data in this case would be the EOF. An exceptional condition event and/or errors are interpreted as if there is data to read. */ + if (!vio_io_wait(vio, VIO_IO_EVENT_READ, 0)) DBUG_RETURN(TRUE);