MDEV-33582 Add more warnings to be able to better diagnose network issues

Warnings are added to net_server.cc when
global_system_variables.log_warnings >= 4.

When the above condition holds then:
- All communication errors from net_serv.cc is also written to the
  error log.
- In case of a of not being able to read or write a packet, a more
  detailed error is given.

Other things:
- Added detection of slaves that has hangup to Ack_receiver::run()
- vio_close() is now first marking the socket closed before closing it.
  The reason for this is to ensure that the connection that gets a read
  error can check if the reason was that the socket was closed.
- Add a new state to vio to be able to detect if vio is acive, shutdown or
  closed. This is used to detect if socket is closed by another thread.
- Testing of the new warnings is done in rpl_get_lock.test
- Suppress some of the new warnings in mtr to allow one to run some of
  the tests with -mysqld=--log-warnings=4. All test in the 'rpl' suite
  can now be run with this option.
 - Ensure that global.log_warnings are restored at test end in a way
   that allows one to use mtr --mysqld=--log-warnings=4.

Reviewed-by: <serg@mariadb.org>,<brandon.nesterenko@mariadb.com>
This commit is contained in:
Monty 2024-03-01 18:16:33 +02:00
parent 48f42ab2e5
commit 567c097359
40 changed files with 211 additions and 53 deletions

View file

@ -962,6 +962,7 @@ typedef struct st_mysql_lex_string LEX_STRING;
#define SOCKET_ECONNRESET WSAECONNRESET #define SOCKET_ECONNRESET WSAECONNRESET
#define SOCKET_ENFILE ENFILE #define SOCKET_ENFILE ENFILE
#define SOCKET_EMFILE EMFILE #define SOCKET_EMFILE EMFILE
#define SOCKET_CLOSED EIO
#else /* Unix */ #else /* Unix */
#define socket_errno errno #define socket_errno errno
#define closesocket(A) close(A) #define closesocket(A) close(A)
@ -971,6 +972,7 @@ typedef struct st_mysql_lex_string LEX_STRING;
#define SOCKET_EADDRINUSE EADDRINUSE #define SOCKET_EADDRINUSE EADDRINUSE
#define SOCKET_ETIMEDOUT ETIMEDOUT #define SOCKET_ETIMEDOUT ETIMEDOUT
#define SOCKET_ECONNRESET ECONNRESET #define SOCKET_ECONNRESET ECONNRESET
#define SOCKET_CLOSED EIO
#define SOCKET_ENFILE ENFILE #define SOCKET_ENFILE ENFILE
#define SOCKET_EMFILE EMFILE #define SOCKET_EMFILE EMFILE
#endif #endif

View file

@ -41,6 +41,13 @@ enum enum_vio_type
VIO_TYPE_SSL VIO_TYPE_SSL
/* see also vio_type_names[] */ /* see also vio_type_names[] */
}; };
enum enum_vio_state
{
VIO_STATE_NOT_INITIALIZED, VIO_STATE_ACTIVE, VIO_STATE_SHUTDOWN,
VIO_STATE_CLOSED
};
#define FIRST_VIO_TYPE VIO_CLOSED #define FIRST_VIO_TYPE VIO_CLOSED
#define LAST_VIO_TYPE VIO_TYPE_SSL #define LAST_VIO_TYPE VIO_TYPE_SSL
@ -244,6 +251,7 @@ struct st_vio
struct sockaddr_storage local; /* Local internet address */ struct sockaddr_storage local; /* Local internet address */
struct sockaddr_storage remote; /* Remote internet address */ struct sockaddr_storage remote; /* Remote internet address */
enum enum_vio_type type; /* Type of connection */ enum enum_vio_type type; /* Type of connection */
enum enum_vio_state state; /* State of the connection */
const char *desc; /* String description */ const char *desc; /* String description */
char *read_buffer; /* buffer for vio_read_buff */ char *read_buffer; /* buffer for vio_read_buff */
char *read_pos; /* start of unfetched data in the char *read_pos; /* start of unfetched data in the

View file

@ -4486,6 +4486,13 @@ sub extract_warning_lines ($$) {
qr/Slave I\/0: Master command COM_BINLOG_DUMP failed/, qr/Slave I\/0: Master command COM_BINLOG_DUMP failed/,
qr/Error reading packet/, qr/Error reading packet/,
qr/Lost connection to MariaDB server at 'reading initial communication packet'/, qr/Lost connection to MariaDB server at 'reading initial communication packet'/,
qr/Could not read packet:.* state: [2-3] /,
qr/Could not read packet:.* errno: 104 /,
qr/Could not read packet:.* errno: 0 .* length: 0/,
qr/Could not write packet:.* errno: 32 /,
qr/Could not write packet:.* errno: 104 /,
qr/Semisync ack receiver got error 1158/,
qr/Connection was killed/,
qr/Failed on request_dump/, qr/Failed on request_dump/,
qr/Slave: Can't drop database.* database doesn't exist/, qr/Slave: Can't drop database.* database doesn't exist/,
qr/Slave: Operation DROP USER failed for 'create_rout_db'/, qr/Slave: Operation DROP USER failed for 'create_rout_db'/,

View file

@ -2,6 +2,8 @@ include/master-slave.inc
[connection master] [connection master]
call mtr.add_suppression("Slave I/O: Got a packet bigger than 'slave_max_allowed_packet' bytes, .*error.* 1153"); call mtr.add_suppression("Slave I/O: Got a packet bigger than 'slave_max_allowed_packet' bytes, .*error.* 1153");
call mtr.add_suppression("Log entry on master is longer than slave_max_allowed_packet"); call mtr.add_suppression("Log entry on master is longer than slave_max_allowed_packet");
call mtr.add_suppression("Could not write packet:");
call mtr.add_suppression("Got a packet bigger than 'max_allowed_packet' bytes");
drop database if exists DB_NAME_OF_MAX_LENGTH_AKA_NAME_LEN_64_BYTES_____________________; drop database if exists DB_NAME_OF_MAX_LENGTH_AKA_NAME_LEN_64_BYTES_____________________;
create database DB_NAME_OF_MAX_LENGTH_AKA_NAME_LEN_64_BYTES_____________________; create database DB_NAME_OF_MAX_LENGTH_AKA_NAME_LEN_64_BYTES_____________________;
connection master; connection master;

View file

@ -59,6 +59,10 @@
#VARCHAR(M) #VARCHAR(M)
# #
--disable_query_log
call mtr.add_suppression("Could not read packet:.* errno: 11");
--enable_query_log
--let $_saved_conn= $CURRENT_CONNECTION --let $_saved_conn= $CURRENT_CONNECTION
let $binformat = `SHOW VARIABLES LIKE '%binlog_format%'`; let $binformat = `SHOW VARIABLES LIKE '%binlog_format%'`;

View file

@ -1,6 +1,7 @@
include/master-slave.inc include/master-slave.inc
[connection master] [connection master]
connection master; connection master;
SET @org_log_warnings=@@GLOBAL.LOG_WARNINGS;
SET GLOBAL LOG_WARNINGS=2; SET GLOBAL LOG_WARNINGS=2;
connection slave; connection slave;
include/stop_slave.inc include/stop_slave.inc
@ -41,11 +42,11 @@ connection master;
include/wait_for_pattern_in_file.inc include/wait_for_pattern_in_file.inc
FOUND 1 /using_gtid\(1\), gtid\(\'0-1-2,10-1-1\'\).*/ in mysqld.1.err FOUND 1 /using_gtid\(1\), gtid\(\'0-1-2,10-1-1\'\).*/ in mysqld.1.err
"===== Clean up =====" "===== Clean up ====="
SET GLOBAL LOG_WARNINGS=@org_log_warnings;
connection slave; connection slave;
include/stop_slave.inc include/stop_slave.inc
CHANGE MASTER TO MASTER_USE_GTID=no; CHANGE MASTER TO MASTER_USE_GTID=no;
include/start_slave.inc include/start_slave.inc
connection master; connection master;
DROP TABLE t; DROP TABLE t;
SET GLOBAL LOG_WARNINGS=default;
include/rpl_end.inc include/rpl_end.inc

View file

@ -1,9 +1,6 @@
include/master-slave.inc include/master-slave.inc
[connection master] [connection master]
connection master; connection master;
call mtr.add_suppression("mysqld: Table '.*gtid_slave_pos' is marked as crashed and should be repaired");
call mtr.add_suppression("Checking table: './mysql/gtid_slave_pos'");
call mtr.add_suppression("mysql.gtid_slave_pos: 1 client is using or hasn't closed the table properly");
SET @@session.gtid_domain_id= 0; SET @@session.gtid_domain_id= 0;
create table ti (a int auto_increment primary key) engine=innodb; create table ti (a int auto_increment primary key) engine=innodb;
create table tm (a int auto_increment primary key) engine=myisam; create table tm (a int auto_increment primary key) engine=myisam;

View file

@ -1,6 +1,6 @@
include/master-slave.inc include/master-slave.inc
[connection master] [connection master]
CALL mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT"); SET GLOBAL LOG_WARNINGS=4;
create table t1(n int); create table t1(n int);
insert into t1 values(get_lock("lock",2)); insert into t1 values(get_lock("lock",2));
disconnect master; disconnect master;
@ -35,4 +35,5 @@ NULL
connection master1; connection master1;
drop table t1; drop table t1;
connection slave; connection slave;
connection default;
include/rpl_end.inc include/rpl_end.inc

View file

@ -4,6 +4,7 @@ connection server_1;
call mtr.add_suppression("Checking table:"); call mtr.add_suppression("Checking table:");
call mtr.add_suppression("client is using or hasn't closed the table properly"); call mtr.add_suppression("client is using or hasn't closed the table properly");
call mtr.add_suppression("Table .* is marked as crashed and should be repaired"); call mtr.add_suppression("Table .* is marked as crashed and should be repaired");
call mtr.add_suppression("Could not read packet:.* errno: 11");
flush tables; flush tables;
ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB;
CREATE TABLE t1 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB; CREATE TABLE t1 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB;

View file

@ -50,5 +50,4 @@ CHANGE MASTER TO MASTER_USE_GTID=no;
include/start_slave.inc include/start_slave.inc
connection master; connection master;
DROP TABLE t; DROP TABLE t;
SET GLOBAL LOG_WARNINGS=default;
include/rpl_end.inc include/rpl_end.inc

View file

@ -9,8 +9,6 @@ Variable_name Slave_heartbeat_period
Value 60.000 Value 60.000
SET @saved_dbug= @@GLOBAL.debug_dbug; SET @saved_dbug= @@GLOBAL.debug_dbug;
SET GLOBAL debug_dbug="+d,simulate_slave_heartbeat_network_error"; SET GLOBAL debug_dbug="+d,simulate_slave_heartbeat_network_error";
CALL mtr.add_suppression('SET @master_heartbeat_period to master failed with error');
CALL mtr.add_suppression('Master command COM_REGISTER_SLAVE failed: failed registering on master, reconnecting to try again');
include/start_slave.inc include/start_slave.inc
connection master; connection master;
drop table if exists t1; drop table if exists t1;

View file

@ -2,6 +2,8 @@ include/master-slave.inc
[connection master] [connection master]
call mtr.add_suppression("Slave I/O: Got a packet bigger than 'slave_max_allowed_packet' bytes, .*error.* 1153"); call mtr.add_suppression("Slave I/O: Got a packet bigger than 'slave_max_allowed_packet' bytes, .*error.* 1153");
call mtr.add_suppression("Log entry on master is longer than slave_max_allowed_packet"); call mtr.add_suppression("Log entry on master is longer than slave_max_allowed_packet");
call mtr.add_suppression("Could not write packet:");
call mtr.add_suppression("Got a packet bigger than 'max_allowed_packet' bytes");
drop database if exists DB_NAME_OF_MAX_LENGTH_AKA_NAME_LEN_64_BYTES_____________________; drop database if exists DB_NAME_OF_MAX_LENGTH_AKA_NAME_LEN_64_BYTES_____________________;
create database DB_NAME_OF_MAX_LENGTH_AKA_NAME_LEN_64_BYTES_____________________; create database DB_NAME_OF_MAX_LENGTH_AKA_NAME_LEN_64_BYTES_____________________;
connection master; connection master;

View file

@ -626,6 +626,7 @@ include/save_master_gtid.inc
connection server_2; connection server_2;
include/sync_with_master_gtid.inc include/sync_with_master_gtid.inc
connection server_2; connection server_2;
SET @org_log_warnings=@@GLOBAL.LOG_WARNINGS;
set global log_warnings=2; set global log_warnings=2;
BEGIN; BEGIN;
INSERT INTO t1 SET a=1; INSERT INTO t1 SET a=1;
@ -651,7 +652,7 @@ connection server_2;
include/sync_with_master_gtid.inc include/sync_with_master_gtid.inc
connection server_2; connection server_2;
include/stop_slave.inc include/stop_slave.inc
set global log_warnings=default; set global log_warnings=@org_log_warnings;
SET GLOBAL slave_parallel_mode=@old_parallel_mode; SET GLOBAL slave_parallel_mode=@old_parallel_mode;
SET GLOBAL slave_parallel_threads=@old_parallel_threads; SET GLOBAL slave_parallel_threads=@old_parallel_threads;
include/start_slave.inc include/start_slave.inc

View file

@ -32,7 +32,6 @@ include/diff_tables.inc [master:t0, slave:t0]
include/diff_tables.inc [master:t1, slave:t1] include/diff_tables.inc [master:t1, slave:t1]
connection slave; connection slave;
include/stop_slave.inc include/stop_slave.inc
set global log_warnings=default;
SET GLOBAL slave_parallel_mode=@old_parallel_mode; SET GLOBAL slave_parallel_mode=@old_parallel_mode;
SET GLOBAL slave_parallel_threads=@old_parallel_threads; SET GLOBAL slave_parallel_threads=@old_parallel_threads;
include/start_slave.inc include/start_slave.inc

View file

@ -32,7 +32,6 @@ include/diff_tables.inc [master:t0, slave:t0]
include/diff_tables.inc [master:t1, slave:t1] include/diff_tables.inc [master:t1, slave:t1]
connection slave; connection slave;
include/stop_slave.inc include/stop_slave.inc
set global log_warnings=default;
SET GLOBAL slave_parallel_mode=@old_parallel_mode; SET GLOBAL slave_parallel_mode=@old_parallel_mode;
SET GLOBAL slave_parallel_threads=@old_parallel_threads; SET GLOBAL slave_parallel_threads=@old_parallel_threads;
include/start_slave.inc include/start_slave.inc

View file

@ -20,6 +20,5 @@ FOUND 1 /The slave is applying a ROW event on behalf of an UPDATE statement on t
FOUND 1 /The slave is applying a ROW event on behalf of a DELETE statement on table t1 and is currently taking a considerable amount/ in mysqld.2.err FOUND 1 /The slave is applying a ROW event on behalf of a DELETE statement on table t1 and is currently taking a considerable amount/ in mysqld.2.err
include/stop_slave.inc include/stop_slave.inc
SET @@GLOBAL.debug_dbug = @saved_dbug; SET @@GLOBAL.debug_dbug = @saved_dbug;
SET GLOBAL log_warnings = 2;
include/start_slave.inc include/start_slave.inc
include/rpl_end.inc include/rpl_end.inc

View file

@ -2,6 +2,8 @@ include/master-slave.inc
[connection master] [connection master]
call mtr.add_suppression("Replication event checksum verification failed"); call mtr.add_suppression("Replication event checksum verification failed");
call mtr.add_suppression("could not queue event from master"); call mtr.add_suppression("could not queue event from master");
call mtr.add_suppression("Semisync ack receiver.*error reading communication packets");
call mtr.add_suppression("Semisync ack receiver got hangup");
# #
# Set up a semisync connection # Set up a semisync connection
connection master; connection master;

View file

@ -1,6 +1,5 @@
include/master-slave.inc include/master-slave.inc
[connection master] [connection master]
call mtr.add_suppression("Master is configured to log replication events");
connection slave; connection slave;
connection slave; connection slave;
include/wait_for_slave_to_stop.inc include/wait_for_slave_to_stop.inc

View file

@ -39,6 +39,7 @@
--source include/master-slave.inc --source include/master-slave.inc
--connection master --connection master
SET @org_log_warnings=@@GLOBAL.LOG_WARNINGS;
SET GLOBAL LOG_WARNINGS=2; SET GLOBAL LOG_WARNINGS=2;
--connection slave --connection slave
@ -110,6 +111,7 @@ CHANGE MASTER TO MASTER_USE_GTID=slave_pos;
--source include/wait_for_pattern_in_file.inc --source include/wait_for_pattern_in_file.inc
--echo "===== Clean up =====" --echo "===== Clean up ====="
SET GLOBAL LOG_WARNINGS=@org_log_warnings;
--connection slave --connection slave
--source include/stop_slave.inc --source include/stop_slave.inc
CHANGE MASTER TO MASTER_USE_GTID=no; CHANGE MASTER TO MASTER_USE_GTID=no;
@ -117,5 +119,4 @@ CHANGE MASTER TO MASTER_USE_GTID=no;
--connection master --connection master
DROP TABLE t; DROP TABLE t;
SET GLOBAL LOG_WARNINGS=default;
--source include/rpl_end.inc --source include/rpl_end.inc

View file

@ -6,9 +6,12 @@
connection master; connection master;
--disable_query_log
call mtr.add_suppression("mysqld: Table '.*gtid_slave_pos' is marked as crashed and should be repaired"); call mtr.add_suppression("mysqld: Table '.*gtid_slave_pos' is marked as crashed and should be repaired");
call mtr.add_suppression("Checking table: './mysql/gtid_slave_pos'"); call mtr.add_suppression("Checking table: './mysql/gtid_slave_pos'");
call mtr.add_suppression("mysql.gtid_slave_pos: 1 client is using or hasn't closed the table properly"); call mtr.add_suppression("mysql.gtid_slave_pos: 1 client is using or hasn't closed the table properly");
call mtr.add_suppression("Could not read packet:.* errno: 11");
--enable_query_log
SET @@session.gtid_domain_id= 0; SET @@session.gtid_domain_id= 0;
create table ti (a int auto_increment primary key) engine=innodb; create table ti (a int auto_increment primary key) engine=innodb;

View file

@ -30,6 +30,10 @@
--let $rpl_skip_start_slave=1 --let $rpl_skip_start_slave=1
--source include/master-slave.inc --source include/master-slave.inc
--disable_query_log
call mtr.add_suppression("Could not read packet:.* errno: 11");
--enable_query_log
# Do an insert on master # Do an insert on master
CREATE TABLE t1(a int); CREATE TABLE t1(a int);
INSERT INTO t1 VALUES(1); INSERT INTO t1 VALUES(1);

View file

@ -1,6 +1,16 @@
source include/master-slave.inc; source include/master-slave.inc;
--disable_query_log
CALL mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT"); CALL mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT");
call mtr.add_suppression("Could not read packet:.* errno: 11 ");
# The following one comes from calling dirty_close on client side
call mtr.add_suppression("Could not read packet:.* errno: 2 ");
--enable_query_log
let $org_log_warnings=`select @@global.log_warnings`;
# Test extended warnings
SET GLOBAL LOG_WARNINGS=4;
create table t1(n int); create table t1(n int);
# Use of get_lock gives a warning for unsafeness if binlog_format=statement # Use of get_lock gives a warning for unsafeness if binlog_format=statement
@ -41,6 +51,10 @@ connection master1;
drop table t1; drop table t1;
sync_slave_with_master; sync_slave_with_master;
connection default;
--disable_query_log
--eval SET GLOBAL LOG_WARNINGS=$org_log_warnings;
--enable_query_log
--source include/rpl_end.inc --source include/rpl_end.inc

View file

@ -12,6 +12,7 @@
call mtr.add_suppression("Checking table:"); call mtr.add_suppression("Checking table:");
call mtr.add_suppression("client is using or hasn't closed the table properly"); call mtr.add_suppression("client is using or hasn't closed the table properly");
call mtr.add_suppression("Table .* is marked as crashed and should be repaired"); call mtr.add_suppression("Table .* is marked as crashed and should be repaired");
call mtr.add_suppression("Could not read packet:.* errno: 11");
flush tables; flush tables;
ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB;

View file

@ -5,6 +5,10 @@
--let $rpl_topology=1->2 --let $rpl_topology=1->2
--source include/rpl_init.inc --source include/rpl_init.inc
--disable_query_log
call mtr.add_suppression("Could not read packet:.* errno: 11");
--enable_query_log
--echo *** Test crashing master with InnoDB disabled, the binlog gtid state should still be correctly recovered. *** --echo *** Test crashing master with InnoDB disabled, the binlog gtid state should still be correctly recovered. ***
--connection server_1 --connection server_1

View file

@ -93,5 +93,4 @@ CHANGE MASTER TO MASTER_USE_GTID=no;
--connection master --connection master
DROP TABLE t; DROP TABLE t;
SET GLOBAL LOG_WARNINGS=default;
--source include/rpl_end.inc --source include/rpl_end.inc

View file

@ -3,6 +3,12 @@
--source include/have_debug.inc --source include/have_debug.inc
--source include/master-slave.inc --source include/master-slave.inc
--disable_query_log
CALL mtr.add_suppression('SET @master_heartbeat_period to master failed with error');
CALL mtr.add_suppression('Master command COM_REGISTER_SLAVE failed: failed registering on master, reconnecting to try again');
call mtr.add_suppression("Could not read packet:.* errno: 11");
--enable_query_log
connection slave; connection slave;
--source include/stop_slave.inc --source include/stop_slave.inc
set @restore_slave_net_timeout= @@global.slave_net_timeout; set @restore_slave_net_timeout= @@global.slave_net_timeout;
@ -14,14 +20,13 @@ set @@global.slave_net_timeout= 10;
### Checking the range ### Checking the range
### ###
#
# default period slave_net_timeout/2 # default period slave_net_timeout/2
# #
--query_vertical show status like 'Slave_heartbeat_period'; --query_vertical show status like 'Slave_heartbeat_period';
SET @saved_dbug= @@GLOBAL.debug_dbug; SET @saved_dbug= @@GLOBAL.debug_dbug;
SET GLOBAL debug_dbug="+d,simulate_slave_heartbeat_network_error"; SET GLOBAL debug_dbug="+d,simulate_slave_heartbeat_network_error";
CALL mtr.add_suppression('SET @master_heartbeat_period to master failed with error');
CALL mtr.add_suppression('Master command COM_REGISTER_SLAVE failed: failed registering on master, reconnecting to try again');
--source include/start_slave.inc --source include/start_slave.inc

View file

@ -20,6 +20,9 @@ source include/master-slave.inc;
call mtr.add_suppression("Slave I/O: Got a packet bigger than 'slave_max_allowed_packet' bytes, .*error.* 1153"); call mtr.add_suppression("Slave I/O: Got a packet bigger than 'slave_max_allowed_packet' bytes, .*error.* 1153");
call mtr.add_suppression("Log entry on master is longer than slave_max_allowed_packet"); call mtr.add_suppression("Log entry on master is longer than slave_max_allowed_packet");
call mtr.add_suppression("Could not write packet:");
call mtr.add_suppression("Got a packet bigger than 'max_allowed_packet' bytes");
let $db= DB_NAME_OF_MAX_LENGTH_AKA_NAME_LEN_64_BYTES_____________________; let $db= DB_NAME_OF_MAX_LENGTH_AKA_NAME_LEN_64_BYTES_____________________;
disable_warnings; disable_warnings;
eval drop database if exists $db; eval drop database if exists $db;

View file

@ -508,6 +508,7 @@ DELETE FROM t2;
# The 1st of the following two trx:s a blocker on slave # The 1st of the following two trx:s a blocker on slave
--connection server_2 --connection server_2
SET @org_log_warnings=@@GLOBAL.LOG_WARNINGS;
set global log_warnings=2; set global log_warnings=2;
BEGIN; BEGIN;
INSERT INTO t1 SET a=1; INSERT INTO t1 SET a=1;
@ -555,7 +556,7 @@ DELETE FROM t2;
# #
--connection server_2 --connection server_2
--source include/stop_slave.inc --source include/stop_slave.inc
set global log_warnings=default; set global log_warnings=@org_log_warnings;
SET GLOBAL slave_parallel_mode=@old_parallel_mode; SET GLOBAL slave_parallel_mode=@old_parallel_mode;
SET GLOBAL slave_parallel_threads=@old_parallel_threads; SET GLOBAL slave_parallel_threads=@old_parallel_threads;
--source include/start_slave.inc --source include/start_slave.inc

View file

@ -206,7 +206,6 @@ while($i > 0)
# #
--connection slave --connection slave
--source include/stop_slave.inc --source include/stop_slave.inc
set global log_warnings=default;
SET GLOBAL slave_parallel_mode=@old_parallel_mode; SET GLOBAL slave_parallel_mode=@old_parallel_mode;
SET GLOBAL slave_parallel_threads=@old_parallel_threads; SET GLOBAL slave_parallel_threads=@old_parallel_threads;
--source include/start_slave.inc --source include/start_slave.inc

View file

@ -51,7 +51,9 @@ DROP TABLE t1;
# cleanup # cleanup
--source include/stop_slave.inc --source include/stop_slave.inc
SET @@GLOBAL.debug_dbug = @saved_dbug; SET @@GLOBAL.debug_dbug = @saved_dbug;
--disable_query_log
--eval SET GLOBAL log_warnings = $log_warnings_save --eval SET GLOBAL log_warnings = $log_warnings_save
--enable_query_log
--source include/start_slave.inc --source include/start_slave.inc
--source include/rpl_end.inc --source include/rpl_end.inc

View file

@ -10,6 +10,11 @@
--source include/have_binlog_format_row.inc --source include/have_binlog_format_row.inc
--source include/master-slave.inc --source include/master-slave.inc
--disable_query_log
call mtr.add_suppression("Could not read packet:.* errno: 11");
flush tables;
--enable_query_log
# Initial slave # Initial slave
--connection server_2 --connection server_2
--source include/stop_slave.inc --source include/stop_slave.inc

View file

@ -1,13 +1,13 @@
!include ../my.cnf !include ../my.cnf
[mysqld.1] [mysqld.1]
log_warnings=9 log_warnings=3
[mysqld.2] [mysqld.2]
log_warnings=9 log_warnings=3
[mysqld.3] [mysqld.3]
log_warnings=9 log_warnings=3
[ENV] [ENV]
SERVER_MYPORT_3= @mysqld.3.port SERVER_MYPORT_3= @mysqld.3.port

View file

@ -14,7 +14,8 @@
call mtr.add_suppression("Replication event checksum verification failed"); call mtr.add_suppression("Replication event checksum verification failed");
call mtr.add_suppression("could not queue event from master"); call mtr.add_suppression("could not queue event from master");
call mtr.add_suppression("Semisync ack receiver.*error reading communication packets");
call mtr.add_suppression("Semisync ack receiver got hangup");
--echo # --echo #
--echo # Set up a semisync connection --echo # Set up a semisync connection
--connection master --connection master

View file

@ -3,7 +3,10 @@
# #
--source include/master-slave.inc --source include/master-slave.inc
--disable_query_log
call mtr.add_suppression("Master is configured to log replication events"); call mtr.add_suppression("Master is configured to log replication events");
call mtr.add_suppression("Could not read packet:.* errno: 11");
--enable_query_log
--connection slave --connection slave

View file

@ -74,7 +74,13 @@ static int inline EXTRA_DEBUG_fflush(...) { return 0; }
#ifdef MYSQL_SERVER #ifdef MYSQL_SERVER
#include <sql_class.h> #include <sql_class.h>
#include <sql_connect.h> #include <sql_connect.h>
#define MYSQL_SERVER_my_error my_error
static void inline MYSQL_SERVER_my_error(uint error, myf flags)
{
my_error(error,
flags | MYF(global_system_variables.log_warnings > 3 ? ME_ERROR_LOG : 0));
}
#else #else
static void inline MYSQL_SERVER_my_error(...) {} static void inline MYSQL_SERVER_my_error(...) {}
#endif #endif
@ -766,7 +772,19 @@ net_real_write(NET *net,const uchar *packet, size_t len)
#endif /* !defined(MYSQL_SERVER) */ #endif /* !defined(MYSQL_SERVER) */
net->error= 2; /* Close socket */ net->error= 2; /* Close socket */
net->last_errno= (interrupted ? ER_NET_WRITE_INTERRUPTED : net->last_errno= (interrupted ? ER_NET_WRITE_INTERRUPTED :
ER_NET_ERROR_ON_WRITE); ER_NET_ERROR_ON_WRITE);
#ifdef MYSQL_SERVER
if (global_system_variables.log_warnings > 3)
{
my_printf_error(net->last_errno,
"Could not write packet: fd: %lld state: %d "
"errno: %d vio_errno: %d length: %ld",
MYF(ME_ERROR_LOG),
(longlong) vio_fd(net->vio), (int) net->vio->state,
vio_errno(net->vio), net->last_errno, (ulong) (end-pos));
break;
}
#endif
MYSQL_SERVER_my_error(net->last_errno, MYF(0)); MYSQL_SERVER_my_error(net->last_errno, MYF(0));
break; break;
} }
@ -1064,20 +1082,34 @@ retry:
(longlong) vio_fd(net->vio)); (longlong) vio_fd(net->vio));
} }
#ifndef MYSQL_SERVER #ifndef MYSQL_SERVER
if (length != 0 && vio_errno(net->vio) == SOCKET_EINTR) if (length != 0 && vio_should_retry(net->vio))
{ {
DBUG_PRINT("warning",("Interrupted read. Retrying...")); DBUG_PRINT("warning",("Interrupted read. Retrying..."));
continue; continue;
} }
#endif #endif
DBUG_PRINT("error",("Couldn't read packet: remain: %u errno: %d length: %ld", DBUG_PRINT("error",("Could not read packet: remain: %u errno: %d length: %ld",
remain, vio_errno(net->vio), (long) length)); remain, vio_errno(net->vio), (long) length));
len= packet_error; len= packet_error;
net->error= 2; /* Close socket */ net->error= 2; /* Close socket */
net->last_errno= (vio_was_timeout(net->vio) ? net->last_errno= (vio_was_timeout(net->vio) ?
ER_NET_READ_INTERRUPTED : ER_NET_READ_INTERRUPTED :
ER_NET_READ_ERROR); ER_NET_READ_ERROR);
MYSQL_SERVER_my_error(net->last_errno, MYF(0)); #ifdef MYSQL_SERVER
if (global_system_variables.log_warnings > 3)
{
my_printf_error(net->last_errno,
"Could not read packet: fd: %lld state: %d "
"remain: %u errno: %d vio_errno: %d "
"length: %lld",
MYF(ME_ERROR_LOG),
(longlong) vio_fd(net->vio), (int) net->vio->state,
remain, vio_errno(net->vio), net->last_errno,
(longlong) length);
}
else
my_error(net->last_errno, MYF(0));
#endif /* MYSQL_SERVER */
goto end; goto end;
} }
remain -= (uint32) length; remain -= (uint32) length;
@ -1282,7 +1314,10 @@ ulong
my_net_read_packet(NET *net, my_bool read_from_server) my_net_read_packet(NET *net, my_bool read_from_server)
{ {
ulong reallen = 0; ulong reallen = 0;
return my_net_read_packet_reallen(net, read_from_server, &reallen); ulong length;
DBUG_ENTER("my_net_read_packet");
length= my_net_read_packet_reallen(net, read_from_server, &reallen);
DBUG_RETURN(length);
} }

View file

@ -149,7 +149,7 @@ bool Ack_receiver::add_slave(THD *thd)
slave->thd= thd; slave->thd= thd;
slave->vio= *thd->net.vio; slave->vio= *thd->net.vio;
slave->vio.mysql_socket.m_psi= NULL; slave->vio.mysql_socket.m_psi= NULL;
slave->vio.read_timeout= 1; slave->vio.read_timeout= 1; // 1 ms
mysql_mutex_lock(&m_mutex); mysql_mutex_lock(&m_mutex);
@ -337,6 +337,17 @@ void Ack_receiver::run()
*/ */
net.compress= slave->thd->net.compress; net.compress= slave->thd->net.compress;
if (unlikely(listener.is_socket_hangup(slave)))
{
if (global_system_variables.log_warnings > 2)
sql_print_warning("Semisync ack receiver got hangup "
"from slave server-id %d",
slave->server_id());
it.remove();
m_slaves_changed= true;
continue;
}
len= my_net_read(&net); len= my_net_read(&net);
if (likely(len != packet_error)) if (likely(len != packet_error))
{ {

View file

@ -228,6 +228,11 @@ public:
return m_fds[slave->m_fds_index].revents & POLLIN; return m_fds[slave->m_fds_index].revents & POLLIN;
} }
bool is_socket_hangup(const Slave *slave)
{
return m_fds[slave->m_fds_index].revents & POLLHUP;
}
void clear_socket_info(const Slave *slave) void clear_socket_info(const Slave *slave)
{ {
m_fds[slave->m_fds_index].fd= -1; m_fds[slave->m_fds_index].fd= -1;
@ -296,6 +301,11 @@ public:
return FD_ISSET(slave->sock_fd(), &m_fds); return FD_ISSET(slave->sock_fd(), &m_fds);
} }
bool is_socket_hangup(const Slave *slave)
{
return 0;
}
bool has_signal_data() override bool has_signal_data() override
{ {
return FD_ISSET(local_read_signal, &m_fds); return FD_ISSET(local_read_signal, &m_fds);

View file

@ -1340,6 +1340,8 @@ static bool io_slave_killed(Master_info* mi)
DBUG_ENTER("io_slave_killed"); DBUG_ENTER("io_slave_killed");
DBUG_ASSERT(mi->slave_running); // tracking buffer overrun DBUG_ASSERT(mi->slave_running); // tracking buffer overrun
if (mi->abort_slave || mi->io_thd->killed)
DBUG_PRINT("info", ("killed"));
DBUG_RETURN(mi->abort_slave || mi->io_thd->killed); DBUG_RETURN(mi->abort_slave || mi->io_thd->killed);
} }

View file

@ -79,6 +79,7 @@ static my_bool has_no_data(Vio *vio __attribute__((unused)))
int vio_pipe_shutdown(Vio *vio, int how) int vio_pipe_shutdown(Vio *vio, int how)
{ {
vio->shutdown_flag= how; vio->shutdown_flag= how;
vio->state= VIO_STATE_SHUTDOWN;
return CancelIoEx(vio->hPipe, NULL); return CancelIoEx(vio->hPipe, NULL);
} }
#endif #endif
@ -98,6 +99,7 @@ static void vio_init(Vio *vio, enum enum_vio_type type,
#endif #endif
memset(vio, 0, sizeof(*vio)); memset(vio, 0, sizeof(*vio));
vio->type= type; vio->type= type;
vio->state= VIO_STATE_ACTIVE;
vio->mysql_socket= MYSQL_INVALID_SOCKET; vio->mysql_socket= MYSQL_INVALID_SOCKET;
mysql_socket_setfd(&vio->mysql_socket, sd); mysql_socket_setfd(&vio->mysql_socket, sd);
vio->localhost= flags & VIO_LOCALHOST; vio->localhost= flags & VIO_LOCALHOST;

View file

@ -288,12 +288,18 @@ size_t vio_write(Vio *vio, const uchar* buf, size_t size)
int vio_socket_shutdown(Vio *vio, int how) int vio_socket_shutdown(Vio *vio, int how)
{ {
int ret= shutdown(mysql_socket_getfd(vio->mysql_socket), how); int ret;
DBUG_ENTER("vio_socket_shutdown");
DBUG_PRINT("enter", ("sd: %d", (int)mysql_socket_getfd(vio->mysql_socket)));
vio->state= VIO_STATE_SHUTDOWN;
ret= shutdown(mysql_socket_getfd(vio->mysql_socket), how);
#ifdef _WIN32 #ifdef _WIN32
/* Cancel possible IO in progress (shutdown does not do that on Windows). */ /* Cancel possible IO in progress (shutdown does not do that on Windows). */
(void) CancelIoEx((HANDLE)mysql_socket_getfd(vio->mysql_socket), NULL); (void) CancelIoEx((HANDLE)mysql_socket_getfd(vio->mysql_socket), NULL);
#endif #endif
return ret; DBUG_RETURN(ret);
} }
@ -552,8 +558,10 @@ my_bool
vio_should_retry(Vio *vio) vio_should_retry(Vio *vio)
{ {
DBUG_ENTER("vio_should_retry"); DBUG_ENTER("vio_should_retry");
DBUG_PRINT("info", ("vio_errno: %d", vio_errno(vio))); DBUG_PRINT("info", ("vio_errno: %d state: %d",
DBUG_RETURN(vio_errno(vio) == SOCKET_EINTR); vio_errno(vio), (int) vio->state));
DBUG_RETURN(vio_errno(vio) == SOCKET_EINTR &&
vio->state < VIO_STATE_SHUTDOWN);
} }
@ -576,28 +584,30 @@ vio_was_timeout(Vio *vio)
int vio_close(Vio *vio) int vio_close(Vio *vio)
{ {
int r=0;
DBUG_ENTER("vio_close"); DBUG_ENTER("vio_close");
DBUG_PRINT("enter", ("sd: %d", (int)mysql_socket_getfd(vio->mysql_socket))); DBUG_PRINT("enter", ("sd: %d", (int)mysql_socket_getfd(vio->mysql_socket)));
if (vio->type != VIO_CLOSED) if (vio->type != VIO_CLOSED)
{ {
MYSQL_SOCKET mysql_socket= vio->mysql_socket;
DBUG_ASSERT(vio->type == VIO_TYPE_TCPIP || DBUG_ASSERT(vio->type == VIO_TYPE_TCPIP ||
vio->type == VIO_TYPE_SOCKET || vio->type == VIO_TYPE_SOCKET ||
vio->type == VIO_TYPE_SSL); vio->type == VIO_TYPE_SSL);
DBUG_ASSERT(mysql_socket_getfd(vio->mysql_socket) >= 0);
if (mysql_socket_close(vio->mysql_socket)) vio->type= VIO_CLOSED;
r= -1; vio->state= VIO_STATE_CLOSED;
vio->mysql_socket= MYSQL_INVALID_SOCKET;
DBUG_ASSERT(mysql_socket_getfd(mysql_socket) >= 0);
if (mysql_socket_close(mysql_socket))
{
DBUG_PRINT("vio_error", ("close() failed, error: %d",socket_errno));
/* FIXME: error handling (not critical for MySQL) */
DBUG_RETURN(-1);
}
} }
if (r) DBUG_RETURN(0);
{
DBUG_PRINT("vio_error", ("close() failed, error: %d",socket_errno));
/* FIXME: error handling (not critical for MySQL) */
}
vio->type= VIO_CLOSED;
vio->mysql_socket= MYSQL_INVALID_SOCKET;
DBUG_RETURN(r);
} }
@ -917,7 +927,17 @@ int vio_io_wait(Vio *vio, enum enum_vio_io_event event, int timeout)
my_socket sd= mysql_socket_getfd(vio->mysql_socket); my_socket sd= mysql_socket_getfd(vio->mysql_socket);
MYSQL_SOCKET_WAIT_VARIABLES(locker, state) /* no ';' */ MYSQL_SOCKET_WAIT_VARIABLES(locker, state) /* no ';' */
DBUG_ENTER("vio_io_wait"); DBUG_ENTER("vio_io_wait");
DBUG_PRINT("enter", ("timeout: %d", timeout)); DBUG_PRINT("enter", ("sd: %d timeout: %d",
(int) mysql_socket_getfd(vio->mysql_socket),
timeout));
if (vio->state >= VIO_STATE_SHUTDOWN)
{
DBUG_PRINT("info", ("vio deactivated. state: %ld", (int) vio->state));
/* Socket is shutdown or closed */
errno= EIO;
DBUG_RETURN(-1);
}
memset(&pfd, 0, sizeof(pfd)); memset(&pfd, 0, sizeof(pfd));
@ -948,7 +968,7 @@ int vio_io_wait(Vio *vio, enum enum_vio_io_event event, int timeout)
switch ((ret= poll(&pfd, 1, timeout))) switch ((ret= poll(&pfd, 1, timeout)))
{ {
case -1: case -1:
DBUG_PRINT("error", ("poll returned -1")); DBUG_PRINT("error", ("poll returned -1 errno: %d", vio_errno(vio)));
/* On error, -1 is returned. */ /* On error, -1 is returned. */
break; break;
case 0: case 0:
@ -980,6 +1000,14 @@ int vio_io_wait(Vio *vio, enum enum_vio_io_event event, int timeout)
MYSQL_SOCKET_WAIT_VARIABLES(locker, state) /* no ';' */ MYSQL_SOCKET_WAIT_VARIABLES(locker, state) /* no ';' */
DBUG_ENTER("vio_io_wait"); DBUG_ENTER("vio_io_wait");
if (vio->state >= VIO_STATE_SHUTDOWN)
{
DBUG_PRINT("info", ("vio deactive. state: %ld", (int) vio->state));
/* Socket is shutdown or closed */
errno= EIO;
DBUG_RETURN(-1);
}
/* Convert the timeout, in milliseconds, to seconds and microseconds. */ /* Convert the timeout, in milliseconds, to seconds and microseconds. */
if (timeout >= 0) if (timeout >= 0)
{ {
@ -1153,12 +1181,16 @@ my_bool vio_is_connected(Vio *vio)
uint bytes= 0; uint bytes= 0;
DBUG_ENTER("vio_is_connected"); DBUG_ENTER("vio_is_connected");
if (vio->state >= VIO_STATE_SHUTDOWN)
DBUG_RETURN(FALSE);
/* /*
The first step of detecting an EOF condition is verifying The first step of detecting an EOF condition is verifying
whether there is data to read. Data in this case would be whether there is data to read. Data in this case would be
the EOF. An exceptional condition event and/or errors are the EOF. An exceptional condition event and/or errors are
interpreted as if there is data to read. interpreted as if there is data to read.
*/ */
if (!vio_io_wait(vio, VIO_IO_EVENT_READ, 0)) if (!vio_io_wait(vio, VIO_IO_EVENT_READ, 0))
DBUG_RETURN(TRUE); DBUG_RETURN(TRUE);