mirror of
https://github.com/MariaDB/server.git
synced 2025-01-15 19:42:28 +01:00
MDEV-24845 Oddities around innodb_fatal_semaphore_wait_threshold and global.innodb_disallow_writes
This commit adds an mtr test for reproducing a scenario where, despite innodb_disallow_writes blocking, writes to the file system can still happen. The test launches a garbd node, which triggers one of the cluster nodes to switch to SST donor state. In this state, all disk activity should be halted, and e.g. innodb_disallow_writes has been set. The test records an md5sum aggregate over the mariadb data directory when the node enters the donor state, and records another md5sum when the node leaves the donor state. If there is no IO activity in the data directory, these hashes should be equal. For this test, the donor state processing has been instrumented so that the SST donor thread can be stopped when entering the donor state. The test uses this new dbug sync point to control when to record the md5sums. A new SST script was added: wsrep_sst_backup, and garbd uses the backup method to make the donor node call this script and enter donor state. The backup script could later be extended as a general purpose backup method for the cluster. This commit also fixes one race condition happening in wsrep_sst_rsync, like this: * the wsrep_sst_rsync script requests flush tables, and then waits in a loop until mariadbd has created the file tables_flushed, as confirmation that FLUSH TABLES has completed * mariadbd's SST donor thread wakes up for the flush tables request and then performs FTWRL, and after this it creates the tables_flushed file * note that the SST script will now continue to start rsync sending * mariadbd's SST donor thread now calls sst_disallow_writes(), so that innodb would set up disk IO blockage, however rsyncing may already be ongoing at this point This race condition is fixed in this commit by performing all disk IO blocking before creating the tables_flushed file. Reviewed-by: Jan Lindström <jan.lindstrom@mariadb.com>
This commit is contained in:
parent
6437b30404
commit
9b2fa2ae8e
6 changed files with 350 additions and 31 deletions
41
mysql-test/suite/galera_3nodes/r/galera_garbd_backup.result
Normal file
41
mysql-test/suite/galera_3nodes/r/galera_garbd_backup.result
Normal file
|
@ -0,0 +1,41 @@
|
|||
connection node_1;
|
||||
connection node_1;
|
||||
connection node_2;
|
||||
connection node_3;
|
||||
connection node_1;
|
||||
SET GLOBAL innodb_max_dirty_pages_pct=99;
|
||||
SET GLOBAL innodb_max_dirty_pages_pct_lwm=99;
|
||||
connection node_1;
|
||||
CREATE TABLE t1 (f1 INTEGER, f2 varchar(1024)) Engine=InnoDB;
|
||||
CREATE TABLE ten (f1 INTEGER) ENGINE=InnoDB;
|
||||
INSERT INTO ten VALUES (1),(2),(3),(4),(5),(6),(7),(8),(9),(10);
|
||||
INSERT INTO t1 (f2) SELECT REPEAT('x', 1024) FROM ten AS a1, ten AS a2, ten AS a3, ten AS a4;
|
||||
connection node_2;
|
||||
Killing node #3 to free ports for garbd ...
|
||||
connection node_3;
|
||||
connection node_1;
|
||||
SET GLOBAL debug_dbug = "+d,sync.wsrep_donor_state";
|
||||
Starting garbd ...
|
||||
SET SESSION debug_sync = "now WAIT_FOR sync.wsrep_donor_state_reached";
|
||||
SET GLOBAL innodb_max_dirty_pages_pct_lwm=0;
|
||||
SET GLOBAL innodb_max_dirty_pages_pct=0;
|
||||
SET SESSION debug_sync = "now SIGNAL signal.wsrep_donor_state";
|
||||
SET GLOBAL debug_dbug = "";
|
||||
SET debug_sync='RESET';
|
||||
connection node_2;
|
||||
Killing garbd ...
|
||||
connection node_1;
|
||||
connection node_2;
|
||||
DROP TABLE t1;
|
||||
DROP TABLE ten;
|
||||
Restarting node #3 to satisfy MTR's end-of-test checks
|
||||
connection node_3;
|
||||
connection node_1;
|
||||
SET GLOBAL innodb_max_dirty_pages_pct = 75.000000;
|
||||
SET GLOBAL innodb_max_dirty_pages_pct_lwm = 0.000000;
|
||||
connection node_1;
|
||||
CALL mtr.add_suppression("WSREP: Protocol violation\. JOIN message sender 1\.0 \(.*\) is not in state transfer \(SYNCED\)");
|
||||
connection node_2;
|
||||
CALL mtr.add_suppression("WSREP: Protocol violation\. JOIN message sender 1\.0 \(.*\) is not in state transfer \(SYNCED\)");
|
||||
connection node_3;
|
||||
CALL mtr.add_suppression("WSREP: Protocol violation\. JOIN message sender 1\.0 \(.*\) is not in state transfer \(SYNCED\)");
|
13
mysql-test/suite/galera_3nodes/t/galera_garbd_backup.cnf
Normal file
13
mysql-test/suite/galera_3nodes/t/galera_garbd_backup.cnf
Normal file
|
@ -0,0 +1,13 @@
|
|||
!include ../galera_3nodes.cnf
|
||||
|
||||
[mysqld]
|
||||
wsrep_sst_method=rsync
|
||||
|
||||
[mysqld.1]
|
||||
wsrep_node_name=node1
|
||||
|
||||
[mysqld.2]
|
||||
wsrep_node_name=node2
|
||||
|
||||
[mysqld.3]
|
||||
wsrep_node_name=node3
|
134
mysql-test/suite/galera_3nodes/t/galera_garbd_backup.test
Normal file
134
mysql-test/suite/galera_3nodes/t/galera_garbd_backup.test
Normal file
|
@ -0,0 +1,134 @@
|
|||
#
# Verify that nothing is written to the data directory while a node is in
# SST donor state (MDEV-24845).
#
# We shut down node #3 and use its port allocation to start garbd, which asks
# node #1 to act as donor for the "backup" SST method. The donor thread is
# parked on a debug sync point, a hash of the data directory is recorded,
# buffer pool flushing is provoked, and a second hash is recorded. Both hashes
# must be equal.
#
# As MTR does not allow multiple servers to be down at the same time, we are limited as to what we can test.
#

--source include/galera_cluster.inc
--source include/have_innodb.inc
--source include/have_garbd.inc
--source include/big_test.inc
--source include/have_debug.inc
--source include/have_debug_sync.inc

--connection node_1
# Save original auto_increment_offset values.
--let $node_1=node_1
--let $node_2=node_2
--let $node_3=node_3

--let $galera_connection_name = node_3
--let $galera_server_number = 3
--source include/galera_connect.inc
--source suite/galera/include/galera_base_port.inc
--let $NODE_GALERAPORT_3 = $_NODE_GALERAPORT

--source ../galera/include/auto_increment_offset_save.inc

# Save galera ports
--connection node_1
--source suite/galera/include/galera_base_port.inc
--let $NODE_GALERAPORT_1 = $_NODE_GALERAPORT
--let $datadir= `SELECT @@datadir`

# Remember the dirty page settings so they can be restored at the end
--let $innodb_max_dirty_pages_pct = `SELECT @@innodb_max_dirty_pages_pct`
--let $innodb_max_dirty_pages_pct_lwm = `SELECT @@innodb_max_dirty_pages_pct_lwm`

# Raise the dirty page thresholds while the test data is loaded
SET GLOBAL innodb_max_dirty_pages_pct=99;
SET GLOBAL innodb_max_dirty_pages_pct_lwm=99;

--connection node_1
CREATE TABLE t1 (f1 INTEGER, f2 varchar(1024)) Engine=InnoDB;
CREATE TABLE ten (f1 INTEGER) ENGINE=InnoDB;
INSERT INTO ten VALUES (1),(2),(3),(4),(5),(6),(7),(8),(9),(10);
INSERT INTO t1 (f2) SELECT REPEAT('x', 1024) FROM ten AS a1, ten AS a2, ten AS a3, ten AS a4;

--connection node_2
--source suite/galera/include/galera_base_port.inc
--let $NODE_GALERAPORT_2 = $_NODE_GALERAPORT

--echo Killing node #3 to free ports for garbd ...
--connection node_3
--source include/shutdown_mysqld.inc

--connection node_1
--let $wait_condition = SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size'
--source include/wait_condition.inc

# stop SST donor thread when node is in donor state
SET GLOBAL debug_dbug = "+d,sync.wsrep_donor_state";

--echo Starting garbd ...
--exec $MTR_GARBD_EXE --address "gcomm://127.0.0.1:$NODE_GALERAPORT_1" --group my_wsrep_cluster --donor node1 --sst backup --options 'base_port=$NODE_GALERAPORT_3' > $MYSQL_TMP_DIR/garbd.log 2>&1 &

SET SESSION debug_sync = "now WAIT_FOR sync.wsrep_donor_state_reached";

#
# get hash of data directory contents before BP dirty page flushing
#
# The per-file checksums are sorted before being hashed: find does not
# guarantee any traversal order, and the SST marker files excluded below
# come and go in the data directory between the two snapshots.
#
--exec find $datadir -type f ! -name tables_flushed ! -name backup_sst_complete -exec md5sum {} \; | sort | md5sum >$MYSQLTEST_VARDIR/tmp/innodb_before

# this should force buffer pool flushing, if not already done by donor state change transfer
SET GLOBAL innodb_max_dirty_pages_pct_lwm=0;
SET GLOBAL innodb_max_dirty_pages_pct=0;

--disable_query_log
--disable_result_log
select f1 from t1;
select * from ten;
--enable_result_log
--enable_query_log

#
# record the hash of data directory contents after BP dirty page flushing
# (same sorted pipeline as above, so that the two files are comparable)
#
--exec find $datadir -type f ! -name tables_flushed ! -name backup_sst_complete -exec md5sum {} \; | sort | md5sum >$MYSQLTEST_VARDIR/tmp/innodb_after

# there should be no disk writes
--diff_files $MYSQLTEST_VARDIR/tmp/innodb_before $MYSQLTEST_VARDIR/tmp/innodb_after

# let the parked SST donor thread continue and clean up the sync point
SET SESSION debug_sync = "now SIGNAL signal.wsrep_donor_state";
SET GLOBAL debug_dbug = "";
SET debug_sync='RESET';

--connection node_2

#
# garbd will die automatically, because of the backup SST script
# but just to be sure, sending explicit kill here, as well
#
--echo Killing garbd ...
# FreeBSD's /bin/pkill only supports short versions of the options:
# -o Select only the oldest (least recently started)
# -f Match against full argument lists
--error 0,1
--exec pkill -o -f garbd.*$NODE_GALERAPORT_3

--connection node_1
--let $wait_condition = SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size'
--source include/wait_condition.inc

--connection node_2

DROP TABLE t1;
DROP TABLE ten;

--echo Restarting node #3 to satisfy MTR's end-of-test checks
--connection node_3
let $restart_noprint=2;
--source include/start_mysqld.inc

--connection node_1
--eval SET GLOBAL innodb_max_dirty_pages_pct = $innodb_max_dirty_pages_pct
--eval SET GLOBAL innodb_max_dirty_pages_pct_lwm = $innodb_max_dirty_pages_pct_lwm

--source ../galera/include/auto_increment_offset_restore.inc

--connection node_1
CALL mtr.add_suppression("WSREP: Protocol violation\. JOIN message sender 1\.0 \(.*\) is not in state transfer \(SYNCED\)");

--connection node_2
CALL mtr.add_suppression("WSREP: Protocol violation\. JOIN message sender 1\.0 \(.*\) is not in state transfer \(SYNCED\)");

--connection node_3
CALL mtr.add_suppression("WSREP: Protocol violation\. JOIN message sender 1\.0 \(.*\) is not in state transfer \(SYNCED\)");
|
|
@ -275,6 +275,7 @@ ELSE()
|
|||
wsrep_sst_mysqldump
|
||||
wsrep_sst_rsync
|
||||
wsrep_sst_mariabackup
|
||||
wsrep_sst_backup
|
||||
)
|
||||
# The following script is sourced from other SST scripts, so it should
|
||||
# not be made executable.
|
||||
|
|
112
scripts/wsrep_sst_backup.sh
Normal file
112
scripts/wsrep_sst_backup.sh
Normal file
|
@ -0,0 +1,112 @@
|
|||
#!/usr/bin/env bash

set -ue

# Copyright (C) 2017-2021 MariaDB
# Copyright (C) 2010-2014 Codership Oy
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; see the file COPYING. If not, write to the
# Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston
# MA 02110-1335 USA.

# Donor-only "backup" state snapshot transfer script.
#
# In the donor role the script asks the server to flush tables, waits
# until the server confirms this through the tables_flushed file, then
# lets the server resume, records the reported state in the
# backup_sst_complete file and reports "done". No data is transferred.
# Any other role, including joiner, is rejected with exit code 22.

RSYNC_REAL_PID=0   # rsync process id
STUNNEL_REAL_PID=0 # stunnel process id

OS="$(uname)"
[ "$OS" = 'Darwin' ] && export -n LD_LIBRARY_PATH

# Setting the path for lsof on CentOS
export PATH="/usr/sbin:/sbin:$PATH"

. "$(dirname "$0")/wsrep_sst_common"

# Marker written once the donor part has completed; drop any stale one.
MAGIC_FILE="$WSREP_SST_OPT_DATA/backup_sst_complete"
rm -rf "$MAGIC_FILE"

WSREP_LOG_DIR=${WSREP_LOG_DIR:-""}
# if WSREP_LOG_DIR env. variable is not set, try to get it from my.cnf
if [ -z "$WSREP_LOG_DIR" ]; then
    WSREP_LOG_DIR=$(parse_cnf mysqld innodb-log-group-home-dir '')
fi

if [ -n "$WSREP_LOG_DIR" ]; then
    # handle both relative and absolute paths
    WSREP_LOG_DIR=$(cd "$WSREP_SST_OPT_DATA"; mkdir -p "$WSREP_LOG_DIR"; cd "$WSREP_LOG_DIR"; pwd -P)
else
    # default to datadir
    WSREP_LOG_DIR=$(cd "$WSREP_SST_OPT_DATA"; pwd -P)
fi

if [ "$WSREP_SST_OPT_ROLE" = 'donor' ]
then

    RC=0

    if [ "$WSREP_SST_OPT_BYPASS" -eq 0 ]; then

        FLUSHED="$WSREP_SST_OPT_DATA/tables_flushed"
        ERROR="$WSREP_SST_OPT_DATA/sst_error"

        # Remove leftovers of an earlier run before requesting the flush.
        rm -f "$FLUSHED" "$ERROR"

        echo "flush tables"

        # Wait for :
        # (a) Tables to be flushed, AND
        # (b) Cluster state ID & wsrep_gtid_domain_id to be written to the file, OR
        # (c) ERROR file, in case flush tables operation failed.
        #
        # Keep waiting while either (a) or (b) is still unmet; a file
        # that is readable but does not hold the state ID yet is not
        # accepted as confirmation.

        while [ ! -r "$FLUSHED" ] || \
              ! grep -q -F ':' '--' "$FLUSHED" >/dev/null 2>&1
        do
            # Check whether ERROR file exists.
            if [ -f "$ERROR" ]; then
                # Flush tables operation failed.
                rm -f "$ERROR"
                exit 255
            fi
            sleep 0.2
        done

        STATE=$(cat "$FLUSHED")
        rm -f "$FLUSHED"

    else # BYPASS

        wsrep_log_info "Bypassing state dump."

        # STATE must be assigned on this path as well, otherwise the
        # expansions below abort the script under 'set -u'.
        # NOTE(review): presumably wsrep_sst_common provides the GTID
        # passed by the server as WSREP_SST_OPT_GTID - confirm. The
        # default expansion keeps this safe if it does not.
        STATE="${WSREP_SST_OPT_GTID:-}"

    fi

    echo 'continue' # now server can resume updating data

    echo "$STATE" > "$MAGIC_FILE"

    echo "done $STATE"

elif [ "$WSREP_SST_OPT_ROLE" = 'joiner' ]
then
    # This method only implements the donor side.
    wsrep_log_error "Role 'joiner' is not supported by the backup SST method"
    exit 22 # EINVAL

else
    wsrep_log_error "Unrecognized role: '$WSREP_SST_OPT_ROLE'"
    exit 22 # EINVAL
fi

exit 0
|
|
@ -1,4 +1,4 @@
|
|||
/* Copyright 2008-2020 Codership Oy <http://www.codership.com>
|
||||
/* Copyright 2008-2022 Codership Oy <http://www.codership.com>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -30,6 +30,7 @@
|
|||
#include "wsrep_xid.h"
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include "debug_sync.h"
|
||||
|
||||
#include <my_service_manager.h>
|
||||
|
||||
|
@ -1415,6 +1416,33 @@ static int run_sql_command(THD *thd, const char *query)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
  Issue "SET GLOBAL innodb_disallow_writes=<0|1>" through run_sql_command().

  @param thd  session used to run the statement
  @param yes  true sets the variable to 1, false sets it to 0

  If the session's client character set is not accepted by
  is_supported_parser_charset(), latin1 is used while the statement runs.
  The original client character set is put back before returning. A failing
  statement is only logged.
*/
static void sst_disallow_writes (THD* thd, bool yes)
{
  CHARSET_INFO *saved_cs= thd->variables.character_set_client;

  if (!is_supported_parser_charset(saved_cs))
  {
    /* The parser cannot work with this character set, switch temporarily */
    WSREP_WARN("Current client character set is non-supported parser character set: %s", saved_cs->csname);
    thd->variables.character_set_client= &my_charset_latin1;
    WSREP_WARN("For SST temporally setting character set to : %s",
               my_charset_latin1.csname);
  }

  /* Zero filled, and the last byte is never handed to snprintf() */
  char sql[64]= { 0, };
  ssize_t const sql_cap= sizeof(sql) - 1;
  int const flag= yes ? 1 : 0;

  snprintf(sql, sql_cap, "SET GLOBAL innodb_disallow_writes=%d", flag);

  int const failed= run_sql_command(thd, sql);
  if (failed)
    WSREP_ERROR("Failed to disallow InnoDB writes");

  /* Always restore whatever the session had on entry */
  thd->variables.character_set_client= saved_cs;
}
|
||||
|
||||
|
||||
static int sst_flush_tables(THD* thd)
|
||||
{
|
||||
|
@ -1477,6 +1505,10 @@ static int sst_flush_tables(THD* thd)
|
|||
{
|
||||
WSREP_INFO("Tables flushed.");
|
||||
|
||||
/* disable further disk IO */
|
||||
sst_disallow_writes(thd, true);
|
||||
WSREP_INFO("Disabled further disk IO.");
|
||||
|
||||
/*
|
||||
Tables have been flushed. Create a file with cluster state ID and
|
||||
wsrep_gtid_domain_id.
|
||||
|
@ -1485,39 +1517,14 @@ static int sst_flush_tables(THD* thd)
|
|||
snprintf(content, sizeof(content), "%s:%lld %d\n", wsrep_cluster_state_uuid,
|
||||
(long long)wsrep_locked_seqno, wsrep_gtid_domain_id);
|
||||
err= sst_create_file(flush_success, content);
|
||||
|
||||
if(err)
|
||||
WSREP_INFO("Creating file for flush_success failed %d",err);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
/*
  Run "SET GLOBAL innodb_disallow_writes=N" on the given session, where N is
  1 when `yes` is true and 0 otherwise.

  A client character set rejected by is_supported_parser_charset() is
  replaced by latin1 for the duration of the statement; the session's
  original setting is restored on exit. Failure of the statement is logged
  and otherwise ignored.
*/
static void sst_disallow_writes (THD* thd, bool yes)
{
  char stmt[64] = { 0, };
  ssize_t const stmt_limit = sizeof(stmt) - 1;
  CHARSET_INFO *const entry_charset = thd->variables.character_set_client;
  bool const parser_ok = is_supported_parser_charset(entry_charset);

  if (!parser_ok)
  {
    /* Do not use non-supported parser character sets */
    WSREP_WARN("Current client character set is non-supported parser character set: %s", entry_charset->csname);
    thd->variables.character_set_client = &my_charset_latin1;
    WSREP_WARN("For SST temporally setting character set to : %s",
               my_charset_latin1.csname);
  }

  if (yes)
    snprintf(stmt, stmt_limit, "SET GLOBAL innodb_disallow_writes=%d", 1);
  else
    snprintf(stmt, stmt_limit, "SET GLOBAL innodb_disallow_writes=%d", 0);

  if (run_sql_command(thd, stmt) != 0)
  {
    WSREP_ERROR("Failed to disallow InnoDB writes");
  }

  /* Undo the temporary switch (a no-op when nothing was changed) */
  thd->variables.character_set_client = entry_charset;
}
|
||||
|
||||
static void* sst_donor_thread (void* a)
|
||||
{
|
||||
sst_thread_arg* arg= (sst_thread_arg*)a;
|
||||
|
@ -1565,8 +1572,7 @@ wait_signal:
|
|||
err= sst_flush_tables (thd.ptr);
|
||||
if (!err)
|
||||
{
|
||||
sst_disallow_writes (thd.ptr, true);
|
||||
/*
|
||||
/*
|
||||
Lets also keep statements that modify binary logs (like RESET LOGS,
|
||||
RESET MASTER) from proceeding until the files have been transferred
|
||||
to the joiner node.
|
||||
|
@ -1577,6 +1583,18 @@ wait_signal:
|
|||
}
|
||||
|
||||
locked= true;
|
||||
|
||||
WSREP_INFO("Donor state reached");
|
||||
|
||||
DBUG_EXECUTE_IF("sync.wsrep_donor_state",
|
||||
{
|
||||
const char act[]=
|
||||
"now "
|
||||
"SIGNAL sync.wsrep_donor_state_reached "
|
||||
"WAIT_FOR signal.wsrep_donor_state";
|
||||
assert(!debug_sync_set_action(thd.ptr,
|
||||
STRING_WITH_LEN(act)));
|
||||
};);
|
||||
goto wait_signal;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue