mirror of
https://github.com/MariaDB/server.git
synced 2025-02-23 13:53:10 +01:00

Improve the performance of slave connect using B+-Tree indexes on each binlog file. The index allows fast lookup of a GTID position to the corresponding offset in the binlog file, as well as lookup of a position to find the corresponding GTID position. This eliminates a costly sequential scan of the starting binlog file to find the GTID starting position when a slave connects. This is especially costly if the binlog file is not cached in memory (IO cost), or if it is encrypted or a lot of slaves connect simultaneously (CPU cost). The size of the index files is generally less than 1% of the binlog data, so not expected to be an issue. Most of the work writing the index is done as a background task, in the binlog background thread. This minimises the performance impact on transaction commit. A simple global mutex is used to protect index reads and (background) index writes; this is fine as slave connect is a relatively infrequent operation. Here are the user-visible options and status variables. The feature is on by default and is expected to need no tuning or configuration for most users. binlog_gtid_index On by default. Can be used to disable the indexes for testing purposes. binlog_gtid_index_page_size (default 4096) Page size to use for the binlog GTID index. This is the size of the nodes in the B+-tree used internally in the index. A very small page-size (64 is the minimum) will be less efficient, but can be used to stress the BTree-code during testing. binlog_gtid_index_span_min (default 65536) Control sparseness of the binlog GTID index. If set to N, at most one index record will be added for every N bytes of binlog file written. This can be used to reduce the number of records in the index, at the cost only of having to scan a few more events in the binlog file before finding the target position Two status variables are available to monitor the use of the GTID indexes: Binlog_gtid_index_hit Binlog_gtid_index_miss The "hit" status increments for each successful lookup in a GTID index. The "miss" increments when a lookup is not possible. This indicates that the index file is missing (eg. binlog written by old server version without GTID index support), or corrupt. Signed-off-by: Kristian Nielsen <knielsen@knielsen-hq.org>
187 lines
6.3 KiB
PHP
187 lines
6.3 KiB
PHP
# Include file for main test rpl.rpl_gtid_index.
|
|
# Test GTID indexes with given parameters.
|
|
#
|
|
# Parameters:
|
|
# $NUM_POS Number of GTIDs/binlog positions to create
|
|
# $NUM_DOMAIN Number of different domains to use
|
|
# $NUM_SERVER Number of different server_id to use
|
|
# $NUM_SLAVE_CONNECTS How many GTID slave connect positions to test
|
|
# $RND_SEED Random seed
|
|
|
|
|
|
--echo *** Testing $NUM_POS GTIDs with $NUM_SLAVE_CONNECTS test connects
|
|
|
|
--connection master
|
|
DELETE FROM t1 WHERE a >= 1000;
|
|
# Rotate binlogs to make new GTID index settings take effect.
|
|
FLUSH NO_WRITE_TO_BINLOG BINARY LOGS;
|
|
|
|
# Prepare some random values, but deterministic between test runs.
|
|
CREATE TABLE rand_data(idx INT PRIMARY KEY, domain_id INT, server_id INT)
|
|
ENGINE=InnoDB;
|
|
INSERT INTO rand_data(idx, domain_id, server_id) VALUES (0, 0, 1);
|
|
eval
|
|
INSERT INTO rand_data(idx, domain_id, server_id)
|
|
SELECT seq,
|
|
@tmp:=floor($NUM_DOMAIN*POW(rand($RND_SEED),2)),
|
|
100 + $NUM_SERVER*@tmp + floor($NUM_SERVER*rand($RND_SEED))
|
|
FROM seq_1_to_$NUM_POS;
|
|
# Let's check that the test data is deterministic.
|
|
# If this changes due to some server changes, it's fine, the .result can just
|
|
# be updated. But we want it to be identical between test runs on same code,
|
|
# to facilitate debugging test failures.
|
|
SELECT COUNT(*), SUM(domain_id), SUM(server_id) FROM rand_data;
|
|
|
|
# Create some data for the binlog (and GTID index), recording the correct
|
|
# binlog positions and GTIDs.
|
|
CREATE TABLE gtid_data(
|
|
idx INT PRIMARY KEY,
|
|
gtid VARCHAR(44),
|
|
gtid_pos VARCHAR(255),
|
|
file VARCHAR(100),
|
|
pos INT,
|
|
row_count INT,
|
|
KEY(file, pos)) ENGINE=InnoDB;
|
|
--let $gtid= `SELECT @@last_gtid`
|
|
|
|
--source include/save_master_gtid.inc
|
|
|
|
--connection slave
|
|
--source include/sync_with_master_gtid.inc
|
|
--source include/stop_slave.inc
|
|
|
|
--connection master
|
|
SET @orig_domain_id= @@gtid_domain_id;
|
|
SET @orig_server_id= @@server_id;
|
|
--let $i= 0
|
|
--let $rotate_point= `SELECT floor($NUM_POS/2)`
|
|
--let $base_count= `SELECT COUNT(*) FROM t1`
|
|
--disable_query_log
|
|
while ($i < $NUM_POS) {
|
|
--let $file= query_get_value(SHOW MASTER STATUS, File, 1)
|
|
--let $pos= query_get_value(SHOW MASTER STATUS, Position, 1)
|
|
--let $gtid_pos= `SELECT @@gtid_binlog_pos`
|
|
--let $row_count= `SELECT $base_count + $i`
|
|
eval SET gtid_domain_id= (SELECT domain_id FROM rand_data WHERE idx=$i+1);
|
|
eval SET server_id= (SELECT server_id FROM rand_data WHERE idx=$i+1);
|
|
BEGIN;
|
|
eval INSERT INTO gtid_data(idx, gtid, gtid_pos, file, pos, row_count)
|
|
VALUES ($i, '$gtid', '$gtid_pos', '$file', $pos, $row_count);
|
|
eval INSERT INTO t1 VALUES ($i + 1000, 0);
|
|
COMMIT;
|
|
--let $gtid= `SELECT @@last_gtid`
|
|
inc $i;
|
|
if ($i==$rotate_point) {
|
|
FLUSH NO_WRITE_TO_BINLOG BINARY LOGS;
|
|
}
|
|
}
|
|
--enable_query_log
|
|
SET gtid_domain_id= @orig_domain_id;
|
|
SET server_id= @orig_server_id;
|
|
|
|
SELECT COUNT(*) FROM gtid_data;
|
|
|
|
# Test that BINLOG_GTID_POS returns correct positions for every GTID position.
|
|
--echo *** The result should be empty, otherwise some result is wrong:
|
|
SELECT idx, gtid_pos, BINLOG_GTID_POS(file, pos)
|
|
FROM gtid_data
|
|
WHERE NOT gtid_eq(CONVERT(gtid_pos USING utf8),BINLOG_GTID_POS(file, pos))
|
|
ORDER BY idx;
|
|
|
|
# Prepare to rewind the slave to this point to test again on same binlog.
|
|
--connection slave
|
|
SET @orig_pos= @@GLOBAL.gtid_slave_pos;
|
|
SET @orig_t1_limit= (SELECT MAX(a) FROM t1);
|
|
|
|
--echo *** Now connect the slave to each position in turn, and test that
|
|
--echo *** the right amount of data is replicated at each point.
|
|
--let $old_silent= $keep_include_silent
|
|
--let $keep_include_silent= 1
|
|
--let $i= 0
|
|
--disable_query_log
|
|
while ($i < $NUM_POS) {
|
|
--connection master
|
|
--let $gtid_pos= `SELECT gtid_pos FROM gtid_data WHERE idx=$i`
|
|
--let $master_count= `SELECT row_count FROM gtid_data WHERE idx=$i`
|
|
--connection slave
|
|
--disable_result_log
|
|
eval START SLAVE UNTIL master_gtid_pos='$gtid_pos';
|
|
--enable_result_log
|
|
--let $res= `SELECT MASTER_GTID_WAIT('$gtid_pos')`
|
|
if ($res != 0) {
|
|
--die "FAIL: MASTER_GTID_WAIT($gtid_pos) returned $res, should have been 0"
|
|
}
|
|
--source include/wait_for_slave_to_stop.inc
|
|
--let $slave_count = `SELECT COUNT(*) FROM t1`
|
|
if ($master_count != $slave_count) {
|
|
SELECT * FROM gtid_data ORDER BY file, pos;
|
|
SELECT * FROM t1 ORDER BY a;
|
|
--die "Not all rows replicated. $master_count on master but $slave_count on slave."
|
|
}
|
|
--let $i= `SELECT $i + ceil($NUM_POS / $NUM_SLAVE_CONNECTS)`
|
|
}
|
|
--enable_query_log
|
|
|
|
--echo *** Test slave connecting to some GTID positions where the position in
|
|
--echo *** the master's binlog is different between the different domains.
|
|
--echo *** Revind the slave and test on the same binlog data from the master as before.
|
|
--connection slave
|
|
SET sql_log_bin= 0;
|
|
TRUNCATE gtid_data;
|
|
DELETE FROM t1 WHERE a > @orig_t1_limit;
|
|
SET sql_log_bin= 1;
|
|
SET GLOBAL gtid_slave_pos= @orig_pos;
|
|
|
|
--let $i= 0
|
|
--disable_query_log
|
|
while ($i <= $NUM_DOMAIN) {
|
|
# Build a GTID position from GTIDs that are picked at different locations
|
|
# in the gtid_data table for each domain.
|
|
--connection master
|
|
let $until_pos=`
|
|
SELECT GROUP_CONCAT(gtid SEPARATOR ',')
|
|
FROM gtid_data
|
|
WHERE idx IN (
|
|
SELECT MAX(gtid_data.idx) AS pick
|
|
FROM gtid_data
|
|
INNER JOIN rand_data ON (rand_data.idx = gtid_data.idx)
|
|
WHERE gtid_data.idx*$NUM_DOMAIN <= (domain_id + $i)*$NUM_POS
|
|
GROUP BY domain_id
|
|
)`;
|
|
--connection slave
|
|
--disable_result_log
|
|
eval START SLAVE UNTIL master_gtid_pos='$until_pos';
|
|
--enable_result_log
|
|
--let $res= `SELECT MASTER_GTID_WAIT('$until_pos')`
|
|
if ($res != 0) {
|
|
--die "FAIL: MASTER_GTID_WAIT($until_pos) returned $res, should have been 0"
|
|
}
|
|
--source include/wait_for_slave_to_stop.inc
|
|
|
|
inc $i;
|
|
}
|
|
--enable_query_log
|
|
--let $keep_include_silent= $old_silent
|
|
|
|
# Check that everything was replicated (nothing skipped).
|
|
# We have one less row on the slave since the last UNTIL is the one before
|
|
# the master inserted the last row.
|
|
--connection master
|
|
--let $master_count= `SELECT COUNT(*)-1 FROM t1`
|
|
--connection slave
|
|
--let $slave_count= `SELECT COUNT(*) FROM t1`
|
|
if ($master_count != $slave_count) {
|
|
SELECT * FROM gtid_data ORDER BY file, pos;
|
|
SELECT * FROM t1 ORDER BY a;
|
|
--die "Not all rows replicated. $master_count on master but $slave_count on slave."
|
|
}
|
|
|
|
--connection master
|
|
DROP TABLE gtid_data, rand_data;
|
|
--source include/save_master_gtid.inc
|
|
|
|
--connection slave
|
|
--source include/start_slave.inc
|
|
--source include/sync_with_master_gtid.inc
|
|
|
|
--connection master
|