mirror of
https://github.com/MariaDB/server.git
synced 2026-05-16 20:07:13 +02:00
MDEV-5834: Merge Kakao Defragmentation implementation to MariaDB 10.1
Merge https://github.com/kakao/mariadb-10.0 that contains Facebook's implementation for defragmentation facebook/mysql-5.6@a2d3a74 facebook/mysql-5.6@def96c8 facebook/mysql-5.6@9c67c5d facebook/mysql-5.6@921a81b facebook/mysql-5.6@aa519bd facebook/mysql-5.6@fea7d13 facebook/mysql-5.6@09b29d3 facebook/mysql-5.6@9284abb facebook/mysql-5.6@dbd623d facebook/mysql-5.6@aed55dc facebook/mysql-5.6@aad5c82 This version does not add new SQL-syntax and new handler API function. Instead optimize table is mapped to defragment table if innodb_defragment=ON, by default the feature is off. Contains changes authored by Sunguck Lee (Kakao).
This commit is contained in:
parent
e974b56438
commit
6dad23f04a
91 changed files with 5772 additions and 168 deletions
29
mysql-test/suite/innodb/r/innodb_defrag_binlog.result
Normal file
29
mysql-test/suite/innodb/r/innodb_defrag_binlog.result
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
include/master-slave.inc
|
||||
[connection master]
|
||||
drop table if exists t1;
|
||||
create table t1(a int not null primary key auto_increment, b varchar(256), key second(b)) engine=innodb;
|
||||
insert into t1 values (1, REPEAT("a", 256));
|
||||
insert into t1 values (2, REPEAT("a", 256));
|
||||
optimize table t1;
|
||||
Table Op Msg_type Msg_text
|
||||
test.t1 optimize status OK
|
||||
drop table t1;
|
||||
show binlog events in 'master-bin.000001' from 313;
|
||||
Log_name Pos Event_type Server_id End_log_pos Info
|
||||
master-bin.000001 313 Gtid 1 351 GTID 0-1-1
|
||||
master-bin.000001 351 Query 1 465 use `test`; DROP TABLE IF EXISTS `t1`
|
||||
master-bin.000001 465 Gtid 1 503 GTID 0-1-2
|
||||
master-bin.000001 503 Query 1 669 use `test`; create table t1(a int not null primary key auto_increment, b varchar(256), key second(b)) engine=innodb
|
||||
master-bin.000001 669 Gtid 1 707 BEGIN GTID 0-1-3
|
||||
master-bin.000001 707 Table_map 1 751 table_id: 82 (test.t1)
|
||||
master-bin.000001 751 Write_rows_v1 1 1043 table_id: 82 flags: STMT_END_F
|
||||
master-bin.000001 1043 Xid 1 1070 COMMIT
|
||||
master-bin.000001 1070 Gtid 1 1108 BEGIN GTID 0-1-4
|
||||
master-bin.000001 1108 Table_map 1 1152 table_id: 82 (test.t1)
|
||||
master-bin.000001 1152 Write_rows_v1 1 1444 table_id: 82 flags: STMT_END_F
|
||||
master-bin.000001 1444 Xid 1 1471 COMMIT
|
||||
master-bin.000001 1471 Gtid 1 1509 GTID 0-1-5
|
||||
master-bin.000001 1509 Query 1 1589 use `test`; optimize table t1
|
||||
master-bin.000001 1589 Gtid 1 1627 GTID 0-1-6
|
||||
master-bin.000001 1627 Query 1 1731 use `test`; DROP TABLE `t1`
|
||||
include/rpl_end.inc
|
||||
73
mysql-test/suite/innodb/r/innodb_defrag_concurrent.result
Normal file
73
mysql-test/suite/innodb/r/innodb_defrag_concurrent.result
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
DROP TABLE if exists t1;
|
||||
select @@global.innodb_stats_persistent;
|
||||
@@global.innodb_stats_persistent
|
||||
0
|
||||
set global innodb_defragment_stats_accuracy = 80;
|
||||
CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY AUTO_INCREMENT, b VARCHAR(256), c INT, KEY second(a, b),KEY third(c)) ENGINE=INNODB;
|
||||
SET @@global.innodb_defragment_n_pages = 20;
|
||||
after populate PRIMARY
|
||||
select count(*) from t1;
|
||||
count(*)
|
||||
20000
|
||||
after populate second
|
||||
select count(*) from t1 force index (second);
|
||||
count(*)
|
||||
20000
|
||||
after populate third
|
||||
select count(*) from t1 force index (third);
|
||||
count(*)
|
||||
20000
|
||||
select count(*) from t1;
|
||||
count(*)
|
||||
15800
|
||||
after delete PRIMAY
|
||||
select count(*) from t1 force index (second);
|
||||
count(*)
|
||||
15800
|
||||
after delete second
|
||||
select count(*) from t1 force index (third);
|
||||
count(*)
|
||||
15800
|
||||
after delete third
|
||||
select count(stat_value) > 0 from mysql.innodb_index_stats where table_name like '%t1%' and stat_name in ('n_pages_freed');
|
||||
count(stat_value) > 0
|
||||
0
|
||||
select count(stat_value) > 0 from mysql.innodb_index_stats where table_name like '%t1%' and stat_name in ('n_page_split');
|
||||
count(stat_value) > 0
|
||||
1
|
||||
select count(stat_value) > 0 from mysql.innodb_index_stats where table_name like '%t1%' and stat_name in ('n_leaf_pages_defrag');
|
||||
count(stat_value) > 0
|
||||
1
|
||||
optimize table t1;;
|
||||
INSERT INTO t1 VALUES (400000, REPEAT('A', 256),300000);;
|
||||
INSERT INTO t1 VALUES (500000, REPEAT('A', 256),400000);;
|
||||
DELETE FROM t1 where a between 1 and 100;;
|
||||
UPDATE t1 SET c = c + 1 where c between 2000 and 8000;;
|
||||
optimize table t1;
|
||||
Table Op Msg_type Msg_text
|
||||
test.t1 optimize status OK
|
||||
select sleep(5);
|
||||
sleep(5)
|
||||
0
|
||||
select count(*) from t1;
|
||||
count(*)
|
||||
15723
|
||||
after optimize PRIMARY
|
||||
select count(*) from t1 force index (second);
|
||||
count(*)
|
||||
15723
|
||||
after optimize second
|
||||
select count(*) from t1 force index (third);
|
||||
count(*)
|
||||
15723
|
||||
after optimize third
|
||||
select count(stat_value) > 0 from mysql.innodb_index_stats where table_name like '%t1%' and stat_name in ('n_pages_freed');
|
||||
count(stat_value) > 0
|
||||
1
|
||||
select count(stat_value) > 0 from mysql.innodb_index_stats where table_name like '%t1%' and stat_name in ('n_page_split');
|
||||
count(stat_value) > 0
|
||||
1
|
||||
select count(stat_value) > 0 from mysql.innodb_index_stats where table_name like '%t1%' and stat_name in ('n_leaf_pages_defrag');
|
||||
count(stat_value) > 0
|
||||
1
|
||||
drop table t1;
|
||||
94
mysql-test/suite/innodb/r/innodb_defrag_stats.result
Normal file
94
mysql-test/suite/innodb/r/innodb_defrag_stats.result
Normal file
|
|
@ -0,0 +1,94 @@
|
|||
DROP TABLE if exists t1;
|
||||
select @@global.innodb_stats_persistent;
|
||||
@@global.innodb_stats_persistent
|
||||
0
|
||||
set global innodb_defragment_stats_accuracy = 20;
|
||||
# Create table.
|
||||
CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY AUTO_INCREMENT, b VARCHAR(256), KEY SECOND(a, b)) ENGINE=INNODB;
|
||||
# Populate data
|
||||
INSERT INTO t1 VALUES(1, REPEAT('A', 256));
|
||||
INSERT INTO t1 (b) SELECT b from t1;
|
||||
INSERT INTO t1 (b) SELECT b from t1;
|
||||
INSERT INTO t1 (b) SELECT b from t1;
|
||||
INSERT INTO t1 (b) SELECT b from t1;
|
||||
INSERT INTO t1 (b) SELECT b from t1;
|
||||
INSERT INTO t1 (b) SELECT b from t1;
|
||||
INSERT INTO t1 (b) SELECT b from t1;
|
||||
INSERT INTO t1 (b) SELECT b from t1;
|
||||
INSERT INTO t1 (b) SELECT b from t1;
|
||||
INSERT INTO t1 (b) SELECT b from t1;
|
||||
# Not enough page splits to trigger persistent stats write yet.
|
||||
select count(*) from mysql.innodb_index_stats where table_name like '%t1%' and stat_name in ('n_page_split', 'n_leaf_pages_defrag');
|
||||
count(*)
|
||||
0
|
||||
INSERT INTO t1 (b) SELECT b from t1;
|
||||
# Persistent stats recorded.
|
||||
select count(stat_value) > 0 from mysql.innodb_index_stats where table_name like '%t1%' and stat_name in ('n_page_split', 'n_leaf_pages_defrag');
|
||||
count(stat_value) > 0
|
||||
0
|
||||
# Delete some rows.
|
||||
delete from t1 where a between 100 * 20 and 100 * 20 + 30;
|
||||
delete from t1 where a between 100 * 19 and 100 * 19 + 30;
|
||||
delete from t1 where a between 100 * 18 and 100 * 18 + 30;
|
||||
delete from t1 where a between 100 * 17 and 100 * 17 + 30;
|
||||
delete from t1 where a between 100 * 16 and 100 * 16 + 30;
|
||||
delete from t1 where a between 100 * 15 and 100 * 15 + 30;
|
||||
delete from t1 where a between 100 * 14 and 100 * 14 + 30;
|
||||
delete from t1 where a between 100 * 13 and 100 * 13 + 30;
|
||||
delete from t1 where a between 100 * 12 and 100 * 12 + 30;
|
||||
delete from t1 where a between 100 * 11 and 100 * 11 + 30;
|
||||
delete from t1 where a between 100 * 10 and 100 * 10 + 30;
|
||||
delete from t1 where a between 100 * 9 and 100 * 9 + 30;
|
||||
delete from t1 where a between 100 * 8 and 100 * 8 + 30;
|
||||
delete from t1 where a between 100 * 7 and 100 * 7 + 30;
|
||||
delete from t1 where a between 100 * 6 and 100 * 6 + 30;
|
||||
delete from t1 where a between 100 * 5 and 100 * 5 + 30;
|
||||
delete from t1 where a between 100 * 4 and 100 * 4 + 30;
|
||||
delete from t1 where a between 100 * 3 and 100 * 3 + 30;
|
||||
delete from t1 where a between 100 * 2 and 100 * 2 + 30;
|
||||
delete from t1 where a between 100 * 1 and 100 * 1 + 30;
|
||||
# Server Restarted
|
||||
# Confirm persistent stats still there after restart.
|
||||
select count(stat_value) > 0 from mysql.innodb_index_stats where table_name like '%t1%' and stat_name in ('n_page_split', 'n_leaf_pages_defrag');
|
||||
count(stat_value) > 0
|
||||
0
|
||||
optimize table t1;
|
||||
Table Op Msg_type Msg_text
|
||||
test.t1 optimize status OK
|
||||
# n_page_split should be 0 after defragmentation, n_pages_freed should be non-zero.
|
||||
select stat_value = 0 from mysql.innodb_index_stats where table_name like '%t1%' and stat_name = 'n_page_split';
|
||||
stat_value = 0
|
||||
1
|
||||
1
|
||||
select count(stat_value) > 0 from mysql.innodb_index_stats where table_name like '%t1%' and stat_name in ('n_pages_freed', 'n_leaf_pages_defrag');
|
||||
count(stat_value) > 0
|
||||
1
|
||||
set global innodb_defragment_stats_accuracy = 40;
|
||||
INSERT INTO t1 (b) SELECT b from t1;
|
||||
# Not enough operation to trigger persistent stats write
|
||||
select stat_value = 0 from mysql.innodb_index_stats where table_name like '%t1%' and stat_name = 'n_page_split';
|
||||
stat_value = 0
|
||||
1
|
||||
1
|
||||
INSERT INTO t1 (b) SELECT b from t1;
|
||||
# Persistent stats write triggered
|
||||
select stat_value > 0 from mysql.innodb_index_stats where table_name like '%t1%' and stat_name = 'n_page_split';
|
||||
stat_value > 0
|
||||
0
|
||||
0
|
||||
# Table rename should cause stats rename.
|
||||
rename table t1 to t2;
|
||||
select count(stat_value) > 0 from mysql.innodb_index_stats where table_name like '%t2%' and stat_name in ('n_pages_freed', 'n_page_split', 'n_leaf_pages_defrag');
|
||||
count(stat_value) > 0
|
||||
1
|
||||
# Drop index should cause stats drop.
|
||||
drop index SECOND on t2;
|
||||
select count(*) from mysql.innodb_index_stats where table_name like '%t2%' and index_name = 'SECOND';
|
||||
count(*)
|
||||
4
|
||||
Server Restarted
|
||||
select count(stat_value) > 0 from mysql.innodb_index_stats where table_name like '%t2%' and stat_name in ('n_pages_freed', 'n_page_split', 'n_leaf_pages_defrag');
|
||||
count(stat_value) > 0
|
||||
1
|
||||
# Clean up
|
||||
DROP TABLE t2;
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
DROP TABLE if exists t1;
|
||||
SET @start_table_definition_cache = @@global.table_definition_cache;
|
||||
SET @@global.table_definition_cache = 400;
|
||||
SET @start_innodb_defragment_stats_accuracy = @@global.innodb_defragment_stats_accuracy;
|
||||
SET @@global.innodb_defragment_stats_accuracy = 10;
|
||||
CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY AUTO_INCREMENT, b VARCHAR(256), KEY SECOND(a, b)) ENGINE=INNODB;
|
||||
INSERT INTO t1 VALUES(1, REPEAT('A', 256));
|
||||
INSERT INTO t1 (b) SELECT b from t1;
|
||||
INSERT INTO t1 (b) SELECT b from t1;
|
||||
INSERT INTO t1 (b) SELECT b from t1;
|
||||
INSERT INTO t1 (b) SELECT b from t1;
|
||||
INSERT INTO t1 (b) SELECT b from t1;
|
||||
INSERT INTO t1 (b) SELECT b from t1;
|
||||
INSERT INTO t1 (b) SELECT b from t1;
|
||||
INSERT INTO t1 (b) SELECT b from t1;
|
||||
INSERT INTO t1 (b) SELECT b from t1;
|
||||
INSERT INTO t1 (b) SELECT b from t1;
|
||||
INSERT INTO t1 (b) SELECT b from t1;
|
||||
select stat_value > 0 from mysql.innodb_index_stats where table_name like '%t1%' and stat_name = 'n_page_split';
|
||||
stat_value > 0
|
||||
Create 405 table to overflow the table cache.
|
||||
Sleep for a while to make sure t1 is evicted.
|
||||
select sleep(10);
|
||||
sleep(10)
|
||||
0
|
||||
Reload t1 to get defrag stats from persistent storage
|
||||
INSERT INTO t1 (b) SELECT b from t1;
|
||||
make sure the stats thread will wake up and do the write even if there's a race condition between set and reset.
|
||||
select sleep(12);
|
||||
sleep(12)
|
||||
0
|
||||
select stat_value > 0 from mysql.innodb_index_stats where table_name like '%t1%' and stat_name = 'n_page_split';
|
||||
stat_value > 0
|
||||
SET @@global.innodb_defragment_stats_accuracy = @start_innodb_defragment_stats_accuracy;
|
||||
SET @@global.table_definition_cache = @start_table_definition_cache;
|
||||
DROP TABLE t1;
|
||||
81
mysql-test/suite/innodb/r/innodb_defragment.result
Normal file
81
mysql-test/suite/innodb/r/innodb_defragment.result
Normal file
|
|
@ -0,0 +1,81 @@
|
|||
DROP TABLE if exists t1;
|
||||
set global innodb_defragment_stats_accuracy = 80;
|
||||
CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY AUTO_INCREMENT, b VARCHAR(256), KEY SECOND(a, b)) ENGINE=INNODB;
|
||||
optimize table t1;
|
||||
Table Op Msg_type Msg_text
|
||||
test.t1 optimize status OK
|
||||
INSERT INTO t1 VALUES (100000, REPEAT('A', 256));
|
||||
INSERT INTO t1 VALUES (200000, REPEAT('A', 256));
|
||||
INSERT INTO t1 VALUES (300000, REPEAT('A', 256));
|
||||
INSERT INTO t1 VALUES (400000, REPEAT('A', 256));
|
||||
optimize table t1;
|
||||
Table Op Msg_type Msg_text
|
||||
test.t1 optimize status OK
|
||||
create procedure defragment()
|
||||
begin
|
||||
set @i = 0;
|
||||
repeat
|
||||
set @i = @i + 1;
|
||||
optimize table t1;
|
||||
select sleep(5);
|
||||
until @i = 3 end repeat;
|
||||
end //
|
||||
select count(stat_value) = 0 from mysql.innodb_index_stats where table_name like '%t1%' and stat_name in ('n_pages_freed');
|
||||
count(stat_value) = 0
|
||||
1
|
||||
select count(stat_value) > 0 from mysql.innodb_index_stats where table_name like '%t1%' and stat_name in ('n_page_split');
|
||||
count(stat_value) > 0
|
||||
1
|
||||
select count(stat_value) > 0 from mysql.innodb_index_stats where table_name like '%t1%' and stat_name in ('n_leaf_pages_defrag');
|
||||
count(stat_value) > 0
|
||||
1
|
||||
select count(*) from t1;
|
||||
count(*)
|
||||
10004
|
||||
select count(*) from t1 force index (second);
|
||||
count(*)
|
||||
10004
|
||||
call defragment();
|
||||
optimize table t1;
|
||||
Table Op Msg_type Msg_text
|
||||
test.t1 optimize status OK
|
||||
select sleep(5);
|
||||
sleep(5)
|
||||
0
|
||||
select count(*) from t1;
|
||||
count(*)
|
||||
7904
|
||||
select count(stat_value) > 0 from mysql.innodb_index_stats where table_name like '%t2%' and stat_name in ('n_pages_freed', 'n_page_split', 'n_leaf_pages_defrag');
|
||||
count(stat_value) > 0
|
||||
0
|
||||
select count(*) from t1 force index (second);
|
||||
count(*)
|
||||
7904
|
||||
SET @@global.innodb_defragment_n_pages = 3;
|
||||
optimize table t1;
|
||||
Table Op Msg_type Msg_text
|
||||
test.t1 optimize status OK
|
||||
select count(stat_value) > 0 from mysql.innodb_index_stats where table_name like '%t2%' and stat_name in ('n_pages_freed', 'n_page_split', 'n_leaf_pages_defrag');
|
||||
count(stat_value) > 0
|
||||
0
|
||||
select count(*) from t1;
|
||||
count(*)
|
||||
6904
|
||||
select count(*) from t1 force index (second);
|
||||
count(*)
|
||||
6904
|
||||
SET @@global.innodb_defragment_n_pages = 10;
|
||||
optimize table t1;
|
||||
Table Op Msg_type Msg_text
|
||||
test.t1 optimize status OK
|
||||
select count(stat_value) > 0 from mysql.innodb_index_stats where table_name like '%t2%' and stat_name in ('n_pages_freed', 'n_page_split', 'n_leaf_pages_defrag');
|
||||
count(stat_value) > 0
|
||||
0
|
||||
select count(*) from t1;
|
||||
count(*)
|
||||
6904
|
||||
select count(*) from t1 force index (second);
|
||||
count(*)
|
||||
6904
|
||||
DROP PROCEDURE defragment;
|
||||
DROP TABLE t1;
|
||||
|
|
@ -0,0 +1,59 @@
|
|||
DROP TABLE if exists t1;
|
||||
DROP TABLE if exists t2;
|
||||
Testing tables with large records
|
||||
CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY AUTO_INCREMENT, b VARCHAR(256), KEY SECOND(a, b)) ENGINE=INNODB;
|
||||
optimize table t1;
|
||||
Table Op Msg_type Msg_text
|
||||
test.t1 optimize status OK
|
||||
select count(*) from t1;
|
||||
count(*)
|
||||
790
|
||||
select count(*) from t1 force index (second);
|
||||
count(*)
|
||||
790
|
||||
# A few more insertions on the page should not cause a page split.
|
||||
insert into t1 values (81, REPEAT('A', 256));
|
||||
insert into t1 values (83, REPEAT('A', 256));
|
||||
insert into t1 values (87, REPEAT('A', 256));
|
||||
insert into t1 values (82, REPEAT('A', 256));
|
||||
insert into t1 values (86, REPEAT('A', 256));
|
||||
# More insertions will cause page splits
|
||||
insert into t1 values (88, REPEAT('A', 50));
|
||||
Too much space are reserved on primary index.
|
||||
Too much space are reserved on second index.
|
||||
DROP TABLE t1;
|
||||
Testing table with small records
|
||||
CREATE TABLE t2 (a INT NOT NULL PRIMARY KEY AUTO_INCREMENT, b VARchar(16), KEY SECOND(a,b)) ENGINE=INNODB;
|
||||
optimize table t2;
|
||||
Table Op Msg_type Msg_text
|
||||
test.t2 optimize status OK
|
||||
select count(*) from t2 force index(second);
|
||||
count(*)
|
||||
3701
|
||||
The page should have room for about 20 insertions
|
||||
insert into t2 values(1181, REPEAT('A', 16));
|
||||
insert into t2 values(1191, REPEAT('A', 16));
|
||||
insert into t2 values(1182, REPEAT('A', 16));
|
||||
insert into t2 values(1192, REPEAT('A', 16));
|
||||
insert into t2 values(1183, REPEAT('A', 16));
|
||||
insert into t2 values(1193, REPEAT('A', 16));
|
||||
insert into t2 values(1184, REPEAT('A', 16));
|
||||
insert into t2 values(1194, REPEAT('A', 16));
|
||||
insert into t2 values(1185, REPEAT('A', 16));
|
||||
insert into t2 values(1195, REPEAT('A', 16));
|
||||
insert into t2 values(1186, REPEAT('A', 16));
|
||||
insert into t2 values(1196, REPEAT('A', 16));
|
||||
insert into t2 values(1187, REPEAT('A', 16));
|
||||
insert into t2 values(1197, REPEAT('A', 16));
|
||||
insert into t2 values(1188, REPEAT('A', 16));
|
||||
insert into t2 values(1198, REPEAT('A', 16));
|
||||
insert into t2 values(1189, REPEAT('A', 16));
|
||||
insert into t2 values(1199, REPEAT('A', 16));
|
||||
insert into t2 values(1190, REPEAT('A', 16));
|
||||
insert into t2 values(1180, REPEAT('A', 16));
|
||||
More insertions will cause page split.
|
||||
insert into t2 values(1280, REPEAT('A', 16));
|
||||
insert into t2 values(1290, REPEAT('A', 16));
|
||||
insert into t2 values(1281, REPEAT('A', 16));
|
||||
insert into t2 values(1291, REPEAT('A', 16));
|
||||
DROP TABLE t2;
|
||||
1
mysql-test/suite/innodb/t/innodb.opt
Normal file
1
mysql-test/suite/innodb/t/innodb.opt
Normal file
|
|
@ -0,0 +1 @@
|
|||
--innodb-defragment=0
|
||||
5
mysql-test/suite/innodb/t/innodb_defrag_binlog.opt
Normal file
5
mysql-test/suite/innodb/t/innodb_defrag_binlog.opt
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
--loose-innodb-buffer-pool-stats
|
||||
--loose-innodb-buffer-page
|
||||
--loose-innodb-buffer-page-lru
|
||||
--binlog-format=row
|
||||
--innodb-defragment=1
|
||||
19
mysql-test/suite/innodb/t/innodb_defrag_binlog.test
Normal file
19
mysql-test/suite/innodb/t/innodb_defrag_binlog.test
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
--source include/have_innodb.inc
|
||||
--source include/master-slave.inc
|
||||
|
||||
--disable_warnings
|
||||
drop table if exists t1;
|
||||
--enable_warnings
|
||||
|
||||
create table t1(a int not null primary key auto_increment, b varchar(256), key second(b)) engine=innodb;
|
||||
|
||||
insert into t1 values (1, REPEAT("a", 256));
|
||||
insert into t1 values (2, REPEAT("a", 256));
|
||||
optimize table t1;
|
||||
|
||||
drop table t1;
|
||||
|
||||
--replace_regex /\/\*.*//
|
||||
show binlog events in 'master-bin.000001' from 313;
|
||||
|
||||
--source include/rpl_end.inc
|
||||
4
mysql-test/suite/innodb/t/innodb_defrag_concurrent.opt
Normal file
4
mysql-test/suite/innodb/t/innodb_defrag_concurrent.opt
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
--loose-innodb-buffer-pool-stats
|
||||
--loose-innodb-buffer-page
|
||||
--loose-innodb-buffer-page-lru
|
||||
--innodb-defragment=1
|
||||
180
mysql-test/suite/innodb/t/innodb_defrag_concurrent.test
Normal file
180
mysql-test/suite/innodb/t/innodb_defrag_concurrent.test
Normal file
|
|
@ -0,0 +1,180 @@
|
|||
--source include/have_innodb.inc
|
||||
|
||||
--disable_warnings
|
||||
DROP TABLE if exists t1;
|
||||
--enable_warnings
|
||||
|
||||
--disable_query_log
|
||||
let $innodb_defragment_n_pages_orig=`select @@innodb_defragment_n_pages`;
|
||||
let $innodb_defragment_stats_accuracy_orig=`select @@innodb_defragment_stats_accuracy`;
|
||||
--enable_query_log
|
||||
|
||||
select @@global.innodb_stats_persistent;
|
||||
set global innodb_defragment_stats_accuracy = 80;
|
||||
|
||||
# Create table.
|
||||
CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY AUTO_INCREMENT, b VARCHAR(256), c INT, KEY second(a, b),KEY third(c)) ENGINE=INNODB;
|
||||
|
||||
connect (con1,localhost,root,,test,$MASTER_MYPORT,$MASTER_MYSOCK);
|
||||
connect (con2,localhost,root,,test,$MASTER_MYPORT,$MASTER_MYSOCK);
|
||||
connect (con3,localhost,root,,test,$MASTER_MYPORT,$MASTER_MYSOCK);
|
||||
connect (con4,localhost,root,,test,$MASTER_MYPORT,$MASTER_MYSOCK);
|
||||
|
||||
connection default;
|
||||
|
||||
SET @@global.innodb_defragment_n_pages = 20;
|
||||
|
||||
let $data_size = 20000;
|
||||
let $delete_size = 2000;
|
||||
|
||||
# Populate table.
|
||||
let $i = $data_size;
|
||||
--disable_query_log
|
||||
while ($i)
|
||||
{
|
||||
eval
|
||||
INSERT INTO t1 VALUES ($data_size + 1 - $i, REPEAT('A', 256), $i);
|
||||
dec $i;
|
||||
}
|
||||
--enable_query_log
|
||||
|
||||
--echo after populate PRIMARY
|
||||
select count(*) from t1;
|
||||
|
||||
if (`select count(*) < 30 from information_schema.innodb_buffer_page where table_name like '%t1%' and index_name = 'PRIMARY' order by page_number;`)
|
||||
{
|
||||
select count(*) from information_schema.innodb_buffer_page where table_name like '%t1%' and index_name = 'PRIMARY' order by page_number;
|
||||
}
|
||||
|
||||
--echo after populate second
|
||||
select count(*) from t1 force index (second);
|
||||
|
||||
if (`select count(*) < 320 from information_schema.innodb_buffer_page where table_name like '%t1%' and index_name = 'second' order by page_number;`)
|
||||
{
|
||||
select count(*) from information_schema.innodb_buffer_page where table_name like '%t1%' and index_name = 'second' order by page_number;
|
||||
}
|
||||
|
||||
--echo after populate third
|
||||
select count(*) from t1 force index (third);
|
||||
|
||||
if (`select count(*) < 20 from information_schema.innodb_buffer_page where table_name like '%t1%' and index_name = 'third' order by page_number;`)
|
||||
{
|
||||
select count(*) from information_schema.innodb_buffer_page where table_name like '%t1%' and index_name = 'third' order by page_number;
|
||||
}
|
||||
|
||||
# Delete some data
|
||||
--disable_query_log
|
||||
let $size = $delete_size;
|
||||
while ($size)
|
||||
{
|
||||
let $j = 100 * $size;
|
||||
eval delete from t1 where a between $j - 20 and $j;
|
||||
dec $size;
|
||||
}
|
||||
--enable_query_log
|
||||
|
||||
select count(*) from t1;
|
||||
|
||||
--echo after delete PRIMAY
|
||||
if (`select count(*) < 30 from information_schema.innodb_buffer_page where table_name like '%t1%' and index_name = 'PRIMARY' order by page_number;`)
|
||||
{
|
||||
select count(*) from information_schema.innodb_buffer_page where table_name like '%t1%' and index_name = 'PRIMARY' order by page_number;
|
||||
}
|
||||
|
||||
select count(*) from t1 force index (second);
|
||||
|
||||
--echo after delete second
|
||||
if (`select count(*) < 300 from information_schema.innodb_buffer_page where table_name like '%t1%' and index_name = 'second' order by page_number;`)
|
||||
{
|
||||
select count(*) from information_schema.innodb_buffer_page where table_name like '%t1%' and index_name = 'second' order by page_number;
|
||||
}
|
||||
|
||||
select count(*) from t1 force index (third);
|
||||
|
||||
--echo after delete third
|
||||
if (`select count(*) > 20 from information_schema.innodb_buffer_page where table_name like '%t1%' and index_name = 'third' order by page_number;`)
|
||||
{
|
||||
select count(*) from information_schema.innodb_buffer_page where table_name like '%t1%' and index_name = 'third' order by page_number;
|
||||
}
|
||||
|
||||
# Above delete will free some pages and insert causes page split and these could cause defrag
|
||||
select count(stat_value) > 0 from mysql.innodb_index_stats where table_name like '%t1%' and stat_name in ('n_pages_freed');
|
||||
select count(stat_value) > 0 from mysql.innodb_index_stats where table_name like '%t1%' and stat_name in ('n_page_split');
|
||||
select count(stat_value) > 0 from mysql.innodb_index_stats where table_name like '%t1%' and stat_name in ('n_leaf_pages_defrag');
|
||||
|
||||
connection con1;
|
||||
--send optimize table t1;
|
||||
|
||||
connection default;
|
||||
--send INSERT INTO t1 VALUES (400000, REPEAT('A', 256),300000);
|
||||
|
||||
connection con2;
|
||||
--send INSERT INTO t1 VALUES (500000, REPEAT('A', 256),400000);
|
||||
|
||||
connection con3;
|
||||
--send DELETE FROM t1 where a between 1 and 100;
|
||||
|
||||
connection con4;
|
||||
--send UPDATE t1 SET c = c + 1 where c between 2000 and 8000;
|
||||
|
||||
connection con1;
|
||||
--disable_result_log
|
||||
--reap
|
||||
--enable_result_log
|
||||
|
||||
connection con2;
|
||||
--reap
|
||||
|
||||
connection con3;
|
||||
--reap
|
||||
|
||||
connection con4;
|
||||
--reap
|
||||
|
||||
connection default;
|
||||
--reap
|
||||
|
||||
disconnect con1;
|
||||
disconnect con2;
|
||||
disconnect con3;
|
||||
disconnect con4;
|
||||
|
||||
optimize table t1;
|
||||
select sleep(5);
|
||||
|
||||
select count(*) from t1;
|
||||
|
||||
--echo after optimize PRIMARY
|
||||
if (`select count(*) > 62 from information_schema.innodb_buffer_page where table_name like '%t1%' and index_name = 'PRIMARY' order by page_number;`)
|
||||
{
|
||||
select count(*) from information_schema.innodb_buffer_page where table_name like '%t1%' and index_name = 'PRIMARY' order by page_number;
|
||||
}
|
||||
|
||||
select count(*) from t1 force index (second);
|
||||
|
||||
--echo after optimize second
|
||||
if (`select count(*) > 340 from information_schema.innodb_buffer_page where table_name like '%t1%' and index_name = 'second' order by page_number;`)
|
||||
{
|
||||
select count(*) from information_schema.innodb_buffer_page where table_name like '%t1%' and index_name = 'second' order by page_number;
|
||||
}
|
||||
|
||||
select count(*) from t1 force index (third);
|
||||
|
||||
--echo after optimize third
|
||||
if (`select count(*) > 25 from information_schema.innodb_buffer_page where table_name like '%t1%' and index_name = 'third' order by page_number;`)
|
||||
{
|
||||
select count(*) from information_schema.innodb_buffer_page where table_name like '%t1%' and index_name = 'third' order by page_number;
|
||||
}
|
||||
|
||||
# Now pages are freed
|
||||
select count(stat_value) > 0 from mysql.innodb_index_stats where table_name like '%t1%' and stat_name in ('n_pages_freed');
|
||||
select count(stat_value) > 0 from mysql.innodb_index_stats where table_name like '%t1%' and stat_name in ('n_page_split');
|
||||
select count(stat_value) > 0 from mysql.innodb_index_stats where table_name like '%t1%' and stat_name in ('n_leaf_pages_defrag');
|
||||
|
||||
drop table t1;
|
||||
|
||||
# reset system
|
||||
--disable_query_log
|
||||
EVAL SET GLOBAL innodb_defragment_n_pages = $innodb_defragment_n_pages_orig;
|
||||
EVAL SET GLOBAL innodb_defragment_stats_accuracy = $innodb_defragment_stats_accuracy_orig;
|
||||
--enable_query_log
|
||||
1
mysql-test/suite/innodb/t/innodb_defrag_stats.opt
Normal file
1
mysql-test/suite/innodb/t/innodb_defrag_stats.opt
Normal file
|
|
@ -0,0 +1 @@
|
|||
--innodb-defragment=1
|
||||
87
mysql-test/suite/innodb/t/innodb_defrag_stats.test
Normal file
87
mysql-test/suite/innodb/t/innodb_defrag_stats.test
Normal file
|
|
@ -0,0 +1,87 @@
|
|||
--source include/have_innodb.inc
|
||||
--source include/big_test.inc
|
||||
|
||||
--disable_warnings
|
||||
DROP TABLE if exists t1;
|
||||
--enable_warnings
|
||||
|
||||
--disable_query_log
|
||||
let $innodb_defragment_stats_accuracy_orig=`select @@innodb_defragment_stats_accuracy`;
|
||||
--enable_query_log
|
||||
|
||||
select @@global.innodb_stats_persistent;
|
||||
set global innodb_defragment_stats_accuracy = 20;
|
||||
|
||||
--echo # Create table.
|
||||
CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY AUTO_INCREMENT, b VARCHAR(256), KEY SECOND(a, b)) ENGINE=INNODB;
|
||||
|
||||
--echo # Populate data
|
||||
INSERT INTO t1 VALUES(1, REPEAT('A', 256));
|
||||
INSERT INTO t1 (b) SELECT b from t1;
|
||||
INSERT INTO t1 (b) SELECT b from t1;
|
||||
INSERT INTO t1 (b) SELECT b from t1;
|
||||
INSERT INTO t1 (b) SELECT b from t1;
|
||||
INSERT INTO t1 (b) SELECT b from t1;
|
||||
INSERT INTO t1 (b) SELECT b from t1;
|
||||
INSERT INTO t1 (b) SELECT b from t1;
|
||||
INSERT INTO t1 (b) SELECT b from t1;
|
||||
INSERT INTO t1 (b) SELECT b from t1;
|
||||
INSERT INTO t1 (b) SELECT b from t1;
|
||||
|
||||
--echo # Not enough page splits to trigger persistent stats write yet.
|
||||
select count(*) from mysql.innodb_index_stats where table_name like '%t1%' and stat_name in ('n_page_split', 'n_leaf_pages_defrag');
|
||||
|
||||
INSERT INTO t1 (b) SELECT b from t1;
|
||||
|
||||
--echo # Persistent stats recorded.
|
||||
select count(stat_value) > 0 from mysql.innodb_index_stats where table_name like '%t1%' and stat_name in ('n_page_split', 'n_leaf_pages_defrag');
|
||||
|
||||
--echo # Delete some rows.
|
||||
let $num_delete = 20;
|
||||
while ($num_delete)
|
||||
{
|
||||
let $j = 100 * $num_delete;
|
||||
eval delete from t1 where a between $j and $j + 30;
|
||||
dec $num_delete;
|
||||
}
|
||||
|
||||
--source include/restart_mysqld.inc
|
||||
--echo # Server Restarted
|
||||
|
||||
--echo # Confirm persistent stats still there after restart.
|
||||
select count(stat_value) > 0 from mysql.innodb_index_stats where table_name like '%t1%' and stat_name in ('n_page_split', 'n_leaf_pages_defrag');
|
||||
|
||||
optimize table t1;
|
||||
--echo # n_page_split should be 0 after defragmentation, n_pages_freed should be non-zero.
|
||||
select stat_value = 0 from mysql.innodb_index_stats where table_name like '%t1%' and stat_name = 'n_page_split';
|
||||
select count(stat_value) > 0 from mysql.innodb_index_stats where table_name like '%t1%' and stat_name in ('n_pages_freed', 'n_leaf_pages_defrag');
|
||||
|
||||
set global innodb_defragment_stats_accuracy = 40;
|
||||
|
||||
INSERT INTO t1 (b) SELECT b from t1;
|
||||
--echo # Not enough operation to trigger persistent stats write
|
||||
select stat_value = 0 from mysql.innodb_index_stats where table_name like '%t1%' and stat_name = 'n_page_split';
|
||||
|
||||
INSERT INTO t1 (b) SELECT b from t1;
|
||||
--echo # Persistent stats write triggered
|
||||
select stat_value > 0 from mysql.innodb_index_stats where table_name like '%t1%' and stat_name = 'n_page_split';
|
||||
|
||||
--echo # Table rename should cause stats rename.
|
||||
rename table t1 to t2;
|
||||
select count(stat_value) > 0 from mysql.innodb_index_stats where table_name like '%t2%' and stat_name in ('n_pages_freed', 'n_page_split', 'n_leaf_pages_defrag');
|
||||
|
||||
--echo # Drop index should cause stats drop.
|
||||
drop index SECOND on t2;
|
||||
select count(*) from mysql.innodb_index_stats where table_name like '%t2%' and index_name = 'SECOND';
|
||||
|
||||
--source include/restart_mysqld.inc
|
||||
--echo Server Restarted
|
||||
|
||||
select count(stat_value) > 0 from mysql.innodb_index_stats where table_name like '%t2%' and stat_name in ('n_pages_freed', 'n_page_split', 'n_leaf_pages_defrag');
|
||||
|
||||
--echo # Clean up
|
||||
DROP TABLE t2;
|
||||
|
||||
--disable_query_log
|
||||
EVAL SET GLOBAL innodb_defragment_stats_accuracy = $innodb_defragment_stats_accuracy_orig;
|
||||
--enable_query_log
|
||||
|
|
@ -0,0 +1 @@
|
|||
--innodb-defragment=1
|
||||
|
|
@ -0,0 +1,71 @@
|
|||
--source include/have_innodb.inc
|
||||
--source include/big_test.inc
|
||||
|
||||
--disable_warnings
|
||||
DROP TABLE if exists t1;
|
||||
--enable_warnings
|
||||
|
||||
let $num_tables = 405;
|
||||
|
||||
SET @start_table_definition_cache = @@global.table_definition_cache;
|
||||
SET @@global.table_definition_cache = 400;
|
||||
|
||||
# set stats accuracy to be pretty high so stats sync is easily triggered.
|
||||
SET @start_innodb_defragment_stats_accuracy = @@global.innodb_defragment_stats_accuracy;
|
||||
SET @@global.innodb_defragment_stats_accuracy = 10;
|
||||
|
||||
# Create table.
|
||||
CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY AUTO_INCREMENT, b VARCHAR(256), KEY SECOND(a, b)) ENGINE=INNODB;
|
||||
|
||||
# Populate data
|
||||
INSERT INTO t1 VALUES(1, REPEAT('A', 256));
|
||||
INSERT INTO t1 (b) SELECT b from t1;
|
||||
INSERT INTO t1 (b) SELECT b from t1;
|
||||
INSERT INTO t1 (b) SELECT b from t1;
|
||||
INSERT INTO t1 (b) SELECT b from t1;
|
||||
INSERT INTO t1 (b) SELECT b from t1;
|
||||
INSERT INTO t1 (b) SELECT b from t1;
|
||||
INSERT INTO t1 (b) SELECT b from t1;
|
||||
INSERT INTO t1 (b) SELECT b from t1;
|
||||
INSERT INTO t1 (b) SELECT b from t1;
|
||||
INSERT INTO t1 (b) SELECT b from t1;
|
||||
INSERT INTO t1 (b) SELECT b from t1;
|
||||
|
||||
select stat_value > 0 from mysql.innodb_index_stats where table_name like '%t1%' and stat_name = 'n_page_split';
|
||||
|
||||
# Create many tables to over flow the table definition cache
|
||||
|
||||
--echo Create $num_tables table to overflow the table cache.
|
||||
--disable_query_log
|
||||
let $count = $num_tables;
|
||||
while ($count)
|
||||
{
|
||||
EVAL CREATE TABLE t_$count (a INT NOT NULL PRIMARY KEY AUTO_INCREMENT) ENGINE=INNODB;
|
||||
EVAL INSERT INTO t_$count VALUES (1), (2);
|
||||
dec $count;
|
||||
}
|
||||
--enable_query_log
|
||||
--echo Sleep for a while to make sure t1 is evicted.
|
||||
select sleep(10);
|
||||
|
||||
--echo Reload t1 to get defrag stats from persistent storage
|
||||
INSERT INTO t1 (b) SELECT b from t1;
|
||||
|
||||
--echo make sure the stats thread will wake up and do the write even if there's a race condition between set and reset.
|
||||
select sleep(12);
|
||||
|
||||
select stat_value > 0 from mysql.innodb_index_stats where table_name like '%t1%' and stat_name = 'n_page_split';
|
||||
|
||||
|
||||
# Clean up
|
||||
SET @@global.innodb_defragment_stats_accuracy = @start_innodb_defragment_stats_accuracy;
|
||||
SET @@global.table_definition_cache = @start_table_definition_cache;
|
||||
--disable_query_log
|
||||
let $count = $num_tables;
|
||||
while ($count)
|
||||
{
|
||||
EVAL DROP TABLE t_$count;
|
||||
dec $count;
|
||||
}
|
||||
--enable_query_log
|
||||
DROP TABLE t1;
|
||||
2
mysql-test/suite/innodb/t/innodb_defragment-master.opt
Normal file
2
mysql-test/suite/innodb/t/innodb_defragment-master.opt
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
--innodb_file_per_table
|
||||
--innodb-defragment=1
|
||||
4
mysql-test/suite/innodb/t/innodb_defragment.opt
Normal file
4
mysql-test/suite/innodb/t/innodb_defragment.opt
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
--loose-innodb-buffer-pool-stats
|
||||
--loose-innodb-buffer-page
|
||||
--loose-innodb-buffer-page-lru
|
||||
--innodb-defragment=1
|
||||
190
mysql-test/suite/innodb/t/innodb_defragment.test
Normal file
190
mysql-test/suite/innodb/t/innodb_defragment.test
Normal file
|
|
@ -0,0 +1,190 @@
|
|||
--source include/have_innodb.inc
|
||||
|
||||
--disable_warnings
|
||||
DROP TABLE if exists t1;
|
||||
--enable_warnings
|
||||
|
||||
--disable_query_log
|
||||
let $innodb_defragment_n_pages_orig=`select @@innodb_defragment_n_pages`;
|
||||
let $innodb_defragment_stats_accuracy_orig=`select @@innodb_defragment_stats_accuracy`;
|
||||
--enable_query_log
|
||||
|
||||
set global innodb_defragment_stats_accuracy = 80;
|
||||
|
||||
# Create table.
|
||||
CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY AUTO_INCREMENT, b VARCHAR(256), KEY SECOND(a, b)) ENGINE=INNODB;
|
||||
|
||||
## Test-1 defragment an empty table
|
||||
optimize table t1;
|
||||
|
||||
## Test-2 defragment a single page table
|
||||
INSERT INTO t1 VALUES (100000, REPEAT('A', 256));
|
||||
INSERT INTO t1 VALUES (200000, REPEAT('A', 256));
|
||||
INSERT INTO t1 VALUES (300000, REPEAT('A', 256));
|
||||
INSERT INTO t1 VALUES (400000, REPEAT('A', 256));
|
||||
|
||||
optimize table t1;
|
||||
|
||||
## Test-3 defragment (somewhat) in parallel with delete queries
|
||||
let $data_size = 10000;
|
||||
let $delete_size = 100;
|
||||
|
||||
delimiter //;
|
||||
create procedure defragment()
|
||||
begin
|
||||
set @i = 0;
|
||||
repeat
|
||||
set @i = @i + 1;
|
||||
optimize table t1;
|
||||
select sleep(5);
|
||||
until @i = 3 end repeat;
|
||||
end //
|
||||
delimiter ;//
|
||||
|
||||
|
||||
# Populate table.
|
||||
let $i = $data_size;
|
||||
--disable_query_log
|
||||
while ($i)
|
||||
{
|
||||
eval
|
||||
INSERT INTO t1 VALUES ($data_size + 1 - $i, REPEAT('A', 256));
|
||||
dec $i;
|
||||
}
|
||||
--enable_query_log
|
||||
|
||||
select count(stat_value) = 0 from mysql.innodb_index_stats where table_name like '%t1%' and stat_name in ('n_pages_freed');
|
||||
select count(stat_value) > 0 from mysql.innodb_index_stats where table_name like '%t1%' and stat_name in ('n_page_split');
|
||||
select count(stat_value) > 0 from mysql.innodb_index_stats where table_name like '%t1%' and stat_name in ('n_leaf_pages_defrag');
|
||||
|
||||
select count(*) from t1;
|
||||
|
||||
if (!`select count(*) > 180 from information_schema.innodb_buffer_page where table_name like '%t1%' and index_name = 'PRIMARY' order by page_number;`)
|
||||
{
|
||||
select count(*) from information_schema.innodb_buffer_page where table_name like '%t1%' and index_name = 'PRIMARY' order by page_number;
|
||||
}
|
||||
|
||||
select count(*) from t1 force index (second);
|
||||
|
||||
if (!`select count(*) > 170 from information_schema.innodb_buffer_page where table_name like '%t1%' and index_name = 'second' order by page_number;`)
|
||||
{
|
||||
select count(*) from information_schema.innodb_buffer_page where table_name like '%t1%' and index_name = 'second' order by page_number;
|
||||
}
|
||||
|
||||
|
||||
connect (con1,localhost,root,,test,$MASTER_MYPORT,$MASTER_MYSOCK);
|
||||
|
||||
connection con1;
|
||||
--send call defragment()
|
||||
|
||||
connection default;
|
||||
|
||||
--disable_query_log
|
||||
let $size = $delete_size;
|
||||
while ($size)
|
||||
{
|
||||
let $j = 100 * $size;
|
||||
eval delete from t1 where a between $j - 20 and $j;
|
||||
dec $size;
|
||||
}
|
||||
--enable_query_log
|
||||
|
||||
connection con1;
|
||||
--disable_result_log
|
||||
--reap
|
||||
--enable_result_log
|
||||
|
||||
connection default;
|
||||
disconnect con1;
|
||||
|
||||
optimize table t1;
|
||||
select sleep(5);
|
||||
|
||||
--source include/restart_mysqld.inc
|
||||
select count(*) from t1;
|
||||
|
||||
select count(stat_value) > 0 from mysql.innodb_index_stats where table_name like '%t2%' and stat_name in ('n_pages_freed', 'n_page_split', 'n_leaf_pages_defrag');
|
||||
|
||||
# After deletion & defragmentation, there are 8000 records left
|
||||
if (!`select count(*) < 180 from information_schema.innodb_buffer_page where table_name like '%t1%' and index_name = 'PRIMARY' order by page_number;`)
|
||||
{
|
||||
select count(*) from information_schema.innodb_buffer_page where table_name like '%t1%' and index_name = 'PRIMARY' order by page_number;
|
||||
}
|
||||
|
||||
select count(*) from t1 force index (second);
|
||||
|
||||
# secondary index is pretty much the same size as primary index so the number of pages should be similar.
|
||||
if (!`select count(*) < 180 from information_schema.innodb_buffer_page where table_name like '%t1%' and index_name = 'second' order by page_number;`)
|
||||
{
|
||||
select count(*) from information_schema.innodb_buffer_page where table_name like '%t1%' and index_name = 'second' order by page_number;
|
||||
}
|
||||
|
||||
## Test-4 defragment with larger n_pages
|
||||
|
||||
# delete some more records
|
||||
--disable_query_log
|
||||
let $size = $delete_size;
|
||||
while ($size)
|
||||
{
|
||||
let $j = 100 * $size;
|
||||
eval delete from t1 where a between $j - 30 and $j - 20;
|
||||
dec $size;
|
||||
}
|
||||
--enable_query_log
|
||||
|
||||
SET @@global.innodb_defragment_n_pages = 3;
|
||||
|
||||
# This will not reduce number of pages by a lot
|
||||
optimize table t1;
|
||||
|
||||
--source include/restart_mysqld.inc
|
||||
|
||||
select count(stat_value) > 0 from mysql.innodb_index_stats where table_name like '%t2%' and stat_name in ('n_pages_freed', 'n_page_split', 'n_leaf_pages_defrag');
|
||||
|
||||
select count(*) from t1;
|
||||
|
||||
# We didn't create large wholes with the previous deletion, so if innodb_defragment_n_pages = 3, we won't be able to free up many pages.
|
||||
if (!`select count(*) > 130 from information_schema.innodb_buffer_page where table_name like '%t1%' and index_name = 'PRIMARY' order by page_number;`)
|
||||
{
|
||||
select count(*) from information_schema.innodb_buffer_page where table_name like '%t1%' and index_name = 'PRIMARY' order by page_number;
|
||||
}
|
||||
|
||||
select count(*) from t1 force index (second);
|
||||
|
||||
# Same holds for secondary index, not many pages are released.
|
||||
if (!`select count(*) > 100 from information_schema.innodb_buffer_page where table_name like '%t1%' and index_name = 'second' order by page_number;`)
|
||||
{
|
||||
select count(*) from information_schema.innodb_buffer_page where table_name like '%t1%' and index_name = 'second' order by page_number;
|
||||
}
|
||||
|
||||
SET @@global.innodb_defragment_n_pages = 10;
|
||||
|
||||
optimize table t1;
|
||||
|
||||
--source include/restart_mysqld.inc
|
||||
|
||||
select count(stat_value) > 0 from mysql.innodb_index_stats where table_name like '%t2%' and stat_name in ('n_pages_freed', 'n_page_split', 'n_leaf_pages_defrag');
|
||||
|
||||
select count(*) from t1;
|
||||
|
||||
# This time we used innodb_defragment_n_pages = 10, so we should be able to free up some pages.
|
||||
if (!`select count(*) < 165 from information_schema.innodb_buffer_page where table_name like '%t1%' and index_name = 'PRIMARY' order by page_number;`)
|
||||
{
|
||||
select count(*) from information_schema.innodb_buffer_page where table_name like '%t1%' and index_name = 'PRIMARY' order by page_number;
|
||||
}
|
||||
|
||||
select count(*) from t1 force index (second);
|
||||
|
||||
if (!`select count(*) < 165 from information_schema.innodb_buffer_page where table_name like '%t1%' and index_name = 'second' order by page_number;`)
|
||||
{
|
||||
select count(*) from information_schema.innodb_buffer_page where table_name like '%t1%' and index_name = 'second' order by page_number;
|
||||
}
|
||||
|
||||
DROP PROCEDURE defragment;
|
||||
DROP TABLE t1;
|
||||
# reset system
|
||||
--disable_query_log
|
||||
EVAL SET GLOBAL innodb_defragment_n_pages = $innodb_defragment_n_pages_orig;
|
||||
EVAL SET GLOBAL innodb_defragment_stats_accuracy = $innodb_defragment_stats_accuracy_orig;
|
||||
--enable_query_log
|
||||
|
||||
|
|
@ -0,0 +1,4 @@
|
|||
--loose-innodb-buffer-pool-stats
|
||||
--loose-innodb-buffer-page
|
||||
--loose-innodb-buffer-page-lru
|
||||
--innodb-defragment=1
|
||||
130
mysql-test/suite/innodb/t/innodb_defragment_fill_factor.test
Normal file
130
mysql-test/suite/innodb/t/innodb_defragment_fill_factor.test
Normal file
|
|
@ -0,0 +1,130 @@
|
|||
--source include/have_innodb.inc
|
||||
--disable_warnings
|
||||
DROP TABLE if exists t1;
|
||||
DROP TABLE if exists t2;
|
||||
--enable_warnings
|
||||
--echo Testing tables with large records
|
||||
# Create table.
|
||||
CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY AUTO_INCREMENT, b VARCHAR(256), KEY SECOND(a, b)) ENGINE=INNODB;
|
||||
# Populate table.
|
||||
let $i = 1000;
|
||||
--disable_query_log
|
||||
while ($i)
|
||||
{
|
||||
eval
|
||||
INSERT INTO t1 VALUES ($i, REPEAT('A', 256));
|
||||
dec $i;
|
||||
}
|
||||
--enable_query_log
|
||||
--disable_query_log
|
||||
let $size = 10;
|
||||
while ($size)
|
||||
{
|
||||
let $j = 100 * $size;
|
||||
eval delete from t1 where a between $j - 20 and $j;
|
||||
dec $size;
|
||||
}
|
||||
--enable_query_log
|
||||
optimize table t1;
|
||||
--source include/restart_mysqld.inc
|
||||
select count(*) from t1;
|
||||
# After deletion & defragmentation, there are 800 records left. Each page can hold about 57 records. We fill the page 90% full,
|
||||
# so there should be less than 16 pages total.
|
||||
--let $primary_before = query_get_value(select count(*) as Value from information_schema.innodb_buffer_page where table_name like '%t1%' and index_name = 'PRIMARY', Value, 1)
|
||||
select count(*) from t1 force index (second);
|
||||
# secondary index is slightly bigger than primary index so the number of pages should be similar.
|
||||
--let $second_before = query_get_value(select count(*) as Value from information_schema.innodb_buffer_page where table_name like '%t1%' and index_name = 'second', Value, 1)
|
||||
--echo # A few more insertions on the page should not cause a page split.
|
||||
insert into t1 values (81, REPEAT('A', 256));
|
||||
insert into t1 values (83, REPEAT('A', 256));
|
||||
insert into t1 values (87, REPEAT('A', 256));
|
||||
insert into t1 values (82, REPEAT('A', 256));
|
||||
insert into t1 values (86, REPEAT('A', 256));
|
||||
--let $primary_after = query_get_value(select count(*) as Value from information_schema.innodb_buffer_page where table_name like '%t1%' and index_name = 'PRIMARY', Value, 1)
|
||||
--let $second_after = query_get_value(select count(*) as Value from information_schema.innodb_buffer_page where table_name like '%t1%' and index_name = 'second', Value, 1)
|
||||
if ($primary_before != $primary_after) {
|
||||
--echo Insertion caused page split on primary, which should be avoided by innodb_defragment_fill_factor.
|
||||
}
|
||||
if ($second_before != $second_after) {
|
||||
--echo Insertion caused page split on second, which should be avoided by innodb_defragment_fill_factor.
|
||||
}
|
||||
--echo # More insertions will cause page splits
|
||||
insert into t1 values (88, REPEAT('A', 50));
|
||||
#insert into t1 values (85, REPEAT('A', 256));
|
||||
#insert into t1 values (84, REPEAT('A', 256));
|
||||
#insert into t1 values (89, REPEAT('A', 256));
|
||||
--let $primary_after = query_get_value(select count(*) as Value from information_schema.innodb_buffer_page where table_name like '%t1%' and index_name = 'PRIMARY', Value, 1)
|
||||
--let $second_after = query_get_value(select count(*) as Value from information_schema.innodb_buffer_page where table_name like '%t1%' and index_name = 'second', Value, 1)
|
||||
if ($primary_before == $primary_after) {
|
||||
--echo Too much space are reserved on primary index.
|
||||
}
|
||||
if ($second_before == $second_after) {
|
||||
--echo Too much space are reserved on second index.
|
||||
}
|
||||
DROP TABLE t1;
|
||||
--echo Testing table with small records
|
||||
CREATE TABLE t2 (a INT NOT NULL PRIMARY KEY AUTO_INCREMENT, b VARchar(16), KEY SECOND(a,b)) ENGINE=INNODB;
|
||||
# Populate table.
|
||||
--disable_query_log
|
||||
INSERT INTO t2 VALUES (1, REPEAT('A', 16));
|
||||
INSERT INTO t2 (b) SELECT b from t2;
|
||||
INSERT INTO t2 (b) SELECT b from t2;
|
||||
INSERT INTO t2 (b) SELECT b from t2;
|
||||
INSERT INTO t2 (b) SELECT b from t2;
|
||||
INSERT INTO t2 (b) SELECT b from t2;
|
||||
INSERT INTO t2 (b) SELECT b from t2;
|
||||
INSERT INTO t2 (b) SELECT b from t2;
|
||||
INSERT INTO t2 (b) SELECT b from t2;
|
||||
INSERT INTO t2 (b) SELECT b from t2;
|
||||
INSERT INTO t2 (b) SELECT b from t2;
|
||||
INSERT INTO t2 (b) SELECT b from t2;
|
||||
INSERT INTO t2 (b) SELECT b from t2;
|
||||
--enable_query_log
|
||||
--disable_query_log
|
||||
let $size = 40;
|
||||
while ($size)
|
||||
{
|
||||
let $j = 100 * $size;
|
||||
eval delete from t2 where a between $j - 20 and $j;
|
||||
dec $size;
|
||||
}
|
||||
--enable_query_log
|
||||
optimize table t2;
|
||||
--source include/restart_mysqld.inc
|
||||
select count(*) from t2 force index(second);
|
||||
--let $second_before = query_get_value(select count(*) as Value from information_schema.innodb_buffer_page where table_name like '%t2%' and index_name = 'second', Value, 1)
|
||||
--echo The page should have room for about 20 insertions
|
||||
insert into t2 values(1181, REPEAT('A', 16));
|
||||
insert into t2 values(1191, REPEAT('A', 16));
|
||||
insert into t2 values(1182, REPEAT('A', 16));
|
||||
insert into t2 values(1192, REPEAT('A', 16));
|
||||
insert into t2 values(1183, REPEAT('A', 16));
|
||||
insert into t2 values(1193, REPEAT('A', 16));
|
||||
insert into t2 values(1184, REPEAT('A', 16));
|
||||
insert into t2 values(1194, REPEAT('A', 16));
|
||||
insert into t2 values(1185, REPEAT('A', 16));
|
||||
insert into t2 values(1195, REPEAT('A', 16));
|
||||
insert into t2 values(1186, REPEAT('A', 16));
|
||||
insert into t2 values(1196, REPEAT('A', 16));
|
||||
insert into t2 values(1187, REPEAT('A', 16));
|
||||
insert into t2 values(1197, REPEAT('A', 16));
|
||||
insert into t2 values(1188, REPEAT('A', 16));
|
||||
insert into t2 values(1198, REPEAT('A', 16));
|
||||
insert into t2 values(1189, REPEAT('A', 16));
|
||||
insert into t2 values(1199, REPEAT('A', 16));
|
||||
insert into t2 values(1190, REPEAT('A', 16));
|
||||
insert into t2 values(1180, REPEAT('A', 16));
|
||||
--let $second_after = query_get_value(select count(*) as Value from information_schema.innodb_buffer_page where table_name like '%t2%' and index_name = 'second', Value, 1)
|
||||
if ($second_before != $second_after) {
|
||||
--echo Insertion caused page split on second, which should be avoided by innodb_defragment_fill_factor.
|
||||
}
|
||||
--echo More insertions will cause page split.
|
||||
insert into t2 values(1280, REPEAT('A', 16));
|
||||
insert into t2 values(1290, REPEAT('A', 16));
|
||||
insert into t2 values(1281, REPEAT('A', 16));
|
||||
insert into t2 values(1291, REPEAT('A', 16));
|
||||
--let $second_after = query_get_value(select count(*) as Value from information_schema.innodb_buffer_page where table_name like '%t2%' and index_name = 'second', Value, 1)
|
||||
if ($second_before == $second_after) {
|
||||
--echo Too much space are reserved on second index.
|
||||
}
|
||||
DROP TABLE t2;
|
||||
18
mysql-test/suite/sys_vars/r/innodb_defragment_basic.result
Normal file
18
mysql-test/suite/sys_vars/r/innodb_defragment_basic.result
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
SET @orig = @@global.innodb_defragment;
|
||||
SELECT @orig;
|
||||
@orig
|
||||
0
|
||||
SET GLOBAL innodb_defragment = OFF;
|
||||
SELECT @@global.innodb_defragment;
|
||||
@@global.innodb_defragment
|
||||
0
|
||||
SET GLOBAL innodb_defragment = ON;
|
||||
SELECT @@global.innodb_defragment;
|
||||
@@global.innodb_defragment
|
||||
1
|
||||
SET GLOBAL innodb_defragment = 100;
|
||||
ERROR 42000: Variable 'innodb_defragment' can't be set to the value of '100'
|
||||
SELECT @@global.innodb_defragment;
|
||||
@@global.innodb_defragment
|
||||
1
|
||||
SET GLOBAL innodb_defragment = @orig;
|
||||
|
|
@ -0,0 +1,37 @@
|
|||
SET @start_innodb_defragment_fill_factor = @@global.innodb_defragment_fill_factor;
|
||||
SELECT @start_innodb_defragment_fill_factor;
|
||||
@start_innodb_defragment_fill_factor
|
||||
0.9
|
||||
SELECT COUNT(@@global.innodb_defragment_fill_factor);
|
||||
COUNT(@@global.innodb_defragment_fill_factor)
|
||||
1
|
||||
SET @@global.innodb_defragment_fill_factor = 0.77777777777777;
|
||||
SELECT @@global.innodb_defragment_fill_factor;
|
||||
@@global.innodb_defragment_fill_factor
|
||||
0.777778
|
||||
SET @@global.innodb_defragment_fill_factor = 1;
|
||||
SELECT @@global.innodb_defragment_fill_factor;
|
||||
@@global.innodb_defragment_fill_factor
|
||||
1.000000
|
||||
SET @@global.innodb_defragment_fill_factor = 0.7;
|
||||
SELECT @@global.innodb_defragment_fill_factor;
|
||||
@@global.innodb_defragment_fill_factor
|
||||
0.700000
|
||||
SET @@global.innodb_defragment_fill_factor = -1;
|
||||
Warnings:
|
||||
Warning 1292 Truncated incorrect innodb_defragment_fill_factor value: '-1'
|
||||
SELECT @@global.innodb_defragment_fill_factor;
|
||||
@@global.innodb_defragment_fill_factor
|
||||
0.700000
|
||||
SET @@global.innodb_defragment_fill_factor = 2;
|
||||
Warnings:
|
||||
Warning 1292 Truncated incorrect innodb_defragment_fill_factor value: '2'
|
||||
SELECT @@global.innodb_defragment_fill_factor;
|
||||
@@global.innodb_defragment_fill_factor
|
||||
1.000000
|
||||
SET @@global.innodb_defragment_fill_factor = "abc";
|
||||
ERROR 42000: Incorrect argument type to variable 'innodb_defragment_fill_factor'
|
||||
SELECT @@global.innodb_defragment_fill_factor;
|
||||
@@global.innodb_defragment_fill_factor
|
||||
1.000000
|
||||
SET @@global.innodb_defragment_fill_factor = @start_innodb_defragment_fill_factor;
|
||||
|
|
@ -0,0 +1,42 @@
|
|||
SET @start_innodb_defragment_fill_factor_n_recs = @@global.innodb_defragment_fill_factor_n_recs;
|
||||
SELECT @start_innodb_defragment_fill_factor_n_recs;
|
||||
@start_innodb_defragment_fill_factor_n_recs
|
||||
20
|
||||
SELECT COUNT(@@global.innodb_defragment_fill_factor_n_recs);
|
||||
COUNT(@@global.innodb_defragment_fill_factor_n_recs)
|
||||
1
|
||||
SET @@global.innodb_defragment_fill_factor_n_recs = 50;
|
||||
SELECT @@global.innodb_defragment_fill_factor_n_recs;
|
||||
@@global.innodb_defragment_fill_factor_n_recs
|
||||
50
|
||||
SET @@global.innodb_defragment_fill_factor_n_recs = 100;
|
||||
SELECT @@global.innodb_defragment_fill_factor_n_recs;
|
||||
@@global.innodb_defragment_fill_factor_n_recs
|
||||
100
|
||||
SET @@global.innodb_defragment_fill_factor_n_recs = 1;
|
||||
SELECT @@global.innodb_defragment_fill_factor_n_recs;
|
||||
@@global.innodb_defragment_fill_factor_n_recs
|
||||
1
|
||||
SET @@global.innodb_defragment_fill_factor_n_recs = -1;
|
||||
Warnings:
|
||||
Warning 1292 Truncated incorrect innodb_defragment_fill_factor_n_ value: '-1'
|
||||
SELECT @@global.innodb_defragment_fill_factor_n_recs;
|
||||
@@global.innodb_defragment_fill_factor_n_recs
|
||||
1
|
||||
SET @@global.innodb_defragment_fill_factor_n_recs = 10000;
|
||||
Warnings:
|
||||
Warning 1292 Truncated incorrect innodb_defragment_fill_factor_n_ value: '10000'
|
||||
SELECT @@global.innodb_defragment_fill_factor_n_recs;
|
||||
@@global.innodb_defragment_fill_factor_n_recs
|
||||
100
|
||||
SET @@global.innodb_defragment_fill_factor_n_recs = 10.5;
|
||||
ERROR 42000: Incorrect argument type to variable 'innodb_defragment_fill_factor_n_recs'
|
||||
SELECT @@global.innodb_defragment_fill_factor_n_recs;
|
||||
@@global.innodb_defragment_fill_factor_n_recs
|
||||
100
|
||||
SET @@global.innodb_defragment_fill_factor_n_recs = "abc";
|
||||
ERROR 42000: Incorrect argument type to variable 'innodb_defragment_fill_factor_n_recs'
|
||||
SELECT @@global.innodb_defragment_fill_factor_n_recs;
|
||||
@@global.innodb_defragment_fill_factor_n_recs
|
||||
100
|
||||
SET @@global.innodb_defragment_fill_factor_n_recs = @start_innodb_defragment_fill_factor_n_recs;
|
||||
|
|
@ -0,0 +1,42 @@
|
|||
SET @start_innodb_defragment_frequency = @@global.innodb_defragment_frequency;
|
||||
SELECT @start_innodb_defragment_frequency;
|
||||
@start_innodb_defragment_frequency
|
||||
40
|
||||
SELECT COUNT(@@global.innodb_defragment_frequency);
|
||||
COUNT(@@global.innodb_defragment_frequency)
|
||||
1
|
||||
SET @@global.innodb_defragment_frequency = 200;
|
||||
SELECT @@global.innodb_defragment_frequency;
|
||||
@@global.innodb_defragment_frequency
|
||||
200
|
||||
SET @@global.innodb_defragment_frequency = 1;
|
||||
SELECT @@global.innodb_defragment_frequency;
|
||||
@@global.innodb_defragment_frequency
|
||||
1
|
||||
SET @@global.innodb_defragment_frequency = 1000;
|
||||
SELECT @@global.innodb_defragment_frequency;
|
||||
@@global.innodb_defragment_frequency
|
||||
1000
|
||||
SET @@global.innodb_defragment_frequency = -1;
|
||||
Warnings:
|
||||
Warning 1292 Truncated incorrect innodb_defragment_frequency value: '-1'
|
||||
SELECT @@global.innodb_defragment_frequency;
|
||||
@@global.innodb_defragment_frequency
|
||||
1
|
||||
SET @@global.innodb_defragment_frequency = 10000;
|
||||
Warnings:
|
||||
Warning 1292 Truncated incorrect innodb_defragment_frequency value: '10000'
|
||||
SELECT @@global.innodb_defragment_frequency;
|
||||
@@global.innodb_defragment_frequency
|
||||
1000
|
||||
SET @@global.innodb_defragment_frequency = 10.5;
|
||||
ERROR 42000: Incorrect argument type to variable 'innodb_defragment_frequency'
|
||||
SELECT @@global.innodb_defragment_frequency;
|
||||
@@global.innodb_defragment_frequency
|
||||
1000
|
||||
SET @@global.innodb_defragment_frequency = "abc";
|
||||
ERROR 42000: Incorrect argument type to variable 'innodb_defragment_frequency'
|
||||
SELECT @@global.innodb_defragment_frequency;
|
||||
@@global.innodb_defragment_frequency
|
||||
1000
|
||||
SET @@global.innodb_defragment_frequency = @start_innodb_defragment_frequency;
|
||||
|
|
@ -0,0 +1,28 @@
|
|||
SET @start_innodb_defragment_n_pages = @@global.innodb_defragment_n_pages;
|
||||
SELECT @start_innodb_defragment_n_pages;
|
||||
@start_innodb_defragment_n_pages
|
||||
7
|
||||
SELECT COUNT(@@global.innodb_defragment_n_pages);
|
||||
COUNT(@@global.innodb_defragment_n_pages)
|
||||
1
|
||||
SET @@global.innodb_defragment_n_pages = 1;
|
||||
Warnings:
|
||||
Warning 1292 Truncated incorrect innodb_defragment_n_pages value: '1'
|
||||
SELECT @@global.innodb_defragment_n_pages;
|
||||
@@global.innodb_defragment_n_pages
|
||||
2
|
||||
SET @@global.innodb_defragment_n_pages = 2;
|
||||
SELECT @@global.innodb_defragment_n_pages;
|
||||
@@global.innodb_defragment_n_pages
|
||||
2
|
||||
SET @@global.innodb_defragment_n_pages = 32;
|
||||
SELECT @@global.innodb_defragment_n_pages;
|
||||
@@global.innodb_defragment_n_pages
|
||||
32
|
||||
SET @@global.innodb_defragment_n_pages = 64;
|
||||
Warnings:
|
||||
Warning 1292 Truncated incorrect innodb_defragment_n_pages value: '64'
|
||||
SELECT @@global.innodb_defragment_n_pages;
|
||||
@@global.innodb_defragment_n_pages
|
||||
32
|
||||
SET @@global.innodb_defragment_n_pages = @start_innodb_defragment_n_pages;
|
||||
|
|
@ -0,0 +1,33 @@
|
|||
SET @start_innodb_defragment_stats_accuracy = @@global.innodb_defragment_stats_accuracy;
|
||||
SELECT @start_innodb_defragment_stats_accuracy;
|
||||
@start_innodb_defragment_stats_accuracy
|
||||
0
|
||||
SELECT COUNT(@@global.innodb_defragment_stats_accuracy);
|
||||
COUNT(@@global.innodb_defragment_stats_accuracy)
|
||||
1
|
||||
SET @@global.innodb_defragment_stats_accuracy = 1;
|
||||
SELECT @@global.innodb_defragment_stats_accuracy;
|
||||
@@global.innodb_defragment_stats_accuracy
|
||||
1
|
||||
SET @@global.innodb_defragment_stats_accuracy = 1000;
|
||||
SELECT @@global.innodb_defragment_stats_accuracy;
|
||||
@@global.innodb_defragment_stats_accuracy
|
||||
1000
|
||||
SET @@global.innodb_defragment_stats_accuracy = -1;
|
||||
Warnings:
|
||||
Warning 1292 Truncated incorrect innodb_defragment_stats_accuracy value: '-1'
|
||||
SELECT @@global.innodb_defragment_stats_accuracy;
|
||||
@@global.innodb_defragment_stats_accuracy
|
||||
0
|
||||
SET @@global.innodb_defragment_stats_accuracy = 1000000000000;
|
||||
Warnings:
|
||||
Warning 1292 Truncated incorrect innodb_defragment_stats_accuracy value: '1000000000000'
|
||||
SELECT @@global.innodb_defragment_stats_accuracy;
|
||||
@@global.innodb_defragment_stats_accuracy
|
||||
4294967295
|
||||
SET @@global.innodb_defragment_stats_accuracy = "abc";
|
||||
ERROR 42000: Incorrect argument type to variable 'innodb_defragment_stats_accuracy'
|
||||
SELECT @@global.innodb_defragment_stats_accuracy;
|
||||
@@global.innodb_defragment_stats_accuracy
|
||||
4294967295
|
||||
SET @@global.innodb_defragment_stats_accuracy = @start_innodb_defragment_stats_accuracy;
|
||||
20
mysql-test/suite/sys_vars/t/innodb_defragment_basic.test
Normal file
20
mysql-test/suite/sys_vars/t/innodb_defragment_basic.test
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
-- source include/have_innodb.inc
|
||||
|
||||
# Check the default value
|
||||
SET @orig = @@global.innodb_defragment;
|
||||
SELECT @orig;
|
||||
|
||||
# Turn off
|
||||
SET GLOBAL innodb_defragment = OFF;
|
||||
SELECT @@global.innodb_defragment;
|
||||
|
||||
# Turn on
|
||||
SET GLOBAL innodb_defragment = ON;
|
||||
SELECT @@global.innodb_defragment;
|
||||
|
||||
# Wrong value
|
||||
--error ER_WRONG_VALUE_FOR_VAR
|
||||
SET GLOBAL innodb_defragment = 100;
|
||||
SELECT @@global.innodb_defragment;
|
||||
|
||||
SET GLOBAL innodb_defragment = @orig;
|
||||
|
|
@ -0,0 +1,27 @@
|
|||
--source include/have_innodb.inc
|
||||
|
||||
SET @start_innodb_defragment_fill_factor = @@global.innodb_defragment_fill_factor;
|
||||
SELECT @start_innodb_defragment_fill_factor;
|
||||
|
||||
SELECT COUNT(@@global.innodb_defragment_fill_factor);
|
||||
|
||||
SET @@global.innodb_defragment_fill_factor = 0.77777777777777;
|
||||
SELECT @@global.innodb_defragment_fill_factor;
|
||||
|
||||
SET @@global.innodb_defragment_fill_factor = 1;
|
||||
SELECT @@global.innodb_defragment_fill_factor;
|
||||
|
||||
SET @@global.innodb_defragment_fill_factor = 0.7;
|
||||
SELECT @@global.innodb_defragment_fill_factor;
|
||||
|
||||
SET @@global.innodb_defragment_fill_factor = -1;
|
||||
SELECT @@global.innodb_defragment_fill_factor;
|
||||
|
||||
SET @@global.innodb_defragment_fill_factor = 2;
|
||||
SELECT @@global.innodb_defragment_fill_factor;
|
||||
|
||||
--Error ER_WRONG_TYPE_FOR_VAR
|
||||
SET @@global.innodb_defragment_fill_factor = "abc";
|
||||
SELECT @@global.innodb_defragment_fill_factor;
|
||||
|
||||
SET @@global.innodb_defragment_fill_factor = @start_innodb_defragment_fill_factor;
|
||||
|
|
@ -0,0 +1,31 @@
|
|||
--source include/have_innodb.inc
|
||||
|
||||
SET @start_innodb_defragment_fill_factor_n_recs = @@global.innodb_defragment_fill_factor_n_recs;
|
||||
SELECT @start_innodb_defragment_fill_factor_n_recs;
|
||||
|
||||
SELECT COUNT(@@global.innodb_defragment_fill_factor_n_recs);
|
||||
|
||||
SET @@global.innodb_defragment_fill_factor_n_recs = 50;
|
||||
SELECT @@global.innodb_defragment_fill_factor_n_recs;
|
||||
|
||||
SET @@global.innodb_defragment_fill_factor_n_recs = 100;
|
||||
SELECT @@global.innodb_defragment_fill_factor_n_recs;
|
||||
|
||||
SET @@global.innodb_defragment_fill_factor_n_recs = 1;
|
||||
SELECT @@global.innodb_defragment_fill_factor_n_recs;
|
||||
|
||||
SET @@global.innodb_defragment_fill_factor_n_recs = -1;
|
||||
SELECT @@global.innodb_defragment_fill_factor_n_recs;
|
||||
|
||||
SET @@global.innodb_defragment_fill_factor_n_recs = 10000;
|
||||
SELECT @@global.innodb_defragment_fill_factor_n_recs;
|
||||
|
||||
--Error ER_WRONG_TYPE_FOR_VAR
|
||||
SET @@global.innodb_defragment_fill_factor_n_recs = 10.5;
|
||||
SELECT @@global.innodb_defragment_fill_factor_n_recs;
|
||||
|
||||
--Error ER_WRONG_TYPE_FOR_VAR
|
||||
SET @@global.innodb_defragment_fill_factor_n_recs = "abc";
|
||||
SELECT @@global.innodb_defragment_fill_factor_n_recs;
|
||||
|
||||
SET @@global.innodb_defragment_fill_factor_n_recs = @start_innodb_defragment_fill_factor_n_recs;
|
||||
|
|
@ -0,0 +1,37 @@
|
|||
--source include/have_innodb.inc
|
||||
|
||||
SET @start_innodb_defragment_frequency = @@global.innodb_defragment_frequency;
|
||||
SELECT @start_innodb_defragment_frequency;
|
||||
|
||||
SELECT COUNT(@@global.innodb_defragment_frequency);
|
||||
|
||||
# test valid value
|
||||
SET @@global.innodb_defragment_frequency = 200;
|
||||
SELECT @@global.innodb_defragment_frequency;
|
||||
|
||||
# test valid min
|
||||
SET @@global.innodb_defragment_frequency = 1;
|
||||
SELECT @@global.innodb_defragment_frequency;
|
||||
|
||||
# test valid max
|
||||
SET @@global.innodb_defragment_frequency = 1000;
|
||||
SELECT @@global.innodb_defragment_frequency;
|
||||
|
||||
# test invalid value < min
|
||||
SET @@global.innodb_defragment_frequency = -1;
|
||||
SELECT @@global.innodb_defragment_frequency;
|
||||
|
||||
# test invalid value > max
|
||||
SET @@global.innodb_defragment_frequency = 10000;
|
||||
SELECT @@global.innodb_defragment_frequency;
|
||||
|
||||
# test wrong type
|
||||
--Error ER_WRONG_TYPE_FOR_VAR
|
||||
SET @@global.innodb_defragment_frequency = 10.5;
|
||||
SELECT @@global.innodb_defragment_frequency;
|
||||
|
||||
--Error ER_WRONG_TYPE_FOR_VAR
|
||||
SET @@global.innodb_defragment_frequency = "abc";
|
||||
SELECT @@global.innodb_defragment_frequency;
|
||||
|
||||
SET @@global.innodb_defragment_frequency = @start_innodb_defragment_frequency;
|
||||
|
|
@ -0,0 +1,22 @@
|
|||
--source include/have_innodb.inc
|
||||
|
||||
SET @start_innodb_defragment_n_pages = @@global.innodb_defragment_n_pages;
|
||||
SELECT @start_innodb_defragment_n_pages;
|
||||
|
||||
SELECT COUNT(@@global.innodb_defragment_n_pages);
|
||||
|
||||
SET @@global.innodb_defragment_n_pages = 1;
|
||||
SELECT @@global.innodb_defragment_n_pages;
|
||||
|
||||
SET @@global.innodb_defragment_n_pages = 2;
|
||||
SELECT @@global.innodb_defragment_n_pages;
|
||||
|
||||
SET @@global.innodb_defragment_n_pages = 32;
|
||||
SELECT @@global.innodb_defragment_n_pages;
|
||||
|
||||
SET @@global.innodb_defragment_n_pages = 64;
|
||||
SELECT @@global.innodb_defragment_n_pages;
|
||||
|
||||
SET @@global.innodb_defragment_n_pages = @start_innodb_defragment_n_pages;
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,24 @@
|
|||
--source include/have_innodb.inc
|
||||
|
||||
SET @start_innodb_defragment_stats_accuracy = @@global.innodb_defragment_stats_accuracy;
|
||||
SELECT @start_innodb_defragment_stats_accuracy;
|
||||
|
||||
SELECT COUNT(@@global.innodb_defragment_stats_accuracy);
|
||||
|
||||
SET @@global.innodb_defragment_stats_accuracy = 1;
|
||||
SELECT @@global.innodb_defragment_stats_accuracy;
|
||||
|
||||
SET @@global.innodb_defragment_stats_accuracy = 1000;
|
||||
SELECT @@global.innodb_defragment_stats_accuracy;
|
||||
|
||||
SET @@global.innodb_defragment_stats_accuracy = -1;
|
||||
SELECT @@global.innodb_defragment_stats_accuracy;
|
||||
|
||||
SET @@global.innodb_defragment_stats_accuracy = 1000000000000;
|
||||
SELECT @@global.innodb_defragment_stats_accuracy;
|
||||
|
||||
--Error ER_WRONG_TYPE_FOR_VAR
|
||||
SET @@global.innodb_defragment_stats_accuracy = "abc";
|
||||
SELECT @@global.innodb_defragment_stats_accuracy;
|
||||
|
||||
SET @@global.innodb_defragment_stats_accuracy = @start_innodb_defragment_stats_accuracy;
|
||||
|
|
@ -1 +1,2 @@
|
|||
--default-storage-engine=MyISAM
|
||||
--innodb-defragment=0
|
||||
|
|
|
|||
|
|
@ -285,6 +285,7 @@ SET(INNOBASE_SOURCES
|
|||
btr/btr0cur.cc
|
||||
btr/btr0pcur.cc
|
||||
btr/btr0sea.cc
|
||||
btr/btr0defragment.cc
|
||||
buf/buf0buddy.cc
|
||||
buf/buf0buf.cc
|
||||
buf/buf0dblwr.cc
|
||||
|
|
@ -395,7 +396,8 @@ SET(INNOBASE_SOURCES
|
|||
ut/ut0rnd.cc
|
||||
ut/ut0ut.cc
|
||||
ut/ut0vec.cc
|
||||
ut/ut0wqueue.cc)
|
||||
ut/ut0wqueue.cc
|
||||
ut/ut0timer.cc)
|
||||
|
||||
IF(WITH_INNODB)
|
||||
# Legacy option
|
||||
|
|
|
|||
|
|
@ -38,6 +38,7 @@ Created 6/2/1994 Heikki Tuuri
|
|||
#include "btr0cur.h"
|
||||
#include "btr0sea.h"
|
||||
#include "btr0pcur.h"
|
||||
#include "btr0defragment.h"
|
||||
#include "rem0cmp.h"
|
||||
#include "lock0lock.h"
|
||||
#include "ibuf0ibuf.h"
|
||||
|
|
@ -1192,6 +1193,32 @@ btr_get_size(
|
|||
ulint flag, /*!< in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */
|
||||
mtr_t* mtr) /*!< in/out: mini-transaction where index
|
||||
is s-latched */
|
||||
{
|
||||
ulint used;
|
||||
if (flag == BTR_N_LEAF_PAGES) {
|
||||
btr_get_size_and_reserved(index, flag, &used, mtr);
|
||||
return used;
|
||||
} else if (flag == BTR_TOTAL_SIZE) {
|
||||
return btr_get_size_and_reserved(index, flag, &used, mtr);
|
||||
} else {
|
||||
ut_error;
|
||||
}
|
||||
return (ULINT_UNDEFINED);
|
||||
}
|
||||
|
||||
/**************************************************************//**
|
||||
Gets the number of reserved and used pages in a B-tree.
|
||||
@return number of pages reserved, or ULINT_UNDEFINED if the index
|
||||
is unavailable */
|
||||
UNIV_INTERN
|
||||
ulint
|
||||
btr_get_size_and_reserved(
|
||||
/*======================*/
|
||||
dict_index_t* index, /*!< in: index */
|
||||
ulint flag, /*!< in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */
|
||||
ulint* used, /*!< out: number of pages used (<= reserved) */
|
||||
mtr_t* mtr) /*!< in/out: mini-transaction where index
|
||||
is s-latched */
|
||||
{
|
||||
fseg_header_t* seg_header;
|
||||
page_t* root;
|
||||
|
|
@ -1201,6 +1228,8 @@ btr_get_size(
|
|||
ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
|
||||
MTR_MEMO_S_LOCK));
|
||||
|
||||
ut_a(flag == BTR_N_LEAF_PAGES || flag == BTR_TOTAL_SIZE);
|
||||
|
||||
if (index->page == FIL_NULL || dict_index_is_online_ddl(index)
|
||||
|| *index->name == TEMP_INDEX_PREFIX) {
|
||||
return(ULINT_UNDEFINED);
|
||||
|
|
@ -1208,21 +1237,16 @@ btr_get_size(
|
|||
|
||||
root = btr_root_get(index, mtr);
|
||||
|
||||
if (flag == BTR_N_LEAF_PAGES) {
|
||||
seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
|
||||
seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
|
||||
|
||||
fseg_n_reserved_pages(seg_header, &n, mtr);
|
||||
n = fseg_n_reserved_pages(seg_header, used, mtr);
|
||||
|
||||
} else if (flag == BTR_TOTAL_SIZE) {
|
||||
if (flag == BTR_TOTAL_SIZE) {
|
||||
seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
|
||||
|
||||
n = fseg_n_reserved_pages(seg_header, &dummy, mtr);
|
||||
|
||||
seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
|
||||
|
||||
n += fseg_n_reserved_pages(seg_header, &dummy, mtr);
|
||||
} else {
|
||||
ut_error;
|
||||
*used += dummy;
|
||||
|
||||
}
|
||||
|
||||
return(n);
|
||||
|
|
@ -1971,7 +1995,7 @@ IBUF_BITMAP_FREE is unaffected by reorganization.
|
|||
|
||||
@retval true if the operation was successful
|
||||
@retval false if it is a compressed page, and recompression failed */
|
||||
static __attribute__((nonnull))
|
||||
UNIV_INTERN
|
||||
bool
|
||||
btr_page_reorganize_block(
|
||||
/*======================*/
|
||||
|
|
@ -2923,6 +2947,12 @@ func_start:
|
|||
new_page_zip = buf_block_get_page_zip(new_block);
|
||||
btr_page_create(new_block, new_page_zip, cursor->index,
|
||||
btr_page_get_level(page, mtr), mtr);
|
||||
/* Only record the leaf level page splits. */
|
||||
if (btr_page_get_level(page, mtr) == 0) {
|
||||
cursor->index->stat_defrag_n_page_split ++;
|
||||
cursor->index->stat_defrag_modified_counter ++;
|
||||
btr_defragment_save_defrag_stats_if_needed(cursor->index);
|
||||
}
|
||||
|
||||
/* 3. Calculate the first record on the upper half-page, and the
|
||||
first record (move_limit) on original page which ends up on the
|
||||
|
|
@ -3181,31 +3211,9 @@ func_exit:
|
|||
return(rec);
|
||||
}
|
||||
|
||||
#ifdef UNIV_SYNC_DEBUG
|
||||
/*************************************************************//**
|
||||
Removes a page from the level list of pages.
|
||||
@param space in: space where removed
|
||||
@param zip_size in: compressed page size in bytes, or 0 for uncompressed
|
||||
@param page in/out: page to remove
|
||||
@param index in: index tree
|
||||
@param mtr in/out: mini-transaction */
|
||||
# define btr_level_list_remove(space,zip_size,page,index,mtr) \
|
||||
btr_level_list_remove_func(space,zip_size,page,index,mtr)
|
||||
#else /* UNIV_SYNC_DEBUG */
|
||||
/*************************************************************//**
|
||||
Removes a page from the level list of pages.
|
||||
@param space in: space where removed
|
||||
@param zip_size in: compressed page size in bytes, or 0 for uncompressed
|
||||
@param page in/out: page to remove
|
||||
@param index in: index tree
|
||||
@param mtr in/out: mini-transaction */
|
||||
# define btr_level_list_remove(space,zip_size,page,index,mtr) \
|
||||
btr_level_list_remove_func(space,zip_size,page,mtr)
|
||||
#endif /* UNIV_SYNC_DEBUG */
|
||||
|
||||
/*************************************************************//**
|
||||
Removes a page from the level list of pages. */
|
||||
static __attribute__((nonnull))
|
||||
UNIV_INTERN
|
||||
void
|
||||
btr_level_list_remove_func(
|
||||
/*=======================*/
|
||||
|
|
@ -3377,7 +3385,7 @@ btr_node_ptr_delete(
|
|||
If page is the only on its level, this function moves its records to the
|
||||
father page, thus reducing the tree height.
|
||||
@return father block */
|
||||
static
|
||||
UNIV_INTERN
|
||||
buf_block_t*
|
||||
btr_lift_page_up(
|
||||
/*=============*/
|
||||
|
|
|
|||
814
storage/innobase/btr/btr0defragment.cc
Normal file
814
storage/innobase/btr/btr0defragment.cc
Normal file
|
|
@ -0,0 +1,814 @@
|
|||
/*****************************************************************************
|
||||
|
||||
Copyright (C) 2013, 2014 Facebook, Inc. All Rights Reserved.
|
||||
Copyright (C) 2014, SkySQL Ab. All Rights Reserved.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free Software
|
||||
Foundation; version 2 of the License.
|
||||
|
||||
This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along with
|
||||
this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
|
||||
|
||||
*****************************************************************************/
|
||||
/**************************************************//**
|
||||
@file btr/btr0defragment.cc
|
||||
Index defragmentation.
|
||||
|
||||
Created 05/29/2014 Rongrong Zhong
|
||||
Modified 16/07/2014 Sunguck Lee
|
||||
Modified 30/07/2014 Jan Lindström jan.lindstrom@skysql.com
|
||||
*******************************************************/
|
||||
|
||||
#include "btr0defragment.h"
|
||||
#ifndef UNIV_HOTBACKUP
|
||||
#include "btr0cur.h"
|
||||
#include "btr0sea.h"
|
||||
#include "btr0pcur.h"
|
||||
#include "dict0stats.h"
|
||||
#include "dict0stats_bg.h"
|
||||
#include "ibuf0ibuf.h"
|
||||
#include "lock0lock.h"
|
||||
#include "srv0start.h"
|
||||
#include "ut0timer.h"
|
||||
|
||||
#include <list>
|
||||
|
||||
/**************************************************//**
|
||||
Custom nullptr implementation for under g++ 4.6
|
||||
*******************************************************/
|
||||
// #pragma once
|
||||
namespace std
|
||||
{
|
||||
// based on SC22/WG21/N2431 = J16/07-0301
|
||||
struct nullptr_t
|
||||
{
|
||||
template<typename any> operator any * () const
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
template<class any, typename T> operator T any:: * () const
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef _MSC_VER
|
||||
struct pad {};
|
||||
pad __[sizeof(void*)/sizeof(pad)];
|
||||
#else
|
||||
char __[sizeof(void*)];
|
||||
#endif
|
||||
private:
|
||||
// nullptr_t();// {}
|
||||
// nullptr_t(const nullptr_t&);
|
||||
// void operator = (const nullptr_t&);
|
||||
void operator &() const;
|
||||
template<typename any> void operator +(any) const
|
||||
{
|
||||
/*I Love MSVC 2005!*/
|
||||
}
|
||||
template<typename any> void operator -(any) const
|
||||
{
|
||||
/*I Love MSVC 2005!*/
|
||||
}
|
||||
};
|
||||
static const nullptr_t __nullptr = {};
|
||||
}
|
||||
|
||||
#ifndef nullptr
|
||||
#define nullptr std::__nullptr
|
||||
#endif
|
||||
/**************************************************//**
|
||||
End of Custom nullptr implementation for under g++ 4.6
|
||||
*******************************************************/
|
||||
|
||||
/* When there's no work, either because defragment is disabled, or because no
|
||||
query is submitted, thread checks state every BTR_DEFRAGMENT_SLEEP_IN_USECS.*/
|
||||
#define BTR_DEFRAGMENT_SLEEP_IN_USECS 1000000
|
||||
/* Reduce the target page size by this amount when compression failure happens
|
||||
during defragmentaiton. 512 is chosen because it's a power of 2 and it is about
|
||||
3% of the page size. When there are compression failures in defragmentation,
|
||||
our goal is to get a decent defrag ratio with as few compression failure as
|
||||
possible. From experimentation it seems that reduce the target size by 512 every
|
||||
time will make sure the page is compressible within a couple of iterations. */
|
||||
#define BTR_DEFRAGMENT_PAGE_REDUCTION_STEP_SIZE 512
|
||||
|
||||
/* Work queue for defragmentation. */
|
||||
typedef std::list<btr_defragment_item_t*> btr_defragment_wq_t;
|
||||
static btr_defragment_wq_t btr_defragment_wq;
|
||||
|
||||
/* Mutex protecting the defragmentation work queue.*/
|
||||
ib_mutex_t btr_defragment_mutex;
|
||||
#ifdef UNIV_PFS_MUTEX
|
||||
UNIV_INTERN mysql_pfs_key_t btr_defragment_mutex_key;
|
||||
#endif /* UNIV_PFS_MUTEX */
|
||||
|
||||
/* Number of compression failures caused by defragmentation since server
|
||||
start. */
|
||||
ulint btr_defragment_compression_failures = 0;
|
||||
/* Number of btr_defragment_n_pages calls that altered page but didn't
|
||||
manage to release any page. */
|
||||
ulint btr_defragment_failures = 0;
|
||||
/* Total number of btr_defragment_n_pages calls that altered page.
|
||||
The difference between btr_defragment_count and btr_defragment_failures shows
|
||||
the amount of effort wasted. */
|
||||
ulint btr_defragment_count = 0;
|
||||
|
||||
/******************************************************************//**
|
||||
Constructor for btr_defragment_item_t. */
|
||||
btr_defragment_item_t::btr_defragment_item_t(
|
||||
btr_pcur_t* pcur,
|
||||
os_event_t event)
|
||||
{
|
||||
this->pcur = pcur;
|
||||
this->event = event;
|
||||
this->removed = false;
|
||||
this->last_processed = 0;
|
||||
}
|
||||
|
||||
/******************************************************************//**
|
||||
Destructor for btr_defragment_item_t. */
|
||||
btr_defragment_item_t::~btr_defragment_item_t() {
|
||||
if (this->pcur) {
|
||||
btr_pcur_free_for_mysql(this->pcur);
|
||||
}
|
||||
if (this->event) {
|
||||
os_event_set(this->event);
|
||||
}
|
||||
}
|
||||
|
||||
/******************************************************************//**
|
||||
Initialize defragmentation. */
|
||||
void
|
||||
btr_defragment_init()
|
||||
{
|
||||
srv_defragment_interval = ut_microseconds_to_timer(
|
||||
1000000.0 / srv_defragment_frequency);
|
||||
mutex_create(btr_defragment_mutex_key, &btr_defragment_mutex,
|
||||
SYNC_ANY_LATCH);
|
||||
os_thread_create(btr_defragment_thread, NULL, NULL);
|
||||
}
|
||||
|
||||
/******************************************************************//**
|
||||
Shutdown defragmentation. Release all resources. */
|
||||
void
|
||||
btr_defragment_shutdown()
|
||||
{
|
||||
mutex_enter(&btr_defragment_mutex);
|
||||
list< btr_defragment_item_t* >::iterator iter = btr_defragment_wq.begin();
|
||||
while(iter != btr_defragment_wq.end()) {
|
||||
btr_defragment_item_t* item = *iter;
|
||||
iter = btr_defragment_wq.erase(iter);
|
||||
delete item;
|
||||
}
|
||||
mutex_exit(&btr_defragment_mutex);
|
||||
mutex_free(&btr_defragment_mutex);
|
||||
}
|
||||
|
||||
|
||||
/******************************************************************//**
|
||||
Functions used by the query threads: btr_defragment_xxx_index
|
||||
Query threads find/add/remove index. */
|
||||
/******************************************************************//**
|
||||
Check whether the given index is in btr_defragment_wq. We use index->id
|
||||
to identify indices. */
|
||||
bool
|
||||
btr_defragment_find_index(
|
||||
dict_index_t* index) /*!< Index to find. */
|
||||
{
|
||||
mutex_enter(&btr_defragment_mutex);
|
||||
for (list< btr_defragment_item_t* >::iterator iter = btr_defragment_wq.begin();
|
||||
iter != btr_defragment_wq.end();
|
||||
++iter) {
|
||||
btr_defragment_item_t* item = *iter;
|
||||
btr_pcur_t* pcur = item->pcur;
|
||||
btr_cur_t* cursor = btr_pcur_get_btr_cur(pcur);
|
||||
dict_index_t* idx = btr_cur_get_index(cursor);
|
||||
if (index->id == idx->id) {
|
||||
mutex_exit(&btr_defragment_mutex);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
mutex_exit(&btr_defragment_mutex);
|
||||
return false;
|
||||
}
|
||||
|
||||
/******************************************************************//**
|
||||
Query thread uses this function to add an index to btr_defragment_wq.
|
||||
Return a pointer to os_event for the query thread to wait on if this is a
|
||||
synchronized defragmentation. */
|
||||
os_event_t
|
||||
btr_defragment_add_index(
|
||||
dict_index_t* index, /*!< index to be added */
|
||||
bool async) /*!< whether this is an async defragmentation */
|
||||
{
|
||||
mtr_t mtr;
|
||||
ulint space = dict_index_get_space(index);
|
||||
ulint zip_size = dict_table_zip_size(index->table);
|
||||
ulint page_no = dict_index_get_page(index);
|
||||
mtr_start(&mtr);
|
||||
// Load index rood page.
|
||||
page_t* page = btr_page_get(space, zip_size, page_no,
|
||||
RW_NO_LATCH, index, &mtr);
|
||||
if (btr_page_get_level(page, &mtr) == 0) {
|
||||
// Index root is a leaf page, no need to defragment.
|
||||
mtr_commit(&mtr);
|
||||
return NULL;
|
||||
}
|
||||
btr_pcur_t* pcur = btr_pcur_create_for_mysql();
|
||||
os_event_t event = NULL;
|
||||
if (!async) {
|
||||
event = os_event_create();
|
||||
}
|
||||
btr_pcur_open_at_index_side(true, index, BTR_SEARCH_LEAF, pcur,
|
||||
true, 0, &mtr);
|
||||
btr_pcur_move_to_next(pcur, &mtr);
|
||||
btr_pcur_store_position(pcur, &mtr);
|
||||
mtr_commit(&mtr);
|
||||
dict_stats_empty_defrag_summary(index);
|
||||
btr_defragment_item_t* item = new btr_defragment_item_t(pcur, event);
|
||||
mutex_enter(&btr_defragment_mutex);
|
||||
btr_defragment_wq.push_back(item);
|
||||
mutex_exit(&btr_defragment_mutex);
|
||||
return event;
|
||||
}
|
||||
|
||||
/******************************************************************//**
|
||||
When table is dropped, this function is called to mark a table as removed in
|
||||
btr_efragment_wq. The difference between this function and the remove_index
|
||||
function is this will not NULL the event. */
|
||||
void
|
||||
btr_defragment_remove_table(
|
||||
dict_table_t* table) /*!< Index to be removed. */
|
||||
{
|
||||
mutex_enter(&btr_defragment_mutex);
|
||||
for (list< btr_defragment_item_t* >::iterator iter = btr_defragment_wq.begin();
|
||||
iter != btr_defragment_wq.end();
|
||||
++iter) {
|
||||
btr_defragment_item_t* item = *iter;
|
||||
btr_pcur_t* pcur = item->pcur;
|
||||
btr_cur_t* cursor = btr_pcur_get_btr_cur(pcur);
|
||||
dict_index_t* idx = btr_cur_get_index(cursor);
|
||||
if (table->id == idx->table->id) {
|
||||
item->removed = true;
|
||||
}
|
||||
}
|
||||
mutex_exit(&btr_defragment_mutex);
|
||||
}
|
||||
|
||||
/******************************************************************//**
|
||||
Query thread uses this function to mark an index as removed in
|
||||
btr_efragment_wq. */
|
||||
void
|
||||
btr_defragment_remove_index(
|
||||
dict_index_t* index) /*!< Index to be removed. */
|
||||
{
|
||||
mutex_enter(&btr_defragment_mutex);
|
||||
for (list< btr_defragment_item_t* >::iterator iter = btr_defragment_wq.begin();
|
||||
iter != btr_defragment_wq.end();
|
||||
++iter) {
|
||||
btr_defragment_item_t* item = *iter;
|
||||
btr_pcur_t* pcur = item->pcur;
|
||||
btr_cur_t* cursor = btr_pcur_get_btr_cur(pcur);
|
||||
dict_index_t* idx = btr_cur_get_index(cursor);
|
||||
if (index->id == idx->id) {
|
||||
item->removed = true;
|
||||
item->event = NULL;
|
||||
break;
|
||||
}
|
||||
}
|
||||
mutex_exit(&btr_defragment_mutex);
|
||||
}
|
||||
|
||||
/******************************************************************//**
|
||||
Functions used by defragmentation thread: btr_defragment_xxx_item.
|
||||
Defragmentation thread operates on the work *item*. It gets/removes
|
||||
item from the work queue. */
|
||||
/******************************************************************//**
|
||||
Defragment thread uses this to remove an item from btr_defragment_wq.
|
||||
When an item is removed from the work queue, all resources associated with it
|
||||
are free as well. */
|
||||
void
|
||||
btr_defragment_remove_item(
|
||||
btr_defragment_item_t* item) /*!< Item to be removed. */
|
||||
{
|
||||
mutex_enter(&btr_defragment_mutex);
|
||||
for (list< btr_defragment_item_t* >::iterator iter = btr_defragment_wq.begin();
|
||||
iter != btr_defragment_wq.end();
|
||||
++iter) {
|
||||
if (item == *iter) {
|
||||
btr_defragment_wq.erase(iter);
|
||||
delete item;
|
||||
break;
|
||||
}
|
||||
}
|
||||
mutex_exit(&btr_defragment_mutex);
|
||||
}
|
||||
|
||||
/******************************************************************//**
|
||||
Defragment thread uses this to get an item from btr_defragment_wq to work on.
|
||||
The item is not removed from the work queue so query threads can still access
|
||||
this item. We keep it this way so query threads can find and kill a
|
||||
defragmentation even if that index is being worked on. Be aware that while you
|
||||
work on this item you have no lock protection on it whatsoever. This is OK as
|
||||
long as the query threads and defragment thread won't modify the same fields
|
||||
without lock protection.
|
||||
*/
|
||||
btr_defragment_item_t*
|
||||
btr_defragment_get_item()
|
||||
{
|
||||
if (btr_defragment_wq.empty()) {
|
||||
return nullptr;
|
||||
}
|
||||
mutex_enter(&btr_defragment_mutex);
|
||||
list< btr_defragment_item_t* >::iterator iter = btr_defragment_wq.begin();
|
||||
if (iter == btr_defragment_wq.end()) {
|
||||
iter = btr_defragment_wq.begin();
|
||||
}
|
||||
btr_defragment_item_t* item = *iter;
|
||||
iter++;
|
||||
mutex_exit(&btr_defragment_mutex);
|
||||
return item;
|
||||
}
|
||||
|
||||
/*********************************************************************//**
|
||||
Check whether we should save defragmentation statistics to persistent storage.
|
||||
Currently we save the stats to persistent storage every 100 updates. */
|
||||
UNIV_INTERN
|
||||
void
|
||||
btr_defragment_save_defrag_stats_if_needed(
|
||||
dict_index_t* index) /*!< in: index */
|
||||
{
|
||||
if (srv_defragment_stats_accuracy != 0 // stats tracking disabled
|
||||
&& dict_index_get_space(index) != 0 // do not track system tables
|
||||
&& index->stat_defrag_modified_counter
|
||||
>= srv_defragment_stats_accuracy) {
|
||||
dict_stats_defrag_pool_add(index);
|
||||
index->stat_defrag_modified_counter = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/*********************************************************************//**
|
||||
Main defragment functionalities used by defragment thread.*/
|
||||
/*************************************************************//**
|
||||
Calculate number of records from beginning of block that can
|
||||
fit into size_limit
|
||||
@return number of records */
|
||||
UNIV_INTERN
|
||||
ulint
|
||||
btr_defragment_calc_n_recs_for_size(
|
||||
buf_block_t* block, /*!< in: B-tree page */
|
||||
dict_index_t* index, /*!< in: index of the page */
|
||||
ulint size_limit, /*!< in: size limit to fit records in */
|
||||
ulint* n_recs_size) /*!< out: actual size of the records that fit
|
||||
in size_limit. */
|
||||
{
|
||||
page_t* page = buf_block_get_frame(block);
|
||||
ulint n_recs = 0;
|
||||
ulint offsets_[REC_OFFS_NORMAL_SIZE];
|
||||
ulint* offsets = offsets_;
|
||||
rec_offs_init(offsets_);
|
||||
mem_heap_t* heap = NULL;
|
||||
ulint size = 0;
|
||||
page_cur_t cur;
|
||||
|
||||
page_cur_set_before_first(block, &cur);
|
||||
page_cur_move_to_next(&cur);
|
||||
while (page_cur_get_rec(&cur) != page_get_supremum_rec(page)) {
|
||||
rec_t* cur_rec = page_cur_get_rec(&cur);
|
||||
offsets = rec_get_offsets(cur_rec, index, offsets,
|
||||
ULINT_UNDEFINED, &heap);
|
||||
ulint rec_size = rec_offs_size(offsets);
|
||||
size += rec_size;
|
||||
if (size > size_limit) {
|
||||
size = size - rec_size;
|
||||
break;
|
||||
}
|
||||
n_recs ++;
|
||||
page_cur_move_to_next(&cur);
|
||||
}
|
||||
*n_recs_size = size;
|
||||
return n_recs;
|
||||
}
|
||||
|
||||
/*************************************************************//**
|
||||
Merge as many records from the from_block to the to_block. Delete
|
||||
the from_block if all records are successfully merged to to_block.
|
||||
@return the to_block to target for next merge operation. */
|
||||
UNIV_INTERN
|
||||
buf_block_t*
|
||||
btr_defragment_merge_pages(
|
||||
dict_index_t* index, /*!< in: index tree */
|
||||
buf_block_t* from_block, /*!< in: origin of merge */
|
||||
buf_block_t* to_block, /*!< in: destination of merge */
|
||||
ulint zip_size, /*!< in: zip size of the block */
|
||||
ulint reserved_space, /*!< in: space reserved for future
|
||||
insert to avoid immediate page split */
|
||||
ulint* max_data_size, /*!< in/out: max data size to
|
||||
fit in a single compressed page. */
|
||||
mem_heap_t* heap, /*!< in/out: pointer to memory heap */
|
||||
mtr_t* mtr) /*!< in/out: mini-transaction */
|
||||
{
|
||||
page_t* from_page = buf_block_get_frame(from_block);
|
||||
page_t* to_page = buf_block_get_frame(to_block);
|
||||
ulint space = dict_index_get_space(index);
|
||||
ulint level = btr_page_get_level(from_page, mtr);
|
||||
ulint n_recs = page_get_n_recs(from_page);
|
||||
ulint new_data_size = page_get_data_size(to_page);
|
||||
ulint max_ins_size =
|
||||
page_get_max_insert_size(to_page, n_recs);
|
||||
ulint max_ins_size_reorg =
|
||||
page_get_max_insert_size_after_reorganize(
|
||||
to_page, n_recs);
|
||||
ulint max_ins_size_to_use = max_ins_size_reorg > reserved_space
|
||||
? max_ins_size_reorg - reserved_space : 0;
|
||||
ulint move_size = 0;
|
||||
ulint n_recs_to_move = 0;
|
||||
rec_t* rec = NULL;
|
||||
ulint target_n_recs = 0;
|
||||
rec_t* orig_pred;
|
||||
|
||||
// Estimate how many records can be moved from the from_page to
|
||||
// the to_page.
|
||||
if (zip_size) {
|
||||
ulint page_diff = UNIV_PAGE_SIZE - *max_data_size;
|
||||
max_ins_size_to_use = (max_ins_size_to_use > page_diff)
|
||||
? max_ins_size_to_use - page_diff : 0;
|
||||
}
|
||||
n_recs_to_move = btr_defragment_calc_n_recs_for_size(
|
||||
from_block, index, max_ins_size_to_use, &move_size);
|
||||
|
||||
// If max_ins_size >= move_size, we can move the records without
|
||||
// reorganizing the page, otherwise we need to reorganize the page
|
||||
// first to release more space.
|
||||
if (move_size > max_ins_size) {
|
||||
if (!btr_page_reorganize_block(false, page_zip_level,
|
||||
to_block, index,
|
||||
mtr)) {
|
||||
if (!dict_index_is_clust(index)
|
||||
&& page_is_leaf(to_page)) {
|
||||
ibuf_reset_free_bits(to_block);
|
||||
}
|
||||
// If reorganization fails, that means page is
|
||||
// not compressable. There's no point to try
|
||||
// merging into this page. Continue to the
|
||||
// next page.
|
||||
return from_block;
|
||||
}
|
||||
ut_ad(page_validate(to_page, index));
|
||||
max_ins_size = page_get_max_insert_size(to_page, n_recs);
|
||||
ut_a(max_ins_size >= move_size);
|
||||
}
|
||||
|
||||
// Move records to pack to_page more full.
|
||||
orig_pred = NULL;
|
||||
target_n_recs = n_recs_to_move;
|
||||
while (n_recs_to_move > 0) {
|
||||
rec = page_rec_get_nth(from_page,
|
||||
n_recs_to_move + 1);
|
||||
orig_pred = page_copy_rec_list_start(
|
||||
to_block, from_block, rec, index, mtr);
|
||||
if (orig_pred)
|
||||
break;
|
||||
// If we reach here, that means compression failed after packing
|
||||
// n_recs_to_move number of records to to_page. We try to reduce
|
||||
// the targeted data size on the to_page by
|
||||
// BTR_DEFRAGMENT_PAGE_REDUCTION_STEP_SIZE and try again.
|
||||
os_atomic_increment_ulint(
|
||||
&btr_defragment_compression_failures, 1);
|
||||
max_ins_size_to_use =
|
||||
move_size > BTR_DEFRAGMENT_PAGE_REDUCTION_STEP_SIZE
|
||||
? move_size - BTR_DEFRAGMENT_PAGE_REDUCTION_STEP_SIZE
|
||||
: 0;
|
||||
if (max_ins_size_to_use == 0) {
|
||||
n_recs_to_move = 0;
|
||||
move_size = 0;
|
||||
break;
|
||||
}
|
||||
n_recs_to_move = btr_defragment_calc_n_recs_for_size(
|
||||
from_block, index, max_ins_size_to_use, &move_size);
|
||||
}
|
||||
// If less than target_n_recs are moved, it means there are
|
||||
// compression failures during page_copy_rec_list_start. Adjust
|
||||
// the max_data_size estimation to reduce compression failures
|
||||
// in the following runs.
|
||||
if (target_n_recs > n_recs_to_move
|
||||
&& *max_data_size > new_data_size + move_size) {
|
||||
*max_data_size = new_data_size + move_size;
|
||||
}
|
||||
// Set ibuf free bits if necessary.
|
||||
if (!dict_index_is_clust(index)
|
||||
&& page_is_leaf(to_page)) {
|
||||
if (zip_size) {
|
||||
ibuf_reset_free_bits(to_block);
|
||||
} else {
|
||||
ibuf_update_free_bits_if_full(
|
||||
to_block,
|
||||
UNIV_PAGE_SIZE,
|
||||
ULINT_UNDEFINED);
|
||||
}
|
||||
}
|
||||
if (n_recs_to_move == n_recs) {
|
||||
/* The whole page is merged with the previous page,
|
||||
free it. */
|
||||
lock_update_merge_left(to_block, orig_pred,
|
||||
from_block);
|
||||
btr_search_drop_page_hash_index(from_block);
|
||||
btr_level_list_remove(space, zip_size, from_page,
|
||||
index, mtr);
|
||||
btr_node_ptr_delete(index, from_block, mtr);
|
||||
btr_blob_dbg_remove(from_page, index,
|
||||
"btr_defragment_n_pages");
|
||||
btr_page_free(index, from_block, mtr);
|
||||
} else {
|
||||
// There are still records left on the page, so
|
||||
// increment n_defragmented. Node pointer will be changed
|
||||
// so remove the old node pointer.
|
||||
if (n_recs_to_move > 0) {
|
||||
// Part of the page is merged to left, remove
|
||||
// the merged records, update record locks and
|
||||
// node pointer.
|
||||
dtuple_t* node_ptr;
|
||||
page_delete_rec_list_start(rec, from_block,
|
||||
index, mtr);
|
||||
lock_update_split_and_merge(to_block,
|
||||
orig_pred,
|
||||
from_block);
|
||||
btr_node_ptr_delete(index, from_block, mtr);
|
||||
rec = page_rec_get_next(
|
||||
page_get_infimum_rec(from_page));
|
||||
node_ptr = dict_index_build_node_ptr(
|
||||
index, rec, page_get_page_no(from_page),
|
||||
heap, level + 1);
|
||||
btr_insert_on_non_leaf_level(0, index, level+1,
|
||||
node_ptr, mtr);
|
||||
}
|
||||
to_block = from_block;
|
||||
}
|
||||
return to_block;
|
||||
}
|
||||
|
||||
/*************************************************************//**
|
||||
Tries to merge N consecutive pages, starting from the page pointed by the
|
||||
cursor. Skip space 0. Only consider leaf pages.
|
||||
This function first loads all N pages into memory, then for each of
|
||||
the pages other than the first page, it tries to move as many records
|
||||
as possible to the left sibling to keep the left sibling full. During
|
||||
the process, if any page becomes empty, that page will be removed from
|
||||
the level list. Record locks, hash, and node pointers are updated after
|
||||
page reorganization.
|
||||
@return pointer to the last block processed, or NULL if reaching end of index */
|
||||
UNIV_INTERN
|
||||
buf_block_t*
|
||||
btr_defragment_n_pages(
|
||||
buf_block_t* block, /*!< in: starting block for defragmentation */
|
||||
dict_index_t* index, /*!< in: index tree */
|
||||
uint n_pages,/*!< in: number of pages to defragment */
|
||||
mtr_t* mtr) /*!< in/out: mini-transaction */
|
||||
{
|
||||
ulint space;
|
||||
ulint zip_size;
|
||||
/* We will need to load the n+1 block because if the last page is freed
|
||||
and we need to modify the prev_page_no of that block. */
|
||||
buf_block_t* blocks[BTR_DEFRAGMENT_MAX_N_PAGES + 1];
|
||||
page_t* first_page;
|
||||
buf_block_t* current_block;
|
||||
ulint total_data_size = 0;
|
||||
ulint total_n_recs = 0;
|
||||
ulint data_size_per_rec;
|
||||
ulint optimal_page_size;
|
||||
ulint reserved_space;
|
||||
ulint level;
|
||||
ulint max_data_size = 0;
|
||||
uint n_defragmented = 0;
|
||||
uint n_new_slots;
|
||||
mem_heap_t* heap;
|
||||
ibool end_of_index = FALSE;
|
||||
|
||||
/* It doesn't make sense to call this function with n_pages = 1. */
|
||||
ut_ad(n_pages > 1);
|
||||
|
||||
ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
|
||||
MTR_MEMO_X_LOCK));
|
||||
space = dict_index_get_space(index);
|
||||
if (space == 0) {
|
||||
/* Ignore space 0. */
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (n_pages > BTR_DEFRAGMENT_MAX_N_PAGES) {
|
||||
n_pages = BTR_DEFRAGMENT_MAX_N_PAGES;
|
||||
}
|
||||
|
||||
zip_size = dict_table_zip_size(index->table);
|
||||
first_page = buf_block_get_frame(block);
|
||||
level = btr_page_get_level(first_page, mtr);
|
||||
|
||||
if (level != 0) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* 1. Load the pages and calculate the total data size. */
|
||||
blocks[0] = block;
|
||||
for (uint i = 1; i <= n_pages; i++) {
|
||||
page_t* page = buf_block_get_frame(blocks[i-1]);
|
||||
ulint page_no = btr_page_get_next(page, mtr);
|
||||
total_data_size += page_get_data_size(page);
|
||||
total_n_recs += page_get_n_recs(page);
|
||||
if (page_no == FIL_NULL) {
|
||||
n_pages = i;
|
||||
end_of_index = TRUE;
|
||||
break;
|
||||
}
|
||||
blocks[i] = btr_block_get(space, zip_size, page_no,
|
||||
RW_X_LATCH, index, mtr);
|
||||
}
|
||||
|
||||
if (n_pages == 1) {
|
||||
if (btr_page_get_prev(first_page, mtr) == FIL_NULL) {
|
||||
/* last page in the index */
|
||||
if (dict_index_get_page(index)
|
||||
== page_get_page_no(first_page))
|
||||
return NULL;
|
||||
/* given page is the last page.
|
||||
Lift the records to father. */
|
||||
btr_lift_page_up(index, block, mtr);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* 2. Calculate how many pages data can fit in. If not compressable,
|
||||
return early. */
|
||||
ut_a(total_n_recs != 0);
|
||||
data_size_per_rec = total_data_size / total_n_recs;
|
||||
// For uncompressed pages, the optimal data size if the free space of a
|
||||
// empty page.
|
||||
optimal_page_size = page_get_free_space_of_empty(
|
||||
page_is_comp(first_page));
|
||||
// For compressed pages, we take compression failures into account.
|
||||
if (zip_size) {
|
||||
ulint size = 0;
|
||||
int i = 0;
|
||||
// We estimate the optimal data size of the index use samples of
|
||||
// data size. These samples are taken when pages failed to
|
||||
// compress due to insertion on the page. We use the average
|
||||
// of all samples we have as the estimation. Different pages of
|
||||
// the same index vary in compressibility. Average gives a good
|
||||
// enough estimation.
|
||||
for (;i < STAT_DEFRAG_DATA_SIZE_N_SAMPLE; i++) {
|
||||
if (index->stat_defrag_data_size_sample[i] == 0) {
|
||||
break;
|
||||
}
|
||||
size += index->stat_defrag_data_size_sample[i];
|
||||
}
|
||||
if (i != 0) {
|
||||
size = size / i;
|
||||
optimal_page_size = min(optimal_page_size, size);
|
||||
}
|
||||
max_data_size = optimal_page_size;
|
||||
}
|
||||
|
||||
reserved_space = min((ulint)(optimal_page_size
|
||||
* (1 - srv_defragment_fill_factor)),
|
||||
(data_size_per_rec
|
||||
* srv_defragment_fill_factor_n_recs));
|
||||
optimal_page_size -= reserved_space;
|
||||
n_new_slots = (total_data_size + optimal_page_size - 1)
|
||||
/ optimal_page_size;
|
||||
if (n_new_slots >= n_pages) {
|
||||
/* Can't defragment. */
|
||||
if (end_of_index)
|
||||
return NULL;
|
||||
return blocks[n_pages-1];
|
||||
}
|
||||
|
||||
/* 3. Defragment pages. */
|
||||
heap = mem_heap_create(256);
|
||||
// First defragmented page will be the first page.
|
||||
current_block = blocks[0];
|
||||
// Start from the second page.
|
||||
for (uint i = 1; i < n_pages; i ++) {
|
||||
buf_block_t* new_block = btr_defragment_merge_pages(
|
||||
index, blocks[i], current_block, zip_size,
|
||||
reserved_space, &max_data_size, heap, mtr);
|
||||
if (new_block != current_block) {
|
||||
n_defragmented ++;
|
||||
current_block = new_block;
|
||||
}
|
||||
}
|
||||
mem_heap_free(heap);
|
||||
n_defragmented ++;
|
||||
os_atomic_increment_ulint(
|
||||
&btr_defragment_count, 1);
|
||||
if (n_pages == n_defragmented) {
|
||||
os_atomic_increment_ulint(
|
||||
&btr_defragment_failures, 1);
|
||||
} else {
|
||||
index->stat_defrag_n_pages_freed += (n_pages - n_defragmented);
|
||||
}
|
||||
if (end_of_index)
|
||||
return NULL;
|
||||
return current_block;
|
||||
}
|
||||
|
||||
/******************************************************************//**
|
||||
Thread that merges consecutive b-tree pages into fewer pages to defragment
|
||||
the index. */
|
||||
extern "C" UNIV_INTERN
|
||||
os_thread_ret_t
|
||||
DECLARE_THREAD(btr_defragment_thread)(
|
||||
/*==========================================*/
|
||||
void* arg) /*!< in: work queue */
|
||||
{
|
||||
btr_pcur_t* pcur;
|
||||
btr_cur_t* cursor;
|
||||
dict_index_t* index;
|
||||
mtr_t mtr;
|
||||
buf_block_t* first_block;
|
||||
buf_block_t* last_block;
|
||||
|
||||
while (srv_shutdown_state == SRV_SHUTDOWN_NONE) {
|
||||
/* If defragmentation is disabled, sleep before
|
||||
checking whether it's enabled. */
|
||||
if (!srv_defragment) {
|
||||
os_thread_sleep(BTR_DEFRAGMENT_SLEEP_IN_USECS);
|
||||
continue;
|
||||
}
|
||||
/* The following call won't remove the item from work queue.
|
||||
We only get a pointer to it to work on. This will make sure
|
||||
when user issue a kill command, all indices are in the work
|
||||
queue to be searched. This also means that the user thread
|
||||
cannot directly remove the item from queue (since we might be
|
||||
using it). So user thread only marks index as removed. */
|
||||
btr_defragment_item_t* item = btr_defragment_get_item();
|
||||
/* If work queue is empty, sleep and check later. */
|
||||
if (!item) {
|
||||
os_thread_sleep(BTR_DEFRAGMENT_SLEEP_IN_USECS);
|
||||
continue;
|
||||
}
|
||||
/* If an index is marked as removed, we remove it from the work
|
||||
queue. No other thread could be using this item at this point so
|
||||
it's safe to remove now. */
|
||||
if (item->removed) {
|
||||
btr_defragment_remove_item(item);
|
||||
continue;
|
||||
}
|
||||
|
||||
pcur = item->pcur;
|
||||
ulonglong now = ut_timer_now();
|
||||
ulonglong elapsed = now - item->last_processed;
|
||||
|
||||
if (elapsed < srv_defragment_interval) {
|
||||
/* If we see an index again before the interval
|
||||
determined by the configured frequency is reached,
|
||||
we just sleep until the interval pass. Since
|
||||
defragmentation of all indices queue up on a single
|
||||
thread, it's likely other indices that follow this one
|
||||
don't need to sleep again. */
|
||||
os_thread_sleep(((ulint)ut_timer_to_microseconds(
|
||||
srv_defragment_interval - elapsed)));
|
||||
}
|
||||
|
||||
now = ut_timer_now();
|
||||
mtr_start(&mtr);
|
||||
btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, &mtr);
|
||||
cursor = btr_pcur_get_btr_cur(pcur);
|
||||
index = btr_cur_get_index(cursor);
|
||||
first_block = btr_cur_get_block(cursor);
|
||||
last_block = btr_defragment_n_pages(first_block, index,
|
||||
srv_defragment_n_pages,
|
||||
&mtr);
|
||||
if (last_block) {
|
||||
/* If we haven't reached the end of the index,
|
||||
place the cursor on the last record of last page,
|
||||
store the cursor position, and put back in queue. */
|
||||
page_t* last_page = buf_block_get_frame(last_block);
|
||||
rec_t* rec = page_rec_get_prev(
|
||||
page_get_supremum_rec(last_page));
|
||||
ut_a(page_rec_is_user_rec(rec));
|
||||
page_cur_position(rec, last_block,
|
||||
btr_cur_get_page_cur(cursor));
|
||||
btr_pcur_store_position(pcur, &mtr);
|
||||
mtr_commit(&mtr);
|
||||
/* Update the last_processed time of this index. */
|
||||
item->last_processed = now;
|
||||
} else {
|
||||
mtr_commit(&mtr);
|
||||
/* Reaching the end of the index. */
|
||||
dict_stats_empty_defrag_stats(index);
|
||||
dict_stats_save_defrag_stats(index);
|
||||
dict_stats_save_defrag_summary(index);
|
||||
btr_defragment_remove_item(item);
|
||||
}
|
||||
}
|
||||
btr_defragment_shutdown();
|
||||
os_thread_exit(NULL);
|
||||
OS_THREAD_DUMMY_RETURN;
|
||||
}
|
||||
|
||||
#endif /* !UNIV_HOTBACKUP */
|
||||
|
|
@ -408,7 +408,7 @@ dict_table_try_drop_aborted(
|
|||
|
||||
if (table == NULL) {
|
||||
table = dict_table_open_on_id_low(
|
||||
table_id, DICT_ERR_IGNORE_NONE);
|
||||
table_id, DICT_ERR_IGNORE_NONE, FALSE);
|
||||
} else {
|
||||
ut_ad(table->id == table_id);
|
||||
}
|
||||
|
|
@ -795,7 +795,8 @@ dict_table_open_on_id(
|
|||
table_id,
|
||||
table_op == DICT_TABLE_OP_LOAD_TABLESPACE
|
||||
? DICT_ERR_IGNORE_RECOVER_LOCK
|
||||
: DICT_ERR_IGNORE_NONE);
|
||||
: DICT_ERR_IGNORE_NONE,
|
||||
table_op == DICT_TABLE_OP_OPEN_ONLY_IF_CACHED);
|
||||
|
||||
if (table != NULL) {
|
||||
|
||||
|
|
@ -1313,7 +1314,7 @@ dict_table_move_from_non_lru_to_lru(
|
|||
/**********************************************************************//**
|
||||
Looks for an index with the given id given a table instance.
|
||||
@return index or NULL */
|
||||
static
|
||||
UNIV_INTERN
|
||||
dict_index_t*
|
||||
dict_table_find_index_on_id(
|
||||
/*========================*/
|
||||
|
|
@ -2408,6 +2409,13 @@ undo_size_ok:
|
|||
new_index->stat_index_size = 1;
|
||||
new_index->stat_n_leaf_pages = 1;
|
||||
|
||||
new_index->stat_defrag_n_pages_freed = 0;
|
||||
new_index->stat_defrag_n_page_split = 0;
|
||||
|
||||
new_index->stat_defrag_sample_next_slot = 0;
|
||||
memset(&new_index->stat_defrag_data_size_sample,
|
||||
0x0, sizeof(ulint) * STAT_DEFRAG_DATA_SIZE_N_SAMPLE);
|
||||
|
||||
/* Add the new index as the last index for the table */
|
||||
|
||||
UT_LIST_ADD_LAST(indexes, table->indexes, new_index);
|
||||
|
|
|
|||
|
|
@ -492,6 +492,9 @@ dict_stats_table_clone_create(
|
|||
heap,
|
||||
idx->n_uniq * sizeof(idx->stat_n_non_null_key_vals[0]));
|
||||
ut_d(idx->magic_n = DICT_INDEX_MAGIC_N);
|
||||
|
||||
idx->stat_defrag_n_page_split = 0;
|
||||
idx->stat_defrag_n_pages_freed = 0;
|
||||
}
|
||||
|
||||
ut_d(t->magic_n = DICT_TABLE_MAGIC_N);
|
||||
|
|
@ -520,7 +523,9 @@ static
|
|||
void
|
||||
dict_stats_empty_index(
|
||||
/*===================*/
|
||||
dict_index_t* index) /*!< in/out: index */
|
||||
dict_index_t* index, /*!< in/out: index */
|
||||
bool empty_defrag_stats)
|
||||
/*!< in: whether to empty defrag stats */
|
||||
{
|
||||
ut_ad(!(index->type & DICT_FTS));
|
||||
ut_ad(!dict_index_is_univ(index));
|
||||
|
|
@ -535,6 +540,34 @@ dict_stats_empty_index(
|
|||
|
||||
index->stat_index_size = 1;
|
||||
index->stat_n_leaf_pages = 1;
|
||||
|
||||
if (empty_defrag_stats) {
|
||||
dict_stats_empty_defrag_stats(index);
|
||||
dict_stats_empty_defrag_summary(index);
|
||||
}
|
||||
}
|
||||
|
||||
/**********************************************************************//**
|
||||
Clear defragmentation summary. */
|
||||
UNIV_INTERN
|
||||
void
|
||||
dict_stats_empty_defrag_summary(
|
||||
/*==================*/
|
||||
dict_index_t* index) /*!< in: index to clear defragmentation stats */
|
||||
{
|
||||
index->stat_defrag_n_pages_freed = 0;
|
||||
}
|
||||
|
||||
/**********************************************************************//**
|
||||
Clear defragmentation related index stats. */
|
||||
UNIV_INTERN
|
||||
void
|
||||
dict_stats_empty_defrag_stats(
|
||||
/*==================*/
|
||||
dict_index_t* index) /*!< in: index to clear defragmentation stats */
|
||||
{
|
||||
index->stat_defrag_modified_counter = 0;
|
||||
index->stat_defrag_n_page_split = 0;
|
||||
}
|
||||
|
||||
/*********************************************************************//**
|
||||
|
|
@ -544,7 +577,9 @@ static
|
|||
void
|
||||
dict_stats_empty_table(
|
||||
/*===================*/
|
||||
dict_table_t* table) /*!< in/out: table */
|
||||
dict_table_t* table, /*!< in/out: table */
|
||||
bool empty_defrag_stats)
|
||||
/*!< in: whether to empty defrag stats */
|
||||
{
|
||||
/* Zero the stats members */
|
||||
|
||||
|
|
@ -569,7 +604,7 @@ dict_stats_empty_table(
|
|||
|
||||
ut_ad(!dict_index_is_univ(index));
|
||||
|
||||
dict_stats_empty_index(index);
|
||||
dict_stats_empty_index(index, empty_defrag_stats);
|
||||
}
|
||||
|
||||
table->stat_initialized = TRUE;
|
||||
|
|
@ -704,7 +739,7 @@ dict_stats_copy(
|
|||
}
|
||||
|
||||
if (!INDEX_EQ(src_idx, dst_idx)) {
|
||||
dict_stats_empty_index(dst_idx);
|
||||
dict_stats_empty_index(dst_idx, true);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
@ -715,7 +750,7 @@ dict_stats_copy(
|
|||
/* Since src is smaller some elements in dst
|
||||
will remain untouched by the following memmove(),
|
||||
thus we init all of them here. */
|
||||
dict_stats_empty_index(dst_idx);
|
||||
dict_stats_empty_index(dst_idx, true);
|
||||
} else {
|
||||
n_copy_el = dst_idx->n_uniq;
|
||||
}
|
||||
|
|
@ -735,6 +770,13 @@ dict_stats_copy(
|
|||
dst_idx->stat_index_size = src_idx->stat_index_size;
|
||||
|
||||
dst_idx->stat_n_leaf_pages = src_idx->stat_n_leaf_pages;
|
||||
|
||||
dst_idx->stat_defrag_modified_counter =
|
||||
src_idx->stat_defrag_modified_counter;
|
||||
dst_idx->stat_defrag_n_pages_freed =
|
||||
src_idx->stat_defrag_n_pages_freed;
|
||||
dst_idx->stat_defrag_n_page_split =
|
||||
src_idx->stat_defrag_n_page_split;
|
||||
}
|
||||
|
||||
dst->stat_initialized = TRUE;
|
||||
|
|
@ -758,6 +800,9 @@ dict_index_t::stat_n_sample_sizes[]
|
|||
dict_index_t::stat_n_non_null_key_vals[]
|
||||
dict_index_t::stat_index_size
|
||||
dict_index_t::stat_n_leaf_pages
|
||||
dict_index_t::stat_defrag_modified_counter
|
||||
dict_index_t::stat_defrag_n_pages_freed
|
||||
dict_index_t::stat_defrag_n_page_split
|
||||
The returned object should be freed with dict_stats_snapshot_free()
|
||||
when no longer needed.
|
||||
@return incomplete table object */
|
||||
|
|
@ -807,7 +852,9 @@ dict_stats_snapshot_free(
|
|||
Calculates new estimates for index statistics. This function is
|
||||
relatively quick and is used to calculate transient statistics that
|
||||
are not saved on disk. This was the only way to calculate statistics
|
||||
before the Persistent Statistics feature was introduced. */
|
||||
before the Persistent Statistics feature was introduced.
|
||||
This function doesn't update the defragmentation related stats.
|
||||
Only persistent statistics supports defragmentation stats. */
|
||||
static
|
||||
void
|
||||
dict_stats_update_transient_for_index(
|
||||
|
|
@ -823,10 +870,10 @@ dict_stats_update_transient_for_index(
|
|||
Initialize some bogus index cardinality
|
||||
statistics, so that the data can be queried in
|
||||
various means, also via secondary indexes. */
|
||||
dict_stats_empty_index(index);
|
||||
dict_stats_empty_index(index, false);
|
||||
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
|
||||
} else if (ibuf_debug && !dict_index_is_clust(index)) {
|
||||
dict_stats_empty_index(index);
|
||||
dict_stats_empty_index(index, false);
|
||||
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
|
||||
} else {
|
||||
mtr_t mtr;
|
||||
|
|
@ -847,7 +894,7 @@ dict_stats_update_transient_for_index(
|
|||
|
||||
switch (size) {
|
||||
case ULINT_UNDEFINED:
|
||||
dict_stats_empty_index(index);
|
||||
dict_stats_empty_index(index, false);
|
||||
return;
|
||||
case 0:
|
||||
/* The root node of the tree is a leaf */
|
||||
|
|
@ -882,7 +929,7 @@ dict_stats_update_transient(
|
|||
|
||||
if (dict_table_is_discarded(table)) {
|
||||
/* Nothing to do. */
|
||||
dict_stats_empty_table(table);
|
||||
dict_stats_empty_table(table, false);
|
||||
return;
|
||||
} else if (index == NULL) {
|
||||
/* Table definition is corrupt */
|
||||
|
|
@ -892,7 +939,7 @@ dict_stats_update_transient(
|
|||
fprintf(stderr, " InnoDB: table %s has no indexes. "
|
||||
"Cannot calculate statistics.\n",
|
||||
ut_format_name(table->name, TRUE, buf, sizeof(buf)));
|
||||
dict_stats_empty_table(table);
|
||||
dict_stats_empty_table(table, false);
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -904,7 +951,7 @@ dict_stats_update_transient(
|
|||
continue;
|
||||
}
|
||||
|
||||
dict_stats_empty_index(index);
|
||||
dict_stats_empty_index(index, false);
|
||||
|
||||
if (dict_stats_should_ignore_index(index)) {
|
||||
continue;
|
||||
|
|
@ -1794,7 +1841,7 @@ dict_stats_analyze_index(
|
|||
|
||||
DEBUG_PRINTF(" %s(index=%s)\n", __func__, index->name);
|
||||
|
||||
dict_stats_empty_index(index);
|
||||
dict_stats_empty_index(index, false);
|
||||
|
||||
mtr_start(&mtr);
|
||||
|
||||
|
|
@ -2059,7 +2106,7 @@ dict_stats_update_persistent(
|
|||
|
||||
/* Table definition is corrupt */
|
||||
dict_table_stats_unlock(table, RW_X_LATCH);
|
||||
dict_stats_empty_table(table);
|
||||
dict_stats_empty_table(table, true);
|
||||
|
||||
return(DB_CORRUPTION);
|
||||
}
|
||||
|
|
@ -2088,7 +2135,7 @@ dict_stats_update_persistent(
|
|||
continue;
|
||||
}
|
||||
|
||||
dict_stats_empty_index(index);
|
||||
dict_stats_empty_index(index, false);
|
||||
|
||||
if (dict_stats_should_ignore_index(index)) {
|
||||
continue;
|
||||
|
|
@ -2657,6 +2704,16 @@ dict_stats_fetch_index_stats_step(
|
|||
== 0) {
|
||||
index->stat_n_leaf_pages = (ulint) stat_value;
|
||||
arg->stats_were_modified = true;
|
||||
} else if (stat_name_len == 12 /* strlen("n_page_split") */
|
||||
&& strncasecmp("n_page_split", stat_name, stat_name_len)
|
||||
== 0) {
|
||||
index->stat_defrag_n_page_split = (ulint) stat_value;
|
||||
arg->stats_were_modified = true;
|
||||
} else if (stat_name_len == 13 /* strlen("n_pages_freed") */
|
||||
&& strncasecmp("n_pages_freed", stat_name, stat_name_len)
|
||||
== 0) {
|
||||
index->stat_defrag_n_pages_freed = (ulint) stat_value;
|
||||
arg->stats_were_modified = true;
|
||||
} else if (stat_name_len > PFX_LEN /* e.g. stat_name=="n_diff_pfx01" */
|
||||
&& strncasecmp(PFX, stat_name, PFX_LEN) == 0) {
|
||||
|
||||
|
|
@ -2776,7 +2833,7 @@ dict_stats_fetch_from_ps(
|
|||
the persistent storage contains incomplete stats (e.g. missing stats
|
||||
for some index) then we would end up with (partially) uninitialized
|
||||
stats. */
|
||||
dict_stats_empty_table(table);
|
||||
dict_stats_empty_table(table, true);
|
||||
|
||||
trx = trx_allocate_for_background();
|
||||
|
||||
|
|
@ -2877,6 +2934,22 @@ dict_stats_fetch_from_ps(
|
|||
return(ret);
|
||||
}
|
||||
|
||||
/*********************************************************************//**
|
||||
Clear defragmentation stats modified counter for all indices in table. */
|
||||
static
|
||||
void
|
||||
dict_stats_empty_defrag_modified_counter(
|
||||
dict_table_t* table) /*!< in: table */
|
||||
{
|
||||
dict_index_t* index;
|
||||
ut_a(table);
|
||||
for (index = dict_table_get_first_index(table);
|
||||
index != NULL;
|
||||
index = dict_table_get_next_index(index)) {
|
||||
index->stat_defrag_modified_counter = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/*********************************************************************//**
|
||||
Fetches or calculates new estimates for index statistics. */
|
||||
UNIV_INTERN
|
||||
|
|
@ -2949,13 +3022,13 @@ dict_stats_update(
|
|||
"because the .ibd file is missing. For help, please "
|
||||
"refer to " REFMAN "innodb-troubleshooting.html\n",
|
||||
ut_format_name(table->name, TRUE, buf, sizeof(buf)));
|
||||
dict_stats_empty_table(table);
|
||||
dict_stats_empty_table(table, true);
|
||||
return(DB_TABLESPACE_DELETED);
|
||||
} else if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
|
||||
/* If we have set a high innodb_force_recovery level, do
|
||||
not calculate statistics, as a badly corrupted index can
|
||||
cause a crash in it. */
|
||||
dict_stats_empty_table(table);
|
||||
dict_stats_empty_table(table, false);
|
||||
return(DB_SUCCESS);
|
||||
}
|
||||
|
||||
|
|
@ -3014,7 +3087,7 @@ dict_stats_update(
|
|||
|
||||
case DICT_STATS_EMPTY_TABLE:
|
||||
|
||||
dict_stats_empty_table(table);
|
||||
dict_stats_empty_table(table, true);
|
||||
|
||||
/* If table is using persistent stats,
|
||||
then save the stats on disk */
|
||||
|
|
@ -3073,6 +3146,7 @@ dict_stats_update(
|
|||
|
||||
t->stats_last_recalc = table->stats_last_recalc;
|
||||
t->stat_modified_counter = 0;
|
||||
dict_stats_empty_defrag_modified_counter(t);
|
||||
|
||||
switch (err) {
|
||||
case DB_SUCCESS:
|
||||
|
|
@ -3083,7 +3157,7 @@ dict_stats_update(
|
|||
copying because dict_stats_table_clone_create() does
|
||||
skip corrupted indexes so our dummy object 't' may
|
||||
have less indexes than the real object 'table'. */
|
||||
dict_stats_empty_table(table);
|
||||
dict_stats_empty_table(table, true);
|
||||
|
||||
dict_stats_copy(table, t);
|
||||
|
||||
|
|
@ -3650,6 +3724,117 @@ dict_stats_rename_table(
|
|||
return(ret);
|
||||
}
|
||||
|
||||
/*********************************************************************//**
|
||||
Save defragmentation result.
|
||||
@return DB_SUCCESS or error code */
|
||||
UNIV_INTERN
|
||||
dberr_t
|
||||
dict_stats_save_defrag_summary(
|
||||
dict_index_t* index) /*!< in: index */
|
||||
{
|
||||
dberr_t ret;
|
||||
lint now = (lint) ut_time();
|
||||
if (dict_index_is_univ(index)) {
|
||||
return DB_SUCCESS;
|
||||
}
|
||||
rw_lock_x_lock(&dict_operation_lock);
|
||||
mutex_enter(&dict_sys->mutex);
|
||||
ret = dict_stats_save_index_stat(index, now, "n_pages_freed",
|
||||
index->stat_defrag_n_pages_freed,
|
||||
NULL,
|
||||
"Number of pages freed during"
|
||||
" last defragmentation run.",
|
||||
NULL);
|
||||
|
||||
mutex_exit(&dict_sys->mutex);
|
||||
rw_lock_x_unlock(&dict_operation_lock);
|
||||
return (ret);
|
||||
}
|
||||
|
||||
/*********************************************************************//**
|
||||
Save defragmentation stats for a given index.
|
||||
@return DB_SUCCESS or error code */
|
||||
UNIV_INTERN
|
||||
dberr_t
|
||||
dict_stats_save_defrag_stats(
|
||||
dict_index_t* index) /*!< in: index */
|
||||
{
|
||||
dberr_t ret;
|
||||
|
||||
if (index->table->ibd_file_missing) {
|
||||
ut_print_timestamp(stderr);
|
||||
fprintf(stderr,
|
||||
" InnoDB: Cannot save defragment stats because "
|
||||
".ibd file is missing.\n");
|
||||
return (DB_TABLESPACE_DELETED);
|
||||
}
|
||||
if (dict_index_is_corrupted(index)) {
|
||||
ut_print_timestamp(stderr);
|
||||
fprintf(stderr,
|
||||
" InnoDB: Cannot save defragment stats because "
|
||||
"index is corrupted.\n");
|
||||
return(DB_CORRUPTION);
|
||||
}
|
||||
|
||||
if (dict_index_is_univ(index)) {
|
||||
return DB_SUCCESS;
|
||||
}
|
||||
|
||||
lint now = (lint) ut_time();
|
||||
mtr_t mtr;
|
||||
ulint n_leaf_pages;
|
||||
ulint n_leaf_reserved;
|
||||
mtr_start(&mtr);
|
||||
mtr_s_lock(dict_index_get_lock(index), &mtr);
|
||||
n_leaf_reserved = btr_get_size_and_reserved(index, BTR_N_LEAF_PAGES,
|
||||
&n_leaf_pages, &mtr);
|
||||
mtr_commit(&mtr);
|
||||
|
||||
if (n_leaf_reserved == ULINT_UNDEFINED) {
|
||||
// The index name is different during fast index creation,
|
||||
// so the stats won't be associated with the right index
|
||||
// for later use. We just return without saving.
|
||||
return DB_SUCCESS;
|
||||
}
|
||||
|
||||
rw_lock_x_lock(&dict_operation_lock);
|
||||
|
||||
mutex_enter(&dict_sys->mutex);
|
||||
ret = dict_stats_save_index_stat(index, now, "n_page_split",
|
||||
index->stat_defrag_n_page_split,
|
||||
NULL,
|
||||
"Number of new page splits on leaves"
|
||||
" since last defragmentation.",
|
||||
NULL);
|
||||
if (ret != DB_SUCCESS) {
|
||||
goto end;
|
||||
}
|
||||
|
||||
ret = dict_stats_save_index_stat(
|
||||
index, now, "n_leaf_pages_defrag",
|
||||
n_leaf_pages,
|
||||
NULL,
|
||||
"Number of leaf pages when this stat is saved to disk",
|
||||
NULL);
|
||||
if (ret != DB_SUCCESS) {
|
||||
goto end;
|
||||
}
|
||||
|
||||
ret = dict_stats_save_index_stat(
|
||||
index, now, "n_leaf_pages_reserved",
|
||||
n_leaf_reserved,
|
||||
NULL,
|
||||
"Number of pages reserved for this index leaves when this stat "
|
||||
"is saved to disk",
|
||||
NULL);
|
||||
|
||||
end:
|
||||
mutex_exit(&dict_sys->mutex);
|
||||
rw_lock_x_unlock(&dict_operation_lock);
|
||||
|
||||
return (ret);
|
||||
}
|
||||
|
||||
/* tests @{ */
|
||||
#ifdef UNIV_COMPILE_TEST_FUNCS
|
||||
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@ Created Apr 25, 2012 Vasil Dimov
|
|||
|
||||
#include "row0mysql.h"
|
||||
#include "srv0start.h"
|
||||
#include "dict0dict.h"
|
||||
#include "dict0stats.h"
|
||||
#include "dict0stats_bg.h"
|
||||
|
||||
|
|
@ -44,8 +45,10 @@ UNIV_INTERN os_event_t dict_stats_event = NULL;
|
|||
|
||||
/** This mutex protects the "recalc_pool" variable. */
|
||||
static ib_mutex_t recalc_pool_mutex;
|
||||
static ib_mutex_t defrag_pool_mutex;
|
||||
#ifdef HAVE_PSI_INTERFACE
|
||||
static mysql_pfs_key_t recalc_pool_mutex_key;
|
||||
static mysql_pfs_key_t defrag_pool_mutex_key;
|
||||
#endif /* HAVE_PSI_INTERFACE */
|
||||
|
||||
/** The number of tables that can be added to "recalc_pool" before
|
||||
|
|
@ -59,16 +62,26 @@ static recalc_pool_t recalc_pool;
|
|||
|
||||
typedef recalc_pool_t::iterator recalc_pool_iterator_t;
|
||||
|
||||
/** Indices whose defrag stats need to be saved to persistent storage.*/
|
||||
struct defrag_pool_item_t {
|
||||
table_id_t table_id;
|
||||
index_id_t index_id;
|
||||
};
|
||||
typedef std::vector<defrag_pool_item_t> defrag_pool_t;
|
||||
static defrag_pool_t defrag_pool;
|
||||
typedef defrag_pool_t::iterator defrag_pool_iterator_t;
|
||||
|
||||
/*****************************************************************//**
|
||||
Initialize the recalc pool, called once during thread initialization. */
|
||||
static
|
||||
void
|
||||
dict_stats_recalc_pool_init()
|
||||
dict_stats_pool_init()
|
||||
/*=========================*/
|
||||
{
|
||||
ut_ad(!srv_read_only_mode);
|
||||
|
||||
recalc_pool.reserve(RECALC_POOL_INITIAL_SLOTS);
|
||||
defrag_pool.reserve(RECALC_POOL_INITIAL_SLOTS);
|
||||
}
|
||||
|
||||
/*****************************************************************//**
|
||||
|
|
@ -76,12 +89,13 @@ Free the resources occupied by the recalc pool, called once during
|
|||
thread de-initialization. */
|
||||
static
|
||||
void
|
||||
dict_stats_recalc_pool_deinit()
|
||||
/*===========================*/
|
||||
dict_stats_pool_deinit()
|
||||
/*====================*/
|
||||
{
|
||||
ut_ad(!srv_read_only_mode);
|
||||
|
||||
recalc_pool.clear();
|
||||
defrag_pool.clear();
|
||||
/*
|
||||
recalc_pool may still have its buffer allocated. It will free it when
|
||||
its destructor is called.
|
||||
|
|
@ -90,8 +104,12 @@ dict_stats_recalc_pool_deinit()
|
|||
memory. To avoid that, we force recalc_pool to surrender its buffer
|
||||
to empty_pool object, which will free it when leaving this function:
|
||||
*/
|
||||
recalc_pool_t empty_pool;
|
||||
recalc_pool.swap(empty_pool);
|
||||
recalc_pool_t recalc_empty_pool;
|
||||
defrag_pool_t defrag_empty_pool;
|
||||
memset(&recalc_empty_pool, 0, sizeof(recalc_pool_t));
|
||||
memset(&defrag_empty_pool, 0, sizeof(defrag_pool_t));
|
||||
recalc_pool.swap(recalc_empty_pool);
|
||||
defrag_pool.swap(defrag_empty_pool);
|
||||
}
|
||||
|
||||
/*****************************************************************//**
|
||||
|
|
@ -187,6 +205,111 @@ dict_stats_recalc_pool_del(
|
|||
mutex_exit(&recalc_pool_mutex);
|
||||
}
|
||||
|
||||
/*****************************************************************//**
|
||||
Add an index in a table to the defrag pool, which is processed by the
|
||||
background stats gathering thread. Only the table id and index id are
|
||||
added to the list, so the table can be closed after being enqueued and
|
||||
it will be opened when needed. If the table or index does not exist later
|
||||
(has been DROPped), then it will be removed from the pool and skipped. */
|
||||
UNIV_INTERN
|
||||
void
|
||||
dict_stats_defrag_pool_add(
|
||||
/*=======================*/
|
||||
const dict_index_t* index) /*!< in: table to add */
|
||||
{
|
||||
defrag_pool_item_t item;
|
||||
|
||||
ut_ad(!srv_read_only_mode);
|
||||
|
||||
mutex_enter(&defrag_pool_mutex);
|
||||
|
||||
/* quit if already in the list */
|
||||
for (defrag_pool_iterator_t iter = defrag_pool.begin();
|
||||
iter != defrag_pool.end();
|
||||
++iter) {
|
||||
if ((*iter).table_id == index->table->id
|
||||
&& (*iter).index_id == index->id) {
|
||||
mutex_exit(&defrag_pool_mutex);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
item.table_id = index->table->id;
|
||||
item.index_id = index->id;
|
||||
defrag_pool.push_back(item);
|
||||
|
||||
mutex_exit(&defrag_pool_mutex);
|
||||
|
||||
os_event_set(dict_stats_event);
|
||||
}
|
||||
|
||||
/*****************************************************************//**
|
||||
Get an index from the auto defrag pool. The returned index id is removed
|
||||
from the pool.
|
||||
@return true if the pool was non-empty and "id" was set, false otherwise */
|
||||
static
|
||||
bool
|
||||
dict_stats_defrag_pool_get(
|
||||
/*=======================*/
|
||||
table_id_t* table_id, /*!< out: table id, or unmodified if
|
||||
list is empty */
|
||||
index_id_t* index_id) /*!< out: index id, or unmodified if
|
||||
list is empty */
|
||||
{
|
||||
ut_ad(!srv_read_only_mode);
|
||||
|
||||
mutex_enter(&defrag_pool_mutex);
|
||||
|
||||
if (defrag_pool.empty()) {
|
||||
mutex_exit(&defrag_pool_mutex);
|
||||
return(false);
|
||||
}
|
||||
|
||||
defrag_pool_item_t& item = defrag_pool.back();
|
||||
*table_id = item.table_id;
|
||||
*index_id = item.index_id;
|
||||
|
||||
defrag_pool.pop_back();
|
||||
|
||||
mutex_exit(&defrag_pool_mutex);
|
||||
|
||||
return(true);
|
||||
}
|
||||
|
||||
/*****************************************************************//**
|
||||
Delete a given index from the auto defrag pool. */
|
||||
UNIV_INTERN
|
||||
void
|
||||
dict_stats_defrag_pool_del(
|
||||
/*=======================*/
|
||||
const dict_table_t* table, /*!<in: if given, remove
|
||||
all entries for the table */
|
||||
const dict_index_t* index) /*!< in: if given, remove this index */
|
||||
{
|
||||
ut_a((table && !index) || (!table && index));
|
||||
ut_ad(!srv_read_only_mode);
|
||||
ut_ad(mutex_own(&dict_sys->mutex));
|
||||
|
||||
mutex_enter(&defrag_pool_mutex);
|
||||
|
||||
defrag_pool_iterator_t iter = defrag_pool.begin();
|
||||
while (iter != defrag_pool.end()) {
|
||||
if ((table && (*iter).table_id == table->id)
|
||||
|| (index
|
||||
&& (*iter).table_id == index->table->id
|
||||
&& (*iter).index_id == index->id)) {
|
||||
/* erase() invalidates the iterator */
|
||||
iter = defrag_pool.erase(iter);
|
||||
if (index)
|
||||
break;
|
||||
} else {
|
||||
iter++;
|
||||
}
|
||||
}
|
||||
|
||||
mutex_exit(&defrag_pool_mutex);
|
||||
}
|
||||
|
||||
/*****************************************************************//**
|
||||
Wait until background stats thread has stopped using the specified table.
|
||||
The caller must have locked the data dictionary using
|
||||
|
|
@ -237,7 +360,10 @@ dict_stats_thread_init()
|
|||
mutex_create(recalc_pool_mutex_key, &recalc_pool_mutex,
|
||||
SYNC_STATS_AUTO_RECALC);
|
||||
|
||||
dict_stats_recalc_pool_init();
|
||||
/* We choose SYNC_STATS_DEFRAG to be below SYNC_FSP_PAGE. */
|
||||
mutex_create(defrag_pool_mutex_key, &defrag_pool_mutex,
|
||||
SYNC_STATS_DEFRAG);
|
||||
dict_stats_pool_init();
|
||||
}
|
||||
|
||||
/*****************************************************************//**
|
||||
|
|
@ -251,11 +377,14 @@ dict_stats_thread_deinit()
|
|||
ut_a(!srv_read_only_mode);
|
||||
ut_ad(!srv_dict_stats_thread_active);
|
||||
|
||||
dict_stats_recalc_pool_deinit();
|
||||
dict_stats_pool_deinit();
|
||||
|
||||
mutex_free(&recalc_pool_mutex);
|
||||
memset(&recalc_pool_mutex, 0x0, sizeof(recalc_pool_mutex));
|
||||
|
||||
mutex_free(&defrag_pool_mutex);
|
||||
memset(&defrag_pool_mutex, 0x0, sizeof(defrag_pool_mutex));
|
||||
|
||||
os_event_free(dict_stats_event);
|
||||
dict_stats_event = NULL;
|
||||
}
|
||||
|
|
@ -332,6 +461,63 @@ dict_stats_process_entry_from_recalc_pool()
|
|||
mutex_exit(&dict_sys->mutex);
|
||||
}
|
||||
|
||||
/*****************************************************************//**
|
||||
Get the first index that has been added for updating persistent defrag
|
||||
stats and eventually save its stats. */
|
||||
static
|
||||
void
|
||||
dict_stats_process_entry_from_defrag_pool()
|
||||
/*=======================================*/
|
||||
{
|
||||
table_id_t table_id;
|
||||
index_id_t index_id;
|
||||
|
||||
ut_ad(!srv_read_only_mode);
|
||||
|
||||
/* pop the first index from the auto defrag pool */
|
||||
if (!dict_stats_defrag_pool_get(&table_id, &index_id)) {
|
||||
/* no index in defrag pool */
|
||||
return;
|
||||
}
|
||||
|
||||
dict_table_t* table;
|
||||
|
||||
mutex_enter(&dict_sys->mutex);
|
||||
|
||||
/* If the table is no longer cached, we've already lost the in
|
||||
memory stats so there's nothing really to write to disk. */
|
||||
table = dict_table_open_on_id(table_id, TRUE,
|
||||
DICT_TABLE_OP_OPEN_ONLY_IF_CACHED);
|
||||
|
||||
if (table == NULL) {
|
||||
mutex_exit(&dict_sys->mutex);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Check whether table is corrupted */
|
||||
if (table->corrupted) {
|
||||
dict_table_close(table, TRUE, FALSE);
|
||||
mutex_exit(&dict_sys->mutex);
|
||||
return;
|
||||
}
|
||||
mutex_exit(&dict_sys->mutex);
|
||||
|
||||
dict_index_t* index = dict_table_find_index_on_id(table, index_id);
|
||||
|
||||
if (index == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Check whether index is corrupted */
|
||||
if (dict_index_is_corrupted(index)) {
|
||||
dict_table_close(table, FALSE, FALSE);
|
||||
return;
|
||||
}
|
||||
|
||||
dict_stats_save_defrag_stats(index);
|
||||
dict_table_close(table, FALSE, FALSE);
|
||||
}
|
||||
|
||||
/*****************************************************************//**
|
||||
This is the thread for background stats gathering. It pops tables, from
|
||||
the auto recalc list and proceeds them, eventually recalculating their
|
||||
|
|
@ -364,6 +550,9 @@ DECLARE_THREAD(dict_stats_thread)(
|
|||
|
||||
dict_stats_process_entry_from_recalc_pool();
|
||||
|
||||
while (defrag_pool.size())
|
||||
dict_stats_process_entry_from_defrag_pool();
|
||||
|
||||
os_event_reset(dict_stats_event);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -57,6 +57,7 @@ this program; if not, write to the Free Software Foundation, Inc.,
|
|||
#include "buf0flu.h"
|
||||
#include "buf0dblwr.h"
|
||||
#include "btr0sea.h"
|
||||
#include "btr0defragment.h"
|
||||
#include "os0file.h"
|
||||
#include "os0thread.h"
|
||||
#include "srv0start.h"
|
||||
|
|
@ -65,7 +66,6 @@ this program; if not, write to the Free Software Foundation, Inc.,
|
|||
#include "trx0trx.h"
|
||||
|
||||
#include "trx0sys.h"
|
||||
#include "mtr0mtr.h"
|
||||
#include "rem0types.h"
|
||||
#include "row0ins.h"
|
||||
#include "row0mysql.h"
|
||||
|
|
@ -86,6 +86,7 @@ this program; if not, write to the Free Software Foundation, Inc.,
|
|||
#include "dict0stats_bg.h"
|
||||
#include "ha_prototypes.h"
|
||||
#include "ut0mem.h"
|
||||
#include "ut0timer.h"
|
||||
#include "ibuf0ibuf.h"
|
||||
#include "dict0dict.h"
|
||||
#include "srv0mon.h"
|
||||
|
|
@ -752,6 +753,14 @@ static SHOW_VAR innodb_status_variables[]= {
|
|||
{"have_bzip2",
|
||||
(char*) &innodb_have_bzip2, SHOW_BOOL},
|
||||
|
||||
/* Defragmentation */
|
||||
{"defragment_compression_failures",
|
||||
(char*) &export_vars.innodb_defragment_compression_failures, SHOW_LONG},
|
||||
{"defragment_failures",
|
||||
(char*) &export_vars.innodb_defragment_failures, SHOW_LONG},
|
||||
{"defragment_count",
|
||||
(char*) &export_vars.innodb_defragment_count, SHOW_LONG},
|
||||
|
||||
{NullS, NullS, SHOW_LONG}
|
||||
};
|
||||
|
||||
|
|
@ -2351,7 +2360,8 @@ ha_innobase::ha_innobase(
|
|||
(srv_force_primary_key ? HA_REQUIRE_PRIMARY_KEY : 0 ) |
|
||||
HA_CAN_FULLTEXT_EXT | HA_CAN_EXPORT),
|
||||
start_of_scan(0),
|
||||
num_write_row(0)
|
||||
num_write_row(0),
|
||||
ha_partition_stats(NULL)
|
||||
{}
|
||||
|
||||
/*********************************************************************//**
|
||||
|
|
@ -10678,6 +10688,71 @@ ha_innobase::delete_table(
|
|||
|
||||
DBUG_RETURN(convert_error_code_to_mysql(err, 0, NULL));
|
||||
}
|
||||
/*****************************************************************//**
|
||||
Defragment table.
|
||||
@return error number */
|
||||
UNIV_INTERN
|
||||
int
|
||||
ha_innobase::defragment_table(
|
||||
/*==========================*/
|
||||
const char* name, /*!< in: table name */
|
||||
const char* index_name, /*!< in: index name */
|
||||
bool async) /*!< in: whether to wait until finish */
|
||||
{
|
||||
char norm_name[FN_REFLEN];
|
||||
dict_table_t* table;
|
||||
dict_index_t* index;
|
||||
ibool one_index = (index_name != 0);
|
||||
int ret = 0;
|
||||
if (!srv_defragment) {
|
||||
return ER_FEATURE_DISABLED;
|
||||
}
|
||||
normalize_table_name(norm_name, name);
|
||||
table = dict_table_open_on_name(norm_name, FALSE,
|
||||
FALSE, DICT_ERR_IGNORE_NONE);
|
||||
for (index = dict_table_get_first_index(table); index;
|
||||
index = dict_table_get_next_index(index)) {
|
||||
if (one_index && strcasecmp(index_name, index->name) != 0)
|
||||
continue;
|
||||
if (btr_defragment_find_index(index)) {
|
||||
// We borrow this error code. When the same index is
|
||||
// already in the defragmentation queue, issue another
|
||||
// defragmentation only introduces overhead. We return
|
||||
// an error here to let the user know this is not
|
||||
// necessary. Note that this will fail a query that's
|
||||
// trying to defragment a full table if one of the
|
||||
// indicies in that table is already in defragmentation.
|
||||
// We choose this behavior so user is aware of this
|
||||
// rather than silently defragment other indicies of
|
||||
// that table.
|
||||
ret = ER_SP_ALREADY_EXISTS;
|
||||
break;
|
||||
}
|
||||
os_event_t event = btr_defragment_add_index(index, async);
|
||||
if (!async && event) {
|
||||
while(os_event_wait_time(event, 1000000)) {
|
||||
if (thd_killed(current_thd)) {
|
||||
btr_defragment_remove_index(index);
|
||||
ret = ER_QUERY_INTERRUPTED;
|
||||
break;
|
||||
}
|
||||
}
|
||||
os_event_free(event);
|
||||
}
|
||||
if (ret) {
|
||||
break;
|
||||
}
|
||||
if (one_index) {
|
||||
one_index = FALSE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
dict_table_close(table, FALSE, FALSE);
|
||||
if (ret == 0 && one_index) {
|
||||
ret = ER_NO_SUCH_INDEX;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*****************************************************************//**
|
||||
Removes all tables in the named database inside InnoDB. */
|
||||
|
|
@ -11816,6 +11891,27 @@ ha_innobase::optimize(
|
|||
This works OK otherwise, but MySQL locks the entire table during
|
||||
calls to OPTIMIZE, which is undesirable. */
|
||||
|
||||
if (srv_defragment) {
|
||||
int err;
|
||||
|
||||
err = defragment_table(prebuilt->table->name, NULL, false);
|
||||
|
||||
if (err == 0) {
|
||||
return (HA_ADMIN_OK);
|
||||
} else {
|
||||
push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
|
||||
err,
|
||||
"InnoDB: Cannot defragment table %s: returned error code %d\n",
|
||||
prebuilt->table->name, err);
|
||||
|
||||
if(err == ER_SP_ALREADY_EXISTS) {
|
||||
return (HA_ADMIN_OK);
|
||||
} else {
|
||||
return (HA_ADMIN_TRY_ALTER);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (innodb_optimize_fulltext_only) {
|
||||
if (prebuilt->table->fts && prebuilt->table->fts->cache
|
||||
&& !dict_table_is_discarded(prebuilt->table)) {
|
||||
|
|
@ -14520,6 +14616,13 @@ innodb_max_dirty_pages_pct_lwm_update(
|
|||
srv_max_dirty_pages_pct_lwm = in_val;
|
||||
}
|
||||
|
||||
UNIV_INTERN
|
||||
void
|
||||
ha_innobase::set_partition_owner_stats(ha_statistics *stats)
|
||||
{
|
||||
ha_partition_stats= stats;
|
||||
}
|
||||
|
||||
/************************************************************//**
|
||||
Validate the file format name and return its corresponding id.
|
||||
@return valid file format id */
|
||||
|
|
@ -15773,6 +15876,23 @@ innodb_reset_all_monitor_update(
|
|||
TRUE);
|
||||
}
|
||||
|
||||
static
|
||||
void
|
||||
innodb_defragment_frequency_update(
|
||||
/*===============================*/
|
||||
THD* thd, /*!< in: thread handle */
|
||||
struct st_mysql_sys_var* var, /*!< in: pointer to
|
||||
system variable */
|
||||
void* var_ptr,/*!< out: where the
|
||||
formal string goes */
|
||||
const void* save) /*!< in: immediate result
|
||||
from check function */
|
||||
{
|
||||
srv_defragment_frequency = (*static_cast<const uint*>(save));
|
||||
srv_defragment_interval = ut_microseconds_to_timer(
|
||||
1000000.0 / srv_defragment_frequency);
|
||||
}
|
||||
|
||||
/****************************************************************//**
|
||||
Parse and enable InnoDB monitor counters during server startup.
|
||||
User can list the monitor counters/groups to be enable by specifying
|
||||
|
|
@ -16631,6 +16751,60 @@ static MYSQL_SYSVAR_BOOL(buffer_pool_load_at_startup, srv_buffer_pool_load_at_st
|
|||
"Load the buffer pool from a file named @@innodb_buffer_pool_filename",
|
||||
NULL, NULL, FALSE);
|
||||
|
||||
static MYSQL_SYSVAR_BOOL(defragment, srv_defragment,
|
||||
PLUGIN_VAR_RQCMDARG,
|
||||
"Enable/disable InnoDB defragmentation (default FALSE). When set to FALSE, all existing "
|
||||
"defragmentation will be paused. And new defragmentation command will fail."
|
||||
"Paused defragmentation commands will resume when this variable is set to "
|
||||
"true again.",
|
||||
NULL, NULL, FALSE);
|
||||
|
||||
static MYSQL_SYSVAR_UINT(defragment_n_pages, srv_defragment_n_pages,
|
||||
PLUGIN_VAR_RQCMDARG,
|
||||
"Number of pages considered at once when merging multiple pages to "
|
||||
"defragment",
|
||||
NULL, NULL, 7, 2, 32, 0);
|
||||
|
||||
static MYSQL_SYSVAR_UINT(defragment_stats_accuracy,
|
||||
srv_defragment_stats_accuracy,
|
||||
PLUGIN_VAR_RQCMDARG,
|
||||
"How many defragment stats changes there are before the stats "
|
||||
"are written to persistent storage. Set to 0 meaning disable "
|
||||
"defragment stats tracking.",
|
||||
NULL, NULL, 0, 0, ~0U, 0);
|
||||
|
||||
static MYSQL_SYSVAR_UINT(defragment_fill_factor_n_recs,
|
||||
srv_defragment_fill_factor_n_recs,
|
||||
PLUGIN_VAR_RQCMDARG,
|
||||
"How many records of space defragmentation should leave on the page. "
|
||||
"This variable, together with innodb_defragment_fill_factor, is introduced "
|
||||
"so defragmentation won't pack the page too full and cause page split on "
|
||||
"the next insert on every page. The variable indicating more defragmentation"
|
||||
" gain is the one effective.",
|
||||
NULL, NULL, 20, 1, 100, 0);
|
||||
|
||||
static MYSQL_SYSVAR_DOUBLE(defragment_fill_factor, srv_defragment_fill_factor,
|
||||
PLUGIN_VAR_RQCMDARG,
|
||||
"A number between [0.7, 1] that tells defragmentation how full it should "
|
||||
"fill a page. Default is 0.9. Number below 0.7 won't make much sense."
|
||||
"This variable, together with innodb_defragment_fill_factor_n_recs, is "
|
||||
"introduced so defragmentation won't pack the page too full and cause "
|
||||
"page split on the next insert on every page. The variable indicating more "
|
||||
"defragmentation gain is the one effective.",
|
||||
NULL, NULL, 0.9, 0.7, 1, 0);
|
||||
|
||||
static MYSQL_SYSVAR_UINT(defragment_frequency, srv_defragment_frequency,
|
||||
PLUGIN_VAR_RQCMDARG,
|
||||
"Do not defragment a single index more than this number of time per second."
|
||||
"This controls the number of time defragmentation thread can request X_LOCK "
|
||||
"on an index. Defragmentation thread will check whether "
|
||||
"1/defragment_frequency (s) has passed since it worked on this index last "
|
||||
"time, and put the index back to the queue if not enough time has passed. "
|
||||
"The actual frequency can only be lower than this given number.",
|
||||
NULL, innodb_defragment_frequency_update,
|
||||
SRV_DEFRAGMENT_FREQUENCY_DEFAULT, 1, 1000, 0);
|
||||
|
||||
|
||||
static MYSQL_SYSVAR_ULONG(lru_scan_depth, srv_LRU_scan_depth,
|
||||
PLUGIN_VAR_RQCMDARG,
|
||||
"How deep to scan LRU to keep it clean",
|
||||
|
|
@ -17116,6 +17290,12 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
|
|||
MYSQL_SYSVAR(buffer_pool_load_now),
|
||||
MYSQL_SYSVAR(buffer_pool_load_abort),
|
||||
MYSQL_SYSVAR(buffer_pool_load_at_startup),
|
||||
MYSQL_SYSVAR(defragment),
|
||||
MYSQL_SYSVAR(defragment_n_pages),
|
||||
MYSQL_SYSVAR(defragment_stats_accuracy),
|
||||
MYSQL_SYSVAR(defragment_fill_factor),
|
||||
MYSQL_SYSVAR(defragment_fill_factor_n_recs),
|
||||
MYSQL_SYSVAR(defragment_frequency),
|
||||
MYSQL_SYSVAR(lru_scan_depth),
|
||||
MYSQL_SYSVAR(flush_neighbors),
|
||||
MYSQL_SYSVAR(checksum_algorithm),
|
||||
|
|
|
|||
|
|
@ -105,6 +105,8 @@ class ha_innobase: public handler
|
|||
or undefined */
|
||||
uint num_write_row; /*!< number of write_row() calls */
|
||||
|
||||
ha_statistics* ha_partition_stats; /*!< stats of the partition owner
|
||||
handler (if there is one) */
|
||||
uint store_key_val_for_row(uint keynr, char* buff, uint buff_len,
|
||||
const uchar* record);
|
||||
inline void update_thd(THD* thd);
|
||||
|
|
@ -206,6 +208,8 @@ class ha_innobase: public handler
|
|||
int truncate();
|
||||
int delete_table(const char *name);
|
||||
int rename_table(const char* from, const char* to);
|
||||
int defragment_table(const char* name, const char* index_name,
|
||||
bool async);
|
||||
int check(THD* thd, HA_CHECK_OPT* check_opt);
|
||||
char* update_table_comment(const char* comment);
|
||||
char* get_foreign_key_create_info();
|
||||
|
|
@ -309,6 +313,7 @@ class ha_innobase: public handler
|
|||
Alter_inplace_info* ha_alter_info,
|
||||
bool commit);
|
||||
/** @} */
|
||||
void set_partition_owner_stats(ha_statistics *stats);
|
||||
bool check_if_incompatible_data(HA_CREATE_INFO *info,
|
||||
uint table_changes);
|
||||
private:
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
|
||||
Copyright (c) 2012, Facebook Inc.
|
||||
Copyright (c) 2014, SkySQL Ab. All Rights Reserved.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free Software
|
||||
|
|
@ -671,6 +672,21 @@ btr_get_size(
|
|||
is s-latched */
|
||||
__attribute__((nonnull, warn_unused_result));
|
||||
/**************************************************************//**
|
||||
Gets the number of reserved and used pages in a B-tree.
|
||||
@return number of pages reserved, or ULINT_UNDEFINED if the index
|
||||
is unavailable */
|
||||
UNIV_INTERN
|
||||
ulint
|
||||
btr_get_size_and_reserved(
|
||||
/*======================*/
|
||||
dict_index_t* index, /*!< in: index */
|
||||
ulint flag, /*!< in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */
|
||||
ulint* used, /*!< out: number of pages used (<= reserved) */
|
||||
mtr_t* mtr) /*!< in/out: mini-transaction where index
|
||||
is s-latched */
|
||||
__attribute__((nonnull));
|
||||
|
||||
/**************************************************************//**
|
||||
Allocates a new file page to be used in an index tree. NOTE: we assume
|
||||
that the caller has made the reservation for free extents!
|
||||
@retval NULL if no page could be allocated
|
||||
|
|
@ -717,6 +733,33 @@ btr_page_free_low(
|
|||
ulint level, /*!< in: page level */
|
||||
mtr_t* mtr) /*!< in: mtr */
|
||||
__attribute__((nonnull));
|
||||
/*************************************************************//**
|
||||
Reorganizes an index page.
|
||||
|
||||
IMPORTANT: On success, the caller will have to update IBUF_BITMAP_FREE
|
||||
if this is a compressed leaf page in a secondary index. This has to
|
||||
be done either within the same mini-transaction, or by invoking
|
||||
ibuf_reset_free_bits() before mtr_commit(). On uncompressed pages,
|
||||
IBUF_BITMAP_FREE is unaffected by reorganization.
|
||||
|
||||
@retval true if the operation was successful
|
||||
@retval false if it is a compressed page, and recompression failed */
|
||||
UNIV_INTERN
|
||||
bool
|
||||
btr_page_reorganize_block(
|
||||
/*======================*/
|
||||
bool recovery,/*!< in: true if called in recovery:
|
||||
locks should not be updated, i.e.,
|
||||
there cannot exist locks on the
|
||||
page, and a hash index should not be
|
||||
dropped: it cannot exist */
|
||||
ulint z_level,/*!< in: compression level to be used
|
||||
if dealing with compressed page */
|
||||
buf_block_t* block, /*!< in/out: B-tree page */
|
||||
dict_index_t* index, /*!< in: the index tree of the page */
|
||||
mtr_t* mtr) /*!< in/out: mini-transaction */
|
||||
__attribute__((nonnull));
|
||||
|
||||
#ifdef UNIV_BTR_PRINT
|
||||
/*************************************************************//**
|
||||
Prints size info of a B-tree. */
|
||||
|
|
@ -762,6 +805,60 @@ btr_validate_index(
|
|||
const trx_t* trx) /*!< in: transaction or 0 */
|
||||
__attribute__((nonnull(1), warn_unused_result));
|
||||
|
||||
#ifdef UNIV_SYNC_DEBUG
|
||||
/*************************************************************//**
|
||||
Removes a page from the level list of pages.
|
||||
@param space in: space where removed
|
||||
@param zip_size in: compressed page size in bytes, or 0 for uncompressed
|
||||
@param page in/out: page to remove
|
||||
@param index in: index tree
|
||||
@param mtr in/out: mini-transaction */
|
||||
# define btr_level_list_remove(space,zip_size,page,index,mtr) \
|
||||
btr_level_list_remove_func(space,zip_size,page,index,mtr)
|
||||
#else /* UNIV_SYNC_DEBUG */
|
||||
/*************************************************************//**
|
||||
Removes a page from the level list of pages.
|
||||
@param space in: space where removed
|
||||
@param zip_size in: compressed page size in bytes, or 0 for uncompressed
|
||||
@param page in/out: page to remove
|
||||
@param index in: index tree
|
||||
@param mtr in/out: mini-transaction */
|
||||
# define btr_level_list_remove(space,zip_size,page,index,mtr) \
|
||||
btr_level_list_remove_func(space,zip_size,page,mtr)
|
||||
#endif /* UNIV_SYNC_DEBUG */
|
||||
|
||||
/*************************************************************//**
|
||||
Removes a page from the level list of pages. */
|
||||
UNIV_INTERN
|
||||
void
|
||||
btr_level_list_remove_func(
|
||||
/*=======================*/
|
||||
ulint space, /*!< in: space where removed */
|
||||
ulint zip_size,/*!< in: compressed page size in bytes
|
||||
or 0 for uncompressed pages */
|
||||
page_t* page, /*!< in/out: page to remove */
|
||||
#ifdef UNIV_SYNC_DEBUG
|
||||
const dict_index_t* index, /*!< in: index tree */
|
||||
#endif /* UNIV_SYNC_DEBUG */
|
||||
mtr_t* mtr) /*!< in/out: mini-transaction */
|
||||
__attribute__((nonnull));
|
||||
|
||||
/*************************************************************//**
|
||||
If page is the only on its level, this function moves its records to the
|
||||
father page, thus reducing the tree height.
|
||||
@return father block */
|
||||
UNIV_INTERN
|
||||
buf_block_t*
|
||||
btr_lift_page_up(
|
||||
/*=============*/
|
||||
dict_index_t* index, /*!< in: index tree */
|
||||
buf_block_t* block, /*!< in: page which is the only on its level;
|
||||
must not be empty: use
|
||||
btr_discard_only_page_on_level if the last
|
||||
record from the page should be removed */
|
||||
mtr_t* mtr) /*!< in: mtr */
|
||||
__attribute__((nonnull));
|
||||
|
||||
#define BTR_N_LEAF_PAGES 1
|
||||
#define BTR_TOTAL_SIZE 2
|
||||
#endif /* !UNIV_HOTBACKUP */
|
||||
|
|
|
|||
|
|
@ -163,9 +163,10 @@ btr_page_get_next(
|
|||
/*!< in: mini-transaction handle */
|
||||
{
|
||||
ut_ad(page && mtr);
|
||||
#ifndef UNIV_INNOCHECKSUM
|
||||
ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX)
|
||||
|| mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_S_FIX));
|
||||
|
||||
#endif /* UNIV_INNOCHECKSUM */
|
||||
return(mach_read_from_4(page + FIL_PAGE_NEXT));
|
||||
}
|
||||
|
||||
|
|
|
|||
101
storage/innobase/include/btr0defragment.h
Normal file
101
storage/innobase/include/btr0defragment.h
Normal file
|
|
@ -0,0 +1,101 @@
|
|||
/*****************************************************************************
|
||||
|
||||
Copyright (C) 2013, 2014 Facebook, Inc. All Rights Reserved.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free Software
|
||||
Foundation; version 2 of the License.
|
||||
|
||||
This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along with
|
||||
this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
|
||||
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef btr0defragment_h
|
||||
#define btr0defragment_h
|
||||
|
||||
#include "univ.i"
|
||||
|
||||
#ifndef UNIV_HOTBACKUP
|
||||
|
||||
#include "btr0pcur.h"
|
||||
|
||||
/* Max number of pages to consider at once during defragmentation. */
|
||||
#define BTR_DEFRAGMENT_MAX_N_PAGES 32
|
||||
|
||||
/** stats in btr_defragment */
|
||||
extern ulint btr_defragment_compression_failures;
|
||||
extern ulint btr_defragment_failures;
|
||||
extern ulint btr_defragment_count;
|
||||
|
||||
/** Item in the work queue for btr_degrament_thread. */
|
||||
struct btr_defragment_item_t
|
||||
{
|
||||
btr_pcur_t* pcur; /* persistent cursor where
|
||||
btr_defragment_n_pages should start */
|
||||
os_event_t event; /* if not null, signal after work
|
||||
is done */
|
||||
bool removed; /* Mark an item as removed */
|
||||
ulonglong last_processed; /* timestamp of last time this index
|
||||
is processed by defragment thread */
|
||||
|
||||
btr_defragment_item_t(btr_pcur_t* pcur, os_event_t event);
|
||||
~btr_defragment_item_t();
|
||||
};
|
||||
|
||||
/******************************************************************//**
|
||||
Initialize defragmentation. */
|
||||
void
|
||||
btr_defragment_init(void);
|
||||
/******************************************************************//**
|
||||
Shutdown defragmentation. */
|
||||
void
|
||||
btr_defragment_shutdown();
|
||||
/******************************************************************//**
|
||||
Check whether the given index is in btr_defragment_wq. */
|
||||
bool
|
||||
btr_defragment_find_index(
|
||||
dict_index_t* index); /*!< Index to find. */
|
||||
/******************************************************************//**
|
||||
Add an index to btr_defragment_wq. Return a pointer to os_event if this
|
||||
is a synchronized defragmentation. */
|
||||
os_event_t
|
||||
btr_defragment_add_index(
|
||||
dict_index_t* index, /*!< index to be added */
|
||||
bool async); /*!< whether this is an async defragmentation */
|
||||
/******************************************************************//**
|
||||
When table is dropped, this function is called to mark a table as removed in
|
||||
btr_efragment_wq. The difference between this function and the remove_index
|
||||
function is this will not NULL the event. */
|
||||
void
|
||||
btr_defragment_remove_table(
|
||||
dict_table_t* table); /*!< Index to be removed. */
|
||||
/******************************************************************//**
|
||||
Mark an index as removed from btr_defragment_wq. */
|
||||
void
|
||||
btr_defragment_remove_index(
|
||||
dict_index_t* index); /*!< Index to be removed. */
|
||||
/*********************************************************************//**
|
||||
Check whether we should save defragmentation statistics to persistent storage.*/
|
||||
UNIV_INTERN
|
||||
void
|
||||
btr_defragment_save_defrag_stats_if_needed(
|
||||
dict_index_t* index); /*!< in: index */
|
||||
/******************************************************************//**
|
||||
Thread that merges consecutive b-tree pages into fewer pages to defragment
|
||||
the index. */
|
||||
extern "C" UNIV_INTERN
|
||||
os_thread_ret_t
|
||||
DECLARE_THREAD(btr_defragment_thread)(
|
||||
/*==========================================*/
|
||||
void* arg); /*!< in: a dummy parameter required by
|
||||
os_thread_create */
|
||||
|
||||
|
||||
#endif /* !UNIV_HOTBACKUP */
|
||||
#endif
|
||||
|
|
@ -120,7 +120,9 @@ enum dict_table_op_t {
|
|||
DICT_TABLE_OP_DROP_ORPHAN,
|
||||
/** Silently load the tablespace if it does not exist,
|
||||
and do not load the definitions of incomplete indexes. */
|
||||
DICT_TABLE_OP_LOAD_TABLESPACE
|
||||
DICT_TABLE_OP_LOAD_TABLESPACE,
|
||||
/** Open the table only if it's in table cache. */
|
||||
DICT_TABLE_OP_OPEN_ONLY_IF_CACHED
|
||||
};
|
||||
|
||||
/**********************************************************************//**
|
||||
|
|
@ -1496,6 +1498,16 @@ dict_table_get_index_on_name(
|
|||
const char* name) /*!< in: name of the index to find */
|
||||
__attribute__((nonnull, warn_unused_result));
|
||||
/**********************************************************************//**
|
||||
Looks for an index with the given id given a table instance.
|
||||
@return index or NULL */
|
||||
UNIV_INTERN
|
||||
dict_index_t*
|
||||
dict_table_find_index_on_id(
|
||||
/*========================*/
|
||||
const dict_table_t* table, /*!< in: table instance */
|
||||
index_id_t id) /*!< in: index id */
|
||||
__attribute__((nonnull, warn_unused_result));
|
||||
/**********************************************************************//**
|
||||
In case there is more than one index with the same name return the index
|
||||
with the min(id).
|
||||
@return index, NULL if does not exist */
|
||||
|
|
|
|||
|
|
@ -588,6 +588,10 @@ struct zip_pad_info_t {
|
|||
rounds */
|
||||
};
|
||||
|
||||
/** Number of samples of data size kept when page compression fails for
|
||||
a certain index.*/
|
||||
#define STAT_DEFRAG_DATA_SIZE_N_SAMPLE 10
|
||||
|
||||
/** Data structure for an index. Most fields will be
|
||||
initialized to 0, NULL or FALSE in dict_mem_index_create(). */
|
||||
struct dict_index_t{
|
||||
|
|
@ -676,6 +680,23 @@ struct dict_index_t{
|
|||
/*!< approximate number of leaf pages in the
|
||||
index tree */
|
||||
/* @} */
|
||||
/** Statistics for defragmentation, these numbers are estimations and
|
||||
could be very inaccurate at certain times, e.g. right after restart,
|
||||
during defragmentation, etc. */
|
||||
/* @{ */
|
||||
ulint stat_defrag_modified_counter;
|
||||
ulint stat_defrag_n_pages_freed;
|
||||
/* number of pages freed by defragmentation. */
|
||||
ulint stat_defrag_n_page_split;
|
||||
/* number of page splits since last full index
|
||||
defragmentation. */
|
||||
ulint stat_defrag_data_size_sample[STAT_DEFRAG_DATA_SIZE_N_SAMPLE];
|
||||
/* data size when compression failure happened
|
||||
the most recent 10 times. */
|
||||
ulint stat_defrag_sample_next_slot;
|
||||
/* in which slot the next sample should be
|
||||
saved. */
|
||||
/* @} */
|
||||
rw_lock_t lock; /*!< read-write lock protecting the
|
||||
upper levels of the index tree */
|
||||
trx_id_t trx_id; /*!< id of the transaction that created this
|
||||
|
|
|
|||
|
|
@ -53,8 +53,9 @@ dict_table_t*
|
|||
dict_table_open_on_id_low(
|
||||
/*=====================*/
|
||||
table_id_t table_id, /*!< in: table id */
|
||||
dict_err_ignore_t ignore_err); /*!< in: errors to ignore
|
||||
dict_err_ignore_t ignore_err, /*!< in: errors to ignore
|
||||
when loading the table */
|
||||
ibool open_only_if_in_cache);
|
||||
|
||||
#ifndef UNIV_NONINL
|
||||
#include "dict0priv.ic"
|
||||
|
|
|
|||
|
|
@ -74,8 +74,9 @@ dict_table_t*
|
|||
dict_table_open_on_id_low(
|
||||
/*======================*/
|
||||
table_id_t table_id, /*!< in: table id */
|
||||
dict_err_ignore_t ignore_err) /*!< in: errors to ignore
|
||||
dict_err_ignore_t ignore_err, /*!< in: errors to ignore
|
||||
when loading the table */
|
||||
ibool open_only_if_in_cache)
|
||||
{
|
||||
dict_table_t* table;
|
||||
ulint fold;
|
||||
|
|
@ -88,7 +89,7 @@ dict_table_open_on_id_low(
|
|||
HASH_SEARCH(id_hash, dict_sys->table_id_hash, fold,
|
||||
dict_table_t*, table, ut_ad(table->cached),
|
||||
table->id == table_id);
|
||||
if (table == NULL) {
|
||||
if (table == NULL && !open_only_if_in_cache) {
|
||||
table = dict_load_table_on_id(table_id, ignore_err);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -195,6 +195,39 @@ dict_stats_rename_table(
|
|||
is returned */
|
||||
size_t errstr_sz); /*!< in: errstr size */
|
||||
|
||||
/*********************************************************************//**
|
||||
Save defragmentation result.
|
||||
@return DB_SUCCESS or error code */
|
||||
UNIV_INTERN
|
||||
dberr_t
|
||||
dict_stats_save_defrag_summary(
|
||||
dict_index_t* index); /*!< in: index */
|
||||
|
||||
/*********************************************************************//**
|
||||
Save defragmentation stats for a given index.
|
||||
@return DB_SUCCESS or error code */
|
||||
UNIV_INTERN
|
||||
dberr_t
|
||||
dict_stats_save_defrag_stats(
|
||||
dict_index_t* index); /*!< in: index */
|
||||
|
||||
/**********************************************************************//**
|
||||
Clear defragmentation summary. */
|
||||
UNIV_INTERN
|
||||
void
|
||||
dict_stats_empty_defrag_summary(
|
||||
/*==================*/
|
||||
dict_index_t* index); /*!< in: index to clear defragmentation stats */
|
||||
|
||||
/**********************************************************************//**
|
||||
Clear defragmentation related index stats. */
|
||||
UNIV_INTERN
|
||||
void
|
||||
dict_stats_empty_defrag_stats(
|
||||
/*==================*/
|
||||
dict_index_t* index); /*!< in: index to clear defragmentation stats */
|
||||
|
||||
|
||||
#ifndef UNIV_NONINL
|
||||
#include "dict0stats.ic"
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -56,6 +56,28 @@ dict_stats_recalc_pool_del(
|
|||
/*=======================*/
|
||||
const dict_table_t* table); /*!< in: table to remove */
|
||||
|
||||
/*****************************************************************//**
|
||||
Add an index in a table to the defrag pool, which is processed by the
|
||||
background stats gathering thread. Only the table id and index id are
|
||||
added to the list, so the table can be closed after being enqueued and
|
||||
it will be opened when needed. If the table or index does not exist later
|
||||
(has been DROPped), then it will be removed from the pool and skipped. */
|
||||
UNIV_INTERN
|
||||
void
|
||||
dict_stats_defrag_pool_add(
|
||||
/*=======================*/
|
||||
const dict_index_t* index); /*!< in: table to add */
|
||||
|
||||
/*****************************************************************//**
|
||||
Delete a given index from the auto defrag pool. */
|
||||
UNIV_INTERN
|
||||
void
|
||||
dict_stats_defrag_pool_del(
|
||||
/*=======================*/
|
||||
const dict_table_t* table, /*!<in: if given, remove
|
||||
all entries for the table */
|
||||
const dict_index_t* index); /*!< in: index to remove */
|
||||
|
||||
/** Yield the data dictionary latch when waiting
|
||||
for the background thread to stop accessing a table.
|
||||
@param trx transaction holding the data dictionary locks */
|
||||
|
|
|
|||
|
|
@ -181,6 +181,16 @@ lock_update_merge_left(
|
|||
const buf_block_t* right_block); /*!< in: merged index page
|
||||
which will be discarded */
|
||||
/*************************************************************//**
|
||||
Updates the lock table when a page is split and merged to
|
||||
two pages. */
|
||||
UNIV_INTERN
|
||||
void
|
||||
lock_update_split_and_merge(
|
||||
const buf_block_t* left_block, /*!< in: left page to which merged */
|
||||
const rec_t* orig_pred, /*!< in: original predecessor of
|
||||
supremum on the left page before merge*/
|
||||
const buf_block_t* right_block);/*!< in: right page from which merged */
|
||||
/*************************************************************//**
|
||||
Resets the original locks on heir and replaces them with gap type locks
|
||||
inherited from rec. */
|
||||
UNIV_INTERN
|
||||
|
|
|
|||
|
|
@ -335,6 +335,15 @@ extern my_bool srv_random_read_ahead;
|
|||
extern ulong srv_read_ahead_threshold;
|
||||
extern ulint srv_n_read_io_threads;
|
||||
extern ulint srv_n_write_io_threads;
|
||||
/* Defragmentation. Originally the Facebook default value is 100, but it is too high */
|
||||
#define SRV_DEFRAGMENT_FREQUENCY_DEFAULT 40
|
||||
extern my_bool srv_defragment;
|
||||
extern uint srv_defragment_n_pages;
|
||||
extern uint srv_defragment_stats_accuracy;
|
||||
extern uint srv_defragment_fill_factor_n_recs;
|
||||
extern double srv_defragment_fill_factor;
|
||||
extern uint srv_defragment_frequency;
|
||||
extern ulonglong srv_defragment_interval;
|
||||
|
||||
/* Number of IO operations per second the server can do */
|
||||
extern ulong srv_io_capacity;
|
||||
|
|
@ -888,7 +897,12 @@ struct export_var_t{
|
|||
ulint innodb_rows_deleted; /*!< srv_n_rows_deleted */
|
||||
ulint innodb_num_open_files; /*!< fil_n_file_opened */
|
||||
ulint innodb_truncated_status_writes; /*!< srv_truncated_status_writes */
|
||||
ulint innodb_available_undo_logs; /*!< srv_available_undo_logs */
|
||||
ulint innodb_available_undo_logs; /*!< srv_available_undo_logs
|
||||
*/
|
||||
ulint innodb_defragment_compression_failures;
|
||||
ulint innodb_defragment_failures;
|
||||
ulint innodb_defragment_count;
|
||||
|
||||
#ifdef UNIV_DEBUG
|
||||
ulint innodb_purge_trx_id_age; /*!< rw_max_trx_id - purged trx_id */
|
||||
ulint innodb_purge_view_trx_id_age; /*!< rw_max_trx_id
|
||||
|
|
|
|||
|
|
@ -687,6 +687,7 @@ or row lock! */
|
|||
#define SYNC_EXTERN_STORAGE 500
|
||||
#define SYNC_FSP 400
|
||||
#define SYNC_FSP_PAGE 395
|
||||
#define SYNC_STATS_DEFRAG 390
|
||||
/*------------------------------------- Change buffer headers */
|
||||
#define SYNC_IBUF_MUTEX 370 /* ibuf_mutex */
|
||||
/*------------------------------------- Change buffer tree */
|
||||
|
|
|
|||
104
storage/innobase/include/ut0timer.h
Normal file
104
storage/innobase/include/ut0timer.h
Normal file
|
|
@ -0,0 +1,104 @@
|
|||
/*****************************************************************************
|
||||
|
||||
Copyright (c) 2013, 2014, Facebook, Inc. All Rights Reserved.
|
||||
Copyright (c) 2014, SkySQL Ab. All Rights Reserved.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free Software
|
||||
Foundation; version 2 of the License.
|
||||
|
||||
This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along with
|
||||
this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
|
||||
|
||||
*****************************************************************************/
|
||||
|
||||
/********************************************************************//**
|
||||
@file include/ut0timer.h
|
||||
Timer routines
|
||||
|
||||
Created 30/07/2014 Jan Lindström jan.lindstrom@skysql.com
|
||||
modified from https://github.com/facebook/mysql-5.6/commit/c75a413edeb96eb99bf11d7269bdfea06f96d6b6
|
||||
*************************************************************************/
|
||||
#ifndef ut0timer_h
|
||||
#define ut0timer_h
|
||||
|
||||
#include "univ.i"
|
||||
#include "data0type.h"
|
||||
#include <my_rdtsc.h>
|
||||
|
||||
/* Current timer stats */
|
||||
extern struct my_timer_unit_info ut_timer;
|
||||
|
||||
/**************************************************************//**
|
||||
Function pointer to point selected timer function.
|
||||
@return timer current value */
|
||||
extern ulonglong (*ut_timer_now)(void);
|
||||
|
||||
/**************************************************************//**
|
||||
Sets up the data required for use of my_timer_* functions.
|
||||
Selects the best timer by high frequency, and tight resolution.
|
||||
Points my_timer_now() to the selected timer function.
|
||||
Initializes my_timer struct to contain the info for selected timer.*/
|
||||
UNIV_INTERN
|
||||
void ut_init_timer(void);
|
||||
|
||||
/**************************************************************//**
|
||||
Return time passed since time then, automatically adjusted
|
||||
for the estimated timer overhead.
|
||||
@return time passed since "then" */
|
||||
UNIV_INLINE
|
||||
ulonglong
|
||||
ut_timer_since(
|
||||
/*===========*/
|
||||
ulonglong then); /*!< in: time where to calculate */
|
||||
/**************************************************************//**
|
||||
Get time passed since "then", and update then to now
|
||||
@return time passed since "then" */
|
||||
UNIV_INLINE
|
||||
ulonglong
|
||||
ut_timer_since_and_update(
|
||||
/*======================*/
|
||||
ulonglong *then); /*!< in: time where to calculate */
|
||||
/**************************************************************//**
|
||||
Convert native timer units in a ulonglong into seconds in a double
|
||||
@return time in a seconds */
|
||||
UNIV_INLINE
|
||||
double
|
||||
ut_timer_to_seconds(
|
||||
/*=================*/
|
||||
ulonglong when); /*!< in: time where to calculate */
|
||||
/**************************************************************//**
|
||||
Convert native timer units in a ulonglong into milliseconds in a double
|
||||
@return time in milliseconds */
|
||||
UNIV_INLINE
|
||||
double
|
||||
ut_timer_to_milliseconds(
|
||||
/*=====================*/
|
||||
ulonglong when); /*!< in: time where to calculate */
|
||||
/**************************************************************//**
|
||||
Convert native timer units in a ulonglong into microseconds in a double
|
||||
@return time in microseconds */
|
||||
UNIV_INLINE
|
||||
double
|
||||
ut_timer_to_microseconds(
|
||||
/*=====================*/
|
||||
ulonglong when); /*!< in: time where to calculate */
|
||||
/**************************************************************//**
|
||||
Convert microseconds in a double to native timer units in a ulonglong
|
||||
@return time in microseconds */
|
||||
UNIV_INLINE
|
||||
ulonglong
|
||||
ut_microseconds_to_timer(
|
||||
/*=====================*/
|
||||
ulonglong when); /*!< in: time where to calculate */
|
||||
|
||||
#ifndef UNIV_NONINL
|
||||
#include "ut0timer.ic"
|
||||
#endif
|
||||
|
||||
#endif
|
||||
113
storage/innobase/include/ut0timer.ic
Normal file
113
storage/innobase/include/ut0timer.ic
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
/*****************************************************************************
|
||||
|
||||
Copyright (c) 2013, 2014, Facebook, Inc. All Rights Reserved.
|
||||
Copyright (c) 2014, SkySQL Ab. All Rights Reserved.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free Software
|
||||
Foundation; version 2 of the License.
|
||||
|
||||
This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along with
|
||||
this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
|
||||
|
||||
*****************************************************************************/
|
||||
|
||||
/********************************************************************//**
|
||||
@file include/ut0timer.ic
|
||||
Timer routines
|
||||
|
||||
Created 30/07/2014 Jan Lindström jan.lindstrom@skysql.com
|
||||
modified from https://github.com/facebook/mysql-5.6/commit/c75a413edeb96eb99bf11d7269bdfea06f96d6b6
|
||||
*************************************************************************/
|
||||
|
||||
/**************************************************************//**
|
||||
Return time passed since time then, automatically adjusted
|
||||
for the estimated timer overhead.
|
||||
@return time passed since "then" */
|
||||
UNIV_INLINE
|
||||
ulonglong
|
||||
ut_timer_since(
|
||||
/*===========*/
|
||||
ulonglong then) /*!< in: time where to calculate */
|
||||
{
|
||||
return (ut_timer_now() - then) - ut_timer.overhead;
|
||||
}
|
||||
|
||||
/**************************************************************//**
|
||||
Get time passed since "then", and update then to now
|
||||
@return time passed sinche "then" */
|
||||
UNIV_INLINE
|
||||
ulonglong
|
||||
ut_timer_since_and_update(
|
||||
/*======================*/
|
||||
ulonglong *then) /*!< in: time where to calculate */
|
||||
{
|
||||
ulonglong now = ut_timer_now();
|
||||
ulonglong ret = (now - (*then)) - ut_timer.overhead;
|
||||
*then = now;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**************************************************************//**
|
||||
Convert native timer units in a ulonglong into seconds in a double
|
||||
@return time in a seconds */
|
||||
UNIV_INLINE
|
||||
double
|
||||
ut_timer_to_seconds(
|
||||
/*=================*/
|
||||
ulonglong when) /*!< in: time where to calculate */
|
||||
{
|
||||
double ret = (double)(when);
|
||||
ret /= (double)(ut_timer.frequency);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**************************************************************//**
|
||||
Convert native timer units in a ulonglong into milliseconds in a double
|
||||
@return time in milliseconds */
|
||||
UNIV_INLINE
|
||||
double
|
||||
ut_timer_to_milliseconds(
|
||||
/*=====================*/
|
||||
ulonglong when) /*!< in: time where to calculate */
|
||||
{
|
||||
double ret = (double)(when);
|
||||
ret *= 1000.0;
|
||||
ret /= (double)(ut_timer.frequency);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**************************************************************//**
|
||||
Convert native timer units in a ulonglong into microseconds in a double
|
||||
@return time in microseconds */
|
||||
UNIV_INLINE
|
||||
double
|
||||
ut_timer_to_microseconds(
|
||||
/*=====================*/
|
||||
ulonglong when) /*!< in: time where to calculate */
|
||||
{
|
||||
double ret = (double)(when);
|
||||
ret *= 1000000.0;
|
||||
ret /= (double)(ut_timer.frequency);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**************************************************************//**
|
||||
Convert microseconds in a double to native timer units in a ulonglong
|
||||
@return time in microseconds */
|
||||
UNIV_INLINE
|
||||
ulonglong
|
||||
ut_microseconds_to_timer(
|
||||
/*=====================*/
|
||||
ulonglong when) /*!< in: time where to calculate */
|
||||
{
|
||||
double ret = when;
|
||||
ret *= (double)(ut_timer.frequency);
|
||||
ret /= 1000000.0;
|
||||
return (ulonglong)ret;
|
||||
}
|
||||
|
|
@ -3267,6 +3267,47 @@ lock_update_merge_left(
|
|||
lock_mutex_exit();
|
||||
}
|
||||
|
||||
/*************************************************************//**
|
||||
Updates the lock table when a page is split and merged to
|
||||
two pages. */
|
||||
UNIV_INTERN
|
||||
void
|
||||
lock_update_split_and_merge(
|
||||
const buf_block_t* left_block, /*!< in: left page to which merged */
|
||||
const rec_t* orig_pred, /*!< in: original predecessor of
|
||||
supremum on the left page before merge*/
|
||||
const buf_block_t* right_block) /*!< in: right page from which merged */
|
||||
{
|
||||
const rec_t* left_next_rec;
|
||||
|
||||
ut_a(left_block && right_block);
|
||||
ut_a(orig_pred);
|
||||
|
||||
lock_mutex_enter();
|
||||
|
||||
left_next_rec = page_rec_get_next_const(orig_pred);
|
||||
|
||||
/* Inherit the locks on the supremum of the left page to the
|
||||
first record which was moved from the right page */
|
||||
lock_rec_inherit_to_gap(
|
||||
left_block, left_block,
|
||||
page_rec_get_heap_no(left_next_rec),
|
||||
PAGE_HEAP_NO_SUPREMUM);
|
||||
|
||||
/* Reset the locks on the supremum of the left page,
|
||||
releasing waiting transactions */
|
||||
lock_rec_reset_and_release_wait(left_block,
|
||||
PAGE_HEAP_NO_SUPREMUM);
|
||||
|
||||
/* Inherit the locks to the supremum of the left page from the
|
||||
successor of the infimum on the right page */
|
||||
lock_rec_inherit_to_gap(left_block, right_block,
|
||||
PAGE_HEAP_NO_SUPREMUM,
|
||||
lock_get_min_heap_no(right_block));
|
||||
|
||||
lock_mutex_exit();
|
||||
}
|
||||
|
||||
/*************************************************************//**
|
||||
Resets the original locks on heir and replaces them with gap type locks
|
||||
inherited from rec. */
|
||||
|
|
|
|||
|
|
@ -1349,6 +1349,21 @@ page_cur_insert_rec_zip(
|
|||
return(insert_rec);
|
||||
}
|
||||
|
||||
/* Page compress failed. If this happened on a
|
||||
leaf page, put the data size into the sample
|
||||
buffer. */
|
||||
if (page_is_leaf(page)) {
|
||||
ulint occupied = page_get_data_size(page)
|
||||
+ page_dir_calc_reserved_space(
|
||||
page_get_n_recs(page));
|
||||
index->stat_defrag_data_size_sample[
|
||||
index->stat_defrag_sample_next_slot] =
|
||||
occupied;
|
||||
index->stat_defrag_sample_next_slot =
|
||||
(index->stat_defrag_sample_next_slot
|
||||
+ 1) % STAT_DEFRAG_DATA_SIZE_N_SAMPLE;
|
||||
}
|
||||
|
||||
ut_ad(cursor->rec
|
||||
== (pos > 1
|
||||
? page_rec_get_nth(
|
||||
|
|
|
|||
|
|
@ -54,6 +54,7 @@ Created 9/17/2000 Heikki Tuuri
|
|||
#include "rem0cmp.h"
|
||||
#include "log0log.h"
|
||||
#include "btr0sea.h"
|
||||
#include "btr0defragment.h"
|
||||
#include "fil0fil.h"
|
||||
#include "ibuf0ibuf.h"
|
||||
#include "fts0fts.h"
|
||||
|
|
@ -3843,6 +3844,8 @@ row_drop_table_for_mysql(
|
|||
if (!dict_table_is_temporary(table)) {
|
||||
|
||||
dict_stats_recalc_pool_del(table);
|
||||
dict_stats_defrag_pool_del(table, NULL);
|
||||
btr_defragment_remove_table(table);
|
||||
|
||||
/* Remove stats for this table and all of its indexes from the
|
||||
persistent storage if it exists and if there are stats for this
|
||||
|
|
@ -5128,18 +5131,6 @@ end:
|
|||
trx->error_state = DB_SUCCESS;
|
||||
trx_rollback_to_savepoint(trx, NULL);
|
||||
trx->error_state = DB_SUCCESS;
|
||||
} else {
|
||||
if (old_is_tmp && !new_is_tmp) {
|
||||
/* After ALTER TABLE the table statistics
|
||||
needs to be rebuilt. Even if we close
|
||||
table below there could be other
|
||||
transactions using this table (e.g.
|
||||
SELECT * FROM INFORMATION_SCHEMA.`TABLE_CONSTRAINTS`),
|
||||
thus we can't remove table from dictionary cache
|
||||
here. Therefore, we initialize the
|
||||
transient statistics here. */
|
||||
dict_stats_update_transient(table);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -68,6 +68,7 @@ Created 10/8/1995 Heikki Tuuri
|
|||
#include "os0sync.h" /* for HAVE_ATOMIC_BUILTINS */
|
||||
#include "srv0mon.h"
|
||||
#include "ut0crc32.h"
|
||||
#include "btr0defragment.h"
|
||||
|
||||
#include "mysql/plugin.h"
|
||||
#include "mysql/service_thd_wait.h"
|
||||
|
|
@ -396,6 +397,15 @@ UNIV_INTERN ib_uint64_t srv_page_compressed_trim_op = 0;
|
|||
UNIV_INTERN ib_uint64_t srv_page_compressed_trim_op_saved = 0;
|
||||
UNIV_INTERN ib_uint64_t srv_index_page_decompressed = 0;
|
||||
|
||||
/* Defragmentation */
|
||||
UNIV_INTERN my_bool srv_defragment = FALSE;
|
||||
UNIV_INTERN uint srv_defragment_n_pages = 7;
|
||||
UNIV_INTERN uint srv_defragment_stats_accuracy = 0;
|
||||
UNIV_INTERN uint srv_defragment_fill_factor_n_recs = 20;
|
||||
UNIV_INTERN double srv_defragment_fill_factor = 0.9;
|
||||
UNIV_INTERN uint srv_defragment_frequency =
|
||||
SRV_DEFRAGMENT_FREQUENCY_DEFAULT;
|
||||
UNIV_INTERN ulonglong srv_defragment_interval = 0;
|
||||
|
||||
/* Set the following to 0 if you want InnoDB to write messages on
|
||||
stderr on startup/shutdown. */
|
||||
|
|
@ -1492,6 +1502,11 @@ srv_export_innodb_status(void)
|
|||
export_vars.innodb_page_compressed_trim_op_saved = srv_stats.page_compressed_trim_op_saved;
|
||||
export_vars.innodb_pages_page_decompressed = srv_stats.pages_page_decompressed;
|
||||
|
||||
export_vars.innodb_defragment_compression_failures =
|
||||
btr_defragment_compression_failures;
|
||||
export_vars.innodb_defragment_failures = btr_defragment_failures;
|
||||
export_vars.innodb_defragment_count = btr_defragment_count;
|
||||
|
||||
#ifdef UNIV_DEBUG
|
||||
rw_lock_s_lock(&purge_sys->latch);
|
||||
trx_id_t done_trx_no = purge_sys->done.trx_no;
|
||||
|
|
|
|||
|
|
@ -43,6 +43,7 @@ Created 2/16/1996 Heikki Tuuri
|
|||
#include "pars0pars.h"
|
||||
#include "row0ftsort.h"
|
||||
#include "ut0mem.h"
|
||||
#include "ut0timer.h"
|
||||
#include "mem0mem.h"
|
||||
#include "data0data.h"
|
||||
#include "data0type.h"
|
||||
|
|
@ -67,6 +68,8 @@ Created 2/16/1996 Heikki Tuuri
|
|||
#include "ibuf0ibuf.h"
|
||||
#include "srv0start.h"
|
||||
#include "srv0srv.h"
|
||||
#include "btr0defragment.h"
|
||||
|
||||
#ifndef UNIV_HOTBACKUP
|
||||
# include "trx0rseg.h"
|
||||
# include "os0proc.h"
|
||||
|
|
@ -1531,6 +1534,9 @@ innobase_start_or_create_for_mysql(void)
|
|||
char* logfile0 = NULL;
|
||||
size_t dirnamelen;
|
||||
|
||||
/* This should be initialized early */
|
||||
ut_init_timer();
|
||||
|
||||
if (srv_force_recovery > SRV_FORCE_NO_TRX_UNDO) {
|
||||
srv_read_only_mode = true;
|
||||
}
|
||||
|
|
@ -2877,6 +2883,9 @@ files_checked:
|
|||
fts_optimize_init();
|
||||
}
|
||||
|
||||
/* Initialize online defragmentation. */
|
||||
btr_defragment_init();
|
||||
|
||||
srv_was_started = TRUE;
|
||||
|
||||
return(DB_SUCCESS);
|
||||
|
|
|
|||
|
|
@ -1164,6 +1164,7 @@ sync_thread_add_level(
|
|||
case SYNC_IBUF_MUTEX:
|
||||
case SYNC_INDEX_ONLINE_LOG:
|
||||
case SYNC_STATS_AUTO_RECALC:
|
||||
case SYNC_STATS_DEFRAG:
|
||||
if (!sync_thread_levels_g(array, level, TRUE)) {
|
||||
fprintf(stderr,
|
||||
"InnoDB: sync_thread_levels_g(array, %lu)"
|
||||
|
|
|
|||
92
storage/innobase/ut/ut0timer.cc
Normal file
92
storage/innobase/ut/ut0timer.cc
Normal file
|
|
@ -0,0 +1,92 @@
|
|||
/*****************************************************************************
|
||||
|
||||
Copyright (c) 2013, 2014, Facebook, Inc. All Rights Reserved.
|
||||
Copyright (c) 2014, SkySQL Ab. All Rights Reserved.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free Software
|
||||
Foundation; version 2 of the License.
|
||||
|
||||
This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along with
|
||||
this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
|
||||
|
||||
*****************************************************************************/
|
||||
|
||||
/********************************************************************//**
|
||||
@file ut/ut0timer.cc
|
||||
Timer routines
|
||||
|
||||
Created 30/07/2014 Jan Lindström jan.lindstrom@skysql.com
|
||||
modified from https://github.com/facebook/mysql-5.6/commit/c75a413edeb96eb99bf11d7269bdfea06f96d6b6
|
||||
*************************************************************************/
|
||||
|
||||
#include "data0type.h"
|
||||
#include <my_rdtsc.h>
|
||||
#include <ut0timer.h>
|
||||
|
||||
/**************************************************************//**
|
||||
Initial timer definition
|
||||
@return 0 */
|
||||
static
|
||||
ulonglong
|
||||
ut_timer_none(void)
|
||||
/*===============*/
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**************************************************************//**
|
||||
Function pointer to point selected timer function.
|
||||
@return timer current value */
|
||||
ulonglong (*ut_timer_now)(void) = &ut_timer_none;
|
||||
|
||||
struct my_timer_unit_info ut_timer;
|
||||
|
||||
/**************************************************************//**
|
||||
Sets up the data required for use of my_timer_* functions.
|
||||
Selects the best timer by high frequency, and tight resolution.
|
||||
Points my_timer_now() to the selected timer function.
|
||||
Initializes my_timer struct to contain the info for selected timer.*/
|
||||
UNIV_INTERN
|
||||
void
|
||||
ut_init_timer(void)
|
||||
/*===============*/
|
||||
{
|
||||
MY_TIMER_INFO all_timer_info;
|
||||
my_timer_init(&all_timer_info);
|
||||
|
||||
if (all_timer_info.cycles.frequency > 1000000 &&
|
||||
all_timer_info.cycles.resolution == 1) {
|
||||
ut_timer = all_timer_info.cycles;
|
||||
ut_timer_now = &my_timer_cycles;
|
||||
} else if (all_timer_info.nanoseconds.frequency > 1000000 &&
|
||||
all_timer_info.nanoseconds.resolution == 1) {
|
||||
ut_timer = all_timer_info.nanoseconds;
|
||||
ut_timer_now = &my_timer_nanoseconds;
|
||||
} else if (all_timer_info.microseconds.frequency >= 1000000 &&
|
||||
all_timer_info.microseconds.resolution == 1) {
|
||||
ut_timer = all_timer_info.microseconds;
|
||||
ut_timer_now = &my_timer_microseconds;
|
||||
|
||||
} else if (all_timer_info.milliseconds.frequency >= 1000 &&
|
||||
all_timer_info.milliseconds.resolution == 1) {
|
||||
ut_timer = all_timer_info.milliseconds;
|
||||
ut_timer_now = &my_timer_milliseconds;
|
||||
} else if (all_timer_info.ticks.frequency >= 1000 &&
|
||||
/* Will probably be false */
|
||||
all_timer_info.ticks.resolution == 1) {
|
||||
ut_timer = all_timer_info.ticks;
|
||||
ut_timer_now = &my_timer_ticks;
|
||||
} else {
|
||||
/* None are acceptable, so leave it as "None", and fill in struct */
|
||||
ut_timer.frequency = 1; /* Avoid div-by-zero */
|
||||
ut_timer.overhead = 0; /* Since it doesn't do anything */
|
||||
ut_timer.resolution = 10; /* Another sign it's bad */
|
||||
ut_timer.routine = 0; /* None */
|
||||
}
|
||||
}
|
||||
|
|
@ -292,6 +292,7 @@ SET(INNOBASE_SOURCES
|
|||
btr/btr0cur.cc
|
||||
btr/btr0pcur.cc
|
||||
btr/btr0sea.cc
|
||||
btr/btr0defragment.cc
|
||||
buf/buf0buddy.cc
|
||||
buf/buf0buf.cc
|
||||
buf/buf0dblwr.cc
|
||||
|
|
@ -405,7 +406,8 @@ SET(INNOBASE_SOURCES
|
|||
ut/ut0rnd.cc
|
||||
ut/ut0ut.cc
|
||||
ut/ut0vec.cc
|
||||
ut/ut0wqueue.cc)
|
||||
ut/ut0wqueue.cc
|
||||
ut/ut0timer.cc)
|
||||
|
||||
IF(NOT XTRADB_OK)
|
||||
MESSAGE(FATAL_ERROR "Percona XtraDB is not supported on this platform")
|
||||
|
|
|
|||
|
|
@ -38,6 +38,7 @@ Created 6/2/1994 Heikki Tuuri
|
|||
#include "btr0cur.h"
|
||||
#include "btr0sea.h"
|
||||
#include "btr0pcur.h"
|
||||
#include "btr0defragment.h"
|
||||
#include "rem0cmp.h"
|
||||
#include "lock0lock.h"
|
||||
#include "ibuf0ibuf.h"
|
||||
|
|
@ -1212,6 +1213,32 @@ btr_get_size(
|
|||
ulint flag, /*!< in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */
|
||||
mtr_t* mtr) /*!< in/out: mini-transaction where index
|
||||
is s-latched */
|
||||
{
|
||||
ulint used;
|
||||
if (flag == BTR_N_LEAF_PAGES) {
|
||||
btr_get_size_and_reserved(index, flag, &used, mtr);
|
||||
return used;
|
||||
} else if (flag == BTR_TOTAL_SIZE) {
|
||||
return btr_get_size_and_reserved(index, flag, &used, mtr);
|
||||
} else {
|
||||
ut_error;
|
||||
}
|
||||
return (ULINT_UNDEFINED);
|
||||
}
|
||||
|
||||
/**************************************************************//**
|
||||
Gets the number of reserved and used pages in a B-tree.
|
||||
@return number of pages reserved, or ULINT_UNDEFINED if the index
|
||||
is unavailable */
|
||||
UNIV_INTERN
|
||||
ulint
|
||||
btr_get_size_and_reserved(
|
||||
/*======================*/
|
||||
dict_index_t* index, /*!< in: index */
|
||||
ulint flag, /*!< in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */
|
||||
ulint* used, /*!< out: number of pages used (<= reserved) */
|
||||
mtr_t* mtr) /*!< in/out: mini-transaction where index
|
||||
is s-latched */
|
||||
{
|
||||
fseg_header_t* seg_header;
|
||||
page_t* root;
|
||||
|
|
@ -1221,6 +1248,8 @@ btr_get_size(
|
|||
ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
|
||||
MTR_MEMO_S_LOCK));
|
||||
|
||||
ut_a(flag == BTR_N_LEAF_PAGES || flag == BTR_TOTAL_SIZE);
|
||||
|
||||
if (index->page == FIL_NULL || dict_index_is_online_ddl(index)
|
||||
|| *index->name == TEMP_INDEX_PREFIX) {
|
||||
return(ULINT_UNDEFINED);
|
||||
|
|
@ -1228,27 +1257,16 @@ btr_get_size(
|
|||
|
||||
root = btr_root_get(index, mtr);
|
||||
|
||||
SRV_CORRUPT_TABLE_CHECK(root,
|
||||
{
|
||||
mtr_commit(mtr);
|
||||
return(0);
|
||||
});
|
||||
seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
|
||||
|
||||
if (flag == BTR_N_LEAF_PAGES) {
|
||||
seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
|
||||
n = fseg_n_reserved_pages(seg_header, used, mtr);
|
||||
|
||||
fseg_n_reserved_pages(seg_header, &n, mtr);
|
||||
|
||||
} else if (flag == BTR_TOTAL_SIZE) {
|
||||
if (flag == BTR_TOTAL_SIZE) {
|
||||
seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
|
||||
|
||||
n = fseg_n_reserved_pages(seg_header, &dummy, mtr);
|
||||
|
||||
seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
|
||||
|
||||
n += fseg_n_reserved_pages(seg_header, &dummy, mtr);
|
||||
} else {
|
||||
ut_error;
|
||||
*used += dummy;
|
||||
|
||||
}
|
||||
|
||||
return(n);
|
||||
|
|
@ -2013,7 +2031,7 @@ IBUF_BITMAP_FREE is unaffected by reorganization.
|
|||
|
||||
@retval true if the operation was successful
|
||||
@retval false if it is a compressed page, and recompression failed */
|
||||
static __attribute__((nonnull))
|
||||
UNIV_INTERN
|
||||
bool
|
||||
btr_page_reorganize_block(
|
||||
/*======================*/
|
||||
|
|
@ -2965,6 +2983,12 @@ func_start:
|
|||
new_page_zip = buf_block_get_page_zip(new_block);
|
||||
btr_page_create(new_block, new_page_zip, cursor->index,
|
||||
btr_page_get_level(page, mtr), mtr);
|
||||
/* Only record the leaf level page splits. */
|
||||
if (btr_page_get_level(page, mtr) == 0) {
|
||||
cursor->index->stat_defrag_n_page_split ++;
|
||||
cursor->index->stat_defrag_modified_counter ++;
|
||||
btr_defragment_save_defrag_stats_if_needed(cursor->index);
|
||||
}
|
||||
|
||||
/* 3. Calculate the first record on the upper half-page, and the
|
||||
first record (move_limit) on original page which ends up on the
|
||||
|
|
@ -3223,31 +3247,9 @@ func_exit:
|
|||
return(rec);
|
||||
}
|
||||
|
||||
#ifdef UNIV_SYNC_DEBUG
|
||||
/*************************************************************//**
|
||||
Removes a page from the level list of pages.
|
||||
@param space in: space where removed
|
||||
@param zip_size in: compressed page size in bytes, or 0 for uncompressed
|
||||
@param page in/out: page to remove
|
||||
@param index in: index tree
|
||||
@param mtr in/out: mini-transaction */
|
||||
# define btr_level_list_remove(space,zip_size,page,index,mtr) \
|
||||
btr_level_list_remove_func(space,zip_size,page,index,mtr)
|
||||
#else /* UNIV_SYNC_DEBUG */
|
||||
/*************************************************************//**
|
||||
Removes a page from the level list of pages.
|
||||
@param space in: space where removed
|
||||
@param zip_size in: compressed page size in bytes, or 0 for uncompressed
|
||||
@param page in/out: page to remove
|
||||
@param index in: index tree
|
||||
@param mtr in/out: mini-transaction */
|
||||
# define btr_level_list_remove(space,zip_size,page,index,mtr) \
|
||||
btr_level_list_remove_func(space,zip_size,page,mtr)
|
||||
#endif /* UNIV_SYNC_DEBUG */
|
||||
|
||||
/*************************************************************//**
|
||||
Removes a page from the level list of pages. */
|
||||
static __attribute__((nonnull))
|
||||
UNIV_INTERN
|
||||
void
|
||||
btr_level_list_remove_func(
|
||||
/*=======================*/
|
||||
|
|
@ -3419,7 +3421,7 @@ btr_node_ptr_delete(
|
|||
If page is the only on its level, this function moves its records to the
|
||||
father page, thus reducing the tree height.
|
||||
@return father block */
|
||||
static
|
||||
UNIV_INTERN
|
||||
buf_block_t*
|
||||
btr_lift_page_up(
|
||||
/*=============*/
|
||||
|
|
|
|||
815
storage/xtradb/btr/btr0defragment.cc
Normal file
815
storage/xtradb/btr/btr0defragment.cc
Normal file
|
|
@ -0,0 +1,815 @@
|
|||
/*****************************************************************************
|
||||
|
||||
Copyright (C) 2012, 2014 Facebook, Inc. All Rights Reserved.
|
||||
Copyright (C) 2014, SkySQL Ab. All Rights Reserved.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free Software
|
||||
Foundation; version 2 of the License.
|
||||
|
||||
This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along with
|
||||
this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
|
||||
|
||||
*****************************************************************************/
|
||||
/**************************************************//**
|
||||
@file btr/btr0defragment.cc
|
||||
Index defragmentation.
|
||||
|
||||
Created 05/29/2014 Rongrong Zhong
|
||||
Modified 16/07/2014 Sunguck Lee
|
||||
Modified 30/07/2014 Jan Lindström jan.lindstrom@skysql.com
|
||||
*******************************************************/
|
||||
|
||||
#include "btr0defragment.h"
|
||||
#ifndef UNIV_HOTBACKUP
|
||||
#include "btr0cur.h"
|
||||
#include "btr0sea.h"
|
||||
#include "btr0pcur.h"
|
||||
#include "dict0stats.h"
|
||||
#include "dict0stats_bg.h"
|
||||
#include "ibuf0ibuf.h"
|
||||
#include "lock0lock.h"
|
||||
#include "srv0start.h"
|
||||
#include "srv0srv.h"
|
||||
#include "ut0timer.h"
|
||||
|
||||
#include <list>
|
||||
|
||||
/**************************************************//**
|
||||
Custom nullptr implementation for under g++ 4.6
|
||||
*******************************************************/
|
||||
// #pragma once
|
||||
namespace std
|
||||
{
|
||||
// based on SC22/WG21/N2431 = J16/07-0301
|
||||
struct nullptr_t
|
||||
{
|
||||
template<typename any> operator any * () const
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
template<class any, typename T> operator T any:: * () const
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef _MSC_VER
|
||||
struct pad {};
|
||||
pad __[sizeof(void*)/sizeof(pad)];
|
||||
#else
|
||||
char __[sizeof(void*)];
|
||||
#endif
|
||||
private:
|
||||
// nullptr_t();// {}
|
||||
// nullptr_t(const nullptr_t&);
|
||||
// void operator = (const nullptr_t&);
|
||||
void operator &() const;
|
||||
template<typename any> void operator +(any) const
|
||||
{
|
||||
/*I Love MSVC 2005!*/
|
||||
}
|
||||
template<typename any> void operator -(any) const
|
||||
{
|
||||
/*I Love MSVC 2005!*/
|
||||
}
|
||||
};
|
||||
static const nullptr_t __nullptr = {};
|
||||
}
|
||||
|
||||
#ifndef nullptr
|
||||
#define nullptr std::__nullptr
|
||||
#endif
|
||||
/**************************************************//**
|
||||
End of Custom nullptr implementation for under g++ 4.6
|
||||
*******************************************************/
|
||||
|
||||
/* When there's no work, either because defragment is disabled, or because no
|
||||
query is submitted, thread checks state every BTR_DEFRAGMENT_SLEEP_IN_USECS.*/
|
||||
#define BTR_DEFRAGMENT_SLEEP_IN_USECS 1000000
|
||||
/* Reduce the target page size by this amount when compression failure happens
|
||||
during defragmentaiton. 512 is chosen because it's a power of 2 and it is about
|
||||
3% of the page size. When there are compression failures in defragmentation,
|
||||
our goal is to get a decent defrag ratio with as few compression failure as
|
||||
possible. From experimentation it seems that reduce the target size by 512 every
|
||||
time will make sure the page is compressible within a couple of iterations. */
|
||||
#define BTR_DEFRAGMENT_PAGE_REDUCTION_STEP_SIZE 512
|
||||
|
||||
/* Work queue for defragmentation. */
|
||||
typedef std::list<btr_defragment_item_t*> btr_defragment_wq_t;
|
||||
static btr_defragment_wq_t btr_defragment_wq;
|
||||
|
||||
/* Mutex protecting the defragmentation work queue.*/
|
||||
ib_mutex_t btr_defragment_mutex;
|
||||
#ifdef UNIV_PFS_MUTEX
|
||||
UNIV_INTERN mysql_pfs_key_t btr_defragment_mutex_key;
|
||||
#endif /* UNIV_PFS_MUTEX */
|
||||
|
||||
/* Number of compression failures caused by defragmentation since server
|
||||
start. */
|
||||
ulint btr_defragment_compression_failures = 0;
|
||||
/* Number of btr_defragment_n_pages calls that altered page but didn't
|
||||
manage to release any page. */
|
||||
ulint btr_defragment_failures = 0;
|
||||
/* Total number of btr_defragment_n_pages calls that altered page.
|
||||
The difference between btr_defragment_count and btr_defragment_failures shows
|
||||
the amount of effort wasted. */
|
||||
ulint btr_defragment_count = 0;
|
||||
|
||||
/******************************************************************//**
|
||||
Constructor for btr_defragment_item_t. */
|
||||
btr_defragment_item_t::btr_defragment_item_t(
|
||||
btr_pcur_t* pcur,
|
||||
os_event_t event)
|
||||
{
|
||||
this->pcur = pcur;
|
||||
this->event = event;
|
||||
this->removed = false;
|
||||
this->last_processed = 0;
|
||||
}
|
||||
|
||||
/******************************************************************//**
|
||||
Destructor for btr_defragment_item_t. */
|
||||
btr_defragment_item_t::~btr_defragment_item_t() {
|
||||
if (this->pcur) {
|
||||
btr_pcur_free_for_mysql(this->pcur);
|
||||
}
|
||||
if (this->event) {
|
||||
os_event_set(this->event);
|
||||
}
|
||||
}
|
||||
|
||||
/******************************************************************//**
|
||||
Initialize defragmentation. */
|
||||
void
|
||||
btr_defragment_init()
|
||||
{
|
||||
srv_defragment_interval = ut_microseconds_to_timer(
|
||||
1000000.0 / srv_defragment_frequency);
|
||||
mutex_create(btr_defragment_mutex_key, &btr_defragment_mutex,
|
||||
SYNC_ANY_LATCH);
|
||||
os_thread_create(btr_defragment_thread, NULL, NULL);
|
||||
}
|
||||
|
||||
/******************************************************************//**
|
||||
Shutdown defragmentation. Release all resources. */
|
||||
void
|
||||
btr_defragment_shutdown()
|
||||
{
|
||||
mutex_enter(&btr_defragment_mutex);
|
||||
list< btr_defragment_item_t* >::iterator iter = btr_defragment_wq.begin();
|
||||
while(iter != btr_defragment_wq.end()) {
|
||||
btr_defragment_item_t* item = *iter;
|
||||
iter = btr_defragment_wq.erase(iter);
|
||||
delete item;
|
||||
}
|
||||
mutex_exit(&btr_defragment_mutex);
|
||||
mutex_free(&btr_defragment_mutex);
|
||||
}
|
||||
|
||||
|
||||
/******************************************************************//**
|
||||
Functions used by the query threads: btr_defragment_xxx_index
|
||||
Query threads find/add/remove index. */
|
||||
/******************************************************************//**
|
||||
Check whether the given index is in btr_defragment_wq. We use index->id
|
||||
to identify indices. */
|
||||
bool
|
||||
btr_defragment_find_index(
|
||||
dict_index_t* index) /*!< Index to find. */
|
||||
{
|
||||
mutex_enter(&btr_defragment_mutex);
|
||||
for (list< btr_defragment_item_t* >::iterator iter = btr_defragment_wq.begin();
|
||||
iter != btr_defragment_wq.end();
|
||||
++iter) {
|
||||
btr_defragment_item_t* item = *iter;
|
||||
btr_pcur_t* pcur = item->pcur;
|
||||
btr_cur_t* cursor = btr_pcur_get_btr_cur(pcur);
|
||||
dict_index_t* idx = btr_cur_get_index(cursor);
|
||||
if (index->id == idx->id) {
|
||||
mutex_exit(&btr_defragment_mutex);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
mutex_exit(&btr_defragment_mutex);
|
||||
return false;
|
||||
}
|
||||
|
||||
/******************************************************************//**
|
||||
Query thread uses this function to add an index to btr_defragment_wq.
|
||||
Return a pointer to os_event for the query thread to wait on if this is a
|
||||
synchronized defragmentation. */
|
||||
os_event_t
|
||||
btr_defragment_add_index(
|
||||
dict_index_t* index, /*!< index to be added */
|
||||
bool async) /*!< whether this is an async defragmentation */
|
||||
{
|
||||
mtr_t mtr;
|
||||
ulint space = dict_index_get_space(index);
|
||||
ulint zip_size = dict_table_zip_size(index->table);
|
||||
ulint page_no = dict_index_get_page(index);
|
||||
mtr_start(&mtr);
|
||||
// Load index rood page.
|
||||
page_t* page = btr_page_get(space, zip_size, page_no,
|
||||
RW_NO_LATCH, index, &mtr);
|
||||
if (btr_page_get_level(page, &mtr) == 0) {
|
||||
// Index root is a leaf page, no need to defragment.
|
||||
mtr_commit(&mtr);
|
||||
return NULL;
|
||||
}
|
||||
btr_pcur_t* pcur = btr_pcur_create_for_mysql();
|
||||
os_event_t event = NULL;
|
||||
if (!async) {
|
||||
event = os_event_create();
|
||||
}
|
||||
btr_pcur_open_at_index_side(true, index, BTR_SEARCH_LEAF, pcur,
|
||||
true, 0, &mtr);
|
||||
btr_pcur_move_to_next(pcur, &mtr);
|
||||
btr_pcur_store_position(pcur, &mtr);
|
||||
mtr_commit(&mtr);
|
||||
dict_stats_empty_defrag_summary(index);
|
||||
btr_defragment_item_t* item = new btr_defragment_item_t(pcur, event);
|
||||
mutex_enter(&btr_defragment_mutex);
|
||||
btr_defragment_wq.push_back(item);
|
||||
mutex_exit(&btr_defragment_mutex);
|
||||
return event;
|
||||
}
|
||||
|
||||
/******************************************************************//**
|
||||
When table is dropped, this function is called to mark a table as removed in
|
||||
btr_efragment_wq. The difference between this function and the remove_index
|
||||
function is this will not NULL the event. */
|
||||
void
|
||||
btr_defragment_remove_table(
|
||||
dict_table_t* table) /*!< Index to be removed. */
|
||||
{
|
||||
mutex_enter(&btr_defragment_mutex);
|
||||
for (list< btr_defragment_item_t* >::iterator iter = btr_defragment_wq.begin();
|
||||
iter != btr_defragment_wq.end();
|
||||
++iter) {
|
||||
btr_defragment_item_t* item = *iter;
|
||||
btr_pcur_t* pcur = item->pcur;
|
||||
btr_cur_t* cursor = btr_pcur_get_btr_cur(pcur);
|
||||
dict_index_t* idx = btr_cur_get_index(cursor);
|
||||
if (table->id == idx->table->id) {
|
||||
item->removed = true;
|
||||
}
|
||||
}
|
||||
mutex_exit(&btr_defragment_mutex);
|
||||
}
|
||||
|
||||
/******************************************************************//**
|
||||
Query thread uses this function to mark an index as removed in
|
||||
btr_efragment_wq. */
|
||||
void
|
||||
btr_defragment_remove_index(
|
||||
dict_index_t* index) /*!< Index to be removed. */
|
||||
{
|
||||
mutex_enter(&btr_defragment_mutex);
|
||||
for (list< btr_defragment_item_t* >::iterator iter = btr_defragment_wq.begin();
|
||||
iter != btr_defragment_wq.end();
|
||||
++iter) {
|
||||
btr_defragment_item_t* item = *iter;
|
||||
btr_pcur_t* pcur = item->pcur;
|
||||
btr_cur_t* cursor = btr_pcur_get_btr_cur(pcur);
|
||||
dict_index_t* idx = btr_cur_get_index(cursor);
|
||||
if (index->id == idx->id) {
|
||||
item->removed = true;
|
||||
item->event = NULL;
|
||||
break;
|
||||
}
|
||||
}
|
||||
mutex_exit(&btr_defragment_mutex);
|
||||
}
|
||||
|
||||
/******************************************************************//**
|
||||
Functions used by defragmentation thread: btr_defragment_xxx_item.
|
||||
Defragmentation thread operates on the work *item*. It gets/removes
|
||||
item from the work queue. */
|
||||
/******************************************************************//**
|
||||
Defragment thread uses this to remove an item from btr_defragment_wq.
|
||||
When an item is removed from the work queue, all resources associated with it
|
||||
are free as well. */
|
||||
void
|
||||
btr_defragment_remove_item(
|
||||
btr_defragment_item_t* item) /*!< Item to be removed. */
|
||||
{
|
||||
mutex_enter(&btr_defragment_mutex);
|
||||
for (list< btr_defragment_item_t* >::iterator iter = btr_defragment_wq.begin();
|
||||
iter != btr_defragment_wq.end();
|
||||
++iter) {
|
||||
if (item == *iter) {
|
||||
btr_defragment_wq.erase(iter);
|
||||
delete item;
|
||||
break;
|
||||
}
|
||||
}
|
||||
mutex_exit(&btr_defragment_mutex);
|
||||
}
|
||||
|
||||
/******************************************************************//**
|
||||
Defragment thread uses this to get an item from btr_defragment_wq to work on.
|
||||
The item is not removed from the work queue so query threads can still access
|
||||
this item. We keep it this way so query threads can find and kill a
|
||||
defragmentation even if that index is being worked on. Be aware that while you
|
||||
work on this item you have no lock protection on it whatsoever. This is OK as
|
||||
long as the query threads and defragment thread won't modify the same fields
|
||||
without lock protection.
|
||||
*/
|
||||
btr_defragment_item_t*
|
||||
btr_defragment_get_item()
|
||||
{
|
||||
if (btr_defragment_wq.empty()) {
|
||||
return nullptr;
|
||||
}
|
||||
mutex_enter(&btr_defragment_mutex);
|
||||
list< btr_defragment_item_t* >::iterator iter = btr_defragment_wq.begin();
|
||||
if (iter == btr_defragment_wq.end()) {
|
||||
iter = btr_defragment_wq.begin();
|
||||
}
|
||||
btr_defragment_item_t* item = *iter;
|
||||
iter++;
|
||||
mutex_exit(&btr_defragment_mutex);
|
||||
return item;
|
||||
}
|
||||
|
||||
/*********************************************************************//**
|
||||
Check whether we should save defragmentation statistics to persistent storage.
|
||||
Currently we save the stats to persistent storage every 100 updates. */
|
||||
UNIV_INTERN
|
||||
void
|
||||
btr_defragment_save_defrag_stats_if_needed(
|
||||
dict_index_t* index) /*!< in: index */
|
||||
{
|
||||
if (srv_defragment_stats_accuracy != 0 // stats tracking disabled
|
||||
&& dict_index_get_space(index) != 0 // do not track system tables
|
||||
&& index->stat_defrag_modified_counter
|
||||
>= srv_defragment_stats_accuracy) {
|
||||
dict_stats_defrag_pool_add(index);
|
||||
index->stat_defrag_modified_counter = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/*********************************************************************//**
|
||||
Main defragment functionalities used by defragment thread.*/
|
||||
/*************************************************************//**
|
||||
Calculate number of records from beginning of block that can
|
||||
fit into size_limit
|
||||
@return number of records */
|
||||
UNIV_INTERN
|
||||
ulint
|
||||
btr_defragment_calc_n_recs_for_size(
|
||||
buf_block_t* block, /*!< in: B-tree page */
|
||||
dict_index_t* index, /*!< in: index of the page */
|
||||
ulint size_limit, /*!< in: size limit to fit records in */
|
||||
ulint* n_recs_size) /*!< out: actual size of the records that fit
|
||||
in size_limit. */
|
||||
{
|
||||
page_t* page = buf_block_get_frame(block);
|
||||
ulint n_recs = 0;
|
||||
ulint offsets_[REC_OFFS_NORMAL_SIZE];
|
||||
ulint* offsets = offsets_;
|
||||
rec_offs_init(offsets_);
|
||||
mem_heap_t* heap = NULL;
|
||||
ulint size = 0;
|
||||
page_cur_t cur;
|
||||
|
||||
page_cur_set_before_first(block, &cur);
|
||||
page_cur_move_to_next(&cur);
|
||||
while (page_cur_get_rec(&cur) != page_get_supremum_rec(page)) {
|
||||
rec_t* cur_rec = page_cur_get_rec(&cur);
|
||||
offsets = rec_get_offsets(cur_rec, index, offsets,
|
||||
ULINT_UNDEFINED, &heap);
|
||||
ulint rec_size = rec_offs_size(offsets);
|
||||
size += rec_size;
|
||||
if (size > size_limit) {
|
||||
size = size - rec_size;
|
||||
break;
|
||||
}
|
||||
n_recs ++;
|
||||
page_cur_move_to_next(&cur);
|
||||
}
|
||||
*n_recs_size = size;
|
||||
return n_recs;
|
||||
}
|
||||
|
||||
/*************************************************************//**
|
||||
Merge as many records from the from_block to the to_block. Delete
|
||||
the from_block if all records are successfully merged to to_block.
|
||||
@return the to_block to target for next merge operation. */
|
||||
UNIV_INTERN
|
||||
buf_block_t*
|
||||
btr_defragment_merge_pages(
|
||||
dict_index_t* index, /*!< in: index tree */
|
||||
buf_block_t* from_block, /*!< in: origin of merge */
|
||||
buf_block_t* to_block, /*!< in: destination of merge */
|
||||
ulint zip_size, /*!< in: zip size of the block */
|
||||
ulint reserved_space, /*!< in: space reserved for future
|
||||
insert to avoid immediate page split */
|
||||
ulint* max_data_size, /*!< in/out: max data size to
|
||||
fit in a single compressed page. */
|
||||
mem_heap_t* heap, /*!< in/out: pointer to memory heap */
|
||||
mtr_t* mtr) /*!< in/out: mini-transaction */
|
||||
{
|
||||
page_t* from_page = buf_block_get_frame(from_block);
|
||||
page_t* to_page = buf_block_get_frame(to_block);
|
||||
ulint space = dict_index_get_space(index);
|
||||
ulint level = btr_page_get_level(from_page, mtr);
|
||||
ulint n_recs = page_get_n_recs(from_page);
|
||||
ulint new_data_size = page_get_data_size(to_page);
|
||||
ulint max_ins_size =
|
||||
page_get_max_insert_size(to_page, n_recs);
|
||||
ulint max_ins_size_reorg =
|
||||
page_get_max_insert_size_after_reorganize(
|
||||
to_page, n_recs);
|
||||
ulint max_ins_size_to_use = max_ins_size_reorg > reserved_space
|
||||
? max_ins_size_reorg - reserved_space : 0;
|
||||
ulint move_size = 0;
|
||||
ulint n_recs_to_move = 0;
|
||||
rec_t* rec = NULL;
|
||||
ulint target_n_recs = 0;
|
||||
rec_t* orig_pred;
|
||||
|
||||
// Estimate how many records can be moved from the from_page to
|
||||
// the to_page.
|
||||
if (zip_size) {
|
||||
ulint page_diff = UNIV_PAGE_SIZE - *max_data_size;
|
||||
max_ins_size_to_use = (max_ins_size_to_use > page_diff)
|
||||
? max_ins_size_to_use - page_diff : 0;
|
||||
}
|
||||
n_recs_to_move = btr_defragment_calc_n_recs_for_size(
|
||||
from_block, index, max_ins_size_to_use, &move_size);
|
||||
|
||||
// If max_ins_size >= move_size, we can move the records without
|
||||
// reorganizing the page, otherwise we need to reorganize the page
|
||||
// first to release more space.
|
||||
if (move_size > max_ins_size) {
|
||||
if (!btr_page_reorganize_block(false, page_zip_level,
|
||||
to_block, index,
|
||||
mtr)) {
|
||||
if (!dict_index_is_clust(index)
|
||||
&& page_is_leaf(to_page)) {
|
||||
ibuf_reset_free_bits(to_block);
|
||||
}
|
||||
// If reorganization fails, that means page is
|
||||
// not compressable. There's no point to try
|
||||
// merging into this page. Continue to the
|
||||
// next page.
|
||||
return from_block;
|
||||
}
|
||||
ut_ad(page_validate(to_page, index));
|
||||
max_ins_size = page_get_max_insert_size(to_page, n_recs);
|
||||
ut_a(max_ins_size >= move_size);
|
||||
}
|
||||
|
||||
// Move records to pack to_page more full.
|
||||
orig_pred = NULL;
|
||||
target_n_recs = n_recs_to_move;
|
||||
while (n_recs_to_move > 0) {
|
||||
rec = page_rec_get_nth(from_page,
|
||||
n_recs_to_move + 1);
|
||||
orig_pred = page_copy_rec_list_start(
|
||||
to_block, from_block, rec, index, mtr);
|
||||
if (orig_pred)
|
||||
break;
|
||||
// If we reach here, that means compression failed after packing
|
||||
// n_recs_to_move number of records to to_page. We try to reduce
|
||||
// the targeted data size on the to_page by
|
||||
// BTR_DEFRAGMENT_PAGE_REDUCTION_STEP_SIZE and try again.
|
||||
os_atomic_increment_ulint(
|
||||
&btr_defragment_compression_failures, 1);
|
||||
max_ins_size_to_use =
|
||||
move_size > BTR_DEFRAGMENT_PAGE_REDUCTION_STEP_SIZE
|
||||
? move_size - BTR_DEFRAGMENT_PAGE_REDUCTION_STEP_SIZE
|
||||
: 0;
|
||||
if (max_ins_size_to_use == 0) {
|
||||
n_recs_to_move = 0;
|
||||
move_size = 0;
|
||||
break;
|
||||
}
|
||||
n_recs_to_move = btr_defragment_calc_n_recs_for_size(
|
||||
from_block, index, max_ins_size_to_use, &move_size);
|
||||
}
|
||||
// If less than target_n_recs are moved, it means there are
|
||||
// compression failures during page_copy_rec_list_start. Adjust
|
||||
// the max_data_size estimation to reduce compression failures
|
||||
// in the following runs.
|
||||
if (target_n_recs > n_recs_to_move
|
||||
&& *max_data_size > new_data_size + move_size) {
|
||||
*max_data_size = new_data_size + move_size;
|
||||
}
|
||||
// Set ibuf free bits if necessary.
|
||||
if (!dict_index_is_clust(index)
|
||||
&& page_is_leaf(to_page)) {
|
||||
if (zip_size) {
|
||||
ibuf_reset_free_bits(to_block);
|
||||
} else {
|
||||
ibuf_update_free_bits_if_full(
|
||||
to_block,
|
||||
UNIV_PAGE_SIZE,
|
||||
ULINT_UNDEFINED);
|
||||
}
|
||||
}
|
||||
if (n_recs_to_move == n_recs) {
|
||||
/* The whole page is merged with the previous page,
|
||||
free it. */
|
||||
lock_update_merge_left(to_block, orig_pred,
|
||||
from_block);
|
||||
btr_search_drop_page_hash_index(from_block);
|
||||
btr_level_list_remove(space, zip_size, from_page,
|
||||
index, mtr);
|
||||
btr_node_ptr_delete(index, from_block, mtr);
|
||||
btr_blob_dbg_remove(from_page, index,
|
||||
"btr_defragment_n_pages");
|
||||
btr_page_free(index, from_block, mtr);
|
||||
} else {
|
||||
// There are still records left on the page, so
|
||||
// increment n_defragmented. Node pointer will be changed
|
||||
// so remove the old node pointer.
|
||||
if (n_recs_to_move > 0) {
|
||||
// Part of the page is merged to left, remove
|
||||
// the merged records, update record locks and
|
||||
// node pointer.
|
||||
dtuple_t* node_ptr;
|
||||
page_delete_rec_list_start(rec, from_block,
|
||||
index, mtr);
|
||||
lock_update_split_and_merge(to_block,
|
||||
orig_pred,
|
||||
from_block);
|
||||
btr_node_ptr_delete(index, from_block, mtr);
|
||||
rec = page_rec_get_next(
|
||||
page_get_infimum_rec(from_page));
|
||||
node_ptr = dict_index_build_node_ptr(
|
||||
index, rec, page_get_page_no(from_page),
|
||||
heap, level + 1);
|
||||
btr_insert_on_non_leaf_level(0, index, level+1,
|
||||
node_ptr, mtr);
|
||||
}
|
||||
to_block = from_block;
|
||||
}
|
||||
return to_block;
|
||||
}
|
||||
|
||||
/*************************************************************//**
|
||||
Tries to merge N consecutive pages, starting from the page pointed by the
|
||||
cursor. Skip space 0. Only consider leaf pages.
|
||||
This function first loads all N pages into memory, then for each of
|
||||
the pages other than the first page, it tries to move as many records
|
||||
as possible to the left sibling to keep the left sibling full. During
|
||||
the process, if any page becomes empty, that page will be removed from
|
||||
the level list. Record locks, hash, and node pointers are updated after
|
||||
page reorganization.
|
||||
@return pointer to the last block processed, or NULL if reaching end of index */
|
||||
UNIV_INTERN
|
||||
buf_block_t*
|
||||
btr_defragment_n_pages(
|
||||
buf_block_t* block, /*!< in: starting block for defragmentation */
|
||||
dict_index_t* index, /*!< in: index tree */
|
||||
uint n_pages,/*!< in: number of pages to defragment */
|
||||
mtr_t* mtr) /*!< in/out: mini-transaction */
|
||||
{
|
||||
ulint space;
|
||||
ulint zip_size;
|
||||
/* We will need to load the n+1 block because if the last page is freed
|
||||
and we need to modify the prev_page_no of that block. */
|
||||
buf_block_t* blocks[BTR_DEFRAGMENT_MAX_N_PAGES + 1];
|
||||
page_t* first_page;
|
||||
buf_block_t* current_block;
|
||||
ulint total_data_size = 0;
|
||||
ulint total_n_recs = 0;
|
||||
ulint data_size_per_rec;
|
||||
ulint optimal_page_size;
|
||||
ulint reserved_space;
|
||||
ulint level;
|
||||
ulint max_data_size = 0;
|
||||
uint n_defragmented = 0;
|
||||
uint n_new_slots;
|
||||
mem_heap_t* heap;
|
||||
ibool end_of_index = FALSE;
|
||||
|
||||
/* It doesn't make sense to call this function with n_pages = 1. */
|
||||
ut_ad(n_pages > 1);
|
||||
|
||||
ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
|
||||
MTR_MEMO_X_LOCK));
|
||||
space = dict_index_get_space(index);
|
||||
if (space == 0) {
|
||||
/* Ignore space 0. */
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (n_pages > BTR_DEFRAGMENT_MAX_N_PAGES) {
|
||||
n_pages = BTR_DEFRAGMENT_MAX_N_PAGES;
|
||||
}
|
||||
|
||||
zip_size = dict_table_zip_size(index->table);
|
||||
first_page = buf_block_get_frame(block);
|
||||
level = btr_page_get_level(first_page, mtr);
|
||||
|
||||
if (level != 0) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* 1. Load the pages and calculate the total data size. */
|
||||
blocks[0] = block;
|
||||
for (uint i = 1; i <= n_pages; i++) {
|
||||
page_t* page = buf_block_get_frame(blocks[i-1]);
|
||||
ulint page_no = btr_page_get_next(page, mtr);
|
||||
total_data_size += page_get_data_size(page);
|
||||
total_n_recs += page_get_n_recs(page);
|
||||
if (page_no == FIL_NULL) {
|
||||
n_pages = i;
|
||||
end_of_index = TRUE;
|
||||
break;
|
||||
}
|
||||
blocks[i] = btr_block_get(space, zip_size, page_no,
|
||||
RW_X_LATCH, index, mtr);
|
||||
}
|
||||
|
||||
if (n_pages == 1) {
|
||||
if (btr_page_get_prev(first_page, mtr) == FIL_NULL) {
|
||||
/* last page in the index */
|
||||
if (dict_index_get_page(index)
|
||||
== page_get_page_no(first_page))
|
||||
return NULL;
|
||||
/* given page is the last page.
|
||||
Lift the records to father. */
|
||||
btr_lift_page_up(index, block, mtr);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* 2. Calculate how many pages data can fit in. If not compressable,
|
||||
return early. */
|
||||
ut_a(total_n_recs != 0);
|
||||
data_size_per_rec = total_data_size / total_n_recs;
|
||||
// For uncompressed pages, the optimal data size if the free space of a
|
||||
// empty page.
|
||||
optimal_page_size = page_get_free_space_of_empty(
|
||||
page_is_comp(first_page));
|
||||
// For compressed pages, we take compression failures into account.
|
||||
if (zip_size) {
|
||||
ulint size = 0;
|
||||
int i = 0;
|
||||
// We estimate the optimal data size of the index use samples of
|
||||
// data size. These samples are taken when pages failed to
|
||||
// compress due to insertion on the page. We use the average
|
||||
// of all samples we have as the estimation. Different pages of
|
||||
// the same index vary in compressibility. Average gives a good
|
||||
// enough estimation.
|
||||
for (;i < STAT_DEFRAG_DATA_SIZE_N_SAMPLE; i++) {
|
||||
if (index->stat_defrag_data_size_sample[i] == 0) {
|
||||
break;
|
||||
}
|
||||
size += index->stat_defrag_data_size_sample[i];
|
||||
}
|
||||
if (i != 0) {
|
||||
size = size / i;
|
||||
optimal_page_size = min(optimal_page_size, size);
|
||||
}
|
||||
max_data_size = optimal_page_size;
|
||||
}
|
||||
|
||||
reserved_space = min((ulint)(optimal_page_size
|
||||
* (1 - srv_defragment_fill_factor)),
|
||||
(data_size_per_rec
|
||||
* srv_defragment_fill_factor_n_recs));
|
||||
optimal_page_size -= reserved_space;
|
||||
n_new_slots = (total_data_size + optimal_page_size - 1)
|
||||
/ optimal_page_size;
|
||||
if (n_new_slots >= n_pages) {
|
||||
/* Can't defragment. */
|
||||
if (end_of_index)
|
||||
return NULL;
|
||||
return blocks[n_pages-1];
|
||||
}
|
||||
|
||||
/* 3. Defragment pages. */
|
||||
heap = mem_heap_create(256);
|
||||
// First defragmented page will be the first page.
|
||||
current_block = blocks[0];
|
||||
// Start from the second page.
|
||||
for (uint i = 1; i < n_pages; i ++) {
|
||||
buf_block_t* new_block = btr_defragment_merge_pages(
|
||||
index, blocks[i], current_block, zip_size,
|
||||
reserved_space, &max_data_size, heap, mtr);
|
||||
if (new_block != current_block) {
|
||||
n_defragmented ++;
|
||||
current_block = new_block;
|
||||
}
|
||||
}
|
||||
mem_heap_free(heap);
|
||||
n_defragmented ++;
|
||||
os_atomic_increment_ulint(
|
||||
&btr_defragment_count, 1);
|
||||
if (n_pages == n_defragmented) {
|
||||
os_atomic_increment_ulint(
|
||||
&btr_defragment_failures, 1);
|
||||
} else {
|
||||
index->stat_defrag_n_pages_freed += (n_pages - n_defragmented);
|
||||
}
|
||||
if (end_of_index)
|
||||
return NULL;
|
||||
return current_block;
|
||||
}
|
||||
|
||||
/******************************************************************//**
|
||||
Thread that merges consecutive b-tree pages into fewer pages to defragment
|
||||
the index. */
|
||||
extern "C" UNIV_INTERN
|
||||
os_thread_ret_t
|
||||
DECLARE_THREAD(btr_defragment_thread)(
|
||||
/*==========================================*/
|
||||
void* arg) /*!< in: work queue */
|
||||
{
|
||||
btr_pcur_t* pcur;
|
||||
btr_cur_t* cursor;
|
||||
dict_index_t* index;
|
||||
mtr_t mtr;
|
||||
buf_block_t* first_block;
|
||||
buf_block_t* last_block;
|
||||
|
||||
while (srv_shutdown_state == SRV_SHUTDOWN_NONE) {
|
||||
/* If defragmentation is disabled, sleep before
|
||||
checking whether it's enabled. */
|
||||
if (!srv_defragment) {
|
||||
os_thread_sleep(BTR_DEFRAGMENT_SLEEP_IN_USECS);
|
||||
continue;
|
||||
}
|
||||
/* The following call won't remove the item from work queue.
|
||||
We only get a pointer to it to work on. This will make sure
|
||||
when user issue a kill command, all indices are in the work
|
||||
queue to be searched. This also means that the user thread
|
||||
cannot directly remove the item from queue (since we might be
|
||||
using it). So user thread only marks index as removed. */
|
||||
btr_defragment_item_t* item = btr_defragment_get_item();
|
||||
/* If work queue is empty, sleep and check later. */
|
||||
if (!item) {
|
||||
os_thread_sleep(BTR_DEFRAGMENT_SLEEP_IN_USECS);
|
||||
continue;
|
||||
}
|
||||
/* If an index is marked as removed, we remove it from the work
|
||||
queue. No other thread could be using this item at this point so
|
||||
it's safe to remove now. */
|
||||
if (item->removed) {
|
||||
btr_defragment_remove_item(item);
|
||||
continue;
|
||||
}
|
||||
|
||||
pcur = item->pcur;
|
||||
ulonglong now = ut_timer_now();
|
||||
ulonglong elapsed = now - item->last_processed;
|
||||
|
||||
if (elapsed < srv_defragment_interval) {
|
||||
/* If we see an index again before the interval
|
||||
determined by the configured frequency is reached,
|
||||
we just sleep until the interval pass. Since
|
||||
defragmentation of all indices queue up on a single
|
||||
thread, it's likely other indices that follow this one
|
||||
don't need to sleep again. */
|
||||
os_thread_sleep(((ulint)ut_timer_to_microseconds(
|
||||
srv_defragment_interval - elapsed)));
|
||||
}
|
||||
|
||||
now = ut_timer_now();
|
||||
mtr_start(&mtr);
|
||||
btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, &mtr);
|
||||
cursor = btr_pcur_get_btr_cur(pcur);
|
||||
index = btr_cur_get_index(cursor);
|
||||
first_block = btr_cur_get_block(cursor);
|
||||
last_block = btr_defragment_n_pages(first_block, index,
|
||||
srv_defragment_n_pages,
|
||||
&mtr);
|
||||
if (last_block) {
|
||||
/* If we haven't reached the end of the index,
|
||||
place the cursor on the last record of last page,
|
||||
store the cursor position, and put back in queue. */
|
||||
page_t* last_page = buf_block_get_frame(last_block);
|
||||
rec_t* rec = page_rec_get_prev(
|
||||
page_get_supremum_rec(last_page));
|
||||
ut_a(page_rec_is_user_rec(rec));
|
||||
page_cur_position(rec, last_block,
|
||||
btr_cur_get_page_cur(cursor));
|
||||
btr_pcur_store_position(pcur, &mtr);
|
||||
mtr_commit(&mtr);
|
||||
/* Update the last_processed time of this index. */
|
||||
item->last_processed = now;
|
||||
} else {
|
||||
mtr_commit(&mtr);
|
||||
/* Reaching the end of the index. */
|
||||
dict_stats_empty_defrag_stats(index);
|
||||
dict_stats_save_defrag_stats(index);
|
||||
dict_stats_save_defrag_summary(index);
|
||||
btr_defragment_remove_item(item);
|
||||
}
|
||||
}
|
||||
btr_defragment_shutdown();
|
||||
os_thread_exit(NULL);
|
||||
OS_THREAD_DUMMY_RETURN;
|
||||
}
|
||||
|
||||
#endif /* !UNIV_HOTBACKUP */
|
||||
|
|
@ -408,7 +408,7 @@ dict_table_try_drop_aborted(
|
|||
|
||||
if (table == NULL) {
|
||||
table = dict_table_open_on_id_low(
|
||||
table_id, DICT_ERR_IGNORE_NONE);
|
||||
table_id, DICT_ERR_IGNORE_NONE, FALSE);
|
||||
} else {
|
||||
ut_ad(table->id == table_id);
|
||||
}
|
||||
|
|
@ -795,7 +795,8 @@ dict_table_open_on_id(
|
|||
table_id,
|
||||
table_op == DICT_TABLE_OP_LOAD_TABLESPACE
|
||||
? DICT_ERR_IGNORE_RECOVER_LOCK
|
||||
: DICT_ERR_IGNORE_NONE);
|
||||
: DICT_ERR_IGNORE_NONE,
|
||||
table_op == DICT_TABLE_OP_OPEN_ONLY_IF_CACHED);
|
||||
|
||||
if (table != NULL) {
|
||||
|
||||
|
|
@ -1313,7 +1314,7 @@ dict_table_move_from_non_lru_to_lru(
|
|||
/**********************************************************************//**
|
||||
Looks for an index with the given id given a table instance.
|
||||
@return index or NULL */
|
||||
static
|
||||
UNIV_INTERN
|
||||
dict_index_t*
|
||||
dict_table_find_index_on_id(
|
||||
/*========================*/
|
||||
|
|
@ -2408,6 +2409,13 @@ undo_size_ok:
|
|||
new_index->stat_index_size = 1;
|
||||
new_index->stat_n_leaf_pages = 1;
|
||||
|
||||
new_index->stat_defrag_n_pages_freed = 0;
|
||||
new_index->stat_defrag_n_page_split = 0;
|
||||
|
||||
new_index->stat_defrag_sample_next_slot = 0;
|
||||
memset(&new_index->stat_defrag_data_size_sample,
|
||||
0x0, sizeof(ulint) * STAT_DEFRAG_DATA_SIZE_N_SAMPLE);
|
||||
|
||||
/* Add the new index as the last index for the table */
|
||||
|
||||
UT_LIST_ADD_LAST(indexes, table->indexes, new_index);
|
||||
|
|
|
|||
|
|
@ -492,6 +492,9 @@ dict_stats_table_clone_create(
|
|||
heap,
|
||||
idx->n_uniq * sizeof(idx->stat_n_non_null_key_vals[0]));
|
||||
ut_d(idx->magic_n = DICT_INDEX_MAGIC_N);
|
||||
|
||||
idx->stat_defrag_n_page_split = 0;
|
||||
idx->stat_defrag_n_pages_freed = 0;
|
||||
}
|
||||
|
||||
ut_d(t->magic_n = DICT_TABLE_MAGIC_N);
|
||||
|
|
@ -520,7 +523,9 @@ static
|
|||
void
|
||||
dict_stats_empty_index(
|
||||
/*===================*/
|
||||
dict_index_t* index) /*!< in/out: index */
|
||||
dict_index_t* index, /*!< in/out: index */
|
||||
bool empty_defrag_stats)
|
||||
/*!< in: whether to empty defrag stats */
|
||||
{
|
||||
ut_ad(!(index->type & DICT_FTS));
|
||||
ut_ad(!dict_index_is_univ(index));
|
||||
|
|
@ -535,6 +540,34 @@ dict_stats_empty_index(
|
|||
|
||||
index->stat_index_size = 1;
|
||||
index->stat_n_leaf_pages = 1;
|
||||
|
||||
if (empty_defrag_stats) {
|
||||
dict_stats_empty_defrag_stats(index);
|
||||
dict_stats_empty_defrag_summary(index);
|
||||
}
|
||||
}
|
||||
|
||||
/**********************************************************************//**
|
||||
Clear defragmentation summary. */
|
||||
UNIV_INTERN
|
||||
void
|
||||
dict_stats_empty_defrag_summary(
|
||||
/*==================*/
|
||||
dict_index_t* index) /*!< in: index to clear defragmentation stats */
|
||||
{
|
||||
index->stat_defrag_n_pages_freed = 0;
|
||||
}
|
||||
|
||||
/**********************************************************************//**
|
||||
Clear defragmentation related index stats. */
|
||||
UNIV_INTERN
|
||||
void
|
||||
dict_stats_empty_defrag_stats(
|
||||
/*==================*/
|
||||
dict_index_t* index) /*!< in: index to clear defragmentation stats */
|
||||
{
|
||||
index->stat_defrag_modified_counter = 0;
|
||||
index->stat_defrag_n_page_split = 0;
|
||||
}
|
||||
|
||||
/*********************************************************************//**
|
||||
|
|
@ -544,7 +577,9 @@ static
|
|||
void
|
||||
dict_stats_empty_table(
|
||||
/*===================*/
|
||||
dict_table_t* table) /*!< in/out: table */
|
||||
dict_table_t* table, /*!< in/out: table */
|
||||
bool empty_defrag_stats)
|
||||
/*!< in: whether to empty defrag stats */
|
||||
{
|
||||
/* Zero the stats members */
|
||||
|
||||
|
|
@ -569,7 +604,7 @@ dict_stats_empty_table(
|
|||
|
||||
ut_ad(!dict_index_is_univ(index));
|
||||
|
||||
dict_stats_empty_index(index);
|
||||
dict_stats_empty_index(index, empty_defrag_stats);
|
||||
}
|
||||
|
||||
table->stat_initialized = TRUE;
|
||||
|
|
@ -704,7 +739,7 @@ dict_stats_copy(
|
|||
}
|
||||
|
||||
if (!INDEX_EQ(src_idx, dst_idx)) {
|
||||
dict_stats_empty_index(dst_idx);
|
||||
dict_stats_empty_index(dst_idx, true);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
@ -715,7 +750,7 @@ dict_stats_copy(
|
|||
/* Since src is smaller some elements in dst
|
||||
will remain untouched by the following memmove(),
|
||||
thus we init all of them here. */
|
||||
dict_stats_empty_index(dst_idx);
|
||||
dict_stats_empty_index(dst_idx, true);
|
||||
} else {
|
||||
n_copy_el = dst_idx->n_uniq;
|
||||
}
|
||||
|
|
@ -735,6 +770,13 @@ dict_stats_copy(
|
|||
dst_idx->stat_index_size = src_idx->stat_index_size;
|
||||
|
||||
dst_idx->stat_n_leaf_pages = src_idx->stat_n_leaf_pages;
|
||||
|
||||
dst_idx->stat_defrag_modified_counter =
|
||||
src_idx->stat_defrag_modified_counter;
|
||||
dst_idx->stat_defrag_n_pages_freed =
|
||||
src_idx->stat_defrag_n_pages_freed;
|
||||
dst_idx->stat_defrag_n_page_split =
|
||||
src_idx->stat_defrag_n_page_split;
|
||||
}
|
||||
|
||||
dst->stat_initialized = TRUE;
|
||||
|
|
@ -758,6 +800,9 @@ dict_index_t::stat_n_sample_sizes[]
|
|||
dict_index_t::stat_n_non_null_key_vals[]
|
||||
dict_index_t::stat_index_size
|
||||
dict_index_t::stat_n_leaf_pages
|
||||
dict_index_t::stat_defrag_modified_counter
|
||||
dict_index_t::stat_defrag_n_pages_freed
|
||||
dict_index_t::stat_defrag_n_page_split
|
||||
The returned object should be freed with dict_stats_snapshot_free()
|
||||
when no longer needed.
|
||||
@return incomplete table object */
|
||||
|
|
@ -807,7 +852,9 @@ dict_stats_snapshot_free(
|
|||
Calculates new estimates for index statistics. This function is
|
||||
relatively quick and is used to calculate transient statistics that
|
||||
are not saved on disk. This was the only way to calculate statistics
|
||||
before the Persistent Statistics feature was introduced. */
|
||||
before the Persistent Statistics feature was introduced.
|
||||
This function doesn't update the defragmentation related stats.
|
||||
Only persistent statistics supports defragmentation stats. */
|
||||
static
|
||||
void
|
||||
dict_stats_update_transient_for_index(
|
||||
|
|
@ -823,10 +870,10 @@ dict_stats_update_transient_for_index(
|
|||
Initialize some bogus index cardinality
|
||||
statistics, so that the data can be queried in
|
||||
various means, also via secondary indexes. */
|
||||
dict_stats_empty_index(index);
|
||||
dict_stats_empty_index(index, false);
|
||||
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
|
||||
} else if (ibuf_debug && !dict_index_is_clust(index)) {
|
||||
dict_stats_empty_index(index);
|
||||
dict_stats_empty_index(index, false);
|
||||
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
|
||||
} else {
|
||||
mtr_t mtr;
|
||||
|
|
@ -847,7 +894,7 @@ dict_stats_update_transient_for_index(
|
|||
|
||||
switch (size) {
|
||||
case ULINT_UNDEFINED:
|
||||
dict_stats_empty_index(index);
|
||||
dict_stats_empty_index(index, false);
|
||||
return;
|
||||
case 0:
|
||||
/* The root node of the tree is a leaf */
|
||||
|
|
@ -882,7 +929,7 @@ dict_stats_update_transient(
|
|||
|
||||
if (dict_table_is_discarded(table)) {
|
||||
/* Nothing to do. */
|
||||
dict_stats_empty_table(table);
|
||||
dict_stats_empty_table(table, false);
|
||||
return;
|
||||
} else if (index == NULL) {
|
||||
/* Table definition is corrupt */
|
||||
|
|
@ -892,7 +939,7 @@ dict_stats_update_transient(
|
|||
fprintf(stderr, " InnoDB: table %s has no indexes. "
|
||||
"Cannot calculate statistics.\n",
|
||||
ut_format_name(table->name, TRUE, buf, sizeof(buf)));
|
||||
dict_stats_empty_table(table);
|
||||
dict_stats_empty_table(table, false);
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -904,7 +951,7 @@ dict_stats_update_transient(
|
|||
continue;
|
||||
}
|
||||
|
||||
dict_stats_empty_index(index);
|
||||
dict_stats_empty_index(index, false);
|
||||
|
||||
if (dict_stats_should_ignore_index(index)) {
|
||||
continue;
|
||||
|
|
@ -1794,7 +1841,7 @@ dict_stats_analyze_index(
|
|||
|
||||
DEBUG_PRINTF(" %s(index=%s)\n", __func__, index->name);
|
||||
|
||||
dict_stats_empty_index(index);
|
||||
dict_stats_empty_index(index, false);
|
||||
|
||||
mtr_start(&mtr);
|
||||
|
||||
|
|
@ -2059,7 +2106,7 @@ dict_stats_update_persistent(
|
|||
|
||||
/* Table definition is corrupt */
|
||||
dict_table_stats_unlock(table, RW_X_LATCH);
|
||||
dict_stats_empty_table(table);
|
||||
dict_stats_empty_table(table, true);
|
||||
|
||||
return(DB_CORRUPTION);
|
||||
}
|
||||
|
|
@ -2088,7 +2135,7 @@ dict_stats_update_persistent(
|
|||
continue;
|
||||
}
|
||||
|
||||
dict_stats_empty_index(index);
|
||||
dict_stats_empty_index(index, false);
|
||||
|
||||
if (dict_stats_should_ignore_index(index)) {
|
||||
continue;
|
||||
|
|
@ -2657,6 +2704,16 @@ dict_stats_fetch_index_stats_step(
|
|||
== 0) {
|
||||
index->stat_n_leaf_pages = (ulint) stat_value;
|
||||
arg->stats_were_modified = true;
|
||||
} else if (stat_name_len == 12 /* strlen("n_page_split") */
|
||||
&& strncasecmp("n_page_split", stat_name, stat_name_len)
|
||||
== 0) {
|
||||
index->stat_defrag_n_page_split = (ulint) stat_value;
|
||||
arg->stats_were_modified = true;
|
||||
} else if (stat_name_len == 13 /* strlen("n_pages_freed") */
|
||||
&& strncasecmp("n_pages_freed", stat_name, stat_name_len)
|
||||
== 0) {
|
||||
index->stat_defrag_n_pages_freed = (ulint) stat_value;
|
||||
arg->stats_were_modified = true;
|
||||
} else if (stat_name_len > PFX_LEN /* e.g. stat_name=="n_diff_pfx01" */
|
||||
&& strncasecmp(PFX, stat_name, PFX_LEN) == 0) {
|
||||
|
||||
|
|
@ -2776,7 +2833,7 @@ dict_stats_fetch_from_ps(
|
|||
the persistent storage contains incomplete stats (e.g. missing stats
|
||||
for some index) then we would end up with (partially) uninitialized
|
||||
stats. */
|
||||
dict_stats_empty_table(table);
|
||||
dict_stats_empty_table(table, true);
|
||||
|
||||
trx = trx_allocate_for_background();
|
||||
|
||||
|
|
@ -2877,6 +2934,22 @@ dict_stats_fetch_from_ps(
|
|||
return(ret);
|
||||
}
|
||||
|
||||
/*********************************************************************//**
|
||||
Clear defragmentation stats modified counter for all indices in table. */
|
||||
static
|
||||
void
|
||||
dict_stats_empty_defrag_modified_counter(
|
||||
dict_table_t* table) /*!< in: table */
|
||||
{
|
||||
dict_index_t* index;
|
||||
ut_a(table);
|
||||
for (index = dict_table_get_first_index(table);
|
||||
index != NULL;
|
||||
index = dict_table_get_next_index(index)) {
|
||||
index->stat_defrag_modified_counter = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/*********************************************************************//**
|
||||
Fetches or calculates new estimates for index statistics. */
|
||||
UNIV_INTERN
|
||||
|
|
@ -2949,13 +3022,13 @@ dict_stats_update(
|
|||
"because the .ibd file is missing. For help, please "
|
||||
"refer to " REFMAN "innodb-troubleshooting.html\n",
|
||||
ut_format_name(table->name, TRUE, buf, sizeof(buf)));
|
||||
dict_stats_empty_table(table);
|
||||
dict_stats_empty_table(table, true);
|
||||
return(DB_TABLESPACE_DELETED);
|
||||
} else if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
|
||||
/* If we have set a high innodb_force_recovery level, do
|
||||
not calculate statistics, as a badly corrupted index can
|
||||
cause a crash in it. */
|
||||
dict_stats_empty_table(table);
|
||||
dict_stats_empty_table(table, false);
|
||||
return(DB_SUCCESS);
|
||||
}
|
||||
|
||||
|
|
@ -3014,7 +3087,7 @@ dict_stats_update(
|
|||
|
||||
case DICT_STATS_EMPTY_TABLE:
|
||||
|
||||
dict_stats_empty_table(table);
|
||||
dict_stats_empty_table(table, true);
|
||||
|
||||
/* If table is using persistent stats,
|
||||
then save the stats on disk */
|
||||
|
|
@ -3073,6 +3146,7 @@ dict_stats_update(
|
|||
|
||||
t->stats_last_recalc = table->stats_last_recalc;
|
||||
t->stat_modified_counter = 0;
|
||||
dict_stats_empty_defrag_modified_counter(t);
|
||||
|
||||
switch (err) {
|
||||
case DB_SUCCESS:
|
||||
|
|
@ -3083,7 +3157,7 @@ dict_stats_update(
|
|||
copying because dict_stats_table_clone_create() does
|
||||
skip corrupted indexes so our dummy object 't' may
|
||||
have less indexes than the real object 'table'. */
|
||||
dict_stats_empty_table(table);
|
||||
dict_stats_empty_table(table, true);
|
||||
|
||||
dict_stats_copy(table, t);
|
||||
|
||||
|
|
@ -3650,6 +3724,117 @@ dict_stats_rename_table(
|
|||
return(ret);
|
||||
}
|
||||
|
||||
/*********************************************************************//**
|
||||
Save defragmentation result.
|
||||
@return DB_SUCCESS or error code */
|
||||
UNIV_INTERN
|
||||
dberr_t
|
||||
dict_stats_save_defrag_summary(
|
||||
dict_index_t* index) /*!< in: index */
|
||||
{
|
||||
dberr_t ret;
|
||||
lint now = (lint) ut_time();
|
||||
if (dict_index_is_univ(index)) {
|
||||
return DB_SUCCESS;
|
||||
}
|
||||
rw_lock_x_lock(&dict_operation_lock);
|
||||
mutex_enter(&dict_sys->mutex);
|
||||
ret = dict_stats_save_index_stat(index, now, "n_pages_freed",
|
||||
index->stat_defrag_n_pages_freed,
|
||||
NULL,
|
||||
"Number of pages freed during"
|
||||
" last defragmentation run.",
|
||||
NULL);
|
||||
|
||||
mutex_exit(&dict_sys->mutex);
|
||||
rw_lock_x_unlock(&dict_operation_lock);
|
||||
return (ret);
|
||||
}
|
||||
|
||||
/*********************************************************************//**
|
||||
Save defragmentation stats for a given index.
|
||||
@return DB_SUCCESS or error code */
|
||||
UNIV_INTERN
|
||||
dberr_t
|
||||
dict_stats_save_defrag_stats(
|
||||
dict_index_t* index) /*!< in: index */
|
||||
{
|
||||
dberr_t ret;
|
||||
|
||||
if (index->table->ibd_file_missing) {
|
||||
ut_print_timestamp(stderr);
|
||||
fprintf(stderr,
|
||||
" InnoDB: Cannot save defragment stats because "
|
||||
".ibd file is missing.\n");
|
||||
return (DB_TABLESPACE_DELETED);
|
||||
}
|
||||
if (dict_index_is_corrupted(index)) {
|
||||
ut_print_timestamp(stderr);
|
||||
fprintf(stderr,
|
||||
" InnoDB: Cannot save defragment stats because "
|
||||
"index is corrupted.\n");
|
||||
return(DB_CORRUPTION);
|
||||
}
|
||||
|
||||
if (dict_index_is_univ(index)) {
|
||||
return DB_SUCCESS;
|
||||
}
|
||||
|
||||
lint now = (lint) ut_time();
|
||||
mtr_t mtr;
|
||||
ulint n_leaf_pages;
|
||||
ulint n_leaf_reserved;
|
||||
mtr_start(&mtr);
|
||||
mtr_s_lock(dict_index_get_lock(index), &mtr);
|
||||
n_leaf_reserved = btr_get_size_and_reserved(index, BTR_N_LEAF_PAGES,
|
||||
&n_leaf_pages, &mtr);
|
||||
mtr_commit(&mtr);
|
||||
|
||||
if (n_leaf_reserved == ULINT_UNDEFINED) {
|
||||
// The index name is different during fast index creation,
|
||||
// so the stats won't be associated with the right index
|
||||
// for later use. We just return without saving.
|
||||
return DB_SUCCESS;
|
||||
}
|
||||
|
||||
rw_lock_x_lock(&dict_operation_lock);
|
||||
|
||||
mutex_enter(&dict_sys->mutex);
|
||||
ret = dict_stats_save_index_stat(index, now, "n_page_split",
|
||||
index->stat_defrag_n_page_split,
|
||||
NULL,
|
||||
"Number of new page splits on leaves"
|
||||
" since last defragmentation.",
|
||||
NULL);
|
||||
if (ret != DB_SUCCESS) {
|
||||
goto end;
|
||||
}
|
||||
|
||||
ret = dict_stats_save_index_stat(
|
||||
index, now, "n_leaf_pages_defrag",
|
||||
n_leaf_pages,
|
||||
NULL,
|
||||
"Number of leaf pages when this stat is saved to disk",
|
||||
NULL);
|
||||
if (ret != DB_SUCCESS) {
|
||||
goto end;
|
||||
}
|
||||
|
||||
ret = dict_stats_save_index_stat(
|
||||
index, now, "n_leaf_pages_reserved",
|
||||
n_leaf_reserved,
|
||||
NULL,
|
||||
"Number of pages reserved for this index leaves when this stat "
|
||||
"is saved to disk",
|
||||
NULL);
|
||||
|
||||
end:
|
||||
mutex_exit(&dict_sys->mutex);
|
||||
rw_lock_x_unlock(&dict_operation_lock);
|
||||
|
||||
return (ret);
|
||||
}
|
||||
|
||||
/* tests @{ */
|
||||
#ifdef UNIV_COMPILE_TEST_FUNCS
|
||||
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@ Created Apr 25, 2012 Vasil Dimov
|
|||
|
||||
#include "row0mysql.h"
|
||||
#include "srv0start.h"
|
||||
#include "dict0dict.h"
|
||||
#include "dict0stats.h"
|
||||
#include "dict0stats_bg.h"
|
||||
|
||||
|
|
@ -44,8 +45,10 @@ UNIV_INTERN os_event_t dict_stats_event = NULL;
|
|||
|
||||
/** This mutex protects the "recalc_pool" variable. */
|
||||
static ib_mutex_t recalc_pool_mutex;
|
||||
static ib_mutex_t defrag_pool_mutex;
|
||||
#ifdef HAVE_PSI_INTERFACE
|
||||
static mysql_pfs_key_t recalc_pool_mutex_key;
|
||||
static mysql_pfs_key_t defrag_pool_mutex_key;
|
||||
#endif /* HAVE_PSI_INTERFACE */
|
||||
|
||||
/** The number of tables that can be added to "recalc_pool" before
|
||||
|
|
@ -59,16 +62,26 @@ static recalc_pool_t recalc_pool;
|
|||
|
||||
typedef recalc_pool_t::iterator recalc_pool_iterator_t;
|
||||
|
||||
/** Indices whose defrag stats need to be saved to persistent storage.*/
|
||||
struct defrag_pool_item_t {
|
||||
table_id_t table_id;
|
||||
index_id_t index_id;
|
||||
};
|
||||
typedef std::vector<defrag_pool_item_t> defrag_pool_t;
|
||||
static defrag_pool_t defrag_pool;
|
||||
typedef defrag_pool_t::iterator defrag_pool_iterator_t;
|
||||
|
||||
/*****************************************************************//**
|
||||
Initialize the recalc pool, called once during thread initialization. */
|
||||
static
|
||||
void
|
||||
dict_stats_recalc_pool_init()
|
||||
dict_stats_pool_init()
|
||||
/*=========================*/
|
||||
{
|
||||
ut_ad(!srv_read_only_mode);
|
||||
|
||||
recalc_pool.reserve(RECALC_POOL_INITIAL_SLOTS);
|
||||
defrag_pool.reserve(RECALC_POOL_INITIAL_SLOTS);
|
||||
}
|
||||
|
||||
/*****************************************************************//**
|
||||
|
|
@ -76,12 +89,13 @@ Free the resources occupied by the recalc pool, called once during
|
|||
thread de-initialization. */
|
||||
static
|
||||
void
|
||||
dict_stats_recalc_pool_deinit()
|
||||
dict_stats_pool_deinit()
|
||||
/*===========================*/
|
||||
{
|
||||
ut_ad(!srv_read_only_mode);
|
||||
|
||||
recalc_pool.clear();
|
||||
defrag_pool.clear();
|
||||
}
|
||||
|
||||
/*****************************************************************//**
|
||||
|
|
@ -177,6 +191,111 @@ dict_stats_recalc_pool_del(
|
|||
mutex_exit(&recalc_pool_mutex);
|
||||
}
|
||||
|
||||
/*****************************************************************//**
|
||||
Add an index in a table to the defrag pool, which is processed by the
|
||||
background stats gathering thread. Only the table id and index id are
|
||||
added to the list, so the table can be closed after being enqueued and
|
||||
it will be opened when needed. If the table or index does not exist later
|
||||
(has been DROPped), then it will be removed from the pool and skipped. */
|
||||
UNIV_INTERN
|
||||
void
|
||||
dict_stats_defrag_pool_add(
|
||||
/*=======================*/
|
||||
const dict_index_t* index) /*!< in: table to add */
|
||||
{
|
||||
defrag_pool_item_t item;
|
||||
|
||||
ut_ad(!srv_read_only_mode);
|
||||
|
||||
mutex_enter(&defrag_pool_mutex);
|
||||
|
||||
/* quit if already in the list */
|
||||
for (defrag_pool_iterator_t iter = defrag_pool.begin();
|
||||
iter != defrag_pool.end();
|
||||
++iter) {
|
||||
if ((*iter).table_id == index->table->id
|
||||
&& (*iter).index_id == index->id) {
|
||||
mutex_exit(&defrag_pool_mutex);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
item.table_id = index->table->id;
|
||||
item.index_id = index->id;
|
||||
defrag_pool.push_back(item);
|
||||
|
||||
mutex_exit(&defrag_pool_mutex);
|
||||
|
||||
os_event_set(dict_stats_event);
|
||||
}
|
||||
|
||||
/*****************************************************************//**
|
||||
Get an index from the auto defrag pool. The returned index id is removed
|
||||
from the pool.
|
||||
@return true if the pool was non-empty and "id" was set, false otherwise */
|
||||
static
|
||||
bool
|
||||
dict_stats_defrag_pool_get(
|
||||
/*=======================*/
|
||||
table_id_t* table_id, /*!< out: table id, or unmodified if
|
||||
list is empty */
|
||||
index_id_t* index_id) /*!< out: index id, or unmodified if
|
||||
list is empty */
|
||||
{
|
||||
ut_ad(!srv_read_only_mode);
|
||||
|
||||
mutex_enter(&defrag_pool_mutex);
|
||||
|
||||
if (defrag_pool.empty()) {
|
||||
mutex_exit(&defrag_pool_mutex);
|
||||
return(false);
|
||||
}
|
||||
|
||||
defrag_pool_item_t& item = defrag_pool.back();
|
||||
*table_id = item.table_id;
|
||||
*index_id = item.index_id;
|
||||
|
||||
defrag_pool.pop_back();
|
||||
|
||||
mutex_exit(&defrag_pool_mutex);
|
||||
|
||||
return(true);
|
||||
}
|
||||
|
||||
/*****************************************************************//**
|
||||
Delete a given index from the auto defrag pool. */
|
||||
UNIV_INTERN
|
||||
void
|
||||
dict_stats_defrag_pool_del(
|
||||
/*=======================*/
|
||||
const dict_table_t* table, /*!<in: if given, remove
|
||||
all entries for the table */
|
||||
const dict_index_t* index) /*!< in: if given, remove this index */
|
||||
{
|
||||
ut_a((table && !index) || (!table && index));
|
||||
ut_ad(!srv_read_only_mode);
|
||||
ut_ad(mutex_own(&dict_sys->mutex));
|
||||
|
||||
mutex_enter(&defrag_pool_mutex);
|
||||
|
||||
defrag_pool_iterator_t iter = defrag_pool.begin();
|
||||
while (iter != defrag_pool.end()) {
|
||||
if ((table && (*iter).table_id == table->id)
|
||||
|| (index
|
||||
&& (*iter).table_id == index->table->id
|
||||
&& (*iter).index_id == index->id)) {
|
||||
/* erase() invalidates the iterator */
|
||||
iter = defrag_pool.erase(iter);
|
||||
if (index)
|
||||
break;
|
||||
} else {
|
||||
iter++;
|
||||
}
|
||||
}
|
||||
|
||||
mutex_exit(&defrag_pool_mutex);
|
||||
}
|
||||
|
||||
/*****************************************************************//**
|
||||
Wait until background stats thread has stopped using the specified table.
|
||||
The caller must have locked the data dictionary using
|
||||
|
|
@ -227,7 +346,10 @@ dict_stats_thread_init()
|
|||
mutex_create(recalc_pool_mutex_key, &recalc_pool_mutex,
|
||||
SYNC_STATS_AUTO_RECALC);
|
||||
|
||||
dict_stats_recalc_pool_init();
|
||||
/* We choose SYNC_STATS_DEFRAG to be below SYNC_FSP_PAGE. */
|
||||
mutex_create(defrag_pool_mutex_key, &defrag_pool_mutex,
|
||||
SYNC_STATS_DEFRAG);
|
||||
dict_stats_pool_init();
|
||||
}
|
||||
|
||||
/*****************************************************************//**
|
||||
|
|
@ -241,11 +363,14 @@ dict_stats_thread_deinit()
|
|||
ut_a(!srv_read_only_mode);
|
||||
ut_ad(!srv_dict_stats_thread_active);
|
||||
|
||||
dict_stats_recalc_pool_deinit();
|
||||
dict_stats_pool_deinit();
|
||||
|
||||
mutex_free(&recalc_pool_mutex);
|
||||
memset(&recalc_pool_mutex, 0x0, sizeof(recalc_pool_mutex));
|
||||
|
||||
mutex_free(&defrag_pool_mutex);
|
||||
memset(&defrag_pool_mutex, 0x0, sizeof(defrag_pool_mutex));
|
||||
|
||||
os_event_free(dict_stats_event);
|
||||
dict_stats_event = NULL;
|
||||
}
|
||||
|
|
@ -322,6 +447,63 @@ dict_stats_process_entry_from_recalc_pool()
|
|||
mutex_exit(&dict_sys->mutex);
|
||||
}
|
||||
|
||||
/*****************************************************************//**
|
||||
Get the first index that has been added for updating persistent defrag
|
||||
stats and eventually save its stats. */
|
||||
static
|
||||
void
|
||||
dict_stats_process_entry_from_defrag_pool()
|
||||
/*=======================================*/
|
||||
{
|
||||
table_id_t table_id;
|
||||
index_id_t index_id;
|
||||
|
||||
ut_ad(!srv_read_only_mode);
|
||||
|
||||
/* pop the first index from the auto defrag pool */
|
||||
if (!dict_stats_defrag_pool_get(&table_id, &index_id)) {
|
||||
/* no index in defrag pool */
|
||||
return;
|
||||
}
|
||||
|
||||
dict_table_t* table;
|
||||
|
||||
mutex_enter(&dict_sys->mutex);
|
||||
|
||||
/* If the table is no longer cached, we've already lost the in
|
||||
memory stats so there's nothing really to write to disk. */
|
||||
table = dict_table_open_on_id(table_id, TRUE,
|
||||
DICT_TABLE_OP_OPEN_ONLY_IF_CACHED);
|
||||
|
||||
if (table == NULL) {
|
||||
mutex_exit(&dict_sys->mutex);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Check whether table is corrupted */
|
||||
if (table->corrupted) {
|
||||
dict_table_close(table, TRUE, FALSE);
|
||||
mutex_exit(&dict_sys->mutex);
|
||||
return;
|
||||
}
|
||||
mutex_exit(&dict_sys->mutex);
|
||||
|
||||
dict_index_t* index = dict_table_find_index_on_id(table, index_id);
|
||||
|
||||
if (index == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Check whether index is corrupted */
|
||||
if (dict_index_is_corrupted(index)) {
|
||||
dict_table_close(table, FALSE, FALSE);
|
||||
return;
|
||||
}
|
||||
|
||||
dict_stats_save_defrag_stats(index);
|
||||
dict_table_close(table, FALSE, FALSE);
|
||||
}
|
||||
|
||||
/*****************************************************************//**
|
||||
This is the thread for background stats gathering. It pops tables, from
|
||||
the auto recalc list and proceeds them, eventually recalculating their
|
||||
|
|
@ -354,6 +536,9 @@ DECLARE_THREAD(dict_stats_thread)(
|
|||
|
||||
dict_stats_process_entry_from_recalc_pool();
|
||||
|
||||
while (defrag_pool.size())
|
||||
dict_stats_process_entry_from_defrag_pool();
|
||||
|
||||
os_event_reset(dict_stats_event);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -58,6 +58,7 @@ this program; if not, write to the Free Software Foundation, Inc.,
|
|||
#include "buf0flu.h"
|
||||
#include "buf0dblwr.h"
|
||||
#include "btr0sea.h"
|
||||
#include "btr0defragment.h"
|
||||
#include "os0file.h"
|
||||
#include "os0thread.h"
|
||||
#include "srv0start.h"
|
||||
|
|
@ -66,7 +67,6 @@ this program; if not, write to the Free Software Foundation, Inc.,
|
|||
#include "trx0trx.h"
|
||||
|
||||
#include "trx0sys.h"
|
||||
#include "mtr0mtr.h"
|
||||
#include "rem0types.h"
|
||||
#include "row0ins.h"
|
||||
#include "row0mysql.h"
|
||||
|
|
@ -88,6 +88,7 @@ this program; if not, write to the Free Software Foundation, Inc.,
|
|||
#include "dict0stats_bg.h"
|
||||
#include "ha_prototypes.h"
|
||||
#include "ut0mem.h"
|
||||
#include "ut0timer.h"
|
||||
#include "ibuf0ibuf.h"
|
||||
#include "dict0dict.h"
|
||||
#include "srv0mon.h"
|
||||
|
|
@ -946,6 +947,14 @@ static SHOW_VAR innodb_status_variables[]= {
|
|||
{"have_bzip2",
|
||||
(char*) &innodb_have_bzip2, SHOW_BOOL},
|
||||
|
||||
/* Defragment */
|
||||
{"defragment_compression_failures",
|
||||
(char*) &export_vars.innodb_defragment_compression_failures, SHOW_LONG},
|
||||
{"defragment_failures",
|
||||
(char*) &export_vars.innodb_defragment_failures, SHOW_LONG},
|
||||
{"defragment_count",
|
||||
(char*) &export_vars.innodb_defragment_count, SHOW_LONG},
|
||||
|
||||
{NullS, NullS, SHOW_LONG}
|
||||
};
|
||||
|
||||
|
|
@ -2700,7 +2709,8 @@ ha_innobase::ha_innobase(
|
|||
(srv_force_primary_key ? HA_REQUIRE_PRIMARY_KEY : 0 ) |
|
||||
HA_CAN_FULLTEXT_EXT | HA_CAN_EXPORT),
|
||||
start_of_scan(0),
|
||||
num_write_row(0)
|
||||
num_write_row(0),
|
||||
ha_partition_stats(NULL)
|
||||
{}
|
||||
|
||||
/*********************************************************************//**
|
||||
|
|
@ -11222,6 +11232,72 @@ ha_innobase::delete_table(
|
|||
DBUG_RETURN(convert_error_code_to_mysql(err, 0, NULL));
|
||||
}
|
||||
|
||||
/*****************************************************************//**
|
||||
Defragment table.
|
||||
@return error number */
|
||||
UNIV_INTERN
|
||||
int
|
||||
ha_innobase::defragment_table(
|
||||
/*==========================*/
|
||||
const char* name, /*!< in: table name */
|
||||
const char* index_name, /*!< in: index name */
|
||||
bool async) /*!< in: whether to wait until finish */
|
||||
{
|
||||
char norm_name[FN_REFLEN];
|
||||
dict_table_t* table;
|
||||
dict_index_t* index;
|
||||
ibool one_index = (index_name != 0);
|
||||
int ret = 0;
|
||||
if (!srv_defragment) {
|
||||
return ER_FEATURE_DISABLED;
|
||||
}
|
||||
normalize_table_name(norm_name, name);
|
||||
table = dict_table_open_on_name(norm_name, FALSE,
|
||||
FALSE, DICT_ERR_IGNORE_NONE);
|
||||
for (index = dict_table_get_first_index(table); index;
|
||||
index = dict_table_get_next_index(index)) {
|
||||
if (one_index && strcasecmp(index_name, index->name) != 0)
|
||||
continue;
|
||||
if (btr_defragment_find_index(index)) {
|
||||
// We borrow this error code. When the same index is
|
||||
// already in the defragmentation queue, issue another
|
||||
// defragmentation only introduces overhead. We return
|
||||
// an error here to let the user know this is not
|
||||
// necessary. Note that this will fail a query that's
|
||||
// trying to defragment a full table if one of the
|
||||
// indicies in that table is already in defragmentation.
|
||||
// We choose this behavior so user is aware of this
|
||||
// rather than silently defragment other indicies of
|
||||
// that table.
|
||||
ret = ER_SP_ALREADY_EXISTS;
|
||||
break;
|
||||
}
|
||||
os_event_t event = btr_defragment_add_index(index, async);
|
||||
if (!async && event) {
|
||||
while(os_event_wait_time(event, 1000000)) {
|
||||
if (thd_killed(current_thd)) {
|
||||
btr_defragment_remove_index(index);
|
||||
ret = ER_QUERY_INTERRUPTED;
|
||||
break;
|
||||
}
|
||||
}
|
||||
os_event_free(event);
|
||||
}
|
||||
if (ret) {
|
||||
break;
|
||||
}
|
||||
if (one_index) {
|
||||
one_index = FALSE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
dict_table_close(table, FALSE, FALSE);
|
||||
if (ret == 0 && one_index) {
|
||||
ret = ER_NO_SUCH_INDEX;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*****************************************************************//**
|
||||
Removes all tables in the named database inside InnoDB. */
|
||||
static
|
||||
|
|
@ -12389,6 +12465,27 @@ ha_innobase::optimize(
|
|||
This works OK otherwise, but MySQL locks the entire table during
|
||||
calls to OPTIMIZE, which is undesirable. */
|
||||
|
||||
if (srv_defragment) {
|
||||
int err;
|
||||
|
||||
err = defragment_table(prebuilt->table->name, NULL, false);
|
||||
|
||||
if (err == 0) {
|
||||
return (HA_ADMIN_OK);
|
||||
} else {
|
||||
push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
|
||||
err,
|
||||
"InnoDB: Cannot defragment table %s: returned error code %d\n",
|
||||
prebuilt->table->name, err);
|
||||
|
||||
if(err == ER_SP_ALREADY_EXISTS) {
|
||||
return (HA_ADMIN_OK);
|
||||
} else {
|
||||
return (HA_ADMIN_TRY_ALTER);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (innodb_optimize_fulltext_only) {
|
||||
if (prebuilt->table->fts && prebuilt->table->fts->cache
|
||||
&& !dict_table_is_discarded(prebuilt->table)) {
|
||||
|
|
@ -15190,6 +15287,13 @@ innodb_max_dirty_pages_pct_lwm_update(
|
|||
srv_max_dirty_pages_pct_lwm = in_val;
|
||||
}
|
||||
|
||||
UNIV_INTERN
|
||||
void
|
||||
ha_innobase::set_partition_owner_stats(ha_statistics *stats)
|
||||
{
|
||||
ha_partition_stats= stats;
|
||||
}
|
||||
|
||||
/************************************************************//**
|
||||
Validate the file format name and return its corresponding id.
|
||||
@return valid file format id */
|
||||
|
|
@ -16448,6 +16552,23 @@ innodb_reset_all_monitor_update(
|
|||
TRUE);
|
||||
}
|
||||
|
||||
static
|
||||
void
|
||||
innodb_defragment_frequency_update(
|
||||
/*===============================*/
|
||||
THD* thd, /*!< in: thread handle */
|
||||
struct st_mysql_sys_var* var, /*!< in: pointer to
|
||||
system variable */
|
||||
void* var_ptr,/*!< out: where the
|
||||
formal string goes */
|
||||
const void* save) /*!< in: immediate result
|
||||
from check function */
|
||||
{
|
||||
srv_defragment_frequency = (*static_cast<const uint*>(save));
|
||||
srv_defragment_interval = ut_microseconds_to_timer(
|
||||
1000000.0 / srv_defragment_frequency);
|
||||
}
|
||||
|
||||
/****************************************************************//**
|
||||
Parse and enable InnoDB monitor counters during server startup.
|
||||
User can list the monitor counters/groups to be enable by specifying
|
||||
|
|
@ -17735,6 +17856,60 @@ static MYSQL_SYSVAR_BOOL(buffer_pool_load_at_startup, srv_buffer_pool_load_at_st
|
|||
"Load the buffer pool from a file named @@innodb_buffer_pool_filename",
|
||||
NULL, NULL, FALSE);
|
||||
|
||||
static MYSQL_SYSVAR_BOOL(defragment, srv_defragment,
|
||||
PLUGIN_VAR_RQCMDARG,
|
||||
"Enable/disable InnoDB defragmentation (default FALSE). When set to FALSE, all existing "
|
||||
"defragmentation will be paused. And new defragmentation command will fail."
|
||||
"Paused defragmentation commands will resume when this variable is set to "
|
||||
"true again.",
|
||||
NULL, NULL, FALSE);
|
||||
|
||||
static MYSQL_SYSVAR_UINT(defragment_n_pages, srv_defragment_n_pages,
|
||||
PLUGIN_VAR_RQCMDARG,
|
||||
"Number of pages considered at once when merging multiple pages to "
|
||||
"defragment",
|
||||
NULL, NULL, 7, 2, 32, 0);
|
||||
|
||||
static MYSQL_SYSVAR_UINT(defragment_stats_accuracy,
|
||||
srv_defragment_stats_accuracy,
|
||||
PLUGIN_VAR_RQCMDARG,
|
||||
"How many defragment stats changes there are before the stats "
|
||||
"are written to persistent storage. Set to 0 meaning disable "
|
||||
"defragment stats tracking.",
|
||||
NULL, NULL, 0, 0, ~0U, 0);
|
||||
|
||||
static MYSQL_SYSVAR_UINT(defragment_fill_factor_n_recs,
|
||||
srv_defragment_fill_factor_n_recs,
|
||||
PLUGIN_VAR_RQCMDARG,
|
||||
"How many records of space defragmentation should leave on the page. "
|
||||
"This variable, together with innodb_defragment_fill_factor, is introduced "
|
||||
"so defragmentation won't pack the page too full and cause page split on "
|
||||
"the next insert on every page. The variable indicating more defragmentation"
|
||||
" gain is the one effective.",
|
||||
NULL, NULL, 20, 1, 100, 0);
|
||||
|
||||
static MYSQL_SYSVAR_DOUBLE(defragment_fill_factor, srv_defragment_fill_factor,
|
||||
PLUGIN_VAR_RQCMDARG,
|
||||
"A number between [0.7, 1] that tells defragmentation how full it should "
|
||||
"fill a page. Default is 0.9. Number below 0.7 won't make much sense."
|
||||
"This variable, together with innodb_defragment_fill_factor_n_recs, is "
|
||||
"introduced so defragmentation won't pack the page too full and cause "
|
||||
"page split on the next insert on every page. The variable indicating more "
|
||||
"defragmentation gain is the one effective.",
|
||||
NULL, NULL, 0.9, 0.7, 1, 0);
|
||||
|
||||
static MYSQL_SYSVAR_UINT(defragment_frequency, srv_defragment_frequency,
|
||||
PLUGIN_VAR_RQCMDARG,
|
||||
"Do not defragment a single index more than this number of time per second."
|
||||
"This controls the number of time defragmentation thread can request X_LOCK "
|
||||
"on an index. Defragmentation thread will check whether "
|
||||
"1/defragment_frequency (s) has passed since it worked on this index last "
|
||||
"time, and put the index back to the queue if not enough time has passed. "
|
||||
"The actual frequency can only be lower than this given number.",
|
||||
NULL, innodb_defragment_frequency_update,
|
||||
SRV_DEFRAGMENT_FREQUENCY_DEFAULT, 1, 1000, 0);
|
||||
|
||||
|
||||
static MYSQL_SYSVAR_ULONG(lru_scan_depth, srv_LRU_scan_depth,
|
||||
PLUGIN_VAR_RQCMDARG,
|
||||
"How deep to scan LRU to keep it clean",
|
||||
|
|
@ -18291,6 +18466,12 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
|
|||
MYSQL_SYSVAR(buffer_pool_load_now),
|
||||
MYSQL_SYSVAR(buffer_pool_load_abort),
|
||||
MYSQL_SYSVAR(buffer_pool_load_at_startup),
|
||||
MYSQL_SYSVAR(defragment),
|
||||
MYSQL_SYSVAR(defragment_n_pages),
|
||||
MYSQL_SYSVAR(defragment_stats_accuracy),
|
||||
MYSQL_SYSVAR(defragment_fill_factor),
|
||||
MYSQL_SYSVAR(defragment_fill_factor_n_recs),
|
||||
MYSQL_SYSVAR(defragment_frequency),
|
||||
MYSQL_SYSVAR(lru_scan_depth),
|
||||
MYSQL_SYSVAR(flush_neighbors),
|
||||
MYSQL_SYSVAR(checksum_algorithm),
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/*****************************************************************************
|
||||
|
||||
Copyright (c) 2000, 2012, Oracle and/or its affiliates. All Rights Reserved.
|
||||
Copyright (c) 2013, SkySQL Ab. All Rights Reserved.
|
||||
Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free Software
|
||||
|
|
@ -105,6 +105,8 @@ class ha_innobase: public handler
|
|||
or undefined */
|
||||
uint num_write_row; /*!< number of write_row() calls */
|
||||
|
||||
ha_statistics* ha_partition_stats; /*!< stats of the partition owner
|
||||
handler (if there is one) */
|
||||
uint store_key_val_for_row(uint keynr, char* buff, uint buff_len,
|
||||
const uchar* record);
|
||||
inline void update_thd(THD* thd);
|
||||
|
|
@ -207,6 +209,8 @@ class ha_innobase: public handler
|
|||
int truncate();
|
||||
int delete_table(const char *name);
|
||||
int rename_table(const char* from, const char* to);
|
||||
int defragment_table(const char* name, const char* index_name,
|
||||
bool async);
|
||||
int check(THD* thd, HA_CHECK_OPT* check_opt);
|
||||
char* update_table_comment(const char* comment);
|
||||
char* get_foreign_key_create_info();
|
||||
|
|
@ -310,6 +314,7 @@ class ha_innobase: public handler
|
|||
Alter_inplace_info* ha_alter_info,
|
||||
bool commit);
|
||||
/** @} */
|
||||
void set_partition_owner_stats(ha_statistics *stats);
|
||||
bool check_if_incompatible_data(HA_CREATE_INFO *info,
|
||||
uint table_changes);
|
||||
bool check_if_supported_virtual_columns(void) { return TRUE; }
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
|
||||
Copyright (c) 2012, Facebook Inc.
|
||||
Copyright (c) 2014, SkySQL Ab. All Rights Reserved.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free Software
|
||||
|
|
@ -674,6 +675,21 @@ btr_get_size(
|
|||
is s-latched */
|
||||
__attribute__((nonnull, warn_unused_result));
|
||||
/**************************************************************//**
|
||||
Gets the number of reserved and used pages in a B-tree.
|
||||
@return number of pages reserved, or ULINT_UNDEFINED if the index
|
||||
is unavailable */
|
||||
UNIV_INTERN
|
||||
ulint
|
||||
btr_get_size_and_reserved(
|
||||
/*======================*/
|
||||
dict_index_t* index, /*!< in: index */
|
||||
ulint flag, /*!< in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */
|
||||
ulint* used, /*!< out: number of pages used (<= reserved) */
|
||||
mtr_t* mtr) /*!< in/out: mini-transaction where index
|
||||
is s-latched */
|
||||
__attribute__((nonnull));
|
||||
|
||||
/**************************************************************//**
|
||||
Allocates a new file page to be used in an index tree. NOTE: we assume
|
||||
that the caller has made the reservation for free extents!
|
||||
@retval NULL if no page could be allocated
|
||||
|
|
@ -720,6 +736,33 @@ btr_page_free_low(
|
|||
ulint level, /*!< in: page level */
|
||||
mtr_t* mtr) /*!< in: mtr */
|
||||
__attribute__((nonnull));
|
||||
/*************************************************************//**
|
||||
Reorganizes an index page.
|
||||
|
||||
IMPORTANT: On success, the caller will have to update IBUF_BITMAP_FREE
|
||||
if this is a compressed leaf page in a secondary index. This has to
|
||||
be done either within the same mini-transaction, or by invoking
|
||||
ibuf_reset_free_bits() before mtr_commit(). On uncompressed pages,
|
||||
IBUF_BITMAP_FREE is unaffected by reorganization.
|
||||
|
||||
@retval true if the operation was successful
|
||||
@retval false if it is a compressed page, and recompression failed */
|
||||
UNIV_INTERN
|
||||
bool
|
||||
btr_page_reorganize_block(
|
||||
/*======================*/
|
||||
bool recovery,/*!< in: true if called in recovery:
|
||||
locks should not be updated, i.e.,
|
||||
there cannot exist locks on the
|
||||
page, and a hash index should not be
|
||||
dropped: it cannot exist */
|
||||
ulint z_level,/*!< in: compression level to be used
|
||||
if dealing with compressed page */
|
||||
buf_block_t* block, /*!< in/out: B-tree page */
|
||||
dict_index_t* index, /*!< in: the index tree of the page */
|
||||
mtr_t* mtr) /*!< in/out: mini-transaction */
|
||||
__attribute__((nonnull));
|
||||
|
||||
#ifdef UNIV_BTR_PRINT
|
||||
/*************************************************************//**
|
||||
Prints size info of a B-tree. */
|
||||
|
|
@ -765,6 +808,60 @@ btr_validate_index(
|
|||
const trx_t* trx) /*!< in: transaction or 0 */
|
||||
__attribute__((nonnull(1), warn_unused_result));
|
||||
|
||||
#ifdef UNIV_SYNC_DEBUG
|
||||
/*************************************************************//**
|
||||
Removes a page from the level list of pages.
|
||||
@param space in: space where removed
|
||||
@param zip_size in: compressed page size in bytes, or 0 for uncompressed
|
||||
@param page in/out: page to remove
|
||||
@param index in: index tree
|
||||
@param mtr in/out: mini-transaction */
|
||||
# define btr_level_list_remove(space,zip_size,page,index,mtr) \
|
||||
btr_level_list_remove_func(space,zip_size,page,index,mtr)
|
||||
#else /* UNIV_SYNC_DEBUG */
|
||||
/*************************************************************//**
|
||||
Removes a page from the level list of pages.
|
||||
@param space in: space where removed
|
||||
@param zip_size in: compressed page size in bytes, or 0 for uncompressed
|
||||
@param page in/out: page to remove
|
||||
@param index in: index tree
|
||||
@param mtr in/out: mini-transaction */
|
||||
# define btr_level_list_remove(space,zip_size,page,index,mtr) \
|
||||
btr_level_list_remove_func(space,zip_size,page,mtr)
|
||||
#endif /* UNIV_SYNC_DEBUG */
|
||||
|
||||
/*************************************************************//**
|
||||
Removes a page from the level list of pages. */
|
||||
UNIV_INTERN
|
||||
void
|
||||
btr_level_list_remove_func(
|
||||
/*=======================*/
|
||||
ulint space, /*!< in: space where removed */
|
||||
ulint zip_size,/*!< in: compressed page size in bytes
|
||||
or 0 for uncompressed pages */
|
||||
page_t* page, /*!< in/out: page to remove */
|
||||
#ifdef UNIV_SYNC_DEBUG
|
||||
const dict_index_t* index, /*!< in: index tree */
|
||||
#endif /* UNIV_SYNC_DEBUG */
|
||||
mtr_t* mtr) /*!< in/out: mini-transaction */
|
||||
__attribute__((nonnull));
|
||||
|
||||
/*************************************************************//**
|
||||
If page is the only on its level, this function moves its records to the
|
||||
father page, thus reducing the tree height.
|
||||
@return father block */
|
||||
UNIV_INTERN
|
||||
buf_block_t*
|
||||
btr_lift_page_up(
|
||||
/*=============*/
|
||||
dict_index_t* index, /*!< in: index tree */
|
||||
buf_block_t* block, /*!< in: page which is the only on its level;
|
||||
must not be empty: use
|
||||
btr_discard_only_page_on_level if the last
|
||||
record from the page should be removed */
|
||||
mtr_t* mtr) /*!< in: mtr */
|
||||
__attribute__((nonnull));
|
||||
|
||||
#define BTR_N_LEAF_PAGES 1
|
||||
#define BTR_TOTAL_SIZE 2
|
||||
#endif /* !UNIV_HOTBACKUP */
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ Created 6/2/1994 Heikki Tuuri
|
|||
#include "mtr0mtr.h"
|
||||
#include "mtr0log.h"
|
||||
#include "page0zip.h"
|
||||
#include "srv0srv.h"
|
||||
|
||||
#define BTR_MAX_NODE_LEVEL 50 /*!< Maximum B-tree page level
|
||||
(not really a hard limit).
|
||||
Used in debug assertions
|
||||
|
|
@ -59,9 +59,7 @@ btr_block_get_func(
|
|||
block = buf_page_get_gen(space, zip_size, page_no, mode,
|
||||
NULL, BUF_GET, file, line, mtr);
|
||||
|
||||
SRV_CORRUPT_TABLE_CHECK(block, ; /* do nothing */);
|
||||
|
||||
if (block && mode != RW_NO_LATCH) {
|
||||
if (mode != RW_NO_LATCH) {
|
||||
|
||||
buf_block_dbg_add_level(
|
||||
block, index != NULL && dict_index_is_ibuf(index)
|
||||
|
|
@ -165,9 +163,10 @@ btr_page_get_next(
|
|||
/*!< in: mini-transaction handle */
|
||||
{
|
||||
ut_ad(page && mtr);
|
||||
#ifndef UNIV_INNOCHECKSUM
|
||||
ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX)
|
||||
|| mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_S_FIX));
|
||||
|
||||
#endif /* UNIV_INNOCHECKSUM */
|
||||
return(mach_read_from_4(page + FIL_PAGE_NEXT));
|
||||
}
|
||||
|
||||
|
|
|
|||
100
storage/xtradb/include/btr0defragment.h
Normal file
100
storage/xtradb/include/btr0defragment.h
Normal file
|
|
@ -0,0 +1,100 @@
|
|||
/*****************************************************************************
|
||||
|
||||
Copyright (C) 2013, 2014 Facebook, Inc. All Rights Reserved.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free Software
|
||||
Foundation; version 2 of the License.
|
||||
|
||||
This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along with
|
||||
this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
|
||||
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef btr0defragment_h
|
||||
#define btr0defragment_h
|
||||
|
||||
#include "univ.i"
|
||||
|
||||
#ifndef UNIV_HOTBACKUP
|
||||
|
||||
#include "btr0pcur.h"
|
||||
|
||||
/* Max number of pages to consider at once during defragmentation. */
|
||||
#define BTR_DEFRAGMENT_MAX_N_PAGES 32
|
||||
|
||||
/** stats in btr_defragment */
|
||||
extern ulint btr_defragment_compression_failures;
|
||||
extern ulint btr_defragment_failures;
|
||||
extern ulint btr_defragment_count;
|
||||
|
||||
/** Item in the work queue for btr_degrament_thread. */
|
||||
struct btr_defragment_item_t
|
||||
{
|
||||
btr_pcur_t* pcur; /* persistent cursor where
|
||||
btr_defragment_n_pages should start */
|
||||
os_event_t event; /* if not null, signal after work
|
||||
is done */
|
||||
bool removed; /* Mark an item as removed */
|
||||
ulonglong last_processed; /* timestamp of last time this index
|
||||
is processed by defragment thread */
|
||||
|
||||
btr_defragment_item_t(btr_pcur_t* pcur, os_event_t event);
|
||||
~btr_defragment_item_t();
|
||||
};
|
||||
|
||||
/******************************************************************//**
|
||||
Initialize defragmentation. */
|
||||
void
|
||||
btr_defragment_init(void);
|
||||
/******************************************************************//**
|
||||
Shutdown defragmentation. */
|
||||
void
|
||||
btr_defragment_shutdown();
|
||||
/******************************************************************//**
|
||||
Check whether the given index is in btr_defragment_wq. */
|
||||
bool
|
||||
btr_defragment_find_index(
|
||||
dict_index_t* index); /*!< Index to find. */
|
||||
/******************************************************************//**
|
||||
Add an index to btr_defragment_wq. Return a pointer to os_event if this
|
||||
is a synchronized defragmentation. */
|
||||
os_event_t
|
||||
btr_defragment_add_index(
|
||||
dict_index_t* index, /*!< index to be added */
|
||||
bool async); /*!< whether this is an async defragmentation */
|
||||
/******************************************************************//**
|
||||
When table is dropped, this function is called to mark a table as removed in
|
||||
btr_efragment_wq. The difference between this function and the remove_index
|
||||
function is this will not NULL the event. */
|
||||
void
|
||||
btr_defragment_remove_table(
|
||||
dict_table_t* table); /*!< Index to be removed. */
|
||||
/******************************************************************//**
|
||||
Mark an index as removed from btr_defragment_wq. */
|
||||
void
|
||||
btr_defragment_remove_index(
|
||||
dict_index_t* index); /*!< Index to be removed. */
|
||||
/*********************************************************************//**
|
||||
Check whether we should save defragmentation statistics to persistent storage.*/
|
||||
UNIV_INTERN
|
||||
void
|
||||
btr_defragment_save_defrag_stats_if_needed(
|
||||
dict_index_t* index); /*!< in: index */
|
||||
/******************************************************************//**
|
||||
Thread that merges consecutive b-tree pages into fewer pages to defragment
|
||||
the index. */
|
||||
extern "C" UNIV_INTERN
|
||||
os_thread_ret_t
|
||||
DECLARE_THREAD(btr_defragment_thread)(
|
||||
/*==========================================*/
|
||||
void* arg); /*!< in: a dummy parameter required by
|
||||
os_thread_create */
|
||||
|
||||
#endif /* !UNIV_HOTBACKUP */
|
||||
#endif
|
||||
|
|
@ -120,7 +120,9 @@ enum dict_table_op_t {
|
|||
DICT_TABLE_OP_DROP_ORPHAN,
|
||||
/** Silently load the tablespace if it does not exist,
|
||||
and do not load the definitions of incomplete indexes. */
|
||||
DICT_TABLE_OP_LOAD_TABLESPACE
|
||||
DICT_TABLE_OP_LOAD_TABLESPACE,
|
||||
/** Open the table only if it's in table cache. */
|
||||
DICT_TABLE_OP_OPEN_ONLY_IF_CACHED
|
||||
};
|
||||
|
||||
/**********************************************************************//**
|
||||
|
|
@ -1495,6 +1497,16 @@ dict_table_get_index_on_name(
|
|||
const char* name) /*!< in: name of the index to find */
|
||||
__attribute__((nonnull, warn_unused_result));
|
||||
/**********************************************************************//**
|
||||
Looks for an index with the given id given a table instance.
|
||||
@return index or NULL */
|
||||
UNIV_INTERN
|
||||
dict_index_t*
|
||||
dict_table_find_index_on_id(
|
||||
/*========================*/
|
||||
const dict_table_t* table, /*!< in: table instance */
|
||||
index_id_t id) /*!< in: index id */
|
||||
__attribute__((nonnull, warn_unused_result));
|
||||
/**********************************************************************//**
|
||||
In case there is more than one index with the same name return the index
|
||||
with the min(id).
|
||||
@return index, NULL if does not exist */
|
||||
|
|
|
|||
|
|
@ -597,6 +597,10 @@ struct zip_pad_info_t {
|
|||
rounds */
|
||||
};
|
||||
|
||||
/** Number of samples of data size kept when page compression fails for
|
||||
a certain index.*/
|
||||
#define STAT_DEFRAG_DATA_SIZE_N_SAMPLE 10
|
||||
|
||||
/** Data structure for an index. Most fields will be
|
||||
initialized to 0, NULL or FALSE in dict_mem_index_create(). */
|
||||
struct dict_index_t{
|
||||
|
|
@ -689,6 +693,23 @@ struct dict_index_t{
|
|||
/*!< approximate number of leaf pages in the
|
||||
index tree */
|
||||
/* @} */
|
||||
/** Statistics for defragmentation, these numbers are estimations and
|
||||
could be very inaccurate at certain times, e.g. right after restart,
|
||||
during defragmentation, etc. */
|
||||
/* @{ */
|
||||
ulint stat_defrag_modified_counter;
|
||||
ulint stat_defrag_n_pages_freed;
|
||||
/* number of pages freed by defragmentation. */
|
||||
ulint stat_defrag_n_page_split;
|
||||
/* number of page splits since last full index
|
||||
defragmentation. */
|
||||
ulint stat_defrag_data_size_sample[STAT_DEFRAG_DATA_SIZE_N_SAMPLE];
|
||||
/* data size when compression failure happened
|
||||
the most recent 10 times. */
|
||||
ulint stat_defrag_sample_next_slot;
|
||||
/* in which slot the next sample should be
|
||||
saved. */
|
||||
/* @} */
|
||||
prio_rw_lock_t lock; /*!< read-write lock protecting the
|
||||
upper levels of the index tree */
|
||||
trx_id_t trx_id; /*!< id of the transaction that created this
|
||||
|
|
|
|||
|
|
@ -53,8 +53,9 @@ dict_table_t*
|
|||
dict_table_open_on_id_low(
|
||||
/*=====================*/
|
||||
table_id_t table_id, /*!< in: table id */
|
||||
dict_err_ignore_t ignore_err); /*!< in: errors to ignore
|
||||
dict_err_ignore_t ignore_err, /*!< in: errors to ignore
|
||||
when loading the table */
|
||||
ibool open_only_if_in_cache);
|
||||
|
||||
#ifndef UNIV_NONINL
|
||||
#include "dict0priv.ic"
|
||||
|
|
|
|||
|
|
@ -74,8 +74,9 @@ dict_table_t*
|
|||
dict_table_open_on_id_low(
|
||||
/*======================*/
|
||||
table_id_t table_id, /*!< in: table id */
|
||||
dict_err_ignore_t ignore_err) /*!< in: errors to ignore
|
||||
dict_err_ignore_t ignore_err, /*!< in: errors to ignore
|
||||
when loading the table */
|
||||
ibool open_only_if_in_cache)
|
||||
{
|
||||
dict_table_t* table;
|
||||
ulint fold;
|
||||
|
|
@ -88,7 +89,7 @@ dict_table_open_on_id_low(
|
|||
HASH_SEARCH(id_hash, dict_sys->table_id_hash, fold,
|
||||
dict_table_t*, table, ut_ad(table->cached),
|
||||
table->id == table_id);
|
||||
if (table == NULL) {
|
||||
if (table == NULL && !open_only_if_in_cache) {
|
||||
table = dict_load_table_on_id(table_id, ignore_err);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -195,6 +195,39 @@ dict_stats_rename_table(
|
|||
is returned */
|
||||
size_t errstr_sz); /*!< in: errstr size */
|
||||
|
||||
/*********************************************************************//**
|
||||
Save defragmentation result.
|
||||
@return DB_SUCCESS or error code */
|
||||
UNIV_INTERN
|
||||
dberr_t
|
||||
dict_stats_save_defrag_summary(
|
||||
dict_index_t* index); /*!< in: index */
|
||||
|
||||
/*********************************************************************//**
|
||||
Save defragmentation stats for a given index.
|
||||
@return DB_SUCCESS or error code */
|
||||
UNIV_INTERN
|
||||
dberr_t
|
||||
dict_stats_save_defrag_stats(
|
||||
dict_index_t* index); /*!< in: index */
|
||||
|
||||
/**********************************************************************//**
|
||||
Clear defragmentation summary. */
|
||||
UNIV_INTERN
|
||||
void
|
||||
dict_stats_empty_defrag_summary(
|
||||
/*==================*/
|
||||
dict_index_t* index); /*!< in: index to clear defragmentation stats */
|
||||
|
||||
/**********************************************************************//**
|
||||
Clear defragmentation related index stats. */
|
||||
UNIV_INTERN
|
||||
void
|
||||
dict_stats_empty_defrag_stats(
|
||||
/*==================*/
|
||||
dict_index_t* index); /*!< in: index to clear defragmentation stats */
|
||||
|
||||
|
||||
#ifndef UNIV_NONINL
|
||||
#include "dict0stats.ic"
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -56,6 +56,28 @@ dict_stats_recalc_pool_del(
|
|||
/*=======================*/
|
||||
const dict_table_t* table); /*!< in: table to remove */
|
||||
|
||||
/*****************************************************************//**
|
||||
Add an index in a table to the defrag pool, which is processed by the
|
||||
background stats gathering thread. Only the table id and index id are
|
||||
added to the list, so the table can be closed after being enqueued and
|
||||
it will be opened when needed. If the table or index does not exist later
|
||||
(has been DROPped), then it will be removed from the pool and skipped. */
|
||||
UNIV_INTERN
|
||||
void
|
||||
dict_stats_defrag_pool_add(
|
||||
/*=======================*/
|
||||
const dict_index_t* index); /*!< in: table to add */
|
||||
|
||||
/*****************************************************************//**
|
||||
Delete a given index from the auto defrag pool. */
|
||||
UNIV_INTERN
|
||||
void
|
||||
dict_stats_defrag_pool_del(
|
||||
/*=======================*/
|
||||
const dict_table_t* table, /*!<in: if given, remove
|
||||
all entries for the table */
|
||||
const dict_index_t* index); /*!< in: index to remove */
|
||||
|
||||
/** Yield the data dictionary latch when waiting
|
||||
for the background thread to stop accessing a table.
|
||||
@param trx transaction holding the data dictionary locks */
|
||||
|
|
|
|||
|
|
@ -183,6 +183,16 @@ lock_update_merge_left(
|
|||
const buf_block_t* right_block); /*!< in: merged index page
|
||||
which will be discarded */
|
||||
/*************************************************************//**
|
||||
Updates the lock table when a page is splited and merged to
|
||||
two pages. */
|
||||
UNIV_INTERN
|
||||
void
|
||||
lock_update_split_and_merge(
|
||||
const buf_block_t* left_block, /*!< in: left page to which merged */
|
||||
const rec_t* orig_pred, /*!< in: original predecessor of
|
||||
supremum on the left page before merge*/
|
||||
const buf_block_t* right_block);/*!< in: right page from which merged */
|
||||
/*************************************************************//**
|
||||
Resets the original locks on heir and replaces them with gap type locks
|
||||
inherited from rec. */
|
||||
UNIV_INTERN
|
||||
|
|
|
|||
|
|
@ -397,6 +397,15 @@ extern my_bool srv_random_read_ahead;
|
|||
extern ulong srv_read_ahead_threshold;
|
||||
extern ulint srv_n_read_io_threads;
|
||||
extern ulint srv_n_write_io_threads;
|
||||
/* Defragmentation, Origianlly facebook default value is 100, but it's too high */
|
||||
#define SRV_DEFRAGMENT_FREQUENCY_DEFAULT 40
|
||||
extern my_bool srv_defragment;
|
||||
extern uint srv_defragment_n_pages;
|
||||
extern uint srv_defragment_stats_accuracy;
|
||||
extern uint srv_defragment_fill_factor_n_recs;
|
||||
extern double srv_defragment_fill_factor;
|
||||
extern uint srv_defragment_frequency;
|
||||
extern ulonglong srv_defragment_interval;
|
||||
|
||||
/* Number of IO operations per second the server can do */
|
||||
extern ulong srv_io_capacity;
|
||||
|
|
@ -1099,6 +1108,9 @@ struct export_var_t{
|
|||
ib_int64_t innodb_x_lock_os_waits;
|
||||
ib_int64_t innodb_x_lock_spin_rounds;
|
||||
ib_int64_t innodb_x_lock_spin_waits;
|
||||
ulint innodb_defragment_compression_failures;
|
||||
ulint innodb_defragment_failures;
|
||||
ulint innodb_defragment_count;
|
||||
#ifdef UNIV_DEBUG
|
||||
ulint innodb_purge_trx_id_age; /*!< rw_max_trx_id - purged trx_id */
|
||||
ulint innodb_purge_view_trx_id_age; /*!< rw_max_trx_id
|
||||
|
|
|
|||
|
|
@ -864,6 +864,7 @@ or row lock! */
|
|||
#define SYNC_EXTERN_STORAGE 500
|
||||
#define SYNC_FSP 400
|
||||
#define SYNC_FSP_PAGE 395
|
||||
#define SYNC_STATS_DEFRAG 390
|
||||
/*------------------------------------- Change buffer headers */
|
||||
#define SYNC_IBUF_MUTEX 370 /* ibuf_mutex */
|
||||
/*------------------------------------- Change buffer tree */
|
||||
|
|
|
|||
104
storage/xtradb/include/ut0timer.h
Normal file
104
storage/xtradb/include/ut0timer.h
Normal file
|
|
@ -0,0 +1,104 @@
|
|||
/*****************************************************************************
|
||||
|
||||
Copyright (c) 2013, 2014, Facebook, Inc. All Rights Reserved.
|
||||
Copyright (c) 2014, SkySQL Ab. All Rights Reserved.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free Software
|
||||
Foundation; version 2 of the License.
|
||||
|
||||
This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along with
|
||||
this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
|
||||
|
||||
*****************************************************************************/
|
||||
|
||||
/********************************************************************//**
|
||||
@file include/ut0timer.h
|
||||
Timer rountines
|
||||
|
||||
Created 30/07/2014 Jan Lindström jan.lindstrom@skysql.com
|
||||
modified from https://github.com/facebook/mysql-5.6/commit/c75a413edeb96eb99bf11d7269bdfea06f96d6b6
|
||||
*************************************************************************/
|
||||
#ifndef ut0timer_h
|
||||
#define ut0timer_h
|
||||
|
||||
#include "univ.i"
|
||||
#include "data0type.h"
|
||||
#include <my_rdtsc.h>
|
||||
|
||||
/* Current timer stats */
|
||||
extern struct my_timer_unit_info ut_timer;
|
||||
|
||||
/**************************************************************//**
|
||||
Function pointer to point selected timer function.
|
||||
@return timer current value */
|
||||
extern ulonglong (*ut_timer_now)(void);
|
||||
|
||||
/**************************************************************//**
|
||||
Sets up the data required for use of my_timer_* functions.
|
||||
Selects the best timer by high frequency, and tight resolution.
|
||||
Points my_timer_now() to the selected timer function.
|
||||
Initializes my_timer struct to contain the info for selected timer.*/
|
||||
UNIV_INTERN
|
||||
void ut_init_timer(void);
|
||||
|
||||
/**************************************************************//**
|
||||
Return time passed since time then, automatically adjusted
|
||||
for the estimated timer overhead.
|
||||
@return time passed since "then" */
|
||||
UNIV_INLINE
|
||||
ulonglong
|
||||
ut_timer_since(
|
||||
/*===========*/
|
||||
ulonglong then); /*!< in: time where to calculate */
|
||||
/**************************************************************//**
|
||||
Get time passed since "then", and update then to now
|
||||
@return time passed sinche "then" */
|
||||
UNIV_INLINE
|
||||
ulonglong
|
||||
ut_timer_since_and_update(
|
||||
/*======================*/
|
||||
ulonglong *then); /*!< in: time where to calculate */
|
||||
/**************************************************************//**
|
||||
Convert native timer units in a ulonglong into seconds in a double
|
||||
@return time in a seconds */
|
||||
UNIV_INLINE
|
||||
double
|
||||
ut_timer_to_seconds(
|
||||
/*=================*/
|
||||
ulonglong when); /*!< in: time where to calculate */
|
||||
/**************************************************************//**
|
||||
Convert native timer units in a ulonglong into milliseconds in a double
|
||||
@return time in milliseconds */
|
||||
UNIV_INLINE
|
||||
double
|
||||
ut_timer_to_milliseconds(
|
||||
/*=====================*/
|
||||
ulonglong when); /*!< in: time where to calculate */
|
||||
/**************************************************************//**
|
||||
Convert native timer units in a ulonglong into microseconds in a double
|
||||
@return time in microseconds */
|
||||
UNIV_INLINE
|
||||
double
|
||||
ut_timer_to_microseconds(
|
||||
/*=====================*/
|
||||
ulonglong when); /*!< in: time where to calculate */
|
||||
/**************************************************************//**
|
||||
Convert microseconds in a double to native timer units in a ulonglong
|
||||
@return time in microseconds */
|
||||
UNIV_INLINE
|
||||
ulonglong
|
||||
ut_microseconds_to_timer(
|
||||
/*=====================*/
|
||||
ulonglong when); /*!< in: time where to calculate */
|
||||
|
||||
#ifndef UNIV_NONINL
|
||||
#include "ut0timer.ic"
|
||||
#endif
|
||||
|
||||
#endif
|
||||
113
storage/xtradb/include/ut0timer.ic
Normal file
113
storage/xtradb/include/ut0timer.ic
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
/*****************************************************************************
|
||||
|
||||
Copyright (c) 2013, 2014, Facebook, Inc. All Rights Reserved.
|
||||
Copyright (c) 2014, SkySQL Ab. All Rights Reserved.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free Software
|
||||
Foundation; version 2 of the License.
|
||||
|
||||
This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along with
|
||||
this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
|
||||
|
||||
*****************************************************************************/
|
||||
|
||||
/********************************************************************//**
|
||||
@file include/ut0timer.ic
|
||||
Timer rountines
|
||||
|
||||
Created 30/07/2014 Jan Lindström jan.lindstrom@skysql.com
|
||||
modified from https://github.com/facebook/mysql-5.6/commit/c75a413edeb96eb99bf11d7269bdfea06f96d6b6
|
||||
*************************************************************************/
|
||||
|
||||
/**************************************************************//**
|
||||
Return time passed since time then, automatically adjusted
|
||||
for the estimated timer overhead.
|
||||
@return time passed since "then" */
|
||||
UNIV_INLINE
|
||||
ulonglong
|
||||
ut_timer_since(
|
||||
/*===========*/
|
||||
ulonglong then) /*!< in: time where to calculate */
|
||||
{
|
||||
return (ut_timer_now() - then) - ut_timer.overhead;
|
||||
}
|
||||
|
||||
/**************************************************************//**
|
||||
Get time passed since "then", and update then to now
|
||||
@return time passed sinche "then" */
|
||||
UNIV_INLINE
|
||||
ulonglong
|
||||
ut_timer_since_and_update(
|
||||
/*======================*/
|
||||
ulonglong *then) /*!< in: time where to calculate */
|
||||
{
|
||||
ulonglong now = ut_timer_now();
|
||||
ulonglong ret = (now - (*then)) - ut_timer.overhead;
|
||||
*then = now;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**************************************************************//**
|
||||
Convert native timer units in a ulonglong into seconds in a double
|
||||
@return time in a seconds */
|
||||
UNIV_INLINE
|
||||
double
|
||||
ut_timer_to_seconds(
|
||||
/*=================*/
|
||||
ulonglong when) /*!< in: time where to calculate */
|
||||
{
|
||||
double ret = (double)(when);
|
||||
ret /= (double)(ut_timer.frequency);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**************************************************************//**
|
||||
Convert native timer units in a ulonglong into milliseconds in a double
|
||||
@return time in milliseconds */
|
||||
UNIV_INLINE
|
||||
double
|
||||
ut_timer_to_milliseconds(
|
||||
/*=====================*/
|
||||
ulonglong when) /*!< in: time where to calculate */
|
||||
{
|
||||
double ret = (double)(when);
|
||||
ret *= 1000.0;
|
||||
ret /= (double)(ut_timer.frequency);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**************************************************************//**
|
||||
Convert native timer units in a ulonglong into microseconds in a double
|
||||
@return time in microseconds */
|
||||
UNIV_INLINE
|
||||
double
|
||||
ut_timer_to_microseconds(
|
||||
/*=====================*/
|
||||
ulonglong when) /*!< in: time where to calculate */
|
||||
{
|
||||
double ret = (double)(when);
|
||||
ret *= 1000000.0;
|
||||
ret /= (double)(ut_timer.frequency);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**************************************************************//**
|
||||
Convert microseconds in a double to native timer units in a ulonglong
|
||||
@return time in microseconds */
|
||||
UNIV_INLINE
|
||||
ulonglong
|
||||
ut_microseconds_to_timer(
|
||||
/*=====================*/
|
||||
ulonglong when) /*!< in: time where to calculate */
|
||||
{
|
||||
double ret = when;
|
||||
ret *= (double)(ut_timer.frequency);
|
||||
ret /= 1000000.0;
|
||||
return (ulonglong)ret;
|
||||
}
|
||||
|
|
@ -3290,6 +3290,47 @@ lock_update_merge_left(
|
|||
lock_mutex_exit();
|
||||
}
|
||||
|
||||
/*************************************************************//**
|
||||
Updates the lock table when a page is split and merged to
|
||||
two pages. */
|
||||
UNIV_INTERN
|
||||
void
|
||||
lock_update_split_and_merge(
|
||||
const buf_block_t* left_block, /*!< in: left page to which merged */
|
||||
const rec_t* orig_pred, /*!< in: original predecessor of
|
||||
supremum on the left page before merge*/
|
||||
const buf_block_t* right_block) /*!< in: right page from which merged */
|
||||
{
|
||||
const rec_t* left_next_rec;
|
||||
|
||||
ut_a(left_block && right_block);
|
||||
ut_a(orig_pred);
|
||||
|
||||
lock_mutex_enter();
|
||||
|
||||
left_next_rec = page_rec_get_next_const(orig_pred);
|
||||
|
||||
/* Inherit the locks on the supremum of the left page to the
|
||||
first record which was moved from the right page */
|
||||
lock_rec_inherit_to_gap(
|
||||
left_block, left_block,
|
||||
page_rec_get_heap_no(left_next_rec),
|
||||
PAGE_HEAP_NO_SUPREMUM);
|
||||
|
||||
/* Reset the locks on the supremum of the left page,
|
||||
releasing waiting transactions */
|
||||
lock_rec_reset_and_release_wait(left_block,
|
||||
PAGE_HEAP_NO_SUPREMUM);
|
||||
|
||||
/* Inherit the locks to the supremum of the left page from the
|
||||
successor of the infimum on the right page */
|
||||
lock_rec_inherit_to_gap(left_block, right_block,
|
||||
PAGE_HEAP_NO_SUPREMUM,
|
||||
lock_get_min_heap_no(right_block));
|
||||
|
||||
lock_mutex_exit();
|
||||
}
|
||||
|
||||
/*************************************************************//**
|
||||
Resets the original locks on heir and replaces them with gap type locks
|
||||
inherited from rec. */
|
||||
|
|
|
|||
|
|
@ -1349,6 +1349,21 @@ page_cur_insert_rec_zip(
|
|||
return(insert_rec);
|
||||
}
|
||||
|
||||
/* Page compress failed. If this happened on a
|
||||
leaf page, put the data size into the sample
|
||||
buffer. */
|
||||
if (page_is_leaf(page)) {
|
||||
ulint occupied = page_get_data_size(page)
|
||||
+ page_dir_calc_reserved_space(
|
||||
page_get_n_recs(page));
|
||||
index->stat_defrag_data_size_sample[
|
||||
index->stat_defrag_sample_next_slot] =
|
||||
occupied;
|
||||
index->stat_defrag_sample_next_slot =
|
||||
(index->stat_defrag_sample_next_slot
|
||||
+ 1) % STAT_DEFRAG_DATA_SIZE_N_SAMPLE;
|
||||
}
|
||||
|
||||
ut_ad(cursor->rec
|
||||
== (pos > 1
|
||||
? page_rec_get_nth(
|
||||
|
|
|
|||
|
|
@ -53,6 +53,7 @@ Created 9/17/2000 Heikki Tuuri
|
|||
#include "rem0cmp.h"
|
||||
#include "log0log.h"
|
||||
#include "btr0sea.h"
|
||||
#include "btr0defragment.h"
|
||||
#include "fil0fil.h"
|
||||
#include "ibuf0ibuf.h"
|
||||
#include "fts0fts.h"
|
||||
|
|
@ -3857,6 +3858,8 @@ row_drop_table_for_mysql(
|
|||
if (!dict_table_is_temporary(table)) {
|
||||
|
||||
dict_stats_recalc_pool_del(table);
|
||||
dict_stats_defrag_pool_del(table, NULL);
|
||||
btr_defragment_remove_table(table);
|
||||
|
||||
/* Remove stats for this table and all of its indexes from the
|
||||
persistent storage if it exists and if there are stats for this
|
||||
|
|
|
|||
|
|
@ -70,10 +70,11 @@ Created 10/8/1995 Heikki Tuuri
|
|||
#include "srv0mon.h"
|
||||
#include "ut0crc32.h"
|
||||
#include "os0file.h"
|
||||
|
||||
#include "btr0defragment.h"
|
||||
#include "mysql/plugin.h"
|
||||
#include "mysql/service_thd_wait.h"
|
||||
#include "fil0pagecompress.h"
|
||||
#include <my_rdtsc.h>
|
||||
|
||||
/* prototypes of new functions added to ha_innodb.cc for kill_idle_transaction */
|
||||
ibool innobase_thd_is_idle(const void* thd);
|
||||
|
|
@ -280,6 +281,16 @@ UNIV_INTERN ulint srv_buf_pool_curr_size = 0;
|
|||
UNIV_INTERN ulint srv_mem_pool_size = ULINT_MAX;
|
||||
UNIV_INTERN ulint srv_lock_table_size = ULINT_MAX;
|
||||
|
||||
/* Defragmentation */
|
||||
UNIV_INTERN my_bool srv_defragment = FALSE;
|
||||
UNIV_INTERN uint srv_defragment_n_pages = 7;
|
||||
UNIV_INTERN uint srv_defragment_stats_accuracy = 0;
|
||||
UNIV_INTERN uint srv_defragment_fill_factor_n_recs = 20;
|
||||
UNIV_INTERN double srv_defragment_fill_factor = 0.9;
|
||||
UNIV_INTERN uint srv_defragment_frequency =
|
||||
SRV_DEFRAGMENT_FREQUENCY_DEFAULT;
|
||||
UNIV_INTERN ulonglong srv_defragment_interval = 0;
|
||||
|
||||
/** Query thread preflush algorithm */
|
||||
UNIV_INTERN ulong srv_foreground_preflush
|
||||
= SRV_FOREGROUND_PREFLUSH_EXP_BACKOFF;
|
||||
|
|
@ -1876,6 +1887,11 @@ srv_export_innodb_status(void)
|
|||
export_vars.innodb_page_compressed_trim_op_saved = srv_stats.page_compressed_trim_op_saved;
|
||||
export_vars.innodb_pages_page_decompressed = srv_stats.pages_page_decompressed;
|
||||
|
||||
export_vars.innodb_defragment_compression_failures =
|
||||
btr_defragment_compression_failures;
|
||||
export_vars.innodb_defragment_failures = btr_defragment_failures;
|
||||
export_vars.innodb_defragment_count = btr_defragment_count;
|
||||
|
||||
#ifdef UNIV_DEBUG
|
||||
rw_lock_s_lock(&purge_sys->latch);
|
||||
trx_id_t done_trx_no = purge_sys->done.trx_no;
|
||||
|
|
|
|||
|
|
@ -69,6 +69,8 @@ Created 2/16/1996 Heikki Tuuri
|
|||
#include "srv0start.h"
|
||||
#include "srv0srv.h"
|
||||
#include "buf0flu.h"
|
||||
#include "btr0defragment.h"
|
||||
#include "ut0timer.h"
|
||||
|
||||
#ifndef UNIV_HOTBACKUP
|
||||
# include "trx0rseg.h"
|
||||
|
|
@ -1575,6 +1577,9 @@ innobase_start_or_create_for_mysql(void)
|
|||
char* logfile0 = NULL;
|
||||
size_t dirnamelen;
|
||||
|
||||
/* This should be initialized early */
|
||||
ut_init_timer();
|
||||
|
||||
if (srv_force_recovery > SRV_FORCE_NO_TRX_UNDO) {
|
||||
srv_read_only_mode = true;
|
||||
}
|
||||
|
|
@ -2960,6 +2965,9 @@ files_checked:
|
|||
fts_optimize_init();
|
||||
}
|
||||
|
||||
/* Initialize online defragmentation. */
|
||||
btr_defragment_init();
|
||||
|
||||
srv_was_started = TRUE;
|
||||
|
||||
return(DB_SUCCESS);
|
||||
|
|
|
|||
|
|
@ -1272,6 +1272,7 @@ sync_thread_add_level(
|
|||
case SYNC_IBUF_MUTEX:
|
||||
case SYNC_INDEX_ONLINE_LOG:
|
||||
case SYNC_STATS_AUTO_RECALC:
|
||||
case SYNC_STATS_DEFRAG:
|
||||
if (!sync_thread_levels_g(array, level, TRUE)) {
|
||||
fprintf(stderr,
|
||||
"InnoDB: sync_thread_levels_g(array, %lu)"
|
||||
|
|
|
|||
92
storage/xtradb/ut/ut0timer.cc
Normal file
92
storage/xtradb/ut/ut0timer.cc
Normal file
|
|
@ -0,0 +1,92 @@
|
|||
/*****************************************************************************
|
||||
|
||||
Copyright (c) 2013, 2014, Facebook, Inc. All Rights Reserved.
|
||||
Copyright (c) 2014, SkySQL Ab. All Rights Reserved.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free Software
|
||||
Foundation; version 2 of the License.
|
||||
|
||||
This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along with
|
||||
this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
|
||||
|
||||
*****************************************************************************/
|
||||
|
||||
/********************************************************************//**
|
||||
@file ut/ut0timer.cc
|
||||
Timer routines
|
||||
|
||||
Created 30/07/2014 Jan Lindström jan.lindstrom@skysql.com
|
||||
modified from https://github.com/facebook/mysql-5.6/commit/c75a413edeb96eb99bf11d7269bdfea06f96d6b6
|
||||
*************************************************************************/
|
||||
|
||||
#include "data0type.h"
|
||||
#include <my_rdtsc.h>
|
||||
#include <ut0timer.h>
|
||||
|
||||
/**************************************************************//**
Initial timer definition: placeholder used until ut_init_timer()
selects a real platform timer, and the permanent fallback when no
platform timer is acceptable.
@return 0 always */
static
ulonglong
ut_timer_none(void)
/*===============*/
{
	return 0;
}
|
||||
|
||||
/**************************************************************//**
Function pointer to point selected timer function.
Starts at ut_timer_none; ut_init_timer() repoints it at the best
available my_timer_* routine.
@return timer current value */
ulonglong (*ut_timer_now)(void) = &ut_timer_none;

/* Info (frequency, resolution, overhead) for the timer selected by
ut_init_timer(); the ut_timer_to_* conversion helpers divide raw
readings by ut_timer.frequency. */
struct my_timer_unit_info ut_timer;
|
||||
|
||||
/**************************************************************//**
|
||||
Sets up the data required for use of my_timer_* functions.
|
||||
Selects the best timer by high frequency, and tight resolution.
|
||||
Points my_timer_now() to the selected timer function.
|
||||
Initializes my_timer struct to contain the info for selected timer.*/
|
||||
UNIV_INTERN
|
||||
void
|
||||
ut_init_timer(void)
|
||||
/*===============*/
|
||||
{
|
||||
MY_TIMER_INFO all_timer_info;
|
||||
my_timer_init(&all_timer_info);
|
||||
|
||||
if (all_timer_info.cycles.frequency > 1000000 &&
|
||||
all_timer_info.cycles.resolution == 1) {
|
||||
ut_timer = all_timer_info.cycles;
|
||||
ut_timer_now = &my_timer_cycles;
|
||||
} else if (all_timer_info.nanoseconds.frequency > 1000000 &&
|
||||
all_timer_info.nanoseconds.resolution == 1) {
|
||||
ut_timer = all_timer_info.nanoseconds;
|
||||
ut_timer_now = &my_timer_nanoseconds;
|
||||
} else if (all_timer_info.microseconds.frequency >= 1000000 &&
|
||||
all_timer_info.microseconds.resolution == 1) {
|
||||
ut_timer = all_timer_info.microseconds;
|
||||
ut_timer_now = &my_timer_microseconds;
|
||||
|
||||
} else if (all_timer_info.milliseconds.frequency >= 1000 &&
|
||||
all_timer_info.milliseconds.resolution == 1) {
|
||||
ut_timer = all_timer_info.milliseconds;
|
||||
ut_timer_now = &my_timer_milliseconds;
|
||||
} else if (all_timer_info.ticks.frequency >= 1000 &&
|
||||
/* Will probably be false */
|
||||
all_timer_info.ticks.resolution == 1) {
|
||||
ut_timer = all_timer_info.ticks;
|
||||
ut_timer_now = &my_timer_ticks;
|
||||
} else {
|
||||
/* None are acceptable, so leave it as "None", and fill in struct */
|
||||
ut_timer.frequency = 1; /* Avoid div-by-zero */
|
||||
ut_timer.overhead = 0; /* Since it doesn't do anything */
|
||||
ut_timer.resolution = 10; /* Another sign it's bad */
|
||||
ut_timer.routine = 0; /* None */
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue