mirror of
https://github.com/MariaDB/server.git
synced 2025-01-29 02:05:57 +01:00
Merge branch '10.0' of github.com:MariaDB/server into 10.0
This commit is contained in:
commit
23af6f5942
162 changed files with 4967 additions and 4289 deletions
1
CREDITS
1
CREDITS
|
@ -10,6 +10,7 @@ Visma http://visma.com (2015 - 2016)
|
|||
Acronis http://acronis.com (2016)
|
||||
Nexedi https://www.nexedi.com (2016)
|
||||
Automattic https://automattic.com (2014 - 2016)
|
||||
Tencent Game DBA http://tencentdba.com/about (2016)
|
||||
Verkkokauppa.com https://www.verkkokauppa.com (2015 - 2016)
|
||||
Virtuozzo https://virtuozzo.com (2016)
|
||||
|
||||
|
|
2
VERSION
2
VERSION
|
@ -1,3 +1,3 @@
|
|||
MYSQL_VERSION_MAJOR=10
|
||||
MYSQL_VERSION_MINOR=0
|
||||
MYSQL_VERSION_PATCH=27
|
||||
MYSQL_VERSION_PATCH=28
|
||||
|
|
|
@ -220,6 +220,9 @@ SETA(CPACK_RPM_test_PACKAGE_PROVIDES
|
|||
"perl(mtr_io.pl)"
|
||||
"perl(mtr_match)"
|
||||
"perl(mtr_misc.pl)"
|
||||
"perl(mtr_gcov.pl)"
|
||||
"perl(mtr_gprof.pl)"
|
||||
"perl(mtr_process.pl)"
|
||||
"perl(mtr_report)"
|
||||
"perl(mtr_results)"
|
||||
"perl(mtr_unique)")
|
||||
|
|
|
@ -882,8 +882,7 @@ typedef long long my_ptrdiff_t;
|
|||
and related routines are refactored.
|
||||
*/
|
||||
|
||||
#define my_offsetof(TYPE, MEMBER) \
|
||||
((size_t)((char *)&(((TYPE *)0x10)->MEMBER) - (char*)0x10))
|
||||
#define my_offsetof(TYPE, MEMBER) PTR_BYTE_DIFF(&((TYPE *)0x10)->MEMBER, 0x10)
|
||||
|
||||
#define NullS (char *) 0
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* Copyright (c) 2000, 2013, Oracle and/or its affiliates.
|
||||
Copyright (c) 2010, 2013, Monty Program Ab.
|
||||
Copyright (c) 2010, 2016, Monty Program Ab.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -271,7 +271,7 @@ extern my_bool my_use_symdir;
|
|||
extern ulong my_default_record_cache_size;
|
||||
extern my_bool my_disable_locking, my_disable_async_io,
|
||||
my_disable_flush_key_blocks, my_disable_symlinks;
|
||||
extern my_bool my_disable_sync;
|
||||
extern my_bool my_disable_sync, my_disable_copystat_in_redel;
|
||||
extern char wild_many,wild_one,wild_prefix;
|
||||
extern const char *charsets_dir;
|
||||
extern my_bool timed_mutexes;
|
||||
|
|
|
@ -52,7 +52,7 @@ eval SELECT 'hello' INTO OUTFILE 'fake_file.$prefix';
|
|||
|
||||
# Use '/' instead of '\' in the error message. On windows platform, dir is
|
||||
# formed with '\'.
|
||||
--replace_regex /\\testing_1\\*/\/testing_1\// /66/39/ /17/39/ /File exists/Directory not empty/
|
||||
--replace_regex /\\testing_1\\*/\/testing_1\// /66/39/ /17/39/ /247/39/ /File exists/Directory not empty/
|
||||
--error 1010
|
||||
DROP DATABASE testing_1;
|
||||
let $wait_binlog_event= DROP TABLE IF EXIST;
|
||||
|
|
|
@ -341,6 +341,7 @@ while ($1)
|
|||
alter table t1 add index i2(key2);
|
||||
alter table t1 add index i3(key3);
|
||||
update t1 set key2=key1,key3=key1;
|
||||
analyze table t1;
|
||||
|
||||
# to test the bug, the following must use "sort_union":
|
||||
--replace_column 9 REF
|
||||
|
|
|
@ -261,11 +261,7 @@ sub show {
|
|||
# On Windows, rely on cdb to be there...
|
||||
if (IS_WINDOWS)
|
||||
{
|
||||
# Starting cdb is unsafe when used with --parallel > 1 option
|
||||
if ( $parallel < 2 )
|
||||
{
|
||||
_cdb($core_name);
|
||||
}
|
||||
_cdb($core_name);
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
@ -60,8 +60,6 @@ use My::Test;
|
|||
use My::Find;
|
||||
use My::Suite;
|
||||
|
||||
require "mtr_misc.pl";
|
||||
|
||||
# locate plugin suites, depending on whether it's a build tree or installed
|
||||
my @plugin_suitedirs;
|
||||
my $plugin_suitedir_regex;
|
||||
|
@ -1122,7 +1120,7 @@ sub get_tags_from_file($$) {
|
|||
$file_to_tags{$file}= $tags;
|
||||
$file_to_master_opts{$file}= $master_opts;
|
||||
$file_to_slave_opts{$file}= $slave_opts;
|
||||
$file_combinations{$file}= [ uniq(@combinations) ];
|
||||
$file_combinations{$file}= [ ::uniq(@combinations) ];
|
||||
$file_in_overlay{$file} = 1 if $in_overlay;
|
||||
return @{$tags};
|
||||
}
|
||||
|
|
|
@ -34,7 +34,6 @@ use mtr_match;
|
|||
use My::Platform;
|
||||
use POSIX qw[ _exit ];
|
||||
use IO::Handle qw[ flush ];
|
||||
require "mtr_io.pl";
|
||||
use mtr_results;
|
||||
|
||||
my $tot_real_time= 0;
|
||||
|
@ -92,7 +91,7 @@ sub mtr_report_test_passed ($) {
|
|||
my $timer_str= "";
|
||||
if ( $timer and -f "$::opt_vardir/log/timer" )
|
||||
{
|
||||
$timer_str= mtr_fromfile("$::opt_vardir/log/timer");
|
||||
$timer_str= ::mtr_fromfile("$::opt_vardir/log/timer");
|
||||
$tinfo->{timer}= $timer_str;
|
||||
resfile_test_info('duration', $timer_str) if $::opt_resfile;
|
||||
}
|
||||
|
|
|
@ -102,11 +102,11 @@ use mtr_results;
|
|||
use IO::Socket::INET;
|
||||
use IO::Select;
|
||||
|
||||
require "lib/mtr_process.pl";
|
||||
require "lib/mtr_io.pl";
|
||||
require "lib/mtr_gcov.pl";
|
||||
require "lib/mtr_gprof.pl";
|
||||
require "lib/mtr_misc.pl";
|
||||
require "mtr_process.pl";
|
||||
require "mtr_io.pl";
|
||||
require "mtr_gcov.pl";
|
||||
require "mtr_gprof.pl";
|
||||
require "mtr_misc.pl";
|
||||
|
||||
$SIG{INT}= sub { mtr_error("Got ^C signal"); };
|
||||
$SIG{HUP}= sub { mtr_error("Hangup detected on controlling terminal"); };
|
||||
|
|
|
@ -9,6 +9,7 @@ Acronis http://www.acronis.com Silver Sponsor of the MariaDB Foundation
|
|||
Auttomattic https://automattic.com Bronze Sponsor of the MariaDB Foundation
|
||||
Verkkokauppa.com https://virtuozzo.com Bronze Sponsor of the MariaDB Foundation
|
||||
Virtuozzo https://virtuozzo.com/ Bronze Sponsor of the MariaDB Foundation
|
||||
Tencent Game DBA http://tencentdba.com/about/ Bronze Sponsor of the MariaDB Foundation
|
||||
Google USA Sponsoring encryption, parallel replication and GTID
|
||||
Facebook USA Sponsoring non-blocking API, LIMIT ROWS EXAMINED etc
|
||||
Ronald Bradford Brisbane, Australia EFF contribution for UC2006 Auction
|
||||
|
|
|
@ -1658,6 +1658,9 @@ CHAR_LENGTH(TRIM(BOTH 0x61 FROM _utf32 0x00000061))
|
|||
SELECT CHAR_LENGTH(TRIM(BOTH 0x00 FROM _utf32 0x00000061));
|
||||
CHAR_LENGTH(TRIM(BOTH 0x00 FROM _utf32 0x00000061))
|
||||
1
|
||||
select hex(lower(cast(0xffff0000 as char character set utf32))) as c;
|
||||
c
|
||||
FFFF0000
|
||||
#
|
||||
# End of 5.5 tests
|
||||
#
|
||||
|
|
|
@ -286,3 +286,19 @@ F 28 28
|
|||
F 29 29
|
||||
F 30 30
|
||||
DROP TABLE t0,t1,t2;
|
||||
#
|
||||
# MDEV-MariaDB daemon leaks memory with specific query
|
||||
#
|
||||
CREATE TABLE t1 (`voter_id` int(11) unsigned NOT NULL,
|
||||
`language_id` int(11) unsigned NOT NULL DEFAULT '1'
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
|
||||
CREATE TABLE t2 (`voter_id` int(10) unsigned NOT NULL DEFAULT '0',
|
||||
`serialized_c` mediumblob) ENGINE=InnoDB DEFAULT CHARSET=utf8;
|
||||
insert into t2 values (1,repeat("a",1000)),(2,repeat("a",1000)),(3,repeat("b",1000)),(4,repeat("c",1000)),(4,repeat("b",1000));
|
||||
SELECT GROUP_CONCAT(t1.language_id SEPARATOR ',') AS `translation_resources`, `d`.`serialized_c` FROM t2 AS `d` LEFT JOIN t1 ON `d`.`voter_id` = t1.`voter_id` GROUP BY `d`.`voter_id` ORDER BY 10-d.voter_id+RAND()*0;
|
||||
translation_resources serialized_c
|
||||
NULL cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
|
||||
NULL bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb
|
||||
NULL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
|
||||
NULL aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
|
||||
drop table t1,t2;
|
||||
|
|
|
@ -311,6 +311,9 @@ set @d=@d*2;
|
|||
alter table t1 add index i2(key2);
|
||||
alter table t1 add index i3(key3);
|
||||
update t1 set key2=key1,key3=key1;
|
||||
analyze table t1;
|
||||
Table Op Msg_type Msg_text
|
||||
test.t1 analyze status OK
|
||||
explain select * from t1 where (key3 > 30 and key3<35) or (key2 >32 and key2 < 40);
|
||||
id select_type table type possible_keys key key_len ref rows Extra
|
||||
1 SIMPLE t1 index_merge i2,i3 i3,i2 4,4 NULL REF Using sort_union(i3,i2); Using where
|
||||
|
|
|
@ -1146,6 +1146,9 @@ set @d=@d*2;
|
|||
alter table t1 add index i2(key2);
|
||||
alter table t1 add index i3(key3);
|
||||
update t1 set key2=key1,key3=key1;
|
||||
analyze table t1;
|
||||
Table Op Msg_type Msg_text
|
||||
test.t1 analyze status OK
|
||||
explain select * from t1 where (key3 > 30 and key3<35) or (key2 >32 and key2 < 40);
|
||||
id select_type table type possible_keys key key_len ref rows Extra
|
||||
1 SIMPLE t1 index_merge i2,i3 i3,i2 4,4 NULL REF Using sort_union(i3,i2); Using where
|
||||
|
|
|
@ -3832,6 +3832,23 @@ test.m1 repair error Corrupt
|
|||
# Clean-up.
|
||||
drop tables m1, t1, t4;
|
||||
drop view t3;
|
||||
#
|
||||
# MDEV-10424 - Assertion `ticket == __null' failed in
|
||||
# MDL_request::set_type
|
||||
#
|
||||
CREATE TABLE t1 (f1 INT) ENGINE=MyISAM;
|
||||
CREATE TABLE tmerge (f1 INT) ENGINE=MERGE UNION=(t1);
|
||||
PREPARE stmt FROM "ANALYZE TABLE tmerge, t1";
|
||||
EXECUTE stmt;
|
||||
Table Op Msg_type Msg_text
|
||||
test.tmerge analyze note The storage engine for the table doesn't support analyze
|
||||
test.t1 analyze status Table is already up to date
|
||||
EXECUTE stmt;
|
||||
Table Op Msg_type Msg_text
|
||||
test.tmerge analyze note The storage engine for the table doesn't support analyze
|
||||
test.t1 analyze status Table is already up to date
|
||||
DEALLOCATE PREPARE stmt;
|
||||
DROP TABLE t1, tmerge;
|
||||
End of 5.5 tests
|
||||
#
|
||||
# Additional coverage for refactoring which is made as part
|
||||
|
|
|
@ -4076,4 +4076,35 @@ id value
|
|||
deallocate prepare stmt;
|
||||
SET SESSION sql_mode = @save_sql_mode;
|
||||
DROP TABLE t1,t2;
|
||||
# End of 10.0 tests
|
||||
#
|
||||
# MDEV-8833: Crash of server on prepared statement with
|
||||
# conversion to semi-join
|
||||
#
|
||||
CREATE TABLE t1 (column1 INT);
|
||||
INSERT INTO t1 VALUES (3),(9);
|
||||
CREATE TABLE t2 (column2 INT);
|
||||
INSERT INTO t2 VALUES (1),(4);
|
||||
CREATE TABLE t3 (column3 INT);
|
||||
INSERT INTO t3 VALUES (6),(8);
|
||||
CREATE TABLE t4 (column4 INT);
|
||||
INSERT INTO t4 VALUES (2),(5);
|
||||
PREPARE stmt FROM "SELECT ( SELECT MAX( table1.column1 ) AS field1
|
||||
FROM t1 AS table1
|
||||
WHERE table3.column3 IN ( SELECT table2.column2 AS field2 FROM t2 AS table2 )
|
||||
) AS sq
|
||||
FROM t3 AS table3, t4 AS table4";
|
||||
EXECUTE stmt;
|
||||
sq
|
||||
NULL
|
||||
NULL
|
||||
NULL
|
||||
NULL
|
||||
EXECUTE stmt;
|
||||
sq
|
||||
NULL
|
||||
NULL
|
||||
NULL
|
||||
NULL
|
||||
deallocate prepare stmt;
|
||||
drop table t1,t2,t3,t4;
|
||||
# End of 5.5 tests
|
||||
|
|
|
@ -14,6 +14,25 @@ this
|
|||
0
|
||||
4294967295
|
||||
drop table t1;
|
||||
create table t1 (a bigint unsigned, b mediumint unsigned);
|
||||
insert t1 values (1,2),(0xffffffffffffffff,0xffffff);
|
||||
select coalesce(a,b), coalesce(b,a) from t1;
|
||||
coalesce(a,b) coalesce(b,a)
|
||||
1 2
|
||||
18446744073709551615 16777215
|
||||
create table t2 as select a from t1 union select b from t1;
|
||||
show create table t2;
|
||||
Table Create Table
|
||||
t2 CREATE TABLE `t2` (
|
||||
`a` bigint(20) unsigned DEFAULT NULL
|
||||
) ENGINE=MyISAM DEFAULT CHARSET=latin1
|
||||
select * from t2;
|
||||
a
|
||||
1
|
||||
18446744073709551615
|
||||
2
|
||||
16777215
|
||||
drop table t1, t2;
|
||||
#
|
||||
# Start of 10.0 tests
|
||||
#
|
||||
|
|
|
@ -6,7 +6,8 @@ table_54044 CREATE TEMPORARY TABLE `table_54044` (
|
|||
`IF(NULL IS NOT NULL, NULL, NULL)` binary(0) DEFAULT NULL
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=latin1
|
||||
DROP TABLE table_54044;
|
||||
CREATE TABLE tmp ENGINE = INNODB AS SELECT COALESCE(NULL, NULL, NULL), GREATEST(NULL, NULL), NULL;
|
||||
CREATE TABLE tmp ENGINE = INNODB
|
||||
AS SELECT COALESCE(NULL, NULL, NULL), GREATEST(NULL, NULL), NULL;
|
||||
SHOW CREATE TABLE tmp;
|
||||
Table Create Table
|
||||
tmp CREATE TABLE `tmp` (
|
||||
|
|
8
mysql-test/suite/innodb/r/system_tables.result
Normal file
8
mysql-test/suite/innodb/r/system_tables.result
Normal file
|
@ -0,0 +1,8 @@
|
|||
alter table mysql.time_zone_name engine=InnoDB;
|
||||
create table envois3 (starttime datetime) engine=InnoDB;
|
||||
insert envois3 values ('2008-08-11 22:43:00');
|
||||
select convert_tz(starttime,'UTC','Europe/Moscow') starttime from envois3;
|
||||
starttime
|
||||
2008-08-12 02:43:00
|
||||
drop table envois3;
|
||||
alter table mysql.time_zone_name engine=MyISAM;
|
|
@ -10,7 +10,10 @@ CREATE TEMPORARY TABLE table_54044 ENGINE = INNODB
|
|||
SHOW CREATE TABLE table_54044;
|
||||
DROP TABLE table_54044;
|
||||
|
||||
CREATE TABLE tmp ENGINE = INNODB AS SELECT COALESCE(NULL, NULL, NULL), GREATEST(NULL, NULL), NULL;
|
||||
# This 'create table' should pass since it uses a Field_string of size 0.
|
||||
|
||||
CREATE TABLE tmp ENGINE = INNODB
|
||||
AS SELECT COALESCE(NULL, NULL, NULL), GREATEST(NULL, NULL), NULL;
|
||||
SHOW CREATE TABLE tmp;
|
||||
DROP TABLE tmp;
|
||||
|
||||
|
@ -23,4 +26,3 @@ FLUSH TABLES;
|
|||
--error 1005
|
||||
CREATE TEMPORARY TABLE tmp ENGINE=InnoDB AS SELECT VALUES(a) FROM t1;
|
||||
DROP TABLE t1;
|
||||
|
||||
|
|
12
mysql-test/suite/innodb/t/system_tables.test
Normal file
12
mysql-test/suite/innodb/t/system_tables.test
Normal file
|
@ -0,0 +1,12 @@
|
|||
--source include/have_innodb.inc
|
||||
|
||||
#
|
||||
# MDEV-10775 System table in InnoDB format allowed in MariaDB could lead to crash
|
||||
#
|
||||
alter table mysql.time_zone_name engine=InnoDB;
|
||||
create table envois3 (starttime datetime) engine=InnoDB;
|
||||
insert envois3 values ('2008-08-11 22:43:00');
|
||||
--source include/restart_mysqld.inc
|
||||
select convert_tz(starttime,'UTC','Europe/Moscow') starttime from envois3;
|
||||
drop table envois3;
|
||||
alter table mysql.time_zone_name engine=MyISAM;
|
|
@ -1,121 +0,0 @@
|
|||
"General cleanup"
|
||||
set @aria_checkpoint_interval_save= @@global.aria_checkpoint_interval;
|
||||
set @@global.aria_checkpoint_interval= 0;
|
||||
drop table if exists t1;
|
||||
update performance_schema.setup_instruments set enabled = 'NO';
|
||||
update performance_schema.setup_consumers set enabled = 'NO';
|
||||
truncate table performance_schema.file_summary_by_event_name;
|
||||
truncate table performance_schema.file_summary_by_instance;
|
||||
truncate table performance_schema.socket_summary_by_event_name;
|
||||
truncate table performance_schema.socket_summary_by_instance;
|
||||
truncate table performance_schema.events_waits_summary_global_by_event_name;
|
||||
truncate table performance_schema.events_waits_summary_by_instance;
|
||||
truncate table performance_schema.events_waits_summary_by_thread_by_event_name;
|
||||
update performance_schema.setup_consumers set enabled = 'YES';
|
||||
update performance_schema.setup_instruments
|
||||
set enabled = 'YES', timed = 'YES';
|
||||
create table t1 (
|
||||
id INT PRIMARY KEY,
|
||||
b CHAR(100) DEFAULT 'initial value')
|
||||
ENGINE=MyISAM;
|
||||
insert into t1 (id) values (1), (2), (3), (4), (5), (6), (7), (8);
|
||||
update performance_schema.setup_instruments SET enabled = 'NO';
|
||||
update performance_schema.setup_consumers set enabled = 'NO';
|
||||
set @dump_all=FALSE;
|
||||
"Verifying file aggregate consistency"
|
||||
SELECT EVENT_NAME, e.COUNT_READ, SUM(i.COUNT_READ)
|
||||
FROM performance_schema.file_summary_by_event_name AS e
|
||||
JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME)
|
||||
GROUP BY EVENT_NAME
|
||||
HAVING (e.COUNT_READ <> SUM(i.COUNT_READ))
|
||||
OR @dump_all;
|
||||
EVENT_NAME COUNT_READ SUM(i.COUNT_READ)
|
||||
SELECT EVENT_NAME, e.COUNT_WRITE, SUM(i.COUNT_WRITE)
|
||||
FROM performance_schema.file_summary_by_event_name AS e
|
||||
JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME)
|
||||
GROUP BY EVENT_NAME
|
||||
HAVING (e.COUNT_WRITE <> SUM(i.COUNT_WRITE))
|
||||
OR @dump_all;
|
||||
EVENT_NAME COUNT_WRITE SUM(i.COUNT_WRITE)
|
||||
SELECT EVENT_NAME, e.COUNT_READ, SUM(i.COUNT_READ)
|
||||
FROM performance_schema.socket_summary_by_event_name AS e
|
||||
JOIN performance_schema.socket_summary_by_instance AS i USING (EVENT_NAME)
|
||||
GROUP BY EVENT_NAME
|
||||
HAVING (e.COUNT_READ <> SUM(i.COUNT_READ))
|
||||
OR @dump_all;
|
||||
EVENT_NAME COUNT_READ SUM(i.COUNT_READ)
|
||||
SELECT EVENT_NAME, e.COUNT_WRITE, SUM(i.COUNT_WRITE)
|
||||
FROM performance_schema.socket_summary_by_event_name AS e
|
||||
JOIN performance_schema.socket_summary_by_instance AS i USING (EVENT_NAME)
|
||||
GROUP BY EVENT_NAME
|
||||
HAVING (e.COUNT_WRITE <> SUM(i.COUNT_WRITE))
|
||||
OR @dump_all;
|
||||
EVENT_NAME COUNT_WRITE SUM(i.COUNT_WRITE)
|
||||
SELECT EVENT_NAME, e.SUM_NUMBER_OF_BYTES_READ, SUM(i.SUM_NUMBER_OF_BYTES_READ)
|
||||
FROM performance_schema.file_summary_by_event_name AS e
|
||||
JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME)
|
||||
GROUP BY EVENT_NAME
|
||||
HAVING (e.SUM_NUMBER_OF_BYTES_READ <> SUM(i.SUM_NUMBER_OF_BYTES_READ))
|
||||
OR @dump_all;
|
||||
EVENT_NAME SUM_NUMBER_OF_BYTES_READ SUM(i.SUM_NUMBER_OF_BYTES_READ)
|
||||
SELECT EVENT_NAME, e.SUM_NUMBER_OF_BYTES_WRITE, SUM(i.SUM_NUMBER_OF_BYTES_WRITE)
|
||||
FROM performance_schema.file_summary_by_event_name AS e
|
||||
JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME)
|
||||
GROUP BY EVENT_NAME
|
||||
HAVING (e.SUM_NUMBER_OF_BYTES_WRITE <> SUM(i.SUM_NUMBER_OF_BYTES_WRITE))
|
||||
OR @dump_all;
|
||||
EVENT_NAME SUM_NUMBER_OF_BYTES_WRITE SUM(i.SUM_NUMBER_OF_BYTES_WRITE)
|
||||
"Verifying waits aggregate consistency (instance)"
|
||||
SELECT EVENT_NAME, e.SUM_TIMER_WAIT, SUM(i.SUM_TIMER_WAIT)
|
||||
FROM performance_schema.events_waits_summary_global_by_event_name AS e
|
||||
JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME)
|
||||
GROUP BY EVENT_NAME
|
||||
HAVING (e.SUM_TIMER_WAIT < SUM(i.SUM_TIMER_WAIT))
|
||||
OR @dump_all;
|
||||
EVENT_NAME SUM_TIMER_WAIT SUM(i.SUM_TIMER_WAIT)
|
||||
SELECT EVENT_NAME, e.MIN_TIMER_WAIT, MIN(i.MIN_TIMER_WAIT)
|
||||
FROM performance_schema.events_waits_summary_global_by_event_name AS e
|
||||
JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME)
|
||||
GROUP BY EVENT_NAME
|
||||
HAVING (e.MIN_TIMER_WAIT > MIN(i.MIN_TIMER_WAIT))
|
||||
AND (MIN(i.MIN_TIMER_WAIT) != 0)
|
||||
OR @dump_all;
|
||||
EVENT_NAME MIN_TIMER_WAIT MIN(i.MIN_TIMER_WAIT)
|
||||
SELECT EVENT_NAME, e.MAX_TIMER_WAIT, MAX(i.MAX_TIMER_WAIT)
|
||||
FROM performance_schema.events_waits_summary_global_by_event_name AS e
|
||||
JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME)
|
||||
GROUP BY EVENT_NAME
|
||||
HAVING (e.MAX_TIMER_WAIT < MAX(i.MAX_TIMER_WAIT))
|
||||
OR @dump_all;
|
||||
EVENT_NAME MAX_TIMER_WAIT MAX(i.MAX_TIMER_WAIT)
|
||||
"Verifying waits aggregate consistency (thread)"
|
||||
SELECT EVENT_NAME, e.SUM_TIMER_WAIT, SUM(t.SUM_TIMER_WAIT)
|
||||
FROM performance_schema.events_waits_summary_global_by_event_name AS e
|
||||
JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t
|
||||
USING (EVENT_NAME)
|
||||
GROUP BY EVENT_NAME
|
||||
HAVING (e.SUM_TIMER_WAIT < SUM(t.SUM_TIMER_WAIT))
|
||||
OR @dump_all;
|
||||
EVENT_NAME SUM_TIMER_WAIT SUM(t.SUM_TIMER_WAIT)
|
||||
SELECT EVENT_NAME, e.MIN_TIMER_WAIT, MIN(t.MIN_TIMER_WAIT)
|
||||
FROM performance_schema.events_waits_summary_global_by_event_name AS e
|
||||
JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t
|
||||
USING (EVENT_NAME)
|
||||
GROUP BY EVENT_NAME
|
||||
HAVING (e.MIN_TIMER_WAIT > MIN(t.MIN_TIMER_WAIT))
|
||||
AND (MIN(t.MIN_TIMER_WAIT) != 0)
|
||||
OR @dump_all;
|
||||
EVENT_NAME MIN_TIMER_WAIT MIN(t.MIN_TIMER_WAIT)
|
||||
SELECT EVENT_NAME, e.MAX_TIMER_WAIT, MAX(t.MAX_TIMER_WAIT)
|
||||
FROM performance_schema.events_waits_summary_global_by_event_name AS e
|
||||
JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t
|
||||
USING (EVENT_NAME)
|
||||
GROUP BY EVENT_NAME
|
||||
HAVING (e.MAX_TIMER_WAIT < MAX(t.MAX_TIMER_WAIT))
|
||||
OR @dump_all;
|
||||
EVENT_NAME MAX_TIMER_WAIT MAX(t.MAX_TIMER_WAIT)
|
||||
update performance_schema.setup_consumers set enabled = 'YES';
|
||||
update performance_schema.setup_instruments
|
||||
set enabled = 'YES', timed = 'YES';
|
||||
drop table test.t1;
|
||||
set @@global.aria_checkpoint_interval= @aria_checkpoint_interval_save;
|
|
@ -1,197 +0,0 @@
|
|||
# Tests for PERFORMANCE_SCHEMA
|
||||
# Verify that statistics aggregated by different criteria are consistent.
|
||||
|
||||
--source include/not_embedded.inc
|
||||
--source include/have_perfschema.inc
|
||||
|
||||
--echo "General cleanup"
|
||||
|
||||
# MDEV-7187 - test fails sporadically in buildbot
|
||||
set @aria_checkpoint_interval_save= @@global.aria_checkpoint_interval;
|
||||
set @@global.aria_checkpoint_interval= 0;
|
||||
|
||||
--disable_warnings
|
||||
drop table if exists t1;
|
||||
--enable_warnings
|
||||
|
||||
update performance_schema.setup_instruments set enabled = 'NO';
|
||||
update performance_schema.setup_consumers set enabled = 'NO';
|
||||
|
||||
# Cleanup statistics
|
||||
truncate table performance_schema.file_summary_by_event_name;
|
||||
truncate table performance_schema.file_summary_by_instance;
|
||||
truncate table performance_schema.socket_summary_by_event_name;
|
||||
truncate table performance_schema.socket_summary_by_instance;
|
||||
truncate table performance_schema.events_waits_summary_global_by_event_name;
|
||||
truncate table performance_schema.events_waits_summary_by_instance;
|
||||
truncate table performance_schema.events_waits_summary_by_thread_by_event_name;
|
||||
|
||||
# Start recording data
|
||||
update performance_schema.setup_consumers set enabled = 'YES';
|
||||
update performance_schema.setup_instruments
|
||||
set enabled = 'YES', timed = 'YES';
|
||||
|
||||
|
||||
create table t1 (
|
||||
id INT PRIMARY KEY,
|
||||
b CHAR(100) DEFAULT 'initial value')
|
||||
ENGINE=MyISAM;
|
||||
|
||||
insert into t1 (id) values (1), (2), (3), (4), (5), (6), (7), (8);
|
||||
|
||||
# Stop recording data, so the select below don't add noise.
|
||||
update performance_schema.setup_instruments SET enabled = 'NO';
|
||||
# Disable all consumers, for long standing waits
|
||||
update performance_schema.setup_consumers set enabled = 'NO';
|
||||
|
||||
# Helper to debug
|
||||
set @dump_all=FALSE;
|
||||
|
||||
# Note that in general:
|
||||
# - COUNT/SUM/MAX(file_summary_by_event_name) >=
|
||||
# COUNT/SUM/MAX(file_summary_by_instance).
|
||||
# - MIN(file_summary_by_event_name) <=
|
||||
# MIN(file_summary_by_instance).
|
||||
# There will be equality only when file instances are not removed,
|
||||
# aka when a file is not deleted from the file system,
|
||||
# because doing so removes a row in file_summary_by_instance.
|
||||
|
||||
# Likewise:
|
||||
# - COUNT/SUM/MAX(events_waits_summary_global_by_event_name) >=
|
||||
# COUNT/SUM/MAX(events_waits_summary_by_instance)
|
||||
# - MIN(events_waits_summary_global_by_event_name) <=
|
||||
# MIN(events_waits_summary_by_instance)
|
||||
# There will be equality only when an instrument instance
|
||||
# is not removed, which is next to impossible to predictably guarantee
|
||||
# in the server.
|
||||
# For example, a MyISAM table removed from the table cache
|
||||
# will cause a mysql_mutex_destroy on myisam/MYISAM_SHARE::intern_lock.
|
||||
# Another example, a thread terminating will cause a mysql_mutex_destroy
|
||||
# on sql/LOCK_delete
|
||||
# Both cause a row to be deleted from events_waits_summary_by_instance.
|
||||
|
||||
# Likewise:
|
||||
# - COUNT/SUM/MAX(events_waits_summary_global_by_event_name) >=
|
||||
# COUNT/SUM/MAX(events_waits_summary_by_thread_by_event_name)
|
||||
# - MIN(events_waits_summary_global_by_event_name) <=
|
||||
# MIN(events_waits_summary_by_thread_by_event_name)
|
||||
# There will be equality only when no thread is removed,
|
||||
# that is if no thread disconnects, or no sub thread (for example insert
|
||||
# delayed) ever completes.
|
||||
# A thread completing will cause rows in
|
||||
# events_waits_summary_by_thread_by_event_name to be removed.
|
||||
|
||||
--echo "Verifying file aggregate consistency"
|
||||
|
||||
# Since the code generating the load in this test does:
|
||||
# - create table
|
||||
# - insert
|
||||
# - does not cause temporary tables to be used
|
||||
# we can test for equality here for file aggregates.
|
||||
|
||||
# If any of these queries returns data, the test failed.
|
||||
|
||||
SELECT EVENT_NAME, e.COUNT_READ, SUM(i.COUNT_READ)
|
||||
FROM performance_schema.file_summary_by_event_name AS e
|
||||
JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME)
|
||||
GROUP BY EVENT_NAME
|
||||
HAVING (e.COUNT_READ <> SUM(i.COUNT_READ))
|
||||
OR @dump_all;
|
||||
|
||||
SELECT EVENT_NAME, e.COUNT_WRITE, SUM(i.COUNT_WRITE)
|
||||
FROM performance_schema.file_summary_by_event_name AS e
|
||||
JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME)
|
||||
GROUP BY EVENT_NAME
|
||||
HAVING (e.COUNT_WRITE <> SUM(i.COUNT_WRITE))
|
||||
OR @dump_all;
|
||||
|
||||
SELECT EVENT_NAME, e.COUNT_READ, SUM(i.COUNT_READ)
|
||||
FROM performance_schema.socket_summary_by_event_name AS e
|
||||
JOIN performance_schema.socket_summary_by_instance AS i USING (EVENT_NAME)
|
||||
GROUP BY EVENT_NAME
|
||||
HAVING (e.COUNT_READ <> SUM(i.COUNT_READ))
|
||||
OR @dump_all;
|
||||
|
||||
SELECT EVENT_NAME, e.COUNT_WRITE, SUM(i.COUNT_WRITE)
|
||||
FROM performance_schema.socket_summary_by_event_name AS e
|
||||
JOIN performance_schema.socket_summary_by_instance AS i USING (EVENT_NAME)
|
||||
GROUP BY EVENT_NAME
|
||||
HAVING (e.COUNT_WRITE <> SUM(i.COUNT_WRITE))
|
||||
OR @dump_all;
|
||||
|
||||
SELECT EVENT_NAME, e.SUM_NUMBER_OF_BYTES_READ, SUM(i.SUM_NUMBER_OF_BYTES_READ)
|
||||
FROM performance_schema.file_summary_by_event_name AS e
|
||||
JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME)
|
||||
GROUP BY EVENT_NAME
|
||||
HAVING (e.SUM_NUMBER_OF_BYTES_READ <> SUM(i.SUM_NUMBER_OF_BYTES_READ))
|
||||
OR @dump_all;
|
||||
|
||||
SELECT EVENT_NAME, e.SUM_NUMBER_OF_BYTES_WRITE, SUM(i.SUM_NUMBER_OF_BYTES_WRITE)
|
||||
FROM performance_schema.file_summary_by_event_name AS e
|
||||
JOIN performance_schema.file_summary_by_instance AS i USING (EVENT_NAME)
|
||||
GROUP BY EVENT_NAME
|
||||
HAVING (e.SUM_NUMBER_OF_BYTES_WRITE <> SUM(i.SUM_NUMBER_OF_BYTES_WRITE))
|
||||
OR @dump_all;
|
||||
|
||||
--echo "Verifying waits aggregate consistency (instance)"
|
||||
|
||||
SELECT EVENT_NAME, e.SUM_TIMER_WAIT, SUM(i.SUM_TIMER_WAIT)
|
||||
FROM performance_schema.events_waits_summary_global_by_event_name AS e
|
||||
JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME)
|
||||
GROUP BY EVENT_NAME
|
||||
HAVING (e.SUM_TIMER_WAIT < SUM(i.SUM_TIMER_WAIT))
|
||||
OR @dump_all;
|
||||
|
||||
SELECT EVENT_NAME, e.MIN_TIMER_WAIT, MIN(i.MIN_TIMER_WAIT)
|
||||
FROM performance_schema.events_waits_summary_global_by_event_name AS e
|
||||
JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME)
|
||||
GROUP BY EVENT_NAME
|
||||
HAVING (e.MIN_TIMER_WAIT > MIN(i.MIN_TIMER_WAIT))
|
||||
AND (MIN(i.MIN_TIMER_WAIT) != 0)
|
||||
OR @dump_all;
|
||||
|
||||
SELECT EVENT_NAME, e.MAX_TIMER_WAIT, MAX(i.MAX_TIMER_WAIT)
|
||||
FROM performance_schema.events_waits_summary_global_by_event_name AS e
|
||||
JOIN performance_schema.events_waits_summary_by_instance AS i USING (EVENT_NAME)
|
||||
GROUP BY EVENT_NAME
|
||||
HAVING (e.MAX_TIMER_WAIT < MAX(i.MAX_TIMER_WAIT))
|
||||
OR @dump_all;
|
||||
|
||||
--echo "Verifying waits aggregate consistency (thread)"
|
||||
|
||||
SELECT EVENT_NAME, e.SUM_TIMER_WAIT, SUM(t.SUM_TIMER_WAIT)
|
||||
FROM performance_schema.events_waits_summary_global_by_event_name AS e
|
||||
JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t
|
||||
USING (EVENT_NAME)
|
||||
GROUP BY EVENT_NAME
|
||||
HAVING (e.SUM_TIMER_WAIT < SUM(t.SUM_TIMER_WAIT))
|
||||
OR @dump_all;
|
||||
|
||||
SELECT EVENT_NAME, e.MIN_TIMER_WAIT, MIN(t.MIN_TIMER_WAIT)
|
||||
FROM performance_schema.events_waits_summary_global_by_event_name AS e
|
||||
JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t
|
||||
USING (EVENT_NAME)
|
||||
GROUP BY EVENT_NAME
|
||||
HAVING (e.MIN_TIMER_WAIT > MIN(t.MIN_TIMER_WAIT))
|
||||
AND (MIN(t.MIN_TIMER_WAIT) != 0)
|
||||
OR @dump_all;
|
||||
|
||||
SELECT EVENT_NAME, e.MAX_TIMER_WAIT, MAX(t.MAX_TIMER_WAIT)
|
||||
FROM performance_schema.events_waits_summary_global_by_event_name AS e
|
||||
JOIN performance_schema.events_waits_summary_by_thread_by_event_name AS t
|
||||
USING (EVENT_NAME)
|
||||
GROUP BY EVENT_NAME
|
||||
HAVING (e.MAX_TIMER_WAIT < MAX(t.MAX_TIMER_WAIT))
|
||||
OR @dump_all;
|
||||
|
||||
|
||||
# Cleanup
|
||||
|
||||
update performance_schema.setup_consumers set enabled = 'YES';
|
||||
update performance_schema.setup_instruments
|
||||
set enabled = 'YES', timed = 'YES';
|
||||
|
||||
drop table test.t1;
|
||||
|
||||
set @@global.aria_checkpoint_interval= @aria_checkpoint_interval_save;
|
||||
|
|
@ -8,7 +8,6 @@ server_audit_file_rotate_now OFF
|
|||
server_audit_file_rotate_size 1000000
|
||||
server_audit_file_rotations 9
|
||||
server_audit_incl_users
|
||||
server_audit_loc_info
|
||||
server_audit_logging OFF
|
||||
server_audit_mode 0
|
||||
server_audit_output_type file
|
||||
|
@ -72,7 +71,6 @@ server_audit_file_rotate_now OFF
|
|||
server_audit_file_rotate_size 1000000
|
||||
server_audit_file_rotations 9
|
||||
server_audit_incl_users odin, root, dva, tri
|
||||
server_audit_loc_info
|
||||
server_audit_logging ON
|
||||
server_audit_mode 0
|
||||
server_audit_output_type file
|
||||
|
@ -218,7 +216,6 @@ server_audit_file_rotate_now OFF
|
|||
server_audit_file_rotate_size 1000000
|
||||
server_audit_file_rotations 9
|
||||
server_audit_incl_users odin, root, dva, tri
|
||||
server_audit_loc_info
|
||||
server_audit_logging ON
|
||||
server_audit_mode 1
|
||||
server_audit_output_type file
|
||||
|
|
|
@ -8,7 +8,6 @@ server_audit_file_rotate_now OFF
|
|||
server_audit_file_rotate_size 1000000
|
||||
server_audit_file_rotations 9
|
||||
server_audit_incl_users
|
||||
server_audit_loc_info
|
||||
server_audit_logging OFF
|
||||
server_audit_mode 0
|
||||
server_audit_output_type file
|
||||
|
@ -72,7 +71,6 @@ server_audit_file_rotate_now OFF
|
|||
server_audit_file_rotate_size 1000000
|
||||
server_audit_file_rotations 9
|
||||
server_audit_incl_users odin, root, dva, tri
|
||||
server_audit_loc_info
|
||||
server_audit_logging ON
|
||||
server_audit_mode 0
|
||||
server_audit_output_type file
|
||||
|
@ -218,7 +216,6 @@ server_audit_file_rotate_now OFF
|
|||
server_audit_file_rotate_size 1000000
|
||||
server_audit_file_rotations 9
|
||||
server_audit_incl_users odin, root, dva, tri
|
||||
server_audit_loc_info
|
||||
server_audit_logging ON
|
||||
server_audit_mode 1
|
||||
server_audit_output_type file
|
||||
|
|
|
@ -13,7 +13,7 @@ insert into mysqltest1.t1 values (1);
|
|||
select * from mysqltest1.t1 into outfile 'mysqltest1/f1.txt';
|
||||
create table mysqltest1.t2 (n int);
|
||||
create table mysqltest1.t3 (n int);
|
||||
--replace_result \\ / 66 39 17 39 "File exists" "Directory not empty"
|
||||
--replace_result \\ / 66 39 17 39 247 39 "File exists" "Directory not empty"
|
||||
--error 1010
|
||||
drop database mysqltest1;
|
||||
use mysqltest1;
|
||||
|
@ -30,7 +30,7 @@ while ($1)
|
|||
}
|
||||
--enable_query_log
|
||||
|
||||
--replace_result \\ / 66 39 17 39 "File exists" "Directory not empty"
|
||||
--replace_result \\ / 66 39 17 39 247 39 "File exists" "Directory not empty"
|
||||
--error 1010
|
||||
drop database mysqltest1;
|
||||
use mysqltest1;
|
||||
|
|
|
@ -889,6 +889,11 @@ SELECT CHAR_LENGTH(TRIM(BOTH 0x0001 FROM _utf32 0x00000061));
|
|||
SELECT CHAR_LENGTH(TRIM(BOTH 0x61 FROM _utf32 0x00000061));
|
||||
SELECT CHAR_LENGTH(TRIM(BOTH 0x00 FROM _utf32 0x00000061));
|
||||
|
||||
#
|
||||
# potential signedness issue
|
||||
#
|
||||
select hex(lower(cast(0xffff0000 as char character set utf32))) as c;
|
||||
|
||||
--echo #
|
||||
--echo # End of 5.5 tests
|
||||
--echo #
|
||||
|
|
|
@ -230,3 +230,16 @@ eval EXPLAIN $query;
|
|||
eval $query;
|
||||
|
||||
DROP TABLE t0,t1,t2;
|
||||
|
||||
--echo #
|
||||
--echo # MDEV-MariaDB daemon leaks memory with specific query
|
||||
--echo #
|
||||
|
||||
CREATE TABLE t1 (`voter_id` int(11) unsigned NOT NULL,
|
||||
`language_id` int(11) unsigned NOT NULL DEFAULT '1'
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
|
||||
CREATE TABLE t2 (`voter_id` int(10) unsigned NOT NULL DEFAULT '0',
|
||||
`serialized_c` mediumblob) ENGINE=InnoDB DEFAULT CHARSET=utf8;
|
||||
insert into t2 values (1,repeat("a",1000)),(2,repeat("a",1000)),(3,repeat("b",1000)),(4,repeat("c",1000)),(4,repeat("b",1000));
|
||||
SELECT GROUP_CONCAT(t1.language_id SEPARATOR ',') AS `translation_resources`, `d`.`serialized_c` FROM t2 AS `d` LEFT JOIN t1 ON `d`.`voter_id` = t1.`voter_id` GROUP BY `d`.`voter_id` ORDER BY 10-d.voter_id+RAND()*0;
|
||||
drop table t1,t2;
|
||||
|
|
|
@ -2880,6 +2880,19 @@ drop tables m1, t1, t4;
|
|||
drop view t3;
|
||||
|
||||
|
||||
--echo #
|
||||
--echo # MDEV-10424 - Assertion `ticket == __null' failed in
|
||||
--echo # MDL_request::set_type
|
||||
--echo #
|
||||
CREATE TABLE t1 (f1 INT) ENGINE=MyISAM;
|
||||
CREATE TABLE tmerge (f1 INT) ENGINE=MERGE UNION=(t1);
|
||||
PREPARE stmt FROM "ANALYZE TABLE tmerge, t1";
|
||||
EXECUTE stmt;
|
||||
EXECUTE stmt;
|
||||
DEALLOCATE PREPARE stmt;
|
||||
DROP TABLE t1, tmerge;
|
||||
|
||||
|
||||
--echo End of 5.5 tests
|
||||
|
||||
|
||||
|
|
|
@ -3653,5 +3653,32 @@ deallocate prepare stmt;
|
|||
SET SESSION sql_mode = @save_sql_mode;
|
||||
DROP TABLE t1,t2;
|
||||
|
||||
--echo #
|
||||
--echo # MDEV-8833: Crash of server on prepared statement with
|
||||
--echo # conversion to semi-join
|
||||
--echo #
|
||||
|
||||
--echo # End of 10.0 tests
|
||||
CREATE TABLE t1 (column1 INT);
|
||||
INSERT INTO t1 VALUES (3),(9);
|
||||
|
||||
CREATE TABLE t2 (column2 INT);
|
||||
INSERT INTO t2 VALUES (1),(4);
|
||||
|
||||
CREATE TABLE t3 (column3 INT);
|
||||
INSERT INTO t3 VALUES (6),(8);
|
||||
|
||||
CREATE TABLE t4 (column4 INT);
|
||||
INSERT INTO t4 VALUES (2),(5);
|
||||
|
||||
PREPARE stmt FROM "SELECT ( SELECT MAX( table1.column1 ) AS field1
|
||||
FROM t1 AS table1
|
||||
WHERE table3.column3 IN ( SELECT table2.column2 AS field2 FROM t2 AS table2 )
|
||||
) AS sq
|
||||
FROM t3 AS table3, t4 AS table4";
|
||||
EXECUTE stmt;
|
||||
EXECUTE stmt;
|
||||
deallocate prepare stmt;
|
||||
drop table t1,t2,t3,t4;
|
||||
|
||||
|
||||
--echo # End of 5.5 tests
|
||||
|
|
|
@ -16,6 +16,13 @@ drop table t1;
|
|||
|
||||
# End of 4.1 tests
|
||||
|
||||
create table t1 (a bigint unsigned, b mediumint unsigned);
|
||||
insert t1 values (1,2),(0xffffffffffffffff,0xffffff);
|
||||
select coalesce(a,b), coalesce(b,a) from t1;
|
||||
create table t2 as select a from t1 union select b from t1;
|
||||
show create table t2;
|
||||
select * from t2;
|
||||
drop table t1, t2;
|
||||
|
||||
--echo #
|
||||
--echo # Start of 10.0 tests
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
Copyright (c) 2000, 2010, Oracle and/or its affiliates
|
||||
/* Copyright (c) 2000, 2010, Oracle and/or its affiliates
|
||||
Copyright (c) 2009, 2016, MariaDB
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -49,7 +49,8 @@ int my_redel(const char *org_name, const char *tmp_name,
|
|||
DBUG_PRINT("my",("org_name: '%s' tmp_name: '%s' MyFlags: %lu",
|
||||
org_name,tmp_name,MyFlags));
|
||||
|
||||
if (my_copystat(org_name,tmp_name,MyFlags) < 0)
|
||||
if (!my_disable_copystat_in_redel &&
|
||||
my_copystat(org_name,tmp_name,MyFlags) < 0)
|
||||
goto end;
|
||||
if (MyFlags & MY_REDEL_MAKE_BACKUP)
|
||||
{
|
||||
|
|
|
@ -98,3 +98,4 @@ my_bool my_disable_sync=0;
|
|||
my_bool my_disable_async_io=0;
|
||||
my_bool my_disable_flush_key_blocks=0;
|
||||
my_bool my_disable_symlinks=0;
|
||||
my_bool my_disable_copystat_in_redel=0;
|
||||
|
|
|
@ -427,9 +427,8 @@ static MYSQL_SYSVAR_UINT(query_log_limit, query_log_limit,
|
|||
char locinfo_ini_value[sizeof(struct connection_info)+4];
|
||||
|
||||
static MYSQL_THDVAR_STR(loc_info,
|
||||
PLUGIN_VAR_READONLY | PLUGIN_VAR_MEMALLOC,
|
||||
"Auxiliary info.", NULL, NULL,
|
||||
locinfo_ini_value);
|
||||
PLUGIN_VAR_NOSYSVAR | PLUGIN_VAR_NOCMDOPT | PLUGIN_VAR_MEMALLOC,
|
||||
"Internal info", NULL, NULL, locinfo_ini_value);
|
||||
|
||||
static const char *syslog_facility_names[]=
|
||||
{
|
||||
|
|
|
@ -46,6 +46,7 @@ struct show_table_contributors_st show_table_contributors[]= {
|
|||
{"Auttomattic", "https://automattic.com", "Bronze Sponsor of the MariaDB Foundation"},
|
||||
{"Verkkokauppa.com", "https://virtuozzo.com", "Bronze Sponsor of the MariaDB Foundation"},
|
||||
{"Virtuozzo", "https://virtuozzo.com/", "Bronze Sponsor of the MariaDB Foundation"},
|
||||
{"Tencent Game DBA", "http://tencentdba.com/about/", "Bronze Sponsor of the MariaDB Foundation"},
|
||||
|
||||
/* Sponsors of important features */
|
||||
{"Google", "USA", "Sponsoring encryption, parallel replication and GTID"},
|
||||
|
|
|
@ -355,7 +355,7 @@ static enum_field_types field_types_merge_rules [FIELDTYPE_NUM][FIELDTYPE_NUM]=
|
|||
//MYSQL_TYPE_NULL MYSQL_TYPE_TIMESTAMP
|
||||
MYSQL_TYPE_LONGLONG, MYSQL_TYPE_VARCHAR,
|
||||
//MYSQL_TYPE_LONGLONG MYSQL_TYPE_INT24
|
||||
MYSQL_TYPE_LONGLONG, MYSQL_TYPE_LONG,
|
||||
MYSQL_TYPE_LONGLONG, MYSQL_TYPE_LONGLONG,
|
||||
//MYSQL_TYPE_DATE MYSQL_TYPE_TIME
|
||||
MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VARCHAR,
|
||||
//MYSQL_TYPE_DATETIME MYSQL_TYPE_YEAR
|
||||
|
|
23
sql/item.cc
23
sql/item.cc
|
@ -2743,9 +2743,28 @@ void Item_field::fix_after_pullout(st_select_lex *new_parent, Item **ref)
|
|||
if (context)
|
||||
{
|
||||
Name_resolution_context *ctx= new Name_resolution_context();
|
||||
ctx->outer_context= NULL; // We don't build a complete name resolver
|
||||
ctx->table_list= NULL; // We rely on first_name_resolution_table instead
|
||||
if (context->select_lex == new_parent)
|
||||
{
|
||||
/*
|
||||
This field was pushed in then pulled out
|
||||
(for example left part of IN)
|
||||
*/
|
||||
ctx->outer_context= context->outer_context;
|
||||
}
|
||||
else if (context->outer_context)
|
||||
{
|
||||
/* just pull to the upper context */
|
||||
ctx->outer_context= context->outer_context->outer_context;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* No upper context (merging Derived/VIEW where context chain ends) */
|
||||
ctx->outer_context= NULL;
|
||||
}
|
||||
ctx->table_list= context->first_name_resolution_table;
|
||||
ctx->select_lex= new_parent;
|
||||
if (context->select_lex == NULL)
|
||||
ctx->select_lex= NULL;
|
||||
ctx->first_name_resolution_table= context->first_name_resolution_table;
|
||||
ctx->last_name_resolution_table= context->last_name_resolution_table;
|
||||
ctx->error_processor= context->error_processor;
|
||||
|
|
|
@ -3011,7 +3011,7 @@ bool MYSQL_QUERY_LOG::write(THD *thd, time_t current_time,
|
|||
if (! write_error)
|
||||
{
|
||||
write_error= 1;
|
||||
sql_print_error(ER(ER_ERROR_ON_WRITE), name, error);
|
||||
sql_print_error(ER(ER_ERROR_ON_WRITE), name, tmp_errno);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -3916,6 +3916,7 @@ static int init_common_variables()
|
|||
|
||||
max_system_variables.pseudo_thread_id= (ulong)~0;
|
||||
server_start_time= flush_status_time= my_time(0);
|
||||
my_disable_copystat_in_redel= 1;
|
||||
|
||||
global_rpl_filter= new Rpl_filter;
|
||||
binlog_filter= new Rpl_filter;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* Copyright (c) 2000, 2013, Oracle and/or its affiliates.
|
||||
Copyright (c) 2010, 2014, SkySQL Ab.
|
||||
/* Copyright (c) 2000, 2016, Oracle and/or its affiliates.
|
||||
Copyright (c) 2012, 2016, MariaDB
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
|
|
@ -42,9 +42,9 @@ enum file_opt_type {
|
|||
|
||||
struct File_option
|
||||
{
|
||||
LEX_STRING name; /**< Name of the option */
|
||||
int offset; /**< offset to base address of value */
|
||||
file_opt_type type; /**< Option type */
|
||||
LEX_STRING name; /**< Name of the option */
|
||||
my_ptrdiff_t offset; /**< offset to base address of value */
|
||||
file_opt_type type; /**< Option type */
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -64,13 +64,13 @@ extern "C" sig_handler handle_fatal_signal(int sig)
|
|||
struct tm tm;
|
||||
#ifdef HAVE_STACKTRACE
|
||||
THD *thd;
|
||||
#endif
|
||||
/*
|
||||
This flag remembers if the query pointer was found invalid.
|
||||
We will try and print the query at the end of the signal handler, in case
|
||||
we're wrong.
|
||||
*/
|
||||
bool print_invalid_query_pointer= false;
|
||||
#endif
|
||||
|
||||
if (segfaulted)
|
||||
{
|
||||
|
@ -265,6 +265,7 @@ extern "C" sig_handler handle_fatal_signal(int sig)
|
|||
"\"mlockall\" bugs.\n");
|
||||
}
|
||||
|
||||
#ifdef HAVE_STACKTRACE
|
||||
if (print_invalid_query_pointer)
|
||||
{
|
||||
my_safe_printf_stderr(
|
||||
|
@ -274,6 +275,7 @@ extern "C" sig_handler handle_fatal_signal(int sig)
|
|||
my_write_stderr(thd->query(), MY_MIN(65536U, thd->query_length()));
|
||||
my_safe_printf_stderr("\n\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_WRITE_CORE
|
||||
if (test_flags & TEST_CORE_ON_SIGNAL)
|
||||
|
|
|
@ -455,7 +455,19 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables,
|
|||
}
|
||||
thd->prepare_derived_at_open= FALSE;
|
||||
|
||||
table->next_global= save_next_global;
|
||||
/*
|
||||
MERGE engine may adjust table->next_global chain, thus we have to
|
||||
append save_next_global after merge children.
|
||||
*/
|
||||
if (save_next_global)
|
||||
{
|
||||
TABLE_LIST *table_list_iterator= table;
|
||||
while (table_list_iterator->next_global)
|
||||
table_list_iterator= table_list_iterator->next_global;
|
||||
table_list_iterator->next_global= save_next_global;
|
||||
save_next_global->prev_global= &table_list_iterator->next_global;
|
||||
}
|
||||
|
||||
table->next_local= save_next_local;
|
||||
thd->open_options&= ~extra_open_options;
|
||||
|
||||
|
|
|
@ -9223,6 +9223,7 @@ open_system_tables_for_read(THD *thd, TABLE_LIST *table_list,
|
|||
*/
|
||||
lex->reset_n_backup_query_tables_list(&query_tables_list_backup);
|
||||
thd->reset_n_backup_open_tables_state(backup);
|
||||
thd->lex->sql_command= SQLCOM_SELECT;
|
||||
|
||||
if (open_and_lock_tables(thd, table_list, FALSE,
|
||||
MYSQL_OPEN_IGNORE_FLUSH |
|
||||
|
|
|
@ -5371,9 +5371,11 @@ int THD::decide_logging_format(TABLE_LIST *tables)
|
|||
{
|
||||
static const char *prelocked_mode_name[] = {
|
||||
"NON_PRELOCKED",
|
||||
"LOCK_TABLES",
|
||||
"PRELOCKED",
|
||||
"PRELOCKED_UNDER_LOCK_TABLES",
|
||||
};
|
||||
compile_time_assert(array_elements(prelocked_mode_name) == LTM_always_last);
|
||||
DBUG_PRINT("debug", ("prelocked_mode: %s",
|
||||
prelocked_mode_name[locked_tables_mode]));
|
||||
}
|
||||
|
|
|
@ -1182,7 +1182,8 @@ enum enum_locked_tables_mode
|
|||
LTM_NONE= 0,
|
||||
LTM_LOCK_TABLES,
|
||||
LTM_PRELOCKED,
|
||||
LTM_PRELOCKED_UNDER_LOCK_TABLES
|
||||
LTM_PRELOCKED_UNDER_LOCK_TABLES,
|
||||
LTM_always_last
|
||||
};
|
||||
|
||||
|
||||
|
@ -4302,6 +4303,11 @@ public:
|
|||
save_copy_field_end= copy_field_end= NULL;
|
||||
}
|
||||
}
|
||||
/*
  Call tmp.free() on every Copy_field in the range
  [copy_field, copy_field_end). The array itself is not deleted and the
  copy_field / copy_field_end pointers are left unchanged.
*/
void free_copy_field_data()
|
||||
{
|
||||
for (Copy_field *ptr= copy_field ; ptr != copy_field_end ; ptr++)
|
||||
ptr->tmp.free();
|
||||
}
|
||||
};
|
||||
|
||||
class select_union :public select_result_interceptor
|
||||
|
|
|
@ -2810,6 +2810,22 @@ static st_bookmark *find_bookmark(const char *plugin, const char *name,
|
|||
}
|
||||
|
||||
|
||||
/*
  Return the number of bytes used to store the value of a plugin variable.

  The size is selected by the type bits of the flags
  (flags & PLUGIN_VAR_TYPEMASK); all other flag bits are ignored.
  For an unrecognised type the function asserts (debug builds) and
  returns 0.
*/
static size_t var_storage_size(int flags)
|
||||
{
|
||||
switch (flags & PLUGIN_VAR_TYPEMASK) {
|
||||
case PLUGIN_VAR_BOOL: return sizeof(my_bool);
|
||||
case PLUGIN_VAR_INT: return sizeof(int);
|
||||
case PLUGIN_VAR_LONG: return sizeof(long);
|
||||
case PLUGIN_VAR_ENUM: return sizeof(long);
|
||||
case PLUGIN_VAR_LONGLONG: return sizeof(ulonglong);
|
||||
case PLUGIN_VAR_SET: return sizeof(ulonglong);
|
||||
case PLUGIN_VAR_STR: return sizeof(char*);
|
||||
case PLUGIN_VAR_DOUBLE: return sizeof(double);
|
||||
default: DBUG_ASSERT(0); return 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
returns a bookmark for thd-local variables, creating if necessary.
|
||||
returns null for non thd-local variables.
|
||||
|
@ -2818,39 +2834,13 @@ static st_bookmark *find_bookmark(const char *plugin, const char *name,
|
|||
static st_bookmark *register_var(const char *plugin, const char *name,
|
||||
int flags)
|
||||
{
|
||||
uint length= strlen(plugin) + strlen(name) + 3, size= 0, offset, new_size;
|
||||
uint length= strlen(plugin) + strlen(name) + 3, size, offset, new_size;
|
||||
st_bookmark *result;
|
||||
char *varname, *p;
|
||||
|
||||
if (!(flags & PLUGIN_VAR_THDLOCAL))
|
||||
return NULL;
|
||||
|
||||
switch (flags & PLUGIN_VAR_TYPEMASK) {
|
||||
case PLUGIN_VAR_BOOL:
|
||||
size= sizeof(my_bool);
|
||||
break;
|
||||
case PLUGIN_VAR_INT:
|
||||
size= sizeof(int);
|
||||
break;
|
||||
case PLUGIN_VAR_LONG:
|
||||
case PLUGIN_VAR_ENUM:
|
||||
size= sizeof(long);
|
||||
break;
|
||||
case PLUGIN_VAR_LONGLONG:
|
||||
case PLUGIN_VAR_SET:
|
||||
size= sizeof(ulonglong);
|
||||
break;
|
||||
case PLUGIN_VAR_STR:
|
||||
size= sizeof(char*);
|
||||
break;
|
||||
case PLUGIN_VAR_DOUBLE:
|
||||
size= sizeof(double);
|
||||
break;
|
||||
default:
|
||||
DBUG_ASSERT(0);
|
||||
return NULL;
|
||||
};
|
||||
DBUG_ASSERT(flags & PLUGIN_VAR_THDLOCAL);
|
||||
|
||||
size= var_storage_size(flags);
|
||||
varname= ((char*) my_alloca(length));
|
||||
strxmov(varname + 1, plugin, "_", name, NullS);
|
||||
for (p= varname + 1; *p; p++)
|
||||
|
@ -3046,25 +3036,17 @@ void sync_dynamic_session_variables(THD* thd, bool global_lock)
|
|||
*/
|
||||
for (idx= 0; idx < bookmark_hash.records; idx++)
|
||||
{
|
||||
sys_var_pluginvar *pi;
|
||||
sys_var *var;
|
||||
st_bookmark *v= (st_bookmark*) my_hash_element(&bookmark_hash,idx);
|
||||
|
||||
if (v->version <= thd->variables.dynamic_variables_version)
|
||||
continue; /* already in thd->variables */
|
||||
|
||||
if (!(var= intern_find_sys_var(v->key + 1, v->name_len)) ||
|
||||
!(pi= var->cast_pluginvar()) ||
|
||||
v->key[0] != plugin_var_bookmark_key(pi->plugin_var->flags))
|
||||
continue;
|
||||
|
||||
/* Here we do anything special that may be required of the data types */
|
||||
|
||||
if ((pi->plugin_var->flags & PLUGIN_VAR_TYPEMASK) == PLUGIN_VAR_STR &&
|
||||
pi->plugin_var->flags & PLUGIN_VAR_MEMALLOC)
|
||||
if ((v->key[0] & PLUGIN_VAR_TYPEMASK) == PLUGIN_VAR_STR &&
|
||||
v->key[0] & BOOKMARK_MEMALLOC)
|
||||
{
|
||||
int offset= ((thdvar_str_t *)(pi->plugin_var))->offset;
|
||||
char **pp= (char**) (thd->variables.dynamic_variables_ptr + offset);
|
||||
char **pp= (char**) (thd->variables.dynamic_variables_ptr + v->offset);
|
||||
if (*pp)
|
||||
*pp= my_strdup(*pp, MYF(MY_WME|MY_FAE));
|
||||
}
|
||||
|
@ -3325,6 +3307,48 @@ bool sys_var_pluginvar::session_update(THD *thd, set_var *var)
|
|||
return false;
|
||||
}
|
||||
|
||||
/*
  Return a pointer to the def_val member of a plugin variable descriptor.

  pv is cast to the sysvar_* or thdvar_* structure selected by its type
  bits combined with the PLUGIN_VAR_THDLOCAL flag, and the address of that
  structure's def_val member is returned. For PLUGIN_VAR_STR the result
  therefore points at the def_val member itself, not at the string.

  For an unrecognised type/flag combination the function asserts (debug
  builds) and returns NULL.
*/
static const void *var_def_ptr(st_mysql_sys_var *pv)
|
||||
{
|
||||
switch (pv->flags & (PLUGIN_VAR_TYPEMASK | PLUGIN_VAR_THDLOCAL)) {
|
||||
/* Global (non thd-local) variables: sysvar_* layouts */
case PLUGIN_VAR_INT:
|
||||
return &((sysvar_uint_t*) pv)->def_val;
|
||||
case PLUGIN_VAR_LONG:
|
||||
return &((sysvar_ulong_t*) pv)->def_val;
|
||||
case PLUGIN_VAR_LONGLONG:
|
||||
return &((sysvar_ulonglong_t*) pv)->def_val;
|
||||
case PLUGIN_VAR_ENUM:
|
||||
return &((sysvar_enum_t*) pv)->def_val;
|
||||
case PLUGIN_VAR_SET:
|
||||
return &((sysvar_set_t*) pv)->def_val;
|
||||
case PLUGIN_VAR_BOOL:
|
||||
return &((sysvar_bool_t*) pv)->def_val;
|
||||
case PLUGIN_VAR_STR:
|
||||
return &((sysvar_str_t*) pv)->def_val;
|
||||
case PLUGIN_VAR_DOUBLE:
|
||||
return &((sysvar_double_t*) pv)->def_val;
|
||||
/* Thd-local variables: thdvar_* layouts */
case PLUGIN_VAR_INT | PLUGIN_VAR_THDLOCAL:
|
||||
return &((thdvar_uint_t*) pv)->def_val;
|
||||
case PLUGIN_VAR_LONG | PLUGIN_VAR_THDLOCAL:
|
||||
return &((thdvar_ulong_t*) pv)->def_val;
|
||||
case PLUGIN_VAR_LONGLONG | PLUGIN_VAR_THDLOCAL:
|
||||
return &((thdvar_ulonglong_t*) pv)->def_val;
|
||||
case PLUGIN_VAR_ENUM | PLUGIN_VAR_THDLOCAL:
|
||||
return &((thdvar_enum_t*) pv)->def_val;
|
||||
case PLUGIN_VAR_SET | PLUGIN_VAR_THDLOCAL:
|
||||
return &((thdvar_set_t*) pv)->def_val;
|
||||
case PLUGIN_VAR_BOOL | PLUGIN_VAR_THDLOCAL:
|
||||
return &((thdvar_bool_t*) pv)->def_val;
|
||||
case PLUGIN_VAR_STR | PLUGIN_VAR_THDLOCAL:
|
||||
return &((thdvar_str_t*) pv)->def_val;
|
||||
case PLUGIN_VAR_DOUBLE | PLUGIN_VAR_THDLOCAL:
|
||||
return &((thdvar_double_t*) pv)->def_val;
|
||||
default:
|
||||
DBUG_ASSERT(0);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
bool sys_var_pluginvar::global_update(THD *thd, set_var *var)
|
||||
{
|
||||
DBUG_ASSERT(!is_readonly());
|
||||
|
@ -3334,60 +3358,7 @@ bool sys_var_pluginvar::global_update(THD *thd, set_var *var)
|
|||
const void *src= &var->save_result;
|
||||
|
||||
if (!var->value)
|
||||
{
|
||||
switch (plugin_var->flags & (PLUGIN_VAR_TYPEMASK | PLUGIN_VAR_THDLOCAL)) {
|
||||
case PLUGIN_VAR_INT:
|
||||
src= &((sysvar_uint_t*) plugin_var)->def_val;
|
||||
break;
|
||||
case PLUGIN_VAR_LONG:
|
||||
src= &((sysvar_ulong_t*) plugin_var)->def_val;
|
||||
break;
|
||||
case PLUGIN_VAR_LONGLONG:
|
||||
src= &((sysvar_ulonglong_t*) plugin_var)->def_val;
|
||||
break;
|
||||
case PLUGIN_VAR_ENUM:
|
||||
src= &((sysvar_enum_t*) plugin_var)->def_val;
|
||||
break;
|
||||
case PLUGIN_VAR_SET:
|
||||
src= &((sysvar_set_t*) plugin_var)->def_val;
|
||||
break;
|
||||
case PLUGIN_VAR_BOOL:
|
||||
src= &((sysvar_bool_t*) plugin_var)->def_val;
|
||||
break;
|
||||
case PLUGIN_VAR_STR:
|
||||
src= &((sysvar_str_t*) plugin_var)->def_val;
|
||||
break;
|
||||
case PLUGIN_VAR_DOUBLE:
|
||||
src= &((sysvar_double_t*) plugin_var)->def_val;
|
||||
break;
|
||||
case PLUGIN_VAR_INT | PLUGIN_VAR_THDLOCAL:
|
||||
src= &((thdvar_uint_t*) plugin_var)->def_val;
|
||||
break;
|
||||
case PLUGIN_VAR_LONG | PLUGIN_VAR_THDLOCAL:
|
||||
src= &((thdvar_ulong_t*) plugin_var)->def_val;
|
||||
break;
|
||||
case PLUGIN_VAR_LONGLONG | PLUGIN_VAR_THDLOCAL:
|
||||
src= &((thdvar_ulonglong_t*) plugin_var)->def_val;
|
||||
break;
|
||||
case PLUGIN_VAR_ENUM | PLUGIN_VAR_THDLOCAL:
|
||||
src= &((thdvar_enum_t*) plugin_var)->def_val;
|
||||
break;
|
||||
case PLUGIN_VAR_SET | PLUGIN_VAR_THDLOCAL:
|
||||
src= &((thdvar_set_t*) plugin_var)->def_val;
|
||||
break;
|
||||
case PLUGIN_VAR_BOOL | PLUGIN_VAR_THDLOCAL:
|
||||
src= &((thdvar_bool_t*) plugin_var)->def_val;
|
||||
break;
|
||||
case PLUGIN_VAR_STR | PLUGIN_VAR_THDLOCAL:
|
||||
src= &((thdvar_str_t*) plugin_var)->def_val;
|
||||
break;
|
||||
case PLUGIN_VAR_DOUBLE | PLUGIN_VAR_THDLOCAL:
|
||||
src= &((thdvar_double_t*) plugin_var)->def_val;
|
||||
break;
|
||||
default:
|
||||
DBUG_ASSERT(0);
|
||||
}
|
||||
}
|
||||
src= var_def_ptr(plugin_var);
|
||||
|
||||
plugin_var->update(thd, plugin_var, tgt, src);
|
||||
return false;
|
||||
|
@ -3743,7 +3714,18 @@ static int construct_options(MEM_ROOT *mem_root, struct st_plugin_int *tmp,
|
|||
*(int*)(opt + 1)= offset= v->offset;
|
||||
|
||||
if (opt->flags & PLUGIN_VAR_NOCMDOPT)
|
||||
{
|
||||
char *val= global_system_variables.dynamic_variables_ptr + offset;
|
||||
if (((opt->flags & PLUGIN_VAR_TYPEMASK) == PLUGIN_VAR_STR) &&
|
||||
(opt->flags & PLUGIN_VAR_MEMALLOC))
|
||||
{
|
||||
char *def_val= *(char**)var_def_ptr(opt);
|
||||
*(char**)val= def_val ? my_strdup(def_val, MYF(0)) : NULL;
|
||||
}
|
||||
else
|
||||
memcpy(val, var_def_ptr(opt), var_storage_size(opt->flags));
|
||||
continue;
|
||||
}
|
||||
|
||||
optname= (char*) memdup_root(mem_root, v->key + 1,
|
||||
(optnamelen= v->name_len) + 1);
|
||||
|
@ -3951,10 +3933,11 @@ static int test_plugin_options(MEM_ROOT *tmp_root, struct st_plugin_int *tmp,
|
|||
*str->value= strdup_root(mem_root, *str->value);
|
||||
}
|
||||
|
||||
var= find_bookmark(plugin_name.str, o->name, o->flags);
|
||||
if (o->flags & PLUGIN_VAR_NOSYSVAR)
|
||||
continue;
|
||||
tmp_backup[tmp->nbackups++].save(&o->name);
|
||||
if ((var= find_bookmark(plugin_name.str, o->name, o->flags)))
|
||||
if (var)
|
||||
v= new (mem_root) sys_var_pluginvar(&chain, var->key + 1, o, tmp);
|
||||
else
|
||||
{
|
||||
|
|
|
@ -9004,9 +9004,26 @@ JOIN::make_simple_join(JOIN *parent, TABLE *temp_table)
|
|||
We need to destruct the copy_field (allocated in create_tmp_table())
|
||||
before setting it to 0 if the join is not "reusable".
|
||||
*/
|
||||
if (!tmp_join || tmp_join != this)
|
||||
tmp_table_param.cleanup();
|
||||
tmp_table_param.copy_field= tmp_table_param.copy_field_end=0;
|
||||
if (!tmp_join || tmp_join != this)
|
||||
tmp_table_param.cleanup();
|
||||
else
|
||||
{
|
||||
/*
|
||||
Free data buffered in copy_fields, but keep data pointed by copy_field
|
||||
around for next iteration (possibly stored in save_copy_fields).
|
||||
|
||||
It would be logically simpler to not clear copy_field
|
||||
below, but as we have loops that run over copy_field to
|
||||
copy_field_end that should not be done anymore, it's simpler to
|
||||
just clear the pointers.
|
||||
|
||||
Another option would be to just clear copy_field_end and not run
|
||||
the loops if this is not set or to have tmp_table_param.cleanup()
|
||||
to run cleanup on save_copy_field if copy_field is not set.
|
||||
*/
|
||||
tmp_table_param.free_copy_field_data();
|
||||
tmp_table_param.copy_field= tmp_table_param.copy_field_end=0;
|
||||
}
|
||||
first_record= sort_and_group=0;
|
||||
send_records= (ha_rows) 0;
|
||||
|
||||
|
@ -11687,7 +11704,7 @@ void JOIN::join_free()
|
|||
/**
|
||||
Free resources of given join.
|
||||
|
||||
@param fill true if we should free all resources, call with full==1
|
||||
@param full true if we should free all resources, call with full==1
|
||||
should be last, before it this function can be called with
|
||||
full==0
|
||||
|
||||
|
@ -11806,7 +11823,7 @@ void JOIN::cleanup(bool full)
|
|||
/*
|
||||
If we have tmp_join and 'this' JOIN is not tmp_join and
|
||||
tmp_table_param.copy_field's of them are equal then we have to remove
|
||||
pointer to tmp_table_param.copy_field from tmp_join, because it qill
|
||||
pointer to tmp_table_param.copy_field from tmp_join, because it will
|
||||
be removed in tmp_table_param.cleanup().
|
||||
*/
|
||||
if (tmp_join &&
|
||||
|
@ -15710,6 +15727,7 @@ Field *create_tmp_field(THD *thd, TABLE *table,Item *item, Item::Type type,
|
|||
case Item::VARBIN_ITEM:
|
||||
case Item::CACHE_ITEM:
|
||||
case Item::EXPR_CACHE_ITEM:
|
||||
case Item::PARAM_ITEM:
|
||||
if (make_copy_field)
|
||||
{
|
||||
DBUG_ASSERT(((Item_result_field*)item)->result_field);
|
||||
|
@ -22240,7 +22258,7 @@ setup_copy_fields(THD *thd, TMP_TABLE_PARAM *param,
|
|||
err:
|
||||
if (copy)
|
||||
delete [] param->copy_field; // This is never 0
|
||||
param->copy_field=0;
|
||||
param->copy_field= 0;
|
||||
err2:
|
||||
DBUG_RETURN(TRUE);
|
||||
}
|
||||
|
|
|
@ -876,6 +876,8 @@ void tdc_release_share(TABLE_SHARE *share)
|
|||
}
|
||||
if (--share->tdc.ref_count)
|
||||
{
|
||||
if (!share->is_view)
|
||||
mysql_cond_broadcast(&share->tdc.COND_release);
|
||||
mysql_mutex_unlock(&share->tdc.LOCK_table_share);
|
||||
mysql_mutex_unlock(&LOCK_unused_shares);
|
||||
DBUG_VOID_RETURN;
|
||||
|
|
|
@ -108,6 +108,7 @@ UNIV_INTERN mysql_pfs_key_t fts_pll_tokenize_mutex_key;
|
|||
/** variable to record innodb_fts_internal_tbl_name for information
|
||||
schema table INNODB_FTS_INSERTED etc. */
|
||||
UNIV_INTERN char* fts_internal_tbl_name = NULL;
|
||||
UNIV_INTERN char* fts_internal_tbl_name2 = NULL;
|
||||
|
||||
/** InnoDB default stopword list:
|
||||
There are different versions of stopwords, the stop words listed
|
||||
|
@ -6570,6 +6571,36 @@ fts_check_corrupt_index(
|
|||
return(0);
|
||||
}
|
||||
|
||||
/* Get parent table name if it's an FTS aux table.
The result is NULL when the name is not recognised as an aux table name
by fts_is_aux_table_name(), or when the parent table cannot be opened by id.
Otherwise it is a copy of the parent's name made with mem_strdupl().
NOTE(review): the caller presumably owns and must free that copy - confirm.
|
||||
@param[in] aux_table_name aux table name
|
||||
@param[in] aux_table_len aux table length
|
||||
@return parent table name, or NULL */
|
||||
char*
|
||||
fts_get_parent_table_name(
|
||||
const char* aux_table_name,
|
||||
ulint aux_table_len)
|
||||
{
|
||||
fts_aux_table_t aux_table;
|
||||
char* parent_table_name = NULL;
|
||||
|
||||
if (fts_is_aux_table_name(&aux_table, aux_table_name, aux_table_len)) {
|
||||
dict_table_t* parent_table;
|
||||
|
||||
/* Look the parent up by the id filled into aux_table above. */
parent_table = dict_table_open_on_id(
|
||||
aux_table.parent_id, TRUE, DICT_TABLE_OP_NORMAL);
|
||||
|
||||
if (parent_table != NULL) {
|
||||
/* Copy the name before the table handle is closed. */
parent_table_name = mem_strdupl(
|
||||
parent_table->name,
|
||||
strlen(parent_table->name));
|
||||
|
||||
dict_table_close(parent_table, TRUE, FALSE);
|
||||
}
|
||||
}
|
||||
|
||||
return(parent_table_name);
|
||||
}
|
||||
|
||||
/** Check the validity of the parent table.
|
||||
@param[in] aux_table auxiliary table
|
||||
@return true if it is a valid table or false if it is not */
|
||||
|
|
|
@ -15010,7 +15010,12 @@ innodb_internal_table_update(
|
|||
my_free(old);
|
||||
}
|
||||
|
||||
fts_internal_tbl_name = *(char**) var_ptr;
|
||||
fts_internal_tbl_name2 = *(char**) var_ptr;
|
||||
if (fts_internal_tbl_name2 == NULL) {
|
||||
fts_internal_tbl_name = const_cast<char*>("default");
|
||||
} else {
|
||||
fts_internal_tbl_name = fts_internal_tbl_name2;
|
||||
}
|
||||
}
|
||||
|
||||
/****************************************************************//**
|
||||
|
@ -16793,7 +16798,7 @@ static MYSQL_SYSVAR_BOOL(disable_sort_file_cache, srv_disable_sort_file_cache,
|
|||
"Whether to disable OS system file cache for sort I/O",
|
||||
NULL, NULL, FALSE);
|
||||
|
||||
static MYSQL_SYSVAR_STR(ft_aux_table, fts_internal_tbl_name,
|
||||
static MYSQL_SYSVAR_STR(ft_aux_table, fts_internal_tbl_name2,
|
||||
PLUGIN_VAR_NOCMDARG,
|
||||
"FTS internal auxiliary table to be checked",
|
||||
innodb_internal_table_validate,
|
||||
|
|
|
@ -209,7 +209,10 @@ innobase_need_rebuild(
|
|||
const Alter_inplace_info* ha_alter_info,
|
||||
const TABLE* altered_table)
|
||||
{
|
||||
if (ha_alter_info->handler_flags
|
||||
Alter_inplace_info::HA_ALTER_FLAGS alter_inplace_flags =
|
||||
ha_alter_info->handler_flags & ~(INNOBASE_INPLACE_IGNORE);
|
||||
|
||||
if (alter_inplace_flags
|
||||
== Alter_inplace_info::CHANGE_CREATE_OPTION
|
||||
&& !(ha_alter_info->create_info->used_fields
|
||||
& (HA_CREATE_USED_ROW_FORMAT
|
||||
|
@ -3933,7 +3936,7 @@ err_exit:
|
|||
}
|
||||
|
||||
if (!(ha_alter_info->handler_flags & INNOBASE_ALTER_DATA)
|
||||
|| (ha_alter_info->handler_flags
|
||||
|| ((ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE)
|
||||
== Alter_inplace_info::CHANGE_CREATE_OPTION
|
||||
&& !innobase_need_rebuild(ha_alter_info, table))) {
|
||||
|
||||
|
@ -4107,7 +4110,7 @@ ok_exit:
|
|||
DBUG_RETURN(false);
|
||||
}
|
||||
|
||||
if (ha_alter_info->handler_flags
|
||||
if ((ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE)
|
||||
== Alter_inplace_info::CHANGE_CREATE_OPTION
|
||||
&& !innobase_need_rebuild(ha_alter_info, table)) {
|
||||
goto ok_exit;
|
||||
|
|
|
@ -3981,6 +3981,8 @@ i_s_fts_config_fill(
|
|||
DBUG_RETURN(0);
|
||||
}
|
||||
|
||||
DEBUG_SYNC_C("i_s_fts_config_fille_check");
|
||||
|
||||
fields = table->field;
|
||||
|
||||
/* Prevent DDL to drop fts aux tables. */
|
||||
|
|
|
@ -375,6 +375,7 @@ extern bool fts_need_sync;
|
|||
/** Variable specifying the table that has Fulltext index to display its
|
||||
content through information schema table */
|
||||
extern char* fts_internal_tbl_name;
|
||||
extern char* fts_internal_tbl_name2;
|
||||
|
||||
#define fts_que_graph_free(graph) \
|
||||
do { \
|
||||
|
@ -823,6 +824,15 @@ void
|
|||
fts_drop_orphaned_tables(void);
|
||||
/*==========================*/
|
||||
|
||||
/* Get parent table name if it's a fts aux table
|
||||
@param[in] aux_table_name aux table name
|
||||
@param[in] aux_table_len aux table length
|
||||
@return parent table name, or NULL */
|
||||
char*
|
||||
fts_get_parent_table_name(
|
||||
const char* aux_table_name,
|
||||
ulint aux_table_len);
|
||||
|
||||
/******************************************************************//**
|
||||
Since we do a horizontal split on the index table, we need to drop
|
||||
all the split tables.
|
||||
|
|
|
@ -44,7 +44,7 @@ Created 1/20/1994 Heikki Tuuri
|
|||
|
||||
#define INNODB_VERSION_MAJOR 5
|
||||
#define INNODB_VERSION_MINOR 6
|
||||
#define INNODB_VERSION_BUGFIX 32
|
||||
#define INNODB_VERSION_BUGFIX 33
|
||||
|
||||
/* The following is the InnoDB version as shown in
|
||||
SELECT plugin_version FROM information_schema.plugins;
|
||||
|
|
|
@ -613,7 +613,7 @@ row_log_table_delete(
|
|||
&old_pk_extra_size);
|
||||
ut_ad(old_pk_extra_size < 0x100);
|
||||
|
||||
mrec_size = 4 + old_pk_size;
|
||||
mrec_size = 6 + old_pk_size;
|
||||
|
||||
/* Log enough prefix of the BLOB unless both the
|
||||
old and new table are in COMPACT or REDUNDANT format,
|
||||
|
@ -643,8 +643,8 @@ row_log_table_delete(
|
|||
*b++ = static_cast<byte>(old_pk_extra_size);
|
||||
|
||||
/* Log the size of external prefix we saved */
|
||||
mach_write_to_2(b, ext_size);
|
||||
b += 2;
|
||||
mach_write_to_4(b, ext_size);
|
||||
b += 4;
|
||||
|
||||
rec_convert_dtuple_to_temp(
|
||||
b + old_pk_extra_size, new_index,
|
||||
|
@ -2268,14 +2268,14 @@ row_log_table_apply_op(
|
|||
break;
|
||||
|
||||
case ROW_T_DELETE:
|
||||
/* 1 (extra_size) + 2 (ext_size) + at least 1 (payload) */
|
||||
if (mrec + 4 >= mrec_end) {
|
||||
/* 1 (extra_size) + 4 (ext_size) + at least 1 (payload) */
|
||||
if (mrec + 6 >= mrec_end) {
|
||||
return(NULL);
|
||||
}
|
||||
|
||||
extra_size = *mrec++;
|
||||
ext_size = mach_read_from_2(mrec);
|
||||
mrec += 2;
|
||||
ext_size = mach_read_from_4(mrec);
|
||||
mrec += 4;
|
||||
ut_ad(mrec < mrec_end);
|
||||
|
||||
/* We assume extra_size < 0x100 for the PRIMARY KEY prefix.
|
||||
|
|
|
@ -2715,6 +2715,10 @@ loop:
|
|||
return(n_tables + n_tables_dropped);
|
||||
}
|
||||
|
||||
DBUG_EXECUTE_IF("row_drop_tables_in_background_sleep",
|
||||
os_thread_sleep(5000000);
|
||||
);
|
||||
|
||||
table = dict_table_open_on_name(drop->table_name, FALSE, FALSE,
|
||||
DICT_ERR_IGNORE_NONE);
|
||||
|
||||
|
@ -2725,6 +2729,16 @@ loop:
|
|||
goto already_dropped;
|
||||
}
|
||||
|
||||
if (!table->to_be_dropped) {
|
||||
/* There is a scenario: the old table is dropped
|
||||
just after it's added into drop list, and new
|
||||
table with the same name is created, then we try
|
||||
to drop the new table in background. */
|
||||
dict_table_close(table, FALSE, FALSE);
|
||||
|
||||
goto already_dropped;
|
||||
}
|
||||
|
||||
ut_a(!table->can_be_evicted);
|
||||
|
||||
dict_table_close(table, FALSE, FALSE);
|
||||
|
@ -3992,6 +4006,13 @@ row_drop_table_for_mysql(
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
DBUG_EXECUTE_IF("row_drop_table_add_to_background",
|
||||
row_add_table_to_background_drop_list(table->name);
|
||||
err = DB_SUCCESS;
|
||||
goto funct_exit;
|
||||
);
|
||||
|
||||
/* TODO: could we replace the counter n_foreign_key_checks_running
|
||||
with lock checks on the table? Acquire here an exclusive lock on the
|
||||
table, and rewrite lock0lock.cc and the lock wait in srv0srv.cc so that
|
||||
|
@ -4608,6 +4629,19 @@ loop:
|
|||
row_mysql_lock_data_dictionary(trx);
|
||||
|
||||
while ((table_name = dict_get_first_table_name_in_db(name))) {
|
||||
/* Drop parent table if it is a fts aux table, to
|
||||
avoid accessing dropped fts aux tables in information
|
||||
schema when parent table still exists.
|
||||
Note: Drop parent table will drop fts aux tables. */
|
||||
char* parent_table_name;
|
||||
parent_table_name = fts_get_parent_table_name(
|
||||
table_name, strlen(table_name));
|
||||
|
||||
if (parent_table_name != NULL) {
|
||||
mem_free(table_name);
|
||||
table_name = parent_table_name;
|
||||
}
|
||||
|
||||
ut_a(memcmp(table_name, name, namelen) == 0);
|
||||
|
||||
table = dict_table_open_on_name(
|
||||
|
|
|
@ -205,7 +205,7 @@ maria_declare_plugin(perfschema)
|
|||
0x0001,
|
||||
pfs_status_vars,
|
||||
NULL,
|
||||
"5.6.32",
|
||||
"5.6.33",
|
||||
MariaDB_PLUGIN_MATURITY_STABLE
|
||||
}
|
||||
maria_declare_plugin_end;
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
SET(TOKUDB_VERSION 5.6.31-77.0)
|
||||
SET(TOKUDB_VERSION 5.6.32-78.1)
|
||||
# PerconaFT only supports x86-64 and cmake-2.8.9+
|
||||
IF(CMAKE_VERSION VERSION_LESS "2.8.9")
|
||||
MESSAGE(STATUS "CMake 2.8.9 or higher is required by TokuDB")
|
||||
|
|
|
@ -367,8 +367,8 @@ static void print_db_env_struct (void) {
|
|||
"int (*checkpointing_get_period) (DB_ENV*, uint32_t*) /* Retrieve the delay between automatic checkpoints. 0 means disabled. */",
|
||||
"int (*cleaner_set_period) (DB_ENV*, uint32_t) /* Change the delay between automatic cleaner attempts. 0 means disabled. */",
|
||||
"int (*cleaner_get_period) (DB_ENV*, uint32_t*) /* Retrieve the delay between automatic cleaner attempts. 0 means disabled. */",
|
||||
"int (*cleaner_set_iterations) (DB_ENV*, uint32_t) /* Change the number of attempts on each cleaner invokation. 0 means disabled. */",
|
||||
"int (*cleaner_get_iterations) (DB_ENV*, uint32_t*) /* Retrieve the number of attempts on each cleaner invokation. 0 means disabled. */",
|
||||
"int (*cleaner_set_iterations) (DB_ENV*, uint32_t) /* Change the number of attempts on each cleaner invocation. 0 means disabled. */",
|
||||
"int (*cleaner_get_iterations) (DB_ENV*, uint32_t*) /* Retrieve the number of attempts on each cleaner invocation. 0 means disabled. */",
|
||||
"int (*evictor_set_enable_partial_eviction) (DB_ENV*, bool) /* Enables or disabled partial eviction of nodes from cachetable. */",
|
||||
"int (*evictor_get_enable_partial_eviction) (DB_ENV*, bool*) /* Retrieve the status of partial eviction of nodes from cachetable. */",
|
||||
"int (*checkpointing_postpone) (DB_ENV*) /* Use for 'rename table' or any other operation that must be disjoint from a checkpoint */",
|
||||
|
|
|
@ -103,6 +103,7 @@ set_cflags_if_supported(
|
|||
-Wno-pointer-bool-conversion
|
||||
-fno-rtti
|
||||
-fno-exceptions
|
||||
-Wno-error=nonnull-compare
|
||||
)
|
||||
## set_cflags_if_supported_named("-Weffc++" -Weffcpp)
|
||||
|
||||
|
|
|
@ -55,8 +55,8 @@ set(FT_SOURCES
|
|||
msg_buffer
|
||||
node
|
||||
pivotkeys
|
||||
serialize/rbtree_mhs
|
||||
serialize/block_allocator
|
||||
serialize/block_allocator_strategy
|
||||
serialize/block_table
|
||||
serialize/compress
|
||||
serialize/ft_node-serialize
|
||||
|
|
|
@ -496,7 +496,7 @@ handle_split_of_child(
|
|||
|
||||
// We never set the rightmost blocknum to be the root.
|
||||
// Instead, we wait for the root to split and let promotion initialize the rightmost
|
||||
// blocknum to be the first non-root leaf node on the right extreme to recieve an insert.
|
||||
// blocknum to be the first non-root leaf node on the right extreme to receive an insert.
|
||||
BLOCKNUM rightmost_blocknum = toku_unsafe_fetch(&ft->rightmost_blocknum);
|
||||
invariant(ft->h->root_blocknum.b != rightmost_blocknum.b);
|
||||
if (childa->blocknum.b == rightmost_blocknum.b) {
|
||||
|
@ -1470,7 +1470,7 @@ void toku_ft_flush_some_child(FT ft, FTNODE parent, struct flusher_advice *fa)
|
|||
// It is possible after reading in the entire child,
|
||||
// that we now know that the child is not reactive
|
||||
// if so, we can unpin parent right now
|
||||
// we wont be splitting/merging child
|
||||
// we won't be splitting/merging child
|
||||
// and we have already replaced the bnc
|
||||
// for the root with a fresh one
|
||||
enum reactivity child_re = toku_ftnode_get_reactivity(ft, child);
|
||||
|
|
|
@ -598,15 +598,12 @@ void toku_ftnode_checkpoint_complete_callback(void *value_data) {
|
|||
}
|
||||
}
|
||||
|
||||
void toku_ftnode_clone_callback(
|
||||
void* value_data,
|
||||
void** cloned_value_data,
|
||||
long* clone_size,
|
||||
PAIR_ATTR* new_attr,
|
||||
bool for_checkpoint,
|
||||
void* write_extraargs
|
||||
)
|
||||
{
|
||||
void toku_ftnode_clone_callback(void *value_data,
|
||||
void **cloned_value_data,
|
||||
long *clone_size,
|
||||
PAIR_ATTR *new_attr,
|
||||
bool for_checkpoint,
|
||||
void *write_extraargs) {
|
||||
FTNODE node = static_cast<FTNODE>(value_data);
|
||||
toku_ftnode_assert_fully_in_memory(node);
|
||||
FT ft = static_cast<FT>(write_extraargs);
|
||||
|
@ -618,13 +615,16 @@ void toku_ftnode_clone_callback(
|
|||
toku_ftnode_leaf_rebalance(node, ft->h->basementnodesize);
|
||||
}
|
||||
|
||||
cloned_node->oldest_referenced_xid_known = node->oldest_referenced_xid_known;
|
||||
cloned_node->max_msn_applied_to_node_on_disk = node->max_msn_applied_to_node_on_disk;
|
||||
cloned_node->oldest_referenced_xid_known =
|
||||
node->oldest_referenced_xid_known;
|
||||
cloned_node->max_msn_applied_to_node_on_disk =
|
||||
node->max_msn_applied_to_node_on_disk;
|
||||
cloned_node->flags = node->flags;
|
||||
cloned_node->blocknum = node->blocknum;
|
||||
cloned_node->layout_version = node->layout_version;
|
||||
cloned_node->layout_version_original = node->layout_version_original;
|
||||
cloned_node->layout_version_read_from_disk = node->layout_version_read_from_disk;
|
||||
cloned_node->layout_version_read_from_disk =
|
||||
node->layout_version_read_from_disk;
|
||||
cloned_node->build_id = node->build_id;
|
||||
cloned_node->height = node->height;
|
||||
cloned_node->dirty = node->dirty;
|
||||
|
@ -649,38 +649,39 @@ void toku_ftnode_clone_callback(
|
|||
// set new pair attr if necessary
|
||||
if (node->height == 0) {
|
||||
*new_attr = make_ftnode_pair_attr(node);
|
||||
}
|
||||
else {
|
||||
for (int i = 0; i < node->n_children; i++) {
|
||||
BLB(node, i)->logical_rows_delta = 0;
|
||||
BLB(cloned_node, i)->logical_rows_delta = 0;
|
||||
}
|
||||
} else {
|
||||
new_attr->is_valid = false;
|
||||
}
|
||||
*clone_size = ftnode_memory_size(cloned_node);
|
||||
*cloned_value_data = cloned_node;
|
||||
}
|
||||
|
||||
void toku_ftnode_flush_callback(
|
||||
CACHEFILE UU(cachefile),
|
||||
int fd,
|
||||
BLOCKNUM blocknum,
|
||||
void *ftnode_v,
|
||||
void** disk_data,
|
||||
void *extraargs,
|
||||
PAIR_ATTR size __attribute__((unused)),
|
||||
PAIR_ATTR* new_size,
|
||||
bool write_me,
|
||||
bool keep_me,
|
||||
bool for_checkpoint,
|
||||
bool is_clone
|
||||
)
|
||||
{
|
||||
FT ft = (FT) extraargs;
|
||||
FTNODE ftnode = (FTNODE) ftnode_v;
|
||||
FTNODE_DISK_DATA* ndd = (FTNODE_DISK_DATA*)disk_data;
|
||||
void toku_ftnode_flush_callback(CACHEFILE UU(cachefile),
|
||||
int fd,
|
||||
BLOCKNUM blocknum,
|
||||
void *ftnode_v,
|
||||
void **disk_data,
|
||||
void *extraargs,
|
||||
PAIR_ATTR size __attribute__((unused)),
|
||||
PAIR_ATTR *new_size,
|
||||
bool write_me,
|
||||
bool keep_me,
|
||||
bool for_checkpoint,
|
||||
bool is_clone) {
|
||||
FT ft = (FT)extraargs;
|
||||
FTNODE ftnode = (FTNODE)ftnode_v;
|
||||
FTNODE_DISK_DATA *ndd = (FTNODE_DISK_DATA *)disk_data;
|
||||
assert(ftnode->blocknum.b == blocknum.b);
|
||||
int height = ftnode->height;
|
||||
if (write_me) {
|
||||
toku_ftnode_assert_fully_in_memory(ftnode);
|
||||
if (height > 0 && !is_clone) {
|
||||
// cloned nodes already had their stale messages moved, see toku_ftnode_clone_callback()
|
||||
// cloned nodes already had their stale messages moved, see
|
||||
// toku_ftnode_clone_callback()
|
||||
toku_move_ftnode_messages_to_stale(ft, ftnode);
|
||||
} else if (height == 0) {
|
||||
toku_ftnode_leaf_run_gc(ft, ftnode);
|
||||
|
@ -688,7 +689,8 @@ void toku_ftnode_flush_callback(
|
|||
toku_ftnode_update_disk_stats(ftnode, ft, for_checkpoint);
|
||||
}
|
||||
}
|
||||
int r = toku_serialize_ftnode_to(fd, ftnode->blocknum, ftnode, ndd, !is_clone, ft, for_checkpoint);
|
||||
int r = toku_serialize_ftnode_to(
|
||||
fd, ftnode->blocknum, ftnode, ndd, !is_clone, ft, for_checkpoint);
|
||||
assert_zero(r);
|
||||
ftnode->layout_version_read_from_disk = FT_LAYOUT_VERSION;
|
||||
}
|
||||
|
@ -703,20 +705,22 @@ void toku_ftnode_flush_callback(
|
|||
FT_STATUS_INC(FT_FULL_EVICTIONS_NONLEAF_BYTES, node_size);
|
||||
}
|
||||
toku_free(*disk_data);
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
if (ftnode->height == 0) {
|
||||
for (int i = 0; i < ftnode->n_children; i++) {
|
||||
if (BP_STATE(ftnode,i) == PT_AVAIL) {
|
||||
if (BP_STATE(ftnode, i) == PT_AVAIL) {
|
||||
BASEMENTNODE bn = BLB(ftnode, i);
|
||||
toku_ft_decrease_stats(&ft->in_memory_stats, bn->stat64_delta);
|
||||
toku_ft_decrease_stats(&ft->in_memory_stats,
|
||||
bn->stat64_delta);
|
||||
if (!ftnode->dirty)
|
||||
toku_ft_adjust_logical_row_count(
|
||||
ft, -bn->logical_rows_delta);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
toku_ftnode_free(&ftnode);
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
*new_size = make_ftnode_pair_attr(ftnode);
|
||||
}
|
||||
}
|
||||
|
@ -845,10 +849,13 @@ static void compress_internal_node_partition(FTNODE node, int i, enum toku_compr
|
|||
}
|
||||
|
||||
// callback for partially evicting a node
|
||||
int toku_ftnode_pe_callback(void *ftnode_pv, PAIR_ATTR old_attr, void *write_extraargs,
|
||||
void (*finalize)(PAIR_ATTR new_attr, void *extra), void *finalize_extra) {
|
||||
FTNODE node = (FTNODE) ftnode_pv;
|
||||
FT ft = (FT) write_extraargs;
|
||||
int toku_ftnode_pe_callback(void *ftnode_pv,
|
||||
PAIR_ATTR old_attr,
|
||||
void *write_extraargs,
|
||||
void (*finalize)(PAIR_ATTR new_attr, void *extra),
|
||||
void *finalize_extra) {
|
||||
FTNODE node = (FTNODE)ftnode_pv;
|
||||
FT ft = (FT)write_extraargs;
|
||||
int num_partial_evictions = 0;
|
||||
|
||||
// Hold things we intend to destroy here.
|
||||
|
@ -866,7 +873,8 @@ int toku_ftnode_pe_callback(void *ftnode_pv, PAIR_ATTR old_attr, void *write_ext
|
|||
}
|
||||
// Don't partially evict nodes whose partitions can't be read back
|
||||
// from disk individually
|
||||
if (node->layout_version_read_from_disk < FT_FIRST_LAYOUT_VERSION_WITH_BASEMENT_NODES) {
|
||||
if (node->layout_version_read_from_disk <
|
||||
FT_FIRST_LAYOUT_VERSION_WITH_BASEMENT_NODES) {
|
||||
goto exit;
|
||||
}
|
||||
//
|
||||
|
@ -874,77 +882,77 @@ int toku_ftnode_pe_callback(void *ftnode_pv, PAIR_ATTR old_attr, void *write_ext
|
|||
//
|
||||
if (node->height > 0) {
|
||||
for (int i = 0; i < node->n_children; i++) {
|
||||
if (BP_STATE(node,i) == PT_AVAIL) {
|
||||
if (BP_SHOULD_EVICT(node,i)) {
|
||||
if (BP_STATE(node, i) == PT_AVAIL) {
|
||||
if (BP_SHOULD_EVICT(node, i)) {
|
||||
NONLEAF_CHILDINFO bnc = BNC(node, i);
|
||||
if (ft_compress_buffers_before_eviction &&
|
||||
// We may not serialize and compress a partition in memory if its
|
||||
// in memory layout version is different than what's on disk (and
|
||||
// therefore requires upgrade).
|
||||
// We may not serialize and compress a partition in
|
||||
// memory if its in memory layout version is different
|
||||
// than what's on disk (and therefore requires upgrade).
|
||||
//
|
||||
// Auto-upgrade code assumes that if a node's layout version read
|
||||
// from disk is not current, it MUST require upgrade. Breaking
|
||||
// this rule would cause upgrade code to upgrade this partition
|
||||
// again after we serialize it as the current version, which is bad.
|
||||
node->layout_version == node->layout_version_read_from_disk) {
|
||||
// Auto-upgrade code assumes that if a node's layout
|
||||
// version read from disk is not current, it MUST
|
||||
// require upgrade.
|
||||
// Breaking this rule would cause upgrade code to
|
||||
// upgrade this partition again after we serialize it as
|
||||
// the current version, which is bad.
|
||||
node->layout_version ==
|
||||
node->layout_version_read_from_disk) {
|
||||
toku_ft_bnc_move_messages_to_stale(ft, bnc);
|
||||
compress_internal_node_partition(
|
||||
node,
|
||||
i,
|
||||
// Always compress with quicklz
|
||||
TOKU_QUICKLZ_METHOD
|
||||
);
|
||||
TOKU_QUICKLZ_METHOD);
|
||||
} else {
|
||||
// We're not compressing buffers before eviction. Simply
|
||||
// detach the buffer and set the child's state to on-disk.
|
||||
// detach the buffer and set the child's state to
|
||||
// on-disk.
|
||||
set_BNULL(node, i);
|
||||
BP_STATE(node, i) = PT_ON_DISK;
|
||||
}
|
||||
buffers_to_destroy[num_buffers_to_destroy++] = bnc;
|
||||
num_partial_evictions++;
|
||||
} else {
|
||||
BP_SWEEP_CLOCK(node, i);
|
||||
}
|
||||
else {
|
||||
BP_SWEEP_CLOCK(node,i);
|
||||
}
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
//
|
||||
// partial eviction strategy for basement nodes:
|
||||
// if the bn is compressed, evict it
|
||||
// else: check if it requires eviction, if it does, evict it, if not, sweep the clock count
|
||||
//
|
||||
else {
|
||||
} else {
|
||||
//
|
||||
// partial eviction strategy for basement nodes:
|
||||
// if the bn is compressed, evict it
|
||||
// else: check if it requires eviction, if it does, evict it, if not,
|
||||
// sweep the clock count
|
||||
//
|
||||
for (int i = 0; i < node->n_children; i++) {
|
||||
// Get rid of compressed stuff no matter what.
|
||||
if (BP_STATE(node,i) == PT_COMPRESSED) {
|
||||
if (BP_STATE(node, i) == PT_COMPRESSED) {
|
||||
SUB_BLOCK sb = BSB(node, i);
|
||||
pointers_to_free[num_pointers_to_free++] = sb->compressed_ptr;
|
||||
pointers_to_free[num_pointers_to_free++] = sb;
|
||||
set_BNULL(node, i);
|
||||
BP_STATE(node,i) = PT_ON_DISK;
|
||||
BP_STATE(node, i) = PT_ON_DISK;
|
||||
num_partial_evictions++;
|
||||
}
|
||||
else if (BP_STATE(node,i) == PT_AVAIL) {
|
||||
if (BP_SHOULD_EVICT(node,i)) {
|
||||
} else if (BP_STATE(node, i) == PT_AVAIL) {
|
||||
if (BP_SHOULD_EVICT(node, i)) {
|
||||
BASEMENTNODE bn = BLB(node, i);
|
||||
basements_to_destroy[num_basements_to_destroy++] = bn;
|
||||
toku_ft_decrease_stats(&ft->in_memory_stats, bn->stat64_delta);
|
||||
toku_ft_decrease_stats(&ft->in_memory_stats,
|
||||
bn->stat64_delta);
|
||||
toku_ft_adjust_logical_row_count(ft,
|
||||
-bn->logical_rows_delta);
|
||||
set_BNULL(node, i);
|
||||
BP_STATE(node, i) = PT_ON_DISK;
|
||||
num_partial_evictions++;
|
||||
} else {
|
||||
BP_SWEEP_CLOCK(node, i);
|
||||
}
|
||||
else {
|
||||
BP_SWEEP_CLOCK(node,i);
|
||||
}
|
||||
}
|
||||
else if (BP_STATE(node,i) == PT_ON_DISK) {
|
||||
} else if (BP_STATE(node, i) == PT_ON_DISK) {
|
||||
continue;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
@ -2378,12 +2386,16 @@ ft_send_update_msg(FT_HANDLE ft_h, const ft_msg &msg, TOKUTXN txn) {
|
|||
toku_ft_root_put_msg(ft_h->ft, msg, &gc_info);
|
||||
}
|
||||
|
||||
void toku_ft_maybe_update(FT_HANDLE ft_h, const DBT *key, const DBT *update_function_extra,
|
||||
TOKUTXN txn, bool oplsn_valid, LSN oplsn,
|
||||
bool do_logging) {
|
||||
void toku_ft_maybe_update(FT_HANDLE ft_h,
|
||||
const DBT *key,
|
||||
const DBT *update_function_extra,
|
||||
TOKUTXN txn,
|
||||
bool oplsn_valid,
|
||||
LSN oplsn,
|
||||
bool do_logging) {
|
||||
TXNID_PAIR xid = toku_txn_get_txnid(txn);
|
||||
if (txn) {
|
||||
BYTESTRING keybs = { key->size, (char *) key->data };
|
||||
BYTESTRING keybs = {key->size, (char *)key->data};
|
||||
toku_logger_save_rollback_cmdupdate(
|
||||
txn, toku_cachefile_filenum(ft_h->ft->cf), &keybs);
|
||||
toku_txn_maybe_note_ft(txn, ft_h->ft);
|
||||
|
@ -2392,22 +2404,33 @@ void toku_ft_maybe_update(FT_HANDLE ft_h, const DBT *key, const DBT *update_func
|
|||
TOKULOGGER logger;
|
||||
logger = toku_txn_logger(txn);
|
||||
if (do_logging && logger) {
|
||||
BYTESTRING keybs = {.len=key->size, .data=(char *) key->data};
|
||||
BYTESTRING extrabs = {.len=update_function_extra->size,
|
||||
.data = (char *) update_function_extra->data};
|
||||
toku_log_enq_update(logger, NULL, 0, txn,
|
||||
toku_cachefile_filenum(ft_h->ft->cf),
|
||||
xid, keybs, extrabs);
|
||||
BYTESTRING keybs = {.len = key->size, .data = (char *)key->data};
|
||||
BYTESTRING extrabs = {.len = update_function_extra->size,
|
||||
.data = (char *)update_function_extra->data};
|
||||
toku_log_enq_update(logger,
|
||||
NULL,
|
||||
0,
|
||||
txn,
|
||||
toku_cachefile_filenum(ft_h->ft->cf),
|
||||
xid,
|
||||
keybs,
|
||||
extrabs);
|
||||
}
|
||||
|
||||
LSN treelsn;
|
||||
if (oplsn_valid && oplsn.lsn <= (treelsn = toku_ft_checkpoint_lsn(ft_h->ft)).lsn) {
|
||||
if (oplsn_valid &&
|
||||
oplsn.lsn <= (treelsn = toku_ft_checkpoint_lsn(ft_h->ft)).lsn) {
|
||||
// do nothing
|
||||
} else {
|
||||
XIDS message_xids = txn ? toku_txn_get_xids(txn) : toku_xids_get_root_xids();
|
||||
ft_msg msg(key, update_function_extra, FT_UPDATE, ZERO_MSN, message_xids);
|
||||
XIDS message_xids =
|
||||
txn ? toku_txn_get_xids(txn) : toku_xids_get_root_xids();
|
||||
ft_msg msg(
|
||||
key, update_function_extra, FT_UPDATE, ZERO_MSN, message_xids);
|
||||
ft_send_update_msg(ft_h, msg, txn);
|
||||
}
|
||||
// updates get converted to insert messages, which should do a -1 on the
|
||||
// logical row count when the messages are permanently applied
|
||||
toku_ft_adjust_logical_row_count(ft_h->ft, 1);
|
||||
}
|
||||
|
||||
void toku_ft_maybe_update_broadcast(FT_HANDLE ft_h, const DBT *update_function_extra,
|
||||
|
|
|
@ -73,30 +73,20 @@ static bool recount_rows_interrupt(void* extra, uint64_t deleted_rows) {
|
|||
return rre->_cancelled =
|
||||
rre->_progress_callback(rre->_keys, deleted_rows, rre->_progress_extra);
|
||||
}
|
||||
int toku_ft_recount_rows(
|
||||
FT_HANDLE ft,
|
||||
int (*progress_callback)(
|
||||
uint64_t count,
|
||||
uint64_t deleted,
|
||||
void* progress_extra),
|
||||
void* progress_extra) {
|
||||
|
||||
int toku_ft_recount_rows(FT_HANDLE ft,
|
||||
int (*progress_callback)(uint64_t count,
|
||||
uint64_t deleted,
|
||||
void* progress_extra),
|
||||
void* progress_extra) {
|
||||
int ret = 0;
|
||||
recount_rows_extra_t rre = {
|
||||
progress_callback,
|
||||
progress_extra,
|
||||
0,
|
||||
false
|
||||
};
|
||||
recount_rows_extra_t rre = {progress_callback, progress_extra, 0, false};
|
||||
|
||||
ft_cursor c;
|
||||
ret = toku_ft_cursor_create(ft, &c, nullptr, C_READ_ANY, false, false);
|
||||
if (ret) return ret;
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
toku_ft_cursor_set_check_interrupt_cb(
|
||||
&c,
|
||||
recount_rows_interrupt,
|
||||
&rre);
|
||||
toku_ft_cursor_set_check_interrupt_cb(&c, recount_rows_interrupt, &rre);
|
||||
|
||||
ret = toku_ft_cursor_first(&c, recount_rows_found, &rre);
|
||||
while (FT_LIKELY(ret == 0)) {
|
||||
|
@ -108,6 +98,7 @@ int toku_ft_recount_rows(
|
|||
if (rre._cancelled == false) {
|
||||
// update ft count
|
||||
toku_unsafe_set(&ft->ft->in_memory_logical_rows, rre._keys);
|
||||
ft->ft->h->dirty = 1;
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -903,6 +903,9 @@ void toku_ft_adjust_logical_row_count(FT ft, int64_t delta) {
|
|||
// must be returned in toku_ft_stat64.
|
||||
if (delta != 0 && ft->in_memory_logical_rows != (uint64_t)-1) {
|
||||
toku_sync_fetch_and_add(&(ft->in_memory_logical_rows), delta);
|
||||
if (ft->in_memory_logical_rows == (uint64_t)-1) {
|
||||
toku_sync_fetch_and_add(&(ft->in_memory_logical_rows), 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -301,7 +301,7 @@ int toku_ft_loader_internal_init (/* out */ FTLOADER *blp,
|
|||
|
||||
void toku_ft_loader_internal_destroy (FTLOADER bl, bool is_error);
|
||||
|
||||
// For test purposes only. (In production, the rowset size is determined by negotation with the cachetable for some memory. See #2613.)
|
||||
// For test purposes only. (In production, the rowset size is determined by negotiation with the cachetable for some memory. See #2613.)
|
||||
uint64_t toku_ft_loader_get_rowset_budget_for_testing (void);
|
||||
|
||||
int toku_ft_loader_finish_extractor(FTLOADER bl);
|
||||
|
|
|
@ -91,7 +91,7 @@ toku_ft_loader_set_size_factor(uint32_t factor) {
|
|||
|
||||
uint64_t
|
||||
toku_ft_loader_get_rowset_budget_for_testing (void)
|
||||
// For test purposes only. In production, the rowset size is determined by negotation with the cachetable for some memory. (See #2613).
|
||||
// For test purposes only. In production, the rowset size is determined by negotiation with the cachetable for some memory. (See #2613).
|
||||
{
|
||||
return 16ULL*size_factor*1024ULL;
|
||||
}
|
||||
|
|
|
@ -373,52 +373,48 @@ find_bounds_within_message_tree(
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* For each message in the ancestor's buffer (determined by childnum) that
|
||||
* is key-wise between lower_bound_exclusive and upper_bound_inclusive,
|
||||
* apply the message to the basement node. We treat the bounds as minus
|
||||
* or plus infinity respectively if they are NULL. Do not mark the node
|
||||
* as dirty (preserve previous state of 'dirty' bit).
|
||||
*/
|
||||
// For each message in the ancestor's buffer (determined by childnum) that
|
||||
// is key-wise between lower_bound_exclusive and upper_bound_inclusive,
|
||||
// apply the message to the basement node. We treat the bounds as minus
|
||||
// or plus infinity respectively if they are NULL. Do not mark the node
|
||||
// as dirty (preserve previous state of 'dirty' bit).
|
||||
static void bnc_apply_messages_to_basement_node(
|
||||
FT_HANDLE t, // used for comparison function
|
||||
BASEMENTNODE bn, // where to apply messages
|
||||
FT_HANDLE t, // used for comparison function
|
||||
BASEMENTNODE bn, // where to apply messages
|
||||
FTNODE ancestor, // the ancestor node where we can find messages to apply
|
||||
int childnum, // which child buffer of ancestor contains messages we want
|
||||
const pivot_bounds &bounds, // contains pivot key bounds of this basement node
|
||||
txn_gc_info* gc_info,
|
||||
bool* msgs_applied) {
|
||||
|
||||
int childnum, // which child buffer of ancestor contains messages we want
|
||||
const pivot_bounds &
|
||||
bounds, // contains pivot key bounds of this basement node
|
||||
txn_gc_info *gc_info,
|
||||
bool *msgs_applied) {
|
||||
int r;
|
||||
NONLEAF_CHILDINFO bnc = BNC(ancestor, childnum);
|
||||
|
||||
// Determine the offsets in the message trees between which we need to
|
||||
// apply messages from this buffer
|
||||
STAT64INFO_S stats_delta = {0,0};
|
||||
STAT64INFO_S stats_delta = {0, 0};
|
||||
uint64_t workdone_this_ancestor = 0;
|
||||
int64_t logical_rows_delta = 0;
|
||||
|
||||
uint32_t stale_lbi, stale_ube;
|
||||
if (!bn->stale_ancestor_messages_applied) {
|
||||
find_bounds_within_message_tree(
|
||||
t->ft->cmp,
|
||||
bnc->stale_message_tree,
|
||||
&bnc->msg_buffer,
|
||||
bounds,
|
||||
&stale_lbi,
|
||||
&stale_ube);
|
||||
find_bounds_within_message_tree(t->ft->cmp,
|
||||
bnc->stale_message_tree,
|
||||
&bnc->msg_buffer,
|
||||
bounds,
|
||||
&stale_lbi,
|
||||
&stale_ube);
|
||||
} else {
|
||||
stale_lbi = 0;
|
||||
stale_ube = 0;
|
||||
}
|
||||
uint32_t fresh_lbi, fresh_ube;
|
||||
find_bounds_within_message_tree(
|
||||
t->ft->cmp,
|
||||
bnc->fresh_message_tree,
|
||||
&bnc->msg_buffer,
|
||||
bounds,
|
||||
&fresh_lbi,
|
||||
&fresh_ube);
|
||||
find_bounds_within_message_tree(t->ft->cmp,
|
||||
bnc->fresh_message_tree,
|
||||
&bnc->msg_buffer,
|
||||
bounds,
|
||||
&fresh_lbi,
|
||||
&fresh_ube);
|
||||
|
||||
// We now know where all the messages we must apply are, so one of the
|
||||
// following 4 cases will do the application, depending on which of
|
||||
|
@ -432,44 +428,53 @@ static void bnc_apply_messages_to_basement_node(
|
|||
// We have messages in multiple trees, so we grab all
|
||||
// the relevant messages' offsets and sort them by MSN, then apply
|
||||
// them in MSN order.
|
||||
const int buffer_size = ((stale_ube - stale_lbi) +
|
||||
(fresh_ube - fresh_lbi) +
|
||||
bnc->broadcast_list.size());
|
||||
const int buffer_size =
|
||||
((stale_ube - stale_lbi) + (fresh_ube - fresh_lbi) +
|
||||
bnc->broadcast_list.size());
|
||||
toku::scoped_malloc offsets_buf(buffer_size * sizeof(int32_t));
|
||||
int32_t *offsets = reinterpret_cast<int32_t *>(offsets_buf.get());
|
||||
struct store_msg_buffer_offset_extra sfo_extra = { .offsets = offsets, .i = 0 };
|
||||
struct store_msg_buffer_offset_extra sfo_extra = {.offsets = offsets,
|
||||
.i = 0};
|
||||
|
||||
// Populate offsets array with offsets to stale messages
|
||||
r = bnc->stale_message_tree.iterate_on_range<struct store_msg_buffer_offset_extra, store_msg_buffer_offset>(stale_lbi, stale_ube, &sfo_extra);
|
||||
r = bnc->stale_message_tree
|
||||
.iterate_on_range<struct store_msg_buffer_offset_extra,
|
||||
store_msg_buffer_offset>(
|
||||
stale_lbi, stale_ube, &sfo_extra);
|
||||
assert_zero(r);
|
||||
|
||||
// Then store fresh offsets, and mark them to be moved to stale later.
|
||||
r = bnc->fresh_message_tree.iterate_and_mark_range<struct store_msg_buffer_offset_extra, store_msg_buffer_offset>(fresh_lbi, fresh_ube, &sfo_extra);
|
||||
r = bnc->fresh_message_tree
|
||||
.iterate_and_mark_range<struct store_msg_buffer_offset_extra,
|
||||
store_msg_buffer_offset>(
|
||||
fresh_lbi, fresh_ube, &sfo_extra);
|
||||
assert_zero(r);
|
||||
|
||||
// Store offsets of all broadcast messages.
|
||||
r = bnc->broadcast_list.iterate<struct store_msg_buffer_offset_extra, store_msg_buffer_offset>(&sfo_extra);
|
||||
r = bnc->broadcast_list.iterate<struct store_msg_buffer_offset_extra,
|
||||
store_msg_buffer_offset>(&sfo_extra);
|
||||
assert_zero(r);
|
||||
invariant(sfo_extra.i == buffer_size);
|
||||
|
||||
// Sort by MSN.
|
||||
toku::sort<int32_t, message_buffer, msg_buffer_offset_msn_cmp>::mergesort_r(offsets, buffer_size, bnc->msg_buffer);
|
||||
toku::sort<int32_t, message_buffer, msg_buffer_offset_msn_cmp>::
|
||||
mergesort_r(offsets, buffer_size, bnc->msg_buffer);
|
||||
|
||||
// Apply the messages in MSN order.
|
||||
for (int i = 0; i < buffer_size; ++i) {
|
||||
*msgs_applied = true;
|
||||
do_bn_apply_msg(
|
||||
t,
|
||||
bn,
|
||||
&bnc->msg_buffer,
|
||||
offsets[i],
|
||||
gc_info,
|
||||
&workdone_this_ancestor,
|
||||
&stats_delta,
|
||||
&logical_rows_delta);
|
||||
do_bn_apply_msg(t,
|
||||
bn,
|
||||
&bnc->msg_buffer,
|
||||
offsets[i],
|
||||
gc_info,
|
||||
&workdone_this_ancestor,
|
||||
&stats_delta,
|
||||
&logical_rows_delta);
|
||||
}
|
||||
} else if (stale_lbi == stale_ube) {
|
||||
// No stale messages to apply, we just apply fresh messages, and mark them to be moved to stale later.
|
||||
// No stale messages to apply, we just apply fresh messages, and mark
|
||||
// them to be moved to stale later.
|
||||
struct iterate_do_bn_apply_msg_extra iter_extra = {
|
||||
.t = t,
|
||||
.bn = bn,
|
||||
|
@ -477,16 +482,20 @@ static void bnc_apply_messages_to_basement_node(
|
|||
.gc_info = gc_info,
|
||||
.workdone = &workdone_this_ancestor,
|
||||
.stats_to_update = &stats_delta,
|
||||
.logical_rows_delta = &logical_rows_delta
|
||||
};
|
||||
if (fresh_ube - fresh_lbi > 0) *msgs_applied = true;
|
||||
r = bnc->fresh_message_tree.iterate_and_mark_range<struct iterate_do_bn_apply_msg_extra, iterate_do_bn_apply_msg>(fresh_lbi, fresh_ube, &iter_extra);
|
||||
.logical_rows_delta = &logical_rows_delta};
|
||||
if (fresh_ube - fresh_lbi > 0)
|
||||
*msgs_applied = true;
|
||||
r = bnc->fresh_message_tree
|
||||
.iterate_and_mark_range<struct iterate_do_bn_apply_msg_extra,
|
||||
iterate_do_bn_apply_msg>(
|
||||
fresh_lbi, fresh_ube, &iter_extra);
|
||||
assert_zero(r);
|
||||
} else {
|
||||
invariant(fresh_lbi == fresh_ube);
|
||||
// No fresh messages to apply, we just apply stale messages.
|
||||
|
||||
if (stale_ube - stale_lbi > 0) *msgs_applied = true;
|
||||
if (stale_ube - stale_lbi > 0)
|
||||
*msgs_applied = true;
|
||||
struct iterate_do_bn_apply_msg_extra iter_extra = {
|
||||
.t = t,
|
||||
.bn = bn,
|
||||
|
@ -494,22 +503,26 @@ static void bnc_apply_messages_to_basement_node(
|
|||
.gc_info = gc_info,
|
||||
.workdone = &workdone_this_ancestor,
|
||||
.stats_to_update = &stats_delta,
|
||||
.logical_rows_delta = &logical_rows_delta
|
||||
};
|
||||
.logical_rows_delta = &logical_rows_delta};
|
||||
|
||||
r = bnc->stale_message_tree.iterate_on_range<struct iterate_do_bn_apply_msg_extra, iterate_do_bn_apply_msg>(stale_lbi, stale_ube, &iter_extra);
|
||||
r = bnc->stale_message_tree
|
||||
.iterate_on_range<struct iterate_do_bn_apply_msg_extra,
|
||||
iterate_do_bn_apply_msg>(
|
||||
stale_lbi, stale_ube, &iter_extra);
|
||||
assert_zero(r);
|
||||
}
|
||||
//
|
||||
// update stats
|
||||
//
|
||||
if (workdone_this_ancestor > 0) {
|
||||
(void) toku_sync_fetch_and_add(&BP_WORKDONE(ancestor, childnum), workdone_this_ancestor);
|
||||
(void)toku_sync_fetch_and_add(&BP_WORKDONE(ancestor, childnum),
|
||||
workdone_this_ancestor);
|
||||
}
|
||||
if (stats_delta.numbytes || stats_delta.numrows) {
|
||||
toku_ft_update_stats(&t->ft->in_memory_stats, stats_delta);
|
||||
}
|
||||
toku_ft_adjust_logical_row_count(t->ft, logical_rows_delta);
|
||||
bn->logical_rows_delta += logical_rows_delta;
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
@ -199,6 +199,7 @@ struct ftnode_leaf_basement_node {
|
|||
MSN max_msn_applied; // max message sequence number applied
|
||||
bool stale_ancestor_messages_applied;
|
||||
STAT64INFO_S stat64_delta; // change in stat64 counters since basement was last written to disk
|
||||
int64_t logical_rows_delta;
|
||||
};
|
||||
typedef struct ftnode_leaf_basement_node *BASEMENTNODE;
|
||||
|
||||
|
|
|
@ -46,415 +46,214 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
|||
#include "portability/toku_stdlib.h"
|
||||
|
||||
#include "ft/serialize/block_allocator.h"
|
||||
#include "ft/serialize/block_allocator_strategy.h"
|
||||
#include "ft/serialize/rbtree_mhs.h"
|
||||
|
||||
#if TOKU_DEBUG_PARANOID
|
||||
#define VALIDATE() validate()
|
||||
#define VALIDATE() Validate()
|
||||
#else
|
||||
#define VALIDATE()
|
||||
#endif
|
||||
|
||||
static FILE *ba_trace_file = nullptr;
|
||||
|
||||
void block_allocator::maybe_initialize_trace(void) {
|
||||
const char *ba_trace_path = getenv("TOKU_BA_TRACE_PATH");
|
||||
if (ba_trace_path != nullptr) {
|
||||
ba_trace_file = toku_os_fopen(ba_trace_path, "w");
|
||||
if (ba_trace_file == nullptr) {
|
||||
fprintf(stderr, "tokuft: error: block allocator trace path found in environment (%s), "
|
||||
"but it could not be opened for writing (errno %d)\n",
|
||||
ba_trace_path, get_maybe_error_errno());
|
||||
} else {
|
||||
fprintf(stderr, "tokuft: block allocator tracing enabled, path: %s\n", ba_trace_path);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void block_allocator::maybe_close_trace() {
|
||||
if (ba_trace_file != nullptr) {
|
||||
int r = toku_os_fclose(ba_trace_file);
|
||||
if (r != 0) {
|
||||
fprintf(stderr, "tokuft: error: block allocator trace file did not close properly (r %d, errno %d)\n",
|
||||
r, get_maybe_error_errno());
|
||||
} else {
|
||||
fprintf(stderr, "tokuft: block allocator tracing finished, file closed successfully\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void block_allocator::_create_internal(uint64_t reserve_at_beginning, uint64_t alignment) {
|
||||
// the alignment must be at least 512 and aligned with 512 to work with direct I/O
|
||||
assert(alignment >= 512 && (alignment % 512) == 0);
|
||||
void BlockAllocator::CreateInternal(uint64_t reserve_at_beginning,
|
||||
uint64_t alignment) {
|
||||
// the alignment must be at least 512 and aligned with 512 to work with
|
||||
// direct I/O
|
||||
invariant(alignment >= 512 && (alignment % 512) == 0);
|
||||
|
||||
_reserve_at_beginning = reserve_at_beginning;
|
||||
_alignment = alignment;
|
||||
_n_blocks = 0;
|
||||
_blocks_array_size = 1;
|
||||
XMALLOC_N(_blocks_array_size, _blocks_array);
|
||||
_n_bytes_in_use = reserve_at_beginning;
|
||||
_strategy = BA_STRATEGY_FIRST_FIT;
|
||||
|
||||
memset(&_trace_lock, 0, sizeof(toku_mutex_t));
|
||||
toku_mutex_init(&_trace_lock, nullptr);
|
||||
_tree = new MhsRbTree::Tree(alignment);
|
||||
}
|
||||
|
||||
void BlockAllocator::Create(uint64_t reserve_at_beginning, uint64_t alignment) {
|
||||
CreateInternal(reserve_at_beginning, alignment);
|
||||
_tree->Insert({reserve_at_beginning, MAX_BYTE});
|
||||
VALIDATE();
|
||||
}
|
||||
|
||||
void block_allocator::create(uint64_t reserve_at_beginning, uint64_t alignment) {
|
||||
_create_internal(reserve_at_beginning, alignment);
|
||||
_trace_create();
|
||||
void BlockAllocator::Destroy() {
|
||||
delete _tree;
|
||||
}
|
||||
|
||||
void block_allocator::destroy() {
|
||||
toku_free(_blocks_array);
|
||||
_trace_destroy();
|
||||
toku_mutex_destroy(&_trace_lock);
|
||||
}
|
||||
|
||||
void block_allocator::set_strategy(enum allocation_strategy strategy) {
|
||||
_strategy = strategy;
|
||||
}
|
||||
|
||||
void block_allocator::grow_blocks_array_by(uint64_t n_to_add) {
|
||||
if (_n_blocks + n_to_add > _blocks_array_size) {
|
||||
uint64_t new_size = _n_blocks + n_to_add;
|
||||
uint64_t at_least = _blocks_array_size * 2;
|
||||
if (at_least > new_size) {
|
||||
new_size = at_least;
|
||||
}
|
||||
_blocks_array_size = new_size;
|
||||
XREALLOC_N(_blocks_array_size, _blocks_array);
|
||||
}
|
||||
}
|
||||
|
||||
void block_allocator::grow_blocks_array() {
|
||||
grow_blocks_array_by(1);
|
||||
}
|
||||
|
||||
void block_allocator::create_from_blockpairs(uint64_t reserve_at_beginning, uint64_t alignment,
|
||||
struct blockpair *pairs, uint64_t n_blocks) {
|
||||
_create_internal(reserve_at_beginning, alignment);
|
||||
|
||||
void BlockAllocator::CreateFromBlockPairs(uint64_t reserve_at_beginning,
|
||||
uint64_t alignment,
|
||||
struct BlockPair *translation_pairs,
|
||||
uint64_t n_blocks) {
|
||||
CreateInternal(reserve_at_beginning, alignment);
|
||||
_n_blocks = n_blocks;
|
||||
grow_blocks_array_by(_n_blocks);
|
||||
memcpy(_blocks_array, pairs, _n_blocks * sizeof(struct blockpair));
|
||||
std::sort(_blocks_array, _blocks_array + _n_blocks);
|
||||
for (uint64_t i = 0; i < _n_blocks; i++) {
|
||||
// Allocator does not support size 0 blocks. See block_allocator_free_block.
|
||||
invariant(_blocks_array[i].size > 0);
|
||||
invariant(_blocks_array[i].offset >= _reserve_at_beginning);
|
||||
invariant(_blocks_array[i].offset % _alignment == 0);
|
||||
|
||||
_n_bytes_in_use += _blocks_array[i].size;
|
||||
struct BlockPair *XMALLOC_N(n_blocks, pairs);
|
||||
memcpy(pairs, translation_pairs, n_blocks * sizeof(struct BlockPair));
|
||||
std::sort(pairs, pairs + n_blocks);
|
||||
|
||||
if (pairs[0]._offset > reserve_at_beginning) {
|
||||
_tree->Insert(
|
||||
{reserve_at_beginning, pairs[0]._offset - reserve_at_beginning});
|
||||
}
|
||||
for (uint64_t i = 0; i < _n_blocks; i++) {
|
||||
// Allocator does not support size 0 blocks. See
|
||||
// block_allocator_free_block.
|
||||
invariant(pairs[i]._size > 0);
|
||||
invariant(pairs[i]._offset >= _reserve_at_beginning);
|
||||
invariant(pairs[i]._offset % _alignment == 0);
|
||||
|
||||
_n_bytes_in_use += pairs[i]._size;
|
||||
|
||||
MhsRbTree::OUUInt64 free_size(MAX_BYTE);
|
||||
MhsRbTree::OUUInt64 free_offset(pairs[i]._offset + pairs[i]._size);
|
||||
if (i < n_blocks - 1) {
|
||||
MhsRbTree::OUUInt64 next_offset(pairs[i + 1]._offset);
|
||||
invariant(next_offset >= free_offset);
|
||||
free_size = next_offset - free_offset;
|
||||
if (free_size == 0)
|
||||
continue;
|
||||
}
|
||||
_tree->Insert({free_offset, free_size});
|
||||
}
|
||||
toku_free(pairs);
|
||||
VALIDATE();
|
||||
|
||||
_trace_create_from_blockpairs();
|
||||
}
|
||||
|
||||
// Effect: align a value by rounding up.
|
||||
static inline uint64_t align(uint64_t value, uint64_t ba_alignment) {
|
||||
static inline uint64_t Align(uint64_t value, uint64_t ba_alignment) {
|
||||
return ((value + ba_alignment - 1) / ba_alignment) * ba_alignment;
|
||||
}
|
||||
|
||||
struct block_allocator::blockpair *
|
||||
block_allocator::choose_block_to_alloc_after(size_t size, uint64_t heat) {
|
||||
switch (_strategy) {
|
||||
case BA_STRATEGY_FIRST_FIT:
|
||||
return block_allocator_strategy::first_fit(_blocks_array, _n_blocks, size, _alignment);
|
||||
case BA_STRATEGY_BEST_FIT:
|
||||
return block_allocator_strategy::best_fit(_blocks_array, _n_blocks, size, _alignment);
|
||||
case BA_STRATEGY_HEAT_ZONE:
|
||||
return block_allocator_strategy::heat_zone(_blocks_array, _n_blocks, size, _alignment, heat);
|
||||
case BA_STRATEGY_PADDED_FIT:
|
||||
return block_allocator_strategy::padded_fit(_blocks_array, _n_blocks, size, _alignment);
|
||||
default:
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
// Effect: Allocate a block. The resulting block must be aligned on the ba->alignment (which to make direct_io happy must be a positive multiple of 512).
|
||||
void block_allocator::alloc_block(uint64_t size, uint64_t heat, uint64_t *offset) {
|
||||
struct blockpair *bp;
|
||||
|
||||
// Effect: Allocate a block. The resulting block must be aligned on the
|
||||
// ba->alignment (which to make direct_io happy must be a positive multiple of
|
||||
// 512).
|
||||
void BlockAllocator::AllocBlock(uint64_t size,
|
||||
uint64_t *offset) {
|
||||
// Allocator does not support size 0 blocks. See block_allocator_free_block.
|
||||
invariant(size > 0);
|
||||
|
||||
grow_blocks_array();
|
||||
_n_bytes_in_use += size;
|
||||
*offset = _tree->Remove(size);
|
||||
|
||||
uint64_t end_of_reserve = align(_reserve_at_beginning, _alignment);
|
||||
|
||||
if (_n_blocks == 0) {
|
||||
// First and only block
|
||||
assert(_n_bytes_in_use == _reserve_at_beginning + size); // we know exactly how many are in use
|
||||
_blocks_array[0].offset = align(_reserve_at_beginning, _alignment);
|
||||
_blocks_array[0].size = size;
|
||||
*offset = _blocks_array[0].offset;
|
||||
goto done;
|
||||
} else if (end_of_reserve + size <= _blocks_array[0].offset ) {
|
||||
// Check to see if the space immediately after the reserve is big enough to hold the new block.
|
||||
bp = &_blocks_array[0];
|
||||
memmove(bp + 1, bp, _n_blocks * sizeof(*bp));
|
||||
bp[0].offset = end_of_reserve;
|
||||
bp[0].size = size;
|
||||
*offset = end_of_reserve;
|
||||
goto done;
|
||||
}
|
||||
|
||||
bp = choose_block_to_alloc_after(size, heat);
|
||||
if (bp != nullptr) {
|
||||
// our allocation strategy chose the space after `bp' to fit the new block
|
||||
uint64_t answer_offset = align(bp->offset + bp->size, _alignment);
|
||||
uint64_t blocknum = bp - _blocks_array;
|
||||
invariant(&_blocks_array[blocknum] == bp);
|
||||
invariant(blocknum < _n_blocks);
|
||||
memmove(bp + 2, bp + 1, (_n_blocks - blocknum - 1) * sizeof(*bp));
|
||||
bp[1].offset = answer_offset;
|
||||
bp[1].size = size;
|
||||
*offset = answer_offset;
|
||||
} else {
|
||||
// It didn't fit anywhere, so fit it on the end.
|
||||
assert(_n_blocks < _blocks_array_size);
|
||||
bp = &_blocks_array[_n_blocks];
|
||||
uint64_t answer_offset = align(bp[-1].offset + bp[-1].size, _alignment);
|
||||
bp->offset = answer_offset;
|
||||
bp->size = size;
|
||||
*offset = answer_offset;
|
||||
}
|
||||
|
||||
done:
|
||||
_n_blocks++;
|
||||
VALIDATE();
|
||||
|
||||
_trace_alloc(size, heat, *offset);
|
||||
}
|
||||
|
||||
// Find the index in the blocks array that has a particular offset. Requires that the block exist.
|
||||
// Use binary search so it runs fast.
|
||||
int64_t block_allocator::find_block(uint64_t offset) {
|
||||
VALIDATE();
|
||||
if (_n_blocks == 1) {
|
||||
assert(_blocks_array[0].offset == offset);
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint64_t lo = 0;
|
||||
uint64_t hi = _n_blocks;
|
||||
while (1) {
|
||||
assert(lo < hi); // otherwise no such block exists.
|
||||
uint64_t mid = (lo + hi) / 2;
|
||||
uint64_t thisoff = _blocks_array[mid].offset;
|
||||
if (thisoff < offset) {
|
||||
lo = mid + 1;
|
||||
} else if (thisoff > offset) {
|
||||
hi = mid;
|
||||
} else {
|
||||
return mid;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// To support 0-sized blocks, we need to include size as an input to this function.
|
||||
// To support 0-sized blocks, we need to include size as an input to this
|
||||
// function.
|
||||
// All 0-sized blocks at the same offset can be considered identical, but
|
||||
// a 0-sized block can share offset with a non-zero sized block.
|
||||
// The non-zero sized block is not exchangeable with a zero sized block (or vice versa),
|
||||
// so inserting 0-sized blocks can cause corruption here.
|
||||
void block_allocator::free_block(uint64_t offset) {
|
||||
// The non-zero sized block is not exchangeable with a zero sized block (or vice
|
||||
// versa), so inserting 0-sized blocks can cause corruption here.
|
||||
void BlockAllocator::FreeBlock(uint64_t offset, uint64_t size) {
|
||||
VALIDATE();
|
||||
int64_t bn = find_block(offset);
|
||||
assert(bn >= 0); // we require that there is a block with that offset.
|
||||
_n_bytes_in_use -= _blocks_array[bn].size;
|
||||
memmove(&_blocks_array[bn], &_blocks_array[bn + 1],
|
||||
(_n_blocks - bn - 1) * sizeof(struct blockpair));
|
||||
_n_bytes_in_use -= size;
|
||||
_tree->Insert({offset, size});
|
||||
_n_blocks--;
|
||||
VALIDATE();
|
||||
|
||||
_trace_free(offset);
|
||||
}
|
||||
|
||||
uint64_t block_allocator::block_size(uint64_t offset) {
|
||||
int64_t bn = find_block(offset);
|
||||
assert(bn >=0); // we require that there is a block with that offset.
|
||||
return _blocks_array[bn].size;
|
||||
uint64_t BlockAllocator::AllocatedLimit() const {
|
||||
MhsRbTree::Node *max_node = _tree->MaxNode();
|
||||
return rbn_offset(max_node).ToInt();
|
||||
}
|
||||
|
||||
uint64_t block_allocator::allocated_limit() const {
|
||||
if (_n_blocks == 0) {
|
||||
return _reserve_at_beginning;
|
||||
} else {
|
||||
struct blockpair *last = &_blocks_array[_n_blocks - 1];
|
||||
return last->offset + last->size;
|
||||
}
|
||||
}
|
||||
|
||||
// Effect: Consider the blocks in sorted order. The reserved block at the beginning is number 0. The next one is number 1 and so forth.
|
||||
// Effect: Consider the blocks in sorted order. The reserved block at the
|
||||
// beginning is number 0. The next one is number 1 and so forth.
|
||||
// Return the offset and size of the block with that number.
|
||||
// Return 0 if there is a block that big, return nonzero if b is too big.
|
||||
int block_allocator::get_nth_block_in_layout_order(uint64_t b, uint64_t *offset, uint64_t *size) {
|
||||
if (b ==0 ) {
|
||||
int BlockAllocator::NthBlockInLayoutOrder(uint64_t b,
|
||||
uint64_t *offset,
|
||||
uint64_t *size) {
|
||||
MhsRbTree::Node *x, *y;
|
||||
if (b == 0) {
|
||||
*offset = 0;
|
||||
*size = _reserve_at_beginning;
|
||||
return 0;
|
||||
return 0;
|
||||
} else if (b > _n_blocks) {
|
||||
return -1;
|
||||
} else {
|
||||
*offset =_blocks_array[b - 1].offset;
|
||||
*size =_blocks_array[b - 1].size;
|
||||
x = _tree->MinNode();
|
||||
for (uint64_t i = 1; i <= b; i++) {
|
||||
y = x;
|
||||
x = _tree->Successor(x);
|
||||
}
|
||||
*size = (rbn_offset(x) - (rbn_offset(y) + rbn_size(y))).ToInt();
|
||||
*offset = (rbn_offset(y) + rbn_size(y)).ToInt();
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
struct VisUnusedExtra {
|
||||
TOKU_DB_FRAGMENTATION _report;
|
||||
uint64_t _align;
|
||||
};
|
||||
|
||||
static void VisUnusedCollector(void *extra,
|
||||
MhsRbTree::Node *node,
|
||||
uint64_t UU(depth)) {
|
||||
struct VisUnusedExtra *v_e = (struct VisUnusedExtra *)extra;
|
||||
TOKU_DB_FRAGMENTATION report = v_e->_report;
|
||||
uint64_t alignm = v_e->_align;
|
||||
|
||||
MhsRbTree::OUUInt64 offset = rbn_offset(node);
|
||||
MhsRbTree::OUUInt64 size = rbn_size(node);
|
||||
MhsRbTree::OUUInt64 answer_offset(Align(offset.ToInt(), alignm));
|
||||
uint64_t free_space = (offset + size - answer_offset).ToInt();
|
||||
if (free_space > 0) {
|
||||
report->unused_bytes += free_space;
|
||||
report->unused_blocks++;
|
||||
if (free_space > report->largest_unused_block) {
|
||||
report->largest_unused_block = free_space;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Requires: report->file_size_bytes is filled in
|
||||
// Requires: report->data_bytes is filled in
|
||||
// Requires: report->checkpoint_bytes_additional is filled in
|
||||
void block_allocator::get_unused_statistics(TOKU_DB_FRAGMENTATION report) {
|
||||
assert(_n_bytes_in_use == report->data_bytes + report->checkpoint_bytes_additional);
|
||||
void BlockAllocator::UnusedStatistics(TOKU_DB_FRAGMENTATION report) {
|
||||
invariant(_n_bytes_in_use ==
|
||||
report->data_bytes + report->checkpoint_bytes_additional);
|
||||
|
||||
report->unused_bytes = 0;
|
||||
report->unused_blocks = 0;
|
||||
report->largest_unused_block = 0;
|
||||
if (_n_blocks > 0) {
|
||||
//Deal with space before block 0 and after reserve:
|
||||
{
|
||||
struct blockpair *bp = &_blocks_array[0];
|
||||
assert(bp->offset >= align(_reserve_at_beginning, _alignment));
|
||||
uint64_t free_space = bp->offset - align(_reserve_at_beginning, _alignment);
|
||||
if (free_space > 0) {
|
||||
report->unused_bytes += free_space;
|
||||
report->unused_blocks++;
|
||||
if (free_space > report->largest_unused_block) {
|
||||
report->largest_unused_block = free_space;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//Deal with space between blocks:
|
||||
for (uint64_t blocknum = 0; blocknum +1 < _n_blocks; blocknum ++) {
|
||||
// Consider the space after blocknum
|
||||
struct blockpair *bp = &_blocks_array[blocknum];
|
||||
uint64_t this_offset = bp[0].offset;
|
||||
uint64_t this_size = bp[0].size;
|
||||
uint64_t end_of_this_block = align(this_offset+this_size, _alignment);
|
||||
uint64_t next_offset = bp[1].offset;
|
||||
uint64_t free_space = next_offset - end_of_this_block;
|
||||
if (free_space > 0) {
|
||||
report->unused_bytes += free_space;
|
||||
report->unused_blocks++;
|
||||
if (free_space > report->largest_unused_block) {
|
||||
report->largest_unused_block = free_space;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//Deal with space after last block
|
||||
{
|
||||
struct blockpair *bp = &_blocks_array[_n_blocks-1];
|
||||
uint64_t this_offset = bp[0].offset;
|
||||
uint64_t this_size = bp[0].size;
|
||||
uint64_t end_of_this_block = align(this_offset+this_size, _alignment);
|
||||
if (end_of_this_block < report->file_size_bytes) {
|
||||
uint64_t free_space = report->file_size_bytes - end_of_this_block;
|
||||
assert(free_space > 0);
|
||||
report->unused_bytes += free_space;
|
||||
report->unused_blocks++;
|
||||
if (free_space > report->largest_unused_block) {
|
||||
report->largest_unused_block = free_space;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// No blocks. Just the reserve.
|
||||
uint64_t end_of_this_block = align(_reserve_at_beginning, _alignment);
|
||||
if (end_of_this_block < report->file_size_bytes) {
|
||||
uint64_t free_space = report->file_size_bytes - end_of_this_block;
|
||||
assert(free_space > 0);
|
||||
report->unused_bytes += free_space;
|
||||
report->unused_blocks++;
|
||||
if (free_space > report->largest_unused_block) {
|
||||
report->largest_unused_block = free_space;
|
||||
}
|
||||
}
|
||||
}
|
||||
struct VisUnusedExtra extra = {report, _alignment};
|
||||
_tree->InOrderVisitor(VisUnusedCollector, &extra);
|
||||
}
|
||||
|
||||
void block_allocator::get_statistics(TOKU_DB_FRAGMENTATION report) {
|
||||
report->data_bytes = _n_bytes_in_use;
|
||||
report->data_blocks = _n_blocks;
|
||||
void BlockAllocator::Statistics(TOKU_DB_FRAGMENTATION report) {
|
||||
report->data_bytes = _n_bytes_in_use;
|
||||
report->data_blocks = _n_blocks;
|
||||
report->file_size_bytes = 0;
|
||||
report->checkpoint_bytes_additional = 0;
|
||||
get_unused_statistics(report);
|
||||
UnusedStatistics(report);
|
||||
}
|
||||
|
||||
void block_allocator::validate() const {
|
||||
uint64_t n_bytes_in_use = _reserve_at_beginning;
|
||||
for (uint64_t i = 0; i < _n_blocks; i++) {
|
||||
n_bytes_in_use += _blocks_array[i].size;
|
||||
if (i > 0) {
|
||||
assert(_blocks_array[i].offset > _blocks_array[i - 1].offset);
|
||||
assert(_blocks_array[i].offset >= _blocks_array[i - 1].offset + _blocks_array[i - 1].size );
|
||||
}
|
||||
struct ValidateExtra {
|
||||
uint64_t _bytes;
|
||||
MhsRbTree::Node *_pre_node;
|
||||
};
|
||||
static void VisUsedBlocksInOrder(void *extra,
|
||||
MhsRbTree::Node *cur_node,
|
||||
uint64_t UU(depth)) {
|
||||
struct ValidateExtra *v_e = (struct ValidateExtra *)extra;
|
||||
MhsRbTree::Node *pre_node = v_e->_pre_node;
|
||||
// verify no overlaps
|
||||
if (pre_node) {
|
||||
invariant(rbn_size(pre_node) > 0);
|
||||
invariant(rbn_offset(cur_node) >
|
||||
rbn_offset(pre_node) + rbn_size(pre_node));
|
||||
MhsRbTree::OUUInt64 used_space =
|
||||
rbn_offset(cur_node) - (rbn_offset(pre_node) + rbn_size(pre_node));
|
||||
v_e->_bytes += used_space.ToInt();
|
||||
} else {
|
||||
v_e->_bytes += rbn_offset(cur_node).ToInt();
|
||||
}
|
||||
assert(n_bytes_in_use == _n_bytes_in_use);
|
||||
v_e->_pre_node = cur_node;
|
||||
}
|
||||
|
||||
// Tracing
|
||||
|
||||
void block_allocator::_trace_create(void) {
|
||||
if (ba_trace_file != nullptr) {
|
||||
toku_mutex_lock(&_trace_lock);
|
||||
fprintf(ba_trace_file, "ba_trace_create %p %" PRIu64 " %" PRIu64 "\n",
|
||||
this, _reserve_at_beginning, _alignment);
|
||||
toku_mutex_unlock(&_trace_lock);
|
||||
|
||||
fflush(ba_trace_file);
|
||||
}
|
||||
}
|
||||
|
||||
void block_allocator::_trace_create_from_blockpairs(void) {
|
||||
if (ba_trace_file != nullptr) {
|
||||
toku_mutex_lock(&_trace_lock);
|
||||
fprintf(ba_trace_file, "ba_trace_create_from_blockpairs %p %" PRIu64 " %" PRIu64 " ",
|
||||
this, _reserve_at_beginning, _alignment);
|
||||
for (uint64_t i = 0; i < _n_blocks; i++) {
|
||||
fprintf(ba_trace_file, "[%" PRIu64 " %" PRIu64 "] ",
|
||||
_blocks_array[i].offset, _blocks_array[i].size);
|
||||
}
|
||||
fprintf(ba_trace_file, "\n");
|
||||
toku_mutex_unlock(&_trace_lock);
|
||||
|
||||
fflush(ba_trace_file);
|
||||
}
|
||||
}
|
||||
|
||||
void block_allocator::_trace_destroy(void) {
|
||||
if (ba_trace_file != nullptr) {
|
||||
toku_mutex_lock(&_trace_lock);
|
||||
fprintf(ba_trace_file, "ba_trace_destroy %p\n", this);
|
||||
toku_mutex_unlock(&_trace_lock);
|
||||
|
||||
fflush(ba_trace_file);
|
||||
}
|
||||
}
|
||||
|
||||
void block_allocator::_trace_alloc(uint64_t size, uint64_t heat, uint64_t offset) {
|
||||
if (ba_trace_file != nullptr) {
|
||||
toku_mutex_lock(&_trace_lock);
|
||||
fprintf(ba_trace_file, "ba_trace_alloc %p %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
|
||||
this, size, heat, offset);
|
||||
toku_mutex_unlock(&_trace_lock);
|
||||
|
||||
fflush(ba_trace_file);
|
||||
}
|
||||
}
|
||||
|
||||
void block_allocator::_trace_free(uint64_t offset) {
|
||||
if (ba_trace_file != nullptr) {
|
||||
toku_mutex_lock(&_trace_lock);
|
||||
fprintf(ba_trace_file, "ba_trace_free %p %" PRIu64 "\n", this, offset);
|
||||
toku_mutex_unlock(&_trace_lock);
|
||||
|
||||
fflush(ba_trace_file);
|
||||
}
|
||||
void BlockAllocator::Validate() const {
|
||||
_tree->ValidateBalance();
|
||||
_tree->ValidateMhs();
|
||||
struct ValidateExtra extra = {0, nullptr};
|
||||
_tree->InOrderVisitor(VisUsedBlocksInOrder, &extra);
|
||||
invariant(extra._bytes == _n_bytes_in_use);
|
||||
}
|
||||
|
|
|
@ -43,6 +43,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
|||
#include "portability/toku_pthread.h"
|
||||
#include "portability/toku_stdint.h"
|
||||
#include "portability/toku_stdlib.h"
|
||||
#include "ft/serialize/rbtree_mhs.h"
|
||||
|
||||
// Block allocator.
|
||||
//
|
||||
|
@ -51,151 +52,128 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
|||
// The allocation of block numbers is handled elsewhere.
|
||||
//
|
||||
// When creating a block allocator we also specify a certain-sized
|
||||
// block at the beginning that is preallocated (and cannot be allocated or freed)
|
||||
// block at the beginning that is preallocated (and cannot be allocated or
|
||||
// freed)
|
||||
//
|
||||
// We can allocate blocks of a particular size at a particular location.
|
||||
// We can allocate blocks of a particular size at a location chosen by the allocator.
|
||||
// We can free blocks.
|
||||
// We can determine the size of a block.
|
||||
|
||||
class block_allocator {
|
||||
public:
|
||||
#define MAX_BYTE 0xffffffffffffffff
|
||||
class BlockAllocator {
|
||||
public:
|
||||
static const size_t BLOCK_ALLOCATOR_ALIGNMENT = 4096;
|
||||
|
||||
// How much must be reserved at the beginning for the block?
|
||||
// The actual header is 8+4+4+8+8_4+8+ the length of the db names + 1 pointer for each root.
|
||||
// The actual header is 8+4+4+8+8_4+8+ the length of the db names + 1
|
||||
// pointer for each root.
|
||||
// So 4096 should be enough.
|
||||
static const size_t BLOCK_ALLOCATOR_HEADER_RESERVE = 4096;
|
||||
|
||||
static_assert(BLOCK_ALLOCATOR_HEADER_RESERVE % BLOCK_ALLOCATOR_ALIGNMENT == 0,
|
||||
|
||||
static_assert(BLOCK_ALLOCATOR_HEADER_RESERVE % BLOCK_ALLOCATOR_ALIGNMENT ==
|
||||
0,
|
||||
"block allocator header must have proper alignment");
|
||||
|
||||
static const size_t BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE = BLOCK_ALLOCATOR_HEADER_RESERVE * 2;
|
||||
static const size_t BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE =
|
||||
BLOCK_ALLOCATOR_HEADER_RESERVE * 2;
|
||||
|
||||
enum allocation_strategy {
|
||||
BA_STRATEGY_FIRST_FIT = 1,
|
||||
BA_STRATEGY_BEST_FIT,
|
||||
BA_STRATEGY_PADDED_FIT,
|
||||
BA_STRATEGY_HEAT_ZONE
|
||||
struct BlockPair {
|
||||
uint64_t _offset;
|
||||
uint64_t _size;
|
||||
BlockPair(uint64_t o, uint64_t s) : _offset(o), _size(s) {}
|
||||
int operator<(const struct BlockPair &rhs) const {
|
||||
return _offset < rhs._offset;
|
||||
}
|
||||
int operator<(const uint64_t &o) const { return _offset < o; }
|
||||
};
|
||||
|
||||
struct blockpair {
|
||||
uint64_t offset;
|
||||
uint64_t size;
|
||||
blockpair(uint64_t o, uint64_t s) :
|
||||
offset(o), size(s) {
|
||||
}
|
||||
int operator<(const struct blockpair &rhs) const {
|
||||
return offset < rhs.offset;
|
||||
}
|
||||
int operator<(const uint64_t &o) const {
|
||||
return offset < o;
|
||||
}
|
||||
};
|
||||
|
||||
// Effect: Create a block allocator, in which the first RESERVE_AT_BEGINNING bytes are not put into a block.
|
||||
// The default allocation strategy is first fit (BA_STRATEGY_FIRST_FIT)
|
||||
// Effect: Create a block allocator, in which the first RESERVE_AT_BEGINNING
|
||||
// bytes are not put into a block.
|
||||
// The default allocation strategy is first fit
|
||||
// (BA_STRATEGY_FIRST_FIT)
|
||||
// All blocks must start on a multiple of ALIGNMENT.
|
||||
// Aborts if we run out of memory.
|
||||
// Parameters
|
||||
// reserve_at_beginning (IN) Size of reserved block at beginning. This size does not have to be aligned.
|
||||
// reserve_at_beginning (IN) Size of reserved block at beginning.
|
||||
// This size does not have to be aligned.
|
||||
// alignment (IN) Block alignment.
|
||||
void create(uint64_t reserve_at_beginning, uint64_t alignment);
|
||||
void Create(uint64_t reserve_at_beginning, uint64_t alignment);
|
||||
|
||||
// Effect: Create a block allocator, in which the first RESERVE_AT_BEGINNING bytes are not put into a block.
|
||||
// The default allocation strategy is first fit (BA_STRATEGY_FIRST_FIT)
|
||||
// The allocator is initialized to contain `n_blocks' of blockpairs, taken from `pairs'
|
||||
// Effect: Create a block allocator, in which the first RESERVE_AT_BEGINNING
|
||||
// bytes are not put into a block.
|
||||
// The allocator is initialized to contain `n_blocks' of BlockPairs,
|
||||
// taken from `pairs'
|
||||
// All blocks must start on a multiple of ALIGNMENT.
|
||||
// Aborts if we run out of memory.
|
||||
// Parameters
|
||||
// pairs, unowned array of pairs to copy
|
||||
// n_blocks, Size of pairs array
|
||||
// reserve_at_beginning (IN) Size of reserved block at beginning. This size does not have to be aligned.
|
||||
// reserve_at_beginning (IN) Size of reserved block at beginning.
|
||||
// This size does not have to be aligned.
|
||||
// alignment (IN) Block alignment.
|
||||
void create_from_blockpairs(uint64_t reserve_at_beginning, uint64_t alignment,
|
||||
struct blockpair *pairs, uint64_t n_blocks);
|
||||
void CreateFromBlockPairs(uint64_t reserve_at_beginning,
|
||||
uint64_t alignment,
|
||||
struct BlockPair *pairs,
|
||||
uint64_t n_blocks);
|
||||
|
||||
// Effect: Destroy this block allocator
|
||||
void destroy();
|
||||
void Destroy();
|
||||
|
||||
// Effect: Set the allocation strategy that the allocator should use
|
||||
// Requires: No other threads are operating on this block allocator
|
||||
void set_strategy(enum allocation_strategy strategy);
|
||||
|
||||
// Effect: Allocate a block of the specified size at an address chosen by the allocator.
|
||||
// Effect: Allocate a block of the specified size at an address chosen by
|
||||
// the allocator.
|
||||
// Aborts if anything goes wrong.
|
||||
// The block address will be a multiple of the alignment.
|
||||
// Parameters:
|
||||
// size (IN): The size of the block. (The size does not have to be aligned.)
|
||||
// size (IN): The size of the block. (The size does not have to be
|
||||
// aligned.)
|
||||
// offset (OUT): The location of the block.
|
||||
// heat (IN): A higher heat means we should be prepared to free this block soon (perhaps in the next checkpoint)
|
||||
// Heat values are lexicographically ordered (like integers), but their specific values are arbitrary
|
||||
void alloc_block(uint64_t size, uint64_t heat, uint64_t *offset);
|
||||
// block soon (perhaps in the next checkpoint)
|
||||
// Heat values are lexicographically ordered (like integers),
|
||||
// but their specific values are arbitrary
|
||||
void AllocBlock(uint64_t size, uint64_t *offset);
|
||||
|
||||
// Effect: Free the block at offset.
|
||||
// Requires: There must be a block currently allocated at that offset.
|
||||
// Parameters:
|
||||
// offset (IN): The offset of the block.
|
||||
void free_block(uint64_t offset);
|
||||
void FreeBlock(uint64_t offset, uint64_t size);
|
||||
|
||||
// Effect: Return the size of the block that starts at offset.
|
||||
// Requires: There must be a block currently allocated at that offset.
|
||||
// Parameters:
|
||||
// offset (IN): The offset of the block.
|
||||
uint64_t block_size(uint64_t offset);
|
||||
|
||||
// Effect: Check to see if the block allocator is OK. This may take a long time.
|
||||
// Effect: Check to see if the block allocator is OK. This may take a long
|
||||
// time.
|
||||
// Usage Hints: Probably only use this for unit tests.
|
||||
// TODO: Private?
|
||||
void validate() const;
|
||||
void Validate() const;
|
||||
|
||||
// Effect: Return the unallocated block address of "infinite" size.
|
||||
// That is, return the smallest address that is above all the allocated blocks.
|
||||
uint64_t allocated_limit() const;
|
||||
// That is, return the smallest address that is above all the allocated
|
||||
// blocks.
|
||||
uint64_t AllocatedLimit() const;
|
||||
|
||||
// Effect: Consider the blocks in sorted order. The reserved block at the beginning is number 0. The next one is number 1 and so forth.
|
||||
// Effect: Consider the blocks in sorted order. The reserved block at the
|
||||
// beginning is number 0. The next one is number 1 and so forth.
|
||||
// Return the offset and size of the block with that number.
|
||||
// Return 0 if there is a block that big, return nonzero if b is too big.
|
||||
// Rationale: This is probably useful only for tests.
|
||||
int get_nth_block_in_layout_order(uint64_t b, uint64_t *offset, uint64_t *size);
|
||||
int NthBlockInLayoutOrder(uint64_t b, uint64_t *offset, uint64_t *size);
|
||||
|
||||
// Effect: Fill in report to indicate how the file is used.
|
||||
// Requires:
|
||||
// Requires:
|
||||
// report->file_size_bytes is filled in
|
||||
// report->data_bytes is filled in
|
||||
// report->checkpoint_bytes_additional is filled in
|
||||
void get_unused_statistics(TOKU_DB_FRAGMENTATION report);
|
||||
void UnusedStatistics(TOKU_DB_FRAGMENTATION report);
|
||||
|
||||
// Effect: Fill in report->data_bytes with the number of bytes in use
|
||||
// Fill in report->data_blocks with the number of blockpairs in use
|
||||
// Fill in report->data_blocks with the number of BlockPairs in use
|
||||
// Fill in unused statistics using this->get_unused_statistics()
|
||||
// Requires:
|
||||
// report->file_size is ignored on return
|
||||
// report->checkpoint_bytes_additional is ignored on return
|
||||
void get_statistics(TOKU_DB_FRAGMENTATION report);
|
||||
void Statistics(TOKU_DB_FRAGMENTATION report);
|
||||
|
||||
// Block allocator tracing.
|
||||
// - Enabled by setting TOKU_BA_TRACE_PATH to the file that the trace file
|
||||
// should be written to.
|
||||
// - Trace may be replayed by ba_trace_replay tool in tools/ directory
|
||||
// eg: "cat mytracefile | ba_trace_replay"
|
||||
static void maybe_initialize_trace();
|
||||
static void maybe_close_trace();
|
||||
virtual ~BlockAllocator(){};
|
||||
|
||||
private:
|
||||
void _create_internal(uint64_t reserve_at_beginning, uint64_t alignment);
|
||||
void grow_blocks_array_by(uint64_t n_to_add);
|
||||
void grow_blocks_array();
|
||||
int64_t find_block(uint64_t offset);
|
||||
struct blockpair *choose_block_to_alloc_after(size_t size, uint64_t heat);
|
||||
|
||||
// Tracing
|
||||
toku_mutex_t _trace_lock;
|
||||
void _trace_create(void);
|
||||
void _trace_create_from_blockpairs(void);
|
||||
void _trace_destroy(void);
|
||||
void _trace_alloc(uint64_t size, uint64_t heat, uint64_t offset);
|
||||
void _trace_free(uint64_t offset);
|
||||
private:
|
||||
void CreateInternal(uint64_t reserve_at_beginning, uint64_t alignment);
|
||||
|
||||
// How much to reserve at the beginning
|
||||
uint64_t _reserve_at_beginning;
|
||||
|
@ -203,12 +181,8 @@ private:
|
|||
uint64_t _alignment;
|
||||
// How many blocks
|
||||
uint64_t _n_blocks;
|
||||
// How big is the blocks_array. Must be >= n_blocks.
|
||||
uint64_t _blocks_array_size;
|
||||
// These blocks are sorted by address.
|
||||
struct blockpair *_blocks_array;
|
||||
// Including the reserve_at_beginning
|
||||
uint64_t _n_bytes_in_use;
|
||||
// The allocation strategy are we using
|
||||
enum allocation_strategy _strategy;
|
||||
|
||||
// These blocks are sorted by address.
|
||||
MhsRbTree::Tree *_tree;
|
||||
};
|
||||
|
|
|
@ -1,224 +0,0 @@
|
|||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
#ident "$Id$"
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "portability/toku_assert.h"
|
||||
|
||||
#include "ft/serialize/block_allocator_strategy.h"
|
||||
|
||||
// Round `value` up to the nearest multiple of `ba_alignment`.
// `ba_alignment` must be nonzero (it is used as a divisor).
static uint64_t _align(uint64_t value, uint64_t ba_alignment) {
    // Bump past the next boundary, then drop the remainder; this is the
    // same unsigned arithmetic as ((value + a - 1) / a) * a.
    const uint64_t bumped = value + ba_alignment - 1;
    return bumped - (bumped % ba_alignment);
}
|
||||
|
||||
static uint64_t _roundup_to_power_of_two(uint64_t value) {
|
||||
uint64_t r = 4096;
|
||||
while (r < value) {
|
||||
r *= 2;
|
||||
invariant(r > 0);
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
// First fit block allocation
|
||||
static struct block_allocator::blockpair *
|
||||
_first_fit(struct block_allocator::blockpair *blocks_array,
|
||||
uint64_t n_blocks, uint64_t size, uint64_t alignment,
|
||||
uint64_t max_padding) {
|
||||
if (n_blocks == 1) {
|
||||
// won't enter loop, can't underflow the direction < 0 case
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
struct block_allocator::blockpair *bp = &blocks_array[0];
|
||||
for (uint64_t n_spaces_to_check = n_blocks - 1; n_spaces_to_check > 0;
|
||||
n_spaces_to_check--, bp++) {
|
||||
// Consider the space after bp
|
||||
uint64_t padded_alignment = max_padding != 0 ? _align(max_padding, alignment) : alignment;
|
||||
uint64_t possible_offset = _align(bp->offset + bp->size, padded_alignment);
|
||||
if (possible_offset + size <= bp[1].offset) { // bp[1] is always valid since bp < &blocks_array[n_blocks-1]
|
||||
invariant(bp - blocks_array < (int64_t) n_blocks);
|
||||
return bp;
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
static struct block_allocator::blockpair *
|
||||
_first_fit_bw(struct block_allocator::blockpair *blocks_array,
|
||||
uint64_t n_blocks, uint64_t size, uint64_t alignment,
|
||||
uint64_t max_padding, struct block_allocator::blockpair *blocks_array_limit) {
|
||||
if (n_blocks == 1) {
|
||||
// won't enter loop, can't underflow the direction < 0 case
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
struct block_allocator::blockpair *bp = &blocks_array[-1];
|
||||
for (uint64_t n_spaces_to_check = n_blocks - 1; n_spaces_to_check > 0;
|
||||
n_spaces_to_check--, bp--) {
|
||||
// Consider the space after bp
|
||||
uint64_t padded_alignment = max_padding != 0 ? _align(max_padding, alignment) : alignment;
|
||||
uint64_t possible_offset = _align(bp->offset + bp->size, padded_alignment);
|
||||
if (&bp[1] < blocks_array_limit && possible_offset + size <= bp[1].offset) {
|
||||
invariant(blocks_array - bp < (int64_t) n_blocks);
|
||||
return bp;
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
struct block_allocator::blockpair *
|
||||
block_allocator_strategy::first_fit(struct block_allocator::blockpair *blocks_array,
|
||||
uint64_t n_blocks, uint64_t size, uint64_t alignment) {
|
||||
return _first_fit(blocks_array, n_blocks, size, alignment, 0);
|
||||
}
|
||||
|
||||
// Best fit block allocation
|
||||
struct block_allocator::blockpair *
|
||||
block_allocator_strategy::best_fit(struct block_allocator::blockpair *blocks_array,
|
||||
uint64_t n_blocks, uint64_t size, uint64_t alignment) {
|
||||
struct block_allocator::blockpair *best_bp = nullptr;
|
||||
uint64_t best_hole_size = 0;
|
||||
for (uint64_t blocknum = 0; blocknum + 1 < n_blocks; blocknum++) {
|
||||
// Consider the space after blocknum
|
||||
struct block_allocator::blockpair *bp = &blocks_array[blocknum];
|
||||
uint64_t possible_offset = _align(bp->offset + bp->size, alignment);
|
||||
uint64_t possible_end_offset = possible_offset + size;
|
||||
if (possible_end_offset <= bp[1].offset) {
|
||||
// It fits here. Is it the best fit?
|
||||
uint64_t hole_size = bp[1].offset - possible_end_offset;
|
||||
if (best_bp == nullptr || hole_size < best_hole_size) {
|
||||
best_hole_size = hole_size;
|
||||
best_bp = bp;
|
||||
}
|
||||
}
|
||||
}
|
||||
return best_bp;
|
||||
}
|
||||
|
||||
static uint64_t padded_fit_alignment = 4096;
|
||||
|
||||
// TODO: These compiler specific directives should be abstracted in a portability header
|
||||
// portability/toku_compiler.h?
|
||||
__attribute__((__constructor__))
|
||||
static void determine_padded_fit_alignment_from_env(void) {
|
||||
// TODO: Should be in portability as 'toku_os_getenv()?'
|
||||
const char *s = getenv("TOKU_BA_PADDED_FIT_ALIGNMENT");
|
||||
if (s != nullptr && strlen(s) > 0) {
|
||||
const int64_t alignment = strtoll(s, nullptr, 10);
|
||||
if (alignment <= 0) {
|
||||
fprintf(stderr, "tokuft: error: block allocator padded fit alignment found in environment (%s), "
|
||||
"but it's out of range (should be an integer > 0). defaulting to %" PRIu64 "\n",
|
||||
s, padded_fit_alignment);
|
||||
} else {
|
||||
padded_fit_alignment = _roundup_to_power_of_two(alignment);
|
||||
fprintf(stderr, "tokuft: setting block allocator padded fit alignment to %" PRIu64 "\n",
|
||||
padded_fit_alignment);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// First fit into a block that is oversized by up to max_padding.
|
||||
// The hope is that if we purposefully waste a bit of space at allocation
|
||||
// time we'll be more likely to reuse this block later.
|
||||
struct block_allocator::blockpair *
|
||||
block_allocator_strategy::padded_fit(struct block_allocator::blockpair *blocks_array,
|
||||
uint64_t n_blocks, uint64_t size, uint64_t alignment) {
|
||||
return _first_fit(blocks_array, n_blocks, size, alignment, padded_fit_alignment);
|
||||
}
|
||||
|
||||
// Fraction (0..1) of the highest allocated offset at which the hot zone
// begins. Defaults to 0.85 and may be overridden once at load time from
// the environment (see below).
static double hot_zone_threshold = 0.85;

// TODO: These compiler specific directives should be abstracted in a portability header
//       portability/toku_compiler.h?
//
// Runs as a constructor. If TOKU_BA_HOT_ZONE_THRESHOLD is set and non-empty
// it is parsed as a percentage: a value in [1, 99] is divided by 100 and
// stored in hot_zone_threshold; anything else (including input that parses
// to NaN) is reported on stderr and the threshold is reset to 0.85.
__attribute__((__constructor__))
static void determine_hot_zone_threshold_from_env(void) {
    // TODO: Should be in portability as 'toku_os_getenv()?'
    const char *s = getenv("TOKU_BA_HOT_ZONE_THRESHOLD");
    if (s != nullptr && strlen(s) > 0) {
        const double hot_zone = strtod(s, nullptr);
        // Written as a negated in-range test so that NaN, for which every
        // ordered comparison is false, is rejected rather than accepted.
        if (!(hot_zone >= 1 && hot_zone <= 99)) {
            fprintf(stderr, "tokuft: error: block allocator hot zone threshold found in environment (%s), "
                            "but it's out of range (should be an integer 1 through 99). defaulting to 85\n", s);
            // Must be a floating-point value: the integer expression
            // 85 / 100 evaluates to 0, which is not the advertised default.
            hot_zone_threshold = 0.85;
        } else {
            fprintf(stderr, "tokuft: setting block allocator hot zone threshold to %s\n", s);
            hot_zone_threshold = hot_zone / 100;
        }
    }
}
|
||||
|
||||
struct block_allocator::blockpair *
|
||||
block_allocator_strategy::heat_zone(struct block_allocator::blockpair *blocks_array,
|
||||
uint64_t n_blocks, uint64_t size, uint64_t alignment,
|
||||
uint64_t heat) {
|
||||
if (heat > 0) {
|
||||
struct block_allocator::blockpair *bp, *boundary_bp;
|
||||
|
||||
// Hot allocation. Find the beginning of the hot zone.
|
||||
boundary_bp = &blocks_array[n_blocks - 1];
|
||||
uint64_t highest_offset = _align(boundary_bp->offset + boundary_bp->size, alignment);
|
||||
uint64_t hot_zone_offset = static_cast<uint64_t>(hot_zone_threshold * highest_offset);
|
||||
|
||||
boundary_bp = std::lower_bound(blocks_array, blocks_array + n_blocks, hot_zone_offset);
|
||||
uint64_t blocks_in_zone = (blocks_array + n_blocks) - boundary_bp;
|
||||
uint64_t blocks_outside_zone = boundary_bp - blocks_array;
|
||||
invariant(blocks_in_zone + blocks_outside_zone == n_blocks);
|
||||
|
||||
if (blocks_in_zone > 0) {
|
||||
// Find the first fit in the hot zone, going forward.
|
||||
bp = _first_fit(boundary_bp, blocks_in_zone, size, alignment, 0);
|
||||
if (bp != nullptr) {
|
||||
return bp;
|
||||
}
|
||||
}
|
||||
if (blocks_outside_zone > 0) {
|
||||
// Find the first fit in the cold zone, going backwards.
|
||||
bp = _first_fit_bw(boundary_bp, blocks_outside_zone, size, alignment, 0, &blocks_array[n_blocks]);
|
||||
if (bp != nullptr) {
|
||||
return bp;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Cold allocations are simply first-fit from the beginning.
|
||||
return _first_fit(blocks_array, n_blocks, size, alignment, 0);
|
||||
}
|
||||
return nullptr;
|
||||
}
|
File diff suppressed because it is too large
Load diff
|
@ -62,13 +62,16 @@ enum {
|
|||
RESERVED_BLOCKNUMS
|
||||
};
|
||||
|
||||
typedef int (*BLOCKTABLE_CALLBACK)(BLOCKNUM b, int64_t size, int64_t address, void *extra);
|
||||
typedef int (*BLOCKTABLE_CALLBACK)(BLOCKNUM b,
|
||||
int64_t size,
|
||||
int64_t address,
|
||||
void *extra);
|
||||
|
||||
static inline BLOCKNUM make_blocknum(int64_t b) {
|
||||
BLOCKNUM result = { .b = b };
|
||||
BLOCKNUM result = {.b = b};
|
||||
return result;
|
||||
}
|
||||
static const BLOCKNUM ROLLBACK_NONE = { .b = 0 };
|
||||
static const BLOCKNUM ROLLBACK_NONE = {.b = 0};
|
||||
|
||||
/**
|
||||
* There are three copies of the translation table (btt) in the block table:
|
||||
|
@ -80,18 +83,20 @@ static const BLOCKNUM ROLLBACK_NONE = { .b = 0 };
|
|||
*
|
||||
* inprogress Is only filled by copying from current,
|
||||
* and is the only version ever serialized to disk.
|
||||
* (It is serialized to disk on checkpoint and clean shutdown.)
|
||||
* (It is serialized to disk on checkpoint and clean
|
||||
*shutdown.)
|
||||
* At end of checkpoint it replaces 'checkpointed'.
|
||||
* During a checkpoint, any 'pending' dirty writes will update
|
||||
* inprogress.
|
||||
*
|
||||
* current Is initialized by copying from checkpointed,
|
||||
* is the only version ever modified while the database is in use,
|
||||
* is the only version ever modified while the database is in
|
||||
*use,
|
||||
* and is the only version ever copied to inprogress.
|
||||
* It is never stored on disk.
|
||||
*/
|
||||
class block_table {
|
||||
public:
|
||||
public:
|
||||
enum translation_type {
|
||||
TRANSLATION_NONE = 0,
|
||||
TRANSLATION_CURRENT,
|
||||
|
@ -102,7 +107,10 @@ public:
|
|||
|
||||
void create();
|
||||
|
||||
int create_from_buffer(int fd, DISKOFF location_on_disk, DISKOFF size_on_disk, unsigned char *translation_buffer);
|
||||
int create_from_buffer(int fd,
|
||||
DISKOFF location_on_disk,
|
||||
DISKOFF size_on_disk,
|
||||
unsigned char *translation_buffer);
|
||||
|
||||
void destroy();
|
||||
|
||||
|
@ -114,11 +122,21 @@ public:
|
|||
|
||||
// Blocknums
|
||||
void allocate_blocknum(BLOCKNUM *res, struct ft *ft);
|
||||
void realloc_on_disk(BLOCKNUM b, DISKOFF size, DISKOFF *offset, struct ft *ft, int fd, bool for_checkpoint, uint64_t heat);
|
||||
void realloc_on_disk(BLOCKNUM b,
|
||||
DISKOFF size,
|
||||
DISKOFF *offset,
|
||||
struct ft *ft,
|
||||
int fd,
|
||||
bool for_checkpoint);
|
||||
void free_blocknum(BLOCKNUM *b, struct ft *ft, bool for_checkpoint);
|
||||
void translate_blocknum_to_offset_size(BLOCKNUM b, DISKOFF *offset, DISKOFF *size);
|
||||
void translate_blocknum_to_offset_size(BLOCKNUM b,
|
||||
DISKOFF *offset,
|
||||
DISKOFF *size);
|
||||
void free_unused_blocknums(BLOCKNUM root);
|
||||
void realloc_descriptor_on_disk(DISKOFF size, DISKOFF *offset, struct ft *ft, int fd);
|
||||
void realloc_descriptor_on_disk(DISKOFF size,
|
||||
DISKOFF *offset,
|
||||
struct ft *ft,
|
||||
int fd);
|
||||
void get_descriptor_offset_size(DISKOFF *offset, DISKOFF *size);
|
||||
|
||||
// External verification
|
||||
|
@ -127,15 +145,22 @@ public:
|
|||
void verify_no_free_blocknums();
|
||||
|
||||
// Serialization
|
||||
void serialize_translation_to_wbuf(int fd, struct wbuf *w, int64_t *address, int64_t *size);
|
||||
void serialize_translation_to_wbuf(int fd,
|
||||
struct wbuf *w,
|
||||
int64_t *address,
|
||||
int64_t *size);
|
||||
|
||||
// DEBUG ONLY (ftdump included), tests included
|
||||
void blocknum_dump_translation(BLOCKNUM b);
|
||||
void dump_translation_table_pretty(FILE *f);
|
||||
void dump_translation_table(FILE *f);
|
||||
void block_free(uint64_t offset);
|
||||
void block_free(uint64_t offset, uint64_t size);
|
||||
|
||||
int iterate(enum translation_type type, BLOCKTABLE_CALLBACK f, void *extra, bool data_only, bool used_only);
|
||||
int iterate(enum translation_type type,
|
||||
BLOCKTABLE_CALLBACK f,
|
||||
void *extra,
|
||||
bool data_only,
|
||||
bool used_only);
|
||||
void internal_fragmentation(int64_t *total_sizep, int64_t *used_sizep);
|
||||
|
||||
// Requires: blocktable lock is held.
|
||||
|
@ -146,13 +171,16 @@ public:
|
|||
|
||||
void get_info64(struct ftinfo64 *);
|
||||
|
||||
int iterate_translation_tables(uint64_t, int (*)(uint64_t, int64_t, int64_t, int64_t, int64_t, void *), void *);
|
||||
int iterate_translation_tables(
|
||||
uint64_t,
|
||||
int (*)(uint64_t, int64_t, int64_t, int64_t, int64_t, void *),
|
||||
void *);
|
||||
|
||||
private:
|
||||
private:
|
||||
struct block_translation_pair {
|
||||
// If in the freelist, use next_free_blocknum, otherwise diskoff.
|
||||
union {
|
||||
DISKOFF diskoff;
|
||||
DISKOFF diskoff;
|
||||
BLOCKNUM next_free_blocknum;
|
||||
} u;
|
||||
|
||||
|
@ -173,7 +201,8 @@ private:
|
|||
struct translation {
|
||||
enum translation_type type;
|
||||
|
||||
// Number of elements in array (block_translation). always >= smallest_never_used_blocknum
|
||||
// Number of elements in array (block_translation). always >=
|
||||
// smallest_never_used_blocknum
|
||||
int64_t length_of_array;
|
||||
BLOCKNUM smallest_never_used_blocknum;
|
||||
|
||||
|
@ -181,20 +210,28 @@ private:
|
|||
BLOCKNUM blocknum_freelist_head;
|
||||
struct block_translation_pair *block_translation;
|
||||
|
||||
// size_on_disk is stored in block_translation[RESERVED_BLOCKNUM_TRANSLATION].size
|
||||
// location_on is stored in block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff
|
||||
// size_on_disk is stored in
|
||||
// block_translation[RESERVED_BLOCKNUM_TRANSLATION].size
|
||||
// location_on is stored in
|
||||
// block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff
|
||||
};
|
||||
|
||||
void _create_internal();
|
||||
int _translation_deserialize_from_buffer(struct translation *t, // destination into which to deserialize
|
||||
DISKOFF location_on_disk, // location of translation_buffer
|
||||
uint64_t size_on_disk,
|
||||
unsigned char * translation_buffer); // buffer with serialized translation
|
||||
int _translation_deserialize_from_buffer(
|
||||
struct translation *t, // destination into which to deserialize
|
||||
DISKOFF location_on_disk, // location of translation_buffer
|
||||
uint64_t size_on_disk,
|
||||
unsigned char *
|
||||
translation_buffer); // buffer with serialized translation
|
||||
|
||||
void _copy_translation(struct translation *dst, struct translation *src, enum translation_type newtype);
|
||||
void _copy_translation(struct translation *dst,
|
||||
struct translation *src,
|
||||
enum translation_type newtype);
|
||||
void _maybe_optimize_translation(struct translation *t);
|
||||
void _maybe_expand_translation(struct translation *t);
|
||||
bool _translation_prevents_freeing(struct translation *t, BLOCKNUM b, struct block_translation_pair *old_pair);
|
||||
bool _translation_prevents_freeing(struct translation *t,
|
||||
BLOCKNUM b,
|
||||
struct block_translation_pair *old_pair);
|
||||
void _free_blocknum_in_translation(struct translation *t, BLOCKNUM b);
|
||||
int64_t _calculate_size_on_disk(struct translation *t);
|
||||
bool _pair_is_unallocated(struct block_translation_pair *pair);
|
||||
|
@ -203,14 +240,26 @@ private:
|
|||
|
||||
// Blocknum management
|
||||
void _allocate_blocknum_unlocked(BLOCKNUM *res, struct ft *ft);
|
||||
void _free_blocknum_unlocked(BLOCKNUM *bp, struct ft *ft, bool for_checkpoint);
|
||||
void _realloc_descriptor_on_disk_unlocked(DISKOFF size, DISKOFF *offset, struct ft *ft);
|
||||
void _realloc_on_disk_internal(BLOCKNUM b, DISKOFF size, DISKOFF *offset, struct ft *ft, bool for_checkpoint, uint64_t heat);
|
||||
void _translate_blocknum_to_offset_size_unlocked(BLOCKNUM b, DISKOFF *offset, DISKOFF *size);
|
||||
void _free_blocknum_unlocked(BLOCKNUM *bp,
|
||||
struct ft *ft,
|
||||
bool for_checkpoint);
|
||||
void _realloc_descriptor_on_disk_unlocked(DISKOFF size,
|
||||
DISKOFF *offset,
|
||||
struct ft *ft);
|
||||
void _realloc_on_disk_internal(BLOCKNUM b,
|
||||
DISKOFF size,
|
||||
DISKOFF *offset,
|
||||
struct ft *ft,
|
||||
bool for_checkpoint);
|
||||
void _translate_blocknum_to_offset_size_unlocked(BLOCKNUM b,
|
||||
DISKOFF *offset,
|
||||
DISKOFF *size);
|
||||
|
||||
// File management
|
||||
void _maybe_truncate_file(int fd, uint64_t size_needed_before);
|
||||
void _ensure_safe_write_unlocked(int fd, DISKOFF block_size, DISKOFF block_offset);
|
||||
void _ensure_safe_write_unlocked(int fd,
|
||||
DISKOFF block_size,
|
||||
DISKOFF block_offset);
|
||||
|
||||
// Verification
|
||||
bool _is_valid_blocknum(struct translation *t, BLOCKNUM b);
|
||||
|
@ -220,29 +269,33 @@ private:
|
|||
bool _no_data_blocks_except_root(BLOCKNUM root);
|
||||
bool _blocknum_allocated(BLOCKNUM b);
|
||||
|
||||
// Locking
|
||||
// Locking
|
||||
//
|
||||
// TODO: Move the lock to the FT
|
||||
void _mutex_lock();
|
||||
void _mutex_unlock();
|
||||
|
||||
// The current translation is the one used by client threads.
|
||||
// The current translation is the one used by client threads.
|
||||
// It is not represented on disk.
|
||||
struct translation _current;
|
||||
|
||||
// The translation used by the checkpoint currently in progress.
|
||||
// If the checkpoint thread allocates a block, it must also update the current translation.
|
||||
// The translation used by the checkpoint currently in progress.
|
||||
// If the checkpoint thread allocates a block, it must also update the
|
||||
// current translation.
|
||||
struct translation _inprogress;
|
||||
|
||||
// The translation for the data that shall remain inviolate on disk until the next checkpoint finishes,
|
||||
// The translation for the data that shall remain inviolate on disk until
|
||||
// the next checkpoint finishes,
|
||||
// after which any blocks used only in this translation can be freed.
|
||||
struct translation _checkpointed;
|
||||
|
||||
// The in-memory data structure for block allocation.
|
||||
// The in-memory data structure for block allocation.
|
||||
// There is no on-disk data structure for block allocation.
|
||||
// Note: This is *allocation* not *translation* - the block allocator is unaware of which
|
||||
// blocks are used for which translation, but simply allocates and deallocates blocks.
|
||||
block_allocator _bt_block_allocator;
|
||||
// Note: This is *allocation* not *translation* - the block allocator is
|
||||
// unaware of which
|
||||
// blocks are used for which translation, but simply allocates and
|
||||
// deallocates blocks.
|
||||
BlockAllocator *_bt_block_allocator;
|
||||
toku_mutex_t _mutex;
|
||||
struct nb_mutex _safe_file_size_lock;
|
||||
bool _checkpoint_skipped;
|
||||
|
@ -257,16 +310,16 @@ private:
|
|||
|
||||
#include "ft/serialize/wbuf.h"
|
||||
|
||||
static inline void wbuf_BLOCKNUM (struct wbuf *w, BLOCKNUM b) {
|
||||
static inline void wbuf_BLOCKNUM(struct wbuf *w, BLOCKNUM b) {
|
||||
wbuf_ulonglong(w, b.b);
|
||||
}
|
||||
|
||||
static inline void wbuf_nocrc_BLOCKNUM (struct wbuf *w, BLOCKNUM b) {
|
||||
static inline void wbuf_nocrc_BLOCKNUM(struct wbuf *w, BLOCKNUM b) {
|
||||
wbuf_nocrc_ulonglong(w, b.b);
|
||||
}
|
||||
|
||||
static inline void wbuf_DISKOFF(struct wbuf *wb, DISKOFF off) {
|
||||
wbuf_ulonglong(wb, (uint64_t) off);
|
||||
wbuf_ulonglong(wb, (uint64_t)off);
|
||||
}
|
||||
|
||||
#include "ft/serialize/rbuf.h"
|
||||
|
@ -280,6 +333,8 @@ static inline BLOCKNUM rbuf_blocknum(struct rbuf *rb) {
|
|||
return result;
|
||||
}
|
||||
|
||||
static inline void rbuf_ma_BLOCKNUM(struct rbuf *rb, memarena *UU(ma), BLOCKNUM *blocknum) {
|
||||
static inline void rbuf_ma_BLOCKNUM(struct rbuf *rb,
|
||||
memarena *UU(ma),
|
||||
BLOCKNUM *blocknum) {
|
||||
*blocknum = rbuf_blocknum(rb);
|
||||
}
|
||||
|
|
|
@ -235,7 +235,7 @@ void toku_decompress (Bytef *dest, uLongf destLen,
|
|||
strm.zalloc = Z_NULL;
|
||||
strm.zfree = Z_NULL;
|
||||
strm.opaque = Z_NULL;
|
||||
char windowBits = source[1];
|
||||
int8_t windowBits = source[1];
|
||||
int r = inflateInit2(&strm, windowBits);
|
||||
lazy_assert(r == Z_OK);
|
||||
strm.next_out = dest;
|
||||
|
|
|
@ -217,8 +217,8 @@ int deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ftp, uint32_t version)
|
|||
// translation table itself won't fit in main memory.
|
||||
ssize_t readsz = toku_os_pread(fd, tbuf, size_to_read,
|
||||
translation_address_on_disk);
|
||||
assert(readsz >= translation_size_on_disk);
|
||||
assert(readsz <= (ssize_t)size_to_read);
|
||||
invariant(readsz >= translation_size_on_disk);
|
||||
invariant(readsz <= (ssize_t)size_to_read);
|
||||
}
|
||||
// Create table and read in data.
|
||||
r = ft->blocktable.create_from_buffer(fd,
|
||||
|
@ -411,73 +411,90 @@ exit:
|
|||
return r;
|
||||
}
|
||||
|
||||
static size_t
|
||||
serialize_ft_min_size (uint32_t version) {
|
||||
static size_t serialize_ft_min_size(uint32_t version) {
|
||||
size_t size = 0;
|
||||
|
||||
switch(version) {
|
||||
case FT_LAYOUT_VERSION_29:
|
||||
size += sizeof(uint64_t); // logrows in ft
|
||||
case FT_LAYOUT_VERSION_28:
|
||||
size += sizeof(uint32_t); // fanout in ft
|
||||
case FT_LAYOUT_VERSION_27:
|
||||
case FT_LAYOUT_VERSION_26:
|
||||
case FT_LAYOUT_VERSION_25:
|
||||
case FT_LAYOUT_VERSION_24:
|
||||
case FT_LAYOUT_VERSION_23:
|
||||
case FT_LAYOUT_VERSION_22:
|
||||
case FT_LAYOUT_VERSION_21:
|
||||
size += sizeof(MSN); // max_msn_in_ft
|
||||
case FT_LAYOUT_VERSION_20:
|
||||
case FT_LAYOUT_VERSION_19:
|
||||
size += 1; // compression method
|
||||
size += sizeof(MSN); // highest_unused_msn_for_upgrade
|
||||
case FT_LAYOUT_VERSION_18:
|
||||
size += sizeof(uint64_t); // time_of_last_optimize_begin
|
||||
size += sizeof(uint64_t); // time_of_last_optimize_end
|
||||
size += sizeof(uint32_t); // count_of_optimize_in_progress
|
||||
size += sizeof(MSN); // msn_at_start_of_last_completed_optimize
|
||||
size -= 8; // removed num_blocks_to_upgrade_14
|
||||
size -= 8; // removed num_blocks_to_upgrade_13
|
||||
case FT_LAYOUT_VERSION_17:
|
||||
size += 16;
|
||||
invariant(sizeof(STAT64INFO_S) == 16);
|
||||
case FT_LAYOUT_VERSION_16:
|
||||
case FT_LAYOUT_VERSION_15:
|
||||
size += 4; // basement node size
|
||||
size += 8; // num_blocks_to_upgrade_14 (previously num_blocks_to_upgrade, now one int each for upgrade from 13, 14
|
||||
size += 8; // time of last verification
|
||||
case FT_LAYOUT_VERSION_14:
|
||||
size += 8; //TXNID that created
|
||||
case FT_LAYOUT_VERSION_13:
|
||||
size += ( 4 // build_id
|
||||
+4 // build_id_original
|
||||
+8 // time_of_creation
|
||||
+8 // time_of_last_modification
|
||||
);
|
||||
switch (version) {
|
||||
case FT_LAYOUT_VERSION_29:
|
||||
size += sizeof(uint64_t); // logrows in ft
|
||||
case FT_LAYOUT_VERSION_28:
|
||||
size += sizeof(uint32_t); // fanout in ft
|
||||
case FT_LAYOUT_VERSION_27:
|
||||
case FT_LAYOUT_VERSION_26:
|
||||
case FT_LAYOUT_VERSION_25:
|
||||
case FT_LAYOUT_VERSION_24:
|
||||
case FT_LAYOUT_VERSION_23:
|
||||
case FT_LAYOUT_VERSION_22:
|
||||
case FT_LAYOUT_VERSION_21:
|
||||
size += sizeof(MSN); // max_msn_in_ft
|
||||
case FT_LAYOUT_VERSION_20:
|
||||
case FT_LAYOUT_VERSION_19:
|
||||
size += 1; // compression method
|
||||
size += sizeof(MSN); // highest_unused_msn_for_upgrade
|
||||
case FT_LAYOUT_VERSION_18:
|
||||
size += sizeof(uint64_t); // time_of_last_optimize_begin
|
||||
size += sizeof(uint64_t); // time_of_last_optimize_end
|
||||
size += sizeof(uint32_t); // count_of_optimize_in_progress
|
||||
size += sizeof(MSN); // msn_at_start_of_last_completed_optimize
|
||||
size -= 8; // removed num_blocks_to_upgrade_14
|
||||
size -= 8; // removed num_blocks_to_upgrade_13
|
||||
case FT_LAYOUT_VERSION_17:
|
||||
size += 16;
|
||||
invariant(sizeof(STAT64INFO_S) == 16);
|
||||
case FT_LAYOUT_VERSION_16:
|
||||
case FT_LAYOUT_VERSION_15:
|
||||
size += 4; // basement node size
|
||||
size += 8; // num_blocks_to_upgrade_14 (previously
|
||||
// num_blocks_to_upgrade, now one int each for upgrade
|
||||
// from 13, 14
|
||||
size += 8; // time of last verification
|
||||
case FT_LAYOUT_VERSION_14:
|
||||
size += 8; // TXNID that created
|
||||
case FT_LAYOUT_VERSION_13:
|
||||
size += (4 // build_id
|
||||
+
|
||||
4 // build_id_original
|
||||
+
|
||||
8 // time_of_creation
|
||||
+
|
||||
8 // time_of_last_modification
|
||||
);
|
||||
// fall through
|
||||
case FT_LAYOUT_VERSION_12:
|
||||
size += (+8 // "tokudata"
|
||||
+4 // version
|
||||
+4 // original_version
|
||||
+4 // size
|
||||
+8 // byte order verification
|
||||
+8 // checkpoint_count
|
||||
+8 // checkpoint_lsn
|
||||
+4 // tree's nodesize
|
||||
+8 // translation_size_on_disk
|
||||
+8 // translation_address_on_disk
|
||||
+4 // checksum
|
||||
+8 // Number of blocks in old version.
|
||||
+8 // diskoff
|
||||
+4 // flags
|
||||
);
|
||||
break;
|
||||
default:
|
||||
abort();
|
||||
case FT_LAYOUT_VERSION_12:
|
||||
size += (+8 // "tokudata"
|
||||
+
|
||||
4 // version
|
||||
+
|
||||
4 // original_version
|
||||
+
|
||||
4 // size
|
||||
+
|
||||
8 // byte order verification
|
||||
+
|
||||
8 // checkpoint_count
|
||||
+
|
||||
8 // checkpoint_lsn
|
||||
+
|
||||
4 // tree's nodesize
|
||||
+
|
||||
8 // translation_size_on_disk
|
||||
+
|
||||
8 // translation_address_on_disk
|
||||
+
|
||||
4 // checksum
|
||||
+
|
||||
8 // Number of blocks in old version.
|
||||
+
|
||||
8 // diskoff
|
||||
+
|
||||
4 // flags
|
||||
);
|
||||
break;
|
||||
default:
|
||||
abort();
|
||||
}
|
||||
|
||||
lazy_assert(size <= block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE);
|
||||
lazy_assert(size <= BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE);
|
||||
return size;
|
||||
}
|
||||
|
||||
|
@ -486,7 +503,7 @@ int deserialize_ft_from_fd_into_rbuf(int fd,
|
|||
struct rbuf *rb,
|
||||
uint64_t *checkpoint_count,
|
||||
LSN *checkpoint_lsn,
|
||||
uint32_t * version_p)
|
||||
uint32_t *version_p)
|
||||
// Effect: Read and parse the header of a fractal tree
|
||||
//
|
||||
// Simply reading the raw bytes of the header into an rbuf is insensitive
|
||||
|
@ -496,18 +513,18 @@ int deserialize_ft_from_fd_into_rbuf(int fd,
|
|||
// file AND the header is useless
|
||||
{
|
||||
int r = 0;
|
||||
const int64_t prefix_size = 8 + // magic ("tokudata")
|
||||
4 + // version
|
||||
4 + // build_id
|
||||
4; // size
|
||||
const int64_t prefix_size = 8 + // magic ("tokudata")
|
||||
4 + // version
|
||||
4 + // build_id
|
||||
4; // size
|
||||
const int64_t read_size = roundup_to_multiple(512, prefix_size);
|
||||
unsigned char *XMALLOC_N_ALIGNED(512, read_size, prefix);
|
||||
rb->buf = NULL;
|
||||
int64_t n = toku_os_pread(fd, prefix, read_size, offset_of_header);
|
||||
if (n != read_size) {
|
||||
if (n==0) {
|
||||
if (n == 0) {
|
||||
r = TOKUDB_DICTIONARY_NO_HEADER;
|
||||
} else if (n<0) {
|
||||
} else if (n < 0) {
|
||||
r = get_error_errno();
|
||||
} else {
|
||||
r = EINVAL;
|
||||
|
@ -518,95 +535,102 @@ int deserialize_ft_from_fd_into_rbuf(int fd,
|
|||
|
||||
rbuf_init(rb, prefix, prefix_size);
|
||||
|
||||
//Check magic number
|
||||
// Check magic number
|
||||
const void *magic;
|
||||
rbuf_literal_bytes(rb, &magic, 8);
|
||||
if (memcmp(magic,"tokudata",8)!=0) {
|
||||
if ((*(uint64_t*)magic) == 0) {
|
||||
if (memcmp(magic, "tokudata", 8) != 0) {
|
||||
if ((*(uint64_t *)magic) == 0) {
|
||||
r = TOKUDB_DICTIONARY_NO_HEADER;
|
||||
} else {
|
||||
r = EINVAL; //Not a tokudb file! Do not use.
|
||||
r = EINVAL; // Not a tokudb file! Do not use.
|
||||
}
|
||||
goto exit;
|
||||
}
|
||||
|
||||
//Version MUST be in network order regardless of disk order.
|
||||
// Version MUST be in network order regardless of disk order.
|
||||
uint32_t version;
|
||||
version = rbuf_network_int(rb);
|
||||
*version_p = version;
|
||||
if (version < FT_LAYOUT_MIN_SUPPORTED_VERSION) {
|
||||
r = TOKUDB_DICTIONARY_TOO_OLD; //Cannot use
|
||||
r = TOKUDB_DICTIONARY_TOO_OLD; // Cannot use
|
||||
goto exit;
|
||||
} else if (version > FT_LAYOUT_VERSION) {
|
||||
r = TOKUDB_DICTIONARY_TOO_NEW; //Cannot use
|
||||
r = TOKUDB_DICTIONARY_TOO_NEW; // Cannot use
|
||||
goto exit;
|
||||
}
|
||||
|
||||
//build_id MUST be in network order regardless of disk order.
|
||||
// build_id MUST be in network order regardless of disk order.
|
||||
uint32_t build_id __attribute__((__unused__));
|
||||
build_id = rbuf_network_int(rb);
|
||||
int64_t min_header_size;
|
||||
min_header_size = serialize_ft_min_size(version);
|
||||
|
||||
//Size MUST be in network order regardless of disk order.
|
||||
// Size MUST be in network order regardless of disk order.
|
||||
uint32_t size;
|
||||
size = rbuf_network_int(rb);
|
||||
//If too big, it is corrupt. We would probably notice during checksum
|
||||
//but may have to do a multi-gigabyte malloc+read to find out.
|
||||
//If its too small reading rbuf would crash, so verify.
|
||||
if (size > block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE || size < min_header_size) {
|
||||
// If too big, it is corrupt. We would probably notice during checksum
|
||||
// but may have to do a multi-gigabyte malloc+read to find out.
|
||||
// If it's too small, reading rbuf would crash, so verify.
|
||||
if (size > BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE ||
|
||||
size < min_header_size) {
|
||||
r = TOKUDB_DICTIONARY_NO_HEADER;
|
||||
goto exit;
|
||||
}
|
||||
|
||||
lazy_assert(rb->ndone==prefix_size);
|
||||
lazy_assert(rb->ndone == prefix_size);
|
||||
rb->size = size;
|
||||
{
|
||||
toku_free(rb->buf);
|
||||
uint32_t size_to_read = roundup_to_multiple(512, size);
|
||||
XMALLOC_N_ALIGNED(512, size_to_read, rb->buf);
|
||||
|
||||
assert(offset_of_header%512==0);
|
||||
invariant(offset_of_header % 512 == 0);
|
||||
n = toku_os_pread(fd, rb->buf, size_to_read, offset_of_header);
|
||||
if (n != size_to_read) {
|
||||
if (n < 0) {
|
||||
r = get_error_errno();
|
||||
} else {
|
||||
r = EINVAL; //Header might be useless (wrong size) or could be a disk read error.
|
||||
r = EINVAL; // Header might be useless (wrong size) or could be
|
||||
// a disk read error.
|
||||
}
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
//It's version 14 or later. Magic looks OK.
|
||||
//We have an rbuf that represents the header.
|
||||
//Size is within acceptable bounds.
|
||||
// It's version 14 or later. Magic looks OK.
|
||||
// We have an rbuf that represents the header.
|
||||
// Size is within acceptable bounds.
|
||||
|
||||
//Verify checksum (FT_LAYOUT_VERSION_13 or later, when checksum function changed)
|
||||
// Verify checksum (FT_LAYOUT_VERSION_13 or later, when checksum function
|
||||
// changed)
|
||||
uint32_t calculated_x1764;
|
||||
calculated_x1764 = toku_x1764_memory(rb->buf, rb->size-4);
|
||||
calculated_x1764 = toku_x1764_memory(rb->buf, rb->size - 4);
|
||||
uint32_t stored_x1764;
|
||||
stored_x1764 = toku_dtoh32(*(int*)(rb->buf+rb->size-4));
|
||||
stored_x1764 = toku_dtoh32(*(int *)(rb->buf + rb->size - 4));
|
||||
if (calculated_x1764 != stored_x1764) {
|
||||
r = TOKUDB_BAD_CHECKSUM; //Header useless
|
||||
fprintf(stderr, "Header checksum failure: calc=0x%08x read=0x%08x\n", calculated_x1764, stored_x1764);
|
||||
r = TOKUDB_BAD_CHECKSUM; // Header useless
|
||||
fprintf(stderr,
|
||||
"Header checksum failure: calc=0x%08x read=0x%08x\n",
|
||||
calculated_x1764,
|
||||
stored_x1764);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
//Verify byte order
|
||||
// Verify byte order
|
||||
const void *tmp_byte_order_check;
|
||||
lazy_assert((sizeof toku_byte_order_host) == 8);
|
||||
rbuf_literal_bytes(rb, &tmp_byte_order_check, 8); //Must not translate byte order
|
||||
rbuf_literal_bytes(
|
||||
rb, &tmp_byte_order_check, 8); // Must not translate byte order
|
||||
int64_t byte_order_stored;
|
||||
byte_order_stored = *(int64_t*)tmp_byte_order_check;
|
||||
byte_order_stored = *(int64_t *)tmp_byte_order_check;
|
||||
if (byte_order_stored != toku_byte_order_host) {
|
||||
r = TOKUDB_DICTIONARY_NO_HEADER; //Cannot use dictionary
|
||||
r = TOKUDB_DICTIONARY_NO_HEADER; // Cannot use dictionary
|
||||
goto exit;
|
||||
}
|
||||
|
||||
//Load checkpoint count
|
||||
// Load checkpoint count
|
||||
*checkpoint_count = rbuf_ulonglong(rb);
|
||||
*checkpoint_lsn = rbuf_LSN(rb);
|
||||
//Restart at beginning during regular deserialization
|
||||
// Restart at beginning during regular deserialization
|
||||
rb->ndone = 0;
|
||||
|
||||
exit:
|
||||
|
@ -620,11 +644,7 @@ exit:
|
|||
// Read ft from file into struct. Read both headers and use one.
|
||||
// We want the latest acceptable header whose checkpoint_lsn is no later
|
||||
// than max_acceptable_lsn.
|
||||
int
|
||||
toku_deserialize_ft_from(int fd,
|
||||
LSN max_acceptable_lsn,
|
||||
FT *ft)
|
||||
{
|
||||
int toku_deserialize_ft_from(int fd, LSN max_acceptable_lsn, FT *ft) {
|
||||
struct rbuf rb_0;
|
||||
struct rbuf rb_1;
|
||||
uint64_t checkpoint_count_0 = 0;
|
||||
|
@ -638,13 +658,23 @@ toku_deserialize_ft_from(int fd,
|
|||
int r0, r1, r;
|
||||
|
||||
toku_off_t header_0_off = 0;
|
||||
r0 = deserialize_ft_from_fd_into_rbuf(fd, header_0_off, &rb_0, &checkpoint_count_0, &checkpoint_lsn_0, &version_0);
|
||||
r0 = deserialize_ft_from_fd_into_rbuf(fd,
|
||||
header_0_off,
|
||||
&rb_0,
|
||||
&checkpoint_count_0,
|
||||
&checkpoint_lsn_0,
|
||||
&version_0);
|
||||
if (r0 == 0 && checkpoint_lsn_0.lsn <= max_acceptable_lsn.lsn) {
|
||||
h0_acceptable = true;
|
||||
}
|
||||
|
||||
toku_off_t header_1_off = block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE;
|
||||
r1 = deserialize_ft_from_fd_into_rbuf(fd, header_1_off, &rb_1, &checkpoint_count_1, &checkpoint_lsn_1, &version_1);
|
||||
toku_off_t header_1_off = BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE;
|
||||
r1 = deserialize_ft_from_fd_into_rbuf(fd,
|
||||
header_1_off,
|
||||
&rb_1,
|
||||
&checkpoint_count_1,
|
||||
&checkpoint_lsn_1,
|
||||
&version_1);
|
||||
if (r1 == 0 && checkpoint_lsn_1.lsn <= max_acceptable_lsn.lsn) {
|
||||
h1_acceptable = true;
|
||||
}
|
||||
|
@ -655,24 +685,29 @@ toku_deserialize_ft_from(int fd,
|
|||
// We were unable to read either header or at least one is too
|
||||
// new. Certain errors are higher priority than others. Order of
|
||||
// these if/else if is important.
|
||||
if (r0 == TOKUDB_DICTIONARY_TOO_NEW || r1 == TOKUDB_DICTIONARY_TOO_NEW) {
|
||||
if (r0 == TOKUDB_DICTIONARY_TOO_NEW ||
|
||||
r1 == TOKUDB_DICTIONARY_TOO_NEW) {
|
||||
r = TOKUDB_DICTIONARY_TOO_NEW;
|
||||
} else if (r0 == TOKUDB_DICTIONARY_TOO_OLD || r1 == TOKUDB_DICTIONARY_TOO_OLD) {
|
||||
} else if (r0 == TOKUDB_DICTIONARY_TOO_OLD ||
|
||||
r1 == TOKUDB_DICTIONARY_TOO_OLD) {
|
||||
r = TOKUDB_DICTIONARY_TOO_OLD;
|
||||
} else if (r0 == TOKUDB_BAD_CHECKSUM && r1 == TOKUDB_BAD_CHECKSUM) {
|
||||
fprintf(stderr, "Both header checksums failed.\n");
|
||||
r = TOKUDB_BAD_CHECKSUM;
|
||||
} else if (r0 == TOKUDB_DICTIONARY_NO_HEADER || r1 == TOKUDB_DICTIONARY_NO_HEADER) {
|
||||
} else if (r0 == TOKUDB_DICTIONARY_NO_HEADER ||
|
||||
r1 == TOKUDB_DICTIONARY_NO_HEADER) {
|
||||
r = TOKUDB_DICTIONARY_NO_HEADER;
|
||||
} else {
|
||||
r = r0 ? r0 : r1; //Arbitrarily report the error from the
|
||||
//first header, unless it's readable
|
||||
r = r0 ? r0 : r1; // Arbitrarily report the error from the
|
||||
// first header, unless it's readable
|
||||
}
|
||||
|
||||
// it should not be possible for both headers to be later than the max_acceptable_lsn
|
||||
invariant(!((r0==0 && checkpoint_lsn_0.lsn > max_acceptable_lsn.lsn) &&
|
||||
(r1==0 && checkpoint_lsn_1.lsn > max_acceptable_lsn.lsn)));
|
||||
invariant(r!=0);
|
||||
// it should not be possible for both headers to be later than the
|
||||
// max_acceptable_lsn
|
||||
invariant(
|
||||
!((r0 == 0 && checkpoint_lsn_0.lsn > max_acceptable_lsn.lsn) &&
|
||||
(r1 == 0 && checkpoint_lsn_1.lsn > max_acceptable_lsn.lsn)));
|
||||
invariant(r != 0);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
|
@ -682,8 +717,7 @@ toku_deserialize_ft_from(int fd,
|
|||
invariant(version_0 >= version_1);
|
||||
rb = &rb_0;
|
||||
version = version_0;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
invariant(checkpoint_count_1 == checkpoint_count_0 + 1);
|
||||
invariant(version_1 >= version_0);
|
||||
rb = &rb_1;
|
||||
|
@ -692,14 +726,18 @@ toku_deserialize_ft_from(int fd,
|
|||
} else if (h0_acceptable) {
|
||||
if (r1 == TOKUDB_BAD_CHECKSUM) {
|
||||
// print something reassuring
|
||||
fprintf(stderr, "Header 2 checksum failed, but header 1 ok. Proceeding.\n");
|
||||
fprintf(
|
||||
stderr,
|
||||
"Header 2 checksum failed, but header 1 ok. Proceeding.\n");
|
||||
}
|
||||
rb = &rb_0;
|
||||
version = version_0;
|
||||
} else if (h1_acceptable) {
|
||||
if (r0 == TOKUDB_BAD_CHECKSUM) {
|
||||
// print something reassuring
|
||||
fprintf(stderr, "Header 1 checksum failed, but header 2 ok. Proceeding.\n");
|
||||
fprintf(
|
||||
stderr,
|
||||
"Header 1 checksum failed, but header 2 ok. Proceeding.\n");
|
||||
}
|
||||
rb = &rb_1;
|
||||
version = version_1;
|
||||
|
@ -718,15 +756,13 @@ exit:
|
|||
return r;
|
||||
}
|
||||
|
||||
|
||||
size_t toku_serialize_ft_size (FT_HEADER h) {
|
||||
size_t toku_serialize_ft_size(FT_HEADER h) {
|
||||
size_t size = serialize_ft_min_size(h->layout_version);
|
||||
//There is no dynamic data.
|
||||
lazy_assert(size <= block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE);
|
||||
// There is no dynamic data.
|
||||
lazy_assert(size <= BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE);
|
||||
return size;
|
||||
}
|
||||
|
||||
|
||||
void toku_serialize_ft_to_wbuf (
|
||||
struct wbuf *wbuf,
|
||||
FT_HEADER h,
|
||||
|
@ -771,52 +807,60 @@ void toku_serialize_ft_to_wbuf (
|
|||
}
|
||||
|
||||
void toku_serialize_ft_to(int fd, FT_HEADER h, block_table *bt, CACHEFILE cf) {
|
||||
lazy_assert(h->type==FT_CHECKPOINT_INPROGRESS);
|
||||
lazy_assert(h->type == FT_CHECKPOINT_INPROGRESS);
|
||||
struct wbuf w_translation;
|
||||
int64_t size_translation;
|
||||
int64_t address_translation;
|
||||
|
||||
// Must serialize translation first, to get address,size for header.
|
||||
bt->serialize_translation_to_wbuf(fd, &w_translation,
|
||||
&address_translation,
|
||||
&size_translation);
|
||||
assert(size_translation == w_translation.ndone);
|
||||
bt->serialize_translation_to_wbuf(
|
||||
fd, &w_translation, &address_translation, &size_translation);
|
||||
invariant(size_translation == w_translation.ndone);
|
||||
|
||||
// the number of bytes available in the buffer is 0 mod 512, and those last bytes are all initialized.
|
||||
assert(w_translation.size % 512 == 0);
|
||||
// the number of bytes available in the buffer is 0 mod 512, and those last
|
||||
// bytes are all initialized.
|
||||
invariant(w_translation.size % 512 == 0);
|
||||
|
||||
struct wbuf w_main;
|
||||
size_t size_main = toku_serialize_ft_size(h);
|
||||
size_t size_main = toku_serialize_ft_size(h);
|
||||
size_t size_main_aligned = roundup_to_multiple(512, size_main);
|
||||
assert(size_main_aligned<block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE);
|
||||
invariant(size_main_aligned <
|
||||
BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE);
|
||||
char *XMALLOC_N_ALIGNED(512, size_main_aligned, mainbuf);
|
||||
for (size_t i=size_main; i<size_main_aligned; i++) mainbuf[i]=0; // initialize the end of the buffer with zeros
|
||||
for (size_t i = size_main; i < size_main_aligned; i++)
|
||||
mainbuf[i] = 0; // initialize the end of the buffer with zeros
|
||||
wbuf_init(&w_main, mainbuf, size_main);
|
||||
toku_serialize_ft_to_wbuf(&w_main, h, address_translation, size_translation);
|
||||
toku_serialize_ft_to_wbuf(
|
||||
&w_main, h, address_translation, size_translation);
|
||||
lazy_assert(w_main.ndone == size_main);
|
||||
|
||||
// Actually write translation table
|
||||
// This write is guaranteed to read good data at the end of the buffer, since the
|
||||
// This write is guaranteed to read good data at the end of the buffer,
|
||||
// since the
|
||||
// w_translation.buf is padded with zeros to a 512-byte boundary.
|
||||
toku_os_full_pwrite(fd, w_translation.buf, roundup_to_multiple(512, size_translation), address_translation);
|
||||
toku_os_full_pwrite(fd,
|
||||
w_translation.buf,
|
||||
roundup_to_multiple(512, size_translation),
|
||||
address_translation);
|
||||
|
||||
//Everything but the header MUST be on disk before header starts.
|
||||
//Otherwise we will think the header is good and some blocks might not
|
||||
//yet be on disk.
|
||||
//If the header has a cachefile we need to do cachefile fsync (to
|
||||
//prevent crash if we redirected to dev null)
|
||||
//If there is no cachefile we still need to do an fsync.
|
||||
// Everything but the header MUST be on disk before header starts.
|
||||
// Otherwise we will think the header is good and some blocks might not
|
||||
// yet be on disk.
|
||||
// If the header has a cachefile we need to do cachefile fsync (to
|
||||
// prevent crash if we redirected to dev null)
|
||||
// If there is no cachefile we still need to do an fsync.
|
||||
if (cf) {
|
||||
toku_cachefile_fsync(cf);
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
toku_file_fsync(fd);
|
||||
}
|
||||
|
||||
//Alternate writing header to two locations:
|
||||
// Alternate writing header to two locations:
|
||||
// Beginning (0) or BLOCK_ALLOCATOR_HEADER_RESERVE
|
||||
toku_off_t main_offset;
|
||||
main_offset = (h->checkpoint_count & 0x1) ? 0 : block_allocator::BLOCK_ALLOCATOR_HEADER_RESERVE;
|
||||
main_offset = (h->checkpoint_count & 0x1)
|
||||
? 0
|
||||
: BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE;
|
||||
toku_os_full_pwrite(fd, w_main.buf, size_main_aligned, main_offset);
|
||||
toku_free(w_main.buf);
|
||||
toku_free(w_translation.buf);
|
||||
|
|
|
@ -99,13 +99,11 @@ void toku_ft_serialize_layer_init(void) {
|
|||
num_cores = toku_os_get_number_active_processors();
|
||||
int r = toku_thread_pool_create(&ft_pool, num_cores);
|
||||
lazy_assert_zero(r);
|
||||
block_allocator::maybe_initialize_trace();
|
||||
toku_serialize_in_parallel = false;
|
||||
}
|
||||
|
||||
void toku_ft_serialize_layer_destroy(void) {
|
||||
toku_thread_pool_destroy(&ft_pool);
|
||||
block_allocator::maybe_close_trace();
|
||||
}
|
||||
|
||||
enum { FILE_CHANGE_INCREMENT = (16 << 20) };
|
||||
|
@ -773,19 +771,23 @@ int toku_serialize_ftnode_to_memory(FTNODE node,
|
|||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
toku_serialize_ftnode_to (int fd, BLOCKNUM blocknum, FTNODE node, FTNODE_DISK_DATA* ndd, bool do_rebalancing, FT ft, bool for_checkpoint) {
|
||||
|
||||
int toku_serialize_ftnode_to(int fd,
|
||||
BLOCKNUM blocknum,
|
||||
FTNODE node,
|
||||
FTNODE_DISK_DATA *ndd,
|
||||
bool do_rebalancing,
|
||||
FT ft,
|
||||
bool for_checkpoint) {
|
||||
size_t n_to_write;
|
||||
size_t n_uncompressed_bytes;
|
||||
char *compressed_buf = nullptr;
|
||||
|
||||
// because toku_serialize_ftnode_to is only called for
|
||||
// because toku_serialize_ftnode_to is only called for
|
||||
// in toku_ftnode_flush_callback, we pass false
|
||||
// for in_parallel. The reasoning is that when we write
|
||||
// nodes to disk via toku_ftnode_flush_callback, we
|
||||
// nodes to disk via toku_ftnode_flush_callback, we
|
||||
// assume that it is being done on a non-critical
|
||||
// background thread (probably for checkpointing), and therefore
|
||||
// background thread (probably for checkpointing), and therefore
|
||||
// should not hog CPU,
|
||||
//
|
||||
// Should the above facts change, we may want to revisit
|
||||
|
@ -802,32 +804,32 @@ toku_serialize_ftnode_to (int fd, BLOCKNUM blocknum, FTNODE node, FTNODE_DISK_DA
|
|||
toku_unsafe_fetch(&toku_serialize_in_parallel),
|
||||
&n_to_write,
|
||||
&n_uncompressed_bytes,
|
||||
&compressed_buf
|
||||
);
|
||||
&compressed_buf);
|
||||
if (r != 0) {
|
||||
return r;
|
||||
}
|
||||
|
||||
// If the node has never been written, then write the whole buffer, including the zeros
|
||||
invariant(blocknum.b>=0);
|
||||
// If the node has never been written, then write the whole buffer,
|
||||
// including the zeros
|
||||
invariant(blocknum.b >= 0);
|
||||
DISKOFF offset;
|
||||
|
||||
// Dirties the ft
|
||||
ft->blocktable.realloc_on_disk(blocknum, n_to_write, &offset,
|
||||
ft, fd, for_checkpoint,
|
||||
// Allocations for nodes high in the tree are considered 'hot',
|
||||
// as they are likely to move again in the next checkpoint.
|
||||
node->height);
|
||||
ft->blocktable.realloc_on_disk(
|
||||
blocknum, n_to_write, &offset, ft, fd, for_checkpoint);
|
||||
|
||||
tokutime_t t0 = toku_time_now();
|
||||
toku_os_full_pwrite(fd, compressed_buf, n_to_write, offset);
|
||||
tokutime_t t1 = toku_time_now();
|
||||
|
||||
tokutime_t io_time = t1 - t0;
|
||||
toku_ft_status_update_flush_reason(node, n_uncompressed_bytes, n_to_write, io_time, for_checkpoint);
|
||||
toku_ft_status_update_flush_reason(
|
||||
node, n_uncompressed_bytes, n_to_write, io_time, for_checkpoint);
|
||||
|
||||
toku_free(compressed_buf);
|
||||
node->dirty = 0; // See #1957. Must set the node to be clean after serializing it so that it doesn't get written again on the next checkpoint or eviction.
|
||||
node->dirty = 0; // See #1957. Must set the node to be clean after
|
||||
// serializing it so that it doesn't get written again on
|
||||
// the next checkpoint or eviction.
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -994,6 +996,7 @@ BASEMENTNODE toku_clone_bn(BASEMENTNODE orig_bn) {
|
|||
bn->seqinsert = orig_bn->seqinsert;
|
||||
bn->stale_ancestor_messages_applied = orig_bn->stale_ancestor_messages_applied;
|
||||
bn->stat64_delta = orig_bn->stat64_delta;
|
||||
bn->logical_rows_delta = orig_bn->logical_rows_delta;
|
||||
bn->data_buffer.clone(&orig_bn->data_buffer);
|
||||
return bn;
|
||||
}
|
||||
|
@ -1004,6 +1007,7 @@ BASEMENTNODE toku_create_empty_bn_no_buffer(void) {
|
|||
bn->seqinsert = 0;
|
||||
bn->stale_ancestor_messages_applied = false;
|
||||
bn->stat64_delta = ZEROSTATS;
|
||||
bn->logical_rows_delta = 0;
|
||||
bn->data_buffer.init_zero();
|
||||
return bn;
|
||||
}
|
||||
|
@ -1897,7 +1901,7 @@ read_and_decompress_block_from_fd_into_rbuf(int fd, BLOCKNUM blocknum,
|
|||
/* out */ int *layout_version_p);
|
||||
|
||||
// This function upgrades a version 14 or 13 ftnode to the current
|
||||
// verison. NOTE: This code assumes the first field of the rbuf has
|
||||
// version. NOTE: This code assumes the first field of the rbuf has
|
||||
// already been read from the buffer (namely the layout_version of the
|
||||
// ftnode.)
|
||||
static int
|
||||
|
@ -2488,9 +2492,12 @@ toku_serialize_rollback_log_to_memory_uncompressed(ROLLBACK_LOG_NODE log, SERIAL
|
|||
serialized->blocknum = log->blocknum;
|
||||
}
|
||||
|
||||
int
|
||||
toku_serialize_rollback_log_to (int fd, ROLLBACK_LOG_NODE log, SERIALIZED_ROLLBACK_LOG_NODE serialized_log, bool is_serialized,
|
||||
FT ft, bool for_checkpoint) {
|
||||
int toku_serialize_rollback_log_to(int fd,
|
||||
ROLLBACK_LOG_NODE log,
|
||||
SERIALIZED_ROLLBACK_LOG_NODE serialized_log,
|
||||
bool is_serialized,
|
||||
FT ft,
|
||||
bool for_checkpoint) {
|
||||
size_t n_to_write;
|
||||
char *compressed_buf;
|
||||
struct serialized_rollback_log_node serialized_local;
|
||||
|
@ -2511,21 +2518,21 @@ toku_serialize_rollback_log_to (int fd, ROLLBACK_LOG_NODE log, SERIALIZED_ROLLBA
|
|||
serialized_log->n_sub_blocks,
|
||||
serialized_log->sub_block,
|
||||
ft->h->compression_method,
|
||||
&n_to_write, &compressed_buf);
|
||||
&n_to_write,
|
||||
&compressed_buf);
|
||||
|
||||
// Dirties the ft
|
||||
DISKOFF offset;
|
||||
ft->blocktable.realloc_on_disk(blocknum, n_to_write, &offset,
|
||||
ft, fd, for_checkpoint,
|
||||
// We consider rollback log flushing the hottest possible allocation,
|
||||
// since rollback logs are short-lived compared to FT nodes.
|
||||
INT_MAX);
|
||||
ft->blocktable.realloc_on_disk(
|
||||
blocknum, n_to_write, &offset, ft, fd, for_checkpoint);
|
||||
|
||||
toku_os_full_pwrite(fd, compressed_buf, n_to_write, offset);
|
||||
toku_free(compressed_buf);
|
||||
if (!is_serialized) {
|
||||
toku_static_serialized_rollback_log_destroy(&serialized_local);
|
||||
log->dirty = 0; // See #1957. Must set the node to be clean after serializing it so that it doesn't get written again on the next checkpoint or eviction.
|
||||
log->dirty = 0; // See #1957. Must set the node to be clean after
|
||||
// serializing it so that it doesn't get written again
|
||||
// on the next checkpoint or eviction.
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
@ -2704,7 +2711,7 @@ exit:
|
|||
}
|
||||
|
||||
static int decompress_from_raw_block_into_rbuf_versioned(uint32_t version, uint8_t *raw_block, size_t raw_block_size, struct rbuf *rb, BLOCKNUM blocknum) {
|
||||
// This function exists solely to accomodate future changes in compression.
|
||||
// This function exists solely to accommodate future changes in compression.
|
||||
int r = 0;
|
||||
if ((version == FT_LAYOUT_VERSION_13 || version == FT_LAYOUT_VERSION_14) ||
|
||||
(FT_LAYOUT_VERSION_25 <= version && version <= FT_LAYOUT_VERSION_27) ||
|
||||
|
|
833
storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.cc
Normal file
833
storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.cc
Normal file
|
@ -0,0 +1,833 @@
|
|||
/*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
#ident "$Id$"
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#include "ft/serialize/rbtree_mhs.h"
|
||||
#include "portability/toku_assert.h"
|
||||
#include "portability/toku_portability.h"
|
||||
#include <algorithm>
|
||||
|
||||
namespace MhsRbTree {
|
||||
|
||||
Tree::Tree() : _root(NULL), _align(1) {}
|
||||
|
||||
Tree::Tree(uint64_t align) : _root(NULL), _align(align) {}
|
||||
|
||||
Tree::~Tree() { Destroy(); }
|
||||
|
||||
void Tree::PreOrder(Node *tree) const {
|
||||
if (tree != NULL) {
|
||||
fprintf(stderr, "%" PRIu64 " ", rbn_offset(tree).ToInt());
|
||||
PreOrder(tree->_left);
|
||||
PreOrder(tree->_right);
|
||||
}
|
||||
}
|
||||
|
||||
void Tree::PreOrder() { PreOrder(_root); }
|
||||
|
||||
void Tree::InOrder(Node *tree) const {
|
||||
if (tree != NULL) {
|
||||
InOrder(tree->_left);
|
||||
fprintf(stderr, "%" PRIu64 " ", rbn_offset(tree).ToInt());
|
||||
InOrder(tree->_right);
|
||||
}
|
||||
}
|
||||
|
||||
// yeah, i only care about in order visitor. -Jun
|
||||
void Tree::InOrderVisitor(Node *tree,
|
||||
void (*f)(void *, Node *, uint64_t),
|
||||
void *extra,
|
||||
uint64_t depth) {
|
||||
if (tree != NULL) {
|
||||
InOrderVisitor(tree->_left, f, extra, depth + 1);
|
||||
f(extra, tree, depth);
|
||||
InOrderVisitor(tree->_right, f, extra, depth + 1);
|
||||
}
|
||||
}
|
||||
|
||||
void Tree::InOrderVisitor(void (*f)(void *, Node *, uint64_t),
|
||||
void *extra) {
|
||||
InOrderVisitor(_root, f, extra, 0);
|
||||
}
|
||||
|
||||
void Tree::InOrder() { InOrder(_root); }
|
||||
|
||||
void Tree::PostOrder(Node *tree) const {
|
||||
if (tree != NULL) {
|
||||
PostOrder(tree->_left);
|
||||
PostOrder(tree->_right);
|
||||
fprintf(stderr, "%" PRIu64 " ", rbn_offset(tree).ToInt());
|
||||
}
|
||||
}
|
||||
|
||||
void Tree::PostOrder() { PostOrder(_root); }
|
||||
|
||||
Node *Tree::SearchByOffset(uint64_t offset) {
|
||||
Node *x = _root;
|
||||
while ((x != NULL) && (rbn_offset(x).ToInt() != offset)) {
|
||||
if (offset < rbn_offset(x).ToInt())
|
||||
x = x->_left;
|
||||
else
|
||||
x = x->_right;
|
||||
}
|
||||
|
||||
return x;
|
||||
}
|
||||
|
||||
// mostly for testing
|
||||
// Looks for a node able to satisfy a request of `size`.
//
// Returns nullptr when the tree is empty, or when neither the root's own
// effective size nor the max-hole-size labels of its left and right subtrees
// reach `size`. Otherwise the search is delegated to
// SearchFirstFitBySizeHelper starting at the root.
Node *Tree::SearchFirstFitBySize(uint64_t size) {
    // An empty tree is a valid state (the constructors set _root to NULL);
    // bail out before the root's labels are read.
    if (_root == NULL) {
        return nullptr;
    }

    // The root's labels summarise the whole tree: if none of the three
    // candidates is large enough there is nothing to find.
    if (EffectiveSize(_root) < size && rbn_left_mhs(_root) < size &&
        rbn_right_mhs(_root) < size) {
        return nullptr;
    }

    return SearchFirstFitBySizeHelper(_root, size);
}
|
||||
|
||||
Node *Tree::SearchFirstFitBySizeHelper(Node *x, uint64_t size) {
|
||||
if (EffectiveSize(x) >= size) {
|
||||
// only possible to go left
|
||||
if (rbn_left_mhs(x) >= size)
|
||||
return SearchFirstFitBySizeHelper(x->_left, size);
|
||||
else
|
||||
return x;
|
||||
}
|
||||
if (rbn_left_mhs(x) >= size)
|
||||
return SearchFirstFitBySizeHelper(x->_left, size);
|
||||
|
||||
if (rbn_right_mhs(x) >= size)
|
||||
return SearchFirstFitBySizeHelper(x->_right, size);
|
||||
|
||||
// this is an invalid state
|
||||
Dump();
|
||||
ValidateBalance();
|
||||
ValidateMhs();
|
||||
invariant(0);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
Node *Tree::MinNode(Node *tree) {
|
||||
if (tree == NULL)
|
||||
return NULL;
|
||||
|
||||
while (tree->_left != NULL)
|
||||
tree = tree->_left;
|
||||
return tree;
|
||||
}
|
||||
|
||||
Node *Tree::MinNode() { return MinNode(_root); }
|
||||
|
||||
Node *Tree::MaxNode(Node *tree) {
|
||||
if (tree == NULL)
|
||||
return NULL;
|
||||
|
||||
while (tree->_right != NULL)
|
||||
tree = tree->_right;
|
||||
return tree;
|
||||
}
|
||||
|
||||
Node *Tree::MaxNode() { return MaxNode(_root); }
|
||||
|
||||
Node *Tree::SuccessorHelper(Node *y, Node *x) {
|
||||
while ((y != NULL) && (x == y->_right)) {
|
||||
x = y;
|
||||
y = y->_parent;
|
||||
}
|
||||
return y;
|
||||
}
|
||||
Node *Tree::Successor(Node *x) {
|
||||
if (x->_right != NULL)
|
||||
return MinNode(x->_right);
|
||||
|
||||
Node *y = x->_parent;
|
||||
return SuccessorHelper(y, x);
|
||||
}
|
||||
|
||||
Node *Tree::PredecessorHelper(Node *y, Node *x) {
|
||||
while ((y != NULL) && (x == y->_left)) {
|
||||
x = y;
|
||||
y = y->_parent;
|
||||
}
|
||||
|
||||
return y;
|
||||
}
|
||||
// Returns the in-order predecessor of x, or NULL when x has none.
//
// x must not be NULL (it is dereferenced unconditionally).
//  - If x has a left subtree, the predecessor is the right-most node of
//    that subtree.
//  - Otherwise it is the nearest ancestor reached by climbing out of a
//    right child; PredecessorHelper keeps climbing while the current node
//    is its parent's left child and yields NULL if the root is passed.
Node *Tree::Predecessor(Node *x) {
    if (x->_left != NULL)
        return MaxNode(x->_left);

    Node *y = x->_parent;
    // Must climb with the predecessor rule; SuccessorHelper would walk up
    // past right-child links and hand back the successor instead.
    return PredecessorHelper(y, x);
}
|
||||
|
||||
/*
|
||||
* px px
|
||||
* / /
|
||||
* x y
|
||||
* / \ --(left rotation)--> / \ #
|
||||
* lx y x ry
|
||||
* / \ / \
|
||||
* ly ry lx ly
|
||||
* max_hole_size updates are pretty local
|
||||
*/
|
||||
|
||||
void Tree::LeftRotate(Node *&root, Node *x) {
|
||||
Node *y = x->_right;
|
||||
|
||||
x->_right = y->_left;
|
||||
rbn_right_mhs(x) = rbn_left_mhs(y);
|
||||
|
||||
if (y->_left != NULL)
|
||||
y->_left->_parent = x;
|
||||
|
||||
y->_parent = x->_parent;
|
||||
|
||||
if (x->_parent == NULL) {
|
||||
root = y;
|
||||
} else {
|
||||
if (x->_parent->_left == x) {
|
||||
x->_parent->_left = y;
|
||||
} else {
|
||||
x->_parent->_right = y;
|
||||
}
|
||||
}
|
||||
y->_left = x;
|
||||
rbn_left_mhs(y) = mhs_of_subtree(x);
|
||||
|
||||
x->_parent = y;
|
||||
}
|
||||
|
||||
/* py py
|
||||
* / /
|
||||
* y x
|
||||
* / \ --(right rotate)--> / \ #
|
||||
* x ry lx y
|
||||
* / \ / \ #
|
||||
* lx rx rx ry
|
||||
*
|
||||
*/
|
||||
|
||||
void Tree::RightRotate(Node *&root, Node *y) {
|
||||
Node *x = y->_left;
|
||||
|
||||
y->_left = x->_right;
|
||||
rbn_left_mhs(y) = rbn_right_mhs(x);
|
||||
|
||||
if (x->_right != NULL)
|
||||
x->_right->_parent = y;
|
||||
|
||||
x->_parent = y->_parent;
|
||||
|
||||
if (y->_parent == NULL) {
|
||||
root = x;
|
||||
} else {
|
||||
if (y == y->_parent->_right)
|
||||
y->_parent->_right = x;
|
||||
else
|
||||
y->_parent->_left = x;
|
||||
}
|
||||
|
||||
x->_right = y;
|
||||
rbn_right_mhs(x) = mhs_of_subtree(y);
|
||||
y->_parent = x;
|
||||
}
|
||||
|
||||
// walking from this node up to update the mhs info
|
||||
// whenever there is a change in the left/right mhs or size we should recalculate.
|
||||
// prerequisite: the children of the node are mhs up-to-date.
|
||||
void Tree::RecalculateMhs(Node *node) {
|
||||
uint64_t *p_node_mhs = 0;
|
||||
Node *parent = node->_parent;
|
||||
|
||||
if (!parent)
|
||||
return;
|
||||
|
||||
uint64_t max_mhs = mhs_of_subtree(node);
|
||||
if (node == parent->_left) {
|
||||
p_node_mhs = &rbn_left_mhs(parent);
|
||||
} else if (node == parent->_right) {
|
||||
p_node_mhs = &rbn_right_mhs(parent);
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
if (*p_node_mhs != max_mhs) {
|
||||
*p_node_mhs = max_mhs;
|
||||
RecalculateMhs(parent);
|
||||
}
|
||||
}
|
||||
|
||||
void Tree::IsNewNodeMergable(Node *pred,
|
||||
Node *succ,
|
||||
Node::BlockPair pair,
|
||||
bool *left_merge,
|
||||
bool *right_merge) {
|
||||
if (pred) {
|
||||
OUUInt64 end_of_pred = rbn_size(pred) + rbn_offset(pred);
|
||||
if (end_of_pred < pair._offset)
|
||||
*left_merge = false;
|
||||
else {
|
||||
invariant(end_of_pred == pair._offset);
|
||||
*left_merge = true;
|
||||
}
|
||||
}
|
||||
if (succ) {
|
||||
OUUInt64 begin_of_succ = rbn_offset(succ);
|
||||
OUUInt64 end_of_node = pair._offset + pair._size;
|
||||
if (end_of_node < begin_of_succ) {
|
||||
*right_merge = false;
|
||||
} else {
|
||||
invariant(end_of_node == begin_of_succ);
|
||||
*right_merge = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Tree::AbsorbNewNode(Node *pred,
|
||||
Node *succ,
|
||||
Node::BlockPair pair,
|
||||
bool left_merge,
|
||||
bool right_merge,
|
||||
bool is_right_child) {
|
||||
invariant(left_merge || right_merge);
|
||||
if (left_merge && right_merge) {
|
||||
// merge to the succ
|
||||
if (!is_right_child) {
|
||||
rbn_size(succ) += pair._size;
|
||||
rbn_offset(succ) = pair._offset;
|
||||
// merge to the pred
|
||||
rbn_size(pred) += rbn_size(succ);
|
||||
// to keep the invariant of the tree -no overlapping holes
|
||||
rbn_offset(succ) += rbn_size(succ);
|
||||
rbn_size(succ) = 0;
|
||||
RecalculateMhs(succ);
|
||||
RecalculateMhs(pred);
|
||||
// pred dominates succ. this is going to
|
||||
// update the pred labels separately.
|
||||
// remove succ
|
||||
RawRemove(_root, succ);
|
||||
} else {
|
||||
rbn_size(pred) += pair._size;
|
||||
rbn_offset(succ) = rbn_offset(pred);
|
||||
rbn_size(succ) += rbn_size(pred);
|
||||
rbn_offset(pred) += rbn_size(pred);
|
||||
rbn_size(pred) = 0;
|
||||
RecalculateMhs(pred);
|
||||
RecalculateMhs(succ);
|
||||
// now remove pred
|
||||
RawRemove(_root, pred);
|
||||
}
|
||||
} else if (left_merge) {
|
||||
rbn_size(pred) += pair._size;
|
||||
RecalculateMhs(pred);
|
||||
} else if (right_merge) {
|
||||
rbn_offset(succ) -= pair._size;
|
||||
rbn_size(succ) += pair._size;
|
||||
RecalculateMhs(succ);
|
||||
}
|
||||
}
|
||||
// this is the most tedious part, but not complicated:
|
||||
// 1.find where to insert the pair
|
||||
// 2.if the pred and succ can merge with the pair. merge with them. either
|
||||
// pred
|
||||
// or succ can be removed.
|
||||
// 3. if only left-mergable or right-mergeable, just merge
|
||||
// 4. non-mergable case. insert the node and run the fixup.
|
||||
|
||||
int Tree::Insert(Node *&root, Node::BlockPair pair) {
|
||||
Node *x = _root;
|
||||
Node *y = NULL;
|
||||
bool left_merge = false;
|
||||
bool right_merge = false;
|
||||
Node *node = NULL;
|
||||
|
||||
while (x != NULL) {
|
||||
y = x;
|
||||
if (pair._offset < rbn_key(x))
|
||||
x = x->_left;
|
||||
else
|
||||
x = x->_right;
|
||||
}
|
||||
|
||||
// we found where to insert, lets find out the pred and succ for
|
||||
// possible
|
||||
// merges.
|
||||
// node->parent = y;
|
||||
Node *pred, *succ;
|
||||
if (y != NULL) {
|
||||
if (pair._offset < rbn_key(y)) {
|
||||
// as the left child
|
||||
pred = PredecessorHelper(y->_parent, y);
|
||||
succ = y;
|
||||
IsNewNodeMergable(pred, succ, pair, &left_merge, &right_merge);
|
||||
if (left_merge || right_merge) {
|
||||
AbsorbNewNode(
|
||||
pred, succ, pair, left_merge, right_merge, false);
|
||||
} else {
|
||||
// construct the node
|
||||
Node::Pair mhsp {0, 0};
|
||||
node =
|
||||
new Node(EColor::BLACK, pair, mhsp, nullptr, nullptr, nullptr);
|
||||
if (!node)
|
||||
return -1;
|
||||
y->_left = node;
|
||||
node->_parent = y;
|
||||
RecalculateMhs(node);
|
||||
}
|
||||
|
||||
} else {
|
||||
// as the right child
|
||||
pred = y;
|
||||
succ = SuccessorHelper(y->_parent, y);
|
||||
IsNewNodeMergable(pred, succ, pair, &left_merge, &right_merge);
|
||||
if (left_merge || right_merge) {
|
||||
AbsorbNewNode(
|
||||
pred, succ, pair, left_merge, right_merge, true);
|
||||
} else {
|
||||
// construct the node
|
||||
Node::Pair mhsp {0, 0};
|
||||
node =
|
||||
new Node(EColor::BLACK, pair, mhsp, nullptr, nullptr, nullptr);
|
||||
if (!node)
|
||||
return -1;
|
||||
y->_right = node;
|
||||
node->_parent = y;
|
||||
RecalculateMhs(node);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
Node::Pair mhsp {0, 0};
|
||||
node = new Node(EColor::BLACK, pair, mhsp, nullptr, nullptr, nullptr);
|
||||
if (!node)
|
||||
return -1;
|
||||
root = node;
|
||||
}
|
||||
if (!left_merge && !right_merge) {
|
||||
invariant_notnull(node);
|
||||
node->_color = EColor::RED;
|
||||
return InsertFixup(root, node);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Restores the red-black colouring after `node` (coloured red) has been
// linked into the tree. Walks upwards while the parent is red, recolouring
// when the uncle is red and rotating otherwise. Always returns 0.
int Tree::InsertFixup(Node *&root, Node *node) {
    for (;;) {
        Node *parent = rbn_parent(node);
        if (!parent || !rbn_is_red(parent))
            break;

        Node *gparent = rbn_parent(parent);
        const bool parent_on_left = (parent == gparent->_left);
        Node *uncle = parent_on_left ? gparent->_right : gparent->_left;

        // Red uncle: push the blackness down from the grandparent and
        // continue the repair from the grandparent.
        if (uncle && rbn_is_red(uncle)) {
            rbn_set_black(uncle);
            rbn_set_black(parent);
            rbn_set_red(gparent);
            node = gparent;
            continue;
        }

        if (parent_on_left) {
            // Inner grandchild: rotate it to the outside first, then swap
            // the roles of node and parent.
            if (parent->_right == node) {
                LeftRotate(root, parent);
                Node *old_parent = parent;
                parent = node;
                node = old_parent;
            }
            rbn_set_black(parent);
            rbn_set_red(gparent);
            RightRotate(root, gparent);
        } else {
            // Mirror image of the branch above.
            if (parent->_left == node) {
                RightRotate(root, parent);
                Node *old_parent = parent;
                parent = node;
                node = old_parent;
            }
            rbn_set_black(parent);
            rbn_set_red(gparent);
            LeftRotate(root, gparent);
        }
    }

    // The root is always black.
    rbn_set_black(root);
    return 0;
}
|
||||
|
||||
// Public entry point: inserts the hole `pair` into this tree's own root.
int Tree::Insert(Node::BlockPair pair) {
    return Insert(_root, pair);
}
|
||||
|
||||
uint64_t Tree::Remove(size_t size) {
|
||||
Node *node = SearchFirstFitBySize(size);
|
||||
return Remove(_root, node, size);
|
||||
}
|
||||
|
||||
// Unlinks `node` from the tree rooted at `root`, keeps the max-hole-size
// labels of the touched nodes up to date, rebalances if a black node was
// taken out of the tree, and finally deletes `node`.
void Tree::RawRemove(Node *&root, Node *node) {
    // Easy case: at most one child, which simply takes node's place.
    if (node->_left == NULL || node->_right == NULL) {
        Node *only_child = (node->_left != NULL) ? node->_left : node->_right;
        Node *up = node->_parent;
        const EColor removed_color = node->_color;

        if (only_child)
            only_child->_parent = up;

        if (up == NULL) {
            root = only_child;
        } else {
            if (up->_left == node) {
                up->_left = only_child;
                rbn_left_mhs(up) =
                    only_child ? mhs_of_subtree(only_child) : 0;
            } else {
                up->_right = only_child;
                rbn_right_mhs(up) =
                    only_child ? mhs_of_subtree(only_child) : 0;
            }
            RecalculateMhs(up);
        }

        if (removed_color == EColor::BLACK)
            RawRemoveFixup(root, only_child, up);
        delete node;
        return;
    }

    // Two children: the in-order successor (leftmost node of the right
    // subtree) is moved into node's position.
    Node *successor = node->_right;
    while (successor->_left != NULL)
        successor = successor->_left;

    // Re-point the link that used to lead to `node`.
    Node *above = rbn_parent(node);
    if (above == NULL)
        root = successor;
    else if (above->_left == node)
        above->_left = successor;
    else
        above->_right = successor;

    // What is physically taken out of its old place is the successor, so
    // its child, parent and colour drive the rebalancing below.
    Node *child = successor->_right;
    Node *fix_parent = rbn_parent(successor);
    const EColor removed_color = rbn_color(successor);

    if (fix_parent == node) {
        // Successor is node's direct right child and keeps its own right
        // subtree.
        fix_parent = successor;
    } else {
        // Detach the successor from its old parent; its right child takes
        // over the vacated left slot together with the matching label.
        if (child)
            rbn_parent(child) = fix_parent;

        fix_parent->_left = child;
        rbn_left_mhs(fix_parent) = rbn_right_mhs(successor);
        RecalculateMhs(fix_parent);

        // The successor adopts node's right subtree and its label.
        successor->_right = node->_right;
        rbn_set_parent(node->_right, successor);
        rbn_right_mhs(successor) = rbn_right_mhs(node);
    }

    // The successor inherits node's parent, colour, left subtree and left
    // label.
    successor->_parent = node->_parent;
    successor->_color = node->_color;
    successor->_left = node->_left;
    rbn_left_mhs(successor) = rbn_left_mhs(node);
    node->_left->_parent = successor;
    RecalculateMhs(successor);

    if (removed_color == EColor::BLACK)
        RawRemoveFixup(root, child, fix_parent);
    delete node;
}
|
||||
|
||||
void Tree::RawRemove(uint64_t offset) {
|
||||
Node *node = SearchByOffset(offset);
|
||||
RawRemove(_root, node);
|
||||
}
|
||||
// Rounds `value` up to the next multiple of `ba_alignment` (values that are
// already a multiple are returned unchanged). `ba_alignment` must be
// non-zero.
//
// Works on the remainder instead of computing `value + ba_alignment - 1`,
// so the intermediate result cannot wrap around for values close to the
// top of the uint64_t range.
static inline uint64_t align(uint64_t value, uint64_t ba_alignment) {
    const uint64_t remainder = value % ba_alignment;
    if (remainder == 0)
        return value;
    return value + (ba_alignment - remainder);
}
|
||||
// Carves `size` bytes out of the hole held by `node` and returns the
// offset that was handed out. The returned offset is the hole's offset
// rounded up to the tree's alignment, so the allocation can start at the
// front of the hole, end exactly at its back, or sit in the middle.
uint64_t Tree::Remove(Node *&root, Node *node, size_t size) {
    OUUInt64 n_offset = rbn_offset(node);
    OUUInt64 n_size = rbn_size(node);
    OUUInt64 answer_offset(align(rbn_offset(node).ToInt(), _align));

    // The aligned allocation has to fit inside the hole.
    invariant((answer_offset + size) <= (n_offset + n_size));

    if (answer_offset == n_offset) {
        // Allocation starts at the front: shrink the hole from the left
        // and drop the node once nothing is left of it.
        rbn_offset(node) += size;
        rbn_size(node) -= size;
        RecalculateMhs(node);
        if (rbn_size(node) == 0) {
            RawRemove(root, node);
        }
    } else if (answer_offset + size == n_offset + n_size) {
        // Allocation ends exactly at the back: shrink from the right.
        rbn_size(node) -= size;
        RecalculateMhs(node);
    } else {
        // Well, cut in the middle: the node keeps the part in front of the
        // allocation and the part behind it is re-inserted as a new hole.
        rbn_size(node) = answer_offset - n_offset;
        RecalculateMhs(node);
        int r = Insert(root,
                       {(answer_offset + size),
                        (n_offset + n_size) - (answer_offset + size)});
        // A failed insert would silently lose the trailing free space.
        invariant(r == 0);
    }
    return answer_offset.ToInt();
}
|
||||
|
||||
// Restores the red-black colouring after a black node has been unlinked.
// `node` is the child that took the removed node's place (may be NULL) and
// `parent` is its parent. The sibling of `node` is examined on every
// iteration; the numbered cases follow the classic red-black deletion.
void Tree::RawRemoveFixup(Node *&root, Node *node, Node *parent) {
    // A missing child counts as black.
    auto black_or_nil = [](const Node *n) {
        return !n || rbn_is_black(n);
    };

    while (black_or_nil(node) && node != root) {
        if (parent->_left == node) {
            Node *sibling = parent->_right;
            if (rbn_is_red(sibling)) {
                // Case 1: the sibling is red.
                rbn_set_black(sibling);
                rbn_set_red(parent);
                LeftRotate(root, parent);
                sibling = parent->_right;
            }
            if (black_or_nil(sibling->_left) &&
                black_or_nil(sibling->_right)) {
                // Case 2: the sibling is black and so are both of its
                // children; recolour and move the problem one level up.
                rbn_set_red(sibling);
                node = parent;
                parent = rbn_parent(node);
            } else {
                if (black_or_nil(sibling->_right)) {
                    // Case 3: the sibling is black, its left child is red
                    // and its right child is black.
                    rbn_set_black(sibling->_left);
                    rbn_set_red(sibling);
                    RightRotate(root, sibling);
                    sibling = parent->_right;
                }
                // Case 4: the sibling is black and its right child is red,
                // regardless of the left child's colour.
                rbn_set_color(sibling, rbn_color(parent));
                rbn_set_black(parent);
                rbn_set_black(sibling->_right);
                LeftRotate(root, parent);
                node = root;
                break;
            }
        } else {
            // Mirror image: `node` is the right child.
            Node *sibling = parent->_left;
            if (rbn_is_red(sibling)) {
                // Case 1: the sibling is red.
                rbn_set_black(sibling);
                rbn_set_red(parent);
                RightRotate(root, parent);
                sibling = parent->_left;
            }
            if (black_or_nil(sibling->_left) &&
                black_or_nil(sibling->_right)) {
                // Case 2: the sibling is black and so are both of its
                // children.
                rbn_set_red(sibling);
                node = parent;
                parent = rbn_parent(node);
            } else {
                if (black_or_nil(sibling->_left)) {
                    // Case 3: the sibling is black, its right child is red
                    // and its left child is black.
                    rbn_set_black(sibling->_right);
                    rbn_set_red(sibling);
                    LeftRotate(root, sibling);
                    sibling = parent->_left;
                }
                // Case 4: the sibling is black and its left child is red,
                // regardless of the right child's colour.
                rbn_set_color(sibling, rbn_color(parent));
                rbn_set_black(parent);
                rbn_set_black(sibling->_left);
                RightRotate(root, parent);
                node = root;
                break;
            }
        }
    }

    if (node)
        rbn_set_black(node);
}
|
||||
|
||||
// Deletes every node of the subtree referenced by `tree` (children first)
// and resets the reference to NULL.
void Tree::Destroy(Node *&tree) {
    if (!tree)
        return;

    // Recursing into an empty child returns immediately, so no separate
    // null test is needed before each call.
    Destroy(tree->_left);
    Destroy(tree->_right);

    delete tree;
    tree = NULL;
}
|
||||
|
||||
// Deletes the whole tree and leaves the root pointer NULL.
void Tree::Destroy() {
    Destroy(_root);
}
|
||||
|
||||
// Prints the subtree under `tree` to stderr in pre-order, one node per
// line: hole (offset, size), label (left mhs, right mhs), colour, and how
// the node hangs off its parent. `pair` is the parent's hole (only its
// offset is printed) and `dir` says which child of the parent `tree` is;
// EDirection::NONE marks the root.
void Tree::Dump(Node *tree, Node::BlockPair pair, EDirection dir) {
    if (tree == NULL)
        return;

    if (dir == EDirection::NONE) {
        fprintf(stderr,
                "(%" PRIu64 ",%" PRIu64 ", mhs:(%" PRIu64 ",%" PRIu64
                "))(B) is root\n",
                rbn_offset(tree).ToInt(),
                rbn_size(tree).ToInt(),
                rbn_left_mhs(tree),
                rbn_right_mhs(tree));
    } else {
        const char color_tag = rbn_is_red(tree) ? 'R' : 'B';
        const char *side =
            (dir == EDirection::RIGHT) ? "right child" : "left child";
        fprintf(stderr,
                "(%" PRIu64 ",%" PRIu64 ",mhs:(%" PRIu64 ",%" PRIu64
                "))(%c) is %" PRIu64 "'s %s\n",
                rbn_offset(tree).ToInt(),
                rbn_size(tree).ToInt(),
                rbn_left_mhs(tree),
                rbn_right_mhs(tree),
                color_tag,
                pair._offset.ToInt(),
                side);
    }

    Dump(tree->_left, tree->_hole, EDirection::LEFT);
    Dump(tree->_right, tree->_hole, EDirection::RIGHT);
}
|
||||
|
||||
// Returns how many bytes of node's hole remain usable once the start of
// the hole has been rounded up to the tree's alignment; 0 when the aligned
// start lies beyond the end of the hole.
uint64_t Tree::EffectiveSize(Node *node) {
    const OUUInt64 hole_begin = rbn_offset(node);
    const OUUInt64 hole_size = rbn_size(node);
    const OUUInt64 hole_end = hole_begin + hole_size;
    const OUUInt64 usable_begin(align(hole_begin.ToInt(), _align));

    return (usable_begin > hole_end) ? 0 : (hole_end - usable_begin).ToInt();
}
|
||||
|
||||
void Tree::Dump() {
|
||||
if (_root != NULL)
|
||||
Dump(_root, _root->_hole, (EDirection)0);
|
||||
}
|
||||
|
||||
static void vis_bal_f(void *extra, Node *node, uint64_t depth) {
|
||||
uint64_t **p = (uint64_t **)extra;
|
||||
uint64_t min = *p[0];
|
||||
uint64_t max = *p[1];
|
||||
if (node->_left) {
|
||||
Node *left = node->_left;
|
||||
invariant(node == left->_parent);
|
||||
}
|
||||
|
||||
if (node->_right) {
|
||||
Node *right = node->_right;
|
||||
invariant(node == right->_parent);
|
||||
}
|
||||
|
||||
if (!node->_left || !node->_right) {
|
||||
if (min > depth) {
|
||||
*p[0] = depth;
|
||||
} else if (max < depth) {
|
||||
*p[1] = depth;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Tree::ValidateBalance() {
|
||||
uint64_t min_depth = 0xffffffffffffffff;
|
||||
uint64_t max_depth = 0;
|
||||
if (!_root) {
|
||||
return;
|
||||
}
|
||||
uint64_t *p[2] = {&min_depth, &max_depth};
|
||||
InOrderVisitor(vis_bal_f, (void *)p);
|
||||
invariant((min_depth + 1) * 2 >= max_depth + 1);
|
||||
}
|
||||
|
||||
static void vis_cmp_f(void *extra, Node *node, uint64_t UU(depth)) {
|
||||
Node::BlockPair **p = (Node::BlockPair **)extra;
|
||||
|
||||
invariant_notnull(*p);
|
||||
invariant((*p)->_offset == node->_hole._offset);
|
||||
|
||||
*p = *p + 1;
|
||||
}
|
||||
|
||||
// validate the input pairs matches with sorted pairs
|
||||
void Tree::ValidateInOrder(Node::BlockPair *pairs) {
|
||||
InOrderVisitor(vis_cmp_f, &pairs);
|
||||
}
|
||||
|
||||
// Recomputes the max hole size of the subtree under `node` bottom-up and
// asserts that the labels stored in `node` agree with it. On a mismatch
// the offending value and the subtree are printed before the assertion
// fires. Returns the max hole size of the subtree (0 for an empty one).
uint64_t Tree::ValidateMhs(Node *node) {
    if (!node)
        return 0;

    uint64_t computed_left = ValidateMhs(node->_left);
    uint64_t computed_right = ValidateMhs(node->_right);

    if (computed_left != rbn_left_mhs(node)) {
        printf("assert failure: mhs_left = %" PRIu64 "\n", computed_left);
        Dump(node, node->_hole, (EDirection)0);
    }
    invariant(computed_left == rbn_left_mhs(node));

    if (computed_right != rbn_right_mhs(node)) {
        printf("assert failure: mhs_right = %" PRIu64 "\n", computed_right);
        Dump(node, node->_hole, (EDirection)0);
    }
    invariant(computed_right == rbn_right_mhs(node));

    // The subtree's value is the largest of the node's own usable size and
    // the values of its two subtrees.
    return std::max(EffectiveSize(node),
                    std::max(computed_left, computed_right));
}
|
||||
|
||||
// Validates the max-hole-size labels of the whole tree: both subtrees of
// the root are checked recursively and the root's own labels are compared
// against the recomputed values. An empty tree passes trivially.
void Tree::ValidateMhs() {
    if (_root == NULL)
        return;

    const uint64_t computed_left = ValidateMhs(_root->_left);
    const uint64_t computed_right = ValidateMhs(_root->_right);

    invariant(computed_left == rbn_left_mhs(_root));
    invariant(computed_right == rbn_right_mhs(_root));
}
|
||||
|
||||
} // namespace MhsRbTree
|
351
storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.h
Normal file
351
storage/tokudb/PerconaFT/ft/serialize/rbtree_mhs.h
Normal file
|
@ -0,0 +1,351 @@
|
|||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
#ident "$Id$"
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <db.h>
|
||||
|
||||
#include "portability/toku_pthread.h"
|
||||
#include "portability/toku_stdint.h"
|
||||
#include "portability/toku_stdlib.h"
|
||||
|
||||
// RBTree(Red-black tree) with max hole sizes for subtrees.
|
||||
|
||||
// This is a tentative data struct to improve the block allocation time
|
||||
// complexity from linear time to logarithmic time. Please note this DS only
|
||||
// supports first-fit for now. It is actually easier to do it with
|
||||
// best-fit.(just
|
||||
// sort by size).
|
||||
|
||||
// RBTree is a classic data struct with O(log(n)) for insertion, deletion and
|
||||
// search. Many years have seen its efficiency.
|
||||
|
||||
// a *hole* is the representation of an available BlockPair for allocation.
|
||||
// defined as (start_address, size) or (offset, size) interchangeably.
|
||||
|
||||
// each node has a *label* to indicate a pair of the max hole sizes for its
|
||||
// subtree.
|
||||
|
||||
// We are implementing a RBTree with max hole sizes for subtree. It is a red
|
||||
// black tree that is sorted by the start_address but also labeled with the max
|
||||
// hole sizes of the subtrees.
|
||||
|
||||
// [(6,3)] -> [(offset, size)], the hole
|
||||
// [{2,5}] -> [{mhs_of_left, mhs_of_right}], the label
|
||||
/* / \ */
|
||||
// [(0, 1)] [(10, 5)]
|
||||
// [{0, 2}] [{0, 0}]
|
||||
/* \ */
|
||||
// [(3, 2)]
|
||||
// [{0, 0}]
|
||||
// request of allocation size=2 goes from root to [(3,2)].
|
||||
|
||||
// above example shows a simplified RBTree_max_holes.
|
||||
// it is easier to tell the search time is O(log(n)) as we can make a decision
|
||||
// on each descent until we get to the target.
|
||||
|
||||
// the only question is if we can keep the maintenance cost low -- and i think
|
||||
// it is not a problem because an insertion/deletion is only going to update the
|
||||
// max_hole_sizes of the nodes along the path from the root to the node to be
|
||||
// deleted/inserted. The path can be cached and search is anyway O(log(n)).
|
||||
|
||||
// unlike the typical rbtree, Tree has to handle the inserts and deletes
|
||||
// with more care: an allocation that triggers the delete might leave some
|
||||
// unused space which we can simply update the start_addr and size without
|
||||
// worrying about overlapping. A free might not only mean an insertion but also
|
||||
// *merging* with the adjacent holes.
|
||||
|
||||
namespace MhsRbTree {
|
||||
|
||||
#define offset_t uint64_t
|
||||
enum class EColor { RED, BLACK };
|
||||
enum class EDirection { NONE = 0, LEFT, RIGHT };
|
||||
|
||||
// I am a bit tired of fixing overflow/underflow, just quickly craft some
|
||||
// int
|
||||
// class that has an infinity-like max value and prevents overflow and
|
||||
// underflow. If you got a file offset larger than MHS_MAX_VAL, it is not
|
||||
// a problem here. :-/ - JYM
|
||||
class OUUInt64 {
|
||||
public:
|
||||
static const uint64_t MHS_MAX_VAL = 0xffffffffffffffff;
|
||||
OUUInt64() : _value(0) {}
|
||||
OUUInt64(uint64_t s) : _value(s) {}
|
||||
bool operator<(const OUUInt64 &r) const {
|
||||
invariant(!(_value == MHS_MAX_VAL && r.ToInt() == MHS_MAX_VAL));
|
||||
return _value < r.ToInt();
|
||||
}
|
||||
bool operator>(const OUUInt64 &r) const {
|
||||
invariant(!(_value == MHS_MAX_VAL && r.ToInt() == MHS_MAX_VAL));
|
||||
return _value > r.ToInt();
|
||||
}
|
||||
bool operator<=(const OUUInt64 &r) const {
|
||||
invariant(!(_value == MHS_MAX_VAL && r.ToInt() == MHS_MAX_VAL));
|
||||
return _value <= r.ToInt();
|
||||
}
|
||||
bool operator>=(const OUUInt64 &r) const {
|
||||
invariant(!(_value == MHS_MAX_VAL && r.ToInt() == MHS_MAX_VAL));
|
||||
return _value >= r.ToInt();
|
||||
}
|
||||
OUUInt64 operator+(const OUUInt64 &r) const {
|
||||
if (_value == MHS_MAX_VAL || r.ToInt() == MHS_MAX_VAL) {
|
||||
OUUInt64 tmp(MHS_MAX_VAL);
|
||||
return tmp;
|
||||
} else {
|
||||
// detecting overflow
|
||||
invariant((MHS_MAX_VAL - _value) >= r.ToInt());
|
||||
uint64_t plus = _value + r.ToInt();
|
||||
OUUInt64 tmp(plus);
|
||||
return tmp;
|
||||
}
|
||||
}
|
||||
OUUInt64 operator-(const OUUInt64 &r) const {
|
||||
invariant(r.ToInt() != MHS_MAX_VAL);
|
||||
if (_value == MHS_MAX_VAL) {
|
||||
return *this;
|
||||
} else {
|
||||
invariant(_value >= r.ToInt());
|
||||
uint64_t minus = _value - r.ToInt();
|
||||
OUUInt64 tmp(minus);
|
||||
return tmp;
|
||||
}
|
||||
}
|
||||
OUUInt64 operator-=(const OUUInt64 &r) {
|
||||
if (_value != MHS_MAX_VAL) {
|
||||
invariant(r.ToInt() != MHS_MAX_VAL);
|
||||
invariant(_value >= r.ToInt());
|
||||
_value -= r.ToInt();
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
OUUInt64 operator+=(const OUUInt64 &r) {
|
||||
if (_value != MHS_MAX_VAL) {
|
||||
if (r.ToInt() == MHS_MAX_VAL) {
|
||||
_value = MHS_MAX_VAL;
|
||||
} else {
|
||||
invariant((MHS_MAX_VAL - _value) >= r.ToInt());
|
||||
this->_value += r.ToInt();
|
||||
}
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
bool operator==(const OUUInt64 &r) const {
|
||||
return _value == r.ToInt();
|
||||
}
|
||||
bool operator!=(const OUUInt64 &r) const {
|
||||
return _value != r.ToInt();
|
||||
}
|
||||
OUUInt64 operator=(const OUUInt64 &r) {
|
||||
_value = r.ToInt();
|
||||
return *this;
|
||||
}
|
||||
uint64_t ToInt() const { return _value; }
|
||||
|
||||
private:
|
||||
uint64_t _value;
|
||||
};
|
||||
|
||||
class Node {
|
||||
public:
|
||||
struct BlockPair {
|
||||
OUUInt64 _offset;
|
||||
OUUInt64 _size;
|
||||
|
||||
BlockPair() : _offset(0), _size(0) {}
|
||||
BlockPair(uint64_t o, uint64_t s) : _offset(o), _size(s) {}
|
||||
|
||||
BlockPair(OUUInt64 o, OUUInt64 s) : _offset(o), _size(s) {}
|
||||
int operator<(const struct BlockPair &rhs) const {
|
||||
return _offset < rhs._offset;
|
||||
}
|
||||
int operator<(const uint64_t &o) const { return _offset < o; }
|
||||
};
|
||||
|
||||
struct Pair {
|
||||
uint64_t _left;
|
||||
uint64_t _right;
|
||||
Pair(uint64_t l, uint64_t r) : _left(l), _right(r) {}
|
||||
};
|
||||
|
||||
EColor _color;
|
||||
struct BlockPair _hole;
|
||||
struct Pair _label;
|
||||
Node *_left;
|
||||
Node *_right;
|
||||
Node *_parent;
|
||||
|
||||
Node(EColor c,
|
||||
Node::BlockPair h,
|
||||
struct Pair lb,
|
||||
Node *l,
|
||||
Node *r,
|
||||
Node *p)
|
||||
: _color(c),
|
||||
_hole(h),
|
||||
_label(lb),
|
||||
_left(l),
|
||||
_right(r),
|
||||
_parent(p) {}
|
||||
};
|
||||
|
||||
class Tree {
|
||||
private:
|
||||
Node *_root;
|
||||
uint64_t _align;
|
||||
|
||||
public:
|
||||
Tree();
|
||||
Tree(uint64_t);
|
||||
~Tree();
|
||||
|
||||
void PreOrder();
|
||||
void InOrder();
|
||||
void PostOrder();
|
||||
// immutable operations
|
||||
Node *SearchByOffset(uint64_t addr);
|
||||
Node *SearchFirstFitBySize(uint64_t size);
|
||||
|
||||
Node *MinNode();
|
||||
Node *MaxNode();
|
||||
|
||||
Node *Successor(Node *);
|
||||
Node *Predecessor(Node *);
|
||||
|
||||
// mapped from tree_allocator::free_block
|
||||
int Insert(Node::BlockPair pair);
|
||||
// mapped from tree_allocator::alloc_block
|
||||
uint64_t Remove(size_t size);
|
||||
// mapped from tree_allocator::alloc_block_after
|
||||
|
||||
void RawRemove(uint64_t offset);
|
||||
void Destroy();
|
||||
// print the tree
|
||||
void Dump();
|
||||
// validation
|
||||
// balance
|
||||
void ValidateBalance();
|
||||
void ValidateInOrder(Node::BlockPair *);
|
||||
void InOrderVisitor(void (*f)(void *, Node *, uint64_t), void *);
|
||||
void ValidateMhs();
|
||||
|
||||
private:
|
||||
void PreOrder(Node *node) const;
|
||||
void InOrder(Node *node) const;
|
||||
void PostOrder(Node *node) const;
|
||||
Node *SearchByOffset(Node *node, offset_t addr) const;
|
||||
Node *SearchFirstFitBySize(Node *node, size_t size) const;
|
||||
|
||||
Node *MinNode(Node *node);
|
||||
Node *MaxNode(Node *node);
|
||||
|
||||
// rotations to fix up. we will have to update the labels too.
|
||||
void LeftRotate(Node *&root, Node *x);
|
||||
void RightRotate(Node *&root, Node *y);
|
||||
|
||||
int Insert(Node *&root, Node::BlockPair pair);
|
||||
int InsertFixup(Node *&root, Node *node);
|
||||
|
||||
void RawRemove(Node *&root, Node *node);
|
||||
uint64_t Remove(Node *&root, Node *node, size_t size);
|
||||
void RawRemoveFixup(Node *&root, Node *node, Node *parent);
|
||||
|
||||
void Destroy(Node *&tree);
|
||||
void Dump(Node *tree, Node::BlockPair pair, EDirection dir);
|
||||
void RecalculateMhs(Node *node);
|
||||
void IsNewNodeMergable(Node *, Node *, Node::BlockPair, bool *, bool *);
|
||||
void AbsorbNewNode(Node *, Node *, Node::BlockPair, bool, bool, bool);
|
||||
Node *SearchFirstFitBySizeHelper(Node *x, uint64_t size);
|
||||
|
||||
Node *SuccessorHelper(Node *y, Node *x);
|
||||
|
||||
Node *PredecessorHelper(Node *y, Node *x);
|
||||
|
||||
void InOrderVisitor(Node *,
|
||||
void (*f)(void *, Node *, uint64_t),
|
||||
void *,
|
||||
uint64_t);
|
||||
uint64_t ValidateMhs(Node *);
|
||||
|
||||
uint64_t EffectiveSize(Node *);
|
||||
// mixed with some macros.....
|
||||
#define rbn_parent(r) ((r)->_parent)
|
||||
#define rbn_color(r) ((r)->_color)
|
||||
#define rbn_is_red(r) ((r)->_color == EColor::RED)
|
||||
#define rbn_is_black(r) ((r)->_color == EColor::BLACK)
|
||||
#define rbn_set_black(r) \
|
||||
do { \
|
||||
(r)->_color = EColor::BLACK; \
|
||||
} while (0)
|
||||
#define rbn_set_red(r) \
|
||||
do { \
|
||||
(r)->_color = EColor::RED; \
|
||||
} while (0)
|
||||
#define rbn_set_parent(r, p) \
|
||||
do { \
|
||||
(r)->_parent = (p); \
|
||||
} while (0)
|
||||
#define rbn_set_color(r, c) \
|
||||
do { \
|
||||
(r)->_color = (c); \
|
||||
} while (0)
|
||||
#define rbn_set_offset(r) \
|
||||
do { \
|
||||
(r)->_hole._offset = (c); \
|
||||
} while (0)
|
||||
#define rbn_set_size(r, c) \
|
||||
do { \
|
||||
(r)->_hole._size = (c); \
|
||||
} while (0)
|
||||
#define rbn_set_left_mhs(r, c) \
|
||||
do { \
|
||||
(r)->_label._left = (c); \
|
||||
} while (0)
|
||||
#define rbn_set_right_mhs(r, c) \
|
||||
do { \
|
||||
(r)->_label._right = (c); \
|
||||
} while (0)
|
||||
#define rbn_size(r) ((r)->_hole._size)
|
||||
#define rbn_offset(r) ((r)->_hole._offset)
|
||||
#define rbn_key(r) ((r)->_hole._offset)
|
||||
#define rbn_left_mhs(r) ((r)->_label._left)
|
||||
#define rbn_right_mhs(r) ((r)->_label._right)
|
||||
#define mhs_of_subtree(y) \
|
||||
(std::max(std::max(rbn_left_mhs(y), rbn_right_mhs(y)), EffectiveSize(y)))
|
||||
};
|
||||
|
||||
} // namespace MhsRbTree
|
|
@ -1,126 +0,0 @@
|
|||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
#ident "$Id$"
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#include "ft/tests/test.h"
|
||||
|
||||
#include "ft/serialize/block_allocator_strategy.h"
|
||||
|
||||
static const uint64_t alignment = 4096;
|
||||
|
||||
static void test_first_vs_best_fit(void) {
|
||||
struct block_allocator::blockpair pairs[] = {
|
||||
block_allocator::blockpair(1 * alignment, 6 * alignment),
|
||||
// hole between 7x align -> 8x align
|
||||
block_allocator::blockpair(8 * alignment, 4 * alignment),
|
||||
// hole between 12x align -> 16x align
|
||||
block_allocator::blockpair(16 * alignment, 1 * alignment),
|
||||
block_allocator::blockpair(17 * alignment, 2 * alignment),
|
||||
// hole between 19 align -> 21x align
|
||||
block_allocator::blockpair(21 * alignment, 2 * alignment),
|
||||
};
|
||||
const uint64_t n_blocks = sizeof(pairs) / sizeof(pairs[0]);
|
||||
|
||||
block_allocator::blockpair *bp;
|
||||
|
||||
// first fit
|
||||
bp = block_allocator_strategy::first_fit(pairs, n_blocks, 100, alignment);
|
||||
assert(bp == &pairs[0]);
|
||||
bp = block_allocator_strategy::first_fit(pairs, n_blocks, 4096, alignment);
|
||||
assert(bp == &pairs[0]);
|
||||
bp = block_allocator_strategy::first_fit(pairs, n_blocks, 3 * 4096, alignment);
|
||||
assert(bp == &pairs[1]);
|
||||
bp = block_allocator_strategy::first_fit(pairs, n_blocks, 5 * 4096, alignment);
|
||||
assert(bp == nullptr);
|
||||
|
||||
// best fit
|
||||
bp = block_allocator_strategy::best_fit(pairs, n_blocks, 100, alignment);
|
||||
assert(bp == &pairs[0]);
|
||||
bp = block_allocator_strategy::best_fit(pairs, n_blocks, 4100, alignment);
|
||||
assert(bp == &pairs[3]);
|
||||
bp = block_allocator_strategy::best_fit(pairs, n_blocks, 3 * 4096, alignment);
|
||||
assert(bp == &pairs[1]);
|
||||
bp = block_allocator_strategy::best_fit(pairs, n_blocks, 5 * 4096, alignment);
|
||||
assert(bp == nullptr);
|
||||
}
|
||||
|
||||
static void test_padded_fit(void) {
|
||||
struct block_allocator::blockpair pairs[] = {
|
||||
block_allocator::blockpair(1 * alignment, 1 * alignment),
|
||||
// 4096 byte hole after bp[0]
|
||||
block_allocator::blockpair(3 * alignment, 1 * alignment),
|
||||
// 8192 byte hole after bp[1]
|
||||
block_allocator::blockpair(6 * alignment, 1 * alignment),
|
||||
// 16384 byte hole after bp[2]
|
||||
block_allocator::blockpair(11 * alignment, 1 * alignment),
|
||||
// 32768 byte hole after bp[3]
|
||||
block_allocator::blockpair(17 * alignment, 1 * alignment),
|
||||
// 116kb hole after bp[4]
|
||||
block_allocator::blockpair(113 * alignment, 1 * alignment),
|
||||
// 256kb hole after bp[5]
|
||||
block_allocator::blockpair(371 * alignment, 1 * alignment),
|
||||
};
|
||||
const uint64_t n_blocks = sizeof(pairs) / sizeof(pairs[0]);
|
||||
|
||||
block_allocator::blockpair *bp;
|
||||
|
||||
// padding for a 100 byte allocation will be < than standard alignment,
|
||||
// so it should fit in the first 4096 byte hole.
|
||||
bp = block_allocator_strategy::padded_fit(pairs, n_blocks, 4000, alignment);
|
||||
assert(bp == &pairs[0]);
|
||||
|
||||
// Even padded, a 12kb alloc will fit in a 16kb hole
|
||||
bp = block_allocator_strategy::padded_fit(pairs, n_blocks, 3 * alignment, alignment);
|
||||
assert(bp == &pairs[2]);
|
||||
|
||||
// would normally fit in the 116kb hole but the padding will bring it over
|
||||
bp = block_allocator_strategy::padded_fit(pairs, n_blocks, 116 * alignment, alignment);
|
||||
assert(bp == &pairs[5]);
|
||||
|
||||
bp = block_allocator_strategy::padded_fit(pairs, n_blocks, 127 * alignment, alignment);
|
||||
assert(bp == &pairs[5]);
|
||||
}
|
||||
|
||||
// Test entry point: runs both strategy tests; the command line is unused.
int test_main(int argc, const char *argv[]) {
    // Silence unused-parameter warnings.
    (void) argv;
    (void) argc;

    test_first_vs_best_fit();
    test_padded_fit();

    return 0;
}
|
|
@ -38,253 +38,243 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
|||
|
||||
#include "test.h"
|
||||
|
||||
static void ba_alloc(block_allocator *ba, uint64_t size, uint64_t *answer) {
|
||||
ba->validate();
|
||||
static void ba_alloc(BlockAllocator *ba, uint64_t size, uint64_t *answer) {
|
||||
ba->Validate();
|
||||
uint64_t actual_answer;
|
||||
const uint64_t heat = random() % 2;
|
||||
ba->alloc_block(512 * size, heat, &actual_answer);
|
||||
ba->validate();
|
||||
ba->AllocBlock(512 * size, &actual_answer);
|
||||
ba->Validate();
|
||||
|
||||
assert(actual_answer%512==0);
|
||||
*answer = actual_answer/512;
|
||||
invariant(actual_answer % 512 == 0);
|
||||
*answer = actual_answer / 512;
|
||||
}
|
||||
|
||||
static void ba_free(block_allocator *ba, uint64_t offset) {
|
||||
ba->validate();
|
||||
ba->free_block(offset * 512);
|
||||
ba->validate();
|
||||
static void ba_free(BlockAllocator *ba, uint64_t offset, uint64_t size) {
|
||||
ba->Validate();
|
||||
ba->FreeBlock(offset * 512, 512 * size);
|
||||
ba->Validate();
|
||||
}
|
||||
|
||||
static void ba_check_l(block_allocator *ba, uint64_t blocknum_in_layout_order,
|
||||
uint64_t expected_offset, uint64_t expected_size) {
|
||||
static void ba_check_l(BlockAllocator *ba,
|
||||
uint64_t blocknum_in_layout_order,
|
||||
uint64_t expected_offset,
|
||||
uint64_t expected_size) {
|
||||
uint64_t actual_offset, actual_size;
|
||||
int r = ba->get_nth_block_in_layout_order(blocknum_in_layout_order, &actual_offset, &actual_size);
|
||||
assert(r==0);
|
||||
assert(expected_offset*512 == actual_offset);
|
||||
assert(expected_size *512 == actual_size);
|
||||
int r = ba->NthBlockInLayoutOrder(
|
||||
blocknum_in_layout_order, &actual_offset, &actual_size);
|
||||
invariant(r == 0);
|
||||
invariant(expected_offset * 512 == actual_offset);
|
||||
invariant(expected_size * 512 == actual_size);
|
||||
}
|
||||
|
||||
static void ba_check_none(block_allocator *ba, uint64_t blocknum_in_layout_order) {
|
||||
static void ba_check_none(BlockAllocator *ba,
|
||||
uint64_t blocknum_in_layout_order) {
|
||||
uint64_t actual_offset, actual_size;
|
||||
int r = ba->get_nth_block_in_layout_order(blocknum_in_layout_order, &actual_offset, &actual_size);
|
||||
assert(r==-1);
|
||||
int r = ba->NthBlockInLayoutOrder(
|
||||
blocknum_in_layout_order, &actual_offset, &actual_size);
|
||||
invariant(r == -1);
|
||||
}
|
||||
|
||||
|
||||
// Simple block allocator test
|
||||
static void test_ba0(block_allocator::allocation_strategy strategy) {
|
||||
block_allocator allocator;
|
||||
block_allocator *ba = &allocator;
|
||||
ba->create(100*512, 1*512);
|
||||
ba->set_strategy(strategy);
|
||||
assert(ba->allocated_limit()==100*512);
|
||||
static void test_ba0() {
|
||||
BlockAllocator allocator;
|
||||
BlockAllocator *ba = &allocator;
|
||||
ba->Create(100 * 512, 1 * 512);
|
||||
invariant(ba->AllocatedLimit() == 100 * 512);
|
||||
|
||||
uint64_t b2, b3, b4, b5, b6, b7;
|
||||
ba_alloc(ba, 100, &b2);
|
||||
ba_alloc(ba, 100, &b3);
|
||||
ba_alloc(ba, 100, &b4);
|
||||
ba_alloc(ba, 100, &b5);
|
||||
ba_alloc(ba, 100, &b6);
|
||||
ba_alloc(ba, 100, &b7);
|
||||
ba_free(ba, b2);
|
||||
ba_alloc(ba, 100, &b2);
|
||||
ba_free(ba, b4);
|
||||
ba_free(ba, b6);
|
||||
ba_alloc(ba, 100, &b2);
|
||||
ba_alloc(ba, 100, &b3);
|
||||
ba_alloc(ba, 100, &b4);
|
||||
ba_alloc(ba, 100, &b5);
|
||||
ba_alloc(ba, 100, &b6);
|
||||
ba_alloc(ba, 100, &b7);
|
||||
ba_free(ba, b2, 100);
|
||||
ba_alloc(ba, 100, &b2);
|
||||
ba_free(ba, b4, 100);
|
||||
ba_free(ba, b6, 100);
|
||||
uint64_t b8, b9;
|
||||
ba_alloc(ba, 100, &b4);
|
||||
ba_free(ba, b2);
|
||||
ba_alloc(ba, 100, &b6);
|
||||
ba_alloc(ba, 100, &b8);
|
||||
ba_alloc(ba, 100, &b9);
|
||||
ba_free(ba, b6);
|
||||
ba_free(ba, b7);
|
||||
ba_free(ba, b8);
|
||||
ba_alloc(ba, 100, &b6);
|
||||
ba_alloc(ba, 100, &b7);
|
||||
ba_free(ba, b4);
|
||||
ba_alloc(ba, 100, &b4);
|
||||
ba_alloc(ba, 100, &b4);
|
||||
ba_free(ba, b2, 100);
|
||||
ba_alloc(ba, 100, &b6);
|
||||
ba_alloc(ba, 100, &b8);
|
||||
ba_alloc(ba, 100, &b9);
|
||||
ba_free(ba, b6, 100);
|
||||
ba_free(ba, b7, 100);
|
||||
ba_free(ba, b8, 100);
|
||||
ba_alloc(ba, 100, &b6);
|
||||
ba_alloc(ba, 100, &b7);
|
||||
ba_free(ba, b4, 100);
|
||||
ba_alloc(ba, 100, &b4);
|
||||
|
||||
ba->destroy();
|
||||
ba->Destroy();
|
||||
}
|
||||
|
||||
// Manually exercise the block allocator to get coverage of all of its code.
|
||||
static void
|
||||
test_ba1(block_allocator::allocation_strategy strategy, int n_initial) {
|
||||
block_allocator allocator;
|
||||
block_allocator *ba = &allocator;
|
||||
ba->create(0*512, 1*512);
|
||||
ba->set_strategy(strategy);
|
||||
static void test_ba1(int n_initial) {
|
||||
BlockAllocator allocator;
|
||||
BlockAllocator *ba = &allocator;
|
||||
ba->Create(0 * 512, 1 * 512);
|
||||
|
||||
int n_blocks=0;
|
||||
int n_blocks = 0;
|
||||
uint64_t blocks[1000];
|
||||
for (int i = 0; i < 1000; i++) {
|
||||
if (i < n_initial || random() % 2 == 0) {
|
||||
if (n_blocks < 1000) {
|
||||
ba_alloc(ba, 1, &blocks[n_blocks]);
|
||||
//printf("A[%d]=%ld\n", n_blocks, blocks[n_blocks]);
|
||||
n_blocks++;
|
||||
}
|
||||
} else {
|
||||
if (n_blocks > 0) {
|
||||
int blocknum = random()%n_blocks;
|
||||
//printf("F[%d]%ld\n", blocknum, blocks[blocknum]);
|
||||
ba_free(ba, blocks[blocknum]);
|
||||
blocks[blocknum]=blocks[n_blocks-1];
|
||||
n_blocks--;
|
||||
}
|
||||
}
|
||||
if (i < n_initial || random() % 2 == 0) {
|
||||
if (n_blocks < 1000) {
|
||||
ba_alloc(ba, 1, &blocks[n_blocks]);
|
||||
// printf("A[%d]=%ld\n", n_blocks, blocks[n_blocks]);
|
||||
n_blocks++;
|
||||
}
|
||||
} else {
|
||||
if (n_blocks > 0) {
|
||||
int blocknum = random() % n_blocks;
|
||||
// printf("F[%d]=%ld\n", blocknum, blocks[blocknum]);
|
||||
ba_free(ba, blocks[blocknum], 1);
|
||||
blocks[blocknum] = blocks[n_blocks - 1];
|
||||
n_blocks--;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ba->destroy();
|
||||
|
||||
ba->Destroy();
|
||||
}
|
||||
|
||||
|
||||
// Check to see if it is first fit or best fit.
|
||||
static void
|
||||
test_ba2 (void)
|
||||
{
|
||||
block_allocator allocator;
|
||||
block_allocator *ba = &allocator;
|
||||
static void test_ba2(void) {
|
||||
BlockAllocator allocator;
|
||||
BlockAllocator *ba = &allocator;
|
||||
uint64_t b[6];
|
||||
enum { BSIZE = 1024 };
|
||||
ba->create(100*512, BSIZE*512);
|
||||
ba->set_strategy(block_allocator::BA_STRATEGY_FIRST_FIT);
|
||||
assert(ba->allocated_limit()==100*512);
|
||||
ba->Create(100 * 512, BSIZE * 512);
|
||||
invariant(ba->AllocatedLimit() == 100 * 512);
|
||||
|
||||
ba_check_l (ba, 0, 0, 100);
|
||||
ba_check_none (ba, 1);
|
||||
ba_check_l(ba, 0, 0, 100);
|
||||
ba_check_none(ba, 1);
|
||||
|
||||
ba_alloc (ba, 100, &b[0]);
|
||||
ba_check_l (ba, 0, 0, 100);
|
||||
ba_check_l (ba, 1, BSIZE, 100);
|
||||
ba_check_none (ba, 2);
|
||||
ba_alloc(ba, 100, &b[0]);
|
||||
ba_check_l(ba, 0, 0, 100);
|
||||
ba_check_l(ba, 1, BSIZE, 100);
|
||||
ba_check_none(ba, 2);
|
||||
|
||||
ba_alloc (ba, BSIZE + 100, &b[1]);
|
||||
ba_check_l (ba, 0, 0, 100);
|
||||
ba_check_l (ba, 1, BSIZE, 100);
|
||||
ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100);
|
||||
ba_check_none (ba, 3);
|
||||
ba_alloc(ba, BSIZE + 100, &b[1]);
|
||||
ba_check_l(ba, 0, 0, 100);
|
||||
ba_check_l(ba, 1, BSIZE, 100);
|
||||
ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100);
|
||||
ba_check_none(ba, 3);
|
||||
|
||||
ba_alloc (ba, 100, &b[2]);
|
||||
ba_check_l (ba, 0, 0, 100);
|
||||
ba_check_l (ba, 1, BSIZE, 100);
|
||||
ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100);
|
||||
ba_check_l (ba, 3, 4*BSIZE, 100);
|
||||
ba_check_none (ba, 4);
|
||||
ba_alloc(ba, 100, &b[2]);
|
||||
ba_check_l(ba, 0, 0, 100);
|
||||
ba_check_l(ba, 1, BSIZE, 100);
|
||||
ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100);
|
||||
ba_check_l(ba, 3, 4 * BSIZE, 100);
|
||||
ba_check_none(ba, 4);
|
||||
|
||||
ba_alloc (ba, 100, &b[3]);
|
||||
ba_alloc (ba, 100, &b[4]);
|
||||
ba_alloc (ba, 100, &b[5]);
|
||||
ba_check_l (ba, 0, 0, 100);
|
||||
ba_check_l (ba, 1, BSIZE, 100);
|
||||
ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100);
|
||||
ba_check_l (ba, 3, 4*BSIZE, 100);
|
||||
ba_check_l (ba, 4, 5*BSIZE, 100);
|
||||
ba_check_l (ba, 5, 6*BSIZE, 100);
|
||||
ba_check_l (ba, 6, 7*BSIZE, 100);
|
||||
ba_check_none (ba, 7);
|
||||
|
||||
ba_free (ba, 4*BSIZE);
|
||||
ba_check_l (ba, 0, 0, 100);
|
||||
ba_check_l (ba, 1, BSIZE, 100);
|
||||
ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100);
|
||||
ba_check_l (ba, 3, 5*BSIZE, 100);
|
||||
ba_check_l (ba, 4, 6*BSIZE, 100);
|
||||
ba_check_l (ba, 5, 7*BSIZE, 100);
|
||||
ba_check_none (ba, 6);
|
||||
ba_alloc(ba, 100, &b[3]);
|
||||
ba_alloc(ba, 100, &b[4]);
|
||||
ba_alloc(ba, 100, &b[5]);
|
||||
ba_check_l(ba, 0, 0, 100);
|
||||
ba_check_l(ba, 1, BSIZE, 100);
|
||||
ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100);
|
||||
ba_check_l(ba, 3, 4 * BSIZE, 100);
|
||||
ba_check_l(ba, 4, 5 * BSIZE, 100);
|
||||
ba_check_l(ba, 5, 6 * BSIZE, 100);
|
||||
ba_check_l(ba, 6, 7 * BSIZE, 100);
|
||||
ba_check_none(ba, 7);
|
||||
|
||||
ba_free(ba, 4 * BSIZE, 100);
|
||||
ba_check_l(ba, 0, 0, 100);
|
||||
ba_check_l(ba, 1, BSIZE, 100);
|
||||
ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100);
|
||||
ba_check_l(ba, 3, 5 * BSIZE, 100);
|
||||
ba_check_l(ba, 4, 6 * BSIZE, 100);
|
||||
ba_check_l(ba, 5, 7 * BSIZE, 100);
|
||||
ba_check_none(ba, 6);
|
||||
|
||||
uint64_t b2;
|
||||
ba_alloc(ba, 100, &b2);
|
||||
assert(b2==4*BSIZE);
|
||||
ba_check_l (ba, 0, 0, 100);
|
||||
ba_check_l (ba, 1, BSIZE, 100);
|
||||
ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100);
|
||||
ba_check_l (ba, 3, 4*BSIZE, 100);
|
||||
ba_check_l (ba, 4, 5*BSIZE, 100);
|
||||
ba_check_l (ba, 5, 6*BSIZE, 100);
|
||||
ba_check_l (ba, 6, 7*BSIZE, 100);
|
||||
ba_check_none (ba, 7);
|
||||
invariant(b2 == 4 * BSIZE);
|
||||
ba_check_l(ba, 0, 0, 100);
|
||||
ba_check_l(ba, 1, BSIZE, 100);
|
||||
ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100);
|
||||
ba_check_l(ba, 3, 4 * BSIZE, 100);
|
||||
ba_check_l(ba, 4, 5 * BSIZE, 100);
|
||||
ba_check_l(ba, 5, 6 * BSIZE, 100);
|
||||
ba_check_l(ba, 6, 7 * BSIZE, 100);
|
||||
ba_check_none(ba, 7);
|
||||
|
||||
ba_free (ba, BSIZE);
|
||||
ba_free (ba, 5*BSIZE);
|
||||
ba_check_l (ba, 0, 0, 100);
|
||||
ba_check_l (ba, 1, 2*BSIZE, BSIZE + 100);
|
||||
ba_check_l (ba, 2, 4*BSIZE, 100);
|
||||
ba_check_l (ba, 3, 6*BSIZE, 100);
|
||||
ba_check_l (ba, 4, 7*BSIZE, 100);
|
||||
ba_check_none (ba, 5);
|
||||
ba_free(ba, BSIZE, 100);
|
||||
ba_free(ba, 5 * BSIZE, 100);
|
||||
ba_check_l(ba, 0, 0, 100);
|
||||
ba_check_l(ba, 1, 2 * BSIZE, BSIZE + 100);
|
||||
ba_check_l(ba, 2, 4 * BSIZE, 100);
|
||||
ba_check_l(ba, 3, 6 * BSIZE, 100);
|
||||
ba_check_l(ba, 4, 7 * BSIZE, 100);
|
||||
ba_check_none(ba, 5);
|
||||
|
||||
// This alloc will allocate the first block after the reserve space in the case of first fit.
|
||||
// This alloc will allocate the first block after the reserve space in the
|
||||
// case of first fit.
|
||||
uint64_t b3;
|
||||
ba_alloc(ba, 100, &b3);
|
||||
assert(b3== BSIZE); // First fit.
|
||||
invariant(b3 == BSIZE); // First fit.
|
||||
// if (b3==5*BSIZE) then it is next fit.
|
||||
|
||||
// Now 5*BSIZE is free
|
||||
uint64_t b5;
|
||||
ba_alloc(ba, 100, &b5);
|
||||
assert(b5==5*BSIZE);
|
||||
ba_check_l (ba, 0, 0, 100);
|
||||
ba_check_l (ba, 1, BSIZE, 100);
|
||||
ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100);
|
||||
ba_check_l (ba, 3, 4*BSIZE, 100);
|
||||
ba_check_l (ba, 4, 5*BSIZE, 100);
|
||||
ba_check_l (ba, 5, 6*BSIZE, 100);
|
||||
ba_check_l (ba, 6, 7*BSIZE, 100);
|
||||
ba_check_none (ba, 7);
|
||||
invariant(b5 == 5 * BSIZE);
|
||||
ba_check_l(ba, 0, 0, 100);
|
||||
ba_check_l(ba, 1, BSIZE, 100);
|
||||
ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100);
|
||||
ba_check_l(ba, 3, 4 * BSIZE, 100);
|
||||
ba_check_l(ba, 4, 5 * BSIZE, 100);
|
||||
ba_check_l(ba, 5, 6 * BSIZE, 100);
|
||||
ba_check_l(ba, 6, 7 * BSIZE, 100);
|
||||
ba_check_none(ba, 7);
|
||||
|
||||
// Now all blocks are busy
|
||||
uint64_t b6, b7, b8;
|
||||
ba_alloc(ba, 100, &b6);
|
||||
ba_alloc(ba, 100, &b7);
|
||||
ba_alloc(ba, 100, &b8);
|
||||
assert(b6==8*BSIZE);
|
||||
assert(b7==9*BSIZE);
|
||||
assert(b8==10*BSIZE);
|
||||
ba_check_l (ba, 0, 0, 100);
|
||||
ba_check_l (ba, 1, BSIZE, 100);
|
||||
ba_check_l (ba, 2, 2*BSIZE, BSIZE + 100);
|
||||
ba_check_l (ba, 3, 4*BSIZE, 100);
|
||||
ba_check_l (ba, 4, 5*BSIZE, 100);
|
||||
ba_check_l (ba, 5, 6*BSIZE, 100);
|
||||
ba_check_l (ba, 6, 7*BSIZE, 100);
|
||||
ba_check_l (ba, 7, 8*BSIZE, 100);
|
||||
ba_check_l (ba, 8, 9*BSIZE, 100);
|
||||
ba_check_l (ba, 9, 10*BSIZE, 100);
|
||||
ba_check_none (ba, 10);
|
||||
|
||||
ba_free(ba, 9*BSIZE);
|
||||
ba_free(ba, 7*BSIZE);
|
||||
invariant(b6 == 8 * BSIZE);
|
||||
invariant(b7 == 9 * BSIZE);
|
||||
invariant(b8 == 10 * BSIZE);
|
||||
ba_check_l(ba, 0, 0, 100);
|
||||
ba_check_l(ba, 1, BSIZE, 100);
|
||||
ba_check_l(ba, 2, 2 * BSIZE, BSIZE + 100);
|
||||
ba_check_l(ba, 3, 4 * BSIZE, 100);
|
||||
ba_check_l(ba, 4, 5 * BSIZE, 100);
|
||||
ba_check_l(ba, 5, 6 * BSIZE, 100);
|
||||
ba_check_l(ba, 6, 7 * BSIZE, 100);
|
||||
ba_check_l(ba, 7, 8 * BSIZE, 100);
|
||||
ba_check_l(ba, 8, 9 * BSIZE, 100);
|
||||
ba_check_l(ba, 9, 10 * BSIZE, 100);
|
||||
ba_check_none(ba, 10);
|
||||
|
||||
ba_free(ba, 9 * BSIZE, 100);
|
||||
ba_free(ba, 7 * BSIZE, 100);
|
||||
uint64_t b9;
|
||||
ba_alloc(ba, 100, &b9);
|
||||
assert(b9==7*BSIZE);
|
||||
invariant(b9 == 7 * BSIZE);
|
||||
|
||||
ba_free(ba, 5*BSIZE);
|
||||
ba_free(ba, 2*BSIZE);
|
||||
ba_free(ba, 5 * BSIZE, 100);
|
||||
ba_free(ba, 2 * BSIZE, BSIZE + 100);
|
||||
uint64_t b10, b11;
|
||||
ba_alloc(ba, 100, &b10);
|
||||
assert(b10==2*BSIZE);
|
||||
invariant(b10 == 2 * BSIZE);
|
||||
ba_alloc(ba, 100, &b11);
|
||||
assert(b11==3*BSIZE);
|
||||
invariant(b11 == 3 * BSIZE);
|
||||
ba_alloc(ba, 100, &b11);
|
||||
assert(b11==5*BSIZE);
|
||||
invariant(b11 == 5 * BSIZE);
|
||||
|
||||
ba->destroy();
|
||||
ba->Destroy();
|
||||
}
|
||||
|
||||
int
|
||||
test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) {
|
||||
enum block_allocator::allocation_strategy strategies[] = {
|
||||
block_allocator::BA_STRATEGY_FIRST_FIT,
|
||||
block_allocator::BA_STRATEGY_BEST_FIT,
|
||||
block_allocator::BA_STRATEGY_PADDED_FIT,
|
||||
block_allocator::BA_STRATEGY_HEAT_ZONE,
|
||||
};
|
||||
for (size_t i = 0; i < sizeof(strategies) / sizeof(strategies[0]); i++) {
|
||||
test_ba0(strategies[i]);
|
||||
test_ba1(strategies[i], 0);
|
||||
test_ba1(strategies[i], 10);
|
||||
test_ba1(strategies[i], 20);
|
||||
}
|
||||
int test_main(int argc __attribute__((__unused__)),
|
||||
const char *argv[] __attribute__((__unused__))) {
|
||||
test_ba0();
|
||||
test_ba1(0);
|
||||
test_ba1(10);
|
||||
test_ba1(20);
|
||||
test_ba2();
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -45,7 +45,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
|||
// #5978 is fixed. Here is what we do. We have four pairs with
|
||||
// blocknums and fullhashes of 1,2,3,4. The cachetable has only
|
||||
// two bucket mutexes, so 1 and 3 share a pair mutex, as do 2 and 4.
|
||||
// We pin all four with expensive write locks. Then, on backgroud threads,
|
||||
// We pin all four with expensive write locks. Then, on background threads,
|
||||
// we call get_and_pin_nonblocking on 3, where the unlockers unpin 2, and
|
||||
// we call get_and_pin_nonblocking on 4, where the unlockers unpin 1. Run this
|
||||
// enough times, and we should see a deadlock before the fix, and no deadlock
|
||||
|
|
|
@ -77,7 +77,7 @@ flush (
|
|||
|
||||
//
|
||||
// test the following things for simple cloning:
|
||||
// - verifies that after teh checkpoint ends, the PAIR is properly
|
||||
// - verifies that after the checkpoint ends, the PAIR is properly
|
||||
// dirty or clean based on the second unpin
|
||||
//
|
||||
static void
|
||||
|
|
|
@ -38,69 +38,72 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
|||
|
||||
#include "test.h"
|
||||
|
||||
static int
|
||||
int64_key_cmp (DB *db UU(), const DBT *a, const DBT *b) {
|
||||
int64_t x = *(int64_t *) a->data;
|
||||
int64_t y = *(int64_t *) b->data;
|
||||
static int int64_key_cmp(DB *db UU(), const DBT *a, const DBT *b) {
|
||||
int64_t x = *(int64_t *)a->data;
|
||||
int64_t y = *(int64_t *)b->data;
|
||||
|
||||
if (x<y) return -1;
|
||||
if (x>y) return 1;
|
||||
if (x < y)
|
||||
return -1;
|
||||
if (x > y)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) {
|
||||
static void test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) {
|
||||
int r;
|
||||
FT_CURSOR XMALLOC(cursor);
|
||||
FTNODE dn = NULL;
|
||||
PAIR_ATTR attr;
|
||||
|
||||
|
||||
// first test that prefetching everything should work
|
||||
memset(&cursor->range_lock_left_key, 0 , sizeof(DBT));
|
||||
memset(&cursor->range_lock_right_key, 0 , sizeof(DBT));
|
||||
memset(&cursor->range_lock_left_key, 0, sizeof(DBT));
|
||||
memset(&cursor->range_lock_right_key, 0, sizeof(DBT));
|
||||
cursor->left_is_neg_infty = true;
|
||||
cursor->right_is_pos_infty = true;
|
||||
cursor->disable_prefetching = false;
|
||||
|
||||
|
||||
ftnode_fetch_extra bfe;
|
||||
|
||||
// quick test to see that we have the right behavior when we set
|
||||
// disable_prefetching to true
|
||||
cursor->disable_prefetching = true;
|
||||
bfe.create_for_prefetch( ft_h, cursor);
|
||||
bfe.create_for_prefetch(ft_h, cursor);
|
||||
FTNODE_DISK_DATA ndd = NULL;
|
||||
r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe);
|
||||
assert(r==0);
|
||||
assert(dn->n_children == 3);
|
||||
assert(BP_STATE(dn,0) == PT_ON_DISK);
|
||||
assert(BP_STATE(dn,1) == PT_ON_DISK);
|
||||
assert(BP_STATE(dn,2) == PT_ON_DISK);
|
||||
r = toku_deserialize_ftnode_from(
|
||||
fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe);
|
||||
invariant(r == 0);
|
||||
invariant(dn->n_children == 3);
|
||||
invariant(BP_STATE(dn, 0) == PT_ON_DISK);
|
||||
invariant(BP_STATE(dn, 1) == PT_ON_DISK);
|
||||
invariant(BP_STATE(dn, 2) == PT_ON_DISK);
|
||||
r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr);
|
||||
assert(BP_STATE(dn,0) == PT_ON_DISK);
|
||||
assert(BP_STATE(dn,1) == PT_ON_DISK);
|
||||
assert(BP_STATE(dn,2) == PT_ON_DISK);
|
||||
invariant(BP_STATE(dn, 0) == PT_ON_DISK);
|
||||
invariant(BP_STATE(dn, 1) == PT_ON_DISK);
|
||||
invariant(BP_STATE(dn, 2) == PT_ON_DISK);
|
||||
bfe.destroy();
|
||||
toku_ftnode_free(&dn);
|
||||
toku_free(ndd);
|
||||
|
||||
// now enable prefetching again
|
||||
cursor->disable_prefetching = false;
|
||||
|
||||
bfe.create_for_prefetch( ft_h, cursor);
|
||||
r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe);
|
||||
assert(r==0);
|
||||
assert(dn->n_children == 3);
|
||||
assert(BP_STATE(dn,0) == PT_AVAIL);
|
||||
assert(BP_STATE(dn,1) == PT_AVAIL);
|
||||
assert(BP_STATE(dn,2) == PT_AVAIL);
|
||||
toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
|
||||
assert(BP_STATE(dn,0) == PT_COMPRESSED);
|
||||
assert(BP_STATE(dn,1) == PT_COMPRESSED);
|
||||
assert(BP_STATE(dn,2) == PT_COMPRESSED);
|
||||
|
||||
bfe.create_for_prefetch(ft_h, cursor);
|
||||
r = toku_deserialize_ftnode_from(
|
||||
fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe);
|
||||
invariant(r == 0);
|
||||
invariant(dn->n_children == 3);
|
||||
invariant(BP_STATE(dn, 0) == PT_AVAIL);
|
||||
invariant(BP_STATE(dn, 1) == PT_AVAIL);
|
||||
invariant(BP_STATE(dn, 2) == PT_AVAIL);
|
||||
toku_ftnode_pe_callback(
|
||||
dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
|
||||
invariant(BP_STATE(dn, 0) == PT_COMPRESSED);
|
||||
invariant(BP_STATE(dn, 1) == PT_COMPRESSED);
|
||||
invariant(BP_STATE(dn, 2) == PT_COMPRESSED);
|
||||
r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr);
|
||||
assert(BP_STATE(dn,0) == PT_AVAIL);
|
||||
assert(BP_STATE(dn,1) == PT_AVAIL);
|
||||
assert(BP_STATE(dn,2) == PT_AVAIL);
|
||||
invariant(BP_STATE(dn, 0) == PT_AVAIL);
|
||||
invariant(BP_STATE(dn, 1) == PT_AVAIL);
|
||||
invariant(BP_STATE(dn, 2) == PT_AVAIL);
|
||||
bfe.destroy();
|
||||
toku_ftnode_free(&dn);
|
||||
toku_free(ndd);
|
||||
|
@ -108,21 +111,23 @@ test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) {
|
|||
uint64_t left_key = 150;
|
||||
toku_fill_dbt(&cursor->range_lock_left_key, &left_key, sizeof(uint64_t));
|
||||
cursor->left_is_neg_infty = false;
|
||||
bfe.create_for_prefetch( ft_h, cursor);
|
||||
r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe);
|
||||
assert(r==0);
|
||||
assert(dn->n_children == 3);
|
||||
assert(BP_STATE(dn,0) == PT_ON_DISK);
|
||||
assert(BP_STATE(dn,1) == PT_AVAIL);
|
||||
assert(BP_STATE(dn,2) == PT_AVAIL);
|
||||
toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
|
||||
assert(BP_STATE(dn,0) == PT_ON_DISK);
|
||||
assert(BP_STATE(dn,1) == PT_COMPRESSED);
|
||||
assert(BP_STATE(dn,2) == PT_COMPRESSED);
|
||||
bfe.create_for_prefetch(ft_h, cursor);
|
||||
r = toku_deserialize_ftnode_from(
|
||||
fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe);
|
||||
invariant(r == 0);
|
||||
invariant(dn->n_children == 3);
|
||||
invariant(BP_STATE(dn, 0) == PT_ON_DISK);
|
||||
invariant(BP_STATE(dn, 1) == PT_AVAIL);
|
||||
invariant(BP_STATE(dn, 2) == PT_AVAIL);
|
||||
toku_ftnode_pe_callback(
|
||||
dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
|
||||
invariant(BP_STATE(dn, 0) == PT_ON_DISK);
|
||||
invariant(BP_STATE(dn, 1) == PT_COMPRESSED);
|
||||
invariant(BP_STATE(dn, 2) == PT_COMPRESSED);
|
||||
r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr);
|
||||
assert(BP_STATE(dn,0) == PT_ON_DISK);
|
||||
assert(BP_STATE(dn,1) == PT_AVAIL);
|
||||
assert(BP_STATE(dn,2) == PT_AVAIL);
|
||||
invariant(BP_STATE(dn, 0) == PT_ON_DISK);
|
||||
invariant(BP_STATE(dn, 1) == PT_AVAIL);
|
||||
invariant(BP_STATE(dn, 2) == PT_AVAIL);
|
||||
bfe.destroy();
|
||||
toku_ftnode_free(&dn);
|
||||
toku_free(ndd);
|
||||
|
@ -130,63 +135,69 @@ test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) {
|
|||
uint64_t right_key = 151;
|
||||
toku_fill_dbt(&cursor->range_lock_right_key, &right_key, sizeof(uint64_t));
|
||||
cursor->right_is_pos_infty = false;
|
||||
bfe.create_for_prefetch( ft_h, cursor);
|
||||
r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe);
|
||||
assert(r==0);
|
||||
assert(dn->n_children == 3);
|
||||
assert(BP_STATE(dn,0) == PT_ON_DISK);
|
||||
assert(BP_STATE(dn,1) == PT_AVAIL);
|
||||
assert(BP_STATE(dn,2) == PT_ON_DISK);
|
||||
toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
|
||||
assert(BP_STATE(dn,0) == PT_ON_DISK);
|
||||
assert(BP_STATE(dn,1) == PT_COMPRESSED);
|
||||
assert(BP_STATE(dn,2) == PT_ON_DISK);
|
||||
bfe.create_for_prefetch(ft_h, cursor);
|
||||
r = toku_deserialize_ftnode_from(
|
||||
fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe);
|
||||
invariant(r == 0);
|
||||
invariant(dn->n_children == 3);
|
||||
invariant(BP_STATE(dn, 0) == PT_ON_DISK);
|
||||
invariant(BP_STATE(dn, 1) == PT_AVAIL);
|
||||
invariant(BP_STATE(dn, 2) == PT_ON_DISK);
|
||||
toku_ftnode_pe_callback(
|
||||
dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
|
||||
invariant(BP_STATE(dn, 0) == PT_ON_DISK);
|
||||
invariant(BP_STATE(dn, 1) == PT_COMPRESSED);
|
||||
invariant(BP_STATE(dn, 2) == PT_ON_DISK);
|
||||
r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr);
|
||||
assert(BP_STATE(dn,0) == PT_ON_DISK);
|
||||
assert(BP_STATE(dn,1) == PT_AVAIL);
|
||||
assert(BP_STATE(dn,2) == PT_ON_DISK);
|
||||
invariant(BP_STATE(dn, 0) == PT_ON_DISK);
|
||||
invariant(BP_STATE(dn, 1) == PT_AVAIL);
|
||||
invariant(BP_STATE(dn, 2) == PT_ON_DISK);
|
||||
bfe.destroy();
|
||||
toku_ftnode_free(&dn);
|
||||
toku_free(ndd);
|
||||
|
||||
left_key = 100000;
|
||||
right_key = 100000;
|
||||
bfe.create_for_prefetch( ft_h, cursor);
|
||||
r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe);
|
||||
assert(r==0);
|
||||
assert(dn->n_children == 3);
|
||||
assert(BP_STATE(dn,0) == PT_ON_DISK);
|
||||
assert(BP_STATE(dn,1) == PT_ON_DISK);
|
||||
assert(BP_STATE(dn,2) == PT_AVAIL);
|
||||
toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
|
||||
assert(BP_STATE(dn,0) == PT_ON_DISK);
|
||||
assert(BP_STATE(dn,1) == PT_ON_DISK);
|
||||
assert(BP_STATE(dn,2) == PT_COMPRESSED);
|
||||
bfe.create_for_prefetch(ft_h, cursor);
|
||||
r = toku_deserialize_ftnode_from(
|
||||
fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe);
|
||||
invariant(r == 0);
|
||||
invariant(dn->n_children == 3);
|
||||
invariant(BP_STATE(dn, 0) == PT_ON_DISK);
|
||||
invariant(BP_STATE(dn, 1) == PT_ON_DISK);
|
||||
invariant(BP_STATE(dn, 2) == PT_AVAIL);
|
||||
toku_ftnode_pe_callback(
|
||||
dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
|
||||
invariant(BP_STATE(dn, 0) == PT_ON_DISK);
|
||||
invariant(BP_STATE(dn, 1) == PT_ON_DISK);
|
||||
invariant(BP_STATE(dn, 2) == PT_COMPRESSED);
|
||||
r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr);
|
||||
assert(BP_STATE(dn,0) == PT_ON_DISK);
|
||||
assert(BP_STATE(dn,1) == PT_ON_DISK);
|
||||
assert(BP_STATE(dn,2) == PT_AVAIL);
|
||||
invariant(BP_STATE(dn, 0) == PT_ON_DISK);
|
||||
invariant(BP_STATE(dn, 1) == PT_ON_DISK);
|
||||
invariant(BP_STATE(dn, 2) == PT_AVAIL);
|
||||
bfe.destroy();
|
||||
toku_free(ndd);
|
||||
toku_ftnode_free(&dn);
|
||||
|
||||
left_key = 100;
|
||||
right_key = 100;
|
||||
bfe.create_for_prefetch( ft_h, cursor);
|
||||
r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe);
|
||||
assert(r==0);
|
||||
assert(dn->n_children == 3);
|
||||
assert(BP_STATE(dn,0) == PT_AVAIL);
|
||||
assert(BP_STATE(dn,1) == PT_ON_DISK);
|
||||
assert(BP_STATE(dn,2) == PT_ON_DISK);
|
||||
toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
|
||||
assert(BP_STATE(dn,0) == PT_COMPRESSED);
|
||||
assert(BP_STATE(dn,1) == PT_ON_DISK);
|
||||
assert(BP_STATE(dn,2) == PT_ON_DISK);
|
||||
bfe.create_for_prefetch(ft_h, cursor);
|
||||
r = toku_deserialize_ftnode_from(
|
||||
fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe);
|
||||
invariant(r == 0);
|
||||
invariant(dn->n_children == 3);
|
||||
invariant(BP_STATE(dn, 0) == PT_AVAIL);
|
||||
invariant(BP_STATE(dn, 1) == PT_ON_DISK);
|
||||
invariant(BP_STATE(dn, 2) == PT_ON_DISK);
|
||||
toku_ftnode_pe_callback(
|
||||
dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
|
||||
invariant(BP_STATE(dn, 0) == PT_COMPRESSED);
|
||||
invariant(BP_STATE(dn, 1) == PT_ON_DISK);
|
||||
invariant(BP_STATE(dn, 2) == PT_ON_DISK);
|
||||
r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr);
|
||||
assert(BP_STATE(dn,0) == PT_AVAIL);
|
||||
assert(BP_STATE(dn,1) == PT_ON_DISK);
|
||||
assert(BP_STATE(dn,2) == PT_ON_DISK);
|
||||
invariant(BP_STATE(dn, 0) == PT_AVAIL);
|
||||
invariant(BP_STATE(dn, 1) == PT_ON_DISK);
|
||||
invariant(BP_STATE(dn, 2) == PT_ON_DISK);
|
||||
bfe.destroy();
|
||||
toku_ftnode_free(&dn);
|
||||
toku_free(ndd);
|
||||
|
@ -194,20 +205,19 @@ test_prefetch_read(int fd, FT_HANDLE UU(ft), FT ft_h) {
|
|||
toku_free(cursor);
|
||||
}
|
||||
|
||||
static void
|
||||
test_subset_read(int fd, FT_HANDLE UU(ft), FT ft_h) {
|
||||
static void test_subset_read(int fd, FT_HANDLE UU(ft), FT ft_h) {
|
||||
int r;
|
||||
FT_CURSOR XMALLOC(cursor);
|
||||
FTNODE dn = NULL;
|
||||
FTNODE_DISK_DATA ndd = NULL;
|
||||
PAIR_ATTR attr;
|
||||
|
||||
|
||||
// first test that prefetching everything should work
|
||||
memset(&cursor->range_lock_left_key, 0 , sizeof(DBT));
|
||||
memset(&cursor->range_lock_right_key, 0 , sizeof(DBT));
|
||||
memset(&cursor->range_lock_left_key, 0, sizeof(DBT));
|
||||
memset(&cursor->range_lock_right_key, 0, sizeof(DBT));
|
||||
cursor->left_is_neg_infty = true;
|
||||
cursor->right_is_pos_infty = true;
|
||||
|
||||
|
||||
uint64_t left_key = 150;
|
||||
uint64_t right_key = 151;
|
||||
DBT left, right;
|
||||
|
@ -216,101 +226,106 @@ test_subset_read(int fd, FT_HANDLE UU(ft), FT ft_h) {
|
|||
|
||||
ftnode_fetch_extra bfe;
|
||||
bfe.create_for_subset_read(
|
||||
ft_h,
|
||||
NULL,
|
||||
&left,
|
||||
&right,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
false
|
||||
);
|
||||
|
||||
ft_h, NULL, &left, &right, false, false, false, false);
|
||||
|
||||
// fake the childnum to read
|
||||
// set disable_prefetching ON
|
||||
bfe.child_to_read = 2;
|
||||
bfe.disable_prefetching = true;
|
||||
r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe);
|
||||
assert(r==0);
|
||||
assert(dn->n_children == 3);
|
||||
assert(BP_STATE(dn,0) == PT_ON_DISK);
|
||||
assert(BP_STATE(dn,1) == PT_ON_DISK);
|
||||
assert(BP_STATE(dn,2) == PT_AVAIL);
|
||||
// need to call this twice because we had a subset read before, that touched the clock
|
||||
toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
|
||||
assert(BP_STATE(dn,0) == PT_ON_DISK);
|
||||
assert(BP_STATE(dn,1) == PT_ON_DISK);
|
||||
assert(BP_STATE(dn,2) == PT_AVAIL);
|
||||
toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
|
||||
assert(BP_STATE(dn,0) == PT_ON_DISK);
|
||||
assert(BP_STATE(dn,1) == PT_ON_DISK);
|
||||
assert(BP_STATE(dn,2) == PT_COMPRESSED);
|
||||
r = toku_deserialize_ftnode_from(
|
||||
fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe);
|
||||
invariant(r == 0);
|
||||
invariant(dn->n_children == 3);
|
||||
invariant(BP_STATE(dn, 0) == PT_ON_DISK);
|
||||
invariant(BP_STATE(dn, 1) == PT_ON_DISK);
|
||||
invariant(BP_STATE(dn, 2) == PT_AVAIL);
|
||||
// need to call this twice because we had a subset read before, that touched
|
||||
// the clock
|
||||
toku_ftnode_pe_callback(
|
||||
dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
|
||||
invariant(BP_STATE(dn, 0) == PT_ON_DISK);
|
||||
invariant(BP_STATE(dn, 1) == PT_ON_DISK);
|
||||
invariant(BP_STATE(dn, 2) == PT_AVAIL);
|
||||
toku_ftnode_pe_callback(
|
||||
dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
|
||||
invariant(BP_STATE(dn, 0) == PT_ON_DISK);
|
||||
invariant(BP_STATE(dn, 1) == PT_ON_DISK);
|
||||
invariant(BP_STATE(dn, 2) == PT_COMPRESSED);
|
||||
r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr);
|
||||
assert(BP_STATE(dn,0) == PT_ON_DISK);
|
||||
assert(BP_STATE(dn,1) == PT_ON_DISK);
|
||||
assert(BP_STATE(dn,2) == PT_AVAIL);
|
||||
invariant(BP_STATE(dn, 0) == PT_ON_DISK);
|
||||
invariant(BP_STATE(dn, 1) == PT_ON_DISK);
|
||||
invariant(BP_STATE(dn, 2) == PT_AVAIL);
|
||||
toku_ftnode_free(&dn);
|
||||
toku_free(ndd);
|
||||
|
||||
// fake the childnum to read
|
||||
bfe.child_to_read = 2;
|
||||
bfe.disable_prefetching = false;
|
||||
r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe);
|
||||
assert(r==0);
|
||||
assert(dn->n_children == 3);
|
||||
assert(BP_STATE(dn,0) == PT_ON_DISK);
|
||||
assert(BP_STATE(dn,1) == PT_AVAIL);
|
||||
assert(BP_STATE(dn,2) == PT_AVAIL);
|
||||
// need to call this twice because we had a subset read before, that touched the clock
|
||||
toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
|
||||
assert(BP_STATE(dn,0) == PT_ON_DISK);
|
||||
assert(BP_STATE(dn,1) == PT_COMPRESSED);
|
||||
assert(BP_STATE(dn,2) == PT_AVAIL);
|
||||
toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
|
||||
assert(BP_STATE(dn,0) == PT_ON_DISK);
|
||||
assert(BP_STATE(dn,1) == PT_COMPRESSED);
|
||||
assert(BP_STATE(dn,2) == PT_COMPRESSED);
|
||||
r = toku_deserialize_ftnode_from(
|
||||
fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe);
|
||||
invariant(r == 0);
|
||||
invariant(dn->n_children == 3);
|
||||
invariant(BP_STATE(dn, 0) == PT_ON_DISK);
|
||||
invariant(BP_STATE(dn, 1) == PT_AVAIL);
|
||||
invariant(BP_STATE(dn, 2) == PT_AVAIL);
|
||||
// need to call this twice because we had a subset read before, that touched
|
||||
// the clock
|
||||
toku_ftnode_pe_callback(
|
||||
dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
|
||||
invariant(BP_STATE(dn, 0) == PT_ON_DISK);
|
||||
invariant(BP_STATE(dn, 1) == PT_COMPRESSED);
|
||||
invariant(BP_STATE(dn, 2) == PT_AVAIL);
|
||||
toku_ftnode_pe_callback(
|
||||
dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
|
||||
invariant(BP_STATE(dn, 0) == PT_ON_DISK);
|
||||
invariant(BP_STATE(dn, 1) == PT_COMPRESSED);
|
||||
invariant(BP_STATE(dn, 2) == PT_COMPRESSED);
|
||||
r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr);
|
||||
assert(BP_STATE(dn,0) == PT_ON_DISK);
|
||||
assert(BP_STATE(dn,1) == PT_AVAIL);
|
||||
assert(BP_STATE(dn,2) == PT_AVAIL);
|
||||
invariant(BP_STATE(dn, 0) == PT_ON_DISK);
|
||||
invariant(BP_STATE(dn, 1) == PT_AVAIL);
|
||||
invariant(BP_STATE(dn, 2) == PT_AVAIL);
|
||||
toku_ftnode_free(&dn);
|
||||
toku_free(ndd);
|
||||
|
||||
// fake the childnum to read
|
||||
bfe.child_to_read = 0;
|
||||
r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe);
|
||||
assert(r==0);
|
||||
assert(dn->n_children == 3);
|
||||
assert(BP_STATE(dn,0) == PT_AVAIL);
|
||||
assert(BP_STATE(dn,1) == PT_AVAIL);
|
||||
assert(BP_STATE(dn,2) == PT_ON_DISK);
|
||||
// need to call this twice because we had a subset read before, that touched the clock
|
||||
toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
|
||||
assert(BP_STATE(dn,0) == PT_AVAIL);
|
||||
assert(BP_STATE(dn,1) == PT_COMPRESSED);
|
||||
assert(BP_STATE(dn,2) == PT_ON_DISK);
|
||||
toku_ftnode_pe_callback(dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
|
||||
assert(BP_STATE(dn,0) == PT_COMPRESSED);
|
||||
assert(BP_STATE(dn,1) == PT_COMPRESSED);
|
||||
assert(BP_STATE(dn,2) == PT_ON_DISK);
|
||||
r = toku_deserialize_ftnode_from(
|
||||
fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd, &bfe);
|
||||
invariant(r == 0);
|
||||
invariant(dn->n_children == 3);
|
||||
invariant(BP_STATE(dn, 0) == PT_AVAIL);
|
||||
invariant(BP_STATE(dn, 1) == PT_AVAIL);
|
||||
invariant(BP_STATE(dn, 2) == PT_ON_DISK);
|
||||
// need to call this twice because we had a subset read before, that touched
|
||||
// the clock
|
||||
toku_ftnode_pe_callback(
|
||||
dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
|
||||
invariant(BP_STATE(dn, 0) == PT_AVAIL);
|
||||
invariant(BP_STATE(dn, 1) == PT_COMPRESSED);
|
||||
invariant(BP_STATE(dn, 2) == PT_ON_DISK);
|
||||
toku_ftnode_pe_callback(
|
||||
dn, make_pair_attr(0xffffffff), ft_h, def_pe_finalize_impl, nullptr);
|
||||
invariant(BP_STATE(dn, 0) == PT_COMPRESSED);
|
||||
invariant(BP_STATE(dn, 1) == PT_COMPRESSED);
|
||||
invariant(BP_STATE(dn, 2) == PT_ON_DISK);
|
||||
r = toku_ftnode_pf_callback(dn, ndd, &bfe, fd, &attr);
|
||||
assert(BP_STATE(dn,0) == PT_AVAIL);
|
||||
assert(BP_STATE(dn,1) == PT_AVAIL);
|
||||
assert(BP_STATE(dn,2) == PT_ON_DISK);
|
||||
invariant(BP_STATE(dn, 0) == PT_AVAIL);
|
||||
invariant(BP_STATE(dn, 1) == PT_AVAIL);
|
||||
invariant(BP_STATE(dn, 2) == PT_ON_DISK);
|
||||
toku_ftnode_free(&dn);
|
||||
toku_free(ndd);
|
||||
|
||||
toku_free(cursor);
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
test_prefetching(void) {
|
||||
static void test_prefetching(void) {
|
||||
// struct ft_handle source_ft;
|
||||
struct ftnode sn;
|
||||
|
||||
int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
|
||||
int fd = open(TOKU_TEST_FILENAME,
|
||||
O_RDWR | O_CREAT | O_BINARY,
|
||||
S_IRWXU | S_IRWXG | S_IRWXO);
|
||||
invariant(fd >= 0);
|
||||
|
||||
int r;
|
||||
|
||||
|
@ -327,7 +342,7 @@ test_prefetching(void) {
|
|||
|
||||
uint64_t key1 = 100;
|
||||
uint64_t key2 = 200;
|
||||
|
||||
|
||||
MALLOC_N(sn.n_children, sn.bp);
|
||||
DBT pivotkeys[2];
|
||||
toku_fill_dbt(&pivotkeys[0], &key1, sizeof(key1));
|
||||
|
@ -336,13 +351,13 @@ test_prefetching(void) {
|
|||
BP_BLOCKNUM(&sn, 0).b = 30;
|
||||
BP_BLOCKNUM(&sn, 1).b = 35;
|
||||
BP_BLOCKNUM(&sn, 2).b = 40;
|
||||
BP_STATE(&sn,0) = PT_AVAIL;
|
||||
BP_STATE(&sn,1) = PT_AVAIL;
|
||||
BP_STATE(&sn,2) = PT_AVAIL;
|
||||
BP_STATE(&sn, 0) = PT_AVAIL;
|
||||
BP_STATE(&sn, 1) = PT_AVAIL;
|
||||
BP_STATE(&sn, 2) = PT_AVAIL;
|
||||
set_BNC(&sn, 0, toku_create_empty_nl());
|
||||
set_BNC(&sn, 1, toku_create_empty_nl());
|
||||
set_BNC(&sn, 2, toku_create_empty_nl());
|
||||
//Create XIDS
|
||||
// Create XIDS
|
||||
XIDS xids_0 = toku_xids_get_root_xids();
|
||||
XIDS xids_123;
|
||||
XIDS xids_234;
|
||||
|
@ -352,7 +367,7 @@ test_prefetching(void) {
|
|||
CKERR(r);
|
||||
|
||||
// data in the buffers does not matter in this test
|
||||
//Cleanup:
|
||||
// Cleanup:
|
||||
toku_xids_destroy(&xids_0);
|
||||
toku_xids_destroy(&xids_123);
|
||||
toku_xids_destroy(&xids_234);
|
||||
|
@ -363,41 +378,48 @@ test_prefetching(void) {
|
|||
make_blocknum(0),
|
||||
ZERO_LSN,
|
||||
TXNID_NONE,
|
||||
4*1024*1024,
|
||||
128*1024,
|
||||
4 * 1024 * 1024,
|
||||
128 * 1024,
|
||||
TOKU_DEFAULT_COMPRESSION_METHOD,
|
||||
16);
|
||||
ft_h->cmp.create(int64_key_cmp, nullptr);
|
||||
ft->ft = ft_h;
|
||||
ft_h->blocktable.create();
|
||||
{ int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); }
|
||||
//Want to use block #20
|
||||
{
|
||||
int r_truncate = ftruncate(fd, 0);
|
||||
CKERR(r_truncate);
|
||||
}
|
||||
// Want to use block #20
|
||||
BLOCKNUM b = make_blocknum(0);
|
||||
while (b.b < 20) {
|
||||
ft_h->blocktable.allocate_blocknum(&b, ft_h);
|
||||
}
|
||||
assert(b.b == 20);
|
||||
invariant(b.b == 20);
|
||||
|
||||
{
|
||||
DISKOFF offset;
|
||||
DISKOFF size;
|
||||
ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
|
||||
assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
|
||||
ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false);
|
||||
invariant(offset ==
|
||||
(DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
|
||||
|
||||
ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
|
||||
assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
|
||||
assert(size == 100);
|
||||
invariant(offset ==
|
||||
(DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
|
||||
invariant(size == 100);
|
||||
}
|
||||
FTNODE_DISK_DATA ndd = NULL;
|
||||
r = toku_serialize_ftnode_to(fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false);
|
||||
assert(r==0);
|
||||
r = toku_serialize_ftnode_to(
|
||||
fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false);
|
||||
invariant(r == 0);
|
||||
|
||||
test_prefetch_read(fd, ft, ft_h);
|
||||
test_prefetch_read(fd, ft, ft_h);
|
||||
test_subset_read(fd, ft, ft_h);
|
||||
|
||||
toku_destroy_ftnode_internals(&sn);
|
||||
|
||||
ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
|
||||
ft_h->blocktable.block_free(
|
||||
BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100);
|
||||
ft_h->blocktable.destroy();
|
||||
ft_h->cmp.destroy();
|
||||
toku_free(ft_h->h);
|
||||
|
@ -405,11 +427,12 @@ test_prefetching(void) {
|
|||
toku_free(ft);
|
||||
toku_free(ndd);
|
||||
|
||||
r = close(fd); assert(r != -1);
|
||||
r = close(fd);
|
||||
invariant(r != -1);
|
||||
}
|
||||
|
||||
int
|
||||
test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) {
|
||||
int test_main(int argc __attribute__((__unused__)),
|
||||
const char *argv[] __attribute__((__unused__))) {
|
||||
test_prefetching();
|
||||
|
||||
return 0;
|
||||
|
|
|
@ -40,38 +40,28 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
|||
|
||||
#include "ft/cursor.h"
|
||||
|
||||
enum ftnode_verify_type {
|
||||
read_all=1,
|
||||
read_compressed,
|
||||
read_none
|
||||
};
|
||||
enum ftnode_verify_type { read_all = 1, read_compressed, read_none };
|
||||
|
||||
#ifndef MIN
|
||||
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
|
||||
#endif
|
||||
|
||||
static int
|
||||
string_key_cmp(DB *UU(e), const DBT *a, const DBT *b)
|
||||
{
|
||||
static int string_key_cmp(DB *UU(e), const DBT *a, const DBT *b) {
|
||||
char *CAST_FROM_VOIDP(s, a->data);
|
||||
char *CAST_FROM_VOIDP(t, b->data);
|
||||
return strcmp(s, t);
|
||||
}
|
||||
|
||||
static void
|
||||
le_add_to_bn(bn_data* bn, uint32_t idx, const char *key, int keylen, const char *val, int vallen)
|
||||
{
|
||||
static void le_add_to_bn(bn_data *bn,
|
||||
uint32_t idx,
|
||||
const char *key,
|
||||
int keylen,
|
||||
const char *val,
|
||||
int vallen) {
|
||||
LEAFENTRY r = NULL;
|
||||
uint32_t size_needed = LE_CLEAN_MEMSIZE(vallen);
|
||||
void *maybe_free = nullptr;
|
||||
bn->get_space_for_insert(
|
||||
idx,
|
||||
key,
|
||||
keylen,
|
||||
size_needed,
|
||||
&r,
|
||||
&maybe_free
|
||||
);
|
||||
bn->get_space_for_insert(idx, key, keylen, size_needed, &r, &maybe_free);
|
||||
if (maybe_free) {
|
||||
toku_free(maybe_free);
|
||||
}
|
||||
|
@ -81,70 +71,67 @@ le_add_to_bn(bn_data* bn, uint32_t idx, const char *key, int keylen, const char
|
|||
memcpy(r->u.clean.val, val, vallen);
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
le_malloc(bn_data* bn, uint32_t idx, const char *key, const char *val)
|
||||
{
|
||||
static void le_malloc(bn_data *bn,
|
||||
uint32_t idx,
|
||||
const char *key,
|
||||
const char *val) {
|
||||
int keylen = strlen(key) + 1;
|
||||
int vallen = strlen(val) + 1;
|
||||
le_add_to_bn(bn, idx, key, keylen, val, vallen);
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
test1(int fd, FT ft_h, FTNODE *dn) {
|
||||
static void test1(int fd, FT ft_h, FTNODE *dn) {
|
||||
int r;
|
||||
ftnode_fetch_extra bfe_all;
|
||||
bfe_all.create_for_full_read(ft_h);
|
||||
FTNODE_DISK_DATA ndd = NULL;
|
||||
r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, &ndd, &bfe_all);
|
||||
r = toku_deserialize_ftnode_from(
|
||||
fd, make_blocknum(20), 0 /*pass zero for hash*/, dn, &ndd, &bfe_all);
|
||||
bool is_leaf = ((*dn)->height == 0);
|
||||
assert(r==0);
|
||||
invariant(r == 0);
|
||||
for (int i = 0; i < (*dn)->n_children; i++) {
|
||||
assert(BP_STATE(*dn,i) == PT_AVAIL);
|
||||
invariant(BP_STATE(*dn, i) == PT_AVAIL);
|
||||
}
|
||||
// should sweep and NOT get rid of anything
|
||||
PAIR_ATTR attr;
|
||||
memset(&attr,0,sizeof(attr));
|
||||
memset(&attr, 0, sizeof(attr));
|
||||
toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr);
|
||||
for (int i = 0; i < (*dn)->n_children; i++) {
|
||||
assert(BP_STATE(*dn,i) == PT_AVAIL);
|
||||
invariant(BP_STATE(*dn, i) == PT_AVAIL);
|
||||
}
|
||||
// should sweep and get compress all
|
||||
toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr);
|
||||
for (int i = 0; i < (*dn)->n_children; i++) {
|
||||
if (!is_leaf) {
|
||||
assert(BP_STATE(*dn,i) == PT_COMPRESSED);
|
||||
}
|
||||
else {
|
||||
assert(BP_STATE(*dn,i) == PT_ON_DISK);
|
||||
invariant(BP_STATE(*dn, i) == PT_COMPRESSED);
|
||||
} else {
|
||||
invariant(BP_STATE(*dn, i) == PT_ON_DISK);
|
||||
}
|
||||
}
|
||||
PAIR_ATTR size;
|
||||
bool req = toku_ftnode_pf_req_callback(*dn, &bfe_all);
|
||||
assert(req);
|
||||
invariant(req);
|
||||
toku_ftnode_pf_callback(*dn, ndd, &bfe_all, fd, &size);
|
||||
toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr);
|
||||
for (int i = 0; i < (*dn)->n_children; i++) {
|
||||
assert(BP_STATE(*dn,i) == PT_AVAIL);
|
||||
invariant(BP_STATE(*dn, i) == PT_AVAIL);
|
||||
}
|
||||
// should sweep and get compress all
|
||||
toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr);
|
||||
for (int i = 0; i < (*dn)->n_children; i++) {
|
||||
if (!is_leaf) {
|
||||
assert(BP_STATE(*dn,i) == PT_COMPRESSED);
|
||||
invariant(BP_STATE(*dn, i) == PT_COMPRESSED);
|
||||
} else {
|
||||
invariant(BP_STATE(*dn, i) == PT_ON_DISK);
|
||||
}
|
||||
else {
|
||||
assert(BP_STATE(*dn,i) == PT_ON_DISK);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
req = toku_ftnode_pf_req_callback(*dn, &bfe_all);
|
||||
assert(req);
|
||||
invariant(req);
|
||||
toku_ftnode_pf_callback(*dn, ndd, &bfe_all, fd, &size);
|
||||
toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr);
|
||||
for (int i = 0; i < (*dn)->n_children; i++) {
|
||||
assert(BP_STATE(*dn,i) == PT_AVAIL);
|
||||
invariant(BP_STATE(*dn, i) == PT_AVAIL);
|
||||
}
|
||||
(*dn)->dirty = 1;
|
||||
toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr);
|
||||
|
@ -152,101 +139,102 @@ test1(int fd, FT ft_h, FTNODE *dn) {
|
|||
toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr);
|
||||
toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr);
|
||||
for (int i = 0; i < (*dn)->n_children; i++) {
|
||||
assert(BP_STATE(*dn,i) == PT_AVAIL);
|
||||
invariant(BP_STATE(*dn, i) == PT_AVAIL);
|
||||
}
|
||||
toku_free(ndd);
|
||||
toku_ftnode_free(dn);
|
||||
}
|
||||
|
||||
|
||||
static int search_cmp(const struct ft_search& UU(so), const DBT* UU(key)) {
|
||||
static int search_cmp(const struct ft_search &UU(so), const DBT *UU(key)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
test2(int fd, FT ft_h, FTNODE *dn) {
|
||||
static void test2(int fd, FT ft_h, FTNODE *dn) {
|
||||
DBT left, right;
|
||||
DB dummy_db;
|
||||
memset(&dummy_db, 0, sizeof(dummy_db));
|
||||
memset(&left, 0, sizeof(left));
|
||||
memset(&right, 0, sizeof(right));
|
||||
ft_search search;
|
||||
|
||||
|
||||
ftnode_fetch_extra bfe_subset;
|
||||
bfe_subset.create_for_subset_read(
|
||||
ft_h,
|
||||
ft_search_init(&search, search_cmp, FT_SEARCH_LEFT, nullptr, nullptr, nullptr),
|
||||
ft_search_init(
|
||||
&search, search_cmp, FT_SEARCH_LEFT, nullptr, nullptr, nullptr),
|
||||
&left,
|
||||
&right,
|
||||
true,
|
||||
true,
|
||||
false,
|
||||
false
|
||||
);
|
||||
false);
|
||||
|
||||
FTNODE_DISK_DATA ndd = NULL;
|
||||
int r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, &ndd, &bfe_subset);
|
||||
assert(r==0);
|
||||
int r = toku_deserialize_ftnode_from(
|
||||
fd, make_blocknum(20), 0 /*pass zero for hash*/, dn, &ndd, &bfe_subset);
|
||||
invariant(r == 0);
|
||||
bool is_leaf = ((*dn)->height == 0);
|
||||
// at this point, although both partitions are available, only the
|
||||
// at this point, although both partitions are available, only the
|
||||
// second basement node should have had its clock
|
||||
// touched
|
||||
assert(BP_STATE(*dn, 0) == PT_AVAIL);
|
||||
assert(BP_STATE(*dn, 1) == PT_AVAIL);
|
||||
assert(BP_SHOULD_EVICT(*dn, 0));
|
||||
assert(!BP_SHOULD_EVICT(*dn, 1));
|
||||
invariant(BP_STATE(*dn, 0) == PT_AVAIL);
|
||||
invariant(BP_STATE(*dn, 1) == PT_AVAIL);
|
||||
invariant(BP_SHOULD_EVICT(*dn, 0));
|
||||
invariant(!BP_SHOULD_EVICT(*dn, 1));
|
||||
PAIR_ATTR attr;
|
||||
memset(&attr,0,sizeof(attr));
|
||||
memset(&attr, 0, sizeof(attr));
|
||||
toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr);
|
||||
assert(BP_STATE(*dn, 0) == (is_leaf) ? PT_ON_DISK : PT_COMPRESSED);
|
||||
assert(BP_STATE(*dn, 1) == PT_AVAIL);
|
||||
assert(BP_SHOULD_EVICT(*dn, 1));
|
||||
// After the first partial-eviction pass, partition 0 must have left memory:
// on disk for a leaf, compressed for a nonleaf. The conditional is
// parenthesized because `==` binds tighter than `?:`; without the parentheses
// the expression was `(state == is_leaf) ? PT_ON_DISK : PT_COMPRESSED`, which
// never compared the state against the expected value.
invariant(BP_STATE(*dn, 0) == ((is_leaf) ? PT_ON_DISK : PT_COMPRESSED));
|
||||
invariant(BP_STATE(*dn, 1) == PT_AVAIL);
|
||||
invariant(BP_SHOULD_EVICT(*dn, 1));
|
||||
toku_ftnode_pe_callback(*dn, attr, ft_h, def_pe_finalize_impl, nullptr);
|
||||
assert(BP_STATE(*dn, 1) == (is_leaf) ? PT_ON_DISK : PT_COMPRESSED);
|
||||
// After the second partial-eviction pass, partition 1 must also have left
// memory: on disk for a leaf, compressed for a nonleaf. The conditional is
// parenthesized because `==` binds tighter than `?:`; without the parentheses
// the expression was `(state == is_leaf) ? PT_ON_DISK : PT_COMPRESSED`, which
// never compared the state against the expected value.
invariant(BP_STATE(*dn, 1) == ((is_leaf) ? PT_ON_DISK : PT_COMPRESSED));
|
||||
|
||||
bool req = toku_ftnode_pf_req_callback(*dn, &bfe_subset);
|
||||
assert(req);
|
||||
invariant(req);
|
||||
toku_ftnode_pf_callback(*dn, ndd, &bfe_subset, fd, &attr);
|
||||
assert(BP_STATE(*dn, 0) == PT_AVAIL);
|
||||
assert(BP_STATE(*dn, 1) == PT_AVAIL);
|
||||
assert(BP_SHOULD_EVICT(*dn, 0));
|
||||
assert(!BP_SHOULD_EVICT(*dn, 1));
|
||||
invariant(BP_STATE(*dn, 0) == PT_AVAIL);
|
||||
invariant(BP_STATE(*dn, 1) == PT_AVAIL);
|
||||
invariant(BP_SHOULD_EVICT(*dn, 0));
|
||||
invariant(!BP_SHOULD_EVICT(*dn, 1));
|
||||
|
||||
toku_free(ndd);
|
||||
toku_ftnode_free(dn);
|
||||
}
|
||||
|
||||
static void
|
||||
test3_leaf(int fd, FT ft_h, FTNODE *dn) {
|
||||
static void test3_leaf(int fd, FT ft_h, FTNODE *dn) {
|
||||
DBT left, right;
|
||||
DB dummy_db;
|
||||
memset(&dummy_db, 0, sizeof(dummy_db));
|
||||
memset(&left, 0, sizeof(left));
|
||||
memset(&right, 0, sizeof(right));
|
||||
|
||||
|
||||
ftnode_fetch_extra bfe_min;
|
||||
bfe_min.create_for_min_read(ft_h);
|
||||
|
||||
FTNODE_DISK_DATA ndd = NULL;
|
||||
int r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, &ndd, &bfe_min);
|
||||
assert(r==0);
|
||||
int r = toku_deserialize_ftnode_from(
|
||||
fd, make_blocknum(20), 0 /*pass zero for hash*/, dn, &ndd, &bfe_min);
|
||||
invariant(r == 0);
|
||||
//
|
||||
// make sure we have a leaf
|
||||
//
|
||||
assert((*dn)->height == 0);
|
||||
invariant((*dn)->height == 0);
|
||||
for (int i = 0; i < (*dn)->n_children; i++) {
|
||||
assert(BP_STATE(*dn, i) == PT_ON_DISK);
|
||||
invariant(BP_STATE(*dn, i) == PT_ON_DISK);
|
||||
}
|
||||
toku_ftnode_free(dn);
|
||||
toku_free(ndd);
|
||||
}
|
||||
|
||||
static void
|
||||
test_serialize_nonleaf(void) {
|
||||
static void test_serialize_nonleaf(void) {
|
||||
// struct ft_handle source_ft;
|
||||
struct ftnode sn, *dn;
|
||||
|
||||
int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
|
||||
int fd = open(TOKU_TEST_FILENAME,
|
||||
O_RDWR | O_CREAT | O_BINARY,
|
||||
S_IRWXU | S_IRWXG | S_IRWXO);
|
||||
invariant(fd >= 0);
|
||||
|
||||
int r;
|
||||
|
||||
|
@ -265,11 +253,11 @@ test_serialize_nonleaf(void) {
|
|||
sn.pivotkeys.create_from_dbts(toku_fill_dbt(&pivotkey, "hello", 6), 1);
|
||||
BP_BLOCKNUM(&sn, 0).b = 30;
|
||||
BP_BLOCKNUM(&sn, 1).b = 35;
|
||||
BP_STATE(&sn,0) = PT_AVAIL;
|
||||
BP_STATE(&sn,1) = PT_AVAIL;
|
||||
BP_STATE(&sn, 0) = PT_AVAIL;
|
||||
BP_STATE(&sn, 1) = PT_AVAIL;
|
||||
set_BNC(&sn, 0, toku_create_empty_nl());
|
||||
set_BNC(&sn, 1, toku_create_empty_nl());
|
||||
//Create XIDS
|
||||
// Create XIDS
|
||||
XIDS xids_0 = toku_xids_get_root_xids();
|
||||
XIDS xids_123;
|
||||
XIDS xids_234;
|
||||
|
@ -281,11 +269,38 @@ test_serialize_nonleaf(void) {
|
|||
toku::comparator cmp;
|
||||
cmp.create(string_key_cmp, nullptr);
|
||||
|
||||
toku_bnc_insert_msg(BNC(&sn, 0), "a", 2, "aval", 5, FT_NONE, next_dummymsn(), xids_0, true, cmp);
|
||||
toku_bnc_insert_msg(BNC(&sn, 0), "b", 2, "bval", 5, FT_NONE, next_dummymsn(), xids_123, false, cmp);
|
||||
toku_bnc_insert_msg(BNC(&sn, 1), "x", 2, "xval", 5, FT_NONE, next_dummymsn(), xids_234, true, cmp);
|
||||
toku_bnc_insert_msg(BNC(&sn, 0),
|
||||
"a",
|
||||
2,
|
||||
"aval",
|
||||
5,
|
||||
FT_NONE,
|
||||
next_dummymsn(),
|
||||
xids_0,
|
||||
true,
|
||||
cmp);
|
||||
toku_bnc_insert_msg(BNC(&sn, 0),
|
||||
"b",
|
||||
2,
|
||||
"bval",
|
||||
5,
|
||||
FT_NONE,
|
||||
next_dummymsn(),
|
||||
xids_123,
|
||||
false,
|
||||
cmp);
|
||||
toku_bnc_insert_msg(BNC(&sn, 1),
|
||||
"x",
|
||||
2,
|
||||
"xval",
|
||||
5,
|
||||
FT_NONE,
|
||||
next_dummymsn(),
|
||||
xids_234,
|
||||
true,
|
||||
cmp);
|
||||
|
||||
//Cleanup:
|
||||
// Cleanup:
|
||||
toku_xids_destroy(&xids_0);
|
||||
toku_xids_destroy(&xids_123);
|
||||
toku_xids_destroy(&xids_234);
|
||||
|
@ -297,35 +312,41 @@ test_serialize_nonleaf(void) {
|
|||
make_blocknum(0),
|
||||
ZERO_LSN,
|
||||
TXNID_NONE,
|
||||
4*1024*1024,
|
||||
128*1024,
|
||||
4 * 1024 * 1024,
|
||||
128 * 1024,
|
||||
TOKU_DEFAULT_COMPRESSION_METHOD,
|
||||
16);
|
||||
ft_h->cmp.create(string_key_cmp, nullptr);
|
||||
ft->ft = ft_h;
|
||||
|
||||
|
||||
ft_h->blocktable.create();
|
||||
{ int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); }
|
||||
//Want to use block #20
|
||||
{
|
||||
int r_truncate = ftruncate(fd, 0);
|
||||
CKERR(r_truncate);
|
||||
}
|
||||
// Want to use block #20
|
||||
BLOCKNUM b = make_blocknum(0);
|
||||
while (b.b < 20) {
|
||||
ft_h->blocktable.allocate_blocknum(&b, ft_h);
|
||||
}
|
||||
assert(b.b == 20);
|
||||
invariant(b.b == 20);
|
||||
|
||||
{
|
||||
DISKOFF offset;
|
||||
DISKOFF size;
|
||||
ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
|
||||
assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
|
||||
ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false);
|
||||
invariant(offset ==
|
||||
(DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
|
||||
|
||||
ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
|
||||
assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
|
||||
assert(size == 100);
|
||||
invariant(offset ==
|
||||
(DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
|
||||
invariant(size == 100);
|
||||
}
|
||||
FTNODE_DISK_DATA ndd = NULL;
|
||||
r = toku_serialize_ftnode_to(fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false);
|
||||
assert(r==0);
|
||||
r = toku_serialize_ftnode_to(
|
||||
fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false);
|
||||
invariant(r == 0);
|
||||
|
||||
test1(fd, ft_h, &dn);
|
||||
test2(fd, ft_h, &dn);
|
||||
|
@ -333,22 +354,26 @@ test_serialize_nonleaf(void) {
|
|||
toku_destroy_ftnode_internals(&sn);
|
||||
toku_free(ndd);
|
||||
|
||||
ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
|
||||
ft_h->blocktable.block_free(
|
||||
BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100);
|
||||
ft_h->blocktable.destroy();
|
||||
toku_free(ft_h->h);
|
||||
ft_h->cmp.destroy();
|
||||
toku_free(ft_h);
|
||||
toku_free(ft);
|
||||
|
||||
r = close(fd); assert(r != -1);
|
||||
r = close(fd);
|
||||
invariant(r != -1);
|
||||
}
|
||||
|
||||
static void
|
||||
test_serialize_leaf(void) {
|
||||
static void test_serialize_leaf(void) {
|
||||
// struct ft_handle source_ft;
|
||||
struct ftnode sn, *dn;
|
||||
|
||||
int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
|
||||
int fd = open(TOKU_TEST_FILENAME,
|
||||
O_RDWR | O_CREAT | O_BINARY,
|
||||
S_IRWXU | S_IRWXG | S_IRWXO);
|
||||
invariant(fd >= 0);
|
||||
|
||||
int r;
|
||||
|
||||
|
@ -364,8 +389,8 @@ test_serialize_leaf(void) {
|
|||
MALLOC_N(sn.n_children, sn.bp);
|
||||
DBT pivotkey;
|
||||
sn.pivotkeys.create_from_dbts(toku_fill_dbt(&pivotkey, "b", 2), 1);
|
||||
BP_STATE(&sn,0) = PT_AVAIL;
|
||||
BP_STATE(&sn,1) = PT_AVAIL;
|
||||
BP_STATE(&sn, 0) = PT_AVAIL;
|
||||
BP_STATE(&sn, 1) = PT_AVAIL;
|
||||
set_BLB(&sn, 0, toku_create_empty_bn());
|
||||
set_BLB(&sn, 1, toku_create_empty_bn());
|
||||
le_malloc(BLB_DATA(&sn, 0), 0, "a", "aval");
|
||||
|
@ -378,51 +403,59 @@ test_serialize_leaf(void) {
|
|||
make_blocknum(0),
|
||||
ZERO_LSN,
|
||||
TXNID_NONE,
|
||||
4*1024*1024,
|
||||
128*1024,
|
||||
4 * 1024 * 1024,
|
||||
128 * 1024,
|
||||
TOKU_DEFAULT_COMPRESSION_METHOD,
|
||||
16);
|
||||
ft->ft = ft_h;
|
||||
|
||||
|
||||
ft_h->blocktable.create();
|
||||
{ int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); }
|
||||
//Want to use block #20
|
||||
{
|
||||
int r_truncate = ftruncate(fd, 0);
|
||||
CKERR(r_truncate);
|
||||
}
|
||||
// Want to use block #20
|
||||
BLOCKNUM b = make_blocknum(0);
|
||||
while (b.b < 20) {
|
||||
ft_h->blocktable.allocate_blocknum(&b, ft_h);
|
||||
}
|
||||
assert(b.b == 20);
|
||||
invariant(b.b == 20);
|
||||
|
||||
{
|
||||
DISKOFF offset;
|
||||
DISKOFF size;
|
||||
ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
|
||||
assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
|
||||
ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false);
|
||||
invariant(offset ==
|
||||
(DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
|
||||
|
||||
ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
|
||||
assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
|
||||
assert(size == 100);
|
||||
invariant(offset ==
|
||||
(DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
|
||||
invariant(size == 100);
|
||||
}
|
||||
FTNODE_DISK_DATA ndd = NULL;
|
||||
r = toku_serialize_ftnode_to(fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false);
|
||||
assert(r==0);
|
||||
r = toku_serialize_ftnode_to(
|
||||
fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false);
|
||||
invariant(r == 0);
|
||||
|
||||
test1(fd, ft_h, &dn);
|
||||
test3_leaf(fd, ft_h,&dn);
|
||||
test3_leaf(fd, ft_h, &dn);
|
||||
|
||||
toku_destroy_ftnode_internals(&sn);
|
||||
|
||||
ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
|
||||
ft_h->blocktable.block_free(
|
||||
BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100);
|
||||
ft_h->blocktable.destroy();
|
||||
toku_free(ft_h->h);
|
||||
toku_free(ft_h);
|
||||
toku_free(ft);
|
||||
toku_free(ndd);
|
||||
r = close(fd); assert(r != -1);
|
||||
r = close(fd);
|
||||
invariant(r != -1);
|
||||
}
|
||||
|
||||
int
|
||||
test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) {
|
||||
int test_main(int argc __attribute__((__unused__)),
|
||||
const char *argv[] __attribute__((__unused__))) {
|
||||
initialize_dummymsn();
|
||||
test_serialize_nonleaf();
|
||||
test_serialize_leaf();
|
||||
|
|
|
@ -41,27 +41,21 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
|||
#include <sys/time.h>
|
||||
#include "test.h"
|
||||
|
||||
|
||||
|
||||
#ifndef MIN
|
||||
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
|
||||
#endif
|
||||
const double USECS_PER_SEC = 1000000.0;
|
||||
|
||||
static void
|
||||
le_add_to_bn(bn_data* bn, uint32_t idx, char *key, int keylen, char *val, int vallen)
|
||||
{
|
||||
static void le_add_to_bn(bn_data *bn,
|
||||
uint32_t idx,
|
||||
char *key,
|
||||
int keylen,
|
||||
char *val,
|
||||
int vallen) {
|
||||
LEAFENTRY r = NULL;
|
||||
uint32_t size_needed = LE_CLEAN_MEMSIZE(vallen);
|
||||
void *maybe_free = nullptr;
|
||||
bn->get_space_for_insert(
|
||||
idx,
|
||||
key,
|
||||
keylen,
|
||||
size_needed,
|
||||
&r,
|
||||
&maybe_free
|
||||
);
|
||||
bn->get_space_for_insert(idx, key, keylen, size_needed, &r, &maybe_free);
|
||||
if (maybe_free) {
|
||||
toku_free(maybe_free);
|
||||
}
|
||||
|
@ -71,20 +65,24 @@ le_add_to_bn(bn_data* bn, uint32_t idx, char *key, int keylen, char *val, int va
|
|||
memcpy(r->u.clean.val, val, vallen);
|
||||
}
|
||||
|
||||
static int
|
||||
long_key_cmp(DB *UU(e), const DBT *a, const DBT *b)
|
||||
{
|
||||
static int long_key_cmp(DB *UU(e), const DBT *a, const DBT *b) {
|
||||
const long *CAST_FROM_VOIDP(x, a->data);
|
||||
const long *CAST_FROM_VOIDP(y, b->data);
|
||||
return (*x > *y) - (*x < *y);
|
||||
}
|
||||
|
||||
static void
|
||||
test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int deser_runs) {
|
||||
static void test_serialize_leaf(int valsize,
|
||||
int nelts,
|
||||
double entropy,
|
||||
int ser_runs,
|
||||
int deser_runs) {
|
||||
// struct ft_handle source_ft;
|
||||
struct ftnode *sn, *dn;
|
||||
|
||||
int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
|
||||
int fd = open(TOKU_TEST_FILENAME,
|
||||
O_RDWR | O_CREAT | O_BINARY,
|
||||
S_IRWXU | S_IRWXG | S_IRWXO);
|
||||
invariant(fd >= 0);
|
||||
|
||||
int r;
|
||||
|
||||
|
@ -102,7 +100,7 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de
|
|||
MALLOC_N(sn->n_children, sn->bp);
|
||||
sn->pivotkeys.create_empty();
|
||||
for (int i = 0; i < sn->n_children; ++i) {
|
||||
BP_STATE(sn,i) = PT_AVAIL;
|
||||
BP_STATE(sn, i) = PT_AVAIL;
|
||||
set_BLB(sn, i, toku_create_empty_bn());
|
||||
}
|
||||
int nperbn = nelts / sn->n_children;
|
||||
|
@ -112,24 +110,19 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de
|
|||
k = ck * nperbn + i;
|
||||
char buf[valsize];
|
||||
int c;
|
||||
for (c = 0; c < valsize * entropy; ) {
|
||||
int *p = (int *) &buf[c];
|
||||
for (c = 0; c < valsize * entropy;) {
|
||||
int *p = (int *)&buf[c];
|
||||
*p = rand();
|
||||
c += sizeof(*p);
|
||||
}
|
||||
memset(&buf[c], 0, valsize - c);
|
||||
le_add_to_bn(
|
||||
BLB_DATA(sn,ck),
|
||||
i,
|
||||
(char *)&k,
|
||||
sizeof k,
|
||||
buf,
|
||||
sizeof buf
|
||||
);
|
||||
BLB_DATA(sn, ck), i, (char *)&k, sizeof k, buf, sizeof buf);
|
||||
}
|
||||
if (ck < 7) {
|
||||
DBT pivotkey;
|
||||
sn->pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)), ck);
|
||||
sn->pivotkeys.insert_at(toku_fill_dbt(&pivotkey, &k, sizeof(k)),
|
||||
ck);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -139,31 +132,36 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de
|
|||
make_blocknum(0),
|
||||
ZERO_LSN,
|
||||
TXNID_NONE,
|
||||
4*1024*1024,
|
||||
128*1024,
|
||||
4 * 1024 * 1024,
|
||||
128 * 1024,
|
||||
TOKU_DEFAULT_COMPRESSION_METHOD,
|
||||
16);
|
||||
ft_h->cmp.create(long_key_cmp, nullptr);
|
||||
ft->ft = ft_h;
|
||||
|
||||
|
||||
ft_h->blocktable.create();
|
||||
{ int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); }
|
||||
//Want to use block #20
|
||||
{
|
||||
int r_truncate = ftruncate(fd, 0);
|
||||
CKERR(r_truncate);
|
||||
}
|
||||
// Want to use block #20
|
||||
BLOCKNUM b = make_blocknum(0);
|
||||
while (b.b < 20) {
|
||||
ft_h->blocktable.allocate_blocknum(&b, ft_h);
|
||||
}
|
||||
assert(b.b == 20);
|
||||
invariant(b.b == 20);
|
||||
|
||||
{
|
||||
DISKOFF offset;
|
||||
DISKOFF size;
|
||||
ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
|
||||
assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
|
||||
ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false);
|
||||
invariant(offset ==
|
||||
(DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
|
||||
|
||||
ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
|
||||
assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
|
||||
assert(size == 100);
|
||||
invariant(offset ==
|
||||
(DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
|
||||
invariant(size == 100);
|
||||
}
|
||||
|
||||
struct timeval total_start;
|
||||
|
@ -176,8 +174,9 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de
|
|||
gettimeofday(&t[0], NULL);
|
||||
ndd = NULL;
|
||||
sn->dirty = 1;
|
||||
r = toku_serialize_ftnode_to(fd, make_blocknum(20), sn, &ndd, true, ft->ft, false);
|
||||
assert(r==0);
|
||||
r = toku_serialize_ftnode_to(
|
||||
fd, make_blocknum(20), sn, &ndd, true, ft->ft, false);
|
||||
invariant(r == 0);
|
||||
gettimeofday(&t[1], NULL);
|
||||
total_start.tv_sec += t[0].tv_sec;
|
||||
total_start.tv_usec += t[0].tv_usec;
|
||||
|
@ -186,12 +185,14 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de
|
|||
toku_free(ndd);
|
||||
}
|
||||
double dt;
|
||||
dt = (total_end.tv_sec - total_start.tv_sec) + ((total_end.tv_usec - total_start.tv_usec) / USECS_PER_SEC);
|
||||
dt = (total_end.tv_sec - total_start.tv_sec) +
|
||||
((total_end.tv_usec - total_start.tv_usec) / USECS_PER_SEC);
|
||||
dt *= 1000;
|
||||
dt /= ser_runs;
|
||||
printf("serialize leaf(ms): %0.05lf (average of %d runs)\n", dt, ser_runs);
|
||||
printf(
|
||||
"serialize leaf(ms): %0.05lf (average of %d runs)\n", dt, ser_runs);
|
||||
|
||||
//reset
|
||||
// reset
|
||||
total_start.tv_sec = total_start.tv_usec = 0;
|
||||
total_end.tv_sec = total_end.tv_usec = 0;
|
||||
|
||||
|
@ -200,8 +201,9 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de
|
|||
bfe.create_for_full_read(ft_h);
|
||||
gettimeofday(&t[0], NULL);
|
||||
FTNODE_DISK_DATA ndd2 = NULL;
|
||||
r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd2, &bfe);
|
||||
assert(r==0);
|
||||
r = toku_deserialize_ftnode_from(
|
||||
fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd2, &bfe);
|
||||
invariant(r == 0);
|
||||
gettimeofday(&t[1], NULL);
|
||||
|
||||
total_start.tv_sec += t[0].tv_sec;
|
||||
|
@ -212,35 +214,46 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de
|
|||
toku_ftnode_free(&dn);
|
||||
toku_free(ndd2);
|
||||
}
|
||||
dt = (total_end.tv_sec - total_start.tv_sec) + ((total_end.tv_usec - total_start.tv_usec) / USECS_PER_SEC);
|
||||
dt = (total_end.tv_sec - total_start.tv_sec) +
|
||||
((total_end.tv_usec - total_start.tv_usec) / USECS_PER_SEC);
|
||||
dt *= 1000;
|
||||
dt /= deser_runs;
|
||||
printf("deserialize leaf(ms): %0.05lf (average of %d runs)\n", dt, deser_runs);
|
||||
printf("io time(ms) %lf decompress time(ms) %lf deserialize time(ms) %lf (average of %d runs)\n",
|
||||
tokutime_to_seconds(bfe.io_time)*1000,
|
||||
tokutime_to_seconds(bfe.decompress_time)*1000,
|
||||
tokutime_to_seconds(bfe.deserialize_time)*1000,
|
||||
deser_runs
|
||||
);
|
||||
printf(
|
||||
"deserialize leaf(ms): %0.05lf (average of %d runs)\n", dt, deser_runs);
|
||||
printf(
|
||||
"io time(ms) %lf decompress time(ms) %lf deserialize time(ms) %lf "
|
||||
"(average of %d runs)\n",
|
||||
tokutime_to_seconds(bfe.io_time) * 1000,
|
||||
tokutime_to_seconds(bfe.decompress_time) * 1000,
|
||||
tokutime_to_seconds(bfe.deserialize_time) * 1000,
|
||||
deser_runs);
|
||||
|
||||
toku_ftnode_free(&sn);
|
||||
|
||||
ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
|
||||
ft_h->blocktable.block_free(
|
||||
BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100);
|
||||
ft_h->blocktable.destroy();
|
||||
ft_h->cmp.destroy();
|
||||
toku_free(ft_h->h);
|
||||
toku_free(ft_h);
|
||||
toku_free(ft);
|
||||
|
||||
r = close(fd); assert(r != -1);
|
||||
r = close(fd);
|
||||
invariant(r != -1);
|
||||
}
|
||||
|
||||
static void
|
||||
test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int deser_runs) {
|
||||
static void test_serialize_nonleaf(int valsize,
|
||||
int nelts,
|
||||
double entropy,
|
||||
int ser_runs,
|
||||
int deser_runs) {
|
||||
// struct ft_handle source_ft;
|
||||
struct ftnode sn, *dn;
|
||||
|
||||
int fd = open(TOKU_TEST_FILENAME, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
|
||||
int fd = open(TOKU_TEST_FILENAME,
|
||||
O_RDWR | O_CREAT | O_BINARY,
|
||||
S_IRWXU | S_IRWXG | S_IRWXO);
|
||||
invariant(fd >= 0);
|
||||
|
||||
int r;
|
||||
|
||||
|
@ -257,11 +270,11 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int
|
|||
MALLOC_N(sn.n_children, sn.bp);
|
||||
sn.pivotkeys.create_empty();
|
||||
for (int i = 0; i < sn.n_children; ++i) {
|
||||
BP_BLOCKNUM(&sn, i).b = 30 + (i*5);
|
||||
BP_STATE(&sn,i) = PT_AVAIL;
|
||||
BP_BLOCKNUM(&sn, i).b = 30 + (i * 5);
|
||||
BP_STATE(&sn, i) = PT_AVAIL;
|
||||
set_BNC(&sn, i, toku_create_empty_nl());
|
||||
}
|
||||
//Create XIDS
|
||||
// Create XIDS
|
||||
XIDS xids_0 = toku_xids_get_root_xids();
|
||||
XIDS xids_123;
|
||||
r = toku_xids_create_child(xids_0, &xids_123, (TXNID)123);
|
||||
|
@ -276,14 +289,23 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int
|
|||
k = ck * nperchild + i;
|
||||
char buf[valsize];
|
||||
int c;
|
||||
for (c = 0; c < valsize * entropy; ) {
|
||||
int *p = (int *) &buf[c];
|
||||
for (c = 0; c < valsize * entropy;) {
|
||||
int *p = (int *)&buf[c];
|
||||
*p = rand();
|
||||
c += sizeof(*p);
|
||||
}
|
||||
memset(&buf[c], 0, valsize - c);
|
||||
|
||||
toku_bnc_insert_msg(bnc, &k, sizeof k, buf, valsize, FT_NONE, next_dummymsn(), xids_123, true, cmp);
|
||||
toku_bnc_insert_msg(bnc,
|
||||
&k,
|
||||
sizeof k,
|
||||
buf,
|
||||
valsize,
|
||||
FT_NONE,
|
||||
next_dummymsn(),
|
||||
xids_123,
|
||||
true,
|
||||
cmp);
|
||||
}
|
||||
if (ck < 7) {
|
||||
DBT pivotkey;
|
||||
|
@ -291,7 +313,7 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int
|
|||
}
|
||||
}
|
||||
|
||||
//Cleanup:
|
||||
// Cleanup:
|
||||
toku_xids_destroy(&xids_0);
|
||||
toku_xids_destroy(&xids_123);
|
||||
cmp.destroy();
|
||||
|
@ -302,65 +324,78 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int
|
|||
make_blocknum(0),
|
||||
ZERO_LSN,
|
||||
TXNID_NONE,
|
||||
4*1024*1024,
|
||||
128*1024,
|
||||
4 * 1024 * 1024,
|
||||
128 * 1024,
|
||||
TOKU_DEFAULT_COMPRESSION_METHOD,
|
||||
16);
|
||||
ft_h->cmp.create(long_key_cmp, nullptr);
|
||||
ft->ft = ft_h;
|
||||
|
||||
|
||||
ft_h->blocktable.create();
|
||||
{ int r_truncate = ftruncate(fd, 0); CKERR(r_truncate); }
|
||||
//Want to use block #20
|
||||
{
|
||||
int r_truncate = ftruncate(fd, 0);
|
||||
CKERR(r_truncate);
|
||||
}
|
||||
// Want to use block #20
|
||||
BLOCKNUM b = make_blocknum(0);
|
||||
while (b.b < 20) {
|
||||
ft_h->blocktable.allocate_blocknum(&b, ft_h);
|
||||
}
|
||||
assert(b.b == 20);
|
||||
invariant(b.b == 20);
|
||||
|
||||
{
|
||||
DISKOFF offset;
|
||||
DISKOFF size;
|
||||
ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
|
||||
assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
|
||||
ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false);
|
||||
invariant(offset ==
|
||||
(DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
|
||||
|
||||
ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
|
||||
assert(offset == (DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
|
||||
assert(size == 100);
|
||||
invariant(offset ==
|
||||
(DISKOFF)BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
|
||||
invariant(size == 100);
|
||||
}
|
||||
|
||||
struct timeval t[2];
|
||||
gettimeofday(&t[0], NULL);
|
||||
FTNODE_DISK_DATA ndd = NULL;
|
||||
r = toku_serialize_ftnode_to(fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false);
|
||||
assert(r==0);
|
||||
r = toku_serialize_ftnode_to(
|
||||
fd, make_blocknum(20), &sn, &ndd, true, ft->ft, false);
|
||||
invariant(r == 0);
|
||||
gettimeofday(&t[1], NULL);
|
||||
double dt;
|
||||
dt = (t[1].tv_sec - t[0].tv_sec) + ((t[1].tv_usec - t[0].tv_usec) / USECS_PER_SEC);
|
||||
dt = (t[1].tv_sec - t[0].tv_sec) +
|
||||
((t[1].tv_usec - t[0].tv_usec) / USECS_PER_SEC);
|
||||
dt *= 1000;
|
||||
printf("serialize nonleaf(ms): %0.05lf (IGNORED RUNS=%d)\n", dt, ser_runs);
|
||||
printf(
|
||||
"serialize nonleaf(ms): %0.05lf (IGNORED RUNS=%d)\n", dt, ser_runs);
|
||||
|
||||
ftnode_fetch_extra bfe;
|
||||
bfe.create_for_full_read(ft_h);
|
||||
gettimeofday(&t[0], NULL);
|
||||
FTNODE_DISK_DATA ndd2 = NULL;
|
||||
r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd2, &bfe);
|
||||
assert(r==0);
|
||||
r = toku_deserialize_ftnode_from(
|
||||
fd, make_blocknum(20), 0 /*pass zero for hash*/, &dn, &ndd2, &bfe);
|
||||
invariant(r == 0);
|
||||
gettimeofday(&t[1], NULL);
|
||||
dt = (t[1].tv_sec - t[0].tv_sec) + ((t[1].tv_usec - t[0].tv_usec) / USECS_PER_SEC);
|
||||
dt = (t[1].tv_sec - t[0].tv_sec) +
|
||||
((t[1].tv_usec - t[0].tv_usec) / USECS_PER_SEC);
|
||||
dt *= 1000;
|
||||
printf("deserialize nonleaf(ms): %0.05lf (IGNORED RUNS=%d)\n", dt, deser_runs);
|
||||
printf("io time(ms) %lf decompress time(ms) %lf deserialize time(ms) %lf (IGNORED RUNS=%d)\n",
|
||||
tokutime_to_seconds(bfe.io_time)*1000,
|
||||
tokutime_to_seconds(bfe.decompress_time)*1000,
|
||||
tokutime_to_seconds(bfe.deserialize_time)*1000,
|
||||
deser_runs
|
||||
);
|
||||
printf(
|
||||
"deserialize nonleaf(ms): %0.05lf (IGNORED RUNS=%d)\n", dt, deser_runs);
|
||||
printf(
|
||||
"io time(ms) %lf decompress time(ms) %lf deserialize time(ms) %lf "
|
||||
"(IGNORED RUNS=%d)\n",
|
||||
tokutime_to_seconds(bfe.io_time) * 1000,
|
||||
tokutime_to_seconds(bfe.decompress_time) * 1000,
|
||||
tokutime_to_seconds(bfe.deserialize_time) * 1000,
|
||||
deser_runs);
|
||||
|
||||
toku_ftnode_free(&dn);
|
||||
toku_destroy_ftnode_internals(&sn);
|
||||
|
||||
ft_h->blocktable.block_free(block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
|
||||
ft_h->blocktable.block_free(
|
||||
BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE, 100);
|
||||
ft_h->blocktable.destroy();
|
||||
toku_free(ft_h->h);
|
||||
ft_h->cmp.destroy();
|
||||
|
@ -369,17 +404,21 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int
|
|||
toku_free(ndd);
|
||||
toku_free(ndd2);
|
||||
|
||||
r = close(fd); assert(r != -1);
|
||||
r = close(fd);
|
||||
invariant(r != -1);
|
||||
}
|
||||
|
||||
int
|
||||
test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) {
|
||||
int test_main(int argc __attribute__((__unused__)),
|
||||
const char *argv[] __attribute__((__unused__))) {
|
||||
const int DEFAULT_RUNS = 5;
|
||||
long valsize, nelts, ser_runs = DEFAULT_RUNS, deser_runs = DEFAULT_RUNS;
|
||||
double entropy = 0.3;
|
||||
|
||||
if (argc != 3 && argc != 5) {
|
||||
fprintf(stderr, "Usage: %s <valsize> <nelts> [<serialize_runs> <deserialize_runs>]\n", argv[0]);
|
||||
fprintf(stderr,
|
||||
"Usage: %s <valsize> <nelts> [<serialize_runs> "
|
||||
"<deserialize_runs>]\n",
|
||||
argv[0]);
|
||||
fprintf(stderr, "Default (and min) runs is %d\n", DEFAULT_RUNS);
|
||||
return 2;
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -164,17 +164,16 @@ static void test_read_what_was_written (void) {
|
|||
int r;
|
||||
const int NVALS=10000;
|
||||
|
||||
if (verbose) printf("test_read_what_was_written(): "); fflush(stdout);
|
||||
if (verbose) {
|
||||
printf("test_read_what_was_written(): "); fflush(stdout);
|
||||
}
|
||||
|
||||
unlink(fname);
|
||||
|
||||
|
||||
toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr);
|
||||
r = toku_open_ft_handle(fname, 1, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0);
|
||||
r = toku_close_ft_handle_nolsn(ft, 0); assert(r==0);
|
||||
toku_cachetable_close(&ct);
|
||||
|
||||
|
||||
toku_cachetable_close(&ct);
|
||||
|
||||
/* Now see if we can read an empty tree in. */
|
||||
toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr);
|
||||
|
@ -189,8 +188,6 @@ static void test_read_what_was_written (void) {
|
|||
r = toku_close_ft_handle_nolsn(ft, 0); assert(r==0);
|
||||
toku_cachetable_close(&ct);
|
||||
|
||||
|
||||
|
||||
/* Now see if we can read it in and get the value. */
|
||||
toku_cachetable_create(&ct, 0, ZERO_LSN, nullptr);
|
||||
r = toku_open_ft_handle(fname, 0, &ft, 1<<12, 1<<9, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun); assert(r==0);
|
||||
|
|
|
@ -109,7 +109,9 @@ static int run_test(void)
|
|||
r = pqueue_pop(pq, &node); assert(r==0);
|
||||
if (verbose) printf("%d : %d\n", i, *(int*)(node->key->data));
|
||||
if ( *(int*)(node->key->data) != i ) {
|
||||
if (verbose) printf("FAIL\n"); return -1;
|
||||
if (verbose)
|
||||
printf("FAIL\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
pqueue_free(pq);
|
||||
|
|
|
@ -793,7 +793,7 @@ static void test_le_garbage_collection_birdie(void) {
|
|||
do_garbage_collect = ule_worth_running_garbage_collection(&ule, 200);
|
||||
invariant(do_garbage_collect);
|
||||
|
||||
// It is definately worth doing when the above case is true
|
||||
// It is definitely worth doing when the above case is true
|
||||
// and there is more than one provisional entry.
|
||||
ule.num_cuxrs = 1;
|
||||
ule.num_puxrs = 2;
|
||||
|
|
|
@ -72,7 +72,7 @@ static void dummy_update_status(FTNODE UU(child), int UU(dirtied), void* UU(extr
|
|||
|
||||
enum { NODESIZE = 1024, KSIZE=NODESIZE-100, TOKU_PSIZE=20 };
|
||||
|
||||
static void test_oldest_referenced_xid_gets_propogated(void) {
|
||||
static void test_oldest_referenced_xid_gets_propagated(void) {
|
||||
int r;
|
||||
CACHETABLE ct;
|
||||
FT_HANDLE t;
|
||||
|
@ -166,7 +166,7 @@ static void test_oldest_referenced_xid_gets_propogated(void) {
|
|||
toku_ft_flush_some_child(t->ft, node, &fa);
|
||||
|
||||
// pin the child, verify that oldest referenced xid was
|
||||
// propogated from parent to child during the flush
|
||||
// propagated from parent to child during the flush
|
||||
toku_pin_ftnode(
|
||||
t->ft,
|
||||
child_nonleaf_blocknum,
|
||||
|
@ -185,6 +185,6 @@ static void test_oldest_referenced_xid_gets_propogated(void) {
|
|||
|
||||
int test_main(int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) {
|
||||
default_parse_args(argc, argv);
|
||||
test_oldest_referenced_xid_gets_propogated();
|
||||
test_oldest_referenced_xid_gets_propagated();
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -36,30 +36,62 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
|||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#pragma once
|
||||
#include "ft/serialize/rbtree_mhs.h"
|
||||
#include "test.h"
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include <ctime>
|
||||
#include <cstdlib>
|
||||
|
||||
#include <db.h>
|
||||
static void test_insert_remove(void) {
|
||||
uint64_t i;
|
||||
MhsRbTree::Tree *tree = new MhsRbTree::Tree();
|
||||
verbose = 0;
|
||||
|
||||
#include "ft/serialize/block_allocator.h"
|
||||
tree->Insert({0, 100});
|
||||
|
||||
// Block allocation strategy implementations
|
||||
for (i = 0; i < 10; i++) {
|
||||
tree->Remove(3);
|
||||
tree->Remove(2);
|
||||
}
|
||||
tree->ValidateBalance();
|
||||
tree->ValidateMhs();
|
||||
|
||||
class block_allocator_strategy {
|
||||
public:
|
||||
static struct block_allocator::blockpair *
|
||||
first_fit(struct block_allocator::blockpair *blocks_array,
|
||||
uint64_t n_blocks, uint64_t size, uint64_t alignment);
|
||||
for (i = 0; i < 10; i++) {
|
||||
tree->Insert({5 * i, 3});
|
||||
}
|
||||
tree->ValidateBalance();
|
||||
tree->ValidateMhs();
|
||||
|
||||
static struct block_allocator::blockpair *
|
||||
best_fit(struct block_allocator::blockpair *blocks_array,
|
||||
uint64_t n_blocks, uint64_t size, uint64_t alignment);
|
||||
uint64_t offset = tree->Remove(2);
|
||||
invariant(offset == 0);
|
||||
offset = tree->Remove(10);
|
||||
invariant(offset == 50);
|
||||
offset = tree->Remove(3);
|
||||
invariant(offset == 5);
|
||||
tree->ValidateBalance();
|
||||
tree->ValidateMhs();
|
||||
|
||||
static struct block_allocator::blockpair *
|
||||
padded_fit(struct block_allocator::blockpair *blocks_array,
|
||||
uint64_t n_blocks, uint64_t size, uint64_t alignment);
|
||||
tree->Insert({48, 2});
|
||||
tree->Insert({50, 10});
|
||||
|
||||
static struct block_allocator::blockpair *
|
||||
heat_zone(struct block_allocator::blockpair *blocks_array,
|
||||
uint64_t n_blocks, uint64_t size, uint64_t alignment,
|
||||
uint64_t heat);
|
||||
};
|
||||
tree->ValidateBalance();
|
||||
tree->ValidateMhs();
|
||||
|
||||
tree->Insert({3, 7});
|
||||
offset = tree->Remove(10);
|
||||
invariant(offset == 2);
|
||||
tree->ValidateBalance();
|
||||
tree->ValidateMhs();
|
||||
tree->Dump();
|
||||
delete tree;
|
||||
}
|
||||
|
||||
int test_main(int argc, const char *argv[]) {
|
||||
default_parse_args(argc, argv);
|
||||
|
||||
test_insert_remove();
|
||||
if (verbose)
|
||||
printf("test ok\n");
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,102 @@
|
|||
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
|
||||
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
|
||||
#ident "$Id$"
|
||||
/*======
|
||||
This file is part of PerconaFT.
|
||||
|
||||
|
||||
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License, version 2,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
----------------------------------------
|
||||
|
||||
PerconaFT is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License, version 3,
|
||||
as published by the Free Software Foundation.
|
||||
|
||||
PerconaFT is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
|
||||
======= */
|
||||
|
||||
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
|
||||
|
||||
#include "ft/serialize/rbtree_mhs.h"
|
||||
#include "test.h"
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include <ctime>
|
||||
#include <cstdlib>
|
||||
|
||||
#define N 1000000
|
||||
std::vector<MhsRbTree::Node::BlockPair> input_vector;
|
||||
MhsRbTree::Node::BlockPair old_vector[N];
|
||||
|
||||
static int myrandom(int i) { return std::rand() % i; }
|
||||
|
||||
static void generate_random_input() {
|
||||
std::srand(unsigned(std::time(0)));
|
||||
|
||||
// set some values:
|
||||
for (uint64_t i = 1; i < N; ++i) {
|
||||
input_vector.push_back({i, 0});
|
||||
old_vector[i] = {i, 0};
|
||||
}
|
||||
// using built-in random generator:
|
||||
std::random_shuffle(input_vector.begin(), input_vector.end(), myrandom);
|
||||
}
|
||||
|
||||
static void test_insert_remove(void) {
|
||||
int i;
|
||||
MhsRbTree::Tree *tree = new MhsRbTree::Tree();
|
||||
verbose = 0;
|
||||
generate_random_input();
|
||||
if (verbose) {
|
||||
printf("\n we are going to insert the following block offsets\n");
|
||||
for (i = 0; i < N; i++)
|
||||
printf("%" PRIu64 "\t", input_vector[i]._offset.ToInt());
|
||||
}
|
||||
for (i = 0; i < N; i++) {
|
||||
tree->Insert(input_vector[i]);
|
||||
// tree->ValidateBalance();
|
||||
}
|
||||
tree->ValidateBalance();
|
||||
MhsRbTree::Node::BlockPair *p_bps = &old_vector[0];
|
||||
tree->ValidateInOrder(p_bps);
|
||||
printf("min node of the tree:%" PRIu64 "\n",
|
||||
rbn_offset(tree->MinNode()).ToInt());
|
||||
printf("max node of the tree:%" PRIu64 "\n",
|
||||
rbn_offset(tree->MaxNode()).ToInt());
|
||||
|
||||
for (i = 0; i < N; i++) {
|
||||
// tree->ValidateBalance();
|
||||
tree->RawRemove(input_vector[i]._offset.ToInt());
|
||||
}
|
||||
|
||||
tree->Destroy();
|
||||
delete tree;
|
||||
}
|
||||
|
||||
int test_main(int argc, const char *argv[]) {
|
||||
default_parse_args(argc, argv);
|
||||
|
||||
test_insert_remove();
|
||||
if (verbose)
|
||||
printf("test ok\n");
|
||||
return 0;
|
||||
}
|
|
@ -49,7 +49,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
|
|||
// functionality provided by roll.c is exposed by an autogenerated
|
||||
// header file, logheader.h
|
||||
//
|
||||
// this (poorly) explains the absense of "roll.h"
|
||||
// this (poorly) explains the absence of "roll.h"
|
||||
|
||||
// these flags control whether or not we send commit messages for
|
||||
// various operations
|
||||
|
|
|
@ -169,7 +169,7 @@ int toku_rollback_commit(TOKUTXN txn, LSN lsn) {
|
|||
txn->roll_info.spilled_rollback_head = ROLLBACK_NONE;
|
||||
txn->roll_info.spilled_rollback_tail = ROLLBACK_NONE;
|
||||
}
|
||||
// if we're commiting a child rollback, put its entries into the parent
|
||||
// if we're committing a child rollback, put its entries into the parent
|
||||
// by pinning both child and parent and then linking the child log entry
|
||||
// list to the end of the parent log entry list.
|
||||
if (txn_has_current_rollback_log(txn)) {
|
||||
|
|
|
@ -59,21 +59,18 @@ rollback_log_destroy(ROLLBACK_LOG_NODE log) {
|
|||
|
||||
// flush an unused log to disk, by allocating a size 0 blocknum in
|
||||
// the blocktable
|
||||
static void
|
||||
toku_rollback_flush_unused_log(
|
||||
ROLLBACK_LOG_NODE log,
|
||||
BLOCKNUM logname,
|
||||
int fd,
|
||||
FT ft,
|
||||
bool write_me,
|
||||
bool keep_me,
|
||||
bool for_checkpoint,
|
||||
bool is_clone
|
||||
)
|
||||
{
|
||||
static void toku_rollback_flush_unused_log(ROLLBACK_LOG_NODE log,
|
||||
BLOCKNUM logname,
|
||||
int fd,
|
||||
FT ft,
|
||||
bool write_me,
|
||||
bool keep_me,
|
||||
bool for_checkpoint,
|
||||
bool is_clone) {
|
||||
if (write_me) {
|
||||
DISKOFF offset;
|
||||
ft->blocktable.realloc_on_disk(logname, 0, &offset, ft, fd, for_checkpoint, INT_MAX);
|
||||
ft->blocktable.realloc_on_disk(
|
||||
logname, 0, &offset, ft, fd, for_checkpoint);
|
||||
}
|
||||
if (!keep_me && !is_clone) {
|
||||
toku_free(log);
|
||||
|
|
|
@ -587,8 +587,8 @@ bool toku_le_worth_running_garbage_collection(
|
|||
// by new txns.
|
||||
// 2.) There is only one committed entry, but the outermost
|
||||
// provisional entry is older than the oldest known referenced
|
||||
// xid, so it must have commited. Therefor we can promote it to
|
||||
// committed and get rid of the old commited entry.
|
||||
// xid, so it must have committed. Therefore we can promote it to
|
||||
// committed and get rid of the old committed entry.
|
||||
if (le->type != LE_MVCC) {
|
||||
return false;
|
||||
}
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Reference in a new issue