MDEV-14795 InnoDB system tablespace cannot be shrunk

- Introduce the optional :autoshrink attribute, which can be
added to the innodb_data_file_path variable to allow
the system tablespace to be shrunk during the startup process.

Steps for shrinking the system tablespace:

1) Find the last used extent in system tablespace
by iterating through the BITMAP in extent descriptor pages

2) If the last used extent is less than the user-specified size,
then set the desired target size to the user-specified size.

3) Store the page contents of the "to be modified" extent
descriptor pages, latch the "to be modified"
extent descriptor pages and check for buffer pool
memory availability

4) Make a checkpoint to flush all pages in the buffer pool, so
that pages in the flush list don't have to use the doublewrite
buffer, and disable the doublewrite buffer during the shrinking process

5) Update the FSP_SIZE and FSP_FREE_LIMIT in header page

6) Remove the "to be truncated" pages from FSP_FREE and
FSP_FREE_FRAG list

7) Reset the bitmap in the last descriptor pages for the
"to be truncated" pages.

8) In case of multiple files, calculate the truncated last
file size and do the truncation in last file

9) Check that the mini-transaction log size doesn't exceed
the minimum value of innodb_log_buffer_size, which is 2MB.
If it does, replace the modified buffer pool pages with
their old page content.

10) Commit the mini-transaction for shrinking the tablespace
and enable or disable the doublewrite buffer depending on the
user-specified value.

recv_sys_t::apply(): Handle the truncation of the system tablespace
only if the recovered tablespace size is less than the actual
existing size.
This commit is contained in:
Thirunarayanan Balathandayuthapani 2023-08-01 15:44:14 +05:30
parent e81fa34502
commit f9003c73a1
24 changed files with 1019 additions and 21 deletions

View file

@ -0,0 +1,21 @@
SET GLOBAL INNODB_FILE_PER_TABLE= 0;
Warnings:
Warning 1287 '@@innodb_file_per_table' is deprecated and will be removed in a future release
SET UNIQUE_CHECKS=0, FOREIGN_KEY_CHECKS=0;
CREATE TABLE t1(f1 INT NOT NULL, f2 INT NOT NULL,
f3 INT NOT NULL, INDEX(f1),
INDEX(f2), INDEX(f3))ENGINE=InnoDB;
BEGIN;
INSERT INTO t1 SELECT seq, seq, seq FROM seq_1_to_16384;
INSERT INTO t1 SELECT seq, seq, seq FROM seq_1_to_16384;
INSERT INTO t1 SELECT seq, seq, seq FROM seq_1_to_16384;
COMMIT;
DROP TABLE t1;
InnoDB 0 transactions not purged
SELECT NAME, FILE_SIZE FROM information_schema.innodb_sys_tablespaces WHERE SPACE = 0;
NAME FILE_SIZE
innodb_system 77594624
# restart
SELECT NAME, FILE_SIZE FROM information_schema.innodb_sys_tablespaces WHERE SPACE = 0;
NAME FILE_SIZE
innodb_system 10485760

View file

@ -0,0 +1,49 @@
call mtr.add_suppression("InnoDB: Cannot shrink the system tablespace");
call mtr.add_suppression("InnoDB: Plugin initialization aborted");
call mtr.add_suppression("Plugin 'InnoDB' init function returned error");
call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE failed");
SET GLOBAL INNODB_LIMIT_OPTIMISTIC_INSERT_DEBUG=2;
SET GLOBAL INNODB_FILE_PER_TABLE= 0;
Warnings:
Warning 1287 '@@innodb_file_per_table' is deprecated and will be removed in a future release
SET UNIQUE_CHECKS=0, FOREIGN_KEY_CHECKS=0;
CREATE TABLE t1(f1 INT NOT NULL, f2 INT NOT NULL,
f3 INT NOT NULL, INDEX(f1),
INDEX(f2), INDEX(f3))ENGINE=InnoDB;
BEGIN;
INSERT INTO t1 SELECT seq, seq, seq FROM seq_1_to_16384;
INSERT INTO t1 SELECT seq, seq, seq FROM seq_1_to_16384;
COMMIT;
DROP TABLE t1;
InnoDB 0 transactions not purged
SELECT NAME, FILE_SIZE FROM INFORMATION_SCHEMA.INNODB_SYS_TABLESPACES WHERE SPACE = 0;
NAME FILE_SIZE
innodb_system 540016640
# restart: --debug_dbug=+d,sys_shrink_buffer_pool_full
FOUND 1 /\[Warning\] InnoDB: Cannot shrink the system tablespace/ in mysqld.1.err
SELECT * FROM INFORMATION_SCHEMA.ENGINES
WHERE engine = 'innodb'
AND support IN ('YES', 'DEFAULT', 'ENABLED');
ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS
InnoDB YES Supports transactions, row-level locking, foreign keys and encryption for tables YES YES YES
# restart: --debug_dbug=+d,mtr_log_max_size
FOUND 1 /\[ERROR\] InnoDB: Cannot shrink the system tablespace/ in mysqld.1.err
SELECT * FROM INFORMATION_SCHEMA.ENGINES
WHERE engine = 'innodb'
AND support IN ('YES', 'DEFAULT', 'ENABLED');
ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS
InnoDB YES Supports transactions, row-level locking, foreign keys and encryption for tables YES YES YES
# restart: --debug_dbug=+d,crash_after_sys_truncate
SELECT * FROM INFORMATION_SCHEMA.ENGINES
WHERE engine = 'innodb'
AND support IN ('YES', 'DEFAULT', 'ENABLED');
ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS
# restart: --innodb_buffer_pool_size=5M
SELECT * FROM INFORMATION_SCHEMA.ENGINES
WHERE engine = 'innodb'
AND support IN ('YES', 'DEFAULT', 'ENABLED');
ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS
InnoDB YES Supports transactions, row-level locking, foreign keys and encryption for tables YES YES YES
SELECT NAME, FILE_SIZE FROM INFORMATION_SCHEMA.INNODB_SYS_TABLESPACES WHERE SPACE=0;
NAME FILE_SIZE
innodb_system 3145728

View file

@ -0,0 +1,22 @@
set global innodb_file_per_table=0;
Warnings:
Warning 1287 '@@innodb_file_per_table' is deprecated and will be removed in a future release
set global innodb_limit_optimistic_insert_debug=2;
set unique_checks=0, foreign_key_checks=0;
create table t1(f1 int not null)engine=innodb;
begin;
insert into t1 select * from seq_1_to_8192;
insert into t1 select * from seq_1_to_8192;
insert into t1 select * from seq_1_to_65536;
commit;
create table t2(f1 int not null)engine=innodb;
insert into t2 select * from seq_1_to_65536;
create table t3(f1 int not null)engine=innodb;
insert into t3 select * from seq_1_to_65536;
CREATE TABLE t4(f1 int not null)engine=innodb;
insert into t4 select * from seq_1_to_65536;
drop table t2;
drop table t4;
InnoDB 0 transactions not purged
# restart
drop table t3, t1;

View file

@ -0,0 +1,2 @@
--innodb_data_file_path=ibdata1:10M:autoextend:autoshrink
--innodb_sys_tablespaces

View file

@ -0,0 +1,17 @@
# Fill the system tablespace with a table, drop the table, and report
# the size of the system tablespace before and after a server restart.
--source include/have_innodb.inc
--source include/have_sequence.inc
# Create t1 inside the system tablespace instead of a file of its own
SET GLOBAL INNODB_FILE_PER_TABLE= 0;
SET UNIQUE_CHECKS=0, FOREIGN_KEY_CHECKS=0;
CREATE TABLE t1(f1 INT NOT NULL, f2 INT NOT NULL,
f3 INT NOT NULL, INDEX(f1),
INDEX(f2), INDEX(f3))ENGINE=InnoDB;
BEGIN;
INSERT INTO t1 SELECT seq, seq, seq FROM seq_1_to_16384;
INSERT INTO t1 SELECT seq, seq, seq FROM seq_1_to_16384;
INSERT INTO t1 SELECT seq, seq, seq FROM seq_1_to_16384;
COMMIT;
DROP TABLE t1;
--source include/wait_all_purged.inc
# Size of the system tablespace (SPACE = 0) before the restart
SELECT NAME, FILE_SIZE FROM information_schema.innodb_sys_tablespaces WHERE SPACE = 0;
--source include/restart_mysqld.inc
# Size after the restart; presumably smaller when the accompanying
# option file requests :autoshrink
SELECT NAME, FILE_SIZE FROM information_schema.innodb_sys_tablespaces WHERE SPACE = 0;

View file

@ -0,0 +1,3 @@
--innodb_data_file_path=ibdata1:1M:autoextend:autoshrink
--innodb_sys_tablespaces
--innodb_page_size=4k

View file

@ -0,0 +1,62 @@
# Restart the server with several debug injection points that affect
# the shrinking of the system tablespace, and check the error log and
# the availability of InnoDB after each restart.
--source include/have_innodb.inc
--source include/have_sequence.inc
--source include/not_embedded.inc
--source include/have_debug.inc
--source include/not_windows.inc
call mtr.add_suppression("InnoDB: Cannot shrink the system tablespace");
call mtr.add_suppression("InnoDB: Plugin initialization aborted");
call mtr.add_suppression("Plugin 'InnoDB' init function returned error");
call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE failed");
SET GLOBAL INNODB_LIMIT_OPTIMISTIC_INSERT_DEBUG=2;
# Create t1 inside the system tablespace instead of a file of its own
SET GLOBAL INNODB_FILE_PER_TABLE= 0;
SET UNIQUE_CHECKS=0, FOREIGN_KEY_CHECKS=0;
CREATE TABLE t1(f1 INT NOT NULL, f2 INT NOT NULL,
f3 INT NOT NULL, INDEX(f1),
INDEX(f2), INDEX(f3))ENGINE=InnoDB;
BEGIN;
INSERT INTO t1 SELECT seq, seq, seq FROM seq_1_to_16384;
INSERT INTO t1 SELECT seq, seq, seq FROM seq_1_to_16384;
COMMIT;
DROP TABLE t1;
--source include/wait_all_purged.inc
# Size of the system tablespace (SPACE = 0) before any restart
SELECT NAME, FILE_SIZE FROM INFORMATION_SCHEMA.INNODB_SYS_TABLESPACES WHERE SPACE = 0;
# Ran out of buffer pool
let $restart_parameters=--debug_dbug="+d,sys_shrink_buffer_pool_full";
--source include/restart_mysqld.inc
# A warning must have been written to the error log
--let SEARCH_PATTERN= \[Warning\] InnoDB: Cannot shrink the system tablespace
let SEARCH_FILE= $MYSQLTEST_VARDIR/log/mysqld.1.err;
--source include/search_pattern_in_file.inc
SELECT * FROM INFORMATION_SCHEMA.ENGINES
WHERE engine = 'innodb'
AND support IN ('YES', 'DEFAULT', 'ENABLED');
# Ran out of mtr log size
let $restart_parameters=--debug_dbug="+d,mtr_log_max_size";
--source include/restart_mysqld.inc
# This failure is reported as an error rather than a warning
--let SEARCH_PATTERN= \[ERROR\] InnoDB: Cannot shrink the system tablespace
let SEARCH_FILE= $MYSQLTEST_VARDIR/log/mysqld.1.err;
--source include/search_pattern_in_file.inc
SELECT * FROM INFORMATION_SCHEMA.ENGINES
WHERE engine = 'innodb'
AND support IN ('YES', 'DEFAULT', 'ENABLED');
# Crash after shrinking the system tablespace
let $restart_parameters=--debug_dbug="+d,crash_after_sys_truncate";
--source include/restart_mysqld.inc
SELECT * FROM INFORMATION_SCHEMA.ENGINES
WHERE engine = 'innodb'
AND support IN ('YES', 'DEFAULT', 'ENABLED');
# Restart without any debug injection, using a 5M buffer pool
let $restart_parameters=--innodb_buffer_pool_size=5M;
--source include/restart_mysqld.inc
SELECT * FROM INFORMATION_SCHEMA.ENGINES
WHERE engine = 'innodb'
AND support IN ('YES', 'DEFAULT', 'ENABLED');
# Final size of the system tablespace
SELECT NAME, FILE_SIZE FROM INFORMATION_SCHEMA.INNODB_SYS_TABLESPACES WHERE SPACE=0;

View file

@ -0,0 +1,2 @@
--innodb_data_file_path=ibdata1:1M:autoextend:autoshrink
--innodb_page_size=4k

View file

@ -0,0 +1,31 @@
# Create four tables in the system tablespace, drop two of them
# (t2 and t4), restart the server, and check that the remaining
# tables can still be dropped afterwards.
--source include/big_test.inc
--source include/have_innodb.inc
--source include/have_sequence.inc
--source include/not_valgrind.inc
--source include/have_debug.inc
# Create the tables inside the system tablespace
set global innodb_file_per_table=0;
set global innodb_limit_optimistic_insert_debug=2;
set unique_checks=0, foreign_key_checks=0;
create table t1(f1 int not null)engine=innodb;
begin;
insert into t1 select * from seq_1_to_8192;
insert into t1 select * from seq_1_to_8192;
insert into t1 select * from seq_1_to_65536;
commit;
create table t2(f1 int not null)engine=innodb;
insert into t2 select * from seq_1_to_65536;
create table t3(f1 int not null)engine=innodb;
insert into t3 select * from seq_1_to_65536;
CREATE TABLE t4(f1 int not null)engine=innodb;
insert into t4 select * from seq_1_to_65536;
# t1 and t3 stay in place while t2 and t4 are dropped
drop table t2;
drop table t4;
--source include/wait_all_purged.inc
--source include/restart_mysqld.inc
drop table t3, t1;

View file

@ -0,0 +1,2 @@
--innodb_data_file_path=ibdata1:1M:autoextend:autoshrink
--innodb_sys_tablespaces

View file

@ -0,0 +1,28 @@
SET GLOBAL INNODB_FILE_PER_TABLE= 0;
Warnings:
Warning 1287 '@@innodb_file_per_table' is deprecated and will be removed in a future release
CREATE TABLE t1(f1 INT NOT NULL, f2 INT NOT NULL,
f3 INT NOT NULL, INDEX(f1),
INDEX(f2), INDEX(f3))ENGINE=InnoDB;
INSERT INTO t1 SELECT seq, seq, seq FROM seq_1_to_16384;
INSERT INTO t1 SELECT seq, seq, seq FROM seq_1_to_16384;
INSERT INTO t1 SELECT seq, seq, seq FROM seq_1_to_16384;
DROP TABLE t1;
InnoDB 0 transactions not purged
SELECT NAME, FILE_SIZE FROM information_schema.innodb_sys_tablespaces WHERE SPACE = 0;
NAME FILE_SIZE
innodb_system 70254592
# restart
SELECT NAME, FILE_SIZE FROM information_schema.innodb_sys_tablespaces WHERE SPACE = 0;
NAME FILE_SIZE
innodb_system 3145728
# Incremental backup
# Prepare full backup, apply incremental one
# Restore and check results
# shutdown server
# remove datadir
# xtrabackup move back
# restart
SELECT NAME, FILE_SIZE FROM information_schema.innodb_sys_tablespaces WHERE SPACE = 0;
NAME FILE_SIZE
innodb_system 3145728

View file

@ -0,0 +1,36 @@
# Take a full backup, restart the server, take an incremental backup,
# then prepare and restore the backups and report the size of the
# system tablespace at each stage.
--source include/have_innodb.inc
--source include/have_sequence.inc
let basedir=$MYSQLTEST_VARDIR/tmp/backup;
let incremental_dir=$MYSQLTEST_VARDIR/tmp/backup_inc1;
# Create t1 inside the system tablespace instead of a file of its own
SET GLOBAL INNODB_FILE_PER_TABLE= 0;
CREATE TABLE t1(f1 INT NOT NULL, f2 INT NOT NULL,
f3 INT NOT NULL, INDEX(f1),
INDEX(f2), INDEX(f3))ENGINE=InnoDB;
INSERT INTO t1 SELECT seq, seq, seq FROM seq_1_to_16384;
INSERT INTO t1 SELECT seq, seq, seq FROM seq_1_to_16384;
INSERT INTO t1 SELECT seq, seq, seq FROM seq_1_to_16384;
DROP TABLE t1;
--source ../innodb/include/wait_all_purged.inc
# Size of the system tablespace (SPACE = 0) before the full backup
SELECT NAME, FILE_SIZE FROM information_schema.innodb_sys_tablespaces WHERE SPACE = 0;
# Full backup, taken before the restart
--disable_result_log
exec $XTRABACKUP --defaults-file=$MYSQLTEST_VARDIR/my.cnf --backup --parallel=10 --target-dir=$basedir --throttle=1000;
--enable_result_log
--source include/restart_mysqld.inc
# Size of the system tablespace after the restart
SELECT NAME, FILE_SIZE FROM information_schema.innodb_sys_tablespaces WHERE SPACE = 0;
--echo # Incremental backup
exec $XTRABACKUP --defaults-file=$MYSQLTEST_VARDIR/my.cnf --backup --parallel=2 --ftwrl-wait-timeout=5 --ftwrl-wait-threshold=300 --ftwrl-wait-query-type=all --target-dir=$incremental_dir --incremental-basedir=$basedir;
--disable_result_log
echo # Prepare full backup, apply incremental one;
exec $XTRABACKUP --prepare --target-dir=$basedir;
exec $XTRABACKUP --prepare --target-dir=$basedir --incremental-dir=$incremental_dir;
echo # Restore and check results;
let $targetdir=$basedir;
-- source include/restart_and_restore.inc
--enable_result_log
# Size of the system tablespace in the restored data directory
SELECT NAME, FILE_SIZE FROM information_schema.innodb_sys_tablespaces WHERE SPACE = 0;
# Cleanup
rmdir $basedir;
rmdir $incremental_dir;

View file

@ -668,6 +668,19 @@ fil_space_extend_must_retry(
return false;
}
/** Check whether a recorded truncation of the system tablespace is
still pending. The caller must hold fil_system.mutex.
If a truncation was recorded but the system tablespace is not larger
than its recv_size, the record is reset.
@return whether truncated_sys_space.lsn is set and the system
tablespace is larger than its recv_size */
bool recv_sys_t::check_sys_truncate()
{
  mysql_mutex_assert_owner(&fil_system.mutex);
  if (truncated_sys_space.lsn)
  {
    if (fil_system.sys_space->size > fil_system.sys_space->recv_size)
      return true;
    /* Nothing is left to truncate; forget the record */
    truncated_sys_space={0,0};
  }
  return false;
}
/** @return whether the file is usable for io() */
ATTRIBUTE_COLD bool fil_space_t::prepare_acquired()
{
@ -684,6 +697,8 @@ ATTRIBUTE_COLD bool fil_space_t::prepare_acquired()
else if (node->deferred);
else if (auto desired_size= recv_size)
{
if (id == TRX_SYS_SPACE && recv_sys.check_sys_truncate())
goto clear;
bool success;
while (fil_space_extend_must_retry(this, node, desired_size, &success))
mysql_mutex_lock(&fil_system.mutex);

View file

@ -3065,3 +3065,614 @@ std::ostream &fseg_header::to_stream(std::ostream &out) const
return out;
}
#endif /* UNIV_DEBUG */
/** Return an extent descriptor page of the system tablespace.
If the mini-transaction already holds the page with an SX latch,
that block is returned; otherwise the page is fetched with
buf_page_get_gen() using RW_SX_LATCH.
@param page_no  page number to be acquired
@param mtr      mini-transaction
@param err      error code, passed on to buf_page_get_gen()
@return block descriptor; callers treat nullptr as a failure
and report *err */
static
buf_block_t *fsp_get_latched_xdes_page(
  uint32_t page_no, mtr_t *mtr, dberr_t *err)
{
  const page_id_t id{0, page_no};
  if (buf_block_t *latched=
      mtr->get_already_latched(id, MTR_MEMO_PAGE_SX_FIX))
    return latched;
  return buf_page_get_gen(id, 0, RW_SX_LATCH, nullptr,
                          BUF_GET_POSSIBLY_FREED, mtr, err);
}
/** Used during system tablespace truncation. Stores
the "to be modified" extent descriptor page and its
old page state */
class fsp_xdes_old_page
{
std::vector<buf_block_t*> m_old_xdes_pages;
public:
ulint n_pages()
{
uint32_t count=0;
for (uint32_t i= 0; i < m_old_xdes_pages.size(); i++)
if (m_old_xdes_pages[i]) count++;
return count;
}
__attribute__((warn_unused_result))
dberr_t insert(uint32_t page_no, mtr_t *mtr)
{
uint32_t m_index= page_no >> srv_page_size_shift;
if (m_old_xdes_pages.size() > m_index &&
m_old_xdes_pages[m_index] != nullptr)
return DB_SUCCESS;
DBUG_EXECUTE_IF("sys_shrink_buffer_pool_full",
return DB_OUT_OF_MEMORY;);
dberr_t err= DB_SUCCESS;
buf_block_t *block= fsp_get_latched_xdes_page(page_no, mtr, &err);
if (block)
{
buf_block_t *old= buf_LRU_get_free_block(have_no_mutex_soft);
if (!old) return DB_OUT_OF_MEMORY;
memcpy_aligned<UNIV_PAGE_SIZE_MIN>(
old->page.frame, block->page.frame, srv_page_size);
if (m_index >= m_old_xdes_pages.size())
m_old_xdes_pages.resize(m_index + 1);
m_old_xdes_pages[m_index] = old;
}
return err;
}
buf_block_t *search(uint32_t page_no)
{
uint32_t m_index= page_no >> srv_page_size_shift;
if (m_index > m_old_xdes_pages.size())
return nullptr;
return m_old_xdes_pages[m_index];
}
void restore(mtr_t *mtr)
{
for (uint32_t i= 0; i < m_old_xdes_pages.size(); i++)
{
if (m_old_xdes_pages[i] == nullptr) continue;
buf_block_t *block= mtr->get_already_latched(
page_id_t{0, i << srv_page_size_shift},
MTR_MEMO_PAGE_SX_FIX);
ut_ad(block);
memcpy_aligned<UNIV_PAGE_SIZE_MIN>(
block->page.frame, m_old_xdes_pages[i]->page.frame, srv_page_size);
}
}
fsp_xdes_old_page()=default;
~fsp_xdes_old_page()
{
for (uint32_t i= 0; i < m_old_xdes_pages.size(); i++)
if (m_old_xdes_pages[i])
buf_block_free(m_old_xdes_pages[i]);
}
};
/** Link the current list node directly after the last valid node,
bypassing the truncated nodes in between, and subtract the bypassed
nodes from the list length.
@param header           block that contains the list base node
@param hdr_offset       byte offset of the list base node in header
@param cur_addr         current descriptor
@param last_valid_addr  last valid descriptor; its page is FIL_NULL
                        if no valid node precedes the current one
@param skip_len         number of truncated extent descriptor entries
@param mtr              mini-transaction
@return error code or DB_SUCCESS */
__attribute__((warn_unused_result))
static
dberr_t fsp_lst_update_skip(
  buf_block_t *header, uint16_t hdr_offset,
  fil_addr_t cur_addr, fil_addr_t last_valid_addr,
  uint32_t skip_len, mtr_t *mtr)
{
  dberr_t err= DB_SUCCESS;
  buf_block_t *cur= fsp_get_latched_xdes_page(cur_addr.page, mtr, &err);
  if (!cur)
    return err;

  if (last_valid_addr.page != FIL_NULL)
  {
    /* The last valid node may reside on the same page as the
    current node; fetch its page only when it does not */
    buf_block_t *prev= cur;
    if (cur->page.id().page_no() != last_valid_addr.page)
    {
      prev= fsp_get_latched_xdes_page(last_valid_addr.page, mtr, &err);
      if (!prev)
        return err;
    }
    /* NEXT of the last valid node now refers to the current node */
    flst_write_addr(
      *prev,
      prev->page.frame + last_valid_addr.boffset + FLST_NEXT,
      cur_addr.page, cur_addr.boffset, mtr);
  }
  else
    /* No valid node precedes the current one, so the current node
    becomes the FIRST node of the list */
    flst_write_addr(
      *header,
      header->page.frame + hdr_offset + FLST_FIRST,
      cur_addr.page, cur_addr.boffset, mtr);

  /* PREV of the current node refers to the last valid node, which
  is FIL_NULL when the current node became the first one */
  flst_write_addr(
    *cur,
    cur->page.frame + cur_addr.boffset + FLST_PREV,
    last_valid_addr.page, last_valid_addr.boffset, mtr);

  byte *const len_field= &header->page.frame[hdr_offset + FLST_LEN];
  const uint32_t old_len= mach_read_from_4(len_field);
  ut_ad(old_len > skip_len);
  mtr->write<4>(*header, len_field, old_len - skip_len);
  return DB_SUCCESS;
}
/** Terminate the list at the last valid node, dropping the truncated
nodes at the tail, and adjust FSP_FRAG_N_USED if the list is
FSP_FREE_FRAG.
@param header      block that contains the list base node
@param hdr_offset  FSP_HEADER_OFFSET + FSP_FREE or FSP_FREE_FRAG
@param cur_addr    last valid descriptor; its page is FIL_NULL
                   if no valid node remains
@param skip_len    number of truncated extent descriptor entries
                   at the end of the list
@param orig_len    original length of the list
@param mtr         mini-transaction
@return error code or DB_SUCCESS */
__attribute__((warn_unused_result))
dberr_t
fsp_lst_write_end(
  buf_block_t *header, uint16_t hdr_offset,
  fil_addr_t cur_addr, uint32_t skip_len, uint32_t orig_len,
  mtr_t *mtr)
{
  dberr_t err= DB_SUCCESS;
  byte *len_bytes= &header->page.frame[hdr_offset + FLST_LEN];
  uint32_t len= mach_read_from_4(len_bytes);

  if (skip_len != 0)
  {
    if (cur_addr.page != FIL_NULL)
    {
      /* The last valid node becomes the LAST node of the list,
      and its NEXT pointer is set to FIL_NULL */
      flst_write_addr(
        *header,
        header->page.frame + hdr_offset + FLST_LAST,
        cur_addr.page, cur_addr.boffset, mtr);

      buf_block_t *last_block= fsp_get_latched_xdes_page(
        cur_addr.page, mtr, &err);
      if (!last_block)
        return err;

      flst_write_addr(
        *last_block,
        last_block->page.frame + cur_addr.boffset + FLST_NEXT,
        FIL_NULL, 0, mtr);
    }
    else
    {
      /* There is no list, so reset base node */
      mtr->memset(
        header,
        FLST_FIRST + FIL_ADDR_PAGE + hdr_offset, 4, 0xff);
      mtr->memset(
        header,
        FLST_LAST + FIL_ADDR_PAGE + hdr_offset, 4, 0xff);
    }

    ut_ad(len >= skip_len);
    len-= skip_len;
    mtr->write<4>(*header, len_bytes, len);
  }

  if (hdr_offset == FSP_FREE_FRAG + FSP_HEADER_OFFSET && len != orig_len)
  {
    /* Update the FSP_FRAG_N_USED value after removing
    the truncated pages from FSP_FREE_FRAG list */
    byte *frag_used_byte= &header->page.frame[
      FSP_HEADER_OFFSET + FSP_FRAG_N_USED];
    const uint32_t n_used_frag= mach_read_from_4(frag_used_byte);
    mtr->write<4>(*header, frag_used_byte,
                  n_used_frag - ((orig_len - len) * 2));
  }

  return DB_SUCCESS;
}
/** Remove the truncated extents from a list of extent descriptors
(FSP_FREE or FSP_FREE_FRAG) by walking the list from its first node
and unlinking every node that is at or beyond the threshold.
@param header tablespace header (page 0)
@param hdr_offset byte offset of the list base node in the header
page; the caller passes FSP_HEADER_OFFSET + FSP_FREE or
FSP_HEADER_OFFSET + FSP_FREE_FRAG
@param threshold Remove the pages from the list which is
greater than threshold
@param mtr mini-transaction to remove the extents
@return DB_SUCCESS on success or error code */
__attribute__((warn_unused_result))
static
dberr_t fsp_shrink_list(buf_block_t *header, uint16_t hdr_offset,
uint32_t threshold, mtr_t *mtr)
{
ut_ad(mach_read_from_4(header->page.frame + FIL_PAGE_OFFSET) == 0);
const uint32_t len= flst_get_len(hdr_offset + header->page.frame);
/* An empty list needs no change */
if (len == 0)
return DB_SUCCESS;
buf_block_t *descr_block= nullptr;
dberr_t err= DB_SUCCESS;
/* Number of consecutive to-be-removed nodes seen since the
last node that is kept */
uint32_t skip_len= 0;
fil_addr_t last_valid_addr {FIL_NULL, 0}, next_addr{FIL_NULL, 0};
fil_addr_t addr= flst_get_first(header->page.frame + hdr_offset);
for (uint32_t i= len; i > 0; i--)
{
ut_ad(addr.page < fil_system.sys_space->size);
ut_ad(!(addr.page & (srv_page_size - 1)));
/* Fetch the descriptor page only when the node resides on a
different page than the previous node */
if (!descr_block || descr_block->page.id().page_no() != addr.page)
{
descr_block= fsp_get_latched_xdes_page(
addr.page, mtr, &err);
if (!descr_block)
return err;
}
if (addr.page < threshold)
{
/* Update only if only non-truncated page */
if (skip_len)
{
/* Link this node to the last kept node, bypassing the
skipped nodes */
err= fsp_lst_update_skip(
header, hdr_offset, addr, last_valid_addr, skip_len, mtr);
if (err) return err;
skip_len= 0;
}
/* The descriptor page is below the threshold, but the value
returned by xdes_get_offset() for this descriptor
(presumably the first page of the extent - TODO confirm)
is not, so the node is skipped as well */
if (threshold <= xdes_get_offset(
descr_block->page.frame + addr.boffset - XDES_FLST_NODE))
skip_len++;
else last_valid_addr= addr;
}
else skip_len++;
next_addr= flst_get_next_addr(
descr_block->page.frame + addr.boffset);
/* Leaving a descriptor page that is at or beyond the threshold:
release the last page acquired by the mini-transaction */
if (next_addr.page != addr.page && addr.page >= threshold)
{
mtr->release_last_page();
descr_block= nullptr;
}
if (next_addr.page == FIL_NULL)
{
/* End of the list: drop any skipped nodes at the tail */
err= fsp_lst_write_end(header, hdr_offset, last_valid_addr,
skip_len, len, mtr);
break;
}
addr= next_addr;
}
ut_d(if (err == DB_SUCCESS) flst_validate(header, hdr_offset, mtr););
return err;
}
/** Zero-fill the extent descriptor entries of the truncated extents
on the descriptor page that covers the threshold. Nothing is done
when the threshold is a multiple of srv_page_size.
@param space      tablespace to be truncated
@param threshold  truncated size
@param mtr        mini-transaction to reset XDES_BITMAP
@return DB_SUCCESS or error code on failure */
__attribute__((warn_unused_result))
static
dberr_t fsp_xdes_reset(fil_space_t *space, uint32_t threshold, mtr_t *mtr)
{
  if ((threshold & (srv_page_size - 1)) == 0)
    return DB_SUCCESS;

  const uint32_t descr_page_no= xdes_calc_descriptor_page(0, threshold);

  /* Offset of the first descriptor entry to clear */
  const ulint first_offset= XDES_ARR_OFFSET + XDES_SIZE
    * xdes_calc_descriptor_index(0, threshold);

  /* Offset just past the entry computed for page number
  descr_page_no + srv_page_size - 1 */
  ulint end_offset= XDES_ARR_OFFSET + XDES_SIZE
    * xdes_calc_descriptor_index(
        0, (descr_page_no + srv_page_size - 1));
  end_offset+= XDES_SIZE;

  dberr_t err= DB_SUCCESS;
  buf_block_t *descr_block= fsp_get_latched_xdes_page(
    descr_page_no, mtr, &err);
  if (descr_block)
    mtr->memset(
      descr_block, first_offset, (end_offset - first_offset), 0);
  return err;
}
/** Traverse the extent descriptors of the system tablespace
backwards, starting from the extent that contains
space->free_limit - 1. Depending on old_xdes_entry, this either
1) finds the last used extent (old_xdes_entry == nullptr), or
2) stores the old page frame of the "to be modified" extent
descriptor pages (old_xdes_entry != nullptr).
@param space             system tablespace
@param last_used_extent  in: 0 when finding the last used extent,
                         else the threshold below which the
                         traversal stops;
                         out: when finding, the first page number
                         of the lowest trailing unused extent
@param mtr               mini-transaction
@param old_xdes_entry    nullptr or object to store the
                         old page content of "to be modified"
                         extent descriptor pages
@return DB_SUCCESS or error code */
__attribute__((warn_unused_result))
dberr_t fsp_traverse_extents(
  fil_space_t *space, uint32_t *last_used_extent, mtr_t *mtr,
  fsp_xdes_old_page *old_xdes_entry= nullptr)
{
  dberr_t err= DB_SUCCESS;
  const bool find_last_used_extent= (old_xdes_entry == nullptr);
  const uint32_t threshold= *last_used_extent;
  uint32_t last_descr_page_no= xdes_calc_descriptor_page(
    0, space->free_limit - 1);

  if (find_last_used_extent)
    *last_used_extent= space->free_limit;
  else
  {
    /* The header page is always modified */
    err= old_xdes_entry->insert(0, mtr);
    if (err) return err;
    if (threshold & (srv_page_size - 1))
    {
      /* The descriptor page that covers the threshold will have
      some of its entries reset */
      err= old_xdes_entry->insert(
        xdes_calc_descriptor_page(0, threshold), mtr);
      /* Return right away: a later successful page fetch in the
      loop below could otherwise overwrite this error */
      if (err) return err;
    }
  }

  buf_block_t *block= nullptr;
  /* Descriptor pages below the threshold that are referenced by the
  list pointers of a to-be-truncated extent */
  std::vector<uint32_t> modified_xdes;

  for (uint32_t cur_extent=
       ((space->free_limit - 1)/ FSP_EXTENT_SIZE) * FSP_EXTENT_SIZE;
       cur_extent >= threshold;)
  {
    if (!block)
    {
      block= fsp_get_latched_xdes_page(last_descr_page_no, mtr, &err);
      if (!block) return err;
    }

    xdes_t *descr= XDES_ARR_OFFSET + XDES_SIZE
                   * xdes_calc_descriptor_index(0, cur_extent)
                   + block->page.frame;

    if (find_last_used_extent)
    {
      ulint state= xdes_get_state(descr);
      if (state == XDES_FREE)
        *last_used_extent= cur_extent;
      else if (state == XDES_FREE_FRAG &&
               !(cur_extent & (srv_page_size - 1)) &&
               xdes_get_n_used(descr) == 2)
        /* Extent Descriptor Page */
        *last_used_extent= cur_extent;
      else return DB_SUCCESS;
    }
    else
    {
      fil_addr_t prev_addr= flst_get_prev_addr(
        descr + XDES_FLST_NODE);
      ut_ad(prev_addr.page < fil_system.sys_space->size ||
            prev_addr.page == FIL_NULL);
      ut_ad(prev_addr.page == FIL_NULL ||
            !(prev_addr.page & (srv_page_size - 1)));

      fil_addr_t next_addr= flst_get_next_addr(
        descr + XDES_FLST_NODE);
      ut_ad(next_addr.page < fil_system.sys_space->size ||
            next_addr.page == FIL_NULL);
      ut_ad(next_addr.page == FIL_NULL ||
            !(next_addr.page & (srv_page_size - 1)));

      if (prev_addr.page < threshold)
        modified_xdes.push_back(prev_addr.page);

      if (next_addr.page < threshold)
        modified_xdes.push_back(next_addr.page);
    }

    /* cur_extent is unsigned: with threshold == 0 the loop condition
    is always true, so stop at the first extent instead of letting
    the subtraction below wrap around */
    if (cur_extent < FSP_EXTENT_SIZE)
      break;

    cur_extent-= FSP_EXTENT_SIZE;
    uint32_t cur_descr_page= xdes_calc_descriptor_page(0, cur_extent);
    if (last_descr_page_no != cur_descr_page)
    {
      /* Descriptor pages at or beyond the threshold are not kept
      latched */
      if (last_descr_page_no >= threshold)
        mtr->release_last_page();
      last_descr_page_no= cur_descr_page;
      block= nullptr;
    }
  }

  if (!find_last_used_extent)
  {
    for (const uint32_t page_no : modified_xdes)
    {
      err= old_xdes_entry->insert(page_no, mtr);
      if (err) return err;
    }
  }
  return err;
}
#ifdef UNIV_DEBUG
/** Validate the lists whose base nodes are stored in the header
page of the system tablespace.
@return DB_SUCCESS, or the error reported by fsp_get_header() */
__attribute__((warn_unused_result))
dberr_t fsp_sys_tablespace_validate()
{
  /* Byte offsets of the list base nodes in the header page */
  static const uint16_t fsp_lists[]=
  {
    uint16_t(FSP_FREE + FSP_HEADER_OFFSET),
    uint16_t(FSP_FREE_FRAG + FSP_HEADER_OFFSET),
    uint16_t(FSP_HEADER_OFFSET + FSP_FULL_FRAG),
    uint16_t(FSP_HEADER_OFFSET + FSP_SEG_INODES_FULL),
    uint16_t(FSP_HEADER_OFFSET + FSP_SEG_INODES_FREE)
  };

  mtr_t local_mtr;
  dberr_t err= DB_SUCCESS;
  local_mtr.start();
  buf_block_t *header= fsp_get_header(
    fil_system.sys_space, &local_mtr, &err);
  if (header)
    for (const uint16_t list_offset : fsp_lists)
      flst_validate(header, list_offset, &local_mtr);
  local_mtr.commit();
  return err;
}
#endif /* UNIV_DEBUG */
void fsp_system_tablespace_truncate()
{
uint32_t last_used_extent= 0;
fil_space_t *space= fil_system.sys_space;
mtr_t mtr;
mtr.start();
mtr.x_lock_space(space);
dberr_t err= fsp_traverse_extents(space, &last_used_extent, &mtr);
if (err != DB_SUCCESS)
{
func_exit:
sql_print_warning("InnoDB: Cannot shrink the system tablespace "
"due to %s", ut_strerr(err));
mtr.commit();
return;
}
uint32_t fixed_size= srv_sys_space.get_min_size(),
header_size= space->size_in_header;
mtr.commit();
if (last_used_extent >= header_size || fixed_size >= header_size)
/* Tablespace is being used within fixed size */
return;
/* Set fixed size as threshold to truncate */
if (fixed_size > last_used_extent)
last_used_extent= fixed_size;
my_bool old_dblwr_buf= srv_use_doublewrite_buf;
/* Flush all pages in buffer pool, so that it doesn't have to
use doublewrite buffer and disable dblwr and there should
be enough space in redo log */
log_make_checkpoint();
srv_use_doublewrite_buf= false;
buf_block_t *header= nullptr;
ut_ad(!fsp_sys_tablespace_validate());
mtr.start();
mtr.x_lock_space(space);
{
/* Take the rough estimation of modified extent
descriptor page and store their old state */
fsp_xdes_old_page old_xdes_list;
err= fsp_traverse_extents(space, &last_used_extent, &mtr, &old_xdes_list);
if (err == DB_OUT_OF_MEMORY)
{
mtr.commit();
sql_print_warning("InnoDB: Cannot shrink the system "
"tablespace from " UINT32PF" to "
UINT32PF " pages due to insufficient "
"innodb_buffer_pool_size", space->size,
last_used_extent);
return;
}
sql_print_information("InnoDB: Truncating system tablespace from "
UINT32PF " to " UINT32PF " pages", space->size,
last_used_extent);
header= fsp_get_latched_xdes_page(0, &mtr, &err);
if (!header)
goto func_exit;
mtr.write<4, mtr_t::FORCED>(
*header, FSP_HEADER_OFFSET + FSP_SIZE + header->page.frame,
last_used_extent);
if (space->free_limit > last_used_extent)
mtr.write<4,mtr_t::MAYBE_NOP>(*header, FSP_HEADER_OFFSET
+ FSP_FREE_LIMIT + header->page.frame,
last_used_extent);
err= fsp_shrink_list(
header, FSP_HEADER_OFFSET + FSP_FREE, last_used_extent, &mtr);
if (err != DB_SUCCESS)
goto func_exit;
err= fsp_shrink_list(
header, FSP_HEADER_OFFSET + FSP_FREE_FRAG, last_used_extent, &mtr);
if (err != DB_SUCCESS)
goto func_exit;
err= fsp_xdes_reset(space, last_used_extent, &mtr);
if (err != DB_SUCCESS)
goto func_exit;
mtr.trim_pages(page_id_t(0, last_used_extent));
size_t shrink_redo_size= mtr.get_log_size();
DBUG_EXECUTE_IF("mtr_log_max_size", goto mtr_max;);
if (shrink_redo_size >
(2 << 20) - 8 /* encryption nonce */ - 5 /* EOF, checksum */)
{
#ifndef DBUG_OFF
mtr_max:
#endif
/* Replace the modified copy from buffer pool with
original copy of the pages. */
old_xdes_list.restore(&mtr);
mtr.discard_modifications();
mtr.commit();
ut_ad(!fsp_sys_tablespace_validate());
sql_print_error(
"InnoDB: Cannot shrink the system tablespace "
"because the mini-transaction log size (%zu bytes) "
"exceeds 2 MiB", shrink_redo_size + 8 + 5);
return;
}
}
mysql_mutex_lock(&fil_system.mutex);
space->size= last_used_extent;
if (space->free_limit > last_used_extent)
space->free_limit= space->size;
space->free_len= flst_get_len(
FSP_HEADER_OFFSET + FSP_FREE+ header->page.frame);
/* Last file new size after truncation */
uint32_t new_last_file_size=
last_used_extent -
(fixed_size - srv_sys_space.m_files.at(
srv_sys_space.m_files.size() - 1).param_size());
space->size_in_header= space->size;
space->is_being_truncated= true;
space->set_stopping();
space->chain.end->size= new_last_file_size;
srv_sys_space.set_last_file_size(new_last_file_size);
mysql_mutex_unlock(&fil_system.mutex);
mtr.commit_shrink(*space);
sql_print_information("InnoDB: System tablespace truncated successfully");
srv_use_doublewrite_buf= old_dblwr_buf;
}

View file

@ -100,6 +100,7 @@ SysTablespace::parse_params(
ut_ad(m_last_file_size_max == 0);
ut_ad(!m_auto_extend_last_file);
ut_ad(!m_auto_shrink);
char* new_str = mem_strdup(filepath_spec);
char* str = new_str;
@ -146,6 +147,11 @@ SysTablespace::parse_params(
str = parse_units(str, &size);
}
if (0 == strncmp(str, ":autoshrink",
(sizeof ":autoshrink") - 1)) {
str += (sizeof ":autoshrink") - 1;
}
if (*str != '\0') {
ut_free(new_str);
ib::error()
@ -266,6 +272,12 @@ SysTablespace::parse_params(
str = parse_units(str, &m_last_file_size_max);
}
if (0 == strncmp(str, ":autoshrink",
(sizeof ":autoshrink") - 1)) {
str += (sizeof ":autoshrink") - 1;
m_auto_shrink = true;
}
if (*str != '\0') {
ut_free(new_str);
ib::error() << "syntax error in file path or"
@ -333,6 +345,7 @@ SysTablespace::shutdown()
m_created_new_raw = 0;
m_is_tablespace_full = false;
m_sanity_checks_done = false;
m_auto_shrink = false;
}
/** Verify the size of the physical file.
@ -974,6 +987,7 @@ SysTablespace::normalize_size()
for (files_t::iterator it = m_files.begin(); it != end; ++it) {
it->m_size <<= (20U - srv_page_size_shift);
it->m_user_param_size = it->m_size;
}
m_last_file_size_max <<= (20U - srv_page_size_shift);

View file

@ -35,8 +35,8 @@ Created 11/28/1995 Heikki Tuuri
@param[in] page page number
@param[in] boffset byte offset
@param[in,out] mtr mini-transaction */
static void flst_write_addr(const buf_block_t& block, byte *faddr,
uint32_t page, uint16_t boffset, mtr_t* mtr)
void flst_write_addr(const buf_block_t &block, byte *faddr,
uint32_t page, uint16_t boffset, mtr_t *mtr)
{
ut_ad(mtr->memo_contains_page_flagged(faddr, MTR_MEMO_PAGE_X_FIX |
MTR_MEMO_PAGE_SX_FIX));

View file

@ -317,6 +317,8 @@ public:
void set_space_id(uint32_t space_id) { m_space_id= space_id; }
void set_flags(uint32_t flags) { m_flags = flags; }
/** @return the initial size of this data file as given by the
user parameter, in pages */
uint32_t param_size() const { return m_user_param_size; }
private:
/** Free the filepath buffer. */
void free_filepath();
@ -406,6 +408,9 @@ private:
pages in SysTablespace::normalize_size() */
uint32_t m_size;
/** Size in pages; Initial parameter size */
uint32_t m_user_param_size;
/** ordinal position of this datafile in the tablespace */
ulint m_order;

View file

@ -573,6 +573,9 @@ inline void fsp_init_file_page(
mtr->init(block);
}
/** Truncate the system tablespace */
void fsp_system_tablespace_truncate();
#ifndef UNIV_DEBUG
# define fsp_init_file_page(space, block, mtr) fsp_init_file_page(block, mtr)
#endif

View file

@ -119,6 +119,12 @@ public:
return(m_auto_extend_last_file);
}
/** @return whether the :autoshrink attribute was specified,
that is, whether the system tablespace may be shrunk */
bool can_auto_shrink() const { return m_auto_shrink; }
/** Set the last file size.
@param[in] size the size to set */
void set_last_file_size(uint32_t size)
@ -143,6 +149,16 @@ public:
<< (20 - srv_page_size_shift);
}
/**
@return sum of the user specified sizes of all data files
of the tablespace, in pages */
uint32_t get_min_size() const
{
  uint32_t total= 0;
  for (const auto &file : m_files)
    total+= file.m_user_param_size;
  return total;
}
/**
@return next increment size */
uint32_t get_increment() const;
@ -251,6 +267,10 @@ private:
/** if false, then sanity checks are still pending */
bool m_sanity_checks_done;
/** Whether the system tablespace may be shrunk;
set when :autoshrink is specified for the last data file */
bool m_auto_shrink;
};
/* GLOBAL OBJECTS */

View file

@ -148,6 +148,15 @@ inline fil_addr_t flst_get_prev_addr(const flst_node_t *node)
return flst_read_addr(node + FLST_PREV);
}
/** Write a file address.
@param[in] block file page
@param[in,out] faddr file address location
@param[in] page page number
@param[in] boffset byte offset
@param[in,out] mtr mini-transaction */
void flst_write_addr(const buf_block_t &block, byte *faddr,
uint32_t page, uint16_t boffset, mtr_t *mtr);
# ifdef UNIV_DEBUG
/** Validate a file-based list. */
void flst_validate(const buf_block_t *base, uint16_t boffset, mtr_t *mtr);

View file

@ -235,7 +235,10 @@ private:
lsn_t lsn;
/** truncated size of the tablespace, or 0 if not truncated */
unsigned pages;
} truncated_undo_spaces[127];
};
trunc truncated_undo_spaces[127];
trunc truncated_sys_space;
public:
/** The contents of the doublewrite buffer */
@ -260,6 +263,12 @@ public:
pages_it= pages.end();
}
/** Check whether the redo log record for shrinking the system
tablespace may be applied. It is applied only if the size
to be extended is less than the current size.
@retval true if the shrink redo log record is to be applied
@retval false otherwise */
bool check_sys_truncate();
private:
/** Attempt to initialize a page based on redo log records.
@param p iterator

View file

@ -1319,6 +1319,7 @@ void recv_sys_t::create()
recv_max_page_lsn = 0;
memset(truncated_undo_spaces, 0, sizeof truncated_undo_spaces);
truncated_sys_space= {0, 0};
UT_LIST_INIT(blocks, &buf_block_t::unzip_LRU);
}
@ -2665,23 +2666,28 @@ restart:
cl= l.copy_if_needed(iv, decrypt_buf, recs, rlen);
if (rlen == 1 && *cl == TRIM_PAGES)
{
#if 0 /* For now, we can only truncate an undo log tablespace */
if (UNIV_UNLIKELY(!space_id || !page_no))
goto record_corrupted;
#else
if (!srv_is_undo_tablespace(space_id) ||
page_no != SRV_UNDO_TABLESPACE_SIZE_IN_PAGES)
goto record_corrupted;
if (srv_is_undo_tablespace(space_id))
{
if (page_no != SRV_UNDO_TABLESPACE_SIZE_IN_PAGES)
goto record_corrupted;
/* The entire undo tablespace will be reinitialized by
innodb_undo_log_truncate=ON. Discard old log for all
pages. */
trim({space_id, 0}, start_lsn);
truncated_undo_spaces[space_id - srv_undo_space_id_start]=
{ start_lsn, page_no};
}
else if (space_id != 0) goto record_corrupted;
else
{
/* Shrink the system tablespace */
trim({space_id, page_no}, start_lsn);
truncated_sys_space= {start_lsn, page_no};
}
static_assert(UT_ARR_SIZE(truncated_undo_spaces) ==
TRX_SYS_MAX_UNDO_SPACES, "compatibility");
/* The entire undo tablespace will be reinitialized by
innodb_undo_log_truncate=ON. Discard old log for all pages. */
trim({space_id, 0}, start_lsn);
truncated_undo_spaces[space_id - srv_undo_space_id_start]=
{ start_lsn, page_no };
if (!store && undo_space_trunc)
if (!store && undo_space_trunc && space_id)
undo_space_trunc(space_id);
#endif
last_offset= 1; /* the next record must not be same_page */
continue;
}
@ -3723,6 +3729,30 @@ void recv_sys_t::apply(bool last_batch)
apply_log_recs= true;
/* truncated_sys_space.lsn is nonzero when a shrink of the system
tablespace has been recorded in truncated_sys_space; the record is
reset to {0, 0} at the end of this block. */
if (truncated_sys_space.lsn)
{
trim({0, truncated_sys_space.pages}, truncated_sys_space.lsn);
fil_node_t *file= UT_LIST_GET_LAST(fil_system.sys_space->chain);
ut_ad(file->is_open());
/* New size of the last file after truncation: the new tablespace
size minus the user specified sizes of all files that precede
the last one (get_min_size() minus the last file's param_size()).
NOTE(review): this is unsigned arithmetic; it would wrap around if
truncated_sys_space.pages were smaller than the combined size of
the preceding files - confirm that this cannot happen. */
uint32_t new_last_file_size=
truncated_sys_space.pages -
(srv_sys_space.get_min_size()
- srv_sys_space.m_files.at(
srv_sys_space.m_files.size() - 1). param_size());
/* Shrink the last data file to the new size in bytes.
NOTE(review): the return value of os_file_truncate() is not
checked here. */
os_file_truncate(
file->name, file->handle,
os_offset_t{new_last_file_size} << srv_page_size_shift, true);
/* Update the in-memory tablespace size, the size of the last file
and srv_sys_space while holding fil_system.mutex. */
mysql_mutex_lock(&fil_system.mutex);
fil_system.sys_space->size= truncated_sys_space.pages;
fil_system.sys_space->chain.end->size= new_last_file_size;
srv_sys_space.set_last_file_size(new_last_file_size);
truncated_sys_space={0, 0};
mysql_mutex_unlock(&fil_system.mutex);
}
for (auto id= srv_undo_tablespaces_open; id--;)
{
const trunc& t= truncated_undo_spaces[id];

View file

@ -495,12 +495,11 @@ void mtr_t::commit_shrink(fil_space_t &space)
ut_ad(is_active());
ut_ad(!high_level_read_only);
ut_ad(m_modifications);
ut_ad(m_made_dirty);
ut_ad(!space.id || m_made_dirty);
ut_ad(!m_memo.empty());
ut_ad(!recv_recovery_is_on());
ut_ad(m_log_mode == MTR_LOG_ALL);
ut_ad(!m_freed_pages);
ut_ad(UT_LIST_GET_LEN(space.chain) == 1);
log_write_and_flush_prepare();
m_latch_ex= true;
@ -515,8 +514,9 @@ void mtr_t::commit_shrink(fil_space_t &space)
ut_ad(log_sys.latch.is_write_locked());
#endif
os_file_truncate(space.chain.start->name, space.chain.start->handle,
os_offset_t{space.size} << srv_page_size_shift, true);
os_file_truncate(
space.chain.end->name, space.chain.end->handle,
os_offset_t{space.chain.end->size} << srv_page_size_shift, true);
space.clear_freed_ranges();

View file

@ -1733,6 +1733,13 @@ dberr_t srv_start(bool create_new_db)
ut_ad(high_level_read_only
|| srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN);
if (!high_level_read_only
&& srv_sys_space.can_auto_shrink()) {
fsp_system_tablespace_truncate();
DBUG_EXECUTE_IF("crash_after_sys_truncate",
return srv_init_abort(DB_ERROR););
}
/* Validate a few system page types that were left
uninitialized before MySQL or MariaDB 5.5. */
if (!high_level_read_only