MDEV-11254: innodb-use-trim has no effect in 10.2

Problem was that implementation merged from 10.1 was incompatible
with InnoDB 5.7.

buf0buf.cc: Add functions that report whether a hole should be
punched for a page and how large the hole should be.

buf0flu.cc: Add written page to IORequest

fil0fil.cc: Remove unneeded status call and add a test of whether
sparse files and punch hole are supported by the file system when
a tablespace is created. Add call to get the file system
block size. The file node in use is added to IORequest. Added
functions to check whether punch hole is supported and to set
punch hole.

ha_innodb.cc: Remove unneeded status variables (trim512-32768)
and trim_op_saved. Deprecate innodb_use_trim and
set it ON by default. Add function to set innodb-use-trim
dynamically.

dberr.h: Add error code DB_IO_NO_PUNCH_HOLE
if punch hole operation fails.

fil0fil.h: Add punch_hole variable to fil_space_t and
block size to fil_node_t.

os0api.h: Header declaring the helper functions implemented in
buf0buf.cc and fil0fil.cc that are used by os0file.h

os0file.h: Remove unneeded m_block_size from IORequest,
add bpage to IORequest to know the actual size of
the block, and add m_fil_node to know the tablespace file
system block size and whether it supports punch hole.

os0file.cc: Add function punch_hole() to IORequest
to do the punch_hole operation, and add functions to
get the file system block size and to determine whether
the file system supports sparse files (for punch hole).

page0size.h: remove implicit copy disable and
use this implicit copy to implement copy_from()
function.

buf0dblwr.cc, buf0flu.cc, buf0rea.cc, fil0fil.cc, fil0fil.h,
os0file.h, os0file.cc, log0log.cc, log0recv.cc:
Remove unneeded write_size parameter from fil_io
calls.

srv0mon.h, srv0srv.h, srv0mon.cc: Remove unneeded
trim512-trim32768 status variables. Removed
these from monitor tests.
This commit is contained in:
Jan Lindström 2017-01-24 14:40:58 +02:00
parent 0d107a85b3
commit 6495806e59
34 changed files with 885 additions and 336 deletions

View file

@ -0,0 +1 @@
--loose-innodb-use-trim=0

View file

@ -0,0 +1,4 @@
if (!`SELECT COUNT(*) FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE LOWER(variable_name) = 'innodb_have_punch_hole' AND variable_value = 'ON'`)
{
--skip Test requires InnoDB compiled with fallocate(FALLOC_PUNCH_HOLE| FALLOC_KEEP_SIZE)
}

View file

@ -0,0 +1,20 @@
set global innodb_compression_algorithm = 1;
create table innodb_page_compressed (c1 int not null primary key auto_increment, b char(200), c char(200), d char(200)) engine=innodb page_compressed=1 page_compression_level=9;
show warnings;
Level Code Message
create procedure innodb_insert_proc (repeat_count int)
begin
declare current_num int;
set current_num = 0;
while current_num < repeat_count do
insert into innodb_page_compressed values (NULL,repeat('A',150),repeat('AB',75),repeat('B', 175));
set current_num = current_num + 1;
end while;
end//
commit;
set autocommit=0;
call innodb_insert_proc(16000);
commit;
set autocommit=1;
DROP PROCEDURE innodb_insert_proc;
DROP TABLE innodb_page_compressed;

View file

@ -181,16 +181,8 @@ compress_pages_decompressed disabled
compression_pad_increments disabled
compression_pad_decrements disabled
compress_saved disabled
compress_trim_sect512 disabled
compress_trim_sect1024 disabled
compress_trim_sect2048 disabled
compress_trim_sect4096 disabled
compress_trim_sect8192 disabled
compress_trim_sect16384 disabled
compress_trim_sect32768 disabled
compress_pages_page_compressed disabled
compress_page_compressed_trim_op disabled
compress_page_compressed_trim_op_saved disabled
compress_pages_page_decompressed disabled
compress_pages_page_compression_error disabled
compress_pages_encrypted disabled

View file

@ -216,16 +216,8 @@ compress_pages_decompressed compression 0 NULL NULL NULL 0 NULL NULL NULL NULL N
compression_pad_increments compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of times padding is incremented to avoid compression failures
compression_pad_decrements compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of times padding is decremented due to good compressibility
compress_saved compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of bytes saved by page compression
compress_trim_sect512 compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of sect-512 TRIMed by page compression
compress_trim_sect1024 compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of sect-1024 TRIMed by page compression
compress_trim_sect2048 compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of sect-2048 TRIMed by page compression
compress_trim_sect4096 compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of sect-4K TRIMed by page compression
compress_trim_sect8192 compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of sect-8K TRIMed by page compression
compress_trim_sect16384 compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of sect-16K TRIMed by page compression
compress_trim_sect32768 compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of sect-32K TRIMed by page compression
compress_pages_page_compressed compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of pages compressed by page compression
compress_page_compressed_trim_op compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of TRIM operation performed by page compression
compress_page_compressed_trim_op_saved compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of TRIM operation saved by page compression
compress_pages_page_decompressed compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of pages decompressed by page compression
compress_pages_page_compression_error compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of page compression errors
compress_pages_encrypted compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL disabled counter Number of pages encrypted

View file

@ -0,0 +1 @@
--loose-innodb-use-trim=1

View file

@ -0,0 +1,44 @@
# Write page compressed data with zlib and wait until the server reports
# that trim operations have been performed for page compressed pages.
--source include/have_innodb.inc
# NOTE(review): presumably skips the test when punch hole is not available -- confirm against the include file.
--source include/have_innodb_punchhole.inc
# Remember the current compression algorithm so it can be restored at the end.
--disable_query_log
--disable_warnings
let $innodb_compression_algorithm_orig=`SELECT @@innodb_compression_algorithm`;
--enable_warnings
--enable_query_log
# zlib
set global innodb_compression_algorithm = 1;
create table innodb_page_compressed (c1 int not null primary key auto_increment, b char(200), c char(200), d char(200)) engine=innodb page_compressed=1 page_compression_level=9;
show warnings;
# Helper procedure that inserts repeat_count rows into the page compressed table.
delimiter //;
create procedure innodb_insert_proc (repeat_count int)
begin
declare current_num int;
set current_num = 0;
while current_num < repeat_count do
insert into innodb_page_compressed values (NULL,repeat('A',150),repeat('AB',75),repeat('B', 175));
set current_num = current_num + 1;
end while;
end//
delimiter ;//
commit;
# Insert all rows inside a single transaction.
set autocommit=0;
call innodb_insert_proc(16000);
commit;
set autocommit=1;
# Wait until the innodb_num_page_compressed_trim_op status counter exceeds 5.
let $wait_condition= SELECT variable_value > 5 FROM information_schema.global_status WHERE variable_name = 'innodb_num_page_compressed_trim_op';
--source include/wait_condition.inc
DROP PROCEDURE innodb_insert_proc;
DROP TABLE innodb_page_compressed;
# Restore the compression algorithm saved at the start of the test.
--disable_query_log
--disable_warnings
EVAL SET GLOBAL innodb_compression_algorithm = $innodb_compression_algorithm_orig;
--enable_warnings
--enable_query_log

View file

@ -181,16 +181,8 @@ compress_pages_decompressed disabled
compression_pad_increments disabled
compression_pad_decrements disabled
compress_saved disabled
compress_trim_sect512 disabled
compress_trim_sect1024 disabled
compress_trim_sect2048 disabled
compress_trim_sect4096 disabled
compress_trim_sect8192 disabled
compress_trim_sect16384 disabled
compress_trim_sect32768 disabled
compress_pages_page_compressed disabled
compress_page_compressed_trim_op disabled
compress_page_compressed_trim_op_saved disabled
compress_pages_page_decompressed disabled
compress_pages_page_compression_error disabled
compress_pages_encrypted disabled

View file

@ -181,16 +181,8 @@ compress_pages_decompressed disabled
compression_pad_increments disabled
compression_pad_decrements disabled
compress_saved disabled
compress_trim_sect512 disabled
compress_trim_sect1024 disabled
compress_trim_sect2048 disabled
compress_trim_sect4096 disabled
compress_trim_sect8192 disabled
compress_trim_sect16384 disabled
compress_trim_sect32768 disabled
compress_pages_page_compressed disabled
compress_page_compressed_trim_op disabled
compress_page_compressed_trim_op_saved disabled
compress_pages_page_decompressed disabled
compress_pages_page_compression_error disabled
compress_pages_encrypted disabled

View file

@ -181,16 +181,8 @@ compress_pages_decompressed disabled
compression_pad_increments disabled
compression_pad_decrements disabled
compress_saved disabled
compress_trim_sect512 disabled
compress_trim_sect1024 disabled
compress_trim_sect2048 disabled
compress_trim_sect4096 disabled
compress_trim_sect8192 disabled
compress_trim_sect16384 disabled
compress_trim_sect32768 disabled
compress_pages_page_compressed disabled
compress_page_compressed_trim_op disabled
compress_page_compressed_trim_op_saved disabled
compress_pages_page_decompressed disabled
compress_pages_page_compression_error disabled
compress_pages_encrypted disabled

View file

@ -181,16 +181,8 @@ compress_pages_decompressed disabled
compression_pad_increments disabled
compression_pad_decrements disabled
compress_saved disabled
compress_trim_sect512 disabled
compress_trim_sect1024 disabled
compress_trim_sect2048 disabled
compress_trim_sect4096 disabled
compress_trim_sect8192 disabled
compress_trim_sect16384 disabled
compress_trim_sect32768 disabled
compress_pages_page_compressed disabled
compress_page_compressed_trim_op disabled
compress_page_compressed_trim_op_saved disabled
compress_pages_page_decompressed disabled
compress_pages_page_compression_error disabled
compress_pages_encrypted disabled

View file

@ -1,12 +1,14 @@
SET @start_use_trim = @@global.innodb_use_trim;
SELECT @start_use_trim;
@start_use_trim
0
1
SELECT COUNT(@@GLOBAL.innodb_use_trim);
COUNT(@@GLOBAL.innodb_use_trim)
1
1 Expected
SET @@GLOBAL.innodb_use_trim=1;
Warnings:
Warning 131 Using innodb_use_trim is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html
SELECT COUNT(@@GLOBAL.innodb_use_trim);
COUNT(@@GLOBAL.innodb_use_trim)
1
@ -28,6 +30,8 @@ COUNT(VARIABLE_VALUE)
1
1 Expected
SET @@global.innodb_use_trim = @start_use_trim;
Warnings:
Warning 131 Using innodb_use_trim is deprecated and the parameter may be removed in future releases. See http://dev.mysql.com/doc/refman/5.7/en/innodb-file-format.html
SELECT @@global.innodb_use_trim;
@@global.innodb_use_trim
0
1

View file

@ -2612,12 +2612,12 @@ READ_ONLY YES
COMMAND_LINE_ARGUMENT NONE
VARIABLE_NAME INNODB_USE_TRIM
SESSION_VALUE NULL
GLOBAL_VALUE OFF
GLOBAL_VALUE ON
GLOBAL_VALUE_ORIGIN COMPILE-TIME
DEFAULT_VALUE OFF
DEFAULT_VALUE ON
VARIABLE_SCOPE GLOBAL
VARIABLE_TYPE BOOLEAN
VARIABLE_COMMENT Use trim. Default FALSE.
VARIABLE_COMMENT Deallocate (punch_hole|trim) unused portions of the page compressed page (on by default)
NUMERIC_MIN_VALUE NULL
NUMERIC_MAX_VALUE NULL
NUMERIC_BLOCK_SIZE NULL

View file

@ -35,6 +35,7 @@ Created 11/5/1995 Heikki Tuuri
#include "page0size.h"
#include "buf0buf.h"
#include "os0api.h"
#ifdef UNIV_NONINL
#include "buf0buf.ic"
@ -7659,4 +7660,30 @@ buf_page_decrypt_after_read(
return (success);
}
/**
Tell whether the unused portion of a page should be deallocated by
punching a hole.
@param[in]	bpage	Page control block
@return true when bpage->real_size differs from the physical page size
(punch hole should be used), false when they are equal */
bool
buf_page_should_punch_hole(
	const buf_page_t*	bpage)
{
	if (bpage->real_size == bpage->size.physical()) {
		/* Nothing left over on the page to deallocate. */
		return (false);
	}

	return (true);
}
/**
Calculate the length of trim (punch_hole) operation.
@param[in]	bpage		Page control block
@param[in]	write_length	Write length
@return length of the trim (physical page size less write_length), or
zero when write_length already covers the whole physical page. */
ulint
buf_page_get_trim_length(
	const buf_page_t*	bpage,
	ulint			write_length)
{
	const ulint	physical_size = bpage->size.physical();

	/* ulint is unsigned: without this guard a write_length larger
	than the physical page size would wrap around and yield a huge
	trim length instead of the documented zero. */
	if (write_length >= physical_size) {
		return (0);
	}

	return (physical_size - write_length);
}
#endif /* !UNIV_INNOCHECKSUM */

View file

@ -591,7 +591,7 @@ buf_dblwr_process(void)
dberr_t err = fil_io(
request, true,
page_id, page_size,
0, page_size.physical(), read_buf, NULL, NULL);
0, page_size.physical(), read_buf, NULL);
if (err != DB_SUCCESS) {
ib::warn()
@ -679,7 +679,7 @@ buf_dblwr_process(void)
fil_io(write_request, true, page_id, page_size,
0, page_size.physical(),
const_cast<byte*>(page), NULL, NULL);
const_cast<byte*>(page), NULL);
ib::info() << "Recovered page " << page_id
<< " from the doublewrite buffer.";
@ -912,7 +912,7 @@ buf_dblwr_write_block_to_datafile(
type |= IORequest::DO_NOT_WAKE;
}
IORequest request(type);
IORequest request(type, const_cast<buf_page_t*>(bpage));
/* We request frame here to get correct buffer in case of
encryption and/or page compression */
@ -924,7 +924,7 @@ buf_dblwr_write_block_to_datafile(
fil_io(request, sync, bpage->id, bpage->size, 0,
bpage->size.physical(),
(void*) frame,
(void*) bpage, NULL);
(void*) bpage);
} else {
ut_ad(!bpage->size.is_compressed());
@ -938,8 +938,8 @@ buf_dblwr_write_block_to_datafile(
buf_dblwr_check_page_lsn(block->frame);
fil_io(request,
sync, bpage->id, bpage->size, 0, bpage->size.physical(),
frame, block, (ulint *)&bpage->write_size);
sync, bpage->id, bpage->size, 0, bpage->real_size,
frame, block);
}
}
@ -1041,7 +1041,7 @@ try_again:
fil_io(IORequestWrite, true,
page_id_t(TRX_SYS_SPACE, buf_dblwr->block1), univ_page_size,
0, len, (void*) write_buf, NULL, NULL);
0, len, (void*) write_buf, NULL);
if (buf_dblwr->first_free <= TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
/* No unwritten pages in the second block. */
@ -1057,7 +1057,7 @@ try_again:
fil_io(IORequestWrite, true,
page_id_t(TRX_SYS_SPACE, buf_dblwr->block2), univ_page_size,
0, len, (void*) write_buf, NULL, NULL);
0, len, (void*) write_buf, NULL);
flush:
/* increment the doublewrite flushed pages counter */
@ -1292,7 +1292,6 @@ retry:
0,
univ_page_size.physical(),
(void *)(buf_dblwr->write_buf + univ_page_size.physical() * i),
NULL,
NULL);
} else {
/* It is a regular page. Write it directly to the
@ -1304,7 +1303,6 @@ retry:
0,
univ_page_size.physical(),
(void*) frame,
NULL,
NULL);
}

View file

@ -1093,11 +1093,11 @@ buf_flush_write_block_low(
ulint type = IORequest::WRITE | IORequest::DO_NOT_WAKE;
IORequest request(type);
IORequest request(type, bpage);
fil_io(request,
sync, bpage->id, bpage->size, 0, bpage->size.physical(),
frame, bpage, NULL);
frame, bpage);
} else {
if (flush_type == BUF_FLUSH_SINGLE_PAGE) {
buf_dblwr_write_single_page(bpage, sync);

View file

@ -182,7 +182,7 @@ buf_read_page_low(
*err = fil_io(
request, sync, page_id, page_size, 0, page_size.physical(),
dst, bpage, NULL);
dst, bpage);
if (sync) {
thd_wait_end(NULL);

View file

@ -58,6 +58,7 @@ Created 10/25/1995 Heikki Tuuri
#include "srv0start.h"
#include "trx0purge.h"
#include "ut0new.h"
#include "os0api.h"
/** Tries to close a file in the LRU list. The caller must hold the fil_sys
mutex.
@ -280,7 +281,7 @@ fil_read(
void* buf)
{
return(fil_io(IORequestRead, true, page_id, page_size,
byte_offset, len, buf, NULL, NULL));
byte_offset, len, buf, NULL));
}
/** Writes data to a space from a buffer. Remember that the possible incomplete
@ -308,7 +309,7 @@ fil_write(
ut_ad(!srv_read_only_mode);
return(fil_io(IORequestWrite, true, page_id, page_size,
byte_offset, len, buf, NULL, NULL));
byte_offset, len, buf, NULL));
}
/*******************************************************************//**
@ -524,20 +525,6 @@ fil_node_create_low(
node->space = space;
os_file_stat_t stat_info;
#ifdef UNIV_DEBUG
dberr_t err =
#endif /* UNIV_DEBUG */
os_file_get_status(
node->name, &stat_info, false,
fsp_is_system_temporary(space->id) ? true : srv_read_only_mode);
ut_ad(err == DB_SUCCESS);
node->block_size = stat_info.block_size;
node->atomic_write = atomic_write;
UT_LIST_ADD_LAST(space->chain, node);
@ -1043,7 +1030,7 @@ fil_write_zeros(
err = os_aio(
request, OS_AIO_SYNC, node->name,
node->handle, buf, offset, n_bytes, read_only_mode,
NULL, NULL, NULL);
NULL, NULL);
if (err != DB_SUCCESS) {
break;
@ -3758,12 +3745,31 @@ fil_ibd_create(
success = true;
}
#endif /* HAVE_POSIX_FALLOCATE */
if (!success)
{
if (!success) {
success = os_file_set_size(
path, file, size * UNIV_PAGE_SIZE, srv_read_only_mode);
}
/* Note: We are actually punching a hole, previous contents will
be lost after this call, if it succeeds. In this case the file
should be full of NULs. */
bool punch_hole = os_is_sparse_file_supported(path, file);
if (punch_hole) {
dberr_t punch_err;
punch_err = os_file_punch_hole(file, 0, size * UNIV_PAGE_SIZE);
if (punch_err != DB_SUCCESS) {
punch_hole = false;
}
}
ulint block_size = os_file_get_block_size(file, path);
if (!success) {
os_file_close(file);
os_file_delete(innodb_data_file_key, path);
@ -3866,7 +3872,13 @@ fil_ibd_create(
space = fil_space_create(name, space_id, flags, FIL_TYPE_TABLESPACE,
crypt_data, true);
if (!fil_node_create_low(path, size, space, false, true)) {
fil_node_t* node = NULL;
if (space) {
node = fil_node_create_low(path, size, space, false, true);
}
if (!space || !node) {
if (crypt_data) {
free(crypt_data);
}
@ -3883,6 +3895,9 @@ fil_ibd_create(
fil_name_write(space, 0, file, &mtr);
mtr.commit();
node->block_size = block_size;
space->punch_hole = punch_hole;
err = DB_SUCCESS;
}
@ -5038,8 +5053,6 @@ fil_report_invalid_page_access(
aligned
@param[in] message message for aio handler if non-sync aio
used, else ignored
@param[in] write_size actual payload size when written
to avoid extra punch holes in compression
@return DB_SUCCESS, DB_TABLESPACE_DELETED or DB_TABLESPACE_TRUNCATED
if we are trying to do i/o on a tablespace which does not exist */
dberr_t
@ -5051,8 +5064,7 @@ fil_io(
ulint byte_offset,
ulint len,
void* buf,
void* message,
ulint* write_size)
void* message)
{
os_offset_t offset;
IORequest req_type(type);
@ -5285,7 +5297,7 @@ fil_io(
const char* name = node->name == NULL ? space->name : node->name;
req_type.block_size(node->block_size);
req_type.set_fil_node(node);
/* Queue the aio request */
dberr_t err = os_aio(
@ -5293,7 +5305,7 @@ fil_io(
mode, name, node->handle, buf, offset, len,
space->purpose != FIL_TYPE_TEMPORARY
&& srv_read_only_mode,
node, message, write_size);
node, message);
/* We an try to recover the page from the double write buffer if
the decompression fails or the page is corrupt. */
@ -6973,3 +6985,26 @@ fil_system_exit(void)
ut_ad(mutex_own(&fil_system->mutex));
mutex_exit(&fil_system->mutex);
}
/**
Get should we punch hole to tablespace.
@param[in] node File node
@return true, if punch hole should be tried, false if not. */
bool
fil_node_should_punch_hole(
const fil_node_t* node)
{
return (node->space->punch_hole);
}
/**
Store the punch hole setting on the tablespace that owns the given
file node.
@param[in]	node	File node
@param[in]	val	value to be set. */
void
fil_space_set_punch_hole(
	fil_node_t*	node,
	bool		val)
{
	/* The flag is kept on the tablespace, not on the individual node. */
	fil_space_t*	owner = node->space;

	owner->punch_hole = val;
}

View file

@ -927,6 +927,7 @@ static ibool innodb_have_lz4=IF_LZ4(1, 0);
static ibool innodb_have_lzma=IF_LZMA(1, 0);
static ibool innodb_have_bzip2=IF_BZIP2(1, 0);
static ibool innodb_have_snappy=IF_SNAPPY(1, 0);
static ibool innodb_have_punch_hole=IF_PUNCH_HOLE(1, 0);
static
int
@ -1134,20 +1135,6 @@ static SHOW_VAR innodb_status_variables[]= {
/* Status variables for page compression */
{"page_compression_saved",
(char*) &export_vars.innodb_page_compression_saved, SHOW_LONGLONG},
{"page_compression_trim_sect512",
(char*) &export_vars.innodb_page_compression_trim_sect512, SHOW_LONGLONG},
{"page_compression_trim_sect1024",
(char*) &export_vars.innodb_page_compression_trim_sect1024, SHOW_LONGLONG},
{"page_compression_trim_sect2048",
(char*) &export_vars.innodb_page_compression_trim_sect2048, SHOW_LONGLONG},
{"page_compression_trim_sect4096",
(char*) &export_vars.innodb_page_compression_trim_sect4096, SHOW_LONGLONG},
{"page_compression_trim_sect8192",
(char*) &export_vars.innodb_page_compression_trim_sect8192, SHOW_LONGLONG},
{"page_compression_trim_sect16384",
(char*) &export_vars.innodb_page_compression_trim_sect16384, SHOW_LONGLONG},
{"page_compression_trim_sect32768",
(char*) &export_vars.innodb_page_compression_trim_sect32768, SHOW_LONGLONG},
{"num_index_pages_written",
(char*) &export_vars.innodb_index_pages_written, SHOW_LONGLONG},
{"num_non_index_pages_written",
@ -1156,8 +1143,6 @@ static SHOW_VAR innodb_status_variables[]= {
(char*) &export_vars.innodb_pages_page_compressed, SHOW_LONGLONG},
{"num_page_compressed_trim_op",
(char*) &export_vars.innodb_page_compressed_trim_op, SHOW_LONGLONG},
{"num_page_compressed_trim_op_saved",
(char*) &export_vars.innodb_page_compressed_trim_op_saved, SHOW_LONGLONG},
{"num_pages_page_decompressed",
(char*) &export_vars.innodb_pages_page_decompressed, SHOW_LONGLONG},
{"num_pages_page_compression_error",
@ -1176,6 +1161,8 @@ static SHOW_VAR innodb_status_variables[]= {
(char*) &innodb_have_bzip2, SHOW_BOOL},
{"have_snappy",
(char*) &innodb_have_snappy, SHOW_BOOL},
{"have_punch_hole",
(char*) &innodb_have_punch_hole, SHOW_BOOL},
/* Defragmentation */
{"defragment_compression_failures",
@ -3830,6 +3817,10 @@ static const char* deprecated_file_format_check
static const char* deprecated_file_format_max
= DEPRECATED_FORMAT_PARAMETER("innodb_file_format_max");
/** Deprecation message about innodb_use_trim */
static const char* deprecated_use_trim
= DEPRECATED_FORMAT_PARAMETER("innodb_use_trim");
/** Update log_checksum_algorithm_ptr with a pointer to the function
corresponding to whether checksums are enabled.
@param[in] check whether redo log block checksums are enabled */
@ -20660,6 +20651,25 @@ wsrep_fake_trx_id(
#endif /* WITH_WSREP */
/** Update hook for the innodb_use_trim parameter: store the requested
value in srv_use_trim and push a deprecation warning to the client.
@param[in]	thd	thread handle
@param[in]	var	system variable
@param[out]	var_ptr	current value
@param[in]	save	immediate result from check function */
static
void
innodb_use_trim_update(
	THD*				thd,
	struct st_mysql_sys_var*	var,
	void*				var_ptr,
	const void*			save)
{
	const my_bool	requested = *static_cast<const my_bool*>(save);

	srv_use_trim = requested;

	/* Every assignment is answered with the deprecation warning. */
	push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
		     HA_ERR_WRONG_COMMAND, deprecated_use_trim);
}
/* plugin options */
static MYSQL_SYSVAR_ENUM(checksum_algorithm, srv_checksum_algorithm,
@ -21761,8 +21771,8 @@ static MYSQL_SYSVAR_BOOL(force_primary_key,
static MYSQL_SYSVAR_BOOL(use_trim, srv_use_trim,
PLUGIN_VAR_OPCMDARG,
"Use trim. Default FALSE.",
NULL, NULL, FALSE);
"Deallocate (punch_hole|trim) unused portions of the page compressed page (on by default)",
NULL, innodb_use_trim_update, TRUE);
static const char *page_compression_algorithms[]= { "none", "zlib", "lz4", "lzo", "lzma", "bzip2", "snappy", 0 };
static TYPELIB page_compression_algorithms_typelib=

View file

@ -160,6 +160,9 @@ enum dberr_t {
placed on the base column of
stored column */
DB_IO_NO_PUNCH_HOLE, /*!< Punch hole not supported by
file system. */
/* The following are partial failure codes */
DB_FAIL = 1000,
DB_OVERFLOW,

View file

@ -185,6 +185,10 @@ struct fil_space_t {
@param[in] n_reserved number of reserved extents */
void release_free_extents(ulint n_reserved);
/** True if file system storing this tablespace supports
punch hole */
bool punch_hole;
ulint magic_n;/*!< FIL_SPACE_MAGIC_N */
};
@ -229,12 +233,12 @@ struct fil_node_t {
/** link to the fil_system->LRU list (keeping track of open files) */
UT_LIST_NODE_T(fil_node_t) LRU;
/** block size to use for punching holes */
ulint block_size;
/** whether this file could use atomic write (data file) */
bool atomic_write;
/** Filesystem block size */
ulint block_size;
/** FIL_NODE_MAGIC_N */
ulint magic_n;
};
@ -1129,11 +1133,6 @@ fil_space_get_n_reserved_extents(
aligned
@param[in] message message for aio handler if non-sync aio
used, else ignored
@param[in,out] write_size Actual write size initialized
after fist successfull trim
operation for this page and if
nitialized we do not trim again if
Actual page
@return DB_SUCCESS, DB_TABLESPACE_DELETED or DB_TABLESPACE_TRUNCATED
if we are trying to do i/o on a tablespace which does not exist */
@ -1146,8 +1145,7 @@ fil_io(
ulint byte_offset,
ulint len,
void* buf,
void* message,
ulint* write_size);
void* message);
/**********************************************************************//**
Waits for an aio operation to complete. This function is used to write the
handler for completed requests. The aio array of pending requests is divided

View file

@ -0,0 +1,75 @@
/***********************************************************************
Copyright (c) 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
***********************************************************************/
/**************************************************//**
@file os0api.h
The interface to the helper functions.
These functions are used in os0file.h, where
including the full headers is not feasible, and are
implemented in buf0buf.cc and fil0fil.cc.
*******************************************************/
#ifndef OS_API_H
#define OS_API_H 1
/** Page control block */
struct buf_page_t;
/** File Node */
struct fil_node_t;
/**
Should we punch hole to deallocate unused portion of the page.
Declared here so that os0file.h can call it without the full buffer
pool headers.
@param[in] bpage Page control block
@return true if punch hole should be used, false if not */
bool
buf_page_should_punch_hole(
const buf_page_t* bpage)
MY_ATTRIBUTE((warn_unused_result));
/**
Calculate the length of trim (punch_hole) operation.
Declared here so that os0file.h can call it without the full buffer
pool headers.
@param[in] bpage Page control block
@param[in] write_length Write length
@return length of the trim or zero. */
ulint
buf_page_get_trim_length(
const buf_page_t* bpage,
ulint write_length)
MY_ATTRIBUTE((warn_unused_result));
/**
Check whether punch hole should be tried on the tablespace of a file node.
@param[in] node File node
@return true, if punch hole should be tried, false if not. */
bool
fil_node_should_punch_hole(
const fil_node_t* node)
MY_ATTRIBUTE((warn_unused_result));
/**
Set the punch hole setting of the tablespace of a file node to the given value.
@param[in] node File node
@param[in] val value to be set. */
void
fil_space_set_punch_hole(
fil_node_t* node,
bool val);
#endif /* OS_API_H */

View file

@ -36,7 +36,8 @@ Created 10/21/1995 Heikki Tuuri
#ifndef os0file_h
#define os0file_h
#include "univ.i"
#include "page0size.h"
#include "os0api.h"
#ifndef _WIN32
#include <dirent.h>
@ -46,8 +47,10 @@ Created 10/21/1995 Heikki Tuuri
/** File node of a tablespace or the log data space */
struct fil_node_t;
struct fil_space_t;
extern bool os_has_said_disk_full;
extern my_bool srv_use_trim;
/** Number of pending read operations */
extern ulint os_n_pending_reads;
@ -177,6 +180,8 @@ static const ulint OS_FILE_ERROR_MAX = 200;
#define IORequestLogRead IORequest(IORequest::LOG | IORequest::READ)
#define IORequestLogWrite IORequest(IORequest::LOG | IORequest::WRITE)
/**
The IO Context that is passed down to the low level IO code */
class IORequest {
@ -211,12 +216,16 @@ public:
/** Ignore failed reads of non-existent pages */
IGNORE_MISSING = 128,
/** Use punch hole if available*/
PUNCH_HOLE = 256,
};
/** Default constructor */
IORequest()
:
m_block_size(UNIV_SECTOR_SIZE),
m_bpage(NULL),
m_fil_node(NULL),
m_type(READ)
{
/* No op */
@ -227,9 +236,32 @@ public:
ORed from the above enum */
explicit IORequest(ulint type)
:
m_block_size(UNIV_SECTOR_SIZE),
m_bpage(NULL),
m_fil_node(NULL),
m_type(static_cast<uint16_t>(type))
{
if (!is_punch_hole_supported() || !srv_use_trim) {
clear_punch_hole();
}
}
/**
Construct a request for a given page. The PUNCH_HOLE flag is requested
when the page reports that a hole should be punched, and is cleared
again when punch hole is not supported or srv_use_trim is off.
@param[in] type Request type, can be a value that is
ORed from the above enum
@param[in] bpage Page to be written */
IORequest(ulint type, buf_page_t* bpage)
:
m_bpage(bpage),
m_fil_node(NULL),
m_type(static_cast<uint16_t>(type))
{
/* set_punch_hole() itself only sets the flag when punch hole is
supported and srv_use_trim is on. */
if (bpage && buf_page_should_punch_hole(bpage)) {
set_punch_hole();
}
/* Also drops a PUNCH_HOLE bit that was passed in through type. */
if (!is_punch_hole_supported() || !srv_use_trim) {
clear_punch_hole();
}
}
/** Destructor */
@ -270,6 +302,12 @@ public:
return((m_type & DO_NOT_WAKE) == 0);
}
/** Clear the punch hole flag */
void clear_punch_hole()
{
m_type &= ~PUNCH_HOLE;
}
/** @return true if partial read warning disabled */
bool is_partial_io_warning_disabled() const
MY_ATTRIBUTE((warn_unused_result))
@ -291,6 +329,13 @@ public:
return(ignore_missing(m_type));
}
/** @return true if the PUNCH_HOLE flag is set on this request,
i.e. punch hole should be used */
bool punch_hole() const
	MY_ATTRIBUTE((warn_unused_result))
{
	/* PUNCH_HOLE is a single bit, so any non-zero result means set. */
	return((m_type & PUNCH_HOLE) != 0);
}
/** @return true if the read should be validated */
bool validate() const
MY_ATTRIBUTE((warn_unused_result))
@ -298,24 +343,30 @@ public:
return(is_read() ^ is_write());
}
/** Set the punch hole flag, but only when punch hole is supported
and srv_use_trim is enabled; otherwise the request is left unchanged */
void set_punch_hole()
{
	if (!is_punch_hole_supported() || !srv_use_trim) {
		return;
	}

	m_type |= PUNCH_HOLE;
}
/** Clear the do not wake flag */
void clear_do_not_wake()
{
m_type &= ~DO_NOT_WAKE;
}
/** @return the block size to use for IO */
ulint block_size() const
MY_ATTRIBUTE((warn_unused_result))
/** Set the pointer to file node for IO
@param[in] node File node */
void set_fil_node(fil_node_t* node)
{
return(m_block_size);
}
if (!srv_use_trim ||
(node && !fil_node_should_punch_hole(node))) {
clear_punch_hole();
}
/** Set the block size for IO
@param[in] block_size Block size to set */
void block_size(ulint block_size)
{
m_block_size = static_cast<uint32_t>(block_size);
m_fil_node = node;
}
/** Compare two requests
@ -338,9 +389,59 @@ public:
return((m_type & DBLWR_RECOVER) == DBLWR_RECOVER);
}
/** Compile-time check for punch hole support (with a debug override).
@return true if punch hole is supported */
static bool is_punch_hole_supported()
{
/* In this debugging mode, we act as if punch hole is supported,
and then skip any calls to actually punch a hole here.
In this way, Transparent Page Compression is still being tested. */
DBUG_EXECUTE_IF("ignore_punch_hole",
return(true);
);
/* Supported when the build detected
HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE, and always on Windows. */
#if defined(HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE) || defined(_WIN32)
return(true);
#else
return(false);
#endif /* HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE || _WIN32 */
}
/** Get the trim length for the page attached to this request.
@param[in]	write_length	Write length
@return result of buf_page_get_trim_length() for the attached page,
or 0 when no page is attached */
ulint get_trim_length(ulint write_length) const
{
	if (m_bpage == NULL) {
		return(0);
	}

	return(buf_page_get_trim_length(m_bpage, write_length));
}
bool should_punch_hole() const {
return (m_fil_node ?
fil_node_should_punch_hole(m_fil_node)
: false);
}
void space_no_punch_hole() const {
if (m_fil_node) {
fil_space_set_punch_hole(m_fil_node, false);
}
}
/** Punch a hole in the file if it was a write
@param[in] fh Open file handle
@param[in] offset File offset (NOTE(review): presumably the start of the range to deallocate -- confirm in os0file.cc)
@param[in] len Compressed buffer length for write
@return DB_SUCCESS or error code */
dberr_t punch_hole(
os_file_t fh,
ulint offset,
ulint len);
private:
/* File system best block size */
uint32_t m_block_size;
/** Page to be written on write operation. */
buf_page_t* m_bpage;
/** File node */
fil_node_t* m_fil_node;
/** Request type bit flags */
uint16_t m_type;
@ -706,10 +807,10 @@ The wrapper functions have the prefix of "innodb_". */
# define os_file_close(file) \
pfs_os_file_close_func(file, __FILE__, __LINE__)
# define os_aio(type, mode, name, file, buf, offset, \
n, read_only, message1, message2, wsize) \
pfs_os_aio_func(type, mode, name, file, buf, offset, \
n, read_only, message1, message2, wsize, \
# define os_aio(type, mode, name, file, buf, offset, \
n, read_only, message1, message2) \
pfs_os_aio_func(type, mode, name, file, buf, offset, \
n, read_only, message1, message2, \
__FILE__, __LINE__)
# define os_file_read(type, file, buf, offset, n) \
@ -721,7 +822,7 @@ The wrapper functions have the prefix of "innodb_". */
# define os_file_write(type, name, file, buf, offset, n) \
pfs_os_file_write_func(type, name, file, buf, offset, \
n, __FILE__, __LINE__)
n,__FILE__, __LINE__)
# define os_file_flush(file) \
pfs_os_file_flush_func(file, __FILE__, __LINE__)
@ -926,7 +1027,6 @@ pfs_os_aio_func(
bool read_only,
fil_node_t* m1,
void* m2,
ulint* wsize,
const char* src_file,
ulint src_line);
@ -1051,9 +1151,9 @@ to original un-instrumented file I/O APIs */
# define os_file_close(file) os_file_close_func(file)
# define os_aio(type, mode, name, file, buf, offset, \
n, read_only, message1, message2, wsize) \
n, read_only, message1, message2) \
os_aio_func(type, mode, name, file, buf, offset, \
n, read_only, message1, message2, wsize)
n, read_only, message1, message2)
# define os_file_read(type, file, buf, offset, n) \
os_file_read_func(type, file, buf, offset, n)
@ -1061,7 +1161,7 @@ to original un-instrumented file I/O APIs */
# define os_file_read_no_error_handling(type, file, buf, offset, n, o) \
os_file_read_no_error_handling_func(type, file, buf, offset, n, o)
# define os_file_write(type, name, file, buf, offset, n) \
# define os_file_write(type, name, file, buf, offset, n) \
os_file_write_func(type, name, file, buf, offset, n)
# define os_file_flush(file) os_file_flush_func(file)
@ -1324,8 +1424,7 @@ os_aio_func(
ulint n,
bool read_only,
fil_node_t* m1,
void* m2,
ulint* wsize);
void* m2);
/** Wakes up all async i/o threads so that they know to exit themselves in
shutdown. */
@ -1427,6 +1526,48 @@ innobase_mysql_tmpfile(
void
os_file_set_umask(ulint umask);
/** Check if the file system supports sparse files.
Warning: On POSIX systems we try and punch a hole from offset 0 to
the system configured page size. This should only be called on an empty
file.
Note: On Windows we use the name and on Unices we use the file handle.
@param[in]	path		File name
@param[in]	fh		File handle for the file - if opened
@return true if the file system supports sparse files */
bool
os_is_sparse_file_supported(
const char* path,
os_file_t fh)
MY_ATTRIBUTE((warn_unused_result));
/** Free storage space associated with a section of the file.
@param[in,out]	type		IO request
				(NOTE(review): the definition of this overload
				is not visible next to the other one; confirm
				that it exists and how it uses the request)
@param[in]	fh		Open file handle
@param[in]	off		Starting offset (SEEK_SET)
@param[in]	len		Size of the hole
@return DB_SUCCESS or error code */
dberr_t
os_file_punch_hole(
IORequest& type,
os_file_t fh,
os_offset_t off,
os_offset_t len)
MY_ATTRIBUTE((warn_unused_result));
/** Free storage space associated with a section of the file.
Dispatches to the Windows or the POSIX implementation.
@param[in]	fh		Open file handle
@param[in]	off		Starting offset (SEEK_SET)
@param[in]	len		Size of the hole
@return DB_SUCCESS or error code; DB_IO_NO_PUNCH_HOLE when the
operation is not supported */
dberr_t
os_file_punch_hole(
os_file_t fh,
os_offset_t off,
os_offset_t len)
MY_ATTRIBUTE((warn_unused_result));
/** Normalizes a directory path for the current OS:
On Windows, we convert '/' to '\', else we convert '\' to '/'.
@param[in,out] str A null-terminated directory and file path */
@ -1454,6 +1595,16 @@ is_absolute_path(
return(false);
}
/***********************************************************************//**
Try to get number of bytes per sector from file system.
The returned value is limited to the range 512..4096 bytes; 512 is
the starting default before any platform query is made.
@return file block size */
UNIV_INTERN
ulint
os_file_get_block_size(
/*===================*/
os_file_t file, /*!< in: handle to a file */
const char* name); /*!< in: file name */
#ifndef UNIV_NONINL
#include "os0file.ic"
#endif /* UNIV_NONINL */

View file

@ -219,11 +219,6 @@ an asynchronous i/o operation.
@param[in,out] m2 message for the AIO handler (can be used to
identify a completed AIO operation); ignored
if mode is OS_AIO_SYNC
@param[in,out] write_size Actual write size initialized
after fist successfull trim
operation for this page and if
initialized we do not trim again if
actual page size
@param[in] src_file file name where func invoked
@param[in] src_line line where the func invoked
@return DB_SUCCESS if request was queued successfully, FALSE if fail */
@ -240,7 +235,6 @@ pfs_os_aio_func(
bool read_only,
fil_node_t* m1,
void* m2,
ulint* write_size,
const char* src_file,
ulint src_line)
{
@ -256,7 +250,7 @@ pfs_os_aio_func(
src_file, src_line);
dberr_t result = os_aio_func(
type, mode, name, file, buf, offset, n, read_only, m1, m2, write_size);
type, mode, name, file, buf, offset, n, read_only, m1, m2);
register_pfs_file_io_end(locker, n);

View file

@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 2013, 2015, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -139,9 +140,7 @@ public:
@param[in] src page size object whose values to fetch */
inline void copy_from(const page_size_t& src)
{
m_physical = src.physical();
m_logical = src.logical();
m_is_compressed = src.is_compressed();
*this = src;
}
/** Check if a given page_size_t object is equal to the current one.
@ -156,9 +155,6 @@ public:
private:
/* Disable implicit copying. */
void operator=(const page_size_t&);
/* For non compressed tablespaces, physical page size is equal to
the logical page size and the data is stored in buf_page_t::frame
(and is also always equal to univ_page_size (--innodb-page-size=)).

View file

@ -343,16 +343,8 @@ enum monitor_id_t {
MONITOR_PAD_DECREMENTS,
/* New monitor variables for page compression */
MONITOR_OVLD_PAGE_COMPRESS_SAVED,
MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT512,
MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT1024,
MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT2048,
MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT4096,
MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT8192,
MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT16384,
MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT32768,
MONITOR_OVLD_PAGES_PAGE_COMPRESSED,
MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP,
MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP_SAVED,
MONITOR_OVLD_PAGES_PAGE_DECOMPRESSED,
MONITOR_OVLD_PAGES_PAGE_COMPRESSION_ERROR,

View file

@ -114,20 +114,6 @@ struct srv_stats_t {
/** Number of bytes saved by page compression */
ulint_ctr_64_t page_compression_saved;
/** Number of 512Byte TRIM by page compression */
ulint_ctr_64_t page_compression_trim_sect512;
/** Number of 1K TRIM by page compression */
ulint_ctr_64_t page_compression_trim_sect1024;
/** Number of 2K TRIM by page compression */
ulint_ctr_64_t page_compression_trim_sect2048;
/** Number of 4K TRIM by page compression */
ulint_ctr_64_t page_compression_trim_sect4096;
/** Number of 8K TRIM by page compression */
ulint_ctr_64_t page_compression_trim_sect8192;
/** Number of 16K TRIM by page compression */
ulint_ctr_64_t page_compression_trim_sect16384;
/** Number of 32K TRIM by page compression */
ulint_ctr_64_t page_compression_trim_sect32768;
/* Number of index pages written */
ulint_ctr_64_t index_pages_written;
/* Number of non index pages written */
@ -136,8 +122,6 @@ struct srv_stats_t {
ulint_ctr_64_t pages_page_compressed;
/* Number of TRIM operations induced by page compression */
ulint_ctr_64_t page_compressed_trim_op;
/* Number of TRIM operations saved by using actual write size knowledge */
ulint_ctr_64_t page_compressed_trim_op_saved;
/* Number of pages decompressed with page compression */
ulint_ctr_64_t pages_page_decompressed;
/* Number of page compression errors */
@ -1059,20 +1043,6 @@ struct export_var_t{
int64_t innodb_page_compression_saved;/*!< Number of bytes saved
by page compression */
int64_t innodb_page_compression_trim_sect512;/*!< Number of 512b TRIM
by page compression */
int64_t innodb_page_compression_trim_sect1024;/*!< Number of 1K TRIM
by page compression */
int64_t innodb_page_compression_trim_sect2048;/*!< Number of 2K TRIM
by page compression */
int64_t innodb_page_compression_trim_sect4096;/*!< Number of 4K byte TRIM
by page compression */
int64_t innodb_page_compression_trim_sect8192;/*!< Number of 8K TRIM
by page compression */
int64_t innodb_page_compression_trim_sect16384;/*!< Number of 16K TRIM
by page compression */
int64_t innodb_page_compression_trim_sect32768;/*!< Number of 32K TRIM
by page compression */
int64_t innodb_index_pages_written; /*!< Number of index pages
written */
int64_t innodb_non_index_pages_written; /*!< Number of non index pages
@ -1081,8 +1051,6 @@ struct export_var_t{
compressed by page compression */
int64_t innodb_page_compressed_trim_op;/*!< Number of TRIM operations
induced by page compression */
int64_t innodb_page_compressed_trim_op_saved;/*!< Number of TRIM operations
saved by page compression */
int64_t innodb_pages_page_decompressed;/*!< Number of pages
decompressed by page
compression */

View file

@ -366,6 +366,12 @@ typedef enum innodb_file_formats_enum innodb_file_formats_t;
#define IF_SNAPPY(A,B) B
#endif
#if defined (HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE) || defined(_WIN32)
#define IF_PUNCH_HOLE(A,B) A
#else
#define IF_PUNCH_HOLE(A,B) B
#endif
/** The universal page size of the database */
#define UNIV_PAGE_SIZE ((ulint) srv_page_size)

View file

@ -1019,7 +1019,7 @@ log_group_file_header_flush(
page_id_t(group->space_id, page_no),
univ_page_size,
(ulint) (dest_offset % univ_page_size.physical()),
OS_FILE_LOG_BLOCK_SIZE, buf, group, NULL);
OS_FILE_LOG_BLOCK_SIZE, buf, group);
srv_stats.os_log_pending_writes.dec();
}
@ -1144,7 +1144,7 @@ loop:
page_id_t(group->space_id, page_no),
univ_page_size,
(ulint) (next_offset % UNIV_PAGE_SIZE), write_len, buf,
group, NULL);
group);
srv_stats.os_log_pending_writes.dec();
@ -1664,7 +1664,7 @@ log_group_checkpoint(
(log_sys->next_checkpoint_no & 1)
? LOG_CHECKPOINT_2 : LOG_CHECKPOINT_1,
OS_FILE_LOG_BLOCK_SIZE,
buf, (byte*) group + 1, NULL);
buf, (byte*) group + 1);
ut_ad(((ulint) group & 0x1UL) == 0);
}
@ -1686,7 +1686,7 @@ log_group_header_read(
fil_io(IORequestLogRead, true,
page_id_t(group->space_id, header / univ_page_size.physical()),
univ_page_size, header % univ_page_size.physical(),
OS_FILE_LOG_BLOCK_SIZE, log_sys->checkpoint_buf, NULL, NULL);
OS_FILE_LOG_BLOCK_SIZE, log_sys->checkpoint_buf, NULL);
}
/** Write checkpoint info to the log header and invoke log_mutex_exit().
@ -2038,7 +2038,7 @@ loop:
page_id_t(group->space_id, page_no),
univ_page_size,
(ulint) (source_offset % univ_page_size.physical()),
len, buf, NULL, NULL);
len, buf, NULL);
#ifdef DEBUG_CRYPT
fprintf(stderr, "BEFORE DECRYPT: block: %lu checkpoint: %lu %.8lx %.8lx offset %lu\n",

View file

@ -869,7 +869,7 @@ recv_log_format_0_recover(lsn_t lsn)
univ_page_size,
(ulint) ((source_offset & ~(OS_FILE_LOG_BLOCK_SIZE - 1))
% univ_page_size.physical()),
OS_FILE_LOG_BLOCK_SIZE, buf, NULL, NULL);
OS_FILE_LOG_BLOCK_SIZE, buf, NULL);
if (log_block_calc_checksum_format_0(buf)
!= log_block_get_checksum(buf)) {

View file

@ -44,6 +44,11 @@ Created 10/21/1995 Heikki Tuuri
#include "os0file.ic"
#endif
#ifdef UNIV_LINUX
#include <sys/types.h>
#include <sys/stat.h>
#endif
#include "srv0srv.h"
#include "srv0start.h"
#include "fil0fil.h"
@ -63,17 +68,23 @@ Created 10/21/1995 Heikki Tuuri
#include <libaio.h>
#endif /* LINUX_NATIVE_AIO */
#ifdef HAVE_LZ4
#include <lz4.h>
#endif
#include <zlib.h>
#ifdef HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE
# include <fcntl.h>
# include <linux/falloc.h>
#endif /* HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE */
#ifdef UNIV_DEBUG
/** Set when InnoDB has invoked exit(). */
bool innodb_calling_exit;
#endif /* UNIV_DEBUG */
#if defined(UNIV_LINUX) && defined(HAVE_SYS_IOCTL_H)
# include <sys/ioctl.h>
# ifndef DFS_IOCTL_ATOMIC_WRITE_SET
# define DFS_IOCTL_ATOMIC_WRITE_SET _IOW(0x95, 2, uint)
# endif
#endif
#if defined(UNIV_LINUX) && defined(HAVE_SYS_STATVFS_H)
#include <sys/statvfs.h>
#endif
@ -82,12 +93,8 @@ bool innodb_calling_exit;
#include <linux/falloc.h>
#endif
#ifdef HAVE_LZO
#include "lzo/lzo1x.h"
#endif
#ifdef HAVE_SNAPPY
#include "snappy-c.h"
#ifdef _WIN32
#include <winioctl.h>
#endif
/** Insert buffer segment id */
@ -216,8 +223,6 @@ struct Slot {
/** buffer used in i/o */
byte* buf;
ulint is_log; /*!< 1 if OS_FILE_LOG or 0 */
ulint page_size; /*!< UNIV_PAGE_SIZE or zip_size */
/** Buffer pointer used for actual IO. We advance this
when partial IO is required and not buf */
@ -286,7 +291,6 @@ struct Slot {
/** Length of the block before it was compressed */
uint32 original_len;
ulint* write_size;
};
/** The asynchronous i/o array structure */
@ -328,8 +332,7 @@ public:
const char* name,
void* buf,
os_offset_t offset,
ulint len,
ulint* write_size)
ulint len)
MY_ATTRIBUTE((warn_unused_result));
/** @return number of reserved slots */
@ -759,6 +762,107 @@ os_aio_simulated_handler(
void** m2,
IORequest* type);
#ifdef _WIN32
static HANDLE win_get_syncio_event();
#endif
#ifdef _WIN32
/**
Synchronous wrapper around the Windows DeviceIoControl() call.
It can also be used on handles opened for asynchronous access
(FILE_FLAG_OVERLAPPED): an OVERLAPPED structure carrying the event from
win_get_syncio_event() is always supplied, and a pending request is
waited for before returning.
Takes the same parameters as DeviceIoControl() except for the
last one (OVERLAPPED).
@return the DeviceIoControl() result, or the GetOverlappedResult() result
when the request had to be waited for */
static
BOOL
os_win32_device_io_control(
	HANDLE		handle,
	DWORD		code,
	LPVOID		inbuf,
	DWORD		inbuf_size,
	LPVOID		outbuf,
	DWORD		outbuf_size,
	LPDWORD		bytes_returned
)
{
	OVERLAPPED	ov = { 0 };
	ov.hEvent = win_get_syncio_event();

	const BOOL	started = DeviceIoControl(
		handle, code, inbuf, inbuf_size, outbuf,
		outbuf_size, bytes_returned, &ov);

	/* Finished immediately, or failed for a reason other than
	"still in progress": hand the result back unchanged. */
	if (started || GetLastError() != ERROR_IO_PENDING) {
		return started;
	}

	/* Wait for async io to complete */
	return GetOverlappedResult(handle, &ov, bytes_returned, TRUE);
}
#endif
/***********************************************************************//**
Try to get number of bytes per sector from file system.
On Linux the value comes from fstat() (st_blksize), on Windows from
IOCTL_STORAGE_QUERY_PROPERTY (BytesPerPhysicalSector). If the query
fails, the error is reported and the default of 512 bytes is kept.
The result is always limited to the range 512..4096 bytes.
@return file block size */
UNIV_INTERN
ulint
os_file_get_block_size(
/*===================*/
	os_file_t	file,	/*!< in: handle to a file */
	const char*	name)	/*!< in: file name */
{
	ulint		fblock_size = 512;

#if defined(UNIV_LINUX)
	struct stat	local_stat;
	int		err;

	err = fstat((int)file, &local_stat);

	if (err != 0) {
		os_file_handle_error_no_exit(name, "fstat()", FALSE);
	} else {
		fblock_size = local_stat.st_blksize;
	}
#endif /* UNIV_LINUX */
#ifdef _WIN32
	DWORD				outsize;
	STORAGE_PROPERTY_QUERY		storageQuery;
	STORAGE_ACCESS_ALIGNMENT_DESCRIPTOR	diskAlignment;

	memset(&storageQuery, 0, sizeof(storageQuery));
	storageQuery.PropertyId = StorageAccessAlignmentProperty;
	storageQuery.QueryType = PropertyStandardQuery;

	BOOL	result = os_win32_device_io_control(file,
		IOCTL_STORAGE_QUERY_PROPERTY,
		&storageQuery,
		sizeof(STORAGE_PROPERTY_QUERY),
		&diskAlignment,
		sizeof(STORAGE_ACCESS_ALIGNMENT_DESCRIPTOR),
		&outsize);

	if (!result) {
		/* diskAlignment was not filled in; reading it would use
		uninitialized memory, so keep the default block size. */
		os_file_handle_error_no_exit(name, "DeviceIoControl()", FALSE);
	} else {
		fblock_size = diskAlignment.BytesPerPhysicalSector;
	}
#endif /* _WIN32 */

	/* Currently we support file block size from 512 bytes
	up to 4Kb */
	if (fblock_size < 512) {
		fblock_size = 512;
	} else if (fblock_size > 4096) {
		fblock_size = 4096;
	}

	return fblock_size;
}
#ifdef WIN_ASYNC_IO
/** This function is only used in Windows asynchronous i/o.
Waits for an aio operation to complete. This function is used to wait the
@ -1443,6 +1547,48 @@ SyncFileIO::execute(const IORequest& request)
return(n_bytes);
}
/** Free storage space associated with a section of the file.
Uses fallocate(FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE) when the
build has HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE; otherwise the operation
is reported as unsupported.
@param[in]	fh		Open file handle
@param[in]	off		Starting offset (SEEK_SET)
@param[in]	len		Size of the hole
@return DB_SUCCESS on success, DB_IO_NO_PUNCH_HOLE when punching holes
is not supported, DB_IO_ERROR on any other fallocate() failure */
static
dberr_t
os_file_punch_hole_posix(
	os_file_t	fh,
	os_offset_t	off,
	os_offset_t	len)
{
#ifdef HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE
	const int	mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;

	if (fallocate(fh, mode, off, len) == 0) {
		return(DB_SUCCESS);
	}

	/* Remember errno right away: building the log message below
	may call library functions that overwrite it. */
	const int	fallocate_errno = errno;

	if (fallocate_errno == ENOTSUP) {
		return(DB_IO_NO_PUNCH_HOLE);
	}

	ib::warn()
		<< "fallocate(" << static_cast<int>(fh)
		<< ", FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, "
		<< off << ", " << len << ") returned errno: "
		<< fallocate_errno;

	return(DB_IO_ERROR);

#else /* HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE */

	/* No hole punching primitive is wired up for this platform
	(on UNIV_SOLARIS, F_FREESP could be used). */
	return(DB_IO_NO_PUNCH_HOLE);

#endif /* HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE */
}
#if defined(LINUX_NATIVE_AIO)
@ -1734,7 +1880,18 @@ LinuxAIOHandler::collect()
/* We have not overstepped to next segment. */
ut_a(slot->pos < end_pos);
slot->err = DB_SUCCESS;
/* Deallocate unused blocks from file system.
This is never done to page 0 or to log files.*/
if (slot->offset > 0
&& !slot->skip_punch_hole
&& !slot->type.is_log()
&& slot->type.is_write()
&& slot->type.punch_hole()) {
slot->err = AIOHandler::io_complete(slot);
} else {
slot->err = DB_SUCCESS;
}
/* Mark this request as completed. The error handling
will be done in the calling function. */
@ -3353,6 +3510,76 @@ struct WinIoInit
/* Ensures proper initialization and shutdown */
static WinIoInit win_io_init;
/** Check if the file system supports sparse files.
@param[in] name File name
@return true if the file system supports sparse files */
static
bool
os_is_sparse_file_supported_win32(const char* filename)
{
char volname[MAX_PATH];
BOOL result = GetVolumePathName(filename, volname, MAX_PATH);
if (!result) {
ib::error()
<< "os_is_sparse_file_supported: "
<< "Failed to get the volume path name for: "
<< filename
<< "- OS error number " << GetLastError();
return(false);
}
DWORD flags;
result = GetVolumeInformation(
volname, NULL, MAX_PATH, NULL, NULL,
&flags, NULL, MAX_PATH);
if (!result) {
ib::error()
<< "os_is_sparse_file_supported: "
<< "Failed to get the volume info for: "
<< volname
<< "- OS error number " << GetLastError();
return(false);
}
return(flags & FILE_SUPPORTS_SPARSE_FILES) ? true : false;
}
/** Free storage space associated with a section of the file by issuing
FSCTL_SET_ZERO_DATA for the byte range [off, off + len).
@param[in]	fh		Open file handle
@param[in]	off		Starting offset (SEEK_SET)
@param[in]	len		Size of the hole
@return DB_SUCCESS if the control call succeeded, otherwise
DB_IO_NO_PUNCH_HOLE */
static
dberr_t
os_file_punch_hole_win32(
	os_file_t	fh,
	os_offset_t	off,
	os_offset_t	len)
{
	FILE_ZERO_DATA_INFORMATION	zero_range;

	zero_range.FileOffset.QuadPart = off;
	zero_range.BeyondFinalZero.QuadPart = off + len;

	/* The byte count is of no interest here, but the call still
	needs somewhere to store it, so pass a dummy variable. */
	DWORD	ignored_bytes;

	if (os_win32_device_io_control(
			fh, FSCTL_SET_ZERO_DATA,
			&zero_range, sizeof(zero_range),
			NULL, 0, &ignored_bytes)) {
		return(DB_SUCCESS);
	}

	return(DB_IO_NO_PUNCH_HOLE);
}
/** Check the existence and type of the given file.
@param[in] path path name of file
@param[out] exists true if the file exists
@ -3661,9 +3888,9 @@ os_file_create_simple_func(
/* This is a best effort use case, if it fails then
we will find out when we try and punch the hole. */
DeviceIoControl(
os_win32_device_io_control(
file, FSCTL_SET_SPARSE, NULL, 0, NULL, 0,
&temp, NULL);
&temp);
}
} while (retry);
@ -4020,9 +4247,9 @@ os_file_create_func(
/* This is a best effort use case, if it fails then
we will find out when we try and punch the hole. */
DeviceIoControl(
os_win32_device_io_control(
file, FSCTL_SET_SPARSE, NULL, 0, NULL, 0,
&temp, NULL);
&temp);
}
} while (retry);
@ -4459,28 +4686,6 @@ os_file_get_status_win32(
}
stat_info->block_size = bytesPerSector * sectorsPerCluster;
/* On Windows the block size is not used as the allocation
unit for sparse files. The underlying infra-structure for
sparse files is based on NTFS compression. The punch hole
is done on a "compression unit". This compression unit
is based on the cluster size. You cannot punch a hole if
the cluster size >= 8K. For smaller sizes the table is
as follows:
Cluster Size Compression Unit
512 Bytes 8 KB
1 KB 16 KB
2 KB 32 KB
4 KB 64 KB
Default NTFS cluster size is 4K, compression unit size of 64K.
Therefore unless the user has created the file system with
a smaller cluster size and used larger page sizes there is
little benefit from compression out of the box. */
stat_info->block_size = (stat_info->block_size <= 4096)
? stat_info->block_size * 16 : ULINT_UNDEFINED;
} else {
stat_info->type = OS_FILE_TYPE_UNKNOWN;
}
@ -4615,7 +4820,18 @@ os_file_io(
} else if ((ulint) n_bytes + bytes_returned == n) {
bytes_returned += n_bytes;
*err = DB_SUCCESS;
if (offset > 0
&& !type.is_log()
&& type.is_write()
&& type.punch_hole()) {
*err = type.punch_hole(file,
static_cast<ulint>(offset),
n);
} else {
*err = DB_SUCCESS;
}
return(original_n);
}
@ -4668,7 +4884,7 @@ ssize_t
os_file_pwrite(
IORequest& type,
os_file_t file,
const void* buf,
const byte* buf,
ulint n,
os_offset_t offset,
dberr_t* err)
@ -4680,7 +4896,7 @@ os_file_pwrite(
(void) my_atomic_addlint(&os_n_pending_writes, 1);
MONITOR_ATOMIC_INC(MONITOR_OS_PENDING_WRITES);
ssize_t n_bytes = os_file_io(type, file, const_cast<void*>(buf),
ssize_t n_bytes = os_file_io(type, file, const_cast<byte*>(buf),
n, offset, err);
(void) my_atomic_addlint(&os_n_pending_writes, -1);
@ -4696,8 +4912,9 @@ os_file_pwrite(
@param[in] offset file offset from the start where to read
@param[in] n number of bytes to read, starting from offset
@return DB_SUCCESS if request was successful, false if fail */
static MY_ATTRIBUTE((warn_unused_result))
dberr_t
os_file_write_func(
os_file_write_page(
IORequest& type,
const char* name,
os_file_t file,
@ -4711,7 +4928,7 @@ os_file_write_func(
ut_ad(type.validate());
ut_ad(n > 0);
ssize_t n_bytes = os_file_pwrite(type, file, buf, n, offset, &err);
ssize_t n_bytes = os_file_pwrite(type, file, (byte*)buf, n, offset, &err);
if ((ulint) n_bytes != n && !os_has_said_disk_full) {
@ -5195,6 +5412,31 @@ os_file_read_no_error_handling_func(
return(os_file_read_page(type, file, buf, offset, n, o, false));
}
/** NOTE! Use the corresponding macro os_file_write(), not directly
this function.
Requests a synchronous write operation; the work is delegated to
os_file_write_page().
@param[in,out]	type		IO flags; must describe a write request
@param[in]	name		file name, passed on to os_file_write_page()
@param[in]	file		handle to an open file
@param[in]	buf		buffer from which to write
@param[in]	offset		file offset from the start where to write
@param[in]	n		number of bytes to write, starting from offset
@return the error code returned by os_file_write_page() */
dberr_t
os_file_write_func(
	IORequest&	type,
	const char*	name,
	os_file_t	file,
	const void*	buf,
	os_offset_t	offset,
	ulint		n)
{
	ut_ad(type.validate());
	ut_ad(type.is_write());

	return(os_file_write_page(
		type, name, file,
		static_cast<const byte*>(buf), offset, n));
}
/** Check the existence and type of the given file.
@param[in] path path name of file
@param[out] exists true if the file exists
@ -5213,6 +5455,110 @@ os_file_status(
#endif /* _WIN32 */
}
/** Free storage space associated with a section of the file.
Thin dispatcher: forwards to the Windows implementation when built for
_WIN32 and to the POSIX implementation otherwise.
@param[in]	fh		Open file handle
@param[in]	off		Starting offset (SEEK_SET)
@param[in]	len		Size of the hole
@return DB_SUCCESS or error code */
dberr_t
os_file_punch_hole(
	os_file_t	fh,
	os_offset_t	off,
	os_offset_t	len)
{
#ifdef _WIN32
	return(os_file_punch_hole_win32(fh, off, len));
#else
	return(os_file_punch_hole_posix(fh, off, len));
#endif /* _WIN32 */
}
/** Free the storage space that follows the data written by this request.
The size of the hole is get_trim_length(len) and it starts right after
the written data, i.e. at off + len.
@param[in]	fh		Open file handle
@param[in]	off		Offset at which the write started (SEEK_SET)
@param[in]	len		Number of bytes that were written at off
@return DB_SUCCESS if a hole was punched, if there was nothing to trim,
or if the file system turned out not to support punch hole (punch hole
is then switched off via space_no_punch_hole());
DB_IO_NO_PUNCH_HOLE if punch hole is disabled for this request's file
node or by srv_use_trim; otherwise the error from os_file_punch_hole() */
dberr_t
IORequest::punch_hole(
	os_file_t	fh,
	os_offset_t	off,
	os_offset_t	len)
{
	/* In this debugging mode, we act as if punch hole is supported,
	and then skip any calls to actually punch a hole here.
	In this way, Transparent Page Compression is still being tested. */
	DBUG_EXECUTE_IF("ignore_punch_hole",
		return(DB_SUCCESS);
	);

	ulint	trim_len = get_trim_length(len);

	if (trim_len == 0) {
		return(DB_SUCCESS);
	}

	/* The hole begins where the written data ends. */
	off += len;

	/* Check does file system support punching holes for this
	tablespace. */
	if (!should_punch_hole() || !srv_use_trim) {
		return DB_IO_NO_PUNCH_HOLE;
	}

	/* Punch exactly trim_len bytes. Using the write length here
	would free a range of the wrong size and could reach past the
	end of the space reserved for this page. */
	dberr_t	err = os_file_punch_hole(fh, off, trim_len);

	if (err == DB_SUCCESS) {
		srv_stats.page_compressed_trim_op.inc();
	} else if (err == DB_IO_NO_PUNCH_HOLE) {
		/* If punch hole is not supported,
		set space so that it is not used. */
		space_no_punch_hole();
		err = DB_SUCCESS;
	}

	return (err);
}
/** Check if the file system supports sparse files.
Warning: on non-Windows builds the check is done by actually punching a
hole of UNIV_PAGE_SIZE bytes at offset 0, so this must only be called
on an empty file.
Note: On Windows only the path is used; elsewhere only the file handle.
@param[in]	path		File name
@param[in]	fh		File handle for the file - if opened
@return true if the file system supports sparse files */
bool
os_is_sparse_file_supported(const char* path, os_file_t fh)
{
	/* Debug builds can pretend that punch hole is supported; the
	real hole punching is then skipped, which keeps Transparent Page
	Compression testable. */
	DBUG_EXECUTE_IF("ignore_punch_hole",
		return(true);
	);

#ifdef _WIN32
	return(os_is_sparse_file_supported_win32(path));
#else
	/* Probe by punching a hole of one page at the very start of
	the file; success means sparse files are supported. */
	return(os_file_punch_hole_posix(fh, 0, UNIV_PAGE_SIZE)
	       == DB_SUCCESS);
#endif /* _WIN32 */
}
/** This function returns information about the specified file
@param[in] path pathname of the file
@param[out] stat_info information of a file in a directory
@ -5776,12 +6122,7 @@ AIO::reserve_slot(
const char* name,
void* buf,
os_offset_t offset,
ulint len,
ulint* write_size)/*!< in/out: Actual write size initialized
after fist successfull trim
operation for this page and if
initialized we do not trim again if
actual page size does not decrease. */
ulint len)
{
#ifdef WIN_ASYNC_IO
ut_a((len & 0xFFFFFFFFUL) == len);
@ -5871,8 +6212,6 @@ AIO::reserve_slot(
slot->ptr = slot->buf;
slot->offset = offset;
slot->err = DB_SUCCESS;
slot->write_size = write_size;
slot->is_log = type.is_log();
slot->original_len = static_cast<uint32>(len);
slot->io_already_done = false;
slot->buf = static_cast<byte*>(buf);
@ -6225,6 +6564,7 @@ Requests an asynchronous i/o operation.
@param[in,out] m2 message for the AIO handler (can be used to
identify a completed AIO operation); ignored
if mode is OS_AIO_SYNC
@return DB_SUCCESS or error code */
dberr_t
os_aio_func(
@ -6237,12 +6577,7 @@ os_aio_func(
ulint n,
bool read_only,
fil_node_t* m1,
void* m2,
ulint* write_size)/*!< in/out: Actual write size initialized
after fist successfull trim
operation for this page and if
initialized we do not trim again if
actual page size does not decrease. */
void* m2)
{
#ifdef WIN_ASYNC_IO
BOOL ret = TRUE;
@ -6278,7 +6613,7 @@ try_again:
Slot* slot;
slot = array->reserve_slot(type, m1, m2, file, name, buf, offset, n, write_size);
slot = array->reserve_slot(type, m1, m2, file, name, buf, offset, n);
if (type.is_read()) {

View file

@ -989,41 +989,6 @@ static monitor_info_t innodb_counter_info[] =
MONITOR_NONE,
MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_SAVED},
{"compress_trim_sect512", "compression",
"Number of sect-512 TRIMed by page compression",
MONITOR_NONE,
MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT512},
{"compress_trim_sect1024", "compression",
"Number of sect-1024 TRIMed by page compression",
MONITOR_NONE,
MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT1024},
{"compress_trim_sect2048", "compression",
"Number of sect-2048 TRIMed by page compression",
MONITOR_NONE,
MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT2048},
{"compress_trim_sect4096", "compression",
"Number of sect-4K TRIMed by page compression",
MONITOR_NONE,
MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT4096},
{"compress_trim_sect8192", "compression",
"Number of sect-8K TRIMed by page compression",
MONITOR_NONE,
MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT8192},
{"compress_trim_sect16384", "compression",
"Number of sect-16K TRIMed by page compression",
MONITOR_NONE,
MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT16384},
{"compress_trim_sect32768", "compression",
"Number of sect-32K TRIMed by page compression",
MONITOR_NONE,
MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT32768},
{"compress_pages_page_compressed", "compression",
"Number of pages compressed by page compression",
MONITOR_NONE,
@ -1034,11 +999,6 @@ static monitor_info_t innodb_counter_info[] =
MONITOR_NONE,
MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP},
{"compress_page_compressed_trim_op_saved", "compression",
"Number of TRIM operation saved by page compression",
MONITOR_NONE,
MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP_SAVED},
{"compress_pages_page_decompressed", "compression",
"Number of pages decompressed by page compression",
MONITOR_NONE,
@ -2073,36 +2033,12 @@ srv_mon_process_existing_counter(
case MONITOR_OVLD_PAGE_COMPRESS_SAVED:
value = srv_stats.page_compression_saved;
break;
case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT512:
value = srv_stats.page_compression_trim_sect512;
break;
case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT1024:
value = srv_stats.page_compression_trim_sect1024;
break;
case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT2048:
value = srv_stats.page_compression_trim_sect2048;
break;
case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT4096:
value = srv_stats.page_compression_trim_sect4096;
break;
case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT8192:
value = srv_stats.page_compression_trim_sect8192;
break;
case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT16384:
value = srv_stats.page_compression_trim_sect16384;
break;
case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT32768:
value = srv_stats.page_compression_trim_sect32768;
break;
case MONITOR_OVLD_PAGES_PAGE_COMPRESSED:
value = srv_stats.pages_page_compressed;
break;
case MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP:
value = srv_stats.page_compressed_trim_op;
break;
case MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP_SAVED:
value = srv_stats.page_compressed_trim_op_saved;
break;
case MONITOR_OVLD_PAGES_PAGE_DECOMPRESSED:
value = srv_stats.pages_page_decompressed;
break;

View file

@ -184,7 +184,7 @@ my_bool srv_use_native_aio = TRUE;
my_bool srv_numa_interleave = FALSE;
/* If this flag is TRUE, then we will use fallocate(PUNCH_HOLE)
to the pages */
UNIV_INTERN my_bool srv_use_trim = FALSE;
UNIV_INTERN my_bool srv_use_trim;
/* If this flag is TRUE, then we disable doublewrite buffer */
UNIV_INTERN my_bool srv_use_atomic_writes = FALSE;
/* If this flag IS TRUE, then we use this algorithm for page compressing the pages */
@ -1617,13 +1617,10 @@ srv_export_innodb_status(void)
export_vars.innodb_available_undo_logs = srv_available_undo_logs;
export_vars.innodb_page_compression_saved = srv_stats.page_compression_saved;
export_vars.innodb_page_compression_trim_sect512 = srv_stats.page_compression_trim_sect512;
export_vars.innodb_page_compression_trim_sect4096 = srv_stats.page_compression_trim_sect4096;
export_vars.innodb_index_pages_written = srv_stats.index_pages_written;
export_vars.innodb_non_index_pages_written = srv_stats.non_index_pages_written;
export_vars.innodb_pages_page_compressed = srv_stats.pages_page_compressed;
export_vars.innodb_page_compressed_trim_op = srv_stats.page_compressed_trim_op;
export_vars.innodb_page_compressed_trim_op_saved = srv_stats.page_compressed_trim_op_saved;
export_vars.innodb_pages_page_decompressed = srv_stats.pages_page_decompressed;
export_vars.innodb_pages_page_compression_error = srv_stats.pages_page_compression_error;
export_vars.innodb_pages_decrypted = srv_stats.pages_decrypted;

View file

@ -761,6 +761,8 @@ ut_strerr(
case DB_NO_FK_ON_S_BASE_COL:
return("Cannot add foreign key on the base column "
"of stored column");
case DB_IO_NO_PUNCH_HOLE:
return ("File system does not support punch hole (trim) operation.");
/* do not add default: in order to produce a warning if new code
is added to the enum but not added here */