mirror of
https://github.com/MariaDB/server.git
synced 2025-01-15 19:42:28 +01:00
MDEV-8901: InnoDB: Punch hole is incorrectly done also to log files, causing assertion and database corruption
Analysis: The problem is that the punch-hole code does not know the actual page size of the page, nor whether the page belongs to a data file or to a log file. Fix: Pass the file type and page size down to the OS layer to be used when trim is called. Also fix an unsafe null pointer access to the actual write_size.
This commit is contained in:
parent
90f2c82246
commit
21adad000a
10 changed files with 118 additions and 86 deletions
|
@ -219,6 +219,7 @@
|
|||
#cmakedefine HAVE_POSIX_FALLOCATE 1
|
||||
#cmakedefine HAVE_LINUX_FALLOC_H 1
|
||||
#cmakedefine HAVE_FALLOCATE 1
|
||||
#cmakedefine HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE 1
|
||||
#cmakedefine HAVE_PREAD 1
|
||||
#cmakedefine HAVE_PAUSE_INSTRUCTION 1
|
||||
#cmakedefine HAVE_FAKE_PAUSE_INSTRUCTION 1
|
||||
|
|
|
@ -1058,3 +1058,22 @@ CHECK_STRUCT_HAS_MEMBER("struct dirent" d_ino "dirent.h" STRUCT_DIRENT_HAS_D_IN
|
|||
CHECK_STRUCT_HAS_MEMBER("struct dirent" d_namlen "dirent.h" STRUCT_DIRENT_HAS_D_NAMLEN)
|
||||
SET(SPRINTF_RETURNS_INT 1)
|
||||
CHECK_INCLUDE_FILE(ucontext.h HAVE_UCONTEXT_H)
|
||||
|
||||
IF(NOT MSVC)
|
||||
CHECK_C_SOURCE_RUNS(
|
||||
"
|
||||
#define _GNU_SOURCE
|
||||
#include <fcntl.h>
|
||||
#include <linux/falloc.h>
|
||||
int main()
|
||||
{
|
||||
/* Ignore the return value for now. Check if the flags exist.
|
||||
The return value is checked at runtime. */
|
||||
fallocate(0, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, 0);
|
||||
|
||||
return(0);
|
||||
}"
|
||||
HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE
|
||||
)
|
||||
ENDIF()
|
||||
|
||||
|
|
|
@ -5223,9 +5223,9 @@ retry:
|
|||
success = os_file_write(node->name, node->handle, buf,
|
||||
offset, page_size * n_pages);
|
||||
#else
|
||||
success = os_aio(OS_FILE_WRITE, OS_AIO_SYNC,
|
||||
success = os_aio(OS_FILE_WRITE, 0, OS_AIO_SYNC,
|
||||
node->name, node->handle, buf,
|
||||
offset, page_size * n_pages,
|
||||
offset, page_size * n_pages, page_size,
|
||||
node, NULL, 0);
|
||||
#endif /* UNIV_HOTBACKUP */
|
||||
|
||||
|
@ -5872,12 +5872,14 @@ fil_io(
|
|||
/* Queue the aio request */
|
||||
ret = os_aio(
|
||||
type,
|
||||
is_log,
|
||||
mode | wake_later,
|
||||
node->name,
|
||||
node->handle,
|
||||
buf,
|
||||
offset,
|
||||
len,
|
||||
zip_size ? zip_size : UNIV_PAGE_SIZE,
|
||||
node,
|
||||
message,
|
||||
write_size);
|
||||
|
|
|
@ -311,10 +311,10 @@ The wrapper functions have the prefix of "innodb_". */
|
|||
# define os_file_close(file) \
|
||||
pfs_os_file_close_func(file, __FILE__, __LINE__)
|
||||
|
||||
# define os_aio(type, mode, name, file, buf, offset, \
|
||||
n, message1, message2, write_size) \
|
||||
pfs_os_aio_func(type, mode, name, file, buf, offset, \
|
||||
n, message1, message2, write_size, \
|
||||
# define os_aio(type, is_log, mode, name, file, buf, offset, \
|
||||
n, page_size, message1, message2, write_size) \
|
||||
pfs_os_aio_func(type, is_log, mode, name, file, buf, offset, \
|
||||
n, page_size, message1, message2, write_size, \
|
||||
__FILE__, __LINE__)
|
||||
|
||||
|
||||
|
@ -357,10 +357,10 @@ to original un-instrumented file I/O APIs */
|
|||
|
||||
# define os_file_close(file) os_file_close_func(file)
|
||||
|
||||
# define os_aio(type, mode, name, file, buf, offset, n, message1, \
|
||||
# define os_aio(type, is_log, mode, name, file, buf, offset, n, page_size, message1, \
|
||||
message2, write_size) \
|
||||
os_aio_func(type, mode, name, file, buf, offset, n, \
|
||||
message1, message2, write_size)
|
||||
os_aio_func(type, is_log, mode, name, file, buf, offset, n, \
|
||||
page_size, message1, message2, write_size)
|
||||
|
||||
# define os_file_read(file, buf, offset, n) \
|
||||
os_file_read_func(file, buf, offset, n)
|
||||
|
@ -749,6 +749,7 @@ ibool
|
|||
pfs_os_aio_func(
|
||||
/*============*/
|
||||
ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
|
||||
ulint is_log, /*!< in: 1 is OS_FILE_LOG or 0 */
|
||||
ulint mode, /*!< in: OS_AIO_NORMAL etc. I/O mode */
|
||||
const char* name, /*!< in: name of the file or path as a
|
||||
null-terminated string */
|
||||
|
@ -757,6 +758,7 @@ pfs_os_aio_func(
|
|||
to write */
|
||||
os_offset_t offset, /*!< in: file offset where to read or write */
|
||||
ulint n, /*!< in: number of bytes to read or write */
|
||||
ulint page_size, /*!< in: page size in bytes */
|
||||
fil_node_t* message1,/*!< in: message for the aio handler
|
||||
(can be used to identify a completed
|
||||
aio operation); ignored if mode is
|
||||
|
@ -1107,6 +1109,7 @@ ibool
|
|||
os_aio_func(
|
||||
/*========*/
|
||||
ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
|
||||
ulint is_log, /*!< in: 1 is OS_FILE_LOG or 0 */
|
||||
ulint mode, /*!< in: OS_AIO_NORMAL, ..., possibly ORed
|
||||
to OS_AIO_SIMULATED_WAKE_LATER: the
|
||||
last flag advises this function not to wake
|
||||
|
@ -1127,6 +1130,7 @@ os_aio_func(
|
|||
to write */
|
||||
os_offset_t offset, /*!< in: file offset where to read or write */
|
||||
ulint n, /*!< in: number of bytes to read or write */
|
||||
ulint page_size, /*!< in: page size in bytes */
|
||||
fil_node_t* message1,/*!< in: message for the aio handler
|
||||
(can be used to identify a completed
|
||||
aio operation); ignored if mode is
|
||||
|
|
|
@ -199,6 +199,7 @@ ibool
|
|||
pfs_os_aio_func(
|
||||
/*============*/
|
||||
ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
|
||||
ulint is_log, /*!< in: 1 is OS_FILE_LOG or 0 */
|
||||
ulint mode, /*!< in: OS_AIO_NORMAL etc. I/O mode */
|
||||
const char* name, /*!< in: name of the file or path as a
|
||||
null-terminated string */
|
||||
|
@ -207,6 +208,7 @@ pfs_os_aio_func(
|
|||
to write */
|
||||
os_offset_t offset, /*!< in: file offset where to read or write */
|
||||
ulint n, /*!< in: number of bytes to read or write */
|
||||
ulint page_size, /*!< in: page size in bytes */
|
||||
fil_node_t* message1,/*!< in: message for the aio handler
|
||||
(can be used to identify a completed
|
||||
aio operation); ignored if mode is
|
||||
|
@ -234,8 +236,8 @@ pfs_os_aio_func(
|
|||
: PSI_FILE_READ,
|
||||
src_file, src_line);
|
||||
|
||||
result = os_aio_func(type, mode, name, file, buf, offset,
|
||||
n, message1, message2, write_size);
|
||||
result = os_aio_func(type, is_log, mode, name, file, buf, offset,
|
||||
n, page_size, message1, message2, write_size);
|
||||
|
||||
register_pfs_file_io_end(locker, n);
|
||||
|
||||
|
|
|
@ -49,9 +49,8 @@ Created 10/21/1995 Heikki Tuuri
|
|||
#include "buf0buf.h"
|
||||
#include "srv0mon.h"
|
||||
#include "srv0srv.h"
|
||||
#ifdef HAVE_POSIX_FALLOCATE
|
||||
#ifdef HAVE_LINUX_UNISTD_H
|
||||
#include "unistd.h"
|
||||
#include "fcntl.h"
|
||||
#endif
|
||||
#ifndef UNIV_HOTBACKUP
|
||||
# include "os0sync.h"
|
||||
|
@ -84,14 +83,10 @@ Created 10/21/1995 Heikki Tuuri
|
|||
#include <linux/falloc.h>
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_FALLOCATE)
|
||||
#ifndef FALLOC_FL_KEEP_SIZE
|
||||
#define FALLOC_FL_KEEP_SIZE 0x01
|
||||
#endif
|
||||
#ifndef FALLOC_FL_PUNCH_HOLE
|
||||
#define FALLOC_FL_PUNCH_HOLE 0x02
|
||||
#endif
|
||||
#endif
|
||||
#ifdef HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE
|
||||
# include <fcntl.h>
|
||||
# include <linux/falloc.h>
|
||||
#endif /* HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE */
|
||||
|
||||
#ifdef HAVE_LZO
|
||||
#include "lzo/lzo1x.h"
|
||||
|
@ -209,6 +204,9 @@ struct os_aio_slot_t{
|
|||
write */
|
||||
byte* buf; /*!< buffer used in i/o */
|
||||
ulint type; /*!< OS_FILE_READ or OS_FILE_WRITE */
|
||||
ulint is_log; /*!< 1 if OS_FILE_LOG or 0 */
|
||||
ulint page_size; /*!< UNIV_PAGE_SIZE or zip_size */
|
||||
|
||||
os_offset_t offset; /*!< file offset in bytes */
|
||||
os_file_t file; /*!< file where to read or write */
|
||||
const char* name; /*!< file name or path */
|
||||
|
@ -4474,6 +4472,7 @@ os_aio_slot_t*
|
|||
os_aio_array_reserve_slot(
|
||||
/*======================*/
|
||||
ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
|
||||
ulint is_log, /*!< in: 1 is OS_FILE_LOG or 0 */
|
||||
os_aio_array_t* array, /*!< in: aio array */
|
||||
fil_node_t* message1,/*!< in: message to be passed along with
|
||||
the aio operation */
|
||||
|
@ -4486,6 +4485,7 @@ os_aio_array_reserve_slot(
|
|||
to write */
|
||||
os_offset_t offset, /*!< in: file offset */
|
||||
ulint len, /*!< in: length of the block to read or write */
|
||||
ulint page_size, /*!< in: page size in bytes */
|
||||
ulint* write_size)/*!< in/out: Actual write size initialized
|
||||
after fist successfull trim
|
||||
operation for this page and if
|
||||
|
@ -4580,6 +4580,8 @@ found:
|
|||
slot->offset = offset;
|
||||
slot->io_already_done = FALSE;
|
||||
slot->write_size = write_size;
|
||||
slot->is_log = is_log;
|
||||
slot->page_size = page_size;
|
||||
|
||||
if (message1) {
|
||||
slot->file_block_size = fil_node_get_block_size(message1);
|
||||
|
@ -4836,6 +4838,7 @@ ibool
|
|||
os_aio_func(
|
||||
/*========*/
|
||||
ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
|
||||
ulint is_log, /*!< in: 1 is OS_FILE_LOG or 0 */
|
||||
ulint mode, /*!< in: OS_AIO_NORMAL, ..., possibly ORed
|
||||
to OS_AIO_SIMULATED_WAKE_LATER: the
|
||||
last flag advises this function not to wake
|
||||
|
@ -4856,6 +4859,7 @@ os_aio_func(
|
|||
to write */
|
||||
os_offset_t offset, /*!< in: file offset where to read or write */
|
||||
ulint n, /*!< in: number of bytes to read or write */
|
||||
ulint page_size, /*!< in: page size in bytes */
|
||||
fil_node_t* message1,/*!< in: message for the aio handler
|
||||
(can be used to identify a completed
|
||||
aio operation); ignored if mode is
|
||||
|
@ -4982,8 +4986,8 @@ try_again:
|
|||
array = NULL; /* Eliminate compiler warning */
|
||||
}
|
||||
|
||||
slot = os_aio_array_reserve_slot(type, array, message1, message2, file,
|
||||
name, buf, offset, n, write_size);
|
||||
slot = os_aio_array_reserve_slot(type, is_log, array, message1, message2, file,
|
||||
name, buf, offset, n, page_size, write_size);
|
||||
|
||||
if (type == OS_FILE_READ) {
|
||||
if (srv_use_native_aio) {
|
||||
|
@ -5251,7 +5255,10 @@ os_aio_windows_handle(
|
|||
ret_val = ret && len == slot->len;
|
||||
}
|
||||
|
||||
if (slot->type == OS_FILE_WRITE && srv_use_trim && os_fallocate_failed == FALSE) {
|
||||
if (slot->type == OS_FILE_WRITE &&
|
||||
!slot->is_log &&
|
||||
srv_use_trim &&
|
||||
os_fallocate_failed == FALSE) {
|
||||
// Deallocate unused blocks from file system
|
||||
os_file_trim(slot);
|
||||
}
|
||||
|
@ -5345,7 +5352,10 @@ retry:
|
|||
/* We have not overstepped to next segment. */
|
||||
ut_a(slot->pos < end_pos);
|
||||
|
||||
if (slot->type == OS_FILE_WRITE && srv_use_trim && os_fallocate_failed == FALSE) {
|
||||
if (slot->type == OS_FILE_WRITE &&
|
||||
!slot->is_log &&
|
||||
srv_use_trim &&
|
||||
os_fallocate_failed == FALSE) {
|
||||
// Deallocate unused blocks from file system
|
||||
os_file_trim(slot);
|
||||
}
|
||||
|
@ -6220,19 +6230,13 @@ os_file_trim(
|
|||
{
|
||||
|
||||
size_t len = slot->len;
|
||||
size_t trim_len = UNIV_PAGE_SIZE - len;
|
||||
size_t trim_len = slot->page_size - len;
|
||||
os_offset_t off = slot->offset + len;
|
||||
size_t bsize = slot->file_block_size;
|
||||
|
||||
// len here should be alligned to sector size
|
||||
ut_ad((trim_len % bsize) == 0);
|
||||
ut_ad((len % bsize) == 0);
|
||||
ut_ad(bsize != 0);
|
||||
ut_ad((off % bsize) == 0);
|
||||
|
||||
#ifdef UNIV_TRIM_DEBUG
|
||||
fprintf(stderr, "Note: TRIM: write_size %lu trim_len %lu len %lu off %lu bz %lu\n",
|
||||
*slot->write_size, trim_len, len, off, bsize);
|
||||
slot->write_size ? *slot->write_size : 0, trim_len, len, off, bsize);
|
||||
#endif
|
||||
|
||||
// Nothing to do if trim length is zero or if actual write
|
||||
|
@ -6247,22 +6251,19 @@ os_file_trim(
|
|||
*slot->write_size > 0 &&
|
||||
len >= *slot->write_size)) {
|
||||
|
||||
#ifdef UNIV_PAGECOMPRESS_DEBUG
|
||||
fprintf(stderr, "Note: TRIM: write_size %lu trim_len %lu len %lu\n",
|
||||
*slot->write_size, trim_len, len);
|
||||
#endif
|
||||
if (slot->write_size) {
|
||||
if (*slot->write_size > 0 && len >= *slot->write_size) {
|
||||
srv_stats.page_compressed_trim_op_saved.inc();
|
||||
}
|
||||
|
||||
if (*slot->write_size > 0 && len >= *slot->write_size) {
|
||||
srv_stats.page_compressed_trim_op_saved.inc();
|
||||
*slot->write_size = len;
|
||||
}
|
||||
|
||||
*slot->write_size = len;
|
||||
|
||||
return (TRUE);
|
||||
}
|
||||
|
||||
#ifdef __linux__
|
||||
#if defined(HAVE_FALLOCATE)
|
||||
#if defined(HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE)
|
||||
int ret = fallocate(slot->file, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, off, trim_len);
|
||||
|
||||
if (ret) {
|
||||
|
@ -6300,7 +6301,7 @@ os_file_trim(
|
|||
*slot->write_size = 0;
|
||||
}
|
||||
|
||||
#endif /* HAVE_FALLOCATE ... */
|
||||
#endif /* HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE ... */
|
||||
|
||||
#elif defined(_WIN32)
|
||||
FILE_LEVEL_TRIM flt;
|
||||
|
|
|
@ -5253,9 +5253,9 @@ retry:
|
|||
success = os_file_write(node->name, node->handle, buf,
|
||||
offset, page_size * n_pages);
|
||||
#else
|
||||
success = os_aio(OS_FILE_WRITE, OS_AIO_SYNC,
|
||||
success = os_aio(OS_FILE_WRITE, 0, OS_AIO_SYNC,
|
||||
node->name, node->handle, buf,
|
||||
offset, page_size * n_pages,
|
||||
offset, page_size * n_pages, page_size,
|
||||
node, NULL, space_id, NULL, 0);
|
||||
#endif /* UNIV_HOTBACKUP */
|
||||
|
||||
|
@ -5918,12 +5918,14 @@ _fil_io(
|
|||
/* Queue the aio request */
|
||||
ret = os_aio(
|
||||
type,
|
||||
is_log,
|
||||
mode | wake_later,
|
||||
node->name,
|
||||
node->handle,
|
||||
buf,
|
||||
offset,
|
||||
len,
|
||||
zip_size ? zip_size : UNIV_PAGE_SIZE,
|
||||
node,
|
||||
message,
|
||||
space_id,
|
||||
|
|
|
@ -321,11 +321,11 @@ The wrapper functions have the prefix of "innodb_". */
|
|||
# define os_file_close(file) \
|
||||
pfs_os_file_close_func(file, __FILE__, __LINE__)
|
||||
|
||||
# define os_aio(type, mode, name, file, buf, offset, \
|
||||
n, message1, message2, space_id, \
|
||||
# define os_aio(type, is_log, mode, name, file, buf, offset, \
|
||||
n, page_size, message1, message2, space_id, \
|
||||
trx, write_size) \
|
||||
pfs_os_aio_func(type, mode, name, file, buf, offset, \
|
||||
n, message1, message2, space_id, trx, write_size, \
|
||||
pfs_os_aio_func(type, is_log, mode, name, file, buf, offset, \
|
||||
n, page_size, message1, message2, space_id, trx, write_size, \
|
||||
__FILE__, __LINE__)
|
||||
|
||||
# define os_file_read(file, buf, offset, n) \
|
||||
|
@ -372,10 +372,10 @@ to original un-instrumented file I/O APIs */
|
|||
|
||||
# define os_file_close(file) os_file_close_func(file)
|
||||
|
||||
# define os_aio(type, mode, name, file, buf, offset, n, message1, \
|
||||
# define os_aio(type, is_log, mode, name, file, buf, offset, n, page_size, message1, \
|
||||
message2, space_id, trx, write_size) \
|
||||
os_aio_func(type, mode, name, file, buf, offset, n, \
|
||||
message1, message2, space_id, trx, write_size)
|
||||
os_aio_func(type, is_log, mode, name, file, buf, offset, n, \
|
||||
page_size, message1, message2, space_id, trx, write_size)
|
||||
|
||||
# define os_file_read(file, buf, offset, n) \
|
||||
os_file_read_func(file, buf, offset, n, NULL)
|
||||
|
@ -772,6 +772,7 @@ ibool
|
|||
pfs_os_aio_func(
|
||||
/*============*/
|
||||
ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
|
||||
ulint is_log, /*!< in: 1 is OS_FILE_LOG or 0 */
|
||||
ulint mode, /*!< in: OS_AIO_NORMAL etc. I/O mode */
|
||||
const char* name, /*!< in: name of the file or path as a
|
||||
null-terminated string */
|
||||
|
@ -780,6 +781,7 @@ pfs_os_aio_func(
|
|||
to write */
|
||||
os_offset_t offset, /*!< in: file offset where to read or write */
|
||||
ulint n, /*!< in: number of bytes to read or write */
|
||||
ulint page_size,/*!< in: page size in bytes */
|
||||
fil_node_t* message1,/*!< in: message for the aio handler
|
||||
(can be used to identify a completed
|
||||
aio operation); ignored if mode is
|
||||
|
@ -1139,6 +1141,7 @@ ibool
|
|||
os_aio_func(
|
||||
/*========*/
|
||||
ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
|
||||
ulint is_log, /*!< in: 1 is OS_FILE_LOG or 0 */
|
||||
ulint mode, /*!< in: OS_AIO_NORMAL, ..., possibly ORed
|
||||
to OS_AIO_SIMULATED_WAKE_LATER: the
|
||||
last flag advises this function not to wake
|
||||
|
@ -1159,6 +1162,7 @@ os_aio_func(
|
|||
to write */
|
||||
os_offset_t offset, /*!< in: file offset where to read or write */
|
||||
ulint n, /*!< in: number of bytes to read or write */
|
||||
ulint page_size, /*!< in: page size in bytes */
|
||||
fil_node_t* message1,/*!< in: message for the aio handler
|
||||
(can be used to identify a completed
|
||||
aio operation); ignored if mode is
|
||||
|
|
|
@ -202,6 +202,7 @@ ibool
|
|||
pfs_os_aio_func(
|
||||
/*============*/
|
||||
ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
|
||||
ulint is_log, /*!< in: 1 is OS_FILE_LOG or 0 */
|
||||
ulint mode, /*!< in: OS_AIO_NORMAL etc. I/O mode */
|
||||
const char* name, /*!< in: name of the file or path as a
|
||||
null-terminated string */
|
||||
|
@ -210,6 +211,7 @@ pfs_os_aio_func(
|
|||
to write */
|
||||
os_offset_t offset, /*!< in: file offset where to read or write */
|
||||
ulint n, /*!< in: number of bytes to read or write */
|
||||
ulint page_size, /*!< in: page size in bytes */
|
||||
fil_node_t* message1,/*!< in: message for the aio handler
|
||||
(can be used to identify a completed
|
||||
aio operation); ignored if mode is
|
||||
|
@ -239,8 +241,8 @@ pfs_os_aio_func(
|
|||
: PSI_FILE_READ,
|
||||
src_file, src_line);
|
||||
|
||||
result = os_aio_func(type, mode, name, file, buf, offset,
|
||||
n, message1, message2, space_id, trx,
|
||||
result = os_aio_func(type, is_log, mode, name, file, buf, offset,
|
||||
n, page_size, message1, message2, space_id, trx,
|
||||
write_size);
|
||||
|
||||
register_pfs_file_io_end(locker, n);
|
||||
|
|
|
@ -50,9 +50,8 @@ Created 10/21/1995 Heikki Tuuri
|
|||
#include "trx0trx.h"
|
||||
#include "srv0mon.h"
|
||||
#include "srv0srv.h"
|
||||
#ifdef HAVE_POSIX_FALLOCATE
|
||||
#ifdef HAVE_LINUX_UNISTD_H
|
||||
#include "unistd.h"
|
||||
#include "fcntl.h"
|
||||
#endif
|
||||
#ifndef UNIV_HOTBACKUP
|
||||
# include "os0sync.h"
|
||||
|
@ -89,14 +88,10 @@ Created 10/21/1995 Heikki Tuuri
|
|||
#include <linux/falloc.h>
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_FALLOCATE)
|
||||
#ifndef FALLOC_FL_KEEP_SIZE
|
||||
#define FALLOC_FL_KEEP_SIZE 0x01
|
||||
#endif
|
||||
#ifndef FALLOC_FL_PUNCH_HOLE
|
||||
#define FALLOC_FL_PUNCH_HOLE 0x02
|
||||
#endif
|
||||
#endif
|
||||
#ifdef HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE
|
||||
# include <fcntl.h>
|
||||
# include <linux/falloc.h>
|
||||
#endif /* HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE */
|
||||
|
||||
#ifdef HAVE_LZO
|
||||
#include "lzo/lzo1x.h"
|
||||
|
@ -221,6 +216,9 @@ struct os_aio_slot_t{
|
|||
write */
|
||||
byte* buf; /*!< buffer used in i/o */
|
||||
ulint type; /*!< OS_FILE_READ or OS_FILE_WRITE */
|
||||
ulint is_log; /*!< 1 is OS_FILE_LOG or 0 */
|
||||
ulint page_size; /*!< UNIV_PAGE_SIZE or zip_size */
|
||||
|
||||
os_offset_t offset; /*!< file offset in bytes */
|
||||
os_file_t file; /*!< file where to read or write */
|
||||
const char* name; /*!< file name or path */
|
||||
|
@ -4573,6 +4571,7 @@ os_aio_slot_t*
|
|||
os_aio_array_reserve_slot(
|
||||
/*======================*/
|
||||
ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
|
||||
ulint is_log, /*!< in: 1 is OS_FILE_LOG or 0 */
|
||||
os_aio_array_t* array, /*!< in: aio array */
|
||||
fil_node_t* message1,/*!< in: message to be passed along with
|
||||
the aio operation */
|
||||
|
@ -4585,6 +4584,7 @@ os_aio_array_reserve_slot(
|
|||
to write */
|
||||
os_offset_t offset, /*!< in: file offset */
|
||||
ulint len, /*!< in: length of the block to read or write */
|
||||
ulint page_size, /*!< in: page size in bytes */
|
||||
ulint space_id,
|
||||
ulint* write_size)/*!< in/out: Actual write size initialized
|
||||
after first successfull trim
|
||||
|
@ -4681,6 +4681,8 @@ found:
|
|||
slot->offset = offset;
|
||||
slot->io_already_done = FALSE;
|
||||
slot->space_id = space_id;
|
||||
slot->is_log = is_log;
|
||||
slot->page_size = page_size;
|
||||
|
||||
if (message1) {
|
||||
slot->file_block_size = fil_node_get_block_size(message1);
|
||||
|
@ -4934,6 +4936,7 @@ ibool
|
|||
os_aio_func(
|
||||
/*========*/
|
||||
ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
|
||||
ulint is_log, /*!< in: 1 is OS_FILE_LOG or 0 */
|
||||
ulint mode, /*!< in: OS_AIO_NORMAL, ..., possibly ORed
|
||||
to OS_AIO_SIMULATED_WAKE_LATER: the
|
||||
last flag advises this function not to wake
|
||||
|
@ -4954,6 +4957,7 @@ os_aio_func(
|
|||
to write */
|
||||
os_offset_t offset, /*!< in: file offset where to read or write */
|
||||
ulint n, /*!< in: number of bytes to read or write */
|
||||
ulint page_size, /*!< in: page size in bytes */
|
||||
fil_node_t* message1,/*!< in: message for the aio handler
|
||||
(can be used to identify a completed
|
||||
aio operation); ignored if mode is
|
||||
|
@ -5072,8 +5076,8 @@ try_again:
|
|||
trx->io_read += n;
|
||||
}
|
||||
|
||||
slot = os_aio_array_reserve_slot(type, array, message1, message2, file,
|
||||
name, buf, offset, n, space_id,
|
||||
slot = os_aio_array_reserve_slot(type, is_log, array, message1, message2, file,
|
||||
name, buf, offset, n, page_size, space_id,
|
||||
write_size);
|
||||
|
||||
if (type == OS_FILE_READ) {
|
||||
|
@ -5294,7 +5298,7 @@ os_aio_windows_handle(
|
|||
}
|
||||
|
||||
if (slot->type == OS_FILE_WRITE) {
|
||||
if (srv_use_trim && os_fallocate_failed == FALSE) {
|
||||
if (!slot->is_log && srv_use_trim && os_fallocate_failed == FALSE) {
|
||||
// Deallocate unused blocks from file system
|
||||
os_file_trim(slot);
|
||||
}
|
||||
|
@ -5390,7 +5394,7 @@ retry:
|
|||
ut_a(slot->pos < end_pos);
|
||||
|
||||
if (slot->type == OS_FILE_WRITE) {
|
||||
if (srv_use_trim && os_fallocate_failed == FALSE) {
|
||||
if (!slot->is_log && srv_use_trim && os_fallocate_failed == FALSE) {
|
||||
// Deallocate unused blocks from file system
|
||||
os_file_trim(slot);
|
||||
}
|
||||
|
@ -6305,19 +6309,13 @@ os_file_trim(
|
|||
os_aio_slot_t* slot) /*!< in: slot structure */
|
||||
{
|
||||
size_t len = slot->len;
|
||||
size_t trim_len = UNIV_PAGE_SIZE - slot->len;
|
||||
size_t trim_len = slot->page_size - slot->len;
|
||||
os_offset_t off __attribute__((unused)) = slot->offset + len;
|
||||
size_t bsize = slot->file_block_size;
|
||||
|
||||
// len here should be alligned to sector size
|
||||
ut_ad((trim_len % bsize) == 0);
|
||||
ut_ad((len % bsize) == 0);
|
||||
ut_ad(bsize != 0);
|
||||
ut_ad((off % bsize) == 0);
|
||||
|
||||
#ifdef UNIV_TRIM_DEBUG
|
||||
fprintf(stderr, "Note: TRIM: write_size %lu trim_len %lu len %lu off %lu bz %lu\n",
|
||||
*slot->write_size, trim_len, len, off, bsize);
|
||||
slot->write_size ? *slot->write_size : 0, trim_len, len, off, bsize);
|
||||
#endif
|
||||
|
||||
// Nothing to do if trim length is zero or if actual write
|
||||
|
@ -6332,22 +6330,19 @@ os_file_trim(
|
|||
*slot->write_size > 0 &&
|
||||
len >= *slot->write_size)) {
|
||||
|
||||
#ifdef UNIV_PAGECOMPRESS_DEBUG
|
||||
fprintf(stderr, "Note: TRIM: write_size %lu trim_len %lu len %lu\n",
|
||||
*slot->write_size, trim_len, len);
|
||||
#endif
|
||||
if (slot->write_size) {
|
||||
if (*slot->write_size > 0 && len >= *slot->write_size) {
|
||||
srv_stats.page_compressed_trim_op_saved.inc();
|
||||
}
|
||||
|
||||
if (*slot->write_size > 0 && len >= *slot->write_size) {
|
||||
srv_stats.page_compressed_trim_op_saved.inc();
|
||||
*slot->write_size = len;
|
||||
}
|
||||
|
||||
*slot->write_size = len;
|
||||
|
||||
return (TRUE);
|
||||
}
|
||||
|
||||
#ifdef __linux__
|
||||
#if defined(HAVE_FALLOCATE)
|
||||
#if defined(HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE)
|
||||
int ret = fallocate(slot->file, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, off, trim_len);
|
||||
|
||||
if (ret) {
|
||||
|
@ -6385,7 +6380,7 @@ os_file_trim(
|
|||
*slot->write_size = 0;
|
||||
}
|
||||
|
||||
#endif /* HAVE_FALLOCATE ... */
|
||||
#endif /* HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE ... */
|
||||
|
||||
#elif defined(_WIN32)
|
||||
FILE_LEVEL_TRIM flt;
|
||||
|
|
Loading…
Reference in a new issue