mirror of
https://github.com/MariaDB/server.git
synced 2026-04-18 06:15:32 +02:00
MDEV-4338 - Support FusionIO/directFS atomic writes
This commit is contained in:
parent
5d4ba5589d
commit
0aa607a01a
5 changed files with 153 additions and 0 deletions
|
|
@ -4865,6 +4865,24 @@ fil_extend_space_to_desired_size(
|
|||
start_page_no = space->size;
|
||||
file_start_page_no = space->size - node->size;
|
||||
|
||||
#ifdef HAVE_POSIX_FALLOCATE
|
||||
if (srv_use_posix_fallocate) {
|
||||
offset_high = size_after_extend * page_size / (4ULL*1024*1024*1024);
|
||||
offset_low = size_after_extend * page_size % (4ULL*1024*1024*1024);
|
||||
|
||||
mutex_exit(&fil_system->mutex);
|
||||
success = os_file_set_size(node->name, node->handle,
|
||||
offset_low, offset_high);
|
||||
mutex_enter(&fil_system->mutex);
|
||||
if (success) {
|
||||
node->size += (size_after_extend - start_page_no);
|
||||
space->size += (size_after_extend - start_page_no);
|
||||
os_has_said_disk_full = FALSE;
|
||||
}
|
||||
goto complete_io;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Extend at most 64 pages at a time */
|
||||
buf_size = ut_min(64, size_after_extend - start_page_no) * page_size;
|
||||
buf2 = mem_alloc(buf_size + page_size);
|
||||
|
|
@ -4921,6 +4939,10 @@ fil_extend_space_to_desired_size(
|
|||
|
||||
mem_free(buf2);
|
||||
|
||||
#ifdef HAVE_POSIX_FALLOCATE
|
||||
complete_io:
|
||||
#endif
|
||||
|
||||
fil_node_complete_io(node, fil_system, OS_FILE_WRITE);
|
||||
|
||||
*actual_size = space->size;
|
||||
|
|
|
|||
|
|
@ -185,6 +185,8 @@ static my_bool innobase_file_format_check = TRUE;
|
|||
static my_bool innobase_log_archive = FALSE;
|
||||
static char* innobase_log_arch_dir = NULL;
|
||||
#endif /* UNIV_LOG_ARCHIVE */
|
||||
static my_bool innobase_use_atomic_writes = FALSE;
|
||||
static my_bool innobase_use_fallocate = TRUE;
|
||||
static my_bool innobase_use_doublewrite = TRUE;
|
||||
static my_bool innobase_use_checksums = TRUE;
|
||||
static my_bool innobase_fast_checksum = FALSE;
|
||||
|
|
@ -3057,6 +3059,38 @@ innobase_change_buffering_inited_ok:
|
|||
srv_kill_idle_transaction = 0;
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_POSIX_FALLOCATE
|
||||
srv_use_posix_fallocate = (ibool) innobase_use_fallocate;
|
||||
#endif
|
||||
srv_use_atomic_writes = (ibool) innobase_use_atomic_writes;
|
||||
if (innobase_use_atomic_writes) {
|
||||
fprintf(stderr, "InnoDB: using atomic writes.\n");
|
||||
|
||||
/* Force doublewrite buffer off, atomic writes replace it. */
|
||||
if (srv_use_doublewrite_buf) {
|
||||
fprintf(stderr, "InnoDB: Switching off doublewrite buffer "
|
||||
"because of atomic writes.\n");
|
||||
innobase_use_doublewrite = srv_use_doublewrite_buf = FALSE;
|
||||
}
|
||||
|
||||
/* Force O_DIRECT on Unixes (on Windows writes are always unbuffered)*/
|
||||
#ifndef _WIN32
|
||||
if(!innobase_file_flush_method ||
|
||||
!strstr(innobase_file_flush_method, "O_DIRECT")) {
|
||||
innobase_file_flush_method =
|
||||
srv_file_flush_method_str = (char*)"O_DIRECT";
|
||||
fprintf(stderr, "InnoDB: using O_DIRECT due to atomic writes.\n");
|
||||
}
|
||||
#endif
|
||||
#ifdef HAVE_POSIX_FALLOCATE
|
||||
/* Due to a bug in directFS, using atomics needs
 * posix_fallocate to extend the file:
 * pwrite() past the end of the file won't work.
 */
|
||||
srv_use_posix_fallocate = TRUE;
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef HAVE_PSI_INTERFACE
|
||||
/* Register keys with MySQL performance schema */
|
||||
if (PSI_server) {
|
||||
|
|
@ -12615,6 +12649,20 @@ static MYSQL_SYSVAR_BOOL(doublewrite, innobase_use_doublewrite,
|
|||
"Disable with --skip-innodb-doublewrite.",
|
||||
NULL, NULL, TRUE);
|
||||
|
||||
/* innodb_use_atomic_writes, backed by innobase_use_atomic_writes and
flagged PLUGIN_VAR_READONLY. The help text is assembled from adjacent
string literals, so every fragment that ends a sentence must carry its
own trailing space. */
static MYSQL_SYSVAR_BOOL(use_atomic_writes, innobase_use_atomic_writes,
  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
  "Prevent partial page writes, via atomic writes. "
  "The option is used to prevent partial writes in case of a crash/poweroff, "
  "as faster alternative to doublewrite buffer. "
  "Currently this option works only "
  "on Linux only with FusionIO device, and directFS filesystem.",
  NULL, NULL, FALSE);
|
||||
|
||||
/* innodb_use_fallocate, backed by innobase_use_fallocate and flagged
PLUGIN_VAR_READONLY. The help text is split into adjacent literals
that concatenate to a single string. */
static MYSQL_SYSVAR_BOOL(use_fallocate, innobase_use_fallocate,
  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
  "Preallocate files fast, using operating system functionality. "
  "On POSIX systems, posix_fallocate system call is used.",
  NULL, NULL, TRUE);
|
||||
|
||||
static MYSQL_SYSVAR_ULONG(io_capacity, srv_io_capacity,
|
||||
PLUGIN_VAR_RQCMDARG,
|
||||
"Number of IOPs the server can do. Tunes the background IO rate",
|
||||
|
|
@ -13259,6 +13307,8 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
|
|||
MYSQL_SYSVAR(doublewrite_file),
|
||||
MYSQL_SYSVAR(data_home_dir),
|
||||
MYSQL_SYSVAR(doublewrite),
|
||||
MYSQL_SYSVAR(use_atomic_writes),
|
||||
MYSQL_SYSVAR(use_fallocate),
|
||||
MYSQL_SYSVAR(recovery_stats),
|
||||
MYSQL_SYSVAR(fast_shutdown),
|
||||
MYSQL_SYSVAR(file_io_threads),
|
||||
|
|
|
|||
|
|
@ -249,6 +249,11 @@ extern ulong srv_sys_stats_root_page;
|
|||
#endif
|
||||
|
||||
extern ibool srv_use_doublewrite_buf;
|
||||
extern ibool srv_use_atomic_writes;
|
||||
#ifdef HAVE_POSIX_FALLOCATE
|
||||
extern ibool srv_use_posix_fallocate;
|
||||
#endif
|
||||
|
||||
extern ibool srv_use_checksums;
|
||||
extern ibool srv_fast_checksum;
|
||||
|
||||
|
|
|
|||
|
|
@ -1454,6 +1454,43 @@ os_file_set_nocache(
|
|||
#endif
|
||||
}
|
||||
|
||||
|
||||
#ifdef __linux__
|
||||
#include <sys/ioctl.h>
|
||||
#ifndef DFS_IOCTL_ATOMIC_WRITE_SET
|
||||
#define DFS_IOCTL_ATOMIC_WRITE_SET _IOW(0x95, 2, uint)
|
||||
#endif
|
||||
/****************************************************************//**
Asks the file system to enable atomic writes on an open file by issuing
the DFS_IOCTL_ATOMIC_WRITE_SET ioctl with the option value 1.
On failure a message containing the file name and errno is printed
to stderr.
@param file	handle of the already opened file
@param name	file name, used only in the error message
@return 0 on success, otherwise the non-zero value returned by ioctl() */
static int os_file_set_atomic_writes(os_file_t file, const char *name)
{
	int	atomic_option = 1;
	int	ret = ioctl(file, DFS_IOCTL_ATOMIC_WRITE_SET, &atomic_option);

	if (ret) {
		fprintf(stderr,
			"InnoDB : can't use atomic write on %s, errno %d\n",
			name, errno);
	}

	return ret;
}
|
||||
#else
|
||||
/****************************************************************//**
Fallback used when the platform-specific implementation is not compiled
in: atomic writes cannot be enabled. Prints an explanatory message to
stderr, records an "invalid" error code (SetLastError() under _WIN32,
errno otherwise) and reports failure.
@param file	handle of the opened file (not used here)
@param name	file name, used only in the error message
@return always -1 */
static int os_file_set_atomic_writes(os_file_t file, const char *name)
{
	(void) file;	/* nothing can be done with the handle here */

	fprintf(stderr,
		"InnoDB : can't use atomic writes on %s - not implemented on this platform. "
		"innodb_use_atomic_writes needs to be 0.\n",
		name);
#ifdef _WIN32
	SetLastError(ERROR_INVALID_FUNCTION);
#else
	errno = EINVAL;
#endif
	return -1;
}
|
||||
#endif
|
||||
|
||||
/****************************************************************//**
|
||||
NOTE! Use the corresponding macro os_file_create(), not directly
|
||||
this function!
|
||||
|
|
@ -1618,6 +1655,13 @@ try_again:
|
|||
}
|
||||
}
|
||||
|
||||
if (srv_use_atomic_writes && type == OS_DATA_FILE &&
|
||||
os_file_set_atomic_writes(file, name)) {
|
||||
CloseHandle(file);
|
||||
*success = FALSE;
|
||||
file = INVALID_HANDLE_VALUE;
|
||||
}
|
||||
|
||||
return(file);
|
||||
#else /* __WIN__ */
|
||||
os_file_t file;
|
||||
|
|
@ -1737,6 +1781,12 @@ try_again:
|
|||
file = -1;
|
||||
}
|
||||
#endif /* USE_FILE_LOCK */
|
||||
if (srv_use_atomic_writes && type == OS_DATA_FILE
|
||||
&& os_file_set_atomic_writes(file, name)) {
|
||||
close(file);
|
||||
*success = FALSE;
|
||||
file = -1;
|
||||
}
|
||||
|
||||
return(file);
|
||||
#endif /* __WIN__ */
|
||||
|
|
@ -2081,6 +2131,28 @@ os_file_set_size(
|
|||
current_size = 0;
|
||||
desired_size = (ib_int64_t)size + (((ib_int64_t)size_high) << 32);
|
||||
|
||||
#ifdef HAVE_POSIX_FALLOCATE
|
||||
if (srv_use_posix_fallocate) {
	/* posix_fallocate() reports failure through its return value
	(0 on success, an error number otherwise) and does not set
	errno, so the returned code is what must be tested and
	printed. */
	int	err = posix_fallocate(file, current_size, desired_size);

	if (err != 0) {
		fprintf(stderr,
			"InnoDB: Error: preallocating data for"
			" file %s failed at\n"
			"InnoDB: offset 0 size %lld %lld. Operating system"
			" error number %d.\n"
			"InnoDB: Check that the disk is not full"
			" or a disk quota exceeded.\n"
			"InnoDB: Some operating system error numbers"
			" are described at\n"
			"InnoDB: "
			REFMAN "operating-system-error-codes.html\n",
			name, (long long)size_high, (long long)size, err);

		return(FALSE);
	}

	/* The whole requested range was allocated in one call; the
	chunked write loop below is not needed. */
	return(TRUE);
}
|
||||
#endif
|
||||
|
||||
/* Write up to 1 megabyte at a time. */
|
||||
buf_size = ut_min(64, (ulint) (desired_size / UNIV_PAGE_SIZE))
|
||||
* UNIV_PAGE_SIZE;
|
||||
|
|
|
|||
|
|
@ -409,6 +409,10 @@ UNIV_INTERN ulong srv_sys_stats_root_page = 0;
|
|||
#endif
|
||||
|
||||
UNIV_INTERN ibool srv_use_doublewrite_buf = TRUE;
|
||||
UNIV_INTERN ibool srv_use_atomic_writes = FALSE;
|
||||
#ifdef HAVE_POSIX_FALLOCATE
|
||||
UNIV_INTERN ibool srv_use_posix_fallocate = TRUE;
|
||||
#endif
|
||||
UNIV_INTERN ibool srv_use_checksums = TRUE;
|
||||
UNIV_INTERN ibool srv_fast_checksum = FALSE;
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue