diff --git a/extra/mariabackup/xtrabackup.cc b/extra/mariabackup/xtrabackup.cc index 5facdefb193..b7ccdc784a2 100644 --- a/extra/mariabackup/xtrabackup.cc +++ b/extra/mariabackup/xtrabackup.cc @@ -380,8 +380,8 @@ static my_bool opt_check_privileges; extern const char *innodb_checksum_algorithm_names[]; extern TYPELIB innodb_checksum_algorithm_typelib; -extern const char *innodb_flush_method_names[]; extern TYPELIB innodb_flush_method_typelib; +extern TYPELIB innodb_doublewrite_typelib; /** Ignored option */ static ulong innodb_flush_method; @@ -1859,8 +1859,8 @@ struct my_option xb_server_options[] = &innobase_data_home_dir, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"innodb_doublewrite", OPT_INNODB_DOUBLEWRITE, "Enable InnoDB doublewrite buffer during --prepare.", - (G_PTR*) &srv_use_doublewrite_buf, - (G_PTR*) &srv_use_doublewrite_buf, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + (G_PTR*) &buf_dblwr.use, (G_PTR*) &buf_dblwr.use, + &innodb_doublewrite_typelib, GET_ENUM, OPT_ARG, 0, 0, 0, 0, 0, 0}, {"innodb_io_capacity", OPT_INNODB_IO_CAPACITY, "Number of IOPs the server can do. Tunes the background IO rate", (G_PTR*) &srv_io_capacity, (G_PTR*) &srv_io_capacity, diff --git a/mysql-test/suite/innodb/r/alter_kill.result b/mysql-test/suite/innodb/r/alter_kill.result index c4469a8c322..af23efe9790 100644 --- a/mysql-test/suite/innodb/r/alter_kill.result +++ b/mysql-test/suite/innodb/r/alter_kill.result @@ -1,3 +1,7 @@ +SELECT @@innodb_doublewrite; +@@innodb_doublewrite +OFF +SET GLOBAL innodb_doublewrite=fast; # # Bug#16720368 INNODB CRASHES ON BROKEN #SQL*.IBD FILE AT STARTUP # @@ -12,7 +16,10 @@ connection default; disconnect con1; # Corrupt FIL_PAGE_TYPE in bug16720368.ibd, # and recompute innodb_checksum_algorithm=crc32 -# restart +# restart: --innodb-flush-method=O_DIRECT +SELECT @@innodb_doublewrite; +@@innodb_doublewrite +OFF SELECT COUNT(*) FROM bug16720368; ERROR HY000: Table `test`.`bug16720368` is corrupted. Please drop the table and recreate. INSERT INTO bug16720368 VALUES(1); diff --git a/mysql-test/suite/innodb/t/alter_kill-master.opt b/mysql-test/suite/innodb/t/alter_kill-master.opt index e472160c2b7..9eb72834ef6 100644 --- a/mysql-test/suite/innodb/t/alter_kill-master.opt +++ b/mysql-test/suite/innodb/t/alter_kill-master.opt @@ -1 +1 @@ ---innodb-doublewrite=false +--innodb-flush-method=O_DIRECT_NO_FSYNC --skip-innodb-doublewrite diff --git a/mysql-test/suite/innodb/t/alter_kill.test b/mysql-test/suite/innodb/t/alter_kill.test index 798f9af00db..3936b3fd9e4 100644 --- a/mysql-test/suite/innodb/t/alter_kill.test +++ b/mysql-test/suite/innodb/t/alter_kill.test @@ -7,6 +7,9 @@ let MYSQLD_DATADIR=`select @@datadir`; let PAGE_SIZE=`select @@innodb_page_size`; +SELECT @@innodb_doublewrite; +SET GLOBAL innodb_doublewrite=fast; + -- disable_query_log call mtr.add_suppression("InnoDB: innodb_force_recovery is on."); call mtr.add_suppression("InnoDB: Ignoring tablespace for.*bug16720368"); @@ -73,8 +76,11 @@ syswrite(FILE, $page, $ps)==$ps || die "Unable to write $file\n"; close(FILE) || die "Unable to close $file"; EOF +-- let $restart_parameters=--innodb-flush-method=O_DIRECT -- source include/start_mysqld.inc +-- let $restart_parameters= +SELECT @@innodb_doublewrite; --error ER_TABLE_CORRUPT SELECT COUNT(*) FROM bug16720368; --error ER_TABLE_CORRUPT diff --git a/mysql-test/suite/sys_vars/r/innodb_doublewrite_basic.result b/mysql-test/suite/sys_vars/r/innodb_doublewrite_basic.result index 4a5baf0aeda..9e93d943c9f 100644 --- a/mysql-test/suite/sys_vars/r/innodb_doublewrite_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_doublewrite_basic.result @@ -1,33 +1,25 @@ -'#---------------------BS_STVARS_026_01----------------------#' -SELECT COUNT(@@GLOBAL.innodb_doublewrite); -COUNT(@@GLOBAL.innodb_doublewrite) -1 -1 Expected -'#---------------------BS_STVARS_026_02----------------------#' +SELECT @@GLOBAL.innodb_doublewrite; +@@GLOBAL.innodb_doublewrite +ON +SET @@GLOBAL.innodb_doublewrite=0; +SELECT @@GLOBAL.innodb_doublewrite; +@@GLOBAL.innodb_doublewrite +OFF +SET @@GLOBAL.innodb_doublewrite=2; +SET @@GLOBAL.innodb_doublewrite=3; +ERROR 42000: Variable 'innodb_doublewrite' can't be set to the value of '3' +SELECT @@GLOBAL.innodb_doublewrite; +@@GLOBAL.innodb_doublewrite +fast SET @@GLOBAL.innodb_doublewrite=1; -ERROR HY000: Variable 'innodb_doublewrite' is a read only variable -Expected error 'Read only variable' -SELECT COUNT(@@GLOBAL.innodb_doublewrite); -COUNT(@@GLOBAL.innodb_doublewrite) -1 -1 Expected -'#---------------------BS_STVARS_026_03----------------------#' -SELECT IF(@@GLOBAL.innodb_doublewrite, "ON", "OFF") = VARIABLE_VALUE -FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES -WHERE VARIABLE_NAME='innodb_doublewrite'; -IF(@@GLOBAL.innodb_doublewrite, "ON", "OFF") = VARIABLE_VALUE -1 -1 Expected -SELECT COUNT(@@GLOBAL.innodb_doublewrite); -COUNT(@@GLOBAL.innodb_doublewrite) -1 -1 Expected +SELECT @@GLOBAL.innodb_doublewrite; +@@GLOBAL.innodb_doublewrite +ON SELECT COUNT(VARIABLE_VALUE) -FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_doublewrite'; COUNT(VARIABLE_VALUE) 1 -1 Expected '#---------------------BS_STVARS_026_04----------------------#' SELECT @@innodb_doublewrite = @@GLOBAL.innodb_doublewrite; @@innodb_doublewrite = @@GLOBAL.innodb_doublewrite @@ -48,6 +40,5 @@ SELECT COUNT(@@GLOBAL.innodb_doublewrite); COUNT(@@GLOBAL.innodb_doublewrite) 1 1 Expected -SELECT innodb_doublewrite = @@SESSION.innodb_doublewrite; -ERROR 42S22: Unknown column 'innodb_doublewrite' in 'field list' -Expected error 'Readonly variable' +SELECT @@innodb_doublewrite = @@SESSION.innodb_doublewrite; +ERROR HY000: Variable 'innodb_doublewrite' is a GLOBAL variable diff --git a/mysql-test/suite/sys_vars/r/sysvars_innodb.result b/mysql-test/suite/sys_vars/r/sysvars_innodb.result index 83e137a946d..f486271cbcc 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_innodb.result +++ b/mysql-test/suite/sys_vars/r/sysvars_innodb.result @@ -503,14 +503,14 @@ VARIABLE_NAME INNODB_DOUBLEWRITE SESSION_VALUE NULL DEFAULT_VALUE ON VARIABLE_SCOPE GLOBAL -VARIABLE_TYPE BOOLEAN -VARIABLE_COMMENT Enable InnoDB doublewrite buffer (enabled by default). Disable with --skip-innodb-doublewrite. +VARIABLE_TYPE ENUM +VARIABLE_COMMENT Whether and how to use the doublewrite buffer. OFF=Assume that writes of innodb_page_size are atomic; ON=Prevent torn writes (the default); fast=Like ON, but do not synchronize writes to data files NUMERIC_MIN_VALUE NULL NUMERIC_MAX_VALUE NULL NUMERIC_BLOCK_SIZE NULL -ENUM_VALUE_LIST OFF,ON -READ_ONLY YES -COMMAND_LINE_ARGUMENT NONE +ENUM_VALUE_LIST OFF,ON,fast +READ_ONLY NO +COMMAND_LINE_ARGUMENT OPTIONAL VARIABLE_NAME INNODB_ENCRYPTION_ROTATE_KEY_AGE SESSION_VALUE NULL DEFAULT_VALUE 1 diff --git a/mysql-test/suite/sys_vars/t/innodb_doublewrite_basic.opt b/mysql-test/suite/sys_vars/t/innodb_doublewrite_basic.opt new file mode 100644 index 00000000000..2bea5a22cde --- /dev/null +++ b/mysql-test/suite/sys_vars/t/innodb_doublewrite_basic.opt @@ -0,0 +1 @@ +--innodb-doublewrite diff --git a/mysql-test/suite/sys_vars/t/innodb_doublewrite_basic.test b/mysql-test/suite/sys_vars/t/innodb_doublewrite_basic.test index 1ae10d0f7cf..4e76c0ac33d 100644 --- a/mysql-test/suite/sys_vars/t/innodb_doublewrite_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_doublewrite_basic.test @@ -1,75 +1,20 @@ - - -################## mysql-test\t\innodb_doublewrite_basic.test ################# -# # -# Variable Name: innodb_doublewrite # -# Scope: Global # -# Access Type: Static # -# Data Type: boolean # -# # -# # -# Creation Date: 2008-02-07 # -# Author : Sharique Abdullah # -# # -# # -# Description:Test Cases of Dynamic System Variable innodb_doublewrite # -# that checks the behavior of this variable in the following ways # -# * Value Check # -# * Scope Check # -# # -# Reference: http://dev.mysql.com/doc/refman/5.1/en/ # -# server-system-variables.html # -# # -############################################################################### - --source include/have_innodb.inc ---echo '#---------------------BS_STVARS_026_01----------------------#' -#################################################################### -# Displaying default value # -#################################################################### -SELECT COUNT(@@GLOBAL.innodb_doublewrite); ---echo 1 Expected +SELECT @@GLOBAL.innodb_doublewrite; +SET @@GLOBAL.innodb_doublewrite=0; +SELECT @@GLOBAL.innodb_doublewrite; ---echo '#---------------------BS_STVARS_026_02----------------------#' -#################################################################### -# Check if Value can set # -#################################################################### - ---error ER_INCORRECT_GLOBAL_LOCAL_VAR +SET @@GLOBAL.innodb_doublewrite=2; +--error ER_WRONG_VALUE_FOR_VAR +SET @@GLOBAL.innodb_doublewrite=3; +SELECT @@GLOBAL.innodb_doublewrite; SET @@GLOBAL.innodb_doublewrite=1; ---echo Expected error 'Read only variable' +SELECT @@GLOBAL.innodb_doublewrite; -SELECT COUNT(@@GLOBAL.innodb_doublewrite); ---echo 1 Expected - - - - ---echo '#---------------------BS_STVARS_026_03----------------------#' -################################################################# -# Check if the value in GLOBAL Table matches value in variable # -################################################################# - ---disable_warnings -SELECT IF(@@GLOBAL.innodb_doublewrite, "ON", "OFF") = VARIABLE_VALUE +SELECT COUNT(VARIABLE_VALUE) FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_doublewrite'; ---enable_warnings ---echo 1 Expected - -SELECT COUNT(@@GLOBAL.innodb_doublewrite); ---echo 1 Expected - ---disable_warnings -SELECT COUNT(VARIABLE_VALUE) -FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES -WHERE VARIABLE_NAME='innodb_doublewrite'; ---enable_warnings ---echo 1 Expected - - --echo '#---------------------BS_STVARS_026_04----------------------#' ################################################################################ @@ -78,8 +23,6 @@ WHERE VARIABLE_NAME='innodb_doublewrite'; SELECT @@innodb_doublewrite = @@GLOBAL.innodb_doublewrite; --echo 1 Expected - - --echo '#---------------------BS_STVARS_026_05----------------------#' ################################################################################ # Check if innodb_doublewrite can be accessed with and without @@ sign # @@ -99,8 +42,5 @@ SELECT COUNT(@@SESSION.innodb_doublewrite); SELECT COUNT(@@GLOBAL.innodb_doublewrite); --echo 1 Expected ---Error ER_BAD_FIELD_ERROR -SELECT innodb_doublewrite = @@SESSION.innodb_doublewrite; ---echo Expected error 'Readonly variable' - - +--error ER_INCORRECT_GLOBAL_LOCAL_VAR +SELECT @@innodb_doublewrite = @@SESSION.innodb_doublewrite; diff --git a/storage/innobase/buf/buf0dblwr.cc b/storage/innobase/buf/buf0dblwr.cc index e2702adc880..ec64d8d46ff 100644 --- a/storage/innobase/buf/buf0dblwr.cc +++ b/storage/innobase/buf/buf0dblwr.cc @@ -53,6 +53,7 @@ void buf_dblwr_t::init() active_slot= &slots[0]; mysql_mutex_init(buf_dblwr_mutex_key, &mutex, nullptr); pthread_cond_init(&cond, nullptr); + block_size= FSP_EXTENT_SIZE; } } @@ -67,7 +68,7 @@ inline void buf_dblwr_t::init(const byte *header) block1= page_id_t(0, mach_read_from_4(header + TRX_SYS_DOUBLEWRITE_BLOCK1)); block2= page_id_t(0, mach_read_from_4(header + TRX_SYS_DOUBLEWRITE_BLOCK2)); - const uint32_t buf_size= 2 * block_size(); + const uint32_t buf_size= 2 * block_size; for (int i= 0; i < 2; i++) { slots[i].write_buf= static_cast @@ -86,7 +87,7 @@ bool buf_dblwr_t::create() return true; mtr_t mtr; - const ulint size= block_size(); + const ulint size= block_size; start_again: mtr.start(); @@ -251,7 +252,7 @@ loads the pages from double write buffer into memory. dberr_t buf_dblwr_t::init_or_load_pages(pfs_os_file_t file, const char *path) { ut_ad(this == &buf_dblwr); - const uint32_t size= block_size(); + const uint32_t size= block_size; /* We do the file i/o past the buffer pool */ byte *read_buf= static_cast(aligned_malloc(srv_page_size, @@ -488,7 +489,6 @@ void buf_dblwr_t::write_completed() mysql_mutex_lock(&mutex); ut_ad(is_created()); - ut_ad(srv_use_doublewrite_buf); ut_ad(batch_running); slot *flush_slot= active_slot == &slots[0] ? &slots[1] : &slots[0]; ut_ad(flush_slot->reserved); @@ -574,7 +574,7 @@ static void buf_dblwr_check_block(const buf_page_t *bpage) bool buf_dblwr_t::flush_buffered_writes(const ulint size) { mysql_mutex_assert_owner(&mutex); - ut_ad(size == block_size()); + ut_ad(size == block_size); for (;;) { @@ -647,7 +647,6 @@ static void *get_frame(const IORequest &request) void buf_dblwr_t::flush_buffered_writes_completed(const IORequest &request) { ut_ad(this == &buf_dblwr); - ut_ad(srv_use_doublewrite_buf); ut_ad(is_created()); ut_ad(!srv_read_only_mode); ut_ad(!request.bpage); @@ -670,8 +669,14 @@ void buf_dblwr_t::flush_buffered_writes_completed(const IORequest &request) pages_written+= flush_slot->first_free; mysql_mutex_unlock(&mutex); - /* Now flush the doublewrite buffer data to disk */ - fil_system.sys_space->flush(); + /* Make the doublewrite durable. Note: The doublewrite buffer is + always in the first file of the system tablespace. We will not + bother about fil_system.unflushed_spaces, which can result in a + redundant call during fil_flush_file_spaces() in + log_checkpoint(). Writes to the system tablespace should be rare, + except when executing DDL or using the non-default settings + innodb_file_per_table=OFF or innodb_undo_tablespaces=0. */ + os_file_flush(request.node->handle); /* The writes have been flushed to disk now and in recovery we will find them in the doublewrite buffer blocks. Next, write the data pages. */ @@ -714,17 +719,18 @@ posted, and also when we may have to wait for a page latch! Otherwise a deadlock of threads can occur. */ void buf_dblwr_t::flush_buffered_writes() { - if (!is_created() || !srv_use_doublewrite_buf) + mysql_mutex_lock(&mutex); + + if (!in_use() && active_slot->first_free == 0) { + mysql_mutex_unlock(&mutex); fil_flush_file_spaces(); return; } ut_ad(!srv_read_only_mode); - const ulint size= block_size(); - mysql_mutex_lock(&mutex); - if (!flush_buffered_writes(size)) + if (!flush_buffered_writes(block_size)) mysql_mutex_unlock(&mutex); } @@ -734,8 +740,6 @@ flush_buffered_writes() will be invoked to make space. @param size payload size in bytes */ void buf_dblwr_t::add_to_batch(const IORequest &request, size_t size) { - ut_ad(request.is_async()); - ut_ad(request.is_write()); ut_ad(request.bpage); ut_ad(request.bpage->in_file()); ut_ad(request.node); @@ -744,7 +748,7 @@ void buf_dblwr_t::add_to_batch(const IORequest &request, size_t size) ut_ad(request.node->space->referenced()); ut_ad(!srv_read_only_mode); - const ulint buf_size= 2 * block_size(); + const ulint buf_size= 2 * block_size; mysql_mutex_lock(&mutex); @@ -773,7 +777,7 @@ void buf_dblwr_t::add_to_batch(const IORequest &request, size_t size) ut_ad(active_slot->reserved == active_slot->first_free); ut_ad(active_slot->reserved < buf_size); new (active_slot->buf_block_arr + active_slot->first_free++) - element{request, size}; + element{request.doublewritten(), size}; active_slot->reserved= active_slot->first_free; if (active_slot->first_free != buf_size || diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index 4e54c7055ca..ed1ef22bb4e 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -350,9 +350,9 @@ void buf_page_write_complete(const IORequest &request, bool error) else { bpage->write_complete(persistent, error, state); - if (state < buf_page_t::WRITE_FIX_REINIT && - request.node->space->use_doublewrite()) + if (request.is_doublewritten()) { + ut_ad(state < buf_page_t::WRITE_FIX_REINIT); ut_ad(persistent); buf_dblwr.write_completed(); } diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index ac491a958d6..0acc04f25d0 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -1231,9 +1231,6 @@ void fil_system_t::create(ulint hash_size) ut_ad(!is_initialised()); ut_ad(!(srv_page_size % FSP_EXTENT_SIZE)); ut_ad(srv_page_size); - ut_ad(!spaces.array); - - m_initialised = true; compile_time_assert(!(UNIV_PAGE_SIZE_MAX % FSP_EXTENT_SIZE_MAX)); compile_time_assert(!(UNIV_PAGE_SIZE_MIN % FSP_EXTENT_SIZE_MIN)); @@ -1244,6 +1241,8 @@ void fil_system_t::create(ulint hash_size) spaces.create(hash_size); + need_unflushed_spaces = !write_through && buf_dblwr.need_fsync(); + fil_space_crypt_init(); #ifdef __linux__ ssd.clear(); @@ -1317,13 +1316,12 @@ void fil_system_t::close() if (is_initialised()) { - m_initialised= false; spaces.free(); mysql_mutex_destroy(&mutex); fil_space_crypt_cleanup(); } - ut_ad(!spaces.array); + ut_ad(!is_initialised()); #ifdef __linux__ ssd.clear(); @@ -1464,6 +1462,7 @@ void fil_system_t::set_write_through(bool write_through) { this->write_through= write_through; fil_space_t::reopen_all(); + need_unflushed_spaces = !write_through && buf_dblwr.need_fsync(); } mysql_mutex_unlock(&mutex); @@ -2833,19 +2832,18 @@ static void fil_invalid_page_access_msg(const char *name, } /** Update the data structures on write completion */ -inline void fil_node_t::complete_write() +void fil_space_t::complete_write() { mysql_mutex_assert_not_owner(&fil_system.mutex); - if (space->purpose != FIL_TYPE_TEMPORARY && - (!fil_system.is_write_through() && !my_disable_sync) && - space->set_needs_flush()) + if (purpose != FIL_TYPE_TEMPORARY && + fil_system.use_unflushed_spaces() && set_needs_flush()) { mysql_mutex_lock(&fil_system.mutex); - if (!space->is_in_unflushed_spaces) + if (!is_in_unflushed_spaces) { - space->is_in_unflushed_spaces= true; - fil_system.unflushed_spaces.push_front(*space); + is_in_unflushed_spaces= true; + fil_system.unflushed_spaces.push_front(*this); } mysql_mutex_unlock(&fil_system.mutex); } @@ -2945,7 +2943,7 @@ io_error: if (!type.is_async()) { if (type.is_write()) { release_sync_write: - node->complete_write(); + complete_write(); release: release(); goto func_exit; @@ -2965,21 +2963,28 @@ void IORequest::write_complete(int io_error) const { ut_ad(fil_validate_skip()); ut_ad(node); + fil_space_t *space= node->space; ut_ad(is_write()); - node->complete_write(); if (!bpage) { ut_ad(!srv_read_only_mode); if (type == IORequest::DBLWR_BATCH) + { buf_dblwr.flush_buffered_writes_completed(*this); + /* Above, we already invoked os_file_flush() on the + doublewrite buffer if needed. */ + goto func_exit; + } else ut_ad(type == IORequest::WRITE_ASYNC); } else buf_page_write_complete(*this, io_error); - node->space->release(); + space->complete_write(); + func_exit: + space->release(); } void IORequest::read_complete(int io_error) const diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 21c33ddd337..85fb7d765f9 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -352,7 +352,7 @@ static TYPELIB innodb_default_row_format_typelib = { }; /** Names of allowed values of innodb_flush_method */ -const char* innodb_flush_method_names[] = { +static const char* innodb_flush_method_names[] = { "fsync", "O_DSYNC", "littlesync", @@ -380,6 +380,18 @@ TYPELIB innodb_flush_method_typelib = { /** Deprecated parameter */ static ulong innodb_flush_method; +/** Names of allowed values of innodb_doublewrite */ +static const char *innodb_doublewrite_names[]= + {"OFF", "ON", "fast", nullptr}; + +/** Enumeration of innodb_doublewrite */ +TYPELIB innodb_doublewrite_typelib= { + array_elements(innodb_doublewrite_names) - 1, + "innodb_doublewrite_typelib", + innodb_doublewrite_names, + nullptr +}; + /** Names of allowed values of innodb_deadlock_report */ static const char *innodb_deadlock_report_names[]= { "off", /* Do not report any details of deadlocks */ @@ -3982,6 +3994,10 @@ static int innodb_init_params() } else if (innodb_flush_method >= 4 /* O_DIRECT */ IF_WIN(&& innodb_flush_method < 8 /* normal */,)) { /* O_DIRECT and similar settings do nothing */ + if (innodb_flush_method == 5 /* O_DIRECT_NO_FSYNC */ + && buf_dblwr.use) { + buf_dblwr.use = buf_dblwr.USE_FAST; + } #ifdef O_DIRECT } else if (srv_use_atomic_writes && my_may_have_atomic_write) { /* If atomic writes are enabled, do the same as with @@ -18442,6 +18458,12 @@ static void innodb_data_file_write_through_update(THD *, st_mysql_sys_var*, mysql_mutex_lock(&LOCK_global_system_variables); } +static void innodb_doublewrite_update(THD *, st_mysql_sys_var*, + void *, const void *save) +{ + fil_system.set_use_doublewrite(*static_cast(save)); +} + static void innodb_log_file_size_update(THD *thd, st_mysql_sys_var*, void *var, const void *save) { @@ -18775,11 +18797,14 @@ static MYSQL_SYSVAR_STR(data_home_dir, innobase_data_home_dir, "The common part for InnoDB table spaces.", NULL, NULL, NULL); -static MYSQL_SYSVAR_BOOL(doublewrite, srv_use_doublewrite_buf, - PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, - "Enable InnoDB doublewrite buffer (enabled by default)." - " Disable with --skip-innodb-doublewrite.", - NULL, NULL, TRUE); +static MYSQL_SYSVAR_ENUM(doublewrite, buf_dblwr.use, + PLUGIN_VAR_OPCMDARG, + "Whether and how to use the doublewrite buffer. " + "OFF=Assume that writes of innodb_page_size are atomic; " + "ON=Prevent torn writes (the default); " + "fast=Like ON, but do not synchronize writes to data files", + nullptr, innodb_doublewrite_update, true, + &innodb_doublewrite_typelib); static MYSQL_SYSVAR_BOOL(use_atomic_writes, srv_use_atomic_writes, PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, diff --git a/storage/innobase/include/buf0dblwr.h b/storage/innobase/include/buf0dblwr.h index 6e7662d9b81..f912775de59 100644 --- a/storage/innobase/include/buf0dblwr.h +++ b/storage/innobase/include/buf0dblwr.h @@ -53,9 +53,9 @@ class buf_dblwr_t element* buf_block_arr; }; - /** the page number of the first doublewrite block (block_size() pages) */ + /** the page number of the first doublewrite block (block_size pages) */ page_id_t block1{0, 0}; - /** the page number of the second doublewrite block (block_size() pages) */ + /** the page number of the second doublewrite block (block_size pages) */ page_id_t block2{0, 0}; /** mutex protecting the data members below */ @@ -74,6 +74,22 @@ class buf_dblwr_t slot slots[2]; slot *active_slot; + /** Size of the doublewrite block in pages */ + uint32_t block_size; + +public: + /** Values of use */ + enum usage { + /** Assume that writes are atomic */ + USE_NO= 0, + /** Use the doublewrite buffer with full durability */ + USE_YES, + /** Durable writes to the doublewrite buffer, not to data files */ + USE_FAST + }; + /** The value of innodb_doublewrite */ + ulong use; +private: /** Initialise the persistent storage of the doublewrite buffer. @param header doublewrite page header in the TRX_SYS page */ inline void init(const byte *header); @@ -126,9 +142,6 @@ public: @param request the completed batch write request */ void flush_buffered_writes_completed(const IORequest &request); - /** Size of the doublewrite block in pages */ - uint32_t block_size() const { return FSP_EXTENT_SIZE; } - /** Schedule a page write. If the doublewrite memory buffer is full, flush_buffered_writes() will be invoked to make space. @param request asynchronous write request @@ -139,6 +152,19 @@ public: bool is_created() const { return UNIV_LIKELY(block1 != page_id_t(0, 0)); } + /** @return whether the doublewrite buffer is in use */ + bool in_use() const { return is_created() && use; } + /** @return whether fsync() is needed on non-doublewrite pages */ + bool need_fsync() const { return use < USE_FAST; } + + void set_use(ulong use) + { + ut_ad(use <= USE_FAST); + mysql_mutex_lock(&mutex); + this->use= use; + mysql_mutex_unlock(&mutex); + } + /** @return whether a page identifier is part of the doublewrite buffer */ bool is_inside(const page_id_t id) const { @@ -147,8 +173,8 @@ public: ut_ad(block1 < block2); if (id < block1) return false; - const uint32_t size= block_size(); - return id < block1 + size || (id >= block2 && id < block2 + size); + return id < block1 + block_size || + (id >= block2 && id < block2 + block_size); } /** Wait for flush_buffered_writes() to be fully completed */ diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h index 1f9b329efa3..94de29f494c 100644 --- a/storage/innobase/include/fil0fil.h +++ b/storage/innobase/include/fil0fil.h @@ -48,9 +48,6 @@ struct named_spaces_tag_t; using space_list_t= ilist; -// Forward declaration -extern my_bool srv_use_doublewrite_buf; - /** Undo tablespaces starts with space_id. */ extern uint32_t srv_undo_space_id_start; /** The number of UNDO tablespaces that are open and ready to use. */ @@ -1008,6 +1005,9 @@ public: /** @return the tablespace name (databasename/tablename) */ name_type name() const; + /** Update the data structures on write completion */ + void complete_write(); + private: /** @return whether the file is usable for io() */ ATTRIBUTE_COLD bool prepare_acquired(); @@ -1080,9 +1080,6 @@ struct fil_node_t final @return detached handle or OS_FILE_CLOSED */ inline pfs_os_file_t close_to_free(bool detach_handle= false); - /** Update the data structures on write completion */ - inline void complete_write(); - private: /** Does stuff common for close() and detach() */ void prepare_to_close_or_detach(); @@ -1090,8 +1087,7 @@ private: inline bool fil_space_t::use_doublewrite() const { - return !UT_LIST_GET_FIRST(chain)->atomic_write && srv_use_doublewrite_buf && - buf_dblwr.is_created(); + return !UT_LIST_GET_FIRST(chain)->atomic_write && buf_dblwr.in_use(); } inline void fil_space_t::set_imported() @@ -1352,9 +1348,9 @@ struct fil_system_t Some members may require late initialisation, thus we just mark object as uninitialised. Real initialisation happens in create(). */ - fil_system_t() : m_initialised(false) {} + fil_system_t() {} - bool is_initialised() const { return m_initialised; } + bool is_initialised() const { return spaces.array; } /** Create the file system interface at database start. @@ -1367,8 +1363,6 @@ struct fil_system_t void close(); private: - bool m_initialised; - /** Points to the last opened space in space_list. Protected with fil_system.mutex. */ fil_space_t *space_list_last_opened= nullptr; @@ -1404,19 +1398,32 @@ public: /** Map of fil_space_t::id to fil_space_t* */ hash_table_t spaces; - /** whether each write to data files is durable (O_DSYNC) */ + /** false=invoke fsync() or fdatasync() on data files before checkpoint; + true=each write is durable (O_DSYNC) */ my_bool write_through; /** whether data files are buffered (not O_DIRECT) */ my_bool buffered; + /** whether fdatasync() is needed on data files */ + Atomic_relaxed need_unflushed_spaces; /** Try to enable or disable write-through of data files */ void set_write_through(bool write_through); + /** Update innodb_doublewrite */ + void set_use_doublewrite(ulong use) + { + buf_dblwr.set_use(use); + need_unflushed_spaces= !write_through && buf_dblwr.need_fsync(); + } + /** Try to enable or disable file system caching of data files */ void set_buffered(bool buffered); TPOOL_SUPPRESS_TSAN bool is_write_through() const { return write_through; } TPOOL_SUPPRESS_TSAN bool is_buffered() const { return buffered; } + /** @return whether to update unflushed_spaces */ + bool use_unflushed_spaces() const { return need_unflushed_spaces; } + /** tablespaces for which fil_space_t::needs_flush() holds */ sized_ilist unflushed_spaces; /** number of currently open files; protected by mutex */ diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h index a5953dcfd51..317c1c132ee 100644 --- a/storage/innobase/include/os0file.h +++ b/storage/innobase/include/os0file.h @@ -185,10 +185,14 @@ public: WRITE_SYNC= 16, /** Asynchronous write */ WRITE_ASYNC= WRITE_SYNC | 1, + /** Asynchronous doublewritten page */ + WRITE_DBL= WRITE_ASYNC | 4, /** A doublewrite batch */ DBLWR_BATCH= WRITE_ASYNC | 8, /** Write data and punch hole for the rest */ PUNCH= WRITE_ASYNC | 16, + /** Write doublewritten data and punch hole for the rest */ + PUNCH_DBL= PUNCH | 4, /** Zero out a range of bytes in fil_space_t::io() */ PUNCH_RANGE= WRITE_SYNC | 32, }; @@ -204,6 +208,14 @@ public: bool is_read() const { return (type & READ_SYNC) != 0; } bool is_write() const { return (type & WRITE_SYNC) != 0; } bool is_async() const { return (type & (READ_SYNC ^ READ_ASYNC)) != 0; } + bool is_doublewritten() const { return (type & 4) != 0; } + + /** Create a write request for the doublewrite buffer. */ + IORequest doublewritten() const + { + ut_ad(type == WRITE_ASYNC || type == PUNCH); + return IORequest{bpage, slot, node, Type(type | 4)}; + } void write_complete(int io_error) const; void read_complete(int io_error) const; diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index 46fa1ca95e2..3f35bb78017 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -303,7 +303,6 @@ extern my_bool srv_stats_include_delete_marked; extern unsigned long long srv_stats_modified_counter; extern my_bool srv_stats_sample_traditional; -extern my_bool srv_use_doublewrite_buf; extern ulong srv_checksum_algorithm; extern my_bool srv_force_primary_key; diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc index 3d24b97a21a..84b065b413b 100644 --- a/storage/innobase/srv/srv0srv.cc +++ b/storage/innobase/srv/srv0srv.cc @@ -305,8 +305,6 @@ unsigned long long srv_stats_modified_counter; based on number of configured pages */ my_bool srv_stats_sample_traditional; -my_bool srv_use_doublewrite_buf; - /** innodb_sync_spin_loops */ ulong srv_n_spin_wait_rounds; /** innodb_spin_wait_delay */ diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index 3c90186374c..875dbbe8d57 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -1193,7 +1193,7 @@ dberr_t srv_start(bool create_new_db) if (srv_read_only_mode) { sql_print_information("InnoDB: Started in read only mode"); - srv_use_doublewrite_buf = false; + buf_dblwr.use = buf_dblwr.USE_NO; } high_level_read_only = srv_read_only_mode