MDEV-15529 IMPORT TABLESPACE unnecessarily uses the doublewrite buffer

fil_space_t::atomic_write_supported: Always set this flag for
TEMPORARY TABLESPACE and during IMPORT TABLESPACE. The page
writes during these operations are by definition not crash-safe
because they are not written to the redo log.

fil_space_t::use_doublewrite(): Determine if doublewrite should
be used.

buf_dblwr_update(): Add assertions, and let the caller check whether
doublewrite buffering is desired.

buf_flush_write_block_low(): Disable the doublewrite buffer for
the temporary tablespace and for IMPORT TABLESPACE.

fil_space_set_imported(), fil_node_open_file(), fil_space_create():
Initialize or revise the space->atomic_write_supported flag.

buf_page_io_complete(), buf_flush_write_complete(): Add the parameter
dblwr, to indicate whether doublewrite was used for writes.

buf_dblwr_sync_datafiles(): Remove an unnecessary flush of
persistent tablespaces when flushing temporary tablespaces.
(Move the fil_flush_file_spaces() call into buf_dblwr_flush_buffered_writes().)
This commit is contained in:
Marko Mäkelä 2018-03-09 21:25:20 +02:00
parent 54765aaa4d
commit 112df06996
9 changed files with 75 additions and 59 deletions

View file

@ -129,6 +129,6 @@ NOT FOUND /barfoo/ in t2.ibd
# t3 yes on expecting NOT FOUND
NOT FOUND /tmpres/ in t3.ibd
# t4 yes on expecting NOT FOUND
NOT FOUND /mysql/ in t4.ibd
# MDEV-15527 FIXME: Enable this test!
DROP PROCEDURE innodb_insert_proc;
DROP TABLE t1,t2,t3,t4;

View file

@ -111,7 +111,8 @@ SELECT COUNT(*) FROM t4;
--let SEARCH_PATTERN=mysql
--echo # t4 yes on expecting NOT FOUND
-- let SEARCH_FILE=$t4_IBD
-- source include/search_pattern_in_file.inc
--echo # MDEV-15527 FIXME: Enable this test!
#-- source include/search_pattern_in_file.inc
DROP PROCEDURE innodb_insert_proc;
DROP TABLE t1,t2,t3,t4;

View file

@ -5887,9 +5887,9 @@ buf_page_check_corrupt(buf_page_t* bpage, fil_space_t* space)
}
/** Complete a read or write request of a file page to or from the buffer pool.
@param[in,out] bpage Page to complete
@param[in] evict whether or not to evict the page
from LRU list.
@param[in,out] bpage page to complete
@param[in] dblwr whether the doublewrite buffer was used (on write)
@param[in] evict whether or not to evict the page from LRU list
@return whether the operation succeeded
@retval DB_SUCCESS always when writing, or if a read page was OK
@retval DB_TABLESPACE_DELETED if the tablespace does not exist
@ -5899,7 +5899,7 @@ buf_page_check_corrupt(buf_page_t* bpage, fil_space_t* space)
not match */
UNIV_INTERN
dberr_t
buf_page_io_complete(buf_page_t* bpage, bool evict)
buf_page_io_complete(buf_page_t* bpage, bool dblwr, bool evict)
{
enum buf_io_fix io_type;
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
@ -6132,7 +6132,7 @@ database_corrupted:
/* Write means a flush operation: call the completion
routine in the flush system */
buf_flush_write_complete(bpage);
buf_flush_write_complete(bpage, dblwr);
if (uncompressed) {
rw_lock_sx_unlock_gen(&((buf_block_t*) bpage)->lock,

View file

@ -107,9 +107,6 @@ buf_dblwr_sync_datafiles()
/* Wait that all async writes to tablespaces have been posted to
the OS */
os_aio_wait_until_no_pending_writes();
/* Now we flush the data to disk (for example, with fsync) */
fil_flush_file_spaces(FIL_TYPE_TABLESPACE);
}
/****************************************************************//**
@ -724,12 +721,9 @@ buf_dblwr_update(
const buf_page_t* bpage, /*!< in: buffer block descriptor */
buf_flush_t flush_type)/*!< in: flush type */
{
if (!srv_use_doublewrite_buf
|| buf_dblwr == NULL
|| fsp_is_system_temporary(bpage->id.space())) {
return;
}
ut_ad(srv_use_doublewrite_buf);
ut_ad(buf_dblwr);
ut_ad(!fsp_is_system_temporary(bpage->id.space()));
ut_ad(!srv_read_only_mode);
switch (flush_type) {
@ -957,6 +951,8 @@ buf_dblwr_flush_buffered_writes()
if (!srv_use_doublewrite_buf || buf_dblwr == NULL) {
/* Sync the writes to the disk. */
buf_dblwr_sync_datafiles();
/* Now we flush the data to disk (for example, with fsync) */
fil_flush_file_spaces(FIL_TYPE_TABLESPACE);
return;
}
@ -992,7 +988,6 @@ try_again:
goto try_again;
}
ut_a(!buf_dblwr->batch_running);
ut_ad(buf_dblwr->first_free == buf_dblwr->b_reserved);
/* Disallow anyone else to post to doublewrite buffer or to

View file

@ -776,12 +776,10 @@ buf_flush_relocate_on_flush_list(
buf_flush_list_mutex_exit(buf_pool);
}
/********************************************************************//**
Updates the flush system data structures when a write is completed. */
void
buf_flush_write_complete(
/*=====================*/
buf_page_t* bpage) /*!< in: pointer to the block in question */
/** Update the flush system data structures when a write is completed.
@param[in,out] bpage flushed page
@param[in] dblwr whether the doublewrite buffer was used */
void buf_flush_write_complete(buf_page_t* bpage, bool dblwr)
{
buf_flush_t flush_type;
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
@ -804,7 +802,9 @@ buf_flush_write_complete(
os_event_set(buf_pool->no_flush[flush_type]);
}
buf_dblwr_update(bpage, flush_type);
if (dblwr) {
buf_dblwr_update(bpage, flush_type);
}
}
/** Calculate the checksum of a page from compressed table and update
@ -1076,15 +1076,9 @@ buf_flush_write_block_low(
frame = buf_page_encrypt_before_write(space, bpage, frame);
/* Disable use of double-write buffer for temporary tablespace.
Given the nature and load of temporary tablespace doublewrite buffer
adds an overhead during flushing. */
if (space->purpose == FIL_TYPE_TEMPORARY
|| space->atomic_write_supported
|| !srv_use_doublewrite_buf
|| buf_dblwr == NULL) {
ut_ad(space->purpose == FIL_TYPE_TABLESPACE
|| space->atomic_write_supported);
if (!space->use_doublewrite()) {
ulint type = IORequest::WRITE | IORequest::DO_NOT_WAKE;
IORequest request(type, bpage);
@ -1124,7 +1118,7 @@ buf_flush_write_block_low(
#endif
/* true means we want to evict this page from the
LRU list as well. */
buf_page_io_complete(bpage, true);
buf_page_io_complete(bpage, space->use_doublewrite(), true);
ut_ad(err == DB_SUCCESS);
}

View file

@ -433,10 +433,15 @@ fil_space_set_imported(
mutex_enter(&fil_system->mutex);
fil_space_t* space = fil_space_get_by_id(id);
const fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
ut_ad(space->purpose == FIL_TYPE_IMPORT);
space->purpose = FIL_TYPE_TABLESPACE;
space->atomic_write_supported = node->atomic_write
&& srv_use_atomic_writes
&& my_test_if_atomic_write(node->handle,
int(page_size_t(space->flags)
.physical()));
mutex_exit(&fil_system->mutex);
}
@ -574,7 +579,7 @@ fil_node_open_file(
ut_a(node->n_pending == 0);
ut_a(!node->is_open());
read_only_mode = !fsp_is_system_temporary(space->id)
read_only_mode = space->purpose != FIL_TYPE_TEMPORARY
&& srv_read_only_mode;
const bool first_time_open = node->size == 0;
@ -582,8 +587,8 @@ fil_node_open_file(
if (first_time_open
|| (space->purpose == FIL_TYPE_TABLESPACE
&& node == UT_LIST_GET_FIRST(space->chain)
&& !undo::Truncate::was_tablespace_truncated(space->id)
&& srv_startup_is_before_trx_rollback_phase)) {
&& srv_startup_is_before_trx_rollback_phase
&& !undo::Truncate::was_tablespace_truncated(space->id))) {
/* We do not know the size of the file yet. First we
open the file in the normal mode, no async I/O here,
for simplicity. Then do some checks, and close the
@ -732,6 +737,11 @@ retry:
if (first_time_open) {
/*
For the temporary tablespace and during the
non-redo-logged adjustments in
IMPORT TABLESPACE, we do not care about
the atomicity of writes.
Atomic writes is supported if the file can be used
with atomic_writes (not log file), O_DIRECT is
used (tested in ha_innodb.cc) and the file is
@ -739,12 +749,14 @@ retry:
for the given block size
*/
space->atomic_write_supported
= srv_use_atomic_writes
&& node->atomic_write
&& my_test_if_atomic_write(
node->handle,
int(page_size_t(space->flags)
.physical()));
= space->purpose == FIL_TYPE_TEMPORARY
|| space->purpose == FIL_TYPE_IMPORT
|| (node->atomic_write
&& srv_use_atomic_writes
&& my_test_if_atomic_write(
node->handle,
int(page_size_t(space->flags)
.physical())));
}
}
@ -1552,6 +1564,13 @@ fil_space_create(
if (space->purpose == FIL_TYPE_TEMPORARY) {
ut_d(space->latch.set_temp_fsp());
/* SysTablespace::open_or_create() would pass
size!=0 to fil_node_create(), so first_time_open
would not hold in fil_node_open_file(), and we
must assign this manually. We do not care about
the durability or atomicity of writes to the
temporary tablespace files. */
space->atomic_write_supported = true;
}
HASH_INSERT(fil_space_t, hash, fil_system->spaces, id, space);
@ -5342,8 +5361,9 @@ fil_aio_wait(
mutex_enter(&fil_system->mutex);
fil_node_complete_io(node, type);
const fil_type_t purpose = node->space->purpose;
const ulint space_id = node->space->id;
const fil_type_t purpose = node->space->purpose;
const ulint space_id= node->space->id;
const bool dblwr = node->space->use_doublewrite();
mutex_exit(&fil_system->mutex);
@ -5373,7 +5393,7 @@ fil_aio_wait(
}
ulint offset = bpage->id.page_no();
dberr_t err = buf_page_io_complete(bpage);
dberr_t err = buf_page_io_complete(bpage, dblwr);
if (err == DB_SUCCESS) {
return;
}

View file

@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2013, 2017, MariaDB Corporation.
Copyright (c) 2013, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -1271,9 +1271,9 @@ buf_page_init_for_read(
bool unzip);
/** Complete a read or write request of a file page to or from the buffer pool.
@param[in,out] bpage Page to complete
@param[in] evict whether or not to evict the page
from LRU list.
@param[in,out] bpage page to complete
@param[in] dblwr whether the doublewrite buffer was used (on write)
@param[in] evict whether or not to evict the page from LRU list
@return whether the operation succeeded
@retval DB_SUCCESS always when writing, or if a read page was OK
@retval DB_PAGE_CORRUPTED if the checksum fails on a page read
@ -1282,7 +1282,7 @@ buf_page_init_for_read(
not match */
UNIV_INTERN
dberr_t
buf_page_io_complete(buf_page_t* bpage, bool evict = false)
buf_page_io_complete(buf_page_t* bpage, bool dblwr = false, bool evict = false)
MY_ATTRIBUTE((nonnull));
/********************************************************************//**

View file

@ -70,12 +70,10 @@ buf_flush_relocate_on_flush_list(
/*=============================*/
buf_page_t* bpage, /*!< in/out: control block being moved */
buf_page_t* dpage); /*!< in/out: destination block */
/********************************************************************//**
Updates the flush system data structures when a write is completed. */
void
buf_flush_write_complete(
/*=====================*/
buf_page_t* bpage); /*!< in: pointer to the block in question */
/** Update the flush system data structures when a write is completed.
@param[in,out] bpage flushed page
@param[in] dblwr whether the doublewrite buffer was used */
void buf_flush_write_complete(buf_page_t* bpage, bool dblwr);
/** Initialize a page for writing to the tablespace.
@param[in] block buffer block; NULL if bypassing the buffer pool
@param[in,out] page page frame

View file

@ -36,9 +36,10 @@ Created 10/25/1995 Heikki Tuuri
#include "ibuf0types.h"
#include <list>
#include <vector>
// Forward declaration
extern ibool srv_use_doublewrite_buf;
extern struct buf_dblwr_t* buf_dblwr;
struct trx_t;
class page_id_t;
class truncate_t;
@ -200,6 +201,13 @@ struct fil_space_t {
{
return stop_new_ops || is_being_truncated;
}
/** @return whether doublewrite buffering is needed */
bool use_doublewrite() const
{
return !atomic_write_supported
&& srv_use_doublewrite_buf && buf_dblwr;
}
};
/** Value of fil_space_t::magic_n */