mariadb/storage/innobase/fsp/fsp0file.cc
Marko Mäkelä 879ba1979b MDEV-11799 Doublewrite recovery can corrupt data pages
The purpose of the InnoDB doublewrite buffer is to make InnoDB
tolerant against cases where the server was killed in the middle
of a page write. (In Linux, killing a process may interrupt a
write system call, typically on a 4096-byte boundary.)

There may exist multiple copies of a page number in the doublewrite
buffer. Recovery should choose the latest valid copy of the page.
By design, the FIL_PAGE_LSN must not precede the latest checkpoint LSN
nor be later than the end of the recovered log.

For page_compressed and encrypted pages, we were missing proper
consistency checks. In the 10.4 data set generated for in MDEV-23231,
the data file contained a valid page_compressed page, and an
identical copy of that page was also present in the doublewrite
buffer. But, recovery would incorrectly consider the page invalid
and restore an uncompressed copy of the same page that had been
written before the log checkpoint. (In fact, no redo log was to
be applied to that page.)

buf_dblwr_process(): Validate the FIL_PAGE_LSN in the doublewrite
buffer pages, and always skip page 0, because those pages should
have been recovered by Datafile::restore_from_doublewrite() if
necessary.

Datafile::restore_from_doublewrite(): Choose the latest applicable
page from the doublewrite buffer.

recv_dblwr_t::find_page(): Also validate encrypted or
page_compressed pages.

recv_dblwr_t::validate_page(): New function to validate a page,
either a copy in a data file or in the doublewrite buffer.
Also validate encrypted or page_compressed pages.

This is joint work with Thirunarayanan Balathandayuthapani.
2020-07-31 11:54:35 +03:00

1055 lines
26 KiB
C++

/*****************************************************************************
Copyright (c) 2013, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
*****************************************************************************/
/**************************************************//**
@file fsp/fsp0file.cc
Tablespace data file implementation
Created 2013-7-26 by Kevin Lewis
*******************************************************/
#include "fil0fil.h"
#include "fsp0types.h"
#include "os0file.h"
#include "page0page.h"
#include "srv0start.h"
/** Initialize the name, size and order of this datafile
@param[in] name tablespace name, will be copied
@param[in] flags tablespace flags */
void
Datafile::init(
const char* name,
ulint flags)
{
ut_ad(m_name == NULL);
ut_ad(name != NULL);
m_name = mem_strdup(name);
m_flags = flags;
}
/** Release the resources. */
void
Datafile::shutdown()
{
close();
ut_free(m_name);
m_name = NULL;
free_filepath();
free_first_page();
}
/** Create/open a data file.
@param[in] read_only_mode if true, then readonly mode checks are enforced.
@return DB_SUCCESS or error code */
dberr_t
Datafile::open_or_create(bool read_only_mode)
{
bool success;
ut_a(m_filepath != NULL);
ut_ad(m_handle == OS_FILE_CLOSED);
m_handle = os_file_create(
innodb_data_file_key, m_filepath, m_open_flags,
OS_FILE_NORMAL, OS_DATA_FILE, read_only_mode, &success);
if (!success) {
m_last_os_error = os_file_get_last_error(true);
ib::error() << "Cannot open datafile '" << m_filepath << "'";
return(DB_CANNOT_OPEN_FILE);
}
return(DB_SUCCESS);
}
/** Open a data file in read-only mode to check if it exists so that it
can be validated.
@param[in] strict whether to issue error messages
@return DB_SUCCESS or error code */
dberr_t
Datafile::open_read_only(bool strict)
{
bool success = false;
ut_ad(m_handle == OS_FILE_CLOSED);
/* This function can be called for file objects that do not need
to be opened, which is the case when the m_filepath is NULL */
if (m_filepath == NULL) {
return(DB_ERROR);
}
set_open_flags(OS_FILE_OPEN);
m_handle = os_file_create_simple_no_error_handling(
innodb_data_file_key, m_filepath, m_open_flags,
OS_FILE_READ_ONLY, true, &success);
if (success) {
m_exists = true;
init_file_info();
return(DB_SUCCESS);
}
if (strict) {
m_last_os_error = os_file_get_last_error(true);
ib::error() << "Cannot open datafile for read-only: '"
<< m_filepath << "' OS error: " << m_last_os_error;
}
return(DB_CANNOT_OPEN_FILE);
}
/** Open a data file in read-write mode during start-up so that
doublewrite pages can be restored and then it can be validated.*
@param[in] read_only_mode if true, then readonly mode checks are enforced.
@return DB_SUCCESS or error code */
dberr_t
Datafile::open_read_write(bool read_only_mode)
{
bool success = false;
ut_ad(m_handle == OS_FILE_CLOSED);
/* This function can be called for file objects that do not need
to be opened, which is the case when the m_filepath is NULL */
if (m_filepath == NULL) {
return(DB_ERROR);
}
set_open_flags(OS_FILE_OPEN);
m_handle = os_file_create_simple_no_error_handling(
innodb_data_file_key, m_filepath, m_open_flags,
OS_FILE_READ_WRITE, read_only_mode, &success);
if (!success) {
m_last_os_error = os_file_get_last_error(true);
ib::error() << "Cannot open datafile for read-write: '"
<< m_filepath << "'";
return(DB_CANNOT_OPEN_FILE);
}
m_exists = true;
init_file_info();
return(DB_SUCCESS);
}
/** Initialize OS specific file info. */
void
Datafile::init_file_info()
{
#ifdef _WIN32
GetFileInformationByHandle(m_handle, &m_file_info);
#else
fstat(m_handle, &m_file_info);
#endif /* WIN32 */
}
/** Close a data file.
@return DB_SUCCESS or error code */
dberr_t
Datafile::close()
{
if (m_handle != OS_FILE_CLOSED) {
ibool success = os_file_close(m_handle);
ut_a(success);
m_handle = OS_FILE_CLOSED;
}
return(DB_SUCCESS);
}
/** Make a full filepath from a directory path and a filename.
Prepend the dirpath to filename using the extension given.
If dirpath is NULL, prepend the default datadir to filepath.
Store the result in m_filepath.
@param[in] dirpath directory path
@param[in] filename filename or filepath
@param[in] ext filename extension */
void
Datafile::make_filepath(
const char* dirpath,
const char* filename,
ib_extention ext)
{
ut_ad(dirpath != NULL || filename != NULL);
free_filepath();
m_filepath = fil_make_filepath(dirpath, filename, ext, false);
ut_ad(m_filepath != NULL);
set_filename();
}
/** Set the filepath by duplicating the filepath sent in. This is the
name of the file with its extension and absolute or relative path.
@param[in] filepath filepath to set */
void
Datafile::set_filepath(const char* filepath)
{
free_filepath();
m_filepath = static_cast<char*>(ut_malloc_nokey(strlen(filepath) + 1));
::strcpy(m_filepath, filepath);
set_filename();
}
/** Free the filepath buffer. */
void
Datafile::free_filepath()
{
if (m_filepath != NULL) {
ut_free(m_filepath);
m_filepath = NULL;
m_filename = NULL;
}
}
/** Do a quick test if the filepath provided looks the same as this filepath
byte by byte. If they are two different looking paths to the same file,
same_as() will be used to show that after the files are opened.
@param[in] other filepath to compare with
@retval true if it is the same filename by byte comparison
@retval false if it looks different */
bool
Datafile::same_filepath_as(
const char* other) const
{
return(0 == strcmp(m_filepath, other));
}
/** Test if another opened datafile is the same file as this object.
@param[in] other Datafile to compare with
@return true if it is the same file, else false */
bool
Datafile::same_as(
const Datafile& other) const
{
#ifdef _WIN32
return(m_file_info.dwVolumeSerialNumber
== other.m_file_info.dwVolumeSerialNumber
&& m_file_info.nFileIndexHigh
== other.m_file_info.nFileIndexHigh
&& m_file_info.nFileIndexLow
== other.m_file_info.nFileIndexLow);
#else
return(m_file_info.st_ino == other.m_file_info.st_ino
&& m_file_info.st_dev == other.m_file_info.st_dev);
#endif /* WIN32 */
}
/** Allocate and set the datafile or tablespace name in m_name.
If a name is provided, use it; else extract a file-per-table
tablespace name from m_filepath. The value of m_name
will be freed in the destructor.
@param[in] name tablespace name if known, NULL if not */
void
Datafile::set_name(const char* name)
{
ut_free(m_name);
if (name != NULL) {
m_name = mem_strdup(name);
} else {
m_name = fil_path_to_space_name(m_filepath);
}
}
/** Reads a few significant fields from the first page of the first
datafile. The Datafile must already be open.
@param[in] read_only_mode If true, then readonly mode checks are enforced.
@return DB_SUCCESS or DB_IO_ERROR if page cannot be read */
dberr_t
Datafile::read_first_page(bool read_only_mode)
{
if (m_handle == OS_FILE_CLOSED) {
dberr_t err = open_or_create(read_only_mode);
if (err != DB_SUCCESS) {
return(err);
}
}
m_first_page_buf = static_cast<byte*>(
ut_malloc_nokey(2 * UNIV_PAGE_SIZE_MAX));
/* Align the memory for a possible read from a raw device */
m_first_page = static_cast<byte*>(
ut_align(m_first_page_buf, UNIV_PAGE_SIZE));
IORequest request;
dberr_t err = DB_ERROR;
size_t page_size = UNIV_PAGE_SIZE_MAX;
/* Don't want unnecessary complaints about partial reads. */
request.disable_partial_io_warnings();
while (page_size >= UNIV_PAGE_SIZE_MIN) {
ulint n_read = 0;
err = os_file_read_no_error_handling(
request, m_handle, m_first_page, 0, page_size, &n_read);
if (err == DB_IO_ERROR && n_read >= UNIV_PAGE_SIZE_MIN) {
page_size >>= 1;
} else if (err == DB_SUCCESS) {
ut_a(n_read == page_size);
break;
} else if (srv_operation == SRV_OPERATION_BACKUP) {
break;
} else {
ib::error()
<< "Cannot read first page of '"
<< m_filepath << "' "
<< err;
break;
}
}
if (err != DB_SUCCESS) {
return(err);
}
if (m_order == 0) {
m_space_id = fsp_header_get_space_id(m_first_page);
m_flags = fsp_header_get_flags(m_first_page);
if (!fsp_flags_is_valid(m_flags, m_space_id)) {
ulint cflags = fsp_flags_convert_from_101(m_flags);
if (cflags == ULINT_UNDEFINED) {
ib::error()
<< "Invalid flags " << ib::hex(m_flags)
<< " in " << m_filepath;
return(DB_CORRUPTION);
} else {
m_flags = cflags;
}
}
}
const page_size_t ps(m_flags);
if (ps.physical() > page_size) {
ib::error() << "File " << m_filepath
<< " should be longer than "
<< page_size << " bytes";
return(DB_CORRUPTION);
}
return(err);
}
/** Free the first page from memory when it is no longer needed. */
void
Datafile::free_first_page()
{
if (m_first_page_buf) {
ut_free(m_first_page_buf);
m_first_page_buf = NULL;
m_first_page = NULL;
}
}
/** Validates the datafile and checks that it conforms with the expected
space ID and flags. The file should exist and be successfully opened
in order for this function to validate it.
@param[in] space_id The expected tablespace ID.
@param[in] flags The expected tablespace flags.
@retval DB_SUCCESS if tablespace is valid, DB_ERROR if not.
m_is_valid is also set true on success, else false. */
dberr_t
Datafile::validate_to_dd(ulint space_id, ulint flags)
{
dberr_t err;
if (!is_open()) {
return DB_ERROR;
}
/* Validate this single-table-tablespace with the data dictionary,
but do not compare the DATA_DIR flag, in case the tablespace was
remotely located. */
err = validate_first_page(0);
if (err != DB_SUCCESS) {
return(err);
}
flags &= ~FSP_FLAGS_MEM_MASK;
/* Make sure the datafile we found matched the space ID.
If the datafile is a file-per-table tablespace then also match
the row format and zip page size. */
if (m_space_id == space_id && m_flags == flags) {
/* Datafile matches the tablespace expected. */
return(DB_SUCCESS);
}
/* else do not use this tablespace. */
m_is_valid = false;
ib::error() << "Refusing to load '" << m_filepath << "' (id="
<< m_space_id << ", flags=" << ib::hex(m_flags)
<< "); dictionary contains id="
<< space_id << ", flags=" << ib::hex(flags);
return(DB_ERROR);
}
/** Validates this datafile for the purpose of recovery. The file should
exist and be successfully opened. We initially open it in read-only mode
because we just want to read the SpaceID. However, if the first page is
corrupt and needs to be restored from the doublewrite buffer, we will
reopen it in write mode and ry to restore that page.
@retval DB_SUCCESS if tablespace is valid, DB_ERROR if not.
m_is_valid is also set true on success, else false. */
dberr_t
Datafile::validate_for_recovery()
{
dberr_t err;
ut_ad(is_open());
ut_ad(!srv_read_only_mode);
err = validate_first_page(0);
switch (err) {
case DB_SUCCESS:
case DB_TABLESPACE_EXISTS:
break;
default:
/* Re-open the file in read-write mode Attempt to restore
page 0 from doublewrite and read the space ID from a survey
of the first few pages. */
close();
err = open_read_write(srv_read_only_mode);
if (err != DB_SUCCESS) {
return(err);
}
err = find_space_id();
if (err != DB_SUCCESS || m_space_id == 0) {
ib::error() << "Datafile '" << m_filepath << "' is"
" corrupted. Cannot determine the space ID from"
" the first 64 pages.";
return(err);
}
if (restore_from_doublewrite()) {
return(DB_CORRUPTION);
}
/* Free the previously read first page and then re-validate. */
free_first_page();
err = validate_first_page(0);
}
if (err == DB_SUCCESS) {
set_name(NULL);
}
return(err);
}
/** Check the consistency of the first page of a datafile when the
tablespace is opened. This occurs before the fil_space_t is created
so the Space ID found here must not already be open.
m_is_valid is set true on success, else false.
@param[out] flush_lsn contents of FIL_PAGE_FILE_FLUSH_LSN
@retval DB_SUCCESS on if the datafile is valid
@retval DB_CORRUPTION if the datafile is not readable
@retval DB_TABLESPACE_EXISTS if there is a duplicate space_id */
dberr_t
Datafile::validate_first_page(lsn_t* flush_lsn)
{
char* prev_name;
char* prev_filepath;
const char* error_txt = NULL;
m_is_valid = true;
if (m_first_page == NULL
&& read_first_page(srv_read_only_mode) != DB_SUCCESS) {
error_txt = "Cannot read first page";
} else {
ut_ad(m_first_page_buf);
ut_ad(m_first_page);
if (flush_lsn != NULL) {
*flush_lsn = mach_read_from_8(
m_first_page + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
}
}
if (error_txt != NULL) {
err_exit:
ib::info() << error_txt << " in datafile: " << m_filepath
<< ", Space ID:" << m_space_id << ", Flags: "
<< m_flags;
m_is_valid = false;
free_first_page();
return(DB_CORRUPTION);
}
/* Check if the whole page is blank. */
if (!m_space_id && !m_flags) {
const byte* b = m_first_page;
ulint nonzero_bytes = UNIV_PAGE_SIZE;
while (*b == '\0' && --nonzero_bytes != 0) {
b++;
}
if (nonzero_bytes == 0) {
error_txt = "Header page consists of zero bytes";
goto err_exit;
}
}
if (!fsp_flags_is_valid(m_flags, m_space_id)) {
/* Tablespace flags must be valid. */
error_txt = "Tablespace flags are invalid";
goto err_exit;
}
const page_size_t page_size(m_flags);
if (univ_page_size.logical() != page_size.logical()) {
/* Page size must be univ_page_size. */
ib::error()
<< "Data file '" << m_filepath << "' uses page size "
<< page_size.logical() << ", but the innodb_page_size"
" start-up parameter is "
<< univ_page_size.logical();
free_first_page();
return(DB_ERROR);
}
if (page_get_page_no(m_first_page) != 0) {
/* First page must be number 0 */
error_txt = "Header page contains inconsistent data";
goto err_exit;
}
if (m_space_id >= SRV_LOG_SPACE_FIRST_ID) {
error_txt = "A bad Space ID was found";
goto err_exit;
}
if (buf_page_is_corrupted(false, m_first_page, page_size)) {
/* Look for checksum and other corruptions. */
error_txt = "Checksum mismatch";
goto err_exit;
}
if (fil_space_read_name_and_filepath(
m_space_id, &prev_name, &prev_filepath)) {
if (0 == strcmp(m_filepath, prev_filepath)) {
ut_free(prev_name);
ut_free(prev_filepath);
return(DB_SUCCESS);
}
/* Make sure the space_id has not already been opened. */
ib::error() << "Attempted to open a previously opened"
" tablespace. Previous tablespace " << prev_name
<< " at filepath: " << prev_filepath
<< " uses space ID: " << m_space_id
<< ". Cannot open filepath: " << m_filepath
<< " which uses the same space ID.";
ut_free(prev_name);
ut_free(prev_filepath);
m_is_valid = false;
free_first_page();
return(is_predefined_tablespace(m_space_id)
? DB_CORRUPTION
: DB_TABLESPACE_EXISTS);
}
return(DB_SUCCESS);
}
/** Determine the space id of the given file descriptor by reading a few
pages from the beginning of the .ibd file.
@return DB_SUCCESS if space id was successfully identified, else DB_ERROR. */
dberr_t
Datafile::find_space_id()
{
os_offset_t file_size;
ut_ad(m_handle != OS_FILE_CLOSED);
file_size = os_file_get_size(m_handle);
if (file_size == (os_offset_t) -1) {
ib::error() << "Could not get file size of datafile '"
<< m_filepath << "'";
return(DB_CORRUPTION);
}
/* Assuming a page size, read the space_id from each page and store it
in a map. Find out which space_id is agreed on by majority of the
pages. Choose that space_id. */
for (ulint page_size = UNIV_ZIP_SIZE_MIN;
page_size <= UNIV_PAGE_SIZE_MAX;
page_size <<= 1) {
/* map[space_id] = count of pages */
typedef std::map<
ulint,
ulint,
std::less<ulint>,
ut_allocator<std::pair<const ulint, ulint> > >
Pages;
Pages verify;
ulint page_count = 64;
ulint valid_pages = 0;
/* Adjust the number of pages to analyze based on file size */
while ((page_count * page_size) > file_size) {
--page_count;
}
ib::info()
<< "Page size:" << page_size
<< ". Pages to analyze:" << page_count;
byte* buf = static_cast<byte*>(
ut_malloc_nokey(2 * UNIV_PAGE_SIZE_MAX));
byte* page = static_cast<byte*>(
ut_align(buf, UNIV_SECTOR_SIZE));
for (ulint j = 0; j < page_count; ++j) {
dberr_t err;
ulint n_bytes = j * page_size;
IORequest request(IORequest::READ);
err = os_file_read(
request, m_handle, page, n_bytes, page_size);
if (err != DB_SUCCESS) {
ib::info()
<< "READ FAIL: page_no:" << j;
continue;
}
bool noncompressed_ok = false;
/* For noncompressed pages, the page size must be
equal to univ_page_size.physical(). */
if (page_size == univ_page_size.physical()) {
noncompressed_ok = !buf_page_is_corrupted(
false, page, univ_page_size, NULL);
}
bool compressed_ok = false;
/* file-per-table tablespaces can be compressed with
the same physical and logical page size. General
tablespaces must have different physical and logical
page sizes in order to be compressed. For this check,
assume the page is compressed if univ_page_size.
logical() is equal to or less than 16k and the
page_size we are checking is equal to or less than
univ_page_size.logical(). */
if (univ_page_size.logical() <= UNIV_PAGE_SIZE_DEF
&& page_size <= univ_page_size.logical()) {
const page_size_t compr_page_size(
page_size, univ_page_size.logical(),
true);
compressed_ok = !buf_page_is_corrupted(
false, page, compr_page_size, NULL);
}
if (noncompressed_ok || compressed_ok) {
ulint space_id = mach_read_from_4(page
+ FIL_PAGE_SPACE_ID);
if (space_id > 0) {
ib::info()
<< "VALID: space:"
<< space_id << " page_no:" << j
<< " page_size:" << page_size;
++valid_pages;
++verify[space_id];
}
}
}
ut_free(buf);
ib::info()
<< "Page size: " << page_size
<< ". Possible space_id count:" << verify.size();
const ulint pages_corrupted = 3;
for (ulint missed = 0; missed <= pages_corrupted; ++missed) {
for (Pages::const_iterator it = verify.begin();
it != verify.end();
++it) {
ib::info() << "space_id:" << it->first
<< ", Number of pages matched: "
<< it->second << "/" << valid_pages
<< " (" << page_size << ")";
if (it->second == (valid_pages - missed)) {
ib::info() << "Chosen space:"
<< it->first;
m_space_id = it->first;
return(DB_SUCCESS);
}
}
}
}
return(DB_CORRUPTION);
}
/** Restore the first page of the tablespace from
the double write buffer.
@return whether the operation failed */
bool
Datafile::restore_from_doublewrite()
{
if (srv_operation != SRV_OPERATION_NORMAL) {
return true;
}
/* Find if double write buffer contains page_no of given space id. */
const page_id_t page_id(m_space_id, 0);
const byte* page = recv_sys->dblwr.find_page(page_id);
if (!page) {
/* If the first page of the given user tablespace is not there
in the doublewrite buffer, then the recovery is going to fail
now. Hence this is treated as an error. */
ib::error()
<< "Corrupted page " << page_id
<< " of datafile '" << m_filepath
<< "' could not be found in the doublewrite buffer.";
return(true);
}
ulint flags = mach_read_from_4(
FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + page);
if (!fsp_flags_is_valid(flags, m_space_id)) {
flags = fsp_flags_convert_from_101(flags);
/* recv_dblwr_t::validate_page() inside find_page()
checked this already. */
ut_ad(flags != ULINT_UNDEFINED);
/* The flags on the page should be converted later. */
}
const page_size_t page_size(flags);
ut_a(page_get_page_no(page) == page_id.page_no());
ib::info() << "Restoring page " << page_id
<< " of datafile '" << m_filepath
<< "' from the doublewrite buffer. Writing "
<< page_size.physical() << " bytes into file '"
<< m_filepath << "'";
IORequest request(IORequest::WRITE);
return(os_file_write(
request,
m_filepath, m_handle, page, 0, page_size.physical())
!= DB_SUCCESS);
}
/** Create a link filename based on the contents of m_name,
open that file, and read the contents into m_filepath.
@retval DB_SUCCESS if remote linked tablespace file is opened and read.
@retval DB_CANNOT_OPEN_FILE if the link file does not exist. */
dberr_t
RemoteDatafile::open_link_file()
{
set_link_filepath(NULL);
m_filepath = read_link_file(m_link_filepath);
return(m_filepath == NULL ? DB_CANNOT_OPEN_FILE : DB_SUCCESS);
}
/** Opens a handle to the file linked to in an InnoDB Symbolic Link file
in read-only mode so that it can be validated.
@param[in] strict whether to issue error messages
@return DB_SUCCESS if remote linked tablespace file is found and opened. */
dberr_t
RemoteDatafile::open_read_only(bool strict)
{
if (m_filepath == NULL && open_link_file() == DB_CANNOT_OPEN_FILE) {
return(DB_ERROR);
}
dberr_t err = Datafile::open_read_only(strict);
if (err != DB_SUCCESS && strict) {
/* The following call prints an error message */
os_file_get_last_error(true);
ib::error() << "A link file was found named '"
<< m_link_filepath << "' but the linked tablespace '"
<< m_filepath << "' could not be opened read-only.";
}
return(err);
}
/** Opens a handle to the file linked to in an InnoDB Symbolic Link file
in read-write mode so that it can be restored from doublewrite and validated.
@param[in] read_only_mode If true, then readonly mode checks are enforced.
@return DB_SUCCESS if remote linked tablespace file is found and opened. */
dberr_t
RemoteDatafile::open_read_write(bool read_only_mode)
{
if (m_filepath == NULL && open_link_file() == DB_CANNOT_OPEN_FILE) {
return(DB_ERROR);
}
dberr_t err = Datafile::open_read_write(read_only_mode);
if (err != DB_SUCCESS) {
/* The following call prints an error message */
m_last_os_error = os_file_get_last_error(true);
ib::error() << "A link file was found named '"
<< m_link_filepath << "' but the linked data file '"
<< m_filepath << "' could not be opened for writing.";
}
return(err);
}
/** Release the resources. */
void
RemoteDatafile::shutdown()
{
Datafile::shutdown();
if (m_link_filepath != 0) {
ut_free(m_link_filepath);
m_link_filepath = 0;
}
}
/** Set the link filepath. Use default datadir, the base name of
the path provided without its suffix, plus DOT_ISL.
@param[in] path filepath which contains a basename to use.
If NULL, use m_name as the basename. */
void
RemoteDatafile::set_link_filepath(const char* path)
{
if (m_link_filepath == NULL) {
m_link_filepath = fil_make_filepath(NULL, name(), ISL, false);
}
}
/** Creates a new InnoDB Symbolic Link (ISL) file. It is always created
under the 'datadir' of MySQL. The datadir is the directory of a
running mysqld program. We can refer to it by simply using the path ".".
@param[in] name tablespace name
@param[in] filepath remote filepath of tablespace datafile
@return DB_SUCCESS or error code */
dberr_t
RemoteDatafile::create_link_file(
const char* name,
const char* filepath)
{
bool success;
dberr_t err = DB_SUCCESS;
char* link_filepath = NULL;
char* prev_filepath = NULL;
ut_ad(!srv_read_only_mode);
ut_ad(0 == strcmp(&filepath[strlen(filepath) - 4], DOT_IBD));
link_filepath = fil_make_filepath(NULL, name, ISL, false);
if (link_filepath == NULL) {
return(DB_ERROR);
}
prev_filepath = read_link_file(link_filepath);
if (prev_filepath) {
/* Truncate will call this with an existing
link file which contains the same filepath. */
bool same = !strcmp(prev_filepath, filepath);
ut_free(prev_filepath);
if (same) {
ut_free(link_filepath);
return(DB_SUCCESS);
}
}
/** Check if the file already exists. */
FILE* file = NULL;
bool exists;
os_file_type_t ftype;
success = os_file_status(link_filepath, &exists, &ftype);
ulint error = 0;
if (success && !exists) {
file = fopen(link_filepath, "w");
if (file == NULL) {
/* This call will print its own error message */
error = os_file_get_last_error(true);
}
} else {
error = OS_FILE_ALREADY_EXISTS;
}
if (error != 0) {
ib::error() << "Cannot create file " << link_filepath << ".";
if (error == OS_FILE_ALREADY_EXISTS) {
ib::error() << "The link file: " << link_filepath
<< " already exists.";
err = DB_TABLESPACE_EXISTS;
} else if (error == OS_FILE_DISK_FULL) {
err = DB_OUT_OF_FILE_SPACE;
} else {
err = DB_ERROR;
}
/* file is not open, no need to close it. */
ut_free(link_filepath);
return(err);
}
ulint rbytes = fwrite(filepath, 1, strlen(filepath), file);
if (rbytes != strlen(filepath)) {
error = os_file_get_last_error(true);
ib::error() <<
"Cannot write link file: "
<< link_filepath << " filepath: " << filepath;
err = DB_ERROR;
}
/* Close the file, we only need it at startup */
fclose(file);
ut_free(link_filepath);
return(err);
}
/** Delete an InnoDB Symbolic Link (ISL) file. */
void
RemoteDatafile::delete_link_file(void)
{
ut_ad(m_link_filepath != NULL);
if (m_link_filepath != NULL) {
os_file_delete_if_exists(innodb_data_file_key,
m_link_filepath, NULL);
}
}
/** Delete an InnoDB Symbolic Link (ISL) file by name.
@param[in] name tablespace name */
void
RemoteDatafile::delete_link_file(
const char* name)
{
char* link_filepath = fil_make_filepath(NULL, name, ISL, false);
if (link_filepath != NULL) {
os_file_delete_if_exists(
innodb_data_file_key, link_filepath, NULL);
ut_free(link_filepath);
}
}
/** Read an InnoDB Symbolic Link (ISL) file by name.
It is always created under the datadir of MySQL.
For file-per-table tablespaces, the isl file is expected to be
in a 'database' directory and called 'tablename.isl'.
The caller must free the memory returned if it is not null.
@param[in] link_filepath filepath of the ISL file
@return Filepath of the IBD file read from the ISL file */
char*
RemoteDatafile::read_link_file(
const char* link_filepath)
{
FILE* file = fopen(link_filepath, "r+b");
if (file == NULL) {
return(NULL);
}
char* filepath = static_cast<char*>(ut_malloc_nokey(OS_FILE_MAX_PATH));
os_file_read_string(file, filepath, OS_FILE_MAX_PATH);
fclose(file);
if (filepath[0] != '\0') {
/* Trim whitespace from end of filepath */
ulint last_ch = strlen(filepath) - 1;
while (last_ch > 4 && filepath[last_ch] <= 0x20) {
filepath[last_ch--] = 0x00;
}
os_normalize_path(filepath);
}
return(filepath);
}