mariadb/storage/innobase/fsp/fsp0file.cc

1049 lines
26 KiB
C++
Raw Normal View History

/*****************************************************************************
Copyright (c) 2013, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
/**************************************************//**
@file fsp/fsp0file.cc
Tablespace data file implementation
Created 2013-7-26 by Kevin Lewis
*******************************************************/
#include "fil0fil.h"
#include "fsp0types.h"
#include "os0file.h"
#include "page0page.h"
#include "srv0start.h"
/** Set up the tablespace name and flags of this datafile.
The name must not have been assigned before (checked in debug builds).
@param[in]	name	tablespace name; a private copy is stored in m_name
@param[in]	flags	tablespace flags, stored in m_flags */
void
Datafile::init(
	const char*	name,
	ulint		flags)
{
	ut_ad(m_name == NULL);
	ut_ad(name != NULL);

	m_flags = flags;
	m_name = mem_strdup(name);
}
/** Close the file handle and release the memory held by this object:
the first page buffer, the file path and the tablespace name. */
void
Datafile::shutdown()
{
	/* Close the handle first; the remaining steps only release memory. */
	close();

	free_first_page();
	free_filepath();

	ut_free(m_name);
	m_name = NULL;
}
/** Open the data file at m_filepath, or create it, as directed by
m_open_flags. The handle must currently be closed.
@param[in]	read_only_mode	if true, then readonly mode checks are enforced
@retval DB_SUCCESS if the file was opened or created
@retval DB_CANNOT_OPEN_FILE on failure; the OS error is kept in
m_last_os_error */
dberr_t
Datafile::open_or_create(bool read_only_mode)
{
	ut_a(m_filepath != NULL);
	ut_ad(m_handle == OS_FILE_CLOSED);

	bool	opened;

	m_handle = os_file_create(
		innodb_data_file_key, m_filepath, m_open_flags,
		OS_FILE_NORMAL, OS_DATA_FILE, read_only_mode, &opened);

	if (opened) {
		return(DB_SUCCESS);
	}

	m_last_os_error = os_file_get_last_error(true);
	ib::error() << "Cannot open datafile '" << m_filepath << "'";

	return(DB_CANNOT_OPEN_FILE);
}
/** Open an existing data file for reading only, so that its existence can
be checked and its contents validated. On success m_exists is set and the
OS file information is captured.
@param[in]	strict	whether to record and report the OS error on failure
@retval DB_SUCCESS if the file was opened
@retval DB_ERROR if no file path has been set
@retval DB_CANNOT_OPEN_FILE if the file could not be opened */
dberr_t
Datafile::open_read_only(bool strict)
{
	ut_ad(m_handle == OS_FILE_CLOSED);

	/* Objects that never need to be opened have no file path. */
	if (m_filepath == NULL) {
		return(DB_ERROR);
	}

	bool	opened = false;

	set_open_flags(OS_FILE_OPEN);

	m_handle = os_file_create_simple_no_error_handling(
		innodb_data_file_key, m_filepath, m_open_flags,
		OS_FILE_READ_ONLY, true, &opened);

	if (!opened) {
		/* Only complain (and remember the OS error) when the
		caller asked for strict checking. */
		if (strict) {
			m_last_os_error = os_file_get_last_error(true);
			ib::error() << "Cannot open datafile for read-only: '"
				<< m_filepath << "' OS error: "
				<< m_last_os_error;
		}

		return(DB_CANNOT_OPEN_FILE);
	}

	m_exists = true;
	init_file_info();

	return(DB_SUCCESS);
}
/** Open an existing data file for reading and writing during start-up, so
that doublewrite pages can be restored before the file is validated.
On success m_exists is set and the OS file information is captured.
@param[in]	read_only_mode	if true, then readonly mode checks are enforced
@retval DB_SUCCESS if the file was opened
@retval DB_ERROR if no file path has been set
@retval DB_CANNOT_OPEN_FILE if the file could not be opened; the OS error
is kept in m_last_os_error */
dberr_t
Datafile::open_read_write(bool read_only_mode)
{
	ut_ad(m_handle == OS_FILE_CLOSED);

	/* Objects that never need to be opened have no file path. */
	if (m_filepath == NULL) {
		return(DB_ERROR);
	}

	bool	opened = false;

	set_open_flags(OS_FILE_OPEN);

	m_handle = os_file_create_simple_no_error_handling(
		innodb_data_file_key, m_filepath, m_open_flags,
		OS_FILE_READ_WRITE, read_only_mode, &opened);

	if (opened) {
		m_exists = true;
		init_file_info();

		return(DB_SUCCESS);
	}

	m_last_os_error = os_file_get_last_error(true);
	ib::error() << "Cannot open datafile for read-write: '"
		<< m_filepath << "'";

	return(DB_CANNOT_OPEN_FILE);
}
/** Fill m_file_info with the operating system's description of the open
file handle; same_as() compares these fields to identify a file.
The result of the OS call is not checked here. */
void
Datafile::init_file_info()
{
#ifndef _WIN32
	fstat(m_handle, &m_file_info);
#else
	GetFileInformationByHandle(m_handle, &m_file_info);
#endif /* !_WIN32 */
}
/** Close the file handle if it is open; closing an already closed
datafile is a no-op. A failure of the OS close is treated as fatal.
@return DB_SUCCESS always */
dberr_t
Datafile::close()
{
	if (m_handle == OS_FILE_CLOSED) {
		return(DB_SUCCESS);
	}

	ibool	success = os_file_close(m_handle);
	ut_a(success);

	m_handle = OS_FILE_CLOSED;

	return(DB_SUCCESS);
}
/** Build m_filepath from a directory path, a file name and an extension
by delegating to fil_make_filepath(), replacing any previous path.
At least one of dirpath and filename must be given.
@param[in]	dirpath		directory path, or NULL
@param[in]	filename	filename or filepath, or NULL
@param[in]	ext		filename extension */
void
Datafile::make_filepath(
	const char*	dirpath,
	const char*	filename,
	ib_extention	ext)
{
	ut_ad(dirpath != NULL || filename != NULL);

	/* Drop the old path before installing the new one. */
	free_filepath();

	m_filepath = fil_make_filepath(dirpath, filename, ext, false);
	ut_ad(m_filepath != NULL);

	/* Refresh m_filename to match the new path. */
	set_filename();
}
/** Replace m_filepath with a private copy of the given path, which is the
name of the file with its extension and an absolute or relative path.
@param[in]	filepath	NUL-terminated filepath to copy */
void
Datafile::set_filepath(const char* filepath)
{
	free_filepath();

	/* Copy the string including its terminating NUL byte. */
	const size_t	n_bytes = strlen(filepath) + 1;

	m_filepath = static_cast<char*>(ut_malloc_nokey(n_bytes));
	::memcpy(m_filepath, filepath, n_bytes);

	set_filename();
}
/** Release the m_filepath buffer, if any, and reset both m_filepath and
m_filename to NULL. Does nothing when no path is set. */
void
Datafile::free_filepath()
{
	if (m_filepath == NULL) {
		return;
	}

	ut_free(m_filepath);

	/* m_filename is cleared together with the path it belongs to. */
	m_filename = NULL;
	m_filepath = NULL;
}
/** Quick byte-by-byte comparison of this object's m_filepath with another
path. Two different spellings of a path to the same file are reported as
different here; same_as() can detect that case once the files are open.
@param[in]	other	filepath to compare with
@retval true if the two paths are identical strings
@retval false if they differ */
bool
Datafile::same_filepath_as(
	const char*	other) const
{
	return(strcmp(m_filepath, other) == 0);
}
/** Decide whether another datafile refers to the same underlying file by
comparing the OS file information stored in m_file_info of both objects
(device and inode number, or on Windows the volume serial number and
file index).
@param[in]	other	Datafile to compare with
@return true if it is the same file, else false */
bool
Datafile::same_as(
	const Datafile&	other) const
{
#ifndef _WIN32
	return(m_file_info.st_dev == other.m_file_info.st_dev
	       && m_file_info.st_ino == other.m_file_info.st_ino);
#else
	return(m_file_info.nFileIndexLow
	       == other.m_file_info.nFileIndexLow
	       && m_file_info.nFileIndexHigh
	       == other.m_file_info.nFileIndexHigh
	       && m_file_info.dwVolumeSerialNumber
	       == other.m_file_info.dwVolumeSerialNumber);
#endif /* !_WIN32 */
}
/** Allocate and set the datafile or tablespace name in m_name, replacing
any previous value. If a name is provided, a copy of it is used; else the
name is derived from m_filepath with fil_path_to_space_name().
The value of m_name is released again by shutdown().
@param[in]	name	tablespace name if known, NULL if not */
void
Datafile::set_name(const char* name)
{
	/* Build the replacement before releasing the old value, so that
	a caller passing the current m_name (or a pointer into it) does not
	make us read freed memory. */
	char*	new_name = (name != NULL)
		? mem_strdup(name)
		: fil_path_to_space_name(m_filepath);

	ut_free(m_name);
	m_name = new_name;
}
/** Read the first page of the datafile into an aligned buffer and, for
the first datafile (m_order == 0), extract the space ID and the tablespace
flags from it. If the file is not open yet, it is opened with
open_or_create().

The read is first attempted with UNIV_PAGE_SIZE_MAX bytes. Each time it
fails with DB_IO_ERROR after at least UNIV_PAGE_SIZE_MIN bytes were read,
the size is halved and the read is retried.

This function does not release the page buffer on any return path after
the allocation; free_first_page() releases it.
@param[in]	read_only_mode	if true, then readonly mode checks are enforced
@retval DB_SUCCESS if the page was read and its flags are usable
@retval DB_CORRUPTION if the flags are invalid, or if they describe a
physical page size larger than the amount that could be read
@return otherwise, the error from opening or reading the file */
dberr_t
Datafile::read_first_page(bool read_only_mode)
{
	if (m_handle == OS_FILE_CLOSED) {

		dberr_t	err = open_or_create(read_only_mode);

		if (err != DB_SUCCESS) {
			return(err);
		}
	}

	m_first_page_buf = static_cast<byte*>(
		ut_malloc_nokey(2 * UNIV_PAGE_SIZE_MAX));

	/* Align the memory for a possible read from a raw device */

	m_first_page = static_cast<byte*>(
		ut_align(m_first_page_buf, srv_page_size));

	IORequest	request;
	dberr_t		err = DB_ERROR;
	size_t		page_size = UNIV_PAGE_SIZE_MAX;

	/* Don't want unnecessary complaints about partial reads. */

	request.disable_partial_io_warnings();

	while (page_size >= UNIV_PAGE_SIZE_MIN) {

		ulint	n_read = 0;

		err = os_file_read_no_error_handling(
			request, m_handle, m_first_page, 0, page_size, &n_read);

		if (err == DB_IO_ERROR && n_read >= UNIV_PAGE_SIZE_MIN) {
			/* A short read that still covers the smallest
			page size: retry with half the size. */
			page_size >>= 1;

		} else if (err == DB_SUCCESS) {

			ut_a(n_read == page_size);

			break;

		} else if (srv_operation == SRV_OPERATION_BACKUP) {
			/* In backup mode the failure is returned to
			the caller without logging an error here. */
			break;
		} else {

			ib::error()
				<< "Cannot read first page of '"
				<< m_filepath << "' "
				<< ut_strerr(err);

			break;
		}
	}

	if (err != DB_SUCCESS) {
		return(err);
	}

	if (m_order == 0) {
		m_space_id = fsp_header_get_space_id(m_first_page);
		m_flags = fsp_header_get_flags(m_first_page);

		if (!fil_space_t::is_valid_flags(m_flags, m_space_id)) {
			/* Try to interpret the flags with
			fsp_flags_convert_from_101() before giving up. */
			ulint	cflags = fsp_flags_convert_from_101(m_flags);

			if (cflags == ULINT_UNDEFINED) {
				ib::error()
					<< "Invalid flags " << ib::hex(m_flags)
					<< " in " << m_filepath;
				return(DB_CORRUPTION);
			} else {
				m_flags = cflags;
			}
		}
	}

	const size_t	physical_size = fil_space_t::physical_size(m_flags);

	/* The flags claim a page that is larger than what was read. */
	if (physical_size > page_size) {
		ib::error() << "File " << m_filepath
			<< " should be longer than "
			<< page_size << " bytes";
		return(DB_CORRUPTION);
	}

	return(err);
}
/** Release the buffer that holds the first page once it is no longer
needed, and reset both the buffer pointer and the aligned page pointer.
Does nothing when no buffer is allocated. */
void
Datafile::free_first_page()
{
	if (m_first_page_buf == NULL) {
		return;
	}

	ut_free(m_first_page_buf);

	m_first_page = NULL;
	m_first_page_buf = NULL;
}
/** Validates the datafile and checks that it conforms with the expected
space ID and flags. The file should exist and be successfully opened
in order for this function to validate it.
@param[in] space_id The expected tablespace ID.
@param[in] flags The expected tablespace flags.
@retval DB_SUCCESS if tablespace is valid, DB_ERROR if not.
m_is_valid is also set true on success, else false. */
dberr_t
Datafile::validate_to_dd(ulint space_id, ulint flags)
{
dberr_t err;
if (!is_open()) {
return DB_ERROR;
}
/* Validate this single-table-tablespace with the data dictionary,
but do not compare the DATA_DIR flag, in case the tablespace was
remotely located. */
err = validate_first_page(0);
if (err != DB_SUCCESS) {
return(err);
}
flags &= ~FSP_FLAGS_MEM_MASK;
/* Make sure the datafile we found matched the space ID.
If the datafile is a file-per-table tablespace then also match
the row format and zip page size. */
MDEV-12026: Implement innodb_checksum_algorithm=full_crc32 MariaDB data-at-rest encryption (innodb_encrypt_tables) had repurposed the same unused data field that was repurposed in MySQL 5.7 (and MariaDB 10.2) for the Split Sequence Number (SSN) field of SPATIAL INDEX. Because of this, MariaDB was unable to support encryption on SPATIAL INDEX pages. Furthermore, InnoDB page checksums skipped some bytes, and there are multiple variations and checksum algorithms. By default, InnoDB accepts all variations of all algorithms that ever existed. This unnecessarily weakens the page checksums. We hereby introduce two more innodb_checksum_algorithm variants (full_crc32, strict_full_crc32) that are special in a way: When either setting is active, newly created data files will carry a flag (fil_space_t::full_crc32()) that indicates that all pages of the file will use a full CRC-32C checksum over the entire page contents (excluding the bytes where the checksum is stored, at the very end of the page). Such files will always use that checksum, no matter what the parameter innodb_checksum_algorithm is assigned to. For old files, the old checksum algorithms will continue to be used. The value strict_full_crc32 will be equivalent to strict_crc32 and the value full_crc32 will be equivalent to crc32. ROW_FORMAT=COMPRESSED tables will only use the old format. These tables do not support new features, such as larger innodb_page_size or instant ADD/DROP COLUMN. They may be deprecated in the future. We do not want an unnecessary file format change for them. The new full_crc32() format also cleans up the MariaDB tablespace flags. We will reserve flags to store the page_compressed compression algorithm, and to store the compressed payload length, so that checksum can be computed over the compressed (and possibly encrypted) stream and can be validated without decrypting or decompressing the page. 
In the full_crc32 format, there no longer are separate before-encryption and after-encryption checksums for pages. The single checksum is computed on the page contents that is written to the file. We do not make the new algorithm the default for two reasons. First, MariaDB 10.4.2 was a beta release, and the default values of parameters should not change after beta. Second, we did not yet implement the full_crc32 format for page_compressed pages. This will be fixed in MDEV-18644. This is joint work with Marko Mäkelä.
2019-02-19 21:00:00 +02:00
if (m_space_id == space_id
&& fil_space_t::is_flags_equal(m_flags, flags)) {
/* Datafile matches the tablespace expected. */
return(DB_SUCCESS);
}
/* else do not use this tablespace. */
m_is_valid = false;
MDEV-12873 InnoDB SYS_TABLES.TYPE incompatibility for PAGE_COMPRESSED=YES in MariaDB 10.2.2 to 10.2.6 Remove the SHARED_SPACE flag that was erroneously introduced in MariaDB 10.2.2, and shift the SYS_TABLES.TYPE flags back to where they were before MariaDB 10.2.2. While doing this, ensure that tables created with affected MariaDB versions can be loaded, and also ensure that tables created with MySQL 5.7 using the TABLESPACE attribute cannot be loaded. MariaDB 10.2.2 picked the SHARED_SPACE flag from MySQL 5.7, shifting the MariaDB 10.1 flags PAGE_COMPRESSION, PAGE_COMPRESSION_LEVEL, ATOMIC_WRITES by one bit. The SHARED_SPACE flag would always be written as 0 by MariaDB, because MariaDB does not support CREATE TABLESPACE or CREATE TABLE...TABLESPACE for InnoDB. So, instead of the bits AALLLLCxxxxxxx we would have AALLLLC0xxxxxxx if the table was created with MariaDB 10.2.2 to 10.2.6. (AA=ATOMIC_WRITES, LLLL=PAGE_COMPRESSION_LEVEL, C=PAGE_COMPRESSED, xxxxxxx=7 bits that were not moved.) PAGE_COMPRESSED=NO implies LLLLC=00000. That is not a problem. If someone created a table in MariaDB 10.2.2 or 10.2.3 with the attribute ATOMIC_WRITES=OFF (value 2; AA=10) and without PAGE_COMPRESSED=YES or PAGE_COMPRESSION_LEVEL, the table should be rejected. We ignore this problem, because it should be unlikely for anyone to specify ATOMIC_WRITES=OFF, and because 10.2.2 and 10.2.2 were not mature releases. The value ATOMIC_WRITES=ON (1) would be interpreted as ATOMIC_WRITES=OFF, but starting with MariaDB 10.2.4 the ATOMIC_WRITES attribute is ignored. PAGE_COMPRESSED=YES implies that PAGE_COMPRESSION_LEVEL be between 1 and 9 and that ROW_FORMAT be COMPACT or DYNAMIC. Thus, the affected wrong bit pattern in SYS_TABLES.TYPE is of the form AALLLL10DB00001 where D signals the presence of a DATA DIRECTORY attribute and B is 1 for ROW_FORMAT=DYNAMIC and 0 for ROW_FORMAT=COMPACT. We must interpret this bit pattern as AALLLL1DB00001 (discarding the extraneous 0 bit). 
dict_sys_tables_rec_read(): Adjust the affected bit pattern when reading the SYS_TABLES.TYPE column. In case of invalid flags, report both SYS_TABLES.TYPE (after possible adjustment) and SYS_TABLES.MIX_LEN. dict_load_table_one(): Replace an unreachable condition on !dict_tf2_is_valid() with a debug assertion. The flags will already have been validated by dict_sys_tables_rec_read(); if that validation fails, dict_load_table_low() will have failed. fil_ibd_create(): Shorten an error message about a file pre-existing. Datafile::validate_to_dd(): Clarify an error message about tablespace flags mismatch. ha_innobase::open(): Remove an unnecessary warning message. dict_tf_is_valid(): Simplify and stricten the logic. Validate the values of PAGE_COMPRESSION. Remove error log output; let the callers handle that. DICT_TF_BITS: Remove ATOMIC_WRITES, PAGE_ENCRYPTION, PAGE_ENCRYPTION_KEY. The ATOMIC_WRITES is ignored once the SYS_TABLES.TYPE has been validated; there is no need to store it in dict_table_t::flags. The PAGE_ENCRYPTION and PAGE_ENCRYPTION_KEY are unused since MariaDB 10.1.4 (the GA release was 10.1.8). DICT_TF_BIT_MASK: Remove (unused). FSP_FLAGS_MEM_ATOMIC_WRITES: Remove (the flags are never read). row_import_read_v1(): Display an error if dict_tf_is_valid() fails.
2017-06-14 14:08:49 +03:00
ib::error() << "Refusing to load '" << m_filepath << "' (id="
<< m_space_id << ", flags=" << ib::hex(m_flags)
<< "); dictionary contains id="
<< space_id << ", flags=" << ib::hex(flags);
return(DB_ERROR);
}
/** Validates this datafile for the purpose of recovery. The file should
exist and be successfully opened. We initially open it in read-only mode
because we just want to read the SpaceID. However, if the first page is
corrupt and needs to be restored from the doublewrite buffer, we will
reopen it in write mode and try to restore that page.
On success the tablespace name is derived from the file path.
@retval DB_SUCCESS if the first page is valid (possibly after it was
restored from the doublewrite buffer)
@retval DB_TABLESPACE_EXISTS if validate_first_page() reports a duplicate
space ID
@retval DB_CORRUPTION if the space ID cannot be determined or the first
page cannot be restored
@return otherwise, the error from reopening the file, from
find_space_id() or from validate_first_page() */
dberr_t
Datafile::validate_for_recovery()
{
	dberr_t	err;

	ut_ad(is_open());
	ut_ad(!srv_read_only_mode);

	err = validate_first_page(0);

	switch (err) {
	case DB_SUCCESS:
	case DB_TABLESPACE_EXISTS:
		break;

	default:
		/* Re-open the file in read-write mode. Attempt to restore
		page 0 from doublewrite and read the space ID from a survey
		of the first few pages. */
		close();
		err = open_read_write(srv_read_only_mode);
		if (err != DB_SUCCESS) {
			return(err);
		}

		err = find_space_id();
		if (err != DB_SUCCESS || m_space_id == 0) {
			ib::error() << "Datafile '" << m_filepath << "' is"
				" corrupted. Cannot determine the space ID from"
				" the first 64 pages.";
			/* Never report success for a file that was just
			declared corrupted: a zero space ID together with
			DB_SUCCESS must still be an error for the caller. */
			return(err == DB_SUCCESS ? DB_CORRUPTION : err);
		}

		/* A true return value from restore_from_doublewrite()
		is treated as failure to restore the page. */
		if (restore_from_doublewrite()) {
			return(DB_CORRUPTION);
		}

		/* Free the previously read first page and then re-validate. */
		free_first_page();
		err = validate_first_page(0);
	}

	if (err == DB_SUCCESS) {
		set_name(NULL);
	}

	return(err);
}
/** Check the consistency of the first page of a datafile when the
tablespace is opened. This occurs before the fil_space_t is created
so the Space ID found here must not already be open.
The first page is read with read_first_page() unless it is already in
memory. m_is_valid is set to true on entry and reset to false on the
DB_CORRUPTION and duplicate space ID paths.
@param[out]	flush_lsn	contents of FIL_PAGE_FILE_FLUSH_LSN, or
				NULL if the caller does not need it
@retval DB_SUCCESS if the datafile is valid
@retval DB_CORRUPTION if the datafile is not readable, its header is
inconsistent, or a predefined tablespace has a duplicate space_id
@retval DB_ERROR if the page size of the file differs from
innodb_page_size
@retval DB_TABLESPACE_EXISTS if there is a duplicate space_id */
dberr_t
Datafile::validate_first_page(lsn_t* flush_lsn)
{
	char*		prev_name;
	char*		prev_filepath;
	const char*	error_txt = NULL;

	m_is_valid = true;

	if (m_first_page == NULL
	    && read_first_page(srv_read_only_mode) != DB_SUCCESS) {

		error_txt = "Cannot read first page";
	} else {
		ut_ad(m_first_page_buf);
		ut_ad(m_first_page);

		if (flush_lsn != NULL) {

			*flush_lsn = mach_read_from_8(
				m_first_page
				+ FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
		}
	}

	if (error_txt != NULL) {
		/* Common exit for every check below that sets error_txt
		and jumps here: report, invalidate, release the page. */
err_exit:
		ib::error() << error_txt << " in datafile: " << m_filepath
			<< ", Space ID:" << m_space_id  << ", Flags: "
			<< m_flags << ". " << TROUBLESHOOT_DATADICT_MSG;
		m_is_valid = false;

		free_first_page();

		return(DB_CORRUPTION);
	}

	/* Check if the whole page is blank. */
	if (!m_space_id && !m_flags) {
		const byte*	b = m_first_page;
		/* Number of bytes not yet known to be zero. */
		ulint		bytes_left = srv_page_size;

		while (*b == '\0' && --bytes_left != 0) {

			b++;
		}

		if (bytes_left == 0) {
			error_txt = "Header page consists of zero bytes";
			goto err_exit;
		}
	}

	if (!fil_space_t::is_valid_flags(m_flags, m_space_id)) {
		/* Tablespace flags must be valid. */
		error_txt = "Tablespace flags are invalid";
		goto err_exit;
	}

	ulint	logical_size = fil_space_t::logical_size(m_flags);

	if (srv_page_size != logical_size) {
		/* Logical size must be innodb_page_size. */
		ib::error()
			<< "Data file '" << m_filepath << "' uses page size "
			<< logical_size << ", but the innodb_page_size"
			" start-up parameter is "
			<< srv_page_size;
		free_first_page();
		/* NOTE(review): m_is_valid is left true on this error
		path, unlike the other failures - confirm whether callers
		rely on that. */
		return(DB_ERROR);
	}

	if (page_get_page_no(m_first_page) != 0) {
		/* First page must be number 0 */
		error_txt = "Header page contains inconsistent data";
		goto err_exit;
	}

	if (m_space_id == ULINT_UNDEFINED) {
		/* The space_id can be most anything, except -1. */
		error_txt = "A bad Space ID was found";
		goto err_exit;
	}

	if (buf_page_is_corrupted(false, m_first_page, m_flags)) {
		/* Look for checksum and other corruptions. */
		error_txt = "Checksum mismatch";
		goto err_exit;
	}

	if (fil_space_read_name_and_filepath(
		m_space_id, &prev_name, &prev_filepath)) {

		/* The space ID is already known. That is fine only if it
		was registered with exactly this file path. */
		if (0 == strcmp(m_filepath, prev_filepath)) {
			ut_free(prev_name);
			ut_free(prev_filepath);
			return(DB_SUCCESS);
		}

		/* Make sure the space_id has not already been opened. */
		ib::error() << "Attempted to open a previously opened"
			" tablespace. Previous tablespace " << prev_name
			<< " at filepath: " << prev_filepath
			<< " uses space ID: " << m_space_id
			<< ". Cannot open filepath: " << m_filepath
			<< " which uses the same space ID.";

		ut_free(prev_name);
		ut_free(prev_filepath);

		m_is_valid = false;

		free_first_page();

		return(is_predefined_tablespace(m_space_id)
		       ? DB_CORRUPTION
		       : DB_TABLESPACE_EXISTS);
	}

	return(DB_SUCCESS);
}
/** Determine the space id of the given file descriptor by reading a few
pages from the beginning of the .ibd file.
Every candidate page size from UNIV_ZIP_SIZE_MIN to UNIV_PAGE_SIZE_MAX is
tried in turn. For each one, up to 64 leading pages are read, the pages
that pass buf_page_is_corrupted() are tallied by the space id stored on
them, and a space id is accepted when at most 3 of the valid pages
disagree with it. The accepted value is stored in m_space_id.
@return DB_SUCCESS if space id was successfully identified,
else DB_CORRUPTION. */
dberr_t
Datafile::find_space_id()
{
	os_offset_t	file_size;

	ut_ad(m_handle != OS_FILE_CLOSED);

	file_size = os_file_get_size(m_handle);

	if (file_size == (os_offset_t) -1) {
		ib::error() << "Could not get file size of datafile '"
			<< m_filepath << "'";
		return(DB_CORRUPTION);
	}

	/* Assuming a page size, read the space_id from each page and store it
	in a map.  Find out which space_id is agreed on by majority of the
	pages.  Choose that space_id. */
	for (ulint page_size = UNIV_ZIP_SIZE_MIN;
	     page_size <= UNIV_PAGE_SIZE_MAX;
	     page_size <<= 1) {
		/* map[space_id] = count of pages */
		typedef std::map<
			ulint,
			ulint,
			std::less<ulint>,
			ut_allocator<std::pair<const ulint, ulint> > >
			Pages;

		Pages	verify;
		ulint	page_count = 64;
		ulint	valid_pages = 0;

		/* Adjust the number of pages to analyze based on file size */
		while ((page_count * page_size) > file_size) {
			--page_count;
		}

		ib::info()
			<< "Page size:" << page_size
			<< ". Pages to analyze:" << page_count;

		/* Over-allocate so that the read buffer can be aligned. */
		byte*	buf = static_cast<byte*>(
			ut_malloc_nokey(2 * UNIV_PAGE_SIZE_MAX));

		byte*	page = static_cast<byte*>(
			ut_align(buf, UNIV_SECTOR_SIZE));

		/* The flags are taken from page 0 below. Start from a
		defined value so that a failed read of page 0 does not
		leave them indeterminate when later pages are checked.
		NOTE(review): 0 presumably selects the pre-full_crc32
		checksum handling; confirm that this is the desired
		fallback. */
		ulint	fsp_flags = 0;

		for (ulint j = 0; j < page_count; ++j) {

			dberr_t		err;
			ulint		n_bytes = j * page_size;
			IORequest	request(IORequest::READ);

			err = os_file_read(
				request, m_handle, page, n_bytes, page_size);

			if (err != DB_SUCCESS) {

				ib::info()
					<< "READ FAIL: page_no:" << j;

				continue;
			}

			if (j == 0) {
				/* The tablespace flags live in the header
				of the first page; they are reused for
				validating every following page. */
				fsp_flags = mach_read_from_4(
					page + FSP_HEADER_OFFSET
					+ FSP_SPACE_FLAGS);
			}

			bool	noncompressed_ok = false;

			/* For noncompressed pages, the page size must be
			equal to srv_page_size. */
			if (page_size == srv_page_size) {
				noncompressed_ok = !buf_page_is_corrupted(
					false, page, fsp_flags);
			}

			bool	compressed_ok = false;

			if (srv_page_size <= UNIV_PAGE_SIZE_DEF
			    && page_size <= srv_page_size) {
				compressed_ok = !buf_page_is_corrupted(
					false, page, fsp_flags);
			}

			if (noncompressed_ok || compressed_ok) {

				ulint	space_id = mach_read_from_4(page
					+ FIL_PAGE_SPACE_ID);

				/* Only pages carrying a nonzero space id
				take part in the vote. */
				if (space_id > 0) {

					ib::info()
						<< "VALID: space:"
						<< space_id << " page_no:" << j
						<< " page_size:" << page_size;

					++valid_pages;

					++verify[space_id];
				}
			}
		}

		ut_free(buf);

		ib::info()
			<< "Page size: " << page_size
			<< ". Possible space_id count:" << verify.size();

		/* Accept a space id that was found on all valid pages,
		or on all but 1, 2 or 3 of them, preferring the
		strictest match. */
		const ulint	pages_corrupted = 3;

		for (ulint missed = 0; missed <= pages_corrupted; ++missed) {

			for (Pages::const_iterator it = verify.begin();
			     it != verify.end();
			     ++it) {

				ib::info() << "space_id:" << it->first
					<< ", Number of pages matched: "
					<< it->second << "/" << valid_pages
					<< " (" << page_size << ")";

				if (it->second == (valid_pages - missed)) {
					ib::info() << "Chosen space:"
						<< it->first;

					m_space_id = it->first;
					return(DB_SUCCESS);
				}
			}

		}
	}

	return(DB_CORRUPTION);
}
/** Restore the first page of the tablespace from
the double write buffer.
@return whether the operation failed */
bool
Datafile::restore_from_doublewrite()
{
if (srv_operation != SRV_OPERATION_NORMAL) {
return true;
}
/* Find if double write buffer contains page_no of given space id. */
const byte* page = recv_sys->dblwr.find_page(m_space_id, 0);
const page_id_t page_id(m_space_id, 0);
if (page == NULL) {
/* If the first page of the given user tablespace is not there
in the doublewrite buffer, then the recovery is going to fail
now. Hence this is treated as an error. */
ib::error()
<< "Corrupted page " << page_id
<< " of datafile '" << m_filepath
<< "' could not be found in the doublewrite buffer.";
return(true);
}
ulint flags = mach_read_from_4(
FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + page);
MDEV-12026: Implement innodb_checksum_algorithm=full_crc32 MariaDB data-at-rest encryption (innodb_encrypt_tables) had repurposed the same unused data field that was repurposed in MySQL 5.7 (and MariaDB 10.2) for the Split Sequence Number (SSN) field of SPATIAL INDEX. Because of this, MariaDB was unable to support encryption on SPATIAL INDEX pages. Furthermore, InnoDB page checksums skipped some bytes, and there are multiple variations and checksum algorithms. By default, InnoDB accepts all variations of all algorithms that ever existed. This unnecessarily weakens the page checksums. We hereby introduce two more innodb_checksum_algorithm variants (full_crc32, strict_full_crc32) that are special in a way: When either setting is active, newly created data files will carry a flag (fil_space_t::full_crc32()) that indicates that all pages of the file will use a full CRC-32C checksum over the entire page contents (excluding the bytes where the checksum is stored, at the very end of the page). Such files will always use that checksum, no matter what the parameter innodb_checksum_algorithm is assigned to. For old files, the old checksum algorithms will continue to be used. The value strict_full_crc32 will be equivalent to strict_crc32 and the value full_crc32 will be equivalent to crc32. ROW_FORMAT=COMPRESSED tables will only use the old format. These tables do not support new features, such as larger innodb_page_size or instant ADD/DROP COLUMN. They may be deprecated in the future. We do not want an unnecessary file format change for them. The new full_crc32() format also cleans up the MariaDB tablespace flags. We will reserve flags to store the page_compressed compression algorithm, and to store the compressed payload length, so that checksum can be computed over the compressed (and possibly encrypted) stream and can be validated without decrypting or decompressing the page. 
In the full_crc32 format, there no longer are separate before-encryption and after-encryption checksums for pages. The single checksum is computed on the page contents that is written to the file. We do not make the new algorithm the default for two reasons. First, MariaDB 10.4.2 was a beta release, and the default values of parameters should not change after beta. Second, we did not yet implement the full_crc32 format for page_compressed pages. This will be fixed in MDEV-18644. This is joint work with Marko Mäkelä.
2019-02-19 21:00:00 +02:00
if (!fil_space_t::is_valid_flags(flags, m_space_id)) {
ulint cflags = fsp_flags_convert_from_101(flags);
if (cflags == ULINT_UNDEFINED) {
ib::warn()
<< "Ignoring a doublewrite copy of page "
<< page_id
<< " due to invalid flags " << ib::hex(flags);
return(true);
}
flags = cflags;
/* The flags on the page should be converted later. */
}
ulint physical_size = fil_space_t::physical_size(flags);
ut_a(page_get_page_no(page) == page_id.page_no());
ib::info() << "Restoring page " << page_id
<< " of datafile '" << m_filepath
<< "' from the doublewrite buffer. Writing "
<< physical_size << " bytes into file '"
<< m_filepath << "'";
IORequest request(IORequest::WRITE);
return(os_file_write(
request,
m_filepath, m_handle, page, 0, physical_size)
!= DB_SUCCESS);
}
/** Create a link filename based on the contents of m_name,
open that file, and read the contents into m_filepath.
The link filename is built only if m_link_filepath is not set yet.
@retval DB_SUCCESS if remote linked tablespace file is opened and read.
@retval DB_CANNOT_OPEN_FILE if the link filename could not be built or
read_link_file() returned NULL for it. */
dberr_t
RemoteDatafile::open_link_file()
{
	if (m_link_filepath == NULL) {
		m_link_filepath = fil_make_filepath(NULL, name(), ISL, false);

		if (m_link_filepath == NULL) {
			/* Without a link filename there is nothing to
			read; do not hand NULL to read_link_file(). */
			return(DB_CANNOT_OPEN_FILE);
		}
	}

	m_filepath = read_link_file(m_link_filepath);

	return(m_filepath == NULL ? DB_CANNOT_OPEN_FILE : DB_SUCCESS);
}
/** Open, in read-only mode, the data file that an InnoDB Symbolic Link
file points to, so that the data file can be validated.
If m_filepath is not known yet, it is first read from the link file.
@param[in]	strict	whether to issue error messages
@return DB_SUCCESS if the remote linked tablespace file is found and opened,
DB_ERROR if the link file cannot be opened, else the error code of
Datafile::open_read_only() */
dberr_t
RemoteDatafile::open_read_only(bool strict)
{
	if (m_filepath == NULL) {
		if (open_link_file() == DB_CANNOT_OPEN_FILE) {
			return(DB_ERROR);
		}
	}

	const dberr_t	status = Datafile::open_read_only(strict);

	/* Nothing to report on success or in non-strict mode. */
	if (status == DB_SUCCESS || !strict) {
		return(status);
	}

	/* The following call prints an error message */
	os_file_get_last_error(true);

	ib::error() << "A link file was found named '"
		<< m_link_filepath << "' but the linked tablespace '"
		<< m_filepath << "' could not be opened read-only.";

	return(status);
}
/** Open, in read-write mode, the data file that an InnoDB Symbolic Link
file points to, so that it can be restored from doublewrite and validated.
If m_filepath is not known yet, it is first read from the link file.
On failure the operating system error is remembered in m_last_os_error.
@param[in]	read_only_mode	If true, then readonly mode checks are enforced.
@return DB_SUCCESS if the remote linked tablespace file is found and opened,
DB_ERROR if the link file cannot be opened, else the error code of
Datafile::open_read_write() */
dberr_t
RemoteDatafile::open_read_write(bool read_only_mode)
{
	if (m_filepath == NULL) {
		if (open_link_file() == DB_CANNOT_OPEN_FILE) {
			return(DB_ERROR);
		}
	}

	const dberr_t	status = Datafile::open_read_write(read_only_mode);

	if (status == DB_SUCCESS) {
		return(status);
	}

	/* The following call prints an error message */
	m_last_os_error = os_file_get_last_error(true);

	ib::error() << "A link file was found named '"
		<< m_link_filepath << "' but the linked data file '"
		<< m_filepath << "' could not be opened for writing.";

	return(status);
}
/** Release the resources. */
void
RemoteDatafile::shutdown()
{
Datafile::shutdown();
if (m_link_filepath != 0) {
ut_free(m_link_filepath);
m_link_filepath = 0;
}
}
/** Creates a new InnoDB Symbolic Link (ISL) file.  It is always created
under the 'datadir' of MySQL. The datadir is the directory of a
running mysqld program. We can refer to it by simply using the path ".".
If a link file already exists and already contains exactly the given
filepath, nothing is written and DB_SUCCESS is returned.
@param[in]	name		tablespace name
@param[in]	filepath	remote filepath of tablespace datafile
@return DB_SUCCESS or error code
@retval DB_TABLESPACE_EXISTS	if the link file already exists with other
contents, or its status could not be determined
@retval DB_OUT_OF_FILE_SPACE	if creating the file failed with
OS_FILE_DISK_FULL
@retval DB_ERROR		if the link filename could not be built, or
the file could not be created, written or closed */
dberr_t
RemoteDatafile::create_link_file(
	const char*	name,
	const char*	filepath)
{
	bool		success;
	dberr_t		err = DB_SUCCESS;
	char*		link_filepath = NULL;
	char*		prev_filepath = NULL;

	ut_ad(!srv_read_only_mode);
	ut_ad(0 == strcmp(&filepath[strlen(filepath) - 4], DOT_IBD));

	link_filepath = fil_make_filepath(NULL, name, ISL, false);

	if (link_filepath == NULL) {
		return(DB_ERROR);
	}

	prev_filepath = read_link_file(link_filepath);
	if (prev_filepath) {
		/* Truncate will call this with an existing
		link file which contains the same filepath. */
		bool same = !strcmp(prev_filepath, filepath);
		ut_free(prev_filepath);
		if (same) {
			ut_free(link_filepath);
			return(DB_SUCCESS);
		}
	}

	/* Check if the file already exists. */
	FILE*			file = NULL;
	bool			exists;
	os_file_type_t		ftype;

	success = os_file_status(link_filepath, &exists, &ftype);
	ulint error = 0;

	if (success && !exists) {

		file = fopen(link_filepath, "w");
		if (file == NULL) {
			/* This call will print its own error message */
			error = os_file_get_last_error(true);
		}
	} else {
		/* A failed status check is handled like an existing
		file: the link file is not created. */
		error = OS_FILE_ALREADY_EXISTS;
	}

	if (error != 0) {

		ib::error() << "Cannot create file " << link_filepath << ".";

		if (error == OS_FILE_ALREADY_EXISTS) {
			ib::error() << "The link file: " << link_filepath
				<< " already exists.";
			err = DB_TABLESPACE_EXISTS;

		} else if (error == OS_FILE_DISK_FULL) {
			err = DB_OUT_OF_FILE_SPACE;

		} else {
			err = DB_ERROR;
		}

		/* file is not open, no need to close it. */
		ut_free(link_filepath);
		return(err);
	}

	const size_t	len = strlen(filepath);

	if (fwrite(filepath, 1, len, file) != len) {
		/* This call will print its own error message */
		os_file_get_last_error(true);
		ib::error() <<
			"Cannot write link file: "
			<< link_filepath << " filepath: " << filepath;
		err = DB_ERROR;
	}

	/* Close the file, we only need it at startup. The stream is
	buffered, so the data may reach the file only when fclose()
	flushes it; a failure here must not be reported as success. */
	if (fclose(file) != 0 && err == DB_SUCCESS) {
		/* This call will print its own error message */
		os_file_get_last_error(true);
		ib::error() <<
			"Cannot close link file: "
			<< link_filepath << " filepath: " << filepath;
		err = DB_ERROR;
	}

	ut_free(link_filepath);

	return(err);
}
/** Delete the InnoDB Symbolic Link (ISL) file whose path is stored in
m_link_filepath. The path is expected to be set (asserted in debug
builds); if it is not, nothing is deleted. */
void
RemoteDatafile::delete_link_file(void)
{
	ut_ad(m_link_filepath != NULL);

	if (m_link_filepath == NULL) {
		return;
	}

	os_file_delete_if_exists(
		innodb_data_file_key, m_link_filepath, NULL);
}
/** Delete an InnoDB Symbolic Link (ISL) file by name.
@param[in] name tablespace name */
void
RemoteDatafile::delete_link_file(
const char* name)
{
char* link_filepath = fil_make_filepath(NULL, name, ISL, false);
if (link_filepath != NULL) {
os_file_delete_if_exists(
innodb_data_file_key, link_filepath, NULL);
ut_free(link_filepath);
}
}
/** Read an InnoDB Symbolic Link (ISL) file by name.
It is always created under the datadir of MySQL.
For file-per-table tablespaces, the isl file is expected to be
in a 'database' directory and called 'tablename.isl'.
The caller must free the memory returned if it is not null.
Trailing whitespace and control characters are trimmed from the path that
was read, and the path is normalized. If nothing was read, the returned
buffer holds an empty string.
@param[in]	link_filepath	filepath of the ISL file
@return Filepath of the IBD file read from the ISL file
@retval NULL if the ISL file could not be opened */
char*
RemoteDatafile::read_link_file(
	const char*	link_filepath)
{
	/* NOTE(review): "r+b" opens the file for update although it is
	only read here, so the open presumably fails when the link file
	is not writable; confirm that this is intended. */
	FILE* file = fopen(link_filepath, "r+b" STR_O_CLOEXEC);
	if (file == NULL) {
		return(NULL);
	}

	char* filepath = static_cast<char*>(ut_malloc_nokey(OS_FILE_MAX_PATH));

	os_file_read_string(file, filepath, OS_FILE_MAX_PATH);
	fclose(file);

	if (filepath[0] != '\0') {
		/* Trim whitespace from end of filepath. The index guard
		keeps at least the first five characters untouched.
		Compare as unsigned char: where plain char is signed,
		bytes >= 0x80 (for example of a multi-byte character)
		would otherwise compare as negative and be trimmed too. */
		ulint last_ch = strlen(filepath) - 1;
		while (last_ch > 4
		       && static_cast<unsigned char>(filepath[last_ch])
		       <= 0x20) {
			filepath[last_ch--] = 0x00;
		}
		os_normalize_path(filepath);
	}

	return(filepath);
}