2015-03-02 10:55:48 +02:00
|
|
|
/*****************************************************************************
|
|
|
|
Copyright (C) 2013, 2015, Google Inc. All Rights Reserved.
|
2018-04-06 12:55:43 +03:00
|
|
|
Copyright (c) 2014, 2018, MariaDB Corporation. All Rights Reserved.
|
2015-03-02 10:55:48 +02:00
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it under
|
|
|
|
the terms of the GNU General Public License as published by the Free Software
|
|
|
|
Foundation; version 2 of the License.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful, but WITHOUT
|
|
|
|
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
|
|
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License along with
|
|
|
|
this program; if not, write to the Free Software Foundation, Inc.,
|
|
|
|
51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
|
|
|
|
*****************************************************************************/
|
|
|
|
/**************************************************//**
|
|
|
|
@file fil0crypt.cc
|
|
|
|
Innodb file space encrypt/decrypt
|
|
|
|
|
|
|
|
Created Jonas Oreland Google
|
|
|
|
Modified Jan Lindström jan.lindstrom@mariadb.com
|
|
|
|
*******************************************************/
|
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
#include "fil0fil.h"
|
2017-08-04 13:57:26 +03:00
|
|
|
#include "mtr0types.h"
|
2017-01-03 14:35:08 +02:00
|
|
|
#include "mach0data.h"
|
|
|
|
#include "page0size.h"
|
|
|
|
#include "page0zip.h"
|
|
|
|
#ifndef UNIV_INNOCHECKSUM
|
2015-04-09 19:27:40 +02:00
|
|
|
#include "fil0crypt.h"
|
2014-12-22 16:53:17 +02:00
|
|
|
#include "srv0srv.h"
|
|
|
|
#include "srv0start.h"
|
|
|
|
#include "log0recv.h"
|
|
|
|
#include "mtr0mtr.h"
|
|
|
|
#include "mtr0log.h"
|
|
|
|
#include "ut0ut.h"
|
|
|
|
#include "btr0scrub.h"
|
|
|
|
#include "fsp0fsp.h"
|
|
|
|
#include "fil0pagecompress.h"
|
|
|
|
#include <my_crypt.h>
|
|
|
|
|
|
|
|
/** Mutex for keys */
|
2016-12-22 14:20:47 +02:00
|
|
|
static ib_mutex_t fil_crypt_key_mutex;
|
2014-12-22 16:53:17 +02:00
|
|
|
|
2015-05-17 14:14:16 +03:00
|
|
|
static bool fil_crypt_threads_inited = false;
|
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
/** Is encryption enabled/disabled */
|
2015-04-09 21:04:05 +02:00
|
|
|
UNIV_INTERN ulong srv_encrypt_tables = 0;
|
2014-12-22 16:53:17 +02:00
|
|
|
|
|
|
|
/** No of key rotation threads requested */
|
|
|
|
UNIV_INTERN uint srv_n_fil_crypt_threads = 0;
|
|
|
|
|
|
|
|
/** No of key rotation threads started */
|
MDEV-11638 Encryption causes race conditions in InnoDB shutdown
InnoDB shutdown failed to properly take fil_crypt_thread() into account.
The encryption threads were signalled to shut down together with other
non-critical tasks. This could be much too early in case of slow shutdown,
which could need minutes to complete the purge. Furthermore, InnoDB
failed to wait for the fil_crypt_thread() to actually exit before
proceeding to the final steps of shutdown, causing the race conditions.
Furthermore, the log_scrub_thread() was shut down way too early.
Also it should remain until the SRV_SHUTDOWN_FLUSH_PHASE.
fil_crypt_threads_end(): Remove. This would cause the threads to
be terminated way too early.
srv_buf_dump_thread_active, srv_dict_stats_thread_active,
lock_sys->timeout_thread_active, log_scrub_thread_active,
srv_monitor_active, srv_error_monitor_active: Remove a race condition
between startup and shutdown, by setting these in the startup thread
that creates threads, not in each created thread. In this way, once the
flag is cleared, it will remain cleared during shutdown.
srv_n_fil_crypt_threads_started, fil_crypt_threads_event: Declare in
global rather than static scope.
log_scrub_event, srv_log_scrub_thread_active, log_scrub_thread():
Declare in static rather than global scope. Let these be created by
log_init() and freed by log_shutdown().
rotate_thread_t::should_shutdown(): Do not shut down before the
SRV_SHUTDOWN_FLUSH_PHASE.
srv_any_background_threads_are_active(): Remove. These checks now
exist in logs_empty_and_mark_files_at_shutdown().
logs_empty_and_mark_files_at_shutdown(): Shut down the threads in
the proper order. Keep fil_crypt_thread() and log_scrub_thread() alive
until SRV_SHUTDOWN_FLUSH_PHASE, and check that they actually terminate.
2017-01-04 18:43:32 +02:00
|
|
|
UNIV_INTERN uint srv_n_fil_crypt_threads_started = 0;
|
2014-12-22 16:53:17 +02:00
|
|
|
|
|
|
|
/** At this age or older a space/page will be rotated */
|
2017-03-14 12:56:01 +02:00
|
|
|
UNIV_INTERN uint srv_fil_crypt_rotate_key_age;
|
2014-12-22 16:53:17 +02:00
|
|
|
|
|
|
|
/** Event to signal FROM the key rotation threads. */
|
2016-12-22 14:20:47 +02:00
|
|
|
static os_event_t fil_crypt_event;
|
2014-12-22 16:53:17 +02:00
|
|
|
|
|
|
|
/** Event to signal TO the key rotation threads. */
|
MDEV-11638 Encryption causes race conditions in InnoDB shutdown
InnoDB shutdown failed to properly take fil_crypt_thread() into account.
The encryption threads were signalled to shut down together with other
non-critical tasks. This could be much too early in case of slow shutdown,
which could need minutes to complete the purge. Furthermore, InnoDB
failed to wait for the fil_crypt_thread() to actually exit before
proceeding to the final steps of shutdown, causing the race conditions.
Furthermore, the log_scrub_thread() was shut down way too early.
Also it should remain until the SRV_SHUTDOWN_FLUSH_PHASE.
fil_crypt_threads_end(): Remove. This would cause the threads to
be terminated way too early.
srv_buf_dump_thread_active, srv_dict_stats_thread_active,
lock_sys->timeout_thread_active, log_scrub_thread_active,
srv_monitor_active, srv_error_monitor_active: Remove a race condition
between startup and shutdown, by setting these in the startup thread
that creates threads, not in each created thread. In this way, once the
flag is cleared, it will remain cleared during shutdown.
srv_n_fil_crypt_threads_started, fil_crypt_threads_event: Declare in
global rather than static scope.
log_scrub_event, srv_log_scrub_thread_active, log_scrub_thread():
Declare in static rather than global scope. Let these be created by
log_init() and freed by log_shutdown().
rotate_thread_t::should_shutdown(): Do not shut down before the
SRV_SHUTDOWN_FLUSH_PHASE.
srv_any_background_threads_are_active(): Remove. These checks now
exist in logs_empty_and_mark_files_at_shutdown().
logs_empty_and_mark_files_at_shutdown(): Shut down the threads in
the proper order. Keep fil_crypt_thread() and log_scrub_thread() alive
until SRV_SHUTDOWN_FLUSH_PHASE, and check that they actually terminate.
2017-01-04 18:43:32 +02:00
|
|
|
UNIV_INTERN os_event_t fil_crypt_threads_event;
|
2014-12-22 16:53:17 +02:00
|
|
|
|
2017-03-14 12:56:01 +02:00
|
|
|
/** Event for waking up threads throttle. */
|
2016-12-22 14:20:47 +02:00
|
|
|
static os_event_t fil_crypt_throttle_sleep_event;
|
2014-12-22 16:53:17 +02:00
|
|
|
|
2017-03-14 12:56:01 +02:00
|
|
|
/** Mutex for key rotation threads. */
|
|
|
|
UNIV_INTERN ib_mutex_t fil_crypt_threads_mutex;
|
2014-12-22 16:53:17 +02:00
|
|
|
|
|
|
|
/** Variable ensuring only 1 thread at time does initial conversion */
|
|
|
|
static bool fil_crypt_start_converting = false;
|
|
|
|
|
|
|
|
/** Variables for throttling */
|
|
|
|
UNIV_INTERN uint srv_n_fil_crypt_iops = 100; // 10ms per iop
|
|
|
|
static uint srv_alloc_time = 3; // allocate iops for 3s at a time
|
|
|
|
static uint n_fil_crypt_iops_allocated = 0;
|
|
|
|
|
|
|
|
/** Variables for scrubbing */
|
|
|
|
extern uint srv_background_scrub_data_interval;
|
|
|
|
extern uint srv_background_scrub_data_check_interval;
|
|
|
|
|
|
|
|
#define DEBUG_KEYROTATION_THROTTLING 0
|
|
|
|
|
|
|
|
/** Statistics variables */
|
|
|
|
static fil_crypt_stat_t crypt_stat;
|
|
|
|
static ib_mutex_t crypt_stat_mutex;
|
|
|
|
|
2017-03-14 12:56:01 +02:00
|
|
|
/** Is background scrubbing enabled, defined on btr0scrub.cc */
|
|
|
|
extern my_bool srv_background_scrub_data_uncompressed;
|
|
|
|
extern my_bool srv_background_scrub_data_compressed;
|
|
|
|
|
2017-08-03 08:29:36 +03:00
|
|
|
/***********************************************************************
|
|
|
|
Check if a key needs rotation given a key_state
|
2018-04-06 12:55:43 +03:00
|
|
|
@param[in] crypt_data Encryption information
|
2017-08-03 08:29:36 +03:00
|
|
|
@param[in] key_version Current key version
|
|
|
|
@param[in] latest_key_version Latest key version
|
|
|
|
@param[in] rotate_key_age when to rotate
|
|
|
|
@return true if key needs rotation, false if not */
|
2015-05-11 21:05:02 +02:00
|
|
|
static bool
|
|
|
|
fil_crypt_needs_rotation(
|
2018-04-06 12:55:43 +03:00
|
|
|
const fil_space_crypt_t* crypt_data,
|
|
|
|
uint key_version,
|
|
|
|
uint latest_key_version,
|
|
|
|
uint rotate_key_age)
|
2017-08-03 08:29:36 +03:00
|
|
|
MY_ATTRIBUTE((warn_unused_result));
|
2015-05-11 21:05:02 +02:00
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
/*********************************************************************
|
|
|
|
Init space crypt */
|
|
|
|
UNIV_INTERN
|
|
|
|
void
|
|
|
|
fil_space_crypt_init()
|
|
|
|
{
|
2016-08-12 11:17:45 +03:00
|
|
|
mutex_create(LATCH_ID_FIL_CRYPT_MUTEX, &fil_crypt_key_mutex);
|
2014-12-22 16:53:17 +02:00
|
|
|
|
2016-08-12 11:17:45 +03:00
|
|
|
fil_crypt_throttle_sleep_event = os_event_create(0);
|
2014-12-22 16:53:17 +02:00
|
|
|
|
2016-08-12 11:17:45 +03:00
|
|
|
mutex_create(LATCH_ID_FIL_CRYPT_STAT_MUTEX, &crypt_stat_mutex);
|
2014-12-22 16:53:17 +02:00
|
|
|
memset(&crypt_stat, 0, sizeof(crypt_stat));
|
|
|
|
}
|
|
|
|
|
|
|
|
/*********************************************************************
|
|
|
|
Cleanup space crypt */
|
|
|
|
UNIV_INTERN
|
|
|
|
void
|
|
|
|
fil_space_crypt_cleanup()
|
|
|
|
{
|
2016-08-12 11:17:45 +03:00
|
|
|
os_event_destroy(fil_crypt_throttle_sleep_event);
|
2016-12-22 10:23:42 +02:00
|
|
|
mutex_free(&fil_crypt_key_mutex);
|
|
|
|
mutex_free(&crypt_stat_mutex);
|
2014-12-22 16:53:17 +02:00
|
|
|
}
|
|
|
|
|
2016-12-13 11:51:33 +02:00
|
|
|
/**
|
|
|
|
Get latest key version from encryption plugin.
|
|
|
|
@return key version or ENCRYPTION_KEY_VERSION_INVALID */
|
|
|
|
uint
|
2017-03-14 12:56:01 +02:00
|
|
|
fil_space_crypt_t::key_get_latest_version(void)
|
2016-12-13 11:51:33 +02:00
|
|
|
{
|
|
|
|
uint key_version = key_found;
|
|
|
|
|
|
|
|
if (is_key_found()) {
|
|
|
|
key_version = encryption_key_get_latest_version(key_id);
|
|
|
|
srv_stats.n_key_requests.inc();
|
|
|
|
key_found = key_version;
|
|
|
|
}
|
|
|
|
|
|
|
|
return key_version;
|
|
|
|
}
|
|
|
|
|
2015-05-11 21:05:02 +02:00
|
|
|
/******************************************************************
|
2017-03-14 12:56:01 +02:00
|
|
|
Get the latest(key-version), waking the encrypt thread, if needed
|
|
|
|
@param[in,out] crypt_data Crypt data */
|
2015-05-11 21:05:02 +02:00
|
|
|
static inline
|
|
|
|
uint
|
|
|
|
fil_crypt_get_latest_key_version(
|
2017-03-14 12:56:01 +02:00
|
|
|
fil_space_crypt_t* crypt_data)
|
2015-05-11 21:05:02 +02:00
|
|
|
{
|
2016-12-13 11:51:33 +02:00
|
|
|
ut_ad(crypt_data != NULL);
|
2015-05-11 21:05:02 +02:00
|
|
|
|
2016-12-13 11:51:33 +02:00
|
|
|
uint key_version = crypt_data->key_get_latest_version();
|
|
|
|
|
|
|
|
if (crypt_data->is_key_found()) {
|
|
|
|
|
2018-04-06 12:55:43 +03:00
|
|
|
if (fil_crypt_needs_rotation(
|
|
|
|
crypt_data,
|
2016-12-13 11:51:33 +02:00
|
|
|
crypt_data->min_key_version,
|
|
|
|
key_version,
|
|
|
|
srv_fil_crypt_rotate_key_age)) {
|
2017-08-28 09:45:54 +03:00
|
|
|
/* Below event seen as NULL-pointer at startup
|
|
|
|
when new database was created and we create a
|
|
|
|
checkpoint. Only seen when debugging. */
|
|
|
|
if (fil_crypt_threads_inited) {
|
|
|
|
os_event_set(fil_crypt_threads_event);
|
|
|
|
}
|
2016-12-13 11:51:33 +02:00
|
|
|
}
|
2015-05-11 21:05:02 +02:00
|
|
|
}
|
|
|
|
|
2016-12-13 11:51:33 +02:00
|
|
|
return key_version;
|
2015-05-11 21:05:02 +02:00
|
|
|
}
|
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
/******************************************************************
|
2015-05-13 21:57:24 +02:00
|
|
|
Mutex helper for crypt_data->scheme */
|
2015-05-17 14:14:16 +03:00
|
|
|
void
|
|
|
|
crypt_data_scheme_locker(
|
|
|
|
/*=====================*/
|
|
|
|
st_encryption_scheme* scheme,
|
|
|
|
int exit)
|
2014-12-22 16:53:17 +02:00
|
|
|
{
|
2015-05-13 21:57:24 +02:00
|
|
|
fil_space_crypt_t* crypt_data =
|
|
|
|
static_cast<fil_space_crypt_t*>(scheme);
|
2014-12-22 16:53:17 +02:00
|
|
|
|
2015-05-13 21:57:24 +02:00
|
|
|
if (exit) {
|
|
|
|
mutex_exit(&crypt_data->mutex);
|
|
|
|
} else {
|
|
|
|
mutex_enter(&crypt_data->mutex);
|
2014-12-22 16:53:17 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/******************************************************************
|
2015-04-01 11:50:21 +03:00
|
|
|
Create a fil_space_crypt_t object
|
2017-03-14 12:56:01 +02:00
|
|
|
@param[in] type CRYPT_SCHEME_UNENCRYPTE or
|
|
|
|
CRYPT_SCHEME_1
|
|
|
|
@param[in] encrypt_mode FIL_ENCRYPTION_DEFAULT or
|
|
|
|
FIL_ENCRYPTION_ON or
|
|
|
|
FIL_ENCRYPTION_OFF
|
|
|
|
@param[in] min_key_version key_version or 0
|
|
|
|
@param[in] key_id Used key id
|
2015-04-01 11:50:21 +03:00
|
|
|
@return crypt object */
|
2016-12-13 11:51:33 +02:00
|
|
|
static
|
2014-12-22 16:53:17 +02:00
|
|
|
fil_space_crypt_t*
|
2015-05-13 11:41:22 +03:00
|
|
|
fil_space_create_crypt_data(
|
2016-12-13 11:51:33 +02:00
|
|
|
uint type,
|
|
|
|
fil_encryption_t encrypt_mode,
|
|
|
|
uint min_key_version,
|
2017-03-14 12:56:01 +02:00
|
|
|
uint key_id)
|
2014-12-22 16:53:17 +02:00
|
|
|
{
|
2016-12-13 11:51:33 +02:00
|
|
|
fil_space_crypt_t* crypt_data = NULL;
|
2017-03-30 12:48:42 +02:00
|
|
|
if (void* buf = ut_zalloc_nokey(sizeof(fil_space_crypt_t))) {
|
2016-12-13 11:51:33 +02:00
|
|
|
crypt_data = new(buf)
|
2017-03-14 12:56:01 +02:00
|
|
|
fil_space_crypt_t(
|
2016-12-13 11:51:33 +02:00
|
|
|
type,
|
|
|
|
min_key_version,
|
|
|
|
key_id,
|
|
|
|
encrypt_mode);
|
2014-12-22 16:53:17 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
return crypt_data;
|
|
|
|
}
|
|
|
|
|
2016-12-13 11:51:33 +02:00
|
|
|
/******************************************************************
|
|
|
|
Create a fil_space_crypt_t object
|
2017-03-14 12:56:01 +02:00
|
|
|
@param[in] encrypt_mode FIL_ENCRYPTION_DEFAULT or
|
|
|
|
FIL_ENCRYPTION_ON or
|
|
|
|
FIL_ENCRYPTION_OFF
|
|
|
|
|
|
|
|
@param[in] key_id Encryption key id
|
2016-12-13 11:51:33 +02:00
|
|
|
@return crypt object */
|
|
|
|
UNIV_INTERN
|
|
|
|
fil_space_crypt_t*
|
|
|
|
fil_space_create_crypt_data(
|
2017-03-14 12:56:01 +02:00
|
|
|
fil_encryption_t encrypt_mode,
|
|
|
|
uint key_id)
|
2016-12-13 11:51:33 +02:00
|
|
|
{
|
2017-03-14 12:56:01 +02:00
|
|
|
return (fil_space_create_crypt_data(0, encrypt_mode, 0, key_id));
|
2016-12-13 11:51:33 +02:00
|
|
|
}
|
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
/******************************************************************
|
2017-03-14 12:56:01 +02:00
|
|
|
Merge fil_space_crypt_t object
|
|
|
|
@param[in,out] dst Destination cryp data
|
|
|
|
@param[in] src Source crypt data */
|
2014-12-22 16:53:17 +02:00
|
|
|
UNIV_INTERN
|
2015-05-15 09:54:41 +02:00
|
|
|
void
|
|
|
|
fil_space_merge_crypt_data(
|
2017-03-14 12:56:01 +02:00
|
|
|
fil_space_crypt_t* dst,
|
|
|
|
const fil_space_crypt_t* src)
|
2014-12-22 16:53:17 +02:00
|
|
|
{
|
2015-05-15 09:54:41 +02:00
|
|
|
mutex_enter(&dst->mutex);
|
|
|
|
|
|
|
|
/* validate that they are mergeable */
|
|
|
|
ut_a(src->type == CRYPT_SCHEME_UNENCRYPTED ||
|
|
|
|
src->type == CRYPT_SCHEME_1);
|
2015-04-01 11:50:21 +03:00
|
|
|
|
2015-05-15 09:54:41 +02:00
|
|
|
ut_a(dst->type == CRYPT_SCHEME_UNENCRYPTED ||
|
|
|
|
dst->type == CRYPT_SCHEME_1);
|
2014-12-22 16:53:17 +02:00
|
|
|
|
2015-05-17 14:14:16 +03:00
|
|
|
dst->encryption = src->encryption;
|
2015-05-15 09:54:41 +02:00
|
|
|
dst->type = src->type;
|
|
|
|
dst->min_key_version = src->min_key_version;
|
|
|
|
dst->keyserver_requests += src->keyserver_requests;
|
2014-12-22 16:53:17 +02:00
|
|
|
|
2015-05-15 09:54:41 +02:00
|
|
|
mutex_exit(&dst->mutex);
|
2014-12-22 16:53:17 +02:00
|
|
|
}
|
|
|
|
|
2017-03-30 12:48:42 +02:00
|
|
|
/** Initialize encryption parameters from a tablespace header page.
|
|
|
|
@param[in] page_size page size of the tablespace
|
|
|
|
@param[in] page first page of the tablespace
|
|
|
|
@return crypt data from page 0
|
|
|
|
@retval NULL if not present or not valid */
|
2014-12-22 16:53:17 +02:00
|
|
|
UNIV_INTERN
|
|
|
|
fil_space_crypt_t*
|
2017-03-30 12:48:42 +02:00
|
|
|
fil_space_read_crypt_data(const page_size_t& page_size, const byte* page)
|
2014-12-22 16:53:17 +02:00
|
|
|
{
|
2017-03-30 12:48:42 +02:00
|
|
|
const ulint offset = FSP_HEADER_OFFSET
|
|
|
|
+ fsp_header_get_encryption_offset(page_size);
|
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
if (memcmp(page + offset, CRYPT_MAGIC, MAGIC_SZ) != 0) {
|
2015-09-04 15:54:20 +03:00
|
|
|
/* Crypt data is not stored. */
|
2015-05-20 13:35:51 +03:00
|
|
|
return NULL;
|
2014-12-22 16:53:17 +02:00
|
|
|
}
|
|
|
|
|
2017-05-10 09:07:50 +03:00
|
|
|
uint8_t type = mach_read_from_1(page + offset + MAGIC_SZ + 0);
|
|
|
|
uint8_t iv_length = mach_read_from_1(page + offset + MAGIC_SZ + 1);
|
2017-03-30 12:48:42 +02:00
|
|
|
fil_space_crypt_t* crypt_data;
|
2014-12-22 16:53:17 +02:00
|
|
|
|
2017-03-30 12:48:42 +02:00
|
|
|
if (!(type == CRYPT_SCHEME_UNENCRYPTED ||
|
|
|
|
type == CRYPT_SCHEME_1)
|
|
|
|
|| iv_length != sizeof crypt_data->iv) {
|
2016-08-12 11:17:45 +03:00
|
|
|
ib::error() << "Found non sensible crypt scheme: "
|
2017-03-30 12:48:42 +02:00
|
|
|
<< type << "," << iv_length << " for space: "
|
|
|
|
<< page_get_space_id(page) << " offset: "
|
2016-08-12 11:17:45 +03:00
|
|
|
<< offset << " bytes: ["
|
|
|
|
<< page[offset + 2 + MAGIC_SZ]
|
|
|
|
<< page[offset + 3 + MAGIC_SZ]
|
|
|
|
<< page[offset + 4 + MAGIC_SZ]
|
|
|
|
<< page[offset + 5 + MAGIC_SZ]
|
|
|
|
<< "].";
|
2017-03-30 12:48:42 +02:00
|
|
|
return NULL;
|
2014-12-22 16:53:17 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
uint min_key_version = mach_read_from_4
|
|
|
|
(page + offset + MAGIC_SZ + 2 + iv_length);
|
|
|
|
|
2015-04-09 00:37:54 +02:00
|
|
|
uint key_id = mach_read_from_4
|
|
|
|
(page + offset + MAGIC_SZ + 2 + iv_length + 4);
|
|
|
|
|
2015-04-01 19:37:00 +03:00
|
|
|
fil_encryption_t encryption = (fil_encryption_t)mach_read_from_1(
|
2015-04-09 00:37:54 +02:00
|
|
|
page + offset + MAGIC_SZ + 2 + iv_length + 8);
|
2015-04-01 19:37:00 +03:00
|
|
|
|
2016-12-13 11:51:33 +02:00
|
|
|
crypt_data = fil_space_create_crypt_data(encryption, key_id);
|
|
|
|
/* We need to overwrite these as above function will initialize
|
|
|
|
members */
|
2014-12-22 16:53:17 +02:00
|
|
|
crypt_data->type = type;
|
|
|
|
crypt_data->min_key_version = min_key_version;
|
|
|
|
crypt_data->page0_offset = offset;
|
|
|
|
memcpy(crypt_data->iv, page + offset + MAGIC_SZ + 2, iv_length);
|
|
|
|
|
|
|
|
return crypt_data;
|
|
|
|
}
|
|
|
|
|
|
|
|
/******************************************************************
|
2017-03-14 12:56:01 +02:00
|
|
|
Free a crypt data object
|
|
|
|
@param[in,out] crypt_data crypt data to be freed */
|
2014-12-22 16:53:17 +02:00
|
|
|
UNIV_INTERN
|
|
|
|
void
|
2015-04-01 11:50:21 +03:00
|
|
|
fil_space_destroy_crypt_data(
|
2017-03-14 12:56:01 +02:00
|
|
|
fil_space_crypt_t **crypt_data)
|
2014-12-22 16:53:17 +02:00
|
|
|
{
|
|
|
|
if (crypt_data != NULL && (*crypt_data) != NULL) {
|
MDEV-13485 MTR tests fail massively with --innodb-sync-debug
The parameter --innodb-sync-debug, which is disabled by default,
aims to find potential deadlocks in InnoDB.
When the parameter is enabled, lots of tests failed. Most of these
failures were due to bogus diagnostics. But, as part of this fix,
we are also fixing a bug in error handling code and removing dead
code, and fixing cases where an uninitialized mutex was being
locked and unlocked.
dict_create_foreign_constraints_low(): Remove an extraneous
mutex_exit() call that could cause corruption in an error handling
path. Also, do not unnecessarily acquire dict_foreign_err_mutex.
Its only purpose is to control concurrent access to
dict_foreign_err_file.
row_ins_foreign_trx_print(): Replace a redundant condition with a
debug assertion.
srv_dict_tmpfile, srv_dict_tmpfile_mutex: Remove. The
temporary file is never being written to or read from.
log_free_check(): Allow SYNC_FTS_CACHE (fts_cache_t::lock)
to be held.
ha_innobase::inplace_alter_table(), row_merge_insert_index_tuples():
Assert that no unexpected latches are being held.
sync_latch_meta_init(): Properly initialize dict_operation_lock_key
at SYNC_DICT_OPERATION. dict_sys->mutex is SYNC_DICT, and
the now-removed SRV_DICT_TMPFILE was wrongly registered at
SYNC_DICT_OPERATION.
buf_block_init(): Correctly register buf_block_t::debug_latch.
It was previously misleadingly reported as LATCH_ID_DICT_FOREIGN_ERR.
latch_level_t: Correct the relative latching order of
SYNC_IBUF_PESS_INSERT_MUTEX,SYNC_INDEX_TREE and
SYNC_FILE_FORMAT_TAG,SYNC_DICT_OPERATION to avoid bogus failures.
row_drop_table_for_mysql(): Avoid accessing btr_defragment_mutex
if the defragmentation thread has not been started. This is the
case during fts_drop_orphaned_tables() in recv_recovery_rollback_active().
fil_space_destroy_crypt_data(): Avoid acquiring fil_crypt_threads_mutex
when it is uninitialized. We may have created crypt_data before the
mutex was created, and the mutex creation would be skipped if
InnoDB startup failed or --innodb-read-only was specified.
2017-08-21 18:56:46 +03:00
|
|
|
fil_space_crypt_t* c;
|
|
|
|
if (UNIV_LIKELY(fil_crypt_threads_inited)) {
|
|
|
|
mutex_enter(&fil_crypt_threads_mutex);
|
|
|
|
c = *crypt_data;
|
|
|
|
*crypt_data = NULL;
|
|
|
|
mutex_exit(&fil_crypt_threads_mutex);
|
|
|
|
} else {
|
|
|
|
ut_ad(srv_read_only_mode || !srv_was_started);
|
|
|
|
c = *crypt_data;
|
|
|
|
*crypt_data = NULL;
|
|
|
|
}
|
2017-03-30 12:48:42 +02:00
|
|
|
if (c) {
|
|
|
|
c->~fil_space_crypt_t();
|
|
|
|
ut_free(c);
|
|
|
|
}
|
2014-12-22 16:53:17 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/******************************************************************
|
2017-03-14 12:56:01 +02:00
|
|
|
Write crypt data to a page (0)
|
2017-03-30 12:48:42 +02:00
|
|
|
@param[in] space tablespace
|
|
|
|
@param[in,out] page0 first page of the tablespace
|
|
|
|
@param[in,out] mtr mini-transaction */
|
2017-03-14 12:56:01 +02:00
|
|
|
UNIV_INTERN
|
2014-12-22 16:53:17 +02:00
|
|
|
void
|
2017-03-14 12:56:01 +02:00
|
|
|
fil_space_crypt_t::write_page0(
|
2017-03-30 12:48:42 +02:00
|
|
|
const fil_space_t* space,
|
2017-03-14 12:56:01 +02:00
|
|
|
byte* page,
|
|
|
|
mtr_t* mtr)
|
2014-12-22 16:53:17 +02:00
|
|
|
{
|
2017-03-30 12:48:42 +02:00
|
|
|
ut_ad(this == space->crypt_data);
|
2017-03-14 12:56:01 +02:00
|
|
|
const uint len = sizeof(iv);
|
2017-03-30 12:48:42 +02:00
|
|
|
const ulint offset = FSP_HEADER_OFFSET
|
|
|
|
+ fsp_header_get_encryption_offset(page_size_t(space->flags));
|
2017-03-14 12:56:01 +02:00
|
|
|
page0_offset = offset;
|
2014-12-22 16:53:17 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
redo log this as bytewise updates to page 0
|
|
|
|
followed by an MLOG_FILE_WRITE_CRYPT_DATA
|
|
|
|
(that will during recovery update fil_space_t)
|
|
|
|
*/
|
|
|
|
mlog_write_string(page + offset, CRYPT_MAGIC, MAGIC_SZ, mtr);
|
|
|
|
mlog_write_ulint(page + offset + MAGIC_SZ + 0, type, MLOG_1BYTE, mtr);
|
|
|
|
mlog_write_ulint(page + offset + MAGIC_SZ + 1, len, MLOG_1BYTE, mtr);
|
2017-03-14 12:56:01 +02:00
|
|
|
mlog_write_string(page + offset + MAGIC_SZ + 2, iv, len,
|
2014-12-22 16:53:17 +02:00
|
|
|
mtr);
|
|
|
|
mlog_write_ulint(page + offset + MAGIC_SZ + 2 + len, min_key_version,
|
|
|
|
MLOG_4BYTES, mtr);
|
2015-05-09 11:19:36 +02:00
|
|
|
mlog_write_ulint(page + offset + MAGIC_SZ + 2 + len + 4, key_id,
|
2015-04-09 00:37:54 +02:00
|
|
|
MLOG_4BYTES, mtr);
|
|
|
|
mlog_write_ulint(page + offset + MAGIC_SZ + 2 + len + 8, encryption,
|
2015-04-01 19:37:00 +03:00
|
|
|
MLOG_1BYTE, mtr);
|
2015-03-19 07:07:56 +02:00
|
|
|
|
2015-04-09 00:37:54 +02:00
|
|
|
byte* log_ptr = mlog_open(mtr, 11 + 17 + len);
|
2015-04-01 11:50:21 +03:00
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
if (log_ptr != NULL) {
|
|
|
|
log_ptr = mlog_write_initial_log_record_fast(
|
|
|
|
page,
|
|
|
|
MLOG_FILE_WRITE_CRYPT_DATA,
|
|
|
|
log_ptr, mtr);
|
2017-03-30 12:48:42 +02:00
|
|
|
mach_write_to_4(log_ptr, space->id);
|
2014-12-22 16:53:17 +02:00
|
|
|
log_ptr += 4;
|
|
|
|
mach_write_to_2(log_ptr, offset);
|
|
|
|
log_ptr += 2;
|
|
|
|
mach_write_to_1(log_ptr, type);
|
|
|
|
log_ptr += 1;
|
|
|
|
mach_write_to_1(log_ptr, len);
|
|
|
|
log_ptr += 1;
|
|
|
|
mach_write_to_4(log_ptr, min_key_version);
|
|
|
|
log_ptr += 4;
|
2015-04-09 00:37:54 +02:00
|
|
|
mach_write_to_4(log_ptr, key_id);
|
|
|
|
log_ptr += 4;
|
2015-04-01 19:37:00 +03:00
|
|
|
mach_write_to_1(log_ptr, encryption);
|
|
|
|
log_ptr += 1;
|
2014-12-22 16:53:17 +02:00
|
|
|
mlog_close(mtr, log_ptr);
|
|
|
|
|
2017-03-14 12:56:01 +02:00
|
|
|
mlog_catenate_string(mtr, iv, len);
|
2014-12-22 16:53:17 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/******************************************************************
|
2017-03-14 12:56:01 +02:00
|
|
|
Set crypt data for a tablespace
|
|
|
|
@param[in,out] space Tablespace
|
|
|
|
@param[in,out] crypt_data Crypt data to be set
|
|
|
|
@return crypt_data in tablespace */
|
|
|
|
static
|
|
|
|
fil_space_crypt_t*
|
|
|
|
fil_space_set_crypt_data(
|
|
|
|
fil_space_t* space,
|
|
|
|
fil_space_crypt_t* crypt_data)
|
2014-12-22 16:53:17 +02:00
|
|
|
{
|
2017-03-14 12:56:01 +02:00
|
|
|
fil_space_crypt_t* free_crypt_data = NULL;
|
|
|
|
fil_space_crypt_t* ret_crypt_data = NULL;
|
|
|
|
|
|
|
|
/* Provided space is protected using fil_space_acquire()
|
|
|
|
from concurrent operations. */
|
|
|
|
if (space->crypt_data != NULL) {
|
|
|
|
/* There is already crypt data present,
|
|
|
|
merge new crypt_data */
|
|
|
|
fil_space_merge_crypt_data(space->crypt_data,
|
|
|
|
crypt_data);
|
|
|
|
ret_crypt_data = space->crypt_data;
|
|
|
|
free_crypt_data = crypt_data;
|
|
|
|
} else {
|
|
|
|
space->crypt_data = crypt_data;
|
|
|
|
ret_crypt_data = space->crypt_data;
|
|
|
|
}
|
2015-03-19 14:09:49 +02:00
|
|
|
|
2017-03-14 12:56:01 +02:00
|
|
|
if (free_crypt_data != NULL) {
|
|
|
|
/* there was already crypt data present and the new crypt
|
|
|
|
* data provided as argument to this function has been merged
|
|
|
|
* into that => free new crypt data
|
|
|
|
*/
|
|
|
|
fil_space_destroy_crypt_data(&free_crypt_data);
|
2015-05-13 11:41:22 +03:00
|
|
|
}
|
|
|
|
|
2017-03-14 12:56:01 +02:00
|
|
|
return ret_crypt_data;
|
2014-12-22 16:53:17 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/******************************************************************
|
2015-04-01 11:50:21 +03:00
|
|
|
Parse a MLOG_FILE_WRITE_CRYPT_DATA log entry
|
2017-03-14 12:56:01 +02:00
|
|
|
@param[in] ptr Log entry start
|
|
|
|
@param[in] end_ptr Log entry end
|
|
|
|
@param[in] block buffer block
|
2015-04-01 11:50:21 +03:00
|
|
|
@return position on log buffer */
|
2014-12-22 16:53:17 +02:00
|
|
|
UNIV_INTERN
|
MDEV-12253: Buffer pool blocks are accessed after they have been freed
Problem was that bpage was referenced after it was already freed
from LRU. Fixed by adding a new variable encrypted that is
passed down to buf_page_check_corrupt() and used in
buf_page_get_gen() to stop processing page read.
This patch should also address following test failures and
bugs:
MDEV-12419: IMPORT should not look up tablespace in
PageConverter::validate(). This is now removed.
MDEV-10099: encryption.innodb_onlinealter_encryption fails
sporadically in buildbot
MDEV-11420: encryption.innodb_encryption-page-compression
failed in buildbot
MDEV-11222: encryption.encrypt_and_grep failed in buildbot on P8
Removed dict_table_t::is_encrypted and dict_table_t::ibd_file_missing
and replaced these with dict_table_t::file_unreadable. Table
ibd file is missing if fil_get_space(space_id) returns NULL
and encrypted if not. Removed dict_table_t::is_corrupted field.
Ported FilSpace class from 10.2 and using that on buf_page_check_corrupt(),
buf_page_decrypt_after_read(), buf_page_encrypt_before_write(),
buf_dblwr_process(), buf_read_page(), dict_stats_save_defrag_stats().
Added test cases when enrypted page could be read while doing
redo log crash recovery. Also added test case for row compressed
blobs.
btr_cur_open_at_index_side_func(),
btr_cur_open_at_rnd_pos_func(): Avoid referencing block that is
NULL.
buf_page_get_zip(): Issue error if page read fails.
buf_page_get_gen(): Use dberr_t for error detection and
do not reference bpage after we hare freed it.
buf_mark_space_corrupt(): remove bpage from LRU also when
it is encrypted.
buf_page_check_corrupt(): @return DB_SUCCESS if page has
been read and is not corrupted,
DB_PAGE_CORRUPTED if page based on checksum check is corrupted,
DB_DECRYPTION_FAILED if page post encryption checksum matches but
after decryption normal page checksum does not match. In read
case only DB_SUCCESS is possible.
buf_page_io_complete(): use dberr_t for error handling.
buf_flush_write_block_low(),
buf_read_ahead_random(),
buf_read_page_async(),
buf_read_ahead_linear(),
buf_read_ibuf_merge_pages(),
buf_read_recv_pages(),
fil_aio_wait():
Issue error if page read fails.
btr_pcur_move_to_next_page(): Do not reference page if it is
NULL.
Introduced dict_table_t::is_readable() and dict_index_t::is_readable()
that will return true if tablespace exists and pages read from
tablespace are not corrupted or page decryption failed.
Removed buf_page_t::key_version. After page decryption the
key version is not removed from page frame. For unencrypted
pages, old key_version is removed at buf_page_encrypt_before_write()
dict_stats_update_transient_for_index(),
dict_stats_update_transient()
Do not continue if table decryption failed or table
is corrupted.
dict0stats.cc: Introduced a dict_stats_report_error function
to avoid code duplication.
fil_parse_write_crypt_data():
Check that key read from redo log entry is found from
encryption plugin and if it is not, refuse to start.
PageConverter::validate(): Removed access to fil_space_t as
tablespace is not available during import.
Fixed error code on innodb.innodb test.
Merged test cased innodb-bad-key-change5 and innodb-bad-key-shutdown
to innodb-bad-key-change2. Removed innodb-bad-key-change5 test.
Decreased unnecessary complexity on some long lasting tests.
Removed fil_inc_pending_ops(), fil_decr_pending_ops(),
fil_get_first_space(), fil_get_next_space(),
fil_get_first_space_safe(), fil_get_next_space_safe()
functions.
fil_space_verify_crypt_checksum(): Fixed bug found using ASAN
where FIL_PAGE_END_LSN_OLD_CHECKSUM field was incorrectly
accessed from row compressed tables. Fixed out of page frame
bug for row compressed tables in
fil_space_verify_crypt_checksum() found using ASAN. Incorrect
function was called for compressed table.
Added new tests for discard, rename table and drop (we should allow them
even when page decryption fails). Alter table rename is not allowed.
Added test for restart with innodb-force-recovery=1 when page read on
redo-recovery cant be decrypted. Added test for corrupted table where
both page data and FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION is corrupted.
Adjusted the test case innodb_bug14147491 so that it does not anymore
expect crash. Instead table is just mostly not usable.
fil0fil.h: fil_space_acquire_low is not visible function
and fil_space_acquire and fil_space_acquire_silent are
inline functions. FilSpace class uses fil_space_acquire_low
directly.
recv_apply_hashed_log_recs() does not return anything.
2017-04-26 15:19:16 +03:00
|
|
|
byte*
fil_parse_write_crypt_data(
	byte*			ptr,	/*!< in: start of the log entry */
	const byte*		end_ptr,/*!< in: end of the available log data */
	const buf_block_t*	block,	/*!< in: not used by this function */
	dberr_t*		err)	/*!< out: DB_SUCCESS, DB_CORRUPTION or
					DB_DECRYPTION_FAILED */
{
	/* Parse a redo log entry that carries tablespace crypt data and
	install it into the in-memory tablespace, if that tablespace is
	found.

	Returns the position just past the parsed entry, or NULL when
	- the entry is not yet complete (*err stays DB_SUCCESS), or
	- the scheme type or IV length is not a recognised value
	  (*err is set to DB_CORRUPTION). */

	/* check that redo log entry is complete */
	uint entry_size =
		4 + // size of space_id
		2 + // size of offset
		1 + // size of type
		1 + // size of iv-len
		4 + // size of min_key_version
		4 + // size of key_id
		1; // fil_encryption_t

	*err = DB_SUCCESS;

	if (ptr + entry_size > end_ptr) {
		return NULL;
	}

	/* Fixed-size fields, in the order they are stored in the entry */
	ulint space_id = mach_read_from_4(ptr);
	ptr += 4;
	uint offset = mach_read_from_2(ptr);
	ptr += 2;
	uint type = mach_read_from_1(ptr);
	ptr += 1;
	uint len = mach_read_from_1(ptr);
	ptr += 1;

	/* Reject unknown schemes and unexpected IV lengths before anything
	is allocated or copied; this also bounds the memcpy() below. */
	if ((type != CRYPT_SCHEME_1 && type != CRYPT_SCHEME_UNENCRYPTED)
	    || len != CRYPT_SCHEME_1_IV_LEN) {
		*err = DB_CORRUPTION;
		return NULL;
	}

	uint min_key_version = mach_read_from_4(ptr);
	ptr += 4;

	uint key_id = mach_read_from_4(ptr);
	ptr += 4;

	fil_encryption_t encryption = (fil_encryption_t)mach_read_from_1(ptr);
	ptr += 1;

	/* The variable-length IV must also be completely available */
	if (ptr + len > end_ptr) {
		return NULL;
	}

	fil_space_crypt_t* crypt_data = fil_space_create_crypt_data(encryption, key_id);
	/* Need to overwrite these as above will initialize fields. */
	crypt_data->page0_offset = offset;
	crypt_data->min_key_version = min_key_version;
	crypt_data->encryption = encryption;
	crypt_data->type = type;
	memcpy(crypt_data->iv, ptr, len);
	ptr += len;

	/* update fil_space memory cache with crypt_data */
	if (fil_space_t* space = fil_space_acquire_silent(space_id)) {
		/* From here on use the pointer returned by
		fil_space_set_crypt_data(), not the one created above. */
		crypt_data = fil_space_set_crypt_data(space, crypt_data);
		fil_space_release(space);

		/* Check is used key found from encryption plugin */
		if (crypt_data->should_encrypt()
		    && !crypt_data->is_key_found()) {
			*err = DB_DECRYPTION_FAILED;
		}
	} else {
		/* Tablespace was not found: discard the parsed crypt data */
		fil_space_destroy_crypt_data(&crypt_data);
	}

	return ptr;
}
|
|
|
|
|
2017-03-30 12:48:42 +02:00
|
|
|
/** Encrypt a buffer.
|
2017-03-14 12:56:01 +02:00
|
|
|
@param[in,out] crypt_data Crypt data
|
|
|
|
@param[in] space space_id
|
|
|
|
@param[in] offset Page offset
|
|
|
|
@param[in] lsn Log sequence number
|
|
|
|
@param[in] src_frame Page to encrypt
|
2017-03-30 12:48:42 +02:00
|
|
|
@param[in] page_size Page size
|
2017-03-14 12:56:01 +02:00
|
|
|
@param[in,out] dst_frame Output buffer
|
|
|
|
@return encrypted buffer or NULL */
|
2014-12-22 16:53:17 +02:00
|
|
|
UNIV_INTERN
|
2015-06-05 08:41:10 +03:00
|
|
|
byte*
|
2015-09-04 15:54:20 +03:00
|
|
|
fil_encrypt_buf(
|
2017-03-30 12:48:42 +02:00
|
|
|
fil_space_crypt_t* crypt_data,
|
|
|
|
ulint space,
|
|
|
|
ulint offset,
|
|
|
|
lsn_t lsn,
|
|
|
|
const byte* src_frame,
|
|
|
|
const page_size_t& page_size,
|
|
|
|
byte* dst_frame)
|
2014-12-22 16:53:17 +02:00
|
|
|
{
|
2017-05-10 09:07:50 +03:00
|
|
|
uint size = uint(page_size.physical());
|
2015-09-04 15:54:20 +03:00
|
|
|
uint key_version = fil_crypt_get_latest_key_version(crypt_data);
|
2014-12-22 16:53:17 +02:00
|
|
|
|
2017-03-30 12:48:42 +02:00
|
|
|
ut_a(key_version != ENCRYPTION_KEY_VERSION_INVALID);
|
2014-12-22 16:53:17 +02:00
|
|
|
|
2015-09-04 15:54:20 +03:00
|
|
|
ulint orig_page_type = mach_read_from_2(src_frame+FIL_PAGE_TYPE);
|
2015-06-03 13:10:18 +03:00
|
|
|
ibool page_compressed = (orig_page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED);
|
2017-05-10 09:07:50 +03:00
|
|
|
uint header_len = FIL_PAGE_DATA;
|
2015-06-08 19:36:35 +03:00
|
|
|
|
|
|
|
if (page_compressed) {
|
|
|
|
header_len += (FIL_PAGE_COMPRESSED_SIZE + FIL_PAGE_COMPRESSION_METHOD_SIZE);
|
|
|
|
}
|
2014-12-22 16:53:17 +02:00
|
|
|
|
2015-04-01 11:50:21 +03:00
|
|
|
/* FIL page header is not encrypted */
|
2015-06-08 19:36:35 +03:00
|
|
|
memcpy(dst_frame, src_frame, header_len);
|
2014-12-22 16:53:17 +02:00
|
|
|
|
2015-04-01 11:50:21 +03:00
|
|
|
/* Store key version */
|
|
|
|
mach_write_to_4(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, key_version);
|
2014-12-22 16:53:17 +02:00
|
|
|
|
2015-04-01 11:50:21 +03:00
|
|
|
/* Calculate the start offset in a page */
|
2017-05-10 09:07:50 +03:00
|
|
|
uint unencrypted_bytes = header_len + FIL_PAGE_DATA_END;
|
|
|
|
uint srclen = size - unencrypted_bytes;
|
2015-06-08 19:36:35 +03:00
|
|
|
const byte* src = src_frame + header_len;
|
|
|
|
byte* dst = dst_frame + header_len;
|
2015-04-09 19:27:40 +02:00
|
|
|
uint32 dstlen = 0;
|
2014-12-22 16:53:17 +02:00
|
|
|
|
2015-06-08 19:36:35 +03:00
|
|
|
if (page_compressed) {
|
|
|
|
srclen = mach_read_from_2(src_frame + FIL_PAGE_DATA);
|
|
|
|
}
|
|
|
|
|
2015-05-13 21:57:24 +02:00
|
|
|
int rc = encryption_scheme_encrypt(src, srclen, dst, &dstlen,
|
|
|
|
crypt_data, key_version,
|
|
|
|
space, offset, lsn);
|
2017-03-30 12:48:42 +02:00
|
|
|
ut_a(rc == MY_AES_OK);
|
|
|
|
ut_a(dstlen == srclen);
|
2014-12-22 16:53:17 +02:00
|
|
|
|
2015-04-01 11:50:21 +03:00
|
|
|
/* For compressed tables we do not store the FIL header because
|
|
|
|
the whole page is not stored to the disk. In compressed tables only
|
|
|
|
the FIL header + compressed (and now encrypted) payload alligned
|
|
|
|
to sector boundary is written. */
|
2014-12-22 16:53:17 +02:00
|
|
|
if (!page_compressed) {
|
2015-04-01 11:50:21 +03:00
|
|
|
/* FIL page trailer is also not encrypted */
|
2016-08-12 11:17:45 +03:00
|
|
|
memcpy(dst_frame + page_size.physical() - FIL_PAGE_DATA_END,
|
|
|
|
src_frame + page_size.physical() - FIL_PAGE_DATA_END,
|
2014-12-22 16:53:17 +02:00
|
|
|
FIL_PAGE_DATA_END);
|
2015-09-21 11:24:08 +03:00
|
|
|
} else {
|
|
|
|
/* Clean up rest of buffer */
|
2017-03-30 12:48:42 +02:00
|
|
|
memset(dst_frame+header_len+srclen, 0,
|
|
|
|
page_size.physical() - (header_len + srclen));
|
2015-06-03 13:10:18 +03:00
|
|
|
}
|
2014-12-22 16:53:17 +02:00
|
|
|
|
2015-06-03 13:10:18 +03:00
|
|
|
/* handle post encryption checksum */
|
|
|
|
ib_uint32_t checksum = 0;
|
2014-12-22 16:53:17 +02:00
|
|
|
|
2016-08-12 11:17:45 +03:00
|
|
|
checksum = fil_crypt_calculate_checksum(page_size, dst_frame);
|
2014-12-22 16:53:17 +02:00
|
|
|
|
2015-06-03 13:10:18 +03:00
|
|
|
// store the post-encryption checksum after the key-version
|
|
|
|
mach_write_to_4(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 4, checksum);
|
|
|
|
|
2018-12-17 20:04:03 +02:00
|
|
|
ut_ad(fil_space_verify_crypt_checksum(dst_frame, page_size));
|
MDEV-11759: Encryption code in MariaDB 10.1/10.2 causes
compatibility problems
Pages that are encrypted contain post encryption checksum on
different location that normal checksum fields. Therefore,
we should before decryption check this checksum to avoid
unencrypting corrupted pages. After decryption we can use
traditional checksum check to detect if page is corrupted
or unencryption was done using incorrect key.
Pages that are page compressed do not contain any checksum,
here we need to fist unencrypt, decompress and finally
use tradional checksum check to detect page corruption
or that we used incorrect key in unencryption.
buf0buf.cc: buf_page_is_corrupted() mofified so that
compressed pages are skipped.
buf0buf.h, buf_block_init(), buf_page_init_low():
removed unnecessary page_encrypted, page_compressed,
stored_checksum, valculated_checksum fields from
buf_page_t
buf_page_get_gen(): use new buf_page_check_corrupt() function
to detect corrupted pages.
buf_page_check_corrupt(): If page was not yet decrypted
check if post encryption checksum still matches.
If page is not anymore encrypted, use buf_page_is_corrupted()
traditional checksum method.
If page is detected as corrupted and it is not encrypted
we print corruption message to error log.
If page is still encrypted or it was encrypted and now
corrupted, we will print message that page is
encrypted to error log.
buf_page_io_complete(): use new buf_page_check_corrupt()
function to detect corrupted pages.
buf_page_decrypt_after_read(): Verify post encryption
checksum before tring to decrypt.
fil0crypt.cc: fil_encrypt_buf() verify post encryption
checksum and ind fil_space_decrypt() return true
if we really decrypted the page.
fil_space_verify_crypt_checksum(): rewrite to use
the method used when calculating post encryption
checksum. We also check if post encryption checksum
matches that traditional checksum check does not
match.
fil0fil.ic: Add missed page type encrypted and page
compressed to fil_get_page_type_name()
Note that this change does not yet fix innochecksum tool,
that will be done in separate MDEV.
Fix test failures caused by buf page corruption injection.
2017-02-06 10:47:55 +02:00
|
|
|
|
2015-04-01 22:03:14 +03:00
|
|
|
srv_stats.pages_encrypted.inc();
|
2015-06-05 08:41:10 +03:00
|
|
|
|
|
|
|
return dst_frame;
|
2014-12-22 16:53:17 +02:00
|
|
|
}
|
|
|
|
|
2015-09-04 15:54:20 +03:00
|
|
|
/******************************************************************
|
2017-03-14 12:56:01 +02:00
|
|
|
Encrypt a page
|
|
|
|
|
|
|
|
@param[in] space Tablespace
|
|
|
|
@param[in] offset Page offset
|
|
|
|
@param[in] lsn Log sequence number
|
|
|
|
@param[in] src_frame Page to encrypt
|
|
|
|
@param[in,out] dst_frame Output buffer
|
|
|
|
@return encrypted buffer or NULL */
|
2015-09-04 15:54:20 +03:00
|
|
|
UNIV_INTERN
|
|
|
|
byte*
|
|
|
|
fil_space_encrypt(
|
2017-03-14 12:56:01 +02:00
|
|
|
const fil_space_t* space,
|
2017-03-30 12:48:42 +02:00
|
|
|
ulint offset,
|
|
|
|
lsn_t lsn,
|
|
|
|
byte* src_frame,
|
|
|
|
byte* dst_frame)
|
2015-09-04 15:54:20 +03:00
|
|
|
{
|
2017-03-30 12:48:42 +02:00
|
|
|
switch (mach_read_from_2(src_frame+FIL_PAGE_TYPE)) {
|
|
|
|
case FIL_PAGE_TYPE_FSP_HDR:
|
|
|
|
case FIL_PAGE_TYPE_XDES:
|
|
|
|
case FIL_PAGE_RTREE:
|
2017-02-07 15:55:01 +02:00
|
|
|
/* File space header, extent descriptor or spatial index
|
|
|
|
are not encrypted. */
|
2015-09-04 15:54:20 +03:00
|
|
|
return src_frame;
|
|
|
|
}
|
|
|
|
|
2017-03-14 12:56:01 +02:00
|
|
|
if (!space->crypt_data || !space->crypt_data->is_encrypted()) {
|
|
|
|
return (src_frame);
|
2015-09-04 15:54:20 +03:00
|
|
|
}
|
|
|
|
|
2017-03-14 12:56:01 +02:00
|
|
|
fil_space_crypt_t* crypt_data = space->crypt_data;
|
2017-03-30 12:48:42 +02:00
|
|
|
const page_size_t page_size(space->flags);
|
2017-04-28 12:23:35 +03:00
|
|
|
ut_ad(space->n_pending_ios > 0);
|
2017-03-30 12:48:42 +02:00
|
|
|
byte* tmp = fil_encrypt_buf(crypt_data, space->id, offset, lsn,
|
|
|
|
src_frame, page_size, dst_frame);
|
2016-08-12 11:17:45 +03:00
|
|
|
|
|
|
|
#ifdef UNIV_DEBUG
|
|
|
|
if (tmp) {
|
|
|
|
/* Verify that encrypted buffer is not corrupted */
|
|
|
|
dberr_t err = DB_SUCCESS;
|
|
|
|
byte* src = src_frame;
|
|
|
|
bool page_compressed_encrypted = (mach_read_from_2(tmp+FIL_PAGE_TYPE) == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED);
|
2018-06-13 16:15:21 +03:00
|
|
|
byte uncomp_mem[UNIV_PAGE_SIZE_MAX];
|
|
|
|
byte tmp_mem[UNIV_PAGE_SIZE_MAX];
|
2016-08-12 11:17:45 +03:00
|
|
|
|
|
|
|
if (page_compressed_encrypted) {
|
2018-06-13 16:15:21 +03:00
|
|
|
memcpy(uncomp_mem, src, srv_page_size);
|
|
|
|
ulint unzipped1 = fil_page_decompress(
|
|
|
|
tmp_mem, uncomp_mem);
|
|
|
|
ut_ad(unzipped1);
|
|
|
|
if (unzipped1 != srv_page_size) {
|
|
|
|
src = uncomp_mem;
|
|
|
|
}
|
2016-08-12 11:17:45 +03:00
|
|
|
}
|
|
|
|
|
2018-06-13 16:15:21 +03:00
|
|
|
ut_ad(!buf_page_is_corrupted(true, src, page_size, space));
|
|
|
|
ut_ad(fil_space_decrypt(crypt_data, tmp_mem, page_size, tmp,
|
|
|
|
&err));
|
|
|
|
ut_ad(err == DB_SUCCESS);
|
2016-08-12 11:17:45 +03:00
|
|
|
|
|
|
|
/* Need to decompress the page if it was also compressed */
|
|
|
|
if (page_compressed_encrypted) {
|
2018-06-13 16:15:21 +03:00
|
|
|
byte buf[UNIV_PAGE_SIZE_MAX];
|
|
|
|
memcpy(buf, tmp_mem, srv_page_size);
|
|
|
|
ulint unzipped2 = fil_page_decompress(tmp_mem, buf);
|
|
|
|
ut_ad(unzipped2);
|
2016-08-12 11:17:45 +03:00
|
|
|
}
|
|
|
|
|
2018-06-13 16:15:21 +03:00
|
|
|
memcpy(tmp_mem + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION,
|
|
|
|
src + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 8);
|
|
|
|
ut_ad(!memcmp(src, tmp_mem, page_size.physical()));
|
2016-08-12 11:17:45 +03:00
|
|
|
}
|
|
|
|
#endif /* UNIV_DEBUG */
|
2015-09-04 15:54:20 +03:00
|
|
|
|
|
|
|
return tmp;
|
|
|
|
}
|
|
|
|
|
2017-03-30 12:48:42 +02:00
|
|
|
/** Decrypt a page.
|
2017-03-14 12:56:01 +02:00
|
|
|
@param[in] crypt_data crypt_data
|
|
|
|
@param[in] tmp_frame Temporary buffer
|
|
|
|
@param[in] page_size Page size
|
|
|
|
@param[in,out] src_frame Page to decrypt
|
|
|
|
@param[out] err DB_SUCCESS or DB_DECRYPTION_FAILED
|
2015-06-05 08:41:10 +03:00
|
|
|
@return true if page decrypted, false if not.*/
|
2014-12-22 16:53:17 +02:00
|
|
|
UNIV_INTERN
|
|
|
|
bool
|
2015-04-01 11:50:21 +03:00
|
|
|
fil_space_decrypt(
|
2017-03-14 12:56:01 +02:00
|
|
|
fil_space_crypt_t* crypt_data,
|
|
|
|
byte* tmp_frame,
|
2017-03-30 12:48:42 +02:00
|
|
|
const page_size_t& page_size,
|
2017-03-14 12:56:01 +02:00
|
|
|
byte* src_frame,
|
|
|
|
dberr_t* err)
|
2014-12-22 16:53:17 +02:00
|
|
|
{
|
|
|
|
ulint page_type = mach_read_from_2(src_frame+FIL_PAGE_TYPE);
|
2015-04-01 11:50:21 +03:00
|
|
|
uint key_version = mach_read_from_4(src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
|
2015-06-03 13:10:18 +03:00
|
|
|
bool page_compressed = (page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED);
|
2017-05-10 09:07:50 +03:00
|
|
|
uint offset = mach_read_from_4(src_frame + FIL_PAGE_OFFSET);
|
|
|
|
uint space = mach_read_from_4(src_frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
|
2016-03-12 13:43:33 +02:00
|
|
|
ib_uint64_t lsn = mach_read_from_8(src_frame + FIL_PAGE_LSN);
|
2017-03-14 12:56:01 +02:00
|
|
|
|
2015-09-14 14:11:23 +03:00
|
|
|
*err = DB_SUCCESS;
|
|
|
|
|
2015-04-09 00:37:47 +02:00
|
|
|
if (key_version == ENCRYPTION_KEY_NOT_ENCRYPTED) {
|
2015-06-05 08:41:10 +03:00
|
|
|
return false;
|
2014-12-22 16:53:17 +02:00
|
|
|
}
|
|
|
|
|
2016-12-13 11:51:33 +02:00
|
|
|
ut_a(crypt_data != NULL && crypt_data->is_encrypted());
|
2016-03-12 13:43:33 +02:00
|
|
|
|
|
|
|
/* read space & lsn */
|
2017-05-10 09:07:50 +03:00
|
|
|
uint header_len = FIL_PAGE_DATA;
|
2015-06-08 19:36:35 +03:00
|
|
|
|
|
|
|
if (page_compressed) {
|
|
|
|
header_len += (FIL_PAGE_COMPRESSED_SIZE + FIL_PAGE_COMPRESSION_METHOD_SIZE);
|
|
|
|
}
|
2014-12-22 16:53:17 +02:00
|
|
|
|
2015-04-01 11:50:21 +03:00
|
|
|
/* Copy FIL page header, it is not encrypted */
|
2015-06-08 19:36:35 +03:00
|
|
|
memcpy(tmp_frame, src_frame, header_len);
|
2014-12-22 16:53:17 +02:00
|
|
|
|
2015-04-01 11:50:21 +03:00
|
|
|
/* Calculate the offset where decryption starts */
|
2015-06-08 19:36:35 +03:00
|
|
|
const byte* src = src_frame + header_len;
|
|
|
|
byte* dst = tmp_frame + header_len;
|
2015-04-09 19:27:40 +02:00
|
|
|
uint32 dstlen = 0;
|
2017-05-10 09:07:50 +03:00
|
|
|
uint srclen = uint(page_size.physical())
|
|
|
|
- header_len - FIL_PAGE_DATA_END;
|
2015-06-08 19:36:35 +03:00
|
|
|
|
|
|
|
if (page_compressed) {
|
|
|
|
srclen = mach_read_from_2(src_frame + FIL_PAGE_DATA);
|
|
|
|
}
|
2014-12-22 16:53:17 +02:00
|
|
|
|
2015-05-13 21:57:24 +02:00
|
|
|
int rc = encryption_scheme_decrypt(src, srclen, dst, &dstlen,
|
|
|
|
crypt_data, key_version,
|
|
|
|
space, offset, lsn);
|
2014-12-22 16:53:17 +02:00
|
|
|
|
2015-04-01 22:15:11 +02:00
|
|
|
if (! ((rc == MY_AES_OK) && ((ulint) dstlen == srclen))) {
|
2015-09-14 14:11:23 +03:00
|
|
|
|
|
|
|
if (rc == -1) {
|
|
|
|
*err = DB_DECRYPTION_FAILED;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2017-03-30 12:48:42 +02:00
|
|
|
ib::fatal() << "Unable to decrypt data-block "
|
2016-08-12 11:17:45 +03:00
|
|
|
<< " src: " << src << "srclen: "
|
|
|
|
<< srclen << " buf: " << dst << "buflen: "
|
|
|
|
<< dstlen << " return-code: " << rc
|
|
|
|
<< " Can't continue!";
|
2014-12-22 16:53:17 +02:00
|
|
|
}
|
|
|
|
|
2015-04-01 11:50:21 +03:00
|
|
|
/* For compressed tables we do not store the FIL header because
|
|
|
|
the whole page is not stored to the disk. In compressed tables only
|
|
|
|
the FIL header + compressed (and now encrypted) payload alligned
|
|
|
|
to sector boundary is written. */
|
|
|
|
if (!page_compressed) {
|
|
|
|
/* Copy FIL trailer */
|
2016-08-12 11:17:45 +03:00
|
|
|
memcpy(tmp_frame + page_size.physical() - FIL_PAGE_DATA_END,
|
|
|
|
src_frame + page_size.physical() - FIL_PAGE_DATA_END,
|
2014-12-22 16:53:17 +02:00
|
|
|
FIL_PAGE_DATA_END);
|
|
|
|
}
|
|
|
|
|
2015-04-01 22:03:14 +03:00
|
|
|
srv_stats.pages_decrypted.inc();
|
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
return true; /* page was decrypted */
|
|
|
|
}
|
|
|
|
|
2017-03-30 12:48:42 +02:00
|
|
|
/**
|
|
|
|
Decrypt a page.
|
2017-03-14 12:56:01 +02:00
|
|
|
@param[in] space Tablespace
|
MDEV-11759: Encryption code in MariaDB 10.1/10.2 causes
compatibility problems
Pages that are encrypted contain post encryption checksum on
different location that normal checksum fields. Therefore,
we should before decryption check this checksum to avoid
unencrypting corrupted pages. After decryption we can use
traditional checksum check to detect if page is corrupted
or unencryption was done using incorrect key.
Pages that are page compressed do not contain any checksum,
here we need to fist unencrypt, decompress and finally
use tradional checksum check to detect page corruption
or that we used incorrect key in unencryption.
buf0buf.cc: buf_page_is_corrupted() mofified so that
compressed pages are skipped.
buf0buf.h, buf_block_init(), buf_page_init_low():
removed unnecessary page_encrypted, page_compressed,
stored_checksum, valculated_checksum fields from
buf_page_t
buf_page_get_gen(): use new buf_page_check_corrupt() function
to detect corrupted pages.
buf_page_check_corrupt(): If page was not yet decrypted
check if post encryption checksum still matches.
If page is not anymore encrypted, use buf_page_is_corrupted()
traditional checksum method.
If page is detected as corrupted and it is not encrypted
we print corruption message to error log.
If page is still encrypted or it was encrypted and now
corrupted, we will print message that page is
encrypted to error log.
buf_page_io_complete(): use new buf_page_check_corrupt()
function to detect corrupted pages.
buf_page_decrypt_after_read(): Verify post encryption
checksum before tring to decrypt.
fil0crypt.cc: fil_encrypt_buf() verify post encryption
checksum and ind fil_space_decrypt() return true
if we really decrypted the page.
fil_space_verify_crypt_checksum(): rewrite to use
the method used when calculating post encryption
checksum. We also check if post encryption checksum
matches that traditional checksum check does not
match.
fil0fil.ic: Add missed page type encrypted and page
compressed to fil_get_page_type_name()
Note that this change does not yet fix innochecksum tool,
that will be done in separate MDEV.
Fix test failures caused by buf page corruption injection.
2017-02-06 10:47:55 +02:00
|
|
|
@param[in] tmp_frame Temporary buffer used for decrypting
|
|
|
|
@param[in,out] src_frame Page to decrypt
|
|
|
|
@param[out] decrypted true if page was decrypted
|
|
|
|
@return decrypted page, or original not encrypted page if decryption is
|
|
|
|
not needed.*/
|
2014-12-22 16:53:17 +02:00
|
|
|
UNIV_INTERN
|
2015-06-05 08:41:10 +03:00
|
|
|
byte*
|
2015-04-01 11:50:21 +03:00
|
|
|
fil_space_decrypt(
|
2017-03-14 12:56:01 +02:00
|
|
|
const fil_space_t* space,
|
MDEV-11759: Encryption code in MariaDB 10.1/10.2 causes
compatibility problems
Pages that are encrypted contain post encryption checksum on
different location that normal checksum fields. Therefore,
we should before decryption check this checksum to avoid
unencrypting corrupted pages. After decryption we can use
traditional checksum check to detect if page is corrupted
or unencryption was done using incorrect key.
Pages that are page compressed do not contain any checksum,
here we need to fist unencrypt, decompress and finally
use tradional checksum check to detect page corruption
or that we used incorrect key in unencryption.
buf0buf.cc: buf_page_is_corrupted() mofified so that
compressed pages are skipped.
buf0buf.h, buf_block_init(), buf_page_init_low():
removed unnecessary page_encrypted, page_compressed,
stored_checksum, valculated_checksum fields from
buf_page_t
buf_page_get_gen(): use new buf_page_check_corrupt() function
to detect corrupted pages.
buf_page_check_corrupt(): If page was not yet decrypted
check if post encryption checksum still matches.
If page is not anymore encrypted, use buf_page_is_corrupted()
traditional checksum method.
If page is detected as corrupted and it is not encrypted
we print corruption message to error log.
If page is still encrypted or it was encrypted and now
corrupted, we will print message that page is
encrypted to error log.
buf_page_io_complete(): use new buf_page_check_corrupt()
function to detect corrupted pages.
buf_page_decrypt_after_read(): Verify post encryption
checksum before tring to decrypt.
fil0crypt.cc: fil_encrypt_buf() verify post encryption
checksum and ind fil_space_decrypt() return true
if we really decrypted the page.
fil_space_verify_crypt_checksum(): rewrite to use
the method used when calculating post encryption
checksum. We also check if post encryption checksum
matches that traditional checksum check does not
match.
fil0fil.ic: Add missed page type encrypted and page
compressed to fil_get_page_type_name()
Note that this change does not yet fix innochecksum tool,
that will be done in separate MDEV.
Fix test failures caused by buf page corruption injection.
2017-02-06 10:47:55 +02:00
|
|
|
byte* tmp_frame,
|
|
|
|
byte* src_frame,
|
|
|
|
bool* decrypted)
|
2014-12-22 16:53:17 +02:00
|
|
|
{
|
2015-09-14 14:11:23 +03:00
|
|
|
dberr_t err = DB_SUCCESS;
|
2016-02-17 12:32:07 +02:00
|
|
|
byte* res = NULL;
|
2017-03-30 12:48:42 +02:00
|
|
|
const page_size_t page_size(space->flags);
|
MDEV-11759: Encryption code in MariaDB 10.1/10.2 causes
compatibility problems
Pages that are encrypted contain post encryption checksum on
different location that normal checksum fields. Therefore,
we should before decryption check this checksum to avoid
unencrypting corrupted pages. After decryption we can use
traditional checksum check to detect if page is corrupted
or unencryption was done using incorrect key.
Pages that are page compressed do not contain any checksum,
here we need to fist unencrypt, decompress and finally
use tradional checksum check to detect page corruption
or that we used incorrect key in unencryption.
buf0buf.cc: buf_page_is_corrupted() mofified so that
compressed pages are skipped.
buf0buf.h, buf_block_init(), buf_page_init_low():
removed unnecessary page_encrypted, page_compressed,
stored_checksum, valculated_checksum fields from
buf_page_t
buf_page_get_gen(): use new buf_page_check_corrupt() function
to detect corrupted pages.
buf_page_check_corrupt(): If page was not yet decrypted
check if post encryption checksum still matches.
If page is not anymore encrypted, use buf_page_is_corrupted()
traditional checksum method.
If page is detected as corrupted and it is not encrypted
we print corruption message to error log.
If page is still encrypted or it was encrypted and now
corrupted, we will print message that page is
encrypted to error log.
buf_page_io_complete(): use new buf_page_check_corrupt()
function to detect corrupted pages.
buf_page_decrypt_after_read(): Verify post encryption
checksum before tring to decrypt.
fil0crypt.cc: fil_encrypt_buf() verify post encryption
checksum and ind fil_space_decrypt() return true
if we really decrypted the page.
fil_space_verify_crypt_checksum(): rewrite to use
the method used when calculating post encryption
checksum. We also check if post encryption checksum
matches that traditional checksum check does not
match.
fil0fil.ic: Add missed page type encrypted and page
compressed to fil_get_page_type_name()
Note that this change does not yet fix innochecksum tool,
that will be done in separate MDEV.
Fix test failures caused by buf page corruption injection.
2017-02-06 10:47:55 +02:00
|
|
|
*decrypted = false;
|
2015-09-14 14:11:23 +03:00
|
|
|
|
2017-03-14 12:56:01 +02:00
|
|
|
ut_ad(space->crypt_data != NULL && space->crypt_data->is_encrypted());
|
2017-04-28 12:23:35 +03:00
|
|
|
ut_ad(space->n_pending_ios > 0);
|
2015-09-14 14:11:23 +03:00
|
|
|
|
2017-03-30 12:48:42 +02:00
|
|
|
bool encrypted = fil_space_decrypt(space->crypt_data, tmp_frame,
|
|
|
|
page_size, src_frame, &err);
|
2015-06-05 08:41:10 +03:00
|
|
|
|
2016-02-17 12:32:07 +02:00
|
|
|
if (err == DB_SUCCESS) {
|
|
|
|
if (encrypted) {
|
MDEV-11759: Encryption code in MariaDB 10.1/10.2 causes
compatibility problems
Pages that are encrypted contain post encryption checksum on
different location that normal checksum fields. Therefore,
we should before decryption check this checksum to avoid
unencrypting corrupted pages. After decryption we can use
traditional checksum check to detect if page is corrupted
or unencryption was done using incorrect key.
Pages that are page compressed do not contain any checksum,
here we need to fist unencrypt, decompress and finally
use tradional checksum check to detect page corruption
or that we used incorrect key in unencryption.
buf0buf.cc: buf_page_is_corrupted() mofified so that
compressed pages are skipped.
buf0buf.h, buf_block_init(), buf_page_init_low():
removed unnecessary page_encrypted, page_compressed,
stored_checksum, valculated_checksum fields from
buf_page_t
buf_page_get_gen(): use new buf_page_check_corrupt() function
to detect corrupted pages.
buf_page_check_corrupt(): If page was not yet decrypted
check if post encryption checksum still matches.
If page is not anymore encrypted, use buf_page_is_corrupted()
traditional checksum method.
If page is detected as corrupted and it is not encrypted
we print corruption message to error log.
If page is still encrypted or it was encrypted and now
corrupted, we will print message that page is
encrypted to error log.
buf_page_io_complete(): use new buf_page_check_corrupt()
function to detect corrupted pages.
buf_page_decrypt_after_read(): Verify post encryption
checksum before tring to decrypt.
fil0crypt.cc: fil_encrypt_buf() verify post encryption
checksum and ind fil_space_decrypt() return true
if we really decrypted the page.
fil_space_verify_crypt_checksum(): rewrite to use
the method used when calculating post encryption
checksum. We also check if post encryption checksum
matches that traditional checksum check does not
match.
fil0fil.ic: Add missed page type encrypted and page
compressed to fil_get_page_type_name()
Note that this change does not yet fix innochecksum tool,
that will be done in separate MDEV.
Fix test failures caused by buf page corruption injection.
2017-02-06 10:47:55 +02:00
|
|
|
*decrypted = true;
|
2016-02-17 12:32:07 +02:00
|
|
|
/* Copy the decrypted page back to page buffer, not
|
|
|
|
really any other options. */
|
2016-08-12 11:17:45 +03:00
|
|
|
memcpy(src_frame, tmp_frame, page_size.physical());
|
2016-02-17 12:32:07 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
res = src_frame;
|
2015-06-05 08:41:10 +03:00
|
|
|
}
|
|
|
|
|
2016-02-17 12:32:07 +02:00
|
|
|
return res;
|
2014-12-22 16:53:17 +02:00
|
|
|
}
|
|
|
|
|
2015-08-31 19:47:14 +03:00
|
|
|
/******************************************************************
|
|
|
|
Calculate post encryption checksum
|
2017-03-30 12:48:42 +02:00
|
|
|
@param[in] page_size page size
|
2017-03-14 12:56:01 +02:00
|
|
|
@param[in] dst_frame Block where checksum is calculated
|
2017-05-30 12:02:42 +03:00
|
|
|
@return page checksum
|
2015-08-31 19:47:14 +03:00
|
|
|
not needed. */
|
|
|
|
UNIV_INTERN
|
2017-05-10 09:07:50 +03:00
|
|
|
uint32_t
|
2015-08-31 19:47:14 +03:00
|
|
|
fil_crypt_calculate_checksum(
|
2017-03-30 12:48:42 +02:00
|
|
|
const page_size_t& page_size,
|
|
|
|
const byte* dst_frame)
|
2015-08-31 19:47:14 +03:00
|
|
|
{
|
2017-05-30 12:02:42 +03:00
|
|
|
/* For encrypted tables we use only crc32 and strict_crc32 */
|
2017-06-08 12:45:08 +03:00
|
|
|
return page_size.is_compressed()
|
|
|
|
? page_zip_calc_checksum(dst_frame, page_size.physical(),
|
|
|
|
SRV_CHECKSUM_ALGORITHM_CRC32)
|
|
|
|
: buf_calc_page_crc32(dst_frame);
|
2015-08-31 19:47:14 +03:00
|
|
|
}
|
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
/***********************************************************************/
|
|
|
|
|
|
|
|
/** A copy of global key state */
|
|
|
|
struct key_state_t {
|
2015-05-11 21:05:02 +02:00
|
|
|
key_state_t() : key_id(0), key_version(0),
|
2014-12-22 16:53:17 +02:00
|
|
|
rotate_key_age(srv_fil_crypt_rotate_key_age) {}
|
|
|
|
bool operator==(const key_state_t& other) const {
|
|
|
|
return key_version == other.key_version &&
|
|
|
|
rotate_key_age == other.rotate_key_age;
|
|
|
|
}
|
2015-05-11 21:05:02 +02:00
|
|
|
uint key_id;
|
2014-12-22 16:53:17 +02:00
|
|
|
uint key_version;
|
|
|
|
uint rotate_key_age;
|
|
|
|
};
|
|
|
|
|
|
|
|
/***********************************************************************
|
2017-03-14 12:56:01 +02:00
|
|
|
Copy global key state
|
|
|
|
@param[in,out] new_state key state
|
|
|
|
@param[in] crypt_data crypt data */
|
2014-12-22 16:53:17 +02:00
|
|
|
static void
|
|
|
|
fil_crypt_get_key_state(
|
2017-03-14 12:56:01 +02:00
|
|
|
key_state_t* new_state,
|
|
|
|
fil_space_crypt_t* crypt_data)
|
2014-12-22 16:53:17 +02:00
|
|
|
{
|
2015-04-09 21:04:05 +02:00
|
|
|
if (srv_encrypt_tables) {
|
2016-12-13 11:51:33 +02:00
|
|
|
new_state->key_version = crypt_data->key_get_latest_version();
|
2014-12-22 16:53:17 +02:00
|
|
|
new_state->rotate_key_age = srv_fil_crypt_rotate_key_age;
|
2015-09-22 15:03:59 +03:00
|
|
|
|
2015-04-09 00:37:47 +02:00
|
|
|
ut_a(new_state->key_version != ENCRYPTION_KEY_NOT_ENCRYPTED);
|
2014-12-22 16:53:17 +02:00
|
|
|
} else {
|
|
|
|
new_state->key_version = 0;
|
|
|
|
new_state->rotate_key_age = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/***********************************************************************
|
2015-04-01 11:50:21 +03:00
|
|
|
Check if a key needs rotation given a key_state
|
2018-04-06 12:55:43 +03:00
|
|
|
@param[in] crypt_data Encryption information
|
2017-03-14 12:56:01 +02:00
|
|
|
@param[in] key_version Current key version
|
|
|
|
@param[in] latest_key_version Latest key version
|
|
|
|
@param[in] rotate_key_age when to rotate
|
2015-04-01 11:50:21 +03:00
|
|
|
@return true if key needs rotation, false if not */
|
2014-12-22 16:53:17 +02:00
|
|
|
static bool
|
2015-04-01 11:50:21 +03:00
|
|
|
fil_crypt_needs_rotation(
|
2018-04-06 12:55:43 +03:00
|
|
|
const fil_space_crypt_t* crypt_data,
|
|
|
|
uint key_version,
|
|
|
|
uint latest_key_version,
|
|
|
|
uint rotate_key_age)
|
2014-12-22 16:53:17 +02:00
|
|
|
{
|
2015-05-17 14:14:16 +03:00
|
|
|
if (key_version == ENCRYPTION_KEY_VERSION_INVALID) {
|
2015-05-11 21:05:02 +02:00
|
|
|
return false;
|
2015-05-17 14:14:16 +03:00
|
|
|
}
|
2014-12-22 16:53:17 +02:00
|
|
|
|
2015-05-11 21:05:02 +02:00
|
|
|
if (key_version == 0 && latest_key_version != 0) {
|
2014-12-22 16:53:17 +02:00
|
|
|
/* this is rotation unencrypted => encrypted
|
|
|
|
* ignore rotate_key_age */
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2015-05-11 21:05:02 +02:00
|
|
|
if (latest_key_version == 0 && key_version != 0) {
|
2018-04-06 12:55:43 +03:00
|
|
|
if (crypt_data->encryption == FIL_ENCRYPTION_DEFAULT) {
|
2015-05-17 14:14:16 +03:00
|
|
|
/* this is rotation encrypted => unencrypted */
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
2014-12-22 16:53:17 +02:00
|
|
|
}
|
|
|
|
|
2018-04-06 12:55:43 +03:00
|
|
|
if (crypt_data->encryption == FIL_ENCRYPTION_DEFAULT
|
|
|
|
&& crypt_data->type == CRYPT_SCHEME_1
|
|
|
|
&& srv_encrypt_tables == 0 ) {
|
|
|
|
/* This is rotation encrypted => unencrypted */
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
/* this is rotation encrypted => encrypted,
|
|
|
|
* only reencrypt if key is sufficiently old */
|
2015-05-11 21:05:02 +02:00
|
|
|
if (key_version + rotate_key_age < latest_key_version) {
|
2014-12-22 16:53:17 +02:00
|
|
|
return true;
|
2015-03-20 12:38:53 +02:00
|
|
|
}
|
2014-12-22 16:53:17 +02:00
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2017-06-08 15:40:25 +03:00
|
|
|
/** Read page 0 and possible crypt data from there.
|
2017-06-12 17:10:56 +03:00
|
|
|
@param[in,out] space Tablespace */
|
2017-06-08 15:40:25 +03:00
|
|
|
static inline
|
|
|
|
void
|
|
|
|
fil_crypt_read_crypt_data(fil_space_t* space)
|
|
|
|
{
|
2018-04-07 19:52:35 +03:00
|
|
|
if (space->crypt_data || space->size
|
|
|
|
|| !fil_space_get_size(space->id)) {
|
2017-06-12 17:10:56 +03:00
|
|
|
/* The encryption metadata has already been read, or
|
|
|
|
the tablespace is not encrypted and the file has been
|
2018-04-07 19:52:35 +03:00
|
|
|
opened already, or the file cannot be accessed,
|
MDEV-13564 Mariabackup does not work with TRUNCATE
Implement undo tablespace truncation via normal redo logging.
Implement TRUNCATE TABLE as a combination of RENAME to #sql-ib name,
CREATE, and DROP.
Note: Orphan #sql-ib*.ibd may be left behind if MariaDB Server 10.2
is killed before the DROP operation is committed. If MariaDB Server 10.2
is killed during TRUNCATE, it is also possible that the old table
was renamed to #sql-ib*.ibd but the data dictionary will refer to the
table using the original name.
In MariaDB Server 10.3, RENAME inside InnoDB is transactional,
and #sql-* tables will be dropped on startup. So, this new TRUNCATE
will be fully crash-safe in 10.3.
ha_mroonga::wrapper_truncate(): Pass table options to the underlying
storage engine, now that ha_innobase::truncate() will need them.
rpl_slave_state::truncate_state_table(): Before truncating
mysql.gtid_slave_pos, evict any cached table handles from
the table definition cache, so that there will be no stale
references to the old table after truncating.
== TRUNCATE TABLE ==
WL#6501 in MySQL 5.7 introduced separate log files for implementing
atomic and crash-safe TRUNCATE TABLE, instead of using the InnoDB
undo and redo log. Some convoluted logic was added to the InnoDB
crash recovery, and some extra synchronization (including a redo log
checkpoint) was introduced to make this work. This synchronization
has caused performance problems and race conditions, and the extra
log files cannot be copied or applied by external backup programs.
In order to support crash-upgrade from MariaDB 10.2, we will keep
the logic for parsing and applying the extra log files, but we will
no longer generate those files in TRUNCATE TABLE.
A prerequisite for crash-safe TRUNCATE is a crash-safe RENAME TABLE
(with full redo and undo logging and proper rollback). This will
be implemented in MDEV-14717.
ha_innobase::truncate(): Invoke RENAME, create(), delete_table().
Because RENAME cannot be fully rolled back before MariaDB 10.3
due to missing undo logging, add some explicit rename-back in
case the operation fails.
ha_innobase::delete(): Introduce a variant that takes sqlcom as
a parameter. In TRUNCATE TABLE, we do not want to touch any
FOREIGN KEY constraints.
ha_innobase::create(): Add the parameters file_per_table, trx.
In TRUNCATE, the new table must be created in the same transaction
that renames the old table.
create_table_info_t::create_table_info_t(): Add the parameters
file_per_table, trx.
row_drop_table_for_mysql(): Replace a bool parameter with sqlcom.
row_drop_table_after_create_fail(): New function, wrapping
row_drop_table_for_mysql().
dict_truncate_index_tree_in_mem(), fil_truncate_tablespace(),
fil_prepare_for_truncate(), fil_reinit_space_header_for_table(),
row_truncate_table_for_mysql(), TruncateLogger,
row_truncate_prepare(), row_truncate_rollback(),
row_truncate_complete(), row_truncate_fts(),
row_truncate_update_system_tables(),
row_truncate_foreign_key_checks(), row_truncate_sanity_checks():
Remove.
row_upd_check_references_constraints(): Remove a check for
TRUNCATE, now that the table is no longer truncated in place.
The new test innodb.truncate_foreign uses DEBUG_SYNC to cover some
race-condition like scenarios. The test innodb-innodb.truncate does
not use any synchronization.
We add a redo log subformat to indicate backup-friendly format.
MariaDB 10.4 will remove support for the old TRUNCATE logging,
so crash-upgrade from old 10.2 or 10.3 to 10.4 will involve
limitations.
== Undo tablespace truncation ==
MySQL 5.7 implements undo tablespace truncation. It is only
possible when innodb_undo_tablespaces is set to at least 2.
The logging is implemented similar to the WL#6501 TRUNCATE,
that is, using separate log files and a redo log checkpoint.
We can simply implement undo tablespace truncation within
a single mini-transaction that reinitializes the undo log
tablespace file. Unfortunately, due to the redo log format
of some operations, currently, the total redo log written by
undo tablespace truncation will be more than the combined size
of the truncated undo tablespace. It should be acceptable
to have a little more than 1 megabyte of log in a single
mini-transaction. This will be fixed in MDEV-17138 in
MariaDB Server 10.4.
recv_sys_t: Add truncated_undo_spaces[] to remember for which undo
tablespaces a MLOG_FILE_CREATE2 record was seen.
namespace undo: Remove some unnecessary declarations.
fil_space_t::is_being_truncated: Document that this flag now
only applies to undo tablespaces. Remove some references.
fil_space_t::is_stopping(): Do not refer to is_being_truncated.
This check is for tablespaces of tables. Potentially used
tablespaces are never truncated any more.
buf_dblwr_process(): Suppress the out-of-bounds warning
for undo tablespaces.
fil_truncate_log(): Write a MLOG_FILE_CREATE2 with a nonzero
page number (new size of the tablespace in pages) to inform
crash recovery that the undo tablespace size has been reduced.
fil_op_write_log(): Relax assertions, so that MLOG_FILE_CREATE2
can be written for undo tablespaces (without .ibd file suffix)
for a nonzero page number.
os_file_truncate(): Add the parameter allow_shrink=false
so that undo tablespaces can actually be shrunk using this function.
fil_name_parse(): For undo tablespace truncation,
buffer MLOG_FILE_CREATE2 in truncated_undo_spaces[].
recv_read_in_area(): Avoid reading pages for which no redo log
records remain buffered, after recv_addr_trim() removed them.
trx_rseg_header_create(): Add a FIXME comment that we could write
much less redo log.
trx_undo_truncate_tablespace(): Reinitialize the undo tablespace
in a single mini-transaction, which will be flushed to the redo log
before the file size is trimmed.
recv_addr_trim(): Discard any redo logs for pages that were
logged after the new end of a file, before the truncation LSN.
If the rec_list becomes empty, reduce n_addrs. After removing
any affected records, actually truncate the file.
recv_apply_hashed_log_recs(): Invoke recv_addr_trim() right before
applying any log records. The undo tablespace files must be open
at this point.
buf_flush_or_remove_pages(), buf_flush_dirty_pages(),
buf_LRU_flush_or_remove_pages(): Add a parameter for specifying
the number of the first page to flush or remove (default 0).
trx_purge_initiate_truncate(): Remove the log checkpoints, the
extra logging, and some unnecessary crash points. Merge the code
from trx_undo_truncate_tablespace(). First, flush all to-be-discarded
pages (beyond the new end of the file), then trim the space->size
to make the page allocation deterministic. At the only remaining
crash injection point, flush the redo log, so that the recovery
can be tested.
2018-08-28 13:43:06 +03:00
|
|
|
likely due to a concurrent DROP
|
|
|
|
(possibly as part of TRUNCATE or ALTER TABLE).
|
2018-04-07 19:52:35 +03:00
|
|
|
FIXME: The file can become unaccessible any time
|
|
|
|
after this check! We should really remove this
|
|
|
|
function and instead make crypt_data an integral
|
|
|
|
part of fil_space_t. */
|
2017-06-12 17:10:56 +03:00
|
|
|
return;
|
|
|
|
}
|
2017-06-08 15:40:25 +03:00
|
|
|
|
2017-06-12 17:43:07 +03:00
|
|
|
const page_size_t page_size(space->flags);
|
2017-06-12 17:10:56 +03:00
|
|
|
mtr_t mtr;
|
2017-06-12 17:43:07 +03:00
|
|
|
mtr.start();
|
|
|
|
if (buf_block_t* block = buf_page_get(page_id_t(space->id, 0),
|
|
|
|
page_size, RW_S_LATCH, &mtr)) {
|
2017-06-12 17:10:56 +03:00
|
|
|
mutex_enter(&fil_system->mutex);
|
|
|
|
if (!space->crypt_data) {
|
|
|
|
space->crypt_data = fil_space_read_crypt_data(
|
2017-06-12 17:43:07 +03:00
|
|
|
page_size, block->frame);
|
2017-06-08 15:40:25 +03:00
|
|
|
}
|
|
|
|
mutex_exit(&fil_system->mutex);
|
|
|
|
}
|
2017-06-12 17:43:07 +03:00
|
|
|
mtr.commit();
|
2017-06-08 15:40:25 +03:00
|
|
|
}
|
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
/***********************************************************************
|
|
|
|
Start encrypting a space
|
2017-03-14 12:56:01 +02:00
|
|
|
@param[in,out] space Tablespace
|
|
|
|
@return true if a recheck is needed */
|
2015-04-01 11:50:21 +03:00
|
|
|
static
|
|
|
|
bool
|
|
|
|
fil_crypt_start_encrypting_space(
|
2017-03-14 12:56:01 +02:00
|
|
|
fil_space_t* space)
|
2015-04-01 11:50:21 +03:00
|
|
|
{
|
2017-03-14 12:56:01 +02:00
|
|
|
bool recheck = false;
|
2017-06-08 15:40:25 +03:00
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
mutex_enter(&fil_crypt_threads_mutex);
|
2015-03-19 14:09:49 +02:00
|
|
|
|
2017-03-14 12:56:01 +02:00
|
|
|
fil_space_crypt_t *crypt_data = space->crypt_data;
|
2015-03-19 14:09:49 +02:00
|
|
|
|
2017-03-14 12:56:01 +02:00
|
|
|
/* If space is not encrypted and encryption is not enabled, then
|
2015-03-19 14:09:49 +02:00
|
|
|
do not continue encrypting the space. */
|
2017-03-14 12:56:01 +02:00
|
|
|
if (!crypt_data && !srv_encrypt_tables) {
|
2015-03-19 14:09:49 +02:00
|
|
|
mutex_exit(&fil_crypt_threads_mutex);
|
2017-03-14 12:56:01 +02:00
|
|
|
return false;
|
2015-03-19 14:09:49 +02:00
|
|
|
}
|
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
if (crypt_data != NULL || fil_crypt_start_converting) {
|
|
|
|
/* someone beat us to it */
|
2015-03-19 14:09:49 +02:00
|
|
|
if (fil_crypt_start_converting) {
|
2017-03-14 12:56:01 +02:00
|
|
|
recheck = true;
|
2015-03-19 14:09:49 +02:00
|
|
|
}
|
2014-12-22 16:53:17 +02:00
|
|
|
|
|
|
|
mutex_exit(&fil_crypt_threads_mutex);
|
2017-03-14 12:56:01 +02:00
|
|
|
return recheck;
|
2014-12-22 16:53:17 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/* NOTE: we need to write and flush page 0 before publishing
|
|
|
|
* the crypt data. This so that after restart there is no
|
|
|
|
* risk of finding encrypted pages without having
|
|
|
|
* crypt data in page 0 */
|
|
|
|
|
|
|
|
/* 1 - create crypt data */
|
2017-03-14 12:56:01 +02:00
|
|
|
crypt_data = fil_space_create_crypt_data(FIL_ENCRYPTION_DEFAULT, FIL_DEFAULT_ENCRYPTION_KEY);
|
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
if (crypt_data == NULL) {
|
|
|
|
mutex_exit(&fil_crypt_threads_mutex);
|
2017-03-14 12:56:01 +02:00
|
|
|
return false;
|
2014-12-22 16:53:17 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
crypt_data->type = CRYPT_SCHEME_UNENCRYPTED;
|
|
|
|
crypt_data->min_key_version = 0; // all pages are unencrypted
|
|
|
|
crypt_data->rotate_state.start_time = time(0);
|
|
|
|
crypt_data->rotate_state.starting = true;
|
|
|
|
crypt_data->rotate_state.active_threads = 1;
|
|
|
|
|
|
|
|
mutex_enter(&crypt_data->mutex);
|
2015-05-20 13:35:51 +03:00
|
|
|
crypt_data = fil_space_set_crypt_data(space, crypt_data);
|
2014-12-22 16:53:17 +02:00
|
|
|
mutex_exit(&crypt_data->mutex);
|
|
|
|
|
|
|
|
fil_crypt_start_converting = true;
|
|
|
|
mutex_exit(&fil_crypt_threads_mutex);
|
|
|
|
|
|
|
|
do
|
|
|
|
{
|
|
|
|
mtr_t mtr;
|
2017-05-09 17:23:08 +03:00
|
|
|
mtr.start();
|
|
|
|
mtr.set_named_space(space);
|
2014-12-22 16:53:17 +02:00
|
|
|
|
|
|
|
/* 2 - get page 0 */
|
2016-08-12 11:17:45 +03:00
|
|
|
dberr_t err = DB_SUCCESS;
|
2017-03-30 12:48:42 +02:00
|
|
|
buf_block_t* block = buf_page_get_gen(
|
|
|
|
page_id_t(space->id, 0), page_size_t(space->flags),
|
|
|
|
RW_X_LATCH, NULL, BUF_GET,
|
|
|
|
__FILE__, __LINE__,
|
|
|
|
&mtr, &err);
|
2014-12-22 16:53:17 +02:00
|
|
|
|
|
|
|
|
2017-03-14 12:56:01 +02:00
|
|
|
/* 3 - write crypt data to page 0 */
|
2014-12-22 16:53:17 +02:00
|
|
|
byte* frame = buf_block_get_frame(block);
|
2017-03-14 12:56:01 +02:00
|
|
|
crypt_data->type = CRYPT_SCHEME_1;
|
2017-03-30 12:48:42 +02:00
|
|
|
crypt_data->write_page0(space, frame, &mtr);
|
2014-12-22 16:53:17 +02:00
|
|
|
|
2017-05-09 17:23:08 +03:00
|
|
|
mtr.commit();
|
2014-12-22 16:53:17 +02:00
|
|
|
|
|
|
|
/* record lsn of update */
|
2016-08-12 11:17:45 +03:00
|
|
|
lsn_t end_lsn = mtr.commit_lsn();
|
2014-12-22 16:53:17 +02:00
|
|
|
|
|
|
|
/* 4 - sync tablespace before publishing crypt data */
|
|
|
|
|
|
|
|
bool success = false;
|
|
|
|
ulint sum_pages = 0;
|
2017-03-14 12:56:01 +02:00
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
do {
|
2017-03-14 12:56:01 +02:00
|
|
|
ulint n_pages = 0;
|
2016-08-12 11:17:45 +03:00
|
|
|
success = buf_flush_lists(ULINT_MAX, end_lsn, &n_pages);
|
2014-12-22 16:53:17 +02:00
|
|
|
buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
|
|
|
|
sum_pages += n_pages;
|
2017-03-14 12:56:01 +02:00
|
|
|
} while (!success);
|
2014-12-22 16:53:17 +02:00
|
|
|
|
|
|
|
/* 5 - publish crypt data */
|
|
|
|
mutex_enter(&fil_crypt_threads_mutex);
|
|
|
|
mutex_enter(&crypt_data->mutex);
|
|
|
|
crypt_data->type = CRYPT_SCHEME_1;
|
|
|
|
ut_a(crypt_data->rotate_state.active_threads == 1);
|
|
|
|
crypt_data->rotate_state.active_threads = 0;
|
|
|
|
crypt_data->rotate_state.starting = false;
|
|
|
|
|
|
|
|
fil_crypt_start_converting = false;
|
|
|
|
mutex_exit(&crypt_data->mutex);
|
|
|
|
mutex_exit(&fil_crypt_threads_mutex);
|
|
|
|
|
2017-03-14 12:56:01 +02:00
|
|
|
return recheck;
|
2014-12-22 16:53:17 +02:00
|
|
|
} while (0);
|
|
|
|
|
|
|
|
mutex_enter(&crypt_data->mutex);
|
|
|
|
ut_a(crypt_data->rotate_state.active_threads == 1);
|
|
|
|
crypt_data->rotate_state.active_threads = 0;
|
|
|
|
mutex_exit(&crypt_data->mutex);
|
|
|
|
|
|
|
|
mutex_enter(&fil_crypt_threads_mutex);
|
|
|
|
fil_crypt_start_converting = false;
|
|
|
|
mutex_exit(&fil_crypt_threads_mutex);
|
|
|
|
|
2017-03-14 12:56:01 +02:00
|
|
|
return recheck;
|
2014-12-22 16:53:17 +02:00
|
|
|
}
|
|
|
|
|
2015-05-14 10:13:03 +02:00
|
|
|
/** State of a rotation thread */
|
|
|
|
struct rotate_thread_t {
|
|
|
|
explicit rotate_thread_t(uint no) {
|
|
|
|
memset(this, 0, sizeof(* this));
|
|
|
|
thread_no = no;
|
|
|
|
first = true;
|
|
|
|
estimated_max_iops = 20;
|
|
|
|
}
|
|
|
|
|
|
|
|
uint thread_no;
|
|
|
|
bool first; /*!< is position before first space */
|
2017-03-14 12:56:01 +02:00
|
|
|
fil_space_t* space; /*!< current space or NULL */
|
2015-05-14 10:13:03 +02:00
|
|
|
ulint offset; /*!< current offset */
|
|
|
|
ulint batch; /*!< #pages to rotate */
|
|
|
|
uint min_key_version_found;/*!< min key version found but not rotated */
|
|
|
|
lsn_t end_lsn; /*!< max lsn when rotating this space */
|
|
|
|
|
|
|
|
uint estimated_max_iops; /*!< estimation of max iops */
|
|
|
|
uint allocated_iops; /*!< allocated iops */
|
2017-05-10 09:07:50 +03:00
|
|
|
ulint cnt_waited; /*!< #times waited during this slot */
|
2017-01-25 10:11:37 +02:00
|
|
|
uintmax_t sum_waited_us; /*!< wait time during this slot */
|
2015-05-14 10:13:03 +02:00
|
|
|
|
|
|
|
fil_crypt_stat_t crypt_stat; // statistics
|
|
|
|
|
|
|
|
btr_scrub_t scrub_data; /* thread local data used by btr_scrub-functions
|
|
|
|
* when iterating pages of tablespace */
|
|
|
|
|
MDEV-11638 Encryption causes race conditions in InnoDB shutdown
InnoDB shutdown failed to properly take fil_crypt_thread() into account.
The encryption threads were signalled to shut down together with other
non-critical tasks. This could be much too early in case of slow shutdown,
which could need minutes to complete the purge. Furthermore, InnoDB
failed to wait for the fil_crypt_thread() to actually exit before
proceeding to the final steps of shutdown, causing the race conditions.
Furthermore, the log_scrub_thread() was shut down way too early.
Also it should remain until the SRV_SHUTDOWN_FLUSH_PHASE.
fil_crypt_threads_end(): Remove. This would cause the threads to
be terminated way too early.
srv_buf_dump_thread_active, srv_dict_stats_thread_active,
lock_sys->timeout_thread_active, log_scrub_thread_active,
srv_monitor_active, srv_error_monitor_active: Remove a race condition
between startup and shutdown, by setting these in the startup thread
that creates threads, not in each created thread. In this way, once the
flag is cleared, it will remain cleared during shutdown.
srv_n_fil_crypt_threads_started, fil_crypt_threads_event: Declare in
global rather than static scope.
log_scrub_event, srv_log_scrub_thread_active, log_scrub_thread():
Declare in static rather than global scope. Let these be created by
log_init() and freed by log_shutdown().
rotate_thread_t::should_shutdown(): Do not shut down before the
SRV_SHUTDOWN_FLUSH_PHASE.
srv_any_background_threads_are_active(): Remove. These checks now
exist in logs_empty_and_mark_files_at_shutdown().
logs_empty_and_mark_files_at_shutdown(): Shut down the threads in
the proper order. Keep fil_crypt_thread() and log_scrub_thread() alive
until SRV_SHUTDOWN_FLUSH_PHASE, and check that they actually terminate.
2017-01-04 18:43:32 +02:00
|
|
|
/** @return whether this thread should terminate */
|
2015-05-14 10:13:03 +02:00
|
|
|
bool should_shutdown() const {
|
MDEV-11638 Encryption causes race conditions in InnoDB shutdown
InnoDB shutdown failed to properly take fil_crypt_thread() into account.
The encryption threads were signalled to shut down together with other
non-critical tasks. This could be much too early in case of slow shutdown,
which could need minutes to complete the purge. Furthermore, InnoDB
failed to wait for the fil_crypt_thread() to actually exit before
proceeding to the final steps of shutdown, causing the race conditions.
Furthermore, the log_scrub_thread() was shut down way too early.
Also it should remain until the SRV_SHUTDOWN_FLUSH_PHASE.
fil_crypt_threads_end(): Remove. This would cause the threads to
be terminated way too early.
srv_buf_dump_thread_active, srv_dict_stats_thread_active,
lock_sys->timeout_thread_active, log_scrub_thread_active,
srv_monitor_active, srv_error_monitor_active: Remove a race condition
between startup and shutdown, by setting these in the startup thread
that creates threads, not in each created thread. In this way, once the
flag is cleared, it will remain cleared during shutdown.
srv_n_fil_crypt_threads_started, fil_crypt_threads_event: Declare in
global rather than static scope.
log_scrub_event, srv_log_scrub_thread_active, log_scrub_thread():
Declare in static rather than global scope. Let these be created by
log_init() and freed by log_shutdown().
rotate_thread_t::should_shutdown(): Do not shut down before the
SRV_SHUTDOWN_FLUSH_PHASE.
srv_any_background_threads_are_active(): Remove. These checks now
exist in logs_empty_and_mark_files_at_shutdown().
logs_empty_and_mark_files_at_shutdown(): Shut down the threads in
the proper order. Keep fil_crypt_thread() and log_scrub_thread() alive
until SRV_SHUTDOWN_FLUSH_PHASE, and check that they actually terminate.
2017-01-04 18:43:32 +02:00
|
|
|
switch (srv_shutdown_state) {
|
|
|
|
case SRV_SHUTDOWN_NONE:
|
|
|
|
return thread_no >= srv_n_fil_crypt_threads;
|
2017-01-05 10:48:03 +02:00
|
|
|
case SRV_SHUTDOWN_EXIT_THREADS:
|
|
|
|
/* srv_init_abort() must have been invoked */
|
2017-05-19 14:28:57 +03:00
|
|
|
case SRV_SHUTDOWN_CLEANUP:
|
MDEV-11638 Encryption causes race conditions in InnoDB shutdown
InnoDB shutdown failed to properly take fil_crypt_thread() into account.
The encryption threads were signalled to shut down together with other
non-critical tasks. This could be much too early in case of slow shutdown,
which could need minutes to complete the purge. Furthermore, InnoDB
failed to wait for the fil_crypt_thread() to actually exit before
proceeding to the final steps of shutdown, causing the race conditions.
Furthermore, the log_scrub_thread() was shut down way too early.
Also it should remain until the SRV_SHUTDOWN_FLUSH_PHASE.
fil_crypt_threads_end(): Remove. This would cause the threads to
be terminated way too early.
srv_buf_dump_thread_active, srv_dict_stats_thread_active,
lock_sys->timeout_thread_active, log_scrub_thread_active,
srv_monitor_active, srv_error_monitor_active: Remove a race condition
between startup and shutdown, by setting these in the startup thread
that creates threads, not in each created thread. In this way, once the
flag is cleared, it will remain cleared during shutdown.
srv_n_fil_crypt_threads_started, fil_crypt_threads_event: Declare in
global rather than static scope.
log_scrub_event, srv_log_scrub_thread_active, log_scrub_thread():
Declare in static rather than global scope. Let these be created by
log_init() and freed by log_shutdown().
rotate_thread_t::should_shutdown(): Do not shut down before the
SRV_SHUTDOWN_FLUSH_PHASE.
srv_any_background_threads_are_active(): Remove. These checks now
exist in logs_empty_and_mark_files_at_shutdown().
logs_empty_and_mark_files_at_shutdown(): Shut down the threads in
the proper order. Keep fil_crypt_thread() and log_scrub_thread() alive
until SRV_SHUTDOWN_FLUSH_PHASE, and check that they actually terminate.
2017-01-04 18:43:32 +02:00
|
|
|
return true;
|
2017-05-19 14:28:57 +03:00
|
|
|
case SRV_SHUTDOWN_FLUSH_PHASE:
|
MDEV-11638 Encryption causes race conditions in InnoDB shutdown
InnoDB shutdown failed to properly take fil_crypt_thread() into account.
The encryption threads were signalled to shut down together with other
non-critical tasks. This could be much too early in case of slow shutdown,
which could need minutes to complete the purge. Furthermore, InnoDB
failed to wait for the fil_crypt_thread() to actually exit before
proceeding to the final steps of shutdown, causing the race conditions.
Furthermore, the log_scrub_thread() was shut down way too early.
Also it should remain until the SRV_SHUTDOWN_FLUSH_PHASE.
fil_crypt_threads_end(): Remove. This would cause the threads to
be terminated way too early.
srv_buf_dump_thread_active, srv_dict_stats_thread_active,
lock_sys->timeout_thread_active, log_scrub_thread_active,
srv_monitor_active, srv_error_monitor_active: Remove a race condition
between startup and shutdown, by setting these in the startup thread
that creates threads, not in each created thread. In this way, once the
flag is cleared, it will remain cleared during shutdown.
srv_n_fil_crypt_threads_started, fil_crypt_threads_event: Declare in
global rather than static scope.
log_scrub_event, srv_log_scrub_thread_active, log_scrub_thread():
Declare in static rather than global scope. Let these be created by
log_init() and freed by log_shutdown().
rotate_thread_t::should_shutdown(): Do not shut down before the
SRV_SHUTDOWN_FLUSH_PHASE.
srv_any_background_threads_are_active(): Remove. These checks now
exist in logs_empty_and_mark_files_at_shutdown().
logs_empty_and_mark_files_at_shutdown(): Shut down the threads in
the proper order. Keep fil_crypt_thread() and log_scrub_thread() alive
until SRV_SHUTDOWN_FLUSH_PHASE, and check that they actually terminate.
2017-01-04 18:43:32 +02:00
|
|
|
case SRV_SHUTDOWN_LAST_PHASE:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
ut_ad(0);
|
|
|
|
return true;
|
2015-05-14 10:13:03 +02:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
/***********************************************************************
|
2015-04-01 11:50:21 +03:00
|
|
|
Check if space needs rotation given a key_state
|
2017-03-14 12:56:01 +02:00
|
|
|
@param[in,out] state Key rotation state
|
|
|
|
@param[in,out] key_state Key state
|
|
|
|
@param[in,out] recheck needs recheck ?
|
2015-04-01 11:50:21 +03:00
|
|
|
@return true if space needs key rotation */
|
|
|
|
static
|
|
|
|
bool
|
|
|
|
fil_crypt_space_needs_rotation(
|
2017-03-14 12:56:01 +02:00
|
|
|
rotate_thread_t* state,
|
|
|
|
key_state_t* key_state,
|
|
|
|
bool* recheck)
|
2014-12-22 16:53:17 +02:00
|
|
|
{
|
2017-03-14 12:56:01 +02:00
|
|
|
fil_space_t* space = state->space;
|
2015-05-21 15:07:19 +03:00
|
|
|
|
2017-03-14 12:56:01 +02:00
|
|
|
/* Make sure that tablespace is normal tablespace */
|
2017-03-30 12:48:42 +02:00
|
|
|
if (space->purpose != FIL_TYPE_TABLESPACE) {
|
2014-12-22 16:53:17 +02:00
|
|
|
return false;
|
2015-03-19 14:09:49 +02:00
|
|
|
}
|
2014-12-22 16:53:17 +02:00
|
|
|
|
2017-03-14 12:56:01 +02:00
|
|
|
ut_ad(space->n_pending_ops > 0);
|
2014-12-22 16:53:17 +02:00
|
|
|
|
2017-03-14 12:56:01 +02:00
|
|
|
fil_space_crypt_t *crypt_data = space->crypt_data;
|
2015-03-19 14:09:49 +02:00
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
if (crypt_data == NULL) {
|
|
|
|
/**
|
|
|
|
* space has no crypt data
|
|
|
|
* start encrypting it...
|
|
|
|
*/
|
2017-03-14 12:56:01 +02:00
|
|
|
*recheck = fil_crypt_start_encrypting_space(space);
|
|
|
|
crypt_data = space->crypt_data;
|
2015-03-19 14:09:49 +02:00
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
if (crypt_data == NULL) {
|
|
|
|
return false;
|
|
|
|
}
|
2016-12-13 11:51:33 +02:00
|
|
|
|
|
|
|
crypt_data->key_get_latest_version();
|
2014-12-22 16:53:17 +02:00
|
|
|
}
|
|
|
|
|
2016-10-27 14:51:10 +03:00
|
|
|
/* If used key_id is not found from encryption plugin we can't
|
|
|
|
continue to rotate the tablespace */
|
2016-12-13 11:51:33 +02:00
|
|
|
if (!crypt_data->is_key_found()) {
|
2016-10-27 14:51:10 +03:00
|
|
|
return false;
|
2014-12-22 16:53:17 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
mutex_enter(&crypt_data->mutex);
|
2015-03-19 14:09:49 +02:00
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
do {
|
|
|
|
/* prevent threads from starting to rotate space */
|
|
|
|
if (crypt_data->rotate_state.starting) {
|
|
|
|
/* recheck this space later */
|
|
|
|
*recheck = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* prevent threads from starting to rotate space */
|
2017-03-14 12:56:01 +02:00
|
|
|
if (space->is_stopping()) {
|
2014-12-22 16:53:17 +02:00
|
|
|
break;
|
2015-03-20 12:38:53 +02:00
|
|
|
}
|
2014-12-22 16:53:17 +02:00
|
|
|
|
2015-03-20 12:38:53 +02:00
|
|
|
if (crypt_data->rotate_state.flushing) {
|
2014-12-22 16:53:17 +02:00
|
|
|
break;
|
2015-03-20 12:38:53 +02:00
|
|
|
}
|
2014-12-22 16:53:17 +02:00
|
|
|
|
2015-09-22 15:03:59 +03:00
|
|
|
/* No need to rotate space if encryption is disabled */
|
2016-12-13 11:51:33 +02:00
|
|
|
if (crypt_data->not_encrypted()) {
|
2015-09-22 15:03:59 +03:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2015-05-11 21:05:02 +02:00
|
|
|
if (crypt_data->key_id != key_state->key_id) {
|
|
|
|
key_state->key_id= crypt_data->key_id;
|
2016-12-13 11:51:33 +02:00
|
|
|
fil_crypt_get_key_state(key_state, crypt_data);
|
2015-05-11 21:05:02 +02:00
|
|
|
}
|
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
bool need_key_rotation = fil_crypt_needs_rotation(
|
2018-04-06 12:55:43 +03:00
|
|
|
crypt_data,
|
2015-05-11 21:05:02 +02:00
|
|
|
crypt_data->min_key_version,
|
2018-04-06 12:55:43 +03:00
|
|
|
key_state->key_version,
|
|
|
|
key_state->rotate_key_age);
|
2014-12-22 16:53:17 +02:00
|
|
|
|
2015-05-14 10:13:03 +02:00
|
|
|
crypt_data->rotate_state.scrubbing.is_active =
|
2017-03-14 12:56:01 +02:00
|
|
|
btr_scrub_start_space(space->id, &state->scrub_data);
|
2015-05-14 10:13:03 +02:00
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
time_t diff = time(0) - crypt_data->rotate_state.scrubbing.
|
|
|
|
last_scrub_completed;
|
2016-12-13 11:51:33 +02:00
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
bool need_scrubbing =
|
2017-03-14 12:56:01 +02:00
|
|
|
(srv_background_scrub_data_uncompressed ||
|
|
|
|
srv_background_scrub_data_compressed) &&
|
2015-05-14 10:13:03 +02:00
|
|
|
crypt_data->rotate_state.scrubbing.is_active
|
2017-03-09 08:54:07 +02:00
|
|
|
&& diff >= 0
|
|
|
|
&& ulint(diff) >= srv_background_scrub_data_interval;
|
2014-12-22 16:53:17 +02:00
|
|
|
|
2016-12-13 11:51:33 +02:00
|
|
|
if (need_key_rotation == false && need_scrubbing == false) {
|
2014-12-22 16:53:17 +02:00
|
|
|
break;
|
2016-12-13 11:51:33 +02:00
|
|
|
}
|
2014-12-22 16:53:17 +02:00
|
|
|
|
|
|
|
mutex_exit(&crypt_data->mutex);
|
2017-03-14 12:56:01 +02:00
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
return true;
|
|
|
|
} while (0);
|
|
|
|
|
|
|
|
mutex_exit(&crypt_data->mutex);
|
2015-03-20 12:38:53 +02:00
|
|
|
|
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/***********************************************************************
|
2017-03-14 12:56:01 +02:00
|
|
|
Update global statistics with thread statistics
|
|
|
|
@param[in,out] state key rotation statistics */
|
2014-12-22 16:53:17 +02:00
|
|
|
static void
|
2015-04-01 11:50:21 +03:00
|
|
|
fil_crypt_update_total_stat(
|
2017-03-14 12:56:01 +02:00
|
|
|
rotate_thread_t *state)
|
2014-12-22 16:53:17 +02:00
|
|
|
{
|
|
|
|
mutex_enter(&crypt_stat_mutex);
|
|
|
|
crypt_stat.pages_read_from_cache +=
|
|
|
|
state->crypt_stat.pages_read_from_cache;
|
|
|
|
crypt_stat.pages_read_from_disk +=
|
|
|
|
state->crypt_stat.pages_read_from_disk;
|
|
|
|
crypt_stat.pages_modified += state->crypt_stat.pages_modified;
|
|
|
|
crypt_stat.pages_flushed += state->crypt_stat.pages_flushed;
|
|
|
|
// remote old estimate
|
|
|
|
crypt_stat.estimated_iops -= state->crypt_stat.estimated_iops;
|
|
|
|
// add new estimate
|
|
|
|
crypt_stat.estimated_iops += state->estimated_max_iops;
|
|
|
|
mutex_exit(&crypt_stat_mutex);
|
|
|
|
|
|
|
|
// make new estimate "current" estimate
|
|
|
|
memset(&state->crypt_stat, 0, sizeof(state->crypt_stat));
|
|
|
|
// record our old (current) estimate
|
|
|
|
state->crypt_stat.estimated_iops = state->estimated_max_iops;
|
|
|
|
}
|
|
|
|
|
|
|
|
/***********************************************************************
|
|
|
|
Allocate iops to thread from global setting,
|
2015-04-01 11:50:21 +03:00
|
|
|
used before starting to rotate a space.
|
2017-03-14 12:56:01 +02:00
|
|
|
@param[in,out] state Rotation state
|
2015-04-01 11:50:21 +03:00
|
|
|
@return true if allocation succeeded, false if failed */
|
|
|
|
static
|
|
|
|
bool
|
|
|
|
fil_crypt_alloc_iops(
|
2017-03-14 12:56:01 +02:00
|
|
|
rotate_thread_t *state)
|
2014-12-22 16:53:17 +02:00
|
|
|
{
|
|
|
|
ut_ad(state->allocated_iops == 0);
|
|
|
|
|
2017-03-14 12:56:01 +02:00
|
|
|
/* We have not yet selected the space to rotate, thus
|
|
|
|
state might not contain space and we can't check
|
|
|
|
its status yet. */
|
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
uint max_iops = state->estimated_max_iops;
|
|
|
|
mutex_enter(&fil_crypt_threads_mutex);
|
2015-03-20 12:38:53 +02:00
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
if (n_fil_crypt_iops_allocated >= srv_n_fil_crypt_iops) {
|
|
|
|
/* this can happen when user decreases srv_fil_crypt_iops */
|
|
|
|
mutex_exit(&fil_crypt_threads_mutex);
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
uint alloc = srv_n_fil_crypt_iops - n_fil_crypt_iops_allocated;
|
2015-03-20 12:38:53 +02:00
|
|
|
|
|
|
|
if (alloc > max_iops) {
|
2014-12-22 16:53:17 +02:00
|
|
|
alloc = max_iops;
|
2015-03-20 12:38:53 +02:00
|
|
|
}
|
2014-12-22 16:53:17 +02:00
|
|
|
|
|
|
|
n_fil_crypt_iops_allocated += alloc;
|
|
|
|
mutex_exit(&fil_crypt_threads_mutex);
|
|
|
|
|
|
|
|
state->allocated_iops = alloc;
|
|
|
|
|
|
|
|
return alloc > 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/***********************************************************************
|
|
|
|
Reallocate iops to thread,
|
2017-03-14 12:56:01 +02:00
|
|
|
used when inside a space
|
|
|
|
@param[in,out] state Rotation state */
|
2015-04-01 11:50:21 +03:00
|
|
|
static
|
|
|
|
void
|
|
|
|
fil_crypt_realloc_iops(
|
2017-03-14 12:56:01 +02:00
|
|
|
rotate_thread_t *state)
|
2014-12-22 16:53:17 +02:00
|
|
|
{
|
|
|
|
ut_a(state->allocated_iops > 0);
|
|
|
|
|
|
|
|
if (10 * state->cnt_waited > state->batch) {
|
|
|
|
/* if we waited more than 10% re-estimate max_iops */
|
2017-04-21 08:45:48 +03:00
|
|
|
ulint avg_wait_time_us =
|
2017-09-28 10:38:02 +00:00
|
|
|
ulint(state->sum_waited_us / state->cnt_waited);
|
2014-12-22 16:53:17 +02:00
|
|
|
|
2017-04-21 08:45:48 +03:00
|
|
|
if (avg_wait_time_us == 0) {
|
|
|
|
avg_wait_time_us = 1; // prevent division by zero
|
|
|
|
}
|
|
|
|
|
2017-03-14 12:56:01 +02:00
|
|
|
DBUG_PRINT("ib_crypt",
|
2017-04-21 08:45:48 +03:00
|
|
|
("thr_no: %u - update estimated_max_iops from %u to "
|
|
|
|
ULINTPF ".",
|
2014-12-22 16:53:17 +02:00
|
|
|
state->thread_no,
|
|
|
|
state->estimated_max_iops,
|
2017-03-14 12:56:01 +02:00
|
|
|
1000000 / avg_wait_time_us));
|
|
|
|
|
2017-04-21 08:45:48 +03:00
|
|
|
state->estimated_max_iops = uint(1000000 / avg_wait_time_us);
|
2014-12-22 16:53:17 +02:00
|
|
|
state->cnt_waited = 0;
|
|
|
|
state->sum_waited_us = 0;
|
|
|
|
} else {
|
2017-03-14 12:56:01 +02:00
|
|
|
DBUG_PRINT("ib_crypt",
|
2017-08-29 14:23:34 +03:00
|
|
|
("thr_no: %u only waited " ULINTPF
|
|
|
|
"%% skip re-estimate.",
|
|
|
|
state->thread_no,
|
|
|
|
(100 * state->cnt_waited)
|
|
|
|
/ (state->batch ? state->batch : 1)));
|
2014-12-22 16:53:17 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
if (state->estimated_max_iops <= state->allocated_iops) {
|
|
|
|
/* return extra iops */
|
|
|
|
uint extra = state->allocated_iops - state->estimated_max_iops;
|
|
|
|
|
|
|
|
if (extra > 0) {
|
|
|
|
mutex_enter(&fil_crypt_threads_mutex);
|
|
|
|
if (n_fil_crypt_iops_allocated < extra) {
|
|
|
|
/* unknown bug!
|
|
|
|
* crash in debug
|
|
|
|
* keep n_fil_crypt_iops_allocated unchanged
|
|
|
|
* in release */
|
|
|
|
ut_ad(0);
|
|
|
|
extra = 0;
|
|
|
|
}
|
|
|
|
n_fil_crypt_iops_allocated -= extra;
|
|
|
|
state->allocated_iops -= extra;
|
|
|
|
|
|
|
|
if (state->allocated_iops == 0) {
|
|
|
|
/* no matter how slow io system seems to be
|
|
|
|
* never decrease allocated_iops to 0... */
|
|
|
|
state->allocated_iops ++;
|
|
|
|
n_fil_crypt_iops_allocated ++;
|
|
|
|
}
|
2017-03-14 12:56:01 +02:00
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
os_event_set(fil_crypt_threads_event);
|
2017-03-14 12:56:01 +02:00
|
|
|
mutex_exit(&fil_crypt_threads_mutex);
|
2014-12-22 16:53:17 +02:00
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/* see if there are more to get */
|
|
|
|
mutex_enter(&fil_crypt_threads_mutex);
|
|
|
|
if (n_fil_crypt_iops_allocated < srv_n_fil_crypt_iops) {
|
|
|
|
/* there are extra iops free */
|
|
|
|
uint extra = srv_n_fil_crypt_iops -
|
|
|
|
n_fil_crypt_iops_allocated;
|
|
|
|
if (state->allocated_iops + extra >
|
|
|
|
state->estimated_max_iops) {
|
|
|
|
/* but don't alloc more than our max */
|
|
|
|
extra = state->estimated_max_iops -
|
|
|
|
state->allocated_iops;
|
|
|
|
}
|
|
|
|
n_fil_crypt_iops_allocated += extra;
|
|
|
|
state->allocated_iops += extra;
|
2017-03-14 12:56:01 +02:00
|
|
|
|
|
|
|
DBUG_PRINT("ib_crypt",
|
|
|
|
("thr_no: %u increased iops from %u to %u.",
|
2014-12-22 16:53:17 +02:00
|
|
|
state->thread_no,
|
|
|
|
state->allocated_iops - extra,
|
2017-03-14 12:56:01 +02:00
|
|
|
state->allocated_iops));
|
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
}
|
|
|
|
mutex_exit(&fil_crypt_threads_mutex);
|
|
|
|
}
|
|
|
|
|
|
|
|
fil_crypt_update_total_stat(state);
|
|
|
|
}
|
|
|
|
|
|
|
|
/***********************************************************************
|
2017-03-14 12:56:01 +02:00
|
|
|
Return allocated iops to global
|
|
|
|
@param[in,out] state Rotation state */
|
2015-04-01 11:50:21 +03:00
|
|
|
static
|
|
|
|
void
|
|
|
|
fil_crypt_return_iops(
|
2017-03-14 12:56:01 +02:00
|
|
|
rotate_thread_t *state)
|
2014-12-22 16:53:17 +02:00
|
|
|
{
|
|
|
|
if (state->allocated_iops > 0) {
|
|
|
|
uint iops = state->allocated_iops;
|
|
|
|
mutex_enter(&fil_crypt_threads_mutex);
|
|
|
|
if (n_fil_crypt_iops_allocated < iops) {
|
|
|
|
/* unknown bug!
|
|
|
|
* crash in debug
|
|
|
|
* keep n_fil_crypt_iops_allocated unchanged
|
|
|
|
* in release */
|
|
|
|
ut_ad(0);
|
|
|
|
iops = 0;
|
|
|
|
}
|
2017-03-14 12:56:01 +02:00
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
n_fil_crypt_iops_allocated -= iops;
|
|
|
|
state->allocated_iops = 0;
|
|
|
|
os_event_set(fil_crypt_threads_event);
|
2017-03-14 12:56:01 +02:00
|
|
|
mutex_exit(&fil_crypt_threads_mutex);
|
2014-12-22 16:53:17 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
fil_crypt_update_total_stat(state);
|
|
|
|
}
|
|
|
|
|
|
|
|
/***********************************************************************
|
2017-03-14 12:56:01 +02:00
|
|
|
Search for a space needing rotation
|
|
|
|
@param[in,out] key_state Key state
|
|
|
|
@param[in,out] state Rotation state
|
|
|
|
@param[in,out] recheck recheck ? */
|
|
|
|
static
|
2014-12-22 16:53:17 +02:00
|
|
|
bool
|
|
|
|
fil_crypt_find_space_to_rotate(
|
2017-03-14 12:56:01 +02:00
|
|
|
key_state_t* key_state,
|
|
|
|
rotate_thread_t* state,
|
|
|
|
bool* recheck)
|
2014-12-22 16:53:17 +02:00
|
|
|
{
|
|
|
|
/* we need iops to start rotating */
|
|
|
|
while (!state->should_shutdown() && !fil_crypt_alloc_iops(state)) {
|
|
|
|
os_event_reset(fil_crypt_threads_event);
|
2017-08-29 14:23:34 +03:00
|
|
|
os_event_wait_time(fil_crypt_threads_event, 100000);
|
2014-12-22 16:53:17 +02:00
|
|
|
}
|
|
|
|
|
2016-12-13 11:51:33 +02:00
|
|
|
if (state->should_shutdown()) {
|
2017-03-14 12:56:01 +02:00
|
|
|
if (state->space) {
|
|
|
|
fil_space_release(state->space);
|
|
|
|
state->space = NULL;
|
|
|
|
}
|
2014-12-22 16:53:17 +02:00
|
|
|
return false;
|
2016-12-13 11:51:33 +02:00
|
|
|
}
|
2014-12-22 16:53:17 +02:00
|
|
|
|
|
|
|
if (state->first) {
|
|
|
|
state->first = false;
|
2017-03-14 12:56:01 +02:00
|
|
|
if (state->space) {
|
|
|
|
fil_space_release(state->space);
|
|
|
|
}
|
|
|
|
state->space = NULL;
|
2014-12-22 16:53:17 +02:00
|
|
|
}
|
|
|
|
|
2017-03-14 12:56:01 +02:00
|
|
|
/* If key rotation is enabled (default) we iterate all tablespaces.
|
|
|
|
If key rotation is not enabled we iterate only the tablespaces
|
|
|
|
added to keyrotation list. */
|
|
|
|
if (srv_fil_crypt_rotate_key_age) {
|
|
|
|
state->space = fil_space_next(state->space);
|
|
|
|
} else {
|
|
|
|
state->space = fil_space_keyrotate_next(state->space);
|
|
|
|
}
|
2015-09-22 15:03:59 +03:00
|
|
|
|
2017-03-14 12:56:01 +02:00
|
|
|
while (!state->should_shutdown() && state->space) {
|
2017-06-12 17:43:07 +03:00
|
|
|
/* If there is no crypt data and we have not yet read
|
|
|
|
page 0 for this tablespace, we need to read it before
|
|
|
|
we can continue. */
|
|
|
|
if (!state->space->crypt_data) {
|
|
|
|
fil_crypt_read_crypt_data(state->space);
|
|
|
|
}
|
2017-06-08 15:40:25 +03:00
|
|
|
|
2017-03-14 12:56:01 +02:00
|
|
|
if (fil_crypt_space_needs_rotation(state, key_state, recheck)) {
|
|
|
|
ut_ad(key_state->key_id);
|
|
|
|
/* init state->min_key_version_found before
|
|
|
|
* starting on a space */
|
|
|
|
state->min_key_version_found = key_state->key_version;
|
|
|
|
return true;
|
2014-12-22 16:53:17 +02:00
|
|
|
}
|
|
|
|
|
2017-03-14 12:56:01 +02:00
|
|
|
if (srv_fil_crypt_rotate_key_age) {
|
|
|
|
state->space = fil_space_next(state->space);
|
|
|
|
} else {
|
|
|
|
state->space = fil_space_keyrotate_next(state->space);
|
|
|
|
}
|
2014-12-22 16:53:17 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/* if we didn't find any space return iops */
|
|
|
|
fil_crypt_return_iops(state);
|
|
|
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
/***********************************************************************
|
2017-03-14 12:56:01 +02:00
|
|
|
Start rotating a space
|
|
|
|
@param[in] key_state Key state
|
|
|
|
@param[in,out] state Rotation state */
|
2014-12-22 16:53:17 +02:00
|
|
|
static
|
|
|
|
void
|
|
|
|
fil_crypt_start_rotate_space(
|
2017-03-14 12:56:01 +02:00
|
|
|
const key_state_t* key_state,
|
|
|
|
rotate_thread_t* state)
|
2014-12-22 16:53:17 +02:00
|
|
|
{
|
2017-03-14 12:56:01 +02:00
|
|
|
fil_space_crypt_t *crypt_data = state->space->crypt_data;
|
2015-03-20 12:38:53 +02:00
|
|
|
|
2015-05-17 14:14:16 +03:00
|
|
|
ut_ad(crypt_data);
|
2014-12-22 16:53:17 +02:00
|
|
|
mutex_enter(&crypt_data->mutex);
|
2015-05-11 21:05:02 +02:00
|
|
|
ut_ad(key_state->key_id == crypt_data->key_id);
|
2015-03-20 12:38:53 +02:00
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
if (crypt_data->rotate_state.active_threads == 0) {
|
|
|
|
/* only first thread needs to init */
|
|
|
|
crypt_data->rotate_state.next_offset = 1; // skip page 0
|
|
|
|
/* no need to rotate beyond current max
|
|
|
|
* if space extends, it will be encrypted with newer version */
|
2017-03-14 12:56:01 +02:00
|
|
|
/* FIXME: max_offset could be removed and instead
|
|
|
|
space->size consulted.*/
|
|
|
|
crypt_data->rotate_state.max_offset = state->space->size;
|
2014-12-22 16:53:17 +02:00
|
|
|
crypt_data->rotate_state.end_lsn = 0;
|
|
|
|
crypt_data->rotate_state.min_key_version_found =
|
|
|
|
key_state->key_version;
|
|
|
|
|
|
|
|
crypt_data->rotate_state.start_time = time(0);
|
2015-05-21 15:07:19 +03:00
|
|
|
|
|
|
|
if (crypt_data->type == CRYPT_SCHEME_UNENCRYPTED &&
|
2016-12-13 11:51:33 +02:00
|
|
|
crypt_data->is_encrypted() &&
|
2015-05-21 15:07:19 +03:00
|
|
|
key_state->key_version != 0) {
|
|
|
|
/* this is rotation unencrypted => encrypted */
|
|
|
|
crypt_data->type = CRYPT_SCHEME_1;
|
|
|
|
}
|
2014-12-22 16:53:17 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/* count active threads in space */
|
|
|
|
crypt_data->rotate_state.active_threads++;
|
|
|
|
|
|
|
|
/* Initialize thread local state */
|
|
|
|
state->end_lsn = crypt_data->rotate_state.end_lsn;
|
|
|
|
state->min_key_version_found =
|
|
|
|
crypt_data->rotate_state.min_key_version_found;
|
|
|
|
|
|
|
|
mutex_exit(&crypt_data->mutex);
|
|
|
|
}
|
|
|
|
|
|
|
|
/***********************************************************************
|
2015-04-01 11:50:21 +03:00
|
|
|
Search for batch of pages needing rotation
|
2017-03-14 12:56:01 +02:00
|
|
|
@param[in] key_state Key state
|
|
|
|
@param[in,out] state Rotation state
|
2015-04-01 11:50:21 +03:00
|
|
|
@return true if page needing key rotation found, false if not found */
|
2014-12-22 16:53:17 +02:00
|
|
|
static
|
|
|
|
bool
|
|
|
|
fil_crypt_find_page_to_rotate(
|
2017-03-14 12:56:01 +02:00
|
|
|
const key_state_t* key_state,
|
|
|
|
rotate_thread_t* state)
|
2014-12-22 16:53:17 +02:00
|
|
|
{
|
|
|
|
ulint batch = srv_alloc_time * state->allocated_iops;
|
2017-03-14 12:56:01 +02:00
|
|
|
fil_space_t* space = state->space;
|
|
|
|
|
|
|
|
ut_ad(!space || space->n_pending_ops > 0);
|
|
|
|
|
|
|
|
/* If space is marked to be dropped stop rotation. */
|
|
|
|
if (!space || space->is_stopping()) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
fil_space_crypt_t *crypt_data = space->crypt_data;
|
2015-03-20 12:38:53 +02:00
|
|
|
|
2017-04-21 11:52:25 +03:00
|
|
|
mutex_enter(&crypt_data->mutex);
|
|
|
|
ut_ad(key_state->key_id == crypt_data->key_id);
|
2015-03-20 12:38:53 +02:00
|
|
|
|
2017-04-21 11:52:25 +03:00
|
|
|
bool found = crypt_data->rotate_state.max_offset >=
|
|
|
|
crypt_data->rotate_state.next_offset;
|
2014-12-22 16:53:17 +02:00
|
|
|
|
2017-04-21 11:52:25 +03:00
|
|
|
if (found) {
|
|
|
|
state->offset = crypt_data->rotate_state.next_offset;
|
|
|
|
ulint remaining = crypt_data->rotate_state.max_offset -
|
|
|
|
crypt_data->rotate_state.next_offset;
|
2014-12-22 16:53:17 +02:00
|
|
|
|
2017-04-21 11:52:25 +03:00
|
|
|
if (batch <= remaining) {
|
|
|
|
state->batch = batch;
|
|
|
|
} else {
|
|
|
|
state->batch = remaining;
|
2015-05-17 14:14:16 +03:00
|
|
|
}
|
2014-12-22 16:53:17 +02:00
|
|
|
}
|
|
|
|
|
2017-04-21 11:52:25 +03:00
|
|
|
crypt_data->rotate_state.next_offset += batch;
|
|
|
|
mutex_exit(&crypt_data->mutex);
|
|
|
|
return found;
|
2014-12-22 16:53:17 +02:00
|
|
|
}
|
|
|
|
|
2017-03-14 12:56:01 +02:00
|
|
|
/** Call fil_crypt_get_page_throttle_func(), passing the file name and
line number of the call site as the last two arguments. */
#define fil_crypt_get_page_throttle(st, page_offset, m, sleep_ms)	\
	fil_crypt_get_page_throttle_func(st, page_offset, m,		\
					 sleep_ms, __FILE__, __LINE__)
|
|
|
|
|
|
|
|
/***********************************************************************
|
2015-04-01 11:50:21 +03:00
|
|
|
Get a page and compute sleep time
|
2017-03-14 12:56:01 +02:00
|
|
|
@param[in,out] state Rotation state
|
|
|
|
@param[in] offset Page offset
|
|
|
|
@param[in,out] mtr Minitransaction
|
|
|
|
@param[out] sleeptime_ms Sleep time
|
|
|
|
@param[in] file File where called
|
|
|
|
@param[in] line Line where called
|
|
|
|
@return page or NULL*/
|
2014-12-22 16:53:17 +02:00
|
|
|
static
|
|
|
|
buf_block_t*
|
2015-04-01 11:50:21 +03:00
|
|
|
fil_crypt_get_page_throttle_func(
|
2017-03-14 12:56:01 +02:00
|
|
|
rotate_thread_t* state,
|
|
|
|
ulint offset,
|
|
|
|
mtr_t* mtr,
|
|
|
|
ulint* sleeptime_ms,
|
|
|
|
const char* file,
|
2017-05-10 09:07:50 +03:00
|
|
|
unsigned line)
|
2014-12-22 16:53:17 +02:00
|
|
|
{
|
2017-03-14 12:56:01 +02:00
|
|
|
fil_space_t* space = state->space;
|
2017-03-30 12:48:42 +02:00
|
|
|
const page_size_t page_size = page_size_t(space->flags);
|
|
|
|
const page_id_t page_id(space->id, offset);
|
2017-03-14 12:56:01 +02:00
|
|
|
ut_ad(space->n_pending_ops > 0);
|
2016-08-12 11:17:45 +03:00
|
|
|
|
2017-03-30 12:48:42 +02:00
|
|
|
/* Before reading from tablespace we need to make sure that
|
MDEV-13564 Mariabackup does not work with TRUNCATE
Implement undo tablespace truncation via normal redo logging.
Implement TRUNCATE TABLE as a combination of RENAME to #sql-ib name,
CREATE, and DROP.
Note: Orphan #sql-ib*.ibd may be left behind if MariaDB Server 10.2
is killed before the DROP operation is committed. If MariaDB Server 10.2
is killed during TRUNCATE, it is also possible that the old table
was renamed to #sql-ib*.ibd but the data dictionary will refer to the
table using the original name.
In MariaDB Server 10.3, RENAME inside InnoDB is transactional,
and #sql-* tables will be dropped on startup. So, this new TRUNCATE
will be fully crash-safe in 10.3.
ha_mroonga::wrapper_truncate(): Pass table options to the underlying
storage engine, now that ha_innobase::truncate() will need them.
rpl_slave_state::truncate_state_table(): Before truncating
mysql.gtid_slave_pos, evict any cached table handles from
the table definition cache, so that there will be no stale
references to the old table after truncating.
== TRUNCATE TABLE ==
WL#6501 in MySQL 5.7 introduced separate log files for implementing
atomic and crash-safe TRUNCATE TABLE, instead of using the InnoDB
undo and redo log. Some convoluted logic was added to the InnoDB
crash recovery, and some extra synchronization (including a redo log
checkpoint) was introduced to make this work. This synchronization
has caused performance problems and race conditions, and the extra
log files cannot be copied or applied by external backup programs.
In order to support crash-upgrade from MariaDB 10.2, we will keep
the logic for parsing and applying the extra log files, but we will
no longer generate those files in TRUNCATE TABLE.
A prerequisite for crash-safe TRUNCATE is a crash-safe RENAME TABLE
(with full redo and undo logging and proper rollback). This will
be implemented in MDEV-14717.
ha_innobase::truncate(): Invoke RENAME, create(), delete_table().
Because RENAME cannot be fully rolled back before MariaDB 10.3
due to missing undo logging, add some explicit rename-back in
case the operation fails.
ha_innobase::delete(): Introduce a variant that takes sqlcom as
a parameter. In TRUNCATE TABLE, we do not want to touch any
FOREIGN KEY constraints.
ha_innobase::create(): Add the parameters file_per_table, trx.
In TRUNCATE, the new table must be created in the same transaction
that renames the old table.
create_table_info_t::create_table_info_t(): Add the parameters
file_per_table, trx.
row_drop_table_for_mysql(): Replace a bool parameter with sqlcom.
row_drop_table_after_create_fail(): New function, wrapping
row_drop_table_for_mysql().
dict_truncate_index_tree_in_mem(), fil_truncate_tablespace(),
fil_prepare_for_truncate(), fil_reinit_space_header_for_table(),
row_truncate_table_for_mysql(), TruncateLogger,
row_truncate_prepare(), row_truncate_rollback(),
row_truncate_complete(), row_truncate_fts(),
row_truncate_update_system_tables(),
row_truncate_foreign_key_checks(), row_truncate_sanity_checks():
Remove.
row_upd_check_references_constraints(): Remove a check for
TRUNCATE, now that the table is no longer truncated in place.
The new test innodb.truncate_foreign uses DEBUG_SYNC to cover some
race-condition like scenarios. The test innodb-innodb.truncate does
not use any synchronization.
We add a redo log subformat to indicate backup-friendly format.
MariaDB 10.4 will remove support for the old TRUNCATE logging,
so crash-upgrade from old 10.2 or 10.3 to 10.4 will involve
limitations.
== Undo tablespace truncation ==
MySQL 5.7 implements undo tablespace truncation. It is only
possible when innodb_undo_tablespaces is set to at least 2.
The logging is implemented similar to the WL#6501 TRUNCATE,
that is, using separate log files and a redo log checkpoint.
We can simply implement undo tablespace truncation within
a single mini-transaction that reinitializes the undo log
tablespace file. Unfortunately, due to the redo log format
of some operations, currently, the total redo log written by
undo tablespace truncation will be more than the combined size
of the truncated undo tablespace. It should be acceptable
to have a little more than 1 megabyte of log in a single
mini-transaction. This will be fixed in MDEV-17138 in
MariaDB Server 10.4.
recv_sys_t: Add truncated_undo_spaces[] to remember for which undo
tablespaces a MLOG_FILE_CREATE2 record was seen.
namespace undo: Remove some unnecessary declarations.
fil_space_t::is_being_truncated: Document that this flag now
only applies to undo tablespaces. Remove some references.
fil_space_t::is_stopping(): Do not refer to is_being_truncated.
This check is for tablespaces of tables. Potentially used
tablespaces are never truncated any more.
buf_dblwr_process(): Suppress the out-of-bounds warning
for undo tablespaces.
fil_truncate_log(): Write a MLOG_FILE_CREATE2 with a nonzero
page number (new size of the tablespace in pages) to inform
crash recovery that the undo tablespace size has been reduced.
fil_op_write_log(): Relax assertions, so that MLOG_FILE_CREATE2
can be written for undo tablespaces (without .ibd file suffix)
for a nonzero page number.
os_file_truncate(): Add the parameter allow_shrink=false
so that undo tablespaces can actually be shrunk using this function.
fil_name_parse(): For undo tablespace truncation,
buffer MLOG_FILE_CREATE2 in truncated_undo_spaces[].
recv_read_in_area(): Avoid reading pages for which no redo log
records remain buffered, after recv_addr_trim() removed them.
trx_rseg_header_create(): Add a FIXME comment that we could write
much less redo log.
trx_undo_truncate_tablespace(): Reinitialize the undo tablespace
in a single mini-transaction, which will be flushed to the redo log
before the file size is trimmed.
recv_addr_trim(): Discard any redo logs for pages that were
logged after the new end of a file, before the truncation LSN.
If the rec_list becomes empty, reduce n_addrs. After removing
any affected records, actually truncate the file.
recv_apply_hashed_log_recs(): Invoke recv_addr_trim() right before
applying any log records. The undo tablespace files must be open
at this point.
buf_flush_or_remove_pages(), buf_flush_dirty_pages(),
buf_LRU_flush_or_remove_pages(): Add a parameter for specifying
the number of the first page to flush or remove (default 0).
trx_purge_initiate_truncate(): Remove the log checkpoints, the
extra logging, and some unnecessary crash points. Merge the code
from trx_undo_truncate_tablespace(). First, flush all to-be-discarded
pages (beyond the new end of the file), then trim the space->size
to make the page allocation deterministic. At the only remaining
crash injection point, flush the redo log, so that the recovery
can be tested.
2018-08-28 13:43:06 +03:00
|
|
|
the tablespace is not about to be dropped. */
|
2017-03-30 12:48:42 +02:00
|
|
|
if (space->is_stopping()) {
|
|
|
|
return NULL;
|
|
|
|
}
|
2016-08-12 11:17:45 +03:00
|
|
|
|
2017-03-30 12:48:42 +02:00
|
|
|
dberr_t err = DB_SUCCESS;
|
|
|
|
buf_block_t* block = buf_page_get_gen(page_id, page_size, RW_X_LATCH,
|
|
|
|
NULL,
|
|
|
|
BUF_PEEK_IF_IN_POOL, file, line,
|
|
|
|
mtr, &err);
|
2014-12-22 16:53:17 +02:00
|
|
|
if (block != NULL) {
|
|
|
|
/* page was in buffer pool */
|
|
|
|
state->crypt_stat.pages_read_from_cache++;
|
|
|
|
return block;
|
|
|
|
}
|
|
|
|
|
2017-03-14 12:56:01 +02:00
|
|
|
if (space->is_stopping()) {
|
2015-05-21 15:07:19 +03:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
state->crypt_stat.pages_read_from_disk++;
|
|
|
|
|
2016-08-12 11:17:45 +03:00
|
|
|
uintmax_t start = ut_time_us(NULL);
|
|
|
|
block = buf_page_get_gen(page_id, page_size,
|
2014-12-22 16:53:17 +02:00
|
|
|
RW_X_LATCH,
|
|
|
|
NULL, BUF_GET_POSSIBLY_FREED,
|
2017-03-30 12:48:42 +02:00
|
|
|
file, line, mtr, &err);
|
2016-08-12 11:17:45 +03:00
|
|
|
uintmax_t end = ut_time_us(NULL);
|
2014-12-22 16:53:17 +02:00
|
|
|
|
|
|
|
if (end < start) {
|
|
|
|
end = start; // safety...
|
|
|
|
}
|
|
|
|
|
|
|
|
state->cnt_waited++;
|
|
|
|
state->sum_waited_us += (end - start);
|
|
|
|
|
|
|
|
/* average page load */
|
|
|
|
ulint add_sleeptime_ms = 0;
|
2017-09-28 10:38:02 +00:00
|
|
|
ulint avg_wait_time_us =ulint(state->sum_waited_us / state->cnt_waited);
|
2014-12-22 16:53:17 +02:00
|
|
|
ulint alloc_wait_us = 1000000 / state->allocated_iops;
|
2015-03-20 12:38:53 +02:00
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
if (avg_wait_time_us < alloc_wait_us) {
|
|
|
|
/* we reading faster than we allocated */
|
|
|
|
add_sleeptime_ms = (alloc_wait_us - avg_wait_time_us) / 1000;
|
|
|
|
} else {
|
|
|
|
/* if page load time is longer than we want, skip sleeping */
|
|
|
|
}
|
|
|
|
|
|
|
|
*sleeptime_ms += add_sleeptime_ms;
|
2017-03-14 12:56:01 +02:00
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
return block;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/***********************************************************************
|
|
|
|
Get block and allocation status
|
|
|
|
|
|
|
|
note: innodb locks fil_space_latch and then block when allocating page
|
|
|
|
but locks block and then fil_space_latch when freeing page.
|
2017-03-14 12:56:01 +02:00
|
|
|
|
|
|
|
@param[in,out] state Rotation state
|
|
|
|
@param[in] offset Page offset
|
|
|
|
@param[in,out] mtr Minitransaction
|
|
|
|
@param[out] allocation_status Allocation status
|
|
|
|
@param[out] sleeptime_ms Sleep time
|
|
|
|
@return block or NULL
|
2014-12-22 16:53:17 +02:00
|
|
|
*/
|
|
|
|
static
|
|
|
|
buf_block_t*
|
|
|
|
btr_scrub_get_block_and_allocation_status(
|
2017-03-14 12:56:01 +02:00
|
|
|
rotate_thread_t* state,
|
|
|
|
ulint offset,
|
|
|
|
mtr_t* mtr,
|
2014-12-22 16:53:17 +02:00
|
|
|
btr_scrub_page_allocation_status_t *allocation_status,
|
2017-03-14 12:56:01 +02:00
|
|
|
ulint* sleeptime_ms)
|
2014-12-22 16:53:17 +02:00
|
|
|
{
|
|
|
|
mtr_t local_mtr;
|
|
|
|
buf_block_t *block = NULL;
|
2017-03-14 12:56:01 +02:00
|
|
|
fil_space_t* space = state->space;
|
|
|
|
|
|
|
|
ut_ad(space->n_pending_ops > 0);
|
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
mtr_start(&local_mtr);
|
2017-03-14 12:56:01 +02:00
|
|
|
|
2017-08-23 09:47:50 +03:00
|
|
|
*allocation_status = fseg_page_is_free(space, offset) ?
|
2014-12-22 16:53:17 +02:00
|
|
|
BTR_SCRUB_PAGE_FREE :
|
|
|
|
BTR_SCRUB_PAGE_ALLOCATED;
|
|
|
|
|
|
|
|
if (*allocation_status == BTR_SCRUB_PAGE_FREE) {
|
|
|
|
/* this is easy case, we lock fil_space_latch first and
|
|
|
|
then block */
|
|
|
|
block = fil_crypt_get_page_throttle(state,
|
|
|
|
offset, mtr,
|
|
|
|
sleeptime_ms);
|
|
|
|
mtr_commit(&local_mtr);
|
|
|
|
} else {
|
|
|
|
/* page is allocated according to xdes */
|
|
|
|
|
|
|
|
/* release fil_space_latch *before* fetching block */
|
|
|
|
mtr_commit(&local_mtr);
|
|
|
|
|
|
|
|
/* NOTE: when we have locked dict_index_get_lock(),
|
|
|
|
* it's safe to release fil_space_latch and then fetch block
|
|
|
|
* as dict_index_get_lock() is needed to make tree modifications
|
|
|
|
* such as free-ing a page
|
|
|
|
*/
|
|
|
|
|
|
|
|
block = fil_crypt_get_page_throttle(state,
|
|
|
|
offset, mtr,
|
|
|
|
sleeptime_ms);
|
|
|
|
}
|
|
|
|
|
|
|
|
return block;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/***********************************************************************
|
2017-03-14 12:56:01 +02:00
|
|
|
Rotate one page
|
|
|
|
@param[in,out] key_state Key state
|
|
|
|
@param[in,out] state Rotation state */
|
2014-12-22 16:53:17 +02:00
|
|
|
static
|
|
|
|
void
|
|
|
|
fil_crypt_rotate_page(
|
2017-03-14 12:56:01 +02:00
|
|
|
const key_state_t* key_state,
|
|
|
|
rotate_thread_t* state)
|
2014-12-22 16:53:17 +02:00
|
|
|
{
|
2017-03-14 12:56:01 +02:00
|
|
|
fil_space_t*space = state->space;
|
|
|
|
ulint space_id = space->id;
|
2014-12-22 16:53:17 +02:00
|
|
|
ulint offset = state->offset;
|
|
|
|
ulint sleeptime_ms = 0;
|
2017-03-14 12:56:01 +02:00
|
|
|
fil_space_crypt_t *crypt_data = space->crypt_data;
|
2014-12-22 16:53:17 +02:00
|
|
|
|
2017-03-14 12:56:01 +02:00
|
|
|
ut_ad(space->n_pending_ops > 0);
|
MDEV-13167 InnoDB key rotation is not skipping unused pages
In key rotation, we must initialize unallocated but previously
initialized pages, so that if encryption is enabled on a table,
all clear-text data for the page will eventually be overwritten.
But we should not rotate keys on pages that were never allocated
after the data file was created.
According to the latching order rules, after acquiring the
tablespace latch, no page latches of previously allocated user pages
may be acquired. So, key rotation should check the page allocation
status after acquiring the page latch, not before. But, the latching
order rules also prohibit accessing pages that were not allocated first,
and then acquiring the tablespace latch. Such behaviour would indeed
result in a deadlock when running the following tests:
encryption.innodb_encryption-page-compression
encryption.innodb-checksum-algorithm
Because the key rotation is accessing potentially unallocated pages, it
cannot reliably check if these pages were allocated. It can only check
the page header. If the page number is zero, we can assume that the
page is unallocated.
fil_crypt_rotate_pages(): Skip pages that are known to be uninitialized.
fil_crypt_rotate_page(): Detect uninitialized pages by FIL_PAGE_OFFSET.
Page 0 is never encrypted, and on other pages that are initialized,
FIL_PAGE_OFFSET must contain the page number.
fil_crypt_is_page_uninitialized(): Remove. It suffices to check the
page number field in fil_crypt_rotate_page().
2017-08-23 10:01:48 +03:00
|
|
|
ut_ad(offset > 0);
|
2014-12-22 16:53:17 +02:00
|
|
|
|
2017-03-14 12:56:01 +02:00
|
|
|
/* In fil_crypt_thread where key rotation is done we have
|
|
|
|
acquired space and checked that this space is not yet
|
|
|
|
marked to be dropped. Similarly, in fil_crypt_find_page_to_rotate().
|
|
|
|
Check here also to give DROP TABLE or similar a change. */
|
|
|
|
if (space->is_stopping()) {
|
2014-12-22 16:53:17 +02:00
|
|
|
return;
|
2015-03-19 14:09:49 +02:00
|
|
|
}
|
2014-12-22 16:53:17 +02:00
|
|
|
|
2017-04-04 15:47:21 +10:00
|
|
|
if (space_id == TRX_SYS_SPACE && offset == TRX_SYS_PAGE_NO) {
|
2014-12-22 16:53:17 +02:00
|
|
|
/* don't encrypt this as it contains address to dblwr buffer */
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
MDEV-13167 InnoDB key rotation is not skipping unused pages
In key rotation, we must initialize unallocated but previously
initialized pages, so that if encryption is enabled on a table,
all clear-text data for the page will eventually be overwritten.
But we should not rotate keys on pages that were never allocated
after the data file was created.
According to the latching order rules, after acquiring the
tablespace latch, no page latches of previously allocated user pages
may be acquired. So, key rotation should check the page allocation
status after acquiring the page latch, not before. But, the latching
order rules also prohibit accessing pages that were not allocated first,
and then acquiring the tablespace latch. Such behaviour would indeed
result in a deadlock when running the following tests:
encryption.innodb_encryption-page-compression
encryption.innodb-checksum-algorithm
Because the key rotation is accessing potentially unallocated pages, it
cannot reliably check if these pages were allocated. It can only check
the page header. If the page number is zero, we can assume that the
page is unallocated.
fil_crypt_rotate_pages(): Skip pages that are known to be uninitialized.
fil_crypt_rotate_page(): Detect uninitialized pages by FIL_PAGE_OFFSET.
Page 0 is never encrypted, and on other pages that are initialized,
FIL_PAGE_OFFSET must contain the page number.
fil_crypt_is_page_uninitialized(): Remove. It suffices to check the
page number field in fil_crypt_rotate_page().
2017-08-23 10:01:48 +03:00
|
|
|
ut_d(const bool was_free = fseg_page_is_free(space, offset));
|
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
mtr_t mtr;
|
2017-05-09 17:23:08 +03:00
|
|
|
mtr.start();
|
|
|
|
if (buf_block_t* block = fil_crypt_get_page_throttle(state,
|
|
|
|
offset, &mtr,
|
|
|
|
&sleeptime_ms)) {
|
2015-05-21 15:07:19 +03:00
|
|
|
bool modified = false;
|
|
|
|
int needs_scrubbing = BTR_SCRUB_SKIP_PAGE;
|
|
|
|
lsn_t block_lsn = block->page.newest_modification;
|
MDEV-12253: Buffer pool blocks are accessed after they have been freed
Problem was that bpage was referenced after it was already freed
from LRU. Fixed by adding a new variable encrypted that is
passed down to buf_page_check_corrupt() and used in
buf_page_get_gen() to stop processing page read.
This patch should also address following test failures and
bugs:
MDEV-12419: IMPORT should not look up tablespace in
PageConverter::validate(). This is now removed.
MDEV-10099: encryption.innodb_onlinealter_encryption fails
sporadically in buildbot
MDEV-11420: encryption.innodb_encryption-page-compression
failed in buildbot
MDEV-11222: encryption.encrypt_and_grep failed in buildbot on P8
Removed dict_table_t::is_encrypted and dict_table_t::ibd_file_missing
and replaced these with dict_table_t::file_unreadable. Table
ibd file is missing if fil_get_space(space_id) returns NULL
and encrypted if not. Removed dict_table_t::is_corrupted field.
Ported FilSpace class from 10.2 and using that on buf_page_check_corrupt(),
buf_page_decrypt_after_read(), buf_page_encrypt_before_write(),
buf_dblwr_process(), buf_read_page(), dict_stats_save_defrag_stats().
Added test cases when enrypted page could be read while doing
redo log crash recovery. Also added test case for row compressed
blobs.
btr_cur_open_at_index_side_func(),
btr_cur_open_at_rnd_pos_func(): Avoid referencing block that is
NULL.
buf_page_get_zip(): Issue error if page read fails.
buf_page_get_gen(): Use dberr_t for error detection and
do not reference bpage after we hare freed it.
buf_mark_space_corrupt(): remove bpage from LRU also when
it is encrypted.
buf_page_check_corrupt(): @return DB_SUCCESS if page has
been read and is not corrupted,
DB_PAGE_CORRUPTED if page based on checksum check is corrupted,
DB_DECRYPTION_FAILED if page post encryption checksum matches but
after decryption normal page checksum does not match. In read
case only DB_SUCCESS is possible.
buf_page_io_complete(): use dberr_t for error handling.
buf_flush_write_block_low(),
buf_read_ahead_random(),
buf_read_page_async(),
buf_read_ahead_linear(),
buf_read_ibuf_merge_pages(),
buf_read_recv_pages(),
fil_aio_wait():
Issue error if page read fails.
btr_pcur_move_to_next_page(): Do not reference page if it is
NULL.
Introduced dict_table_t::is_readable() and dict_index_t::is_readable()
that will return true if tablespace exists and pages read from
tablespace are not corrupted or page decryption failed.
Removed buf_page_t::key_version. After page decryption the
key version is not removed from page frame. For unencrypted
pages, old key_version is removed at buf_page_encrypt_before_write()
dict_stats_update_transient_for_index(),
dict_stats_update_transient()
Do not continue if table decryption failed or table
is corrupted.
dict0stats.cc: Introduced a dict_stats_report_error function
to avoid code duplication.
fil_parse_write_crypt_data():
Check that key read from redo log entry is found from
encryption plugin and if it is not, refuse to start.
PageConverter::validate(): Removed access to fil_space_t as
tablespace is not available during import.
Fixed error code on innodb.innodb test.
Merged test cased innodb-bad-key-change5 and innodb-bad-key-shutdown
to innodb-bad-key-change2. Removed innodb-bad-key-change5 test.
Decreased unnecessary complexity on some long lasting tests.
Removed fil_inc_pending_ops(), fil_decr_pending_ops(),
fil_get_first_space(), fil_get_next_space(),
fil_get_first_space_safe(), fil_get_next_space_safe()
functions.
fil_space_verify_crypt_checksum(): Fixed bug found using ASAN
where FIL_PAGE_END_LSN_OLD_CHECKSUM field was incorrectly
accessed from row compressed tables. Fixed out of page frame
bug for row compressed tables in
fil_space_verify_crypt_checksum() found using ASAN. Incorrect
function was called for compressed table.
Added new tests for discard, rename table and drop (we should allow them
even when page decryption fails). Alter table rename is not allowed.
Added test for restart with innodb-force-recovery=1 when page read on
redo-recovery cant be decrypted. Added test for corrupted table where
both page data and FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION is corrupted.
Adjusted the test case innodb_bug14147491 so that it does not anymore
expect crash. Instead table is just mostly not usable.
fil0fil.h: fil_space_acquire_low is not visible function
and fil_space_acquire and fil_space_acquire_silent are
inline functions. FilSpace class uses fil_space_acquire_low
directly.
recv_apply_hashed_log_recs() does not return anything.
2017-04-26 15:19:16 +03:00
|
|
|
byte* frame = buf_block_get_frame(block);
|
|
|
|
uint kv = mach_read_from_4(frame+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
|
2014-12-22 16:53:17 +02:00
|
|
|
|
MDEV-13167 InnoDB key rotation is not skipping unused pages
In key rotation, we must initialize unallocated but previously
initialized pages, so that if encryption is enabled on a table,
all clear-text data for the page will eventually be overwritten.
But we should not rotate keys on pages that were never allocated
after the data file was created.
According to the latching order rules, after acquiring the
tablespace latch, no page latches of previously allocated user pages
may be acquired. So, key rotation should check the page allocation
status after acquiring the page latch, not before. But, the latching
order rules also prohibit accessing pages that were not allocated first,
and then acquiring the tablespace latch. Such behaviour would indeed
result in a deadlock when running the following tests:
encryption.innodb_encryption-page-compression
encryption.innodb-checksum-algorithm
Because the key rotation is accessing potentially unallocated pages, it
cannot reliably check if these pages were allocated. It can only check
the page header. If the page number is zero, we can assume that the
page is unallocated.
fil_crypt_rotate_pages(): Skip pages that are known to be uninitialized.
fil_crypt_rotate_page(): Detect uninitialized pages by FIL_PAGE_OFFSET.
Page 0 is never encrypted, and on other pages that are initialized,
FIL_PAGE_OFFSET must contain the page number.
fil_crypt_is_page_uninitialized(): Remove. It suffices to check the
page number field in fil_crypt_rotate_page().
2017-08-23 10:01:48 +03:00
|
|
|
if (space->is_stopping()) {
|
|
|
|
/* The tablespace is closing (in DROP TABLE or
|
|
|
|
TRUNCATE TABLE or similar): avoid further access */
|
|
|
|
} else if (!*reinterpret_cast<uint32_t*>(FIL_PAGE_OFFSET
|
|
|
|
+ frame)) {
|
|
|
|
/* It looks like this page was never
|
|
|
|
allocated. Because key rotation is accessing
|
|
|
|
pages in a pattern that is unlike the normal
|
|
|
|
B-tree and undo log access pattern, we cannot
|
|
|
|
invoke fseg_page_is_free() here, because that
|
|
|
|
could result in a deadlock. If we invoked
|
|
|
|
fseg_page_is_free() and released the
|
|
|
|
tablespace latch before acquiring block->lock,
|
|
|
|
then the fseg_page_is_free() information
|
|
|
|
could be stale already. */
|
|
|
|
ut_ad(was_free);
|
|
|
|
ut_ad(kv == 0);
|
|
|
|
ut_ad(page_get_space_id(frame) == 0);
|
|
|
|
} else if (fil_crypt_needs_rotation(
|
2018-04-06 12:55:43 +03:00
|
|
|
crypt_data,
|
|
|
|
kv,
|
|
|
|
key_state->key_version,
|
|
|
|
key_state->rotate_key_age)) {
|
MDEV-13167 InnoDB key rotation is not skipping unused pages
In key rotation, we must initialize unallocated but previously
initialized pages, so that if encryption is enabled on a table,
all clear-text data for the page will eventually be overwritten.
But we should not rotate keys on pages that were never allocated
after the data file was created.
According to the latching order rules, after acquiring the
tablespace latch, no page latches of previously allocated user pages
may be acquired. So, key rotation should check the page allocation
status after acquiring the page latch, not before. But, the latching
order rules also prohibit accessing pages that were not allocated first,
and then acquiring the tablespace latch. Such behaviour would indeed
result in a deadlock when running the following tests:
encryption.innodb_encryption-page-compression
encryption.innodb-checksum-algorithm
Because the key rotation is accessing potentially unallocated pages, it
cannot reliably check if these pages were allocated. It can only check
the page header. If the page number is zero, we can assume that the
page is unallocated.
fil_crypt_rotate_pages(): Skip pages that are known to be uninitialized.
fil_crypt_rotate_page(): Detect uninitialized pages by FIL_PAGE_OFFSET.
Page 0 is never encrypted, and on other pages that are initialized,
FIL_PAGE_OFFSET must contain the page number.
fil_crypt_is_page_uninitialized(): Remove. It suffices to check the
page number field in fil_crypt_rotate_page().
2017-08-23 10:01:48 +03:00
|
|
|
|
|
|
|
mtr.set_named_space(space);
|
|
|
|
modified = true;
|
|
|
|
|
|
|
|
/* force rotation by dummy updating page */
|
|
|
|
mlog_write_ulint(frame + FIL_PAGE_SPACE_ID,
|
|
|
|
space_id, MLOG_4BYTES, &mtr);
|
|
|
|
|
|
|
|
/* statistics */
|
|
|
|
state->crypt_stat.pages_modified++;
|
|
|
|
} else {
|
|
|
|
if (crypt_data->is_encrypted()) {
|
|
|
|
if (kv < state->min_key_version_found) {
|
|
|
|
state->min_key_version_found = kv;
|
2015-05-17 14:14:16 +03:00
|
|
|
}
|
2014-12-22 16:53:17 +02:00
|
|
|
}
|
|
|
|
|
2015-05-21 15:07:19 +03:00
|
|
|
needs_scrubbing = btr_page_needs_scrubbing(
|
|
|
|
&state->scrub_data, block,
|
|
|
|
BTR_SCRUB_PAGE_ALLOCATION_UNKNOWN);
|
|
|
|
}
|
2014-12-22 16:53:17 +02:00
|
|
|
|
2017-05-09 17:23:08 +03:00
|
|
|
mtr.commit();
|
2016-08-12 11:17:45 +03:00
|
|
|
lsn_t end_lsn = mtr.commit_lsn();
|
2014-12-22 16:53:17 +02:00
|
|
|
|
|
|
|
if (needs_scrubbing == BTR_SCRUB_PAGE) {
|
2017-05-09 17:23:08 +03:00
|
|
|
mtr.start();
|
2015-05-21 15:07:19 +03:00
|
|
|
/*
|
|
|
|
* refetch page and allocation status
|
|
|
|
*/
|
|
|
|
btr_scrub_page_allocation_status_t allocated;
|
2016-08-12 11:17:45 +03:00
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
block = btr_scrub_get_block_and_allocation_status(
|
2017-03-30 12:48:42 +02:00
|
|
|
state, offset, &mtr,
|
2014-12-22 16:53:17 +02:00
|
|
|
&allocated,
|
|
|
|
&sleeptime_ms);
|
|
|
|
|
2015-05-21 15:07:19 +03:00
|
|
|
if (block) {
|
2017-05-09 17:23:08 +03:00
|
|
|
mtr.set_named_space(space);
|
2014-12-22 16:53:17 +02:00
|
|
|
|
2015-05-21 15:07:19 +03:00
|
|
|
/* get required table/index and index-locks */
|
|
|
|
needs_scrubbing = btr_scrub_recheck_page(
|
|
|
|
&state->scrub_data, block, allocated, &mtr);
|
2014-12-22 16:53:17 +02:00
|
|
|
|
2015-05-21 15:07:19 +03:00
|
|
|
if (needs_scrubbing == BTR_SCRUB_PAGE) {
|
|
|
|
/* we need to refetch it once more now that we have
|
|
|
|
* index locked */
|
|
|
|
block = btr_scrub_get_block_and_allocation_status(
|
2017-03-30 12:48:42 +02:00
|
|
|
state, offset, &mtr,
|
2015-05-21 15:07:19 +03:00
|
|
|
&allocated,
|
|
|
|
&sleeptime_ms);
|
2014-12-22 16:53:17 +02:00
|
|
|
|
2015-05-21 15:07:19 +03:00
|
|
|
needs_scrubbing = btr_scrub_page(&state->scrub_data,
|
|
|
|
block, allocated,
|
|
|
|
&mtr);
|
|
|
|
}
|
2014-12-22 16:53:17 +02:00
|
|
|
|
2015-05-21 15:07:19 +03:00
|
|
|
/* NOTE: mtr is committed inside btr_scrub_recheck_page()
|
|
|
|
* and/or btr_scrub_page. This is to make sure that
|
|
|
|
* locks & pages are latched in corrected order,
|
|
|
|
* the mtr is in some circumstances restarted.
|
|
|
|
* (mtr_commit() + mtr_start())
|
|
|
|
*/
|
|
|
|
}
|
|
|
|
}
|
2014-12-22 16:53:17 +02:00
|
|
|
|
2015-05-21 15:07:19 +03:00
|
|
|
if (needs_scrubbing != BTR_SCRUB_PAGE) {
|
|
|
|
/* if page didn't need scrubbing it might be that cleanups
|
|
|
|
are needed. do those outside of any mtr to prevent deadlocks.
|
|
|
|
|
|
|
|
the information what kinds of cleanups that are needed are
|
|
|
|
encoded inside the needs_scrubbing, but this is opaque to
|
|
|
|
this function (except the value BTR_SCRUB_PAGE) */
|
|
|
|
btr_scrub_skip_page(&state->scrub_data, needs_scrubbing);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (needs_scrubbing == BTR_SCRUB_TURNED_OFF) {
|
|
|
|
/* if we just detected that scrubbing was turned off
|
|
|
|
* update global state to reflect this */
|
|
|
|
ut_ad(crypt_data);
|
|
|
|
mutex_enter(&crypt_data->mutex);
|
|
|
|
crypt_data->rotate_state.scrubbing.is_active = false;
|
|
|
|
mutex_exit(&crypt_data->mutex);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (modified) {
|
|
|
|
/* if we modified page, we take lsn from mtr */
|
|
|
|
ut_a(end_lsn > state->end_lsn);
|
|
|
|
ut_a(end_lsn > block_lsn);
|
|
|
|
state->end_lsn = end_lsn;
|
|
|
|
} else {
|
|
|
|
/* if we did not modify page, check for max lsn */
|
|
|
|
if (block_lsn > state->end_lsn) {
|
|
|
|
state->end_lsn = block_lsn;
|
|
|
|
}
|
2014-12-22 16:53:17 +02:00
|
|
|
}
|
MDEV-12253: Buffer pool blocks are accessed after they have been freed
Problem was that bpage was referenced after it was already freed
from LRU. Fixed by adding a new variable encrypted that is
passed down to buf_page_check_corrupt() and used in
buf_page_get_gen() to stop processing page read.
This patch should also address following test failures and
bugs:
MDEV-12419: IMPORT should not look up tablespace in
PageConverter::validate(). This is now removed.
MDEV-10099: encryption.innodb_onlinealter_encryption fails
sporadically in buildbot
MDEV-11420: encryption.innodb_encryption-page-compression
failed in buildbot
MDEV-11222: encryption.encrypt_and_grep failed in buildbot on P8
Removed dict_table_t::is_encrypted and dict_table_t::ibd_file_missing
and replaced these with dict_table_t::file_unreadable. Table
ibd file is missing if fil_get_space(space_id) returns NULL
and encrypted if not. Removed dict_table_t::is_corrupted field.
Ported FilSpace class from 10.2 and using that on buf_page_check_corrupt(),
buf_page_decrypt_after_read(), buf_page_encrypt_before_write(),
buf_dblwr_process(), buf_read_page(), dict_stats_save_defrag_stats().
Added test cases when enrypted page could be read while doing
redo log crash recovery. Also added test case for row compressed
blobs.
btr_cur_open_at_index_side_func(),
btr_cur_open_at_rnd_pos_func(): Avoid referencing block that is
NULL.
buf_page_get_zip(): Issue error if page read fails.
buf_page_get_gen(): Use dberr_t for error detection and
do not reference bpage after we hare freed it.
buf_mark_space_corrupt(): remove bpage from LRU also when
it is encrypted.
buf_page_check_corrupt(): @return DB_SUCCESS if page has
been read and is not corrupted,
DB_PAGE_CORRUPTED if page based on checksum check is corrupted,
DB_DECRYPTION_FAILED if page post encryption checksum matches but
after decryption normal page checksum does not match. In read
case only DB_SUCCESS is possible.
buf_page_io_complete(): use dberr_t for error handling.
buf_flush_write_block_low(),
buf_read_ahead_random(),
buf_read_page_async(),
buf_read_ahead_linear(),
buf_read_ibuf_merge_pages(),
buf_read_recv_pages(),
fil_aio_wait():
Issue error if page read fails.
btr_pcur_move_to_next_page(): Do not reference page if it is
NULL.
Introduced dict_table_t::is_readable() and dict_index_t::is_readable()
that will return true if tablespace exists and pages read from
tablespace are not corrupted or page decryption failed.
Removed buf_page_t::key_version. After page decryption the
key version is not removed from page frame. For unencrypted
pages, old key_version is removed at buf_page_encrypt_before_write()
dict_stats_update_transient_for_index(),
dict_stats_update_transient()
Do not continue if table decryption failed or table
is corrupted.
dict0stats.cc: Introduced a dict_stats_report_error function
to avoid code duplication.
fil_parse_write_crypt_data():
Check that key read from redo log entry is found from
encryption plugin and if it is not, refuse to start.
PageConverter::validate(): Removed access to fil_space_t as
tablespace is not available during import.
Fixed error code on innodb.innodb test.
Merged test cased innodb-bad-key-change5 and innodb-bad-key-shutdown
to innodb-bad-key-change2. Removed innodb-bad-key-change5 test.
Decreased unnecessary complexity on some long lasting tests.
Removed fil_inc_pending_ops(), fil_decr_pending_ops(),
fil_get_first_space(), fil_get_next_space(),
fil_get_first_space_safe(), fil_get_next_space_safe()
functions.
fil_space_verify_crypt_checksum(): Fixed bug found using ASAN
where FIL_PAGE_END_LSN_OLD_CHECKSUM field was incorrectly
accessed from row compressed tables. Fixed out of page frame
bug for row compressed tables in
fil_space_verify_crypt_checksum() found using ASAN. Incorrect
function was called for compressed table.
Added new tests for discard, rename table and drop (we should allow them
even when page decryption fails). Alter table rename is not allowed.
Added test for restart with innodb-force-recovery=1 when page read on
redo-recovery cant be decrypted. Added test for corrupted table where
both page data and FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION is corrupted.
Adjusted the test case innodb_bug14147491 so that it does not anymore
expect crash. Instead table is just mostly not usable.
fil0fil.h: fil_space_acquire_low is not visible function
and fil_space_acquire and fil_space_acquire_silent are
inline functions. FilSpace class uses fil_space_acquire_low
directly.
recv_apply_hashed_log_recs() does not return anything.
2017-04-26 15:19:16 +03:00
|
|
|
} else {
|
|
|
|
/* If block read failed mtr memo and log should be empty. */
|
2017-05-05 10:25:29 +03:00
|
|
|
ut_ad(!mtr.has_modifications());
|
|
|
|
ut_ad(!mtr.is_dirty());
|
|
|
|
ut_ad(mtr.get_memo()->size() == 0);
|
|
|
|
ut_ad(mtr.get_log()->size() == 0);
|
|
|
|
mtr.commit();
|
2014-12-22 16:53:17 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
if (sleeptime_ms) {
|
|
|
|
os_event_reset(fil_crypt_throttle_sleep_event);
|
|
|
|
os_event_wait_time(fil_crypt_throttle_sleep_event,
|
|
|
|
1000 * sleeptime_ms);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/***********************************************************************
|
2017-03-14 12:56:01 +02:00
|
|
|
Rotate a batch of pages
|
|
|
|
@param[in] key_state Key state
|
|
|
|
@param[in,out] state Rotation state */
|
2014-12-22 16:53:17 +02:00
|
|
|
static
|
|
|
|
void
|
|
|
|
fil_crypt_rotate_pages(
|
2017-03-14 12:56:01 +02:00
|
|
|
const key_state_t* key_state,
|
|
|
|
rotate_thread_t* state)
|
2014-12-22 16:53:17 +02:00
|
|
|
{
|
2017-03-14 12:56:01 +02:00
|
|
|
ulint space = state->space->id;
|
MDEV-13167 InnoDB key rotation is not skipping unused pages
In key rotation, we must initialize unallocated but previously
initialized pages, so that if encryption is enabled on a table,
all clear-text data for the page will eventually be overwritten.
But we should not rotate keys on pages that were never allocated
after the data file was created.
According to the latching order rules, after acquiring the
tablespace latch, no page latches of previously allocated user pages
may be acquired. So, key rotation should check the page allocation
status after acquiring the page latch, not before. But, the latching
order rules also prohibit accessing pages that were not allocated first,
and then acquiring the tablespace latch. Such behaviour would indeed
result in a deadlock when running the following tests:
encryption.innodb_encryption-page-compression
encryption.innodb-checksum-algorithm
Because the key rotation is accessing potentially unallocated pages, it
cannot reliably check if these pages were allocated. It can only check
the page header. If the page number is zero, we can assume that the
page is unallocated.
fil_crypt_rotate_pages(): Skip pages that are known to be uninitialized.
fil_crypt_rotate_page(): Detect uninitialized pages by FIL_PAGE_OFFSET.
Page 0 is never encrypted, and on other pages that are initialized,
FIL_PAGE_OFFSET must contain the page number.
fil_crypt_is_page_uninitialized(): Remove. It suffices to check the
page number field in fil_crypt_rotate_page().
2017-08-23 10:01:48 +03:00
|
|
|
ulint end = std::min(state->offset + state->batch,
|
|
|
|
state->space->free_limit);
|
2015-03-20 12:38:53 +02:00
|
|
|
|
2017-03-14 12:56:01 +02:00
|
|
|
ut_ad(state->space->n_pending_ops > 0);
|
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
for (; state->offset < end; state->offset++) {
|
|
|
|
|
|
|
|
/* we can't rotate pages in dblwr buffer as
|
|
|
|
* it's not possible to read those due to lots of asserts
|
|
|
|
* in buffer pool.
|
|
|
|
*
|
|
|
|
* However since these are only (short-lived) copies of
|
|
|
|
* real pages, they will be updated anyway when the
|
|
|
|
* real page is updated
|
|
|
|
*/
|
|
|
|
if (space == TRX_SYS_SPACE &&
|
|
|
|
buf_dblwr_page_inside(state->offset)) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2018-04-16 15:06:41 +03:00
|
|
|
/* If space is marked as stopping, stop rotating
|
|
|
|
pages. */
|
|
|
|
if (state->space->is_stopping()) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
fil_crypt_rotate_page(key_state, state);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/***********************************************************************
|
2017-03-14 12:56:01 +02:00
|
|
|
Flush rotated pages and then update page 0
|
|
|
|
|
|
|
|
@param[in,out] state rotation state */
|
2014-12-22 16:53:17 +02:00
|
|
|
static
|
|
|
|
void
|
2015-04-01 11:50:21 +03:00
|
|
|
fil_crypt_flush_space(
|
2017-03-14 12:56:01 +02:00
|
|
|
rotate_thread_t* state)
|
2014-12-22 16:53:17 +02:00
|
|
|
{
|
2017-03-14 12:56:01 +02:00
|
|
|
fil_space_t* space = state->space;
|
|
|
|
fil_space_crypt_t *crypt_data = space->crypt_data;
|
|
|
|
|
|
|
|
ut_ad(space->n_pending_ops > 0);
|
2014-12-22 16:53:17 +02:00
|
|
|
|
|
|
|
/* flush tablespace pages so that there are no pages left with old key */
|
|
|
|
lsn_t end_lsn = crypt_data->rotate_state.end_lsn;
|
2015-03-20 12:38:53 +02:00
|
|
|
|
2017-03-14 12:56:01 +02:00
|
|
|
if (end_lsn > 0 && !space->is_stopping()) {
|
2014-12-22 16:53:17 +02:00
|
|
|
bool success = false;
|
|
|
|
ulint n_pages = 0;
|
|
|
|
ulint sum_pages = 0;
|
2016-08-12 11:17:45 +03:00
|
|
|
uintmax_t start = ut_time_us(NULL);
|
2015-03-20 12:38:53 +02:00
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
do {
|
2016-08-12 11:17:45 +03:00
|
|
|
success = buf_flush_lists(ULINT_MAX, end_lsn, &n_pages);
|
2014-12-22 16:53:17 +02:00
|
|
|
buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
|
|
|
|
sum_pages += n_pages;
|
2017-03-14 12:56:01 +02:00
|
|
|
} while (!success && !space->is_stopping());
|
2015-03-20 12:38:53 +02:00
|
|
|
|
2016-08-12 11:17:45 +03:00
|
|
|
uintmax_t end = ut_time_us(NULL);
|
2015-03-20 12:38:53 +02:00
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
if (sum_pages && end > start) {
|
|
|
|
state->cnt_waited += sum_pages;
|
|
|
|
state->sum_waited_us += (end - start);
|
|
|
|
|
|
|
|
/* statistics */
|
|
|
|
state->crypt_stat.pages_flushed += sum_pages;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (crypt_data->min_key_version == 0) {
|
|
|
|
crypt_data->type = CRYPT_SCHEME_UNENCRYPTED;
|
|
|
|
}
|
|
|
|
|
2018-04-21 11:58:32 +03:00
|
|
|
if (space->is_stopping()) {
|
|
|
|
return;
|
|
|
|
}
|
2017-03-14 12:56:01 +02:00
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
/* update page 0 */
|
2017-03-14 12:56:01 +02:00
|
|
|
mtr_t mtr;
|
2017-03-30 12:48:42 +02:00
|
|
|
mtr.start();
|
2016-08-12 11:17:45 +03:00
|
|
|
|
2017-03-30 12:48:42 +02:00
|
|
|
dberr_t err;
|
2015-04-01 11:50:21 +03:00
|
|
|
|
2017-03-30 12:48:42 +02:00
|
|
|
if (buf_block_t* block = buf_page_get_gen(
|
|
|
|
page_id_t(space->id, 0), page_size_t(space->flags),
|
|
|
|
RW_X_LATCH, NULL, BUF_GET,
|
|
|
|
__FILE__, __LINE__, &mtr, &err)) {
|
2017-05-09 17:23:08 +03:00
|
|
|
mtr.set_named_space(space);
|
2017-03-30 12:48:42 +02:00
|
|
|
crypt_data->write_page0(space, block->frame, &mtr);
|
2014-12-22 16:53:17 +02:00
|
|
|
}
|
2017-03-14 12:56:01 +02:00
|
|
|
|
2017-03-30 12:48:42 +02:00
|
|
|
mtr.commit();
|
2014-12-22 16:53:17 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/***********************************************************************
|
2017-03-14 12:56:01 +02:00
|
|
|
Complete rotating a space
|
|
|
|
@param[in] key_state Key state
|
|
|
|
@param[in,out] state Rotation state */
|
2014-12-22 16:53:17 +02:00
|
|
|
static
|
|
|
|
void
|
|
|
|
fil_crypt_complete_rotate_space(
|
2017-03-14 12:56:01 +02:00
|
|
|
const key_state_t* key_state,
|
|
|
|
rotate_thread_t* state)
|
2014-12-22 16:53:17 +02:00
|
|
|
{
|
2017-03-14 12:56:01 +02:00
|
|
|
fil_space_crypt_t *crypt_data = state->space->crypt_data;
|
|
|
|
|
|
|
|
ut_ad(crypt_data);
|
|
|
|
ut_ad(state->space->n_pending_ops > 0);
|
2014-12-22 16:53:17 +02:00
|
|
|
|
2015-05-17 14:14:16 +03:00
|
|
|
/* Space might already be dropped */
|
2017-03-14 12:56:01 +02:00
|
|
|
if (!state->space->is_stopping()) {
|
2015-05-17 14:14:16 +03:00
|
|
|
mutex_enter(&crypt_data->mutex);
|
2014-12-22 16:53:17 +02:00
|
|
|
|
2015-05-17 14:14:16 +03:00
|
|
|
/**
|
|
|
|
* Update crypt data state with state from thread
|
|
|
|
*/
|
|
|
|
if (state->min_key_version_found <
|
|
|
|
crypt_data->rotate_state.min_key_version_found) {
|
|
|
|
crypt_data->rotate_state.min_key_version_found =
|
|
|
|
state->min_key_version_found;
|
|
|
|
}
|
2014-12-22 16:53:17 +02:00
|
|
|
|
2015-05-17 14:14:16 +03:00
|
|
|
if (state->end_lsn > crypt_data->rotate_state.end_lsn) {
|
|
|
|
crypt_data->rotate_state.end_lsn = state->end_lsn;
|
|
|
|
}
|
2014-12-22 16:53:17 +02:00
|
|
|
|
2015-05-17 14:14:16 +03:00
|
|
|
ut_a(crypt_data->rotate_state.active_threads > 0);
|
|
|
|
crypt_data->rotate_state.active_threads--;
|
|
|
|
bool last = crypt_data->rotate_state.active_threads == 0;
|
2014-12-22 16:53:17 +02:00
|
|
|
|
2015-05-17 14:14:16 +03:00
|
|
|
/**
|
|
|
|
* check if space is fully done
|
|
|
|
* this as when threads shutdown, it could be that we "complete"
|
|
|
|
* iterating before we have scanned the full space.
|
|
|
|
*/
|
|
|
|
bool done = crypt_data->rotate_state.next_offset >=
|
|
|
|
crypt_data->rotate_state.max_offset;
|
2014-12-22 16:53:17 +02:00
|
|
|
|
2015-05-17 14:14:16 +03:00
|
|
|
/**
|
|
|
|
* we should flush space if we're last thread AND
|
|
|
|
* the iteration is done
|
|
|
|
*/
|
|
|
|
bool should_flush = last && done;
|
2014-12-22 16:53:17 +02:00
|
|
|
|
2015-05-17 14:14:16 +03:00
|
|
|
if (should_flush) {
|
|
|
|
/* we're the last active thread */
|
|
|
|
crypt_data->rotate_state.flushing = true;
|
|
|
|
crypt_data->min_key_version =
|
|
|
|
crypt_data->rotate_state.min_key_version_found;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* inform scrubbing */
|
|
|
|
crypt_data->rotate_state.scrubbing.is_active = false;
|
|
|
|
mutex_exit(&crypt_data->mutex);
|
|
|
|
|
|
|
|
/* all threads must call btr_scrub_complete_space without the mutex held */
|
2017-08-31 11:08:43 +03:00
|
|
|
if (state->scrub_data.scrubbing) {
|
|
|
|
btr_scrub_complete_space(&state->scrub_data);
|
2015-05-17 14:14:16 +03:00
|
|
|
if (should_flush) {
|
|
|
|
/* only last thread updates last_scrub_completed */
|
2015-05-20 20:32:10 +03:00
|
|
|
ut_ad(crypt_data);
|
2015-05-17 14:14:16 +03:00
|
|
|
mutex_enter(&crypt_data->mutex);
|
|
|
|
crypt_data->rotate_state.scrubbing.
|
|
|
|
last_scrub_completed = time(0);
|
|
|
|
mutex_exit(&crypt_data->mutex);
|
|
|
|
}
|
|
|
|
}
|
2014-12-22 16:53:17 +02:00
|
|
|
|
|
|
|
if (should_flush) {
|
2017-03-14 12:56:01 +02:00
|
|
|
fil_crypt_flush_space(state);
|
2015-05-17 14:14:16 +03:00
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
mutex_enter(&crypt_data->mutex);
|
2015-05-17 14:14:16 +03:00
|
|
|
crypt_data->rotate_state.flushing = false;
|
2014-12-22 16:53:17 +02:00
|
|
|
mutex_exit(&crypt_data->mutex);
|
|
|
|
}
|
2017-05-02 08:09:16 +03:00
|
|
|
} else {
|
|
|
|
mutex_enter(&crypt_data->mutex);
|
|
|
|
ut_a(crypt_data->rotate_state.active_threads > 0);
|
|
|
|
crypt_data->rotate_state.active_threads--;
|
|
|
|
mutex_exit(&crypt_data->mutex);
|
2014-12-22 16:53:17 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*********************************************************************//**
|
|
|
|
A thread which monitors global key state and rotates tablespaces accordingly
|
|
|
|
@return a dummy parameter */
|
|
|
|
extern "C" UNIV_INTERN
|
|
|
|
os_thread_ret_t
|
|
|
|
DECLARE_THREAD(fil_crypt_thread)(
|
2015-04-01 11:50:21 +03:00
|
|
|
/*=============================*/
|
2014-12-22 16:53:17 +02:00
|
|
|
void* arg __attribute__((unused))) /*!< in: a dummy parameter required
|
|
|
|
* by os_thread_create */
|
|
|
|
{
|
|
|
|
UT_NOT_USED(arg);
|
|
|
|
|
|
|
|
mutex_enter(&fil_crypt_threads_mutex);
|
|
|
|
uint thread_no = srv_n_fil_crypt_threads_started;
|
|
|
|
srv_n_fil_crypt_threads_started++;
|
|
|
|
os_event_set(fil_crypt_event); /* signal that we started */
|
2017-03-14 12:56:01 +02:00
|
|
|
mutex_exit(&fil_crypt_threads_mutex);
|
2014-12-22 16:53:17 +02:00
|
|
|
|
|
|
|
/* state of this thread */
|
|
|
|
rotate_thread_t thr(thread_no);
|
|
|
|
|
|
|
|
/* if we find a space that is starting, skip over it and recheck it later */
|
|
|
|
bool recheck = false;
|
|
|
|
|
|
|
|
while (!thr.should_shutdown()) {
|
|
|
|
|
|
|
|
key_state_t new_state;
|
|
|
|
|
|
|
|
time_t wait_start = time(0);
|
2015-03-20 12:38:53 +02:00
|
|
|
|
2015-05-11 21:05:02 +02:00
|
|
|
while (!thr.should_shutdown()) {
|
2014-12-22 16:53:17 +02:00
|
|
|
|
|
|
|
/* wait for key state changes
|
|
|
|
* i.e. either a new key version or a
|
|
|
|
* new rotate_key_age */
|
|
|
|
os_event_reset(fil_crypt_threads_event);
|
2017-03-14 12:56:01 +02:00
|
|
|
|
2015-05-11 21:05:02 +02:00
|
|
|
if (os_event_wait_time(fil_crypt_threads_event, 1000000) == 0) {
|
|
|
|
break;
|
|
|
|
}
|
2014-12-22 16:53:17 +02:00
|
|
|
|
|
|
|
if (recheck) {
|
|
|
|
/* check recheck here, after sleep, so
|
|
|
|
* that we don't busy loop while one thread is starting
|
|
|
|
* a space*/
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
time_t waited = time(0) - wait_start;
|
2015-03-20 12:38:53 +02:00
|
|
|
|
2017-03-14 12:56:01 +02:00
|
|
|
/* Break if we have waited the background scrub
|
|
|
|
interval and background scrubbing is enabled */
|
2017-03-09 08:54:07 +02:00
|
|
|
if (waited >= 0
|
2017-03-14 12:56:01 +02:00
|
|
|
&& ulint(waited) >= srv_background_scrub_data_check_interval
|
|
|
|
&& (srv_background_scrub_data_uncompressed
|
|
|
|
|| srv_background_scrub_data_compressed)) {
|
2014-12-22 16:53:17 +02:00
|
|
|
break;
|
2015-03-20 12:38:53 +02:00
|
|
|
}
|
2014-12-22 16:53:17 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
recheck = false;
|
|
|
|
thr.first = true; // restart from first tablespace
|
|
|
|
|
|
|
|
/* iterate all spaces searching for those needing rotation */
|
|
|
|
while (!thr.should_shutdown() &&
|
|
|
|
fil_crypt_find_space_to_rotate(&new_state, &thr, &recheck)) {
|
|
|
|
|
|
|
|
/* we found a space to rotate */
|
|
|
|
fil_crypt_start_rotate_space(&new_state, &thr);
|
|
|
|
|
|
|
|
/* iterate all pages (cooperatively with other threads) */
|
2017-04-21 11:52:25 +03:00
|
|
|
while (!thr.should_shutdown() &&
|
2014-12-22 16:53:17 +02:00
|
|
|
fil_crypt_find_page_to_rotate(&new_state, &thr)) {
|
|
|
|
|
2017-06-08 15:40:25 +03:00
|
|
|
if (!thr.space->is_stopping()) {
|
|
|
|
/* rotate a (set of) pages */
|
|
|
|
fil_crypt_rotate_pages(&new_state, &thr);
|
|
|
|
}
|
2014-12-22 16:53:17 +02:00
|
|
|
|
2017-03-14 12:56:01 +02:00
|
|
|
/* If space is marked as stopping, release
|
|
|
|
space and stop rotation. */
|
|
|
|
if (thr.space->is_stopping()) {
|
2017-04-21 11:52:25 +03:00
|
|
|
fil_crypt_complete_rotate_space(
|
|
|
|
&new_state, &thr);
|
2017-03-14 12:56:01 +02:00
|
|
|
fil_space_release(thr.space);
|
|
|
|
thr.space = NULL;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
/* realloc iops */
|
|
|
|
fil_crypt_realloc_iops(&thr);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* complete rotation */
|
2017-03-14 12:56:01 +02:00
|
|
|
if (thr.space) {
|
|
|
|
fil_crypt_complete_rotate_space(&new_state, &thr);
|
|
|
|
}
|
2014-12-22 16:53:17 +02:00
|
|
|
|
2015-05-11 21:05:02 +02:00
|
|
|
/* force key state refresh */
|
2017-03-14 12:56:01 +02:00
|
|
|
new_state.key_id = 0;
|
2014-12-22 16:53:17 +02:00
|
|
|
|
|
|
|
/* return iops */
|
|
|
|
fil_crypt_return_iops(&thr);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* return iops if shutting down */
|
|
|
|
fil_crypt_return_iops(&thr);
|
|
|
|
|
2017-03-14 12:56:01 +02:00
|
|
|
/* release current space if shutting down */
|
|
|
|
if (thr.space) {
|
|
|
|
fil_space_release(thr.space);
|
|
|
|
thr.space = NULL;
|
|
|
|
}
|
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
mutex_enter(&fil_crypt_threads_mutex);
|
|
|
|
srv_n_fil_crypt_threads_started--;
|
|
|
|
os_event_set(fil_crypt_event); /* signal that we stopped */
|
2017-03-14 12:56:01 +02:00
|
|
|
mutex_exit(&fil_crypt_threads_mutex);
|
2014-12-22 16:53:17 +02:00
|
|
|
|
|
|
|
/* We count the number of threads in os_thread_exit(). A created
|
|
|
|
thread should always use that to exit and not use return() to exit. */
|
|
|
|
|
2016-09-06 09:43:16 +03:00
|
|
|
os_thread_exit();
|
2014-12-22 16:53:17 +02:00
|
|
|
|
|
|
|
OS_THREAD_DUMMY_RETURN;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*********************************************************************
|
2017-03-14 12:56:01 +02:00
|
|
|
Adjust thread count for key rotation
|
|
|
|
@param[in] new_cnt Number of threads to be used */
|
2014-12-22 16:53:17 +02:00
|
|
|
UNIV_INTERN
|
|
|
|
void
|
2015-04-01 11:50:21 +03:00
|
|
|
fil_crypt_set_thread_cnt(
|
2017-03-14 12:56:01 +02:00
|
|
|
const uint new_cnt)
|
2015-03-20 12:38:53 +02:00
|
|
|
{
|
2016-01-05 18:50:54 +02:00
|
|
|
if (!fil_crypt_threads_inited) {
|
|
|
|
fil_crypt_threads_init();
|
|
|
|
}
|
|
|
|
|
2017-03-14 12:56:01 +02:00
|
|
|
mutex_enter(&fil_crypt_threads_mutex);
|
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
if (new_cnt > srv_n_fil_crypt_threads) {
|
|
|
|
uint add = new_cnt - srv_n_fil_crypt_threads;
|
|
|
|
srv_n_fil_crypt_threads = new_cnt;
|
|
|
|
for (uint i = 0; i < add; i++) {
|
2015-04-01 11:50:21 +03:00
|
|
|
os_thread_id_t rotation_thread_id;
|
|
|
|
os_thread_create(fil_crypt_thread, NULL, &rotation_thread_id);
|
2017-08-31 09:28:59 +03:00
|
|
|
ib::info() << "Creating #"
|
2016-08-12 11:17:45 +03:00
|
|
|
<< i+1 << " encryption thread id "
|
|
|
|
<< os_thread_pf(rotation_thread_id)
|
|
|
|
<< " total threads " << new_cnt << ".";
|
2014-12-22 16:53:17 +02:00
|
|
|
}
|
|
|
|
} else if (new_cnt < srv_n_fil_crypt_threads) {
|
|
|
|
srv_n_fil_crypt_threads = new_cnt;
|
|
|
|
os_event_set(fil_crypt_threads_event);
|
|
|
|
}
|
|
|
|
|
2017-03-14 12:56:01 +02:00
|
|
|
mutex_exit(&fil_crypt_threads_mutex);
|
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
while(srv_n_fil_crypt_threads_started != srv_n_fil_crypt_threads) {
|
|
|
|
os_event_reset(fil_crypt_event);
|
2017-08-29 14:23:34 +03:00
|
|
|
os_event_wait_time(fil_crypt_event, 100000);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Send a message to encryption threads that there could be
|
|
|
|
something to do. */
|
|
|
|
if (srv_n_fil_crypt_threads) {
|
|
|
|
os_event_set(fil_crypt_threads_event);
|
2014-12-22 16:53:17 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*********************************************************************
|
2017-03-14 12:56:01 +02:00
|
|
|
Adjust max key age
|
|
|
|
@param[in] val New max key age */
|
2014-12-22 16:53:17 +02:00
|
|
|
UNIV_INTERN
|
|
|
|
void
|
2015-04-01 11:50:21 +03:00
|
|
|
fil_crypt_set_rotate_key_age(
|
2017-03-14 12:56:01 +02:00
|
|
|
uint val)
|
2014-12-22 16:53:17 +02:00
|
|
|
{
|
|
|
|
srv_fil_crypt_rotate_key_age = val;
|
|
|
|
os_event_set(fil_crypt_threads_event);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*********************************************************************
|
2017-03-14 12:56:01 +02:00
|
|
|
Adjust rotation iops
|
|
|
|
@param[in] val New max rotation iops */
|
2014-12-22 16:53:17 +02:00
|
|
|
UNIV_INTERN
|
|
|
|
void
|
2015-04-01 11:50:21 +03:00
|
|
|
fil_crypt_set_rotation_iops(
|
2017-03-14 12:56:01 +02:00
|
|
|
uint val)
|
2014-12-22 16:53:17 +02:00
|
|
|
{
|
|
|
|
srv_n_fil_crypt_iops = val;
|
|
|
|
os_event_set(fil_crypt_threads_event);
|
|
|
|
}
|
|
|
|
|
2015-05-17 14:14:16 +03:00
|
|
|
/*********************************************************************
|
2017-03-14 12:56:01 +02:00
|
|
|
Adjust encrypt tables
|
|
|
|
@param[in] val New setting for innodb-encrypt-tables */
|
2015-05-17 14:14:16 +03:00
|
|
|
UNIV_INTERN
|
|
|
|
void
|
|
|
|
fil_crypt_set_encrypt_tables(
|
2017-03-14 12:56:01 +02:00
|
|
|
uint val)
|
2015-05-17 14:14:16 +03:00
|
|
|
{
|
2017-03-14 12:56:01 +02:00
|
|
|
srv_encrypt_tables = val;
|
|
|
|
os_event_set(fil_crypt_threads_event);
|
2015-05-17 14:14:16 +03:00
|
|
|
}
|
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
/*********************************************************************
|
|
|
|
Init threads for key rotation */
|
|
|
|
UNIV_INTERN
|
|
|
|
void
|
|
|
|
fil_crypt_threads_init()
|
|
|
|
{
|
2016-01-05 18:50:54 +02:00
|
|
|
if (!fil_crypt_threads_inited) {
|
2016-08-12 11:17:45 +03:00
|
|
|
fil_crypt_event = os_event_create(0);
|
|
|
|
fil_crypt_threads_event = os_event_create(0);
|
|
|
|
mutex_create(LATCH_ID_FIL_CRYPT_THREADS_MUTEX,
|
|
|
|
&fil_crypt_threads_mutex);
|
2016-01-05 18:50:54 +02:00
|
|
|
|
|
|
|
uint cnt = srv_n_fil_crypt_threads;
|
|
|
|
srv_n_fil_crypt_threads = 0;
|
|
|
|
fil_crypt_threads_inited = true;
|
|
|
|
fil_crypt_set_thread_cnt(cnt);
|
|
|
|
}
|
2014-12-22 16:53:17 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*********************************************************************
|
|
|
|
Clean up key rotation threads resources */
|
|
|
|
UNIV_INTERN
|
|
|
|
void
|
2015-04-01 11:50:21 +03:00
|
|
|
fil_crypt_threads_cleanup()
|
|
|
|
{
|
2017-01-03 15:44:44 +02:00
|
|
|
if (!fil_crypt_threads_inited) {
|
|
|
|
return;
|
|
|
|
}
|
MDEV-11638 Encryption causes race conditions in InnoDB shutdown
InnoDB shutdown failed to properly take fil_crypt_thread() into account.
The encryption threads were signalled to shut down together with other
non-critical tasks. This could be much too early in case of slow shutdown,
which could need minutes to complete the purge. Furthermore, InnoDB
failed to wait for the fil_crypt_thread() to actually exit before
proceeding to the final steps of shutdown, causing the race conditions.
Furthermore, the log_scrub_thread() was shut down way too early.
Also it should remain until the SRV_SHUTDOWN_FLUSH_PHASE.
fil_crypt_threads_end(): Remove. This would cause the threads to
be terminated way too early.
srv_buf_dump_thread_active, srv_dict_stats_thread_active,
lock_sys->timeout_thread_active, log_scrub_thread_active,
srv_monitor_active, srv_error_monitor_active: Remove a race condition
between startup and shutdown, by setting these in the startup thread
that creates threads, not in each created thread. In this way, once the
flag is cleared, it will remain cleared during shutdown.
srv_n_fil_crypt_threads_started, fil_crypt_threads_event: Declare in
global rather than static scope.
log_scrub_event, srv_log_scrub_thread_active, log_scrub_thread():
Declare in static rather than global scope. Let these be created by
log_init() and freed by log_shutdown().
rotate_thread_t::should_shutdown(): Do not shut down before the
SRV_SHUTDOWN_FLUSH_PHASE.
srv_any_background_threads_are_active(): Remove. These checks now
exist in logs_empty_and_mark_files_at_shutdown().
logs_empty_and_mark_files_at_shutdown(): Shut down the threads in
the proper order. Keep fil_crypt_thread() and log_scrub_thread() alive
until SRV_SHUTDOWN_FLUSH_PHASE, and check that they actually terminate.
2017-01-04 18:43:32 +02:00
|
|
|
ut_a(!srv_n_fil_crypt_threads_started);
|
2016-08-12 11:17:45 +03:00
|
|
|
os_event_destroy(fil_crypt_event);
|
|
|
|
os_event_destroy(fil_crypt_threads_event);
|
2016-12-22 10:23:42 +02:00
|
|
|
mutex_free(&fil_crypt_threads_mutex);
|
2016-01-05 18:50:54 +02:00
|
|
|
fil_crypt_threads_inited = false;
|
2014-12-22 16:53:17 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*********************************************************************
|
2017-03-14 12:56:01 +02:00
|
|
|
Wait for crypt threads to stop accessing space
|
|
|
|
@param[in] space Tablespace */
|
2014-12-22 16:53:17 +02:00
|
|
|
UNIV_INTERN
|
|
|
|
void
|
|
|
|
fil_space_crypt_close_tablespace(
|
2017-03-14 12:56:01 +02:00
|
|
|
const fil_space_t* space)
|
2014-12-22 16:53:17 +02:00
|
|
|
{
|
2017-03-30 12:48:42 +02:00
|
|
|
fil_space_crypt_t* crypt_data = space->crypt_data;
|
2015-05-06 14:09:10 +03:00
|
|
|
|
2019-01-17 11:24:38 +02:00
|
|
|
if (!crypt_data || srv_n_fil_crypt_threads == 0) {
|
2015-05-06 14:09:10 +03:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
mutex_enter(&fil_crypt_threads_mutex);
|
2015-05-06 14:09:10 +03:00
|
|
|
|
2017-01-25 10:11:37 +02:00
|
|
|
time_t start = time(0);
|
|
|
|
time_t last = start;
|
2015-04-01 11:50:21 +03:00
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
mutex_enter(&crypt_data->mutex);
|
|
|
|
mutex_exit(&fil_crypt_threads_mutex);
|
2015-04-01 11:50:21 +03:00
|
|
|
|
2017-05-10 09:07:50 +03:00
|
|
|
ulint cnt = crypt_data->rotate_state.active_threads;
|
2014-12-22 16:53:17 +02:00
|
|
|
bool flushing = crypt_data->rotate_state.flushing;
|
2015-04-01 11:50:21 +03:00
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
while (cnt > 0 || flushing) {
|
|
|
|
mutex_exit(&crypt_data->mutex);
|
|
|
|
/* release dict mutex so that scrub threads can release their
|
|
|
|
* table references */
|
|
|
|
dict_mutex_exit_for_mysql();
|
2017-03-14 12:56:01 +02:00
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
/* wakeup throttle (all) sleepers */
|
|
|
|
os_event_set(fil_crypt_throttle_sleep_event);
|
2017-03-14 12:56:01 +02:00
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
os_thread_sleep(20000);
|
|
|
|
dict_mutex_enter_for_mysql();
|
|
|
|
mutex_enter(&crypt_data->mutex);
|
|
|
|
cnt = crypt_data->rotate_state.active_threads;
|
|
|
|
flushing = crypt_data->rotate_state.flushing;
|
|
|
|
|
2017-01-25 10:11:37 +02:00
|
|
|
time_t now = time(0);
|
2015-03-20 12:38:53 +02:00
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
if (now >= last + 30) {
|
2016-08-12 11:17:45 +03:00
|
|
|
ib::warn() << "Waited "
|
|
|
|
<< now - start
|
|
|
|
<< " seconds to drop space: "
|
2017-08-17 11:32:16 +02:00
|
|
|
<< space->name << " ("
|
|
|
|
<< space->id << ") active threads "
|
|
|
|
<< cnt << "flushing="
|
|
|
|
<< flushing << ".";
|
2014-12-22 16:53:17 +02:00
|
|
|
last = now;
|
|
|
|
}
|
|
|
|
}
|
2015-03-20 12:38:53 +02:00
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
mutex_exit(&crypt_data->mutex);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*********************************************************************
|
|
|
|
Get crypt status for a space (used by information_schema)
|
2017-03-14 12:56:01 +02:00
|
|
|
@param[in] space Tablespace
|
|
|
|
@param[out] status Crypt status */
|
2015-04-01 11:50:21 +03:00
|
|
|
UNIV_INTERN
|
2017-03-14 12:56:01 +02:00
|
|
|
void
|
2014-12-22 16:53:17 +02:00
|
|
|
fil_space_crypt_get_status(
|
2017-03-14 12:56:01 +02:00
|
|
|
const fil_space_t* space,
|
|
|
|
struct fil_space_crypt_status_t* status)
|
2014-12-22 16:53:17 +02:00
|
|
|
{
|
2016-12-13 11:51:33 +02:00
|
|
|
memset(status, 0, sizeof(*status));
|
|
|
|
|
2017-03-14 12:56:01 +02:00
|
|
|
ut_ad(space->n_pending_ops > 0);
|
2017-06-12 17:43:07 +03:00
|
|
|
|
|
|
|
/* If there is no crypt data and we have not yet read
|
|
|
|
page 0 for this tablespace, we need to read it before
|
|
|
|
we can continue. */
|
|
|
|
if (!space->crypt_data) {
|
|
|
|
fil_crypt_read_crypt_data(const_cast<fil_space_t*>(space));
|
|
|
|
}
|
|
|
|
|
2017-08-29 14:23:34 +03:00
|
|
|
status->space = ULINT_UNDEFINED;
|
2017-03-14 12:56:01 +02:00
|
|
|
|
2017-06-12 17:10:56 +03:00
|
|
|
if (fil_space_crypt_t* crypt_data = space->crypt_data) {
|
2017-08-29 14:23:34 +03:00
|
|
|
status->space = space->id;
|
2014-12-22 16:53:17 +02:00
|
|
|
mutex_enter(&crypt_data->mutex);
|
2017-03-14 12:56:01 +02:00
|
|
|
status->scheme = crypt_data->type;
|
2014-12-22 16:53:17 +02:00
|
|
|
status->keyserver_requests = crypt_data->keyserver_requests;
|
|
|
|
status->min_key_version = crypt_data->min_key_version;
|
2016-03-18 11:48:49 +02:00
|
|
|
status->key_id = crypt_data->key_id;
|
2015-04-01 11:50:21 +03:00
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
if (crypt_data->rotate_state.active_threads > 0 ||
|
|
|
|
crypt_data->rotate_state.flushing) {
|
|
|
|
status->rotating = true;
|
|
|
|
status->flushing =
|
|
|
|
crypt_data->rotate_state.flushing;
|
|
|
|
status->rotate_next_page_number =
|
|
|
|
crypt_data->rotate_state.next_offset;
|
|
|
|
status->rotate_max_page_number =
|
|
|
|
crypt_data->rotate_state.max_offset;
|
|
|
|
}
|
2016-12-13 11:51:33 +02:00
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
mutex_exit(&crypt_data->mutex);
|
2015-04-09 00:37:47 +02:00
|
|
|
|
2015-05-11 21:05:02 +02:00
|
|
|
if (srv_encrypt_tables || crypt_data->min_key_version) {
|
2015-04-09 00:37:47 +02:00
|
|
|
status->current_key_version =
|
2015-05-11 21:05:02 +02:00
|
|
|
fil_crypt_get_latest_key_version(crypt_data);
|
|
|
|
}
|
2014-12-22 16:53:17 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*********************************************************************
|
2017-03-14 12:56:01 +02:00
|
|
|
Return crypt statistics
|
|
|
|
@param[out] stat Crypt statistics */
|
2015-04-01 11:50:21 +03:00
|
|
|
UNIV_INTERN
|
2014-12-22 16:53:17 +02:00
|
|
|
void
|
2015-04-01 11:50:21 +03:00
|
|
|
fil_crypt_total_stat(
|
2017-03-14 12:56:01 +02:00
|
|
|
fil_crypt_stat_t *stat)
|
2014-12-22 16:53:17 +02:00
|
|
|
{
|
|
|
|
mutex_enter(&crypt_stat_mutex);
|
|
|
|
*stat = crypt_stat;
|
|
|
|
mutex_exit(&crypt_stat_mutex);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*********************************************************************
|
|
|
|
Get scrub status for a space (used by information_schema)
|
2017-03-14 12:56:01 +02:00
|
|
|
|
|
|
|
@param[in] space Tablespace
|
|
|
|
@param[out] status Scrub status */
|
2015-04-01 11:50:21 +03:00
|
|
|
UNIV_INTERN
|
2017-03-14 12:56:01 +02:00
|
|
|
void
|
2014-12-22 16:53:17 +02:00
|
|
|
fil_space_get_scrub_status(
|
2017-03-14 12:56:01 +02:00
|
|
|
const fil_space_t* space,
|
|
|
|
struct fil_space_scrub_status_t* status)
|
2014-12-22 16:53:17 +02:00
|
|
|
{
|
|
|
|
memset(status, 0, sizeof(*status));
|
2015-03-20 12:38:53 +02:00
|
|
|
|
2017-03-14 12:56:01 +02:00
|
|
|
ut_ad(space->n_pending_ops > 0);
|
|
|
|
fil_space_crypt_t* crypt_data = space->crypt_data;
|
|
|
|
|
|
|
|
status->space = space->id;
|
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
if (crypt_data != NULL) {
|
2017-03-30 12:48:42 +02:00
|
|
|
status->compressed = FSP_FLAGS_GET_ZIP_SSIZE(space->flags) > 0;
|
2014-12-22 16:53:17 +02:00
|
|
|
mutex_enter(&crypt_data->mutex);
|
|
|
|
status->last_scrub_completed =
|
|
|
|
crypt_data->rotate_state.scrubbing.last_scrub_completed;
|
|
|
|
if (crypt_data->rotate_state.active_threads > 0 &&
|
|
|
|
crypt_data->rotate_state.scrubbing.is_active) {
|
|
|
|
status->scrubbing = true;
|
|
|
|
status->current_scrub_started =
|
|
|
|
crypt_data->rotate_state.start_time;
|
|
|
|
status->current_scrub_active_threads =
|
|
|
|
crypt_data->rotate_state.active_threads;
|
|
|
|
status->current_scrub_page_number =
|
|
|
|
crypt_data->rotate_state.next_offset;
|
|
|
|
status->current_scrub_max_page_number =
|
|
|
|
crypt_data->rotate_state.max_offset;
|
|
|
|
}
|
2016-12-13 11:51:33 +02:00
|
|
|
|
2014-12-22 16:53:17 +02:00
|
|
|
mutex_exit(&crypt_data->mutex);
|
|
|
|
}
|
|
|
|
}
|
2017-01-03 14:35:08 +02:00
|
|
|
#endif /* UNIV_INNOCHECKSUM */
|
|
|
|
|
2017-03-30 12:48:42 +02:00
|
|
|
/**
|
|
|
|
Verify that post encryption checksum match calculated checksum.
|
|
|
|
This function should be called only if tablespace contains crypt_data
|
|
|
|
metadata (this is strong indication that tablespace is encrypted).
|
|
|
|
Function also verifies that traditional checksum does not match
|
|
|
|
calculated checksum as if it does page could be valid unencrypted,
|
|
|
|
encrypted, or corrupted.
|
|
|
|
|
|
|
|
@param[in,out] page page frame (checksum is temporarily modified)
|
|
|
|
@param[in] page_size page size
|
2018-12-17 19:00:35 +02:00
|
|
|
@return whether the encrypted page is OK */
|
2017-01-03 14:35:08 +02:00
|
|
|
bool
|
2018-12-17 20:04:03 +02:00
|
|
|
fil_space_verify_crypt_checksum(const byte* page, const page_size_t& page_size)
|
2017-01-03 14:35:08 +02:00
|
|
|
{
|
2018-12-17 19:00:35 +02:00
|
|
|
ut_ad(mach_read_from_4(page + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION));
|
2017-01-03 14:35:08 +02:00
|
|
|
|
2017-03-30 12:48:42 +02:00
|
|
|
/* Compressed and encrypted pages do not have checksum. Assume not
|
|
|
|
corrupted. Page verification happens after decompression in
|
|
|
|
buf_page_io_complete() using buf_page_is_corrupted(). */
|
2018-12-17 19:00:35 +02:00
|
|
|
if (mach_read_from_2(page + FIL_PAGE_TYPE)
|
|
|
|
== FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED) {
|
|
|
|
return true;
|
2017-03-30 12:48:42 +02:00
|
|
|
}
|
2017-01-03 14:35:08 +02:00
|
|
|
|
2018-12-17 19:00:35 +02:00
|
|
|
/* Read stored post encryption checksum. */
|
|
|
|
const ib_uint32_t checksum = mach_read_from_4(
|
|
|
|
page + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 4);
|
2017-01-03 14:35:08 +02:00
|
|
|
|
2017-03-30 12:48:42 +02:00
|
|
|
/* If stored checksum matches one of the calculated checksums
|
|
|
|
page is not corrupted. */
|
|
|
|
|
2018-12-18 09:52:28 +02:00
|
|
|
switch (srv_checksum_algorithm_t(srv_checksum_algorithm)) {
|
2018-12-17 19:00:35 +02:00
|
|
|
case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32:
|
2018-12-17 20:04:03 +02:00
|
|
|
if (page_size.is_compressed()) {
|
2018-12-17 19:00:35 +02:00
|
|
|
return checksum == page_zip_calc_checksum(
|
2018-12-17 20:04:03 +02:00
|
|
|
page, page_size.physical(),
|
2018-12-18 10:01:15 +02:00
|
|
|
SRV_CHECKSUM_ALGORITHM_CRC32)
|
|
|
|
#ifdef INNODB_BUG_ENDIAN_CRC32
|
|
|
|
|| checksum == page_zip_calc_checksum(
|
|
|
|
page, page_size.physical(),
|
|
|
|
SRV_CHECKSUM_ALGORITHM_CRC32, true)
|
|
|
|
#endif
|
|
|
|
;
|
2018-12-17 19:00:35 +02:00
|
|
|
}
|
2017-03-30 12:48:42 +02:00
|
|
|
|
2018-12-17 22:45:21 +02:00
|
|
|
return checksum == buf_calc_page_crc32(page)
|
|
|
|
#ifdef INNODB_BUG_ENDIAN_CRC32
|
|
|
|
|| checksum == buf_calc_page_crc32(page, true)
|
|
|
|
#endif
|
|
|
|
;
|
2018-12-17 19:00:35 +02:00
|
|
|
case SRV_CHECKSUM_ALGORITHM_STRICT_NONE:
|
2018-12-18 09:52:28 +02:00
|
|
|
/* Starting with MariaDB 10.1.25, 10.2.7, 10.3.1,
|
|
|
|
due to MDEV-12114, fil_crypt_calculate_checksum()
|
|
|
|
is only using CRC32 for the encrypted pages.
|
|
|
|
Due to this, we must treat "strict_none" as "none". */
|
2018-12-17 19:00:35 +02:00
|
|
|
case SRV_CHECKSUM_ALGORITHM_NONE:
|
|
|
|
return true;
|
2018-12-18 09:52:28 +02:00
|
|
|
case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB:
|
|
|
|
/* Starting with MariaDB 10.1.25, 10.2.7, 10.3.1,
|
|
|
|
due to MDEV-12114, fil_crypt_calculate_checksum()
|
|
|
|
is only using CRC32 for the encrypted pages.
|
|
|
|
Due to this, we must treat "strict_innodb" as "innodb". */
|
2018-12-17 19:00:35 +02:00
|
|
|
case SRV_CHECKSUM_ALGORITHM_INNODB:
|
|
|
|
case SRV_CHECKSUM_ALGORITHM_CRC32:
|
|
|
|
if (checksum == BUF_NO_CHECKSUM_MAGIC) {
|
|
|
|
return true;
|
|
|
|
}
|
2018-12-18 12:38:38 +02:00
|
|
|
if (page_size.is_compressed()) {
|
2018-12-17 19:00:35 +02:00
|
|
|
return checksum == page_zip_calc_checksum(
|
2018-12-18 10:01:15 +02:00
|
|
|
page, page_size.physical(),
|
|
|
|
SRV_CHECKSUM_ALGORITHM_CRC32)
|
|
|
|
#ifdef INNODB_BUG_ENDIAN_CRC32
|
2018-12-18 09:52:28 +02:00
|
|
|
|| checksum == page_zip_calc_checksum(
|
2018-12-18 10:01:15 +02:00
|
|
|
page, page_size.physical(),
|
|
|
|
SRV_CHECKSUM_ALGORITHM_CRC32, true)
|
|
|
|
#endif
|
|
|
|
|| checksum == page_zip_calc_checksum(
|
|
|
|
page, page_size.physical(),
|
2018-12-18 09:52:28 +02:00
|
|
|
SRV_CHECKSUM_ALGORITHM_INNODB);
|
2018-12-13 13:37:21 +02:00
|
|
|
}
|
2017-03-30 12:48:42 +02:00
|
|
|
|
2018-12-17 19:00:35 +02:00
|
|
|
return checksum == buf_calc_page_crc32(page)
|
2018-12-17 22:45:21 +02:00
|
|
|
#ifdef INNODB_BUG_ENDIAN_CRC32
|
|
|
|
|| checksum == buf_calc_page_crc32(page, true)
|
|
|
|
#endif
|
2018-12-17 19:00:35 +02:00
|
|
|
|| checksum == buf_calc_page_new_checksum(page);
|
2017-08-03 08:29:36 +03:00
|
|
|
}
|
2018-12-17 22:35:22 +02:00
|
|
|
|
|
|
|
ut_ad(!"unhandled innodb_checksum_algorithm");
|
|
|
|
return false;
|
2017-01-03 14:35:08 +02:00
|
|
|
}
|