mirror of
				https://github.com/MariaDB/server.git
				synced 2025-10-31 10:56:12 +01:00 
			
		
		
		
	
		
			
				
	
	
		
			2464 lines
		
	
	
	
		
			70 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			2464 lines
		
	
	
	
		
			70 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| /*****************************************************************************
 | |
| Copyright (C) 2013, 2015, Google Inc. All Rights Reserved.
 | |
| Copyright (c) 2014, 2022, MariaDB Corporation.
 | |
| 
 | |
| This program is free software; you can redistribute it and/or modify it under
 | |
| the terms of the GNU General Public License as published by the Free Software
 | |
| Foundation; version 2 of the License.
 | |
| 
 | |
| This program is distributed in the hope that it will be useful, but WITHOUT
 | |
| ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 | |
| FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 | |
| 
 | |
| You should have received a copy of the GNU General Public License along with
 | |
| this program; if not, write to the Free Software Foundation, Inc.,
 | |
| 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA
 | |
| 
 | |
| *****************************************************************************/
 | |
| /**************************************************//**
 | |
| @file fil0crypt.cc
 | |
| Innodb file space encrypt/decrypt
 | |
| 
 | |
| Created            Jonas Oreland Google
 | |
| Modified           Jan Lindström jan.lindstrom@mariadb.com
 | |
| *******************************************************/
 | |
| 
 | |
| #include "fil0crypt.h"
 | |
| #include "mach0data.h"
 | |
| #include "page0zip.h"
 | |
| #include "buf0checksum.h"
 | |
| #ifdef UNIV_INNOCHECKSUM
 | |
| # include "buf0buf.h"
 | |
| #else
 | |
| #include "buf0flu.h"
 | |
| #include "buf0dblwr.h"
 | |
| #include "btr0sea.h"
 | |
| #include "srv0srv.h"
 | |
| #include "srv0start.h"
 | |
| #include "mtr0mtr.h"
 | |
| #include "mtr0log.h"
 | |
| #include "ut0ut.h"
 | |
| #include "fsp0fsp.h"
 | |
| #include "fil0pagecompress.h"
 | |
| #include <my_crypt.h>
 | |
| 
 | |
| static bool fil_crypt_threads_inited = false;
 | |
| 
 | |
| /** Is encryption enabled/disabled */
 | |
| ulong srv_encrypt_tables;
 | |
| 
 | |
| /** No of key rotation threads requested */
 | |
| uint srv_n_fil_crypt_threads;
 | |
| 
 | |
| /** No of key rotation threads started */
 | |
| uint srv_n_fil_crypt_threads_started;
 | |
| 
 | |
| /** At this age or older a space/page will be rotated */
 | |
| uint srv_fil_crypt_rotate_key_age;
 | |
| 
 | |
| /** Whether the encryption plugin does key rotation */
 | |
| Atomic_relaxed<bool> srv_encrypt_rotate;
 | |
| 
 | |
| /** Condition variable for srv_n_fil_crypt_threads_started */
 | |
| static pthread_cond_t fil_crypt_cond;
 | |
| 
 | |
| /** Condition variable to signal the key rotation threads */
 | |
| static pthread_cond_t fil_crypt_threads_cond;
 | |
| 
 | |
| /** Condition variable for interrupting sleeptime_ms sleep at the end
 | |
| of fil_crypt_rotate_page() */
 | |
| static pthread_cond_t fil_crypt_throttle_sleep_cond;
 | |
| 
 | |
| /** Mutex for key rotation threads. Acquired before fil_system.mutex! */
 | |
| static mysql_mutex_t fil_crypt_threads_mutex;
 | |
| 
 | |
| /** Variable ensuring only 1 thread at time does initial conversion */
 | |
| static bool fil_crypt_start_converting;
 | |
| 
 | |
| /** Variables for throttling */
 | |
| uint srv_n_fil_crypt_iops;	 // 10ms per iop
 | |
| static constexpr uint srv_alloc_time = 3; // allocate iops for 3s at a time
 | |
| static uint n_fil_crypt_iops_allocated;
 | |
| 
 | |
| #define DEBUG_KEYROTATION_THROTTLING 0
 | |
| 
 | |
| /** Statistics variables */
 | |
| static fil_crypt_stat_t crypt_stat;
 | |
| static mysql_mutex_t crypt_stat_mutex;
 | |
| 
 | |
| /** Wake up the encryption threads */
 | |
| void fil_crypt_threads_signal(bool broadcast)
 | |
| {
 | |
|   mysql_mutex_lock(&fil_crypt_threads_mutex);
 | |
|   if (broadcast)
 | |
|     pthread_cond_broadcast(&fil_crypt_threads_cond);
 | |
|   else
 | |
|     pthread_cond_signal(&fil_crypt_threads_cond);
 | |
|   mysql_mutex_unlock(&fil_crypt_threads_mutex);
 | |
| }
 | |
| 
 | |
| /***********************************************************************
 | |
| Check if a key needs rotation given a key_state
 | |
| @param[in]	crypt_data		Encryption information
 | |
| @param[in]	key_version		Current key version
 | |
| @param[in]	latest_key_version	Latest key version
 | |
| @param[in]	rotate_key_age		when to rotate
 | |
| @return true if key needs rotation, false if not */
 | |
| static bool
 | |
| fil_crypt_needs_rotation(
 | |
| 	const fil_space_crypt_t*	crypt_data,
 | |
| 	uint				key_version,
 | |
| 	uint				latest_key_version,
 | |
| 	uint				rotate_key_age)
 | |
| 	MY_ATTRIBUTE((warn_unused_result));
 | |
| 
 | |
| /*********************************************************************
 | |
| Init space crypt */
 | |
| void fil_space_crypt_init()
 | |
| {
 | |
|   pthread_cond_init(&fil_crypt_throttle_sleep_cond, nullptr);
 | |
|   mysql_mutex_init(0, &crypt_stat_mutex, nullptr);
 | |
| }
 | |
| 
 | |
| /*********************************************************************
 | |
| Cleanup space crypt */
 | |
| void fil_space_crypt_cleanup()
 | |
| {
 | |
|   pthread_cond_destroy(&fil_crypt_throttle_sleep_cond);
 | |
|   mysql_mutex_destroy(&crypt_stat_mutex);
 | |
| }
 | |
| 
 | |
| /**
 | |
| Get latest key version from encryption plugin.
 | |
| @return key version or ENCRYPTION_KEY_VERSION_INVALID */
 | |
| uint
 | |
| fil_space_crypt_t::key_get_latest_version(void)
 | |
| {
 | |
| 	uint key_version = key_found;
 | |
| 
 | |
| 	if (is_key_found()) {
 | |
| 		key_version = encryption_key_get_latest_version(key_id);
 | |
| 		/* InnoDB does dirty read of srv_fil_crypt_rotate_key_age.
 | |
| 		It doesn't matter because srv_encrypt_rotate
 | |
| 		can be set to true only once */
 | |
| 		if (!srv_encrypt_rotate
 | |
| 		    && key_version > srv_fil_crypt_rotate_key_age) {
 | |
| 			srv_encrypt_rotate = true;
 | |
| 		}
 | |
| 
 | |
| 		srv_stats.n_key_requests.inc();
 | |
| 		key_found = key_version;
 | |
| 	}
 | |
| 
 | |
| 	return key_version;
 | |
| }
 | |
| 
 | |
| /******************************************************************
 | |
| Get the latest(key-version), waking the encrypt thread, if needed
 | |
| @param[in,out]	crypt_data	Crypt data */
 | |
| static inline
 | |
| uint
 | |
| fil_crypt_get_latest_key_version(
 | |
| 	fil_space_crypt_t* crypt_data)
 | |
| {
 | |
| 	ut_ad(crypt_data != NULL);
 | |
| 
 | |
| 	uint key_version = crypt_data->key_get_latest_version();
 | |
| 
 | |
| 	if (crypt_data->is_key_found()) {
 | |
| 
 | |
| 		if (fil_crypt_needs_rotation(
 | |
| 				crypt_data,
 | |
| 				crypt_data->min_key_version,
 | |
| 				key_version,
 | |
| 				srv_fil_crypt_rotate_key_age)) {
 | |
| 			if (fil_crypt_threads_inited) {
 | |
| 				fil_crypt_threads_signal();
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	return key_version;
 | |
| }
 | |
| 
 | |
| /******************************************************************
 | |
| Mutex helper for crypt_data->scheme */
 | |
| void
 | |
| crypt_data_scheme_locker(
 | |
| /*=====================*/
 | |
| 	st_encryption_scheme*	scheme,
 | |
| 	int			exit)
 | |
| {
 | |
| 	fil_space_crypt_t* crypt_data =
 | |
| 		static_cast<fil_space_crypt_t*>(scheme);
 | |
| 
 | |
| 	if (exit) {
 | |
| 		mysql_mutex_unlock(&crypt_data->mutex);
 | |
| 	} else {
 | |
| 		mysql_mutex_lock(&crypt_data->mutex);
 | |
| 	}
 | |
| }
 | |
| 
 | |
| /******************************************************************
 | |
| Create a fil_space_crypt_t object
 | |
| @param[in]	type		CRYPT_SCHEME_UNENCRYPTE or
 | |
| 				CRYPT_SCHEME_1
 | |
| @param[in]	encrypt_mode	FIL_ENCRYPTION_DEFAULT or
 | |
| 				FIL_ENCRYPTION_ON or
 | |
| 				FIL_ENCRYPTION_OFF
 | |
| @param[in]	min_key_version key_version or 0
 | |
| @param[in]	key_id		Used key id
 | |
| @return crypt object */
 | |
| static
 | |
| fil_space_crypt_t*
 | |
| fil_space_create_crypt_data(
 | |
| 	uint			type,
 | |
| 	fil_encryption_t	encrypt_mode,
 | |
| 	uint			min_key_version,
 | |
| 	uint			key_id)
 | |
| {
 | |
| 	fil_space_crypt_t* crypt_data = NULL;
 | |
| 	if (void* buf = ut_zalloc_nokey(sizeof(fil_space_crypt_t))) {
 | |
| 		crypt_data = new(buf)
 | |
| 			fil_space_crypt_t(
 | |
| 				type,
 | |
| 				min_key_version,
 | |
| 				key_id,
 | |
| 				encrypt_mode);
 | |
| 	}
 | |
| 
 | |
| 	return crypt_data;
 | |
| }
 | |
| 
 | |
| /******************************************************************
 | |
| Create a fil_space_crypt_t object
 | |
| @param[in]	encrypt_mode	FIL_ENCRYPTION_DEFAULT or
 | |
| 				FIL_ENCRYPTION_ON or
 | |
| 				FIL_ENCRYPTION_OFF
 | |
| 
 | |
| @param[in]	key_id		Encryption key id
 | |
| @return crypt object */
 | |
| fil_space_crypt_t*
 | |
| fil_space_create_crypt_data(
 | |
| 	fil_encryption_t	encrypt_mode,
 | |
| 	uint			key_id)
 | |
| {
 | |
| 	return (fil_space_create_crypt_data(0, encrypt_mode, 0, key_id));
 | |
| }
 | |
| 
 | |
| /******************************************************************
 | |
| Merge fil_space_crypt_t object
 | |
| @param[in,out]	dst		Destination cryp data
 | |
| @param[in]	src		Source crypt data */
 | |
| static
 | |
| void
 | |
| fil_space_merge_crypt_data(
 | |
| 	fil_space_crypt_t* dst,
 | |
| 	const fil_space_crypt_t* src)
 | |
| {
 | |
| 	mysql_mutex_lock(&dst->mutex);
 | |
| 
 | |
| 	/* validate that they are mergeable */
 | |
| 	ut_a(src->type == CRYPT_SCHEME_UNENCRYPTED ||
 | |
| 	     src->type == CRYPT_SCHEME_1);
 | |
| 
 | |
| 	ut_a(dst->type == CRYPT_SCHEME_UNENCRYPTED ||
 | |
| 	     dst->type == CRYPT_SCHEME_1);
 | |
| 
 | |
| 	dst->encryption = src->encryption;
 | |
| 	dst->type = src->type;
 | |
| 	dst->min_key_version = src->min_key_version;
 | |
| 	dst->keyserver_requests += src->keyserver_requests;
 | |
| 
 | |
| 	mysql_mutex_unlock(&dst->mutex);
 | |
| }
 | |
| 
 | |
| /** Initialize encryption parameters from a tablespace header page.
 | |
| @param[in]	zip_size	ROW_FORMAT=COMPRESSED page size, or 0
 | |
| @param[in]	page		first page of the tablespace
 | |
| @return crypt data from page 0
 | |
| @retval	NULL	if not present or not valid */
 | |
| fil_space_crypt_t* fil_space_read_crypt_data(ulint zip_size, const byte* page)
 | |
| {
 | |
| 	const ulint offset = FSP_HEADER_OFFSET
 | |
| 		+ fsp_header_get_encryption_offset(zip_size);
 | |
| 
 | |
| 	if (memcmp(page + offset, CRYPT_MAGIC, MAGIC_SZ) != 0) {
 | |
| 		/* Crypt data is not stored. */
 | |
| 		return NULL;
 | |
| 	}
 | |
| 
 | |
| 	uint8_t type = mach_read_from_1(page + offset + MAGIC_SZ + 0);
 | |
| 	uint8_t iv_length = mach_read_from_1(page + offset + MAGIC_SZ + 1);
 | |
| 	fil_space_crypt_t* crypt_data;
 | |
| 
 | |
| 	if (!(type == CRYPT_SCHEME_UNENCRYPTED ||
 | |
| 	      type == CRYPT_SCHEME_1)
 | |
| 	    || iv_length != sizeof crypt_data->iv) {
 | |
| 		ib::error() << "Found non sensible crypt scheme: "
 | |
| 			    << type << "," << iv_length
 | |
| 			    << " for space: "
 | |
| 			    << page_get_space_id(page);
 | |
| 		return NULL;
 | |
| 	}
 | |
| 
 | |
| 	uint min_key_version = mach_read_from_4
 | |
| 		(page + offset + MAGIC_SZ + 2 + iv_length);
 | |
| 
 | |
| 	uint key_id = mach_read_from_4
 | |
| 		(page + offset + MAGIC_SZ + 2 + iv_length + 4);
 | |
| 
 | |
| 	fil_encryption_t encryption = (fil_encryption_t)mach_read_from_1(
 | |
| 		page + offset + MAGIC_SZ + 2 + iv_length + 8);
 | |
| 
 | |
| 	crypt_data = fil_space_create_crypt_data(encryption, key_id);
 | |
| 	/* We need to overwrite these as above function will initialize
 | |
| 	members */
 | |
| 	crypt_data->type = type;
 | |
| 	crypt_data->min_key_version = min_key_version;
 | |
| 	memcpy(crypt_data->iv, page + offset + MAGIC_SZ + 2, iv_length);
 | |
| 
 | |
| 	return crypt_data;
 | |
| }
 | |
| 
 | |
| /******************************************************************
 | |
| Free a crypt data object
 | |
| @param[in,out] crypt_data	crypt data to be freed */
 | |
| void fil_space_destroy_crypt_data(fil_space_crypt_t **crypt_data)
 | |
| {
 | |
| 	if (crypt_data != NULL && (*crypt_data) != NULL) {
 | |
| 		fil_space_crypt_t* c;
 | |
| 		if (UNIV_LIKELY(fil_crypt_threads_inited)) {
 | |
| 			mysql_mutex_lock(&fil_crypt_threads_mutex);
 | |
| 			c = *crypt_data;
 | |
| 			*crypt_data = NULL;
 | |
| 			mysql_mutex_unlock(&fil_crypt_threads_mutex);
 | |
| 		} else {
 | |
| 			ut_ad(srv_read_only_mode || !srv_was_started);
 | |
| 			c = *crypt_data;
 | |
| 			*crypt_data = NULL;
 | |
| 		}
 | |
| 		if (c) {
 | |
| 			c->~fil_space_crypt_t();
 | |
| 			ut_free(c);
 | |
| 		}
 | |
| 	}
 | |
| }
 | |
| 
 | |
| /** Amend encryption information from redo log.
 | |
| @param[in]	space	tablespace
 | |
| @param[in]	data	encryption metadata */
 | |
| void fil_crypt_parse(fil_space_t* space, const byte* data)
 | |
| {
 | |
| 	ut_ad(data[1] == MY_AES_BLOCK_SIZE);
 | |
| 	if (void* buf = ut_zalloc_nokey(sizeof(fil_space_crypt_t))) {
 | |
| 		fil_space_crypt_t* crypt_data = new(buf)
 | |
| 			fil_space_crypt_t(
 | |
| 				data[0],
 | |
| 				mach_read_from_4(&data[2 + MY_AES_BLOCK_SIZE]),
 | |
| 				mach_read_from_4(&data[6 + MY_AES_BLOCK_SIZE]),
 | |
| 				static_cast<fil_encryption_t>
 | |
| 				(data[10 + MY_AES_BLOCK_SIZE]));
 | |
| 		memcpy(crypt_data->iv, data + 2, MY_AES_BLOCK_SIZE);
 | |
| 		mysql_mutex_lock(&fil_system.mutex);
 | |
| 		if (space->crypt_data) {
 | |
| 			fil_space_merge_crypt_data(space->crypt_data,
 | |
| 						   crypt_data);
 | |
| 			fil_space_destroy_crypt_data(&crypt_data);
 | |
| 			crypt_data = space->crypt_data;
 | |
| 		} else {
 | |
| 			space->crypt_data = crypt_data;
 | |
| 		}
 | |
| 		mysql_mutex_unlock(&fil_system.mutex);
 | |
| 	}
 | |
| }
 | |
| 
 | |
| /** Write encryption metadata to the first page.
 | |
| @param[in,out]	block	first page of the tablespace
 | |
| @param[in,out]	mtr	mini-transaction */
 | |
| void fil_space_crypt_t::write_page0(buf_block_t* block, mtr_t* mtr)
 | |
| {
 | |
| 	const ulint offset = FSP_HEADER_OFFSET
 | |
| 		+ fsp_header_get_encryption_offset(block->zip_size());
 | |
| 	byte* b = block->page.frame + offset;
 | |
| 
 | |
| 	mtr->memcpy<mtr_t::MAYBE_NOP>(*block, b, CRYPT_MAGIC, MAGIC_SZ);
 | |
| 
 | |
| 	b += MAGIC_SZ;
 | |
| 	byte* const start = b;
 | |
| 	*b++ = static_cast<byte>(type);
 | |
| 	compile_time_assert(sizeof iv == MY_AES_BLOCK_SIZE);
 | |
| 	compile_time_assert(sizeof iv == CRYPT_SCHEME_1_IV_LEN);
 | |
| 	*b++ = sizeof iv;
 | |
| 	memcpy(b, iv, sizeof iv);
 | |
| 	b += sizeof iv;
 | |
| 	mach_write_to_4(b, min_key_version);
 | |
| 	b += 4;
 | |
| 	mach_write_to_4(b, key_id);
 | |
| 	b += 4;
 | |
| 	*b++ = byte(encryption);
 | |
| 	ut_ad(b - start == 11 + MY_AES_BLOCK_SIZE);
 | |
| 	/* We must log also any unchanged bytes, because recovery will
 | |
| 	invoke fil_crypt_parse() based on this log record. */
 | |
| 	mtr->memcpy(*block, offset + MAGIC_SZ, b - start);
 | |
| }
 | |
| 
 | |
| /** Encrypt a buffer for non full checksum.
 | |
| @param[in,out]		crypt_data		Crypt data
 | |
| @param[in]		space			space_id
 | |
| @param[in]		offset			Page offset
 | |
| @param[in]		lsn			Log sequence number
 | |
| @param[in]		src_frame		Page to encrypt
 | |
| @param[in]		zip_size		ROW_FORMAT=COMPRESSED
 | |
| 						page size, or 0
 | |
| @param[in,out]		dst_frame		Output buffer
 | |
| @return encrypted buffer or NULL */
 | |
| static byte* fil_encrypt_buf_for_non_full_checksum(
 | |
| 	fil_space_crypt_t*	crypt_data,
 | |
| 	ulint			space,
 | |
| 	ulint			offset,
 | |
| 	lsn_t			lsn,
 | |
| 	const byte*		src_frame,
 | |
| 	ulint			zip_size,
 | |
| 	byte*			dst_frame)
 | |
| {
 | |
| 	uint size = uint(zip_size ? zip_size : srv_page_size);
 | |
| 	uint key_version = fil_crypt_get_latest_key_version(crypt_data);
 | |
| 	ut_a(key_version != ENCRYPTION_KEY_VERSION_INVALID);
 | |
| 	ut_ad(!ut_align_offset(src_frame, 8));
 | |
| 	ut_ad(!ut_align_offset(dst_frame, 8));
 | |
| 
 | |
| 	const bool page_compressed = fil_page_get_type(src_frame)
 | |
| 		== FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED;
 | |
| 	uint header_len = FIL_PAGE_DATA;
 | |
| 
 | |
| 	if (page_compressed) {
 | |
| 		header_len += FIL_PAGE_ENCRYPT_COMP_METADATA_LEN;
 | |
| 	}
 | |
| 
 | |
| 	/* FIL page header is not encrypted */
 | |
| 	memcpy(dst_frame, src_frame, header_len);
 | |
| 	mach_write_to_4(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION,
 | |
| 			key_version);
 | |
| 
 | |
| 	/* Calculate the start offset in a page */
 | |
| 	uint		unencrypted_bytes = header_len + FIL_PAGE_DATA_END;
 | |
| 	uint		srclen = size - unencrypted_bytes;
 | |
| 	const byte*	src = src_frame + header_len;
 | |
| 	byte*		dst = dst_frame + header_len;
 | |
| 
 | |
| 	if (page_compressed) {
 | |
| 		srclen = mach_read_from_2(src_frame + FIL_PAGE_DATA);
 | |
| 	}
 | |
| 	uint dstlen = srclen;
 | |
| 
 | |
| 	int rc = encryption_scheme_encrypt(src, srclen, dst, &dstlen,
 | |
| 					   crypt_data, key_version,
 | |
| 					   (uint32)space, (uint32)offset, lsn);
 | |
| 	ut_a(rc == MY_AES_OK);
 | |
| 	ut_a(dstlen == srclen);
 | |
| 
 | |
| 	/* For compressed tables we do not store the FIL header because
 | |
| 	the whole page is not stored to the disk. In compressed tables only
 | |
| 	the FIL header + compressed (and now encrypted) payload alligned
 | |
| 	to sector boundary is written. */
 | |
| 	if (!page_compressed) {
 | |
| 		/* FIL page trailer is also not encrypted */
 | |
| 		static_assert(FIL_PAGE_DATA_END == 8, "alignment");
 | |
| 		memcpy_aligned<8>(dst_frame + size - FIL_PAGE_DATA_END,
 | |
| 				  src_frame + size - FIL_PAGE_DATA_END, 8);
 | |
| 	} else {
 | |
| 		/* Clean up rest of buffer */
 | |
| 		memset(dst_frame+header_len+srclen, 0,
 | |
| 		       size - (header_len + srclen));
 | |
| 	}
 | |
| 
 | |
| 	/* store the post-encryption checksum after the key-version */
 | |
| 	mach_write_to_4(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 4,
 | |
| 			zip_size
 | |
| 			? page_zip_calc_checksum(dst_frame, zip_size,
 | |
| 						 SRV_CHECKSUM_ALGORITHM_CRC32)
 | |
| 			: buf_calc_page_crc32(dst_frame));
 | |
| 
 | |
| 	ut_ad(fil_space_verify_crypt_checksum(dst_frame, zip_size));
 | |
| 
 | |
| 	srv_stats.pages_encrypted.inc();
 | |
| 
 | |
| 	return dst_frame;
 | |
| }
 | |
| 
 | |
| /** Encrypt a buffer for full checksum format.
 | |
| @param[in,out]		crypt_data		Crypt data
 | |
| @param[in]		space			space_id
 | |
| @param[in]		offset			Page offset
 | |
| @param[in]		lsn			Log sequence number
 | |
| @param[in]		src_frame		Page to encrypt
 | |
| @param[in,out]		dst_frame		Output buffer
 | |
| @return encrypted buffer or NULL */
 | |
| static byte* fil_encrypt_buf_for_full_crc32(
 | |
| 	fil_space_crypt_t*	crypt_data,
 | |
| 	ulint			space,
 | |
| 	ulint			offset,
 | |
| 	lsn_t			lsn,
 | |
| 	const byte*		src_frame,
 | |
| 	byte*			dst_frame)
 | |
| {
 | |
| 	uint key_version = fil_crypt_get_latest_key_version(crypt_data);
 | |
| 	ut_d(bool corrupted = false);
 | |
| 	const uint size = buf_page_full_crc32_size(src_frame, NULL,
 | |
| #ifdef UNIV_DEBUG
 | |
| 						   &corrupted
 | |
| #else
 | |
| 						   NULL
 | |
| #endif
 | |
| 						   );
 | |
| 	ut_ad(!corrupted);
 | |
| 	uint srclen = size - (FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION
 | |
| 			      + FIL_PAGE_FCRC32_CHECKSUM);
 | |
| 	const byte* src = src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION;
 | |
| 	byte* dst = dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION;
 | |
| 	uint dstlen = srclen;
 | |
| 
 | |
| 	ut_a(key_version != ENCRYPTION_KEY_VERSION_INVALID);
 | |
| 
 | |
| 	/* Till FIL_PAGE_LSN, page is not encrypted */
 | |
| 	memcpy(dst_frame, src_frame, FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
 | |
| 
 | |
| 	/* Write key version to the page. */
 | |
| 	mach_write_to_4(dst_frame + FIL_PAGE_FCRC32_KEY_VERSION, key_version);
 | |
| 
 | |
| 	int rc = encryption_scheme_encrypt(src, srclen, dst, &dstlen,
 | |
| 					   crypt_data, key_version,
 | |
| 					   uint(space), uint(offset), lsn);
 | |
| 	ut_a(rc == MY_AES_OK);
 | |
| 	ut_a(dstlen == srclen);
 | |
| 
 | |
| 	const ulint payload = size - FIL_PAGE_FCRC32_CHECKSUM;
 | |
| 	mach_write_to_4(dst_frame + payload, my_crc32c(0, dst_frame, payload));
 | |
| 	/* Clean the rest of the buffer. FIXME: Punch holes when writing! */
 | |
| 	memset(dst_frame + (payload + 4), 0, srv_page_size - (payload + 4));
 | |
| 
 | |
| 	srv_stats.pages_encrypted.inc();
 | |
| 
 | |
| 	return dst_frame;
 | |
| }
 | |
| 
 | |
| /** Encrypt a buffer.
 | |
| @param[in,out]		crypt_data		Crypt data
 | |
| @param[in]		space			space_id
 | |
| @param[in]		offset			Page offset
 | |
| @param[in]		src_frame		Page to encrypt
 | |
| @param[in]		zip_size		ROW_FORMAT=COMPRESSED
 | |
| 						page size, or 0
 | |
| @param[in,out]		dst_frame		Output buffer
 | |
| @param[in]		use_full_checksum	full crc32 algo is used
 | |
| @return encrypted buffer or NULL */
 | |
| byte* fil_encrypt_buf(
 | |
| 	fil_space_crypt_t*	crypt_data,
 | |
| 	ulint			space,
 | |
| 	ulint			offset,
 | |
| 	const byte*		src_frame,
 | |
| 	ulint			zip_size,
 | |
| 	byte*			dst_frame,
 | |
| 	bool			use_full_checksum)
 | |
| {
 | |
| 	const lsn_t lsn = mach_read_from_8(src_frame + FIL_PAGE_LSN);
 | |
| 
 | |
| 	if (use_full_checksum) {
 | |
| 		ut_ad(!zip_size);
 | |
| 		return fil_encrypt_buf_for_full_crc32(
 | |
| 			crypt_data, space, offset,
 | |
| 			lsn, src_frame, dst_frame);
 | |
| 	}
 | |
| 
 | |
| 	return fil_encrypt_buf_for_non_full_checksum(
 | |
| 		crypt_data, space, offset, lsn,
 | |
| 		src_frame, zip_size, dst_frame);
 | |
| }
 | |
| 
 | |
| /** Check whether these page types are allowed to encrypt.
 | |
| @param[in]	space		tablespace object
 | |
| @param[in]	src_frame	source page
 | |
| @return true if it is valid page type */
 | |
| static bool fil_space_encrypt_valid_page_type(
 | |
| 	const fil_space_t*	space,
 | |
| 	const byte*		src_frame)
 | |
| {
 | |
| 	switch (fil_page_get_type(src_frame)) {
 | |
| 	case FIL_PAGE_RTREE:
 | |
| 		return space->full_crc32();
 | |
| 	case FIL_PAGE_TYPE_FSP_HDR:
 | |
| 	case FIL_PAGE_TYPE_XDES:
 | |
| 		return false;
 | |
| 	}
 | |
| 
 | |
| 	return true;
 | |
| }
 | |
| 
 | |
| /******************************************************************
 | |
| Encrypt a page
 | |
| 
 | |
| @param[in]		space		Tablespace
 | |
| @param[in]		offset		Page offset
 | |
| @param[in]		src_frame	Page to encrypt
 | |
| @param[in,out]		dst_frame	Output buffer
 | |
| @return encrypted buffer or NULL */
 | |
| byte* fil_space_encrypt(
 | |
| 	const fil_space_t*	space,
 | |
| 	ulint			offset,
 | |
| 	byte*			src_frame,
 | |
| 	byte*			dst_frame)
 | |
| {
 | |
| 	if (!fil_space_encrypt_valid_page_type(space, src_frame)) {
 | |
| 		return src_frame;
 | |
| 	}
 | |
| 
 | |
| 	if (!space->crypt_data || !space->crypt_data->is_encrypted()) {
 | |
| 		return (src_frame);
 | |
| 	}
 | |
| 
 | |
| 	ut_ad(space->referenced());
 | |
| 
 | |
| 	return fil_encrypt_buf(space->crypt_data, space->id, offset,
 | |
| 			       src_frame, space->zip_size(),
 | |
| 			       dst_frame, space->full_crc32());
 | |
| }
 | |
| 
 | |
| /** Decrypt a page for full checksum format.
 | |
| @param[in]	space			space id
 | |
| @param[in]	crypt_data		crypt_data
 | |
| @param[in]	tmp_frame		Temporary buffer
 | |
| @param[in,out]	src_frame		Page to decrypt
 | |
| @return DB_SUCCESS or error */
 | |
| static dberr_t fil_space_decrypt_full_crc32(
 | |
| 	ulint			space,
 | |
| 	fil_space_crypt_t*	crypt_data,
 | |
| 	byte*			tmp_frame,
 | |
| 	byte*			src_frame)
 | |
| {
 | |
| 	uint key_version = mach_read_from_4(
 | |
| 		src_frame + FIL_PAGE_FCRC32_KEY_VERSION);
 | |
| 	lsn_t lsn = mach_read_from_8(src_frame + FIL_PAGE_LSN);
 | |
| 	uint offset = mach_read_from_4(src_frame + FIL_PAGE_OFFSET);
 | |
| 
 | |
| 	ut_ad(key_version != ENCRYPTION_KEY_NOT_ENCRYPTED);
 | |
| 
 | |
| 	memcpy(tmp_frame, src_frame, FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
 | |
| 
 | |
| 	/* Calculate the offset where decryption starts */
 | |
| 	const byte* src = src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION;
 | |
| 	byte* dst = tmp_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION;
 | |
| 	bool corrupted = false;
 | |
| 	uint size = buf_page_full_crc32_size(src_frame, NULL, &corrupted);
 | |
| 	if (UNIV_UNLIKELY(corrupted)) {
 | |
| 		return DB_DECRYPTION_FAILED;
 | |
| 	}
 | |
| 
 | |
| 	uint srclen = size - (FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION
 | |
| 			      + FIL_PAGE_FCRC32_CHECKSUM);
 | |
| 	uint dstlen = srclen;
 | |
| 
 | |
| 	int rc = encryption_scheme_decrypt(src, srclen, dst, &dstlen,
 | |
| 					   crypt_data, key_version,
 | |
| 					   (uint) space, offset, lsn);
 | |
| 
 | |
| 	if (rc != MY_AES_OK || dstlen != srclen) {
 | |
| 		return DB_DECRYPTION_FAILED;
 | |
| 	}
 | |
| 
 | |
| 	/* Copy only checksum part in the trailer */
 | |
| 	memcpy(tmp_frame + srv_page_size - FIL_PAGE_FCRC32_CHECKSUM,
 | |
| 	       src_frame + srv_page_size - FIL_PAGE_FCRC32_CHECKSUM,
 | |
| 	       FIL_PAGE_FCRC32_CHECKSUM);
 | |
| 
 | |
| 	srv_stats.pages_decrypted.inc();
 | |
| 
 | |
| 	return DB_SUCCESS; /* page was decrypted */
 | |
| }
 | |
| 
 | |
| /** Decrypt a page for non full checksum format.
 | |
| @param[in]	crypt_data		crypt_data
 | |
| @param[in]	tmp_frame		Temporary buffer
 | |
| @param[in]	physical_size		page size
 | |
| @param[in,out]	src_frame		Page to decrypt
 | |
| @return DB_SUCCESS or error */
 | |
| static dberr_t fil_space_decrypt_for_non_full_checksum(
 | |
| 	fil_space_crypt_t*	crypt_data,
 | |
| 	byte*			tmp_frame,
 | |
| 	ulint			physical_size,
 | |
| 	byte*			src_frame)
 | |
| {
 | |
| 	uint key_version = mach_read_from_4(
 | |
| 			src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
 | |
| 	bool page_compressed = (fil_page_get_type(src_frame)
 | |
| 				== FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED);
 | |
| 	uint offset = mach_read_from_4(src_frame + FIL_PAGE_OFFSET);
 | |
| 	uint space = mach_read_from_4(
 | |
| 			src_frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
 | |
| 	ib_uint64_t lsn = mach_read_from_8(src_frame + FIL_PAGE_LSN);
 | |
| 
 | |
| 	ut_ad(key_version != ENCRYPTION_KEY_NOT_ENCRYPTED);
 | |
| 
 | |
| 	/* read space & lsn */
 | |
| 	uint header_len = FIL_PAGE_DATA;
 | |
| 
 | |
| 	if (page_compressed) {
 | |
| 		header_len += FIL_PAGE_ENCRYPT_COMP_METADATA_LEN;
 | |
| 	}
 | |
| 
 | |
| 	/* Copy FIL page header, it is not encrypted */
 | |
| 	memcpy(tmp_frame, src_frame, header_len);
 | |
| 
 | |
| 	/* Calculate the offset where decryption starts */
 | |
| 	const byte* src = src_frame + header_len;
 | |
| 	byte* dst = tmp_frame + header_len;
 | |
| 	uint srclen = uint(physical_size) - header_len - FIL_PAGE_DATA_END;
 | |
| 	uint dstlen = srclen;
 | |
| 
 | |
| 	if (page_compressed) {
 | |
| 		srclen = mach_read_from_2(src_frame + FIL_PAGE_DATA);
 | |
| 	}
 | |
| 
 | |
| 	int rc = encryption_scheme_decrypt(src, srclen, dst, &dstlen,
 | |
| 					   crypt_data, key_version,
 | |
| 					   space, offset, lsn);
 | |
| 
 | |
| 	if (! ((rc == MY_AES_OK) && ((ulint) dstlen == srclen))) {
 | |
| 		return DB_DECRYPTION_FAILED;
 | |
| 	}
 | |
| 
 | |
| 	/* For compressed tables we do not store the FIL header because
 | |
| 	the whole page is not stored to the disk. In compressed tables only
 | |
| 	the FIL header + compressed (and now encrypted) payload alligned
 | |
| 	to sector boundary is written. */
 | |
| 	if (!page_compressed) {
 | |
| 		/* Copy FIL trailer */
 | |
| 		memcpy(tmp_frame + physical_size - FIL_PAGE_DATA_END,
 | |
| 		       src_frame + physical_size - FIL_PAGE_DATA_END,
 | |
| 		       FIL_PAGE_DATA_END);
 | |
| 	}
 | |
| 
 | |
| 	srv_stats.pages_decrypted.inc();
 | |
| 
 | |
| 	return DB_SUCCESS; /* page was decrypted */
 | |
| }
 | |
| 
 | |
| /** Decrypt a page.
 | |
| @param[in]	space_id		tablespace id
 | |
| @param[in]	fsp_flags		Tablespace flags
 | |
| @param[in]	crypt_data		crypt_data
 | |
| @param[in]	tmp_frame		Temporary buffer
 | |
| @param[in]	physical_size		page size
 | |
| @param[in,out]	src_frame		Page to decrypt
 | |
| @retval DB_SUCCESS on success
 | |
| @retval DB_DECRYPTION_FAILED on error */
 | |
| dberr_t
 | |
| fil_space_decrypt(
 | |
| 	uint32_t		space_id,
 | |
| 	uint32_t		fsp_flags,
 | |
| 	fil_space_crypt_t*	crypt_data,
 | |
| 	byte*			tmp_frame,
 | |
| 	ulint			physical_size,
 | |
| 	byte*			src_frame)
 | |
| {
 | |
| 	if (!crypt_data || !crypt_data->is_encrypted()) {
 | |
| 		return DB_DECRYPTION_FAILED;
 | |
| 	}
 | |
| 
 | |
| 	if (fil_space_t::full_crc32(fsp_flags)) {
 | |
| 		return fil_space_decrypt_full_crc32(
 | |
| 			space_id, crypt_data, tmp_frame, src_frame);
 | |
| 	}
 | |
| 
 | |
| 	return fil_space_decrypt_for_non_full_checksum(crypt_data, tmp_frame,
 | |
| 						       physical_size,
 | |
| 						       src_frame);
 | |
| }
 | |
| 
 | |
| /**
 | |
| Decrypt a page.
 | |
| @param[in]	space			Tablespace
 | |
| @param[in]	tmp_frame		Temporary buffer used for decrypting
 | |
| @param[in,out]	src_frame		Page to decrypt
 | |
| @return decrypted page, or original not encrypted page if decryption is
 | |
| not needed.
 | |
| @retval nullptr on failure */
 | |
| byte*
 | |
| fil_space_decrypt(
 | |
| 	const fil_space_t* space,
 | |
| 	byte*		tmp_frame,
 | |
| 	byte*		src_frame)
 | |
| {
 | |
| 	const ulint physical_size = space->physical_size();
 | |
| 
 | |
| 	ut_ad(space->referenced());
 | |
| 
 | |
| 	if (DB_SUCCESS != fil_space_decrypt(space->id, space->flags,
 | |
| 					    space->crypt_data,
 | |
| 					    tmp_frame, physical_size,
 | |
| 					    src_frame)) {
 | |
| 		return nullptr;
 | |
| 	}
 | |
| 
 | |
| 	/* Copy the decrypted page back to page buffer, not
 | |
| 	really any other options. */
 | |
| 	return static_cast<byte*>(memcpy(src_frame, tmp_frame, physical_size));
 | |
| }
 | |
| 
 | |
| /***********************************************************************/
 | |
| 
 | |
| /** A copy of global key state */
 | |
| struct key_state_t {
 | |
| 	key_state_t() : key_id(0), key_version(0),
 | |
| 			rotate_key_age(srv_fil_crypt_rotate_key_age) {}
 | |
| 	bool operator==(const key_state_t& other) const {
 | |
| 		return key_version == other.key_version &&
 | |
| 			rotate_key_age == other.rotate_key_age;
 | |
| 	}
 | |
| 	uint key_id;
 | |
| 	uint key_version;
 | |
| 	uint rotate_key_age;
 | |
| };
 | |
| 
 | |
| /***********************************************************************
 | |
| Copy global key state
 | |
| @param[in,out]	new_state	key state
 | |
| @param[in]	crypt_data	crypt data */
 | |
| static void
 | |
| fil_crypt_get_key_state(
 | |
| 	key_state_t*			new_state,
 | |
| 	fil_space_crypt_t*		crypt_data)
 | |
| {
 | |
| 	if (srv_encrypt_tables) {
 | |
| 		new_state->key_version = crypt_data->key_get_latest_version();
 | |
| 		new_state->rotate_key_age = srv_fil_crypt_rotate_key_age;
 | |
| 
 | |
| 		ut_a(new_state->key_version != ENCRYPTION_KEY_NOT_ENCRYPTED);
 | |
| 	} else {
 | |
| 		new_state->key_version = 0;
 | |
| 		new_state->rotate_key_age = 0;
 | |
| 	}
 | |
| }
 | |
| 
 | |
| /***********************************************************************
 | |
| Check if a key needs rotation given a key_state
 | |
| @param[in]	crypt_data		Encryption information
 | |
| @param[in]	key_version		Current key version
 | |
| @param[in]	latest_key_version	Latest key version
 | |
| @param[in]	rotate_key_age		when to rotate
 | |
| @return true if key needs rotation, false if not */
 | |
| static bool
 | |
| fil_crypt_needs_rotation(
 | |
| 	const fil_space_crypt_t*	crypt_data,
 | |
| 	uint				key_version,
 | |
| 	uint				latest_key_version,
 | |
| 	uint				rotate_key_age)
 | |
| {
 | |
| 	if (key_version == ENCRYPTION_KEY_VERSION_INVALID) {
 | |
| 		return false;
 | |
| 	}
 | |
| 
 | |
| 	if (key_version == 0 && latest_key_version != 0) {
 | |
| 		/* this is rotation unencrypted => encrypted
 | |
| 		* ignore rotate_key_age */
 | |
| 		return true;
 | |
| 	}
 | |
| 
 | |
| 	if (latest_key_version == 0 && key_version != 0) {
 | |
| 		if (crypt_data->encryption == FIL_ENCRYPTION_DEFAULT) {
 | |
| 			/* this is rotation encrypted => unencrypted */
 | |
| 			return true;
 | |
| 		}
 | |
| 		return false;
 | |
| 	}
 | |
| 
 | |
| 	if (crypt_data->encryption == FIL_ENCRYPTION_DEFAULT
 | |
| 	    && crypt_data->type == CRYPT_SCHEME_1
 | |
| 	    && !srv_encrypt_tables) {
 | |
| 		/* This is rotation encrypted => unencrypted */
 | |
| 		return true;
 | |
| 	}
 | |
| 
 | |
| 	if (rotate_key_age == 0) {
 | |
| 		return false;
 | |
| 	}
 | |
| 
 | |
| 	/* this is rotation encrypted => encrypted,
 | |
| 	* only reencrypt if key is sufficiently old */
 | |
| 	if (key_version + rotate_key_age < latest_key_version) {
 | |
| 		return true;
 | |
| 	}
 | |
| 
 | |
| 	return false;
 | |
| }
 | |
| 
 | |
| /** Read page 0 and possible crypt data from there.
 | |
| @param[in,out]	space		Tablespace */
 | |
| static inline void fil_crypt_read_crypt_data(fil_space_t *space)
 | |
| {
 | |
|   if (space->crypt_data || space->size || !space->get_size())
 | |
|     /* The encryption metadata has already been read, or the
 | |
|     tablespace is not encrypted and the file has been opened already,
 | |
|     or the file cannot be accessed, likely due to a concurrent DROP
 | |
|     (possibly as part of TRUNCATE or ALTER TABLE).
 | |
| 
 | |
|     FIXME: The file can become unaccessible any time after this check!
 | |
|     We should really remove this function and instead make crypt_data
 | |
|     an integral part of fil_space_t. */
 | |
|     return;
 | |
| 
 | |
|   const ulint zip_size= space->zip_size();
 | |
|   mtr_t mtr;
 | |
|   mtr.start();
 | |
|   if (buf_block_t* b= buf_page_get_gen(page_id_t{space->id, 0}, zip_size,
 | |
|                                        RW_S_LATCH, nullptr,
 | |
|                                        BUF_GET_POSSIBLY_FREED, &mtr))
 | |
|   {
 | |
|     mysql_mutex_lock(&fil_system.mutex);
 | |
|     if (!space->crypt_data && !space->is_stopping())
 | |
|       space->crypt_data= fil_space_read_crypt_data(zip_size, b->page.frame);
 | |
|     mysql_mutex_unlock(&fil_system.mutex);
 | |
|   }
 | |
|   mtr.commit();
 | |
| }
 | |
| 
 | |
| /** Start encrypting a space
 | |
| @param[in,out]		space		Tablespace
 | |
| @return true if a recheck of tablespace is needed by encryption thread. */
 | |
| static bool fil_crypt_start_encrypting_space(fil_space_t* space)
 | |
| {
 | |
| 	mysql_mutex_lock(&fil_crypt_threads_mutex);
 | |
| 
 | |
| 	fil_space_crypt_t *crypt_data = space->crypt_data;
 | |
| 
 | |
| 	/* If space is not encrypted and encryption is not enabled, then
 | |
| 	do not continue encrypting the space. */
 | |
| 	if (!crypt_data && !srv_encrypt_tables) {
 | |
| func_exit:
 | |
| 		mysql_mutex_unlock(&fil_crypt_threads_mutex);
 | |
| 		return false;
 | |
| 	}
 | |
| 
 | |
| 	const bool recheck = fil_crypt_start_converting;
 | |
| 
 | |
| 	if (recheck || crypt_data || space->is_stopping()) {
 | |
| 		mysql_mutex_unlock(&fil_crypt_threads_mutex);
 | |
| 		return recheck;
 | |
| 	}
 | |
| 
 | |
| 	/* NOTE: we need to write and flush page 0 before publishing
 | |
| 	* the crypt data. This so that after restart there is no
 | |
| 	* risk of finding encrypted pages without having
 | |
| 	* crypt data in page 0 */
 | |
| 
 | |
| 	/* 1 - create crypt data */
 | |
| 	crypt_data = fil_space_create_crypt_data(
 | |
| 		FIL_ENCRYPTION_DEFAULT, FIL_DEFAULT_ENCRYPTION_KEY);
 | |
| 
 | |
| 	if (!crypt_data) {
 | |
| 		goto func_exit;
 | |
| 	}
 | |
| 
 | |
| 	fil_crypt_start_converting = true;
 | |
| 	mysql_mutex_unlock(&fil_crypt_threads_mutex);
 | |
| 
 | |
| 	mtr_t mtr;
 | |
| 	mtr.start();
 | |
| 
 | |
| 	/* 2 - get page 0 */
 | |
| 	if (buf_block_t* block = buf_page_get_gen(
 | |
| 		    page_id_t(space->id, 0), space->zip_size(),
 | |
| 		    RW_X_LATCH, NULL, BUF_GET_POSSIBLY_FREED, &mtr)) {
 | |
| 		crypt_data->type = CRYPT_SCHEME_1;
 | |
| 		crypt_data->min_key_version = 0; // all pages are unencrypted
 | |
| 		crypt_data->rotate_state.start_time = time(0);
 | |
| 		crypt_data->rotate_state.starting = true;
 | |
| 		crypt_data->rotate_state.active_threads = 1;
 | |
| 
 | |
| 		mysql_mutex_lock(&fil_system.mutex);
 | |
| 		const bool stopping = space->is_stopping();
 | |
| 		if (!stopping) {
 | |
| 			space->crypt_data = crypt_data;
 | |
| 		}
 | |
| 		mysql_mutex_unlock(&fil_system.mutex);
 | |
| 
 | |
| 		if (stopping) {
 | |
| 			goto abort;
 | |
| 		}
 | |
| 
 | |
| 		/* 3 - write crypt data to page 0 */
 | |
| 		mtr.set_named_space(space);
 | |
| 		crypt_data->write_page0(block, &mtr);
 | |
| 
 | |
| 		mtr.commit();
 | |
| 
 | |
| 		/* 4 - sync tablespace before publishing crypt data */
 | |
| 		while (buf_flush_list_space(space));
 | |
| 
 | |
| 		/* 5 - publish crypt data */
 | |
| 		mysql_mutex_lock(&fil_crypt_threads_mutex);
 | |
| 		mysql_mutex_lock(&crypt_data->mutex);
 | |
| 		crypt_data->type = CRYPT_SCHEME_1;
 | |
| 		ut_a(crypt_data->rotate_state.active_threads == 1);
 | |
| 		crypt_data->rotate_state.active_threads = 0;
 | |
| 		crypt_data->rotate_state.starting = false;
 | |
| 
 | |
| 		fil_crypt_start_converting = false;
 | |
| 		mysql_mutex_unlock(&fil_crypt_threads_mutex);
 | |
| 		mysql_mutex_unlock(&crypt_data->mutex);
 | |
| 
 | |
| 		return false;
 | |
| 	}
 | |
| 
 | |
| abort:
 | |
| 	mtr.commit();
 | |
| 	mysql_mutex_lock(&fil_crypt_threads_mutex);
 | |
| 	fil_crypt_start_converting = false;
 | |
| 	mysql_mutex_unlock(&fil_crypt_threads_mutex);
 | |
| 
 | |
| 	crypt_data->~fil_space_crypt_t();
 | |
| 	ut_free(crypt_data);
 | |
| 	return false;
 | |
| }
 | |
| 
 | |
| /** State of a rotation thread */
 | |
| struct rotate_thread_t {
 | |
|   explicit rotate_thread_t(uint no) : thread_no(no) {}
 | |
| 
 | |
|   uint thread_no;
 | |
|   bool first = true;              /*!< is position before first space */
 | |
|   space_list_t::iterator space
 | |
|     = fil_system.space_list.end();/*!< current space or .end() */
 | |
|   uint32_t offset = 0;            /*!< current page number */
 | |
|   ulint batch = 0;                /*!< #pages to rotate */
 | |
|   uint min_key_version_found = 0; /*!< min key version found but not rotated */
 | |
|   lsn_t end_lsn = 0;              /*!< max lsn when rotating this space */
 | |
| 
 | |
|   uint estimated_max_iops = 20;/*!< estimation of max iops */
 | |
|   uint allocated_iops = 0;     /*!< allocated iops */
 | |
|   ulint cnt_waited = 0;	       /*!< #times waited during this slot */
 | |
|   uintmax_t sum_waited_us = 0; /*!< wait time during this slot */
 | |
| 
 | |
| 	fil_crypt_stat_t crypt_stat; // statistics
 | |
| 
 | |
| 	/** @return whether this thread should terminate */
 | |
| 	bool should_shutdown() const {
 | |
| 		mysql_mutex_assert_owner(&fil_crypt_threads_mutex);
 | |
| 		switch (srv_shutdown_state) {
 | |
| 		case SRV_SHUTDOWN_NONE:
 | |
| 			return thread_no >= srv_n_fil_crypt_threads;
 | |
| 		case SRV_SHUTDOWN_EXIT_THREADS:
 | |
| 			/* srv_init_abort() must have been invoked */
 | |
| 		case SRV_SHUTDOWN_CLEANUP:
 | |
| 		case SRV_SHUTDOWN_INITIATED:
 | |
| 			return true;
 | |
| 		case SRV_SHUTDOWN_LAST_PHASE:
 | |
| 			break;
 | |
| 		}
 | |
| 		ut_ad(0);
 | |
| 		return true;
 | |
| 	}
 | |
| };
 | |
| 
 | |
| /** Avoid the removal of the tablespace from
 | |
| default_encrypt_list only when
 | |
| 1) Another active encryption thread working on tablespace
 | |
| 2) Eligible for tablespace key rotation
 | |
| 3) Tablespace is in flushing phase
 | |
| @return true if tablespace should be removed from
 | |
| default encrypt */
 | |
| static bool fil_crypt_must_remove(const fil_space_t &space)
 | |
| {
 | |
|   ut_ad(!space.is_temporary());
 | |
|   ut_ad(!space.is_being_imported());
 | |
|   fil_space_crypt_t *crypt_data = space.crypt_data;
 | |
|   mysql_mutex_assert_owner(&fil_system.mutex);
 | |
|   const ulong encrypt_tables= srv_encrypt_tables;
 | |
|   if (!crypt_data)
 | |
|     return !encrypt_tables;
 | |
|   if (!crypt_data->is_key_found())
 | |
|     return true;
 | |
| 
 | |
|   mysql_mutex_lock(&crypt_data->mutex);
 | |
|   const bool remove= (space.is_stopping() || crypt_data->not_encrypted()) &&
 | |
|     (!crypt_data->rotate_state.flushing &&
 | |
|      !encrypt_tables == !!crypt_data->min_key_version &&
 | |
|      !crypt_data->rotate_state.active_threads);
 | |
|   mysql_mutex_unlock(&crypt_data->mutex);
 | |
|   return remove;
 | |
| }
 | |
| 
 | |
| /***********************************************************************
 | |
| Check if space needs rotation given a key_state
 | |
| @param[in,out]		state		Key rotation state
 | |
| @param[in,out]		key_state	Key state
 | |
| @param[in,out]		recheck		needs recheck ?
 | |
| @return true if space needs key rotation */
 | |
| static
 | |
| bool
 | |
| fil_crypt_space_needs_rotation(
 | |
| 	rotate_thread_t*	state,
 | |
| 	key_state_t*		key_state,
 | |
| 	bool*			recheck)
 | |
| {
 | |
| 	mysql_mutex_assert_not_owner(&fil_crypt_threads_mutex);
 | |
| 
 | |
| 	fil_space_t* space = &*state->space;
 | |
| 
 | |
| 	ut_ad(space->referenced());
 | |
| 	ut_ad(!space->is_temporary());
 | |
| 	ut_ad(!space->is_being_imported());
 | |
| 
 | |
| 	fil_space_crypt_t *crypt_data = space->crypt_data;
 | |
| 
 | |
| 	if (crypt_data == NULL) {
 | |
| 		/**
 | |
| 		* space has no crypt data
 | |
| 		*   start encrypting it...
 | |
| 		*/
 | |
| 		*recheck = fil_crypt_start_encrypting_space(space);
 | |
| 		crypt_data = space->crypt_data;
 | |
| 
 | |
| 		if (crypt_data == NULL) {
 | |
| 			return false;
 | |
| 		}
 | |
| 
 | |
| 		crypt_data->key_get_latest_version();
 | |
| 	}
 | |
| 
 | |
| 	/* If used key_id is not found from encryption plugin we can't
 | |
| 	continue to rotate the tablespace */
 | |
| 	if (!crypt_data->is_key_found()) {
 | |
| 		return false;
 | |
| 	}
 | |
| 
 | |
| 	bool need_key_rotation = false;
 | |
| 
 | |
| 	mysql_mutex_lock(&crypt_data->mutex);
 | |
| 
 | |
| 	do {
 | |
| 		/* prevent threads from starting to rotate space */
 | |
| 		if (crypt_data->rotate_state.starting) {
 | |
| 			/* recheck this space later */
 | |
| 			*recheck = true;
 | |
| 			break;
 | |
| 		}
 | |
| 
 | |
| 		/* prevent threads from starting to rotate space */
 | |
| 		if (space->is_stopping()) {
 | |
| 			break;
 | |
| 		}
 | |
| 
 | |
| 		if (crypt_data->rotate_state.flushing) {
 | |
| 			break;
 | |
| 		}
 | |
| 
 | |
| 		/* No need to rotate space if encryption is disabled */
 | |
| 		if (crypt_data->not_encrypted()) {
 | |
| 			break;
 | |
| 		}
 | |
| 
 | |
| 		if (crypt_data->key_id != key_state->key_id) {
 | |
| 			key_state->key_id= crypt_data->key_id;
 | |
| 			fil_crypt_get_key_state(key_state, crypt_data);
 | |
| 		}
 | |
| 
 | |
| 		need_key_rotation = fil_crypt_needs_rotation(
 | |
| 			crypt_data,
 | |
| 			crypt_data->min_key_version,
 | |
| 			key_state->key_version,
 | |
| 			key_state->rotate_key_age);
 | |
| 	} while (0);
 | |
| 
 | |
| 	mysql_mutex_unlock(&crypt_data->mutex);
 | |
| 	return need_key_rotation;
 | |
| }
 | |
| 
 | |
| /***********************************************************************
 | |
| Update global statistics with thread statistics
 | |
| @param[in,out]	state		key rotation statistics */
 | |
| static void
 | |
| fil_crypt_update_total_stat(
 | |
| 	rotate_thread_t *state)
 | |
| {
 | |
| 	mysql_mutex_lock(&crypt_stat_mutex);
 | |
| 	crypt_stat.pages_read_from_cache +=
 | |
| 		state->crypt_stat.pages_read_from_cache;
 | |
| 	crypt_stat.pages_read_from_disk +=
 | |
| 		state->crypt_stat.pages_read_from_disk;
 | |
| 	crypt_stat.pages_modified += state->crypt_stat.pages_modified;
 | |
| 	crypt_stat.pages_flushed += state->crypt_stat.pages_flushed;
 | |
| 	// remote old estimate
 | |
| 	crypt_stat.estimated_iops -= state->crypt_stat.estimated_iops;
 | |
| 	// add new estimate
 | |
| 	crypt_stat.estimated_iops += state->estimated_max_iops;
 | |
| 	mysql_mutex_unlock(&crypt_stat_mutex);
 | |
| 
 | |
| 	// make new estimate "current" estimate
 | |
| 	state->crypt_stat.pages_read_from_cache = 0;
 | |
| 	state->crypt_stat.pages_read_from_disk = 0;
 | |
| 	state->crypt_stat.pages_modified = 0;
 | |
| 	state->crypt_stat.pages_flushed = 0;
 | |
| 	// record our old (current) estimate
 | |
| 	state->crypt_stat.estimated_iops = state->estimated_max_iops;
 | |
| }
 | |
| 
 | |
| /***********************************************************************
 | |
| Allocate iops to thread from global setting,
 | |
| used before starting to rotate a space.
 | |
| @param[in,out]		state		Rotation state
 | |
| @return true if allocation succeeded, false if failed */
 | |
| static bool fil_crypt_alloc_iops(rotate_thread_t *state)
 | |
| {
 | |
| 	mysql_mutex_assert_owner(&fil_crypt_threads_mutex);
 | |
| 	ut_ad(state->allocated_iops == 0);
 | |
| 
 | |
| 	/* We have not yet selected the space to rotate, thus
 | |
| 	state might not contain space and we can't check
 | |
| 	its status yet. */
 | |
| 
 | |
| 	uint max_iops = state->estimated_max_iops;
 | |
| 
 | |
| 	if (n_fil_crypt_iops_allocated >= srv_n_fil_crypt_iops) {
 | |
| wait:
 | |
| 		my_cond_wait(&fil_crypt_threads_cond,
 | |
| 			     &fil_crypt_threads_mutex.m_mutex);
 | |
| 		return false;
 | |
| 	}
 | |
| 
 | |
| 	uint alloc = srv_n_fil_crypt_iops - n_fil_crypt_iops_allocated;
 | |
| 
 | |
| 	if (alloc > max_iops) {
 | |
| 		alloc = max_iops;
 | |
| 	}
 | |
| 
 | |
| 	if (!alloc) {
 | |
| 		goto wait;
 | |
| 	}
 | |
| 
 | |
| 	n_fil_crypt_iops_allocated += alloc;
 | |
| 
 | |
| 	state->allocated_iops = alloc;
 | |
| 	return true;
 | |
| }
 | |
| 
 | |
| /**
 | |
| Reallocate iops to thread when processing a tablespace
 | |
| @param[in,out]		state		Rotation state
 | |
| @return whether the thread should continue running */
 | |
| static bool fil_crypt_realloc_iops(rotate_thread_t *state)
 | |
| {
 | |
| 	ut_a(state->allocated_iops > 0);
 | |
| 
 | |
| 	if (10 * state->cnt_waited > state->batch) {
 | |
| 		/* if we waited more than 10% re-estimate max_iops */
 | |
| 		ulint avg_wait_time_us =
 | |
| 			ulint(state->sum_waited_us / state->cnt_waited);
 | |
| 
 | |
| 		if (avg_wait_time_us == 0) {
 | |
| 			avg_wait_time_us = 1; // prevent division by zero
 | |
| 		}
 | |
| 
 | |
| 		DBUG_PRINT("ib_crypt",
 | |
| 			("thr_no: %u - update estimated_max_iops from %u to "
 | |
| 			 ULINTPF ".",
 | |
| 			state->thread_no,
 | |
| 			state->estimated_max_iops,
 | |
| 			1000000 / avg_wait_time_us));
 | |
| 
 | |
| 		state->estimated_max_iops = std::max(
 | |
| 			1U, uint(1000000 / avg_wait_time_us));
 | |
| 		state->cnt_waited = 0;
 | |
| 		state->sum_waited_us = 0;
 | |
| 	} else {
 | |
| 		DBUG_PRINT("ib_crypt",
 | |
| 			   ("thr_no: %u only waited " ULINTPF
 | |
| 			    "%% skip re-estimate.",
 | |
| 			    state->thread_no,
 | |
| 			    (100 * state->cnt_waited)
 | |
| 			    / (state->batch ? state->batch : 1)));
 | |
| 	}
 | |
| 
 | |
| 	ut_ad(state->estimated_max_iops);
 | |
| 
 | |
| 	mysql_mutex_lock(&fil_crypt_threads_mutex);
 | |
| 
 | |
| 	if (state->should_shutdown()) {
 | |
| 		mysql_mutex_unlock(&fil_crypt_threads_mutex);
 | |
| 		return false;
 | |
| 	}
 | |
| 
 | |
| 	if (state->allocated_iops > state->estimated_max_iops) {
 | |
| 		/* release iops */
 | |
| 		uint extra = state->allocated_iops - state->estimated_max_iops;
 | |
| 		state->allocated_iops = state->estimated_max_iops;
 | |
| 		ut_ad(n_fil_crypt_iops_allocated >= extra);
 | |
| 		n_fil_crypt_iops_allocated -= extra;
 | |
| 		pthread_cond_broadcast(&fil_crypt_threads_cond);
 | |
| 	} else if (srv_n_fil_crypt_iops > n_fil_crypt_iops_allocated) {
 | |
| 		/* there are extra iops free */
 | |
| 		uint add = srv_n_fil_crypt_iops - n_fil_crypt_iops_allocated;
 | |
| 		if (state->allocated_iops + add > state->estimated_max_iops) {
 | |
| 			/* but don't alloc more than our max */
 | |
| 			add= state->estimated_max_iops - state->allocated_iops;
 | |
| 		}
 | |
| 		n_fil_crypt_iops_allocated += add;
 | |
| 		state->allocated_iops += add;
 | |
| 
 | |
| 		DBUG_PRINT("ib_crypt",
 | |
| 			   ("thr_no: %u increased iops from %u to %u.",
 | |
| 			    state->thread_no,
 | |
| 			    state->allocated_iops - add,
 | |
| 			    state->allocated_iops));
 | |
| 	}
 | |
| 
 | |
| 	fil_crypt_update_total_stat(state);
 | |
| 	mysql_mutex_unlock(&fil_crypt_threads_mutex);
 | |
| 	return true;
 | |
| }
 | |
| 
 | |
| /** Release excess allocated iops
 | |
| @param state   rotation state
 | |
| @param wake    whether to wake up other threads */
 | |
| static void fil_crypt_return_iops(rotate_thread_t *state, bool wake= true)
 | |
| {
 | |
|   mysql_mutex_assert_owner(&fil_crypt_threads_mutex);
 | |
| 
 | |
|   if (uint iops= state->allocated_iops)
 | |
|   {
 | |
|     ut_ad(n_fil_crypt_iops_allocated >= iops);
 | |
|     n_fil_crypt_iops_allocated-= iops;
 | |
|     state->allocated_iops= 0;
 | |
|     if (wake)
 | |
|       pthread_cond_broadcast(&fil_crypt_threads_cond);
 | |
|   }
 | |
| 
 | |
|   fil_crypt_update_total_stat(state);
 | |
| }
 | |
| 
 | |
| /** Acquire a tablespace reference.
 | |
| @return whether a tablespace reference was successfully acquired */
 | |
| inline bool fil_space_t::acquire_if_not_stopped()
 | |
| {
 | |
|   mysql_mutex_assert_owner(&fil_system.mutex);
 | |
|   const uint32_t n= acquire_low();
 | |
|   if (UNIV_LIKELY(!(n & (STOPPING | CLOSING))))
 | |
|     return true;
 | |
|   if (UNIV_UNLIKELY(n & STOPPING))
 | |
|     return false;
 | |
|   return UNIV_LIKELY(!(n & CLOSING)) || prepare_acquired();
 | |
| }
 | |
| 
 | |
| bool fil_crypt_must_default_encrypt()
 | |
| {
 | |
|   /* prevents a race condition with fil_crypt_set_rotate_key_age() */
 | |
|   mysql_mutex_assert_owner(&fil_system.mutex);
 | |
|   return !srv_fil_crypt_rotate_key_age || !srv_encrypt_rotate;
 | |
| }
 | |
| 
 | |
| /** Return the next tablespace from default_encrypt_tables list.
 | |
| @param space   previous tablespace (nullptr to start from the start)
 | |
| @param recheck whether the removal condition needs to be rechecked after
 | |
| the encryption parameters were changed
 | |
| @param encrypt expected state of innodb_encrypt_tables
 | |
| @return the next tablespace to process (n_pending_ops incremented)
 | |
| @retval fil_system.temp_space if there is no work to do
 | |
| @retval nullptr upon reaching the end of the iteration */
 | |
| inline fil_space_t *fil_system_t::default_encrypt_next(fil_space_t *space,
 | |
|                                                        bool recheck,
 | |
|                                                        bool encrypt) noexcept
 | |
| {
 | |
|   mysql_mutex_assert_owner(&mutex);
 | |
| 
 | |
|   auto it= space && space->is_in_default_encrypt
 | |
|     ? sized_ilist<fil_space_t, default_encrypt_tag_t>::iterator(space)
 | |
|     : default_encrypt_tables.begin();
 | |
|   const auto end= default_encrypt_tables.end();
 | |
| 
 | |
|   if (space)
 | |
|   {
 | |
|     const bool released= !space->release();
 | |
| 
 | |
|     if (space->is_in_default_encrypt)
 | |
|     {
 | |
|       while (++it != end &&
 | |
|              (!UT_LIST_GET_LEN(it->chain) || it->is_stopping()));
 | |
| 
 | |
|       /* If one of the encryption threads already started
 | |
|       the encryption of the table then don't remove the
 | |
|       unencrypted spaces from default encrypt list.
 | |
| 
 | |
|       If there is a change in innodb_encrypt_tables variables
 | |
|       value then don't remove the last processed tablespace
 | |
|       from the default encrypt list. */
 | |
|       if (released && !recheck && fil_crypt_must_remove(*space))
 | |
|       {
 | |
|         ut_a(!default_encrypt_tables.empty());
 | |
|         default_encrypt_tables.remove(*space);
 | |
|         space->is_in_default_encrypt= false;
 | |
|       }
 | |
|     }
 | |
|   }
 | |
|   else while (it != end &&
 | |
| 	      (!UT_LIST_GET_LEN(it->chain) || it->is_stopping()))
 | |
|   {
 | |
|     /* Find the next suitable default encrypt table if
 | |
|     beginning of default_encrypt_tables list has been scheduled
 | |
|     to be deleted */
 | |
|     it++;
 | |
|   }
 | |
| 
 | |
|   if (it == end)
 | |
|     return temp_space;
 | |
| 
 | |
|   do
 | |
|   {
 | |
|     space= &*it;
 | |
|     if (space->acquire_if_not_stopped())
 | |
|       return space;
 | |
|     if (++it == end)
 | |
|       return nullptr;
 | |
|   }
 | |
|   while (!UT_LIST_GET_LEN(it->chain) || it->is_stopping());
 | |
| 
 | |
|   return nullptr;
 | |
| }
 | |
| 
 | |
| /** Determine the next tablespace for encryption key rotation.
 | |
| @param space    current tablespace (nullptr to start from the beginning)
 | |
| @param recheck  whether the removal condition needs to be rechecked after
 | |
| encryption parameters were changed
 | |
| @param encrypt  expected state of innodb_encrypt_tables
 | |
| @return the next tablespace
 | |
| @retval fil_system.temp_space if there is no work to do
 | |
| @retval end() upon reaching the end of the iteration */
 | |
| space_list_t::iterator fil_space_t::next(space_list_t::iterator space,
 | |
|                                          bool recheck, bool encrypt) noexcept
 | |
| {
 | |
|   mysql_mutex_lock(&fil_system.mutex);
 | |
| 
 | |
|   if (fil_crypt_must_default_encrypt())
 | |
|   {
 | |
|     fil_space_t *next_space=
 | |
|       fil_system.default_encrypt_next(space == fil_system.space_list.end()
 | |
| 				      ? nullptr : &*space, recheck, encrypt);
 | |
|     space= next_space
 | |
|       ? space_list_t::iterator(next_space)
 | |
|       : fil_system.space_list.end();
 | |
|   }
 | |
|   else
 | |
|   {
 | |
|     if (space == fil_system.space_list.end())
 | |
|       space= fil_system.space_list.begin();
 | |
|     else
 | |
|     {
 | |
|       /* Move on to the next fil_space_t */
 | |
|       space->release();
 | |
|       ++space;
 | |
|     }
 | |
| 
 | |
|     for (; space != fil_system.space_list.end(); ++space)
 | |
|     {
 | |
|       if (space->is_temporary() || space->is_being_imported())
 | |
|         continue;
 | |
|       const uint32_t n= space->acquire_low();
 | |
|       if (UNIV_LIKELY(!(n & (STOPPING | CLOSING))))
 | |
|         break;
 | |
|       if (!(n & STOPPING) && space->prepare_acquired())
 | |
|         break;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   mysql_mutex_unlock(&fil_system.mutex);
 | |
|   return space;
 | |
| }
 | |
| 
 | |
| /** Search for a space needing rotation
 | |
| @param[in,out]	key_state	Key state
 | |
| @param[in,out]	state		Rotation state
 | |
| @param[in,out]	recheck		recheck of the tablespace is needed or
 | |
| 				still encryption thread does write page 0
 | |
| @return whether the thread should keep running */
 | |
| static bool fil_crypt_find_space_to_rotate(
 | |
| 	key_state_t*		key_state,
 | |
| 	rotate_thread_t*	state,
 | |
| 	bool*			recheck) noexcept
 | |
| {
 | |
| 	/* we need iops to start rotating */
 | |
| 	do {
 | |
| 		if (state->should_shutdown()) {
 | |
| 			if (state->space != fil_system.space_list.end()) {
 | |
| 				state->space->release();
 | |
| 				state->space = fil_system.space_list.end();
 | |
| 			}
 | |
| 			return false;
 | |
| 		}
 | |
| 	} while (!fil_crypt_alloc_iops(state));
 | |
| 
 | |
| 	if (state->first) {
 | |
| 		state->first = false;
 | |
| 		if (state->space != fil_system.space_list.end()) {
 | |
| 			state->space->release();
 | |
| 		}
 | |
| 		state->space = fil_system.space_list.end();
 | |
| 	}
 | |
| 
 | |
| 	state->space = fil_space_t::next(state->space, *recheck,
 | |
| 					 key_state->key_version != 0);
 | |
| 
 | |
| 	bool wake = true;
 | |
| 	while (state->space != fil_system.space_list.end()) {
 | |
| 		if (state->space
 | |
| 			== space_list_t::iterator(fil_system.temp_space)) {
 | |
| 			wake = false;
 | |
| 			goto done;
 | |
| 		}
 | |
| 
 | |
| 		if (state->should_shutdown()) {
 | |
| 			state->space->release();
 | |
| done:
 | |
| 			state->space = fil_system.space_list.end();
 | |
| 			break;
 | |
| 		}
 | |
| 
 | |
| 		mysql_mutex_unlock(&fil_crypt_threads_mutex);
 | |
| 		/* If there is no crypt data and we have not yet read
 | |
| 		page 0 for this tablespace, we need to read it before
 | |
| 		we can continue. */
 | |
| 		if (!state->space->crypt_data) {
 | |
| 			fil_crypt_read_crypt_data(&*state->space);
 | |
| 		}
 | |
| 
 | |
| 		if (fil_crypt_space_needs_rotation(state, key_state, recheck)) {
 | |
| 			ut_ad(key_state->key_id);
 | |
| 			/* init state->min_key_version_found before
 | |
| 			* starting on a space */
 | |
| 			state->min_key_version_found = key_state->key_version;
 | |
| 			mysql_mutex_lock(&fil_crypt_threads_mutex);
 | |
| 			return true;
 | |
| 		}
 | |
| 
 | |
| 		state->space = fil_space_t::next(state->space, *recheck,
 | |
| 						 key_state->key_version != 0);
 | |
| 		mysql_mutex_lock(&fil_crypt_threads_mutex);
 | |
| 	}
 | |
| 
 | |
| 	/* no work to do; release our allocation of I/O capacity */
 | |
| 	fil_crypt_return_iops(state, wake);
 | |
| 	return true;
 | |
| }
 | |
| 
 | |
| /***********************************************************************
 | |
| Start rotating a space
 | |
| @param[in]	key_state		Key state
 | |
| @param[in,out]	state			Rotation state */
 | |
| static
 | |
| void
 | |
| fil_crypt_start_rotate_space(
 | |
| 	const key_state_t*	key_state,
 | |
| 	rotate_thread_t*	state)
 | |
| {
 | |
| 	fil_space_crypt_t *crypt_data = state->space->crypt_data;
 | |
| 
 | |
| 	ut_ad(crypt_data);
 | |
| 	mysql_mutex_lock(&crypt_data->mutex);
 | |
| 	ut_ad(key_state->key_id == crypt_data->key_id);
 | |
| 
 | |
| 	if (crypt_data->rotate_state.active_threads == 0) {
 | |
| 		/* only first thread needs to init */
 | |
| 		crypt_data->rotate_state.next_offset = 1; // skip page 0
 | |
| 		/* no need to rotate beyond current max
 | |
| 		* if space extends, it will be encrypted with newer version */
 | |
| 		/* FIXME: max_offset could be removed and instead
 | |
| 		space->size consulted.*/
 | |
| 		crypt_data->rotate_state.max_offset = state->space->size;
 | |
| 		crypt_data->rotate_state.end_lsn = 0;
 | |
| 		crypt_data->rotate_state.min_key_version_found =
 | |
| 			key_state->key_version;
 | |
| 
 | |
| 		crypt_data->rotate_state.start_time = time(0);
 | |
| 
 | |
| 		if (crypt_data->type == CRYPT_SCHEME_UNENCRYPTED &&
 | |
| 			crypt_data->is_encrypted() &&
 | |
| 			key_state->key_version != 0) {
 | |
| 			/* this is rotation unencrypted => encrypted */
 | |
| 			crypt_data->type = CRYPT_SCHEME_1;
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	/* count active threads in space */
 | |
| 	crypt_data->rotate_state.active_threads++;
 | |
| 
 | |
| 	/* Initialize thread local state */
 | |
| 	state->end_lsn = crypt_data->rotate_state.end_lsn;
 | |
| 	state->min_key_version_found =
 | |
| 		crypt_data->rotate_state.min_key_version_found;
 | |
| 
 | |
| 	mysql_mutex_unlock(&crypt_data->mutex);
 | |
| }
 | |
| 
 | |
| /***********************************************************************
 | |
| Search for batch of pages needing rotation
 | |
| @param[in]	key_state		Key state
 | |
| @param[in,out]	state			Rotation state
 | |
| @return true if page needing key rotation found, false if not found */
 | |
| static
 | |
| bool
 | |
| fil_crypt_find_page_to_rotate(
 | |
| 	const key_state_t*	key_state,
 | |
| 	rotate_thread_t*	state)
 | |
| {
 | |
| 	ulint batch = srv_alloc_time * state->allocated_iops;
 | |
| 
 | |
| 	ut_ad(state->space == fil_system.space_list.end()
 | |
| 		|| state->space->referenced());
 | |
| 
 | |
| 	/* If space is marked to be dropped stop rotation. */
 | |
| 	if (state->space == fil_system.space_list.end()
 | |
| 		|| state->space->is_stopping()) {
 | |
| 		return false;
 | |
| 	}
 | |
| 
 | |
| 	fil_space_crypt_t *crypt_data = state->space->crypt_data;
 | |
| 
 | |
| 	mysql_mutex_lock(&crypt_data->mutex);
 | |
| 	ut_ad(key_state->key_id == crypt_data->key_id);
 | |
| 
 | |
| 	bool found = crypt_data->rotate_state.max_offset >=
 | |
| 		crypt_data->rotate_state.next_offset;
 | |
| 
 | |
| 	if (found) {
 | |
| 		state->offset = crypt_data->rotate_state.next_offset;
 | |
| 		ulint remaining = crypt_data->rotate_state.max_offset -
 | |
| 			crypt_data->rotate_state.next_offset;
 | |
| 
 | |
| 		if (batch <= remaining) {
 | |
| 			state->batch = batch;
 | |
| 		} else {
 | |
| 			state->batch = remaining;
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	crypt_data->rotate_state.next_offset += uint32_t(batch);
 | |
| 	mysql_mutex_unlock(&crypt_data->mutex);
 | |
| 	return found;
 | |
| }
 | |
| 
 | |
| /***********************************************************************
 | |
| Get a page and compute sleep time
 | |
| @param[in,out]		state		Rotation state
 | |
| @param[in]		offset		Page offset
 | |
| @param[in,out]		mtr		Minitransaction
 | |
| @param[out]		sleeptime_ms	Sleep time
 | |
| @return page or NULL*/
 | |
| static
 | |
| buf_block_t*
 | |
| fil_crypt_get_page_throttle(
 | |
| 	rotate_thread_t*	state,
 | |
| 	uint32_t		offset,
 | |
| 	mtr_t*			mtr,
 | |
| 	ulint*			sleeptime_ms)
 | |
| {
 | |
| 	fil_space_t* space = &*state->space;
 | |
| 	const ulint zip_size = space->zip_size();
 | |
| 	const page_id_t page_id(space->id, offset);
 | |
| 	ut_ad(space->referenced());
 | |
| 
 | |
| 	/* Before reading from tablespace we need to make sure that
 | |
| 	the tablespace is not about to be dropped. */
 | |
| 	if (space->is_stopping()) {
 | |
| 		return NULL;
 | |
| 	}
 | |
| 
 | |
| 	buf_block_t* block = buf_page_get_gen(page_id, zip_size, RW_X_LATCH,
 | |
| 					      NULL,
 | |
| 					      BUF_PEEK_IF_IN_POOL, mtr);
 | |
| 	if (block != NULL) {
 | |
| 		/* page was in buffer pool */
 | |
| 		btr_search_drop_page_hash_index(
 | |
| 			block, reinterpret_cast<dict_index_t*>(-1));
 | |
| 		state->crypt_stat.pages_read_from_cache++;
 | |
| 		return block;
 | |
| 	}
 | |
| 
 | |
| 	if (space->is_stopping()) {
 | |
| 		return NULL;
 | |
| 	}
 | |
| 
 | |
| 	if (offset % (zip_size ? zip_size : srv_page_size)
 | |
| 	    && DB_SUCCESS_LOCKED_REC
 | |
| 	    != fseg_page_is_allocated(space, offset)) {
 | |
| 		/* page is already freed */
 | |
| 		return NULL;
 | |
| 	}
 | |
| 
 | |
| 	state->crypt_stat.pages_read_from_disk++;
 | |
| 
 | |
| 	const ulonglong start = my_interval_timer();
 | |
| 	block = buf_page_get_gen(page_id, zip_size,
 | |
| 				 RW_X_LATCH,
 | |
| 				 NULL, BUF_GET_POSSIBLY_FREED, mtr);
 | |
| 	const ulonglong end = my_interval_timer();
 | |
| 
 | |
| 	state->cnt_waited++;
 | |
| 
 | |
| 	if (end > start) {
 | |
| 		state->sum_waited_us += (end - start) / 1000;
 | |
| 	}
 | |
| 
 | |
| 	/* average page load */
 | |
| 	ulint add_sleeptime_ms = 0;
 | |
| 	ulint avg_wait_time_us =ulint(state->sum_waited_us / state->cnt_waited);
 | |
| 	ulint alloc_wait_us = 1000000 / state->allocated_iops;
 | |
| 
 | |
| 	if (avg_wait_time_us < alloc_wait_us) {
 | |
| 		/* we reading faster than we allocated */
 | |
| 		add_sleeptime_ms = (alloc_wait_us - avg_wait_time_us) / 1000;
 | |
| 	} else {
 | |
| 		/* if page load time is longer than we want, skip sleeping */
 | |
| 	}
 | |
| 
 | |
| 	*sleeptime_ms += add_sleeptime_ms;
 | |
| 
 | |
| 	return block;
 | |
| }
 | |
| 
 | |
| /***********************************************************************
 | |
| Rotate one page
 | |
| @param[in,out]		key_state		Key state
 | |
| @param[in,out]		state			Rotation state */
 | |
| static
 | |
| void
 | |
| fil_crypt_rotate_page(
 | |
| 	const key_state_t*	key_state,
 | |
| 	rotate_thread_t*	state)
 | |
| {
 | |
| 	fil_space_t *space = &*state->space;
 | |
| 	ulint space_id = space->id;
 | |
| 	uint32_t offset = state->offset;
 | |
| 	ulint sleeptime_ms = 0;
 | |
| 	fil_space_crypt_t *crypt_data = space->crypt_data;
 | |
| 
 | |
| 	ut_ad(space->referenced());
 | |
| 	ut_ad(offset > 0);
 | |
| 
 | |
| 	/* In fil_crypt_thread where key rotation is done we have
 | |
| 	acquired space and checked that this space is not yet
 | |
| 	marked to be dropped. Similarly, in fil_crypt_find_page_to_rotate().
 | |
| 	Check here also to give DROP TABLE or similar a change. */
 | |
| 	if (space->is_stopping()) {
 | |
| 		return;
 | |
| 	}
 | |
| 
 | |
| 	if (space_id == TRX_SYS_SPACE && offset == TRX_SYS_PAGE_NO) {
 | |
| 		/* don't encrypt this as it contains address to dblwr buffer */
 | |
| 		return;
 | |
| 	}
 | |
| 
 | |
| 	mtr_t mtr;
 | |
| 	mtr.start();
 | |
| 	if (buf_block_t* block = fil_crypt_get_page_throttle(state,
 | |
| 							     offset, &mtr,
 | |
| 							     &sleeptime_ms)) {
 | |
| 		btr_search_drop_page_hash_index(
 | |
| 			block, reinterpret_cast<dict_index_t*>(-1));
 | |
| 		bool modified = false;
 | |
| 		byte* frame = buf_block_get_frame(block);
 | |
| 		const lsn_t block_lsn = mach_read_from_8(FIL_PAGE_LSN + frame);
 | |
| 		uint kv = buf_page_get_key_version(frame, space->flags);
 | |
| 
 | |
| 		if (block->page.oldest_modification() > 1) {
 | |
| 			/* Do not unnecessarily touch pages that are
 | |
| 			already dirty. */
 | |
| 		} else if (space->is_stopping()) {
 | |
| 			/* The tablespace is closing (in DROP TABLE or
 | |
| 			TRUNCATE TABLE or similar): avoid further access */
 | |
| 		} else if (!kv && !*reinterpret_cast<uint16_t*>
 | |
| 			   (&frame[FIL_PAGE_TYPE])) {
 | |
| 			/* It looks like this page is not
 | |
| 			allocated. Because key rotation is accessing
 | |
| 			pages in a pattern that is unlike the normal
 | |
| 			B-tree and undo log access pattern, we cannot
 | |
| 			invoke fseg_page_is_allocated() here, because that
 | |
| 			could result in a deadlock. If we invoked
 | |
| 			fseg_page_is_allocated() and released the
 | |
| 			tablespace latch before acquiring block->lock,
 | |
| 			then the fseg_page_is_allocated() information
 | |
| 			could be stale already. */
 | |
| 
 | |
| 			/* If the data file was originally created
 | |
| 			before MariaDB 10.0 or MySQL 5.6, some
 | |
| 			allocated data pages could carry 0 in
 | |
| 			FIL_PAGE_TYPE. The FIL_PAGE_TYPE on those
 | |
| 			pages will be updated in
 | |
| 			buf_flush_init_for_writing() when the page
 | |
| 			is modified the next time.
 | |
| 
 | |
| 			Also, when the doublewrite buffer pages are
 | |
| 			allocated on bootstrap in a non-debug build,
 | |
| 			some dummy pages will be allocated, with 0 in
 | |
| 			the FIL_PAGE_TYPE. Those pages should be
 | |
| 			skipped from key rotation forever. */
 | |
| 		} else if (fil_crypt_needs_rotation(
 | |
| 				crypt_data,
 | |
| 				kv,
 | |
| 				key_state->key_version,
 | |
| 				key_state->rotate_key_age)) {
 | |
| 
 | |
| 			mtr.set_named_space(space);
 | |
| 			modified = true;
 | |
| 
 | |
| 			/* force rotation by dummy updating page */
 | |
| 			mtr.write<1,mtr_t::FORCED>(*block,
 | |
| 						   &frame[FIL_PAGE_SPACE_ID],
 | |
| 						   frame[FIL_PAGE_SPACE_ID]);
 | |
| 
 | |
| 			/* statistics */
 | |
| 			state->crypt_stat.pages_modified++;
 | |
| 		} else {
 | |
| 			if (crypt_data->is_encrypted()) {
 | |
| 				if (kv < state->min_key_version_found) {
 | |
| 					state->min_key_version_found = kv;
 | |
| 				}
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		mtr.commit();
 | |
| 		lsn_t end_lsn = mtr.commit_lsn();
 | |
| 
 | |
| 
 | |
| 		if (modified) {
 | |
| 			/* if we modified page, we take lsn from mtr */
 | |
| 			ut_a(end_lsn > state->end_lsn);
 | |
| 			ut_a(end_lsn > block_lsn);
 | |
| 			state->end_lsn = end_lsn;
 | |
| 		} else {
 | |
| 			/* if we did not modify page, check for max lsn */
 | |
| 			if (block_lsn > state->end_lsn) {
 | |
| 				state->end_lsn = block_lsn;
 | |
| 			}
 | |
| 		}
 | |
| 	} else {
 | |
| 		/* If block read failed mtr memo and log should be empty. */
 | |
| 		ut_ad(!mtr.has_modifications());
 | |
| 		ut_ad(mtr.is_empty());
 | |
| 		mtr.commit();
 | |
| 	}
 | |
| 
 | |
| 	if (sleeptime_ms) {
 | |
| 		mysql_mutex_lock(&fil_crypt_threads_mutex);
 | |
| 		timespec abstime;
 | |
| 		set_timespec_nsec(abstime, 1000000ULL * sleeptime_ms);
 | |
| 		my_cond_timedwait(&fil_crypt_throttle_sleep_cond,
 | |
| 				  &fil_crypt_threads_mutex.m_mutex, &abstime);
 | |
| 		mysql_mutex_unlock(&fil_crypt_threads_mutex);
 | |
| 	}
 | |
| }
 | |
| 
 | |
| /***********************************************************************
 | |
| Rotate a batch of pages
 | |
| @param[in,out]		key_state		Key state
 | |
| @param[in,out]		state			Rotation state */
 | |
| static
 | |
| void
 | |
| fil_crypt_rotate_pages(
 | |
| 	const key_state_t*	key_state,
 | |
| 	rotate_thread_t*	state)
 | |
| {
 | |
| 	const uint32_t space_id = state->space->id;
 | |
| 	uint32_t end = std::min(state->offset + uint32_t(state->batch),
 | |
| 				state->space->free_limit);
 | |
| 
 | |
| 	ut_ad(state->space->referenced());
 | |
| 
 | |
| 	for (; state->offset < end; state->offset++) {
 | |
| 
 | |
| 		/* we can't rotate pages in dblwr buffer as
 | |
| 		* it's not possible to read those due to lots of asserts
 | |
| 		* in buffer pool.
 | |
| 		*
 | |
| 		* However since these are only (short-lived) copies of
 | |
| 		* real pages, they will be updated anyway when the
 | |
| 		* real page is updated
 | |
| 		*/
 | |
| 		if (buf_dblwr.is_inside(page_id_t(space_id, state->offset))) {
 | |
| 			continue;
 | |
| 		}
 | |
| 
 | |
| 		/* If space is marked as stopping, stop rotating
 | |
| 		pages. */
 | |
| 		if (state->space->is_stopping()) {
 | |
| 			break;
 | |
| 		}
 | |
| 
 | |
| 		fil_crypt_rotate_page(key_state, state);
 | |
| 	}
 | |
| }
 | |
| 
 | |
| /***********************************************************************
 | |
| Flush rotated pages and then update page 0
 | |
| 
 | |
| @param[in,out]		state	rotation state */
 | |
| static
 | |
| void
 | |
| fil_crypt_flush_space(
 | |
| 	rotate_thread_t*	state)
 | |
| {
 | |
| 	fil_space_t* space = &*state->space;
 | |
| 	fil_space_crypt_t *crypt_data = space->crypt_data;
 | |
| 
 | |
| 	ut_ad(space->referenced());
 | |
| 
 | |
| 	/* flush tablespace pages so that there are no pages left with old key */
 | |
| 	lsn_t end_lsn = crypt_data->rotate_state.end_lsn;
 | |
| 
 | |
| 	if (end_lsn > 0 && !space->is_stopping()) {
 | |
| 		ulint sum_pages = 0;
 | |
| 		const ulonglong start = my_interval_timer();
 | |
| 		while (buf_flush_list_space(space, &sum_pages));
 | |
| 		if (sum_pages) {
 | |
| 			const ulonglong end = my_interval_timer();
 | |
| 
 | |
| 			state->cnt_waited += sum_pages;
 | |
| 			state->sum_waited_us += (end - start) / 1000;
 | |
| 
 | |
| 			/* statistics */
 | |
| 			state->crypt_stat.pages_flushed += sum_pages;
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	if (crypt_data->min_key_version == 0) {
 | |
| 		crypt_data->type = CRYPT_SCHEME_UNENCRYPTED;
 | |
| 	}
 | |
| 
 | |
| 	if (space->is_stopping()) {
 | |
| 		return;
 | |
| 	}
 | |
| 
 | |
| 	/* update page 0 */
 | |
| 	mtr_t mtr;
 | |
| 	mtr.start();
 | |
| 
 | |
| 	if (buf_block_t* block = buf_page_get_gen(
 | |
| 		    page_id_t(space->id, 0), space->zip_size(),
 | |
| 		    RW_X_LATCH, NULL, BUF_GET_POSSIBLY_FREED, &mtr)) {
 | |
| 		mtr.set_named_space(space);
 | |
| 		crypt_data->write_page0(block, &mtr);
 | |
| 	}
 | |
| 
 | |
| 	mtr.commit();
 | |
| }
 | |
| 
 | |
| /***********************************************************************
 | |
| Complete rotating a space
 | |
| @param[in,out]		state			Rotation state */
 | |
| static void fil_crypt_complete_rotate_space(rotate_thread_t* state)
 | |
| {
 | |
| 	fil_space_crypt_t *crypt_data = state->space->crypt_data;
 | |
| 
 | |
| 	ut_ad(crypt_data);
 | |
| 	ut_ad(state->space->referenced());
 | |
| 
 | |
| 	mysql_mutex_lock(&crypt_data->mutex);
 | |
| 
 | |
| 	/* Space might already be dropped */
 | |
| 	if (!state->space->is_stopping()) {
 | |
| 		/**
 | |
| 		* Update crypt data state with state from thread
 | |
| 		*/
 | |
| 		if (state->min_key_version_found <
 | |
| 			crypt_data->rotate_state.min_key_version_found) {
 | |
| 			crypt_data->rotate_state.min_key_version_found =
 | |
| 				state->min_key_version_found;
 | |
| 		}
 | |
| 
 | |
| 		if (state->end_lsn > crypt_data->rotate_state.end_lsn) {
 | |
| 			crypt_data->rotate_state.end_lsn = state->end_lsn;
 | |
| 		}
 | |
| 
 | |
| 		ut_a(crypt_data->rotate_state.active_threads > 0);
 | |
| 		crypt_data->rotate_state.active_threads--;
 | |
| 		bool last = crypt_data->rotate_state.active_threads == 0;
 | |
| 
 | |
| 		/**
 | |
| 		* check if space is fully done
 | |
| 		* this as when threads shutdown, it could be that we "complete"
 | |
| 		* iterating before we have scanned the full space.
 | |
| 		*/
 | |
| 		bool done = crypt_data->rotate_state.next_offset >=
 | |
| 			crypt_data->rotate_state.max_offset;
 | |
| 
 | |
| 		/**
 | |
| 		* we should flush space if we're last thread AND
 | |
| 		* the iteration is done
 | |
| 		*/
 | |
| 		bool should_flush = last && done;
 | |
| 
 | |
| 		if (should_flush) {
 | |
| 			/* we're the last active thread */
 | |
| 			crypt_data->rotate_state.flushing = true;
 | |
| 			crypt_data->min_key_version =
 | |
| 				crypt_data->rotate_state.min_key_version_found;
 | |
| 			mysql_mutex_unlock(&crypt_data->mutex);
 | |
| 			fil_crypt_flush_space(state);
 | |
| 
 | |
| 			mysql_mutex_lock(&crypt_data->mutex);
 | |
| 			crypt_data->rotate_state.flushing = false;
 | |
| 		}
 | |
| 	} else {
 | |
| 		ut_a(crypt_data->rotate_state.active_threads > 0);
 | |
| 		crypt_data->rotate_state.active_threads--;
 | |
| 	}
 | |
| 
 | |
| 	mysql_mutex_unlock(&crypt_data->mutex);
 | |
| }
 | |
| 
 | |
#ifdef UNIV_PFS_THREAD
/** Performance schema key under which fil_crypt_thread() registers
itself via pfs_register_thread() */
mysql_pfs_key_t page_encrypt_thread_key;
#endif /* UNIV_PFS_THREAD */
 | |
| 
 | |
| /** A thread which monitors global key state and rotates tablespaces
 | |
| accordingly */
 | |
| static void fil_crypt_thread()
 | |
| {
 | |
| 	my_thread_init();
 | |
| #ifdef UNIV_PFS_THREAD
 | |
| 	pfs_register_thread(page_encrypt_thread_key);
 | |
| #endif /* UNIV_PFS_THREAD */
 | |
| 	mysql_mutex_lock(&fil_crypt_threads_mutex);
 | |
| 	rotate_thread_t thr(srv_n_fil_crypt_threads_started++);
 | |
| 	pthread_cond_signal(&fil_crypt_cond); /* signal that we started */
 | |
| 
 | |
| 	if (!thr.should_shutdown()) {
 | |
| 		/* if we find a tablespace that is starting, skip over it
 | |
| 		and recheck it later */
 | |
| 		bool recheck = false;
 | |
| 
 | |
| wait_for_work:
 | |
| 		if (!recheck && !thr.should_shutdown()) {
 | |
| 			/* wait for key state changes
 | |
| 			* i.e either new key version of change or
 | |
| 			* new rotate_key_age */
 | |
| 			my_cond_wait(&fil_crypt_threads_cond,
 | |
| 				     &fil_crypt_threads_mutex.m_mutex);
 | |
| 		}
 | |
| 
 | |
| 		recheck = false;
 | |
| 		thr.first = true;      // restart from first tablespace
 | |
| 
 | |
| 		key_state_t new_state;
 | |
| 
 | |
| 		/* iterate all spaces searching for those needing rotation */
 | |
| 		while (fil_crypt_find_space_to_rotate(&new_state, &thr,
 | |
| 						      &recheck)) {
 | |
| 			if (thr.space == fil_system.space_list.end()) {
 | |
| 				goto wait_for_work;
 | |
| 			}
 | |
| 
 | |
| 			/* we found a space to rotate */
 | |
| 			mysql_mutex_unlock(&fil_crypt_threads_mutex);
 | |
| 			fil_crypt_start_rotate_space(&new_state, &thr);
 | |
| 
 | |
| 			/* iterate all pages (cooperativly with other threads) */
 | |
| 			while (fil_crypt_find_page_to_rotate(&new_state, &thr)) {
 | |
| 
 | |
| 				/* If space is marked as stopping, release
 | |
| 				space and stop rotation. */
 | |
| 				if (thr.space->is_stopping()) {
 | |
| 					fil_crypt_complete_rotate_space(&thr);
 | |
| 					thr.space->release();
 | |
| 					thr.space = fil_system.space_list.end();
 | |
| 					break;
 | |
| 				}
 | |
| 
 | |
| 				fil_crypt_rotate_pages(&new_state, &thr);
 | |
| 				/* realloc iops */
 | |
| 				if (!fil_crypt_realloc_iops(&thr)) {
 | |
| 					break;
 | |
| 				}
 | |
| 			}
 | |
| 
 | |
| 			/* complete rotation */
 | |
| 			if (thr.space != fil_system.space_list.end()) {
 | |
| 				fil_crypt_complete_rotate_space(&thr);
 | |
| 			}
 | |
| 
 | |
| 			/* force key state refresh */
 | |
| 			new_state.key_id = 0;
 | |
| 
 | |
| 			mysql_mutex_lock(&fil_crypt_threads_mutex);
 | |
| 			/* release iops */
 | |
| 			fil_crypt_return_iops(&thr);
 | |
| 		}
 | |
| 
 | |
| 		if (thr.space != fil_system.space_list.end()) {
 | |
| 			thr.space->release();
 | |
| 			thr.space = fil_system.space_list.end();
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	fil_crypt_return_iops(&thr);
 | |
| 	srv_n_fil_crypt_threads_started--;
 | |
| 	pthread_cond_signal(&fil_crypt_cond); /* signal that we stopped */
 | |
| 	mysql_mutex_unlock(&fil_crypt_threads_mutex);
 | |
| 
 | |
| 	my_thread_end();
 | |
| #ifdef UNIV_PFS_THREAD
 | |
| 	pfs_delete_thread();
 | |
| #endif
 | |
| }
 | |
| 
 | |
| /*********************************************************************
 | |
| Adjust thread count for key rotation
 | |
| @param[in]	enw_cnt		Number of threads to be used */
 | |
| void fil_crypt_set_thread_cnt(const uint new_cnt)
 | |
| {
 | |
| 	if (!fil_crypt_threads_inited) {
 | |
| 		if (srv_shutdown_state != SRV_SHUTDOWN_NONE)
 | |
| 			return;
 | |
| 		fil_crypt_threads_init();
 | |
| 	}
 | |
| 
 | |
| 	mysql_mutex_lock(&fil_crypt_threads_mutex);
 | |
| 
 | |
| 	if (new_cnt > srv_n_fil_crypt_threads) {
 | |
| 		uint add = new_cnt - srv_n_fil_crypt_threads;
 | |
| 		srv_n_fil_crypt_threads = new_cnt;
 | |
| 		for (uint i = 0; i < add; i++) {
 | |
| 			std::thread thd(fil_crypt_thread);
 | |
| 			ib::info() << "Creating #"
 | |
| 				   << i+1 << " encryption thread id "
 | |
| 				   << thd.get_id()
 | |
| 				   << " total threads " << new_cnt << ".";
 | |
| 			thd.detach();
 | |
| 		}
 | |
| 	} else if (new_cnt < srv_n_fil_crypt_threads) {
 | |
| 		srv_n_fil_crypt_threads = new_cnt;
 | |
| 	}
 | |
| 
 | |
| 	pthread_cond_broadcast(&fil_crypt_threads_cond);
 | |
| 
 | |
| 	while (srv_n_fil_crypt_threads_started != srv_n_fil_crypt_threads) {
 | |
| 		my_cond_wait(&fil_crypt_cond,
 | |
| 			     &fil_crypt_threads_mutex.m_mutex);
 | |
| 	}
 | |
| 
 | |
| 	pthread_cond_broadcast(&fil_crypt_threads_cond);
 | |
| 	mysql_mutex_unlock(&fil_crypt_threads_mutex);
 | |
| }
 | |
| 
 | |
| /** Initialize the tablespace default_encrypt_tables
 | |
| if innodb_encryption_rotate_key_age=0. */
 | |
| static void fil_crypt_default_encrypt_tables_fill()
 | |
| {
 | |
| 	mysql_mutex_assert_owner(&fil_system.mutex);
 | |
| 
 | |
| 	for (fil_space_t& space : fil_system.space_list) {
 | |
| 		if (space.is_in_default_encrypt
 | |
| 		    || UT_LIST_GET_LEN(space.chain) == 0
 | |
| 		    || space.is_temporary() || space.is_being_imported()
 | |
| 		    || !space.acquire_if_not_stopped()) {
 | |
| 			continue;
 | |
| 		}
 | |
| 
 | |
| 		/* Ensure that crypt_data has been initialized. */
 | |
| 		ut_ad(space.size);
 | |
| 
 | |
| 		/* Skip ENCRYPTION!=DEFAULT tablespaces. */
 | |
| 		if (space.crypt_data
 | |
| 		    && !space.crypt_data->is_default_encryption()) {
 | |
| 			goto next;
 | |
| 		}
 | |
| 
 | |
| 		if (srv_encrypt_tables) {
 | |
| 			/* Skip encrypted tablespaces if
 | |
| 			innodb_encrypt_tables!=OFF */
 | |
| 			if (space.crypt_data
 | |
| 			    && space.crypt_data->min_key_version) {
 | |
| 				goto next;
 | |
| 			}
 | |
| 		} else {
 | |
| 			/* Skip unencrypted tablespaces if
 | |
| 			innodb_encrypt_tables=OFF */
 | |
| 			if (!space.crypt_data
 | |
| 			    || !space.crypt_data->min_key_version) {
 | |
| 				goto next;
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		fil_system.default_encrypt_tables.push_back(space);
 | |
| 		space.is_in_default_encrypt = true;
 | |
| next:
 | |
| 		space.release();
 | |
| 	}
 | |
| }
 | |
| 
 | |
| /*********************************************************************
 | |
| Adjust max key age
 | |
| @param[in]	val		New max key age */
 | |
| void fil_crypt_set_rotate_key_age(uint val)
 | |
| {
 | |
|   mysql_mutex_lock(&fil_crypt_threads_mutex);
 | |
|   mysql_mutex_lock(&fil_system.mutex);
 | |
|   srv_fil_crypt_rotate_key_age= val;
 | |
|   if (val == 0)
 | |
|     fil_crypt_default_encrypt_tables_fill();
 | |
|   mysql_mutex_unlock(&fil_system.mutex);
 | |
|   pthread_cond_broadcast(&fil_crypt_threads_cond);
 | |
|   mysql_mutex_unlock(&fil_crypt_threads_mutex);
 | |
| }
 | |
| 
 | |
| /*********************************************************************
 | |
| Adjust rotation iops
 | |
| @param[in]	val		New max roation iops */
 | |
| void fil_crypt_set_rotation_iops(uint val)
 | |
| {
 | |
|   mysql_mutex_lock(&fil_crypt_threads_mutex);
 | |
|   srv_n_fil_crypt_iops= val;
 | |
|   pthread_cond_broadcast(&fil_crypt_threads_cond);
 | |
|   mysql_mutex_unlock(&fil_crypt_threads_mutex);
 | |
| }
 | |
| 
 | |
| /** Add the import tablespace to default_encrypt list
 | |
| if necessary and signal fil_crypt_threads
 | |
| @param space imported tablespace */
 | |
| void fil_crypt_add_imported_space(fil_space_t *space)
 | |
| {
 | |
|   mysql_mutex_lock(&fil_crypt_threads_mutex);
 | |
| 
 | |
|   mysql_mutex_lock(&fil_system.mutex);
 | |
| 
 | |
|   if (fil_crypt_must_default_encrypt())
 | |
|   {
 | |
|     fil_system.default_encrypt_tables.push_back(*space);
 | |
|     space->is_in_default_encrypt= true;
 | |
|   }
 | |
| 
 | |
|   mysql_mutex_unlock(&fil_system.mutex);
 | |
| 
 | |
|   pthread_cond_broadcast(&fil_crypt_threads_cond);
 | |
|   mysql_mutex_unlock(&fil_crypt_threads_mutex);
 | |
| }
 | |
| 
 | |
| /*********************************************************************
 | |
| Adjust encrypt tables
 | |
| @param[in]	val		New setting for innodb-encrypt-tables */
 | |
| void fil_crypt_set_encrypt_tables(ulong val)
 | |
| {
 | |
|   if (!fil_crypt_threads_inited)
 | |
|     return;
 | |
| 
 | |
|   mysql_mutex_lock(&fil_crypt_threads_mutex);
 | |
| 
 | |
|   mysql_mutex_lock(&fil_system.mutex);
 | |
|   srv_encrypt_tables= val;
 | |
| 
 | |
|   if (fil_crypt_must_default_encrypt())
 | |
|     fil_crypt_default_encrypt_tables_fill();
 | |
| 
 | |
|   mysql_mutex_unlock(&fil_system.mutex);
 | |
| 
 | |
|   pthread_cond_broadcast(&fil_crypt_threads_cond);
 | |
|   mysql_mutex_unlock(&fil_crypt_threads_mutex);
 | |
| }
 | |
| 
 | |
| /*********************************************************************
 | |
| Init threads for key rotation */
 | |
| void fil_crypt_threads_init()
 | |
| {
 | |
| 	if (!fil_crypt_threads_inited) {
 | |
| 		pthread_cond_init(&fil_crypt_cond, nullptr);
 | |
| 		pthread_cond_init(&fil_crypt_threads_cond, nullptr);
 | |
| 		mysql_mutex_init(0, &fil_crypt_threads_mutex, nullptr);
 | |
| 		uint cnt = srv_n_fil_crypt_threads;
 | |
| 		srv_n_fil_crypt_threads = 0;
 | |
| 		fil_crypt_threads_inited = true;
 | |
| 		fil_crypt_set_thread_cnt(cnt);
 | |
| 	}
 | |
| }
 | |
| 
 | |
| /*********************************************************************
 | |
| Clean up key rotation threads resources */
 | |
| void fil_crypt_threads_cleanup()
 | |
| {
 | |
| 	if (!fil_crypt_threads_inited) {
 | |
| 		return;
 | |
| 	}
 | |
| 	ut_a(!srv_n_fil_crypt_threads_started);
 | |
| 	pthread_cond_destroy(&fil_crypt_cond);
 | |
| 	pthread_cond_destroy(&fil_crypt_threads_cond);
 | |
| 	mysql_mutex_destroy(&fil_crypt_threads_mutex);
 | |
| 	fil_crypt_threads_inited = false;
 | |
| }
 | |
| 
 | |
| /*********************************************************************
 | |
| Wait for crypt threads to stop accessing space
 | |
| @param[in]	space		Tablespace */
 | |
| void fil_space_crypt_close_tablespace(const fil_space_t *space)
 | |
| {
 | |
| 	fil_space_crypt_t* crypt_data = space->crypt_data;
 | |
| 
 | |
| 	if (!crypt_data || srv_n_fil_crypt_threads == 0
 | |
| 	    || !fil_crypt_threads_inited) {
 | |
| 		return;
 | |
| 	}
 | |
| 
 | |
| 	time_t start = time(0);
 | |
| 	time_t last = start;
 | |
| 
 | |
| 	mysql_mutex_lock(&crypt_data->mutex);
 | |
| 
 | |
| 	while (crypt_data->rotate_state.active_threads
 | |
| 	       || crypt_data->rotate_state.flushing) {
 | |
| 		mysql_mutex_unlock(&crypt_data->mutex);
 | |
| 
 | |
| 		/* wakeup throttle (all) sleepers */
 | |
| 		mysql_mutex_lock(&fil_crypt_threads_mutex);
 | |
| 		pthread_cond_broadcast(&fil_crypt_throttle_sleep_cond);
 | |
| 		pthread_cond_broadcast(&fil_crypt_threads_cond);
 | |
| 		mysql_mutex_unlock(&fil_crypt_threads_mutex);
 | |
| 
 | |
| 		std::this_thread::sleep_for(std::chrono::milliseconds(20));
 | |
| 
 | |
| 		time_t now = time(0);
 | |
| 
 | |
| 		if (UNIV_UNLIKELY(now >= last + 30)) {
 | |
| 			ib::warn() << "Waited "
 | |
| 				   << now - start
 | |
| 				   << " seconds to drop space: "
 | |
| 				   << space->chain.start->name << " ("
 | |
| 				   << space->id << ") active threads "
 | |
| 				   << crypt_data->rotate_state.active_threads
 | |
| 				   << " flushing="
 | |
| 				   << crypt_data->rotate_state.flushing << ".";
 | |
| 			last = now;
 | |
| 		}
 | |
| 
 | |
| 		mysql_mutex_lock(&crypt_data->mutex);
 | |
| 	}
 | |
| 
 | |
| 	mysql_mutex_unlock(&crypt_data->mutex);
 | |
| }
 | |
| 
 | |
| /*********************************************************************
 | |
| Get crypt status for a space (used by information_schema)
 | |
| @param[in]	space		Tablespace
 | |
| @param[out]	status		Crypt status */
 | |
| void
 | |
| fil_space_crypt_get_status(
 | |
| 	const fil_space_t*			space,
 | |
| 	struct fil_space_crypt_status_t*	status)
 | |
| {
 | |
| 	memset(status, 0, sizeof(*status));
 | |
| 
 | |
| 	ut_ad(space->referenced());
 | |
| 
 | |
| 	/* If there is no crypt data and we have not yet read
 | |
| 	page 0 for this tablespace, we need to read it before
 | |
| 	we can continue. */
 | |
| 	if (!space->crypt_data) {
 | |
| 		fil_crypt_read_crypt_data(const_cast<fil_space_t*>(space));
 | |
| 	}
 | |
| 
 | |
| 	status->space = ULINT_UNDEFINED;
 | |
| 
 | |
| 	if (fil_space_crypt_t* crypt_data = space->crypt_data) {
 | |
| 		status->space = space->id;
 | |
| 		mysql_mutex_lock(&crypt_data->mutex);
 | |
| 		status->scheme = crypt_data->type;
 | |
| 		status->keyserver_requests = crypt_data->keyserver_requests;
 | |
| 		status->min_key_version = crypt_data->min_key_version;
 | |
| 		status->key_id = crypt_data->key_id;
 | |
| 
 | |
| 		if (crypt_data->rotate_state.active_threads > 0 ||
 | |
| 		    crypt_data->rotate_state.flushing) {
 | |
| 			status->rotating = true;
 | |
| 			status->flushing =
 | |
| 				crypt_data->rotate_state.flushing;
 | |
| 			status->rotate_next_page_number =
 | |
| 				crypt_data->rotate_state.next_offset;
 | |
| 			status->rotate_max_page_number =
 | |
| 				crypt_data->rotate_state.max_offset;
 | |
| 		}
 | |
| 
 | |
| 		mysql_mutex_unlock(&crypt_data->mutex);
 | |
| 
 | |
| 		if (srv_encrypt_tables || crypt_data->min_key_version) {
 | |
| 			status->current_key_version =
 | |
| 				fil_crypt_get_latest_key_version(crypt_data);
 | |
| 		}
 | |
| 	}
 | |
| }
 | |
| 
 | |
| /*********************************************************************
 | |
| Return crypt statistics
 | |
| @param[out]	stat		Crypt statistics */
 | |
| void fil_crypt_total_stat(fil_crypt_stat_t *stat)
 | |
| {
 | |
| 	mysql_mutex_lock(&crypt_stat_mutex);
 | |
| 	*stat = crypt_stat;
 | |
| 	mysql_mutex_unlock(&crypt_stat_mutex);
 | |
| }
 | |
| 
 | |
| #endif /* UNIV_INNOCHECKSUM */
 | |
| 
 | |
| /**
 | |
| Verify that post encryption checksum match calculated checksum.
 | |
| This function should be called only if tablespace contains crypt_data
 | |
| metadata (this is strong indication that tablespace is encrypted).
 | |
| Function also verifies that traditional checksum does not match
 | |
| calculated checksum as if it does page could be valid unencrypted,
 | |
| encrypted, or corrupted.
 | |
| 
 | |
| @param[in,out]	page		page frame (checksum is temporarily modified)
 | |
| @param[in]	zip_size	ROW_FORMAT=COMPRESSED page size, or 0
 | |
| @return true if page is encrypted AND OK, false otherwise */
 | |
| bool fil_space_verify_crypt_checksum(const byte* page, ulint zip_size)
 | |
| {
 | |
| 	if (ENCRYPTION_KEY_NOT_ENCRYPTED == mach_read_from_4(
 | |
| 			page + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION)) {
 | |
| 		return false;
 | |
| 	}
 | |
| 
 | |
| 	/* Compressed and encrypted pages do not have checksum. Assume not
 | |
| 	corrupted. Page verification happens after decompression in
 | |
| 	buf_page_t::read_complete() using buf_page_is_corrupted(). */
 | |
| 	if (fil_page_get_type(page) == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED) {
 | |
| 		return true;
 | |
| 	}
 | |
| 
 | |
| 	/* Read stored post encryption checksum. */
 | |
| 	const ib_uint32_t checksum = mach_read_from_4(
 | |
| 		page + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 4);
 | |
| 
 | |
| 	/* If stored checksum matches one of the calculated checksums
 | |
| 	page is not corrupted. */
 | |
| 
 | |
| #ifndef UNIV_INNOCHECKSUM
 | |
| 	switch (srv_checksum_algorithm) {
 | |
| 	case SRV_CHECKSUM_ALGORITHM_STRICT_FULL_CRC32:
 | |
| 	case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32:
 | |
| #endif /* !UNIV_INNOCHECKSUM */
 | |
| 		if (zip_size) {
 | |
| 			return checksum == page_zip_calc_checksum(
 | |
| 				page, zip_size, false);
 | |
| 		}
 | |
| 
 | |
| 		return checksum == buf_calc_page_crc32(page);
 | |
| #ifndef UNIV_INNOCHECKSUM
 | |
| 	default:
 | |
| 		if (checksum == BUF_NO_CHECKSUM_MAGIC) {
 | |
| 			return true;
 | |
| 		}
 | |
| 		if (zip_size) {
 | |
| 			return checksum == page_zip_calc_checksum(
 | |
| 				page, zip_size, false)
 | |
| 				|| checksum == page_zip_calc_checksum(
 | |
| 					page, zip_size, true);
 | |
| 		}
 | |
| 
 | |
| 		return checksum == buf_calc_page_crc32(page)
 | |
| 			|| checksum == buf_calc_page_new_checksum(page);
 | |
| 	}
 | |
| #endif /* !UNIV_INNOCHECKSUM */
 | |
| }
 | 
