mariadb/storage/innobase/clone/clone0snapshot.cc
mariadb-DebarunBanerjee 2ab1b0fd7f Clone - Key flow part-I
2025-02-19 15:12:52 +05:30

1574 lines
46 KiB
C++

/*****************************************************************************
Copyright (c) 2017, 2024, Oracle and/or its affiliates.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License, version 2.0, as published by the
Free Software Foundation.
This program is designed to work with certain software (including
but not limited to OpenSSL) that is licensed under separate terms,
as designated in a particular file or component or in included license
documentation. The authors of MySQL hereby grant you an additional
permission to link the program and your derivative works with the
separately licensed software that they have either included with
the program or referenced in the documentation.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*****************************************************************************/
/** @file clone/clone0snapshot.cc
Innodb physical Snapshot
*******************************************************/
#include "buf0flu.h"
#include "clone0snapshot.h"
#include "clone0clone.h"
#include "fil0pagecompress.h"
#include "log0log.h" /* log_get_lsn */
#include "page0zip.h"
#include "handler.h"
/** Initial size (16 * 1024, presumably bytes - TODO confirm) passed to
mem_heap_create() when a snapshot creates its private memory heap. */
const uint SNAPSHOT_MEM_INITIAL_SIZE = 16 * 1024;
/** Maximum number of clones that can be attached to one snapshot.
Clone_Snapshot::attach() rejects a clone once this count is reached. */
const uint MAX_CLONES_PER_SNAPSHOT = 1;
/** Construct a snapshot in CLONE_SNAPSHOT_INIT state.
All counters, sizes and redo header/trailer members are value-initialized;
the snapshot mutex and the snapshot memory heap are created here.
@param[in]	hdl_type	handle type (copy or apply) the snapshot serves
@param[in]	clone_type	clone type stored as the snapshot type
@param[in]	arr_idx		index stored as the snapshot array index
@param[in]	snap_id		identifier stored as the snapshot ID */
Clone_Snapshot::Clone_Snapshot(Clone_Handle_Type hdl_type,
                               Ha_clone_type clone_type, uint arr_idx,
                               uint64_t snap_id)
    : m_snapshot_handle_type(hdl_type),
      m_snapshot_type(clone_type),
      m_snapshot_id(snap_id),
      m_snapshot_arr_idx(arr_idx),
      m_num_blockers(),
      m_aborted(false),
      m_num_clones(),
      m_num_clones_transit(),
      m_snapshot_state(CLONE_SNAPSHOT_INIT),
      m_snapshot_next_state(CLONE_SNAPSHOT_NONE),
      m_num_current_chunks(),
      m_max_file_name_len(),
      m_num_data_chunks(),
      m_data_bytes_disk(),
      m_page_ctx(false),
      m_num_pages(),
      m_num_duplicate_pages(),
      m_redo_ctx(),
      m_redo_start_offset(),
      m_redo_header(),
      m_redo_header_size(),
      m_redo_trailer(),
      m_redo_trailer_size(),
      m_redo_trailer_offset(),
      m_redo_file_size(),
      m_num_redo_chunks(),
      m_enable_pfs(false)
{
  /* Begin with the default block and chunk sizes. The block size may be
  raised later by update_block_size(). */
  m_block_size_pow2 = SNAPSHOT_DEF_BLOCK_SIZE_POW2;
  m_chunk_size_pow2 = SNAPSHOT_DEF_CHUNK_SIZE_POW2;

  /* Private heap of the snapshot; released by the destructor. */
  m_snapshot_heap = mem_heap_create(SNAPSHOT_MEM_INITIAL_SIZE);

  mysql_mutex_init(0, &m_snapshot_mutex, nullptr);
}
/** Destroy the snapshot: release the redo context, stop (if still active) and
release the page context, then free the snapshot heap and destroy the mutex. */
Clone_Snapshot::~Clone_Snapshot()
{
  m_redo_ctx.release();

  /* An active page context has to be stopped before it is released. */
  const bool page_ctx_active = m_page_ctx.is_active();
  if (page_ctx_active)
    m_page_ctx.stop(nullptr);
  m_page_ctx.release();

  mem_heap_free(m_snapshot_heap);
  mysql_mutex_destroy(&m_snapshot_mutex);
}
/** Fill a state descriptor from the current snapshot state.
@param[in]	do_estimate	if true, report the monitor estimate and the
				data bytes on disk; otherwise report both as 0
@param[out]	state_desc	descriptor to fill. It is always marked as a
				"start" descriptor and never as an ACK. */
void Clone_Snapshot::get_state_info(bool do_estimate,
                                    Clone_Desc_State *state_desc)
{
  state_desc->m_state = m_snapshot_state;
  state_desc->m_num_chunks = m_num_current_chunks;
  state_desc->m_is_start = true;
  state_desc->m_is_ack = false;

  state_desc->m_estimate = do_estimate ? m_monitor.get_estimate() : 0;
  state_desc->m_estimate_disk = do_estimate ? m_data_bytes_disk : 0;

  /* The "number of files" field is overloaded per state: data files for
  file copy, pages for page copy and redo files for redo copy. */
  if (m_snapshot_state == CLONE_SNAPSHOT_FILE_COPY) {
    state_desc->m_num_files = static_cast<uint>(num_data_files());
  } else if (m_snapshot_state == CLONE_SNAPSHOT_PAGE_COPY) {
    state_desc->m_num_files = m_num_pages;
  } else if (m_snapshot_state == CLONE_SNAPSHOT_REDO_COPY) {
    state_desc->m_num_files = static_cast<uint>(num_redo_files());
  } else {
    state_desc->m_num_files = 0;
    /* Only DONE and INIT are expected here; any other state is a bug. */
    if (m_snapshot_state != CLONE_SNAPSHOT_DONE &&
        m_snapshot_state != CLONE_SNAPSHOT_INIT) {
      ut_d(ut_error);
    }
  }
}
/** Load the snapshot state from a state descriptor and set up the monitor for
the corresponding stage. The caller must own the snapshot mutex.
@param[in]	state_desc	descriptor carrying the new state, chunk and
				file counts and estimates */
void Clone_Snapshot::set_state_info(Clone_Desc_State *state_desc)
{
  mysql_mutex_assert_owner(&m_snapshot_mutex);

  m_snapshot_state = state_desc->m_state;
  m_num_current_chunks = state_desc->m_num_chunks;

  switch (m_snapshot_state) {
    case CLONE_SNAPSHOT_FILE_COPY:
      m_num_data_chunks = state_desc->m_num_chunks;
      m_data_bytes_disk = state_desc->m_estimate_disk;
      /* One slot per data file; entries start out empty. */
      m_data_file_vector.resize(state_desc->m_num_files, nullptr);
      m_monitor.init_state(srv_stage_clone_file_copy.m_key, m_enable_pfs);
      m_monitor.add_estimate(state_desc->m_estimate);
      m_monitor.change_phase();
      break;

    case CLONE_SNAPSHOT_PAGE_COPY:
      /* In page copy the descriptor's file count carries the page count. */
      m_num_pages = state_desc->m_num_files;
      m_monitor.init_state(srv_stage_clone_page_copy.m_key, m_enable_pfs);
      m_monitor.add_estimate(state_desc->m_estimate);
      m_monitor.change_phase();
      break;

    case CLONE_SNAPSHOT_REDO_COPY:
      m_num_redo_chunks = state_desc->m_num_chunks;
      /* One slot per redo file; entries start out empty. */
      m_redo_file_vector.resize(state_desc->m_num_files, nullptr);
      m_monitor.init_state(srv_stage_clone_redo_copy.m_key, m_enable_pfs);
      m_monitor.add_estimate(state_desc->m_estimate);
      m_monitor.change_phase();
      break;

    case CLONE_SNAPSHOT_DONE:
      ut_ad(m_num_current_chunks == 0);
      m_monitor.init_state(PSI_NOT_INSTRUMENTED, m_enable_pfs);
      break;

    default:
      ut_d(ut_error);
  }
}
/** Determine the state that follows the current snapshot state.
INIT is followed by FILE COPY. What follows FILE COPY depends on the snapshot
type; PAGE COPY is followed by REDO COPY and REDO COPY by DONE.
@return the next snapshot state */
Snapshot_State Clone_Snapshot::get_next_state()
{
  ut_ad(m_snapshot_state != CLONE_SNAPSHOT_NONE);

  switch (m_snapshot_state) {
    case CLONE_SNAPSHOT_INIT:
      return CLONE_SNAPSHOT_FILE_COPY;

    case CLONE_SNAPSHOT_FILE_COPY:
      switch (m_snapshot_type) {
        case HA_CLONE_HYBRID:
        case HA_CLONE_PAGE:
          return CLONE_SNAPSHOT_PAGE_COPY;
        case HA_CLONE_REDO:
          return CLONE_SNAPSHOT_REDO_COPY;
        default:
          /* A blocking clone has nothing more to copy after the files. */
          ut_ad(m_snapshot_type == HA_CLONE_BLOCKING);
          return CLONE_SNAPSHOT_DONE;
      }

    case CLONE_SNAPSHOT_PAGE_COPY:
      return CLONE_SNAPSHOT_REDO_COPY;

    default:
      ut_ad(m_snapshot_state == CLONE_SNAPSHOT_REDO_COPY);
      return CLONE_SNAPSHOT_DONE;
  }
}
/** Try to attach a clone to this snapshot.
The attach succeeds only when the handle type matches the snapshot's handle
type and fewer than MAX_CLONES_PER_SNAPSHOT clones are attached.
@param[in]	hdl_type	handle type of the attaching clone
@param[in]	pfs_monitor	stored as the snapshot's PFS-enable flag on
				success
@return true if the clone got attached */
bool Clone_Snapshot::attach(Clone_Handle_Type hdl_type, bool pfs_monitor)
{
  Mysql_mutex_guard guard(&m_snapshot_mutex);

  if (hdl_type != m_snapshot_handle_type ||
      m_num_clones >= MAX_CLONES_PER_SNAPSHOT)
    return false;

  ++m_num_clones;
  m_enable_pfs = pfs_monitor;
  ut_ad(!in_transit_state());
  return true;
}
void Clone_Snapshot::detach() {
mysql_mutex_lock(&m_snapshot_mutex);
ut_ad(m_num_clones > 0);
ut_ad(!in_transit_state());
--m_num_clones;
ut_ad(m_num_clones == 0);
mysql_mutex_unlock(&m_snapshot_mutex);
}
bool Clone_Snapshot::is_aborted() const {
mysql_mutex_assert_owner(&m_snapshot_mutex);
return m_aborted;
}
/** Mark the snapshot as aborted, under the snapshot mutex, and log an
informational message. */
void Clone_Snapshot::set_abort()
{
  Mysql_mutex_guard guard(&m_snapshot_mutex);

  m_aborted = true;
  ib::info() << "Clone Snapshot aborted";
}
/** Begin a guarded state transition.
Acquires the snapshot mutex (released by the destructor), announces the
transition, waits for DDLs that block it and, if the wait succeeded, starts
the transition to the new state. A wait failure is kept in m_error.
@param[in,out]	snapshot	snapshot whose state is changing
@param[in]	new_state	state to move to */
Clone_Snapshot::State_transit::State_transit(Clone_Snapshot *snapshot,
                                             Snapshot_State new_state)
    : m_snapshot(snapshot)
{
  mysql_mutex_lock(&m_snapshot->m_snapshot_mutex);

  ut_ad(!m_snapshot->in_transit_wait());
  ut_ad(!m_snapshot->in_transit_state());

  m_snapshot->begin_transit_ddl_wait();
  ut_ad(m_snapshot->in_transit_wait());

  /* Wait for DDLs blocking clone state transition. */
  m_error = m_snapshot->wait(Wait_type::STATE_BLOCKER, nullptr, false, true);

  if (m_error == 0) {
    m_snapshot->begin_transit(new_state);
    ut_ad(m_snapshot->in_transit_state());
  }
  /* purecov: inspected (error path: transition is not started) */
}
/** Finish the guarded state transition: end the transition if the
constructor managed to start it, then release the snapshot mutex taken by the
constructor. */
Clone_Snapshot::State_transit::~State_transit()
{
  const bool transit_started = (m_error == 0);
  if (transit_started)
    m_snapshot->end_transit();

  ut_ad(!m_snapshot->in_transit_state());
  ut_ad(!m_snapshot->in_transit_wait());

  mysql_mutex_unlock(&m_snapshot->m_snapshot_mutex);
}
/** Get the file metadata at an index for the current state.
@param[in]	index	file index
@return file metadata, or nullptr if there is no file context at the index */
Clone_File_Meta *Clone_Snapshot::get_file_by_index(uint index)
{
  auto *ctx = get_file_ctx_by_index(index);
  return ctx != nullptr ? ctx->get_file_meta() : nullptr;
}
/** Get the file context at an index for the current state.
FILE COPY and PAGE COPY look the index up among the data files, REDO COPY
among the redo files. Any other state has no files.
@param[in]	index	file index
@return file context, or nullptr if the index is out of range or the state
has no files */
Clone_file_ctx *Clone_Snapshot::get_file_ctx_by_index(uint index)
{
  switch (m_snapshot_state) {
    case CLONE_SNAPSHOT_FILE_COPY:
    case CLONE_SNAPSHOT_PAGE_COPY:
      if (index < m_data_file_vector.size())
        return m_data_file_vector[index];
      return nullptr;

    case CLONE_SNAPSHOT_REDO_COPY:
      if (index < m_redo_file_vector.size())
        return m_redo_file_vector[index];
      return nullptr;

    default:
      return nullptr;
  }
}
/** Invoke a callback on every file of the current state.
Data files are visited in FILE COPY state and redo files in REDO COPY state.
In any other state nothing is visited.
@param[in]	func	callback to invoke for each file context
@return 0 on success, otherwise the first non-zero callback result */
int Clone_Snapshot::iterate_files(File_Cbk_Func &&func)
{
  if (m_snapshot_state == CLONE_SNAPSHOT_FILE_COPY)
    return iterate_data_files(std::forward<File_Cbk_Func>(func));

  if (m_snapshot_state == CLONE_SNAPSHOT_REDO_COPY)
    return iterate_redo_files(std::forward<File_Cbk_Func>(func));

  return 0;
}
/** Invoke a callback on every data file context while holding the snapshot
mutex. Iteration stops at the first callback that reports an error.
@param[in]	func	callback to invoke for each data file context
@return 0 on success, otherwise the first non-zero callback result */
int Clone_Snapshot::iterate_data_files(File_Cbk_Func &&func)
{
  Mysql_mutex_guard guard(&m_snapshot_mutex);

  for (auto data_ctx : m_data_file_vector) {
    if (auto cbk_err = func(data_ctx); cbk_err != 0)
      return cbk_err; /* purecov: inspected */
  }
  return 0;
}
/** Invoke a callback on every redo file context. Unlike the data file
variant, the snapshot mutex is not acquired here. Iteration stops at the first
callback that reports an error.
@param[in]	func	callback to invoke for each redo file context
@return 0 on success, otherwise the first non-zero callback result */
int Clone_Snapshot::iterate_redo_files(File_Cbk_Func &&func)
{
  for (auto redo_ctx : m_redo_file_vector) {
    if (auto cbk_err = func(redo_ctx); cbk_err != 0)
      return cbk_err; /* purecov: inspected */
  }
  return 0;
}
/** Get the next block to transfer from a chunk.
In PAGE COPY state the call is forwarded to get_next_page(). In REDO COPY
state the chunk numbered (m_num_current_chunks - 1) yields the in-memory redo
header and the chunk numbered m_num_current_chunks yields the in-memory redo
trailer. In every other case the block is described as an offset and length
within the file and data_buf is set to nullptr.
@param[in]	chunk_num	chunk to read from
@param[in,out]	block_num	in: index of the next block within the chunk;
				out: incremented when a block is returned,
				reset to 0 when the chunk has no more blocks
@param[in,out]	file_ctx	file context for the chunk; only passed on to
				get_next_page() by reference
@param[out]	data_offset	byte offset of the block in the file
@param[out]	data_buf	in-memory data for header, trailer and page
				copy; nullptr when data is to be read from file
@param[out]	data_size	block length in bytes
@param[out]	file_size	set to 0 here; may be updated by
				get_next_page()
@return 0, or the error returned by get_next_page() in PAGE COPY state */
int Clone_Snapshot::get_next_block(uint chunk_num, uint &block_num,
const Clone_file_ctx *&file_ctx,
uint64_t &data_offset, byte *&data_buf,
uint32_t &data_size, uint64_t &file_size) {
uint64_t start_offset = 0;
const auto file_meta = file_ctx->get_file_meta_read();
file_size = 0;
if (m_snapshot_state == CLONE_SNAPSHOT_PAGE_COPY) {
/* Copy the page from buffer pool. */
auto err = get_next_page(chunk_num, block_num, file_ctx, data_offset,
data_buf, data_size, file_size);
return (err);
} else if (m_snapshot_state == CLONE_SNAPSHOT_REDO_COPY) {
/* For redo copy header and trailer are returned in buffer. */
if (chunk_num == (m_num_current_chunks - 1)) {
/* Last but one chunk is the redo header. */
/* The header is a single block: a non-zero block number means it has
already been returned and the chunk is finished. */
if (block_num != 0) {
block_num = 0;
return (0);
}
++block_num;
data_offset = 0;
data_buf = m_redo_header;
ut_ad(data_buf != nullptr);
data_size = m_redo_header_size;
return (0);
} else if (chunk_num == m_num_current_chunks) {
/* Last chunk is the redo trailer. */
/* The trailer is a single block and is skipped entirely when empty. */
if (block_num != 0 || m_redo_trailer_size == 0) {
block_num = 0;
return (0);
}
++block_num;
data_offset = m_redo_trailer_offset;
data_buf = m_redo_trailer;
ut_ad(data_buf != nullptr);
data_size = m_redo_trailer_size;
return (0);
}
/* This is not header or trailer chunk. Need to get redo
data from archived file. */
if (file_meta->m_begin_chunk == 1) {
/* Set start offset for the first file. */
start_offset = m_redo_start_offset;
}
/* Dummy redo file entry. Need to send metadata. */
if (file_meta->m_file_size == 0) {
if (block_num != 0) {
block_num = 0;
return (0);
}
/* Return one empty block so that the caller has something to send. */
++block_num;
data_buf = nullptr;
data_size = 0;
data_offset = 0;
return (0);
}
}
/* We have identified the file to transfer data at this point.
Get the data offset for next block to transfer. */
uint num_blocks;
data_buf = nullptr;
/* Index of this chunk relative to the first chunk of the file. */
uint64_t file_chnuk_num = chunk_num - file_meta->m_begin_chunk;
/* Offset in pages for current chunk. */
uint64_t chunk_offset = file_chnuk_num << m_chunk_size_pow2;
/* Find number of blocks in current chunk. */
if (chunk_num == file_meta->m_end_chunk) {
/* If it is last chunk, we need to adjust the size. */
uint64_t size_in_pages;
uint aligned_sz;
ut_ad(file_meta->m_file_size >= start_offset);
/* File size past the start offset, rounded up to whole pages. */
size_in_pages = ut_uint64_align_up(file_meta->m_file_size - start_offset,
UNIV_PAGE_SIZE);
size_in_pages /= UNIV_PAGE_SIZE;
ut_ad(size_in_pages >= chunk_offset);
/* Pages that remain for this last chunk. */
size_in_pages -= chunk_offset;
aligned_sz = static_cast<uint>(size_in_pages);
ut_ad(aligned_sz == size_in_pages);
/* Round up to whole blocks and convert pages to blocks. */
aligned_sz = ut_calc_align(aligned_sz, block_size());
num_blocks = aligned_sz >> m_block_size_pow2;
} else {
num_blocks = blocks_per_chunk();
}
/* Current block is the last one. No more blocks in current chunk. */
if (block_num == num_blocks) {
block_num = 0;
return (0);
}
ut_ad(block_num < num_blocks);
/* Calculate the offset of next block. */
uint64_t block_offset;
block_offset = static_cast<uint64_t>(block_num);
block_offset *= block_size();
data_offset = chunk_offset + block_offset;
data_size = block_size();
++block_num;
/* Convert offset and length in bytes. */
data_size *= static_cast<uint32_t>(UNIV_PAGE_SIZE);
data_offset *= UNIV_PAGE_SIZE;
data_offset += start_offset;
ut_ad(data_offset < file_meta->m_file_size);
/* Adjust length for last block in last chunk. */
if (chunk_num == file_meta->m_end_chunk && block_num == num_blocks) {
ut_ad((data_offset + data_size) >= file_meta->m_file_size);
data_size = static_cast<uint>(file_meta->m_file_size - data_offset);
}
#ifdef UNIV_DEBUG
if (m_snapshot_state == CLONE_SNAPSHOT_REDO_COPY) {
/* Current file is the last redo file */
auto redo_file_ctx = m_redo_file_vector.back();
if (file_meta == redo_file_ctx->get_file_meta() &&
m_redo_trailer_size != 0) {
/* Should not exceed/overwrite the trailer */
ut_ad(data_offset + data_size <= m_redo_trailer_offset);
}
}
#endif /* UNIV_DEBUG */
return (0);
}
/** Grow the transfer block size to cover the caller's transfer buffer.
Nothing is changed unless the snapshot is still in INIT state and the file
system layer is not doing buffered IO. The block size is doubled until it
covers buff_size or reaches SNAPSHOT_MAX_BLOCK_SIZE_POW2.
@param[in]	buff_size	transfer buffer size in bytes */
void Clone_Snapshot::update_block_size(uint buff_size)
{
  Mysql_mutex_guard guard(&m_snapshot_mutex);

  /* Transfer data block is used only for direct IO. */
  const bool adjustable =
      m_snapshot_state == CLONE_SNAPSHOT_INIT && !fil_system.is_buffered();
  if (!adjustable)
    return;

  /* Try to set block size bigger than the transfer buffer. */
  for (;;) {
    if (!(buff_size > (block_size() * UNIV_PAGE_SIZE)))
      break;
    if (!(m_block_size_pow2 < SNAPSHOT_MAX_BLOCK_SIZE_POW2))
      break;
    ++m_block_size_pow2;
  }
}
uint32_t Clone_Snapshot::get_blocks_per_chunk() const {
Mysql_mutex_guard guard(&m_snapshot_mutex);
uint32_t num_blocks = 0;
switch (m_snapshot_state) {
case CLONE_SNAPSHOT_PAGE_COPY:
num_blocks = chunk_size();
break;
case CLONE_SNAPSHOT_FILE_COPY:
[[fallthrough]];
case CLONE_SNAPSHOT_REDO_COPY:
num_blocks = blocks_per_chunk();
break;
default:
/* purecov: begin deadcode */
num_blocks = 0;
break;
/* purecov: end */
}
return num_blocks;
}
/** Move the snapshot to a new state.
The current chunk count is reset first. A snapshot that is not used for copy
only initializes its apply state from the descriptor. For a copy snapshot the
initialization routine of the new state is run.
@param[in]	state_desc	descriptor used to initialize the apply state
@param[in]	new_state	state to move to
@param[in]	temp_buffer	buffer handed to the page copy initialization
@param[in]	temp_buffer_len	length of temp_buffer
@param[in]	cbk		alert callback handed to the redo copy
				initialization
@return 0 on success, otherwise an error code */
int Clone_Snapshot::change_state(Clone_Desc_State *state_desc,
                                 Snapshot_State new_state, byte *temp_buffer,
                                 uint temp_buffer_len, Clone_Alert_Func cbk)
{
  ut_ad(m_snapshot_state != CLONE_SNAPSHOT_NONE);

  m_num_current_chunks = 0;

  if (!is_copy())
    return init_apply_state(state_desc);

  int result = 0;

  switch (new_state) {
    case CLONE_SNAPSHOT_NONE:
    case CLONE_SNAPSHOT_INIT:
      /* Neither state is a valid transition target. */
      result = ER_INTERNAL_ERROR;
      my_error(result, MYF(0), "Innodb Clone Snapshot Invalid state");
      ut_d(ut_error);
      break;

    case CLONE_SNAPSHOT_FILE_COPY:
      ib::info() << "Clone State BEGIN FILE COPY";
      result = init_file_copy(new_state);
      DEBUG_SYNC_C("clone_start_page_archiving");
      DBUG_EXECUTE_IF("clone_crash_during_page_archiving", DBUG_SUICIDE(););
      break;

    case CLONE_SNAPSHOT_PAGE_COPY:
      ib::info() << "Clone State BEGIN PAGE COPY";
      result = init_page_copy(new_state, temp_buffer, temp_buffer_len);
      DEBUG_SYNC_C("clone_start_redo_archiving");
      break;

    case CLONE_SNAPSHOT_REDO_COPY:
      ib::info() << "Clone State BEGIN REDO COPY";
      result = init_redo_copy(new_state, cbk);
      break;

    case CLONE_SNAPSHOT_DONE: {
      ib::info() << "Clone State DONE ";
      /* The guard holds the snapshot mutex until the end of this scope. */
      State_transit transit_guard(this, new_state);
      m_monitor.init_state(PSI_NOT_INSTRUMENTED, m_enable_pfs);
      m_redo_ctx.release();
      result = transit_guard.get_error();
      break;
    }
  }

  return result;
}
/** Find the file that contains a chunk by scanning a file vector.
The scan starts at start_index and stops at the first file whose end chunk is
not smaller than chunk_num.
NOTE(review): when no file matches, the last file examined is returned, not
nullptr - confirm callers rely on the chunk always being found.
@param[in]	file_vector	files to search
@param[in]	chunk_num	chunk to look for
@param[in]	start_index	index to start the scan from
@return the matching file context, the last examined one if none matched, or
nullptr if start_index is beyond the end of the vector */
Clone_file_ctx *Clone_Snapshot::get_file(Clone_File_Vec &file_vector,
                                         uint32_t chunk_num,
                                         uint32_t start_index)
{
  Clone_file_ctx *candidate = nullptr;
  const auto total_files = file_vector.size();

  /* Scan through the file vector matching chunk number. */
  uint pos = start_index;
  while (pos < total_files) {
    candidate = file_vector[pos];
    const auto meta = candidate->get_file_meta();

    ut_ad(chunk_num >= meta->m_begin_chunk);

    if (chunk_num <= meta->m_end_chunk)
      return candidate;
    ++pos;
  }

  return candidate;
}
void Clone_Snapshot::skip_deleted_blocks(uint32_t chunk_num,
uint32_t &block_num) {
/* For file copy entire chunk can be ignored because chunk
doesn't span across files. */
if (m_snapshot_state != CLONE_SNAPSHOT_PAGE_COPY) {
ut_ad(m_snapshot_state == CLONE_SNAPSHOT_FILE_COPY);
block_num = 0;
return;
}
const auto *cur_file_ctx = get_page_file_ctx(chunk_num, block_num);
const auto *next_file_ctx = cur_file_ctx;
ut_ad(cur_file_ctx->deleted());
/* Skip over the deleted file pages of current file context. */
while (next_file_ctx == cur_file_ctx) {
++block_num;
next_file_ctx = get_page_file_ctx(chunk_num, block_num);
/* End of current chunk. */
if (next_file_ctx == nullptr || block_num >= chunk_size()) {
block_num = 0;
break;
}
}
}
/** Get the next page of a chunk in PAGE COPY state.
Each block of a page copy chunk is one page. The page is looked up in the page
vector, mapped to the tablespace node that contains it and copied into the
caller's buffer by get_page_for_write().
@param[in]	chunk_num	current chunk (numbering starts at 1)
@param[in,out]	block_num	in: page index within the chunk; out:
				incremented by one
@param[in,out]	file_ctx	in: pinned file context of the tablespace;
				out: context of the node that holds the page
@param[out]	data_offset	byte offset of the page within that node
@param[in,out]	data_buf	buffer for the page; see get_page_for_write()
@param[in,out]	data_size	in: buffer length; out: page length
@param[out]	file_size	0, or the tablespace size in bytes read from
				the space header page if it exceeds the node's
				recorded file size
@return error code from get_page_for_write() */
int Clone_Snapshot::get_next_page(uint chunk_num, uint &block_num,
                                  const Clone_file_ctx *&file_ctx,
                                  uint64_t &data_offset, byte *&data_buf,
                                  uint32_t &data_size, uint64_t &file_size)
{
  ut_ad(data_size >= UNIV_PAGE_SIZE);
  file_size = 0;
  ut_ad(file_ctx->is_pinned());
  ut_ad(block_num < chunk_size());

  /* For "page copy", each block is a page. */
  uint32_t vector_pos = chunk_size() * (chunk_num - 1);
  vector_pos += block_num;
  ut_a(vector_pos < m_page_vector.size());

  auto page_entry = m_page_vector[vector_pos];
  ++block_num;

  /* Get the data file for current page. */
  auto node_meta = file_ctx->get_file_meta_read();
  ut_ad(node_meta->m_space_id == page_entry.m_space_id);

  uint32_t page_size = fil_space_t::physical_size(node_meta->m_fsp_flags);

  /* Data offset could be beyond 32 BIT integer. */
  data_offset = static_cast<uint64_t>(page_entry.m_page_no);
  data_offset *= page_size;

  /* Check if the page belongs to other nodes of the tablespace. */
  auto node_index = node_meta->m_file_index;
  while (node_index + 1 < num_data_files()) {
    const auto next_ctx = m_data_file_vector[node_index + 1];
    const auto next_meta = next_ctx->get_file_meta();

    /* Move on only if the next node belongs to same tablespace and data
    offset exceeds current node size. */
    const bool same_space = (next_meta->m_space_id == node_meta->m_space_id);
    const bool past_node = (data_offset >= node_meta->m_file_size);
    if (!same_space || !past_node)
      break;

    data_offset -= node_meta->m_file_size;
    node_meta = next_meta;
    node_index = node_meta->m_file_index;
    file_ctx = next_ctx;
  }

  /* Get page from buffer pool. */
  page_id_t page_id(page_entry.m_space_id, page_entry.m_page_no);
  auto err =
      get_page_for_write(page_id, page_size, file_ctx, data_buf, data_size);

  /* Update size from space header page. */
  if (page_entry.m_page_no == 0) {
    auto size_in_pages = fsp_header_get_field(data_buf, FSP_SIZE);
    auto size_in_bytes = static_cast<uint64_t>(size_in_pages);
    size_in_bytes *= page_size;

    if (node_meta->m_file_size < size_in_bytes)
      file_size = size_in_bytes;
  }

  return err;
}
void Clone_Snapshot::page_compress_encrypt(const Clone_File_Meta *file_meta,
byte *&page_data, uint32_t data_size,
ulint zip_size, bool full_crc32,
bool compress, bool encrypt,
uint32_t page_no)
{
auto encrypted_data= page_data + data_size;
/* Do transparent page compression if needed. */
if (compress)
{
auto compressed_data= page_data + data_size;
memset(compressed_data, 0, data_size);
auto len= fil_page_compress(page_data, compressed_data,
file_meta->m_fsp_flags, file_meta->m_fsblk_size, encrypt);
if (len > 0) {
encrypted_data= page_data;
page_data= compressed_data;
}
}
if (encrypt)
{
memset(encrypted_data, 0, data_size);
/* TODO: Pass encryption metadata. */
ut_ad(false);
page_data= fil_encrypt_buf(nullptr, file_meta->m_space_id, page_no,
page_data, zip_size, encrypted_data, full_crc32);
}
}
/** Prepare a copied page for writing by calling buf_flush_init_for_writing().
For a compressed table (zip_size != 0) a temporary compressed page descriptor
pointing at page_data is built and passed along.
@param[in]	zip_size	ROW_FORMAT=COMPRESSED page size, or 0
@param[in,out]	page_data	page to update
@param[in]	full_crc32	whether the full_crc32 format is used */
void Clone_Snapshot::page_update_for_flush(ulint zip_size, byte *&page_data,
                                           bool full_crc32)
{
  if (!zip_size) {
    buf_flush_init_for_writing(nullptr, page_data, nullptr, full_crc32);
    return;
  }

  /* For compressed table, must copy the compressed page. */
  page_zip_des_t zip_desc;
  page_zip_set_size(&zip_desc, zip_size);
  zip_desc.data = page_data;
  ut_d(zip_desc.m_start = 0);
  zip_desc.m_end = 0;
  zip_desc.n_blobs = 0;
  zip_desc.m_nonempty = false;

  buf_flush_init_for_writing(nullptr, page_data, &zip_desc, full_crc32);
}
int Clone_Snapshot::get_page_for_write(const page_id_t &page_id,
uint32_t page_size,
const Clone_file_ctx *file_ctx,
byte *&page_data, uint &data_size)
{
auto file_meta = file_ctx->get_file_meta_read();
mtr_t mtr;
mtr_start(&mtr);
ut_ad(data_size >= 2 * page_size);
data_size= page_size;
auto zip_size= fil_space_t::zip_size(file_meta->m_fsp_flags);
/* Space header page is modified with SX latch while extending. Also,
we would like to serialize with page flush to disk. */
auto block =
buf_page_get_gen(page_id, zip_size, RW_SX_LATCH, nullptr,
BUF_GET_POSSIBLY_FREED, &mtr);
auto bpage = &block->page;
ut_ad(!fsp_is_system_temporary(bpage->id().space()));
/* Get oldest and newest page modification LSN for dirty page. */
auto oldest_lsn = bpage->oldest_modification();
bool page_is_dirty= (oldest_lsn > 0);
byte *src_data= buf_block_get_frame(block);
if (bpage->zip.data)
/* If the page is not dirty, then zip descriptor always has the latest
flushed page copy with LSN and checksum set properly. If the page is
dirty, the latest modified page is in uncompressed form for uncompressed
page types. The LSN in such case is to be taken from block newest LSN and
checksum needs to be recalculated. */
if (!page_is_dirty || page_is_uncompressed_type(src_data))
src_data= bpage->zip.data;
memcpy(page_data, src_data, data_size);
auto cur_lsn = log_sys.get_lsn(std::memory_order_seq_cst);
auto frame_lsn=
static_cast<lsn_t>(mach_read_from_8(page_data + FIL_PAGE_LSN));
/* First page of a encrypted tablespace. */
/* TODO: Encryption metadata: Key*/
ut_ad(!file_meta->can_encrypt());
/* If the page is not dirty but frame LSN is zero, it could be half
initialized page left from incomplete operation. Assign valid LSN and checksum
before copy. */
if (frame_lsn == 0 && oldest_lsn == 0) {
page_is_dirty= true;
frame_lsn= cur_lsn;
mach_write_to_8(page_data + FIL_PAGE_LSN, frame_lsn);
}
bool full_crc32= fil_space_t::full_crc32(file_meta->m_fsp_flags);
auto page_no= page_id.page_no();
auto page_type= fil_page_get_type(page_data);
bool compression= file_meta->can_compress();
bool encryption= file_meta->can_encrypt();
/* Disable compression and encryption based on page number. */
if (page_no == 0
|| (page_id.space() == TRX_SYS_SPACE && page_no == TRX_SYS_PAGE_NO)) {
compression= false;
encryption= false;
}
/* Disable compression based on page type: fil_page_compress() */
if (page_type == 0 || page_type == FIL_PAGE_TYPE_FSP_HDR
|| page_type == FIL_PAGE_TYPE_XDES
|| page_type == FIL_PAGE_PAGE_COMPRESSED)
compression= false;
/* Disable encryption based on page type: fil_space_encrypt_valid_page_type() */
if (page_type == FIL_PAGE_TYPE_FSP_HDR || page_type == FIL_PAGE_TYPE_XDES
|| (page_type == FIL_PAGE_RTREE && !full_crc32))
encryption= false;
bool encrypt_before_checksum= !zip_size && full_crc32;
if (encrypt_before_checksum && (compression || encryption))
{
page_is_dirty= true;
page_compress_encrypt(file_meta, page_data, data_size, zip_size,
full_crc32, compression, encryption, page_no);
}
/* If page is dirty, we need to set checksum and page LSN. */
if (page_is_dirty) {
ut_ad(frame_lsn > 0);
page_update_for_flush(zip_size, page_data, full_crc32);
}
/* TODO: Validate checksum after updating page. */
// BlockReporter reporter(false, page_data, page_size, false);
const auto page_lsn=
static_cast<lsn_t>(mach_read_from_8(page_data + FIL_PAGE_LSN));
const auto page_checksum = static_cast<uint32_t>(
mach_read_from_4(page_data + FIL_PAGE_SPACE_OR_CHKSUM));
int err= 0;
if (/* reporter.is_corrupted() || */ page_lsn > cur_lsn ||
(page_checksum != 0 && page_lsn == 0)) {
my_error(ER_INTERNAL_ERROR, MYF(0), "Innodb Clone Corrupt Page");
err = ER_INTERNAL_ERROR;
ut_d(ut_error);
}
if (!encrypt_before_checksum && (compression || encryption))
page_compress_encrypt(file_meta, page_data, data_size, zip_size,
full_crc32, compression, encryption, page_no);
mtr_commit(&mtr);
return err;
}
/** Get the pin limit for the current state.
@return S_MAX_PAGES_PIN in PAGE COPY state, S_MAX_BLOCKS_PIN otherwise */
uint32_t Clone_Snapshot::get_max_blocks_pin() const
{
  if (m_snapshot_state == CLONE_SNAPSHOT_PAGE_COPY)
    return S_MAX_PAGES_PIN;

  return S_MAX_BLOCKS_PIN;
}
/** Get the file context for a chunk/block in the current state.
FILE COPY and REDO COPY locate the file by chunk number, starting the search
at hint_index. PAGE COPY locates it by chunk and block number.
@param[in]	chunk_num	chunk number
@param[in]	block_num	block number, used in PAGE COPY state only
@param[in]	hint_index	start index for the file search
@return file context, or nullptr for any other state */
Clone_file_ctx *Clone_Snapshot::get_file_ctx(uint32_t chunk_num,
                                             uint32_t block_num,
                                             uint32_t hint_index)
{
  switch (m_snapshot_state) {
    case CLONE_SNAPSHOT_FILE_COPY:
      return get_data_file_ctx(chunk_num, hint_index);

    case CLONE_SNAPSHOT_PAGE_COPY:
      return get_page_file_ctx(chunk_num, block_num);

    case CLONE_SNAPSHOT_REDO_COPY:
      return get_redo_file_ctx(chunk_num, hint_index);

    default:
      ut_d(ut_error); /* purecov: deadcode */
  }

  return nullptr;
}
/** Find the data file that contains a chunk.
@param[in]	chunk_num	chunk number
@param[in]	hint_index	index in the data file vector to start from
@return data file context as found by get_file() */
Clone_file_ctx *Clone_Snapshot::get_data_file_ctx(uint32_t chunk_num,
                                                  uint32_t hint_index)
{
  Clone_file_ctx *data_ctx =
      get_file(m_data_file_vector, chunk_num, hint_index);
  return data_ctx;
}
/** Find the redo file for a chunk.
The header chunk (m_num_current_chunks - 1) maps to the first redo file and
the trailer chunk (m_num_current_chunks) to the last one. Any other chunk is
looked up by chunk number.
@param[in]	chunk_num	chunk number
@param[in]	hint_index	index in the redo file vector to start from
@return redo file context */
Clone_file_ctx *Clone_Snapshot::get_redo_file_ctx(uint32_t chunk_num,
                                                  uint32_t hint_index)
{
  const bool header_chunk = (chunk_num == (m_num_current_chunks - 1));
  const bool trailer_chunk = (chunk_num == m_num_current_chunks);

  /* Last but one chunk is redo header */
  if (header_chunk)
    return m_redo_file_vector.front();

  /* Last chunk is the redo trailer. */
  if (trailer_chunk)
    return m_redo_file_vector.back();

  return get_file(m_redo_file_vector, chunk_num, hint_index);
}
/** Find the file context of a page identified by chunk and block number.
The page is looked up in the page vector and its space ID is mapped to a data
file index through the data file map, where 0 means "no file".
@param[in]	chunk_num	chunk number (numbering starts at 1)
@param[in]	block_num	page index within the chunk
@return file context of the page, or nullptr if the block is past the end of
the chunk or of the page vector, or the space has no file */
Clone_file_ctx *Clone_Snapshot::get_page_file_ctx(uint32_t chunk_num,
                                                  uint32_t block_num)
{
  /* Check if block is beyond the current chunk. */
  if (block_num >= chunk_size()) {
    ut_ad(block_num == chunk_size());
    return nullptr;
  }

  auto vector_pos = chunk_size() * (chunk_num - 1);
  vector_pos += block_num;

  /* Check if all blocks are over. For last chunk, actual number of blocks
  could be less than chunk_size. */
  if (vector_pos >= m_page_vector.size()) {
    ut_ad(vector_pos == m_page_vector.size());
    return nullptr;
  }

  auto page_entry = m_page_vector[vector_pos];

  /* The map stores the file index plus one. */
  auto mapped_index = m_data_file_map[page_entry.m_space_id];
  if (mapped_index == 0) {
    /* purecov: begin deadcode */
    ut_d(ut_error);
    return nullptr;
    /* purecov: end */
  }
  --mapped_index;

  auto owner_ctx = get_file_ctx_by_index(mapped_index);

#ifdef UNIV_DEBUG
  auto owner_meta = owner_ctx->get_file_meta();
  ut_ad(owner_meta->m_space_id == page_entry.m_space_id);
#endif // UNIV_DEBUG

  return owner_ctx;
}
/** Build the file name including the extension of the file context.
@param[out]	name	file name from the metadata, followed by the
			"replaced" or "DDL" extension if one is set */
void Clone_file_ctx::get_file_name(std::string &name) const
{
  name.assign(m_meta.m_file_name);

  /* Add file name extension. */
  if (m_extension == Extension::REPLACE) {
    name.append(CLONE_INNODB_REPLACED_FILE_EXTN);
  } else if (m_extension == Extension::DDL) {
    name.append(CLONE_INNODB_DDL_FILE_EXTN);
  }
  /* Extension::NONE and anything else: plain name. */
}
/** Begin a DDL file operation against the snapshot.
Holds the snapshot mutex for the whole call. Depending on the current
snapshot state the DDL either blocks the clone state change (and, for FILE
COPY and PAGE COPY, runs begin_ddl_file()), waits for PAGE COPY to end, or has
nothing to do. If an error occurs after the state change was blocked, the
block is released again before returning.
@param[in]	type		DDL notification type
@param[in]	space		tablespace ID the DDL operates on
@param[in]	no_wait		passed on to the blocking/wait calls
@param[in]	check_intr	passed on to the blocking/wait calls
@param[out]	error		0 on success, otherwise an error code
@return true if the clone state change is left blocked; the caller then has
to call end_ddl_state() */
bool Clone_Snapshot::begin_ddl_state(Clone_notify::Type type, space_id_t space,
bool no_wait, bool check_intr,
int &error) {
Mysql_mutex_guard guard(&m_snapshot_mutex);
error = 0;
bool blocked = false;
/* The loop body runs once, except when the state changed while trying to
block in INIT/FILE COPY state: then the state is re-evaluated. */
for (;;) {
mysql_mutex_assert_owner(&m_snapshot_mutex);
auto state = get_state();
switch (state) {
case CLONE_SNAPSHOT_NONE:
/* purecov: begin deadcode */
/* Clone must have started at this point. */
ut_d(ut_error);
break;
/* purecov: end */
case CLONE_SNAPSHOT_INIT:
/* Fall through. */
case CLONE_SNAPSHOT_FILE_COPY:
/* Allow clone to enter next stage only after the DDL file operation
is complete. */
blocked = block_state_change(type, space, no_wait, check_intr, error);
mysql_mutex_assert_owner(&m_snapshot_mutex);
if (error != 0) {
/* We should not have blocked in case of error but it is not fatal. */
ut_ad(!blocked);
break;
}
if (state != get_state()) {
/* purecov: begin inspected */
/* State is modified. Start again and recheck. This is safe
as clone has to eventually exit from the above two states. */
ut_ad(!blocked);
continue;
/* purecov: end */
}
ut_ad(blocked);
/* In INIT state there are no files yet to run the file operation on. */
if (state == CLONE_SNAPSHOT_FILE_COPY) {
error = begin_ddl_file(type, space, no_wait, check_intr);
}
break;
case CLONE_SNAPSHOT_PAGE_COPY:
/* 1. Bulk operation currently need to wait if clone has entered page
copy. This is because bulk changes don't generate any redo log.
2. We don't let new encryption alter to begin during page copy state.
We currently cannot handle encryption key in redo log which is
encrypted by donor master key. */
ut_ad(!blocked);
if (type == Clone_notify::Type::SPACE_ALTER_INPLACE_BULK ||
type == Clone_notify::Type::SPACE_ALTER_ENCRYPT_GENERAL ||
type == Clone_notify::Type::SPACE_ALTER_ENCRYPT) {
error =
wait(Wait_type::STATE_END_PAGE_COPY, nullptr, false, check_intr);
break;
}
/* Try to block state change. If state is already modified then nothing
to do as the next states don't require blocking. */
blocked = block_state_change(type, space, no_wait, check_intr, error);
if (error != 0 || state != get_state()) {
/* We should not have blocked in case of error but it is not fatal. */
ut_ad(!blocked);
break;
}
ut_ad(blocked);
error = begin_ddl_file(type, space, no_wait, check_intr);
break;
case CLONE_SNAPSHOT_REDO_COPY:
/* Snapshot end point is already taken. These changes are not part of
snapshot. */
break;
case CLONE_SNAPSHOT_DONE:
/* Clone has already finished. */
break;
default:
/* purecov: begin deadcode */
ut_d(ut_error);
break;
/* purecov: end */
}
/* Leave the retry loop; only the "continue" above repeats it. */
break;
} /* purecov: inspected */
/* Unblock clone, in case of error. */
if (blocked && error != 0) {
/* purecov: begin inspected */
unblock_state_change();
blocked = false;
/* purecov: end */
}
return blocked;
}
/** End a DDL file operation and unblock the clone state change.
The caller is responsible for calling this only if begin_ddl_state() has
blocked the state change. In FILE COPY and PAGE COPY state the file operation
is ended first.
@param[in]	type	DDL notification type
@param[in]	space	tablespace ID the DDL operated on */
void Clone_Snapshot::end_ddl_state(Clone_notify::Type type, space_id_t space)
{
  Mysql_mutex_guard guard(&m_snapshot_mutex);

  switch (get_state()) {
    case CLONE_SNAPSHOT_FILE_COPY:
    case CLONE_SNAPSHOT_PAGE_COPY:
      end_ddl_file(type, space);
      break;
    default:
      break;
  }

  unblock_state_change();
}
void Clone_Snapshot::get_wait_mesg(Wait_type wait_type, std::string &info,
std::string &error) {
switch (wait_type) {
case Wait_type::STATE_TRANSIT_WAIT:
break;
case Wait_type::STATE_TRANSIT:
info.assign("DDL waiting for clone state transition");
error.assign("DDL wait for clone state transition timed out");
break;
case Wait_type::STATE_END_PAGE_COPY:
info.assign("DDL waiting for Clone PAGE COPY to finish");
error.assign("DDL wait for Clone PAGE COPY timed out");
break;
case Wait_type::STATE_BLOCKER:
info.assign("Clone state transition waiting for DDL file operation");
error.assign(
"Clone state transition wait for DDL file operation timed out");
break;
case Wait_type::DATA_FILE_WAIT:
info.assign("DDL waiting for clone threads to exit from previous wait");
error.assign(
"DDL wait for clone threads to exit from wait state timed out");
break;
case Wait_type::DATA_FILE_CLOSE:
info.assign("DDL waiting for clone to close the open data file");
error.assign("DDL wait for clone data file close timed out");
break;
case Wait_type::DDL_FILE_OPERATION:
info.assign("Clone waiting for DDL file operation");
error.assign("Clone wait for DDL file operation timed out");
break;
default:
ut_d(ut_error); /* purecov: deadcode */
}
}
/** Get the short description of a wait, used as the wait stage text.
@param[in]	wait_type	what is being waited for
@return static description string, or nullptr for an unknown wait type */
const char *Clone_Snapshot::wait_string(Wait_type wait_type) const
{
  switch (wait_type) {
    /* DDL waiting for clone state transition */
    case Wait_type::STATE_TRANSIT_WAIT:
    case Wait_type::STATE_TRANSIT:
      return "Waiting for clone state transition";

    /* DDL waiting till Clone PAGE COPY state is over. */
    case Wait_type::STATE_END_PAGE_COPY:
      return "Waiting for clone PAGE_COPY state";

    /* DDL waiting for clone file operation. */
    case Wait_type::DATA_FILE_WAIT:
    case Wait_type::DATA_FILE_CLOSE:
      return "Waiting for clone to close files";

    /* Clone waiting for DDL. */
    case Wait_type::DDL_FILE_OPERATION:
      return "Waiting for ddl file operation";

    case Wait_type::STATE_BLOCKER:
      return "Waiting for ddl before state transition";

    default:
      return nullptr;
  }
}
/** Wait until the condition selected by wait_type no longer holds, or until
the wait fails. The caller must own m_snapshot_mutex (asserted on entry and
on every evaluation of the condition).
@param[in]  wait_type   condition to wait for
@param[in]  ctx         file context; dereferenced for DATA_FILE_WAIT and
                        DATA_FILE_CLOSE and handed to blocks_clone() for
                        DDL_FILE_OPERATION. Not used by the other types.
@param[in]  no_wait     if true, fail with ER_STATEMENT_TIMEOUT as soon as the
                        condition is found to require waiting
@param[in]  check_intr  if true, fail with ER_QUERY_INTERRUPTED when
                        thd_killed(current_thd) reports true while the
                        condition still holds
@return 0 on success, otherwise an error code (ER_STATEMENT_TIMEOUT,
ER_QUERY_INTERRUPTED, ER_INTERNAL_ERROR, or whatever the wait helper
returns) */
int Clone_Snapshot::wait(Wait_type wait_type, const Clone_file_ctx *ctx,
bool no_wait, bool check_intr) {
mysql_mutex_assert_owner(&m_snapshot_mutex);
/* Text for the periodic alert log line and for the timeout error. */
std::string info_mesg;
std::string error_mesg;
get_wait_mesg(wait_type, info_mesg, error_mesg);
/* Condition callback handed to the Clone_Sys wait helpers. It sets `wait`
to whether the condition still holds and returns 0 to keep going or an
error code to abort. `alert` is supplied by the wait helper.
NOTE(review): presumably `alert` is raised once per alert interval and the
helper drops the mutex while sleeping - confirm against Clone_Sys::wait. */
auto wait_cond = [&](bool alert, bool &wait) {
mysql_mutex_assert_owner(&m_snapshot_mutex);
/* When set, an alert ends the wait with ER_STATEMENT_TIMEOUT and an
interrupt is returned without raising a client error. */
bool early_exit = false;
switch (wait_type) {
case Wait_type::STATE_TRANSIT_WAIT:
wait = in_transit_wait();
/* For state transition wait by DDL, exit on alert to avoid
possible deadlock between DDLs. */
early_exit = true;
break;
case Wait_type::STATE_TRANSIT:
wait = in_transit_state();
break;
case Wait_type::STATE_END_PAGE_COPY:
/* If clone has aborted, don't wait for state to end. */
wait = !is_aborted() && (get_state() == CLONE_SNAPSHOT_PAGE_COPY);
/* Debug injection point "clone_ddl_abort_wait_page_copy": when it is
active and a wait would be needed, raise a simulated error instead. */
DBUG_EXECUTE_IF("clone_ddl_abort_wait_page_copy", {
if (wait) {
my_error(ER_INTERNAL_ERROR, MYF(0), "Simulated Clone DDL error");
return ER_INTERNAL_ERROR;
}
});
break;
case Wait_type::STATE_BLOCKER:
/* Wait while any blocker is still registered. */
wait = (m_num_blockers > 0);
break;
case Wait_type::DATA_FILE_WAIT:
wait = ctx->is_waiting();
/* Same early-exit treatment as STATE_TRANSIT_WAIT. */
early_exit = true;
break;
case Wait_type::DATA_FILE_CLOSE:
wait = ctx->is_pinned();
break;
case Wait_type::DDL_FILE_OPERATION:
wait = blocks_clone(ctx);
break;
default:
/* purecov: begin deadcode */
wait = false;
ut_d(ut_error);
/* purecov: end */
}
/* The checks below apply only while the condition still holds. */
if (wait) {
/* Non-blocking callers, and early-exit waits that reached an alert, give
up with a timeout error code. No client error is raised here. */
if (no_wait || (alert && early_exit)) {
return ER_STATEMENT_TIMEOUT; /* purecov: inspected */
}
/* Log the informational message on each alert. */
if (alert) {
ib::info() << info_mesg; /* purecov: tested */
}
if (check_intr && thd_killed(current_thd)) {
/* For early exit the caller would ignore error. */
if (!early_exit) {
my_error(ER_QUERY_INTERRUPTED, MYF(0));
}
return ER_QUERY_INTERRUPTED;
}
}
return 0;
};
/* SET THD information string to display waiting state in PROCESS LIST. */
Clone_Sys::Wait_stage wait_guard(wait_string(wait_type));
bool is_timeout = false;
int err = 0;
/* Increase the defaults to wait more while waiting for page copy state. */
if (wait_type == Wait_type::STATE_END_PAGE_COPY) {
/* Generate alert message every 5 minutes. */
Clone_Sec alert_interval(Clone_Min(5));
/* Wait for 2 hours for clone to finish. */
Clone_Sec time_out(Clone_Min(120));
err = Clone_Sys::wait(CLONE_DEF_SLEEP, time_out, alert_interval, wait_cond,
&m_snapshot_mutex, is_timeout);
} else {
/* All other wait types use the default sleep/timeout/alert settings. */
err = Clone_Sys::wait_default(wait_cond, &m_snapshot_mutex, is_timeout);
}
/* A timeout without any other error is reported as an internal error with
the wait-specific message; debug builds additionally assert. */
if (!err && is_timeout) {
/* purecov: begin deadcode */
err = ER_INTERNAL_ERROR;
my_error(err, MYF(0), error_mesg.c_str());
ut_d(ut_error);
/* purecov: end */
}
return err;
}
/** Try to register the caller as a blocker of clone state transitions.
The caller must own m_snapshot_mutex.
@param[in]   type        notification type
@param[in]   space       tablespace the notification is for
@param[in]   no_wait     if true, skip the courtesy wait for a waiting clone
                         and do not block in the state transition wait
@param[in]   check_intr  forwarded to the state transition wait
@param[out]  error       result of the state transition wait; it is written
                         only when that wait is reached
@return true if the blocker count was incremented, false if the snapshot
state changed in between or the wait failed */
bool Clone_Snapshot::block_state_change(Clone_notify::Type type,
                                        space_id_t space, bool no_wait,
                                        bool check_intr, int &error) {
  mysql_mutex_assert_owner(&m_snapshot_mutex);

  const bool is_undo_ddl = (type == Clone_notify::Type::SPACE_UNDO_DDL);
  const bool is_undo_space = srv_is_undo_tablespace(space);

  /* An undo DDL can trigger nested file create/drop notifications that are
  not themselves flagged as undo DDL. Those need no wait because the outer
  notification must already have blocked clone. With no_wait (currently used
  by background undo truncate, to keep purge threads from blocking) the
  courtesy wait is skipped altogether. */
  const bool wait_for_clone = !no_wait && (!is_undo_space || is_undo_ddl);

  const auto entry_state = get_state();

  /* If clone itself is blocked behind another DDL, give it a bounded chance
  to proceed so DDLs cannot starve it. The wait is limited to avoid a
  deadlock between DDLs,
  e.g. DDL-2 <- DDL-1 (Critical section) <- Clone <- DDL-2.
  Its result is deliberately ignored. */
  if (wait_for_clone) {
    static_cast<void>(
        wait(Wait_type::STATE_TRANSIT_WAIT, nullptr, false, false));

    mysql_mutex_assert_owner(&m_snapshot_mutex);

    if (get_state() != entry_state) {
      /* Snapshot moved on while waiting: let the caller re-evaluate. */
      return false; /* purecov: inspected */
    }
  }

  /* Now wait for any in-progress state transition to complete. */
  error = wait(Wait_type::STATE_TRANSIT, nullptr, no_wait, check_intr);

  if (error != 0) {
    return false;
  }

  mysql_mutex_assert_owner(&m_snapshot_mutex);

  if (get_state() != entry_state) {
    /* Snapshot moved on while waiting: let the caller re-evaluate. */
    return false; /* purecov: inspected */
  }

  mysql_mutex_assert_owner(&m_snapshot_mutex);
  ++m_num_blockers;

  return true;
}
/** Drop one blocker of clone state transitions, i.e. undo the increment done
by a successful block_state_change(). The caller must own m_snapshot_mutex. */
inline void Clone_Snapshot::unblock_state_change() {
  mysql_mutex_assert_owner(&m_snapshot_mutex);

  m_num_blockers -= 1;
}
/** Translate a DDL notification into the file state to set on the file
context.
@param[in]  type   notification type
@param[in]  begin  true for the start of the DDL, false for its end
@return DROPPING/DROPPED for a drop, RENAMING/RENAMED for a rename, and
NONE for every other notification type */
Clone_file_ctx::State Clone_Snapshot::get_target_file_state(
    Clone_notify::Type type, bool begin) {
  using State = Clone_file_ctx::State;

  if (type == Clone_notify::Type::SPACE_DROP) {
    return begin ? State::DROPPING : State::DROPPED;
  }

  if (type == Clone_notify::Type::SPACE_RENAME) {
    return begin ? State::RENAMING : State::RENAMED;
  }

  /* No file state change is associated with the remaining types. */
  return State::NONE;
}
/** Check whether the DDL state of a file currently blocks clone from using
the file.
@param[in]  file_ctx  file context to check
@return true if clone must wait before accessing the file */
bool Clone_Snapshot::blocks_clone(const Clone_file_ctx *file_ctx) {
  const auto snapshot_state = get_state();

  if (snapshot_state == CLONE_SNAPSHOT_FILE_COPY) {
    /* File copy reads the file directly, so both rename and delete in
    progress have to block clone. */
    return file_ctx->modifying();
  }

  if (snapshot_state == CLONE_SNAPSHOT_PAGE_COPY) {
    /* Page copy only cares about delete; space/file rename is ignored. When
    a page is not in the buffer pool it is read from the underlying file, but
    that IO has to be synchronized with file operations irrespective of
    clone. */
    return file_ctx->deleting();
  }

  /* No blocking in any other snapshot state. */
  return false;
}
/** Mark the start of a DDL file operation (drop or rename) on a tablespace
that clone tracks, and wait for clone threads to release the file when the
new state blocks clone. The caller must own m_snapshot_mutex and the snapshot
is expected to be in FILE COPY or PAGE COPY state.
@param[in]  type        notification type; types other than drop and rename
                        need no file handling
@param[in]  space       tablespace ID the DDL operates on
@param[in]  no_wait     forwarded to wait(): fail instead of waiting
@param[in]  check_intr  forwarded to wait(): honour a killed THD
@return 0 on success or when there is nothing to do, otherwise the error
returned by wait(); in that case the previous file state is restored */
int Clone_Snapshot::begin_ddl_file(Clone_notify::Type type, space_id_t space,
                                   bool no_wait, bool check_intr) {
  mysql_mutex_assert_owner(&m_snapshot_mutex);
  ut_ad(get_state() == CLONE_SNAPSHOT_FILE_COPY ||
        get_state() == CLONE_SNAPSHOT_PAGE_COPY);

  const auto target_state = get_target_file_state(type, true);

  /* The type doesn't need any file operation. */
  if (target_state == Clone_file_ctx::State::NONE) {
    return 0;
  }

  /* Single lookup: find() neither searches twice nor can it insert a
  default entry the way operator[] would. */
  const auto map_it = m_data_file_map.find(space);

  /* The space is not in the clone file map, e.g. it was added concurrently
  and is now modified again. Nothing to synchronize with. */
  if (map_it == m_data_file_map.end()) {
    return 0;
  }

  /* If the space is already added for clone, the map holds a valid file
  index for it. Stored indexes start from 1; zero is not expected. */
  auto file_index = map_it->second;

  if (file_index == 0) {
    /* purecov: begin deadcode */
    ut_d(ut_error);
    return 0;
    /* purecov: end */
  }

  /* Convert to the zero based index used by the file context lookup. */
  --file_index;

  auto file_ctx = get_file_ctx_by_index(file_index);
  auto saved_state = file_ctx->m_state.load();

  /* No other drop or rename may be in progress or completed on this file. */
  ut_ad(saved_state != Clone_file_ctx::State::DROPPING);
  ut_ad(saved_state != Clone_file_ctx::State::RENAMING);
  ut_ad(saved_state != Clone_file_ctx::State::DROPPED);

  /* Publish the new state first so that blocks_clone() sees it. */
  file_ctx->m_state.store(target_state);

  /* Wait for all data files to be closed by clone threads. */
  if (blocks_clone(file_ctx)) {
    auto err = wait(Wait_type::DATA_FILE_CLOSE, file_ctx, no_wait, check_intr);

    if (err != 0) {
      /* purecov: begin inspected */
      /* The DDL does not proceed: roll the file state back. */
      file_ctx->m_state.store(saved_state);
      return err;
      /* purecov: end */
    }
  }

  return 0;
}
/** Mark the end of a DDL file operation (drop or rename) on a tablespace
that clone tracks and update the clone file metadata accordingly. The caller
must own m_snapshot_mutex and the snapshot is expected to be in FILE COPY or
PAGE COPY state.
@param[in]  type   notification type; types other than drop and rename need
                   no file handling
@param[in]  space  tablespace ID the DDL operated on */
void Clone_Snapshot::end_ddl_file(Clone_notify::Type type, space_id_t space) {
  mysql_mutex_assert_owner(&m_snapshot_mutex);
  ut_ad(get_state() == CLONE_SNAPSHOT_FILE_COPY ||
        get_state() == CLONE_SNAPSHOT_PAGE_COPY);

  const auto target_state = get_target_file_state(type, false);

  /* The type doesn't need any file operation. */
  if (target_state == Clone_file_ctx::State::NONE) {
    return;
  }

  /* Single lookup: find() neither searches twice nor can it insert a
  default entry the way operator[] would. */
  const auto map_it = m_data_file_map.find(space);

  /* The space is not in the clone file map, e.g. it was added concurrently
  and is now modified again. */
  if (map_it == m_data_file_map.end()) {
    return;
  }

  /* Stored indexes start from 1; zero is not expected. */
  uint32_t file_index = map_it->second;

  if (file_index == 0) {
    /* purecov: begin deadcode */
    ut_d(ut_error);
    return;
    /* purecov: end */
  }

  /* Convert to the zero based index used by the file context lookup. */
  --file_index;

  auto file_ctx = get_file_ctx_by_index(file_index);
  auto file_meta = file_ctx->get_file_meta();

  /* Tag the file context with the snapshot's next state. */
  file_ctx->set_ddl(get_next_state());

  if (type == Clone_notify::Type::SPACE_DROP) {
    file_meta->m_deleted = true;
    file_ctx->m_state.store(target_state);
    return;
  }

  /* Must be evaluated before the final state is stored below: it reflects
  whether the in-progress (begin) state was blocking clone. */
  bool blocking_clone = blocks_clone(file_ctx);

  /* Only drop and rename need file handling and drop has returned above. */
  ut_ad(type == Clone_notify::Type::SPACE_RENAME);

  file_meta->m_renamed = true;
  file_ctx->m_state.store(target_state);

  if (blocking_clone) {
    auto fil_space = fil_space_get(space);

    /* The tablespace is expected to consist of a single file. */
    ut_ad(UT_LIST_GET_LEN(fil_space->chain) == 1);
    auto node = UT_LIST_GET_FIRST(fil_space->chain);

    /* Hand the node's current name to the clone file metadata; presumably
    this is the post-rename name. */
    build_file_name(file_meta, node->name);

    /* Wait for any previously waiting clone threads to restart. This is to
    avoid starvation of clone by repeated renames. We ignore any error.
    Although not expected there is no functional impact of a timeout here. */
    static_cast<void>(wait(Wait_type::DATA_FILE_WAIT, file_ctx, false, false));
  }
}
/** Claim handling of a dropped file for the calling task. The caller must
own m_snapshot_mutex.
@param[in,out]  file_ctx  context of a file in DROPPED or DROPPED_HANDLED
                          state
@return true if the state was moved from DROPPED to DROPPED_HANDLED by this
call, false if the file was already in DROPPED_HANDLED state */
bool Clone_Snapshot::update_deleted_state(Clone_file_ctx *file_ctx) {
  mysql_mutex_assert_owner(&m_snapshot_mutex);

  const auto current_state = file_ctx->m_state.load();
  const bool already_handled =
      (current_state == Clone_file_ctx::State::DROPPED_HANDLED);

  if (!already_handled) {
    ut_ad(current_state == Clone_file_ctx::State::DROPPED);

    /* The current task takes over the deleted file. Recording that here
    lets other tasks ignore it. */
    file_ctx->m_state.store(Clone_file_ctx::State::DROPPED_HANDLED);
  }

  return !already_handled;
}
/** Pin a file for use by clone, waiting for a blocking DDL file operation to
finish first if necessary.
@param[in,out]  file_ctx       file context to pin
@param[out]     handle_delete  true if the file is deleted and the caller is
                               the one that has to handle the deletion
@return 0 if the file is pinned, otherwise the error returned by wait(); the
file is not pinned in that case */
int Clone_Snapshot::pin_file(Clone_file_ctx *file_ctx, bool &handle_delete) {
  handle_delete = false;

  /* Check and update deleted state. Must be invoked with m_snapshot_mutex
  held, as required by update_deleted_state(). */
  auto claim_if_deleted = [&]() {
    if (file_ctx->deleted()) {
      handle_delete = update_deleted_state(file_ctx);
    }
  };

  /* Pin first, then check: this is the path taken without acquiring the
  mutex when no DDL blocks the file. */
  file_ctx->pin();

  if (!blocks_clone(file_ctx)) {
    /* The mutex is taken only when there is a deleted file to claim. */
    if (file_ctx->deleted()) {
      Mysql_mutex_guard guard(&m_snapshot_mutex);
      handle_delete = update_deleted_state(file_ctx);
    }
    return 0;
  }

  /* A DDL is blocking the file: drop the pin and recheck under the mutex. */
  file_ctx->unpin();

  Mysql_mutex_guard guard(&m_snapshot_mutex);

  if (!blocks_clone(file_ctx)) {
    /* purecov: begin inspected */
    file_ctx->pin();
    claim_if_deleted();
    return 0;
    /* purecov: end */
  }

  file_ctx->begin_wait();

  /* Wait for DDL file operation to complete. */
  const int err = wait(Wait_type::DDL_FILE_OPERATION, file_ctx, false, true);

  if (err == 0) {
    file_ctx->pin();
    claim_if_deleted();
  }

  file_ctx->end_wait();

  return err;
}