mirror of
https://github.com/MariaDB/server.git
synced 2025-09-25 18:39:15 +02:00

The innodb_encrypt_log=ON subformat of FORMAT_10_8 is inefficient, because a new encryption or decryption context is being set up for every log record payload snippet. An in-place conversion between the old and new innodb_encrypt_log=ON format is technically possible. No such conversion has been implemented, though. There is some overhead with respect to the unencrypted format (innodb_encrypt_log=OFF): At the end of each mini-transaction, right before the CRC-32C, additional 8 bytes will be reserved for a nonce (really, log_sys.get_flushed_lsn()), which forms a part of an initialization vector. log_t::FORMAT_ENC_11: The new format identifier, a UTF-8 encoding of 🗝 U+1F5DD OLD KEY (encryption). In this format, everything except the types and lengths of log records will be encrypted. Thus, unlike in FORMAT_10_8, also page identifiers and FILE_ records will be encrypted. The initialization vector (IV) consists of the 8-byte nonce as well as the type and length byte(s) of the first record of the mini-transaction. Page identifiers will no longer form any part of the IV. The old log_t::FORMAT_ENC_10_8 (innodb_encrypt_log=ON) will be supported both by mariadb-backup and by crash recovery. Downgrade from the new format will only be possible if the new server has been running or restarted with innodb_encrypt_log=OFF. If innodb_encrypt_log=ON, only the new log_t::FORMAT_ENC_11 will be written. log_t::is_recoverable(): A new predicate, which holds for all 3 formats. recv_sys_t::tmp_buf: A heap-allocated buffer for decrypting a mini-transaction, or for making the wrap-around of a memory-mapped log file contiguous. recv_sys_t::start_lsn: The start of the mini-transaction. Updated at the start of parse_tail(). log_decrypt_mtr(): Decrypt a mini-transaction in recv_sys.tmp_buf. Theoretically, when reading the log via pread() rather than a read-only memory mapping, we could modify the contents of log_sys.buf in place. 
If we did that, we would have to re-read the last log block into log_sys.buf before resuming writes, because otherwise that block could be re-written as a mix of old decrypted data and new encrypted data, which would cause a subsequent recovery failure unless the log checkpoint had been advanced beyond this point. log_decrypt_legacy(): Decrypt a log_t::FORMAT_ENC_10_8 record snippet on stack. Replaces recv_buf::copy_if_needed(). recv_sys_t::get_backup_parser(): Return a recv_sys_t::parser, that is, a pointer to an instantiation of parse_mmap or parse_mtr for the current log format. recv_sys_t::parse_mtr(), recv_sys_t::parse_mmap(): Add a parameter template<uint32_t> for the current log_sys.format. log_parse_start(): Validate the CRC-32C of a mini-transaction. This has been split from the recv_sys_t::parse() template to reduce code duplication. These two are the lowest-level functions that will be instantiated for both recv_buf and recv_ring. recv_sys_t::parse(): Split into ::log_parse_start() and parse_tail(). Add a parameter template<uint32_t format> to specialize for log_sys.format at compilation time. recv_sys_t::parse_tail(): Operate on pointers to contiguous mini-transaction data. Use a parameter template<bool ENC_10_8> for special handling of the old innodb_encrypt_log=ON format. The former recv_buf::get_buf() is being inlined here. Much of the logic is split into non-inline functions, to avoid duplicating a lot of code for every template expansion. log_crypt: Encrypt or decrypt a mini-transaction in place in the new innodb_encrypt_log=ON format. We will use temporary buffers so that encryption_ctx_update() can be invoked on integer multiples of MY_AES_BLOCK_SIZE, except for the last bytes of the encrypted payload, which will be encrypted or decrypted in place thanks to ENCRYPTION_FLAG_NOPAD. log_crypt::append(): Invoke encryption_ctx_update() in MY_AES_BLOCK_SIZE (16-byte) blocks and scatter/gather shorter data blocks as needed. 
log_crypt::finish(): Handle the last (possibly incomplete) block as a special case, with ENCRYPTION_FLAG_NOPAD. mtr_t::parse_length(): Parse the length of a log record. mtr_t::encrypt(): Use log_crypt instead of the old log_encrypt_buf(). recv_buf::crc32c(): Add a parameter for the initial CRC-32C value. recv_sys_t::rewind(): Operate on pointers to the start of the mini-transaction and to the first skipped record. recv_sys_t::trim(): Declare as ATTRIBUTE_COLD so that this rarely invoked function will not be expanded inline in parse_tail(). recv_sys_t::parse_init(): Handle INIT_PAGE or FREE_PAGE while scanning to the end of the log. recv_sys_t::parse_page0(): Handle WRITE to FSP_SPACE_SIZE and FSP_SPACE_FLAGS. recv_sys_t::parse_store_if_exists(), recv_sys_t::parse_store(), recv_sys_t::parse_oom(): Handle page-level log records. mlog_decode_varint_length(): Make use of __builtin_clz() to avoid a loop when possible. mlog_decode_varint(): Define only on const byte*, as ATTRIBUTE_NOINLINE static because it is a rather large function. recv_buf::decode_varint(): Trivial wrapper for mlog_decode_varint(). recv_ring::decode_varint(): Special implementation. log_page_modify(): Note that a page will be modified in recovery. Split from recv_sys_t::parse_tail(). log_parse_file(): Handle non-page log records. log_record_corrupted(), log_unknown(), log_page_id_corrupted(): Common error reporting functions.
560 lines
18 KiB
C++
560 lines
18 KiB
C++
/*****************************************************************************
|
|
|
|
Copyright (c) 2019, 2023, MariaDB Corporation.
|
|
|
|
This program is free software; you can redistribute it and/or modify it under
|
|
the terms of the GNU General Public License as published by the Free Software
|
|
Foundation; version 2 of the License.
|
|
|
|
This program is distributed in the hope that it will be useful, but WITHOUT
|
|
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License along with
|
|
this program; if not, write to the Free Software Foundation, Inc.,
|
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
|
|
|
|
*****************************************************************************/
|
|
|
|
/**
|
|
@file include/mtr0log.h
|
|
Mini-transaction log record encoding and decoding
|
|
*******************************************************/
|
|
|
|
#pragma once
|
|
#include "mtr0mtr.h"
|
|
|
|
/** The smallest invalid page identifier for persistent tablespaces */
|
|
constexpr page_id_t end_page_id{SRV_SPACE_ID_UPPER_BOUND, 0};
|
|
|
|
/** The smallest integer that needs a 2-byte encoding
(0b10xxxxxx xxxxxxxx) */
constexpr uint32_t MIN_2BYTE{1U << 7};
/** The smallest integer that needs a 3-byte encoding
(0b110xxxxx xxxxxxxx xxxxxxxx) */
constexpr uint32_t MIN_3BYTE{MIN_2BYTE + (1U << 14)};
/** The smallest integer that needs a 4-byte encoding
(0b1110xxxx xxxxxxxx xxxxxxxx xxxxxxxx) */
constexpr uint32_t MIN_4BYTE{MIN_3BYTE + (1U << 21)};
/** The smallest integer that needs a 5-byte encoding
(0b11110000 xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx) */
constexpr uint32_t MIN_5BYTE{MIN_4BYTE + (1U << 28)};
|
|
|
|
/** Encode an integer in a redo log record.
|
|
@param log redo log record buffer
|
|
@param i the integer to encode
|
|
@return end of the encoded integer */
|
|
inline byte *mlog_encode_varint(byte *log, size_t i)
|
|
{
|
|
#if defined __GNUC__ && !defined __clang__ && __GNUC__ < 6
|
|
# pragma GCC diagnostic push
|
|
# pragma GCC diagnostic ignored "-Wconversion" /* GCC 4 and 5 need this here */
|
|
#endif
|
|
if (i < MIN_2BYTE)
|
|
{
|
|
}
|
|
else if (i < MIN_3BYTE)
|
|
{
|
|
i-= MIN_2BYTE;
|
|
static_assert(MIN_3BYTE - MIN_2BYTE == 1 << 14, "compatibility");
|
|
*log++= 0x80 | static_cast<byte>(i >> 8);
|
|
}
|
|
else if (i < MIN_4BYTE)
|
|
{
|
|
i-= MIN_3BYTE;
|
|
static_assert(MIN_4BYTE - MIN_3BYTE == 1 << 21, "compatibility");
|
|
*log++= 0xc0 | static_cast<byte>(i >> 16);
|
|
goto last2;
|
|
}
|
|
else if (i < MIN_5BYTE)
|
|
{
|
|
i-= MIN_4BYTE;
|
|
static_assert(MIN_5BYTE - MIN_4BYTE == 1 << 28, "compatibility");
|
|
*log++= 0xe0 | static_cast<byte>(i >> 24);
|
|
goto last3;
|
|
}
|
|
else
|
|
{
|
|
ut_ad(i == uint32_t(i));
|
|
i-= MIN_5BYTE;
|
|
*log++= 0xf0;
|
|
*log++= static_cast<byte>(i >> 24);
|
|
last3:
|
|
*log++= static_cast<byte>(i >> 16);
|
|
last2:
|
|
*log++= static_cast<byte>(i >> 8);
|
|
}
|
|
#if defined __GNUC__ && !defined __clang__ && __GNUC__ < 6
|
|
# pragma GCC diagnostic pop
|
|
#endif
|
|
*log++= static_cast<byte>(i);
|
|
return log;
|
|
}
|
|
|
|
/** Write 1, 2, 4, or 8 bytes to a file page.
|
|
@param[in] block file page
|
|
@param[in,out] ptr pointer in file page
|
|
@param[in] val value to write
|
|
@tparam l number of bytes to write
|
|
@tparam w write request type
|
|
@tparam V type of val
|
|
@return whether any log was written */
|
|
template<unsigned l,mtr_t::write_type w,typename V>
|
|
inline bool mtr_t::write(const buf_block_t &block, void *ptr, V val)
|
|
{
|
|
ut_ad(ut_align_down(ptr, srv_page_size) == block.page.frame);
|
|
static_assert(l == 1 || l == 2 || l == 4 || l == 8, "wrong length");
|
|
byte buf[l];
|
|
|
|
switch (l) {
|
|
case 1:
|
|
ut_ad(val == static_cast<byte>(val));
|
|
buf[0]= static_cast<byte>(val);
|
|
break;
|
|
case 2:
|
|
ut_ad(val == static_cast<uint16_t>(val));
|
|
mach_write_to_2(buf, static_cast<uint16_t>(val));
|
|
break;
|
|
case 4:
|
|
ut_ad(val == static_cast<uint32_t>(val));
|
|
mach_write_to_4(buf, static_cast<uint32_t>(val));
|
|
break;
|
|
case 8:
|
|
mach_write_to_8(buf, val);
|
|
break;
|
|
}
|
|
byte *p= static_cast<byte*>(ptr);
|
|
const byte *const end= p + l;
|
|
if (w != FORCED && is_logged())
|
|
{
|
|
const byte *b= buf;
|
|
while (*p++ == *b++)
|
|
{
|
|
if (p == end)
|
|
{
|
|
ut_ad(w == MAYBE_NOP);
|
|
return false;
|
|
}
|
|
}
|
|
p--;
|
|
}
|
|
::memcpy(ptr, buf, l);
|
|
memcpy_low(block, uint16_t(p - block.page.frame), p, end - p);
|
|
return true;
|
|
}
|
|
|
|
/** Log an initialization of a string of bytes.
|
|
@param[in] b buffer page
|
|
@param[in] ofs byte offset from b->frame
|
|
@param[in] len length of the data to write
|
|
@param[in] val the data byte to write */
|
|
inline void mtr_t::memset(const buf_block_t &b, ulint ofs, ulint len, byte val)
|
|
{
|
|
ut_ad(len);
|
|
set_modified(b);
|
|
if (!is_logged())
|
|
return;
|
|
|
|
static_assert(MIN_4BYTE > UNIV_PAGE_SIZE_MAX, "consistency");
|
|
size_t lenlen= (len < MIN_2BYTE ? 1 + 1 : len < MIN_3BYTE ? 2 + 1 : 3 + 1);
|
|
byte *l= log_write<MEMSET>(b.page.id(), &b.page, lenlen, true, ofs);
|
|
l= mlog_encode_varint(l, len);
|
|
*l++= val;
|
|
m_log.close(l);
|
|
m_last_offset= static_cast<uint16_t>(ofs + len);
|
|
}
|
|
|
|
/** Initialize a string of bytes.
|
|
@param[in,out] b buffer page
|
|
@param[in] ofs byte offset from block->frame
|
|
@param[in] len length of the data to write
|
|
@param[in] val the data byte to write */
|
|
inline void mtr_t::memset(const buf_block_t *b, ulint ofs, ulint len, byte val)
|
|
{
|
|
ut_ad(ofs <= ulint(srv_page_size));
|
|
ut_ad(ofs + len <= ulint(srv_page_size));
|
|
::memset(ofs + b->page.frame, val, len);
|
|
memset(*b, ofs, len, val);
|
|
}
|
|
|
|
/** Log an initialization of a repeating string of bytes.
|
|
@param[in] b buffer page
|
|
@param[in] ofs byte offset from b->frame
|
|
@param[in] len length of the data to write, in bytes
|
|
@param[in] str the string to write
|
|
@param[in] size size of str, in bytes */
|
|
inline void mtr_t::memset(const buf_block_t &b, ulint ofs, size_t len,
|
|
const void *str, size_t size)
|
|
{
|
|
ut_ad(size);
|
|
ut_ad(len > size); /* use mtr_t::memcpy() for shorter writes */
|
|
set_modified(b);
|
|
if (!is_logged())
|
|
return;
|
|
|
|
static_assert(MIN_4BYTE > UNIV_PAGE_SIZE_MAX, "consistency");
|
|
size_t lenlen= (len < MIN_2BYTE ? 1 : len < MIN_3BYTE ? 2 : 3);
|
|
byte *l= log_write<MEMSET>(b.page.id(), &b.page, lenlen + size, true, ofs);
|
|
l= mlog_encode_varint(l, len);
|
|
::memcpy(l, str, size);
|
|
l+= size;
|
|
m_log.close(l);
|
|
m_last_offset= static_cast<uint16_t>(ofs + len);
|
|
}
|
|
|
|
/** Initialize a repeating string of bytes.
|
|
@param[in,out] b buffer page
|
|
@param[in] ofs byte offset from b->frame
|
|
@param[in] len length of the data to write, in bytes
|
|
@param[in] str the string to write
|
|
@param[in] size size of str, in bytes */
|
|
inline void mtr_t::memset(const buf_block_t *b, ulint ofs, size_t len,
|
|
const void *str, size_t size)
|
|
{
|
|
ut_ad(ofs <= ulint(srv_page_size));
|
|
ut_ad(ofs + len <= ulint(srv_page_size));
|
|
ut_ad(len > size); /* use mtr_t::memcpy() for shorter writes */
|
|
size_t s= 0;
|
|
while (s < len)
|
|
{
|
|
::memcpy(ofs + s + b->page.frame, str, size);
|
|
s+= len;
|
|
}
|
|
::memcpy(ofs + s + b->page.frame, str, len - s);
|
|
memset(*b, ofs, len, str, size);
|
|
}
|
|
|
|
/** Log a write of a byte string to a page.
|
|
@param[in] b buffer page
|
|
@param[in] offset byte offset from b->frame
|
|
@param[in] str the data to write
|
|
@param[in] len length of the data to write */
|
|
inline void mtr_t::memcpy(const buf_block_t &b, ulint offset, ulint len)
|
|
{
|
|
ut_ad(len);
|
|
ut_ad(offset <= ulint(srv_page_size));
|
|
ut_ad(offset + len <= ulint(srv_page_size));
|
|
memcpy_low(b, uint16_t(offset), &b.page.frame[offset], len);
|
|
}
|
|
|
|
/** Log a write of a byte string to a page.
|
|
@param block page
|
|
@param offset byte offset within page
|
|
@param data data to be written
|
|
@param len length of the data, in bytes */
|
|
inline void mtr_t::memcpy_low(const buf_block_t &block, uint16_t offset,
|
|
const void *data, size_t len)
|
|
{
|
|
ut_ad(len);
|
|
set_modified(block);
|
|
if (!is_logged())
|
|
return;
|
|
if (len < mtr_buf_t::MAX_DATA_SIZE - (1 + 3 + 3 + 5 + 5))
|
|
{
|
|
byte *end= log_write<WRITE>(block.page.id(), &block.page, len, true,
|
|
offset);
|
|
::memcpy(end, data, len);
|
|
m_log.close(end + len);
|
|
}
|
|
else
|
|
{
|
|
m_log.close(log_write<WRITE>(block.page.id(), &block.page, len, false,
|
|
offset));
|
|
m_log.push(static_cast<const byte*>(data), static_cast<uint32_t>(len));
|
|
}
|
|
m_last_offset= static_cast<uint16_t>(offset + len);
|
|
}
|
|
|
|
/** Log that a string of bytes was copied from the same page.
|
|
@param[in] b buffer page
|
|
@param[in] d destination offset within the page
|
|
@param[in] s source offset within the page
|
|
@param[in] len length of the data to copy */
|
|
inline void mtr_t::memmove(const buf_block_t &b, ulint d, ulint s, ulint len)
|
|
{
|
|
ut_ad(d >= 8);
|
|
ut_ad(s >= 8);
|
|
ut_ad(len);
|
|
ut_ad(s <= ulint(srv_page_size));
|
|
ut_ad(s + len <= ulint(srv_page_size));
|
|
ut_ad(s != d);
|
|
ut_ad(d <= ulint(srv_page_size));
|
|
ut_ad(d + len <= ulint(srv_page_size));
|
|
|
|
set_modified(b);
|
|
if (!is_logged())
|
|
return;
|
|
static_assert(MIN_4BYTE > UNIV_PAGE_SIZE_MAX, "consistency");
|
|
size_t lenlen= (len < MIN_2BYTE ? 1 : len < MIN_3BYTE ? 2 : 3);
|
|
/* The source offset is encoded relative to the destination offset,
|
|
with the sign in the least significant bit. */
|
|
if (s > d)
|
|
s= (s - d) << 1;
|
|
else
|
|
s= (d - s) << 1 | 1;
|
|
/* The source offset 0 is not possible. */
|
|
s-= 1 << 1;
|
|
size_t slen= (s < MIN_2BYTE ? 1 : s < MIN_3BYTE ? 2 : 3);
|
|
byte *l= log_write<MEMMOVE>(b.page.id(), &b.page, lenlen + slen, true, d);
|
|
l= mlog_encode_varint(l, len);
|
|
l= mlog_encode_varint(l, s);
|
|
m_log.close(l);
|
|
m_last_offset= static_cast<uint16_t>(d + len);
|
|
}
|
|
|
|
/**
|
|
Write a log record.
|
|
@tparam type redo log record type
|
|
@param id persistent page identifier
|
|
@param bpage buffer pool page, or nullptr
|
|
@param len number of additional bytes to write
|
|
@param alloc whether to allocate the additional bytes
|
|
@param offset byte offset, or 0 if the record type does not allow one
|
|
@return end of mini-transaction log, minus len */
|
|
template<byte type>
|
|
inline byte *mtr_t::log_write(const page_id_t id, const buf_page_t *bpage,
|
|
size_t len, bool alloc, size_t offset)
|
|
{
|
|
static_assert(!(type & 15) && type != RESERVED &&
|
|
type <= FILE_CHECKPOINT, "invalid type");
|
|
ut_ad(type >= FILE_CREATE || is_named_space(id.space()));
|
|
ut_ad(!bpage || bpage->id() == id);
|
|
ut_ad(id < end_page_id);
|
|
constexpr bool have_len= type != INIT_PAGE && type != FREE_PAGE;
|
|
constexpr bool have_offset= type == WRITE || type == MEMSET ||
|
|
type == MEMMOVE;
|
|
static_assert(!have_offset || have_len, "consistency");
|
|
ut_ad(have_len || len == 0);
|
|
ut_ad(have_len || !alloc);
|
|
ut_ad(have_offset || offset == 0);
|
|
ut_ad(offset + len <= srv_page_size);
|
|
static_assert(MIN_4BYTE >= UNIV_PAGE_SIZE_MAX, "consistency");
|
|
ut_ad(type == FREE_PAGE || type == OPTION || (type == EXTENDED && !bpage) ||
|
|
memo_contains_flagged(bpage, MTR_MEMO_MODIFY));
|
|
size_t max_len;
|
|
if (!have_len)
|
|
max_len= 1 + 5 + 5;
|
|
else if (!have_offset)
|
|
max_len= bpage && m_last == bpage
|
|
? 1 + 3
|
|
: 1 + 3 + 5 + 5;
|
|
else if (bpage && m_last == bpage && m_last_offset <= offset)
|
|
{
|
|
/* Encode the offset relative from m_last_offset. */
|
|
offset-= m_last_offset;
|
|
max_len= 1 + 3 + 3;
|
|
}
|
|
else
|
|
max_len= 1 + 3 + 5 + 5 + 3;
|
|
byte *const log_ptr= m_log.open(alloc ? max_len + len : max_len);
|
|
byte *end= log_ptr + 1;
|
|
const byte same_page= max_len < 1 + 5 + 5 ? 0x80 : 0;
|
|
if (!same_page)
|
|
{
|
|
end= mlog_encode_varint(end, id.space());
|
|
end= mlog_encode_varint(end, id.page_no());
|
|
m_last= bpage;
|
|
}
|
|
if (have_offset)
|
|
{
|
|
byte* oend= mlog_encode_varint(end, offset);
|
|
if (oend + len > &log_ptr[16])
|
|
{
|
|
len+= oend - log_ptr - 15;
|
|
if (len >= MIN_3BYTE - 1)
|
|
len+= 2;
|
|
else if (len >= MIN_2BYTE)
|
|
len++;
|
|
|
|
*log_ptr= type | same_page;
|
|
end= mlog_encode_varint(log_ptr + 1, len);
|
|
if (!same_page)
|
|
{
|
|
end= mlog_encode_varint(end, id.space());
|
|
end= mlog_encode_varint(end, id.page_no());
|
|
}
|
|
end= mlog_encode_varint(end, offset);
|
|
return end;
|
|
}
|
|
else
|
|
end= oend;
|
|
}
|
|
else if (len >= 3 && end + len > &log_ptr[16])
|
|
{
|
|
len+= end - log_ptr - 15;
|
|
if (len >= MIN_3BYTE - 1)
|
|
len+= 2;
|
|
else if (len >= MIN_2BYTE)
|
|
len++;
|
|
|
|
end= log_ptr;
|
|
*end++= type | same_page;
|
|
end= mlog_encode_varint(end, len);
|
|
|
|
if (!same_page)
|
|
{
|
|
end= mlog_encode_varint(end, id.space());
|
|
end= mlog_encode_varint(end, id.page_no());
|
|
}
|
|
return end;
|
|
}
|
|
|
|
ut_ad(end + len >= &log_ptr[1] + !same_page);
|
|
ut_ad(end + len <= &log_ptr[16]);
|
|
ut_ad(end <= &log_ptr[max_len]);
|
|
*log_ptr= type | same_page | static_cast<byte>(end + len - log_ptr - 1);
|
|
ut_ad(*log_ptr & 15);
|
|
return end;
|
|
}
|
|
|
|
/** Write a byte string to a page.
|
|
@param[in] b buffer page
|
|
@param[in] dest destination within b.frame
|
|
@param[in] str the data to write
|
|
@param[in] len length of the data to write
|
|
@tparam w write request type */
|
|
template<mtr_t::write_type w>
|
|
inline void mtr_t::memcpy(const buf_block_t &b, void *dest, const void *str,
|
|
ulint len)
|
|
{
|
|
ut_ad(ut_align_down(dest, srv_page_size) == b.page.frame);
|
|
byte *d= static_cast<byte*>(dest);
|
|
const char *s= static_cast<const char*>(str);
|
|
if (w != FORCED && is_logged())
|
|
{
|
|
ut_ad(len);
|
|
const byte *const end= d + len;
|
|
while (*d++ == *s++)
|
|
{
|
|
if (d == end)
|
|
{
|
|
ut_ad(w == MAYBE_NOP);
|
|
return;
|
|
}
|
|
}
|
|
s--;
|
|
d--;
|
|
len= static_cast<ulint>(end - d);
|
|
}
|
|
::memcpy(d, s, len);
|
|
memcpy(b, d - b.page.frame, len);
|
|
}
|
|
|
|
/** Write an EXTENDED log record.
|
|
@param block buffer pool page
|
|
@param type extended record subtype; @see mrec_ext_t */
|
|
inline void mtr_t::log_write_extended(const buf_block_t &block, byte type)
|
|
{
|
|
set_modified(block);
|
|
if (!is_logged())
|
|
return;
|
|
byte *l= log_write<EXTENDED>(block.page.id(), &block.page, 1, true);
|
|
*l++= type;
|
|
m_log.close(l);
|
|
m_last_offset= FIL_PAGE_TYPE;
|
|
}
|
|
|
|
/** Write log for partly initializing a B-tree or R-tree page.
|
|
@param block B-tree or R-tree page
|
|
@param comp false=ROW_FORMAT=REDUNDANT, true=COMPACT or DYNAMIC */
|
|
inline void mtr_t::page_create(const buf_block_t &block, bool comp)
|
|
{
|
|
static_assert(false == INIT_ROW_FORMAT_REDUNDANT, "encoding");
|
|
static_assert(true == INIT_ROW_FORMAT_DYNAMIC, "encoding");
|
|
log_write_extended(block, comp);
|
|
}
|
|
|
|
/** Write log for deleting a B-tree or R-tree record in ROW_FORMAT=REDUNDANT.
|
|
@param block B-tree or R-tree page
|
|
@param prev_rec byte offset of the predecessor of the record to delete,
|
|
starting from PAGE_OLD_INFIMUM */
|
|
inline void mtr_t::page_delete(const buf_block_t &block, ulint prev_rec)
|
|
{
|
|
ut_ad(!block.zip_size());
|
|
ut_ad(prev_rec < block.physical_size());
|
|
set_modified(block);
|
|
if (!is_logged())
|
|
return;
|
|
size_t len= (prev_rec < MIN_2BYTE ? 2 : prev_rec < MIN_3BYTE ? 3 : 4);
|
|
byte *l= log_write<EXTENDED>(block.page.id(), &block.page, len, true);
|
|
ut_d(byte *end= l + len);
|
|
*l++= DELETE_ROW_FORMAT_REDUNDANT;
|
|
l= mlog_encode_varint(l, prev_rec);
|
|
ut_ad(end == l);
|
|
m_log.close(l);
|
|
m_last_offset= FIL_PAGE_TYPE;
|
|
}
|
|
|
|
/** Write log for deleting a COMPACT or DYNAMIC B-tree or R-tree record.
|
|
@param block B-tree or R-tree page
|
|
@param prev_rec byte offset of the predecessor of the record to delete,
|
|
starting from PAGE_NEW_INFIMUM
|
|
@param prev_rec the predecessor of the record to delete
|
|
@param hdr_size record header size, excluding REC_N_NEW_EXTRA_BYTES
|
|
@param data_size data payload size, in bytes */
|
|
inline void mtr_t::page_delete(const buf_block_t &block, ulint prev_rec,
|
|
size_t hdr_size, size_t data_size)
|
|
{
|
|
ut_ad(!block.zip_size());
|
|
set_modified(block);
|
|
ut_ad(hdr_size < MIN_3BYTE);
|
|
ut_ad(prev_rec < block.physical_size());
|
|
ut_ad(data_size < block.physical_size());
|
|
if (!is_logged())
|
|
return;
|
|
size_t len= prev_rec < MIN_2BYTE ? 2 : prev_rec < MIN_3BYTE ? 3 : 4;
|
|
len+= hdr_size < MIN_2BYTE ? 1 : 2;
|
|
len+= data_size < MIN_2BYTE ? 1 : data_size < MIN_3BYTE ? 2 : 3;
|
|
byte *l= log_write<EXTENDED>(block.page.id(), &block.page, len, true);
|
|
ut_d(byte *end= l + len);
|
|
*l++= DELETE_ROW_FORMAT_DYNAMIC;
|
|
l= mlog_encode_varint(l, prev_rec);
|
|
l= mlog_encode_varint(l, hdr_size);
|
|
l= mlog_encode_varint(l, data_size);
|
|
ut_ad(end == l);
|
|
m_log.close(l);
|
|
m_last_offset= FIL_PAGE_TYPE;
|
|
}
|
|
|
|
/** Write log for initializing an undo log page.
|
|
@param block undo page */
|
|
inline void mtr_t::undo_create(const buf_block_t &block)
|
|
{
|
|
log_write_extended(block, UNDO_INIT);
|
|
}
|
|
|
|
/** Write log for appending an undo log record.
|
|
@param block undo page
|
|
@param data record within the undo page
|
|
@param len length of the undo record, in bytes */
|
|
inline void mtr_t::undo_append(const buf_block_t &block,
|
|
const void *data, size_t len)
|
|
{
|
|
ut_ad(len > 2);
|
|
set_modified(block);
|
|
if (!is_logged())
|
|
return;
|
|
const bool small= len + 1 < mtr_buf_t::MAX_DATA_SIZE - (1 + 3 + 3 + 5 + 5);
|
|
byte *end= log_write<EXTENDED>(block.page.id(), &block.page, len + 1, small);
|
|
if (UNIV_LIKELY(small))
|
|
{
|
|
*end++= UNDO_APPEND;
|
|
::memcpy(end, data, len);
|
|
m_log.close(end + len);
|
|
}
|
|
else
|
|
{
|
|
m_log.close(end);
|
|
*m_log.push<byte*>(1)= UNDO_APPEND;
|
|
m_log.push(static_cast<const byte*>(data), static_cast<uint32_t>(len));
|
|
}
|
|
m_last_offset= FIL_PAGE_TYPE;
|
|
}
|
|
|
|
/** Trim the end of a tablespace.
|
|
@param id first page identifier that will not be in the file */
|
|
inline void mtr_t::trim_pages(const page_id_t id)
|
|
{
|
|
if (!is_logged())
|
|
return;
|
|
byte *l= log_write<EXTENDED>(id, nullptr, 1, true);
|
|
*l++= TRIM_PAGES;
|
|
m_log.close(l);
|
|
set_trim_pages();
|
|
}
|