mirror of
				https://github.com/MariaDB/server.git
				synced 2025-10-31 10:56:12 +01:00 
			
		
		
		
	 cc277a7d24
			
		
	
	
	cc277a7d24
	
	
	
		
			
			The innodb_encrypt_log=ON subformat of FORMAT_10_8 is inefficient, because a new encryption or decryption context is being set up for every log record payload snippet. An in-place conversion between the old and new innodb_encrypt_log=ON format is technically possible. No such conversion has been implemented, though. There is some overhead with respect to the unencrypted format (innodb_encrypt_log=OFF): At the end of each mini-transaction, right before the CRC-32C, additional 8 bytes will be reserved for a nonce (really, log_sys.get_flushed_lsn()), which forms a part of an initialization vector. log_t::FORMAT_ENC_11: The new format identifier, a UTF-8 encoding of 🗝 U+1F5DD OLD KEY (encryption). In this format, everything except the types and lengths of log records will be encrypted. Thus, unlike in FORMAT_10_8, also page identifiers and FILE_ records will be encrypted. The initialization vector (IV) consists of the 8-byte nonce as well as the type and length byte(s) of the first record of the mini-transaction. Page identifiers will no longer form any part of the IV. The old log_t::FORMAT_ENC_10_8 (innodb_encrypt_log=ON) will be supported both by mariadb-backup and by crash recovery. Downgrade from the new format will only be possible if the new server has been running or restarted with innodb_encrypt_log=OFF. If innodb_encrypt_log=ON, only the new log_t::FORMAT_ENC_11 will be written. log_t::is_recoverable(): A new predicate, which holds for all 3 formats. recv_sys_t::tmp_buf: A heap-allocated buffer for decrypting a mini-transaction, or for making the wrap-around of a memory-mapped log file contiguous. recv_sys_t::start_lsn: The start of the mini-transaction. Updated at the start of parse_tail(). log_decrypt_mtr(): Decrypt a mini-transaction in recv_sys.tmp_buf. Theoretically, when reading the log via pread() rather than a read-only memory mapping, we could modify the contents of log_sys.buf in place. 
If we did that, we would have to re-read the last log block into log_sys.buf before resuming writes, because otherwise that block could be re-written as a mix of old decrypted data and new encrypted data, which would cause a subsequent recovery failure unless the log checkpoint had been advanced beyond this point. log_decrypt_legacy(): Decrypt a log_t::FORMAT_ENC_10_8 record snippet on stack. Replaces recv_buf::copy_if_needed(). recv_sys_t::get_backup_parser(): Return a recv_sys_t::parser, that is, a pointer to an instantiation of parse_mmap or parse_mtr for the current log format. recv_sys_t::parse_mtr(), recv_sys_t::parse_mmap(): Add a parameter template<uint32_t> for the current log_sys.format. log_parse_start(): Validate the CRC-32C of a mini-transaction. This has been split from the recv_sys_t::parse() template to reduce code duplication. These two are the lowest-level functions that will be instantiated for both recv_buf and recv_ring. recv_sys_t::parse(): Split into ::log_parse_start() and parse_tail(). Add a parameter template<uint32_t format> to specialize for log_sys.format at compilation time. recv_sys_t::parse_tail(): Operate on pointers to contiguous mini-transaction data. Use a parameter template<bool ENC_10_8> for special handling of the old innodb_encrypt_log=ON format. The former recv_buf::get_buf() is being inlined here. Much of the logic is split into non-inline functions, to avoid duplicating a lot of code for every template expansion. log_crypt: Encrypt or decrypt a mini-transaction in place in the new innodb_encrypt_log=ON format. We will use temporary buffers so that encryption_ctx_update() can be invoked on integer multiples of MY_AES_BLOCK_SIZE, except for the last bytes of the encrypted payload, which will be encrypted or decrypted in place thanks to ENCRYPTION_FLAG_NOPAD. log_crypt::append(): Invoke encryption_ctx_update() in MY_AES_BLOCK_SIZE (16-byte) blocks and scatter/gather shorter data blocks as needed. 
log_crypt::finish(): Handle the last (possibly incomplete) block as a special case, with ENCRYPTION_FLAG_NOPAD. mtr_t::parse_length(): Parse the length of a log record. mtr_t::encrypt(): Use log_crypt instead of the old log_encrypt_buf(). recv_buf::crc32c(): Add a parameter for the initial CRC-32C value. recv_sys_t::rewind(): Operate on pointers to the start of the mini-transaction and to the first skipped record. recv_sys_t::trim(): Declare as ATTRIBUTE_COLD so that this rarely invoked function will not be expanded inline in parse_tail(). recv_sys_t::parse_init(): Handle INIT_PAGE or FREE_PAGE while scanning to the end of the log. recv_sys_t::parse_page0(): Handle WRITE to FSP_SPACE_SIZE and FSP_SPACE_FLAGS. recv_sys_t::parse_store_if_exists(), recv_sys_t::parse_store(), recv_sys_t::parse_oom(): Handle page-level log records. mlog_decode_varint_length(): Make use of __builtin_clz() to avoid a loop when possible. mlog_decode_varint(): Define only on const byte*, as ATTRIBUTE_NOINLINE static because it is a rather large function. recv_buf::decode_varint(): Trivial wrapper for mlog_decode_varint(). recv_ring::decode_varint(): Special implementation. log_page_modify(): Note that a page will be modified in recovery. Split from recv_sys_t::parse_tail(). log_parse_file(): Handle non-page log records. log_record_corrupted(), log_unknown(), log_page_id_corrupted(): Common error reporting functions.
		
			
				
	
	
		
			560 lines
		
	
	
	
		
			18 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			560 lines
		
	
	
	
		
			18 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| /*****************************************************************************
 | |
| 
 | |
| Copyright (c) 2019, 2023, MariaDB Corporation.
 | |
| 
 | |
| This program is free software; you can redistribute it and/or modify it under
 | |
| the terms of the GNU General Public License as published by the Free Software
 | |
| Foundation; version 2 of the License.
 | |
| 
 | |
| This program is distributed in the hope that it will be useful, but WITHOUT
 | |
| ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 | |
| FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 | |
| 
 | |
| You should have received a copy of the GNU General Public License along with
 | |
| this program; if not, write to the Free Software Foundation, Inc.,
 | |
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
 | |
| 
 | |
| *****************************************************************************/
 | |
| 
 | |
| /**
 | |
| @file include/mtr0log.h
 | |
| Mini-transaction log record encoding and decoding
 | |
| *******************************************************/
 | |
| 
 | |
| #pragma once
 | |
| #include "mtr0mtr.h"
 | |
| 
 | |
/** The smallest invalid page identifier for persistent tablespaces.
mtr_t::log_write() asserts that every page identifier it is given
compares less than this value. */
constexpr page_id_t end_page_id{SRV_SPACE_ID_UPPER_BOUND, 0};
 | |
| 
 | |
/** Smallest value that needs a 2-byte encoding (0b10xxxxxx xxxxxxxx);
anything below this is stored in a single byte */
constexpr uint32_t MIN_2BYTE= 0x80;
/** Smallest value that needs a 3-byte encoding
(0b110xxxxx xxxxxxxx xxxxxxxx); the 2-byte form carries 14 payload bits */
constexpr uint32_t MIN_3BYTE= MIN_2BYTE + 0x4000;
/** Smallest value that needs a 4-byte encoding
(0b1110xxxx xxxxxxxx xxxxxxxx xxxxxxxx); the 3-byte form carries 21 bits */
constexpr uint32_t MIN_4BYTE= MIN_3BYTE + 0x200000;
/** Smallest value that needs a 5-byte encoding
(0b11110000 xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx);
the 4-byte form carries 28 bits */
constexpr uint32_t MIN_5BYTE= MIN_4BYTE + 0x10000000;
 | |
| 
 | |
| /** Encode an integer in a redo log record.
 | |
| @param log  redo log record buffer
 | |
| @param i    the integer to encode
 | |
| @return end of the encoded integer */
 | |
| inline byte *mlog_encode_varint(byte *log, size_t i)
 | |
| {
 | |
| #if defined __GNUC__ && !defined __clang__ && __GNUC__ < 6
 | |
| # pragma GCC diagnostic push
 | |
| # pragma GCC diagnostic ignored "-Wconversion" /* GCC 4 and 5 need this here */
 | |
| #endif
 | |
|   if (i < MIN_2BYTE)
 | |
|   {
 | |
|   }
 | |
|   else if (i < MIN_3BYTE)
 | |
|   {
 | |
|     i-= MIN_2BYTE;
 | |
|     static_assert(MIN_3BYTE - MIN_2BYTE == 1 << 14, "compatibility");
 | |
|     *log++= 0x80 | static_cast<byte>(i >> 8);
 | |
|   }
 | |
|   else if (i < MIN_4BYTE)
 | |
|   {
 | |
|     i-= MIN_3BYTE;
 | |
|     static_assert(MIN_4BYTE - MIN_3BYTE == 1 << 21, "compatibility");
 | |
|     *log++= 0xc0 | static_cast<byte>(i >> 16);
 | |
|     goto last2;
 | |
|   }
 | |
|   else if (i < MIN_5BYTE)
 | |
|   {
 | |
|     i-= MIN_4BYTE;
 | |
|     static_assert(MIN_5BYTE - MIN_4BYTE == 1 << 28, "compatibility");
 | |
|     *log++= 0xe0 | static_cast<byte>(i >> 24);
 | |
|     goto last3;
 | |
|   }
 | |
|   else
 | |
|   {
 | |
|     ut_ad(i == uint32_t(i));
 | |
|     i-= MIN_5BYTE;
 | |
|     *log++= 0xf0;
 | |
|     *log++= static_cast<byte>(i >> 24);
 | |
| last3:
 | |
|     *log++= static_cast<byte>(i >> 16);
 | |
| last2:
 | |
|     *log++= static_cast<byte>(i >> 8);
 | |
|   }
 | |
| #if defined __GNUC__ && !defined __clang__ && __GNUC__ < 6
 | |
| # pragma GCC diagnostic pop
 | |
| #endif
 | |
|   *log++= static_cast<byte>(i);
 | |
|   return log;
 | |
| }
 | |
| 
 | |
| /** Write 1, 2, 4, or 8 bytes to a file page.
 | |
| @param[in]      block   file page
 | |
| @param[in,out]  ptr     pointer in file page
 | |
| @param[in]      val     value to write
 | |
| @tparam l       number of bytes to write
 | |
| @tparam w       write request type
 | |
| @tparam V       type of val
 | |
| @return whether any log was written */
 | |
| template<unsigned l,mtr_t::write_type w,typename V>
 | |
| inline bool mtr_t::write(const buf_block_t &block, void *ptr, V val)
 | |
| {
 | |
|   ut_ad(ut_align_down(ptr, srv_page_size) == block.page.frame);
 | |
|   static_assert(l == 1 || l == 2 || l == 4 || l == 8, "wrong length");
 | |
|   byte buf[l];
 | |
| 
 | |
|   switch (l) {
 | |
|   case 1:
 | |
|     ut_ad(val == static_cast<byte>(val));
 | |
|     buf[0]= static_cast<byte>(val);
 | |
|     break;
 | |
|   case 2:
 | |
|     ut_ad(val == static_cast<uint16_t>(val));
 | |
|     mach_write_to_2(buf, static_cast<uint16_t>(val));
 | |
|     break;
 | |
|   case 4:
 | |
|     ut_ad(val == static_cast<uint32_t>(val));
 | |
|     mach_write_to_4(buf, static_cast<uint32_t>(val));
 | |
|     break;
 | |
|   case 8:
 | |
|     mach_write_to_8(buf, val);
 | |
|     break;
 | |
|   }
 | |
|   byte *p= static_cast<byte*>(ptr);
 | |
|   const byte *const end= p + l;
 | |
|   if (w != FORCED && is_logged())
 | |
|   {
 | |
|     const byte *b= buf;
 | |
|     while (*p++ == *b++)
 | |
|     {
 | |
|       if (p == end)
 | |
|       {
 | |
|         ut_ad(w == MAYBE_NOP);
 | |
|         return false;
 | |
|       }
 | |
|     }
 | |
|     p--;
 | |
|   }
 | |
|   ::memcpy(ptr, buf, l);
 | |
|   memcpy_low(block, uint16_t(p - block.page.frame), p, end - p);
 | |
|   return true;
 | |
| }
 | |
| 
 | |
| /** Log an initialization of a string of bytes.
 | |
| @param[in]      b       buffer page
 | |
| @param[in]      ofs     byte offset from b->frame
 | |
| @param[in]      len     length of the data to write
 | |
| @param[in]      val     the data byte to write */
 | |
| inline void mtr_t::memset(const buf_block_t &b, ulint ofs, ulint len, byte val)
 | |
| {
 | |
|   ut_ad(len);
 | |
|   set_modified(b);
 | |
|   if (!is_logged())
 | |
|     return;
 | |
| 
 | |
|   static_assert(MIN_4BYTE > UNIV_PAGE_SIZE_MAX, "consistency");
 | |
|   size_t lenlen= (len < MIN_2BYTE ? 1 + 1 : len < MIN_3BYTE ? 2 + 1 : 3 + 1);
 | |
|   byte *l= log_write<MEMSET>(b.page.id(), &b.page, lenlen, true, ofs);
 | |
|   l= mlog_encode_varint(l, len);
 | |
|   *l++= val;
 | |
|   m_log.close(l);
 | |
|   m_last_offset= static_cast<uint16_t>(ofs + len);
 | |
| }
 | |
| 
 | |
| /** Initialize a string of bytes.
 | |
| @param[in,out]  b       buffer page
 | |
| @param[in]      ofs     byte offset from block->frame
 | |
| @param[in]      len     length of the data to write
 | |
| @param[in]      val     the data byte to write */
 | |
| inline void mtr_t::memset(const buf_block_t *b, ulint ofs, ulint len, byte val)
 | |
| {
 | |
|   ut_ad(ofs <= ulint(srv_page_size));
 | |
|   ut_ad(ofs + len <= ulint(srv_page_size));
 | |
|   ::memset(ofs + b->page.frame, val, len);
 | |
|   memset(*b, ofs, len, val);
 | |
| }
 | |
| 
 | |
| /** Log an initialization of a repeating string of bytes.
 | |
| @param[in]      b       buffer page
 | |
| @param[in]      ofs     byte offset from b->frame
 | |
| @param[in]      len     length of the data to write, in bytes
 | |
| @param[in]      str     the string to write
 | |
| @param[in]      size    size of str, in bytes */
 | |
| inline void mtr_t::memset(const buf_block_t &b, ulint ofs, size_t len,
 | |
|                           const void *str, size_t size)
 | |
| {
 | |
|   ut_ad(size);
 | |
|   ut_ad(len > size); /* use mtr_t::memcpy() for shorter writes */
 | |
|   set_modified(b);
 | |
|   if (!is_logged())
 | |
|     return;
 | |
| 
 | |
|   static_assert(MIN_4BYTE > UNIV_PAGE_SIZE_MAX, "consistency");
 | |
|   size_t lenlen= (len < MIN_2BYTE ? 1 : len < MIN_3BYTE ? 2 : 3);
 | |
|   byte *l= log_write<MEMSET>(b.page.id(), &b.page, lenlen + size, true, ofs);
 | |
|   l= mlog_encode_varint(l, len);
 | |
|   ::memcpy(l, str, size);
 | |
|   l+= size;
 | |
|   m_log.close(l);
 | |
|   m_last_offset= static_cast<uint16_t>(ofs + len);
 | |
| }
 | |
| 
 | |
| /** Initialize a repeating string of bytes.
 | |
| @param[in,out]  b       buffer page
 | |
| @param[in]      ofs     byte offset from b->frame
 | |
| @param[in]      len     length of the data to write, in bytes
 | |
| @param[in]      str     the string to write
 | |
| @param[in]      size    size of str, in bytes */
 | |
| inline void mtr_t::memset(const buf_block_t *b, ulint ofs, size_t len,
 | |
|                           const void *str, size_t size)
 | |
| {
 | |
|   ut_ad(ofs <= ulint(srv_page_size));
 | |
|   ut_ad(ofs + len <= ulint(srv_page_size));
 | |
|   ut_ad(len > size); /* use mtr_t::memcpy() for shorter writes */
 | |
|   size_t s= 0;
 | |
|   while (s < len)
 | |
|   {
 | |
|     ::memcpy(ofs + s + b->page.frame, str, size);
 | |
|     s+= len;
 | |
|   }
 | |
|   ::memcpy(ofs + s + b->page.frame, str, len - s);
 | |
|   memset(*b, ofs, len, str, size);
 | |
| }
 | |
| 
 | |
| /** Log a write of a byte string to a page.
 | |
| @param[in]      b       buffer page
 | |
| @param[in]      offset  byte offset from b->frame
 | |
| @param[in]      str     the data to write
 | |
| @param[in]      len     length of the data to write */
 | |
| inline void mtr_t::memcpy(const buf_block_t &b, ulint offset, ulint len)
 | |
| {
 | |
|   ut_ad(len);
 | |
|   ut_ad(offset <= ulint(srv_page_size));
 | |
|   ut_ad(offset + len <= ulint(srv_page_size));
 | |
|   memcpy_low(b, uint16_t(offset), &b.page.frame[offset], len);
 | |
| }
 | |
| 
 | |
| /** Log a write of a byte string to a page.
 | |
| @param block   page
 | |
| @param offset  byte offset within page
 | |
| @param data    data to be written
 | |
| @param len     length of the data, in bytes */
 | |
| inline void mtr_t::memcpy_low(const buf_block_t &block, uint16_t offset,
 | |
|                               const void *data, size_t len)
 | |
| {
 | |
|   ut_ad(len);
 | |
|   set_modified(block);
 | |
|   if (!is_logged())
 | |
|     return;
 | |
|   if (len < mtr_buf_t::MAX_DATA_SIZE - (1 + 3 + 3 + 5 + 5))
 | |
|   {
 | |
|     byte *end= log_write<WRITE>(block.page.id(), &block.page, len, true,
 | |
|                                 offset);
 | |
|     ::memcpy(end, data, len);
 | |
|     m_log.close(end + len);
 | |
|   }
 | |
|   else
 | |
|   {
 | |
|     m_log.close(log_write<WRITE>(block.page.id(), &block.page, len, false,
 | |
|                                  offset));
 | |
|     m_log.push(static_cast<const byte*>(data), static_cast<uint32_t>(len));
 | |
|   }
 | |
|   m_last_offset= static_cast<uint16_t>(offset + len);
 | |
| }
 | |
| 
 | |
| /** Log that a string of bytes was copied from the same page.
 | |
| @param[in]      b       buffer page
 | |
| @param[in]      d       destination offset within the page
 | |
| @param[in]      s       source offset within the page
 | |
| @param[in]      len     length of the data to copy */
 | |
| inline void mtr_t::memmove(const buf_block_t &b, ulint d, ulint s, ulint len)
 | |
| {
 | |
|   ut_ad(d >= 8);
 | |
|   ut_ad(s >= 8);
 | |
|   ut_ad(len);
 | |
|   ut_ad(s <= ulint(srv_page_size));
 | |
|   ut_ad(s + len <= ulint(srv_page_size));
 | |
|   ut_ad(s != d);
 | |
|   ut_ad(d <= ulint(srv_page_size));
 | |
|   ut_ad(d + len <= ulint(srv_page_size));
 | |
| 
 | |
|   set_modified(b);
 | |
|   if (!is_logged())
 | |
|     return;
 | |
|   static_assert(MIN_4BYTE > UNIV_PAGE_SIZE_MAX, "consistency");
 | |
|   size_t lenlen= (len < MIN_2BYTE ? 1 : len < MIN_3BYTE ? 2 : 3);
 | |
|   /* The source offset is encoded relative to the destination offset,
 | |
|   with the sign in the least significant bit. */
 | |
|   if (s > d)
 | |
|     s= (s - d) << 1;
 | |
|   else
 | |
|     s= (d - s) << 1 | 1;
 | |
|   /* The source offset 0 is not possible. */
 | |
|   s-= 1 << 1;
 | |
|   size_t slen= (s < MIN_2BYTE ? 1 : s < MIN_3BYTE ? 2 : 3);
 | |
|   byte *l= log_write<MEMMOVE>(b.page.id(), &b.page, lenlen + slen, true, d);
 | |
|   l= mlog_encode_varint(l, len);
 | |
|   l= mlog_encode_varint(l, s);
 | |
|   m_log.close(l);
 | |
|   m_last_offset= static_cast<uint16_t>(d + len);
 | |
| }
 | |
| 
 | |
| /**
 | |
| Write a log record.
 | |
| @tparam type   redo log record type
 | |
| @param id     persistent page identifier
 | |
| @param bpage  buffer pool page, or nullptr
 | |
| @param len    number of additional bytes to write
 | |
| @param alloc  whether to allocate the additional bytes
 | |
| @param offset byte offset, or 0 if the record type does not allow one
 | |
| @return end of mini-transaction log, minus len */
 | |
| template<byte type>
 | |
| inline byte *mtr_t::log_write(const page_id_t id, const buf_page_t *bpage,
 | |
|                               size_t len, bool alloc, size_t offset)
 | |
| {
 | |
|   static_assert(!(type & 15) && type != RESERVED &&
 | |
|                 type <= FILE_CHECKPOINT, "invalid type");
 | |
|   ut_ad(type >= FILE_CREATE || is_named_space(id.space()));
 | |
|   ut_ad(!bpage || bpage->id() == id);
 | |
|   ut_ad(id < end_page_id);
 | |
|   constexpr bool have_len= type != INIT_PAGE && type != FREE_PAGE;
 | |
|   constexpr bool have_offset= type == WRITE || type == MEMSET ||
 | |
|     type == MEMMOVE;
 | |
|   static_assert(!have_offset || have_len, "consistency");
 | |
|   ut_ad(have_len || len == 0);
 | |
|   ut_ad(have_len || !alloc);
 | |
|   ut_ad(have_offset || offset == 0);
 | |
|   ut_ad(offset + len <= srv_page_size);
 | |
|   static_assert(MIN_4BYTE >= UNIV_PAGE_SIZE_MAX, "consistency");
 | |
|   ut_ad(type == FREE_PAGE || type == OPTION || (type == EXTENDED && !bpage) ||
 | |
|         memo_contains_flagged(bpage, MTR_MEMO_MODIFY));
 | |
|   size_t max_len;
 | |
|   if (!have_len)
 | |
|     max_len= 1 + 5 + 5;
 | |
|   else if (!have_offset)
 | |
|     max_len= bpage && m_last == bpage
 | |
|       ? 1 + 3
 | |
|       : 1 + 3 + 5 + 5;
 | |
|   else if (bpage && m_last == bpage && m_last_offset <= offset)
 | |
|   {
 | |
|     /* Encode the offset relative from m_last_offset. */
 | |
|     offset-= m_last_offset;
 | |
|     max_len= 1 + 3 + 3;
 | |
|   }
 | |
|   else
 | |
|     max_len= 1 + 3 + 5 + 5 + 3;
 | |
|   byte *const log_ptr= m_log.open(alloc ? max_len + len : max_len);
 | |
|   byte *end= log_ptr + 1;
 | |
|   const byte same_page= max_len < 1 + 5 + 5 ? 0x80 : 0;
 | |
|   if (!same_page)
 | |
|   {
 | |
|     end= mlog_encode_varint(end, id.space());
 | |
|     end= mlog_encode_varint(end, id.page_no());
 | |
|     m_last= bpage;
 | |
|   }
 | |
|   if (have_offset)
 | |
|   {
 | |
|     byte* oend= mlog_encode_varint(end, offset);
 | |
|     if (oend + len > &log_ptr[16])
 | |
|     {
 | |
|       len+= oend - log_ptr - 15;
 | |
|       if (len >= MIN_3BYTE - 1)
 | |
|         len+= 2;
 | |
|       else if (len >= MIN_2BYTE)
 | |
|         len++;
 | |
| 
 | |
|       *log_ptr= type | same_page;
 | |
|       end= mlog_encode_varint(log_ptr + 1, len);
 | |
|       if (!same_page)
 | |
|       {
 | |
|         end= mlog_encode_varint(end, id.space());
 | |
|         end= mlog_encode_varint(end, id.page_no());
 | |
|       }
 | |
|       end= mlog_encode_varint(end, offset);
 | |
|       return end;
 | |
|     }
 | |
|     else
 | |
|       end= oend;
 | |
|   }
 | |
|   else if (len >= 3 && end + len > &log_ptr[16])
 | |
|   {
 | |
|     len+= end - log_ptr - 15;
 | |
|     if (len >= MIN_3BYTE - 1)
 | |
|       len+= 2;
 | |
|     else if (len >= MIN_2BYTE)
 | |
|       len++;
 | |
| 
 | |
|     end= log_ptr;
 | |
|     *end++= type | same_page;
 | |
|     end= mlog_encode_varint(end, len);
 | |
| 
 | |
|     if (!same_page)
 | |
|     {
 | |
|       end= mlog_encode_varint(end, id.space());
 | |
|       end= mlog_encode_varint(end, id.page_no());
 | |
|     }
 | |
|     return end;
 | |
|   }
 | |
| 
 | |
|   ut_ad(end + len >= &log_ptr[1] + !same_page);
 | |
|   ut_ad(end + len <= &log_ptr[16]);
 | |
|   ut_ad(end <= &log_ptr[max_len]);
 | |
|   *log_ptr= type | same_page | static_cast<byte>(end + len - log_ptr - 1);
 | |
|   ut_ad(*log_ptr & 15);
 | |
|   return end;
 | |
| }
 | |
| 
 | |
| /** Write a byte string to a page.
 | |
| @param[in]      b       buffer page
 | |
| @param[in]      dest    destination within b.frame
 | |
| @param[in]      str     the data to write
 | |
| @param[in]      len     length of the data to write
 | |
| @tparam w       write request type */
 | |
| template<mtr_t::write_type w>
 | |
| inline void mtr_t::memcpy(const buf_block_t &b, void *dest, const void *str,
 | |
|                           ulint len)
 | |
| {
 | |
|   ut_ad(ut_align_down(dest, srv_page_size) == b.page.frame);
 | |
|   byte *d= static_cast<byte*>(dest);
 | |
|   const char *s= static_cast<const char*>(str);
 | |
|   if (w != FORCED && is_logged())
 | |
|   {
 | |
|     ut_ad(len);
 | |
|     const byte *const end= d + len;
 | |
|     while (*d++ == *s++)
 | |
|     {
 | |
|       if (d == end)
 | |
|       {
 | |
|         ut_ad(w == MAYBE_NOP);
 | |
|         return;
 | |
|       }
 | |
|     }
 | |
|     s--;
 | |
|     d--;
 | |
|     len= static_cast<ulint>(end - d);
 | |
|   }
 | |
|   ::memcpy(d, s, len);
 | |
|   memcpy(b, d - b.page.frame, len);
 | |
| }
 | |
| 
 | |
| /** Write an EXTENDED log record.
 | |
| @param block  buffer pool page
 | |
| @param type   extended record subtype; @see mrec_ext_t */
 | |
| inline void mtr_t::log_write_extended(const buf_block_t &block, byte type)
 | |
| {
 | |
|   set_modified(block);
 | |
|   if (!is_logged())
 | |
|     return;
 | |
|   byte *l= log_write<EXTENDED>(block.page.id(), &block.page, 1, true);
 | |
|   *l++= type;
 | |
|   m_log.close(l);
 | |
|   m_last_offset= FIL_PAGE_TYPE;
 | |
| }
 | |
| 
 | |
| /** Write log for partly initializing a B-tree or R-tree page.
 | |
| @param block    B-tree or R-tree page
 | |
| @param comp     false=ROW_FORMAT=REDUNDANT, true=COMPACT or DYNAMIC */
 | |
| inline void mtr_t::page_create(const buf_block_t &block, bool comp)
 | |
| {
 | |
|   static_assert(false == INIT_ROW_FORMAT_REDUNDANT, "encoding");
 | |
|   static_assert(true == INIT_ROW_FORMAT_DYNAMIC, "encoding");
 | |
|   log_write_extended(block, comp);
 | |
| }
 | |
| 
 | |
| /** Write log for deleting a B-tree or R-tree record in ROW_FORMAT=REDUNDANT.
 | |
| @param block      B-tree or R-tree page
 | |
| @param prev_rec   byte offset of the predecessor of the record to delete,
 | |
|                   starting from PAGE_OLD_INFIMUM */
 | |
| inline void mtr_t::page_delete(const buf_block_t &block, ulint prev_rec)
 | |
| {
 | |
|   ut_ad(!block.zip_size());
 | |
|   ut_ad(prev_rec < block.physical_size());
 | |
|   set_modified(block);
 | |
|   if (!is_logged())
 | |
|     return;
 | |
|   size_t len= (prev_rec < MIN_2BYTE ? 2 : prev_rec < MIN_3BYTE ? 3 : 4);
 | |
|   byte *l= log_write<EXTENDED>(block.page.id(), &block.page, len, true);
 | |
|   ut_d(byte *end= l + len);
 | |
|   *l++= DELETE_ROW_FORMAT_REDUNDANT;
 | |
|   l= mlog_encode_varint(l, prev_rec);
 | |
|   ut_ad(end == l);
 | |
|   m_log.close(l);
 | |
|   m_last_offset= FIL_PAGE_TYPE;
 | |
| }
 | |
| 
 | |
| /** Write log for deleting a COMPACT or DYNAMIC B-tree or R-tree record.
 | |
| @param block      B-tree or R-tree page
 | |
| @param prev_rec   byte offset of the predecessor of the record to delete,
 | |
|                   starting from PAGE_NEW_INFIMUM
 | |
| @param prev_rec   the predecessor of the record to delete
 | |
| @param hdr_size   record header size, excluding REC_N_NEW_EXTRA_BYTES
 | |
| @param data_size  data payload size, in bytes */
 | |
| inline void mtr_t::page_delete(const buf_block_t &block, ulint prev_rec,
 | |
|                                size_t hdr_size, size_t data_size)
 | |
| {
 | |
|   ut_ad(!block.zip_size());
 | |
|   set_modified(block);
 | |
|   ut_ad(hdr_size < MIN_3BYTE);
 | |
|   ut_ad(prev_rec < block.physical_size());
 | |
|   ut_ad(data_size < block.physical_size());
 | |
|   if (!is_logged())
 | |
|     return;
 | |
|   size_t len= prev_rec < MIN_2BYTE ? 2 : prev_rec < MIN_3BYTE ? 3 : 4;
 | |
|   len+= hdr_size < MIN_2BYTE ? 1 : 2;
 | |
|   len+= data_size < MIN_2BYTE ? 1 : data_size < MIN_3BYTE ? 2 : 3;
 | |
|   byte *l= log_write<EXTENDED>(block.page.id(), &block.page, len, true);
 | |
|   ut_d(byte *end= l + len);
 | |
|   *l++= DELETE_ROW_FORMAT_DYNAMIC;
 | |
|   l= mlog_encode_varint(l, prev_rec);
 | |
|   l= mlog_encode_varint(l, hdr_size);
 | |
|   l= mlog_encode_varint(l, data_size);
 | |
|   ut_ad(end == l);
 | |
|   m_log.close(l);
 | |
|   m_last_offset= FIL_PAGE_TYPE;
 | |
| }
 | |
| 
 | |
| /** Write log for initializing an undo log page.
 | |
| @param block    undo page */
 | |
| inline void mtr_t::undo_create(const buf_block_t &block)
 | |
| {
 | |
|   log_write_extended(block, UNDO_INIT);
 | |
| }
 | |
| 
 | |
| /** Write log for appending an undo log record.
 | |
| @param block    undo page
 | |
| @param data     record within the undo page
 | |
| @param len      length of the undo record, in bytes */
 | |
| inline void mtr_t::undo_append(const buf_block_t &block,
 | |
|                                const void *data, size_t len)
 | |
| {
 | |
|   ut_ad(len > 2);
 | |
|   set_modified(block);
 | |
|   if (!is_logged())
 | |
|     return;
 | |
|   const bool small= len + 1 < mtr_buf_t::MAX_DATA_SIZE - (1 + 3 + 3 + 5 + 5);
 | |
|   byte *end= log_write<EXTENDED>(block.page.id(), &block.page, len + 1, small);
 | |
|   if (UNIV_LIKELY(small))
 | |
|   {
 | |
|     *end++= UNDO_APPEND;
 | |
|     ::memcpy(end, data, len);
 | |
|     m_log.close(end + len);
 | |
|   }
 | |
|   else
 | |
|   {
 | |
|     m_log.close(end);
 | |
|     *m_log.push<byte*>(1)= UNDO_APPEND;
 | |
|     m_log.push(static_cast<const byte*>(data), static_cast<uint32_t>(len));
 | |
|   }
 | |
|   m_last_offset= FIL_PAGE_TYPE;
 | |
| }
 | |
| 
 | |
| /** Trim the end of a tablespace.
 | |
| @param id       first page identifier that will not be in the file */
 | |
| inline void mtr_t::trim_pages(const page_id_t id)
 | |
| {
 | |
|   if (!is_logged())
 | |
|     return;
 | |
|   byte *l= log_write<EXTENDED>(id, nullptr, 1, true);
 | |
|   *l++= TRIM_PAGES;
 | |
|   m_log.close(l);
 | |
|   set_trim_pages();
 | |
| }
 |