| /*****************************************************************************
 | |
| 
 | |
| Copyright (c) 1996, 2019, Oracle and/or its affiliates. All Rights Reserved.
 | |
| Copyright (c) 2017, 2022, MariaDB Corporation.
 | |
| 
 | |
| This program is free software; you can redistribute it and/or modify it under
 | |
| the terms of the GNU General Public License as published by the Free Software
 | |
| Foundation; version 2 of the License.
 | |
| 
 | |
| This program is distributed in the hope that it will be useful, but WITHOUT
 | |
| ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 | |
| FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 | |
| 
 | |
| You should have received a copy of the GNU General Public License along with
 | |
| this program; if not, write to the Free Software Foundation, Inc.,
 | |
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
 | |
| 
 | |
| *****************************************************************************/
 | |
| 
 | |
| /**************************************************//**
 | |
| @file trx/trx0rec.cc
 | |
| Transaction undo log record
 | |
| 
 | |
| Created 3/26/1996 Heikki Tuuri
 | |
| *******************************************************/
 | |
| 
 | |
| #include "trx0rec.h"
 | |
| #include "fsp0fsp.h"
 | |
| #include "mach0data.h"
 | |
| #include "trx0undo.h"
 | |
| #include "mtr0log.h"
 | |
| #include "dict0dict.h"
 | |
| #include "ut0mem.h"
 | |
| #include "row0ext.h"
 | |
| #include "row0upd.h"
 | |
| #include "que0que.h"
 | |
| #include "trx0purge.h"
 | |
| #include "trx0rseg.h"
 | |
| #include "row0row.h"
 | |
| #include "row0mysql.h"
 | |
| #include "row0ins.h"
 | |
| #include "mariadb_stats.h"
 | |
| 
 | |
| /** The search tuple corresponding to TRX_UNDO_INSERT_METADATA. */
 | |
| const dtuple_t trx_undo_metadata = {
 | |
| 	/* This also works for REC_INFO_METADATA_ALTER, because the
 | |
| 	delete-mark (REC_INFO_DELETED_FLAG) is ignored when searching. */
 | |
| 	REC_INFO_METADATA_ADD, 0, 0, 0, nullptr, nullptr
 | |
| #ifdef UNIV_DEBUG
 | |
| 	, DATA_TUPLE_MAGIC_N
 | |
| #endif /* UNIV_DEBUG */
 | |
| };
 | |
| 
 | |
| /*=========== UNDO LOG RECORD CREATION AND DECODING ====================*/
 | |
| 
 | |
| /** Calculate the free space left for extending an undo log record.
 | |
| @param undo_block    undo log page
 | |
| @param ptr           current end of the undo page
 | |
| @return bytes left */
 | |
| static ulint trx_undo_left(const buf_block_t *undo_block, const byte *ptr)
 | |
| {
 | |
|   ut_ad(ptr >=
 | |
|         &undo_block->page.frame[TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE]);
 | |
|   /* The 10 is supposed to be an extra safety margin (and needed for
 | |
|   compatibility with older versions) */
 | |
|   lint left= srv_page_size - (ptr - undo_block->page.frame) -
 | |
|     (10 + FIL_PAGE_DATA_END);
 | |
|   ut_ad(left >= 0);
 | |
|   return left < 0 ? 0 : static_cast<ulint>(left);
 | |
| }
 | |
| 
 | |
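/* Illustrative sketch (kept out of the build with #if 0): the arithmetic of
trx_undo_left() above on a hypothetical page. The 16384-byte page size and the
8-byte page trailer used below are assumptions for the example only, not values
taken from this file. */
#if 0
# include <cassert>
# include <cstddef>

/* Simplified stand-in for trx_undo_left(): how many bytes can still be
appended between the current end of the undo data and the reserved safety
margin plus page trailer. */
static size_t undo_left_sketch(size_t page_size, size_t used_offset)
{
  const size_t reserved = 10 /* safety margin */ + 8 /* assumed page trailer */;
  return used_offset + reserved >= page_size
         ? 0
         : page_size - used_offset - reserved;
}

static void undo_left_demo()
{
  /* 16384 - 1000 - (10 + 8) = 15366 bytes left for undo records. */
  assert(undo_left_sketch(16384, 1000) == 15366);
  /* A nearly full page leaves nothing. */
  assert(undo_left_sketch(16384, 16380) == 0);
}
#endif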
| /**********************************************************************//**
 | |
| Set the next and previous pointers in the undo page for the undo record
 | |
| that was written to ptr. Update the first free value by the number of bytes
 | |
| written for this undo record.
 | |
| @return offset of the inserted entry on the page if succeeded, 0 if fail */
 | |
| static
 | |
| uint16_t
 | |
| trx_undo_page_set_next_prev_and_add(
 | |
| /*================================*/
 | |
| 	buf_block_t*	undo_block,	/*!< in/out: undo log page */
 | |
| 	byte*		ptr,		/*!< in: ptr up to where data has been
 | |
| 					written on this undo page. */
 | |
| 	mtr_t*		mtr)		/*!< in: mtr */
 | |
| {
 | |
|   ut_ad(page_align(ptr) == undo_block->page.frame);
 | |
| 
 | |
|   if (UNIV_UNLIKELY(trx_undo_left(undo_block, ptr) < 2))
 | |
|     return 0;
 | |
| 
 | |
|   byte *ptr_to_first_free= my_assume_aligned<2>(TRX_UNDO_PAGE_HDR +
 | |
| 						TRX_UNDO_PAGE_FREE +
 | |
| 						undo_block->page.frame);
 | |
| 
 | |
|   const uint16_t first_free= mach_read_from_2(ptr_to_first_free);
 | |
| 
 | |
|   /* Write offset of the previous undo log record */
 | |
|   memcpy(ptr, ptr_to_first_free, 2);
 | |
|   ptr += 2;
 | |
| 
 | |
|   const uint16_t end_of_rec= static_cast<uint16_t>
 | |
|     (ptr - undo_block->page.frame);
 | |
| 
 | |
|   /* Update the offset to first free undo record */
 | |
|   mach_write_to_2(ptr_to_first_free, end_of_rec);
 | |
|   /* Write offset of the next undo log record */
 | |
|   memcpy(undo_block->page.frame + first_free, ptr_to_first_free, 2);
 | |
|   const byte *start= undo_block->page.frame + first_free + 2;
 | |
| 
 | |
|   mtr->undo_append(*undo_block, start, ptr - start - 2);
 | |
|   return first_free;
 | |
| }
 | |
| 
 | |
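/* Illustrative sketch (kept out of the build with #if 0): the on-page record
chaining maintained by trx_undo_page_set_next_prev_and_add() above. Each
record is laid out as
    [2 bytes: end-of-record offset = start of the next record]
    [record body]
    [2 bytes: start offset of this record]
and the page header field TRX_UNDO_PAGE_FREE always points just past the last
record. The big-endian 16-bit helper below is a simplified stand-in for
mach_read_from_2(), not the real function. */
#if 0
# include <cstdint>

static uint16_t read2(const uint8_t *p)
{
  return uint16_t(uint16_t(p[0]) << 8 | p[1]);
}

/* Offset of the record following the one that starts at rec_start. */
static uint16_t next_rec_start(const uint8_t *page, uint16_t rec_start)
{
  return read2(page + rec_start);
}

/* Offset of the record preceding the one that starts at rec_start,
or 0 if rec_start is the first record on the page. */
static uint16_t prev_rec_start(const uint8_t *page, uint16_t rec_start,
                               uint16_t first_rec_start)
{
  return rec_start == first_rec_start ? 0 : read2(page + rec_start - 2);
}
#endif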
| /** Virtual column undo log version. To distinguish it from a length value
 | |
| in 5.7.8 undo log, it starts with 0xF1 */
 | |
| static const ulint VIRTUAL_COL_UNDO_FORMAT_1 = 0xF1;
 | |
| 
 | |
| /** Write virtual column index info (index id and column position in index)
 | |
| to the undo log
 | |
| @param[in,out]	undo_block	undo log page
 | |
| @param[in]	table           the table
 | |
| @param[in]	pos		the virtual column position
 | |
| @param[in]      ptr             undo log record being written
 | |
| @param[in]	first_v_col	whether this is the first virtual column
 | |
| 				which could start with a version marker
 | |
| @return new undo log pointer */
 | |
| static
 | |
| byte*
 | |
| trx_undo_log_v_idx(
 | |
| 	buf_block_t*		undo_block,
 | |
| 	const dict_table_t*	table,
 | |
| 	ulint			pos,
 | |
| 	byte*			ptr,
 | |
| 	bool			first_v_col)
 | |
| {
 | |
| 	ut_ad(pos < table->n_v_def);
 | |
| 	dict_v_col_t*	vcol = dict_table_get_nth_v_col(table, pos);
 | |
| 	byte*		old_ptr;
 | |
| 
 | |
| 	ut_ad(!vcol->v_indexes.empty());
 | |
| 
 | |
| 	ulint		size = first_v_col ? 1 + 2 : 2;
 | |
| 	const ulint	avail = trx_undo_left(undo_block, ptr);
 | |
| 
 | |
| 	/* The mach_write_compressed(ptr, flen) in
 | |
| 	trx_undo_page_report_modify() will consume additional 1 to 5 bytes. */
 | |
| 	if (avail < size + 5) {
 | |
| 		return(NULL);
 | |
| 	}
 | |
| 
 | |
| 	ulint n_idx = 0;
 | |
| 	for (const auto& v_index : vcol->v_indexes) {
 | |
| 		n_idx++;
 | |
| 		if (uint32_t hi= uint32_t(v_index.index->id >> 32)) {
 | |
| 			size += 1 + mach_get_compressed_size(hi);
 | |
| 		}
 | |
| 		size += mach_get_compressed_size(uint32_t(v_index.index->id));
 | |
| 		size += mach_get_compressed_size(v_index.nth_field);
 | |
| 	}
 | |
| 
 | |
| 	size += mach_get_compressed_size(n_idx);
 | |
| 
 | |
| 	if (avail < size + 5) {
 | |
| 		return(NULL);
 | |
| 	}
 | |
| 
 | |
| 	ut_d(const byte* orig_ptr = ptr);
 | |
| 
 | |
| 	if (first_v_col) {
 | |
| 		/* write the version marker */
 | |
| 		mach_write_to_1(ptr, VIRTUAL_COL_UNDO_FORMAT_1);
 | |
| 
 | |
| 		ptr += 1;
 | |
| 	}
 | |
| 
 | |
| 	old_ptr = ptr;
 | |
| 
 | |
| 	ptr += 2;
 | |
| 
 | |
| 	ptr += mach_write_compressed(ptr, n_idx);
 | |
| 
 | |
| 	for (const auto& v_index : vcol->v_indexes) {
 | |
| 		/* This is compatible with
 | |
| 		ptr += mach_u64_write_much_compressed(ptr, v_index.index->id)
 | |
| 		(the added "if" statement fixes an old regression). */
 | |
| 		if (uint32_t hi= uint32_t(v_index.index->id >> 32)) {
 | |
| 			*ptr++ = 0xff;
 | |
| 			ptr += mach_write_compressed(ptr, hi);
 | |
| 		}
 | |
| 		ptr += mach_write_compressed(ptr, uint32_t(v_index.index->id));
 | |
| 		ptr += mach_write_compressed(ptr, v_index.nth_field);
 | |
| 	}
 | |
| 
 | |
| 	ut_ad(orig_ptr + size == ptr);
 | |
| 
 | |
| 	mach_write_to_2(old_ptr, ulint(ptr - old_ptr));
 | |
| 
 | |
| 	return(ptr);
 | |
| }
 | |
| 
 | |
| /** Read virtual column index from undo log, and verify the column is still
 | |
| indexed, and return its position
 | |
| @param[in]	table		the table
 | |
| @param[in]	ptr		undo log pointer
 | |
| @param[out]	col_pos		the column number or FIL_NULL
 | |
| 				if the column is not indexed any more
 | |
| @return remaining part of undo log record after reading these values */
 | |
| static
 | |
| const byte*
 | |
| trx_undo_read_v_idx_low(
 | |
| 	const dict_table_t*	table,
 | |
| 	const byte*		ptr,
 | |
| 	uint32_t*		col_pos)
 | |
| {
 | |
| 	ulint		len = mach_read_from_2(ptr);
 | |
| 	const byte*	old_ptr = ptr;
 | |
| 
 | |
| 	*col_pos = FIL_NULL;
 | |
| 
 | |
| 	ptr += 2;
 | |
| 
 | |
| 	ulint	num_idx = mach_read_next_compressed(&ptr);
 | |
| 
 | |
| 	ut_ad(num_idx > 0);
 | |
| 
 | |
| 	dict_index_t*	clust_index = dict_table_get_first_index(table);
 | |
| 
 | |
| 	for (ulint i = 0; i < num_idx; i++) {
 | |
| 		index_id_t	id = 0;
 | |
| 		/* This is like mach_u64_read_much_compressed(),
 | |
| 		but advancing ptr to the next field. */
 | |
| 		if (*ptr == 0xff) {
 | |
| 			ptr++;
 | |
| 			id = mach_read_next_compressed(&ptr);
 | |
| 			id <<= 32;
 | |
| 		}
 | |
| 		id |= mach_read_next_compressed(&ptr);
 | |
| 		ulint		pos = mach_read_next_compressed(&ptr);
 | |
| 		dict_index_t*	index = dict_table_get_next_index(clust_index);
 | |
| 
 | |
| 		while (index != NULL) {
 | |
| 			/* Return if we find a matching index.
 | |
| 			TODO: in the future, it might be worth adding
 | |
| 			checks on other indexes */
 | |
| 			if (index->id == id) {
 | |
| 				const dict_col_t* col = dict_index_get_nth_col(
 | |
| 					index, pos);
 | |
| 				ut_ad(col->is_virtual());
 | |
| 				const dict_v_col_t*	vcol = reinterpret_cast<
 | |
| 					const dict_v_col_t*>(col);
 | |
| 				*col_pos = vcol->v_pos;
 | |
| 				return(old_ptr + len);
 | |
| 			}
 | |
| 
 | |
| 			index = dict_table_get_next_index(index);
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	return(old_ptr + len);
 | |
| }
 | |
| 
 | |
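/* Illustrative sketch (kept out of the build with #if 0): the per-column
block written by trx_undo_log_v_idx() and parsed by trx_undo_read_v_idx_low()
above: an optional 0xF1 version marker (first virtual column only), a 2-byte
total length that is backfilled last, the number of indexes, and then, per
index, the index id (an optional 0xff-prefixed high half plus the low half)
and the column's position in that index. The varint helper below is a
simplified stand-in, not InnoDB's mach_*_compressed() encoding. */
#if 0
# include <cstdint>
# include <vector>

/* Simplified stand-in for mach_write_compressed(): LEB128-style varint. */
static void put_varint(std::vector<uint8_t> &out, uint64_t v)
{
  do {
    uint8_t b = v & 0x7f;
    v >>= 7;
    out.push_back(uint8_t(b | (v ? 0x80 : 0)));
  } while (v);
}

struct v_index_ref { uint64_t index_id; uint32_t nth_field; };

static std::vector<uint8_t> encode_v_idx_sketch(
  const std::vector<v_index_ref> &refs, bool first_v_col)
{
  std::vector<uint8_t> out;
  if (first_v_col)
    out.push_back(0xF1);                  /* VIRTUAL_COL_UNDO_FORMAT_1 */
  const size_t len_pos = out.size();
  out.push_back(0);
  out.push_back(0);                       /* length, backfilled below */
  put_varint(out, refs.size());
  for (const v_index_ref &r : refs)
  {
    if (uint32_t hi = uint32_t(r.index_id >> 32))
    {
      out.push_back(0xff);
      put_varint(out, hi);
    }
    put_varint(out, uint32_t(r.index_id));
    put_varint(out, r.nth_field);
  }
  /* The length covers everything from the length field to the end,
  matching mach_write_to_2(old_ptr, ptr - old_ptr) above. */
  const uint16_t len = uint16_t(out.size() - len_pos);
  out[len_pos] = uint8_t(len >> 8);       /* big-endian, like mach_write_to_2 */
  out[len_pos + 1] = uint8_t(len);
  return out;
}
#endif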
| /** Read virtual column index from undo log or online log if the log
 | |
| contains such info, and in the undo log case, verify the column is
 | |
| still indexed, and output its position
 | |
| @param[in]	table		the table
 | |
| @param[in]	ptr		undo log pointer
 | |
| @param[in]	first_v_col	if this is the first virtual column, which
 | |
| 				has the version marker
 | |
| @param[in,out]	is_undo_log	this function parses both the undo log
 | |
| 				and the online log for virtual columns, so it
 | |
| 				must know which log it is reading. When
 | |
| 				first_v_col is true, is_undo_log is an output;
 | |
| 				when first_v_col is false, it is an input
 | |
| @param[out]	field_no	the column number, or FIL_NULL if not indexed
 | |
| @return remaining part of undo log record after reading these values */
 | |
| const byte*
 | |
| trx_undo_read_v_idx(
 | |
| 	const dict_table_t*	table,
 | |
| 	const byte*		ptr,
 | |
| 	bool			first_v_col,
 | |
| 	bool*			is_undo_log,
 | |
| 	uint32_t*		field_no)
 | |
| {
 | |
| 	/* Version marker only put on the first virtual column */
 | |
| 	if (first_v_col) {
 | |
| 		/* Undo log has the virtual undo log marker */
 | |
| 		*is_undo_log = (mach_read_from_1(ptr)
 | |
| 				== VIRTUAL_COL_UNDO_FORMAT_1);
 | |
| 
 | |
| 		if (*is_undo_log) {
 | |
| 			ptr += 1;
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	if (*is_undo_log) {
 | |
| 		ptr = trx_undo_read_v_idx_low(table, ptr, field_no);
 | |
| 	} else {
 | |
| 		*field_no -= REC_MAX_N_FIELDS;
 | |
| 	}
 | |
| 
 | |
| 	return(ptr);
 | |
| }
 | |
| 
 | |
| /** Report the virtual columns of an inserted row in the undo log.
 | |
| @param[in]	undo_block	undo log page
 | |
| @param[in]	table		the table
 | |
| @param[in]	row		dtuple contains the virtual columns
 | |
| @param[in,out]	ptr		log ptr
 | |
| @return true if write goes well, false if out of space */
 | |
| static
 | |
| bool
 | |
| trx_undo_report_insert_virtual(
 | |
| 	buf_block_t*	undo_block,
 | |
| 	dict_table_t*	table,
 | |
| 	const dtuple_t*	row,
 | |
| 	byte**		ptr)
 | |
| {
 | |
| 	byte*	start = *ptr;
 | |
| 	bool	first_v_col = true;
 | |
| 
 | |
| 	if (trx_undo_left(undo_block, *ptr) < 2) {
 | |
| 		return(false);
 | |
| 	}
 | |
| 
 | |
| 	/* Reserve 2 bytes to write the number
 | |
| 	of bytes the stored fields take in this
 | |
| 	undo record */
 | |
| 	*ptr += 2;
 | |
| 
 | |
| 	for (ulint col_no = 0; col_no < dict_table_get_n_v_cols(table);
 | |
| 	     col_no++) {
 | |
| 		const dict_v_col_t*     col
 | |
| 			= dict_table_get_nth_v_col(table, col_no);
 | |
| 
 | |
| 		if (col->m_col.ord_part) {
 | |
| 
 | |
| 			/* make sure there is enough space to write the length */
 | |
| 			if (trx_undo_left(undo_block, *ptr) < 5) {
 | |
| 				return(false);
 | |
| 			}
 | |
| 
 | |
| 			ulint   pos = col_no;
 | |
| 			pos += REC_MAX_N_FIELDS;
 | |
| 			*ptr += mach_write_compressed(*ptr, pos);
 | |
| 
 | |
| 			*ptr = trx_undo_log_v_idx(undo_block, table,
 | |
| 						  col_no, *ptr, first_v_col);
 | |
| 			first_v_col = false;
 | |
| 
 | |
| 			if (*ptr == NULL) {
 | |
| 				return(false);
 | |
| 			}
 | |
| 
 | |
| 			const dfield_t* vfield = dtuple_get_nth_v_field(
 | |
| 				row, col->v_pos);
 | |
| 			switch (ulint flen = vfield->len) {
 | |
| 			case 0: case UNIV_SQL_NULL:
 | |
| 				if (trx_undo_left(undo_block, *ptr) < 5) {
 | |
| 					return(false);
 | |
| 				}
 | |
| 
 | |
| 				*ptr += mach_write_compressed(*ptr, flen);
 | |
| 				break;
 | |
| 			default:
 | |
| 				ulint	max_len
 | |
| 					= dict_max_v_field_len_store_undo(
 | |
| 						table, col_no);
 | |
| 
 | |
| 				if (flen > max_len) {
 | |
| 					flen = max_len;
 | |
| 				}
 | |
| 
 | |
| 				if (trx_undo_left(undo_block, *ptr)
 | |
| 				    < flen + 5) {
 | |
| 					return(false);
 | |
| 				}
 | |
| 				*ptr += mach_write_compressed(*ptr, flen);
 | |
| 
 | |
| 				memcpy(*ptr, vfield->data, flen);
 | |
| 				*ptr += flen;
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	/* Always mark the end of the log with a 2-byte length field */
 | |
| 	mach_write_to_2(start, ulint(*ptr - start));
 | |
| 
 | |
| 	return(true);
 | |
| }
 | |
| 
 | |
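/* Illustrative sketch (kept out of the build with #if 0): the "reserve two
bytes, write the payload, then backfill the length" pattern used by
trx_undo_report_insert_virtual() above and again in
trx_undo_page_report_modify(). The growable buffer and the big-endian 16-bit
store are simplified stand-ins for the undo page and mach_write_to_2(). */
#if 0
# include <cstdint>
# include <cstddef>
# include <vector>

static void append_with_length_prefix(std::vector<uint8_t> &undo,
                                      const uint8_t *payload, size_t size)
{
  const size_t start = undo.size();       /* where the 2-byte length will go */
  undo.resize(start + 2);                 /* reserve it, value still unknown */

  undo.insert(undo.end(), payload, payload + size);

  /* Backfill: the length covers the prefix itself plus the payload,
  matching mach_write_to_2(start, ulint(*ptr - start)) in the code above. */
  const uint16_t len = uint16_t(undo.size() - start);
  undo[start] = uint8_t(len >> 8);
  undo[start + 1] = uint8_t(len);
}
#endif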
| /** Report the insert of a clustered index record in the undo log.
 | |
| @param	undo_block	undo log page
 | |
| @param	trx		transaction
 | |
| @param	index		clustered index
 | |
| @param	clust_entry	index entry which will be inserted to the
 | |
| 			clustered index
 | |
| @param	mtr		mini-transaction
 | |
| @param	write_empty	write empty table undo log record
 | |
| @return offset of the inserted entry on the page if succeeded, 0 if fail */
 | |
| static
 | |
| uint16_t
 | |
| trx_undo_page_report_insert(
 | |
| 	buf_block_t*	undo_block,
 | |
| 	trx_t*		trx,
 | |
| 	dict_index_t*	index,
 | |
| 	const dtuple_t*	clust_entry,
 | |
| 	mtr_t*		mtr,
 | |
| 	bool		write_empty)
 | |
| {
 | |
| 	ut_ad(index->is_primary());
 | |
| 	/* MariaDB 10.3.1+ in trx_undo_page_init() always initializes
 | |
| 	TRX_UNDO_PAGE_TYPE as 0, but previous versions wrote
 | |
| 	TRX_UNDO_INSERT == 1 into insert_undo pages,
 | |
| 	or TRX_UNDO_UPDATE == 2 into update_undo pages. */
 | |
| 	ut_ad(mach_read_from_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE
 | |
| 			       + undo_block->page.frame) <= 2);
 | |
| 
 | |
| 	uint16_t first_free = mach_read_from_2(my_assume_aligned<2>
 | |
| 					       (TRX_UNDO_PAGE_HDR
 | |
| 						+ TRX_UNDO_PAGE_FREE
 | |
| 						+ undo_block->page.frame));
 | |
| 	byte* ptr = undo_block->page.frame + first_free;
 | |
| 
 | |
| 	if (trx_undo_left(undo_block, ptr) < 2 + 1 + 11 + 11) {
 | |
| 		/* Not enough space for writing the general parameters */
 | |
| 		return(0);
 | |
| 	}
 | |
| 
 | |
| 	/* Reserve 2 bytes for the pointer to the next undo log record */
 | |
| 	ptr += 2;
 | |
| 
 | |
| 	/* Store first some general parameters to the undo log */
 | |
| 	*ptr++ = TRX_UNDO_INSERT_REC;
 | |
| 	ptr += mach_u64_write_much_compressed(ptr, trx->undo_no);
 | |
| 	ptr += mach_u64_write_much_compressed(ptr, index->table->id);
 | |
| 
 | |
| 	if (write_empty) {
 | |
| 		/* Table is in bulk operation */
 | |
| 		undo_block->page.frame[first_free + 2] = TRX_UNDO_EMPTY;
 | |
| 		goto done;
 | |
| 	}
 | |
| 
 | |
| 	/*----------------------------------------*/
 | |
| 	/* Store then the fields required to uniquely determine the record
 | |
| 	to be inserted in the clustered index */
 | |
| 	if (UNIV_UNLIKELY(clust_entry->info_bits != 0)) {
 | |
| 		ut_ad(clust_entry->is_metadata());
 | |
| 		ut_ad(index->is_instant());
 | |
| 		ut_ad(undo_block->page.frame[first_free + 2]
 | |
| 		      == TRX_UNDO_INSERT_REC);
 | |
| 		undo_block->page.frame[first_free + 2]
 | |
| 			= TRX_UNDO_INSERT_METADATA;
 | |
| 		goto done;
 | |
| 	}
 | |
| 
 | |
| 	for (unsigned i = 0; i < dict_index_get_n_unique(index); i++) {
 | |
| 
 | |
| 		const dfield_t*	field	= dtuple_get_nth_field(clust_entry, i);
 | |
| 		ulint		flen	= dfield_get_len(field);
 | |
| 
 | |
| 		if (trx_undo_left(undo_block, ptr) < 5) {
 | |
| 
 | |
| 			return(0);
 | |
| 		}
 | |
| 
 | |
| 		ptr += mach_write_compressed(ptr, flen);
 | |
| 
 | |
| 		switch (flen) {
 | |
| 		case 0: case UNIV_SQL_NULL:
 | |
| 			break;
 | |
| 		default:
 | |
| 			if (trx_undo_left(undo_block, ptr) < flen) {
 | |
| 
 | |
| 				return(0);
 | |
| 			}
 | |
| 
 | |
| 			memcpy(ptr, dfield_get_data(field), flen);
 | |
| 			ptr += flen;
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	if (index->table->n_v_cols) {
 | |
| 		if (!trx_undo_report_insert_virtual(
 | |
| 			undo_block, index->table, clust_entry, &ptr)) {
 | |
| 			return(0);
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| done:
 | |
| 	return(trx_undo_page_set_next_prev_and_add(undo_block, ptr, mtr));
 | |
| }
 | |
| 
 | |
| /**********************************************************************//**
 | |
| Reads from an undo log record the general parameters.
 | |
| @return remaining part of undo log record after reading these values */
 | |
| const byte*
 | |
| trx_undo_rec_get_pars(
 | |
| /*==================*/
 | |
| 	const trx_undo_rec_t*	undo_rec,	/*!< in: undo log record */
 | |
| 	byte*		type,		/*!< out: undo record type:
 | |
| 					TRX_UNDO_INSERT_REC, ... */
 | |
| 	byte*		cmpl_info,	/*!< out: compiler info, relevant only
 | |
| 					for update type records */
 | |
| 	bool*		updated_extern,	/*!< out: true if we updated an
 | |
| 					externally stored field */
 | |
| 	undo_no_t*	undo_no,	/*!< out: undo log record number */
 | |
| 	table_id_t*	table_id)	/*!< out: table id */
 | |
| {
 | |
| 	ulint		type_cmpl;
 | |
| 
 | |
| 	type_cmpl = undo_rec[2];
 | |
| 	const byte *ptr = undo_rec + 3;
 | |
| 
 | |
| 	*updated_extern = !!(type_cmpl & TRX_UNDO_UPD_EXTERN);
 | |
| 	type_cmpl &= ~TRX_UNDO_UPD_EXTERN;
 | |
| 	*type = type_cmpl & (TRX_UNDO_CMPL_INFO_MULT - 1);
 | |
| 	ut_ad(*type >= TRX_UNDO_RENAME_TABLE);
 | |
| 	ut_ad(*type <= TRX_UNDO_EMPTY);
 | |
| 	*cmpl_info = byte(type_cmpl / TRX_UNDO_CMPL_INFO_MULT);
 | |
| 
 | |
| 	*undo_no = mach_read_next_much_compressed(&ptr);
 | |
| 	*table_id = mach_read_next_much_compressed(&ptr);
 | |
| 	ut_ad(*table_id);
 | |
| 
 | |
| 	return ptr;
 | |
| }
 | |
| 
 | |
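/* Illustrative sketch (kept out of the build with #if 0): how the single
type_cmpl byte read by trx_undo_rec_get_pars() above packs the record type,
the compiler info, and the "updated extern" flag. The constant values below
(TRX_UNDO_CMPL_INFO_MULT = 16, TRX_UNDO_UPD_EXTERN = 128) are assumptions for
the example; the real values come from trx0rec.h. */
#if 0
# include <cstdint>

struct undo_type_info
{
  uint8_t type;           /* TRX_UNDO_INSERT_REC, TRX_UNDO_UPD_EXIST_REC, ... */
  uint8_t cmpl_info;      /* compiler info, relevant for update-type records */
  bool    updated_extern; /* an externally stored column was updated */
};

static undo_type_info decode_type_cmpl(uint8_t type_cmpl)
{
  const uint8_t CMPL_INFO_MULT = 16;  /* assumed TRX_UNDO_CMPL_INFO_MULT */
  const uint8_t UPD_EXTERN = 128;     /* assumed TRX_UNDO_UPD_EXTERN */

  undo_type_info info;
  info.updated_extern = (type_cmpl & UPD_EXTERN) != 0;
  type_cmpl &= uint8_t(~UPD_EXTERN);
  info.type = uint8_t(type_cmpl & (CMPL_INFO_MULT - 1));
  info.cmpl_info = uint8_t(type_cmpl / CMPL_INFO_MULT);
  return info;
}
#endif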
| /** Read from an undo log record a non-virtual column value.
 | |
| @param ptr	pointer to remaining part of the undo record
 | |
| @param field	stored field
 | |
| @param len	length of the field, or UNIV_SQL_NULL
 | |
| @param orig_len	original length of the locally stored part
 | |
| of an externally stored column, or 0
 | |
| @return remaining part of undo log record after reading these values */
 | |
| const byte *trx_undo_rec_get_col_val(const byte *ptr, const byte **field,
 | |
|                                      uint32_t *len, uint32_t *orig_len)
 | |
| {
 | |
| 	*len = mach_read_next_compressed(&ptr);
 | |
| 	*orig_len = 0;
 | |
| 
 | |
| 	switch (*len) {
 | |
| 	case UNIV_SQL_NULL:
 | |
| 		*field = NULL;
 | |
| 		break;
 | |
| 	case UNIV_EXTERN_STORAGE_FIELD:
 | |
| 		*orig_len = mach_read_next_compressed(&ptr);
 | |
| 		*len = mach_read_next_compressed(&ptr);
 | |
| 		*field = ptr;
 | |
| 		ptr += *len & ~SPATIAL_STATUS_MASK;
 | |
| 
 | |
| 		ut_ad(*orig_len >= BTR_EXTERN_FIELD_REF_SIZE);
 | |
| 		ut_ad(*len > *orig_len);
 | |
| 		/* @see dtuple_convert_big_rec() */
 | |
| 		ut_ad(*len >= BTR_EXTERN_FIELD_REF_SIZE);
 | |
| 
 | |
| 		/* we do not have access to index->table here
 | |
| 		ut_ad(dict_table_has_atomic_blobs(index->table)
 | |
| 		      || *len >= col->max_prefix
 | |
| 		      + BTR_EXTERN_FIELD_REF_SIZE);
 | |
| 		*/
 | |
| 
 | |
| 		*len += UNIV_EXTERN_STORAGE_FIELD;
 | |
| 		break;
 | |
| 	default:
 | |
| 		*field = ptr;
 | |
| 		if (*len >= UNIV_EXTERN_STORAGE_FIELD) {
 | |
| 			ptr += (*len - UNIV_EXTERN_STORAGE_FIELD)
 | |
| 				& ~SPATIAL_STATUS_MASK;
 | |
| 		} else {
 | |
| 			ptr += *len;
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	return ptr;
 | |
| }
 | |
| 
 | |
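/* Illustrative sketch (kept out of the build with #if 0): the shape of one
stored column value as read by trx_undo_rec_get_col_val() above. A compressed
length comes first; it is either a NULL marker, an "externally stored" marker
followed by the original local length and the logged prefix length, or a plain
length followed by that many data bytes. The varint reader and the two
sentinel values are simplified stand-ins, not InnoDB's UNIV_SQL_NULL /
UNIV_EXTERN_STORAGE_FIELD encodings, and the spatial-status bits are ignored. */
#if 0
# include <cstdint>
# include <cstddef>

/* Simplified stand-in for mach_read_next_compressed(). */
static uint64_t get_varint(const uint8_t *&p)
{
  uint64_t v = 0;
  unsigned shift = 0;
  uint8_t b;
  do {
    b = *p++;
    v |= uint64_t(b & 0x7f) << shift;
    shift += 7;
  } while (b & 0x80);
  return v;
}

struct col_val
{
  const uint8_t *data;
  uint64_t len;
  uint64_t orig_len;
  bool is_null, is_ext;
};

static const uint8_t *get_col_val_sketch(const uint8_t *ptr, col_val *out)
{
  const uint64_t SQL_NULL_MARK = ~uint64_t(0);     /* hypothetical sentinel */
  const uint64_t EXTERN_MARK = ~uint64_t(0) - 1;   /* hypothetical sentinel */

  *out = col_val{nullptr, 0, 0, false, false};
  const uint64_t len = get_varint(ptr);
  if (len == SQL_NULL_MARK)             /* NULL: no data bytes follow */
    out->is_null = true;
  else if (len == EXTERN_MARK)          /* externally stored column */
  {
    out->is_ext = true;
    out->orig_len = get_varint(ptr);    /* locally stored original length */
    out->len = get_varint(ptr);         /* length of the logged prefix */
    out->data = ptr;
    ptr += out->len;
  }
  else                                  /* ordinary inline value */
  {
    out->len = len;
    out->data = ptr;
    ptr += len;
  }
  return ptr;
}
#endif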
| /*******************************************************************//**
 | |
| Builds a row reference from an undo log record.
 | |
| @return pointer to remaining part of undo record */
 | |
| const byte*
 | |
| trx_undo_rec_get_row_ref(
 | |
| /*=====================*/
 | |
| 	const byte*	ptr,	/*!< in: remaining part of a copy of an undo log
 | |
| 				record, at the start of the row reference;
 | |
| 				NOTE that this copy of the undo log record must
 | |
| 				be preserved as long as the row reference is
 | |
| 				used, as we do NOT copy the data in the
 | |
| 				record! */
 | |
| 	dict_index_t*	index,	/*!< in: clustered index */
 | |
| 	const dtuple_t**ref,	/*!< out, own: row reference */
 | |
| 	mem_heap_t*	heap)	/*!< in: memory heap from which the memory
 | |
| 				needed is allocated */
 | |
| {
 | |
| 	ut_ad(index->is_primary());
 | |
| 
 | |
| 	const uint16_t ref_len = dict_index_get_n_unique(index);
 | |
| 
 | |
| 	dtuple_t* tuple = dtuple_create(heap, ref_len);
 | |
| 	*ref = tuple;
 | |
| 
 | |
| 	dict_index_copy_types(tuple, index, ref_len);
 | |
| 
 | |
| 	for (ulint i = 0; i < ref_len; i++) {
 | |
| 		const byte*	field;
 | |
| 		uint32_t	len, orig_len;
 | |
| 
 | |
| 		dfield_t* dfield = dtuple_get_nth_field(tuple, i);
 | |
| 
 | |
| 		ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);
 | |
| 
 | |
| 		dfield_set_data(dfield, field, len);
 | |
| 	}
 | |
| 
 | |
| 	return ptr;
 | |
| }
 | |
| 
 | |
| /** Skip a row reference from an undo log record.
 | |
| @param ptr    part of an update undo log record
 | |
| @param index  clustered index
 | |
| @return pointer to remaining part of undo record */
 | |
| static const byte *trx_undo_rec_skip_row_ref(const byte *ptr,
 | |
|                                              const dict_index_t *index)
 | |
| {
 | |
| 	ut_ad(index->is_primary());
 | |
| 
 | |
| 	ulint ref_len = dict_index_get_n_unique(index);
 | |
| 
 | |
| 	for (ulint i = 0; i < ref_len; i++) {
 | |
| 		const byte*	field;
 | |
| 		uint32_t len, orig_len;
 | |
| 
 | |
| 		ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);
 | |
| 	}
 | |
| 
 | |
| 	return(ptr);
 | |
| }
 | |
| 
 | |
| /** Fetch a prefix of an externally stored column, for writing to the undo
 | |
| log of an update or delete marking of a clustered index record.
 | |
| @param[out]	ext_buf		buffer to hold the prefix data and BLOB pointer
 | |
| @param[in]	prefix_len	prefix size to store in the undo log
 | |
| @param[in]	zip_size	ROW_FORMAT=COMPRESSED page size, or 0
 | |
| @param[in]	field		an externally stored column
 | |
| @param[in,out]	len		input: length of field; output: used length of
 | |
| ext_buf
 | |
| @return ext_buf */
 | |
| static
 | |
| byte*
 | |
| trx_undo_page_fetch_ext(
 | |
| 	byte*			ext_buf,
 | |
| 	ulint			prefix_len,
 | |
| 	ulint			zip_size,
 | |
| 	const byte*		field,
 | |
| 	ulint*			len)
 | |
| {
 | |
| 	/* Fetch the BLOB. */
 | |
| 	ulint	ext_len = btr_copy_externally_stored_field_prefix(
 | |
| 		ext_buf, prefix_len, zip_size, field, *len);
 | |
| 	/* BLOBs should always be nonempty. */
 | |
| 	ut_a(ext_len);
 | |
| 	/* Append the BLOB pointer to the prefix. */
 | |
| 	memcpy(ext_buf + ext_len,
 | |
| 	       field + *len - BTR_EXTERN_FIELD_REF_SIZE,
 | |
| 	       BTR_EXTERN_FIELD_REF_SIZE);
 | |
| 	*len = ext_len + BTR_EXTERN_FIELD_REF_SIZE;
 | |
| 	return(ext_buf);
 | |
| }
 | |
| 
 | |
| /** Writes to the undo log a prefix of an externally stored column.
 | |
| @param[out]	ptr		undo log position, at least 15 bytes must be
 | |
| available
 | |
| @param[out]	ext_buf		a buffer of DICT_MAX_FIELD_LEN_BY_FORMAT()
 | |
| 				size, or NULL when should not fetch a longer
 | |
| 				prefix
 | |
| @param[in]	prefix_len	prefix size to store in the undo log
 | |
| @param[in]	zip_size	ROW_FORMAT=COMPRESSED page size, or 0
 | |
| @param[in,out]	field		the locally stored part of the externally
 | |
| stored column
 | |
| @param[in,out]	len		length of field, in bytes
 | |
| @param[in]	spatial_status	whether the column is used by spatial index or
 | |
| 				regular index
 | |
| @return undo log position */
 | |
| static
 | |
| byte*
 | |
| trx_undo_page_report_modify_ext(
 | |
| 	byte*			ptr,
 | |
| 	byte*			ext_buf,
 | |
| 	ulint			prefix_len,
 | |
| 	ulint			zip_size,
 | |
| 	const byte**		field,
 | |
| 	ulint*			len,
 | |
| 	spatial_status_t	spatial_status)
 | |
| {
 | |
| 	ulint	spatial_len= 0;
 | |
| 
 | |
| 	switch (spatial_status) {
 | |
| 	case SPATIAL_UNKNOWN:
 | |
| 	case SPATIAL_NONE:
 | |
| 		break;
 | |
| 
 | |
| 	case SPATIAL_MIXED:
 | |
| 	case SPATIAL_ONLY:
 | |
| 		spatial_len = DATA_MBR_LEN;
 | |
| 		break;
 | |
| 	}
 | |
| 
 | |
| 	/* Encode spatial status into length. */
 | |
| 	spatial_len |= ulint(spatial_status) << SPATIAL_STATUS_SHIFT;
 | |
| 
 | |
| 	if (spatial_status == SPATIAL_ONLY) {
 | |
| 		/* If the column is only used by a spatial index,
 | |
| 		logging its MBR is enough. */
 | |
| 		ptr += mach_write_compressed(ptr, UNIV_EXTERN_STORAGE_FIELD
 | |
| 					     + spatial_len);
 | |
| 
 | |
| 		return(ptr);
 | |
| 	}
 | |
| 
 | |
| 	if (ext_buf) {
 | |
| 		ut_a(prefix_len > 0);
 | |
| 
 | |
| 		/* If an ordering column is externally stored, we will
 | |
| 		have to store a longer prefix of the field.  In this
 | |
| 		case, write to the log a marker followed by the
 | |
| 		original length and the real length of the field. */
 | |
| 		ptr += mach_write_compressed(ptr, UNIV_EXTERN_STORAGE_FIELD);
 | |
| 
 | |
| 		ptr += mach_write_compressed(ptr, *len);
 | |
| 
 | |
| 		*field = trx_undo_page_fetch_ext(ext_buf, prefix_len,
 | |
| 						 zip_size, *field, len);
 | |
| 
 | |
| 		ptr += mach_write_compressed(ptr, *len + spatial_len);
 | |
| 	} else {
 | |
| 		ptr += mach_write_compressed(ptr, UNIV_EXTERN_STORAGE_FIELD
 | |
| 					     + *len + spatial_len);
 | |
| 	}
 | |
| 
 | |
| 	return(ptr);
 | |
| }
 | |
| 
 | |
| /** Get MBR from a Geometry column stored externally
 | |
| @param[out]	mbr		MBR to fill
 | |
| @param[in]	zip_size	ROW_FORMAT=COMPRESSED page size, or 0
 | |
| @param[in]	field		field containing the geometry data
 | |
| @param[in,out]	len		length of field, in bytes
 | |
| */
 | |
| static
 | |
| void
 | |
| trx_undo_get_mbr_from_ext(
 | |
| /*======================*/
 | |
| 	double*		mbr,
 | |
| 	ulint		zip_size,
 | |
| 	const byte*	field,
 | |
| 	ulint*		len)
 | |
| {
 | |
| 	uchar*		dptr = NULL;
 | |
| 	ulint		dlen;
 | |
| 	mem_heap_t*	heap = mem_heap_create(100);
 | |
| 
 | |
| 	dptr = btr_copy_externally_stored_field(
 | |
| 		&dlen, field, zip_size, *len, heap);
 | |
| 
 | |
| 	if (dlen <= GEO_DATA_HEADER_SIZE) {
 | |
| 		for (uint i = 0; i < SPDIMS; ++i) {
 | |
| 			mbr[i * 2] = DBL_MAX;
 | |
| 			mbr[i * 2 + 1] = -DBL_MAX;
 | |
| 		}
 | |
| 	} else {
 | |
| 		rtree_mbr_from_wkb(dptr + GEO_DATA_HEADER_SIZE,
 | |
| 				   static_cast<uint>(dlen
 | |
| 				   - GEO_DATA_HEADER_SIZE), SPDIMS, mbr);
 | |
| 	}
 | |
| 
 | |
| 	mem_heap_free(heap);
 | |
| }
 | |
| 
 | |
| /**********************************************************************//**
 | |
| Report an update or delete marking of a clustered index record in the
 | |
| undo log.
 | |
| @return byte offset of the inserted undo log entry on the page if
 | |
| succeeded, 0 if fail */
 | |
| static
 | |
| uint16_t
 | |
| trx_undo_page_report_modify(
 | |
| /*========================*/
 | |
| 	buf_block_t*	undo_block,	/*!< in: undo log page */
 | |
| 	trx_t*		trx,		/*!< in: transaction */
 | |
| 	dict_index_t*	index,		/*!< in: clustered index where update or
 | |
| 					delete marking is done */
 | |
| 	const rec_t*	rec,		/*!< in: clustered index record which
 | |
| 					has NOT yet been modified */
 | |
| 	const rec_offs*	offsets,	/*!< in: rec_get_offsets(rec, index) */
 | |
| 	const upd_t*	update,		/*!< in: update vector which tells the
 | |
| 					columns to be updated; in the case of
 | |
| 					a delete, this should be set to NULL */
 | |
| 	ulint		cmpl_info,	/*!< in: compiler info on secondary
 | |
| 					index updates */
 | |
| 	const dtuple_t*	row,		/*!< in: clustered index row contains
 | |
| 					virtual column info */
 | |
| 	mtr_t*		mtr)		/*!< in: mtr */
 | |
| {
 | |
| 	ut_ad(index->is_primary());
 | |
| 	ut_ad(rec_offs_validate(rec, index, offsets));
 | |
| 	/* MariaDB 10.3.1+ in trx_undo_page_init() always initializes
 | |
| 	TRX_UNDO_PAGE_TYPE as 0, but previous versions wrote
 | |
| 	TRX_UNDO_INSERT == 1 into insert_undo pages,
 | |
| 	or TRX_UNDO_UPDATE == 2 into update_undo pages. */
 | |
| 	ut_ad(mach_read_from_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE
 | |
| 			       + undo_block->page.frame) <= 2);
 | |
| 
 | |
| 	byte* ptr_to_first_free = my_assume_aligned<2>(
 | |
| 		TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE
 | |
| 		+ undo_block->page.frame);
 | |
| 
 | |
| 	const uint16_t first_free = mach_read_from_2(ptr_to_first_free);
 | |
| 	byte *ptr = undo_block->page.frame + first_free;
 | |
| 
 | |
| 	if (trx_undo_left(undo_block, ptr) < 50) {
 | |
| 		/* NOTE: the value 50 must be big enough so that the general
 | |
| 		fields written below fit on the undo log page */
 | |
| 		return 0;
 | |
| 	}
 | |
| 
 | |
| 	/* Reserve 2 bytes for the pointer to the next undo log record */
 | |
| 	ptr += 2;
 | |
| 
 | |
| 	dict_table_t*	table		= index->table;
 | |
| 	const byte*	field;
 | |
| 	ulint		flen;
 | |
| 	ulint		col_no;
 | |
| 	ulint		type_cmpl;
 | |
| 	byte*		type_cmpl_ptr;
 | |
| 	ulint		i;
 | |
| 	trx_id_t	trx_id;
 | |
| 	ibool		ignore_prefix = FALSE;
 | |
| 	byte		ext_buf[REC_VERSION_56_MAX_INDEX_COL_LEN
 | |
| 				+ BTR_EXTERN_FIELD_REF_SIZE];
 | |
| 	bool		first_v_col = true;
 | |
| 
 | |
| 	/* Store first some general parameters to the undo log */
 | |
| 
 | |
| 	if (!update) {
 | |
| 		ut_ad(!rec_is_delete_marked(rec, dict_table_is_comp(table)));
 | |
| 		type_cmpl = TRX_UNDO_DEL_MARK_REC;
 | |
| 	} else if (rec_is_delete_marked(rec, dict_table_is_comp(table))) {
 | |
| 		/* In delete-marked records, DB_TRX_ID must
 | |
| 		always refer to an existing update_undo log record. */
 | |
| 		ut_ad(row_get_rec_trx_id(rec, index, offsets));
 | |
| 
 | |
| 		type_cmpl = TRX_UNDO_UPD_DEL_REC;
 | |
| 		/* We are about to update a delete marked record.
 | |
| 		We don't typically need the prefix in this case unless
 | |
| 		the delete marking is done by the same transaction
 | |
| 		(which we check below). */
 | |
| 		ignore_prefix = TRUE;
 | |
| 	} else {
 | |
| 		type_cmpl = TRX_UNDO_UPD_EXIST_REC;
 | |
| 	}
 | |
| 
 | |
| 	type_cmpl |= cmpl_info * TRX_UNDO_CMPL_INFO_MULT;
 | |
| 	type_cmpl_ptr = ptr;
 | |
| 
 | |
| 	*ptr++ = (byte) type_cmpl;
 | |
| 	ptr += mach_u64_write_much_compressed(ptr, trx->undo_no);
 | |
| 
 | |
| 	ptr += mach_u64_write_much_compressed(ptr, table->id);
 | |
| 
 | |
| 	/*----------------------------------------*/
 | |
| 	/* Store the state of the info bits */
 | |
| 
 | |
| 	*ptr++ = (byte) rec_get_info_bits(rec, dict_table_is_comp(table));
 | |
| 
 | |
| 	/* Store the values of the system columns */
 | |
| 	field = rec_get_nth_field(rec, offsets, index->db_trx_id(), &flen);
 | |
| 	ut_ad(flen == DATA_TRX_ID_LEN);
 | |
| 
 | |
| 	trx_id = trx_read_trx_id(field);
 | |
| 
 | |
| 	/* If it is an update of a delete marked record, then we are
 | |
| 	allowed to ignore blob prefixes if the delete marking was done
 | |
| 	by some other trx as it must have committed by now for us to
 | |
| 	allow an over-write. */
 | |
| 	if (trx_id == trx->id) {
 | |
| 		ignore_prefix = false;
 | |
| 	}
 | |
| 	ptr += mach_u64_write_compressed(ptr, trx_id);
 | |
| 
 | |
| 	field = rec_get_nth_field(rec, offsets, index->db_roll_ptr(), &flen);
 | |
| 	ut_ad(flen == DATA_ROLL_PTR_LEN);
 | |
| 	ut_ad(memcmp(field, field_ref_zero, DATA_ROLL_PTR_LEN));
 | |
| 
 | |
| 	ptr += mach_u64_write_compressed(ptr, trx_read_roll_ptr(field));
 | |
| 
 | |
| 	/*----------------------------------------*/
 | |
| 	/* Store then the fields required to uniquely determine the
 | |
| 	record which will be modified in the clustered index */
 | |
| 
 | |
| 	for (i = 0; i < dict_index_get_n_unique(index); i++) {
 | |
| 
 | |
| 		/* The ordering columns must not be instant added columns. */
 | |
| 		ut_ad(!rec_offs_nth_default(offsets, i));
 | |
| 		field = rec_get_nth_field(rec, offsets, i, &flen);
 | |
| 
 | |
| 		/* The ordering columns must not be stored externally. */
 | |
| 		ut_ad(!rec_offs_nth_extern(offsets, i));
 | |
| 		ut_ad(dict_index_get_nth_col(index, i)->ord_part);
 | |
| 
 | |
| 		if (trx_undo_left(undo_block, ptr) < 5) {
 | |
| 			return(0);
 | |
| 		}
 | |
| 
 | |
| 		ptr += mach_write_compressed(ptr, flen);
 | |
| 
 | |
| 		if (flen != UNIV_SQL_NULL) {
 | |
| 			if (trx_undo_left(undo_block, ptr) < flen) {
 | |
| 				return(0);
 | |
| 			}
 | |
| 
 | |
| 			memcpy(ptr, field, flen);
 | |
| 			ptr += flen;
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	/*----------------------------------------*/
 | |
| 	/* Save to the undo log the old values of the columns to be updated. */
 | |
| 
 | |
| 	if (update) {
 | |
| 		if (trx_undo_left(undo_block, ptr) < 5) {
 | |
| 			return(0);
 | |
| 		}
 | |
| 
 | |
| 		ulint	n_updated = upd_get_n_fields(update);
 | |
| 
 | |
| 		/* If this is an online update while an inplace ALTER TABLE
 | |
| 		is in progress and the table has virtual columns, we
 | |
| 		need to double-check whether any non-indexed columns are
 | |
| 		registered in the update vector, in case they will be
 | |
| 		indexed in the new table */
 | |
| 		if (dict_index_is_online_ddl(index) && table->n_v_cols > 0) {
 | |
| 			for (i = 0; i < upd_get_n_fields(update); i++) {
 | |
| 				upd_field_t*	fld = upd_get_nth_field(
 | |
| 					update, i);
 | |
| 				ulint		pos = fld->field_no;
 | |
| 
 | |
| 				/* These columns must not have an index
 | |
| 				on them */
 | |
| 				if (upd_fld_is_virtual_col(fld)
 | |
| 				    && dict_table_get_nth_v_col(
 | |
| 					    table, pos)->v_indexes.empty()) {
 | |
| 					n_updated--;
 | |
| 				}
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		i = 0;
 | |
| 
 | |
| 		if (UNIV_UNLIKELY(update->is_alter_metadata())) {
 | |
| 			ut_ad(update->n_fields >= 1);
 | |
| 			ut_ad(!upd_fld_is_virtual_col(&update->fields[0]));
 | |
| 			ut_ad(update->fields[0].field_no
 | |
| 			      == index->first_user_field());
 | |
| 			ut_ad(!dfield_is_ext(&update->fields[0].new_val));
 | |
| 			ut_ad(!dfield_is_null(&update->fields[0].new_val));
 | |
| 			/* The instant ADD COLUMN metadata record does not
 | |
| 			contain the BLOB. Do not write anything for it. */
 | |
| 			i = !rec_is_alter_metadata(rec, *index);
 | |
| 			n_updated -= i;
 | |
| 		}
 | |
| 
 | |
| 		ptr += mach_write_compressed(ptr, n_updated);
 | |
| 
 | |
| 		for (; i < upd_get_n_fields(update); i++) {
 | |
| 			if (trx_undo_left(undo_block, ptr) < 5) {
 | |
| 				return 0;
 | |
| 			}
 | |
| 
 | |
| 			upd_field_t*	fld = upd_get_nth_field(update, i);
 | |
| 
 | |
| 			bool	is_virtual = upd_fld_is_virtual_col(fld);
 | |
| 			ulint	max_v_log_len = 0;
 | |
| 
 | |
| 			ulint pos = fld->field_no;
 | |
| 			const dict_col_t* col = NULL;
 | |
| 
 | |
| 			if (is_virtual) {
 | |
| 				/* Skip the non-indexed column during
 | |
| 				an online alter table */
 | |
| 				if (dict_index_is_online_ddl(index)
 | |
| 				    && dict_table_get_nth_v_col(
 | |
| 					table, pos)->v_indexes.empty()) {
 | |
| 					continue;
 | |
| 				}
 | |
| 
 | |
| 				/* add REC_MAX_N_FIELDS to mark that this
 | |
| 				is a virtual col */
 | |
| 				ptr += mach_write_compressed(
 | |
| 					ptr, pos + REC_MAX_N_FIELDS);
 | |
| 
 | |
| 				if (trx_undo_left(undo_block, ptr) < 15) {
 | |
| 					return 0;
 | |
| 				}
 | |
| 
 | |
| 				ut_ad(fld->field_no < table->n_v_def);
 | |
| 
 | |
| 				ptr = trx_undo_log_v_idx(undo_block, table,
 | |
| 							 fld->field_no, ptr,
 | |
| 							 first_v_col);
 | |
| 				if (ptr == NULL) {
 | |
| 					 return(0);
 | |
| 				}
 | |
| 				first_v_col = false;
 | |
| 
 | |
| 				max_v_log_len
 | |
| 					= dict_max_v_field_len_store_undo(
 | |
| 						table, fld->field_no);
 | |
| 
 | |
| 				field = static_cast<byte*>(
 | |
| 					fld->old_v_val->data);
 | |
| 				flen = fld->old_v_val->len;
 | |
| 
 | |
| 				/* Only log sufficient bytes for index
 | |
| 				record update */
 | |
| 				if (flen != UNIV_SQL_NULL) {
 | |
| 					flen = ut_min(
 | |
| 						flen, max_v_log_len);
 | |
| 				}
 | |
| 
 | |
| 				goto store_len;
 | |
| 			}
 | |
| 
 | |
| 			if (UNIV_UNLIKELY(update->is_metadata())) {
 | |
| 				ut_ad(pos >= index->first_user_field());
 | |
| 				ut_ad(rec_is_metadata(rec, *index));
 | |
| 
 | |
| 				if (rec_is_alter_metadata(rec, *index)) {
 | |
| 					ut_ad(update->is_alter_metadata());
 | |
| 
 | |
| 					field = rec_offs_n_fields(offsets)
 | |
| 						> pos
 | |
| 						&& !rec_offs_nth_default(
 | |
| 							offsets, pos)
 | |
| 						? rec_get_nth_field(
 | |
| 							rec, offsets,
 | |
| 							pos, &flen)
 | |
| 						: index->instant_field_value(
 | |
| 							pos - 1, &flen);
 | |
| 
 | |
| 					if (pos == index->first_user_field()) {
 | |
| 						ut_ad(rec_offs_nth_extern(
 | |
| 							offsets, pos));
 | |
| 						ut_ad(flen == FIELD_REF_SIZE);
 | |
| 						goto write_field;
 | |
| 					}
 | |
| 					col = dict_index_get_nth_col(index,
 | |
| 								     pos - 1);
 | |
| 				} else if (!update->is_alter_metadata()) {
 | |
| 					goto get_field;
 | |
| 				} else {
 | |
| 					/* We are converting an ADD COLUMN
 | |
| 					metadata record to an ALTER TABLE
 | |
| 					metadata record, with BLOB. Subtract
 | |
| 					the missing metadata BLOB field. */
 | |
| 					ut_ad(pos > index->first_user_field());
 | |
| 					--pos;
 | |
| 					goto get_field;
 | |
| 				}
 | |
| 			} else {
 | |
| get_field:
 | |
| 				col = dict_index_get_nth_col(index, pos);
 | |
| 				field = rec_get_nth_cfield(
 | |
| 					rec, index, offsets, pos, &flen);
 | |
| 			}
 | |
| write_field:
 | |
| 			/* Write field number to undo log */
 | |
| 			ptr += mach_write_compressed(ptr, pos);
 | |
| 
 | |
| 			if (trx_undo_left(undo_block, ptr) < 15) {
 | |
| 				return 0;
 | |
| 			}
 | |
| 
 | |
| 			if (rec_offs_n_fields(offsets) > pos
 | |
| 			    && rec_offs_nth_extern(offsets, pos)) {
 | |
| 				ut_ad(col || pos == index->first_user_field());
 | |
| 				ut_ad(col || update->is_alter_metadata());
 | |
| 				ut_ad(col
 | |
| 				      || rec_is_alter_metadata(rec, *index));
 | |
| 				ulint prefix_len = col
 | |
| 					? dict_max_field_len_store_undo(
 | |
| 						table, col)
 | |
| 					: 0;
 | |
| 
 | |
| 				ut_ad(prefix_len + BTR_EXTERN_FIELD_REF_SIZE
 | |
| 				      <= sizeof ext_buf);
 | |
| 
 | |
| 				ptr = trx_undo_page_report_modify_ext(
 | |
| 					ptr,
 | |
| 					col
 | |
| 					&& col->ord_part
 | |
| 					&& !ignore_prefix
 | |
| 					&& flen < REC_ANTELOPE_MAX_INDEX_COL_LEN
 | |
| 					? ext_buf : NULL, prefix_len,
 | |
| 					table->space->zip_size(),
 | |
| 					&field, &flen, SPATIAL_UNKNOWN);
 | |
| 
 | |
| 				*type_cmpl_ptr |= TRX_UNDO_UPD_EXTERN;
 | |
| 			} else {
 | |
| store_len:
 | |
| 				ptr += mach_write_compressed(ptr, flen);
 | |
| 			}
 | |
| 
 | |
| 			if (flen != UNIV_SQL_NULL) {
 | |
| 				if (trx_undo_left(undo_block, ptr) < flen) {
 | |
| 					return(0);
 | |
| 				}
 | |
| 
 | |
| 				memcpy(ptr, field, flen);
 | |
| 				ptr += flen;
 | |
| 			}
 | |
| 
 | |
| 			/* Also record the new value for virtual column */
 | |
| 			if (is_virtual) {
 | |
| 				field = static_cast<byte*>(fld->new_val.data);
 | |
| 				flen = fld->new_val.len;
 | |
| 				if (flen != UNIV_SQL_NULL) {
 | |
| 					flen = ut_min(
 | |
| 						flen, max_v_log_len);
 | |
| 				}
 | |
| 
 | |
| 				if (trx_undo_left(undo_block, ptr) < 15) {
 | |
| 					return(0);
 | |
| 				}
 | |
| 
 | |
| 				ptr += mach_write_compressed(ptr, flen);
 | |
| 
 | |
| 				if (flen != UNIV_SQL_NULL) {
 | |
| 					if (trx_undo_left(undo_block, ptr)
 | |
| 					    < flen) {
 | |
| 						return(0);
 | |
| 					}
 | |
| 
 | |
| 					memcpy(ptr, field, flen);
 | |
| 					ptr += flen;
 | |
| 				}
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	/* Reset first_v_col, so that the virtual column undo
 | |
| 	version marker is written again when we log all the indexed columns */
 | |
| 	first_v_col = true;
 | |
| 
 | |
| 	/*----------------------------------------*/
 | |
| 	/* In the case of a delete marking, and also in the case of an update
 | |
| 	where any ordering field of any index changes, store the values of all
 | |
| 	columns which occur as ordering fields in any index. This info is used
 | |
| 	in the purge of old versions where we use it to build and search the
 | |
| 	delete marked index records, to look if we can remove them from the
 | |
| 	index tree. Note that starting from 4.0.14 also externally stored
 | |
| 	fields can be ordering fields in some index. Starting from 5.2, we no longer
 | |
| 	store REC_MAX_INDEX_COL_LEN first bytes to the undo log record,
 | |
| 	but we can construct the column prefix fields in the index by
 | |
| 	fetching the first page of the BLOB that is pointed to by the
 | |
| 	clustered index. This works also in crash recovery, because all pages
 | |
| 	(including BLOBs) are recovered before anything is rolled back. */
 | |
| 
 | |
| 	if (!update || !(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
 | |
| 		byte*		old_ptr = ptr;
 | |
| 		double		mbr[SPDIMS * 2];
 | |
| 		mem_heap_t*	row_heap = NULL;
 | |
| 
 | |
| 		if (trx_undo_left(undo_block, ptr) < 5) {
 | |
| 			return(0);
 | |
| 		}
 | |
| 
 | |
| 		/* Reserve 2 bytes to write the number of bytes the stored
 | |
| 		fields take in this undo record */
 | |
| 
 | |
| 		ptr += 2;
 | |
| 
 | |
| 		for (col_no = 0; col_no < dict_table_get_n_cols(table);
 | |
| 		     col_no++) {
 | |
| 
 | |
| 			const dict_col_t*	col
 | |
| 				= dict_table_get_nth_col(table, col_no);
 | |
| 
 | |
| 			if (!col->ord_part) {
 | |
| 				continue;
 | |
| 			}
 | |
| 
 | |
| 			const ulint pos = dict_index_get_nth_col_pos(
 | |
| 				index, col_no, NULL);
 | |
| 			/* All non-virtual columns must be present in
 | |
| 			the clustered index. */
 | |
| 			ut_ad(pos != ULINT_UNDEFINED);
 | |
| 
 | |
| 			const bool is_ext = rec_offs_nth_extern(offsets, pos);
 | |
| 			const spatial_status_t spatial_status = is_ext
 | |
| 				? dict_col_get_spatial_status(col)
 | |
| 				: SPATIAL_NONE;
 | |
| 
 | |
| 			switch (spatial_status) {
 | |
| 			case SPATIAL_UNKNOWN:
 | |
| 				ut_ad(0);
 | |
| 				/* fall through */
 | |
| 			case SPATIAL_MIXED:
 | |
| 			case SPATIAL_ONLY:
 | |
| 				/* Externally stored spatially indexed
 | |
| 				columns will be (redundantly) logged
 | |
| 				again, because we did not write the
 | |
| 				MBR yet, that is, the previous call to
 | |
| 				trx_undo_page_report_modify_ext()
 | |
| 				was with SPATIAL_UNKNOWN. */
 | |
| 				break;
 | |
| 			case SPATIAL_NONE:
 | |
| 				if (!update) {
 | |
| 					/* This is a DELETE operation. */
 | |
| 					break;
 | |
| 				}
 | |
| 				/* Avoid redundantly logging indexed
 | |
| 				columns that were updated. */
 | |
| 
 | |
| 				for (i = 0; i < update->n_fields; i++) {
 | |
| 					const ulint field_no
 | |
| 						= upd_get_nth_field(update, i)
 | |
| 						->field_no;
 | |
| 					if (field_no >= index->n_fields
 | |
| 					    || dict_index_get_nth_field(
 | |
| 						    index, field_no)->col
 | |
| 					    == col) {
 | |
| 						goto already_logged;
 | |
| 					}
 | |
| 				}
 | |
| 			}
 | |
| 
 | |
| 			if (true) {
 | |
| 				/* Write field number to undo log */
 | |
| 				if (trx_undo_left(undo_block, ptr) < 5 + 15) {
 | |
| 					return(0);
 | |
| 				}
 | |
| 
 | |
| 				ptr += mach_write_compressed(ptr, pos);
 | |
| 
 | |
| 				/* Save the old value of field */
 | |
| 				field = rec_get_nth_cfield(
 | |
| 					rec, index, offsets, pos, &flen);
 | |
| 
 | |
| 				if (is_ext) {
 | |
| 					const dict_col_t*	col =
 | |
| 						dict_index_get_nth_col(
 | |
| 							index, pos);
 | |
| 					ulint			prefix_len =
 | |
| 						dict_max_field_len_store_undo(
 | |
| 							table, col);
 | |
| 
 | |
| 					ut_a(prefix_len < sizeof ext_buf);
 | |
| 					const ulint zip_size
 | |
| 						= table->space->zip_size();
 | |
| 
 | |
| 					/* If there is a spatial index on it,
 | |
| 					log its MBR */
 | |
| 					if (spatial_status != SPATIAL_NONE) {
 | |
| 						ut_ad(DATA_GEOMETRY_MTYPE(
 | |
| 								col->mtype));
 | |
| 
 | |
| 						trx_undo_get_mbr_from_ext(
 | |
| 							mbr, zip_size,
 | |
| 							field, &flen);
 | |
| 					}
 | |
| 
 | |
| 					ptr = trx_undo_page_report_modify_ext(
 | |
| 						ptr,
 | |
| 						flen < REC_ANTELOPE_MAX_INDEX_COL_LEN
 | |
| 						&& !ignore_prefix
 | |
| 						? ext_buf : NULL, prefix_len,
 | |
| 						zip_size,
 | |
| 						&field, &flen,
 | |
| 						spatial_status);
 | |
| 				} else {
 | |
| 					ptr += mach_write_compressed(
 | |
| 						ptr, flen);
 | |
| 				}
 | |
| 
 | |
| 				if (flen != UNIV_SQL_NULL
 | |
| 				    && spatial_status != SPATIAL_ONLY) {
 | |
| 					if (trx_undo_left(undo_block, ptr)
 | |
| 					    < flen) {
 | |
| 						return(0);
 | |
| 					}
 | |
| 
 | |
| 					memcpy(ptr, field, flen);
 | |
| 					ptr += flen;
 | |
| 				}
 | |
| 
 | |
| 				if (spatial_status != SPATIAL_NONE) {
 | |
| 					if (trx_undo_left(undo_block, ptr)
 | |
| 					    < DATA_MBR_LEN) {
 | |
| 						return(0);
 | |
| 					}
 | |
| 
 | |
| 					for (int i = 0; i < SPDIMS * 2;
 | |
| 					     i++) {
 | |
| 						mach_double_write(
 | |
| 							ptr, mbr[i]);
 | |
| 						ptr +=  sizeof(double);
 | |
| 					}
 | |
| 				}
 | |
| 			}
 | |
| 
 | |
| already_logged:
 | |
| 			continue;
 | |
| 		}
 | |
| 
 | |
| 		for (col_no = 0; col_no < dict_table_get_n_v_cols(table);
 | |
| 		     col_no++) {
 | |
| 			const dict_v_col_t*     col
 | |
| 				= dict_table_get_nth_v_col(table, col_no);
 | |
| 
 | |
| 			if (col->m_col.ord_part) {
 | |
| 				ulint   pos = col_no;
 | |
| 				ulint	max_v_log_len
 | |
| 					= dict_max_v_field_len_store_undo(
 | |
| 						table, pos);
 | |
| 
 | |
| 				/* Write field number to undo log.
 | |
| 				Make sure there is enough space in the log */
 | |
| 				if (trx_undo_left(undo_block, ptr) < 5) {
 | |
| 					return(0);
 | |
| 				}
 | |
| 
 | |
| 				pos += REC_MAX_N_FIELDS;
 | |
| 				ptr += mach_write_compressed(ptr, pos);
 | |
| 
 | |
| 				ut_ad(col_no < table->n_v_def);
 | |
| 				ptr = trx_undo_log_v_idx(undo_block, table,
 | |
| 							 col_no, ptr,
 | |
| 							 first_v_col);
 | |
| 				first_v_col = false;
 | |
| 
 | |
| 				if (!ptr) {
 | |
| 					 return(0);
 | |
| 				}
 | |
| 
 | |
| 				const dfield_t* vfield = NULL;
 | |
| 
 | |
| 				if (update) {
 | |
| 					ut_ad(!row);
 | |
| 					if (update->old_vrow == NULL) {
 | |
| 						flen = UNIV_SQL_NULL;
 | |
| 					} else {
 | |
| 						vfield = dtuple_get_nth_v_field(
 | |
| 							update->old_vrow,
 | |
| 							col->v_pos);
 | |
| 					}
 | |
| 				} else if (row) {
 | |
| 					vfield = dtuple_get_nth_v_field(
 | |
| 						row, col->v_pos);
 | |
| 				} else {
 | |
| 					ut_ad(0);
 | |
| 				}
 | |
| 
 | |
| 				if (vfield) {
 | |
| 					field = static_cast<byte*>(vfield->data);
 | |
| 					flen = vfield->len;
 | |
| 				} else {
 | |
| 					ut_ad(flen == UNIV_SQL_NULL);
 | |
| 				}
 | |
| 
 | |
| 				if (flen != UNIV_SQL_NULL) {
 | |
| 					flen = ut_min(
 | |
| 						flen, max_v_log_len);
 | |
| 				}
 | |
| 
 | |
| 				ptr += mach_write_compressed(ptr, flen);
 | |
| 
 | |
| 				switch (flen) {
 | |
| 				case 0: case UNIV_SQL_NULL:
 | |
| 					break;
 | |
| 				default:
 | |
| 					if (trx_undo_left(undo_block, ptr)
 | |
| 					    < flen) {
 | |
| 						return(0);
 | |
| 					}
 | |
| 
 | |
| 					memcpy(ptr, field, flen);
 | |
| 					ptr += flen;
 | |
| 				}
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		mach_write_to_2(old_ptr, ulint(ptr - old_ptr));
 | |
| 
 | |
| 		if (row_heap) {
 | |
| 			mem_heap_free(row_heap);
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	/*----------------------------------------*/
 | |
| 	/* Write pointers to the previous and the next undo log records */
 | |
| 	if (trx_undo_left(undo_block, ptr) < 2) {
 | |
| 		return(0);
 | |
| 	}
 | |
| 
 | |
| 	mach_write_to_2(ptr, first_free);
 | |
| 	const uint16_t new_free = static_cast<uint16_t>(
 | |
| 		ptr + 2 - undo_block->page.frame);
 | |
| 	mach_write_to_2(undo_block->page.frame + first_free, new_free);
 | |
| 
 | |
| 	mach_write_to_2(ptr_to_first_free, new_free);
 | |
| 
 | |
| 	const byte* start = &undo_block->page.frame[first_free + 2];
 | |
| 	mtr->undo_append(*undo_block, start, ptr - start);
 | |
| 	return(first_free);
 | |
| }
 | |
| 
 | |
| /**********************************************************************//**
 | |
| Reads from an undo log update record the system field values of the old
 | |
| version.
 | |
| @return remaining part of undo log record after reading these values */
 | |
| byte*
 | |
| trx_undo_update_rec_get_sys_cols(
 | |
| /*=============================*/
 | |
| 	const byte*	ptr,		/*!< in: remaining part of undo
 | |
| 					log record after reading
 | |
| 					general parameters */
 | |
| 	trx_id_t*	trx_id,		/*!< out: trx id */
 | |
| 	roll_ptr_t*	roll_ptr,	/*!< out: roll ptr */
 | |
| 	byte*		info_bits)	/*!< out: info bits state */
 | |
| {
 | |
| 	/* Read the state of the info bits */
 | |
| 	*info_bits = *ptr++;
 | |
| 
 | |
| 	/* Read the values of the system columns */
 | |
| 
 | |
| 	*trx_id = mach_u64_read_next_compressed(&ptr);
 | |
| 	*roll_ptr = mach_u64_read_next_compressed(&ptr);
 | |
| 
 | |
| 	return(const_cast<byte*>(ptr));
 | |
| }
 | |
| 
 | |
| /*******************************************************************//**
 | |
| Builds an update vector based on a remaining part of an undo log record.
 | |
| @return remaining part of the record, NULL if an error detected, which
 | |
| means that the record is corrupted */
 | |
| byte*
 | |
| trx_undo_update_rec_get_update(
 | |
| /*===========================*/
 | |
| 	const byte*	ptr,	/*!< in: remaining part in update undo log
 | |
| 				record, after reading the row reference
 | |
| 				NOTE that this copy of the undo log record must
 | |
| 				be preserved as long as the update vector is
 | |
| 				used, as we do NOT copy the data in the
 | |
| 				record! */
 | |
| 	dict_index_t*	index,	/*!< in: clustered index */
 | |
| 	ulint		type,	/*!< in: TRX_UNDO_UPD_EXIST_REC,
 | |
| 				TRX_UNDO_UPD_DEL_REC, or
 | |
| 				TRX_UNDO_DEL_MARK_REC; in the last case,
 | |
| 				only trx id and roll ptr fields are added to
 | |
| 				the update vector */
 | |
| 	trx_id_t	trx_id,	/*!< in: transaction id from this undo record */
 | |
| 	roll_ptr_t	roll_ptr,/*!< in: roll pointer from this undo record */
 | |
| 	byte		info_bits,/*!< in: info bits from this undo record */
 | |
| 	mem_heap_t*	heap,	/*!< in: memory heap from which the memory
 | |
| 				needed is allocated */
 | |
| 	upd_t**		upd)	/*!< out, own: update vector */
 | |
| {
 | |
| 	upd_field_t*	upd_field;
 | |
| 	upd_t*		update;
 | |
| 	ulint		n_fields;
 | |
| 	byte*		buf;
 | |
| 	bool		first_v_col = true;
 | |
| 	bool		is_undo_log = true;
 | |
| 	ulint		n_skip_field = 0;
 | |
| 
 | |
| 	ut_a(dict_index_is_clust(index));
 | |
| 
 | |
| 	if (type != TRX_UNDO_DEL_MARK_REC) {
 | |
| 		n_fields = mach_read_next_compressed(&ptr);
 | |
| 	} else {
 | |
| 		n_fields = 0;
 | |
| 	}
 | |
| 
 | |
| 	*upd = update = upd_create(n_fields + 2, heap);
 | |
| 
 | |
| 	update->info_bits = info_bits;
 | |
| 
 | |
| 	/* Store first trx id and roll ptr to update vector */
 | |
| 
 | |
| 	upd_field = upd_get_nth_field(update, n_fields);
 | |
| 
 | |
| 	buf = static_cast<byte*>(mem_heap_alloc(heap, DATA_TRX_ID_LEN));
 | |
| 
 | |
| 	mach_write_to_6(buf, trx_id);
 | |
| 
 | |
| 	upd_field_set_field_no(upd_field, index->db_trx_id(), index);
 | |
| 	dfield_set_data(&(upd_field->new_val), buf, DATA_TRX_ID_LEN);
 | |
| 
 | |
| 	upd_field = upd_get_nth_field(update, n_fields + 1);
 | |
| 
 | |
| 	buf = static_cast<byte*>(mem_heap_alloc(heap, DATA_ROLL_PTR_LEN));
 | |
| 
 | |
| 	trx_write_roll_ptr(buf, roll_ptr);
 | |
| 
 | |
| 	upd_field_set_field_no(upd_field, index->db_roll_ptr(), index);
 | |
| 	dfield_set_data(&(upd_field->new_val), buf, DATA_ROLL_PTR_LEN);
 | |
| 
 | |
| 	/* Store then the updated ordinary columns to the update vector */
 | |
| 
 | |
| 	for (ulint i = 0; i < n_fields; i++) {
 | |
| 		const byte* field;
 | |
| 		uint32_t len, orig_len;
 | |
| 
 | |
| 		upd_field = upd_get_nth_field(update, i);
 | |
| 		uint32_t field_no = mach_read_next_compressed(&ptr);
 | |
| 
 | |
| 		const bool is_virtual = (field_no >= REC_MAX_N_FIELDS);
 | |
| 
 | |
| 		if (is_virtual) {
 | |
| 			/* If new version, we need to check index list to figure
 | |
| 			out the correct virtual column position */
 | |
| 			ptr = trx_undo_read_v_idx(
 | |
| 				index->table, ptr, first_v_col, &is_undo_log,
 | |
| 				&field_no);
 | |
| 			first_v_col = false;
 | |
| 			/* This column could be dropped or no longer indexed */
 | |
| 			if (field_no >= index->n_fields) {
 | |
| 				/* Mark that this field is no longer needed */
 | |
				upd_field->field_no = REC_MAX_N_FIELDS;

				ptr = trx_undo_rec_get_col_val(
					ptr, &field, &len, &orig_len);
				ptr = trx_undo_rec_get_col_val(
					ptr, &field, &len, &orig_len);
				n_skip_field++;
				continue;
			}

			upd_field_set_v_field_no(
				upd_field, static_cast<uint16_t>(field_no),
				index);
		} else if (UNIV_UNLIKELY((update->info_bits
					  & ~REC_INFO_DELETED_FLAG)
					 == REC_INFO_MIN_REC_FLAG)) {
			ut_ad(type == TRX_UNDO_UPD_EXIST_REC);
			const uint32_t uf = index->first_user_field();
			ut_ad(field_no >= uf);

			if (update->info_bits != REC_INFO_MIN_REC_FLAG) {
				/* Generic instant ALTER TABLE */
				if (field_no == uf) {
					upd_field->new_val.type
						.metadata_blob_init();
				} else if (field_no >= index->n_fields) {
					/* This is reachable during
					purge if the table was emptied
					and converted to the canonical
					format on a later ALTER TABLE.
					In this case,
					row_purge_upd_exist_or_extern()
					would only be interested in
					freeing any BLOBs that were
					updated, that is, the metadata
					BLOB above.  Other BLOBs in
					the metadata record are never
					updated; they are for the
					initial DEFAULT values of the
					instantly added columns, and
					they will never change.

					Note: if the table becomes
					empty during ROLLBACK or is
					empty during subsequent ALTER
					TABLE, and btr_page_empty() is
					called to re-create the root
					page without the metadata
					record, in that case we should
					only free the latest version
					of BLOBs in the record,
					which purge would never touch. */
					field_no = REC_MAX_N_FIELDS;
					n_skip_field++;
				} else {
					dict_col_copy_type(
						dict_index_get_nth_col(
							index, field_no - 1),
						&upd_field->new_val.type);
				}
			} else {
				/* Instant ADD COLUMN...LAST */
				dict_col_copy_type(
					dict_index_get_nth_col(index,
							       field_no),
					&upd_field->new_val.type);
			}
			upd_field->field_no = field_no
				& dict_index_t::MAX_N_FIELDS;
		} else if (field_no < index->n_fields) {
			upd_field_set_field_no(upd_field,
					       static_cast<uint16_t>(field_no),
					       index);
		} else {
			ib::error() << "Trying to access update undo rec"
				" field " << field_no
				<< " in index " << index->name
				<< " of table " << index->table->name
				<< " but index has only "
				<< dict_index_get_n_fields(index)
				<< " fields " << BUG_REPORT_MSG
				<< ". Run also CHECK TABLE "
				<< index->table->name << "."
				" n_fields = " << n_fields << ", i = " << i;

			ut_ad(0);
			*upd = NULL;
			return(NULL);
		}

		ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);

		upd_field->orig_len = static_cast<uint16_t>(orig_len);

		if (len == UNIV_SQL_NULL) {
			dfield_set_null(&upd_field->new_val);
		} else if (len < UNIV_EXTERN_STORAGE_FIELD) {
			dfield_set_data(&upd_field->new_val, field, len);
		} else {
			len -= UNIV_EXTERN_STORAGE_FIELD;

			dfield_set_data(&upd_field->new_val, field, len);
			dfield_set_ext(&upd_field->new_val);
		}

		ut_ad(update->info_bits != (REC_INFO_DELETED_FLAG
					    | REC_INFO_MIN_REC_FLAG)
		      || field_no != index->first_user_field()
		      || (upd_field->new_val.ext
			  && upd_field->new_val.len == FIELD_REF_SIZE));

		if (is_virtual) {
			upd_field->old_v_val = static_cast<dfield_t*>(
				mem_heap_alloc(
					heap, sizeof *upd_field->old_v_val));
			ptr = trx_undo_rec_get_col_val(
				ptr, &field, &len, &orig_len);
			if (len == UNIV_SQL_NULL) {
				dfield_set_null(upd_field->old_v_val);
			} else if (len < UNIV_EXTERN_STORAGE_FIELD) {
				dfield_set_data(
					upd_field->old_v_val, field, len);
			} else {
				ut_ad(0);
			}
		}
	}

	/* We may have to skip dropped indexed virtual columns.
	Also, we may have to trim the update vector of a metadata record
	if dict_index_t::clear_instant_alter() was invoked on the table
	later, and the number of fields no longer matches. */

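	/* Compact the update vector in place: entries whose field_no was
	set to REC_MAX_N_FIELDS above are dropped by copying the surviving
	entries towards the start of the array, and update->n_fields is
	reduced to the number of survivors. */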
	if (n_skip_field) {
		upd_field_t* d = upd_get_nth_field(update, 0);
		const upd_field_t* const end = d + n_fields + 2;

		for (const upd_field_t* s = d; s != end; s++) {
			if (s->field_no != REC_MAX_N_FIELDS) {
				*d++ = *s;
			}
		}

		ut_ad(d + n_skip_field == end);
		update->n_fields = d - upd_get_nth_field(update, 0);
	}

	return(const_cast<byte*>(ptr));
}

/** Report a RENAME TABLE operation.
@param[in,out]	trx	transaction
@param[in]	table	table that is being renamed
@param[in,out]	block	undo page
@param[in,out]	mtr	mini-transaction
@return	byte offset of the undo log record
@retval	0	in case of failure */
static
uint16_t
trx_undo_page_report_rename(trx_t* trx, const dict_table_t* table,
			    buf_block_t* block, mtr_t* mtr)
{
	byte*	ptr_first_free  = my_assume_aligned<2>(TRX_UNDO_PAGE_HDR
						       + TRX_UNDO_PAGE_FREE
						       + block->page.frame);
	const uint16_t first_free = mach_read_from_2(ptr_first_free);
	ut_ad(first_free >= TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE);
	ut_ad(first_free <= srv_page_size - FIL_PAGE_DATA_END);
	byte* const start = block->page.frame + first_free;
	size_t len = strlen(table->name.m_name);
	const size_t fixed = 2 + 1 + 11 + 11 + 2;
	ut_ad(len <= NAME_CHAR_LEN * 5 * 2 + 1);
	/* The -10 is used in trx_undo_left() */
	compile_time_assert(NAME_CHAR_LEN * 5 * 2 + fixed
			    + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE
			    < UNIV_PAGE_SIZE_MIN - 10 - FIL_PAGE_DATA_END);

	if (trx_undo_left(block, start) < fixed + len) {
		ut_ad(first_free > TRX_UNDO_PAGE_HDR
		      + TRX_UNDO_PAGE_HDR_SIZE);
		return 0;
	}

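	/* The record appended below is laid out as:
	2 bytes  offset of the next record (the new page "first free")
	1 byte   type byte TRX_UNDO_RENAME_TABLE
	         undo number, in "much compressed" format (up to 11 bytes)
	         table id, in "much compressed" format (up to 11 bytes)
	         the old table name, not NUL-terminated
	2 bytes  offset of the start of this record
	which is exactly what the size estimate "fixed" above accounts for. */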
	byte* ptr = start + 2;
	*ptr++ = TRX_UNDO_RENAME_TABLE;
	ptr += mach_u64_write_much_compressed(ptr, trx->undo_no);
	ptr += mach_u64_write_much_compressed(ptr, table->id);
	memcpy(ptr, table->name.m_name, len);
	ptr += len;
	mach_write_to_2(ptr, first_free);
	mach_write_to_2(ptr_first_free, ptr + 2 - block->page.frame);
	memcpy(start, ptr_first_free, 2);
	mtr->undo_append(*block, start + 2, ptr - start - 2);
	return first_free;
}

/** Report a RENAME TABLE operation.
@param[in,out]	trx	transaction
@param[in]	table	table that is being renamed
@return	DB_SUCCESS or error code */
dberr_t trx_undo_report_rename(trx_t* trx, const dict_table_t* table)
{
	ut_ad(!trx->read_only);
	ut_ad(trx->id);
	ut_ad(!table->is_temporary());

	mtr_t		mtr;
	dberr_t		err;
	mtr.start();
	if (buf_block_t* block = trx_undo_assign(trx, &err, &mtr)) {
		trx_undo_t*	undo = trx->rsegs.m_redo.undo;
		ut_ad(err == DB_SUCCESS);
		ut_ad(undo);
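		/* If the record does not fit on the current last undo
		page, grow the undo log by one page and retry.  The
		compile_time_assert() in trx_undo_page_report_rename()
		guarantees that the record fits on an empty page, so at
		most one retry is expected (see loop_count below). */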
		for (ut_d(int loop_count = 0);;) {
			ut_ad(loop_count++ < 2);
			ut_ad(undo->last_page_no
			      == block->page.id().page_no());

			if (uint16_t offset = trx_undo_page_report_rename(
				    trx, table, block, &mtr)) {
				undo->top_page_no = undo->last_page_no;
				undo->top_offset  = offset;
				undo->top_undo_no = trx->undo_no++;
				undo->guess_block = block;
				ut_ad(!undo->empty());

				err = DB_SUCCESS;
				break;
			} else {
				mtr.commit();
				mtr.start();
				block = trx_undo_add_page(undo, &mtr, &err);
				if (!block) {
					break;
				}
			}
		}
	}

	mtr.commit();
	return err;
}

TRANSACTIONAL_TARGET ATTRIBUTE_NOINLINE
/** @return whether the transaction holds an exclusive lock on a table */
static bool trx_has_lock_x(const trx_t &trx, dict_table_t& table)
{
  ut_ad(!table.is_temporary());

  uint32_t n;

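  /* Sample table.n_lock_x_or_s.  With hardware lock elision the counter
  is read inside a memory transaction that aborts (and falls back to the
  plain lock_mutex path below) if some other thread is holding the mutex;
  otherwise the mutex is taken around the read. */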
#if !defined NO_ELISION && !defined SUX_LOCK_GENERIC
  if (xbegin())
  {
    if (table.lock_mutex_is_locked())
      xabort();
    n= table.n_lock_x_or_s;
    xend();
  }
  else
#endif
  {
    table.lock_mutex_lock();
    n= table.n_lock_x_or_s;
    table.lock_mutex_unlock();
  }

  /* This thread is executing trx. No other thread can modify our table locks
  (only record locks might be created, in an implicit-to-explicit conversion).
  Hence, no mutex is needed here. */
  if (n)
    for (const lock_t *lock : trx.lock.table_locks)
      if (lock && lock->type_mode == (LOCK_X | LOCK_TABLE))
        return true;

  return false;
}

/***********************************************************************//**
Writes information to an undo log about an insert, update, or a delete marking
of a clustered index record. This information is used in a rollback of the
transaction and in consistent reads that must look to the history of this
transaction.
@return DB_SUCCESS or error code */
dberr_t
trx_undo_report_row_operation(
/*==========================*/
	que_thr_t*	thr,		/*!< in: query thread */
	dict_index_t*	index,		/*!< in: clustered index */
	const dtuple_t*	clust_entry,	/*!< in: in the case of an insert,
					index entry to insert into the
					clustered index; in updates,
					may contain a clustered index
					record tuple that also contains
					virtual columns of the table;
					otherwise, NULL */
	const upd_t*	update,		/*!< in: in the case of an update,
					the update vector, otherwise NULL */
	ulint		cmpl_info,	/*!< in: compiler info on secondary
					index updates */
	const rec_t*	rec,		/*!< in: case of an update or delete
					marking, the record in the clustered
					index; NULL if insert */
	const rec_offs*	offsets,	/*!< in: rec_get_offsets(rec) */
	roll_ptr_t*	roll_ptr)	/*!< out: DB_ROLL_PTR to the
					undo log record */
{
	trx_t*		trx;
#ifdef UNIV_DEBUG
	int		loop_count	= 0;
#endif /* UNIV_DEBUG */

	ut_a(dict_index_is_clust(index));
	ut_ad(!update || rec);
	ut_ad(!rec || rec_offs_validate(rec, index, offsets));
	ut_ad(!srv_read_only_mode);

	trx = thr_get_trx(thr);
	/* This function must not be invoked during rollback
	(of a TRX_STATE_PREPARE transaction or otherwise). */
	ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
	ut_ad(!trx->in_rollback);

	/* We must determine if this is the first time that this
	transaction modifies this table. */
	auto m = trx->mod_tables.emplace(index->table, trx->undo_no);
	ut_ad(m.first->second.valid(trx->undo_no));

	if (m.second && index->table->is_native_online_ddl()) {
		trx->apply_online_log= true;
	}

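	/* A bulk insert (buffering the rows instead of writing per-row
	undo log records) is only attempted for an INSERT (rec == NULL)
	that is the first modification of the table by this transaction,
	while trx->bulk_insert is set and, for a persistent table, the
	transaction holds an exclusive table lock and started the bulk
	load itself (bulk_trx_id == trx->id). */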
	bool bulk = !rec;

	if (!bulk) {
		/* An UPDATE or DELETE must not be covered by an
		earlier start_bulk_insert(). */
		ut_ad(!m.first->second.is_bulk_insert());
	} else if (m.first->second.is_bulk_insert()) {
		/* Above, the emplace() tried to insert an object with
		!is_bulk_insert(). Only an explicit start_bulk_insert()
		(below) can set the flag. */
		ut_ad(!m.second);
		/* We already wrote a TRX_UNDO_EMPTY record. */
		ut_ad(thr->run_node);
		ut_ad(que_node_get_type(thr->run_node) == QUE_NODE_INSERT);
		ut_ad(trx->bulk_insert);
		return DB_SUCCESS;
	} else if (!m.second || !trx->bulk_insert) {
		bulk = false;
	} else if (index->table->is_temporary()) {
	} else if (trx_has_lock_x(*trx, *index->table)
		   && index->table->bulk_trx_id == trx->id) {
		m.first->second.start_bulk_insert(
			index->table,
			thd_sql_command(trx->mysql_thd) != SQLCOM_LOAD);

		if (dberr_t err = m.first->second.bulk_insert_buffered(
			    *clust_entry, *index, trx)) {
			return err;
		}
	} else {
		bulk = false;
	}

	mtr_t		mtr;
	dberr_t		err;
	mtr.start();
	trx_undo_t**	pundo;
	trx_rseg_t*	rseg;
	const bool	is_temp	= index->table->is_temporary();
	buf_block_t*	undo_block;

	if (is_temp) {
		mtr.set_log_mode(MTR_LOG_NO_REDO);
		rseg = trx->get_temp_rseg();
		pundo = &trx->rsegs.m_noredo.undo;
		undo_block = trx_undo_assign_low<true>(trx, rseg, pundo,
						       &mtr, &err);
	} else {
		ut_ad(!trx->read_only);
		ut_ad(trx->id);
		pundo = &trx->rsegs.m_redo.undo;
		rseg = trx->rsegs.m_redo.rseg;
		undo_block = trx_undo_assign_low<false>(trx, rseg, pundo,
							&mtr, &err);
	}

	trx_undo_t*	undo	= *pundo;
	ut_ad((err == DB_SUCCESS) == (undo_block != NULL));
	if (UNIV_UNLIKELY(undo_block == NULL)) {
err_exit:
		mtr.commit();
		return err;
	}

	ut_ad(undo != NULL);

	do {
		uint16_t offset = !rec
			? trx_undo_page_report_insert(
				undo_block, trx, index, clust_entry, &mtr,
				bulk)
			: trx_undo_page_report_modify(
				undo_block, trx, index, rec, offsets, update,
				cmpl_info, clust_entry, &mtr);

		if (UNIV_UNLIKELY(offset == 0)) {
			const uint16_t first_free = mach_read_from_2(
				TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE
				+ undo_block->page.frame);
			memset(undo_block->page.frame + first_free, 0,
			       (srv_page_size - FIL_PAGE_DATA_END)
			       - first_free);

			if (first_free
			    == TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE) {
				/* The record did not fit on an empty
				undo page. Discard the freshly allocated
				page and return an error. */

				/* When we remove a page from an undo
				log, this is analogous to a
				pessimistic insert in a B-tree, and we
				must reserve the counterpart of the
				tree latch, which is the rseg
				mutex. We must commit the mini-transaction
				first, because it may be holding lower-level
				latches, such as SYNC_FSP_PAGE. */

				mtr.commit();
				mtr.start();
				if (is_temp) {
					mtr.set_log_mode(MTR_LOG_NO_REDO);
				}

				rseg->latch.wr_lock(SRW_LOCK_CALL);
				err = trx_undo_free_last_page(undo, &mtr);
				rseg->latch.wr_unlock();

				if (m.second) {
					/* We are not going to modify
					this table after all. */
					trx->mod_tables.erase(m.first);
				}

				if (err == DB_SUCCESS) {
					err = DB_UNDO_RECORD_TOO_BIG;
				}
				goto err_exit;
			} else {
				/* Write log for clearing the unused
				tail of the undo page. It might
				contain some garbage from a previously
				written record, and mtr_t::write()
				will optimize away writes of unchanged
				bytes. Failure to write this caused a
				recovery failure when we avoided
				reading the undo log page from the
				data file and initialized it based on
				redo log records (which included the
				write of the previous garbage). */
				mtr.memset(*undo_block, first_free,
					   srv_page_size - first_free
					   - FIL_PAGE_DATA_END, 0);
			}

			mtr.commit();
		} else {
			/* Success */
			undo->top_page_no = undo_block->page.id().page_no();
			mtr.commit();
			undo->top_offset  = offset;
			undo->top_undo_no = trx->undo_no++;
			undo->guess_block = undo_block;
			ut_ad(!undo->empty());

			if (!is_temp) {
				trx_mod_table_time_t& time = m.first->second;
				ut_ad(time.valid(undo->top_undo_no));

				if (!time.is_versioned()
				    && index->table->versioned_by_id()
				    && (!rec /* INSERT */
					|| (update
					    && update->affects_versioned()))) {
					time.set_versioned(undo->top_undo_no);
				}
			}

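			/* Only a regular (non-bulk) operation reports
			a roll pointer back to the caller; in the
			bulk-insert case the undo log only carries the
			TRX_UNDO_EMPTY record (see the is_bulk_insert()
			branch above). */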
			if (!bulk) {
				*roll_ptr = trx_undo_build_roll_ptr(
					!rec, trx_sys.rseg_id(rseg, !is_temp),
					undo->top_page_no, offset);
			}

			return(DB_SUCCESS);
		}

		ut_ad(undo_block->page.id().page_no() == undo->last_page_no);

		/* We have to extend the undo log by one page */

		ut_ad(++loop_count < 2);
		mtr.start();

		if (is_temp) {
			mtr.set_log_mode(MTR_LOG_NO_REDO);
		}

		undo_block = trx_undo_add_page(undo, &mtr, &err);

		DBUG_EXECUTE_IF("ib_err_ins_undo_page_add_failure",
				undo_block = NULL;);
	} while (UNIV_LIKELY(undo_block != NULL));

	if (err != DB_OUT_OF_FILE_SPACE) {
		goto err_exit;
	}

	ib_errf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
		DB_OUT_OF_FILE_SPACE,
		//ER_INNODB_UNDO_LOG_FULL,
		"No more space left over in %s tablespace for allocating UNDO"
		" log pages. Please add new data file to the tablespace or"
		" check if filesystem is full or enable auto-extension for"
		" the tablespace",
		undo->rseg->space == fil_system.sys_space
		? "system" : is_temp ? "temporary" : "undo");

	goto err_exit;
}

/*============== BUILDING PREVIOUS VERSION OF A RECORD ===============*/

static dberr_t trx_undo_prev_version(const rec_t *rec, dict_index_t *index,
                                     rec_offs *offsets, mem_heap_t *heap,
                                     rec_t **old_vers,
                                     const purge_sys_t::view_guard &check,
                                     ulint v_status,
                                     mem_heap_t *v_heap, dtuple_t **vrow,
                                     const trx_undo_rec_t *undo_rec);

inline const buf_block_t *
purge_sys_t::view_guard::get(const page_id_t id, mtr_t *mtr)
{
  buf_block_t *block;
  ut_ad(mtr->is_active());
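  /* When this guard was created for a purge batch (no latch), undo pages
  that were already looked up are cached in purge_sys.pages; reuse a
  cached block instead of doing another buf_pool lookup. */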
  if (!latch)
  {
    decltype(purge_sys.pages)::const_iterator i= purge_sys.pages.find(id);
    if (i != purge_sys.pages.end())
    {
      block= i->second;
      ut_ad(block);
      return block;
    }
  }
  block= buf_pool.page_fix(id);
  if (block)
  {
    mtr->memo_push(block, MTR_MEMO_BUF_FIX);
    if (latch)
      /* In MVCC operations (outside purge tasks), we will refresh the
      buf_pool.LRU position. In purge, we expect the page to be freed
      soon, at the end of the current batch. */
      buf_page_make_young_if_needed(&block->page);
  }
  return block;
}

/** Build a previous version of a clustered index record. The caller
must hold a latch on the index page of the clustered index record.
@param rec       version of a clustered index record
@param index     clustered index
@param offsets   rec_get_offsets(rec, index)
@param heap      memory heap from which the memory needed is allocated
@param old_vers  previous version, or NULL if rec is the first inserted
                 version, or if history data has been deleted (an error),
                 or if the purge could have removed the version though
                 it has not yet done so
@param mtr       mini-transaction
@param v_status  TRX_UNDO_PREV_IN_PURGE, ...
@param v_heap    memory heap used to create vrow dtuple if it is not yet
                 created. This heap differs from "heap" above in that it
                 could be prebuilt->old_vers_heap for a SELECT
@param vrow      virtual column info, if any
@return error code
@retval DB_SUCCESS if previous version was successfully built,
or if it was an insert or the undo record refers to the table before rebuild
@retval DB_MISSING_HISTORY if the history is missing */
TRANSACTIONAL_TARGET
dberr_t trx_undo_prev_version_build(const rec_t *rec, dict_index_t *index,
                                    rec_offs *offsets, mem_heap_t *heap,
                                    rec_t **old_vers, mtr_t *mtr,
                                    ulint v_status,
                                    mem_heap_t *v_heap, dtuple_t **vrow)
{
  ut_ad(!index->table->is_temporary());
  ut_ad(rec_offs_validate(rec, index, offsets));

  const roll_ptr_t roll_ptr= row_get_rec_roll_ptr(rec, index, offsets);
  *old_vers= nullptr;

  if (trx_undo_roll_ptr_is_insert(roll_ptr))
    /* The record rec is the first inserted version */
    return DB_SUCCESS;

  ut_ad(roll_ptr < 1ULL << 55);
  ut_ad(uint16_t(roll_ptr) >= TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE);
  ut_ad(uint32_t(roll_ptr >> 16) >= FSP_FIRST_INODE_PAGE_NO);

  const trx_id_t rec_trx_id= row_get_rec_trx_id(rec, index, offsets);

  ut_ad(!index->table->skip_alter_undo);

  mariadb_increment_undo_records_read();
  const auto savepoint= mtr->get_savepoint();
  dberr_t err= DB_MISSING_HISTORY;
  purge_sys_t::view_guard check{v_status == TRX_UNDO_CHECK_PURGE_PAGES
                                ? purge_sys_t::view_guard::PURGE
                                : v_status == TRX_UNDO_CHECK_PURGEABILITY
                                ? purge_sys_t::view_guard::VIEW
                                : purge_sys_t::view_guard::END_VIEW};
  if (!check.view().changes_visible(rec_trx_id))
  {
    trx_undo_rec_t *undo_rec= nullptr;
    static_assert(ROLL_PTR_RSEG_ID_POS == 48, "");
    static_assert(ROLL_PTR_PAGE_POS == 16, "");
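    /* A DB_ROLL_PTR packs, from the most significant bit down:
    1 bit   is-insert flag (already handled above)
    7 bits  rollback segment id
    32 bits undo log page number
    16 bits byte offset within that page
    hence the shifts by ROLL_PTR_RSEG_ID_POS and ROLL_PTR_PAGE_POS below. */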
    if (const buf_block_t *undo_page=
        check.get(page_id_t{trx_sys.rseg_array[(roll_ptr >> 48) & 0x7f].
                            space->id,
                            uint32_t(roll_ptr >> 16)}, mtr))
    {
      static_assert(ROLL_PTR_BYTE_POS == 0, "");
      const uint16_t offset{uint16_t(roll_ptr)};
      undo_rec= undo_page->page.frame + offset;
      const size_t end= mach_read_from_2(undo_rec);
      if (UNIV_UNLIKELY(end > offset &&
                        end < srv_page_size - FIL_PAGE_DATA_END))
        err= trx_undo_prev_version(rec, index, offsets, heap,
                                   old_vers, check, v_status, v_heap, vrow,
                                   undo_rec);
    }
  }

  mtr->rollback_to_savepoint(savepoint);
  return err;
}

static dberr_t trx_undo_prev_version(const rec_t *rec, dict_index_t *index,
                                     rec_offs *offsets, mem_heap_t *heap,
                                     rec_t **old_vers,
                                     const purge_sys_t::view_guard &check,
                                     ulint v_status,
                                     mem_heap_t *v_heap, dtuple_t **vrow,
                                     const trx_undo_rec_t *undo_rec)
{
	byte type, cmpl_info;
	bool dummy_extern;
	undo_no_t undo_no;
	table_id_t table_id;
	const byte *ptr =
		trx_undo_rec_get_pars(undo_rec, &type, &cmpl_info,
				      &dummy_extern, &undo_no, &table_id);

	if (table_id != index->table->id) {
		/* The table should have been rebuilt, but purge has
		not yet removed the undo log records for the
		now-dropped old table (table_id). */
		return DB_SUCCESS;
	}

	trx_id_t trx_id;
	roll_ptr_t roll_ptr;
	byte info_bits;

	ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
					       &info_bits);

	/* (a) If a clustered index record version is such that the
	trx id stamp in it is bigger than purge_sys.view, then the
	BLOBs in that version are known to exist (the purge has not
	progressed that far);

	(b) if the version is the first version such that trx id in it
	is less than purge_sys.view, and it is not delete-marked,
	then the BLOBs in that version are known to exist (the purge
	cannot have purged the BLOBs referenced by that version
	yet).

	This function does not fetch any BLOBs.  The callers might, by
	possibly invoking row_ext_create() via row_build().  However,
	they should have all needed information in the *old_vers
	returned by this function.  This is because *old_vers is based
	on the transaction undo log records.  The function
	trx_undo_page_fetch_ext() will write BLOB prefixes to the
	transaction undo log that are at least as long as the longest
	possible column prefix in a secondary index.  Thus, secondary
	index entries for *old_vers can be constructed without
	dereferencing any BLOB pointers. */

	ptr = trx_undo_rec_skip_row_ref(ptr, index);

	upd_t* update;
	ptr = trx_undo_update_rec_get_update(ptr, index, type, trx_id,
					     roll_ptr, info_bits,
					     heap, &update);
	ut_a(ptr);
	byte* buf;

	if (row_upd_changes_field_size_or_external(index, offsets, update)) {
		/* We should confirm the existence of disowned external data,
		if the previous version record is delete marked. If the trx_id
		of the previous record is seen by purge view, we should treat
		it as missing history, because the disowned external data
		might be purged already.

		The inherited external data (BLOBs) can be freed (purged)
		after trx_id was committed, provided that no view was started
		before trx_id. If the purge view can see the committed
		delete-marked record by trx_id, no transactions need to access
		the BLOB. */

		if (update->info_bits & REC_INFO_DELETED_FLAG
		    && check.view().changes_visible(trx_id)) {
			return DB_SUCCESS;
		}

		/* We have to set the appropriate extern storage bits in the
		old version of the record: the extern bits in rec for those
		fields that update does NOT update, as well as the bits for
		those fields that update updates to become externally stored
		fields. Store the info: */

		dtuple_t* entry = row_rec_to_index_entry(rec, index, offsets,
							 heap);
		/* The page containing the clustered index record
		corresponding to entry is latched.  Thus the
		following call is safe. */
		if (!row_upd_index_replace_new_col_vals(entry, *index, update,
							heap)) {
			return (v_status & TRX_UNDO_PREV_IN_PURGE)
				? DB_MISSING_HISTORY : DB_CORRUPTION;
		}

		/* Get number of externally stored columns in updated record */
		const ulint n_ext = index->is_primary()
			? dtuple_get_n_ext(entry) : 0;

		buf = static_cast<byte*>(mem_heap_alloc(
			heap, rec_get_converted_size(index, entry, n_ext)));

		*old_vers = rec_convert_dtuple_to_rec(buf, index,
						      entry, n_ext);
	} else {
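		/* No field changes its stored size, so the previous
		version can be built by copying rec as is and patching
		the updated fields in place.  The SQL NULL flag
		manipulation below is only possible in
		ROW_FORMAT=REDUNDANT, where each field has an offset
		entry (carrying a SQL NULL bit) in the record header. */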
		buf = static_cast<byte*>(mem_heap_alloc(
			heap, rec_offs_size(offsets)));

		*old_vers = rec_copy(buf, rec, offsets);
		rec_offs_make_valid(*old_vers, index, true, offsets);
		rec_set_bit_field_1(*old_vers, update->info_bits,
				    rec_offs_comp(offsets)
				    ? REC_NEW_INFO_BITS : REC_OLD_INFO_BITS,
				    REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT);
		for (ulint i = 0; i < update->n_fields; i++) {
			const upd_field_t* uf = upd_get_nth_field(update, i);
			if (upd_fld_is_virtual_col(uf)) {
				/* There are no virtual columns in
				a clustered index record. */
				continue;
			}
			const ulint n = uf->field_no;
			ut_ad(!dfield_is_ext(&uf->new_val)
			      == !rec_offs_nth_extern(offsets, n));
			ut_ad(!rec_offs_nth_default(offsets, n));

			if (UNIV_UNLIKELY(dfield_is_null(&uf->new_val))) {
				if (rec_offs_nth_sql_null(offsets, n)) {
					ut_ad(index->table->is_instant());
					ut_ad(n >= index->n_core_fields);
					continue;
				}
				ut_ad(!index->table->not_redundant());
				ulint l = rec_get_1byte_offs_flag(*old_vers)
					? (n + 1) : (n + 1) * 2;
				byte* b = *old_vers - REC_N_OLD_EXTRA_BYTES
					- l;
				*b= byte(*b | REC_1BYTE_SQL_NULL_MASK);
				compile_time_assert(REC_1BYTE_SQL_NULL_MASK << 8
						    == REC_2BYTE_SQL_NULL_MASK);
				continue;
			}

			ulint len;
			memcpy(rec_get_nth_field(*old_vers, offsets, n, &len),
			       uf->new_val.data, uf->new_val.len);
			if (UNIV_UNLIKELY(len != uf->new_val.len)) {
				ut_ad(len == UNIV_SQL_NULL);
				ut_ad(!rec_offs_comp(offsets));
				ut_ad(uf->new_val.len
				      == rec_get_nth_field_size(rec, n));
				ulint l = rec_get_1byte_offs_flag(*old_vers)
					? (n + 1) : (n + 1) * 2;
				*(*old_vers - REC_N_OLD_EXTRA_BYTES - l)
					&= byte(~REC_1BYTE_SQL_NULL_MASK);
			}
		}
	}

	/* Set the old value (which is the after image of an update) in the
	update vector to dtuple vrow */
	if (v_status & TRX_UNDO_GET_OLD_V_VALUE) {
		row_upd_replace_vcol((dtuple_t*)*vrow, index->table, update,
				     false, nullptr, nullptr);
	}

#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
	rec_offs offsets_dbg[REC_OFFS_NORMAL_SIZE];
	rec_offs_init(offsets_dbg);
	ut_a(!rec_offs_any_null_extern(
		*old_vers, rec_get_offsets(*old_vers, index, offsets_dbg,
					   index->n_core_fields,
					   ULINT_UNDEFINED, &heap)));
#endif // defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG

	if (vrow && !(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
		if (!(*vrow)) {
			*vrow = dtuple_create_with_vcol(
				v_heap ? v_heap : heap,
				dict_table_get_n_cols(index->table),
				dict_table_get_n_v_cols(index->table));
			dtuple_init_v_fld(*vrow);
		}

		ut_ad(index->table->n_v_cols);
		trx_undo_read_v_cols(index->table, ptr, *vrow,
				     v_status & TRX_UNDO_PREV_IN_PURGE);
	}

	return DB_SUCCESS;
}

/** Read virtual column value from undo log
@param[in]	table		the table
@param[in]	ptr		undo log pointer
@param[in,out]	row		the dtuple to fill
@param[in]	in_purge	whether this is called by purge */
void
trx_undo_read_v_cols(
	const dict_table_t*	table,
	const byte*		ptr,
	dtuple_t*		row,
	bool			in_purge)
{
	const byte*	end_ptr;
	bool		first_v_col = true;
	bool		is_undo_log = true;

	end_ptr = ptr + mach_read_from_2(ptr);
	ptr += 2;
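	/* The buffer starts with a 2-byte total length (consumed above)
	and then holds a sequence of (field number, value) pairs.  A field
	number >= REC_MAX_N_FIELDS denotes a virtual column and is
	followed by its encoded position, which trx_undo_read_v_idx()
	resolves (or maps to FIL_NULL if the column no longer exists). */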
	while (ptr < end_ptr) {
		dfield_t* dfield;
		const byte* field;
		uint32_t field_no, len, orig_len;

		field_no = mach_read_next_compressed(
				const_cast<const byte**>(&ptr));

		const bool is_virtual = (field_no >= REC_MAX_N_FIELDS);

		if (is_virtual) {
			ptr = trx_undo_read_v_idx(
				table, ptr, first_v_col, &is_undo_log,
				&field_no);
			first_v_col = false;
		}

		ptr = trx_undo_rec_get_col_val(
			ptr, &field, &len, &orig_len);

		/* The virtual column is no longer indexed or does not exist.
		This check must be done after trx_undo_rec_get_col_val() so
		that the undo log pointer still advances past the value. */
		if (field_no == FIL_NULL) {
			ut_ad(is_virtual);
			continue;
		}

		if (is_virtual) {
			dict_v_col_t*	vcol = dict_table_get_nth_v_col(
				table, field_no);

			dfield = dtuple_get_nth_v_field(row, vcol->v_pos);

			if (!in_purge
			    || dfield_get_type(dfield)->mtype == DATA_MISSING) {
				dict_col_copy_type(
					&vcol->m_col,
					dfield_get_type(dfield));
				dfield_set_data(dfield, field, len);
			}
		}
	}

	ut_ad(ptr == end_ptr);
}
