mirror of
				https://github.com/MariaDB/server.git
				synced 2025-10-25 08:58:14 +02:00 
			
		
		
		
	 4369a382a1
			
		
	
	
	4369a382a1
	
	
	
		
			
			mtr_t::trx: New public const data member. If it is nullptr, per-connection statistics will not be updated. The transaction is not necessarily in active state. We may merely use it as an "anchor" for buffering updates of buf_pool.stat.n_page_gets in trx_t::pages_accessed. As part of this, we try to create mtr_t less often, reusing one object in multiple places. Some read operations will invoke mtr_t::rollback_to_savepoint() to release their own page latches within a larger mini-transactions. Reviewed by: Vladislav Lesin Tested by: Saahil Alam
		
			
				
	
	
		
			1387 lines
		
	
	
	
		
			41 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			1387 lines
		
	
	
	
		
			41 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| /*****************************************************************************
 | |
| 
 | |
| Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved.
 | |
| Copyright (c) 2017, 2023, MariaDB Corporation.
 | |
| 
 | |
| This program is free software; you can redistribute it and/or modify it under
 | |
| the terms of the GNU General Public License as published by the Free Software
 | |
| Foundation; version 2 of the License.
 | |
| 
 | |
| This program is distributed in the hope that it will be useful, but WITHOUT
 | |
| ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 | |
| FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 | |
| 
 | |
| You should have received a copy of the GNU General Public License along with
 | |
| this program; if not, write to the Free Software Foundation, Inc.,
 | |
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
 | |
| 
 | |
| *****************************************************************************/
 | |
| 
 | |
| /**************************************************//**
 | |
| @file row/row0umod.cc
 | |
| Undo modify of a row
 | |
| 
 | |
| Created 2/27/1997 Heikki Tuuri
 | |
| *******************************************************/
 | |
| 
 | |
| #include "row0umod.h"
 | |
| #include "dict0dict.h"
 | |
| #include "dict0stats.h"
 | |
| #include "dict0boot.h"
 | |
| #include "trx0undo.h"
 | |
| #include "trx0roll.h"
 | |
| #include "trx0purge.h"
 | |
| #include "btr0btr.h"
 | |
| #include "mach0data.h"
 | |
| #include "row0undo.h"
 | |
| #include "row0vers.h"
 | |
| #include "trx0trx.h"
 | |
| #include "trx0rec.h"
 | |
| #include "row0row.h"
 | |
| #include "row0upd.h"
 | |
| #include "que0que.h"
 | |
| #include "log0log.h"
 | |
| 
 | |
| /* Considerations on undoing a modify operation.
 | |
| (1) Undoing a delete marking: all index records should be found. Some of
 | |
| them may have delete mark already FALSE, if the delete mark operation was
 | |
| stopped underway, or if the undo operation ended prematurely because of a
 | |
| system crash.
 | |
| (2) Undoing an update of a delete unmarked record: the newer version of
 | |
| an updated secondary index entry should be removed if no prior version
 | |
| of the clustered index record requires its existence. Otherwise, it should
 | |
| be delete marked.
 | |
| (3) Undoing an update of a delete marked record. In this kind of update a
 | |
| delete marked clustered index record was delete unmarked and possibly also
 | |
| some of its fields were changed. Now, it is possible that the delete marked
 | |
| version has become obsolete at the time the undo is started. */
 | |
| 
 | |
| /*************************************************************************
 | |
| IMPORTANT NOTE: Any operation that generates redo MUST check that there
 | |
| is enough space in the redo log before for that operation. This is
 | |
| done by calling log_free_check(). The reason for checking the
 | |
| availability of the redo log space before the start of the operation is
 | |
| that we MUST not hold any synchonization objects when performing the
 | |
| check.
 | |
| If you make a change in this module make sure that no codepath is
 | |
| introduced where a call to log_free_check() is bypassed. */
 | |
| 
 | |
| /***********************************************************//**
 | |
| Undoes a modify in a clustered index record.
 | |
| @return DB_SUCCESS, DB_FAIL, or error code: we may run out of file space */
 | |
| static MY_ATTRIBUTE((nonnull, warn_unused_result))
 | |
| dberr_t
 | |
| row_undo_mod_clust_low(
 | |
| /*===================*/
 | |
| 	undo_node_t*	node,	/*!< in: row undo node */
 | |
| 	rec_offs**	offsets,/*!< out: rec_get_offsets() on the record */
 | |
| 	mem_heap_t**	offsets_heap,
 | |
| 				/*!< in/out: memory heap that can be emptied */
 | |
| 	mem_heap_t*	heap,	/*!< in/out: memory heap */
 | |
| 	byte*		sys,	/*!< out: DB_TRX_ID, DB_ROLL_PTR
 | |
| 				for row_log_table_delete() */
 | |
| 	que_thr_t*	thr,	/*!< in: query thread */
 | |
| 	mtr_t*		mtr,	/*!< in: mtr; must be committed before
 | |
| 				latching any further pages */
 | |
| 	btr_latch_mode	mode)	/*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
 | |
| {
 | |
| 	btr_pcur_t*	pcur;
 | |
| 	btr_cur_t*	btr_cur;
 | |
| 	dberr_t		err;
 | |
| 
 | |
| 	pcur = &node->pcur;
 | |
| 	btr_cur = btr_pcur_get_btr_cur(pcur);
 | |
| 
 | |
| 	if (pcur->restore_position(mode, mtr) != btr_pcur_t::SAME_ALL) {
 | |
| 		return DB_CORRUPTION;
 | |
| 	}
 | |
| 
 | |
| 	ut_ad(rec_get_trx_id(btr_cur_get_rec(btr_cur),
 | |
| 			     btr_cur_get_index(btr_cur))
 | |
| 	      == thr_get_trx(thr)->id
 | |
| 	      || btr_cur_get_index(btr_cur)->table->is_temporary());
 | |
| 	ut_ad(node->ref != &trx_undo_metadata
 | |
| 	      || node->update->info_bits == REC_INFO_METADATA_ADD
 | |
| 	      || node->update->info_bits == REC_INFO_METADATA_ALTER);
 | |
| 
 | |
| 	if (mode != BTR_MODIFY_TREE) {
 | |
| 		ut_ad(mode == BTR_MODIFY_LEAF
 | |
| 		      || mode == BTR_MODIFY_LEAF_ALREADY_LATCHED);
 | |
| 
 | |
| 		err = btr_cur_optimistic_update(
 | |
| 			BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG
 | |
| 			| BTR_KEEP_SYS_FLAG,
 | |
| 			btr_cur, offsets, offsets_heap,
 | |
| 			node->update, node->cmpl_info,
 | |
| 			thr, thr_get_trx(thr)->id, mtr);
 | |
| 		ut_ad(err != DB_SUCCESS || node->ref != &trx_undo_metadata);
 | |
| 	} else {
 | |
| 		big_rec_t*	dummy_big_rec;
 | |
| 
 | |
| 		err = btr_cur_pessimistic_update(
 | |
| 			BTR_NO_LOCKING_FLAG
 | |
| 			| BTR_NO_UNDO_LOG_FLAG
 | |
| 			| BTR_KEEP_SYS_FLAG,
 | |
| 			btr_cur, offsets, offsets_heap, heap,
 | |
| 			&dummy_big_rec, node->update,
 | |
| 			node->cmpl_info, thr, thr_get_trx(thr)->id, mtr);
 | |
| 
 | |
| 		ut_a(!dummy_big_rec);
 | |
| 
 | |
| 		if (err == DB_SUCCESS
 | |
| 		    && node->ref == &trx_undo_metadata
 | |
| 		    && btr_cur_get_index(btr_cur)->table->instant
 | |
| 		    && node->update->info_bits == REC_INFO_METADATA_ADD) {
 | |
| 			btr_reset_instant(*btr_cur->index(), false, mtr);
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	if (err != DB_SUCCESS) {
 | |
| 		return err;
 | |
| 	}
 | |
| 
 | |
| 	switch (const auto id = btr_cur_get_index(btr_cur)->table->id) {
 | |
| 		unsigned c;
 | |
| 	case DICT_TABLES_ID:
 | |
| 		if (node->trx != trx_roll_crash_recv_trx) {
 | |
| 			break;
 | |
| 		}
 | |
| 		c = DICT_COL__SYS_TABLES__ID;
 | |
| 		goto evict;
 | |
| 	case DICT_INDEXES_ID:
 | |
| 		if (node->trx != trx_roll_crash_recv_trx) {
 | |
| 			break;
 | |
| 		} else if (node->rec_type == TRX_UNDO_DEL_MARK_REC
 | |
| 			   && btr_cur_get_rec(btr_cur)
 | |
| 			   [8 + 8 + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN]
 | |
| 			   == static_cast<byte>(*TEMP_INDEX_PREFIX_STR)) {
 | |
| 			/* We are rolling back the DELETE of metadata
 | |
| 			for a failed ADD INDEX operation. This does
 | |
| 			not affect any cached table definition,
 | |
| 			because we are filtering out such indexes in
 | |
| 			dict_load_indexes(). */
 | |
| 			break;
 | |
| 		}
 | |
| 		/* fall through */
 | |
| 	case DICT_COLUMNS_ID:
 | |
| 		static_assert(!DICT_COL__SYS_INDEXES__TABLE_ID, "");
 | |
| 		static_assert(!DICT_COL__SYS_COLUMNS__TABLE_ID, "");
 | |
| 		c = DICT_COL__SYS_COLUMNS__TABLE_ID;
 | |
| 		/* This is rolling back an UPDATE or DELETE on SYS_COLUMNS.
 | |
| 		If it was part of an instant ALTER TABLE operation, we
 | |
| 		must evict the table definition, so that it can be
 | |
| 		reloaded after the dictionary operation has been
 | |
| 		completed. At this point, any corresponding operation
 | |
| 		to the metadata record will have been rolled back. */
 | |
| 	evict:
 | |
| 		const dfield_t& table_id = *dtuple_get_nth_field(node->row, c);
 | |
| 		ut_ad(dfield_get_len(&table_id) == 8);
 | |
| 		node->trx->evict_table(mach_read_from_8(
 | |
| 					       static_cast<byte*>(
 | |
| 						       table_id.data)),
 | |
| 				       id == DICT_COLUMNS_ID);
 | |
| 	}
 | |
| 
 | |
| 	return DB_SUCCESS;
 | |
| }
 | |
| 
 | |
| /** Get the byte offset of the DB_TRX_ID column
 | |
| @param[in]	rec	clustered index record
 | |
| @param[in]	index	clustered index
 | |
| @return	the byte offset of DB_TRX_ID, from the start of rec */
 | |
| ulint row_trx_id_offset(const rec_t* rec, const dict_index_t* index)
 | |
| {
 | |
| 	ut_ad(index->n_uniq <= MAX_REF_PARTS);
 | |
| 	ulint trx_id_offset = index->trx_id_offset;
 | |
| 	if (!trx_id_offset) {
 | |
| 		/* Reserve enough offsets for the PRIMARY KEY and 2 columns
 | |
| 		so that we can access DB_TRX_ID, DB_ROLL_PTR. */
 | |
| 		rec_offs offsets_[REC_OFFS_HEADER_SIZE + MAX_REF_PARTS + 2];
 | |
| 		rec_offs_init(offsets_);
 | |
| 		mem_heap_t* heap = NULL;
 | |
| 		const ulint trx_id_pos = index->n_uniq ? index->n_uniq : 1;
 | |
| 		rec_offs* offsets = rec_get_offsets(rec, index, offsets_,
 | |
| 						    index->n_core_fields,
 | |
| 						    trx_id_pos + 1, &heap);
 | |
| 		ut_ad(!heap);
 | |
| 		ulint len;
 | |
| 		trx_id_offset = rec_get_nth_field_offs(
 | |
| 			offsets, trx_id_pos, &len);
 | |
| 		ut_ad(len == DATA_TRX_ID_LEN);
 | |
| 	}
 | |
| 
 | |
| 	return trx_id_offset;
 | |
| }
 | |
| 
 | |
| /** Determine if rollback must execute a purge-like operation.
 | |
| @param node   row undo
 | |
| @return	whether the record should be purged */
 | |
| static bool row_undo_mod_must_purge(const undo_node_t &node)
 | |
| {
 | |
|   ut_ad(node.rec_type == TRX_UNDO_UPD_DEL_REC);
 | |
|   ut_ad(!node.table->is_temporary());
 | |
| 
 | |
|   const btr_cur_t &btr_cur= node.pcur.btr_cur;
 | |
|   ut_ad(btr_cur.index()->is_primary());
 | |
|   DEBUG_SYNC_C("rollback_purge_clust");
 | |
| 
 | |
|   if (!purge_sys.is_purgeable(node.new_trx_id))
 | |
|     return false;
 | |
| 
 | |
|   const rec_t *rec= btr_cur_get_rec(&btr_cur);
 | |
|   return trx_read_trx_id(rec + row_trx_id_offset(rec, btr_cur.index())) ==
 | |
|     node.new_trx_id;
 | |
| }
 | |
| 
 | |
| /***********************************************************//**
 | |
| Undoes a modify in a clustered index record. Sets also the node state for the
 | |
| next round of undo.
 | |
| @return DB_SUCCESS or error code: we may run out of file space */
 | |
| static MY_ATTRIBUTE((nonnull, warn_unused_result))
 | |
| dberr_t
 | |
| row_undo_mod_clust(
 | |
| /*===============*/
 | |
| 	undo_node_t*	node,	/*!< in: row undo node */
 | |
| 	que_thr_t*	thr)	/*!< in: query thread */
 | |
| {
 | |
| 	btr_pcur_t*	pcur;
 | |
| 	mtr_t		mtr{node->trx};
 | |
| 	dberr_t		err;
 | |
| 	dict_index_t*	index;
 | |
| 
 | |
| 	ut_ad(thr_get_trx(thr) == node->trx);
 | |
| 	ut_ad(node->trx->in_rollback);
 | |
| 
 | |
| 	log_free_check();
 | |
| 	pcur = &node->pcur;
 | |
| 	index = btr_cur_get_index(btr_pcur_get_btr_cur(pcur));
 | |
| 	ut_ad(index->is_primary());
 | |
| 
 | |
| 	mtr.start();
 | |
| 	if (index->table->is_temporary()) {
 | |
| 		mtr.set_log_mode(MTR_LOG_NO_REDO);
 | |
| 	} else {
 | |
| 		index->set_modified(mtr);
 | |
| 		ut_ad(lock_table_has_locks(index->table));
 | |
| 	}
 | |
| 
 | |
| 	mem_heap_t*	heap		= mem_heap_create(1024);
 | |
| 	mem_heap_t*	offsets_heap	= NULL;
 | |
| 	rec_offs*	offsets		= NULL;
 | |
| 	byte		sys[DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN];
 | |
| 
 | |
| 	/* Try optimistic processing of the record, keeping changes within
 | |
| 	the index page */
 | |
| 
 | |
| 	err = row_undo_mod_clust_low(node, &offsets, &offsets_heap,
 | |
| 				     heap, sys, thr, &mtr, BTR_MODIFY_LEAF);
 | |
| 
 | |
| 	if (err != DB_SUCCESS) {
 | |
| 		btr_pcur_commit_specify_mtr(pcur, &mtr);
 | |
| 
 | |
| 		/* We may have to modify tree structure: do a pessimistic
 | |
| 		descent down the index tree */
 | |
| 
 | |
| 		mtr.start();
 | |
| 		if (index->table->is_temporary()) {
 | |
| 			mtr.set_log_mode(MTR_LOG_NO_REDO);
 | |
| 		} else {
 | |
| 			index->set_modified(mtr);
 | |
| 		}
 | |
| 
 | |
| 		err = row_undo_mod_clust_low(node, &offsets, &offsets_heap,
 | |
| 					     heap, sys, thr, &mtr,
 | |
| 					     BTR_MODIFY_TREE);
 | |
| 		ut_ad(err == DB_SUCCESS || err == DB_OUT_OF_FILE_SPACE);
 | |
| 	}
 | |
| 
 | |
| 	/**
 | |
| 	* when scrubbing, and records gets cleared,
 | |
| 	*   the transaction id is not present afterwards.
 | |
| 	*   this is safe as: since the record is on free-list
 | |
| 	*   it can be reallocated at any time after this mtr-commits
 | |
| 	*   which is just below
 | |
| 	*/
 | |
| 	ut_ad(srv_immediate_scrub_data_uncompressed
 | |
| 	      || row_get_rec_trx_id(btr_pcur_get_rec(pcur), index, offsets)
 | |
| 	      == node->new_trx_id);
 | |
| 
 | |
| 	btr_pcur_commit_specify_mtr(pcur, &mtr);
 | |
| 	DEBUG_SYNC_C("rollback_undo_pk");
 | |
| 
 | |
| 	if (err != DB_SUCCESS) {
 | |
| 		goto func_exit;
 | |
| 	}
 | |
| 
 | |
| 	/* FIXME: Perform the below operations in the above
 | |
| 	mini-transaction when possible. */
 | |
| 
 | |
| 	if (node->rec_type == TRX_UNDO_UPD_DEL_REC) {
 | |
| 		/* In delete-marked records, DB_TRX_ID must
 | |
| 		always refer to an existing update_undo log record. */
 | |
| 		ut_ad(node->new_trx_id);
 | |
| 
 | |
| 		mtr.start();
 | |
| 		if (pcur->restore_position(BTR_MODIFY_LEAF, &mtr) !=
 | |
| 		    btr_pcur_t::SAME_ALL) {
 | |
| 			goto mtr_commit_exit;
 | |
| 		}
 | |
| 
 | |
| 		ut_ad(rec_get_deleted_flag(btr_pcur_get_rec(pcur),
 | |
| 					   dict_table_is_comp(node->table)));
 | |
| 
 | |
| 		if (index->table->is_temporary()) {
 | |
| 			mtr.set_log_mode(MTR_LOG_NO_REDO);
 | |
| 			err = btr_cur_optimistic_delete(&pcur->btr_cur, 0,
 | |
| 							&mtr);
 | |
| 			if (err != DB_FAIL) {
 | |
| 				goto mtr_commit_exit;
 | |
| 			}
 | |
| 			err = DB_SUCCESS;
 | |
| 			btr_pcur_commit_specify_mtr(pcur, &mtr);
 | |
| 		} else {
 | |
| 			index->set_modified(mtr);
 | |
| 			if (!row_undo_mod_must_purge(*node)) {
 | |
| 				goto mtr_commit_exit;
 | |
| 			}
 | |
| 			err = btr_cur_optimistic_delete(&pcur->btr_cur, 0,
 | |
| 							&mtr);
 | |
| 			if (err != DB_FAIL) {
 | |
| 				goto mtr_commit_exit;
 | |
| 			}
 | |
| 			err = DB_SUCCESS;
 | |
| 			btr_pcur_commit_specify_mtr(pcur, &mtr);
 | |
| 		}
 | |
| 
 | |
| 		mtr.start();
 | |
| 		if (pcur->restore_position(BTR_PURGE_TREE, &mtr) !=
 | |
| 		    btr_pcur_t::SAME_ALL) {
 | |
| 			goto mtr_commit_exit;
 | |
| 		}
 | |
| 
 | |
| 		ut_ad(rec_get_deleted_flag(btr_pcur_get_rec(pcur),
 | |
| 					   dict_table_is_comp(node->table)));
 | |
| 
 | |
| 		if (index->table->is_temporary()) {
 | |
| 			mtr.set_log_mode(MTR_LOG_NO_REDO);
 | |
| 		} else {
 | |
| 			if (!row_undo_mod_must_purge(*node)) {
 | |
| 				goto mtr_commit_exit;
 | |
| 			}
 | |
| 			index->set_modified(mtr);
 | |
| 		}
 | |
| 
 | |
| 		/* This operation is analogous to purge, we can free
 | |
| 		also inherited externally stored fields. We can also
 | |
| 		assume that the record was complete (including BLOBs),
 | |
| 		because it had been delete-marked after it had been
 | |
| 		completely inserted. Therefore, we are passing
 | |
| 		rollback=false, just like purge does. */
 | |
| 		btr_cur_pessimistic_delete(&err, FALSE, &pcur->btr_cur, 0,
 | |
| 					   false, &mtr);
 | |
| 		ut_ad(err == DB_SUCCESS || err == DB_OUT_OF_FILE_SPACE);
 | |
| 	} else if (!index->table->is_temporary() && node->new_trx_id) {
 | |
| 		/* We rolled back a record so that it still exists.
 | |
| 		We must reset the DB_TRX_ID if the history is no
 | |
| 		longer accessible by any active read view. */
 | |
| 
 | |
| 		mtr.start();
 | |
| 		if (pcur->restore_position(BTR_MODIFY_LEAF, &mtr)
 | |
| 		    != btr_pcur_t::SAME_ALL
 | |
| 		    || !purge_sys.is_purgeable(node->new_trx_id)) {
 | |
| 			goto mtr_commit_exit;
 | |
| 		}
 | |
| 
 | |
| 		rec_t* rec = btr_pcur_get_rec(pcur);
 | |
| 		ulint trx_id_offset = index->trx_id_offset;
 | |
| 		ulint trx_id_pos = index->n_uniq ? index->n_uniq : 1;
 | |
| 		/* Reserve enough offsets for the PRIMARY KEY and
 | |
| 		2 columns so that we can access DB_TRX_ID, DB_ROLL_PTR. */
 | |
| 		rec_offs offsets_[REC_OFFS_HEADER_SIZE + MAX_REF_PARTS + 2];
 | |
| 		if (trx_id_offset) {
 | |
| #ifdef UNIV_DEBUG
 | |
| 			ut_ad(rec_offs_validate(NULL, index, offsets));
 | |
| 			if (buf_block_get_page_zip(
 | |
| 				    btr_pcur_get_block(&node->pcur))) {
 | |
| 				/* Below, page_zip_write_trx_id_and_roll_ptr()
 | |
| 				needs offsets to access DB_TRX_ID,DB_ROLL_PTR.
 | |
| 				We already computed offsets for possibly
 | |
| 				another record in the clustered index.
 | |
| 				Because the PRIMARY KEY is fixed-length,
 | |
| 				the offsets for the PRIMARY KEY and
 | |
| 				DB_TRX_ID,DB_ROLL_PTR are still valid.
 | |
| 				Silence the rec_offs_validate() assertion. */
 | |
| 				rec_offs_make_valid(rec, index, true, offsets);
 | |
| 			}
 | |
| #endif
 | |
| 		} else if (rec_is_metadata(rec, *index)) {
 | |
| 			ut_ad(!buf_block_get_page_zip(btr_pcur_get_block(
 | |
| 							      pcur)));
 | |
| 			for (unsigned i = index->first_user_field(); i--; ) {
 | |
| 				trx_id_offset += index->fields[i].fixed_len;
 | |
| 			}
 | |
| 		} else {
 | |
| 			ut_ad(index->n_uniq <= MAX_REF_PARTS);
 | |
| 			rec_offs_init(offsets_);
 | |
| 			offsets = rec_get_offsets(rec, index, offsets_,
 | |
| 						  index->n_core_fields,
 | |
| 						  trx_id_pos + 2, &heap);
 | |
| 			ulint len;
 | |
| 			trx_id_offset = rec_get_nth_field_offs(
 | |
| 				offsets, trx_id_pos, &len);
 | |
| 			ut_ad(len == DATA_TRX_ID_LEN);
 | |
| 		}
 | |
| 
 | |
| 		if (trx_read_trx_id(rec + trx_id_offset) == node->new_trx_id) {
 | |
| 			ut_ad(!rec_get_deleted_flag(
 | |
| 				      rec, dict_table_is_comp(node->table))
 | |
| 			      || rec_is_alter_metadata(rec, *index));
 | |
| 			index->set_modified(mtr);
 | |
| 			buf_block_t* block = btr_pcur_get_block(pcur);
 | |
| 			if (UNIV_LIKELY_NULL(block->page.zip.data)) {
 | |
| 				page_zip_write_trx_id_and_roll_ptr(
 | |
| 					block, rec, offsets, trx_id_pos,
 | |
| 					0, 1ULL << ROLL_PTR_INSERT_FLAG_POS,
 | |
| 					&mtr);
 | |
| 			} else {
 | |
| 				size_t offs = rec + trx_id_offset
 | |
| 					- block->page.frame;
 | |
| 				mtr.memset(block, offs, DATA_TRX_ID_LEN, 0);
 | |
| 				offs += DATA_TRX_ID_LEN;
 | |
| 				mtr.write<1,mtr_t::MAYBE_NOP>(*block,
 | |
| 							      block->page.frame
 | |
| 							      + offs, 0x80U);
 | |
| 				mtr.memset(block, offs + 1,
 | |
| 					   DATA_ROLL_PTR_LEN - 1, 0);
 | |
| 			}
 | |
| 		}
 | |
| 	} else {
 | |
| 		goto func_exit;
 | |
| 	}
 | |
| 
 | |
| mtr_commit_exit:
 | |
| 	btr_pcur_commit_specify_mtr(pcur, &mtr);
 | |
| 
 | |
| func_exit:
 | |
| 	if (offsets_heap) {
 | |
| 		mem_heap_free(offsets_heap);
 | |
| 	}
 | |
| 	mem_heap_free(heap);
 | |
| 	return(err);
 | |
| }
 | |
| 
 | |
| bool dtuple_coll_eq(const dtuple_t &tuple1, const dtuple_t &tuple2);
 | |
| 
 | |
| /** Find out if an accessible version of a clustered index record
 | |
| corresponds to a secondary index entry.
 | |
| @param rec    record in a latched clustered index page
 | |
| @param index  secondary index
 | |
| @param ientry secondary index entry
 | |
| @param mtr    mini-transaction
 | |
| @param trx    transaction connected to current_thd
 | |
| @return whether an accessible non-dete-marked version of rec
 | |
| corresponds to ientry */
 | |
| static bool row_undo_mod_sec_is_unsafe(const rec_t *rec, dict_index_t *index,
 | |
|                                        const dtuple_t *ientry, mtr_t *mtr)
 | |
| {
 | |
| 	const rec_t*	version;
 | |
| 	rec_t*		prev_version;
 | |
| 	dict_index_t*	clust_index;
 | |
| 	rec_offs*	clust_offsets;
 | |
| 	mem_heap_t*	heap;
 | |
| 	mem_heap_t*	heap2;
 | |
| 	dtuple_t*	row;
 | |
| 	const dtuple_t*	entry;
 | |
| 	dtuple_t*	vrow = NULL;
 | |
| 	mem_heap_t*	v_heap = NULL;
 | |
| 	dtuple_t*	cur_vrow = NULL;
 | |
| 
 | |
| 	const bool comp = index->table->not_redundant();
 | |
| 	clust_index = dict_table_get_first_index(index->table);
 | |
| 
 | |
| 	ut_ad(!!page_rec_is_comp(rec) == comp);
 | |
| 	heap = mem_heap_create(1024);
 | |
| 	clust_offsets = rec_get_offsets(rec, clust_index, NULL,
 | |
| 					clust_index->n_core_fields,
 | |
| 					ULINT_UNDEFINED, &heap);
 | |
| 
 | |
| 	if (dict_index_has_virtual(index)) {
 | |
| 		v_heap = mem_heap_create(100);
 | |
| 		/* The current cluster index record could be
 | |
| 		deleted, but the previous version of it might not. We will
 | |
| 		need to get the virtual column data from undo record
 | |
| 		associated with current cluster index */
 | |
| 
 | |
| 		cur_vrow = row_vers_build_cur_vrow(
 | |
| 			rec, clust_index, &clust_offsets,
 | |
| 			index, 0, 0, heap, v_heap, mtr);
 | |
| 	}
 | |
| 
 | |
| 	version = rec;
 | |
| 
 | |
| 	for (;;) {
 | |
| 		heap2 = heap;
 | |
| 		heap = mem_heap_create(1024);
 | |
| 		vrow = NULL;
 | |
| 
 | |
| 		trx_undo_prev_version_build(version,
 | |
| 					    clust_index, clust_offsets,
 | |
| 					    heap, &prev_version, mtr,
 | |
| 					    TRX_UNDO_CHECK_PURGEABILITY,
 | |
| 					    nullptr,
 | |
| 					    dict_index_has_virtual(index)
 | |
| 					    ? &vrow : nullptr);
 | |
| 		mem_heap_free(heap2); /* free version and clust_offsets */
 | |
| 
 | |
| 		if (!prev_version) {
 | |
| 			break;
 | |
| 		}
 | |
| 
 | |
| 		clust_offsets = rec_get_offsets(prev_version, clust_index,
 | |
| 						NULL,
 | |
| 						clust_index->n_core_fields,
 | |
| 						ULINT_UNDEFINED, &heap);
 | |
| 
 | |
| 		if (dict_index_has_virtual(index)) {
 | |
| 			if (vrow) {
 | |
| 				if (dtuple_vcol_data_missing(*vrow, *index)) {
 | |
| 					goto nochange_index;
 | |
| 				}
 | |
| 				/* Keep the virtual row info for the next
 | |
| 				version, unless it is changed */
 | |
| 				mem_heap_empty(v_heap);
 | |
| 				cur_vrow = dtuple_copy(vrow, v_heap);
 | |
| 				dtuple_dup_v_fld(cur_vrow, v_heap);
 | |
| 			}
 | |
| 
 | |
| 			if (!cur_vrow) {
 | |
| 				/* Nothing for this index has changed,
 | |
| 				continue */
 | |
| nochange_index:
 | |
| 				version = prev_version;
 | |
| 				continue;
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		if (!rec_get_deleted_flag(prev_version, comp)) {
 | |
| 			row_ext_t*	ext;
 | |
| 
 | |
| 			/* The stack of versions is locked by mtr.
 | |
| 			Thus, it is safe to fetch the prefixes for
 | |
| 			externally stored columns. */
 | |
| 			row = row_build(ROW_COPY_POINTERS, clust_index,
 | |
| 					prev_version, clust_offsets,
 | |
| 					NULL, NULL, NULL, &ext, heap);
 | |
| 
 | |
| 			if (dict_index_has_virtual(index)) {
 | |
| 				ut_ad(cur_vrow);
 | |
| 				ut_ad(row->n_v_fields == cur_vrow->n_v_fields);
 | |
| 				dtuple_copy_v_fields(row, cur_vrow);
 | |
| 			}
 | |
| 
 | |
| 			entry = row_build_index_entry(row, ext, index, heap);
 | |
| 
 | |
| 			/* If entry == NULL, the record contains unset
 | |
| 			BLOB pointers.  This must be a freshly
 | |
| 			inserted record that we can safely ignore.
 | |
| 			For the justification, see the comments after
 | |
| 			the previous row_build_index_entry() call. */
 | |
| 
 | |
| 			/* NOTE that we cannot do the comparison as binary
 | |
| 			fields because maybe the secondary index record has
 | |
| 			already been updated to a different binary value in
 | |
| 			a char field, but the collation identifies the old
 | |
| 			and new value anyway! */
 | |
| 
 | |
| 			if (entry && dtuple_coll_eq(*ientry, *entry)) {
 | |
| 				break;
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		version = prev_version;
 | |
| 	}
 | |
| 
 | |
| 	mem_heap_free(heap);
 | |
| 
 | |
| 	if (v_heap) {
 | |
| 		mem_heap_free(v_heap);
 | |
| 	}
 | |
| 
 | |
| 	return !!prev_version;
 | |
| }
 | |
| 
 | |
| /***********************************************************//**
 | |
| Delete marks or removes a secondary index entry if found.
 | |
| @return DB_SUCCESS, DB_FAIL, or DB_OUT_OF_FILE_SPACE */
 | |
| static MY_ATTRIBUTE((nonnull, warn_unused_result))
 | |
| dberr_t
 | |
| row_undo_mod_del_mark_or_remove_sec_low(
 | |
| /*====================================*/
 | |
| 	undo_node_t*	node,	/*!< in: row undo node */
 | |
| 	que_thr_t*	thr,	/*!< in: query thread */
 | |
| 	dict_index_t*	index,	/*!< in: index */
 | |
| 	dtuple_t*	entry,	/*!< in: index entry */
 | |
| 	btr_latch_mode	mode)	/*!< in: latch mode BTR_MODIFY_LEAF or
 | |
| 				BTR_MODIFY_TREE */
 | |
| {
 | |
| 	btr_pcur_t		pcur;
 | |
| 	btr_cur_t*		btr_cur;
 | |
| 	dberr_t			err	= DB_SUCCESS;
 | |
| 	mtr_t			mtr{thr->graph->trx};
 | |
| 	const bool		modify_leaf = mode == BTR_MODIFY_LEAF;
 | |
| 
 | |
| 	row_mtr_start(&mtr, index);
 | |
| 
 | |
| 	pcur.btr_cur.page_cur.index = index;
 | |
| 	btr_cur = btr_pcur_get_btr_cur(&pcur);
 | |
| 
 | |
| 	if (index->is_spatial()) {
 | |
| 		mode = modify_leaf
 | |
| 			? btr_latch_mode(BTR_MODIFY_LEAF
 | |
| 					 | BTR_RTREE_DELETE_MARK
 | |
| 					 | BTR_RTREE_UNDO_INS)
 | |
| 			: btr_latch_mode(BTR_PURGE_TREE | BTR_RTREE_UNDO_INS);
 | |
| 		if (UNIV_LIKELY(!rtr_search(entry, mode, &pcur, thr, &mtr))) {
 | |
| 			goto found;
 | |
| 		} else {
 | |
| 			goto func_exit;
 | |
| 		}
 | |
| 	} else if (!index->is_committed()) {
 | |
| 		/* The index->online_status may change if the index is
 | |
| 		or was being created online, but not committed yet. It
 | |
| 		is protected by index->lock. */
 | |
| 		if (modify_leaf) {
 | |
| 			mode = BTR_MODIFY_LEAF_ALREADY_LATCHED;
 | |
| 			mtr_s_lock_index(index, &mtr);
 | |
| 		} else {
 | |
| 			ut_ad(mode == BTR_PURGE_TREE);
 | |
| 			mode = BTR_PURGE_TREE_ALREADY_LATCHED;
 | |
| 			mtr_x_lock_index(index, &mtr);
 | |
| 		}
 | |
| 	} else {
 | |
| 		/* For secondary indexes,
 | |
| 		index->online_status==ONLINE_INDEX_COMPLETE if
 | |
| 		index->is_committed(). */
 | |
| 		ut_ad(!dict_index_is_online_ddl(index));
 | |
| 	}
 | |
| 
 | |
| 	if (!row_search_index_entry(entry, mode, &pcur, &mtr)) {
 | |
| 		/* In crash recovery, the secondary index record may
 | |
| 		be missing if the UPDATE did not have time to insert
 | |
| 		the secondary index records before the crash.  When we
 | |
| 		are undoing that UPDATE in crash recovery, the record
 | |
| 		may be missing.
 | |
| 
 | |
| 		In normal processing, if an update ends in a deadlock
 | |
| 		before it has inserted all updated secondary index
 | |
| 		records, then the undo will not find those records. */
 | |
| 		goto func_exit;
 | |
| 	}
 | |
| 
 | |
| found:
 | |
| 	/* We should remove the index record if no prior version of the row,
 | |
| 	which cannot be purged yet, requires its existence. If some requires,
 | |
| 	we should delete mark the record. */
 | |
| 
 | |
| 	ut_a(node->pcur.restore_position(BTR_SEARCH_LEAF, &mtr) ==
 | |
| 	     btr_pcur_t::SAME_ALL);
 | |
| 
 | |
| 	/* For temporary table, we can skip to check older version of
 | |
| 	clustered index entry, because there is no MVCC or purge. */
 | |
| 	if (node->table->is_temporary()
 | |
| 	    || row_undo_mod_sec_is_unsafe(
 | |
| 		       btr_pcur_get_rec(&node->pcur), index, entry, &mtr)) {
 | |
| 		btr_rec_set_deleted<true>(btr_cur_get_block(btr_cur),
 | |
| 					  btr_cur_get_rec(btr_cur), &mtr);
 | |
| 	} else {
 | |
| 		/* Remove the index record */
 | |
| 
 | |
| 		if (dict_index_is_spatial(index)) {
 | |
| 			rec_t*	rec = btr_pcur_get_rec(&pcur);
 | |
| 			if (rec_get_deleted_flag(rec,
 | |
| 						 dict_table_is_comp(index->table))) {
 | |
| 				ib::error() << "Record found in index "
 | |
| 					<< index->name << " is deleted marked"
 | |
| 					" on rollback update.";
 | |
| 				ut_ad(0);
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		if (modify_leaf) {
 | |
| 			err = btr_cur_optimistic_delete(btr_cur, 0, &mtr);
 | |
| 		} else {
 | |
| 			/* Passing rollback=false,
 | |
| 			because we are deleting a secondary index record:
 | |
| 			the distinction only matters when deleting a
 | |
| 			record that contains externally stored columns. */
 | |
| 			ut_ad(!index->is_primary());
 | |
| 			btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0,
 | |
| 						   false, &mtr);
 | |
| 
 | |
| 			/* The delete operation may fail if we have little
 | |
| 			file space left: TODO: easiest to crash the database
 | |
| 			and restart with more file space */
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	ut_ad(node->pcur.pos_state == BTR_PCUR_IS_POSITIONED);
 | |
| 	node->pcur.pos_state = BTR_PCUR_WAS_POSITIONED;
 | |
| 	node->pcur.latch_mode = BTR_NO_LATCHES;
 | |
| 
 | |
| func_exit:
 | |
| 	btr_pcur_close(&pcur);
 | |
| 	mtr_commit(&mtr);
 | |
| 
 | |
| 	return(err);
 | |
| }
 | |
| 
 | |
| /***********************************************************//**
 | |
| Delete marks or removes a secondary index entry if found.
 | |
| NOTE that if we updated the fields of a delete-marked secondary index record
 | |
| so that alphabetically they stayed the same, e.g., 'abc' -> 'aBc', we cannot
 | |
| return to the original values because we do not know them. But this should
 | |
| not cause problems because in row0sel.cc, in queries we always retrieve the
 | |
| clustered index record or an earlier version of it, if the secondary index
 | |
| record through which we do the search is delete-marked.
 | |
| @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
 | |
| static MY_ATTRIBUTE((nonnull, warn_unused_result))
 | |
| dberr_t
 | |
| row_undo_mod_del_mark_or_remove_sec(
 | |
| /*================================*/
 | |
| 	undo_node_t*	node,	/*!< in: row undo node */
 | |
| 	que_thr_t*	thr,	/*!< in: query thread */
 | |
| 	dict_index_t*	index,	/*!< in: index */
 | |
| 	dtuple_t*	entry)	/*!< in: index entry */
 | |
| {
 | |
| 	dberr_t	err;
 | |
| 
 | |
| 	err = row_undo_mod_del_mark_or_remove_sec_low(node, thr, index,
 | |
| 						      entry, BTR_MODIFY_LEAF);
 | |
| 	if (err == DB_SUCCESS) {
 | |
| 
 | |
| 		return(err);
 | |
| 	}
 | |
| 
 | |
| 	err = row_undo_mod_del_mark_or_remove_sec_low(node, thr, index,
 | |
| 		entry, BTR_PURGE_TREE);
 | |
| 	return(err);
 | |
| }
 | |
| 
 | |
| /***********************************************************//**
 | |
| Delete unmarks a secondary index entry which must be found. It might not be
 | |
| delete-marked at the moment, but it does not harm to unmark it anyway. We also
 | |
| need to update the fields of the secondary index record if we updated its
 | |
| fields but alphabetically they stayed the same, e.g., 'abc' -> 'aBc'.
 | |
| @retval DB_SUCCESS on success
 | |
| @retval DB_FAIL if BTR_MODIFY_TREE should be tried
 | |
| @retval DB_OUT_OF_FILE_SPACE when running out of tablespace
 | |
| @retval DB_DUPLICATE_KEY if the value was missing
 | |
| 	and an insert would lead to a duplicate exists */
 | |
| static MY_ATTRIBUTE((nonnull, warn_unused_result))
 | |
| dberr_t
 | |
| row_undo_mod_del_unmark_sec_and_undo_update(
 | |
| /*========================================*/
 | |
| 	btr_latch_mode	mode,	/*!< in: search mode: BTR_MODIFY_LEAF or
 | |
| 				BTR_MODIFY_TREE */
 | |
| 	que_thr_t*	thr,	/*!< in: query thread */
 | |
| 	dict_index_t*	index,	/*!< in: index */
 | |
| 	dtuple_t*	entry)	/*!< in: index entry */
 | |
| {
 | |
| 	btr_pcur_t		pcur;
 | |
| 	btr_cur_t*		btr_cur		= btr_pcur_get_btr_cur(&pcur);
 | |
| 	upd_t*			update;
 | |
| 	dberr_t			err		= DB_SUCCESS;
 | |
| 	big_rec_t*		dummy_big_rec;
 | |
| 	trx_t*			trx		= thr_get_trx(thr);
 | |
| 	mtr_t			mtr{trx};
 | |
| 	const ulint		flags
 | |
| 		= BTR_KEEP_SYS_FLAG | BTR_NO_LOCKING_FLAG;
 | |
| 	const auto		orig_mode = mode;
 | |
| 
 | |
| 	pcur.btr_cur.page_cur.index = index;
 | |
| 	ut_ad(trx->id != 0);
 | |
| 
 | |
| 	if (index->is_spatial()) {
 | |
| 		/* FIXME: Currently we do a 2-pass search for the undo
 | |
| 		due to avoid undel-mark a wrong rec in rolling back in
 | |
| 		partial update.  Later, we could log some info in
 | |
| 		secondary index updates to avoid this. */
 | |
| 		static_assert(BTR_MODIFY_TREE == (8 | BTR_MODIFY_LEAF), "");
 | |
| 		ut_ad(!(mode & 8));
 | |
| 		mode = btr_latch_mode(mode | BTR_RTREE_DELETE_MARK);
 | |
| 	}
 | |
| 
 | |
| try_again:
 | |
| 	row_mtr_start(&mtr, index);
 | |
| 
 | |
| 	mem_heap_t* offsets_heap = nullptr;
 | |
| 	rec_offs* offsets = nullptr;
 | |
| 
 | |
| 	if (index->is_spatial()) {
 | |
| 		if (!rtr_search(entry, mode, &pcur, thr, &mtr)) {
 | |
| 			goto found;
 | |
| 		}
 | |
| 
 | |
| 		if (mode != orig_mode && btr_cur->rtr_info->fd_del) {
 | |
| 			mode = orig_mode;
 | |
| 			btr_pcur_close(&pcur);
 | |
| 			mtr.commit();
 | |
| 			goto try_again;
 | |
| 		}
 | |
| 
 | |
| 		goto not_found;
 | |
| 	}
 | |
| 
 | |
| 	if (!row_search_index_entry(entry, mode, &pcur, &mtr)) {
 | |
| not_found:
 | |
| 		if (btr_cur->up_match >= dict_index_get_n_unique(index)
 | |
| 		    || btr_cur->low_match >= dict_index_get_n_unique(index)) {
 | |
| 			ib::warn() << "Record in index " << index->name
 | |
| 				<< " of table " << index->table->name
 | |
| 				<< " was not found on rollback, and"
 | |
| 				" a duplicate exists: "
 | |
| 				<< *entry
 | |
| 				<< " at: " << rec_index_print(
 | |
| 					btr_cur_get_rec(btr_cur), index);
 | |
| 			err = DB_DUPLICATE_KEY;
 | |
| 			goto func_exit;
 | |
| 		}
 | |
| 
 | |
| 		ib::warn() << "Record in index " << index->name
 | |
| 			<< " of table " << index->table->name
 | |
| 			<< " was not found on rollback, trying to insert: "
 | |
| 			<< *entry
 | |
| 			<< " at: " << rec_index_print(
 | |
| 				btr_cur_get_rec(btr_cur), index);
 | |
| 
 | |
| 		/* Insert the missing record that we were trying to
 | |
| 		delete-unmark. */
 | |
| 		big_rec_t*	big_rec;
 | |
| 		rec_t*		insert_rec;
 | |
| 
 | |
| 		err = btr_cur_optimistic_insert(
 | |
| 			flags, btr_cur, &offsets, &offsets_heap,
 | |
| 			entry, &insert_rec, &big_rec,
 | |
| 			0, thr, &mtr);
 | |
| 		ut_ad(!big_rec);
 | |
| 
 | |
| 		if (err == DB_FAIL && mode == BTR_MODIFY_TREE) {
 | |
| 			err = btr_cur_pessimistic_insert(
 | |
| 				flags, btr_cur,
 | |
| 				&offsets, &offsets_heap,
 | |
| 				entry, &insert_rec, &big_rec,
 | |
| 				0, thr, &mtr);
 | |
| 			/* There are no off-page columns in
 | |
| 			secondary indexes. */
 | |
| 			ut_ad(!big_rec);
 | |
| 		}
 | |
| 
 | |
| 		if (err == DB_SUCCESS) {
 | |
| 			page_update_max_trx_id(
 | |
| 				btr_cur_get_block(btr_cur),
 | |
| 				btr_cur_get_page_zip(btr_cur),
 | |
| 				trx->id, &mtr);
 | |
| 		}
 | |
| 
 | |
| 		if (offsets_heap) {
 | |
| 			mem_heap_free(offsets_heap);
 | |
| 		}
 | |
| 	} else {
 | |
| found:
 | |
| 		btr_rec_set_deleted<false>(btr_cur_get_block(btr_cur),
 | |
| 					   btr_cur_get_rec(btr_cur), &mtr);
 | |
| 		mem_heap_t* heap = mem_heap_create(
 | |
| 			sizeof(upd_t)
 | |
| 			+ dtuple_get_n_fields(entry) * sizeof(upd_field_t));
 | |
| 		offsets = rec_get_offsets(
 | |
| 			btr_cur_get_rec(btr_cur),
 | |
| 			index, nullptr, index->n_core_fields, ULINT_UNDEFINED,
 | |
| 			&offsets_heap);
 | |
| 		update = row_upd_build_sec_rec_difference_binary(
 | |
| 			btr_cur_get_rec(btr_cur), index, offsets, entry, heap);
 | |
| 		if (upd_get_n_fields(update) == 0) {
 | |
| 
 | |
| 			/* Do nothing */
 | |
| 
 | |
| 		} else if (mode != BTR_MODIFY_TREE) {
 | |
| 			/* Try an optimistic updating of the record, keeping
 | |
| 			changes within the page */
 | |
| 
 | |
| 			/* TODO: pass offsets, not &offsets */
 | |
| 			err = btr_cur_optimistic_update(
 | |
| 				flags, btr_cur, &offsets, &offsets_heap,
 | |
| 				update, 0, thr, thr_get_trx(thr)->id, &mtr);
 | |
| 			switch (err) {
 | |
| 			case DB_OVERFLOW:
 | |
| 			case DB_UNDERFLOW:
 | |
| 			case DB_ZIP_OVERFLOW:
 | |
| 				err = DB_FAIL;
 | |
| 			default:
 | |
| 				break;
 | |
| 			}
 | |
| 		} else {
 | |
| 			err = btr_cur_pessimistic_update(
 | |
| 				flags, btr_cur, &offsets, &offsets_heap,
 | |
| 				heap, &dummy_big_rec,
 | |
| 				update, 0, thr, thr_get_trx(thr)->id, &mtr);
 | |
| 			ut_a(!dummy_big_rec);
 | |
| 		}
 | |
| 
 | |
| 		mem_heap_free(heap);
 | |
| 		mem_heap_free(offsets_heap);
 | |
| 	}
 | |
| 
 | |
| func_exit:
 | |
| 	btr_pcur_close(&pcur);
 | |
| 	mtr_commit(&mtr);
 | |
| 
 | |
| 	return(err);
 | |
| }
 | |
| 
 | |
| /***********************************************************//**
 | |
| Undoes a modify in secondary indexes when undo record type is UPD_DEL.
 | |
| @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
 | |
| static MY_ATTRIBUTE((nonnull, warn_unused_result))
 | |
| dberr_t
 | |
| row_undo_mod_upd_del_sec(
 | |
| /*=====================*/
 | |
| 	undo_node_t*	node,	/*!< in: row undo node */
 | |
| 	que_thr_t*	thr)	/*!< in: query thread */
 | |
| {
 | |
| 	mem_heap_t*	heap;
 | |
| 	dberr_t		err	= DB_SUCCESS;
 | |
| 
 | |
| 	ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC);
 | |
| 	ut_ad(!node->undo_row);
 | |
| 
 | |
| 	heap = mem_heap_create(1024);
 | |
| 
 | |
| 	do {
 | |
| 		dict_index_t* index = node->index;
 | |
| 
 | |
| 		if (index->type & (DICT_FTS | DICT_CORRUPT)
 | |
| 		    || !index->is_committed()) {
 | |
| 			continue;
 | |
| 		}
 | |
| 
 | |
| 		/* During online index creation,
 | |
| 		HA_ALTER_INPLACE_COPY_NO_LOCK or HA_ALTER_INPLACE_NOCOPY_NO_LOCk
 | |
| 		should guarantee that any active transaction has not modified
 | |
| 		indexed columns such that col->ord_part was 0 at the
 | |
| 		time when the undo log record was written. When we get
 | |
| 		to roll back an undo log entry TRX_UNDO_DEL_MARK_REC,
 | |
| 		it should always cover all affected indexes. */
 | |
| 		dtuple_t* entry = row_build_index_entry(
 | |
| 			node->row, node->ext, index, heap);
 | |
| 
 | |
| 		if (UNIV_UNLIKELY(!entry)) {
 | |
| 			/* The database must have crashed after
 | |
| 			inserting a clustered index record but before
 | |
| 			writing all the externally stored columns of
 | |
| 			that record.  Because secondary index entries
 | |
| 			are inserted after the clustered index record,
 | |
| 			we may assume that the secondary index record
 | |
| 			does not exist.  However, this situation may
 | |
| 			only occur during the rollback of incomplete
 | |
| 			transactions. */
 | |
| 			ut_a(thr_get_trx(thr) == trx_roll_crash_recv_trx);
 | |
| 		} else {
 | |
| 			err = row_undo_mod_del_mark_or_remove_sec(
 | |
| 				node, thr, index, entry);
 | |
| 
 | |
| 			if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
 | |
| 
 | |
| 				break;
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		mem_heap_empty(heap);
 | |
| 	} while ((node->index = dict_table_get_next_index(node->index)));
 | |
| 
 | |
| 	mem_heap_free(heap);
 | |
| 
 | |
| 	return(err);
 | |
| }
 | |
| 
 | |
| /***********************************************************//**
 | |
| Undoes a modify in secondary indexes when undo record type is DEL_MARK.
 | |
| @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
 | |
| static MY_ATTRIBUTE((nonnull, warn_unused_result))
 | |
| dberr_t
 | |
| row_undo_mod_del_mark_sec(
 | |
| /*======================*/
 | |
| 	undo_node_t*	node,	/*!< in: row undo node */
 | |
| 	que_thr_t*	thr)	/*!< in: query thread */
 | |
| {
 | |
| 	mem_heap_t*	heap;
 | |
| 	dberr_t		err	= DB_SUCCESS;
 | |
| 
 | |
| 	ut_ad(!node->undo_row);
 | |
| 
 | |
| 	heap = mem_heap_create(1024);
 | |
| 
 | |
| 	do {
 | |
| 		dict_index_t* index = node->index;
 | |
| 
 | |
| 		if (index->type & (DICT_FTS | DICT_CORRUPT)
 | |
| 		    || !index->is_committed()) {
 | |
| 			continue;
 | |
| 		}
 | |
| 
 | |
| 		/* During online index creation,
 | |
| 		HA_ALTER_INPLACE_COPY_NO_LOCK or HA_ALTER_INPLACE_NOCOPY_NO_LOCK
 | |
| 		should guarantee that any active transaction has not modified
 | |
| 		indexed columns such that col->ord_part was 0 at the
 | |
| 		time when the undo log record was written. When we get
 | |
| 		to roll back an undo log entry TRX_UNDO_DEL_MARK_REC,
 | |
| 		it should always cover all affected indexes. */
 | |
| 		dtuple_t* entry = row_build_index_entry(
 | |
| 			node->row, node->ext, index, heap);
 | |
| 
 | |
| 		ut_a(entry);
 | |
| 
 | |
| 		err = row_undo_mod_del_unmark_sec_and_undo_update(
 | |
| 			BTR_MODIFY_LEAF, thr, index, entry);
 | |
| 		if (err == DB_FAIL) {
 | |
| 			err = row_undo_mod_del_unmark_sec_and_undo_update(
 | |
| 				BTR_MODIFY_TREE, thr, index, entry);
 | |
| 		}
 | |
| 
 | |
| 		if (err == DB_DUPLICATE_KEY) {
 | |
| 			index->type |= DICT_CORRUPT;
 | |
| 			err = DB_SUCCESS;
 | |
| 			/* Do not return any error to the caller. The
 | |
| 			duplicate will be reported by ALTER TABLE or
 | |
| 			CREATE UNIQUE INDEX. Unfortunately we cannot
 | |
| 			report the duplicate key value to the DDL
 | |
| 			thread, because the altered_table object is
 | |
| 			private to its call stack. */
 | |
| 		} else if (err != DB_SUCCESS) {
 | |
| 			break;
 | |
| 		}
 | |
| 
 | |
| 		mem_heap_empty(heap);
 | |
| 	} while ((node->index = dict_table_get_next_index(node->index)));
 | |
| 
 | |
| 	mem_heap_free(heap);
 | |
| 
 | |
| 	return(err);
 | |
| }
 | |
| 
 | |
| /***********************************************************//**
 | |
| Undoes a modify in secondary indexes when undo record type is UPD_EXIST.
 | |
| @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
 | |
| static MY_ATTRIBUTE((nonnull, warn_unused_result))
 | |
| dberr_t
 | |
| row_undo_mod_upd_exist_sec(
 | |
| /*=======================*/
 | |
| 	undo_node_t*	node,	/*!< in: row undo node */
 | |
| 	que_thr_t*	thr)	/*!< in: query thread */
 | |
| {
 | |
| 	if (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE) {
 | |
| 		return DB_SUCCESS;
 | |
| 	}
 | |
| 
 | |
| 	mem_heap_t* heap = mem_heap_create(1024);
 | |
| 	dberr_t err = DB_SUCCESS;
 | |
| 
 | |
| 	do {
 | |
| 		dict_index_t* index = node->index;
 | |
| 
 | |
| 		if (index->type & (DICT_FTS | DICT_CORRUPT)
 | |
| 		    || !index->is_committed()) {
 | |
| 			continue;
 | |
| 		}
 | |
| 
 | |
| 		if (!row_upd_changes_ord_field_binary_func(
 | |
| 			index, node->update,
 | |
| #ifdef UNIV_DEBUG
 | |
| 			thr,
 | |
| #endif /* UNIV_DEBUG */
 | |
| 			node->row, node->ext, ROW_BUILD_FOR_UNDO)) {
 | |
| 			continue;
 | |
| 		}
 | |
| 
 | |
| 		/* Build the newest version of the index entry */
 | |
| 		dtuple_t* entry = row_build_index_entry(
 | |
| 			node->row, node->ext, index, heap);
 | |
| 		if (UNIV_UNLIKELY(!entry)) {
 | |
| 			/* InnoDB must have run of space or been killed
 | |
| 			before the updated externally stored columns (BLOBs)
 | |
| 			of the new clustered index entry were written. */
 | |
| 
 | |
| 			/* The table must be in DYNAMIC or COMPRESSED
 | |
| 			format.  REDUNDANT and COMPACT formats
 | |
| 			store a local 768-byte prefix of each
 | |
| 			externally stored column. */
 | |
| 			ut_a(dict_table_has_atomic_blobs(index->table));
 | |
| 		} else {
 | |
| 			/* NOTE that if we updated the fields of a
 | |
| 			delete-marked secondary index record so that
 | |
| 			alphabetically they stayed the same, e.g.,
 | |
| 			'abc' -> 'aBc', we cannot return to the
 | |
| 			original values because we do not know them.
 | |
| 			But this should not cause problems because
 | |
| 			in row0sel.cc, in queries we always retrieve
 | |
| 			the clustered index record or an earlier
 | |
| 			version of it, if the secondary index record
 | |
| 			through which we do the search is
 | |
| 			delete-marked. */
 | |
| 
 | |
| 			err = row_undo_mod_del_mark_or_remove_sec(
 | |
| 				node, thr, index, entry);
 | |
| 			if (err != DB_SUCCESS) {
 | |
| 				break;
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		mem_heap_empty(heap);
 | |
| 		/* We may have to update the delete mark in the
 | |
| 		secondary index record of the previous version of
 | |
| 		the row. We also need to update the fields of
 | |
| 		the secondary index record if we updated its fields
 | |
| 		but alphabetically they stayed the same, e.g.,
 | |
| 		'abc' -> 'aBc'. */
 | |
| 		entry = row_build_index_entry_low(node->undo_row,
 | |
| 						  node->undo_ext,
 | |
| 						  index, heap,
 | |
| 						  ROW_BUILD_FOR_UNDO);
 | |
| 		ut_a(entry);
 | |
| 
 | |
| 		err = row_undo_mod_del_unmark_sec_and_undo_update(
 | |
| 			BTR_MODIFY_LEAF, thr, index, entry);
 | |
| 		if (err == DB_FAIL) {
 | |
| 			err = row_undo_mod_del_unmark_sec_and_undo_update(
 | |
| 				BTR_MODIFY_TREE, thr, index, entry);
 | |
| 		}
 | |
| 
 | |
| 		if (err == DB_DUPLICATE_KEY) {
 | |
| 			index->type |= DICT_CORRUPT;
 | |
| 			err = DB_SUCCESS;
 | |
| 		} else if (err != DB_SUCCESS) {
 | |
| 			break;
 | |
| 		}
 | |
| 
 | |
| 		mem_heap_empty(heap);
 | |
| 	} while ((node->index = dict_table_get_next_index(node->index)));
 | |
| 
 | |
| 	mem_heap_free(heap);
 | |
| 
 | |
| 	return(err);
 | |
| }
 | |
| 
 | |
| /** Parse an update undo record.
 | |
| @param[in,out]	node		row rollback state
 | |
| @param[in]	dict_locked	whether the data dictionary cache is locked */
 | |
| static bool row_undo_mod_parse_undo_rec(undo_node_t* node, bool dict_locked)
 | |
| {
 | |
| 	dict_index_t*	clust_index;
 | |
| 	undo_no_t	undo_no;
 | |
| 	table_id_t	table_id;
 | |
| 	trx_id_t	trx_id;
 | |
| 	roll_ptr_t	roll_ptr;
 | |
| 	byte		info_bits;
 | |
| 	byte		type;
 | |
| 	byte		cmpl_info;
 | |
| 	bool		dummy_extern;
 | |
| 
 | |
| 	ut_ad(node->trx->in_rollback);
 | |
| 	ut_ad(!trx_undo_roll_ptr_is_insert(node->roll_ptr));
 | |
| 
 | |
| 	const byte *ptr = trx_undo_rec_get_pars(
 | |
| 		node->undo_rec, &type, &cmpl_info,
 | |
| 		&dummy_extern, &undo_no, &table_id);
 | |
| 	node->rec_type = type;
 | |
| 
 | |
| 	if (!node->is_temp) {
 | |
| 		node->table = dict_table_open_on_id(table_id, dict_locked,
 | |
| 						    DICT_TABLE_OP_NORMAL);
 | |
| 	} else if (!dict_locked) {
 | |
| 		dict_sys.freeze(SRW_LOCK_CALL);
 | |
| 		node->table = dict_sys.acquire_temporary_table(table_id);
 | |
| 		dict_sys.unfreeze();
 | |
| 	} else {
 | |
| 		node->table = dict_sys.acquire_temporary_table(table_id);
 | |
| 	}
 | |
| 
 | |
| 	if (!node->table) {
 | |
| 		return false;
 | |
| 	}
 | |
| 
 | |
| 	ut_ad(!node->table->skip_alter_undo);
 | |
| 
 | |
| 	if (UNIV_UNLIKELY(!node->table->is_accessible())) {
 | |
| close_table:
 | |
| 		/* Normally, tables should not disappear or become
 | |
| 		unaccessible during ROLLBACK, because they should be
 | |
| 		protected by InnoDB table locks. Corruption could be
 | |
| 		a valid exception.
 | |
| 
 | |
| 		FIXME: When running out of temporary tablespace, it
 | |
| 		would probably be better to just drop all temporary
 | |
| 		tables (and temporary undo log records) of the current
 | |
| 		connection, instead of doing this rollback. */
 | |
| 		node->table->release();
 | |
| 		node->table = NULL;
 | |
| 		return false;
 | |
| 	}
 | |
| 
 | |
| 	clust_index = dict_table_get_first_index(node->table);
 | |
| 
 | |
| 	ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
 | |
| 					       &info_bits);
 | |
| 
 | |
| 	ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &(node->ref),
 | |
| 				       node->heap);
 | |
| 
 | |
| 	ptr = trx_undo_update_rec_get_update(ptr, clust_index, type, trx_id,
 | |
| 				       roll_ptr, info_bits,
 | |
| 				       node->heap, &(node->update));
 | |
| 	node->new_trx_id = trx_id;
 | |
| 	node->cmpl_info = cmpl_info;
 | |
| 	ut_ad(!node->ref->info_bits);
 | |
| 
 | |
| 	if (node->update->info_bits & REC_INFO_MIN_REC_FLAG) {
 | |
| 		if ((node->update->info_bits & ~REC_INFO_DELETED_FLAG)
 | |
| 		    != REC_INFO_MIN_REC_FLAG) {
 | |
| 			ut_ad("wrong info_bits in undo log record" == 0);
 | |
| 			goto close_table;
 | |
| 		}
 | |
| 		/* This must be an undo log record for a subsequent
 | |
| 		instant ALTER TABLE, extending the metadata record. */
 | |
| 		ut_ad(clust_index->is_instant());
 | |
| 		ut_ad(clust_index->table->instant
 | |
| 		      || !(node->update->info_bits & REC_INFO_DELETED_FLAG));
 | |
| 		node->ref = &trx_undo_metadata;
 | |
| 		node->update->info_bits = (node->update->info_bits
 | |
| 					   & REC_INFO_DELETED_FLAG)
 | |
| 			? REC_INFO_METADATA_ALTER
 | |
| 			: REC_INFO_METADATA_ADD;
 | |
| 	}
 | |
| 
 | |
| 	if (!row_undo_search_clust_to_pcur(node)) {
 | |
| 		/* As long as this rolling-back transaction exists,
 | |
| 		the PRIMARY KEY value pointed to by the undo log
 | |
| 		record should exist.
 | |
| 
 | |
| 		However, if InnoDB is killed during a rollback, or
 | |
| 		shut down during the rollback of recovered
 | |
| 		transactions, then after restart we may try to roll
 | |
| 		back some of the same undo log records again, because
 | |
| 		trx_roll_try_truncate() is not being invoked after
 | |
| 		every undo log record.
 | |
| 
 | |
| 		It is also possible that the record
 | |
| 		was not modified yet (the DB_ROLL_PTR does not match
 | |
| 		node->roll_ptr) and thus there is nothing to roll back.
 | |
| 
 | |
| 		btr_cur_upd_lock_and_undo() only writes the undo log
 | |
| 		record after successfully acquiring an exclusive lock
 | |
| 		on the the clustered index record. That lock will not
 | |
| 		be released before the transaction is committed or
 | |
| 		fully rolled back. (Exception: if the server was
 | |
| 		killed, restarted, and shut down again before the
 | |
| 		rollback of the recovered transaction was completed,
 | |
| 		it is possible that the transaction was partially
 | |
| 		rolled back and locks released.) */
 | |
| 		goto close_table;
 | |
| 	}
 | |
| 
 | |
| 	/* Extract indexed virtual columns from undo log */
 | |
| 	if (node->ref != &trx_undo_metadata && node->table->n_v_cols) {
 | |
| 		row_upd_replace_vcol(node->row, node->table,
 | |
| 				     node->update, false, node->undo_row,
 | |
| 				     (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)
 | |
| 				     ? nullptr : ptr);
 | |
| 	}
 | |
| 
 | |
| 	return true;
 | |
| }
 | |
| 
 | |
| /***********************************************************//**
 | |
| Undoes a modify operation on a row of a table.
 | |
| @return DB_SUCCESS or error code */
 | |
| dberr_t
 | |
| row_undo_mod(
 | |
| /*=========*/
 | |
| 	undo_node_t*	node,	/*!< in: row undo node */
 | |
| 	que_thr_t*	thr)	/*!< in: query thread */
 | |
| {
 | |
| 	dberr_t	err = DB_SUCCESS;
 | |
| 	ut_ad(thr_get_trx(thr) == node->trx);
 | |
| 	const bool dict_locked = node->trx->dict_operation_lock_mode;
 | |
| 
 | |
| 	if (!row_undo_mod_parse_undo_rec(node, dict_locked)) {
 | |
| 		return DB_SUCCESS;
 | |
| 	}
 | |
| 
 | |
| 	ut_ad(node->table->is_temporary()
 | |
| 	      || lock_table_has_locks(node->table));
 | |
| 	node->index = dict_table_get_first_index(node->table);
 | |
| 	ut_ad(dict_index_is_clust(node->index));
 | |
| 
 | |
| 	if (node->ref->info_bits) {
 | |
| 		ut_ad(node->ref->is_metadata());
 | |
| 		goto rollback_clust;
 | |
| 	}
 | |
| 
 | |
| 	/* Skip the clustered index (the first index) */
 | |
| 	node->index = dict_table_get_next_index(node->index);
 | |
| 	if (node->index) {
 | |
| 		switch (node->rec_type) {
 | |
| 		case TRX_UNDO_UPD_EXIST_REC:
 | |
| 			err = row_undo_mod_upd_exist_sec(node, thr);
 | |
| 			break;
 | |
| 		case TRX_UNDO_DEL_MARK_REC:
 | |
| 			err = row_undo_mod_del_mark_sec(node, thr);
 | |
| 			break;
 | |
| 		case TRX_UNDO_UPD_DEL_REC:
 | |
| 			err = row_undo_mod_upd_del_sec(node, thr);
 | |
| 			break;
 | |
| 		default:
 | |
| 			MY_ASSERT_UNREACHABLE();
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	if (err == DB_SUCCESS) {
 | |
| rollback_clust:
 | |
| 		err = row_undo_mod_clust(node, thr);
 | |
| 
 | |
| 		bool update_statistics
 | |
| 			= !(node->cmpl_info & UPD_NODE_NO_ORD_CHANGE);
 | |
| 
 | |
| 		if (err == DB_SUCCESS && node->table->stat_initialized()) {
 | |
| 			switch (node->rec_type) {
 | |
| 			case TRX_UNDO_UPD_EXIST_REC:
 | |
| 				break;
 | |
| 			case TRX_UNDO_DEL_MARK_REC:
 | |
| 				dict_table_n_rows_inc(node->table);
 | |
| 				update_statistics = update_statistics
 | |
| 					|| !srv_stats_include_delete_marked;
 | |
| 				break;
 | |
| 			case TRX_UNDO_UPD_DEL_REC:
 | |
| 				dict_table_n_rows_dec(node->table);
 | |
| 				update_statistics = update_statistics
 | |
| 					|| !srv_stats_include_delete_marked;
 | |
| 				break;
 | |
| 			}
 | |
| 
 | |
| 			/* Do not attempt to update statistics when
 | |
| 			executing ROLLBACK in the InnoDB SQL
 | |
| 			interpreter, because in that case we would
 | |
| 			already be holding dict_sys.latch, which
 | |
| 			would be acquired when updating statistics. */
 | |
| 			if (update_statistics && !dict_locked) {
 | |
| 				dict_stats_update_if_needed(node->table,
 | |
| 							    *node->trx);
 | |
| 			} else {
 | |
| 				node->table->stat_modified_counter++;
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	node->table->release();
 | |
| 	node->table = NULL;
 | |
| 
 | |
| 	return(err);
 | |
| }
 |