mirror of
https://github.com/MariaDB/server.git
synced 2025-01-21 22:34:18 +01:00
632ec54002
Some things still fail in innodb-index.test, and there seems to be a race condition (data dictionary lock wait) when running with --valgrind. dfield_t: Add an "external storage" flag, dfield->ext. dfield_is_null(), dfield_is_ext(), dfield_set_ext(), dfield_set_null(): New functions. dfield_copy(), dfield_copy_data(): Add const qualifiers, fix in/out comments. data_write_sql_null(): Use memset(). big_rec_field_t: Replace byte* data with const void* data. ut_ulint_sort(): Remove. upd_field_t: Remove extern_storage. upd_node_t: Replace ext_vec, n_ext_vec with n_ext. row_merge_copy_blobs(): New function. row_ins_index_entry(): Add the parameter "ibool foreign" for suppressing foreign key checks during fast index creation or when inserting into secondary indexes. btr_page_insert_fits(): Add const qualifiers. btr_cur_add_ext(), upd_ext_vec_contains(): Remove. dfield_print_also_hex(), dfield_print(): Replace if...else if with switch. Observe dfield_is_ext().
1829 lines
48 KiB
C
1829 lines
48 KiB
C
/******************************************************
|
|
Transaction undo log record
|
|
|
|
(c) 1996 Innobase Oy
|
|
|
|
Created 3/26/1996 Heikki Tuuri
|
|
*******************************************************/
|
|
|
|
#include "trx0rec.h"
|
|
|
|
#ifdef UNIV_NONINL
|
|
#include "trx0rec.ic"
|
|
#endif
|
|
|
|
#include "fsp0fsp.h"
|
|
#include "mach0data.h"
|
|
#include "trx0rseg.h"
|
|
#include "trx0trx.h"
|
|
#include "trx0undo.h"
|
|
#include "dict0dict.h"
|
|
#include "ut0mem.h"
|
|
#include "row0ext.h"
|
|
#include "row0upd.h"
|
|
#include "que0que.h"
|
|
#include "trx0purge.h"
|
|
#include "row0row.h"
|
|
|
|
/*=========== UNDO LOG RECORD CREATION AND DECODING ====================*/
|
|
|
|
/**************************************************************************
|
|
Writes the mtr log entry of the inserted undo log record on the undo log
|
|
page. */
|
|
UNIV_INLINE
|
|
void
|
|
trx_undof_page_add_undo_rec_log(
|
|
/*============================*/
|
|
page_t* undo_page, /* in: undo log page */
|
|
ulint old_free, /* in: start offset of the inserted entry */
|
|
ulint new_free, /* in: end offset of the entry */
|
|
mtr_t* mtr) /* in: mtr */
|
|
{
|
|
byte* log_ptr;
|
|
const byte* log_end;
|
|
ulint len;
|
|
|
|
log_ptr = mlog_open(mtr, 11 + 13 + MLOG_BUF_MARGIN);
|
|
|
|
if (log_ptr == NULL) {
|
|
|
|
return;
|
|
}
|
|
|
|
log_end = &log_ptr[11 + 13 + MLOG_BUF_MARGIN];
|
|
log_ptr = mlog_write_initial_log_record_fast(
|
|
undo_page, MLOG_UNDO_INSERT, log_ptr, mtr);
|
|
len = new_free - old_free - 4;
|
|
|
|
mach_write_to_2(log_ptr, len);
|
|
log_ptr += 2;
|
|
|
|
if (log_ptr + len <= log_end) {
|
|
memcpy(log_ptr, undo_page + old_free + 2, len);
|
|
mlog_close(mtr, log_ptr + len);
|
|
} else {
|
|
mlog_close(mtr, log_ptr);
|
|
mlog_catenate_string(mtr, undo_page + old_free + 2, len);
|
|
}
|
|
}
|
|
|
|
/***************************************************************
|
|
Parses a redo log record of adding an undo log record. */
|
|
|
|
byte*
|
|
trx_undo_parse_add_undo_rec(
|
|
/*========================*/
|
|
/* out: end of log record or NULL */
|
|
byte* ptr, /* in: buffer */
|
|
byte* end_ptr,/* in: buffer end */
|
|
page_t* page) /* in: page or NULL */
|
|
{
|
|
ulint len;
|
|
byte* rec;
|
|
ulint first_free;
|
|
|
|
if (end_ptr < ptr + 2) {
|
|
|
|
return(NULL);
|
|
}
|
|
|
|
len = mach_read_from_2(ptr);
|
|
ptr += 2;
|
|
|
|
if (end_ptr < ptr + len) {
|
|
|
|
return(NULL);
|
|
}
|
|
|
|
if (page == NULL) {
|
|
|
|
return(ptr + len);
|
|
}
|
|
|
|
first_free = mach_read_from_2(page + TRX_UNDO_PAGE_HDR
|
|
+ TRX_UNDO_PAGE_FREE);
|
|
rec = page + first_free;
|
|
|
|
mach_write_to_2(rec, first_free + 4 + len);
|
|
mach_write_to_2(rec + 2 + len, first_free);
|
|
|
|
mach_write_to_2(page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE,
|
|
first_free + 4 + len);
|
|
ut_memcpy(rec + 2, ptr, len);
|
|
|
|
return(ptr + len);
|
|
}
|
|
|
|
/**************************************************************************
|
|
Calculates the free space left for extending an undo log record. */
|
|
UNIV_INLINE
|
|
ulint
|
|
trx_undo_left(
|
|
/*==========*/
|
|
/* out: bytes left */
|
|
const page_t* page, /* in: undo log page */
|
|
const byte* ptr) /* in: pointer to page */
|
|
{
|
|
/* The '- 10' is a safety margin, in case we have some small
|
|
calculation error below */
|
|
|
|
return(UNIV_PAGE_SIZE - (ptr - page) - 10 - FIL_PAGE_DATA_END);
|
|
}
|
|
|
|
/**************************************************************************
|
|
Get the pointer to where the data for the undo log record will be written.*/
|
|
static
|
|
byte*
|
|
trx_undo_page_get_ptr(
|
|
/*==================*/
|
|
/* out: ptr to where the undo log
|
|
record data will be written,
|
|
0 if not enough space.*/
|
|
page_t* undo_page, /* in: undo log page */
|
|
ulint need) /* in: need these man bytes */
|
|
{
|
|
byte* ptr; /* pointer within undo_page */
|
|
ulint first_free; /* offset within undo page */
|
|
|
|
ut_ad(undo_page);
|
|
|
|
ut_ad(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
|
|
+ TRX_UNDO_PAGE_TYPE) == TRX_UNDO_INSERT);
|
|
|
|
first_free = mach_read_from_2(
|
|
undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE);
|
|
|
|
/* Start writing the undo information from the first free
|
|
bytes in the undo page */
|
|
ptr = undo_page + first_free;
|
|
|
|
ut_ad(first_free <= UNIV_PAGE_SIZE);
|
|
|
|
/* NOTE: the value need must be big enough such that the
|
|
general fields written below fit on the undo log page */
|
|
|
|
if (UNIV_UNLIKELY(trx_undo_left(undo_page, ptr) < need)) {
|
|
|
|
/* Error, not enough space */
|
|
ptr = 0;
|
|
|
|
} else {
|
|
/* Reserve 2 bytes for the pointer to the next undo log
|
|
record */
|
|
ptr += 2;
|
|
}
|
|
|
|
return(ptr);
|
|
}
|
|
|
|
/**************************************************************************
|
|
Set the next and previous pointers in the undo page for the undo record
|
|
that was written to ptr. Update the first free value by the number of bytes
|
|
written for this undo record.*/
|
|
static
|
|
ulint
|
|
trx_undo_page_set_next_prev_and_add(
|
|
/*================================*/
|
|
/* out: offset of the inserted entry
|
|
on the page if succeeded, 0 if fail */
|
|
page_t* undo_page, /* in/out: undo log page */
|
|
byte* ptr, /* in: ptr up to where data has been
|
|
written on this undo page. */
|
|
mtr_t* mtr) /* in: mtr */
|
|
{
|
|
ulint first_free; /* offset within undo_page */
|
|
ulint end_of_rec; /* offset within undo_page */
|
|
byte* ptr_to_first_free;
|
|
/* pointer within undo_page
|
|
that points to the next free
|
|
offset value within undo_page.*/
|
|
|
|
ut_ad(ptr > undo_page);
|
|
ut_ad(ptr < undo_page + UNIV_PAGE_SIZE);
|
|
|
|
if (UNIV_UNLIKELY(trx_undo_left(undo_page, ptr) < 2)) {
|
|
|
|
return(0);
|
|
}
|
|
|
|
ptr_to_first_free = undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE;
|
|
|
|
first_free = mach_read_from_2(ptr_to_first_free);
|
|
|
|
/* Write offset of the previous undo log record */
|
|
mach_write_to_2(ptr, first_free);
|
|
ptr += 2;
|
|
|
|
end_of_rec = ptr - undo_page;
|
|
|
|
/* Write offset of the next undo log record */
|
|
mach_write_to_2(undo_page + first_free, end_of_rec);
|
|
|
|
/* Update the offset to first free undo record */
|
|
mach_write_to_2(ptr_to_first_free, end_of_rec);
|
|
|
|
/* Write this log entry to the UNDO log */
|
|
trx_undof_page_add_undo_rec_log(undo_page, first_free,
|
|
end_of_rec, mtr);
|
|
|
|
return(first_free);
|
|
}
|
|
|
|
/**************************************************************************
|
|
Reports in the undo log of an insert of a clustered index record. */
|
|
static
|
|
ulint
|
|
trx_undo_page_report_insert(
|
|
/*========================*/
|
|
/* out: offset of the inserted entry
|
|
on the page if succeed, 0 if fail */
|
|
page_t* undo_page, /* in: undo log page */
|
|
trx_t* trx, /* in: transaction */
|
|
dict_index_t* index, /* in: clustered index */
|
|
const dtuple_t* clust_entry, /* in: index entry which will be
|
|
inserted to the clustered index */
|
|
mtr_t* mtr) /* in: mtr */
|
|
{
|
|
ulint first_free;
|
|
byte* ptr;
|
|
ulint i;
|
|
|
|
ut_ad(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
|
|
+ TRX_UNDO_PAGE_TYPE) == TRX_UNDO_INSERT);
|
|
|
|
first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
|
|
+ TRX_UNDO_PAGE_FREE);
|
|
ptr = undo_page + first_free;
|
|
|
|
ut_ad(first_free <= UNIV_PAGE_SIZE);
|
|
|
|
if (trx_undo_left(undo_page, ptr) < 2 + 1 + 11 + 11) {
|
|
|
|
/* Not enough space for writing the general parameters */
|
|
|
|
return(0);
|
|
}
|
|
|
|
/* Reserve 2 bytes for the pointer to the next undo log record */
|
|
ptr += 2;
|
|
|
|
/* Store first some general parameters to the undo log */
|
|
*ptr++ = TRX_UNDO_INSERT_REC;
|
|
ptr += mach_dulint_write_much_compressed(ptr, trx->undo_no);
|
|
ptr += mach_dulint_write_much_compressed(ptr, index->table->id);
|
|
/*----------------------------------------*/
|
|
/* Store then the fields required to uniquely determine the record
|
|
to be inserted in the clustered index */
|
|
|
|
for (i = 0; i < dict_index_get_n_unique(index); i++) {
|
|
|
|
const dfield_t* field = dtuple_get_nth_field(clust_entry, i);
|
|
ulint flen = dfield_get_len(field);
|
|
|
|
if (trx_undo_left(undo_page, ptr) < 5) {
|
|
|
|
return(0);
|
|
}
|
|
|
|
ptr += mach_write_compressed(ptr, flen);
|
|
|
|
if (flen != UNIV_SQL_NULL) {
|
|
if (trx_undo_left(undo_page, ptr) < flen) {
|
|
|
|
return(0);
|
|
}
|
|
|
|
ut_memcpy(ptr, dfield_get_data(field), flen);
|
|
ptr += flen;
|
|
}
|
|
}
|
|
|
|
return(trx_undo_page_set_next_prev_and_add(undo_page, ptr, mtr));
|
|
}
|
|
|
|
/**************************************************************************
|
|
Reports in the undo log of an index create */
|
|
static
|
|
ulint
|
|
trx_undo_page_report_index_create(
|
|
/*==============================*/
|
|
/* out: offset of the inserted entry
|
|
on the page if succeed, 0 if fail */
|
|
page_t* undo_page, /* in: undo log page */
|
|
trx_t* trx, /* in: transaction */
|
|
dict_index_t* index, /* in: index */
|
|
mtr_t* mtr) /* in: mtr */
|
|
{
|
|
byte* ptr;
|
|
|
|
ut_ad(undo_page && trx && index && mtr);
|
|
|
|
/* Get the pointer to where we will write our undo data. */
|
|
|
|
ptr = trx_undo_page_get_ptr(undo_page, 1 + 11 + 1 + 11 + 11);
|
|
|
|
if (UNIV_UNLIKELY(!ptr)) {
|
|
return(0);
|
|
}
|
|
|
|
/* This is our internal dictionary undo log record. */
|
|
*ptr++ = TRX_UNDO_DICTIONARY_REC;
|
|
|
|
ptr += mach_dulint_write_much_compressed(ptr, trx->undo_no);
|
|
|
|
/* The sub type (discriminator) of this undo dictionary record */
|
|
*ptr++ = TRX_UNDO_INDEX_CREATE_REC;
|
|
|
|
/* For index create, we need both the table id and the index id
|
|
to be stored in the undo log record.*/
|
|
|
|
ptr += mach_dulint_write_much_compressed(ptr, index->table->id);
|
|
ptr += mach_dulint_write_much_compressed(ptr, index->id);
|
|
|
|
return(trx_undo_page_set_next_prev_and_add(undo_page, ptr, mtr));
|
|
}
|
|
|
|
/**************************************************************************
|
|
Reports in the undo log of a table create */
|
|
static
|
|
ulint
|
|
trx_undo_page_report_table_create(
|
|
/*==============================*/
|
|
/* out: offset of the inserted entry
|
|
on the page if succeed, 0 if fail */
|
|
page_t* undo_page, /* in: undo log page */
|
|
trx_t* trx, /* in: transaction */
|
|
const char* table_name, /* in: table name */
|
|
mtr_t* mtr) /* in: mtr */
|
|
{
|
|
byte* ptr;
|
|
ulint name_len;
|
|
|
|
ut_ad(undo_page && trx && table_name && mtr);
|
|
|
|
name_len = strlen(table_name) + 1;
|
|
|
|
/* Get the pointer to where we will write our undo data */
|
|
|
|
ptr = trx_undo_page_get_ptr(undo_page, 1 + 11 + 1 + name_len);
|
|
|
|
if (UNIV_UNLIKELY(!ptr)) {
|
|
return(0);
|
|
}
|
|
|
|
/* The type (discriminator) of this undo log */
|
|
*ptr++ = TRX_UNDO_DICTIONARY_REC;
|
|
|
|
ptr += mach_dulint_write_much_compressed(ptr, trx->undo_no);
|
|
|
|
/* The sub type (discriminator) of this dictionary undo log */
|
|
*ptr++ = TRX_UNDO_TABLE_CREATE_REC;
|
|
|
|
/* For table create we need to store table name */
|
|
memcpy(ptr, table_name, name_len);
|
|
ptr += name_len;
|
|
|
|
return(trx_undo_page_set_next_prev_and_add(undo_page, ptr, mtr));
|
|
}
|
|
|
|
/**************************************************************************
|
|
Reports in the undo log of a table drop */
|
|
static
|
|
ulint
|
|
trx_undo_page_report_table_drop(
|
|
/*============================*/
|
|
/* out: offset of the inserted entry
|
|
on the page if succeed, 0 if fail */
|
|
page_t* undo_page, /* in: undo log page */
|
|
trx_t* trx, /* in: transaction */
|
|
const char* table_name, /* in: table name */
|
|
mtr_t* mtr) /* in: mtr */
|
|
{
|
|
byte* ptr;
|
|
ulint name_len;
|
|
|
|
ut_ad(undo_page && trx && table_name && mtr);
|
|
|
|
name_len = strlen(table_name) + 1;
|
|
|
|
/* Get the pointer to where we will write our undo data */
|
|
|
|
ptr = trx_undo_page_get_ptr(undo_page, 1 + 11 + 1 + name_len);
|
|
|
|
if (UNIV_UNLIKELY(!ptr)) {
|
|
return(0);
|
|
}
|
|
|
|
*ptr++ = TRX_UNDO_DICTIONARY_REC;
|
|
|
|
ptr += mach_dulint_write_much_compressed(ptr, trx->undo_no);
|
|
|
|
/* The sub type (discriminator) of this dictionary undo log */
|
|
*ptr++ = TRX_UNDO_TABLE_DROP_REC;
|
|
|
|
/* For table drop we need to store a table name */
|
|
memcpy(ptr, table_name, name_len);
|
|
ptr += name_len;
|
|
|
|
return(trx_undo_page_set_next_prev_and_add(undo_page, ptr, mtr));
|
|
}
|
|
|
|
/**************************************************************************
|
|
Reports in the undo log of a table rename */
|
|
static
|
|
ulint
|
|
trx_undo_page_report_table_rename(
|
|
/*==============================*/
|
|
/* out: offset of the inserted entry
|
|
on the page if succeed, 0 if fail */
|
|
page_t* undo_page, /* in: undo log page */
|
|
trx_t* trx, /* in: transaction */
|
|
const char* new_table_name, /* in: new table name */
|
|
const char* old_table_name, /* in: old table name */
|
|
const char* tmp_table_name, /* in: the temp name */
|
|
mtr_t* mtr) /* in: mtr */
|
|
{
|
|
byte* ptr;
|
|
ulint new_name_len;
|
|
ulint old_name_len;
|
|
ulint tmp_name_len;
|
|
|
|
ut_ad(undo_page && trx && new_table_name && old_table_name && mtr);
|
|
|
|
new_name_len = strlen(new_table_name) + 1;
|
|
old_name_len = strlen(old_table_name) + 1;
|
|
tmp_name_len = strlen(tmp_table_name) + 1;
|
|
|
|
/* Get the pointer to where we will write our undo data. */
|
|
|
|
ptr = trx_undo_page_get_ptr(undo_page, 1 + 11 + 1
|
|
+ new_name_len + old_name_len
|
|
+ tmp_name_len);
|
|
|
|
if (UNIV_UNLIKELY(!ptr)) {
|
|
return(0);
|
|
}
|
|
|
|
/* The type (discriminator) of this undo log */
|
|
*ptr++ = TRX_UNDO_DICTIONARY_REC;
|
|
ptr += mach_dulint_write_much_compressed(ptr, trx->undo_no);
|
|
|
|
/* The sub type (discriminator) of this dictionary undo log */
|
|
*ptr++ = TRX_UNDO_TABLE_RENAME_REC;
|
|
|
|
/* For table rename we need to store the new table name and
|
|
the old table name */
|
|
|
|
memcpy(ptr, new_table_name, new_name_len);
|
|
ptr += new_name_len;
|
|
|
|
memcpy(ptr, old_table_name, old_name_len);
|
|
ptr += old_name_len;
|
|
|
|
memcpy(ptr, tmp_table_name, tmp_name_len);
|
|
ptr += tmp_name_len;
|
|
|
|
return(trx_undo_page_set_next_prev_and_add(undo_page, ptr, mtr));
|
|
}
|
|
|
|
/**************************************************************************
|
|
Reads from an undo log record the general parameters. */
|
|
|
|
byte*
|
|
trx_undo_rec_get_pars(
|
|
/*==================*/
|
|
/* out: remaining part of undo log
|
|
record after reading these values */
|
|
trx_undo_rec_t* undo_rec, /* in: undo log record */
|
|
ulint* type, /* out: undo record type:
|
|
TRX_UNDO_INSERT_REC, ... */
|
|
ulint* cmpl_info, /* out: compiler info, relevant only
|
|
for update type records */
|
|
ibool* updated_extern, /* out: TRUE if we updated an
|
|
externally stored fild */
|
|
dulint* undo_no, /* out: undo log record number */
|
|
dulint* table_id) /* out: table id */
|
|
{
|
|
byte* ptr;
|
|
ulint type_cmpl;
|
|
|
|
ptr = undo_rec + 2;
|
|
|
|
type_cmpl = mach_read_from_1(ptr);
|
|
ptr++;
|
|
|
|
if (type_cmpl & TRX_UNDO_UPD_EXTERN) {
|
|
*updated_extern = TRUE;
|
|
type_cmpl -= TRX_UNDO_UPD_EXTERN;
|
|
} else {
|
|
*updated_extern = FALSE;
|
|
}
|
|
|
|
*type = type_cmpl & (TRX_UNDO_CMPL_INFO_MULT - 1);
|
|
*cmpl_info = type_cmpl / TRX_UNDO_CMPL_INFO_MULT;
|
|
|
|
*undo_no = mach_dulint_read_much_compressed(ptr);
|
|
ptr += mach_dulint_get_much_compressed_size(*undo_no);
|
|
|
|
*table_id = mach_dulint_read_much_compressed(ptr);
|
|
ptr += mach_dulint_get_much_compressed_size(*table_id);
|
|
|
|
return(ptr);
|
|
}
|
|
|
|
/**************************************************************************
|
|
Reads from an undo log record a stored column value. */
|
|
static
|
|
byte*
|
|
trx_undo_rec_get_col_val(
|
|
/*=====================*/
|
|
/* out: remaining part of undo log record after
|
|
reading these values */
|
|
byte* ptr, /* in: pointer to remaining part of undo log record */
|
|
byte** field, /* out: pointer to stored field */
|
|
ulint* len) /* out: length of the field, or UNIV_SQL_NULL */
|
|
{
|
|
*len = mach_read_compressed(ptr);
|
|
ptr += mach_get_compressed_size(*len);
|
|
|
|
*field = ptr;
|
|
|
|
if (*len != UNIV_SQL_NULL) {
|
|
if (*len >= UNIV_EXTERN_STORAGE_FIELD) {
|
|
ptr += (*len - UNIV_EXTERN_STORAGE_FIELD);
|
|
} else {
|
|
ptr += *len;
|
|
}
|
|
}
|
|
|
|
return(ptr);
|
|
}
|
|
|
|
/***********************************************************************
|
|
Builds a row reference from an undo log record. */
|
|
|
|
byte*
|
|
trx_undo_rec_get_row_ref(
|
|
/*=====================*/
|
|
/* out: pointer to remaining part of undo
|
|
record */
|
|
byte* ptr, /* in: remaining part of a copy of an undo log
|
|
record, at the start of the row reference;
|
|
NOTE that this copy of the undo log record must
|
|
be preserved as long as the row reference is
|
|
used, as we do NOT copy the data in the
|
|
record! */
|
|
dict_index_t* index, /* in: clustered index */
|
|
dtuple_t** ref, /* out, own: row reference */
|
|
mem_heap_t* heap) /* in: memory heap from which the memory
|
|
needed is allocated */
|
|
{
|
|
dfield_t* dfield;
|
|
byte* field;
|
|
ulint len;
|
|
ulint ref_len;
|
|
ulint i;
|
|
|
|
ut_ad(index && ptr && ref && heap);
|
|
ut_a(dict_index_is_clust(index));
|
|
|
|
ref_len = dict_index_get_n_unique(index);
|
|
|
|
*ref = dtuple_create(heap, ref_len);
|
|
|
|
dict_index_copy_types(*ref, index, ref_len);
|
|
|
|
for (i = 0; i < ref_len; i++) {
|
|
dfield = dtuple_get_nth_field(*ref, i);
|
|
|
|
ptr = trx_undo_rec_get_col_val(ptr, &field, &len);
|
|
|
|
dfield_set_data(dfield, field, len);
|
|
}
|
|
|
|
return(ptr);
|
|
}
|
|
|
|
/***********************************************************************
|
|
Skips a row reference from an undo log record. */
|
|
|
|
byte*
|
|
trx_undo_rec_skip_row_ref(
|
|
/*======================*/
|
|
/* out: pointer to remaining part of undo
|
|
record */
|
|
byte* ptr, /* in: remaining part in update undo log
|
|
record, at the start of the row reference */
|
|
dict_index_t* index) /* in: clustered index */
|
|
{
|
|
byte* field;
|
|
ulint len;
|
|
ulint ref_len;
|
|
ulint i;
|
|
|
|
ut_ad(index && ptr);
|
|
ut_a(dict_index_is_clust(index));
|
|
|
|
ref_len = dict_index_get_n_unique(index);
|
|
|
|
for (i = 0; i < ref_len; i++) {
|
|
ptr = trx_undo_rec_get_col_val(ptr, &field, &len);
|
|
}
|
|
|
|
return(ptr);
|
|
}
|
|
|
|
/**************************************************************************
|
|
Reports in the undo log of an update or delete marking of a clustered index
|
|
record. */
|
|
static
|
|
ulint
|
|
trx_undo_page_report_modify(
|
|
/*========================*/
|
|
/* out: byte offset of the inserted
|
|
undo log entry on the page if succeed,
|
|
0 if fail */
|
|
page_t* undo_page, /* in: undo log page */
|
|
trx_t* trx, /* in: transaction */
|
|
dict_index_t* index, /* in: clustered index where update or
|
|
delete marking is done */
|
|
const rec_t* rec, /* in: clustered index record which
|
|
has NOT yet been modified */
|
|
const ulint* offsets, /* in: rec_get_offsets(rec, index) */
|
|
const upd_t* update, /* in: update vector which tells the
|
|
columns to be updated; in the case of
|
|
a delete, this should be set to NULL */
|
|
ulint cmpl_info, /* in: compiler info on secondary
|
|
index updates */
|
|
mtr_t* mtr) /* in: mtr */
|
|
{
|
|
dict_table_t* table;
|
|
ulint first_free;
|
|
byte* ptr;
|
|
const byte* field;
|
|
ulint flen;
|
|
ulint col_no;
|
|
ulint type_cmpl;
|
|
byte* type_cmpl_ptr;
|
|
ulint i;
|
|
|
|
ut_a(dict_index_is_clust(index));
|
|
ut_ad(rec_offs_validate(rec, index, offsets));
|
|
ut_ad(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
|
|
+ TRX_UNDO_PAGE_TYPE) == TRX_UNDO_UPDATE);
|
|
table = index->table;
|
|
|
|
first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
|
|
+ TRX_UNDO_PAGE_FREE);
|
|
ptr = undo_page + first_free;
|
|
|
|
ut_ad(first_free <= UNIV_PAGE_SIZE);
|
|
|
|
if (trx_undo_left(undo_page, ptr) < 50) {
|
|
|
|
/* NOTE: the value 50 must be big enough so that the general
|
|
fields written below fit on the undo log page */
|
|
|
|
return(0);
|
|
}
|
|
|
|
/* Reserve 2 bytes for the pointer to the next undo log record */
|
|
ptr += 2;
|
|
|
|
/* Store first some general parameters to the undo log */
|
|
|
|
if (!update) {
|
|
type_cmpl = TRX_UNDO_DEL_MARK_REC;
|
|
} else if (rec_get_deleted_flag(rec, dict_table_is_comp(table))) {
|
|
type_cmpl = TRX_UNDO_UPD_DEL_REC;
|
|
} else {
|
|
type_cmpl = TRX_UNDO_UPD_EXIST_REC;
|
|
}
|
|
|
|
type_cmpl |= cmpl_info * TRX_UNDO_CMPL_INFO_MULT;
|
|
type_cmpl_ptr = ptr;
|
|
|
|
*ptr++ = type_cmpl;
|
|
ptr += mach_dulint_write_much_compressed(ptr, trx->undo_no);
|
|
|
|
ptr += mach_dulint_write_much_compressed(ptr, table->id);
|
|
|
|
/*----------------------------------------*/
|
|
/* Store the state of the info bits */
|
|
|
|
*ptr++ = rec_get_info_bits(rec, dict_table_is_comp(table));
|
|
|
|
/* Store the values of the system columns */
|
|
field = rec_get_nth_field(rec, offsets,
|
|
dict_index_get_sys_col_pos(
|
|
index, DATA_TRX_ID), &flen);
|
|
ut_ad(flen == DATA_TRX_ID_LEN);
|
|
|
|
ptr += mach_dulint_write_compressed(ptr, trx_read_trx_id(field));
|
|
|
|
field = rec_get_nth_field(rec, offsets,
|
|
dict_index_get_sys_col_pos(
|
|
index, DATA_ROLL_PTR), &flen);
|
|
ut_ad(flen == DATA_ROLL_PTR_LEN);
|
|
|
|
ptr += mach_dulint_write_compressed(ptr, trx_read_roll_ptr(field));
|
|
|
|
/*----------------------------------------*/
|
|
/* Store then the fields required to uniquely determine the
|
|
record which will be modified in the clustered index */
|
|
|
|
for (i = 0; i < dict_index_get_n_unique(index); i++) {
|
|
|
|
field = rec_get_nth_field(rec, offsets, i, &flen);
|
|
|
|
if (trx_undo_left(undo_page, ptr) < 4) {
|
|
|
|
return(0);
|
|
}
|
|
|
|
ptr += mach_write_compressed(ptr, flen);
|
|
|
|
if (flen != UNIV_SQL_NULL) {
|
|
if (trx_undo_left(undo_page, ptr) < flen) {
|
|
|
|
return(0);
|
|
}
|
|
|
|
ut_memcpy(ptr, field, flen);
|
|
ptr += flen;
|
|
}
|
|
}
|
|
|
|
/*----------------------------------------*/
|
|
/* Save to the undo log the old values of the columns to be updated. */
|
|
|
|
if (update) {
|
|
if (trx_undo_left(undo_page, ptr) < 5) {
|
|
|
|
return(0);
|
|
}
|
|
|
|
ptr += mach_write_compressed(ptr, upd_get_n_fields(update));
|
|
|
|
for (i = 0; i < upd_get_n_fields(update); i++) {
|
|
|
|
ulint pos = upd_get_nth_field(update, i)->field_no;
|
|
|
|
/* Write field number to undo log */
|
|
if (trx_undo_left(undo_page, ptr) < 5) {
|
|
|
|
return(0);
|
|
}
|
|
|
|
ptr += mach_write_compressed(ptr, pos);
|
|
|
|
/* Save the old value of field */
|
|
field = rec_get_nth_field(rec, offsets, pos, &flen);
|
|
|
|
if (trx_undo_left(undo_page, ptr) < 5) {
|
|
|
|
return(0);
|
|
}
|
|
|
|
if (rec_offs_nth_extern(offsets, pos)) {
|
|
/* If a field has external storage, we add
|
|
to flen the flag */
|
|
|
|
ptr += mach_write_compressed(
|
|
ptr,
|
|
UNIV_EXTERN_STORAGE_FIELD + flen);
|
|
|
|
/* Notify purge that it eventually has to
|
|
free the old externally stored field */
|
|
|
|
trx->update_undo->del_marks = TRUE;
|
|
|
|
*type_cmpl_ptr |= TRX_UNDO_UPD_EXTERN;
|
|
} else {
|
|
ptr += mach_write_compressed(ptr, flen);
|
|
}
|
|
|
|
if (flen != UNIV_SQL_NULL) {
|
|
if (trx_undo_left(undo_page, ptr) < flen) {
|
|
|
|
return(0);
|
|
}
|
|
|
|
ut_memcpy(ptr, field, flen);
|
|
ptr += flen;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*----------------------------------------*/
|
|
/* In the case of a delete marking, and also in the case of an update
|
|
where any ordering field of any index changes, store the values of all
|
|
columns which occur as ordering fields in any index. This info is used
|
|
in the purge of old versions where we use it to build and search the
|
|
delete marked index records, to look if we can remove them from the
|
|
index tree. Note that starting from 4.0.14 also externally stored
|
|
fields can be ordering in some index. Starting from 5.2, we no longer
|
|
store REC_MAX_INDEX_COL_LEN first bytes to the undo log record,
|
|
but we can construct the column prefix fields in the index by
|
|
fetching the first page of the BLOB that is pointed to by the
|
|
clustered index. This works also in crash recovery, because all pages
|
|
(including BLOBs) are recovered before anything is rolled back. */
|
|
|
|
if (!update || !(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
|
|
byte* old_ptr = ptr;
|
|
|
|
trx->update_undo->del_marks = TRUE;
|
|
|
|
if (trx_undo_left(undo_page, ptr) < 5) {
|
|
|
|
return(0);
|
|
}
|
|
|
|
/* Reserve 2 bytes to write the number of bytes the stored
|
|
fields take in this undo record */
|
|
|
|
ptr += 2;
|
|
|
|
for (col_no = 0; col_no < dict_table_get_n_cols(table);
|
|
col_no++) {
|
|
|
|
const dict_col_t* col
|
|
= dict_table_get_nth_col(table, col_no);
|
|
|
|
if (col->ord_part) {
|
|
ulint pos;
|
|
|
|
/* Write field number to undo log */
|
|
if (trx_undo_left(undo_page, ptr) < 5 + 5) {
|
|
|
|
return(0);
|
|
}
|
|
|
|
pos = dict_index_get_nth_col_pos(index,
|
|
col_no);
|
|
ptr += mach_write_compressed(ptr, pos);
|
|
|
|
/* Save the old value of field */
|
|
field = rec_get_nth_field(rec, offsets, pos,
|
|
&flen);
|
|
|
|
ptr += mach_write_compressed(ptr, flen);
|
|
|
|
if (flen != UNIV_SQL_NULL) {
|
|
if (trx_undo_left(undo_page, ptr)
|
|
< flen) {
|
|
|
|
return(0);
|
|
}
|
|
|
|
ut_memcpy(ptr, field, flen);
|
|
ptr += flen;
|
|
}
|
|
}
|
|
}
|
|
|
|
mach_write_to_2(old_ptr, ptr - old_ptr);
|
|
}
|
|
|
|
/*----------------------------------------*/
|
|
/* Write pointers to the previous and the next undo log records */
|
|
if (trx_undo_left(undo_page, ptr) < 2) {
|
|
|
|
return(0);
|
|
}
|
|
|
|
mach_write_to_2(ptr, first_free);
|
|
ptr += 2;
|
|
mach_write_to_2(undo_page + first_free, ptr - undo_page);
|
|
|
|
mach_write_to_2(undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE,
|
|
ptr - undo_page);
|
|
|
|
/* Write to the REDO log about this change in the UNDO log */
|
|
|
|
trx_undof_page_add_undo_rec_log(undo_page, first_free,
|
|
ptr - undo_page, mtr);
|
|
return(first_free);
|
|
}
|
|
|
|
/**************************************************************************
|
|
Reads from an undo log update record the system field values of the old
|
|
version. */
|
|
|
|
byte*
|
|
trx_undo_update_rec_get_sys_cols(
|
|
/*=============================*/
|
|
/* out: remaining part of undo log
|
|
record after reading these values */
|
|
byte* ptr, /* in: remaining part of undo log
|
|
record after reading general
|
|
parameters */
|
|
dulint* trx_id, /* out: trx id */
|
|
dulint* roll_ptr, /* out: roll ptr */
|
|
ulint* info_bits) /* out: info bits state */
|
|
{
|
|
/* Read the state of the info bits */
|
|
*info_bits = mach_read_from_1(ptr);
|
|
ptr += 1;
|
|
|
|
/* Read the values of the system columns */
|
|
|
|
*trx_id = mach_dulint_read_compressed(ptr);
|
|
ptr += mach_dulint_get_compressed_size(*trx_id);
|
|
|
|
*roll_ptr = mach_dulint_read_compressed(ptr);
|
|
ptr += mach_dulint_get_compressed_size(*roll_ptr);
|
|
|
|
return(ptr);
|
|
}
|
|
|
|
/**************************************************************************
|
|
Reads from an update undo log record the number of updated fields. */
|
|
UNIV_INLINE
|
|
byte*
|
|
trx_undo_update_rec_get_n_upd_fields(
|
|
/*=================================*/
|
|
/* out: remaining part of undo log record after
|
|
reading this value */
|
|
byte* ptr, /* in: pointer to remaining part of undo log record */
|
|
ulint* n) /* out: number of fields */
|
|
{
|
|
*n = mach_read_compressed(ptr);
|
|
ptr += mach_get_compressed_size(*n);
|
|
|
|
return(ptr);
|
|
}
|
|
|
|
/**************************************************************************
|
|
Reads from an update undo log record a stored field number. */
|
|
UNIV_INLINE
|
|
byte*
|
|
trx_undo_update_rec_get_field_no(
|
|
/*=============================*/
|
|
/* out: remaining part of undo log record after
|
|
reading this value */
|
|
byte* ptr, /* in: pointer to remaining part of undo log record */
|
|
ulint* field_no)/* out: field number */
|
|
{
|
|
*field_no = mach_read_compressed(ptr);
|
|
ptr += mach_get_compressed_size(*field_no);
|
|
|
|
return(ptr);
|
|
}
|
|
|
|
/***********************************************************************
|
|
Builds an update vector based on a remaining part of an undo log record. */
|
|
|
|
byte*
|
|
trx_undo_update_rec_get_update(
|
|
/*===========================*/
|
|
/* out: remaining part of the record,
|
|
NULL if an error detected, which means that
|
|
the record is corrupted */
|
|
byte* ptr, /* in: remaining part in update undo log
|
|
record, after reading the row reference
|
|
NOTE that this copy of the undo log record must
|
|
be preserved as long as the update vector is
|
|
used, as we do NOT copy the data in the
|
|
record! */
|
|
dict_index_t* index, /* in: clustered index */
|
|
ulint type, /* in: TRX_UNDO_UPD_EXIST_REC,
|
|
TRX_UNDO_UPD_DEL_REC, or
|
|
TRX_UNDO_DEL_MARK_REC; in the last case,
|
|
only trx id and roll ptr fields are added to
|
|
the update vector */
|
|
dulint trx_id, /* in: transaction id from this undo record */
|
|
dulint roll_ptr,/* in: roll pointer from this undo record */
|
|
ulint info_bits,/* in: info bits from this undo record */
|
|
trx_t* trx, /* in: transaction */
|
|
mem_heap_t* heap, /* in: memory heap from which the memory
|
|
needed is allocated */
|
|
upd_t** upd) /* out, own: update vector */
|
|
{
|
|
upd_field_t* upd_field;
|
|
upd_t* update;
|
|
ulint n_fields;
|
|
byte* buf;
|
|
byte* field;
|
|
ulint len;
|
|
ulint field_no;
|
|
ulint i;
|
|
|
|
ut_a(dict_index_is_clust(index));
|
|
|
|
if (type != TRX_UNDO_DEL_MARK_REC) {
|
|
ptr = trx_undo_update_rec_get_n_upd_fields(ptr, &n_fields);
|
|
} else {
|
|
n_fields = 0;
|
|
}
|
|
|
|
update = upd_create(n_fields + 2, heap);
|
|
|
|
update->info_bits = info_bits;
|
|
|
|
/* Store first trx id and roll ptr to update vector */
|
|
|
|
upd_field = upd_get_nth_field(update, n_fields);
|
|
buf = mem_heap_alloc(heap, DATA_TRX_ID_LEN);
|
|
trx_write_trx_id(buf, trx_id);
|
|
|
|
upd_field_set_field_no(upd_field,
|
|
dict_index_get_sys_col_pos(index, DATA_TRX_ID),
|
|
index, trx);
|
|
dfield_set_data(&(upd_field->new_val), buf, DATA_TRX_ID_LEN);
|
|
|
|
upd_field = upd_get_nth_field(update, n_fields + 1);
|
|
buf = mem_heap_alloc(heap, DATA_ROLL_PTR_LEN);
|
|
trx_write_roll_ptr(buf, roll_ptr);
|
|
|
|
upd_field_set_field_no(
|
|
upd_field, dict_index_get_sys_col_pos(index, DATA_ROLL_PTR),
|
|
index, trx);
|
|
dfield_set_data(&(upd_field->new_val), buf, DATA_ROLL_PTR_LEN);
|
|
|
|
/* Store then the updated ordinary columns to the update vector */
|
|
|
|
for (i = 0; i < n_fields; i++) {
|
|
|
|
ptr = trx_undo_update_rec_get_field_no(ptr, &field_no);
|
|
|
|
if (field_no >= dict_index_get_n_fields(index)) {
|
|
fprintf(stderr,
|
|
"InnoDB: Error: trying to access"
|
|
" update undo rec field %lu in ",
|
|
(ulong) field_no);
|
|
dict_index_name_print(stderr, trx, index);
|
|
fprintf(stderr, "\n"
|
|
"InnoDB: but index has only %lu fields\n"
|
|
"InnoDB: Submit a detailed bug report"
|
|
" to http://bugs.mysql.com\n"
|
|
"InnoDB: Run also CHECK TABLE ",
|
|
(ulong) dict_index_get_n_fields(index));
|
|
ut_print_name(stderr, trx, TRUE, index->table_name);
|
|
fprintf(stderr, "\n"
|
|
"InnoDB: n_fields = %lu, i = %lu, ptr %p\n",
|
|
(ulong) n_fields, (ulong) i, ptr);
|
|
return(NULL);
|
|
}
|
|
|
|
ptr = trx_undo_rec_get_col_val(ptr, &field, &len);
|
|
|
|
upd_field = upd_get_nth_field(update, i);
|
|
|
|
upd_field_set_field_no(upd_field, field_no, index, trx);
|
|
|
|
if (len != UNIV_SQL_NULL && len >= UNIV_EXTERN_STORAGE_FIELD) {
|
|
|
|
len -= UNIV_EXTERN_STORAGE_FIELD;
|
|
dfield_set_data(&upd_field->new_val, field, len);
|
|
dfield_set_ext(&upd_field->new_val);
|
|
} else {
|
|
dfield_set_data(&upd_field->new_val, field, len);
|
|
}
|
|
}
|
|
|
|
*upd = update;
|
|
|
|
return(ptr);
|
|
}
|
|
|
|
/***********************************************************************
|
|
Builds a partial row from an update undo log record. It contains the
|
|
columns which occur as ordering in any index of the table. */
|
|
|
|
byte*
|
|
trx_undo_rec_get_partial_row(
|
|
/*=========================*/
|
|
/* out: pointer to remaining part of undo
|
|
record */
|
|
byte* ptr, /* in: remaining part in update undo log
|
|
record of a suitable type, at the start of
|
|
the stored index columns;
|
|
NOTE that this copy of the undo log record must
|
|
be preserved as long as the partial row is
|
|
used, as we do NOT copy the data in the
|
|
record! */
|
|
dict_index_t* index, /* in: clustered index */
|
|
dtuple_t** row, /* out, own: partial row */
|
|
row_ext_t** ext, /* out, own: prefix cache for
|
|
externally stored columns */
|
|
mem_heap_t* heap) /* in: memory heap from which the memory
|
|
needed is allocated */
|
|
{
|
|
const byte* end_ptr;
|
|
ulint row_len;
|
|
ulint n_ext_cols;
|
|
ulint* ext_cols;
|
|
|
|
ut_ad(index && ptr && row && ext && heap);
|
|
|
|
row_len = dict_table_get_n_cols(index->table);
|
|
n_ext_cols = 0;
|
|
ext_cols = mem_heap_alloc(heap, row_len * sizeof *ext_cols);
|
|
|
|
*row = dtuple_create(heap, row_len);
|
|
|
|
dict_table_copy_types(*row, index->table);
|
|
|
|
end_ptr = ptr + mach_read_from_2(ptr);
|
|
ptr += 2;
|
|
|
|
while (ptr != end_ptr) {
|
|
dfield_t* dfield;
|
|
byte* field;
|
|
ulint field_no;
|
|
ulint col_no;
|
|
ulint len;
|
|
|
|
ptr = trx_undo_update_rec_get_field_no(ptr, &field_no);
|
|
|
|
col_no = dict_index_get_nth_col_no(index, field_no);
|
|
|
|
ptr = trx_undo_rec_get_col_val(ptr, &field, &len);
|
|
|
|
if (len >= UNIV_EXTERN_STORAGE_FIELD) {
|
|
ext_cols[n_ext_cols++] = col_no;
|
|
}
|
|
|
|
dfield = dtuple_get_nth_field(*row, col_no);
|
|
|
|
dfield_set_data(dfield, field, len);
|
|
}
|
|
|
|
if (n_ext_cols) {
|
|
*ext = row_ext_create(n_ext_cols, ext_cols,
|
|
dict_table_zip_size(index->table),
|
|
heap);
|
|
} else {
|
|
*ext = NULL;
|
|
}
|
|
|
|
return(ptr);
|
|
}
|
|
|
|
/***************************************************************************
|
|
Erases the unused undo log page end. */
|
|
static
|
|
void
|
|
trx_undo_erase_page_end(
|
|
/*====================*/
|
|
page_t* undo_page, /* in: undo page whose end to erase */
|
|
mtr_t* mtr) /* in: mtr */
|
|
{
|
|
ulint first_free;
|
|
|
|
first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
|
|
+ TRX_UNDO_PAGE_FREE);
|
|
memset(undo_page + first_free, 0xff,
|
|
(UNIV_PAGE_SIZE - FIL_PAGE_DATA_END) - first_free);
|
|
|
|
mlog_write_initial_log_record(undo_page, MLOG_UNDO_ERASE_END, mtr);
|
|
}
|
|
|
|
/***************************************************************
|
|
Parses a redo log record of erasing of an undo page end. */
|
|
|
|
byte*
|
|
trx_undo_parse_erase_page_end(
|
|
/*==========================*/
|
|
/* out: end of log record or NULL */
|
|
byte* ptr, /* in: buffer */
|
|
byte* end_ptr __attribute__((unused)), /* in: buffer end */
|
|
page_t* page, /* in: page or NULL */
|
|
mtr_t* mtr) /* in: mtr or NULL */
|
|
{
|
|
ut_ad(ptr && end_ptr);
|
|
|
|
if (page == NULL) {
|
|
|
|
return(ptr);
|
|
}
|
|
|
|
trx_undo_erase_page_end(page, mtr);
|
|
|
|
return(ptr);
|
|
}
|
|
|
|
/***************************************************************************
|
|
Writes information to an undo log about an insert, update, or a delete marking
|
|
of a clustered index record. This information is used in a rollback of the
|
|
transaction and in consistent reads that must look to the history of this
|
|
transaction. */
|
|
|
|
ulint
|
|
trx_undo_report_row_operation(
|
|
/*==========================*/
|
|
/* out: DB_SUCCESS or error code */
|
|
ulint flags, /* in: if BTR_NO_UNDO_LOG_FLAG bit is
|
|
set, does nothing */
|
|
ulint op_type, /* in: TRX_UNDO_INSERT_OP or
|
|
TRX_UNDO_MODIFY_OP */
|
|
que_thr_t* thr, /* in: query thread */
|
|
dict_index_t* index, /* in: clustered index */
|
|
const dtuple_t* clust_entry, /* in: in the case of an insert,
|
|
index entry to insert into the
|
|
clustered index, otherwise NULL */
|
|
const upd_t* update, /* in: in the case of an update,
|
|
the update vector, otherwise NULL */
|
|
ulint cmpl_info, /* in: compiler info on secondary
|
|
index updates */
|
|
const rec_t* rec, /* in: in case of an update or delete
|
|
marking, the record in the clustered
|
|
index, otherwise NULL */
|
|
dulint* roll_ptr) /* out: rollback pointer to the
|
|
inserted undo log record,
|
|
ut_dulint_zero if BTR_NO_UNDO_LOG
|
|
flag was specified */
|
|
{
|
|
trx_t* trx;
|
|
trx_undo_t* undo;
|
|
ulint page_no;
|
|
trx_rseg_t* rseg;
|
|
mtr_t mtr;
|
|
ulint err = DB_SUCCESS;
|
|
mem_heap_t* heap = NULL;
|
|
ulint offsets_[REC_OFFS_NORMAL_SIZE];
|
|
ulint* offsets = offsets_;
|
|
*offsets_ = (sizeof offsets_) / sizeof *offsets_;
|
|
|
|
ut_a(dict_index_is_clust(index));
|
|
|
|
if (flags & BTR_NO_UNDO_LOG_FLAG) {
|
|
|
|
*roll_ptr = ut_dulint_zero;
|
|
|
|
return(DB_SUCCESS);
|
|
}
|
|
|
|
ut_ad(thr);
|
|
ut_ad((op_type != TRX_UNDO_INSERT_OP)
|
|
|| (clust_entry && !update && !rec));
|
|
|
|
trx = thr_get_trx(thr);
|
|
rseg = trx->rseg;
|
|
|
|
mutex_enter(&(trx->undo_mutex));
|
|
|
|
/* If the undo log is not assigned yet, assign one */
|
|
|
|
if (op_type == TRX_UNDO_INSERT_OP) {
|
|
|
|
if (trx->insert_undo == NULL) {
|
|
|
|
err = trx_undo_assign_undo(trx, TRX_UNDO_INSERT);
|
|
}
|
|
|
|
undo = trx->insert_undo;
|
|
|
|
if (UNIV_UNLIKELY(!undo)) {
|
|
/* Did not succeed */
|
|
mutex_exit(&(trx->undo_mutex));
|
|
|
|
return(err);
|
|
}
|
|
} else {
|
|
ut_ad(op_type == TRX_UNDO_MODIFY_OP);
|
|
|
|
if (trx->update_undo == NULL) {
|
|
|
|
err = trx_undo_assign_undo(trx, TRX_UNDO_UPDATE);
|
|
|
|
}
|
|
|
|
undo = trx->update_undo;
|
|
|
|
if (UNIV_UNLIKELY(!undo)) {
|
|
/* Did not succeed */
|
|
mutex_exit(&(trx->undo_mutex));
|
|
return(err);
|
|
}
|
|
|
|
offsets = rec_get_offsets(rec, index, offsets,
|
|
ULINT_UNDEFINED, &heap);
|
|
}
|
|
|
|
page_no = undo->last_page_no;
|
|
|
|
mtr_start(&mtr);
|
|
|
|
for (;;) {
|
|
buf_block_t* undo_block;
|
|
page_t* undo_page;
|
|
ulint offset;
|
|
|
|
undo_block = buf_page_get_gen(undo->space, undo->zip_size,
|
|
page_no, RW_X_LATCH,
|
|
undo->guess_block, BUF_GET,
|
|
__FILE__, __LINE__, &mtr);
|
|
#ifdef UNIV_SYNC_DEBUG
|
|
buf_block_dbg_add_level(undo_block, SYNC_TRX_UNDO_PAGE);
|
|
#endif /* UNIV_SYNC_DEBUG */
|
|
undo_page = buf_block_get_frame(undo_block);
|
|
|
|
if (op_type == TRX_UNDO_INSERT_OP) {
|
|
offset = trx_undo_page_report_insert(
|
|
undo_page, trx, index, clust_entry, &mtr);
|
|
} else {
|
|
offset = trx_undo_page_report_modify(
|
|
undo_page, trx, index, rec, offsets, update,
|
|
cmpl_info, &mtr);
|
|
}
|
|
|
|
if (UNIV_UNLIKELY(offset == 0)) {
|
|
/* The record did not fit on the page. We erase the
|
|
end segment of the undo log page and write a log
|
|
record of it: this is to ensure that in the debug
|
|
version the replicate page constructed using the log
|
|
records stays identical to the original page */
|
|
|
|
trx_undo_erase_page_end(undo_page, &mtr);
|
|
mtr_commit(&mtr);
|
|
} else {
|
|
/* Success */
|
|
|
|
mtr_commit(&mtr);
|
|
|
|
undo->empty = FALSE;
|
|
undo->top_page_no = page_no;
|
|
undo->top_offset = offset;
|
|
undo->top_undo_no = trx->undo_no;
|
|
undo->guess_block = undo_block;
|
|
|
|
UT_DULINT_INC(trx->undo_no);
|
|
|
|
mutex_exit(&trx->undo_mutex);
|
|
|
|
*roll_ptr = trx_undo_build_roll_ptr(
|
|
op_type == TRX_UNDO_INSERT_OP,
|
|
rseg->id, page_no, offset);
|
|
if (UNIV_LIKELY_NULL(heap)) {
|
|
mem_heap_free(heap);
|
|
}
|
|
return(DB_SUCCESS);
|
|
}
|
|
|
|
ut_ad(page_no == undo->last_page_no);
|
|
|
|
/* We have to extend the undo log by one page */
|
|
|
|
mtr_start(&mtr);
|
|
|
|
/* When we add a page to an undo log, this is analogous to
|
|
a pessimistic insert in a B-tree, and we must reserve the
|
|
counterpart of the tree latch, which is the rseg mutex. */
|
|
|
|
mutex_enter(&(rseg->mutex));
|
|
|
|
page_no = trx_undo_add_page(trx, undo, &mtr);
|
|
|
|
mutex_exit(&(rseg->mutex));
|
|
|
|
if (UNIV_UNLIKELY(page_no == FIL_NULL)) {
|
|
/* Did not succeed: out of space */
|
|
|
|
mutex_exit(&(trx->undo_mutex));
|
|
mtr_commit(&mtr);
|
|
if (UNIV_LIKELY_NULL(heap)) {
|
|
mem_heap_free(heap);
|
|
}
|
|
return(DB_OUT_OF_FILE_SPACE);
|
|
}
|
|
}
|
|
}
|
|
|
|
/***************************************************************************
|
|
Writes information to an undo log about dictionary operation e.g.
|
|
rename_table, create_table, create_index, drop table. This information
|
|
is used in a rollback of the transaction. */
|
|
|
|
ulint
|
|
trx_undo_report_dict_operation(
|
|
/*===========================*/
|
|
/* out: DB_SUCCESS or error code */
|
|
ulint op_type, /* in: TRX_UNDO_TABLE_CREATE_OP,
|
|
TRX_UNDO_TABLE_RENAME_OP,
|
|
TRX_UNDO_TABLE_DROP_OP, or
|
|
TRX_UNDO_INDEX_CREATE_OP */
|
|
trx_t* trx, /* in: trx */
|
|
dict_index_t* index, /* in:
|
|
if TRX_UNDO_INDEX_CREATE_OP
|
|
index to be created*/
|
|
const char* table_name, /* in: table name or NULL, used in
|
|
create table, rename table and
|
|
drop table*/
|
|
const char* old_table_name, /* in: old table name or NULL.
|
|
used in rename table */
|
|
const char* tmp_table_name, /* in: the intermediate name used
|
|
for renaming */
|
|
dulint* roll_ptr) /* out: rollback pointer to the
|
|
inserted undo log record */
|
|
{
|
|
trx_undo_t* undo;
|
|
ulint page_no;
|
|
trx_rseg_t* rseg;
|
|
mtr_t mtr;
|
|
|
|
ut_ad(trx);
|
|
|
|
#ifdef UNIV_DEBUG
|
|
switch (op_type) {
|
|
case TRX_UNDO_TABLE_RENAME_OP:
|
|
ut_ad(old_table_name);
|
|
case TRX_UNDO_TABLE_DROP_OP:
|
|
case TRX_UNDO_TABLE_CREATE_OP:
|
|
ut_ad(table_name);
|
|
break;
|
|
|
|
case TRX_UNDO_INDEX_CREATE_OP:
|
|
ut_ad(index);
|
|
break;
|
|
default:
|
|
ut_error;
|
|
}
|
|
#endif
|
|
|
|
rseg = trx->rseg;
|
|
|
|
mutex_enter(&(trx->undo_mutex));
|
|
|
|
/* If the undo log is not assigned yet, assign one */
|
|
|
|
if (trx->insert_undo == NULL) {
|
|
|
|
trx_undo_assign_undo(trx, TRX_UNDO_INSERT);
|
|
}
|
|
|
|
undo = trx->insert_undo;
|
|
|
|
if (undo == NULL) {
|
|
/* Did not succeed: out of space */
|
|
mutex_exit(&(trx->undo_mutex));
|
|
|
|
return(DB_OUT_OF_FILE_SPACE);
|
|
}
|
|
|
|
page_no = undo->last_page_no;
|
|
|
|
mtr_start(&mtr);
|
|
|
|
for (;;) {
|
|
buf_block_t* undo_block;
|
|
ulint offset;
|
|
|
|
undo_block = buf_page_get_gen(undo->space, undo->zip_size,
|
|
page_no, RW_X_LATCH,
|
|
undo->guess_block, BUF_GET,
|
|
__FILE__, __LINE__, &mtr);
|
|
|
|
#ifdef UNIV_SYNC_DEBUG
|
|
buf_block_dbg_add_level(undo_block, SYNC_TRX_UNDO_PAGE);
|
|
#endif /* UNIV_SYNC_DEBUG */
|
|
|
|
switch (op_type) {
|
|
|
|
case TRX_UNDO_TABLE_CREATE_OP:
|
|
offset = trx_undo_page_report_table_create(
|
|
undo_block->frame, trx, table_name, &mtr);
|
|
break;
|
|
|
|
case TRX_UNDO_INDEX_CREATE_OP:
|
|
offset = trx_undo_page_report_index_create(
|
|
undo_block->frame, trx, index, &mtr);
|
|
break;
|
|
|
|
case TRX_UNDO_TABLE_RENAME_OP:
|
|
offset = trx_undo_page_report_table_rename(
|
|
undo_block->frame, trx, table_name,
|
|
old_table_name, tmp_table_name, &mtr);
|
|
break;
|
|
|
|
case TRX_UNDO_TABLE_DROP_OP:
|
|
offset = trx_undo_page_report_table_drop(
|
|
undo_block->frame, trx, table_name, &mtr);
|
|
break;
|
|
|
|
default:
|
|
ut_print_timestamp(stderr);
|
|
fprintf(stderr,
|
|
" InnoDB: [Error]: Undefined op_type = %lu "
|
|
"at trx_undo_report_dict_operation\n",
|
|
(ulong) op_type);
|
|
|
|
mutex_enter(&kernel_mutex);
|
|
trx_print(stderr, trx, 1024);
|
|
mutex_exit(&kernel_mutex);
|
|
|
|
return(DB_ERROR);
|
|
}
|
|
|
|
if (UNIV_UNLIKELY(offset == 0)) {
|
|
/* The record did not fit on the page. We erase the
|
|
end segment of the undo log page and write a log
|
|
record of it: this is to ensure that in the debug
|
|
version the replicate page constructed using the log
|
|
records stays identical to the original page */
|
|
|
|
trx_undo_erase_page_end(undo_block->frame, &mtr);
|
|
mtr_commit(&mtr);
|
|
} else {
|
|
/* Success */
|
|
mtr_commit(&mtr);
|
|
|
|
undo->empty = FALSE;
|
|
undo->top_page_no = page_no;
|
|
undo->top_offset = offset;
|
|
undo->top_undo_no = trx->undo_no;
|
|
undo->guess_block = undo_block;
|
|
|
|
UT_DULINT_INC(trx->undo_no);
|
|
|
|
mutex_exit(&(trx->undo_mutex));
|
|
|
|
*roll_ptr = trx_undo_build_roll_ptr(TRUE, rseg->id,
|
|
page_no, offset);
|
|
|
|
return(DB_SUCCESS);
|
|
}
|
|
|
|
ut_ad(page_no == undo->last_page_no);
|
|
|
|
/* We have to extend the undo log by one page */
|
|
|
|
mtr_start(&mtr);
|
|
|
|
/* When we add a page to an undo log, this is analogous to
|
|
a pessimistic insert in a B-tree, and we must reserve the
|
|
counterpart of the tree latch, which is the rseg mutex. */
|
|
|
|
mutex_enter(&(rseg->mutex));
|
|
|
|
page_no = trx_undo_add_page(trx, undo, &mtr);
|
|
|
|
mutex_exit(&(rseg->mutex));
|
|
|
|
if (page_no == FIL_NULL) {
|
|
/* Did not succeed: out of space */
|
|
|
|
mutex_exit(&(trx->undo_mutex));
|
|
mtr_commit(&mtr);
|
|
|
|
return(DB_OUT_OF_FILE_SPACE);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*============== BUILDING PREVIOUS VERSION OF A RECORD ===============*/
|
|
|
|
/**********************************************************************
|
|
Copies an undo record to heap. This function can be called if we know that
|
|
the undo log record exists. */
|
|
|
|
trx_undo_rec_t*
|
|
trx_undo_get_undo_rec_low(
|
|
/*======================*/
|
|
/* out, own: copy of the record */
|
|
dulint roll_ptr, /* in: roll pointer to record */
|
|
mem_heap_t* heap) /* in: memory heap where copied */
|
|
{
|
|
trx_undo_rec_t* undo_rec;
|
|
ulint rseg_id;
|
|
ulint page_no;
|
|
ulint offset;
|
|
page_t* undo_page;
|
|
trx_rseg_t* rseg;
|
|
ibool is_insert;
|
|
mtr_t mtr;
|
|
|
|
trx_undo_decode_roll_ptr(roll_ptr, &is_insert, &rseg_id, &page_no,
|
|
&offset);
|
|
rseg = trx_rseg_get_on_id(rseg_id);
|
|
|
|
mtr_start(&mtr);
|
|
|
|
undo_page = trx_undo_page_get_s_latched(rseg->space, rseg->zip_size,
|
|
page_no, &mtr);
|
|
|
|
undo_rec = trx_undo_rec_copy(undo_page + offset, heap);
|
|
|
|
mtr_commit(&mtr);
|
|
|
|
return(undo_rec);
|
|
}
|
|
|
|
/**********************************************************************
|
|
Copies an undo record to heap. */
|
|
|
|
ulint
|
|
trx_undo_get_undo_rec(
|
|
/*==================*/
|
|
/* out: DB_SUCCESS, or
|
|
DB_MISSING_HISTORY if the undo log
|
|
has been truncated and we cannot
|
|
fetch the old version; NOTE: the
|
|
caller must have latches on the
|
|
clustered index page and purge_view */
|
|
dulint roll_ptr, /* in: roll pointer to record */
|
|
dulint trx_id, /* in: id of the trx that generated
|
|
the roll pointer: it points to an
|
|
undo log of this transaction */
|
|
trx_undo_rec_t** undo_rec, /* out, own: copy of the record */
|
|
mem_heap_t* heap) /* in: memory heap where copied */
|
|
{
|
|
#ifdef UNIV_SYNC_DEBUG
|
|
ut_ad(rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
|
|
#endif /* UNIV_SYNC_DEBUG */
|
|
|
|
if (!trx_purge_update_undo_must_exist(trx_id)) {
|
|
|
|
/* It may be that the necessary undo log has already been
|
|
deleted */
|
|
|
|
return(DB_MISSING_HISTORY);
|
|
}
|
|
|
|
*undo_rec = trx_undo_get_undo_rec_low(roll_ptr, heap);
|
|
|
|
return(DB_SUCCESS);
|
|
}
|
|
|
|
/***********************************************************************
|
|
Build a previous version of a clustered index record. This function checks
|
|
that the caller has a latch on the index page of the clustered index record
|
|
and an s-latch on the purge_view. This guarantees that the stack of versions
|
|
is locked. */
|
|
|
|
ulint
|
|
trx_undo_prev_version_build(
|
|
/*========================*/
|
|
/* out: DB_SUCCESS, or DB_MISSING_HISTORY if
|
|
the previous version is not >= purge_view,
|
|
which means that it may have been removed,
|
|
DB_ERROR if corrupted record */
|
|
rec_t* index_rec,/* in: clustered index record in the
|
|
index tree */
|
|
mtr_t* index_mtr __attribute__((unused)),
|
|
/* in: mtr which contains the latch to
|
|
index_rec page and purge_view */
|
|
rec_t* rec, /* in: version of a clustered index record */
|
|
dict_index_t* index, /* in: clustered index */
|
|
ulint* offsets,/* in: rec_get_offsets(rec, index) */
|
|
mem_heap_t* heap, /* in: memory heap from which the memory
|
|
needed is allocated */
|
|
rec_t** old_vers)/* out, own: previous version, or NULL if
|
|
rec is the first inserted version, or if
|
|
history data has been deleted */
|
|
{
|
|
trx_undo_rec_t* undo_rec;
|
|
dtuple_t* entry;
|
|
dulint rec_trx_id;
|
|
ulint type;
|
|
dulint undo_no;
|
|
dulint table_id;
|
|
dulint trx_id;
|
|
dulint roll_ptr;
|
|
dulint old_roll_ptr;
|
|
upd_t* update;
|
|
byte* ptr;
|
|
ulint info_bits;
|
|
ulint cmpl_info;
|
|
ibool dummy_extern;
|
|
byte* buf;
|
|
ulint err;
|
|
#ifdef UNIV_SYNC_DEBUG
|
|
ut_ad(rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
|
|
#endif /* UNIV_SYNC_DEBUG */
|
|
ut_ad(mtr_memo_contains_page(index_mtr, index_rec, MTR_MEMO_PAGE_S_FIX)
|
|
|| mtr_memo_contains_page(index_mtr, index_rec,
|
|
MTR_MEMO_PAGE_X_FIX));
|
|
ut_ad(rec_offs_validate(rec, index, offsets));
|
|
|
|
if (!dict_index_is_clust(index)) {
|
|
fprintf(stderr, "InnoDB: Error: trying to access"
|
|
" update undo rec for non-clustered index %s\n"
|
|
"InnoDB: Submit a detailed bug report to"
|
|
" http://bugs.mysql.com\n"
|
|
"InnoDB: index record ", index->name);
|
|
rec_print(stderr, index_rec, index);
|
|
fputs("\n"
|
|
"InnoDB: record version ", stderr);
|
|
rec_print_new(stderr, rec, offsets);
|
|
putc('\n', stderr);
|
|
return(DB_ERROR);
|
|
}
|
|
|
|
roll_ptr = row_get_rec_roll_ptr(rec, index, offsets);
|
|
old_roll_ptr = roll_ptr;
|
|
|
|
*old_vers = NULL;
|
|
|
|
if (trx_undo_roll_ptr_is_insert(roll_ptr)) {
|
|
|
|
/* The record rec is the first inserted version */
|
|
|
|
return(DB_SUCCESS);
|
|
}
|
|
|
|
rec_trx_id = row_get_rec_trx_id(rec, index, offsets);
|
|
|
|
err = trx_undo_get_undo_rec(roll_ptr, rec_trx_id, &undo_rec, heap);
|
|
|
|
if (err != DB_SUCCESS) {
|
|
|
|
return(err);
|
|
}
|
|
|
|
ptr = trx_undo_rec_get_pars(undo_rec, &type, &cmpl_info,
|
|
&dummy_extern, &undo_no, &table_id);
|
|
|
|
ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
|
|
&info_bits);
|
|
ptr = trx_undo_rec_skip_row_ref(ptr, index);
|
|
|
|
ptr = trx_undo_update_rec_get_update(ptr, index, type, trx_id,
|
|
roll_ptr, info_bits,
|
|
NULL, heap, &update);
|
|
|
|
if (ut_dulint_cmp(table_id, index->table->id) != 0) {
|
|
ptr = NULL;
|
|
|
|
fprintf(stderr,
|
|
"InnoDB: Error: trying to access update undo rec"
|
|
" for table %s\n"
|
|
"InnoDB: but the table id in the"
|
|
" undo record is wrong\n"
|
|
"InnoDB: Submit a detailed bug report"
|
|
" to http://bugs.mysql.com\n"
|
|
"InnoDB: Run also CHECK TABLE %s\n",
|
|
index->table_name, index->table_name);
|
|
}
|
|
|
|
if (ptr == NULL) {
|
|
/* The record was corrupted, return an error; these printfs
|
|
should catch an elusive bug in row_vers_old_has_index_entry */
|
|
|
|
fprintf(stderr,
|
|
"InnoDB: table %s, index %s, n_uniq %lu\n"
|
|
"InnoDB: undo rec address %p, type %lu cmpl_info %lu\n"
|
|
"InnoDB: undo rec table id %lu %lu,"
|
|
" index table id %lu %lu\n"
|
|
"InnoDB: dump of 150 bytes in undo rec: ",
|
|
index->table_name, index->name,
|
|
(ulong) dict_index_get_n_unique(index),
|
|
undo_rec, (ulong) type, (ulong) cmpl_info,
|
|
(ulong) ut_dulint_get_high(table_id),
|
|
(ulong) ut_dulint_get_low(table_id),
|
|
(ulong) ut_dulint_get_high(index->table->id),
|
|
(ulong) ut_dulint_get_low(index->table->id));
|
|
ut_print_buf(stderr, undo_rec, 150);
|
|
fputs("\n"
|
|
"InnoDB: index record ", stderr);
|
|
rec_print(stderr, index_rec, index);
|
|
fputs("\n"
|
|
"InnoDB: record version ", stderr);
|
|
rec_print_new(stderr, rec, offsets);
|
|
fprintf(stderr, "\n"
|
|
"InnoDB: Record trx id %lu %lu, update rec"
|
|
" trx id %lu %lu\n"
|
|
"InnoDB: Roll ptr in rec %lu %lu, in update rec"
|
|
" %lu %lu\n",
|
|
(ulong) ut_dulint_get_high(rec_trx_id),
|
|
(ulong) ut_dulint_get_low(rec_trx_id),
|
|
(ulong) ut_dulint_get_high(trx_id),
|
|
(ulong) ut_dulint_get_low(trx_id),
|
|
(ulong) ut_dulint_get_high(old_roll_ptr),
|
|
(ulong) ut_dulint_get_low(old_roll_ptr),
|
|
(ulong) ut_dulint_get_high(roll_ptr),
|
|
(ulong) ut_dulint_get_low(roll_ptr));
|
|
|
|
trx_purge_sys_print();
|
|
return(DB_ERROR);
|
|
}
|
|
|
|
if (row_upd_changes_field_size_or_external(index, offsets, update)) {
|
|
ulint n_ext;
|
|
|
|
/* We have to set the appropriate extern storage bits in the
|
|
old version of the record: the extern bits in rec for those
|
|
fields that update does NOT update, as well as the the bits for
|
|
those fields that update updates to become externally stored
|
|
fields. Store the info: */
|
|
|
|
entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec,
|
|
heap);
|
|
n_ext = btr_push_update_extern_fields(entry, offsets, update);
|
|
row_upd_index_replace_new_col_vals(entry, index, update, heap);
|
|
|
|
buf = mem_heap_alloc(heap, rec_get_converted_size(index, entry,
|
|
n_ext));
|
|
|
|
*old_vers = rec_convert_dtuple_to_rec(buf, index,
|
|
entry, n_ext);
|
|
} else {
|
|
buf = mem_heap_alloc(heap, rec_offs_size(offsets));
|
|
*old_vers = rec_copy(buf, rec, offsets);
|
|
rec_offs_make_valid(*old_vers, index, offsets);
|
|
row_upd_rec_in_place(*old_vers, index, offsets, update, NULL);
|
|
}
|
|
|
|
return(DB_SUCCESS);
|
|
}
|