mariadb/trx/trx0rec.c

1602 lines
44 KiB
C
Raw Normal View History

/*****************************************************************************
Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file trx/trx0rec.c
2005-10-27 07:29:40 +00:00
Transaction undo log record
Created 3/26/1996 Heikki Tuuri
*******************************************************/
#include "trx0rec.h"
#ifdef UNIV_NONINL
#include "trx0rec.ic"
#endif
#include "fsp0fsp.h"
#include "mach0data.h"
#include "trx0undo.h"
#include "mtr0log.h"
#ifndef UNIV_HOTBACKUP
2005-10-27 07:29:40 +00:00
#include "dict0dict.h"
#include "ut0mem.h"
#include "row0ext.h"
2005-10-27 07:29:40 +00:00
#include "row0upd.h"
#include "que0que.h"
#include "trx0purge.h"
#include "trx0rseg.h"
2005-10-27 07:29:40 +00:00
#include "row0row.h"
/*=========== UNDO LOG RECORD CREATION AND DECODING ====================*/
/**********************************************************************//**
Writes the mtr log entry of the inserted undo log record on the undo log
page. The logged payload is the body of the record only: the 2 bytes at
the start and the 2 bytes at the end of the record (old_free..new_free)
are left out, so new_free - old_free - 4 bytes are logged, preceded by
that length as a 2-byte value. Nothing is logged if mlog_open() returns
NULL. */
UNIV_INLINE
void
trx_undof_page_add_undo_rec_log(
/*============================*/
	page_t* undo_page,	/*!< in: undo log page */
	ulint	old_free,	/*!< in: start offset of the inserted entry */
	ulint	new_free,	/*!< in: end offset of the entry */
	mtr_t*	mtr)		/*!< in: mtr */
{
	/* Number of bytes requested from the mtr log buffer */
	const ulint	reserved	= 11 + 13 + MLOG_BUF_MARGIN;
	/* Record body, i.e. the record without its first 2 bytes ... */
	const byte*	payload		= undo_page + old_free + 2;
	/* ... and without its last 2 bytes */
	const ulint	payload_len	= new_free - old_free - 4;
	const byte*	buf_limit;
	byte*		cursor;

	cursor = mlog_open(mtr, reserved);

	if (cursor == NULL) {

		return;
	}

	buf_limit = cursor + reserved;

	cursor = mlog_write_initial_log_record_fast(
		undo_page, MLOG_UNDO_INSERT, cursor, mtr);

	mach_write_to_2(cursor, payload_len);
	cursor += 2;

	if (cursor + payload_len > buf_limit) {
		/* The body does not fit in the space that was opened:
		close the log at the current position and append the
		body separately. */
		mlog_close(mtr, cursor);
		mlog_catenate_string(mtr, payload, payload_len);

		return;
	}

	/* The body fits: copy it directly after the length field. */
	memcpy(cursor, payload, payload_len);
	mlog_close(mtr, cursor + payload_len);
}
#endif /* !UNIV_HOTBACKUP */
2005-10-27 07:29:40 +00:00
/***********************************************************//**
Parses a redo log record of adding an undo log record.
@return end of log record or NULL */
UNIV_INTERN
2005-10-27 07:29:40 +00:00
byte*
trx_undo_parse_add_undo_rec(
/*========================*/
byte* ptr, /*!< in: buffer */
byte* end_ptr,/*!< in: buffer end */
page_t* page) /*!< in: page or NULL */
2005-10-27 07:29:40 +00:00
{
ulint len;
byte* rec;
ulint first_free;
if (end_ptr < ptr + 2) {
return(NULL);
}
len = mach_read_from_2(ptr);
ptr += 2;
if (end_ptr < ptr + len) {
return(NULL);
}
2005-10-27 07:29:40 +00:00
if (page == NULL) {
return(ptr + len);
}
2005-10-27 07:29:40 +00:00
first_free = mach_read_from_2(page + TRX_UNDO_PAGE_HDR
+ TRX_UNDO_PAGE_FREE);
2005-10-27 07:29:40 +00:00
rec = page + first_free;
2005-10-27 07:29:40 +00:00
mach_write_to_2(rec, first_free + 4 + len);
mach_write_to_2(rec + 2 + len, first_free);
mach_write_to_2(page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE,
first_free + 4 + len);
2005-10-27 07:29:40 +00:00
ut_memcpy(rec + 2, ptr, len);
return(ptr + len);
}
#ifndef UNIV_HOTBACKUP
/**********************************************************************//**
Calculates the free space left for extending an undo log record.
@return bytes left */
2005-10-27 07:29:40 +00:00
UNIV_INLINE
ulint
trx_undo_left(
/*==========*/
const page_t* page, /*!< in: undo log page */
const byte* ptr) /*!< in: pointer to page */
2005-10-27 07:29:40 +00:00
{
/* The '- 10' is a safety margin, in case we have some small
calculation error below */
return(UNIV_PAGE_SIZE - (ptr - page) - 10 - FIL_PAGE_DATA_END);
}
/**********************************************************************//**
Set the next and previous pointers in the undo page for the undo record
that was written to ptr. Update the first free value by the number of bytes
written for this undo record, and write the redo log entry for the record.
The record starts at the current first free offset of the page; its body
has been written up to ptr, and 2 more bytes are written at ptr.
@return offset of the inserted entry on the page if succeeded, 0 if fail */
static
ulint
trx_undo_page_set_next_prev_and_add(
/*================================*/
	page_t*		undo_page,	/*!< in/out: undo log page */
	byte*		ptr,		/*!< in: ptr up to where data has been
					written on this undo page. */
	mtr_t*		mtr)		/*!< in: mtr */
{
	/* Location in the page header that holds the first free offset */
	byte* const	free_slot = undo_page + TRX_UNDO_PAGE_HDR
		+ TRX_UNDO_PAGE_FREE;
	ulint		rec_start;	/*!< offset within undo_page */
	ulint		rec_end;	/*!< offset within undo_page */

	ut_ad(ptr > undo_page);
	ut_ad(ptr < undo_page + UNIV_PAGE_SIZE);

	/* The 2 trailing bytes written below must still fit */
	if (UNIV_UNLIKELY(trx_undo_left(undo_page, ptr) < 2)) {

		return(0);
	}

	rec_start = mach_read_from_2(free_slot);
	rec_end = (ptr + 2) - undo_page;

	/* At the end of the record, store the offset where it starts */
	mach_write_to_2(ptr, rec_start);

	/* At the start of the record, store the offset where it ends */
	mach_write_to_2(undo_page + rec_start, rec_end);

	/* The end of this record is the new first free offset */
	mach_write_to_2(free_slot, rec_end);

	/* Write this log entry to the UNDO log */
	trx_undof_page_add_undo_rec_log(undo_page, rec_start, rec_end, mtr);

	return(rec_start);
}
/**********************************************************************//**
Reports in the undo log of an insert of a clustered index record.
@return offset of the inserted entry on the page if succeed, 0 if fail */
2005-10-27 07:29:40 +00:00
static
ulint
trx_undo_page_report_insert(
/*========================*/
page_t* undo_page, /*!< in: undo log page */
trx_t* trx, /*!< in: transaction */
dict_index_t* index, /*!< in: clustered index */
const dtuple_t* clust_entry, /*!< in: index entry which will be
2005-10-27 07:29:40 +00:00
inserted to the clustered index */
mtr_t* mtr) /*!< in: mtr */
2005-10-27 07:29:40 +00:00
{
ulint first_free;
byte* ptr;
ulint i;
ut_ad(dict_index_is_clust(index));
2005-10-27 07:29:40 +00:00
ut_ad(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
+ TRX_UNDO_PAGE_TYPE) == TRX_UNDO_INSERT);
2005-10-27 07:29:40 +00:00
first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
+ TRX_UNDO_PAGE_FREE);
2005-10-27 07:29:40 +00:00
ptr = undo_page + first_free;
2005-10-27 07:29:40 +00:00
ut_ad(first_free <= UNIV_PAGE_SIZE);
if (trx_undo_left(undo_page, ptr) < 2 + 1 + 11 + 11) {
2005-10-27 07:29:40 +00:00
/* Not enough space for writing the general parameters */
2005-10-27 07:29:40 +00:00
return(0);
}
/* Reserve 2 bytes for the pointer to the next undo log record */
ptr += 2;
/* Store first some general parameters to the undo log */
*ptr++ = TRX_UNDO_INSERT_REC;
ptr += mach_dulint_write_much_compressed(ptr, trx->undo_no);
ptr += mach_dulint_write_much_compressed(ptr, index->table->id);
2005-10-27 07:29:40 +00:00
/*----------------------------------------*/
/* Store then the fields required to uniquely determine the record
to be inserted in the clustered index */
for (i = 0; i < dict_index_get_n_unique(index); i++) {
const dfield_t* field = dtuple_get_nth_field(clust_entry, i);
ulint flen = dfield_get_len(field);
2005-10-27 07:29:40 +00:00
if (trx_undo_left(undo_page, ptr) < 5) {
return(0);
}
ptr += mach_write_compressed(ptr, flen);
2005-10-27 07:29:40 +00:00
if (flen != UNIV_SQL_NULL) {
if (trx_undo_left(undo_page, ptr) < flen) {
return(0);
}
ut_memcpy(ptr, dfield_get_data(field), flen);
2005-10-27 07:29:40 +00:00
ptr += flen;
}
}
return(trx_undo_page_set_next_prev_and_add(undo_page, ptr, mtr));
}
2005-10-27 07:29:40 +00:00
/**********************************************************************//**
Reads from an undo log record the general parameters.
@return remaining part of undo log record after reading these values */
UNIV_INTERN
2005-10-27 07:29:40 +00:00
byte*
trx_undo_rec_get_pars(
/*==================*/
trx_undo_rec_t* undo_rec, /*!< in: undo log record */
ulint* type, /*!< out: undo record type:
2005-10-27 07:29:40 +00:00
TRX_UNDO_INSERT_REC, ... */
ulint* cmpl_info, /*!< out: compiler info, relevant only
2005-10-27 07:29:40 +00:00
for update type records */
ibool* updated_extern, /*!< out: TRUE if we updated an
2005-10-27 07:29:40 +00:00
externally stored fild */
undo_no_t* undo_no, /*!< out: undo log record number */
dulint* table_id) /*!< out: table id */
2005-10-27 07:29:40 +00:00
{
byte* ptr;
ulint type_cmpl;
ptr = undo_rec + 2;
type_cmpl = mach_read_from_1(ptr);
ptr++;
if (type_cmpl & TRX_UNDO_UPD_EXTERN) {
*updated_extern = TRUE;
type_cmpl -= TRX_UNDO_UPD_EXTERN;
} else {
*updated_extern = FALSE;
}
*type = type_cmpl & (TRX_UNDO_CMPL_INFO_MULT - 1);
*cmpl_info = type_cmpl / TRX_UNDO_CMPL_INFO_MULT;
*undo_no = mach_dulint_read_much_compressed(ptr);
ptr += mach_dulint_get_much_compressed_size(*undo_no);
2005-10-27 07:29:40 +00:00
*table_id = mach_dulint_read_much_compressed(ptr);
ptr += mach_dulint_get_much_compressed_size(*table_id);
2005-10-27 07:29:40 +00:00
return(ptr);
}
/**********************************************************************//**
Reads from an undo log record a stored column value.
@return remaining part of undo log record after reading these values */
2005-10-27 07:29:40 +00:00
static
byte*
trx_undo_rec_get_col_val(
/*=====================*/
byte* ptr, /*!< in: pointer to remaining part of undo log record */
byte** field, /*!< out: pointer to stored field */
ulint* len, /*!< out: length of the field, or UNIV_SQL_NULL */
ulint* orig_len)/*!< out: original length of the locally
stored part of an externally stored column, or 0 */
2005-10-27 07:29:40 +00:00
{
*len = mach_read_compressed(ptr);
2005-10-27 07:29:40 +00:00
ptr += mach_get_compressed_size(*len);
*orig_len = 0;
switch (*len) {
case UNIV_SQL_NULL:
*field = NULL;
break;
case UNIV_EXTERN_STORAGE_FIELD:
*orig_len = mach_read_compressed(ptr);
ptr += mach_get_compressed_size(*orig_len);
*len = mach_read_compressed(ptr);
ptr += mach_get_compressed_size(*len);
*field = ptr;
ptr += *len;
ut_ad(*orig_len >= BTR_EXTERN_FIELD_REF_SIZE);
ut_ad(*len > *orig_len);
ut_ad(*len >= REC_MAX_INDEX_COL_LEN
+ BTR_EXTERN_FIELD_REF_SIZE);
*len += UNIV_EXTERN_STORAGE_FIELD;
break;
default:
*field = ptr;
2005-10-27 07:29:40 +00:00
if (*len >= UNIV_EXTERN_STORAGE_FIELD) {
ptr += *len - UNIV_EXTERN_STORAGE_FIELD;
2005-10-27 07:29:40 +00:00
} else {
ptr += *len;
}
}
return(ptr);
}
/*******************************************************************//**
Builds a row reference from an undo log record.
@return pointer to remaining part of undo record */
UNIV_INTERN
2005-10-27 07:29:40 +00:00
byte*
trx_undo_rec_get_row_ref(
/*=====================*/
byte* ptr, /*!< in: remaining part of a copy of an undo log
2005-10-27 07:29:40 +00:00
record, at the start of the row reference;
NOTE that this copy of the undo log record must
be preserved as long as the row reference is
used, as we do NOT copy the data in the
record! */
dict_index_t* index, /*!< in: clustered index */
dtuple_t** ref, /*!< out, own: row reference */
mem_heap_t* heap) /*!< in: memory heap from which the memory
2005-10-27 07:29:40 +00:00
needed is allocated */
{
ulint ref_len;
ulint i;
2005-10-27 07:29:40 +00:00
ut_ad(index && ptr && ref && heap);
ut_a(dict_index_is_clust(index));
2005-10-27 07:29:40 +00:00
ref_len = dict_index_get_n_unique(index);
*ref = dtuple_create(heap, ref_len);
dict_index_copy_types(*ref, index, ref_len);
for (i = 0; i < ref_len; i++) {
dfield_t* dfield;
byte* field;
ulint len;
ulint orig_len;
dfield = dtuple_get_nth_field(*ref, i);
2005-10-27 07:29:40 +00:00
ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);
2005-10-27 07:29:40 +00:00
dfield_set_data(dfield, field, len);
}
return(ptr);
}
2005-10-27 07:29:40 +00:00
/*******************************************************************//**
Skips a row reference from an undo log record.
@return pointer to remaining part of undo record */
UNIV_INTERN
2005-10-27 07:29:40 +00:00
byte*
trx_undo_rec_skip_row_ref(
/*======================*/
byte* ptr, /*!< in: remaining part in update undo log
2005-10-27 07:29:40 +00:00
record, at the start of the row reference */
dict_index_t* index) /*!< in: clustered index */
2005-10-27 07:29:40 +00:00
{
ulint ref_len;
ulint i;
2005-10-27 07:29:40 +00:00
ut_ad(index && ptr);
ut_a(dict_index_is_clust(index));
2005-10-27 07:29:40 +00:00
ref_len = dict_index_get_n_unique(index);
for (i = 0; i < ref_len; i++) {
byte* field;
ulint len;
ulint orig_len;
ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);
2005-10-27 07:29:40 +00:00
}
return(ptr);
}
2005-10-27 07:29:40 +00:00
/**********************************************************************//**
Fetch a prefix of an externally stored column, for writing to the undo log
of an update or delete marking of a clustered index record. On return
ext_buf holds the fetched prefix immediately followed by the last
BTR_EXTERN_FIELD_REF_SIZE bytes of the locally stored field, and *len is
the total number of bytes used in ext_buf.
@return ext_buf */
static
byte*
trx_undo_page_fetch_ext(
/*====================*/
	byte*		ext_buf,	/*!< in: a buffer of
					REC_MAX_INDEX_COL_LEN
					+ BTR_EXTERN_FIELD_REF_SIZE */
	ulint		zip_size,	/*!< compressed page size in bytes,
					or 0 for uncompressed BLOB */
	const byte*	field,		/*!< in: an externally stored column */
	ulint*		len)		/*!< in: length of field;
					out: used length of ext_buf */
{
	const ulint	local_len = *len;
	/* The BLOB pointer occupies the tail of the locally stored part */
	const byte*	blob_ref = field + local_len
		- BTR_EXTERN_FIELD_REF_SIZE;
	ulint		prefix_len;

	/* Fetch the BLOB. */
	prefix_len = btr_copy_externally_stored_field_prefix(
		ext_buf, REC_MAX_INDEX_COL_LEN, zip_size, field, local_len);

	/* BLOBs should always be nonempty. */
	ut_a(prefix_len);

	/* Append the BLOB pointer to the prefix. */
	memcpy(ext_buf + prefix_len, blob_ref, BTR_EXTERN_FIELD_REF_SIZE);

	*len = prefix_len + BTR_EXTERN_FIELD_REF_SIZE;

	return(ext_buf);
}
/**********************************************************************//**
Writes to the undo log the length information of an externally stored
column. Without ext_buf a single number, UNIV_EXTERN_STORAGE_FIELD + *len,
is written and *field / *len are left unchanged. With ext_buf a longer
prefix is fetched into ext_buf; the marker UNIV_EXTERN_STORAGE_FIELD, the
original *len and the new *len are written, and *field / *len are updated
to describe the contents of ext_buf. The column data itself is not written
here.
@return undo log position */
static
byte*
trx_undo_page_report_modify_ext(
/*============================*/
	byte*		ptr,		/*!< in: undo log position,
					at least 15 bytes must be available */
	byte*		ext_buf,	/*!< in: a buffer of
					REC_MAX_INDEX_COL_LEN
					+ BTR_EXTERN_FIELD_REF_SIZE,
					or NULL when should not fetch
					a longer prefix */
	ulint		zip_size,	/*!< compressed page size in bytes,
					or 0 for uncompressed BLOB */
	const byte**	field,		/*!< in/out: the locally stored part of
					the externally stored column */
	ulint*		len)		/*!< in/out: length of field, in bytes */
{
	if (ext_buf == NULL) {
		/* No longer prefix is wanted: flag the length only */

		return(ptr + mach_write_compressed(
			       ptr, UNIV_EXTERN_STORAGE_FIELD + *len));
	}

	/* If an ordering column is externally stored, we will
	have to store a longer prefix of the field. In this
	case, write to the log a marker followed by the
	original length and the real length of the field. */
	ptr += mach_write_compressed(ptr, UNIV_EXTERN_STORAGE_FIELD);
	ptr += mach_write_compressed(ptr, *len);

	/* From here on *field and *len describe ext_buf */
	*field = trx_undo_page_fetch_ext(ext_buf, zip_size, *field, len);

	ptr += mach_write_compressed(ptr, *len);

	return(ptr);
}
/**********************************************************************//**
2005-10-27 07:29:40 +00:00
Reports in the undo log of an update or delete marking of a clustered index
record.
@return byte offset of the inserted undo log entry on the page if
succeed, 0 if fail */
2005-10-27 07:29:40 +00:00
static
ulint
trx_undo_page_report_modify(
/*========================*/
page_t* undo_page, /*!< in: undo log page */
trx_t* trx, /*!< in: transaction */
dict_index_t* index, /*!< in: clustered index where update or
2005-10-27 07:29:40 +00:00
delete marking is done */
const rec_t* rec, /*!< in: clustered index record which
2005-10-27 07:29:40 +00:00
has NOT yet been modified */
const ulint* offsets, /*!< in: rec_get_offsets(rec, index) */
const upd_t* update, /*!< in: update vector which tells the
2005-10-27 07:29:40 +00:00
columns to be updated; in the case of
a delete, this should be set to NULL */
ulint cmpl_info, /*!< in: compiler info on secondary
2005-10-27 07:29:40 +00:00
index updates */
mtr_t* mtr) /*!< in: mtr */
2005-10-27 07:29:40 +00:00
{
dict_table_t* table;
ulint first_free;
byte* ptr;
const byte* field;
2005-10-27 07:29:40 +00:00
ulint flen;
ulint col_no;
ulint type_cmpl;
byte* type_cmpl_ptr;
ulint i;
trx_id_t trx_id;
ibool ignore_prefix = FALSE;
byte ext_buf[REC_MAX_INDEX_COL_LEN
+ BTR_EXTERN_FIELD_REF_SIZE];
ut_a(dict_index_is_clust(index));
2005-10-27 07:29:40 +00:00
ut_ad(rec_offs_validate(rec, index, offsets));
ut_ad(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
+ TRX_UNDO_PAGE_TYPE) == TRX_UNDO_UPDATE);
2005-10-27 07:29:40 +00:00
table = index->table;
2005-10-27 07:29:40 +00:00
first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
+ TRX_UNDO_PAGE_FREE);
2005-10-27 07:29:40 +00:00
ptr = undo_page + first_free;
2005-10-27 07:29:40 +00:00
ut_ad(first_free <= UNIV_PAGE_SIZE);
if (trx_undo_left(undo_page, ptr) < 50) {
/* NOTE: the value 50 must be big enough so that the general
fields written below fit on the undo log page */
return(0);
}
/* Reserve 2 bytes for the pointer to the next undo log record */
ptr += 2;
/* Store first some general parameters to the undo log */
if (!update) {
2005-10-27 07:29:40 +00:00
type_cmpl = TRX_UNDO_DEL_MARK_REC;
} else if (rec_get_deleted_flag(rec, dict_table_is_comp(table))) {
type_cmpl = TRX_UNDO_UPD_DEL_REC;
/* We are about to update a delete marked record.
We don't typically need the prefix in this case unless
the delete marking is done by the same transaction
(which we check below). */
ignore_prefix = TRUE;
} else {
type_cmpl = TRX_UNDO_UPD_EXIST_REC;
2005-10-27 07:29:40 +00:00
}
type_cmpl |= cmpl_info * TRX_UNDO_CMPL_INFO_MULT;
2005-10-27 07:29:40 +00:00
type_cmpl_ptr = ptr;
branches/zip: Fix most MSVC (Windows) compilation warnings. lock_get_table(), locks_row_eq_lock(), buf_page_get_mutex(): Add return after ut_error. On Windows, ut_error is not declared as "noreturn". Add explicit type casts when assigning ulint to byte to get rid of "possible loss of precision" warnings. struct i_s_table_cache_struct: Declare rows_used, rows_allocd as ulint instead of ullint. 32 bits should be enough. fill_innodb_trx_from_cache(), i_s_zip_fill_low(): Cast 64-bit unsigned integers to longlong when calling Field::store(longlong, bool is_unsigned). Otherwise, the compiler would implicitly convert them to double and invoke Field::store(double) instead. recv_truncate_group(), recv_copy_group(), recv_calc_lsn_on_data_add(): Cast ib_uint64_t expressions to ulint to get rid of "possible loss of precision" warnings. (There should not be any loss of precision in these cases.) log_close(), log_checkpoint_margin(): Declare some variables as ib_uint64_t instead of ulint, so that there won't be any potential loss of precision. mach_write_ull(): Cast the second argument of mach_write_to_4() to ulint. OS_FILE_FROM_FD(): Cast the return value of _get_osfhandle() to HANDLE. row_merge_dict_table_get_index(): Cast the parameter of mem_free() to (void*) in order to get rid of the bogus MSVC warning C4090, which has been reported as MSVC bug 101661: <http://connect.microsoft.com/VisualStudio/feedback/ViewFeedback.aspx?FeedbackID=101661> row_mysql_read_blob_ref(): To get rid of a bogus MSVC warning C4090, drop a const qualifier.
2008-03-04 08:57:07 +00:00
*ptr++ = (byte) type_cmpl;
ptr += mach_dulint_write_much_compressed(ptr, trx->undo_no);
2005-10-27 07:29:40 +00:00
ptr += mach_dulint_write_much_compressed(ptr, table->id);
2005-10-27 07:29:40 +00:00
/*----------------------------------------*/
/* Store the state of the info bits */
branches/zip: Fix most MSVC (Windows) compilation warnings. lock_get_table(), locks_row_eq_lock(), buf_page_get_mutex(): Add return after ut_error. On Windows, ut_error is not declared as "noreturn". Add explicit type casts when assigning ulint to byte to get rid of "possible loss of precision" warnings. struct i_s_table_cache_struct: Declare rows_used, rows_allocd as ulint instead of ullint. 32 bits should be enough. fill_innodb_trx_from_cache(), i_s_zip_fill_low(): Cast 64-bit unsigned integers to longlong when calling Field::store(longlong, bool is_unsigned). Otherwise, the compiler would implicitly convert them to double and invoke Field::store(double) instead. recv_truncate_group(), recv_copy_group(), recv_calc_lsn_on_data_add(): Cast ib_uint64_t expressions to ulint to get rid of "possible loss of precision" warnings. (There should not be any loss of precision in these cases.) log_close(), log_checkpoint_margin(): Declare some variables as ib_uint64_t instead of ulint, so that there won't be any potential loss of precision. mach_write_ull(): Cast the second argument of mach_write_to_4() to ulint. OS_FILE_FROM_FD(): Cast the return value of _get_osfhandle() to HANDLE. row_merge_dict_table_get_index(): Cast the parameter of mem_free() to (void*) in order to get rid of the bogus MSVC warning C4090, which has been reported as MSVC bug 101661: <http://connect.microsoft.com/VisualStudio/feedback/ViewFeedback.aspx?FeedbackID=101661> row_mysql_read_blob_ref(): To get rid of a bogus MSVC warning C4090, drop a const qualifier.
2008-03-04 08:57:07 +00:00
*ptr++ = (byte) rec_get_info_bits(rec, dict_table_is_comp(table));
2005-10-27 07:29:40 +00:00
/* Store the values of the system columns */
field = rec_get_nth_field(rec, offsets,
dict_index_get_sys_col_pos(
index, DATA_TRX_ID), &flen);
ut_ad(flen == DATA_TRX_ID_LEN);
trx_id = trx_read_trx_id(field);
/* If it is an update of a delete marked record, then we are
allowed to ignore blob prefixes if the delete marking was done
by some other trx as it must have committed by now for us to
allow an over-write. */
if (ignore_prefix) {
ignore_prefix = ut_dulint_cmp(trx_id, trx->id) != 0;
}
ptr += mach_dulint_write_compressed(ptr, trx_id);
2005-10-27 07:29:40 +00:00
field = rec_get_nth_field(rec, offsets,
dict_index_get_sys_col_pos(
index, DATA_ROLL_PTR), &flen);
ut_ad(flen == DATA_ROLL_PTR_LEN);
2005-10-27 07:29:40 +00:00
ptr += mach_dulint_write_compressed(ptr, trx_read_roll_ptr(field));
2005-10-27 07:29:40 +00:00
/*----------------------------------------*/
/* Store then the fields required to uniquely determine the
record which will be modified in the clustered index */
for (i = 0; i < dict_index_get_n_unique(index); i++) {
field = rec_get_nth_field(rec, offsets, i, &flen);
/* The ordering columns must not be stored externally. */
ut_ad(!rec_offs_nth_extern(offsets, i));
ut_ad(dict_index_get_nth_col(index, i)->ord_part);
if (trx_undo_left(undo_page, ptr) < 5) {
2005-10-27 07:29:40 +00:00
return(0);
}
ptr += mach_write_compressed(ptr, flen);
2005-10-27 07:29:40 +00:00
if (flen != UNIV_SQL_NULL) {
if (trx_undo_left(undo_page, ptr) < flen) {
return(0);
}
ut_memcpy(ptr, field, flen);
ptr += flen;
}
}
/*----------------------------------------*/
/* Save to the undo log the old values of the columns to be updated. */
if (update) {
if (trx_undo_left(undo_page, ptr) < 5) {
2005-10-27 07:29:40 +00:00
return(0);
}
2005-10-27 07:29:40 +00:00
ptr += mach_write_compressed(ptr, upd_get_n_fields(update));
2005-10-27 07:29:40 +00:00
for (i = 0; i < upd_get_n_fields(update); i++) {
2005-10-27 07:29:40 +00:00
ulint pos = upd_get_nth_field(update, i)->field_no;
2005-10-27 07:29:40 +00:00
/* Write field number to undo log */
if (trx_undo_left(undo_page, ptr) < 5) {
2005-10-27 07:29:40 +00:00
return(0);
}
2005-10-27 07:29:40 +00:00
ptr += mach_write_compressed(ptr, pos);
2005-10-27 07:29:40 +00:00
/* Save the old value of field */
field = rec_get_nth_field(rec, offsets, pos, &flen);
2005-10-27 07:29:40 +00:00
if (trx_undo_left(undo_page, ptr) < 15) {
2005-10-27 07:29:40 +00:00
return(0);
}
2005-10-27 07:29:40 +00:00
if (rec_offs_nth_extern(offsets, pos)) {
ptr = trx_undo_page_report_modify_ext(
ptr,
dict_index_get_nth_col(index, pos)
->ord_part
&& !ignore_prefix
&& flen < REC_MAX_INDEX_COL_LEN
? ext_buf : NULL,
dict_table_zip_size(table),
&field, &flen);
2005-10-27 07:29:40 +00:00
/* Notify purge that it eventually has to
free the old externally stored field */
2005-10-27 07:29:40 +00:00
trx->update_undo->del_marks = TRUE;
2005-10-27 07:29:40 +00:00
*type_cmpl_ptr |= TRX_UNDO_UPD_EXTERN;
} else {
ptr += mach_write_compressed(ptr, flen);
}
2005-10-27 07:29:40 +00:00
if (flen != UNIV_SQL_NULL) {
if (trx_undo_left(undo_page, ptr) < flen) {
2005-10-27 07:29:40 +00:00
return(0);
}
ut_memcpy(ptr, field, flen);
ptr += flen;
}
2005-10-27 07:29:40 +00:00
}
}
2005-10-27 07:29:40 +00:00
/*----------------------------------------*/
/* In the case of a delete marking, and also in the case of an update
where any ordering field of any index changes, store the values of all
columns which occur as ordering fields in any index. This info is used
in the purge of old versions where we use it to build and search the
delete marked index records, to look if we can remove them from the
index tree. Note that starting from 4.0.14 also externally stored
fields can be ordering in some index. Starting from 5.2, we no longer
store REC_MAX_INDEX_COL_LEN first bytes to the undo log record,
but we can construct the column prefix fields in the index by
fetching the first page of the BLOB that is pointed to by the
clustered index. This works also in crash recovery, because all pages
(including BLOBs) are recovered before anything is rolled back. */
2005-10-27 07:29:40 +00:00
if (!update || !(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
byte* old_ptr = ptr;
2005-10-27 07:29:40 +00:00
trx->update_undo->del_marks = TRUE;
2005-10-27 07:29:40 +00:00
if (trx_undo_left(undo_page, ptr) < 5) {
2005-10-27 07:29:40 +00:00
return(0);
}
2005-10-27 07:29:40 +00:00
/* Reserve 2 bytes to write the number of bytes the stored
fields take in this undo record */
2005-10-27 07:29:40 +00:00
ptr += 2;
2005-10-27 07:29:40 +00:00
for (col_no = 0; col_no < dict_table_get_n_cols(table);
col_no++) {
2005-10-27 07:29:40 +00:00
const dict_col_t* col
= dict_table_get_nth_col(table, col_no);
2005-10-27 07:29:40 +00:00
if (col->ord_part) {
ulint pos;
2005-10-27 07:29:40 +00:00
/* Write field number to undo log */
if (trx_undo_left(undo_page, ptr) < 5 + 15) {
2005-10-27 07:29:40 +00:00
return(0);
}
pos = dict_index_get_nth_col_pos(index,
col_no);
ptr += mach_write_compressed(ptr, pos);
/* Save the old value of field */
field = rec_get_nth_field(rec, offsets, pos,
&flen);
if (rec_offs_nth_extern(offsets, pos)) {
ptr = trx_undo_page_report_modify_ext(
ptr,
flen < REC_MAX_INDEX_COL_LEN
&& !ignore_prefix
? ext_buf : NULL,
dict_table_zip_size(table),
&field, &flen);
} else {
ptr += mach_write_compressed(
ptr, flen);
}
if (flen != UNIV_SQL_NULL) {
if (trx_undo_left(undo_page, ptr)
< flen) {
return(0);
}
ut_memcpy(ptr, field, flen);
ptr += flen;
}
2005-10-27 07:29:40 +00:00
}
}
mach_write_to_2(old_ptr, ptr - old_ptr);
}
2005-10-27 07:29:40 +00:00
/*----------------------------------------*/
/* Write pointers to the previous and the next undo log records */
if (trx_undo_left(undo_page, ptr) < 2) {
return(0);
}
2005-10-27 07:29:40 +00:00
mach_write_to_2(ptr, first_free);
ptr += 2;
mach_write_to_2(undo_page + first_free, ptr - undo_page);
mach_write_to_2(undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE,
ptr - undo_page);
2005-10-27 07:29:40 +00:00
/* Write to the REDO log about this change in the UNDO log */
trx_undof_page_add_undo_rec_log(undo_page, first_free,
ptr - undo_page, mtr);
return(first_free);
2005-10-27 07:29:40 +00:00
}
/**********************************************************************//**
2005-10-27 07:29:40 +00:00
Reads from an undo log update record the system field values of the old
version.
@return remaining part of undo log record after reading these values */
UNIV_INTERN
2005-10-27 07:29:40 +00:00
byte*
trx_undo_update_rec_get_sys_cols(
/*=============================*/
byte* ptr, /*!< in: remaining part of undo
log record after reading
general parameters */
trx_id_t* trx_id, /*!< out: trx id */
roll_ptr_t* roll_ptr, /*!< out: roll ptr */
ulint* info_bits) /*!< out: info bits state */
2005-10-27 07:29:40 +00:00
{
/* Read the state of the info bits */
*info_bits = mach_read_from_1(ptr);
ptr += 1;
/* Read the values of the system columns */
*trx_id = mach_dulint_read_compressed(ptr);
ptr += mach_dulint_get_compressed_size(*trx_id);
2005-10-27 07:29:40 +00:00
*roll_ptr = mach_dulint_read_compressed(ptr);
ptr += mach_dulint_get_compressed_size(*roll_ptr);
2005-10-27 07:29:40 +00:00
return(ptr);
}
/**********************************************************************//**
Reads from an update undo log record the number of updated fields.
@return remaining part of undo log record after reading this value */
2005-10-27 07:29:40 +00:00
UNIV_INLINE
byte*
trx_undo_update_rec_get_n_upd_fields(
/*=================================*/
byte* ptr, /*!< in: pointer to remaining part of undo log record */
ulint* n) /*!< out: number of fields */
2005-10-27 07:29:40 +00:00
{
*n = mach_read_compressed(ptr);
2005-10-27 07:29:40 +00:00
ptr += mach_get_compressed_size(*n);
return(ptr);
}
/**********************************************************************//**
Reads from an update undo log record a stored field number.
@return remaining part of undo log record after reading this value */
2005-10-27 07:29:40 +00:00
UNIV_INLINE
byte*
trx_undo_update_rec_get_field_no(
/*=============================*/
byte* ptr, /*!< in: pointer to remaining part of undo log record */
ulint* field_no)/*!< out: field number */
2005-10-27 07:29:40 +00:00
{
*field_no = mach_read_compressed(ptr);
2005-10-27 07:29:40 +00:00
ptr += mach_get_compressed_size(*field_no);
return(ptr);
}
/*******************************************************************//**
Builds an update vector based on a remaining part of an undo log record.
@return remaining part of the record, NULL if an error detected, which
means that the record is corrupted */
UNIV_INTERN
2005-10-27 07:29:40 +00:00
byte*
trx_undo_update_rec_get_update(
/*===========================*/
byte* ptr, /*!< in: remaining part in update undo log
2005-10-27 07:29:40 +00:00
record, after reading the row reference
NOTE that this copy of the undo log record must
be preserved as long as the update vector is
used, as we do NOT copy the data in the
record! */
dict_index_t* index, /*!< in: clustered index */
ulint type, /*!< in: TRX_UNDO_UPD_EXIST_REC,
2005-10-27 07:29:40 +00:00
TRX_UNDO_UPD_DEL_REC, or
TRX_UNDO_DEL_MARK_REC; in the last case,
only trx id and roll ptr fields are added to
the update vector */
trx_id_t trx_id, /*!< in: transaction id from this undo record */
roll_ptr_t roll_ptr,/*!< in: roll pointer from this undo record */
ulint info_bits,/*!< in: info bits from this undo record */
trx_t* trx, /*!< in: transaction */
mem_heap_t* heap, /*!< in: memory heap from which the memory
2005-10-27 07:29:40 +00:00
needed is allocated */
upd_t** upd) /*!< out, own: update vector */
2005-10-27 07:29:40 +00:00
{
upd_field_t* upd_field;
upd_t* update;
ulint n_fields;
byte* buf;
ulint i;
ut_a(dict_index_is_clust(index));
2005-10-27 07:29:40 +00:00
if (type != TRX_UNDO_DEL_MARK_REC) {
ptr = trx_undo_update_rec_get_n_upd_fields(ptr, &n_fields);
} else {
n_fields = 0;
}
update = upd_create(n_fields + 2, heap);
update->info_bits = info_bits;
/* Store first trx id and roll ptr to update vector */
upd_field = upd_get_nth_field(update, n_fields);
buf = mem_heap_alloc(heap, DATA_TRX_ID_LEN);
trx_write_trx_id(buf, trx_id);
2005-10-27 07:29:40 +00:00
upd_field_set_field_no(upd_field,
dict_index_get_sys_col_pos(index, DATA_TRX_ID),
index, trx);
2005-10-27 07:29:40 +00:00
dfield_set_data(&(upd_field->new_val), buf, DATA_TRX_ID_LEN);
upd_field = upd_get_nth_field(update, n_fields + 1);
buf = mem_heap_alloc(heap, DATA_ROLL_PTR_LEN);
trx_write_roll_ptr(buf, roll_ptr);
2005-10-27 07:29:40 +00:00
upd_field_set_field_no(
upd_field, dict_index_get_sys_col_pos(index, DATA_ROLL_PTR),
index, trx);
2005-10-27 07:29:40 +00:00
dfield_set_data(&(upd_field->new_val), buf, DATA_ROLL_PTR_LEN);
2005-10-27 07:29:40 +00:00
/* Store then the updated ordinary columns to the update vector */
for (i = 0; i < n_fields; i++) {
byte* field;
ulint len;
ulint field_no;
ulint orig_len;
2005-10-27 07:29:40 +00:00
ptr = trx_undo_update_rec_get_field_no(ptr, &field_no);
if (field_no >= dict_index_get_n_fields(index)) {
fprintf(stderr,
"InnoDB: Error: trying to access"
" update undo rec field %lu in ",
(ulong) field_no);
2005-10-27 07:29:40 +00:00
dict_index_name_print(stderr, trx, index);
fprintf(stderr, "\n"
"InnoDB: but index has only %lu fields\n"
"InnoDB: Submit a detailed bug report"
" to http://bugs.mysql.com\n"
"InnoDB: Run also CHECK TABLE ",
2005-10-27 07:29:40 +00:00
(ulong) dict_index_get_n_fields(index));
ut_print_name(stderr, trx, TRUE, index->table_name);
2005-10-27 07:29:40 +00:00
fprintf(stderr, "\n"
"InnoDB: n_fields = %lu, i = %lu, ptr %p\n",
2005-10-27 07:29:40 +00:00
(ulong) n_fields, (ulong) i, ptr);
return(NULL);
}
upd_field = upd_get_nth_field(update, i);
upd_field_set_field_no(upd_field, field_no, index, trx);
ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);
2005-10-27 07:29:40 +00:00
upd_field->orig_len = orig_len;
if (len == UNIV_SQL_NULL) {
dfield_set_null(&upd_field->new_val);
} else if (len < UNIV_EXTERN_STORAGE_FIELD) {
dfield_set_data(&upd_field->new_val, field, len);
} else {
len -= UNIV_EXTERN_STORAGE_FIELD;
dfield_set_data(&upd_field->new_val, field, len);
dfield_set_ext(&upd_field->new_val);
2005-10-27 07:29:40 +00:00
}
}
*upd = update;
return(ptr);
}
/*******************************************************************//**
2005-10-27 07:29:40 +00:00
Builds a partial row from an update undo log record. It contains the
columns which occur as ordering in any index of the table.
@return pointer to remaining part of undo record */
UNIV_INTERN
2005-10-27 07:29:40 +00:00
byte*
trx_undo_rec_get_partial_row(
/*=========================*/
byte* ptr, /*!< in: remaining part in update undo log
2005-10-27 07:29:40 +00:00
record of a suitable type, at the start of
the stored index columns;
NOTE that this copy of the undo log record must
be preserved as long as the partial row is
used, as we do NOT copy the data in the
record! */
dict_index_t* index, /*!< in: clustered index */
dtuple_t** row, /*!< out, own: partial row */
ibool ignore_prefix, /*!< in: flag to indicate if we
expect blob prefixes in undo. Used
only in the assertion. */
mem_heap_t* heap) /*!< in: memory heap from which the memory
2005-10-27 07:29:40 +00:00
needed is allocated */
{
const byte* end_ptr;
2005-10-27 07:29:40 +00:00
ulint row_len;
ut_ad(index);
ut_ad(ptr);
ut_ad(row);
ut_ad(heap);
ut_ad(dict_index_is_clust(index));
2005-10-27 07:29:40 +00:00
row_len = dict_table_get_n_cols(index->table);
*row = dtuple_create(heap, row_len);
dict_table_copy_types(*row, index->table);
end_ptr = ptr + mach_read_from_2(ptr);
2005-10-27 07:29:40 +00:00
ptr += 2;
while (ptr != end_ptr) {
dfield_t* dfield;
byte* field;
ulint field_no;
const dict_col_t* col;
ulint col_no;
ulint len;
ulint orig_len;
2005-10-27 07:29:40 +00:00
ptr = trx_undo_update_rec_get_field_no(ptr, &field_no);
col = dict_index_get_nth_col(index, field_no);
col_no = dict_col_get_no(col);
ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);
2005-10-27 07:29:40 +00:00
dfield = dtuple_get_nth_field(*row, col_no);
2005-10-27 07:29:40 +00:00
dfield_set_data(dfield, field, len);
branches/zip: Initialize dfield_t::ext as soon as possible. This should fix the bugs introduced in r1591. row_rec_to_index_entry_low(): Clear "n_ext". Do not allow it to be NULL. Add const qualifier to dict_index_t*. row_rec_to_index_entry(): Add the parameters "offsets" and "n_ext". btr_cur_optimistic_update(): Add an assertion that there are no externally stored columns. Remove the unreachable call to btr_cur_unmark_extern_fields() and the preceding unnecessary call to rec_get_offsets(). btr_push_update_extern_fields(): Remove the parameters index, offsets. Only report the additional externally stored columns of the update vector. row_build(), trx_undo_rec_get_partial_row(): Flag externally stored columns also with dfield_set_ext(). rec_copy_prefix_to_dtuple(): Assert that there are no externally stored columns in the prefix. row_build_row_ref(): Note and assert that the index is a secondary index, and assert that there are no externally stored columns. row_build_row_ref_fast(): Assert that there are no externally stored columns. rec_offs_get_n_alloc(): Expose the function. row_build_row_ref_in_tuple(): Assert that there are no externally stored columns in a record of a secondary index. row_build_row_ref_from_row(): Assert that there are no externally stored columns. row_upd_check_references_constraints(): Add the parameter offsets, to avoid a redundant call to rec_get_offsets(). row_upd_del_mark_clust_rec(): Add the parameter offsets. Remove duplicated code. row_ins_index_entry_set_vals(): Copy the external storage flag. sel_pop_prefetched_row(): Assert that there are no externally stored columns. row_scan_and_check_index(): Copy offsets to a temporary heap across the invocation of row_rec_to_index_entry().
2007-10-17 12:13:29 +00:00
if (len != UNIV_SQL_NULL
&& len >= UNIV_EXTERN_STORAGE_FIELD) {
dfield_set_len(dfield,
len - UNIV_EXTERN_STORAGE_FIELD);
branches/zip: Initialize dfield_t::ext as soon as possible. This should fix the bugs introduced in r1591. row_rec_to_index_entry_low(): Clear "n_ext". Do not allow it to be NULL. Add const qualifier to dict_index_t*. row_rec_to_index_entry(): Add the parameters "offsets" and "n_ext". btr_cur_optimistic_update(): Add an assertion that there are no externally stored columns. Remove the unreachable call to btr_cur_unmark_extern_fields() and the preceding unnecessary call to rec_get_offsets(). btr_push_update_extern_fields(): Remove the parameters index, offsets. Only report the additional externally stored columns of the update vector. row_build(), trx_undo_rec_get_partial_row(): Flag externally stored columns also with dfield_set_ext(). rec_copy_prefix_to_dtuple(): Assert that there are no externally stored columns in the prefix. row_build_row_ref(): Note and assert that the index is a secondary index, and assert that there are no externally stored columns. row_build_row_ref_fast(): Assert that there are no externally stored columns. rec_offs_get_n_alloc(): Expose the function. row_build_row_ref_in_tuple(): Assert that there are no externally stored columns in a record of a secondary index. row_build_row_ref_from_row(): Assert that there are no externally stored columns. row_upd_check_references_constraints(): Add the parameter offsets, to avoid a redundant call to rec_get_offsets(). row_upd_del_mark_clust_rec(): Add the parameter offsets. Remove duplicated code. row_ins_index_entry_set_vals(): Copy the external storage flag. sel_pop_prefetched_row(): Assert that there are no externally stored columns. row_scan_and_check_index(): Copy offsets to a temporary heap across the invocation of row_rec_to_index_entry().
2007-10-17 12:13:29 +00:00
dfield_set_ext(dfield);
/* If the prefix of this column is indexed,
ensure that enough prefix is stored in the
undo log record. */
ut_a(ignore_prefix
|| !col->ord_part
|| dfield_get_len(dfield)
>= REC_MAX_INDEX_COL_LEN
+ BTR_EXTERN_FIELD_REF_SIZE);
branches/zip: Initialize dfield_t::ext as soon as possible. This should fix the bugs introduced in r1591. row_rec_to_index_entry_low(): Clear "n_ext". Do not allow it to be NULL. Add const qualifier to dict_index_t*. row_rec_to_index_entry(): Add the parameters "offsets" and "n_ext". btr_cur_optimistic_update(): Add an assertion that there are no externally stored columns. Remove the unreachable call to btr_cur_unmark_extern_fields() and the preceding unnecessary call to rec_get_offsets(). btr_push_update_extern_fields(): Remove the parameters index, offsets. Only report the additional externally stored columns of the update vector. row_build(), trx_undo_rec_get_partial_row(): Flag externally stored columns also with dfield_set_ext(). rec_copy_prefix_to_dtuple(): Assert that there are no externally stored columns in the prefix. row_build_row_ref(): Note and assert that the index is a secondary index, and assert that there are no externally stored columns. row_build_row_ref_fast(): Assert that there are no externally stored columns. rec_offs_get_n_alloc(): Expose the function. row_build_row_ref_in_tuple(): Assert that there are no externally stored columns in a record of a secondary index. row_build_row_ref_from_row(): Assert that there are no externally stored columns. row_upd_check_references_constraints(): Add the parameter offsets, to avoid a redundant call to rec_get_offsets(). row_upd_del_mark_clust_rec(): Add the parameter offsets. Remove duplicated code. row_ins_index_entry_set_vals(): Copy the external storage flag. sel_pop_prefetched_row(): Assert that there are no externally stored columns. row_scan_and_check_index(): Copy offsets to a temporary heap across the invocation of row_rec_to_index_entry().
2007-10-17 12:13:29 +00:00
}
2005-10-27 07:29:40 +00:00
}
return(ptr);
}
#endif /* !UNIV_HOTBACKUP */
2005-10-27 07:29:40 +00:00
/***********************************************************************//**
Erases the unused undo log page end. The bytes from the first free offset
recorded in the undo page header up to (but not including) the page
trailer of FIL_PAGE_DATA_END bytes are filled with 0xff, and an
MLOG_UNDO_ERASE_END initial log record is written to the mtr. */
static
void
trx_undo_erase_page_end(
/*====================*/
	page_t*	undo_page,	/*!< in: undo page whose end to erase */
	mtr_t*	mtr)		/*!< in: mtr */
{
	ulint	first_free;

	first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
				      + TRX_UNDO_PAGE_FREE);
	memset(undo_page + first_free, 0xff,
	       (UNIV_PAGE_SIZE - FIL_PAGE_DATA_END) - first_free);

	mlog_write_initial_log_record(undo_page, MLOG_UNDO_ERASE_END, mtr);
}
/***********************************************************//**
Parses a redo log record of erasing of an undo page end.
@return end of log record or NULL */
UNIV_INTERN
2005-10-27 07:29:40 +00:00
byte*
trx_undo_parse_erase_page_end(
/*==========================*/
byte* ptr, /*!< in: buffer */
byte* end_ptr __attribute__((unused)), /*!< in: buffer end */
page_t* page, /*!< in: page or NULL */
mtr_t* mtr) /*!< in: mtr or NULL */
2005-10-27 07:29:40 +00:00
{
ut_ad(ptr && end_ptr);
if (page == NULL) {
return(ptr);
}
trx_undo_erase_page_end(page, mtr);
return(ptr);
}
#ifndef UNIV_HOTBACKUP
/***********************************************************************//**
2005-10-27 07:29:40 +00:00
Writes information to an undo log about an insert, update, or a delete marking
of a clustered index record. This information is used in a rollback of the
transaction and in consistent reads that must look to the history of this
transaction.
@return DB_SUCCESS or error code */
UNIV_INTERN
2005-10-27 07:29:40 +00:00
ulint
trx_undo_report_row_operation(
/*==========================*/
ulint flags, /*!< in: if BTR_NO_UNDO_LOG_FLAG bit is
2005-10-27 07:29:40 +00:00
set, does nothing */
ulint op_type, /*!< in: TRX_UNDO_INSERT_OP or
2005-10-27 07:29:40 +00:00
TRX_UNDO_MODIFY_OP */
que_thr_t* thr, /*!< in: query thread */
dict_index_t* index, /*!< in: clustered index */
const dtuple_t* clust_entry, /*!< in: in the case of an insert,
2005-10-27 07:29:40 +00:00
index entry to insert into the
clustered index, otherwise NULL */
const upd_t* update, /*!< in: in the case of an update,
2005-10-27 07:29:40 +00:00
the update vector, otherwise NULL */
ulint cmpl_info, /*!< in: compiler info on secondary
2005-10-27 07:29:40 +00:00
index updates */
const rec_t* rec, /*!< in: in case of an update or delete
2005-10-27 07:29:40 +00:00
marking, the record in the clustered
index, otherwise NULL */
roll_ptr_t* roll_ptr) /*!< out: rollback pointer to the
2005-10-27 07:29:40 +00:00
inserted undo log record,
ut_dulint_zero if BTR_NO_UNDO_LOG
flag was specified */
{
trx_t* trx;
trx_undo_t* undo;
ulint page_no;
trx_rseg_t* rseg;
mtr_t mtr;
ulint err = DB_SUCCESS;
2005-10-27 07:29:40 +00:00
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
rec_offs_init(offsets_);
2005-10-27 07:29:40 +00:00
ut_a(dict_index_is_clust(index));
2005-10-27 07:29:40 +00:00
if (flags & BTR_NO_UNDO_LOG_FLAG) {
*roll_ptr = ut_dulint_zero;
return(DB_SUCCESS);
}
2005-10-27 07:29:40 +00:00
ut_ad(thr);
ut_ad((op_type != TRX_UNDO_INSERT_OP)
|| (clust_entry && !update && !rec));
2005-10-27 07:29:40 +00:00
trx = thr_get_trx(thr);
rseg = trx->rseg;
2005-10-27 07:29:40 +00:00
mutex_enter(&(trx->undo_mutex));
/* If the undo log is not assigned yet, assign one */
if (op_type == TRX_UNDO_INSERT_OP) {
if (trx->insert_undo == NULL) {
err = trx_undo_assign_undo(trx, TRX_UNDO_INSERT);
2005-10-27 07:29:40 +00:00
}
undo = trx->insert_undo;
if (UNIV_UNLIKELY(!undo)) {
/* Did not succeed */
mutex_exit(&(trx->undo_mutex));
return(err);
}
2005-10-27 07:29:40 +00:00
} else {
ut_ad(op_type == TRX_UNDO_MODIFY_OP);
if (trx->update_undo == NULL) {
err = trx_undo_assign_undo(trx, TRX_UNDO_UPDATE);
2005-10-27 07:29:40 +00:00
}
undo = trx->update_undo;
if (UNIV_UNLIKELY(!undo)) {
/* Did not succeed */
mutex_exit(&(trx->undo_mutex));
return(err);
}
2005-10-27 07:29:40 +00:00
offsets = rec_get_offsets(rec, index, offsets,
ULINT_UNDEFINED, &heap);
2005-10-27 07:29:40 +00:00
}
page_no = undo->last_page_no;
2005-10-27 07:29:40 +00:00
mtr_start(&mtr);
for (;;) {
buf_block_t* undo_block;
page_t* undo_page;
ulint offset;
undo_block = buf_page_get_gen(undo->space, undo->zip_size,
page_no, RW_X_LATCH,
undo->guess_block, BUF_GET,
__FILE__, __LINE__, &mtr);
buf_block_dbg_add_level(undo_block, SYNC_TRX_UNDO_PAGE);
undo_page = buf_block_get_frame(undo_block);
2005-10-27 07:29:40 +00:00
if (op_type == TRX_UNDO_INSERT_OP) {
offset = trx_undo_page_report_insert(
undo_page, trx, index, clust_entry, &mtr);
2005-10-27 07:29:40 +00:00
} else {
offset = trx_undo_page_report_modify(
undo_page, trx, index, rec, offsets, update,
cmpl_info, &mtr);
2005-10-27 07:29:40 +00:00
}
if (UNIV_UNLIKELY(offset == 0)) {
2005-10-27 07:29:40 +00:00
/* The record did not fit on the page. We erase the
end segment of the undo log page and write a log
record of it: this is to ensure that in the debug
version the replicate page constructed using the log
records stays identical to the original page */
2005-10-27 07:29:40 +00:00
trx_undo_erase_page_end(undo_page, &mtr);
mtr_commit(&mtr);
} else {
/* Success */
mtr_commit(&mtr);
2005-10-27 07:29:40 +00:00
undo->empty = FALSE;
undo->top_page_no = page_no;
undo->top_offset = offset;
undo->top_undo_no = trx->undo_no;
undo->guess_block = undo_block;
2005-10-27 07:29:40 +00:00
UT_DULINT_INC(trx->undo_no);
mutex_exit(&trx->undo_mutex);
*roll_ptr = trx_undo_build_roll_ptr(
op_type == TRX_UNDO_INSERT_OP,
rseg->id, page_no, offset);
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
return(DB_SUCCESS);
2005-10-27 07:29:40 +00:00
}
ut_ad(page_no == undo->last_page_no);
2005-10-27 07:29:40 +00:00
/* We have to extend the undo log by one page */
mtr_start(&mtr);
/* When we add a page to an undo log, this is analogous to
a pessimistic insert in a B-tree, and we must reserve the
counterpart of the tree latch, which is the rseg mutex. */
mutex_enter(&(rseg->mutex));
2005-10-27 07:29:40 +00:00
page_no = trx_undo_add_page(trx, undo, &mtr);
mutex_exit(&(rseg->mutex));
if (UNIV_UNLIKELY(page_no == FIL_NULL)) {
2005-10-27 07:29:40 +00:00
/* Did not succeed: out of space */
mutex_exit(&(trx->undo_mutex));
mtr_commit(&mtr);
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
return(DB_OUT_OF_FILE_SPACE);
}
}
}
/*============== BUILDING PREVIOUS VERSION OF A RECORD ===============*/
/******************************************************************//**
2005-10-27 07:29:40 +00:00
Copies an undo record to heap. This function can be called if we know that
the undo log record exists.
@return own: copy of the record */
UNIV_INTERN
2005-10-27 07:29:40 +00:00
trx_undo_rec_t*
trx_undo_get_undo_rec_low(
/*======================*/
roll_ptr_t roll_ptr, /*!< in: roll pointer to record */
mem_heap_t* heap) /*!< in: memory heap where copied */
2005-10-27 07:29:40 +00:00
{
trx_undo_rec_t* undo_rec;
ulint rseg_id;
ulint page_no;
ulint offset;
page_t* undo_page;
trx_rseg_t* rseg;
ibool is_insert;
mtr_t mtr;
2005-10-27 07:29:40 +00:00
trx_undo_decode_roll_ptr(roll_ptr, &is_insert, &rseg_id, &page_no,
&offset);
2005-10-27 07:29:40 +00:00
rseg = trx_rseg_get_on_id(rseg_id);
mtr_start(&mtr);
undo_page = trx_undo_page_get_s_latched(rseg->space, rseg->zip_size,
page_no, &mtr);
2005-10-27 07:29:40 +00:00
undo_rec = trx_undo_rec_copy(undo_page + offset, heap);
mtr_commit(&mtr);
return(undo_rec);
}
/******************************************************************//**
Copies an undo record to heap.
NOTE: the caller must have latches on the clustered index page and
purge_view.
@return DB_SUCCESS, or DB_MISSING_HISTORY if the undo log has been
truncated and we cannot fetch the old version */
UNIV_INTERN
2005-10-27 07:29:40 +00:00
ulint
trx_undo_get_undo_rec(
/*==================*/
roll_ptr_t roll_ptr, /*!< in: roll pointer to record */
trx_id_t trx_id, /*!< in: id of the trx that generated
2005-10-27 07:29:40 +00:00
the roll pointer: it points to an
undo log of this transaction */
trx_undo_rec_t** undo_rec, /*!< out, own: copy of the record */
mem_heap_t* heap) /*!< in: memory heap where copied */
2005-10-27 07:29:40 +00:00
{
#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
#endif /* UNIV_SYNC_DEBUG */
if (!trx_purge_update_undo_must_exist(trx_id)) {
/* It may be that the necessary undo log has already been
2005-10-27 07:29:40 +00:00
deleted */
return(DB_MISSING_HISTORY);
}
*undo_rec = trx_undo_get_undo_rec_low(roll_ptr, heap);
2005-10-27 07:29:40 +00:00
return(DB_SUCCESS);
}
/*******************************************************************//**
2005-10-27 07:29:40 +00:00
Build a previous version of a clustered index record. This function checks
that the caller has a latch on the index page of the clustered index record
and an s-latch on the purge_view. This guarantees that the stack of versions
is locked all the way down to the purge_view.
@return DB_SUCCESS, or DB_MISSING_HISTORY if the previous version is
earlier than purge_view, which means that it may have been removed,
DB_ERROR if corrupted record */
UNIV_INTERN
2005-10-27 07:29:40 +00:00
ulint
trx_undo_prev_version_build(
/*========================*/
const rec_t* index_rec,/*!< in: clustered index record in the
2005-10-27 07:29:40 +00:00
index tree */
mtr_t* index_mtr __attribute__((unused)),
/*!< in: mtr which contains the latch to
2005-10-27 07:29:40 +00:00
index_rec page and purge_view */
const rec_t* rec, /*!< in: version of a clustered index record */
dict_index_t* index, /*!< in: clustered index */
ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
mem_heap_t* heap, /*!< in: memory heap from which the memory
2005-10-27 07:29:40 +00:00
needed is allocated */
rec_t** old_vers)/*!< out, own: previous version, or NULL if
2005-10-27 07:29:40 +00:00
rec is the first inserted version, or if
history data has been deleted (an error),
or if the purge COULD have removed the version
though it has not yet done so */
2005-10-27 07:29:40 +00:00
{
trx_undo_rec_t* undo_rec = NULL;
2005-10-27 07:29:40 +00:00
dtuple_t* entry;
trx_id_t rec_trx_id;
2005-10-27 07:29:40 +00:00
ulint type;
undo_no_t undo_no;
2005-10-27 07:29:40 +00:00
dulint table_id;
trx_id_t trx_id;
roll_ptr_t roll_ptr;
roll_ptr_t old_roll_ptr;
2005-10-27 07:29:40 +00:00
upd_t* update;
byte* ptr;
ulint info_bits;
ulint cmpl_info;
ibool dummy_extern;
byte* buf;
ulint err;
#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
#endif /* UNIV_SYNC_DEBUG */
ut_ad(mtr_memo_contains_page(index_mtr, index_rec, MTR_MEMO_PAGE_S_FIX)
|| mtr_memo_contains_page(index_mtr, index_rec,
MTR_MEMO_PAGE_X_FIX));
2005-10-27 07:29:40 +00:00
ut_ad(rec_offs_validate(rec, index, offsets));
if (!dict_index_is_clust(index)) {
2005-10-27 07:29:40 +00:00
fprintf(stderr, "InnoDB: Error: trying to access"
" update undo rec for non-clustered index %s\n"
"InnoDB: Submit a detailed bug report to"
" http://bugs.mysql.com\n"
"InnoDB: index record ", index->name);
rec_print(stderr, index_rec, index);
fputs("\n"
"InnoDB: record version ", stderr);
2005-10-27 07:29:40 +00:00
rec_print_new(stderr, rec, offsets);
putc('\n', stderr);
return(DB_ERROR);
}
2005-10-27 07:29:40 +00:00
roll_ptr = row_get_rec_roll_ptr(rec, index, offsets);
old_roll_ptr = roll_ptr;
2005-10-27 07:29:40 +00:00
*old_vers = NULL;
if (trx_undo_roll_ptr_is_insert(roll_ptr)) {
/* The record rec is the first inserted version */
return(DB_SUCCESS);
}
rec_trx_id = row_get_rec_trx_id(rec, index, offsets);
2005-10-27 07:29:40 +00:00
err = trx_undo_get_undo_rec(roll_ptr, rec_trx_id, &undo_rec, heap);
if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
/* The undo record may already have been purged.
This should never happen in InnoDB. */
2005-10-27 07:29:40 +00:00
return(err);
}
ptr = trx_undo_rec_get_pars(undo_rec, &type, &cmpl_info,
&dummy_extern, &undo_no, &table_id);
2005-10-27 07:29:40 +00:00
ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
&info_bits);
/* (a) If a clustered index record version is such that the
trx id stamp in it is bigger than purge_sys->view, then the
BLOBs in that version are known to exist (the purge has not
progressed that far);
(b) if the version is the first version such that trx id in it
is less than purge_sys->view, and it is not delete-marked,
then the BLOBs in that version are known to exist (the purge
cannot have purged the BLOBs referenced by that version
yet).
This function does not fetch any BLOBs. The callers might, by
possibly invoking row_ext_create() via row_build(). However,
they should have all needed information in the *old_vers
returned by this function. This is because *old_vers is based
on the transaction undo log records. The function
trx_undo_page_fetch_ext() will write BLOB prefixes to the
transaction undo log that are at least as long as the longest
possible column prefix in a secondary index. Thus, secondary
index entries for *old_vers can be constructed without
dereferencing any BLOB pointers. */
2005-10-27 07:29:40 +00:00
ptr = trx_undo_rec_skip_row_ref(ptr, index);
ptr = trx_undo_update_rec_get_update(ptr, index, type, trx_id,
roll_ptr, info_bits,
NULL, heap, &update);
2005-10-27 07:29:40 +00:00
if (ut_dulint_cmp(table_id, index->table->id) != 0) {
ptr = NULL;
fprintf(stderr,
"InnoDB: Error: trying to access update undo rec"
" for table %s\n"
"InnoDB: but the table id in the"
" undo record is wrong\n"
"InnoDB: Submit a detailed bug report"
" to http://bugs.mysql.com\n"
"InnoDB: Run also CHECK TABLE %s\n",
2005-10-27 07:29:40 +00:00
index->table_name, index->table_name);
}
if (ptr == NULL) {
/* The record was corrupted, return an error; these printfs
should catch an elusive bug in row_vers_old_has_index_entry */
fprintf(stderr,
"InnoDB: table %s, index %s, n_uniq %lu\n"
"InnoDB: undo rec address %p, type %lu cmpl_info %lu\n"
"InnoDB: undo rec table id %lu %lu,"
" index table id %lu %lu\n"
"InnoDB: dump of 150 bytes in undo rec: ",
2005-10-27 07:29:40 +00:00
index->table_name, index->name,
(ulong) dict_index_get_n_unique(index),
undo_rec, (ulong) type, (ulong) cmpl_info,
(ulong) ut_dulint_get_high(table_id),
(ulong) ut_dulint_get_low(table_id),
(ulong) ut_dulint_get_high(index->table->id),
(ulong) ut_dulint_get_low(index->table->id));
ut_print_buf(stderr, undo_rec, 150);
fputs("\n"
"InnoDB: index record ", stderr);
2005-10-27 07:29:40 +00:00
rec_print(stderr, index_rec, index);
fputs("\n"
"InnoDB: record version ", stderr);
2005-10-27 07:29:40 +00:00
rec_print_new(stderr, rec, offsets);
fprintf(stderr, "\n"
"InnoDB: Record trx id " TRX_ID_FMT
", update rec trx id " TRX_ID_FMT "\n"
"InnoDB: Roll ptr in rec %lu %lu, in update rec"
" %lu %lu\n",
TRX_ID_PREP_PRINTF(rec_trx_id),
TRX_ID_PREP_PRINTF(trx_id),
(ulong) ut_dulint_get_high(old_roll_ptr),
(ulong) ut_dulint_get_low(old_roll_ptr),
(ulong) ut_dulint_get_high(roll_ptr),
(ulong) ut_dulint_get_low(roll_ptr));
2005-10-27 07:29:40 +00:00
trx_purge_sys_print();
return(DB_ERROR);
}
if (row_upd_changes_field_size_or_external(index, offsets, update)) {
branches/zip: dtuple_convert_big_rec(): Do not store anything locally of externally stored columns, and fix bugs introduced in r873. (Bug #22496) btr_page_get_sure_split_rec(), btr_page_insert_fits(), rec_get_converted_size(), rec_convert_dtuple_to_rec(), rec_convert_dtuple_to_rec_old(), rec_convert_dtuple_to_rec_new(): Add parameters ext and n_ext. Flag external fields during the conversion. rec_set_field_extern_bits(), rec_set_field_extern_bits_new(), rec_offs_set_nth_extern(), rec_set_nth_field_extern_bit_old(): Remove. The bits are set by rec_convert_dtuple_to_rec(). page_cur_insert_rec_low(): Remove the parameters ext and n_ext. btr_cur_add_ext(): New utility function for updating and sorting ext[]. Low-level functions now expect the array to be in ascending order for performance reasons. Used in btr_cur_optimistic_insert(), btr_cur_pessimistic_insert(), and btr_cur_pessimistic_update(). btr_cur_optimistic_insert(): Remove some defensive code, because we cannot compute the added parameters of rec_get_converted_size(). btr_push_update_extern_fields(): Sort the array. Require the array to be twice the maximum usage, so that ut_ulint_sort() can be used. dtuple_convert_big_rec(): Allocate new space for the BLOB pointer, to avoid overwriting prefix indexes to the same column. Adapt dtuple_convert_back_big_rec(). row_build_index_entry(): Fetch the columns also for prefix indexes of the clustered index. page_zip_apply_log(), page_zip_decompress_clust(): Allow externally stored fields to lack a locally stored part.
2006-09-29 10:40:42 +00:00
ulint n_ext;
2005-10-27 07:29:40 +00:00
/* We have to set the appropriate extern storage bits in the
old version of the record: the extern bits in rec for those
fields that update does NOT update, as well as the the bits for
those fields that update updates to become externally stored
fields. Store the info: */
2005-10-27 07:29:40 +00:00
branches/zip: Initialize dfield_t::ext as soon as possible. This should fix the bugs introduced in r1591. row_rec_to_index_entry_low(): Clear "n_ext". Do not allow it to be NULL. Add const qualifier to dict_index_t*. row_rec_to_index_entry(): Add the parameters "offsets" and "n_ext". btr_cur_optimistic_update(): Add an assertion that there are no externally stored columns. Remove the unreachable call to btr_cur_unmark_extern_fields() and the preceding unnecessary call to rec_get_offsets(). btr_push_update_extern_fields(): Remove the parameters index, offsets. Only report the additional externally stored columns of the update vector. row_build(), trx_undo_rec_get_partial_row(): Flag externally stored columns also with dfield_set_ext(). rec_copy_prefix_to_dtuple(): Assert that there are no externally stored columns in the prefix. row_build_row_ref(): Note and assert that the index is a secondary index, and assert that there are no externally stored columns. row_build_row_ref_fast(): Assert that there are no externally stored columns. rec_offs_get_n_alloc(): Expose the function. row_build_row_ref_in_tuple(): Assert that there are no externally stored columns in a record of a secondary index. row_build_row_ref_from_row(): Assert that there are no externally stored columns. row_upd_check_references_constraints(): Add the parameter offsets, to avoid a redundant call to rec_get_offsets(). row_upd_del_mark_clust_rec(): Add the parameter offsets. Remove duplicated code. row_ins_index_entry_set_vals(): Copy the external storage flag. sel_pop_prefetched_row(): Assert that there are no externally stored columns. row_scan_and_check_index(): Copy offsets to a temporary heap across the invocation of row_rec_to_index_entry().
2007-10-17 12:13:29 +00:00
entry = row_rec_to_index_entry(ROW_COPY_DATA, rec, index,
offsets, &n_ext, heap);
n_ext += btr_push_update_extern_fields(entry, update, heap);
/* The page containing the clustered index record
corresponding to entry is latched in mtr. Thus the
following call is safe. */
row_upd_index_replace_new_col_vals(entry, index, update, heap);
2005-10-27 07:29:40 +00:00
buf = mem_heap_alloc(heap, rec_get_converted_size(index, entry,
n_ext));
2005-10-27 07:29:40 +00:00
*old_vers = rec_convert_dtuple_to_rec(buf, index,
entry, n_ext);
2005-10-27 07:29:40 +00:00
} else {
buf = mem_heap_alloc(heap, rec_offs_size(offsets));
*old_vers = rec_copy(buf, rec, offsets);
rec_offs_make_valid(*old_vers, index, offsets);
row_upd_rec_in_place(*old_vers, index, offsets, update, NULL);
2005-10-27 07:29:40 +00:00
}
return(DB_SUCCESS);
}
#endif /* !UNIV_HOTBACKUP */