mariadb/innobase/row/row0vers.c
unknown cdddb960be row0vers.c:
Add diagnostic code to track a probable table corruption reported by a user


innobase/row/row0vers.c:
  Add diagnostic code to track a probable table corruption reported by a user
2002-07-25 22:51:43 +03:00

442 lines
12 KiB
C

/******************************************************
Row versions
(c) 1997 Innobase Oy
Created 2/6/1997 Heikki Tuuri
*******************************************************/
#include "row0vers.h"
#ifdef UNIV_NONINL
#include "row0vers.ic"
#endif
#include "dict0dict.h"
#include "dict0boot.h"
#include "btr0btr.h"
#include "mach0data.h"
#include "trx0rseg.h"
#include "trx0trx.h"
#include "trx0roll.h"
#include "trx0undo.h"
#include "trx0purge.h"
#include "trx0rec.h"
#include "que0que.h"
#include "row0row.h"
#include "row0upd.h"
#include "rem0cmp.h"
#include "read0read.h"
/*********************************************************************
Finds out if an active transaction has inserted or modified a secondary
index record, that is, whether the transaction that last wrote the
corresponding clustered index record holds an implicit x-lock on rec.
The decision is made by walking the earlier versions of the clustered
index record, as built by trx_undo_prev_version_build(), and checking
whether any of them would require rec to be in a different state.
NOTE: the kernel mutex is temporarily released in this function! The
caller must own the kernel mutex on entry (asserted below), and the
mutex is owned again on every return path. The caller must not hold an
s-latch on purge_sys->latch (asserted below). */
trx_t*
row_vers_impl_x_locked_off_kernel(
/*==============================*/
/* out: NULL if there is no implicit x-lock on
rec: the modifying transaction has committed,
the clustered index record was not found, or
the versions written by that transaction do not
require rec to be in a different state; else
the result of trx_get_on_id() for the active
transaction; NOTE that the kernel mutex is
temporarily released! */
rec_t* rec, /* in: record in a secondary index */
dict_index_t* index) /* in: the secondary index */
{
dict_index_t* clust_index;
rec_t* clust_rec;
rec_t* version;
rec_t* prev_version;
dulint trx_id;
dulint prev_trx_id;
mem_heap_t* heap;
mem_heap_t* heap2;
dtuple_t* row;
dtuple_t* entry = NULL; /* assignment to eliminate compiler
warning */
trx_t* trx;
ibool vers_del;
ibool rec_del;
ulint err;
mtr_t mtr;
char err_buf[1000]; /* receives the printable form of rec for the
error message below; rec_sprintf is limited to
900 bytes of it */
ut_ad(mutex_own(&kernel_mutex));
ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
mutex_exit(&kernel_mutex);
mtr_start(&mtr);
/* Search for the clustered index record: this is a time-consuming
operation: therefore we release the kernel mutex; also, the release
is required by the latching order convention. The latch on the
clustered index locks the top of the stack of versions. We also
reserve purge_latch to lock the bottom of the version stack. */
clust_rec = row_get_clust_rec(BTR_SEARCH_LEAF, rec, index,
&clust_index, &mtr);
if (!clust_rec) {
/* Diagnostic path: report the orphaned secondary index record
to stderr, then restore the caller's latching state (kernel
mutex owned, mtr committed) before returning */
rec_sprintf(err_buf, 900, rec);
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Error: cannot find the clustered index record\n"
"InnoDB: for a secondary index record in table %s index %s.\n"
"InnoDB: Secondary index record %s.\n"
"InnoDB: The table is probably corrupt. Please run CHECK TABLE on it.\n"
"InnoDB: You can try to repair the table by dump + drop + reimport.\n"
"InnoDB: Send a detailed bug report to mysql@lists.mysql.com.\n",
index->table_name, index->name, err_buf);
mutex_enter(&kernel_mutex);
mtr_commit(&mtr);
/* We assume there is no lock on the record, though this
is not certain because the table is apparently corrupt */
return(NULL);
}
trx_id = row_get_rec_trx_id(clust_rec, clust_index);
/* The purge latch is taken inside mtr before the kernel mutex is
re-acquired */
mtr_s_lock(&(purge_sys->latch), &mtr);
mutex_enter(&kernel_mutex);
if (!trx_is_active(trx_id)) {
/* The transaction that modified or inserted clust_rec is no
longer active: no implicit lock on rec */
mtr_commit(&mtr);
return(NULL);
}
/* We look up if some earlier version of the clustered index record
would require rec to be in a different state (delete marked or
unmarked, or not existing). If there is such a version, then rec was
modified by the trx_id transaction, and it has an implicit x-lock on
rec. Note that if clust_rec itself would require rec to be in a
different state, then the trx_id transaction has not yet had time to
modify rec, and does not necessarily have an implicit x-lock on rec. */
rec_del = rec_get_deleted_flag(rec);
trx = NULL;
version = clust_rec;
heap = NULL;
/* Each iteration steps one version back in time. The loop is entered
and left with the kernel mutex owned; every break below happens after
the mutex has been re-acquired. */
for (;;) {
mutex_exit(&kernel_mutex);
/* While we retrieve an earlier version of clust_rec, we
release the kernel mutex, because it may take time to access
the disk. After the release, we have to check if the trx_id
transaction is still active. We keep the semaphore in mtr on
the clust_rec page, so that no other transaction can update
it and get an implicit x-lock on rec. */
/* The heap of the previous iteration still holds version, which
is an input to the call below, so it can be freed only after
the call */
heap2 = heap;
heap = mem_heap_create(1024);
err = trx_undo_prev_version_build(clust_rec, &mtr, version,
clust_index, heap,
&prev_version);
if (heap2) {
mem_heap_free(heap2); /* version was stored in heap2,
if heap2 != NULL */
}
/* NOTE(review): prev_version is read here before err has been
inspected; this assumes that trx_undo_prev_version_build()
always assigns *old_vers, also when it fails - TODO confirm */
if (prev_version) {
/* Build, while the kernel mutex is still released, the
secondary index entry that prev_version would have */
row = row_build(ROW_COPY_POINTERS, clust_index,
prev_version, heap);
entry = row_build_index_entry(row, index, heap);
}
mutex_enter(&kernel_mutex);
if (!trx_is_active(trx_id)) {
/* Transaction no longer active: no implicit x-lock */
break;
}
/* If the transaction is still active, the previous version
of clust_rec must be accessible if not a fresh insert; we
may assert the following: */
ut_ad(err == DB_SUCCESS);
if (prev_version == NULL) {
/* It was a freshly inserted version: there is an
implicit x-lock on rec */
trx = trx_get_on_id(trx_id);
break;
}
/* If we get here, we know that the trx_id transaction is
still active and it has modified prev_version. Let us check
if prev_version would require rec to be in a different state. */
vers_del = rec_get_deleted_flag(prev_version);
if (0 == cmp_dtuple_rec(entry, rec)) {
/* The delete marks of rec and prev_version should be
equal for rec to be in the state required by
prev_version */
if (rec_del != vers_del) {
trx = trx_get_on_id(trx_id);
break;
}
} else if (!rec_del) {
/* The delete mark should be set in rec for it to be
in the state required by prev_version */
trx = trx_get_on_id(trx_id);
break;
}
prev_trx_id = row_get_rec_trx_id(prev_version, clust_index);
if (0 != ut_dulint_cmp(trx_id, prev_trx_id)) {
/* prev_version carries a different transaction id: the
versions modified by the trx_id transaction end here:
no implicit x-lock */
break;
}
version = prev_version;
}/* for (;;) */
/* The loop body has run at least once, so heap is not NULL here */
mtr_commit(&mtr);
mem_heap_free(heap);
return(trx);
}
/*********************************************************************
Decides whether a delete marked earlier version of a clustered index
record still has to be kept because it is >= the purge view. The
function s-locks purge_sys->latch through mtr and then asks
trx_purge_update_undo_must_exist() about trx_id. The caller must not
already hold an s-latch on purge_sys->latch (asserted below). */
ibool
row_vers_must_preserve_del_marked(
/*==============================*/
			/* out: TRUE if earlier version should be
			preserved, FALSE otherwise */
	dulint	trx_id,	/* in: transaction id in the version */
	mtr_t*	mtr)	/* in: mtr holding the latch on the clustered index
			record; it will also hold the latch on purge_view */
{
	ibool	must_preserve = FALSE;

	ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));

	/* The latch is registered in mtr; it is not released in this
	function (presumably it is released when the caller commits mtr) */
	mtr_s_lock(&(purge_sys->latch), mtr);

	if (trx_purge_update_undo_must_exist(trx_id)) {
		/* A purge operation is not yet allowed to remove this
		delete marked record */
		must_preserve = TRUE;
	}

	return(must_preserve);
}
/*********************************************************************
Finds out if a version of the record, where the version >= the current
purge view, should have ientry as its secondary index entry. We check
if there is any not delete marked version of the record where the trx
id >= purge view, and the secondary index entry == ientry; exactly in
this case we return TRUE. The comparison is made with
dtuple_datas_are_ordering_equal(), not as binary fields. The function
s-locks purge_sys->latch through mtr; the caller must not already hold
an s-latch on it (asserted below). */
ibool
row_vers_old_has_index_entry(
/*=========================*/
				/* out: TRUE if some examined version
				that is not delete marked has ientry as
				its secondary index entry; FALSE if the
				versions end, or building an earlier
				version fails, before such a version is
				found */
	ibool		also_curr,/* in: TRUE if also rec is included in the
				versions to search; otherwise only versions
				prior to it are searched */
	rec_t*		rec,	/* in: record in the clustered index; the
				caller must have a latch on the page */
	mtr_t*		mtr,	/* in: mtr holding the latch on rec; it will
				also hold the latch on purge_view */
	dict_index_t*	index,	/* in: the secondary index */
	dtuple_t*	ientry)	/* in: the secondary index entry */
{
	dict_index_t*	clust_index;
	rec_t*		cur_version;
	rec_t*		older_version;
	mem_heap_t*	heap;
	mem_heap_t*	stale_heap;
	dtuple_t*	row;
	dtuple_t*	entry;
	ulint		err;
	ibool		found;

	ut_ad(mtr_memo_contains(mtr, buf_block_align(rec), MTR_MEMO_PAGE_X_FIX)
	      || mtr_memo_contains(mtr, buf_block_align(rec),
						MTR_MEMO_PAGE_S_FIX));
	ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));

	mtr_s_lock(&(purge_sys->latch), mtr);

	clust_index = dict_table_get_first_index(index->table);

	/* Step 1: the current version, if the caller asked for it and it
	is not delete marked */

	if (also_curr) {
		if (!rec_get_deleted_flag(rec)) {
			heap = mem_heap_create(1024);

			row = row_build(ROW_COPY_POINTERS, clust_index, rec,
									heap);
			entry = row_build_index_entry(row, index, heap);

			/* NOTE that we cannot do the comparison as binary
			fields because the row is maybe being modified so that
			the clustered index record has already been updated
			to a different binary value in a char field, but the
			collation identifies the old and new value anyway! */

			found = dtuple_datas_are_ordering_equal(ientry, entry);

			/* row and entry were built in heap, which is only
			needed for the comparison above */
			mem_heap_free(heap);

			if (found) {

				return(TRUE);
			}
		}
	}

	/* Step 2: walk the earlier versions, one step back per iteration,
	until a match is found or the versions end */

	cur_version = rec;
	heap = NULL;

	for (;;) {
		/* cur_version may live in the heap of the previous
		iteration: that heap can be freed only after the next
		version has been built from it */
		stale_heap = heap;
		heap = mem_heap_create(1024);

		err = trx_undo_prev_version_build(rec, mtr, cur_version,
						clust_index, heap,
						&older_version);
		if (stale_heap != NULL) {
			mem_heap_free(stale_heap);
		}

		/* err is tested first: older_version is looked at only
		when the build succeeded */
		if (err != DB_SUCCESS || older_version == NULL) {
			/* Versions end here */
			found = FALSE;

			break;
		}

		if (!rec_get_deleted_flag(older_version)) {
			row = row_build(ROW_COPY_POINTERS, clust_index,
						older_version, heap);
			entry = row_build_index_entry(row, index, heap);

			/* NOTE that we cannot do the comparison as binary
			fields because maybe the secondary index record has
			already been updated to a different binary value in
			a char field, but the collation identifies the old
			and new value anyway! */

			if (dtuple_datas_are_ordering_equal(ientry, entry)) {
				found = TRUE;

				break;
			}
		}

		cur_version = older_version;
	}

	/* Both exits from the loop leave the heap of the last iteration
	to be freed */
	mem_heap_free(heap);

	return(found);
}
/*********************************************************************
Constructs the version of a clustered index record which a consistent
read should see. We assume that the trx id stored in rec is such that
the consistent read should not see rec in its present version. Earlier
versions are built one at a time with trx_undo_prev_version_build()
until one is found whose trx id the view sees, or the versions end.
purge_sys->latch is s-locked for the duration of the call and released
before returning; the caller must not already hold an s-latch on it
(asserted below). */
ulint
row_vers_build_for_consistent_read(
/*===============================*/
				/* out: DB_SUCCESS or DB_MISSING_HISTORY;
				more precisely, any value other than
				DB_SUCCESS returned by
				trx_undo_prev_version_build() is passed
				on as such, and in that case *old_vers
				is not assigned */
	rec_t*		rec,	/* in: record in a clustered index; the
				caller must have a latch on the page; this
				latch locks the top of the stack of versions
				of this records */
	mtr_t*		mtr,	/* in: mtr holding the latch on rec */
	dict_index_t*	index,	/* in: the clustered index */
	read_view_t*	view,	/* in: the consistent read view */
	mem_heap_t*	in_heap,/* in: memory heap from which the memory for
				old_vers is allocated; memory for possible
				intermediate versions is allocated and freed
				locally within the function */
	rec_t**		old_vers)/* out, own: old version, or NULL if the
				record does not exist in the view, that is,
				it was freshly inserted afterwards */
{
	rec_t*		cur_version;
	rec_t*		older_version;
	dulint		older_trx_id;
	mem_heap_t*	heap;
	mem_heap_t*	stale_heap;
	byte*		copy_buf;
	ulint		err;

	ut_ad(index->type & DICT_CLUSTERED);
	ut_ad(mtr_memo_contains(mtr, buf_block_align(rec), MTR_MEMO_PAGE_X_FIX)
	      || mtr_memo_contains(mtr, buf_block_align(rec),
						MTR_MEMO_PAGE_S_FIX));
	ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
	ut_ad(!read_view_sees_trx_id(view, row_get_rec_trx_id(rec, index)));

	rw_lock_s_lock(&(purge_sys->latch));

	/* Each round steps one version back in time; the step itself is
	in the loop header and is taken only when the round did not break */

	for (cur_version = rec, heap = NULL;; cur_version = older_version) {

		/* cur_version may live in the heap of the previous round:
		that heap can be freed only after the next version has been
		built from it */
		stale_heap = heap;
		heap = mem_heap_create(1024);

		err = trx_undo_prev_version_build(rec, mtr, cur_version,
						index, heap, &older_version);
		if (stale_heap != NULL) {
			mem_heap_free(stale_heap);
		}

		if (err != DB_SUCCESS) {
			/* The error is returned as such; *old_vers is left
			untouched */
			break;
		}

		/* From here on err == DB_SUCCESS, which is what the two
		successful exits below return */

		if (older_version == NULL) {
			/* It was a freshly inserted version */
			*old_vers = NULL;

			break;
		}

		older_trx_id = row_get_rec_trx_id(older_version, index);

		if (read_view_sees_trx_id(view, older_trx_id)) {
			/* The view already sees this version: we can copy
			it to in_heap and return. The copy is needed because
			older_version is stored in heap, which is freed
			below. */
			copy_buf = mem_heap_alloc(in_heap,
					rec_get_size(older_version));
			*old_vers = rec_copy(copy_buf, older_version);

			break;
		}
	}

	/* The loop body has run at least once, so heap is not NULL here */
	mem_heap_free(heap);
	rw_lock_s_unlock(&(purge_sys->latch));

	return(err);
}