mariadb/storage/innobase/row/row0purge.c
aivanov@mysql.com 1d7de700e2 Applied innodb-5.1-ss594 snapshot.
Fixed BUG#19542 "InnoDB doesn't increase the Handler_read_prev couter".
 Fixed BUG#19609 "Case sensitivity of innodb_data_file_path gives stupid error".
 Fixed BUG#19727 "InnoDB crashed server and crashed tables are ot recoverable".
 Also:
 * Remove remnants of the obsolete concept of memoryfixing tables and indexes.
 * Remove unused dict_table_LRU_trim().
 * Remove unused 'trx' parameter from dict_table_get_on_id_low(),
   dict_table_get(), dict_table_get_and_increment_handle_count().
 * Add a normal linked list implementation.
 * Add a work queue implementation.
 * Add 'level' parameter to mutex_create() and rw_lock_create().
   Remove mutex_set_level() and rw_lock_set_level().
 * Rename SYNC_LEVEL_NONE to SYNC_LEVEL_VARYING.
 * Add support for bound ids in InnoDB's parser.
 * Define UNIV_BTR_DEBUG for enabling consistency checks of
   FIL_PAGE_NEXT and FIL_PAGE_PREV when accessing sibling
   pages of B-tree indexes.
   btr_validate_level(): Check the validity of the doubly linked
   list formed by FIL_PAGE_NEXT and FIL_PAGE_PREV.
 * Adapt InnoDB to the new tablename to filename encoding in MySQL 5.1.
   ut_print_name(), ut_print_name1(): Add parameter 'table_id' for
   distinguishing names of tables from other identifiers.
   New: innobase_convert_from_table_id(), innobase_convert_from_id(),
        innobase_convert_from_filename(), innobase_get_charset.
   dict_accept(), dict_scan_id(), dict_scan_col(), dict_scan_table_name(),
   dict_skip_word(), dict_create_foreign_constraints_low(): Add
   parameter 'cs' so that isspace() can be replaced with my_isspace(),
   whose operation depends on the connection character set.
   dict_scan_id(): Convert identifier to UTF-8.
   dict_str_starts_with_keyword(): New extern function, to replace
   dict_accept() in row_search_for_mysql().
   mysql_get_identifier_quote_char(): Replaced with innobase_print_identifier().
   ha_innobase::create(): Remove the thd->convert_strin() call. Pass the
   statement to InnoDB in the connection character set and let InnoDB
   convert the identifier to UTF-8.
 * Add max_row_size to dict_table_t.
 * btr0cur.c
   btr_copy_externally_stored_field(): Only set the 'offset' variable
   when needed.
 * buf0buf.c
   buf_page_io_complete(): Write to the error log if the page number or
   the space id o the disk do not match those in memory. Also write to
   the error log if a page was read from the doublewrite buffer. The
   doublewrite buffer should be only read by the lower-level function
   fil_io() at database startup.
 * dict0dict.c
   dict_scan_table_name(): Remove fallback to differently encoded name
   when the table is not found. The encoding is handled at a higher level.
 * ha_innodb.cc
   Increment statistic counter in ha_innobase::index_prev() (bug 19542).
   Add innobase_convert_string wrapper function and a new file
   ha_prototypes.h.
   innobase_print_identifier(): Remove TODO comment before calling
   get_quote_char_for_identifier(). That function apparently assumes
   the identifier to be encoded in UTF-8.
 * ibuf0ibuf.c|h
   ibuf_count_get(), ibuf_counts[], ibuf_count_inited(): Define these
   only #ifdef UNIV_IBUF_DEBUG. Previously, when compiled without
   UNIV_IBUF_DEBUG, invoking ibuf_count_get() would crash InnoDB.
   The function is only being called #ifdef UNIV_IBUF_DEBUG.
 * innodb.result
   Adjust the results for changes in the foreign key error messages.
 * mem0mem.c|h
   New: mem_heap_dup(), mem_heap_printf(), mem_heap_cat().
 * os0file.c
   Check the page trailers also after writing to disk. This improves
   chances of diagnosing bug 18886.
   os_file_check_page_trailers(): New function for checking that the
   two copies of the LSN stamped on the page match.
   os_aio_simulated_handle(): Call os_file_check_page_trailers()
   before and after os_file_write().
 * row0mysql.c
   Move trx_commit_for_mysql(trx) calls before calls to
   row_mysql_unlock_data_dictionary(trx) (bug 19727).
 * row0sel.c
   row_fetch_print(): Handle SQL NULL values without crashing.
   row_sel_store_mysql_rec(): Remove useless call to rec_get_nth_field
   when handling an externally stored column.
   Fetch externally stored fields when using InnoDB's internal SQL
   parser.
   Optimize BLOB selects by using prebuilt->blob_heap directly instead
   of first reading BLOB data to a temporary heap and then copying it
   to prebuilt->blob_heap.
 * srv0srv.c
   srv_master_thread(): Remove unreachable code.
 * srv0start.c
   srv_parse_data_file_paths_and_sizes(): Accept lower-case 'm' and
   'g' as abbreviations of megabyte and gigabyte (bug 19609).
   srv_parse_megabytes(): New fuction.
 * ut0dbg.c|h
   Implement InnoDB assertions (ut_a and ut_error) with abort() when
   the code is compiled with GCC 3 or later on other platforms than
   Windows or Netware. Also disable the variable ut_dbg_stop_threads
   and the function ut_dbg_stop_thread() i this case, unless
   UNIV_SYC_DEBUG is defined. This should allow the compiler to
   generate more compact code for assertions.
 * ut0list.c|h
   Add ib_list_create_heap().
2006-06-01 10:34:04 +04:00

671 lines
15 KiB
C

/******************************************************
Purge obsolete records
(c) 1997 Innobase Oy
Created 3/14/1997 Heikki Tuuri
*******************************************************/
#include "row0purge.h"
#ifdef UNIV_NONINL
#include "row0purge.ic"
#endif
#include "fsp0fsp.h"
#include "mach0data.h"
#include "trx0rseg.h"
#include "trx0trx.h"
#include "trx0roll.h"
#include "trx0undo.h"
#include "trx0purge.h"
#include "trx0rec.h"
#include "que0que.h"
#include "row0row.h"
#include "row0upd.h"
#include "row0vers.h"
#include "row0mysql.h"
#include "log0log.h"
/************************************************************************
Creates a purge node to a query graph. */
purge_node_t*
row_purge_node_create(
/*==================*/
/* out, own: purge node */
que_thr_t* parent, /* in: parent node, i.e., a thr node */
mem_heap_t* heap) /* in: memory heap where created */
{
purge_node_t* node;
ut_ad(parent && heap);
node = mem_heap_alloc(heap, sizeof(purge_node_t));
node->common.type = QUE_NODE_PURGE;
node->common.parent = parent;
node->heap = mem_heap_create(256);
return(node);
}
/***************************************************************
Repositions the pcur in the purge node on the clustered index record,
if found. */
static
ibool
row_purge_reposition_pcur(
/*======================*/
/* out: TRUE if the record was found */
ulint mode, /* in: latching mode */
purge_node_t* node, /* in: row purge node */
mtr_t* mtr) /* in: mtr */
{
ibool found;
if (node->found_clust) {
found = btr_pcur_restore_position(mode, &(node->pcur), mtr);
return(found);
}
found = row_search_on_row_ref(&(node->pcur), mode, node->table,
node->ref, mtr);
node->found_clust = found;
if (found) {
btr_pcur_store_position(&(node->pcur), mtr);
}
return(found);
}
/***************************************************************
Removes a delete marked clustered index record if possible. */
static
ibool
row_purge_remove_clust_if_poss_low(
/*===============================*/
/* out: TRUE if success, or if not found, or
if modified after the delete marking */
purge_node_t* node, /* in: row purge node */
ulint mode) /* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
{
dict_index_t* index;
btr_pcur_t* pcur;
btr_cur_t* btr_cur;
ibool success;
ulint err;
mtr_t mtr;
rec_t* rec;
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
*offsets_ = (sizeof offsets_) / sizeof *offsets_;
index = dict_table_get_first_index(node->table);
pcur = &(node->pcur);
btr_cur = btr_pcur_get_btr_cur(pcur);
mtr_start(&mtr);
success = row_purge_reposition_pcur(mode, node, &mtr);
if (!success) {
/* The record is already removed */
btr_pcur_commit_specify_mtr(pcur, &mtr);
return(TRUE);
}
rec = btr_pcur_get_rec(pcur);
if (0 != ut_dulint_cmp(node->roll_ptr,
row_get_rec_roll_ptr(rec, index, rec_get_offsets(
rec, index, offsets_, ULINT_UNDEFINED, &heap)))) {
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
/* Someone else has modified the record later: do not remove */
btr_pcur_commit_specify_mtr(pcur, &mtr);
return(TRUE);
}
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
if (mode == BTR_MODIFY_LEAF) {
success = btr_cur_optimistic_delete(btr_cur, &mtr);
} else {
ut_ad(mode == BTR_MODIFY_TREE);
btr_cur_pessimistic_delete(&err, FALSE, btr_cur, FALSE, &mtr);
if (err == DB_SUCCESS) {
success = TRUE;
} else if (err == DB_OUT_OF_FILE_SPACE) {
success = FALSE;
} else {
ut_error;
}
}
btr_pcur_commit_specify_mtr(pcur, &mtr);
return(success);
}
/***************************************************************
Removes a clustered index record if it has not been modified after the delete
marking. */
static
void
row_purge_remove_clust_if_poss(
/*===========================*/
purge_node_t* node) /* in: row purge node */
{
ibool success;
ulint n_tries = 0;
/* fputs("Purge: Removing clustered record\n", stderr); */
success = row_purge_remove_clust_if_poss_low(node, BTR_MODIFY_LEAF);
if (success) {
return;
}
retry:
success = row_purge_remove_clust_if_poss_low(node, BTR_MODIFY_TREE);
/* The delete operation may fail if we have little
file space left: TODO: easiest to crash the database
and restart with more file space */
if (!success && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) {
n_tries++;
os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
goto retry;
}
ut_a(success);
}
/***************************************************************
Removes a secondary index entry if possible. */
static
ibool
row_purge_remove_sec_if_poss_low(
/*=============================*/
/* out: TRUE if success or if not found */
purge_node_t* node, /* in: row purge node */
dict_index_t* index, /* in: index */
dtuple_t* entry, /* in: index entry */
ulint mode) /* in: latch mode BTR_MODIFY_LEAF or
BTR_MODIFY_TREE */
{
btr_pcur_t pcur;
btr_cur_t* btr_cur;
ibool success;
ibool old_has = 0; /* remove warning */
ibool found;
ulint err;
mtr_t mtr;
mtr_t* mtr_vers;
log_free_check();
mtr_start(&mtr);
found = row_search_index_entry(index, entry, mode, &pcur, &mtr);
if (!found) {
/* Not found */
/* fputs("PURGE:........sec entry not found\n", stderr); */
/* dtuple_print(entry); */
btr_pcur_close(&pcur);
mtr_commit(&mtr);
return(TRUE);
}
btr_cur = btr_pcur_get_btr_cur(&pcur);
/* We should remove the index record if no later version of the row,
which cannot be purged yet, requires its existence. If some requires,
we should do nothing. */
mtr_vers = mem_alloc(sizeof(mtr_t));
mtr_start(mtr_vers);
success = row_purge_reposition_pcur(BTR_SEARCH_LEAF, node, mtr_vers);
if (success) {
old_has = row_vers_old_has_index_entry(TRUE,
btr_pcur_get_rec(&(node->pcur)),
mtr_vers, index, entry);
}
btr_pcur_commit_specify_mtr(&(node->pcur), mtr_vers);
mem_free(mtr_vers);
if (!success || !old_has) {
/* Remove the index record */
if (mode == BTR_MODIFY_LEAF) {
success = btr_cur_optimistic_delete(btr_cur, &mtr);
} else {
ut_ad(mode == BTR_MODIFY_TREE);
btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
FALSE, &mtr);
if (err == DB_SUCCESS) {
success = TRUE;
} else if (err == DB_OUT_OF_FILE_SPACE) {
success = FALSE;
} else {
ut_error;
}
}
}
btr_pcur_close(&pcur);
mtr_commit(&mtr);
return(success);
}
/***************************************************************
Removes a secondary index entry if possible. */
UNIV_INLINE
void
row_purge_remove_sec_if_poss(
/*=========================*/
purge_node_t* node, /* in: row purge node */
dict_index_t* index, /* in: index */
dtuple_t* entry) /* in: index entry */
{
ibool success;
ulint n_tries = 0;
/* fputs("Purge: Removing secondary record\n", stderr); */
success = row_purge_remove_sec_if_poss_low(node, index, entry,
BTR_MODIFY_LEAF);
if (success) {
return;
}
retry:
success = row_purge_remove_sec_if_poss_low(node, index, entry,
BTR_MODIFY_TREE);
/* The delete operation may fail if we have little
file space left: TODO: easiest to crash the database
and restart with more file space */
if (!success && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) {
n_tries++;
os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
goto retry;
}
ut_a(success);
}
/***************************************************************
Purges a delete marking of a record. */
static
void
row_purge_del_mark(
/*===============*/
purge_node_t* node) /* in: row purge node */
{
mem_heap_t* heap;
dtuple_t* entry;
dict_index_t* index;
ut_ad(node);
heap = mem_heap_create(1024);
while (node->index != NULL) {
index = node->index;
/* Build the index entry */
entry = row_build_index_entry(node->row, index, heap);
row_purge_remove_sec_if_poss(node, index, entry);
node->index = dict_table_get_next_index(node->index);
}
mem_heap_free(heap);
row_purge_remove_clust_if_poss(node);
}
/***************************************************************
Purges an update of an existing record. Also purges an update of a delete
marked record if that record contained an externally stored field. */
static
void
row_purge_upd_exist_or_extern(
/*==========================*/
purge_node_t* node) /* in: row purge node */
{
mem_heap_t* heap;
dtuple_t* entry;
dict_index_t* index;
upd_field_t* ufield;
ibool is_insert;
ulint rseg_id;
ulint page_no;
ulint offset;
ulint internal_offset;
byte* data_field;
ulint data_field_len;
ulint i;
mtr_t mtr;
ut_ad(node);
if (node->rec_type == TRX_UNDO_UPD_DEL_REC) {
goto skip_secondaries;
}
heap = mem_heap_create(1024);
while (node->index != NULL) {
index = node->index;
if (row_upd_changes_ord_field_binary(NULL, node->index,
node->update)) {
/* Build the older version of the index entry */
entry = row_build_index_entry(node->row, index, heap);
row_purge_remove_sec_if_poss(node, index, entry);
}
node->index = dict_table_get_next_index(node->index);
}
mem_heap_free(heap);
skip_secondaries:
/* Free possible externally stored fields */
for (i = 0; i < upd_get_n_fields(node->update); i++) {
ufield = upd_get_nth_field(node->update, i);
if (ufield->extern_storage) {
/* We use the fact that new_val points to
node->undo_rec and get thus the offset of
dfield data inside the unod record. Then we
can calculate from node->roll_ptr the file
address of the new_val data */
internal_offset = ((byte*)ufield->new_val.data)
- node->undo_rec;
ut_a(internal_offset < UNIV_PAGE_SIZE);
trx_undo_decode_roll_ptr(node->roll_ptr,
&is_insert, &rseg_id,
&page_no, &offset);
mtr_start(&mtr);
/* We have to acquire an X-latch to the clustered
index tree */
index = dict_table_get_first_index(node->table);
mtr_x_lock(dict_tree_get_lock(index->tree), &mtr);
/* NOTE: we must also acquire an X-latch to the
root page of the tree. We will need it when we
free pages from the tree. If the tree is of height 1,
the tree X-latch does NOT protect the root page,
because it is also a leaf page. Since we will have a
latch on an undo log page, we would break the
latching order if we would only later latch the
root page of such a tree! */
btr_root_get(index->tree, &mtr);
/* We assume in purge of externally stored fields
that the space id of the undo log record is 0! */
data_field = buf_page_get(0, page_no, RW_X_LATCH, &mtr)
+ offset + internal_offset;
#ifdef UNIV_SYNC_DEBUG
buf_page_dbg_add_level(buf_frame_align(data_field),
SYNC_TRX_UNDO_PAGE);
#endif /* UNIV_SYNC_DEBUG */
data_field_len = ufield->new_val.len;
btr_free_externally_stored_field(index, data_field,
data_field_len, FALSE, &mtr);
mtr_commit(&mtr);
}
}
}
/***************************************************************
Parses the row reference and other info in a modify undo log record. */
static
ibool
row_purge_parse_undo_rec(
/*=====================*/
/* out: TRUE if purge operation required:
NOTE that then the CALLER must unfreeze
data dictionary! */
purge_node_t* node, /* in: row undo node */
ibool* updated_extern,
/* out: TRUE if an externally stored field
was updated */
que_thr_t* thr) /* in: query thread */
{
dict_index_t* clust_index;
byte* ptr;
trx_t* trx;
dulint undo_no;
dulint table_id;
dulint trx_id;
dulint roll_ptr;
ulint info_bits;
ulint type;
ulint cmpl_info;
ut_ad(node && thr);
trx = thr_get_trx(thr);
ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &cmpl_info,
updated_extern, &undo_no, &table_id);
node->rec_type = type;
if (type == TRX_UNDO_UPD_DEL_REC && !(*updated_extern)) {
return(FALSE);
}
ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
&info_bits);
node->table = NULL;
if (type == TRX_UNDO_UPD_EXIST_REC
&& cmpl_info & UPD_NODE_NO_ORD_CHANGE && !(*updated_extern)) {
/* Purge requires no changes to indexes: we may return */
return(FALSE);
}
/* Prevent DROP TABLE etc. from running when we are doing the purge
for this row */
row_mysql_freeze_data_dictionary(trx);
mutex_enter(&(dict_sys->mutex));
node->table = dict_table_get_on_id_low(table_id);
mutex_exit(&(dict_sys->mutex));
if (node->table == NULL) {
/* The table has been dropped: no need to do purge */
row_mysql_unfreeze_data_dictionary(trx);
return(FALSE);
}
if (node->table->ibd_file_missing) {
/* We skip purge of missing .ibd files */
node->table = NULL;
row_mysql_unfreeze_data_dictionary(trx);
return(FALSE);
}
clust_index = dict_table_get_first_index(node->table);
if (clust_index == NULL) {
/* The table was corrupt in the data dictionary */
row_mysql_unfreeze_data_dictionary(trx);
return(FALSE);
}
ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &(node->ref),
node->heap);
ptr = trx_undo_update_rec_get_update(ptr, clust_index, type, trx_id,
roll_ptr, info_bits, trx,
node->heap, &(node->update));
/* Read to the partial row the fields that occur in indexes */
if (!cmpl_info & UPD_NODE_NO_ORD_CHANGE) {
ptr = trx_undo_rec_get_partial_row(ptr, clust_index,
&(node->row), node->heap);
}
return(TRUE);
}
/***************************************************************
Fetches an undo log record and does the purge for the recorded operation.
If none left, or the current purge completed, returns the control to the
parent node, which is always a query thread node. */
static
ulint
row_purge(
/*======*/
/* out: DB_SUCCESS if operation successfully
completed, else error code */
purge_node_t* node, /* in: row purge node */
que_thr_t* thr) /* in: query thread */
{
dulint roll_ptr;
ibool purge_needed;
ibool updated_extern;
trx_t* trx;
ut_ad(node && thr);
trx = thr_get_trx(thr);
node->undo_rec = trx_purge_fetch_next_rec(&roll_ptr,
&(node->reservation),
node->heap);
if (!node->undo_rec) {
/* Purge completed for this query thread */
thr->run_node = que_node_get_parent(node);
return(DB_SUCCESS);
}
node->roll_ptr = roll_ptr;
if (node->undo_rec == &trx_purge_dummy_rec) {
purge_needed = FALSE;
} else {
purge_needed = row_purge_parse_undo_rec(node, &updated_extern,
thr);
/* If purge_needed == TRUE, we must also remember to unfreeze
data dictionary! */
}
if (purge_needed) {
node->found_clust = FALSE;
node->index = dict_table_get_next_index(
dict_table_get_first_index(node->table));
if (node->rec_type == TRX_UNDO_DEL_MARK_REC) {
row_purge_del_mark(node);
} else if (updated_extern
|| node->rec_type == TRX_UNDO_UPD_EXIST_REC) {
row_purge_upd_exist_or_extern(node);
}
if (node->found_clust) {
btr_pcur_close(&(node->pcur));
}
row_mysql_unfreeze_data_dictionary(trx);
}
/* Do some cleanup */
trx_purge_rec_release(node->reservation);
mem_heap_empty(node->heap);
thr->run_node = node;
return(DB_SUCCESS);
}
/***************************************************************
Does the purge operation for a single undo log record. This is a high-level
function used in an SQL execution graph. */
que_thr_t*
row_purge_step(
/*===========*/
/* out: query thread to run next or NULL */
que_thr_t* thr) /* in: query thread */
{
purge_node_t* node;
ulint err;
ut_ad(thr);
node = thr->run_node;
ut_ad(que_node_get_type(node) == QUE_NODE_PURGE);
err = row_purge(node, thr);
ut_ad(err == DB_SUCCESS);
return(thr);
}