2009-03-26 07:11:11 +01:00
|
|
|
/*****************************************************************************
|
|
|
|
|
2016-08-10 19:24:58 +02:00
|
|
|
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
|
2017-08-23 12:03:13 +02:00
|
|
|
Copyright (c) 2017, MariaDB Corporation.
|
2009-03-26 07:11:11 +01:00
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it under
|
|
|
|
the terms of the GNU General Public License as published by the Free Software
|
|
|
|
Foundation; version 2 of the License.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful, but WITHOUT
|
|
|
|
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
|
|
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License along with
|
2013-12-16 15:38:05 +01:00
|
|
|
this program; if not, write to the Free Software Foundation, Inc.,
|
|
|
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
2009-03-26 07:11:11 +01:00
|
|
|
|
|
|
|
*****************************************************************************/
|
|
|
|
|
2009-09-07 12:22:53 +02:00
|
|
|
/**************************************************//**
|
2013-12-16 15:38:05 +01:00
|
|
|
@file trx/trx0trx.cc
|
2008-12-01 07:10:29 +01:00
|
|
|
The transaction
|
|
|
|
|
|
|
|
Created 3/26/1996 Heikki Tuuri
|
|
|
|
*******************************************************/
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
#include "btr0types.h"
|
2008-12-01 07:10:29 +01:00
|
|
|
#include "trx0trx.h"
|
|
|
|
|
|
|
|
#ifdef UNIV_NONINL
|
|
|
|
#include "trx0trx.ic"
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#include "trx0undo.h"
|
|
|
|
#include "trx0rseg.h"
|
|
|
|
#include "log0log.h"
|
|
|
|
#include "que0que.h"
|
|
|
|
#include "lock0lock.h"
|
|
|
|
#include "trx0roll.h"
|
|
|
|
#include "usr0sess.h"
|
|
|
|
#include "read0read.h"
|
|
|
|
#include "srv0srv.h"
|
2013-12-16 15:38:05 +01:00
|
|
|
#include "srv0start.h"
|
2008-12-01 07:10:29 +01:00
|
|
|
#include "btr0sea.h"
|
|
|
|
#include "os0proc.h"
|
|
|
|
#include "trx0xa.h"
|
2013-12-16 15:38:05 +01:00
|
|
|
#include "trx0rec.h"
|
2011-07-14 21:22:41 +02:00
|
|
|
#include "trx0purge.h"
|
2008-12-01 07:10:29 +01:00
|
|
|
#include "ha_prototypes.h"
|
2013-12-16 15:38:05 +01:00
|
|
|
#include "srv0mon.h"
|
|
|
|
#include "ut0vec.h"
|
|
|
|
|
|
|
|
#include<set>
|
|
|
|
|
2014-07-08 12:54:47 +02:00
|
|
|
extern "C"
|
|
|
|
int thd_deadlock_victim_preference(const MYSQL_THD thd1, const MYSQL_THD thd2);
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
/** Set of table_id */
|
|
|
|
typedef std::set<table_id_t> table_id_set;
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2009-09-07 12:22:53 +02:00
|
|
|
/** Dummy session used currently in MySQL interface */
|
2008-12-01 07:10:29 +01:00
|
|
|
UNIV_INTERN sess_t* trx_dummy_sess = NULL;
|
|
|
|
|
2011-07-14 21:22:41 +02:00
|
|
|
#ifdef UNIV_PFS_MUTEX
|
|
|
|
/* Key to register the mutex with performance schema */
|
2013-12-16 15:38:05 +01:00
|
|
|
UNIV_INTERN mysql_pfs_key_t trx_mutex_key;
|
|
|
|
/* Key to register the mutex with performance schema */
|
2011-07-14 21:22:41 +02:00
|
|
|
UNIV_INTERN mysql_pfs_key_t trx_undo_mutex_key;
|
|
|
|
#endif /* UNIV_PFS_MUTEX */
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2009-09-07 12:22:53 +02:00
|
|
|
/*************************************************************//**
|
2008-12-01 07:10:29 +01:00
|
|
|
Set detailed error message for the transaction. */
|
|
|
|
UNIV_INTERN
|
|
|
|
void
|
|
|
|
trx_set_detailed_error(
|
|
|
|
/*===================*/
|
2009-09-07 12:22:53 +02:00
|
|
|
trx_t* trx, /*!< in: transaction struct */
|
|
|
|
const char* msg) /*!< in: detailed error message */
|
2008-12-01 07:10:29 +01:00
|
|
|
{
|
|
|
|
ut_strlcpy(trx->detailed_error, msg, sizeof(trx->detailed_error));
|
|
|
|
}
|
|
|
|
|
2009-09-07 12:22:53 +02:00
|
|
|
/*************************************************************//**
|
2008-12-01 07:10:29 +01:00
|
|
|
Set detailed error message for the transaction from a file. Note that the
|
|
|
|
file is rewinded before reading from it. */
|
|
|
|
UNIV_INTERN
|
|
|
|
void
|
|
|
|
trx_set_detailed_error_from_file(
|
|
|
|
/*=============================*/
|
2009-09-07 12:22:53 +02:00
|
|
|
trx_t* trx, /*!< in: transaction struct */
|
|
|
|
FILE* file) /*!< in: file to read message from */
|
2008-12-01 07:10:29 +01:00
|
|
|
{
|
|
|
|
os_file_read_string(file, trx->detailed_error,
|
|
|
|
sizeof(trx->detailed_error));
|
|
|
|
}
|
|
|
|
|
2013-05-08 09:52:54 +02:00
|
|
|
/*************************************************************//**
|
|
|
|
Callback function for trx_find_descriptor() to compare trx IDs. */
|
|
|
|
UNIV_INTERN
|
|
|
|
int
|
|
|
|
trx_descr_cmp(
|
|
|
|
/*==========*/
|
|
|
|
const void *a, /*!< in: pointer to first comparison argument */
|
|
|
|
const void *b) /*!< in: pointer to second comparison argument */
|
|
|
|
{
|
|
|
|
const trx_id_t* da = (const trx_id_t*) a;
|
|
|
|
const trx_id_t* db = (const trx_id_t*) b;
|
|
|
|
|
|
|
|
if (*da < *db) {
|
|
|
|
return -1;
|
|
|
|
} else if (*da > *db) {
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*************************************************************//**
|
|
|
|
Reserve a slot for a given trx in the global descriptors array. */
|
|
|
|
UNIV_INLINE
|
|
|
|
void
|
|
|
|
trx_reserve_descriptor(
|
|
|
|
/*===================*/
|
|
|
|
const trx_t* trx) /*!< in: trx pointer */
|
|
|
|
{
|
|
|
|
ulint n_used;
|
|
|
|
ulint n_max;
|
|
|
|
trx_id_t* descr;
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
ut_ad(mutex_own(&trx_sys->mutex) || srv_is_being_started);
|
|
|
|
ut_ad(srv_is_being_started ||
|
|
|
|
!trx_find_descriptor(trx_sys->descriptors,
|
2013-05-08 09:52:54 +02:00
|
|
|
trx_sys->descr_n_used,
|
|
|
|
trx->id));
|
|
|
|
|
|
|
|
n_used = trx_sys->descr_n_used + 1;
|
|
|
|
n_max = trx_sys->descr_n_max;
|
|
|
|
|
|
|
|
if (UNIV_UNLIKELY(n_used > n_max)) {
|
|
|
|
|
|
|
|
n_max = n_max * 2;
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
trx_sys->descriptors = static_cast<trx_id_t*>(
|
2013-05-08 09:52:54 +02:00
|
|
|
ut_realloc(trx_sys->descriptors,
|
2013-12-16 15:38:05 +01:00
|
|
|
n_max * sizeof(trx_id_t)));
|
2013-05-08 09:52:54 +02:00
|
|
|
|
|
|
|
trx_sys->descr_n_max = n_max;
|
2013-07-16 14:55:47 +02:00
|
|
|
srv_descriptors_memory = n_max * sizeof(trx_id_t);
|
2013-05-08 09:52:54 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
descr = trx_sys->descriptors + n_used - 1;
|
|
|
|
|
|
|
|
if (UNIV_UNLIKELY(n_used > 1 && trx->id < descr[-1])) {
|
|
|
|
|
|
|
|
/* Find the slot where it should be inserted. We could use a
|
|
|
|
binary search, but in reality linear search should be faster,
|
|
|
|
because the slot we are looking for is near the array end. */
|
|
|
|
|
|
|
|
trx_id_t* tdescr;
|
|
|
|
|
|
|
|
for (tdescr = descr - 1;
|
|
|
|
tdescr >= trx_sys->descriptors && *tdescr > trx->id;
|
|
|
|
tdescr--) {
|
|
|
|
}
|
|
|
|
|
|
|
|
tdescr++;
|
|
|
|
|
|
|
|
ut_memmove(tdescr + 1, tdescr, (descr - tdescr) *
|
|
|
|
sizeof(trx_id_t));
|
|
|
|
|
|
|
|
descr = tdescr;
|
|
|
|
}
|
|
|
|
|
|
|
|
*descr = trx->id;
|
|
|
|
|
|
|
|
trx_sys->descr_n_used = n_used;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*************************************************************//**
|
|
|
|
Release a slot for a given trx in the global descriptors array. */
|
|
|
|
UNIV_INTERN
|
|
|
|
void
|
|
|
|
trx_release_descriptor(
|
|
|
|
/*===================*/
|
|
|
|
trx_t* trx) /*!< in: trx pointer */
|
|
|
|
{
|
|
|
|
ulint size;
|
|
|
|
trx_id_t* descr;
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
ut_ad(mutex_own(&trx_sys->mutex));
|
2013-05-08 09:52:54 +02:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
if (UNIV_LIKELY(trx->in_trx_serial_list)) {
|
2013-05-08 09:52:54 +02:00
|
|
|
|
|
|
|
UT_LIST_REMOVE(trx_serial_list, trx_sys->trx_serial_list,
|
|
|
|
trx);
|
2013-12-16 15:38:05 +01:00
|
|
|
trx->in_trx_serial_list = false;
|
2013-05-08 09:52:54 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
descr = trx_find_descriptor(trx_sys->descriptors,
|
|
|
|
trx_sys->descr_n_used,
|
|
|
|
trx->id);
|
|
|
|
|
|
|
|
if (UNIV_UNLIKELY(descr == NULL)) {
|
|
|
|
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
size = (trx_sys->descriptors + trx_sys->descr_n_used - 1 - descr) *
|
|
|
|
sizeof(trx_id_t);
|
|
|
|
|
|
|
|
if (UNIV_LIKELY(size > 0)) {
|
|
|
|
|
|
|
|
ut_memmove(descr, descr + 1, size);
|
|
|
|
}
|
|
|
|
|
|
|
|
trx_sys->descr_n_used--;
|
|
|
|
}
|
|
|
|
|
2009-09-07 12:22:53 +02:00
|
|
|
/****************************************************************//**
|
2013-12-16 15:38:05 +01:00
|
|
|
Creates and initializes a transaction object. It must be explicitly
|
|
|
|
started with trx_start_if_not_started() before using it. The default
|
|
|
|
isolation level is TRX_ISO_REPEATABLE_READ.
|
|
|
|
@return transaction instance, should never be NULL */
|
|
|
|
static
|
2008-12-01 07:10:29 +01:00
|
|
|
trx_t*
|
2013-12-16 15:38:05 +01:00
|
|
|
trx_create(void)
|
|
|
|
/*============*/
|
2008-12-01 07:10:29 +01:00
|
|
|
{
|
2013-12-16 15:38:05 +01:00
|
|
|
trx_t* trx;
|
|
|
|
mem_heap_t* heap;
|
|
|
|
ib_alloc_t* heap_alloc;
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
trx = static_cast<trx_t*>(mem_zalloc(sizeof(*trx)));
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
mutex_create(trx_mutex_key, &trx->mutex, SYNC_TRX);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
|
|
|
trx->magic_n = TRX_MAGIC_N;
|
|
|
|
|
2013-12-22 17:06:50 +01:00
|
|
|
trx->active_commit_ordered = 0;
|
2013-12-16 15:38:05 +01:00
|
|
|
trx->state = TRX_STATE_NOT_STARTED;
|
2008-12-01 07:10:29 +01:00
|
|
|
|
|
|
|
trx->isolation_level = TRX_ISO_REPEATABLE_READ;
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
trx->no = TRX_ID_MAX;
|
|
|
|
trx->in_trx_serial_list = false;
|
2008-12-01 07:10:29 +01:00
|
|
|
|
|
|
|
trx->support_xa = TRUE;
|
|
|
|
|
2011-12-14 14:58:22 +01:00
|
|
|
trx->fake_changes = FALSE;
|
|
|
|
|
2008-12-01 07:10:29 +01:00
|
|
|
trx->check_foreigns = TRUE;
|
|
|
|
trx->check_unique_secondary = TRUE;
|
|
|
|
|
|
|
|
trx->dict_operation = TRX_DICT_OP_NONE;
|
|
|
|
|
2011-12-14 14:58:22 +01:00
|
|
|
trx->idle_start = 0;
|
|
|
|
trx->last_stmt_start = 0;
|
|
|
|
|
2011-07-14 21:22:41 +02:00
|
|
|
mutex_create(trx_undo_mutex_key, &trx->undo_mutex, SYNC_TRX_UNDO);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
|
|
|
trx->error_state = DB_SUCCESS;
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
trx->lock.que_state = TRX_QUE_RUNNING;
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
trx->lock.lock_heap = mem_heap_create_typed(
|
|
|
|
256, MEM_HEAP_FOR_LOCK_HEAP);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
|
|
|
trx->search_latch_timeout = BTR_SEA_TIMEOUT;
|
|
|
|
|
2009-11-04 21:11:12 +01:00
|
|
|
trx->io_reads = 0;
|
|
|
|
trx->io_read = 0;
|
|
|
|
trx->io_reads_wait_timer = 0;
|
|
|
|
trx->lock_que_wait_timer = 0;
|
|
|
|
trx->innodb_que_wait_timer = 0;
|
|
|
|
trx->distinct_page_access = 0;
|
|
|
|
trx->distinct_page_access_hash = NULL;
|
|
|
|
trx->take_stats = FALSE;
|
|
|
|
|
2008-12-01 07:10:29 +01:00
|
|
|
trx->xid.formatID = -1;
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
trx->op_info = "";
|
|
|
|
|
2014-02-26 16:25:11 +01:00
|
|
|
trx->api_trx = false;
|
|
|
|
|
|
|
|
trx->api_auto_commit = false;
|
|
|
|
|
|
|
|
trx->read_write = true;
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
heap = mem_heap_create(sizeof(ib_vector_t) + sizeof(void*) * 8);
|
|
|
|
heap_alloc = ib_heap_allocator_create(heap);
|
|
|
|
|
|
|
|
/* Remember to free the vector explicitly in trx_free(). */
|
|
|
|
trx->autoinc_locks = ib_vector_create(heap_alloc, sizeof(void**), 4);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
/* Remember to free the vector explicitly in trx_free(). */
|
|
|
|
heap = mem_heap_create(sizeof(ib_vector_t) + sizeof(void*) * 128);
|
|
|
|
heap_alloc = ib_heap_allocator_create(heap);
|
|
|
|
|
|
|
|
trx->lock.table_locks = ib_vector_create(
|
|
|
|
heap_alloc, sizeof(void**), 32);
|
2008-12-03 06:06:00 +01:00
|
|
|
|
2008-12-01 07:10:29 +01:00
|
|
|
return(trx);
|
|
|
|
}
|
|
|
|
|
2009-09-07 12:22:53 +02:00
|
|
|
/********************************************************************//**
|
2013-12-16 15:38:05 +01:00
|
|
|
Creates a transaction object for background operations by the master thread.
|
2009-09-07 12:22:53 +02:00
|
|
|
@return own: transaction object */
|
2008-12-01 07:10:29 +01:00
|
|
|
UNIV_INTERN
|
|
|
|
trx_t*
|
2013-12-16 15:38:05 +01:00
|
|
|
trx_allocate_for_background(void)
|
|
|
|
/*=============================*/
|
2008-12-01 07:10:29 +01:00
|
|
|
{
|
|
|
|
trx_t* trx;
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
trx = trx_create();
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
trx->sess = trx_dummy_sess;
|
2009-11-04 21:11:12 +01:00
|
|
|
|
2008-12-01 07:10:29 +01:00
|
|
|
return(trx);
|
|
|
|
}
|
|
|
|
|
2009-09-07 12:22:53 +02:00
|
|
|
/********************************************************************//**
|
2013-12-16 15:38:05 +01:00
|
|
|
Creates a transaction object for MySQL.
|
2009-09-07 12:22:53 +02:00
|
|
|
@return own: transaction object */
|
2008-12-01 07:10:29 +01:00
|
|
|
UNIV_INTERN
|
|
|
|
trx_t*
|
2013-12-16 15:38:05 +01:00
|
|
|
trx_allocate_for_mysql(void)
|
|
|
|
/*========================*/
|
2008-12-01 07:10:29 +01:00
|
|
|
{
|
|
|
|
trx_t* trx;
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
trx = trx_allocate_for_background();
|
|
|
|
|
|
|
|
mutex_enter(&trx_sys->mutex);
|
|
|
|
|
|
|
|
ut_d(trx->in_mysql_trx_list = TRUE);
|
|
|
|
UT_LIST_ADD_FIRST(mysql_trx_list, trx_sys->mysql_trx_list, trx);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
mutex_exit(&trx_sys->mutex);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
if (UNIV_UNLIKELY(trx->take_stats)) {
|
|
|
|
trx->distinct_page_access_hash
|
|
|
|
= static_cast<byte *>(mem_alloc(DPAH_SIZE));
|
|
|
|
memset(trx->distinct_page_access_hash, 0, DPAH_SIZE);
|
|
|
|
}
|
2008-12-01 07:10:29 +01:00
|
|
|
|
|
|
|
return(trx);
|
|
|
|
}
|
|
|
|
|
2009-09-07 12:22:53 +02:00
|
|
|
/********************************************************************//**
|
2013-12-16 15:38:05 +01:00
|
|
|
Frees a transaction object without releasing the corresponding descriptor.
|
|
|
|
Should be used by callers that already own trx_sys->mutex. */
|
|
|
|
static
|
2008-12-01 07:10:29 +01:00
|
|
|
void
|
2013-12-16 15:38:05 +01:00
|
|
|
trx_free_low(
|
|
|
|
/*=========*/
|
2009-09-07 12:22:53 +02:00
|
|
|
trx_t* trx) /*!< in, own: trx object */
|
2008-12-01 07:10:29 +01:00
|
|
|
{
|
|
|
|
ut_a(trx->magic_n == TRX_MAGIC_N);
|
2013-12-16 15:38:05 +01:00
|
|
|
ut_ad(!trx->in_ro_trx_list);
|
|
|
|
ut_ad(!trx->in_rw_trx_list);
|
|
|
|
ut_ad(!trx->in_mysql_trx_list);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
mutex_free(&trx->undo_mutex);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
if (trx->undo_no_arr != NULL) {
|
2008-12-01 07:10:29 +01:00
|
|
|
trx_undo_arr_free(trx->undo_no_arr);
|
|
|
|
}
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
ut_a(trx->lock.wait_lock == NULL);
|
|
|
|
ut_a(trx->lock.wait_thr == NULL);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
|
|
|
ut_a(!trx->has_search_latch);
|
2013-11-07 21:44:46 +01:00
|
|
|
#ifdef UNIV_SYNC_DEBUG
|
|
|
|
ut_ad(!btr_search_own_any());
|
|
|
|
#endif
|
2008-12-01 07:10:29 +01:00
|
|
|
|
|
|
|
ut_a(trx->dict_operation_lock_mode == 0);
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
if (trx->lock.lock_heap) {
|
|
|
|
mem_heap_free(trx->lock.lock_heap);
|
2008-12-01 07:10:29 +01:00
|
|
|
}
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2008-12-03 06:06:00 +01:00
|
|
|
ut_a(ib_vector_is_empty(trx->autoinc_locks));
|
|
|
|
/* We allocated a dedicated heap for the vector. */
|
|
|
|
ib_vector_free(trx->autoinc_locks);
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
if (trx->lock.table_locks != NULL) {
|
|
|
|
/* We allocated a dedicated heap for the vector. */
|
|
|
|
ib_vector_free(trx->lock.table_locks);
|
|
|
|
}
|
|
|
|
|
|
|
|
mutex_free(&trx->mutex);
|
|
|
|
|
|
|
|
read_view_free(trx->prebuilt_view);
|
2013-05-08 09:52:54 +02:00
|
|
|
|
2013-09-07 09:47:42 +02:00
|
|
|
mem_free(trx);
|
2008-12-01 07:10:29 +01:00
|
|
|
}
|
|
|
|
|
2011-07-14 21:22:41 +02:00
|
|
|
/********************************************************************//**
|
2013-12-16 15:38:05 +01:00
|
|
|
Frees a transaction object. */
|
|
|
|
static
|
2011-07-14 21:22:41 +02:00
|
|
|
void
|
2013-12-16 15:38:05 +01:00
|
|
|
trx_free(
|
|
|
|
/*=========*/
|
2011-07-14 21:22:41 +02:00
|
|
|
trx_t* trx) /*!< in, own: trx object */
|
|
|
|
{
|
2013-12-16 15:38:05 +01:00
|
|
|
mutex_enter(&trx_sys->mutex);
|
|
|
|
trx_release_descriptor(trx);
|
|
|
|
mutex_exit(&trx_sys->mutex);
|
2011-07-14 21:22:41 +02:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
trx_free_low(trx);
|
|
|
|
}
|
2011-07-14 21:22:41 +02:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
/********************************************************************//**
|
|
|
|
Frees a transaction object of a background operation of the master thread. */
|
|
|
|
UNIV_INTERN
|
|
|
|
void
|
|
|
|
trx_free_for_background(
|
|
|
|
/*====================*/
|
|
|
|
trx_t* trx) /*!< in, own: trx object */
|
|
|
|
{
|
2011-07-14 21:22:41 +02:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
if (trx->distinct_page_access_hash)
|
|
|
|
{
|
|
|
|
mem_free(trx->distinct_page_access_hash);
|
|
|
|
trx->distinct_page_access_hash= NULL;
|
2011-07-14 21:22:41 +02:00
|
|
|
}
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
if (trx->declared_to_be_inside_innodb) {
|
2011-07-14 21:22:41 +02:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
ib_logf(IB_LOG_LEVEL_ERROR,
|
|
|
|
"Freeing a trx (%p, " TRX_ID_FMT ") which is declared "
|
|
|
|
"to be processing inside InnoDB", trx, trx->id);
|
2011-07-14 21:22:41 +02:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
trx_print(stderr, trx, 600);
|
|
|
|
putc('\n', stderr);
|
2011-07-14 21:22:41 +02:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
/* This is an error but not a fatal error. We must keep
|
|
|
|
the counters like srv_conc_n_threads accurate. */
|
|
|
|
srv_conc_force_exit_innodb(trx);
|
2011-07-14 21:22:41 +02:00
|
|
|
}
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
if (trx->n_mysql_tables_in_use != 0
|
|
|
|
|| trx->mysql_n_tables_locked != 0) {
|
2011-07-14 21:22:41 +02:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
ib_logf(IB_LOG_LEVEL_ERROR,
|
|
|
|
"MySQL is freeing a thd though "
|
|
|
|
"trx->n_mysql_tables_in_use is %lu and "
|
|
|
|
"trx->mysql_n_tables_locked is %lu.",
|
|
|
|
(ulong) trx->n_mysql_tables_in_use,
|
|
|
|
(ulong) trx->mysql_n_tables_locked);
|
2013-05-08 09:52:54 +02:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
trx_print(stderr, trx, 600);
|
|
|
|
ut_print_buf(stderr, trx, sizeof(trx_t));
|
|
|
|
putc('\n', stderr);
|
2013-05-08 09:52:54 +02:00
|
|
|
}
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
ut_a(trx->state == TRX_STATE_NOT_STARTED);
|
|
|
|
ut_a(trx->insert_undo == NULL);
|
|
|
|
ut_a(trx->update_undo == NULL);
|
|
|
|
ut_a(trx->read_view == NULL);
|
2013-05-08 09:52:54 +02:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
trx_free(trx);
|
2011-07-14 21:22:41 +02:00
|
|
|
}
|
|
|
|
|
2009-09-07 12:22:53 +02:00
|
|
|
/********************************************************************//**
|
2013-12-16 15:38:05 +01:00
|
|
|
At shutdown, frees a transaction object that is in the PREPARED state. */
|
2008-12-01 07:10:29 +01:00
|
|
|
UNIV_INTERN
|
|
|
|
void
|
2013-12-16 15:38:05 +01:00
|
|
|
trx_free_prepared(
|
|
|
|
/*==============*/
|
2009-09-07 12:22:53 +02:00
|
|
|
trx_t* trx) /*!< in, own: trx object */
|
2008-12-01 07:10:29 +01:00
|
|
|
{
|
2017-02-03 11:25:42 +01:00
|
|
|
ut_a(trx_state_eq(trx, TRX_STATE_PREPARED)
|
2017-10-27 10:36:32 +02:00
|
|
|
|| (trx->is_recovered
|
|
|
|
&& (trx_state_eq(trx, TRX_STATE_ACTIVE)
|
|
|
|
|| trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY))
|
2017-02-03 11:25:42 +01:00
|
|
|
&& (srv_read_only_mode
|
|
|
|
|| srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO)));
|
2013-12-16 15:38:05 +01:00
|
|
|
ut_a(trx->magic_n == TRX_MAGIC_N);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2015-06-16 16:33:21 +02:00
|
|
|
lock_trx_release_locks(trx);
|
2013-12-16 15:38:05 +01:00
|
|
|
trx_undo_free_prepared(trx);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
assert_trx_in_rw_list(trx);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
ut_a(!trx->read_only);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
UT_LIST_REMOVE(trx_list, trx_sys->rw_trx_list, trx);
|
|
|
|
ut_d(trx->in_rw_trx_list = FALSE);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2015-08-01 15:04:20 +02:00
|
|
|
mutex_enter(&trx_sys->mutex);
|
2013-12-16 15:38:05 +01:00
|
|
|
trx_release_descriptor(trx);
|
2015-08-01 15:04:20 +02:00
|
|
|
mutex_exit(&trx_sys->mutex);
|
2013-12-16 15:38:05 +01:00
|
|
|
|
|
|
|
/* Undo trx_resurrect_table_locks(). */
|
|
|
|
UT_LIST_INIT(trx->lock.trx_locks);
|
|
|
|
|
|
|
|
trx_free_low(trx);
|
|
|
|
|
|
|
|
ut_ad(trx_sys->descr_n_used <= UT_LIST_GET_LEN(trx_sys->rw_trx_list));
|
2008-12-01 07:10:29 +01:00
|
|
|
}
|
|
|
|
|
2009-09-07 12:22:53 +02:00
|
|
|
/********************************************************************//**
|
2013-12-16 15:38:05 +01:00
|
|
|
Frees a transaction object for MySQL. */
|
2008-12-01 07:10:29 +01:00
|
|
|
UNIV_INTERN
|
|
|
|
void
|
2013-12-16 15:38:05 +01:00
|
|
|
trx_free_for_mysql(
|
|
|
|
/*===============*/
|
2009-09-07 12:22:53 +02:00
|
|
|
trx_t* trx) /*!< in, own: trx object */
|
2008-12-01 07:10:29 +01:00
|
|
|
{
|
2009-11-04 21:11:12 +01:00
|
|
|
if (trx->distinct_page_access_hash)
|
|
|
|
{
|
|
|
|
mem_free(trx->distinct_page_access_hash);
|
|
|
|
trx->distinct_page_access_hash= NULL;
|
|
|
|
}
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
mutex_enter(&trx_sys->mutex);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
ut_ad(trx->in_mysql_trx_list);
|
|
|
|
ut_d(trx->in_mysql_trx_list = FALSE);
|
|
|
|
UT_LIST_REMOVE(mysql_trx_list, trx_sys->mysql_trx_list, trx);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
ut_ad(trx_sys_validate_trx_list());
|
|
|
|
|
|
|
|
mutex_exit(&trx_sys->mutex);
|
|
|
|
|
|
|
|
trx_free_for_background(trx);
|
2008-12-01 07:10:29 +01:00
|
|
|
}
|
|
|
|
|
2009-09-07 12:22:53 +02:00
|
|
|
/****************************************************************//**
|
2008-12-01 07:10:29 +01:00
|
|
|
Inserts the trx handle in the trx system trx list in the right position.
|
|
|
|
The list is sorted on the trx id so that the biggest id is at the list
|
|
|
|
start. This function is used at the database startup to insert incomplete
|
|
|
|
transactions to the list. */
|
|
|
|
static
|
|
|
|
void
|
2013-12-16 15:38:05 +01:00
|
|
|
trx_list_rw_insert_ordered(
|
|
|
|
/*=======================*/
|
2009-09-07 12:22:53 +02:00
|
|
|
trx_t* trx) /*!< in: trx handle */
|
2008-12-01 07:10:29 +01:00
|
|
|
{
|
|
|
|
trx_t* trx2;
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
ut_ad(!trx->read_only);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
ut_d(trx->start_file = __FILE__);
|
|
|
|
ut_d(trx->start_line = __LINE__);
|
|
|
|
|
|
|
|
ut_a(srv_is_being_started);
|
|
|
|
ut_ad(!trx->in_ro_trx_list);
|
|
|
|
ut_ad(!trx->in_rw_trx_list);
|
|
|
|
ut_ad(trx->state != TRX_STATE_NOT_STARTED);
|
|
|
|
ut_ad(trx->is_recovered);
|
|
|
|
|
|
|
|
for (trx2 = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
|
|
|
|
trx2 != NULL;
|
|
|
|
trx2 = UT_LIST_GET_NEXT(trx_list, trx2)) {
|
|
|
|
|
|
|
|
assert_trx_in_rw_list(trx2);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2011-07-14 21:22:41 +02:00
|
|
|
if (trx->id >= trx2->id) {
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2011-07-14 21:22:41 +02:00
|
|
|
ut_ad(trx->id > trx2->id);
|
2008-12-01 07:10:29 +01:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (trx2 != NULL) {
|
|
|
|
trx2 = UT_LIST_GET_PREV(trx_list, trx2);
|
|
|
|
|
|
|
|
if (trx2 == NULL) {
|
2013-12-16 15:38:05 +01:00
|
|
|
UT_LIST_ADD_FIRST(trx_list, trx_sys->rw_trx_list, trx);
|
2008-12-01 07:10:29 +01:00
|
|
|
} else {
|
2013-12-16 15:38:05 +01:00
|
|
|
UT_LIST_INSERT_AFTER(
|
|
|
|
trx_list, trx_sys->rw_trx_list, trx2, trx);
|
2008-12-01 07:10:29 +01:00
|
|
|
}
|
|
|
|
} else {
|
2013-12-16 15:38:05 +01:00
|
|
|
UT_LIST_ADD_LAST(trx_list, trx_sys->rw_trx_list, trx);
|
2008-12-01 07:10:29 +01:00
|
|
|
}
|
2013-12-16 15:38:05 +01:00
|
|
|
|
2014-02-26 16:25:11 +01:00
|
|
|
#ifdef UNIV_DEBUG
|
|
|
|
if (trx->id > trx_sys->rw_max_trx_id) {
|
|
|
|
trx_sys->rw_max_trx_id = trx->id;
|
|
|
|
}
|
|
|
|
#endif /* UNIV_DEBUG */
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
ut_ad(!trx->in_rw_trx_list);
|
|
|
|
ut_d(trx->in_rw_trx_list = TRUE);
|
2008-12-01 07:10:29 +01:00
|
|
|
}
|
|
|
|
|
2009-09-07 12:22:53 +02:00
|
|
|
/****************************************************************//**
|
2013-12-16 15:38:05 +01:00
|
|
|
Resurrect the table locks for a resurrected transaction. */
|
|
|
|
static
|
2008-12-01 07:10:29 +01:00
|
|
|
void
|
2013-12-16 15:38:05 +01:00
|
|
|
trx_resurrect_table_locks(
|
|
|
|
/*======================*/
|
|
|
|
trx_t* trx, /*!< in/out: transaction */
|
|
|
|
const trx_undo_t* undo) /*!< in: undo log */
|
2008-12-01 07:10:29 +01:00
|
|
|
{
|
2013-12-16 15:38:05 +01:00
|
|
|
mtr_t mtr;
|
|
|
|
page_t* undo_page;
|
|
|
|
trx_undo_rec_t* undo_rec;
|
|
|
|
table_id_set tables;
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
ut_ad(undo == trx->insert_undo || undo == trx->update_undo);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
if (trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY)
|
|
|
|
|| undo->empty) {
|
|
|
|
return;
|
|
|
|
}
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
mtr_start(&mtr);
|
|
|
|
/* trx_rseg_mem_create() may have acquired an X-latch on this
|
|
|
|
page, so we cannot acquire an S-latch. */
|
|
|
|
undo_page = trx_undo_page_get(
|
|
|
|
undo->space, undo->zip_size, undo->top_page_no, &mtr);
|
|
|
|
undo_rec = undo_page + undo->top_offset;
|
|
|
|
|
|
|
|
do {
|
|
|
|
ulint type;
|
|
|
|
ulint cmpl_info;
|
|
|
|
bool updated_extern;
|
|
|
|
undo_no_t undo_no;
|
|
|
|
table_id_t table_id;
|
|
|
|
|
|
|
|
page_t* undo_rec_page = page_align(undo_rec);
|
|
|
|
|
|
|
|
if (undo_rec_page != undo_page) {
|
|
|
|
if (!mtr_memo_release(&mtr,
|
|
|
|
buf_block_align(undo_page),
|
|
|
|
MTR_MEMO_PAGE_X_FIX)) {
|
|
|
|
/* The page of the previous undo_rec
|
|
|
|
should have been latched by
|
|
|
|
trx_undo_page_get() or
|
|
|
|
trx_undo_get_prev_rec(). */
|
|
|
|
ut_ad(0);
|
|
|
|
}
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
undo_page = undo_rec_page;
|
|
|
|
}
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
trx_undo_rec_get_pars(
|
|
|
|
undo_rec, &type, &cmpl_info,
|
|
|
|
&updated_extern, &undo_no, &table_id);
|
|
|
|
tables.insert(table_id);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
undo_rec = trx_undo_get_prev_rec(
|
|
|
|
undo_rec, undo->hdr_page_no,
|
|
|
|
undo->hdr_offset, false, &mtr);
|
|
|
|
} while (undo_rec);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
mtr_commit(&mtr);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
for (table_id_set::const_iterator i = tables.begin();
|
|
|
|
i != tables.end(); i++) {
|
|
|
|
if (dict_table_t* table = dict_table_open_on_id(
|
|
|
|
*i, FALSE, DICT_TABLE_OP_LOAD_TABLESPACE)) {
|
|
|
|
if (table->ibd_file_missing
|
|
|
|
|| dict_table_is_temporary(table)) {
|
|
|
|
mutex_enter(&dict_sys->mutex);
|
|
|
|
dict_table_close(table, TRUE, FALSE);
|
|
|
|
dict_table_remove_from_cache(table);
|
|
|
|
mutex_exit(&dict_sys->mutex);
|
|
|
|
continue;
|
|
|
|
}
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
lock_table_ix_resurrect(table, trx);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
DBUG_PRINT("ib_trx",
|
|
|
|
("resurrect" TRX_ID_FMT
|
|
|
|
" table '%s' IX lock from %s undo",
|
|
|
|
trx->id, table->name,
|
|
|
|
undo == trx->insert_undo
|
|
|
|
? "insert" : "update"));
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
dict_table_close(table, FALSE, FALSE);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
/****************************************************************//**
|
|
|
|
Resurrect the transactions that were doing inserts the time of the
|
|
|
|
crash, they need to be undone.
|
|
|
|
@return trx_t instance */
|
|
|
|
static
|
|
|
|
trx_t*
|
|
|
|
trx_resurrect_insert(
|
|
|
|
/*=================*/
|
|
|
|
trx_undo_t* undo, /*!< in: entry to UNDO */
|
|
|
|
trx_rseg_t* rseg) /*!< in: rollback segment */
|
|
|
|
{
|
|
|
|
trx_t* trx;
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
trx = trx_allocate_for_background();
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
trx->rseg = rseg;
|
|
|
|
trx->xid = undo->xid;
|
|
|
|
trx->id = undo->trx_id;
|
|
|
|
trx->insert_undo = undo;
|
|
|
|
trx->is_recovered = TRUE;
|
2013-05-08 09:52:54 +02:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
/* This is single-threaded startup code, we do not need the
|
|
|
|
protection of trx->mutex or trx_sys->mutex here. */
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
if (undo->state != TRX_UNDO_ACTIVE) {
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
/* Prepared transactions are left in the prepared state
|
|
|
|
waiting for a commit or abort decision from MySQL */
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
if (undo->state == TRX_UNDO_PREPARED) {
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
fprintf(stderr,
|
|
|
|
"InnoDB: Transaction " TRX_ID_FMT " was in the"
|
|
|
|
" XA prepared state.\n", trx->id);
|
2013-05-08 09:52:54 +02:00
|
|
|
|
2017-08-23 12:03:13 +02:00
|
|
|
trx->state = TRX_STATE_PREPARED;
|
|
|
|
trx_sys->n_prepared_trx++;
|
|
|
|
trx_sys->n_prepared_recovered_trx++;
|
2013-12-16 15:38:05 +01:00
|
|
|
} else {
|
|
|
|
trx->state = TRX_STATE_COMMITTED_IN_MEMORY;
|
|
|
|
}
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
/* We give a dummy value for the trx no; this should have no
|
|
|
|
relevance since purge is not interested in committed
|
|
|
|
transaction numbers, unless they are in the history
|
|
|
|
list, in which case it looks the number from the disk based
|
|
|
|
undo log structure */
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
trx->no = trx->id;
|
|
|
|
} else {
|
|
|
|
trx->state = TRX_STATE_ACTIVE;
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
/* A running transaction always has the number
|
|
|
|
field inited to TRX_ID_MAX */
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
trx->no = TRX_ID_MAX;
|
|
|
|
}
|
2013-05-08 09:52:54 +02:00
|
|
|
|
2014-05-05 18:16:30 +02:00
|
|
|
/* trx_start_low() is not called with resurrect, so need to initialize
|
|
|
|
start time here.*/
|
|
|
|
if (trx->state == TRX_STATE_ACTIVE
|
|
|
|
|| trx->state == TRX_STATE_PREPARED) {
|
|
|
|
trx->start_time = ut_time();
|
|
|
|
}
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
if (undo->dict_operation) {
|
|
|
|
trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
|
|
|
|
trx->table_id = undo->table_id;
|
|
|
|
}
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
if (!undo->empty) {
|
|
|
|
trx->undo_no = undo->top_undo_no + 1;
|
|
|
|
}
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
return(trx);
|
|
|
|
}
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
/****************************************************************//**
|
|
|
|
Prepared transactions are left in the prepared state waiting for a
|
|
|
|
commit or abort decision from MySQL */
|
|
|
|
static
|
|
|
|
void
|
|
|
|
trx_resurrect_update_in_prepared_state(
|
|
|
|
/*===================================*/
|
|
|
|
trx_t* trx, /*!< in,out: transaction */
|
|
|
|
const trx_undo_t* undo) /*!< in: update UNDO record */
|
|
|
|
{
|
|
|
|
/* This is single-threaded startup code, we do not need the
|
|
|
|
protection of trx->mutex or trx_sys->mutex here. */
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
if (undo->state == TRX_UNDO_PREPARED) {
|
|
|
|
fprintf(stderr,
|
|
|
|
"InnoDB: Transaction " TRX_ID_FMT
|
|
|
|
" was in the XA prepared state.\n", trx->id);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2017-08-23 12:03:13 +02:00
|
|
|
if (trx_state_eq(trx, TRX_STATE_NOT_STARTED)) {
|
|
|
|
trx_sys->n_prepared_trx++;
|
|
|
|
trx_sys->n_prepared_recovered_trx++;
|
2013-12-16 15:38:05 +01:00
|
|
|
} else {
|
2017-08-23 12:03:13 +02:00
|
|
|
ut_ad(trx_state_eq(trx, TRX_STATE_PREPARED));
|
2008-12-01 07:10:29 +01:00
|
|
|
}
|
2017-08-23 12:03:13 +02:00
|
|
|
|
|
|
|
trx->state = TRX_STATE_PREPARED;
|
2013-12-16 15:38:05 +01:00
|
|
|
} else {
|
|
|
|
trx->state = TRX_STATE_COMMITTED_IN_MEMORY;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/****************************************************************//**
|
|
|
|
Resurrect the transactions that were doing updates the time of the
|
|
|
|
crash, they need to be undone. */
|
|
|
|
static
|
|
|
|
void
|
|
|
|
trx_resurrect_update(
|
|
|
|
/*=================*/
|
|
|
|
trx_t* trx, /*!< in/out: transaction */
|
|
|
|
trx_undo_t* undo, /*!< in/out: update UNDO record */
|
|
|
|
trx_rseg_t* rseg) /*!< in/out: rollback segment */
|
|
|
|
{
|
|
|
|
trx->rseg = rseg;
|
|
|
|
trx->xid = undo->xid;
|
|
|
|
trx->id = undo->trx_id;
|
|
|
|
trx->update_undo = undo;
|
|
|
|
trx->is_recovered = TRUE;
|
|
|
|
|
|
|
|
/* This is single-threaded startup code, we do not need the
|
|
|
|
protection of trx->mutex or trx_sys->mutex here. */
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
if (undo->state != TRX_UNDO_ACTIVE) {
|
|
|
|
trx_resurrect_update_in_prepared_state(trx, undo);
|
|
|
|
|
|
|
|
/* We give a dummy value for the trx number */
|
|
|
|
|
|
|
|
trx->no = trx->id;
|
|
|
|
|
|
|
|
} else {
|
|
|
|
trx->state = TRX_STATE_ACTIVE;
|
|
|
|
|
|
|
|
/* A running transaction always has the number field inited to
|
|
|
|
TRX_ID_MAX */
|
|
|
|
|
|
|
|
trx->no = TRX_ID_MAX;
|
|
|
|
}
|
|
|
|
|
2014-05-05 18:16:30 +02:00
|
|
|
/* trx_start_low() is not called with resurrect, so need to initialize
|
|
|
|
start time here.*/
|
|
|
|
if (trx->state == TRX_STATE_ACTIVE
|
|
|
|
|| trx->state == TRX_STATE_PREPARED) {
|
|
|
|
trx->start_time = ut_time();
|
|
|
|
}
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
if (undo->dict_operation) {
|
|
|
|
trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
|
|
|
|
trx->table_id = undo->table_id;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!undo->empty && undo->top_undo_no >= trx->undo_no) {
|
|
|
|
|
|
|
|
trx->undo_no = undo->top_undo_no + 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/****************************************************************//**
|
|
|
|
Creates trx objects for transactions and initializes the trx list of
|
|
|
|
trx_sys at database start. Rollback segment and undo log lists must
|
|
|
|
already exist when this function is called, because the lists of
|
|
|
|
transactions to be rolled back or cleaned up are built based on the
|
|
|
|
undo log lists. */
|
|
|
|
UNIV_INTERN
|
|
|
|
void
|
|
|
|
trx_lists_init_at_db_start(void)
|
|
|
|
/*============================*/
|
|
|
|
{
|
|
|
|
ulint i;
|
|
|
|
|
|
|
|
ut_a(srv_is_being_started);
|
|
|
|
|
|
|
|
UT_LIST_INIT(trx_sys->ro_trx_list);
|
|
|
|
UT_LIST_INIT(trx_sys->rw_trx_list);
|
|
|
|
UT_LIST_INIT(trx_sys->trx_serial_list);
|
|
|
|
|
|
|
|
/* Look from the rollback segments if there exist undo logs for
|
|
|
|
transactions */
|
|
|
|
|
|
|
|
for (i = 0; i < TRX_SYS_N_RSEGS; ++i) {
|
|
|
|
trx_undo_t* undo;
|
|
|
|
trx_rseg_t* rseg;
|
|
|
|
|
|
|
|
rseg = trx_sys->rseg_array[i];
|
|
|
|
|
|
|
|
if (rseg == NULL) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Resurrect transactions that were doing inserts. */
|
|
|
|
for (undo = UT_LIST_GET_FIRST(rseg->insert_undo_list);
|
|
|
|
undo != NULL;
|
|
|
|
undo = UT_LIST_GET_NEXT(undo_list, undo)) {
|
|
|
|
trx_t* trx;
|
|
|
|
|
|
|
|
trx = trx_resurrect_insert(undo, rseg);
|
|
|
|
|
|
|
|
if (trx->state == TRX_STATE_ACTIVE ||
|
|
|
|
trx->state == TRX_STATE_PREPARED) {
|
|
|
|
|
|
|
|
trx_reserve_descriptor(trx);
|
|
|
|
}
|
|
|
|
trx_list_rw_insert_ordered(trx);
|
|
|
|
|
|
|
|
trx_resurrect_table_locks(trx, undo);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Ressurrect transactions that were doing updates. */
|
|
|
|
for (undo = UT_LIST_GET_FIRST(rseg->update_undo_list);
|
|
|
|
undo != NULL;
|
|
|
|
undo = UT_LIST_GET_NEXT(undo_list, undo)) {
|
|
|
|
trx_t* trx;
|
|
|
|
ibool trx_created;
|
|
|
|
|
|
|
|
/* Check the trx_sys->rw_trx_list first. */
|
|
|
|
mutex_enter(&trx_sys->mutex);
|
|
|
|
trx = trx_get_rw_trx_by_id(undo->trx_id);
|
|
|
|
mutex_exit(&trx_sys->mutex);
|
|
|
|
|
|
|
|
if (trx == NULL) {
|
|
|
|
trx = trx_allocate_for_background();
|
|
|
|
trx_created = TRUE;
|
|
|
|
} else {
|
|
|
|
trx_created = FALSE;
|
|
|
|
}
|
|
|
|
|
|
|
|
trx_resurrect_update(trx, undo, rseg);
|
|
|
|
|
|
|
|
if (trx_created) {
|
|
|
|
if (trx->state == TRX_STATE_ACTIVE ||
|
|
|
|
trx->state == TRX_STATE_PREPARED) {
|
|
|
|
|
|
|
|
trx_reserve_descriptor(trx);
|
|
|
|
}
|
|
|
|
trx_list_rw_insert_ordered(trx);
|
|
|
|
}
|
|
|
|
|
|
|
|
trx_resurrect_table_locks(trx, undo);
|
|
|
|
}
|
2008-12-01 07:10:29 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-09-07 12:22:53 +02:00
|
|
|
/******************************************************************//**
|
2008-12-01 07:10:29 +01:00
|
|
|
Assigns a rollback segment to a transaction in a round-robin fashion.
|
2011-07-14 21:22:41 +02:00
|
|
|
@return assigned rollback segment instance */
|
2013-12-16 15:38:05 +01:00
|
|
|
static
|
2011-07-14 21:22:41 +02:00
|
|
|
trx_rseg_t*
|
2013-12-16 15:38:05 +01:00
|
|
|
trx_assign_rseg_low(
|
|
|
|
/*================*/
|
|
|
|
ulong max_undo_logs, /*!< in: maximum number of UNDO logs to use */
|
|
|
|
ulint n_tablespaces) /*!< in: number of rollback tablespaces */
|
2008-12-01 07:10:29 +01:00
|
|
|
{
|
2013-12-16 15:38:05 +01:00
|
|
|
ulint i;
|
|
|
|
trx_rseg_t* rseg;
|
|
|
|
static ulint latest_rseg = 0;
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2014-02-26 16:25:11 +01:00
|
|
|
if (srv_read_only_mode) {
|
2013-12-16 15:38:05 +01:00
|
|
|
ut_a(max_undo_logs == ULONG_UNDEFINED);
|
|
|
|
return(NULL);
|
|
|
|
}
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
/* This breaks true round robin but that should be OK. */
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
ut_a(max_undo_logs > 0 && max_undo_logs <= TRX_SYS_N_RSEGS);
|
|
|
|
|
|
|
|
i = latest_rseg++;
|
|
|
|
i %= max_undo_logs;
|
|
|
|
|
|
|
|
/* Note: The assumption here is that there can't be any gaps in
|
|
|
|
the array. Once we implement more flexible rollback segment
|
|
|
|
management this may not hold. The assertion checks for that case. */
|
|
|
|
|
2014-02-26 16:25:11 +01:00
|
|
|
if (trx_sys->rseg_array[0] == NULL) {
|
|
|
|
return(NULL);
|
|
|
|
}
|
2013-12-16 15:38:05 +01:00
|
|
|
|
|
|
|
/* Skip the system tablespace if we have more than one tablespace
|
|
|
|
defined for rollback segments. We want all UNDO records to be in
|
|
|
|
the non-system tablespaces. */
|
|
|
|
|
|
|
|
do {
|
|
|
|
rseg = trx_sys->rseg_array[i];
|
|
|
|
ut_a(rseg == NULL || i == rseg->id);
|
|
|
|
|
|
|
|
i = (rseg == NULL) ? 0 : i + 1;
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
} while (rseg == NULL
|
|
|
|
|| (rseg->space == 0
|
|
|
|
&& n_tablespaces > 0
|
|
|
|
&& trx_sys->rseg_array[1] != NULL));
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2011-07-14 21:22:41 +02:00
|
|
|
return(rseg);
|
2008-12-01 07:10:29 +01:00
|
|
|
}
|
|
|
|
|
2009-09-07 12:22:53 +02:00
|
|
|
/****************************************************************//**
|
2013-12-16 15:38:05 +01:00
|
|
|
Assign a read-only transaction a rollback-segment, if it is attempting
|
|
|
|
to write to a TEMPORARY table. */
|
2008-12-01 07:10:29 +01:00
|
|
|
UNIV_INTERN
|
2013-12-16 15:38:05 +01:00
|
|
|
void
|
|
|
|
trx_assign_rseg(
|
|
|
|
/*============*/
|
|
|
|
trx_t* trx) /*!< A read-only transaction that
|
|
|
|
needs to be assigned a RBS. */
|
|
|
|
{
|
|
|
|
ut_a(trx->rseg == 0);
|
|
|
|
ut_a(trx->read_only);
|
|
|
|
ut_a(!srv_read_only_mode);
|
|
|
|
ut_a(!trx_is_autocommit_non_locking(trx));
|
|
|
|
|
|
|
|
trx->rseg = trx_assign_rseg_low(srv_undo_logs, srv_undo_tablespaces);
|
|
|
|
}
|
|
|
|
|
|
|
|
/****************************************************************//**
|
|
|
|
Starts a transaction. */
|
|
|
|
static
|
|
|
|
void
|
2008-12-01 07:10:29 +01:00
|
|
|
trx_start_low(
|
|
|
|
/*==========*/
|
2013-12-16 15:38:05 +01:00
|
|
|
trx_t* trx) /*!< in: transaction */
|
2008-12-01 07:10:29 +01:00
|
|
|
{
|
|
|
|
ut_ad(trx->rseg == NULL);
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
ut_ad(trx->start_file != 0);
|
|
|
|
ut_ad(trx->start_line != 0);
|
|
|
|
ut_ad(!trx->is_recovered);
|
|
|
|
ut_ad(trx_state_eq(trx, TRX_STATE_NOT_STARTED));
|
|
|
|
ut_ad(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
/* Check whether it is an AUTOCOMMIT SELECT */
|
2014-02-26 16:25:11 +01:00
|
|
|
trx->auto_commit = (trx->api_trx && trx->api_auto_commit)
|
|
|
|
|| thd_trx_is_auto_commit(trx->mysql_thd);
|
2013-12-16 15:38:05 +01:00
|
|
|
|
|
|
|
trx->read_only =
|
2014-02-26 16:25:11 +01:00
|
|
|
(trx->api_trx && !trx->read_write)
|
|
|
|
|| (!trx->ddl && thd_trx_is_read_only(trx->mysql_thd))
|
2013-12-16 15:38:05 +01:00
|
|
|
|| srv_read_only_mode;
|
|
|
|
|
|
|
|
if (!trx->auto_commit) {
|
|
|
|
++trx->will_lock;
|
|
|
|
} else if (trx->will_lock == 0) {
|
|
|
|
trx->read_only = TRUE;
|
2008-12-01 07:10:29 +01:00
|
|
|
}
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
if (!trx->read_only) {
|
|
|
|
trx->rseg = trx_assign_rseg_low(
|
|
|
|
srv_undo_logs, srv_undo_tablespaces);
|
|
|
|
}
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
/* The initial value for trx->no: TRX_ID_MAX is used in
|
|
|
|
read_view_open_now: */
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
trx->no = TRX_ID_MAX;
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
ut_a(ib_vector_is_empty(trx->autoinc_locks));
|
|
|
|
ut_a(ib_vector_is_empty(trx->lock.table_locks));
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
mutex_enter(&trx_sys->mutex);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
/* If this transaction came from trx_allocate_for_mysql(),
|
|
|
|
trx->in_mysql_trx_list would hold. In that case, the trx->state
|
|
|
|
change must be protected by the trx_sys->mutex, so that
|
|
|
|
lock_print_info_all_transactions() will have a consistent view. */
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
trx->state = TRX_STATE_ACTIVE;
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
trx->id = trx_sys_get_new_trx_id();
|
2013-05-08 09:52:54 +02:00
|
|
|
|
2015-12-13 10:13:18 +01:00
|
|
|
/* Cache the state of fake_changes that transaction will use for
|
|
|
|
lifetime. Any change in session/global fake_changes configuration during
|
|
|
|
lifetime of transaction will not be honored by already started
|
|
|
|
transaction. */
|
|
|
|
trx->fake_changes = thd_fake_changes(trx->mysql_thd);
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
ut_ad(!trx->in_rw_trx_list);
|
|
|
|
ut_ad(!trx->in_ro_trx_list);
|
2013-05-08 09:52:54 +02:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
if (trx->read_only) {
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
/* Note: The trx_sys_t::ro_trx_list doesn't really need to
|
|
|
|
be ordered, we should exploit this using a list type that
|
|
|
|
doesn't need a list wide lock to increase concurrency. */
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
if (!trx_is_autocommit_non_locking(trx)) {
|
|
|
|
UT_LIST_ADD_FIRST(trx_list, trx_sys->ro_trx_list, trx);
|
|
|
|
ut_d(trx->in_ro_trx_list = TRUE);
|
|
|
|
}
|
|
|
|
} else {
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
ut_ad(trx->rseg != NULL
|
|
|
|
|| srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
ut_ad(!trx_is_autocommit_non_locking(trx));
|
|
|
|
UT_LIST_ADD_FIRST(trx_list, trx_sys->rw_trx_list, trx);
|
|
|
|
ut_d(trx->in_rw_trx_list = TRUE);
|
2014-02-26 16:25:11 +01:00
|
|
|
|
|
|
|
#ifdef UNIV_DEBUG
|
|
|
|
if (trx->id > trx_sys->rw_max_trx_id) {
|
|
|
|
trx_sys->rw_max_trx_id = trx->id;
|
|
|
|
}
|
|
|
|
#endif /* UNIV_DEBUG */
|
2013-12-16 15:38:05 +01:00
|
|
|
|
|
|
|
trx_reserve_descriptor(trx);
|
|
|
|
}
|
2009-03-26 07:11:11 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
ut_ad(trx_sys_validate_trx_list());
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
mutex_exit(&trx_sys->mutex);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
trx->start_time = ut_time();
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
MONITOR_INC(MONITOR_TRX_ACTIVE);
|
2008-12-01 07:10:29 +01:00
|
|
|
}
|
|
|
|
|
2009-09-07 12:22:53 +02:00
|
|
|
/****************************************************************//**
|
2011-07-14 21:22:41 +02:00
|
|
|
Set the transaction serialisation number. */
|
|
|
|
static
|
2008-12-01 07:10:29 +01:00
|
|
|
void
|
2011-07-14 21:22:41 +02:00
|
|
|
trx_serialisation_number_get(
|
|
|
|
/*=========================*/
|
|
|
|
trx_t* trx) /*!< in: transaction */
|
2008-12-01 07:10:29 +01:00
|
|
|
{
|
|
|
|
trx_rseg_t* rseg;
|
2011-07-14 21:22:41 +02:00
|
|
|
|
|
|
|
rseg = trx->rseg;
|
|
|
|
|
|
|
|
ut_ad(mutex_own(&rseg->mutex));
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
mutex_enter(&trx_sys->mutex);
|
2011-07-14 21:22:41 +02:00
|
|
|
|
|
|
|
trx->no = trx_sys_get_new_trx_id();
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
if (UNIV_LIKELY(!trx->in_trx_serial_list)) {
|
2013-05-08 09:52:54 +02:00
|
|
|
|
|
|
|
UT_LIST_ADD_LAST(trx_serial_list, trx_sys->trx_serial_list,
|
|
|
|
trx);
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
trx->in_trx_serial_list = true;
|
2013-05-08 09:52:54 +02:00
|
|
|
}
|
|
|
|
|
2011-07-14 21:22:41 +02:00
|
|
|
/* If the rollack segment is not empty then the
|
|
|
|
new trx_t::no can't be less than any trx_t::no
|
|
|
|
already in the rollback segment. User threads only
|
|
|
|
produce events when a rollback segment is empty. */
|
|
|
|
|
|
|
|
if (rseg->last_page_no == FIL_NULL) {
|
|
|
|
void* ptr;
|
|
|
|
rseg_queue_t rseg_queue;
|
|
|
|
|
|
|
|
rseg_queue.rseg = rseg;
|
|
|
|
rseg_queue.trx_no = trx->no;
|
|
|
|
|
|
|
|
mutex_enter(&purge_sys->bh_mutex);
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
/* This is to reduce the pressure on the trx_sys_t::mutex
|
2011-07-14 21:22:41 +02:00
|
|
|
though in reality it should make very little (read no)
|
|
|
|
difference because this code path is only taken when the
|
|
|
|
rbs is empty. */
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
mutex_exit(&trx_sys->mutex);
|
2011-07-14 21:22:41 +02:00
|
|
|
|
|
|
|
ptr = ib_bh_push(purge_sys->ib_bh, &rseg_queue);
|
|
|
|
ut_a(ptr);
|
|
|
|
|
|
|
|
mutex_exit(&purge_sys->bh_mutex);
|
|
|
|
} else {
|
2013-12-16 15:38:05 +01:00
|
|
|
mutex_exit(&trx_sys->mutex);
|
2011-07-14 21:22:41 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/****************************************************************//**
|
|
|
|
Assign the transaction its history serialisation number and write the
|
2013-12-16 15:38:05 +01:00
|
|
|
update UNDO log record to the assigned rollback segment. */
|
2016-08-10 19:24:58 +02:00
|
|
|
static MY_ATTRIBUTE((nonnull))
|
2013-12-16 15:38:05 +01:00
|
|
|
void
|
2011-07-14 21:22:41 +02:00
|
|
|
trx_write_serialisation_history(
|
|
|
|
/*============================*/
|
2013-12-16 15:38:05 +01:00
|
|
|
trx_t* trx, /*!< in/out: transaction */
|
|
|
|
mtr_t* mtr) /*!< in/out: mini-transaction */
|
2011-07-14 21:22:41 +02:00
|
|
|
{
|
|
|
|
trx_rseg_t* rseg;
|
2008-12-01 07:10:29 +01:00
|
|
|
|
|
|
|
rseg = trx->rseg;
|
|
|
|
|
2011-07-14 21:22:41 +02:00
|
|
|
/* Change the undo log segment states from TRX_UNDO_ACTIVE
|
|
|
|
to some other state: these modifications to the file data
|
|
|
|
structure define the transaction as committed in the file
|
|
|
|
based domain, at the serialization point of the log sequence
|
|
|
|
number lsn obtained below. */
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2011-07-14 21:22:41 +02:00
|
|
|
if (trx->update_undo != NULL) {
|
|
|
|
page_t* undo_hdr_page;
|
|
|
|
trx_undo_t* undo = trx->update_undo;
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2011-07-14 21:22:41 +02:00
|
|
|
/* We have to hold the rseg mutex because update
|
|
|
|
log headers have to be put to the history list in the
|
|
|
|
(serialisation) order of the UNDO trx number. This is
|
|
|
|
required for the purge in-memory data structures too. */
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2011-07-14 21:22:41 +02:00
|
|
|
mutex_enter(&rseg->mutex);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2011-07-14 21:22:41 +02:00
|
|
|
/* Assign the transaction serialisation number and also
|
|
|
|
update the purge min binary heap if this is the first
|
|
|
|
UNDO log being written to the assigned rollback segment. */
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2011-07-14 21:22:41 +02:00
|
|
|
trx_serialisation_number_get(trx);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2011-07-14 21:22:41 +02:00
|
|
|
/* It is not necessary to obtain trx->undo_mutex here
|
|
|
|
because only a single OS thread is allowed to do the
|
|
|
|
transaction commit for this transaction. */
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
undo_hdr_page = trx_undo_set_state_at_finish(undo, mtr);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
trx_undo_update_cleanup(trx, undo_hdr_page, mtr);
|
2011-07-14 21:22:41 +02:00
|
|
|
} else {
|
|
|
|
mutex_enter(&rseg->mutex);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (trx->insert_undo != NULL) {
|
2013-12-16 15:38:05 +01:00
|
|
|
trx_undo_set_state_at_finish(trx->insert_undo, mtr);
|
2011-07-14 21:22:41 +02:00
|
|
|
}
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2011-07-14 21:22:41 +02:00
|
|
|
mutex_exit(&rseg->mutex);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
MONITOR_INC(MONITOR_TRX_COMMIT_UNDO);
|
|
|
|
|
2011-07-14 21:22:41 +02:00
|
|
|
/* Update the latest MySQL binlog name and offset info
|
|
|
|
in trx sys header if MySQL binlogging is on or the database
|
|
|
|
server is a MySQL replication slave */
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2011-07-14 21:22:41 +02:00
|
|
|
if (trx->mysql_log_file_name
|
|
|
|
&& trx->mysql_log_file_name[0] != '\0') {
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2011-07-14 21:22:41 +02:00
|
|
|
trx_sys_update_mysql_binlog_offset(
|
|
|
|
trx->mysql_log_file_name,
|
|
|
|
trx->mysql_log_offset,
|
2013-12-16 15:38:05 +01:00
|
|
|
TRX_SYS_MYSQL_LOG_INFO, mtr);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2011-07-14 21:22:41 +02:00
|
|
|
trx->mysql_log_file_name = NULL;
|
|
|
|
}
|
2013-12-16 15:38:05 +01:00
|
|
|
}
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
/********************************************************************
|
|
|
|
Finalize a transaction containing updates for a FTS table. */
|
2016-08-10 19:24:58 +02:00
|
|
|
static MY_ATTRIBUTE((nonnull))
|
2013-12-16 15:38:05 +01:00
|
|
|
void
|
|
|
|
trx_finalize_for_fts_table(
|
|
|
|
/*=======================*/
|
|
|
|
fts_trx_table_t* ftt) /* in: FTS trx table */
|
|
|
|
{
|
|
|
|
fts_t* fts = ftt->table->fts;
|
|
|
|
fts_doc_ids_t* doc_ids = ftt->added_doc_ids;
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
mutex_enter(&fts->bg_threads_mutex);
|
2011-07-14 21:22:41 +02:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
if (fts->fts_status & BG_THREAD_STOP) {
|
|
|
|
/* The table is about to be dropped, no use
|
|
|
|
adding anything to its work queue. */
|
2011-07-14 21:22:41 +02:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
mutex_exit(&fts->bg_threads_mutex);
|
|
|
|
} else {
|
|
|
|
mem_heap_t* heap;
|
|
|
|
mutex_exit(&fts->bg_threads_mutex);
|
2013-07-16 14:55:47 +02:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
ut_a(fts->add_wq);
|
2011-07-14 21:22:41 +02:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
heap = static_cast<mem_heap_t*>(doc_ids->self_heap->arg);
|
|
|
|
|
|
|
|
ib_wqueue_add(fts->add_wq, doc_ids, heap);
|
|
|
|
|
|
|
|
/* fts_trx_table_t no longer owns the list. */
|
|
|
|
ftt->added_doc_ids = NULL;
|
|
|
|
}
|
2011-07-14 21:22:41 +02:00
|
|
|
}
|
2009-04-07 22:51:15 +02:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
/******************************************************************//**
|
|
|
|
Finalize a transaction containing updates to FTS tables. */
|
2016-08-10 19:24:58 +02:00
|
|
|
static MY_ATTRIBUTE((nonnull))
|
2011-07-14 21:22:41 +02:00
|
|
|
void
|
2013-12-16 15:38:05 +01:00
|
|
|
trx_finalize_for_fts(
|
|
|
|
/*=================*/
|
|
|
|
trx_t* trx, /*!< in/out: transaction */
|
|
|
|
bool is_commit) /*!< in: true if the transaction was
|
|
|
|
committed, false if it was rolled back. */
|
|
|
|
{
|
|
|
|
if (is_commit) {
|
|
|
|
const ib_rbt_node_t* node;
|
|
|
|
ib_rbt_t* tables;
|
|
|
|
fts_savepoint_t* savepoint;
|
|
|
|
|
|
|
|
savepoint = static_cast<fts_savepoint_t*>(
|
|
|
|
ib_vector_last(trx->fts_trx->savepoints));
|
|
|
|
|
|
|
|
tables = savepoint->tables;
|
|
|
|
|
|
|
|
for (node = rbt_first(tables);
|
|
|
|
node;
|
|
|
|
node = rbt_next(tables, node)) {
|
|
|
|
fts_trx_table_t** ftt;
|
|
|
|
|
|
|
|
ftt = rbt_value(fts_trx_table_t*, node);
|
|
|
|
|
|
|
|
if ((*ftt)->added_doc_ids) {
|
|
|
|
trx_finalize_for_fts_table(*ftt);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fts_trx_free(trx->fts_trx);
|
|
|
|
trx->fts_trx = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**********************************************************************//**
|
|
|
|
If required, flushes the log to disk based on the value of
|
|
|
|
innodb_flush_log_at_trx_commit. */
|
|
|
|
static
|
|
|
|
void
|
|
|
|
trx_flush_log_if_needed_low(
|
|
|
|
/*========================*/
|
|
|
|
lsn_t lsn, /*!< in: lsn up to which logs are to be
|
|
|
|
flushed. */
|
2011-07-14 21:22:41 +02:00
|
|
|
trx_t* trx) /*!< in: transaction */
|
|
|
|
{
|
2013-12-16 15:38:05 +01:00
|
|
|
ulint flush_log_at_trx_commit;
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
flush_log_at_trx_commit = srv_use_global_flush_log_at_trx_commit
|
|
|
|
? thd_flush_log_at_trx_commit(NULL)
|
|
|
|
: thd_flush_log_at_trx_commit(trx->mysql_thd);
|
2011-07-14 21:22:41 +02:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
switch (flush_log_at_trx_commit) {
|
|
|
|
case 0:
|
|
|
|
/* Do nothing */
|
|
|
|
break;
|
|
|
|
case 1:
|
2013-12-22 17:06:50 +01:00
|
|
|
case 3:
|
2013-12-16 15:38:05 +01:00
|
|
|
/* Write the log and optionally flush it to disk */
|
|
|
|
log_write_up_to(lsn, LOG_WAIT_ONE_GROUP,
|
|
|
|
srv_unix_file_flush_method != SRV_UNIX_NOSYNC);
|
|
|
|
break;
|
|
|
|
case 2:
|
|
|
|
/* Write the log but do not flush it to disk */
|
|
|
|
log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
|
|
|
|
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
ut_error;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**********************************************************************//**
|
|
|
|
If required, flushes the log to disk based on the value of
|
|
|
|
innodb_flush_log_at_trx_commit. */
|
2016-08-10 19:24:58 +02:00
|
|
|
static MY_ATTRIBUTE((nonnull))
|
2013-12-16 15:38:05 +01:00
|
|
|
void
|
|
|
|
trx_flush_log_if_needed(
|
|
|
|
/*====================*/
|
|
|
|
lsn_t lsn, /*!< in: lsn up to which logs are to be
|
|
|
|
flushed. */
|
|
|
|
trx_t* trx) /*!< in/out: transaction */
|
|
|
|
{
|
|
|
|
trx->op_info = "flushing log";
|
|
|
|
trx_flush_log_if_needed_low(lsn, trx);
|
|
|
|
trx->op_info = "";
|
|
|
|
}
|
|
|
|
|
|
|
|
/****************************************************************//**
|
|
|
|
Commits a transaction in memory. */
|
2016-08-10 19:24:58 +02:00
|
|
|
static MY_ATTRIBUTE((nonnull))
|
2013-12-16 15:38:05 +01:00
|
|
|
void
|
|
|
|
trx_commit_in_memory(
|
|
|
|
/*=================*/
|
|
|
|
trx_t* trx, /*!< in/out: transaction */
|
|
|
|
lsn_t lsn) /*!< in: log sequence number of the mini-transaction
|
|
|
|
commit of trx_write_serialisation_history(), or 0
|
|
|
|
if the transaction did not modify anything */
|
|
|
|
{
|
2011-07-14 21:22:41 +02:00
|
|
|
trx->must_flush_log_later = FALSE;
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
if (trx_is_autocommit_non_locking(trx)) {
|
|
|
|
ut_ad(trx->read_only);
|
|
|
|
ut_a(!trx->is_recovered);
|
|
|
|
ut_ad(trx->rseg == NULL);
|
|
|
|
ut_ad(!trx->in_ro_trx_list);
|
|
|
|
ut_ad(!trx->in_rw_trx_list);
|
2011-07-14 21:22:41 +02:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
/* Note: We are asserting without holding the lock mutex. But
|
|
|
|
that is OK because this transaction is not waiting and cannot
|
|
|
|
be rolled back and no new locks can (or should not) be added
|
|
|
|
becuase it is flagged as a non-locking read-only transaction. */
|
|
|
|
|
|
|
|
ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
|
|
|
|
|
|
|
|
/* This state change is not protected by any mutex, therefore
|
|
|
|
there is an inherent race here around state transition during
|
|
|
|
printouts. We ignore this race for the sake of efficiency.
|
|
|
|
However, the trx_sys_t::mutex will protect the trx_t instance
|
|
|
|
and it cannot be removed from the mysql_trx_list and freed
|
|
|
|
without first acquiring the trx_sys_t::mutex. */
|
|
|
|
|
|
|
|
ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
|
|
|
|
|
|
|
|
trx->state = TRX_STATE_NOT_STARTED;
|
2011-07-14 21:22:41 +02:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
read_view_remove(trx->global_read_view, false);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
MONITOR_INC(MONITOR_TRX_NL_RO_COMMIT);
|
2011-07-14 21:22:41 +02:00
|
|
|
} else {
|
2013-12-16 15:38:05 +01:00
|
|
|
lock_trx_release_locks(trx);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
/* Remove the transaction from the list of active
|
|
|
|
transactions now that it no longer holds any user locks. */
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
ut_ad(trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));
|
2011-07-14 21:22:41 +02:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
mutex_enter(&trx_sys->mutex);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
assert_trx_in_list(trx);
|
|
|
|
|
|
|
|
if (trx->read_only) {
|
|
|
|
UT_LIST_REMOVE(trx_list, trx_sys->ro_trx_list, trx);
|
|
|
|
ut_d(trx->in_ro_trx_list = FALSE);
|
|
|
|
MONITOR_INC(MONITOR_TRX_RO_COMMIT);
|
|
|
|
} else {
|
|
|
|
UT_LIST_REMOVE(trx_list, trx_sys->rw_trx_list, trx);
|
|
|
|
ut_d(trx->in_rw_trx_list = FALSE);
|
|
|
|
ut_ad(trx_sys->descr_n_used <=
|
|
|
|
UT_LIST_GET_LEN(trx_sys->rw_trx_list));
|
|
|
|
MONITOR_INC(MONITOR_TRX_RW_COMMIT);
|
|
|
|
}
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
/* If this transaction came from trx_allocate_for_mysql(),
|
|
|
|
trx->in_mysql_trx_list would hold. In that case, the
|
|
|
|
trx->state change must be protected by trx_sys->mutex, so that
|
|
|
|
lock_print_info_all_transactions() will have a consistent
|
|
|
|
view. */
|
2008-12-03 06:06:00 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
trx->state = TRX_STATE_NOT_STARTED;
|
2008-12-03 06:06:00 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
/* We already own the trx_sys_t::mutex, by doing it here we
|
|
|
|
avoid a potential context switch later. */
|
|
|
|
read_view_remove(trx->global_read_view, true);
|
|
|
|
|
|
|
|
ut_ad(trx_sys_validate_trx_list());
|
|
|
|
|
|
|
|
mutex_exit(&trx_sys->mutex);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (trx->global_read_view != NULL) {
|
2008-12-01 07:10:29 +01:00
|
|
|
|
|
|
|
trx->global_read_view = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
trx->read_view = NULL;
|
|
|
|
|
|
|
|
if (lsn) {
|
2010-09-03 17:41:57 +02:00
|
|
|
ulint flush_log_at_trx_commit;
|
2008-12-01 07:10:29 +01:00
|
|
|
|
|
|
|
if (trx->insert_undo != NULL) {
|
|
|
|
|
|
|
|
trx_undo_insert_cleanup(trx);
|
|
|
|
}
|
|
|
|
|
2011-07-14 21:22:41 +02:00
|
|
|
if (srv_use_global_flush_log_at_trx_commit) {
|
|
|
|
flush_log_at_trx_commit = thd_flush_log_at_trx_commit(NULL);
|
2010-09-03 17:41:57 +02:00
|
|
|
} else {
|
2011-07-14 21:22:41 +02:00
|
|
|
flush_log_at_trx_commit = thd_flush_log_at_trx_commit(trx->mysql_thd);
|
2010-09-03 17:41:57 +02:00
|
|
|
}
|
|
|
|
|
2008-12-01 07:10:29 +01:00
|
|
|
/* NOTE that we could possibly make a group commit more
|
|
|
|
efficient here: call os_thread_yield here to allow also other
|
|
|
|
trxs to come to commit! */
|
|
|
|
|
|
|
|
/*-------------------------------------*/
|
|
|
|
|
|
|
|
/* Depending on the my.cnf options, we may now write the log
|
|
|
|
buffer to the log files, making the transaction durable if
|
|
|
|
the OS does not crash. We may also flush the log files to
|
|
|
|
disk, making the transaction durable also at an OS crash or a
|
|
|
|
power outage.
|
|
|
|
|
|
|
|
The idea in InnoDB's group commit is that a group of
|
|
|
|
transactions gather behind a trx doing a physical disk write
|
|
|
|
to log files, and when that physical write has been completed,
|
|
|
|
one of those transactions does a write which commits the whole
|
|
|
|
group. Note that this group commit will only bring benefit if
|
|
|
|
there are > 2 users in the database. Then at least 2 users can
|
|
|
|
gather behind one doing the physical log write to disk.
|
|
|
|
|
2009-09-07 12:22:53 +02:00
|
|
|
If we are calling trx_commit() under prepare_commit_mutex, we
|
2008-12-01 07:10:29 +01:00
|
|
|
will delay possible log write and flush to a separate function
|
|
|
|
trx_commit_complete_for_mysql(), which is only called when the
|
2009-09-07 12:22:53 +02:00
|
|
|
thread has released the mutex. This is to make the
|
|
|
|
group commit algorithm to work. Otherwise, the prepare_commit
|
2008-12-01 07:10:29 +01:00
|
|
|
mutex would serialize all commits and prevent a group of
|
|
|
|
transactions from gathering. */
|
|
|
|
|
|
|
|
if (trx->flush_log_later) {
|
|
|
|
/* Do nothing yet */
|
|
|
|
trx->must_flush_log_later = TRUE;
|
2013-12-16 15:38:05 +01:00
|
|
|
} else if (flush_log_at_trx_commit == 0
|
|
|
|
|| thd_requested_durability(trx->mysql_thd)
|
|
|
|
== HA_IGNORE_DURABILITY) {
|
2008-12-01 07:10:29 +01:00
|
|
|
/* Do nothing */
|
|
|
|
} else {
|
2013-12-16 15:38:05 +01:00
|
|
|
trx_flush_log_if_needed(lsn, trx);
|
2008-12-01 07:10:29 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
trx->commit_lsn = lsn;
|
2014-11-18 18:11:15 +01:00
|
|
|
|
|
|
|
/* Tell server some activity has happened, since the trx
|
|
|
|
does changes something. Background utility threads like
|
|
|
|
master thread, purge thread or page_cleaner thread might
|
|
|
|
have some work to do. */
|
|
|
|
srv_active_wake_master_thread();
|
2008-12-01 07:10:29 +01:00
|
|
|
}
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
/* undo_no is non-zero if we're doing the final commit. */
|
|
|
|
bool not_rollback = trx->undo_no != 0;
|
|
|
|
/* Free all savepoints, starting from the first. */
|
|
|
|
trx_named_savept_t* savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
|
|
|
|
trx_roll_savepoints_free(trx, savep);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
|
|
|
trx->rseg = NULL;
|
2011-07-14 21:22:41 +02:00
|
|
|
trx->undo_no = 0;
|
|
|
|
trx->last_sql_stat_start.least_undo_no = 0;
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
trx->ddl = false;
|
|
|
|
#ifdef UNIV_DEBUG
|
|
|
|
ut_ad(trx->start_file != 0);
|
|
|
|
ut_ad(trx->start_line != 0);
|
|
|
|
trx->start_file = 0;
|
|
|
|
trx->start_line = 0;
|
|
|
|
#endif /* UNIV_DEBUG */
|
2013-05-08 09:52:54 +02:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
trx->will_lock = 0;
|
|
|
|
trx->read_only = FALSE;
|
|
|
|
trx->auto_commit = FALSE;
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
if (trx->fts_trx) {
|
|
|
|
trx_finalize_for_fts(trx, not_rollback);
|
|
|
|
}
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
ut_ad(trx->lock.wait_thr == NULL);
|
|
|
|
ut_ad(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
|
|
|
|
ut_ad(!trx->in_ro_trx_list);
|
|
|
|
ut_ad(!trx->in_rw_trx_list);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
trx->dict_operation = TRX_DICT_OP_NONE;
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
trx->error_state = DB_SUCCESS;
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
/* trx->in_mysql_trx_list would hold between
|
|
|
|
trx_allocate_for_mysql() and trx_free_for_mysql(). It does not
|
|
|
|
hold for recovered transactions or system transactions. */
|
2008-12-01 07:10:29 +01:00
|
|
|
}
|
|
|
|
|
2009-09-07 12:22:53 +02:00
|
|
|
/****************************************************************//**
|
2013-12-16 15:38:05 +01:00
|
|
|
Commits a transaction and a mini-transaction. */
|
2008-12-01 07:10:29 +01:00
|
|
|
UNIV_INTERN
|
|
|
|
void
|
2013-12-16 15:38:05 +01:00
|
|
|
trx_commit_low(
|
|
|
|
/*===========*/
|
|
|
|
trx_t* trx, /*!< in/out: transaction */
|
|
|
|
mtr_t* mtr) /*!< in/out: mini-transaction (will be committed),
|
|
|
|
or NULL if trx made no modifications */
|
2008-12-01 07:10:29 +01:00
|
|
|
{
|
2013-12-16 15:38:05 +01:00
|
|
|
lsn_t lsn;
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
assert_trx_nonlocking_or_in_list(trx);
|
|
|
|
ut_ad(!trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));
|
|
|
|
ut_ad(!mtr || mtr->state == MTR_ACTIVE);
|
|
|
|
ut_ad(!mtr == !(trx->insert_undo || trx->update_undo));
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
/* undo_no is non-zero if we're doing the final commit. */
|
|
|
|
if (trx->fts_trx && trx->undo_no != 0) {
|
|
|
|
dberr_t error;
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
ut_a(!trx_is_autocommit_non_locking(trx));
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
error = fts_commit(trx);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
/* FTS-FIXME: Temporarily tolerate DB_DUPLICATE_KEY
|
|
|
|
instead of dying. This is a possible scenario if there
|
|
|
|
is a crash between insert to DELETED table committing
|
|
|
|
and transaction committing. The fix would be able to
|
|
|
|
return error from this function */
|
|
|
|
if (error != DB_SUCCESS && error != DB_DUPLICATE_KEY) {
|
|
|
|
/* FTS-FIXME: once we can return values from this
|
|
|
|
function, we should do so and signal an error
|
|
|
|
instead of just dying. */
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
ut_error;
|
2008-12-01 07:10:29 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
if (mtr) {
|
|
|
|
trx_write_serialisation_history(trx, mtr);
|
|
|
|
/* The following call commits the mini-transaction, making the
|
|
|
|
whole transaction committed in the file-based world, at this
|
|
|
|
log sequence number. The transaction becomes 'durable' when
|
|
|
|
we write the log to disk, but in the logical sense the commit
|
|
|
|
in the file-based data structures (undo logs etc.) happens
|
|
|
|
here.
|
|
|
|
|
|
|
|
NOTE that transaction numbers, which are assigned only to
|
|
|
|
transactions with an update undo log, do not necessarily come
|
|
|
|
in exactly the same order as commit lsn's, if the transactions
|
|
|
|
have different rollback segments. To get exactly the same
|
|
|
|
order we should hold the kernel mutex up to this point,
|
|
|
|
adding to the contention of the kernel mutex. However, if
|
|
|
|
a transaction T2 is able to see modifications made by
|
|
|
|
a transaction T1, T2 will always get a bigger transaction
|
|
|
|
number and a bigger commit lsn than T1. */
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
/*--------------*/
|
|
|
|
mtr_commit(mtr);
|
|
|
|
/*--------------*/
|
|
|
|
lsn = mtr->end_lsn;
|
2008-12-01 07:10:29 +01:00
|
|
|
} else {
|
2013-12-16 15:38:05 +01:00
|
|
|
lsn = 0;
|
2008-12-01 07:10:29 +01:00
|
|
|
}
|
2013-12-16 15:38:05 +01:00
|
|
|
|
|
|
|
trx_commit_in_memory(trx, lsn);
|
2008-12-01 07:10:29 +01:00
|
|
|
}
|
|
|
|
|
2009-09-07 12:22:53 +02:00
|
|
|
/****************************************************************//**
|
2013-12-16 15:38:05 +01:00
|
|
|
Commits a transaction. */
|
2008-12-01 07:10:29 +01:00
|
|
|
UNIV_INTERN
|
|
|
|
void
|
2013-12-16 15:38:05 +01:00
|
|
|
trx_commit(
|
|
|
|
/*=======*/
|
|
|
|
trx_t* trx) /*!< in/out: transaction */
|
2008-12-01 07:10:29 +01:00
|
|
|
{
|
2013-12-16 15:38:05 +01:00
|
|
|
mtr_t local_mtr;
|
|
|
|
mtr_t* mtr;
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
if (trx->insert_undo || trx->update_undo) {
|
|
|
|
mtr = &local_mtr;
|
|
|
|
mtr_start(mtr);
|
2008-12-01 07:10:29 +01:00
|
|
|
} else {
|
2013-12-16 15:38:05 +01:00
|
|
|
mtr = NULL;
|
2008-12-01 07:10:29 +01:00
|
|
|
}
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
trx_commit_low(trx, mtr);
|
2008-12-01 07:10:29 +01:00
|
|
|
}
|
|
|
|
|
2009-09-07 12:22:53 +02:00
|
|
|
/****************************************************************//**
|
2013-12-16 15:38:05 +01:00
|
|
|
Cleans up a transaction at database startup. The cleanup is needed if
|
|
|
|
the transaction already got to the middle of a commit when the database
|
|
|
|
crashed, and we cannot roll it back. */
|
2008-12-01 07:10:29 +01:00
|
|
|
UNIV_INTERN
|
|
|
|
void
|
2013-12-16 15:38:05 +01:00
|
|
|
trx_cleanup_at_db_startup(
|
|
|
|
/*======================*/
|
|
|
|
trx_t* trx) /*!< in: transaction */
|
2008-12-01 07:10:29 +01:00
|
|
|
{
|
2013-12-16 15:38:05 +01:00
|
|
|
ut_ad(trx->is_recovered);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
if (trx->insert_undo != NULL) {
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
trx_undo_insert_cleanup(trx);
|
2008-12-01 07:10:29 +01:00
|
|
|
}
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
trx->rseg = NULL;
|
|
|
|
trx->undo_no = 0;
|
|
|
|
trx->last_sql_stat_start.least_undo_no = 0;
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
mutex_enter(&trx_sys->mutex);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
ut_a(!trx->read_only);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
UT_LIST_REMOVE(trx_list, trx_sys->rw_trx_list, trx);
|
|
|
|
ut_ad(trx_sys->descr_n_used <= UT_LIST_GET_LEN(trx_sys->rw_trx_list));
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
assert_trx_in_rw_list(trx);
|
|
|
|
ut_d(trx->in_rw_trx_list = FALSE);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
trx->state = TRX_STATE_NOT_STARTED;
|
|
|
|
trx_release_descriptor(trx);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
mutex_exit(&trx_sys->mutex);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
/* Change the transaction state without mutex protection, now
|
|
|
|
that it no longer is in the trx_list. Recovered transactions
|
|
|
|
are never placed in the mysql_trx_list. */
|
|
|
|
ut_ad(trx->is_recovered);
|
|
|
|
ut_ad(!trx->in_ro_trx_list);
|
|
|
|
ut_ad(!trx->in_rw_trx_list);
|
|
|
|
ut_ad(!trx->in_mysql_trx_list);
|
2008-12-01 07:10:29 +01:00
|
|
|
}
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
/********************************************************************//**
|
|
|
|
Assigns a read view for a consistent read query. All the consistent reads
|
|
|
|
within the same transaction will get the same read view, which is created
|
|
|
|
when this function is first called for a new started transaction.
|
|
|
|
@return consistent read view */
|
2008-12-01 07:10:29 +01:00
|
|
|
UNIV_INTERN
|
2013-12-16 15:38:05 +01:00
|
|
|
read_view_t*
|
|
|
|
trx_assign_read_view(
|
|
|
|
/*=================*/
|
|
|
|
trx_t* trx) /*!< in: active transaction */
|
2008-12-01 07:10:29 +01:00
|
|
|
{
|
2013-12-16 15:38:05 +01:00
|
|
|
ut_ad(trx->state == TRX_STATE_ACTIVE);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
if (trx->read_view != NULL) {
|
|
|
|
return(trx->read_view);
|
|
|
|
}
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
trx->read_view = read_view_open_now(trx->id, trx->prebuilt_view);
|
|
|
|
trx->global_read_view = trx->read_view;
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
return(trx->read_view);
|
2008-12-01 07:10:29 +01:00
|
|
|
}
|
|
|
|
|
2014-08-06 19:23:35 +02:00
|
|
|
/********************************************************************//**
|
|
|
|
Clones the read view from another transaction. All consistent reads within
|
|
|
|
the receiver transaction will get the same read view as the donor transaction
|
|
|
|
@return read view clone */
|
|
|
|
UNIV_INTERN
|
|
|
|
read_view_t*
|
|
|
|
trx_clone_read_view(
|
|
|
|
/*================*/
|
|
|
|
trx_t* trx, /*!< in: receiver transaction */
|
|
|
|
trx_t* from_trx) /*!< in: donor transaction */
|
|
|
|
{
|
|
|
|
ut_ad(lock_mutex_own());
|
|
|
|
ut_ad(mutex_own(&trx_sys->mutex));
|
|
|
|
ut_ad(trx_mutex_own(from_trx));
|
|
|
|
ut_ad(trx->read_view == NULL);
|
|
|
|
|
|
|
|
if (from_trx->state != TRX_STATE_ACTIVE ||
|
|
|
|
from_trx->read_view == NULL) {
|
|
|
|
|
|
|
|
return(NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
trx->read_view = read_view_clone(from_trx->read_view,
|
|
|
|
trx->prebuilt_view);
|
|
|
|
|
|
|
|
read_view_add(trx->read_view);
|
|
|
|
|
|
|
|
trx->global_read_view = trx->read_view;
|
|
|
|
|
|
|
|
return(trx->read_view);
|
|
|
|
}
|
|
|
|
|
2009-09-07 12:22:53 +02:00
|
|
|
/****************************************************************//**
|
2013-12-16 15:38:05 +01:00
|
|
|
Prepares a transaction for commit/rollback. */
|
2008-12-01 07:10:29 +01:00
|
|
|
UNIV_INTERN
|
|
|
|
void
|
2013-12-16 15:38:05 +01:00
|
|
|
trx_commit_or_rollback_prepare(
|
|
|
|
/*===========================*/
|
|
|
|
trx_t* trx) /*!< in/out: transaction */
|
2008-12-01 07:10:29 +01:00
|
|
|
{
|
2013-12-16 15:38:05 +01:00
|
|
|
/* We are reading trx->state without holding trx_sys->mutex
|
|
|
|
here, because the commit or rollback should be invoked for a
|
|
|
|
running (or recovered prepared) transaction that is associated
|
|
|
|
with the current thread. */
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
switch (trx->state) {
|
|
|
|
case TRX_STATE_NOT_STARTED:
|
|
|
|
trx_start_low(trx);
|
|
|
|
/* fall through */
|
|
|
|
case TRX_STATE_ACTIVE:
|
|
|
|
case TRX_STATE_PREPARED:
|
|
|
|
/* If the trx is in a lock wait state, moves the waiting
|
|
|
|
query thread to the suspended state */
|
|
|
|
|
|
|
|
if (trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
|
|
|
|
|
|
|
|
ulint sec;
|
|
|
|
ulint ms;
|
|
|
|
ib_uint64_t now;
|
|
|
|
|
|
|
|
ut_a(trx->lock.wait_thr != NULL);
|
|
|
|
trx->lock.wait_thr->state = QUE_THR_SUSPENDED;
|
|
|
|
trx->lock.wait_thr = NULL;
|
|
|
|
|
|
|
|
if (UNIV_UNLIKELY(trx->take_stats)) {
|
|
|
|
ut_usectime(&sec, &ms);
|
|
|
|
now = (ib_uint64_t)sec * 1000000 + ms;
|
|
|
|
trx->lock_que_wait_timer
|
|
|
|
+= (ulint)
|
|
|
|
(now - trx->lock_que_wait_ustarted);
|
|
|
|
}
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
trx->lock.que_state = TRX_QUE_RUNNING;
|
|
|
|
}
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
ut_a(trx->lock.n_active_thrs == 1);
|
|
|
|
return;
|
|
|
|
case TRX_STATE_COMMITTED_IN_MEMORY:
|
|
|
|
break;
|
2008-12-01 07:10:29 +01:00
|
|
|
}
|
2013-12-16 15:38:05 +01:00
|
|
|
|
|
|
|
ut_error;
|
2008-12-01 07:10:29 +01:00
|
|
|
}
|
|
|
|
|
2009-09-07 12:22:53 +02:00
|
|
|
/*********************************************************************//**
|
|
|
|
Creates a commit command node struct.
|
|
|
|
@return own: commit node struct */
|
2008-12-01 07:10:29 +01:00
|
|
|
UNIV_INTERN
|
|
|
|
commit_node_t*
|
2013-12-16 15:38:05 +01:00
|
|
|
trx_commit_node_create(
|
|
|
|
/*===================*/
|
2009-09-07 12:22:53 +02:00
|
|
|
mem_heap_t* heap) /*!< in: mem heap where created */
|
2008-12-01 07:10:29 +01:00
|
|
|
{
|
|
|
|
commit_node_t* node;
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
node = static_cast<commit_node_t*>(mem_heap_alloc(heap, sizeof(*node)));
|
2008-12-01 07:10:29 +01:00
|
|
|
node->common.type = QUE_NODE_COMMIT;
|
|
|
|
node->state = COMMIT_NODE_SEND;
|
|
|
|
|
|
|
|
return(node);
|
|
|
|
}
|
|
|
|
|
2009-09-07 12:22:53 +02:00
|
|
|
/***********************************************************//**
|
|
|
|
Performs an execution step for a commit type node in a query graph.
|
|
|
|
@return query thread to run next, or NULL */
|
2008-12-01 07:10:29 +01:00
|
|
|
UNIV_INTERN
|
|
|
|
que_thr_t*
|
|
|
|
trx_commit_step(
|
|
|
|
/*============*/
|
2009-09-07 12:22:53 +02:00
|
|
|
que_thr_t* thr) /*!< in: query thread */
|
2008-12-01 07:10:29 +01:00
|
|
|
{
|
|
|
|
commit_node_t* node;
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
node = static_cast<commit_node_t*>(thr->run_node);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
|
|
|
ut_ad(que_node_get_type(node) == QUE_NODE_COMMIT);
|
|
|
|
|
|
|
|
if (thr->prev_node == que_node_get_parent(node)) {
|
|
|
|
node->state = COMMIT_NODE_SEND;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (node->state == COMMIT_NODE_SEND) {
|
2013-12-16 15:38:05 +01:00
|
|
|
trx_t* trx;
|
2008-12-01 07:10:29 +01:00
|
|
|
|
|
|
|
node->state = COMMIT_NODE_WAIT;
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
trx = thr_get_trx(thr);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
ut_a(trx->lock.wait_thr == NULL);
|
|
|
|
ut_a(trx->lock.que_state != TRX_QUE_LOCK_WAIT);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
trx_commit_or_rollback_prepare(trx);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
trx->lock.que_state = TRX_QUE_COMMITTING;
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
trx_commit(trx);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
ut_ad(trx->lock.wait_thr == NULL);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
trx->lock.que_state = TRX_QUE_RUNNING;
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
thr = NULL;
|
|
|
|
} else {
|
|
|
|
ut_ad(node->state == COMMIT_NODE_WAIT);
|
|
|
|
|
|
|
|
node->state = COMMIT_NODE_SEND;
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
thr->run_node = que_node_get_parent(node);
|
|
|
|
}
|
2008-12-01 07:10:29 +01:00
|
|
|
|
|
|
|
return(thr);
|
|
|
|
}
|
|
|
|
|
2009-09-07 12:22:53 +02:00
|
|
|
/**********************************************************************//**
|
|
|
|
Does the transaction commit for MySQL.
|
|
|
|
@return DB_SUCCESS or error number */
|
2008-12-01 07:10:29 +01:00
|
|
|
UNIV_INTERN
|
2013-12-16 15:38:05 +01:00
|
|
|
dberr_t
|
2008-12-01 07:10:29 +01:00
|
|
|
trx_commit_for_mysql(
|
|
|
|
/*=================*/
|
2013-12-16 15:38:05 +01:00
|
|
|
trx_t* trx) /*!< in/out: transaction */
|
2008-12-01 07:10:29 +01:00
|
|
|
{
|
|
|
|
/* Because we do not do the commit by sending an Innobase
|
|
|
|
sig to the transaction, we must here make sure that trx has been
|
|
|
|
started. */
|
|
|
|
|
|
|
|
ut_a(trx);
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
switch (trx->state) {
|
|
|
|
case TRX_STATE_NOT_STARTED:
|
|
|
|
/* Update the info whether we should skip XA steps that eat
|
|
|
|
CPU time.
|
|
|
|
|
|
|
|
For the duration of the transaction trx->support_xa is
|
|
|
|
not reread from thd so any changes in the value take
|
|
|
|
effect in the next transaction. This is to avoid a
|
|
|
|
scenario where some undo log records generated by a
|
|
|
|
transaction contain XA information and other undo log
|
|
|
|
records, generated by the same transaction do not. */
|
|
|
|
trx->support_xa = thd_supports_xa(trx->mysql_thd);
|
|
|
|
|
|
|
|
ut_d(trx->start_file = __FILE__);
|
|
|
|
ut_d(trx->start_line = __LINE__);
|
|
|
|
|
|
|
|
trx_start_low(trx);
|
|
|
|
/* fall through */
|
|
|
|
case TRX_STATE_ACTIVE:
|
|
|
|
case TRX_STATE_PREPARED:
|
|
|
|
trx->op_info = "committing";
|
|
|
|
trx_commit(trx);
|
|
|
|
MONITOR_DEC(MONITOR_TRX_ACTIVE);
|
|
|
|
trx->op_info = "";
|
|
|
|
return(DB_SUCCESS);
|
|
|
|
case TRX_STATE_COMMITTED_IN_MEMORY:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
ut_error;
|
|
|
|
return(DB_CORRUPTION);
|
2008-12-01 07:10:29 +01:00
|
|
|
}
|
|
|
|
|
2009-09-07 12:22:53 +02:00
|
|
|
/**********************************************************************//**
|
2008-12-01 07:10:29 +01:00
|
|
|
If required, flushes the log to disk if we called trx_commit_for_mysql()
|
2013-12-16 15:38:05 +01:00
|
|
|
with trx->flush_log_later == TRUE. */
|
2008-12-01 07:10:29 +01:00
|
|
|
UNIV_INTERN
|
2013-12-16 15:38:05 +01:00
|
|
|
void
|
2008-12-01 07:10:29 +01:00
|
|
|
trx_commit_complete_for_mysql(
|
|
|
|
/*==========================*/
|
2013-12-16 15:38:05 +01:00
|
|
|
trx_t* trx) /*!< in/out: transaction */
|
2008-12-01 07:10:29 +01:00
|
|
|
{
|
|
|
|
ut_a(trx);
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
if (!trx->must_flush_log_later
|
|
|
|
|| thd_requested_durability(trx->mysql_thd)
|
|
|
|
== HA_IGNORE_DURABILITY) {
|
|
|
|
return;
|
2010-09-03 17:41:57 +02:00
|
|
|
}
|
|
|
|
|
2013-12-22 17:06:50 +01:00
|
|
|
ulint flush_log_at_trx_commit;
|
|
|
|
|
|
|
|
flush_log_at_trx_commit = srv_use_global_flush_log_at_trx_commit
|
|
|
|
? thd_flush_log_at_trx_commit(NULL)
|
|
|
|
: thd_flush_log_at_trx_commit(trx->mysql_thd);
|
|
|
|
|
|
|
|
if (flush_log_at_trx_commit == 1 && trx->active_commit_ordered) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
trx_flush_log_if_needed(trx->commit_lsn, trx);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
|
|
|
trx->must_flush_log_later = FALSE;
|
|
|
|
}
|
|
|
|
|
2009-09-07 12:22:53 +02:00
|
|
|
/**********************************************************************//**
|
2008-12-01 07:10:29 +01:00
|
|
|
Marks the latest SQL statement ended. */
|
|
|
|
UNIV_INTERN
|
|
|
|
void
|
|
|
|
trx_mark_sql_stat_end(
|
|
|
|
/*==================*/
|
2009-09-07 12:22:53 +02:00
|
|
|
trx_t* trx) /*!< in: trx handle */
|
2008-12-01 07:10:29 +01:00
|
|
|
{
|
|
|
|
ut_a(trx);
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
switch (trx->state) {
|
|
|
|
case TRX_STATE_PREPARED:
|
|
|
|
case TRX_STATE_COMMITTED_IN_MEMORY:
|
|
|
|
break;
|
|
|
|
case TRX_STATE_NOT_STARTED:
|
2011-07-14 21:22:41 +02:00
|
|
|
trx->undo_no = 0;
|
2013-12-16 15:38:05 +01:00
|
|
|
/* fall through */
|
|
|
|
case TRX_STATE_ACTIVE:
|
|
|
|
trx->last_sql_stat_start.least_undo_no = trx->undo_no;
|
|
|
|
|
|
|
|
if (trx->fts_trx) {
|
|
|
|
fts_savepoint_laststmt_refresh(trx);
|
|
|
|
}
|
|
|
|
|
|
|
|
return;
|
2008-12-01 07:10:29 +01:00
|
|
|
}
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
ut_error;
|
2008-12-01 07:10:29 +01:00
|
|
|
}
|
|
|
|
|
2009-09-07 12:22:53 +02:00
|
|
|
/**********************************************************************//**
|
2013-12-16 15:38:05 +01:00
|
|
|
Prints info about a transaction.
|
|
|
|
Caller must hold trx_sys->mutex. */
|
2008-12-01 07:10:29 +01:00
|
|
|
UNIV_INTERN
|
|
|
|
void
|
2013-12-16 15:38:05 +01:00
|
|
|
trx_print_low(
|
|
|
|
/*==========*/
|
|
|
|
FILE* f,
|
|
|
|
/*!< in: output stream */
|
|
|
|
const trx_t* trx,
|
|
|
|
/*!< in: transaction */
|
|
|
|
ulint max_query_len,
|
|
|
|
/*!< in: max query length to print,
|
|
|
|
or 0 to use the default max length */
|
|
|
|
ulint n_rec_locks,
|
|
|
|
/*!< in: lock_number_of_rows_locked(&trx->lock) */
|
|
|
|
ulint n_trx_locks,
|
|
|
|
/*!< in: length of trx->lock.trx_locks */
|
|
|
|
ulint heap_size)
|
|
|
|
/*!< in: mem_heap_get_size(trx->lock.lock_heap) */
|
2008-12-01 07:10:29 +01:00
|
|
|
{
|
2013-12-16 15:38:05 +01:00
|
|
|
ibool newline;
|
|
|
|
const char* op_info;
|
|
|
|
|
|
|
|
ut_ad(mutex_own(&trx_sys->mutex));
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
fprintf(f, "TRANSACTION " TRX_ID_FMT, trx->id);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
/* trx->state cannot change from or to NOT_STARTED while we
|
|
|
|
are holding the trx_sys->mutex. It may change from ACTIVE to
|
|
|
|
PREPARED or COMMITTED. */
|
2013-05-08 09:52:54 +02:00
|
|
|
switch (trx->state) {
|
2013-12-16 15:38:05 +01:00
|
|
|
case TRX_STATE_NOT_STARTED:
|
2008-12-01 07:10:29 +01:00
|
|
|
fputs(", not started", f);
|
2013-12-16 15:38:05 +01:00
|
|
|
goto state_ok;
|
|
|
|
case TRX_STATE_ACTIVE:
|
2008-12-01 07:10:29 +01:00
|
|
|
fprintf(f, ", ACTIVE %lu sec",
|
2013-12-16 15:38:05 +01:00
|
|
|
(ulong) difftime(time(NULL), trx->start_time));
|
|
|
|
goto state_ok;
|
|
|
|
case TRX_STATE_PREPARED:
|
2008-12-01 07:10:29 +01:00
|
|
|
fprintf(f, ", ACTIVE (PREPARED) %lu sec",
|
2013-12-16 15:38:05 +01:00
|
|
|
(ulong) difftime(time(NULL), trx->start_time));
|
|
|
|
goto state_ok;
|
|
|
|
case TRX_STATE_COMMITTED_IN_MEMORY:
|
2008-12-01 07:10:29 +01:00
|
|
|
fputs(", COMMITTED IN MEMORY", f);
|
2013-12-16 15:38:05 +01:00
|
|
|
goto state_ok;
|
2008-12-01 07:10:29 +01:00
|
|
|
}
|
2013-12-16 15:38:05 +01:00
|
|
|
fprintf(f, ", state %lu", (ulong) trx->state);
|
|
|
|
ut_ad(0);
|
|
|
|
state_ok:
|
|
|
|
|
|
|
|
/* prevent a race condition */
|
|
|
|
op_info = trx->op_info;
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
if (*op_info) {
|
2008-12-01 07:10:29 +01:00
|
|
|
putc(' ', f);
|
2013-12-16 15:38:05 +01:00
|
|
|
fputs(op_info, f);
|
2008-12-01 07:10:29 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
if (trx->is_recovered) {
|
|
|
|
fputs(" recovered trx", f);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (trx->declared_to_be_inside_innodb) {
|
|
|
|
fprintf(f, ", thread declared inside InnoDB %lu",
|
|
|
|
(ulong) trx->n_tickets_to_enter_innodb);
|
|
|
|
}
|
|
|
|
|
|
|
|
putc('\n', f);
|
|
|
|
|
|
|
|
if (trx->n_mysql_tables_in_use > 0 || trx->mysql_n_tables_locked > 0) {
|
|
|
|
fprintf(f, "mysql tables in use %lu, locked %lu\n",
|
|
|
|
(ulong) trx->n_mysql_tables_in_use,
|
|
|
|
(ulong) trx->mysql_n_tables_locked);
|
|
|
|
}
|
|
|
|
|
|
|
|
newline = TRUE;
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
/* trx->lock.que_state of an ACTIVE transaction may change
|
|
|
|
while we are not holding trx->mutex. We perform a dirty read
|
|
|
|
for performance reasons. */
|
|
|
|
|
|
|
|
switch (trx->lock.que_state) {
|
2008-12-01 07:10:29 +01:00
|
|
|
case TRX_QUE_RUNNING:
|
|
|
|
newline = FALSE; break;
|
|
|
|
case TRX_QUE_LOCK_WAIT:
|
|
|
|
fputs("LOCK WAIT ", f); break;
|
|
|
|
case TRX_QUE_ROLLING_BACK:
|
|
|
|
fputs("ROLLING BACK ", f); break;
|
|
|
|
case TRX_QUE_COMMITTING:
|
|
|
|
fputs("COMMITTING ", f); break;
|
|
|
|
default:
|
2013-12-16 15:38:05 +01:00
|
|
|
fprintf(f, "que state %lu ", (ulong) trx->lock.que_state);
|
2008-12-01 07:10:29 +01:00
|
|
|
}
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
if (n_trx_locks > 0 || heap_size > 400) {
|
2008-12-01 07:10:29 +01:00
|
|
|
newline = TRUE;
|
|
|
|
|
|
|
|
fprintf(f, "%lu lock struct(s), heap size %lu,"
|
|
|
|
" %lu row lock(s)",
|
2013-12-16 15:38:05 +01:00
|
|
|
(ulong) n_trx_locks,
|
|
|
|
(ulong) heap_size,
|
|
|
|
(ulong) n_rec_locks);
|
2008-12-01 07:10:29 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
if (trx->has_search_latch) {
|
|
|
|
newline = TRUE;
|
|
|
|
fputs(", holds adaptive hash latch", f);
|
|
|
|
}
|
|
|
|
|
2011-07-14 21:22:41 +02:00
|
|
|
if (trx->undo_no != 0) {
|
2008-12-01 07:10:29 +01:00
|
|
|
newline = TRUE;
|
2014-08-06 19:23:35 +02:00
|
|
|
fprintf(f, ", undo log entries " TRX_ID_FMT, trx->undo_no);
|
2008-12-01 07:10:29 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
if (newline) {
|
|
|
|
putc('\n', f);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (trx->mysql_thd != NULL) {
|
2014-05-06 21:18:00 +02:00
|
|
|
innobase_mysql_print_thd(
|
|
|
|
f, trx->mysql_thd, static_cast<uint>(max_query_len));
|
2008-12-01 07:10:29 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
/**********************************************************************//**
|
|
|
|
Prints info about a transaction.
|
|
|
|
The caller must hold lock_sys->mutex and trx_sys->mutex.
|
|
|
|
When possible, use trx_print() instead. */
|
|
|
|
UNIV_INTERN
|
|
|
|
void
|
|
|
|
trx_print_latched(
|
|
|
|
/*==============*/
|
|
|
|
FILE* f, /*!< in: output stream */
|
|
|
|
const trx_t* trx, /*!< in: transaction */
|
|
|
|
ulint max_query_len) /*!< in: max query length to print,
|
|
|
|
or 0 to use the default max length */
|
|
|
|
{
|
|
|
|
ut_ad(lock_mutex_own());
|
|
|
|
ut_ad(mutex_own(&trx_sys->mutex));
|
|
|
|
|
|
|
|
trx_print_low(f, trx, max_query_len,
|
|
|
|
lock_number_of_rows_locked(&trx->lock),
|
|
|
|
UT_LIST_GET_LEN(trx->lock.trx_locks),
|
|
|
|
mem_heap_get_size(trx->lock.lock_heap));
|
|
|
|
}
|
|
|
|
|
|
|
|
/**********************************************************************//**
|
|
|
|
Prints info about a transaction.
|
|
|
|
Acquires and releases lock_sys->mutex and trx_sys->mutex. */
|
|
|
|
UNIV_INTERN
|
|
|
|
void
|
|
|
|
trx_print(
|
|
|
|
/*======*/
|
|
|
|
FILE* f, /*!< in: output stream */
|
|
|
|
const trx_t* trx, /*!< in: transaction */
|
|
|
|
ulint max_query_len) /*!< in: max query length to print,
|
|
|
|
or 0 to use the default max length */
|
|
|
|
{
|
|
|
|
ulint n_rec_locks;
|
|
|
|
ulint n_trx_locks;
|
|
|
|
ulint heap_size;
|
|
|
|
|
|
|
|
lock_mutex_enter();
|
|
|
|
n_rec_locks = lock_number_of_rows_locked(&trx->lock);
|
|
|
|
n_trx_locks = UT_LIST_GET_LEN(trx->lock.trx_locks);
|
|
|
|
heap_size = mem_heap_get_size(trx->lock.lock_heap);
|
|
|
|
lock_mutex_exit();
|
|
|
|
|
|
|
|
mutex_enter(&trx_sys->mutex);
|
|
|
|
trx_print_low(f, trx, max_query_len,
|
|
|
|
n_rec_locks, n_trx_locks, heap_size);
|
|
|
|
mutex_exit(&trx_sys->mutex);
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef UNIV_DEBUG
/**********************************************************************//**
Asserts that a transaction has been started. A transaction in state
TRX_STATE_NOT_STARTED triggers a fatal error.
The caller must hold trx_sys->mutex.
@return TRUE if started */
UNIV_INTERN
ibool
trx_assert_started(
/*===============*/
	const trx_t*	trx)	/*!< in: transaction */
{
	ut_ad(mutex_own(&trx_sys->mutex));

	/* Non-locking autocommits should not hold any locks and this
	function is only called from the locking code. */
	assert_trx_in_list(trx);

	/* trx->state can change from or to NOT_STARTED while we are holding
	trx_sys->mutex for non-locking autocommit selects but not for other
	types of transactions. It may change from ACTIVE to PREPARED. Unless
	we are holding lock_sys->mutex, it may also change to COMMITTED. */

	switch (trx->state) {
	case TRX_STATE_ACTIVE:
	case TRX_STATE_PREPARED:
	case TRX_STATE_COMMITTED_IN_MEMORY:
		/* Every state other than NOT_STARTED counts as started. */
		return(TRUE);

	case TRX_STATE_NOT_STARTED:
		break;
	}

	ut_error;
	return(FALSE);
}
#endif /* UNIV_DEBUG */
|
|
|
|
|
2009-09-07 12:22:53 +02:00
|
|
|
/*******************************************************************//**
|
2014-07-09 13:02:52 +02:00
|
|
|
Compares the "weight" (or size) of two transactions. The heavier the weight,
|
|
|
|
the more reluctant we will be to choose the transaction as a deadlock victim.
|
2011-07-14 21:22:41 +02:00
|
|
|
@return TRUE if weight(a) >= weight(b) */
|
2008-12-01 07:10:29 +01:00
|
|
|
UNIV_INTERN
|
2011-07-14 21:22:41 +02:00
|
|
|
ibool
|
|
|
|
trx_weight_ge(
|
|
|
|
/*==========*/
|
2009-09-07 12:22:53 +02:00
|
|
|
const trx_t* a, /*!< in: the first transaction to be compared */
|
|
|
|
const trx_t* b) /*!< in: the second transaction to be compared */
|
2008-12-01 07:10:29 +01:00
|
|
|
{
|
MDEV-5262, MDEV-5914, MDEV-5941, MDEV-6020: Deadlocks during parallel
replication causing replication to fail.
Remove the temporary fix for MDEV-5914, which used READ COMMITTED for parallel
replication worker threads. Replace it with a better, more selective solution.
The issue is with certain edge cases of InnoDB gap locks, for example between
INSERT and ranged DELETE. It is possible for the gap lock set by the DELETE to
block the INSERT, if the DELETE runs first, while the record lock set by
INSERT does not block the DELETE, if the INSERT runs first. This can cause a
conflict between the two in parallel replication on the slave even though they
ran without conflicts on the master.
With this patch, InnoDB will ask the server layer about the two involved
transactions before blocking on a gap lock. If the server layer tells InnoDB
that the transactions are already fixed wrt. commit order, as they are in
parallel replication, InnoDB will ignore the gap lock and allow the two
transactions to proceed in parallel, avoiding the conflict.
Improve the fix for MDEV-6020. When InnoDB itself detects a deadlock, it now
asks the server layer for any preferences about which transaction to roll
back. In case of parallel replication with two transactions T1 and T2 fixed to
commit T1 before T2, the server layer will ask InnoDB to roll back T2 as the
deadlock victim, not T1. This helps in some cases to avoid excessive deadlock
rollback, as T2 will in any case need to wait for T1 to complete before it can
itself commit.
Also some misc. fixes found during development and testing:
- Remove thd_rpl_is_parallel(), it is not used or needed.
- Use KILL_CONNECTION instead of KILL_QUERY when a parallel replication
worker thread is killed to resolve a deadlock with fixed commit
ordering. There are some cases, eg. in sql/sql_parse.cc, where a KILL_QUERY
can be ignored if the query otherwise completed successfully, and this
could cause the deadlock kill to be lost, so that the deadlock was not
correctly resolved.
- Fix random test failure due to missing wait_for_binlog_checkpoint.inc.
- Make sure that deadlock or other temporary errors during parallel
replication are not printed to the error log; there were some places
around the replication code with extra error logging. These conditions can
occur occasionally and are handled automatically without breaking
replication, so they should not pollute the error log.
- Fix handling of rgi->gtid_sub_id. We need to be able to access this also at
the end of a transaction, to be able to detect and resolve deadlocks due to
commit ordering. But this value was also used as a flag to mark whether
record_gtid() had been called, by being set to zero, losing the value. Now,
introduce a separate flag rgi->gtid_pending, so rgi->gtid_sub_id remains
valid for the entire duration of the transaction.
- Fix one place where the code to handle ignored errors called reset_killed()
unconditionally, even if no error was caught that should be ignored. This
could cause loss of a deadlock kill signal, breaking deadlock detection and
resolution.
- Fix a couple of missing mysql_reset_thd_for_next_command(). This could
cause a prior error condition to remain for the next event executed,
causing assertions about errors already being set and possibly giving
incorrect error handling for following event executions.
- Fix code that cleared thd->rgi_slave in the parallel replication worker
threads after each event execution; this caused the deadlock detection and
handling code to not be able to correctly process the associated
transactions as belonging to replication worker threads.
- Remove useless error code in slave_background_kill_request().
- Fix bug where wfc->wakeup_error was not cleared at
wait_for_commit::unregister_wait_for_prior_commit(). This could cause the
error condition to wrongly propagate to a later wait_for_prior_commit(),
causing spurious ER_PRIOR_COMMIT_FAILED errors.
- Do not put the binlog background thread into the processlist. It causes
too many result differences in mtr, but also it probably is not useful
for users to pollute the process list with a system thread that does not
really perform any user-visible tasks...
2014-06-10 10:13:15 +02:00
|
|
|
int pref;
|
|
|
|
|
|
|
|
/* First ask the upper server layer if it has any preference for which
|
|
|
|
to prefer as a deadlock victim. */
|
|
|
|
pref= thd_deadlock_victim_preference(a->mysql_thd, b->mysql_thd);
|
2014-07-09 13:02:52 +02:00
|
|
|
if (pref < 0) {
|
MDEV-5262, MDEV-5914, MDEV-5941, MDEV-6020: Deadlocks during parallel
replication causing replication to fail.
Remove the temporary fix for MDEV-5914, which used READ COMMITTED for parallel
replication worker threads. Replace it with a better, more selective solution.
The issue is with certain edge cases of InnoDB gap locks, for example between
INSERT and ranged DELETE. It is possible for the gap lock set by the DELETE to
block the INSERT, if the DELETE runs first, while the record lock set by
INSERT does not block the DELETE, if the INSERT runs first. This can cause a
conflict between the two in parallel replication on the slave even though they
ran without conflicts on the master.
With this patch, InnoDB will ask the server layer about the two involved
transactions before blocking on a gap lock. If the server layer tells InnoDB
that the transactions are already fixed wrt. commit order, as they are in
parallel replication, InnoDB will ignore the gap lock and allow the two
transactions to proceed in parallel, avoiding the conflict.
Improve the fix for MDEV-6020. When InnoDB itself detects a deadlock, it now
asks the server layer for any preferences about which transaction to roll
back. In case of parallel replication with two transactions T1 and T2 fixed to
commit T1 before T2, the server layer will ask InnoDB to roll back T2 as the
deadlock victim, not T1. This helps in some cases to avoid excessive deadlock
rollback, as T2 will in any case need to wait for T1 to complete before it can
itself commit.
Also some misc. fixes found during development and testing:
- Remove thd_rpl_is_parallel(), it is not used or needed.
- Use KILL_CONNECTION instead of KILL_QUERY when a parallel replication
worker thread is killed to resolve a deadlock with fixed commit
ordering. There are some cases, eg. in sql/sql_parse.cc, where a KILL_QUERY
can be ignored if the query otherwise completed successfully, and this
could cause the deadlock kill to be lost, so that the deadlock was not
correctly resolved.
- Fix random test failure due to missing wait_for_binlog_checkpoint.inc.
- Make sure that deadlock or other temporary errors during parallel
replication are not printed to the error log; there were some places
around the replication code with extra error logging. These conditions can
occur occasionally and are handled automatically without breaking
replication, so they should not pollute the error log.
- Fix handling of rgi->gtid_sub_id. We need to be able to access this also at
the end of a transaction, to be able to detect and resolve deadlocks due to
commit ordering. But this value was also used as a flag to mark whether
record_gtid() had been called, by being set to zero, losing the value. Now,
introduce a separate flag rgi->gtid_pending, so rgi->gtid_sub_id remains
valid for the entire duration of the transaction.
- Fix one place where the code to handle ignored errors called reset_killed()
unconditionally, even if no error was caught that should be ignored. This
could cause loss of a deadlock kill signal, breaking deadlock detection and
resolution.
- Fix a couple of missing mysql_reset_thd_for_next_command(). This could
cause a prior error condition to remain for the next event executed,
causing assertions about errors already being set and possibly giving
incorrect error handling for following event executions.
- Fix code that cleared thd->rgi_slave in the parallel replication worker
threads after each event execution; this caused the deadlock detection and
handling code to not be able to correctly process the associated
transactions as belonging to replication worker threads.
- Remove useless error code in slave_background_kill_request().
- Fix bug where wfc->wakeup_error was not cleared at
wait_for_commit::unregister_wait_for_prior_commit(). This could cause the
error condition to wrongly propagate to a later wait_for_prior_commit(),
causing spurious ER_PRIOR_COMMIT_FAILED errors.
- Do not put the binlog background thread into the processlist. It causes
too many result differences in mtr, but also it probably is not useful
for users to pollute the process list with a system thread that does not
really perform any user-visible tasks...
2014-06-10 10:13:15 +02:00
|
|
|
return FALSE;
|
2014-07-09 13:02:52 +02:00
|
|
|
} else if (pref > 0) {
|
MDEV-5262, MDEV-5914, MDEV-5941, MDEV-6020: Deadlocks during parallel
replication causing replication to fail.
Remove the temporary fix for MDEV-5914, which used READ COMMITTED for parallel
replication worker threads. Replace it with a better, more selective solution.
The issue is with certain edge cases of InnoDB gap locks, for example between
INSERT and ranged DELETE. It is possible for the gap lock set by the DELETE to
block the INSERT, if the DELETE runs first, while the record lock set by
INSERT does not block the DELETE, if the INSERT runs first. This can cause a
conflict between the two in parallel replication on the slave even though they
ran without conflicts on the master.
With this patch, InnoDB will ask the server layer about the two involved
transactions before blocking on a gap lock. If the server layer tells InnoDB
that the transactions are already fixed wrt. commit order, as they are in
parallel replication, InnoDB will ignore the gap lock and allow the two
transactions to proceed in parallel, avoiding the conflict.
Improve the fix for MDEV-6020. When InnoDB itself detects a deadlock, it now
asks the server layer for any preferences about which transaction to roll
back. In case of parallel replication with two transactions T1 and T2 fixed to
commit T1 before T2, the server layer will ask InnoDB to roll back T2 as the
deadlock victim, not T1. This helps in some cases to avoid excessive deadlock
rollback, as T2 will in any case need to wait for T1 to complete before it can
itself commit.
Also some misc. fixes found during development and testing:
- Remove thd_rpl_is_parallel(), it is not used or needed.
- Use KILL_CONNECTION instead of KILL_QUERY when a parallel replication
worker thread is killed to resolve a deadlock with fixed commit
ordering. There are some cases, eg. in sql/sql_parse.cc, where a KILL_QUERY
can be ignored if the query otherwise completed successfully, and this
could cause the deadlock kill to be lost, so that the deadlock was not
correctly resolved.
- Fix random test failure due to missing wait_for_binlog_checkpoint.inc.
- Make sure that deadlock or other temporary errors during parallel
replication are not printed to the error log; there were some places
around the replication code with extra error logging. These conditions can
occur occasionally and are handled automatically without breaking
replication, so they should not pollute the error log.
- Fix handling of rgi->gtid_sub_id. We need to be able to access this also at
the end of a transaction, to be able to detect and resolve deadlocks due to
commit ordering. But this value was also used as a flag to mark whether
record_gtid() had been called, by being set to zero, losing the value. Now,
introduce a separate flag rgi->gtid_pending, so rgi->gtid_sub_id remains
valid for the entire duration of the transaction.
- Fix one place where the code to handle ignored errors called reset_killed()
unconditionally, even if no error was caught that should be ignored. This
could cause loss of a deadlock kill signal, breaking deadlock detection and
resolution.
- Fix a couple of missing mysql_reset_thd_for_next_command(). This could
cause a prior error condition to remain for the next event executed,
causing assertions about errors already being set and possibly giving
incorrect error handling for following event executions.
- Fix code that cleared thd->rgi_slave in the parallel replication worker
threads after each event execution; this caused the deadlock detection and
handling code to not be able to correctly process the associated
transactions as belonging to replication worker threads.
- Remove useless error code in slave_background_kill_request().
- Fix bug where wfc->wakeup_error was not cleared at
wait_for_commit::unregister_wait_for_prior_commit(). This could cause the
error condition to wrongly propagate to a later wait_for_prior_commit(),
causing spurious ER_PRIOR_COMMIT_FAILED errors.
- Do not put the binlog background thread into the processlist. It causes
too many result differences in mtr, but also it probably is not useful
for users to pollute the process list with a system thread that does not
really perform any user-visible tasks...
2014-06-10 10:13:15 +02:00
|
|
|
return TRUE;
|
2014-07-09 13:02:52 +02:00
|
|
|
}
|
MDEV-5262, MDEV-5914, MDEV-5941, MDEV-6020: Deadlocks during parallel
replication causing replication to fail.
Remove the temporary fix for MDEV-5914, which used READ COMMITTED for parallel
replication worker threads. Replace it with a better, more selective solution.
The issue is with certain edge cases of InnoDB gap locks, for example between
INSERT and ranged DELETE. It is possible for the gap lock set by the DELETE to
block the INSERT, if the DELETE runs first, while the record lock set by
INSERT does not block the DELETE, if the INSERT runs first. This can cause a
conflict between the two in parallel replication on the slave even though they
ran without conflicts on the master.
With this patch, InnoDB will ask the server layer about the two involved
transactions before blocking on a gap lock. If the server layer tells InnoDB
that the transactions are already fixed wrt. commit order, as they are in
parallel replication, InnoDB will ignore the gap lock and allow the two
transactions to proceed in parallel, avoiding the conflict.
Improve the fix for MDEV-6020. When InnoDB itself detects a deadlock, it now
asks the server layer for any preferences about which transaction to roll
back. In case of parallel replication with two transactions T1 and T2 fixed to
commit T1 before T2, the server layer will ask InnoDB to roll back T2 as the
deadlock victim, not T1. This helps in some cases to avoid excessive deadlock
rollback, as T2 will in any case need to wait for T1 to complete before it can
itself commit.
Also some misc. fixes found during development and testing:
- Remove thd_rpl_is_parallel(), it is not used or needed.
- Use KILL_CONNECTION instead of KILL_QUERY when a parallel replication
worker thread is killed to resolve a deadlock with fixed commit
ordering. There are some cases, eg. in sql/sql_parse.cc, where a KILL_QUERY
can be ignored if the query otherwise completed successfully, and this
could cause the deadlock kill to be lost, so that the deadlock was not
correctly resolved.
- Fix random test failure due to missing wait_for_binlog_checkpoint.inc.
- Make sure that deadlock or other temporary errors during parallel
replication are not printed to the error log; there were some places
around the replication code with extra error logging. These conditions can
occur occasionally and are handled automatically without breaking
replication, so they should not pollute the error log.
- Fix handling of rgi->gtid_sub_id. We need to be able to access this also at
the end of a transaction, to be able to detect and resolve deadlocks due to
commit ordering. But this value was also used as a flag to mark whether
record_gtid() had been called, by being set to zero, losing the value. Now,
introduce a separate flag rgi->gtid_pending, so rgi->gtid_sub_id remains
valid for the entire duration of the transaction.
- Fix one place where the code to handle ignored errors called reset_killed()
unconditionally, even if no error was caught that should be ignored. This
could cause loss of a deadlock kill signal, breaking deadlock detection and
resolution.
- Fix a couple of missing mysql_reset_thd_for_next_command(). This could
cause a prior error condition to remain for the next event executed,
causing assertions about errors already being set and possibly giving
incorrect error handling for following event executions.
- Fix code that cleared thd->rgi_slave in the parallel replication worker
threads after each event execution; this caused the deadlock detection and
handling code to not be able to correctly process the associated
transactions as belonging to replication worker threads.
- Remove useless error code in slave_background_kill_request().
- Fix bug where wfc->wakeup_error was not cleared at
wait_for_commit::unregister_wait_for_prior_commit(). This could cause the
error condition to wrongly propagate to a later wait_for_prior_commit(),
causing spurious ER_PRIOR_COMMIT_FAILED errors.
- Do not put the binlog background thread into the processlist. It causes
too many result differences in mtr, but also it probably is not useful
for users to pollute the process list with a system thread that does not
really perform any user-visible tasks...
2014-06-10 10:13:15 +02:00
|
|
|
|
|
|
|
/* Upper server layer had no preference, we fall back to comparing the
|
|
|
|
number of altered/locked rows. */
|
2008-12-01 07:10:29 +01:00
|
|
|
|
|
|
|
#if 0
|
|
|
|
fprintf(stderr,
|
|
|
|
"%s TRX_WEIGHT(a): %lld+%lu, TRX_WEIGHT(b): %lld+%lu\n",
|
|
|
|
__func__,
|
2013-12-16 15:38:05 +01:00
|
|
|
a->undo_no, UT_LIST_GET_LEN(a->lock.trx_locks),
|
|
|
|
b->undo_no, UT_LIST_GET_LEN(b->lock.trx_locks));
|
2008-12-01 07:10:29 +01:00
|
|
|
#endif
|
|
|
|
|
2011-07-14 21:22:41 +02:00
|
|
|
return(TRX_WEIGHT(a) >= TRX_WEIGHT(b));
|
2008-12-01 07:10:29 +01:00
|
|
|
}
|
|
|
|
|
2009-09-07 12:22:53 +02:00
|
|
|
/****************************************************************//**
|
2008-12-01 07:10:29 +01:00
|
|
|
Prepares a transaction. */
|
2013-12-16 15:38:05 +01:00
|
|
|
static
|
2008-12-01 07:10:29 +01:00
|
|
|
void
|
2013-12-16 15:38:05 +01:00
|
|
|
trx_prepare(
|
|
|
|
/*========*/
|
|
|
|
trx_t* trx) /*!< in/out: transaction */
|
2008-12-01 07:10:29 +01:00
|
|
|
{
|
|
|
|
trx_rseg_t* rseg;
|
2013-12-16 15:38:05 +01:00
|
|
|
lsn_t lsn;
|
2008-12-01 07:10:29 +01:00
|
|
|
mtr_t mtr;
|
|
|
|
|
|
|
|
rseg = trx->rseg;
|
2013-12-16 15:38:05 +01:00
|
|
|
/* Only fresh user transactions can be prepared.
|
|
|
|
Recovered transactions cannot. */
|
|
|
|
ut_a(!trx->is_recovered);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
|
|
|
if (trx->insert_undo != NULL || trx->update_undo != NULL) {
|
|
|
|
|
|
|
|
mtr_start(&mtr);
|
|
|
|
|
|
|
|
/* Change the undo log segment states from TRX_UNDO_ACTIVE
|
|
|
|
to TRX_UNDO_PREPARED: these modifications to the file data
|
|
|
|
structure define the transaction as prepared in the
|
|
|
|
file-based world, at the serialization point of lsn. */
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
mutex_enter(&rseg->mutex);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
|
|
|
if (trx->insert_undo != NULL) {
|
|
|
|
|
|
|
|
/* It is not necessary to obtain trx->undo_mutex here
|
|
|
|
because only a single OS thread is allowed to do the
|
|
|
|
transaction prepare for this transaction. */
|
|
|
|
|
|
|
|
trx_undo_set_state_at_prepare(trx, trx->insert_undo,
|
|
|
|
&mtr);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (trx->update_undo) {
|
2010-11-24 13:04:12 +01:00
|
|
|
trx_undo_set_state_at_prepare(
|
2008-12-01 07:10:29 +01:00
|
|
|
trx, trx->update_undo, &mtr);
|
|
|
|
}
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
mutex_exit(&rseg->mutex);
|
2012-11-21 23:24:18 +01:00
|
|
|
|
2008-12-01 07:10:29 +01:00
|
|
|
/*--------------*/
|
|
|
|
mtr_commit(&mtr); /* This mtr commit makes the
|
|
|
|
transaction prepared in the file-based
|
|
|
|
world */
|
|
|
|
/*--------------*/
|
|
|
|
lsn = mtr.end_lsn;
|
2013-12-16 15:38:05 +01:00
|
|
|
ut_ad(lsn);
|
|
|
|
} else {
|
|
|
|
lsn = 0;
|
2008-12-01 07:10:29 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*--------------------------------------*/
|
2013-12-16 15:38:05 +01:00
|
|
|
ut_a(trx->state == TRX_STATE_ACTIVE);
|
|
|
|
mutex_enter(&trx_sys->mutex);
|
|
|
|
trx->state = TRX_STATE_PREPARED;
|
|
|
|
trx_sys->n_prepared_trx++;
|
|
|
|
mutex_exit(&trx_sys->mutex);
|
2008-12-01 07:10:29 +01:00
|
|
|
/*--------------------------------------*/
|
|
|
|
|
|
|
|
if (lsn) {
|
|
|
|
/* Depending on the my.cnf options, we may now write the log
|
|
|
|
buffer to the log files, making the prepared state of the
|
|
|
|
transaction durable if the OS does not crash. We may also
|
|
|
|
flush the log files to disk, making the prepared state of the
|
|
|
|
transaction durable also at an OS crash or a power outage.
|
|
|
|
|
|
|
|
The idea in InnoDB's group prepare is that a group of
|
|
|
|
transactions gather behind a trx doing a physical disk write
|
|
|
|
to log files, and when that physical write has been completed,
|
|
|
|
one of those transactions does a write which prepares the whole
|
|
|
|
group. Note that this group prepare will only bring benefit if
|
|
|
|
there are > 2 users in the database. Then at least 2 users can
|
|
|
|
gather behind one doing the physical log write to disk.
|
|
|
|
|
|
|
|
TODO: find out if MySQL holds some mutex when calling this.
|
|
|
|
That would spoil our group prepare algorithm. */
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
trx_flush_log_if_needed(lsn, trx);
|
2008-12-01 07:10:29 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-09-07 12:22:53 +02:00
|
|
|
/**********************************************************************//**
|
2013-12-16 15:38:05 +01:00
|
|
|
Does the transaction prepare for MySQL. */
|
2008-12-01 07:10:29 +01:00
|
|
|
UNIV_INTERN
|
2013-12-16 15:38:05 +01:00
|
|
|
void
|
2008-12-01 07:10:29 +01:00
|
|
|
trx_prepare_for_mysql(
|
|
|
|
/*==================*/
|
2013-12-16 15:38:05 +01:00
|
|
|
trx_t* trx) /*!< in/out: trx handle */
|
2008-12-01 07:10:29 +01:00
|
|
|
{
|
2013-12-16 15:38:05 +01:00
|
|
|
trx_start_if_not_started_xa(trx);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
|
|
|
trx->op_info = "preparing";
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
trx_prepare(trx);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
|
|
|
trx->op_info = "";
|
|
|
|
}
|
|
|
|
|
2009-09-07 12:22:53 +02:00
|
|
|
/**********************************************************************//**
|
2008-12-01 07:10:29 +01:00
|
|
|
This function is used to find number of prepared transactions and
|
2009-09-07 12:22:53 +02:00
|
|
|
their transaction objects for a recovery.
|
|
|
|
@return number of prepared transactions stored in xid_list */
|
2008-12-01 07:10:29 +01:00
|
|
|
UNIV_INTERN
|
|
|
|
int
|
|
|
|
trx_recover_for_mysql(
|
|
|
|
/*==================*/
|
2009-09-07 12:22:53 +02:00
|
|
|
XID* xid_list, /*!< in/out: prepared transactions */
|
|
|
|
ulint len) /*!< in: number of slots in xid_list */
|
2008-12-01 07:10:29 +01:00
|
|
|
{
|
2013-12-16 15:38:05 +01:00
|
|
|
const trx_t* trx;
|
|
|
|
ulint count = 0;
|
2008-12-01 07:10:29 +01:00
|
|
|
|
|
|
|
ut_ad(xid_list);
|
|
|
|
ut_ad(len);
|
|
|
|
|
|
|
|
/* We should set those transactions which are in the prepared state
|
|
|
|
to the xid_list */
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
mutex_enter(&trx_sys->mutex);
|
|
|
|
|
|
|
|
for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
|
|
|
|
trx != NULL;
|
|
|
|
trx = UT_LIST_GET_NEXT(trx_list, trx)) {
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
assert_trx_in_rw_list(trx);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
/* The state of a read-write transaction cannot change
|
|
|
|
from or to NOT_STARTED while we are holding the
|
|
|
|
trx_sys->mutex. It may change to PREPARED, but not if
|
|
|
|
trx->is_recovered. It may also change to COMMITTED. */
|
|
|
|
if (trx_state_eq(trx, TRX_STATE_PREPARED)) {
|
2008-12-01 07:10:29 +01:00
|
|
|
xid_list[count] = trx->xid;
|
|
|
|
|
|
|
|
if (count == 0) {
|
|
|
|
ut_print_timestamp(stderr);
|
|
|
|
fprintf(stderr,
|
|
|
|
" InnoDB: Starting recovery for"
|
|
|
|
" XA transactions...\n");
|
|
|
|
}
|
|
|
|
|
|
|
|
ut_print_timestamp(stderr);
|
|
|
|
fprintf(stderr,
|
|
|
|
" InnoDB: Transaction " TRX_ID_FMT " in"
|
|
|
|
" prepared state after recovery\n",
|
2013-12-16 15:38:05 +01:00
|
|
|
trx->id);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
|
|
|
ut_print_timestamp(stderr);
|
|
|
|
fprintf(stderr,
|
|
|
|
" InnoDB: Transaction contains changes"
|
2014-08-06 19:23:35 +02:00
|
|
|
" to " TRX_ID_FMT " rows\n",
|
2013-12-16 15:38:05 +01:00
|
|
|
trx->undo_no);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
|
|
|
count++;
|
|
|
|
|
|
|
|
if (count == len) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
mutex_exit(&trx_sys->mutex);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
|
|
|
if (count > 0){
|
|
|
|
ut_print_timestamp(stderr);
|
|
|
|
fprintf(stderr,
|
2013-12-16 15:38:05 +01:00
|
|
|
" InnoDB: %d transactions in prepared state"
|
2008-12-01 07:10:29 +01:00
|
|
|
" after recovery\n",
|
2013-12-16 15:38:05 +01:00
|
|
|
int (count));
|
2008-12-01 07:10:29 +01:00
|
|
|
}
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
return(int (count));
|
2008-12-01 07:10:29 +01:00
|
|
|
}
|
|
|
|
|
2009-09-07 12:22:53 +02:00
|
|
|
/*******************************************************************//**
|
2008-12-01 07:10:29 +01:00
|
|
|
This function is used to find one X/Open XA distributed transaction
|
2009-09-07 12:22:53 +02:00
|
|
|
which is in the prepared state
|
2013-12-16 15:38:05 +01:00
|
|
|
@return trx on match, the trx->xid will be invalidated;
|
|
|
|
note that the trx may have been committed, unless the caller is
|
|
|
|
holding lock_sys->mutex */
|
2016-08-10 19:24:58 +02:00
|
|
|
static MY_ATTRIBUTE((nonnull, warn_unused_result))
|
2008-12-01 07:10:29 +01:00
|
|
|
trx_t*
|
2013-12-16 15:38:05 +01:00
|
|
|
trx_get_trx_by_xid_low(
|
|
|
|
/*===================*/
|
|
|
|
const XID* xid) /*!< in: X/Open XA transaction
|
|
|
|
identifier */
|
2008-12-01 07:10:29 +01:00
|
|
|
{
|
2013-12-16 15:38:05 +01:00
|
|
|
trx_t* trx;
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
ut_ad(mutex_own(&trx_sys->mutex));
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
|
|
|
|
trx != NULL;
|
|
|
|
trx = UT_LIST_GET_NEXT(trx_list, trx)) {
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
assert_trx_in_rw_list(trx);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
|
|
|
/* Compare two X/Open XA transaction id's: their
|
|
|
|
length should be the same and binary comparison
|
2011-07-14 21:22:41 +02:00
|
|
|
of gtrid_length+bqual_length bytes should be
|
2008-12-01 07:10:29 +01:00
|
|
|
the same */
|
|
|
|
|
2011-07-14 21:22:41 +02:00
|
|
|
if (trx->is_recovered
|
2013-12-16 15:38:05 +01:00
|
|
|
&& trx_state_eq(trx, TRX_STATE_PREPARED)
|
2011-04-29 14:49:04 +02:00
|
|
|
&& xid->gtrid_length == trx->xid.gtrid_length
|
2008-12-01 07:10:29 +01:00
|
|
|
&& xid->bqual_length == trx->xid.bqual_length
|
|
|
|
&& memcmp(xid->data, trx->xid.data,
|
|
|
|
xid->gtrid_length + xid->bqual_length) == 0) {
|
2011-04-29 14:49:04 +02:00
|
|
|
|
|
|
|
/* Invalidate the XID, so that subsequent calls
|
|
|
|
will not find it. */
|
|
|
|
memset(&trx->xid, 0, sizeof(trx->xid));
|
|
|
|
trx->xid.formatID = -1;
|
2008-12-01 07:10:29 +01:00
|
|
|
break;
|
|
|
|
}
|
2013-12-16 15:38:05 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
return(trx);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*******************************************************************//**
|
|
|
|
This function is used to find one X/Open XA distributed transaction
|
|
|
|
which is in the prepared state
|
|
|
|
@return trx or NULL; on match, the trx->xid will be invalidated;
|
|
|
|
note that the trx may have been committed, unless the caller is
|
|
|
|
holding lock_sys->mutex */
|
|
|
|
UNIV_INTERN
|
|
|
|
trx_t*
|
|
|
|
trx_get_trx_by_xid(
|
|
|
|
/*===============*/
|
|
|
|
const XID* xid) /*!< in: X/Open XA transaction identifier */
|
|
|
|
{
|
|
|
|
trx_t* trx;
|
|
|
|
|
|
|
|
if (xid == NULL) {
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
return(NULL);
|
2008-12-01 07:10:29 +01:00
|
|
|
}
|
|
|
|
|
2013-12-16 15:38:05 +01:00
|
|
|
mutex_enter(&trx_sys->mutex);
|
|
|
|
|
|
|
|
/* Recovered/Resurrected transactions are always only on the
|
|
|
|
trx_sys_t::rw_trx_list. */
|
|
|
|
trx = trx_get_trx_by_xid_low(xid);
|
|
|
|
|
|
|
|
mutex_exit(&trx_sys->mutex);
|
2008-12-01 07:10:29 +01:00
|
|
|
|
2011-04-29 14:49:04 +02:00
|
|
|
return(trx);
|
2008-12-01 07:10:29 +01:00
|
|
|
}
|
2013-12-16 15:38:05 +01:00
|
|
|
|
|
|
|
/*************************************************************//**
|
|
|
|
Starts the transaction if it is not yet started. */
|
|
|
|
UNIV_INTERN
|
|
|
|
void
|
|
|
|
trx_start_if_not_started_xa_low(
|
|
|
|
/*============================*/
|
|
|
|
trx_t* trx) /*!< in: transaction */
|
|
|
|
{
|
|
|
|
switch (trx->state) {
|
|
|
|
case TRX_STATE_NOT_STARTED:
|
|
|
|
|
|
|
|
/* Update the info whether we should skip XA steps
|
|
|
|
that eat CPU time.
|
|
|
|
|
|
|
|
For the duration of the transaction trx->support_xa is
|
|
|
|
not reread from thd so any changes in the value take
|
|
|
|
effect in the next transaction. This is to avoid a
|
|
|
|
scenario where some undo generated by a transaction,
|
|
|
|
has XA stuff, and other undo, generated by the same
|
|
|
|
transaction, doesn't. */
|
|
|
|
trx->support_xa = thd_supports_xa(trx->mysql_thd);
|
|
|
|
|
|
|
|
trx_start_low(trx);
|
|
|
|
/* fall through */
|
|
|
|
case TRX_STATE_ACTIVE:
|
|
|
|
return;
|
|
|
|
case TRX_STATE_PREPARED:
|
|
|
|
case TRX_STATE_COMMITTED_IN_MEMORY:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
ut_error;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*************************************************************//**
|
|
|
|
Starts the transaction if it is not yet started. */
|
|
|
|
UNIV_INTERN
|
|
|
|
void
|
|
|
|
trx_start_if_not_started_low(
|
|
|
|
/*=========================*/
|
|
|
|
trx_t* trx) /*!< in: transaction */
|
|
|
|
{
|
|
|
|
switch (trx->state) {
|
|
|
|
case TRX_STATE_NOT_STARTED:
|
|
|
|
trx_start_low(trx);
|
|
|
|
/* fall through */
|
|
|
|
case TRX_STATE_ACTIVE:
|
|
|
|
return;
|
|
|
|
case TRX_STATE_PREPARED:
|
|
|
|
case TRX_STATE_COMMITTED_IN_MEMORY:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
ut_error;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*************************************************************//**
|
|
|
|
Starts the transaction for a DDL operation. */
|
|
|
|
UNIV_INTERN
|
|
|
|
void
|
|
|
|
trx_start_for_ddl_low(
|
|
|
|
/*==================*/
|
|
|
|
trx_t* trx, /*!< in/out: transaction */
|
|
|
|
trx_dict_op_t op) /*!< in: dictionary operation type */
|
|
|
|
{
|
|
|
|
switch (trx->state) {
|
|
|
|
case TRX_STATE_NOT_STARTED:
|
|
|
|
/* Flag this transaction as a dictionary operation, so that
|
|
|
|
the data dictionary will be locked in crash recovery. */
|
|
|
|
|
|
|
|
trx_set_dict_operation(trx, op);
|
|
|
|
|
|
|
|
/* Ensure it is not flagged as an auto-commit-non-locking
|
|
|
|
transation. */
|
|
|
|
trx->will_lock = 1;
|
|
|
|
|
|
|
|
trx->ddl = true;
|
|
|
|
|
|
|
|
trx_start_low(trx);
|
|
|
|
return;
|
|
|
|
|
|
|
|
case TRX_STATE_ACTIVE:
|
|
|
|
/* We have this start if not started idiom, therefore we
|
|
|
|
can't add stronger checks here. */
|
|
|
|
trx->ddl = true;
|
|
|
|
|
|
|
|
ut_ad(trx->dict_operation != TRX_DICT_OP_NONE);
|
|
|
|
ut_ad(trx->will_lock > 0);
|
|
|
|
return;
|
|
|
|
case TRX_STATE_PREPARED:
|
|
|
|
case TRX_STATE_COMMITTED_IN_MEMORY:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
ut_error;
|
|
|
|
}
|
|
|
|
|