/* Copyright (C) 2013-2021 Codership Oy <info@codership.com>

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License along
   with this program; if not, write to the Free Software Foundation, Inc.,
   51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA. */
#include "wsrep_thd.h"
|
|
|
|
|
|
|
|
#include "transaction.h"
|
|
|
|
#include "rpl_rli.h"
|
|
|
|
#include "log_event.h"
|
|
|
|
#include "sql_parse.h"
|
|
|
|
//#include "global_threads.h" // LOCK_thread_count, etc.
|
|
|
|
#include "sql_base.h" // close_thread_tables()
|
|
|
|
#include "mysqld.h" // start_wsrep_THD();
|
MDEV-23851 BF-BF Conflict issue because of UK GAP locks
Some DML operations on tables having unique secondary keys cause scanning
in the secondary index, for instance to find potential unique key violations
in the seconday index. This scanning may involve GAP locking in the index.
As this locking happens also when applying replication events in high priority
applier threads, there is a probabality for lock conflicts between two wsrep
high priority threads.
This PR avoids lock conflicts of high priority wsrep threads, which do
secondary index scanning e.g. for duplicate key detection.
The actual fix is the patch in sql_class.cc:thd_need_ordering_with(), where
we allow relaxed GAP locking protocol between wsrep high priority threads.
wsrep high priority threads (replication appliers, replayers and TOI processors)
are ordered by the replication provider, and they will not need serializability
support gained by secondary index GAP locks.
PR contains also a mtr test, which exercises a scenario where two replication
applier threads have a false positive conflict in GAP of unique secondary index.
The conflicting local committing transaction has to replay, and the test verifies
also that the replaying phase will not conflict with the latter repllication applier.
Commit also contains new test scenario for galera.galera_UK_conflict.test,
where replayer starts applying after a slave applier thread, with later seqno,
has advanced to commit phase. The applier and replayer have false positive GAP
lock conflict on secondary unique index, and replayer should ignore this.
This test scenario caused crash with earlier version in this PR, and to fix this,
the secondary index uniquenes checking has been relaxed even further.
Now innodb trx_t structure has new member: bool wsrep_UK_scan, which is set to
true, when high priority thread is performing unique secondary index scanning.
The member trx_t::wsrep_UK_scan is defined inside WITH_WSREP directive, to make
it possible to prepare a MariaDB build where this additional trx_t member is
not present and is not used in the code base. trx->wsrep_UK_scan is set to true
only for the duration of function call for: lock_rec_lock() trx->wsrep_UK_scan
is used only in lock_rec_has_to_wait() function to relax the need to wait if
wsrep_UK_scan is set and conflicting transaction is also high priority.
Reviewed-by: Jan Lindström <jan.lindstrom@mariadb.com>
2020-12-09 20:53:18 +01:00
|
|
|
#include "debug_sync.h"
|
2014-08-06 14:39:15 +02:00
|
|
|
|
|
|
|
#include "slave.h" // opt_log_slave_updates
|
|
|
|
#include "rpl_filter.h"
|
|
|
|
#include "rpl_rli.h"
|
|
|
|
#include "rpl_mi.h"
|
|
|
|
|
|
|
|
#if (__LP64__)
|
|
|
|
static volatile int64 wsrep_bf_aborts_counter(0);
|
|
|
|
#define WSREP_ATOMIC_LOAD_LONG my_atomic_load64
|
|
|
|
#define WSREP_ATOMIC_ADD_LONG my_atomic_add64
|
|
|
|
#else
|
|
|
|
static volatile int32 wsrep_bf_aborts_counter(0);
|
|
|
|
#define WSREP_ATOMIC_LOAD_LONG my_atomic_load32
|
|
|
|
#define WSREP_ATOMIC_ADD_LONG my_atomic_add32
|
|
|
|
#endif
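/*
  Status variable handler: load the atomic brute-force (BF) abort counter
  into wsrep_local_bf_aborts and expose it as a SHOW_LONGLONG value.
*/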
int wsrep_show_bf_aborts (THD *thd, SHOW_VAR *var, char *buff,
                          enum enum_var_type scope)
{
  wsrep_local_bf_aborts = WSREP_ATOMIC_LOAD_LONG(&wsrep_bf_aborts_counter);
  var->type = SHOW_LONGLONG;
  var->value = (char*)&wsrep_local_bf_aborts;
  return 0;
}
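/*
  Roll back a client transaction that has been brute-force (BF) aborted by a
  high priority thread: bump the BF abort counter, roll back the transaction,
  release table locks, the global read lock, transactional and explicit MDL
  locks, and clear any pending binlog table maps before marking the THD as
  ABORTED.
*/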
/* must have (&thd->LOCK_thd_data) */
void wsrep_client_rollback(THD *thd)
{
  WSREP_DEBUG("client rollback due to BF abort for (%lld), query: %s",
              (longlong) thd->thread_id, thd->query());

  WSREP_ATOMIC_ADD_LONG(&wsrep_bf_aborts_counter, 1);

  thd->wsrep_conflict_state= ABORTING;
  mysql_mutex_unlock(&thd->LOCK_thd_data);
  trans_rollback(thd);

  if (thd->locked_tables_mode && thd->lock)
  {
    WSREP_DEBUG("unlocking tables for BF abort (%lld)",
                (longlong) thd->thread_id);
    thd->locked_tables_list.unlock_locked_tables(thd);
    thd->variables.option_bits&= ~(OPTION_TABLE_LOCK);
  }

  if (thd->global_read_lock.is_acquired())
  {
    WSREP_DEBUG("unlocking GRL for BF abort (%lld)",
                (longlong) thd->thread_id);
    thd->global_read_lock.unlock_global_read_lock(thd);
  }

  /* Release transactional metadata locks. */
  thd->release_transactional_locks();

  /* release explicit MDL locks */
  thd->mdl_context.release_explicit_locks();

  if (thd->get_binlog_table_maps())
  {
    WSREP_DEBUG("clearing binlog table map for BF abort (%lld)",
                (longlong) thd->thread_id);
    thd->clear_binlog_table_maps();
  }
  mysql_mutex_lock(&thd->LOCK_thd_data);
  thd->wsrep_conflict_state= ABORTED;
}
#define NUMBER_OF_FIELDS_TO_IDENTIFY_COORDINATOR 1
#define NUMBER_OF_FIELDS_TO_IDENTIFY_WORKER 2
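/*
  Set up a minimal relay log / replication group context (connection name
  "wsrep") so that replicated events can be applied in this THD through the
  regular slave applier code path.
*/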
static rpl_group_info* wsrep_relay_group_init(THD *thd, const char* log_fname)
{
  Relay_log_info* rli= new Relay_log_info(false);

  WSREP_DEBUG("wsrep_relay_group_init %s", log_fname);

  if (!rli->relay_log.description_event_for_exec)
  {
    rli->relay_log.description_event_for_exec=
      new Format_description_log_event(4);
  }

  static LEX_STRING connection_name= { C_STRING_WITH_LEN("wsrep") };

  /*
    Master_info's constructor initializes rpl_filter either from an already
    constructed Rpl_filter object in the global 'rpl_filters' list (if one
    exists for the same connection name), or by constructing a new Rpl_filter
    object and adding it to rpl_filters. This object is later destructed by
    Master_info's destructor by looking it up based on connection name in the
    rpl_filters list.

    However, since all Master_info objects created here share the same
    connection name ("wsrep"), destruction of any of the existing Master_info
    objects (in wsrep_return_from_bf_mode()) would free the rpl_filter
    referenced by any/all existing Master_info objects.

    In order to avoid that, we have added a check in Master_info's destructor
    to not free the "wsrep" rpl_filter. It will eventually be freed by
    free_all_rpl_filters() when the server terminates.
  */
  rli->mi = new Master_info(&connection_name, false);

  struct rpl_group_info *rgi= new rpl_group_info(rli);
  rgi->thd= rli->sql_driver_thd= thd;

  if ((rgi->deferred_events_collecting= rli->mi->rpl_filter->is_on()))
  {
    rgi->deferred_events= new Deferred_log_events(rli);
  }

  return rgi;
}
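/*
  Switch a THD into high priority (applier/replayer) mode: save the THD state
  that will be modified into *shadow, disable general logging, force the
  binlog option on, attach a wsrep relay group context and set REPL_RECV
  execution mode. The saved state is restored by wsrep_return_from_bf_mode().
*/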
static void wsrep_prepare_bf_thd(THD *thd, struct wsrep_thd_shadow* shadow)
{
  shadow->options = thd->variables.option_bits;
  shadow->server_status = thd->server_status;
  shadow->wsrep_exec_mode = thd->wsrep_exec_mode;
  shadow->vio = thd->net.vio;

  // Disable general logging on applier threads
  thd->variables.option_bits |= OPTION_LOG_OFF;

  /*
    Enable binlogging regardless of the log_slave_updates setting: this
    ensures that both local and applier transactions go through the same
    commit ordering algorithm in group commit control.
  */
  thd->variables.option_bits|= OPTION_BIN_LOG;

  if (!thd->wsrep_rgi) thd->wsrep_rgi= wsrep_relay_group_init(thd, "wsrep_relay");

  /* thd->system_thread_info.rpl_sql_info isn't initialized. */
  if (!thd->slave_thread)
    thd->system_thread_info.rpl_sql_info=
      new rpl_sql_thread_info(thd->wsrep_rgi->rli->mi->rpl_filter);

  thd->wsrep_exec_mode= REPL_RECV;
  thd->net.vio= 0;
  thd->clear_error();

  shadow->tx_isolation = thd->variables.tx_isolation;
  thd->variables.tx_isolation = ISO_READ_COMMITTED;
  thd->tx_isolation = ISO_READ_COMMITTED;

  shadow->db = thd->db;
  shadow->db_length = thd->db_length;
  shadow->user_time = thd->user_time;
  shadow->row_count_func= thd->get_row_count_func();
  thd->reset_db(NULL, 0);
}
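/*
  Undo wsrep_prepare_bf_thd(): restore the THD state saved in *shadow and
  tear down the relay group context created for applying.
*/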
static void wsrep_return_from_bf_mode(THD *thd, struct wsrep_thd_shadow* shadow)
{
  thd->variables.option_bits = shadow->options;
  thd->server_status = shadow->server_status;
  thd->wsrep_exec_mode = shadow->wsrep_exec_mode;
  thd->net.vio = shadow->vio;
  thd->variables.tx_isolation = shadow->tx_isolation;
  thd->user_time = shadow->user_time;
  thd->reset_db(shadow->db, shadow->db_length);

  if (!thd->slave_thread)
    delete thd->system_thread_info.rpl_sql_info;
  delete thd->wsrep_rgi->rli->mi;
  delete thd->wsrep_rgi->rli;

  thd->wsrep_rgi->cleanup_after_session();
  delete thd->wsrep_rgi;
  thd->wsrep_rgi = NULL;
  thd->set_row_count_func(shadow->row_count_func);
}
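/*
  Replay a BF-aborted transaction that was executed from a stored procedure.
  The replay runs in a separate THD which takes over the write set handle of
  the aborted transaction; the result (OK or a deadlock error) is then
  reported through the original THD's diagnostics area. The caller must hold
  thd->LOCK_thd_data; the mutex is released while the replay runs and
  re-acquired before returning.
*/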
void wsrep_replay_sp_transaction(THD* thd)
{
  DBUG_ENTER("wsrep_replay_sp_transaction");
  mysql_mutex_assert_owner(&thd->LOCK_thd_data);
  DBUG_ASSERT(thd->wsrep_conflict_state == MUST_REPLAY);
  DBUG_ASSERT(wsrep_thd_trx_seqno(thd) > 0);

  WSREP_DEBUG("replaying SP transaction %llu", thd->thread_id);
  close_thread_tables(thd);
  if (thd->locked_tables_mode && thd->lock)
  {
    WSREP_DEBUG("releasing table lock for replaying (%u)",
                thd->thread_id);
    thd->locked_tables_list.unlock_locked_tables(thd);
    thd->variables.option_bits&= ~(OPTION_TABLE_LOCK);
  }
  thd->release_transactional_locks();

  mysql_mutex_unlock(&thd->LOCK_thd_data);
  THD *replay_thd= new THD(true);
  replay_thd->thread_stack= thd->thread_stack;

  struct wsrep_thd_shadow shadow;
  wsrep_prepare_bf_thd(replay_thd, &shadow);
  WSREP_DEBUG("replaying set for %p rgi %p", replay_thd, replay_thd->wsrep_rgi);
  replay_thd->wsrep_trx_meta= thd->wsrep_trx_meta;
  replay_thd->wsrep_ws_handle= thd->wsrep_ws_handle;
  replay_thd->wsrep_ws_handle.trx_id= WSREP_UNDEFINED_TRX_ID;
  replay_thd->wsrep_conflict_state= REPLAYING;

  replay_thd->variables.option_bits|= OPTION_BEGIN;
  replay_thd->server_status|= SERVER_STATUS_IN_TRANS;

  thd->reset_globals();
  replay_thd->store_globals();
  wsrep_status_t rcode= wsrep->replay_trx(wsrep,
                                          &replay_thd->wsrep_ws_handle,
                                          (void*) replay_thd);

  wsrep_return_from_bf_mode(replay_thd, &shadow);
  replay_thd->reset_globals();
  delete replay_thd;

  mysql_mutex_lock(&thd->LOCK_thd_data);

  thd->store_globals();

  switch (rcode)
  {
  case WSREP_OK:
    {
      thd->wsrep_conflict_state= NO_CONFLICT;
      thd->killed= NOT_KILLED;
      wsrep_status_t rcode= wsrep->post_commit(wsrep, &thd->wsrep_ws_handle);
      if (rcode != WSREP_OK)
      {
        WSREP_WARN("Post commit failed for SP replay: thd: %u error: %d",
                   thd->thread_id, rcode);
      }
      /*
        As replaying the transaction was successful, an error must not be
        returned to the client, so we need to reset the error state of the
        diagnostics area.
      */
      thd->get_stmt_da()->reset_diagnostics_area();
      break;
    }
  case WSREP_TRX_FAIL:
    {
      thd->wsrep_conflict_state= ABORTED;
      wsrep_status_t rcode= wsrep->post_rollback(wsrep, &thd->wsrep_ws_handle);
      if (rcode != WSREP_OK)
      {
        WSREP_WARN("Post rollback failed for SP replay: thd: %u error: %d",
                   thd->thread_id, rcode);
      }
      if (thd->get_stmt_da()->is_set())
      {
        thd->get_stmt_da()->reset_diagnostics_area();
      }
      my_error(ER_LOCK_DEADLOCK, MYF(0));
      break;
    }
  default:
    WSREP_ERROR("trx_replay failed for: %d, schema: %s, query: %s",
                rcode,
                (thd->db ? thd->db : "(null)"),
                WSREP_QUERY(thd));
    /* we're now in inconsistent state, must abort */
    mysql_mutex_unlock(&thd->LOCK_thd_data);
    unireg_abort(1);
    break;
  }

  wsrep_cleanup_transaction(thd);

  mysql_mutex_lock(&LOCK_wsrep_replaying);
  wsrep_replaying--;
  WSREP_DEBUG("replaying decreased: %d, thd: %u",
              wsrep_replaying, thd->thread_id);
  mysql_cond_broadcast(&COND_wsrep_replaying);
  mysql_mutex_unlock(&LOCK_wsrep_replaying);

  DBUG_VOID_RETURN;
}
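/*
  Replay a local transaction that was BF aborted after reaching the commit
  stage (wsrep_conflict_state == MUST_REPLAY). The THD is temporarily
  switched into applier mode and the stored write set is re-applied through
  wsrep->replay_trx(); on success the preserved OK result is re-sent to the
  client, on replay failure the transaction is rolled back, and on any other
  failure the server aborts.
*/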
void wsrep_replay_transaction(THD *thd)
{
  DBUG_ENTER("wsrep_replay_transaction");
  /* checking if BF trx must be replayed */
  if (thd->wsrep_conflict_state== MUST_REPLAY) {
    DBUG_ASSERT(wsrep_thd_trx_seqno(thd));
    if (thd->wsrep_exec_mode!= REPL_RECV) {
      if (thd->get_stmt_da()->is_sent())
      {
        WSREP_ERROR("replay issue, thd has reported status already");
      }

      /*
        PS reprepare observer should have been removed already.
        open_table() will fail if we have dangling observer here.
      */
      DBUG_ASSERT(thd->m_reprepare_observer == NULL);

      struct da_shadow
      {
        enum Diagnostics_area::enum_diagnostics_status status;
        ulonglong affected_rows;
        ulonglong last_insert_id;
        char message[MYSQL_ERRMSG_SIZE];
      };
      struct da_shadow da_status;
      da_status.status= thd->get_stmt_da()->status();
      if (da_status.status == Diagnostics_area::DA_OK)
      {
        da_status.affected_rows= thd->get_stmt_da()->affected_rows();
        da_status.last_insert_id= thd->get_stmt_da()->last_insert_id();
        strmake(da_status.message,
                thd->get_stmt_da()->message(),
                sizeof(da_status.message)-1);
      }

      thd->get_stmt_da()->reset_diagnostics_area();

      thd->wsrep_conflict_state= REPLAYING;
      mysql_mutex_unlock(&thd->LOCK_thd_data);

      thd->reset_for_next_command();
      thd->reset_killed();
      close_thread_tables(thd);
      if (thd->locked_tables_mode && thd->lock)
      {
        WSREP_DEBUG("releasing table lock for replaying (%lld)",
                    (longlong) thd->thread_id);
        thd->locked_tables_list.unlock_locked_tables(thd);
        thd->variables.option_bits&= ~(OPTION_TABLE_LOCK);
      }
      thd->release_transactional_locks();
      /*
        Replaying will call MYSQL_START_STATEMENT when handling
        BEGIN Query_log_event so end statement must be called before
        replaying.
      */
      MYSQL_END_STATEMENT(thd->m_statement_psi, thd->get_stmt_da());
      thd->m_statement_psi= NULL;
      thd->m_digest= NULL;
      thd_proc_info(thd, "wsrep replaying trx");
      WSREP_DEBUG("replay trx: %s %lld",
                  thd->query() ? thd->query() : "void",
                  (long long)wsrep_thd_trx_seqno(thd));
      struct wsrep_thd_shadow shadow;
      wsrep_prepare_bf_thd(thd, &shadow);

      /* From trans_begin() */
      thd->variables.option_bits|= OPTION_BEGIN;
      thd->server_status|= SERVER_STATUS_IN_TRANS;

      /* Allow tests to block the replayer thread using the DBUG facilities */
#ifdef ENABLED_DEBUG_SYNC
      DBUG_EXECUTE_IF("sync.wsrep_replay_cb",
                      {
                        const char act[]=
                          "now "
                          "SIGNAL sync.wsrep_replay_cb_reached "
                          "WAIT_FOR signal.wsrep_replay_cb";
                        DBUG_ASSERT(!debug_sync_set_action(thd,
                                                           STRING_WITH_LEN(act)));
                      };);
#endif /* ENABLED_DEBUG_SYNC */

      int rcode = wsrep->replay_trx(wsrep,
                                    &thd->wsrep_ws_handle,
                                    (void *)thd);

      wsrep_return_from_bf_mode(thd, &shadow);
      if (thd->wsrep_conflict_state!= REPLAYING)
        WSREP_WARN("lost replaying mode: %d", thd->wsrep_conflict_state);

      mysql_mutex_lock(&thd->LOCK_thd_data);

      switch (rcode)
      {
      case WSREP_OK:
        thd->wsrep_conflict_state= NO_CONFLICT;
        wsrep->post_commit(wsrep, &thd->wsrep_ws_handle);
        WSREP_DEBUG("trx_replay successful for: %lld %lld",
                    (longlong) thd->thread_id, (longlong) thd->real_id);
        if (thd->get_stmt_da()->is_sent())
        {
          WSREP_WARN("replay ok, thd has reported status");
        }
        else if (thd->get_stmt_da()->is_set())
        {
          if (thd->get_stmt_da()->status() != Diagnostics_area::DA_OK &&
              thd->get_stmt_da()->status() != Diagnostics_area::DA_OK_BULK)
          {
            WSREP_WARN("replay ok, thd has error status %d",
                       thd->get_stmt_da()->status());
          }
        }
        else
        {
          if (da_status.status == Diagnostics_area::DA_OK)
          {
            my_ok(thd,
                  da_status.affected_rows,
                  da_status.last_insert_id,
                  da_status.message);
          }
          else
          {
            my_ok(thd);
          }
        }
        break;
      case WSREP_TRX_FAIL:
        if (thd->get_stmt_da()->is_sent())
        {
          WSREP_ERROR("replay failed, thd has reported status");
        }
        else
        {
          WSREP_DEBUG("replay failed, rolling back");
          //my_error(ER_LOCK_DEADLOCK, MYF(0), "wsrep aborted transaction");
        }
        thd->wsrep_conflict_state= ABORTED;
        wsrep->post_rollback(wsrep, &thd->wsrep_ws_handle);
        break;
      default:
        WSREP_ERROR("trx_replay failed for: %d, schema: %s, query: %s",
                    rcode,
                    (thd->db ? thd->db : "(null)"),
                    thd->query() ? thd->query() : "void");
        /* we're now in inconsistent state, must abort */

        /* http://bazaar.launchpad.net/~codership/codership-mysql/5.6/revision/3962#sql/wsrep_thd.cc */
        mysql_mutex_unlock(&thd->LOCK_thd_data);

        unireg_abort(1);
        break;
      }

      wsrep_cleanup_transaction(thd);

      mysql_mutex_lock(&LOCK_wsrep_replaying);
      wsrep_replaying--;
      WSREP_DEBUG("replaying decreased: %d, thd: %lld",
                  wsrep_replaying, (longlong) thd->thread_id);
      mysql_cond_broadcast(&COND_wsrep_replaying);
      mysql_mutex_unlock(&LOCK_wsrep_replaying);
    }
  }
  DBUG_VOID_RETURN;
}
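/*
  Body of a wsrep applier thread: put the THD into applier mode and hand
  control to the provider via wsrep->recv(), which returns only when the
  applier should exit. The return code then decides whether we simply close
  the applier or shut the whole server down (node consistency compromised or
  a fatal provider error).
*/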
static void wsrep_replication_process(THD *thd)
{
  int rcode;
  DBUG_ENTER("wsrep_replication_process");

  struct wsrep_thd_shadow shadow;
  wsrep_prepare_bf_thd(thd, &shadow);

  /* From trans_begin() */
  thd->variables.option_bits|= OPTION_BEGIN;
  thd->server_status|= SERVER_STATUS_IN_TRANS;

  thd_proc_info(thd, "wsrep applier idle");
  rcode = wsrep->recv(wsrep, (void *)thd);
  DBUG_PRINT("wsrep",("wsrep_repl returned: %d", rcode));

  WSREP_INFO("applier thread exiting (code:%d)", rcode);

  switch (rcode) {
  case WSREP_OK:
  case WSREP_NOT_IMPLEMENTED:
  case WSREP_CONN_FAIL:
    /* provider does not support slave operations / disconnected from group,
     * just close applier thread */
    break;
  case WSREP_NODE_FAIL:
    /* data inconsistency => SST is needed */
    /* Note: we cannot just blindly restart replication here,
     * SST might require server restart if storage engines must be
     * initialized after SST */
    WSREP_ERROR("node consistency compromised, aborting");
    wsrep_kill_mysql(thd);
    break;
  case WSREP_WARNING:
  case WSREP_TRX_FAIL:
  case WSREP_TRX_MISSING:
    /* these suggest a bug in provider code */
    WSREP_WARN("bad return from recv() call: %d", rcode);
    /* Shut down this node. */
    /* fall through */
  case WSREP_FATAL:
    /* Cluster connectivity is lost.
     *
     * If applier was killed on purpose (KILL_CONNECTION), we
     * avoid mysql shutdown. This is because the killer will then handle
     * shutdown processing (or replication restarting)
     */
    if (thd->killed != KILL_CONNECTION)
    {
      wsrep_kill_mysql(thd);
    }
    break;
  }

  mysql_mutex_lock(&LOCK_thread_count);
  wsrep_close_applier(thd);
  mysql_cond_broadcast(&COND_thread_count);
  mysql_mutex_unlock(&LOCK_thread_count);

  if (thd->has_thd_temporary_tables())
  {
    WSREP_WARN("Applier %lld has temporary tables at exit.",
               thd->thread_id);
  }
  wsrep_return_from_bf_mode(thd, &shadow);
  DBUG_VOID_RETURN;
}
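/*
  Start a wsrep service thread (applier or rollbacker) running
  start_wsrep_THD(). During server startup we wait until the new thread has
  registered itself (wsrep_running_threads changes) before returning, so that
  its THD initialization does not race with server initialization (MDEV-8208).
*/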
static bool create_wsrep_THD(wsrep_thread_args* args, bool thread_count_lock)
{
  if (!thread_count_lock)
    mysql_mutex_lock(&LOCK_thread_count);

  ulong old_wsrep_running_threads= wsrep_running_threads;

  DBUG_ASSERT(args->thread_type == WSREP_APPLIER_THREAD ||
              args->thread_type == WSREP_ROLLBACKER_THREAD);

  bool res= mysql_thread_create(args->thread_type == WSREP_APPLIER_THREAD
                                ? key_wsrep_applier : key_wsrep_rollbacker,
                                &args->thread_id, &connection_attrib,
                                start_wsrep_THD, (void*)args);

  if (res)
  {
    WSREP_ERROR("Can't create wsrep thread");
  }

  /*
    If starting a thread on server startup, wait until this thread's THD
    is fully initialized (otherwise THD initialization code might try to
    access a partially initialized server data structure - MDEV-8208).
  */
  if (!mysqld_server_initialized)
  {
    while (old_wsrep_running_threads == wsrep_running_threads)
    {
      mysql_cond_wait(&COND_thread_count, &LOCK_thread_count);
    }
  }

  if (!thread_count_lock)
    mysql_mutex_unlock(&LOCK_thread_count);

  return res;
}
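/*
  Create the requested number of wsrep applier threads. Returns false on
  success and true if the node is not connected yet or an applier thread
  could not be created.
*/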
bool wsrep_create_appliers(long threads, bool thread_count_lock)
{
  if (!wsrep_connected)
  {
    /* see wsrep_replication_start() for the logic */
    if (wsrep_cluster_address && strlen(wsrep_cluster_address) &&
        wsrep_provider && strcasecmp(wsrep_provider, "none"))
    {
      WSREP_ERROR("Trying to launch slave threads before creating "
                  "connection at '%s'", wsrep_cluster_address);
    }
    return true;
  }

  long wsrep_threads= 0;

  while (wsrep_threads++ < threads) {
    wsrep_thread_args* arg;

    if((arg= (wsrep_thread_args*)my_malloc(sizeof(wsrep_thread_args), MYF(0))) == NULL)
    {
      WSREP_ERROR("Can't allocate memory for wsrep replication thread %ld\n", wsrep_threads);
      assert(0);
    }

    arg->thread_type= WSREP_APPLIER_THREAD;
    arg->processor= wsrep_replication_process;

    if (create_wsrep_THD(arg, thread_count_lock))
    {
      WSREP_ERROR("Can't create thread to manage wsrep replication");
      my_free(arg);
      return true;
    }
  }

  return false;
}
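/*
  Body of the wsrep rollbacker thread: sleep on COND_wsrep_rollback and, when
  signalled, roll back every THD queued in wsrep_aborting_thd on behalf of
  the high priority threads that BF aborted them.
*/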
static void wsrep_rollback_process(THD *thd)
{
  DBUG_ENTER("wsrep_rollback_process");

  mysql_mutex_lock(&LOCK_wsrep_rollback);
  wsrep_aborting_thd= NULL;

  while (thd->killed == NOT_KILLED) {
    thd_proc_info(thd, "wsrep aborter idle");
    thd->mysys_var->current_mutex= &LOCK_wsrep_rollback;
    thd->mysys_var->current_cond= &COND_wsrep_rollback;

    mysql_cond_wait(&COND_wsrep_rollback,&LOCK_wsrep_rollback);

    WSREP_DEBUG("WSREP rollback thread wakes for signal");

    mysql_mutex_lock(&thd->mysys_var->mutex);
    thd_proc_info(thd, "wsrep aborter active");
    thd->mysys_var->current_mutex= 0;
    thd->mysys_var->current_cond= 0;
    mysql_mutex_unlock(&thd->mysys_var->mutex);

    /* check for false alarms */
    if (!wsrep_aborting_thd)
    {
      WSREP_DEBUG("WSREP rollback thread has empty abort queue");
    }
    /* process all entries in the queue */
    while (wsrep_aborting_thd) {
      THD *aborting;
      wsrep_aborting_thd_t next = wsrep_aborting_thd->next;
      aborting = wsrep_aborting_thd->aborting_thd;
      my_free(wsrep_aborting_thd);
      wsrep_aborting_thd= next;
      /*
        Must release the mutex: appliers may want to add more aborting
        thds to our work queue while we roll back.
      */
      mysql_mutex_unlock(&LOCK_wsrep_rollback);

      mysql_mutex_lock(&aborting->LOCK_thd_data);
      if (aborting->wsrep_conflict_state== ABORTED)
      {
        WSREP_DEBUG("WSREP, thd already aborted: %llu state: %d",
                    (long long)aborting->real_id,
                    aborting->wsrep_conflict_state);

        mysql_mutex_unlock(&aborting->LOCK_thd_data);
        mysql_mutex_lock(&LOCK_wsrep_rollback);
        continue;
      }
      aborting->wsrep_conflict_state= ABORTING;

      mysql_mutex_unlock(&aborting->LOCK_thd_data);

      set_current_thd(aborting);
      aborting->store_globals();

      mysql_mutex_lock(&aborting->LOCK_thd_data);
      wsrep_client_rollback(aborting);
      WSREP_DEBUG("WSREP rollbacker aborted thd: (%lld %lld)",
                  (longlong) aborting->thread_id,
                  (longlong) aborting->real_id);
      mysql_mutex_unlock(&aborting->LOCK_thd_data);

      set_current_thd(thd);
      thd->store_globals();

      mysql_mutex_lock(&LOCK_wsrep_rollback);
    }
  }

  mysql_mutex_unlock(&LOCK_wsrep_rollback);
  sql_print_information("WSREP: rollbacker thread exiting");

  DBUG_PRINT("wsrep",("wsrep rollbacker thread exiting"));
  DBUG_VOID_RETURN;
}
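/* Create the rollbacker thread, unless no wsrep provider is configured. */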
void wsrep_create_rollbacker()
{
  if (wsrep_provider && strcasecmp(wsrep_provider, "none"))
  {
    wsrep_thread_args* arg;
    if((arg = (wsrep_thread_args*)my_malloc(sizeof(wsrep_thread_args), MYF(0))) == NULL) {
      WSREP_ERROR("Can't allocate memory for wsrep rollbacker thread\n");
      assert(0);
    }

    arg->thread_type = WSREP_ROLLBACKER_THREAD;
    arg->processor = wsrep_rollback_process;

    /* create rollbacker */
    if (create_wsrep_THD(arg, false)) {
      WSREP_WARN("Can't create thread to manage wsrep rollback");
      my_free(arg);
      return;
    }
  }
}
void wsrep_thd_set_PA_safe(void *thd_ptr, my_bool safe)
{
  if (thd_ptr)
  {
    THD* thd = (THD*)thd_ptr;
    thd->wsrep_PA_safe = safe;
  }
}
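/*
  Return the current wsrep conflict state of a THD; with sync == TRUE the
  value is read under LOCK_thd_data.
*/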
enum wsrep_conflict_state wsrep_thd_conflict_state(THD *thd, my_bool sync)
{
  enum wsrep_conflict_state state = NO_CONFLICT;
  if (thd)
  {
    if (sync) mysql_mutex_lock(&thd->LOCK_thd_data);

    state = thd->wsrep_conflict_state;
    if (sync) mysql_mutex_unlock(&thd->LOCK_thd_data);
  }
  return state;
}
my_bool wsrep_thd_is_wsrep(THD *thd)
{
  my_bool status = FALSE;
  if (thd)
  {
    status = (WSREP(thd) && WSREP_PROVIDER_EXISTS);
  }
  return status;
}
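/*
  Return TRUE if the THD is a high priority ("brute force") thread, i.e. an
  applier (REPL_RECV) or a TOI executor (TOTAL_ORDER). With sync == TRUE the
  execution mode is read under LOCK_thd_data.
*/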
my_bool wsrep_thd_is_BF(THD *thd, my_bool sync)
{
  my_bool status = FALSE;
  if (thd)
  {
    // THD can be BF only if provider exists
    if (wsrep_thd_is_wsrep(thd))
    {
      if (sync)
        mysql_mutex_lock(&thd->LOCK_thd_data);

      status = ((thd->wsrep_exec_mode == REPL_RECV) ||
                (thd->wsrep_exec_mode == TOTAL_ORDER));
      if (sync)
        mysql_mutex_unlock(&thd->LOCK_thd_data);
    }
  }
  return status;
}
extern "C"
|
|
|
|
my_bool wsrep_thd_is_BF_or_commit(void *thd_ptr, my_bool sync)
|
|
|
|
{
|
|
|
|
bool status = FALSE;
|
|
|
|
if (thd_ptr)
|
|
|
|
{
|
|
|
|
THD* thd = (THD*)thd_ptr;
|
MDEV-16005 sporadic failures with galera tests MW-328B and MW-328C
These test can sporadically show mutex deadlock warnings between LOCK_wsrep_thd
and LOCK_thd_data mutexes. This means that these mutexes can be locked in opposite
order by different threads, and thus result in deadlock situation.
To fix such issue, the locking policy of these mutexes should be revised and
enforced to be uniform. However, a quick code review shows that the number of
lock/unlock operations for these mutexes combined is between 100-200, and all these
mutex invocations should be checked/fixed.
On the other hand, it turns out that LOCK_wsrep_thd is used for protecting access to
wsrep variables of THD (wsrep_conflict_state, wsrep_query_state), whereas LOCK_thd_data
protects query, db and mysys_var variables in THD. Extending LOCK_thd_data to protect
also wsrep variables looks like a viable solution, as there should not be a use case
where separate threads need simultaneous access to wsrep variables and THD data variables.
In this commit LOCK_wsrep_thd mutex is refactored to be replaced by LOCK_thd_data.
By bluntly replacing LOCK_wsrep_thd by LOCK_thd_data, will result in double locking
of LOCK_thd_data, and some adjustements have been performed to fix such situations.
2018-04-24 09:26:34 +02:00
|
|
|
if (sync) mysql_mutex_lock(&thd->LOCK_thd_data);
|
2014-08-06 14:39:15 +02:00
|
|
|
|
|
|
|
status = ((thd->wsrep_exec_mode == REPL_RECV) ||
|
|
|
|
(thd->wsrep_exec_mode == TOTAL_ORDER) ||
|
|
|
|
(thd->wsrep_exec_mode == LOCAL_COMMIT));
|
MDEV-16005 sporadic failures with galera tests MW-328B and MW-328C
These test can sporadically show mutex deadlock warnings between LOCK_wsrep_thd
and LOCK_thd_data mutexes. This means that these mutexes can be locked in opposite
order by different threads, and thus result in deadlock situation.
To fix such issue, the locking policy of these mutexes should be revised and
enforced to be uniform. However, a quick code review shows that the number of
lock/unlock operations for these mutexes combined is between 100-200, and all these
mutex invocations should be checked/fixed.
On the other hand, it turns out that LOCK_wsrep_thd is used for protecting access to
wsrep variables of THD (wsrep_conflict_state, wsrep_query_state), whereas LOCK_thd_data
protects query, db and mysys_var variables in THD. Extending LOCK_thd_data to protect
also wsrep variables looks like a viable solution, as there should not be a use case
where separate threads need simultaneous access to wsrep variables and THD data variables.
In this commit LOCK_wsrep_thd mutex is refactored to be replaced by LOCK_thd_data.
By bluntly replacing LOCK_wsrep_thd by LOCK_thd_data, will result in double locking
of LOCK_thd_data, and some adjustements have been performed to fix such situations.
2018-04-24 09:26:34 +02:00
|
|
|
if (sync) mysql_mutex_unlock(&thd->LOCK_thd_data);
|
2014-08-06 14:39:15 +02:00
|
|
|
}
|
|
|
|
return status;
|
|
|
|
}
|
|
|
|
|
|
|
|
extern "C"
|
|
|
|
my_bool wsrep_thd_is_local(void *thd_ptr, my_bool sync)
|
|
|
|
{
|
|
|
|
bool status = FALSE;
|
|
|
|
if (thd_ptr)
|
|
|
|
{
|
|
|
|
THD* thd = (THD*)thd_ptr;
|
MDEV-16005 sporadic failures with galera tests MW-328B and MW-328C
These test can sporadically show mutex deadlock warnings between LOCK_wsrep_thd
and LOCK_thd_data mutexes. This means that these mutexes can be locked in opposite
order by different threads, and thus result in deadlock situation.
To fix such issue, the locking policy of these mutexes should be revised and
enforced to be uniform. However, a quick code review shows that the number of
lock/unlock operations for these mutexes combined is between 100-200, and all these
mutex invocations should be checked/fixed.
On the other hand, it turns out that LOCK_wsrep_thd is used for protecting access to
wsrep variables of THD (wsrep_conflict_state, wsrep_query_state), whereas LOCK_thd_data
protects query, db and mysys_var variables in THD. Extending LOCK_thd_data to protect
also wsrep variables looks like a viable solution, as there should not be a use case
where separate threads need simultaneous access to wsrep variables and THD data variables.
In this commit LOCK_wsrep_thd mutex is refactored to be replaced by LOCK_thd_data.
By bluntly replacing LOCK_wsrep_thd by LOCK_thd_data, will result in double locking
of LOCK_thd_data, and some adjustements have been performed to fix such situations.
2018-04-24 09:26:34 +02:00
|
|
|
if (sync) mysql_mutex_lock(&thd->LOCK_thd_data);
|
2014-08-06 14:39:15 +02:00
|
|
|
|
|
|
|
status = (thd->wsrep_exec_mode == LOCAL_STATE);
|
MDEV-16005 sporadic failures with galera tests MW-328B and MW-328C
These test can sporadically show mutex deadlock warnings between LOCK_wsrep_thd
and LOCK_thd_data mutexes. This means that these mutexes can be locked in opposite
order by different threads, and thus result in deadlock situation.
To fix such issue, the locking policy of these mutexes should be revised and
enforced to be uniform. However, a quick code review shows that the number of
lock/unlock operations for these mutexes combined is between 100-200, and all these
mutex invocations should be checked/fixed.
On the other hand, it turns out that LOCK_wsrep_thd is used for protecting access to
wsrep variables of THD (wsrep_conflict_state, wsrep_query_state), whereas LOCK_thd_data
protects query, db and mysys_var variables in THD. Extending LOCK_thd_data to protect
also wsrep variables looks like a viable solution, as there should not be a use case
where separate threads need simultaneous access to wsrep variables and THD data variables.
In this commit LOCK_wsrep_thd mutex is refactored to be replaced by LOCK_thd_data.
By bluntly replacing LOCK_wsrep_thd by LOCK_thd_data, will result in double locking
of LOCK_thd_data, and some adjustements have been performed to fix such situations.
2018-04-24 09:26:34 +02:00
|
|
|
if (sync) mysql_mutex_unlock(&thd->LOCK_thd_data);
|
2014-08-06 14:39:15 +02:00
|
|
|
}
|
|
|
|
return status;
|
|
|
|
}
|
|
|
|
|
|
|
|
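/*
  Summary of the three predicates above, derived directly from their bodies
  (only the wsrep_exec_mode values referenced in this file are listed):

    wsrep_exec_mode   is_local()  is_BF()  is_BF_or_commit()
    LOCAL_STATE       TRUE        FALSE    FALSE
    LOCAL_COMMIT      FALSE       FALSE    TRUE
    REPL_RECV         FALSE       TRUE     TRUE
    TOTAL_ORDER       FALSE       TRUE     TRUE

  In addition, wsrep_thd_is_BF() returns FALSE whenever wsrep_thd_is_wsrep()
  is FALSE, i.e. when no provider is active for the THD.
*/
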
int wsrep_abort_thd(void *bf_thd_ptr, void *victim_thd_ptr, my_bool signal)
{
  THD *victim_thd= (THD *) victim_thd_ptr;
  THD *bf_thd= (THD *) bf_thd_ptr;
  DBUG_ENTER("wsrep_abort_thd");

  mysql_mutex_assert_owner(&victim_thd->LOCK_thd_data);

  if ( (WSREP(bf_thd) ||
        ( (WSREP_ON || bf_thd->variables.wsrep_OSU_method == WSREP_OSU_RSU) &&
          bf_thd->wsrep_exec_mode == TOTAL_ORDER) )                         &&
       victim_thd)
  {
    if ((victim_thd->wsrep_conflict_state == MUST_ABORT) ||
        (victim_thd->wsrep_conflict_state == ABORTED)    ||
        (victim_thd->wsrep_conflict_state == ABORTING))
    {
      WSREP_DEBUG("wsrep_abort_thd called by %llu with victim %llu already "
                  "aborted. Ignoring.",
                  (bf_thd) ? (long long)bf_thd->real_id : 0,
                  (long long)victim_thd->real_id);
      mysql_mutex_unlock(&victim_thd->LOCK_thd_data);
      DBUG_RETURN(1);
    }

    WSREP_DEBUG("wsrep_abort_thd, by: %llu, victim: %llu", (bf_thd) ?
                (long long)bf_thd->real_id : 0, (long long)victim_thd->real_id);
    ha_abort_transaction(bf_thd, victim_thd, signal);
  }
  else
  {
    WSREP_DEBUG("wsrep_abort_thd not effective: %p %p", bf_thd, victim_thd);
    mysql_mutex_unlock(&victim_thd->LOCK_thd_data);
  }

  DBUG_RETURN(1);
}

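/*
  Locking contract of wsrep_abort_thd(), as can be read from the code above:
  the caller must hold victim_thd->LOCK_thd_data on entry (asserted at the
  top).  The mutex is released here on the "victim already aborted" path and
  on the "not effective" path; on the path that reaches
  ha_abort_transaction() it is not released in this function.  A hypothetical
  call site (victim and bf_thd are placeholders):

    mysql_mutex_lock(&victim->LOCK_thd_data);
    wsrep_abort_thd(bf_thd, victim, TRUE);   // TRUE passed on as 'signal'
*/
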
extern "C"
|
|
|
|
int wsrep_thd_in_locking_session(void *thd_ptr)
|
|
|
|
{
|
|
|
|
if (thd_ptr && ((THD *)thd_ptr)->in_lock_tables) {
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
bool wsrep_thd_has_explicit_locks(THD *thd)
{
  assert(thd);
  return thd->mdl_context.has_explicit_locks();
}

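/*
  The two helpers above answer related but distinct questions:
  wsrep_thd_in_locking_session() checks THD::in_lock_tables (set while the
  session is locking tables, e.g. for LOCK TABLES), while
  wsrep_thd_has_explicit_locks() asks the MDL context whether the THD holds
  any explicitly acquired metadata locks.  Both are simple read-only probes
  and take no mutexes themselves.
*/
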
my_bool wsrep_thd_is_applier(MYSQL_THD thd)
{
  my_bool is_applier= false;

  if (thd && thd->wsrep_applier)
    is_applier= true;

  return (is_applier);
}

void wsrep_set_load_multi_commit(THD *thd, bool split)
{
  thd->wsrep_split_flag= split;
}

bool wsrep_is_load_multi_commit(THD *thd)
{
  return thd->wsrep_split_flag;
}

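/*
  wsrep_split_flag is a plain per-THD marker: wsrep_set_load_multi_commit()
  records whether the current load is being split into multiple commits, and
  wsrep_is_load_multi_commit() reads the marker back.  The association with
  LOAD DATA splitting is inferred from the function names only; the flag
  itself carries no other logic here.
*/
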
void wsrep_report_bf_lock_wait(THD *thd,
                               unsigned long long trx_id)
{
  if (thd)
  {
    WSREP_ERROR("Thread %s trx_id: %llu thread: %ld "
                "seqno: %lld query_state: %s conf_state: %s exec_mode: %s "
                "applier: %d query: %s",
                wsrep_thd_is_BF(thd, false) ? "BF" : "normal",
                trx_id,
                thd_get_thread_id(thd),
                wsrep_thd_trx_seqno(thd),
                wsrep_thd_query_state_str(thd),
                wsrep_thd_conflict_state_str(thd),
                wsrep_thd_exec_mode_str(thd),
                thd->wsrep_applier,
                wsrep_thd_query(thd));
  }
}

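/*
  wsrep_report_bf_lock_wait() is a diagnostic dump: it logs, at ERROR level,
  the wsrep view of a THD involved in a lock wait with a brute-force (BF)
  applier: its trx id, thread id, seqno, query/conflict state, exec mode,
  applier flag and current query.  It changes no server state; note that the
  THD fields are read without taking LOCK_thd_data (wsrep_thd_is_BF() is
  called with sync=false).
*/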