branches/zip: Merge 2263:2295 from branches/5.1.
This commit is contained in:
parent ab10478ca4
commit cdba733fb5

17 changed files with 531 additions and 317 deletions
@@ -55,7 +55,7 @@ can be released by page reorganize, then it is reorganized */

 #define BTR_CUR_PAGE_REORGANIZE_LIMIT	(UNIV_PAGE_SIZE / 32)

-/* When estimating number of different kay values in an index sample
+/* When estimating number of different key values in an index, sample
 this many index pages */
 #define BTR_KEY_VAL_ESTIMATE_N_PAGES	8
@@ -73,6 +73,10 @@ extern "C" {
 /* This is needed because of Bug #3596. Let us hope that pthread_mutex_t
 is defined the same in both builds: the MySQL server and the InnoDB plugin. */
 extern pthread_mutex_t LOCK_thread_count;

+/* this is defined in mysql_priv.h inside #ifdef MYSQL_SERVER
+but we need it here */
+bool check_global_access(THD *thd, ulong want_access);
+
 #endif /* MYSQL_SERVER */

 /** to protect innobase_open_files */
@@ -139,7 +143,7 @@ static my_bool innobase_locks_unsafe_for_binlog = FALSE;
 static my_bool innobase_rollback_on_timeout = FALSE;
 static my_bool innobase_create_status_file = FALSE;
 static my_bool innobase_stats_on_metadata = TRUE;
-static my_bool innobase_use_adaptive_hash_indexes = TRUE;
+static my_bool innobase_adaptive_hash_index = TRUE;

 static char* internal_innobase_data_file_path = NULL;
@@ -1718,7 +1722,7 @@ innobase_init(

 	srv_stats_on_metadata = (ibool) innobase_stats_on_metadata;

-	btr_search_disabled = (ibool) !innobase_use_adaptive_hash_indexes;
+	btr_search_disabled = (ibool) !innobase_adaptive_hash_index;

 	srv_print_verbose_log = mysqld_embedded ? 0 : 1;
@@ -4869,6 +4873,12 @@ innodb_check_for_record_too_big_error(
 	}
 }

+/* limit innodb monitor access to users with PROCESS privilege.
+See http://bugs.mysql.com/32710 for expl. why we choose PROCESS. */
+#define IS_MAGIC_TABLE_AND_USER_DENIED_ACCESS(table_name, thd) \
+	(row_is_magic_monitor_table(table_name) \
+	 && check_global_access(thd, PROCESS_ACL))
+
 /*********************************************************************
 Creates a table definition to an InnoDB database. */
 static
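The macro above leans on C's short-circuit &&: check_global_access(), a call back into the server's privilege code, is only evaluated when row_is_magic_monitor_table() has already matched the name. A minimal standalone sketch of the same gating pattern; the stub checks below are illustrative stand-ins, not the real InnoDB/MySQL functions:

```c
#include <stdio.h>
#include <string.h>

/* stand-in for row_is_magic_monitor_table() */
static int is_magic_table(const char* name)
{
	return strstr(name, "innodb_monitor") != NULL;
}

/* stand-in for check_global_access(thd, PROCESS_ACL); returns
   nonzero when access should be denied */
static int user_denied_process_priv(void)
{
	printf("  privilege check ran\n");	/* only for magic tables */
	return 1;
}

#define IS_MAGIC_TABLE_AND_USER_DENIED_ACCESS(name) \
	(is_magic_table(name) && user_denied_process_priv())

int main(void)
{
	/* ordinary table: the privilege check is never evaluated */
	printf("db/t1: %d\n",
	       IS_MAGIC_TABLE_AND_USER_DENIED_ACCESS("db/t1"));
	/* magic table: both checks run and access is denied */
	printf("db/innodb_monitor: %d\n",
	       IS_MAGIC_TABLE_AND_USER_DENIED_ACCESS("db/innodb_monitor"));
	return 0;
}
```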
@@ -4905,6 +4915,12 @@ create_table_def(
 	DBUG_ENTER("create_table_def");
 	DBUG_PRINT("enter", ("table_name: %s", table_name));

+	ut_a(trx->mysql_thd != NULL);
+	if (IS_MAGIC_TABLE_AND_USER_DENIED_ACCESS(table_name,
+						  (THD*) trx->mysql_thd)) {
+		DBUG_RETURN(HA_ERR_GENERIC);
+	}
+
 	n_cols = form->s->fields;

 	/* We pass 0 as the space id, and determine at a lower level the space
@@ -5457,6 +5473,14 @@ ha_innobase::delete_table(

 	DBUG_ENTER("ha_innobase::delete_table");

+	/* Strangely, MySQL passes the table name without the '.frm'
+	extension, in contrast to ::create */
+	normalize_table_name(norm_name, name);
+
+	if (IS_MAGIC_TABLE_AND_USER_DENIED_ACCESS(norm_name, thd)) {
+		DBUG_RETURN(HA_ERR_GENERIC);
+	}
+
 	/* Get the transaction associated with the current thd, or create one
 	if not yet created */
@@ -5490,11 +5514,6 @@ ha_innobase::delete_table(

 	ut_a(name_len < 1000);

-	/* Strangely, MySQL passes the table name without the '.frm'
-	extension, in contrast to ::create */
-
-	normalize_table_name(norm_name, name);
-
 	/* Drop the table in InnoDB */

 	error = row_drop_table_for_mysql(norm_name, trx,
@@ -8324,9 +8343,10 @@ static MYSQL_SYSVAR_BOOL(stats_on_metadata, innobase_stats_on_metadata,
   "Enable statistics gathering for metadata commands such as SHOW TABLE STATUS (on by default)",
   NULL, NULL, TRUE);

-static MYSQL_SYSVAR_BOOL(use_adaptive_hash_indexes, innobase_use_adaptive_hash_indexes,
+static MYSQL_SYSVAR_BOOL(adaptive_hash_index, innobase_adaptive_hash_index,
   PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
-  "Enable the InnoDB adaptive hash indexes (enabled by default)",
+  "Enable InnoDB adaptive hash index (enabled by default). "
+  "Disable with --skip-innodb-adaptive-hash-index.",
   NULL, NULL, TRUE);

 static MYSQL_SYSVAR_ULONG(replication_delay, srv_replication_delay,
@@ -8464,7 +8484,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
   MYSQL_SYSVAR(open_files),
   MYSQL_SYSVAR(rollback_on_timeout),
   MYSQL_SYSVAR(stats_on_metadata),
-  MYSQL_SYSVAR(use_adaptive_hash_indexes),
+  MYSQL_SYSVAR(adaptive_hash_index),
   MYSQL_SYSVAR(replication_delay),
   MYSQL_SYSVAR(status_file),
   MYSQL_SYSVAR(support_xa),
@@ -112,9 +112,13 @@ os_event_set(
 	os_event_t	event);	/* in: event to set */
 /**************************************************************
 Resets an event semaphore to the nonsignaled state. Waiting threads will
-stop to wait for the event. */
+stop to wait for the event.
+The return value should be passed to os_even_wait_low() if it is desired
+that this thread should not wait in case of an intervening call to
+os_event_set() between this os_event_reset() and the
+os_event_wait_low() call. See comments for os_event_wait_low(). */
 UNIV_INTERN
-void
+ib_longlong
 os_event_reset(
 /*===========*/
 	os_event_t	event);	/* in: event to reset */
@@ -125,16 +129,38 @@ void
 os_event_free(
 /*==========*/
 	os_event_t	event);	/* in: event to free */

 /**************************************************************
 Waits for an event object until it is in the signaled state. If
 srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS this also exits the
 waiting thread when the event becomes signaled (or immediately if the
-event is already in the signaled state). */
+event is already in the signaled state).
+
+Typically, if the event has been signalled after the os_event_reset()
+we'll return immediately because event->is_set == TRUE.
+There are, however, situations (e.g.: sync_array code) where we may
+lose this information. For example:
+
+thread A calls os_event_reset()
+thread B calls os_event_set()   [event->is_set == TRUE]
+thread C calls os_event_reset() [event->is_set == FALSE]
+thread A calls os_event_wait()  [infinite wait!]
+thread C calls os_event_wait()  [infinite wait!]
+
+Where such a scenario is possible, to avoid infinite wait, the
+value returned by os_event_reset() should be passed in as
+reset_sig_count. */
 UNIV_INTERN
 void
-os_event_wait(
-/*==========*/
-	os_event_t	event);	/* in: event to wait */
+os_event_wait_low(
+/*==============*/
+	os_event_t	event,		/* in: event to wait */
+	ib_longlong	reset_sig_count);/* in: zero or the value
+					returned by previous call of
+					os_event_reset(). */
+
+#define os_event_wait(event) os_event_wait_low(event, 0)

 /**************************************************************
 Waits for an event object until it is in the signaled state or
 a timeout is exceeded. In Unix the timeout is always infinite. */
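The reset/wait protocol documented above can be studied outside InnoDB. The following is a minimal analogue of os_event_reset()/os_event_set()/os_event_wait_low() built on plain pthreads; the monotonic signal_count mirrors the patch, while the event_t type and function names are illustrative only, not InnoDB's actual implementation:

```c
#include <pthread.h>
#include <stdio.h>

typedef struct {
	pthread_mutex_t	mutex;
	pthread_cond_t	cond;
	int		is_set;
	long long	signal_count;	/* bumped by every event_set() */
} event_t;

static void event_init(event_t* ev)
{
	pthread_mutex_init(&ev->mutex, NULL);
	pthread_cond_init(&ev->cond, NULL);
	ev->is_set = 0;
	ev->signal_count = 1;	/* zero is reserved for "no count given" */
}

/* analogue of os_event_reset(): returns the count observed at reset
   time, to be handed back to event_wait_low() later */
static long long event_reset(event_t* ev)
{
	long long ret;

	pthread_mutex_lock(&ev->mutex);
	ev->is_set = 0;
	ret = ev->signal_count;
	pthread_mutex_unlock(&ev->mutex);
	return ret;
}

/* analogue of os_event_set() */
static void event_set(event_t* ev)
{
	pthread_mutex_lock(&ev->mutex);
	ev->is_set = 1;
	ev->signal_count++;
	pthread_cond_broadcast(&ev->cond);
	pthread_mutex_unlock(&ev->mutex);
}

/* analogue of os_event_wait_low(): does not block if the event has
   been set since the caller's reset, even if another thread has
   reset it again in between (signal_count has already advanced) */
static void event_wait_low(event_t* ev, long long reset_sig_count)
{
	pthread_mutex_lock(&ev->mutex);
	while (!ev->is_set && ev->signal_count == reset_sig_count) {
		pthread_cond_wait(&ev->cond, &ev->mutex);
	}
	pthread_mutex_unlock(&ev->mutex);
}

int main(void)
{
	event_t		ev;
	long long	sig;

	event_init(&ev);

	sig = event_reset(&ev);	/* thread A records the count */
	event_set(&ev);		/* thread B signals: count advances */
	(void) event_reset(&ev);/* thread C resets: is_set is 0 again */

	event_wait_low(&ev, sig);	/* A returns at once anyway */
	printf("no lost wakeup; count is now %lld\n", ev.signal_count);
	return 0;
}
```

In the thread A/B/C scenario from the comment, A's wait returns immediately because B's set advanced signal_count past the value A recorded, even though C's later reset cleared is_set again.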
@@ -329,7 +329,7 @@ row_mysql_unfreeze_data_dictionary(
 	trx_t*	trx);	/* in: transaction */
 #ifndef UNIV_HOTBACKUP
 /*************************************************************************
-Drops a table for MySQL. If the name of the table ends in
+Creates a table for MySQL. If the name of the table ends in
 one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor",
 "innodb_table_monitor", then this will also start the printing of monitor
 output by the master thread. If the table name ends in "innodb_mem_validate",
@@ -490,6 +490,16 @@ row_check_table_for_mysql(
 					handle */
 #endif /* !UNIV_HOTBACKUP */

+/*************************************************************************
+Determines if a table is a magic monitor table. */
+UNIV_INTERN
+ibool
+row_is_magic_monitor_table(
+/*=======================*/
+					/* out: TRUE if monitor table */
+	const char*	table_name);	/* in: name of the table, in the
+					form database/table_name */
+
 /* A struct describing a place for an individual column in the MySQL
 row format which is presented to the table handler in ha_innobase.
 This template struct is used to speed up row transformations between
@@ -66,26 +66,21 @@ sync_array_wait_event(
 	sync_array_t*	arr,	/* in: wait array */
 	ulint		index);	/* in: index of the reserved cell */
 /**********************************************************************
-Frees the cell safely by reserving the sync array mutex and decrementing
-n_reserved if necessary. Should only be called from mutex_spin_wait. */
+Frees the cell. NOTE! sync_array_wait_event frees the cell
+automatically! */
 UNIV_INTERN
 void
-sync_array_free_cell_protected(
-/*===========================*/
+sync_array_free_cell(
+/*=================*/
 	sync_array_t*	arr,	/* in: wait array */
 	ulint		index);	/* in: index of the cell in array */
 /**************************************************************************
-Looks for the cells in the wait array which refer
-to the wait object specified,
-and sets their corresponding events to the signaled state. In this
-way releases the threads waiting for the object to contend for the object.
-It is possible that no such cell is found, in which case does nothing. */
+Note that one of the wait objects was signalled. */
 UNIV_INTERN
 void
-sync_array_signal_object(
-/*=====================*/
-	sync_array_t*	arr,	/* in: wait array */
-	void*		object);/* in: wait object */
+sync_array_object_signalled(
+/*========================*/
+	sync_array_t*	arr);	/* in: wait array */
 /**************************************************************************
 If the wakeup algorithm does not work perfectly at semaphore relases,
 this function will do the waking (see the comment in mutex_exit). This
@@ -421,6 +421,18 @@ blocked by readers, a writer may queue for the lock by setting the writer
 field. Then no new readers are allowed in. */

 struct rw_lock_struct {
+	os_event_t	event;	/* Used by sync0arr.c for thread queueing */
+
+#ifdef __WIN__
+	os_event_t	wait_ex_event;	/* This windows specific event is
+					used by the thread which has set the
+					lock state to RW_LOCK_WAIT_EX. The
+					rw_lock design guarantees that this
+					thread will be the next one to proceed
+					once the current the event gets
+					signalled. See LEMMA 2 in sync0sync.c */
+#endif
+
 	ulint	reader_count;	/* Number of readers who have locked this
 				lock in the shared mode */
 	ulint	writer;		/* This field is set to RW_LOCK_EX if there
@@ -381,7 +381,11 @@ rw_lock_s_unlock_func(
 	mutex_exit(mutex);

 	if (UNIV_UNLIKELY(sg)) {
-		sync_array_signal_object(sync_primary_wait_array, lock);
+#ifdef __WIN__
+		os_event_set(lock->wait_ex_event);
+#endif
+		os_event_set(lock->event);
+		sync_array_object_signalled(sync_primary_wait_array);
 	}

 	ut_ad(rw_lock_validate(lock));
@@ -461,7 +465,11 @@ rw_lock_x_unlock_func(
 	mutex_exit(&(lock->mutex));

 	if (UNIV_UNLIKELY(sg)) {
-		sync_array_signal_object(sync_primary_wait_array, lock);
+#ifdef __WIN__
+		os_event_set(lock->wait_ex_event);
+#endif
+		os_event_set(lock->event);
+		sync_array_object_signalled(sync_primary_wait_array);
 	}

 	ut_ad(rw_lock_validate(lock));
@@ -459,6 +459,7 @@ Do not use its fields directly! The structure used in the spin lock
 implementation of a mutual exclusion semaphore. */

 struct mutex_struct {
+	os_event_t	event;	/* Used by sync0arr.c for the wait queue */
 	ulint	lock_word;	/* This ulint is the target of the atomic
 			test-and-set instruction in Win32 */
 #if defined WIN32 && defined UNIV_CAN_USE_X86_ASSEMBLER
@@ -211,7 +211,7 @@ mutex_exit(
 	perform the read first, which could leave a waiting
 	thread hanging indefinitely.

-	Our current solution call every 10 seconds
+	Our current solution call every second
 	sync_arr_wake_threads_if_sema_free()
 	to wake up possible hanging threads if
 	they are missed in mutex_signal_object. */
mysql-test/innodb_bug34053.result (new file, 1 line)

@@ -0,0 +1 @@
+SET storage_engine=InnoDB;
mysql-test/innodb_bug34053.test (new file, 49 lines)

@@ -0,0 +1,49 @@
+#
+# Make sure http://bugs.mysql.com/34053 remains fixed.
+#
+
+-- source include/have_innodb.inc
+
+SET storage_engine=InnoDB;
+
+# we do not really care about what gets printed, we are only
+# interested in getting success or failure according to our
+# expectations
+-- disable_query_log
+-- disable_result_log
+
+GRANT USAGE ON *.* TO 'shane'@'localhost' IDENTIFIED BY '12345';
+FLUSH PRIVILEGES;
+
+-- connect (con1,localhost,shane,12345,)
+
+-- connection con1
+-- error ER_SPECIFIC_ACCESS_DENIED_ERROR
+CREATE TABLE innodb_monitor (a INT) ENGINE=INNODB;
+-- error ER_SPECIFIC_ACCESS_DENIED_ERROR
+CREATE TABLE innodb_mem_validate (a INT) ENGINE=INNODB;
+CREATE TABLE innodb_monitorx (a INT) ENGINE=INNODB;
+DROP TABLE innodb_monitorx;
+CREATE TABLE innodb_monito (a INT) ENGINE=INNODB;
+DROP TABLE innodb_monito;
+CREATE TABLE xinnodb_monitor (a INT) ENGINE=INNODB;
+DROP TABLE xinnodb_monitor;
+CREATE TABLE nnodb_monitor (a INT) ENGINE=INNODB;
+DROP TABLE nnodb_monitor;
+
+-- connection default
+CREATE TABLE innodb_monitor (a INT) ENGINE=INNODB;
+CREATE TABLE innodb_mem_validate (a INT) ENGINE=INNODB;
+
+-- connection con1
+-- error ER_SPECIFIC_ACCESS_DENIED_ERROR
+DROP TABLE innodb_monitor;
+-- error ER_SPECIFIC_ACCESS_DENIED_ERROR
+DROP TABLE innodb_mem_validate;
+
+-- connection default
+DROP TABLE innodb_monitor;
+DROP TABLE innodb_mem_validate;
+DROP USER 'shane'@'localhost';
+
+-- disconnect con1
os/os0sync.c (111 changed lines)

@@ -21,6 +21,7 @@ Created 9/6/1995 Heikki Tuuri

 /* Type definition for an operating system mutex struct */
 struct os_mutex_struct{
+	os_event_t	event;	/* Used by sync0arr.c for queing threads */
 	void*	handle;	/* OS handle to mutex */
 	ulint	count;	/* we use this counter to check
 			that the same thread does not
@@ -35,6 +36,7 @@ struct os_mutex_struct{
 /* Mutex protecting counts and the lists of OS mutexes and events */
 UNIV_INTERN os_mutex_t	os_sync_mutex;
 static ibool	os_sync_mutex_inited	= FALSE;
+static ibool	os_sync_free_called	= FALSE;

 /* This is incremented by 1 in os_thread_create and decremented by 1 in
 os_thread_exit */
@@ -50,6 +52,10 @@ UNIV_INTERN ulint	os_event_count		= 0;
 UNIV_INTERN ulint	os_mutex_count		= 0;
 UNIV_INTERN ulint	os_fast_mutex_count	= 0;

+/* Because a mutex is embedded inside an event and there is an
+event embedded inside a mutex, on free, this generates a recursive call.
+This version of the free event function doesn't acquire the global lock */
+static void os_event_free_internal(os_event_t	event);
+
 /*************************************************************
 Initializes global event and OS 'slow' mutex lists. */
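The recursion described here comes from circular ownership: on POSIX every event embeds a fast mutex, every OS 'slow' mutex embeds an event, and both live on globally locked lists. A compact sketch of the guard pattern, with hypothetical names standing in for os_event_free_internal() and the os_sync_free_called flag:

```c
#include <stdio.h>

/* set while the global teardown loop walks the lists itself */
static int sync_free_called = 0;

struct event	{ int id; };
struct os_mutex	{ struct event ev; };

/* frees an event without taking the global list mutex, so it is
   safe to call from mutex teardown without recursing on the lock */
static void event_free_internal(struct event* ev)
{
	printf("event %d freed\n", ev->id);
}

static void mutex_free(struct os_mutex* m)
{
	if (!sync_free_called) {
		/* normal path: the mutex owns its embedded event */
		event_free_internal(&m->ev);
	}
	/* during global teardown the event list is freed separately,
	   so freeing the event here too would be a double free */
	printf("mutex freed\n");
}

int main(void)
{
	struct os_mutex	m = { { 1 } };

	mutex_free(&m);		/* frees the embedded event, then itself */

	sync_free_called = 1;	/* global teardown in progress */
	mutex_free(&m);		/* skips the event: the list walk owns it */
	return 0;
}
```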
@@ -76,6 +82,7 @@ os_sync_free(void)
 	os_event_t	event;
 	os_mutex_t	mutex;

+	os_sync_free_called = TRUE;
 	event = UT_LIST_GET_FIRST(os_event_list);

 	while (event) {
@@ -99,6 +106,7 @@ os_sync_free(void)

 		mutex = UT_LIST_GET_FIRST(os_mutex_list);
 	}
+	os_sync_free_called = FALSE;
 }

 /*************************************************************
@@ -144,17 +152,31 @@ os_event_create(
 	ut_a(0 == pthread_cond_init(&(event->cond_var), NULL));
 #endif
 	event->is_set = FALSE;
-	event->signal_count = 0;
+
+	/* We return this value in os_event_reset(), which can then be
+	be used to pass to the os_event_wait_low(). The value of zero
+	is reserved in os_event_wait_low() for the case when the
+	caller does not want to pass any signal_count value. To
+	distinguish between the two cases we initialize signal_count
+	to 1 here. */
+	event->signal_count = 1;
 #endif /* __WIN__ */

-	/* Put to the list of events */
-	os_mutex_enter(os_sync_mutex);
+	/* The os_sync_mutex can be NULL because during startup an event
+	can be created [ because it's embedded in the mutex/rwlock ] before
+	this module has been initialized */
+	if (os_sync_mutex != NULL) {
+		os_mutex_enter(os_sync_mutex);
+	}

+	/* Put to the list of events */
 	UT_LIST_ADD_FIRST(os_event_list, os_event_list, event);

 	os_event_count++;

-	os_mutex_exit(os_sync_mutex);
+	if (os_sync_mutex != NULL) {
+		os_mutex_exit(os_sync_mutex);
+	}

 	return(event);
 }
@@ -231,13 +253,20 @@ os_event_set(

 /**************************************************************
 Resets an event semaphore to the nonsignaled state. Waiting threads will
-stop to wait for the event. */
+stop to wait for the event.
+The return value should be passed to os_even_wait_low() if it is desired
+that this thread should not wait in case of an intervening call to
+os_event_set() between this os_event_reset() and the
+os_event_wait_low() call. See comments for os_event_wait_low(). */
 UNIV_INTERN
-void
+ib_longlong
 os_event_reset(
 /*===========*/
+				/* out: current signal_count. */
 	os_event_t	event)	/* in: event to reset */
 {
+	ib_longlong	ret = 0;
+
 #ifdef __WIN__
 	ut_a(event);
@@ -252,9 +281,40 @@ os_event_reset(
 	} else {
 		event->is_set = FALSE;
 	}
+	ret = event->signal_count;

 	os_fast_mutex_unlock(&(event->os_mutex));
 #endif
+	return(ret);
 }

+/**************************************************************
+Frees an event object, without acquiring the global lock. */
+static
+void
+os_event_free_internal(
+/*===================*/
+	os_event_t	event)	/* in: event to free */
+{
+#ifdef __WIN__
+	ut_a(event);
+
+	ut_a(CloseHandle(event->handle));
+#else
+	ut_a(event);
+
+	/* This is to avoid freeing the mutex twice */
+	os_fast_mutex_free(&(event->os_mutex));
+
+	ut_a(0 == pthread_cond_destroy(&(event->cond_var)));
+#endif
+	/* Remove from the list of events */
+
+	UT_LIST_REMOVE(os_event_list, os_event_list, event);
+
+	os_event_count--;
+
+	ut_free(event);
+}
+
 /**************************************************************
@@ -293,18 +353,38 @@ os_event_free(
 Waits for an event object until it is in the signaled state. If
 srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS this also exits the
 waiting thread when the event becomes signaled (or immediately if the
-event is already in the signaled state). */
+event is already in the signaled state).
+
+Typically, if the event has been signalled after the os_event_reset()
+we'll return immediately because event->is_set == TRUE.
+There are, however, situations (e.g.: sync_array code) where we may
+lose this information. For example:
+
+thread A calls os_event_reset()
+thread B calls os_event_set()   [event->is_set == TRUE]
+thread C calls os_event_reset() [event->is_set == FALSE]
+thread A calls os_event_wait()  [infinite wait!]
+thread C calls os_event_wait()  [infinite wait!]
+
+Where such a scenario is possible, to avoid infinite wait, the
+value returned by os_event_reset() should be passed in as
+reset_sig_count. */
 UNIV_INTERN
 void
-os_event_wait(
-/*==========*/
-	os_event_t	event)	/* in: event to wait */
+os_event_wait_low(
+/*==============*/
+	os_event_t	event,		/* in: event to wait */
+	ib_longlong	reset_sig_count)/* in: zero or the value
+					returned by previous call of
+					os_event_reset(). */
 {
 #ifdef __WIN__
 	DWORD	err;

 	ut_a(event);

+	UT_NOT_USED(reset_sig_count);
+
 	/* Specify an infinite time limit for waiting */
 	err = WaitForSingleObject(event->handle, INFINITE);
@@ -318,7 +398,11 @@ os_event_wait(

 	os_fast_mutex_lock(&(event->os_mutex));

-	old_signal_count = event->signal_count;
+	if (reset_sig_count) {
+		old_signal_count = reset_sig_count;
+	} else {
+		old_signal_count = event->signal_count;
+	}

 	for (;;) {
 		if (event->is_set == TRUE
@@ -458,6 +542,7 @@ os_mutex_create(

 	mutex_str->handle = mutex;
 	mutex_str->count = 0;
+	mutex_str->event = os_event_create(NULL);

 	if (UNIV_LIKELY(os_sync_mutex_inited)) {
 		/* When creating os_sync_mutex itself we cannot reserve it */
@@ -534,6 +619,10 @@ os_mutex_free(
 {
 	ut_a(mutex);

+	if (UNIV_LIKELY(!os_sync_free_called)) {
+		os_event_free_internal(mutex->event);
+	}
+
 	if (UNIV_LIKELY(os_sync_mutex_inited)) {
 		os_mutex_enter(os_sync_mutex);
 	}
@@ -56,6 +56,12 @@ static const char S_innodb_tablespace_monitor[] = "innodb_tablespace_monitor";
 static const char S_innodb_table_monitor[] = "innodb_table_monitor";
 static const char S_innodb_mem_validate[] = "innodb_mem_validate";

+/* Evaluates to true if str1 equals str2_onstack, used for comparing
+the above strings. */
+#define STR_EQ(str1, str1_len, str2_onstack) \
+	((str1_len) == sizeof(str2_onstack) \
+	 && memcmp(str1, str2_onstack, sizeof(str2_onstack)) == 0)
+
 #ifndef UNIV_HOTBACKUP
 /***********************************************************************
 Determine if the given name is a name reserved for MySQL system tables. */
@@ -1685,7 +1691,7 @@ row_mysql_unlock_data_dictionary(

 #ifndef UNIV_HOTBACKUP
 /*************************************************************************
-Drops a table for MySQL. If the name of the table ends in
+Creates a table for MySQL. If the name of the table ends in
 one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor",
 "innodb_table_monitor", then this will also start the printing of monitor
 output by the master thread. If the table name ends in "innodb_mem_validate",
@@ -1766,9 +1772,7 @@ row_create_table_for_mysql(
 	table_name++;
 	table_name_len = strlen(table_name) + 1;

-	if (table_name_len == sizeof S_innodb_monitor
-	    && !memcmp(table_name, S_innodb_monitor,
-		       sizeof S_innodb_monitor)) {
+	if (STR_EQ(table_name, table_name_len, S_innodb_monitor)) {

 		/* Table equals "innodb_monitor":
 		start monitor prints */
@@ -1779,28 +1783,24 @@ row_create_table_for_mysql(
 	of InnoDB monitor prints */

 		os_event_set(srv_lock_timeout_thread_event);
-	} else if (table_name_len == sizeof S_innodb_lock_monitor
-		   && !memcmp(table_name, S_innodb_lock_monitor,
-			      sizeof S_innodb_lock_monitor)) {
+	} else if (STR_EQ(table_name, table_name_len,
+			  S_innodb_lock_monitor)) {

 		srv_print_innodb_monitor = TRUE;
 		srv_print_innodb_lock_monitor = TRUE;
 		os_event_set(srv_lock_timeout_thread_event);
-	} else if (table_name_len == sizeof S_innodb_tablespace_monitor
-		   && !memcmp(table_name, S_innodb_tablespace_monitor,
-			      sizeof S_innodb_tablespace_monitor)) {
+	} else if (STR_EQ(table_name, table_name_len,
+			  S_innodb_tablespace_monitor)) {

 		srv_print_innodb_tablespace_monitor = TRUE;
 		os_event_set(srv_lock_timeout_thread_event);
-	} else if (table_name_len == sizeof S_innodb_table_monitor
-		   && !memcmp(table_name, S_innodb_table_monitor,
-			      sizeof S_innodb_table_monitor)) {
+	} else if (STR_EQ(table_name, table_name_len,
+			  S_innodb_table_monitor)) {

 		srv_print_innodb_table_monitor = TRUE;
 		os_event_set(srv_lock_timeout_thread_event);
-	} else if (table_name_len == sizeof S_innodb_mem_validate
-		   && !memcmp(table_name, S_innodb_mem_validate,
-			      sizeof S_innodb_mem_validate)) {
+	} else if (STR_EQ(table_name, table_name_len,
+			  S_innodb_mem_validate)) {
 		/* We define here a debugging feature intended for
 		developers */
|
@ -4154,3 +4154,33 @@ row_check_table_for_mysql(
|
|||
return(ret);
|
||||
}
|
||||
#endif /* !UNIV_HOTBACKUP */
|
||||
|
||||
/*************************************************************************
|
||||
Determines if a table is a magic monitor table. */
|
||||
UNIV_INTERN
|
||||
ibool
|
||||
row_is_magic_monitor_table(
|
||||
/*=======================*/
|
||||
/* out: TRUE if monitor table */
|
||||
const char* table_name) /* in: name of the table, in the
|
||||
form database/table_name */
|
||||
{
|
||||
const char* name; /* table_name without database/ */
|
||||
ulint len;
|
||||
|
||||
name = strchr(table_name, '/');
|
||||
ut_a(name != NULL);
|
||||
name++;
|
||||
len = strlen(name) + 1;
|
||||
|
||||
if (STR_EQ(name, len, S_innodb_monitor)
|
||||
|| STR_EQ(name, len, S_innodb_lock_monitor)
|
||||
|| STR_EQ(name, len, S_innodb_tablespace_monitor)
|
||||
|| STR_EQ(name, len, S_innodb_table_monitor)
|
||||
|| STR_EQ(name, len, S_innodb_mem_validate)) {
|
||||
|
||||
return(TRUE);
|
||||
}
|
||||
|
||||
return(FALSE);
|
||||
}
|
||||
|
|
|
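STR_EQ compares sizeof(str2_onstack) bytes, which for these string constants includes the terminating NUL, so a name matches only when len was computed as strlen(name) + 1 and the entire table part is equal; prefixes, extensions and near-misses fall through, which is exactly what the innodb_bug34053 test exercises. A standalone re-implementation to illustrate (stub types, same comparison logic):

```c
#include <assert.h>
#include <stdio.h>
#include <string.h>

#define STR_EQ(str1, str1_len, str2_onstack) \
	((str1_len) == sizeof(str2_onstack) \
	 && memcmp(str1, str2_onstack, sizeof(str2_onstack)) == 0)

static int is_magic_monitor_table(const char* table_name)
{
	const char*	name;	/* table_name without database/ */
	size_t		len;

	name = strchr(table_name, '/');
	assert(name != NULL);	/* the ut_a() in the real code */
	name++;
	len = strlen(name) + 1;	/* + 1: the NUL participates */

	return STR_EQ(name, len, "innodb_monitor")
	    || STR_EQ(name, len, "innodb_lock_monitor")
	    || STR_EQ(name, len, "innodb_tablespace_monitor")
	    || STR_EQ(name, len, "innodb_table_monitor")
	    || STR_EQ(name, len, "innodb_mem_validate");
}

int main(void)
{
	printf("%d\n", is_magic_monitor_table("test/innodb_monitor"));	/* 1 */
	printf("%d\n", is_magic_monitor_table("test/innodb_monitorx"));	/* 0 */
	printf("%d\n", is_magic_monitor_table("test/xinnodb_monitor"));	/* 0 */
	return 0;
}
```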
@@ -1859,12 +1859,6 @@ loop:

 	os_thread_sleep(1000000);

-	/* In case mutex_exit is not a memory barrier, it is
-	theoretically possible some threads are left waiting though
-	the semaphore is already released. Wake up those threads: */
-
-	sync_arr_wake_threads_if_sema_free();
-
 	current_time = time(NULL);

 	time_elapsed = difftime(current_time, last_monitor_time);
@@ -2058,9 +2052,15 @@ loop:
 		srv_refresh_innodb_monitor_stats();
 	}

+	/* In case mutex_exit is not a memory barrier, it is
+	theoretically possible some threads are left waiting though
+	the semaphore is already released. Wake up those threads: */
+
+	sync_arr_wake_threads_if_sema_free();
+
 	if (sync_array_print_long_waits()) {
 		fatal_cnt++;
-		if (fatal_cnt > 5) {
+		if (fatal_cnt > 10) {

 			fprintf(stderr,
 				"InnoDB: Error: semaphore wait has lasted"
@@ -2080,7 +2080,7 @@ loop:

 	fflush(stderr);

-	os_thread_sleep(2000000);
+	os_thread_sleep(1000000);

 	if (srv_shutdown_state < SRV_SHUTDOWN_CLEANUP) {
sync/sync0arr.c (398 changed lines)

@@ -40,24 +40,23 @@ because we can do with a very small number of OS events,
 say 200. In NT 3.51, allocating events seems to be a quadratic
 algorithm, because 10 000 events are created fast, but
 100 000 events takes a couple of minutes to create.
-*/
+
+As of 5.0.30 the above mentioned design is changed. Since now
+OS can handle millions of wait events efficiently, we no longer
+have this concept of each cell of wait array having one event.
+Instead, now the event that a thread wants to wait on is embedded
+in the wait object (mutex or rw_lock). We still keep the global
+wait array for the sake of diagnostics and also to avoid infinite
+wait The error_monitor thread scans the global wait array to signal
+any waiting threads who have missed the signal. */

 /* A cell where an individual thread may wait suspended
 until a resource is released. The suspending is implemented
 using an operating system event semaphore. */
 struct sync_cell_struct {
-	/* State of the cell. SC_WAKING_UP means
-	sync_array_struct->n_reserved has been decremented, but the thread
-	in this cell has not waken up yet. When it does, it will set the
-	state to SC_FREE. Note that this is done without the protection of
-	any mutex. */
-	enum { SC_FREE, SC_RESERVED, SC_WAKING_UP } state;
-
 	void*	wait_object;	/* pointer to the object the
-				thread is waiting for; this is not
-				reseted to NULL when a cell is
-				freed. */
-
+				thread is waiting for; if NULL
+				the cell is free for use */
 	mutex_t*	old_wait_mutex;	/* the latest wait mutex in cell */
 	rw_lock_t*	old_wait_rw_lock;/* the latest wait rw-lock in cell */
 	ulint	request_type;	/* lock type requested on the
@@ -71,13 +70,23 @@ struct sync_cell_struct {
 	ibool		waiting;	/* TRUE if the thread has already
 					called sync_array_event_wait
 					on this cell */
-	ibool		event_set;	/* TRUE if the event is set */
-	os_event_t	event;		/* operating system event
-					semaphore handle */
+	ib_longlong	signal_count;	/* We capture the signal_count
+					of the wait_object when we
+					reset the event. This value is
+					then passed on to os_event_wait
+					and we wait only if the event
+					has not been signalled in the
+					period between the reset and
+					wait call. */
 	time_t		reservation_time;/* time when the thread reserved
 					the wait cell */
 };

+/* NOTE: It is allowed for a thread to wait
+for an event allocated for the array without owning the
+protecting mutex (depending on the case: OS or database mutex), but
+all changes (set or reset) to the state of the event must be made
+while owning the mutex. */
 struct sync_array_struct {
 	ulint		n_reserved;	/* number of currently reserved
 					cells in the wait array */
@@ -220,12 +229,9 @@ sync_array_create(

 	for (i = 0; i < n_cells; i++) {
 		cell = sync_array_get_nth_cell(arr, i);
-		cell->state = SC_FREE;
-		cell->wait_object = NULL;
-
-		/* Create an operating system event semaphore with no name */
-		cell->event = os_event_create(NULL);
-		cell->event_set = FALSE; /* it is created in reset state */
+		cell->wait_object = NULL;
+		cell->waiting = FALSE;
+		cell->signal_count = 0;
 	}

 	return(arr);
@@ -239,19 +245,12 @@ sync_array_free(
 /*============*/
 	sync_array_t*	arr)	/* in, own: sync wait array */
 {
-	ulint		i;
-	sync_cell_t*	cell;
 	ulint		protection;

 	ut_a(arr->n_reserved == 0);

 	sync_array_validate(arr);

-	for (i = 0; i < arr->n_cells; i++) {
-		cell = sync_array_get_nth_cell(arr, i);
-		os_event_free(cell->event);
-	}
-
 	protection = arr->protection;

 	/* Release the mutex protecting the wait array complex */
@@ -285,8 +284,7 @@ sync_array_validate(

 	for (i = 0; i < arr->n_cells; i++) {
 		cell = sync_array_get_nth_cell(arr, i);
-
-		if (cell->state == SC_RESERVED) {
+		if (cell->wait_object != NULL) {
 			count++;
 		}
 	}
@@ -296,6 +294,29 @@ sync_array_validate(
 	sync_array_exit(arr);
 }

+/***********************************************************************
+Puts the cell event in reset state. */
+static
+ib_longlong
+sync_cell_event_reset(
+/*==================*/
+			/* out: value of signal_count
+			at the time of reset. */
+	ulint	type,	/* in: lock type mutex/rw_lock */
+	void*	object)	/* in: the rw_lock/mutex object */
+{
+	if (type == SYNC_MUTEX) {
+		return(os_event_reset(((mutex_t *) object)->event));
+#ifdef __WIN__
+	} else if (type == RW_LOCK_WAIT_EX) {
+		return(os_event_reset(
+			((rw_lock_t *) object)->wait_ex_event));
+#endif
+	} else {
+		return(os_event_reset(((rw_lock_t *) object)->event));
+	}
+}
+
 /**********************************************************************
 Reserves a wait array cell for waiting for an object.
 The event of the cell is reset to nonsignalled state. */
@@ -324,21 +345,9 @@ sync_array_reserve_cell(
 	for (i = 0; i < arr->n_cells; i++) {
 		cell = sync_array_get_nth_cell(arr, i);

-		if (cell->state == SC_FREE) {
-
-			/* We do not check cell->event_set because it is
-			set outside the protection of the sync array mutex
-			and we had a bug regarding it, and since resetting
-			an event when it is not needed does no harm it is
-			safer always to do it. */
-
-			cell->event_set = FALSE;
-			os_event_reset(cell->event);
-
-			cell->state = SC_RESERVED;
-			cell->reservation_time = time(NULL);
-			cell->thread = os_thread_get_curr_id();
+		if (cell->wait_object == NULL) {

+			cell->waiting = FALSE;
 			cell->wait_object = object;

 			if (type == SYNC_MUTEX) {
@@ -348,7 +357,6 @@ sync_array_reserve_cell(
 			}

 			cell->request_type = type;
-			cell->waiting = FALSE;

 			cell->file = file;
 			cell->line = line;
@@ -359,6 +367,16 @@ sync_array_reserve_cell(

 			sync_array_exit(arr);

+			/* Make sure the event is reset and also store
+			the value of signal_count at which the event
+			was reset. */
+			cell->signal_count = sync_cell_event_reset(type,
+								object);
+
+			cell->reservation_time = time(NULL);
+
+			cell->thread = os_thread_get_curr_id();
+
 			return;
 		}
 	}
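The ordering in this hunk is the crux of the race fix: the cell captures the wait object's signal_count (by resetting its event) before the reserving thread ever blocks, so an os_event_set() that fires between reservation and sleep is detected rather than lost. A single-threaded simulation of that window, using a bare counter-based event modeled on the patch (all names hypothetical):

```c
#include <assert.h>
#include <stdio.h>

typedef struct {
	int		is_set;
	long long	signal_count;
} event_t;

static long long event_reset(event_t* ev)
{
	ev->is_set = 0;
	return ev->signal_count;
}

static void event_set(event_t* ev)
{
	ev->is_set = 1;
	ev->signal_count++;
}

/* 1 if a real wait would block, 0 if it would return at once */
static int would_block(const event_t* ev, long long reset_sig_count)
{
	return !ev->is_set && ev->signal_count == reset_sig_count;
}

int main(void)
{
	event_t		ev = { 0, 1 };	/* count starts at 1, as in the patch */
	long long	sig;

	/* waiter: reserve the cell, reset the event, record the count */
	sig = event_reset(&ev);

	/* a releasing thread fires inside the window ... */
	event_set(&ev);
	/* ... and an unrelated waiter resets the event again */
	(void) event_reset(&ev);

	/* the first waiter still must not block: the count moved on */
	assert(!would_block(&ev, sig));
	printf("recorded %lld, count now %lld: wakeup not lost\n",
	       sig, ev.signal_count);
	return 0;
}
```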
@@ -368,68 +386,6 @@ sync_array_reserve_cell(
 	return;
 }

-/**********************************************************************
-Frees the cell. Note that we don't have any mutex reserved when calling
-this. */
-static
-void
-sync_array_free_cell(
-/*=================*/
-	sync_array_t*	arr,	/* in: wait array */
-	ulint		index)	/* in: index of the cell in array */
-{
-	sync_cell_t*	cell;
-
-	cell = sync_array_get_nth_cell(arr, index);
-
-	ut_a(cell->state == SC_WAKING_UP);
-	ut_a(cell->wait_object != NULL);
-
-	cell->state = SC_FREE;
-}
-
-/**********************************************************************
-Frees the cell safely by reserving the sync array mutex and decrementing
-n_reserved if necessary. Should only be called from mutex_spin_wait. */
-UNIV_INTERN
-void
-sync_array_free_cell_protected(
-/*===========================*/
-	sync_array_t*	arr,	/* in: wait array */
-	ulint		index)	/* in: index of the cell in array */
-{
-	sync_cell_t*	cell;
-
-	sync_array_enter(arr);
-
-	cell = sync_array_get_nth_cell(arr, index);
-
-	ut_a(cell->state != SC_FREE);
-	ut_a(cell->wait_object != NULL);
-
-	/* We only need to decrement n_reserved if it has not already been
-	done by sync_array_signal_object. */
-	if (cell->state == SC_RESERVED) {
-		ut_a(arr->n_reserved > 0);
-		arr->n_reserved--;
-	} else if (cell->state == SC_WAKING_UP) {
-		/* This is tricky; if we don't wait for the event to be
-		signaled, signal_object can set the state of a cell to
-		SC_WAKING_UP, mutex_spin_wait can call this and set the
-		state to SC_FREE, and then signal_object gets around to
-		calling os_set_event for the cell but since it's already
-		been freed things break horribly. */
-
-		sync_array_exit(arr);
-		os_event_wait(cell->event);
-		sync_array_enter(arr);
-	}
-
-	cell->state = SC_FREE;
-
-	sync_array_exit(arr);
-}
-
 /**********************************************************************
 This function should be called when a thread starts to wait on
 a wait array cell. In the debug version this function checks
@@ -447,15 +403,28 @@ sync_array_wait_event(

 	ut_a(arr);

+	sync_array_enter(arr);
+
 	cell = sync_array_get_nth_cell(arr, index);

-	ut_a((cell->state == SC_RESERVED) || (cell->state == SC_WAKING_UP));
 	ut_a(cell->wait_object);
 	ut_a(!cell->waiting);
 	ut_ad(os_thread_get_curr_id() == cell->thread);

-	event = cell->event;
-	cell->waiting = TRUE;
+	if (cell->request_type == SYNC_MUTEX) {
+		event = ((mutex_t*) cell->wait_object)->event;
+#ifdef __WIN__
+	/* On windows if the thread about to wait is the one which
+	has set the state of the rw_lock to RW_LOCK_WAIT_EX, then
+	it waits on a special event i.e.: wait_ex_event. */
+	} else if (cell->request_type == RW_LOCK_WAIT_EX) {
+		event = ((rw_lock_t*) cell->wait_object)->wait_ex_event;
+#endif
+	} else {
+		event = ((rw_lock_t*) cell->wait_object)->event;
+	}
+
+	cell->waiting = TRUE;

 #ifdef UNIV_SYNC_DEBUG
@@ -464,7 +433,6 @@ sync_array_wait_event(
 	recursively sync_array routines, leading to trouble.
 	rw_lock_debug_mutex freezes the debug lists. */

-	sync_array_enter(arr);
 	rw_lock_debug_mutex_enter();

 	if (TRUE == sync_array_detect_deadlock(arr, cell, cell, 0)) {
@@ -474,16 +442,16 @@ sync_array_wait_event(
 	}

 	rw_lock_debug_mutex_exit();
-	sync_array_exit(arr);
 #endif
-	os_event_wait(event);
+	sync_array_exit(arr);
+
+	os_event_wait_low(event, cell->signal_count);

 	sync_array_free_cell(arr, index);
 }

 /**********************************************************************
-Reports info of a wait array cell. Note: sync_array_print_long_waits()
-calls this without mutex protection. */
+Reports info of a wait array cell. */
 static
 void
 sync_array_cell_print(
@@ -503,17 +471,8 @@ sync_array_cell_print(
 		(ulong) os_thread_pf(cell->thread), cell->file,
 		(ulong) cell->line,
 		difftime(time(NULL), cell->reservation_time));
-	fprintf(file, "Wait array cell state %lu\n", (ulong)cell->state);

-	/* If the memory area pointed to by old_wait_mutex /
-	old_wait_rw_lock has been freed, this can crash. */
-
-	if (cell->state != SC_RESERVED) {
-		/* If cell has this state, then even if we are holding the sync
-		array mutex, the wait object may get freed meanwhile. Do not
-		print the wait object then. */
-
-	} else if (type == SYNC_MUTEX) {
+	if (type == SYNC_MUTEX) {
+		/* We use old_wait_mutex in case the cell has already
+		been freed meanwhile */
 		mutex = cell->old_wait_mutex;
@@ -531,7 +490,11 @@ sync_array_cell_print(
 #endif /* UNIV_SYNC_DEBUG */
 			(ulong) mutex->waiters);

-	} else if (type == RW_LOCK_EX || type == RW_LOCK_SHARED) {
+	} else if (type == RW_LOCK_EX
+#ifdef __WIN__
+		   || type == RW_LOCK_WAIT_EX
+#endif
+		   || type == RW_LOCK_SHARED) {

 		fputs(type == RW_LOCK_EX ? "X-lock on" : "S-lock on", file);
@@ -565,8 +528,8 @@ sync_array_cell_print(
 		ut_error;
 	}

-	if (cell->event_set) {
-		fputs("wait is ending\n", file);
+	if (!cell->waiting) {
+		fputs("wait has ended\n", file);
 	}
@@ -589,7 +552,7 @@ sync_array_find_thread(

 		cell = sync_array_get_nth_cell(arr, i);

-		if ((cell->state == SC_RESERVED)
+		if (cell->wait_object != NULL
 		    && os_thread_eq(cell->thread, thread)) {

 			return(cell);	/* Found */
@@ -679,7 +642,7 @@ sync_array_detect_deadlock(

 	depth++;

-	if (cell->event_set || !cell->waiting) {
+	if (!cell->waiting) {

 		return(FALSE); /* No deadlock here */
 	}
@@ -704,10 +667,8 @@ sync_array_detect_deadlock(
 			       depth);
 			if (ret) {
 				fprintf(stderr,
-					"Mutex %p owned by thread %lu"
-					" file %s line %lu\n",
-					(void*) mutex,
-					(ulong) os_thread_pf(mutex->thread_id),
+			"Mutex %p owned by thread %lu file %s line %lu\n",
+				mutex, (ulong) os_thread_pf(mutex->thread_id),
 					mutex->file_name, (ulong) mutex->line);
 				sync_array_cell_print(stderr, cell);
@@ -717,7 +678,8 @@ sync_array_detect_deadlock(

 		return(FALSE); /* No deadlock */

-	} else if (cell->request_type == RW_LOCK_EX) {
+	} else if (cell->request_type == RW_LOCK_EX
+		   || cell->request_type == RW_LOCK_WAIT_EX) {

 		lock = cell->wait_object;
@@ -816,7 +778,8 @@ sync_arr_cell_can_wake_up(
 			return(TRUE);
 		}

-	} else if (cell->request_type == RW_LOCK_EX) {
+	} else if (cell->request_type == RW_LOCK_EX
+		   || cell->request_type == RW_LOCK_WAIT_EX) {

 		lock = cell->wait_object;
@@ -845,101 +808,47 @@ sync_arr_cell_can_wake_up(
 	return(FALSE);
 }

-/**************************************************************************
-Looks for the cells in the wait array which refer to the wait object
-specified, and sets their corresponding events to the signaled state. In this
-way releases the threads waiting for the object to contend for the object.
-It is possible that no such cell is found, in which case does nothing. */
-UNIV_INTERN
-void
-sync_array_signal_object(
-/*=====================*/
-	sync_array_t*	arr,	/* in: wait array */
-	void*		object)	/* in: wait object */
-{
-	sync_cell_t*	cell;
-	ulint		count;
-	ulint		i;
-	ulint		res_count;
-
-	/* We store the addresses of cells we need to signal and signal
-	them only after we have released the sync array's mutex (for
-	performance reasons). cell_count is the number of such cells, and
-	cell_ptr points to the first one. If there are less than
-	UT_ARR_SIZE(cells) of them, cell_ptr == &cells[0], otherwise
-	cell_ptr points to malloc'd memory that we must free. */
-
-	sync_cell_t*	cells[100];
-	sync_cell_t**	cell_ptr = &cells[0];
-	ulint		cell_count = 0;
-	ulint		cell_max_count = UT_ARR_SIZE(cells);
-
-	ut_a(100 == cell_max_count);
-
-	sync_array_enter(arr);
-
-	arr->sg_count++;
-
-	i = 0;
-	count = 0;
-
-	/* We need to store this to a local variable because it is modified
-	inside the loop */
-	res_count = arr->n_reserved;
-
-	while (count < res_count) {
-
-		cell = sync_array_get_nth_cell(arr, i);
-
-		if (cell->state == SC_RESERVED) {
-
-			count++;
-			if (cell->wait_object == object) {
-				cell->state = SC_WAKING_UP;
-
-				ut_a(arr->n_reserved > 0);
-				arr->n_reserved--;
-
-				if (cell_count == cell_max_count) {
-					sync_cell_t** old_cell_ptr = cell_ptr;
-					size_t old_size, new_size;
-
-					old_size = cell_max_count
-						* sizeof(sync_cell_t*);
-					cell_max_count *= 2;
-					new_size = cell_max_count
-						* sizeof(sync_cell_t*);
-
-					cell_ptr = malloc(new_size);
-					ut_a(cell_ptr);
-					memcpy(cell_ptr, old_cell_ptr,
-					       old_size);
-
-					if (old_cell_ptr != &cells[0]) {
-						free(old_cell_ptr);
-					}
-				}
-
-				cell_ptr[cell_count] = cell;
-				cell_count++;
-			}
-		}
-
-		i++;
-	}
-
-	sync_array_exit(arr);
-
-	for (i = 0; i < cell_count; i++) {
-		cell = cell_ptr[i];
-
-		cell->event_set = TRUE;
-		os_event_set(cell->event);
-	}
-
-	if (cell_ptr != &cells[0]) {
-		free(cell_ptr);
-	}
-}
+/**********************************************************************
+Frees the cell. NOTE! sync_array_wait_event frees the cell
+automatically! */
+UNIV_INTERN
+void
+sync_array_free_cell(
+/*=================*/
+	sync_array_t*	arr,	/* in: wait array */
+	ulint		index)	/* in: index of the cell in array */
+{
+	sync_cell_t*	cell;
+
+	sync_array_enter(arr);
+
+	cell = sync_array_get_nth_cell(arr, index);
+
+	ut_a(cell->wait_object != NULL);
+
+	cell->waiting = FALSE;
+	cell->wait_object = NULL;
+	cell->signal_count = 0;
+
+	ut_a(arr->n_reserved > 0);
+	arr->n_reserved--;
+
+	sync_array_exit(arr);
+}
+
+/**************************************************************************
+Increments the signalled count. */
+UNIV_INTERN
+void
+sync_array_object_signalled(
+/*========================*/
+	sync_array_t*	arr)	/* in: wait array */
+{
+	sync_array_enter(arr);
+
+	arr->sg_count++;
+
+	sync_array_exit(arr);
+}
@@ -959,33 +868,41 @@ sync_arr_wake_threads_if_sema_free(void)
 	sync_cell_t*	cell;
 	ulint		count;
 	ulint		i;
-	ulint		res_count;

 	sync_array_enter(arr);

 	i = 0;
 	count = 0;

-	/* We need to store this to a local variable because it is modified
-	inside the loop */
-
-	res_count = arr->n_reserved;
-
-	while (count < res_count) {
+	while (count < arr->n_reserved) {

 		cell = sync_array_get_nth_cell(arr, i);

-		if (cell->state == SC_RESERVED) {
+		if (cell->wait_object != NULL) {

 			count++;

 			if (sync_arr_cell_can_wake_up(cell)) {
-				cell->state = SC_WAKING_UP;
-				cell->event_set = TRUE;
-				os_event_set(cell->event);
-
-				ut_a(arr->n_reserved > 0);
-				arr->n_reserved--;
+				if (cell->request_type == SYNC_MUTEX) {
+					mutex_t*	mutex;
+
+					mutex = cell->wait_object;
+					os_event_set(mutex->event);
+#ifdef __WIN__
+				} else if (cell->request_type
+					   == RW_LOCK_WAIT_EX) {
+					rw_lock_t*	lock;
+
+					lock = cell->wait_object;
+					os_event_set(lock->wait_ex_event);
+#endif
+				} else {
+					rw_lock_t*	lock;
+
+					lock = cell->wait_object;
+					os_event_set(lock->event);
+				}
 			}
 		}
@@ -1015,7 +932,7 @@ sync_array_print_long_waits(void)

 		cell = sync_array_get_nth_cell(sync_primary_wait_array, i);

-		if ((cell->state != SC_FREE)
+		if (cell->wait_object != NULL && cell->waiting
 		    && difftime(time(NULL), cell->reservation_time) > 240) {
 			fputs("InnoDB: Warning: a long semaphore wait:\n",
 			      stderr);
@@ -1023,7 +940,7 @@ sync_array_print_long_waits(void)
 			noticed = TRUE;
 		}

-		if ((cell->state != SC_FREE)
+		if (cell->wait_object != NULL && cell->waiting
 		    && difftime(time(NULL), cell->reservation_time)
 		    > fatal_timeout) {
 			fatal = TRUE;
@@ -1072,20 +989,25 @@ sync_array_output_info(
 				mutex */
 {
 	sync_cell_t*	cell;
+	ulint		count;
 	ulint		i;

 	fprintf(file,
-		"OS WAIT ARRAY INFO: reservation count %ld,"
-		" signal count %ld\n",
-		(long) arr->res_count,
-		(long) arr->sg_count);
-	for (i = 0; i < arr->n_cells; i++) {
+	"OS WAIT ARRAY INFO: reservation count %ld, signal count %ld\n",
+			(long) arr->res_count, (long) arr->sg_count);
+	i = 0;
+	count = 0;
+
+	while (count < arr->n_reserved) {

 		cell = sync_array_get_nth_cell(arr, i);

-		if (cell->state != SC_FREE) {
+		if (cell->wait_object != NULL) {
+			count++;
 			sync_array_cell_print(file, cell);
 		}
+
+		i++;
 	}
 }
@@ -151,6 +151,11 @@ rw_lock_create_func(
 	lock->last_x_file_name = "not yet reserved";
 	lock->last_s_line = 0;
 	lock->last_x_line = 0;
+	lock->event = os_event_create(NULL);
+
+#ifdef __WIN__
+	lock->wait_ex_event = os_event_create(NULL);
+#endif

 	mutex_enter(&rw_lock_list_mutex);
@@ -184,6 +189,11 @@ rw_lock_free(
 	mutex_free(rw_lock_get_mutex(lock));

 	mutex_enter(&rw_lock_list_mutex);
+	os_event_free(lock->event);
+
+#ifdef __WIN__
+	os_event_free(lock->wait_ex_event);
+#endif

 	if (UT_LIST_GET_PREV(list, lock)) {
 		ut_a(UT_LIST_GET_PREV(list, lock)->magic_n == RW_LOCK_MAGIC_N);
@@ -544,7 +554,15 @@ lock_loop:
 		rw_x_system_call_count++;

 		sync_array_reserve_cell(sync_primary_wait_array,
-					lock, RW_LOCK_EX,
+					lock,
+#ifdef __WIN__
+					/* On windows RW_LOCK_WAIT_EX signifies
+					that this thread should wait on the
+					special wait_ex_event. */
+					(state == RW_LOCK_WAIT_EX)
+					? RW_LOCK_WAIT_EX :
+#endif
+					RW_LOCK_EX,
 					file_name, line,
 					&index);
@@ -95,17 +95,47 @@ have happened that the thread which was holding the mutex has just released
 it and did not see the waiters byte set to 1, a case which would lead the
 other thread to an infinite wait.

-LEMMA 1: After a thread resets the event of the cell it reserves for waiting
-========
-for a mutex, some thread will eventually call sync_array_signal_object with
-the mutex as an argument. Thus no infinite wait is possible.
+LEMMA 1: After a thread resets the event of a mutex (or rw_lock), some
+=======
+thread will eventually call os_event_set() on that particular event.
+Thus no infinite wait is possible in this case.

 Proof: After making the reservation the thread sets the waiters field in the
 mutex to 1. Then it checks that the mutex is still reserved by some thread,
 or it reserves the mutex for itself. In any case, some thread (which may be
 also some earlier thread, not necessarily the one currently holding the mutex)
 will set the waiters field to 0 in mutex_exit, and then call
-sync_array_signal_object with the mutex as an argument.
+os_event_set() with the mutex as an argument.
 Q.E.D.

+LEMMA 2: If an os_event_set() call is made after some thread has called
+=======
+the os_event_reset() and before it starts wait on that event, the call
+will not be lost to the second thread. This is true even if there is an
+intervening call to os_event_reset() by another thread.
+Thus no infinite wait is possible in this case.
+
+Proof (non-windows platforms): os_event_reset() returns a monotonically
+increasing value of signal_count. This value is increased at every
+call of os_event_set() If thread A has called os_event_reset() followed
+by thread B calling os_event_set() and then some other thread C calling
+os_event_reset(), the is_set flag of the event will be set to FALSE;
+but now if thread A calls os_event_wait_low() with the signal_count
+value returned from the earlier call of os_event_reset(), it will
+return immediately without waiting.
+Q.E.D.
+
+Proof (windows): If there is a writer thread which is forced to wait for
+the lock, it may be able to set the state of rw_lock to RW_LOCK_WAIT_EX
+The design of rw_lock ensures that there is one and only one thread
+that is able to change the state to RW_LOCK_WAIT_EX and this thread is
+guaranteed to acquire the lock after it is released by the current
+holders and before any other waiter gets the lock.
+On windows this thread waits on a separate event i.e.: wait_ex_event.
+Since only one thread can wait on this event there is no chance
+of this event getting reset before the writer starts wait on it.
+Therefore, this thread is guaranteed to catch the os_set_event()
+signalled unconditionally at the release of the lock.
+Q.E.D. */
+
 /* The number of system calls made in this module. Intended for performance
@@ -193,6 +223,7 @@ mutex_create_func(
 	os_fast_mutex_init(&(mutex->os_fast_mutex));
 	mutex->lock_word = 0;
 #endif
+	mutex->event = os_event_create(NULL);
 	mutex_set_waiters(mutex, 0);
 #ifdef UNIV_DEBUG
 	mutex->magic_n = MUTEX_MAGIC_N;
@@ -276,6 +307,8 @@ mutex_free(
 		mutex_exit(&mutex_list_mutex);
 	}

+	os_event_free(mutex->event);
+
 #if !defined(_WIN32) || !defined(UNIV_CAN_USE_X86_ASSEMBLER)
 	os_fast_mutex_free(&(mutex->os_fast_mutex));
 #endif
@@ -485,8 +518,7 @@ spin_loop:
 	if (mutex_test_and_set(mutex) == 0) {
 		/* Succeeded! Free the reserved wait cell */

-		sync_array_free_cell_protected(sync_primary_wait_array,
-					       index);
+		sync_array_free_cell(sync_primary_wait_array, index);

 		ut_d(mutex->thread_id = os_thread_get_curr_id());
 #ifdef UNIV_SYNC_DEBUG
@@ -567,8 +599,8 @@ mutex_signal_object(

 	/* The memory order of resetting the waiters field and
 	signaling the object is important. See LEMMA 1 above. */

-	sync_array_signal_object(sync_primary_wait_array, mutex);
+	os_event_set(mutex->event);
+	sync_array_object_signalled(sync_primary_wait_array);
 }

 #ifdef UNIV_SYNC_DEBUG
@@ -1082,6 +1114,7 @@ sync_thread_add_level(
 		break;
 	case SYNC_TREE_NODE:
 		ut_a(sync_thread_levels_contain(array, SYNC_INDEX_TREE)
+		     || sync_thread_levels_contain(array, SYNC_DICT_OPERATION)
 		     || sync_thread_levels_g(array, SYNC_TREE_NODE - 1));
 		break;
 	case SYNC_TREE_NODE_NEW: