branches/5.1: Fix bug #47814 - Diagnostics are frequently not

printed after a long lock wait in InnoDB. Separate out the 
lock wait timeout check thread from monitor information
printing thread.

rb://200 Approved by Marko.
This commit is contained in:
jyang 2009-12-17 04:54:24 +00:00
parent c4cdbe88c9
commit 3f2db67f03
7 changed files with 195 additions and 61 deletions

View file

@ -7429,7 +7429,7 @@ innodb_show_status(
mutex_enter_noninline(&srv_monitor_file_mutex); mutex_enter_noninline(&srv_monitor_file_mutex);
rewind(srv_monitor_file); rewind(srv_monitor_file);
srv_printf_innodb_monitor(srv_monitor_file, srv_printf_innodb_monitor(srv_monitor_file, FALSE,
&trx_list_start, &trx_list_end); &trx_list_start, &trx_list_end);
flen = ftell(srv_monitor_file); flen = ftell(srv_monitor_file);
os_file_set_eof(srv_monitor_file); os_file_set_eof(srv_monitor_file);

View file

@ -579,10 +579,15 @@ lock_rec_print(
/************************************************************************* /*************************************************************************
Prints info of locks for all transactions. */ Prints info of locks for all transactions. */
void ibool
lock_print_info_summary( lock_print_info_summary(
/*====================*/ /*====================*/
FILE* file); /* in: file where to print */ /* out: FALSE if not able to obtain
kernel mutex and exits without
printing info */
FILE* file, /* in: file where to print */
ibool nowait);/* in: whether to wait for the kernel
mutex */
/************************************************************************* /*************************************************************************
Prints info of locks for each transaction. */ Prints info of locks for each transaction. */

View file

@ -146,7 +146,8 @@ extern ibool srv_print_innodb_tablespace_monitor;
extern ibool srv_print_verbose_log; extern ibool srv_print_verbose_log;
extern ibool srv_print_innodb_table_monitor; extern ibool srv_print_innodb_table_monitor;
extern ibool srv_lock_timeout_and_monitor_active; extern ibool srv_lock_timeout_active;
extern ibool srv_monitor_active;
extern ibool srv_error_monitor_active; extern ibool srv_error_monitor_active;
extern ulong srv_n_spin_wait_rounds; extern ulong srv_n_spin_wait_rounds;
@ -427,12 +428,21 @@ srv_release_mysql_thread_if_suspended(
que_thr_t* thr); /* in: query thread associated with the que_thr_t* thr); /* in: query thread associated with the
MySQL OS thread */ MySQL OS thread */
/************************************************************************* /*************************************************************************
A thread which wakes up threads whose lock wait may have lasted too long. A thread which wakes up threads whose lock wait may have lasted too
This also prints the info output by various InnoDB monitors. */ long. */
os_thread_ret_t os_thread_ret_t
srv_lock_timeout_and_monitor_thread( srv_lock_timeout_thread(
/*================================*/ /*====================*/
/* out: a dummy parameter */
void* arg); /* in: a dummy parameter required by
os_thread_create */
/*************************************************************************
A thread which prints the info output by various InnoDB monitors. */
os_thread_ret_t
srv_monitor_thread(
/*===============*/
/* out: a dummy parameter */ /* out: a dummy parameter */
void* arg); /* in: a dummy parameter required by void* arg); /* in: a dummy parameter required by
os_thread_create */ os_thread_create */
@ -449,10 +459,14 @@ srv_error_monitor_thread(
/********************************************************************** /**********************************************************************
Outputs to a file the output of the InnoDB Monitor. */ Outputs to a file the output of the InnoDB Monitor. */
void ibool
srv_printf_innodb_monitor( srv_printf_innodb_monitor(
/*======================*/ /*======================*/
/* out: FALSE if not all information printed
due to failure to obtain necessary mutex */
FILE* file, /* in: output stream */ FILE* file, /* in: output stream */
ibool nowait, /* in: whether to wait for kernel
mutex. */
ulint* trx_start, /* out: file position of the start of ulint* trx_start, /* out: file position of the start of
the list of active transactions */ the list of active transactions */
ulint* trx_end); /* out: file position of the end of ulint* trx_end); /* out: file position of the end of

View file

@ -4217,17 +4217,33 @@ lock_get_n_rec_locks(void)
/************************************************************************* /*************************************************************************
Prints info of locks for all transactions. */ Prints info of locks for all transactions. */
void ibool
lock_print_info_summary( lock_print_info_summary(
/*====================*/ /*====================*/
FILE* file) /* in: file where to print */ /* out: FALSE if not able to obtain
kernel mutex and exit without
printing lock info */
FILE* file, /* in: file where to print */
ibool nowait) /* in: whether to wait for the kernel
mutex */
{ {
/* We must protect the MySQL thd->query field with a MySQL mutex, and /* We must protect the MySQL thd->query field with a MySQL mutex, and
because the MySQL mutex must be reserved before the kernel_mutex of because the MySQL mutex must be reserved before the kernel_mutex of
InnoDB, we call innobase_mysql_prepare_print_arbitrary_thd() here. */ InnoDB, we call innobase_mysql_prepare_print_arbitrary_thd() here. */
innobase_mysql_prepare_print_arbitrary_thd(); innobase_mysql_prepare_print_arbitrary_thd();
/* if nowait is FALSE, wait on the kernel mutex,
otherwise return immediately if fail to obtain the
mutex. */
if (!nowait) {
lock_mutex_enter_kernel(); lock_mutex_enter_kernel();
} else if (mutex_enter_nowait(&kernel_mutex)) {
innobase_mysql_end_print_arbitrary_thd();
fputs("FAIL TO OBTAIN KERNEL MUTEX, "
"SKIP LOCK INFO PRINTING\n", file);
return(FALSE);
}
if (lock_deadlock_found) { if (lock_deadlock_found) {
fputs("------------------------\n" fputs("------------------------\n"
@ -4261,6 +4277,7 @@ lock_print_info_summary(
"Total number of lock structs in row lock hash table %lu\n", "Total number of lock structs in row lock hash table %lu\n",
(ulong) lock_get_n_rec_locks()); (ulong) lock_get_n_rec_locks());
#endif /* PRINT_NUM_OF_LOCK_STRUCTS */ #endif /* PRINT_NUM_OF_LOCK_STRUCTS */
return(TRUE);
} }
/************************************************************************* /*************************************************************************

View file

@ -3045,7 +3045,7 @@ loop:
if (srv_fast_shutdown < 2 if (srv_fast_shutdown < 2
&& (srv_error_monitor_active && (srv_error_monitor_active
|| srv_lock_timeout_and_monitor_active)) { || srv_lock_timeout_active || srv_monitor_active)) {
mutex_exit(&kernel_mutex); mutex_exit(&kernel_mutex);

View file

@ -64,7 +64,8 @@ ulint srv_fatal_semaphore_wait_threshold = 600;
in microseconds, in order to reduce the lagging of the purge thread. */ in microseconds, in order to reduce the lagging of the purge thread. */
ulint srv_dml_needed_delay = 0; ulint srv_dml_needed_delay = 0;
ibool srv_lock_timeout_and_monitor_active = FALSE; ibool srv_lock_timeout_active = FALSE;
ibool srv_monitor_active = FALSE;
ibool srv_error_monitor_active = FALSE; ibool srv_error_monitor_active = FALSE;
const char* srv_main_thread_op_info = ""; const char* srv_main_thread_op_info = "";
@ -122,6 +123,16 @@ ulint srv_log_file_size = ULINT_MAX; /* size in database pages */
ulint srv_log_buffer_size = ULINT_MAX; /* size in database pages */ ulint srv_log_buffer_size = ULINT_MAX; /* size in database pages */
ulong srv_flush_log_at_trx_commit = 1; ulong srv_flush_log_at_trx_commit = 1;
/* Maximum number of times allowed to conditionally acquire
mutex before switching to blocking wait on the mutex */
#define MAX_MUTEX_NOWAIT 20
/* Check whether the number of failed nonblocking mutex
acquisition attempts exceeds maximum allowed value. If so,
srv_printf_innodb_monitor() will request mutex acquisition
with mutex_enter(), which will wait until it gets the mutex. */
#define MUTEX_NOWAIT(mutex_skipped) ((mutex_skipped) < MAX_MUTEX_NOWAIT)
byte srv_latin1_ordering[256] /* The sort order table of the latin1 byte srv_latin1_ordering[256] /* The sort order table of the latin1
character set. The following table is character set. The following table is
the MySQL order as of Feb 10th, 2002 */ the MySQL order as of Feb 10th, 2002 */
@ -1626,10 +1637,13 @@ srv_refresh_innodb_monitor_stats(void)
/********************************************************************** /**********************************************************************
Outputs to a file the output of the InnoDB Monitor. */ Outputs to a file the output of the InnoDB Monitor. */
void ibool
srv_printf_innodb_monitor( srv_printf_innodb_monitor(
/*======================*/ /*======================*/
/* out: FALSE if not all information printed
due to failure to obtain necessary mutex */
FILE* file, /* in: output stream */ FILE* file, /* in: output stream */
ibool nowait, /* in: whether to wait for the mutex. */
ulint* trx_start, /* out: file position of the start of ulint* trx_start, /* out: file position of the start of
the list of active transactions */ the list of active transactions */
ulint* trx_end) /* out: file position of the end of ulint* trx_end) /* out: file position of the end of
@ -1638,6 +1652,7 @@ srv_printf_innodb_monitor(
double time_elapsed; double time_elapsed;
time_t current_time; time_t current_time;
ulint n_reserved; ulint n_reserved;
ibool ret;
mutex_enter(&srv_innodb_monitor_mutex); mutex_enter(&srv_innodb_monitor_mutex);
@ -1682,7 +1697,12 @@ srv_printf_innodb_monitor(
mutex_exit(&dict_foreign_err_mutex); mutex_exit(&dict_foreign_err_mutex);
lock_print_info_summary(file); /* Only if lock_print_info_summary proceeds correctly,
before we call the lock_print_info_all_transactions
to print all the lock information. */
ret = lock_print_info_summary(file, nowait);
if (ret) {
if (trx_start) { if (trx_start) {
long t = ftell(file); long t = ftell(file);
if (t < 0) { if (t < 0) {
@ -1700,6 +1720,8 @@ srv_printf_innodb_monitor(
*trx_end = (ulint) t; *trx_end = (ulint) t;
} }
} }
}
fputs("--------\n" fputs("--------\n"
"FILE I/O\n" "FILE I/O\n"
"--------\n", file); "--------\n", file);
@ -1804,6 +1826,8 @@ srv_printf_innodb_monitor(
"============================\n", file); "============================\n", file);
mutex_exit(&srv_innodb_monitor_mutex); mutex_exit(&srv_innodb_monitor_mutex);
fflush(file); fflush(file);
return(ret);
} }
/********************************************************************** /**********************************************************************
@ -1883,26 +1907,23 @@ srv_export_innodb_status(void)
} }
/************************************************************************* /*************************************************************************
A thread which wakes up threads whose lock wait may have lasted too long. A thread prints the info output by various InnoDB monitors. */
This also prints the info output by various InnoDB monitors. */
os_thread_ret_t os_thread_ret_t
srv_lock_timeout_and_monitor_thread( srv_monitor_thread(
/*================================*/ /*===============*/
/* out: a dummy parameter */ /* out: a dummy parameter */
void* arg __attribute__((unused))) void* arg __attribute__((unused)))
/* in: a dummy parameter required by /* in: a dummy parameter required by
os_thread_create */ os_thread_create */
{ {
srv_slot_t* slot;
double time_elapsed; double time_elapsed;
time_t current_time; time_t current_time;
time_t last_table_monitor_time; time_t last_table_monitor_time;
time_t last_tablespace_monitor_time; time_t last_tablespace_monitor_time;
time_t last_monitor_time; time_t last_monitor_time;
ibool some_waits; ulint mutex_skipped;
double wait_time; ibool last_srv_print_monitor;
ulint i;
#ifdef UNIV_DEBUG_THREAD_CREATION #ifdef UNIV_DEBUG_THREAD_CREATION
fprintf(stderr, "Lock timeout thread starts, id %lu\n", fprintf(stderr, "Lock timeout thread starts, id %lu\n",
@ -1913,13 +1934,15 @@ srv_lock_timeout_and_monitor_thread(
last_table_monitor_time = time(NULL); last_table_monitor_time = time(NULL);
last_tablespace_monitor_time = time(NULL); last_tablespace_monitor_time = time(NULL);
last_monitor_time = time(NULL); last_monitor_time = time(NULL);
mutex_skipped = 0;
last_srv_print_monitor = srv_print_innodb_monitor;
loop: loop:
srv_lock_timeout_and_monitor_active = TRUE; srv_monitor_active = TRUE;
/* When someone is waiting for a lock, we wake up every second /* Wake up every 5 seconds to see if we need to print
and check if a timeout has passed for a lock wait */ monitor information. */
os_thread_sleep(1000000); os_thread_sleep(5000000);
current_time = time(NULL); current_time = time(NULL);
@ -1929,14 +1952,40 @@ loop:
last_monitor_time = time(NULL); last_monitor_time = time(NULL);
if (srv_print_innodb_monitor) { if (srv_print_innodb_monitor) {
srv_printf_innodb_monitor(stderr, NULL, NULL); /* Reset mutex_skipped counter everytime
srv_print_innodb_monitor changes. This is to
ensure we will not be blocked by kernel_mutex
for short duration information printing,
such as requested by sync_array_print_long_waits() */
if (!last_srv_print_monitor) {
mutex_skipped = 0;
last_srv_print_monitor = TRUE;
} }
if (!srv_printf_innodb_monitor(stderr,
MUTEX_NOWAIT(mutex_skipped),
NULL, NULL)) {
mutex_skipped++;
} else {
/* Reset the counter */
mutex_skipped = 0;
}
} else {
last_srv_print_monitor = FALSE;
}
if (srv_innodb_status) { if (srv_innodb_status) {
mutex_enter(&srv_monitor_file_mutex); mutex_enter(&srv_monitor_file_mutex);
rewind(srv_monitor_file); rewind(srv_monitor_file);
srv_printf_innodb_monitor(srv_monitor_file, NULL, if (!srv_printf_innodb_monitor(srv_monitor_file,
NULL); MUTEX_NOWAIT(mutex_skipped),
NULL, NULL)) {
mutex_skipped++;
} else {
mutex_skipped = 0;
}
os_file_set_eof(srv_monitor_file); os_file_set_eof(srv_monitor_file);
mutex_exit(&srv_monitor_file_mutex); mutex_exit(&srv_monitor_file_mutex);
} }
@ -1989,6 +2038,56 @@ loop:
} }
} }
if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) {
goto exit_func;
}
if (srv_print_innodb_monitor
|| srv_print_innodb_lock_monitor
|| srv_print_innodb_tablespace_monitor
|| srv_print_innodb_table_monitor) {
goto loop;
}
srv_monitor_active = FALSE;
goto loop;
exit_func:
srv_monitor_active = FALSE;
/* We count the number of threads in os_thread_exit(). A created
thread should always use that to exit and not use return() to exit. */
os_thread_exit(NULL);
OS_THREAD_DUMMY_RETURN;
}
/*************************************************************************
A thread which wakes up threads whose lock wait may have lasted too long. */
os_thread_ret_t
srv_lock_timeout_thread(
/*====================*/
/* out: a dummy parameter */
void* arg __attribute__((unused)))
/* in: a dummy parameter required by
os_thread_create */
{
srv_slot_t* slot;
ibool some_waits;
double wait_time;
ulint i;
loop:
/* When someone is waiting for a lock, we wake up every second
and check if a timeout has passed for a lock wait */
os_thread_sleep(1000000);
srv_lock_timeout_active = TRUE;
mutex_enter(&kernel_mutex); mutex_enter(&kernel_mutex);
some_waits = FALSE; some_waits = FALSE;
@ -2033,17 +2132,11 @@ loop:
goto exit_func; goto exit_func;
} }
if (some_waits || srv_print_innodb_monitor if (some_waits) {
|| srv_print_innodb_lock_monitor
|| srv_print_innodb_tablespace_monitor
|| srv_print_innodb_table_monitor) {
goto loop; goto loop;
} }
/* No one was waiting for a lock and no monitor was active: srv_lock_timeout_active = FALSE;
suspend this thread */
srv_lock_timeout_and_monitor_active = FALSE;
#if 0 #if 0
/* The following synchronisation is disabled, since /* The following synchronisation is disabled, since
@ -2053,7 +2146,7 @@ loop:
goto loop; goto loop;
exit_func: exit_func:
srv_lock_timeout_and_monitor_active = FALSE; srv_lock_timeout_active = FALSE;
/* We count the number of threads in os_thread_exit(). A created /* We count the number of threads in os_thread_exit(). A created
thread should always use that to exit and not use return() to exit. */ thread should always use that to exit and not use return() to exit. */

View file

@ -87,8 +87,8 @@ static os_file_t files[1000];
static mutex_t ios_mutex; static mutex_t ios_mutex;
static ulint ios; static ulint ios;
static ulint n[SRV_MAX_N_IO_THREADS + 5]; static ulint n[SRV_MAX_N_IO_THREADS + 6];
static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 5]; static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 6];
/* We use this mutex to test the return value of pthread_mutex_trylock /* We use this mutex to test the return value of pthread_mutex_trylock
on successful locking. HP-UX does NOT return 0, though Linux et al do. */ on successful locking. HP-UX does NOT return 0, though Linux et al do. */
@ -1596,15 +1596,20 @@ innobase_start_or_create_for_mysql(void)
/* fprintf(stderr, "Max allowed record size %lu\n", /* fprintf(stderr, "Max allowed record size %lu\n",
page_get_free_space_of_empty() / 2); */ page_get_free_space_of_empty() / 2); */
/* Create the thread which watches the timeouts for lock waits /* Create the thread which watches the timeouts for lock
and prints InnoDB monitor info */ waits */
os_thread_create(&srv_lock_timeout_and_monitor_thread, NULL, os_thread_create(&srv_lock_timeout_thread, NULL,
thread_ids + 2 + SRV_MAX_N_IO_THREADS); thread_ids + 2 + SRV_MAX_N_IO_THREADS);
/* Create the thread which warns of long semaphore waits */ /* Create the thread which warns of long semaphore waits */
os_thread_create(&srv_error_monitor_thread, NULL, os_thread_create(&srv_error_monitor_thread, NULL,
thread_ids + 3 + SRV_MAX_N_IO_THREADS); thread_ids + 3 + SRV_MAX_N_IO_THREADS);
/* Create the thread which prints InnoDB monitor info */
os_thread_create(&srv_monitor_thread, NULL,
thread_ids + 4 + SRV_MAX_N_IO_THREADS);
srv_was_started = TRUE; srv_was_started = TRUE;
srv_is_being_started = FALSE; srv_is_being_started = FALSE;