mirror of
https://github.com/MariaDB/server.git
synced 2025-01-31 19:11:46 +01:00
merged io rate patch from Google
This commit is contained in:
commit
186a5311dd
5 changed files with 221 additions and 43 deletions
|
@ -133,6 +133,13 @@ static my_bool innobase_adaptive_hash_index = TRUE;
|
|||
|
||||
static char* internal_innobase_data_file_path = NULL;
|
||||
|
||||
/* Default number of IO per second supported by server. Tunes background
|
||||
IO rate. */
|
||||
static long innobase_io_capacity = 100;
|
||||
|
||||
/* Write dirty pages when pct dirty is less than max pct dirty */
|
||||
static my_bool innobase_extra_dirty_writes = TRUE;
|
||||
|
||||
/* The following counter is used to convey information to InnoDB
|
||||
about server activity: in selects it is not sensible to call
|
||||
srv_active_wake_master_thread after each fetch or search, we only do
|
||||
|
@ -1586,6 +1593,9 @@ innobase_init(
|
|||
#endif /* UNIV_LOG_ARCHIVE */
|
||||
srv_log_buffer_size = (ulint) innobase_log_buffer_size;
|
||||
|
||||
srv_io_capacity = (ulint) innobase_io_capacity;
|
||||
srv_extra_dirty_writes = (ulint) innobase_extra_dirty_writes;
|
||||
|
||||
/* We set srv_pool_size here in units of 1 kB. InnoDB internally
|
||||
changes the value so that it becomes the number of database pages. */
|
||||
|
||||
|
@ -8010,6 +8020,16 @@ static MYSQL_SYSVAR_BOOL(doublewrite, innobase_use_doublewrite,
|
|||
"Disable with --skip-innodb-doublewrite.",
|
||||
NULL, NULL, TRUE);
|
||||
|
||||
static MYSQL_SYSVAR_BOOL(extra_dirty_writes, innobase_extra_dirty_writes,
|
||||
PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
|
||||
"Flush dirty buffer pages when dirty max pct is not exceeded",
|
||||
NULL, NULL, TRUE);
|
||||
|
||||
/* Read-only startup option backing innobase_io_capacity. The four trailing
arguments are default, minimum, maximum and block size. The maximum is the
largest positive long: the former bound, ~0L, evaluates to -1 for a signed
long and therefore lay below the minimum of 100. */
static MYSQL_SYSVAR_LONG(io_capacity, innobase_io_capacity,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Number of IOPs the server can do. Tunes the background IO rate",
  NULL, NULL, 100, 100, (long) (~0UL >> 1), 0);
|
||||
|
||||
static MYSQL_SYSVAR_ULONG(fast_shutdown, innobase_fast_shutdown,
|
||||
PLUGIN_VAR_OPCMDARG,
|
||||
"Speeds up the shutdown process of the InnoDB storage engine. Possible "
|
||||
|
@ -8225,6 +8245,8 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
|
|||
MYSQL_SYSVAR(thread_concurrency),
|
||||
MYSQL_SYSVAR(thread_sleep_delay),
|
||||
MYSQL_SYSVAR(autoinc_lock_mode),
|
||||
MYSQL_SYSVAR(extra_dirty_writes),
|
||||
MYSQL_SYSVAR(io_capacity),
|
||||
NULL
|
||||
};
|
||||
|
||||
|
|
|
@ -169,6 +169,13 @@ void
|
|||
log_buffer_flush_to_disk(void);
|
||||
/*==========================*/
|
||||
/********************************************************************
|
||||
Flushes the log buffer. Forces it to disk depending on the value of
|
||||
the configuration parameter innodb_flush_log_at_trx_commit. */
|
||||
|
||||
void
|
||||
log_buffer_flush_maybe_sync(void);
|
||||
/*==========================*/
|
||||
/********************************************************************
|
||||
Advances the smallest lsn for which there are unflushed dirty blocks in the
|
||||
buffer pool and also may make a new checkpoint. NOTE: this function may only
|
||||
be called if the calling thread owns no synchronization objects! */
|
||||
|
|
|
@ -91,6 +91,14 @@ extern ulint srv_lock_table_size;
|
|||
|
||||
extern ulint srv_n_file_io_threads;
|
||||
|
||||
/* Number of IO operations per second the server can do */
|
||||
extern ulint srv_io_capacity;
|
||||
|
||||
/* Flush dirty pages when below max dirty percent */
|
||||
extern ibool srv_extra_dirty_writes;
|
||||
|
||||
|
||||
|
||||
#ifdef UNIV_LOG_ARCHIVE
|
||||
extern ibool srv_log_archive_on;
|
||||
extern ibool srv_archive_recovery;
|
||||
|
|
|
@ -1516,6 +1516,26 @@ log_buffer_flush_to_disk(void)
|
|||
log_write_up_to(lsn, LOG_WAIT_ALL_GROUPS, TRUE);
|
||||
}
|
||||
|
||||
/********************************************************************
|
||||
Flush the log buffer. Force it to disk depending on the value of
|
||||
innodb_flush_log_at_trx_commit. */
|
||||
|
||||
void
|
||||
log_buffer_flush_maybe_sync(void)
|
||||
/*==========================*/
|
||||
{
|
||||
dulint lsn;
|
||||
|
||||
mutex_enter(&(log_sys->mutex));
|
||||
|
||||
lsn = log_sys->lsn;
|
||||
|
||||
mutex_exit(&(log_sys->mutex));
|
||||
|
||||
/* Force log buffer to disk when innodb_flush_log_at_trx_commit = 1. */
|
||||
log_write_up_to(lsn, LOG_WAIT_ALL_GROUPS,
|
||||
srv_flush_log_at_trx_commit == 1 ? TRUE : FALSE);
|
||||
}
|
||||
/********************************************************************
|
||||
Tries to establish a big enough margin of free space in the log buffer, such
|
||||
that a new log entry can be catenated without an immediate need for a flush. */
|
||||
|
|
|
@ -171,6 +171,12 @@ ulint srv_awe_window_size = 0; /* size in pages; MySQL inits
|
|||
ulint srv_mem_pool_size = ULINT_MAX; /* size in bytes */
|
||||
ulint srv_lock_table_size = ULINT_MAX;
|
||||
|
||||
ulint srv_io_capacity = ULINT_MAX; /* Number of IO operations per
|
||||
second the server can do */
|
||||
|
||||
ibool srv_extra_dirty_writes = TRUE; /* Write dirty pages to disk when pct
|
||||
dirty < max dirty pct */
|
||||
|
||||
ulint srv_n_file_io_threads = ULINT_MAX;
|
||||
|
||||
#ifdef UNIV_LOG_ARCHIVE
|
||||
|
@ -411,6 +417,30 @@ FILE* srv_misc_tmpfile;
|
|||
ulint srv_main_thread_process_no = 0;
|
||||
ulint srv_main_thread_id = 0;
|
||||
|
||||
/* The following counters record work done by srv_master_thread. */
|
||||
|
||||
// Iterations by the 'once per second' loop.
|
||||
ulint srv_main_1_second_loops = 0;
|
||||
// Calls to sleep by the 'once per second' loop.
|
||||
ulint srv_main_sleeps = 0;
|
||||
// Iterations by the 'once per 10 seconds' loop.
|
||||
ulint srv_main_10_second_loops = 0;
|
||||
// Iterations of the loop bounded by the 'background_loop' label.
|
||||
ulint srv_main_background_loops = 0;
|
||||
// Iterations of the loop bounded by the 'flush_loop' label.
|
||||
ulint srv_main_flush_loops = 0;
|
||||
// Calls to log_buffer_flush_to_disk.
|
||||
ulint srv_sync_flush = 0;
|
||||
// Calls to log_buffer_flush_maybe_sync.
|
||||
ulint srv_async_flush = 0;
|
||||
|
||||
// Number of microseconds threads wait because of
|
||||
// innodb_thread_concurrency
|
||||
static ib_longlong srv_thread_wait_mics = 0;
|
||||
|
||||
// Number of microseconds for spinlock delay
|
||||
static ib_longlong srv_timed_spin_delay = 0;
|
||||
|
||||
/*
|
||||
IMPLEMENTATION OF THE SERVER MAIN PROGRAM
|
||||
=========================================
|
||||
|
@ -630,6 +660,65 @@ are indexed by the type of the thread. */
|
|||
ulint srv_n_threads_active[SRV_MASTER + 1];
|
||||
ulint srv_n_threads[SRV_MASTER + 1];
|
||||
|
||||
static void srv_reset_free_tickets(trx_t* trx);
|
||||
|
||||
/*************************************************************************
|
||||
Return the difference in microseconds between 'end' and 'start'
|
||||
*/
|
||||
static ib_longlong mics_diff(ulint start_sec, ulint start_usec,
|
||||
ulint end_sec, ulint end_usec)
|
||||
{
|
||||
ib_longlong end_mics = end_sec * 1000000LL + end_usec;
|
||||
ib_longlong start_mics = start_sec * 1000000LL + start_usec;
|
||||
|
||||
if (end_mics > start_mics)
|
||||
return end_mics - start_mics;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void time_spin_delay()
|
||||
{
|
||||
ulint start_sec, end_sec;
|
||||
ulint start_usec, end_usec;
|
||||
int i;
|
||||
|
||||
srv_timed_spin_delay = 0;
|
||||
|
||||
ut_usectime(&start_sec, &start_usec);
|
||||
|
||||
for (i = 0; i < SYNC_SPIN_ROUNDS; ++i)
|
||||
ut_delay(ut_rnd_interval(0, srv_spin_wait_delay));
|
||||
|
||||
ut_usectime(&end_sec, &end_usec);
|
||||
|
||||
srv_timed_spin_delay = mics_diff(start_sec, start_usec, end_sec, end_usec);
|
||||
}
|
||||
|
||||
/*************************************************************************
|
||||
Prints counters for work done by srv_master_thread. */
|
||||
|
||||
static
|
||||
void
|
||||
srv_print_extra(
|
||||
/*===================*/
|
||||
FILE *file) /* in: output stream */
|
||||
{
|
||||
fprintf(file, "srv_master_thread loops: %lu 1_second, %lu sleeps, "
|
||||
"%lu 10_second, %lu background, %lu flush\n",
|
||||
srv_main_1_second_loops, srv_main_sleeps,
|
||||
srv_main_10_second_loops, srv_main_background_loops,
|
||||
srv_main_flush_loops);
|
||||
fprintf(file, "srv_master_thread log flush: %lu sync, %lu async\n",
|
||||
srv_sync_flush, srv_async_flush);
|
||||
fprintf(file, "srv_wait_thread_mics %lld microseconds, %.1f seconds\n",
|
||||
srv_thread_wait_mics,
|
||||
(double) srv_thread_wait_mics / 1000000.0);
|
||||
fprintf(file,
|
||||
"spinlock delay for %d delay %d rounds is %lld mics\n",
|
||||
srv_spin_wait_delay, SYNC_SPIN_ROUNDS, srv_timed_spin_delay);
|
||||
}
|
||||
|
||||
/*************************************************************************
|
||||
Sets the info describing an i/o thread current state. */
|
||||
|
||||
|
@ -863,6 +952,8 @@ srv_init(void)
|
|||
dict_table_t* table;
|
||||
ulint i;
|
||||
|
||||
time_spin_delay();
|
||||
|
||||
srv_sys = mem_alloc(sizeof(srv_sys_t));
|
||||
|
||||
kernel_mutex_temp = mem_alloc(sizeof(mutex_t));
|
||||
|
@ -1646,6 +1737,11 @@ srv_printf_innodb_monitor(
|
|||
"Per second averages calculated from the last %lu seconds\n",
|
||||
(ulong)time_elapsed);
|
||||
|
||||
fputs("----------\n"
|
||||
"BACKGROUND THREAD\n"
|
||||
"----------\n", file);
|
||||
srv_print_extra(file);
|
||||
|
||||
fputs("----------\n"
|
||||
"SEMAPHORES\n"
|
||||
"----------\n", file);
|
||||
|
@ -1667,24 +1763,6 @@ srv_printf_innodb_monitor(
|
|||
|
||||
mutex_exit(&dict_foreign_err_mutex);
|
||||
|
||||
lock_print_info_summary(file);
|
||||
if (trx_start) {
|
||||
long t = ftell(file);
|
||||
if (t < 0) {
|
||||
*trx_start = ULINT_UNDEFINED;
|
||||
} else {
|
||||
*trx_start = (ulint) t;
|
||||
}
|
||||
}
|
||||
lock_print_info_all_transactions(file);
|
||||
if (trx_end) {
|
||||
long t = ftell(file);
|
||||
if (t < 0) {
|
||||
*trx_end = ULINT_UNDEFINED;
|
||||
} else {
|
||||
*trx_end = (ulint) t;
|
||||
}
|
||||
}
|
||||
fputs("--------\n"
|
||||
"FILE I/O\n"
|
||||
"--------\n", file);
|
||||
|
@ -2186,6 +2264,14 @@ srv_wake_master_thread(void)
|
|||
mutex_exit(&kernel_mutex);
|
||||
}
|
||||
|
||||
/*************************************************************************
Returns the number of IO operations that is X percent of the capacity,
where the capacity is srv_io_capacity. The result is truncated to a ulint.

PCT_IO(5) -> returns the number of IO operations that is 5% of
srv_io_capacity.

The argument is parenthesized so that an expression argument such as
PCT_IO(a + b) is converted to double as a whole before the division.
*/
#define PCT_IO(pct) ((ulint) (srv_io_capacity * ((double) (pct) / 100.0)))
|
||||
|
||||
/*************************************************************************
|
||||
The master thread controlling the server. */
|
||||
|
||||
|
@ -2217,6 +2303,9 @@ srv_master_thread(
|
|||
fprintf(stderr, "Master thread starts, id %lu\n",
|
||||
os_thread_pf(os_thread_get_curr_id()));
|
||||
#endif
|
||||
fprintf(stderr, "InnoDB master thread running with io_capacity %lu\n",
|
||||
srv_io_capacity);
|
||||
|
||||
srv_main_thread_process_no = os_proc_get_number();
|
||||
srv_main_thread_id = os_thread_pf(os_thread_get_curr_id());
|
||||
|
||||
|
@ -2258,10 +2347,12 @@ loop:
|
|||
n_ios_old = log_sys->n_log_ios + buf_pool->n_pages_read
|
||||
+ buf_pool->n_pages_written;
|
||||
srv_main_thread_op_info = "sleeping";
|
||||
srv_main_1_second_loops++;
|
||||
|
||||
if (!skip_sleep) {
|
||||
|
||||
os_thread_sleep(1000000);
|
||||
srv_main_sleeps++;
|
||||
}
|
||||
|
||||
skip_sleep = FALSE;
|
||||
|
@ -2287,27 +2378,28 @@ loop:
|
|||
|
||||
srv_main_thread_op_info = "flushing log";
|
||||
log_buffer_flush_to_disk();
|
||||
srv_sync_flush++;
|
||||
|
||||
srv_main_thread_op_info = "making checkpoint";
|
||||
log_free_check();
|
||||
|
||||
/* If there were less than 5 i/os during the
|
||||
one second sleep, we assume that there is free
|
||||
disk i/o capacity available, and it makes sense to
|
||||
do an insert buffer merge. */
|
||||
/* If i/os during one second sleep were less than 5% of
|
||||
capacity, we assume that there is free disk i/o capacity
|
||||
available, and it makes sense to do an insert buffer merge. */
|
||||
|
||||
n_pend_ios = buf_get_n_pending_ios()
|
||||
+ log_sys->n_pending_writes;
|
||||
n_ios = log_sys->n_log_ios + buf_pool->n_pages_read
|
||||
+ buf_pool->n_pages_written;
|
||||
if (n_pend_ios < 3 && (n_ios - n_ios_old < 5)) {
|
||||
if (n_pend_ios < PCT_IO(3) && (n_ios - n_ios_old < PCT_IO(5))) {
|
||||
srv_main_thread_op_info = "doing insert buffer merge";
|
||||
ibuf_contract_for_n_pages(
|
||||
TRUE, srv_insert_buffer_batch_size / 4);
|
||||
ibuf_contract_for_n_pages(TRUE, PCT_IO(20) / 4);
|
||||
|
||||
srv_main_thread_op_info = "flushing log";
|
||||
|
||||
log_buffer_flush_to_disk();
|
||||
/* No fsync when srv_flush_log_at_trx_commit != 1 */
|
||||
log_buffer_flush_maybe_sync();
|
||||
srv_async_flush++;
|
||||
}
|
||||
|
||||
if (UNIV_UNLIKELY(buf_get_modified_ratio_pct()
|
||||
|
@ -2316,7 +2408,8 @@ loop:
|
|||
/* Try to keep the number of modified pages in the
|
||||
buffer pool under the limit wished by the user */
|
||||
|
||||
n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100,
|
||||
n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST,
|
||||
PCT_IO(100),
|
||||
ut_dulint_max);
|
||||
|
||||
/* If we had to do the flush, it may have taken
|
||||
|
@ -2344,30 +2437,40 @@ loop:
|
|||
seconds */
|
||||
mem_validate_all_blocks();
|
||||
#endif
|
||||
/* If there were less than 200 i/os during the 10 second period,
|
||||
we assume that there is free disk i/o capacity available, and it
|
||||
makes sense to flush 100 pages. */
|
||||
/* If i/os during the 10 second period were less than 200% of
|
||||
capacity, we assume that there is free disk i/o capacity
|
||||
available, and it makes sense to flush srv_io_capacity pages.
|
||||
|
||||
Note that this is done regardless of the fraction of dirty
|
||||
pages relative to the max requested by the user. The one second
|
||||
loop above requests writes for that case. The writes done here
|
||||
are not required, and may be disabled. */
|
||||
|
||||
n_pend_ios = buf_get_n_pending_ios() + log_sys->n_pending_writes;
|
||||
n_ios = log_sys->n_log_ios + buf_pool->n_pages_read
|
||||
+ buf_pool->n_pages_written;
|
||||
if (n_pend_ios < 3 && (n_ios - n_ios_very_old < 200)) {
|
||||
if (srv_extra_dirty_writes &&
|
||||
n_pend_ios < 3 && (n_ios - n_ios_very_old < PCT_IO(200))) {
|
||||
|
||||
srv_main_thread_op_info = "flushing buffer pool pages";
|
||||
buf_flush_batch(BUF_FLUSH_LIST, 100, ut_dulint_max);
|
||||
buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100), ut_dulint_max);
|
||||
|
||||
srv_main_thread_op_info = "flushing log";
|
||||
log_buffer_flush_to_disk();
|
||||
/* No fsync when srv_flush_log_at_trx_commit != 1 */
|
||||
log_buffer_flush_maybe_sync();
|
||||
srv_async_flush++;
|
||||
}
|
||||
|
||||
/* We run a batch of insert buffer merge every 10 seconds,
|
||||
even if the server were active */
|
||||
|
||||
srv_main_thread_op_info = "doing insert buffer merge";
|
||||
ibuf_contract_for_n_pages(TRUE, srv_insert_buffer_batch_size / 4);
|
||||
ibuf_contract_for_n_pages(TRUE, PCT_IO(20) / 4);
|
||||
|
||||
srv_main_thread_op_info = "flushing log";
|
||||
log_buffer_flush_to_disk();
|
||||
/* No fsync when srv_flush_log_at_trx_commit != 1 */
|
||||
log_buffer_flush_maybe_sync();
|
||||
srv_async_flush++;
|
||||
|
||||
/* We run a full purge every 10 seconds, even if the server
|
||||
were active */
|
||||
|
@ -2393,6 +2496,7 @@ loop:
|
|||
|
||||
log_buffer_flush_to_disk();
|
||||
last_flush_time = current_time;
|
||||
srv_sync_flush++;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2406,14 +2510,16 @@ loop:
|
|||
(> 70 %), we assume we can afford reserving the disk(s) for
|
||||
the time it requires to flush PCT_IO(100) pages */
|
||||
|
||||
n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100,
|
||||
n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST,
|
||||
PCT_IO(100),
|
||||
ut_dulint_max);
|
||||
} else {
|
||||
/* Otherwise, we only flush a small number of pages so that
|
||||
we do not unnecessarily use much disk i/o capacity from
|
||||
other work */
|
||||
|
||||
n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 10,
|
||||
n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST,
|
||||
PCT_IO(10),
|
||||
ut_dulint_max);
|
||||
}
|
||||
|
||||
|
@ -2447,7 +2553,7 @@ background_loop:
|
|||
|
||||
/* The server has been quiet for a while: start running background
|
||||
operations */
|
||||
|
||||
srv_main_background_loops++;
|
||||
srv_main_thread_op_info = "doing background drop tables";
|
||||
|
||||
n_tables_to_drop = row_drop_tables_for_mysql_in_background();
|
||||
|
@ -2485,6 +2591,7 @@ background_loop:
|
|||
|
||||
log_buffer_flush_to_disk();
|
||||
last_flush_time = current_time;
|
||||
srv_sync_flush++;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2502,8 +2609,11 @@ background_loop:
|
|||
if (srv_fast_shutdown && srv_shutdown_state > 0) {
|
||||
n_bytes_merged = 0;
|
||||
} else {
|
||||
n_bytes_merged = ibuf_contract_for_n_pages(
|
||||
TRUE, srv_insert_buffer_batch_size);
|
||||
/* This should do an amount of IO similar to the number of
|
||||
* dirty pages that will be flushed in the call to
|
||||
* buf_flush_batch below. Otherwise, the system favors
|
||||
* clean pages over cleanup throughput. */
|
||||
n_bytes_merged = ibuf_contract_for_n_pages(TRUE, PCT_IO(100));
|
||||
}
|
||||
|
||||
srv_main_thread_op_info = "reserving kernel mutex";
|
||||
|
@ -2517,9 +2627,10 @@ background_loop:
|
|||
|
||||
flush_loop:
|
||||
srv_main_thread_op_info = "flushing buffer pool pages";
|
||||
|
||||
srv_main_flush_loops++;
|
||||
if (srv_fast_shutdown < 2) {
|
||||
n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100,
|
||||
n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST,
|
||||
PCT_IO(100),
|
||||
ut_dulint_max);
|
||||
} else {
|
||||
/* In the fastest shutdown we do not flush the buffer pool
|
||||
|
@ -2542,7 +2653,17 @@ flush_loop:
|
|||
|
||||
srv_main_thread_op_info = "flushing log";
|
||||
|
||||
log_buffer_flush_to_disk();
|
||||
current_time = time(NULL);
|
||||
if (difftime(current_time, last_flush_time) > 1) {
|
||||
srv_main_thread_op_info = (char*) "flushing log";
|
||||
log_buffer_flush_to_disk();
|
||||
last_flush_time = current_time;
|
||||
srv_sync_flush++;
|
||||
} else {
|
||||
/* No fsync when srv_flush_log_at_trx_commit != 1 */
|
||||
log_buffer_flush_maybe_sync();
|
||||
srv_async_flush++;
|
||||
}
|
||||
|
||||
srv_main_thread_op_info = "making checkpoint";
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue