mirror of
https://github.com/MariaDB/server.git
synced 2025-01-16 03:52:35 +01:00
WL#3071 - Maria checkpoint
* Preparation for having a background checkpoint thread: frequency of checkpoints taken by that thread is now configurable by the user: global variable maria_checkpoint_frequency, in seconds, default 30 (checkpoint every 30th second); 0 means no checkpoints (and thus no background thread, thus no background flushing; that will probably only be used for testing). * Don't take checkpoints in Recovery if it didn't do anything significant; thus no checkpoint after a clean shutdown/restart. The only checkpoint which is never skipped is the one at shutdown. * fix for a test failure (after-merge fix) include/maria.h: new variable mysql-test/suite/rpl/r/rpl_row_flsh_tbls.result: result update mysql-test/suite/rpl/t/rpl_row_flsh_tbls.test: position update (=after merge fix, as this position was already changed in 5.1 and not merged here, causing test to fail) storage/maria/ha_maria.cc: Checkpoint's frequency is now configurable by the user: global variable maria_checkpoint_frequency. Changing it on the fly requires us to shut down/restart the background checkpoint thread, as the loop done in that thread assumes a constant checkpoint interval. Default value is 30: a checkpoint every 30 seconds (yes, I know, physicists will remind us that it should be named "period" then). ha_maria now asks for a background checkpoint thread when it starts, but this is still overruled (disabled) in ma_checkpoint_init(). storage/maria/ma_checkpoint.c: Checkpoint's frequency is now configurable by the user: background thread takes a checkpoint every maria_checkpoint_frequency-th second. If that variable is 0, no checkpoints are taken. Note, I will enable the background thread only in a later changeset. storage/maria/ma_recovery.c: Don't take checkpoints at the end of the REDO phase and at the end of Recovery if Recovery didn't do anything significant (didn't open any tables, didn't roll back any transactions). 
With this, after a clean shutdown, Recovery shouldn't take any checkpoint, which makes starting faster (we save a few fsync()s of the log and control file).
This commit is contained in:
parent
568e32c80d
commit
791b0aa081
6 changed files with 69 additions and 19 deletions
|
@ -244,7 +244,7 @@ typedef struct st_maria_columndef /* column information */
|
|||
} MARIA_COLUMNDEF;
|
||||
|
||||
|
||||
extern ulong maria_block_size;
|
||||
extern ulong maria_block_size, maria_checkpoint_frequency;
|
||||
extern ulong maria_concurrent_insert;
|
||||
extern my_bool maria_flush, maria_single_user;
|
||||
extern my_bool maria_delay_key_write;
|
||||
|
|
|
@ -12,13 +12,13 @@ create table t4 (a int);
|
|||
insert into t4 select * from t3;
|
||||
rename table t1 to t5, t2 to t1;
|
||||
flush no_write_to_binlog tables;
|
||||
SHOW BINLOG EVENTS FROM 647 ;
|
||||
SHOW BINLOG EVENTS FROM 651 ;
|
||||
Log_name Pos Event_type Server_id End_log_pos Info
|
||||
master-bin.000001 # Query 1 # use `test`; rename table t1 to t5, t2 to t1
|
||||
select * from t3;
|
||||
a
|
||||
flush tables;
|
||||
SHOW BINLOG EVENTS FROM 647 ;
|
||||
SHOW BINLOG EVENTS FROM 651 ;
|
||||
Log_name Pos Event_type Server_id End_log_pos Info
|
||||
master-bin.000001 # Query 1 # use `test`; rename table t1 to t5, t2 to t1
|
||||
master-bin.000001 # Query 1 # use `test`; flush tables
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
# depends on the binlog output
|
||||
-- source include/have_binlog_format_row.inc
|
||||
|
||||
let $rename_event_pos= 647;
|
||||
let $rename_event_pos= 651;
|
||||
|
||||
# Bug#18326: Do not lock table for writing during prepare of statement
|
||||
# The use of the ps protocol causes extra table maps in the binlog, so
|
||||
|
|
|
@ -78,12 +78,22 @@ TYPELIB maria_stats_method_typelib=
|
|||
maria_stats_method_names, NULL
|
||||
};
|
||||
|
||||
static void update_checkpoint_frequency(MYSQL_THD thd,
|
||||
struct st_mysql_sys_var *var,
|
||||
void *var_ptr, void *save);
|
||||
|
||||
static MYSQL_SYSVAR_ULONG(block_size, maria_block_size,
|
||||
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
|
||||
"Block size to be used for MARIA index pages.", 0, 0,
|
||||
MARIA_KEY_BLOCK_LENGTH, MARIA_MIN_KEY_BLOCK_LENGTH,
|
||||
MARIA_MAX_KEY_BLOCK_LENGTH, MARIA_MIN_KEY_BLOCK_LENGTH);
|
||||
|
||||
/*
  User-settable system variable backed by maria_checkpoint_frequency.
  The value is a number of seconds between automatic checkpoints; 0 disables
  them. Changes are routed through update_checkpoint_frequency().
  NOTE(review): the trailing arguments are presumably
  check callback (NULL), update callback, default (30), minimum (0),
  maximum (UINT_MAX) and block size (1), by analogy with the block_size
  variable declared above -- confirm against the MYSQL_SYSVAR_ULONG macro.
*/
static MYSQL_SYSVAR_ULONG(checkpoint_frequency, maria_checkpoint_frequency,
|
||||
PLUGIN_VAR_RQCMDARG,
|
||||
"Frequency of automatic checkpoints, in seconds;"
|
||||
" 0 means 'no checkpoints'.",
|
||||
NULL, update_checkpoint_frequency, 30, 0, UINT_MAX, 1);
|
||||
|
||||
static MYSQL_SYSVAR_ULONGLONG(max_sort_file_size,
|
||||
maria_max_temp_length, PLUGIN_VAR_RQCMDARG,
|
||||
"Don't use the fast sort index method to created index if the "
|
||||
|
@ -2401,7 +2411,7 @@ static int ha_maria_init(void *p)
|
|||
MYSQL_VERSION_ID, server_id, maria_log_pagecache,
|
||||
TRANSLOG_DEFAULT_FLAGS) ||
|
||||
maria_recover() ||
|
||||
ma_checkpoint_init(FALSE);
|
||||
ma_checkpoint_init(TRUE);
|
||||
maria_multi_threaded= TRUE;
|
||||
return res;
|
||||
}
|
||||
|
@ -2484,6 +2494,7 @@ my_bool ha_maria::register_query_cache_table(THD *thd, char *table_name,
|
|||
|
||||
static struct st_mysql_sys_var* system_variables[]= {
|
||||
MYSQL_SYSVAR(block_size),
|
||||
MYSQL_SYSVAR(checkpoint_frequency),
|
||||
MYSQL_SYSVAR(max_sort_file_size),
|
||||
MYSQL_SYSVAR(repair_threads),
|
||||
MYSQL_SYSVAR(sort_buffer_size),
|
||||
|
@ -2492,6 +2503,26 @@ static struct st_mysql_sys_var* system_variables[]= {
|
|||
};
|
||||
|
||||
|
||||
/**
|
||||
@brief Updates the checkpoint frequency and restarts the background thread.
|
||||
|
||||
Background thread has a loop which correctness depends on a constant
|
||||
checkpoint frequency. So when the user wants to modify it, we stop and
|
||||
restart the thread.
|
||||
*/
|
||||
static void update_checkpoint_frequency(MYSQL_THD thd,
|
||||
struct st_mysql_sys_var *var,
|
||||
void *var_ptr, void *save)
|
||||
{
|
||||
ulong new_value= (ulong)(*(long *)save), *dest= (ulong *)var_ptr;
|
||||
if (new_value != *dest) /* it's actually a change */
|
||||
{
|
||||
ma_checkpoint_end();
|
||||
*dest= new_value;
|
||||
ma_checkpoint_init(TRUE);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
struct st_mysql_storage_engine maria_storage_engine=
|
||||
{ MYSQL_HANDLERTON_INTERFACE_VERSION };
|
||||
|
|
|
@ -42,6 +42,8 @@
|
|||
#include "ma_loghandler_lsn.h"
|
||||
|
||||
|
||||
/** @brief Frequency of background checkpoints, in seconds */
|
||||
ulong maria_checkpoint_frequency;
|
||||
/*
|
||||
Checkpoints currently happen only at ha_maria's startup (after recovery) and
|
||||
at shutdown, always when there are no open tables.
|
||||
|
@ -351,6 +353,10 @@ int ma_checkpoint_init(my_bool create_background_thread)
|
|||
DBUG_ENTER("ma_checkpoint_init");
|
||||
checkpoint_inited= TRUE;
|
||||
checkpoint_thread_die= 2; /* not yet born == dead */
|
||||
/* Background thread will be enabled in a later changeset */
|
||||
create_background_thread= FALSE;
|
||||
if (maria_checkpoint_frequency == 0)
|
||||
create_background_thread= FALSE;
|
||||
if (pthread_mutex_init(&LOCK_checkpoint, MY_MUTEX_INIT_SLOW) ||
|
||||
pthread_cond_init(&COND_checkpoint, 0))
|
||||
res= 1;
|
||||
|
@ -527,9 +533,10 @@ static int filter_flush_data_file_evenly(enum pagecache_page_type type,
|
|||
/**
|
||||
@brief Background thread which does checkpoints and flushes periodically.
|
||||
|
||||
Takes a checkpoint every 30th second. After taking a checkpoint, all pages
|
||||
dirty at the time of that checkpoint are flushed evenly until it is time to
|
||||
take another checkpoint (30 seconds later). This ensures that the REDO
|
||||
Takes a checkpoint every maria_checkpoint_frequency-th second. After taking
|
||||
a checkpoint, all pages dirty at the time of that checkpoint are flushed
|
||||
evenly until it is time to take another checkpoint
|
||||
(maria_checkpoint_frequency seconds later). This ensures that the REDO
|
||||
phase starts at earliest (in LSN time) at the next-to-last checkpoint
|
||||
record ("two-checkpoint rule").
|
||||
|
||||
|
@ -544,10 +551,8 @@ static int filter_flush_data_file_evenly(enum pagecache_page_type type,
|
|||
|
||||
pthread_handler_t ma_checkpoint_background(void *arg __attribute__((unused)))
|
||||
{
|
||||
const uint sleep_unit= 1 /* 1 second */,
|
||||
time_between_checkpoints= 30, /* 30 sleep units */
|
||||
/** @brief At least this many log/page bytes written between checkpoints */
|
||||
checkpoint_min_activity= 2*1024*1024;
|
||||
const uint checkpoint_min_activity= 2*1024*1024;
|
||||
uint sleeps= 0;
|
||||
|
||||
my_thread_init();
|
||||
|
@ -566,7 +571,12 @@ pthread_handler_t ma_checkpoint_background(void *arg __attribute__((unused)))
|
|||
struct timespec abstime;
|
||||
LINT_INIT(kfile);
|
||||
LINT_INIT(dfile);
|
||||
switch((sleeps++) % time_between_checkpoints)
|
||||
/*
|
||||
If the frequency could be changed by the user while we are in this loop,
|
||||
it could be annoying: for example it could cause "case 2" to be executed
|
||||
right after "case 0", thus having 'dfile' unset.
|
||||
*/
|
||||
switch((sleeps++) % maria_checkpoint_frequency)
|
||||
{
|
||||
case 0:
|
||||
/*
|
||||
|
@ -579,6 +589,9 @@ pthread_handler_t ma_checkpoint_background(void *arg __attribute__((unused)))
|
|||
since last checkpoint. Such work includes log writing (lengthens
|
||||
recovery, checkpoint would shorten it), page flushing (checkpoint
|
||||
would decrease the amount of read pages in recovery).
|
||||
In case of one short statement per minute (very low load), we don't
|
||||
want to checkpoint every minute, hence the positive
|
||||
checkpoint_min_activity.
|
||||
*/
|
||||
if (((translog_get_horizon() - log_horizon_at_last_checkpoint) +
|
||||
(maria_pagecache->global_cache_write -
|
||||
|
@ -608,7 +621,7 @@ pthread_handler_t ma_checkpoint_background(void *arg __attribute__((unused)))
|
|||
/* set up parameters for background page flushing */
|
||||
filter_param.up_to_lsn= last_checkpoint_lsn;
|
||||
pages_bunch_size= pages_to_flush_before_next_checkpoint /
|
||||
time_between_checkpoints;
|
||||
maria_checkpoint_frequency;
|
||||
dfile= dfiles;
|
||||
kfile= kfiles;
|
||||
/* fall through */
|
||||
|
@ -659,7 +672,7 @@ pthread_handler_t ma_checkpoint_background(void *arg __attribute__((unused)))
|
|||
pthread_mutex_lock(&LOCK_checkpoint);
|
||||
#else
|
||||
/* To have a killable sleep, we use timedwait like our SQL GET_LOCK() */
|
||||
set_timespec(abstime, sleep_unit);
|
||||
set_timespec(abstime, 1);
|
||||
pthread_cond_timedwait(&COND_checkpoint, &LOCK_checkpoint, &abstime);
|
||||
#endif
|
||||
if (checkpoint_thread_die == 1)
|
||||
|
|
|
@ -51,6 +51,8 @@ static LSN current_group_end_lsn,
|
|||
static TrID max_long_trid= 0; /**< max long trid seen by REDO phase */
|
||||
static FILE *tracef; /**< trace file for debugging */
|
||||
static my_bool skip_DDLs; /**< if REDO phase should skip DDL records */
|
||||
/** @brief to avoid writing a checkpoint if recovery did nothing. */
|
||||
static my_bool checkpoint_useful;
|
||||
static ulonglong now; /**< for tracking execution time of phases */
|
||||
|
||||
#define prototype_redo_exec_hook(R) \
|
||||
|
@ -221,6 +223,9 @@ int maria_apply_log(LSN from_lsn, my_bool apply, FILE *trace_file,
|
|||
if (!all_active_trans || !all_tables)
|
||||
goto err;
|
||||
|
||||
if (take_checkpoints && ma_checkpoint_init(FALSE))
|
||||
goto err;
|
||||
|
||||
redo_phase_message_printed= FALSE;
|
||||
tracef= trace_file;
|
||||
if (!(skip_DDLs= skip_DDLs_arg))
|
||||
|
@ -277,15 +282,14 @@ int maria_apply_log(LSN from_lsn, my_bool apply, FILE *trace_file,
|
|||
end_of_redo_phase(should_run_undo_phase)) == (uint)-1)
|
||||
goto err;
|
||||
|
||||
if (take_checkpoints)
|
||||
if (take_checkpoints && checkpoint_useful)
|
||||
{
|
||||
/*
|
||||
We take a checkpoint as it can save future recovery work if we crash
|
||||
during the UNDO phase. But we don't flush pages, as UNDOs will change
|
||||
them again probably.
|
||||
*/
|
||||
if (ma_checkpoint_init(FALSE) ||
|
||||
ma_checkpoint_execute(CHECKPOINT_INDIRECT, FALSE))
|
||||
if (ma_checkpoint_execute(CHECKPOINT_INDIRECT, FALSE))
|
||||
goto err;
|
||||
}
|
||||
|
||||
|
@ -305,7 +309,7 @@ int maria_apply_log(LSN from_lsn, my_bool apply, FILE *trace_file,
|
|||
if (close_all_tables())
|
||||
goto err;
|
||||
|
||||
if (take_checkpoints)
|
||||
if (take_checkpoints && checkpoint_useful)
|
||||
{
|
||||
/* No dirty pages, all tables are closed, no active transactions, save: */
|
||||
if (ma_checkpoint_execute(CHECKPOINT_FULL, FALSE))
|
||||
|
@ -948,6 +952,7 @@ static int new_table(uint16 sid, const char *name,
|
|||
*/
|
||||
int error= 1;
|
||||
|
||||
checkpoint_useful= TRUE;
|
||||
tprint(tracef, "Table '%s', id %u", name, sid);
|
||||
MARIA_HA *info= maria_open(name, O_RDWR, HA_OPEN_FOR_REPAIR);
|
||||
if (info == NULL)
|
||||
|
@ -1791,6 +1796,7 @@ static int run_undo_phase(uint unfinished)
|
|||
{
|
||||
if (unfinished > 0)
|
||||
{
|
||||
checkpoint_useful= TRUE;
|
||||
if (tracef != stdout)
|
||||
{
|
||||
ulonglong old_now= now;
|
||||
|
|
Loading…
Reference in a new issue