diff --git a/include/maria.h b/include/maria.h index dfd0596ba7e..f915e9fbb7e 100644 --- a/include/maria.h +++ b/include/maria.h @@ -244,7 +244,7 @@ typedef struct st_maria_columndef /* column information */ } MARIA_COLUMNDEF; -extern ulong maria_block_size; +extern ulong maria_block_size, maria_checkpoint_frequency; extern ulong maria_concurrent_insert; extern my_bool maria_flush, maria_single_user; extern my_bool maria_delay_key_write; diff --git a/mysql-test/suite/rpl/r/rpl_row_flsh_tbls.result b/mysql-test/suite/rpl/r/rpl_row_flsh_tbls.result index b2fd110973f..319888fa083 100644 --- a/mysql-test/suite/rpl/r/rpl_row_flsh_tbls.result +++ b/mysql-test/suite/rpl/r/rpl_row_flsh_tbls.result @@ -12,13 +12,13 @@ create table t4 (a int); insert into t4 select * from t3; rename table t1 to t5, t2 to t1; flush no_write_to_binlog tables; -SHOW BINLOG EVENTS FROM 647 ; +SHOW BINLOG EVENTS FROM 651 ; Log_name Pos Event_type Server_id End_log_pos Info master-bin.000001 # Query 1 # use `test`; rename table t1 to t5, t2 to t1 select * from t3; a flush tables; -SHOW BINLOG EVENTS FROM 647 ; +SHOW BINLOG EVENTS FROM 651 ; Log_name Pos Event_type Server_id End_log_pos Info master-bin.000001 # Query 1 # use `test`; rename table t1 to t5, t2 to t1 master-bin.000001 # Query 1 # use `test`; flush tables diff --git a/mysql-test/suite/rpl/t/rpl_row_flsh_tbls.test b/mysql-test/suite/rpl/t/rpl_row_flsh_tbls.test index b96bb50b3cc..c8a4d5d89a6 100644 --- a/mysql-test/suite/rpl/t/rpl_row_flsh_tbls.test +++ b/mysql-test/suite/rpl/t/rpl_row_flsh_tbls.test @@ -1,7 +1,7 @@ # depends on the binlog output -- source include/have_binlog_format_row.inc -let $rename_event_pos= 647; +let $rename_event_pos= 651; # Bug#18326: Do not lock table for writing during prepare of statement # The use of the ps protocol causes extra table maps in the binlog, so diff --git a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc index a0dd69b05e3..0ec080ce383 100644 --- a/storage/maria/ha_maria.cc +++ 
b/storage/maria/ha_maria.cc @@ -78,12 +78,22 @@ TYPELIB maria_stats_method_typelib= maria_stats_method_names, NULL }; +static void update_checkpoint_frequency(MYSQL_THD thd, + struct st_mysql_sys_var *var, + void *var_ptr, void *save); + static MYSQL_SYSVAR_ULONG(block_size, maria_block_size, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, "Block size to be used for MARIA index pages.", 0, 0, MARIA_KEY_BLOCK_LENGTH, MARIA_MIN_KEY_BLOCK_LENGTH, MARIA_MAX_KEY_BLOCK_LENGTH, MARIA_MIN_KEY_BLOCK_LENGTH); +static MYSQL_SYSVAR_ULONG(checkpoint_frequency, maria_checkpoint_frequency, + PLUGIN_VAR_RQCMDARG, + "Frequency of automatic checkpoints, in seconds;" + " 0 means 'no checkpoints'.", + NULL, update_checkpoint_frequency, 30, 0, UINT_MAX, 1); + static MYSQL_SYSVAR_ULONGLONG(max_sort_file_size, maria_max_temp_length, PLUGIN_VAR_RQCMDARG, "Don't use the fast sort index method to created index if the " @@ -2401,7 +2411,7 @@ static int ha_maria_init(void *p) MYSQL_VERSION_ID, server_id, maria_log_pagecache, TRANSLOG_DEFAULT_FLAGS) || maria_recover() || - ma_checkpoint_init(FALSE); + ma_checkpoint_init(TRUE); maria_multi_threaded= TRUE; return res; } @@ -2484,6 +2494,7 @@ my_bool ha_maria::register_query_cache_table(THD *thd, char *table_name, static struct st_mysql_sys_var* system_variables[]= { MYSQL_SYSVAR(block_size), + MYSQL_SYSVAR(checkpoint_frequency), MYSQL_SYSVAR(max_sort_file_size), MYSQL_SYSVAR(repair_threads), MYSQL_SYSVAR(sort_buffer_size), @@ -2492,6 +2503,26 @@ static struct st_mysql_sys_var* system_variables[]= { }; +/** + @brief Updates the checkpoint frequency and restarts the background thread. + + Background thread has a loop whose correctness depends on a constant + checkpoint frequency. So when the user wants to modify it, we stop and + restart the thread. The new value is read from @c save and stored through @c var_ptr. 
+*/ +static void update_checkpoint_frequency(MYSQL_THD thd, + struct st_mysql_sys_var *var, + void *var_ptr, void *save) +{ + ulong new_value= (ulong)(*(long *)save), *dest= (ulong *)var_ptr; + if (new_value != *dest) /* restart the thread only on an actual change */ + { + ma_checkpoint_end(); + *dest= new_value; + ma_checkpoint_init(TRUE); + } +} + struct st_mysql_storage_engine maria_storage_engine= { MYSQL_HANDLERTON_INTERFACE_VERSION }; diff --git a/storage/maria/ma_checkpoint.c b/storage/maria/ma_checkpoint.c index 50c181fbd1c..60318e60102 100644 --- a/storage/maria/ma_checkpoint.c +++ b/storage/maria/ma_checkpoint.c @@ -42,6 +42,8 @@ #include "ma_loghandler_lsn.h" +/** @brief Interval between background checkpoints, in seconds; 0 means 'no checkpoints' */ +ulong maria_checkpoint_frequency; /* Checkpoints currently happen only at ha_maria's startup (after recovery) and at shutdown, always when there is no open tables. @@ -351,6 +353,10 @@ int ma_checkpoint_init(my_bool create_background_thread) DBUG_ENTER("ma_checkpoint_init"); checkpoint_inited= TRUE; checkpoint_thread_die= 2; /* not yet born == dead */ + /* Background thread will be enabled in a later changeset; until then the caller's request is always overridden */ + create_background_thread= FALSE; + if (maria_checkpoint_frequency == 0) /* 0 means 'no checkpoints' */ + create_background_thread= FALSE; if (pthread_mutex_init(&LOCK_checkpoint, MY_MUTEX_INIT_SLOW) || pthread_cond_init(&COND_checkpoint, 0)) res= 1; @@ -527,9 +533,10 @@ static int filter_flush_data_file_evenly(enum pagecache_page_type type, /** @brief Background thread which does checkpoints and flushes periodically. - Takes a checkpoint every 30th second. After taking a checkpoint, all pages - dirty at the time of that checkpoint are flushed evenly until it is time to - take another checkpoint (30 seconds later). This ensures that the REDO + Takes a checkpoint every maria_checkpoint_frequency-th second. 
After taking + a checkpoint, all pages dirty at the time of that checkpoint are flushed + evenly until it is time to take another checkpoint + (maria_checkpoint_frequency seconds later). This ensures that the REDO phase starts at earliest (in LSN time) at the next-to-last checkpoint record ("two-checkpoint rule"). @@ -544,10 +551,8 @@ static int filter_flush_data_file_evenly(enum pagecache_page_type type, pthread_handler_t ma_checkpoint_background(void *arg __attribute__((unused))) { - const uint sleep_unit= 1 /* 1 second */, - time_between_checkpoints= 30, /* 30 sleep units */ - /** @brief At least this of log/page bytes written between checkpoints */ - checkpoint_min_activity= 2*1024*1024; + /** @brief At least this amount of log/page bytes written between checkpoints */ + const uint checkpoint_min_activity= 2*1024*1024; uint sleeps= 0; my_thread_init(); @@ -566,7 +571,12 @@ pthread_handler_t ma_checkpoint_background(void *arg __attribute__((unused))) struct timespec abstime; LINT_INIT(kfile); LINT_INIT(dfile); - switch((sleeps++) % time_between_checkpoints) + /* + If the frequency could be changed by the user while we are in this loop, + it could break the loop: for example it could cause "case 2" to be executed + right after "case 0", thus having 'dfile' unset. NOTE(review): a frequency of 0 would make the modulo below a division by zero; presumably this thread is never started in that case - confirm. + */ + switch((sleeps++) % maria_checkpoint_frequency) { case 0: /* @@ -579,6 +589,9 @@ pthread_handler_t ma_checkpoint_background(void *arg __attribute__((unused))) since last checkpoint. Such work includes log writing (lengthens recovery, checkpoint would shorten it), page flushing (checkpoint would decrease the amount of read pages in recovery). + In case of one short statement per minute (very low load), we don't + want to checkpoint every minute, hence the positive + checkpoint_min_activity. 
*/ if (((translog_get_horizon() - log_horizon_at_last_checkpoint) + (maria_pagecache->global_cache_write - @@ -608,7 +621,7 @@ pthread_handler_t ma_checkpoint_background(void *arg __attribute__((unused))) /* set up parameters for background page flushing */ filter_param.up_to_lsn= last_checkpoint_lsn; pages_bunch_size= pages_to_flush_before_next_checkpoint / - time_between_checkpoints; + maria_checkpoint_frequency; dfile= dfiles; kfile= kfiles; /* fall through */ @@ -659,7 +672,7 @@ pthread_handler_t ma_checkpoint_background(void *arg __attribute__((unused))) pthread_mutex_lock(&LOCK_checkpoint); #else /* To have a killable sleep, we use timedwait like our SQL GET_LOCK() */ - set_timespec(abstime, sleep_unit); + set_timespec(abstime, 1); /* sleep unit: 1 second */ pthread_cond_timedwait(&COND_checkpoint, &LOCK_checkpoint, &abstime); #endif if (checkpoint_thread_die == 1) diff --git a/storage/maria/ma_recovery.c b/storage/maria/ma_recovery.c index d3d475b5137..4dc1e12aac6 100644 --- a/storage/maria/ma_recovery.c +++ b/storage/maria/ma_recovery.c @@ -51,6 +51,8 @@ static LSN current_group_end_lsn, static TrID max_long_trid= 0; /**< max long trid seen by REDO phase */ static FILE *tracef; /**< trace file for debugging */ static my_bool skip_DDLs; /**< if REDO phase should skip DDL records */ +/** @brief Set to TRUE once recovery has something to do; used to avoid writing a checkpoint if recovery did nothing. NOTE(review): no reset to FALSE is visible here - confirm maria_apply_log() runs at most once per process. 
*/ +static my_bool checkpoint_useful; static ulonglong now; /**< for tracking execution time of phases */ #define prototype_redo_exec_hook(R) \ @@ -221,6 +223,9 @@ int maria_apply_log(LSN from_lsn, my_bool apply, FILE *trace_file, if (!all_active_trans || !all_tables) goto err; + if (take_checkpoints && ma_checkpoint_init(FALSE)) + goto err; + redo_phase_message_printed= FALSE; tracef= trace_file; if (!(skip_DDLs= skip_DDLs_arg)) @@ -277,15 +282,14 @@ int maria_apply_log(LSN from_lsn, my_bool apply, FILE *trace_file, end_of_redo_phase(should_run_undo_phase)) == (uint)-1) goto err; - if (take_checkpoints) + if (take_checkpoints && checkpoint_useful) { /* We take a checkpoint as it can save future recovery work if we crash during the UNDO phase. But we don't flush pages, as UNDOs will change them again probably. */ - if (ma_checkpoint_init(FALSE) || - ma_checkpoint_execute(CHECKPOINT_INDIRECT, FALSE)) + if (ma_checkpoint_execute(CHECKPOINT_INDIRECT, FALSE)) goto err; } @@ -305,7 +309,7 @@ int maria_apply_log(LSN from_lsn, my_bool apply, FILE *trace_file, if (close_all_tables()) goto err; - if (take_checkpoints) + if (take_checkpoints && checkpoint_useful) { /* No dirty pages, all tables are closed, no active transactions, save: */ if (ma_checkpoint_execute(CHECKPOINT_FULL, FALSE)) @@ -948,6 +952,7 @@ static int new_table(uint16 sid, const char *name, */ int error= 1; + checkpoint_useful= TRUE; /* a table is about to be opened: recovery is not a no-op */ tprint(tracef, "Table '%s', id %u", name, sid); MARIA_HA *info= maria_open(name, O_RDWR, HA_OPEN_FOR_REPAIR); if (info == NULL) @@ -1791,6 +1796,7 @@ static int run_undo_phase(uint unfinished) { if (unfinished > 0) { + checkpoint_useful= TRUE; /* unfinished > 0: recovery is not a no-op */ if (tracef != stdout) { ulonglong old_now= now;