mirror of
https://github.com/MariaDB/server.git
synced 2025-01-16 03:52:35 +01:00
Many files:
Eliminate the LOCK_log bottleneck in group commit in binlogging: flush InnoDB log files only after it has been released sql/log.cc: Eliminate the LOCK_log bottleneck in group commit in binlogging: flush InnoDB log files only after it has been released sql/handler.cc: Eliminate the LOCK_log bottleneck in group commit in binlogging: flush InnoDB log files only after it has been released sql/handler.h: Eliminate the LOCK_log bottleneck in group commit in binlogging: flush InnoDB log files only after it has been released sql/ha_innodb.cc: Eliminate the LOCK_log bottleneck in group commit in binlogging: flush InnoDB log files only after it has been released sql/ha_innodb.h: Eliminate the LOCK_log bottleneck in group commit in binlogging: flush InnoDB log files only after it has been released innobase/include/log0log.h: Eliminate the LOCK_log bottleneck in group commit in binlogging: flush InnoDB log files only after it has been released innobase/include/trx0trx.h: Eliminate the LOCK_log bottleneck in group commit in binlogging: flush InnoDB log files only after it has been released innobase/os/os0file.c: Eliminate the LOCK_log bottleneck in group commit in binlogging: flush InnoDB log files only after it has been released innobase/buf/buf0flu.c: Eliminate the LOCK_log bottleneck in group commit in binlogging: flush InnoDB log files only after it has been released innobase/trx/trx0trx.c: Eliminate the LOCK_log bottleneck in group commit in binlogging: flush InnoDB log files only after it has been released innobase/log/log0log.c: Eliminate the LOCK_log bottleneck in group commit in binlogging: flush InnoDB log files only after it has been released innobase/srv/srv0srv.c: Eliminate the LOCK_log bottleneck in group commit in binlogging: flush InnoDB log files only after it has been released innobase/row/row0mysql.c: Eliminate the LOCK_log bottleneck in group commit in binlogging: flush InnoDB log files only after it has been released
This commit is contained in:
parent
870397892b
commit
8d8f52e902
13 changed files with 276 additions and 154 deletions
|
@ -398,7 +398,7 @@ buf_flush_write_block_low(
|
|||
"Warning: cannot force log to disk in the log debug version!\n");
|
||||
#else
|
||||
/* Force the log to the disk before writing the modified block */
|
||||
log_flush_up_to(block->newest_modification, LOG_WAIT_ALL_GROUPS);
|
||||
log_write_up_to(block->newest_modification, LOG_WAIT_ALL_GROUPS, TRUE);
|
||||
#endif
|
||||
buf_flush_init_for_writing(block->frame, block->newest_modification,
|
||||
block->space, block->offset);
|
||||
|
|
|
@ -20,7 +20,7 @@ typedef struct log_group_struct log_group_t;
|
|||
extern ibool log_do_write;
|
||||
extern ibool log_debug_writes;
|
||||
|
||||
/* Wait modes for log_flush_up_to */
|
||||
/* Wait modes for log_write_up_to */
|
||||
#define LOG_NO_WAIT 91
|
||||
#define LOG_WAIT_ONE_GROUP 92
|
||||
#define LOG_WAIT_ALL_GROUPS 93
|
||||
|
@ -157,26 +157,21 @@ log_io_complete(
|
|||
/*============*/
|
||||
log_group_t* group); /* in: log group */
|
||||
/**********************************************************
|
||||
Flushes the log files to the disk, using, for example, the Unix fsync.
|
||||
This function does the flush even if the user has set
|
||||
srv_flush_log_at_trx_commit = FALSE. */
|
||||
|
||||
void
|
||||
log_flush_to_disk(void);
|
||||
/*===================*/
|
||||
/**********************************************************
|
||||
This function is called, e.g., when a transaction wants to commit. It checks
|
||||
that the log has been flushed to disk up to the last log entry written by the
|
||||
transaction. If there is a flush running, it waits and checks if the flush
|
||||
flushed enough. If not, starts a new flush. */
|
||||
that the log has been written to the log file up to the last log entry written
|
||||
by the transaction. If there is a flush running, it waits and checks if the
|
||||
flush flushed enough. If not, starts a new flush. */
|
||||
|
||||
void
|
||||
log_flush_up_to(
|
||||
log_write_up_to(
|
||||
/*============*/
|
||||
dulint lsn, /* in: log sequence number up to which the log should
|
||||
be flushed, ut_dulint_max if not specified */
|
||||
ulint wait); /* in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP,
|
||||
be written, ut_dulint_max if not specified */
|
||||
ulint wait, /* in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP,
|
||||
or LOG_WAIT_ALL_GROUPS */
|
||||
ibool flush_to_disk);
|
||||
/* in: TRUE if we want the written log also to be
|
||||
flushed to disk */
|
||||
/********************************************************************
|
||||
Advances the smallest lsn for which there are unflushed dirty blocks in the
|
||||
buffer pool and also may make a new checkpoint. NOTE: this function may only
|
||||
|
@ -741,27 +736,37 @@ struct log_struct{
|
|||
be advanced, it is enough that the
|
||||
write i/o has been completed for all
|
||||
log groups */
|
||||
dulint flush_lsn; /* end lsn for the current flush */
|
||||
ulint flush_end_offset;/* the data in buffer has been flushed
|
||||
dulint write_lsn; /* end lsn for the current running
|
||||
write */
|
||||
ulint write_end_offset;/* the data in buffer has been written
|
||||
up to this offset when the current
|
||||
flush ends: this field will then
|
||||
write ends: this field will then
|
||||
be copied to buf_next_to_write */
|
||||
ulint n_pending_writes;/* number of currently pending flush
|
||||
writes */
|
||||
dulint current_flush_lsn;/* end lsn for the current running
|
||||
write + flush operation */
|
||||
dulint flushed_to_disk_lsn;
|
||||
/* how far we have written the log
|
||||
AND flushed to disk */
|
||||
ulint n_pending_writes;/* number of currently pending flushes
|
||||
or writes */
|
||||
/* NOTE on the 'flush' in names of the fields below: starting from
|
||||
4.0.14, we separate the write of the log file and the actual fsync()
|
||||
or other method to flush it to disk. The names below should really
|
||||
be 'flush_or_write'! */
|
||||
os_event_t no_flush_event; /* this event is in the reset state
|
||||
when a flush is running; a thread
|
||||
should wait for this without owning
|
||||
the log mutex, but NOTE that to set or
|
||||
reset this event, the thread MUST own
|
||||
the log mutex! */
|
||||
when a flush or a write is running;
|
||||
a thread should wait for this without
|
||||
owning the log mutex, but NOTE that
|
||||
to set or reset this event, the
|
||||
thread MUST own the log mutex! */
|
||||
ibool one_flushed; /* during a flush, this is first FALSE
|
||||
and becomes TRUE when one log group
|
||||
has been flushed */
|
||||
has been written or flushed */
|
||||
os_event_t one_flushed_event;/* this event is reset when the
|
||||
flush has not yet completed for any
|
||||
log group; e.g., this means that a
|
||||
transaction has been committed when
|
||||
this is set; a thread should wait
|
||||
flush or write has not yet completed
|
||||
for any log group; e.g., this means
|
||||
that a transaction has been committed
|
||||
when this is set; a thread should wait
|
||||
for this without owning the log mutex,
|
||||
but NOTE that to set or reset this
|
||||
event, the thread MUST own the log
|
||||
|
|
|
@ -157,6 +157,15 @@ trx_commit_for_mysql(
|
|||
/* out: 0 or error number */
|
||||
trx_t* trx); /* in: trx handle */
|
||||
/**************************************************************************
|
||||
If required, flushes the log to disk if we called trx_commit_for_mysql()
|
||||
with trx->flush_log_later == TRUE. */
|
||||
|
||||
ulint
|
||||
trx_commit_complete_for_mysql(
|
||||
/*==========================*/
|
||||
/* out: 0 or error number */
|
||||
trx_t* trx); /* in: trx handle */
|
||||
/**************************************************************************
|
||||
Marks the latest SQL statement ended. */
|
||||
|
||||
void
|
||||
|
@ -343,6 +352,11 @@ struct trx_struct{
|
|||
dulint no; /* transaction serialization number ==
|
||||
max trx id when the transaction is
|
||||
moved to COMMITTED_IN_MEMORY state */
|
||||
ibool flush_log_later;/* when we commit the transaction
|
||||
in MySQL's binlog write, we will
|
||||
flush the log to disk later in
|
||||
a separate call */
|
||||
dulint commit_lsn; /* lsn at the time of the commit */
|
||||
ibool dict_operation; /* TRUE if the trx is used to create
|
||||
a table, create an index, or drop a
|
||||
table */
|
||||
|
|
|
@ -178,7 +178,7 @@ loop:
|
|||
|
||||
/* Not enough free space, do a synchronous flush of the log
|
||||
buffer */
|
||||
log_flush_up_to(ut_dulint_max, LOG_WAIT_ALL_GROUPS);
|
||||
log_write_up_to(ut_dulint_max, LOG_WAIT_ALL_GROUPS, TRUE);
|
||||
|
||||
count++;
|
||||
|
||||
|
@ -675,7 +675,9 @@ log_init(void)
|
|||
|
||||
log_sys->buf_next_to_write = 0;
|
||||
|
||||
log_sys->flush_lsn = ut_dulint_zero;
|
||||
log_sys->write_lsn = ut_dulint_zero;
|
||||
log_sys->current_flush_lsn = ut_dulint_zero;
|
||||
log_sys->flushed_to_disk_lsn = ut_dulint_zero;
|
||||
|
||||
log_sys->written_to_some_lsn = log_sys->lsn;
|
||||
log_sys->written_to_all_lsn = log_sys->lsn;
|
||||
|
@ -867,7 +869,7 @@ log_group_check_flush_completion(
|
|||
printf("Log flushed first to group %lu\n", group->id);
|
||||
}
|
||||
|
||||
log_sys->written_to_some_lsn = log_sys->flush_lsn;
|
||||
log_sys->written_to_some_lsn = log_sys->write_lsn;
|
||||
log_sys->one_flushed = TRUE;
|
||||
|
||||
return(LOG_UNLOCK_NONE_FLUSHED_LOCK);
|
||||
|
@ -896,15 +898,15 @@ log_sys_check_flush_completion(void)
|
|||
|
||||
if (log_sys->n_pending_writes == 0) {
|
||||
|
||||
log_sys->written_to_all_lsn = log_sys->flush_lsn;
|
||||
log_sys->buf_next_to_write = log_sys->flush_end_offset;
|
||||
log_sys->written_to_all_lsn = log_sys->write_lsn;
|
||||
log_sys->buf_next_to_write = log_sys->write_end_offset;
|
||||
|
||||
if (log_sys->flush_end_offset > log_sys->max_buf_free / 2) {
|
||||
if (log_sys->write_end_offset > log_sys->max_buf_free / 2) {
|
||||
/* Move the log buffer content to the start of the
|
||||
buffer */
|
||||
|
||||
move_start = ut_calc_align_down(
|
||||
log_sys->flush_end_offset,
|
||||
log_sys->write_end_offset,
|
||||
OS_FILE_LOG_BLOCK_SIZE);
|
||||
move_end = ut_calc_align(log_sys->buf_free,
|
||||
OS_FILE_LOG_BLOCK_SIZE);
|
||||
|
@ -981,57 +983,6 @@ log_io_complete(
|
|||
mutex_exit(&(log_sys->mutex));
|
||||
}
|
||||
|
||||
/**********************************************************
|
||||
Flushes the log files to the disk, using, for example, the Unix fsync.
|
||||
This function does the flush even if the user has set
|
||||
srv_flush_log_at_trx_commit = FALSE. */
|
||||
|
||||
void
|
||||
log_flush_to_disk(void)
|
||||
/*===================*/
|
||||
{
|
||||
log_group_t* group;
|
||||
loop:
|
||||
mutex_enter(&(log_sys->mutex));
|
||||
|
||||
if (log_sys->n_pending_writes > 0) {
|
||||
/* A log file write is running */
|
||||
|
||||
mutex_exit(&(log_sys->mutex));
|
||||
|
||||
/* Wait for the log file write to complete and try again */
|
||||
|
||||
os_event_wait(log_sys->no_flush_event);
|
||||
|
||||
goto loop;
|
||||
}
|
||||
|
||||
group = UT_LIST_GET_FIRST(log_sys->log_groups);
|
||||
|
||||
log_sys->n_pending_writes++;
|
||||
group->n_pending_writes++;
|
||||
|
||||
os_event_reset(log_sys->no_flush_event);
|
||||
os_event_reset(log_sys->one_flushed_event);
|
||||
|
||||
mutex_exit(&(log_sys->mutex));
|
||||
|
||||
fil_flush(group->space_id);
|
||||
|
||||
mutex_enter(&(log_sys->mutex));
|
||||
|
||||
ut_a(group->n_pending_writes == 1);
|
||||
ut_a(log_sys->n_pending_writes == 1);
|
||||
|
||||
group->n_pending_writes--;
|
||||
log_sys->n_pending_writes--;
|
||||
|
||||
os_event_set(log_sys->no_flush_event);
|
||||
os_event_set(log_sys->one_flushed_event);
|
||||
|
||||
mutex_exit(&(log_sys->mutex));
|
||||
}
|
||||
|
||||
/**********************************************************
|
||||
Writes a log file header to a log file space. */
|
||||
static
|
||||
|
@ -1205,12 +1156,15 @@ by the transaction. If there is a flush running, it waits and checks if the
|
|||
flush flushed enough. If not, starts a new flush. */
|
||||
|
||||
void
|
||||
log_flush_up_to(
|
||||
log_write_up_to(
|
||||
/*============*/
|
||||
dulint lsn, /* in: log sequence number up to which the log should
|
||||
be written, ut_dulint_max if not specified */
|
||||
ulint wait) /* in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP,
|
||||
ulint wait, /* in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP,
|
||||
or LOG_WAIT_ALL_GROUPS */
|
||||
ibool flush_to_disk)
|
||||
/* in: TRUE if we want the written log also to be
|
||||
flushed to disk */
|
||||
{
|
||||
log_group_t* group;
|
||||
ulint start_offset;
|
||||
|
@ -1239,9 +1193,18 @@ loop:
|
|||
|
||||
mutex_enter(&(log_sys->mutex));
|
||||
|
||||
if ((ut_dulint_cmp(log_sys->written_to_all_lsn, lsn) >= 0)
|
||||
|| ((ut_dulint_cmp(log_sys->written_to_some_lsn, lsn) >= 0)
|
||||
&& (wait != LOG_WAIT_ALL_GROUPS))) {
|
||||
if (flush_to_disk
|
||||
&& ut_dulint_cmp(log_sys->flushed_to_disk_lsn, lsn) >= 0) {
|
||||
|
||||
mutex_exit(&(log_sys->mutex));
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
if (!flush_to_disk
|
||||
&& (ut_dulint_cmp(log_sys->written_to_all_lsn, lsn) >= 0
|
||||
|| (ut_dulint_cmp(log_sys->written_to_some_lsn, lsn) >= 0
|
||||
&& wait != LOG_WAIT_ALL_GROUPS))) {
|
||||
|
||||
mutex_exit(&(log_sys->mutex));
|
||||
|
||||
|
@ -1249,10 +1212,19 @@ loop:
|
|||
}
|
||||
|
||||
if (log_sys->n_pending_writes > 0) {
|
||||
/* A flush is running */
|
||||
/* A write (+ possibly flush to disk) is running */
|
||||
|
||||
if (ut_dulint_cmp(log_sys->flush_lsn, lsn) >= 0) {
|
||||
/* The flush will flush enough: wait for it to
|
||||
if (flush_to_disk
|
||||
&& ut_dulint_cmp(log_sys->current_flush_lsn, lsn) >= 0) {
|
||||
/* The write + flush will write enough: wait for it to
|
||||
complete */
|
||||
|
||||
goto do_waits;
|
||||
}
|
||||
|
||||
if (!flush_to_disk
|
||||
&& ut_dulint_cmp(log_sys->write_lsn, lsn) >= 0) {
|
||||
/* The write will write enough: wait for it to
|
||||
complete */
|
||||
|
||||
goto do_waits;
|
||||
|
@ -1260,16 +1232,17 @@ loop:
|
|||
|
||||
mutex_exit(&(log_sys->mutex));
|
||||
|
||||
/* Wait for the flush to complete and try to start a new
|
||||
flush */
|
||||
/* Wait for the write to complete and try to start a new
|
||||
write */
|
||||
|
||||
os_event_wait(log_sys->no_flush_event);
|
||||
|
||||
goto loop;
|
||||
}
|
||||
|
||||
if (log_sys->buf_free == log_sys->buf_next_to_write) {
|
||||
/* Nothing to flush */
|
||||
if (!flush_to_disk
|
||||
&& log_sys->buf_free == log_sys->buf_next_to_write) {
|
||||
/* Nothing to write and no flush to disk requested */
|
||||
|
||||
mutex_exit(&(log_sys->mutex));
|
||||
|
||||
|
@ -1277,7 +1250,7 @@ loop:
|
|||
}
|
||||
|
||||
if (log_debug_writes) {
|
||||
printf("Flushing log from %lu %lu up to lsn %lu %lu\n",
|
||||
printf("Writing log from %lu %lu up to lsn %lu %lu\n",
|
||||
ut_dulint_get_high(log_sys->written_to_all_lsn),
|
||||
ut_dulint_get_low(log_sys->written_to_all_lsn),
|
||||
ut_dulint_get_high(log_sys->lsn),
|
||||
|
@ -1301,7 +1274,12 @@ loop:
|
|||
|
||||
ut_ad(area_end - area_start > 0);
|
||||
|
||||
log_sys->flush_lsn = log_sys->lsn;
|
||||
log_sys->write_lsn = log_sys->lsn;
|
||||
|
||||
if (flush_to_disk) {
|
||||
log_sys->current_flush_lsn = log_sys->lsn;
|
||||
}
|
||||
|
||||
log_sys->one_flushed = FALSE;
|
||||
|
||||
log_block_set_flush_bit(log_sys->buf + area_start, TRUE);
|
||||
|
@ -1318,10 +1296,12 @@ loop:
|
|||
OS_FILE_LOG_BLOCK_SIZE);
|
||||
|
||||
log_sys->buf_free += OS_FILE_LOG_BLOCK_SIZE;
|
||||
log_sys->flush_end_offset = log_sys->buf_free;
|
||||
log_sys->write_end_offset = log_sys->buf_free;
|
||||
|
||||
group = UT_LIST_GET_FIRST(log_sys->log_groups);
|
||||
|
||||
/* Do the write to the log files */
|
||||
|
||||
while (group) {
|
||||
log_group_write_buf(LOG_FLUSH, group,
|
||||
log_sys->buf + area_start,
|
||||
|
@ -1330,20 +1310,25 @@ loop:
|
|||
OS_FILE_LOG_BLOCK_SIZE),
|
||||
start_offset - area_start);
|
||||
|
||||
log_group_set_fields(group, log_sys->flush_lsn);
|
||||
log_group_set_fields(group, log_sys->write_lsn);
|
||||
|
||||
group = UT_LIST_GET_NEXT(log_groups, group);
|
||||
}
|
||||
|
||||
mutex_exit(&(log_sys->mutex));
|
||||
|
||||
if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
|
||||
&& srv_unix_file_flush_method != SRV_UNIX_NOSYNC
|
||||
&& srv_flush_log_at_trx_commit != 2) {
|
||||
if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) {
|
||||
/* O_DSYNC means the OS did not buffer the log file at all:
|
||||
so we have also flushed to disk what we have written */
|
||||
|
||||
log_sys->flushed_to_disk_lsn = log_sys->write_lsn;
|
||||
|
||||
} else if (flush_to_disk) {
|
||||
|
||||
group = UT_LIST_GET_FIRST(log_sys->log_groups);
|
||||
|
||||
fil_flush(group->space_id);
|
||||
log_sys->flushed_to_disk_lsn = log_sys->write_lsn;
|
||||
}
|
||||
|
||||
mutex_enter(&(log_sys->mutex));
|
||||
|
@ -1403,7 +1388,7 @@ log_flush_margin(void)
|
|||
mutex_exit(&(log->mutex));
|
||||
|
||||
if (do_flush) {
|
||||
log_flush_up_to(ut_dulint_max, LOG_NO_WAIT);
|
||||
log_write_up_to(ut_dulint_max, LOG_NO_WAIT, FALSE);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1555,7 +1540,8 @@ log_group_checkpoint(
|
|||
buf = group->checkpoint_buf;
|
||||
|
||||
mach_write_to_8(buf + LOG_CHECKPOINT_NO, log_sys->next_checkpoint_no);
|
||||
mach_write_to_8(buf + LOG_CHECKPOINT_LSN, log_sys->next_checkpoint_lsn);
|
||||
mach_write_to_8(buf + LOG_CHECKPOINT_LSN,
|
||||
log_sys->next_checkpoint_lsn);
|
||||
|
||||
mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET,
|
||||
log_group_calc_lsn_offset(
|
||||
|
@ -1664,8 +1650,10 @@ log_reset_first_header_and_checkpoint(
|
|||
lsn = ut_dulint_add(start, LOG_BLOCK_HDR_SIZE);
|
||||
|
||||
/* Write the label of ibbackup --restore */
|
||||
sprintf((char*) hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP, "ibbackup ");
|
||||
ut_sprintf_timestamp((char*) hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP
|
||||
sprintf((char*) hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
|
||||
"ibbackup ");
|
||||
ut_sprintf_timestamp(
|
||||
(char*) hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP
|
||||
+ strlen("ibbackup "));
|
||||
buf = hdr_buf + LOG_CHECKPOINT_1;
|
||||
|
||||
|
@ -1773,7 +1761,7 @@ log_checkpoint(
|
|||
write-ahead-logging algorithm ensures that the log has been flushed
|
||||
up to oldest_lsn. */
|
||||
|
||||
log_flush_up_to(oldest_lsn, LOG_WAIT_ALL_GROUPS);
|
||||
log_write_up_to(oldest_lsn, LOG_WAIT_ALL_GROUPS, TRUE);
|
||||
|
||||
mutex_enter(&(log_sys->mutex));
|
||||
|
||||
|
@ -2466,7 +2454,7 @@ loop:
|
|||
|
||||
mutex_exit(&(log_sys->mutex));
|
||||
|
||||
log_flush_up_to(limit_lsn, LOG_WAIT_ALL_GROUPS);
|
||||
log_write_up_to(limit_lsn, LOG_WAIT_ALL_GROUPS, TRUE);
|
||||
|
||||
calc_new_limit = FALSE;
|
||||
|
||||
|
@ -3104,8 +3092,8 @@ log_print(
|
|||
"Last checkpoint at %lu %lu\n",
|
||||
ut_dulint_get_high(log_sys->lsn),
|
||||
ut_dulint_get_low(log_sys->lsn),
|
||||
ut_dulint_get_high(log_sys->written_to_some_lsn),
|
||||
ut_dulint_get_low(log_sys->written_to_some_lsn),
|
||||
ut_dulint_get_high(log_sys->flushed_to_disk_lsn),
|
||||
ut_dulint_get_low(log_sys->flushed_to_disk_lsn),
|
||||
ut_dulint_get_high(log_sys->last_checkpoint_lsn),
|
||||
ut_dulint_get_low(log_sys->last_checkpoint_lsn));
|
||||
|
||||
|
|
|
@ -521,10 +521,11 @@ try_again:
|
|||
}
|
||||
#endif
|
||||
#ifdef UNIV_NON_BUFFERED_IO
|
||||
if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) {
|
||||
if (type == OS_LOG_FILE) {
|
||||
/* Do not use unbuffered i/o to log files because
|
||||
value 2 denotes that we do not flush the log at every
|
||||
commit, but only once per second */
|
||||
to allow group commit to work when MySQL binlogging
|
||||
is used we must separate log file write and log
|
||||
file flush to disk. */
|
||||
} else {
|
||||
if (srv_win_file_flush_method ==
|
||||
SRV_WIN_IO_UNBUFFERED) {
|
||||
|
|
|
@ -1664,7 +1664,7 @@ row_drop_table_for_mysql_in_background(
|
|||
the InnoDB data dictionary get out-of-sync if the user runs
|
||||
with innodb_flush_log_at_trx_commit = 0 */
|
||||
|
||||
log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP);
|
||||
log_write_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP, TRUE);
|
||||
|
||||
trx_commit_for_mysql(trx);
|
||||
|
||||
|
|
|
@ -2812,8 +2812,7 @@ loop:
|
|||
at transaction commit */
|
||||
|
||||
srv_main_thread_op_info = (char*)"flushing log";
|
||||
log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP);
|
||||
log_flush_to_disk();
|
||||
log_write_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP, TRUE);
|
||||
|
||||
/* If there were less than 10 i/os during the
|
||||
one second sleep, we assume that there is free
|
||||
|
@ -2831,8 +2830,8 @@ loop:
|
|||
|
||||
srv_main_thread_op_info =
|
||||
(char*)"flushing log";
|
||||
log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP);
|
||||
log_flush_to_disk();
|
||||
log_write_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP,
|
||||
TRUE);
|
||||
}
|
||||
|
||||
if (srv_activity_count == old_activity_count) {
|
||||
|
@ -2867,8 +2866,7 @@ loop:
|
|||
buf_flush_batch(BUF_FLUSH_LIST, 100, ut_dulint_max);
|
||||
|
||||
srv_main_thread_op_info = (char*) "flushing log";
|
||||
log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP);
|
||||
log_flush_to_disk();
|
||||
log_write_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP, TRUE);
|
||||
}
|
||||
|
||||
/* We run a batch of insert buffer merge every 10 seconds,
|
||||
|
@ -2878,8 +2876,7 @@ loop:
|
|||
ibuf_contract_for_n_pages(TRUE, 5);
|
||||
|
||||
srv_main_thread_op_info = (char*)"flushing log";
|
||||
log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP);
|
||||
log_flush_to_disk();
|
||||
log_write_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP, TRUE);
|
||||
|
||||
/* We run a full purge every 10 seconds, even if the server
|
||||
were active */
|
||||
|
@ -2903,8 +2900,8 @@ loop:
|
|||
if (difftime(current_time, last_flush_time) > 1) {
|
||||
srv_main_thread_op_info = (char*) "flushing log";
|
||||
|
||||
log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP);
|
||||
log_flush_to_disk();
|
||||
log_write_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP,
|
||||
TRUE);
|
||||
last_flush_time = current_time;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -89,6 +89,8 @@ trx_create(
|
|||
trx->check_foreigns = TRUE;
|
||||
trx->check_unique_secondary = TRUE;
|
||||
|
||||
trx->flush_log_later = FALSE;
|
||||
|
||||
trx->dict_operation = FALSE;
|
||||
|
||||
trx->mysql_thd = NULL;
|
||||
|
@ -780,13 +782,26 @@ trx_commit_off_kernel(
|
|||
|
||||
/*-------------------------------------*/
|
||||
|
||||
/* Most MySQL users run with srv_flush_.. set to FALSE: */
|
||||
/* Most MySQL users run with srv_flush_.. set to 0: */
|
||||
|
||||
if (srv_flush_log_at_trx_commit) {
|
||||
|
||||
log_flush_up_to(lsn, LOG_WAIT_ONE_GROUP);
|
||||
if (srv_flush_log_at_trx_commit != 0) {
|
||||
if (srv_unix_file_flush_method != SRV_UNIX_NOSYNC
|
||||
&& srv_flush_log_at_trx_commit != 2
|
||||
&& !trx->flush_log_later) {
|
||||
|
||||
/* Write the log to the log files AND flush
|
||||
them to disk */
|
||||
|
||||
log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
|
||||
} else {
|
||||
/* Write the log but do not flush it to disk */
|
||||
|
||||
log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
|
||||
}
|
||||
}
|
||||
|
||||
trx->commit_lsn = lsn;
|
||||
|
||||
/*-------------------------------------*/
|
||||
|
||||
mutex_enter(&kernel_mutex);
|
||||
|
@ -1467,6 +1482,31 @@ trx_commit_for_mysql(
|
|||
return(0);
|
||||
}
|
||||
|
||||
/**************************************************************************
|
||||
If required, flushes the log to disk if we called trx_commit_for_mysql()
|
||||
with trx->flush_log_later == TRUE. */
|
||||
|
||||
ulint
|
||||
trx_commit_complete_for_mysql(
|
||||
/*==========================*/
|
||||
/* out: 0 or error number */
|
||||
trx_t* trx) /* in: trx handle */
|
||||
{
|
||||
ut_a(trx);
|
||||
|
||||
if (srv_flush_log_at_trx_commit == 1
|
||||
&& srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
|
||||
|
||||
trx->op_info = (char *) "flushing log";
|
||||
|
||||
/* Flush the log files to disk */
|
||||
|
||||
log_write_up_to(trx->commit_lsn, LOG_WAIT_ONE_GROUP, TRUE);
|
||||
|
||||
trx->op_info = (char *) "";
|
||||
}
|
||||
}
|
||||
|
||||
/**************************************************************************
|
||||
Marks the latest SQL statement ended. */
|
||||
|
||||
|
|
|
@ -872,8 +872,7 @@ innobase_flush_logs(void)
|
|||
|
||||
DBUG_ENTER("innobase_flush_logs");
|
||||
|
||||
log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP);
|
||||
log_flush_to_disk();
|
||||
log_write_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP, TRUE);
|
||||
|
||||
DBUG_RETURN(result);
|
||||
}
|
||||
|
@ -920,7 +919,7 @@ Commits a transaction in an InnoDB database. */
|
|||
int
|
||||
innobase_commit(
|
||||
/*============*/
|
||||
/* out: 0 or error number */
|
||||
/* out: 0 */
|
||||
THD* thd, /* in: MySQL thread handle of the user for whom
|
||||
the transaction should be committed */
|
||||
void* trx_handle)/* in: InnoDB trx handle or
|
||||
|
@ -928,7 +927,6 @@ innobase_commit(
|
|||
that the current SQL statement ended, and we should
|
||||
mark the start of a new statement with a savepoint */
|
||||
{
|
||||
int error = 0;
|
||||
trx_t* trx;
|
||||
|
||||
DBUG_ENTER("innobase_commit");
|
||||
|
@ -955,29 +953,27 @@ innobase_commit(
|
|||
innobase_release_stat_resources(trx);
|
||||
trx_mark_sql_stat_end(trx);
|
||||
|
||||
#ifndef DBUG_OFF
|
||||
if (error) {
|
||||
DBUG_PRINT("error", ("error: %d", error));
|
||||
}
|
||||
#endif
|
||||
/* Tell InnoDB server that there might be work for
|
||||
utility threads: */
|
||||
|
||||
srv_active_wake_master_thread();
|
||||
|
||||
DBUG_RETURN(error);
|
||||
DBUG_RETURN(0);
|
||||
}
|
||||
|
||||
/*********************************************************************
|
||||
This is called when MySQL writes the binlog entry for the current
|
||||
transaction. Writes to the InnoDB tablespace info which tells where the
|
||||
MySQL binlog entry for the current transaction ended. Also commits the
|
||||
transaction inside InnoDB. */
|
||||
transaction inside InnoDB but does NOT flush InnoDB log files to disk.
|
||||
To flush you have to call innobase_flush_log_to_disk. We have separated
|
||||
flushing to eliminate the bottleneck of LOCK_log in log.cc which disabled
|
||||
InnoDB's group commit capability. */
|
||||
|
||||
int
|
||||
innobase_report_binlog_offset_and_commit(
|
||||
/*=====================================*/
|
||||
/* out: 0 or error code */
|
||||
/* out: 0 */
|
||||
THD* thd, /* in: user thread */
|
||||
void* trx_handle, /* in: InnoDB trx handle */
|
||||
char* log_file_name, /* in: latest binlog file name */
|
||||
|
@ -993,7 +989,39 @@ innobase_report_binlog_offset_and_commit(
|
|||
trx->mysql_log_file_name = log_file_name;
|
||||
trx->mysql_log_offset = (ib_longlong)end_offset;
|
||||
|
||||
return(innobase_commit(thd, trx_handle));
|
||||
trx->flush_log_later = TRUE;
|
||||
|
||||
innobase_commit(thd, trx_handle);
|
||||
|
||||
trx->flush_log_later = FALSE;
|
||||
|
||||
return(0);
|
||||
}
|
||||
|
||||
/*********************************************************************
|
||||
This is called after MySQL has written the binlog entry for the current
|
||||
transaction. Flushes the InnoDB log files to disk if required. */
|
||||
|
||||
int
|
||||
innobase_commit_complete(
|
||||
/*=====================*/
|
||||
/* out: 0 */
|
||||
void* trx_handle) /* in: InnoDB trx handle */
|
||||
{
|
||||
trx_t* trx;
|
||||
|
||||
if (srv_flush_log_at_trx_commit == 0) {
|
||||
|
||||
return(0);
|
||||
}
|
||||
|
||||
trx = (trx_t*)trx_handle;
|
||||
|
||||
ut_a(trx != NULL);
|
||||
|
||||
trx_commit_complete_for_mysql(trx);
|
||||
|
||||
return(0);
|
||||
}
|
||||
|
||||
/*********************************************************************
|
||||
|
@ -3202,7 +3230,7 @@ ha_innobase::create(
|
|||
the InnoDB data dictionary get out-of-sync if the user runs
|
||||
with innodb_flush_log_at_trx_commit = 0 */
|
||||
|
||||
log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP);
|
||||
log_write_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP, TRUE);
|
||||
|
||||
innobase_table = dict_table_get(norm_name, NULL);
|
||||
|
||||
|
@ -3277,7 +3305,7 @@ ha_innobase::delete_table(
|
|||
the InnoDB data dictionary get out-of-sync if the user runs
|
||||
with innodb_flush_log_at_trx_commit = 0 */
|
||||
|
||||
log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP);
|
||||
log_write_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP, TRUE);
|
||||
|
||||
/* Tell the InnoDB server that there might be work for
|
||||
utility threads: */
|
||||
|
@ -3347,7 +3375,7 @@ innobase_drop_database(
|
|||
the InnoDB data dictionary get out-of-sync if the user runs
|
||||
with innodb_flush_log_at_trx_commit = 0 */
|
||||
|
||||
log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP);
|
||||
log_write_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP, TRUE);
|
||||
|
||||
/* Tell the InnoDB server that there might be work for
|
||||
utility threads: */
|
||||
|
@ -3419,7 +3447,7 @@ ha_innobase::rename_table(
|
|||
the InnoDB data dictionary get out-of-sync if the user runs
|
||||
with innodb_flush_log_at_trx_commit = 0 */
|
||||
|
||||
log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP);
|
||||
log_write_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP, TRUE);
|
||||
|
||||
/* Tell the InnoDB server that there might be work for
|
||||
utility threads: */
|
||||
|
@ -3936,7 +3964,7 @@ ha_innobase::extra(
|
|||
case HA_EXTRA_RESET:
|
||||
case HA_EXTRA_RESET_STATE:
|
||||
prebuilt->read_just_key = 0;
|
||||
break;
|
||||
break;
|
||||
case HA_EXTRA_NO_KEYREAD:
|
||||
prebuilt->read_just_key = 0;
|
||||
break;
|
||||
|
|
|
@ -211,6 +211,8 @@ int innobase_report_binlog_offset_and_commit(
|
|||
void* trx_handle,
|
||||
char* log_file_name,
|
||||
my_off_t end_offset);
|
||||
int innobase_commit_complete(
|
||||
void* trx_handle);
|
||||
int innobase_rollback(THD *thd, void* trx_handle);
|
||||
int innobase_close_connection(THD *thd);
|
||||
int innobase_drop_database(char *path);
|
||||
|
|
|
@ -243,6 +243,9 @@ int ha_autocommit_or_rollback(THD *thd, int error)
|
|||
replication. This function also calls the commit of the table
|
||||
handler, because the order of transactions in the log of the table
|
||||
handler must be the same as in the binlog.
|
||||
NOTE that to eliminate the bottleneck of the group commit, we do not
|
||||
flush the handler log files here, but only later in a call of
|
||||
ha_commit_complete().
|
||||
|
||||
arguments:
|
||||
thd: the thread handle of the current connection
|
||||
|
@ -269,12 +272,37 @@ int ha_report_binlog_offset_and_commit(THD *thd,
|
|||
my_error(ER_ERROR_DURING_COMMIT, MYF(0), error);
|
||||
error=1;
|
||||
}
|
||||
trans->innodb_active_trans=0;
|
||||
}
|
||||
#endif
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
Flushes the handler log files (if my.cnf settings do not free us from it)
|
||||
after we have called ha_report_binlog_offset_and_commit(). To eliminate
|
||||
the bottleneck from the group commit, this should be called when
|
||||
LOCK_log has been released in log.cc.
|
||||
|
||||
arguments:
|
||||
thd: the thread handle of the current connection
|
||||
return value: always 0
|
||||
*/
|
||||
|
||||
int ha_commit_complete(THD *thd)
|
||||
{
|
||||
#ifdef HAVE_INNOBASE_DB
|
||||
THD_TRANS *trans;
|
||||
trans = &thd->transaction.all;
|
||||
if (trans->innobase_tid)
|
||||
{
|
||||
innobase_commit_complete(trans->innobase_tid);
|
||||
|
||||
trans->innodb_active_trans=0;
|
||||
}
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
This function should be called when MySQL sends rows of a SELECT result set
|
||||
or the EOF mark to the client. It releases a possible adaptive hash index
|
||||
|
|
|
@ -372,6 +372,7 @@ void ha_resize_key_cache(void);
|
|||
int ha_start_stmt(THD *thd);
|
||||
int ha_report_binlog_offset_and_commit(THD *thd, char *log_file_name,
|
||||
my_off_t end_offset);
|
||||
int ha_commit_complete(THD *thd);
|
||||
int ha_release_temporary_latches(THD *thd);
|
||||
int ha_commit_trans(THD *thd, THD_TRANS *trans);
|
||||
int ha_rollback_trans(THD *thd, THD_TRANS *trans);
|
||||
|
|
20
sql/log.cc
20
sql/log.cc
|
@ -1033,6 +1033,8 @@ bool MYSQL_LOG::write(THD *thd,enum enum_server_command command,
|
|||
|
||||
bool MYSQL_LOG::write(Log_event* event_info)
|
||||
{
|
||||
THD *thd=event_info->thd;
|
||||
bool called_handler_commit=0;
|
||||
bool error=0;
|
||||
DBUG_ENTER("MYSQL_LOG::write(event)");
|
||||
|
||||
|
@ -1047,7 +1049,6 @@ bool MYSQL_LOG::write(Log_event* event_info)
|
|||
if (is_open())
|
||||
{
|
||||
bool should_rotate = 0;
|
||||
THD *thd=event_info->thd;
|
||||
const char *local_db = event_info->get_db();
|
||||
#ifdef USING_TRANSACTIONS
|
||||
IO_CACHE *file = ((event_info->get_cache_stmt()) ?
|
||||
|
@ -1147,6 +1148,7 @@ bool MYSQL_LOG::write(Log_event* event_info)
|
|||
{
|
||||
error = ha_report_binlog_offset_and_commit(thd, log_file_name,
|
||||
file->pos_in_file);
|
||||
called_handler_commit=1;
|
||||
}
|
||||
|
||||
should_rotate= (my_b_tell(file) >= (my_off_t) max_binlog_size);
|
||||
|
@ -1172,6 +1174,15 @@ err:
|
|||
}
|
||||
|
||||
pthread_mutex_unlock(&LOCK_log);
|
||||
|
||||
/* Flush the transactional handler log file now that we have released
|
||||
LOCK_log; the flush is placed here to eliminate the bottleneck on the
|
||||
group commit */
|
||||
|
||||
if (called_handler_commit) {
|
||||
ha_commit_complete(thd);
|
||||
}
|
||||
|
||||
DBUG_RETURN(error);
|
||||
}
|
||||
|
||||
|
@ -1277,6 +1288,13 @@ bool MYSQL_LOG::write(THD *thd, IO_CACHE *cache)
|
|||
|
||||
}
|
||||
VOID(pthread_mutex_unlock(&LOCK_log));
|
||||
|
||||
/* Flush the transactional handler log file now that we have released
|
||||
LOCK_log; the flush is placed here to eliminate the bottleneck on the
|
||||
group commit */
|
||||
|
||||
ha_commit_complete(thd);
|
||||
|
||||
DBUG_RETURN(0);
|
||||
|
||||
err:
|
||||
|
|
Loading…
Reference in a new issue