mariadb/mysys/my_write.c
Brandon Nesterenko 0f5e5ced37 MDEV-37662: Binlog Corruption When tmpdir is Full
The binary log could be corrupted when committing a large transaction
(i.e. one whose data exceeds the binlog_cache_size limit and spills
into a tmp file) in binlog_format=row if the server's --tmpdir is
full. The corruption is that only the GTID event of the failed
transaction would be written into the binary log, without any
body/finalizing events.  This would happen because the content of the
transaction wasn't flushed at the proper time, and the transaction's
binlog cache data was not durable while trying to copy the content
from the binlog cache file into the binary log itself. While switching
the tmp file from a WRITE_CACHE to a READ_CACHE, the server would see
there is still data to flush in the cache, and first try to flush it.
This is not a valid time to flush that data to the temporary file
though, as:

  1. The GTID event has already been written directly to the binary
     log. So if this flushing fails, it leaves the binary log in a
     corrupted state.

  2. This is done during group commit, and will slow down other
     concurrent transactions, which are otherwise ready to commit.

This patch fixes these issues by ensuring all transaction data is
fully flushed to its temporary file (if used) before starting any
critical paths, i.e. in binlog_flush_cache(). Note that if the binlog
cache is solely in-memory, this flush-to-temporary-file is skipped.

Reviewed-by: Andrei Elkin <andrei.elkin@mariadb.com>
Signed-off-by: Brandon Nesterenko <brandon.nesterenko@mariadb.com>
2025-11-19 07:05:49 -07:00

121 lines
3.6 KiB
C

/* Copyright (c) 2000, 2016, Oracle and/or its affiliates.
Copyright (c) 2011, 2016, MariaDB
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
#include "mysys_priv.h"
#include "mysys_err.h"
#include <errno.h>
/**
  Write a chunk of bytes to a file.

  Calls write() (my_win_write() on Windows) in a loop until all Count bytes
  have been written or an error occurs that is not retried.

  @param Filedes  File descriptor to write to.
  @param Buffer   Start of the data to write.
  @param Count    Number of bytes to write. 0 returns 0 without calling
                  write().
  @param MyFlags  Flags controlling error handling. If none of MY_WME,
                  MY_FAE and MY_FNABP is set, my_global_flags is OR-ed in.
                  - MY_NABP / MY_FNABP: the caller only wants to know
                    whether everything was written (see return value).
                    On failure, my_error(EE_WRITE, ...) is also called if
                    any of MY_WME, MY_FAE, MY_FNABP is set.
                  - MY_WAIT_IF_FULL: on ENOSPC / EDQUOT, call
                    wait_for_free_space() and retry (only when
                    NO_BACKGROUND is not defined, and not once the
                    thread's abort flag is set).

  @return
    - With MY_NABP or MY_FNABP: 0 if all bytes were written,
      MY_FILE_ERROR otherwise.
    - Otherwise: the number of bytes actually written. This equals the
      requested Count on success and may be smaller if an error stopped
      the write. MY_FILE_ERROR is returned if the write failed before
      any byte was written.
    - On Windows, a negative Filedes returns (size_t) -1 with my_errno and
      errno set to EBADF.
    Whenever a write() call does not write everything, my_errno is set
    from errno.
*/
size_t my_write(File Filedes, const uchar *Buffer, size_t Count, myf MyFlags)
{
  size_t writtenbytes, written;
  uint errors;
  DBUG_ENTER("my_write");
  DBUG_PRINT("my",("fd: %d Buffer: %p Count: %lu MyFlags: %lu",
                   Filedes, Buffer, (ulong) Count, MyFlags));
  errors= 0; written= 0;
  if (!(MyFlags & (MY_WME | MY_FAE | MY_FNABP)))
    MyFlags|= my_global_flags;

  /* The behavior of write(fd, buf, 0) is not portable */
  if (unlikely(!Count))
    DBUG_RETURN(0);

  for (;;)
  {
#ifdef _WIN32
    if(Filedes < 0)
    {
      my_errno= errno= EBADF;
      DBUG_RETURN((size_t)-1);
    }
    writtenbytes= my_win_write(Filedes, Buffer, Count);
#else
    writtenbytes= write(Filedes, Buffer, Count);
#endif

    /**
      To simulate a write error, set errno to the error code and the
      number of written bytes to -1. MY_WAIT_IF_FULL is cleared so that
      the simulated ENOSPC is not handled by the wait-and-retry branch
      below. Only the first pass is affected (errors == 0).
    */
    DBUG_EXECUTE_IF ("simulate_file_write_error",
                     if (!errors) {
                       errno= ENOSPC;
                       writtenbytes= (size_t) -1;
                       MyFlags&= ~MY_WAIT_IF_FULL;
                     });

    if (writtenbytes == Count)
      break;                                    /* Everything written */
    if (writtenbytes != (size_t) -1)
    {                                           /* Safeguard */
      /* Short write: account for it and advance past the written part */
      written+= writtenbytes;
      Buffer+= writtenbytes;
      Count-= writtenbytes;
    }
    /*
      NOTE(review): after a short write (0 < writtenbytes < Count) errno
      was not necessarily set by this call, so my_errno may carry a value
      from an earlier failure - confirm this is intended.
    */
    my_errno= errno;
    DBUG_PRINT("error",("Write only %ld bytes, error: %d",
                        (long) writtenbytes, my_errno));
#ifndef NO_BACKGROUND
    if (my_thread_var->abort)
      MyFlags&= ~ MY_WAIT_IF_FULL;              /* End if aborted by user */

    if ((my_errno == ENOSPC || my_errno == EDQUOT) &&
        (MyFlags & MY_WAIT_IF_FULL))
    {
      wait_for_free_space(my_filename(Filedes), errors);
      errors++;
      continue;
    }

    if (writtenbytes == 0 || writtenbytes == (size_t) -1)
    {
      if (my_errno == EINTR)
      {
        DBUG_PRINT("debug", ("my_write() was interrupted and returned %ld",
                             (long) writtenbytes));
        continue;                               /* Interrupted */
      }

      if (!writtenbytes && !errors++)           /* Retry once */
      {
        /*
          We may come here if the file quota is exceeded.
          errno is set here so that my_errno picks up EFBIG on the next
          pass if the retried write does not set errno itself.
        */
        errno= EFBIG;                           /* Assume this is the error */
        continue;
      }
    }
    else
      continue;                                 /* Retry */
#endif
    /* Don't give a warning if it's ok that we only write part of the data */
    if (MyFlags & (MY_NABP | MY_FNABP))
    {
      if (MyFlags & (MY_WME | MY_FAE | MY_FNABP))
      {
        my_error(EE_WRITE, MYF(ME_BELL | (MyFlags & (ME_NOTE | ME_ERROR_LOG))),
                 my_filename(Filedes),my_errno);
      }
      DBUG_RETURN(MY_FILE_ERROR);               /* Error on write */
    }
    /*
      The caller accepts a partial result. 'written' already holds every
      byte written so far, so writtenbytes must not be added again: it is
      either 0, (size_t) -1 (adding it would wrap to written - 1), or a
      short count that was accumulated above.
      Report an error only if the call failed before writing anything.
    */
    if (writtenbytes == (size_t) -1 && !written)
      DBUG_RETURN(MY_FILE_ERROR);
    DBUG_RETURN(written);                       /* Return bytes written */
  }
  if (MyFlags & (MY_NABP | MY_FNABP))
    DBUG_RETURN(0);                             /* Want only errors */
  /* Only reached when the last write() wrote all remaining bytes */
  DBUG_RETURN(writtenbytes+written);
} /* my_write */