mirror of
https://github.com/MariaDB/server.git
synced 2025-01-30 18:41:56 +01:00
10802c4d90
When we append data to the binlog file, we use fdatasync() to ensure the data gets to disk so that crash recovery can work. Unfortunately there seems to be a bug in ext3/ext4 on linux, so that fdatasync() does not correctly sync all data when the size of a file is increased. This causes crash recovery to not work correctly (it loses transactions from the binlog). As a work-around, use fsync() for the binlog, not fdatasync(). Since we are increasing the file size, (correct) fdatasync() will most likely not be faster than fsync() on any file system, and fsync() does work correctly on ext3/ext4. This avoids the need to try to detect if we are running on buggy ext3/ext4.
182 lines
5.2 KiB
C
182 lines
5.2 KiB
C
/*
|
|
Copyright (c) 2003, 2010, Oracle and/or its affiliates
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; version 2 of the License.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
*/
|
|
|
|
#include "mysys_priv.h"
|
|
#include "mysys_err.h"
|
|
#include <errno.h>
|
|
|
|
ulong my_sync_count; /* Number of my_sync() calls that got past the my_disable_sync check (bumped once per call, not once per retry) */
|
|
|
|
/*
|
|
Sync data in file to disk
|
|
|
|
SYNOPSIS
|
|
my_sync()
|
|
fd File descritor to sync
|
|
my_flags Flags (now only MY_WME is supported)
|
|
|
|
NOTE
|
|
If file system supports its, only file data is synced, not inode data.
|
|
|
|
MY_IGNORE_BADFD is useful when fd is "volatile" - not protected by a
|
|
mutex. In this case by the time of fsync(), fd may be already closed by
|
|
another thread, or even reassigned to a different file. With this flag -
|
|
MY_IGNORE_BADFD - such a situation will not be considered an error.
|
|
(which is correct behaviour, if we know that the other thread synced the
|
|
file before closing)
|
|
|
|
MY_SYNC_FILESIZE is useful when syncing a file after it has been extended.
|
|
On Linux, fdatasync() on ext3/ext4 file systems does not properly flush
|
|
to disk the inode data required to preserve the added data across a crash
|
|
(this looks to be a bug). But when a file is extended, inode data will most
|
|
likely need flushing in any case, so passing MY_SYNC_FILESIZE as flags
|
|
is not likely to be any slower, and will be crash safe on Linux ext3/ext4.
|
|
|
|
RETURN
|
|
0 ok
|
|
-1 error
|
|
*/
|
|
|
|
int my_sync(File fd, myf my_flags)
|
|
{
|
|
int res;
|
|
DBUG_ENTER("my_sync");
|
|
DBUG_PRINT("my",("fd: %d my_flags: %d", fd, my_flags));
|
|
|
|
if (my_disable_sync)
|
|
DBUG_RETURN(0);
|
|
|
|
statistic_increment(my_sync_count,&THR_LOCK_open);
|
|
do
|
|
{
|
|
#if defined(F_FULLFSYNC)
|
|
/*
|
|
In Mac OS X >= 10.3 this call is safer than fsync() (it forces the
|
|
disk's cache and guarantees ordered writes).
|
|
*/
|
|
if (!(res= fcntl(fd, F_FULLFSYNC, 0)))
|
|
break; /* ok */
|
|
/* Some file systems don't support F_FULLFSYNC and fail above: */
|
|
DBUG_PRINT("info",("fcntl(F_FULLFSYNC) failed, falling back"));
|
|
#endif
|
|
#if defined(HAVE_FDATASYNC) && HAVE_DECL_FDATASYNC
|
|
if (!(my_flags & MY_SYNC_FILESIZE))
|
|
res= fdatasync(fd);
|
|
else
|
|
{
|
|
#endif
|
|
#if defined(HAVE_FSYNC)
|
|
res= fsync(fd);
|
|
if (res == -1 && errno == ENOLCK)
|
|
res= 0; /* Result Bug in Old FreeBSD */
|
|
#elif defined(__WIN__)
|
|
res= my_win_fsync(fd);
|
|
#else
|
|
#error Cannot find a way to sync a file, durability in danger
|
|
res= 0; /* No sync (strange OS) */
|
|
#endif
|
|
#if defined(HAVE_FDATASYNC) && HAVE_DECL_FDATASYNC
|
|
}
|
|
#endif
|
|
} while (res == -1 && errno == EINTR);
|
|
|
|
if (res)
|
|
{
|
|
int er= errno;
|
|
if (!(my_errno= er))
|
|
my_errno= -1; /* Unknown error */
|
|
if ((my_flags & MY_IGNORE_BADFD) &&
|
|
(er == EBADF || er == EINVAL || er == EROFS))
|
|
{
|
|
DBUG_PRINT("info", ("ignoring errno %d", er));
|
|
res= 0;
|
|
}
|
|
else if (my_flags & MY_WME)
|
|
my_error(EE_SYNC, MYF(ME_BELL+ME_WAITTANG), my_filename(fd), my_errno);
|
|
}
|
|
DBUG_RETURN(res);
|
|
} /* my_sync */
|
|
|
|
|
|
static const char cur_dir_name[]= {FN_CURLIB, 0};
|
|
|
|
/*
|
|
Force directory information to disk.
|
|
|
|
SYNOPSIS
|
|
my_sync_dir()
|
|
dir_name the name of the directory
|
|
my_flags flags (MY_WME etc)
|
|
|
|
RETURN
|
|
0 if ok, !=0 if error
|
|
*/
|
|
|
|
int my_sync_dir(const char *dir_name __attribute__((unused)),
|
|
myf my_flags __attribute__((unused)))
|
|
{
|
|
#ifdef NEED_EXPLICIT_SYNC_DIR
|
|
File dir_fd;
|
|
int res= 0;
|
|
const char *correct_dir_name;
|
|
DBUG_ENTER("my_sync_dir");
|
|
DBUG_PRINT("my",("Dir: '%s' my_flags: %d", dir_name, my_flags));
|
|
/* Sometimes the path does not contain an explicit directory */
|
|
correct_dir_name= (dir_name[0] == 0) ? cur_dir_name : dir_name;
|
|
/*
|
|
Syncing a dir may give EINVAL on tmpfs on Linux, which is ok.
|
|
EIO on the other hand is very important. Hence MY_IGNORE_BADFD.
|
|
*/
|
|
if ((dir_fd= my_open(correct_dir_name, O_RDONLY, MYF(my_flags))) >= 0)
|
|
{
|
|
if (my_sync(dir_fd, MYF(my_flags | MY_IGNORE_BADFD)))
|
|
res= 2;
|
|
if (my_close(dir_fd, MYF(my_flags)))
|
|
res= 3;
|
|
}
|
|
else
|
|
res= 1;
|
|
DBUG_RETURN(res);
|
|
#else
|
|
return 0;
|
|
#endif
|
|
}
|
|
|
|
/*
|
|
Force directory information to disk.
|
|
|
|
SYNOPSIS
|
|
my_sync_dir_by_file()
|
|
file_name the name of a file in the directory
|
|
my_flags flags (MY_WME etc)
|
|
|
|
RETURN
|
|
0 if ok, !=0 if error
|
|
*/
|
|
|
|
int my_sync_dir_by_file(const char *file_name __attribute__((unused)),
|
|
myf my_flags __attribute__((unused)))
|
|
{
|
|
#ifdef NEED_EXPLICIT_SYNC_DIR
|
|
char dir_name[FN_REFLEN];
|
|
size_t dir_name_length;
|
|
dirname_part(dir_name, file_name, &dir_name_length);
|
|
return my_sync_dir(dir_name, my_flags);
|
|
#else
|
|
return 0;
|
|
#endif
|
|
}
|