mirror of
https://github.com/MariaDB/server.git
synced 2025-01-16 03:52:35 +01:00
Maria - various fixes around durability of files:
1) on Mac OS X >=10.3, fcntl() is recommended over fsync (from the man page: "[With fsync()] the disk drive may also re-order the data so that later writes may be present while earlier writes are not. Applications such as databases that require a strict ordering of writes should use F_FULLFSYNC to ensure their data is written in the order they expect"). I have seen two other pieces of software changing from fsync to F_FULLFSYNC on Mac OS X. 2) to make a file creation/deletion/renaming durable on Linux (at least ext2 as I have tested) (see "man fsync"), a fsync() on the directory is needed: new functions to do that, and a flag MY_SYNC_DIR to do it in my_create/my_delete/my_rename. 3) now using this directory syncing when creating the frm if opt_sync_frm, and for Maria's control file when it is created. include/my_sys.h: new flag to my_create/my_delete/my_rename, which asks to sync the directory after the operation is done (currently does nothing except on Linux) libmysql/CMakeLists.txt: my_create() now depends on my_sync() so my_sync is needed for libmysql libmysql/Makefile.shared: my_create() now depends on my_sync() so my_sync is needed for libmysql mysys/my_create.c: my_create() can now sync the directory if asked for mysys/my_delete.c: my_delete() can now sync the directory if asked for mysys/my_open.c: it was a bug that my_close() is done on fd but a positive fd would still be returned, by my_register_filename(). mysys/my_rename.c: my_rename() can now sync the two directories (the one of "from" and the one of "to") if asked for. mysys/my_sync.c: On recent Mac OS X, fcntl(F_FULLFSYNC) is recommended over fsync() (see "man fsync" on Mac OS X 10.3). my_sync_dir(): to sync a directory after a file creation/deletion/ renaming; can be called directly or via MY_SYNC_DIR in my_create/ my_delete/my_rename(). No-op except on Linux (see "man fsync" on Linux). 
my_sync_dir_by_file(): same as above, just more practical when the caller has a file name but no directory name ready. Should the #warning even be a #error? I mean do we want to release binaries which don't guarantee any durability? sql/log.cc: a TODO for the future. sql/unireg.cc: If we sync the frm it makes sense to also sync its creation in the directory. storage/maria/ma_control_file.c: control file is vital, try to make it to disk
This commit is contained in:
parent
3becab22e9
commit
a41ac15b96
11 changed files with 104 additions and 12 deletions
|
@ -55,6 +55,7 @@ extern int NEAR my_errno; /* Last error in mysys */
|
||||||
#define MY_WME 16 /* Write message on error */
|
#define MY_WME 16 /* Write message on error */
|
||||||
#define MY_WAIT_IF_FULL 32 /* Wait and try again if disk full error */
|
#define MY_WAIT_IF_FULL 32 /* Wait and try again if disk full error */
|
||||||
#define MY_IGNORE_BADFD 32 /* my_sync: ignore 'bad descriptor' errors */
|
#define MY_IGNORE_BADFD 32 /* my_sync: ignore 'bad descriptor' errors */
|
||||||
|
#define MY_SYNC_DIR 1024 /* my_create/delete/rename: sync directory */
|
||||||
#define MY_RAID 64 /* Support for RAID */
|
#define MY_RAID 64 /* Support for RAID */
|
||||||
#define MY_FULL_IO 512 /* For my_read - loop intil I/O is complete */
|
#define MY_FULL_IO 512 /* For my_read - loop intil I/O is complete */
|
||||||
#define MY_DONT_CHECK_FILESIZE 128 /* Option to init_io_cache() */
|
#define MY_DONT_CHECK_FILESIZE 128 /* Option to init_io_cache() */
|
||||||
|
@ -622,6 +623,8 @@ extern FILE *my_fdopen(File Filedes,const char *name, int Flags,myf MyFlags);
|
||||||
extern int my_fclose(FILE *fd,myf MyFlags);
|
extern int my_fclose(FILE *fd,myf MyFlags);
|
||||||
extern int my_chsize(File fd,my_off_t newlength, int filler, myf MyFlags);
|
extern int my_chsize(File fd,my_off_t newlength, int filler, myf MyFlags);
|
||||||
extern int my_sync(File fd, myf my_flags);
|
extern int my_sync(File fd, myf my_flags);
|
||||||
|
extern void my_sync_dir(const char *dir_name, myf my_flags);
|
||||||
|
extern void my_sync_dir_by_file(const char *file_name, myf my_flags);
|
||||||
extern int my_error _VARARGS((int nr,myf MyFlags, ...));
|
extern int my_error _VARARGS((int nr,myf MyFlags, ...));
|
||||||
extern int my_printf_error _VARARGS((uint my_err, const char *format,
|
extern int my_printf_error _VARARGS((uint my_err, const char *format,
|
||||||
myf MyFlags, ...))
|
myf MyFlags, ...))
|
||||||
|
|
|
@ -37,6 +37,7 @@ ADD_LIBRARY(libmysql SHARED dll.c libmysql.def
|
||||||
../mysys/my_open.c ../mysys/my_pread.c ../mysys/my_pthread.c ../mysys/my_read.c
|
../mysys/my_open.c ../mysys/my_pread.c ../mysys/my_pthread.c ../mysys/my_read.c
|
||||||
../mysys/my_realloc.c ../mysys/my_rename.c ../mysys/my_seek.c
|
../mysys/my_realloc.c ../mysys/my_rename.c ../mysys/my_seek.c
|
||||||
../mysys/my_static.c ../strings/my_strtoll10.c ../mysys/my_symlink.c
|
../mysys/my_static.c ../strings/my_strtoll10.c ../mysys/my_symlink.c
|
||||||
|
../mysys/my_sync.c
|
||||||
../mysys/my_symlink2.c ../mysys/my_thr_init.c ../sql-common/my_time.c
|
../mysys/my_symlink2.c ../mysys/my_thr_init.c ../sql-common/my_time.c
|
||||||
../strings/my_vsnprintf.c ../mysys/my_wincond.c ../mysys/my_winthread.c
|
../strings/my_vsnprintf.c ../mysys/my_wincond.c ../mysys/my_winthread.c
|
||||||
../mysys/my_write.c ../sql/net_serv.cc ../sql-common/pack.c ../sql/password.c
|
../mysys/my_write.c ../sql/net_serv.cc ../sql-common/pack.c ../sql/password.c
|
||||||
|
|
|
@ -68,7 +68,7 @@ mysysobjects1 = my_init.lo my_static.lo my_malloc.lo my_realloc.lo \
|
||||||
mf_iocache2.lo my_seek.lo my_sleep.lo \
|
mf_iocache2.lo my_seek.lo my_sleep.lo \
|
||||||
my_pread.lo mf_cache.lo md5.lo sha1.lo \
|
my_pread.lo mf_cache.lo md5.lo sha1.lo \
|
||||||
my_getopt.lo my_gethostbyname.lo my_port.lo \
|
my_getopt.lo my_gethostbyname.lo my_port.lo \
|
||||||
my_rename.lo my_chsize.lo
|
my_rename.lo my_chsize.lo my_sync.lo
|
||||||
sqlobjects = net.lo
|
sqlobjects = net.lo
|
||||||
sql_cmn_objects = pack.lo client.lo my_time.lo
|
sql_cmn_objects = pack.lo client.lo my_time.lo
|
||||||
|
|
||||||
|
|
|
@ -53,6 +53,9 @@ File my_create(const char *FileName, int CreateFlags, int access_flags,
|
||||||
fd = open(FileName, access_flags);
|
fd = open(FileName, access_flags);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
if ((MyFlags & MY_SYNC_DIR) && (fd >=0))
|
||||||
|
my_sync_dir_by_file(FileName, MyFlags);
|
||||||
|
|
||||||
DBUG_RETURN(my_register_filename(fd, FileName, FILE_BY_CREATE,
|
DBUG_RETURN(my_register_filename(fd, FileName, FILE_BY_CREATE,
|
||||||
EE_CANTCREATEFILE, MyFlags));
|
EE_CANTCREATEFILE, MyFlags));
|
||||||
} /* my_create */
|
} /* my_create */
|
||||||
|
|
|
@ -30,6 +30,8 @@ int my_delete(const char *name, myf MyFlags)
|
||||||
my_error(EE_DELETE,MYF(ME_BELL+ME_WAITTANG+(MyFlags & ME_NOINPUT)),
|
my_error(EE_DELETE,MYF(ME_BELL+ME_WAITTANG+(MyFlags & ME_NOINPUT)),
|
||||||
name,errno);
|
name,errno);
|
||||||
}
|
}
|
||||||
|
else if (MyFlags & MY_SYNC_DIR)
|
||||||
|
my_sync_dir_by_file(name, MyFlags);
|
||||||
DBUG_RETURN(err);
|
DBUG_RETURN(err);
|
||||||
} /* my_delete */
|
} /* my_delete */
|
||||||
|
|
||||||
|
|
|
@ -162,6 +162,7 @@ File my_register_filename(File fd, const char *FileName, enum file_type
|
||||||
}
|
}
|
||||||
pthread_mutex_unlock(&THR_LOCK_open);
|
pthread_mutex_unlock(&THR_LOCK_open);
|
||||||
(void) my_close(fd, MyFlags);
|
(void) my_close(fd, MyFlags);
|
||||||
|
fd= -1;
|
||||||
my_errno=ENOMEM;
|
my_errno=ENOMEM;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
|
|
@ -61,5 +61,10 @@ int my_rename(const char *from, const char *to, myf MyFlags)
|
||||||
if (MyFlags & (MY_FAE+MY_WME))
|
if (MyFlags & (MY_FAE+MY_WME))
|
||||||
my_error(EE_LINK, MYF(ME_BELL+ME_WAITTANG),from,to,my_errno);
|
my_error(EE_LINK, MYF(ME_BELL+ME_WAITTANG),from,to,my_errno);
|
||||||
}
|
}
|
||||||
|
else if (MyFlags & MY_SYNC_DIR)
|
||||||
|
{
|
||||||
|
my_sync_dir_by_file(from, MyFlags);
|
||||||
|
my_sync_dir_by_file(to, MyFlags);
|
||||||
|
}
|
||||||
DBUG_RETURN(error);
|
DBUG_RETURN(error);
|
||||||
} /* my_rename */
|
} /* my_rename */
|
||||||
|
|
|
@ -49,6 +49,12 @@ int my_sync(File fd, myf my_flags)
|
||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
|
#if defined(F_FULLFSYNC)
|
||||||
|
/* Recent Mac OS X versions insist this call is safer than fsync() */
|
||||||
|
if (!(res= fcntl(fd, F_FULLFSYNC, 0)))
|
||||||
|
break; /* ok */
|
||||||
|
/* Some fs don't support F_FULLFSYNC and fail above, fallback: */
|
||||||
|
#endif
|
||||||
#if defined(HAVE_FDATASYNC)
|
#if defined(HAVE_FDATASYNC)
|
||||||
res= fdatasync(fd);
|
res= fdatasync(fd);
|
||||||
#elif defined(HAVE_FSYNC)
|
#elif defined(HAVE_FSYNC)
|
||||||
|
@ -56,6 +62,7 @@ int my_sync(File fd, myf my_flags)
|
||||||
#elif defined(__WIN__)
|
#elif defined(__WIN__)
|
||||||
res= _commit(fd);
|
res= _commit(fd);
|
||||||
#else
|
#else
|
||||||
|
#warning Cannot find a way to sync a file, durability in danger
|
||||||
res= 0; /* No sync (strange OS) */
|
res= 0; /* No sync (strange OS) */
|
||||||
#endif
|
#endif
|
||||||
} while (res == -1 && errno == EINTR);
|
} while (res == -1 && errno == EINTR);
|
||||||
|
@ -74,3 +81,70 @@ int my_sync(File fd, myf my_flags)
|
||||||
DBUG_RETURN(res);
|
DBUG_RETURN(res);
|
||||||
} /* my_sync */
|
} /* my_sync */
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
Force directory information to disk. Only Linux is known to need this to
|
||||||
|
make sure a file creation/deletion/renaming in(from,to) this directory
|
||||||
|
durable.
|
||||||
|
|
||||||
|
SYNOPSIS
|
||||||
|
my_sync_dir()
|
||||||
|
dir_name the name of the directory
|
||||||
|
my_flags unused
|
||||||
|
|
||||||
|
RETURN
|
||||||
|
nothing (the sync may fail sometimes).
|
||||||
|
*/
|
||||||
|
void my_sync_dir(const char *dir_name, myf my_flags __attribute__((unused)))
|
||||||
|
{
|
||||||
|
#ifdef TARGET_OS_LINUX
|
||||||
|
DBUG_ENTER("my_sync_dir");
|
||||||
|
DBUG_PRINT("my",("Dir: '%s' my_flags: %d", dir_name, my_flags));
|
||||||
|
File dir_fd;
|
||||||
|
int error= 0;
|
||||||
|
/*
|
||||||
|
Syncing a dir does not work on all filesystems (e.g. tmpfs->EINVAL) :
|
||||||
|
ignore errors. But print them to the debug log.
|
||||||
|
*/
|
||||||
|
if (((dir_fd= my_open(dir_name, O_RDONLY, MYF(0))) >= 0))
|
||||||
|
{
|
||||||
|
if (my_sync(dir_fd, MYF(0)))
|
||||||
|
{
|
||||||
|
error= errno;
|
||||||
|
DBUG_PRINT("info",("my_sync failed errno: %d", error));
|
||||||
|
}
|
||||||
|
my_close(dir_fd, MYF(0));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
error= errno;
|
||||||
|
DBUG_PRINT("info",("my_open failed errno: %d", error));
|
||||||
|
}
|
||||||
|
DBUG_VOID_RETURN;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
Force directory information to disk. Only Linux is known to need this to
|
||||||
|
make sure a file creation/deletion/renaming in(from,to) this directory
|
||||||
|
durable.
|
||||||
|
|
||||||
|
SYNOPSIS
|
||||||
|
my_sync_dir_by_file()
|
||||||
|
file_name the name of a file in the directory
|
||||||
|
my_flags unused
|
||||||
|
|
||||||
|
RETURN
|
||||||
|
nothing (the sync may fail sometimes).
|
||||||
|
*/
|
||||||
|
void my_sync_dir_by_file(const char *file_name,
|
||||||
|
myf my_flags __attribute__((unused)))
|
||||||
|
{
|
||||||
|
#ifdef TARGET_OS_LINUX
|
||||||
|
char dir_name[FN_REFLEN];
|
||||||
|
dirname_part(dir_name, file_name);
|
||||||
|
return my_sync_dir(dir_name, my_flags);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
|
|
@ -2102,6 +2102,11 @@ bool MYSQL_BIN_LOG::open_index_file(const char *index_file_name_arg,
|
||||||
my_seek(index_file_nr,0L,MY_SEEK_END,MYF(0)),
|
my_seek(index_file_nr,0L,MY_SEEK_END,MYF(0)),
|
||||||
0, MYF(MY_WME | MY_WAIT_IF_FULL)))
|
0, MYF(MY_WME | MY_WAIT_IF_FULL)))
|
||||||
{
|
{
|
||||||
|
/*
|
||||||
|
TODO: all operations creating/deleting the index file or a log, should
|
||||||
|
call my_sync_dir() or my_sync_dir_by_file() to be durable.
|
||||||
|
TODO: file creation should be done with my_create() not my_open().
|
||||||
|
*/
|
||||||
if (index_file_nr >= 0)
|
if (index_file_nr >= 0)
|
||||||
my_close(index_file_nr,MYF(0));
|
my_close(index_file_nr,MYF(0));
|
||||||
return TRUE;
|
return TRUE;
|
||||||
|
|
|
@ -285,9 +285,12 @@ bool mysql_create_frm(THD *thd, const char *file_name,
|
||||||
my_free((gptr) screen_buff,MYF(0));
|
my_free((gptr) screen_buff,MYF(0));
|
||||||
my_free((gptr) keybuff, MYF(0));
|
my_free((gptr) keybuff, MYF(0));
|
||||||
|
|
||||||
if (opt_sync_frm && !(create_info->options & HA_LEX_CREATE_TMP_TABLE) &&
|
if (opt_sync_frm && !(create_info->options & HA_LEX_CREATE_TMP_TABLE))
|
||||||
my_sync(file, MYF(MY_WME)))
|
{
|
||||||
goto err2;
|
if (my_sync(file, MYF(MY_WME)))
|
||||||
|
goto err2;
|
||||||
|
my_sync_dir_by_file(file_name, MYF(0));
|
||||||
|
}
|
||||||
if (my_close(file,MYF(MY_WME)))
|
if (my_close(file,MYF(MY_WME)))
|
||||||
goto err3;
|
goto err3;
|
||||||
|
|
||||||
|
|
|
@ -134,16 +134,11 @@ CONTROL_FILE_ERROR ma_control_file_create_or_open()
|
||||||
|
|
||||||
if (create_file)
|
if (create_file)
|
||||||
{
|
{
|
||||||
if ((control_file_fd= my_create(name, 0, open_flags, MYF(0))) < 0)
|
if ((control_file_fd= my_create(name, 0,
|
||||||
|
open_flags, MYF(MY_SYNC_DIR))) < 0)
|
||||||
DBUG_RETURN(CONTROL_FILE_UNKNOWN_ERROR);
|
DBUG_RETURN(CONTROL_FILE_UNKNOWN_ERROR);
|
||||||
/*
|
|
||||||
TODO: from "man fsync" on Linux:
|
|
||||||
"fsync does not necessarily ensure that the entry in the directory
|
|
||||||
containing the file has also reached disk. For that an explicit
|
|
||||||
fsync on the file descriptor of the directory is also needed."
|
|
||||||
So if we just created the file we should sync the directory.
|
|
||||||
Maybe there should be a flag of my_create() to do this.
|
|
||||||
|
|
||||||
|
/*
|
||||||
To be safer we should make sure that there are no logs or data/index
|
To be safer we should make sure that there are no logs or data/index
|
||||||
files around (indeed it could be that the control file alone was deleted
|
files around (indeed it could be that the control file alone was deleted
|
||||||
or not restored, and we should not go on with life at this point).
|
or not restored, and we should not go on with life at this point).
|
||||||
|
|
Loading…
Reference in a new issue