Maria - various fixes around durability of files:

1) on Mac OS X >=10.3, fcntl() is recommended over fsync (from the
man page: "[With fsync()] the disk drive may also re-order the data
so that later writes may be present while earlier writes are not.
Applications such as databases that require a strict ordering of writes
should use F_FULLFSYNC to ensure their data is written in the order
they expect"). I have seen two other pieces of software changing from
fsync to F_FULLFSYNC on Mac OS X.
2) to make a file creation/deletion/renaming durable on Linux (at least
on ext2, which I have tested; see "man fsync"), an fsync() on the
directory is needed: new functions to do that, and a flag MY_SYNC_DIR
to do it in my_create/my_delete/my_rename.
3) now using this directory syncing when creating the frm if
opt_sync_frm, and for Maria's control file when it is created.


include/my_sys.h:
  new flag to my_create/my_delete/my_rename, which asks to sync the
  directory after the operation is done (currently does nothing except
  on Linux)
libmysql/CMakeLists.txt:
  my_create() now depends on my_sync() so my_sync is needed for libmysql
libmysql/Makefile.shared:
  my_create() now depends on my_sync() so my_sync is needed for libmysql
mysys/my_create.c:
  my_create() can now sync the directory if asked for
mysys/my_delete.c:
  my_delete() can now sync the directory if asked for
mysys/my_open.c:
  it was a bug that my_close() is done on fd but a positive fd would
  still be returned, by my_register_filename().
mysys/my_rename.c:
  my_rename() can now sync the two directories (the one of "from" and
  the one of "to") if asked for.
mysys/my_sync.c:
  On recent Mac OS X, fcntl(F_FULLFSYNC) is recommended over fsync()
  (see "man fsync" on Mac OS X 10.3).
  my_sync_dir(): to sync a directory after a file creation/deletion/
  renaming; can be called directly or via MY_SYNC_DIR in my_create/
  my_delete/my_rename(). No-op except on Linux (see "man fsync" on Linux).
  my_sync_dir_by_file(): same as above, just more practical when the
  caller has a file name but no directory name ready.
  Should the #warning even be a #error? I mean do we want to release
  binaries which don't guarantee any durability?
sql/log.cc:
  a TODO for the future.
sql/unireg.cc:
  If we sync the frm it makes sense to also sync its creation in the
  directory.
storage/maria/ma_control_file.c:
  control file is vital, try to make it to disk
This commit is contained in:
unknown 2006-11-21 22:22:59 +01:00
parent 3becab22e9
commit a41ac15b96
11 changed files with 104 additions and 12 deletions

View file

@ -55,6 +55,7 @@ extern int NEAR my_errno; /* Last error in mysys */
#define MY_WME 16 /* Write message on error */
#define MY_WAIT_IF_FULL 32 /* Wait and try again if disk full error */
#define MY_IGNORE_BADFD 32 /* my_sync: ignore 'bad descriptor' errors (same value as MY_WAIT_IF_FULL) */
#define MY_SYNC_DIR 1024 /* my_create/delete/rename: sync the directory after the operation succeeded */
#define MY_RAID 64 /* Support for RAID */
#define MY_FULL_IO 512 /* For my_read - loop until I/O is complete */
#define MY_DONT_CHECK_FILESIZE 128 /* Option to init_io_cache() */
@ -622,6 +623,8 @@ extern FILE *my_fdopen(File Filedes,const char *name, int Flags,myf MyFlags);
extern int my_fclose(FILE *fd,myf MyFlags);
extern int my_chsize(File fd,my_off_t newlength, int filler, myf MyFlags);
extern int my_sync(File fd, myf my_flags);
extern void my_sync_dir(const char *dir_name, myf my_flags);
extern void my_sync_dir_by_file(const char *file_name, myf my_flags);
extern int my_error _VARARGS((int nr,myf MyFlags, ...));
extern int my_printf_error _VARARGS((uint my_err, const char *format,
myf MyFlags, ...))

View file

@ -37,6 +37,7 @@ ADD_LIBRARY(libmysql SHARED dll.c libmysql.def
../mysys/my_open.c ../mysys/my_pread.c ../mysys/my_pthread.c ../mysys/my_read.c
../mysys/my_realloc.c ../mysys/my_rename.c ../mysys/my_seek.c
../mysys/my_static.c ../strings/my_strtoll10.c ../mysys/my_symlink.c
../mysys/my_sync.c
../mysys/my_symlink2.c ../mysys/my_thr_init.c ../sql-common/my_time.c
../strings/my_vsnprintf.c ../mysys/my_wincond.c ../mysys/my_winthread.c
../mysys/my_write.c ../sql/net_serv.cc ../sql-common/pack.c ../sql/password.c

View file

@ -68,7 +68,7 @@ mysysobjects1 = my_init.lo my_static.lo my_malloc.lo my_realloc.lo \
mf_iocache2.lo my_seek.lo my_sleep.lo \
my_pread.lo mf_cache.lo md5.lo sha1.lo \
my_getopt.lo my_gethostbyname.lo my_port.lo \
my_rename.lo my_chsize.lo
my_rename.lo my_chsize.lo my_sync.lo
sqlobjects = net.lo
sql_cmn_objects = pack.lo client.lo my_time.lo

View file

@ -53,6 +53,9 @@ File my_create(const char *FileName, int CreateFlags, int access_flags,
fd = open(FileName, access_flags);
#endif
if ((MyFlags & MY_SYNC_DIR) && (fd >=0))
my_sync_dir_by_file(FileName, MyFlags);
DBUG_RETURN(my_register_filename(fd, FileName, FILE_BY_CREATE,
EE_CANTCREATEFILE, MyFlags));
} /* my_create */

View file

@ -30,6 +30,8 @@ int my_delete(const char *name, myf MyFlags)
my_error(EE_DELETE,MYF(ME_BELL+ME_WAITTANG+(MyFlags & ME_NOINPUT)),
name,errno);
}
else if (MyFlags & MY_SYNC_DIR)
my_sync_dir_by_file(name, MyFlags);
DBUG_RETURN(err);
} /* my_delete */

View file

@ -162,6 +162,7 @@ File my_register_filename(File fd, const char *FileName, enum file_type
}
pthread_mutex_unlock(&THR_LOCK_open);
(void) my_close(fd, MyFlags);
fd= -1;
my_errno=ENOMEM;
}
else

View file

@ -61,5 +61,10 @@ int my_rename(const char *from, const char *to, myf MyFlags)
if (MyFlags & (MY_FAE+MY_WME))
my_error(EE_LINK, MYF(ME_BELL+ME_WAITTANG),from,to,my_errno);
}
else if (MyFlags & MY_SYNC_DIR)
{
my_sync_dir_by_file(from, MyFlags);
my_sync_dir_by_file(to, MyFlags);
}
DBUG_RETURN(error);
} /* my_rename */

View file

@ -49,6 +49,12 @@ int my_sync(File fd, myf my_flags)
do
{
#if defined(F_FULLFSYNC)
/* Recent Mac OS X versions insist this call is safer than fsync() */
if (!(res= fcntl(fd, F_FULLFSYNC, 0)))
break; /* ok */
/* Some fs don't support F_FULLFSYNC and fail above, fallback: */
#endif
#if defined(HAVE_FDATASYNC)
res= fdatasync(fd);
#elif defined(HAVE_FSYNC)
@ -56,6 +62,7 @@ int my_sync(File fd, myf my_flags)
#elif defined(__WIN__)
res= _commit(fd);
#else
#warning Cannot find a way to sync a file, durability in danger
res= 0; /* No sync (strange OS) */
#endif
} while (res == -1 && errno == EINTR);
@ -74,3 +81,70 @@ int my_sync(File fd, myf my_flags)
DBUG_RETURN(res);
} /* my_sync */
/*
  Force directory information to disk.

  Only Linux is known to need this in order to make a file
  creation/deletion/renaming in (from, to) this directory durable.

  SYNOPSIS
    my_sync_dir()
    dir_name             the name of the directory
    my_flags             not acted upon; only printed to the debug log

  NOTES
    The body is compiled only when TARGET_OS_LINUX is defined; otherwise
    this function does nothing.
    Failures to open or to sync the directory are deliberately not
    reported to the caller, they are only written to the debug log.

  RETURN
    nothing (the sync may fail sometimes).
*/
void my_sync_dir(const char *dir_name, myf my_flags __attribute__((unused)))
{
#ifdef TARGET_OS_LINUX
  /* Declarations come first so that pre-C99 compilers accept the block */
  File dir_fd;
  int error= 0;
  DBUG_ENTER("my_sync_dir");
  DBUG_PRINT("my",("Dir: '%s' my_flags: %d", dir_name, my_flags));
  /*
    Syncing a dir does not work on all filesystems (e.g. tmpfs->EINVAL) :
    ignore errors. But print them to the debug log.
  */
  if ((dir_fd= my_open(dir_name, O_RDONLY, MYF(0))) >= 0)
  {
    if (my_sync(dir_fd, MYF(0)))
    {
      /* Save errno at once, before any later call can overwrite it */
      error= errno;
      DBUG_PRINT("info",("my_sync failed errno: %d", error));
    }
    my_close(dir_fd, MYF(0));
  }
  else
  {
    error= errno;
    DBUG_PRINT("info",("my_open failed errno: %d", error));
  }
  DBUG_VOID_RETURN;
#endif
}
/*
  Force directory information to disk, given the name of a file in it.

  Only Linux is known to need this in order to make a file
  creation/deletion/renaming in (from, to) this directory durable.

  SYNOPSIS
    my_sync_dir_by_file()
    file_name            the name of a file in the directory
    my_flags             passed through unchanged to my_sync_dir()

  NOTES
    The body is compiled only when TARGET_OS_LINUX is defined; otherwise
    this function does nothing.
    The directory part of file_name is extracted with dirname_part() into
    a local buffer of FN_REFLEN bytes.

  RETURN
    nothing (the sync may fail sometimes).
*/
void my_sync_dir_by_file(const char *file_name,
                         myf my_flags __attribute__((unused)))
{
#ifdef TARGET_OS_LINUX
  char dir_name[FN_REFLEN];
  dirname_part(dir_name, file_name);
  /* Plain call: a void function must not "return" an expression in ISO C */
  my_sync_dir(dir_name, my_flags);
#endif
}

View file

@ -2102,6 +2102,11 @@ bool MYSQL_BIN_LOG::open_index_file(const char *index_file_name_arg,
my_seek(index_file_nr,0L,MY_SEEK_END,MYF(0)),
0, MYF(MY_WME | MY_WAIT_IF_FULL)))
{
/*
TODO: all operations creating/deleting the index file or a log, should
call my_sync_dir() or my_sync_dir_by_file() to be durable.
TODO: file creation should be done with my_create() not my_open().
*/
if (index_file_nr >= 0)
my_close(index_file_nr,MYF(0));
return TRUE;

View file

@ -285,9 +285,12 @@ bool mysql_create_frm(THD *thd, const char *file_name,
my_free((gptr) screen_buff,MYF(0));
my_free((gptr) keybuff, MYF(0));
if (opt_sync_frm && !(create_info->options & HA_LEX_CREATE_TMP_TABLE) &&
my_sync(file, MYF(MY_WME)))
goto err2;
if (opt_sync_frm && !(create_info->options & HA_LEX_CREATE_TMP_TABLE))
{
if (my_sync(file, MYF(MY_WME)))
goto err2;
my_sync_dir_by_file(file_name, MYF(0));
}
if (my_close(file,MYF(MY_WME)))
goto err3;

View file

@ -134,16 +134,11 @@ CONTROL_FILE_ERROR ma_control_file_create_or_open()
if (create_file)
{
if ((control_file_fd= my_create(name, 0, open_flags, MYF(0))) < 0)
if ((control_file_fd= my_create(name, 0,
open_flags, MYF(MY_SYNC_DIR))) < 0)
DBUG_RETURN(CONTROL_FILE_UNKNOWN_ERROR);
/*
TODO: from "man fsync" on Linux:
"fsync does not necessarily ensure that the entry in the directory
containing the file has also reached disk. For that an explicit
fsync on the file descriptor of the directory is also needed."
So if we just created the file we should sync the directory.
Maybe there should be a flag of my_create() to do this.
/*
To be safer we should make sure that there are no logs or data/index
files around (indeed it could be that the control file alone was deleted
or not restored, and we should not go on with life at this point).