mariadb/storage/myisam/mi_locking.c
Vladislav Vaintroub 716099e07c This is the downport of
Bug#24509 - 2048 file descriptor limit on windows needs increasing, also 
WL#3049 - improved Windows I/O
                        
The patch replaces the use of the POSIX I/O interfaces in mysys on Windows with 
the Win32 API calls (CreateFile, WriteFile, etc). The Windows HANDLE for the open 
file is stored in the my_file_info struct, along with a flag for append mode 
because the Windows API does not support opening files in append mode in all cases)
The default max open files has been increased to 16384 and can be increased further
by setting --max-open-files=<value> during the server start.
                              
Another major change in this patch that almost all Windows specific file IO code
has been moved to a new file my_winfile.c, greatly reducing the amount of code 
in #ifdef blocks within mysys, thus improving readability.
                               
                                    
Minor enhancements:
- my_(f)stat() is changed to use __stati64 structure with  64 file size
and timestamps. It will return correct file size now (C runtime implementation
used to report outdated information)
- my_lock on Windows is prepared to handle additional timeout parameter
- after review : changed __WIN__ to _WIN32 in the new and changed code.

client/mysqlbinlog.cc:
  fileno -> my_fileno
client/readline.cc:
  fileno -> my_fileno
include/config-win.h:
  Increase OS_FILE_LIMIT for Windows.
  Remove O_SHARE - Windows does not support it. Its definition conflicts with
  O_SHORT_LIVED, that has different semantics.
include/my_dir.h:
  Use stat64 for stat() family of functions on Windows, because of 64 bit file size.
include/my_global.h:
  Increased default value for open file limit to 16K
include/my_sys.h:
  - my_file_info got new structure members - file handle and open flags
  - 2 new Windows-only mysys functions : my_get_osfhandle and my_osmaperr,
    modelled after Windows C runtime functions _get_osfhandle and _dosmaperr
libmysql/CMakeLists.txt:
  new files my_winfile.c and my_winerr.c
mysql-test/suite/large_tests/r/lock_tables_big.result:
  test for more then 2048 open file descriptors on Windows
mysql-test/suite/large_tests/t/lock_tables_big.test:
  test for more then 2048 open file descriptors on Windows
mysys/CMakeLists.txt:
  new files my_winfile.c and my_winerr.c
mysys/Makefile.am:
  new files my_winfile.c and my_winerr.c
mysys/default_modify.c:
  fileno -> my_fileno
mysys/my_chsize.c:
  implementation of chsize on Windows now moved to my_winfile.c
mysys/my_create.c:
  - my_sopen->my_win_open
  - close open file before removing (won't generally work on Windows otherwise)
mysys/my_file.c:
  On Windows, files returned by my_open will not start with 0, but 2048 
  (making it simple to detect incompatible mix of CRT and mysys  io functions)
mysys/my_fopen.c:
  fileno->my_win_fileno , fclose->my_win_fclose, fdopen->my_win_fdopen
  Check for legal filename is done by my_win_[f]open functions
mysys/my_fstream.c:
  fileno->my_fileno
mysys/my_lib.c:
  Windows stat() functions are moved to my_winfile.c
mysys/my_lock.c:
  Move Windows code under #ifdef to a separate function win_lock().
  Add a parameter for lock wait timeout
mysys/my_mmap.c:
   _get_osfhandle->my_get_osfhandle
mysys/my_open.c:
  my_sopen->my_win_open (simpler interface)
mysys/my_pread.c:
  moved most windows specific code to my_win_file.c
  Use my_win_pread
mysys/my_quick.c:
  Use my_win_read/my_win_write
mysys/my_read.c:
  Moved most of windows specific code to my_win_file.c
  Use my_win_read()
mysys/my_seek.c:
  On Windows, use my_win_lseek() in my_seek()/my_tell()
  Removed dead code (synchronization of lseeks)
  Improved DBUG tracing (file position is ulonglong, not ulong)
mysys/my_static.c:
  Removed array initialization. my_file_info_default is global variable
  thus it is initialized with all zeros anyway
mysys/my_sync.c:
  _commit->my_win_fsync
mysys/my_winerr.c:
  New file my_winerr.c
  Exports my_osmaperr modelled after undocumented C runtime
  function _dosmaperr(). The problem with _dosmaperr() used previously is that 
  1) it is nowhere documented and thus code relying on it is not guaranteed to work
     in subsequent releases on the C runtime
  2) it is present only in static C runtime (mysqld does not link if compiled with
    /MD)
mysys/my_winfile.c:
  New file my_winfile.c
  Implements ANSI/Posix file IO routines, when possible using native Windows IO, without
  C runtime (C runtime dropped because of the 2048 file descriptor limit).
mysys/my_write.c:
  write->my_win_write
mysys/mysys_priv.h:
  Declaration of Windows Posix functions (private to mysys, shall not be visible
  outside)
storage/innobase/handler/ha_innodb.cc:
  mysys native Windows IO : correct innodb tmp file handling
  mysql_tmpfile does not return valid CRT file descriptor, thus
  it is not possible to dup() it. Instead, the native file handle has 
  to be duplicated and then converted to CRT descriptor.
storage/myisam/mi_locking.c:
  _commit->my_sync
2009-09-11 22:26:35 +02:00

579 lines
17 KiB
C

/* Copyright (C) 2000-2006 MySQL AB
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
/*
locking of isam-tables.
reads info from a isam-table. Must be first request before doing any furter
calls to any isamfunktion. Is used to allow many process use the same
isamdatabase.
*/
#include "ftdefs.h"
/* lock table by F_UNLCK, F_RDLCK or F_WRLCK */
int mi_lock_database(MI_INFO *info, int lock_type)
{
int error;
uint count;
MYISAM_SHARE *share=info->s;
uint flag;
DBUG_ENTER("mi_lock_database");
DBUG_PRINT("enter",("lock_type: %d old lock %d r_locks: %u w_locks: %u "
"global_changed: %d open_count: %u name: '%s'",
lock_type, info->lock_type, share->r_locks,
share->w_locks,
share->global_changed, share->state.open_count,
share->index_file_name));
if (share->options & HA_OPTION_READ_ONLY_DATA ||
info->lock_type == lock_type)
DBUG_RETURN(0);
if (lock_type == F_EXTRA_LCK) /* Used by TMP tables */
{
++share->w_locks;
++share->tot_locks;
info->lock_type= lock_type;
DBUG_RETURN(0);
}
flag=error=0;
pthread_mutex_lock(&share->intern_lock);
if (share->kfile >= 0) /* May only be false on windows */
{
switch (lock_type) {
case F_UNLCK:
ftparser_call_deinitializer(info);
if (info->lock_type == F_RDLCK)
count= --share->r_locks;
else
count= --share->w_locks;
--share->tot_locks;
if (info->lock_type == F_WRLCK && !share->w_locks &&
!share->delay_key_write && flush_key_blocks(share->key_cache,
share->kfile,FLUSH_KEEP))
{
error=my_errno;
mi_print_error(info->s, HA_ERR_CRASHED);
mi_mark_crashed(info); /* Mark that table must be checked */
}
if (info->opt_flag & (READ_CACHE_USED | WRITE_CACHE_USED))
{
if (end_io_cache(&info->rec_cache))
{
error=my_errno;
mi_print_error(info->s, HA_ERR_CRASHED);
mi_mark_crashed(info);
}
}
if (!count)
{
DBUG_PRINT("info",("changed: %u w_locks: %u",
(uint) share->changed, share->w_locks));
if (share->changed && !share->w_locks)
{
#ifdef HAVE_MMAP
if ((info->s->mmaped_length != info->s->state.state.data_file_length) &&
(info->s->nonmmaped_inserts > MAX_NONMAPPED_INSERTS))
{
if (info->s->concurrent_insert)
rw_wrlock(&info->s->mmap_lock);
mi_remap_file(info, info->s->state.state.data_file_length);
info->s->nonmmaped_inserts= 0;
if (info->s->concurrent_insert)
rw_unlock(&info->s->mmap_lock);
}
#endif
share->state.process= share->last_process=share->this_process;
share->state.unique= info->last_unique= info->this_unique;
share->state.update_count= info->last_loop= ++info->this_loop;
if (mi_state_info_write(share->kfile, &share->state, 1))
error=my_errno;
share->changed=0;
if (myisam_flush)
{
if (my_sync(share->kfile, MYF(0)))
error= my_errno;
if (my_sync(info->dfile, MYF(0)))
error= my_errno;
}
else
share->not_flushed=1;
if (error)
{
mi_print_error(info->s, HA_ERR_CRASHED);
mi_mark_crashed(info);
}
}
if (info->lock_type != F_EXTRA_LCK)
{
if (share->r_locks)
{ /* Only read locks left */
flag=1;
if (my_lock(share->kfile,F_RDLCK,0L,F_TO_EOF,
MYF(MY_WME | MY_SEEK_NOT_DONE)) && !error)
error=my_errno;
}
else if (!share->w_locks)
{ /* No more locks */
flag=1;
if (my_lock(share->kfile,F_UNLCK,0L,F_TO_EOF,
MYF(MY_WME | MY_SEEK_NOT_DONE)) && !error)
error=my_errno;
}
}
}
info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
info->lock_type= F_UNLCK;
break;
case F_RDLCK:
if (info->lock_type == F_WRLCK)
{
/*
Change RW to READONLY
mysqld does not turn write locks to read locks,
so we're never here in mysqld.
*/
if (share->w_locks == 1)
{
flag=1;
if (my_lock(share->kfile,lock_type,0L,F_TO_EOF,
MYF(MY_SEEK_NOT_DONE)))
{
error=my_errno;
break;
}
}
share->w_locks--;
share->r_locks++;
info->lock_type=lock_type;
break;
}
if (!share->r_locks && !share->w_locks)
{
flag=1;
if (my_lock(share->kfile,lock_type,0L,F_TO_EOF,
info->lock_wait | MY_SEEK_NOT_DONE))
{
error=my_errno;
break;
}
if (mi_state_info_read_dsk(share->kfile, &share->state, 1))
{
error=my_errno;
VOID(my_lock(share->kfile,F_UNLCK,0L,F_TO_EOF,MYF(MY_SEEK_NOT_DONE)));
my_errno=error;
break;
}
}
VOID(_mi_test_if_changed(info));
share->r_locks++;
share->tot_locks++;
info->lock_type=lock_type;
break;
case F_WRLCK:
if (info->lock_type == F_RDLCK)
{ /* Change READONLY to RW */
if (share->r_locks == 1)
{
flag=1;
if (my_lock(share->kfile,lock_type,0L,F_TO_EOF,
MYF(info->lock_wait | MY_SEEK_NOT_DONE)))
{
error=my_errno;
break;
}
share->r_locks--;
share->w_locks++;
info->lock_type=lock_type;
break;
}
}
if (!(share->options & HA_OPTION_READ_ONLY_DATA))
{
if (!share->w_locks)
{
flag=1;
if (my_lock(share->kfile,lock_type,0L,F_TO_EOF,
info->lock_wait | MY_SEEK_NOT_DONE))
{
error=my_errno;
break;
}
if (!share->r_locks)
{
if (mi_state_info_read_dsk(share->kfile, &share->state, 1))
{
error=my_errno;
VOID(my_lock(share->kfile,F_UNLCK,0L,F_TO_EOF,
info->lock_wait | MY_SEEK_NOT_DONE));
my_errno=error;
break;
}
}
}
}
VOID(_mi_test_if_changed(info));
info->lock_type=lock_type;
info->invalidator=info->s->invalidator;
share->w_locks++;
share->tot_locks++;
break;
default:
break; /* Impossible */
}
}
#ifdef _WIN32
else
{
/*
Check for bad file descriptors if this table is part
of a merge union. Failing to capture this may cause
a crash on windows if the table is renamed and
later on referenced by the merge table.
*/
if( info->owned_by_merge && (info->s)->kfile < 0 )
{
error = HA_ERR_NO_SUCH_TABLE;
}
}
#endif
pthread_mutex_unlock(&share->intern_lock);
#if defined(FULL_LOG) || defined(_lint)
lock_type|=(int) (flag << 8); /* Set bit to set if real lock */
myisam_log_command(MI_LOG_LOCK,info,(uchar*) &lock_type,sizeof(lock_type),
error);
#endif
DBUG_RETURN(error);
} /* mi_lock_database */
/****************************************************************************
The following functions are called by thr_lock() in threaded applications
****************************************************************************/
/*
Create a copy of the current status for the table
SYNOPSIS
mi_get_status()
param Pointer to Myisam handler
concurrent_insert Set to 1 if we are going to do concurrent inserts
(THR_WRITE_CONCURRENT_INSERT was used)
*/
void mi_get_status(void* param, int concurrent_insert)
{
MI_INFO *info=(MI_INFO*) param;
DBUG_ENTER("mi_get_status");
DBUG_PRINT("info",("key_file: %ld data_file: %ld concurrent_insert: %d",
(long) info->s->state.state.key_file_length,
(long) info->s->state.state.data_file_length,
concurrent_insert));
#ifndef DBUG_OFF
if (info->state->key_file_length > info->s->state.state.key_file_length ||
info->state->data_file_length > info->s->state.state.data_file_length)
DBUG_PRINT("warning",("old info: key_file: %ld data_file: %ld",
(long) info->state->key_file_length,
(long) info->state->data_file_length));
#endif
info->save_state=info->s->state.state;
info->state= &info->save_state;
info->append_insert_at_end= concurrent_insert;
if (concurrent_insert)
info->s->state.state.uncacheable= TRUE;
DBUG_VOID_RETURN;
}
void mi_update_status(void* param)
{
MI_INFO *info=(MI_INFO*) param;
/*
Because someone may have closed the table we point at, we only
update the state if its our own state. This isn't a problem as
we are always pointing at our own lock or at a read lock.
(This is enforced by thr_multi_lock.c)
*/
if (info->state == &info->save_state)
{
#ifndef DBUG_OFF
DBUG_PRINT("info",("updating status: key_file: %ld data_file: %ld",
(long) info->state->key_file_length,
(long) info->state->data_file_length));
if (info->state->key_file_length < info->s->state.state.key_file_length ||
info->state->data_file_length < info->s->state.state.data_file_length)
DBUG_PRINT("warning",("old info: key_file: %ld data_file: %ld",
(long) info->s->state.state.key_file_length,
(long) info->s->state.state.data_file_length));
#endif
info->s->state.state= *info->state;
info->state= &info->s->state.state;
}
info->append_insert_at_end= 0;
/*
We have to flush the write cache here as other threads may start
reading the table before mi_lock_database() is called
*/
if (info->opt_flag & WRITE_CACHE_USED)
{
if (end_io_cache(&info->rec_cache))
{
mi_print_error(info->s, HA_ERR_CRASHED);
mi_mark_crashed(info);
}
info->opt_flag&= ~WRITE_CACHE_USED;
}
}
void mi_restore_status(void *param)
{
MI_INFO *info= (MI_INFO*) param;
info->state= &info->s->state.state;
info->append_insert_at_end= 0;
}
void mi_copy_status(void* to,void *from)
{
((MI_INFO*) to)->state= &((MI_INFO*) from)->save_state;
}
/*
Check if should allow concurrent inserts
IMPLEMENTATION
Allow concurrent inserts if we don't have a hole in the table or
if there is no active write lock and there is active read locks and
myisam_concurrent_insert == 2. In this last case the new
row('s) are inserted at end of file instead of filling up the hole.
The last case is to allow one to inserts into a heavily read-used table
even if there is holes.
NOTES
If there is a an rtree indexes in the table, concurrent inserts are
disabled in mi_open()
RETURN
0 ok to use concurrent inserts
1 not ok
*/
my_bool mi_check_status(void *param)
{
MI_INFO *info=(MI_INFO*) param;
/*
The test for w_locks == 1 is here because this thread has already done an
external lock (in other words: w_locks == 1 means no other threads has
a write lock)
*/
DBUG_PRINT("info",("dellink: %ld r_locks: %u w_locks: %u",
(long) info->s->state.dellink, (uint) info->s->r_locks,
(uint) info->s->w_locks));
return (my_bool) !(info->s->state.dellink == HA_OFFSET_ERROR ||
(myisam_concurrent_insert == 2 && info->s->r_locks &&
info->s->w_locks == 1));
}
/****************************************************************************
** functions to read / write the state
****************************************************************************/
int _mi_readinfo(register MI_INFO *info, int lock_type, int check_keybuffer)
{
DBUG_ENTER("_mi_readinfo");
if (info->lock_type == F_UNLCK)
{
MYISAM_SHARE *share=info->s;
if (!share->tot_locks)
{
if (my_lock(share->kfile,lock_type,0L,F_TO_EOF,
info->lock_wait | MY_SEEK_NOT_DONE))
DBUG_RETURN(1);
if (mi_state_info_read_dsk(share->kfile, &share->state, 1))
{
int error=my_errno ? my_errno : -1;
VOID(my_lock(share->kfile,F_UNLCK,0L,F_TO_EOF,
MYF(MY_SEEK_NOT_DONE)));
my_errno=error;
DBUG_RETURN(1);
}
}
if (check_keybuffer)
VOID(_mi_test_if_changed(info));
info->invalidator=info->s->invalidator;
}
else if (lock_type == F_WRLCK && info->lock_type == F_RDLCK)
{
my_errno=EACCES; /* Not allowed to change */
DBUG_RETURN(-1); /* when have read_lock() */
}
DBUG_RETURN(0);
} /* _mi_readinfo */
/*
Every isam-function that uppdates the isam-database MUST end with this
request
*/
int _mi_writeinfo(register MI_INFO *info, uint operation)
{
int error,olderror;
MYISAM_SHARE *share=info->s;
DBUG_ENTER("_mi_writeinfo");
DBUG_PRINT("info",("operation: %u tot_locks: %u", operation,
share->tot_locks));
error=0;
if (share->tot_locks == 0)
{
olderror=my_errno; /* Remember last error */
if (operation)
{ /* Two threads can't be here */
share->state.process= share->last_process= share->this_process;
share->state.unique= info->last_unique= info->this_unique;
share->state.update_count= info->last_loop= ++info->this_loop;
if ((error=mi_state_info_write(share->kfile, &share->state, 1)))
olderror=my_errno;
#ifdef _WIN32
if (myisam_flush)
{
my_sync(share->kfile,0);
my_sync(info->dfile,0);
}
#endif
}
if (!(operation & WRITEINFO_NO_UNLOCK) &&
my_lock(share->kfile,F_UNLCK,0L,F_TO_EOF,
MYF(MY_WME | MY_SEEK_NOT_DONE)) && !error)
DBUG_RETURN(1);
my_errno=olderror;
}
else if (operation)
share->changed= 1; /* Mark keyfile changed */
DBUG_RETURN(error);
} /* _mi_writeinfo */
/* Test if someone has changed the database */
/* (Should be called after readinfo) */
int _mi_test_if_changed(register MI_INFO *info)
{
MYISAM_SHARE *share=info->s;
if (share->state.process != share->last_process ||
share->state.unique != info->last_unique ||
share->state.update_count != info->last_loop)
{ /* Keyfile has changed */
DBUG_PRINT("info",("index file changed"));
if (share->state.process != share->this_process)
VOID(flush_key_blocks(share->key_cache, share->kfile, FLUSH_RELEASE));
share->last_process=share->state.process;
info->last_unique= share->state.unique;
info->last_loop= share->state.update_count;
info->update|= HA_STATE_WRITTEN; /* Must use file on next */
info->data_changed= 1; /* For mi_is_changed */
return 1;
}
return (!(info->update & HA_STATE_AKTIV) ||
(info->update & (HA_STATE_WRITTEN | HA_STATE_DELETED |
HA_STATE_KEY_CHANGED)));
} /* _mi_test_if_changed */
/*
Put a mark in the .MYI file that someone is updating the table
DOCUMENTATION
state.open_count in the .MYI file is used the following way:
- For the first change of the .MYI file in this process open_count is
incremented by mi_mark_file_change(). (We have a write lock on the file
when this happens)
- In mi_close() it's decremented by _mi_decrement_open_count() if it
was incremented in the same process.
This mean that if we are the only process using the file, the open_count
tells us if the MYISAM file wasn't properly closed. (This is true if
my_disable_locking is set).
*/
int _mi_mark_file_changed(MI_INFO *info)
{
uchar buff[3];
register MYISAM_SHARE *share=info->s;
DBUG_ENTER("_mi_mark_file_changed");
if (!(share->state.changed & STATE_CHANGED) || ! share->global_changed)
{
share->state.changed|=(STATE_CHANGED | STATE_NOT_ANALYZED |
STATE_NOT_OPTIMIZED_KEYS);
if (!share->global_changed)
{
share->global_changed=1;
share->state.open_count++;
}
if (!share->temporary)
{
mi_int2store(buff,share->state.open_count);
buff[2]=1; /* Mark that it's changed */
DBUG_RETURN(my_pwrite(share->kfile,buff,sizeof(buff),
sizeof(share->state.header),
MYF(MY_NABP)));
}
}
DBUG_RETURN(0);
}
/*
This is only called by close or by extra(HA_FLUSH) if the OS has the pwrite()
call. In these context the following code should be safe!
*/
int _mi_decrement_open_count(MI_INFO *info)
{
uchar buff[2];
register MYISAM_SHARE *share=info->s;
int lock_error=0,write_error=0;
if (share->global_changed)
{
uint old_lock=info->lock_type;
share->global_changed=0;
lock_error=mi_lock_database(info,F_WRLCK);
/* Its not fatal even if we couldn't get the lock ! */
if (share->state.open_count > 0)
{
share->state.open_count--;
mi_int2store(buff,share->state.open_count);
write_error=my_pwrite(share->kfile,buff,sizeof(buff),
sizeof(share->state.header),
MYF(MY_NABP));
}
if (!lock_error)
lock_error=mi_lock_database(info,old_lock);
}
return test(lock_error || write_error);
}