mirror of
https://github.com/MariaDB/server.git
synced 2025-01-16 03:52:35 +01:00
609 lines
19 KiB
C
609 lines
19 KiB
C
/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; version 2 of the License.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
|
|
|
|
/*
|
|
Locking of Maria-tables.
|
|
Must be the first request before doing any further calls to any Maria function.
|
|
Is used to allow many processes to use the same non-transactional Maria table
|
|
*/
|
|
|
|
#include "ma_ftdefs.h"
|
|
|
|
/* lock table by F_UNLCK, F_RDLCK or F_WRLCK */
|
|
|
|
int maria_lock_database(MARIA_HA *info, int lock_type)
|
|
{
|
|
int error;
|
|
uint count;
|
|
MARIA_SHARE *share= info->s;
|
|
DBUG_ENTER("maria_lock_database");
|
|
DBUG_PRINT("enter",("lock_type: %d old lock %d r_locks: %u w_locks: %u "
|
|
"global_changed: %d open_count: %u name: '%s'",
|
|
lock_type, info->lock_type, share->r_locks,
|
|
share->w_locks,
|
|
share->global_changed, share->state.open_count,
|
|
share->index_file_name.str));
|
|
if (share->options & HA_OPTION_READ_ONLY_DATA ||
|
|
info->lock_type == lock_type)
|
|
DBUG_RETURN(0);
|
|
if (lock_type == F_EXTRA_LCK) /* Used by TMP tables */
|
|
{
|
|
++share->w_locks;
|
|
++share->tot_locks;
|
|
info->lock_type= lock_type;
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
error=0;
|
|
mysql_mutex_lock(&share->intern_lock);
|
|
if (share->kfile.file >= 0) /* May only be false on windows */
|
|
{
|
|
switch (lock_type) {
|
|
case F_UNLCK:
|
|
maria_ftparser_call_deinitializer(info);
|
|
if (info->lock_type == F_RDLCK)
|
|
{
|
|
count= --share->r_locks;
|
|
if (share->lock_restore_status)
|
|
(*share->lock_restore_status)(info);
|
|
}
|
|
else
|
|
{
|
|
count= --share->w_locks;
|
|
if (share->lock.update_status)
|
|
_ma_update_status_with_lock(info);
|
|
}
|
|
--share->tot_locks;
|
|
if (info->lock_type == F_WRLCK && !share->w_locks)
|
|
{
|
|
/* pages of transactional tables get flushed at Checkpoint */
|
|
if (!share->base.born_transactional && !share->temporary &&
|
|
_ma_flush_table_files(info,
|
|
share->delay_key_write ? MARIA_FLUSH_DATA :
|
|
MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,
|
|
FLUSH_KEEP, FLUSH_KEEP))
|
|
error= my_errno;
|
|
}
|
|
if (info->opt_flag & (READ_CACHE_USED | WRITE_CACHE_USED))
|
|
{
|
|
if (end_io_cache(&info->rec_cache))
|
|
{
|
|
error= my_errno;
|
|
_ma_set_fatal_error(share, error);
|
|
}
|
|
}
|
|
if (!count)
|
|
{
|
|
DBUG_PRINT("info",("changed: %u w_locks: %u",
|
|
(uint) share->changed, share->w_locks));
|
|
if (share->changed && !share->w_locks)
|
|
{
|
|
#ifdef HAVE_MMAP
|
|
if ((share->mmaped_length !=
|
|
share->state.state.data_file_length) &&
|
|
(share->nonmmaped_inserts > MAX_NONMAPPED_INSERTS))
|
|
{
|
|
if (share->lock_key_trees)
|
|
mysql_rwlock_wrlock(&share->mmap_lock);
|
|
_ma_remap_file(info, share->state.state.data_file_length);
|
|
share->nonmmaped_inserts= 0;
|
|
if (share->lock_key_trees)
|
|
mysql_rwlock_unlock(&share->mmap_lock);
|
|
}
|
|
#endif
|
|
#ifdef MARIA_EXTERNAL_LOCKING
|
|
share->state.process= share->last_process=share->this_process;
|
|
share->state.unique= info->last_unique= info->this_unique;
|
|
share->state.update_count= info->last_loop= ++info->this_loop;
|
|
#endif
|
|
/* transactional tables rather flush their state at Checkpoint */
|
|
if (!share->base.born_transactional)
|
|
{
|
|
if (_ma_state_info_write_sub(share->kfile.file, &share->state,
|
|
MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET))
|
|
error= my_errno;
|
|
else
|
|
{
|
|
/* A value of 0 means below means "state flushed" */
|
|
share->changed= 0;
|
|
}
|
|
}
|
|
if (maria_flush)
|
|
{
|
|
if (_ma_sync_table_files(info))
|
|
error= my_errno;
|
|
}
|
|
else
|
|
share->not_flushed=1;
|
|
if (error)
|
|
_ma_set_fatal_error(share, error);
|
|
}
|
|
}
|
|
info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
|
|
info->lock_type= F_UNLCK;
|
|
break;
|
|
case F_RDLCK:
|
|
if (info->lock_type == F_WRLCK)
|
|
{
|
|
/*
|
|
Change RW to READONLY
|
|
|
|
mysqld does not turn write locks to read locks,
|
|
so we're never here in mysqld.
|
|
*/
|
|
share->w_locks--;
|
|
share->r_locks++;
|
|
info->lock_type=lock_type;
|
|
break;
|
|
}
|
|
#ifdef MARIA_EXTERNAL_LOCKING
|
|
if (!share->r_locks && !share->w_locks)
|
|
{
|
|
/* note that a transactional table should not do this */
|
|
if (_ma_state_info_read_dsk(share->kfile.file, &share->state))
|
|
{
|
|
error=my_errno;
|
|
break;
|
|
}
|
|
}
|
|
#endif
|
|
_ma_test_if_changed(info);
|
|
share->r_locks++;
|
|
share->tot_locks++;
|
|
info->lock_type=lock_type;
|
|
break;
|
|
case F_WRLCK:
|
|
if (info->lock_type == F_RDLCK)
|
|
{ /* Change READONLY to RW */
|
|
if (share->r_locks == 1)
|
|
{
|
|
share->r_locks--;
|
|
share->w_locks++;
|
|
info->lock_type=lock_type;
|
|
break;
|
|
}
|
|
}
|
|
#ifdef MARIA_EXTERNAL_LOCKING
|
|
if (!(share->options & HA_OPTION_READ_ONLY_DATA))
|
|
{
|
|
if (!share->w_locks)
|
|
{
|
|
if (!share->r_locks)
|
|
{
|
|
/*
|
|
Note that transactional tables should not do this.
|
|
If we enabled this code, we should make sure to skip it if
|
|
born_transactional is true. We should not test
|
|
now_transactional to decide if we can call
|
|
_ma_state_info_read_dsk(), because it can temporarily be 0
|
|
(TRUNCATE on a partitioned table) and thus it would make a state
|
|
modification below without mutex, confusing a concurrent
|
|
checkpoint running.
|
|
Even if this code was enabled only for non-transactional tables:
|
|
in scenario LOCK TABLE t1 WRITE; INSERT INTO t1; DELETE FROM t1;
|
|
state on disk read by DELETE is obsolete as it was not flushed
|
|
at the end of INSERT. MyISAM same. It however causes no issue as
|
|
maria_delete_all_rows() calls _ma_reset_status() thus is not
|
|
influenced by the obsolete read values.
|
|
*/
|
|
if (_ma_state_info_read_dsk(share->kfile.file, &share->state))
|
|
{
|
|
error=my_errno;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
#endif /* defined(MARIA_EXTERNAL_LOCKING) */
|
|
_ma_test_if_changed(info);
|
|
|
|
info->lock_type=lock_type;
|
|
info->invalidator=share->invalidator;
|
|
share->w_locks++;
|
|
share->tot_locks++;
|
|
break;
|
|
default:
|
|
DBUG_ASSERT(0);
|
|
break; /* Impossible */
|
|
}
|
|
}
|
|
#ifdef __WIN__
|
|
else
|
|
{
|
|
/*
|
|
Check for bad file descriptors if this table is part
|
|
of a merge union. Failing to capture this may cause
|
|
a crash on windows if the table is renamed and
|
|
later on referenced by the merge table.
|
|
*/
|
|
if( info->owned_by_merge && (info->s)->kfile.file < 0 )
|
|
{
|
|
error = HA_ERR_NO_SUCH_TABLE;
|
|
}
|
|
}
|
|
#endif
|
|
mysql_mutex_unlock(&share->intern_lock);
|
|
DBUG_RETURN(error);
|
|
} /* maria_lock_database */
|
|
|
|
|
|
/****************************************************************************
|
|
** functions to read / write the state
|
|
****************************************************************************/
|
|
|
|
/*
  Prepare a handler for reading the table state.

  Only does something when compiled with MARIA_EXTERNAL_LOCKING; otherwise
  it always returns 0.

  With external locking:
  - If the handler holds no lock and nobody else has the table locked
    (share->tot_locks == 0), the state is re-read from disk.
  - If the handler holds no lock, the key buffer is optionally checked for
    changes (check_keybuffer) and the invalidator is copied from the share.
  - A write request (F_WRLCK) while holding a read lock is refused.

  RETURN
    0    ok
    1    state could not be read from disk (my_errno is set)
    -1   write access asked for while holding a read lock (my_errno= EACCES)
*/

int _ma_readinfo(register MARIA_HA *info __attribute__ ((unused)),
                 int lock_type __attribute__ ((unused)),
                 int check_keybuffer __attribute__ ((unused)))
{
#ifdef MARIA_EXTERNAL_LOCKING
  MARIA_SHARE *share;
  DBUG_ENTER("_ma_readinfo");

  if (info->lock_type != F_UNLCK)
  {
    if (lock_type == F_WRLCK && info->lock_type == F_RDLCK)
    {
      my_errno= EACCES;                         /* Not allowed to change */
      DBUG_RETURN(-1);                          /* when have read_lock() */
    }
    DBUG_RETURN(0);
  }

  share= info->s;
  /* should not be done for transactional tables */
  if (!share->tot_locks &&
      _ma_state_info_read_dsk(share->kfile.file, &share->state))
  {
    if (!my_errno)
      my_errno= HA_ERR_FILE_TOO_SHORT;
    DBUG_RETURN(1);
  }
  if (check_keybuffer)
    VOID(_ma_test_if_changed(info));
  info->invalidator= share->invalidator;
  DBUG_RETURN(0);
#else
  return 0;
#endif /* defined(MARIA_EXTERNAL_LOCKING) */
} /* _ma_readinfo */
|
|
|
|
|
|
/*
|
|
Every isam-function that updates the isam-database MUST end with this
|
|
request
|
|
|
|
NOTES
|
|
my_errno is not changed if this succeeds!
|
|
*/
|
|
|
|
int _ma_writeinfo(register MARIA_HA *info, uint operation)
{
  int error= 0;
  MARIA_SHARE *share= info->s;
  DBUG_ENTER("_ma_writeinfo");
  DBUG_PRINT("info",("operation: %u tot_locks: %u", operation,
                     share->tot_locks));

  if (share->tot_locks != 0 || share->base.born_transactional)
  {
    /*
      The table is locked, or it is transactional (transactional tables
      flush their state at Checkpoint): only remember that it changed.
    */
    if (operation)
      share->changed= 1;                        /* Mark keyfile changed */
    DBUG_RETURN(error);
  }

  if (operation)
  {                                             /* Two threads can't be here */
    int saved_errno= my_errno;                  /* Remember last error */

#ifdef MARIA_EXTERNAL_LOCKING
    /*
      The following only makes sense if we want to allow two different
      processes to access the same table at the same time
    */
    share->state.process= share->last_process= share->this_process;
    share->state.unique= info->last_unique= info->this_unique;
    share->state.update_count= info->last_loop= ++info->this_loop;
#endif

    error= _ma_state_info_write_sub(share->kfile.file,
                                    &share->state,
                                    MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET);
    if (error)
      saved_errno= my_errno;                    /* Report the write error */
#ifdef __WIN__
    if (maria_flush)
    {
      _commit(share->kfile.file);
      _commit(info->dfile.file);
    }
#endif
    /* On success my_errno is left exactly as the caller had it */
    my_errno= saved_errno;
  }
  DBUG_RETURN(error);
} /* _ma_writeinfo */
|
|
|
|
|
|
/*
|
|
Test if an external process has changed the database
|
|
(Should be called after readinfo)
|
|
*/
|
|
|
|
int _ma_test_if_changed(register MARIA_HA *info)
|
|
{
|
|
#ifdef MARIA_EXTERNAL_LOCKING
|
|
MARIA_SHARE *share= info->s;
|
|
if (share->state.process != share->last_process ||
|
|
share->state.unique != info->last_unique ||
|
|
share->state.update_count != info->last_loop)
|
|
{ /* Keyfile has changed */
|
|
DBUG_PRINT("info",("index file changed"));
|
|
if (share->state.process != share->this_process)
|
|
VOID(flush_pagecache_blocks(share->pagecache, &share->kfile,
|
|
FLUSH_RELEASE));
|
|
share->last_process=share->state.process;
|
|
info->last_unique= share->state.unique;
|
|
info->last_loop= share->state.update_count;
|
|
info->update|= HA_STATE_WRITTEN; /* Must use file on next */
|
|
info->data_changed= 1; /* For maria_is_changed */
|
|
return 1;
|
|
}
|
|
#endif
|
|
return (!(info->update & HA_STATE_AKTIV) ||
|
|
(info->update & (HA_STATE_WRITTEN | HA_STATE_DELETED |
|
|
HA_STATE_KEY_CHANGED)));
|
|
} /* _ma_test_if_changed */
|
|
|
|
|
|
/*
|
|
Put a mark in the .MAI file that someone is updating the table
|
|
|
|
DOCUMENTATION
|
|
state.open_count in the .MAI file is used the following way:
|
|
- For the first change of the .MAI file in this process open_count is
|
|
incremented by _ma_mark_file_changed(). (We have a write lock on the file
|
|
when this happens)
|
|
- In maria_close() it's decremented by _ma_decrement_open_count() if it
|
|
was incremented in the same process.
|
|
|
|
This means that if we are the only process using the file, the open_count
|
|
tells us if the MARIA file wasn't properly closed. (This is true if
|
|
my_disable_locking is set).
|
|
|
|
open_count is not maintained on disk for temporary tables.
|
|
*/
|
|
|
|
#define _MA_ALREADY_MARKED_FILE_CHANGED \
|
|
((share->state.changed & STATE_CHANGED) && share->global_changed)
|
|
|
|
int _ma_mark_file_changed(register MARIA_SHARE *share)
|
|
{
|
|
if (!share->base.born_transactional)
|
|
{
|
|
if (!_MA_ALREADY_MARKED_FILE_CHANGED)
|
|
return _ma_mark_file_changed_now(share);
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
For transactional tables, the table is marked changed when the first page
|
|
is written. Here we just mark the state to be updated so that caller
|
|
can do 'analyze table' and find that is has changed before any pages
|
|
are written.
|
|
*/
|
|
if (! test_all_bits(share->state.changed,
|
|
(STATE_CHANGED | STATE_NOT_ANALYZED |
|
|
STATE_NOT_OPTIMIZED_KEYS)))
|
|
{
|
|
mysql_mutex_lock(&share->intern_lock);
|
|
share->state.changed|=(STATE_CHANGED | STATE_NOT_ANALYZED |
|
|
STATE_NOT_OPTIMIZED_KEYS);
|
|
mysql_mutex_unlock(&share->intern_lock);
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int _ma_mark_file_changed_now(register MARIA_SHARE *share)
|
|
{
|
|
uchar buff[3];
|
|
int error= 1;
|
|
DBUG_ENTER("_ma_mark_file_changed_now");
|
|
|
|
if (_MA_ALREADY_MARKED_FILE_CHANGED)
|
|
DBUG_RETURN(0);
|
|
mysql_mutex_lock(&share->intern_lock); /* recheck under mutex */
|
|
if (! _MA_ALREADY_MARKED_FILE_CHANGED)
|
|
{
|
|
share->state.changed|=(STATE_CHANGED | STATE_NOT_ANALYZED |
|
|
STATE_NOT_OPTIMIZED_KEYS);
|
|
if (!share->global_changed)
|
|
{
|
|
share->changed= share->global_changed= 1;
|
|
share->state.open_count++;
|
|
}
|
|
/*
|
|
Temp tables don't need an open_count as they are removed on crash.
|
|
In theory transactional tables are fixed by log-based recovery, so don't
|
|
need an open_count either, but if recovery has failed and logs have been
|
|
removed (by maria-force-start-after-recovery-failures), we still need to
|
|
detect dubious tables.
|
|
If we didn't maintain open_count on disk for a table, after a crash
|
|
we wouldn't know if it was closed at crash time (thus does not need a
|
|
check) or not. So we would have to check all tables: overkill.
|
|
*/
|
|
if (!share->temporary)
|
|
{
|
|
mi_int2store(buff,share->state.open_count);
|
|
buff[2]=1; /* Mark that it's changed */
|
|
if (my_pwrite(share->kfile.file, buff, sizeof(buff),
|
|
sizeof(share->state.header) +
|
|
MARIA_FILE_OPEN_COUNT_OFFSET,
|
|
MYF(MY_NABP)))
|
|
goto err;
|
|
}
|
|
/* Set uuid of file if not yet set (zerofilled file) */
|
|
if (share->base.born_transactional &&
|
|
!(share->state.changed & STATE_NOT_MOVABLE))
|
|
{
|
|
/* Lock table to current installation */
|
|
if (_ma_set_uuid(share, 0) ||
|
|
(share->state.create_rename_lsn == LSN_NEEDS_NEW_STATE_LSNS &&
|
|
_ma_update_state_lsns_sub(share, LSN_IMPOSSIBLE,
|
|
trnman_get_min_trid(),
|
|
TRUE, TRUE)))
|
|
goto err;
|
|
share->state.changed|= STATE_NOT_MOVABLE;
|
|
}
|
|
}
|
|
error= 0;
|
|
err:
|
|
mysql_mutex_unlock(&share->intern_lock);
|
|
DBUG_RETURN(error);
|
|
#undef _MA_ALREADY_MARKED_FILE_CHANGED
|
|
}
|
|
|
|
/*
|
|
Check that a region is all zero
|
|
|
|
SYNOPSIS
|
|
_ma_check_if_zero()
|
|
pos Start of memory to check
|
|
length length of memory region
|
|
|
|
NOTES
|
|
Used mainly to detect rows with wrong extent information
|
|
*/
|
|
|
|
my_bool _ma_check_if_zero(uchar *pos, size_t length)
{
  size_t i;

  /* Returns 1 at the first non-zero byte, 0 if all 'length' bytes are zero */
  for (i= 0; i < length; i++)
  {
    if (pos[i])
      return 1;
  }
  return 0;
}
|
|
|
|
/*
|
|
This is only called by close or by extra(HA_FLUSH) if the OS has the pwrite()
|
|
call. In these contexts the following code should be safe!
|
|
*/
|
|
|
|
int _ma_decrement_open_count(MARIA_HA *info, my_bool lock_tables)
{
  uchar open_count_buf[2];
  register MARIA_SHARE *share= info->s;
  int lock_error= 0, write_error= 0;
  uint old_lock;
  DBUG_ENTER("_ma_decrement_open_count");

  /* Nothing to undo if this share never incremented open_count */
  if (!share->global_changed)
    DBUG_RETURN(0);

  old_lock= info->lock_type;
  share->global_changed= 0;
  if (!my_disable_locking && lock_tables)
    lock_error= maria_lock_database(info, F_WRLCK);

  /* Its not fatal even if we couldn't get the lock ! */
  if (share->state.open_count > 0)
  {
    share->state.open_count--;
    share->changed= 1;                          /* We have to update state */
    /*
      For temporary tables that will just be deleted, we don't have
      to decrement state. For transactional tables the state will be
      updated in maria_close().
    */
    if (!share->temporary && !share->now_transactional)
    {
      mi_int2store(open_count_buf, share->state.open_count);
      write_error= (int) my_pwrite(share->kfile.file, open_count_buf,
                                   sizeof(open_count_buf),
                                   sizeof(share->state.header) +
                                   MARIA_FILE_OPEN_COUNT_OFFSET,
                                   MYF(MY_NABP));
    }
  }

  /* Go back to the lock the handler had when we were called */
  if (!lock_error && !my_disable_locking && lock_tables)
    lock_error= maria_lock_database(info, old_lock);

  DBUG_RETURN(MY_TEST(lock_error || write_error));
}
|
|
|
|
|
|
/** @brief mark file as crashed */
|
|
|
|
void _ma_mark_file_crashed(MARIA_SHARE *share)
{
  uchar changed_buf[2];
  DBUG_ENTER("_ma_mark_file_crashed");

  /* Set the flag in memory, then store the low 2 bytes in the index file */
  share->state.changed|= STATE_CRASHED;
  mi_int2store(changed_buf, share->state.changed);

  /*
    Errors are ignored on purpose: if the mark cannot be written there is
    nothing else we can do, and the user should already have got an error
    telling that the table was crashed.
  */
  (void) my_pwrite(share->kfile.file, changed_buf, sizeof(changed_buf),
                   sizeof(share->state.header) + MARIA_FILE_CHANGED_OFFSET,
                   MYF(MY_NABP));
  DBUG_VOID_RETURN;
}
|
|
|
|
/*
|
|
Handle a fatal error
|
|
|
|
- Mark the table as crashed
|
|
- Print an error message, if we had not issued an error message before
|
|
that the table had been crashed.
|
|
- set my_errno to error
|
|
- If 'maria_assert_if_crashed_table' is set, then assert.
|
|
*/
|
|
|
|
void _ma_set_fatal_error(MARIA_SHARE *share, int error)
{
  my_bool first_report;

  DBUG_PRINT("error", ("error: %d", error));
  maria_mark_crashed_share(share);

  /* Print the error only the first time the share is reported crashed */
  first_report= !(share->state.changed & STATE_CRASHED_PRINTED);
  if (first_report)
  {
    share->state.changed|= STATE_CRASHED_PRINTED;
    maria_print_error(share, error);
  }

  /* Set last, so that the caller sees 'error' in my_errno */
  my_errno= error;
  DBUG_ASSERT(!maria_assert_if_crashed_table);
}
|
|
|
|
|
|
/**
|
|
@brief Set uuid for a Maria file
|
|
|
|
@fn _ma_set_uuid()
|
|
@param share Maria share
|
|
@param reset_uuid Instead of setting file to maria_uuid, set it to
|
|
0 to mark it as movable
|
|
*/
|
|
|
|
/*
  Writes MY_UUID_SIZE bytes at the position stored in
  share->state.header.base_pos of share->kfile: maria_uuid normally, or
  all zeroes when reset_uuid is set.

  RETURN
    0   ok
    1   the write failed
*/

my_bool _ma_set_uuid(MARIA_SHARE *share, my_bool reset_uuid)
{
  uchar zero_uuid[MY_UUID_SIZE];
  uchar *uuid= maria_uuid;

  if (reset_uuid)
  {
    bzero(zero_uuid, sizeof(zero_uuid));
    uuid= zero_uuid;
  }
  /*
    Normalize the my_pwrite() result with MY_TEST() instead of casting it
    to my_bool: a plain cast keeps only the low byte of the returned
    size_t, so a non-zero result with a zero low byte would look like
    success.
  */
  return (my_bool) MY_TEST(my_pwrite(share->kfile.file, uuid, MY_UUID_SIZE,
                                     mi_uint2korr(share->state.header.base_pos),
                                     MYF(MY_NABP)));
}
|