mirror of
https://github.com/MariaDB/server.git
synced 2025-01-18 13:02:28 +01:00
63ff9877a5
Misc changes: - fix for benign Valgrind error, compiler warnings - fix for a segfault in execution of maria_delete_all_rows() and one when taking multiple checkpoints - fix for too paranoid assertion - adding ability to take checkpoints at the end of the REDO phase and at the end of recovery. - other minor changes storage/maria/ha_maria.cc: The checkpoint done after Recovery is finished, is moved to maria_recover(). storage/maria/ma_bitmap.c: fix for Valgrind error: the "shadow debug copy" of the bitmap page started unitialized and so ma_print_bitmap() would use it uninitialized storage/maria/ma_checkpoint.c: * reset pointers to NULL after freeing them, or we segfault at next checkpoint in my_realloc(). * fix for compiler warnings. storage/maria/ma_delete_all.c: info->trn is NULL for non-transactional tables storage/maria/ma_locking.c: correct assertion (it fired wrongly in execution of REDO_DROP_TABLE due to the maria_extra(HA_PREPARE_FOR_DROP)->_ma_decrement_open_count() ->maria_lock_database(F_UNLCK); another solution would have been to not call _ma_decrement_open_count() (it's ok to have a wrong open count in a table which we are dropping), but the same problem would still exist for REDO_RENAME_TABLE. storage/maria/ma_loghandler.c: fail early if UNRECOVERABLE_ERROR storage/maria/ma_recovery.c: * new argument to maria_apply_log(): should it take checkpoints (at end of REDO phase and at the very end) or no. * moving the call to translog_next_LSN() into parse_checkpoint_record() ("hide the details"). * Refining an error detection for something which could happen if there is a checkpoint record in the log. * Using close_one_table() instead of maria_extra(HA_EXTRA_PREPARE_FOR_DROP|RENAME), as it looks safer, and also changing how close_one_table() works: it now limits itself to scanning all_tables[], thus having one loopp instead of two, which should be faster (as a result, it does not close tables not registered in this array, which is ok as there should not be any). storage/maria/ma_recovery.h: new parameter storage/maria/maria_read_log.c: update to new prototype
575 lines
18 KiB
C
575 lines
18 KiB
C
/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; version 2 of the License.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
|
|
|
|
/*
|
|
locking of isam-tables.
|
|
reads info from a isam-table. Must be first request before doing any furter
|
|
calls to any isamfunktion. Is used to allow many process use the same
|
|
isamdatabase.
|
|
*/
|
|
|
|
#include "ma_ftdefs.h"
|
|
|
|
/* lock table by F_UNLCK, F_RDLCK or F_WRLCK */
|
|
|
|
int maria_lock_database(MARIA_HA *info, int lock_type)
|
|
{
|
|
int error;
|
|
uint count;
|
|
MARIA_SHARE *share=info->s;
|
|
DBUG_ENTER("maria_lock_database");
|
|
DBUG_PRINT("enter",("lock_type: %d old lock %d r_locks: %u w_locks: %u "
|
|
"global_changed: %d open_count: %u name: '%s'",
|
|
lock_type, info->lock_type, share->r_locks,
|
|
share->w_locks,
|
|
share->global_changed, share->state.open_count,
|
|
share->index_file_name));
|
|
if (share->options & HA_OPTION_READ_ONLY_DATA ||
|
|
info->lock_type == lock_type)
|
|
DBUG_RETURN(0);
|
|
if (lock_type == F_EXTRA_LCK) /* Used by TMP tables */
|
|
{
|
|
++share->w_locks;
|
|
++share->tot_locks;
|
|
info->lock_type= lock_type;
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
error=0;
|
|
pthread_mutex_lock(&share->intern_lock);
|
|
if (share->kfile.file >= 0) /* May only be false on windows */
|
|
{
|
|
switch (lock_type) {
|
|
case F_UNLCK:
|
|
maria_ftparser_call_deinitializer(info);
|
|
if (info->lock_type == F_RDLCK)
|
|
{
|
|
count= --share->r_locks;
|
|
_ma_restore_status(info);
|
|
}
|
|
else
|
|
{
|
|
count= --share->w_locks;
|
|
_ma_update_status(info);
|
|
}
|
|
--share->tot_locks;
|
|
if (info->lock_type == F_WRLCK && !share->w_locks)
|
|
{
|
|
if (!share->delay_key_write &&
|
|
flush_pagecache_blocks(share->pagecache, &share->kfile,
|
|
FLUSH_KEEP))
|
|
{
|
|
error= my_errno;
|
|
maria_print_error(info->s, HA_ERR_CRASHED);
|
|
/* Mark that table must be checked */
|
|
maria_mark_crashed(info);
|
|
}
|
|
/* pages of transactional tables get flushed at Checkpoint */
|
|
if (!share->base.born_transactional &&
|
|
_ma_flush_table_files(info, MARIA_FLUSH_DATA,
|
|
FLUSH_KEEP, FLUSH_KEEP))
|
|
error= my_errno;
|
|
}
|
|
if (info->opt_flag & (READ_CACHE_USED | WRITE_CACHE_USED))
|
|
{
|
|
if (end_io_cache(&info->rec_cache))
|
|
{
|
|
error=my_errno;
|
|
maria_print_error(info->s, HA_ERR_CRASHED);
|
|
maria_mark_crashed(info);
|
|
}
|
|
}
|
|
if (!count)
|
|
{
|
|
DBUG_PRINT("info",("changed: %u w_locks: %u",
|
|
(uint) share->changed, share->w_locks));
|
|
if (share->changed && !share->w_locks)
|
|
{
|
|
#ifdef HAVE_MMAP
|
|
if ((info->s->mmaped_length !=
|
|
info->s->state.state.data_file_length) &&
|
|
(info->s->nonmmaped_inserts > MAX_NONMAPPED_INSERTS))
|
|
{
|
|
if (info->s->concurrent_insert)
|
|
rw_wrlock(&info->s->mmap_lock);
|
|
_ma_remap_file(info, info->s->state.state.data_file_length);
|
|
info->s->nonmmaped_inserts= 0;
|
|
if (info->s->concurrent_insert)
|
|
rw_unlock(&info->s->mmap_lock);
|
|
}
|
|
#endif
|
|
share->state.process= share->last_process=share->this_process;
|
|
share->state.unique= info->last_unique= info->this_unique;
|
|
share->state.update_count= info->last_loop= ++info->this_loop;
|
|
/* transactional tables rather flush their state at Checkpoint */
|
|
if (!share->base.born_transactional)
|
|
{
|
|
if (_ma_state_info_write_sub(share->kfile.file, &share->state, 1))
|
|
error= my_errno;
|
|
else
|
|
{
|
|
/* A value of 0 means below means "state flushed" */
|
|
share->changed= 0;
|
|
}
|
|
}
|
|
if (maria_flush)
|
|
{
|
|
if (_ma_sync_table_files(info))
|
|
error= my_errno;
|
|
}
|
|
else
|
|
share->not_flushed=1;
|
|
if (error)
|
|
{
|
|
maria_print_error(info->s, HA_ERR_CRASHED);
|
|
maria_mark_crashed(info);
|
|
}
|
|
}
|
|
}
|
|
info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
|
|
info->lock_type= F_UNLCK;
|
|
/*
|
|
Verify that user of the table cleaned up after itself. Not in
|
|
recovery, as for example maria_extra(HA_EXTRA_PREPARE_FOR_RENAME) may
|
|
call us here, with transactionality temporarily disabled.
|
|
*/
|
|
DBUG_ASSERT(maria_in_recovery ||
|
|
share->now_transactional == share->base.born_transactional);
|
|
break;
|
|
case F_RDLCK:
|
|
if (info->lock_type == F_WRLCK)
|
|
{
|
|
/*
|
|
Change RW to READONLY
|
|
|
|
mysqld does not turn write locks to read locks,
|
|
so we're never here in mysqld.
|
|
*/
|
|
share->w_locks--;
|
|
share->r_locks++;
|
|
info->lock_type=lock_type;
|
|
break;
|
|
}
|
|
#ifdef MARIA_EXTERNAL_LOCKING
|
|
if (!share->r_locks && !share->w_locks)
|
|
{
|
|
/* note that a transactional table should not do this */
|
|
if (_ma_state_info_read_dsk(share->kfile.file, &share->state))
|
|
{
|
|
error=my_errno;
|
|
break;
|
|
}
|
|
}
|
|
#endif
|
|
VOID(_ma_test_if_changed(info));
|
|
share->r_locks++;
|
|
share->tot_locks++;
|
|
info->lock_type=lock_type;
|
|
break;
|
|
case F_WRLCK:
|
|
if (info->lock_type == F_RDLCK)
|
|
{ /* Change READONLY to RW */
|
|
if (share->r_locks == 1)
|
|
{
|
|
share->r_locks--;
|
|
share->w_locks++;
|
|
info->lock_type=lock_type;
|
|
break;
|
|
}
|
|
}
|
|
#ifdef MARIA_EXTERNAL_LOCKING
|
|
if (!(share->options & HA_OPTION_READ_ONLY_DATA))
|
|
{
|
|
if (!share->w_locks)
|
|
{
|
|
if (!share->r_locks)
|
|
{
|
|
/*
|
|
Note that transactional tables should not do this.
|
|
If we enabled this code, we should make sure to skip it if
|
|
born_transactional is true. We should not test
|
|
now_transactional to decide if we can call
|
|
_ma_state_info_read_dsk(), because it can temporarily be 0
|
|
(TRUNCATE on a partitioned table) and thus it would make a state
|
|
modification below without mutex, confusing a concurrent
|
|
checkpoint running.
|
|
Even if this code was enabled only for non-transactional tables:
|
|
in scenario LOCK TABLE t1 WRITE; INSERT INTO t1; DELETE FROM t1;
|
|
state on disk read by DELETE is obsolete as it was not flushed
|
|
at the end of INSERT. MyISAM same. It however causes no issue as
|
|
maria_delete_all_rows() calls _ma_reset_status() thus is not
|
|
influenced by the obsolete read values.
|
|
*/
|
|
if (_ma_state_info_read_dsk(share->kfile.file, &share->state))
|
|
{
|
|
error=my_errno;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
#endif /* defined(MARIA_EXTERNAL_LOCKING) */
|
|
VOID(_ma_test_if_changed(info));
|
|
|
|
info->lock_type=lock_type;
|
|
info->invalidator=info->s->invalidator;
|
|
share->w_locks++;
|
|
share->tot_locks++;
|
|
break;
|
|
default:
|
|
DBUG_ASSERT(0);
|
|
break; /* Impossible */
|
|
}
|
|
}
|
|
#ifdef __WIN__
|
|
else
|
|
{
|
|
/*
|
|
Check for bad file descriptors if this table is part
|
|
of a merge union. Failing to capture this may cause
|
|
a crash on windows if the table is renamed and
|
|
later on referenced by the merge table.
|
|
*/
|
|
if( info->owned_by_merge && (info->s)->kfile.file < 0 )
|
|
{
|
|
error = HA_ERR_NO_SUCH_TABLE;
|
|
}
|
|
}
|
|
#endif
|
|
pthread_mutex_unlock(&share->intern_lock);
|
|
DBUG_RETURN(error);
|
|
} /* maria_lock_database */
|
|
|
|
|
|
/****************************************************************************
|
|
The following functions are called by thr_lock() in threaded applications
|
|
****************************************************************************/
|
|
|
|
/*
|
|
Create a copy of the current status for the table
|
|
|
|
SYNOPSIS
|
|
_ma_get_status()
|
|
param Pointer to Myisam handler
|
|
concurrent_insert Set to 1 if we are going to do concurrent inserts
|
|
(THR_WRITE_CONCURRENT_INSERT was used)
|
|
*/
|
|
|
|
void _ma_get_status(void* param, int concurrent_insert)
|
|
{
|
|
MARIA_HA *info=(MARIA_HA*) param;
|
|
DBUG_ENTER("_ma_get_status");
|
|
DBUG_PRINT("info",("key_file: %ld data_file: %ld concurrent_insert: %d",
|
|
(long) info->s->state.state.key_file_length,
|
|
(long) info->s->state.state.data_file_length,
|
|
concurrent_insert));
|
|
#ifndef DBUG_OFF
|
|
if (info->state->key_file_length > info->s->state.state.key_file_length ||
|
|
info->state->data_file_length > info->s->state.state.data_file_length)
|
|
DBUG_PRINT("warning",("old info: key_file: %ld data_file: %ld",
|
|
(long) info->state->key_file_length,
|
|
(long) info->state->data_file_length));
|
|
#endif
|
|
info->save_state=info->s->state.state;
|
|
info->state= &info->save_state;
|
|
info->append_insert_at_end= concurrent_insert;
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
|
|
void _ma_update_status(void* param)
|
|
{
|
|
MARIA_HA *info=(MARIA_HA*) param;
|
|
MARIA_SHARE *share= info->s;
|
|
/*
|
|
Because someone may have closed the table we point at, we only
|
|
update the state if its our own state. This isn't a problem as
|
|
we are always pointing at our own lock or at a read lock.
|
|
(This is enforced by thr_multi_lock.c)
|
|
*/
|
|
if (info->state == &info->save_state)
|
|
{
|
|
#ifndef DBUG_OFF
|
|
DBUG_PRINT("info",("updating status: key_file: %ld data_file: %ld",
|
|
(long) info->state->key_file_length,
|
|
(long) info->state->data_file_length));
|
|
if (info->state->key_file_length < share->state.state.key_file_length ||
|
|
info->state->data_file_length < share->state.state.data_file_length)
|
|
DBUG_PRINT("warning",("old info: key_file: %ld data_file: %ld",
|
|
(long) share->state.state.key_file_length,
|
|
(long) share->state.state.data_file_length));
|
|
#endif
|
|
/*
|
|
we are going to modify the state without lock's log, this would break
|
|
recovery if done with a transactional table.
|
|
*/
|
|
DBUG_ASSERT(!info->s->base.born_transactional);
|
|
share->state.state= *info->state;
|
|
info->state= &share->state.state;
|
|
}
|
|
info->append_insert_at_end= 0;
|
|
}
|
|
|
|
|
|
void _ma_restore_status(void *param)
|
|
{
|
|
MARIA_HA *info= (MARIA_HA*) param;
|
|
info->state= &info->s->state.state;
|
|
info->append_insert_at_end= 0;
|
|
}
|
|
|
|
|
|
void _ma_copy_status(void* to,void *from)
|
|
{
|
|
((MARIA_HA*) to)->state= &((MARIA_HA*) from)->save_state;
|
|
}
|
|
|
|
|
|
/*
|
|
Check if should allow concurrent inserts
|
|
|
|
IMPLEMENTATION
|
|
Allow concurrent inserts if we don't have a hole in the table or
|
|
if there is no active write lock and there is active read locks and
|
|
maria_concurrent_insert == 2. In this last case the new
|
|
row('s) are inserted at end of file instead of filling up the hole.
|
|
|
|
The last case is to allow one to inserts into a heavily read-used table
|
|
even if there is holes.
|
|
|
|
NOTES
|
|
If there is a an rtree indexes in the table, concurrent inserts are
|
|
disabled in maria_open()
|
|
|
|
RETURN
|
|
0 ok to use concurrent inserts
|
|
1 not ok
|
|
*/
|
|
|
|
my_bool _ma_check_status(void *param)
|
|
{
|
|
MARIA_HA *info=(MARIA_HA*) param;
|
|
/*
|
|
The test for w_locks == 1 is here because this thread has already done an
|
|
external lock (in other words: w_locks == 1 means no other threads has
|
|
a write lock)
|
|
*/
|
|
DBUG_PRINT("info",("dellink: %ld r_locks: %u w_locks: %u",
|
|
(long) info->s->state.dellink, (uint) info->s->r_locks,
|
|
(uint) info->s->w_locks));
|
|
return (my_bool) !(info->s->state.dellink == HA_OFFSET_ERROR ||
|
|
(maria_concurrent_insert == 2 && info->s->r_locks &&
|
|
info->s->w_locks == 1));
|
|
}
|
|
|
|
|
|
/****************************************************************************
|
|
** functions to read / write the state
|
|
****************************************************************************/
|
|
|
|
int _ma_readinfo(register MARIA_HA *info __attribute__ ((unused)),
|
|
int lock_type __attribute__ ((unused)),
|
|
int check_keybuffer __attribute__ ((unused)))
|
|
{
|
|
#ifdef MARIA_EXTERNAL_LOCKING
|
|
DBUG_ENTER("_ma_readinfo");
|
|
|
|
if (info->lock_type == F_UNLCK)
|
|
{
|
|
MARIA_SHARE *share=info->s;
|
|
if (!share->tot_locks)
|
|
{
|
|
/* should not be done for transactional tables */
|
|
if (_ma_state_info_read_dsk(share->kfile.file, &share->state))
|
|
{
|
|
int error=my_errno ? my_errno : -1;
|
|
my_errno=error;
|
|
DBUG_RETURN(1);
|
|
}
|
|
}
|
|
if (check_keybuffer)
|
|
VOID(_ma_test_if_changed(info));
|
|
info->invalidator=info->s->invalidator;
|
|
}
|
|
else if (lock_type == F_WRLCK && info->lock_type == F_RDLCK)
|
|
{
|
|
my_errno=EACCES; /* Not allowed to change */
|
|
DBUG_RETURN(-1); /* when have read_lock() */
|
|
}
|
|
DBUG_RETURN(0);
|
|
#else
|
|
return 0;
|
|
#endif /* defined(MARIA_EXTERNAL_LOCKING) */
|
|
} /* _ma_readinfo */
|
|
|
|
|
|
/*
|
|
Every isam-function that uppdates the isam-database MUST end with this
|
|
request
|
|
|
|
NOTES
|
|
my_errno is not changed if this succeeds!
|
|
*/
|
|
|
|
int _ma_writeinfo(register MARIA_HA *info, uint operation)
|
|
{
|
|
int error,olderror;
|
|
MARIA_SHARE *share= info->s;
|
|
DBUG_ENTER("_ma_writeinfo");
|
|
DBUG_PRINT("info",("operation: %u tot_locks: %u", operation,
|
|
share->tot_locks));
|
|
|
|
error=0;
|
|
if (share->tot_locks == 0 && !share->base.born_transactional)
|
|
{
|
|
/* transactional tables flush their state at Checkpoint */
|
|
if (operation)
|
|
{ /* Two threads can't be here */
|
|
olderror= my_errno; /* Remember last error */
|
|
share->state.process= share->last_process= share->this_process;
|
|
share->state.unique= info->last_unique= info->this_unique;
|
|
share->state.update_count= info->last_loop= ++info->this_loop;
|
|
if ((error= _ma_state_info_write_sub(share->kfile.file,
|
|
&share->state, 1)))
|
|
olderror=my_errno;
|
|
#ifdef __WIN__
|
|
if (maria_flush)
|
|
{
|
|
_commit(share->kfile.file);
|
|
_commit(info->dfile.file);
|
|
}
|
|
#endif
|
|
my_errno=olderror;
|
|
}
|
|
}
|
|
else if (operation)
|
|
share->changed= 1; /* Mark keyfile changed */
|
|
DBUG_RETURN(error);
|
|
} /* _ma_writeinfo */
|
|
|
|
|
|
/* Test if someone has changed the database */
|
|
/* (Should be called after readinfo) */
|
|
|
|
int _ma_test_if_changed(register MARIA_HA *info)
|
|
{
|
|
MARIA_SHARE *share=info->s;
|
|
if (share->state.process != share->last_process ||
|
|
share->state.unique != info->last_unique ||
|
|
share->state.update_count != info->last_loop)
|
|
{ /* Keyfile has changed */
|
|
DBUG_PRINT("info",("index file changed"));
|
|
if (share->state.process != share->this_process)
|
|
VOID(flush_pagecache_blocks(share->pagecache, &share->kfile,
|
|
FLUSH_RELEASE));
|
|
share->last_process=share->state.process;
|
|
info->last_unique= share->state.unique;
|
|
info->last_loop= share->state.update_count;
|
|
info->update|= HA_STATE_WRITTEN; /* Must use file on next */
|
|
info->data_changed= 1; /* For maria_is_changed */
|
|
return 1;
|
|
}
|
|
return (!(info->update & HA_STATE_AKTIV) ||
|
|
(info->update & (HA_STATE_WRITTEN | HA_STATE_DELETED |
|
|
HA_STATE_KEY_CHANGED)));
|
|
} /* _ma_test_if_changed */
|
|
|
|
|
|
/*
|
|
Put a mark in the .MYI file that someone is updating the table
|
|
|
|
|
|
DOCUMENTATION
|
|
|
|
state.open_count in the .MYI file is used the following way:
|
|
- For the first change of the .MYI file in this process open_count is
|
|
incremented by _ma_mark_file_changed(). (We have a write lock on the file
|
|
when this happens)
|
|
- In maria_close() it's decremented by _ma_decrement_open_count() if it
|
|
was incremented in the same process.
|
|
|
|
This mean that if we are the only process using the file, the open_count
|
|
tells us if the MARIA file wasn't properly closed. (This is true if
|
|
my_disable_locking is set).
|
|
|
|
open_count is not maintained on disk for transactional or temporary tables.
|
|
*/
|
|
|
|
|
|
int _ma_mark_file_changed(MARIA_HA *info)
|
|
{
|
|
uchar buff[3];
|
|
register MARIA_SHARE *share=info->s;
|
|
DBUG_ENTER("_ma_mark_file_changed");
|
|
|
|
if (!(share->state.changed & STATE_CHANGED) || ! share->global_changed)
|
|
{
|
|
share->state.changed|=(STATE_CHANGED | STATE_NOT_ANALYZED |
|
|
STATE_NOT_OPTIMIZED_KEYS);
|
|
if (!share->global_changed)
|
|
{
|
|
share->global_changed=1;
|
|
share->state.open_count++;
|
|
}
|
|
/*
|
|
temp tables don't need an open_count as they are removed on crash;
|
|
transactional tables are fixed by log-based recovery, so don't need an
|
|
open_count either (and we thus avoid the disk write below).
|
|
*/
|
|
if (!(share->temporary | share->base.born_transactional))
|
|
{
|
|
mi_int2store(buff,share->state.open_count);
|
|
buff[2]=1; /* Mark that it's changed */
|
|
DBUG_RETURN(my_pwrite(share->kfile.file, buff, sizeof(buff),
|
|
sizeof(share->state.header),
|
|
MYF(MY_NABP)));
|
|
}
|
|
}
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
|
|
/*
|
|
This is only called by close or by extra(HA_FLUSH) if the OS has the pwrite()
|
|
call. In these context the following code should be safe!
|
|
*/
|
|
|
|
int _ma_decrement_open_count(MARIA_HA *info)
|
|
{
|
|
uchar buff[2];
|
|
register MARIA_SHARE *share=info->s;
|
|
int lock_error=0,write_error=0;
|
|
if (share->global_changed)
|
|
{
|
|
uint old_lock=info->lock_type;
|
|
share->global_changed=0;
|
|
lock_error=maria_lock_database(info,F_WRLCK);
|
|
/* Its not fatal even if we couldn't get the lock ! */
|
|
if (share->state.open_count > 0)
|
|
{
|
|
share->state.open_count--;
|
|
if (!(share->temporary | share->base.born_transactional))
|
|
{
|
|
mi_int2store(buff,share->state.open_count);
|
|
write_error= my_pwrite(share->kfile.file, buff, sizeof(buff),
|
|
sizeof(share->state.header),
|
|
MYF(MY_NABP));
|
|
}
|
|
}
|
|
if (!lock_error)
|
|
lock_error=maria_lock_database(info,old_lock);
|
|
}
|
|
return test(lock_error || write_error);
|
|
}
|