mariadb/storage/maria/ma_state.c
2020-06-14 19:39:43 +03:00

879 lines
26 KiB
C

/* Copyright (C) 2008 Sun AB and Michael Widenius
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
/*
Functions to maintain live statistics for Maria transactional tables
and versioning for not transactional tables
See WL#3138; Maria - fast "SELECT COUNT(*) FROM t;" and "CHECKSUM TABLE t"
for details about live number of rows and live checksums
TODO
- Allocate MARIA_USED_TABLES and MARIA_STATE_HISTORY from a global pool (to
avoid calls to malloc())
- In _ma_trnman_end_trans_hook(), don't call _ma_remove_not_visible_states()
every time. One could, for example, call it only when more than
10 transactions have ended since the last time it was called.
*/
#include "maria_def.h"
#include "trnman.h"
#include "ma_trnman.h"
#include "ma_blockrec.h"
/**
  @brief Prepare the start-of-transaction and live state for a table

  @fn    _ma_setup_live_state
  @param info  Maria handler

  @notes
    On success trn->used_tables contains an entry for info->s, and
    info->state_start / info->state point at the start and current
    (live) state stored in that entry.  The entry's use_count is
    incremented on every call.

  @TODO
    Change trn->table_list to a hash and share->state_history to a binary tree

  @return
  @retval 0  ok
  @retval 1  error (maria_create_trn_hook() failed or out of memory)
*/
my_bool _ma_setup_live_state(MARIA_HA *info)
{
  TRN *trn;
  MARIA_SHARE *share= info->s;
  MARIA_USED_TABLES *entry;
  MARIA_STATE_HISTORY *visible;
  DBUG_ENTER("_ma_setup_live_state");
  DBUG_PRINT("enter", ("info: %p", info));

  DBUG_ASSERT(share->lock_key_trees);

  if (maria_create_trn_hook(info))
    DBUG_RETURN(1);

  trn= info->trn;

  /* Search the transaction's table list for this share */
  entry= (MARIA_USED_TABLES*) trn->used_tables;
  while (entry && entry->share != share)
    entry= entry->next;

  if (!entry)
  {
    /* Table was not used before, create new table state entry */
    entry= (MARIA_USED_TABLES*) my_malloc(PSI_INSTRUMENT_ME, sizeof(*entry),
                                          MYF(MY_WME | MY_ZEROFILL));
    if (!entry)
      DBUG_RETURN(1);

    /* Link the new entry first in the list */
    entry->next= trn->used_tables;
    trn->used_tables= entry;
    entry->share= share;

    mysql_mutex_lock(&share->intern_lock);
    share->in_trans++;
    DBUG_PRINT("info", ("share: %p in_trans: %d",
                        share, share->in_trans));

    /*
      The share must stay locked while we walk the history so that we
      never follow a link that a concurrently running checkpoint deletes.
      Comparing trids is sufficient here (no need for
      trnman_can_read_from()) because history->trid is a commit_trid.
    */
    visible= share->state_history;
    while (trn->trid <= visible->trid)
      visible= visible->next;
    mysql_mutex_unlock(&share->intern_lock);

    /* 'visible' can't be deleted as it's the first state visible for us */
    entry->state_start= entry->state_current= visible->state;
    entry->state_current.changed= entry->state_current.no_transid= 0;

    DBUG_PRINT("info", ("records: %ld", (ulong) entry->state_start.records));
  }

  /* Hook the handler up to the (new or already existing) entry */
  info->state_start= &entry->state_start;
  info->state= &entry->state_current;
  info->used_tables= entry;
  entry->use_count++;

  /*
    Remember in the transaction state if rows are written without transid
    (no versioning).  _ma_trnman_end_trans_hook() tests this flag to make
    the state visible for everyone at end of transaction.
  */
  entry->state_current.no_transid|= !(info->row_flag & ROW_FLAG_TRANSID);

  DBUG_PRINT("exit", ("tables: %p info->state: %p", entry, info->state));
  DBUG_RETURN(0);
}
/**
  @brief Free history states that no transaction can see anymore

  @fn    _ma_remove_not_visible_states()
  @param org_history       Head of the history list
  @param all               1 if the first state should also be deleted when
                           it is visible for all.  For the moment this is
                           only used on close() of table.
  @param trnman_is_locked  Set to 1 if we already have a lock on trnman.

  @notes
    Items in the history list are assumed to be ordered by commit_trid.

    A state is no longer visible if no transaction was started between
    the commit_trid's of it and the previously kept (newer) state.

    As long as some states exist, the newest (= last committed) state is
    kept as the first state in the history.  This allows us to just move
    the history from the global list to the share when we open the table.

    Note that if 'all' is set trnman_is_locked must be 0, because
    trnman_get_min_trid() will take a lock on trnman.

  @return
  @retval Pointer to new history list (0 if the list is empty or was
          completely removed)
*/
MARIA_STATE_HISTORY
*_ma_remove_not_visible_states(MARIA_STATE_HISTORY *org_history,
                               my_bool all,
                               my_bool trnman_is_locked)
{
  TrID kept_trid;
  MARIA_STATE_HISTORY *cur, **link, *following;
  DBUG_ENTER("_ma_remove_not_visible_states");

  if (!org_history)
    DBUG_RETURN(0);                             /* Not versioned table */

  /* The head is always kept during the scan; start with its trid */
  kept_trid= org_history->trid;
  link= &org_history->next;
  cur= org_history->next;

  while (cur)
  {
    following= cur->next;
    if (trnman_exists_active_transactions(cur->trid, kept_trid,
                                          trnman_is_locked))
    {
      /* Still needed by someone: relink it after the last kept state */
      *link= cur;
      link= &cur->next;
      kept_trid= cur->trid;
    }
    else
    {
      DBUG_PRINT("info", ("removing history->trid: %lu next: %lu",
                          (ulong) cur->trid, (ulong) kept_trid));
      my_free(cur);
    }
    cur= following;
  }
  *link= 0;                                     /* Terminate the kept list */

  /*
    If only the head is left and the caller asked for it, delete the head
    too when it's visible for all.
  */
  if (all && link == &org_history->next &&
      kept_trid < trnman_get_min_trid())
  {
    my_free(org_history);
    org_history= 0;
  }
  DBUG_RETURN(org_history);
}
/**
  @brief Remove unused state history, taking the needed locks first

  @param share  Maria table information
  @param all    1 if the first state should also be deleted when it is
                visible for all.  For the moment this is only used on
                close() of table.

  @notes
    Neither share nor trnman is locked on entry.
    trnman must be locked before share->intern_lock, because
    _ma_trnman_end_trans_hook() runs with a lock on trnman and then
    takes share->intern_lock.

    NOTE(review): _ma_remove_not_visible_states() is always called with
    trnman_is_locked == 1 here, also when 'all' is set or when trnman is
    not initialized and therefore not locked.  Its documentation says
    trnman_is_locked must be 0 when 'all' is set -- confirm this is
    intended.
*/
void _ma_remove_not_visible_states_with_lock(MARIA_SHARE *share,
                                             my_bool all)
{
  my_bool trnman_locked;

  /* Only lock trnman if it has been initialized */
  trnman_locked= trman_is_inited();
  if (trnman_locked)
    trnman_lock();

  mysql_mutex_lock(&share->intern_lock);
  share->state_history=
    _ma_remove_not_visible_states(share->state_history, all, 1);
  mysql_mutex_unlock(&share->intern_lock);

  if (trnman_locked)
    trnman_unlock();
}
/*
  Throw away all but the first entry of share->state_history and make
  that entry equal to the current share state.

  @param info  Maria handler; its state and state_start are re-pointed
               to the remaining history entry

  @notes
    Used after repair/rename/drop as then all rows are visible for everyone.
    Does nothing if the share has no history or no versioning.
*/
void _ma_reset_state(MARIA_HA *info)
{
  MARIA_SHARE *share= info->s;
  MARIA_STATE_HISTORY *head= share->state_history;
  MARIA_STATE_HISTORY *old, *following;
  DBUG_ENTER("_ma_reset_state");

  /* The test below is always true if share->now_transactional is set */
  if (!head || !share->have_versioning)
    DBUG_VOID_RETURN;

  DBUG_PRINT("info", ("resetting history"));

  /* Set the first history entry to the current state */
  head->state= share->state.state;
  /* Let the table handler use the refreshed history state */
  info->state= info->state_start= &head->state;

  /* Free every older entry */
  old= head->next;
  while (old)
  {
    following= old->next;
    my_free(old);
    old= following;
  }
  head->next= 0;
  head->trid= 0;                                /* Visible for all */

  DBUG_VOID_RETURN;
}
/****************************************************************************
The following functions are called by thr_lock() in threaded applications
for not transactional tables
****************************************************************************/
/*
  Take a private copy of the table's current status

  SYNOPSIS
    _ma_get_status()
    param               Pointer to Maria handler
    concurrent_insert   Set to 1 if we are going to do concurrent inserts
                        (THR_WRITE_CONCURRENT_INSERT was used)

  NOTES
    The share state is copied to info->state_save (with 'changed' cleared)
    and info->state is pointed at that copy.

  RETURN
    0   Always
*/
my_bool _ma_get_status(void* param, my_bool concurrent_insert)
{
  MARIA_HA *info= (MARIA_HA*) param;
  MARIA_SHARE *share= info->s;
  DBUG_ENTER("_ma_get_status");
  DBUG_PRINT("info",("key_file: %ld data_file: %ld concurrent_insert: %d",
                     (long) share->state.state.key_file_length,
                     (long) share->state.state.data_file_length,
                     concurrent_insert));
#ifndef DBUG_OFF
  /* Debug aid: the handler's old state should not be ahead of the share */
  if (info->state->key_file_length > share->state.state.key_file_length ||
      info->state->data_file_length > share->state.state.data_file_length)
    DBUG_PRINT("warning",("old info: key_file: %ld data_file: %ld",
                          (long) info->state->key_file_length,
                          (long) info->state->data_file_length));
#endif
  info->state_save= share->state.state;
  info->state_save.changed= 0;
  info->state= &info->state_save;
  info->append_insert_at_end= concurrent_insert;
  DBUG_RETURN(0);
}
void _ma_update_status(void* param)
{
MARIA_HA *info=(MARIA_HA*) param;
/*
Because someone may have closed the table we point at, we only
update the state if its our own state. This isn't a problem as
we are always pointing at our own lock or at a read lock.
(This is enforced by thr_multi_lock.c)
*/
if (info->state == &info->state_save)
{
MARIA_SHARE *share= info->s;
#ifndef DBUG_OFF
DBUG_PRINT("info",("updating status: key_file: %ld data_file: %ld",
(long) info->state->key_file_length,
(long) info->state->data_file_length));
if (info->state->key_file_length < share->state.state.key_file_length ||
info->state->data_file_length < share->state.state.data_file_length)
DBUG_PRINT("warning",("old info: key_file: %ld data_file: %ld",
(long) share->state.state.key_file_length,
(long) share->state.state.data_file_length));
#endif
/*
we are going to modify the state without lock's log, this would break
recovery if done with a transactional table.
*/
DBUG_ASSERT(!info->s->base.born_transactional);
share->state.state= *info->state;
info->state= &share->state.state;
#ifdef HAVE_QUERY_CACHE
DBUG_PRINT("info", ("invalidator... '%s' (status update)",
info->s->data_file_name.str));
DBUG_ASSERT(info->s->chst_invalidator != NULL);
(*info->s->chst_invalidator)((const char *)info->s->data_file_name.str);
#endif
}
info->append_insert_at_end= 0;
}
/*
  Same as _ma_update_status(), but takes the table lock mutex first to
  protect against someone calling _ma_get_status() from thr_lock() at the
  same time.

  The mutex is only taken when the handler is using its own saved state;
  the share's update_status callback is called in either case.
*/
void _ma_update_status_with_lock(MARIA_HA *info)
{
  MARIA_SHARE *share= info->s;
  /* Decided before the callback runs, as the callback may change state */
  const my_bool need_lock= (info->state == &info->state_save);

  if (need_lock)
    mysql_mutex_lock(&share->lock.mutex);

  (*share->lock.update_status)(info->lock.status_param);

  if (need_lock)
    mysql_mutex_unlock(&share->lock.mutex);
}
void _ma_restore_status(void *param)
{
MARIA_HA *info= (MARIA_HA*) param;
info->state= &info->s->state.state;
info->append_insert_at_end= 0;
}
void _ma_copy_status(void* to, void *from)
{
((MARIA_HA*) to)->state= &((MARIA_HA*) from)->state_save;
}
/*
  Clear the 'changed' flag in the handler's current state

  SYNOPSIS
    _ma_reset_update_flag()
    param               Pointer to Maria handler
    concurrent_insert   Not used

  RETURN
    0   Always
*/
my_bool _ma_reset_update_flag(void *param,
                              my_bool concurrent_insert __attribute__((unused)))
{
  ((MARIA_HA*) param)->state->changed= 0;
  return 0;
}
/*
  Initialize the handler state at start of a statement/transaction

  SYNOPSIS
    _ma_start_trans()
    param               Pointer to Maria handler

  NOTES
    When share->lock_key_trees is not set, info->state is pointed at
    info->state_start and the share's current state is copied into it.
    Nothing is done when lock_key_trees is set.

  RETURN
    0   Always
*/
my_bool _ma_start_trans(void* param)
{
  MARIA_HA *info= (MARIA_HA*) param;

  if (info->s->lock_key_trees)
    return 0;

  info->state= info->state_start;
  *info->state= info->s->state.state;
  return 0;
}
/**
@brief Check if should allow concurrent inserts
@implementation
Allow concurrent inserts if we don't have a hole in the table or
if there is no active write lock and there is active read locks and
maria_concurrent_insert == 2. In this last case the new
row('s) are inserted at end of file instead of filling up the hole.
The last case is to allow one to inserts into a heavily read-used table
even if there is holes.
@notes
If there is a an rtree indexes in the table, concurrent inserts are
disabled in maria_open()
@return
@retval 0 ok to use concurrent inserts
@retval 1 not ok
*/
my_bool _ma_check_status(void *param)
{
MARIA_HA *info=(MARIA_HA*) param;
/*
The test for w_locks == 1 is here because this thread has already done an
external lock (in other words: w_locks == 1 means no other threads has
a write lock)
*/
DBUG_PRINT("info",("dellink: %ld r_locks: %u w_locks: %u",
(long) info->s->state.dellink, (uint) info->s->r_locks,
(uint) info->s->w_locks));
return (my_bool) !(info->s->state.dellink == HA_OFFSET_ERROR ||
(maria_concurrent_insert == 2 && info->s->r_locks &&
info->s->w_locks == 1));
}
/**
  @brief Hook called at end of transaction to store the status of all
         used tables

  @param trn                  Transaction that is ending
  @param commit               If set, the changes recorded for each used
                              table are merged into the share's state
                              history.  If not set, only the bookkeeping
                              (share->in_trans, used_tables) is cleaned up.
  @param active_transactions  If not set, no new history entry is created;
                              the first history entry is updated in place.
                              (Presumably tells whether other transactions
                              are active -- verify against caller.)

  @notes
    This function must be called under trnman_lock in trnman_end_trn()
    because of the following reasons:
    - After trnman_end_trn() is called, the current transaction will be
      regarded as committed and all used tables state_history will be
      visible to other transactions.  To do this, we loop over all used
      tables and create/update a history entry that contains the correct
      state_history for them.

    Every MARIA_USED_TABLES entry is freed and decrement_share_in_trans()
    is called exactly once per entry, also when allocating a history entry
    fails; share->in_trans must stay correct because of the check in
    free_maria_share().

  @return
  @retval 0  ok
  @retval 1  error (out of memory for at least one table)
*/
my_bool _ma_trnman_end_trans_hook(TRN *trn, my_bool commit,
                                  my_bool active_transactions)
{
  my_bool error= 0;
  MARIA_USED_TABLES *tables, *next;
  DBUG_ENTER("_ma_trnman_end_trans_hook");
  DBUG_PRINT("enter", ("trn: %p used_tables: %p", trn, trn->used_tables));

  for (tables= (MARIA_USED_TABLES*) trn->used_tables;
       tables;
       tables= next)
  {
    MARIA_SHARE *share= tables->share;
    next= tables->next;                 /* 'tables' is freed in the loop */
    if (commit)
    {
      MARIA_STATE_HISTORY *history;

      mysql_mutex_lock(&share->intern_lock);

      /* We only have to update history state if something changed */
      if (tables->state_current.changed)
      {
        if (tables->state_current.no_transid)
        {
          /*
            The change was done without using transid on rows (like in
            bulk insert). In this case this thread is the only one
            that is using the table and all rows will be visible
            for all transactions.
          */
          _ma_reset_history(share);
        }
        else
        {
          if (active_transactions && share->now_transactional &&
              trnman_exists_active_transactions(share->state_history->trid,
                                                trn->commit_trid, 1))
          {
            /*
              There exist transactions that are still using the current
              share->state_history.  Create a new history item for this
              commit and add it first in the state_history list. This
              ensures that all history items are stored in the list in
              decreasing trid order.
            */
            if (!(history= my_malloc(PSI_INSTRUMENT_ME, sizeof(*history),
                                     MYF(MY_WME))))
            {
              /* purecov: begin inspected */
              error= 1;
              /*
                We can't record the new state, but we must still drop our
                reference on the share.  decrement_share_in_trans() also
                releases share->intern_lock.
              */
              decrement_share_in_trans(share);
              my_free(tables);
              continue;
              /* purecov: end */
            }
            history->state= share->state_history->state;
            history->next= share->state_history;
            share->state_history= history;
          }
          else
          {
            /* Previous history can't be seen by anyone, reuse old memory */
            history= share->state_history;
            DBUG_PRINT("info", ("removing history->trid: %lu new: %lu",
                                (ulong) history->trid,
                                (ulong) trn->commit_trid));
          }

          /* Apply the deltas this transaction made to the table */
          history->state.records+= (tables->state_current.records -
                                    tables->state_start.records);
          history->state.checksum+= (tables->state_current.checksum -
                                     tables->state_start.checksum);
          history->trid= trn->commit_trid;

          share->state.last_change_trn= trn->commit_trid;

          if (history->next)
          {
            /* Remove not visible states */
            share->state_history= _ma_remove_not_visible_states(history, 0, 1);
          }
          DBUG_PRINT("info", ("share: %p in_trans: %d",
                              share, share->in_trans));
        }
      }
      /* The following calls frees &share->intern_lock */
      decrement_share_in_trans(share);
    }
    else
    {
      /*
        We need to keep share->in_trans correct because of the check
        in free_maria_share()
      */
      mysql_mutex_lock(&share->intern_lock);
      decrement_share_in_trans(share);
    }
    my_free(tables);
  }
  trn->used_tables= 0;
  trn->used_instances= 0;
  DBUG_RETURN(error);
}
/**
  Remove table from trnman_list

  @param info  Maria handler whose share should be removed from the
               used_tables list of info->trn

  @notes
    This is used when we unlock a table from a group of locked tables
    just before doing a rename or drop table.

    share->intern_lock must be locked when function is called.

    Nothing is done if the handler uses dummy_transaction_object.
*/
void _ma_remove_table_from_trnman(MARIA_HA *info)
{
  MARIA_SHARE *share= info->s;
  TRN *trn= info->trn;
  MARIA_USED_TABLES *entry, **link;
  DBUG_ENTER("_ma_remove_table_from_trnman");
  DBUG_PRINT("enter", ("trn: %p used_tables: %p share: %p in_trans: %d",
                       trn, trn->used_tables, share, share->in_trans));

  mysql_mutex_assert_owner(&share->intern_lock);

  if (trn == &dummy_transaction_object)
    DBUG_VOID_RETURN;

  /* Locate the entry for this share, remembering the link pointing to it */
  link= (MARIA_USED_TABLES**) (char*) &trn->used_tables;
  while ((entry= *link) && entry->share != share)
    link= &entry->next;

  if (entry)
  {
    /* Unlink the entry from used_tables */
    *link= entry->next;
    /*
      We don't have to and can't call decrement_share_in_trans(share) here
      as we know there is an active MARIA_HA handler around.
    */
    share->in_trans--;
    my_free(entry);
  }
  else
  {
    /*
      This can only happen in case of rename of intermediate table as
      part of alter table
    */
    DBUG_PRINT("warning", ("share: %p where not in used_tables_list", share));
  }

  /* Reset trn and remove table from used_instances */
  _ma_reset_trn_for_table(info);
  DBUG_VOID_RETURN;
}
/****************************************************************************
The following functions are called by thr_lock() in threaded applications
for transactional tables.
****************************************************************************/
/*
  Set up row format information for the handler before a statement

  SYNOPSIS
    _ma_block_get_status()
    param               Pointer to Aria handler
    concurrent_insert   Set to 1 if we are going to do concurrent inserts
                        (THR_WRITE_CONCURRENT_INSERT was used)

  NOTES
    row_flag and row_base_length start from the share defaults.  If
    concurrent inserts are used or the handler is not in autocommit mode,
    rows also get a transaction id (ROW_FLAG_TRANSID, TRANSID_SIZE extra
    bytes).

  RETURN
    0   Always
*/
my_bool _ma_block_get_status(void* param, my_bool concurrent_insert)
{
  MARIA_HA *info= (MARIA_HA*) param;
  MARIA_SHARE *share= info->s;
  DBUG_ENTER("_ma_block_get_status");
  DBUG_PRINT("enter", ("concurrent_insert %d", concurrent_insert));

  info->row_base_length= share->base_length;
  info->row_flag= share->base.default_row_flag;

  DBUG_ASSERT(!concurrent_insert ||
              info->lock.type == TL_WRITE_CONCURRENT_INSERT);

  if (!concurrent_insert && info->autocommit)
  {
    /* Rows are stored without transid */
    DBUG_ASSERT(info->lock.type != TL_WRITE_CONCURRENT_INSERT);
  }
  else
  {
    /* Rows are stored with transid */
    info->row_flag|= ROW_FLAG_TRANSID;
    info->row_base_length+= TRANSID_SIZE;
  }
  DBUG_RETURN(0);
}
/*
  Set up the handler state at start of a statement/transaction for a
  table using block record format

  SYNOPSIS
    _ma_block_start_trans()
    param               Pointer to Aria handler

  RETURN
    0   ok
    1   error (from _ma_setup_live_state() or maria_create_trn_hook())
*/
my_bool _ma_block_start_trans(void* param)
{
  MARIA_HA *info= (MARIA_HA*) param;
  DBUG_ENTER("_ma_block_start_trans");

  if (info->s->lock_key_trees)
  {
    /*
      Assume for now that this doesn't fail (It can only fail in
      out of memory conditions)
      TODO: Fix this by having one extra state pre-allocated
    */
    DBUG_RETURN(_ma_setup_live_state(info));
  }

  /*
    We come here in the following cases:
    - The table is a temporary table
    - It's a table which is crash safe but not yet versioned, for
      example a table with fulltext or rtree keys

    Set the current state to point to save_state so that the
    block_format code doesn't count the same record twice.
    Copy also the current state. This may have been wrong if the
    same file was used several times in the last statement
  */
  info->state= info->state_start;
  *info->state= info->s->state.state;

  /*
    info->trn is set if this table is already handled and we are
    called from maria_versioning()
  */
  if (!info->s->base.born_transactional || info->trn)
    DBUG_RETURN(0);

  /*
    Assume for now that this doesn't fail (It can only fail in
    out of memory conditions)
  */
  DBUG_RETURN(maria_create_trn_hook(info) != 0);
}
/*
  Status update callback for tables using block record format.

  Intentionally does nothing; 'param' is not used.
*/
void _ma_block_update_status(void *param __attribute__((unused)))
{
}
/*
  Status restore callback for tables using block record format.

  Intentionally does nothing; 'param' is not used.
*/
void _ma_block_restore_status(void *param __attribute__((unused)))
{
}
/**
  Check if concurrent inserts should be allowed

  @param param  Not used

  @notes
    This version always returns 0, i.e. concurrent inserts are always
    reported as ok.

  @return
  @retval 0  ok to use concurrent inserts
  @retval 1  not ok
*/
my_bool _ma_block_check_status(void *param __attribute__((unused)))
{
return (my_bool) 0;
}
/*
  Get status when transactional but not versioned

  SYNOPSIS
    _ma_block_start_trans_no_versioning()
    param               Pointer to Aria handler

  NOTES
    Clears 'changed' in the state the handler was using, then points the
    handler at state_start and fills it with the share's current state.
    A transaction is created through maria_create_trn_hook() if the
    handler doesn't have one.

  RETURN
    0   ok
    #   Result of maria_create_trn_hook() if it had to be called
*/
my_bool _ma_block_start_trans_no_versioning(void* param)
{
  MARIA_HA *info= (MARIA_HA*) param;
  DBUG_ENTER("_ma_block_start_trans_no_versioning");
  DBUG_ASSERT(info->s->base.born_transactional && !info->s->lock_key_trees);

  /* Same as _ma_reset_update_flag(); done on the previously used state */
  info->state->changed= 0;

  info->state= info->state_start;
  *info->state= info->s->state.state;

  if (info->trn)
    DBUG_RETURN(0);

  /*
    Assume for now that this doesn't fail (It can only fail in
    out of memory conditions)
  */
  DBUG_RETURN(maria_create_trn_hook(info));
}
/**
  Enable/disable versioning

  @param info        Maria handler
  @param versioning  1 to enable versioning, 0 to disable it

  @notes
    Does nothing if the share doesn't have versioning.
    Otherwise share->lock_key_trees is set to 'versioning', the row
    format information is recomputed through _ma_block_get_status() and
    info->state / info->state_start are pointed at share->state.common
    (versioning on) or share->state.state (versioning off).
*/
void maria_versioning(MARIA_HA *info, my_bool versioning)
{
  MARIA_SHARE *share= info->s;
  enum thr_lock_type saved_type;
  DBUG_ENTER("maria_versioning");

  /* For now, this is a hack */
  if (!share->have_versioning)
    DBUG_VOID_RETURN;

  share->lock_key_trees= versioning;

  /* Set up info->lock.type temporary for _ma_block_get_status() */
  saved_type= info->lock.type;
  info->lock.type= versioning ? TL_WRITE_CONCURRENT_INSERT : TL_WRITE;
  _ma_block_get_status((void*) info, versioning);
  info->lock.type= saved_type;

  /* Without versioning we change the global values by default */
  info->state= versioning ? &share->state.common : &share->state.state;
  info->state_start= info->state;               /* Initial values */

  DBUG_VOID_RETURN;
}
/**
  Update data_file_length to new length

  @param share       Maria table information
  @param new_length  New length of the data file

  NOTES
    Only used by block records.

    The stored length is only changed if new_length is bigger than the
    current value.  STATE_DATA_FILE_FULL is set if the new length reaches
    share->base.max_data_file_length.

    share->intern_lock is taken around the update, except for internal
    tables.
*/
void _ma_set_share_data_file_length(MARIA_SHARE *share, ulonglong new_length)
{
  const my_bool use_mutex= !share->internal_table;

  if (use_mutex)
    mysql_mutex_lock(&share->intern_lock);

  if (new_length > share->state.state.data_file_length)
  {
    share->state.state.data_file_length= new_length;
    if (share->base.max_data_file_length <= new_length)
    {
      /* Give an error on next insert */
      share->state.changed|= STATE_DATA_FILE_FULL;
    }
  }

  if (use_mutex)
    mysql_mutex_unlock(&share->intern_lock);
}
/**
  Copy state information that was updated while the table was used
  in not transactional mode

  @param info  Maria handler

  @notes
    Copies 'records' and 'checksum' from the handler's current state to
    the share's state.
*/
void _ma_copy_nontrans_state_information(MARIA_HA *info)
{
  MARIA_SHARE *share= info->s;

  share->state.state.checksum= info->state->checksum;
  share->state.state.records= info->state->records;
}
/**
  Reset history

  @param share  Maria table information; share->state_history must not
                be NULL as it is dereferenced without a check

  @notes
    The first history entry is made visible for all (trid 0) and set to
    the share's current state; all other entries are freed.

    The caller is expected to be the only user of the table, as during
    repair.  Also called from _ma_trnman_end_trans_hook() for changes made
    without transid on rows.
*/
void _ma_reset_history(MARIA_SHARE *share)
{
  MARIA_STATE_HISTORY *head= share->state_history;
  MARIA_STATE_HISTORY *old, *following;
  DBUG_ENTER("_ma_reset_history");

  /* Detach the older entries before freeing them */
  old= head->next;
  head->next= 0;

  head->trid= 0;                                /* Visible for all */
  head->state= share->state.state;

  while (old)
  {
    following= old->next;
    my_free(old);
    old= following;
  }
  DBUG_VOID_RETURN;
}
/****************************************************************************
Virtual functions to check if row is visible
****************************************************************************/
/**
  Row is always visible

  This is for tables without concurrent insert

  @param info  Not used

  @return
  @retval 1  Always (row is visible)
*/
my_bool _ma_row_visible_always(MARIA_HA *info __attribute__((unused)))
{
return 1;
}
/**
  Row visibility for non transactional tables with concurrent insert

  @param info  Maria handler positioned on the row to check

  @implementation
    When we got our table lock, we saved the current
    data_file_length. Concurrent inserts always go to the end of the
    file. So we can test if the found key references a new record.

  @return
  @retval 1  Row position is below the saved data_file_length (visible)
  @retval 0  Row is not visible
*/
my_bool _ma_row_visible_non_transactional_table(MARIA_HA *info)
{
  if (info->cur_row.lastpos < info->state->data_file_length)
    return 1;
  return 0;
}
/**
  Row visibility for transactional tables with versioning

  @param info  Maria handler positioned on the row to check

  @TODO
    Add test if found key was marked deleted and it was deleted by
    us. In that case we should return 0

  @return Result of trnman_can_read_from() for the handler's transaction
          and the transaction id of the current row
*/
my_bool _ma_row_visible_transactional_table(MARIA_HA *info)
{
  TRN *trn= info->trn;

  return trnman_can_read_from(trn, info->cur_row.trid);
}