2007-01-18 20:38:14 +01:00
|
|
|
/* Copyright (C) 2007 Michael Widenius
|
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
2007-03-02 11:20:23 +01:00
|
|
|
the Free Software Foundation; version 2 of the License.
|
2007-01-18 20:38:14 +01:00
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with this program; if not, write to the Free Software
|
|
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
|
|
|
|
|
|
|
|
/*
|
|
|
|
Bitmap handling (for records in block)
|
|
|
|
|
|
|
|
The data file starts with a bitmap page, followed by as many data
|
|
|
|
pages as the bitmap can cover. After this there is a new bitmap page
|
|
|
|
and more data pages etc.
|
|
|
|
|
|
|
|
The bitmap code assumes there is always an active bitmap page and thus
|
|
|
|
that there is at least one bitmap page in the file
|
|
|
|
|
|
|
|
Structure of bitmap page:
|
|
|
|
|
|
|
|
Fixed size records (to be implemented later):
|
|
|
|
|
|
|
|
2 bits are used to indicate:
|
|
|
|
|
|
|
|
0 Empty
|
2007-04-19 12:18:56 +02:00
|
|
|
1 0-75 % full (at least room for 2 records)
|
2007-01-18 20:38:14 +01:00
|
|
|
2 75-100 % full (at least room for one record)
|
|
|
|
3 100 % full (no more room for records)
|
|
|
|
|
|
|
|
Assuming 8K pages, this will allow us to map:
|
|
|
|
8192 (bytes per page) * 4 (pages mapped per byte) * 8192 (page size)= 256M
|
|
|
|
|
|
|
|
(For Maria this will be 7*4 * 8192 = 224K smaller because of LSN)
|
|
|
|
|
|
|
|
Note that for fixed size rows, we can't add more columns without doing
|
|
|
|
a full reorganization of the table. The user can always force a dynamic
|
|
|
|
size row format by specifying ROW_FORMAT=dynamic.
|
|
|
|
|
|
|
|
|
|
|
|
Dynamic size records:
|
|
|
|
|
2008-01-07 17:54:41 +01:00
|
|
|
3 bits are used to indicate Bytes free in 8K page
|
2007-01-18 20:38:14 +01:00
|
|
|
|
2008-01-07 17:54:41 +01:00
|
|
|
0 Empty page 8176 (head or tail)
|
|
|
|
1 0-30 % full (at least room for 3 records) 5724
|
|
|
|
2 30-60 % full (at least room for 2 records) 3271
|
|
|
|
3 60-90 % full (at least room for one record) 818
|
|
|
|
4 100 % full (no more room for records) 0
|
|
|
|
5 Tail page, 0-40 % full 4906
|
|
|
|
6 Tail page, 40-80 % full 1636
|
|
|
|
7 Full tail page or full blob page 0
|
2007-01-18 20:38:14 +01:00
|
|
|
|
|
|
|
Assuming 8K pages, this will allow us to map:
|
|
|
|
8192 (bytes per page) * 8 bits/byte / 3 bits/page * 8192 (page size)= 170.7M
|
|
|
|
|
|
|
|
Note that values 1-3 may be adjusted for each individual table based on
|
|
|
|
'min record length'. Tail pages are for overflow data which can be of
|
|
|
|
any size and thus doesn't have to be adjusted for different tables.
|
|
|
|
If we add more columns to the table, some of the originally calculated
|
|
|
|
'cut off' points may not be optimal, but they shouldn't be 'drastically
|
|
|
|
wrong'.
|
|
|
|
|
|
|
|
When allocating data from the bitmap, we are trying to do it in a
|
|
|
|
'best fit' manner. Blobs and varchar blocks are given out in large
|
|
|
|
continuous extents to allow fast access to these. Before allowing a
|
|
|
|
row to 'flow over' to other blocks, we will compact the page and use
|
|
|
|
all space on it. If there are many rows in the page, we will ensure
|
|
|
|
there is *LEFT_TO_GROW_ON_SPLIT* bytes left on the page to allow other
|
|
|
|
rows to grow.
|
|
|
|
|
|
|
|
The bitmap format allows us to extend the row file in big chunks, if needed.
|
|
|
|
|
|
|
|
When calculating the size for a packed row, we will calculate the following
|
|
|
|
things separately:
|
|
|
|
- Row header + null_bits + empty_bits fixed size segments etc.
|
|
|
|
- Size of all char/varchar fields
|
|
|
|
- Size of each blob field
|
|
|
|
|
|
|
|
The bitmap handler will get all the above information and return
|
2007-10-19 23:24:22 +02:00
|
|
|
either one page or a set of pages to put the different parts.
|
2007-01-18 20:38:14 +01:00
|
|
|
|
|
|
|
Bitmaps are read on demand in response to insert/delete/update operations.
|
|
|
|
The following bitmap pointers will be cached and stored on disk on close:
|
|
|
|
- Current insert_bitmap; When inserting new data we will first try to
|
2007-04-19 12:18:56 +02:00
|
|
|
fill this one.
|
2007-01-18 20:38:14 +01:00
|
|
|
- First bitmap which is not completely full. This is updated when we
|
2007-04-19 12:18:56 +02:00
|
|
|
free data with an update or delete.
|
2007-01-18 20:38:14 +01:00
|
|
|
|
|
|
|
While flushing out bitmaps, we will cache the status of the bitmap in memory
|
|
|
|
to avoid having to read a bitmap for insert of new data that will not
|
|
|
|
be of any use
|
|
|
|
- Total empty space
|
|
|
|
- Largest number of continuous pages
|
|
|
|
|
|
|
|
Bitmap ONLY goes to disk in the following scenarios
|
|
|
|
- The file is closed (and we flush all changes to disk)
|
|
|
|
- On checkpoint
|
|
|
|
(Ie: When we do a checkpoint, we have to ensure that all bitmaps are
|
|
|
|
put on disk even if they are not in the page cache).
|
2011-01-24 14:19:40 +01:00
|
|
|
- When explicitly requested (for example on backup or after recovery,
|
2007-01-18 20:38:14 +01:00
|
|
|
to simplify things)
|
2007-07-01 15:20:57 +02:00
|
|
|
|
|
|
|
The flow of writing a row is that:
|
2011-01-24 14:19:40 +01:00
|
|
|
- Mark the bitmap not flushable (_ma_bitmap_flushable(X, 1))
|
2007-07-01 15:20:57 +02:00
|
|
|
- Lock the bitmap
|
|
|
|
- Decide which data pages we will write to
|
|
|
|
- Mark them full in the bitmap page so that other threads do not try to
|
|
|
|
use the same data pages as us
|
|
|
|
- We unlock the bitmap
|
|
|
|
- Write the data pages
|
|
|
|
- Lock the bitmap
|
|
|
|
- Correct the bitmap page with the true final occupation of the data
|
|
|
|
pages (that is, we marked pages full but when we are done we realize
|
|
|
|
we didn't fill them)
|
|
|
|
- Unlock the bitmap.
|
2011-01-24 14:19:40 +01:00
|
|
|
- Mark the bitmap flushable (_ma_bitmap_flushable(X, -1))
|
2007-01-18 20:38:14 +01:00
|
|
|
*/
|
|
|
|
|
|
|
|
#include "maria_def.h"
|
|
|
|
#include "ma_blockrec.h"
|
|
|
|
|
|
|
|
#define FULL_HEAD_PAGE 4
|
|
|
|
#define FULL_TAIL_PAGE 7
|
|
|
|
|
2011-01-24 14:19:40 +01:00
|
|
|
/*
  Human-readable descriptions of the 3-bit page patterns used for
  dynamic size records. The entries are listed in the order of the
  pattern values 0-7 described in the file header: 0 is an empty page,
  1-3 are partially filled head pages, 4 is a full head page
  (FULL_HEAD_PAGE), 5-6 are partially filled tail pages and 7 is a
  full tail or blob page (FULL_TAIL_PAGE).
*/
const char *bits_to_txt[]=
|
|
|
|
{
|
|
|
|
"empty", "00-30% full", "30-60% full", "60-90% full", "full",
|
|
|
|
"tail 00-40 % full", "tail 40-80 % full", "tail/blob full"
|
|
|
|
};
|
|
|
|
|
WL#3072 - Maria recovery.
* fix for bitmap vs checkpoint bug which could lead to corrupted
tables in case of crashes at certain moments: a bitmap could be flushed
to disk even though it was inconsistent with the log (it could be
flushed before REDO-UNDO are written to the log). One bug remains, need
code from others. Tests added. Fix is to pin unflushable bitmap pages,
and let checkpoint wait for them to be flushable.
* fix for long_trid!=0 assertion failure at Recovery.
* less useless wakeups in the background flush|checkpoint thread.
* store global_trid_generator in checkpoint record.
mysql-test/r/maria-recovery.result:
result update
mysql-test/t/maria-recovery.test:
make it easier to locate subtests
storage/maria/ma_bitmap.c:
When we send a bitmap to the pagecache, if this bitmap is not in a
flushable state we keep it pinned and add it to a list, it will be
unpinned when the bitmap is flushable again.
A new function _ma_bitmap_flush_all() used by checkpoint.
A new function _ma_bitmap_flushable() used by block format to signal
when it starts modifying a bitmap and when it is done with it.
storage/maria/ma_blockrec.c:
When starting a row operation (insert/update/delete), mark that
the bitmap is not flushable (because for example INSERT is going
to over-allocate in the bitmap to prevent other threads from using
our data pages). If a checkpoint comes at this moment it will wait
for the bitmap to be flushable before flushing it.
When the operation ends, bitmap becomes flushable again; that
transition is done under the bitmap's mutex (needed for correct
synchro with a concurrent checkpoint); but for INSERT/UPDATE this
happens inside _ma_bitmap_release_unused() at a place where it already
has the mutex, so the only penalty (mutex adding) is in DELETE and UNDO
of INSERT. In case of errors after setting the bitmap unflushable,
we must always set it back to flushable or checkpoint would block.
Debug possibilities to force a sleep while the bitmap is over-allocated.
In case of error in get_head_or_tail() in allocate_and_write_block_record(),
we still need to unpin all pages.
Bugfix: _ma_apply_redo_insert_row_blobs() produced wrong
data_file_length.
storage/maria/ma_blockrec.h:
new bitmap calls.
storage/maria/ma_checkpoint.c:
filter_flush_indirect not needed anymore (flushing bitmap
pages happens in _ma_bitmap_flush_all() now). So
st_filter_param::is_data_file|pages_covered_by_bitmap not needed.
Other filter_flush* don't need to flush bitmap anymore.
Add debug possibility to flush all bitmap pages outside of a checkpoint,
to simulate pagecache LRU eviction.
When the background flush/checkpoint thread notices it has nothing
to flush, it now sleeps directly until the next potential checkpoint
moment instead of waking up every second.
When in checkpoint we decide to not store a table in the checkpoint record
(because it has logged no writes for example), we can also skip flushing
this table.
storage/maria/ma_commit.c:
comment is out-of-date
storage/maria/ma_key_recover.c:
comment fix
storage/maria/ma_loghandler.c:
comment is out-of-date
storage/maria/ma_open.c:
comment is out-of-date
storage/maria/ma_pagecache.c:
comment for bug to fix. And we don't take checkpoints at end of REDO
phase yet so can trust block->type.
storage/maria/ma_recovery.c:
Comments. Now-unneeded code for incomplete REDO-UNDO groups removed.
When we forget about an old transaction we must really forget
about it with bzero() (fixes the "long_trid!=0 assertion" recovery
bug). When we delete a row with maria_delete() we turn on
STATE_NOT_OPTIMIZED_ROWS so we do the same when we see a CLR_END
for an UNDO_ROW_INSERT or when we execute an UNDO_ROW_INSERT (in both
cases a row was deleted). Pick up max_long_trid from the checkpoint record.
storage/maria/maria_chk.c:
comment
storage/maria/maria_def.h:
MARIA_FILE_BITMAP gets new members: 'flushable', 'bitmap_cond' and
'pinned_pages'.
storage/maria/trnman.c:
I used to think that recovery only needs to know the maximum TrID
of the lists of active and committed transactions. But no, sometimes
both lists can even be empty and their TrID should not be reused.
So Checkpoint now saves global_trid_generator in the checkpoint record.
storage/maria/trnman_public.h:
macros to read/store a TrID
mysql-test/r/maria-recovery-bitmap.result:
result is ok. Without the code fix, we would get a corruption message
about the bitmap page in CHECK TABLE EXTENDED.
mysql-test/t/maria-recovery-bitmap-master.opt:
usual when we crash mysqld in tests
mysql-test/t/maria-recovery-bitmap.test:
test of recovery problems specific of the bitmap pages.
2007-12-14 16:14:12 +01:00
|
|
|
/*#define WRONG_BITMAP_FLUSH 1*/ /*define only for provoking bugs*/
|
|
|
|
#undef WRONG_BITMAP_FLUSH
|
2007-04-19 12:18:56 +02:00
|
|
|
|
WL#3072 - Maria Recovery
Bulk insert: don't log REDO/UNDO for rows, log one UNDO which will
truncate files; this is an optimization and a bugfix (table was left
half-repaired by crash).
Repair: mark table crashed-on-repair at start, bump skip_redo_lsn at start,
this is easier for recovery (tells it to skip old REDOs or even UNDO
phase) and user (tells it to repair) in case of crash, sync files
in the end.
Recovery skips missing or corrupted table and moves to next record
(in REDO or UNDO phase) to be more robust; warns if happens in UNDO phase.
Bugfix for UNDO_KEY_DELETE_WITH_ROOT (tested in ma_test_recovery)
and maria_enable_indexes().
Create missing bitmaps when needed (there can be more than one to create,
in rare cases), log a record for this.
include/myisamchk.h:
new flag: bulk insert repair mustn't bump create_rename_lsn
mysql-test/lib/mtr_report.pl:
skip normal warning in maria-recovery.test
mysql-test/r/maria-recovery.result:
result: crash before bulk insert is committed, causes proper rollback,
and crash right after OPTIMIZE replaces index file with new index file
leads to table marked corrupted and recovery not failing.
mysql-test/t/maria-recovery.test:
- can't check the table or it would commit the transaction,
but check is made after recovery.
- test of crash before bulk-insert-with-repair is committed
(to see if it is rolled back), and of crash after OPTIMIZE has replaced
index file but not finished all operations (to see if recovery fails -
it used to assert when trying to execute an old REDO on the new
index).
storage/maria/CMakeLists.txt:
new file
storage/maria/Makefile.am:
new file
storage/maria/ha_maria.cc:
- If bulk insert on a transactional table using an index repair:
table is initially empty, so don't log REDO/UNDO for data rows
(optimization), just log an UNDO_BULK_INSERT_WITH_REPAIR
which will, if executed, empty the data and index file. Re-enable
logging in end_bulk_insert().
- write log record for repair operation only after it's fully done,
index sort including (maria_repair*() used to write the log record).
- Adding back file->trn=NULL which was removed by mistake earlier.
storage/maria/ha_maria.h:
new member (see ha_maria.cc)
storage/maria/ma_bitmap.c:
Functions to create missing bitmaps:
- one function which creates missing bitmaps in page cache, except
the missing one with max offset which it does not put into page cache
as it will be modified very soon.
- one function which the one above calls, and creates bitmaps in page
cache
- one function to execute REDO_BITMAP_NEW_PAGE which uses the second
one above.
storage/maria/ma_blockrec.c:
- when logging REDO_DELETE_ALL, not only 'records' and 'checksum'
has to be reset under log's mutex.
- execution of REDO_INSERT_ROW_BLOBS now checks the dirty pages' list
- execution of UNDO_BULK_INSERT_WITH_REPAIR
storage/maria/ma_blockrec.h:
new functions
storage/maria/ma_check.c:
- table-flush-before-repair is moved to a separate function reused
by maria_sort_index(); syncing is added
- maria_repair() is allowed to re-enable logging only if it is the one
which disabled it.
- "_ma_flush_table_files_after_repair" was a bad name, it's not after
repair now, and it should not sync as we do more changes to the files
shortly after (sync is postponed to when writing the log record)
- REDO_REPAIR record should be written only after all repair
operations (in particular after sorting index in ha_maria::repair())
- close to the end of repair by sort, flushing of pages must happen
also in the non-quick case, to prepare for the sync at end.
- in parallel repair, some page flushes are not needed as done
by initialize_variables_for_repair().
storage/maria/ma_create.c:
Update skip_redo_lsn, create_rename_lsn optionally.
storage/maria/ma_delete_all.c:
Need to sync files at end of maria_delete_all_rows(), if transactional.
storage/maria/ma_extra.c:
During repair, we sometimes call _ma_flush_table_files() (via
_ma_flush_table_files_before_swap()) while there is a WRITE_CACHE.
storage/maria/ma_key_recover.c:
- when we see CLR_END for UNDO_BULK_INSERT_WITH_REPAIR, re-enable
indices.
- fixing bug: _ma_apply_undo_key_delete() parsed UNDO_KEY_DELETE_WITH_ROOT
wrongly, leading to recovery failure
storage/maria/ma_key_recover.h:
new prototype
storage/maria/ma_locking.c:
DBUG_VOID_RETURN missing
storage/maria/ma_loghandler.c:
UNDO for bulk insert with repair, and REDO for creating bitmaps.
LOGREC_FIRST_FREE to not have to change the for() every time we
add a new record type.
storage/maria/ma_loghandler.h:
new UNDO and REDO
storage/maria/ma_open.c:
Move share.kfile.file=kfile up a bit, so that _ma_update_state_lsns()
can get its value, this fixes a bug where LSN_REPAIRED_BY_MARIA_CHK
was not corrected on disk by maria_open().
Store skip_redo_lsn in index' header.
maria_enable_indexes() had a bug for BLOCK_RECORD, where an empty
file has one page, not 0 bytes.
storage/maria/ma_recovery.c:
- Skip a corrupted, missing, or repaired-with-maria_chk, table in
recovery: don't fail, just go to next REDO or UNDO; but if an UNDO
is skipped in UNDO phase we issue warnings.
- Skip REDO|UNDO in REDO phase if <skip_redo_lsn.
- If UNDO phase fails, delete transactions to not make trnman
assert.
- Update skip_redo_lsn when playing REDO_CREATE_TABLE
- Don't record UNDOs for old transactions which we don't know (long_trid==0)
- Bugfix for UNDO_KEY_DELETE_WITH_ROOT (see ma_key_recover.c)
- Execution of UNDO_BULK_INSERT_WITH_REPAIR
- Don't try to find a page number in REDO_DELETE_ALL
- Pieces moved to ma_recovery_util.c
storage/maria/ma_rename.c:
name change
storage/maria/ma_static.c:
I modified layout of the index' header (inserted skip_redo_lsn in its middle)
storage/maria/ma_test2.c:
allow breaking the test towards the end, tests execution of
UNDO_KEY_DELETE_WITH_ROOT
storage/maria/ma_test_recovery.expected:
6 as testflag instead of 4
storage/maria/ma_test_recovery:
Increase the amount of rollback work to do when testing recovery
with ma_test2; this reproduces the UNDO_KEY_DELETE_WITH_ROOT bug.
storage/maria/maria_chk.c:
skip_redo_lsn should be updated too, for consistency.
Write a REDO_REPAIR after all operations (including sort-records)
have been done.
No reason to flush blocks after maria_chk_data_link() and
maria_sort_records(), there is maria_close() in the end.
write_log_record() is a function, to not clutter maria_chk().
storage/maria/maria_def.h:
New member skip_redo_lsn in the state, and comments
storage/maria/maria_pack.c:
skip_redo_lsn should be updated too, for consistency
storage/maria/ma_recovery_util.c:
_ma_redo_not_needed_for_page(), defined in ma_recovery.c, is needed
by ma_blockrec.c; this causes link issues, resolved by putting
_ma_redo_not_needed_for_page() into a new file (so that it is not
in the same file as repair-related objects of ma_recovery.c).
storage/maria/ma_recovery_util.h:
new file
2008-01-17 23:59:32 +01:00
|
|
|
static my_bool _ma_read_bitmap_page(MARIA_HA *info,
|
2007-04-19 12:18:56 +02:00
|
|
|
MARIA_FILE_BITMAP *bitmap,
|
2008-01-10 20:21:36 +01:00
|
|
|
pgcache_page_no_t page);
|
WL#3072 - Maria Recovery
Bulk insert: don't log REDO/UNDO for rows, log one UNDO which will
truncate files; this is an optimization and a bugfix (table was left
half-repaired by crash).
Repair: mark table crashed-on-repair at start, bump skip_redo_lsn at start,
this is easier for recovery (tells it to skip old REDOs or even UNDO
phase) and user (tells it to repair) in case of crash, sync files
in the end.
Recovery skips missing or corrupted table and moves to next record
(in REDO or UNDO phase) to be more robust; warns if happens in UNDO phase.
Bugfix for UNDO_KEY_DELETE_WITH_ROOT (tested in ma_test_recovery)
and maria_enable_indexes().
Create missing bitmaps when needed (there can be more than one to create,
in rare cases), log a record for this.
include/myisamchk.h:
new flag: bulk insert repair mustn't bump create_rename_lsn
mysql-test/lib/mtr_report.pl:
skip normal warning in maria-recovery.test
mysql-test/r/maria-recovery.result:
result: crash before bulk insert is committed, causes proper rollback,
and crash right after OPTIMIZE replaces index file with new index file
leads to table marked corrupted and recovery not failing.
mysql-test/t/maria-recovery.test:
- can't check the table or it would commit the transaction,
but check is made after recovery.
- test of crash before bulk-insert-with-repair is committed
(to see if it is rolled back), and of crash after OPTIMIZE has replaced
index file but not finished all operations (to see if recovery fails -
it used to assert when trying to execute an old REDO on the new
index).
storage/maria/CMakeLists.txt:
new file
storage/maria/Makefile.am:
new file
storage/maria/ha_maria.cc:
- If bulk insert on a transactional table using an index repair:
table is initially empty, so don't log REDO/UNDO for data rows
(optimization), just log an UNDO_BULK_INSERT_WITH_REPAIR
which will, if executed, empty the data and index file. Re-enable
logging in end_bulk_insert().
- write log record for repair operation only after it's fully done,
index sort including (maria_repair*() used to write the log record).
- Adding back file->trn=NULL which was removed by mistake earlier.
storage/maria/ha_maria.h:
new member (see ha_maria.cc)
storage/maria/ma_bitmap.c:
Functions to create missing bitmaps:
- one function which creates missing bitmaps in page cache, except
the missing one with max offset which it does not put into page cache
as it will be modified very soon.
- one function which the one above calls, and creates bitmaps in page
cache
- one function to execute REDO_BITMAP_NEW_PAGE which uses the second
one above.
storage/maria/ma_blockrec.c:
- when logging REDO_DELETE_ALL, not only 'records' and 'checksum'
has to be reset under log's mutex.
- execution of REDO_INSERT_ROW_BLOBS now checks the dirty pages' list
- execution of UNDO_BULK_INSERT_WITH_REPAIR
storage/maria/ma_blockrec.h:
new functions
storage/maria/ma_check.c:
- table-flush-before-repair is moved to a separate function reused
by maria_sort_index(); syncing is added
- maria_repair() is allowed to re-enable logging only if it is the one
which disabled it.
- "_ma_flush_table_files_after_repair" was a bad name, it's not after
repair now, and it should not sync as we do more changes to the files
shortly after (sync is postponed to when writing the log record)
- REDO_REPAIR record should be written only after all repair
operations (in particular after sorting index in ha_maria::repair())
- close to the end of repair by sort, flushing of pages must happen
also in the non-quick case, to prepare for the sync at end.
- in parallel repair, some page flushes are not needed as done
by initialize_variables_for_repair().
storage/maria/ma_create.c:
Update skip_redo_lsn, create_rename_lsn optionally.
storage/maria/ma_delete_all.c:
Need to sync files at end of maria_delete_all_rows(), if transactional.
storage/maria/ma_extra.c:
During repair, we sometimes call _ma_flush_table_files() (via
_ma_flush_table_files_before_swap()) while there is a WRITE_CACHE.
storage/maria/ma_key_recover.c:
- when we see CLR_END for UNDO_BULK_INSERT_WITH_REPAIR, re-enable
indices.
- fixing bug: _ma_apply_undo_key_delete() parsed UNDO_KEY_DELETE_WITH_ROOT
wrongly, leading to recovery failure
storage/maria/ma_key_recover.h:
new prototype
storage/maria/ma_locking.c:
DBUG_VOID_RETURN missing
storage/maria/ma_loghandler.c:
UNDO for bulk insert with repair, and REDO for creating bitmaps.
LOGREC_FIRST_FREE to not have to change the for() every time we
add a new record type.
storage/maria/ma_loghandler.h:
new UNDO and REDO
storage/maria/ma_open.c:
Move share.kfile.file=kfile up a bit, so that _ma_update_state_lsns()
can get its value, this fixes a bug where LSN_REPAIRED_BY_MARIA_CHK
was not corrected on disk by maria_open().
Store skip_redo_lsn in index' header.
maria_enable_indexes() had a bug for BLOCK_RECORD, where an empty
file has one page, not 0 bytes.
storage/maria/ma_recovery.c:
- Skip a corrupted, missing, or repaired-with-maria_chk, table in
recovery: don't fail, just go to next REDO or UNDO; but if an UNDO
is skipped in UNDO phase we issue warnings.
- Skip REDO|UNDO in REDO phase if <skip_redo_lsn.
- If UNDO phase fails, delete transactions to not make trnman
assert.
- Update skip_redo_lsn when playing REDO_CREATE_TABLE
- Don't record UNDOs for old transactions which we don't know (long_trid==0)
- Bugfix for UNDO_KEY_DELETE_WITH_ROOT (see ma_key_recover.c)
- Execution of UNDO_BULK_INSERT_WITH_REPAIR
- Don't try to find a page number in REDO_DELETE_ALL
- Pieces moved to ma_recovery_util.c
storage/maria/ma_rename.c:
name change
storage/maria/ma_static.c:
I modified layout of the index' header (inserted skip_redo_lsn in its middle)
storage/maria/ma_test2.c:
allow breaking the test towards the end, tests execution of
UNDO_KEY_DELETE_WITH_ROOT
storage/maria/ma_test_recovery.expected:
6 as testflag instead of 4
storage/maria/ma_test_recovery:
Increase the amount of rollback work to do when testing recovery
with ma_test2; this reproduces the UNDO_KEY_DELETE_WITH_ROOT bug.
storage/maria/maria_chk.c:
skip_redo_lsn should be updated too, for consistency.
Write a REDO_REPAIR after all operations (including sort-records)
have been done.
No reason to flush blocks after maria_chk_data_link() and
maria_sort_records(), there is maria_close() in the end.
write_log_record() is a function, to not clutter maria_chk().
storage/maria/maria_def.h:
New member skip_redo_lsn in the state, and comments
storage/maria/maria_pack.c:
skip_redo_lsn should be updated too, for consistency
storage/maria/ma_recovery_util.c:
_ma_redo_not_needed_for_page(), defined in ma_recovery.c, is needed
by ma_blockrec.c; this causes link issues, resolved by putting
_ma_redo_not_needed_for_page() into a new file (so that it is not
in the same file as repair-related objects of ma_recovery.c).
storage/maria/ma_recovery_util.h:
new file
2008-01-17 23:59:32 +01:00
|
|
|
static my_bool _ma_bitmap_create_missing(MARIA_HA *info,
|
|
|
|
MARIA_FILE_BITMAP *bitmap,
|
|
|
|
pgcache_page_no_t page);
|
2011-02-10 19:33:51 +01:00
|
|
|
static void _ma_bitmap_unpin_all(MARIA_SHARE *share);
|
|
|
|
|
2007-04-19 12:18:56 +02:00
|
|
|
|
|
|
|
/* Write bitmap page to page cache */
|
|
|
|
|
2007-01-18 20:38:14 +01:00
|
|
|
static inline my_bool write_changed_bitmap(MARIA_SHARE *share,
|
|
|
|
MARIA_FILE_BITMAP *bitmap)
|
|
|
|
{
|
2011-02-10 21:04:29 +01:00
|
|
|
my_bool res;
|
WL#3072 - Maria recovery.
* fix for bitmap vs checkpoint bug which could lead to corrupted
tables in case of crashes at certain moments: a bitmap could be flushed
to disk even though it was inconsistent with the log (it could be
flushed before REDO-UNDO are written to the log). One bug remains, need
code from others. Tests added. Fix is to pin unflushable bitmap pages,
and let checkpoint wait for them to be flushable.
* fix for long_trid!=0 assertion failure at Recovery.
* less useless wakeups in the background flush|checkpoint thread.
* store global_trid_generator in checkpoint record.
mysql-test/r/maria-recovery.result:
result update
mysql-test/t/maria-recovery.test:
make it easier to locate subtests
storage/maria/ma_bitmap.c:
When we send a bitmap to the pagecache, if this bitmap is not in a
flushable state we keep it pinned and add it to a list, it will be
unpinned when the bitmap is flushable again.
A new function _ma_bitmap_flush_all() used by checkpoint.
A new function _ma_bitmap_flushable() used by block format to signal
when it starts modifying a bitmap and when it is done with it.
storage/maria/ma_blockrec.c:
When starting a row operation (insert/update/delete), mark that
the bitmap is not flushable (because for example INSERT is going
to over-allocate in the bitmap to prevent other threads from using
our data pages). If a checkpoint comes at this moment it will wait
for the bitmap to be flushable before flushing it.
When the operation ends, bitmap becomes flushable again; that
transition is done under the bitmap's mutex (needed for correct
synchro with a concurrent checkpoint); but for INSERT/UPDATE this
happens inside _ma_bitmap_release_unused() at a place where it already
has the mutex, so the only penalty (mutex adding) is in DELETE and UNDO
of INSERT. In case of errors after setting the bitmap unflushable,
we must always set it back to flushable or checkpoint would block.
Debug possibilities to force a sleep while the bitmap is over-allocated.
In case of error in get_head_or_tail() in allocate_and_write_block_record(),
we still need to unpin all pages.
Bugfix: _ma_apply_redo_insert_row_blobs() produced wrong
data_file_length.
storage/maria/ma_blockrec.h:
new bitmap calls.
storage/maria/ma_checkpoint.c:
filter_flush_indirect not needed anymore (flushing bitmap
pages happens in _ma_bitmap_flush_all() now). So
st_filter_param::is_data_file|pages_covered_by_bitmap not needed.
Other filter_flush* don't need to flush bitmap anymore.
Add debug possibility to flush all bitmap pages outside of a checkpoint,
to simulate pagecache LRU eviction.
When the background flush/checkpoint thread notices it has nothing
to flush, it now sleeps directly until the next potential checkpoint
moment instead of waking up every second.
When in checkpoint we decide to not store a table in the checkpoint record
(because it has logged no writes for example), we can also skip flushing
this table.
storage/maria/ma_commit.c:
comment is out-of-date
storage/maria/ma_key_recover.c:
comment fix
storage/maria/ma_loghandler.c:
comment is out-of-date
storage/maria/ma_open.c:
comment is out-of-date
storage/maria/ma_pagecache.c:
comment for bug to fix. And we don't take checkpoints at end of REDO
phase yet so can trust block->type.
storage/maria/ma_recovery.c:
Comments. Now-unneeded code for incomplete REDO-UNDO groups removed.
When we forget about an old transaction we must really forget
about it with bzero() (fixes the "long_trid!=0 assertion" recovery
bug). When we delete a row with maria_delete() we turn on
STATE_NOT_OPTIMIZED_ROWS so we do the same when we see a CLR_END
for an UNDO_ROW_INSERT or when we execute an UNDO_ROW_INSERT (in both
cases a row was deleted). Pick up max_long_trid from the checkpoint record.
storage/maria/maria_chk.c:
comment
storage/maria/maria_def.h:
MARIA_FILE_BITMAP gets new members: 'flushable', 'bitmap_cond' and
'pinned_pages'.
storage/maria/trnman.c:
I used to think that recovery only needs to know the maximum TrID
of the lists of active and committed transactions. But no, sometimes
both lists can even be empty and their TrID should not be reused.
So Checkpoint now saves global_trid_generator in the checkpoint record.
storage/maria/trnman_public.h:
macros to read/store a TrID
mysql-test/r/maria-recovery-bitmap.result:
result is ok. Without the code fix, we would get a corruption message
about the bitmap page in CHECK TABLE EXTENDED.
mysql-test/t/maria-recovery-bitmap-master.opt:
usual when we crash mysqld in tests
mysql-test/t/maria-recovery-bitmap.test:
test of recovery problems specific of the bitmap pages.
2007-12-14 16:14:12 +01:00
|
|
|
DBUG_ENTER("write_changed_bitmap");
|
2007-04-04 22:37:09 +02:00
|
|
|
DBUG_ASSERT(share->pagecache->block_size == bitmap->block_size);
|
2007-12-15 22:31:22 +01:00
|
|
|
DBUG_ASSERT(bitmap->file.write_callback != 0);
|
2007-12-15 14:17:23 +01:00
|
|
|
DBUG_PRINT("info", ("bitmap->non_flushable: %u", bitmap->non_flushable));
|
2007-12-18 02:21:32 +01:00
|
|
|
|
2010-09-10 01:42:12 +02:00
|
|
|
/*
|
|
|
|
Mark that a bitmap page has been written to page cache and we have
|
|
|
|
to flush it during checkpoint.
|
|
|
|
*/
|
|
|
|
bitmap->changed_not_flushed= 1;
|
|
|
|
|
2007-12-15 14:17:23 +01:00
|
|
|
if ((bitmap->non_flushable == 0)
|
WL#3072 - Maria recovery.
* fix for bitmap vs checkpoint bug which could lead to corrupted
tables in case of crashes at certain moments: a bitmap could be flushed
to disk even though it was inconsistent with the log (it could be
flushed before REDO-UNDO are written to the log). One bug remains, need
code from others. Tests added. Fix is to pin unflushable bitmap pages,
and let checkpoint wait for them to be flushable.
* fix for long_trid!=0 assertion failure at Recovery.
* less useless wakeups in the background flush|checkpoint thread.
* store global_trid_generator in checkpoint record.
mysql-test/r/maria-recovery.result:
result update
mysql-test/t/maria-recovery.test:
make it easier to locate subtests
storage/maria/ma_bitmap.c:
When we send a bitmap to the pagecache, if this bitmap is not in a
flushable state we keep it pinned and add it to a list, it will be
unpinned when the bitmap is flushable again.
A new function _ma_bitmap_flush_all() used by checkpoint.
A new function _ma_bitmap_flushable() used by block format to signal
when it starts modifying a bitmap and when it is done with it.
storage/maria/ma_blockrec.c:
When starting a row operation (insert/update/delete), mark that
the bitmap is not flushable (because for example INSERT is going
to over-allocate in the bitmap to prevent other threads from using
our data pages). If a checkpoint comes at this moment it will wait
for the bitmap to be flushable before flushing it.
When the operation ends, bitmap becomes flushable again; that
transition is done under the bitmap's mutex (needed for correct
synchro with a concurrent checkpoint); but for INSERT/UPDATE this
happens inside _ma_bitmap_release_unused() at a place where it already
has the mutex, so the only penalty (mutex adding) is in DELETE and UNDO
of INSERT. In case of errors after setting the bitmap unflushable,
we must always set it back to flushable or checkpoint would block.
Debug possibilities to force a sleep while the bitmap is over-allocated.
In case of error in get_head_or_tail() in allocate_and_write_block_record(),
we still need to unpin all pages.
Bugfix: _ma_apply_redo_insert_row_blobs() produced wrong
data_file_length.
storage/maria/ma_blockrec.h:
new bitmap calls.
storage/maria/ma_checkpoint.c:
filter_flush_indirect not needed anymore (flushing bitmap
pages happens in _ma_bitmap_flush_all() now). So
st_filter_param::is_data_file|pages_covered_by_bitmap not needed.
Other filter_flush* don't need to flush bitmap anymore.
Add debug possibility to flush all bitmap pages outside of a checkpoint,
to simulate pagecache LRU eviction.
When the background flush/checkpoint thread notices it has nothing
to flush, it now sleeps directly until the next potential checkpoint
moment instead of waking up every second.
When in checkpoint we decide to not store a table in the checkpoint record
(because it has logged no writes for example), we can also skip flushing
this table.
storage/maria/ma_commit.c:
comment is out-of-date
storage/maria/ma_key_recover.c:
comment fix
storage/maria/ma_loghandler.c:
comment is out-of-date
storage/maria/ma_open.c:
comment is out-of-date
storage/maria/ma_pagecache.c:
comment for bug to fix. And we don't take checkpoints at end of REDO
phase yet so can trust block->type.
storage/maria/ma_recovery.c:
Comments. Now-unneeded code for incomplete REDO-UNDO groups removed.
When we forget about an old transaction we must really forget
about it with bzero() (fixes the "long_trid!=0 assertion" recovery
bug). When we delete a row with maria_delete() we turn on
STATE_NOT_OPTIMIZED_ROWS so we do the same when we see a CLR_END
for an UNDO_ROW_INSERT or when we execute an UNDO_ROW_INSERT (in both
cases a row was deleted). Pick up max_long_trid from the checkpoint record.
storage/maria/maria_chk.c:
comment
storage/maria/maria_def.h:
MARIA_FILE_BITMAP gets new members: 'flushable', 'bitmap_cond' and
'pinned_pages'.
storage/maria/trnman.c:
I used to think that recovery only needs to know the maximum TrID
of the lists of active and committed transactions. But no, sometimes
both lists can even be empty and their TrID should not be reused.
So Checkpoint now saves global_trid_generator in the checkpoint record.
storage/maria/trnman_public.h:
macros to read/store a TrID
mysql-test/r/maria-recovery-bitmap.result:
result is ok. Without the code fix, we would get a corruption message
about the bitmap page in CHECK TABLE EXTENDED.
mysql-test/t/maria-recovery-bitmap-master.opt:
usual when we crash mysqld in tests
mysql-test/t/maria-recovery-bitmap.test:
test of recovery problems specific of the bitmap pages.
2007-12-14 16:14:12 +01:00
|
|
|
#ifdef WRONG_BITMAP_FLUSH
|
|
|
|
|| 1
|
|
|
|
#endif
|
|
|
|
)
|
|
|
|
{
|
2011-02-10 21:04:29 +01:00
|
|
|
res= pagecache_write(share->pagecache,
|
WL#3072 - Maria recovery.
* fix for bitmap vs checkpoint bug which could lead to corrupted
tables in case of crashes at certain moments: a bitmap could be flushed
to disk even though it was inconsistent with the log (it could be
flushed before REDO-UNDO are written to the log). One bug remains, need
code from others. Tests added. Fix is to pin unflushable bitmap pages,
and let checkpoint wait for them to be flushable.
* fix for long_trid!=0 assertion failure at Recovery.
* less useless wakeups in the background flush|checkpoint thread.
* store global_trid_generator in checkpoint record.
mysql-test/r/maria-recovery.result:
result update
mysql-test/t/maria-recovery.test:
make it easier to locate subtests
storage/maria/ma_bitmap.c:
When we send a bitmap to the pagecache, if this bitmap is not in a
flushable state we keep it pinned and add it to a list, it will be
unpinned when the bitmap is flushable again.
A new function _ma_bitmap_flush_all() used by checkpoint.
A new function _ma_bitmap_flushable() used by block format to signal
when it starts modifying a bitmap and when it is done with it.
storage/maria/ma_blockrec.c:
When starting a row operation (insert/update/delete), mark that
the bitmap is not flushable (because for example INSERT is going
to over-allocate in the bitmap to prevent other threads from using
our data pages). If a checkpoint comes at this moment it will wait
for the bitmap to be flushable before flushing it.
When the operation ends, bitmap becomes flushable again; that
transition is done under the bitmap's mutex (needed for correct
synchro with a concurrent checkpoint); but for INSERT/UPDATE this
happens inside _ma_bitmap_release_unused() at a place where it already
has the mutex, so the only penalty (mutex adding) is in DELETE and UNDO
of INSERT. In case of errors after setting the bitmap unflushable,
we must always set it back to flushable or checkpoint would block.
Debug possibilities to force a sleep while the bitmap is over-allocated.
In case of error in get_head_or_tail() in allocate_and_write_block_record(),
we still need to unpin all pages.
Bugfix: _ma_apply_redo_insert_row_blobs() produced wrong
data_file_length.
storage/maria/ma_blockrec.h:
new bitmap calls.
storage/maria/ma_checkpoint.c:
filter_flush_indirect not needed anymore (flushing bitmap
pages happens in _ma_bitmap_flush_all() now). So
st_filter_param::is_data_file|pages_covered_by_bitmap not needed.
Other filter_flush* don't need to flush bitmap anymore.
Add debug possibility to flush all bitmap pages outside of a checkpoint,
to simulate pagecache LRU eviction.
When the background flush/checkpoint thread notices it has nothing
to flush, it now sleeps directly until the next potential checkpoint
moment instead of waking up every second.
When in checkpoint we decide to not store a table in the checkpoint record
(because it has logged no writes for example), we can also skip flushing
this table.
storage/maria/ma_commit.c:
comment is out-of-date
storage/maria/ma_key_recover.c:
comment fix
storage/maria/ma_loghandler.c:
comment is out-of-date
storage/maria/ma_open.c:
comment is out-of-date
storage/maria/ma_pagecache.c:
comment for bug to fix. And we don't take checkpoints at end of REDO
phase yet so can trust block->type.
storage/maria/ma_recovery.c:
Comments. Now-unneeded code for incomplete REDO-UNDO groups removed.
When we forget about an old transaction we must really forget
about it with bzero() (fixes the "long_trid!=0 assertion" recovery
bug). When we delete a row with maria_delete() we turn on
STATE_NOT_OPTIMIZED_ROWS so we do the same when we see a CLR_END
for an UNDO_ROW_INSERT or when we execute an UNDO_ROW_INSERT (in both
cases a row was deleted). Pick up max_long_trid from the checkpoint record.
storage/maria/maria_chk.c:
comment
storage/maria/maria_def.h:
MARIA_FILE_BITMAP gets new members: 'flushable', 'bitmap_cond' and
'pinned_pages'.
storage/maria/trnman.c:
I used to think that recovery only needs to know the maximum TrID
of the lists of active and committed transactions. But no, sometimes
both lists can even be empty and their TrID should not be reused.
So Checkpoint now saves global_trid_generator in the checkpoint record.
storage/maria/trnman_public.h:
macros to read/store a TrID
mysql-test/r/maria-recovery-bitmap.result:
result is ok. Without the code fix, we would get a corruption message
about the bitmap page in CHECK TABLE EXTENDED.
mysql-test/t/maria-recovery-bitmap-master.opt:
usual when we crash mysqld in tests
mysql-test/t/maria-recovery-bitmap.test:
test of recovery problems specific to the bitmap pages.
2007-12-14 16:14:12 +01:00
|
|
|
&bitmap->file, bitmap->page, 0,
|
2009-01-09 05:23:25 +01:00
|
|
|
bitmap->map, PAGECACHE_PLAIN_PAGE,
|
WL#3072 - Maria recovery.
* fix for bitmap vs checkpoint bug which could lead to corrupted
tables in case of crashes at certain moments: a bitmap could be flushed
to disk even though it was inconsistent with the log (it could be
flushed before REDO-UNDO are written to the log). One bug remains, need
code from others. Tests added. Fix is to pin unflushable bitmap pages,
and let checkpoint wait for them to be flushable.
* fix for long_trid!=0 assertion failure at Recovery.
* less useless wakeups in the background flush|checkpoint thread.
* store global_trid_generator in checkpoint record.
mysql-test/r/maria-recovery.result:
result update
mysql-test/t/maria-recovery.test:
make it easier to locate subtests
storage/maria/ma_bitmap.c:
When we send a bitmap to the pagecache, if this bitmap is not in a
flushable state we keep it pinned and add it to a list, it will be
unpinned when the bitmap is flushable again.
A new function _ma_bitmap_flush_all() used by checkpoint.
A new function _ma_bitmap_flushable() used by block format to signal
when it starts modifying a bitmap and when it is done with it.
storage/maria/ma_blockrec.c:
When starting a row operation (insert/update/delete), mark that
the bitmap is not flushable (because for example INSERT is going
to over-allocate in the bitmap to prevent other threads from using
our data pages). If a checkpoint comes at this moment it will wait
for the bitmap to be flushable before flushing it.
When the operation ends, bitmap becomes flushable again; that
transition is done under the bitmap's mutex (needed for correct
synchro with a concurrent checkpoint); but for INSERT/UPDATE this
happens inside _ma_bitmap_release_unused() at a place where it already
has the mutex, so the only penalty (mutex adding) is in DELETE and UNDO
of INSERT. In case of errors after setting the bitmap unflushable,
we must always set it back to flushable or checkpoint would block.
Debug possibilities to force a sleep while the bitmap is over-allocated.
In case of error in get_head_or_tail() in allocate_and_write_block_record(),
we still need to unpin all pages.
Bugfix: _ma_apply_redo_insert_row_blobs() produced wrong
data_file_length.
storage/maria/ma_blockrec.h:
new bitmap calls.
storage/maria/ma_checkpoint.c:
filter_flush_indirect not needed anymore (flushing bitmap
pages happens in _ma_bitmap_flush_all() now). So
st_filter_param::is_data_file|pages_covered_by_bitmap not needed.
Other filter_flush* don't need to flush bitmap anymore.
Add debug possibility to flush all bitmap pages outside of a checkpoint,
to simulate pagecache LRU eviction.
When the background flush/checkpoint thread notices it has nothing
to flush, it now sleeps directly until the next potential checkpoint
moment instead of waking up every second.
When in checkpoint we decide to not store a table in the checkpoint record
(because it has logged no writes for example), we can also skip flushing
this table.
storage/maria/ma_commit.c:
comment is out-of-date
storage/maria/ma_key_recover.c:
comment fix
storage/maria/ma_loghandler.c:
comment is out-of-date
storage/maria/ma_open.c:
comment is out-of-date
storage/maria/ma_pagecache.c:
comment for bug to fix. And we don't take checkpoints at end of REDO
phase yet so can trust block->type.
storage/maria/ma_recovery.c:
Comments. Now-unneeded code for incomplete REDO-UNDO groups removed.
When we forget about an old transaction we must really forget
about it with bzero() (fixes the "long_trid!=0 assertion" recovery
bug). When we delete a row with maria_delete() we turn on
STATE_NOT_OPTIMIZED_ROWS so we do the same when we see a CLR_END
for an UNDO_ROW_INSERT or when we execute an UNDO_ROW_INSERT (in both
cases a row was deleted). Pick up max_long_trid from the checkpoint record.
storage/maria/maria_chk.c:
comment
storage/maria/maria_def.h:
MARIA_FILE_BITMAP gets new members: 'flushable', 'bitmap_cond' and
'pinned_pages'.
storage/maria/trnman.c:
I used to think that recovery only needs to know the maximum TrID
of the lists of active and committed transactions. But no, sometimes
both lists can even be empty and their TrID should not be reused.
So Checkpoint now saves global_trid_generator in the checkpoint record.
storage/maria/trnman_public.h:
macros to read/store a TrID
mysql-test/r/maria-recovery-bitmap.result:
result is ok. Without the code fix, we would get a corruption message
about the bitmap page in CHECK TABLE EXTENDED.
mysql-test/t/maria-recovery-bitmap-master.opt:
usual when we crash mysqld in tests
mysql-test/t/maria-recovery-bitmap.test:
test of recovery problems specific to the bitmap pages.
2007-12-14 16:14:12 +01:00
|
|
|
PAGECACHE_LOCK_LEFT_UNLOCKED,
|
|
|
|
PAGECACHE_PIN_LEFT_UNPINNED,
|
|
|
|
PAGECACHE_WRITE_DELAY, 0, LSN_IMPOSSIBLE);
|
2011-07-04 03:32:53 +02:00
|
|
|
DBUG_ASSERT(!res);
|
WL#3072 - Maria recovery.
* fix for bitmap vs checkpoint bug which could lead to corrupted
tables in case of crashes at certain moments: a bitmap could be flushed
to disk even though it was inconsistent with the log (it could be
flushed before REDO-UNDO are written to the log). One bug remains, need
code from others. Tests added. Fix is to pin unflushable bitmap pages,
and let checkpoint wait for them to be flushable.
* fix for long_trid!=0 assertion failure at Recovery.
* less useless wakeups in the background flush|checkpoint thread.
* store global_trid_generator in checkpoint record.
mysql-test/r/maria-recovery.result:
result update
mysql-test/t/maria-recovery.test:
make it easier to locate subtests
storage/maria/ma_bitmap.c:
When we send a bitmap to the pagecache, if this bitmap is not in a
flushable state we keep it pinned and add it to a list, it will be
unpinned when the bitmap is flushable again.
A new function _ma_bitmap_flush_all() used by checkpoint.
A new function _ma_bitmap_flushable() used by block format to signal
when it starts modifying a bitmap and when it is done with it.
storage/maria/ma_blockrec.c:
When starting a row operation (insert/update/delete), mark that
the bitmap is not flushable (because for example INSERT is going
to over-allocate in the bitmap to prevent other threads from using
our data pages). If a checkpoint comes at this moment it will wait
for the bitmap to be flushable before flushing it.
When the operation ends, bitmap becomes flushable again; that
transition is done under the bitmap's mutex (needed for correct
synchro with a concurrent checkpoint); but for INSERT/UPDATE this
happens inside _ma_bitmap_release_unused() at a place where it already
has the mutex, so the only penalty (mutex adding) is in DELETE and UNDO
of INSERT. In case of errors after setting the bitmap unflushable,
we must always set it back to flushable or checkpoint would block.
Debug possibilities to force a sleep while the bitmap is over-allocated.
In case of error in get_head_or_tail() in allocate_and_write_block_record(),
we still need to unpin all pages.
Bugfix: _ma_apply_redo_insert_row_blobs() produced wrong
data_file_length.
storage/maria/ma_blockrec.h:
new bitmap calls.
storage/maria/ma_checkpoint.c:
filter_flush_indirect not needed anymore (flushing bitmap
pages happens in _ma_bitmap_flush_all() now). So
st_filter_param::is_data_file|pages_covered_by_bitmap not needed.
Other filter_flush* don't need to flush bitmap anymore.
Add debug possibility to flush all bitmap pages outside of a checkpoint,
to simulate pagecache LRU eviction.
When the background flush/checkpoint thread notices it has nothing
to flush, it now sleeps directly until the next potential checkpoint
moment instead of waking up every second.
When in checkpoint we decide to not store a table in the checkpoint record
(because it has logged no writes for example), we can also skip flushing
this table.
storage/maria/ma_commit.c:
comment is out-of-date
storage/maria/ma_key_recover.c:
comment fix
storage/maria/ma_loghandler.c:
comment is out-of-date
storage/maria/ma_open.c:
comment is out-of-date
storage/maria/ma_pagecache.c:
comment for bug to fix. And we don't take checkpoints at end of REDO
phase yet so can trust block->type.
storage/maria/ma_recovery.c:
Comments. Now-unneeded code for incomplete REDO-UNDO groups removed.
When we forget about an old transaction we must really forget
about it with bzero() (fixes the "long_trid!=0 assertion" recovery
bug). When we delete a row with maria_delete() we turn on
STATE_NOT_OPTIMIZED_ROWS so we do the same when we see a CLR_END
for an UNDO_ROW_INSERT or when we execute an UNDO_ROW_INSERT (in both
cases a row was deleted). Pick up max_long_trid from the checkpoint record.
storage/maria/maria_chk.c:
comment
storage/maria/maria_def.h:
MARIA_FILE_BITMAP gets new members: 'flushable', 'bitmap_cond' and
'pinned_pages'.
storage/maria/trnman.c:
I used to think that recovery only needs to know the maximum TrID
of the lists of active and committed transactions. But no, sometimes
both lists can even be empty and their TrID should not be reused.
So Checkpoint now saves global_trid_generator in the checkpoint record.
storage/maria/trnman_public.h:
macros to read/store a TrID
mysql-test/r/maria-recovery-bitmap.result:
result is ok. Without the code fix, we would get a corruption message
about the bitmap page in CHECK TABLE EXTENDED.
mysql-test/t/maria-recovery-bitmap-master.opt:
usual when we crash mysqld in tests
mysql-test/t/maria-recovery-bitmap.test:
test of recovery problems specific to the bitmap pages.
2007-12-14 16:14:12 +01:00
|
|
|
DBUG_RETURN(res);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2011-02-10 19:33:51 +01:00
|
|
|
/*
|
|
|
|
bitmap->non_flushable means that someone has changed the bitmap,
|
|
|
|
but it's not yet complete so it can't yet be written to disk.
|
|
|
|
In this case we write the changed bitmap to the disk cache,
|
|
|
|
but keep it pinned until the change is completed. The page will
|
|
|
|
be unpinned later by _ma_bitmap_unpin_all() as soon as non_flushable
|
|
|
|
is set back to 0.
|
|
|
|
*/
|
WL#3072 - Maria recovery.
* fix for bitmap vs checkpoint bug which could lead to corrupted
tables in case of crashes at certain moments: a bitmap could be flushed
to disk even though it was inconsistent with the log (it could be
flushed before REDO-UNDO are written to the log). One bug remains, need
code from others. Tests added. Fix is to pin unflushable bitmap pages,
and let checkpoint wait for them to be flushable.
* fix for long_trid!=0 assertion failure at Recovery.
* less useless wakeups in the background flush|checkpoint thread.
* store global_trid_generator in checkpoint record.
mysql-test/r/maria-recovery.result:
result update
mysql-test/t/maria-recovery.test:
make it easier to locate subtests
storage/maria/ma_bitmap.c:
When we send a bitmap to the pagecache, if this bitmap is not in a
flushable state we keep it pinned and add it to a list, it will be
unpinned when the bitmap is flushable again.
A new function _ma_bitmap_flush_all() used by checkpoint.
A new function _ma_bitmap_flushable() used by block format to signal
when it starts modifying a bitmap and when it is done with it.
storage/maria/ma_blockrec.c:
When starting a row operation (insert/update/delete), mark that
the bitmap is not flushable (because for example INSERT is going
to over-allocate in the bitmap to prevent other threads from using
our data pages). If a checkpoint comes at this moment it will wait
for the bitmap to be flushable before flushing it.
When the operation ends, bitmap becomes flushable again; that
transition is done under the bitmap's mutex (needed for correct
synchro with a concurrent checkpoint); but for INSERT/UPDATE this
happens inside _ma_bitmap_release_unused() at a place where it already
has the mutex, so the only penalty (mutex adding) is in DELETE and UNDO
of INSERT. In case of errors after setting the bitmap unflushable,
we must always set it back to flushable or checkpoint would block.
Debug possibilities to force a sleep while the bitmap is over-allocated.
In case of error in get_head_or_tail() in allocate_and_write_block_record(),
we still need to unpin all pages.
Bugfix: _ma_apply_redo_insert_row_blobs() produced wrong
data_file_length.
storage/maria/ma_blockrec.h:
new bitmap calls.
storage/maria/ma_checkpoint.c:
filter_flush_indirect not needed anymore (flushing bitmap
pages happens in _ma_bitmap_flush_all() now). So
st_filter_param::is_data_file|pages_covered_by_bitmap not needed.
Other filter_flush* don't need to flush bitmap anymore.
Add debug possibility to flush all bitmap pages outside of a checkpoint,
to simulate pagecache LRU eviction.
When the background flush/checkpoint thread notices it has nothing
to flush, it now sleeps directly until the next potential checkpoint
moment instead of waking up every second.
When in checkpoint we decide to not store a table in the checkpoint record
(because it has logged no writes for example), we can also skip flushing
this table.
storage/maria/ma_commit.c:
comment is out-of-date
storage/maria/ma_key_recover.c:
comment fix
storage/maria/ma_loghandler.c:
comment is out-of-date
storage/maria/ma_open.c:
comment is out-of-date
storage/maria/ma_pagecache.c:
comment for bug to fix. And we don't take checkpoints at end of REDO
phase yet so can trust block->type.
storage/maria/ma_recovery.c:
Comments. Now-unneeded code for incomplete REDO-UNDO groups removed.
When we forget about an old transaction we must really forget
about it with bzero() (fixes the "long_trid!=0 assertion" recovery
bug). When we delete a row with maria_delete() we turn on
STATE_NOT_OPTIMIZED_ROWS so we do the same when we see a CLR_END
for an UNDO_ROW_INSERT or when we execute an UNDO_ROW_INSERT (in both
cases a row was deleted). Pick up max_long_trid from the checkpoint record.
storage/maria/maria_chk.c:
comment
storage/maria/maria_def.h:
MARIA_FILE_BITMAP gets new members: 'flushable', 'bitmap_cond' and
'pinned_pages'.
storage/maria/trnman.c:
I used to think that recovery only needs to know the maximum TrID
of the lists of active and committed transactions. But no, sometimes
both lists can even be empty and their TrID should not be reused.
So Checkpoint now saves global_trid_generator in the checkpoint record.
storage/maria/trnman_public.h:
macros to read/store a TrID
mysql-test/r/maria-recovery-bitmap.result:
result is ok. Without the code fix, we would get a corruption message
about the bitmap page in CHECK TABLE EXTENDED.
mysql-test/t/maria-recovery-bitmap-master.opt:
usual when we crash mysqld in tests
mysql-test/t/maria-recovery-bitmap.test:
test of recovery problems specific to the bitmap pages.
2007-12-14 16:14:12 +01:00
|
|
|
MARIA_PINNED_PAGE page_link;
|
2011-02-10 21:04:29 +01:00
|
|
|
DBUG_PRINT("info", ("Writing pinned bitmap page"));
|
|
|
|
res= pagecache_write(share->pagecache,
|
WL#3072 - Maria recovery.
* fix for bitmap vs checkpoint bug which could lead to corrupted
tables in case of crashes at certain moments: a bitmap could be flushed
to disk even though it was inconsistent with the log (it could be
flushed before REDO-UNDO are written to the log). One bug remains, need
code from others. Tests added. Fix is to pin unflushable bitmap pages,
and let checkpoint wait for them to be flushable.
* fix for long_trid!=0 assertion failure at Recovery.
* less useless wakeups in the background flush|checkpoint thread.
* store global_trid_generator in checkpoint record.
mysql-test/r/maria-recovery.result:
result update
mysql-test/t/maria-recovery.test:
make it easier to locate subtests
storage/maria/ma_bitmap.c:
When we send a bitmap to the pagecache, if this bitmap is not in a
flushable state we keep it pinned and add it to a list, it will be
unpinned when the bitmap is flushable again.
A new function _ma_bitmap_flush_all() used by checkpoint.
A new function _ma_bitmap_flushable() used by block format to signal
when it starts modifying a bitmap and when it is done with it.
storage/maria/ma_blockrec.c:
When starting a row operation (insert/update/delete), mark that
the bitmap is not flushable (because for example INSERT is going
to over-allocate in the bitmap to prevent other threads from using
our data pages). If a checkpoint comes at this moment it will wait
for the bitmap to be flushable before flushing it.
When the operation ends, bitmap becomes flushable again; that
transition is done under the bitmap's mutex (needed for correct
synchro with a concurrent checkpoint); but for INSERT/UPDATE this
happens inside _ma_bitmap_release_unused() at a place where it already
has the mutex, so the only penalty (mutex adding) is in DELETE and UNDO
of INSERT. In case of errors after setting the bitmap unflushable,
we must always set it back to flushable or checkpoint would block.
Debug possibilities to force a sleep while the bitmap is over-allocated.
In case of error in get_head_or_tail() in allocate_and_write_block_record(),
we still need to unpin all pages.
Bugfix: _ma_apply_redo_insert_row_blobs() produced wrong
data_file_length.
storage/maria/ma_blockrec.h:
new bitmap calls.
storage/maria/ma_checkpoint.c:
filter_flush_indirect not needed anymore (flushing bitmap
pages happens in _ma_bitmap_flush_all() now). So
st_filter_param::is_data_file|pages_covered_by_bitmap not needed.
Other filter_flush* don't need to flush bitmap anymore.
Add debug possibility to flush all bitmap pages outside of a checkpoint,
to simulate pagecache LRU eviction.
When the background flush/checkpoint thread notices it has nothing
to flush, it now sleeps directly until the next potential checkpoint
moment instead of waking up every second.
When in checkpoint we decide to not store a table in the checkpoint record
(because it has logged no writes for example), we can also skip flushing
this table.
storage/maria/ma_commit.c:
comment is out-of-date
storage/maria/ma_key_recover.c:
comment fix
storage/maria/ma_loghandler.c:
comment is out-of-date
storage/maria/ma_open.c:
comment is out-of-date
storage/maria/ma_pagecache.c:
comment for bug to fix. And we don't take checkpoints at end of REDO
phase yet so can trust block->type.
storage/maria/ma_recovery.c:
Comments. Now-unneeded code for incomplete REDO-UNDO groups removed.
When we forget about an old transaction we must really forget
about it with bzero() (fixes the "long_trid!=0 assertion" recovery
bug). When we delete a row with maria_delete() we turn on
STATE_NOT_OPTIMIZED_ROWS so we do the same when we see a CLR_END
for an UNDO_ROW_INSERT or when we execute an UNDO_ROW_INSERT (in both
cases a row was deleted). Pick up max_long_trid from the checkpoint record.
storage/maria/maria_chk.c:
comment
storage/maria/maria_def.h:
MARIA_FILE_BITMAP gets new members: 'flushable', 'bitmap_cond' and
'pinned_pages'.
storage/maria/trnman.c:
I used to think that recovery only needs to know the maximum TrID
of the lists of active and committed transactions. But no, sometimes
both lists can even be empty and their TrID should not be reused.
So Checkpoint now saves global_trid_generator in the checkpoint record.
storage/maria/trnman_public.h:
macros to read/store a TrID
mysql-test/r/maria-recovery-bitmap.result:
result is ok. Without the code fix, we would get a corruption message
about the bitmap page in CHECK TABLE EXTENDED.
mysql-test/t/maria-recovery-bitmap-master.opt:
usual when we crash mysqld in tests
mysql-test/t/maria-recovery-bitmap.test:
test of recovery problems specific to the bitmap pages.
2007-12-14 16:14:12 +01:00
|
|
|
&bitmap->file, bitmap->page, 0,
|
2009-01-09 05:23:25 +01:00
|
|
|
bitmap->map, PAGECACHE_PLAIN_PAGE,
|
2008-10-14 14:16:10 +02:00
|
|
|
PAGECACHE_LOCK_LEFT_UNLOCKED, PAGECACHE_PIN,
|
WL#3072 - Maria recovery.
* fix for bitmap vs checkpoint bug which could lead to corrupted
tables in case of crashes at certain moments: a bitmap could be flushed
to disk even though it was inconsistent with the log (it could be
flushed before REDO-UNDO are written to the log). One bug remains, need
code from others. Tests added. Fix is to pin unflushable bitmap pages,
and let checkpoint wait for them to be flushable.
* fix for long_trid!=0 assertion failure at Recovery.
* less useless wakeups in the background flush|checkpoint thread.
* store global_trid_generator in checkpoint record.
mysql-test/r/maria-recovery.result:
result update
mysql-test/t/maria-recovery.test:
make it easier to locate subtests
storage/maria/ma_bitmap.c:
When we send a bitmap to the pagecache, if this bitmap is not in a
flushable state we keep it pinned and add it to a list, it will be
unpinned when the bitmap is flushable again.
A new function _ma_bitmap_flush_all() used by checkpoint.
A new function _ma_bitmap_flushable() used by block format to signal
when it starts modifying a bitmap and when it is done with it.
storage/maria/ma_blockrec.c:
When starting a row operation (insert/update/delete), mark that
the bitmap is not flushable (because for example INSERT is going
to over-allocate in the bitmap to prevent other threads from using
our data pages). If a checkpoint comes at this moment it will wait
for the bitmap to be flushable before flushing it.
When the operation ends, bitmap becomes flushable again; that
transition is done under the bitmap's mutex (needed for correct
synchro with a concurrent checkpoint); but for INSERT/UPDATE this
happens inside _ma_bitmap_release_unused() at a place where it already
has the mutex, so the only penalty (mutex adding) is in DELETE and UNDO
of INSERT. In case of errors after setting the bitmap unflushable,
we must always set it back to flushable or checkpoint would block.
Debug possibilities to force a sleep while the bitmap is over-allocated.
In case of error in get_head_or_tail() in allocate_and_write_block_record(),
we still need to unpin all pages.
Bugfix: _ma_apply_redo_insert_row_blobs() produced wrong
data_file_length.
storage/maria/ma_blockrec.h:
new bitmap calls.
storage/maria/ma_checkpoint.c:
filter_flush_indirect not needed anymore (flushing bitmap
pages happens in _ma_bitmap_flush_all() now). So
st_filter_param::is_data_file|pages_covered_by_bitmap not needed.
Other filter_flush* don't need to flush bitmap anymore.
Add debug possibility to flush all bitmap pages outside of a checkpoint,
to simulate pagecache LRU eviction.
When the background flush/checkpoint thread notices it has nothing
to flush, it now sleeps directly until the next potential checkpoint
moment instead of waking up every second.
When in checkpoint we decide to not store a table in the checkpoint record
(because it has logged no writes for example), we can also skip flushing
this table.
storage/maria/ma_commit.c:
comment is out-of-date
storage/maria/ma_key_recover.c:
comment fix
storage/maria/ma_loghandler.c:
comment is out-of-date
storage/maria/ma_open.c:
comment is out-of-date
storage/maria/ma_pagecache.c:
comment for bug to fix. And we don't take checkpoints at end of REDO
phase yet so can trust block->type.
storage/maria/ma_recovery.c:
Comments. Now-unneeded code for incomplete REDO-UNDO groups removed.
When we forget about an old transaction we must really forget
about it with bzero() (fixes the "long_trid!=0 assertion" recovery
bug). When we delete a row with maria_delete() we turn on
STATE_NOT_OPTIMIZED_ROWS so we do the same when we see a CLR_END
for an UNDO_ROW_INSERT or when we execute an UNDO_ROW_INSERT (in both
cases a row was deleted). Pick up max_long_trid from the checkpoint record.
storage/maria/maria_chk.c:
comment
storage/maria/maria_def.h:
MARIA_FILE_BITMAP gets new members: 'flushable', 'bitmap_cond' and
'pinned_pages'.
storage/maria/trnman.c:
I used to think that recovery only needs to know the maximum TrID
of the lists of active and committed transactions. But no, sometimes
both lists can even be empty and their TrID should not be reused.
So Checkpoint now saves global_trid_generator in the checkpoint record.
storage/maria/trnman_public.h:
macros to read/store a TrID
mysql-test/r/maria-recovery-bitmap.result:
result is ok. Without the code fix, we would get a corruption message
about the bitmap page in CHECK TABLE EXTENDED.
mysql-test/t/maria-recovery-bitmap-master.opt:
usual when we crash mysqld in tests
mysql-test/t/maria-recovery-bitmap.test:
test of recovery problems specific to the bitmap pages.
2007-12-14 16:14:12 +01:00
|
|
|
PAGECACHE_WRITE_DELAY, &page_link.link,
|
|
|
|
LSN_IMPOSSIBLE);
|
2008-10-14 14:16:10 +02:00
|
|
|
page_link.unlock= PAGECACHE_LOCK_LEFT_UNLOCKED;
|
2007-12-18 02:21:32 +01:00
|
|
|
page_link.changed= 1;
|
2011-02-10 21:04:29 +01:00
|
|
|
push_dynamic(&bitmap->pinned_pages, (const uchar*) (void*) &page_link);
|
2011-07-04 03:32:53 +02:00
|
|
|
DBUG_ASSERT(!res);
|
WL#3072 - Maria recovery.
* fix for bitmap vs checkpoint bug which could lead to corrupted
tables in case of crashes at certain moments: a bitmap could be flushed
to disk even though it was inconsistent with the log (it could be
flushed before REDO-UNDO are written to the log). One bug remains, need
code from others. Tests added. Fix is to pin unflushable bitmap pages,
and let checkpoint wait for them to be flushable.
* fix for long_trid!=0 assertion failure at Recovery.
* less useless wakeups in the background flush|checkpoint thread.
* store global_trid_generator in checkpoint record.
mysql-test/r/maria-recovery.result:
result update
mysql-test/t/maria-recovery.test:
make it easier to locate subtests
storage/maria/ma_bitmap.c:
When we send a bitmap to the pagecache, if this bitmap is not in a
flushable state we keep it pinned and add it to a list, it will be
unpinned when the bitmap is flushable again.
A new function _ma_bitmap_flush_all() used by checkpoint.
A new function _ma_bitmap_flushable() used by block format to signal
when it starts modifying a bitmap and when it is done with it.
storage/maria/ma_blockrec.c:
When starting a row operation (insert/update/delete), mark that
the bitmap is not flushable (because for example INSERT is going
to over-allocate in the bitmap to prevent other threads from using
our data pages). If a checkpoint comes at this moment it will wait
for the bitmap to be flushable before flushing it.
When the operation ends, bitmap becomes flushable again; that
transition is done under the bitmap's mutex (needed for correct
synchro with a concurrent checkpoint); but for INSERT/UPDATE this
happens inside _ma_bitmap_release_unused() at a place where it already
has the mutex, so the only penalty (mutex adding) is in DELETE and UNDO
of INSERT. In case of errors after setting the bitmap unflushable,
we must always set it back to flushable or checkpoint would block.
Debug possibilities to force a sleep while the bitmap is over-allocated.
In case of error in get_head_or_tail() in allocate_and_write_block_record(),
we still need to unpin all pages.
Bugfix: _ma_apply_redo_insert_row_blobs() produced wrong
data_file_length.
storage/maria/ma_blockrec.h:
new bitmap calls.
storage/maria/ma_checkpoint.c:
filter_flush_indirect not needed anymore (flushing bitmap
pages happens in _ma_bitmap_flush_all() now). So
st_filter_param::is_data_file|pages_covered_by_bitmap not needed.
Other filter_flush* don't need to flush bitmap anymore.
Add debug possibility to flush all bitmap pages outside of a checkpoint,
to simulate pagecache LRU eviction.
When the background flush/checkpoint thread notices it has nothing
to flush, it now sleeps directly until the next potential checkpoint
moment instead of waking up every second.
When in checkpoint we decide to not store a table in the checkpoint record
(because it has logged no writes for example), we can also skip flushing
this table.
storage/maria/ma_commit.c:
comment is out-of-date
storage/maria/ma_key_recover.c:
comment fix
storage/maria/ma_loghandler.c:
comment is out-of-date
storage/maria/ma_open.c:
comment is out-of-date
storage/maria/ma_pagecache.c:
comment for bug to fix. And we don't take checkpoints at end of REDO
phase yet so can trust block->type.
storage/maria/ma_recovery.c:
Comments. Now-unneeded code for incomplete REDO-UNDO groups removed.
When we forget about an old transaction we must really forget
about it with bzero() (fixes the "long_trid!=0 assertion" recovery
bug). When we delete a row with maria_delete() we turn on
STATE_NOT_OPTIMIZED_ROWS so we do the same when we see a CLR_END
for an UNDO_ROW_INSERT or when we execute an UNDO_ROW_INSERT (in both
cases a row was deleted). Pick up max_long_trid from the checkpoint record.
storage/maria/maria_chk.c:
comment
storage/maria/maria_def.h:
MARIA_FILE_BITMAP gets new members: 'flushable', 'bitmap_cond' and
'pinned_pages'.
storage/maria/trnman.c:
I used to think that recovery only needs to know the maximum TrID
of the lists of active and committed transactions. But no, sometimes
both lists can even be empty and their TrID should not be reused.
So Checkpoint now saves global_trid_generator in the checkpoint record.
storage/maria/trnman_public.h:
macros to read/store a TrID
mysql-test/r/maria-recovery-bitmap.result:
result is ok. Without the code fix, we would get a corruption message
about the bitmap page in CHECK TABLE EXTENDED.
mysql-test/t/maria-recovery-bitmap-master.opt:
usual when we crash mysqld in tests
mysql-test/t/maria-recovery-bitmap.test:
test of recovery problems specific to the bitmap pages.
2007-12-14 16:14:12 +01:00
|
|
|
DBUG_RETURN(res);
|
|
|
|
}
|
2007-01-18 20:38:14 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2007-04-19 12:18:56 +02:00
|
|
|
Initialize bitmap variables in share
|
|
|
|
|
|
|
|
SYNOPSIS
|
|
|
|
_ma_bitmap_init()
|
|
|
|
share Share handler
|
2011-07-24 10:25:28 +02:00
|
|
|
file Data file handler
|
|
|
|
last_page Pointer to last page (max_file_size) that needs to be
|
|
|
|
mapped by the bitmap. This is adjusted to bitmap
|
|
|
|
alignment.
|
2007-04-19 12:18:56 +02:00
|
|
|
|
|
|
|
NOTES
|
|
|
|
This is called the first time a file is opened.
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
0 ok
|
|
|
|
1 error
|
2007-01-18 20:38:14 +01:00
|
|
|
*/
|
|
|
|
|
2011-07-24 10:25:28 +02:00
|
|
|
my_bool _ma_bitmap_init(MARIA_SHARE *share, File file,
|
|
|
|
pgcache_page_no_t *last_page)
|
2007-01-18 20:38:14 +01:00
|
|
|
{
|
|
|
|
uint aligned_bit_blocks;
|
|
|
|
uint max_page_size;
|
|
|
|
MARIA_FILE_BITMAP *bitmap= &share->bitmap;
|
|
|
|
uint size= share->block_size;
|
2011-07-04 03:32:53 +02:00
|
|
|
pgcache_page_no_t first_bitmap_with_space;
|
2007-01-18 20:38:14 +01:00
|
|
|
#ifndef DBUG_OFF
|
|
|
|
/* We want to have a copy of the bitmap to be able to print differences */
|
|
|
|
size*= 2;
|
|
|
|
#endif
|
|
|
|
|
WL#3072 - Maria recovery.
* fix for bitmap vs checkpoint bug which could lead to corrupted
tables in case of crashes at certain moments: a bitmap could be flushed
to disk even though it was inconsistent with the log (it could be
flushed before REDO-UNDO are written to the log). One bug remains, need
code from others. Tests added. Fix is to pin unflushable bitmap pages,
and let checkpoint wait for them to be flushable.
* fix for long_trid!=0 assertion failure at Recovery.
* less useless wakeups in the background flush|checkpoint thread.
* store global_trid_generator in checkpoint record.
mysql-test/r/maria-recovery.result:
result update
mysql-test/t/maria-recovery.test:
make it easier to locate subtests
storage/maria/ma_bitmap.c:
When we send a bitmap to the pagecache, if this bitmap is not in a
flushable state we keep it pinned and add it to a list, it will be
unpinned when the bitmap is flushable again.
A new function _ma_bitmap_flush_all() used by checkpoint.
A new function _ma_bitmap_flushable() used by block format to signal
when it starts modifying a bitmap and when it is done with it.
storage/maria/ma_blockrec.c:
When starting a row operation (insert/update/delete), mark that
the bitmap is not flushable (because for example INSERT is going
to over-allocate in the bitmap to prevent other threads from using
our data pages). If a checkpoint comes at this moment it will wait
for the bitmap to be flushable before flushing it.
When the operation ends, bitmap becomes flushable again; that
transition is done under the bitmap's mutex (needed for correct
synchro with a concurrent checkpoint); but for INSERT/UPDATE this
happens inside _ma_bitmap_release_unused() at a place where it already
has the mutex, so the only penalty (mutex adding) is in DELETE and UNDO
of INSERT. In case of errors after setting the bitmap unflushable,
we must always set it back to flushable or checkpoint would block.
Debug possibilities to force a sleep while the bitmap is over-allocated.
In case of error in get_head_or_tail() in allocate_and_write_block_record(),
we still need to unpin all pages.
Bugfix: _ma_apply_redo_insert_row_blobs() produced wrong
data_file_length.
storage/maria/ma_blockrec.h:
new bitmap calls.
storage/maria/ma_checkpoint.c:
filter_flush_indirect not needed anymore (flushing bitmap
pages happens in _ma_bitmap_flush_all() now). So
st_filter_param::is_data_file|pages_covered_by_bitmap not needed.
Other filter_flush* don't need to flush bitmap anymore.
Add debug possibility to flush all bitmap pages outside of a checkpoint,
to simulate pagecache LRU eviction.
When the background flush/checkpoint thread notices it has nothing
to flush, it now sleeps directly until the next potential checkpoint
moment instead of waking up every second.
When in checkpoint we decide to not store a table in the checkpoint record
(because it has logged no writes for example), we can also skip flushing
this table.
storage/maria/ma_commit.c:
comment is out-of-date
storage/maria/ma_key_recover.c:
comment fix
storage/maria/ma_loghandler.c:
comment is out-of-date
storage/maria/ma_open.c:
comment is out-of-date
storage/maria/ma_pagecache.c:
comment for bug to fix. And we don't take checkpoints at end of REDO
phase yet so can trust block->type.
storage/maria/ma_recovery.c:
Comments. Now-unneeded code for incomplete REDO-UNDO groups removed.
When we forget about an old transaction we must really forget
about it with bzero() (fixes the "long_trid!=0 assertion" recovery
bug). When we delete a row with maria_delete() we turn on
STATE_NOT_OPTIMIZED_ROWS so we do the same when we see a CLR_END
for an UNDO_ROW_INSERT or when we execute an UNDO_ROW_INSERT (in both
cases a row was deleted). Pick up max_long_trid from the checkpoint record.
storage/maria/maria_chk.c:
comment
storage/maria/maria_def.h:
MARIA_FILE_BITMAP gets new members: 'flushable', 'bitmap_cond' and
'pinned_pages'.
storage/maria/trnman.c:
I used to think that recovery only needs to know the maximum TrID
of the lists of active and committed transactions. But no, sometimes
both lists can even be empty and their TrID should not be reused.
So Checkpoint now saves global_trid_generator in the checkpoint record.
storage/maria/trnman_public.h:
macros to read/store a TrID
mysql-test/r/maria-recovery-bitmap.result:
result is ok. Without the code fix, we would get a corruption message
about the bitmap page in CHECK TABLE EXTENDED.
mysql-test/t/maria-recovery-bitmap-master.opt:
usual when we crash mysqld in tests
mysql-test/t/maria-recovery-bitmap.test:
test of recovery problems specific to the bitmap pages.
2007-12-14 16:14:12 +01:00
|
|
|
if (((bitmap->map= (uchar*) my_malloc(size, MYF(MY_WME))) == NULL) ||
|
|
|
|
my_init_dynamic_array(&bitmap->pinned_pages,
|
2013-01-23 16:18:09 +01:00
|
|
|
sizeof(MARIA_PINNED_PAGE), 1, 1, MYF(0)))
|
2007-01-18 20:38:14 +01:00
|
|
|
return 1;
|
|
|
|
|
|
|
|
bitmap->block_size= share->block_size;
|
2007-12-18 02:21:32 +01:00
|
|
|
bitmap->file.file= file;
|
WL#3072 - Maria Recovery
* to honour WAL we now force the whole log when flushing a bitmap page.
* ability to intentionally crash in various places for recovery testing
* bugfix (dirty pages list found in checkpoint record was ignored)
* smaller checkpoint record
* misc small cleanups and comments
mysql-test/include/maria_empty_logs.inc:
maria-purge.test creates ~11 logs, remove them all
mysql-test/r/maria-recovery-bitmap.result:
result is good; without the _ma_bitmap_get_log_address() call,
we got
check error Bitmap at 0 has pages reserved outside of data file length
mysql-test/r/maria-recovery.result:
result update
mysql-test/t/maria-recovery-bitmap.test:
enable test of "bitmap-flush should flush whole log otherwise
corrupted data file (bitmap ahead of data pages)".
mysql-test/t/maria-recovery.test:
test of checkpoint
sql/sql_table.cc:
comment
storage/maria/ha_maria.cc:
_ma_reenable_logging_for_table() now includes file->trn=0.
At the end of repair() we don't need to re-enable logging, it is
done already by caller (like copy_data_between_tables()); it sounds
strange that this function could decide to re-enable, it should be
up to caller who knows what other operations it plans. Removing this
line led to assertion failure in maria_lock_database(F_UNLCK), fixed
by removing the assertion: maria_lock_database()
is here called in a context where F_UNLCK does not make the
table visible to others so assertion is excessive, and external_lock()
is already designed to honour the asserted condition.
Ability to crash at the end of bulk insert when indices
have been enabled.
storage/maria/ma_bitmap.c:
Better use pagecache_file_init() than set pagecache callbacks directly;
and a new function to set those callbacks for bitmap so that we can
reuse it.
_ma_bitmap_get_log_address() is a pagecache get_log_address callback
which causes the whole log to be flushed when a bitmap page
is flushed by the page cache. This was required by WAL.
storage/maria/ma_blockrec.c:
get_log_address pagecache callback for data (non bitmap) pages:
just reads the LSN from the page's content, like was hard-coded
before in ma_pagecache.c.
storage/maria/ma_blockrec.h:
functions which need to be exported
storage/maria/ma_check.c:
create_new_data_handle() can be static.
Ability to crash after rebuilding the index in OPTIMIZE,
in REPAIR. my_lock() implemented already.
storage/maria/ma_checkpoint.c:
As MARIA_SHARE* is now accessible to pagecache_collect_changed_blocks_LSN(),
we don't need to store kfile/dfile descriptors in checkpoint record,
2-byte-id of the table plus one byte to say if this is data or index
file is enough. So we go from 4+4 bytes per table down to 2+1.
storage/maria/ma_commit.c:
removing duplicate functions (see _ma_tmp_disable_logging_for_table())
storage/maria/ma_extra.c:
Monty fixed
storage/maria/ma_key_recover.c:
comment
storage/maria/ma_locking.c:
Sometimes other code does funny things with maria_lock_database(),
like ha_maria::repair() calling it at start and end without going
through ha_maria::external_lock(). So it happens that maria_lock_database()
is called with now_transactional!=born_transactional.
storage/maria/ma_loghandler.c:
update to new prototype
storage/maria/ma_open.c:
set_data|index_pagecache_callbacks() need to be exported as
they are now called when disabling/enabling transactionality.
storage/maria/ma_pagecache.c:
Removing PAGE_LSN_OFFSET, as much of the code relies on it being
0 anyway (let's not give impression we can just change this constant).
When flushing a page to disk, call the get_log_address callback to
know up to which LSN the log should be flushed.
As we now can access MARIA_SHARE* we can know share->id and store
it into the checkpoint record; we thus go from 4 bytes per dirty page
to 2+1.
storage/maria/ma_pagecache.h:
get_log_address callback
storage/maria/ma_panic.c:
No reason to reset pagecache callbacks in HA_PANIC_READ:
all we do is reopen files if they were closed; callbacks should
be in place already as 'info' exists; we just want to modify
the file descriptors, not the full PAGECACHE_FILE structure.
If we open data file and it was closed, share->bitmap.file needs
to be set.
Note that the modified code is disabled anyway.
storage/maria/ma_recovery.c:
Checkpoint record does not contain kfile/dfile descriptors anymore
so code can be simplified. Hash key in all_dirty_pages is
not made from file_descriptor & pageno anymore, but
index_or_data & table-short-id & pageno.
If a table's create_rename_lsn is higher than record's LSN,
we skip the table and don't fail if it's corrupted (because the LSNs
say that we don't have to look at this table).
If a table is skipped (for example due to create_rename_lsn),
its UNDOs still cause undo_lsn to advance; this is so that if later
we notice the transaction has to rollback we fail (as table should
not be skipped in this case).
Fixing a bug: the dirty_pages list was never used, because
the LSN below which it was used was the minimum rec_lsn of dirty pages!
It is now the min(checkpoint_start_log_horizon, min(trn's rec_lsn)).
When we disable/reenable transactionality, we modify pagecache
callbacks (needed for example for get_log_address: changing
share->page_type is not enough anymore).
storage/maria/ma_write.c:
'records' and 'checksum' are protected: they are updated under
log's mutex in write-hooks when UNDO is written.
storage/maria/maria_chk.c:
remove use of duplicate functions.
storage/maria/maria_def.h:
set_data|index_pagecache_callbacks() need to be exported;
_ma_reenable_logging_for_table() changes to a real function.
storage/maria/unittest/ma_pagecache_consist.c:
new prototype
storage/maria/unittest/ma_pagecache_single.c:
new prototype
storage/maria/unittest/ma_test_loghandler_pagecache-t.c:
new prototype
2007-12-30 21:32:07 +01:00
|
|
|
_ma_bitmap_set_pagecache_callbacks(&bitmap->file, share);
|
2007-12-18 02:21:32 +01:00
|
|
|
|
2007-12-15 22:31:22 +01:00
|
|
|
/* Size needs to be aligned on 6 */
|
2007-10-09 20:09:50 +02:00
|
|
|
aligned_bit_blocks= (share->block_size - PAGE_SUFFIX_SIZE) / 6;
|
2011-07-24 10:25:28 +02:00
|
|
|
bitmap->max_total_size= bitmap->total_size= aligned_bit_blocks * 6;
|
2007-01-18 20:38:14 +01:00
|
|
|
/*
|
|
|
|
In each 6 bytes, we have 6*8/3 = 16 pages covered
|
|
|
|
The +1 is to add the bitmap page, as this doesn't have to be covered
|
|
|
|
*/
|
|
|
|
bitmap->pages_covered= aligned_bit_blocks * 16 + 1;
|
2011-01-30 13:36:24 +01:00
|
|
|
bitmap->flush_all_requested= bitmap->waiting_for_flush_all_requested=
|
|
|
|
bitmap->waiting_for_non_flushable= 0;
|
2010-11-03 13:14:02 +01:00
|
|
|
bitmap->non_flushable= 0;
|
2007-01-18 20:38:14 +01:00
|
|
|
|
|
|
|
/* Update size for bits */
|
|
|
|
/* TODO: Make this dependent on the row size */
|
2008-01-07 17:54:41 +01:00
|
|
|
max_page_size= share->block_size - PAGE_OVERHEAD_SIZE + DIR_ENTRY_SIZE;
|
2007-01-18 20:38:14 +01:00
|
|
|
bitmap->sizes[0]= max_page_size; /* Empty page */
|
|
|
|
bitmap->sizes[1]= max_page_size - max_page_size * 30 / 100;
|
|
|
|
bitmap->sizes[2]= max_page_size - max_page_size * 60 / 100;
|
|
|
|
bitmap->sizes[3]= max_page_size - max_page_size * 90 / 100;
|
|
|
|
bitmap->sizes[4]= 0; /* Full page */
|
|
|
|
bitmap->sizes[5]= max_page_size - max_page_size * 40 / 100;
|
|
|
|
bitmap->sizes[6]= max_page_size - max_page_size * 80 / 100;
|
|
|
|
bitmap->sizes[7]= 0;
|
|
|
|
|
2011-07-13 21:10:18 +02:00
|
|
|
mysql_mutex_init(key_SHARE_BITMAP_lock,
|
|
|
|
&share->bitmap.bitmap_lock, MY_MUTEX_INIT_SLOW);
|
|
|
|
mysql_cond_init(key_SHARE_BITMAP_cond,
|
|
|
|
&share->bitmap.bitmap_cond, 0);
|
2007-01-18 20:38:14 +01:00
|
|
|
|
2011-07-04 03:32:53 +02:00
|
|
|
first_bitmap_with_space= share->state.first_bitmap_with_space;
|
2007-11-28 20:38:30 +01:00
|
|
|
_ma_bitmap_reset_cache(share);
|
2007-10-19 23:24:22 +02:00
|
|
|
|
2011-07-26 06:52:15 +02:00
|
|
|
/*
|
|
|
|
The bitmaps used to map the file are aligned on 6 bytes. We now
|
|
|
|
calculate the max file size that can be used by the bitmap. This
|
|
|
|
is needed to make ma_info() give a true file size so that the user can
|
|
|
|
estimate if there is still space free for records in the file.
|
|
|
|
*/
|
2007-04-20 14:16:43 +02:00
|
|
|
{
|
2011-07-26 06:52:15 +02:00
|
|
|
pgcache_page_no_t last_bitmap_page;
|
2011-11-29 21:48:24 +01:00
|
|
|
ulong blocks, bytes;
|
2011-07-26 06:52:15 +02:00
|
|
|
|
|
|
|
last_bitmap_page= *last_page - *last_page % bitmap->pages_covered;
|
2011-11-29 21:48:24 +01:00
|
|
|
blocks= (ulong) (*last_page - last_bitmap_page);
|
2011-07-26 06:52:15 +02:00
|
|
|
bytes= (blocks * 3) / 8; /* 3 bit per page / 8 bits per byte */
|
|
|
|
/* Size needs to be aligned on 6 */
|
|
|
|
bytes/= 6;
|
|
|
|
bytes*= 6;
|
|
|
|
bitmap->last_bitmap_page= last_bitmap_page;
|
2011-11-29 02:00:24 +01:00
|
|
|
bitmap->last_total_size= (uint)bytes;
|
2011-07-26 06:52:15 +02:00
|
|
|
*last_page= ((last_bitmap_page + bytes*8/3));
|
2007-04-20 14:16:43 +02:00
|
|
|
}
|
2011-07-24 10:25:28 +02:00
|
|
|
|
2011-07-04 03:32:53 +02:00
|
|
|
/* Restore first_bitmap_with_space if it's reasonable */
|
|
|
|
if (first_bitmap_with_space <= (share->state.state.data_file_length /
|
|
|
|
share->block_size))
|
|
|
|
share->state.first_bitmap_with_space= first_bitmap_with_space;
|
|
|
|
|
2007-04-19 17:48:36 +02:00
|
|
|
return 0;
|
2007-01-18 20:38:14 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
Free data allocated by _ma_bitmap_init
|
2007-04-19 12:18:56 +02:00
|
|
|
|
|
|
|
SYNOPSIS
|
|
|
|
_ma_bitmap_end()
|
|
|
|
share Share handler
|
2007-01-18 20:38:14 +01:00
|
|
|
*/
|
|
|
|
|
|
|
|
my_bool _ma_bitmap_end(MARIA_SHARE *share)
|
|
|
|
{
|
2011-02-10 19:33:51 +01:00
|
|
|
my_bool res;
|
2011-07-13 21:10:18 +02:00
|
|
|
mysql_mutex_assert_owner(&share->close_lock);
|
2011-02-10 19:33:51 +01:00
|
|
|
DBUG_ASSERT(share->bitmap.non_flushable == 0);
|
|
|
|
DBUG_ASSERT(share->bitmap.flush_all_requested == 0);
|
|
|
|
DBUG_ASSERT(share->bitmap.waiting_for_non_flushable == 0 &&
|
|
|
|
share->bitmap.waiting_for_flush_all_requested == 0);
|
|
|
|
DBUG_ASSERT(share->bitmap.pinned_pages.elements == 0);
|
|
|
|
|
|
|
|
res= _ma_bitmap_flush(share);
|
2011-07-13 21:10:18 +02:00
|
|
|
mysql_mutex_destroy(&share->bitmap.bitmap_lock);
|
|
|
|
mysql_cond_destroy(&share->bitmap.bitmap_cond);
|
WL#3072 - Maria recovery.
* fix for bitmap vs checkpoint bug which could lead to corrupted
tables in case of crashes at certain moments: a bitmap could be flushed
to disk even though it was inconsistent with the log (it could be
flushed before REDO-UNDO are written to the log). One bug remains, need
code from others. Tests added. Fix is to pin unflushable bitmap pages,
and let checkpoint wait for them to be flushable.
* fix for long_trid!=0 assertion failure at Recovery.
* less useless wakeups in the background flush|checkpoint thread.
* store global_trid_generator in checkpoint record.
mysql-test/r/maria-recovery.result:
result update
mysql-test/t/maria-recovery.test:
make it easier to locate subtests
storage/maria/ma_bitmap.c:
When we send a bitmap to the pagecache, if this bitmap is not in a
flushable state we keep it pinned and add it to a list, it will be
unpinned when the bitmap is flushable again.
A new function _ma_bitmap_flush_all() used by checkpoint.
A new function _ma_bitmap_flushable() used by block format to signal
when it starts modifying a bitmap and when it is done with it.
storage/maria/ma_blockrec.c:
When starting a row operation (insert/update/delete), mark that
the bitmap is not flushable (because for example INSERT is going
to over-allocate in the bitmap to prevent other threads from using
our data pages). If a checkpoint comes at this moment it will wait
for the bitmap to be flushable before flushing it.
When the operation ends, bitmap becomes flushable again; that
transition is done under the bitmap's mutex (needed for correct
synchro with a concurrent checkpoint); but for INSERT/UPDATE this
happens inside _ma_bitmap_release_unused() at a place where it already
has the mutex, so the only penalty (mutex adding) is in DELETE and UNDO
of INSERT. In case of errors after setting the bitmap unflushable,
we must always set it back to flushable or checkpoint would block.
Debug possibilities to force a sleep while the bitmap is over-allocated.
In case of error in get_head_or_tail() in allocate_and_write_block_record(),
we still need to unpin all pages.
Bugfix: _ma_apply_redo_insert_row_blobs() produced wrong
data_file_length.
storage/maria/ma_blockrec.h:
new bitmap calls.
storage/maria/ma_checkpoint.c:
filter_flush_indirect not needed anymore (flushing bitmap
pages happens in _ma_bitmap_flush_all() now). So
st_filter_param::is_data_file|pages_covered_by_bitmap not needed.
Other filter_flush* don't need to flush bitmap anymore.
Add debug possibility to flush all bitmap pages outside of a checkpoint,
to simulate pagecache LRU eviction.
When the background flush/checkpoint thread notices it has nothing
to flush, it now sleeps directly until the next potential checkpoint
moment instead of waking up every second.
When in checkpoint we decide to not store a table in the checkpoint record
(because it has logged no writes for example), we can also skip flushing
this table.
storage/maria/ma_commit.c:
comment is out-of-date
storage/maria/ma_key_recover.c:
comment fix
storage/maria/ma_loghandler.c:
comment is out-of-date
storage/maria/ma_open.c:
comment is out-of-date
storage/maria/ma_pagecache.c:
comment for bug to fix. And we don't take checkpoints at end of REDO
phase yet so can trust block->type.
storage/maria/ma_recovery.c:
Comments. Now-unneeded code for incomplete REDO-UNDO groups removed.
When we forget about an old transaction we must really forget
about it with bzero() (fixes the "long_trid!=0 assertion" recovery
bug). When we delete a row with maria_delete() we turn on
STATE_NOT_OPTIMIZED_ROWS so we do the same when we see a CLR_END
for an UNDO_ROW_INSERT or when we execute an UNDO_ROW_INSERT (in both
cases a row was deleted). Pick up max_long_trid from the checkpoint record.
storage/maria/maria_chk.c:
comment
storage/maria/maria_def.h:
MARIA_FILE_BITMAP gets new members: 'flushable', 'bitmap_cond' and
'pinned_pages'.
storage/maria/trnman.c:
I used to think that recovery only needs to know the maximum TrID
of the lists of active and committed transactions. But no, sometimes
both lists can even be empty and their TrID should not be reused.
So Checkpoint now saves global_trid_generator in the checkpoint record.
storage/maria/trnman_public.h:
macros to read/store a TrID
mysql-test/r/maria-recovery-bitmap.result:
result is ok. Without the code fix, we would get a corruption message
about the bitmap page in CHECK TABLE EXTENDED.
mysql-test/t/maria-recovery-bitmap-master.opt:
usual when we crash mysqld in tests
mysql-test/t/maria-recovery-bitmap.test:
test of recovery problems specific to the bitmap pages.
2007-12-14 16:14:12 +01:00
|
|
|
delete_dynamic(&share->bitmap.pinned_pages);
|
2011-04-25 17:22:25 +02:00
|
|
|
my_free(share->bitmap.map);
|
2007-04-12 11:05:30 +02:00
|
|
|
share->bitmap.map= 0;
|
2011-01-24 14:19:40 +01:00
|
|
|
/*
|
|
|
|
This is to not get an assert in checkpoint. The bitmap will be flushed
|
|
|
|
at once by _ma_once_end_block_record() as part of the normal flush
|
|
|
|
of the kfile.
|
|
|
|
*/
|
|
|
|
share->bitmap.changed_not_flushed= 0;
|
2007-01-18 20:38:14 +01:00
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
2011-06-24 11:08:45 +02:00
|
|
|
/*
|
|
|
|
Ensure that we have incremented open count before we try to read/write
|
|
|
|
a page while we have the bitmap lock.
|
|
|
|
This is needed to ensure that we don't call _ma_mark_file_changed() as
|
|
|
|
part of flushing a page to disk, as this locks share->internal_lock
|
|
|
|
and then mutex lock would happen in the wrong order.
|
|
|
|
*/
|
|
|
|
|
2011-07-26 06:52:15 +02:00
|
|
|
static inline void _ma_bitmap_mark_file_changed(MARIA_SHARE *share,
|
|
|
|
my_bool flush_translog)
|
2011-06-24 11:08:45 +02:00
|
|
|
{
|
2011-07-26 06:52:15 +02:00
|
|
|
/*
|
|
|
|
It's extremely unlikely that the following test is true as it
|
|
|
|
only happens once if the table has changed.
|
2011-06-24 11:08:45 +02:00
|
|
|
*/
|
|
|
|
if (unlikely(!share->global_changed &&
|
|
|
|
(share->state.changed & STATE_CHANGED)))
|
|
|
|
{
|
|
|
|
/* purecov: begin inspected */
|
|
|
|
/* unlock mutex as it can't be held during _ma_mark_file_changed() */
|
2011-10-19 21:45:18 +02:00
|
|
|
mysql_mutex_unlock(&share->bitmap.bitmap_lock);
|
2011-07-26 06:52:15 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
We have to flush the translog to ensure we have registered that the
|
|
|
|
table is open.
|
|
|
|
*/
|
|
|
|
if (flush_translog && share->now_transactional)
|
|
|
|
(void) translog_flush(share->state.logrec_file_id);
|
|
|
|
|
2012-02-28 22:18:52 +01:00
|
|
|
_ma_mark_file_changed_now(share);
|
2011-10-19 21:45:18 +02:00
|
|
|
mysql_mutex_lock(&share->bitmap.bitmap_lock);
|
2011-06-24 11:08:45 +02:00
|
|
|
/* purecov: end */
|
|
|
|
}
|
|
|
|
}
|
2007-01-18 20:38:14 +01:00
|
|
|
|
|
|
|
/*
|
WL#3072 - Maria recovery
Unit test for recovery: runs ma_test1 and ma_test2 (both only with
INSERTs and DELETEs; UPDATEs disabled as not handled by recovery)
then moves the tables elsewhere; recreates tables from the log, and
compares and fails if there is a difference. Passes now.
Most of maria_read_log.c moved to ma_recovery.c, as it will be re-used
for recovery-from-ha_maria.
Bugfixes of applying of REDO_INSERT, REDO_PURGE_ROW.
Applying of REDO_PURGE_BLOCKS, REDO_DELETE_ALL, REDO_DROP_TABLE,
UNDO_ROW_INSERT (in REDO phase only, i.e. just doing records++),
UNDO_ROW_DELETE, UNDO_ROW_PURGE.
Code cleanups.
Monty: please look for "QQ". Sanja: please look for "Sanja".
Future tasks: recovery of the bitmap (easy), recovery of the state
(make it idempotent), more REDOs (Monty to work on
REDO_UPDATE?), UNDO phase...
Pushing this cset as it looks safe, contains test and bugfixes which
will help Monty implement applying of REDO_UPDATE.
sql/handler.cc:
typo
storage/maria/Makefile.am:
Adding ma_test_recovery (which ma_test_all invokes, and which can
also be run alone). Most of maria_read_log.c moved to ma_recovery.c
storage/maria/ha_maria.cc:
comments
storage/maria/ma_bitmap.c:
fixing comments. 2 -> sizeof(maria_bitmap_marker).
Bitmap-related part of _ma_initialize_datafile() moves in bitmap module.
Now putting the "bm" signature when creating the first bitmap page
(it used to happen only at next open, but that
caused an annoying difference when testing Recovery if the original
run didn't open the table, and it looks more
logical like this: it goes to disk only with its signature correct);
see the "QQ" comment towards the _ma_initialize_data_file() call
in ma_create.c for more).
When reading a bitmap page, verify its signature (happens when normally
using the table or when CHECKing it; not when REPAIRing it).
storage/maria/ma_blockrec.c:
* no need to sync the data file if table is not transactional
* Comments, code cleanup (log-related data moved to log-related code
block, int5store->page_store).
* Store the table's short id into LOGREC_UNDO_ROW_PURGE, like we
do for other records (though this record will soon be replaced
with a CLR).
* If "page" is 1 it means the page which extends from byte
page*block_size+1 to (page+1)*block_size (byte number 1 being
the first byte of the file). The last byte of the file is
data_file_length (same convention).
A new page needs to be created if the last byte of the page is
beyond the last byte of the file, i.e.
(page+1)*block_size+1 > data_file_length, so we correct the test
(bug found when testing log applying for ma_test1 -M -T --skip-update).
* update the page's LSN when removing a row from it during
execution of a REDO_PURGE_ROW record (bug found when testing log
applying for ma_test1 -M -T --skip-update).
* applying of REDO_PURGE_BLOCKs (limited to a one-page range for now).
storage/maria/ma_blockrec.h:
new functions. maria_bitmap_marker does not need to be exported.
storage/maria/ma_close.c:
we can always flush the table's state when closing the last instance
of the table. And it is needed for maria_read_log (as it does
not use maria_lock_database()).
storage/maria/ma_control_file.c:
when in Recovery, some assertions should not be used.
storage/maria/ma_control_file.h:
double-inclusion safe
storage/maria/ma_create.c:
during recovery, don't log records. Comments.
Moving the creation of the first bitmap page to ma_bitmap.c
storage/maria/ma_delete_table.c:
during recovery, don't log records. Log the end-zero of the dropped
table's name, so that recovery can use the string in place without
extending it to fit an end zero.
storage/maria/ma_loghandler.c:
* inwrite_rec_hook also needs access to the MARIA_SHARE, like
prewrite_rec_hook. This will be needed to update
share->records_diff (in the upcoming patch "recovery of the state").
* LOG_DESC::record_ends_group changed to an enum.
* LOG_DESC for LOGREC_REDO_PURGE_BLOCKS and LOGREC_UNDO_ROW_PURGE
corrected
* Sanja please see the @todo LOG BUG
* avoiding DBUG_RETURN(func()) as it gives confusing debug traces.
storage/maria/ma_loghandler.h:
- log write hooks called while the log's lock is held (inwrite_rec_hook)
now need the MARIA_SHARE, like prewrite_rec_hook already had
- instead of a bool saying if this record's type ends groups or not,
we refine: it may not end a group, it may end a group, or it may
be a group in itself. Imagine that we had a physical write failure
to a table before we log the UNDO, we still end up in
external_lock(F_UNLCK) and then we log a COMMIT: we don't want
to consider this COMMIT as ending the group of REDOs (don't want
to execute those REDOs during Recovery), that's why we say "COMMIT
is a group in itself, it aborts any previous group". This also
gives one more sanity check in maria_read_log.
storage/maria/ma_recovery.c:
New Recovery code, replacing the old pseudocode.
Most of maria_read_log moved here.
Call-able from ha_maria, but not enabled yet.
Compared to the previous version of maria_read_log, some bugs have
been fixed, debugging output can go to stdout or a disk file (for now
it's useful for me, later it can be changed), execution of
REDO_DROP_TABLE, REDO_DELETE_ALL, REDO_PURGE_BLOCKS has been added. Duplicate code
has been factored into functions. We abort an unfinished group
of records if we see a record which is a group in itself (like COMMIT).
No need for maria_panic() after a bug (which caused tables to not
be closed) was fixed; if there is yet another bug I prefer to see it.
When opening a table for Recovery, set data_file_length
and key_file_length to their real physical value (these are the
easiest state members to restore :). Warn us if the last page
was truncated (but Recovery handles it).
MARIA_SHARE::state::state::records is now partly recovered (not
idempotent, but works if recreating tables from scratch).
When applying a REDO to a page, stamp it with the UNDO's LSN
(current_group_end_lsn), not with the REDO's LSN; it makes
the table more identical to the original table (easier to compare
the two tables in the end).
Big thing missing: some types of REDOs are not handled,
and the UNDO phase does not exist (missing functions to execute UNDOs
to actually rollback). So for now tests are only inserting/deleting
a few 100 rows, closing the table and seeing if the log is applied ok;
it works. UPDATE not handled.
storage/maria/ma_recovery.h:
new functions: ma_recover() for recovery from inside ha_maria;
_ma_apply_log() for maria_read_log (ma_recover() calls _ma_apply_log()).
Btw, we need to not use the word "recover" for REPAIR/maria_chk anymore.
storage/maria/ma_rename.c:
don't write log records during recovery
storage/maria/ma_test2.c:
- fail if maria_info() or other subtests find some wrong information
- new option -g to skip updates.
- init the translog before creating the table, so that log applying
can work.
- in "#if 0" you'll see some fixed bugs (will be removed).
storage/maria/ma_test_all.sh:
cleanup files. Test log applying.
storage/maria/maria_read_log.c:
most of the logic moves to ma_recovery.c to be shared between
maria_read_log and recovery-from-inside-mysqld.
See ma_recovery.c for additional changes made to the moved code.
storage/maria/ma_test_recovery:
unit test for Recovery. Tests insert and delete,
REDO_UPDATE not yet coded.
Script is called from ma_test_all. Can run standalone.
2007-07-26 11:56:21 +02:00
|
|
|
Send updated bitmap to the page cache
|
2007-04-19 12:18:56 +02:00
|
|
|
|
|
|
|
SYNOPSIS
|
WL#3072 Maria recovery:
fix for bug: if a crash happened right after writing a REDO like this:
REDO - UNDO - REDO*, then recovery would ignore the last REDO* (ok),
rollback: REDO - UNDO - REDO* - REDO - CLR, and a next recovery would
thus execute REDO* instead of skipping it again. Recovery now logs
LOGREC_INCOMPLETE_GROUP when it meets REDO* for the first time,
to draw a boundary and ensure it is always skipped. Tested by hand.
Note: ma_test_all fails "maria_chk: error: Key 1 - Found too many records"
not due to this patch (failed before).
BitKeeper/triggers/post-commit:
no truncation of the commit mail, or how to review patches?
mysql-test/include/maria_verify_recovery.inc:
let caller choose the statement used to crash (sometimes we
want the crash to happen at special places)
mysql-test/t/maria-recovery.test:
user of maria_verify_recovery.inc now specifies statement which the
script should use for crashing.
storage/maria/ma_bitmap.c:
it's easier to search for all places using functions from the bitmap
module (like in ma_blockrec.c) if those exported functions all start
with "_ma_bitmap": renaming some of them.
Assertion that when we read a bitmap page, overwriting bitmap->map,
we are not losing information (i.e. bitmap->changed is false).
storage/maria/ma_blockrec.c:
update to new names. Adding code (disabled, protected by a #ifdef)
that I use to test certain crash scenarios (more to come).
storage/maria/ma_blockrec.h:
update to new names
storage/maria/ma_checkpoint.c:
update to new names
storage/maria/ma_extra.c:
update to new names
storage/maria/ma_loghandler.c:
new LOGREC_INCOMPLETE_GROUP
storage/maria/ma_loghandler.h:
new LOGREC_INCOMPLETE_GROUP
storage/maria/ma_recovery.c:
When at the end of the REDO phase we have identified some transactions
with incomplete REDO groups (REDOs without an UNDO or CLR_END),
for each of them we log LOGREC_INCOMPLETE_GROUP. This way, the
upcoming UNDO phase can write more records for such transaction,
a future recovery won't pair the incomplete group with the
CLR_END (as there is LOGREC_INCOMPLETE_GROUP to draw a boundary).
2007-12-10 23:26:53 +01:00
|
|
|
_ma_bitmap_flush()
|
2007-04-19 12:18:56 +02:00
|
|
|
share Share handler
|
|
|
|
|
|
|
|
NOTES
|
WL#3072 Maria recovery:
fix for bug: if a crash happened right after writing a REDO like this:
REDO - UNDO - REDO*, then recovery would ignore the last REDO* (ok),
rollback: REDO - UNDO - REDO* - REDO - CLR, and a next recovery would
thus execute REDO* instead of skipping it again. Recovery now logs
LOGREC_INCOMPLETE_GROUP when it meets REDO* for the first time,
to draw a boundary and ensure it is always skipped. Tested by hand.
Note: ma_test_all fails "maria_chk: error: Key 1 - Found too many records"
not due to this patch (failed before).
BitKeeper/triggers/post-commit:
no truncation of the commit mail, or how to review patches?
mysql-test/include/maria_verify_recovery.inc:
let caller choose the statement used to crash (sometimes we
want the crash to happen at special places)
mysql-test/t/maria-recovery.test:
user of maria_verify_recovery.inc now specifies statement which the
script should use for crashing.
storage/maria/ma_bitmap.c:
it's easier to search for all places using functions from the bitmap
module (like in ma_blockrec.c) if those exported functions all start
with "_ma_bitmap": renaming some of them.
Assertion that when we read a bitmap page, overwriting bitmap->map,
we are not losing information (i.e. bitmap->changed is false).
storage/maria/ma_blockrec.c:
update to new names. Adding code (disabled, protected by a #ifdef)
that I use to test certain crash scenarios (more to come).
storage/maria/ma_blockrec.h:
update to new names
storage/maria/ma_checkpoint.c:
update to new names
storage/maria/ma_extra.c:
update to new names
storage/maria/ma_loghandler.c:
new LOGREC_INCOMPLETE_GROUP
storage/maria/ma_loghandler.h:
new LOGREC_INCOMPLETE_GROUP
storage/maria/ma_recovery.c:
When at the end of the REDO phase we have identified some transactions
with incomplete REDO groups (REDOs without an UNDO or CLR_END),
for each of them we log LOGREC_INCOMPLETE_GROUP. This way, the
upcoming UNDO phase can write more records for such transaction,
a future recovery won't pair the incomplete group with the
CLR_END (as there is LOGREC_INCOMPLETE_GROUP to draw a boundary).
2007-12-10 23:26:53 +01:00
|
|
|
In the future, _ma_bitmap_flush() will be called to flush changes done
|
2007-04-19 12:18:56 +02:00
|
|
|
by this thread (ie, checking the changed flag is ok). The reason we
|
|
|
|
check it again in the mutex is that if someone else did a flush at the
|
|
|
|
same time, we don't have to do the write.
|
2008-10-14 11:38:07 +02:00
|
|
|
This is also ok for _ma_scan_init_block_record() which does not want to
|
|
|
|
miss rows: it cares only for committed rows, that is, rows for which there
|
|
|
|
was a commit before our transaction started; as commit and transaction's
|
|
|
|
start are protected by the same LOCK_trn_list mutex, we see memory at
|
|
|
|
least as new as at other transaction's commit time, so if the committed
|
|
|
|
rows caused bitmap->changed to be true, we see it; if we see 0 it really
|
|
|
|
means a flush happened since then. So, it's ok to read without bitmap's
|
|
|
|
mutex.
|
2007-04-19 12:18:56 +02:00
|
|
|
|
|
|
|
RETURN
|
|
|
|
0 ok
|
|
|
|
1 error
|
2007-01-18 20:38:14 +01:00
|
|
|
*/
|
|
|
|
|
WL#3072 Maria recovery:
fix for bug: if a crash happened right after writing a REDO like this:
REDO - UNDO - REDO*, then recovery would ignore the last REDO* (ok),
rollback: REDO - UNDO - REDO* - REDO - CLR, and a next recovery would
thus execute REDO* instead of skipping it again. Recovery now logs
LOGREC_INCOMPLETE_GROUP when it meets REDO* for the first time,
to draw a boundary and ensure it is always skipped. Tested by hand.
Note: ma_test_all fails "maria_chk: error: Key 1 - Found too many records"
not due to this patch (failed before).
BitKeeper/triggers/post-commit:
no truncation of the commit mail, or how to review patches?
mysql-test/include/maria_verify_recovery.inc:
let caller choose the statement used to crash (sometimes we
want the crash to happen at special places)
mysql-test/t/maria-recovery.test:
user of maria_verify_recovery.inc now specifies statement which the
script should use for crashing.
storage/maria/ma_bitmap.c:
it's easier to search for all places using functions from the bitmap
module (like in ma_blockrec.c) if those exported functions all start
with "_ma_bitmap": renaming some of them.
Assertion that when we read a bitmap page, overwriting bitmap->map,
we are not losing information (i.e. bitmap->changed is false).
storage/maria/ma_blockrec.c:
update to new names. Adding code (disabled, protected by a #ifdef)
that I use to test certain crash scenarios (more to come).
storage/maria/ma_blockrec.h:
update to new names
storage/maria/ma_checkpoint.c:
update to new names
storage/maria/ma_extra.c:
update to new names
storage/maria/ma_loghandler.c:
new LOGREC_INCOMPLETE_GROUP
storage/maria/ma_loghandler.h:
new LOGREC_INCOMPLETE_GROUP
storage/maria/ma_recovery.c:
When at the end of the REDO phase we have identified some transactions
with incomplete REDO groups (REDOs without an UNDO or CLR_END),
for each of them we log LOGREC_INCOMPLETE_GROUP. This way, the
upcoming UNDO phase can write more records for such transaction,
a future recovery won't pair the incomplete group with the
CLR_END (as there is LOGREC_INCOMPLETE_GROUP to draw a boundary).
2007-12-10 23:26:53 +01:00
|
|
|
my_bool _ma_bitmap_flush(MARIA_SHARE *share)
{
  MARIA_FILE_BITMAP *bitmap= &share->bitmap;
  my_bool error= 0;
  DBUG_ENTER("_ma_bitmap_flush");

  /*
    Cheap test without the mutex: if nothing is marked as changed there
    is nothing to write and we can return at once.
  */
  if (!bitmap->changed)
    DBUG_RETURN(error);

  mysql_mutex_lock(&bitmap->bitmap_lock);
  /*
    Test again now that we hold the mutex; if someone else flushed the
    bitmap in the meantime the write can be skipped.
  */
  if (bitmap->changed)
  {
    /*
      The file must be marked as changed before the bitmap is written.
      Otherwise the write to the pagecache below could force out a page
      belonging to this file, which would cause _ma_mark_file_changed()
      to be called while the bitmap lock is held.
    */
    _ma_bitmap_mark_file_changed(share, 1);
    error= write_changed_bitmap(share, bitmap);
    /* The flag is reset whether or not the write succeeded */
    bitmap->changed= 0;
  }
  mysql_mutex_unlock(&bitmap->bitmap_lock);

  DBUG_RETURN(error);
}
|
|
|
|
|
|
|
|
|
WL#3072 - Maria recovery.
* fix for bitmap vs checkpoint bug which could lead to corrupted
tables in case of crashes at certain moments: a bitmap could be flushed
to disk even though it was inconsistent with the log (it could be
flushed before REDO-UNDO are written to the log). One bug remains, need
code from others. Tests added. Fix is to pin unflushable bitmap pages,
and let checkpoint wait for them to be flushable.
* fix for long_trid!=0 assertion failure at Recovery.
* less useless wakeups in the background flush|checkpoint thread.
* store global_trid_generator in checkpoint record.
mysql-test/r/maria-recovery.result:
result update
mysql-test/t/maria-recovery.test:
make it easier to locate subtests
storage/maria/ma_bitmap.c:
When we send a bitmap to the pagecache, if this bitmap is not in a
flushable state we keep it pinned and add it to a list, it will be
unpinned when the bitmap is flushable again.
A new function _ma_bitmap_flush_all() used by checkpoint.
A new function _ma_bitmap_flushable() used by block format to signal
when it starts modifying a bitmap and when it is done with it.
storage/maria/ma_blockrec.c:
When starting a row operation (insert/update/delete), mark that
the bitmap is not flushable (because for example INSERT is going
to over-allocate in the bitmap to prevent other threads from using
our data pages). If a checkpoint comes at this moment it will wait
for the bitmap to be flushable before flushing it.
When the operation ends, bitmap becomes flushable again; that
transition is done under the bitmap's mutex (needed for correct
synchro with a concurrent checkpoint); but for INSERT/UPDATE this
happens inside _ma_bitmap_release_unused() at a place where it already
has the mutex, so the only penalty (mutex adding) is in DELETE and UNDO
of INSERT. In case of errors after setting the bitmap unflushable,
we must always set it back to flushable or checkpoint would block.
Debug possibilities to force a sleep while the bitmap is over-allocated.
In case of error in get_head_or_tail() in allocate_and_write_block_record(),
we still need to unpin all pages.
Bugfix: _ma_apply_redo_insert_row_blobs() produced wrong
data_file_length.
storage/maria/ma_blockrec.h:
new bitmap calls.
storage/maria/ma_checkpoint.c:
filter_flush_indirect not needed anymore (flushing bitmap
pages happens in _ma_bitmap_flush_all() now). So
st_filter_param::is_data_file|pages_covered_by_bitmap not needed.
Other filter_flush* don't need to flush bitmap anymore.
Add debug possibility to flush all bitmap pages outside of a checkpoint,
to simulate pagecache LRU eviction.
When the background flush/checkpoint thread notices it has nothing
to flush, it now sleeps directly until the next potential checkpoint
moment instead of waking up every second.
When in checkpoint we decide to not store a table in the checkpoint record
(because it has logged no writes for example), we can also skip flushing
this table.
storage/maria/ma_commit.c:
comment is out-of-date
storage/maria/ma_key_recover.c:
comment fix
storage/maria/ma_loghandler.c:
comment is out-of-date
storage/maria/ma_open.c:
comment is out-of-date
storage/maria/ma_pagecache.c:
comment for bug to fix. And we don't take checkpoints at end of REDO
phase yet so can trust block->type.
storage/maria/ma_recovery.c:
Comments. Now-unneeded code for incomplete REDO-UNDO groups removed.
When we forget about an old transaction we must really forget
about it with bzero() (fixes the "long_trid!=0 assertion" recovery
bug). When we delete a row with maria_delete() we turn on
STATE_NOT_OPTIMIZED_ROWS so we do the same when we see a CLR_END
for an UNDO_ROW_INSERT or when we execute an UNDO_ROW_INSERT (in both
cases a row was deleted). Pick up max_long_trid from the checkpoint record.
storage/maria/maria_chk.c:
comment
storage/maria/maria_def.h:
MARIA_FILE_BITMAP gets new members: 'flushable', 'bitmap_cond' and
'pinned_pages'.
storage/maria/trnman.c:
I used to think that recovery only needs to know the maximum TrID
of the lists of active and committed transactions. But no, sometimes
both lists can even be empty and their TrID should not be reused.
So Checkpoint now saves global_trid_generator in the checkpoint record.
storage/maria/trnman_public.h:
macros to read/store a TrID
mysql-test/r/maria-recovery-bitmap.result:
result is ok. Without the code fix, we would get a corruption message
about the bitmap page in CHECK TABLE EXTENDED.
mysql-test/t/maria-recovery-bitmap-master.opt:
usual when we crash mysqld in tests
mysql-test/t/maria-recovery-bitmap.test:
test of recovery problems specific of the bitmap pages.
2007-12-14 16:14:12 +01:00
|
|
|
/**
|
|
|
|
Dirty-page filtering criteria for bitmap pages
|
|
|
|
|
|
|
|
@param type Page's type
|
|
|
|
@param pageno Page's number
|
|
|
|
@param rec_lsn Page's rec_lsn
|
|
|
|
@param arg pages_covered of bitmap
|
|
|
|
*/
|
|
|
|
|
|
|
|
static enum pagecache_flush_filter_result
|
|
|
|
filter_flush_bitmap_pages(enum pagecache_page_type type
|
|
|
|
__attribute__ ((unused)),
|
|
|
|
pgcache_page_no_t pageno,
|
|
|
|
LSN rec_lsn __attribute__ ((unused)),
|
|
|
|
void *arg)
|
|
|
|
{
|
|
|
|
return ((pageno % (*(ulong*)arg)) == 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
Flushes current bitmap page to the pagecache, and then all bitmap pages
|
|
|
|
from pagecache to the file. Used by Checkpoint.
|
|
|
|
|
|
|
|
@param share Table's share
|
|
|
|
*/
|
|
|
|
|
|
|
|
my_bool _ma_bitmap_flush_all(MARIA_SHARE *share)
|
|
|
|
{
|
|
|
|
my_bool res= 0;
|
2011-01-30 13:36:24 +01:00
|
|
|
uint send_signal= 0;
|
WL#3072 - Maria recovery.
* fix for bitmap vs checkpoint bug which could lead to corrupted
tables in case of crashes at certain moments: a bitmap could be flushed
to disk even though it was inconsistent with the log (it could be
flushed before REDO-UNDO are written to the log). One bug remains, need
code from others. Tests added. Fix is to pin unflushable bitmap pages,
and let checkpoint wait for them to be flushable.
* fix for long_trid!=0 assertion failure at Recovery.
* less useless wakeups in the background flush|checkpoint thread.
* store global_trid_generator in checkpoint record.
mysql-test/r/maria-recovery.result:
result update
mysql-test/t/maria-recovery.test:
make it easier to locate subtests
storage/maria/ma_bitmap.c:
When we send a bitmap to the pagecache, if this bitmap is not in a
flushable state we keep it pinned and add it to a list, it will be
unpinned when the bitmap is flushable again.
A new function _ma_bitmap_flush_all() used by checkpoint.
A new function _ma_bitmap_flushable() used by block format to signal
when it starts modifying a bitmap and when it is done with it.
storage/maria/ma_blockrec.c:
When starting a row operation (insert/update/delete), mark that
the bitmap is not flushable (because for example INSERT is going
to over-allocate in the bitmap to prevent other threads from using
our data pages). If a checkpoint comes at this moment it will wait
for the bitmap to be flushable before flushing it.
When the operation ends, bitmap becomes flushable again; that
transition is done under the bitmap's mutex (needed for correct
synchro with a concurrent checkpoint); but for INSERT/UPDATE this
happens inside _ma_bitmap_release_unused() at a place where it already
has the mutex, so the only penalty (mutex adding) is in DELETE and UNDO
of INSERT. In case of errors after setting the bitmap unflushable,
we must always set it back to flushable or checkpoint would block.
Debug possibilities to force a sleep while the bitmap is over-allocated.
In case of error in get_head_or_tail() in allocate_and_write_block_record(),
we still need to unpin all pages.
Bugfix: _ma_apply_redo_insert_row_blobs() produced wrong
data_file_length.
storage/maria/ma_blockrec.h:
new bitmap calls.
storage/maria/ma_checkpoint.c:
filter_flush_indirect not needed anymore (flushing bitmap
pages happens in _ma_bitmap_flush_all() now). So
st_filter_param::is_data_file|pages_covered_by_bitmap not needed.
Other filter_flush* don't need to flush bitmap anymore.
Add debug possibility to flush all bitmap pages outside of a checkpoint,
to simulate pagecache LRU eviction.
When the background flush/checkpoint thread notices it has nothing
to flush, it now sleeps directly until the next potential checkpoint
moment instead of waking up every second.
When in checkpoint we decide to not store a table in the checkpoint record
(because it has logged no writes for example), we can also skip flushing
this table.
storage/maria/ma_commit.c:
comment is out-of-date
storage/maria/ma_key_recover.c:
comment fix
storage/maria/ma_loghandler.c:
comment is out-of-date
storage/maria/ma_open.c:
comment is out-of-date
storage/maria/ma_pagecache.c:
comment for bug to fix. And we don't take checkpoints at end of REDO
phase yet so can trust block->type.
storage/maria/ma_recovery.c:
Comments. Now-unneeded code for incomplete REDO-UNDO groups removed.
When we forget about an old transaction we must really forget
about it with bzero() (fixes the "long_trid!=0 assertion" recovery
bug). When we delete a row with maria_delete() we turn on
STATE_NOT_OPTIMIZED_ROWS so we do the same when we see a CLR_END
for an UNDO_ROW_INSERT or when we execute an UNDO_ROW_INSERT (in both
cases a row was deleted). Pick up max_long_trid from the checkpoint record.
storage/maria/maria_chk.c:
comment
storage/maria/maria_def.h:
MARIA_FILE_BITMAP gets new members: 'flushable', 'bitmap_cond' and
'pinned_pages'.
storage/maria/trnman.c:
I used to think that recovery only needs to know the maximum TrID
of the lists of active and committed transactions. But no, sometimes
both lists can even be empty and their TrID should not be reused.
So Checkpoint now saves global_trid_generator in the checkpoint record.
storage/maria/trnman_public.h:
macros to read/store a TrID
mysql-test/r/maria-recovery-bitmap.result:
result is ok. Without the code fix, we would get a corruption message
about the bitmap page in CHECK TABLE EXTENDED.
mysql-test/t/maria-recovery-bitmap-master.opt:
usual when we crash mysqld in tests
mysql-test/t/maria-recovery-bitmap.test:
test of recovery problems specific of the bitmap pages.
2007-12-14 16:14:12 +01:00
|
|
|
MARIA_FILE_BITMAP *bitmap= &share->bitmap;
|
|
|
|
DBUG_ENTER("_ma_bitmap_flush_all");
|
2011-01-24 14:19:40 +01:00
|
|
|
|
|
|
|
#ifdef EXTRA_DEBUG_BITMAP
|
|
|
|
{
|
|
|
|
char buff[160];
|
|
|
|
uint len= my_sprintf(buff,
|
|
|
|
(buff, "bitmap_flush: fd: %d id: %u "
|
|
|
|
"changed: %d changed_not_flushed: %d "
|
2011-01-30 13:36:24 +01:00
|
|
|
"flush_all_requested: %d",
|
2011-01-24 14:19:40 +01:00
|
|
|
share->bitmap.file.file,
|
|
|
|
share->id,
|
|
|
|
bitmap->changed,
|
|
|
|
bitmap->changed_not_flushed,
|
|
|
|
bitmap->flush_all_requested));
|
|
|
|
(void) translog_log_debug_info(0, LOGREC_DEBUG_INFO_QUERY,
|
|
|
|
(uchar*) buff, len);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2011-07-13 21:10:18 +02:00
|
|
|
mysql_mutex_lock(&bitmap->bitmap_lock);
|
2011-02-10 19:33:51 +01:00
|
|
|
if (!bitmap->changed && !bitmap->changed_not_flushed)
|
|
|
|
{
|
2011-10-19 21:45:18 +02:00
|
|
|
mysql_mutex_unlock(&bitmap->bitmap_lock);
|
2011-02-10 19:33:51 +01:00
|
|
|
DBUG_RETURN(0);
|
|
|
|
}
|
|
|
|
|
2011-07-26 06:52:15 +02:00
|
|
|
_ma_bitmap_mark_file_changed(share, 0);
|
2011-06-24 11:08:45 +02:00
|
|
|
|
2011-02-10 19:33:51 +01:00
|
|
|
/*
|
2011-06-24 11:08:45 +02:00
|
|
|
The following should be true as it was tested above. We have to test
|
|
|
|
this again as _ma_bitmap_mark_file_changed() did temporarily release
|
|
|
|
the bitmap mutex.
|
2011-02-10 19:33:51 +01:00
|
|
|
*/
|
2010-09-10 01:42:12 +02:00
|
|
|
if (bitmap->changed || bitmap->changed_not_flushed)
|
WL#3072 - Maria recovery.
* fix for bitmap vs checkpoint bug which could lead to corrupted
tables in case of crashes at certain moments: a bitmap could be flushed
to disk even though it was inconsistent with the log (it could be
flushed before REDO-UNDO are written to the log). One bug remains, need
code from others. Tests added. Fix is to pin unflushable bitmap pages,
and let checkpoint wait for them to be flushable.
* fix for long_trid!=0 assertion failure at Recovery.
* less useless wakeups in the background flush|checkpoint thread.
* store global_trid_generator in checkpoint record.
mysql-test/r/maria-recovery.result:
result update
mysql-test/t/maria-recovery.test:
make it easier to locate subtests
storage/maria/ma_bitmap.c:
When we send a bitmap to the pagecache, if this bitmap is not in a
flushable state we keep it pinned and add it to a list, it will be
unpinned when the bitmap is flushable again.
A new function _ma_bitmap_flush_all() used by checkpoint.
A new function _ma_bitmap_flushable() used by block format to signal
when it starts modifying a bitmap and when it is done with it.
storage/maria/ma_blockrec.c:
When starting a row operation (insert/update/delete), mark that
the bitmap is not flushable (because for example INSERT is going
to over-allocate in the bitmap to prevent other threads from using
our data pages). If a checkpoint comes at this moment it will wait
for the bitmap to be flushable before flushing it.
When the operation ends, bitmap becomes flushable again; that
transition is done under the bitmap's mutex (needed for correct
synchro with a concurrent checkpoint); but for INSERT/UPDATE this
happens inside _ma_bitmap_release_unused() at a place where it already
has the mutex, so the only penalty (mutex adding) is in DELETE and UNDO
of INSERT. In case of errors after setting the bitmap unflushable,
we must always set it back to flushable or checkpoint would block.
Debug possibilities to force a sleep while the bitmap is over-allocated.
In case of error in get_head_or_tail() in allocate_and_write_block_record(),
we still need to unpin all pages.
Bugfix: _ma_apply_redo_insert_row_blobs() produced wrong
data_file_length.
storage/maria/ma_blockrec.h:
new bitmap calls.
storage/maria/ma_checkpoint.c:
filter_flush_indirect not needed anymore (flushing bitmap
pages happens in _ma_bitmap_flush_all() now). So
st_filter_param::is_data_file|pages_covered_by_bitmap not needed.
Other filter_flush* don't need to flush bitmap anymore.
Add debug possibility to flush all bitmap pages outside of a checkpoint,
to simulate pagecache LRU eviction.
When the background flush/checkpoint thread notices it has nothing
to flush, it now sleeps directly until the next potential checkpoint
moment instead of waking up every second.
When in checkpoint we decide to not store a table in the checkpoint record
(because it has logged no writes for example), we can also skip flushing
this table.
storage/maria/ma_commit.c:
comment is out-of-date
storage/maria/ma_key_recover.c:
comment fix
storage/maria/ma_loghandler.c:
comment is out-of-date
storage/maria/ma_open.c:
comment is out-of-date
storage/maria/ma_pagecache.c:
comment for bug to fix. And we don't take checkpoints at end of REDO
phase yet so can trust block->type.
storage/maria/ma_recovery.c:
Comments. Now-unneeded code for incomplete REDO-UNDO groups removed.
When we forget about an old transaction we must really forget
about it with bzero() (fixes the "long_trid!=0 assertion" recovery
bug). When we delete a row with maria_delete() we turn on
STATE_NOT_OPTIMIZED_ROWS so we do the same when we see a CLR_END
for an UNDO_ROW_INSERT or when we execute an UNDO_ROW_INSERT (in both
cases a row was deleted). Pick up max_long_trid from the checkpoint record.
storage/maria/maria_chk.c:
comment
storage/maria/maria_def.h:
MARIA_FILE_BITMAP gets new members: 'flushable', 'bitmap_cond' and
'pinned_pages'.
storage/maria/trnman.c:
I used to think that recovery only needs to know the maximum TrID
of the lists of active and committed transactions. But no, sometimes
both lists can even be empty and their TrID should not be reused.
So Checkpoint now saves global_trid_generator in the checkpoint record.
storage/maria/trnman_public.h:
macros to read/store a TrID
mysql-test/r/maria-recovery-bitmap.result:
result is ok. Without the code fix, we would get a corruption message
about the bitmap page in CHECK TABLE EXTENDED.
mysql-test/t/maria-recovery-bitmap-master.opt:
usual when we crash mysqld in tests
mysql-test/t/maria-recovery-bitmap.test:
test of recovery problems specific to the bitmap pages.
2007-12-14 16:14:12 +01:00
|
|
|
{
|
2010-11-03 13:14:02 +01:00
|
|
|
bitmap->flush_all_requested++;
|
2011-01-30 13:36:24 +01:00
|
|
|
bitmap->waiting_for_non_flushable++;
|
WL#3072 - Maria recovery.
* fix for bitmap vs checkpoint bug which could lead to corrupted
tables in case of crashes at certain moments: a bitmap could be flushed
to disk even though it was inconsistent with the log (it could be
flushed before REDO-UNDO are written to the log). One bug remains, need
code from others. Tests added. Fix is to pin unflushable bitmap pages,
and let checkpoint wait for them to be flushable.
* fix for long_trid!=0 assertion failure at Recovery.
* less useless wakeups in the background flush|checkpoint thread.
* store global_trid_generator in checkpoint record.
mysql-test/r/maria-recovery.result:
result update
mysql-test/t/maria-recovery.test:
make it easier to locate subtests
storage/maria/ma_bitmap.c:
When we send a bitmap to the pagecache, if this bitmap is not in a
flushable state we keep it pinned and add it to a list, it will be
unpinned when the bitmap is flushable again.
A new function _ma_bitmap_flush_all() used by checkpoint.
A new function _ma_bitmap_flushable() used by block format to signal
when it starts modifying a bitmap and when it is done with it.
storage/maria/ma_blockrec.c:
When starting a row operation (insert/update/delete), mark that
the bitmap is not flushable (because for example INSERT is going
to over-allocate in the bitmap to prevent other threads from using
our data pages). If a checkpoint comes at this moment it will wait
for the bitmap to be flushable before flushing it.
When the operation ends, bitmap becomes flushable again; that
transition is done under the bitmap's mutex (needed for correct
synchro with a concurrent checkpoint); but for INSERT/UPDATE this
happens inside _ma_bitmap_release_unused() at a place where it already
has the mutex, so the only penalty (mutex adding) is in DELETE and UNDO
of INSERT. In case of errors after setting the bitmap unflushable,
we must always set it back to flushable or checkpoint would block.
Debug possibilities to force a sleep while the bitmap is over-allocated.
In case of error in get_head_or_tail() in allocate_and_write_block_record(),
we still need to unpin all pages.
Bugfix: _ma_apply_redo_insert_row_blobs() produced wrong
data_file_length.
storage/maria/ma_blockrec.h:
new bitmap calls.
storage/maria/ma_checkpoint.c:
filter_flush_indirect not needed anymore (flushing bitmap
pages happens in _ma_bitmap_flush_all() now). So
st_filter_param::is_data_file|pages_covered_by_bitmap not needed.
Other filter_flush* don't need to flush bitmap anymore.
Add debug possibility to flush all bitmap pages outside of a checkpoint,
to simulate pagecache LRU eviction.
When the background flush/checkpoint thread notices it has nothing
to flush, it now sleeps directly until the next potential checkpoint
moment instead of waking up every second.
When in checkpoint we decide to not store a table in the checkpoint record
(because it has logged no writes for example), we can also skip flushing
this table.
storage/maria/ma_commit.c:
comment is out-of-date
storage/maria/ma_key_recover.c:
comment fix
storage/maria/ma_loghandler.c:
comment is out-of-date
storage/maria/ma_open.c:
comment is out-of-date
storage/maria/ma_pagecache.c:
comment for bug to fix. And we don't take checkpoints at end of REDO
phase yet so can trust block->type.
storage/maria/ma_recovery.c:
Comments. Now-unneeded code for incomplete REDO-UNDO groups removed.
When we forget about an old transaction we must really forget
about it with bzero() (fixes the "long_trid!=0 assertion" recovery
bug). When we delete a row with maria_delete() we turn on
STATE_NOT_OPTIMIZED_ROWS so we do the same when we see a CLR_END
for an UNDO_ROW_INSERT or when we execute an UNDO_ROW_INSERT (in both
cases a row was deleted). Pick up max_long_trid from the checkpoint record.
storage/maria/maria_chk.c:
comment
storage/maria/maria_def.h:
MARIA_FILE_BITMAP gets new members: 'flushable', 'bitmap_cond' and
'pinned_pages'.
storage/maria/trnman.c:
I used to think that recovery only needs to know the maximum TrID
of the lists of active and committed transactions. But no, sometimes
both lists can even be empty and their TrID should not be reused.
So Checkpoint now saves global_trid_generator in the checkpoint record.
storage/maria/trnman_public.h:
macros to read/store a TrID
mysql-test/r/maria-recovery-bitmap.result:
result is ok. Without the code fix, we would get a corruption message
about the bitmap page in CHECK TABLE EXTENDED.
mysql-test/t/maria-recovery-bitmap-master.opt:
usual when we crash mysqld in tests
mysql-test/t/maria-recovery-bitmap.test:
test of recovery problems specific to the bitmap pages.
2007-12-14 16:14:12 +01:00
|
|
|
#ifndef WRONG_BITMAP_FLUSH
|
2007-12-15 14:17:23 +01:00
|
|
|
while (bitmap->non_flushable > 0)
|
WL#3072 - Maria recovery.
* fix for bitmap vs checkpoint bug which could lead to corrupted
tables in case of crashes at certain moments: a bitmap could be flushed
to disk even though it was inconsistent with the log (it could be
flushed before REDO-UNDO are written to the log). One bug remains, need
code from others. Tests added. Fix is to pin unflushable bitmap pages,
and let checkpoint wait for them to be flushable.
* fix for long_trid!=0 assertion failure at Recovery.
* less useless wakeups in the background flush|checkpoint thread.
* store global_trid_generator in checkpoint record.
mysql-test/r/maria-recovery.result:
result update
mysql-test/t/maria-recovery.test:
make it easier to locate subtests
storage/maria/ma_bitmap.c:
When we send a bitmap to the pagecache, if this bitmap is not in a
flushable state we keep it pinned and add it to a list, it will be
unpinned when the bitmap is flushable again.
A new function _ma_bitmap_flush_all() used by checkpoint.
A new function _ma_bitmap_flushable() used by block format to signal
when it starts modifying a bitmap and when it is done with it.
storage/maria/ma_blockrec.c:
When starting a row operation (insert/update/delete), mark that
the bitmap is not flushable (because for example INSERT is going
to over-allocate in the bitmap to prevent other threads from using
our data pages). If a checkpoint comes at this moment it will wait
for the bitmap to be flushable before flushing it.
When the operation ends, bitmap becomes flushable again; that
transition is done under the bitmap's mutex (needed for correct
synchro with a concurrent checkpoint); but for INSERT/UPDATE this
happens inside _ma_bitmap_release_unused() at a place where it already
has the mutex, so the only penalty (mutex adding) is in DELETE and UNDO
of INSERT. In case of errors after setting the bitmap unflushable,
we must always set it back to flushable or checkpoint would block.
Debug possibilities to force a sleep while the bitmap is over-allocated.
In case of error in get_head_or_tail() in allocate_and_write_block_record(),
we still need to unpin all pages.
Bugfix: _ma_apply_redo_insert_row_blobs() produced wrong
data_file_length.
storage/maria/ma_blockrec.h:
new bitmap calls.
storage/maria/ma_checkpoint.c:
filter_flush_indirect not needed anymore (flushing bitmap
pages happens in _ma_bitmap_flush_all() now). So
st_filter_param::is_data_file|pages_covered_by_bitmap not needed.
Other filter_flush* don't need to flush bitmap anymore.
Add debug possibility to flush all bitmap pages outside of a checkpoint,
to simulate pagecache LRU eviction.
When the background flush/checkpoint thread notices it has nothing
to flush, it now sleeps directly until the next potential checkpoint
moment instead of waking up every second.
When in checkpoint we decide to not store a table in the checkpoint record
(because it has logged no writes for example), we can also skip flushing
this table.
storage/maria/ma_commit.c:
comment is out-of-date
storage/maria/ma_key_recover.c:
comment fix
storage/maria/ma_loghandler.c:
comment is out-of-date
storage/maria/ma_open.c:
comment is out-of-date
storage/maria/ma_pagecache.c:
comment for bug to fix. And we don't take checkpoints at end of REDO
phase yet so can trust block->type.
storage/maria/ma_recovery.c:
Comments. Now-unneeded code for incomplete REDO-UNDO groups removed.
When we forget about an old transaction we must really forget
about it with bzero() (fixes the "long_trid!=0 assertion" recovery
bug). When we delete a row with maria_delete() we turn on
STATE_NOT_OPTIMIZED_ROWS so we do the same when we see a CLR_END
for an UNDO_ROW_INSERT or when we execute an UNDO_ROW_INSERT (in both
cases a row was deleted). Pick up max_long_trid from the checkpoint record.
storage/maria/maria_chk.c:
comment
storage/maria/maria_def.h:
MARIA_FILE_BITMAP gets new members: 'flushable', 'bitmap_cond' and
'pinned_pages'.
storage/maria/trnman.c:
I used to think that recovery only needs to know the maximum TrID
of the lists of active and committed transactions. But no, sometimes
both lists can even be empty and their TrID should not be reused.
So Checkpoint now saves global_trid_generator in the checkpoint record.
storage/maria/trnman_public.h:
macros to read/store a TrID
mysql-test/r/maria-recovery-bitmap.result:
result is ok. Without the code fix, we would get a corruption message
about the bitmap page in CHECK TABLE EXTENDED.
mysql-test/t/maria-recovery-bitmap-master.opt:
usual when we crash mysqld in tests
mysql-test/t/maria-recovery-bitmap.test:
test of recovery problems specific to the bitmap pages.
2007-12-14 16:14:12 +01:00
|
|
|
{
|
|
|
|
DBUG_PRINT("info", ("waiting for bitmap to be flushable"));
|
2011-07-13 21:10:18 +02:00
|
|
|
mysql_cond_wait(&bitmap->bitmap_cond, &bitmap->bitmap_lock);
|
WL#3072 - Maria recovery.
* fix for bitmap vs checkpoint bug which could lead to corrupted
tables in case of crashes at certain moments: a bitmap could be flushed
to disk even though it was inconsistent with the log (it could be
flushed before REDO-UNDO are written to the log). One bug remains, need
code from others. Tests added. Fix is to pin unflushable bitmap pages,
and let checkpoint wait for them to be flushable.
* fix for long_trid!=0 assertion failure at Recovery.
* less useless wakeups in the background flush|checkpoint thread.
* store global_trid_generator in checkpoint record.
mysql-test/r/maria-recovery.result:
result update
mysql-test/t/maria-recovery.test:
make it easier to locate subtests
storage/maria/ma_bitmap.c:
When we send a bitmap to the pagecache, if this bitmap is not in a
flushable state we keep it pinned and add it to a list, it will be
unpinned when the bitmap is flushable again.
A new function _ma_bitmap_flush_all() used by checkpoint.
A new function _ma_bitmap_flushable() used by block format to signal
when it starts modifying a bitmap and when it is done with it.
storage/maria/ma_blockrec.c:
When starting a row operation (insert/update/delete), mark that
the bitmap is not flushable (because for example INSERT is going
to over-allocate in the bitmap to prevent other threads from using
our data pages). If a checkpoint comes at this moment it will wait
for the bitmap to be flushable before flushing it.
When the operation ends, bitmap becomes flushable again; that
transition is done under the bitmap's mutex (needed for correct
synchro with a concurrent checkpoint); but for INSERT/UPDATE this
happens inside _ma_bitmap_release_unused() at a place where it already
has the mutex, so the only penalty (mutex adding) is in DELETE and UNDO
of INSERT. In case of errors after setting the bitmap unflushable,
we must always set it back to flushable or checkpoint would block.
Debug possibilities to force a sleep while the bitmap is over-allocated.
In case of error in get_head_or_tail() in allocate_and_write_block_record(),
we still need to unpin all pages.
Bugfix: _ma_apply_redo_insert_row_blobs() produced wrong
data_file_length.
storage/maria/ma_blockrec.h:
new bitmap calls.
storage/maria/ma_checkpoint.c:
filter_flush_indirect not needed anymore (flushing bitmap
pages happens in _ma_bitmap_flush_all() now). So
st_filter_param::is_data_file|pages_covered_by_bitmap not needed.
Other filter_flush* don't need to flush bitmap anymore.
Add debug possibility to flush all bitmap pages outside of a checkpoint,
to simulate pagecache LRU eviction.
When the background flush/checkpoint thread notices it has nothing
to flush, it now sleeps directly until the next potential checkpoint
moment instead of waking up every second.
When in checkpoint we decide to not store a table in the checkpoint record
(because it has logged no writes for example), we can also skip flushing
this table.
storage/maria/ma_commit.c:
comment is out-of-date
storage/maria/ma_key_recover.c:
comment fix
storage/maria/ma_loghandler.c:
comment is out-of-date
storage/maria/ma_open.c:
comment is out-of-date
storage/maria/ma_pagecache.c:
comment for bug to fix. And we don't take checkpoints at end of REDO
phase yet so can trust block->type.
storage/maria/ma_recovery.c:
Comments. Now-unneeded code for incomplete REDO-UNDO groups removed.
When we forget about an old transaction we must really forget
about it with bzero() (fixes the "long_trid!=0 assertion" recovery
bug). When we delete a row with maria_delete() we turn on
STATE_NOT_OPTIMIZED_ROWS so we do the same when we see a CLR_END
for an UNDO_ROW_INSERT or when we execute an UNDO_ROW_INSERT (in both
cases a row was deleted). Pick up max_long_trid from the checkpoint record.
storage/maria/maria_chk.c:
comment
storage/maria/maria_def.h:
MARIA_FILE_BITMAP gets new members: 'flushable', 'bitmap_cond' and
'pinned_pages'.
storage/maria/trnman.c:
I used to think that recovery only needs to know the maximum TrID
of the lists of active and committed transactions. But no, sometimes
both lists can even be empty and their TrID should not be reused.
So Checkpoint now saves global_trid_generator in the checkpoint record.
storage/maria/trnman_public.h:
macros to read/store a TrID
mysql-test/r/maria-recovery-bitmap.result:
result is ok. Without the code fix, we would get a corruption message
about the bitmap page in CHECK TABLE EXTENDED.
mysql-test/t/maria-recovery-bitmap-master.opt:
usual when we crash mysqld in tests
mysql-test/t/maria-recovery-bitmap.test:
test of recovery problems specific to the bitmap pages.
2007-12-14 16:14:12 +01:00
|
|
|
}
|
|
|
|
#endif
|
2011-01-30 13:36:24 +01:00
|
|
|
bitmap->waiting_for_non_flushable--;
|
2011-01-24 14:19:40 +01:00
|
|
|
#ifdef EXTRA_DEBUG_BITMAP
|
|
|
|
{
|
|
|
|
char tmp[MAX_BITMAP_INFO_LENGTH];
|
|
|
|
_ma_get_bitmap_description(bitmap, bitmap->map, bitmap->page, tmp);
|
|
|
|
(void) translog_log_debug_info(0, LOGREC_DEBUG_INFO_QUERY,
|
|
|
|
(uchar*) tmp, strlen(tmp));
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2010-11-03 13:14:02 +01:00
|
|
|
DBUG_ASSERT(bitmap->flush_all_requested == 1);
|
WL#3072 - Maria recovery.
* fix for bitmap vs checkpoint bug which could lead to corrupted
tables in case of crashes at certain moments: a bitmap could be flushed
to disk even though it was inconsistent with the log (it could be
flushed before REDO-UNDO are written to the log). One bug remains, need
code from others. Tests added. Fix is to pin unflushable bitmap pages,
and let checkpoint wait for them to be flushable.
* fix for long_trid!=0 assertion failure at Recovery.
* less useless wakeups in the background flush|checkpoint thread.
* store global_trid_generator in checkpoint record.
mysql-test/r/maria-recovery.result:
result update
mysql-test/t/maria-recovery.test:
make it easier to locate subtests
storage/maria/ma_bitmap.c:
When we send a bitmap to the pagecache, if this bitmap is not in a
flushable state we keep it pinned and add it to a list, it will be
unpinned when the bitmap is flushable again.
A new function _ma_bitmap_flush_all() used by checkpoint.
A new function _ma_bitmap_flushable() used by block format to signal
when it starts modifying a bitmap and when it is done with it.
storage/maria/ma_blockrec.c:
When starting a row operation (insert/update/delete), mark that
the bitmap is not flushable (because for example INSERT is going
to over-allocate in the bitmap to prevent other threads from using
our data pages). If a checkpoint comes at this moment it will wait
for the bitmap to be flushable before flushing it.
When the operation ends, bitmap becomes flushable again; that
transition is done under the bitmap's mutex (needed for correct
synchro with a concurrent checkpoint); but for INSERT/UPDATE this
happens inside _ma_bitmap_release_unused() at a place where it already
has the mutex, so the only penalty (mutex adding) is in DELETE and UNDO
of INSERT. In case of errors after setting the bitmap unflushable,
we must always set it back to flushable or checkpoint would block.
Debug possibilities to force a sleep while the bitmap is over-allocated.
In case of error in get_head_or_tail() in allocate_and_write_block_record(),
we still need to unpin all pages.
Bugfix: _ma_apply_redo_insert_row_blobs() produced wrong
data_file_length.
storage/maria/ma_blockrec.h:
new bitmap calls.
storage/maria/ma_checkpoint.c:
filter_flush_indirect not needed anymore (flushing bitmap
pages happens in _ma_bitmap_flush_all() now). So
st_filter_param::is_data_file|pages_covered_by_bitmap not needed.
Other filter_flush* don't need to flush bitmap anymore.
Add debug possibility to flush all bitmap pages outside of a checkpoint,
to simulate pagecache LRU eviction.
When the background flush/checkpoint thread notices it has nothing
to flush, it now sleeps directly until the next potential checkpoint
moment instead of waking up every second.
When in checkpoint we decide to not store a table in the checkpoint record
(because it has logged no writes for example), we can also skip flushing
this table.
storage/maria/ma_commit.c:
comment is out-of-date
storage/maria/ma_key_recover.c:
comment fix
storage/maria/ma_loghandler.c:
comment is out-of-date
storage/maria/ma_open.c:
comment is out-of-date
storage/maria/ma_pagecache.c:
comment for bug to fix. And we don't take checkpoints at end of REDO
phase yet so can trust block->type.
storage/maria/ma_recovery.c:
Comments. Now-unneeded code for incomplete REDO-UNDO groups removed.
When we forget about an old transaction we must really forget
about it with bzero() (fixes the "long_trid!=0 assertion" recovery
bug). When we delete a row with maria_delete() we turn on
STATE_NOT_OPTIMIZED_ROWS so we do the same when we see a CLR_END
for an UNDO_ROW_INSERT or when we execute an UNDO_ROW_INSERT (in both
cases a row was deleted). Pick up max_long_trid from the checkpoint record.
storage/maria/maria_chk.c:
comment
storage/maria/maria_def.h:
MARIA_FILE_BITMAP gets new members: 'flushable', 'bitmap_cond' and
'pinned_pages'.
storage/maria/trnman.c:
I used to think that recovery only needs to know the maximum TrID
of the lists of active and committed transactions. But no, sometimes
both lists can even be empty and their TrID should not be reused.
So Checkpoint now saves global_trid_generator in the checkpoint record.
storage/maria/trnman_public.h:
macros to read/store a TrID
mysql-test/r/maria-recovery-bitmap.result:
result is ok. Without the code fix, we would get a corruption message
about the bitmap page in CHECK TABLE EXTENDED.
mysql-test/t/maria-recovery-bitmap-master.opt:
usual when we crash mysqld in tests
mysql-test/t/maria-recovery-bitmap.test:
test of recovery problems specific to the bitmap pages.
2007-12-14 16:14:12 +01:00
|
|
|
/*
|
|
|
|
Bitmap is in a flushable state: its contents in memory are reflected by
|
|
|
|
log records (complete REDO-UNDO groups) and all bitmap pages are
|
|
|
|
unpinned. We keep the mutex to preserve this situation, and flush to the
|
|
|
|
file.
|
|
|
|
*/
|
2008-10-14 11:38:07 +02:00
|
|
|
if (bitmap->changed)
|
|
|
|
{
|
|
|
|
bitmap->changed= FALSE;
|
2010-08-12 19:55:00 +02:00
|
|
|
res= write_changed_bitmap(share, bitmap);
|
2008-10-14 11:38:07 +02:00
|
|
|
}
|
WL#3072 - Maria recovery.
* fix for bitmap vs checkpoint bug which could lead to corrupted
tables in case of crashes at certain moments: a bitmap could be flushed
to disk even though it was inconsistent with the log (it could be
flushed before REDO-UNDO are written to the log). One bug remains, need
code from others. Tests added. Fix is to pin unflushable bitmap pages,
and let checkpoint wait for them to be flushable.
* fix for long_trid!=0 assertion failure at Recovery.
* less useless wakeups in the background flush|checkpoint thread.
* store global_trid_generator in checkpoint record.
mysql-test/r/maria-recovery.result:
result update
mysql-test/t/maria-recovery.test:
make it easier to locate subtests
storage/maria/ma_bitmap.c:
When we send a bitmap to the pagecache, if this bitmap is not in a
flushable state we keep it pinned and add it to a list, it will be
unpinned when the bitmap is flushable again.
A new function _ma_bitmap_flush_all() used by checkpoint.
A new function _ma_bitmap_flushable() used by block format to signal
when it starts modifying a bitmap and when it is done with it.
storage/maria/ma_blockrec.c:
When starting a row operation (insert/update/delete), mark that
the bitmap is not flushable (because for example INSERT is going
to over-allocate in the bitmap to prevent other threads from using
our data pages). If a checkpoint comes at this moment it will wait
for the bitmap to be flushable before flushing it.
When the operation ends, bitmap becomes flushable again; that
transition is done under the bitmap's mutex (needed for correct
synchro with a concurrent checkpoint); but for INSERT/UPDATE this
happens inside _ma_bitmap_release_unused() at a place where it already
has the mutex, so the only penalty (mutex adding) is in DELETE and UNDO
of INSERT. In case of errors after setting the bitmap unflushable,
we must always set it back to flushable or checkpoint would block.
Debug possibilities to force a sleep while the bitmap is over-allocated.
In case of error in get_head_or_tail() in allocate_and_write_block_record(),
we still need to unpin all pages.
Bugfix: _ma_apply_redo_insert_row_blobs() produced wrong
data_file_length.
storage/maria/ma_blockrec.h:
new bitmap calls.
storage/maria/ma_checkpoint.c:
filter_flush_indirect not needed anymore (flushing bitmap
pages happens in _ma_bitmap_flush_all() now). So
st_filter_param::is_data_file|pages_covered_by_bitmap not needed.
Other filter_flush* don't need to flush bitmap anymore.
Add debug possibility to flush all bitmap pages outside of a checkpoint,
to simulate pagecache LRU eviction.
When the background flush/checkpoint thread notices it has nothing
to flush, it now sleeps directly until the next potential checkpoint
moment instead of waking up every second.
When in checkpoint we decide to not store a table in the checkpoint record
(because it has logged no writes for example), we can also skip flushing
this table.
storage/maria/ma_commit.c:
comment is out-of-date
storage/maria/ma_key_recover.c:
comment fix
storage/maria/ma_loghandler.c:
comment is out-of-date
storage/maria/ma_open.c:
comment is out-of-date
storage/maria/ma_pagecache.c:
comment for bug to fix. And we don't take checkpoints at end of REDO
phase yet so can trust block->type.
storage/maria/ma_recovery.c:
Comments. Now-unneeded code for incomplete REDO-UNDO groups removed.
When we forget about an old transaction we must really forget
about it with bzero() (fixes the "long_trid!=0 assertion" recovery
bug). When we delete a row with maria_delete() we turn on
STATE_NOT_OPTIMIZED_ROWS so we do the same when we see a CLR_END
for an UNDO_ROW_INSERT or when we execute an UNDO_ROW_INSERT (in both
cases a row was deleted). Pick up max_long_trid from the checkpoint record.
storage/maria/maria_chk.c:
comment
storage/maria/maria_def.h:
MARIA_FILE_BITMAP gets new members: 'flushable', 'bitmap_cond' and
'pinned_pages'.
storage/maria/trnman.c:
I used to think that recovery only needs to know the maximum TrID
of the lists of active and committed transactions. But no, sometimes
both lists can even be empty and their TrID should not be reused.
So Checkpoint now saves global_trid_generator in the checkpoint record.
storage/maria/trnman_public.h:
macros to read/store a TrID
mysql-test/r/maria-recovery-bitmap.result:
result is ok. Without the code fix, we would get a corruption message
about the bitmap page in CHECK TABLE EXTENDED.
mysql-test/t/maria-recovery-bitmap-master.opt:
usual when we crash mysqld in tests
mysql-test/t/maria-recovery-bitmap.test:
test of recovery problems specific to the bitmap pages.
2007-12-14 16:14:12 +01:00
|
|
|
/*
|
|
|
|
We do NOT use FLUSH_KEEP_LAZY because we must be sure that bitmap
|
|
|
|
pages have been flushed. That's a condition of correctness of
|
|
|
|
Recovery: data pages may have been all flushed, if we write the
|
|
|
|
checkpoint record, Recovery will start from after their REDOs. If
|
|
|
|
bitmap page was not flushed, as the REDOs about it will be skipped, it
|
|
|
|
will wrongly not be recovered. If bitmap pages had a rec_lsn it would
|
|
|
|
be different.
|
2007-12-15 14:17:23 +01:00
|
|
|
There should be no pinned pages as bitmap->non_flushable==0.
|
WL#3072 - Maria recovery.
* fix for bitmap vs checkpoint bug which could lead to corrupted
tables in case of crashes at certain moments: a bitmap could be flushed
to disk even though it was inconsistent with the log (it could be
flushed before REDO-UNDO are written to the log). One bug remains, need
code from others. Tests added. Fix is to pin unflushable bitmap pages,
and let checkpoint wait for them to be flushable.
* fix for long_trid!=0 assertion failure at Recovery.
* less useless wakeups in the background flush|checkpoint thread.
* store global_trid_generator in checkpoint record.
mysql-test/r/maria-recovery.result:
result update
mysql-test/t/maria-recovery.test:
make it easier to locate subtests
storage/maria/ma_bitmap.c:
When we send a bitmap to the pagecache, if this bitmap is not in a
flushable state we keep it pinned and add it to a list, it will be
unpinned when the bitmap is flushable again.
A new function _ma_bitmap_flush_all() used by checkpoint.
A new function _ma_bitmap_flushable() used by block format to signal
when it starts modifying a bitmap and when it is done with it.
storage/maria/ma_blockrec.c:
When starting a row operation (insert/update/delete), mark that
the bitmap is not flushable (because for example INSERT is going
to over-allocate in the bitmap to prevent other threads from using
our data pages). If a checkpoint comes at this moment it will wait
for the bitmap to be flushable before flushing it.
When the operation ends, bitmap becomes flushable again; that
transition is done under the bitmap's mutex (needed for correct
synchro with a concurrent checkpoint); but for INSERT/UPDATE this
happens inside _ma_bitmap_release_unused() at a place where it already
has the mutex, so the only penalty (mutex adding) is in DELETE and UNDO
of INSERT. In case of errors after setting the bitmap unflushable,
we must always set it back to flushable or checkpoint would block.
Debug possibilities to force a sleep while the bitmap is over-allocated.
In case of error in get_head_or_tail() in allocate_and_write_block_record(),
we still need to unpin all pages.
Bugfix: _ma_apply_redo_insert_row_blobs() produced wrong
data_file_length.
storage/maria/ma_blockrec.h:
new bitmap calls.
storage/maria/ma_checkpoint.c:
filter_flush_indirect not needed anymore (flushing bitmap
pages happens in _ma_bitmap_flush_all() now). So
st_filter_param::is_data_file|pages_covered_by_bitmap not needed.
Other filter_flush* don't need to flush bitmap anymore.
Add debug possibility to flush all bitmap pages outside of a checkpoint,
to simulate pagecache LRU eviction.
When the background flush/checkpoint thread notices it has nothing
to flush, it now sleeps directly until the next potential checkpoint
moment instead of waking up every second.
When in checkpoint we decide to not store a table in the checkpoint record
(because it has logged no writes for example), we can also skip flushing
this table.
storage/maria/ma_commit.c:
comment is out-of-date
storage/maria/ma_key_recover.c:
comment fix
storage/maria/ma_loghandler.c:
comment is out-of-date
storage/maria/ma_open.c:
comment is out-of-date
storage/maria/ma_pagecache.c:
comment for bug to fix. And we don't take checkpoints at end of REDO
phase yet so can trust block->type.
storage/maria/ma_recovery.c:
Comments. Now-unneeded code for incomplete REDO-UNDO groups removed.
When we forget about an old transaction we must really forget
about it with bzero() (fixes the "long_trid!=0 assertion" recovery
bug). When we delete a row with maria_delete() we turn on
STATE_NOT_OPTIMIZED_ROWS so we do the same when we see a CLR_END
for an UNDO_ROW_INSERT or when we execute an UNDO_ROW_INSERT (in both
cases a row was deleted). Pick up max_long_trid from the checkpoint record.
storage/maria/maria_chk.c:
comment
storage/maria/maria_def.h:
MARIA_FILE_BITMAP gets new members: 'flushable', 'bitmap_cond' and
'pinned_pages'.
storage/maria/trnman.c:
I used to think that recovery only needs to know the maximum TrID
of the lists of active and committed transactions. But no, sometimes
both lists can even be empty and their TrID should not be reused.
So Checkpoint now saves global_trid_generator in the checkpoint record.
storage/maria/trnman_public.h:
macros to read/store a TrID
mysql-test/r/maria-recovery-bitmap.result:
result is ok. Without the code fix, we would get a corruption message
about the bitmap page in CHECK TABLE EXTENDED.
mysql-test/t/maria-recovery-bitmap-master.opt:
usual when we crash mysqld in tests
mysql-test/t/maria-recovery-bitmap.test:
test of recovery problems specific of the bitmap pages.
2007-12-14 16:14:12 +01:00
|
|
|
*/
|
|
|
|
if (flush_pagecache_blocks_with_filter(share->pagecache,
|
|
|
|
&bitmap->file, FLUSH_KEEP,
|
|
|
|
filter_flush_bitmap_pages,
|
|
|
|
&bitmap->pages_covered) &
|
|
|
|
PCFLUSH_PINNED_AND_ERROR)
|
|
|
|
res= TRUE;
|
2010-09-10 01:42:12 +02:00
|
|
|
bitmap->changed_not_flushed= FALSE;
|
2010-11-03 13:14:02 +01:00
|
|
|
bitmap->flush_all_requested--;
|
2007-12-15 14:17:23 +01:00
|
|
|
/*
|
|
|
|
Some well-behaved threads may be waiting for flush_all_requested to
|
|
|
|
become false, wake them up.
|
|
|
|
*/
|
|
|
|
DBUG_PRINT("info", ("bitmap flusher waking up others"));
|
2011-01-30 13:36:24 +01:00
|
|
|
send_signal= (bitmap->waiting_for_flush_all_requested |
|
|
|
|
bitmap->waiting_for_non_flushable);
|
WL#3072 - Maria recovery.
* fix for bitmap vs checkpoint bug which could lead to corrupted
tables in case of crashes at certain moments: a bitmap could be flushed
to disk even though it was inconsistent with the log (it could be
flushed before REDO-UNDO are written to the log). One bug remains, need
code from others. Tests added. Fix is to pin unflushable bitmap pages,
and let checkpoint wait for them to be flushable.
* fix for long_trid!=0 assertion failure at Recovery.
* less useless wakeups in the background flush|checkpoint thread.
* store global_trid_generator in checkpoint record.
mysql-test/r/maria-recovery.result:
result update
mysql-test/t/maria-recovery.test:
make it easier to locate subtests
storage/maria/ma_bitmap.c:
When we send a bitmap to the pagecache, if this bitmap is not in a
flushable state we keep it pinned and add it to a list, it will be
unpinned when the bitmap is flushable again.
A new function _ma_bitmap_flush_all() used by checkpoint.
A new function _ma_bitmap_flushable() used by block format to signal
when it starts modifying a bitmap and when it is done with it.
storage/maria/ma_blockrec.c:
When starting a row operation (insert/update/delete), mark that
the bitmap is not flushable (because for example INSERT is going
to over-allocate in the bitmap to prevent other threads from using
our data pages). If a checkpoint comes at this moment it will wait
for the bitmap to be flushable before flushing it.
When the operation ends, bitmap becomes flushable again; that
transition is done under the bitmap's mutex (needed for correct
synchro with a concurrent checkpoint); but for INSERT/UPDATE this
happens inside _ma_bitmap_release_unused() at a place where it already
has the mutex, so the only penalty (mutex adding) is in DELETE and UNDO
of INSERT. In case of errors after setting the bitmap unflushable,
we must always set it back to flushable or checkpoint would block.
Debug possibilities to force a sleep while the bitmap is over-allocated.
In case of error in get_head_or_tail() in allocate_and_write_block_record(),
we still need to unpin all pages.
Bugfix: _ma_apply_redo_insert_row_blobs() produced wrong
data_file_length.
storage/maria/ma_blockrec.h:
new bitmap calls.
storage/maria/ma_checkpoint.c:
filter_flush_indirect not needed anymore (flushing bitmap
pages happens in _ma_bitmap_flush_all() now). So
st_filter_param::is_data_file|pages_covered_by_bitmap not needed.
Other filter_flush* don't need to flush bitmap anymore.
Add debug possibility to flush all bitmap pages outside of a checkpoint,
to simulate pagecache LRU eviction.
When the background flush/checkpoint thread notices it has nothing
to flush, it now sleeps directly until the next potential checkpoint
moment instead of waking up every second.
When in checkpoint we decide to not store a table in the checkpoint record
(because it has logged no writes for example), we can also skip flushing
this table.
storage/maria/ma_commit.c:
comment is out-of-date
storage/maria/ma_key_recover.c:
comment fix
storage/maria/ma_loghandler.c:
comment is out-of-date
storage/maria/ma_open.c:
comment is out-of-date
storage/maria/ma_pagecache.c:
comment for bug to fix. And we don't take checkpoints at end of REDO
phase yet so can trust block->type.
storage/maria/ma_recovery.c:
Comments. Now-unneeded code for incomplete REDO-UNDO groups removed.
When we forget about an old transaction we must really forget
about it with bzero() (fixes the "long_trid!=0 assertion" recovery
bug). When we delete a row with maria_delete() we turn on
STATE_NOT_OPTIMIZED_ROWS so we do the same when we see a CLR_END
for an UNDO_ROW_INSERT or when we execute an UNDO_ROW_INSERT (in both
cases a row was deleted). Pick up max_long_trid from the checkpoint record.
storage/maria/maria_chk.c:
comment
storage/maria/maria_def.h:
MARIA_FILE_BITMAP gets new members: 'flushable', 'bitmap_cond' and
'pinned_pages'.
storage/maria/trnman.c:
I used to think that recovery only needs to know the maximum TrID
of the lists of active and committed transactions. But no, sometimes
both lists can even be empty and their TrID should not be reused.
So Checkpoint now saves global_trid_generator in the checkpoint record.
storage/maria/trnman_public.h:
macros to read/store a TrID
mysql-test/r/maria-recovery-bitmap.result:
result is ok. Without the code fix, we would get a corruption message
about the bitmap page in CHECK TABLE EXTENDED.
mysql-test/t/maria-recovery-bitmap-master.opt:
usual when we crash mysqld in tests
mysql-test/t/maria-recovery-bitmap.test:
test of recovery problems specific of the bitmap pages.
2007-12-14 16:14:12 +01:00
|
|
|
}
|
2011-07-13 21:10:18 +02:00
|
|
|
mysql_mutex_unlock(&bitmap->bitmap_lock);
|
2011-01-30 13:36:24 +01:00
|
|
|
if (send_signal)
|
2011-10-19 21:45:18 +02:00
|
|
|
mysql_cond_broadcast(&bitmap->bitmap_cond);
|
WL#3072 - Maria recovery.
* fix for bitmap vs checkpoint bug which could lead to corrupted
tables in case of crashes at certain moments: a bitmap could be flushed
to disk even though it was inconsistent with the log (it could be
flushed before REDO-UNDO are written to the log). One bug remains, need
code from others. Tests added. Fix is to pin unflushable bitmap pages,
and let checkpoint wait for them to be flushable.
* fix for long_trid!=0 assertion failure at Recovery.
* less useless wakeups in the background flush|checkpoint thread.
* store global_trid_generator in checkpoint record.
mysql-test/r/maria-recovery.result:
result update
mysql-test/t/maria-recovery.test:
make it easier to locate subtests
storage/maria/ma_bitmap.c:
When we send a bitmap to the pagecache, if this bitmap is not in a
flushable state we keep it pinned and add it to a list, it will be
unpinned when the bitmap is flushable again.
A new function _ma_bitmap_flush_all() used by checkpoint.
A new function _ma_bitmap_flushable() used by block format to signal
when it starts modifying a bitmap and when it is done with it.
storage/maria/ma_blockrec.c:
When starting a row operation (insert/update/delete), mark that
the bitmap is not flushable (because for example INSERT is going
to over-allocate in the bitmap to prevent other threads from using
our data pages). If a checkpoint comes at this moment it will wait
for the bitmap to be flushable before flushing it.
When the operation ends, bitmap becomes flushable again; that
transition is done under the bitmap's mutex (needed for correct
synchro with a concurrent checkpoint); but for INSERT/UPDATE this
happens inside _ma_bitmap_release_unused() at a place where it already
has the mutex, so the only penalty (mutex adding) is in DELETE and UNDO
of INSERT. In case of errors after setting the bitmap unflushable,
we must always set it back to flushable or checkpoint would block.
Debug possibilities to force a sleep while the bitmap is over-allocated.
In case of error in get_head_or_tail() in allocate_and_write_block_record(),
we still need to unpin all pages.
Bugfix: _ma_apply_redo_insert_row_blobs() produced wrong
data_file_length.
storage/maria/ma_blockrec.h:
new bitmap calls.
storage/maria/ma_checkpoint.c:
filter_flush_indirect not needed anymore (flushing bitmap
pages happens in _ma_bitmap_flush_all() now). So
st_filter_param::is_data_file|pages_covered_by_bitmap not needed.
Other filter_flush* don't need to flush bitmap anymore.
Add debug possibility to flush all bitmap pages outside of a checkpoint,
to simulate pagecache LRU eviction.
When the background flush/checkpoint thread notices it has nothing
to flush, it now sleeps directly until the next potential checkpoint
moment instead of waking up every second.
When in checkpoint we decide to not store a table in the checkpoint record
(because it has logged no writes for example), we can also skip flushing
this table.
storage/maria/ma_commit.c:
comment is out-of-date
storage/maria/ma_key_recover.c:
comment fix
storage/maria/ma_loghandler.c:
comment is out-of-date
storage/maria/ma_open.c:
comment is out-of-date
storage/maria/ma_pagecache.c:
comment for bug to fix. And we don't take checkpoints at end of REDO
phase yet so can trust block->type.
storage/maria/ma_recovery.c:
Comments. Now-unneeded code for incomplete REDO-UNDO groups removed.
When we forget about an old transaction we must really forget
about it with bzero() (fixes the "long_trid!=0 assertion" recovery
bug). When we delete a row with maria_delete() we turn on
STATE_NOT_OPTIMIZED_ROWS so we do the same when we see a CLR_END
for an UNDO_ROW_INSERT or when we execute an UNDO_ROW_INSERT (in both
cases a row was deleted). Pick up max_long_trid from the checkpoint record.
storage/maria/maria_chk.c:
comment
storage/maria/maria_def.h:
MARIA_FILE_BITMAP gets new members: 'flushable', 'bitmap_cond' and
'pinned_pages'.
storage/maria/trnman.c:
I used to think that recovery only needs to know the maximum TrID
of the lists of active and committed transactions. But no, sometimes
both lists can even be empty and their TrID should not be reused.
So Checkpoint now saves global_trid_generator in the checkpoint record.
storage/maria/trnman_public.h:
macros to read/store a TrID
mysql-test/r/maria-recovery-bitmap.result:
result is ok. Without the code fix, we would get a corruption message
about the bitmap page in CHECK TABLE EXTENDED.
mysql-test/t/maria-recovery-bitmap-master.opt:
usual when we crash mysqld in tests
mysql-test/t/maria-recovery-bitmap.test:
test of recovery problems specific of the bitmap pages.
2007-12-14 16:14:12 +01:00
|
|
|
DBUG_RETURN(res);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2010-11-03 13:14:02 +01:00
|
|
|
/**
|
|
|
|
@brief Lock bitmap from being used by another thread
|
|
|
|
|
|
|
|
@fn _ma_bitmap_lock()
|
|
|
|
@param share Table's share
|
|
|
|
|
|
|
|
@notes
|
|
|
|
This is a temporary solution for allowing someone to delete an inserted
|
|
|
|
duplicate-key row while someone else is doing concurrent inserts.
|
|
|
|
This is ok for now as duplicate key errors are not that common.
|
|
|
|
|
|
|
|
In the future we will add locks for row-pages to ensure two threads don't
|
|
|
|
work at the same time on the same page.
|
|
|
|
*/
|
|
|
|
|
|
|
|
void _ma_bitmap_lock(MARIA_SHARE *share)
{
  MARIA_FILE_BITMAP *bm= &share->bitmap;
  DBUG_ENTER("_ma_bitmap_lock");

  /* Nothing is done for tables that are not currently transactional */
  if (share->now_transactional)
  {
    mysql_mutex_lock(&bm->bitmap_lock);

    /*
      Register as a flush-all requester and as a waiter for the bitmap
      to become flushable; both counters are updated under bitmap_lock.
    */
    bm->flush_all_requested+= 1;
    bm->waiting_for_non_flushable+= 1;

    /* Sleep on the condition until nobody holds the bitmap non-flushable */
    while (bm->non_flushable != 0)
    {
      DBUG_PRINT("info", ("waiting for bitmap to be flushable"));
      mysql_cond_wait(&bm->bitmap_cond, &bm->bitmap_lock);
    }
    bm->waiting_for_non_flushable-= 1;

    /*
      Taking non_flushable ourselves keeps _ma_bitmap_flush_all() and other
      _ma_bitmap_lock() callers waiting; _ma_bitmap_flushable() is held
      back by the non-zero 'flush_all_requested' set above.
    */
    bm->non_flushable= 1;
    mysql_mutex_unlock(&bm->bitmap_lock);
  }
  DBUG_VOID_RETURN;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
@brief Unlock bitmap after _ma_bitmap_lock()
|
|
|
|
|
|
|
|
@fn _ma_bitmap_unlock()
|
|
|
|
@param share Table's share
|
|
|
|
*/
|
|
|
|
|
|
|
|
void _ma_bitmap_unlock(MARIA_SHARE *share)
{
  MARIA_FILE_BITMAP *bitmap= &share->bitmap;
  uint wake_waiters;
  DBUG_ENTER("_ma_bitmap_unlock");

  /* Mirror of _ma_bitmap_lock(): only transactional tables took the lock */
  if (share->now_transactional)
  {
    /* The caller must currently own the state set up by _ma_bitmap_lock() */
    DBUG_ASSERT(bitmap->flush_all_requested > 0 && bitmap->non_flushable == 1);

    mysql_mutex_lock(&bitmap->bitmap_lock);

    /* Make the bitmap flushable again and release every pinned bitmap page */
    bitmap->non_flushable= 0;
    _ma_bitmap_unpin_all(share);

    /* Threads waiting for non_flushable to drop always need a wakeup ... */
    wake_waiters= bitmap->waiting_for_non_flushable;

    /* ... those waiting on flush_all_requested only once it reaches zero */
    bitmap->flush_all_requested--;
    if (bitmap->flush_all_requested == 0)
      wake_waiters|= bitmap->waiting_for_flush_all_requested;

    mysql_mutex_unlock(&bitmap->bitmap_lock);

    /* The broadcast is issued after the mutex has been released */
    if (wake_waiters)
      mysql_cond_broadcast(&bitmap->bitmap_cond);
  }
  DBUG_VOID_RETURN;
}
|
|
|
|
|
|
|
|
|
WL#3072 - Maria recovery.
* fix for bitmap vs checkpoint bug which could lead to corrupted
tables in case of crashes at certain moments: a bitmap could be flushed
to disk even though it was inconsistent with the log (it could be
flushed before REDO-UNDO are written to the log). One bug remains, need
code from others. Tests added. Fix is to pin unflushable bitmap pages,
and let checkpoint wait for them to be flushable.
* fix for long_trid!=0 assertion failure at Recovery.
* less useless wakeups in the background flush|checkpoint thread.
* store global_trid_generator in checkpoint record.
mysql-test/r/maria-recovery.result:
result update
mysql-test/t/maria-recovery.test:
make it easier to locate subtests
storage/maria/ma_bitmap.c:
When we send a bitmap to the pagecache, if this bitmap is not in a
flushable state we keep it pinned and add it to a list, it will be
unpinned when the bitmap is flushable again.
A new function _ma_bitmap_flush_all() used by checkpoint.
A new function _ma_bitmap_flushable() used by block format to signal
when it starts modifying a bitmap and when it is done with it.
storage/maria/ma_blockrec.c:
When starting a row operation (insert/update/delete), mark that
the bitmap is not flushable (because for example INSERT is going
to over-allocate in the bitmap to prevent other threads from using
our data pages). If a checkpoint comes at this moment it will wait
for the bitmap to be flushable before flushing it.
When the operation ends, bitmap becomes flushable again; that
transition is done under the bitmap's mutex (needed for correct
synchro with a concurrent checkpoint); but for INSERT/UPDATE this
happens inside _ma_bitmap_release_unused() at a place where it already
has the mutex, so the only penalty (mutex adding) is in DELETE and UNDO
of INSERT. In case of errors after setting the bitmap unflushable,
we must always set it back to flushable or checkpoint would block.
Debug possibilities to force a sleep while the bitmap is over-allocated.
In case of error in get_head_or_tail() in allocate_and_write_block_record(),
we still need to unpin all pages.
Bugfix: _ma_apply_redo_insert_row_blobs() produced wrong
data_file_length.
storage/maria/ma_blockrec.h:
new bitmap calls.
storage/maria/ma_checkpoint.c:
filter_flush_indirect not needed anymore (flushing bitmap
pages happens in _ma_bitmap_flush_all() now). So
st_filter_param::is_data_file|pages_covered_by_bitmap not needed.
Other filter_flush* don't need to flush bitmap anymore.
Add debug possibility to flush all bitmap pages outside of a checkpoint,
to simulate pagecache LRU eviction.
When the background flush/checkpoint thread notices it has nothing
to flush, it now sleeps directly until the next potential checkpoint
moment instead of waking up every second.
When in checkpoint we decide to not store a table in the checkpoint record
(because it has logged no writes for example), we can also skip flushing
this table.
storage/maria/ma_commit.c:
comment is out-of-date
storage/maria/ma_key_recover.c:
comment fix
storage/maria/ma_loghandler.c:
comment is out-of-date
storage/maria/ma_open.c:
comment is out-of-date
storage/maria/ma_pagecache.c:
comment for bug to fix. And we don't take checkpoints at end of REDO
phase yet so can trust block->type.
storage/maria/ma_recovery.c:
Comments. Now-unneeded code for incomplete REDO-UNDO groups removed.
When we forget about an old transaction we must really forget
about it with bzero() (fixes the "long_trid!=0 assertion" recovery
bug). When we delete a row with maria_delete() we turn on
STATE_NOT_OPTIMIZED_ROWS so we do the same when we see a CLR_END
for an UNDO_ROW_INSERT or when we execute an UNDO_ROW_INSERT (in both
cases a row was deleted). Pick up max_long_trid from the checkpoint record.
storage/maria/maria_chk.c:
comment
storage/maria/maria_def.h:
MARIA_FILE_BITMAP gets new members: 'flushable', 'bitmap_cond' and
'pinned_pages'.
storage/maria/trnman.c:
I used to think that recovery only needs to know the maximum TrID
of the lists of active and committed transactions. But no, sometimes
both lists can even be empty and their TrID should not be reused.
So Checkpoint now saves global_trid_generator in the checkpoint record.
storage/maria/trnman_public.h:
macros to read/store a TrID
mysql-test/r/maria-recovery-bitmap.result:
result is ok. Without the code fix, we would get a corruption message
about the bitmap page in CHECK TABLE EXTENDED.
mysql-test/t/maria-recovery-bitmap-master.opt:
usual when we crash mysqld in tests
mysql-test/t/maria-recovery-bitmap.test:
test of recovery problems specific of the bitmap pages.
2007-12-14 16:14:12 +01:00
|
|
|
/**
|
|
|
|
@brief Unpin all pinned bitmap pages
|
|
|
|
|
|
|
|
@param share Table's share
|
|
|
|
|
|
|
|
@return Nothing; the function is declared void and reports no status.
|
|
|
|
|
2008-10-14 11:38:07 +02:00
|
|
|
|
|
|
|
@note This unpins pages pinned by other threads.
|
WL#3072 - Maria recovery.
* fix for bitmap vs checkpoint bug which could lead to corrupted
tables in case of crashes at certain moments: a bitmap could be flushed
to disk even though it was inconsistent with the log (it could be
flushed before REDO-UNDO are written to the log). One bug remains, need
code from others. Tests added. Fix is to pin unflushable bitmap pages,
and let checkpoint wait for them to be flushable.
* fix for long_trid!=0 assertion failure at Recovery.
* less useless wakeups in the background flush|checkpoint thread.
* store global_trid_generator in checkpoint record.
mysql-test/r/maria-recovery.result:
result update
mysql-test/t/maria-recovery.test:
make it easier to locate subtests
storage/maria/ma_bitmap.c:
When we send a bitmap to the pagecache, if this bitmap is not in a
flushable state we keep it pinned and add it to a list, it will be
unpinned when the bitmap is flushable again.
A new function _ma_bitmap_flush_all() used by checkpoint.
A new function _ma_bitmap_flushable() used by block format to signal
when it starts modifying a bitmap and when it is done with it.
storage/maria/ma_blockrec.c:
When starting a row operation (insert/update/delete), mark that
the bitmap is not flushable (because for example INSERT is going
to over-allocate in the bitmap to prevent other threads from using
our data pages). If a checkpoint comes at this moment it will wait
for the bitmap to be flushable before flushing it.
When the operation ends, bitmap becomes flushable again; that
transition is done under the bitmap's mutex (needed for correct
synchro with a concurrent checkpoint); but for INSERT/UPDATE this
happens inside _ma_bitmap_release_unused() at a place where it already
has the mutex, so the only penalty (mutex adding) is in DELETE and UNDO
of INSERT. In case of errors after setting the bitmap unflushable,
we must always set it back to flushable or checkpoint would block.
Debug possibilities to force a sleep while the bitmap is over-allocated.
In case of error in get_head_or_tail() in allocate_and_write_block_record(),
we still need to unpin all pages.
Bugfix: _ma_apply_redo_insert_row_blobs() produced wrong
data_file_length.
storage/maria/ma_blockrec.h:
new bitmap calls.
storage/maria/ma_checkpoint.c:
filter_flush_indirect not needed anymore (flushing bitmap
pages happens in _ma_bitmap_flush_all() now). So
st_filter_param::is_data_file|pages_covered_by_bitmap not needed.
Other filter_flush* don't need to flush bitmap anymore.
Add debug possibility to flush all bitmap pages outside of a checkpoint,
to simulate pagecache LRU eviction.
When the background flush/checkpoint thread notices it has nothing
to flush, it now sleeps directly until the next potential checkpoint
moment instead of waking up every second.
When in checkpoint we decide to not store a table in the checkpoint record
(because it has logged no writes for example), we can also skip flushing
this table.
storage/maria/ma_commit.c:
comment is out-of-date
storage/maria/ma_key_recover.c:
comment fix
storage/maria/ma_loghandler.c:
comment is out-of-date
storage/maria/ma_open.c:
comment is out-of-date
storage/maria/ma_pagecache.c:
comment for bug to fix. And we don't take checkpoints at end of REDO
phase yet so can trust block->type.
storage/maria/ma_recovery.c:
Comments. Now-unneeded code for incomplete REDO-UNDO groups removed.
When we forget about an old transaction we must really forget
about it with bzero() (fixes the "long_trid!=0 assertion" recovery
bug). When we delete a row with maria_delete() we turn on
STATE_NOT_OPTIMIZED_ROWS so we do the same when we see a CLR_END
for an UNDO_ROW_INSERT or when we execute an UNDO_ROW_INSERT (in both
cases a row was deleted). Pick up max_long_trid from the checkpoint record.
storage/maria/maria_chk.c:
comment
storage/maria/maria_def.h:
MARIA_FILE_BITMAP gets new members: 'flushable', 'bitmap_cond' and
'pinned_pages'.
storage/maria/trnman.c:
I used to think that recovery only needs to know the maximum TrID
of the lists of active and committed transactions. But no, sometimes
both lists can even be empty and their TrID should not be reused.
So Checkpoint now saves global_trid_generator in the checkpoint record.
storage/maria/trnman_public.h:
macros to read/store a TrID
mysql-test/r/maria-recovery-bitmap.result:
result is ok. Without the code fix, we would get a corruption message
about the bitmap page in CHECK TABLE EXTENDED.
mysql-test/t/maria-recovery-bitmap-master.opt:
usual when we crash mysqld in tests
mysql-test/t/maria-recovery-bitmap.test:
test of recovery problems specific of the bitmap pages.
2007-12-14 16:14:12 +01:00
|
|
|
*/
|
|
|
|
|
|
|
|
static void _ma_bitmap_unpin_all(MARIA_SHARE *share)
{
  MARIA_FILE_BITMAP *bitmap= &share->bitmap;
  /* First element of the pinned-pages array */
  MARIA_PINNED_PAGE *first_page= ((MARIA_PINNED_PAGE*)
                                  dynamic_array_ptr(&bitmap->pinned_pages, 0));
  /* Cursor starts one past the last stored element */
  MARIA_PINNED_PAGE *pinned_page= first_page + bitmap->pinned_pages.elements;
  DBUG_ENTER("_ma_bitmap_unpin_all");
  DBUG_PRINT("info", ("pinned: %u", bitmap->pinned_pages.elements));

  /*
    Unpin from the last recorded page back to the first.
    The cursor is decremented inside the body so it never moves below
    first_page; a post-decrement in the loop condition would leave it one
    element before the array on exit, which is undefined pointer arithmetic.
  */
  while (pinned_page != first_page)
  {
    pinned_page--;
    pagecache_unlock_by_link(share->pagecache, pinned_page->link,
                             pinned_page->unlock, PAGECACHE_UNPIN,
                             LSN_IMPOSSIBLE, LSN_IMPOSSIBLE, FALSE, TRUE);
  }

  /* Mark the list empty; the element count is simply reset to zero */
  bitmap->pinned_pages.elements= 0;
  DBUG_VOID_RETURN;
}
|
|
|
|
|
|
|
|
|
2007-04-19 12:18:56 +02:00
|
|
|
/*
|
|
|
|
Initialize bitmap in memory to a zero bitmap
|
|
|
|
|
|
|
|
SYNOPSIS
|
|
|
|
_ma_bitmap_delete_all()
|
|
|
|
share Share handler
|
|
|
|
|
|
|
|
NOTES
|
WL#3072 - Maria recovery
Unit test for recovery: runs ma_test1 and ma_test2 (both only with
INSERTs and DELETEs; UPDATEs disabled as not handled by recovery)
then moves the tables elsewhere; recreates tables from the log, and
compares and fails if there is a difference. Passes now.
Most of maria_read_log.c moved to ma_recovery.c, as it will be re-used
for recovery-from-ha_maria.
Bugfixes of applying of REDO_INSERT, REDO_PURGE_ROW.
Applying of REDO_PURGE_BLOCKS, REDO_DELETE_ALL, REDO_DROP_TABLE,
UNDO_ROW_INSERT (in REDO phase only, i.e. just doing records++),
UNDO_ROW_DELETE, UNDO_ROW_PURGE.
Code cleanups.
Monty: please look for "QQ". Sanja: please look for "Sanja".
Future tasks: recovery of the bitmap (easy), recovery of the state
(make it idempotent), more REDOs (Monty to work on
REDO_UPDATE?), UNDO phase...
Pushing this cset as it looks safe, contains test and bugfixes which
will help Monty implement applying of REDO_UPDATE.
sql/handler.cc:
typo
storage/maria/Makefile.am:
Adding ma_test_recovery (which ma_test_all invokes, and which can
also be run alone). Most of maria_read_log.c moved to ma_recovery.c
storage/maria/ha_maria.cc:
comments
storage/maria/ma_bitmap.c:
fixing comments. 2 -> sizeof(maria_bitmap_marker).
Bitmap-related part of _ma_initialize_datafile() moves in bitmap module.
Now putting the "bm" signature when creating the first bitmap page
(it used to happen only at next open, but that
caused an annoying difference when testing Recovery if the original
run didn't open the table, and it looks more
logical like this: it goes to disk only with its signature correct);
see the "QQ" comment towards the _ma_initialize_data_file() call
in ma_create.c for more).
When reading a bitmap page, verify its signature (happens when normally
using the table or when CHECKing it; not when REPAIRing it).
storage/maria/ma_blockrec.c:
* no need to sync the data file if table is not transactional
* Comments, code cleanup (log-related data moved to log-related code
block, int5store->page_store).
* Store the table's short id into LOGREC_UNDO_ROW_PURGE, like we
do for other records (though this record will soon be replaced
with a CLR).
* If "page" is 1 it means the page which extends from byte
page*block_size+1 to (page+1)*block_size (byte number 1 being
the first byte of the file). The last byte of the file is
data_file_length (same convention).
A new page needs to be created if the last byte of the page is
beyond the last byte of the file, i.e.
(page+1)*block_size+1 > data_file_length, so we correct the test
(bug found when testing log applying for ma_test1 -M -T --skip-update).
* update the page's LSN when removing a row from it during
execution of a REDO_PURGE_ROW record (bug found when testing log
applying for ma_test1 -M -T --skip-update).
* applying of REDO_PURGE_BLOCKs (limited to a one-page range for now).
storage/maria/ma_blockrec.h:
new functions. maria_bitmap_marker does not need to be exported.
storage/maria/ma_close.c:
we can always flush the table's state when closing the last instance
of the table. And it is needed for maria_read_log (as it does
not use maria_lock_database()).
storage/maria/ma_control_file.c:
when in Recovery, some assertions should not be used.
storage/maria/ma_control_file.h:
double-inclusion safe
storage/maria/ma_create.c:
during recovery, don't log records. Comments.
Moving the creation of the first bitmap page to ma_bitmap.c
storage/maria/ma_delete_table.c:
during recovery, don't log records. Log the end-zero of the dropped
table's name, so that recovery can use the string in place without
extending it to fit an end zero.
storage/maria/ma_loghandler.c:
* inwrite_rec_hook also needs access to the MARIA_SHARE, like
prewrite_rec_hook. This will be needed to update
share->records_diff (in the upcoming patch "recovery of the state").
* LOG_DESC::record_ends_group changed to an enum.
* LOG_DESC for LOGREC_REDO_PURGE_BLOCKS and LOGREC_UNDO_ROW_PURGE
corrected
* Sanja please see the @todo LOG BUG
* avoiding DBUG_RETURN(func()) as it gives confusing debug traces.
storage/maria/ma_loghandler.h:
- log write hooks called while the log's lock is held (inwrite_rec_hook)
now need the MARIA_SHARE, like prewrite_rec_hook already had
- instead of a bool saying if this record's type ends groups or not,
we refine: it may not end a group, it may end a group, or it may
be a group in itself. Imagine that we had a physical write failure
to a table before we log the UNDO, we still end up in
external_lock(F_UNLCK) and then we log a COMMIT: we don't want
to consider this COMMIT as ending the group of REDOs (don't want
to execute those REDOs during Recovery), that's why we say "COMMIT
is a group in itself, it aborts any previous group". This also
gives one more sanity check in maria_read_log.
storage/maria/ma_recovery.c:
New Recovery code, replacing the old pseudocode.
Most of maria_read_log moved here.
Call-able from ha_maria, but not enabled yet.
Compared to the previous version of maria_read_log, some bugs have
been fixed, debugging output can go to stdout or a disk file (for now
it's useful for me, later it can be changed), execution of
REDO_DROP_TABLE, REDO_DELETE_ALL, REDO_PURGE_BLOCKS has been added. Duplicate code
has been factored into functions. We abort an unfinished group
of records if we see a record which is a group in itself (like COMMIT).
No need for maria_panic() after a bug (which caused tables to not
be closed) was fixed; if there is yet another bug I prefer to see it.
When opening a table for Recovery, set data_file_length
and key_file_length to their real physical value (these are the
easiest state members to restore :). Warn us if the last page
was truncated (but Recovery handles it).
MARIA_SHARE::state::state::records is now partly recovered (not
idempotent, but works if recreating tables from scratch).
When applying a REDO to a page, stamp it with the UNDO's LSN
(current_group_end_lsn), not with the REDO's LSN; it makes
the table more identical to the original table (easier to compare
the two tables in the end).
Big thing missing: some types of REDOs are not handled,
and the UNDO phase does not exist (missing functions to execute UNDOs
to actually rollback). So for now tests are only inserting/deleting
a few 100 rows, closing the table and seeing if the log is applied ok;
it works. UPDATE not handled.
storage/maria/ma_recovery.h:
new functions: ma_recover() for recovery from inside ha_maria;
_ma_apply_log() for maria_read_log (ma_recover() calls _ma_apply_log()).
Btw, we need to not use the word "recover" for REPAIR/maria_chk anymore.
storage/maria/ma_rename.c:
don't write log records during recovery
storage/maria/ma_test2.c:
- fail if maria_info() or other subtests find some wrong information
- new option -g to skip updates.
- init the translog before creating the table, so that log applying
can work.
- in "#if 0" you'll see some fixed bugs (will be removed).
storage/maria/ma_test_all.sh:
cleanup files. Test log applying.
storage/maria/maria_read_log.c:
most of the logic moves to ma_recovery.c to be shared between
maria_read_log and recovery-from-inside-mysqld.
See ma_recovery.c for additional changes made to the moved code.
storage/maria/ma_test_recovery:
unit test for Recovery. Tests insert and delete,
REDO_UPDATE not yet coded.
Script is called from ma_test_all. Can run standalone.
2007-07-26 11:56:21 +02:00
|
|
|
This is called on maria_delete_all_rows (truncate data file).
|
2007-04-19 12:18:56 +02:00
|
|
|
*/
|
|
|
|
|
2007-04-12 11:05:30 +02:00
|
|
|
void _ma_bitmap_delete_all(MARIA_SHARE *share)
{
  MARIA_FILE_BITMAP *bm= &share->bitmap;
  DBUG_ENTER("_ma_bitmap_delete_all");

  /* A NULL map means we are called during create; nothing to reset then */
  if (bm->map)
  {
    /* Make the in-memory copy describe bitmap page 0 */
    bm->page= 0;
    /* Clear block_size bytes of the in-memory bitmap */
    bzero(bm->map, bm->block_size);
    /* Both size counters are reset to the maximum total size */
    bm->total_size= bm->max_total_size;
    bm->used_size= bm->total_size;
    /* Flag the in-memory bitmap as modified */
    bm->changed= 1;
  }
  DBUG_VOID_RETURN;
}
|
|
|
|
|
|
|
|
|
2007-11-28 20:38:30 +01:00
|
|
|
/**
|
|
|
|
@brief Reset bitmap caches
|
|
|
|
|
|
|
|
@fn _ma_bitmap_reset_cache()
|
|
|
|
@param share Maria share
|
|
|
|
|
|
|
|
@notes
|
|
|
|
This is called after we have swapped file descriptors and we want
|
2011-07-04 03:32:53 +02:00
|
|
|
bitmap to forget all cached information.
|
|
|
|
It's also called directly after we have opened a file.
|
2007-11-28 20:38:30 +01:00
|
|
|
*/
|
|
|
|
|
|
|
|
void _ma_bitmap_reset_cache(MARIA_SHARE *share)
{
  MARIA_FILE_BITMAP *bm= &share->bitmap;

  /* Nothing to reset when no bitmap buffer is in use */
  if (!bm->map)
    return;

  /* Drop any pending changes to the current bitmap page */
  bm->changed= 0;

  /*
    A real page cannot be read here, because in some cases there is no
    active page cache yet.  Instead, make the in-memory state look like
    a dummy bitmap page that is completely full and unmodified.

    The page number is chosen so that, should it be used by
    move_to_next_bitmap(), the result points to page 0.  (That can only
    happen if writing to a bitmap page fails.)
  */
  bm->page= ((pgcache_page_no_t) 0) - bm->pages_covered;
  bm->total_size= bm->max_total_size;
  bm->used_size= bm->total_size;
  bfill(bm->map, share->block_size, 255);
#ifndef DBUG_OFF
  /* Debug builds: duplicate the map into the area right after it */
  memcpy(bm->map + bm->block_size, bm->map, bm->block_size);
#endif

  /* The search for free space restarts from the beginning of the file */
  share->state.first_bitmap_with_space = 0;
}
|
|
|
|
|
|
|
|
|
2007-01-18 20:38:14 +01:00
|
|
|
/*
|
|
|
|
Return bitmap pattern for the smallest head block that can hold 'size'
|
|
|
|
|
|
|
|
SYNOPSIS
|
|
|
|
size_to_head_pattern()
|
|
|
|
bitmap Bitmap
|
|
|
|
size Requested size
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
0-3 For a description of the bitmap sizes, see the header
|
|
|
|
*/
|
|
|
|
|
|
|
|
static uint size_to_head_pattern(MARIA_FILE_BITMAP *bitmap, uint size)
{
  uint pattern;

  /*
    Probe head patterns 3, 2 and 1 in that order and return the first
    one whose sizes[] entry is at least 'size'.
  */
  for (pattern= 3; pattern > 0; pattern--)
  {
    if (size <= bitmap->sizes[pattern])
      return pattern;
  }

  /* Only pattern 0 is left; 'size' is expected to fit within sizes[0] */
  DBUG_ASSERT(size <= bitmap->sizes[0]);
  return 0;
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
2007-04-19 12:18:56 +02:00
|
|
|
Return bitmap pattern for head block where there is size bytes free
|
|
|
|
|
|
|
|
SYNOPSIS
|
|
|
|
_ma_free_size_to_head_pattern()
|
|
|
|
bitmap Bitmap
|
|
|
|
size Number of free bytes on the head block
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
0-4 (Possible bitmap patterns for head block)
|
2007-01-18 20:38:14 +01:00
|
|
|
*/
|
|
|
|
|
|
|
|
uint _ma_free_size_to_head_pattern(MARIA_FILE_BITMAP *bitmap, uint size)
{
  uint pattern;

  /*
    Walk sizes[3] down to sizes[0]; the first limit that 'size' is
    strictly below gives the pattern (limit index + 1, i.e. 4..1).
  */
  for (pattern= 4; pattern > 0; pattern--)
  {
    if (size < bitmap->sizes[pattern - 1])
      return pattern;
  }

  /* 'size' is at least sizes[0] */
  return 0;
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
Return bitmap pattern for the smallest tail block that can hold 'size'
|
|
|
|
|
|
|
|
SYNOPSIS
|
|
|
|
size_to_tail_pattern()
|
|
|
|
bitmap Bitmap
|
|
|
|
size Requested size
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
0, 5 or 6 For a description of the bitmap sizes, see the header
|
|
|
|
*/
|
|
|
|
|
|
|
|
static uint size_to_tail_pattern(MARIA_FILE_BITMAP *bitmap, uint size)
{
  uint pattern= 0;

  if (size <= bitmap->sizes[6])
  {
    pattern= 6;
  }
  else if (size <= bitmap->sizes[5])
  {
    pattern= 5;
  }
  else
  {
    /* Falls back to pattern 0; 'size' is expected to fit within sizes[0] */
    DBUG_ASSERT(size <= bitmap->sizes[0]);
  }
  return pattern;
}
|
|
|
|
|
|
|
|
|
2007-04-19 12:18:56 +02:00
|
|
|
/*
|
|
|
|
Return bitmap pattern for tail block where there is size bytes free
|
|
|
|
|
|
|
|
SYNOPSIS
|
|
|
|
free_size_to_tail_pattern()
|
|
|
|
bitmap Bitmap
|
|
|
|
size Number of free bytes on the tail block
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
0, 5, 6, 7 For a description of the bitmap sizes, see the header
|
|
|
|
*/
|
|
|
|
|
2007-01-18 20:38:14 +01:00
|
|
|
static uint free_size_to_tail_pattern(MARIA_FILE_BITMAP *bitmap, uint size)
{
  uint pattern;

  if (size >= bitmap->sizes[0])
    pattern= 0;                                 /* Revert to empty page */
  else if (size < bitmap->sizes[6])
    pattern= 7;
  else
    pattern= (size < bitmap->sizes[5]) ? 6 : 5;

  return pattern;
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
Return size guaranteed to be available on a page
|
|
|
|
|
|
|
|
SYNOPSIS
|
2007-04-19 12:18:56 +02:00
|
|
|
pattern_to_size()
|
2007-01-18 20:38:14 +01:00
|
|
|
bitmap Bitmap
|
|
|
|
pattern Pattern (0-7)
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
0 - block_size
|
|
|
|
*/
|
|
|
|
|
|
|
|
static inline uint pattern_to_size(MARIA_FILE_BITMAP *bitmap, uint pattern)
{
  uint size;

  /* Only patterns 0-7 are accepted as an index into bitmap->sizes[] */
  DBUG_ASSERT(pattern <= 7);
  size= bitmap->sizes[pattern];
  return size;
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
Print bitmap for debugging
|
2007-04-19 12:18:56 +02:00
|
|
|
|
|
|
|
SYNOPSIS
|
2011-01-24 14:19:40 +01:00
|
|
|
_ma_print_bitmap_changes()
|
2007-04-19 12:18:56 +02:00
|
|
|
bitmap Bitmap to print
|
|
|
|
|
|
|
|
IMPLEMENTATION
|
|
|
|
Prints all changed bits since last call to _ma_print_bitmap_changes().
|
|
|
|
This is done by having a copy of the last bitmap in
|
|
|
|
bitmap->map+bitmap->block_size.
|
2007-01-18 20:38:14 +01:00
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef DBUG_OFF
|
|
|
|
|
2007-10-19 23:24:22 +02:00
|
|
|
static void _ma_print_bitmap_changes(MARIA_FILE_BITMAP *bitmap)
{
  uchar *pos, *end, *org_pos;
  ulong page;
  DBUG_ENTER("_ma_print_bitmap_changes");

  /*
    Compare the current map (bitmap->map, used_size bytes) with the copy
    kept at bitmap->map + bitmap->block_size, and write every 3-bit pattern
    that differs to DBUG_FILE.
  */
  end= bitmap->map + bitmap->used_size;
  DBUG_LOCK_FILE;
  /*
    The map address is printed with %p. Casting the pointer to long and
    printing it with %lx truncates the address on platforms where long is
    narrower than a pointer.
  */
  fprintf(DBUG_FILE,"\nBitmap page changes at page: %lu bitmap: %p\n",
          (ulong) bitmap->page, (void*) bitmap->map);

  /* Numbering of the reported pages starts right after the bitmap page */
  page= (ulong) bitmap->page+1;
  for (pos= bitmap->map, org_pos= bitmap->map + bitmap->block_size ;
       pos < end ;
       pos+= 6, org_pos+= 6)
  {
    ulonglong bits= uint6korr(pos);    /* 6 bytes = 6*8/3= 16 patterns */
    ulonglong org_bits= uint6korr(org_pos);
    uint i;

    /*
      Test if there is any changes in the next 16 bitmaps (to not have to
      loop through all bits if we know they are the same)
    */
    if (bits != org_bits)
    {
      for (i= 0; i < 16 ; i++, bits>>= 3, org_bits>>= 3)
      {
        if ((bits & 7) != (org_bits & 7))
          fprintf(DBUG_FILE, "Page: %8lu %s -> %s\n", page+i,
                  bits_to_txt[org_bits & 7], bits_to_txt[bits & 7]);
      }
    }
    page+= 16;
  }
  fputc('\n', DBUG_FILE);
  DBUG_UNLOCK_FILE;
  /* Refresh the saved copy so the next call only reports new changes */
  memcpy(bitmap->map + bitmap->block_size, bitmap->map, bitmap->block_size);
  DBUG_VOID_RETURN;
}
|
|
|
|
|
2007-10-19 23:24:22 +02:00
|
|
|
|
|
|
|
/* Print content of bitmap for debugging */
|
|
|
|
|
|
|
|
void _ma_print_bitmap(MARIA_FILE_BITMAP *bitmap, uchar *data,
|
2008-01-10 20:21:36 +01:00
|
|
|
pgcache_page_no_t page)
|
2007-10-19 23:24:22 +02:00
|
|
|
{
|
|
|
|
uchar *pos, *end;
|
|
|
|
char llbuff[22];
|
|
|
|
|
|
|
|
DBUG_LOCK_FILE;
|
|
|
|
fprintf(DBUG_FILE,"\nDump of bitmap page at %s\n", llstr(page, llbuff));
|
|
|
|
|
|
|
|
page++; /* Skip bitmap page */
|
2011-08-12 14:40:56 +02:00
|
|
|
for (pos= data, end= pos + bitmap->max_total_size;
|
2007-10-19 23:24:22 +02:00
|
|
|
pos < end ;
|
|
|
|
pos+= 6)
|
|
|
|
{
|
|
|
|
ulonglong bits= uint6korr(pos); /* 6 bytes = 6*8/3= 16 patterns */
|
|
|
|
|
|
|
|
/*
|
|
|
|
Test if there is any changes in the next 16 bitmaps (to not have to
|
|
|
|
loop through all bits if we know they are the same)
|
|
|
|
*/
|
|
|
|
if (bits)
|
|
|
|
{
|
|
|
|
uint i;
|
|
|
|
for (i= 0; i < 16 ; i++, bits>>= 3)
|
|
|
|
{
|
|
|
|
if (bits & 7)
|
|
|
|
fprintf(DBUG_FILE, "Page: %8s %s\n", llstr(page+i, llbuff),
|
|
|
|
bits_to_txt[bits & 7]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
page+= 16;
|
|
|
|
}
|
|
|
|
fputc('\n', DBUG_FILE);
|
|
|
|
DBUG_UNLOCK_FILE;
|
|
|
|
}
|
|
|
|
|
2007-01-18 20:38:14 +01:00
|
|
|
#endif /* DBUG_OFF */
|
|
|
|
|
|
|
|
|
2011-01-24 14:19:40 +01:00
|
|
|
/*
|
|
|
|
Return content of bitmap as a printable string
|
|
|
|
*/
|
|
|
|
|
|
|
|
void _ma_get_bitmap_description(MARIA_FILE_BITMAP *bitmap,
|
|
|
|
uchar *bitmap_data,
|
|
|
|
pgcache_page_no_t page,
|
|
|
|
char *out)
|
|
|
|
{
|
|
|
|
uchar *pos, *end;
|
|
|
|
uint count=0, dot_printed= 0, len;
|
|
|
|
char buff[80], last[80];
|
|
|
|
|
|
|
|
page++;
|
|
|
|
last[0]=0;
|
|
|
|
for (pos= bitmap_data, end= pos+ bitmap->used_size ; pos < end ; pos+= 6)
|
|
|
|
{
|
|
|
|
ulonglong bits= uint6korr(pos); /* 6 bytes = 6*8/3= 16 patterns */
|
|
|
|
uint i;
|
|
|
|
|
|
|
|
for (i= 0; i < 16 ; i++, bits>>= 3)
|
|
|
|
{
|
|
|
|
if (count > 60)
|
|
|
|
{
|
|
|
|
if (memcmp(buff, last, count))
|
|
|
|
{
|
|
|
|
memcpy(last, buff, count);
|
2011-10-19 21:45:18 +02:00
|
|
|
len= sprintf(out, "%8lu: ", (ulong) page - count);
|
2011-01-24 14:19:40 +01:00
|
|
|
memcpy(out+len, buff, count);
|
|
|
|
out+= len + count + 1;
|
|
|
|
out[-1]= '\n';
|
|
|
|
dot_printed= 0;
|
|
|
|
}
|
|
|
|
else if (!(dot_printed++))
|
|
|
|
{
|
|
|
|
out= strmov(out, "...\n");
|
|
|
|
}
|
|
|
|
count= 0;
|
|
|
|
}
|
|
|
|
buff[count++]= '0' + (uint) (bits & 7);
|
|
|
|
page++;
|
|
|
|
}
|
|
|
|
}
|
2011-10-19 21:45:18 +02:00
|
|
|
len= sprintf(out, "%8lu: ", (ulong) page - count);
|
2011-01-24 14:19:40 +01:00
|
|
|
memcpy(out+len, buff, count);
|
|
|
|
out[len + count]= '\n';
|
|
|
|
out[len + count + 1]= 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2011-07-24 10:25:28 +02:00
|
|
|
/*
|
|
|
|
Adjust bitmap->total_size to not go over max_data_file_size
|
|
|
|
*/
|
|
|
|
|
|
|
|
static void adjust_total_size(MARIA_HA *info, pgcache_page_no_t page)
{
  MARIA_FILE_BITMAP *bitmap= &info->s->bitmap;

  /*
    A bitmap page located before last_bitmap_page uses all bits in the
    bitmap (max_total_size); otherwise last_total_size applies.
  */
  bitmap->total_size= ((page < bitmap->last_bitmap_page) ?
                       bitmap->max_total_size :
                       bitmap->last_total_size);
}
|
|
|
|
|
2007-01-18 20:38:14 +01:00
|
|
|
/***************************************************************************
|
|
|
|
Reading & writing bitmap pages
|
|
|
|
***************************************************************************/
|
|
|
|
|
|
|
|
/*
|
|
|
|
Read a given bitmap page
|
|
|
|
|
|
|
|
SYNOPSIS
|
WL#3072 - Maria Recovery
Bulk insert: don't log REDO/UNDO for rows, log one UNDO which will
truncate files; this is an optimization and a bugfix (table was left
half-repaired by crash).
Repair: mark table crashed-on-repair at start, bump skip_redo_lsn at start,
this is easier for recovery (tells it to skip old REDOs or even UNDO
phase) and user (tells it to repair) in case of crash, sync files
in the end.
Recovery skips missing or corrupted table and moves to next record
(in REDO or UNDO phase) to be more robust; warns if happens in UNDO phase.
Bugfix for UNDO_KEY_DELETE_WITH_ROOT (tested in ma_test_recovery)
and maria_enable_indexes().
Create missing bitmaps when needed (there can be more than one to create,
in rare cases), log a record for this.
include/myisamchk.h:
new flag: bulk insert repair mustn't bump create_rename_lsn
mysql-test/lib/mtr_report.pl:
skip normal warning in maria-recovery.test
mysql-test/r/maria-recovery.result:
result: crash before bulk insert is committed, causes proper rollback,
and crash right after OPTIMIZE replaces index file with new index file
leads to table marked corrupted and recovery not failing.
mysql-test/t/maria-recovery.test:
- can't check the table or it would commit the transaction,
but check is made after recovery.
- test of crash before bulk-insert-with-repair is committed
(to see if it is rolled back), and of crash after OPTIMIZE has replaced
index file but not finished all operations (to see if recovery fails -
it used to assert when trying to execute an old REDO on the new
index).
storage/maria/CMakeLists.txt:
new file
storage/maria/Makefile.am:
new file
storage/maria/ha_maria.cc:
- If bulk insert on a transactional table using an index repair:
table is initially empty, so don't log REDO/UNDO for data rows
(optimization), just log an UNDO_BULK_INSERT_WITH_REPAIR
which will, if executed, empty the data and index file. Re-enable
logging in end_bulk_insert().
- write log record for repair operation only after it's fully done,
index sort including (maria_repair*() used to write the log record).
- Adding back file->trn=NULL which was removed by mistake earlier.
storage/maria/ha_maria.h:
new member (see ha_maria.cc)
storage/maria/ma_bitmap.c:
Functions to create missing bitmaps:
- one function which creates missing bitmaps in page cache, except
the missing one with max offset which it does not put into page cache
as it will be modified very soon.
- one function which the one above calls, and creates bitmaps in page
cache
- one function to execute REDO_BITMAP_NEW_PAGE which uses the second
one above.
storage/maria/ma_blockrec.c:
- when logging REDO_DELETE_ALL, not only 'records' and 'checksum'
has to be reset under log's mutex.
- execution of REDO_INSERT_ROW_BLOBS now checks the dirty pages' list
- execution of UNDO_BULK_INSERT_WITH_REPAIR
storage/maria/ma_blockrec.h:
new functions
storage/maria/ma_check.c:
- table-flush-before-repair is moved to a separate function reused
by maria_sort_index(); syncing is added
- maria_repair() is allowed to re-enable logging only if it is the one
which disabled it.
- "_ma_flush_table_files_after_repair" was a bad name, it's not after
repair now, and it should not sync as we do more changes to the files
shortly after (sync is postponed to when writing the log record)
- REDO_REPAIR record should be written only after all repair
operations (in particular after sorting index in ha_mara::repair())
- close to the end of repair by sort, flushing of pages must happen
also in the non-quick case, to prepare for the sync at end.
- in parallel repair, some page flushes are not needed as done
by initialize_variables_for_repair().
storage/maria/ma_create.c:
Update skip_redo_lsn, create_rename_lsn optionally.
storage/maria/ma_delete_all.c:
Need to sync files at end of maria_delete_all_rows(), if transactional.
storage/maria/ma_extra.c:
During repair, we sometimes call _ma_flush_table_files() (via
_ma_flush_table_files_before_swap()) while there is a WRITE_CACHE.
storage/maria/ma_key_recover.c:
- when we see CLR_END for UNDO_BULK_INSERT_WITH_REPAIR, re-enable
indices.
- fixing bug: _ma_apply_undo_key_delete() parsed UNDO_KEY_DELETE_WITH_ROOT
wrongly, leading to recovery failure
storage/maria/ma_key_recover.h:
new prototype
storage/maria/ma_locking.c:
DBUG_VOID_RETURN missing
storage/maria/ma_loghandler.c:
UNDO for bulk insert with repair, and REDO for creating bitmaps.
LOGREC_FIRST_FREE to not have to change the for() every time we
add a new record type.
storage/maria/ma_loghandler.h:
new UNDO and REDO
storage/maria/ma_open.c:
Move share.kfile.file=kfile up a bit, so that _ma_update_state_lsns()
can get its value, this fixes a bug where LSN_REPAIRED_BY_MARIA_CHK
was not corrected on disk by maria_open().
Store skip_redo_lsn in index' header.
maria_enable_indexes() had a bug for BLOCK_RECORD, where an empty
file has one page, not 0 bytes.
storage/maria/ma_recovery.c:
- Skip a corrupted, missing, or repaired-with-maria_chk, table in
recovery: don't fail, just go to next REDO or UNDO; but if an UNDO
is skipped in UNDO phase we issue warnings.
- Skip REDO|UNDO in REDO phase if <skip_redo_lsn.
- If UNDO phase fails, delete transactions to not make trnman
assert.
- Update skip_redo_lsn when playing REDO_CREATE_TABLE
- Don't record UNDOs for old transactions which we don't know (long_trid==0)
- Bugfix for UNDO_KEY_DELETE_WITH_ROOT (see ma_key_recover.c)
- Execution of UNDO_BULK_INSERT_WITH_REPAIR
- Don't try to find a page number in REDO_DELETE_ALL
- Pieces moved to ma_recovery_util.c
storage/maria/ma_rename.c:
name change
storage/maria/ma_static.c:
I modified layout of the index' header (inserted skip_redo_lsn in its middle)
storage/maria/ma_test2.c:
allow breaking the test towards the end, tests execution of
UNDO_KEY_DELETE_WITH_ROOT
storage/maria/ma_test_recovery.expected:
6 as testflag instead of 4
storage/maria/ma_test_recovery:
Increase the amount of rollback work to do when testing recovery
with ma_test2; this reproduces the UNDO_KEY_DELETE_WITH_ROOT bug.
storage/maria/maria_chk.c:
skip_redo_lsn should be updated too, for consistency.
Write a REDO_REPAIR after all operations (including sort-records)
have been done.
No reason to flush blocks after maria_chk_data_link() and
maria_sort_records(), there is maria_close() in the end.
write_log_record() is a function, to not clutter maria_chk().
storage/maria/maria_def.h:
New member skip_redo_lsn in the state, and comments
storage/maria/maria_pack.c:
skip_redo_lsn should be updated too, for consistency
storage/maria/ma_recovery_util.c:
_ma_redo_not_needed_for_page(), defined in ma_recovery.c, is needed
by ma_blockrec.c; this causes link issues, resolved by putting
_ma_redo_not_needed_for_page() into a new file (so that it is not
in the same file as repair-related objects of ma_recovery.c).
storage/maria/ma_recovery_util.h:
new file
2008-01-17 23:59:32 +01:00
|
|
|
_ma_read_bitmap_page()
|
2007-01-18 20:38:14 +01:00
|
|
|
info Maria handler
|
|
|
|
bitmap Bitmap handler
|
|
|
|
page Page to read
|
|
|
|
|
|
|
|
TODO
|
|
|
|
Update 'bitmap->used_size' to real size of used bitmap
|
|
|
|
|
WL#3072 - Maria recovery
Unit test for recovery: runs ma_test1 and ma_test2 (both only with
INSERTs and DELETEs; UPDATEs disabled as not handled by recovery)
then moves the tables elsewhere; recreates tables from the log, and
compares and fails if there is a difference. Passes now.
Most of maria_read_log.c moved to ma_recovery.c, as it will be re-used
for recovery-from-ha_maria.
Bugfixes of applying of REDO_INSERT, REDO_PURGE_ROW.
Applying of REDO_PURGE_BLOCKS, REDO_DELETE_ALL, REDO_DROP_TABLE,
UNDO_ROW_INSERT (in REDO phase only, i.e. just doing records++),
UNDO_ROW_DELETE, UNDO_ROW_PURGE.
Code cleanups.
Monty: please look for "QQ". Sanja: please look for "Sanja".
Future tasks: recovery of the bitmap (easy), recovery of the state
(make it idempotent), more REDOs (Monty to work on
REDO_UPDATE?), UNDO phase...
Pushing this cset as it looks safe, contains test and bugfixes which
will help Monty implement applying of REDO_UPDATE.
sql/handler.cc:
typo
storage/maria/Makefile.am:
Adding ma_test_recovery (which ma_test_all invokes, and which can
also be run alone). Most of maria_read_log.c moved to ma_recovery.c
storage/maria/ha_maria.cc:
comments
storage/maria/ma_bitmap.c:
fixing comments. 2 -> sizeof(maria_bitmap_marker).
Bitmap-related part of _ma_initialize_datafile() moves in bitmap module.
Now putting the "bm" signature when creating the first bitmap page
(it used to happen only at next open, but that
caused an annoying difference when testing Recovery if the original
run didn't open the table, and it looks more
logical like this: it goes to disk only with its signature correct);
see the "QQ" comment towards the _ma_initialize_data_file() call
in ma_create.c for more).
When reading a bitmap page, verify its signature (happens when normally
using the table or when CHECKing it; not when REPAIRing it).
storage/maria/ma_blockrec.c:
* no need to sync the data file if table is not transactional
* Comments, code cleanup (log-related data moved to log-related code
block, int5store->page_store).
* Store the table's short id into LOGREC_UNDO_ROW_PURGE, like we
do for other records (though this record will soon be replaced
with a CLR).
* If "page" is 1 it means the page which extends from byte
page*block_size+1 to (page+1)*block_size (byte number 1 being
the first byte of the file). The last byte of the file is
data_file_length (same convention).
A new page needs to be created if the last byte of the page is
beyond the last byte of the file, i.e.
(page+1)*block_size+1 > data_file_length, so we correct the test
(bug found when testing log applying for ma_test1 -M -T --skip-update).
* update the page's LSN when removing a row from it during
execution of a REDO_PURGE_ROW record (bug found when testing log
applying for ma_test1 -M -T --skip-update).
* applying of REDO_PURGE_BLOCKs (limited to a one-page range for now).
storage/maria/ma_blockrec.h:
new functions. maria_bitmap_marker does not need to be exported.
storage/maria/ma_close.c:
we can always flush the table's state when closing the last instance
of the table. And it is needed for maria_read_log (as it does
not use maria_lock_database()).
storage/maria/ma_control_file.c:
when in Recovery, some assertions should not be used.
storage/maria/ma_control_file.h:
double-inclusion safe
storage/maria/ma_create.c:
during recovery, don't log records. Comments.
Moving the creation of the first bitmap page to ma_bitmap.c
storage/maria/ma_delete_table.c:
during recovery, don't log records. Log the end-zero of the dropped
table's name, so that recovery can use the string in place without
extending it to fit an end zero.
storage/maria/ma_loghandler.c:
* inwrite_rec_hook also needs access to the MARIA_SHARE, like
prewrite_rec_hook. This will be needed to update
share->records_diff (in the upcoming patch "recovery of the state").
* LOG_DESC::record_ends_group changed to an enum.
* LOG_DESC for LOGREC_REDO_PURGE_BLOCKS and LOGREC_UNDO_ROW_PURGE
corrected
* Sanja please see the @todo LOG BUG
* avoiding DBUG_RETURN(func()) as it gives confusing debug traces.
storage/maria/ma_loghandler.h:
- log write hooks called while the log's lock is held (inwrite_rec_hook)
now need the MARIA_SHARE, like prewrite_rec_hook already had
- instead of a bool saying if this record's type ends groups or not,
we refine: it may not end a group, it may end a group, or it may
be a group in itself. Imagine that we had a physical write failure
to a table before we log the UNDO, we still end up in
external_lock(F_UNLCK) and then we log a COMMIT: we don't want
to consider this COMMIT as ending the group of REDOs (don't want
to execute those REDOs during Recovery), that's why we say "COMMIT
is a group in itself, it aborts any previous group". This also
gives one more sanity check in maria_read_log.
storage/maria/ma_recovery.c:
New Recovery code, replacing the old pseudocode.
Most of maria_read_log moved here.
Call-able from ha_maria, but not enabled yet.
Compared to the previous version of maria_read_log, some bugs have
been fixed, debugging output can go to stdout or a disk file (for now
it's useful for me, later it can be changed), execution of
REDO_DROP_TABLE, REDO_DELETE_ALL, REDO_PURGE_BLOCKS has been added. Duplicate code
has been factored into functions. We abort an unfinished group
of records if we see a record which is a group in itself (like COMMIT).
No need for maria_panic() after a bug (which caused tables to not
be closed) was fixed; if there is yet another bug I prefer to see it.
When opening a table for Recovery, set data_file_length
and key_file_length to their real physical value (these are the
easiest state members to restore :). Warn us if the last page
was truncated (but Recovery handles it).
MARIA_SHARE::state::state::records is now partly recovered (not
idempotent, but works if recreating tables from scratch).
When applying a REDO to a page, stamp it with the UNDO's LSN
(current_group_end_lsn), not with the REDO's LSN; it makes
the table more identical to the original table (easier to compare
the two tables in the end).
Big thing missing: some types of REDOs are not handled,
and the UNDO phase does not exist (missing functions to execute UNDOs
to actually rollback). So for now tests are only inserting/deleting
a few 100 rows, closing the table and seeing if the log is applied ok;
it works. UPDATE not handled.
storage/maria/ma_recovery.h:
new functions: ma_recover() for recovery from inside ha_maria;
_ma_apply_log() for maria_read_log (ma_recover() calls _ma_apply_log()).
Btw, we need to not use the word "recover" for REPAIR/maria_chk anymore.
storage/maria/ma_rename.c:
don't write log records during recovery
storage/maria/ma_test2.c:
- fail if maria_info() or other subtests find some wrong information
- new option -g to skip updates.
- init the translog before creating the table, so that log applying
can work.
- in "#if 0" you'll see some fixed bugs (will be removed).
storage/maria/ma_test_all.sh:
cleanup files. Test log applying.
storage/maria/maria_read_log.c:
most of the logic moves to ma_recovery.c to be shared between
maria_read_log and recovery-from-inside-mysqld.
See ma_recovery.c for additional changes made to the moved code.
storage/maria/ma_test_recovery:
unit test for Recovery. Tests insert and delete,
REDO_UPDATE not yet coded.
Script is called from ma_test_all. Can run standalone.
2007-07-26 11:56:21 +02:00
|
|
|
NOTE
|
|
|
|
We don't always have share->bitmap.bitmap_lock here
|
|
|
|
(when called from _ma_check_bitmap_data() for example).
|
|
|
|
|
2007-01-18 20:38:14 +01:00
|
|
|
RETURN
|
|
|
|
0 ok
|
|
|
|
1 error (Error writing old bitmap or reading bitmap page)
|
|
|
|
*/
|
|
|
|
|
WL#3072 - Maria Recovery
Bulk insert: don't log REDO/UNDO for rows, log one UNDO which will
truncate files; this is an optimization and a bugfix (table was left
half-repaired by crash).
Repair: mark table crashed-on-repair at start, bump skip_redo_lsn at start,
this is easier for recovery (tells it to skip old REDOs or even UNDO
phase) and user (tells it to repair) in case of crash, sync files
in the end.
Recovery skips missing or corrupted table and moves to next record
(in REDO or UNDO phase) to be more robust; warns if happens in UNDO phase.
Bugfix for UNDO_KEY_DELETE_WITH_ROOT (tested in ma_test_recovery)
and maria_enable_indexes().
Create missing bitmaps when needed (there can be more than one to create,
in rare cases), log a record for this.
include/myisamchk.h:
new flag: bulk insert repair mustn't bump create_rename_lsn
mysql-test/lib/mtr_report.pl:
skip normal warning in maria-recovery.test
mysql-test/r/maria-recovery.result:
result: crash before bulk insert is committed, causes proper rollback,
and crash right after OPTIMIZE replaces index file with new index file
leads to table marked corrupted and recovery not failing.
mysql-test/t/maria-recovery.test:
- can't check the table or it would commit the transaction,
but check is made after recovery.
- test of crash before bulk-insert-with-repair is committed
(to see if it is rolled back), and of crash after OPTIMIZE has replaced
index file but not finished all operations (to see if recovery fails -
it used to assert when trying to execute an old REDO on the new
index).
storage/maria/CMakeLists.txt:
new file
storage/maria/Makefile.am:
new file
storage/maria/ha_maria.cc:
- If bulk insert on a transactional table using an index repair:
table is initially empty, so don't log REDO/UNDO for data rows
(optimization), just log an UNDO_BULK_INSERT_WITH_REPAIR
which will, if executed, empty the data and index file. Re-enable
logging in end_bulk_insert().
- write log record for repair operation only after it's fully done,
index sort including (maria_repair*() used to write the log record).
- Adding back file->trn=NULL which was removed by mistake earlier.
storage/maria/ha_maria.h:
new member (see ha_maria.cc)
storage/maria/ma_bitmap.c:
Functions to create missing bitmaps:
- one function which creates missing bitmaps in page cache, except
the missing one with max offset which it does not put into page cache
as it will be modified very soon.
- one function which the one above calls, and creates bitmaps in page
cache
- one function to execute REDO_BITMAP_NEW_PAGE which uses the second
one above.
storage/maria/ma_blockrec.c:
- when logging REDO_DELETE_ALL, not only 'records' and 'checksum'
has to be reset under log's mutex.
- execution of REDO_INSERT_ROW_BLOBS now checks the dirty pages' list
- execution of UNDO_BULK_INSERT_WITH_REPAIR
storage/maria/ma_blockrec.h:
new functions
storage/maria/ma_check.c:
- table-flush-before-repair is moved to a separate function reused
by maria_sort_index(); syncing is added
- maria_repair() is allowed to re-enable logging only if it is the one
which disabled it.
- "_ma_flush_table_files_after_repair" was a bad name, it's not after
repair now, and it should not sync as we do more changes to the files
shortly after (sync is postponed to when writing the log record)
- REDO_REPAIR record should be written only after all repair
operations (in particular after sorting index in ha_mara::repair())
- close to the end of repair by sort, flushing of pages must happen
also in the non-quick case, to prepare for the sync at end.
- in parallel repair, some page flushes are not needed as done
by initialize_variables_for_repair().
storage/maria/ma_create.c:
Update skip_redo_lsn, create_rename_lsn optionally.
storage/maria/ma_delete_all.c:
Need to sync files at end of maria_delete_all_rows(), if transactional.
storage/maria/ma_extra.c:
During repair, we sometimes call _ma_flush_table_files() (via
_ma_flush_table_files_before_swap()) while there is a WRITE_CACHE.
storage/maria/ma_key_recover.c:
- when we see CLR_END for UNDO_BULK_INSERT_WITH_REPAIR, re-enable
indices.
- fixing bug: _ma_apply_undo_key_delete() parsed UNDO_KEY_DELETE_WITH_ROOT
wrongly, leading to recovery failure
storage/maria/ma_key_recover.h:
new prototype
storage/maria/ma_locking.c:
DBUG_VOID_RETURN missing
storage/maria/ma_loghandler.c:
UNDO for bulk insert with repair, and REDO for creating bitmaps.
LOGREC_FIRST_FREE to not have to change the for() every time we
add a new record type.
storage/maria/ma_loghandler.h:
new UNDO and REDO
storage/maria/ma_open.c:
Move share.kfile.file=kfile up a bit, so that _ma_update_state_lsns()
can get its value, this fixes a bug where LSN_REPAIRED_BY_MARIA_CHK
was not corrected on disk by maria_open().
Store skip_redo_lsn in index' header.
maria_enable_indexes() had a bug for BLOCK_RECORD, where an empty
file has one page, not 0 bytes.
storage/maria/ma_recovery.c:
- Skip a corrupted, missing, or repaired-with-maria_chk, table in
recovery: don't fail, just go to next REDO or UNDO; but if an UNDO
is skipped in UNDO phase we issue warnings.
- Skip REDO|UNDO in REDO phase if <skip_redo_lsn.
- If UNDO phase fails, delete transactions to not make trnman
assert.
- Update skip_redo_lsn when playing REDO_CREATE_TABLE
- Don't record UNDOs for old transactions which we don't know (long_trid==0)
- Bugfix for UNDO_KEY_DELETE_WITH_ROOT (see ma_key_recover.c)
- Execution of UNDO_BULK_INSERT_WITH_REPAIR
- Don't try to find a page number in REDO_DELETE_ALL
- Pieces moved to ma_recovery_util.c
storage/maria/ma_rename.c:
name change
storage/maria/ma_static.c:
I modified layout of the index' header (inserted skip_redo_lsn in its middle)
storage/maria/ma_test2.c:
allow breaking the test towards the end, tests execution of
UNDO_KEY_DELETE_WITH_ROOT
storage/maria/ma_test_recovery.expected:
6 as testflag instead of 4
storage/maria/ma_test_recovery:
Increase the amount of rollback work to do when testing recovery
with ma_test2; this reproduces the UNDO_KEY_DELETE_WITH_ROOT bug.
storage/maria/maria_chk.c:
skip_redo_lsn should be updated too, for consistency.
Write a REDO_REPAIR after all operations (including sort-records)
have been done.
No reason to flush blocks after maria_chk_data_link() and
maria_sort_records(), there is maria_close() in the end.
write_log_record() is a function, to not clutter maria_chk().
storage/maria/maria_def.h:
New member skip_redo_lsn in the state, and comments
storage/maria/maria_pack.c:
skip_redo_lsn should be updated too, for consistency
storage/maria/ma_recovery_util.c:
_ma_redo_not_needed_for_page(), defined in ma_recovery.c, is needed
by ma_blockrec.c; this causes link issues, resolved by putting
_ma_redo_not_needed_for_page() into a new file (so that it is not
in the same file as repair-related objects of ma_recovery.c).
storage/maria/ma_recovery_util.h:
new file
2008-01-17 23:59:32 +01:00
|
|
|
static my_bool _ma_read_bitmap_page(MARIA_HA *info,
|
2007-04-19 12:18:56 +02:00
|
|
|
MARIA_FILE_BITMAP *bitmap,
|
2008-01-10 20:21:36 +01:00
|
|
|
pgcache_page_no_t page)
|
2007-01-18 20:38:14 +01:00
|
|
|
{
|
WL#3072 - Maria Recovery
Bulk insert: don't log REDO/UNDO for rows, log one UNDO which will
truncate files; this is an optimization and a bugfix (table was left
half-repaired by crash).
Repair: mark table crashed-on-repair at start, bump skip_redo_lsn at start,
this is easier for recovery (tells it to skip old REDOs or even UNDO
phase) and user (tells it to repair) in case of crash, sync files
in the end.
Recovery skips missing or corrupted table and moves to next record
(in REDO or UNDO phase) to be more robust; warns if happens in UNDO phase.
Bugfix for UNDO_KEY_DELETE_WITH_ROOT (tested in ma_test_recovery)
and maria_enable_indexes().
Create missing bitmaps when needed (there can be more than one to create,
in rare cases), log a record for this.
include/myisamchk.h:
new flag: bulk insert repair mustn't bump create_rename_lsn
mysql-test/lib/mtr_report.pl:
skip normal warning in maria-recovery.test
mysql-test/r/maria-recovery.result:
result: crash before bulk insert is committed, causes proper rollback,
and crash right after OPTIMIZE replaces index file with new index file
leads to table marked corrupted and recovery not failing.
mysql-test/t/maria-recovery.test:
- can't check the table or it would commit the transaction,
but check is made after recovery.
- test of crash before bulk-insert-with-repair is committed
(to see if it is rolled back), and of crash after OPTIMIZE has replaced
index file but not finished all operations (to see if recovery fails -
it used to assert when trying to execute an old REDO on the new
index).
storage/maria/CMakeLists.txt:
new file
storage/maria/Makefile.am:
new file
storage/maria/ha_maria.cc:
- If bulk insert on a transactional table using an index repair:
table is initially empty, so don't log REDO/UNDO for data rows
(optimization), just log an UNDO_BULK_INSERT_WITH_REPAIR
which will, if executed, empty the data and index file. Re-enable
logging in end_bulk_insert().
- write log record for repair operation only after it's fully done,
index sort including (maria_repair*() used to write the log record).
- Adding back file->trn=NULL which was removed by mistake earlier.
storage/maria/ha_maria.h:
new member (see ha_maria.cc)
storage/maria/ma_bitmap.c:
Functions to create missing bitmaps:
- one function which creates missing bitmaps in page cache, except
the missing one with max offset which it does not put into page cache
as it will be modified very soon.
- one function which the one above calls, and creates bitmaps in page
cache
- one function to execute REDO_BITMAP_NEW_PAGE which uses the second
one above.
storage/maria/ma_blockrec.c:
- when logging REDO_DELETE_ALL, not only 'records' and 'checksum'
has to be reset under log's mutex.
- execution of REDO_INSERT_ROW_BLOBS now checks the dirty pages' list
- execution of UNDO_BULK_INSERT_WITH_REPAIR
storage/maria/ma_blockrec.h:
new functions
storage/maria/ma_check.c:
- table-flush-before-repair is moved to a separate function reused
by maria_sort_index(); syncing is added
- maria_repair() is allowed to re-enable logging only if it is the one
which disabled it.
- "_ma_flush_table_files_after_repair" was a bad name, it's not after
repair now, and it should not sync as we do more changes to the files
shortly after (sync is postponed to when writing the log record)
- REDO_REPAIR record should be written only after all repair
operations (in particular after sorting index in ha_mara::repair())
- close to the end of repair by sort, flushing of pages must happen
also in the non-quick case, to prepare for the sync at end.
- in parallel repair, some page flushes are not needed as done
by initialize_variables_for_repair().
storage/maria/ma_create.c:
Update skip_redo_lsn, create_rename_lsn optionally.
storage/maria/ma_delete_all.c:
Need to sync files at end of maria_delete_all_rows(), if transactional.
storage/maria/ma_extra.c:
During repair, we sometimes call _ma_flush_table_files() (via
_ma_flush_table_files_before_swap()) while there is a WRITE_CACHE.
storage/maria/ma_key_recover.c:
- when we see CLR_END for UNDO_BULK_INSERT_WITH_REPAIR, re-enable
indices.
- fixing bug: _ma_apply_undo_key_delete() parsed UNDO_KEY_DELETE_WITH_ROOT
wrongly, leading to recovery failure
storage/maria/ma_key_recover.h:
new prototype
storage/maria/ma_locking.c:
DBUG_VOID_RETURN missing
storage/maria/ma_loghandler.c:
UNDO for bulk insert with repair, and REDO for creating bitmaps.
LOGREC_FIRST_FREE to not have to change the for() every time we
add a new record type.
storage/maria/ma_loghandler.h:
new UNDO and REDO
storage/maria/ma_open.c:
Move share.kfile.file=kfile up a bit, so that _ma_update_state_lsns()
can get its value, this fixes a bug where LSN_REPAIRED_BY_MARIA_CHK
was not corrected on disk by maria_open().
Store skip_redo_lsn in index' header.
maria_enable_indexes() had a bug for BLOCK_RECORD, where an empty
file has one page, not 0 bytes.
storage/maria/ma_recovery.c:
- Skip a corrupted, missing, or repaired-with-maria_chk, table in
recovery: don't fail, just go to next REDO or UNDO; but if an UNDO
is skipped in UNDO phase we issue warnings.
- Skip REDO|UNDO in REDO phase if <skip_redo_lsn.
- If UNDO phase fails, delete transactions to not make trnman
assert.
- Update skip_redo_lsn when playing REDO_CREATE_TABLE
- Don't record UNDOs for old transactions which we don't know (long_trid==0)
- Bugfix for UNDO_KEY_DELETE_WITH_ROOT (see ma_key_recover.c)
- Execution of UNDO_BULK_INSERT_WITH_REPAIR
- Don't try to find a page number in REDO_DELETE_ALL
- Pieces moved to ma_recovery_util.c
storage/maria/ma_rename.c:
name change
storage/maria/ma_static.c:
I modified layout of the index' header (inserted skip_redo_lsn in its middle)
storage/maria/ma_test2.c:
allow breaking the test towards the end, tests execution of
UNDO_KEY_DELETE_WITH_ROOT
storage/maria/ma_test_recovery.expected:
6 as testflag instead of 4
storage/maria/ma_test_recovery:
Increase the amount of rollback work to do when testing recovery
with ma_test2; this reproduces the UNDO_KEY_DELETE_WITH_ROOT bug.
storage/maria/maria_chk.c:
skip_redo_lsn should be updated too, for consistency.
Write a REDO_REPAIR after all operations (including sort-records)
have been done.
No reason to flush blocks after maria_chk_data_link() and
maria_sort_records(), there is maria_close() in the end.
write_log_record() is a function, to not clutter maria_chk().
storage/maria/maria_def.h:
New member skip_redo_lsn in the state, and comments
storage/maria/maria_pack.c:
skip_redo_lsn should be updated too, for consistency
storage/maria/ma_recovery_util.c:
_ma_redo_not_needed_for_page(), defined in ma_recovery.c, is needed
by ma_blockrec.c; this causes link issues, resolved by putting
_ma_redo_not_needed_for_page() into a new file (so that it is not
in the same file as repair-related objects of ma_recovery.c).
storage/maria/ma_recovery_util.h:
new file
2008-01-17 23:59:32 +01:00
|
|
|
MARIA_SHARE *share= info->s;
|
2007-01-18 20:38:14 +01:00
|
|
|
my_bool res;
|
|
|
|
DBUG_ENTER("_ma_read_bitmap_page");
|
|
|
|
DBUG_ASSERT(page % bitmap->pages_covered == 0);
|
WL#3072 Maria recovery:
fix for bug: if a crash happened right after writing a REDO like this:
REDO - UNDO - REDO*, then recovery would ignore the last REDO* (ok),
rollback: REDO - UNDO - REDO* - REDO - CLR, and a next recovery would
thus execute REDO* instead of skipping it again. Recovery now logs
LOGREC_INCOMPLETE_GROUP when it meets REDO* for the first time,
to draw a boundary and ensure it is always skipped. Tested by hand.
Note: ma_test_all fails "maria_chk: error: Key 1 - Found too many records"
not due to this patch (failed before).
BitKeeper/triggers/post-commit:
no truncation of the commit mail, or how to review patches?
mysql-test/include/maria_verify_recovery.inc:
let caller choose the statement used to crash (sometimes we
want the crash to happen at special places)
mysql-test/t/maria-recovery.test:
user of maria_verify_recovery.inc now specifies statement which the
script should use for crashing.
storage/maria/ma_bitmap.c:
it's easier to search for all places using functions from the bitmap
module (like in ma_blockrec.c) if those exported functions all start
with "_ma_bitmap": renaming some of them.
Assertion that when we read a bitmap page, overwriting bitmap->map,
we are not losing information (i.e. bitmap->changed is false).
storage/maria/ma_blockrec.c:
update to new names. Adding code (disabled, protected by a #ifdef)
that I use to test certain crash scenarios (more to come).
storage/maria/ma_blockrec.h:
update to new names
storage/maria/ma_checkpoint.c:
update to new names
storage/maria/ma_extra.c:
update to new names
storage/maria/ma_loghandler.c:
new LOGREC_INCOMPLETE_GROUP
storage/maria/ma_loghandler.h:
new LOGREC_INCOMPLETE_GROUP
storage/maria/ma_recovery.c:
When at the end of the REDO phase we have identified some transactions
with incomplete REDO groups (REDOs without an UNDO or CLR_END),
for each of them we log LOGREC_INCOMPLETE_GROUP. This way, the
upcoming UNDO phase can write more records for such transaction,
a future recovery won't pair the incomplete group with the
CLR_END (as there is LOGREC_INCOMPLETE_GROUP to draw a boundary).
2007-12-10 23:26:53 +01:00
|
|
|
DBUG_ASSERT(!bitmap->changed);
|
2007-01-18 20:38:14 +01:00
|
|
|
|
|
|
|
bitmap->page= page;
|
2011-07-24 10:25:28 +02:00
|
|
|
if ((page + 1) * bitmap->block_size > share->state.state.data_file_length)
|
2007-01-18 20:38:14 +01:00
|
|
|
{
|
WL#3072 - Maria Recovery
Bulk insert: don't log REDO/UNDO for rows, log one UNDO which will
truncate files; this is an optimization and a bugfix (table was left
half-repaired by crash).
Repair: mark table crashed-on-repair at start, bump skip_redo_lsn at start,
this is easier for recovery (tells it to skip old REDOs or even UNDO
phase) and user (tells it to repair) in case of crash, sync files
in the end.
Recovery skips missing or corrupted table and moves to next record
(in REDO or UNDO phase) to be more robust; warns if happens in UNDO phase.
Bugfix for UNDO_KEY_DELETE_WITH_ROOT (tested in ma_test_recovery)
and maria_enable_indexes().
Create missing bitmaps when needed (there can be more than one to create,
in rare cases), log a record for this.
include/myisamchk.h:
new flag: bulk insert repair mustn't bump create_rename_lsn
mysql-test/lib/mtr_report.pl:
skip normal warning in maria-recovery.test
mysql-test/r/maria-recovery.result:
result: crash before bulk insert is committed, causes proper rollback,
and crash right after OPTIMIZE replaces index file with new index file
leads to table marked corrupted and recovery not failing.
mysql-test/t/maria-recovery.test:
- can't check the table or it would commit the transaction,
but check is made after recovery.
- test of crash before bulk-insert-with-repair is committed
(to see if it is rolled back), and of crash after OPTIMIZE has replaced
index file but not finished all operations (to see if recovery fails -
it used to assert when trying to execute an old REDO on the new
index).
storage/maria/CMakeLists.txt:
new file
storage/maria/Makefile.am:
new file
storage/maria/ha_maria.cc:
- If bulk insert on a transactional table using an index repair:
table is initially empty, so don't log REDO/UNDO for data rows
(optimization), just log an UNDO_BULK_INSERT_WITH_REPAIR
which will, if executed, empty the data and index file. Re-enable
logging in end_bulk_insert().
- write log record for repair operation only after it's fully done,
index sort including (maria_repair*() used to write the log record).
- Adding back file->trn=NULL which was removed by mistake earlier.
storage/maria/ha_maria.h:
new member (see ha_maria.cc)
storage/maria/ma_bitmap.c:
Functions to create missing bitmaps:
- one function which creates missing bitmaps in page cache, except
the missing one with max offset which it does not put into page cache
as it will be modified very soon.
- one function which the one above calls, and creates bitmaps in page
cache
- one function to execute REDO_BITMAP_NEW_PAGE which uses the second
one above.
storage/maria/ma_blockrec.c:
- when logging REDO_DELETE_ALL, not only 'records' and 'checksum'
has to be reset under log's mutex.
- execution of REDO_INSERT_ROW_BLOBS now checks the dirty pages' list
- execution of UNDO_BULK_INSERT_WITH_REPAIR
storage/maria/ma_blockrec.h:
new functions
storage/maria/ma_check.c:
- table-flush-before-repair is moved to a separate function reused
by maria_sort_index(); syncing is added
- maria_repair() is allowed to re-enable logging only if it is the one
which disabled it.
- "_ma_flush_table_files_after_repair" was a bad name, it's not after
repair now, and it should not sync as we do more changes to the files
shortly after (sync is postponed to when writing the log record)
- REDO_REPAIR record should be written only after all repair
operations (in particular after sorting index in ha_maria::repair())
- close to the end of repair by sort, flushing of pages must happen
also in the non-quick case, to prepare for the sync at end.
- in parallel repair, some page flushes are not needed as done
by initialize_variables_for_repair().
storage/maria/ma_create.c:
Update skip_redo_lsn, create_rename_lsn optionally.
storage/maria/ma_delete_all.c:
Need to sync files at end of maria_delete_all_rows(), if transactional.
storage/maria/ma_extra.c:
During repair, we sometimes call _ma_flush_table_files() (via
_ma_flush_table_files_before_swap()) while there is a WRITE_CACHE.
storage/maria/ma_key_recover.c:
- when we see CLR_END for UNDO_BULK_INSERT_WITH_REPAIR, re-enable
indices.
- fixing bug: _ma_apply_undo_key_delete() parsed UNDO_KEY_DELETE_WITH_ROOT
wrongly, leading to recovery failure
storage/maria/ma_key_recover.h:
new prototype
storage/maria/ma_locking.c:
DBUG_VOID_RETURN missing
storage/maria/ma_loghandler.c:
UNDO for bulk insert with repair, and REDO for creating bitmaps.
LOGREC_FIRST_FREE to not have to change the for() every time we
add a new record type.
storage/maria/ma_loghandler.h:
new UNDO and REDO
storage/maria/ma_open.c:
Move share.kfile.file=kfile up a bit, so that _ma_update_state_lsns()
can get its value, this fixes a bug where LSN_REPAIRED_BY_MARIA_CHK
was not corrected on disk by maria_open().
Store skip_redo_lsn in index' header.
maria_enable_indexes() had a bug for BLOCK_RECORD, where an empty
file has one page, not 0 bytes.
storage/maria/ma_recovery.c:
- Skip a corrupted, missing, or repaired-with-maria_chk, table in
recovery: don't fail, just go to next REDO or UNDO; but if an UNDO
is skipped in UNDO phase we issue warnings.
- Skip REDO|UNDO in REDO phase if <skip_redo_lsn.
- If UNDO phase fails, delete transactions to not make trnman
assert.
- Update skip_redo_lsn when playing REDO_CREATE_TABLE
- Don't record UNDOs for old transactions which we don't know (long_trid==0)
- Bugfix for UNDO_KEY_DELETE_WITH_ROOT (see ma_key_recover.c)
- Execution of UNDO_BULK_INSERT_WITH_REPAIR
- Don't try to find a page number in REDO_DELETE_ALL
- Pieces moved to ma_recovery_util.c
storage/maria/ma_rename.c:
name change
storage/maria/ma_static.c:
I modified layout of the index' header (inserted skip_redo_lsn in its middle)
storage/maria/ma_test2.c:
allow breaking the test towards the end, tests execution of
UNDO_KEY_DELETE_WITH_ROOT
storage/maria/ma_test_recovery.expected:
6 as testflag instead of 4
storage/maria/ma_test_recovery:
Increase the amount of rollback work to do when testing recovery
with ma_test2; this reproduces the UNDO_KEY_DELETE_WITH_ROOT bug.
storage/maria/maria_chk.c:
skip_redo_lsn should be updated too, for consistency.
Write a REDO_REPAIR after all operations (including sort-records)
have been done.
No reason to flush blocks after maria_chk_data_link() and
maria_sort_records(), there is maria_close() in the end.
write_log_record() is a function, to not clutter maria_chk().
storage/maria/maria_def.h:
New member skip_redo_lsn in the state, and comments
storage/maria/maria_pack.c:
skip_redo_lsn should be updated too, for consistency
storage/maria/ma_recovery_util.c:
_ma_redo_not_needed_for_page(), defined in ma_recovery.c, is needed
by ma_blockrec.c; this causes link issues, resolved by putting
_ma_redo_not_needed_for_page() into a new file (so that it is not
in the same file as repair-related objects of ma_recovery.c).
storage/maria/ma_recovery_util.h:
new file
2008-01-17 23:59:32 +01:00
|
|
|
/* Inexistent or half-created page */
|
|
|
|
res= _ma_bitmap_create_missing(info, bitmap, page);
|
2011-07-24 10:25:28 +02:00
|
|
|
if (!res)
|
|
|
|
adjust_total_size(info, page);
|
WL#3072 - Maria Recovery
Bulk insert: don't log REDO/UNDO for rows, log one UNDO which will
truncate files; this is an optimization and a bugfix (table was left
half-repaired by crash).
Repair: mark table crashed-on-repair at start, bump skip_redo_lsn at start,
this is easier for recovery (tells it to skip old REDOs or even UNDO
phase) and user (tells it to repair) in case of crash, sync files
in the end.
Recovery skips missing or corrupted table and moves to next record
(in REDO or UNDO phase) to be more robust; warns if happens in UNDO phase.
Bugfix for UNDO_KEY_DELETE_WITH_ROOT (tested in ma_test_recovery)
and maria_enable_indexes().
Create missing bitmaps when needed (there can be more than one to create,
in rare cases), log a record for this.
include/myisamchk.h:
new flag: bulk insert repair mustn't bump create_rename_lsn
mysql-test/lib/mtr_report.pl:
skip normal warning in maria-recovery.test
mysql-test/r/maria-recovery.result:
result: crash before bulk insert is committed, causes proper rollback,
and crash right after OPTIMIZE replaces index file with new index file
leads to table marked corrupted and recovery not failing.
mysql-test/t/maria-recovery.test:
- can't check the table or it would commit the transaction,
but check is made after recovery.
- test of crash before bulk-insert-with-repair is committed
(to see if it is rolled back), and of crash after OPTIMIZE has replaced
index file but not finished all operations (to see if recovery fails -
it used to assert when trying to execute an old REDO on the new
index).
storage/maria/CMakeLists.txt:
new file
storage/maria/Makefile.am:
new file
storage/maria/ha_maria.cc:
- If bulk insert on a transactional table using an index repair:
table is initially empty, so don't log REDO/UNDO for data rows
(optimization), just log an UNDO_BULK_INSERT_WITH_REPAIR
which will, if executed, empty the data and index file. Re-enable
logging in end_bulk_insert().
- write log record for repair operation only after it's fully done,
index sort including (maria_repair*() used to write the log record).
- Adding back file->trn=NULL which was removed by mistake earlier.
storage/maria/ha_maria.h:
new member (see ha_maria.cc)
storage/maria/ma_bitmap.c:
Functions to create missing bitmaps:
- one function which creates missing bitmaps in page cache, except
the missing one with max offset which it does not put into page cache
as it will be modified very soon.
- one function which the one above calls, and creates bitmaps in page
cache
- one function to execute REDO_BITMAP_NEW_PAGE which uses the second
one above.
storage/maria/ma_blockrec.c:
- when logging REDO_DELETE_ALL, not only 'records' and 'checksum'
has to be reset under log's mutex.
- execution of REDO_INSERT_ROW_BLOBS now checks the dirty pages' list
- execution of UNDO_BULK_INSERT_WITH_REPAIR
storage/maria/ma_blockrec.h:
new functions
storage/maria/ma_check.c:
- table-flush-before-repair is moved to a separate function reused
by maria_sort_index(); syncing is added
- maria_repair() is allowed to re-enable logging only if it is the one
which disabled it.
- "_ma_flush_table_files_after_repair" was a bad name, it's not after
repair now, and it should not sync as we do more changes to the files
shortly after (sync is postponed to when writing the log record)
- REDO_REPAIR record should be written only after all repair
operations (in particular after sorting index in ha_maria::repair())
- close to the end of repair by sort, flushing of pages must happen
also in the non-quick case, to prepare for the sync at end.
- in parallel repair, some page flushes are not needed as done
by initialize_variables_for_repair().
storage/maria/ma_create.c:
Update skip_redo_lsn, create_rename_lsn optionally.
storage/maria/ma_delete_all.c:
Need to sync files at end of maria_delete_all_rows(), if transactional.
storage/maria/ma_extra.c:
During repair, we sometimes call _ma_flush_table_files() (via
_ma_flush_table_files_before_swap()) while there is a WRITE_CACHE.
storage/maria/ma_key_recover.c:
- when we see CLR_END for UNDO_BULK_INSERT_WITH_REPAIR, re-enable
indices.
- fixing bug: _ma_apply_undo_key_delete() parsed UNDO_KEY_DELETE_WITH_ROOT
wrongly, leading to recovery failure
storage/maria/ma_key_recover.h:
new prototype
storage/maria/ma_locking.c:
DBUG_VOID_RETURN missing
storage/maria/ma_loghandler.c:
UNDO for bulk insert with repair, and REDO for creating bitmaps.
LOGREC_FIRST_FREE to not have to change the for() every time we
add a new record type.
storage/maria/ma_loghandler.h:
new UNDO and REDO
storage/maria/ma_open.c:
Move share.kfile.file=kfile up a bit, so that _ma_update_state_lsns()
can get its value, this fixes a bug where LSN_REPAIRED_BY_MARIA_CHK
was not corrected on disk by maria_open().
Store skip_redo_lsn in index' header.
maria_enable_indexes() had a bug for BLOCK_RECORD, where an empty
file has one page, not 0 bytes.
storage/maria/ma_recovery.c:
- Skip a corrupted, missing, or repaired-with-maria_chk, table in
recovery: don't fail, just go to next REDO or UNDO; but if an UNDO
is skipped in UNDO phase we issue warnings.
- Skip REDO|UNDO in REDO phase if <skip_redo_lsn.
- If UNDO phase fails, delete transactions to not make trnman
assert.
- Update skip_redo_lsn when playing REDO_CREATE_TABLE
- Don't record UNDOs for old transactions which we don't know (long_trid==0)
- Bugfix for UNDO_KEY_DELETE_WITH_ROOT (see ma_key_recover.c)
- Execution of UNDO_BULK_INSERT_WITH_REPAIR
- Don't try to find a page number in REDO_DELETE_ALL
- Pieces moved to ma_recovery_util.c
storage/maria/ma_rename.c:
name change
storage/maria/ma_static.c:
I modified layout of the index' header (inserted skip_redo_lsn in its middle)
storage/maria/ma_test2.c:
allow breaking the test towards the end, tests execution of
UNDO_KEY_DELETE_WITH_ROOT
storage/maria/ma_test_recovery.expected:
6 as testflag instead of 4
storage/maria/ma_test_recovery:
Increase the amount of rollback work to do when testing recovery
with ma_test2; this reproduces the UNDO_KEY_DELETE_WITH_ROOT bug.
storage/maria/maria_chk.c:
skip_redo_lsn should be updated too, for consistency.
Write a REDO_REPAIR after all operations (including sort-records)
have been done.
No reason to flush blocks after maria_chk_data_link() and
maria_sort_records(), there is maria_close() in the end.
write_log_record() is a function, to not clutter maria_chk().
storage/maria/maria_def.h:
New member skip_redo_lsn in the state, and comments
storage/maria/maria_pack.c:
skip_redo_lsn should be updated too, for consistency
storage/maria/ma_recovery_util.c:
_ma_redo_not_needed_for_page(), defined in ma_recovery.c, is needed
by ma_blockrec.c; this causes link issues, resolved by putting
_ma_redo_not_needed_for_page() into a new file (so that it is not
in the same file as repair-related objects of ma_recovery.c).
storage/maria/ma_recovery_util.h:
new file
2008-01-17 23:59:32 +01:00
|
|
|
DBUG_RETURN(res);
|
2007-01-18 20:38:14 +01:00
|
|
|
}
|
2011-07-24 10:25:28 +02:00
|
|
|
|
|
|
|
adjust_total_size(info, page);
|
2007-01-18 20:38:14 +01:00
|
|
|
bitmap->used_size= bitmap->total_size;
|
2007-04-04 22:37:09 +02:00
|
|
|
DBUG_ASSERT(share->pagecache->block_size == bitmap->block_size);
|
2007-10-09 20:09:50 +02:00
|
|
|
res= pagecache_read(share->pagecache,
|
2007-12-15 22:31:22 +01:00
|
|
|
&bitmap->file, page, 0,
|
2009-01-09 05:23:25 +01:00
|
|
|
bitmap->map, PAGECACHE_PLAIN_PAGE,
|
2007-10-09 20:09:50 +02:00
|
|
|
PAGECACHE_LOCK_LEFT_UNLOCKED, 0) == NULL;
|
|
|
|
|
|
|
|
/*
|
|
|
|
We can't check maria_bitmap_marker here as if the bitmap page
|
|
|
|
previously had a true checksum and the user switched mode to not checksum
|
|
|
|
this may have any value, except maria_normal_page_marker.
|
|
|
|
|
|
|
|
Using maria_normal_page_marker gives us a protection against bugs
|
|
|
|
when running without any checksums.
|
|
|
|
*/
|
|
|
|
|
2007-01-18 20:38:14 +01:00
|
|
|
#ifndef DBUG_OFF
|
|
|
|
if (!res)
|
2007-04-19 12:18:56 +02:00
|
|
|
memcpy(bitmap->map + bitmap->block_size, bitmap->map, bitmap->block_size);
|
2007-01-18 20:38:14 +01:00
|
|
|
#endif
|
|
|
|
DBUG_RETURN(res);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
Change to another bitmap page
|
|
|
|
|
|
|
|
SYNOPSIS
|
|
|
|
_ma_change_bitmap_page()
|
|
|
|
info Maria handler
|
|
|
|
bitmap Bitmap handler
|
|
|
|
page Bitmap page to read
|
|
|
|
|
|
|
|
NOTES
|
|
|
|
If old bitmap was changed, write it out before reading new one
|
|
|
|
We return empty bitmap if page is outside of file size
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
0 ok
|
|
|
|
1 error (Error writing old bitmap or reading bitmap page)
|
|
|
|
*/
|
|
|
|
|
|
|
|
static my_bool _ma_change_bitmap_page(MARIA_HA *info,
|
|
|
|
MARIA_FILE_BITMAP *bitmap,
|
2008-01-10 20:21:36 +01:00
|
|
|
pgcache_page_no_t page)
|
2007-01-18 20:38:14 +01:00
|
|
|
{
|
|
|
|
DBUG_ENTER("_ma_change_bitmap_page");
|
|
|
|
|
2011-07-26 06:52:15 +02:00
|
|
|
/*
|
|
|
|
We have to mark the file changed here, as otherwise the following
|
|
|
|
read/write to pagecache may force a page out from this file, which would
|
|
|
|
cause _ma_mark_file_changed() to be called with bitmaplock hold!
|
|
|
|
*/
|
|
|
|
_ma_bitmap_mark_file_changed(info->s, 1);
|
2011-06-24 11:08:45 +02:00
|
|
|
|
2007-01-18 20:38:14 +01:00
|
|
|
if (bitmap->changed)
|
|
|
|
{
|
|
|
|
if (write_changed_bitmap(info->s, bitmap))
|
|
|
|
DBUG_RETURN(1);
|
|
|
|
bitmap->changed= 0;
|
|
|
|
}
|
WL#3072 - Maria Recovery
Bulk insert: don't log REDO/UNDO for rows, log one UNDO which will
truncate files; this is an optimization and a bugfix (table was left
half-repaired by crash).
Repair: mark table crashed-on-repair at start, bump skip_redo_lsn at start,
this is easier for recovery (tells it to skip old REDOs or even UNDO
phase) and user (tells it to repair) in case of crash, sync files
in the end.
Recovery skips missing or corrupted table and moves to next record
(in REDO or UNDO phase) to be more robust; warns if happens in UNDO phase.
Bugfix for UNDO_KEY_DELETE_WITH_ROOT (tested in ma_test_recovery)
and maria_enable_indexes().
Create missing bitmaps when needed (there can be more than one to create,
in rare cases), log a record for this.
include/myisamchk.h:
new flag: bulk insert repair mustn't bump create_rename_lsn
mysql-test/lib/mtr_report.pl:
skip normal warning in maria-recovery.test
mysql-test/r/maria-recovery.result:
result: crash before bulk insert is committed, causes proper rollback,
and crash right after OPTIMIZE replaces index file with new index file
leads to table marked corrupted and recovery not failing.
mysql-test/t/maria-recovery.test:
- can't check the table or it would commit the transaction,
but check is made after recovery.
- test of crash before bulk-insert-with-repair is committed
(to see if it is rolled back), and of crash after OPTIMIZE has replaced
index file but not finished all operations (to see if recovery fails -
it used to assert when trying to execute an old REDO on the new
index).
storage/maria/CMakeLists.txt:
new file
storage/maria/Makefile.am:
new file
storage/maria/ha_maria.cc:
- If bulk insert on a transactional table using an index repair:
table is initially empty, so don't log REDO/UNDO for data rows
(optimization), just log an UNDO_BULK_INSERT_WITH_REPAIR
which will, if executed, empty the data and index file. Re-enable
logging in end_bulk_insert().
- write log record for repair operation only after it's fully done,
index sort including (maria_repair*() used to write the log record).
- Adding back file->trn=NULL which was removed by mistake earlier.
storage/maria/ha_maria.h:
new member (see ha_maria.cc)
storage/maria/ma_bitmap.c:
Functions to create missing bitmaps:
- one function which creates missing bitmaps in page cache, except
the missing one with max offset which it does not put into page cache
as it will be modified very soon.
- one function which the one above calls, and creates bitmaps in page
cache
- one function to execute REDO_BITMAP_NEW_PAGE which uses the second
one above.
storage/maria/ma_blockrec.c:
- when logging REDO_DELETE_ALL, not only 'records' and 'checksum'
has to be reset under log's mutex.
- execution of REDO_INSERT_ROW_BLOBS now checks the dirty pages' list
- execution of UNDO_BULK_INSERT_WITH_REPAIR
storage/maria/ma_blockrec.h:
new functions
storage/maria/ma_check.c:
- table-flush-before-repair is moved to a separate function reused
by maria_sort_index(); syncing is added
- maria_repair() is allowed to re-enable logging only if it is the one
which disabled it.
- "_ma_flush_table_files_after_repair" was a bad name, it's not after
repair now, and it should not sync as we do more changes to the files
shortly after (sync is postponed to when writing the log record)
- REDO_REPAIR record should be written only after all repair
operations (in particular after sorting index in ha_mara::repair())
- close to the end of repair by sort, flushing of pages must happen
also in the non-quick case, to prepare for the sync at end.
- in parallel repair, some page flushes are not needed as done
by initialize_variables_for_repair().
storage/maria/ma_create.c:
Update skip_redo_lsn, create_rename_lsn optionally.
storage/maria/ma_delete_all.c:
Need to sync files at end of maria_delete_all_rows(), if transactional.
storage/maria/ma_extra.c:
During repair, we sometimes call _ma_flush_table_files() (via
_ma_flush_table_files_before_swap()) while there is a WRITE_CACHE.
storage/maria/ma_key_recover.c:
- when we see CLR_END for UNDO_BULK_INSERT_WITH_REPAIR, re-enable
indices.
- fixing bug: _ma_apply_undo_key_delete() parsed UNDO_KEY_DELETE_WITH_ROOT
wrongly, leading to recovery failure
storage/maria/ma_key_recover.h:
new prototype
storage/maria/ma_locking.c:
DBUG_VOID_RETURN missing
storage/maria/ma_loghandler.c:
UNDO for bulk insert with repair, and REDO for creating bitmaps.
LOGREC_FIRST_FREE to not have to change the for() every time we
add a new record type.
storage/maria/ma_loghandler.h:
new UNDO and REDO
storage/maria/ma_open.c:
Move share.kfile.file=kfile up a bit, so that _ma_update_state_lsns()
can get its value, this fixes a bug where LSN_REPAIRED_BY_MARIA_CHK
was not corrected on disk by maria_open().
Store skip_redo_lsn in index' header.
maria_enable_indexes() had a bug for BLOCK_RECORD, where an empty
file has one page, not 0 bytes.
storage/maria/ma_recovery.c:
- Skip a corrupted, missing, or repaired-with-maria_chk, table in
recovery: don't fail, just go to next REDO or UNDO; but if an UNDO
is skipped in UNDO phase we issue warnings.
- Skip REDO|UNDO in REDO phase if <skip_redo_lsn.
- If UNDO phase fails, delete transactions to not make trnman
assert.
- Update skip_redo_lsn when playing REDO_CREATE_TABLE
- Don't record UNDOs for old transactions which we don't know (long_trid==0)
- Bugfix for UNDO_KEY_DELETE_WITH_ROOT (see ma_key_recover.c)
- Execution of UNDO_BULK_INSERT_WITH_REPAIR
- Don't try to find a page number in REDO_DELETE_ALL
- Pieces moved to ma_recovery_util.c
storage/maria/ma_rename.c:
name change
storage/maria/ma_static.c:
I modified layout of the index' header (inserted skip_redo_lsn in its middle)
storage/maria/ma_test2.c:
allow breaking the test towards the end, tests execution of
UNDO_KEY_DELETE_WITH_ROOT
storage/maria/ma_test_recovery.expected:
6 as testflag instead of 4
storage/maria/ma_test_recovery:
Increase the amount of rollback work to do when testing recovery
with ma_test2; this reproduces the UNDO_KEY_DELETE_WITH_ROOT bug.
storage/maria/maria_chk.c:
skip_redo_lsn should be updated too, for consistency.
Write a REDO_REPAIR after all operations (including sort-records)
have been done.
No reason to flush blocks after maria_chk_data_link() and
maria_sort_records(), there is maria_close() in the end.
write_log_record() is a function, to not clutter maria_chk().
storage/maria/maria_def.h:
New member skip_redo_lsn in the state, and comments
storage/maria/maria_pack.c:
skip_redo_lsn should be updated too, for consistency
storage/maria/ma_recovery_util.c:
_ma_redo_not_needed_for_page(), defined in ma_recovery.c, is needed
by ma_blockrec.c; this causes link issues, resolved by putting
_ma_redo_not_needed_for_page() into a new file (so that it is not
in the same file as repair-related objects of ma_recovery.c).
storage/maria/ma_recovery_util.h:
new file
2008-01-17 23:59:32 +01:00
|
|
|
DBUG_RETURN(_ma_read_bitmap_page(info, bitmap, page));
|
2007-01-18 20:38:14 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
Read next suitable bitmap
|
|
|
|
|
|
|
|
SYNOPSIS
|
|
|
|
move_to_next_bitmap()
|
|
|
|
bitmap Bitmap handle
|
|
|
|
|
2007-04-19 12:18:56 +02:00
|
|
|
NOTES
|
|
|
|
The found bitmap may be full, so calling function may need to call this
|
|
|
|
repeatedly until it finds enough space.
|
|
|
|
|
2007-01-18 20:38:14 +01:00
|
|
|
TODO
|
|
|
|
Add cache of bitmaps to not read something that is not usable
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
0 ok
|
WL#3072 Maria recovery:
fix for bug: if a crash happened right after writing a REDO like this:
REDO - UNDO - REDO*, then recovery would ignore the last REDO* (ok),
rollback: REDO - UNDO - REDO* - REDO - CLR, and a next recovery would
thus execute REDO* instead of skipping it again. Recovery now logs
LOGREC_INCOMPLETE_GROUP when it meets REDO* for the first time,
to draw a boundary and ensure it is always skipped. Tested by hand.
Note: ma_test_all fails "maria_chk: error: Key 1 - Found too many records"
not due to this patch (failed before).
BitKeeper/triggers/post-commit:
no truncation of the commit mail, or how to review patches?
mysql-test/include/maria_verify_recovery.inc:
let caller choose the statement used to crash (sometimes we
want the crash to happen at special places)
mysql-test/t/maria-recovery.test:
user of maria_verify_recovery.inc now specifies statement which the
script should use for crashing.
storage/maria/ma_bitmap.c:
it's easier to search for all places using functions from the bitmap
module (like in ma_blockrec.c) if those exported functions all start
with "_ma_bitmap": renaming some of them.
Assertion that when we read a bitmap page, overwriting bitmap->map,
we are not losing information (i.e. bitmap->changed is false).
storage/maria/ma_blockrec.c:
update to new names. Adding code (disabled, protected by a #ifdef)
that I use to test certain crash scenarios (more to come).
storage/maria/ma_blockrec.h:
update to new names
storage/maria/ma_checkpoint.c:
update to new names
storage/maria/ma_extra.c:
update to new names
storage/maria/ma_loghandler.c:
new LOGREC_INCOMPLETE_GROUP
storage/maria/ma_loghandler.h:
new LOGREC_INCOMPLETE_GROUP
storage/maria/ma_recovery.c:
When at the end of the REDO phase we have identified some transactions
with incomplete REDO groups (REDOs without an UNDO or CLR_END),
for each of them we log LOGREC_INCOMPLETE_GROUP. This way, the
upcoming UNDO phase can write more records for such transaction,
a future recovery won't pair the incomplete group with the
CLR_END (as there is LOGREC_INCOMPLETE_GROUP to draw a boundary).
2007-12-10 23:26:53 +01:00
|
|
|
1 error (either couldn't save old bitmap or read new one)
|
2007-01-18 20:38:14 +01:00
|
|
|
*/
|
|
|
|
|
|
|
|
static my_bool move_to_next_bitmap(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap)
|
|
|
|
{
|
2008-01-10 20:21:36 +01:00
|
|
|
pgcache_page_no_t page= bitmap->page;
|
2007-01-18 20:38:14 +01:00
|
|
|
MARIA_STATE_INFO *state= &info->s->state;
|
|
|
|
DBUG_ENTER("move_to_next_bitmap");
|
|
|
|
|
2011-06-29 23:37:12 +02:00
|
|
|
if (state->first_bitmap_with_space != ~(pgcache_page_no_t) 0 &&
|
2007-01-18 20:38:14 +01:00
|
|
|
state->first_bitmap_with_space != page)
|
|
|
|
{
|
|
|
|
page= state->first_bitmap_with_space;
|
2011-06-29 23:37:12 +02:00
|
|
|
state->first_bitmap_with_space= ~(pgcache_page_no_t) 0;
|
|
|
|
DBUG_ASSERT(page % bitmap->pages_covered == 0);
|
2007-01-18 20:38:14 +01:00
|
|
|
}
|
|
|
|
else
|
2011-06-29 23:37:12 +02:00
|
|
|
{
|
2007-01-18 20:38:14 +01:00
|
|
|
page+= bitmap->pages_covered;
|
2011-06-29 23:37:12 +02:00
|
|
|
DBUG_ASSERT(page % bitmap->pages_covered == 0);
|
|
|
|
}
|
2007-01-18 20:38:14 +01:00
|
|
|
DBUG_RETURN(_ma_change_bitmap_page(info, bitmap, page));
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/****************************************************************************
|
|
|
|
Allocate data in bitmaps
|
|
|
|
****************************************************************************/
|
|
|
|
|
|
|
|
/*
|
|
|
|
Store data in 'block' and mark the place used in the bitmap
|
|
|
|
|
|
|
|
SYNOPSIS
|
|
|
|
fill_block()
|
|
|
|
bitmap Bitmap handle
|
|
|
|
block Store data about what we found
|
2007-07-02 19:45:15 +02:00
|
|
|
best_data Pointer to best 6 uchar aligned area in bitmap->map
|
2007-01-18 20:38:14 +01:00
|
|
|
best_pos Which bit in *best_data the area starts
|
|
|
|
0 = first bit pattern, 1 second bit pattern etc
|
2007-04-19 12:18:56 +02:00
|
|
|
best_bits The original value of the bits at best_pos
|
2007-01-18 20:38:14 +01:00
|
|
|
fill_pattern Bitmap pattern to store in best_data[best_pos]
|
2007-04-19 12:18:56 +02:00
|
|
|
|
|
|
|
NOTES
|
|
|
|
We mark all pages to be 'TAIL's, which means that
|
|
|
|
block->page_count is really a row position inside the page.
|
2007-01-18 20:38:14 +01:00
|
|
|
*/
|
|
|
|
|
|
|
|
static void fill_block(MARIA_FILE_BITMAP *bitmap,
|
|
|
|
MARIA_BITMAP_BLOCK *block,
|
|
|
|
uchar *best_data, uint best_pos, uint best_bits,
|
|
|
|
uint fill_pattern)
|
|
|
|
{
|
|
|
|
uint page, offset, tmp;
|
|
|
|
uchar *data;
|
2010-11-03 13:14:02 +01:00
|
|
|
DBUG_ENTER("fill_block");
|
2007-01-18 20:38:14 +01:00
|
|
|
|
|
|
|
/* For each 6 bytes we have 6*8/3= 16 patterns */
|
2008-01-12 23:30:38 +01:00
|
|
|
page= ((uint) (best_data - bitmap->map)) / 6 * 16 + best_pos;
|
2008-01-07 17:54:41 +01:00
|
|
|
DBUG_ASSERT(page + 1 < bitmap->pages_covered);
|
2007-01-18 20:38:14 +01:00
|
|
|
block->page= bitmap->page + 1 + page;
|
UNDO of rows now puts back all part of the row on their original pages and positions
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Added --debug-on option to mysqld (to be able to turn of DBUG with --debug-on=0)
Fixed some bugs with 'non_flushable' marking of bitmap pages
Don't use 'non_flushable' marking of bitmap pages for not transactional tables
SHOW CREATE TABLE now shows if table was created with page checksums
Fixed a lot of bugs with BLOB handling in case of update/REDO and UNDO
More tests (especially for blobs) and DBUG_ASSERTS()
More readable output from maria_read_log and maria_chk
Fixed wrong shift that caused Maria to crash on files > 4G
Mark tables as crashed of REDO fails
dbug/dbug.c:
Changed to use my_bool (allowed me to remove some windows specific code)
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Removed initialization of variables if not needed
include/my_dbug.h:
Use my_bool for some functions that was defined as BOOLEAN in dbug.c code
Added DBUGGER_ON/DEBUGGER_OFF to speed up execution when DBUG is not used
include/my_global.h:
Define my_bool early
Increase MY_HOW_OFTEN_TO_WRITE as computers are now faster than 10 years ago
mysql-test/mysql-test-run.pl:
Added debug-on=0 to speed up tests
mysql-test/r/maria-recovery.result:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/r/maria.result:
Added testing of page checksums
mysql-test/t/crash_commit_before-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-bitmap-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery.test:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/t/maria.test:
Added testing of page checksums
sql/mysqld.cc:
Added --debug-on option (to be able to turn of DBUG with --debug-on=0)
Indentation fixes
Removed end spaces
sql/sql_show.cc:
Allow update_create_info() to inform MySQL if PACK_KEYS, NO_PACK_KEYS, CHECKSUM, PAGE_CHECKSUM or DELAY_KEY_WRITE is used
storage/maria/Makefile.am:
Added ma_test_big.sh
storage/maria/ha_maria.cc:
Store in create_info if page checksums are used (For SHOW CREATE TABLE)
storage/maria/ma_bitmap.c:
Added _ma_bitmap_wait_or_flush() to cause reader of bitmap pages to wait with reading until bitmap is flushed.
Use TAIL_PAGE_COUNT_MARKER for tail pages
Set 'sub_blocks' for and only for the head page or for the first extent of a blob. This is needed for store_extent_info() to be able to set START_EXTENT_BIT's
Don't allocate more than 0x3ffff pages in one extent (We need bit 0x4000 as a START_EXTENT_BIT)
Increase the calculated 'head_length' with the number of bytes used for extents.
Update row->space_on_head_page also in _ma_bitmap_find_new_place()
Make _ma_bitmap_get_page_bits() global. (Needed for UNDO handling)
Changed _ma_bitmap_flushable() to take MARIA_HA instead of MARIA_SHARE.
This was needed to be able to mark the handler if we had a 'non_flushable' call pending or not.
Don't use 'non_flushable' marking of bitmap pages for not transactional tables.
Added BLOCKUSED_USE_ORG_BITMAP handling also for tail pages.
Added more DBUG_ASSERT() to find possible errors in other code
Some code simplications by adding new local variables
storage/maria/ma_blockrec.c:
UNDO of rows now puts back all part of the row on their original pages and positions.
Changed UNDO of DELETE and UNDO of UPDATE to contain information about the original length of data on head block and also extent information
This changes a lot of logic as now an insert of a row on a page may happen to any position (and not just to the first or next free)
Use PAGE_COUNT to mark if an extent is the start of of a blob. (Needed for extent_to_bitmap_blocks())
Added check_directory() for checking that directroy entries are correct.
Added checking of row checksums when reading rows (with EXTRA_DEBUG)
Added make_space_for_directory() and extend_directory() for doing expansion of directory
Added get_rowpos_in_head_or_tail_page() to be able to store head/tail on original position in UNDO
Added extent_to_bitmap_blocks() to be able to generate original bitmap blocks from UNDO entry
Added _ma_update_at_original_place() for UNDO of DELETES
Added row->min_length to hold minmum required space needed on head page
Changed find_free_position() to use make_space_for_directory()
Changed make_empty_page() to allow optional creation of directory entry
Changed delete_head_or_tail() and _ma_apply_undo_row_isnert() to not copy pagecache block (speed optimization)
Changed _ma_apply_redo_insert_row_head_or_tail() to be able to insert new row at any position on 'new' page
Changed _ma_apply_undo_row_delete() and _ma_apply_undo_row_update() to put row in it's original position
Ensure allocation of tail blocks are of at least MIN_TAIL_SIZE.
Ensure we store pages in pinned pages even if read failed. (If not we will have pages pinned forever in page cache)
Write original extent information in UNDO entry, not compacted ones (we need position to tails!)
When setting BLOCKUSED_USED, don't clear other bits (we have to preserve BLOCKUSED_USE_ORG_BITMAP)
Fixed som bugs in directory handling
Fixed bug where we wrote wrong lsn to blob pages
Added separate blob_buffer for fixing bug when updating row that had char/varchar that spanned several pages and also had blobs
Ensure we call _ma_bitmap_flushable() also in case of errors
When doing an update, first delete old entries, then search in bitmap for where to put new information
Info->s -> share
Rowid -> rowid
More DBUG_ASSERT()
storage/maria/ma_blockrec.h:
Added START_EXTENT_BIT and TAIL_PAGE_COUNT_MARKER
Added _ma_bitmap_wait_or_flush() and _ma_bitmap_get_page_bits()
storage/maria/ma_check.c:
Don't write extra empty line if there is no deleted blocks
Ignore START_EXTENT_BIT's in page count
Call _ma_fast_unlock_key_del() to free key_del link
storage/maria/ma_close.c:
Ensure that used_key_del is 0. (If not, someone forgot to call _ma_unlock_key_del())
storage/maria/ma_create.c:
Changed constant to macro
storage/maria/ma_delete.c:
For deleted keys, log also position to row
storage/maria/ma_extra.c:
Release blob buffer at maria_reset() if bigger than MARIA_SMALL_BLOB_BUFFER
storage/maria/ma_key_recover.c:
Added bzero() of LSN that confused paged cache in case of uninitialized block
Mark file crashed if applying of index changes fails
Added calls to _ma_fast_unlock_key_del() for protection of shared key_del link.
storage/maria/ma_locking.c:
Added usage of MARIA_FILE_OPEN_COUNT_OFFSET
Added _ma_mark_file_crashed()
storage/maria/ma_loghandler.c:
Fixed bug where we logged uninitialized memory
storage/maria/ma_open.c:
Moved state->changed to be at start of state info on disk to allow one to easly mark files as crashed
storage/maria/ma_page.c:
Disable 'dummy' checksumming of pages as this gave false warnings.
(Need to investigate if this is ever needed)
storage/maria/ma_pagecache.c:
Fixed wrong shift that caused Maria to crash on files > 4G
storage/maria/ma_recovery.c:
In case of errors, start writing on new line if we where in %## %## printing mode (Made errors more readable)
Changed global variable name from warnings -> recovery_warnings
Use MARIA_FILE_CREATE_RENAME_LSN_OFFSET instead of constant
Removed special handling of row position for deleted keys. Keys now always includes row positions
_ma_apply_undo_row_delete() now gets page and row position
Added check that we don't loop forever when handling undo's (in case of bug in undo chain)
Print name of failed REDO/UNDO
storage/maria/ma_recovery.h:
Removed old comment
storage/maria/ma_static.c:
Chaned version number of Maria files to not accidently use old ones (becasue of change of ordering of status variables)
storage/maria/ma_test2.c:
Added option -u to specify number of rows to update
Changed old option -u to be -A, as for ma_test1
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
First created blob is now of max blob length to ensure we have at least one big blob in the table
storage/maria/ma_test_all.sh:
More tests
storage/maria/ma_test_recovery.expected:
Updated results
storage/maria/ma_test_recovery:
Changed tests to use bigger blobs (not just 1K)
Added new tests that tests recovery of update with blobs
Removed comparision of .MAD file as it's not guranteed that recovery from scratch gives identical data file as original update
(compact_page() may be called at different times during normal execution and during REDO)
storage/maria/ma_update.c:
Simplify code (changed * to if)
storage/maria/maria_chk.c:
Make output more readable
storage/maria/maria_def.h:
Changed 'changed' to int to prepare for more bits
Added 2 more bytes to status information
Added 'st_mara_row->min_length' for storing min length needed on head page
Added 'st_mara_handler->blob_buff & blob_buff_size' for storing blobs
Moved all tunning parameters into one block
Added MARIA_SMALL_BLOB_BUFFER
Added _ma_mark_file_crashed()
storage/myisam/mi_test2.c:
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
storage/maria/ma_test_big.sh:
Testing of insert, update, delete, recovery and undo of rows with blobs
Thanks to the random-ness of ma_test2 this is likely to find most bugs in the row handling
2007-12-30 21:40:03 +01:00
|
|
|
block->page_count= TAIL_PAGE_COUNT_MARKER;
|
2007-01-18 20:38:14 +01:00
|
|
|
block->empty_space= pattern_to_size(bitmap, best_bits);
|
UNDO of rows now puts back all part of the row on their original pages and positions
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Added --debug-on option to mysqld (to be able to turn of DBUG with --debug-on=0)
Fixed some bugs with 'non_flushable' marking of bitmap pages
Don't use 'non_flushable' marking of bitmap pages for not transactional tables
SHOW CREATE TABLE now shows if table was created with page checksums
Fixed a lot of bugs with BLOB handling in case of update/REDO and UNDO
More tests (especially for blobs) and DBUG_ASSERTS()
More readable output from maria_read_log and maria_chk
Fixed wrong shift that caused Maria to crash on files > 4G
Mark tables as crashed of REDO fails
dbug/dbug.c:
Changed to use my_bool (allowed me to remove some windows specific code)
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Removed initialization of variables if not needed
include/my_dbug.h:
Use my_bool for some functions that was defined as BOOLEAN in dbug.c code
Added DBUGGER_ON/DEBUGGER_OFF to speed up execution when DBUG is not used
include/my_global.h:
Define my_bool early
Increase MY_HOW_OFTEN_TO_WRITE as computers are now faster than 10 years ago
mysql-test/mysql-test-run.pl:
Added debug-on=0 to speed up tests
mysql-test/r/maria-recovery.result:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/r/maria.result:
Added testing of page checksums
mysql-test/t/crash_commit_before-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-bitmap-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery.test:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/t/maria.test:
Added testing of page checksums
sql/mysqld.cc:
Added --debug-on option (to be able to turn of DBUG with --debug-on=0)
Indentation fixes
Removed end spaces
sql/sql_show.cc:
Allow update_create_info() to inform MySQL if PACK_KEYS, NO_PACK_KEYS, CHECKSUM, PAGE_CHECKSUM or DELAY_KEY_WRITE is used
storage/maria/Makefile.am:
Added ma_test_big.sh
storage/maria/ha_maria.cc:
Store in create_info if page checksums are used (For SHOW CREATE TABLE)
storage/maria/ma_bitmap.c:
Added _ma_bitmap_wait_or_flush() to cause reader of bitmap pages to wait with reading until bitmap is flushed.
Use TAIL_PAGE_COUNT_MARKER for tail pages
Set 'sub_blocks' for and only for the head page or for the first extent of a blob. This is needed for store_extent_info() to be able to set START_EXTENT_BIT's
Don't allocate more than 0x3ffff pages in one extent (We need bit 0x4000 as a START_EXTENT_BIT)
Increase the calculated 'head_length' with the number of bytes used for extents.
Update row->space_on_head_page also in _ma_bitmap_find_new_place()
Make _ma_bitmap_get_page_bits() global. (Needed for UNDO handling)
Changed _ma_bitmap_flushable() to take MARIA_HA instead of MARIA_SHARE.
This was needed to be able to mark the handler if we had a 'non_flushable' call pending or not.
Don't use 'non_flushable' marking of bitmap pages for not transactional tables.
Added BLOCKUSED_USE_ORG_BITMAP handling also for tail pages.
Added more DBUG_ASSERT() to find possible errors in other code
Some code simplications by adding new local variables
storage/maria/ma_blockrec.c:
UNDO of rows now puts back all part of the row on their original pages and positions.
Changed UNDO of DELETE and UNDO of UPDATE to contain information about the original length of data on head block and also extent information
This changes a lot of logic as now an insert of a row on a page may happen to any position (and not just to the first or next free)
Use PAGE_COUNT to mark if an extent is the start of of a blob. (Needed for extent_to_bitmap_blocks())
Added check_directory() for checking that directroy entries are correct.
Added checking of row checksums when reading rows (with EXTRA_DEBUG)
Added make_space_for_directory() and extend_directory() for doing expansion of directory
Added get_rowpos_in_head_or_tail_page() to be able to store head/tail on original position in UNDO
Added extent_to_bitmap_blocks() to be able to generate original bitmap blocks from UNDO entry
Added _ma_update_at_original_place() for UNDO of DELETES
Added row->min_length to hold minmum required space needed on head page
Changed find_free_position() to use make_space_for_directory()
Changed make_empty_page() to allow optional creation of directory entry
Changed delete_head_or_tail() and _ma_apply_undo_row_isnert() to not copy pagecache block (speed optimization)
Changed _ma_apply_redo_insert_row_head_or_tail() to be able to insert new row at any position on 'new' page
Changed _ma_apply_undo_row_delete() and _ma_apply_undo_row_update() to put row in it's original position
Ensure allocation of tail blocks are of at least MIN_TAIL_SIZE.
Ensure we store pages in pinned pages even if read failed. (If not we will have pages pinned forever in page cache)
Write original extent information in UNDO entry, not compacted ones (we need position to tails!)
When setting BLOCKUSED_USED, don't clear other bits (we have to preserve BLOCKUSED_USE_ORG_BITMAP)
Fixed som bugs in directory handling
Fixed bug where we wrote wrong lsn to blob pages
Added separate blob_buffer for fixing bug when updating row that had char/varchar that spanned several pages and also had blobs
Ensure we call _ma_bitmap_flushable() also in case of errors
When doing an update, first delete old entries, then search in bitmap for where to put new information
Info->s -> share
Rowid -> rowid
More DBUG_ASSERT()
storage/maria/ma_blockrec.h:
Added START_EXTENT_BIT and TAIL_PAGE_COUNT_MARKER
Added _ma_bitmap_wait_or_flush() and _ma_bitmap_get_page_bits()
storage/maria/ma_check.c:
Don't write extra empty line if there is no deleted blocks
Ignore START_EXTENT_BIT's in page count
Call _ma_fast_unlock_key_del() to free key_del link
storage/maria/ma_close.c:
Ensure that used_key_del is 0. (If not, someone forgot to call _ma_unlock_key_del())
storage/maria/ma_create.c:
Changed constant to macro
storage/maria/ma_delete.c:
For deleted keys, log also position to row
storage/maria/ma_extra.c:
Release blob buffer at maria_reset() if bigger than MARIA_SMALL_BLOB_BUFFER
storage/maria/ma_key_recover.c:
Added bzero() of LSN that confused paged cache in case of uninitialized block
Mark file crashed if applying of index changes fails
Added calls to _ma_fast_unlock_key_del() for protection of shared key_del link.
storage/maria/ma_locking.c:
Added usage of MARIA_FILE_OPEN_COUNT_OFFSET
Added _ma_mark_file_crashed()
storage/maria/ma_loghandler.c:
Fixed bug where we logged uninitialized memory
storage/maria/ma_open.c:
Moved state->changed to be at start of state info on disk to allow one to easly mark files as crashed
storage/maria/ma_page.c:
Disable 'dummy' checksumming of pages as this gave false warnings.
(Need to investigate if this is ever needed)
storage/maria/ma_pagecache.c:
Fixed wrong shift that caused Maria to crash on files > 4G
storage/maria/ma_recovery.c:
In case of errors, start writing on new line if we where in %## %## printing mode (Made errors more readable)
Changed global variable name from warnings -> recovery_warnings
Use MARIA_FILE_CREATE_RENAME_LSN_OFFSET instead of constant
Removed special handling of row position for deleted keys. Keys now always includes row positions
_ma_apply_undo_row_delete() now gets page and row position
Added check that we don't loop forever when handling undo's (in case of bug in undo chain)
Print name of failed REDO/UNDO
storage/maria/ma_recovery.h:
Removed old comment
storage/maria/ma_static.c:
Chaned version number of Maria files to not accidently use old ones (becasue of change of ordering of status variables)
storage/maria/ma_test2.c:
Added option -u to specify number of rows to update
Changed old option -u to be -A, as for ma_test1
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
First created blob is now of max blob length to ensure we have at least one big blob in the table
storage/maria/ma_test_all.sh:
More tests
storage/maria/ma_test_recovery.expected:
Updated results
storage/maria/ma_test_recovery:
Changed tests to use bigger blobs (not just 1K)
Added new tests that tests recovery of update with blobs
Removed comparision of .MAD file as it's not guranteed that recovery from scratch gives identical data file as original update
(compact_page() may be called at different times during normal execution and during REDO)
storage/maria/ma_update.c:
Simplify code (changed * to if)
storage/maria/maria_chk.c:
Make output more readable
storage/maria/maria_def.h:
Changed 'changed' to int to prepare for more bits
Added 2 more bytes to status information
Added 'st_mara_row->min_length' for storing min length needed on head page
Added 'st_mara_handler->blob_buff & blob_buff_size' for storing blobs
Moved all tunning parameters into one block
Added MARIA_SMALL_BLOB_BUFFER
Added _ma_mark_file_crashed()
storage/myisam/mi_test2.c:
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
storage/maria/ma_test_big.sh:
Testing of insert, update, delete, recovery and undo of rows with blobs
Thanks to the random-ness of ma_test2 this is likely to find most bugs in the row handling
2007-12-30 21:40:03 +01:00
|
|
|
block->sub_blocks= 0;
|
2007-01-18 20:38:14 +01:00
|
|
|
block->org_bitmap_value= best_bits;
|
2007-04-19 12:18:56 +02:00
|
|
|
block->used= BLOCKUSED_TAIL; /* See _ma_bitmap_release_unused() */
|
2007-01-18 20:38:14 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
Mark place used by reading/writing 2 bytes at a time to handle
|
|
|
|
bitmaps in overlapping bytes
|
|
|
|
*/
|
|
|
|
best_pos*= 3;
|
|
|
|
data= best_data+ best_pos / 8;
|
|
|
|
offset= best_pos & 7;
|
|
|
|
tmp= uint2korr(data);
|
2007-04-19 12:18:56 +02:00
|
|
|
|
|
|
|
/* we turn off the 3 bits and replace them with fill_pattern */
|
2007-01-18 20:38:14 +01:00
|
|
|
tmp= (tmp & ~(7 << offset)) | (fill_pattern << offset);
|
|
|
|
int2store(data, tmp);
|
|
|
|
bitmap->changed= 1;
|
2007-10-19 23:24:22 +02:00
|
|
|
DBUG_EXECUTE("bitmap", _ma_print_bitmap_changes(bitmap););
|
2010-11-03 13:14:02 +01:00
|
|
|
DBUG_VOID_RETURN;
|
2007-01-18 20:38:14 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
Allocate data for head block
|
|
|
|
|
|
|
|
SYNOPSIS
|
|
|
|
allocate_head()
|
|
|
|
bitmap bitmap
|
2007-04-19 12:18:56 +02:00
|
|
|
size Size of data region we need to store
|
2007-01-18 20:38:14 +01:00
|
|
|
block Store found information here
|
|
|
|
|
2007-04-19 12:18:56 +02:00
|
|
|
IMPLEMENTATION
|
|
|
|
Find the best-fit page to put a region of 'size'
|
|
|
|
This is defined as the first page of the set of pages
|
|
|
|
with the smallest free space that can hold 'size'.
|
|
|
|
|
2007-01-18 20:38:14 +01:00
|
|
|
RETURN
|
|
|
|
0 ok (block is updated)
|
|
|
|
1 error (no space in bitmap; block is not touched)
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
static my_bool allocate_head(MARIA_FILE_BITMAP *bitmap, uint size,
|
|
|
|
MARIA_BITMAP_BLOCK *block)
|
|
|
|
{
|
|
|
|
uint min_bits= size_to_head_pattern(bitmap, size);
|
|
|
|
uchar *data= bitmap->map, *end= data + bitmap->used_size;
|
|
|
|
uchar *best_data= 0;
|
2011-11-23 18:25:07 +01:00
|
|
|
uint best_bits= (uint) -1, UNINIT_VAR(best_pos);
|
2007-01-18 20:38:14 +01:00
|
|
|
DBUG_ENTER("allocate_head");
|
|
|
|
|
|
|
|
DBUG_ASSERT(size <= FULL_PAGE_SIZE(bitmap->block_size));
|
|
|
|
|
2008-01-07 17:54:41 +01:00
|
|
|
for (; data < end; data+= 6)
|
2007-01-18 20:38:14 +01:00
|
|
|
{
|
|
|
|
ulonglong bits= uint6korr(data); /* 6 bytes = 6*8/3= 16 patterns */
|
|
|
|
uint i;
|
|
|
|
|
|
|
|
/*
|
|
|
|
Skip common patterns
|
|
|
|
We can skip empty pages (if we already found a match) or
|
|
|
|
anything matching the following pattern as this will be either
|
|
|
|
a full page or a tail page
|
|
|
|
*/
|
|
|
|
if ((!bits && best_data) ||
|
|
|
|
((bits & LL(04444444444444444)) == LL(04444444444444444)))
|
|
|
|
continue;
|
|
|
|
for (i= 0; i < 16 ; i++, bits >>= 3)
|
|
|
|
{
|
2008-01-10 20:21:36 +01:00
|
|
|
uint pattern= (uint) (bits & 7);
|
2007-01-18 20:38:14 +01:00
|
|
|
if (pattern <= min_bits)
|
|
|
|
{
|
2007-04-19 12:18:56 +02:00
|
|
|
/* There is enough space here */
|
2007-01-18 20:38:14 +01:00
|
|
|
if ((int) pattern > (int) best_bits)
|
|
|
|
{
|
2007-04-19 12:18:56 +02:00
|
|
|
/*
|
|
|
|
There is more than enough space here and it's better than what
|
|
|
|
we have found so far. Remember it, as we will choose it if we
|
|
|
|
don't find anything in this bitmap page.
|
|
|
|
*/
|
2007-01-18 20:38:14 +01:00
|
|
|
best_bits= pattern;
|
|
|
|
best_data= data;
|
|
|
|
best_pos= i;
|
2008-01-07 17:54:41 +01:00
|
|
|
if (pattern == min_bits)
|
|
|
|
goto found; /* Best possible match */
|
2007-01-18 20:38:14 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2007-04-19 12:18:56 +02:00
|
|
|
if (!best_data) /* Found no place */
|
2007-01-18 20:38:14 +01:00
|
|
|
{
|
2008-01-07 17:54:41 +01:00
|
|
|
if (data >= bitmap->map + bitmap->total_size)
|
2007-04-19 12:18:56 +02:00
|
|
|
DBUG_RETURN(1); /* No space in bitmap */
|
2007-01-18 20:38:14 +01:00
|
|
|
/* Allocate data at end of bitmap */
|
|
|
|
bitmap->used_size+= 6;
|
2008-01-07 17:54:41 +01:00
|
|
|
set_if_smaller(bitmap->used_size, bitmap->total_size);
|
2007-01-18 20:38:14 +01:00
|
|
|
best_data= data;
|
|
|
|
best_pos= best_bits= 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
found:
|
|
|
|
fill_block(bitmap, block, best_data, best_pos, best_bits, FULL_HEAD_PAGE);
|
|
|
|
DBUG_RETURN(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
Allocate data for tail block
|
|
|
|
|
|
|
|
SYNOPSIS
|
|
|
|
allocate_tail()
|
|
|
|
bitmap bitmap
|
|
|
|
size Size of block we need to find
|
|
|
|
block Store found information here
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
0 ok (block is updated)
|
|
|
|
1 error (no space in bitmap; block is not touched)
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
static my_bool allocate_tail(MARIA_FILE_BITMAP *bitmap, uint size,
|
|
|
|
MARIA_BITMAP_BLOCK *block)
|
|
|
|
{
|
|
|
|
uint min_bits= size_to_tail_pattern(bitmap, size);
|
|
|
|
uchar *data= bitmap->map, *end= data + bitmap->used_size;
|
|
|
|
uchar *best_data= 0;
|
2011-11-23 18:25:07 +01:00
|
|
|
uint best_bits= (uint) -1, UNINIT_VAR(best_pos);
|
2007-01-18 20:38:14 +01:00
|
|
|
DBUG_ENTER("allocate_tail");
|
|
|
|
DBUG_PRINT("enter", ("size: %u", size));
|
|
|
|
|
Fix for: LP #634955: Assert in _ma_update_at_original_place()
Added locking of lock mutex when updating status in external_unlock() for Aria and MyISAM tables.
Fixed that 'source' command doesn't cause mysql command line tool to exit on error.
DEBUG_EXECUTE() and DEBUG_EVALUATE_IF() should not execute things based on wildcards. (Allows one to run --debug with mysql-test-run scripts that uses @debug)
Fixed several core dump, deadlock and crashed table bugs in handling of LOCK TABLE with MERGE tables:
- Added priority of locks to avoid crashes with MERGE tables.
- Added thr_lock_merge() to allow one to merge two results of thr_lock().
Fixed 'not found row' bug in REPLACE with Aria tables.
Mark MyISAM tables that are part of MERGE with HA_OPEN_MERGE_TABLE and set the locks to have priority THR_LOCK_MERGE_PRIV.
- By sorting MERGE tables last in thr_multi_unlock() it's safer to release and relock them many times (can happen when TRIGGERS are created)
Avoid printing (null) in debug file (to easier find out wrong NULL pointer usage with %s).
client/mysql.cc:
Fixed that 'source' command doesn't cause mysql command line tool to exit on error.
client/mysqltest.cc:
Don't send NULL to fn_format(). (Can cause crash on Solaris when using --debug)
dbug/dbug.c:
DEBUG_EXECUTE() and DEBUG_EVALUATE_IF() should not execute things based on wildcards.
include/my_base.h:
Added flag to signal if one opens a MERGE table.
Added extra() command to signal that one is not part of a MERGE table anymore.
include/thr_lock.h:
Added priority for locks (needed to fix bug in thr_lock when using MERGE tables)
Added option to thr_unlock() if get_status() should be called.
Added prototype for thr_merge_locks().
mysql-test/mysql-test-run.pl:
Ignore crashed table warnings for tables named 'crashed'.
mysql-test/r/merge.result:
Renamed triggers to make debugging easier.
Added some CHECK TABLES to catch errors earlier.
Additional tests.
mysql-test/r/merge_debug.result:
Test of error handling when reopening MERGE tables.
mysql-test/r/udf_query_cache.result:
Added missing flush status
mysql-test/suite/parts/r/partition_repair_myisam.result:
Update results
mysql-test/t/merge.test:
Renamed triggers to make debugging easier.
Added some CHECK TABLES to catch errors earlier.
Additional tests.
mysql-test/t/merge_debug.test:
Test of error handling when reopening MERGE tables.
mysql-test/t/udf_query_cache.test:
Added missing flush status
mysys/my_getopt.c:
Removed not used variable
mysys/my_symlink2.c:
Changed (null) to (NULL) to make it easier to find NULL arguments to DBUG_PRINT() functions.
(On linux, NULL to sprintf is printed 'null')
mysys/thr_lock.c:
Added priority of locks to avoid crashes with MERGE tables.
Added thr_lock_merge() to allow one to merge two results of thr_lock().
- This is needed for MyISAM as all locked table must share the same status. If not, you will not see newly inserted rows in other instances of the table.
If calling thr_unlock() with THR_UNLOCK_UPDATE_STATUS, call update_status() and restore_status() for the locks. This is needed in some rare cases where we call thr_unlock() followed by thr_lock() without calling external_unlock/external_lock in between.
Simplify loop in thr_multi_lock().
Added 'start_trans', which is called at end of thr_multi_lock() when all locks are taken.
- This was needed by Aria to ensure that transaction is started when we got all locks, not at get_status(). Without this, some rows could not be visible when we lock two tables at the same time, causing REPLACE using two tables to fail unexpectedly.
sql/handler.cc:
Add an assert() in handler::print_error() for "impossible errors" (like table is crashed) when --debug-assert-if-crashed-table is used.
sql/lock.cc:
Simplify mysql_lock_tables() code if get_lock_data() returns 0 locks.
Added new parameter to thr_multi_unlock()
In mysql_unlock_read_tables(), call first externa_unlock(), then thr_multi_unlock(); This is same order as we do in mysql_unlock_tables().
Don't abort locks in mysql_lock_abort() for merged tables when a MERGE table is deleted; Would cause a spin lock.
Added call to thr_merge_locks() in mysql_lock_merge() to ensure consistency in thr_locks().
- New locks of same type and table is stored after the old lock to ensure that we get the status from the original lock.
sql/mysql_priv.h:
Added debug_assert_if_crashed_table
sql/mysqld.cc:
Added --debug-assert-if-crashed-table
sql/parse_file.cc:
Don't print '(null)' in DBUG_PRINT of no dir given
sql/set_var.cc:
Increase default size of buffer for @debug variable.
sql/sql_base.cc:
In case of error from reopen_table() in reopen_tables(), call unlock_open_table() and restart loop.
- This fixed bug when we twice deleted same table from open_cache.
Don't take name lock for already name locked table in open_unireg_entry().
- Fixed bug when doing repair in reopen_table().
- In detach_merge_children(), always detach if 'clear_refs' is given. We can't trust parent->children_attached as this function can be called twice, first time with clear_refs set to 0.
sql/sql_class.cc:
Changed printing of (null) to "" in set_thd_proc_info()
sql/sql_parse.cc:
Added DBUG
sql/sql_trigger.cc:
Don't call unlink_open_table() if reopen_table() fails as the table may already be freed.
storage/maria/ma_bitmap.c:
Fixed DBUG_ASSERT() in allocate_tail()
storage/maria/ma_blockrec.c:
Fixed wrong calculation of row length for very small rows in undo_row_update().
- Fixes ASSERT() when doing undo.
storage/maria/ma_blockrec.h:
Added _ma_block_start_trans() and _ma_block_start_trans_no_versioning()
storage/maria/ma_locking.c:
Call _ma_update_status_with_lock() when releasing write locks.
- Fixes potential problem with updating status without the proper lock.
storage/maria/ma_open.c:
Changed to use start_trans() instead of get_status() to ensure that we see all rows in all locked tables when we got the locks.
- Fixed 'not found row' bug in REPLACE with Aria tables.
storage/maria/ma_state.c:
Added _ma_update_status_with_lock() and _ma_block_start_trans().
This is to ensure that we see all rows in all locked tables when we got the locks.
storage/maria/ma_state.h:
Added _ma_update_status_with_lock()
storage/maria/ma_write.c:
More DBUG_PRINT
storage/myisam/mi_check.c:
Fixed error message
storage/myisam/mi_extra.c:
Added HA_EXTRA_DETACH_CHILD:
- Detach MyISAM table to not be part of MERGE table (remove flag & lock priority).
storage/myisam/mi_locking.c:
Call mi_update_status_with_lock() when releasing write locks.
- Fixes potential problem with updating status without the proper lock.
Change to use new HA_OPEN_MERGE_TABLE flag to test if MERGE table.
Added mi_fix_status(), called by thr_merge().
storage/myisam/mi_open.c:
Added marker if part of MERGE table.
Call mi_fix_status() in thr_lock() for transactional tables.
storage/myisam/myisamdef.h:
Change my_once_flag to uint, as it stored different values than just 0/1
Added 'open_flag' to store state given to mi_open()
storage/myisammrg/ha_myisammrg.cc:
Add THR_LOCK_MERGE_PRIV to THR_LOCK_DATA to get MERGE locks sorted after other types of locks.
storage/myisammrg/myrg_locking.c:
Remove windows specific code.
storage/myisammrg/myrg_open.c:
Use HA_OPEN_MERGE_TABLE to mi_open().
Set HA_OPEN_MERGE_TABLE for linked MyISAM tables.
storage/xtradb/buf/buf0buf.c:
Fixed compiler warning
storage/xtradb/buf/buf0lru.c:
Initialize variable that could be used not initialized.
2010-11-02 16:22:57 +01:00
|
|
|
/*
|
|
|
|
We have to add DIR_ENTRY_SIZE here as this is not part of the data size
|
|
|
|
See call to allocate_tail() in find_tail().
|
|
|
|
*/
|
|
|
|
DBUG_ASSERT(size <= MAX_TAIL_SIZE(bitmap->block_size) + DIR_ENTRY_SIZE);
|
2007-01-18 20:38:14 +01:00
|
|
|
|
|
|
|
for (; data < end; data += 6)
|
|
|
|
{
|
|
|
|
ulonglong bits= uint6korr(data); /* 6 bytes = 6*8/3= 16 patterns */
|
|
|
|
uint i;
|
|
|
|
|
|
|
|
/*
|
|
|
|
Skip common patterns
|
|
|
|
We can skip empty pages (if we already found a match) or
|
2007-04-19 12:18:56 +02:00
|
|
|
the following patterns: 1-4 (head pages, not suitable for tail) or
|
|
|
|
7 (full tail page). See 'Dynamic size records' comment at start of file.
|
|
|
|
|
2008-01-07 17:54:41 +01:00
|
|
|
At the moment we only skip full head and tail pages (ie, all bits are
|
2007-04-19 12:18:56 +02:00
|
|
|
set) as this is easy to detect with one simple test and is a
|
|
|
|
quite common case if we have blobs.
|
2007-01-18 20:38:14 +01:00
|
|
|
*/
|
|
|
|
|
2008-01-07 17:54:41 +01:00
|
|
|
if ((!bits && best_data) || bits == LL(0xffffffffffff) ||
|
|
|
|
bits == LL(04444444444444444))
|
2007-01-18 20:38:14 +01:00
|
|
|
continue;
|
|
|
|
for (i= 0; i < 16; i++, bits >>= 3)
|
|
|
|
{
|
2008-01-10 20:21:36 +01:00
|
|
|
uint pattern= (uint) (bits & 7);
|
2007-01-18 20:38:14 +01:00
|
|
|
if (pattern <= min_bits && (!pattern || pattern >= 5))
|
|
|
|
{
|
|
|
|
if ((int) pattern > (int) best_bits)
|
|
|
|
{
|
|
|
|
best_bits= pattern;
|
|
|
|
best_data= data;
|
|
|
|
best_pos= i;
|
2008-01-07 17:54:41 +01:00
|
|
|
if (pattern == min_bits)
|
|
|
|
goto found; /* Can't be better */
|
2007-01-18 20:38:14 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (!best_data)
|
|
|
|
{
|
2008-01-07 17:54:41 +01:00
|
|
|
if (data >= bitmap->map + bitmap->total_size)
|
2007-01-18 20:38:14 +01:00
|
|
|
DBUG_RETURN(1);
|
|
|
|
/* Allocate data at end of bitmap */
|
2008-01-07 17:54:41 +01:00
|
|
|
best_data= data;
|
2007-01-18 20:38:14 +01:00
|
|
|
bitmap->used_size+= 6;
|
2008-01-07 17:54:41 +01:00
|
|
|
set_if_smaller(bitmap->used_size, bitmap->total_size);
|
2007-01-18 20:38:14 +01:00
|
|
|
best_pos= best_bits= 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
found:
|
|
|
|
fill_block(bitmap, block, best_data, best_pos, best_bits, FULL_TAIL_PAGE);
|
|
|
|
DBUG_RETURN(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
Allocate data for full blocks
|
|
|
|
|
|
|
|
SYNOPSIS
|
|
|
|
allocate_full_pages()
|
|
|
|
bitmap bitmap
|
|
|
|
pages_needed Total size in pages (bitmap->total_size) we would like to have
|
|
|
|
block Store found information here
|
|
|
|
full_page 1 if we are not allowed to split extent
|
|
|
|
|
|
|
|
IMPLEMENTATION
|
|
|
|
We will return the smallest area >= size. If there is no such
|
|
|
|
block, we will return the biggest area that satisfies
|
|
|
|
area_size >= min(BLOB_SEGMENT_MIN_SIZE*full_page_size, size)
|
|
|
|
|
|
|
|
To speed up searches, we will only consider areas that has at least 16 free
|
|
|
|
pages starting on an even boundary. When finding such an area, we will
|
|
|
|
extend it with all previous and following free pages. This will ensure
|
|
|
|
we don't get holes between areas
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
# Blocks used
|
|
|
|
0 error (no space in bitmap; block is not touched)
|
|
|
|
*/
|
|
|
|
|
|
|
|
static ulong allocate_full_pages(MARIA_FILE_BITMAP *bitmap,
|
|
|
|
ulong pages_needed,
|
|
|
|
MARIA_BITMAP_BLOCK *block, my_bool full_page)
|
|
|
|
{
|
|
|
|
uchar *data= bitmap->map, *data_end= data + bitmap->used_size;
|
|
|
|
uchar *page_end= data + bitmap->total_size;
|
|
|
|
uchar *best_data= 0;
|
|
|
|
uint min_size;
|
2011-11-22 18:04:38 +01:00
|
|
|
uint best_area_size, UNINIT_VAR(best_prefix_area_size);
|
2007-01-18 20:38:14 +01:00
|
|
|
uint page, size;
|
2011-10-29 20:40:03 +02:00
|
|
|
ulonglong UNINIT_VAR(best_prefix_bits);
|
2007-01-18 20:38:14 +01:00
|
|
|
DBUG_ENTER("allocate_full_pages");
|
|
|
|
DBUG_PRINT("enter", ("pages_needed: %lu", pages_needed));
|
|
|
|
|
|
|
|
min_size= pages_needed;
|
|
|
|
if (!full_page && min_size > BLOB_SEGMENT_MIN_SIZE)
|
|
|
|
min_size= BLOB_SEGMENT_MIN_SIZE;
|
|
|
|
best_area_size= ~(uint) 0;
|
|
|
|
|
|
|
|
for (; data < page_end; data+= 6)
|
|
|
|
{
|
|
|
|
ulonglong bits= uint6korr(data); /* 6 bytes = 6*8/3= 16 patterns */
|
|
|
|
uchar *data_start;
|
|
|
|
ulonglong prefix_bits= 0;
|
|
|
|
uint area_size, prefix_area_size, suffix_area_size;
|
|
|
|
|
|
|
|
/* Find area with at least 16 free pages */
|
|
|
|
if (bits)
|
|
|
|
continue;
|
|
|
|
data_start= data;
|
|
|
|
/* Find size of area */
|
|
|
|
for (data+=6 ; data < data_end ; data+= 6)
|
|
|
|
{
|
|
|
|
if ((bits= uint6korr(data)))
|
|
|
|
break;
|
|
|
|
}
|
2008-01-12 23:30:38 +01:00
|
|
|
area_size= (uint) (data - data_start) / 6 * 16;
|
2007-01-18 20:38:14 +01:00
|
|
|
if (area_size >= best_area_size)
|
|
|
|
continue;
|
|
|
|
prefix_area_size= suffix_area_size= 0;
|
|
|
|
if (!bits)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
End of page; All the rest of the bits on page are part of area
|
|
|
|
This is needed because bitmap->used_size only covers the set bits
|
|
|
|
in the bitmap.
|
|
|
|
*/
|
2008-01-12 23:30:38 +01:00
|
|
|
area_size+= (uint) (page_end - data) / 6 * 16;
|
2007-01-18 20:38:14 +01:00
|
|
|
if (area_size >= best_area_size)
|
|
|
|
break;
|
|
|
|
data= page_end;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* Add bits at end of page */
|
|
|
|
for (; !(bits & 7); bits >>= 3)
|
|
|
|
suffix_area_size++;
|
|
|
|
area_size+= suffix_area_size;
|
|
|
|
}
|
|
|
|
if (data_start != bitmap->map)
|
|
|
|
{
|
|
|
|
/* Add bits before page */
|
|
|
|
bits= prefix_bits= uint6korr(data_start - 6);
|
|
|
|
DBUG_ASSERT(bits != 0);
|
|
|
|
/* 111 000 000 000 000 000 000 000 000 000 000 000 000 000 000 000 */
|
|
|
|
if (!(bits & LL(07000000000000000)))
|
|
|
|
{
|
|
|
|
data_start-= 6;
|
|
|
|
do
|
|
|
|
{
|
|
|
|
prefix_area_size++;
|
|
|
|
bits<<= 3;
|
|
|
|
} while (!(bits & LL(07000000000000000)));
|
|
|
|
area_size+= prefix_area_size;
|
|
|
|
/* Calculate offset to page from data_start */
|
|
|
|
prefix_area_size= 16 - prefix_area_size;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (area_size >= min_size && area_size <= best_area_size)
|
|
|
|
{
|
|
|
|
best_data= data_start;
|
|
|
|
best_area_size= area_size;
|
|
|
|
best_prefix_bits= prefix_bits;
|
|
|
|
best_prefix_area_size= prefix_area_size;
|
|
|
|
|
|
|
|
/* Prefer to put data in biggest possible area */
|
|
|
|
if (area_size <= pages_needed)
|
|
|
|
min_size= area_size;
|
|
|
|
else
|
|
|
|
min_size= pages_needed;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (!best_data)
|
|
|
|
DBUG_RETURN(0); /* No room on page */
|
|
|
|
|
|
|
|
/*
|
|
|
|
Now allocate min(pages_needed, area_size), starting from
|
|
|
|
best_start + best_prefix_area_size
|
|
|
|
*/
|
|
|
|
if (best_area_size > pages_needed)
|
|
|
|
best_area_size= pages_needed;
|
|
|
|
|
|
|
|
/* For each 6 bytes we have 6*8/3= 16 patterns */
|
2008-01-12 23:30:38 +01:00
|
|
|
page= ((uint) (best_data - bitmap->map) * 8) / 3 + best_prefix_area_size;
|
2007-01-18 20:38:14 +01:00
|
|
|
block->page= bitmap->page + 1 + page;
|
|
|
|
block->page_count= best_area_size;
|
|
|
|
block->empty_space= 0;
|
UNDO of rows now puts back all part of the row on their original pages and positions
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Added --debug-on option to mysqld (to be able to turn of DBUG with --debug-on=0)
Fixed some bugs with 'non_flushable' marking of bitmap pages
Don't use 'non_flushable' marking of bitmap pages for not transactional tables
SHOW CREATE TABLE now shows if table was created with page checksums
Fixed a lot of bugs with BLOB handling in case of update/REDO and UNDO
More tests (especially for blobs) and DBUG_ASSERTS()
More readable output from maria_read_log and maria_chk
Fixed wrong shift that caused Maria to crash on files > 4G
Mark tables as crashed of REDO fails
dbug/dbug.c:
Changed to use my_bool (allowed me to remove some windows specific code)
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Removed initialization of variables if not needed
include/my_dbug.h:
Use my_bool for some functions that was defined as BOOLEAN in dbug.c code
Added DBUGGER_ON/DEBUGGER_OFF to speed up execution when DBUG is not used
include/my_global.h:
Define my_bool early
Increase MY_HOW_OFTEN_TO_WRITE as computers are now faster than 10 years ago
mysql-test/mysql-test-run.pl:
Added debug-on=0 to speed up tests
mysql-test/r/maria-recovery.result:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/r/maria.result:
Added testing of page checksums
mysql-test/t/crash_commit_before-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-bitmap-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery.test:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/t/maria.test:
Added testing of page checksums
sql/mysqld.cc:
Added --debug-on option (to be able to turn of DBUG with --debug-on=0)
Indentation fixes
Removed end spaces
sql/sql_show.cc:
Allow update_create_info() to inform MySQL if PACK_KEYS, NO_PACK_KEYS, CHECKSUM, PAGE_CHECKSUM or DELAY_KEY_WRITE is used
storage/maria/Makefile.am:
Added ma_test_big.sh
storage/maria/ha_maria.cc:
Store in create_info if page checksums are used (For SHOW CREATE TABLE)
storage/maria/ma_bitmap.c:
Added _ma_bitmap_wait_or_flush() to cause reader of bitmap pages to wait with reading until bitmap is flushed.
Use TAIL_PAGE_COUNT_MARKER for tail pages
Set 'sub_blocks' for and only for the head page or for the first extent of a blob. This is needed for store_extent_info() to be able to set START_EXTENT_BIT's
Don't allocate more than 0x3ffff pages in one extent (We need bit 0x4000 as a START_EXTENT_BIT)
Increase the calculated 'head_length' with the number of bytes used for extents.
Update row->space_on_head_page also in _ma_bitmap_find_new_place()
Make _ma_bitmap_get_page_bits() global. (Needed for UNDO handling)
Changed _ma_bitmap_flushable() to take MARIA_HA instead of MARIA_SHARE.
This was needed to be able to mark the handler if we had a 'non_flushable' call pending or not.
Don't use 'non_flushable' marking of bitmap pages for not transactional tables.
Added BLOCKUSED_USE_ORG_BITMAP handling also for tail pages.
Added more DBUG_ASSERT() to find possible errors in other code
Some code simplications by adding new local variables
storage/maria/ma_blockrec.c:
UNDO of rows now puts back all part of the row on their original pages and positions.
Changed UNDO of DELETE and UNDO of UPDATE to contain information about the original length of data on head block and also extent information
This changes a lot of logic as now an insert of a row on a page may happen to any position (and not just to the first or next free)
Use PAGE_COUNT to mark if an extent is the start of of a blob. (Needed for extent_to_bitmap_blocks())
Added check_directory() for checking that directroy entries are correct.
Added checking of row checksums when reading rows (with EXTRA_DEBUG)
Added make_space_for_directory() and extend_directory() for doing expansion of directory
Added get_rowpos_in_head_or_tail_page() to be able to store head/tail on original position in UNDO
Added extent_to_bitmap_blocks() to be able to generate original bitmap blocks from UNDO entry
Added _ma_update_at_original_place() for UNDO of DELETES
Added row->min_length to hold minmum required space needed on head page
Changed find_free_position() to use make_space_for_directory()
Changed make_empty_page() to allow optional creation of directory entry
Changed delete_head_or_tail() and _ma_apply_undo_row_isnert() to not copy pagecache block (speed optimization)
Changed _ma_apply_redo_insert_row_head_or_tail() to be able to insert new row at any position on 'new' page
Changed _ma_apply_undo_row_delete() and _ma_apply_undo_row_update() to put row in it's original position
Ensure allocation of tail blocks are of at least MIN_TAIL_SIZE.
Ensure we store pages in pinned pages even if read failed. (If not we will have pages pinned forever in page cache)
Write original extent information in UNDO entry, not compacted ones (we need position to tails!)
When setting BLOCKUSED_USED, don't clear other bits (we have to preserve BLOCKUSED_USE_ORG_BITMAP)
Fixed som bugs in directory handling
Fixed bug where we wrote wrong lsn to blob pages
Added separate blob_buffer for fixing bug when updating row that had char/varchar that spanned several pages and also had blobs
Ensure we call _ma_bitmap_flushable() also in case of errors
When doing an update, first delete old entries, then search in bitmap for where to put new information
Info->s -> share
Rowid -> rowid
More DBUG_ASSERT()
storage/maria/ma_blockrec.h:
Added START_EXTENT_BIT and TAIL_PAGE_COUNT_MARKER
Added _ma_bitmap_wait_or_flush() and _ma_bitmap_get_page_bits()
storage/maria/ma_check.c:
Don't write extra empty line if there is no deleted blocks
Ignore START_EXTENT_BIT's in page count
Call _ma_fast_unlock_key_del() to free key_del link
storage/maria/ma_close.c:
Ensure that used_key_del is 0. (If not, someone forgot to call _ma_unlock_key_del())
storage/maria/ma_create.c:
Changed constant to macro
storage/maria/ma_delete.c:
For deleted keys, log also position to row
storage/maria/ma_extra.c:
Release blob buffer at maria_reset() if bigger than MARIA_SMALL_BLOB_BUFFER
storage/maria/ma_key_recover.c:
Added bzero() of LSN that confused paged cache in case of uninitialized block
Mark file crashed if applying of index changes fails
Added calls to _ma_fast_unlock_key_del() for protection of shared key_del link.
storage/maria/ma_locking.c:
Added usage of MARIA_FILE_OPEN_COUNT_OFFSET
Added _ma_mark_file_crashed()
storage/maria/ma_loghandler.c:
Fixed bug where we logged uninitialized memory
storage/maria/ma_open.c:
Moved state->changed to be at start of state info on disk to allow one to easly mark files as crashed
storage/maria/ma_page.c:
Disable 'dummy' checksumming of pages as this gave false warnings.
(Need to investigate if this is ever needed)
storage/maria/ma_pagecache.c:
Fixed wrong shift that caused Maria to crash on files > 4G
storage/maria/ma_recovery.c:
In case of errors, start writing on new line if we where in %## %## printing mode (Made errors more readable)
Changed global variable name from warnings -> recovery_warnings
Use MARIA_FILE_CREATE_RENAME_LSN_OFFSET instead of constant
Removed special handling of row position for deleted keys. Keys now always includes row positions
_ma_apply_undo_row_delete() now gets page and row position
Added check that we don't loop forever when handling undo's (in case of bug in undo chain)
Print name of failed REDO/UNDO
storage/maria/ma_recovery.h:
Removed old comment
storage/maria/ma_static.c:
Chaned version number of Maria files to not accidently use old ones (becasue of change of ordering of status variables)
storage/maria/ma_test2.c:
Added option -u to specify number of rows to update
Changed old option -u to be -A, as for ma_test1
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
First created blob is now of max blob length to ensure we have at least one big blob in the table
storage/maria/ma_test_all.sh:
More tests
storage/maria/ma_test_recovery.expected:
Updated results
storage/maria/ma_test_recovery:
Changed tests to use bigger blobs (not just 1K)
Added new tests that tests recovery of update with blobs
Removed comparision of .MAD file as it's not guranteed that recovery from scratch gives identical data file as original update
(compact_page() may be called at different times during normal execution and during REDO)
storage/maria/ma_update.c:
Simplify code (changed * to if)
storage/maria/maria_chk.c:
Make output more readable
storage/maria/maria_def.h:
Changed 'changed' to int to prepare for more bits
Added 2 more bytes to status information
Added 'st_mara_row->min_length' for storing min length needed on head page
Added 'st_mara_handler->blob_buff & blob_buff_size' for storing blobs
Moved all tunning parameters into one block
Added MARIA_SMALL_BLOB_BUFFER
Added _ma_mark_file_crashed()
storage/myisam/mi_test2.c:
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
storage/maria/ma_test_big.sh:
Testing of insert, update, delete, recovery and undo of rows with blobs
Thanks to the random-ness of ma_test2 this is likely to find most bugs in the row handling
2007-12-30 21:40:03 +01:00
|
|
|
block->sub_blocks= 0;
|
2007-01-18 20:38:14 +01:00
|
|
|
block->org_bitmap_value= 0;
|
|
|
|
block->used= 0;
|
2008-01-07 17:54:41 +01:00
|
|
|
DBUG_ASSERT(page + best_area_size < bitmap->pages_covered);
|
2007-01-18 20:38:14 +01:00
|
|
|
DBUG_PRINT("info", ("page: %lu page_count: %u",
|
|
|
|
(ulong) block->page, block->page_count));
|
|
|
|
|
|
|
|
if (best_prefix_area_size)
|
|
|
|
{
|
|
|
|
ulonglong tmp;
|
|
|
|
/* Convert offset back to bits */
|
|
|
|
best_prefix_area_size= 16 - best_prefix_area_size;
|
|
|
|
if (best_area_size < best_prefix_area_size)
|
|
|
|
{
|
|
|
|
tmp= (LL(1) << best_area_size*3) - 1;
|
|
|
|
best_area_size= best_prefix_area_size; /* for easy end test */
|
|
|
|
}
|
|
|
|
else
|
|
|
|
tmp= (LL(1) << best_prefix_area_size*3) - 1;
|
|
|
|
tmp<<= (16 - best_prefix_area_size) * 3;
|
|
|
|
DBUG_ASSERT((best_prefix_bits & tmp) == 0);
|
|
|
|
best_prefix_bits|= tmp;
|
|
|
|
int6store(best_data, best_prefix_bits);
|
|
|
|
if (!(best_area_size-= best_prefix_area_size))
|
2011-06-24 11:08:45 +02:00
|
|
|
goto end;
|
2007-01-18 20:38:14 +01:00
|
|
|
best_data+= 6;
|
|
|
|
}
|
|
|
|
best_area_size*= 3; /* Bits to set */
|
|
|
|
size= best_area_size/8; /* Bytes to set */
|
|
|
|
bfill(best_data, size, 255);
|
|
|
|
best_data+= size;
|
|
|
|
if ((best_area_size-= size * 8))
|
|
|
|
{
|
2007-07-02 19:45:15 +02:00
|
|
|
/* fill last uchar */
|
2007-01-18 20:38:14 +01:00
|
|
|
*best_data|= (uchar) ((1 << best_area_size) -1);
|
|
|
|
best_data++;
|
|
|
|
}
|
|
|
|
if (data_end < best_data)
|
2008-01-07 17:54:41 +01:00
|
|
|
{
|
2007-01-18 20:38:14 +01:00
|
|
|
bitmap->used_size= (uint) (best_data - bitmap->map);
|
2008-01-07 17:54:41 +01:00
|
|
|
DBUG_ASSERT(bitmap->used_size <= bitmap->total_size);
|
|
|
|
}
|
2011-06-24 11:08:45 +02:00
|
|
|
end:
|
2007-01-18 20:38:14 +01:00
|
|
|
bitmap->changed= 1;
|
2007-10-19 23:24:22 +02:00
|
|
|
DBUG_EXECUTE("bitmap", _ma_print_bitmap_changes(bitmap););
|
2007-01-18 20:38:14 +01:00
|
|
|
DBUG_RETURN(block->page_count);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/****************************************************************************
|
|
|
|
Find right bitmaps where to store data
|
|
|
|
****************************************************************************/
|
|
|
|
|
|
|
|
/*
|
|
|
|
Find right bitmap and position for head block
|
|
|
|
|
2007-04-19 12:18:56 +02:00
|
|
|
SYNOPSIS
|
|
|
|
find_head()
|
|
|
|
info Maria handler
|
|
|
|
length Size of data region we need store
|
|
|
|
position Position in bitmap_blocks where to store the
|
|
|
|
information for the head block.
|
|
|
|
|
2007-01-18 20:38:14 +01:00
|
|
|
RETURN
|
|
|
|
0 ok
|
|
|
|
1 error
|
|
|
|
*/
|
|
|
|
|
|
|
|
static my_bool find_head(MARIA_HA *info, uint length, uint position)
|
|
|
|
{
|
|
|
|
MARIA_FILE_BITMAP *bitmap= &info->s->bitmap;
|
|
|
|
MARIA_BITMAP_BLOCK *block;
|
2007-04-19 12:18:56 +02:00
|
|
|
/*
|
|
|
|
There is always place for the head block in bitmap_blocks as these are
|
|
|
|
preallocated at _ma_init_block_record().
|
|
|
|
*/
|
2007-01-18 20:38:14 +01:00
|
|
|
block= dynamic_element(&info->bitmap_blocks, position, MARIA_BITMAP_BLOCK *);
|
|
|
|
|
2008-01-07 17:54:41 +01:00
|
|
|
/*
|
|
|
|
We need to have DIRENTRY_SIZE here to take into account that we may
|
|
|
|
need an extra directory entry for the row
|
|
|
|
*/
|
|
|
|
while (allocate_head(bitmap, length + DIR_ENTRY_SIZE, block))
|
2007-01-18 20:38:14 +01:00
|
|
|
if (move_to_next_bitmap(info, bitmap))
|
|
|
|
return 1;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
Find right bitmap and position for tail
|
|
|
|
|
2007-04-19 12:18:56 +02:00
|
|
|
SYNOPSIS
|
|
|
|
find_tail()
|
|
|
|
info Maria handler
|
|
|
|
length Size of data region we need store
|
|
|
|
position Position in bitmap_blocks where to store the
|
|
|
|
information for the head block.
|
|
|
|
|
2007-01-18 20:38:14 +01:00
|
|
|
RETURN
|
|
|
|
0 ok
|
|
|
|
1 error
|
|
|
|
*/
|
|
|
|
|
|
|
|
static my_bool find_tail(MARIA_HA *info, uint length, uint position)
|
|
|
|
{
|
|
|
|
MARIA_FILE_BITMAP *bitmap= &info->s->bitmap;
|
|
|
|
MARIA_BITMAP_BLOCK *block;
|
|
|
|
DBUG_ENTER("find_tail");
|
2008-01-07 17:54:41 +01:00
|
|
|
DBUG_ASSERT(length <= info->s->block_size - PAGE_OVERHEAD_SIZE);
|
2007-01-18 20:38:14 +01:00
|
|
|
|
|
|
|
/* Needed, as there is no error checking in dynamic_element */
|
|
|
|
if (allocate_dynamic(&info->bitmap_blocks, position))
|
|
|
|
DBUG_RETURN(1);
|
|
|
|
block= dynamic_element(&info->bitmap_blocks, position, MARIA_BITMAP_BLOCK *);
|
|
|
|
|
2008-01-07 17:54:41 +01:00
|
|
|
/*
|
|
|
|
We have to add DIR_ENTRY_SIZE to ensure we have space for the tail and
|
|
|
|
it's directroy entry on the page
|
|
|
|
*/
|
|
|
|
while (allocate_tail(bitmap, length + DIR_ENTRY_SIZE, block))
|
2007-01-18 20:38:14 +01:00
|
|
|
if (move_to_next_bitmap(info, bitmap))
|
|
|
|
DBUG_RETURN(1);
|
|
|
|
DBUG_RETURN(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
Find right bitmap and position for full blocks in one extent
|
|
|
|
|
2007-04-19 12:18:56 +02:00
|
|
|
SYNOPSIS
|
|
|
|
find_mid()
|
|
|
|
info Maria handler.
|
|
|
|
pages How many pages to allocate.
|
|
|
|
position Position in bitmap_blocks where to store the
|
|
|
|
information for the head block.
|
2007-01-18 20:38:14 +01:00
|
|
|
NOTES
|
|
|
|
This is used to allocate the main extent after the 'head' block
|
2007-04-19 12:18:56 +02:00
|
|
|
(Ie, the middle part of the head-middle-tail entry)
|
2007-01-18 20:38:14 +01:00
|
|
|
|
|
|
|
RETURN
|
|
|
|
0 ok
|
|
|
|
1 error
|
|
|
|
*/
|
|
|
|
|
|
|
|
static my_bool find_mid(MARIA_HA *info, ulong pages, uint position)
|
|
|
|
{
|
|
|
|
MARIA_FILE_BITMAP *bitmap= &info->s->bitmap;
|
|
|
|
MARIA_BITMAP_BLOCK *block;
|
|
|
|
block= dynamic_element(&info->bitmap_blocks, position, MARIA_BITMAP_BLOCK *);
|
|
|
|
|
2007-04-05 13:38:05 +02:00
|
|
|
while (!allocate_full_pages(bitmap, pages, block, 1))
|
2007-01-18 20:38:14 +01:00
|
|
|
{
|
|
|
|
if (move_to_next_bitmap(info, bitmap))
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
Find right bitmap and position for putting a blob
|
|
|
|
|
2007-04-19 12:18:56 +02:00
|
|
|
SYNOPSIS
|
|
|
|
find_blob()
|
|
|
|
info Maria handler.
|
|
|
|
length Length of the blob
|
|
|
|
|
2007-01-18 20:38:14 +01:00
|
|
|
NOTES
|
|
|
|
The extents are stored last in info->bitmap_blocks
|
|
|
|
|
|
|
|
IMPLEMENTATION
|
|
|
|
Allocate all full pages for the block + optionally one tail
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
0 ok
|
|
|
|
1 error
|
|
|
|
*/
|
|
|
|
|
|
|
|
static my_bool find_blob(MARIA_HA *info, ulong length)
|
|
|
|
{
|
|
|
|
MARIA_FILE_BITMAP *bitmap= &info->s->bitmap;
|
|
|
|
uint full_page_size= FULL_PAGE_SIZE(info->s->block_size);
|
|
|
|
ulong pages;
|
|
|
|
uint rest_length, used;
|
|
|
|
uint first_block_pos;
|
|
|
|
MARIA_BITMAP_BLOCK *first_block= 0;
|
|
|
|
DBUG_ENTER("find_blob");
|
|
|
|
DBUG_PRINT("enter", ("length: %lu", length));
|
2007-10-11 17:45:42 +02:00
|
|
|
LINT_INIT(first_block_pos);
|
2007-01-18 20:38:14 +01:00
|
|
|
|
|
|
|
pages= length / full_page_size;
|
|
|
|
rest_length= (uint) (length - pages * full_page_size);
|
|
|
|
if (rest_length >= MAX_TAIL_SIZE(info->s->block_size))
|
|
|
|
{
|
|
|
|
pages++;
|
|
|
|
rest_length= 0;
|
|
|
|
}
|
|
|
|
|
UNDO of rows now puts back all part of the row on their original pages and positions
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Added --debug-on option to mysqld (to be able to turn of DBUG with --debug-on=0)
Fixed some bugs with 'non_flushable' marking of bitmap pages
Don't use 'non_flushable' marking of bitmap pages for not transactional tables
SHOW CREATE TABLE now shows if table was created with page checksums
Fixed a lot of bugs with BLOB handling in case of update/REDO and UNDO
More tests (especially for blobs) and DBUG_ASSERTS()
More readable output from maria_read_log and maria_chk
Fixed wrong shift that caused Maria to crash on files > 4G
Mark tables as crashed of REDO fails
dbug/dbug.c:
Changed to use my_bool (allowed me to remove some windows specific code)
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Removed initialization of variables if not needed
include/my_dbug.h:
Use my_bool for some functions that was defined as BOOLEAN in dbug.c code
Added DBUGGER_ON/DEBUGGER_OFF to speed up execution when DBUG is not used
include/my_global.h:
Define my_bool early
Increase MY_HOW_OFTEN_TO_WRITE as computers are now faster than 10 years ago
mysql-test/mysql-test-run.pl:
Added debug-on=0 to speed up tests
mysql-test/r/maria-recovery.result:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/r/maria.result:
Added testing of page checksums
mysql-test/t/crash_commit_before-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-bitmap-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery.test:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/t/maria.test:
Added testing of page checksums
sql/mysqld.cc:
Added --debug-on option (to be able to turn of DBUG with --debug-on=0)
Indentation fixes
Removed end spaces
sql/sql_show.cc:
Allow update_create_info() to inform MySQL if PACK_KEYS, NO_PACK_KEYS, CHECKSUM, PAGE_CHECKSUM or DELAY_KEY_WRITE is used
storage/maria/Makefile.am:
Added ma_test_big.sh
storage/maria/ha_maria.cc:
Store in create_info if page checksums are used (For SHOW CREATE TABLE)
storage/maria/ma_bitmap.c:
Added _ma_bitmap_wait_or_flush() to cause reader of bitmap pages to wait with reading until bitmap is flushed.
Use TAIL_PAGE_COUNT_MARKER for tail pages
Set 'sub_blocks' for and only for the head page or for the first extent of a blob. This is needed for store_extent_info() to be able to set START_EXTENT_BIT's
Don't allocate more than 0x3fff pages in one extent (We need bit 0x4000 as a START_EXTENT_BIT)
Increase the calculated 'head_length' with the number of bytes used for extents.
Update row->space_on_head_page also in _ma_bitmap_find_new_place()
Make _ma_bitmap_get_page_bits() global. (Needed for UNDO handling)
Changed _ma_bitmap_flushable() to take MARIA_HA instead of MARIA_SHARE.
This was needed to be able to mark the handler if we had a 'non_flushable' call pending or not.
Don't use 'non_flushable' marking of bitmap pages for not transactional tables.
Added BLOCKUSED_USE_ORG_BITMAP handling also for tail pages.
Added more DBUG_ASSERT() to find possible errors in other code
Some code simplications by adding new local variables
storage/maria/ma_blockrec.c:
UNDO of rows now puts back all part of the row on their original pages and positions.
Changed UNDO of DELETE and UNDO of UPDATE to contain information about the original length of data on head block and also extent information
This changes a lot of logic as now an insert of a row on a page may happen to any position (and not just to the first or next free)
Use PAGE_COUNT to mark if an extent is the start of of a blob. (Needed for extent_to_bitmap_blocks())
Added check_directory() for checking that directroy entries are correct.
Added checking of row checksums when reading rows (with EXTRA_DEBUG)
Added make_space_for_directory() and extend_directory() for doing expansion of directory
Added get_rowpos_in_head_or_tail_page() to be able to store head/tail on original position in UNDO
Added extent_to_bitmap_blocks() to be able to generate original bitmap blocks from UNDO entry
Added _ma_update_at_original_place() for UNDO of DELETES
Added row->min_length to hold minmum required space needed on head page
Changed find_free_position() to use make_space_for_directory()
Changed make_empty_page() to allow optional creation of directory entry
Changed delete_head_or_tail() and _ma_apply_undo_row_isnert() to not copy pagecache block (speed optimization)
Changed _ma_apply_redo_insert_row_head_or_tail() to be able to insert new row at any position on 'new' page
Changed _ma_apply_undo_row_delete() and _ma_apply_undo_row_update() to put row in it's original position
Ensure allocation of tail blocks are of at least MIN_TAIL_SIZE.
Ensure we store pages in pinned pages even if read failed. (If not we will have pages pinned forever in page cache)
Write original extent information in UNDO entry, not compacted ones (we need position to tails!)
When setting BLOCKUSED_USED, don't clear other bits (we have to preserve BLOCKUSED_USE_ORG_BITMAP)
Fixed som bugs in directory handling
Fixed bug where we wrote wrong lsn to blob pages
Added separate blob_buffer for fixing bug when updating row that had char/varchar that spanned several pages and also had blobs
Ensure we call _ma_bitmap_flushable() also in case of errors
When doing an update, first delete old entries, then search in bitmap for where to put new information
Info->s -> share
Rowid -> rowid
More DBUG_ASSERT()
storage/maria/ma_blockrec.h:
Added START_EXTENT_BIT and TAIL_PAGE_COUNT_MARKER
Added _ma_bitmap_wait_or_flush() and _ma_bitmap_get_page_bits()
storage/maria/ma_check.c:
Don't write extra empty line if there is no deleted blocks
Ignore START_EXTENT_BIT's in page count
Call _ma_fast_unlock_key_del() to free key_del link
storage/maria/ma_close.c:
Ensure that used_key_del is 0. (If not, someone forgot to call _ma_unlock_key_del())
storage/maria/ma_create.c:
Changed constant to macro
storage/maria/ma_delete.c:
For deleted keys, log also position to row
storage/maria/ma_extra.c:
Release blob buffer at maria_reset() if bigger than MARIA_SMALL_BLOB_BUFFER
storage/maria/ma_key_recover.c:
Added bzero() of LSN that confused paged cache in case of uninitialized block
Mark file crashed if applying of index changes fails
Added calls to _ma_fast_unlock_key_del() for protection of shared key_del link.
storage/maria/ma_locking.c:
Added usage of MARIA_FILE_OPEN_COUNT_OFFSET
Added _ma_mark_file_crashed()
storage/maria/ma_loghandler.c:
Fixed bug where we logged uninitialized memory
storage/maria/ma_open.c:
Moved state->changed to be at start of state info on disk to allow one to easly mark files as crashed
storage/maria/ma_page.c:
Disable 'dummy' checksumming of pages as this gave false warnings.
(Need to investigate if this is ever needed)
storage/maria/ma_pagecache.c:
Fixed wrong shift that caused Maria to crash on files > 4G
storage/maria/ma_recovery.c:
In case of errors, start writing on new line if we where in %## %## printing mode (Made errors more readable)
Changed global variable name from warnings -> recovery_warnings
Use MARIA_FILE_CREATE_RENAME_LSN_OFFSET instead of constant
Removed special handling of row position for deleted keys. Keys now always includes row positions
_ma_apply_undo_row_delete() now gets page and row position
Added check that we don't loop forever when handling undo's (in case of bug in undo chain)
Print name of failed REDO/UNDO
storage/maria/ma_recovery.h:
Removed old comment
storage/maria/ma_static.c:
Chaned version number of Maria files to not accidently use old ones (becasue of change of ordering of status variables)
storage/maria/ma_test2.c:
Added option -u to specify number of rows to update
Changed old option -u to be -A, as for ma_test1
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
First created blob is now of max blob length to ensure we have at least one big blob in the table
storage/maria/ma_test_all.sh:
More tests
storage/maria/ma_test_recovery.expected:
Updated results
storage/maria/ma_test_recovery:
Changed tests to use bigger blobs (not just 1K)
Added new tests that tests recovery of update with blobs
Removed comparision of .MAD file as it's not guranteed that recovery from scratch gives identical data file as original update
(compact_page() may be called at different times during normal execution and during REDO)
storage/maria/ma_update.c:
Simplify code (changed * to if)
storage/maria/maria_chk.c:
Make output more readable
storage/maria/maria_def.h:
Changed 'changed' to int to prepare for more bits
Added 2 more bytes to status information
Added 'st_mara_row->min_length' for storing min length needed on head page
Added 'st_mara_handler->blob_buff & blob_buff_size' for storing blobs
Moved all tunning parameters into one block
Added MARIA_SMALL_BLOB_BUFFER
Added _ma_mark_file_crashed()
storage/myisam/mi_test2.c:
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
storage/maria/ma_test_big.sh:
Testing of insert, update, delete, recovery and undo of rows with blobs
Thanks to the random-ness of ma_test2 this is likely to find most bugs in the row handling
2007-12-30 21:40:03 +01:00
|
|
|
first_block_pos= info->bitmap_blocks.elements;
|
2007-01-18 20:38:14 +01:00
|
|
|
if (pages)
|
|
|
|
{
|
|
|
|
MARIA_BITMAP_BLOCK *block;
|
|
|
|
if (allocate_dynamic(&info->bitmap_blocks,
|
|
|
|
info->bitmap_blocks.elements +
|
|
|
|
pages / BLOB_SEGMENT_MIN_SIZE + 2))
|
|
|
|
DBUG_RETURN(1);
|
|
|
|
block= dynamic_element(&info->bitmap_blocks, info->bitmap_blocks.elements,
|
|
|
|
MARIA_BITMAP_BLOCK*);
|
|
|
|
do
|
|
|
|
{
|
UNDO of rows now puts back all part of the row on their original pages and positions
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Added --debug-on option to mysqld (to be able to turn of DBUG with --debug-on=0)
Fixed some bugs with 'non_flushable' marking of bitmap pages
Don't use 'non_flushable' marking of bitmap pages for not transactional tables
SHOW CREATE TABLE now shows if table was created with page checksums
Fixed a lot of bugs with BLOB handling in case of update/REDO and UNDO
More tests (especially for blobs) and DBUG_ASSERTS()
More readable output from maria_read_log and maria_chk
Fixed wrong shift that caused Maria to crash on files > 4G
Mark tables as crashed of REDO fails
dbug/dbug.c:
Changed to use my_bool (allowed me to remove some windows specific code)
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Removed initialization of variables if not needed
include/my_dbug.h:
Use my_bool for some functions that was defined as BOOLEAN in dbug.c code
Added DBUGGER_ON/DEBUGGER_OFF to speed up execution when DBUG is not used
include/my_global.h:
Define my_bool early
Increase MY_HOW_OFTEN_TO_WRITE as computers are now faster than 10 years ago
mysql-test/mysql-test-run.pl:
Added debug-on=0 to speed up tests
mysql-test/r/maria-recovery.result:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/r/maria.result:
Added testing of page checksums
mysql-test/t/crash_commit_before-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-bitmap-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery.test:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/t/maria.test:
Added testing of page checksums
sql/mysqld.cc:
Added --debug-on option (to be able to turn of DBUG with --debug-on=0)
Indentation fixes
Removed end spaces
sql/sql_show.cc:
Allow update_create_info() to inform MySQL if PACK_KEYS, NO_PACK_KEYS, CHECKSUM, PAGE_CHECKSUM or DELAY_KEY_WRITE is used
storage/maria/Makefile.am:
Added ma_test_big.sh
storage/maria/ha_maria.cc:
Store in create_info if page checksums are used (For SHOW CREATE TABLE)
storage/maria/ma_bitmap.c:
Added _ma_bitmap_wait_or_flush() to cause reader of bitmap pages to wait with reading until bitmap is flushed.
Use TAIL_PAGE_COUNT_MARKER for tail pages
Set 'sub_blocks' for and only for the head page or for the first extent of a blob. This is needed for store_extent_info() to be able to set START_EXTENT_BIT's
Don't allocate more than 0x3fff pages in one extent (We need bit 0x4000 as a START_EXTENT_BIT)
Increase the calculated 'head_length' with the number of bytes used for extents.
Update row->space_on_head_page also in _ma_bitmap_find_new_place()
Make _ma_bitmap_get_page_bits() global. (Needed for UNDO handling)
Changed _ma_bitmap_flushable() to take MARIA_HA instead of MARIA_SHARE.
This was needed to be able to mark the handler if we had a 'non_flushable' call pending or not.
Don't use 'non_flushable' marking of bitmap pages for not transactional tables.
Added BLOCKUSED_USE_ORG_BITMAP handling also for tail pages.
Added more DBUG_ASSERT() to find possible errors in other code
Some code simplications by adding new local variables
storage/maria/ma_blockrec.c:
UNDO of rows now puts back all part of the row on their original pages and positions.
Changed UNDO of DELETE and UNDO of UPDATE to contain information about the original length of data on head block and also extent information
This changes a lot of logic as now an insert of a row on a page may happen to any position (and not just to the first or next free)
Use PAGE_COUNT to mark if an extent is the start of of a blob. (Needed for extent_to_bitmap_blocks())
Added check_directory() for checking that directroy entries are correct.
Added checking of row checksums when reading rows (with EXTRA_DEBUG)
Added make_space_for_directory() and extend_directory() for doing expansion of directory
Added get_rowpos_in_head_or_tail_page() to be able to store head/tail on original position in UNDO
Added extent_to_bitmap_blocks() to be able to generate original bitmap blocks from UNDO entry
Added _ma_update_at_original_place() for UNDO of DELETES
Added row->min_length to hold minmum required space needed on head page
Changed find_free_position() to use make_space_for_directory()
Changed make_empty_page() to allow optional creation of directory entry
Changed delete_head_or_tail() and _ma_apply_undo_row_isnert() to not copy pagecache block (speed optimization)
Changed _ma_apply_redo_insert_row_head_or_tail() to be able to insert new row at any position on 'new' page
Changed _ma_apply_undo_row_delete() and _ma_apply_undo_row_update() to put row in it's original position
Ensure allocation of tail blocks are of at least MIN_TAIL_SIZE.
Ensure we store pages in pinned pages even if read failed. (If not we will have pages pinned forever in page cache)
Write original extent information in UNDO entry, not compacted ones (we need position to tails!)
When setting BLOCKUSED_USED, don't clear other bits (we have to preserve BLOCKUSED_USE_ORG_BITMAP)
Fixed som bugs in directory handling
Fixed bug where we wrote wrong lsn to blob pages
Added separate blob_buffer for fixing bug when updating row that had char/varchar that spanned several pages and also had blobs
Ensure we call _ma_bitmap_flushable() also in case of errors
When doing an update, first delete old entries, then search in bitmap for where to put new information
Info->s -> share
Rowid -> rowid
More DBUG_ASSERT()
storage/maria/ma_blockrec.h:
Added START_EXTENT_BIT and TAIL_PAGE_COUNT_MARKER
Added _ma_bitmap_wait_or_flush() and _ma_bitmap_get_page_bits()
storage/maria/ma_check.c:
Don't write extra empty line if there is no deleted blocks
Ignore START_EXTENT_BIT's in page count
Call _ma_fast_unlock_key_del() to free key_del link
storage/maria/ma_close.c:
Ensure that used_key_del is 0. (If not, someone forgot to call _ma_unlock_key_del())
storage/maria/ma_create.c:
Changed constant to macro
storage/maria/ma_delete.c:
For deleted keys, log also position to row
storage/maria/ma_extra.c:
Release blob buffer at maria_reset() if bigger than MARIA_SMALL_BLOB_BUFFER
storage/maria/ma_key_recover.c:
Added bzero() of LSN that confused paged cache in case of uninitialized block
Mark file crashed if applying of index changes fails
Added calls to _ma_fast_unlock_key_del() for protection of shared key_del link.
storage/maria/ma_locking.c:
Added usage of MARIA_FILE_OPEN_COUNT_OFFSET
Added _ma_mark_file_crashed()
storage/maria/ma_loghandler.c:
Fixed bug where we logged uninitialized memory
storage/maria/ma_open.c:
Moved state->changed to be at start of state info on disk to allow one to easly mark files as crashed
storage/maria/ma_page.c:
Disable 'dummy' checksumming of pages as this gave false warnings.
(Need to investigate if this is ever needed)
storage/maria/ma_pagecache.c:
Fixed wrong shift that caused Maria to crash on files > 4G
storage/maria/ma_recovery.c:
In case of errors, start writing on new line if we where in %## %## printing mode (Made errors more readable)
Changed global variable name from warnings -> recovery_warnings
Use MARIA_FILE_CREATE_RENAME_LSN_OFFSET instead of constant
Removed special handling of row position for deleted keys. Keys now always includes row positions
_ma_apply_undo_row_delete() now gets page and row position
Added check that we don't loop forever when handling undo's (in case of bug in undo chain)
Print name of failed REDO/UNDO
storage/maria/ma_recovery.h:
Removed old comment
storage/maria/ma_static.c:
Chaned version number of Maria files to not accidently use old ones (becasue of change of ordering of status variables)
storage/maria/ma_test2.c:
Added option -u to specify number of rows to update
Changed old option -u to be -A, as for ma_test1
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
First created blob is now of max blob length to ensure we have at least one big blob in the table
storage/maria/ma_test_all.sh:
More tests
storage/maria/ma_test_recovery.expected:
Updated results
storage/maria/ma_test_recovery:
Changed tests to use bigger blobs (not just 1K)
Added new tests that tests recovery of update with blobs
Removed comparision of .MAD file as it's not guranteed that recovery from scratch gives identical data file as original update
(compact_page() may be called at different times during normal execution and during REDO)
storage/maria/ma_update.c:
Simplify code (changed * to if)
storage/maria/maria_chk.c:
Make output more readable
storage/maria/maria_def.h:
Changed 'changed' to int to prepare for more bits
Added 2 more bytes to status information
Added 'st_mara_row->min_length' for storing min length needed on head page
Added 'st_mara_handler->blob_buff & blob_buff_size' for storing blobs
Moved all tunning parameters into one block
Added MARIA_SMALL_BLOB_BUFFER
Added _ma_mark_file_crashed()
storage/myisam/mi_test2.c:
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
storage/maria/ma_test_big.sh:
Testing of insert, update, delete, recovery and undo of rows with blobs
Thanks to the random-ness of ma_test2 this is likely to find most bugs in the row handling
2007-12-30 21:40:03 +01:00
|
|
|
/*
|
|
|
|
We use 0x3fff here as the two upmost bits are reserved for
|
|
|
|
TAIL_BIT and START_EXTENT_BIT
|
|
|
|
*/
|
2007-01-18 20:38:14 +01:00
|
|
|
used= allocate_full_pages(bitmap,
|
UNDO of rows now puts back all part of the row on their original pages and positions
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Added --debug-on option to mysqld (to be able to turn of DBUG with --debug-on=0)
Fixed some bugs with 'non_flushable' marking of bitmap pages
Don't use 'non_flushable' marking of bitmap pages for not transactional tables
SHOW CREATE TABLE now shows if table was created with page checksums
Fixed a lot of bugs with BLOB handling in case of update/REDO and UNDO
More tests (especially for blobs) and DBUG_ASSERTS()
More readable output from maria_read_log and maria_chk
Fixed wrong shift that caused Maria to crash on files > 4G
Mark tables as crashed of REDO fails
dbug/dbug.c:
Changed to use my_bool (allowed me to remove some windows specific code)
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Removed initialization of variables if not needed
include/my_dbug.h:
Use my_bool for some functions that was defined as BOOLEAN in dbug.c code
Added DBUGGER_ON/DEBUGGER_OFF to speed up execution when DBUG is not used
include/my_global.h:
Define my_bool early
Increase MY_HOW_OFTEN_TO_WRITE as computers are now faster than 10 years ago
mysql-test/mysql-test-run.pl:
Added debug-on=0 to speed up tests
mysql-test/r/maria-recovery.result:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/r/maria.result:
Added testing of page checksums
mysql-test/t/crash_commit_before-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-bitmap-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery.test:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/t/maria.test:
Added testing of page checksums
sql/mysqld.cc:
Added --debug-on option (to be able to turn of DBUG with --debug-on=0)
Indentation fixes
Removed end spaces
sql/sql_show.cc:
Allow update_create_info() to inform MySQL if PACK_KEYS, NO_PACK_KEYS, CHECKSUM, PAGE_CHECKSUM or DELAY_KEY_WRITE is used
storage/maria/Makefile.am:
Added ma_test_big.sh
storage/maria/ha_maria.cc:
Store in create_info if page checksums are used (For SHOW CREATE TABLE)
storage/maria/ma_bitmap.c:
Added _ma_bitmap_wait_or_flush() to cause reader of bitmap pages to wait with reading until bitmap is flushed.
Use TAIL_PAGE_COUNT_MARKER for tail pages
Set 'sub_blocks' for and only for the head page or for the first extent of a blob. This is needed for store_extent_info() to be able to set START_EXTENT_BIT's
Don't allocate more than 0x3fff pages in one extent (We need bit 0x4000 as a START_EXTENT_BIT)
Increase the calculated 'head_length' with the number of bytes used for extents.
Update row->space_on_head_page also in _ma_bitmap_find_new_place()
Make _ma_bitmap_get_page_bits() global. (Needed for UNDO handling)
Changed _ma_bitmap_flushable() to take MARIA_HA instead of MARIA_SHARE.
This was needed to be able to mark the handler if we had a 'non_flushable' call pending or not.
Don't use 'non_flushable' marking of bitmap pages for not transactional tables.
Added BLOCKUSED_USE_ORG_BITMAP handling also for tail pages.
Added more DBUG_ASSERT() to find possible errors in other code
Some code simplications by adding new local variables
storage/maria/ma_blockrec.c:
UNDO of rows now puts back all part of the row on their original pages and positions.
Changed UNDO of DELETE and UNDO of UPDATE to contain information about the original length of data on head block and also extent information
This changes a lot of logic as now an insert of a row on a page may happen to any position (and not just to the first or next free)
Use PAGE_COUNT to mark if an extent is the start of of a blob. (Needed for extent_to_bitmap_blocks())
Added check_directory() for checking that directroy entries are correct.
Added checking of row checksums when reading rows (with EXTRA_DEBUG)
Added make_space_for_directory() and extend_directory() for doing expansion of directory
Added get_rowpos_in_head_or_tail_page() to be able to store head/tail on original position in UNDO
Added extent_to_bitmap_blocks() to be able to generate original bitmap blocks from UNDO entry
Added _ma_update_at_original_place() for UNDO of DELETES
Added row->min_length to hold minmum required space needed on head page
Changed find_free_position() to use make_space_for_directory()
Changed make_empty_page() to allow optional creation of directory entry
Changed delete_head_or_tail() and _ma_apply_undo_row_isnert() to not copy pagecache block (speed optimization)
Changed _ma_apply_redo_insert_row_head_or_tail() to be able to insert new row at any position on 'new' page
Changed _ma_apply_undo_row_delete() and _ma_apply_undo_row_update() to put row in it's original position
Ensure allocation of tail blocks are of at least MIN_TAIL_SIZE.
Ensure we store pages in pinned pages even if read failed. (If not we will have pages pinned forever in page cache)
Write original extent information in UNDO entry, not compacted ones (we need position to tails!)
When setting BLOCKUSED_USED, don't clear other bits (we have to preserve BLOCKUSED_USE_ORG_BITMAP)
Fixed som bugs in directory handling
Fixed bug where we wrote wrong lsn to blob pages
Added separate blob_buffer for fixing bug when updating row that had char/varchar that spanned several pages and also had blobs
Ensure we call _ma_bitmap_flushable() also in case of errors
When doing an update, first delete old entries, then search in bitmap for where to put new information
Info->s -> share
Rowid -> rowid
More DBUG_ASSERT()
storage/maria/ma_blockrec.h:
Added START_EXTENT_BIT and TAIL_PAGE_COUNT_MARKER
Added _ma_bitmap_wait_or_flush() and _ma_bitmap_get_page_bits()
storage/maria/ma_check.c:
Don't write extra empty line if there is no deleted blocks
Ignore START_EXTENT_BIT's in page count
Call _ma_fast_unlock_key_del() to free key_del link
storage/maria/ma_close.c:
Ensure that used_key_del is 0. (If not, someone forgot to call _ma_unlock_key_del())
storage/maria/ma_create.c:
Changed constant to macro
storage/maria/ma_delete.c:
For deleted keys, log also position to row
storage/maria/ma_extra.c:
Release blob buffer at maria_reset() if bigger than MARIA_SMALL_BLOB_BUFFER
storage/maria/ma_key_recover.c:
Added bzero() of LSN that confused paged cache in case of uninitialized block
Mark file crashed if applying of index changes fails
Added calls to _ma_fast_unlock_key_del() for protection of shared key_del link.
storage/maria/ma_locking.c:
Added usage of MARIA_FILE_OPEN_COUNT_OFFSET
Added _ma_mark_file_crashed()
storage/maria/ma_loghandler.c:
Fixed bug where we logged uninitialized memory
storage/maria/ma_open.c:
Moved state->changed to be at start of state info on disk to allow one to easly mark files as crashed
storage/maria/ma_page.c:
Disable 'dummy' checksumming of pages as this gave false warnings.
(Need to investigate if this is ever needed)
storage/maria/ma_pagecache.c:
Fixed wrong shift that caused Maria to crash on files > 4G
storage/maria/ma_recovery.c:
In case of errors, start writing on new line if we where in %## %## printing mode (Made errors more readable)
Changed global variable name from warnings -> recovery_warnings
Use MARIA_FILE_CREATE_RENAME_LSN_OFFSET instead of constant
Removed special handling of row position for deleted keys. Keys now always includes row positions
_ma_apply_undo_row_delete() now gets page and row position
Added check that we don't loop forever when handling undo's (in case of bug in undo chain)
Print name of failed REDO/UNDO
storage/maria/ma_recovery.h:
Removed old comment
storage/maria/ma_static.c:
Chaned version number of Maria files to not accidently use old ones (becasue of change of ordering of status variables)
storage/maria/ma_test2.c:
Added option -u to specify number of rows to update
Changed old option -u to be -A, as for ma_test1
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
First created blob is now of max blob length to ensure we have at least one big blob in the table
storage/maria/ma_test_all.sh:
More tests
storage/maria/ma_test_recovery.expected:
Updated results
storage/maria/ma_test_recovery:
Changed tests to use bigger blobs (not just 1K)
Added new tests that tests recovery of update with blobs
Removed comparision of .MAD file as it's not guranteed that recovery from scratch gives identical data file as original update
(compact_page() may be called at different times during normal execution and during REDO)
storage/maria/ma_update.c:
Simplify code (changed * to if)
storage/maria/maria_chk.c:
Make output more readable
storage/maria/maria_def.h:
Changed 'changed' to int to prepare for more bits
Added 2 more bytes to status information
Added 'st_mara_row->min_length' for storing min length needed on head page
Added 'st_mara_handler->blob_buff & blob_buff_size' for storing blobs
Moved all tunning parameters into one block
Added MARIA_SMALL_BLOB_BUFFER
Added _ma_mark_file_crashed()
storage/myisam/mi_test2.c:
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
storage/maria/ma_test_big.sh:
Testing of insert, update, delete, recovery and undo of rows with blobs
Thanks to the random-ness of ma_test2 this is likely to find most bugs in the row handling
2007-12-30 21:40:03 +01:00
|
|
|
(pages >= 0x3fff ? 0x3fff : (uint) pages),
|
|
|
|
block, 0);
|
2007-04-20 14:16:43 +02:00
|
|
|
if (!used)
|
|
|
|
{
|
|
|
|
if (move_to_next_bitmap(info, bitmap))
|
|
|
|
DBUG_RETURN(1);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
pages-= used;
|
|
|
|
info->bitmap_blocks.elements++;
|
|
|
|
block++;
|
|
|
|
}
|
|
|
|
} while (pages != 0);
|
2007-01-18 20:38:14 +01:00
|
|
|
}
|
|
|
|
if (rest_length && find_tail(info, rest_length,
|
|
|
|
info->bitmap_blocks.elements++))
|
|
|
|
DBUG_RETURN(1);
|
UNDO of rows now puts back all part of the row on their original pages and positions
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Added --debug-on option to mysqld (to be able to turn of DBUG with --debug-on=0)
Fixed some bugs with 'non_flushable' marking of bitmap pages
Don't use 'non_flushable' marking of bitmap pages for not transactional tables
SHOW CREATE TABLE now shows if table was created with page checksums
Fixed a lot of bugs with BLOB handling in case of update/REDO and UNDO
More tests (especially for blobs) and DBUG_ASSERTS()
More readable output from maria_read_log and maria_chk
Fixed wrong shift that caused Maria to crash on files > 4G
Mark tables as crashed of REDO fails
dbug/dbug.c:
Changed to use my_bool (allowed me to remove some windows specific code)
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Removed initialization of variables if not needed
include/my_dbug.h:
Use my_bool for some functions that was defined as BOOLEAN in dbug.c code
Added DBUGGER_ON/DEBUGGER_OFF to speed up execution when DBUG is not used
include/my_global.h:
Define my_bool early
Increase MY_HOW_OFTEN_TO_WRITE as computers are now faster than 10 years ago
mysql-test/mysql-test-run.pl:
Added debug-on=0 to speed up tests
mysql-test/r/maria-recovery.result:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/r/maria.result:
Added testing of page checksums
mysql-test/t/crash_commit_before-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-bitmap-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery.test:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/t/maria.test:
Added testing of page checksums
sql/mysqld.cc:
Added --debug-on option (to be able to turn off DBUG with --debug-on=0)
Indentation fixes
Removed end spaces
sql/sql_show.cc:
Allow update_create_info() to inform MySQL if PACK_KEYS, NO_PACK_KEYS, CHECKSUM, PAGE_CHECKSUM or DELAY_KEY_WRITE is used
storage/maria/Makefile.am:
Added ma_test_big.sh
storage/maria/ha_maria.cc:
Store in create_info if page checksums are used (For SHOW CREATE TABLE)
storage/maria/ma_bitmap.c:
Added _ma_bitmap_wait_or_flush() to cause reader of bitmap pages to wait with reading until bitmap is flushed.
Use TAIL_PAGE_COUNT_MARKER for tail pages
Set 'sub_blocks' for and only for the head page or for the first extent of a blob. This is needed for store_extent_info() to be able to set START_EXTENT_BIT's
Don't allocate more than 0x3fff pages in one extent (We need bit 0x4000 as a START_EXTENT_BIT)
Increase the calculated 'head_length' with the number of bytes used for extents.
Update row->space_on_head_page also in _ma_bitmap_find_new_place()
Make _ma_bitmap_get_page_bits() global. (Needed for UNDO handling)
Changed _ma_bitmap_flushable() to take MARIA_HA instead of MARIA_SHARE.
This was needed to be able to mark the handler if we had a 'non_flushable' call pending or not.
Don't use 'non_flushable' marking of bitmap pages for not transactional tables.
Added BLOCKUSED_USE_ORG_BITMAP handling also for tail pages.
Added more DBUG_ASSERT() to find possible errors in other code
Some code simplifications by adding new local variables
storage/maria/ma_blockrec.c:
UNDO of rows now puts back all part of the row on their original pages and positions.
Changed UNDO of DELETE and UNDO of UPDATE to contain information about the original length of data on head block and also extent information
This changes a lot of logic as now an insert of a row on a page may happen to any position (and not just to the first or next free)
Use PAGE_COUNT to mark if an extent is the start of a blob. (Needed for extent_to_bitmap_blocks())
Added check_directory() for checking that directory entries are correct.
Added checking of row checksums when reading rows (with EXTRA_DEBUG)
Added make_space_for_directory() and extend_directory() for doing expansion of directory
Added get_rowpos_in_head_or_tail_page() to be able to store head/tail on original position in UNDO
Added extent_to_bitmap_blocks() to be able to generate original bitmap blocks from UNDO entry
Added _ma_update_at_original_place() for UNDO of DELETES
Added row->min_length to hold minimum required space needed on head page
Changed find_free_position() to use make_space_for_directory()
Changed make_empty_page() to allow optional creation of directory entry
Changed delete_head_or_tail() and _ma_apply_undo_row_insert() to not copy pagecache block (speed optimization)
Changed _ma_apply_redo_insert_row_head_or_tail() to be able to insert new row at any position on 'new' page
Changed _ma_apply_undo_row_delete() and _ma_apply_undo_row_update() to put row in its original position
Ensure allocation of tail blocks are of at least MIN_TAIL_SIZE.
Ensure we store pages in pinned pages even if read failed. (If not we will have pages pinned forever in page cache)
Write original extent information in UNDO entry, not compacted ones (we need position to tails!)
When setting BLOCKUSED_USED, don't clear other bits (we have to preserve BLOCKUSED_USE_ORG_BITMAP)
Fixed some bugs in directory handling
Fixed bug where we wrote wrong lsn to blob pages
Added separate blob_buffer for fixing bug when updating row that had char/varchar that spanned several pages and also had blobs
Ensure we call _ma_bitmap_flushable() also in case of errors
When doing an update, first delete old entries, then search in bitmap for where to put new information
Info->s -> share
Rowid -> rowid
More DBUG_ASSERT()
storage/maria/ma_blockrec.h:
Added START_EXTENT_BIT and TAIL_PAGE_COUNT_MARKER
Added _ma_bitmap_wait_or_flush() and _ma_bitmap_get_page_bits()
storage/maria/ma_check.c:
Don't write extra empty line if there is no deleted blocks
Ignore START_EXTENT_BIT's in page count
Call _ma_fast_unlock_key_del() to free key_del link
storage/maria/ma_close.c:
Ensure that used_key_del is 0. (If not, someone forgot to call _ma_unlock_key_del())
storage/maria/ma_create.c:
Changed constant to macro
storage/maria/ma_delete.c:
For deleted keys, log also position to row
storage/maria/ma_extra.c:
Release blob buffer at maria_reset() if bigger than MARIA_SMALL_BLOB_BUFFER
storage/maria/ma_key_recover.c:
Added bzero() of LSN that confused paged cache in case of uninitialized block
Mark file crashed if applying of index changes fails
Added calls to _ma_fast_unlock_key_del() for protection of shared key_del link.
storage/maria/ma_locking.c:
Added usage of MARIA_FILE_OPEN_COUNT_OFFSET
Added _ma_mark_file_crashed()
storage/maria/ma_loghandler.c:
Fixed bug where we logged uninitialized memory
storage/maria/ma_open.c:
Moved state->changed to be at start of state info on disk to allow one to easily mark files as crashed
storage/maria/ma_page.c:
Disable 'dummy' checksumming of pages as this gave false warnings.
(Need to investigate if this is ever needed)
storage/maria/ma_pagecache.c:
Fixed wrong shift that caused Maria to crash on files > 4G
storage/maria/ma_recovery.c:
In case of errors, start writing on new line if we were in %## %## printing mode (Made errors more readable)
Changed global variable name from warnings -> recovery_warnings
Use MARIA_FILE_CREATE_RENAME_LSN_OFFSET instead of constant
Removed special handling of row position for deleted keys. Keys now always includes row positions
_ma_apply_undo_row_delete() now gets page and row position
Added check that we don't loop forever when handling undo's (in case of bug in undo chain)
Print name of failed REDO/UNDO
storage/maria/ma_recovery.h:
Removed old comment
storage/maria/ma_static.c:
Changed version number of Maria files to not accidentally use old ones (because of change of ordering of status variables)
storage/maria/ma_test2.c:
Added option -u to specify number of rows to update
Changed old option -u to be -A, as for ma_test1
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
First created blob is now of max blob length to ensure we have at least one big blob in the table
storage/maria/ma_test_all.sh:
More tests
storage/maria/ma_test_recovery.expected:
Updated results
storage/maria/ma_test_recovery:
Changed tests to use bigger blobs (not just 1K)
Added new tests that tests recovery of update with blobs
Removed comparison of .MAD file as it's not guaranteed that recovery from scratch gives identical data file as original update
(compact_page() may be called at different times during normal execution and during REDO)
storage/maria/ma_update.c:
Simplify code (changed * to if)
storage/maria/maria_chk.c:
Make output more readable
storage/maria/maria_def.h:
Changed 'changed' to int to prepare for more bits
Added 2 more bytes to status information
Added 'st_mara_row->min_length' for storing min length needed on head page
Added 'st_mara_handler->blob_buff & blob_buff_size' for storing blobs
Moved all tuning parameters into one block
Added MARIA_SMALL_BLOB_BUFFER
Added _ma_mark_file_crashed()
storage/myisam/mi_test2.c:
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
storage/maria/ma_test_big.sh:
Testing of insert, update, delete, recovery and undo of rows with blobs
Thanks to the random-ness of ma_test2 this is likely to find most bugs in the row handling
2007-12-30 21:40:03 +01:00
|
|
|
first_block= dynamic_element(&info->bitmap_blocks, first_block_pos,
|
|
|
|
MARIA_BITMAP_BLOCK*);
|
|
|
|
first_block->sub_blocks= info->bitmap_blocks.elements - first_block_pos;
|
2007-01-18 20:38:14 +01:00
|
|
|
DBUG_RETURN(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2007-04-19 12:18:56 +02:00
|
|
|
/*
|
|
|
|
Find pages to put ALL blobs
|
|
|
|
|
|
|
|
SYNOPSIS
|
|
|
|
allocate_blobs()
|
|
|
|
info Maria handler
|
|
|
|
row Information of what is in the row (from calc_record_size())
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
0 ok
|
|
|
|
1 error
|
|
|
|
*/
|
|
|
|
|
2007-01-18 20:38:14 +01:00
|
|
|
static my_bool allocate_blobs(MARIA_HA *info, MARIA_ROW *row)
|
|
|
|
{
|
|
|
|
ulong *length, *end;
|
|
|
|
uint elements;
|
|
|
|
/*
|
|
|
|
Reserve size for:
|
|
|
|
head block
|
|
|
|
one extent
|
|
|
|
tail block
|
|
|
|
*/
|
|
|
|
elements= info->bitmap_blocks.elements;
|
|
|
|
for (length= row->blob_lengths, end= length + info->s->base.blobs;
|
|
|
|
length < end; length++)
|
|
|
|
{
|
|
|
|
if (*length && find_blob(info, *length))
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
row->extents_count= (info->bitmap_blocks.elements - elements);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2007-04-19 12:18:56 +02:00
|
|
|
/*
|
|
|
|
Store in the bitmap the new size for a head page
|
|
|
|
|
|
|
|
SYNOPSIS
|
|
|
|
use_head()
|
|
|
|
info Maria handler
|
|
|
|
page Page number to update
|
|
|
|
(Note that caller guarantees this is in the active
|
|
|
|
bitmap)
|
|
|
|
size How much free space is left on the page
|
|
|
|
block_position In which info->bitmap_block we have the
|
|
|
|
information about the head block.
|
|
|
|
|
|
|
|
NOTES
|
|
|
|
This is used on update where we are updating an existing head page
|
|
|
|
*/
|
|
|
|
|
2008-01-10 20:21:36 +01:00
|
|
|
static void use_head(MARIA_HA *info, pgcache_page_no_t page, uint size,
|
2007-01-18 20:38:14 +01:00
|
|
|
uint block_position)
|
|
|
|
{
|
|
|
|
MARIA_FILE_BITMAP *bitmap= &info->s->bitmap;
|
|
|
|
MARIA_BITMAP_BLOCK *block;
|
|
|
|
uchar *data;
|
|
|
|
uint offset, tmp, offset_page;
|
2010-11-03 13:14:02 +01:00
|
|
|
DBUG_ENTER("use_head");
|
|
|
|
|
2008-01-07 17:54:41 +01:00
|
|
|
DBUG_ASSERT(page % bitmap->pages_covered);
|
2007-01-18 20:38:14 +01:00
|
|
|
|
|
|
|
block= dynamic_element(&info->bitmap_blocks, block_position,
|
|
|
|
MARIA_BITMAP_BLOCK*);
|
|
|
|
block->page= page;
|
|
|
|
block->page_count= 1 + TAIL_BIT;
|
|
|
|
block->empty_space= size;
|
|
|
|
block->used= BLOCKUSED_TAIL;
|
|
|
|
|
|
|
|
/*
|
|
|
|
Mark place used by reading/writing 2 bytes at a time to handle
|
|
|
|
bitmaps in overlapping bytes
|
|
|
|
*/
|
|
|
|
offset_page= (uint) (page - bitmap->page - 1) * 3;
|
|
|
|
offset= offset_page & 7;
|
|
|
|
data= bitmap->map + offset_page / 8;
|
|
|
|
tmp= uint2korr(data);
|
|
|
|
block->org_bitmap_value= (tmp >> offset) & 7;
|
|
|
|
tmp= (tmp & ~(7 << offset)) | (FULL_HEAD_PAGE << offset);
|
|
|
|
int2store(data, tmp);
|
|
|
|
bitmap->changed= 1;
|
2007-10-19 23:24:22 +02:00
|
|
|
DBUG_EXECUTE("bitmap", _ma_print_bitmap_changes(bitmap););
|
2010-11-03 13:14:02 +01:00
|
|
|
DBUG_VOID_RETURN;
|
2007-01-18 20:38:14 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
2007-04-19 12:18:56 +02:00
|
|
|
Find out where to split the row (ie, what goes in head, middle, tail etc)
|
|
|
|
|
|
|
|
SYNOPSIS
|
|
|
|
find_where_to_split_row()
|
|
|
|
share Maria share
|
|
|
|
row Information of what is in the row (from calc_record_size())
|
2011-08-15 15:39:53 +02:00
|
|
|
extents Max number of extents we have to store in header
|
2007-04-19 12:18:56 +02:00
|
|
|
split_size Free size on the page (The head length must be less
|
|
|
|
than this)
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
row_length for the head block.
|
2007-01-18 20:38:14 +01:00
|
|
|
*/
|
|
|
|
|
|
|
|
static uint find_where_to_split_row(MARIA_SHARE *share, MARIA_ROW *row,
|
2011-08-15 15:39:53 +02:00
|
|
|
uint extents, uint split_size)
|
2007-01-18 20:38:14 +01:00
|
|
|
{
|
|
|
|
uint *lengths, *lengths_end;
|
UNDO of rows now puts back all part of the row on their original pages and positions
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Added --debug-on option to mysqld (to be able to turn of DBUG with --debug-on=0)
Fixed some bugs with 'non_flushable' marking of bitmap pages
Don't use 'non_flushable' marking of bitmap pages for not transactional tables
SHOW CREATE TABLE now shows if table was created with page checksums
Fixed a lot of bugs with BLOB handling in case of update/REDO and UNDO
More tests (especially for blobs) and DBUG_ASSERTS()
More readable output from maria_read_log and maria_chk
Fixed wrong shift that caused Maria to crash on files > 4G
Mark tables as crashed of REDO fails
dbug/dbug.c:
Changed to use my_bool (allowed me to remove some windows specific code)
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Removed initialization of variables if not needed
include/my_dbug.h:
Use my_bool for some functions that was defined as BOOLEAN in dbug.c code
Added DBUGGER_ON/DEBUGGER_OFF to speed up execution when DBUG is not used
include/my_global.h:
Define my_bool early
Increase MY_HOW_OFTEN_TO_WRITE as computers are now faster than 10 years ago
mysql-test/mysql-test-run.pl:
Added debug-on=0 to speed up tests
mysql-test/r/maria-recovery.result:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/r/maria.result:
Added testing of page checksums
mysql-test/t/crash_commit_before-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-bitmap-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery.test:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/t/maria.test:
Added testing of page checksums
sql/mysqld.cc:
Added --debug-on option (to be able to turn of DBUG with --debug-on=0)
Indentation fixes
Removed end spaces
sql/sql_show.cc:
Allow update_create_info() to inform MySQL if PACK_KEYS, NO_PACK_KEYS, CHECKSUM, PAGE_CHECKSUM or DELAY_KEY_WRITE is used
storage/maria/Makefile.am:
Added ma_test_big.sh
storage/maria/ha_maria.cc:
Store in create_info if page checksums are used (For SHOW CREATE TABLE)
storage/maria/ma_bitmap.c:
Added _ma_bitmap_wait_or_flush() to cause reader of bitmap pages to wait with reading until bitmap is flushed.
Use TAIL_PAGE_COUNT_MARKER for tail pages
Set 'sub_blocks' for and only for the head page or for the first extent of a blob. This is needed for store_extent_info() to be able to set START_EXTENT_BIT's
Don't allocate more than 0x3ffff pages in one extent (We need bit 0x4000 as a START_EXTENT_BIT)
Increase the calculated 'head_length' with the number of bytes used for extents.
Update row->space_on_head_page also in _ma_bitmap_find_new_place()
Make _ma_bitmap_get_page_bits() global. (Needed for UNDO handling)
Changed _ma_bitmap_flushable() to take MARIA_HA instead of MARIA_SHARE.
This was needed to be able to mark the handler if we had a 'non_flushable' call pending or not.
Don't use 'non_flushable' marking of bitmap pages for not transactional tables.
Added BLOCKUSED_USE_ORG_BITMAP handling also for tail pages.
Added more DBUG_ASSERT() to find possible errors in other code
Some code simplications by adding new local variables
storage/maria/ma_blockrec.c:
UNDO of rows now puts back all part of the row on their original pages and positions.
Changed UNDO of DELETE and UNDO of UPDATE to contain information about the original length of data on head block and also extent information
This changes a lot of logic as now an insert of a row on a page may happen to any position (and not just to the first or next free)
Use PAGE_COUNT to mark if an extent is the start of of a blob. (Needed for extent_to_bitmap_blocks())
Added check_directory() for checking that directroy entries are correct.
Added checking of row checksums when reading rows (with EXTRA_DEBUG)
Added make_space_for_directory() and extend_directory() for doing expansion of directory
Added get_rowpos_in_head_or_tail_page() to be able to store head/tail on original position in UNDO
Added extent_to_bitmap_blocks() to be able to generate original bitmap blocks from UNDO entry
Added _ma_update_at_original_place() for UNDO of DELETES
Added row->min_length to hold minmum required space needed on head page
Changed find_free_position() to use make_space_for_directory()
Changed make_empty_page() to allow optional creation of directory entry
Changed delete_head_or_tail() and _ma_apply_undo_row_isnert() to not copy pagecache block (speed optimization)
Changed _ma_apply_redo_insert_row_head_or_tail() to be able to insert new row at any position on 'new' page
Changed _ma_apply_undo_row_delete() and _ma_apply_undo_row_update() to put row in it's original position
Ensure allocation of tail blocks are of at least MIN_TAIL_SIZE.
Ensure we store pages in pinned pages even if read failed. (If not we will have pages pinned forever in page cache)
Write original extent information in UNDO entry, not compacted ones (we need position to tails!)
When setting BLOCKUSED_USED, don't clear other bits (we have to preserve BLOCKUSED_USE_ORG_BITMAP)
Fixed som bugs in directory handling
Fixed bug where we wrote wrong lsn to blob pages
Added separate blob_buffer for fixing bug when updating row that had char/varchar that spanned several pages and also had blobs
Ensure we call _ma_bitmap_flushable() also in case of errors
When doing an update, first delete old entries, then search in bitmap for where to put new information
Info->s -> share
Rowid -> rowid
More DBUG_ASSERT()
storage/maria/ma_blockrec.h:
Added START_EXTENT_BIT and TAIL_PAGE_COUNT_MARKER
Added _ma_bitmap_wait_or_flush() and _ma_bitmap_get_page_bits()
storage/maria/ma_check.c:
Don't write extra empty line if there is no deleted blocks
Ignore START_EXTENT_BIT's in page count
Call _ma_fast_unlock_key_del() to free key_del link
storage/maria/ma_close.c:
Ensure that used_key_del is 0. (If not, someone forgot to call _ma_unlock_key_del())
storage/maria/ma_create.c:
Changed constant to macro
storage/maria/ma_delete.c:
For deleted keys, log also position to row
storage/maria/ma_extra.c:
Release blob buffer at maria_reset() if bigger than MARIA_SMALL_BLOB_BUFFER
storage/maria/ma_key_recover.c:
Added bzero() of LSN that confused paged cache in case of uninitialized block
Mark file crashed if applying of index changes fails
Added calls to _ma_fast_unlock_key_del() for protection of shared key_del link.
storage/maria/ma_locking.c:
Added usage of MARIA_FILE_OPEN_COUNT_OFFSET
Added _ma_mark_file_crashed()
storage/maria/ma_loghandler.c:
Fixed bug where we logged uninitialized memory
storage/maria/ma_open.c:
Moved state->changed to be at start of state info on disk to allow one to easly mark files as crashed
storage/maria/ma_page.c:
Disable 'dummy' checksumming of pages as this gave false warnings.
(Need to investigate if this is ever needed)
storage/maria/ma_pagecache.c:
Fixed wrong shift that caused Maria to crash on files > 4G
storage/maria/ma_recovery.c:
In case of errors, start writing on new line if we where in %## %## printing mode (Made errors more readable)
Changed global variable name from warnings -> recovery_warnings
Use MARIA_FILE_CREATE_RENAME_LSN_OFFSET instead of constant
Removed special handling of row position for deleted keys. Keys now always includes row positions
_ma_apply_undo_row_delete() now gets page and row position
Added check that we don't loop forever when handling undo's (in case of bug in undo chain)
Print name of failed REDO/UNDO
storage/maria/ma_recovery.h:
Removed old comment
storage/maria/ma_static.c:
Chaned version number of Maria files to not accidently use old ones (becasue of change of ordering of status variables)
storage/maria/ma_test2.c:
Added option -u to specify number of rows to update
Changed old option -u to be -A, as for ma_test1
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
First created blob is now of max blob length to ensure we have at least one big blob in the table
storage/maria/ma_test_all.sh:
More tests
storage/maria/ma_test_recovery.expected:
Updated results
storage/maria/ma_test_recovery:
Changed tests to use bigger blobs (not just 1K)
Added new tests that tests recovery of update with blobs
Removed comparision of .MAD file as it's not guranteed that recovery from scratch gives identical data file as original update
(compact_page() may be called at different times during normal execution and during REDO)
storage/maria/ma_update.c:
Simplify code (changed * to if)
storage/maria/maria_chk.c:
Make output more readable
storage/maria/maria_def.h:
Changed 'changed' to int to prepare for more bits
Added 2 more bytes to status information
Added 'st_mara_row->min_length' for storing min length needed on head page
Added 'st_mara_handler->blob_buff & blob_buff_size' for storing blobs
Moved all tunning parameters into one block
Added MARIA_SMALL_BLOB_BUFFER
Added _ma_mark_file_crashed()
storage/myisam/mi_test2.c:
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
storage/maria/ma_test_big.sh:
Testing of insert, update, delete, recovery and undo of rows with blobs
Thanks to the random-ness of ma_test2 this is likely to find most bugs in the row handling
2007-12-30 21:40:03 +01:00
|
|
|
/*
|
|
|
|
Ensure we have the minimum required space on head page:
|
|
|
|
- Header + length of field lengths (row->min_length)
|
|
|
|
- Number of extents
|
|
|
|
- One extent
|
|
|
|
*/
|
|
|
|
uint row_length= (row->min_length +
|
2011-08-15 15:39:53 +02:00
|
|
|
size_to_store_key_length(extents) +
|
UNDO of rows now puts back all part of the row on their original pages and positions
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Added --debug-on option to mysqld (to be able to turn of DBUG with --debug-on=0)
Fixed some bugs with 'non_flushable' marking of bitmap pages
Don't use 'non_flushable' marking of bitmap pages for not transactional tables
SHOW CREATE TABLE now shows if table was created with page checksums
Fixed a lot of bugs with BLOB handling in case of update/REDO and UNDO
More tests (especially for blobs) and DBUG_ASSERTS()
More readable output from maria_read_log and maria_chk
Fixed wrong shift that caused Maria to crash on files > 4G
Mark tables as crashed of REDO fails
dbug/dbug.c:
Changed to use my_bool (allowed me to remove some windows specific code)
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Removed initialization of variables if not needed
include/my_dbug.h:
Use my_bool for some functions that was defined as BOOLEAN in dbug.c code
Added DBUGGER_ON/DEBUGGER_OFF to speed up execution when DBUG is not used
include/my_global.h:
Define my_bool early
Increase MY_HOW_OFTEN_TO_WRITE as computers are now faster than 10 years ago
mysql-test/mysql-test-run.pl:
Added debug-on=0 to speed up tests
mysql-test/r/maria-recovery.result:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/r/maria.result:
Added testing of page checksums
mysql-test/t/crash_commit_before-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-bitmap-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery.test:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/t/maria.test:
Added testing of page checksums
sql/mysqld.cc:
Added --debug-on option (to be able to turn of DBUG with --debug-on=0)
Indentation fixes
Removed end spaces
sql/sql_show.cc:
Allow update_create_info() to inform MySQL if PACK_KEYS, NO_PACK_KEYS, CHECKSUM, PAGE_CHECKSUM or DELAY_KEY_WRITE is used
storage/maria/Makefile.am:
Added ma_test_big.sh
storage/maria/ha_maria.cc:
Store in create_info if page checksums are used (For SHOW CREATE TABLE)
storage/maria/ma_bitmap.c:
Added _ma_bitmap_wait_or_flush() to cause reader of bitmap pages to wait with reading until bitmap is flushed.
Use TAIL_PAGE_COUNT_MARKER for tail pages
Set 'sub_blocks' for and only for the head page or for the first extent of a blob. This is needed for store_extent_info() to be able to set START_EXTENT_BIT's
Don't allocate more than 0x3ffff pages in one extent (We need bit 0x4000 as a START_EXTENT_BIT)
Increase the calculated 'head_length' with the number of bytes used for extents.
Update row->space_on_head_page also in _ma_bitmap_find_new_place()
Make _ma_bitmap_get_page_bits() global. (Needed for UNDO handling)
Changed _ma_bitmap_flushable() to take MARIA_HA instead of MARIA_SHARE.
This was needed to be able to mark the handler if we had a 'non_flushable' call pending or not.
Don't use 'non_flushable' marking of bitmap pages for not transactional tables.
Added BLOCKUSED_USE_ORG_BITMAP handling also for tail pages.
Added more DBUG_ASSERT() to find possible errors in other code
Some code simplications by adding new local variables
storage/maria/ma_blockrec.c:
UNDO of rows now puts back all part of the row on their original pages and positions.
Changed UNDO of DELETE and UNDO of UPDATE to contain information about the original length of data on head block and also extent information
This changes a lot of logic as now an insert of a row on a page may happen to any position (and not just to the first or next free)
Use PAGE_COUNT to mark if an extent is the start of of a blob. (Needed for extent_to_bitmap_blocks())
Added check_directory() for checking that directroy entries are correct.
Added checking of row checksums when reading rows (with EXTRA_DEBUG)
Added make_space_for_directory() and extend_directory() for doing expansion of directory
Added get_rowpos_in_head_or_tail_page() to be able to store head/tail on original position in UNDO
Added extent_to_bitmap_blocks() to be able to generate original bitmap blocks from UNDO entry
Added _ma_update_at_original_place() for UNDO of DELETES
Added row->min_length to hold minmum required space needed on head page
Changed find_free_position() to use make_space_for_directory()
Changed make_empty_page() to allow optional creation of directory entry
Changed delete_head_or_tail() and _ma_apply_undo_row_isnert() to not copy pagecache block (speed optimization)
Changed _ma_apply_redo_insert_row_head_or_tail() to be able to insert new row at any position on 'new' page
Changed _ma_apply_undo_row_delete() and _ma_apply_undo_row_update() to put row in it's original position
Ensure allocation of tail blocks are of at least MIN_TAIL_SIZE.
Ensure we store pages in pinned pages even if read failed. (If not we will have pages pinned forever in page cache)
Write original extent information in UNDO entry, not compacted ones (we need position to tails!)
When setting BLOCKUSED_USED, don't clear other bits (we have to preserve BLOCKUSED_USE_ORG_BITMAP)
Fixed som bugs in directory handling
Fixed bug where we wrote wrong lsn to blob pages
Added separate blob_buffer for fixing bug when updating row that had char/varchar that spanned several pages and also had blobs
Ensure we call _ma_bitmap_flushable() also in case of errors
When doing an update, first delete old entries, then search in bitmap for where to put new information
Info->s -> share
Rowid -> rowid
More DBUG_ASSERT()
storage/maria/ma_blockrec.h:
Added START_EXTENT_BIT and TAIL_PAGE_COUNT_MARKER
Added _ma_bitmap_wait_or_flush() and _ma_bitmap_get_page_bits()
storage/maria/ma_check.c:
Don't write extra empty line if there is no deleted blocks
Ignore START_EXTENT_BIT's in page count
Call _ma_fast_unlock_key_del() to free key_del link
storage/maria/ma_close.c:
Ensure that used_key_del is 0. (If not, someone forgot to call _ma_unlock_key_del())
storage/maria/ma_create.c:
Changed constant to macro
storage/maria/ma_delete.c:
For deleted keys, log also position to row
storage/maria/ma_extra.c:
Release blob buffer at maria_reset() if bigger than MARIA_SMALL_BLOB_BUFFER
storage/maria/ma_key_recover.c:
Added bzero() of LSN that confused paged cache in case of uninitialized block
Mark file crashed if applying of index changes fails
Added calls to _ma_fast_unlock_key_del() for protection of shared key_del link.
storage/maria/ma_locking.c:
Added usage of MARIA_FILE_OPEN_COUNT_OFFSET
Added _ma_mark_file_crashed()
storage/maria/ma_loghandler.c:
Fixed bug where we logged uninitialized memory
storage/maria/ma_open.c:
Moved state->changed to be at start of state info on disk to allow one to easly mark files as crashed
storage/maria/ma_page.c:
Disable 'dummy' checksumming of pages as this gave false warnings.
(Need to investigate if this is ever needed)
storage/maria/ma_pagecache.c:
Fixed wrong shift that caused Maria to crash on files > 4G
storage/maria/ma_recovery.c:
In case of errors, start writing on new line if we where in %## %## printing mode (Made errors more readable)
Changed global variable name from warnings -> recovery_warnings
Use MARIA_FILE_CREATE_RENAME_LSN_OFFSET instead of constant
Removed special handling of row position for deleted keys. Keys now always includes row positions
_ma_apply_undo_row_delete() now gets page and row position
Added check that we don't loop forever when handling undo's (in case of bug in undo chain)
Print name of failed REDO/UNDO
storage/maria/ma_recovery.h:
Removed old comment
storage/maria/ma_static.c:
Chaned version number of Maria files to not accidently use old ones (becasue of change of ordering of status variables)
storage/maria/ma_test2.c:
Added option -u to specify number of rows to update
Changed old option -u to be -A, as for ma_test1
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
First created blob is now of max blob length to ensure we have at least one big blob in the table
storage/maria/ma_test_all.sh:
More tests
storage/maria/ma_test_recovery.expected:
Updated results
storage/maria/ma_test_recovery:
Changed tests to use bigger blobs (not just 1K)
Added new tests that tests recovery of update with blobs
Removed comparision of .MAD file as it's not guranteed that recovery from scratch gives identical data file as original update
(compact_page() may be called at different times during normal execution and during REDO)
storage/maria/ma_update.c:
Simplify code (changed * to if)
storage/maria/maria_chk.c:
Make output more readable
storage/maria/maria_def.h:
Changed 'changed' to int to prepare for more bits
Added 2 more bytes to status information
Added 'st_mara_row->min_length' for storing min length needed on head page
Added 'st_mara_handler->blob_buff & blob_buff_size' for storing blobs
Moved all tunning parameters into one block
Added MARIA_SMALL_BLOB_BUFFER
Added _ma_mark_file_crashed()
storage/myisam/mi_test2.c:
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
storage/maria/ma_test_big.sh:
Testing of insert, update, delete, recovery and undo of rows with blobs
Thanks to the random-ness of ma_test2 this is likely to find most bugs in the row handling
2007-12-30 21:40:03 +01:00
|
|
|
ROW_EXTENT_SIZE);
|
2011-08-15 15:39:53 +02:00
|
|
|
DBUG_ASSERT(row_length <= split_size);
|
|
|
|
|
2007-01-18 20:38:14 +01:00
|
|
|
/*
|
|
|
|
Store first in all_field_lengths the different parts that are written
|
|
|
|
to the row. This needs to be in same order as in
|
|
|
|
ma_block_rec.c::write_block_record()
|
|
|
|
*/
|
2011-08-15 15:39:53 +02:00
|
|
|
row->null_field_lengths[-3]= extents * ROW_EXTENT_SIZE;
|
2007-01-18 20:38:14 +01:00
|
|
|
row->null_field_lengths[-2]= share->base.fixed_not_null_fields_length;
|
|
|
|
row->null_field_lengths[-1]= row->field_lengths_length;
|
|
|
|
for (lengths= row->null_field_lengths - EXTRA_LENGTH_FIELDS,
|
2011-08-15 15:39:53 +02:00
|
|
|
lengths_end= (lengths + share->base.fields - share->base.blobs +
|
2007-01-18 20:38:14 +01:00
|
|
|
EXTRA_LENGTH_FIELDS); lengths < lengths_end; lengths++)
|
|
|
|
{
|
|
|
|
if (row_length + *lengths > split_size)
|
|
|
|
break;
|
|
|
|
row_length+= *lengths;
|
|
|
|
}
|
|
|
|
return row_length;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2007-04-19 12:18:56 +02:00
|
|
|
/*
|
|
|
|
Find where to write the middle parts of the row and the tail
|
|
|
|
|
|
|
|
SYNOPSIS
|
|
|
|
write_rest_of_head()
|
|
|
|
info Maria handler
|
|
|
|
position Position in bitmap_blocks. Is 0 for rows that needs
|
|
|
|
full blocks (ie, has a head, middle part and optional tail)
|
|
|
|
rest_length How much left of the head block to write.
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
0 ok
|
|
|
|
1 error
|
|
|
|
*/
|
|
|
|
|
2007-01-18 20:38:14 +01:00
|
|
|
static my_bool write_rest_of_head(MARIA_HA *info, uint position,
|
|
|
|
ulong rest_length)
|
|
|
|
{
|
|
|
|
MARIA_SHARE *share= info->s;
|
|
|
|
uint full_page_size= FULL_PAGE_SIZE(share->block_size);
|
|
|
|
MARIA_BITMAP_BLOCK *block;
|
2007-04-05 13:38:05 +02:00
|
|
|
DBUG_ENTER("write_rest_of_head");
|
|
|
|
DBUG_PRINT("enter", ("position: %u rest_length: %lu", position,
|
|
|
|
rest_length));
|
2007-01-18 20:38:14 +01:00
|
|
|
|
|
|
|
if (position == 0)
|
|
|
|
{
|
|
|
|
/* Write out full pages */
|
|
|
|
uint pages= rest_length / full_page_size;
|
|
|
|
|
|
|
|
rest_length%= full_page_size;
|
|
|
|
if (rest_length >= MAX_TAIL_SIZE(share->block_size))
|
|
|
|
{
|
|
|
|
/* Put tail on a full page */
|
|
|
|
pages++;
|
|
|
|
rest_length= 0;
|
|
|
|
}
|
2007-04-05 13:38:05 +02:00
|
|
|
if (find_mid(info, pages, 1))
|
|
|
|
DBUG_RETURN(1);
|
2007-01-18 20:38:14 +01:00
|
|
|
/*
|
|
|
|
Insert empty block after full pages, to allow write_block_record() to
|
|
|
|
split segment into used + free page
|
|
|
|
*/
|
|
|
|
block= dynamic_element(&info->bitmap_blocks, 2, MARIA_BITMAP_BLOCK*);
|
|
|
|
block->page_count= 0;
|
|
|
|
block->used= 0;
|
|
|
|
}
|
|
|
|
if (rest_length)
|
|
|
|
{
|
|
|
|
if (find_tail(info, rest_length, ELEMENTS_RESERVED_FOR_MAIN_PART - 1))
|
2007-04-05 13:38:05 +02:00
|
|
|
DBUG_RETURN(1);
|
2007-01-18 20:38:14 +01:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* Empty tail block */
|
|
|
|
block= dynamic_element(&info->bitmap_blocks,
|
|
|
|
ELEMENTS_RESERVED_FOR_MAIN_PART - 1,
|
|
|
|
MARIA_BITMAP_BLOCK *);
|
|
|
|
block->page_count= 0;
|
|
|
|
block->used= 0;
|
|
|
|
}
|
2007-04-05 13:38:05 +02:00
|
|
|
DBUG_RETURN(0);
|
2007-01-18 20:38:14 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
Find where to store one row
|
|
|
|
|
|
|
|
SYNOPSIS
|
|
|
|
_ma_bitmap_find_place()
|
|
|
|
info Maria handler
|
|
|
|
row Information about row to write
|
|
|
|
blocks Store data about allocated places here
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
0 ok
|
2007-09-05 01:57:53 +02:00
|
|
|
row->space_on_head_page contains minimum number of bytes we
|
|
|
|
expect to put on the head page.
|
2007-01-18 20:38:14 +01:00
|
|
|
1 error
|
2007-10-09 20:09:50 +02:00
|
|
|
my_errno is set to error
|
2007-01-18 20:38:14 +01:00
|
|
|
*/
|
|
|
|
|
|
|
|
my_bool _ma_bitmap_find_place(MARIA_HA *info, MARIA_ROW *row,
|
|
|
|
MARIA_BITMAP_BLOCKS *blocks)
|
|
|
|
{
|
|
|
|
MARIA_SHARE *share= info->s;
|
|
|
|
my_bool res= 1;
|
|
|
|
uint full_page_size, position, max_page_size;
|
|
|
|
uint head_length, row_length, rest_length, extents_length;
|
|
|
|
DBUG_ENTER("_ma_bitmap_find_place");
|
|
|
|
|
|
|
|
blocks->count= 0;
|
|
|
|
blocks->tail_page_skipped= blocks->page_skipped= 0;
|
|
|
|
row->extents_count= 0;
|
2007-04-20 14:16:43 +02:00
|
|
|
|
2007-01-18 20:38:14 +01:00
|
|
|
/*
|
2007-04-20 14:16:43 +02:00
|
|
|
Reserve place for the following blocks:
|
2007-01-18 20:38:14 +01:00
|
|
|
- Head block
|
|
|
|
- Full page block
|
|
|
|
- Marker block to allow write_block_record() to split full page blocks
|
|
|
|
into full and free part
|
|
|
|
- Tail block
|
|
|
|
*/
|
|
|
|
|
|
|
|
info->bitmap_blocks.elements= ELEMENTS_RESERVED_FOR_MAIN_PART;
|
|
|
|
max_page_size= (share->block_size - PAGE_OVERHEAD_SIZE);
|
|
|
|
|
2011-07-13 21:10:18 +02:00
|
|
|
mysql_mutex_lock(&share->bitmap.bitmap_lock);
|
2007-01-18 20:38:14 +01:00
|
|
|
|
|
|
|
if (row->total_length <= max_page_size)
|
|
|
|
{
|
|
|
|
/* Row fits in one page */
|
|
|
|
position= ELEMENTS_RESERVED_FOR_MAIN_PART - 1;
|
|
|
|
if (find_head(info, (uint) row->total_length, position))
|
|
|
|
goto abort;
|
2007-09-05 01:57:53 +02:00
|
|
|
row->space_on_head_page= row->total_length;
|
2007-01-18 20:38:14 +01:00
|
|
|
goto end;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
UNDO of rows now puts back all part of the row on their original pages and positions
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Added --debug-on option to mysqld (to be able to turn of DBUG with --debug-on=0)
Fixed some bugs with 'non_flushable' marking of bitmap pages
Don't use 'non_flushable' marking of bitmap pages for not transactional tables
SHOW CREATE TABLE now shows if table was created with page checksums
Fixed a lot of bugs with BLOB handling in case of update/REDO and UNDO
More tests (especially for blobs) and DBUG_ASSERTS()
More readable output from maria_read_log and maria_chk
Fixed wrong shift that caused Maria to crash on files > 4G
Mark tables as crashed if REDO fails
dbug/dbug.c:
Changed to use my_bool (allowed me to remove some windows specific code)
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Removed initialization of variables if not needed
include/my_dbug.h:
Use my_bool for some functions that was defined as BOOLEAN in dbug.c code
Added DBUGGER_ON/DEBUGGER_OFF to speed up execution when DBUG is not used
include/my_global.h:
Define my_bool early
Increase MY_HOW_OFTEN_TO_WRITE as computers are now faster than 10 years ago
mysql-test/mysql-test-run.pl:
Added debug-on=0 to speed up tests
mysql-test/r/maria-recovery.result:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/r/maria.result:
Added testing of page checksums
mysql-test/t/crash_commit_before-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-bitmap-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery.test:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/t/maria.test:
Added testing of page checksums
sql/mysqld.cc:
Added --debug-on option (to be able to turn of DBUG with --debug-on=0)
Indentation fixes
Removed end spaces
sql/sql_show.cc:
Allow update_create_info() to inform MySQL if PACK_KEYS, NO_PACK_KEYS, CHECKSUM, PAGE_CHECKSUM or DELAY_KEY_WRITE is used
storage/maria/Makefile.am:
Added ma_test_big.sh
storage/maria/ha_maria.cc:
Store in create_info if page checksums are used (For SHOW CREATE TABLE)
storage/maria/ma_bitmap.c:
Added _ma_bitmap_wait_or_flush() to cause reader of bitmap pages to wait with reading until bitmap is flushed.
Use TAIL_PAGE_COUNT_MARKER for tail pages
Set 'sub_blocks' for and only for the head page or for the first extent of a blob. This is needed for store_extent_info() to be able to set START_EXTENT_BIT's
Don't allocate more than 0x3ffff pages in one extent (We need bit 0x4000 as a START_EXTENT_BIT)
Increase the calculated 'head_length' with the number of bytes used for extents.
Update row->space_on_head_page also in _ma_bitmap_find_new_place()
Make _ma_bitmap_get_page_bits() global. (Needed for UNDO handling)
Changed _ma_bitmap_flushable() to take MARIA_HA instead of MARIA_SHARE.
This was needed to be able to mark the handler if we had a 'non_flushable' call pending or not.
Don't use 'non_flushable' marking of bitmap pages for not transactional tables.
Added BLOCKUSED_USE_ORG_BITMAP handling also for tail pages.
Added more DBUG_ASSERT() to find possible errors in other code
Some code simplications by adding new local variables
storage/maria/ma_blockrec.c:
UNDO of rows now puts back all part of the row on their original pages and positions.
Changed UNDO of DELETE and UNDO of UPDATE to contain information about the original length of data on head block and also extent information
This changes a lot of logic as now an insert of a row on a page may happen to any position (and not just to the first or next free)
Use PAGE_COUNT to mark if an extent is the start of a blob. (Needed for extent_to_bitmap_blocks())
Added check_directory() for checking that directory entries are correct.
Added checking of row checksums when reading rows (with EXTRA_DEBUG)
Added make_space_for_directory() and extend_directory() for doing expansion of directory
Added get_rowpos_in_head_or_tail_page() to be able to store head/tail on original position in UNDO
Added extent_to_bitmap_blocks() to be able to generate original bitmap blocks from UNDO entry
Added _ma_update_at_original_place() for UNDO of DELETES
Added row->min_length to hold minmum required space needed on head page
Changed find_free_position() to use make_space_for_directory()
Changed make_empty_page() to allow optional creation of directory entry
Changed delete_head_or_tail() and _ma_apply_undo_row_isnert() to not copy pagecache block (speed optimization)
Changed _ma_apply_redo_insert_row_head_or_tail() to be able to insert new row at any position on 'new' page
Changed _ma_apply_undo_row_delete() and _ma_apply_undo_row_update() to put row in it's original position
Ensure allocation of tail blocks are of at least MIN_TAIL_SIZE.
Ensure we store pages in pinned pages even if read failed. (If not we will have pages pinned forever in page cache)
Write original extent information in UNDO entry, not compacted ones (we need position to tails!)
When setting BLOCKUSED_USED, don't clear other bits (we have to preserve BLOCKUSED_USE_ORG_BITMAP)
Fixed som bugs in directory handling
Fixed bug where we wrote wrong lsn to blob pages
Added separate blob_buffer for fixing bug when updating row that had char/varchar that spanned several pages and also had blobs
Ensure we call _ma_bitmap_flushable() also in case of errors
When doing an update, first delete old entries, then search in bitmap for where to put new information
Info->s -> share
Rowid -> rowid
More DBUG_ASSERT()
storage/maria/ma_blockrec.h:
Added START_EXTENT_BIT and TAIL_PAGE_COUNT_MARKER
Added _ma_bitmap_wait_or_flush() and _ma_bitmap_get_page_bits()
storage/maria/ma_check.c:
Don't write extra empty line if there is no deleted blocks
Ignore START_EXTENT_BIT's in page count
Call _ma_fast_unlock_key_del() to free key_del link
storage/maria/ma_close.c:
Ensure that used_key_del is 0. (If not, someone forgot to call _ma_unlock_key_del())
storage/maria/ma_create.c:
Changed constant to macro
storage/maria/ma_delete.c:
For deleted keys, log also position to row
storage/maria/ma_extra.c:
Release blob buffer at maria_reset() if bigger than MARIA_SMALL_BLOB_BUFFER
storage/maria/ma_key_recover.c:
Added bzero() of LSN that confused paged cache in case of uninitialized block
Mark file crashed if applying of index changes fails
Added calls to _ma_fast_unlock_key_del() for protection of shared key_del link.
storage/maria/ma_locking.c:
Added usage of MARIA_FILE_OPEN_COUNT_OFFSET
Added _ma_mark_file_crashed()
storage/maria/ma_loghandler.c:
Fixed bug where we logged uninitialized memory
storage/maria/ma_open.c:
Moved state->changed to be at start of state info on disk to allow one to easly mark files as crashed
storage/maria/ma_page.c:
Disable 'dummy' checksumming of pages as this gave false warnings.
(Need to investigate if this is ever needed)
storage/maria/ma_pagecache.c:
Fixed wrong shift that caused Maria to crash on files > 4G
storage/maria/ma_recovery.c:
In case of errors, start writing on new line if we where in %## %## printing mode (Made errors more readable)
Changed global variable name from warnings -> recovery_warnings
Use MARIA_FILE_CREATE_RENAME_LSN_OFFSET instead of constant
Removed special handling of row position for deleted keys. Keys now always includes row positions
_ma_apply_undo_row_delete() now gets page and row position
Added check that we don't loop forever when handling undo's (in case of bug in undo chain)
Print name of failed REDO/UNDO
storage/maria/ma_recovery.h:
Removed old comment
storage/maria/ma_static.c:
Changed version number of Maria files to not accidentally use old ones (because of change of ordering of status variables)
storage/maria/ma_test2.c:
Added option -u to specify number of rows to update
Changed old option -u to be -A, as for ma_test1
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
First created blob is now of max blob length to ensure we have at least one big blob in the table
storage/maria/ma_test_all.sh:
More tests
storage/maria/ma_test_recovery.expected:
Updated results
storage/maria/ma_test_recovery:
Changed tests to use bigger blobs (not just 1K)
Added new tests that tests recovery of update with blobs
Removed comparison of .MAD file as it's not guaranteed that recovery from scratch gives an identical data file to the original update
(compact_page() may be called at different times during normal execution and during REDO)
storage/maria/ma_update.c:
Simplify code (changed * to if)
storage/maria/maria_chk.c:
Make output more readable
storage/maria/maria_def.h:
Changed 'changed' to int to prepare for more bits
Added 2 more bytes to status information
Added 'st_mara_row->min_length' for storing min length needed on head page
Added 'st_mara_handler->blob_buff & blob_buff_size' for storing blobs
Moved all tunning parameters into one block
Added MARIA_SMALL_BLOB_BUFFER
Added _ma_mark_file_crashed()
storage/myisam/mi_test2.c:
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
storage/maria/ma_test_big.sh:
Testing of insert, update, delete, recovery and undo of rows with blobs
Thanks to the random-ness of ma_test2 this is likely to find most bugs in the row handling
2007-12-30 21:40:03 +01:00
|
|
|
First allocate all blobs so that we can find out the needed size for
|
2007-01-18 20:38:14 +01:00
|
|
|
the main block.
|
|
|
|
*/
|
|
|
|
if (row->blob_length && allocate_blobs(info, row))
|
|
|
|
goto abort;
|
|
|
|
|
|
|
|
extents_length= row->extents_count * ROW_EXTENT_SIZE;
|
UNDO of rows now puts back all part of the row on their original pages and positions
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Added --debug-on option to mysqld (to be able to turn of DBUG with --debug-on=0)
Fixed some bugs with 'non_flushable' marking of bitmap pages
Don't use 'non_flushable' marking of bitmap pages for not transactional tables
SHOW CREATE TABLE now shows if table was created with page checksums
Fixed a lot of bugs with BLOB handling in case of update/REDO and UNDO
More tests (especially for blobs) and DBUG_ASSERTS()
More readable output from maria_read_log and maria_chk
Fixed wrong shift that caused Maria to crash on files > 4G
Mark tables as crashed of REDO fails
dbug/dbug.c:
Changed to use my_bool (allowed me to remove some windows specific code)
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Removed initialization of variables if not needed
include/my_dbug.h:
Use my_bool for some functions that was defined as BOOLEAN in dbug.c code
Added DBUGGER_ON/DEBUGGER_OFF to speed up execution when DBUG is not used
include/my_global.h:
Define my_bool early
Increase MY_HOW_OFTEN_TO_WRITE as computers are now faster than 10 years ago
mysql-test/mysql-test-run.pl:
Added debug-on=0 to speed up tests
mysql-test/r/maria-recovery.result:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/r/maria.result:
Added testing of page checksums
mysql-test/t/crash_commit_before-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-bitmap-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery.test:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/t/maria.test:
Added testing of page checksums
sql/mysqld.cc:
Added --debug-on option (to be able to turn of DBUG with --debug-on=0)
Indentation fixes
Removed end spaces
sql/sql_show.cc:
Allow update_create_info() to inform MySQL if PACK_KEYS, NO_PACK_KEYS, CHECKSUM, PAGE_CHECKSUM or DELAY_KEY_WRITE is used
storage/maria/Makefile.am:
Added ma_test_big.sh
storage/maria/ha_maria.cc:
Store in create_info if page checksums are used (For SHOW CREATE TABLE)
storage/maria/ma_bitmap.c:
Added _ma_bitmap_wait_or_flush() to cause reader of bitmap pages to wait with reading until bitmap is flushed.
Use TAIL_PAGE_COUNT_MARKER for tail pages
Set 'sub_blocks' for and only for the head page or for the first extent of a blob. This is needed for store_extent_info() to be able to set START_EXTENT_BIT's
Don't allocate more than 0x3ffff pages in one extent (We need bit 0x4000 as a START_EXTENT_BIT)
Increase the calculated 'head_length' with the number of bytes used for extents.
Update row->space_on_head_page also in _ma_bitmap_find_new_place()
Make _ma_bitmap_get_page_bits() global. (Needed for UNDO handling)
Changed _ma_bitmap_flushable() to take MARIA_HA instead of MARIA_SHARE.
This was needed to be able to mark the handler if we had a 'non_flushable' call pending or not.
Don't use 'non_flushable' marking of bitmap pages for not transactional tables.
Added BLOCKUSED_USE_ORG_BITMAP handling also for tail pages.
Added more DBUG_ASSERT() to find possible errors in other code
Some code simplications by adding new local variables
storage/maria/ma_blockrec.c:
UNDO of rows now puts back all part of the row on their original pages and positions.
Changed UNDO of DELETE and UNDO of UPDATE to contain information about the original length of data on head block and also extent information
This changes a lot of logic as now an insert of a row on a page may happen to any position (and not just to the first or next free)
Use PAGE_COUNT to mark if an extent is the start of of a blob. (Needed for extent_to_bitmap_blocks())
Added check_directory() for checking that directroy entries are correct.
Added checking of row checksums when reading rows (with EXTRA_DEBUG)
Added make_space_for_directory() and extend_directory() for doing expansion of directory
Added get_rowpos_in_head_or_tail_page() to be able to store head/tail on original position in UNDO
Added extent_to_bitmap_blocks() to be able to generate original bitmap blocks from UNDO entry
Added _ma_update_at_original_place() for UNDO of DELETES
Added row->min_length to hold minmum required space needed on head page
Changed find_free_position() to use make_space_for_directory()
Changed make_empty_page() to allow optional creation of directory entry
Changed delete_head_or_tail() and _ma_apply_undo_row_isnert() to not copy pagecache block (speed optimization)
Changed _ma_apply_redo_insert_row_head_or_tail() to be able to insert new row at any position on 'new' page
Changed _ma_apply_undo_row_delete() and _ma_apply_undo_row_update() to put row in it's original position
Ensure allocation of tail blocks are of at least MIN_TAIL_SIZE.
Ensure we store pages in pinned pages even if read failed. (If not we will have pages pinned forever in page cache)
Write original extent information in UNDO entry, not compacted ones (we need position to tails!)
When setting BLOCKUSED_USED, don't clear other bits (we have to preserve BLOCKUSED_USE_ORG_BITMAP)
Fixed som bugs in directory handling
Fixed bug where we wrote wrong lsn to blob pages
Added separate blob_buffer for fixing bug when updating row that had char/varchar that spanned several pages and also had blobs
Ensure we call _ma_bitmap_flushable() also in case of errors
When doing an update, first delete old entries, then search in bitmap for where to put new information
Info->s -> share
Rowid -> rowid
More DBUG_ASSERT()
storage/maria/ma_blockrec.h:
Added START_EXTENT_BIT and TAIL_PAGE_COUNT_MARKER
Added _ma_bitmap_wait_or_flush() and _ma_bitmap_get_page_bits()
storage/maria/ma_check.c:
Don't write extra empty line if there is no deleted blocks
Ignore START_EXTENT_BIT's in page count
Call _ma_fast_unlock_key_del() to free key_del link
storage/maria/ma_close.c:
Ensure that used_key_del is 0. (If not, someone forgot to call _ma_unlock_key_del())
storage/maria/ma_create.c:
Changed constant to macro
storage/maria/ma_delete.c:
For deleted keys, log also position to row
storage/maria/ma_extra.c:
Release blob buffer at maria_reset() if bigger than MARIA_SMALL_BLOB_BUFFER
storage/maria/ma_key_recover.c:
Added bzero() of LSN that confused paged cache in case of uninitialized block
Mark file crashed if applying of index changes fails
Added calls to _ma_fast_unlock_key_del() for protection of shared key_del link.
storage/maria/ma_locking.c:
Added usage of MARIA_FILE_OPEN_COUNT_OFFSET
Added _ma_mark_file_crashed()
storage/maria/ma_loghandler.c:
Fixed bug where we logged uninitialized memory
storage/maria/ma_open.c:
Moved state->changed to be at start of state info on disk to allow one to easly mark files as crashed
storage/maria/ma_page.c:
Disable 'dummy' checksumming of pages as this gave false warnings.
(Need to investigate if this is ever needed)
storage/maria/ma_pagecache.c:
Fixed wrong shift that caused Maria to crash on files > 4G
storage/maria/ma_recovery.c:
In case of errors, start writing on new line if we where in %## %## printing mode (Made errors more readable)
Changed global variable name from warnings -> recovery_warnings
Use MARIA_FILE_CREATE_RENAME_LSN_OFFSET instead of constant
Removed special handling of row position for deleted keys. Keys now always includes row positions
_ma_apply_undo_row_delete() now gets page and row position
Added check that we don't loop forever when handling undo's (in case of bug in undo chain)
Print name of failed REDO/UNDO
storage/maria/ma_recovery.h:
Removed old comment
storage/maria/ma_static.c:
Chaned version number of Maria files to not accidently use old ones (becasue of change of ordering of status variables)
storage/maria/ma_test2.c:
Added option -u to specify number of rows to update
Changed old option -u to be -A, as for ma_test1
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
First created blob is now of max blob length to ensure we have at least one big blob in the table
storage/maria/ma_test_all.sh:
More tests
storage/maria/ma_test_recovery.expected:
Updated results
storage/maria/ma_test_recovery:
Changed tests to use bigger blobs (not just 1K)
Added new tests that tests recovery of update with blobs
Removed comparision of .MAD file as it's not guranteed that recovery from scratch gives identical data file as original update
(compact_page() may be called at different times during normal execution and during REDO)
storage/maria/ma_update.c:
Simplify code (changed * to if)
storage/maria/maria_chk.c:
Make output more readable
storage/maria/maria_def.h:
Changed 'changed' to int to prepare for more bits
Added 2 more bytes to status information
Added 'st_mara_row->min_length' for storing min length needed on head page
Added 'st_mara_handler->blob_buff & blob_buff_size' for storing blobs
Moved all tunning parameters into one block
Added MARIA_SMALL_BLOB_BUFFER
Added _ma_mark_file_crashed()
storage/myisam/mi_test2.c:
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
storage/maria/ma_test_big.sh:
Testing of insert, update, delete, recovery and undo of rows with blobs
Thanks to the random-ness of ma_test2 this is likely to find most bugs in the row handling
2007-12-30 21:40:03 +01:00
|
|
|
/*
|
2008-01-07 17:54:41 +01:00
|
|
|
The + 3 is reserved for storing the number of segments in the row header.
|
UNDO of rows now puts back all part of the row on their original pages and positions
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Added --debug-on option to mysqld (to be able to turn of DBUG with --debug-on=0)
Fixed some bugs with 'non_flushable' marking of bitmap pages
Don't use 'non_flushable' marking of bitmap pages for not transactional tables
SHOW CREATE TABLE now shows if table was created with page checksums
Fixed a lot of bugs with BLOB handling in case of update/REDO and UNDO
More tests (especially for blobs) and DBUG_ASSERTS()
More readable output from maria_read_log and maria_chk
Fixed wrong shift that caused Maria to crash on files > 4G
Mark tables as crashed of REDO fails
dbug/dbug.c:
Changed to use my_bool (allowed me to remove some windows specific code)
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Removed initialization of variables if not needed
include/my_dbug.h:
Use my_bool for some functions that was defined as BOOLEAN in dbug.c code
Added DBUGGER_ON/DEBUGGER_OFF to speed up execution when DBUG is not used
include/my_global.h:
Define my_bool early
Increase MY_HOW_OFTEN_TO_WRITE as computers are now faster than 10 years ago
mysql-test/mysql-test-run.pl:
Added debug-on=0 to speed up tests
mysql-test/r/maria-recovery.result:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/r/maria.result:
Added testing of page checksums
mysql-test/t/crash_commit_before-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-bitmap-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery.test:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/t/maria.test:
Added testing of page checksums
sql/mysqld.cc:
Added --debug-on option (to be able to turn of DBUG with --debug-on=0)
Indentation fixes
Removed end spaces
sql/sql_show.cc:
Allow update_create_info() to inform MySQL if PACK_KEYS, NO_PACK_KEYS, CHECKSUM, PAGE_CHECKSUM or DELAY_KEY_WRITE is used
storage/maria/Makefile.am:
Added ma_test_big.sh
storage/maria/ha_maria.cc:
Store in create_info if page checksums are used (For SHOW CREATE TABLE)
storage/maria/ma_bitmap.c:
Added _ma_bitmap_wait_or_flush() to cause reader of bitmap pages to wait with reading until bitmap is flushed.
Use TAIL_PAGE_COUNT_MARKER for tail pages
Set 'sub_blocks' for and only for the head page or for the first extent of a blob. This is needed for store_extent_info() to be able to set START_EXTENT_BIT's
Don't allocate more than 0x3ffff pages in one extent (We need bit 0x4000 as a START_EXTENT_BIT)
Increase the calculated 'head_length' with the number of bytes used for extents.
Update row->space_on_head_page also in _ma_bitmap_find_new_place()
Make _ma_bitmap_get_page_bits() global. (Needed for UNDO handling)
Changed _ma_bitmap_flushable() to take MARIA_HA instead of MARIA_SHARE.
This was needed to be able to mark the handler if we had a 'non_flushable' call pending or not.
Don't use 'non_flushable' marking of bitmap pages for not transactional tables.
Added BLOCKUSED_USE_ORG_BITMAP handling also for tail pages.
Added more DBUG_ASSERT() to find possible errors in other code
Some code simplications by adding new local variables
storage/maria/ma_blockrec.c:
UNDO of rows now puts back all part of the row on their original pages and positions.
Changed UNDO of DELETE and UNDO of UPDATE to contain information about the original length of data on head block and also extent information
This changes a lot of logic as now an insert of a row on a page may happen to any position (and not just to the first or next free)
Use PAGE_COUNT to mark if an extent is the start of of a blob. (Needed for extent_to_bitmap_blocks())
Added check_directory() for checking that directroy entries are correct.
Added checking of row checksums when reading rows (with EXTRA_DEBUG)
Added make_space_for_directory() and extend_directory() for doing expansion of directory
Added get_rowpos_in_head_or_tail_page() to be able to store head/tail on original position in UNDO
Added extent_to_bitmap_blocks() to be able to generate original bitmap blocks from UNDO entry
Added _ma_update_at_original_place() for UNDO of DELETES
Added row->min_length to hold minmum required space needed on head page
Changed find_free_position() to use make_space_for_directory()
Changed make_empty_page() to allow optional creation of directory entry
Changed delete_head_or_tail() and _ma_apply_undo_row_isnert() to not copy pagecache block (speed optimization)
Changed _ma_apply_redo_insert_row_head_or_tail() to be able to insert new row at any position on 'new' page
Changed _ma_apply_undo_row_delete() and _ma_apply_undo_row_update() to put row in it's original position
Ensure allocation of tail blocks are of at least MIN_TAIL_SIZE.
Ensure we store pages in pinned pages even if read failed. (If not we will have pages pinned forever in page cache)
Write original extent information in UNDO entry, not compacted ones (we need position to tails!)
When setting BLOCKUSED_USED, don't clear other bits (we have to preserve BLOCKUSED_USE_ORG_BITMAP)
Fixed som bugs in directory handling
Fixed bug where we wrote wrong lsn to blob pages
Added separate blob_buffer for fixing bug when updating row that had char/varchar that spanned several pages and also had blobs
Ensure we call _ma_bitmap_flushable() also in case of errors
When doing an update, first delete old entries, then search in bitmap for where to put new information
Info->s -> share
Rowid -> rowid
More DBUG_ASSERT()
storage/maria/ma_blockrec.h:
Added START_EXTENT_BIT and TAIL_PAGE_COUNT_MARKER
Added _ma_bitmap_wait_or_flush() and _ma_bitmap_get_page_bits()
storage/maria/ma_check.c:
Don't write extra empty line if there is no deleted blocks
Ignore START_EXTENT_BIT's in page count
Call _ma_fast_unlock_key_del() to free key_del link
storage/maria/ma_close.c:
Ensure that used_key_del is 0. (If not, someone forgot to call _ma_unlock_key_del())
storage/maria/ma_create.c:
Changed constant to macro
storage/maria/ma_delete.c:
For deleted keys, log also position to row
storage/maria/ma_extra.c:
Release blob buffer at maria_reset() if bigger than MARIA_SMALL_BLOB_BUFFER
storage/maria/ma_key_recover.c:
Added bzero() of LSN that confused paged cache in case of uninitialized block
Mark file crashed if applying of index changes fails
Added calls to _ma_fast_unlock_key_del() for protection of shared key_del link.
storage/maria/ma_locking.c:
Added usage of MARIA_FILE_OPEN_COUNT_OFFSET
Added _ma_mark_file_crashed()
storage/maria/ma_loghandler.c:
Fixed bug where we logged uninitialized memory
storage/maria/ma_open.c:
Moved state->changed to be at start of state info on disk to allow one to easly mark files as crashed
storage/maria/ma_page.c:
Disable 'dummy' checksumming of pages as this gave false warnings.
(Need to investigate if this is ever needed)
storage/maria/ma_pagecache.c:
Fixed wrong shift that caused Maria to crash on files > 4G
storage/maria/ma_recovery.c:
In case of errors, start writing on new line if we where in %## %## printing mode (Made errors more readable)
Changed global variable name from warnings -> recovery_warnings
Use MARIA_FILE_CREATE_RENAME_LSN_OFFSET instead of constant
Removed special handling of row position for deleted keys. Keys now always includes row positions
_ma_apply_undo_row_delete() now gets page and row position
Added check that we don't loop forever when handling undo's (in case of bug in undo chain)
Print name of failed REDO/UNDO
storage/maria/ma_recovery.h:
Removed old comment
storage/maria/ma_static.c:
Chaned version number of Maria files to not accidently use old ones (becasue of change of ordering of status variables)
storage/maria/ma_test2.c:
Added option -u to specify number of rows to update
Changed old option -u to be -A, as for ma_test1
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
First created blob is now of max blob length to ensure we have at least one big blob in the table
storage/maria/ma_test_all.sh:
More tests
storage/maria/ma_test_recovery.expected:
Updated results
storage/maria/ma_test_recovery:
Changed tests to use bigger blobs (not just 1K)
Added new tests that tests recovery of update with blobs
Removed comparision of .MAD file as it's not guranteed that recovery from scratch gives identical data file as original update
(compact_page() may be called at different times during normal execution and during REDO)
storage/maria/ma_update.c:
Simplify code (changed * to if)
storage/maria/maria_chk.c:
Make output more readable
storage/maria/maria_def.h:
Changed 'changed' to int to prepare for more bits
Added 2 more bytes to status information
Added 'st_mara_row->min_length' for storing min length needed on head page
Added 'st_mara_handler->blob_buff & blob_buff_size' for storing blobs
Moved all tuning parameters into one block
Added MARIA_SMALL_BLOB_BUFFER
Added _ma_mark_file_crashed()
storage/myisam/mi_test2.c:
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
storage/maria/ma_test_big.sh:
Testing of insert, update, delete, recovery and undo of rows with blobs
Thanks to the random-ness of ma_test2 this is likely to find most bugs in the row handling
2007-12-30 21:40:03 +01:00
|
|
|
*/
|
|
|
|
if ((head_length= (row->head_length + extents_length + 3)) <=
|
|
|
|
max_page_size)
|
2007-01-18 20:38:14 +01:00
|
|
|
{
|
|
|
|
/* Main row part fits into one page */
|
|
|
|
position= ELEMENTS_RESERVED_FOR_MAIN_PART - 1;
|
|
|
|
if (find_head(info, head_length, position))
|
|
|
|
goto abort;
|
2007-09-05 01:57:53 +02:00
|
|
|
row->space_on_head_page= head_length;
|
2007-01-18 20:38:14 +01:00
|
|
|
goto end;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Allocate enough space */
|
|
|
|
head_length+= ELEMENTS_RESERVED_FOR_MAIN_PART * ROW_EXTENT_SIZE;
|
|
|
|
|
|
|
|
/* The first segment size is stored in 'row_length' */
|
2011-08-15 15:39:53 +02:00
|
|
|
row_length= find_where_to_split_row(share, row, row->extents_count +
|
|
|
|
ELEMENTS_RESERVED_FOR_MAIN_PART-1,
|
2007-01-18 20:38:14 +01:00
|
|
|
max_page_size);
|
|
|
|
|
2010-10-07 11:51:34 +02:00
|
|
|
full_page_size= MAX_TAIL_SIZE(share->block_size);
|
2007-01-18 20:38:14 +01:00
|
|
|
position= 0;
|
2011-08-15 15:39:53 +02:00
|
|
|
rest_length= head_length - row_length;
|
|
|
|
if (rest_length <= full_page_size)
|
2007-01-18 20:38:14 +01:00
|
|
|
position= ELEMENTS_RESERVED_FOR_MAIN_PART -2; /* Only head and tail */
|
|
|
|
if (find_head(info, row_length, position))
|
|
|
|
goto abort;
|
2007-09-05 01:57:53 +02:00
|
|
|
row->space_on_head_page= row_length;
|
UNDO of rows now puts back all part of the row on their original pages and positions
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Added --debug-on option to mysqld (to be able to turn off DBUG with --debug-on=0)
Fixed some bugs with 'non_flushable' marking of bitmap pages
Don't use 'non_flushable' marking of bitmap pages for not transactional tables
SHOW CREATE TABLE now shows if table was created with page checksums
Fixed a lot of bugs with BLOB handling in case of update/REDO and UNDO
More tests (especially for blobs) and DBUG_ASSERTS()
More readable output from maria_read_log and maria_chk
Fixed wrong shift that caused Maria to crash on files > 4G
Mark tables as crashed if REDO fails
dbug/dbug.c:
Changed to use my_bool (allowed me to remove some windows specific code)
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Removed initialization of variables if not needed
include/my_dbug.h:
Use my_bool for some functions that was defined as BOOLEAN in dbug.c code
Added DBUGGER_ON/DEBUGGER_OFF to speed up execution when DBUG is not used
include/my_global.h:
Define my_bool early
Increase MY_HOW_OFTEN_TO_WRITE as computers are now faster than 10 years ago
mysql-test/mysql-test-run.pl:
Added debug-on=0 to speed up tests
mysql-test/r/maria-recovery.result:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/r/maria.result:
Added testing of page checksums
mysql-test/t/crash_commit_before-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-bitmap-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery.test:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/t/maria.test:
Added testing of page checksums
sql/mysqld.cc:
Added --debug-on option (to be able to turn of DBUG with --debug-on=0)
Indentation fixes
Removed end spaces
sql/sql_show.cc:
Allow update_create_info() to inform MySQL if PACK_KEYS, NO_PACK_KEYS, CHECKSUM, PAGE_CHECKSUM or DELAY_KEY_WRITE is used
storage/maria/Makefile.am:
Added ma_test_big.sh
storage/maria/ha_maria.cc:
Store in create_info if page checksums are used (For SHOW CREATE TABLE)
storage/maria/ma_bitmap.c:
Added _ma_bitmap_wait_or_flush() to cause reader of bitmap pages to wait with reading until bitmap is flushed.
Use TAIL_PAGE_COUNT_MARKER for tail pages
Set 'sub_blocks' for and only for the head page or for the first extent of a blob. This is needed for store_extent_info() to be able to set START_EXTENT_BIT's
Don't allocate more than 0x3ffff pages in one extent (We need bit 0x4000 as a START_EXTENT_BIT)
Increase the calculated 'head_length' with the number of bytes used for extents.
Update row->space_on_head_page also in _ma_bitmap_find_new_place()
Make _ma_bitmap_get_page_bits() global. (Needed for UNDO handling)
Changed _ma_bitmap_flushable() to take MARIA_HA instead of MARIA_SHARE.
This was needed to be able to mark the handler if we had a 'non_flushable' call pending or not.
Don't use 'non_flushable' marking of bitmap pages for not transactional tables.
Added BLOCKUSED_USE_ORG_BITMAP handling also for tail pages.
Added more DBUG_ASSERT() to find possible errors in other code
Some code simplifications by adding new local variables
storage/maria/ma_blockrec.c:
UNDO of rows now puts back all part of the row on their original pages and positions.
Changed UNDO of DELETE and UNDO of UPDATE to contain information about the original length of data on head block and also extent information
This changes a lot of logic as now an insert of a row on a page may happen to any position (and not just to the first or next free)
Use PAGE_COUNT to mark if an extent is the start of a blob. (Needed for extent_to_bitmap_blocks())
Added check_directory() for checking that directory entries are correct.
Added checking of row checksums when reading rows (with EXTRA_DEBUG)
Added make_space_for_directory() and extend_directory() for doing expansion of directory
Added get_rowpos_in_head_or_tail_page() to be able to store head/tail on original position in UNDO
Added extent_to_bitmap_blocks() to be able to generate original bitmap blocks from UNDO entry
Added _ma_update_at_original_place() for UNDO of DELETES
Added row->min_length to hold minimum required space needed on head page
Changed find_free_position() to use make_space_for_directory()
Changed make_empty_page() to allow optional creation of directory entry
Changed delete_head_or_tail() and _ma_apply_undo_row_insert() to not copy pagecache block (speed optimization)
Changed _ma_apply_redo_insert_row_head_or_tail() to be able to insert new row at any position on 'new' page
Changed _ma_apply_undo_row_delete() and _ma_apply_undo_row_update() to put row in its original position
Ensure allocation of tail blocks are of at least MIN_TAIL_SIZE.
Ensure we store pages in pinned pages even if read failed. (If not we will have pages pinned forever in page cache)
Write original extent information in UNDO entry, not compacted ones (we need position to tails!)
When setting BLOCKUSED_USED, don't clear other bits (we have to preserve BLOCKUSED_USE_ORG_BITMAP)
Fixed some bugs in directory handling
Fixed bug where we wrote wrong lsn to blob pages
Added separate blob_buffer for fixing bug when updating row that had char/varchar that spanned several pages and also had blobs
Ensure we call _ma_bitmap_flushable() also in case of errors
When doing an update, first delete old entries, then search in bitmap for where to put new information
Info->s -> share
Rowid -> rowid
More DBUG_ASSERT()
storage/maria/ma_blockrec.h:
Added START_EXTENT_BIT and TAIL_PAGE_COUNT_MARKER
Added _ma_bitmap_wait_or_flush() and _ma_bitmap_get_page_bits()
storage/maria/ma_check.c:
Don't write extra empty line if there is no deleted blocks
Ignore START_EXTENT_BIT's in page count
Call _ma_fast_unlock_key_del() to free key_del link
storage/maria/ma_close.c:
Ensure that used_key_del is 0. (If not, someone forgot to call _ma_unlock_key_del())
storage/maria/ma_create.c:
Changed constant to macro
storage/maria/ma_delete.c:
For deleted keys, log also position to row
storage/maria/ma_extra.c:
Release blob buffer at maria_reset() if bigger than MARIA_SMALL_BLOB_BUFFER
storage/maria/ma_key_recover.c:
Added bzero() of LSN that confused paged cache in case of uninitialized block
Mark file crashed if applying of index changes fails
Added calls to _ma_fast_unlock_key_del() for protection of shared key_del link.
storage/maria/ma_locking.c:
Added usage of MARIA_FILE_OPEN_COUNT_OFFSET
Added _ma_mark_file_crashed()
storage/maria/ma_loghandler.c:
Fixed bug where we logged uninitialized memory
storage/maria/ma_open.c:
Moved state->changed to be at start of state info on disk to allow one to easily mark files as crashed
storage/maria/ma_page.c:
Disable 'dummy' checksumming of pages as this gave false warnings.
(Need to investigate if this is ever needed)
storage/maria/ma_pagecache.c:
Fixed wrong shift that caused Maria to crash on files > 4G
storage/maria/ma_recovery.c:
In case of errors, start writing on new line if we where in %## %## printing mode (Made errors more readable)
Changed global variable name from warnings -> recovery_warnings
Use MARIA_FILE_CREATE_RENAME_LSN_OFFSET instead of constant
Removed special handling of row position for deleted keys. Keys now always includes row positions
_ma_apply_undo_row_delete() now gets page and row position
Added check that we don't loop forever when handling undo's (in case of bug in undo chain)
Print name of failed REDO/UNDO
storage/maria/ma_recovery.h:
Removed old comment
storage/maria/ma_static.c:
Chaned version number of Maria files to not accidently use old ones (becasue of change of ordering of status variables)
storage/maria/ma_test2.c:
Added option -u to specify number of rows to update
Changed old option -u to be -A, as for ma_test1
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
First created blob is now of max blob length to ensure we have at least one big blob in the table
storage/maria/ma_test_all.sh:
More tests
storage/maria/ma_test_recovery.expected:
Updated results
storage/maria/ma_test_recovery:
Changed tests to use bigger blobs (not just 1K)
Added new tests that tests recovery of update with blobs
Removed comparision of .MAD file as it's not guranteed that recovery from scratch gives identical data file as original update
(compact_page() may be called at different times during normal execution and during REDO)
storage/maria/ma_update.c:
Simplify code (changed * to if)
storage/maria/maria_chk.c:
Make output more readable
storage/maria/maria_def.h:
Changed 'changed' to int to prepare for more bits
Added 2 more bytes to status information
Added 'st_mara_row->min_length' for storing min length needed on head page
Added 'st_mara_handler->blob_buff & blob_buff_size' for storing blobs
Moved all tunning parameters into one block
Added MARIA_SMALL_BLOB_BUFFER
Added _ma_mark_file_crashed()
storage/myisam/mi_test2.c:
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
storage/maria/ma_test_big.sh:
Testing of insert, update, delete, recovery and undo of rows with blobs
Thanks to the random-ness of ma_test2 this is likely to find most bugs in the row handling
2007-12-30 21:40:03 +01:00
|
|
|
|
2007-01-18 20:38:14 +01:00
|
|
|
if (write_rest_of_head(info, position, rest_length))
|
|
|
|
goto abort;
|
|
|
|
|
|
|
|
end:
|
|
|
|
blocks->block= dynamic_element(&info->bitmap_blocks, position,
|
|
|
|
MARIA_BITMAP_BLOCK*);
|
|
|
|
blocks->block->sub_blocks= ELEMENTS_RESERVED_FOR_MAIN_PART - position;
|
|
|
|
/* First block's page_count is for all blocks */
|
|
|
|
blocks->count= info->bitmap_blocks.elements - position;
|
|
|
|
res= 0;
|
|
|
|
|
|
|
|
abort:
|
2011-07-13 21:10:18 +02:00
|
|
|
mysql_mutex_unlock(&share->bitmap.bitmap_lock);
|
2007-01-18 20:38:14 +01:00
|
|
|
DBUG_RETURN(res);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
Find where to put row on update (when head page is already defined)
|
|
|
|
|
|
|
|
SYNOPSIS
|
|
|
|
_ma_bitmap_find_new_place()
|
|
|
|
info Maria handler
|
|
|
|
row Information about row to write
|
|
|
|
page On which page original row was stored
|
|
|
|
free_size Free size on head page
|
|
|
|
blocks Store data about allocated places here
|
|
|
|
|
|
|
|
NOTES
|
|
|
|
This function is only called when the new row can't fit in the space of
|
|
|
|
the old row in the head page.
|
|
|
|
|
|
|
|
This is essentially the same as _ma_bitmap_find_place() except that
|
|
|
|
we don't call find_head() to search in bitmaps where to put the page.
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
0 ok
|
|
|
|
1 error
|
|
|
|
*/
|
|
|
|
|
|
|
|
my_bool _ma_bitmap_find_new_place(MARIA_HA *info, MARIA_ROW *row,
|
2008-01-10 20:21:36 +01:00
|
|
|
pgcache_page_no_t page, uint free_size,
|
2007-01-18 20:38:14 +01:00
|
|
|
MARIA_BITMAP_BLOCKS *blocks)
|
|
|
|
{
|
|
|
|
MARIA_SHARE *share= info->s;
|
|
|
|
my_bool res= 1;
|
2007-11-28 20:38:30 +01:00
|
|
|
uint position;
|
2007-01-18 20:38:14 +01:00
|
|
|
uint head_length, row_length, rest_length, extents_length;
|
2007-10-19 23:24:22 +02:00
|
|
|
ulonglong bitmap_page;
|
2007-01-18 20:38:14 +01:00
|
|
|
DBUG_ENTER("_ma_bitmap_find_new_place");
|
|
|
|
|
|
|
|
blocks->count= 0;
|
|
|
|
blocks->tail_page_skipped= blocks->page_skipped= 0;
|
|
|
|
row->extents_count= 0;
|
|
|
|
info->bitmap_blocks.elements= ELEMENTS_RESERVED_FOR_MAIN_PART;
|
|
|
|
|
2011-07-13 21:10:18 +02:00
|
|
|
mysql_mutex_lock(&share->bitmap.bitmap_lock);
|
2007-01-18 20:38:14 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
First allocate all blobs (so that we can find out the needed size for
|
|
|
|
the main block).
|
|
|
|
*/
|
|
|
|
if (row->blob_length && allocate_blobs(info, row))
|
|
|
|
goto abort;
|
|
|
|
|
2008-01-10 20:21:36 +01:00
|
|
|
/* Switch bitmap to current head page */
|
2011-06-29 23:37:12 +02:00
|
|
|
bitmap_page= page - page % share->bitmap.pages_covered;
|
2008-01-10 20:21:36 +01:00
|
|
|
|
|
|
|
if (share->bitmap.page != bitmap_page &&
|
|
|
|
_ma_change_bitmap_page(info, &share->bitmap, bitmap_page))
|
|
|
|
goto abort;
|
|
|
|
|
2007-01-18 20:38:14 +01:00
|
|
|
extents_length= row->extents_count * ROW_EXTENT_SIZE;
|
UNDO of rows now puts back all part of the row on their original pages and positions
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Added --debug-on option to mysqld (to be able to turn of DBUG with --debug-on=0)
Fixed some bugs with 'non_flushable' marking of bitmap pages
Don't use 'non_flushable' marking of bitmap pages for not transactional tables
SHOW CREATE TABLE now shows if table was created with page checksums
Fixed a lot of bugs with BLOB handling in case of update/REDO and UNDO
More tests (especially for blobs) and DBUG_ASSERTS()
More readable output from maria_read_log and maria_chk
Fixed wrong shift that caused Maria to crash on files > 4G
Mark tables as crashed of REDO fails
dbug/dbug.c:
Changed to use my_bool (allowed me to remove some windows specific code)
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Removed initialization of variables if not needed
include/my_dbug.h:
Use my_bool for some functions that was defined as BOOLEAN in dbug.c code
Added DBUGGER_ON/DEBUGGER_OFF to speed up execution when DBUG is not used
include/my_global.h:
Define my_bool early
Increase MY_HOW_OFTEN_TO_WRITE as computers are now faster than 10 years ago
mysql-test/mysql-test-run.pl:
Added debug-on=0 to speed up tests
mysql-test/r/maria-recovery.result:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/r/maria.result:
Added testing of page checksums
mysql-test/t/crash_commit_before-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-bitmap-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery.test:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/t/maria.test:
Added testing of page checksums
sql/mysqld.cc:
Added --debug-on option (to be able to turn of DBUG with --debug-on=0)
Indentation fixes
Removed end spaces
sql/sql_show.cc:
Allow update_create_info() to inform MySQL if PACK_KEYS, NO_PACK_KEYS, CHECKSUM, PAGE_CHECKSUM or DELAY_KEY_WRITE is used
storage/maria/Makefile.am:
Added ma_test_big.sh
storage/maria/ha_maria.cc:
Store in create_info if page checksums are used (For SHOW CREATE TABLE)
storage/maria/ma_bitmap.c:
Added _ma_bitmap_wait_or_flush() to cause reader of bitmap pages to wait with reading until bitmap is flushed.
Use TAIL_PAGE_COUNT_MARKER for tail pages
Set 'sub_blocks' for and only for the head page or for the first extent of a blob. This is needed for store_extent_info() to be able to set START_EXTENT_BIT's
Don't allocate more than 0x3ffff pages in one extent (We need bit 0x4000 as a START_EXTENT_BIT)
Increase the calculated 'head_length' with the number of bytes used for extents.
Update row->space_on_head_page also in _ma_bitmap_find_new_place()
Make _ma_bitmap_get_page_bits() global. (Needed for UNDO handling)
Changed _ma_bitmap_flushable() to take MARIA_HA instead of MARIA_SHARE.
This was needed to be able to mark the handler if we had a 'non_flushable' call pending or not.
Don't use 'non_flushable' marking of bitmap pages for not transactional tables.
Added BLOCKUSED_USE_ORG_BITMAP handling also for tail pages.
Added more DBUG_ASSERT() to find possible errors in other code
Some code simplications by adding new local variables
storage/maria/ma_blockrec.c:
UNDO of rows now puts back all part of the row on their original pages and positions.
Changed UNDO of DELETE and UNDO of UPDATE to contain information about the original length of data on head block and also extent information
This changes a lot of logic as now an insert of a row on a page may happen to any position (and not just to the first or next free)
Use PAGE_COUNT to mark if an extent is the start of of a blob. (Needed for extent_to_bitmap_blocks())
Added check_directory() for checking that directroy entries are correct.
Added checking of row checksums when reading rows (with EXTRA_DEBUG)
Added make_space_for_directory() and extend_directory() for doing expansion of directory
Added get_rowpos_in_head_or_tail_page() to be able to store head/tail on original position in UNDO
Added extent_to_bitmap_blocks() to be able to generate original bitmap blocks from UNDO entry
Added _ma_update_at_original_place() for UNDO of DELETES
Added row->min_length to hold minmum required space needed on head page
Changed find_free_position() to use make_space_for_directory()
Changed make_empty_page() to allow optional creation of directory entry
Changed delete_head_or_tail() and _ma_apply_undo_row_isnert() to not copy pagecache block (speed optimization)
Changed _ma_apply_redo_insert_row_head_or_tail() to be able to insert new row at any position on 'new' page
Changed _ma_apply_undo_row_delete() and _ma_apply_undo_row_update() to put row in it's original position
Ensure allocation of tail blocks are of at least MIN_TAIL_SIZE.
Ensure we store pages in pinned pages even if read failed. (If not we will have pages pinned forever in page cache)
Write original extent information in UNDO entry, not compacted ones (we need position to tails!)
When setting BLOCKUSED_USED, don't clear other bits (we have to preserve BLOCKUSED_USE_ORG_BITMAP)
Fixed som bugs in directory handling
Fixed bug where we wrote wrong lsn to blob pages
Added separate blob_buffer for fixing bug when updating row that had char/varchar that spanned several pages and also had blobs
Ensure we call _ma_bitmap_flushable() also in case of errors
When doing an update, first delete old entries, then search in bitmap for where to put new information
Info->s -> share
Rowid -> rowid
More DBUG_ASSERT()
storage/maria/ma_blockrec.h:
Added START_EXTENT_BIT and TAIL_PAGE_COUNT_MARKER
Added _ma_bitmap_wait_or_flush() and _ma_bitmap_get_page_bits()
storage/maria/ma_check.c:
Don't write extra empty line if there is no deleted blocks
Ignore START_EXTENT_BIT's in page count
Call _ma_fast_unlock_key_del() to free key_del link
storage/maria/ma_close.c:
Ensure that used_key_del is 0. (If not, someone forgot to call _ma_unlock_key_del())
storage/maria/ma_create.c:
Changed constant to macro
storage/maria/ma_delete.c:
For deleted keys, log also position to row
storage/maria/ma_extra.c:
Release blob buffer at maria_reset() if bigger than MARIA_SMALL_BLOB_BUFFER
storage/maria/ma_key_recover.c:
Added bzero() of LSN that confused paged cache in case of uninitialized block
Mark file crashed if applying of index changes fails
Added calls to _ma_fast_unlock_key_del() for protection of shared key_del link.
storage/maria/ma_locking.c:
Added usage of MARIA_FILE_OPEN_COUNT_OFFSET
Added _ma_mark_file_crashed()
storage/maria/ma_loghandler.c:
Fixed bug where we logged uninitialized memory
storage/maria/ma_open.c:
Moved state->changed to be at start of state info on disk to allow one to easly mark files as crashed
storage/maria/ma_page.c:
Disable 'dummy' checksumming of pages as this gave false warnings.
(Need to investigate if this is ever needed)
storage/maria/ma_pagecache.c:
Fixed wrong shift that caused Maria to crash on files > 4G
storage/maria/ma_recovery.c:
In case of errors, start writing on new line if we where in %## %## printing mode (Made errors more readable)
Changed global variable name from warnings -> recovery_warnings
Use MARIA_FILE_CREATE_RENAME_LSN_OFFSET instead of constant
Removed special handling of row position for deleted keys. Keys now always includes row positions
_ma_apply_undo_row_delete() now gets page and row position
Added check that we don't loop forever when handling undo's (in case of bug in undo chain)
Print name of failed REDO/UNDO
storage/maria/ma_recovery.h:
Removed old comment
storage/maria/ma_static.c:
Chaned version number of Maria files to not accidently use old ones (becasue of change of ordering of status variables)
storage/maria/ma_test2.c:
Added option -u to specify number of rows to update
Changed old option -u to be -A, as for ma_test1
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
First created blob is now of max blob length to ensure we have at least one big blob in the table
storage/maria/ma_test_all.sh:
More tests
storage/maria/ma_test_recovery.expected:
Updated results
storage/maria/ma_test_recovery:
Changed tests to use bigger blobs (not just 1K)
Added new tests that tests recovery of update with blobs
Removed comparision of .MAD file as it's not guranteed that recovery from scratch gives identical data file as original update
(compact_page() may be called at different times during normal execution and during REDO)
storage/maria/ma_update.c:
Simplify code (changed * to if)
storage/maria/maria_chk.c:
Make output more readable
storage/maria/maria_def.h:
Changed 'changed' to int to prepare for more bits
Added 2 more bytes to status information
Added 'st_mara_row->min_length' for storing min length needed on head page
Added 'st_mara_handler->blob_buff & blob_buff_size' for storing blobs
Moved all tunning parameters into one block
Added MARIA_SMALL_BLOB_BUFFER
Added _ma_mark_file_crashed()
storage/myisam/mi_test2.c:
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
storage/maria/ma_test_big.sh:
Testing of insert, update, delete, recovery and undo of rows with blobs
Thanks to the random-ness of ma_test2 this is likely to find most bugs in the row handling
2007-12-30 21:40:03 +01:00
|
|
|
if ((head_length= (row->head_length + extents_length + 3)) <= free_size)
|
2007-01-18 20:38:14 +01:00
|
|
|
{
|
|
|
|
/* Main row part fits into one page */
|
|
|
|
position= ELEMENTS_RESERVED_FOR_MAIN_PART - 1;
|
|
|
|
use_head(info, page, head_length, position);
|
UNDO of rows now puts back all part of the row on their original pages and positions
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Added --debug-on option to mysqld (to be able to turn of DBUG with --debug-on=0)
Fixed some bugs with 'non_flushable' marking of bitmap pages
Don't use 'non_flushable' marking of bitmap pages for not transactional tables
SHOW CREATE TABLE now shows if table was created with page checksums
Fixed a lot of bugs with BLOB handling in case of update/REDO and UNDO
More tests (especially for blobs) and DBUG_ASSERTS()
More readable output from maria_read_log and maria_chk
Fixed wrong shift that caused Maria to crash on files > 4G
Mark tables as crashed of REDO fails
dbug/dbug.c:
Changed to use my_bool (allowed me to remove some windows specific code)
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Removed initialization of variables if not needed
include/my_dbug.h:
Use my_bool for some functions that was defined as BOOLEAN in dbug.c code
Added DBUGGER_ON/DEBUGGER_OFF to speed up execution when DBUG is not used
include/my_global.h:
Define my_bool early
Increase MY_HOW_OFTEN_TO_WRITE as computers are now faster than 10 years ago
mysql-test/mysql-test-run.pl:
Added debug-on=0 to speed up tests
mysql-test/r/maria-recovery.result:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/r/maria.result:
Added testing of page checksums
mysql-test/t/crash_commit_before-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-bitmap-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery.test:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/t/maria.test:
Added testing of page checksums
sql/mysqld.cc:
Added --debug-on option (to be able to turn of DBUG with --debug-on=0)
Indentation fixes
Removed end spaces
sql/sql_show.cc:
Allow update_create_info() to inform MySQL if PACK_KEYS, NO_PACK_KEYS, CHECKSUM, PAGE_CHECKSUM or DELAY_KEY_WRITE is used
storage/maria/Makefile.am:
Added ma_test_big.sh
storage/maria/ha_maria.cc:
Store in create_info if page checksums are used (For SHOW CREATE TABLE)
storage/maria/ma_bitmap.c:
Added _ma_bitmap_wait_or_flush() to cause reader of bitmap pages to wait with reading until bitmap is flushed.
Use TAIL_PAGE_COUNT_MARKER for tail pages
Set 'sub_blocks' for and only for the head page or for the first extent of a blob. This is needed for store_extent_info() to be able to set START_EXTENT_BIT's
Don't allocate more than 0x3ffff pages in one extent (We need bit 0x4000 as a START_EXTENT_BIT)
Increase the calculated 'head_length' with the number of bytes used for extents.
Update row->space_on_head_page also in _ma_bitmap_find_new_place()
Make _ma_bitmap_get_page_bits() global. (Needed for UNDO handling)
Changed _ma_bitmap_flushable() to take MARIA_HA instead of MARIA_SHARE.
This was needed to be able to mark the handler if we had a 'non_flushable' call pending or not.
Don't use 'non_flushable' marking of bitmap pages for not transactional tables.
Added BLOCKUSED_USE_ORG_BITMAP handling also for tail pages.
Added more DBUG_ASSERT() to find possible errors in other code
Some code simplications by adding new local variables
storage/maria/ma_blockrec.c:
UNDO of rows now puts back all part of the row on their original pages and positions.
Changed UNDO of DELETE and UNDO of UPDATE to contain information about the original length of data on head block and also extent information
This changes a lot of logic as now an insert of a row on a page may happen to any position (and not just to the first or next free)
Use PAGE_COUNT to mark if an extent is the start of of a blob. (Needed for extent_to_bitmap_blocks())
Added check_directory() for checking that directroy entries are correct.
Added checking of row checksums when reading rows (with EXTRA_DEBUG)
Added make_space_for_directory() and extend_directory() for doing expansion of directory
Added get_rowpos_in_head_or_tail_page() to be able to store head/tail on original position in UNDO
Added extent_to_bitmap_blocks() to be able to generate original bitmap blocks from UNDO entry
Added _ma_update_at_original_place() for UNDO of DELETES
Added row->min_length to hold minmum required space needed on head page
Changed find_free_position() to use make_space_for_directory()
Changed make_empty_page() to allow optional creation of directory entry
Changed delete_head_or_tail() and _ma_apply_undo_row_isnert() to not copy pagecache block (speed optimization)
Changed _ma_apply_redo_insert_row_head_or_tail() to be able to insert new row at any position on 'new' page
Changed _ma_apply_undo_row_delete() and _ma_apply_undo_row_update() to put row in it's original position
Ensure allocation of tail blocks are of at least MIN_TAIL_SIZE.
Ensure we store pages in pinned pages even if read failed. (If not we will have pages pinned forever in page cache)
Write original extent information in UNDO entry, not compacted ones (we need position to tails!)
When setting BLOCKUSED_USED, don't clear other bits (we have to preserve BLOCKUSED_USE_ORG_BITMAP)
Fixed som bugs in directory handling
Fixed bug where we wrote wrong lsn to blob pages
Added separate blob_buffer for fixing bug when updating row that had char/varchar that spanned several pages and also had blobs
Ensure we call _ma_bitmap_flushable() also in case of errors
When doing an update, first delete old entries, then search in bitmap for where to put new information
Info->s -> share
Rowid -> rowid
More DBUG_ASSERT()
storage/maria/ma_blockrec.h:
Added START_EXTENT_BIT and TAIL_PAGE_COUNT_MARKER
Added _ma_bitmap_wait_or_flush() and _ma_bitmap_get_page_bits()
storage/maria/ma_check.c:
Don't write extra empty line if there is no deleted blocks
Ignore START_EXTENT_BIT's in page count
Call _ma_fast_unlock_key_del() to free key_del link
storage/maria/ma_close.c:
Ensure that used_key_del is 0. (If not, someone forgot to call _ma_unlock_key_del())
storage/maria/ma_create.c:
Changed constant to macro
storage/maria/ma_delete.c:
For deleted keys, log also position to row
storage/maria/ma_extra.c:
Release blob buffer at maria_reset() if bigger than MARIA_SMALL_BLOB_BUFFER
storage/maria/ma_key_recover.c:
Added bzero() of LSN that confused paged cache in case of uninitialized block
Mark file crashed if applying of index changes fails
Added calls to _ma_fast_unlock_key_del() for protection of shared key_del link.
storage/maria/ma_locking.c:
Added usage of MARIA_FILE_OPEN_COUNT_OFFSET
Added _ma_mark_file_crashed()
storage/maria/ma_loghandler.c:
Fixed bug where we logged uninitialized memory
storage/maria/ma_open.c:
Moved state->changed to be at start of state info on disk to allow one to easly mark files as crashed
storage/maria/ma_page.c:
Disable 'dummy' checksumming of pages as this gave false warnings.
(Need to investigate if this is ever needed)
storage/maria/ma_pagecache.c:
Fixed wrong shift that caused Maria to crash on files > 4G
storage/maria/ma_recovery.c:
In case of errors, start writing on new line if we where in %## %## printing mode (Made errors more readable)
Changed global variable name from warnings -> recovery_warnings
Use MARIA_FILE_CREATE_RENAME_LSN_OFFSET instead of constant
Removed special handling of row position for deleted keys. Keys now always includes row positions
_ma_apply_undo_row_delete() now gets page and row position
Added check that we don't loop forever when handling undo's (in case of bug in undo chain)
Print name of failed REDO/UNDO
storage/maria/ma_recovery.h:
Removed old comment
storage/maria/ma_static.c:
Chaned version number of Maria files to not accidently use old ones (becasue of change of ordering of status variables)
storage/maria/ma_test2.c:
Added option -u to specify number of rows to update
Changed old option -u to be -A, as for ma_test1
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
First created blob is now of max blob length to ensure we have at least one big blob in the table
storage/maria/ma_test_all.sh:
More tests
storage/maria/ma_test_recovery.expected:
Updated results
storage/maria/ma_test_recovery:
Changed tests to use bigger blobs (not just 1K)
Added new tests that tests recovery of update with blobs
Removed comparison of .MAD file as it's not guaranteed that recovery from scratch gives identical data file as original update
(compact_page() may be called at different times during normal execution and during REDO)
storage/maria/ma_update.c:
Simplify code (changed * to if)
storage/maria/maria_chk.c:
Make output more readable
storage/maria/maria_def.h:
Changed 'changed' to int to prepare for more bits
Added 2 more bytes to status information
Added 'st_mara_row->min_length' for storing min length needed on head page
Added 'st_mara_handler->blob_buff & blob_buff_size' for storing blobs
Moved all tuning parameters into one block
Added MARIA_SMALL_BLOB_BUFFER
Added _ma_mark_file_crashed()
storage/myisam/mi_test2.c:
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
storage/maria/ma_test_big.sh:
Testing of insert, update, delete, recovery and undo of rows with blobs
Thanks to the random-ness of ma_test2 this is likely to find most bugs in the row handling
2007-12-30 21:40:03 +01:00
|
|
|
row->space_on_head_page= head_length;
|
2007-01-18 20:38:14 +01:00
|
|
|
goto end;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Allocate enough space */
|
|
|
|
head_length+= ELEMENTS_RESERVED_FOR_MAIN_PART * ROW_EXTENT_SIZE;
|
|
|
|
|
2011-08-15 15:39:53 +02:00
|
|
|
/*
|
|
|
|
The first segment size is stored in 'row_length'
|
|
|
|
We have to add ELEMENTS_RESERVED_FOR_MAIN_PART here as the extent
|
|
|
|
information may be up to this size when the header splits.
|
|
|
|
*/
|
|
|
|
row_length= find_where_to_split_row(share, row, row->extents_count +
|
|
|
|
ELEMENTS_RESERVED_FOR_MAIN_PART-1,
|
|
|
|
free_size);
|
2007-01-18 20:38:14 +01:00
|
|
|
|
|
|
|
position= 0;
|
2011-08-15 15:39:53 +02:00
|
|
|
rest_length= head_length - row_length;
|
|
|
|
if (rest_length <= MAX_TAIL_SIZE(share->block_size))
|
2007-01-18 20:38:14 +01:00
|
|
|
position= ELEMENTS_RESERVED_FOR_MAIN_PART -2; /* Only head and tail */
|
|
|
|
use_head(info, page, row_length, position);
|
UNDO of rows now puts back all part of the row on their original pages and positions
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Added --debug-on option to mysqld (to be able to turn off DBUG with --debug-on=0)
Fixed some bugs with 'non_flushable' marking of bitmap pages
Don't use 'non_flushable' marking of bitmap pages for not transactional tables
SHOW CREATE TABLE now shows if table was created with page checksums
Fixed a lot of bugs with BLOB handling in case of update/REDO and UNDO
More tests (especially for blobs) and DBUG_ASSERTS()
More readable output from maria_read_log and maria_chk
Fixed wrong shift that caused Maria to crash on files > 4G
Mark tables as crashed if REDO fails
dbug/dbug.c:
Changed to use my_bool (allowed me to remove some windows specific code)
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Removed initialization of variables if not needed
include/my_dbug.h:
Use my_bool for some functions that were defined as BOOLEAN in dbug.c code
Added DEBUGGER_ON/DEBUGGER_OFF to speed up execution when DBUG is not used
include/my_global.h:
Define my_bool early
Increase MY_HOW_OFTEN_TO_WRITE as computers are now faster than 10 years ago
mysql-test/mysql-test-run.pl:
Added debug-on=0 to speed up tests
mysql-test/r/maria-recovery.result:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/r/maria.result:
Added testing of page checksums
mysql-test/t/crash_commit_before-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-bitmap-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery.test:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/t/maria.test:
Added testing of page checksums
sql/mysqld.cc:
Added --debug-on option (to be able to turn off DBUG with --debug-on=0)
Indentation fixes
Removed end spaces
sql/sql_show.cc:
Allow update_create_info() to inform MySQL if PACK_KEYS, NO_PACK_KEYS, CHECKSUM, PAGE_CHECKSUM or DELAY_KEY_WRITE is used
storage/maria/Makefile.am:
Added ma_test_big.sh
storage/maria/ha_maria.cc:
Store in create_info if page checksums are used (For SHOW CREATE TABLE)
storage/maria/ma_bitmap.c:
Added _ma_bitmap_wait_or_flush() to cause reader of bitmap pages to wait with reading until bitmap is flushed.
Use TAIL_PAGE_COUNT_MARKER for tail pages
Set 'sub_blocks' for and only for the head page or for the first extent of a blob. This is needed for store_extent_info() to be able to set START_EXTENT_BIT's
Don't allocate more than 0x3fff pages in one extent (We need bit 0x4000 as a START_EXTENT_BIT)
Increase the calculated 'head_length' with the number of bytes used for extents.
Update row->space_on_head_page also in _ma_bitmap_find_new_place()
Make _ma_bitmap_get_page_bits() global. (Needed for UNDO handling)
Changed _ma_bitmap_flushable() to take MARIA_HA instead of MARIA_SHARE.
This was needed to be able to mark the handler if we had a 'non_flushable' call pending or not.
Don't use 'non_flushable' marking of bitmap pages for not transactional tables.
Added BLOCKUSED_USE_ORG_BITMAP handling also for tail pages.
Added more DBUG_ASSERT() to find possible errors in other code
Some code simplifications by adding new local variables
storage/maria/ma_blockrec.c:
UNDO of rows now puts back all part of the row on their original pages and positions.
Changed UNDO of DELETE and UNDO of UPDATE to contain information about the original length of data on head block and also extent information
This changes a lot of logic as now an insert of a row on a page may happen to any position (and not just to the first or next free)
Use PAGE_COUNT to mark if an extent is the start of a blob. (Needed for extent_to_bitmap_blocks())
Added check_directory() for checking that directory entries are correct.
Added checking of row checksums when reading rows (with EXTRA_DEBUG)
Added make_space_for_directory() and extend_directory() for doing expansion of directory
Added get_rowpos_in_head_or_tail_page() to be able to store head/tail on original position in UNDO
Added extent_to_bitmap_blocks() to be able to generate original bitmap blocks from UNDO entry
Added _ma_update_at_original_place() for UNDO of DELETES
Added row->min_length to hold minimum required space needed on head page
Changed find_free_position() to use make_space_for_directory()
Changed make_empty_page() to allow optional creation of directory entry
Changed delete_head_or_tail() and _ma_apply_undo_row_insert() to not copy pagecache block (speed optimization)
Changed _ma_apply_redo_insert_row_head_or_tail() to be able to insert new row at any position on 'new' page
Changed _ma_apply_undo_row_delete() and _ma_apply_undo_row_update() to put row in its original position
Ensure allocation of tail blocks are of at least MIN_TAIL_SIZE.
Ensure we store pages in pinned pages even if read failed. (If not we will have pages pinned forever in page cache)
Write original extent information in UNDO entry, not compacted ones (we need position to tails!)
When setting BLOCKUSED_USED, don't clear other bits (we have to preserve BLOCKUSED_USE_ORG_BITMAP)
Fixed some bugs in directory handling
Fixed bug where we wrote wrong lsn to blob pages
Added separate blob_buffer for fixing bug when updating row that had char/varchar that spanned several pages and also had blobs
Ensure we call _ma_bitmap_flushable() also in case of errors
When doing an update, first delete old entries, then search in bitmap for where to put new information
Info->s -> share
Rowid -> rowid
More DBUG_ASSERT()
storage/maria/ma_blockrec.h:
Added START_EXTENT_BIT and TAIL_PAGE_COUNT_MARKER
Added _ma_bitmap_wait_or_flush() and _ma_bitmap_get_page_bits()
storage/maria/ma_check.c:
Don't write extra empty line if there are no deleted blocks
Ignore START_EXTENT_BIT's in page count
Call _ma_fast_unlock_key_del() to free key_del link
storage/maria/ma_close.c:
Ensure that used_key_del is 0. (If not, someone forgot to call _ma_unlock_key_del())
storage/maria/ma_create.c:
Changed constant to macro
storage/maria/ma_delete.c:
For deleted keys, log also position to row
storage/maria/ma_extra.c:
Release blob buffer at maria_reset() if bigger than MARIA_SMALL_BLOB_BUFFER
storage/maria/ma_key_recover.c:
Added bzero() of LSN that confused paged cache in case of uninitialized block
Mark file crashed if applying of index changes fails
Added calls to _ma_fast_unlock_key_del() for protection of shared key_del link.
storage/maria/ma_locking.c:
Added usage of MARIA_FILE_OPEN_COUNT_OFFSET
Added _ma_mark_file_crashed()
storage/maria/ma_loghandler.c:
Fixed bug where we logged uninitialized memory
storage/maria/ma_open.c:
Moved state->changed to be at start of state info on disk to allow one to easily mark files as crashed
storage/maria/ma_page.c:
Disable 'dummy' checksumming of pages as this gave false warnings.
(Need to investigate if this is ever needed)
storage/maria/ma_pagecache.c:
Fixed wrong shift that caused Maria to crash on files > 4G
storage/maria/ma_recovery.c:
In case of errors, start writing on new line if we were in %## %## printing mode (Made errors more readable)
Changed global variable name from warnings -> recovery_warnings
Use MARIA_FILE_CREATE_RENAME_LSN_OFFSET instead of constant
Removed special handling of row position for deleted keys. Keys now always includes row positions
_ma_apply_undo_row_delete() now gets page and row position
Added check that we don't loop forever when handling undo's (in case of bug in undo chain)
Print name of failed REDO/UNDO
storage/maria/ma_recovery.h:
Removed old comment
storage/maria/ma_static.c:
Changed version number of Maria files to not accidentally use old ones (because of change of ordering of status variables)
storage/maria/ma_test2.c:
Added option -u to specify number of rows to update
Changed old option -u to be -A, as for ma_test1
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
First created blob is now of max blob length to ensure we have at least one big blob in the table
storage/maria/ma_test_all.sh:
More tests
storage/maria/ma_test_recovery.expected:
Updated results
storage/maria/ma_test_recovery:
Changed tests to use bigger blobs (not just 1K)
Added new tests that tests recovery of update with blobs
Removed comparison of .MAD file as it's not guaranteed that recovery from scratch gives identical data file as original update
(compact_page() may be called at different times during normal execution and during REDO)
storage/maria/ma_update.c:
Simplify code (changed * to if)
storage/maria/maria_chk.c:
Make output more readable
storage/maria/maria_def.h:
Changed 'changed' to int to prepare for more bits
Added 2 more bytes to status information
Added 'st_mara_row->min_length' for storing min length needed on head page
Added 'st_mara_handler->blob_buff & blob_buff_size' for storing blobs
Moved all tuning parameters into one block
Added MARIA_SMALL_BLOB_BUFFER
Added _ma_mark_file_crashed()
storage/myisam/mi_test2.c:
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
storage/maria/ma_test_big.sh:
Testing of insert, update, delete, recovery and undo of rows with blobs
Thanks to the random-ness of ma_test2 this is likely to find most bugs in the row handling
2007-12-30 21:40:03 +01:00
|
|
|
row->space_on_head_page= row_length;
|
2007-01-18 20:38:14 +01:00
|
|
|
|
|
|
|
if (write_rest_of_head(info, position, rest_length))
|
|
|
|
goto abort;
|
|
|
|
|
|
|
|
end:
|
|
|
|
blocks->block= dynamic_element(&info->bitmap_blocks, position,
|
|
|
|
MARIA_BITMAP_BLOCK*);
|
|
|
|
blocks->block->sub_blocks= ELEMENTS_RESERVED_FOR_MAIN_PART - position;
|
|
|
|
/* First block's page_count is for all blocks */
|
|
|
|
blocks->count= info->bitmap_blocks.elements - position;
|
|
|
|
res= 0;
|
|
|
|
|
|
|
|
abort:
|
2011-07-13 21:10:18 +02:00
|
|
|
mysql_mutex_unlock(&share->bitmap.bitmap_lock);
|
2007-01-18 20:38:14 +01:00
|
|
|
DBUG_RETURN(res);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/****************************************************************************
|
|
|
|
Clear and reset bits
|
|
|
|
****************************************************************************/
|
|
|
|
|
2007-04-19 12:18:56 +02:00
|
|
|
/*
|
|
|
|
Set fill pattern for a page
|
|
|
|
|
|
|
|
set_page_bits()
|
|
|
|
info Maria handler
|
|
|
|
bitmap Bitmap handler
|
|
|
|
page Address of page
|
|
|
|
fill_pattern Pattern (not size) for page
|
|
|
|
|
|
|
|
NOTES
|
|
|
|
Page may not be part of active bitmap
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
0 ok
|
|
|
|
1 error
|
|
|
|
*/
|
|
|
|
|
2007-01-18 20:38:14 +01:00
|
|
|
static my_bool set_page_bits(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap,
|
2008-01-10 20:21:36 +01:00
|
|
|
pgcache_page_no_t page, uint fill_pattern)
|
2007-01-18 20:38:14 +01:00
|
|
|
{
|
2008-01-10 20:21:36 +01:00
|
|
|
pgcache_page_no_t bitmap_page;
|
2007-01-18 20:38:14 +01:00
|
|
|
uint offset_page, offset, tmp, org_tmp;
|
|
|
|
uchar *data;
|
|
|
|
DBUG_ENTER("set_page_bits");
|
2010-09-07 18:58:39 +02:00
|
|
|
DBUG_ASSERT(fill_pattern <= 7);
|
2007-01-18 20:38:14 +01:00
|
|
|
|
2007-04-20 14:16:43 +02:00
|
|
|
bitmap_page= page - page % bitmap->pages_covered;
|
2007-01-18 20:38:14 +01:00
|
|
|
if (bitmap_page != bitmap->page &&
|
|
|
|
_ma_change_bitmap_page(info, bitmap, bitmap_page))
|
|
|
|
DBUG_RETURN(1);
|
|
|
|
|
|
|
|
/* Find page number from start of bitmap */
|
2008-01-10 20:21:36 +01:00
|
|
|
offset_page= (uint) (page - bitmap->page - 1);
|
2007-01-18 20:38:14 +01:00
|
|
|
/*
|
|
|
|
Mark place used by reading/writing 2 bytes at a time to handle
|
|
|
|
bitmaps in overlapping bytes
|
|
|
|
*/
|
|
|
|
offset_page*= 3;
|
|
|
|
offset= offset_page & 7;
|
|
|
|
data= bitmap->map + offset_page / 8;
|
|
|
|
org_tmp= tmp= uint2korr(data);
|
|
|
|
tmp= (tmp & ~(7 << offset)) | (fill_pattern << offset);
|
|
|
|
if (tmp == org_tmp)
|
|
|
|
DBUG_RETURN(0); /* No changes */
|
|
|
|
int2store(data, tmp);
|
|
|
|
|
|
|
|
bitmap->changed= 1;
|
2007-10-19 23:24:22 +02:00
|
|
|
DBUG_EXECUTE("bitmap", _ma_print_bitmap_changes(bitmap););
|
WL#3072 - Maria recovery
Unit test for recovery: runs ma_test1 and ma_test2 (both only with
INSERTs and DELETEs; UPDATEs disabled as not handled by recovery)
then moves the tables elsewhere; recreates tables from the log, and
compares and fails if there is a difference. Passes now.
Most of maria_read_log.c moved to ma_recovery.c, as it will be re-used
for recovery-from-ha_maria.
Bugfixes of applying of REDO_INSERT, REDO_PURGE_ROW.
Applying of REDO_PURGE_BLOCKS, REDO_DELETE_ALL, REDO_DROP_TABLE,
UNDO_ROW_INSERT (in REDO phase only, i.e. just doing records++),
UNDO_ROW_DELETE, UNDO_ROW_PURGE.
Code cleanups.
Monty: please look for "QQ". Sanja: please look for "Sanja".
Future tasks: recovery of the bitmap (easy), recovery of the state
(make it idempotent), more REDOs (Monty to work on
REDO_UPDATE?), UNDO phase...
Pushing this cset as it looks safe, contains test and bugfixes which
will help Monty implement applying of REDO_UPDATE.
sql/handler.cc:
typo
storage/maria/Makefile.am:
Adding ma_test_recovery (which ma_test_all invokes, and which can
also be run alone). Most of maria_read_log.c moved to ma_recovery.c
storage/maria/ha_maria.cc:
comments
storage/maria/ma_bitmap.c:
fixing comments. 2 -> sizeof(maria_bitmap_marker).
Bitmap-related part of _ma_initialize_datafile() moves in bitmap module.
Now putting the "bm" signature when creating the first bitmap page
(it used to happen only at next open, but that
caused an annoying difference when testing Recovery if the original
run didn't open the table, and it looks more
logical like this: it goes to disk only with its signature correct);
see the "QQ" comment towards the _ma_initialize_data_file() call
in ma_create.c for more).
When reading a bitmap page, verify its signature (happens when normally
using the table or when CHECKing it; not when REPAIRing it).
storage/maria/ma_blockrec.c:
* no need to sync the data file if table is not transactional
* Comments, code cleanup (log-related data moved to log-related code
block, int5store->page_store).
* Store the table's short id into LOGREC_UNDO_ROW_PURGE, like we
do for other records (though this record will soon be replaced
with a CLR).
* If "page" is 1 it means the page which extends from byte
page*block_size+1 to (page+1)*block_size (byte number 1 being
the first byte of the file). The last byte of the file is
data_file_length (same convention).
A new page needs to be created if the last byte of the page is
beyond the last byte of the file, i.e.
(page+1)*block_size+1 > data_file_length, so we correct the test
(bug found when testing log applying for ma_test1 -M -T --skip-update).
* update the page's LSN when removing a row from it during
execution of a REDO_PURGE_ROW record (bug found when testing log
applying for ma_test1 -M -T --skip-update).
* applying of REDO_PURGE_BLOCKs (limited to a one-page range for now).
storage/maria/ma_blockrec.h:
new functions. maria_bitmap_marker does not need to be exported.
storage/maria/ma_close.c:
we can always flush the table's state when closing the last instance
of the table. And it is needed for maria_read_log (as it does
not use maria_lock_database()).
storage/maria/ma_control_file.c:
when in Recovery, some assertions should not be used.
storage/maria/ma_control_file.h:
double-inclusion safe
storage/maria/ma_create.c:
during recovery, don't log records. Comments.
Moving the creation of the first bitmap page to ma_bitmap.c
storage/maria/ma_delete_table.c:
during recovery, don't log records. Log the end-zero of the dropped
table's name, so that recovery can use the string in place without
extending it to fit an end zero.
storage/maria/ma_loghandler.c:
* inwrite_rec_hook also needs access to the MARIA_SHARE, like
prewrite_rec_hook. This will be needed to update
share->records_diff (in the upcoming patch "recovery of the state").
* LOG_DESC::record_ends_group changed to an enum.
* LOG_DESC for LOGREC_REDO_PURGE_BLOCKS and LOGREC_UNDO_ROW_PURGE
corrected
* Sanja please see the @todo LOG BUG
* avoiding DBUG_RETURN(func()) as it gives confusing debug traces.
storage/maria/ma_loghandler.h:
- log write hooks called while the log's lock is held (inwrite_rec_hook)
now need the MARIA_SHARE, like prewrite_rec_hook already had
- instead of a bool saying if this record's type ends groups or not,
we refine: it may not end a group, it may end a group, or it may
be a group in itself. Imagine that we had a physical write failure
to a table before we log the UNDO, we still end up in
external_lock(F_UNLCK) and then we log a COMMIT: we don't want
to consider this COMMIT as ending the group of REDOs (don't want
to execute those REDOs during Recovery), that's why we say "COMMIT
is a group in itself, it aborts any previous group". This also
gives one more sanity check in maria_read_log.
storage/maria/ma_recovery.c:
New Recovery code, replacing the old pseudocode.
Most of maria_read_log moved here.
Call-able from ha_maria, but not enabled yet.
Compared to the previous version of maria_read_log, some bugs have
been fixed, debugging output can go to stdout or a disk file (for now
it's useful for me, later it can be changed), execution of
REDO_DROP_TABLE, REDO_DELETE_ALL, REDO_PURGE_BLOCKS has been added. Duplicate code
has been factored into functions. We abort an unfinished group
of records if we see a record which is a group in itself (like COMMIT).
No need for maria_panic() after a bug (which caused tables to not
be closed) was fixed; if there is yet another bug I prefer to see it.
When opening a table for Recovery, set data_file_length
and key_file_length to their real physical value (these are the
easiest state members to restore :). Warn us if the last page
was truncated (but Recovery handles it).
MARIA_SHARE::state::state::records is now partly recovered (not
idempotent, but works if recreating tables from scratch).
When applying a REDO to a page, stamp it with the UNDO's LSN
(current_group_end_lsn), not with the REDO's LSN; it makes
the table more identical to the original table (easier to compare
the two tables in the end).
Big thing missing: some types of REDOs are not handled,
and the UNDO phase does not exist (missing functions to execute UNDOs
to actually rollback). So for now tests are only inserting/deleting
a few 100 rows, closing the table and seeing if the log is applied ok;
it works. UPDATE not handled.
storage/maria/ma_recovery.h:
new functions: ma_recover() for recovery from inside ha_maria;
_ma_apply_log() for maria_read_log (ma_recover() calls _ma_apply_log()).
Btw, we need to not use the word "recover" for REPAIR/maria_chk anymore.
storage/maria/ma_rename.c:
don't write log records during recovery
storage/maria/ma_test2.c:
- fail if maria_info() or other subtests find some wrong information
- new option -g to skip updates.
- init the translog before creating the table, so that log applying
can work.
- in "#if 0" you'll see some fixed bugs (will be removed).
storage/maria/ma_test_all.sh:
cleanup files. Test log applying.
storage/maria/maria_read_log.c:
most of the logic moves to ma_recovery.c to be shared between
maria_read_log and recovery-from-inside-mysqld.
See ma_recovery.c for additional changes made to the moved code.
storage/maria/ma_test_recovery:
unit test for Recovery. Tests insert and delete,
REDO_UPDATE not yet coded.
Script is called from ma_test_all. Can run standalone.
2007-07-26 11:56:21 +02:00
|
|
|
if (fill_pattern != 3 && fill_pattern != 7)
|
|
|
|
set_if_smaller(info->s->state.first_bitmap_with_space, bitmap_page);
|
|
|
|
/*
|
|
|
|
Note that if the condition above is false (page is full), and all pages of
|
|
|
|
this bitmap are now full, and that bitmap page was
|
|
|
|
first_bitmap_with_space, we don't modify first_bitmap_with_space, indeed
|
|
|
|
its value still tells us where to start our search for a bitmap with space
|
|
|
|
(which is for sure after this full one).
|
|
|
|
That does mean that first_bitmap_with_space is only a lower bound.
|
|
|
|
*/
|
2007-01-18 20:38:14 +01:00
|
|
|
DBUG_RETURN(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
Get bitmap pattern for a given page
|
|
|
|
|
|
|
|
SYNOPSIS
|
2011-06-24 11:08:45 +02:00
|
|
|
bitmap_get_page_bits()
|
2007-04-19 12:18:56 +02:00
|
|
|
info Maria handler
|
|
|
|
bitmap Bitmap handler
|
|
|
|
page Page number
|
2007-01-18 20:38:14 +01:00
|
|
|
|
|
|
|
RETURN
|
|
|
|
0-7 Bitmap pattern
|
|
|
|
~0 Error (couldn't read page)
|
|
|
|
*/
|
|
|
|
|
2011-06-24 11:08:45 +02:00
|
|
|
static uint bitmap_get_page_bits(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap,
|
|
|
|
pgcache_page_no_t page)
|
2007-01-18 20:38:14 +01:00
|
|
|
{
/*
  Unlocked variant: this function does no locking of its own.
  _ma_bitmap_get_page_bits() calls it with bitmap->bitmap_lock held.

  If page is covered by another bitmap page than bitmap->page,
  _ma_change_bitmap_page() is called first; a non-zero result from it
  makes this function return ~0.

  NOTE(review): the DBUG_ENTER tag used further down names the exported
  wrapper (_ma_bitmap_get_page_bits), not this static function.
*/
|
2008-01-10 20:21:36 +01:00
|
|
|
/* Number of the bitmap page that covers page (page rounded down) */
pgcache_page_no_t bitmap_page;
|
2007-01-18 20:38:14 +01:00
|
|
|
/*
  offset_page: index of page counted from the first page after the
               bitmap page; multiplied by 3 it becomes a bit position
               (each page uses 3 bits)
  offset:      bit position inside the first byte of the pattern
  tmp:         the two bytes read at data
*/
uint offset_page, offset, tmp;
|
|
|
|
/* Byte in bitmap->map where the pattern for page starts */
uchar *data;
|
UNDO of rows now puts back all part of the row on their original pages and positions
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Added --debug-on option to mysqld (to be able to turn of DBUG with --debug-on=0)
Fixed some bugs with 'non_flushable' marking of bitmap pages
Don't use 'non_flushable' marking of bitmap pages for not transactional tables
SHOW CREATE TABLE now shows if table was created with page checksums
Fixed a lot of bugs with BLOB handling in case of update/REDO and UNDO
More tests (especially for blobs) and DBUG_ASSERTS()
More readable output from maria_read_log and maria_chk
Fixed wrong shift that caused Maria to crash on files > 4G
Mark tables as crashed of REDO fails
dbug/dbug.c:
Changed to use my_bool (allowed me to remove some windows specific code)
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Removed initialization of variables if not needed
include/my_dbug.h:
Use my_bool for some functions that was defined as BOOLEAN in dbug.c code
Added DBUGGER_ON/DEBUGGER_OFF to speed up execution when DBUG is not used
include/my_global.h:
Define my_bool early
Increase MY_HOW_OFTEN_TO_WRITE as computers are now faster than 10 years ago
mysql-test/mysql-test-run.pl:
Added debug-on=0 to speed up tests
mysql-test/r/maria-recovery.result:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/r/maria.result:
Added testing of page checksums
mysql-test/t/crash_commit_before-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-bitmap-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery.test:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/t/maria.test:
Added testing of page checksums
sql/mysqld.cc:
Added --debug-on option (to be able to turn of DBUG with --debug-on=0)
Indentation fixes
Removed end spaces
sql/sql_show.cc:
Allow update_create_info() to inform MySQL if PACK_KEYS, NO_PACK_KEYS, CHECKSUM, PAGE_CHECKSUM or DELAY_KEY_WRITE is used
storage/maria/Makefile.am:
Added ma_test_big.sh
storage/maria/ha_maria.cc:
Store in create_info if page checksums are used (For SHOW CREATE TABLE)
storage/maria/ma_bitmap.c:
Added _ma_bitmap_wait_or_flush() to cause reader of bitmap pages to wait with reading until bitmap is flushed.
Use TAIL_PAGE_COUNT_MARKER for tail pages
Set 'sub_blocks' for and only for the head page or for the first extent of a blob. This is needed for store_extent_info() to be able to set START_EXTENT_BIT's
Don't allocate more than 0x3ffff pages in one extent (We need bit 0x4000 as a START_EXTENT_BIT)
Increase the calculated 'head_length' with the number of bytes used for extents.
Update row->space_on_head_page also in _ma_bitmap_find_new_place()
Make _ma_bitmap_get_page_bits() global. (Needed for UNDO handling)
Changed _ma_bitmap_flushable() to take MARIA_HA instead of MARIA_SHARE.
This was needed to be able to mark the handler if we had a 'non_flushable' call pending or not.
Don't use 'non_flushable' marking of bitmap pages for not transactional tables.
Added BLOCKUSED_USE_ORG_BITMAP handling also for tail pages.
Added more DBUG_ASSERT() to find possible errors in other code
Some code simplications by adding new local variables
storage/maria/ma_blockrec.c:
UNDO of rows now puts back all part of the row on their original pages and positions.
Changed UNDO of DELETE and UNDO of UPDATE to contain information about the original length of data on head block and also extent information
This changes a lot of logic as now an insert of a row on a page may happen to any position (and not just to the first or next free)
Use PAGE_COUNT to mark if an extent is the start of of a blob. (Needed for extent_to_bitmap_blocks())
Added check_directory() for checking that directroy entries are correct.
Added checking of row checksums when reading rows (with EXTRA_DEBUG)
Added make_space_for_directory() and extend_directory() for doing expansion of directory
Added get_rowpos_in_head_or_tail_page() to be able to store head/tail on original position in UNDO
Added extent_to_bitmap_blocks() to be able to generate original bitmap blocks from UNDO entry
Added _ma_update_at_original_place() for UNDO of DELETES
Added row->min_length to hold minmum required space needed on head page
Changed find_free_position() to use make_space_for_directory()
Changed make_empty_page() to allow optional creation of directory entry
Changed delete_head_or_tail() and _ma_apply_undo_row_isnert() to not copy pagecache block (speed optimization)
Changed _ma_apply_redo_insert_row_head_or_tail() to be able to insert new row at any position on 'new' page
Changed _ma_apply_undo_row_delete() and _ma_apply_undo_row_update() to put row in it's original position
Ensure allocation of tail blocks are of at least MIN_TAIL_SIZE.
Ensure we store pages in pinned pages even if read failed. (If not we will have pages pinned forever in page cache)
Write original extent information in UNDO entry, not compacted ones (we need position to tails!)
When setting BLOCKUSED_USED, don't clear other bits (we have to preserve BLOCKUSED_USE_ORG_BITMAP)
Fixed som bugs in directory handling
Fixed bug where we wrote wrong lsn to blob pages
Added separate blob_buffer for fixing bug when updating row that had char/varchar that spanned several pages and also had blobs
Ensure we call _ma_bitmap_flushable() also in case of errors
When doing an update, first delete old entries, then search in bitmap for where to put new information
Info->s -> share
Rowid -> rowid
More DBUG_ASSERT()
storage/maria/ma_blockrec.h:
Added START_EXTENT_BIT and TAIL_PAGE_COUNT_MARKER
Added _ma_bitmap_wait_or_flush() and _ma_bitmap_get_page_bits()
storage/maria/ma_check.c:
Don't write extra empty line if there is no deleted blocks
Ignore START_EXTENT_BIT's in page count
Call _ma_fast_unlock_key_del() to free key_del link
storage/maria/ma_close.c:
Ensure that used_key_del is 0. (If not, someone forgot to call _ma_unlock_key_del())
storage/maria/ma_create.c:
Changed constant to macro
storage/maria/ma_delete.c:
For deleted keys, log also position to row
storage/maria/ma_extra.c:
Release blob buffer at maria_reset() if bigger than MARIA_SMALL_BLOB_BUFFER
storage/maria/ma_key_recover.c:
Added bzero() of LSN that confused paged cache in case of uninitialized block
Mark file crashed if applying of index changes fails
Added calls to _ma_fast_unlock_key_del() for protection of shared key_del link.
storage/maria/ma_locking.c:
Added usage of MARIA_FILE_OPEN_COUNT_OFFSET
Added _ma_mark_file_crashed()
storage/maria/ma_loghandler.c:
Fixed bug where we logged uninitialized memory
storage/maria/ma_open.c:
Moved state->changed to be at start of state info on disk to allow one to easly mark files as crashed
storage/maria/ma_page.c:
Disable 'dummy' checksumming of pages as this gave false warnings.
(Need to investigate if this is ever needed)
storage/maria/ma_pagecache.c:
Fixed wrong shift that caused Maria to crash on files > 4G
storage/maria/ma_recovery.c:
In case of errors, start writing on new line if we where in %## %## printing mode (Made errors more readable)
Changed global variable name from warnings -> recovery_warnings
Use MARIA_FILE_CREATE_RENAME_LSN_OFFSET instead of constant
Removed special handling of row position for deleted keys. Keys now always includes row positions
_ma_apply_undo_row_delete() now gets page and row position
Added check that we don't loop forever when handling undo's (in case of bug in undo chain)
Print name of failed REDO/UNDO
storage/maria/ma_recovery.h:
Removed old comment
storage/maria/ma_static.c:
Chaned version number of Maria files to not accidently use old ones (becasue of change of ordering of status variables)
storage/maria/ma_test2.c:
Added option -u to specify number of rows to update
Changed old option -u to be -A, as for ma_test1
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
First created blob is now of max blob length to ensure we have at least one big blob in the table
storage/maria/ma_test_all.sh:
More tests
storage/maria/ma_test_recovery.expected:
Updated results
storage/maria/ma_test_recovery:
Changed tests to use bigger blobs (not just 1K)
Added new tests that tests recovery of update with blobs
Removed comparision of .MAD file as it's not guranteed that recovery from scratch gives identical data file as original update
(compact_page() may be called at different times during normal execution and during REDO)
storage/maria/ma_update.c:
Simplify code (changed * to if)
storage/maria/maria_chk.c:
Make output more readable
storage/maria/maria_def.h:
Changed 'changed' to int to prepare for more bits
Added 2 more bytes to status information
Added 'st_mara_row->min_length' for storing min length needed on head page
Added 'st_mara_handler->blob_buff & blob_buff_size' for storing blobs
Moved all tunning parameters into one block
Added MARIA_SMALL_BLOB_BUFFER
Added _ma_mark_file_crashed()
storage/myisam/mi_test2.c:
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
storage/maria/ma_test_big.sh:
Testing of insert, update, delete, recovery and undo of rows with blobs
Thanks to the random-ness of ma_test2 this is likely to find most bugs in the row handling
2007-12-30 21:40:03 +01:00
|
|
|
DBUG_ENTER("_ma_bitmap_get_page_bits");
|
2007-01-18 20:38:14 +01:00
|
|
|
|
2007-04-20 14:16:43 +02:00
|
|
|
bitmap_page= page - page % bitmap->pages_covered;
|
2007-01-18 20:38:14 +01:00
|
|
|
if (bitmap_page != bitmap->page &&
|
|
|
|
_ma_change_bitmap_page(info, bitmap, bitmap_page))
|
|
|
|
DBUG_RETURN(~ (uint) 0);
|
|
|
|
|
|
|
|
/* Find page number from start of bitmap */
|
2008-01-10 20:21:36 +01:00
|
|
|
offset_page= (uint) (page - bitmap->page - 1);
|
2007-01-18 20:38:14 +01:00
|
|
|
/*
|
|
|
|
Mark place used by reading/writing 2 bytes at a time to handle
|
|
|
|
bitmaps in overlapping bytes
|
|
|
|
*/
|
|
|
|
offset_page*= 3;
|
|
|
|
offset= offset_page & 7;
|
|
|
|
data= bitmap->map + offset_page / 8;
|
|
|
|
tmp= uint2korr(data);
|
|
|
|
DBUG_RETURN((tmp >> offset) & 7);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2011-06-24 11:08:45 +02:00
|
|
|
/*
  Locked variant of bitmap_get_page_bits().

  Takes bitmap->bitmap_lock, fetches the bit pattern for the given page
  through bitmap_get_page_bits() and releases the lock again before
  returning.

  RETURN
    The value returned by bitmap_get_page_bits(), passed on unchanged
*/
|
|
|
|
|
|
|
|
|
|
uint _ma_bitmap_get_page_bits(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap,
|
|
|
|
pgcache_page_no_t page)
|
|
|
|
{
|
|
|
|
uint tmp;
|
2011-10-19 21:45:18 +02:00
|
|
|
mysql_mutex_lock(&bitmap->bitmap_lock);
|
2011-06-24 11:08:45 +02:00
|
|
|
tmp= bitmap_get_page_bits(info, bitmap, page);
|
2011-10-19 21:45:18 +02:00
|
|
|
mysql_mutex_unlock(&bitmap->bitmap_lock);
|
2011-06-24 11:08:45 +02:00
|
|
|
return tmp;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2007-01-18 20:38:14 +01:00
|
|
|
/*
|
|
|
|
Mark all pages in a region as free
|
|
|
|
|
|
|
|
SYNOPSIS
|
WL#3072 Maria recovery:
fix for bug: if a crash happened right after writing a REDO like this:
REDO - UNDO - REDO*, then recovery would ignore the last REDO* (ok),
rollback: REDO - UNDO - REDO* - REDO - CLR, and a next recovery would
thus execute REDO* instead of skipping it again. Recovery now logs
LOGREC_INCOMPLETE_GROUP when it meets REDO* for the first time,
to draw a boundary and ensure it is always skipped. Tested by hand.
Note: ma_test_all fails "maria_chk: error: Key 1 - Found too many records"
not due to this patch (failed before).
BitKeeper/triggers/post-commit:
no truncation of the commit mail, or how to review patches?
mysql-test/include/maria_verify_recovery.inc:
let caller choose the statement used to crash (sometimes we
want the crash to happen at special places)
mysql-test/t/maria-recovery.test:
user of maria_verify_recovery.inc now specifies statement which the
script should use for crashing.
storage/maria/ma_bitmap.c:
it's easier to search for all places using functions from the bitmap
module (like in ma_blockrec.c) if those exported functions all start
with "_ma_bitmap": renaming some of them.
Assertion that when we read a bitmap page, overwriting bitmap->map,
we are not losing information (i.e. bitmap->changed is false).
storage/maria/ma_blockrec.c:
update to new names. Adding code (disabled, protected by a #ifdef)
that I use to test certain crash scenarios (more to come).
storage/maria/ma_blockrec.h:
update to new names
storage/maria/ma_checkpoint.c:
update to new names
storage/maria/ma_extra.c:
update to new names
storage/maria/ma_loghandler.c:
new LOGREC_INCOMPLETE_GROUP
storage/maria/ma_loghandler.h:
new LOGREC_INCOMPLETE_GROUP
storage/maria/ma_recovery.c:
When at the end of the REDO phase we have identified some transactions
with incomplete REDO groups (REDOs without an UNDO or CLR_END),
for each of them we log LOGREC_INCOMPLETE_GROUP. This way, the
upcoming UNDO phase can write more records for such transaction,
a future recovery won't pair the incomplete group with the
CLR_END (as there is LOGREC_INCOMPLETE_GROUP to draw a boundary).
2007-12-10 23:26:53 +01:00
|
|
|
_ma_bitmap_reset_full_page_bits()
|
2007-01-18 20:38:14 +01:00
|
|
|
info Maria handler
|
|
|
|
bitmap Bitmap handler
|
|
|
|
page Start page
|
|
|
|
page_count Number of pages
|
|
|
|
|
|
|
|
NOTES
|
|
|
|
We assume that all pages in the region are covered by the same bitmap
|
|
|
|
One must have a lock on info->s->bitmap.bitmap_lock
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
0 ok
|
|
|
|
1 Error (when reading bitmap)
|
|
|
|
*/
|
|
|
|
|
WL#3072 Maria recovery:
fix for bug: if a crash happened right after writing a REDO like this:
REDO - UNDO - REDO*, then recovery would ignore the last REDO* (ok),
rollback: REDO - UNDO - REDO* - REDO - CLR, and a next recovery would
thus execute REDO* instead of skipping it again. Recovery now logs
LOGREC_INCOMPLETE_GROUP when it meets REDO* for the first time,
to draw a boundary and ensure it is always skipped. Tested by hand.
Note: ma_test_all fails "maria_chk: error: Key 1 - Found too many records"
not due to this patch (failed before).
BitKeeper/triggers/post-commit:
no truncation of the commit mail, or how to review patches?
mysql-test/include/maria_verify_recovery.inc:
let caller choose the statement used to crash (sometimes we
want the crash to happen at special places)
mysql-test/t/maria-recovery.test:
user of maria_verify_recovery.inc now specifies statement which the
script should use for crashing.
storage/maria/ma_bitmap.c:
it's easier to search for all places using functions from the bitmap
module (like in ma_blockrec.c) if those exported functions all start
with "_ma_bitmap": renaming some of them.
Assertion that when we read a bitmap page, overwriting bitmap->map,
we are not losing information (i.e. bitmap->changed is false).
storage/maria/ma_blockrec.c:
update to new names. Adding code (disabled, protected by a #ifdef)
that I use to test certain crash scenarios (more to come).
storage/maria/ma_blockrec.h:
update to new names
storage/maria/ma_checkpoint.c:
update to new names
storage/maria/ma_extra.c:
update to new names
storage/maria/ma_loghandler.c:
new LOGREC_INCOMPLETE_GROUP
storage/maria/ma_loghandler.h:
new LOGREC_INCOMPLETE_GROUP
storage/maria/ma_recovery.c:
When at the end of the REDO phase we have identified some transactions
with incomplete REDO groups (REDOs without an UNDO or CLR_END),
for each of them we log LOGREC_INCOMPLETE_GROUP. This way, the
upcoming UNDO phase can write more records for such transaction,
a future recovery won't pair the incomplete group with the
CLR_END (as there is LOGREC_INCOMPLETE_GROUP to draw a boundary).
2007-12-10 23:26:53 +01:00
|
|
|
my_bool _ma_bitmap_reset_full_page_bits(MARIA_HA *info,
|
|
|
|
MARIA_FILE_BITMAP *bitmap,
|
2008-01-10 20:21:36 +01:00
|
|
|
pgcache_page_no_t page,
|
|
|
|
uint page_count)
|
2007-01-18 20:38:14 +01:00
|
|
|
{
|
|
|
|
ulonglong bitmap_page;
|
|
|
|
uint offset, bit_start, bit_count, tmp;
|
|
|
|
uchar *data;
|
WL#3072 Maria recovery:
fix for bug: if a crash happened right after writing a REDO like this:
REDO - UNDO - REDO*, then recovery would ignore the last REDO* (ok),
rollback: REDO - UNDO - REDO* - REDO - CLR, and a next recovery would
thus execute REDO* instead of skipping it again. Recovery now logs
LOGREC_INCOMPLETE_GROUP when it meets REDO* for the first time,
to draw a boundary and ensure it is always skipped. Tested by hand.
Note: ma_test_all fails "maria_chk: error: Key 1 - Found too many records"
not due to this patch (failed before).
BitKeeper/triggers/post-commit:
no truncation of the commit mail, or how to review patches?
mysql-test/include/maria_verify_recovery.inc:
let caller choose the statement used to crash (sometimes we
want the crash to happen at special places)
mysql-test/t/maria-recovery.test:
user of maria_verify_recovery.inc now specifies statement which the
script should use for crashing.
storage/maria/ma_bitmap.c:
it's easier to search for all places using functions from the bitmap
module (like in ma_blockrec.c) if those exported functions all start
with "_ma_bitmap": renaming some of them.
Assertion that when we read a bitmap page, overwriting bitmap->map,
we are not losing information (i.e. bitmap->changed is false).
storage/maria/ma_blockrec.c:
update to new names. Adding code (disabled, protected by a #ifdef)
that I use to test certain crash scenarios (more to come).
storage/maria/ma_blockrec.h:
update to new names
storage/maria/ma_checkpoint.c:
update to new names
storage/maria/ma_extra.c:
update to new names
storage/maria/ma_loghandler.c:
new LOGREC_INCOMPLETE_GROUP
storage/maria/ma_loghandler.h:
new LOGREC_INCOMPLETE_GROUP
storage/maria/ma_recovery.c:
When at the end of the REDO phase we have identified some transactions
with incomplete REDO groups (REDOs without an UNDO or CLR_END),
for each of them we log LOGREC_INCOMPLETE_GROUP. This way, the
upcoming UNDO phase can write more records for such transaction,
a future recovery won't pair the incomplete group with the
CLR_END (as there is LOGREC_INCOMPLETE_GROUP to draw a boundary).
2007-12-10 23:26:53 +01:00
|
|
|
DBUG_ENTER("_ma_bitmap_reset_full_page_bits");
|
2007-01-18 20:38:14 +01:00
|
|
|
DBUG_PRINT("enter", ("page: %lu page_count: %u", (ulong) page, page_count));
|
2011-07-13 21:10:18 +02:00
|
|
|
mysql_mutex_assert_owner(&info->s->bitmap.bitmap_lock);
|
2007-10-19 23:24:22 +02:00
|
|
|
|
2007-04-20 14:16:43 +02:00
|
|
|
bitmap_page= page - page % bitmap->pages_covered;
|
UNDO of rows now puts back all part of the row on their original pages and positions
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Added --debug-on option to mysqld (to be able to turn off DBUG with --debug-on=0)
Fixed some bugs with 'non_flushable' marking of bitmap pages
Don't use 'non_flushable' marking of bitmap pages for not transactional tables
SHOW CREATE TABLE now shows if table was created with page checksums
Fixed a lot of bugs with BLOB handling in case of update/REDO and UNDO
More tests (especially for blobs) and DBUG_ASSERTS()
More readable output from maria_read_log and maria_chk
Fixed wrong shift that caused Maria to crash on files > 4G
Mark tables as crashed if REDO fails
dbug/dbug.c:
Changed to use my_bool (allowed me to remove some windows specific code)
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Removed initialization of variables if not needed
include/my_dbug.h:
Use my_bool for some functions that was defined as BOOLEAN in dbug.c code
Added DBUGGER_ON/DEBUGGER_OFF to speed up execution when DBUG is not used
include/my_global.h:
Define my_bool early
Increase MY_HOW_OFTEN_TO_WRITE as computers are now faster than 10 years ago
mysql-test/mysql-test-run.pl:
Added debug-on=0 to speed up tests
mysql-test/r/maria-recovery.result:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/r/maria.result:
Added testing of page checksums
mysql-test/t/crash_commit_before-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-bitmap-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery.test:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/t/maria.test:
Added testing of page checksums
sql/mysqld.cc:
Added --debug-on option (to be able to turn off DBUG with --debug-on=0)
Indentation fixes
Removed end spaces
sql/sql_show.cc:
Allow update_create_info() to inform MySQL if PACK_KEYS, NO_PACK_KEYS, CHECKSUM, PAGE_CHECKSUM or DELAY_KEY_WRITE is used
storage/maria/Makefile.am:
Added ma_test_big.sh
storage/maria/ha_maria.cc:
Store in create_info if page checksums are used (For SHOW CREATE TABLE)
storage/maria/ma_bitmap.c:
Added _ma_bitmap_wait_or_flush() to cause reader of bitmap pages to wait with reading until bitmap is flushed.
Use TAIL_PAGE_COUNT_MARKER for tail pages
Set 'sub_blocks' for and only for the head page or for the first extent of a blob. This is needed for store_extent_info() to be able to set START_EXTENT_BIT's
Don't allocate more than 0x3fff pages in one extent (We need bit 0x4000 as a START_EXTENT_BIT)
Increase the calculated 'head_length' with the number of bytes used for extents.
Update row->space_on_head_page also in _ma_bitmap_find_new_place()
Make _ma_bitmap_get_page_bits() global. (Needed for UNDO handling)
Changed _ma_bitmap_flushable() to take MARIA_HA instead of MARIA_SHARE.
This was needed to be able to mark the handler if we had a 'non_flushable' call pending or not.
Don't use 'non_flushable' marking of bitmap pages for not transactional tables.
Added BLOCKUSED_USE_ORG_BITMAP handling also for tail pages.
Added more DBUG_ASSERT() to find possible errors in other code
Some code simplifications by adding new local variables
storage/maria/ma_blockrec.c:
UNDO of rows now puts back all part of the row on their original pages and positions.
Changed UNDO of DELETE and UNDO of UPDATE to contain information about the original length of data on head block and also extent information
This changes a lot of logic as now an insert of a row on a page may happen to any position (and not just to the first or next free)
Use PAGE_COUNT to mark if an extent is the start of a blob. (Needed for extent_to_bitmap_blocks())
Added check_directory() for checking that directory entries are correct.
Added checking of row checksums when reading rows (with EXTRA_DEBUG)
Added make_space_for_directory() and extend_directory() for doing expansion of directory
Added get_rowpos_in_head_or_tail_page() to be able to store head/tail on original position in UNDO
Added extent_to_bitmap_blocks() to be able to generate original bitmap blocks from UNDO entry
Added _ma_update_at_original_place() for UNDO of DELETES
Added row->min_length to hold minimum required space needed on head page
Changed find_free_position() to use make_space_for_directory()
Changed make_empty_page() to allow optional creation of directory entry
Changed delete_head_or_tail() and _ma_apply_undo_row_insert() to not copy pagecache block (speed optimization)
Changed _ma_apply_redo_insert_row_head_or_tail() to be able to insert new row at any position on 'new' page
Changed _ma_apply_undo_row_delete() and _ma_apply_undo_row_update() to put row in it's original position
Ensure allocation of tail blocks are of at least MIN_TAIL_SIZE.
Ensure we store pages in pinned pages even if read failed. (If not we will have pages pinned forever in page cache)
Write original extent information in UNDO entry, not compacted ones (we need position to tails!)
When setting BLOCKUSED_USED, don't clear other bits (we have to preserve BLOCKUSED_USE_ORG_BITMAP)
Fixed some bugs in directory handling
Fixed bug where we wrote wrong lsn to blob pages
Added separate blob_buffer for fixing bug when updating row that had char/varchar that spanned several pages and also had blobs
Ensure we call _ma_bitmap_flushable() also in case of errors
When doing an update, first delete old entries, then search in bitmap for where to put new information
Info->s -> share
Rowid -> rowid
More DBUG_ASSERT()
storage/maria/ma_blockrec.h:
Added START_EXTENT_BIT and TAIL_PAGE_COUNT_MARKER
Added _ma_bitmap_wait_or_flush() and _ma_bitmap_get_page_bits()
storage/maria/ma_check.c:
Don't write extra empty line if there is no deleted blocks
Ignore START_EXTENT_BIT's in page count
Call _ma_fast_unlock_key_del() to free key_del link
storage/maria/ma_close.c:
Ensure that used_key_del is 0. (If not, someone forgot to call _ma_unlock_key_del())
storage/maria/ma_create.c:
Changed constant to macro
storage/maria/ma_delete.c:
For deleted keys, log also position to row
storage/maria/ma_extra.c:
Release blob buffer at maria_reset() if bigger than MARIA_SMALL_BLOB_BUFFER
storage/maria/ma_key_recover.c:
Added bzero() of LSN that confused paged cache in case of uninitialized block
Mark file crashed if applying of index changes fails
Added calls to _ma_fast_unlock_key_del() for protection of shared key_del link.
storage/maria/ma_locking.c:
Added usage of MARIA_FILE_OPEN_COUNT_OFFSET
Added _ma_mark_file_crashed()
storage/maria/ma_loghandler.c:
Fixed bug where we logged uninitialized memory
storage/maria/ma_open.c:
Moved state->changed to be at start of state info on disk to allow one to easily mark files as crashed
storage/maria/ma_page.c:
Disable 'dummy' checksumming of pages as this gave false warnings.
(Need to investigate if this is ever needed)
storage/maria/ma_pagecache.c:
Fixed wrong shift that caused Maria to crash on files > 4G
storage/maria/ma_recovery.c:
In case of errors, start writing on new line if we were in %## %## printing mode (Made errors more readable)
Changed global variable name from warnings -> recovery_warnings
Use MARIA_FILE_CREATE_RENAME_LSN_OFFSET instead of constant
Removed special handling of row position for deleted keys. Keys now always includes row positions
_ma_apply_undo_row_delete() now gets page and row position
Added check that we don't loop forever when handling undo's (in case of bug in undo chain)
Print name of failed REDO/UNDO
storage/maria/ma_recovery.h:
Removed old comment
storage/maria/ma_static.c:
Changed version number of Maria files to not accidentally use old ones (because of change of ordering of status variables)
storage/maria/ma_test2.c:
Added option -u to specify number of rows to update
Changed old option -u to be -A, as for ma_test1
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
First created blob is now of max blob length to ensure we have at least one big blob in the table
storage/maria/ma_test_all.sh:
More tests
storage/maria/ma_test_recovery.expected:
Updated results
storage/maria/ma_test_recovery:
Changed tests to use bigger blobs (not just 1K)
Added new tests that tests recovery of update with blobs
Removed comparison of .MAD file as it's not guaranteed that recovery from scratch gives identical data file as original update
(compact_page() may be called at different times during normal execution and during REDO)
storage/maria/ma_update.c:
Simplify code (changed * to if)
storage/maria/maria_chk.c:
Make output more readable
storage/maria/maria_def.h:
Changed 'changed' to int to prepare for more bits
Added 2 more bytes to status information
Added 'st_mara_row->min_length' for storing min length needed on head page
Added 'st_mara_handler->blob_buff & blob_buff_size' for storing blobs
Moved all tuning parameters into one block
Added MARIA_SMALL_BLOB_BUFFER
Added _ma_mark_file_crashed()
storage/myisam/mi_test2.c:
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
storage/maria/ma_test_big.sh:
Testing of insert, update, delete, recovery and undo of rows with blobs
Thanks to the random-ness of ma_test2 this is likely to find most bugs in the row handling
2007-12-30 21:40:03 +01:00
|
|
|
DBUG_ASSERT(page != bitmap_page);
|
|
|
|
|
2007-01-18 20:38:14 +01:00
|
|
|
if (bitmap_page != bitmap->page &&
|
|
|
|
_ma_change_bitmap_page(info, bitmap, bitmap_page))
|
|
|
|
DBUG_RETURN(1);
|
|
|
|
|
|
|
|
/* Find page number from start of bitmap */
|
2008-01-10 20:21:36 +01:00
|
|
|
offset= (uint) (page - bitmap->page - 1);
|
2007-01-18 20:38:14 +01:00
|
|
|
|
|
|
|
/* Clear bits from 'page * 3' -> '(page + page_count) * 3' */
|
2008-01-10 20:21:36 +01:00
|
|
|
bit_start= offset * 3;
|
2007-01-18 20:38:14 +01:00
|
|
|
bit_count= page_count * 3;
|
|
|
|
|
|
|
|
data= bitmap->map + bit_start / 8;
|
|
|
|
offset= bit_start & 7;
|
|
|
|
|
|
|
|
tmp= (255 << offset); /* Bits to keep */
|
|
|
|
if (bit_count + offset < 8)
|
|
|
|
{
|
|
|
|
/* Only clear bits between 'offset' and 'offset+bit_count-1' */
|
|
|
|
tmp^= (255 << (offset + bit_count));
|
|
|
|
}
|
|
|
|
*data&= ~tmp;
|
|
|
|
|
|
|
|
if ((int) (bit_count-= (8 - offset)) > 0)
|
|
|
|
{
|
|
|
|
uint fill;
|
|
|
|
data++;
|
|
|
|
/*
|
|
|
|
-1 is here to avoid one 'if' statement and to let the following code
|
|
|
|
handle the last byte
|
|
|
|
*/
|
|
|
|
if ((fill= (bit_count - 1) / 8))
|
|
|
|
{
|
|
|
|
bzero(data, fill);
|
|
|
|
data+= fill;
|
|
|
|
}
|
|
|
|
bit_count-= fill * 8; /* Bits left to clear */
|
|
|
|
tmp= (1 << bit_count) - 1;
|
|
|
|
*data&= ~tmp;
|
|
|
|
}
|
WL#3072 - Maria recovery
Unit test for recovery: runs ma_test1 and ma_test2 (both only with
INSERTs and DELETEs; UPDATEs disabled as not handled by recovery)
then moves the tables elsewhere; recreates tables from the log, and
compares and fails if there is a difference. Passes now.
Most of maria_read_log.c moved to ma_recovery.c, as it will be re-used
for recovery-from-ha_maria.
Bugfixes of applying of REDO_INSERT, REDO_PURGE_ROW.
Applying of REDO_PURGE_BLOCKS, REDO_DELETE_ALL, REDO_DROP_TABLE,
UNDO_ROW_INSERT (in REDO phase only, i.e. just doing records++),
UNDO_ROW_DELETE, UNDO_ROW_PURGE.
Code cleanups.
Monty: please look for "QQ". Sanja: please look for "Sanja".
Future tasks: recovery of the bitmap (easy), recovery of the state
(make it idempotent), more REDOs (Monty to work on
REDO_UPDATE?), UNDO phase...
Pushing this cset as it looks safe, contains test and bugfixes which
will help Monty implement applying of REDO_UPDATE.
sql/handler.cc:
typo
storage/maria/Makefile.am:
Adding ma_test_recovery (which ma_test_all invokes, and which can
also be run alone). Most of maria_read_log.c moved to ma_recovery.c
storage/maria/ha_maria.cc:
comments
storage/maria/ma_bitmap.c:
fixing comments. 2 -> sizeof(maria_bitmap_marker).
Bitmap-related part of _ma_initialize_datafile() moves in bitmap module.
Now putting the "bm" signature when creating the first bitmap page
(it used to happen only at next open, but that
caused an annoying difference when testing Recovery if the original
run didn't open the table, and it looks more
logical like this: it goes to disk only with its signature correct);
see the "QQ" comment towards the _ma_initialize_data_file() call
in ma_create.c for more).
When reading a bitmap page, verify its signature (happens when normally
using the table or when CHECKing it; not when REPAIRing it).
storage/maria/ma_blockrec.c:
* no need to sync the data file if table is not transactional
* Comments, code cleanup (log-related data moved to log-related code
block, int5store->page_store).
* Store the table's short id into LOGREC_UNDO_ROW_PURGE, like we
do for other records (though this record will soon be replaced
with a CLR).
* If "page" is 1 it means the page which extends from byte
page*block_size+1 to (page+1)*block_size (byte number 1 being
the first byte of the file). The last byte of the file is
data_file_length (same convention).
A new page needs to be created if the last byte of the page is
beyond the last byte of the file, i.e.
(page+1)*block_size+1 > data_file_length, so we correct the test
(bug found when testing log applying for ma_test1 -M -T --skip-update).
* update the page's LSN when removing a row from it during
execution of a REDO_PURGE_ROW record (bug found when testing log
applying for ma_test1 -M -T --skip-update).
* applying of REDO_PURGE_BLOCKs (limited to a one-page range for now).
storage/maria/ma_blockrec.h:
new functions. maria_bitmap_marker does not need to be exported.
storage/maria/ma_close.c:
we can always flush the table's state when closing the last instance
of the table. And it is needed for maria_read_log (as it does
not use maria_lock_database()).
storage/maria/ma_control_file.c:
when in Recovery, some assertions should not be used.
storage/maria/ma_control_file.h:
double-inclusion safe
storage/maria/ma_create.c:
during recovery, don't log records. Comments.
Moving the creation of the first bitmap page to ma_bitmap.c
storage/maria/ma_delete_table.c:
during recovery, don't log records. Log the end-zero of the dropped
table's name, so that recovery can use the string in place without
extending it to fit an end zero.
storage/maria/ma_loghandler.c:
* inwrite_rec_hook also needs access to the MARIA_SHARE, like
prewrite_rec_hook. This will be needed to update
share->records_diff (in the upcoming patch "recovery of the state").
* LOG_DESC::record_ends_group changed to an enum.
* LOG_DESC for LOGREC_REDO_PURGE_BLOCKS and LOGREC_UNDO_ROW_PURGE
corrected
* Sanja please see the @todo LOG BUG
* avoiding DBUG_RETURN(func()) as it gives confusing debug traces.
storage/maria/ma_loghandler.h:
- log write hooks called while the log's lock is held (inwrite_rec_hook)
now need the MARIA_SHARE, like prewrite_rec_hook already had
- instead of a bool saying if this record's type ends groups or not,
we refine: it may not end a group, it may end a group, or it may
be a group in itself. Imagine that we had a physical write failure
to a table before we log the UNDO, we still end up in
external_lock(F_UNLCK) and then we log a COMMIT: we don't want
to consider this COMMIT as ending the group of REDOs (don't want
to execute those REDOs during Recovery), that's why we say "COMMIT
is a group in itself, it aborts any previous group". This also
gives one more sanity check in maria_read_log.
storage/maria/ma_recovery.c:
New Recovery code, replacing the old pseudocode.
Most of maria_read_log moved here.
Call-able from ha_maria, but not enabled yet.
Compared to the previous version of maria_read_log, some bugs have
been fixed, debugging output can go to stdout or a disk file (for now
it's useful for me, later it can be changed), execution of
REDO_DROP_TABLE, REDO_DELETE_ALL, REDO_PURGE_BLOCKS has been added. Duplicate code
has been factored into functions. We abort an unfinished group
of records if we see a record which is a group in itself (like COMMIT).
No need for maria_panic() after a bug (which caused tables to not
be closed) was fixed; if there is yet another bug I prefer to see it.
When opening a table for Recovery, set data_file_length
and key_file_length to their real physical value (these are the
easiest state members to restore :). Warn us if the last page
was truncated (but Recovery handles it).
MARIA_SHARE::state::state::records is now partly recovered (not
idempotent, but works if recreating tables from scratch).
When applying a REDO to a page, stamp it with the UNDO's LSN
(current_group_end_lsn), not with the REDO's LSN; it makes
the table more identical to the original table (easier to compare
the two tables in the end).
Big thing missing: some types of REDOs are not handled,
and the UNDO phase does not exist (missing functions to execute UNDOs
to actually rollback). So for now tests are only inserting/deleting
a few 100 rows, closing the table and seeing if the log is applied ok;
it works. UPDATE not handled.
storage/maria/ma_recovery.h:
new functions: ma_recover() for recovery from inside ha_maria;
_ma_apply_log() for maria_read_log (ma_recover() calls _ma_apply_log()).
Btw, we need to not use the word "recover" for REPAIR/maria_chk anymore.
storage/maria/ma_rename.c:
don't write log records during recovery
storage/maria/ma_test2.c:
- fail if maria_info() or other subtests find some wrong information
- new option -g to skip updates.
- init the translog before creating the table, so that log applying
can work.
- in "#if 0" you'll see some fixed bugs (will be removed).
storage/maria/ma_test_all.sh:
cleanup files. Test log applying.
storage/maria/maria_read_log.c:
most of the logic moves to ma_recovery.c to be shared between
maria_read_log and recovery-from-inside-mysqld.
See ma_recovery.c for additional changes made to the moved code.
storage/maria/ma_test_recovery:
unit test for Recovery. Tests insert and delete,
REDO_UPDATE not yet coded.
Script is called from ma_test_all. Can run standalone.
2007-07-26 11:56:21 +02:00
|
|
|
set_if_smaller(info->s->state.first_bitmap_with_space, bitmap_page);
|
2007-01-18 20:38:14 +01:00
|
|
|
bitmap->changed= 1;
|
2007-10-19 23:24:22 +02:00
|
|
|
DBUG_EXECUTE("bitmap", _ma_print_bitmap_changes(bitmap););
|
|
|
|
DBUG_RETURN(0);
|
|
|
|
}
|
|
|
|
|
2011-06-24 11:08:45 +02:00
|
|
|
|
2007-10-19 23:24:22 +02:00
|
|
|
/*
|
|
|
|
Set all pages in a region as used
|
|
|
|
|
|
|
|
SYNOPSIS
|
WL#3072 Maria recovery:
fix for bug: if a crash happened right after writing a REDO like this:
REDO - UNDO - REDO*, then recovery would ignore the last REDO* (ok),
rollback: REDO - UNDO - REDO* - REDO - CLR, and a next recovery would
thus execute REDO* instead of skipping it again. Recovery now logs
LOGREC_INCOMPLETE_GROUP when it meets REDO* for the first time,
to draw a boundary and ensure it is always skipped. Tested by hand.
Note: ma_test_all fails "maria_chk: error: Key 1 - Found too many records"
not due to this patch (failed before).
BitKeeper/triggers/post-commit:
no truncation of the commit mail, or how to review patches?
mysql-test/include/maria_verify_recovery.inc:
let caller choose the statement used to crash (sometimes we
want the crash to happen at special places)
mysql-test/t/maria-recovery.test:
user of maria_verify_recovery.inc now specifies statement which the
script should use for crashing.
storage/maria/ma_bitmap.c:
it's easier to search for all places using functions from the bitmap
module (like in ma_blockrec.c) if those exported functions all start
with "_ma_bitmap": renaming some of them.
Assertion that when we read a bitmap page, overwriting bitmap->map,
we are not losing information (i.e. bitmap->changed is false).
storage/maria/ma_blockrec.c:
update to new names. Adding code (disabled, protected by a #ifdef)
that I use to test certain crash scenarios (more to come).
storage/maria/ma_blockrec.h:
update to new names
storage/maria/ma_checkpoint.c:
update to new names
storage/maria/ma_extra.c:
update to new names
storage/maria/ma_loghandler.c:
new LOGREC_INCOMPLETE_GROUP
storage/maria/ma_loghandler.h:
new LOGREC_INCOMPLETE_GROUP
storage/maria/ma_recovery.c:
When at the end of the REDO phase we have identified some transactions
with incomplete REDO groups (REDOs without an UNDO or CLR_END),
for each of them we log LOGREC_INCOMPLETE_GROUP. This way, the
upcoming UNDO phase can write more records for such transaction,
a future recovery won't pair the incomplete group with the
CLR_END (as there is LOGREC_INCOMPLETE_GROUP to draw a boundary).
2007-12-10 23:26:53 +01:00
|
|
|
_ma_bitmap_set_full_page_bits()
|
2007-10-19 23:24:22 +02:00
|
|
|
info Maria handler
|
|
|
|
bitmap Bitmap handler
|
|
|
|
page Start page
|
|
|
|
page_count Number of pages
|
|
|
|
|
|
|
|
NOTES
|
|
|
|
We assume that all pages in the region are covered by the same bitmap
|
|
|
|
One must have a lock on info->s->bitmap.bitmap_lock
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
0 ok
|
|
|
|
1 Error (when reading bitmap)
|
|
|
|
*/
|
|
|
|
|
WL#3072 Maria recovery:
fix for bug: if a crash happened right after writing a REDO like this:
REDO - UNDO - REDO*, then recovery would ignore the last REDO* (ok),
rollback: REDO - UNDO - REDO* - REDO - CLR, and a next recovery would
thus execute REDO* instead of skipping it again. Recovery now logs
LOGREC_INCOMPLETE_GROUP when it meets REDO* for the first time,
to draw a boundary and ensure it is always skipped. Tested by hand.
Note: ma_test_all fails "maria_chk: error: Key 1 - Found too many records"
not due to this patch (failed before).
BitKeeper/triggers/post-commit:
no truncation of the commit mail, or how to review patches?
mysql-test/include/maria_verify_recovery.inc:
let caller choose the statement used to crash (sometimes we
want the crash to happen at special places)
mysql-test/t/maria-recovery.test:
user of maria_verify_recovery.inc now specifies statement which the
script should use for crashing.
storage/maria/ma_bitmap.c:
it's easier to search for all places using functions from the bitmap
module (like in ma_blockrec.c) if those exported functions all start
with "_ma_bitmap": renaming some of them.
Assertion that when we read a bitmap page, overwriting bitmap->map,
we are not losing information (i.e. bitmap->changed is false).
storage/maria/ma_blockrec.c:
update to new names. Adding code (disabled, protected by a #ifdef)
that I use to test certain crash scenarios (more to come).
storage/maria/ma_blockrec.h:
update to new names
storage/maria/ma_checkpoint.c:
update to new names
storage/maria/ma_extra.c:
update to new names
storage/maria/ma_loghandler.c:
new LOGREC_INCOMPLETE_GROUP
storage/maria/ma_loghandler.h:
new LOGREC_INCOMPLETE_GROUP
storage/maria/ma_recovery.c:
When at the end of the REDO phase we have identified some transactions
with incomplete REDO groups (REDOs without an UNDO or CLR_END),
for each of them we log LOGREC_INCOMPLETE_GROUP. This way, the
upcoming UNDO phase can write more records for such transaction,
a future recovery won't pair the incomplete group with the
CLR_END (as there is LOGREC_INCOMPLETE_GROUP to draw a boundary).
2007-12-10 23:26:53 +01:00
|
|
|
my_bool _ma_bitmap_set_full_page_bits(MARIA_HA *info,
|
|
|
|
MARIA_FILE_BITMAP *bitmap,
|
2008-01-10 20:21:36 +01:00
|
|
|
pgcache_page_no_t page, uint page_count)
|
2007-10-19 23:24:22 +02:00
|
|
|
{
|
|
|
|
ulonglong bitmap_page;
|
|
|
|
uint offset, bit_start, bit_count, tmp;
|
|
|
|
uchar *data;
|
WL#3072 Maria recovery:
fix for bug: if a crash happened right after writing a REDO like this:
REDO - UNDO - REDO*, then recovery would ignore the last REDO* (ok),
rollback: REDO - UNDO - REDO* - REDO - CLR, and a next recovery would
thus execute REDO* instead of skipping it again. Recovery now logs
LOGREC_INCOMPLETE_GROUP when it meets REDO* for the first time,
to draw a boundary and ensure it is always skipped. Tested by hand.
Note: ma_test_all fails "maria_chk: error: Key 1 - Found too many records"
not due to this patch (failed before).
BitKeeper/triggers/post-commit:
no truncation of the commit mail, or how to review patches?
mysql-test/include/maria_verify_recovery.inc:
let caller choose the statement used to crash (sometimes we
want the crash to happen at special places)
mysql-test/t/maria-recovery.test:
user of maria_verify_recovery.inc now specifies statement which the
script should use for crashing.
storage/maria/ma_bitmap.c:
it's easier to search for all places using functions from the bitmap
module (like in ma_blockrec.c) if those exported functions all start
with "_ma_bitmap": renaming some of them.
Assertion that when we read a bitmap page, overwriting bitmap->map,
we are not losing information (i.e. bitmap->changed is false).
storage/maria/ma_blockrec.c:
update to new names. Adding code (disabled, protected by a #ifdef)
that I use to test certain crash scenarios (more to come).
storage/maria/ma_blockrec.h:
update to new names
storage/maria/ma_checkpoint.c:
update to new names
storage/maria/ma_extra.c:
update to new names
storage/maria/ma_loghandler.c:
new LOGREC_INCOMPLETE_GROUP
storage/maria/ma_loghandler.h:
new LOGREC_INCOMPLETE_GROUP
storage/maria/ma_recovery.c:
When at the end of the REDO phase we have identified some transactions
with incomplete REDO groups (REDOs without an UNDO or CLR_END),
for each of them we log LOGREC_INCOMPLETE_GROUP. This way, the
upcoming UNDO phase can write more records for such transaction,
a future recovery won't pair the incomplete group with the
CLR_END (as there is LOGREC_INCOMPLETE_GROUP to draw a boundary).
2007-12-10 23:26:53 +01:00
|
|
|
DBUG_ENTER("_ma_bitmap_set_full_page_bits");
|
2007-10-19 23:24:22 +02:00
|
|
|
DBUG_PRINT("enter", ("page: %lu page_count: %u", (ulong) page, page_count));
|
2011-07-13 21:10:18 +02:00
|
|
|
mysql_mutex_assert_owner(&info->s->bitmap.bitmap_lock);
|
2007-10-19 23:24:22 +02:00
|
|
|
|
|
|
|
bitmap_page= page - page % bitmap->pages_covered;
|
2010-08-23 11:52:57 +02:00
|
|
|
if (page == bitmap_page ||
|
2011-06-24 11:08:45 +02:00
|
|
|
page + page_count > bitmap_page + bitmap->pages_covered)
|
2010-08-23 11:52:57 +02:00
|
|
|
{
|
|
|
|
DBUG_ASSERT(0); /* Wrong in data */
|
|
|
|
DBUG_RETURN(1);
|
|
|
|
}
|
|
|
|
|
2007-10-19 23:24:22 +02:00
|
|
|
if (bitmap_page != bitmap->page &&
|
|
|
|
_ma_change_bitmap_page(info, bitmap, bitmap_page))
|
|
|
|
DBUG_RETURN(1);
|
|
|
|
|
|
|
|
/* Find page number from start of bitmap */
|
2008-01-10 20:21:36 +01:00
|
|
|
offset= (uint) (page - bitmap->page - 1);
|
2007-10-19 23:24:22 +02:00
|
|
|
|
|
|
|
/* Set bits from 'page * 3' -> '(page + page_count) * 3' */
|
2008-01-10 20:21:36 +01:00
|
|
|
bit_start= offset * 3;
|
2007-10-19 23:24:22 +02:00
|
|
|
bit_count= page_count * 3;
|
|
|
|
|
|
|
|
data= bitmap->map + bit_start / 8;
|
|
|
|
offset= bit_start & 7;
|
|
|
|
|
|
|
|
tmp= (255 << offset); /* Bits to keep */
|
|
|
|
if (bit_count + offset < 8)
|
|
|
|
{
|
|
|
|
/* Only set bits between 'offset' and 'offset+bit_count-1' */
|
|
|
|
tmp^= (255 << (offset + bit_count));
|
|
|
|
}
|
|
|
|
*data|= tmp;
|
|
|
|
|
|
|
|
if ((int) (bit_count-= (8 - offset)) > 0)
|
|
|
|
{
|
|
|
|
uint fill;
|
|
|
|
data++;
|
|
|
|
/*
|
|
|
|
-1 is here to avoid one 'if' statement and to let the following code
|
|
|
|
handle the last byte
|
|
|
|
*/
|
|
|
|
if ((fill= (bit_count - 1) / 8))
|
|
|
|
{
|
|
|
|
bfill(data, fill, 255);
|
|
|
|
data+= fill;
|
|
|
|
}
|
|
|
|
bit_count-= fill * 8; /* Bits left to set */
|
|
|
|
tmp= (1 << bit_count) - 1;
|
|
|
|
*data|= tmp;
|
|
|
|
}
|
|
|
|
bitmap->changed= 1;
|
|
|
|
DBUG_EXECUTE("bitmap", _ma_print_bitmap_changes(bitmap););
|
2007-01-18 20:38:14 +01:00
|
|
|
DBUG_RETURN(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
WL#3072 - Maria recovery.
* fix for bitmap vs checkpoint bug which could lead to corrupted
tables in case of crashes at certain moments: a bitmap could be flushed
to disk even though it was inconsistent with the log (it could be
flushed before REDO-UNDO are written to the log). One bug remains, need
code from others. Tests added. Fix is to pin unflushable bitmap pages,
and let checkpoint wait for them to be flushable.
* fix for long_trid!=0 assertion failure at Recovery.
* less useless wakeups in the background flush|checkpoint thread.
* store global_trid_generator in checkpoint record.
mysql-test/r/maria-recovery.result:
result update
mysql-test/t/maria-recovery.test:
make it easier to locate subtests
storage/maria/ma_bitmap.c:
When we send a bitmap to the pagecache, if this bitmap is not in a
flushable state we keep it pinned and add it to a list, it will be
unpinned when the bitmap is flushable again.
A new function _ma_bitmap_flush_all() used by checkpoint.
A new function _ma_bitmap_flushable() used by block format to signal
when it starts modifying a bitmap and when it is done with it.
storage/maria/ma_blockrec.c:
When starting a row operation (insert/update/delete), mark that
the bitmap is not flushable (because for example INSERT is going
to over-allocate in the bitmap to prevent other threads from using
our data pages). If a checkpoint comes at this moment it will wait
for the bitmap to be flushable before flushing it.
When the operation ends, bitmap becomes flushable again; that
transition is done under the bitmap's mutex (needed for correct
synchro with a concurrent checkpoint); but for INSERT/UPDATE this
happens inside _ma_bitmap_release_unused() at a place where it already
has the mutex, so the only penalty (mutex adding) is in DELETE and UNDO
of INSERT. In case of errors after setting the bitmap unflushable,
we must always set it back to flushable or checkpoint would block.
Debug possibilities to force a sleep while the bitmap is over-allocated.
In case of error in get_head_or_tail() in allocate_and_write_block_record(),
we still need to unpin all pages.
Bugfix: _ma_apply_redo_insert_row_blobs() produced wrong
data_file_length.
storage/maria/ma_blockrec.h:
new bitmap calls.
storage/maria/ma_checkpoint.c:
filter_flush_indirect not needed anymore (flushing bitmap
pages happens in _ma_bitmap_flush_all() now). So
st_filter_param::is_data_file|pages_covered_by_bitmap not needed.
Other filter_flush* don't need to flush bitmap anymore.
Add debug possibility to flush all bitmap pages outside of a checkpoint,
to simulate pagecache LRU eviction.
When the background flush/checkpoint thread notices it has nothing
to flush, it now sleeps directly until the next potential checkpoint
moment instead of waking up every second.
When in checkpoint we decide to not store a table in the checkpoint record
(because it has logged no writes for example), we can also skip flushing
this table.
storage/maria/ma_commit.c:
comment is out-of-date
storage/maria/ma_key_recover.c:
comment fix
storage/maria/ma_loghandler.c:
comment is out-of-date
storage/maria/ma_open.c:
comment is out-of-date
storage/maria/ma_pagecache.c:
comment for bug to fix. And we don't take checkpoints at end of REDO
phase yet so can trust block->type.
storage/maria/ma_recovery.c:
Comments. Now-unneeded code for incomplete REDO-UNDO groups removed.
When we forget about an old transaction we must really forget
about it with bzero() (fixes the "long_trid!=0 assertion" recovery
bug). When we delete a row with maria_delete() we turn on
STATE_NOT_OPTIMIZED_ROWS so we do the same when we see a CLR_END
for an UNDO_ROW_INSERT or when we execute an UNDO_ROW_INSERT (in both
cases a row was deleted). Pick up max_long_trid from the checkpoint record.
storage/maria/maria_chk.c:
comment
storage/maria/maria_def.h:
MARIA_FILE_BITMAP gets new members: 'flushable', 'bitmap_cond' and
'pinned_pages'.
storage/maria/trnman.c:
I used to think that recovery only needs to know the maximum TrID
of the lists of active and committed transactions. But no, sometimes
both lists can even be empty and their TrID should not be reused.
So Checkpoint now saves global_trid_generator in the checkpoint record.
storage/maria/trnman_public.h:
macros to read/store a TrID
mysql-test/r/maria-recovery-bitmap.result:
result is ok. Without the code fix, we would get a corruption message
about the bitmap page in CHECK TABLE EXTENDED.
mysql-test/t/maria-recovery-bitmap-master.opt:
usual when we crash mysqld in tests
mysql-test/t/maria-recovery-bitmap.test:
test of recovery problems specific of the bitmap pages.
2007-12-14 16:14:12 +01:00
|
|
|
/**
|
UNDO of rows now puts back all part of the row on their original pages and positions
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Added --debug-on option to mysqld (to be able to turn off DBUG with --debug-on=0)
Fixed some bugs with 'non_flushable' marking of bitmap pages
Don't use 'non_flushable' marking of bitmap pages for not transactional tables
SHOW CREATE TABLE now shows if table was created with page checksums
Fixed a lot of bugs with BLOB handling in case of update/REDO and UNDO
More tests (especially for blobs) and DBUG_ASSERTS()
More readable output from maria_read_log and maria_chk
Fixed wrong shift that caused Maria to crash on files > 4G
Mark tables as crashed if REDO fails
dbug/dbug.c:
Changed to use my_bool (allowed me to remove some windows specific code)
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Removed initialization of variables if not needed
include/my_dbug.h:
Use my_bool for some functions that was defined as BOOLEAN in dbug.c code
Added DBUGGER_ON/DEBUGGER_OFF to speed up execution when DBUG is not used
include/my_global.h:
Define my_bool early
Increase MY_HOW_OFTEN_TO_WRITE as computers are now faster than 10 years ago
mysql-test/mysql-test-run.pl:
Added debug-on=0 to speed up tests
mysql-test/r/maria-recovery.result:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/r/maria.result:
Added testing of page checksums
mysql-test/t/crash_commit_before-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-bitmap-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery.test:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/t/maria.test:
Added testing of page checksums
sql/mysqld.cc:
Added --debug-on option (to be able to turn off DBUG with --debug-on=0)
Indentation fixes
Removed end spaces
sql/sql_show.cc:
Allow update_create_info() to inform MySQL if PACK_KEYS, NO_PACK_KEYS, CHECKSUM, PAGE_CHECKSUM or DELAY_KEY_WRITE is used
storage/maria/Makefile.am:
Added ma_test_big.sh
storage/maria/ha_maria.cc:
Store in create_info if page checksums are used (For SHOW CREATE TABLE)
storage/maria/ma_bitmap.c:
Added _ma_bitmap_wait_or_flush() to cause reader of bitmap pages to wait with reading until bitmap is flushed.
Use TAIL_PAGE_COUNT_MARKER for tail pages
Set 'sub_blocks' for and only for the head page or for the first extent of a blob. This is needed for store_extent_info() to be able to set START_EXTENT_BIT's
Don't allocate more than 0x3ffff pages in one extent (We need bit 0x4000 as a START_EXTENT_BIT)
Increase the calculated 'head_length' with the number of bytes used for extents.
Update row->space_on_head_page also in _ma_bitmap_find_new_place()
Make _ma_bitmap_get_page_bits() global. (Needed for UNDO handling)
Changed _ma_bitmap_flushable() to take MARIA_HA instead of MARIA_SHARE.
This was needed to be able to mark the handler if we had a 'non_flushable' call pending or not.
Don't use 'non_flushable' marking of bitmap pages for not transactional tables.
Added BLOCKUSED_USE_ORG_BITMAP handling also for tail pages.
Added more DBUG_ASSERT() to find possible errors in other code
Some code simplifications by adding new local variables
storage/maria/ma_blockrec.c:
UNDO of rows now puts back all part of the row on their original pages and positions.
Changed UNDO of DELETE and UNDO of UPDATE to contain information about the original length of data on head block and also extent information
This changes a lot of logic as now an insert of a row on a page may happen to any position (and not just to the first or next free)
Use PAGE_COUNT to mark if an extent is the start of of a blob. (Needed for extent_to_bitmap_blocks())
Added check_directory() for checking that directory entries are correct.
Added checking of row checksums when reading rows (with EXTRA_DEBUG)
Added make_space_for_directory() and extend_directory() for doing expansion of directory
Added get_rowpos_in_head_or_tail_page() to be able to store head/tail on original position in UNDO
Added extent_to_bitmap_blocks() to be able to generate original bitmap blocks from UNDO entry
Added _ma_update_at_original_place() for UNDO of DELETES
Added row->min_length to hold minimum required space needed on head page
Changed find_free_position() to use make_space_for_directory()
Changed make_empty_page() to allow optional creation of directory entry
Changed delete_head_or_tail() and _ma_apply_undo_row_isnert() to not copy pagecache block (speed optimization)
Changed _ma_apply_redo_insert_row_head_or_tail() to be able to insert new row at any position on 'new' page
Changed _ma_apply_undo_row_delete() and _ma_apply_undo_row_update() to put row in it's original position
Ensure allocation of tail blocks are of at least MIN_TAIL_SIZE.
Ensure we store pages in pinned pages even if read failed. (If not we will have pages pinned forever in page cache)
Write original extent information in UNDO entry, not compacted ones (we need position to tails!)
When setting BLOCKUSED_USED, don't clear other bits (we have to preserve BLOCKUSED_USE_ORG_BITMAP)
Fixed some bugs in directory handling
Fixed bug where we wrote wrong lsn to blob pages
Added separate blob_buffer for fixing bug when updating row that had char/varchar that spanned several pages and also had blobs
Ensure we call _ma_bitmap_flushable() also in case of errors
When doing an update, first delete old entries, then search in bitmap for where to put new information
Info->s -> share
Rowid -> rowid
More DBUG_ASSERT()
storage/maria/ma_blockrec.h:
Added START_EXTENT_BIT and TAIL_PAGE_COUNT_MARKER
Added _ma_bitmap_wait_or_flush() and _ma_bitmap_get_page_bits()
storage/maria/ma_check.c:
Don't write extra empty line if there is no deleted blocks
Ignore START_EXTENT_BIT's in page count
Call _ma_fast_unlock_key_del() to free key_del link
storage/maria/ma_close.c:
Ensure that used_key_del is 0. (If not, someone forgot to call _ma_unlock_key_del())
storage/maria/ma_create.c:
Changed constant to macro
storage/maria/ma_delete.c:
For deleted keys, log also position to row
storage/maria/ma_extra.c:
Release blob buffer at maria_reset() if bigger than MARIA_SMALL_BLOB_BUFFER
storage/maria/ma_key_recover.c:
Added bzero() of LSN that confused paged cache in case of uninitialized block
Mark file crashed if applying of index changes fails
Added calls to _ma_fast_unlock_key_del() for protection of shared key_del link.
storage/maria/ma_locking.c:
Added usage of MARIA_FILE_OPEN_COUNT_OFFSET
Added _ma_mark_file_crashed()
storage/maria/ma_loghandler.c:
Fixed bug where we logged uninitialized memory
storage/maria/ma_open.c:
Moved state->changed to be at start of state info on disk to allow one to easily mark files as crashed
storage/maria/ma_page.c:
Disable 'dummy' checksumming of pages as this gave false warnings.
(Need to investigate if this is ever needed)
storage/maria/ma_pagecache.c:
Fixed wrong shift that caused Maria to crash on files > 4G
storage/maria/ma_recovery.c:
In case of errors, start writing on new line if we where in %## %## printing mode (Made errors more readable)
Changed global variable name from warnings -> recovery_warnings
Use MARIA_FILE_CREATE_RENAME_LSN_OFFSET instead of constant
Removed special handling of row position for deleted keys. Keys now always includes row positions
_ma_apply_undo_row_delete() now gets page and row position
Added check that we don't loop forever when handling undo's (in case of bug in undo chain)
Print name of failed REDO/UNDO
storage/maria/ma_recovery.h:
Removed old comment
storage/maria/ma_static.c:
Changed version number of Maria files to not accidentally use old ones (because of change of ordering of status variables)
storage/maria/ma_test2.c:
Added option -u to specify number of rows to update
Changed old option -u to be -A, as for ma_test1
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
First created blob is now of max blob length to ensure we have at least one big blob in the table
storage/maria/ma_test_all.sh:
More tests
storage/maria/ma_test_recovery.expected:
Updated results
storage/maria/ma_test_recovery:
Changed tests to use bigger blobs (not just 1K)
Added new tests that tests recovery of update with blobs
Removed comparison of .MAD file as it's not guaranteed that recovery from scratch gives identical data file as original update
(compact_page() may be called at different times during normal execution and during REDO)
storage/maria/ma_update.c:
Simplify code (changed * to if)
storage/maria/maria_chk.c:
Make output more readable
storage/maria/maria_def.h:
Changed 'changed' to int to prepare for more bits
Added 2 more bytes to status information
Added 'st_mara_row->min_length' for storing min length needed on head page
Added 'st_mara_handler->blob_buff & blob_buff_size' for storing blobs
Moved all tuning parameters into one block
Added MARIA_SMALL_BLOB_BUFFER
Added _ma_mark_file_crashed()
storage/myisam/mi_test2.c:
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
storage/maria/ma_test_big.sh:
Testing of insert, update, delete, recovery and undo of rows with blobs
Thanks to the random-ness of ma_test2 this is likely to find most bugs in the row handling
2007-12-30 21:40:03 +01:00
|
|
|
@brief
|
2007-12-15 14:17:23 +01:00
|
|
|
Make a transition of MARIA_FILE_BITMAP::non_flushable.
|
WL#3072 - Maria recovery.
* fix for bitmap vs checkpoint bug which could lead to corrupted
tables in case of crashes at certain moments: a bitmap could be flushed
to disk even though it was inconsistent with the log (it could be
flushed before REDO-UNDO are written to the log). One bug remains, need
code from others. Tests added. Fix is to pin unflushable bitmap pages,
and let checkpoint wait for them to be flushable.
* fix for long_trid!=0 assertion failure at Recovery.
* less useless wakeups in the background flush|checkpoint thread.
* store global_trid_generator in checkpoint record.
mysql-test/r/maria-recovery.result:
result update
mysql-test/t/maria-recovery.test:
make it easier to locate subtests
storage/maria/ma_bitmap.c:
When we send a bitmap to the pagecache, if this bitmap is not in a
flushable state we keep it pinned and add it to a list, it will be
unpinned when the bitmap is flushable again.
A new function _ma_bitmap_flush_all() used by checkpoint.
A new function _ma_bitmap_flushable() used by block format to signal
when it starts modifying a bitmap and when it is done with it.
storage/maria/ma_blockrec.c:
When starting a row operation (insert/update/delete), mark that
the bitmap is not flushable (because for example INSERT is going
to over-allocate in the bitmap to prevent other threads from using
our data pages). If a checkpoint comes at this moment it will wait
for the bitmap to be flushable before flushing it.
When the operation ends, bitmap becomes flushable again; that
transition is done under the bitmap's mutex (needed for correct
synchro with a concurrent checkpoint); but for INSERT/UPDATE this
happens inside _ma_bitmap_release_unused() at a place where it already
has the mutex, so the only penalty (mutex adding) is in DELETE and UNDO
of INSERT. In case of errors after setting the bitmap unflushable,
we must always set it back to flushable or checkpoint would block.
Debug possibilities to force a sleep while the bitmap is over-allocated.
In case of error in get_head_or_tail() in allocate_and_write_block_record(),
we still need to unpin all pages.
Bugfix: _ma_apply_redo_insert_row_blobs() produced wrong
data_file_length.
storage/maria/ma_blockrec.h:
new bitmap calls.
storage/maria/ma_checkpoint.c:
filter_flush_indirect not needed anymore (flushing bitmap
pages happens in _ma_bitmap_flush_all() now). So
st_filter_param::is_data_file|pages_covered_by_bitmap not needed.
Other filter_flush* don't need to flush bitmap anymore.
Add debug possibility to flush all bitmap pages outside of a checkpoint,
to simulate pagecache LRU eviction.
When the background flush/checkpoint thread notices it has nothing
to flush, it now sleeps directly until the next potential checkpoint
moment instead of waking up every second.
When in checkpoint we decide to not store a table in the checkpoint record
(because it has logged no writes for example), we can also skip flushing
this table.
storage/maria/ma_commit.c:
comment is out-of-date
storage/maria/ma_key_recover.c:
comment fix
storage/maria/ma_loghandler.c:
comment is out-of-date
storage/maria/ma_open.c:
comment is out-of-date
storage/maria/ma_pagecache.c:
comment for bug to fix. And we don't take checkpoints at end of REDO
phase yet so can trust block->type.
storage/maria/ma_recovery.c:
Comments. Now-unneeded code for incomplete REDO-UNDO groups removed.
When we forget about an old transaction we must really forget
about it with bzero() (fixes the "long_trid!=0 assertion" recovery
bug). When we delete a row with maria_delete() we turn on
STATE_NOT_OPTIMIZED_ROWS so we do the same when we see a CLR_END
for an UNDO_ROW_INSERT or when we execute an UNDO_ROW_INSERT (in both
cases a row was deleted). Pick up max_long_trid from the checkpoint record.
storage/maria/maria_chk.c:
comment
storage/maria/maria_def.h:
MARIA_FILE_BITMAP gets new members: 'flushable', 'bitmap_cond' and
'pinned_pages'.
storage/maria/trnman.c:
I used to think that recovery only needs to know the maximum TrID
of the lists of active and committed transactions. But no, sometimes
both lists can even be empty and their TrID should not be reused.
So Checkpoint now saves global_trid_generator in the checkpoint record.
storage/maria/trnman_public.h:
macros to read/store a TrID
mysql-test/r/maria-recovery-bitmap.result:
result is ok. Without the code fix, we would get a corruption message
about the bitmap page in CHECK TABLE EXTENDED.
mysql-test/t/maria-recovery-bitmap-master.opt:
usual when we crash mysqld in tests
mysql-test/t/maria-recovery-bitmap.test:
test of recovery problems specific to the bitmap pages.
2007-12-14 16:14:12 +01:00
|
|
|
If the bitmap becomes flushable, which requires that REDO-UNDO has been
|
|
|
|
logged and all bitmap pages touched by the thread have a correct
|
2007-12-15 14:17:23 +01:00
|
|
|
allocation, it unpins all bitmap pages, and if _ma_bitmap_flush_all() is
|
|
|
|
waiting (in practice it is a checkpoint), it wakes it up.
|
|
|
|
If the bitmap becomes or stays unflushable, the function merely records it
|
|
|
|
unless a concurrent _ma_bitmap_flush_all() is happening, in which case the
|
|
|
|
function first waits for the flush to be done.
|
WL#3072 - Maria recovery.
* fix for bitmap vs checkpoint bug which could lead to corrupted
tables in case of crashes at certain moments: a bitmap could be flushed
to disk even though it was inconsistent with the log (it could be
flushed before REDO-UNDO are written to the log). One bug remains, need
code from others. Tests added. Fix is to pin unflushable bitmap pages,
and let checkpoint wait for them to be flushable.
* fix for long_trid!=0 assertion failure at Recovery.
* less useless wakeups in the background flush|checkpoint thread.
* store global_trid_generator in checkpoint record.
mysql-test/r/maria-recovery.result:
result update
mysql-test/t/maria-recovery.test:
make it easier to locate subtests
storage/maria/ma_bitmap.c:
When we send a bitmap to the pagecache, if this bitmap is not in a
flushable state we keep it pinned and add it to a list, it will be
unpinned when the bitmap is flushable again.
A new function _ma_bitmap_flush_all() used by checkpoint.
A new function _ma_bitmap_flushable() used by block format to signal
when it starts modifying a bitmap and when it is done with it.
storage/maria/ma_blockrec.c:
When starting a row operation (insert/update/delete), mark that
the bitmap is not flushable (because for example INSERT is going
to over-allocate in the bitmap to prevent other threads from using
our data pages). If a checkpoint comes at this moment it will wait
for the bitmap to be flushable before flushing it.
When the operation ends, bitmap becomes flushable again; that
transition is done under the bitmap's mutex (needed for correct
synchro with a concurrent checkpoint); but for INSERT/UPDATE this
happens inside _ma_bitmap_release_unused() at a place where it already
has the mutex, so the only penalty (mutex adding) is in DELETE and UNDO
of INSERT. In case of errors after setting the bitmap unflushable,
we must always set it back to flushable or checkpoint would block.
Debug possibilities to force a sleep while the bitmap is over-allocated.
In case of error in get_head_or_tail() in allocate_and_write_block_record(),
we still need to unpin all pages.
Bugfix: _ma_apply_redo_insert_row_blobs() produced wrong
data_file_length.
storage/maria/ma_blockrec.h:
new bitmap calls.
storage/maria/ma_checkpoint.c:
filter_flush_indirect not needed anymore (flushing bitmap
pages happens in _ma_bitmap_flush_all() now). So
st_filter_param::is_data_file|pages_covered_by_bitmap not needed.
Other filter_flush* don't need to flush bitmap anymore.
Add debug possibility to flush all bitmap pages outside of a checkpoint,
to simulate pagecache LRU eviction.
When the background flush/checkpoint thread notices it has nothing
to flush, it now sleeps directly until the next potential checkpoint
moment instead of waking up every second.
When in checkpoint we decide to not store a table in the checkpoint record
(because it has logged no writes for example), we can also skip flushing
this table.
storage/maria/ma_commit.c:
comment is out-of-date
storage/maria/ma_key_recover.c:
comment fix
storage/maria/ma_loghandler.c:
comment is out-of-date
storage/maria/ma_open.c:
comment is out-of-date
storage/maria/ma_pagecache.c:
comment for bug to fix. And we don't take checkpoints at end of REDO
phase yet so can trust block->type.
storage/maria/ma_recovery.c:
Comments. Now-unneeded code for incomplete REDO-UNDO groups removed.
When we forget about an old transaction we must really forget
about it with bzero() (fixes the "long_trid!=0 assertion" recovery
bug). When we delete a row with maria_delete() we turn on
STATE_NOT_OPTIMIZED_ROWS so we do the same when we see a CLR_END
for an UNDO_ROW_INSERT or when we execute an UNDO_ROW_INSERT (in both
cases a row was deleted). Pick up max_long_trid from the checkpoint record.
storage/maria/maria_chk.c:
comment
storage/maria/maria_def.h:
MARIA_FILE_BITMAP gets new members: 'flushable', 'bitmap_cond' and
'pinned_pages'.
storage/maria/trnman.c:
I used to think that recovery only needs to know the maximum TrID
of the lists of active and committed transactions. But no, sometimes
both lists can even be empty and their TrID should not be reused.
So Checkpoint now saves global_trid_generator in the checkpoint record.
storage/maria/trnman_public.h:
macros to read/store a TrID
mysql-test/r/maria-recovery-bitmap.result:
result is ok. Without the code fix, we would get a corruption message
about the bitmap page in CHECK TABLE EXTENDED.
mysql-test/t/maria-recovery-bitmap-master.opt:
usual when we crash mysqld in tests
mysql-test/t/maria-recovery-bitmap.test:
test of recovery problems specific to the bitmap pages.
2007-12-14 16:14:12 +01:00
|
|
|
|
UNDO of rows now puts back all part of the row on their original pages and positions
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Added --debug-on option to mysqld (to be able to turn off DBUG with --debug-on=0)
Fixed some bugs with 'non_flushable' marking of bitmap pages
Don't use 'non_flushable' marking of bitmap pages for not transactional tables
SHOW CREATE TABLE now shows if table was created with page checksums
Fixed a lot of bugs with BLOB handling in case of update/REDO and UNDO
More tests (especially for blobs) and DBUG_ASSERTS()
More readable output from maria_read_log and maria_chk
Fixed wrong shift that caused Maria to crash on files > 4G
Mark tables as crashed if REDO fails
dbug/dbug.c:
Changed to use my_bool (allowed me to remove some windows specific code)
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Removed initialization of variables if not needed
include/my_dbug.h:
Use my_bool for some functions that was defined as BOOLEAN in dbug.c code
Added DEBUGGER_ON/DEBUGGER_OFF to speed up execution when DBUG is not used
include/my_global.h:
Define my_bool early
Increase MY_HOW_OFTEN_TO_WRITE as computers are now faster than 10 years ago
mysql-test/mysql-test-run.pl:
Added debug-on=0 to speed up tests
mysql-test/r/maria-recovery.result:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/r/maria.result:
Added testing of page checksums
mysql-test/t/crash_commit_before-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-bitmap-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery.test:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/t/maria.test:
Added testing of page checksums
sql/mysqld.cc:
Added --debug-on option (to be able to turn off DBUG with --debug-on=0)
Indentation fixes
Removed end spaces
sql/sql_show.cc:
Allow update_create_info() to inform MySQL if PACK_KEYS, NO_PACK_KEYS, CHECKSUM, PAGE_CHECKSUM or DELAY_KEY_WRITE is used
storage/maria/Makefile.am:
Added ma_test_big.sh
storage/maria/ha_maria.cc:
Store in create_info if page checksums are used (For SHOW CREATE TABLE)
storage/maria/ma_bitmap.c:
Added _ma_bitmap_wait_or_flush() to cause reader of bitmap pages to wait with reading until bitmap is flushed.
Use TAIL_PAGE_COUNT_MARKER for tail pages
Set 'sub_blocks' for and only for the head page or for the first extent of a blob. This is needed for store_extent_info() to be able to set START_EXTENT_BIT's
Don't allocate more than 0x3fff pages in one extent (We need bit 0x4000 as a START_EXTENT_BIT)
Increase the calculated 'head_length' with the number of bytes used for extents.
Update row->space_on_head_page also in _ma_bitmap_find_new_place()
Make _ma_bitmap_get_page_bits() global. (Needed for UNDO handling)
Changed _ma_bitmap_flushable() to take MARIA_HA instead of MARIA_SHARE.
This was needed to be able to mark the handler if we had a 'non_flushable' call pending or not.
Don't use 'non_flushable' marking of bitmap pages for not transactional tables.
Added BLOCKUSED_USE_ORG_BITMAP handling also for tail pages.
Added more DBUG_ASSERT() to find possible errors in other code
Some code simplifications by adding new local variables
storage/maria/ma_blockrec.c:
UNDO of rows now puts back all part of the row on their original pages and positions.
Changed UNDO of DELETE and UNDO of UPDATE to contain information about the original length of data on head block and also extent information
This changes a lot of logic as now an insert of a row on a page may happen to any position (and not just to the first or next free)
Use PAGE_COUNT to mark if an extent is the start of a blob. (Needed for extent_to_bitmap_blocks())
Added check_directory() for checking that directory entries are correct.
Added checking of row checksums when reading rows (with EXTRA_DEBUG)
Added make_space_for_directory() and extend_directory() for doing expansion of directory
Added get_rowpos_in_head_or_tail_page() to be able to store head/tail on original position in UNDO
Added extent_to_bitmap_blocks() to be able to generate original bitmap blocks from UNDO entry
Added _ma_update_at_original_place() for UNDO of DELETES
Added row->min_length to hold minimum required space needed on head page
Changed find_free_position() to use make_space_for_directory()
Changed make_empty_page() to allow optional creation of directory entry
Changed delete_head_or_tail() and _ma_apply_undo_row_insert() to not copy pagecache block (speed optimization)
Changed _ma_apply_redo_insert_row_head_or_tail() to be able to insert new row at any position on 'new' page
Changed _ma_apply_undo_row_delete() and _ma_apply_undo_row_update() to put row in its original position
Ensure allocation of tail blocks are of at least MIN_TAIL_SIZE.
Ensure we store pages in pinned pages even if read failed. (If not we will have pages pinned forever in page cache)
Write original extent information in UNDO entry, not compacted ones (we need position to tails!)
When setting BLOCKUSED_USED, don't clear other bits (we have to preserve BLOCKUSED_USE_ORG_BITMAP)
Fixed some bugs in directory handling
Fixed bug where we wrote wrong lsn to blob pages
Added separate blob_buffer for fixing bug when updating row that had char/varchar that spanned several pages and also had blobs
Ensure we call _ma_bitmap_flushable() also in case of errors
When doing an update, first delete old entries, then search in bitmap for where to put new information
Info->s -> share
Rowid -> rowid
More DBUG_ASSERT()
storage/maria/ma_blockrec.h:
Added START_EXTENT_BIT and TAIL_PAGE_COUNT_MARKER
Added _ma_bitmap_wait_or_flush() and _ma_bitmap_get_page_bits()
storage/maria/ma_check.c:
Don't write extra empty line if there is no deleted blocks
Ignore START_EXTENT_BIT's in page count
Call _ma_fast_unlock_key_del() to free key_del link
storage/maria/ma_close.c:
Ensure that used_key_del is 0. (If not, someone forgot to call _ma_unlock_key_del())
storage/maria/ma_create.c:
Changed constant to macro
storage/maria/ma_delete.c:
For deleted keys, log also position to row
storage/maria/ma_extra.c:
Release blob buffer at maria_reset() if bigger than MARIA_SMALL_BLOB_BUFFER
storage/maria/ma_key_recover.c:
Added bzero() of LSN that confused paged cache in case of uninitialized block
Mark file crashed if applying of index changes fails
Added calls to _ma_fast_unlock_key_del() for protection of shared key_del link.
storage/maria/ma_locking.c:
Added usage of MARIA_FILE_OPEN_COUNT_OFFSET
Added _ma_mark_file_crashed()
storage/maria/ma_loghandler.c:
Fixed bug where we logged uninitialized memory
storage/maria/ma_open.c:
Moved state->changed to be at start of state info on disk to allow one to easily mark files as crashed
storage/maria/ma_page.c:
Disable 'dummy' checksumming of pages as this gave false warnings.
(Need to investigate if this is ever needed)
storage/maria/ma_pagecache.c:
Fixed wrong shift that caused Maria to crash on files > 4G
storage/maria/ma_recovery.c:
In case of errors, start writing on new line if we were in %## %## printing mode (Made errors more readable)
Changed global variable name from warnings -> recovery_warnings
Use MARIA_FILE_CREATE_RENAME_LSN_OFFSET instead of constant
Removed special handling of row position for deleted keys. Keys now always includes row positions
_ma_apply_undo_row_delete() now gets page and row position
Added check that we don't loop forever when handling undo's (in case of bug in undo chain)
Print name of failed REDO/UNDO
storage/maria/ma_recovery.h:
Removed old comment
storage/maria/ma_static.c:
Changed version number of Maria files to not accidentally use old ones (because of change of ordering of status variables)
storage/maria/ma_test2.c:
Added option -u to specify number of rows to update
Changed old option -u to be -A, as for ma_test1
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
First created blob is now of max blob length to ensure we have at least one big blob in the table
storage/maria/ma_test_all.sh:
More tests
storage/maria/ma_test_recovery.expected:
Updated results
storage/maria/ma_test_recovery:
Changed tests to use bigger blobs (not just 1K)
Added new tests that tests recovery of update with blobs
Removed comparison of .MAD file as it's not guaranteed that recovery from scratch gives identical data file as original update
(compact_page() may be called at different times during normal execution and during REDO)
storage/maria/ma_update.c:
Simplify code (changed * to if)
storage/maria/maria_chk.c:
Make output more readable
storage/maria/maria_def.h:
Changed 'changed' to int to prepare for more bits
Added 2 more bytes to status information
Added 'st_maria_row->min_length' for storing min length needed on head page
Added 'st_maria_handler->blob_buff & blob_buff_size' for storing blobs
Moved all tuning parameters into one block
Added MARIA_SMALL_BLOB_BUFFER
Added _ma_mark_file_crashed()
storage/myisam/mi_test2.c:
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
storage/maria/ma_test_big.sh:
Testing of insert, update, delete, recovery and undo of rows with blobs
Thanks to the random-ness of ma_test2 this is likely to find most bugs in the row handling
2007-12-30 21:40:03 +01:00
|
|
|
@note
|
2008-10-14 11:38:07 +02:00
|
|
|
this sets info->non_flushable_state to 1 if we have incremented
|
|
|
|
bitmap->non_flushable and not yet decremented it.
|
UNDO of rows now puts back all part of the row on their original pages and positions
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Added --debug-on option to mysqld (to be able to turn off DBUG with --debug-on=0)
Fixed some bugs with 'non_flushable' marking of bitmap pages
Don't use 'non_flushable' marking of bitmap pages for not transactional tables
SHOW CREATE TABLE now shows if table was created with page checksums
Fixed a lot of bugs with BLOB handling in case of update/REDO and UNDO
More tests (especially for blobs) and DBUG_ASSERTS()
More readable output from maria_read_log and maria_chk
Fixed wrong shift that caused Maria to crash on files > 4G
Mark tables as crashed if REDO fails
dbug/dbug.c:
Changed to use my_bool (allowed me to remove some windows specific code)
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Removed initialization of variables if not needed
include/my_dbug.h:
Use my_bool for some functions that was defined as BOOLEAN in dbug.c code
Added DEBUGGER_ON/DEBUGGER_OFF to speed up execution when DBUG is not used
include/my_global.h:
Define my_bool early
Increase MY_HOW_OFTEN_TO_WRITE as computers are now faster than 10 years ago
mysql-test/mysql-test-run.pl:
Added debug-on=0 to speed up tests
mysql-test/r/maria-recovery.result:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/r/maria.result:
Added testing of page checksums
mysql-test/t/crash_commit_before-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-bitmap-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery.test:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/t/maria.test:
Added testing of page checksums
sql/mysqld.cc:
Added --debug-on option (to be able to turn off DBUG with --debug-on=0)
Indentation fixes
Removed end spaces
sql/sql_show.cc:
Allow update_create_info() to inform MySQL if PACK_KEYS, NO_PACK_KEYS, CHECKSUM, PAGE_CHECKSUM or DELAY_KEY_WRITE is used
storage/maria/Makefile.am:
Added ma_test_big.sh
storage/maria/ha_maria.cc:
Store in create_info if page checksums are used (For SHOW CREATE TABLE)
storage/maria/ma_bitmap.c:
Added _ma_bitmap_wait_or_flush() to cause reader of bitmap pages to wait with reading until bitmap is flushed.
Use TAIL_PAGE_COUNT_MARKER for tail pages
Set 'sub_blocks' for and only for the head page or for the first extent of a blob. This is needed for store_extent_info() to be able to set START_EXTENT_BIT's
Don't allocate more than 0x3fff pages in one extent (We need bit 0x4000 as a START_EXTENT_BIT)
Increase the calculated 'head_length' with the number of bytes used for extents.
Update row->space_on_head_page also in _ma_bitmap_find_new_place()
Make _ma_bitmap_get_page_bits() global. (Needed for UNDO handling)
Changed _ma_bitmap_flushable() to take MARIA_HA instead of MARIA_SHARE.
This was needed to be able to mark the handler if we had a 'non_flushable' call pending or not.
Don't use 'non_flushable' marking of bitmap pages for not transactional tables.
Added BLOCKUSED_USE_ORG_BITMAP handling also for tail pages.
Added more DBUG_ASSERT() to find possible errors in other code
Some code simplifications by adding new local variables
storage/maria/ma_blockrec.c:
UNDO of rows now puts back all part of the row on their original pages and positions.
Changed UNDO of DELETE and UNDO of UPDATE to contain information about the original length of data on head block and also extent information
This changes a lot of logic as now an insert of a row on a page may happen to any position (and not just to the first or next free)
Use PAGE_COUNT to mark if an extent is the start of a blob. (Needed for extent_to_bitmap_blocks())
Added check_directory() for checking that directory entries are correct.
Added checking of row checksums when reading rows (with EXTRA_DEBUG)
Added make_space_for_directory() and extend_directory() for doing expansion of directory
Added get_rowpos_in_head_or_tail_page() to be able to store head/tail on original position in UNDO
Added extent_to_bitmap_blocks() to be able to generate original bitmap blocks from UNDO entry
Added _ma_update_at_original_place() for UNDO of DELETES
Added row->min_length to hold minimum required space needed on head page
Changed find_free_position() to use make_space_for_directory()
Changed make_empty_page() to allow optional creation of directory entry
Changed delete_head_or_tail() and _ma_apply_undo_row_insert() to not copy pagecache block (speed optimization)
Changed _ma_apply_redo_insert_row_head_or_tail() to be able to insert new row at any position on 'new' page
Changed _ma_apply_undo_row_delete() and _ma_apply_undo_row_update() to put row in its original position
Ensure allocation of tail blocks are of at least MIN_TAIL_SIZE.
Ensure we store pages in pinned pages even if read failed. (If not we will have pages pinned forever in page cache)
Write original extent information in UNDO entry, not compacted ones (we need position to tails!)
When setting BLOCKUSED_USED, don't clear other bits (we have to preserve BLOCKUSED_USE_ORG_BITMAP)
Fixed some bugs in directory handling
Fixed bug where we wrote wrong lsn to blob pages
Added separate blob_buffer for fixing bug when updating row that had char/varchar that spanned several pages and also had blobs
Ensure we call _ma_bitmap_flushable() also in case of errors
When doing an update, first delete old entries, then search in bitmap for where to put new information
Info->s -> share
Rowid -> rowid
More DBUG_ASSERT()
storage/maria/ma_blockrec.h:
Added START_EXTENT_BIT and TAIL_PAGE_COUNT_MARKER
Added _ma_bitmap_wait_or_flush() and _ma_bitmap_get_page_bits()
storage/maria/ma_check.c:
Don't write extra empty line if there is no deleted blocks
Ignore START_EXTENT_BIT's in page count
Call _ma_fast_unlock_key_del() to free key_del link
storage/maria/ma_close.c:
Ensure that used_key_del is 0. (If not, someone forgot to call _ma_unlock_key_del())
storage/maria/ma_create.c:
Changed constant to macro
storage/maria/ma_delete.c:
For deleted keys, log also position to row
storage/maria/ma_extra.c:
Release blob buffer at maria_reset() if bigger than MARIA_SMALL_BLOB_BUFFER
storage/maria/ma_key_recover.c:
Added bzero() of LSN that confused paged cache in case of uninitialized block
Mark file crashed if applying of index changes fails
Added calls to _ma_fast_unlock_key_del() for protection of shared key_del link.
storage/maria/ma_locking.c:
Added usage of MARIA_FILE_OPEN_COUNT_OFFSET
Added _ma_mark_file_crashed()
storage/maria/ma_loghandler.c:
Fixed bug where we logged uninitialized memory
storage/maria/ma_open.c:
Moved state->changed to be at start of state info on disk to allow one to easily mark files as crashed
storage/maria/ma_page.c:
Disable 'dummy' checksumming of pages as this gave false warnings.
(Need to investigate if this is ever needed)
storage/maria/ma_pagecache.c:
Fixed wrong shift that caused Maria to crash on files > 4G
storage/maria/ma_recovery.c:
In case of errors, start writing on new line if we were in %## %## printing mode (Made errors more readable)
Changed global variable name from warnings -> recovery_warnings
Use MARIA_FILE_CREATE_RENAME_LSN_OFFSET instead of constant
Removed special handling of row position for deleted keys. Keys now always includes row positions
_ma_apply_undo_row_delete() now gets page and row position
Added check that we don't loop forever when handling undo's (in case of bug in undo chain)
Print name of failed REDO/UNDO
storage/maria/ma_recovery.h:
Removed old comment
storage/maria/ma_static.c:
Changed version number of Maria files to not accidentally use old ones (because of change of ordering of status variables)
storage/maria/ma_test2.c:
Added option -u to specify number of rows to update
Changed old option -u to be -A, as for ma_test1
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
First created blob is now of max blob length to ensure we have at least one big blob in the table
storage/maria/ma_test_all.sh:
More tests
storage/maria/ma_test_recovery.expected:
Updated results
storage/maria/ma_test_recovery:
Changed tests to use bigger blobs (not just 1K)
Added new tests that tests recovery of update with blobs
Removed comparison of .MAD file as it's not guaranteed that recovery from scratch gives identical data file as original update
(compact_page() may be called at different times during normal execution and during REDO)
storage/maria/ma_update.c:
Simplify code (changed * to if)
storage/maria/maria_chk.c:
Make output more readable
storage/maria/maria_def.h:
Changed 'changed' to int to prepare for more bits
Added 2 more bytes to status information
Added 'st_maria_row->min_length' for storing min length needed on head page
Added 'st_maria_handler->blob_buff & blob_buff_size' for storing blobs
Moved all tuning parameters into one block
Added MARIA_SMALL_BLOB_BUFFER
Added _ma_mark_file_crashed()
storage/myisam/mi_test2.c:
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
storage/maria/ma_test_big.sh:
Testing of insert, update, delete, recovery and undo of rows with blobs
Thanks to the random-ness of ma_test2 this is likely to find most bugs in the row handling
2007-12-30 21:40:03 +01:00
|
|
|
|
WL#3072 - Maria recovery.
* fix for bitmap vs checkpoint bug which could lead to corrupted
tables in case of crashes at certain moments: a bitmap could be flushed
to disk even though it was inconsistent with the log (it could be
flushed before REDO-UNDO are written to the log). One bug remains, need
code from others. Tests added. Fix is to pin unflushable bitmap pages,
and let checkpoint wait for them to be flushable.
* fix for long_trid!=0 assertion failure at Recovery.
* less useless wakeups in the background flush|checkpoint thread.
* store global_trid_generator in checkpoint record.
mysql-test/r/maria-recovery.result:
result update
mysql-test/t/maria-recovery.test:
make it easier to locate subtests
storage/maria/ma_bitmap.c:
When we send a bitmap to the pagecache, if this bitmap is not in a
flushable state we keep it pinned and add it to a list, it will be
unpinned when the bitmap is flushable again.
A new function _ma_bitmap_flush_all() used by checkpoint.
A new function _ma_bitmap_flushable() used by block format to signal
when it starts modifying a bitmap and when it is done with it.
storage/maria/ma_blockrec.c:
When starting a row operation (insert/update/delete), mark that
the bitmap is not flushable (because for example INSERT is going
to over-allocate in the bitmap to prevent other threads from using
our data pages). If a checkpoint comes at this moment it will wait
for the bitmap to be flushable before flushing it.
When the operation ends, bitmap becomes flushable again; that
transition is done under the bitmap's mutex (needed for correct
synchro with a concurrent checkpoint); but for INSERT/UPDATE this
happens inside _ma_bitmap_release_unused() at a place where it already
has the mutex, so the only penalty (mutex adding) is in DELETE and UNDO
of INSERT. In case of errors after setting the bitmap unflushable,
we must always set it back to flushable or checkpoint would block.
Debug possibilities to force a sleep while the bitmap is over-allocated.
In case of error in get_head_or_tail() in allocate_and_write_block_record(),
we still need to unpin all pages.
Bugfix: _ma_apply_redo_insert_row_blobs() produced wrong
data_file_length.
storage/maria/ma_blockrec.h:
new bitmap calls.
storage/maria/ma_checkpoint.c:
filter_flush_indirect not needed anymore (flushing bitmap
pages happens in _ma_bitmap_flush_all() now). So
st_filter_param::is_data_file|pages_covered_by_bitmap not needed.
Other filter_flush* don't need to flush bitmap anymore.
Add debug possibility to flush all bitmap pages outside of a checkpoint,
to simulate pagecache LRU eviction.
When the background flush/checkpoint thread notices it has nothing
to flush, it now sleeps directly until the next potential checkpoint
moment instead of waking up every second.
When in checkpoint we decide to not store a table in the checkpoint record
(because it has logged no writes for example), we can also skip flushing
this table.
storage/maria/ma_commit.c:
comment is out-of-date
storage/maria/ma_key_recover.c:
comment fix
storage/maria/ma_loghandler.c:
comment is out-of-date
storage/maria/ma_open.c:
comment is out-of-date
storage/maria/ma_pagecache.c:
comment for bug to fix. And we don't take checkpoints at end of REDO
phase yet so can trust block->type.
storage/maria/ma_recovery.c:
Comments. Now-unneeded code for incomplete REDO-UNDO groups removed.
When we forget about an old transaction we must really forget
about it with bzero() (fixes the "long_trid!=0 assertion" recovery
bug). When we delete a row with maria_delete() we turn on
STATE_NOT_OPTIMIZED_ROWS so we do the same when we see a CLR_END
for an UNDO_ROW_INSERT or when we execute an UNDO_ROW_INSERT (in both
cases a row was deleted). Pick up max_long_trid from the checkpoint record.
storage/maria/maria_chk.c:
comment
storage/maria/maria_def.h:
MARIA_FILE_BITMAP gets new members: 'flushable', 'bitmap_cond' and
'pinned_pages'.
storage/maria/trnman.c:
I used to think that recovery only needs to know the maximum TrID
of the lists of active and committed transactions. But no, sometimes
both lists can even be empty and their TrID should not be reused.
So Checkpoint now saves global_trid_generator in the checkpoint record.
storage/maria/trnman_public.h:
macros to read/store a TrID
mysql-test/r/maria-recovery-bitmap.result:
result is ok. Without the code fix, we would get a corruption message
about the bitmap page in CHECK TABLE EXTENDED.
mysql-test/t/maria-recovery-bitmap-master.opt:
usual when we crash mysqld in tests
mysql-test/t/maria-recovery-bitmap.test:
test of recovery problems specific to the bitmap pages.
2007-12-14 16:14:12 +01:00
|
|
|
@param share Table's share
|
2007-12-15 14:17:23 +01:00
|
|
|
@param non_flushable_inc Increment of MARIA_FILE_BITMAP::non_flushable
|
|
|
|
(-1 or +1).
|
WL#3072 - Maria recovery.
* fix for bitmap vs checkpoint bug which could lead to corrupted
tables in case of crashes at certain moments: a bitmap could be flushed
to disk even though it was inconsistent with the log (it could be
flushed before REDO-UNDO are written to the log). One bug remains, need
code from others. Tests added. Fix is to pin unflushable bitmap pages,
and let checkpoint wait for them to be flushable.
* fix for long_trid!=0 assertion failure at Recovery.
* less useless wakeups in the background flush|checkpoint thread.
* store global_trid_generator in checkpoint record.
mysql-test/r/maria-recovery.result:
result update
mysql-test/t/maria-recovery.test:
make it easier to locate subtests
storage/maria/ma_bitmap.c:
When we send a bitmap to the pagecache, if this bitmap is not in a
flushable state we keep it pinned and add it to a list, it will be
unpinned when the bitmap is flushable again.
A new function _ma_bitmap_flush_all() used by checkpoint.
A new function _ma_bitmap_flushable() used by block format to signal
when it starts modifying a bitmap and when it is done with it.
storage/maria/ma_blockrec.c:
When starting a row operation (insert/update/delete), mark that
the bitmap is not flushable (because for example INSERT is going
to over-allocate in the bitmap to prevent other threads from using
our data pages). If a checkpoint comes at this moment it will wait
for the bitmap to be flushable before flushing it.
When the operation ends, bitmap becomes flushable again; that
transition is done under the bitmap's mutex (needed for correct
synchro with a concurrent checkpoint); but for INSERT/UPDATE this
happens inside _ma_bitmap_release_unused() at a place where it already
has the mutex, so the only penalty (mutex adding) is in DELETE and UNDO
of INSERT. In case of errors after setting the bitmap unflushable,
we must always set it back to flushable or checkpoint would block.
Debug possibilities to force a sleep while the bitmap is over-allocated.
In case of error in get_head_or_tail() in allocate_and_write_block_record(),
we still need to unpin all pages.
Bugfix: _ma_apply_redo_insert_row_blobs() produced wrong
data_file_length.
storage/maria/ma_blockrec.h:
new bitmap calls.
storage/maria/ma_checkpoint.c:
filter_flush_indirect not needed anymore (flushing bitmap
pages happens in _ma_bitmap_flush_all() now). So
st_filter_param::is_data_file|pages_covered_by_bitmap not needed.
Other filter_flush* don't need to flush bitmap anymore.
Add debug possibility to flush all bitmap pages outside of a checkpoint,
to simulate pagecache LRU eviction.
When the background flush/checkpoint thread notices it has nothing
to flush, it now sleeps directly until the next potential checkpoint
moment instead of waking up every second.
When in checkpoint we decide to not store a table in the checkpoint record
(because it has logged no writes for example), we can also skip flushing
this table.
storage/maria/ma_commit.c:
comment is out-of-date
storage/maria/ma_key_recover.c:
comment fix
storage/maria/ma_loghandler.c:
comment is out-of-date
storage/maria/ma_open.c:
comment is out-of-date
storage/maria/ma_pagecache.c:
comment for bug to fix. And we don't take checkpoints at end of REDO
phase yet so can trust block->type.
storage/maria/ma_recovery.c:
Comments. Now-unneeded code for incomplete REDO-UNDO groups removed.
When we forget about an old transaction we must really forget
about it with bzero() (fixes the "long_trid!=0 assertion" recovery
bug). When we delete a row with maria_delete() we turn on
STATE_NOT_OPTIMIZED_ROWS so we do the same when we see a CLR_END
for an UNDO_ROW_INSERT or when we execute an UNDO_ROW_INSERT (in both
cases a row was deleted). Pick up max_long_trid from the checkpoint record.
storage/maria/maria_chk.c:
comment
storage/maria/maria_def.h:
MARIA_FILE_BITMAP gets new members: 'flushable', 'bitmap_cond' and
'pinned_pages'.
storage/maria/trnman.c:
I used to think that recovery only needs to know the maximum TrID
of the lists of active and committed transactions. But no, sometimes
both lists can even be empty and their TrID should not be reused.
So Checkpoint now saves global_trid_generator in the checkpoint record.
storage/maria/trnman_public.h:
macros to read/store a TrID
mysql-test/r/maria-recovery-bitmap.result:
result is ok. Without the code fix, we would get a corruption message
about the bitmap page in CHECK TABLE EXTENDED.
mysql-test/t/maria-recovery-bitmap-master.opt:
usual when we crash mysqld in tests
mysql-test/t/maria-recovery-bitmap.test:
test of recovery problems specific to the bitmap pages.
2007-12-14 16:14:12 +01:00
|
|
|
*/
|
|
|
|
|
UNDO of rows now puts back all part of the row on their original pages and positions
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Added --debug-on option to mysqld (to be able to turn off DBUG with --debug-on=0)
Fixed some bugs with 'non_flushable' marking of bitmap pages
Don't use 'non_flushable' marking of bitmap pages for not transactional tables
SHOW CREATE TABLE now shows if table was created with page checksums
Fixed a lot of bugs with BLOB handling in case of update/REDO and UNDO
More tests (especially for blobs) and DBUG_ASSERTS()
More readable output from maria_read_log and maria_chk
Fixed wrong shift that caused Maria to crash on files > 4G
Mark tables as crashed if REDO fails
dbug/dbug.c:
Changed to use my_bool (allowed me to remove some windows specific code)
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Removed initialization of variables if not needed
include/my_dbug.h:
Use my_bool for some functions that were defined as BOOLEAN in dbug.c code
Added DEBUGGER_ON/DEBUGGER_OFF to speed up execution when DBUG is not used
include/my_global.h:
Define my_bool early
Increase MY_HOW_OFTEN_TO_WRITE as computers are now faster than 10 years ago
mysql-test/mysql-test-run.pl:
Added debug-on=0 to speed up tests
mysql-test/r/maria-recovery.result:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/r/maria.result:
Added testing of page checksums
mysql-test/t/crash_commit_before-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-bitmap-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery.test:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/t/maria.test:
Added testing of page checksums
sql/mysqld.cc:
Added --debug-on option (to be able to turn off DBUG with --debug-on=0)
Indentation fixes
Removed end spaces
sql/sql_show.cc:
Allow update_create_info() to inform MySQL if PACK_KEYS, NO_PACK_KEYS, CHECKSUM, PAGE_CHECKSUM or DELAY_KEY_WRITE is used
storage/maria/Makefile.am:
Added ma_test_big.sh
storage/maria/ha_maria.cc:
Store in create_info if page checksums are used (For SHOW CREATE TABLE)
storage/maria/ma_bitmap.c:
Added _ma_bitmap_wait_or_flush() to cause reader of bitmap pages to wait with reading until bitmap is flushed.
Use TAIL_PAGE_COUNT_MARKER for tail pages
Set 'sub_blocks' for and only for the head page or for the first extent of a blob. This is needed for store_extent_info() to be able to set START_EXTENT_BIT's
Don't allocate more than 0x3fff pages in one extent (We need bit 0x4000 as a START_EXTENT_BIT)
Increase the calculated 'head_length' with the number of bytes used for extents.
Update row->space_on_head_page also in _ma_bitmap_find_new_place()
Make _ma_bitmap_get_page_bits() global. (Needed for UNDO handling)
Changed _ma_bitmap_flushable() to take MARIA_HA instead of MARIA_SHARE.
This was needed to be able to mark the handler if we had a 'non_flushable' call pending or not.
Don't use 'non_flushable' marking of bitmap pages for not transactional tables.
Added BLOCKUSED_USE_ORG_BITMAP handling also for tail pages.
Added more DBUG_ASSERT() to find possible errors in other code
Some code simplifications by adding new local variables
storage/maria/ma_blockrec.c:
UNDO of rows now puts back all part of the row on their original pages and positions.
Changed UNDO of DELETE and UNDO of UPDATE to contain information about the original length of data on head block and also extent information
This changes a lot of logic as now an insert of a row on a page may happen to any position (and not just to the first or next free)
Use PAGE_COUNT to mark if an extent is the start of a blob. (Needed for extent_to_bitmap_blocks())
Added check_directory() for checking that directory entries are correct.
Added checking of row checksums when reading rows (with EXTRA_DEBUG)
Added make_space_for_directory() and extend_directory() for doing expansion of directory
Added get_rowpos_in_head_or_tail_page() to be able to store head/tail on original position in UNDO
Added extent_to_bitmap_blocks() to be able to generate original bitmap blocks from UNDO entry
Added _ma_update_at_original_place() for UNDO of DELETES
Added row->min_length to hold minimum required space needed on head page
Changed find_free_position() to use make_space_for_directory()
Changed make_empty_page() to allow optional creation of directory entry
Changed delete_head_or_tail() and _ma_apply_undo_row_insert() to not copy pagecache block (speed optimization)
Changed _ma_apply_redo_insert_row_head_or_tail() to be able to insert new row at any position on 'new' page
Changed _ma_apply_undo_row_delete() and _ma_apply_undo_row_update() to put row in its original position
Ensure allocation of tail blocks are of at least MIN_TAIL_SIZE.
Ensure we store pages in pinned pages even if read failed. (If not we will have pages pinned forever in page cache)
Write original extent information in UNDO entry, not compacted ones (we need position to tails!)
When setting BLOCKUSED_USED, don't clear other bits (we have to preserve BLOCKUSED_USE_ORG_BITMAP)
Fixed some bugs in directory handling
Fixed bug where we wrote wrong lsn to blob pages
Added separate blob_buffer for fixing bug when updating row that had char/varchar that spanned several pages and also had blobs
Ensure we call _ma_bitmap_flushable() also in case of errors
When doing an update, first delete old entries, then search in bitmap for where to put new information
Info->s -> share
Rowid -> rowid
More DBUG_ASSERT()
storage/maria/ma_blockrec.h:
Added START_EXTENT_BIT and TAIL_PAGE_COUNT_MARKER
Added _ma_bitmap_wait_or_flush() and _ma_bitmap_get_page_bits()
storage/maria/ma_check.c:
Don't write extra empty line if there is no deleted blocks
Ignore START_EXTENT_BIT's in page count
Call _ma_fast_unlock_key_del() to free key_del link
storage/maria/ma_close.c:
Ensure that used_key_del is 0. (If not, someone forgot to call _ma_unlock_key_del())
storage/maria/ma_create.c:
Changed constant to macro
storage/maria/ma_delete.c:
For deleted keys, log also position to row
storage/maria/ma_extra.c:
Release blob buffer at maria_reset() if bigger than MARIA_SMALL_BLOB_BUFFER
storage/maria/ma_key_recover.c:
Added bzero() of LSN that confused paged cache in case of uninitialized block
Mark file crashed if applying of index changes fails
Added calls to _ma_fast_unlock_key_del() for protection of shared key_del link.
storage/maria/ma_locking.c:
Added usage of MARIA_FILE_OPEN_COUNT_OFFSET
Added _ma_mark_file_crashed()
storage/maria/ma_loghandler.c:
Fixed bug where we logged uninitialized memory
storage/maria/ma_open.c:
Moved state->changed to be at start of state info on disk to allow one to easily mark files as crashed
storage/maria/ma_page.c:
Disable 'dummy' checksumming of pages as this gave false warnings.
(Need to investigate if this is ever needed)
storage/maria/ma_pagecache.c:
Fixed wrong shift that caused Maria to crash on files > 4G
storage/maria/ma_recovery.c:
In case of errors, start writing on new line if we were in %## %## printing mode (Made errors more readable)
Changed global variable name from warnings -> recovery_warnings
Use MARIA_FILE_CREATE_RENAME_LSN_OFFSET instead of constant
Removed special handling of row position for deleted keys. Keys now always includes row positions
_ma_apply_undo_row_delete() now gets page and row position
Added check that we don't loop forever when handling undo's (in case of bug in undo chain)
Print name of failed REDO/UNDO
storage/maria/ma_recovery.h:
Removed old comment
storage/maria/ma_static.c:
Changed version number of Maria files to not accidentally use old ones (because of change of ordering of status variables)
storage/maria/ma_test2.c:
Added option -u to specify number of rows to update
Changed old option -u to be -A, as for ma_test1
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
First created blob is now of max blob length to ensure we have at least one big blob in the table
storage/maria/ma_test_all.sh:
More tests
storage/maria/ma_test_recovery.expected:
Updated results
storage/maria/ma_test_recovery:
Changed tests to use bigger blobs (not just 1K)
Added new tests that tests recovery of update with blobs
Removed comparison of .MAD file as it's not guaranteed that recovery from scratch gives identical data file as original update
(compact_page() may be called at different times during normal execution and during REDO)
storage/maria/ma_update.c:
Simplify code (changed * to if)
storage/maria/maria_chk.c:
Make output more readable
storage/maria/maria_def.h:
Changed 'changed' to int to prepare for more bits
Added 2 more bytes to status information
Added 'st_maria_row->min_length' for storing min length needed on head page
Added 'st_maria_handler->blob_buff & blob_buff_size' for storing blobs
Moved all tuning parameters into one block
Added MARIA_SMALL_BLOB_BUFFER
Added _ma_mark_file_crashed()
storage/myisam/mi_test2.c:
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
storage/maria/ma_test_big.sh:
Testing of insert, update, delete, recovery and undo of rows with blobs
Thanks to the random-ness of ma_test2 this is likely to find most bugs in the row handling
2007-12-30 21:40:03 +01:00
|
|
|
void _ma_bitmap_flushable(MARIA_HA *info, int non_flushable_inc)
|
WL#3072 - Maria recovery.
* fix for bitmap vs checkpoint bug which could lead to corrupted
tables in case of crashes at certain moments: a bitmap could be flushed
to disk even though it was inconsistent with the log (it could be
flushed before REDO-UNDO are written to the log). One bug remains, need
code from others. Tests added. Fix is to pin unflushable bitmap pages,
and let checkpoint wait for them to be flushable.
* fix for long_trid!=0 assertion failure at Recovery.
* less useless wakeups in the background flush|checkpoint thread.
* store global_trid_generator in checkpoint record.
mysql-test/r/maria-recovery.result:
result update
mysql-test/t/maria-recovery.test:
make it easier to locate subtests
storage/maria/ma_bitmap.c:
When we send a bitmap to the pagecache, if this bitmap is not in a
flushable state we keep it pinned and add it to a list, it will be
unpinned when the bitmap is flushable again.
A new function _ma_bitmap_flush_all() used by checkpoint.
A new function _ma_bitmap_flushable() used by block format to signal
when it starts modifying a bitmap and when it is done with it.
storage/maria/ma_blockrec.c:
When starting a row operation (insert/update/delete), mark that
the bitmap is not flushable (because for example INSERT is going
to over-allocate in the bitmap to prevent other threads from using
our data pages). If a checkpoint comes at this moment it will wait
for the bitmap to be flushable before flushing it.
When the operation ends, bitmap becomes flushable again; that
transition is done under the bitmap's mutex (needed for correct
synchro with a concurrent checkpoint); but for INSERT/UPDATE this
happens inside _ma_bitmap_release_unused() at a place where it already
has the mutex, so the only penalty (mutex adding) is in DELETE and UNDO
of INSERT. In case of errors after setting the bitmap unflushable,
we must always set it back to flushable or checkpoint would block.
Debug possibilities to force a sleep while the bitmap is over-allocated.
In case of error in get_head_or_tail() in allocate_and_write_block_record(),
we still need to unpin all pages.
Bugfix: _ma_apply_redo_insert_row_blobs() produced wrong
data_file_length.
storage/maria/ma_blockrec.h:
new bitmap calls.
storage/maria/ma_checkpoint.c:
filter_flush_indirect not needed anymore (flushing bitmap
pages happens in _ma_bitmap_flush_all() now). So
st_filter_param::is_data_file|pages_covered_by_bitmap not needed.
Other filter_flush* don't need to flush bitmap anymore.
Add debug possibility to flush all bitmap pages outside of a checkpoint,
to simulate pagecache LRU eviction.
When the background flush/checkpoint thread notices it has nothing
to flush, it now sleeps directly until the next potential checkpoint
moment instead of waking up every second.
When in checkpoint we decide to not store a table in the checkpoint record
(because it has logged no writes for example), we can also skip flushing
this table.
storage/maria/ma_commit.c:
comment is out-of-date
storage/maria/ma_key_recover.c:
comment fix
storage/maria/ma_loghandler.c:
comment is out-of-date
storage/maria/ma_open.c:
comment is out-of-date
storage/maria/ma_pagecache.c:
comment for bug to fix. And we don't take checkpoints at end of REDO
phase yet so can trust block->type.
storage/maria/ma_recovery.c:
Comments. Now-unneeded code for incomplete REDO-UNDO groups removed.
When we forget about an old transaction we must really forget
about it with bzero() (fixes the "long_trid!=0 assertion" recovery
bug). When we delete a row with maria_delete() we turn on
STATE_NOT_OPTIMIZED_ROWS so we do the same when we see a CLR_END
for an UNDO_ROW_INSERT or when we execute an UNDO_ROW_INSERT (in both
cases a row was deleted). Pick up max_long_trid from the checkpoint record.
storage/maria/maria_chk.c:
comment
storage/maria/maria_def.h:
MARIA_FILE_BITMAP gets new members: 'flushable', 'bitmap_cond' and
'pinned_pages'.
storage/maria/trnman.c:
I used to think that recovery only needs to know the maximum TrID
of the lists of active and committed transactions. But no, sometimes
both lists can even be empty and their TrID should not be reused.
So Checkpoint now saves global_trid_generator in the checkpoint record.
storage/maria/trnman_public.h:
macros to read/store a TrID
mysql-test/r/maria-recovery-bitmap.result:
result is ok. Without the code fix, we would get a corruption message
about the bitmap page in CHECK TABLE EXTENDED.
mysql-test/t/maria-recovery-bitmap-master.opt:
usual when we crash mysqld in tests
mysql-test/t/maria-recovery-bitmap.test:
test of recovery problems specific to the bitmap pages.
2007-12-14 16:14:12 +01:00
|
|
|
{
|
UNDO of rows now puts back all part of the row on their original pages and positions
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Added --debug-on option to mysqld (to be able to turn off DBUG with --debug-on=0)
Fixed some bugs with 'non_flushable' marking of bitmap pages
Don't use 'non_flushable' marking of bitmap pages for not transactional tables
SHOW CREATE TABLE now shows if table was created with page checksums
Fixed a lot of bugs with BLOB handling in case of update/REDO and UNDO
More tests (especially for blobs) and DBUG_ASSERTS()
More readable output from maria_read_log and maria_chk
Fixed wrong shift that caused Maria to crash on files > 4G
Mark tables as crashed if REDO fails
dbug/dbug.c:
Changed to use my_bool (allowed me to remove some windows specific code)
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Removed initialization of variables if not needed
include/my_dbug.h:
Use my_bool for some functions that were defined as BOOLEAN in dbug.c code
Added DEBUGGER_ON/DEBUGGER_OFF to speed up execution when DBUG is not used
include/my_global.h:
Define my_bool early
Increase MY_HOW_OFTEN_TO_WRITE as computers are now faster than 10 years ago
mysql-test/mysql-test-run.pl:
Added debug-on=0 to speed up tests
mysql-test/r/maria-recovery.result:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/r/maria.result:
Added testing of page checksums
mysql-test/t/crash_commit_before-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-bitmap-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery.test:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/t/maria.test:
Added testing of page checksums
sql/mysqld.cc:
Added --debug-on option (to be able to turn off DBUG with --debug-on=0)
Indentation fixes
Removed end spaces
sql/sql_show.cc:
Allow update_create_info() to inform MySQL if PACK_KEYS, NO_PACK_KEYS, CHECKSUM, PAGE_CHECKSUM or DELAY_KEY_WRITE is used
storage/maria/Makefile.am:
Added ma_test_big.sh
storage/maria/ha_maria.cc:
Store in create_info if page checksums are used (For SHOW CREATE TABLE)
storage/maria/ma_bitmap.c:
Added _ma_bitmap_wait_or_flush() to cause reader of bitmap pages to wait with reading until bitmap is flushed.
Use TAIL_PAGE_COUNT_MARKER for tail pages
Set 'sub_blocks' for and only for the head page or for the first extent of a blob. This is needed for store_extent_info() to be able to set START_EXTENT_BIT's
Don't allocate more than 0x3fff pages in one extent (We need bit 0x4000 as a START_EXTENT_BIT)
Increase the calculated 'head_length' with the number of bytes used for extents.
Update row->space_on_head_page also in _ma_bitmap_find_new_place()
Make _ma_bitmap_get_page_bits() global. (Needed for UNDO handling)
Changed _ma_bitmap_flushable() to take MARIA_HA instead of MARIA_SHARE.
This was needed to be able to mark the handler if we had a 'non_flushable' call pending or not.
Don't use 'non_flushable' marking of bitmap pages for not transactional tables.
Added BLOCKUSED_USE_ORG_BITMAP handling also for tail pages.
Added more DBUG_ASSERT() to find possible errors in other code
Some code simplifications by adding new local variables
storage/maria/ma_blockrec.c:
UNDO of rows now puts back all part of the row on their original pages and positions.
Changed UNDO of DELETE and UNDO of UPDATE to contain information about the original length of data on head block and also extent information
This changes a lot of logic as now an insert of a row on a page may happen to any position (and not just to the first or next free)
Use PAGE_COUNT to mark if an extent is the start of a blob. (Needed for extent_to_bitmap_blocks())
Added check_directory() for checking that directory entries are correct.
Added checking of row checksums when reading rows (with EXTRA_DEBUG)
Added make_space_for_directory() and extend_directory() for doing expansion of directory
Added get_rowpos_in_head_or_tail_page() to be able to store head/tail on original position in UNDO
Added extent_to_bitmap_blocks() to be able to generate original bitmap blocks from UNDO entry
Added _ma_update_at_original_place() for UNDO of DELETES
Added row->min_length to hold minimum required space needed on head page
Changed find_free_position() to use make_space_for_directory()
Changed make_empty_page() to allow optional creation of directory entry
Changed delete_head_or_tail() and _ma_apply_undo_row_insert() to not copy pagecache block (speed optimization)
Changed _ma_apply_redo_insert_row_head_or_tail() to be able to insert new row at any position on 'new' page
Changed _ma_apply_undo_row_delete() and _ma_apply_undo_row_update() to put row in its original position
Ensure allocation of tail blocks are of at least MIN_TAIL_SIZE.
Ensure we store pages in pinned pages even if read failed. (If not we will have pages pinned forever in page cache)
Write original extent information in UNDO entry, not compacted ones (we need position to tails!)
When setting BLOCKUSED_USED, don't clear other bits (we have to preserve BLOCKUSED_USE_ORG_BITMAP)
Fixed some bugs in directory handling
Fixed bug where we wrote wrong lsn to blob pages
Added separate blob_buffer for fixing bug when updating row that had char/varchar that spanned several pages and also had blobs
Ensure we call _ma_bitmap_flushable() also in case of errors
When doing an update, first delete old entries, then search in bitmap for where to put new information
Info->s -> share
Rowid -> rowid
More DBUG_ASSERT()
storage/maria/ma_blockrec.h:
Added START_EXTENT_BIT and TAIL_PAGE_COUNT_MARKER
Added _ma_bitmap_wait_or_flush() and _ma_bitmap_get_page_bits()
storage/maria/ma_check.c:
Don't write extra empty line if there is no deleted blocks
Ignore START_EXTENT_BIT's in page count
Call _ma_fast_unlock_key_del() to free key_del link
storage/maria/ma_close.c:
Ensure that used_key_del is 0. (If not, someone forgot to call _ma_unlock_key_del())
storage/maria/ma_create.c:
Changed constant to macro
storage/maria/ma_delete.c:
For deleted keys, log also position to row
storage/maria/ma_extra.c:
Release blob buffer at maria_reset() if bigger than MARIA_SMALL_BLOB_BUFFER
storage/maria/ma_key_recover.c:
Added bzero() of LSN that confused paged cache in case of uninitialized block
Mark file crashed if applying of index changes fails
Added calls to _ma_fast_unlock_key_del() for protection of shared key_del link.
storage/maria/ma_locking.c:
Added usage of MARIA_FILE_OPEN_COUNT_OFFSET
Added _ma_mark_file_crashed()
storage/maria/ma_loghandler.c:
Fixed bug where we logged uninitialized memory
storage/maria/ma_open.c:
Moved state->changed to be at start of state info on disk to allow one to easily mark files as crashed
storage/maria/ma_page.c:
Disable 'dummy' checksumming of pages as this gave false warnings.
(Need to investigate if this is ever needed)
storage/maria/ma_pagecache.c:
Fixed wrong shift that caused Maria to crash on files > 4G
storage/maria/ma_recovery.c:
In case of errors, start writing on new line if we were in %## %## printing mode (Made errors more readable)
Changed global variable name from warnings -> recovery_warnings
Use MARIA_FILE_CREATE_RENAME_LSN_OFFSET instead of constant
Removed special handling of row position for deleted keys. Keys now always includes row positions
_ma_apply_undo_row_delete() now gets page and row position
Added check that we don't loop forever when handling undo's (in case of bug in undo chain)
Print name of failed REDO/UNDO
storage/maria/ma_recovery.h:
Removed old comment
storage/maria/ma_static.c:
Changed version number of Maria files to not accidentally use old ones (because of change of ordering of status variables)
storage/maria/ma_test2.c:
Added option -u to specify number of rows to update
Changed old option -u to be -A, as for ma_test1
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
First created blob is now of max blob length to ensure we have at least one big blob in the table
storage/maria/ma_test_all.sh:
More tests
storage/maria/ma_test_recovery.expected:
Updated results
storage/maria/ma_test_recovery:
Changed tests to use bigger blobs (not just 1K)
Added new tests that tests recovery of update with blobs
Removed comparison of .MAD file as it's not guaranteed that recovery from scratch gives identical data file as original update
(compact_page() may be called at different times during normal execution and during REDO)
storage/maria/ma_update.c:
Simplify code (changed * to if)
storage/maria/maria_chk.c:
Make output more readable
storage/maria/maria_def.h:
Changed 'changed' to int to prepare for more bits
Added 2 more bytes to status information
Added 'st_maria_row->min_length' for storing min length needed on head page
Added 'st_maria_handler->blob_buff & blob_buff_size' for storing blobs
Moved all tuning parameters into one block
Added MARIA_SMALL_BLOB_BUFFER
Added _ma_mark_file_crashed()
storage/myisam/mi_test2.c:
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
storage/maria/ma_test_big.sh:
Testing of insert, update, delete, recovery and undo of rows with blobs
Thanks to the random-ness of ma_test2 this is likely to find most bugs in the row handling
2007-12-30 21:40:03 +01:00
|
|
|
MARIA_SHARE *share= info->s;
|
2007-12-18 02:21:32 +01:00
|
|
|
MARIA_FILE_BITMAP *bitmap;
|
2008-10-14 11:38:07 +02:00
|
|
|
DBUG_ENTER("_ma_bitmap_flushable");
|
2007-12-18 02:21:32 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
Non-transactional tables are never automatically flushed and need no
|
|
|
|
protection
|
|
|
|
*/
|
|
|
|
if (!share->now_transactional)
|
2008-10-14 11:38:07 +02:00
|
|
|
DBUG_VOID_RETURN;
|
2007-12-18 02:21:32 +01:00
|
|
|
|
|
|
|
bitmap= &share->bitmap;
|
2011-07-13 21:10:18 +02:00
|
|
|
mysql_mutex_lock(&bitmap->bitmap_lock);
|
2010-08-12 18:46:36 +02:00
|
|
|
|
2007-12-15 14:17:23 +01:00
|
|
|
if (non_flushable_inc == -1)
|
WL#3072 - Maria recovery.
* fix for bitmap vs checkpoint bug which could lead to corrupted
tables in case of crashes at certain moments: a bitmap could be flushed
to disk even though it was inconsistent with the log (it could be
flushed before REDO-UNDO are written to the log). One bug remains, need
code from others. Tests added. Fix is to pin unflushable bitmap pages,
and let checkpoint wait for them to be flushable.
* fix for long_trid!=0 assertion failure at Recovery.
* less useless wakeups in the background flush|checkpoint thread.
* store global_trid_generator in checkpoint record.
mysql-test/r/maria-recovery.result:
result update
mysql-test/t/maria-recovery.test:
make it easier to locate subtests
storage/maria/ma_bitmap.c:
When we send a bitmap to the pagecache, if this bitmap is not in a
flushable state we keep it pinned and add it to a list, it will be
unpinned when the bitmap is flushable again.
A new function _ma_bitmap_flush_all() used by checkpoint.
A new function _ma_bitmap_flushable() used by block format to signal
when it starts modifying a bitmap and when it is done with it.
storage/maria/ma_blockrec.c:
When starting a row operation (insert/update/delete), mark that
the bitmap is not flushable (because for example INSERT is going
to over-allocate in the bitmap to prevent other threads from using
our data pages). If a checkpoint comes at this moment it will wait
for the bitmap to be flushable before flushing it.
When the operation ends, bitmap becomes flushable again; that
transition is done under the bitmap's mutex (needed for correct
synchro with a concurrent checkpoint); but for INSERT/UPDATE this
happens inside _ma_bitmap_release_unused() at a place where it already
has the mutex, so the only penalty (mutex adding) is in DELETE and UNDO
of INSERT. In case of errors after setting the bitmap unflushable,
we must always set it back to flushable or checkpoint would block.
Debug possibilities to force a sleep while the bitmap is over-allocated.
In case of error in get_head_or_tail() in allocate_and_write_block_record(),
we still need to unpin all pages.
Bugfix: _ma_apply_redo_insert_row_blobs() produced wrong
data_file_length.
storage/maria/ma_blockrec.h:
new bitmap calls.
storage/maria/ma_checkpoint.c:
filter_flush_indirect not needed anymore (flushing bitmap
pages happens in _ma_bitmap_flush_all() now). So
st_filter_param::is_data_file|pages_covered_by_bitmap not needed.
Other filter_flush* don't need to flush bitmap anymore.
Add debug possibility to flush all bitmap pages outside of a checkpoint,
to simulate pagecache LRU eviction.
When the background flush/checkpoint thread notices it has nothing
to flush, it now sleeps directly until the next potential checkpoint
moment instead of waking up every second.
When in checkpoint we decide to not store a table in the checkpoint record
(because it has logged no writes for example), we can also skip flushing
this table.
storage/maria/ma_commit.c:
comment is out-of-date
storage/maria/ma_key_recover.c:
comment fix
storage/maria/ma_loghandler.c:
comment is out-of-date
storage/maria/ma_open.c:
comment is out-of-date
storage/maria/ma_pagecache.c:
comment for bug to fix. And we don't take checkpoints at end of REDO
phase yet so can trust block->type.
storage/maria/ma_recovery.c:
Comments. Now-unneeded code for incomplete REDO-UNDO groups removed.
When we forget about an old transaction we must really forget
about it with bzero() (fixes the "long_trid!=0 assertion" recovery
bug). When we delete a row with maria_delete() we turn on
STATE_NOT_OPTIMIZED_ROWS so we do the same when we see a CLR_END
for an UNDO_ROW_INSERT or when we execute an UNDO_ROW_INSERT (in both
cases a row was deleted). Pick up max_long_trid from the checkpoint record.
storage/maria/maria_chk.c:
comment
storage/maria/maria_def.h:
MARIA_FILE_BITMAP gets new members: 'flushable', 'bitmap_cond' and
'pinned_pages'.
storage/maria/trnman.c:
I used to think that recovery only needs to know the maximum TrID
of the lists of active and committed transactions. But no, sometimes
both lists can even be empty and their TrID should not be reused.
So Checkpoint now saves global_trid_generator in the checkpoint record.
storage/maria/trnman_public.h:
macros to read/store a TrID
mysql-test/r/maria-recovery-bitmap.result:
result is ok. Without the code fix, we would get a corruption message
about the bitmap page in CHECK TABLE EXTENDED.
mysql-test/t/maria-recovery-bitmap-master.opt:
usual when we crash mysqld in tests
mysql-test/t/maria-recovery-bitmap.test:
test of recovery problems specific to the bitmap pages.
2007-12-14 16:14:12 +01:00
|
|
|
{
|
2008-10-14 11:38:07 +02:00
|
|
|
DBUG_ASSERT((int) bitmap->non_flushable > 0);
|
|
|
|
DBUG_ASSERT(info->non_flushable_state == 1);
|
2007-12-15 14:17:23 +01:00
|
|
|
if (--bitmap->non_flushable == 0)
|
|
|
|
{
|
2008-01-01 22:30:49 +01:00
|
|
|
/*
|
|
|
|
We unlock and unpin pages locked and pinned by other threads. It does
|
|
|
|
not seem to be an issue as all bitmap changes are serialized with
|
|
|
|
the bitmap's mutex.
|
|
|
|
*/
|
2007-12-15 14:17:23 +01:00
|
|
|
_ma_bitmap_unpin_all(share);
|
2011-01-30 13:36:24 +01:00
|
|
|
if (unlikely(bitmap->waiting_for_non_flushable))
|
2007-12-15 14:17:23 +01:00
|
|
|
{
|
|
|
|
DBUG_PRINT("info", ("bitmap flushable waking up flusher"));
|
2011-07-13 21:10:18 +02:00
|
|
|
mysql_cond_broadcast(&bitmap->bitmap_cond);
|
2007-12-15 14:17:23 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
DBUG_PRINT("info", ("bitmap->non_flushable: %u", bitmap->non_flushable));
|
2011-07-13 21:10:18 +02:00
|
|
|
mysql_mutex_unlock(&bitmap->bitmap_lock);
|
2010-08-12 18:46:36 +02:00
|
|
|
info->non_flushable_state= 0;
|
2008-10-14 11:38:07 +02:00
|
|
|
DBUG_VOID_RETURN;
|
2007-12-15 14:17:23 +01:00
|
|
|
}
|
2008-10-14 11:38:07 +02:00
|
|
|
DBUG_ASSERT(non_flushable_inc == 1);
|
|
|
|
DBUG_ASSERT(info->non_flushable_state == 0);
|
2011-01-30 13:36:24 +01:00
|
|
|
|
|
|
|
bitmap->waiting_for_flush_all_requested++;
|
Fix for BUG#39363 "Concurent inserts in the same table lead to hang in maria engine"
(need a mutex when modifying bitmap->non_flushable), which I hit when running maria_bulk_insert.yy.
After fixing this, I hit an assertion in check_and_set_lsn() saying that the page was PAGECACHE_PLAIN_PAGE.
This could be caused by pages left by an operation which had transactions disabled (like a bulk insert with repair):
in this patch we remove those pages out of the cache when we re-enable transactions.
After fixing this, I get page cache deadlocks, pushbuild2 also has some, to be looked at.
No testcase, requires concurrency and running for 15 minutes, but automatically tested by pushbuild2.
storage/maria/ma_bitmap.c:
Doing bitmap->non_flushable++ without mutex was wrong. If this ++ happened while another ++ or -- was happening
in another thread, one ++ or -- could be missed and the bitmap code would behave wrongly. For example, if a ++
was missed, the DBUG_ASSERT(((int) (bitmap->non_flushable)) >= 0) in _ma_bitmap_release_unused() could fire.
I saw this assertion happen in practice in maria_bulk_insert.yy. Adding this mutex lock eliminated
the assertion problem.
The >=0 was wrong, should be >0 (or the variable could go negative).
storage/maria/ma_recovery.c:
When we re-enable transactionality, as we may have created pages of type PAGECACHE_PLAIN_PAGE before,
we need to remove them from the cache (FLUSH_RELEASE). Or they would stay this way, and later when we
maria_write() to them, we would try to tag them with a LSN (ma_unpin_all_pages()), which is incorrect
for a plain page (and causes assertion in the page cache at start of check_and_set_lsn()).
I saw the assertion fire with maria_bulk_insert.yy, and this seems to cure it.
page cache
2008-10-17 15:37:07 +02:00
|
|
|
while (unlikely(bitmap->flush_all_requested))
|
2007-12-15 14:17:23 +01:00
|
|
|
{
|
WL#3072 - Maria recovery.
* fix for bitmap vs checkpoint bug which could lead to corrupted
tables in case of crashes at certain moments: a bitmap could be flushed
to disk even though it was inconsistent with the log (it could be
flushed before REDO-UNDO are written to the log). One bug remains, need
code from others. Tests added. Fix is to pin unflushable bitmap pages,
and let checkpoint wait for them to be flushable.
* fix for long_trid!=0 assertion failure at Recovery.
* less useless wakeups in the background flush|checkpoint thread.
* store global_trid_generator in checkpoint record.
mysql-test/r/maria-recovery.result:
result update
mysql-test/t/maria-recovery.test:
make it easier to locate subtests
storage/maria/ma_bitmap.c:
When we send a bitmap to the pagecache, if this bitmap is not in a
flushable state we keep it pinned and add it to a list, it will be
unpinned when the bitmap is flushable again.
A new function _ma_bitmap_flush_all() used by checkpoint.
A new function _ma_bitmap_flushable() used by block format to signal
when it starts modifying a bitmap and when it is done with it.
storage/maria/ma_blockrec.c:
When starting a row operation (insert/update/delete), mark that
the bitmap is not flushable (because for example INSERT is going
to over-allocate in the bitmap to prevent other threads from using
our data pages). If a checkpoint comes at this moment it will wait
for the bitmap to be flushable before flushing it.
When the operation ends, bitmap becomes flushable again; that
transition is done under the bitmap's mutex (needed for correct
synchro with a concurrent checkpoint); but for INSERT/UPDATE this
happens inside _ma_bitmap_release_unused() at a place where it already
has the mutex, so the only penalty (mutex adding) is in DELETE and UNDO
of INSERT. In case of errors after setting the bitmap unflushable,
we must always set it back to flushable or checkpoint would block.
Debug possibilities to force a sleep while the bitmap is over-allocated.
In case of error in get_head_or_tail() in allocate_and_write_block_record(),
we still need to unpin all pages.
Bugfix: _ma_apply_redo_insert_row_blobs() produced wrong
data_file_length.
storage/maria/ma_blockrec.h:
new bitmap calls.
storage/maria/ma_checkpoint.c:
filter_flush_indirect not needed anymore (flushing bitmap
pages happens in _ma_bitmap_flush_all() now). So
st_filter_param::is_data_file|pages_covered_by_bitmap not needed.
Other filter_flush* don't need to flush bitmap anymore.
Add debug possibility to flush all bitmap pages outside of a checkpoint,
to simulate pagecache LRU eviction.
When the background flush/checkpoint thread notices it has nothing
to flush, it now sleeps directly until the next potential checkpoint
moment instead of waking up every second.
When in checkpoint we decide to not store a table in the checkpoint record
(because it has logged no writes for example), we can also skip flushing
this table.
storage/maria/ma_commit.c:
comment is out-of-date
storage/maria/ma_key_recover.c:
comment fix
storage/maria/ma_loghandler.c:
comment is out-of-date
storage/maria/ma_open.c:
comment is out-of-date
storage/maria/ma_pagecache.c:
comment for bug to fix. And we don't take checkpoints at end of REDO
phase yet so can trust block->type.
storage/maria/ma_recovery.c:
Comments. Now-unneeded code for incomplete REDO-UNDO groups removed.
When we forget about an old transaction we must really forget
about it with bzero() (fixes the "long_trid!=0 assertion" recovery
bug). When we delete a row with maria_delete() we turn on
STATE_NOT_OPTIMIZED_ROWS so we do the same when we see a CLR_END
for an UNDO_ROW_INSERT or when we execute an UNDO_ROW_INSERT (in both
cases a row was deleted). Pick up max_long_trid from the checkpoint record.
storage/maria/maria_chk.c:
comment
storage/maria/maria_def.h:
MARIA_FILE_BITMAP gets new members: 'flushable', 'bitmap_cond' and
'pinned_pages'.
storage/maria/trnman.c:
I used to think that recovery only needs to know the maximum TrID
of the lists of active and committed transactions. But no, sometimes
both lists can even be empty and their TrID should not be reused.
So Checkpoint now saves global_trid_generator in the checkpoint record.
storage/maria/trnman_public.h:
macros to read/store a TrID
mysql-test/r/maria-recovery-bitmap.result:
result is ok. Without the code fix, we would get a corruption message
about the bitmap page in CHECK TABLE EXTENDED.
mysql-test/t/maria-recovery-bitmap-master.opt:
usual when we crash mysqld in tests
mysql-test/t/maria-recovery-bitmap.test:
test of recovery problems specific to the bitmap pages.
2007-12-14 16:14:12 +01:00
|
|
|
/*
|
2008-10-14 11:38:07 +02:00
|
|
|
Some other thread is waiting for the bitmap to become
|
2007-12-15 14:17:23 +01:00
|
|
|
flushable. Not the moment to make the bitmap unflushable or more
|
|
|
|
unflushable; let's rather back off and wait. If we didn't do this, with
|
|
|
|
multiple writers, there may always be one thread causing the bitmap to
|
|
|
|
be unflushable and _ma_bitmap_flush_all() would wait for long.
|
|
|
|
There should not be a deadlock because if our thread increased
|
|
|
|
non_flushable (and thus _ma_bitmap_flush_all() is waiting for at least
|
|
|
|
our thread), it is not going to increase it more so is not going to come
|
|
|
|
here.
|
WL#3072 - Maria recovery.
* fix for bitmap vs checkpoint bug which could lead to corrupted
tables in case of crashes at certain moments: a bitmap could be flushed
to disk even though it was inconsistent with the log (it could be
flushed before REDO-UNDO are written to the log). One bug remains, need
code from others. Tests added. Fix is to pin unflushable bitmap pages,
and let checkpoint wait for them to be flushable.
* fix for long_trid!=0 assertion failure at Recovery.
* less useless wakeups in the background flush|checkpoint thread.
* store global_trid_generator in checkpoint record.
mysql-test/r/maria-recovery.result:
result update
mysql-test/t/maria-recovery.test:
make it easier to locate subtests
storage/maria/ma_bitmap.c:
When we send a bitmap to the pagecache, if this bitmap is not in a
flushable state we keep it pinned and add it to a list, it will be
unpinned when the bitmap is flushable again.
A new function _ma_bitmap_flush_all() used by checkpoint.
A new function _ma_bitmap_flushable() used by block format to signal
when it starts modifying a bitmap and when it is done with it.
storage/maria/ma_blockrec.c:
When starting a row operation (insert/update/delete), mark that
the bitmap is not flushable (because for example INSERT is going
to over-allocate in the bitmap to prevent other threads from using
our data pages). If a checkpoint comes at this moment it will wait
for the bitmap to be flushable before flushing it.
When the operation ends, bitmap becomes flushable again; that
transition is done under the bitmap's mutex (needed for correct
synchro with a concurrent checkpoint); but for INSERT/UPDATE this
happens inside _ma_bitmap_release_unused() at a place where it already
has the mutex, so the only penalty (mutex adding) is in DELETE and UNDO
of INSERT. In case of errors after setting the bitmap unflushable,
we must always set it back to flushable or checkpoint would block.
Debug possibilities to force a sleep while the bitmap is over-allocated.
In case of error in get_head_or_tail() in allocate_and_write_block_record(),
we still need to unpin all pages.
Bugfix: _ma_apply_redo_insert_row_blobs() produced wrong
data_file_length.
storage/maria/ma_blockrec.h:
new bitmap calls.
storage/maria/ma_checkpoint.c:
filter_flush_indirect not needed anymore (flushing bitmap
pages happens in _ma_bitmap_flush_all() now). So
st_filter_param::is_data_file|pages_covered_by_bitmap not needed.
Other filter_flush* don't need to flush bitmap anymore.
Add debug possibility to flush all bitmap pages outside of a checkpoint,
to simulate pagecache LRU eviction.
When the background flush/checkpoint thread notices it has nothing
to flush, it now sleeps directly until the next potential checkpoint
moment instead of waking up every second.
When in checkpoint we decide to not store a table in the checkpoint record
(because it has logged no writes for example), we can also skip flushing
this table.
storage/maria/ma_commit.c:
comment is out-of-date
storage/maria/ma_key_recover.c:
comment fix
storage/maria/ma_loghandler.c:
comment is out-of-date
storage/maria/ma_open.c:
comment is out-of-date
storage/maria/ma_pagecache.c:
comment for bug to fix. And we don't take checkpoints at end of REDO
phase yet so can trust block->type.
storage/maria/ma_recovery.c:
Comments. Now-unneeded code for incomplete REDO-UNDO groups removed.
When we forget about an old transaction we must really forget
about it with bzero() (fixes the "long_trid!=0 assertion" recovery
bug). When we delete a row with maria_delete() we turn on
STATE_NOT_OPTIMIZED_ROWS so we do the same when we see a CLR_END
for an UNDO_ROW_INSERT or when we execute an UNDO_ROW_INSERT (in both
cases a row was deleted). Pick up max_long_trid from the checkpoint record.
storage/maria/maria_chk.c:
comment
storage/maria/maria_def.h:
MARIA_FILE_BITMAP gets new members: 'flushable', 'bitmap_cond' and
'pinned_pages'.
storage/maria/trnman.c:
I used to think that recovery only needs to know the maximum TrID
of the lists of active and committed transactions. But no, sometimes
both lists can even be empty and their TrID should not be reused.
So Checkpoint now saves global_trid_generator in the checkpoint record.
storage/maria/trnman_public.h:
macros to read/store a TrID
mysql-test/r/maria-recovery-bitmap.result:
result is ok. Without the code fix, we would get a corruption message
about the bitmap page in CHECK TABLE EXTENDED.
mysql-test/t/maria-recovery-bitmap-master.opt:
usual when we crash mysqld in tests
mysql-test/t/maria-recovery-bitmap.test:
test of recovery problems specific to the bitmap pages.
2007-12-14 16:14:12 +01:00
|
|
|
*/
|
Fix for BUG#39363 "Concurent inserts in the same table lead to hang in maria engine"
(need a mutex when modifying bitmap->non_flushable), which I hit when running maria_bulk_insert.yy.
After fixing this, I hit an assertion in check_and_set_lsn() saying that the page was PAGECACHE_PLAIN_PAGE.
This could be caused by pages left by an operation which had transactions disabled (like a bulk insert with repair):
in this patch we remove those pages out of the cache when we re-enable transactions.
After fixing this, I get page cache deadlocks, pushbuild2 also has some, to be looked at.
No testcase, requires concurrency and running for 15 minutes, but automatically tested by pushbuild2.
storage/maria/ma_bitmap.c:
Doing bitmap->non_flushable++ without mutex was wrong. If this ++ happened while another ++ or -- was happening
in another thread, one ++ or -- could be missed and the bitmap code would behave wrongly. For example, if a ++
was missed, the DBUG_ASSERT(((int) (bitmap->non_flushable)) >= 0) in _ma_bitmap_release_unused() could fire.
I saw this assertion happen in practice in maria_bulk_insert.yy. Adding this mutex lock eliminated
the assertion problem.
The >=0 was wrong, should be >0 (or the variable could go negative).
storage/maria/ma_recovery.c:
When we re-enable transactionality, as we may have created pages of type PAGECACHE_PLAIN_PAGE before,
we need to remove them from the cache (FLUSH_RELEASE). Or they would stay this way, and later when we
maria_write() to them, we would try to tag them with a LSN (ma_unpin_all_pages()), which is incorrect
for a plain page (and causes assertion in the page cache at start of check_and_set_lsn()).
I saw the assertion fire with maria_bulk_insert.yy, and this seems to cure it.
page cache
2008-10-17 15:37:07 +02:00
|
|
|
DBUG_PRINT("info", ("waiting for bitmap flusher"));
|
2011-07-13 21:10:18 +02:00
|
|
|
mysql_cond_wait(&bitmap->bitmap_cond, &bitmap->bitmap_lock);
|
WL#3072 - Maria recovery.
* fix for bitmap vs checkpoint bug which could lead to corrupted
tables in case of crashes at certain moments: a bitmap could be flushed
to disk even though it was inconsistent with the log (it could be
flushed before REDO-UNDO are written to the log). One bug remains, need
code from others. Tests added. Fix is to pin unflushable bitmap pages,
and let checkpoint wait for them to be flushable.
* fix for long_trid!=0 assertion failure at Recovery.
* less useless wakeups in the background flush|checkpoint thread.
* store global_trid_generator in checkpoint record.
mysql-test/r/maria-recovery.result:
result update
mysql-test/t/maria-recovery.test:
make it easier to locate subtests
storage/maria/ma_bitmap.c:
When we send a bitmap to the pagecache, if this bitmap is not in a
flushable state we keep it pinned and add it to a list, it will be
unpinned when the bitmap is flushable again.
A new function _ma_bitmap_flush_all() used by checkpoint.
A new function _ma_bitmap_flushable() used by block format to signal
when it starts modifying a bitmap and when it is done with it.
storage/maria/ma_blockrec.c:
When starting a row operation (insert/update/delete), mark that
the bitmap is not flushable (because for example INSERT is going
to over-allocate in the bitmap to prevent other threads from using
our data pages). If a checkpoint comes at this moment it will wait
for the bitmap to be flushable before flushing it.
When the operation ends, bitmap becomes flushable again; that
transition is done under the bitmap's mutex (needed for correct
synchro with a concurrent checkpoint); but for INSERT/UPDATE this
happens inside _ma_bitmap_release_unused() at a place where it already
has the mutex, so the only penalty (mutex adding) is in DELETE and UNDO
of INSERT. In case of errors after setting the bitmap unflushable,
we must always set it back to flushable or checkpoint would block.
Debug possibilities to force a sleep while the bitmap is over-allocated.
In case of error in get_head_or_tail() in allocate_and_write_block_record(),
we still need to unpin all pages.
Bugfix: _ma_apply_redo_insert_row_blobs() produced wrong
data_file_length.
storage/maria/ma_blockrec.h:
new bitmap calls.
storage/maria/ma_checkpoint.c:
filter_flush_indirect not needed anymore (flushing bitmap
pages happens in _ma_bitmap_flush_all() now). So
st_filter_param::is_data_file|pages_covered_by_bitmap not needed.
Other filter_flush* don't need to flush bitmap anymore.
Add debug possibility to flush all bitmap pages outside of a checkpoint,
to simulate pagecache LRU eviction.
When the background flush/checkpoint thread notices it has nothing
to flush, it now sleeps directly until the next potential checkpoint
moment instead of waking up every second.
When in checkpoint we decide to not store a table in the checkpoint record
(because it has logged no writes for example), we can also skip flushing
this table.
storage/maria/ma_commit.c:
comment is out-of-date
storage/maria/ma_key_recover.c:
comment fix
storage/maria/ma_loghandler.c:
comment is out-of-date
storage/maria/ma_open.c:
comment is out-of-date
storage/maria/ma_pagecache.c:
comment for bug to fix. And we don't take checkpoints at end of REDO
phase yet so can trust block->type.
storage/maria/ma_recovery.c:
Comments. Now-unneeded code for incomplete REDO-UNDO groups removed.
When we forget about an old transaction we must really forget
about it with bzero() (fixes the "long_trid!=0 assertion" recovery
bug). When we delete a row with maria_delete() we turn on
STATE_NOT_OPTIMIZED_ROWS so we do the same when we see a CLR_END
for an UNDO_ROW_INSERT or when we execute an UNDO_ROW_INSERT (in both
cases a row was deleted). Pick up max_long_trid from the checkpoint record.
storage/maria/maria_chk.c:
comment
storage/maria/maria_def.h:
MARIA_FILE_BITMAP gets new members: 'flushable', 'bitmap_cond' and
'pinned_pages'.
storage/maria/trnman.c:
I used to think that recovery only needs to know the maximum TrID
of the lists of active and committed transactions. But no, sometimes
both lists can even be empty and their TrID should not be reused.
So Checkpoint now saves global_trid_generator in the checkpoint record.
storage/maria/trnman_public.h:
macros to read/store a TrID
mysql-test/r/maria-recovery-bitmap.result:
result is ok. Without the code fix, we would get a corruption message
about the bitmap page in CHECK TABLE EXTENDED.
mysql-test/t/maria-recovery-bitmap-master.opt:
usual when we crash mysqld in tests
mysql-test/t/maria-recovery-bitmap.test:
test of recovery problems specific to the bitmap pages.
2007-12-14 16:14:12 +01:00
|
|
|
}
|
2011-01-30 13:36:24 +01:00
|
|
|
bitmap->waiting_for_flush_all_requested--;
|
2007-12-15 14:17:23 +01:00
|
|
|
bitmap->non_flushable++;
|
|
|
|
DBUG_PRINT("info", ("bitmap->non_flushable: %u", bitmap->non_flushable));
|
2011-07-13 21:10:18 +02:00
|
|
|
mysql_mutex_unlock(&bitmap->bitmap_lock);
|
2010-08-12 18:46:36 +02:00
|
|
|
info->non_flushable_state= 1;
|
2008-10-14 11:38:07 +02:00
|
|
|
DBUG_VOID_RETURN;
|
WL#3072 - Maria recovery.
* fix for bitmap vs checkpoint bug which could lead to corrupted
tables in case of crashes at certain moments: a bitmap could be flushed
to disk even though it was inconsistent with the log (it could be
flushed before REDO-UNDO are written to the log). One bug remains, need
code from others. Tests added. Fix is to pin unflushable bitmap pages,
and let checkpoint wait for them to be flushable.
* fix for long_trid!=0 assertion failure at Recovery.
* less useless wakeups in the background flush|checkpoint thread.
* store global_trid_generator in checkpoint record.
mysql-test/r/maria-recovery.result:
result update
mysql-test/t/maria-recovery.test:
make it easier to locate subtests
storage/maria/ma_bitmap.c:
When we send a bitmap to the pagecache, if this bitmap is not in a
flushable state we keep it pinned and add it to a list, it will be
unpinned when the bitmap is flushable again.
A new function _ma_bitmap_flush_all() used by checkpoint.
A new function _ma_bitmap_flushable() used by block format to signal
when it starts modifying a bitmap and when it is done with it.
storage/maria/ma_blockrec.c:
When starting a row operation (insert/update/delete), mark that
the bitmap is not flushable (because for example INSERT is going
to over-allocate in the bitmap to prevent other threads from using
our data pages). If a checkpoint comes at this moment it will wait
for the bitmap to be flushable before flushing it.
When the operation ends, bitmap becomes flushable again; that
transition is done under the bitmap's mutex (needed for correct
synchro with a concurrent checkpoint); but for INSERT/UPDATE this
happens inside _ma_bitmap_release_unused() at a place where it already
has the mutex, so the only penalty (mutex adding) is in DELETE and UNDO
of INSERT. In case of errors after setting the bitmap unflushable,
we must always set it back to flushable or checkpoint would block.
Debug possibilities to force a sleep while the bitmap is over-allocated.
In case of error in get_head_or_tail() in allocate_and_write_block_record(),
we still need to unpin all pages.
Bugfix: _ma_apply_redo_insert_row_blobs() produced wrong
data_file_length.
storage/maria/ma_blockrec.h:
new bitmap calls.
storage/maria/ma_checkpoint.c:
filter_flush_indirect not needed anymore (flushing bitmap
pages happens in _ma_bitmap_flush_all() now). So
st_filter_param::is_data_file|pages_covered_by_bitmap not needed.
Other filter_flush* don't need to flush bitmap anymore.
Add debug possibility to flush all bitmap pages outside of a checkpoint,
to simulate pagecache LRU eviction.
When the background flush/checkpoint thread notices it has nothing
to flush, it now sleeps directly until the next potential checkpoint
moment instead of waking up every second.
When in checkpoint we decide to not store a table in the checkpoint record
(because it has logged no writes for example), we can also skip flushing
this table.
storage/maria/ma_commit.c:
comment is out-of-date
storage/maria/ma_key_recover.c:
comment fix
storage/maria/ma_loghandler.c:
comment is out-of-date
storage/maria/ma_open.c:
comment is out-of-date
storage/maria/ma_pagecache.c:
comment for bug to fix. And we don't take checkpoints at end of REDO
phase yet so can trust block->type.
storage/maria/ma_recovery.c:
Comments. Now-unneeded code for incomplete REDO-UNDO groups removed.
When we forget about an old transaction we must really forget
about it with bzero() (fixes the "long_trid!=0 assertion" recovery
bug). When we delete a row with maria_delete() we turn on
STATE_NOT_OPTIMIZED_ROWS so we do the same when we see a CLR_END
for an UNDO_ROW_INSERT or when we execute an UNDO_ROW_INSERT (in both
cases a row was deleted). Pick up max_long_trid from the checkpoint record.
storage/maria/maria_chk.c:
comment
storage/maria/maria_def.h:
MARIA_FILE_BITMAP gets new members: 'flushable', 'bitmap_cond' and
'pinned_pages'.
storage/maria/trnman.c:
I used to think that recovery only needs to know the maximum TrID
of the lists of active and committed transactions. But no, sometimes
both lists can even be empty and their TrID should not be reused.
So Checkpoint now saves global_trid_generator in the checkpoint record.
storage/maria/trnman_public.h:
macros to read/store a TrID
mysql-test/r/maria-recovery-bitmap.result:
result is ok. Without the code fix, we would get a corruption message
about the bitmap page in CHECK TABLE EXTENDED.
mysql-test/t/maria-recovery-bitmap-master.opt:
usual when we crash mysqld in tests
mysql-test/t/maria-recovery-bitmap.test:
test of recovery problems specific to the bitmap pages.
2007-12-14 16:14:12 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2007-01-18 20:38:14 +01:00
|
|
|
/*
|
|
|
|
Correct bitmap pages to reflect the true allocation
|
|
|
|
|
|
|
|
SYNOPSIS
|
|
|
|
_ma_bitmap_release_unused()
|
|
|
|
info Maria handle
|
|
|
|
blocks Bitmap blocks
|
|
|
|
|
|
|
|
IMPLEMENTATION
|
|
|
|
If block->used & BLOCKUSED_TAIL is set:
|
|
|
|
If block->used & BLOCKUSED_USED is set, then the bits for the
|
|
|
|
corresponding page are set according to block->empty_space
|
|
|
|
If block->used & BLOCKUSED_USED is not set, then the bits for
|
|
|
|
the corresponding page are set to org_bitmap_value;
|
|
|
|
|
|
|
|
If block->used & BLOCKUSED_TAIL is not set:
|
|
|
|
if block->used is not set, the bits for the corresponding page are
|
|
|
|
cleared
|
|
|
|
|
|
|
|
For the first block (head block) the logic is same as for a tail block
|
|
|
|
|
2007-04-05 13:38:05 +02:00
|
|
|
Note that we may have 'filler blocks' that are used to split a block
|
|
|
|
in half; these can be recognized by having page_count == 0.
|
|
|
|
|
2010-08-12 18:46:36 +02:00
|
|
|
This code also reverses the effect of _ma_bitmap_flushable(.., 1);
|
|
|
|
|
2007-01-18 20:38:14 +01:00
|
|
|
RETURN
|
|
|
|
0 ok
|
|
|
|
1 error (Couldn't write or read bitmap page)
|
|
|
|
*/
|
|
|
|
|
|
|
|
my_bool _ma_bitmap_release_unused(MARIA_HA *info, MARIA_BITMAP_BLOCKS *blocks)
|
|
|
|
{
|
|
|
|
MARIA_BITMAP_BLOCK *block= blocks->block, *end= block + blocks->count;
|
|
|
|
MARIA_FILE_BITMAP *bitmap= &info->s->bitmap;
|
|
|
|
uint bits, current_bitmap_value;
|
|
|
|
DBUG_ENTER("_ma_bitmap_release_unused");
|
|
|
|
|
|
|
|
/*
|
|
|
|
We can skip FULL_HEAD_PAGE (4) as the page was marked as 'full'
|
|
|
|
when we allocated space in the page
|
|
|
|
*/
|
|
|
|
current_bitmap_value= FULL_HEAD_PAGE;
|
|
|
|
|
2011-07-13 21:10:18 +02:00
|
|
|
mysql_mutex_lock(&bitmap->bitmap_lock);
|
2007-01-18 20:38:14 +01:00
|
|
|
|
|
|
|
/* First handle head block */
|
|
|
|
if (block->used & BLOCKUSED_USED)
|
|
|
|
{
|
UNDO of rows now puts back all part of the row on their original pages and positions
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Added --debug-on option to mysqld (to be able to turn off DBUG with --debug-on=0)
Fixed some bugs with 'non_flushable' marking of bitmap pages
Don't use 'non_flushable' marking of bitmap pages for not transactional tables
SHOW CREATE TABLE now shows if table was created with page checksums
Fixed a lot of bugs with BLOB handling in case of update/REDO and UNDO
More tests (especially for blobs) and DBUG_ASSERTS()
More readable output from maria_read_log and maria_chk
Fixed wrong shift that caused Maria to crash on files > 4G
Mark tables as crashed if REDO fails
dbug/dbug.c:
Changed to use my_bool (allowed me to remove some windows specific code)
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Removed initialization of variables if not needed
include/my_dbug.h:
Use my_bool for some functions that were defined as BOOLEAN in dbug.c code
Added DBUGGER_ON/DEBUGGER_OFF to speed up execution when DBUG is not used
include/my_global.h:
Define my_bool early
Increase MY_HOW_OFTEN_TO_WRITE as computers are now faster than 10 years ago
mysql-test/mysql-test-run.pl:
Added debug-on=0 to speed up tests
mysql-test/r/maria-recovery.result:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/r/maria.result:
Added testing of page checksums
mysql-test/t/crash_commit_before-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-bitmap-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery.test:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/t/maria.test:
Added testing of page checksums
sql/mysqld.cc:
Added --debug-on option (to be able to turn off DBUG with --debug-on=0)
Indentation fixes
Removed end spaces
sql/sql_show.cc:
Allow update_create_info() to inform MySQL if PACK_KEYS, NO_PACK_KEYS, CHECKSUM, PAGE_CHECKSUM or DELAY_KEY_WRITE is used
storage/maria/Makefile.am:
Added ma_test_big.sh
storage/maria/ha_maria.cc:
Store in create_info if page checksums are used (For SHOW CREATE TABLE)
storage/maria/ma_bitmap.c:
Added _ma_bitmap_wait_or_flush() to cause reader of bitmap pages to wait with reading until bitmap is flushed.
Use TAIL_PAGE_COUNT_MARKER for tail pages
Set 'sub_blocks' for and only for the head page or for the first extent of a blob. This is needed for store_extent_info() to be able to set START_EXTENT_BIT's
Don't allocate more than 0x3ffff pages in one extent (We need bit 0x4000 as a START_EXTENT_BIT)
Increase the calculated 'head_length' with the number of bytes used for extents.
Update row->space_on_head_page also in _ma_bitmap_find_new_place()
Make _ma_bitmap_get_page_bits() global. (Needed for UNDO handling)
Changed _ma_bitmap_flushable() to take MARIA_HA instead of MARIA_SHARE.
This was needed to be able to mark the handler if we had a 'non_flushable' call pending or not.
Don't use 'non_flushable' marking of bitmap pages for not transactional tables.
Added BLOCKUSED_USE_ORG_BITMAP handling also for tail pages.
Added more DBUG_ASSERT() to find possible errors in other code
Some code simplifications by adding new local variables
storage/maria/ma_blockrec.c:
UNDO of rows now puts back all part of the row on their original pages and positions.
Changed UNDO of DELETE and UNDO of UPDATE to contain information about the original length of data on head block and also extent information
This changes a lot of logic as now an insert of a row on a page may happen to any position (and not just to the first or next free)
Use PAGE_COUNT to mark if an extent is the start of a blob. (Needed for extent_to_bitmap_blocks())
Added check_directory() for checking that directory entries are correct.
Added checking of row checksums when reading rows (with EXTRA_DEBUG)
Added make_space_for_directory() and extend_directory() for doing expansion of directory
Added get_rowpos_in_head_or_tail_page() to be able to store head/tail on original position in UNDO
Added extent_to_bitmap_blocks() to be able to generate original bitmap blocks from UNDO entry
Added _ma_update_at_original_place() for UNDO of DELETES
Added row->min_length to hold minimum required space needed on head page
Changed find_free_position() to use make_space_for_directory()
Changed make_empty_page() to allow optional creation of directory entry
Changed delete_head_or_tail() and _ma_apply_undo_row_insert() to not copy pagecache block (speed optimization)
Changed _ma_apply_redo_insert_row_head_or_tail() to be able to insert new row at any position on 'new' page
Changed _ma_apply_undo_row_delete() and _ma_apply_undo_row_update() to put row in its original position
Ensure allocation of tail blocks are of at least MIN_TAIL_SIZE.
Ensure we store pages in pinned pages even if read failed. (If not we will have pages pinned forever in page cache)
Write original extent information in UNDO entry, not compacted ones (we need position to tails!)
When setting BLOCKUSED_USED, don't clear other bits (we have to preserve BLOCKUSED_USE_ORG_BITMAP)
Fixed some bugs in directory handling
Fixed bug where we wrote wrong lsn to blob pages
Added separate blob_buffer for fixing bug when updating row that had char/varchar that spanned several pages and also had blobs
Ensure we call _ma_bitmap_flushable() also in case of errors
When doing an update, first delete old entries, then search in bitmap for where to put new information
Info->s -> share
Rowid -> rowid
More DBUG_ASSERT()
storage/maria/ma_blockrec.h:
Added START_EXTENT_BIT and TAIL_PAGE_COUNT_MARKER
Added _ma_bitmap_wait_or_flush() and _ma_bitmap_get_page_bits()
storage/maria/ma_check.c:
Don't write extra empty line if there is no deleted blocks
Ignore START_EXTENT_BIT's in page count
Call _ma_fast_unlock_key_del() to free key_del link
storage/maria/ma_close.c:
Ensure that used_key_del is 0. (If not, someone forgot to call _ma_unlock_key_del())
storage/maria/ma_create.c:
Changed constant to macro
storage/maria/ma_delete.c:
For deleted keys, log also position to row
storage/maria/ma_extra.c:
Release blob buffer at maria_reset() if bigger than MARIA_SMALL_BLOB_BUFFER
storage/maria/ma_key_recover.c:
Added bzero() of LSN that confused page cache in case of uninitialized block
Mark file crashed if applying of index changes fails
Added calls to _ma_fast_unlock_key_del() for protection of shared key_del link.
storage/maria/ma_locking.c:
Added usage of MARIA_FILE_OPEN_COUNT_OFFSET
Added _ma_mark_file_crashed()
storage/maria/ma_loghandler.c:
Fixed bug where we logged uninitialized memory
storage/maria/ma_open.c:
Moved state->changed to be at start of state info on disk to allow one to easily mark files as crashed
storage/maria/ma_page.c:
Disable 'dummy' checksumming of pages as this gave false warnings.
(Need to investigate if this is ever needed)
storage/maria/ma_pagecache.c:
Fixed wrong shift that caused Maria to crash on files > 4G
storage/maria/ma_recovery.c:
In case of errors, start writing on new line if we were in %## %## printing mode (Made errors more readable)
Changed global variable name from warnings -> recovery_warnings
Use MARIA_FILE_CREATE_RENAME_LSN_OFFSET instead of constant
Removed special handling of row position for deleted keys. Keys now always includes row positions
_ma_apply_undo_row_delete() now gets page and row position
Added check that we don't loop forever when handling undo's (in case of bug in undo chain)
Print name of failed REDO/UNDO
storage/maria/ma_recovery.h:
Removed old comment
storage/maria/ma_static.c:
Changed version number of Maria files to not accidentally use old ones (because of change of ordering of status variables)
storage/maria/ma_test2.c:
Added option -u to specify number of rows to update
Changed old option -u to be -A, as for ma_test1
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
First created blob is now of max blob length to ensure we have at least one big blob in the table
storage/maria/ma_test_all.sh:
More tests
storage/maria/ma_test_recovery.expected:
Updated results
storage/maria/ma_test_recovery:
Changed tests to use bigger blobs (not just 1K)
Added new tests that test recovery of update with blobs
Removed comparison of .MAD file as it's not guaranteed that recovery from scratch gives identical data file as original update
(compact_page() may be called at different times during normal execution and during REDO)
storage/maria/ma_update.c:
Simplify code (changed * to if)
storage/maria/maria_chk.c:
Make output more readable
storage/maria/maria_def.h:
Changed 'changed' to int to prepare for more bits
Added 2 more bytes to status information
Added 'st_maria_row->min_length' for storing min length needed on head page
Added 'st_maria_handler->blob_buff & blob_buff_size' for storing blobs
Moved all tuning parameters into one block
Added MARIA_SMALL_BLOB_BUFFER
Added _ma_mark_file_crashed()
storage/myisam/mi_test2.c:
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
storage/maria/ma_test_big.sh:
Testing of insert, update, delete, recovery and undo of rows with blobs
Thanks to the random-ness of ma_test2 this is likely to find most bugs in the row handling
2007-12-30 21:40:03 +01:00
|
|
|
DBUG_PRINT("info", ("head page: %lu empty_space: %u",
|
|
|
|
(ulong) block->page, block->empty_space));
|
2007-01-18 20:38:14 +01:00
|
|
|
bits= _ma_free_size_to_head_pattern(bitmap, block->empty_space);
|
|
|
|
if (block->used & BLOCKUSED_USE_ORG_BITMAP)
|
|
|
|
current_bitmap_value= block->org_bitmap_value;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
bits= block->org_bitmap_value;
|
UNDO of rows now puts back all part of the row on their original pages and positions
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Added --debug-on option to mysqld (to be able to turn off DBUG with --debug-on=0)
Fixed some bugs with 'non_flushable' marking of bitmap pages
Don't use 'non_flushable' marking of bitmap pages for not transactional tables
SHOW CREATE TABLE now shows if table was created with page checksums
Fixed a lot of bugs with BLOB handling in case of update/REDO and UNDO
More tests (especially for blobs) and DBUG_ASSERTS()
More readable output from maria_read_log and maria_chk
Fixed wrong shift that caused Maria to crash on files > 4G
Mark tables as crashed if REDO fails
dbug/dbug.c:
Changed to use my_bool (allowed me to remove some windows specific code)
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Removed initialization of variables if not needed
include/my_dbug.h:
Use my_bool for some functions that was defined as BOOLEAN in dbug.c code
Added DEBUGGER_ON/DEBUGGER_OFF to speed up execution when DBUG is not used
include/my_global.h:
Define my_bool early
Increase MY_HOW_OFTEN_TO_WRITE as computers are now faster than 10 years ago
mysql-test/mysql-test-run.pl:
Added debug-on=0 to speed up tests
mysql-test/r/maria-recovery.result:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/r/maria.result:
Added testing of page checksums
mysql-test/t/crash_commit_before-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-bitmap-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery.test:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/t/maria.test:
Added testing of page checksums
sql/mysqld.cc:
Added --debug-on option (to be able to turn off DBUG with --debug-on=0)
Indentation fixes
Removed end spaces
sql/sql_show.cc:
Allow update_create_info() to inform MySQL if PACK_KEYS, NO_PACK_KEYS, CHECKSUM, PAGE_CHECKSUM or DELAY_KEY_WRITE is used
storage/maria/Makefile.am:
Added ma_test_big.sh
storage/maria/ha_maria.cc:
Store in create_info if page checksums are used (For SHOW CREATE TABLE)
storage/maria/ma_bitmap.c:
Added _ma_bitmap_wait_or_flush() to cause reader of bitmap pages to wait with reading until bitmap is flushed.
Use TAIL_PAGE_COUNT_MARKER for tail pages
Set 'sub_blocks' for and only for the head page or for the first extent of a blob. This is needed for store_extent_info() to be able to set START_EXTENT_BIT's
Don't allocate more than 0x3fff pages in one extent (We need bit 0x4000 as a START_EXTENT_BIT)
Increase the calculated 'head_length' with the number of bytes used for extents.
Update row->space_on_head_page also in _ma_bitmap_find_new_place()
Make _ma_bitmap_get_page_bits() global. (Needed for UNDO handling)
Changed _ma_bitmap_flushable() to take MARIA_HA instead of MARIA_SHARE.
This was needed to be able to mark the handler if we had a 'non_flushable' call pending or not.
Don't use 'non_flushable' marking of bitmap pages for not transactional tables.
Added BLOCKUSED_USE_ORG_BITMAP handling also for tail pages.
Added more DBUG_ASSERT() to find possible errors in other code
Some code simplifications by adding new local variables
storage/maria/ma_blockrec.c:
UNDO of rows now puts back all part of the row on their original pages and positions.
Changed UNDO of DELETE and UNDO of UPDATE to contain information about the original length of data on head block and also extent information
This changes a lot of logic as now an insert of a row on a page may happen to any position (and not just to the first or next free)
Use PAGE_COUNT to mark if an extent is the start of a blob. (Needed for extent_to_bitmap_blocks())
Added check_directory() for checking that directory entries are correct.
Added checking of row checksums when reading rows (with EXTRA_DEBUG)
Added make_space_for_directory() and extend_directory() for doing expansion of directory
Added get_rowpos_in_head_or_tail_page() to be able to store head/tail on original position in UNDO
Added extent_to_bitmap_blocks() to be able to generate original bitmap blocks from UNDO entry
Added _ma_update_at_original_place() for UNDO of DELETES
Added row->min_length to hold minimum required space needed on head page
Changed find_free_position() to use make_space_for_directory()
Changed make_empty_page() to allow optional creation of directory entry
Changed delete_head_or_tail() and _ma_apply_undo_row_insert() to not copy pagecache block (speed optimization)
Changed _ma_apply_redo_insert_row_head_or_tail() to be able to insert new row at any position on 'new' page
Changed _ma_apply_undo_row_delete() and _ma_apply_undo_row_update() to put row in its original position
Ensure allocation of tail blocks are of at least MIN_TAIL_SIZE.
Ensure we store pages in pinned pages even if read failed. (If not we will have pages pinned forever in page cache)
Write original extent information in UNDO entry, not compacted ones (we need position to tails!)
When setting BLOCKUSED_USED, don't clear other bits (we have to preserve BLOCKUSED_USE_ORG_BITMAP)
Fixed some bugs in directory handling
Fixed bug where we wrote wrong lsn to blob pages
Added separate blob_buffer for fixing bug when updating row that had char/varchar that spanned several pages and also had blobs
Ensure we call _ma_bitmap_flushable() also in case of errors
When doing an update, first delete old entries, then search in bitmap for where to put new information
Info->s -> share
Rowid -> rowid
More DBUG_ASSERT()
storage/maria/ma_blockrec.h:
Added START_EXTENT_BIT and TAIL_PAGE_COUNT_MARKER
Added _ma_bitmap_wait_or_flush() and _ma_bitmap_get_page_bits()
storage/maria/ma_check.c:
Don't write extra empty line if there is no deleted blocks
Ignore START_EXTENT_BIT's in page count
Call _ma_fast_unlock_key_del() to free key_del link
storage/maria/ma_close.c:
Ensure that used_key_del is 0. (If not, someone forgot to call _ma_unlock_key_del())
storage/maria/ma_create.c:
Changed constant to macro
storage/maria/ma_delete.c:
For deleted keys, log also position to row
storage/maria/ma_extra.c:
Release blob buffer at maria_reset() if bigger than MARIA_SMALL_BLOB_BUFFER
storage/maria/ma_key_recover.c:
Added bzero() of LSN that confused page cache in case of uninitialized block
Mark file crashed if applying of index changes fails
Added calls to _ma_fast_unlock_key_del() for protection of shared key_del link.
storage/maria/ma_locking.c:
Added usage of MARIA_FILE_OPEN_COUNT_OFFSET
Added _ma_mark_file_crashed()
storage/maria/ma_loghandler.c:
Fixed bug where we logged uninitialized memory
storage/maria/ma_open.c:
Moved state->changed to be at start of state info on disk to allow one to easily mark files as crashed
storage/maria/ma_page.c:
Disable 'dummy' checksumming of pages as this gave false warnings.
(Need to investigate if this is ever needed)
storage/maria/ma_pagecache.c:
Fixed wrong shift that caused Maria to crash on files > 4G
storage/maria/ma_recovery.c:
In case of errors, start writing on new line if we were in %## %## printing mode (Made errors more readable)
Changed global variable name from warnings -> recovery_warnings
Use MARIA_FILE_CREATE_RENAME_LSN_OFFSET instead of constant
Removed special handling of row position for deleted keys. Keys now always includes row positions
_ma_apply_undo_row_delete() now gets page and row position
Added check that we don't loop forever when handling undo's (in case of bug in undo chain)
Print name of failed REDO/UNDO
storage/maria/ma_recovery.h:
Removed old comment
storage/maria/ma_static.c:
Changed version number of Maria files to not accidentally use old ones (because of change of ordering of status variables)
storage/maria/ma_test2.c:
Added option -u to specify number of rows to update
Changed old option -u to be -A, as for ma_test1
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
First created blob is now of max blob length to ensure we have at least one big blob in the table
storage/maria/ma_test_all.sh:
More tests
storage/maria/ma_test_recovery.expected:
Updated results
storage/maria/ma_test_recovery:
Changed tests to use bigger blobs (not just 1K)
Added new tests that test recovery of update with blobs
Removed comparison of .MAD file as it's not guaranteed that recovery from scratch gives identical data file as original update
(compact_page() may be called at different times during normal execution and during REDO)
storage/maria/ma_update.c:
Simplify code (changed * to if)
storage/maria/maria_chk.c:
Make output more readable
storage/maria/maria_def.h:
Changed 'changed' to int to prepare for more bits
Added 2 more bytes to status information
Added 'st_maria_row->min_length' for storing min length needed on head page
Added 'st_maria_handler->blob_buff & blob_buff_size' for storing blobs
Moved all tuning parameters into one block
Added MARIA_SMALL_BLOB_BUFFER
Added _ma_mark_file_crashed()
storage/myisam/mi_test2.c:
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
storage/maria/ma_test_big.sh:
Testing of insert, update, delete, recovery and undo of rows with blobs
Thanks to the random-ness of ma_test2 this is likely to find most bugs in the row handling
2007-12-30 21:40:03 +01:00
|
|
|
if (bits != current_bitmap_value)
|
|
|
|
{
|
|
|
|
if (set_page_bits(info, bitmap, block->page, bits))
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
DBUG_ASSERT(current_bitmap_value ==
|
2011-06-24 11:08:45 +02:00
|
|
|
bitmap_get_page_bits(info, bitmap, block->page));
|
UNDO of rows now puts back all part of the row on their original pages and positions
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Added --debug-on option to mysqld (to be able to turn off DBUG with --debug-on=0)
Fixed some bugs with 'non_flushable' marking of bitmap pages
Don't use 'non_flushable' marking of bitmap pages for not transactional tables
SHOW CREATE TABLE now shows if table was created with page checksums
Fixed a lot of bugs with BLOB handling in case of update/REDO and UNDO
More tests (especially for blobs) and DBUG_ASSERTS()
More readable output from maria_read_log and maria_chk
Fixed wrong shift that caused Maria to crash on files > 4G
Mark tables as crashed if REDO fails
dbug/dbug.c:
Changed to use my_bool (allowed me to remove some windows specific code)
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Removed initialization of variables if not needed
include/my_dbug.h:
Use my_bool for some functions that was defined as BOOLEAN in dbug.c code
Added DEBUGGER_ON/DEBUGGER_OFF to speed up execution when DBUG is not used
include/my_global.h:
Define my_bool early
Increase MY_HOW_OFTEN_TO_WRITE as computers are now faster than 10 years ago
mysql-test/mysql-test-run.pl:
Added debug-on=0 to speed up tests
mysql-test/r/maria-recovery.result:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/r/maria.result:
Added testing of page checksums
mysql-test/t/crash_commit_before-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-bitmap-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery.test:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/t/maria.test:
Added testing of page checksums
sql/mysqld.cc:
Added --debug-on option (to be able to turn off DBUG with --debug-on=0)
Indentation fixes
Removed end spaces
sql/sql_show.cc:
Allow update_create_info() to inform MySQL if PACK_KEYS, NO_PACK_KEYS, CHECKSUM, PAGE_CHECKSUM or DELAY_KEY_WRITE is used
storage/maria/Makefile.am:
Added ma_test_big.sh
storage/maria/ha_maria.cc:
Store in create_info if page checksums are used (For SHOW CREATE TABLE)
storage/maria/ma_bitmap.c:
Added _ma_bitmap_wait_or_flush() to cause reader of bitmap pages to wait with reading until bitmap is flushed.
Use TAIL_PAGE_COUNT_MARKER for tail pages
Set 'sub_blocks' for and only for the head page or for the first extent of a blob. This is needed for store_extent_info() to be able to set START_EXTENT_BIT's
Don't allocate more than 0x3fff pages in one extent (We need bit 0x4000 as a START_EXTENT_BIT)
Increase the calculated 'head_length' with the number of bytes used for extents.
Update row->space_on_head_page also in _ma_bitmap_find_new_place()
Make _ma_bitmap_get_page_bits() global. (Needed for UNDO handling)
Changed _ma_bitmap_flushable() to take MARIA_HA instead of MARIA_SHARE.
This was needed to be able to mark the handler if we had a 'non_flushable' call pending or not.
Don't use 'non_flushable' marking of bitmap pages for not transactional tables.
Added BLOCKUSED_USE_ORG_BITMAP handling also for tail pages.
Added more DBUG_ASSERT() to find possible errors in other code
Some code simplifications by adding new local variables
storage/maria/ma_blockrec.c:
UNDO of rows now puts back all part of the row on their original pages and positions.
Changed UNDO of DELETE and UNDO of UPDATE to contain information about the original length of data on head block and also extent information
This changes a lot of logic as now an insert of a row on a page may happen to any position (and not just to the first or next free)
Use PAGE_COUNT to mark if an extent is the start of a blob. (Needed for extent_to_bitmap_blocks())
Added check_directory() for checking that directory entries are correct.
Added checking of row checksums when reading rows (with EXTRA_DEBUG)
Added make_space_for_directory() and extend_directory() for doing expansion of directory
Added get_rowpos_in_head_or_tail_page() to be able to store head/tail on original position in UNDO
Added extent_to_bitmap_blocks() to be able to generate original bitmap blocks from UNDO entry
Added _ma_update_at_original_place() for UNDO of DELETES
Added row->min_length to hold minimum required space needed on head page
Changed find_free_position() to use make_space_for_directory()
Changed make_empty_page() to allow optional creation of directory entry
Changed delete_head_or_tail() and _ma_apply_undo_row_insert() to not copy pagecache block (speed optimization)
Changed _ma_apply_redo_insert_row_head_or_tail() to be able to insert new row at any position on 'new' page
Changed _ma_apply_undo_row_delete() and _ma_apply_undo_row_update() to put row in its original position
Ensure allocation of tail blocks are of at least MIN_TAIL_SIZE.
Ensure we store pages in pinned pages even if read failed. (If not we will have pages pinned forever in page cache)
Write original extent information in UNDO entry, not compacted ones (we need position to tails!)
When setting BLOCKUSED_USED, don't clear other bits (we have to preserve BLOCKUSED_USE_ORG_BITMAP)
Fixed some bugs in directory handling
Fixed bug where we wrote wrong lsn to blob pages
Added separate blob_buffer for fixing bug when updating row that had char/varchar that spanned several pages and also had blobs
Ensure we call _ma_bitmap_flushable() also in case of errors
When doing an update, first delete old entries, then search in bitmap for where to put new information
Info->s -> share
Rowid -> rowid
More DBUG_ASSERT()
storage/maria/ma_blockrec.h:
Added START_EXTENT_BIT and TAIL_PAGE_COUNT_MARKER
Added _ma_bitmap_wait_or_flush() and _ma_bitmap_get_page_bits()
storage/maria/ma_check.c:
Don't write extra empty line if there is no deleted blocks
Ignore START_EXTENT_BIT's in page count
Call _ma_fast_unlock_key_del() to free key_del link
storage/maria/ma_close.c:
Ensure that used_key_del is 0. (If not, someone forgot to call _ma_unlock_key_del())
storage/maria/ma_create.c:
Changed constant to macro
storage/maria/ma_delete.c:
For deleted keys, log also position to row
storage/maria/ma_extra.c:
Release blob buffer at maria_reset() if bigger than MARIA_SMALL_BLOB_BUFFER
storage/maria/ma_key_recover.c:
Added bzero() of LSN that confused page cache in case of uninitialized block
Mark file crashed if applying of index changes fails
Added calls to _ma_fast_unlock_key_del() for protection of shared key_del link.
storage/maria/ma_locking.c:
Added usage of MARIA_FILE_OPEN_COUNT_OFFSET
Added _ma_mark_file_crashed()
storage/maria/ma_loghandler.c:
Fixed bug where we logged uninitialized memory
storage/maria/ma_open.c:
Moved state->changed to be at start of state info on disk to allow one to easily mark files as crashed
storage/maria/ma_page.c:
Disable 'dummy' checksumming of pages as this gave false warnings.
(Need to investigate if this is ever needed)
storage/maria/ma_pagecache.c:
Fixed wrong shift that caused Maria to crash on files > 4G
storage/maria/ma_recovery.c:
In case of errors, start writing on new line if we were in %## %## printing mode (Made errors more readable)
Changed global variable name from warnings -> recovery_warnings
Use MARIA_FILE_CREATE_RENAME_LSN_OFFSET instead of constant
Removed special handling of row position for deleted keys. Keys now always includes row positions
_ma_apply_undo_row_delete() now gets page and row position
Added check that we don't loop forever when handling undo's (in case of bug in undo chain)
Print name of failed REDO/UNDO
storage/maria/ma_recovery.h:
Removed old comment
storage/maria/ma_static.c:
Changed version number of Maria files to not accidentally use old ones (because of change of ordering of status variables)
storage/maria/ma_test2.c:
Added option -u to specify number of rows to update
Changed old option -u to be -A, as for ma_test1
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
First created blob is now of max blob length to ensure we have at least one big blob in the table
storage/maria/ma_test_all.sh:
More tests
storage/maria/ma_test_recovery.expected:
Updated results
storage/maria/ma_test_recovery:
Changed tests to use bigger blobs (not just 1K)
Added new tests that test recovery of update with blobs
Removed comparison of .MAD file as it's not guaranteed that recovery from scratch gives identical data file as original update
(compact_page() may be called at different times during normal execution and during REDO)
storage/maria/ma_update.c:
Simplify code (changed * to if)
storage/maria/maria_chk.c:
Make output more readable
storage/maria/maria_def.h:
Changed 'changed' to int to prepare for more bits
Added 2 more bytes to status information
Added 'st_maria_row->min_length' for storing min length needed on head page
Added 'st_maria_handler->blob_buff & blob_buff_size' for storing blobs
Moved all tuning parameters into one block
Added MARIA_SMALL_BLOB_BUFFER
Added _ma_mark_file_crashed()
storage/myisam/mi_test2.c:
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
storage/maria/ma_test_big.sh:
Testing of insert, update, delete, recovery and undo of rows with blobs
Thanks to the random-ness of ma_test2 this is likely to find most bugs in the row handling
2007-12-30 21:40:03 +01:00
|
|
|
}
|
2007-10-19 23:24:22 +02:00
|
|
|
|
2007-01-18 20:38:14 +01:00
|
|
|
/* Handle all full pages and tail pages (for head page and blob) */
|
|
|
|
for (block++; block < end; block++)
|
|
|
|
{
|
2007-09-03 11:05:17 +02:00
|
|
|
uint page_count;
|
2007-04-05 13:38:05 +02:00
|
|
|
if (!block->page_count)
|
|
|
|
continue; /* Skip 'filler blocks' */
|
|
|
|
|
2007-09-03 11:05:17 +02:00
|
|
|
page_count= block->page_count;
|
2007-01-18 20:38:14 +01:00
|
|
|
if (block->used & BLOCKUSED_TAIL)
|
|
|
|
{
|
UNDO of rows now puts back all part of the row on their original pages and positions
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Added --debug-on option to mysqld (to be able to turn off DBUG with --debug-on=0)
Fixed some bugs with 'non_flushable' marking of bitmap pages
Don't use 'non_flushable' marking of bitmap pages for not transactional tables
SHOW CREATE TABLE now shows if table was created with page checksums
Fixed a lot of bugs with BLOB handling in case of update/REDO and UNDO
More tests (especially for blobs) and DBUG_ASSERTS()
More readable output from maria_read_log and maria_chk
Fixed wrong shift that caused Maria to crash on files > 4G
Mark tables as crashed if REDO fails
dbug/dbug.c:
Changed to use my_bool (allowed me to remove some windows specific code)
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Removed initialization of variables if not needed
include/my_dbug.h:
Use my_bool for some functions that was defined as BOOLEAN in dbug.c code
Added DEBUGGER_ON/DEBUGGER_OFF to speed up execution when DBUG is not used
include/my_global.h:
Define my_bool early
Increase MY_HOW_OFTEN_TO_WRITE as computers are now faster than 10 years ago
mysql-test/mysql-test-run.pl:
Added debug-on=0 to speed up tests
mysql-test/r/maria-recovery.result:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/r/maria.result:
Added testing of page checksums
mysql-test/t/crash_commit_before-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-bitmap-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery.test:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/t/maria.test:
Added testing of page checksums
sql/mysqld.cc:
Added --debug-on option (to be able to turn off DBUG with --debug-on=0)
Indentation fixes
Removed end spaces
sql/sql_show.cc:
Allow update_create_info() to inform MySQL if PACK_KEYS, NO_PACK_KEYS, CHECKSUM, PAGE_CHECKSUM or DELAY_KEY_WRITE is used
storage/maria/Makefile.am:
Added ma_test_big.sh
storage/maria/ha_maria.cc:
Store in create_info if page checksums are used (For SHOW CREATE TABLE)
storage/maria/ma_bitmap.c:
Added _ma_bitmap_wait_or_flush() to cause reader of bitmap pages to wait with reading until bitmap is flushed.
Use TAIL_PAGE_COUNT_MARKER for tail pages
Set 'sub_blocks' for and only for the head page or for the first extent of a blob. This is needed for store_extent_info() to be able to set START_EXTENT_BIT's
Don't allocate more than 0x3fff pages in one extent (We need bit 0x4000 as a START_EXTENT_BIT)
Increase the calculated 'head_length' with the number of bytes used for extents.
Update row->space_on_head_page also in _ma_bitmap_find_new_place()
Make _ma_bitmap_get_page_bits() global. (Needed for UNDO handling)
Changed _ma_bitmap_flushable() to take MARIA_HA instead of MARIA_SHARE.
This was needed to be able to mark the handler if we had a 'non_flushable' call pending or not.
Don't use 'non_flushable' marking of bitmap pages for not transactional tables.
Added BLOCKUSED_USE_ORG_BITMAP handling also for tail pages.
Added more DBUG_ASSERT() to find possible errors in other code
Some code simplifications by adding new local variables
storage/maria/ma_blockrec.c:
UNDO of rows now puts back all part of the row on their original pages and positions.
Changed UNDO of DELETE and UNDO of UPDATE to contain information about the original length of data on head block and also extent information
This changes a lot of logic as now an insert of a row on a page may happen to any position (and not just to the first or next free)
Use PAGE_COUNT to mark if an extent is the start of a blob. (Needed for extent_to_bitmap_blocks())
Added check_directory() for checking that directory entries are correct.
Added checking of row checksums when reading rows (with EXTRA_DEBUG)
Added make_space_for_directory() and extend_directory() for doing expansion of directory
Added get_rowpos_in_head_or_tail_page() to be able to store head/tail on original position in UNDO
Added extent_to_bitmap_blocks() to be able to generate original bitmap blocks from UNDO entry
Added _ma_update_at_original_place() for UNDO of DELETES
Added row->min_length to hold minimum required space needed on head page
Changed find_free_position() to use make_space_for_directory()
Changed make_empty_page() to allow optional creation of directory entry
Changed delete_head_or_tail() and _ma_apply_undo_row_insert() to not copy pagecache block (speed optimization)
Changed _ma_apply_redo_insert_row_head_or_tail() to be able to insert new row at any position on 'new' page
Changed _ma_apply_undo_row_delete() and _ma_apply_undo_row_update() to put row in its original position
Ensure allocation of tail blocks are of at least MIN_TAIL_SIZE.
Ensure we store pages in pinned pages even if read failed. (If not we will have pages pinned forever in page cache)
Write original extent information in UNDO entry, not compacted ones (we need position to tails!)
When setting BLOCKUSED_USED, don't clear other bits (we have to preserve BLOCKUSED_USE_ORG_BITMAP)
Fixed some bugs in directory handling
Fixed bug where we wrote wrong lsn to blob pages
Added separate blob_buffer for fixing bug when updating row that had char/varchar that spanned several pages and also had blobs
Ensure we call _ma_bitmap_flushable() also in case of errors
When doing an update, first delete old entries, then search in bitmap for where to put new information
Info->s -> share
Rowid -> rowid
More DBUG_ASSERT()
storage/maria/ma_blockrec.h:
Added START_EXTENT_BIT and TAIL_PAGE_COUNT_MARKER
Added _ma_bitmap_wait_or_flush() and _ma_bitmap_get_page_bits()
storage/maria/ma_check.c:
Don't write extra empty line if there is no deleted blocks
Ignore START_EXTENT_BIT's in page count
Call _ma_fast_unlock_key_del() to free key_del link
storage/maria/ma_close.c:
Ensure that used_key_del is 0. (If not, someone forgot to call _ma_unlock_key_del())
storage/maria/ma_create.c:
Changed constant to macro
storage/maria/ma_delete.c:
For deleted keys, log also position to row
storage/maria/ma_extra.c:
Release blob buffer at maria_reset() if bigger than MARIA_SMALL_BLOB_BUFFER
storage/maria/ma_key_recover.c:
Added bzero() of LSN that confused paged cache in case of uninitialized block
Mark file crashed if applying of index changes fails
Added calls to _ma_fast_unlock_key_del() for protection of shared key_del link.
storage/maria/ma_locking.c:
Added usage of MARIA_FILE_OPEN_COUNT_OFFSET
Added _ma_mark_file_crashed()
storage/maria/ma_loghandler.c:
Fixed bug where we logged uninitialized memory
storage/maria/ma_open.c:
Moved state->changed to be at start of state info on disk to allow one to easily mark files as crashed
storage/maria/ma_page.c:
Disable 'dummy' checksumming of pages as this gave false warnings.
(Need to investigate if this is ever needed)
storage/maria/ma_pagecache.c:
Fixed wrong shift that caused Maria to crash on files > 4G
storage/maria/ma_recovery.c:
In case of errors, start writing on new line if we were in %## %## printing mode (Made errors more readable)
Changed global variable name from warnings -> recovery_warnings
Use MARIA_FILE_CREATE_RENAME_LSN_OFFSET instead of constant
Removed special handling of row position for deleted keys. Keys now always includes row positions
_ma_apply_undo_row_delete() now gets page and row position
Added check that we don't loop forever when handling undo's (in case of bug in undo chain)
Print name of failed REDO/UNDO
storage/maria/ma_recovery.h:
Removed old comment
storage/maria/ma_static.c:
Changed version number of Maria files to not accidentally use old ones (because of change of ordering of status variables)
storage/maria/ma_test2.c:
Added option -u to specify number of rows to update
Changed old option -u to be -A, as for ma_test1
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
First created blob is now of max blob length to ensure we have at least one big blob in the table
storage/maria/ma_test_all.sh:
More tests
storage/maria/ma_test_recovery.expected:
Updated results
storage/maria/ma_test_recovery:
Changed tests to use bigger blobs (not just 1K)
Added new tests that tests recovery of update with blobs
Removed comparison of .MAD file as it's not guaranteed that recovery from scratch gives identical data file as original update
(compact_page() may be called at different times during normal execution and during REDO)
storage/maria/ma_update.c:
Simplify code (changed * to if)
storage/maria/maria_chk.c:
Make output more readable
storage/maria/maria_def.h:
Changed 'changed' to int to prepare for more bits
Added 2 more bytes to status information
Added 'st_maria_row->min_length' for storing min length needed on head page
Added 'st_maria_handler->blob_buff & blob_buff_size' for storing blobs
Moved all tuning parameters into one block
Added MARIA_SMALL_BLOB_BUFFER
Added _ma_mark_file_crashed()
storage/myisam/mi_test2.c:
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
storage/maria/ma_test_big.sh:
Testing of insert, update, delete, recovery and undo of rows with blobs
Thanks to the random-ness of ma_test2 this is likely to find most bugs in the row handling
2007-12-30 21:40:03 +01:00
|
|
|
current_bitmap_value= FULL_TAIL_PAGE;
|
2007-09-03 11:05:17 +02:00
|
|
|
/* The bitmap page is only one page */
|
|
|
|
page_count= 1;
|
2007-01-18 20:38:14 +01:00
|
|
|
if (block->used & BLOCKUSED_USED)
|
|
|
|
{
|
UNDO of rows now puts back all part of the row on their original pages and positions
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Added --debug-on option to mysqld (to be able to turn off DBUG with --debug-on=0)
Fixed some bugs with 'non_flushable' marking of bitmap pages
Don't use 'non_flushable' marking of bitmap pages for not transactional tables
SHOW CREATE TABLE now shows if table was created with page checksums
Fixed a lot of bugs with BLOB handling in case of update/REDO and UNDO
More tests (especially for blobs) and DBUG_ASSERTS()
More readable output from maria_read_log and maria_chk
Fixed wrong shift that caused Maria to crash on files > 4G
Mark tables as crashed if REDO fails
dbug/dbug.c:
Changed to use my_bool (allowed me to remove some windows specific code)
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Removed initialization of variables if not needed
include/my_dbug.h:
Use my_bool for some functions that was defined as BOOLEAN in dbug.c code
Added DEBUGGER_ON/DEBUGGER_OFF to speed up execution when DBUG is not used
include/my_global.h:
Define my_bool early
Increase MY_HOW_OFTEN_TO_WRITE as computers are now faster than 10 years ago
mysql-test/mysql-test-run.pl:
Added debug-on=0 to speed up tests
mysql-test/r/maria-recovery.result:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/r/maria.result:
Added testing of page checksums
mysql-test/t/crash_commit_before-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-bitmap-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery.test:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/t/maria.test:
Added testing of page checksums
sql/mysqld.cc:
Added --debug-on option (to be able to turn off DBUG with --debug-on=0)
Indentation fixes
Removed end spaces
sql/sql_show.cc:
Allow update_create_info() to inform MySQL if PACK_KEYS, NO_PACK_KEYS, CHECKSUM, PAGE_CHECKSUM or DELAY_KEY_WRITE is used
storage/maria/Makefile.am:
Added ma_test_big.sh
storage/maria/ha_maria.cc:
Store in create_info if page checksums are used (For SHOW CREATE TABLE)
storage/maria/ma_bitmap.c:
Added _ma_bitmap_wait_or_flush() to cause reader of bitmap pages to wait with reading until bitmap is flushed.
Use TAIL_PAGE_COUNT_MARKER for tail pages
Set 'sub_blocks' for and only for the head page or for the first extent of a blob. This is needed for store_extent_info() to be able to set START_EXTENT_BIT's
Don't allocate more than 0x3fff pages in one extent (We need bit 0x4000 as a START_EXTENT_BIT)
Increase the calculated 'head_length' with the number of bytes used for extents.
Update row->space_on_head_page also in _ma_bitmap_find_new_place()
Make _ma_bitmap_get_page_bits() global. (Needed for UNDO handling)
Changed _ma_bitmap_flushable() to take MARIA_HA instead of MARIA_SHARE.
This was needed to be able to mark the handler if we had a 'non_flushable' call pending or not.
Don't use 'non_flushable' marking of bitmap pages for not transactional tables.
Added BLOCKUSED_USE_ORG_BITMAP handling also for tail pages.
Added more DBUG_ASSERT() to find possible errors in other code
Some code simplifications by adding new local variables
storage/maria/ma_blockrec.c:
UNDO of rows now puts back all part of the row on their original pages and positions.
Changed UNDO of DELETE and UNDO of UPDATE to contain information about the original length of data on head block and also extent information
This changes a lot of logic as now an insert of a row on a page may happen to any position (and not just to the first or next free)
Use PAGE_COUNT to mark if an extent is the start of a blob. (Needed for extent_to_bitmap_blocks())
Added check_directory() for checking that directory entries are correct.
Added checking of row checksums when reading rows (with EXTRA_DEBUG)
Added make_space_for_directory() and extend_directory() for doing expansion of directory
Added get_rowpos_in_head_or_tail_page() to be able to store head/tail on original position in UNDO
Added extent_to_bitmap_blocks() to be able to generate original bitmap blocks from UNDO entry
Added _ma_update_at_original_place() for UNDO of DELETES
Added row->min_length to hold minimum required space needed on head page
Changed find_free_position() to use make_space_for_directory()
Changed make_empty_page() to allow optional creation of directory entry
Changed delete_head_or_tail() and _ma_apply_undo_row_insert() to not copy pagecache block (speed optimization)
Changed _ma_apply_redo_insert_row_head_or_tail() to be able to insert new row at any position on 'new' page
Changed _ma_apply_undo_row_delete() and _ma_apply_undo_row_update() to put row in its original position
Ensure allocation of tail blocks are of at least MIN_TAIL_SIZE.
Ensure we store pages in pinned pages even if read failed. (If not we will have pages pinned forever in page cache)
Write original extent information in UNDO entry, not compacted ones (we need position to tails!)
When setting BLOCKUSED_USED, don't clear other bits (we have to preserve BLOCKUSED_USE_ORG_BITMAP)
Fixed some bugs in directory handling
Fixed bug where we wrote wrong lsn to blob pages
Added separate blob_buffer for fixing bug when updating row that had char/varchar that spanned several pages and also had blobs
Ensure we call _ma_bitmap_flushable() also in case of errors
When doing an update, first delete old entries, then search in bitmap for where to put new information
Info->s -> share
Rowid -> rowid
More DBUG_ASSERT()
storage/maria/ma_blockrec.h:
Added START_EXTENT_BIT and TAIL_PAGE_COUNT_MARKER
Added _ma_bitmap_wait_or_flush() and _ma_bitmap_get_page_bits()
storage/maria/ma_check.c:
Don't write extra empty line if there is no deleted blocks
Ignore START_EXTENT_BIT's in page count
Call _ma_fast_unlock_key_del() to free key_del link
storage/maria/ma_close.c:
Ensure that used_key_del is 0. (If not, someone forgot to call _ma_unlock_key_del())
storage/maria/ma_create.c:
Changed constant to macro
storage/maria/ma_delete.c:
For deleted keys, log also position to row
storage/maria/ma_extra.c:
Release blob buffer at maria_reset() if bigger than MARIA_SMALL_BLOB_BUFFER
storage/maria/ma_key_recover.c:
Added bzero() of LSN that confused paged cache in case of uninitialized block
Mark file crashed if applying of index changes fails
Added calls to _ma_fast_unlock_key_del() for protection of shared key_del link.
storage/maria/ma_locking.c:
Added usage of MARIA_FILE_OPEN_COUNT_OFFSET
Added _ma_mark_file_crashed()
storage/maria/ma_loghandler.c:
Fixed bug where we logged uninitialized memory
storage/maria/ma_open.c:
Moved state->changed to be at start of state info on disk to allow one to easily mark files as crashed
storage/maria/ma_page.c:
Disable 'dummy' checksumming of pages as this gave false warnings.
(Need to investigate if this is ever needed)
storage/maria/ma_pagecache.c:
Fixed wrong shift that caused Maria to crash on files > 4G
storage/maria/ma_recovery.c:
In case of errors, start writing on new line if we were in %## %## printing mode (Made errors more readable)
Changed global variable name from warnings -> recovery_warnings
Use MARIA_FILE_CREATE_RENAME_LSN_OFFSET instead of constant
Removed special handling of row position for deleted keys. Keys now always includes row positions
_ma_apply_undo_row_delete() now gets page and row position
Added check that we don't loop forever when handling undo's (in case of bug in undo chain)
Print name of failed REDO/UNDO
storage/maria/ma_recovery.h:
Removed old comment
storage/maria/ma_static.c:
Changed version number of Maria files to not accidentally use old ones (because of change of ordering of status variables)
storage/maria/ma_test2.c:
Added option -u to specify number of rows to update
Changed old option -u to be -A, as for ma_test1
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
First created blob is now of max blob length to ensure we have at least one big blob in the table
storage/maria/ma_test_all.sh:
More tests
storage/maria/ma_test_recovery.expected:
Updated results
storage/maria/ma_test_recovery:
Changed tests to use bigger blobs (not just 1K)
Added new tests that tests recovery of update with blobs
Removed comparison of .MAD file as it's not guaranteed that recovery from scratch gives identical data file as original update
(compact_page() may be called at different times during normal execution and during REDO)
storage/maria/ma_update.c:
Simplify code (changed * to if)
storage/maria/maria_chk.c:
Make output more readable
storage/maria/maria_def.h:
Changed 'changed' to int to prepare for more bits
Added 2 more bytes to status information
Added 'st_maria_row->min_length' for storing min length needed on head page
Added 'st_maria_handler->blob_buff & blob_buff_size' for storing blobs
Moved all tuning parameters into one block
Added MARIA_SMALL_BLOB_BUFFER
Added _ma_mark_file_crashed()
storage/myisam/mi_test2.c:
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
storage/maria/ma_test_big.sh:
Testing of insert, update, delete, recovery and undo of rows with blobs
Thanks to the random-ness of ma_test2 this is likely to find most bugs in the row handling
2007-12-30 21:40:03 +01:00
|
|
|
DBUG_PRINT("info", ("tail page: %lu empty_space: %u",
|
|
|
|
(ulong) block->page, block->empty_space));
|
2007-01-18 20:38:14 +01:00
|
|
|
bits= free_size_to_tail_pattern(bitmap, block->empty_space);
|
UNDO of rows now puts back all part of the row on their original pages and positions
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Added --debug-on option to mysqld (to be able to turn off DBUG with --debug-on=0)
Fixed some bugs with 'non_flushable' marking of bitmap pages
Don't use 'non_flushable' marking of bitmap pages for not transactional tables
SHOW CREATE TABLE now shows if table was created with page checksums
Fixed a lot of bugs with BLOB handling in case of update/REDO and UNDO
More tests (especially for blobs) and DBUG_ASSERTS()
More readable output from maria_read_log and maria_chk
Fixed wrong shift that caused Maria to crash on files > 4G
Mark tables as crashed if REDO fails
dbug/dbug.c:
Changed to use my_bool (allowed me to remove some windows specific code)
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Removed initialization of variables if not needed
include/my_dbug.h:
Use my_bool for some functions that was defined as BOOLEAN in dbug.c code
Added DEBUGGER_ON/DEBUGGER_OFF to speed up execution when DBUG is not used
include/my_global.h:
Define my_bool early
Increase MY_HOW_OFTEN_TO_WRITE as computers are now faster than 10 years ago
mysql-test/mysql-test-run.pl:
Added debug-on=0 to speed up tests
mysql-test/r/maria-recovery.result:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/r/maria.result:
Added testing of page checksums
mysql-test/t/crash_commit_before-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-bitmap-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery.test:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/t/maria.test:
Added testing of page checksums
sql/mysqld.cc:
Added --debug-on option (to be able to turn off DBUG with --debug-on=0)
Indentation fixes
Removed end spaces
sql/sql_show.cc:
Allow update_create_info() to inform MySQL if PACK_KEYS, NO_PACK_KEYS, CHECKSUM, PAGE_CHECKSUM or DELAY_KEY_WRITE is used
storage/maria/Makefile.am:
Added ma_test_big.sh
storage/maria/ha_maria.cc:
Store in create_info if page checksums are used (For SHOW CREATE TABLE)
storage/maria/ma_bitmap.c:
Added _ma_bitmap_wait_or_flush() to cause reader of bitmap pages to wait with reading until bitmap is flushed.
Use TAIL_PAGE_COUNT_MARKER for tail pages
Set 'sub_blocks' for and only for the head page or for the first extent of a blob. This is needed for store_extent_info() to be able to set START_EXTENT_BIT's
Don't allocate more than 0x3fff pages in one extent (We need bit 0x4000 as a START_EXTENT_BIT)
Increase the calculated 'head_length' with the number of bytes used for extents.
Update row->space_on_head_page also in _ma_bitmap_find_new_place()
Make _ma_bitmap_get_page_bits() global. (Needed for UNDO handling)
Changed _ma_bitmap_flushable() to take MARIA_HA instead of MARIA_SHARE.
This was needed to be able to mark the handler if we had a 'non_flushable' call pending or not.
Don't use 'non_flushable' marking of bitmap pages for not transactional tables.
Added BLOCKUSED_USE_ORG_BITMAP handling also for tail pages.
Added more DBUG_ASSERT() to find possible errors in other code
Some code simplifications by adding new local variables
storage/maria/ma_blockrec.c:
UNDO of rows now puts back all part of the row on their original pages and positions.
Changed UNDO of DELETE and UNDO of UPDATE to contain information about the original length of data on head block and also extent information
This changes a lot of logic as now an insert of a row on a page may happen to any position (and not just to the first or next free)
Use PAGE_COUNT to mark if an extent is the start of a blob. (Needed for extent_to_bitmap_blocks())
Added check_directory() for checking that directory entries are correct.
Added checking of row checksums when reading rows (with EXTRA_DEBUG)
Added make_space_for_directory() and extend_directory() for doing expansion of directory
Added get_rowpos_in_head_or_tail_page() to be able to store head/tail on original position in UNDO
Added extent_to_bitmap_blocks() to be able to generate original bitmap blocks from UNDO entry
Added _ma_update_at_original_place() for UNDO of DELETES
Added row->min_length to hold minimum required space needed on head page
Changed find_free_position() to use make_space_for_directory()
Changed make_empty_page() to allow optional creation of directory entry
Changed delete_head_or_tail() and _ma_apply_undo_row_insert() to not copy pagecache block (speed optimization)
Changed _ma_apply_redo_insert_row_head_or_tail() to be able to insert new row at any position on 'new' page
Changed _ma_apply_undo_row_delete() and _ma_apply_undo_row_update() to put row in its original position
Ensure allocation of tail blocks are of at least MIN_TAIL_SIZE.
Ensure we store pages in pinned pages even if read failed. (If not we will have pages pinned forever in page cache)
Write original extent information in UNDO entry, not compacted ones (we need position to tails!)
When setting BLOCKUSED_USED, don't clear other bits (we have to preserve BLOCKUSED_USE_ORG_BITMAP)
Fixed some bugs in directory handling
Fixed bug where we wrote wrong lsn to blob pages
Added separate blob_buffer for fixing bug when updating row that had char/varchar that spanned several pages and also had blobs
Ensure we call _ma_bitmap_flushable() also in case of errors
When doing an update, first delete old entries, then search in bitmap for where to put new information
Info->s -> share
Rowid -> rowid
More DBUG_ASSERT()
storage/maria/ma_blockrec.h:
Added START_EXTENT_BIT and TAIL_PAGE_COUNT_MARKER
Added _ma_bitmap_wait_or_flush() and _ma_bitmap_get_page_bits()
storage/maria/ma_check.c:
Don't write extra empty line if there is no deleted blocks
Ignore START_EXTENT_BIT's in page count
Call _ma_fast_unlock_key_del() to free key_del link
storage/maria/ma_close.c:
Ensure that used_key_del is 0. (If not, someone forgot to call _ma_unlock_key_del())
storage/maria/ma_create.c:
Changed constant to macro
storage/maria/ma_delete.c:
For deleted keys, log also position to row
storage/maria/ma_extra.c:
Release blob buffer at maria_reset() if bigger than MARIA_SMALL_BLOB_BUFFER
storage/maria/ma_key_recover.c:
Added bzero() of LSN that confused paged cache in case of uninitialized block
Mark file crashed if applying of index changes fails
Added calls to _ma_fast_unlock_key_del() for protection of shared key_del link.
storage/maria/ma_locking.c:
Added usage of MARIA_FILE_OPEN_COUNT_OFFSET
Added _ma_mark_file_crashed()
storage/maria/ma_loghandler.c:
Fixed bug where we logged uninitialized memory
storage/maria/ma_open.c:
Moved state->changed to be at start of state info on disk to allow one to easily mark files as crashed
storage/maria/ma_page.c:
Disable 'dummy' checksumming of pages as this gave false warnings.
(Need to investigate if this is ever needed)
storage/maria/ma_pagecache.c:
Fixed wrong shift that caused Maria to crash on files > 4G
storage/maria/ma_recovery.c:
In case of errors, start writing on new line if we were in %## %## printing mode (Made errors more readable)
Changed global variable name from warnings -> recovery_warnings
Use MARIA_FILE_CREATE_RENAME_LSN_OFFSET instead of constant
Removed special handling of row position for deleted keys. Keys now always includes row positions
_ma_apply_undo_row_delete() now gets page and row position
Added check that we don't loop forever when handling undo's (in case of bug in undo chain)
Print name of failed REDO/UNDO
storage/maria/ma_recovery.h:
Removed old comment
storage/maria/ma_static.c:
Changed version number of Maria files to not accidentally use old ones (because of change of ordering of status variables)
storage/maria/ma_test2.c:
Added option -u to specify number of rows to update
Changed old option -u to be -A, as for ma_test1
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
First created blob is now of max blob length to ensure we have at least one big blob in the table
storage/maria/ma_test_all.sh:
More tests
storage/maria/ma_test_recovery.expected:
Updated results
storage/maria/ma_test_recovery:
Changed tests to use bigger blobs (not just 1K)
Added new tests that tests recovery of update with blobs
Removed comparison of .MAD file as it's not guaranteed that recovery from scratch gives identical data file as original update
(compact_page() may be called at different times during normal execution and during REDO)
storage/maria/ma_update.c:
Simplify code (changed * to if)
storage/maria/maria_chk.c:
Make output more readable
storage/maria/maria_def.h:
Changed 'changed' to int to prepare for more bits
Added 2 more bytes to status information
Added 'st_maria_row->min_length' for storing min length needed on head page
Added 'st_maria_handler->blob_buff & blob_buff_size' for storing blobs
Moved all tuning parameters into one block
Added MARIA_SMALL_BLOB_BUFFER
Added _ma_mark_file_crashed()
storage/myisam/mi_test2.c:
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
storage/maria/ma_test_big.sh:
Testing of insert, update, delete, recovery and undo of rows with blobs
Thanks to the random-ness of ma_test2 this is likely to find most bugs in the row handling
2007-12-30 21:40:03 +01:00
|
|
|
if (block->used & BLOCKUSED_USE_ORG_BITMAP)
|
|
|
|
current_bitmap_value= block->org_bitmap_value;
|
2007-01-18 20:38:14 +01:00
|
|
|
}
|
|
|
|
else
|
|
|
|
bits= block->org_bitmap_value;
|
2007-04-19 12:18:56 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
The page has all bits set; The following test is an optimization
|
|
|
|
to not set the bits to the same value as before.
|
|
|
|
*/
|
2011-01-24 14:19:40 +01:00
|
|
|
DBUG_ASSERT(current_bitmap_value ==
|
2011-06-24 11:08:45 +02:00
|
|
|
bitmap_get_page_bits(info, bitmap, block->page));
|
2011-01-24 14:19:40 +01:00
|
|
|
|
2010-09-07 18:58:39 +02:00
|
|
|
if (bits != current_bitmap_value)
|
|
|
|
{
|
|
|
|
if (set_page_bits(info, bitmap, block->page, bits))
|
|
|
|
goto err;
|
|
|
|
}
|
2007-01-18 20:38:14 +01:00
|
|
|
}
|
2008-01-07 17:54:41 +01:00
|
|
|
else if (!(block->used & BLOCKUSED_USED) &&
|
|
|
|
_ma_bitmap_reset_full_page_bits(info, bitmap,
|
|
|
|
block->page, page_count))
|
2007-01-18 20:38:14 +01:00
|
|
|
goto err;
|
|
|
|
}
|
WL#3072 - Maria recovery.
* fix for bitmap vs checkpoint bug which could lead to corrupted
tables in case of crashes at certain moments: a bitmap could be flushed
to disk even though it was inconsistent with the log (it could be
flushed before REDO-UNDO are written to the log). One bug remains, need
code from others. Tests added. Fix is to pin unflushable bitmap pages,
and let checkpoint wait for them to be flushable.
* fix for long_trid!=0 assertion failure at Recovery.
* less useless wakeups in the background flush|checkpoint thread.
* store global_trid_generator in checkpoint record.
mysql-test/r/maria-recovery.result:
result update
mysql-test/t/maria-recovery.test:
make it easier to locate subtests
storage/maria/ma_bitmap.c:
When we send a bitmap to the pagecache, if this bitmap is not in a
flushable state we keep it pinned and add it to a list, it will be
unpinned when the bitmap is flushable again.
A new function _ma_bitmap_flush_all() used by checkpoint.
A new function _ma_bitmap_flushable() used by block format to signal
when it starts modifying a bitmap and when it is done with it.
storage/maria/ma_blockrec.c:
When starting a row operation (insert/update/delete), mark that
the bitmap is not flushable (because for example INSERT is going
to over-allocate in the bitmap to prevent other threads from using
our data pages). If a checkpoint comes at this moment it will wait
for the bitmap to be flushable before flushing it.
When the operation ends, bitmap becomes flushable again; that
transition is done under the bitmap's mutex (needed for correct
synchro with a concurrent checkpoint); but for INSERT/UPDATE this
happens inside _ma_bitmap_release_unused() at a place where it already
has the mutex, so the only penalty (mutex adding) is in DELETE and UNDO
of INSERT. In case of errors after setting the bitmap unflushable,
we must always set it back to flushable or checkpoint would block.
Debug possibilities to force a sleep while the bitmap is over-allocated.
In case of error in get_head_or_tail() in allocate_and_write_block_record(),
we still need to unpin all pages.
Bugfix: _ma_apply_redo_insert_row_blobs() produced wrong
data_file_length.
storage/maria/ma_blockrec.h:
new bitmap calls.
storage/maria/ma_checkpoint.c:
filter_flush_indirect not needed anymore (flushing bitmap
pages happens in _ma_bitmap_flush_all() now). So
st_filter_param::is_data_file|pages_covered_by_bitmap not needed.
Other filter_flush* don't need to flush bitmap anymore.
Add debug possibility to flush all bitmap pages outside of a checkpoint,
to simulate pagecache LRU eviction.
When the background flush/checkpoint thread notices it has nothing
to flush, it now sleeps directly until the next potential checkpoint
moment instead of waking up every second.
When in checkpoint we decide to not store a table in the checkpoint record
(because it has logged no writes for example), we can also skip flushing
this table.
storage/maria/ma_commit.c:
comment is out-of-date
storage/maria/ma_key_recover.c:
comment fix
storage/maria/ma_loghandler.c:
comment is out-of-date
storage/maria/ma_open.c:
comment is out-of-date
storage/maria/ma_pagecache.c:
comment for bug to fix. And we don't take checkpoints at end of REDO
phase yet so can trust block->type.
storage/maria/ma_recovery.c:
Comments. Now-unneeded code for incomplete REDO-UNDO groups removed.
When we forget about an old transaction we must really forget
about it with bzero() (fixes the "long_trid!=0 assertion" recovery
bug). When we delete a row with maria_delete() we turn on
STATE_NOT_OPTIMIZED_ROWS so we do the same when we see a CLR_END
for an UNDO_ROW_INSERT or when we execute an UNDO_ROW_INSERT (in both
cases a row was deleted). Pick up max_long_trid from the checkpoint record.
storage/maria/maria_chk.c:
comment
storage/maria/maria_def.h:
MARIA_FILE_BITMAP gets new members: 'flushable', 'bitmap_cond' and
'pinned_pages'.
storage/maria/trnman.c:
I used to think that recovery only needs to know the maximum TrID
of the lists of active and committed transactions. But no, sometimes
both lists can even be empty and their TrID should not be reused.
So Checkpoint now saves global_trid_generator in the checkpoint record.
storage/maria/trnman_public.h:
macros to read/store a TrID
mysql-test/r/maria-recovery-bitmap.result:
result is ok. Without the code fix, we would get a corruption message
about the bitmap page in CHECK TABLE EXTENDED.
mysql-test/t/maria-recovery-bitmap-master.opt:
usual when we crash mysqld in tests
mysql-test/t/maria-recovery-bitmap.test:
test of recovery problems specific to the bitmap pages.
2007-12-14 16:14:12 +01:00
|
|
|
|
2008-10-14 11:38:07 +02:00
|
|
|
/* This duplicates ma_bitmap_flushable(-1) except it already has mutex */
|
|
|
|
if (info->non_flushable_state)
|
WL#3072 - Maria recovery.
* fix for bitmap vs checkpoint bug which could lead to corrupted
tables in case of crashes at certain moments: a bitmap could be flushed
to disk even though it was inconsistent with the log (it could be
flushed before REDO-UNDO are written to the log). One bug remains, need
code from others. Tests added. Fix is to pin unflushable bitmap pages,
and let checkpoint wait for them to be flushable.
* fix for long_trid!=0 assertion failure at Recovery.
* less useless wakeups in the background flush|checkpoint thread.
* store global_trid_generator in checkpoint record.
mysql-test/r/maria-recovery.result:
result update
mysql-test/t/maria-recovery.test:
make it easier to locate subtests
storage/maria/ma_bitmap.c:
When we send a bitmap to the pagecache, if this bitmap is not in a
flushable state we keep it pinned and add it to a list, it will be
unpinned when the bitmap is flushable again.
A new function _ma_bitmap_flush_all() used by checkpoint.
A new function _ma_bitmap_flushable() used by block format to signal
when it starts modifying a bitmap and when it is done with it.
storage/maria/ma_blockrec.c:
When starting a row operation (insert/update/delete), mark that
the bitmap is not flushable (because for example INSERT is going
to over-allocate in the bitmap to prevent other threads from using
our data pages). If a checkpoint comes at this moment it will wait
for the bitmap to be flushable before flushing it.
When the operation ends, bitmap becomes flushable again; that
transition is done under the bitmap's mutex (needed for correct
synchro with a concurrent checkpoint); but for INSERT/UPDATE this
happens inside _ma_bitmap_release_unused() at a place where it already
has the mutex, so the only penalty (mutex adding) is in DELETE and UNDO
of INSERT. In case of errors after setting the bitmap unflushable,
we must always set it back to flushable or checkpoint would block.
Debug possibilities to force a sleep while the bitmap is over-allocated.
In case of error in get_head_or_tail() in allocate_and_write_block_record(),
we still need to unpin all pages.
Bugfix: _ma_apply_redo_insert_row_blobs() produced wrong
data_file_length.
storage/maria/ma_blockrec.h:
new bitmap calls.
storage/maria/ma_checkpoint.c:
filter_flush_indirect not needed anymore (flushing bitmap
pages happens in _ma_bitmap_flush_all() now). So
st_filter_param::is_data_file|pages_covered_by_bitmap not needed.
Other filter_flush* don't need to flush bitmap anymore.
Add debug possibility to flush all bitmap pages outside of a checkpoint,
to simulate pagecache LRU eviction.
When the background flush/checkpoint thread notices it has nothing
to flush, it now sleeps directly until the next potential checkpoint
moment instead of waking up every second.
When in checkpoint we decide to not store a table in the checkpoint record
(because it has logged no writes for example), we can also skip flushing
this table.
storage/maria/ma_commit.c:
comment is out-of-date
storage/maria/ma_key_recover.c:
comment fix
storage/maria/ma_loghandler.c:
comment is out-of-date
storage/maria/ma_open.c:
comment is out-of-date
storage/maria/ma_pagecache.c:
comment for bug to fix. And we don't take checkpoints at end of REDO
phase yet so can trust block->type.
storage/maria/ma_recovery.c:
Comments. Now-unneeded code for incomplete REDO-UNDO groups removed.
When we forget about an old transaction we must really forget
about it with bzero() (fixes the "long_trid!=0 assertion" recovery
bug). When we delete a row with maria_delete() we turn on
STATE_NOT_OPTIMIZED_ROWS so we do the same when we see a CLR_END
for an UNDO_ROW_INSERT or when we execute an UNDO_ROW_INSERT (in both
cases a row was deleted). Pick up max_long_trid from the checkpoint record.
storage/maria/maria_chk.c:
comment
storage/maria/maria_def.h:
MARIA_FILE_BITMAP gets new members: 'flushable', 'bitmap_cond' and
'pinned_pages'.
storage/maria/trnman.c:
I used to think that recovery only needs to know the maximum TrID
of the lists of active and committed transactions. But no, sometimes
both lists can even be empty and their TrID should not be reused.
So Checkpoint now saves global_trid_generator in the checkpoint record.
storage/maria/trnman_public.h:
macros to read/store a TrID
mysql-test/r/maria-recovery-bitmap.result:
result is ok. Without the code fix, we would get a corruption message
about the bitmap page in CHECK TABLE EXTENDED.
mysql-test/t/maria-recovery-bitmap-master.opt:
usual when we crash mysqld in tests
mysql-test/t/maria-recovery-bitmap.test:
test of recovery problems specific to the bitmap pages.
2007-12-14 16:14:12 +01:00
|
|
|
{
|
Fix for BUG#39363 "Concurent inserts in the same table lead to hang in maria engine"
(need a mutex when modifying bitmap->non_flushable), which I hit when running maria_bulk_insert.yy.
After fixing this, I hit an assertion in check_and_set_lsn() saying that the page was PAGECACHE_PLAIN_PAGE.
This could be caused by pages left by an operation which had transactions disabled (like a bulk insert with repair):
in this patch we remove those pages out of the cache when we re-enable transactions.
After fixing this, I get page cache deadlocks, pushbuild2 also has some, to be looked at.
No testcase, requires concurrency and running for 15 minutes, but automatically tested by pushbuild2.
storage/maria/ma_bitmap.c:
Doing bitmap->non_flushable++ without mutex was wrong. If this ++ happened while another ++ or -- was happening
in another thread, one ++ or -- could be missed and the bitmap code would behave wrongly. For example, if a ++
was missed, the DBUG_ASSERT(((int) (bitmap->non_flushable)) >= 0) in _ma_bitmap_release_unused() could fire.
I saw this assertion happen in practice in maria_bulk_insert.yy. Adding this mutex lock eliminated
the assertion problem.
The >=0 was wrong, should be >0 (or the variable could go negative).
storage/maria/ma_recovery.c:
When we re-enable transactionality, as we may have created pages of type PAGECACHE_PLAIN_PAGE before,
we need to remove them from the cache (FLUSH_RELEASE). Or they would stay this way, and later when we
maria_write() to them, we would try to tag them with a LSN (ma_unpin_all_pages()), which is incorrect
for a plain page (and causes assertion in the page cache at start of check_and_set_lsn()).
I saw the assertion fire with maria_bulk_insert.yy, and this seems to cure it.
page cache
2008-10-17 15:37:07 +02:00
|
|
|
DBUG_ASSERT(((int) (bitmap->non_flushable)) > 0);
|
UNDO of rows now puts back all part of the row on their original pages and positions
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Added --debug-on option to mysqld (to be able to turn off DBUG with --debug-on=0)
Fixed some bugs with 'non_flushable' marking of bitmap pages
Don't use 'non_flushable' marking of bitmap pages for not transactional tables
SHOW CREATE TABLE now shows if table was created with page checksums
Fixed a lot of bugs with BLOB handling in case of update/REDO and UNDO
More tests (especially for blobs) and DBUG_ASSERTS()
More readable output from maria_read_log and maria_chk
Fixed wrong shift that caused Maria to crash on files > 4G
Mark tables as crashed if REDO fails
dbug/dbug.c:
Changed to use my_bool (allowed me to remove some windows specific code)
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Removed initialization of variables if not needed
include/my_dbug.h:
Use my_bool for some functions that was defined as BOOLEAN in dbug.c code
Added DBUGGER_ON/DEBUGGER_OFF to speed up execution when DBUG is not used
include/my_global.h:
Define my_bool early
Increase MY_HOW_OFTEN_TO_WRITE as computers are now faster than 10 years ago
mysql-test/mysql-test-run.pl:
Added debug-on=0 to speed up tests
mysql-test/r/maria-recovery.result:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/r/maria.result:
Added testing of page checksums
mysql-test/t/crash_commit_before-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-bitmap-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery.test:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/t/maria.test:
Added testing of page checksums
sql/mysqld.cc:
Added --debug-on option (to be able to turn off DBUG with --debug-on=0)
Indentation fixes
Removed end spaces
sql/sql_show.cc:
Allow update_create_info() to inform MySQL if PACK_KEYS, NO_PACK_KEYS, CHECKSUM, PAGE_CHECKSUM or DELAY_KEY_WRITE is used
storage/maria/Makefile.am:
Added ma_test_big.sh
storage/maria/ha_maria.cc:
Store in create_info if page checksums are used (For SHOW CREATE TABLE)
storage/maria/ma_bitmap.c:
Added _ma_bitmap_wait_or_flush() to cause reader of bitmap pages to wait with reading until bitmap is flushed.
Use TAIL_PAGE_COUNT_MARKER for tail pages
Set 'sub_blocks' for and only for the head page or for the first extent of a blob. This is needed for store_extent_info() to be able to set START_EXTENT_BIT's
Don't allocate more than 0x3fff pages in one extent (We need bit 0x4000 as a START_EXTENT_BIT)
Increase the calculated 'head_length' with the number of bytes used for extents.
Update row->space_on_head_page also in _ma_bitmap_find_new_place()
Make _ma_bitmap_get_page_bits() global. (Needed for UNDO handling)
Changed _ma_bitmap_flushable() to take MARIA_HA instead of MARIA_SHARE.
This was needed to be able to mark the handler if we had a 'non_flushable' call pending or not.
Don't use 'non_flushable' marking of bitmap pages for not transactional tables.
Added BLOCKUSED_USE_ORG_BITMAP handling also for tail pages.
Added more DBUG_ASSERT() to find possible errors in other code
Some code simplifications by adding new local variables
storage/maria/ma_blockrec.c:
UNDO of rows now puts back all part of the row on their original pages and positions.
Changed UNDO of DELETE and UNDO of UPDATE to contain information about the original length of data on head block and also extent information
This changes a lot of logic as now an insert of a row on a page may happen to any position (and not just to the first or next free)
Use PAGE_COUNT to mark if an extent is the start of a blob. (Needed for extent_to_bitmap_blocks())
Added check_directory() for checking that directory entries are correct.
Added checking of row checksums when reading rows (with EXTRA_DEBUG)
Added make_space_for_directory() and extend_directory() for doing expansion of directory
Added get_rowpos_in_head_or_tail_page() to be able to store head/tail on original position in UNDO
Added extent_to_bitmap_blocks() to be able to generate original bitmap blocks from UNDO entry
Added _ma_update_at_original_place() for UNDO of DELETES
Added row->min_length to hold minimum required space needed on head page
Changed find_free_position() to use make_space_for_directory()
Changed make_empty_page() to allow optional creation of directory entry
Changed delete_head_or_tail() and _ma_apply_undo_row_insert() to not copy pagecache block (speed optimization)
Changed _ma_apply_redo_insert_row_head_or_tail() to be able to insert new row at any position on 'new' page
Changed _ma_apply_undo_row_delete() and _ma_apply_undo_row_update() to put row in its original position
Ensure allocation of tail blocks are of at least MIN_TAIL_SIZE.
Ensure we store pages in pinned pages even if read failed. (If not we will have pages pinned forever in page cache)
Write original extent information in UNDO entry, not compacted ones (we need position to tails!)
When setting BLOCKUSED_USED, don't clear other bits (we have to preserve BLOCKUSED_USE_ORG_BITMAP)
Fixed some bugs in directory handling
Fixed bug where we wrote wrong lsn to blob pages
Added separate blob_buffer for fixing bug when updating row that had char/varchar that spanned several pages and also had blobs
Ensure we call _ma_bitmap_flushable() also in case of errors
When doing an update, first delete old entries, then search in bitmap for where to put new information
Info->s -> share
Rowid -> rowid
More DBUG_ASSERT()
storage/maria/ma_blockrec.h:
Added START_EXTENT_BIT and TAIL_PAGE_COUNT_MARKER
Added _ma_bitmap_wait_or_flush() and _ma_bitmap_get_page_bits()
storage/maria/ma_check.c:
Don't write extra empty line if there is no deleted blocks
Ignore START_EXTENT_BIT's in page count
Call _ma_fast_unlock_key_del() to free key_del link
storage/maria/ma_close.c:
Ensure that used_key_del is 0. (If not, someone forgot to call _ma_unlock_key_del())
storage/maria/ma_create.c:
Changed constant to macro
storage/maria/ma_delete.c:
For deleted keys, log also position to row
storage/maria/ma_extra.c:
Release blob buffer at maria_reset() if bigger than MARIA_SMALL_BLOB_BUFFER
storage/maria/ma_key_recover.c:
Added bzero() of LSN that confused paged cache in case of uninitialized block
Mark file crashed if applying of index changes fails
Added calls to _ma_fast_unlock_key_del() for protection of shared key_del link.
storage/maria/ma_locking.c:
Added usage of MARIA_FILE_OPEN_COUNT_OFFSET
Added _ma_mark_file_crashed()
storage/maria/ma_loghandler.c:
Fixed bug where we logged uninitialized memory
storage/maria/ma_open.c:
Moved state->changed to be at start of state info on disk to allow one to easily mark files as crashed
storage/maria/ma_page.c:
Disable 'dummy' checksumming of pages as this gave false warnings.
(Need to investigate if this is ever needed)
storage/maria/ma_pagecache.c:
Fixed wrong shift that caused Maria to crash on files > 4G
storage/maria/ma_recovery.c:
In case of errors, start writing on new line if we were in %## %## printing mode (Made errors more readable)
Changed global variable name from warnings -> recovery_warnings
Use MARIA_FILE_CREATE_RENAME_LSN_OFFSET instead of constant
Removed special handling of row position for deleted keys. Keys now always includes row positions
_ma_apply_undo_row_delete() now gets page and row position
Added check that we don't loop forever when handling undo's (in case of bug in undo chain)
Print name of failed REDO/UNDO
storage/maria/ma_recovery.h:
Removed old comment
storage/maria/ma_static.c:
Changed version number of Maria files to not accidentally use old ones (because of change of ordering of status variables)
storage/maria/ma_test2.c:
Added option -u to specify number of rows to update
Changed old option -u to be -A, as for ma_test1
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
First created blob is now of max blob length to ensure we have at least one big blob in the table
storage/maria/ma_test_all.sh:
More tests
storage/maria/ma_test_recovery.expected:
Updated results
storage/maria/ma_test_recovery:
Changed tests to use bigger blobs (not just 1K)
Added new tests that test recovery of update with blobs
Removed comparison of .MAD file as it's not guaranteed that recovery from scratch gives identical data file as original update
(compact_page() may be called at different times during normal execution and during REDO)
storage/maria/ma_update.c:
Simplify code (changed * to if)
storage/maria/maria_chk.c:
Make output more readable
storage/maria/maria_def.h:
Changed 'changed' to int to prepare for more bits
Added 2 more bytes to status information
Added 'st_maria_row->min_length' for storing min length needed on head page
Added 'st_maria_handler->blob_buff & blob_buff_size' for storing blobs
Moved all tuning parameters into one block
Added MARIA_SMALL_BLOB_BUFFER
Added _ma_mark_file_crashed()
storage/myisam/mi_test2.c:
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
storage/maria/ma_test_big.sh:
Testing of insert, update, delete, recovery and undo of rows with blobs
Thanks to the random-ness of ma_test2 this is likely to find most bugs in the row handling
2007-12-30 21:40:03 +01:00
|
|
|
info->non_flushable_state= 0;
|
|
|
|
if (--bitmap->non_flushable == 0)
|
2007-12-15 14:17:23 +01:00
|
|
|
{
|
UNDO of rows now puts back all part of the row on their original pages and positions
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Added --debug-on option to mysqld (to be able to turn off DBUG with --debug-on=0)
Fixed some bugs with 'non_flushable' marking of bitmap pages
Don't use 'non_flushable' marking of bitmap pages for not transactional tables
SHOW CREATE TABLE now shows if table was created with page checksums
Fixed a lot of bugs with BLOB handling in case of update/REDO and UNDO
More tests (especially for blobs) and DBUG_ASSERTS()
More readable output from maria_read_log and maria_chk
Fixed wrong shift that caused Maria to crash on files > 4G
Mark tables as crashed if REDO fails
dbug/dbug.c:
Changed to use my_bool (allowed me to remove some windows specific code)
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Removed initialization of variables if not needed
include/my_dbug.h:
Use my_bool for some functions that was defined as BOOLEAN in dbug.c code
Added DBUGGER_ON/DEBUGGER_OFF to speed up execution when DBUG is not used
include/my_global.h:
Define my_bool early
Increase MY_HOW_OFTEN_TO_WRITE as computers are now faster than 10 years ago
mysql-test/mysql-test-run.pl:
Added debug-on=0 to speed up tests
mysql-test/r/maria-recovery.result:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/r/maria.result:
Added testing of page checksums
mysql-test/t/crash_commit_before-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-bitmap-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery.test:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/t/maria.test:
Added testing of page checksums
sql/mysqld.cc:
Added --debug-on option (to be able to turn off DBUG with --debug-on=0)
Indentation fixes
Removed end spaces
sql/sql_show.cc:
Allow update_create_info() to inform MySQL if PACK_KEYS, NO_PACK_KEYS, CHECKSUM, PAGE_CHECKSUM or DELAY_KEY_WRITE is used
storage/maria/Makefile.am:
Added ma_test_big.sh
storage/maria/ha_maria.cc:
Store in create_info if page checksums are used (For SHOW CREATE TABLE)
storage/maria/ma_bitmap.c:
Added _ma_bitmap_wait_or_flush() to cause reader of bitmap pages to wait with reading until bitmap is flushed.
Use TAIL_PAGE_COUNT_MARKER for tail pages
Set 'sub_blocks' for and only for the head page or for the first extent of a blob. This is needed for store_extent_info() to be able to set START_EXTENT_BIT's
Don't allocate more than 0x3fff pages in one extent (We need bit 0x4000 as a START_EXTENT_BIT)
Increase the calculated 'head_length' with the number of bytes used for extents.
Update row->space_on_head_page also in _ma_bitmap_find_new_place()
Make _ma_bitmap_get_page_bits() global. (Needed for UNDO handling)
Changed _ma_bitmap_flushable() to take MARIA_HA instead of MARIA_SHARE.
This was needed to be able to mark the handler if we had a 'non_flushable' call pending or not.
Don't use 'non_flushable' marking of bitmap pages for not transactional tables.
Added BLOCKUSED_USE_ORG_BITMAP handling also for tail pages.
Added more DBUG_ASSERT() to find possible errors in other code
Some code simplifications by adding new local variables
storage/maria/ma_blockrec.c:
UNDO of rows now puts back all part of the row on their original pages and positions.
Changed UNDO of DELETE and UNDO of UPDATE to contain information about the original length of data on head block and also extent information
This changes a lot of logic as now an insert of a row on a page may happen to any position (and not just to the first or next free)
Use PAGE_COUNT to mark if an extent is the start of a blob. (Needed for extent_to_bitmap_blocks())
Added check_directory() for checking that directory entries are correct.
Added checking of row checksums when reading rows (with EXTRA_DEBUG)
Added make_space_for_directory() and extend_directory() for doing expansion of directory
Added get_rowpos_in_head_or_tail_page() to be able to store head/tail on original position in UNDO
Added extent_to_bitmap_blocks() to be able to generate original bitmap blocks from UNDO entry
Added _ma_update_at_original_place() for UNDO of DELETES
Added row->min_length to hold minimum required space needed on head page
Changed find_free_position() to use make_space_for_directory()
Changed make_empty_page() to allow optional creation of directory entry
Changed delete_head_or_tail() and _ma_apply_undo_row_insert() to not copy pagecache block (speed optimization)
Changed _ma_apply_redo_insert_row_head_or_tail() to be able to insert new row at any position on 'new' page
Changed _ma_apply_undo_row_delete() and _ma_apply_undo_row_update() to put row in its original position
Ensure allocation of tail blocks are of at least MIN_TAIL_SIZE.
Ensure we store pages in pinned pages even if read failed. (If not we will have pages pinned forever in page cache)
Write original extent information in UNDO entry, not compacted ones (we need position to tails!)
When setting BLOCKUSED_USED, don't clear other bits (we have to preserve BLOCKUSED_USE_ORG_BITMAP)
Fixed some bugs in directory handling
Fixed bug where we wrote wrong lsn to blob pages
Added separate blob_buffer for fixing bug when updating row that had char/varchar that spanned several pages and also had blobs
Ensure we call _ma_bitmap_flushable() also in case of errors
When doing an update, first delete old entries, then search in bitmap for where to put new information
Info->s -> share
Rowid -> rowid
More DBUG_ASSERT()
storage/maria/ma_blockrec.h:
Added START_EXTENT_BIT and TAIL_PAGE_COUNT_MARKER
Added _ma_bitmap_wait_or_flush() and _ma_bitmap_get_page_bits()
storage/maria/ma_check.c:
Don't write extra empty line if there is no deleted blocks
Ignore START_EXTENT_BIT's in page count
Call _ma_fast_unlock_key_del() to free key_del link
storage/maria/ma_close.c:
Ensure that used_key_del is 0. (If not, someone forgot to call _ma_unlock_key_del())
storage/maria/ma_create.c:
Changed constant to macro
storage/maria/ma_delete.c:
For deleted keys, log also position to row
storage/maria/ma_extra.c:
Release blob buffer at maria_reset() if bigger than MARIA_SMALL_BLOB_BUFFER
storage/maria/ma_key_recover.c:
Added bzero() of LSN that confused paged cache in case of uninitialized block
Mark file crashed if applying of index changes fails
Added calls to _ma_fast_unlock_key_del() for protection of shared key_del link.
storage/maria/ma_locking.c:
Added usage of MARIA_FILE_OPEN_COUNT_OFFSET
Added _ma_mark_file_crashed()
storage/maria/ma_loghandler.c:
Fixed bug where we logged uninitialized memory
storage/maria/ma_open.c:
Moved state->changed to be at start of state info on disk to allow one to easily mark files as crashed
storage/maria/ma_page.c:
Disable 'dummy' checksumming of pages as this gave false warnings.
(Need to investigate if this is ever needed)
storage/maria/ma_pagecache.c:
Fixed wrong shift that caused Maria to crash on files > 4G
storage/maria/ma_recovery.c:
In case of errors, start writing on new line if we were in %## %## printing mode (Made errors more readable)
Changed global variable name from warnings -> recovery_warnings
Use MARIA_FILE_CREATE_RENAME_LSN_OFFSET instead of constant
Removed special handling of row position for deleted keys. Keys now always includes row positions
_ma_apply_undo_row_delete() now gets page and row position
Added check that we don't loop forever when handling undo's (in case of bug in undo chain)
Print name of failed REDO/UNDO
storage/maria/ma_recovery.h:
Removed old comment
storage/maria/ma_static.c:
Changed version number of Maria files to not accidentally use old ones (because of change of ordering of status variables)
storage/maria/ma_test2.c:
Added option -u to specify number of rows to update
Changed old option -u to be -A, as for ma_test1
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
First created blob is now of max blob length to ensure we have at least one big blob in the table
storage/maria/ma_test_all.sh:
More tests
storage/maria/ma_test_recovery.expected:
Updated results
storage/maria/ma_test_recovery:
Changed tests to use bigger blobs (not just 1K)
Added new tests that test recovery of update with blobs
Removed comparison of .MAD file as it's not guaranteed that recovery from scratch gives identical data file as original update
(compact_page() may be called at different times during normal execution and during REDO)
storage/maria/ma_update.c:
Simplify code (changed * to if)
storage/maria/maria_chk.c:
Make output more readable
storage/maria/maria_def.h:
Changed 'changed' to int to prepare for more bits
Added 2 more bytes to status information
Added 'st_maria_row->min_length' for storing min length needed on head page
Added 'st_maria_handler->blob_buff & blob_buff_size' for storing blobs
Moved all tuning parameters into one block
Added MARIA_SMALL_BLOB_BUFFER
Added _ma_mark_file_crashed()
storage/myisam/mi_test2.c:
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
storage/maria/ma_test_big.sh:
Testing of insert, update, delete, recovery and undo of rows with blobs
Thanks to the random-ness of ma_test2 this is likely to find most bugs in the row handling
2007-12-30 21:40:03 +01:00
|
|
|
_ma_bitmap_unpin_all(info->s);
|
2011-01-30 13:36:24 +01:00
|
|
|
if (unlikely(bitmap->waiting_for_non_flushable))
|
UNDO of rows now puts back all part of the row on their original pages and positions
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Added --debug-on option to mysqld (to be able to turn off DBUG with --debug-on=0)
Fixed some bugs with 'non_flushable' marking of bitmap pages
Don't use 'non_flushable' marking of bitmap pages for not transactional tables
SHOW CREATE TABLE now shows if table was created with page checksums
Fixed a lot of bugs with BLOB handling in case of update/REDO and UNDO
More tests (especially for blobs) and DBUG_ASSERTS()
More readable output from maria_read_log and maria_chk
Fixed wrong shift that caused Maria to crash on files > 4G
Mark tables as crashed if REDO fails
dbug/dbug.c:
Changed to use my_bool (allowed me to remove some windows specific code)
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Removed initialization of variables if not needed
include/my_dbug.h:
Use my_bool for some functions that was defined as BOOLEAN in dbug.c code
Added DBUGGER_ON/DEBUGGER_OFF to speed up execution when DBUG is not used
include/my_global.h:
Define my_bool early
Increase MY_HOW_OFTEN_TO_WRITE as computers are now faster than 10 years ago
mysql-test/mysql-test-run.pl:
Added debug-on=0 to speed up tests
mysql-test/r/maria-recovery.result:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/r/maria.result:
Added testing of page checksums
mysql-test/t/crash_commit_before-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-bitmap-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery.test:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/t/maria.test:
Added testing of page checksums
sql/mysqld.cc:
Added --debug-on option (to be able to turn off DBUG with --debug-on=0)
Indentation fixes
Removed end spaces
sql/sql_show.cc:
Allow update_create_info() to inform MySQL if PACK_KEYS, NO_PACK_KEYS, CHECKSUM, PAGE_CHECKSUM or DELAY_KEY_WRITE is used
storage/maria/Makefile.am:
Added ma_test_big.sh
storage/maria/ha_maria.cc:
Store in create_info if page checksums are used (For SHOW CREATE TABLE)
storage/maria/ma_bitmap.c:
Added _ma_bitmap_wait_or_flush() to cause reader of bitmap pages to wait with reading until bitmap is flushed.
Use TAIL_PAGE_COUNT_MARKER for tail pages
Set 'sub_blocks' for and only for the head page or for the first extent of a blob. This is needed for store_extent_info() to be able to set START_EXTENT_BIT's
Don't allocate more than 0x3fff pages in one extent (We need bit 0x4000 as a START_EXTENT_BIT)
Increase the calculated 'head_length' with the number of bytes used for extents.
Update row->space_on_head_page also in _ma_bitmap_find_new_place()
Make _ma_bitmap_get_page_bits() global. (Needed for UNDO handling)
Changed _ma_bitmap_flushable() to take MARIA_HA instead of MARIA_SHARE.
This was needed to be able to mark the handler if we had a 'non_flushable' call pending or not.
Don't use 'non_flushable' marking of bitmap pages for not transactional tables.
Added BLOCKUSED_USE_ORG_BITMAP handling also for tail pages.
Added more DBUG_ASSERT() to find possible errors in other code
Some code simplifications by adding new local variables
storage/maria/ma_blockrec.c:
UNDO of rows now puts back all part of the row on their original pages and positions.
Changed UNDO of DELETE and UNDO of UPDATE to contain information about the original length of data on head block and also extent information
This changes a lot of logic as now an insert of a row on a page may happen to any position (and not just to the first or next free)
Use PAGE_COUNT to mark if an extent is the start of a blob. (Needed for extent_to_bitmap_blocks())
Added check_directory() for checking that directory entries are correct.
Added checking of row checksums when reading rows (with EXTRA_DEBUG)
Added make_space_for_directory() and extend_directory() for doing expansion of directory
Added get_rowpos_in_head_or_tail_page() to be able to store head/tail on original position in UNDO
Added extent_to_bitmap_blocks() to be able to generate original bitmap blocks from UNDO entry
Added _ma_update_at_original_place() for UNDO of DELETES
Added row->min_length to hold minimum required space needed on head page
Changed find_free_position() to use make_space_for_directory()
Changed make_empty_page() to allow optional creation of directory entry
Changed delete_head_or_tail() and _ma_apply_undo_row_insert() to not copy pagecache block (speed optimization)
Changed _ma_apply_redo_insert_row_head_or_tail() to be able to insert new row at any position on 'new' page
Changed _ma_apply_undo_row_delete() and _ma_apply_undo_row_update() to put row in its original position
Ensure allocation of tail blocks are of at least MIN_TAIL_SIZE.
Ensure we store pages in pinned pages even if read failed. (If not we will have pages pinned forever in page cache)
Write original extent information in UNDO entry, not compacted ones (we need position to tails!)
When setting BLOCKUSED_USED, don't clear other bits (we have to preserve BLOCKUSED_USE_ORG_BITMAP)
Fixed some bugs in directory handling
Fixed bug where we wrote wrong lsn to blob pages
Added separate blob_buffer for fixing bug when updating row that had char/varchar that spanned several pages and also had blobs
Ensure we call _ma_bitmap_flushable() also in case of errors
When doing an update, first delete old entries, then search in bitmap for where to put new information
Info->s -> share
Rowid -> rowid
More DBUG_ASSERT()
storage/maria/ma_blockrec.h:
Added START_EXTENT_BIT and TAIL_PAGE_COUNT_MARKER
Added _ma_bitmap_wait_or_flush() and _ma_bitmap_get_page_bits()
storage/maria/ma_check.c:
Don't write extra empty line if there is no deleted blocks
Ignore START_EXTENT_BIT's in page count
Call _ma_fast_unlock_key_del() to free key_del link
storage/maria/ma_close.c:
Ensure that used_key_del is 0. (If not, someone forgot to call _ma_unlock_key_del())
storage/maria/ma_create.c:
Changed constant to macro
storage/maria/ma_delete.c:
For deleted keys, log also position to row
storage/maria/ma_extra.c:
Release blob buffer at maria_reset() if bigger than MARIA_SMALL_BLOB_BUFFER
storage/maria/ma_key_recover.c:
Added bzero() of LSN that confused paged cache in case of uninitialized block
Mark file crashed if applying of index changes fails
Added calls to _ma_fast_unlock_key_del() for protection of shared key_del link.
storage/maria/ma_locking.c:
Added usage of MARIA_FILE_OPEN_COUNT_OFFSET
Added _ma_mark_file_crashed()
storage/maria/ma_loghandler.c:
Fixed bug where we logged uninitialized memory
storage/maria/ma_open.c:
Moved state->changed to be at start of state info on disk to allow one to easily mark files as crashed
storage/maria/ma_page.c:
Disable 'dummy' checksumming of pages as this gave false warnings.
(Need to investigate if this is ever needed)
storage/maria/ma_pagecache.c:
Fixed wrong shift that caused Maria to crash on files > 4G
storage/maria/ma_recovery.c:
In case of errors, start writing on new line if we were in %## %## printing mode (Made errors more readable)
Changed global variable name from warnings -> recovery_warnings
Use MARIA_FILE_CREATE_RENAME_LSN_OFFSET instead of constant
Removed special handling of row position for deleted keys. Keys now always includes row positions
_ma_apply_undo_row_delete() now gets page and row position
Added check that we don't loop forever when handling undo's (in case of bug in undo chain)
Print name of failed REDO/UNDO
storage/maria/ma_recovery.h:
Removed old comment
storage/maria/ma_static.c:
Changed version number of Maria files to not accidentally use old ones (because of change of ordering of status variables)
storage/maria/ma_test2.c:
Added option -u to specify number of rows to update
Changed old option -u to be -A, as for ma_test1
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
First created blob is now of max blob length to ensure we have at least one big blob in the table
storage/maria/ma_test_all.sh:
More tests
storage/maria/ma_test_recovery.expected:
Updated results
storage/maria/ma_test_recovery:
Changed tests to use bigger blobs (not just 1K)
Added new tests that test recovery of update with blobs
Removed comparison of .MAD file as it's not guaranteed that recovery from scratch gives identical data file as original update
(compact_page() may be called at different times during normal execution and during REDO)
storage/maria/ma_update.c:
Simplify code (changed * to if)
storage/maria/maria_chk.c:
Make output more readable
storage/maria/maria_def.h:
Changed 'changed' to int to prepare for more bits
Added 2 more bytes to status information
Added 'st_maria_row->min_length' for storing min length needed on head page
Added 'st_maria_handler->blob_buff & blob_buff_size' for storing blobs
Moved all tuning parameters into one block
Added MARIA_SMALL_BLOB_BUFFER
Added _ma_mark_file_crashed()
storage/myisam/mi_test2.c:
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
storage/maria/ma_test_big.sh:
Testing of insert, update, delete, recovery and undo of rows with blobs
Thanks to the random-ness of ma_test2 this is likely to find most bugs in the row handling
2007-12-30 21:40:03 +01:00
|
|
|
{
|
|
|
|
DBUG_PRINT("info", ("bitmap flushable waking up flusher"));
|
2011-07-13 21:10:18 +02:00
|
|
|
mysql_cond_broadcast(&bitmap->bitmap_cond);
|
UNDO of rows now puts back all part of the row on their original pages and positions
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Added --debug-on option to mysqld (to be able to turn off DBUG with --debug-on=0)
Fixed some bugs with 'non_flushable' marking of bitmap pages
Don't use 'non_flushable' marking of bitmap pages for not transactional tables
SHOW CREATE TABLE now shows if table was created with page checksums
Fixed a lot of bugs with BLOB handling in case of update/REDO and UNDO
More tests (especially for blobs) and DBUG_ASSERTS()
More readable output from maria_read_log and maria_chk
Fixed wrong shift that caused Maria to crash on files > 4G
Mark tables as crashed if REDO fails
dbug/dbug.c:
Changed to use my_bool (allowed me to remove some windows specific code)
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Removed initialization of variables if not needed
include/my_dbug.h:
Use my_bool for some functions that were defined as BOOLEAN in dbug.c code
Added DEBUGGER_ON/DEBUGGER_OFF to speed up execution when DBUG is not used
include/my_global.h:
Define my_bool early
Increase MY_HOW_OFTEN_TO_WRITE as computers are now faster than 10 years ago
mysql-test/mysql-test-run.pl:
Added debug-on=0 to speed up tests
mysql-test/r/maria-recovery.result:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/r/maria.result:
Added testing of page checksums
mysql-test/t/crash_commit_before-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-bitmap-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery.test:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/t/maria.test:
Added testing of page checksums
sql/mysqld.cc:
Added --debug-on option (to be able to turn off DBUG with --debug-on=0)
Indentation fixes
Removed end spaces
sql/sql_show.cc:
Allow update_create_info() to inform MySQL if PACK_KEYS, NO_PACK_KEYS, CHECKSUM, PAGE_CHECKSUM or DELAY_KEY_WRITE is used
storage/maria/Makefile.am:
Added ma_test_big.sh
storage/maria/ha_maria.cc:
Store in create_info if page checksums are used (For SHOW CREATE TABLE)
storage/maria/ma_bitmap.c:
Added _ma_bitmap_wait_or_flush() to cause reader of bitmap pages to wait with reading until bitmap is flushed.
Use TAIL_PAGE_COUNT_MARKER for tail pages
Set 'sub_blocks' for and only for the head page or for the first extent of a blob. This is needed for store_extent_info() to be able to set START_EXTENT_BIT's
Don't allocate more than 0x3fff pages in one extent (We need bit 0x4000 as a START_EXTENT_BIT)
Increase the calculated 'head_length' with the number of bytes used for extents.
Update row->space_on_head_page also in _ma_bitmap_find_new_place()
Make _ma_bitmap_get_page_bits() global. (Needed for UNDO handling)
Changed _ma_bitmap_flushable() to take MARIA_HA instead of MARIA_SHARE.
This was needed to be able to mark the handler if we had a 'non_flushable' call pending or not.
Don't use 'non_flushable' marking of bitmap pages for not transactional tables.
Added BLOCKUSED_USE_ORG_BITMAP handling also for tail pages.
Added more DBUG_ASSERT() to find possible errors in other code
Some code simplifications by adding new local variables
storage/maria/ma_blockrec.c:
UNDO of rows now puts back all part of the row on their original pages and positions.
Changed UNDO of DELETE and UNDO of UPDATE to contain information about the original length of data on head block and also extent information
This changes a lot of logic as now an insert of a row on a page may happen to any position (and not just to the first or next free)
Use PAGE_COUNT to mark if an extent is the start of a blob. (Needed for extent_to_bitmap_blocks())
Added check_directory() for checking that directory entries are correct.
Added checking of row checksums when reading rows (with EXTRA_DEBUG)
Added make_space_for_directory() and extend_directory() for doing expansion of directory
Added get_rowpos_in_head_or_tail_page() to be able to store head/tail on original position in UNDO
Added extent_to_bitmap_blocks() to be able to generate original bitmap blocks from UNDO entry
Added _ma_update_at_original_place() for UNDO of DELETES
Added row->min_length to hold minimum required space needed on head page
Changed find_free_position() to use make_space_for_directory()
Changed make_empty_page() to allow optional creation of directory entry
Changed delete_head_or_tail() and _ma_apply_undo_row_insert() to not copy pagecache block (speed optimization)
Changed _ma_apply_redo_insert_row_head_or_tail() to be able to insert new row at any position on 'new' page
Changed _ma_apply_undo_row_delete() and _ma_apply_undo_row_update() to put row in its original position
Ensure allocation of tail blocks are of at least MIN_TAIL_SIZE.
Ensure we store pages in pinned pages even if read failed. (If not we will have pages pinned forever in page cache)
Write original extent information in UNDO entry, not compacted ones (we need position to tails!)
When setting BLOCKUSED_USED, don't clear other bits (we have to preserve BLOCKUSED_USE_ORG_BITMAP)
Fixed some bugs in directory handling
Fixed bug where we wrote wrong lsn to blob pages
Added separate blob_buffer for fixing bug when updating row that had char/varchar that spanned several pages and also had blobs
Ensure we call _ma_bitmap_flushable() also in case of errors
When doing an update, first delete old entries, then search in bitmap for where to put new information
Info->s -> share
Rowid -> rowid
More DBUG_ASSERT()
storage/maria/ma_blockrec.h:
Added START_EXTENT_BIT and TAIL_PAGE_COUNT_MARKER
Added _ma_bitmap_wait_or_flush() and _ma_bitmap_get_page_bits()
storage/maria/ma_check.c:
Don't write extra empty line if there is no deleted blocks
Ignore START_EXTENT_BIT's in page count
Call _ma_fast_unlock_key_del() to free key_del link
storage/maria/ma_close.c:
Ensure that used_key_del is 0. (If not, someone forgot to call _ma_unlock_key_del())
storage/maria/ma_create.c:
Changed constant to macro
storage/maria/ma_delete.c:
For deleted keys, log also position to row
storage/maria/ma_extra.c:
Release blob buffer at maria_reset() if bigger than MARIA_SMALL_BLOB_BUFFER
storage/maria/ma_key_recover.c:
Added bzero() of LSN that confused paged cache in case of uninitialized block
Mark file crashed if applying of index changes fails
Added calls to _ma_fast_unlock_key_del() for protection of shared key_del link.
storage/maria/ma_locking.c:
Added usage of MARIA_FILE_OPEN_COUNT_OFFSET
Added _ma_mark_file_crashed()
storage/maria/ma_loghandler.c:
Fixed bug where we logged uninitialized memory
storage/maria/ma_open.c:
Moved state->changed to be at start of state info on disk to allow one to easily mark files as crashed
storage/maria/ma_page.c:
Disable 'dummy' checksumming of pages as this gave false warnings.
(Need to investigate if this is ever needed)
storage/maria/ma_pagecache.c:
Fixed wrong shift that caused Maria to crash on files > 4G
storage/maria/ma_recovery.c:
In case of errors, start writing on new line if we were in %## %## printing mode (Made errors more readable)
Changed global variable name from warnings -> recovery_warnings
Use MARIA_FILE_CREATE_RENAME_LSN_OFFSET instead of constant
Removed special handling of row position for deleted keys. Keys now always includes row positions
_ma_apply_undo_row_delete() now gets page and row position
Added check that we don't loop forever when handling undo's (in case of bug in undo chain)
Print name of failed REDO/UNDO
storage/maria/ma_recovery.h:
Removed old comment
storage/maria/ma_static.c:
Changed version number of Maria files to not accidentally use old ones (because of change of ordering of status variables)
storage/maria/ma_test2.c:
Added option -u to specify number of rows to update
Changed old option -u to be -A, as for ma_test1
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
First created blob is now of max blob length to ensure we have at least one big blob in the table
storage/maria/ma_test_all.sh:
More tests
storage/maria/ma_test_recovery.expected:
Updated results
storage/maria/ma_test_recovery:
Changed tests to use bigger blobs (not just 1K)
Added new tests that test recovery of update with blobs
Removed comparison of .MAD file as it's not guaranteed that recovery from scratch gives identical data file as original update
(compact_page() may be called at different times during normal execution and during REDO)
storage/maria/ma_update.c:
Simplify code (changed * to if)
storage/maria/maria_chk.c:
Make output more readable
storage/maria/maria_def.h:
Changed 'changed' to int to prepare for more bits
Added 2 more bytes to status information
Added 'st_maria_row->min_length' for storing min length needed on head page
Added 'st_maria_handler->blob_buff & blob_buff_size' for storing blobs
Moved all tuning parameters into one block
Added MARIA_SMALL_BLOB_BUFFER
Added _ma_mark_file_crashed()
storage/myisam/mi_test2.c:
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
storage/maria/ma_test_big.sh:
Testing of insert, update, delete, recovery and undo of rows with blobs
Thanks to the random-ness of ma_test2 this is likely to find most bugs in the row handling
2007-12-30 21:40:03 +01:00
|
|
|
}
|
2007-12-15 14:17:23 +01:00
|
|
|
}
|
WL#3072 - Maria recovery.
* fix for bitmap vs checkpoint bug which could lead to corrupted
tables in case of crashes at certain moments: a bitmap could be flushed
to disk even though it was inconsistent with the log (it could be
flushed before REDO-UNDO are written to the log). One bug remains, need
code from others. Tests added. Fix is to pin unflushable bitmap pages,
and let checkpoint wait for them to be flushable.
* fix for long_trid!=0 assertion failure at Recovery.
* less useless wakeups in the background flush|checkpoint thread.
* store global_trid_generator in checkpoint record.
mysql-test/r/maria-recovery.result:
result update
mysql-test/t/maria-recovery.test:
make it easier to locate subtests
storage/maria/ma_bitmap.c:
When we send a bitmap to the pagecache, if this bitmap is not in a
flushable state we keep it pinned and add it to a list, it will be
unpinned when the bitmap is flushable again.
A new function _ma_bitmap_flush_all() used by checkpoint.
A new function _ma_bitmap_flushable() used by block format to signal
when it starts modifying a bitmap and when it is done with it.
storage/maria/ma_blockrec.c:
When starting a row operation (insert/update/delete), mark that
the bitmap is not flushable (because for example INSERT is going
to over-allocate in the bitmap to prevent other threads from using
our data pages). If a checkpoint comes at this moment it will wait
for the bitmap to be flushable before flushing it.
When the operation ends, bitmap becomes flushable again; that
transition is done under the bitmap's mutex (needed for correct
synchro with a concurrent checkpoint); but for INSERT/UPDATE this
happens inside _ma_bitmap_release_unused() at a place where it already
has the mutex, so the only penalty (mutex adding) is in DELETE and UNDO
of INSERT. In case of errors after setting the bitmap unflushable,
we must always set it back to flushable or checkpoint would block.
Debug possibilities to force a sleep while the bitmap is over-allocated.
In case of error in get_head_or_tail() in allocate_and_write_block_record(),
we still need to unpin all pages.
Bugfix: _ma_apply_redo_insert_row_blobs() produced wrong
data_file_length.
storage/maria/ma_blockrec.h:
new bitmap calls.
storage/maria/ma_checkpoint.c:
filter_flush_indirect not needed anymore (flushing bitmap
pages happens in _ma_bitmap_flush_all() now). So
st_filter_param::is_data_file|pages_covered_by_bitmap not needed.
Other filter_flush* don't need to flush bitmap anymore.
Add debug possibility to flush all bitmap pages outside of a checkpoint,
to simulate pagecache LRU eviction.
When the background flush/checkpoint thread notices it has nothing
to flush, it now sleeps directly until the next potential checkpoint
moment instead of waking up every second.
When in checkpoint we decide to not store a table in the checkpoint record
(because it has logged no writes for example), we can also skip flushing
this table.
storage/maria/ma_commit.c:
comment is out-of-date
storage/maria/ma_key_recover.c:
comment fix
storage/maria/ma_loghandler.c:
comment is out-of-date
storage/maria/ma_open.c:
comment is out-of-date
storage/maria/ma_pagecache.c:
comment for bug to fix. And we don't take checkpoints at end of REDO
phase yet so can trust block->type.
storage/maria/ma_recovery.c:
Comments. Now-unneeded code for incomplete REDO-UNDO groups removed.
When we forget about an old transaction we must really forget
about it with bzero() (fixes the "long_trid!=0 assertion" recovery
bug). When we delete a row with maria_delete() we turn on
STATE_NOT_OPTIMIZED_ROWS so we do the same when we see a CLR_END
for an UNDO_ROW_INSERT or when we execute an UNDO_ROW_INSERT (in both
cases a row was deleted). Pick up max_long_trid from the checkpoint record.
storage/maria/maria_chk.c:
comment
storage/maria/maria_def.h:
MARIA_FILE_BITMAP gets new members: 'flushable', 'bitmap_cond' and
'pinned_pages'.
storage/maria/trnman.c:
I used to think that recovery only needs to know the maximum TrID
of the lists of active and committed transactions. But no, sometimes
both lists can even be empty and their TrID should not be reused.
So Checkpoint now saves global_trid_generator in the checkpoint record.
storage/maria/trnman_public.h:
macros to read/store a TrID
mysql-test/r/maria-recovery-bitmap.result:
result is ok. Without the code fix, we would get a corruption message
about the bitmap page in CHECK TABLE EXTENDED.
mysql-test/t/maria-recovery-bitmap-master.opt:
usual when we crash mysqld in tests
mysql-test/t/maria-recovery-bitmap.test:
test of recovery problems specific to the bitmap pages.
2007-12-14 16:14:12 +01:00
|
|
|
}
|
2007-12-15 14:17:23 +01:00
|
|
|
DBUG_PRINT("info", ("bitmap->non_flushable: %u", bitmap->non_flushable));
|
|
|
|
|
2011-07-13 21:10:18 +02:00
|
|
|
mysql_mutex_unlock(&bitmap->bitmap_lock);
|
2007-01-18 20:38:14 +01:00
|
|
|
DBUG_RETURN(0);
|
|
|
|
|
|
|
|
err:
|
2011-07-13 21:10:18 +02:00
|
|
|
mysql_mutex_unlock(&bitmap->bitmap_lock);
|
2007-01-18 20:38:14 +01:00
|
|
|
DBUG_RETURN(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
This patch is a collection of patches from Sanja, Sergei and Monty.
Added logging and pinning of pages to block format.
Integration of transaction manager, log handler.
Better page cache integration
Split trnman.h into two files, so that we don't have to include my_atomic.h into C++ programs.
Renaming of structures, more comments, more debugging etc.
Fixed problem with small head block + long varchar.
Added extra argument to delete_record() and update_record() (needed for UNDO logging)
Small changes to interface of pagecache and log handler.
Change initialization of log_record_type_descriptors to not be depending on enum order.
Use array of LEX_STRING's to send data to log handler
Added 'dummy' transaction option to MARIA_INFO so that we can always assume 'trn' exists.
include/lf.h:
Interface fixes
Rename of structures
(Patch from Sergei via Sanja)
include/my_atomic.h:
More comments
include/my_global.h:
Added MY_ERRPTR
include/pagecache.h:
Added undo LSN when unlocking pages
mysql-test/r/maria.result:
Updated results
mysql-test/t/maria.test:
Added autocommit around lock tables
(Patch from Sanja)
mysys/lf_alloc-pin.c:
Post-review fixes, simple optimizations
More comments
Struct slot renames
Check amount of memory on stack
(Patch from Sergei)
mysys/lf_dynarray.c:
More comments
mysys/lf_hash.c:
More comments
After review fixes
(Patch from Sergei)
storage/maria/ha_maria.cc:
Split trnman.h into two files, so that we don't have to include my_atomic.h into the .cc program.
(Temporary fix to avoid bug in gcc)
Move out all dereferencing of the transaction structure.
Transaction manager integrated (Patch from Sergei)
storage/maria/ha_maria.h:
Added prototype for start_stmt()
storage/maria/lockman.c:
Function call rename
storage/maria/ma_bitmap.c:
Mark deleted pages free from page cache
storage/maria/ma_blockrec.c:
Offset -> rownr
More debugging
Fixed problem with small head block + long varchar
Added logging of changed pages
Added logging of undo (Including only logging of changed fields in case of update)
Added pinning/unpinning of all changed pages
More comments
Added free_full_pages() as the same code was used in several places.
fill_rows_parts() renamed as fill_insert_undo_parts()
offset -> rownr
Added some optimization of not transactional tables
_ma_update_block_record() has new parameter, as we need original row to do efficient undo for update
storage/maria/ma_blockrec.h:
Added ROW_EXTENTS_ON_STACK
Changed prototype for update and delete of row
storage/maria/ma_check.c:
Added original row to delete_record() call
storage/maria/ma_control_file.h:
Added ifdefs for C++
storage/maria/ma_delete.c:
Added original row to delete_record() call
(Needed for efficient undo logging)
storage/maria/ma_dynrec.c:
Added extra argument to delete_record() and update_record()
Removed not used variable
storage/maria/ma_init.c:
Initialize log handler
storage/maria/ma_loghandler.c:
Removed not used variable
Change initialization of log_record_type_descriptors to not be depending on enum order
Use array of LEX_STRING's to send data to log handler
storage/maria/ma_loghandler.h:
New defines
Use array of LEX_STRING's to send data to log handler
storage/maria/ma_open.c:
Added 'dummy' transaction option to MARIA_INFO so that we can always assume 'trn' exists.
Store in MARIA_SHARE->page_type if pages will have up to date LSN's
storage/maria/ma_pagecache.c:
Don't decrease number of readers when using pagecache_write()/pagecache_read()
In pagecache_write() decrement request count if page was left pinned
Added pagecache_delete_pages()
Removed some casts
Make trace output consistent with rest of code
Simplify calling of DBUG_ASSERT(0)
Only update LSN if the LSN is bigger than what's already on the page
Added LSN parameter pagecache_unpin_page(), pagecache_unpin(), and pagecache_unlock()
(Part of patch from Sanja)
storage/maria/ma_static.c:
Added 'dummy' transaction option to MARIA_INFO so that we can always assume 'trn' exists.
Added default page cache
storage/maria/ma_statrec.c:
Added extra argument to delete_record() and update_record()
storage/maria/ma_test1.c:
Added option -T for transactions
storage/maria/ma_test2.c:
Added option -T for transactions
storage/maria/ma_test_all.sh:
Test with transactions
storage/maria/ma_update.c:
Changed prototype for update of row
storage/maria/maria_def.h:
Changed prototype for update & delete of row as block records need to access the old row
Store in MARIA_SHARE->page_type if pages will have up to date LSN's
Added MARIA_MAX_TREE_LEVELS to allow us to calculate the number of possible pinned pages we may need.
Removed not used 'empty_bits_buffer'
Added pointer to transaction object
Added array for pinned pages
Added log_row_parts array for logging of field data.
Added MARIA_PINNED_PAGE to store pinned pages
storage/maria/trnman.c:
Added accessor functions to transaction object
Added missing DBUG_RETURN()
More debugging
More comments
Changed // comment of code to #ifdef NOT_USED
Transaction manager integrated.
Post review fixes
Part of patch originally from Sergei
storage/maria/trnman.h:
Split trnman.h into two files, so that we don't have to include my_atomic.h into the .cc program.
(Temporary fix to avoid bug in gcc)
storage/maria/unittest/ma_pagecache_single.c:
Added missing argument
Added SKIP_BIG_TESTS
(Patch from Sanja)
storage/maria/unittest/ma_test_loghandler-t.c:
Test logging with new LEX_STRING parameter
(Patch from Sanja)
storage/maria/unittest/ma_test_loghandler_multigroup-t.c:
Test logging with new LEX_STRING parameter
(Patch from Sanja)
storage/maria/unittest/ma_test_loghandler_multithread-t.c:
Test logging with new LEX_STRING parameter
(Patch from Sanja)
storage/maria/unittest/ma_test_loghandler_pagecache-t.c:
Test logging with new LEX_STRING parameter
(Patch from Sanja)
storage/maria/unittest/trnman-t.c:
Stack overflow detection
(Patch from Sergei)
unittest/unit.pl:
Command-line options --big and --verbose
(Patch from Sergei)
unittest/mytap/tap.c:
Detect --big
(Patch from Sergei)
unittest/mytap/tap.h:
Skip_big_tests and SKIP_BIG_TESTS
(Patch from Sergei)
storage/maria/trnman_public.h:
New BitKeeper file ``storage/maria/trnman_public.h''
2007-05-29 19:13:56 +02:00
|
|
|
Free full pages from bitmap and pagecache
|
2007-01-18 20:38:14 +01:00
|
|
|
|
|
|
|
SYNOPSIS
|
|
|
|
_ma_bitmap_free_full_pages()
|
|
|
|
info Maria handle
|
|
|
|
extents Extents (as stored on disk)
|
|
|
|
count Number of extents
|
|
|
|
|
|
|
|
IMPLEMENTATION
|
This patch is a collection of patches from Sanja, Sergei and Monty.
Added logging and pinning of pages to block format.
Integration of transaction manager, log handler.
Better page cache integration
Split trnman.h into two files, so that we don't have to include my_atomic.h into C++ programs.
Renaming of structures, more comments, more debugging etc.
Fixed problem with small head block + long varchar.
Added extra argument to delete_record() and update_record() (needed for UNDO logging)
Small changes to interface of pagecache and log handler.
Change initialization of log_record_type_descriptors to not be depending on enum order.
Use array of LEX_STRING's to send data to log handler
Added 'dummy' transaction option to MARIA_INFO so that we can always assume 'trn' exists.
include/lf.h:
Interface fixes
Rename of structures
(Patch from Sergei via Sanja)
include/my_atomic.h:
More comments
include/my_global.h:
Added MY_ERRPTR
include/pagecache.h:
Added undo LSN when unlocking pages
mysql-test/r/maria.result:
Updated results
mysql-test/t/maria.test:
Added autocommit around lock tables
(Patch from Sanja)
mysys/lf_alloc-pin.c:
Post-review fixes, simple optimizations
More comments
Struct slot renames
Check amount of memory on stack
(Patch from Sergei)
mysys/lf_dynarray.c:
More comments
mysys/lf_hash.c:
More comments
After review fixes
(Patch from Sergei)
storage/maria/ha_maria.cc:
Split trnman.h into two files, so that we don't have to include my_atomic.h into the .cc program.
(Temporary fix to avoid bug in gcc)
Move out all dereferencing of the transaction structure.
Transaction manager integrated (Patch from Sergei)
storage/maria/ha_maria.h:
Added prototype for start_stmt()
storage/maria/lockman.c:
Function call rename
storage/maria/ma_bitmap.c:
Mark deleted pages free from page cache
storage/maria/ma_blockrec.c:
Offset -> rownr
More debugging
Fixed problem with small head block + long varchar
Added logging of changed pages
Added logging of undo (Including only logging of changed fields in case of update)
Added pinning/unpinning of all changed pages
More comments
Added free_full_pages() as the same code was used in several places.
fill_rows_parts() renamed as fill_insert_undo_parts()
offset -> rownr
Added some optimization of not transactional tables
_ma_update_block_record() has new parameter, as we need original row to do efficient undo for update
storage/maria/ma_blockrec.h:
Added ROW_EXTENTS_ON_STACK
Changed prototype for update and delete of row
storage/maria/ma_check.c:
Added original row to delete_record() call
storage/maria/ma_control_file.h:
Added ifdefs for C++
storage/maria/ma_delete.c:
Added original row to delete_record() call
(Needed for efficient undo logging)
storage/maria/ma_dynrec.c:
Added extra argument to delete_record() and update_record()
Removed not used variable
storage/maria/ma_init.c:
Initialize log handler
storage/maria/ma_loghandler.c:
Removed not used variable
Change initialization of log_record_type_descriptors to not be depending on enum order
Use array of LEX_STRING's to send data to log handler
storage/maria/ma_loghandler.h:
New defines
Use array of LEX_STRING's to send data to log handler
storage/maria/ma_open.c:
Added 'dummy' transaction option to MARIA_INFO so that we can always assume 'trn' exists.
Store in MARIA_SHARE->page_type if pages will have up to date LSN's
storage/maria/ma_pagecache.c:
Don't decrease number of readers when using pagecache_write()/pagecache_read()
In pagecache_write() decrement request count if page was left pinned
Added pagecache_delete_pages()
Removed some casts
Make trace output consistent with rest of code
Simplify calling of DBUG_ASSERT(0)
Only update LSN if the LSN is bigger than what's already on the page
Added LSN parameter pagecache_unpin_page(), pagecache_unpin(), and pagecache_unlock()
(Part of patch from Sanja)
storage/maria/ma_static.c:
Added 'dummy' transaction option to MARIA_INFO so that we can always assume 'trn' exists.
Added default page cache
storage/maria/ma_statrec.c:
Added extra argument to delete_record() and update_record()
storage/maria/ma_test1.c:
Added option -T for transactions
storage/maria/ma_test2.c:
Added option -T for transactions
storage/maria/ma_test_all.sh:
Test with transactions
storage/maria/ma_update.c:
Changed prototype for update of row
storage/maria/maria_def.h:
Changed prototype for update & delete of row as block records need to access the old row
Store in MARIA_SHARE->page_type if pages will have up to date LSN's
Added MARIA_MAX_TREE_LEVELS to allow us to calculate the number of possible pinned pages we may need.
Removed not used 'empty_bits_buffer'
Added pointer to transaction object
Added array for pinned pages
Added log_row_parts array for logging of field data.
Added MARIA_PINNED_PAGE to store pinned pages
storage/maria/trnman.c:
Added accessor functions to transaction object
Added missing DBUG_RETURN()
More debugging
More comments
Changed // comment of code to #ifdef NOT_USED
Transaction manager integrated.
Post review fixes
Part of patch originally from Sergei
storage/maria/trnman.h:
Split trnman.h into two files, so that we don't have to include my_atomic.h into the .cc program.
(Temporary fix to avoid bug in gcc)
storage/maria/unittest/ma_pagecache_single.c:
Added missing argument
Added SKIP_BIG_TESTS
(Patch from Sanja)
storage/maria/unittest/ma_test_loghandler-t.c:
Test logging with new LEX_STRING parameter
(Patch from Sanja)
storage/maria/unittest/ma_test_loghandler_multigroup-t.c:
Test logging with new LEX_STRING parameter
(Patch from Sanja)
storage/maria/unittest/ma_test_loghandler_multithread-t.c:
Test logging with new LEX_STRING parameter
(Patch from Sanja)
storage/maria/unittest/ma_test_loghandler_pagecache-t.c:
Test logging with new LEX_STRING parameter
(Patch from Sanja)
storage/maria/unittest/trnman-t.c:
Stack overflow detection
(Patch from Sergei)
unittest/unit.pl:
Command-line options --big and --verbose
(Patch from Sergei)
unittest/mytap/tap.c:
Detect --big
(Patch from Sergei)
unittest/mytap/tap.h:
Skip_big_tests and SKIP_BIG_TESTS
(Patch from Sergei)
storage/maria/trnman_public.h:
New BitKeeper file ``storage/maria/trnman_public.h''
2007-05-29 19:13:56 +02:00
|
|
|
Mark all full pages (not tails) from extents as free, both in bitmap
|
|
|
|
and page cache.
|
2007-01-18 20:38:14 +01:00
|
|
|
|
|
|
|
RETURN
|
|
|
|
0 ok
|
|
|
|
1 error (Couldn't write or read bitmap page)
|
|
|
|
*/
|
|
|
|
|
2007-07-02 19:45:15 +02:00
|
|
|
my_bool _ma_bitmap_free_full_pages(MARIA_HA *info, const uchar *extents,
|
2007-01-18 20:38:14 +01:00
|
|
|
uint count)
|
|
|
|
{
|
UNDO of rows now puts back all part of the row on their original pages and positions
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Added --debug-on option to mysqld (to be able to turn off DBUG with --debug-on=0)
Fixed some bugs with 'non_flushable' marking of bitmap pages
Don't use 'non_flushable' marking of bitmap pages for not transactional tables
SHOW CREATE TABLE now shows if table was created with page checksums
Fixed a lot of bugs with BLOB handling in case of update/REDO and UNDO
More tests (especially for blobs) and DBUG_ASSERTS()
More readable output from maria_read_log and maria_chk
Fixed wrong shift that caused Maria to crash on files > 4G
Mark tables as crashed if REDO fails
dbug/dbug.c:
Changed to use my_bool (allowed me to remove some windows specific code)
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Removed initialization of variables if not needed
include/my_dbug.h:
Use my_bool for some functions that were defined as BOOLEAN in dbug.c code
Added DEBUGGER_ON/DEBUGGER_OFF to speed up execution when DBUG is not used
include/my_global.h:
Define my_bool early
Increase MY_HOW_OFTEN_TO_WRITE as computers are now faster than 10 years ago
mysql-test/mysql-test-run.pl:
Added debug-on=0 to speed up tests
mysql-test/r/maria-recovery.result:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/r/maria.result:
Added testing of page checksums
mysql-test/t/crash_commit_before-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-bitmap-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery.test:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/t/maria.test:
Added testing of page checksums
sql/mysqld.cc:
Added --debug-on option (to be able to turn off DBUG with --debug-on=0)
Indentation fixes
Removed end spaces
sql/sql_show.cc:
Allow update_create_info() to inform MySQL if PACK_KEYS, NO_PACK_KEYS, CHECKSUM, PAGE_CHECKSUM or DELAY_KEY_WRITE is used
storage/maria/Makefile.am:
Added ma_test_big.sh
storage/maria/ha_maria.cc:
Store in create_info if page checksums are used (For SHOW CREATE TABLE)
storage/maria/ma_bitmap.c:
Added _ma_bitmap_wait_or_flush() to cause reader of bitmap pages to wait with reading until bitmap is flushed.
Use TAIL_PAGE_COUNT_MARKER for tail pages
Set 'sub_blocks' for and only for the head page or for the first extent of a blob. This is needed for store_extent_info() to be able to set START_EXTENT_BIT's
Don't allocate more than 0x3fff pages in one extent (We need bit 0x4000 as a START_EXTENT_BIT)
Increase the calculated 'head_length' with the number of bytes used for extents.
Update row->space_on_head_page also in _ma_bitmap_find_new_place()
Make _ma_bitmap_get_page_bits() global. (Needed for UNDO handling)
Changed _ma_bitmap_flushable() to take MARIA_HA instead of MARIA_SHARE.
This was needed to be able to mark the handler if we had a 'non_flushable' call pending or not.
Don't use 'non_flushable' marking of bitmap pages for not transactional tables.
Added BLOCKUSED_USE_ORG_BITMAP handling also for tail pages.
Added more DBUG_ASSERT() to find possible errors in other code
Some code simplifications by adding new local variables
storage/maria/ma_blockrec.c:
UNDO of rows now puts back all part of the row on their original pages and positions.
Changed UNDO of DELETE and UNDO of UPDATE to contain information about the original length of data on head block and also extent information
This changes a lot of logic as now an insert of a row on a page may happen to any position (and not just to the first or next free)
Use PAGE_COUNT to mark if an extent is the start of a blob. (Needed for extent_to_bitmap_blocks())
Added check_directory() for checking that directory entries are correct.
Added checking of row checksums when reading rows (with EXTRA_DEBUG)
Added make_space_for_directory() and extend_directory() for doing expansion of directory
Added get_rowpos_in_head_or_tail_page() to be able to store head/tail on original position in UNDO
Added extent_to_bitmap_blocks() to be able to generate original bitmap blocks from UNDO entry
Added _ma_update_at_original_place() for UNDO of DELETES
Added row->min_length to hold minimum required space needed on head page
Changed find_free_position() to use make_space_for_directory()
Changed make_empty_page() to allow optional creation of directory entry
Changed delete_head_or_tail() and _ma_apply_undo_row_insert() to not copy pagecache block (speed optimization)
Changed _ma_apply_redo_insert_row_head_or_tail() to be able to insert new row at any position on 'new' page
Changed _ma_apply_undo_row_delete() and _ma_apply_undo_row_update() to put row in its original position
Ensure allocation of tail blocks are of at least MIN_TAIL_SIZE.
Ensure we store pages in pinned pages even if read failed. (If not we will have pages pinned forever in page cache)
Write original extent information in UNDO entry, not compacted ones (we need position to tails!)
When setting BLOCKUSED_USED, don't clear other bits (we have to preserve BLOCKUSED_USE_ORG_BITMAP)
Fixed some bugs in directory handling
Fixed bug where we wrote wrong lsn to blob pages
Added separate blob_buffer for fixing bug when updating row that had char/varchar that spanned several pages and also had blobs
Ensure we call _ma_bitmap_flushable() also in case of errors
When doing an update, first delete old entries, then search in bitmap for where to put new information
Info->s -> share
Rowid -> rowid
More DBUG_ASSERT()
storage/maria/ma_blockrec.h:
Added START_EXTENT_BIT and TAIL_PAGE_COUNT_MARKER
Added _ma_bitmap_wait_or_flush() and _ma_bitmap_get_page_bits()
storage/maria/ma_check.c:
Don't write extra empty line if there is no deleted blocks
Ignore START_EXTENT_BIT's in page count
Call _ma_fast_unlock_key_del() to free key_del link
storage/maria/ma_close.c:
Ensure that used_key_del is 0. (If not, someone forgot to call _ma_unlock_key_del())
storage/maria/ma_create.c:
Changed constant to macro
storage/maria/ma_delete.c:
For deleted keys, log also position to row
storage/maria/ma_extra.c:
Release blob buffer at maria_reset() if bigger than MARIA_SMALL_BLOB_BUFFER
storage/maria/ma_key_recover.c:
Added bzero() of LSN that confused page cache in case of uninitialized block
Mark file crashed if applying of index changes fails
Added calls to _ma_fast_unlock_key_del() for protection of shared key_del link.
storage/maria/ma_locking.c:
Added usage of MARIA_FILE_OPEN_COUNT_OFFSET
Added _ma_mark_file_crashed()
storage/maria/ma_loghandler.c:
Fixed bug where we logged uninitialized memory
storage/maria/ma_open.c:
Moved state->changed to be at start of state info on disk to allow one to easily mark files as crashed
storage/maria/ma_page.c:
Disable 'dummy' checksumming of pages as this gave false warnings.
(Need to investigate if this is ever needed)
storage/maria/ma_pagecache.c:
Fixed wrong shift that caused Maria to crash on files > 4G
storage/maria/ma_recovery.c:
In case of errors, start writing on new line if we were in %## %## printing mode (Made errors more readable)
Changed global variable name from warnings -> recovery_warnings
Use MARIA_FILE_CREATE_RENAME_LSN_OFFSET instead of constant
Removed special handling of row position for deleted keys. Keys now always includes row positions
_ma_apply_undo_row_delete() now gets page and row position
Added check that we don't loop forever when handling undo's (in case of bug in undo chain)
Print name of failed REDO/UNDO
storage/maria/ma_recovery.h:
Removed old comment
storage/maria/ma_static.c:
Changed version number of Maria files to not accidentally use old ones (because of change of ordering of status variables)
storage/maria/ma_test2.c:
Added option -u to specify number of rows to update
Changed old option -u to be -A, as for ma_test1
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
First created blob is now of max blob length to ensure we have at least one big blob in the table
storage/maria/ma_test_all.sh:
More tests
storage/maria/ma_test_recovery.expected:
Updated results
storage/maria/ma_test_recovery:
Changed tests to use bigger blobs (not just 1K)
Added new tests that tests recovery of update with blobs
Removed comparison of .MAD file as it's not guaranteed that recovery from scratch gives identical data file as original update
(compact_page() may be called at different times during normal execution and during REDO)
storage/maria/ma_update.c:
Simplify code (changed * to if)
storage/maria/maria_chk.c:
Make output more readable
storage/maria/maria_def.h:
Changed 'changed' to int to prepare for more bits
Added 2 more bytes to status information
Added 'st_maria_row->min_length' for storing min length needed on head page
Added 'st_maria_handler->blob_buff & blob_buff_size' for storing blobs
Moved all tuning parameters into one block
Added MARIA_SMALL_BLOB_BUFFER
Added _ma_mark_file_crashed()
storage/myisam/mi_test2.c:
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
storage/maria/ma_test_big.sh:
Testing of insert, update, delete, recovery and undo of rows with blobs
Thanks to the random-ness of ma_test2 this is likely to find most bugs in the row handling
2007-12-30 21:40:03 +01:00
|
|
|
MARIA_FILE_BITMAP *bitmap= &info->s->bitmap;
|
2011-02-10 19:33:51 +01:00
|
|
|
my_bool res;
|
2007-01-18 20:38:14 +01:00
|
|
|
DBUG_ENTER("_ma_bitmap_free_full_pages");
|
|
|
|
|
2007-10-19 23:24:22 +02:00
|
|
|
for (; count--; extents+= ROW_EXTENT_SIZE)
|
2007-01-18 20:38:14 +01:00
|
|
|
{
|
2008-01-10 20:21:36 +01:00
|
|
|
pgcache_page_no_t page= uint5korr(extents);
|
UNDO of rows now puts back all part of the row on their original pages and positions
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Added --debug-on option to mysqld (to be able to turn off DBUG with --debug-on=0)
Fixed some bugs with 'non_flushable' marking of bitmap pages
Don't use 'non_flushable' marking of bitmap pages for not transactional tables
SHOW CREATE TABLE now shows if table was created with page checksums
Fixed a lot of bugs with BLOB handling in case of update/REDO and UNDO
More tests (especially for blobs) and DBUG_ASSERTS()
More readable output from maria_read_log and maria_chk
Fixed wrong shift that caused Maria to crash on files > 4G
Mark tables as crashed if REDO fails
dbug/dbug.c:
Changed to use my_bool (allowed me to remove some windows specific code)
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Removed initialization of variables if not needed
include/my_dbug.h:
Use my_bool for some functions that was defined as BOOLEAN in dbug.c code
Added DEBUGGER_ON/DEBUGGER_OFF to speed up execution when DBUG is not used
include/my_global.h:
Define my_bool early
Increase MY_HOW_OFTEN_TO_WRITE as computers are now faster than 10 years ago
mysql-test/mysql-test-run.pl:
Added debug-on=0 to speed up tests
mysql-test/r/maria-recovery.result:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/r/maria.result:
Added testing of page checksums
mysql-test/t/crash_commit_before-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-bitmap-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery.test:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/t/maria.test:
Added testing of page checksums
sql/mysqld.cc:
Added --debug-on option (to be able to turn off DBUG with --debug-on=0)
Indentation fixes
Removed end spaces
sql/sql_show.cc:
Allow update_create_info() to inform MySQL if PACK_KEYS, NO_PACK_KEYS, CHECKSUM, PAGE_CHECKSUM or DELAY_KEY_WRITE is used
storage/maria/Makefile.am:
Added ma_test_big.sh
storage/maria/ha_maria.cc:
Store in create_info if page checksums are used (For SHOW CREATE TABLE)
storage/maria/ma_bitmap.c:
Added _ma_bitmap_wait_or_flush() to cause reader of bitmap pages to wait with reading until bitmap is flushed.
Use TAIL_PAGE_COUNT_MARKER for tail pages
Set 'sub_blocks' for and only for the head page or for the first extent of a blob. This is needed for store_extent_info() to be able to set START_EXTENT_BIT's
Don't allocate more than 0x3ffff pages in one extent (We need bit 0x4000 as a START_EXTENT_BIT)
Increase the calculated 'head_length' with the number of bytes used for extents.
Update row->space_on_head_page also in _ma_bitmap_find_new_place()
Make _ma_bitmap_get_page_bits() global. (Needed for UNDO handling)
Changed _ma_bitmap_flushable() to take MARIA_HA instead of MARIA_SHARE.
This was needed to be able to mark the handler if we had a 'non_flushable' call pending or not.
Don't use 'non_flushable' marking of bitmap pages for not transactional tables.
Added BLOCKUSED_USE_ORG_BITMAP handling also for tail pages.
Added more DBUG_ASSERT() to find possible errors in other code
Some code simplifications by adding new local variables
storage/maria/ma_blockrec.c:
UNDO of rows now puts back all part of the row on their original pages and positions.
Changed UNDO of DELETE and UNDO of UPDATE to contain information about the original length of data on head block and also extent information
This changes a lot of logic as now an insert of a row on a page may happen to any position (and not just to the first or next free)
Use PAGE_COUNT to mark if an extent is the start of a blob. (Needed for extent_to_bitmap_blocks())
Added check_directory() for checking that directory entries are correct.
Added checking of row checksums when reading rows (with EXTRA_DEBUG)
Added make_space_for_directory() and extend_directory() for doing expansion of directory
Added get_rowpos_in_head_or_tail_page() to be able to store head/tail on original position in UNDO
Added extent_to_bitmap_blocks() to be able to generate original bitmap blocks from UNDO entry
Added _ma_update_at_original_place() for UNDO of DELETES
Added row->min_length to hold minimum required space needed on head page
Changed find_free_position() to use make_space_for_directory()
Changed make_empty_page() to allow optional creation of directory entry
Changed delete_head_or_tail() and _ma_apply_undo_row_insert() to not copy pagecache block (speed optimization)
Changed _ma_apply_redo_insert_row_head_or_tail() to be able to insert new row at any position on 'new' page
Changed _ma_apply_undo_row_delete() and _ma_apply_undo_row_update() to put row in its original position
Ensure allocation of tail blocks are of at least MIN_TAIL_SIZE.
Ensure we store pages in pinned pages even if read failed. (If not we will have pages pinned forever in page cache)
Write original extent information in UNDO entry, not compacted ones (we need position to tails!)
When setting BLOCKUSED_USED, don't clear other bits (we have to preserve BLOCKUSED_USE_ORG_BITMAP)
Fixed some bugs in directory handling
Fixed bug where we wrote wrong lsn to blob pages
Added separate blob_buffer for fixing bug when updating row that had char/varchar that spanned several pages and also had blobs
Ensure we call _ma_bitmap_flushable() also in case of errors
When doing an update, first delete old entries, then search in bitmap for where to put new information
Info->s -> share
Rowid -> rowid
More DBUG_ASSERT()
storage/maria/ma_blockrec.h:
Added START_EXTENT_BIT and TAIL_PAGE_COUNT_MARKER
Added _ma_bitmap_wait_or_flush() and _ma_bitmap_get_page_bits()
storage/maria/ma_check.c:
Don't write extra empty line if there is no deleted blocks
Ignore START_EXTENT_BIT's in page count
Call _ma_fast_unlock_key_del() to free key_del link
storage/maria/ma_close.c:
Ensure that used_key_del is 0. (If not, someone forgot to call _ma_unlock_key_del())
storage/maria/ma_create.c:
Changed constant to macro
storage/maria/ma_delete.c:
For deleted keys, log also position to row
storage/maria/ma_extra.c:
Release blob buffer at maria_reset() if bigger than MARIA_SMALL_BLOB_BUFFER
storage/maria/ma_key_recover.c:
Added bzero() of LSN that confused page cache in case of uninitialized block
Mark file crashed if applying of index changes fails
Added calls to _ma_fast_unlock_key_del() for protection of shared key_del link.
storage/maria/ma_locking.c:
Added usage of MARIA_FILE_OPEN_COUNT_OFFSET
Added _ma_mark_file_crashed()
storage/maria/ma_loghandler.c:
Fixed bug where we logged uninitialized memory
storage/maria/ma_open.c:
Moved state->changed to be at start of state info on disk to allow one to easily mark files as crashed
storage/maria/ma_page.c:
Disable 'dummy' checksumming of pages as this gave false warnings.
(Need to investigate if this is ever needed)
storage/maria/ma_pagecache.c:
Fixed wrong shift that caused Maria to crash on files > 4G
storage/maria/ma_recovery.c:
In case of errors, start writing on new line if we were in %## %## printing mode (Made errors more readable)
Changed global variable name from warnings -> recovery_warnings
Use MARIA_FILE_CREATE_RENAME_LSN_OFFSET instead of constant
Removed special handling of row position for deleted keys. Keys now always includes row positions
_ma_apply_undo_row_delete() now gets page and row position
Added check that we don't loop forever when handling undo's (in case of bug in undo chain)
Print name of failed REDO/UNDO
storage/maria/ma_recovery.h:
Removed old comment
storage/maria/ma_static.c:
Changed version number of Maria files to not accidentally use old ones (because of change of ordering of status variables)
storage/maria/ma_test2.c:
Added option -u to specify number of rows to update
Changed old option -u to be -A, as for ma_test1
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
First created blob is now of max blob length to ensure we have at least one big blob in the table
storage/maria/ma_test_all.sh:
More tests
storage/maria/ma_test_recovery.expected:
Updated results
storage/maria/ma_test_recovery:
Changed tests to use bigger blobs (not just 1K)
Added new tests that tests recovery of update with blobs
Removed comparison of .MAD file as it's not guaranteed that recovery from scratch gives identical data file as original update
(compact_page() may be called at different times during normal execution and during REDO)
storage/maria/ma_update.c:
Simplify code (changed * to if)
storage/maria/maria_chk.c:
Make output more readable
storage/maria/maria_def.h:
Changed 'changed' to int to prepare for more bits
Added 2 more bytes to status information
Added 'st_maria_row->min_length' for storing min length needed on head page
Added 'st_maria_handler->blob_buff & blob_buff_size' for storing blobs
Moved all tuning parameters into one block
Added MARIA_SMALL_BLOB_BUFFER
Added _ma_mark_file_crashed()
storage/myisam/mi_test2.c:
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
storage/maria/ma_test_big.sh:
Testing of insert, update, delete, recovery and undo of rows with blobs
Thanks to the random-ness of ma_test2 this is likely to find most bugs in the row handling
2007-12-30 21:40:03 +01:00
|
|
|
uint page_count= (uint2korr(extents + ROW_EXTENT_PAGE_SIZE) &
|
|
|
|
~START_EXTENT_BIT);
|
2007-01-18 20:38:14 +01:00
|
|
|
if (!(page_count & TAIL_BIT))
|
|
|
|
{
|
2007-10-19 23:24:22 +02:00
|
|
|
if (page == 0 && page_count == 0)
|
|
|
|
continue; /* Not used extent */
|
This patch is a collection of patches from Sanja, Sergei and Monty.
Added logging and pinning of pages to block format.
Integration of transaction manager, log handler.
Better page cache integration
Split trnman.h into two files, so that we don't have to include my_atomic.h into C++ programs.
Renaming of structures, more comments, more debugging etc.
Fixed problem with small head block + long varchar.
Added extra argument to delete_record() and update_record() (needed for UNDO logging)
Small changes to interface of pagecache and log handler.
Change initialization of log_record_type_descriptors to not be depending on enum order.
Use array of LEX_STRING's to send data to log handler
Added 'dummy' transaction option to MARIA_INFO so that we can always assume 'trn' exists.
include/lf.h:
Interface fixes
Rename of structures
(Patch from Sergei via Sanja)
include/my_atomic.h:
More comments
include/my_global.h:
Added MY_ERRPTR
include/pagecache.h:
Added undo LSN when unlocking pages
mysql-test/r/maria.result:
Updated results
mysql-test/t/maria.test:
Added autocommit around lock tables
(Patch from Sanja)
mysys/lf_alloc-pin.c:
Post-review fixes, simple optimizations
More comments
Struct slot renames
Check amount of memory on stack
(Patch from Sergei)
mysys/lf_dynarray.c:
More comments
mysys/lf_hash.c:
More comments
After review fixes
(Patch from Sergei)
storage/maria/ha_maria.cc:
Split trnman.h into two files, so that we don't have to include my_atomic.h into the .cc program.
(Temporary fix to avoid bug in gcc)
Move out all dereferencing of the transaction structure.
Transaction manager integrated (Patch from Sergei)
storage/maria/ha_maria.h:
Added prototype for start_stmt()
storage/maria/lockman.c:
Function call rename
storage/maria/ma_bitmap.c:
Mark deleted pages free from page cache
storage/maria/ma_blockrec.c:
Offset -> rownr
More debugging
Fixed problem with small head block + long varchar
Added logging of changed pages
Added logging of undo (Including only logging of changed fields in case of update)
Added pinning/unpinning of all changed pages
More comments
Added free_full_pages() as the same code was used in several places.
fill_rows_parts() renamed as fill_insert_undo_parts()
offset -> rownr
Added some optimization of not transactional tables
_ma_update_block_record() has new parameter, as we need original row to do efficient undo for update
storage/maria/ma_blockrec.h:
Added ROW_EXTENTS_ON_STACK
Changed prototype for update and delete of row
storage/maria/ma_check.c:
Added original row to delete_record() call
storage/maria/ma_control_file.h:
Added ifdefs for C++
storage/maria/ma_delete.c:
Added original row to delete_record() call
(Needed for efficient undo logging)
storage/maria/ma_dynrec.c:
Added extra argument to delete_record() and update_record()
Removed not used variable
storage/maria/ma_init.c:
Initialize log handler
storage/maria/ma_loghandler.c:
Removed not used variable
Change initialization of log_record_type_descriptors to not be depending on enum order
Use array of LEX_STRING's to send data to log handler
storage/maria/ma_loghandler.h:
New defines
Use array of LEX_STRING's to send data to log handler
storage/maria/ma_open.c:
Added 'dummy' transaction option to MARIA_INFO so that we can always assume 'trn' exists.
Store in MARIA_SHARE->page_type if pages will have up to date LSN's
storage/maria/ma_pagecache.c:
Don't decrease number of readers when using pagecache_write()/pagecache_read()
In pagecache_write() decrement request count if page was left pinned
Added pagecache_delete_pages()
Removed some casts
Make trace output consistent with rest of code
Simplify calling of DBUG_ASSERT(0)
Only update LSN if the LSN is bigger than what's already on the page
Added LSN parameter pagecache_unpin_page(), pagecache_unpin(), and pagecache_unlock()
(Part of patch from Sanja)
storage/maria/ma_static.c:
Added 'dummy' transaction option to MARIA_INFO so that we can always assume 'trn' exists.
Added default page cache
storage/maria/ma_statrec.c:
Added extra argument to delete_record() and update_record()
storage/maria/ma_test1.c:
Added option -T for transactions
storage/maria/ma_test2.c:
Added option -T for transactions
storage/maria/ma_test_all.sh:
Test with transactions
storage/maria/ma_update.c:
Changed prototype for update of row
storage/maria/maria_def.h:
Changed prototype for update & delete of row as block records need to access the old row
Store in MARIA_SHARE->page_type if pages will have up to date LSN's
Added MARIA_MAX_TREE_LEVELS to allow us to calculate the number of possible pinned pages we may need.
Removed not used 'empty_bits_buffer'
Added pointer to transaction object
Added array for pinned pages
Added log_row_parts array for logging of field data.
Added MARIA_PINNED_PAGE to store pinned pages
storage/maria/trnman.c:
Added accessor functions to transaction object
Added missing DBUG_RETURN()
More debugging
More comments
Changed // comment of code to #ifdef NOT_USED
Transaction manager integrated.
Post review fixes
Part of patch originally from Sergei
storage/maria/trnman.h:
Split trnman.h into two files, so that we don't have to include my_atomic.h into the .cc program.
(Temporary fix to avoid bug in gcc)
storage/maria/unittest/ma_pagecache_single.c:
Added missing argument
Added SKIP_BIG_TESTS
(Patch from Sanja)
storage/maria/unittest/ma_test_loghandler-t.c:
Test logging with new LEX_STRING parameter
(Patch from Sanja)
storage/maria/unittest/ma_test_loghandler_multigroup-t.c:
Test logging with new LEX_STRING parameter
(Patch from Sanja)
storage/maria/unittest/ma_test_loghandler_multithread-t.c:
Test logging with new LEX_STRING parameter
(Patch from Sanja)
storage/maria/unittest/ma_test_loghandler_pagecache-t.c:
Test logging with new LEX_STRING parameter
(Patch from Sanja)
storage/maria/unittest/trnman-t.c:
Stack overflow detection
(Patch from Sergei)
unittest/unit.pl:
Command-line options --big and --verbose
(Patch from Sergei)
unittest/mytap/tap.c:
Detect --big
(Patch from Sergei)
unittest/mytap/tap.h:
Skip_big_tests and SKIP_BIG_TESTS
(Patch from Sergei)
storage/maria/trnman_public.h:
New BitKeeper file ``storage/maria/trnman_public.h''
2007-05-29 19:13:56 +02:00
|
|
|
if (pagecache_delete_pages(info->s->pagecache, &info->dfile, page,
|
2011-02-10 19:33:51 +01:00
|
|
|
page_count, PAGECACHE_LOCK_WRITE, 1))
|
|
|
|
DBUG_RETURN(1);
|
2011-10-19 21:45:18 +02:00
|
|
|
mysql_mutex_lock(&bitmap->bitmap_lock);
|
2011-02-10 19:33:51 +01:00
|
|
|
res= _ma_bitmap_reset_full_page_bits(info, bitmap, page, page_count);
|
2011-10-19 21:45:18 +02:00
|
|
|
mysql_mutex_unlock(&bitmap->bitmap_lock);
|
2011-02-10 19:33:51 +01:00
|
|
|
if (res)
|
2007-01-18 20:38:14 +01:00
|
|
|
DBUG_RETURN(1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
DBUG_RETURN(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2007-04-19 12:18:56 +02:00
|
|
|
/*
|
|
|
|
Mark in the bitmap how much free space there is on a page
|
|
|
|
|
|
|
|
SYNOPSIS
|
|
|
|
_ma_bitmap_set()
|
WL#3072 Maria recovery:
fix for bug: if a crash happened right after writing a REDO like this:
REDO - UNDO - REDO*, then recovery would ignore the last REDO* (ok),
rollback: REDO - UNDO - REDO* - REDO - CLR, and a next recovery would
thus execute REDO* instead of skipping it again. Recovery now logs
LOGREC_INCOMPLETE_GROUP when it meets REDO* for the first time,
to draw a boundary and ensure it is always skipped. Tested by hand.
Note: ma_test_all fails "maria_chk: error: Key 1 - Found too many records"
not due to this patch (failed before).
BitKeeper/triggers/post-commit:
no truncation of the commit mail, or how to review patches?
mysql-test/include/maria_verify_recovery.inc:
let caller choose the statement used to crash (sometimes we
want the crash to happen at special places)
mysql-test/t/maria-recovery.test:
user of maria_verify_recovery.inc now specifies statement which the
script should use for crashing.
storage/maria/ma_bitmap.c:
it's easier to search for all places using functions from the bitmap
module (like in ma_blockrec.c) if those exported functions all start
with "_ma_bitmap": renaming some of them.
Assertion that when we read a bitmap page, overwriting bitmap->map,
we are not losing information (i.e. bitmap->changed is false).
storage/maria/ma_blockrec.c:
update to new names. Adding code (disabled, protected by a #ifdef)
that I use to test certain crash scenarios (more to come).
storage/maria/ma_blockrec.h:
update to new names
storage/maria/ma_checkpoint.c:
update to new names
storage/maria/ma_extra.c:
update to new names
storage/maria/ma_loghandler.c:
new LOGREC_INCOMPLETE_GROUP
storage/maria/ma_loghandler.h:
new LOGREC_INCOMPLETE_GROUP
storage/maria/ma_recovery.c:
When at the end of the REDO phase we have identified some transactions
with incomplete REDO groups (REDOs without an UNDO or CLR_END),
for each of them we log LOGREC_INCOMPLETE_GROUP. This way, the
upcoming UNDO phase can write more records for such transaction,
a future recovery won't pair the incomplete group with the
CLR_END (as there is LOGREC_INCOMPLETE_GROUP to draw a boundary).
2007-12-10 23:26:53 +01:00
|
|
|
info Maria handler
|
2007-04-19 12:18:56 +02:00
|
|
|
page Address to page
|
|
|
|
head 1 if page is a head page, 0 if tail page
|
|
|
|
empty_space How much empty space there is on page
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
0 ok
|
|
|
|
1 error
|
|
|
|
*/
|
2007-01-18 20:38:14 +01:00
|
|
|
|
2008-01-10 20:21:36 +01:00
|
|
|
my_bool _ma_bitmap_set(MARIA_HA *info, pgcache_page_no_t page, my_bool head,
|
2007-01-18 20:38:14 +01:00
|
|
|
uint empty_space)
|
|
|
|
{
|
|
|
|
MARIA_FILE_BITMAP *bitmap= &info->s->bitmap;
|
|
|
|
uint bits;
|
|
|
|
my_bool res;
|
|
|
|
DBUG_ENTER("_ma_bitmap_set");
|
2010-08-10 23:58:08 +02:00
|
|
|
DBUG_PRINT("enter", ("page: %lu head: %d empty_space: %u",
|
|
|
|
(ulong) page, head, empty_space));
|
2007-01-18 20:38:14 +01:00
|
|
|
|
2011-07-13 21:10:18 +02:00
|
|
|
mysql_mutex_lock(&info->s->bitmap.bitmap_lock);
|
2007-01-18 20:38:14 +01:00
|
|
|
bits= (head ?
|
|
|
|
_ma_free_size_to_head_pattern(bitmap, empty_space) :
|
|
|
|
free_size_to_tail_pattern(bitmap, empty_space));
|
2007-04-19 12:18:56 +02:00
|
|
|
res= set_page_bits(info, bitmap, page, bits);
|
2011-07-13 21:10:18 +02:00
|
|
|
mysql_mutex_unlock(&info->s->bitmap.bitmap_lock);
|
2007-01-18 20:38:14 +01:00
|
|
|
DBUG_RETURN(res);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
Check that bitmap pattern is correct for a page
|
|
|
|
|
|
|
|
NOTES
|
|
|
|
Used in maria_chk
|
|
|
|
|
2007-04-19 12:18:56 +02:00
|
|
|
SYNOPSIS
|
|
|
|
_ma_check_bitmap_data()
|
|
|
|
info Maria handler
|
|
|
|
page_type What kind of page this is
|
|
|
|
page Address to page
|
|
|
|
empty_space Empty space on page
|
2011-01-24 14:19:40 +01:00
|
|
|
bitmap_pattern Bitmap pattern for page (from bitmap)
|
2007-04-19 12:18:56 +02:00
|
|
|
|
2007-01-18 20:38:14 +01:00
|
|
|
RETURN
|
|
|
|
0 ok
|
|
|
|
1 error
|
|
|
|
*/
|
|
|
|
|
2011-01-24 14:19:40 +01:00
|
|
|
my_bool _ma_check_bitmap_data(MARIA_HA *info, enum en_page_type page_type,
|
|
|
|
uint empty_space, uint bitmap_pattern)
|
2007-01-18 20:38:14 +01:00
|
|
|
{
|
|
|
|
uint bits;
|
|
|
|
switch (page_type) {
|
|
|
|
case UNALLOCATED_PAGE:
|
|
|
|
case MAX_PAGE_TYPE:
|
|
|
|
bits= 0;
|
|
|
|
break;
|
|
|
|
case HEAD_PAGE:
|
|
|
|
bits= _ma_free_size_to_head_pattern(&info->s->bitmap, empty_space);
|
|
|
|
break;
|
|
|
|
case TAIL_PAGE:
|
|
|
|
bits= free_size_to_tail_pattern(&info->s->bitmap, empty_space);
|
|
|
|
break;
|
|
|
|
case BLOB_PAGE:
|
|
|
|
bits= FULL_TAIL_PAGE;
|
|
|
|
break;
|
2007-10-11 17:45:42 +02:00
|
|
|
default:
|
|
|
|
bits= 0; /* to satisfy compiler */
|
|
|
|
DBUG_ASSERT(0);
|
2007-01-18 20:38:14 +01:00
|
|
|
}
|
2011-01-24 14:19:40 +01:00
|
|
|
return (bitmap_pattern != bits);
|
2007-01-18 20:38:14 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
2007-04-19 12:18:56 +02:00
|
|
|
Check if the page type matches the one that we have in the bitmap
|
|
|
|
|
|
|
|
SYNOPSIS
|
|
|
|
_ma_check_if_right_bitmap_type()
|
|
|
|
info Maria handler
|
|
|
|
page_type What kind of page this is
|
|
|
|
page Address to page
|
|
|
|
bitmap_pattern Store here the pattern that was in the bitmap for the
|
|
|
|
page. This is always updated.
|
2007-01-18 20:38:14 +01:00
|
|
|
|
|
|
|
NOTES
|
|
|
|
Used in maria_chk
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
0 ok
|
|
|
|
1 error
|
|
|
|
*/
|
|
|
|
|
|
|
|
my_bool _ma_check_if_right_bitmap_type(MARIA_HA *info,
|
|
|
|
enum en_page_type page_type,
|
2008-01-10 20:21:36 +01:00
|
|
|
pgcache_page_no_t page,
|
2007-01-18 20:38:14 +01:00
|
|
|
uint *bitmap_pattern)
|
|
|
|
{
|
UNDO of rows now puts back all part of the row on their original pages and positions
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Added --debug-on option to mysqld (to be able to turn of DBUG with --debug-on=0)
Fixed some bugs with 'non_flushable' marking of bitmap pages
Don't use 'non_flushable' marking of bitmap pages for not transactional tables
SHOW CREATE TABLE now shows if table was created with page checksums
Fixed a lot of bugs with BLOB handling in case of update/REDO and UNDO
More tests (especially for blobs) and DBUG_ASSERTS()
More readable output from maria_read_log and maria_chk
Fixed wrong shift that caused Maria to crash on files > 4G
Mark tables as crashed of REDO fails
dbug/dbug.c:
Changed to use my_bool (allowed me to remove some windows specific code)
Added variable _dbug_on_ to speed up execution when DBUG is not going to be used
Removed initialization of variables if not needed
include/my_dbug.h:
Use my_bool for some functions that was defined as BOOLEAN in dbug.c code
Added DBUGGER_ON/DEBUGGER_OFF to speed up execution when DBUG is not used
include/my_global.h:
Define my_bool early
Increase MY_HOW_OFTEN_TO_WRITE as computers are now faster than 10 years ago
mysql-test/mysql-test-run.pl:
Added debug-on=0 to speed up tests
mysql-test/r/maria-recovery.result:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/r/maria.result:
Added testing of page checksums
mysql-test/t/crash_commit_before-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-bitmap-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery-master.opt:
Added --debug-on as test require DBUG to work
mysql-test/t/maria-recovery.test:
Added new test by Guilhem to test if UNDO_ROW_DELETE preserves rowid
mysql-test/t/maria.test:
Added testing of page checksums
sql/mysqld.cc:
Added --debug-on option (to be able to turn of DBUG with --debug-on=0)
Indentation fixes
Removed end spaces
sql/sql_show.cc:
Allow update_create_info() to inform MySQL if PACK_KEYS, NO_PACK_KEYS, CHECKSUM, PAGE_CHECKSUM or DELAY_KEY_WRITE is used
storage/maria/Makefile.am:
Added ma_test_big.sh
storage/maria/ha_maria.cc:
Store in create_info if page checksums are used (For SHOW CREATE TABLE)
storage/maria/ma_bitmap.c:
Added _ma_bitmap_wait_or_flush() to cause reader of bitmap pages to wait with reading until bitmap is flushed.
Use TAIL_PAGE_COUNT_MARKER for tail pages
Set 'sub_blocks' for and only for the head page or for the first extent of a blob. This is needed for store_extent_info() to be able to set START_EXTENT_BIT's
Don't allocate more than 0x3ffff pages in one extent (We need bit 0x4000 as a START_EXTENT_BIT)
Increase the calculated 'head_length' with the number of bytes used for extents.
Update row->space_on_head_page also in _ma_bitmap_find_new_place()
Make _ma_bitmap_get_page_bits() global. (Needed for UNDO handling)
Changed _ma_bitmap_flushable() to take MARIA_HA instead of MARIA_SHARE.
This was needed to be able to mark the handler if we had a 'non_flushable' call pending or not.
Don't use 'non_flushable' marking of bitmap pages for not transactional tables.
Added BLOCKUSED_USE_ORG_BITMAP handling also for tail pages.
Added more DBUG_ASSERT() to find possible errors in other code
Some code simplications by adding new local variables
storage/maria/ma_blockrec.c:
UNDO of rows now puts back all part of the row on their original pages and positions.
Changed UNDO of DELETE and UNDO of UPDATE to contain information about the original length of data on head block and also extent information
This changes a lot of logic as now an insert of a row on a page may happen to any position (and not just to the first or next free)
Use PAGE_COUNT to mark if an extent is the start of of a blob. (Needed for extent_to_bitmap_blocks())
Added check_directory() for checking that directroy entries are correct.
Added checking of row checksums when reading rows (with EXTRA_DEBUG)
Added make_space_for_directory() and extend_directory() for doing expansion of directory
Added get_rowpos_in_head_or_tail_page() to be able to store head/tail on original position in UNDO
Added extent_to_bitmap_blocks() to be able to generate original bitmap blocks from UNDO entry
Added _ma_update_at_original_place() for UNDO of DELETES
Added row->min_length to hold minmum required space needed on head page
Changed find_free_position() to use make_space_for_directory()
Changed make_empty_page() to allow optional creation of directory entry
Changed delete_head_or_tail() and _ma_apply_undo_row_isnert() to not copy pagecache block (speed optimization)
Changed _ma_apply_redo_insert_row_head_or_tail() to be able to insert new row at any position on 'new' page
Changed _ma_apply_undo_row_delete() and _ma_apply_undo_row_update() to put row in it's original position
Ensure allocation of tail blocks are of at least MIN_TAIL_SIZE.
Ensure we store pages in pinned pages even if read failed. (If not we will have pages pinned forever in page cache)
Write original extent information in UNDO entry, not compacted ones (we need position to tails!)
When setting BLOCKUSED_USED, don't clear other bits (we have to preserve BLOCKUSED_USE_ORG_BITMAP)
Fixed som bugs in directory handling
Fixed bug where we wrote wrong lsn to blob pages
Added separate blob_buffer for fixing bug when updating row that had char/varchar that spanned several pages and also had blobs
Ensure we call _ma_bitmap_flushable() also in case of errors
When doing an update, first delete old entries, then search in bitmap for where to put new information
Info->s -> share
Rowid -> rowid
More DBUG_ASSERT()
storage/maria/ma_blockrec.h:
Added START_EXTENT_BIT and TAIL_PAGE_COUNT_MARKER
Added _ma_bitmap_wait_or_flush() and _ma_bitmap_get_page_bits()
storage/maria/ma_check.c:
Don't write extra empty line if there is no deleted blocks
Ignore START_EXTENT_BIT's in page count
Call _ma_fast_unlock_key_del() to free key_del link
storage/maria/ma_close.c:
Ensure that used_key_del is 0. (If not, someone forgot to call _ma_unlock_key_del())
storage/maria/ma_create.c:
Changed constant to macro
storage/maria/ma_delete.c:
For deleted keys, log also position to row
storage/maria/ma_extra.c:
Release blob buffer at maria_reset() if bigger than MARIA_SMALL_BLOB_BUFFER
storage/maria/ma_key_recover.c:
Added bzero() of LSN that confused paged cache in case of uninitialized block
Mark file crashed if applying of index changes fails
Added calls to _ma_fast_unlock_key_del() for protection of shared key_del link.
storage/maria/ma_locking.c:
Added usage of MARIA_FILE_OPEN_COUNT_OFFSET
Added _ma_mark_file_crashed()
storage/maria/ma_loghandler.c:
Fixed bug where we logged uninitialized memory
storage/maria/ma_open.c:
Moved state->changed to be at start of state info on disk to allow one to easly mark files as crashed
storage/maria/ma_page.c:
Disable 'dummy' checksumming of pages as this gave false warnings.
(Need to investigate if this is ever needed)
storage/maria/ma_pagecache.c:
Fixed wrong shift that caused Maria to crash on files > 4G
storage/maria/ma_recovery.c:
In case of errors, start writing on new line if we where in %## %## printing mode (Made errors more readable)
Changed global variable name from warnings -> recovery_warnings
Use MARIA_FILE_CREATE_RENAME_LSN_OFFSET instead of constant
Removed special handling of row position for deleted keys. Keys now always includes row positions
_ma_apply_undo_row_delete() now gets page and row position
Added check that we don't loop forever when handling undo's (in case of bug in undo chain)
Print name of failed REDO/UNDO
storage/maria/ma_recovery.h:
Removed old comment
storage/maria/ma_static.c:
Chaned version number of Maria files to not accidently use old ones (becasue of change of ordering of status variables)
storage/maria/ma_test2.c:
Added option -u to specify number of rows to update
Changed old option -u to be -A, as for ma_test1
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
First created blob is now of max blob length to ensure we have at least one big blob in the table
storage/maria/ma_test_all.sh:
More tests
storage/maria/ma_test_recovery.expected:
Updated results
storage/maria/ma_test_recovery:
Changed tests to use bigger blobs (not just 1K)
Added new tests that tests recovery of update with blobs
Removed comparision of .MAD file as it's not guranteed that recovery from scratch gives identical data file as original update
(compact_page() may be called at different times during normal execution and during REDO)
storage/maria/ma_update.c:
Simplify code (changed * to if)
storage/maria/maria_chk.c:
Make output more readable
storage/maria/maria_def.h:
Changed 'changed' to int to prepare for more bits
Added 2 more bytes to status information
Added 'st_mara_row->min_length' for storing min length needed on head page
Added 'st_mara_handler->blob_buff & blob_buff_size' for storing blobs
Moved all tunning parameters into one block
Added MARIA_SMALL_BLOB_BUFFER
Added _ma_mark_file_crashed()
storage/myisam/mi_test2.c:
Fixed bug in update of rows with blobs (before blobs was always reset to empty on update)
storage/maria/ma_test_big.sh:
Testing of insert, update, delete, recovery and undo of rows with blobs
Thanks to the random-ness of ma_test2 this is likely to find most bugs in the row handling
2007-12-30 21:40:03 +01:00
|
|
|
if ((*bitmap_pattern= _ma_bitmap_get_page_bits(info, &info->s->bitmap,
|
|
|
|
page)) > 7)
|
2007-01-18 20:38:14 +01:00
|
|
|
return 1; /* Couldn't read page */
|
|
|
|
switch (page_type) {
|
|
|
|
case HEAD_PAGE:
|
|
|
|
return *bitmap_pattern < 1 || *bitmap_pattern > 4;
|
|
|
|
case TAIL_PAGE:
|
|
|
|
return *bitmap_pattern < 5;
|
|
|
|
case BLOB_PAGE:
|
|
|
|
return *bitmap_pattern != 7;
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
DBUG_ASSERT(0);
|
|
|
|
return 1;
|
|
|
|
}
|
WL#3072 - Maria recovery
Unit test for recovery: runs ma_test1 and ma_test2 (both only with
INSERTs and DELETEs; UPDATEs disabled as not handled by recovery)
then moves the tables elsewhere; recreates tables from the log, and
compares and fails if there is a difference. Passes now.
Most of maria_read_log.c moved to ma_recovery.c, as it will be re-used
for recovery-from-ha_maria.
Bugfixes of applying of REDO_INSERT, REDO_PURGE_ROW.
Applying of REDO_PURGE_BLOCKS, REDO_DELETE_ALL, REDO_DROP_TABLE,
UNDO_ROW_INSERT (in REDO phase only, i.e. just doing records++),
UNDO_ROW_DELETE, UNDO_ROW_PURGE.
Code cleanups.
Monty: please look for "QQ". Sanja: please look for "Sanja".
Future tasks: recovery of the bitmap (easy), recovery of the state
(make it idempotent), more REDOs (Monty to work on
REDO_UPDATE?), UNDO phase...
Pushing this cset as it looks safe, contains test and bugfixes which
will help Monty implement applying of REDO_UPDATE.
sql/handler.cc:
typo
storage/maria/Makefile.am:
Adding ma_test_recovery (which ma_test_all invokes, and which can
also be run alone). Most of maria_read_log.c moved to ma_recovery.c
storage/maria/ha_maria.cc:
comments
storage/maria/ma_bitmap.c:
fixing comments. 2 -> sizeof(maria_bitmap_marker).
Bitmap-related part of _ma_initialize_datafile() moves in bitmap module.
Now putting the "bm" signature when creating the first bitmap page
(it used to happen only at next open, but that
caused an annoying difference when testing Recovery if the original
run didn't open the table, and it looks more
logical like this: it goes to disk only with its signature correct);
see the "QQ" comment towards the _ma_initialize_data_file() call
in ma_create.c for more).
When reading a bitmap page, verify its signature (happens when normally
using the table or when CHECKing it; not when REPAIRing it).
storage/maria/ma_blockrec.c:
* no need to sync the data file if table is not transactional
* Comments, code cleanup (log-related data moved to log-related code
block, int5store->page_store).
* Store the table's short id into LOGREC_UNDO_ROW_PURGE, like we
do for other records (though this record will soon be replaced
with a CLR).
* If "page" is 1 it means the page which extends from byte
page*block_size+1 to (page+1)*block_size (byte number 1 being
the first byte of the file). The last byte of the file is
data_file_length (same convention).
A new page needs to be created if the last byte of the page is
beyond the last byte of the file, i.e.
(page+1)*block_size+1 > data_file_length, so we correct the test
(bug found when testing log applying for ma_test1 -M -T --skip-update).
* update the page's LSN when removing a row from it during
execution of a REDO_PURGE_ROW record (bug found when testing log
applying for ma_test1 -M -T --skip-update).
* applying of REDO_PURGE_BLOCKs (limited to a one-page range for now).
storage/maria/ma_blockrec.h:
new functions. maria_bitmap_marker does not need to be exported.
storage/maria/ma_close.c:
we can always flush the table's state when closing the last instance
of the table. And it is needed for maria_read_log (as it does
not use maria_lock_database()).
storage/maria/ma_control_file.c:
when in Recovery, some assertions should not be used.
storage/maria/ma_control_file.h:
double-inclusion safe
storage/maria/ma_create.c:
during recovery, don't log records. Comments.
Moving the creation of the first bitmap page to ma_bitmap.c
storage/maria/ma_delete_table.c:
during recovery, don't log records. Log the end-zero of the dropped
table's name, so that recovery can use the string in place without
extending it to fit an end zero.
storage/maria/ma_loghandler.c:
* inwrite_rec_hook also needs access to the MARIA_SHARE, like
prewrite_rec_hook. This will be needed to update
share->records_diff (in the upcoming patch "recovery of the state").
* LOG_DESC::record_ends_group changed to an enum.
* LOG_DESC for LOGREC_REDO_PURGE_BLOCKS and LOGREC_UNDO_ROW_PURGE
corrected
* Sanja please see the @todo LOG BUG
* avoiding DBUG_RETURN(func()) as it gives confusing debug traces.
storage/maria/ma_loghandler.h:
- log write hooks called while the log's lock is held (inwrite_rec_hook)
now need the MARIA_SHARE, like prewrite_rec_hook already had
- instead of a bool saying if this record's type ends groups or not,
we refine: it may not end a group, it may end a group, or it may
be a group in itself. Imagine that we had a physical write failure
to a table before we log the UNDO, we still end up in
external_lock(F_UNLCK) and then we log a COMMIT: we don't want
to consider this COMMIT as ending the group of REDOs (don't want
to execute those REDOs during Recovery), that's why we say "COMMIT
is a group in itself, it aborts any previous group". This also
gives one more sanity check in maria_read_log.
storage/maria/ma_recovery.c:
New Recovery code, replacing the old pseudocode.
Most of maria_read_log moved here.
Call-able from ha_maria, but not enabled yet.
Compared to the previous version of maria_read_log, some bugs have
been fixed, debugging output can go to stdout or a disk file (for now
it's useful for me, later it can be changed), execution of
REDO_DROP_TABLE, REDO_DELETE_ALL, REDO_PURGE_BLOCKS has been added. Duplicate code
has been factored into functions. We abort an unfinished group
of records if we see a record which is a group in itself (like COMMIT).
No need for maria_panic() after a bug (which caused tables to not
be closed) was fixed; if there is yet another bug I prefer to see it.
When opening a table for Recovery, set data_file_length
and key_file_length to their real physical value (these are the
easiest state members to restore :). Warn us if the last page
was truncated (but Recovery handles it).
MARIA_SHARE::state::state::records is now partly recovered (not
idempotent, but works if recreating tables from scratch).
When applying a REDO to a page, stamp it with the UNDO's LSN
(current_group_end_lsn), not with the REDO's LSN; it makes
the table more identical to the original table (easier to compare
the two tables in the end).
Big thing missing: some types of REDOs are not handled,
and the UNDO phase does not exist (missing functions to execute UNDOs
to actually rollback). So for now tests are only inserting/deleting
a few 100 rows, closing the table and seeing if the log is applied ok;
it works. UPDATE not handled.
storage/maria/ma_recovery.h:
new functions: ma_recover() for recovery from inside ha_maria;
_ma_apply_log() for maria_read_log (ma_recover() calls _ma_apply_log()).
Btw, we need to not use the word "recover" for REPAIR/maria_chk anymore.
storage/maria/ma_rename.c:
don't write log records during recovery
storage/maria/ma_test2.c:
- fail if maria_info() or other subtests find some wrong information
- new option -g to skip updates.
- init the translog before creating the table, so that log applying
can work.
- in "#if 0" you'll see some fixed bugs (will be removed).
storage/maria/ma_test_all.sh:
cleanup files. Test log applying.
storage/maria/maria_read_log.c:
most of the logic moves to ma_recovery.c to be shared between
maria_read_log and recovery-from-inside-mysqld.
See ma_recovery.c for additional changes made to the moved code.
storage/maria/ma_test_recovery:
unit test for Recovery. Tests insert and delete,
REDO_UPDATE not yet coded.
Script is called from ma_test_all. Can run standalone.
2007-07-26 11:56:21 +02:00
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
@brief create the first bitmap page of a freshly created data file
|
|
|
|
|
|
|
|
@param share table's share
|
|
|
|
|
|
|
|
@return Operation status
|
|
|
|
@retval 0 OK
|
|
|
|
@retval !=0 Error
|
|
|
|
*/
|
|
|
|
|
|
|
|
int _ma_bitmap_create_first(MARIA_SHARE *share)
|
2007-10-19 23:24:22 +02:00
|
|
|
{
|
WL#3072 - Maria recovery
Unit test for recovery: runs ma_test1 and ma_test2 (both only with
INSERTs and DELETEs; UPDATEs disabled as not handled by recovery)
then moves the tables elsewhere; recreates tables from the log, and
compares and fails if there is a difference. Passes now.
Most of maria_read_log.c moved to ma_recovery.c, as it will be re-used
for recovery-from-ha_maria.
Bugfixes of applying of REDO_INSERT, REDO_PURGE_ROW.
Applying of REDO_PURGE_BLOCKS, REDO_DELETE_ALL, REDO_DROP_TABLE,
UNDO_ROW_INSERT (in REDO phase only, i.e. just doing records++),
UNDO_ROW_DELETE, UNDO_ROW_PURGE.
Code cleanups.
Monty: please look for "QQ". Sanja: please look for "Sanja".
Future tasks: recovery of the bitmap (easy), recovery of the state
(make it idempotent), more REDOs (Monty to work on
REDO_UPDATE?), UNDO phase...
Pushing this cset as it looks safe, contains test and bugfixes which
will help Monty implement applying of REDO_UPDATE.
sql/handler.cc:
typo
storage/maria/Makefile.am:
Adding ma_test_recovery (which ma_test_all invokes, and which can
also be run alone). Most of maria_read_log.c moved to ma_recovery.c
storage/maria/ha_maria.cc:
comments
storage/maria/ma_bitmap.c:
fixing comments. 2 -> sizeof(maria_bitmap_marker).
Bitmap-related part of _ma_initialize_datafile() moves in bitmap module.
Now putting the "bm" signature when creating the first bitmap page
(it used to happen only at next open, but that
caused an annoying difference when testing Recovery if the original
run didn't open the table, and it looks more
logical like this: it goes to disk only with its signature correct);
see the "QQ" comment towards the _ma_initialize_data_file() call
in ma_create.c for more).
When reading a bitmap page, verify its signature (happens when normally
using the table or when CHECKing it; not when REPAIRing it).
storage/maria/ma_blockrec.c:
* no need to sync the data file if table is not transactional
* Comments, code cleanup (log-related data moved to log-related code
block, int5store->page_store).
* Store the table's short id into LOGREC_UNDO_ROW_PURGE, like we
do for other records (though this record will soon be replaced
with a CLR).
* If "page" is 1 it means the page which extends from byte
page*block_size+1 to (page+1)*block_size (byte number 1 being
the first byte of the file). The last byte of the file is
data_file_length (same convention).
A new page needs to be created if the last byte of the page is
beyond the last byte of the file, i.e.
(page+1)*block_size+1 > data_file_length, so we correct the test
(bug found when testing log applying for ma_test1 -M -T --skip-update).
* update the page's LSN when removing a row from it during
execution of a REDO_PURGE_ROW record (bug found when testing log
applying for ma_test1 -M -T --skip-update).
* applying of REDO_PURGE_BLOCKs (limited to a one-page range for now).
storage/maria/ma_blockrec.h:
new functions. maria_bitmap_marker does not need to be exported.
storage/maria/ma_close.c:
we can always flush the table's state when closing the last instance
of the table. And it is needed for maria_read_log (as it does
not use maria_lock_database()).
storage/maria/ma_control_file.c:
when in Recovery, some assertions should not be used.
storage/maria/ma_control_file.h:
double-inclusion safe
storage/maria/ma_create.c:
during recovery, don't log records. Comments.
Moving the creation of the first bitmap page to ma_bitmap.c
storage/maria/ma_delete_table.c:
during recovery, don't log records. Log the end-zero of the dropped
table's name, so that recovery can use the string in place without
extending it to fit an end zero.
storage/maria/ma_loghandler.c:
* inwrite_rec_hook also needs access to the MARIA_SHARE, like
prewrite_rec_hook. This will be needed to update
share->records_diff (in the upcoming patch "recovery of the state").
* LOG_DESC::record_ends_group changed to an enum.
* LOG_DESC for LOGREC_REDO_PURGE_BLOCKS and LOGREC_UNDO_ROW_PURGE
corrected
* Sanja please see the @todo LOG BUG
* avoiding DBUG_RETURN(func()) as it gives confusing debug traces.
storage/maria/ma_loghandler.h:
- log write hooks called while the log's lock is held (inwrite_rec_hook)
now need the MARIA_SHARE, like prewrite_rec_hook already had
- instead of a bool saying if this record's type ends groups or not,
we refine: it may not end a group, it may end a group, or it may
be a group in itself. Imagine that we had a physical write failure
to a table before we log the UNDO, we still end up in
external_lock(F_UNLCK) and then we log a COMMIT: we don't want
to consider this COMMIT as ending the group of REDOs (don't want
to execute those REDOs during Recovery), that's why we say "COMMIT
is a group in itself, it aborts any previous group". This also
gives one more sanity check in maria_read_log.
storage/maria/ma_recovery.c:
New Recovery code, replacing the old pseudocode.
Most of maria_read_log moved here.
Call-able from ha_maria, but not enabled yet.
Compared to the previous version of maria_read_log, some bugs have
been fixed, debugging output can go to stdout or a disk file (for now
it's useful for me, later it can be changed), execution of
REDO_DROP_TABLE, REDO_DELETE_ALL, REDO_PURGE_BLOCKS has been added. Duplicate code
has been factored into functions. We abort an unfinished group
of records if we see a record which is a group in itself (like COMMIT).
No need for maria_panic() after a bug (which caused tables to not
be closed) was fixed; if there is yet another bug I prefer to see it.
When opening a table for Recovery, set data_file_length
and key_file_length to their real physical value (these are the
easiest state members to restore :). Warn us if the last page
was truncated (but Recovery handles it).
MARIA_SHARE::state::state::records is now partly recovered (not
idempotent, but works if recreating tables from scratch).
When applying a REDO to a page, stamp it with the UNDO's LSN
(current_group_end_lsn), not with the REDO's LSN; it makes
the table more identical to the original table (easier to compare
the two tables in the end).
Big thing missing: some types of REDOs are not handled,
and the UNDO phase does not exist (missing functions to execute UNDOs
to actually rollback). So for now tests are only inserting/deleting
a few 100 rows, closing the table and seeing if the log is applied ok;
it works. UPDATE not handled.
storage/maria/ma_recovery.h:
new functions: ma_recover() for recovery from inside ha_maria;
_ma_apply_log() for maria_read_log (ma_recover() calls _ma_apply_log()).
Btw, we need to not use the word "recover" for REPAIR/maria_chk anymore.
storage/maria/ma_rename.c:
don't write log records during recovery
storage/maria/ma_test2.c:
- fail if maria_info() or other subtests find some wrong information
- new option -g to skip updates.
- init the translog before creating the table, so that log applying
can work.
- in "#if 0" you'll see some fixed bugs (will be removed).
storage/maria/ma_test_all.sh:
cleanup files. Test log applying.
storage/maria/maria_read_log.c:
most of the logic moves to ma_recovery.c to be shared between
maria_read_log and recovery-from-inside-mysqld.
See ma_recovery.c for additional changes made to the moved code.
storage/maria/ma_test_recovery:
unit test for Recovery. Tests insert and delete,
REDO_UPDATE not yet coded.
Script is called from ma_test_all. Can run standalone.
2007-07-26 11:56:21 +02:00
|
|
|
uint block_size= share->bitmap.block_size;
|
|
|
|
File file= share->bitmap.file.file;
|
2008-02-07 21:46:32 +01:00
|
|
|
uchar marker[CRC_SIZE];
|
2007-10-09 20:09:50 +02:00
|
|
|
|
2007-12-15 22:31:22 +01:00
|
|
|
/*
|
|
|
|
Next write operation of the page will write correct CRC
|
|
|
|
if it is needed
|
|
|
|
*/
|
|
|
|
int4store(marker, MARIA_NO_CRC_BITMAP_PAGE);
|
2007-10-09 20:09:50 +02:00
|
|
|
|
2011-07-13 21:10:18 +02:00
|
|
|
if (mysql_file_chsize(file, block_size - sizeof(marker),
|
|
|
|
0, MYF(MY_WME)) ||
|
2007-12-15 22:31:22 +01:00
|
|
|
my_pwrite(file, marker, sizeof(marker),
|
|
|
|
block_size - sizeof(marker),
|
WL#3072 - Maria recovery
Unit test for recovery: runs ma_test1 and ma_test2 (both only with
INSERTs and DELETEs; UPDATEs disabled as not handled by recovery)
then moves the tables elsewhere; recreates tables from the log, and
compares and fails if there is a difference. Passes now.
Most of maria_read_log.c moved to ma_recovery.c, as it will be re-used
for recovery-from-ha_maria.
Bugfixes of applying of REDO_INSERT, REDO_PURGE_ROW.
Applying of REDO_PURGE_BLOCKS, REDO_DELETE_ALL, REDO_DROP_TABLE,
UNDO_ROW_INSERT (in REDO phase only, i.e. just doing records++),
UNDO_ROW_DELETE, UNDO_ROW_PURGE.
Code cleanups.
Monty: please look for "QQ". Sanja: please look for "Sanja".
Future tasks: recovery of the bitmap (easy), recovery of the state
(make it idempotent), more REDOs (Monty to work on
REDO_UPDATE?), UNDO phase...
Pushing this cset as it looks safe, contains test and bugfixes which
will help Monty implement applying of REDO_UPDATE.
sql/handler.cc:
typo
storage/maria/Makefile.am:
Adding ma_test_recovery (which ma_test_all invokes, and which can
also be run alone). Most of maria_read_log.c moved to ma_recovery.c
storage/maria/ha_maria.cc:
comments
storage/maria/ma_bitmap.c:
fixing comments. 2 -> sizeof(maria_bitmap_marker).
Bitmap-related part of _ma_initialize_datafile() moves in bitmap module.
Now putting the "bm" signature when creating the first bitmap page
(it used to happen only at next open, but that
caused an annoying difference when testing Recovery if the original
run didn't open the table, and it looks more
logical like this: it goes to disk only with its signature correct);
see the "QQ" comment towards the _ma_initialize_data_file() call
in ma_create.c for more).
When reading a bitmap page, verify its signature (happens when normally
using the table or when CHECKing it; not when REPAIRing it).
storage/maria/ma_blockrec.c:
* no need to sync the data file if table is not transactional
* Comments, code cleanup (log-related data moved to log-related code
block, int5store->page_store).
* Store the table's short id into LOGREC_UNDO_ROW_PURGE, like we
do for other records (though this record will soon be replaced
with a CLR).
* If "page" is 1 it means the page which extends from byte
page*block_size+1 to (page+1)*block_size (byte number 1 being
the first byte of the file). The last byte of the file is
data_file_length (same convention).
A new page needs to be created if the last byte of the page is
beyond the last byte of the file, i.e.
(page+1)*block_size+1 > data_file_length, so we correct the test
(bug found when testing log applying for ma_test1 -M -T --skip-update).
* update the page's LSN when removing a row from it during
execution of a REDO_PURGE_ROW record (bug found when testing log
applying for ma_test1 -M -T --skip-update).
* applying of REDO_PURGE_BLOCKs (limited to a one-page range for now).
storage/maria/ma_blockrec.h:
new functions. maria_bitmap_marker does not need to be exported.
storage/maria/ma_close.c:
we can always flush the table's state when closing the last instance
of the table. And it is needed for maria_read_log (as it does
not use maria_lock_database()).
storage/maria/ma_control_file.c:
when in Recovery, some assertions should not be used.
storage/maria/ma_control_file.h:
double-inclusion safe
storage/maria/ma_create.c:
during recovery, don't log records. Comments.
Moving the creation of the first bitmap page to ma_bitmap.c
storage/maria/ma_delete_table.c:
during recovery, don't log records. Log the end-zero of the dropped
table's name, so that recovery can use the string in place without
extending it to fit an end zero.
storage/maria/ma_loghandler.c:
* inwrite_rec_hook also needs access to the MARIA_SHARE, like
prewrite_rec_hook. This will be needed to update
share->records_diff (in the upcoming patch "recovery of the state").
* LOG_DESC::record_ends_group changed to an enum.
* LOG_DESC for LOGREC_REDO_PURGE_BLOCKS and LOGREC_UNDO_ROW_PURGE
corrected
* Sanja please see the @todo LOG BUG
* avoiding DBUG_RETURN(func()) as it gives confusing debug traces.
storage/maria/ma_loghandler.h:
- log write hooks called while the log's lock is held (inwrite_rec_hook)
now need the MARIA_SHARE, like prewrite_rec_hook already had
- instead of a bool saying if this record's type ends groups or not,
we refine: it may not end a group, it may end a group, or it may
be a group in itself. Imagine that we had a physical write failure
to a table before we log the UNDO, we still end up in
external_lock(F_UNLCK) and then we log a COMMIT: we don't want
to consider this COMMIT as ending the group of REDOs (don't want
to execute those REDOs during Recovery), that's why we say "COMMIT
is a group in itself, it aborts any previous group". This also
gives one more sanity check in maria_read_log.
storage/maria/ma_recovery.c:
New Recovery code, replacing the old pseudocode.
Most of maria_read_log moved here.
Call-able from ha_maria, but not enabled yet.
Compared to the previous version of maria_read_log, some bugs have
been fixed, debugging output can go to stdout or a disk file (for now
it's useful for me, later it can be changed), execution of
REDO_DROP_TABLE, REDO_DELETE_ALL, REDO_PURGE_BLOCKS has been added. Duplicate code
has been factored into functions. We abort an unfinished group
of records if we see a record which is a group in itself (like COMMIT).
No need for maria_panic() after a bug (which caused tables to not
be closed) was fixed; if there is yet another bug I prefer to see it.
When opening a table for Recovery, set data_file_length
and key_file_length to their real physical value (these are the
easiest state members to restore :). Warn us if the last page
was truncated (but Recovery handles it).
MARIA_SHARE::state::state::records is now partly recovered (not
idempotent, but works if recreating tables from scratch).
When applying a REDO to a page, stamp it with the UNDO's LSN
(current_group_end_lsn), not with the REDO's LSN; it makes
the table more identical to the original table (easier to compare
the two tables in the end).
Big thing missing: some types of REDOs are not handled,
and the UNDO phase does not exist (missing functions to execute UNDOs
to actually rollback). So for now tests are only inserting/deleting
a few 100 rows, closing the table and seeing if the log is applied ok;
it works. UPDATE not handled.
storage/maria/ma_recovery.h:
new functions: ma_recover() for recovery from inside ha_maria;
_ma_apply_log() for maria_read_log (ma_recover() calls _ma_apply_log()).
Btw, we need to not use the word "recover" for REPAIR/maria_chk anymore.
storage/maria/ma_rename.c:
don't write log records during recovery
storage/maria/ma_test2.c:
- fail if maria_info() or other subtests find some wrong information
- new option -g to skip updates.
- init the translog before creating the table, so that log applying
can work.
- in "#if 0" you'll see some fixed bugs (will be removed).
storage/maria/ma_test_all.sh:
cleanup files. Test log applying.
storage/maria/maria_read_log.c:
most of the logic moves to ma_recovery.c to be shared between
maria_read_log and recovery-from-inside-mysqld.
See ma_recovery.c for additional changes made to the moved code.
storage/maria/ma_test_recovery:
unit test for Recovery. Tests insert and delete,
REDO_UPDATE not yet coded.
Script is called from ma_test_all. Can run standalone.
2007-07-26 11:56:21 +02:00
|
|
|
MYF(MY_NABP | MY_WME)))
|
|
|
|
return 1;
|
|
|
|
share->state.state.data_file_length= block_size;
|
|
|
|
_ma_bitmap_delete_all(share);
|
|
|
|
return 0;
|
|
|
|
}
|
WL#3072 - Maria Recovery
* to honour WAL we now force the whole log when flushing a bitmap page.
* ability to intentionally crash in various places for recovery testing
* bugfix (dirty pages list found in checkpoint record was ignored)
* smaller checkpoint record
* misc small cleanups and comments
mysql-test/include/maria_empty_logs.inc:
maria-purge.test creates ~11 logs, remove them all
mysql-test/r/maria-recovery-bitmap.result:
result is good; without the _ma_bitmap_get_log_address() call,
we got
check error Bitmap at 0 has pages reserved outside of data file length
mysql-test/r/maria-recovery.result:
result update
mysql-test/t/maria-recovery-bitmap.test:
enable test of "bitmap-flush should flush whole log otherwise
corrupted data file (bitmap ahead of data pages)".
mysql-test/t/maria-recovery.test:
test of checkpoint
sql/sql_table.cc:
comment
storage/maria/ha_maria.cc:
_ma_reenable_logging_for_table() now includes file->trn=0.
At the end of repair() we don't need to re-enable logging, it is
done already by caller (like copy_data_between_tables()); it sounds
strange that this function could decide to re-enable, it should be
up to caller who knows what other operations it plans. Removing this
line led to assertion failure in maria_lock_database(F_UNLCK), fixed
by removing the assertion: maria_lock_database()
is here called in a context where F_UNLCK does not make the
table visible to others so assertion is excessive, and external_lock()
is already designed to honour the asserted condition.
Ability to crash at the end of bulk insert when indices
have been enabled.
storage/maria/ma_bitmap.c:
Better use pagecache_file_init() than set pagecache callbacks directly;
and a new function to set those callbacks for bitmap so that we can
reuse it.
_ma_bitmap_get_log_address() is a pagecache get_log_address callback
which causes the whole log to be flushed when a bitmap page
is flushed by the page cache. This was required by WAL.
storage/maria/ma_blockrec.c:
get_log_address pagecache callback for data (non bitmap) pages:
just reads the LSN from the page's content, like was hard-coded
before in ma_pagecache.c.
storage/maria/ma_blockrec.h:
functions which need to be exported
storage/maria/ma_check.c:
create_new_data_handle() can be static.
Ability to crash after rebuilding the index in OPTIMIZE,
in REPAIR. my_lock() implemented already.
storage/maria/ma_checkpoint.c:
As MARIA_SHARE* is now accessible to pagecache_collect_changed_blocks_LSN(),
we don't need to store kfile/dfile descriptors in checkpoint record,
2-byte-id of the table plus one byte to say if this is data or index
file is enough. So we go from 4+4 bytes per table down to 2+1.
storage/maria/ma_commit.c:
removing duplicate functions (see _ma_tmp_disable_logging_for_table())
storage/maria/ma_extra.c:
Monty fixed
storage/maria/ma_key_recover.c:
comment
storage/maria/ma_locking.c:
Sometimes other code does funny things with maria_lock_database(),
like ha_maria::repair() calling it at start and end without going
through ha_maria::external_lock(). So it happens that maria_lock_database()
is called with now_transactional!=born_transactional.
storage/maria/ma_loghandler.c:
update to new prototype
storage/maria/ma_open.c:
set_data|index_pagecache_callbacks() need to be exported as
they are now called when disabling/enabling transactionality.
storage/maria/ma_pagecache.c:
Removing PAGE_LSN_OFFSET, as much of the code relies on it being
0 anyway (let's not give impression we can just change this constant).
When flushing a page to disk, call the get_log_address callback to
know up to which LSN the log should be flushed.
As we now can access MARIA_SHARE* we can know share->id and store
it into the checkpoint record; we thus go from 4 bytes per dirty page
to 2+1.
storage/maria/ma_pagecache.h:
get_log_address callback
storage/maria/ma_panic.c:
No reason to reset pagecache callbacks in HA_PANIC_READ:
all we do is reopen files if they were closed; callbacks should
be in place already as 'info' exists; we just want to modify
the file descriptors, not the full PAGECACHE_FILE structure.
If we open data file and it was closed, share->bitmap.file needs
to be set.
Note that the modified code is disabled anyway.
storage/maria/ma_recovery.c:
Checkpoint record does not contain kfile/dfile descriptors anymore
so code can be simplified. Hash key in all_dirty_pages is
not made from file_descriptor & pageno anymore, but
index_or_data & table-short-id & pageno.
If a table's create_rename_lsn is higher than record's LSN,
we skip the table and don't fail if it's corrupted (because the LSNs
say that we don't have to look at this table).
If a table is skipped (for example due to create_rename_lsn),
its UNDOs still cause undo_lsn to advance; this is so that if later
we notice the transaction has to rollback we fail (as table should
not be skipped in this case).
Fixing a bug: the dirty_pages list was never used, because
the LSN below which it was used was the minimum rec_lsn of dirty pages!
It is now the min(checkpoint_start_log_horizon, min(trn's rec_lsn)).
When we disable/reenable transactionality, we modify pagecache
callbacks (needed for example for get_log_address: changing
share->page_type is not enough anymore).
storage/maria/ma_write.c:
'records' and 'checksum' are protected: they are updated under
log's mutex in write-hooks when UNDO is written.
storage/maria/maria_chk.c:
remove use of duplicate functions.
storage/maria/maria_def.h:
set_data|index_pagecache_callbacks() need to be exported;
_ma_reenable_logging_for_table() changes to a real function.
storage/maria/unittest/ma_pagecache_consist.c:
new prototype
storage/maria/unittest/ma_pagecache_single.c:
new prototype
storage/maria/unittest/ma_test_loghandler_pagecache-t.c:
new prototype
2007-12-30 21:32:07 +01:00
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
@brief Pagecache callback to get the TRANSLOG_ADDRESS to flush up to, when a
|
|
|
|
bitmap page needs to be flushed.
|
|
|
|
|
|
|
|
@param page Page's content
|
|
|
|
@param page_no Page's number (<offset>/<page length>)
|
|
|
|
@param data_ptr Callback data pointer (pointer to MARIA_SHARE)
|
|
|
|
|
|
|
|
@retval TRANSLOG_ADDRESS to flush up to.
|
|
|
|
*/
|
|
|
|
|
2008-01-02 17:27:24 +01:00
|
|
|
static my_bool
|
|
|
|
flush_log_for_bitmap(uchar *page __attribute__((unused)),
|
|
|
|
pgcache_page_no_t page_no __attribute__((unused)),
|
2008-01-11 00:47:52 +01:00
|
|
|
uchar *data_ptr __attribute__((unused)))
|
WL#3072 - Maria Recovery
* to honour WAL we now force the whole log when flushing a bitmap page.
* ability to intentionally crash in various places for recovery testing
* bugfix (dirty pages list found in checkpoint record was ignored)
* smaller checkpoint record
* misc small cleanups and comments
mysql-test/include/maria_empty_logs.inc:
maria-purge.test creates ~11 logs, remove them all
mysql-test/r/maria-recovery-bitmap.result:
result is good; without the _ma_bitmap_get_log_address() call,
we got
check error Bitmap at 0 has pages reserved outside of data file length
mysql-test/r/maria-recovery.result:
result update
mysql-test/t/maria-recovery-bitmap.test:
enable test of "bitmap-flush should flush whole log otherwise
corrupted data file (bitmap ahead of data pages)".
mysql-test/t/maria-recovery.test:
test of checkpoint
sql/sql_table.cc:
comment
storage/maria/ha_maria.cc:
_ma_reenable_logging_for_table() now includes file->trn=0.
At the end of repair() we don't need to re-enable logging, it is
done already by caller (like copy_data_between_tables()); it sounds
strange that this function could decide to re-enable, it should be
up to caller who knows what other operations it plans. Removing this
line led to assertion failure in maria_lock_database(F_UNLCK), fixed
by removing the assertion: maria_lock_database()
is here called in a context where F_UNLCK does not make the
table visible to others so assertion is excessive, and external_lock()
is already designed to honour the asserted condition.
Ability to crash at the end of bulk insert when indices
have been enabled.
storage/maria/ma_bitmap.c:
Better use pagecache_file_init() than set pagecache callbacks directly;
and a new function to set those callbacks for bitmap so that we can
reuse it.
_ma_bitmap_get_log_address() is a pagecache get_log_address callback
which causes the whole log to be flushed when a bitmap page
is flushed by the page cache. This was required by WAL.
storage/maria/ma_blockrec.c:
get_log_address pagecache callback for data (non bitmap) pages:
just reads the LSN from the page's content, like was hard-coded
before in ma_pagecache.c.
storage/maria/ma_blockrec.h:
functions which need to be exported
storage/maria/ma_check.c:
create_new_data_handle() can be static.
Ability to crash after rebuilding the index in OPTIMIZE,
in REPAIR. my_lock() implemented already.
storage/maria/ma_checkpoint.c:
As MARIA_SHARE* is now accessible to pagecache_collect_changed_blocks_LSN(),
we don't need to store kfile/dfile descriptors in checkpoint record,
2-byte-id of the table plus one byte to say if this is data or index
file is enough. So we go from 4+4 bytes per table down to 2+1.
storage/maria/ma_commit.c:
removing duplicate functions (see _ma_tmp_disable_logging_for_table())
storage/maria/ma_extra.c:
Monty fixed
storage/maria/ma_key_recover.c:
comment
storage/maria/ma_locking.c:
Sometimes other code does funny things with maria_lock_database(),
like ha_maria::repair() calling it at start and end without going
through ha_maria::external_lock(). So it happens that maria_lock_database()
is called with now_transactional!=born_transactional.
storage/maria/ma_loghandler.c:
update to new prototype
storage/maria/ma_open.c:
set_data|index_pagecache_callbacks() need to be exported as
they are now called when disabling/enabling transactionality.
storage/maria/ma_pagecache.c:
Removing PAGE_LSN_OFFSET, as much of the code relies on it being
0 anyway (let's not give impression we can just change this constant).
When flushing a page to disk, call the get_log_address callback to
know up to which LSN the log should be flushed.
As we now can access MARIA_SHARE* we can know share->id and store
it into the checkpoint record; we thus go from 4 bytes per dirty page
to 2+1.
storage/maria/ma_pagecache.h:
get_log_address callback
storage/maria/ma_panic.c:
No reason to reset pagecache callbacks in HA_PANIC_READ:
all we do is reopen files if they were closed; callbacks should
be in place already as 'info' exists; we just want to modify
the file descriptors, not the full PAGECACHE_FILE structure.
If we open data file and it was closed, share->bitmap.file needs
to be set.
Note that the modified code is disabled anyway.
storage/maria/ma_recovery.c:
Checkpoint record does not contain kfile/dfile descriptors anymore
so code can be simplified. Hash key in all_dirty_pages is
not made from file_descriptor & pageno anymore, but
index_or_data & table-short-id & pageno.
If a table's create_rename_lsn is higher than record's LSN,
we skip the table and don't fail if it's corrupted (because the LSNs
say that we don't have to look at this table).
If a table is skipped (for example due to create_rename_lsn),
its UNDOs still cause undo_lsn to advance; this is so that if later
we notice the transaction has to rollback we fail (as table should
not be skipped in this case).
Fixing a bug: the dirty_pages list was never used, because
the LSN below which it was used was the minimum rec_lsn of dirty pages!
It is now the min(checkpoint_start_log_horizon, min(trn's rec_lsn)).
When we disable/reenable transactionality, we modify pagecache
callbacks (needed for example for get_log_address: changing
share->page_type is not enough anymore).
storage/maria/ma_write.c:
'records' and 'checksum' are protected: they are updated under
log's mutex in write-hooks when UNDO is written.
storage/maria/maria_chk.c:
remove use of duplicate functions.
storage/maria/maria_def.h:
set_data|index_pagecache_callbacks() need to be exported;
_ma_reenable_logging_for_table() changes to a real function.
storage/maria/unittest/ma_pagecache_consist.c:
new prototype
storage/maria/unittest/ma_pagecache_single.c:
new prototype
storage/maria/unittest/ma_test_loghandler_pagecache-t.c:
new prototype
2007-12-30 21:32:07 +01:00
|
|
|
{
|
|
|
|
#ifndef DBUG_OFF
|
|
|
|
const MARIA_SHARE *share= (MARIA_SHARE*)data_ptr;
|
|
|
|
#endif
|
2008-01-02 17:27:24 +01:00
|
|
|
DBUG_ENTER("flush_log_for_bitmap");
|
2008-01-07 17:54:41 +01:00
|
|
|
DBUG_ASSERT(share->now_transactional);
|
WL#3072 - Maria Recovery
* to honour WAL we now force the whole log when flushing a bitmap page.
* ability to intentionally crash in various places for recovery testing
* bugfix (dirty pages list found in checkpoint record was ignored)
* smaller checkpoint record
* misc small cleanups and comments
mysql-test/include/maria_empty_logs.inc:
maria-purge.test creates ~11 logs, remove them all
mysql-test/r/maria-recovery-bitmap.result:
result is good; without the _ma_bitmap_get_log_address() call,
we got
check error Bitmap at 0 has pages reserved outside of data file length
mysql-test/r/maria-recovery.result:
result update
mysql-test/t/maria-recovery-bitmap.test:
enable test of "bitmap-flush should flush whole log otherwise
corrupted data file (bitmap ahead of data pages)".
mysql-test/t/maria-recovery.test:
test of checkpoint
sql/sql_table.cc:
comment
storage/maria/ha_maria.cc:
_ma_reenable_logging_for_table() now includes file->trn=0.
At the end of repair() we don't need to re-enable logging, it is
done already by caller (like copy_data_between_tables()); it sounds
strange that this function could decide to re-enable, it should be
up to caller who knows what other operations it plans. Removing this
line led to assertion failure in maria_lock_database(F_UNLCK), fixed
by removing the assertion: maria_lock_database()
is here called in a context where F_UNLCK does not make the
table visible to others so assertion is excessive, and external_lock()
is already designed to honour the asserted condition.
Ability to crash at the end of bulk insert when indices
have been enabled.
storage/maria/ma_bitmap.c:
Better use pagecache_file_init() than set pagecache callbacks directly;
and a new function to set those callbacks for bitmap so that we can
reuse it.
_ma_bitmap_get_log_address() is a pagecache get_log_address callback
which causes the whole log to be flushed when a bitmap page
is flushed by the page cache. This was required by WAL.
storage/maria/ma_blockrec.c:
get_log_address pagecache callback for data (non bitmap) pages:
just reads the LSN from the page's content, like was hard-coded
before in ma_pagecache.c.
storage/maria/ma_blockrec.h:
functions which need to be exported
storage/maria/ma_check.c:
create_new_data_handle() can be static.
Ability to crash after rebuilding the index in OPTIMIZE,
in REPAIR. my_lock() implemented already.
storage/maria/ma_checkpoint.c:
As MARIA_SHARE* is now accessible to pagecache_collect_changed_blocks_LSN(),
we don't need to store kfile/dfile descriptors in checkpoint record,
2-byte-id of the table plus one byte to say if this is data or index
file is enough. So we go from 4+4 bytes per table down to 2+1.
storage/maria/ma_commit.c:
removing duplicate functions (see _ma_tmp_disable_logging_for_table())
storage/maria/ma_extra.c:
Monty fixed
storage/maria/ma_key_recover.c:
comment
storage/maria/ma_locking.c:
Sometimes other code does funny things with maria_lock_database(),
like ha_maria::repair() calling it at start and end without going
through ha_maria::external_lock(). So it happens that maria_lock_database()
is called with now_transactional!=born_transactional.
storage/maria/ma_loghandler.c:
update to new prototype
storage/maria/ma_open.c:
set_data|index_pagecache_callbacks() need to be exported as
they are now called when disabling/enabling transactionality.
storage/maria/ma_pagecache.c:
Removing PAGE_LSN_OFFSET, as much of the code relies on it being
0 anyway (let's not give impression we can just change this constant).
When flushing a page to disk, call the get_log_address callback to
know up to which LSN the log should be flushed.
As we now can access MARIA_SHARE* we can know share->id and store
it into the checkpoint record; we thus go from 4 bytes per dirty page
to 2+1.
storage/maria/ma_pagecache.h:
get_log_address callback
storage/maria/ma_panic.c:
No reason to reset pagecache callbacks in HA_PANIC_READ:
all we do is reopen files if they were closed; callbacks should
be in place already as 'info' exists; we just want to modify
the file descriptors, not the full PAGECACHE_FILE structure.
If we open data file and it was closed, share->bitmap.file needs
to be set.
Note that the modified code is disabled anyway.
storage/maria/ma_recovery.c:
Checkpoint record does not contain kfile/dfile descriptors anymore
so code can be simplified. Hash key in all_dirty_pages is
not made from file_descriptor & pageno anymore, but
index_or_data & table-short-id & pageno.
If a table's create_rename_lsn is higher than record's LSN,
we skip the table and don't fail if it's corrupted (because the LSNs
say that we don't have to look at this table).
If a table is skipped (for example due to create_rename_lsn),
its UNDOs still cause undo_lsn to advance; this is so that if later
we notice the transaction has to rollback we fail (as table should
not be skipped in this case).
Fixing a bug: the dirty_pages list was never used, because
the LSN below which it was used was the minimum rec_lsn of dirty pages!
It is now the min(checkpoint_start_log_horizon, min(trn's rec_lsn)).
When we disable/reenable transactionality, we modify pagecache
callbacks (needed for example for get_log_address: changing
share->page_type is not enough anymore).
storage/maria/ma_write.c:
'records' and 'checksum' are protected: they are updated under
log's mutex in write-hooks when UNDO is written.
storage/maria/maria_chk.c:
remove use of duplicate functions.
storage/maria/maria_def.h:
set_data|index_pagecache_callbacks() need to be exported;
_ma_reenable_logging_for_table() changes to a real function.
storage/maria/unittest/ma_pagecache_consist.c:
new prototype
storage/maria/unittest/ma_pagecache_single.c:
new prototype
storage/maria/unittest/ma_test_loghandler_pagecache-t.c:
new prototype
2007-12-30 21:32:07 +01:00
|
|
|
/*
|
|
|
|
WAL imposes that UNDOs reach disk before bitmap is flushed. We don't know
|
|
|
|
the LSN of the last UNDO about this bitmap page, so we flush whole log.
|
|
|
|
*/
|
2008-01-02 17:27:24 +01:00
|
|
|
DBUG_RETURN(translog_flush(translog_get_horizon()));
|
WL#3072 - Maria Recovery
* to honour WAL we now force the whole log when flushing a bitmap page.
* ability to intentionally crash in various places for recovery testing
* bugfix (dirty pages list found in checkpoint record was ignored)
* smaller checkpoint record
* misc small cleanups and comments
mysql-test/include/maria_empty_logs.inc:
maria-purge.test creates ~11 logs, remove them all
mysql-test/r/maria-recovery-bitmap.result:
result is good; without the _ma_bitmap_get_log_address() call,
we got
check error Bitmap at 0 has pages reserved outside of data file length
mysql-test/r/maria-recovery.result:
result update
mysql-test/t/maria-recovery-bitmap.test:
enable test of "bitmap-flush should flush whole log otherwise
corrupted data file (bitmap ahead of data pages)".
mysql-test/t/maria-recovery.test:
test of checkpoint
sql/sql_table.cc:
comment
storage/maria/ha_maria.cc:
_ma_reenable_logging_for_table() now includes file->trn=0.
At the end of repair() we don't need to re-enable logging, it is
done already by caller (like copy_data_between_tables()); it sounds
strange that this function could decide to re-enable, it should be
up to caller who knows what other operations it plans. Removing this
line led to assertion failure in maria_lock_database(F_UNLCK), fixed
by removing the assertion: maria_lock_database()
is here called in a context where F_UNLCK does not make the
table visible to others so assertion is excessive, and external_lock()
is already designed to honour the asserted condition.
Ability to crash at the end of bulk insert when indices
have been enabled.
storage/maria/ma_bitmap.c:
Better use pagecache_file_init() than set pagecache callbacks directly;
and a new function to set those callbacks for bitmap so that we can
reuse it.
_ma_bitmap_get_log_address() is a pagecache get_log_address callback
which causes the whole log to be flushed when a bitmap page
is flushed by the page cache. This was required by WAL.
storage/maria/ma_blockrec.c:
get_log_address pagecache callback for data (non bitmap) pages:
just reads the LSN from the page's content, like was hard-coded
before in ma_pagecache.c.
storage/maria/ma_blockrec.h:
functions which need to be exported
storage/maria/ma_check.c:
create_new_data_handle() can be static.
Ability to crash after rebuilding the index in OPTIMIZE,
in REPAIR. my_lock() implemented already.
storage/maria/ma_checkpoint.c:
As MARIA_SHARE* is now accessible to pagecache_collect_changed_blocks_LSN(),
we don't need to store kfile/dfile descriptors in checkpoint record,
2-byte-id of the table plus one byte to say if this is data or index
file is enough. So we go from 4+4 bytes per table down to 2+1.
storage/maria/ma_commit.c:
removing duplicate functions (see _ma_tmp_disable_logging_for_table())
storage/maria/ma_extra.c:
Monty fixed
storage/maria/ma_key_recover.c:
comment
storage/maria/ma_locking.c:
Sometimes other code does funny things with maria_lock_database(),
like ha_maria::repair() calling it at start and end without going
through ha_maria::external_lock(). So it happens that maria_lock_database()
is called with now_transactional!=born_transactional.
storage/maria/ma_loghandler.c:
update to new prototype
storage/maria/ma_open.c:
set_data|index_pagecache_callbacks() need to be exported as
they are now called when disabling/enabling transactionality.
storage/maria/ma_pagecache.c:
Removing PAGE_LSN_OFFSET, as much of the code relies on it being
0 anyway (let's not give impression we can just change this constant).
When flushing a page to disk, call the get_log_address callback to
know up to which LSN the log should be flushed.
As we now can access MARIA_SHARE* we can know share->id and store
it into the checkpoint record; we thus go from 4 bytes per dirty page
to 2+1.
storage/maria/ma_pagecache.h:
get_log_address callback
storage/maria/ma_panic.c:
No reason to reset pagecache callbacks in HA_PANIC_READ:
all we do is reopen files if they were closed; callbacks should
be in place already as 'info' exists; we just want to modify
the file descriptors, not the full PAGECACHE_FILE structure.
If we open data file and it was closed, share->bitmap.file needs
to be set.
Note that the modified code is disabled anyway.
storage/maria/ma_recovery.c:
Checkpoint record does not contain kfile/dfile descriptors anymore
so code can be simplified. Hash key in all_dirty_pages is
not made from file_descriptor & pageno anymore, but
index_or_data & table-short-id & pageno.
If a table's create_rename_lsn is higher than record's LSN,
we skip the table and don't fail if it's corrupted (because the LSNs
say that we don't have to look at this table).
If a table is skipped (for example due to create_rename_lsn),
its UNDOs still cause undo_lsn to advance; this is so that if later
we notice the transaction has to rollback we fail (as table should
not be skipped in this case).
Fixing a bug: the dirty_pages list was never used, because
the LSN below which it was used was the minimum rec_lsn of dirty pages!
It is now the min(checkpoint_start_log_horizon, min(trn's rec_lsn)).
When we disable/reenable transactionality, we modify pagecache
callbacks (needed for example for get_log_address: changing
share->page_type is not enough anymore).
storage/maria/ma_write.c:
'records' and 'checksum' are protected: they are updated under
log's mutex in write-hooks when UNDO is written.
storage/maria/maria_chk.c:
remove use of duplicate functions.
storage/maria/maria_def.h:
set_data|index_pagecache_callbacks() need to be exported;
_ma_reenable_logging_for_table() changes to a real function.
storage/maria/unittest/ma_pagecache_consist.c:
new prototype
storage/maria/unittest/ma_pagecache_single.c:
new prototype
storage/maria/unittest/ma_test_loghandler_pagecache-t.c:
new prototype
2007-12-30 21:32:07 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2008-01-02 17:27:24 +01:00
|
|
|
/**
|
|
|
|
@brief Set callbacks for bitmap pages
|
|
|
|
|
|
|
|
@note
|
|
|
|
We don't use pagecache_file_init here, as we want to keep the
|
|
|
|
code readable
|
|
|
|
*/
|
|
|
|
|
WL#3072 - Maria Recovery
* to honour WAL we now force the whole log when flushing a bitmap page.
* ability to intentionally crash in various places for recovery testing
* bugfix (dirty pages list found in checkpoint record was ignored)
* smaller checkpoint record
* misc small cleanups and comments
mysql-test/include/maria_empty_logs.inc:
maria-purge.test creates ~11 logs, remove them all
mysql-test/r/maria-recovery-bitmap.result:
result is good; without the _ma_bitmap_get_log_address() call,
we got
check error Bitmap at 0 has pages reserved outside of data file length
mysql-test/r/maria-recovery.result:
result update
mysql-test/t/maria-recovery-bitmap.test:
enable test of "bitmap-flush should flush whole log otherwise
corrupted data file (bitmap ahead of data pages)".
mysql-test/t/maria-recovery.test:
test of checkpoint
sql/sql_table.cc:
comment
storage/maria/ha_maria.cc:
_ma_reenable_logging_for_table() now includes file->trn=0.
At the end of repair() we don't need to re-enable logging, it is
done already by caller (like copy_data_between_tables()); it sounds
strange that this function could decide to re-enable, it should be
up to caller who knows what other operations it plans. Removing this
line led to assertion failure in maria_lock_database(F_UNLCK), fixed
by removing the assertion: maria_lock_database()
is here called in a context where F_UNLCK does not make the
table visible to others so assertion is excessive, and external_lock()
is already designed to honour the asserted condition.
Ability to crash at the end of bulk insert when indices
have been enabled.
storage/maria/ma_bitmap.c:
Better use pagecache_file_init() than set pagecache callbacks directly;
and a new function to set those callbacks for bitmap so that we can
reuse it.
_ma_bitmap_get_log_address() is a pagecache get_log_address callback
which causes the whole log to be flushed when a bitmap page
is flushed by the page cache. This was required by WAL.
storage/maria/ma_blockrec.c:
get_log_address pagecache callback for data (non bitmap) pages:
just reads the LSN from the page's content, like was hard-coded
before in ma_pagecache.c.
storage/maria/ma_blockrec.h:
functions which need to be exported
storage/maria/ma_check.c:
create_new_data_handle() can be static.
Ability to crash after rebuilding the index in OPTIMIZE,
in REPAIR. my_lock() implemented already.
storage/maria/ma_checkpoint.c:
As MARIA_SHARE* is now accessible to pagecache_collect_changed_blocks_LSN(),
we don't need to store kfile/dfile descriptors in checkpoint record,
2-byte-id of the table plus one byte to say if this is data or index
file is enough. So we go from 4+4 bytes per table down to 2+1.
storage/maria/ma_commit.c:
removing duplicate functions (see _ma_tmp_disable_logging_for_table())
storage/maria/ma_extra.c:
Monty fixed
storage/maria/ma_key_recover.c:
comment
storage/maria/ma_locking.c:
Sometimes other code does funny things with maria_lock_database(),
like ha_maria::repair() calling it at start and end without going
through ha_maria::external_lock(). So it happens that maria_lock_database()
is called with now_transactional!=born_transactional.
storage/maria/ma_loghandler.c:
update to new prototype
storage/maria/ma_open.c:
set_data|index_pagecache_callbacks() need to be exported as
they are now called when disabling/enabling transactionality.
storage/maria/ma_pagecache.c:
Removing PAGE_LSN_OFFSET, as much of the code relies on it being
0 anyway (let's not give impression we can just change this constant).
When flushing a page to disk, call the get_log_address callback to
know up to which LSN the log should be flushed.
As we now can access MARIA_SHARE* we can know share->id and store
it into the checkpoint record; we thus go from 4 bytes per dirty page
to 2+1.
storage/maria/ma_pagecache.h:
get_log_address callback
storage/maria/ma_panic.c:
No reason to reset pagecache callbacks in HA_PANIC_READ:
all we do is reopen files if they were closed; callbacks should
be in place already as 'info' exists; we just want to modify
the file descriptors, not the full PAGECACHE_FILE structure.
If we open data file and it was closed, share->bitmap.file needs
to be set.
Note that the modified code is disabled anyway.
storage/maria/ma_recovery.c:
Checkpoint record does not contain kfile/dfile descriptors anymore
so code can be simplified. Hash key in all_dirty_pages is
not made from file_descriptor & pageno anymore, but
index_or_data & table-short-id & pageno.
If a table's create_rename_lsn is higher than record's LSN,
we skip the table and don't fail if it's corrupted (because the LSNs
say that we don't have to look at this table).
If a table is skipped (for example due to create_rename_lsn),
its UNDOs still cause undo_lsn to advance; this is so that if later
we notice the transaction has to rollback we fail (as table should
not be skipped in this case).
Fixing a bug: the dirty_pages list was never used, because
the LSN below which it was used was the minimum rec_lsn of dirty pages!
It is now the min(checkpoint_start_log_horizon, min(trn's rec_lsn)).
When we disable/reenable transactionality, we modify pagecache
callbacks (needed for example for get_log_address: changing
share->page_type is not enough anymore).
storage/maria/ma_write.c:
'records' and 'checksum' are protected: they are updated under
log's mutex in write-hooks when UNDO is written.
storage/maria/maria_chk.c:
remove use of duplicate functions.
storage/maria/maria_def.h:
set_data|index_pagecache_callbacks() need to be exported;
_ma_reenable_logging_for_table() changes to a real function.
storage/maria/unittest/ma_pagecache_consist.c:
new prototype
storage/maria/unittest/ma_pagecache_single.c:
new prototype
storage/maria/unittest/ma_test_loghandler_pagecache-t.c:
new prototype
2007-12-30 21:32:07 +01:00
|
|
|
void _ma_bitmap_set_pagecache_callbacks(PAGECACHE_FILE *file,
|
|
|
|
MARIA_SHARE *share)
|
|
|
|
{
|
2008-01-02 17:27:24 +01:00
|
|
|
file->callback_data= (uchar*) share;
|
|
|
|
file->flush_log_callback= maria_flush_log_for_page_none;
|
|
|
|
file->write_fail= maria_page_write_failure;
|
|
|
|
|
WL#3072 - Maria Recovery
* to honour WAL we now force the whole log when flushing a bitmap page.
* ability to intentionally crash in various places for recovery testing
* bugfix (dirty pages list found in checkpoint record was ignored)
* smaller checkpoint record
* misc small cleanups and comments
mysql-test/include/maria_empty_logs.inc:
maria-purge.test creates ~11 logs, remove them all
mysql-test/r/maria-recovery-bitmap.result:
result is good; without the _ma_bitmap_get_log_address() call,
we got
check error Bitmap at 0 has pages reserved outside of data file length
mysql-test/r/maria-recovery.result:
result update
mysql-test/t/maria-recovery-bitmap.test:
enable test of "bitmap-flush should flush whole log otherwise
corrupted data file (bitmap ahead of data pages)".
mysql-test/t/maria-recovery.test:
test of checkpoint
sql/sql_table.cc:
comment
storage/maria/ha_maria.cc:
_ma_reenable_logging_for_table() now includes file->trn=0.
At the end of repair() we don't need to re-enable logging, it is
done already by caller (like copy_data_between_tables()); it sounds
strange that this function could decide to re-enable, it should be
up to caller who knows what other operations it plans. Removing this
line led to assertion failure in maria_lock_database(F_UNLCK), fixed
by removing the assertion: maria_lock_database()
is here called in a context where F_UNLCK does not make the
table visible to others so assertion is excessive, and external_lock()
is already designed to honour the asserted condition.
Ability to crash at the end of bulk insert when indices
have been enabled.
storage/maria/ma_bitmap.c:
Better use pagecache_file_init() than set pagecache callbacks directly;
and a new function to set those callbacks for bitmap so that we can
reuse it.
_ma_bitmap_get_log_address() is a pagecache get_log_address callback
which causes the whole log to be flushed when a bitmap page
is flushed by the page cache. This was required by WAL.
storage/maria/ma_blockrec.c:
get_log_address pagecache callback for data (non bitmap) pages:
just reads the LSN from the page's content, like was hard-coded
before in ma_pagecache.c.
storage/maria/ma_blockrec.h:
functions which need to be exported
storage/maria/ma_check.c:
create_new_data_handle() can be static.
Ability to crash after rebuilding the index in OPTIMIZE,
in REPAIR. my_lock() implemented already.
storage/maria/ma_checkpoint.c:
As MARIA_SHARE* is now accessible to pagecache_collect_changed_blocks_LSN(),
we don't need to store kfile/dfile descriptors in checkpoint record,
2-byte-id of the table plus one byte to say if this is data or index
file is enough. So we go from 4+4 bytes per table down to 2+1.
storage/maria/ma_commit.c:
removing duplicate functions (see _ma_tmp_disable_logging_for_table())
storage/maria/ma_extra.c:
Monty fixed
storage/maria/ma_key_recover.c:
comment
storage/maria/ma_locking.c:
Sometimes other code does funny things with maria_lock_database(),
like ha_maria::repair() calling it at start and end without going
through ha_maria::external_lock(). So it happens that maria_lock_database()
is called with now_transactional!=born_transactional.
storage/maria/ma_loghandler.c:
update to new prototype
storage/maria/ma_open.c:
set_data|index_pagecache_callbacks() need to be exported as
they are now called when disabling/enabling transactionality.
storage/maria/ma_pagecache.c:
Removing PAGE_LSN_OFFSET, as much of the code relies on it being
0 anyway (let's not give impression we can just change this constant).
When flushing a page to disk, call the get_log_address callback to
know up to which LSN the log should be flushed.
As we now can access MARIA_SHARE* we can know share->id and store
it into the checkpoint record; we thus go from 4 bytes per dirty page
to 2+1.
storage/maria/ma_pagecache.h:
get_log_address callback
storage/maria/ma_panic.c:
No reason to reset pagecache callbacks in HA_PANIC_READ:
all we do is reopen files if they were closed; callbacks should
be in place already as 'info' exists; we just want to modify
the file descriptors, not the full PAGECACHE_FILE structure.
If we open data file and it was closed, share->bitmap.file needs
to be set.
Note that the modified code is disabled anyway.
storage/maria/ma_recovery.c:
Checkpoint record does not contain kfile/dfile descriptors anymore
so code can be simplified. Hash key in all_dirty_pages is
not made from file_descriptor & pageno anymore, but
index_or_data & table-short-id & pageno.
If a table's create_rename_lsn is higher than record's LSN,
we skip the table and don't fail if it's corrupted (because the LSNs
say that we don't have to look at this table).
If a table is skipped (for example due to create_rename_lsn),
its UNDOs still cause undo_lsn to advance; this is so that if later
we notice the transaction has to rollback we fail (as table should
not be skipped in this case).
Fixing a bug: the dirty_pages list was never used, because
the LSN below which it was used was the minimum rec_lsn of dirty pages!
It is now the min(checkpoint_start_log_horizon, min(trn's rec_lsn)).
When we disable/reenable transactionality, we modify pagecache
callbacks (needed for example for get_log_address: changing
share->page_type is not enough anymore).
storage/maria/ma_write.c:
'records' and 'checksum' are protected: they are updated under
log's mutex in write-hooks when UNDO is written.
storage/maria/maria_chk.c:
remove use of duplicate functions.
storage/maria/maria_def.h:
set_data|index_pagecache_callbacks() need to be exported;
_ma_reenable_logging_for_table() changes to a real function.
storage/maria/unittest/ma_pagecache_consist.c:
new prototype
storage/maria/unittest/ma_pagecache_single.c:
new prototype
storage/maria/unittest/ma_test_loghandler_pagecache-t.c:
new prototype
2007-12-30 21:32:07 +01:00
|
|
|
if (share->temporary)
|
2008-01-02 17:27:24 +01:00
|
|
|
{
|
|
|
|
file->read_callback= &maria_page_crc_check_none;
|
|
|
|
file->write_callback= &maria_page_filler_set_none;
|
|
|
|
}
|
WL#3072 - Maria Recovery
* to honour WAL we now force the whole log when flushing a bitmap page.
* ability to intentionally crash in various places for recovery testing
* bugfix (dirty pages list found in checkpoint record was ignored)
* smaller checkpoint record
* misc small cleanups and comments
mysql-test/include/maria_empty_logs.inc:
maria-purge.test creates ~11 logs, remove them all
mysql-test/r/maria-recovery-bitmap.result:
result is good; without the _ma_bitmap_get_log_address() call,
we got
check error Bitmap at 0 has pages reserved outside of data file length
mysql-test/r/maria-recovery.result:
result update
mysql-test/t/maria-recovery-bitmap.test:
enable test of "bitmap-flush should flush whole log otherwise
corrupted data file (bitmap ahead of data pages)".
mysql-test/t/maria-recovery.test:
test of checkpoint
sql/sql_table.cc:
comment
storage/maria/ha_maria.cc:
_ma_reenable_logging_for_table() now includes file->trn=0.
At the end of repair() we don't need to re-enable logging, it is
done already by caller (like copy_data_between_tables()); it sounds
strange that this function could decide to re-enable, it should be
up to caller who knows what other operations it plans. Removing this
line led to assertion failure in maria_lock_database(F_UNLCK), fixed
by removing the assertion: maria_lock_database()
is here called in a context where F_UNLCK does not make the
table visible to others so assertion is excessive, and external_lock()
is already designed to honour the asserted condition.
Ability to crash at the end of bulk insert when indices
have been enabled.
storage/maria/ma_bitmap.c:
Better use pagecache_file_init() than set pagecache callbacks directly;
and a new function to set those callbacks for bitmap so that we can
reuse it.
_ma_bitmap_get_log_address() is a pagecache get_log_address callback
which causes the whole log to be flushed when a bitmap page
is flushed by the page cache. This was required by WAL.
storage/maria/ma_blockrec.c:
get_log_address pagecache callback for data (non bitmap) pages:
just reads the LSN from the page's content, like was hard-coded
before in ma_pagecache.c.
storage/maria/ma_blockrec.h:
functions which need to be exported
storage/maria/ma_check.c:
create_new_data_handle() can be static.
Ability to crash after rebuilding the index in OPTIMIZE,
in REPAIR. my_lock() implemented already.
storage/maria/ma_checkpoint.c:
As MARIA_SHARE* is now accessible to pagecache_collect_changed_blocks_LSN(),
we don't need to store kfile/dfile descriptors in checkpoint record,
2-byte-id of the table plus one byte to say if this is data or index
file is enough. So we go from 4+4 bytes per table down to 2+1.
storage/maria/ma_commit.c:
removing duplicate functions (see _ma_tmp_disable_logging_for_table())
storage/maria/ma_extra.c:
Monty fixed
storage/maria/ma_key_recover.c:
comment
storage/maria/ma_locking.c:
Sometimes other code does funny things with maria_lock_database(),
like ha_maria::repair() calling it at start and end without going
through ha_maria::external_lock(). So it happens that maria_lock_database()
is called with now_transactional!=born_transactional.
storage/maria/ma_loghandler.c:
update to new prototype
storage/maria/ma_open.c:
set_data|index_pagecache_callbacks() need to be exported as
they are now called when disabling/enabling transactionality.
storage/maria/ma_pagecache.c:
Removing PAGE_LSN_OFFSET, as much of the code relies on it being
0 anyway (let's not give impression we can just change this constant).
When flushing a page to disk, call the get_log_address callback to
know up to which LSN the log should be flushed.
As we now can access MARIA_SHARE* we can know share->id and store
it into the checkpoint record; we thus go from 4 bytes per dirty page
to 2+1.
storage/maria/ma_pagecache.h:
get_log_address callback
storage/maria/ma_panic.c:
No reason to reset pagecache callbacks in HA_PANIC_READ:
all we do is reopen files if they were closed; callbacks should
be in place already as 'info' exists; we just want to modify
the file descriptors, not the full PAGECACHE_FILE structure.
If we open data file and it was closed, share->bitmap.file needs
to be set.
Note that the modified code is disabled anyway.
storage/maria/ma_recovery.c:
Checkpoint record does not contain kfile/dfile descriptors anymore
so code can be simplified. Hash key in all_dirty_pages is
not made from file_descriptor & pageno anymore, but
index_or_data & table-short-id & pageno.
If a table's create_rename_lsn is higher than record's LSN,
we skip the table and don't fail if it's corrupted (because the LSNs
say that we don't have to look at this table).
If a table is skipped (for example due to create_rename_lsn),
its UNDOs still cause undo_lsn to advance; this is so that if later
we notice the transaction has to rollback we fail (as table should
not be skipped in this case).
Fixing a bug: the dirty_pages list was never used, because
the LSN below which it was used was the minimum rec_lsn of dirty pages!
It is now the min(checkpoint_start_log_horizon, min(trn's rec_lsn)).
When we disable/reenable transactionality, we modify pagecache
callbacks (needed for example for get_log_address: changing
share->page_type is not enough anymore).
storage/maria/ma_write.c:
'records' and 'checksum' are protected: they are updated under
log's mutex in write-hooks when UNDO is written.
storage/maria/maria_chk.c:
remove use of duplicate functions.
storage/maria/maria_def.h:
set_data|index_pagecache_callbacks() need to be exported;
_ma_reenable_logging_for_table() changes to a real function.
storage/maria/unittest/ma_pagecache_consist.c:
new prototype
storage/maria/unittest/ma_pagecache_single.c:
new prototype
storage/maria/unittest/ma_test_loghandler_pagecache-t.c:
new prototype
2007-12-30 21:32:07 +01:00
|
|
|
else
|
2008-01-02 17:27:24 +01:00
|
|
|
{
|
|
|
|
file->read_callback= &maria_page_crc_check_bitmap;
|
|
|
|
if (share->options & HA_OPTION_PAGE_CHECKSUM)
|
|
|
|
file->write_callback= &maria_page_crc_set_normal;
|
|
|
|
else
|
|
|
|
file->write_callback= &maria_page_filler_set_bitmap;
|
|
|
|
if (share->now_transactional)
|
|
|
|
file->flush_log_callback= flush_log_for_bitmap;
|
|
|
|
}
|
WL#3072 - Maria Recovery
* to honour WAL we now force the whole log when flushing a bitmap page.
* ability to intentionally crash in various places for recovery testing
* bugfix (dirty pages list found in checkpoint record was ignored)
* smaller checkpoint record
* misc small cleanups and comments
mysql-test/include/maria_empty_logs.inc:
maria-purge.test creates ~11 logs, remove them all
mysql-test/r/maria-recovery-bitmap.result:
result is good; without the _ma_bitmap_get_log_address() call,
we got
check error Bitmap at 0 has pages reserved outside of data file length
mysql-test/r/maria-recovery.result:
result update
mysql-test/t/maria-recovery-bitmap.test:
enable test of "bitmap-flush should flush whole log otherwise
corrupted data file (bitmap ahead of data pages)".
mysql-test/t/maria-recovery.test:
test of checkpoint
sql/sql_table.cc:
comment
storage/maria/ha_maria.cc:
_ma_reenable_logging_for_table() now includes file->trn=0.
At the end of repair() we don't need to re-enable logging, it is
done already by caller (like copy_data_between_tables()); it sounds
strange that this function could decide to re-enable, it should be
up to caller who knows what other operations it plans. Removing this
line led to assertion failure in maria_lock_database(F_UNLCK), fixed
by removing the assertion: maria_lock_database()
is here called in a context where F_UNLCK does not make the
table visible to others so assertion is excessive, and external_lock()
is already designed to honour the asserted condition.
Ability to crash at the end of bulk insert when indices
have been enabled.
storage/maria/ma_bitmap.c:
Better use pagecache_file_init() than set pagecache callbacks directly;
and a new function to set those callbacks for bitmap so that we can
reuse it.
_ma_bitmap_get_log_address() is a pagecache get_log_address callback
which causes the whole log to be flushed when a bitmap page
is flushed by the page cache. This was required by WAL.
storage/maria/ma_blockrec.c:
get_log_address pagecache callback for data (non bitmap) pages:
just reads the LSN from the page's content, like was hard-coded
before in ma_pagecache.c.
storage/maria/ma_blockrec.h:
functions which need to be exported
storage/maria/ma_check.c:
create_new_data_handle() can be static.
Ability to crash after rebuilding the index in OPTIMIZE,
in REPAIR. my_lock() implemented already.
storage/maria/ma_checkpoint.c:
As MARIA_SHARE* is now accessible to pagecache_collect_changed_blocks_LSN(),
we don't need to store kfile/dfile descriptors in checkpoint record,
2-byte-id of the table plus one byte to say if this is data or index
file is enough. So we go from 4+4 bytes per table down to 2+1.
storage/maria/ma_commit.c:
removing duplicate functions (see _ma_tmp_disable_logging_for_table())
storage/maria/ma_extra.c:
Monty fixed
storage/maria/ma_key_recover.c:
comment
storage/maria/ma_locking.c:
Sometimes other code does funny things with maria_lock_database(),
like ha_maria::repair() calling it at start and end without going
through ha_maria::external_lock(). So it happens that maria_lock_database()
is called with now_transactional!=born_transactional.
storage/maria/ma_loghandler.c:
update to new prototype
storage/maria/ma_open.c:
set_data|index_pagecache_callbacks() need to be exported as
they are now called when disabling/enabling transactionality.
storage/maria/ma_pagecache.c:
Removing PAGE_LSN_OFFSET, as much of the code relies on it being
0 anyway (let's not give impression we can just change this constant).
When flushing a page to disk, call the get_log_address callback to
know up to which LSN the log should be flushed.
As we now can access MARIA_SHARE* we can know share->id and store
it into the checkpoint record; we thus go from 4 bytes per dirty page
to 2+1.
storage/maria/ma_pagecache.h:
get_log_address callback
storage/maria/ma_panic.c:
No reason to reset pagecache callbacks in HA_PANIC_READ:
all we do is reopen files if they were closed; callbacks should
be in place already as 'info' exists; we just want to modify
the file descriptors, not the full PAGECACHE_FILE structure.
If we open data file and it was closed, share->bitmap.file needs
to be set.
Note that the modified code is disabled anyway.
storage/maria/ma_recovery.c:
Checkpoint record does not contain kfile/dfile descriptors anymore
so code can be simplified. Hash key in all_dirty_pages is
not made from file_descriptor & pageno anymore, but
index_or_data & table-short-id & pageno.
If a table's create_rename_lsn is higher than record's LSN,
we skip the table and don't fail if it's corrupted (because the LSNs
say that we don't have to look at this table).
If a table is skipped (for example due to create_rename_lsn),
its UNDOs still cause undo_lsn to advance; this is so that if later
we notice the transaction has to rollback we fail (as table should
not be skipped in this case).
Fixing a bug: the dirty_pages list was never used, because
the LSN below which it was used was the minimum rec_lsn of dirty pages!
It is now the min(checkpoint_start_log_horizon, min(trn's rec_lsn)).
When we disable/reenable transactionality, we modify pagecache
callbacks (needed for example for get_log_address: changing
share->page_type is not enough anymore).
storage/maria/ma_write.c:
'records' and 'checksum' are protected: they are updated under
log's mutex in write-hooks when UNDO is written.
storage/maria/maria_chk.c:
remove use of duplicate functions.
storage/maria/maria_def.h:
set_data|index_pagecache_callbacks() need to be exported;
_ma_reenable_logging_for_table() changes to a real function.
storage/maria/unittest/ma_pagecache_consist.c:
new prototype
storage/maria/unittest/ma_pagecache_single.c:
new prototype
storage/maria/unittest/ma_test_loghandler_pagecache-t.c:
new prototype
2007-12-30 21:32:07 +01:00
|
|
|
}
|
WL#3072 - Maria Recovery
Bulk insert: don't log REDO/UNDO for rows, log one UNDO which will
truncate files; this is an optimization and a bugfix (table was left
half-repaired by crash).
Repair: mark table crashed-on-repair at start, bump skip_redo_lsn at start,
this is easier for recovery (tells it to skip old REDOs or even UNDO
phase) and user (tells it to repair) in case of crash, sync files
in the end.
Recovery skips missing or corrupted table and moves to next record
(in REDO or UNDO phase) to be more robust; warns if this happens in UNDO phase.
Bugfix for UNDO_KEY_DELETE_WITH_ROOT (tested in ma_test_recovery)
and maria_enable_indexes().
Create missing bitmaps when needed (there can be more than one to create,
in rare cases), log a record for this.
include/myisamchk.h:
new flag: bulk insert repair mustn't bump create_rename_lsn
mysql-test/lib/mtr_report.pl:
skip normal warning in maria-recovery.test
mysql-test/r/maria-recovery.result:
result: crash before bulk insert is committed, causes proper rollback,
and crash right after OPTIMIZE replaces index file with new index file
leads to table marked corrupted and recovery not failing.
mysql-test/t/maria-recovery.test:
- can't check the table or it would commit the transaction,
but check is made after recovery.
- test of crash before bulk-insert-with-repair is committed
(to see if it is rolled back), and of crash after OPTIMIZE has replaced
index file but not finished all operations (to see if recovery fails -
it used to assert when trying to execute an old REDO on the new
index).
storage/maria/CMakeLists.txt:
new file
storage/maria/Makefile.am:
new file
storage/maria/ha_maria.cc:
- If bulk insert on a transactional table using an index repair:
table is initially empty, so don't log REDO/UNDO for data rows
(optimization), just log an UNDO_BULK_INSERT_WITH_REPAIR
which will, if executed, empty the data and index file. Re-enable
logging in end_bulk_insert().
- write log record for repair operation only after it's fully done,
index sort included (maria_repair*() used to write the log record).
- Adding back file->trn=NULL which was removed by mistake earlier.
storage/maria/ha_maria.h:
new member (see ha_maria.cc)
storage/maria/ma_bitmap.c:
Functions to create missing bitmaps:
- one function which creates missing bitmaps in page cache, except
the missing one with max offset which it does not put into page cache
as it will be modified very soon.
- one function which the one above calls, and creates bitmaps in page
cache
- one function to execute REDO_BITMAP_NEW_PAGE which uses the second
one above.
storage/maria/ma_blockrec.c:
- when logging REDO_DELETE_ALL, not only 'records' and 'checksum'
have to be reset under log's mutex.
- execution of REDO_INSERT_ROW_BLOBS now checks the dirty pages' list
- execution of UNDO_BULK_INSERT_WITH_REPAIR
storage/maria/ma_blockrec.h:
new functions
storage/maria/ma_check.c:
- table-flush-before-repair is moved to a separate function reused
by maria_sort_index(); syncing is added
- maria_repair() is allowed to re-enable logging only if it is the one
which disabled it.
- "_ma_flush_table_files_after_repair" was a bad name, it's not after
repair now, and it should not sync as we do more changes to the files
shortly after (sync is postponed to when writing the log record)
- REDO_REPAIR record should be written only after all repair
operations (in particular after sorting index in ha_maria::repair())
- close to the end of repair by sort, flushing of pages must happen
also in the non-quick case, to prepare for the sync at end.
- in parallel repair, some page flushes are not needed as done
by initialize_variables_for_repair().
storage/maria/ma_create.c:
Update skip_redo_lsn, create_rename_lsn optionally.
storage/maria/ma_delete_all.c:
Need to sync files at end of maria_delete_all_rows(), if transactional.
storage/maria/ma_extra.c:
During repair, we sometimes call _ma_flush_table_files() (via
_ma_flush_table_files_before_swap()) while there is a WRITE_CACHE.
storage/maria/ma_key_recover.c:
- when we see CLR_END for UNDO_BULK_INSERT_WITH_REPAIR, re-enable
indices.
- fixing bug: _ma_apply_undo_key_delete() parsed UNDO_KEY_DELETE_WITH_ROOT
wrongly, leading to recovery failure
storage/maria/ma_key_recover.h:
new prototype
storage/maria/ma_locking.c:
DBUG_VOID_RETURN missing
storage/maria/ma_loghandler.c:
UNDO for bulk insert with repair, and REDO for creating bitmaps.
LOGREC_FIRST_FREE to not have to change the for() every time we
add a new record type.
storage/maria/ma_loghandler.h:
new UNDO and REDO
storage/maria/ma_open.c:
Move share.kfile.file=kfile up a bit, so that _ma_update_state_lsns()
can get its value, this fixes a bug where LSN_REPAIRED_BY_MARIA_CHK
was not corrected on disk by maria_open().
Store skip_redo_lsn in index' header.
maria_enable_indexes() had a bug for BLOCK_RECORD, where an empty
file has one page, not 0 bytes.
storage/maria/ma_recovery.c:
- Skip a corrupted, missing, or repaired-with-maria_chk, table in
recovery: don't fail, just go to next REDO or UNDO; but if an UNDO
is skipped in UNDO phase we issue warnings.
- Skip REDO|UNDO in REDO phase if <skip_redo_lsn.
- If UNDO phase fails, delete transactions to not make trnman
assert.
- Update skip_redo_lsn when playing REDO_CREATE_TABLE
- Don't record UNDOs for old transactions which we don't know (long_trid==0)
- Bugfix for UNDO_KEY_DELETE_WITH_ROOT (see ma_key_recover.c)
- Execution of UNDO_BULK_INSERT_WITH_REPAIR
- Don't try to find a page number in REDO_DELETE_ALL
- Pieces moved to ma_recovery_util.c
storage/maria/ma_rename.c:
name change
storage/maria/ma_static.c:
I modified layout of the index' header (inserted skip_redo_lsn in its middle)
storage/maria/ma_test2.c:
allow breaking the test towards the end, tests execution of
UNDO_KEY_DELETE_WITH_ROOT
storage/maria/ma_test_recovery.expected:
6 as testflag instead of 4
storage/maria/ma_test_recovery:
Increase the amount of rollback work to do when testing recovery
with ma_test2; this reproduces the UNDO_KEY_DELETE_WITH_ROOT bug.
storage/maria/maria_chk.c:
skip_redo_lsn should be updated too, for consistency.
Write a REDO_REPAIR after all operations (including sort-records)
have been done.
No reason to flush blocks after maria_chk_data_link() and
maria_sort_records(), there is maria_close() in the end.
write_log_record() is a function, to not clutter maria_chk().
storage/maria/maria_def.h:
New member skip_redo_lsn in the state, and comments
storage/maria/maria_pack.c:
skip_redo_lsn should be updated too, for consistency
storage/maria/ma_recovery_util.c:
_ma_redo_not_needed_for_page(), defined in ma_recovery.c, is needed
by ma_blockrec.c; this causes link issues, resolved by putting
_ma_redo_not_needed_for_page() into a new file (so that it is not
in the same file as repair-related objects of ma_recovery.c).
storage/maria/ma_recovery_util.h:
new file
2008-01-17 23:59:32 +01:00
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
Extends data file with zeroes and creates new bitmap pages into page cache.
|
|
|
|
|
2008-01-19 04:51:38 +01:00
|
|
|
Writes all bitmap pages in [from, to].
|
|
|
|
|
WL#3072 - Maria Recovery
Bulk insert: don't log REDO/UNDO for rows, log one UNDO which will
truncate files; this is an optimization and a bugfix (table was left
half-repaired by crash).
Repair: mark table crashed-on-repair at start, bump skip_redo_lsn at start,
this is easier for recovery (tells it to skip old REDOs or even UNDO
phase) and user (tells it to repair) in case of crash, sync files
in the end.
Recovery skips missing or corrupted table and moves to next record
(in REDO or UNDO phase) to be more robust; warns if this happens in UNDO phase.
Bugfix for UNDO_KEY_DELETE_WITH_ROOT (tested in ma_test_recovery)
and maria_enable_indexes().
Create missing bitmaps when needed (there can be more than one to create,
in rare cases), log a record for this.
include/myisamchk.h:
new flag: bulk insert repair mustn't bump create_rename_lsn
mysql-test/lib/mtr_report.pl:
skip normal warning in maria-recovery.test
mysql-test/r/maria-recovery.result:
result: crash before bulk insert is committed, causes proper rollback,
and crash right after OPTIMIZE replaces index file with new index file
leads to table marked corrupted and recovery not failing.
mysql-test/t/maria-recovery.test:
- can't check the table or it would commit the transaction,
but check is made after recovery.
- test of crash before bulk-insert-with-repair is committed
(to see if it is rolled back), and of crash after OPTIMIZE has replaced
index file but not finished all operations (to see if recovery fails -
it used to assert when trying to execute an old REDO on the new
index).
storage/maria/CMakeLists.txt:
new file
storage/maria/Makefile.am:
new file
storage/maria/ha_maria.cc:
- If bulk insert on a transactional table using an index repair:
table is initially empty, so don't log REDO/UNDO for data rows
(optimization), just log an UNDO_BULK_INSERT_WITH_REPAIR
which will, if executed, empty the data and index file. Re-enable
logging in end_bulk_insert().
- write log record for repair operation only after it's fully done,
index sort including (maria_repair*() used to write the log record).
- Adding back file->trn=NULL which was removed by mistake earlier.
storage/maria/ha_maria.h:
new member (see ha_maria.cc)
storage/maria/ma_bitmap.c:
Functions to create missing bitmaps:
- one function which creates missing bitmaps in page cache, except
the missing one with max offset which it does not put into page cache
as it will be modified very soon.
- one function which the one above calls, and creates bitmaps in page
cache
- one function to execute REDO_BITMAP_NEW_PAGE which uses the second
one above.
storage/maria/ma_blockrec.c:
- when logging REDO_DELETE_ALL, not only 'records' and 'checksum'
has to be reset under log's mutex.
- execution of REDO_INSERT_ROW_BLOBS now checks the dirty pages' list
- execution of UNDO_BULK_INSERT_WITH_REPAIR
storage/maria/ma_blockrec.h:
new functions
storage/maria/ma_check.c:
- table-flush-before-repair is moved to a separate function reused
by maria_sort_index(); syncing is added
- maria_repair() is allowed to re-enable logging only if it is the one
which disabled it.
- "_ma_flush_table_files_after_repair" was a bad name, it's not after
repair now, and it should not sync as we do more changes to the files
shortly after (sync is postponed to when writing the log record)
- REDO_REPAIR record should be written only after all repair
operations (in particular after sorting index in ha_maria::repair())
- close to the end of repair by sort, flushing of pages must happen
also in the non-quick case, to prepare for the sync at end.
- in parallel repair, some page flushes are not needed as done
by initialize_variables_for_repair().
storage/maria/ma_create.c:
Update skip_redo_lsn, create_rename_lsn optionally.
storage/maria/ma_delete_all.c:
Need to sync files at end of maria_delete_all_rows(), if transactional.
storage/maria/ma_extra.c:
During repair, we sometimes call _ma_flush_table_files() (via
_ma_flush_table_files_before_swap()) while there is a WRITE_CACHE.
storage/maria/ma_key_recover.c:
- when we see CLR_END for UNDO_BULK_INSERT_WITH_REPAIR, re-enable
indices.
- fixing bug: _ma_apply_undo_key_delete() parsed UNDO_KEY_DELETE_WITH_ROOT
wrongly, leading to recovery failure
storage/maria/ma_key_recover.h:
new prototype
storage/maria/ma_locking.c:
DBUG_VOID_RETURN missing
storage/maria/ma_loghandler.c:
UNDO for bulk insert with repair, and REDO for creating bitmaps.
LOGREC_FIRST_FREE to not have to change the for() every time we
add a new record type.
storage/maria/ma_loghandler.h:
new UNDO and REDO
storage/maria/ma_open.c:
Move share.kfile.file=kfile up a bit, so that _ma_update_state_lsns()
can get its value, this fixes a bug where LSN_REPAIRED_BY_MARIA_CHK
was not corrected on disk by maria_open().
Store skip_redo_lsn in index' header.
maria_enable_indexes() had a bug for BLOCK_RECORD, where an empty
file has one page, not 0 bytes.
storage/maria/ma_recovery.c:
- Skip a corrupted, missing, or repaired-with-maria_chk, table in
recovery: don't fail, just go to next REDO or UNDO; but if an UNDO
is skipped in UNDO phase we issue warnings.
- Skip REDO|UNDO in REDO phase if <skip_redo_lsn.
- If UNDO phase fails, delete transactions to not make trnman
assert.
- Update skip_redo_lsn when playing REDO_CREATE_TABLE
- Don't record UNDOs for old transactions which we don't know (long_trid==0)
- Bugfix for UNDO_KEY_DELETE_WITH_ROOT (see ma_key_recover.c)
- Execution of UNDO_BULK_INSERT_WITH_REPAIR
- Don't try to find a page number in REDO_DELETE_ALL
- Pieces moved to ma_recovery_util.c
storage/maria/ma_rename.c:
name change
storage/maria/ma_static.c:
I modified layout of the index' header (inserted skip_redo_lsn in its middle)
storage/maria/ma_test2.c:
allow breaking the test towards the end, tests execution of
UNDO_KEY_DELETE_WITH_ROOT
storage/maria/ma_test_recovery.expected:
6 as testflag instead of 4
storage/maria/ma_test_recovery:
Increase the amount of rollback work to do when testing recovery
with ma_test2; this reproduces the UNDO_KEY_DELETE_WITH_ROOT bug.
storage/maria/maria_chk.c:
skip_redo_lsn should be updated too, for consistency.
Write a REDO_REPAIR after all operations (including sort-records)
have been done.
No reason to flush blocks after maria_chk_data_link() and
maria_sort_records(), there is maria_close() in the end.
write_log_record() is a function, to not clutter maria_chk().
storage/maria/maria_def.h:
New member skip_redo_lsn in the state, and comments
storage/maria/maria_pack.c:
skip_redo_lsn should be updated too, for consistency
storage/maria/ma_recovery_util.c:
_ma_redo_not_needed_for_page(), defined in ma_recovery.c, is needed
by ma_blockrec.c; this causes link issues, resolved by putting
_ma_redo_not_needed_for_page() into a new file (so that it is not
in the same file as repair-related objects of ma_recovery.c).
storage/maria/ma_recovery_util.h:
new file
2008-01-17 23:59:32 +01:00
|
|
|
Non-bitmap pages of zeroes are correct as they are marked empty in
|
|
|
|
bitmaps. Bitmap pages will not be zeroes: they will get their CRC fixed when
|
|
|
|
flushed. And if there is a crash before flush (so they are zeroes at
|
|
|
|
restart), a REDO will re-create them in page cache.
|
|
|
|
*/
|
|
|
|
|
|
|
|
static my_bool
|
|
|
|
_ma_bitmap_create_missing_into_pagecache(MARIA_SHARE *share,
|
|
|
|
MARIA_FILE_BITMAP *bitmap,
|
|
|
|
pgcache_page_no_t from,
|
|
|
|
pgcache_page_no_t to,
|
|
|
|
uchar *zeroes)
|
|
|
|
{
|
|
|
|
pgcache_page_no_t i;
|
|
|
|
/*
|
2008-01-19 04:51:38 +01:00
|
|
|
We do not use my_chsize() because there can be a race between when it
|
|
|
|
reads the physical size and when it writes (assume data_file_length is 10,
|
|
|
|
physical length is 8 and two data pages are in cache, and here we do a
|
|
|
|
my_chsize: my_chsize sees physical length is 8, then the two data pages go
|
|
|
|
to disk then my_chsize writes from page 8 and so overwrites the two data
|
|
|
|
pages, wrongly).
|
|
|
|
We instead rely on the filesystem filling gaps with zeroes.
|
WL#3072 - Maria Recovery
Bulk insert: don't log REDO/UNDO for rows, log one UNDO which will
truncate files; this is an optimization and a bugfix (table was left
half-repaired by crash).
Repair: mark table crashed-on-repair at start, bump skip_redo_lsn at start,
this is easier for recovery (tells it to skip old REDOs or even UNDO
phase) and user (tells it to repair) in case of crash, sync files
in the end.
Recovery skips missing or corrupted table and moves to next record
(in REDO or UNDO phase) to be more robust; warns if happens in UNDO phase.
Bugfix for UNDO_KEY_DELETE_WITH_ROOT (tested in ma_test_recovery)
and maria_enable_indexes().
Create missing bitmaps when needed (there can be more than one to create,
in rare cases), log a record for this.
include/myisamchk.h:
new flag: bulk insert repair mustn't bump create_rename_lsn
mysql-test/lib/mtr_report.pl:
skip normal warning in maria-recovery.test
mysql-test/r/maria-recovery.result:
result: crash before bulk insert is committed, causes proper rollback,
and crash right after OPTIMIZE replaces index file with new index file
leads to table marked corrupted and recovery not failing.
mysql-test/t/maria-recovery.test:
- can't check the table or it would commit the transaction,
but check is made after recovery.
- test of crash before bulk-insert-with-repair is committed
(to see if it is rolled back), and of crash after OPTIMIZE has replaced
index file but not finished all operations (to see if recovery fails -
it used to assert when trying to execute an old REDO on the new
index).
storage/maria/CMakeLists.txt:
new file
storage/maria/Makefile.am:
new file
storage/maria/ha_maria.cc:
- If bulk insert on a transactional table using an index repair:
table is initially empty, so don't log REDO/UNDO for data rows
(optimization), just log an UNDO_BULK_INSERT_WITH_REPAIR
which will, if executed, empty the data and index file. Re-enable
logging in end_bulk_insert().
- write log record for repair operation only after it's fully done,
index sort including (maria_repair*() used to write the log record).
- Adding back file->trn=NULL which was removed by mistake earlier.
storage/maria/ha_maria.h:
new member (see ha_maria.cc)
storage/maria/ma_bitmap.c:
Functions to create missing bitmaps:
- one function which creates missing bitmaps in page cache, except
the missing one with max offset which it does not put into page cache
as it will be modified very soon.
- one function which the one above calls, and creates bitmaps in page
cache
- one function to execute REDO_BITMAP_NEW_PAGE which uses the second
one above.
storage/maria/ma_blockrec.c:
- when logging REDO_DELETE_ALL, not only 'records' and 'checksum'
has to be reset under log's mutex.
- execution of REDO_INSERT_ROW_BLOBS now checks the dirty pages' list
- execution of UNDO_BULK_INSERT_WITH_REPAIR
storage/maria/ma_blockrec.h:
new functions
storage/maria/ma_check.c:
- table-flush-before-repair is moved to a separate function reused
by maria_sort_index(); syncing is added
- maria_repair() is allowed to re-enable logging only if it is the one
which disabled it.
- "_ma_flush_table_files_after_repair" was a bad name, it's not after
repair now, and it should not sync as we do more changes to the files
shortly after (sync is postponed to when writing the log record)
- REDO_REPAIR record should be written only after all repair
operations (in particular after sorting index in ha_mara::repair())
- close to the end of repair by sort, flushing of pages must happen
also in the non-quick case, to prepare for the sync at end.
- in parallel repair, some page flushes are not needed as done
by initialize_variables_for_repair().
storage/maria/ma_create.c:
Update skip_redo_lsn, create_rename_lsn optionally.
storage/maria/ma_delete_all.c:
Need to sync files at end of maria_delete_all_rows(), if transactional.
storage/maria/ma_extra.c:
During repair, we sometimes call _ma_flush_table_files() (via
_ma_flush_table_files_before_swap()) while there is a WRITE_CACHE.
storage/maria/ma_key_recover.c:
- when we see CLR_END for UNDO_BULK_INSERT_WITH_REPAIR, re-enable
indices.
- fixing bug: _ma_apply_undo_key_delete() parsed UNDO_KEY_DELETE_WITH_ROOT
wrongly, leading to recovery failure
storage/maria/ma_key_recover.h:
new prototype
storage/maria/ma_locking.c:
DBUG_VOID_RETURN missing
storage/maria/ma_loghandler.c:
UNDO for bulk insert with repair, and REDO for creating bitmaps.
LOGREC_FIRST_FREE to not have to change the for() every time we
add a new record type.
storage/maria/ma_loghandler.h:
new UNDO and REDO
storage/maria/ma_open.c:
Move share.kfile.file=kfile up a bit, so that _ma_update_state_lsns()
can get its value, this fixes a bug where LSN_REPAIRED_BY_MARIA_CHK
was not corrected on disk by maria_open().
Store skip_redo_lsn in index' header.
maria_enable_indexes() had a bug for BLOCK_RECORD, where an empty
file has one page, not 0 bytes.
storage/maria/ma_recovery.c:
- Skip a corrupted, missing, or repaired-with-maria_chk, table in
recovery: don't fail, just go to next REDO or UNDO; but if an UNDO
is skipped in UNDO phase we issue warnings.
- Skip REDO|UNDO in REDO phase if <skip_redo_lsn.
- If UNDO phase fails, delete transactions to not make trnman
assert.
- Update skip_redo_lsn when playing REDO_CREATE_TABLE
- Don't record UNDOs for old transactions which we don't know (long_trid==0)
- Bugfix for UNDO_KEY_DELETE_WITH_ROOT (see ma_key_recover.c)
- Execution of UNDO_BULK_INSERT_WITH_REPAIR
- Don't try to find a page number in REDO_DELETE_ALL
- Pieces moved to ma_recovery_util.c
storage/maria/ma_rename.c:
name change
storage/maria/ma_static.c:
I modified layout of the index' header (inserted skip_redo_lsn in its middle)
storage/maria/ma_test2.c:
allow breaking the test towards the end, tests execution of
UNDO_KEY_DELETE_WITH_ROOT
storage/maria/ma_test_recovery.expected:
6 as testflag instead of 4
storage/maria/ma_test_recovery:
Increase the amount of rollback work to do when testing recovery
with ma_test2; this reproduces the UNDO_KEY_DELETE_WITH_ROOT bug.
storage/maria/maria_chk.c:
skip_redo_lsn should be updated too, for consistency.
Write a REDO_REPAIR after all operations (including sort-records)
have been done.
No reason to flush blocks after maria_chk_data_link() and
maria_sort_records(), there is maria_close() in the end.
write_log_record() is a function, to not clutter maria_chk().
storage/maria/maria_def.h:
New member skip_redo_lsn in the state, and comments
storage/maria/maria_pack.c:
skip_redo_lsn should be updated too, for consistency
storage/maria/ma_recovery_util.c:
_ma_redo_not_needed_for_page(), defined in ma_recovery.c, is needed
by ma_blockrec.c; this causes link issues, resolved by putting
_ma_redo_not_needed_for_page() into a new file (so that it is not
in the same file as repair-related objects of ma_recovery.c).
storage/maria/ma_recovery_util.h:
new file
2008-01-17 23:59:32 +01:00
|
|
|
*/
|
|
|
|
for (i= from; i <= to; i+= bitmap->pages_covered)
|
|
|
|
{
|
2008-01-19 04:51:38 +01:00
|
|
|
/**
|
|
|
|
No need to keep them pinned, they are new so flushable.
|
|
|
|
@todo but we may want to keep them pinned, as an optimization: if they
|
|
|
|
are not pinned they may go to disk before the data pages go (so, the
|
|
|
|
physical pages would be in non-ascending "sparse" order on disk), or the
|
|
|
|
filesystem may fill gaps with zeroes physically which is a waste of
|
|
|
|
time.
|
|
|
|
*/
|
WL#3072 - Maria Recovery
Bulk insert: don't log REDO/UNDO for rows, log one UNDO which will
truncate files; this is an optimization and a bugfix (table was left
half-repaired by crash).
Repair: mark table crashed-on-repair at start, bump skip_redo_lsn at start,
this is easier for recovery (tells it to skip old REDOs or even UNDO
phase) and user (tells it to repair) in case of crash, sync files
in the end.
Recovery skips missing or corrupted table and moves to next record
(in REDO or UNDO phase) to be more robust; warns if happens in UNDO phase.
Bugfix for UNDO_KEY_DELETE_WITH_ROOT (tested in ma_test_recovery)
and maria_enable_indexes().
Create missing bitmaps when needed (there can be more than one to create,
in rare cases), log a record for this.
include/myisamchk.h:
new flag: bulk insert repair mustn't bump create_rename_lsn
mysql-test/lib/mtr_report.pl:
skip normal warning in maria-recovery.test
mysql-test/r/maria-recovery.result:
result: crash before bulk insert is committed, causes proper rollback,
and crash right after OPTIMIZE replaces index file with new index file
leads to table marked corrupted and recovery not failing.
mysql-test/t/maria-recovery.test:
- can't check the table or it would commit the transaction,
but check is made after recovery.
- test of crash before bulk-insert-with-repair is committed
(to see if it is rolled back), and of crash after OPTIMIZE has replaced
index file but not finished all operations (to see if recovery fails -
it used to assert when trying to execute an old REDO on the new
index).
storage/maria/CMakeLists.txt:
new file
storage/maria/Makefile.am:
new file
storage/maria/ha_maria.cc:
- If bulk insert on a transactional table using an index repair:
table is initially empty, so don't log REDO/UNDO for data rows
(optimization), just log an UNDO_BULK_INSERT_WITH_REPAIR
which will, if executed, empty the data and index file. Re-enable
logging in end_bulk_insert().
- write log record for repair operation only after it's fully done,
index sort including (maria_repair*() used to write the log record).
- Adding back file->trn=NULL which was removed by mistake earlier.
storage/maria/ha_maria.h:
new member (see ha_maria.cc)
storage/maria/ma_bitmap.c:
Functions to create missing bitmaps:
- one function which creates missing bitmaps in page cache, except
the missing one with max offset which it does not put into page cache
as it will be modified very soon.
- one function which the one above calls, and creates bitmaps in page
cache
- one function to execute REDO_BITMAP_NEW_PAGE which uses the second
one above.
storage/maria/ma_blockrec.c:
- when logging REDO_DELETE_ALL, not only 'records' and 'checksum'
has to be reset under log's mutex.
- execution of REDO_INSERT_ROW_BLOBS now checks the dirty pages' list
- execution of UNDO_BULK_INSERT_WITH_REPAIR
storage/maria/ma_blockrec.h:
new functions
storage/maria/ma_check.c:
- table-flush-before-repair is moved to a separate function reused
by maria_sort_index(); syncing is added
- maria_repair() is allowed to re-enable logging only if it is the one
which disabled it.
- "_ma_flush_table_files_after_repair" was a bad name, it's not after
repair now, and it should not sync as we do more changes to the files
shortly after (sync is postponed to when writing the log record)
- REDO_REPAIR record should be written only after all repair
operations (in particular after sorting index in ha_mara::repair())
- close to the end of repair by sort, flushing of pages must happen
also in the non-quick case, to prepare for the sync at end.
- in parallel repair, some page flushes are not needed as done
by initialize_variables_for_repair().
storage/maria/ma_create.c:
Update skip_redo_lsn, create_rename_lsn optionally.
storage/maria/ma_delete_all.c:
Need to sync files at end of maria_delete_all_rows(), if transactional.
storage/maria/ma_extra.c:
During repair, we sometimes call _ma_flush_table_files() (via
_ma_flush_table_files_before_swap()) while there is a WRITE_CACHE.
storage/maria/ma_key_recover.c:
- when we see CLR_END for UNDO_BULK_INSERT_WITH_REPAIR, re-enable
indices.
- fixing bug: _ma_apply_undo_key_delete() parsed UNDO_KEY_DELETE_WITH_ROOT
wrongly, leading to recovery failure
storage/maria/ma_key_recover.h:
new prototype
storage/maria/ma_locking.c:
DBUG_VOID_RETURN missing
storage/maria/ma_loghandler.c:
UNDO for bulk insert with repair, and REDO for creating bitmaps.
LOGREC_FIRST_FREE to not have to change the for() every time we
add a new record type.
storage/maria/ma_loghandler.h:
new UNDO and REDO
storage/maria/ma_open.c:
Move share.kfile.file=kfile up a bit, so that _ma_update_state_lsns()
can get its value, this fixes a bug where LSN_REPAIRED_BY_MARIA_CHK
was not corrected on disk by maria_open().
Store skip_redo_lsn in index' header.
maria_enable_indexes() had a bug for BLOCK_RECORD, where an empty
file has one page, not 0 bytes.
storage/maria/ma_recovery.c:
- Skip a corrupted, missing, or repaired-with-maria_chk, table in
recovery: don't fail, just go to next REDO or UNDO; but if an UNDO
is skipped in UNDO phase we issue warnings.
- Skip REDO|UNDO in REDO phase if <skip_redo_lsn.
- If UNDO phase fails, delete transactions to not make trnman
assert.
- Update skip_redo_lsn when playing REDO_CREATE_TABLE
- Don't record UNDOs for old transactions which we don't know (long_trid==0)
- Bugfix for UNDO_KEY_DELETE_WITH_ROOT (see ma_key_recover.c)
- Execution of UNDO_BULK_INSERT_WITH_REPAIR
- Don't try to find a page number in REDO_DELETE_ALL
- Pieces moved to ma_recovery_util.c
storage/maria/ma_rename.c:
name change
storage/maria/ma_static.c:
I modified layout of the index' header (inserted skip_redo_lsn in its middle)
storage/maria/ma_test2.c:
allow breaking the test towards the end, tests execution of
UNDO_KEY_DELETE_WITH_ROOT
storage/maria/ma_test_recovery.expected:
6 as testflag instead of 4
storage/maria/ma_test_recovery:
Increase the amount of rollback work to do when testing recovery
with ma_test2; this reproduces the UNDO_KEY_DELETE_WITH_ROOT bug.
storage/maria/maria_chk.c:
skip_redo_lsn should be updated too, for consistency.
Write a REDO_REPAIR after all operations (including sort-records)
have been done.
No reason to flush blocks after maria_chk_data_link() and
maria_sort_records(), there is maria_close() in the end.
write_log_record() is a function, to not clutter maria_chk().
storage/maria/maria_def.h:
New member skip_redo_lsn in the state, and comments
storage/maria/maria_pack.c:
skip_redo_lsn should be updated too, for consistency
storage/maria/ma_recovery_util.c:
_ma_redo_not_needed_for_page(), defined in ma_recovery.c, is needed
by ma_blockrec.c; this causes link issues, resolved by putting
_ma_redo_not_needed_for_page() into a new file (so that it is not
in the same file as repair-related objects of ma_recovery.c).
storage/maria/ma_recovery_util.h:
new file
2008-01-17 23:59:32 +01:00
|
|
|
if (pagecache_write(share->pagecache,
|
|
|
|
&bitmap->file, i, 0,
|
|
|
|
zeroes, PAGECACHE_PLAIN_PAGE,
|
|
|
|
PAGECACHE_LOCK_LEFT_UNLOCKED,
|
|
|
|
PAGECACHE_PIN_LEFT_UNPINNED,
|
|
|
|
PAGECACHE_WRITE_DELAY, 0, LSN_IMPOSSIBLE))
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
Data pages after data_file_length are full of zeroes but that is allowed
|
|
|
|
as they are marked empty in the bitmap.
|
|
|
|
*/
|
|
|
|
return FALSE;
|
|
|
|
err:
|
|
|
|
return TRUE;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
Creates missing bitmaps when we extend the data file.
|
|
|
|
|
|
|
|
At run-time, when we need a new bitmap page we come here; and only one bitmap
|
|
|
|
page at a time is created.
|
|
|
|
|
|
|
|
In some recovery cases we insert at a large offset in the data file, way
|
|
|
|
beyond state.data_file_length, so we may need to create more than one bitmap
|
|
|
|
page in one go. Known case is:
|
|
|
|
Start a transaction in Maria;
|
|
|
|
delete last row of very large table (with delete_row)
|
|
|
|
do a bulk insert
|
|
|
|
crash
|
- fix for segfault in rpl_trigger/rpl_found_rows with default engine=maria
(fix is keeping the real TRN through a disable_logging/reenable cycle)
- fix for pagecache assertion failure in ps/type_ranges with default
engine=maria (fix is in sql_insert.cc)
- when reenabling logging we must either flush all dirty pages,
or at least verify (in debug build) that there are none. For example
a bulk insert with single UNDO_BULK_INSERT must flush them, no matter
if it uses repair or not (bugfix)
- UNDO_BULK_INSERT_WITH_REPAIR is also used with repair, changes name
mysql-test/r/maria.result:
tests for bugs fixed
mysql-test/t/maria.test:
tests for bugs fixed
sql/sql_insert.cc:
Bugfix: even if select_create::prepare() failed to create the 'table' object
we still have to re-enable logging.
storage/maria/ha_maria.cc:
Bugfix: when a transactional table does a bulk insert without
repair, it still sometimes skips logging of REDOs thus needs a full
flush and sync at the end. Not if repair is done, as repair does
it internally already (see end of maria_repair*()).
storage/maria/ha_maria.h:
variable now can have 3 states not 2
storage/maria/ma_bitmap.c:
name change
storage/maria/ma_blockrec.c:
name change
storage/maria/ma_blockrec.h:
name change
storage/maria/ma_check.c:
* When maria_repair() re-enables logging it does not need to ask for
a flush&sync as it did it by itself already a few lines before.
* the log record of bulk insert can be used even without repair
* disable logging in maria_zerofill(): without that, it puts LSN pages
in the cache, so when it flushes them it flushes the log; the change
makes auto-ha_maria::zerofill-if-moved faster (no log flush).
storage/maria/ma_key_recover.c:
name change
storage/maria/ma_loghandler.c:
name change
storage/maria/ma_loghandler.h:
name change
storage/maria/ma_pagecache.c:
A function, to check in debug builds that no dirty pages exist for a file.
storage/maria/ma_pagecache.h:
new function (nothing in non-debug)
storage/maria/ma_recovery.c:
_ma_tmp_disable_logging() sets info->trn to dummy_transaction_object
when needed now. The changes done here about info->trn are to allow
a table to retain its original, real TRN through a disable/reenable
cycle (see replication scenario in _ma_reenable_logging_for_table()).
When we reenable, we offer the caller to flush and sync the table;
if the caller doesn't accept our offer, we verify that it's ok
(no REDOs => no dirty pages are allowed to exist).
storage/maria/maria_chk.c:
comment
storage/maria/maria_def.h:
new names
mysql-test/suite/rpl/r/rpl_stm_maria.result:
result (it used to crash)
mysql-test/suite/rpl/t/rpl_stm_maria.test:
Test of replication-specific Maria bug fixed
2008-01-20 05:25:26 +01:00
|
|
|
Then UNDO_BULK_INSERT will truncate table files, and
|
WL#3072 - Maria Recovery
Bulk insert: don't log REDO/UNDO for rows, log one UNDO which will
truncate files; this is an optimization and a bugfix (table was left
half-repaired by crash).
Repair: mark table crashed-on-repair at start, bump skip_redo_lsn at start,
this is easier for recovery (tells it to skip old REDOs or even UNDO
phase) and user (tells it to repair) in case of crash, sync files
in the end.
Recovery skips missing or corrupted table and moves to next record
(in REDO or UNDO phase) to be more robust; warns if happens in UNDO phase.
Bugfix for UNDO_KEY_DELETE_WITH_ROOT (tested in ma_test_recovery)
and maria_enable_indexes().
Create missing bitmaps when needed (there can be more than one to create,
in rare cases), log a record for this.
include/myisamchk.h:
new flag: bulk insert repair mustn't bump create_rename_lsn
mysql-test/lib/mtr_report.pl:
skip normal warning in maria-recovery.test
mysql-test/r/maria-recovery.result:
result: crash before bulk insert is committed, causes proper rollback,
and crash right after OPTIMIZE replaces index file with new index file
leads to table marked corrupted and recovery not failing.
mysql-test/t/maria-recovery.test:
- can't check the table or it would commit the transaction,
but check is made after recovery.
- test of crash before bulk-insert-with-repair is committed
(to see if it is rolled back), and of crash after OPTIMIZE has replaced
index file but not finished all operations (to see if recovery fails -
it used to assert when trying to execute an old REDO on the new
index).
storage/maria/CMakeLists.txt:
new file
storage/maria/Makefile.am:
new file
storage/maria/ha_maria.cc:
- If bulk insert on a transactional table using an index repair:
table is initially empty, so don't log REDO/UNDO for data rows
(optimization), just log an UNDO_BULK_INSERT_WITH_REPAIR
which will, if executed, empty the data and index file. Re-enable
logging in end_bulk_insert().
- write log record for repair operation only after it's fully done,
index sort including (maria_repair*() used to write the log record).
- Adding back file->trn=NULL which was removed by mistake earlier.
storage/maria/ha_maria.h:
new member (see ha_maria.cc)
storage/maria/ma_bitmap.c:
Functions to create missing bitmaps:
- one function which creates missing bitmaps in page cache, except
the missing one with max offset which it does not put into page cache
as it will be modified very soon.
- one function which the one above calls, and creates bitmaps in page
cache
- one function to execute REDO_BITMAP_NEW_PAGE which uses the second
one above.
storage/maria/ma_blockrec.c:
- when logging REDO_DELETE_ALL, not only 'records' and 'checksum'
has to be reset under log's mutex.
- execution of REDO_INSERT_ROW_BLOBS now checks the dirty pages' list
- execution of UNDO_BULK_INSERT_WITH_REPAIR
storage/maria/ma_blockrec.h:
new functions
storage/maria/ma_check.c:
- table-flush-before-repair is moved to a separate function reused
by maria_sort_index(); syncing is added
- maria_repair() is allowed to re-enable logging only if it is the one
which disabled it.
- "_ma_flush_table_files_after_repair" was a bad name, it's not after
repair now, and it should not sync as we do more changes to the files
shortly after (sync is postponed to when writing the log record)
- REDO_REPAIR record should be written only after all repair
operations (in particular after sorting index in ha_maria::repair())
- close to the end of repair by sort, flushing of pages must happen
also in the non-quick case, to prepare for the sync at end.
- in parallel repair, some page flushes are not needed as done
by initialize_variables_for_repair().
storage/maria/ma_create.c:
Update skip_redo_lsn, create_rename_lsn optionally.
storage/maria/ma_delete_all.c:
Need to sync files at end of maria_delete_all_rows(), if transactional.
storage/maria/ma_extra.c:
During repair, we sometimes call _ma_flush_table_files() (via
_ma_flush_table_files_before_swap()) while there is a WRITE_CACHE.
storage/maria/ma_key_recover.c:
- when we see CLR_END for UNDO_BULK_INSERT_WITH_REPAIR, re-enable
indices.
- fixing bug: _ma_apply_undo_key_delete() parsed UNDO_KEY_DELETE_WITH_ROOT
wrongly, leading to recovery failure
storage/maria/ma_key_recover.h:
new prototype
storage/maria/ma_locking.c:
DBUG_VOID_RETURN missing
storage/maria/ma_loghandler.c:
UNDO for bulk insert with repair, and REDO for creating bitmaps.
LOGREC_FIRST_FREE to not have to change the for() every time we
add a new record type.
storage/maria/ma_loghandler.h:
new UNDO and REDO
storage/maria/ma_open.c:
Move share.kfile.file=kfile up a bit, so that _ma_update_state_lsns()
can get its value, this fixes a bug where LSN_REPAIRED_BY_MARIA_CHK
was not corrected on disk by maria_open().
Store skip_redo_lsn in index' header.
maria_enable_indexes() had a bug for BLOCK_RECORD, where an empty
file has one page, not 0 bytes.
storage/maria/ma_recovery.c:
- Skip a corrupted, missing, or repaired-with-maria_chk, table in
recovery: don't fail, just go to next REDO or UNDO; but if an UNDO
is skipped in UNDO phase we issue warnings.
- Skip REDO|UNDO in REDO phase if <skip_redo_lsn.
- If UNDO phase fails, delete transactions to not make trnman
assert.
- Update skip_redo_lsn when playing REDO_CREATE_TABLE
- Don't record UNDOs for old transactions which we don't know (long_trid==0)
- Bugfix for UNDO_KEY_DELETE_WITH_ROOT (see ma_key_recover.c)
- Execution of UNDO_BULK_INSERT_WITH_REPAIR
- Don't try to find a page number in REDO_DELETE_ALL
- Pieces moved to ma_recovery_util.c
storage/maria/ma_rename.c:
name change
storage/maria/ma_static.c:
I modified layout of the index' header (inserted skip_redo_lsn in its middle)
storage/maria/ma_test2.c:
allow breaking the test towards the end, tests execution of
UNDO_KEY_DELETE_WITH_ROOT
storage/maria/ma_test_recovery.expected:
6 as testflag instead of 4
storage/maria/ma_test_recovery:
Increase the amount of rollback work to do when testing recovery
with ma_test2; this reproduces the UNDO_KEY_DELETE_WITH_ROOT bug.
storage/maria/maria_chk.c:
skip_redo_lsn should be updated too, for consistency.
Write a REDO_REPAIR after all operations (including sort-records)
have been done.
No reason to flush blocks after maria_chk_data_link() and
maria_sort_records(), there is maria_close() in the end.
write_log_record() is a function, to not clutter maria_chk().
storage/maria/maria_def.h:
New member skip_redo_lsn in the state, and comments
storage/maria/maria_pack.c:
skip_redo_lsn should be updated too, for consistency
storage/maria/ma_recovery_util.c:
_ma_redo_not_needed_for_page(), defined in ma_recovery.c, is needed
by ma_blockrec.c; this causes link issues, resolved by putting
_ma_redo_not_needed_for_page() into a new file (so that it is not
in the same file as repair-related objects of ma_recovery.c).
storage/maria/ma_recovery_util.h:
new file
2008-01-17 23:59:32 +01:00
|
|
|
UNDO_ROW_DELETE will want to put the row back to its original position,
|
|
|
|
extending the data file a lot: bitmap page*s* in the hole must be created,
|
|
|
|
or the table would look corrupted.
|
|
|
|
|
|
|
|
We need to log REDOs for bitmap creation, consider: we apply a REDO for a
|
|
|
|
data page, which creates the first data page covered by a new bitmap
|
|
|
|
not yet created. If the data page is flushed but the bitmap page is not and
|
|
|
|
there is a crash, re-execution of the REDO will complain about the zeroed
|
|
|
|
bitmap page (see it as corruption). Thus a REDO is needed to re-create the
|
|
|
|
bitmap.
|
|
|
|
|
|
|
|
@param info Maria handler
|
|
|
|
@param bitmap Bitmap handler
|
|
|
|
@param page Last bitmap page to create
|
|
|
|
|
|
|
|
@note When this function is called this must be true:
|
|
|
|
((page + 1) * bitmap->block_size > info->s->state.state.data_file_length)
|
|
|
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
static my_bool _ma_bitmap_create_missing(MARIA_HA *info,
|
|
|
|
MARIA_FILE_BITMAP *bitmap,
|
|
|
|
pgcache_page_no_t page)
|
|
|
|
{
|
|
|
|
MARIA_SHARE *share= info->s;
|
|
|
|
uint block_size= bitmap->block_size;
|
|
|
|
pgcache_page_no_t from, to;
|
|
|
|
my_off_t data_file_length= share->state.state.data_file_length;
|
|
|
|
DBUG_ENTER("_ma_bitmap_create_missing");
|
|
|
|
|
|
|
|
/* First (in offset order) bitmap page to create */
|
|
|
|
if (data_file_length < block_size)
|
|
|
|
goto err; /* corrupted, should have first bitmap page */
|
2011-07-24 10:25:28 +02:00
|
|
|
if (page * block_size >= share->base.max_data_file_length)
|
|
|
|
{
|
|
|
|
my_errno= HA_ERR_RECORD_FILE_FULL;
|
|
|
|
goto err;
|
|
|
|
}
|
WL#3072 - Maria Recovery
Bulk insert: don't log REDO/UNDO for rows, log one UNDO which will
truncate files; this is an optimization and a bugfix (table was left
half-repaired by crash).
Repair: mark table crashed-on-repair at start, bump skip_redo_lsn at start,
this is easier for recovery (tells it to skip old REDOs or even UNDO
phase) and user (tells it to repair) in case of crash, sync files
in the end.
Recovery skips missing or corrupted table and moves to next record
(in REDO or UNDO phase) to be more robust; warns if happens in UNDO phase.
Bugfix for UNDO_KEY_DELETE_WITH_ROOT (tested in ma_test_recovery)
and maria_enable_indexes().
Create missing bitmaps when needed (there can be more than one to create,
in rare cases), log a record for this.
include/myisamchk.h:
new flag: bulk insert repair mustn't bump create_rename_lsn
mysql-test/lib/mtr_report.pl:
skip normal warning in maria-recovery.test
mysql-test/r/maria-recovery.result:
result: crash before bulk insert is committed, causes proper rollback,
and crash right after OPTIMIZE replaces index file with new index file
leads to table marked corrupted and recovery not failing.
mysql-test/t/maria-recovery.test:
- can't check the table or it would commit the transaction,
but check is made after recovery.
- test of crash before bulk-insert-with-repair is committed
(to see if it is rolled back), and of crash after OPTIMIZE has replaced
index file but not finished all operations (to see if recovery fails -
it used to assert when trying to execute an old REDO on the new
index).
storage/maria/CMakeLists.txt:
new file
storage/maria/Makefile.am:
new file
storage/maria/ha_maria.cc:
- If bulk insert on a transactional table using an index repair:
table is initially empty, so don't log REDO/UNDO for data rows
(optimization), just log an UNDO_BULK_INSERT_WITH_REPAIR
which will, if executed, empty the data and index file. Re-enable
logging in end_bulk_insert().
- write log record for repair operation only after it's fully done,
index sort including (maria_repair*() used to write the log record).
- Adding back file->trn=NULL which was removed by mistake earlier.
storage/maria/ha_maria.h:
new member (see ha_maria.cc)
storage/maria/ma_bitmap.c:
Functions to create missing bitmaps:
- one function which creates missing bitmaps in page cache, except
the missing one with max offset which it does not put into page cache
as it will be modified very soon.
- one function which the one above calls, and creates bitmaps in page
cache
- one function to execute REDO_BITMAP_NEW_PAGE which uses the second
one above.
storage/maria/ma_blockrec.c:
- when logging REDO_DELETE_ALL, not only 'records' and 'checksum'
has to be reset under log's mutex.
- execution of REDO_INSERT_ROW_BLOBS now checks the dirty pages' list
- execution of UNDO_BULK_INSERT_WITH_REPAIR
storage/maria/ma_blockrec.h:
new functions
storage/maria/ma_check.c:
- table-flush-before-repair is moved to a separate function reused
by maria_sort_index(); syncing is added
- maria_repair() is allowed to re-enable logging only if it is the one
which disabled it.
- "_ma_flush_table_files_after_repair" was a bad name, it's not after
repair now, and it should not sync as we do more changes to the files
shortly after (sync is postponed to when writing the log record)
- REDO_REPAIR record should be written only after all repair
operations (in particular after sorting index in ha_maria::repair())
- close to the end of repair by sort, flushing of pages must happen
also in the non-quick case, to prepare for the sync at end.
- in parallel repair, some page flushes are not needed as done
by initialize_variables_for_repair().
storage/maria/ma_create.c:
Update skip_redo_lsn, create_rename_lsn optionally.
storage/maria/ma_delete_all.c:
Need to sync files at end of maria_delete_all_rows(), if transactional.
storage/maria/ma_extra.c:
During repair, we sometimes call _ma_flush_table_files() (via
_ma_flush_table_files_before_swap()) while there is a WRITE_CACHE.
storage/maria/ma_key_recover.c:
- when we see CLR_END for UNDO_BULK_INSERT_WITH_REPAIR, re-enable
indices.
- fixing bug: _ma_apply_undo_key_delete() parsed UNDO_KEY_DELETE_WITH_ROOT
wrongly, leading to recovery failure
storage/maria/ma_key_recover.h:
new prototype
storage/maria/ma_locking.c:
DBUG_VOID_RETURN missing
storage/maria/ma_loghandler.c:
UNDO for bulk insert with repair, and REDO for creating bitmaps.
LOGREC_FIRST_FREE to not have to change the for() every time we
add a new record type.
storage/maria/ma_loghandler.h:
new UNDO and REDO
storage/maria/ma_open.c:
Move share.kfile.file=kfile up a bit, so that _ma_update_state_lsns()
can get its value, this fixes a bug where LSN_REPAIRED_BY_MARIA_CHK
was not corrected on disk by maria_open().
Store skip_redo_lsn in index' header.
maria_enable_indexes() had a bug for BLOCK_RECORD, where an empty
file has one page, not 0 bytes.
storage/maria/ma_recovery.c:
- Skip a corrupted, missing, or repaired-with-maria_chk, table in
recovery: don't fail, just go to next REDO or UNDO; but if an UNDO
is skipped in UNDO phase we issue warnings.
- Skip REDO|UNDO in REDO phase if <skip_redo_lsn.
- If UNDO phase fails, delete transactions to not make trnman
assert.
- Update skip_redo_lsn when playing REDO_CREATE_TABLE
- Don't record UNDOs for old transactions which we don't know (long_trid==0)
- Bugfix for UNDO_KEY_DELETE_WITH_ROOT (see ma_key_recover.c)
- Execution of UNDO_BULK_INSERT_WITH_REPAIR
- Don't try to find a page number in REDO_DELETE_ALL
- Pieces moved to ma_recovery_util.c
storage/maria/ma_rename.c:
name change
storage/maria/ma_static.c:
I modified layout of the index' header (inserted skip_redo_lsn in its middle)
storage/maria/ma_test2.c:
allow breaking the test towards the end, tests execution of
UNDO_KEY_DELETE_WITH_ROOT
storage/maria/ma_test_recovery.expected:
6 as testflag instead of 4
storage/maria/ma_test_recovery:
Increase the amount of rollback work to do when testing recovery
with ma_test2; this reproduces the UNDO_KEY_DELETE_WITH_ROOT bug.
storage/maria/maria_chk.c:
skip_redo_lsn should be updated too, for consistency.
Write a REDO_REPAIR after all operations (including sort-records)
have been done.
No reason to flush blocks after maria_chk_data_link() and
maria_sort_records(), there is maria_close() in the end.
write_log_record() is a function, to not clutter maria_chk().
storage/maria/maria_def.h:
New member skip_redo_lsn in the state, and comments
storage/maria/maria_pack.c:
skip_redo_lsn should be updated too, for consistency
storage/maria/ma_recovery_util.c:
_ma_redo_not_needed_for_page(), defined in ma_recovery.c, is needed
by ma_blockrec.c; this causes link issues, resolved by putting
_ma_redo_not_needed_for_page() into a new file (so that it is not
in the same file as repair-related objects of ma_recovery.c).
storage/maria/ma_recovery_util.h:
new file
2008-01-17 23:59:32 +01:00
|
|
|
|
|
|
|
from= (data_file_length / block_size - 1) / bitmap->pages_covered + 1;
|
|
|
|
from*= bitmap->pages_covered;
|
|
|
|
/*
|
|
|
|
page>=from because:
|
|
|
|
(page + 1) * bs > dfl, and page == k * pc so:
|
|
|
|
(k * pc + 1) * bs > dfl; k * pc + 1 > dfl / bs; k * pc > dfl / bs - 1
|
|
|
|
k > (dfl / bs - 1) / pc; k >= (dfl / bs - 1) / pc + 1
|
|
|
|
k * pc >= ((dfl / bs - 1) / pc + 1) * pc == from.
|
|
|
|
*/
|
|
|
|
DBUG_ASSERT(page >= from);
|
|
|
|
|
|
|
|
if (share->now_transactional)
|
|
|
|
{
|
|
|
|
LSN lsn;
|
|
|
|
uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE * 2];
|
Injecting more "const" declarations into code which does not change
pointed data.
I ran gcc -Wcast-qual on storage/maria, this identified un-needed casts,
a couple of functions which said they had a const parameter though
they changed the pointed content! This is fixed here. Some suspicious
places receive a comment.
The original intention of running -Wcast-qual was to find what code
changes R-tree keys: I added const words, but hidden casts
like those of int2store (casts target to (uint16*)) removed const
checking; -Wcast-qual helped find those hidden casts.
Log handler does not change the content pointed by LEX_STRING::str it
receives, so we now use a struct which has a const inside, to emphasize
this and be able to pass "const uchar*" buffers to log handler
without fear of their content being changed by it.
One-line fix for a merge glitch (when merging from MyISAM).
include/m_string.h:
As Maria's log handler uses LEX_STRING but never changes the content
pointed by LEX_STRING::str, and assigns uchar* into this member most
of the time, we introduce a new struct LEX_CUSTRING
(C const U unsigned) for the log handler.
include/my_global.h:
In macros which read pointed content: use const pointers so that
gcc -Wcast-qual does not warn about casting a const pointer to non-const.
include/my_handler.h:
In macros which read pointed content: use const pointers so that
gcc -Wcast-qual does not warn about casting a const pointer to non-const.
ha_find_null() does not change *a.
include/my_sys.h:
insert_dynamic() does not change *element.
include/myisampack.h:
In macros which read pointed content: use const pointers so that
gcc -Wcast-qual does not warn about casting a const pointer to non-const.
mysys/array.c:
insert_dynamic() does not change *element
mysys/my_handler.c:
ha_find_null() does not change *a
storage/maria/ma_bitmap.c:
Log handler receives const strings now
storage/maria/ma_blockrec.c:
Log handler receives const strings now.
_ma_apply_undo_row_delete/update() do change *header.
storage/maria/ma_blockrec.h:
correct prototype
storage/maria/ma_check.c:
Log handler receives const strings now. Un-needed casts
storage/maria/ma_checkpoint.c:
Log handler receives const strings now
storage/maria/ma_checksum.c:
unneeded cast
storage/maria/ma_commit.c:
Log handler receives const strings now
storage/maria/ma_create.c:
Log handler receives const strings now
storage/maria/ma_dbug.c:
fixing warning of gcc -Wcast-qual
storage/maria/ma_delete.c:
Log handler receives const strings now
storage/maria/ma_delete_all.c:
Log handler receives const strings now
storage/maria/ma_delete_table.c:
Log handler receives const strings now
storage/maria/ma_dynrec.c:
fixing some warnings of gcc -Wcast-qual. Unneeded casts removed.
Comment about function which lies.
storage/maria/ma_ft_parser.c:
fix for warnings of gcc -Wcast-qual, removing unneeded casts
storage/maria/ma_ft_update.c:
less casts, comment
storage/maria/ma_key.c:
less casts, stay const (warnings of gcc -Wcast-qual)
storage/maria/ma_key_recover.c:
Log handler receives const strings now
storage/maria/ma_loghandler.c:
Log handler receives const strings now
storage/maria/ma_loghandler.h:
Log handler receives const strings now
storage/maria/ma_loghandler_lsn.h:
In macros which read pointed content: use const pointers so that
gcc -Wcast-qual does not warn about casting a const pointer to non-const.
storage/maria/ma_page.c:
Log handler receives const strings now; more const
storage/maria/ma_recovery.c:
Log handler receives const strings now
storage/maria/ma_rename.c:
Log handler receives const strings now
storage/maria/ma_rt_index.c:
more const, to emphasize that functions don't change pointed content.
best_key= NULL was forgotten during merge from MyISAM a few days ago,
was causing a Valgrind warning
storage/maria/ma_rt_index.h:
new proto
storage/maria/ma_rt_key.c:
more const
storage/maria/ma_rt_key.h:
new proto
storage/maria/ma_rt_mbr.c:
more const for functions which deserve it
storage/maria/ma_rt_mbr.h:
new prototype
storage/maria/ma_rt_split.c:
make const what is not changed.
storage/maria/ma_search.c:
un-needed casts, more const
storage/maria/ma_sp_key.c:
more const
storage/maria/ma_unique.c:
un-needed casts.
storage/maria/ma_write.c:
Log handler receives const strings now
storage/maria/maria_def.h:
some more const
storage/maria/unittest/ma_test_loghandler-t.c:
Log handler receives const strings now
storage/maria/unittest/ma_test_loghandler_first_lsn-t.c:
Log handler receives const strings now
storage/maria/unittest/ma_test_loghandler_max_lsn-t.c:
Log handler receives const strings now
storage/maria/unittest/ma_test_loghandler_multigroup-t.c:
Log handler receives const strings now
storage/maria/unittest/ma_test_loghandler_multithread-t.c:
Log handler receives const strings now
storage/maria/unittest/ma_test_loghandler_noflush-t.c:
Log handler receives const strings now
storage/maria/unittest/ma_test_loghandler_nologs-t.c:
Log handler receives const strings now
storage/maria/unittest/ma_test_loghandler_pagecache-t.c:
Log handler receives const strings now
storage/maria/unittest/ma_test_loghandler_purge-t.c:
Log handler receives const strings now
2008-04-03 15:40:25 +02:00
|
|
|
LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 1];
|
WL#3072 - Maria Recovery
Bulk insert: don't log REDO/UNDO for rows, log one UNDO which will
truncate files; this is an optimization and a bugfix (table was left
half-repaired by crash).
Repair: mark table crashed-on-repair at start, bump skip_redo_lsn at start,
this is easier for recovery (tells it to skip old REDOs or even UNDO
phase) and user (tells it to repair) in case of crash, sync files
in the end.
Recovery skips missing or corrupted table and moves to next record
(in REDO or UNDO phase) to be more robust; warns if happens in UNDO phase.
Bugfix for UNDO_KEY_DELETE_WITH_ROOT (tested in ma_test_recovery)
and maria_enable_indexes().
Create missing bitmaps when needed (there can be more than one to create,
in rare cases), log a record for this.
include/myisamchk.h:
new flag: bulk insert repair mustn't bump create_rename_lsn
mysql-test/lib/mtr_report.pl:
skip normal warning in maria-recovery.test
mysql-test/r/maria-recovery.result:
result: crash before bulk insert is committed, causes proper rollback,
and crash right after OPTIMIZE replaces index file with new index file
leads to table marked corrupted and recovery not failing.
mysql-test/t/maria-recovery.test:
- can't check the table or it would commit the transaction,
but check is made after recovery.
- test of crash before bulk-insert-with-repair is committed
(to see if it is rolled back), and of crash after OPTIMIZE has replaced
index file but not finished all operations (to see if recovery fails -
it used to assert when trying to execute an old REDO on the new
index).
storage/maria/CMakeLists.txt:
new file
storage/maria/Makefile.am:
new file
storage/maria/ha_maria.cc:
- If bulk insert on a transactional table using an index repair:
table is initially empty, so don't log REDO/UNDO for data rows
(optimization), just log an UNDO_BULK_INSERT_WITH_REPAIR
which will, if executed, empty the data and index file. Re-enable
logging in end_bulk_insert().
- write log record for repair operation only after it's fully done,
index sort including (maria_repair*() used to write the log record).
- Adding back file->trn=NULL which was removed by mistake earlier.
storage/maria/ha_maria.h:
new member (see ha_maria.cc)
storage/maria/ma_bitmap.c:
Functions to create missing bitmaps:
- one function which creates missing bitmaps in page cache, except
the missing one with max offset which it does not put into page cache
as it will be modified very soon.
- one function which the one above calls, and creates bitmaps in page
cache
- one function to execute REDO_BITMAP_NEW_PAGE which uses the second
one above.
storage/maria/ma_blockrec.c:
- when logging REDO_DELETE_ALL, not only 'records' and 'checksum'
has to be reset under log's mutex.
- execution of REDO_INSERT_ROW_BLOBS now checks the dirty pages' list
- execution of UNDO_BULK_INSERT_WITH_REPAIR
storage/maria/ma_blockrec.h:
new functions
storage/maria/ma_check.c:
- table-flush-before-repair is moved to a separate function reused
by maria_sort_index(); syncing is added
- maria_repair() is allowed to re-enable logging only if it is the one
which disabled it.
- "_ma_flush_table_files_after_repair" was a bad name, it's not after
repair now, and it should not sync as we do more changes to the files
shortly after (sync is postponed to when writing the log record)
- REDO_REPAIR record should be written only after all repair
operations (in particular after sorting index in ha_maria::repair())
- close to the end of repair by sort, flushing of pages must happen
also in the non-quick case, to prepare for the sync at end.
- in parallel repair, some page flushes are not needed as done
by initialize_variables_for_repair().
storage/maria/ma_create.c:
Update skip_redo_lsn, create_rename_lsn optionally.
storage/maria/ma_delete_all.c:
Need to sync files at end of maria_delete_all_rows(), if transactional.
storage/maria/ma_extra.c:
During repair, we sometimes call _ma_flush_table_files() (via
_ma_flush_table_files_before_swap()) while there is a WRITE_CACHE.
storage/maria/ma_key_recover.c:
- when we see CLR_END for UNDO_BULK_INSERT_WITH_REPAIR, re-enable
indices.
- fixing bug: _ma_apply_undo_key_delete() parsed UNDO_KEY_DELETE_WITH_ROOT
wrongly, leading to recovery failure
storage/maria/ma_key_recover.h:
new prototype
storage/maria/ma_locking.c:
DBUG_VOID_RETURN missing
storage/maria/ma_loghandler.c:
UNDO for bulk insert with repair, and REDO for creating bitmaps.
LOGREC_FIRST_FREE to not have to change the for() every time we
add a new record type.
storage/maria/ma_loghandler.h:
new UNDO and REDO
storage/maria/ma_open.c:
Move share.kfile.file=kfile up a bit, so that _ma_update_state_lsns()
can get its value, this fixes a bug where LSN_REPAIRED_BY_MARIA_CHK
was not corrected on disk by maria_open().
Store skip_redo_lsn in index' header.
maria_enable_indexes() had a bug for BLOCK_RECORD, where an empty
file has one page, not 0 bytes.
storage/maria/ma_recovery.c:
- Skip a corrupted, missing, or repaired-with-maria_chk, table in
recovery: don't fail, just go to next REDO or UNDO; but if an UNDO
is skipped in UNDO phase we issue warnings.
- Skip REDO|UNDO in REDO phase if <skip_redo_lsn.
- If UNDO phase fails, delete transactions to not make trnman
assert.
- Update skip_redo_lsn when playing REDO_CREATE_TABLE
- Don't record UNDOs for old transactions which we don't know (long_trid==0)
- Bugfix for UNDO_KEY_DELETE_WITH_ROOT (see ma_key_recover.c)
- Execution of UNDO_BULK_INSERT_WITH_REPAIR
- Don't try to find a page number in REDO_DELETE_ALL
- Pieces moved to ma_recovery_util.c
storage/maria/ma_rename.c:
name change
storage/maria/ma_static.c:
I modified layout of the index' header (inserted skip_redo_lsn in its middle)
storage/maria/ma_test2.c:
allow breaking the test towards the end, tests execution of
UNDO_KEY_DELETE_WITH_ROOT
storage/maria/ma_test_recovery.expected:
6 as testflag instead of 4
storage/maria/ma_test_recovery:
Increase the amount of rollback work to do when testing recovery
with ma_test2; this reproduces the UNDO_KEY_DELETE_WITH_ROOT bug.
storage/maria/maria_chk.c:
skip_redo_lsn should be updated too, for consistency.
Write a REDO_REPAIR after all operations (including sort-records)
have been done.
No reason to flush blocks after maria_chk_data_link() and
maria_sort_records(), there is maria_close() in the end.
write_log_record() is a function, to not clutter maria_chk().
storage/maria/maria_def.h:
New member skip_redo_lsn in the state, and comments
storage/maria/maria_pack.c:
skip_redo_lsn should be updated too, for consistency
storage/maria/ma_recovery_util.c:
_ma_redo_not_needed_for_page(), defined in ma_recovery.c, is needed
by ma_blockrec.c; this causes link issues, resolved by putting
_ma_redo_not_needed_for_page() into a new file (so that it is not
in the same file as repair-related objects of ma_recovery.c).
storage/maria/ma_recovery_util.h:
new file
2008-01-17 23:59:32 +01:00
|
|
|
page_store(log_data + FILEID_STORE_SIZE, from);
|
|
|
|
page_store(log_data + FILEID_STORE_SIZE + PAGE_STORE_SIZE, page);
|
Injecting more "const" declarations into code which does not change
pointed data.
I ran gcc -Wcast-qual on storage/maria, this identified un-needed casts,
a couple of functions which said they had a const parameter though
they changed the pointed content! This is fixed here. Some suspicious
places receive a comment.
The original intention of running -Wcast-qual was to find what code
changes R-tree keys: I added const words, but hidden casts
like those of int2store (casts target to (uint16*)) removed const
checking; -Wcast-qual helped find those hidden casts.
Log handler does not change the content pointed by LEX_STRING::str it
receives, so we now use a struct which has a const inside, to emphasize
this and be able to pass "const uchar*" buffers to log handler
without fear of their content being changed by it.
One-line fix for a merge glitch (when merging from MyISAM).
include/m_string.h:
As Maria's log handler uses LEX_STRING but never changes the content
pointed by LEX_STRING::str, and assigns uchar* into this member most
of the time, we introduce a new struct LEX_CUSTRING
(C const U unsigned) for the log handler.
include/my_global.h:
In macros which read pointed content: use const pointers so that
gcc -Wcast-qual does not warn about casting a const pointer to non-const.
include/my_handler.h:
In macros which read pointed content: use const pointers so that
gcc -Wcast-qual does not warn about casting a const pointer to non-const.
ha_find_null() does not change *a.
include/my_sys.h:
insert_dynamic() does not change *element.
include/myisampack.h:
In macros which read pointed content: use const pointers so that
gcc -Wcast-qual does not warn about casting a const pointer to non-const.
mysys/array.c:
insert_dynamic() does not change *element
mysys/my_handler.c:
ha_find_null() does not change *a
storage/maria/ma_bitmap.c:
Log handler receives const strings now
storage/maria/ma_blockrec.c:
Log handler receives const strings now.
_ma_apply_undo_row_delete/update() do change *header.
storage/maria/ma_blockrec.h:
correct prototype
storage/maria/ma_check.c:
Log handler receives const strings now. Un-needed casts
storage/maria/ma_checkpoint.c:
Log handler receives const strings now
storage/maria/ma_checksum.c:
unneeded cast
storage/maria/ma_commit.c:
Log handler receives const strings now
storage/maria/ma_create.c:
Log handler receives const strings now
storage/maria/ma_dbug.c:
fixing warning of gcc -Wcast-qual
storage/maria/ma_delete.c:
Log handler receives const strings now
storage/maria/ma_delete_all.c:
Log handler receives const strings now
storage/maria/ma_delete_table.c:
Log handler receives const strings now
storage/maria/ma_dynrec.c:
fixing some warnings of gcc -Wcast-qual. Unneeded casts removed.
Comment about function which lies.
storage/maria/ma_ft_parser.c:
fix for warnings of gcc -Wcast-qual, removing unneeded casts
storage/maria/ma_ft_update.c:
less casts, comment
storage/maria/ma_key.c:
less casts, stay const (warnings of gcc -Wcast-qual)
storage/maria/ma_key_recover.c:
Log handler receives const strings now
storage/maria/ma_loghandler.c:
Log handler receives const strings now
storage/maria/ma_loghandler.h:
Log handler receives const strings now
storage/maria/ma_loghandler_lsn.h:
In macros which read pointed content: use const pointers so that
gcc -Wcast-qual does not warn about casting a const pointer to non-const.
storage/maria/ma_page.c:
Log handler receives const strings now; more const
storage/maria/ma_recovery.c:
Log handler receives const strings now
storage/maria/ma_rename.c:
Log handler receives const strings now
storage/maria/ma_rt_index.c:
more const, to emphasize that functions don't change pointed content.
best_key= NULL was forgotten during merge from MyISAM a few days ago,
was causing a Valgrind warning
storage/maria/ma_rt_index.h:
new proto
storage/maria/ma_rt_key.c:
more const
storage/maria/ma_rt_key.h:
new proto
storage/maria/ma_rt_mbr.c:
more const for functions which deserve it
storage/maria/ma_rt_mbr.h:
new prototype
storage/maria/ma_rt_split.c:
make const what is not changed.
storage/maria/ma_search.c:
un-needed casts, more const
storage/maria/ma_sp_key.c:
more const
storage/maria/ma_unique.c:
un-needed casts.
storage/maria/ma_write.c:
Log handler receives const strings now
storage/maria/maria_def.h:
some more const
storage/maria/unittest/ma_test_loghandler-t.c:
Log handler receives const strings now
storage/maria/unittest/ma_test_loghandler_first_lsn-t.c:
Log handler receives const strings now
storage/maria/unittest/ma_test_loghandler_max_lsn-t.c:
Log handler receives const strings now
storage/maria/unittest/ma_test_loghandler_multigroup-t.c:
Log handler receives const strings now
storage/maria/unittest/ma_test_loghandler_multithread-t.c:
Log handler receives const strings now
storage/maria/unittest/ma_test_loghandler_noflush-t.c:
Log handler receives const strings now
storage/maria/unittest/ma_test_loghandler_nologs-t.c:
Log handler receives const strings now
storage/maria/unittest/ma_test_loghandler_pagecache-t.c:
Log handler receives const strings now
storage/maria/unittest/ma_test_loghandler_purge-t.c:
Log handler receives const strings now
2008-04-03 15:40:25 +02:00
|
|
|
log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data;
|
WL#3072 - Maria Recovery
Bulk insert: don't log REDO/UNDO for rows, log one UNDO which will
truncate files; this is an optimization and a bugfix (table was left
half-repaired by crash).
Repair: mark table crashed-on-repair at start, bump skip_redo_lsn at start,
this is easier for recovery (tells it to skip old REDOs or even UNDO
phase) and user (tells it to repair) in case of crash, sync files
in the end.
Recovery skips missing or corrupted table and moves to next record
(in REDO or UNDO phase) to be more robust; warns if happens in UNDO phase.
Bugfix for UNDO_KEY_DELETE_WITH_ROOT (tested in ma_test_recovery)
and maria_enable_indexes().
Create missing bitmaps when needed (there can be more than one to create,
in rare cases), log a record for this.
include/myisamchk.h:
new flag: bulk insert repair mustn't bump create_rename_lsn
mysql-test/lib/mtr_report.pl:
skip normal warning in maria-recovery.test
mysql-test/r/maria-recovery.result:
result: crash before bulk insert is committed, causes proper rollback,
and crash right after OPTIMIZE replaces index file with new index file
leads to table marked corrupted and recovery not failing.
mysql-test/t/maria-recovery.test:
- can't check the table or it would commit the transaction,
but check is made after recovery.
- test of crash before bulk-insert-with-repair is committed
(to see if it is rolled back), and of crash after OPTIMIZE has replaced
index file but not finished all operations (to see if recovery fails -
it used to assert when trying to execute an old REDO on the new
index).
storage/maria/CMakeLists.txt:
new file
storage/maria/Makefile.am:
new file
storage/maria/ha_maria.cc:
- If bulk insert on a transactional table using an index repair:
table is initially empty, so don't log REDO/UNDO for data rows
(optimization), just log an UNDO_BULK_INSERT_WITH_REPAIR
which will, if executed, empty the data and index file. Re-enable
logging in end_bulk_insert().
- write log record for repair operation only after it's fully done,
index sort including (maria_repair*() used to write the log record).
- Adding back file->trn=NULL which was removed by mistake earlier.
storage/maria/ha_maria.h:
new member (see ha_maria.cc)
storage/maria/ma_bitmap.c:
Functions to create missing bitmaps:
- one function which creates missing bitmaps in page cache, except
the missing one with max offset which it does not put into page cache
as it will be modified very soon.
- one function which the one above calls, and creates bitmaps in page
cache
- one function to execute REDO_BITMAP_NEW_PAGE which uses the second
one above.
storage/maria/ma_blockrec.c:
- when logging REDO_DELETE_ALL, not only 'records' and 'checksum'
has to be reset under log's mutex.
- execution of REDO_INSERT_ROW_BLOBS now checks the dirty pages' list
- execution of UNDO_BULK_INSERT_WITH_REPAIR
storage/maria/ma_blockrec.h:
new functions
storage/maria/ma_check.c:
- table-flush-before-repair is moved to a separate function reused
by maria_sort_index(); syncing is added
- maria_repair() is allowed to re-enable logging only if it is the one
which disabled it.
- "_ma_flush_table_files_after_repair" was a bad name, it's not after
repair now, and it should not sync as we do more changes to the files
shortly after (sync is postponed to when writing the log record)
- REDO_REPAIR record should be written only after all repair
operations (in particular after sorting index in ha_maria::repair())
- close to the end of repair by sort, flushing of pages must happen
also in the non-quick case, to prepare for the sync at end.
- in parallel repair, some page flushes are not needed as done
by initialize_variables_for_repair().
storage/maria/ma_create.c:
Update skip_redo_lsn, create_rename_lsn optionally.
storage/maria/ma_delete_all.c:
Need to sync files at end of maria_delete_all_rows(), if transactional.
storage/maria/ma_extra.c:
During repair, we sometimes call _ma_flush_table_files() (via
_ma_flush_table_files_before_swap()) while there is a WRITE_CACHE.
storage/maria/ma_key_recover.c:
- when we see CLR_END for UNDO_BULK_INSERT_WITH_REPAIR, re-enable
indices.
- fixing bug: _ma_apply_undo_key_delete() parsed UNDO_KEY_DELETE_WITH_ROOT
wrongly, leading to recovery failure
storage/maria/ma_key_recover.h:
new prototype
storage/maria/ma_locking.c:
DBUG_VOID_RETURN missing
storage/maria/ma_loghandler.c:
UNDO for bulk insert with repair, and REDO for creating bitmaps.
LOGREC_FIRST_FREE to not have to change the for() every time we
add a new record type.
storage/maria/ma_loghandler.h:
new UNDO and REDO
storage/maria/ma_open.c:
Move share.kfile.file=kfile up a bit, so that _ma_update_state_lsns()
can get its value, this fixes a bug where LSN_REPAIRED_BY_MARIA_CHK
was not corrected on disk by maria_open().
Store skip_redo_lsn in index' header.
maria_enable_indexes() had a bug for BLOCK_RECORD, where an empty
file has one page, not 0 bytes.
storage/maria/ma_recovery.c:
- Skip a corrupted, missing, or repaired-with-maria_chk, table in
recovery: don't fail, just go to next REDO or UNDO; but if an UNDO
is skipped in UNDO phase we issue warnings.
- Skip REDO|UNDO in REDO phase if <skip_redo_lsn.
- If UNDO phase fails, delete transactions to not make trnman
assert.
- Update skip_redo_lsn when playing REDO_CREATE_TABLE
- Don't record UNDOs for old transactions which we don't know (long_trid==0)
- Bugfix for UNDO_KEY_DELETE_WITH_ROOT (see ma_key_recover.c)
- Execution of UNDO_BULK_INSERT_WITH_REPAIR
- Don't try to find a page number in REDO_DELETE_ALL
- Pieces moved to ma_recovery_util.c
storage/maria/ma_rename.c:
name change
storage/maria/ma_static.c:
I modified layout of the index' header (inserted skip_redo_lsn in its middle)
storage/maria/ma_test2.c:
allow breaking the test towards the end, tests execution of
UNDO_KEY_DELETE_WITH_ROOT
storage/maria/ma_test_recovery.expected:
6 as testflag instead of 4
storage/maria/ma_test_recovery:
Increase the amount of rollback work to do when testing recovery
with ma_test2; this reproduces the UNDO_KEY_DELETE_WITH_ROOT bug.
storage/maria/maria_chk.c:
skip_redo_lsn should be updated too, for consistency.
Write a REDO_REPAIR after all operations (including sort-records)
have been done.
No reason to flush blocks after maria_chk_data_link() and
maria_sort_records(), there is maria_close() in the end.
write_log_record() is a function, to not clutter maria_chk().
storage/maria/maria_def.h:
New member skip_redo_lsn in the state, and comments
storage/maria/maria_pack.c:
skip_redo_lsn should be updated too, for consistency
storage/maria/ma_recovery_util.c:
_ma_redo_not_needed_for_page(), defined in ma_recovery.c, is needed
by ma_blockrec.c; this causes link issues, resolved by putting
_ma_redo_not_needed_for_page() into a new file (so that it is not
in the same file as repair-related objects of ma_recovery.c).
storage/maria/ma_recovery_util.h:
new file
2008-01-17 23:59:32 +01:00
|
|
|
log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
|
|
|
|
/*
|
|
|
|
We don't use info->trn so that this REDO is always executed even if
|
|
|
|
the UNDO does not reach disk due to crash. This is also consistent with
|
|
|
|
the fact that the new bitmap pages are not pinned.
|
|
|
|
*/
|
|
|
|
if (translog_write_record(&lsn, LOGREC_REDO_BITMAP_NEW_PAGE,
|
|
|
|
&dummy_transaction_object, info,
|
|
|
|
(translog_size_t)sizeof(log_data),
|
|
|
|
TRANSLOG_INTERNAL_PARTS + 1, log_array,
|
|
|
|
log_data, NULL))
|
|
|
|
goto err;
|
|
|
|
/*
|
|
|
|
No need to flush the log: the bitmap pages we are going to create will
|
|
|
|
flush it when they go to disk.
|
|
|
|
*/
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
Last bitmap page. It has special creation: will go to the page cache
|
|
|
|
only later as we are going to modify it very soon.
|
|
|
|
*/
|
|
|
|
bzero(bitmap->map, bitmap->block_size);
|
|
|
|
bitmap->used_size= 0;
|
|
|
|
#ifndef DBUG_OFF
|
|
|
|
memcpy(bitmap->map + bitmap->block_size, bitmap->map, bitmap->block_size);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* Last bitmap page to create before 'page' */
|
|
|
|
DBUG_ASSERT(page >= bitmap->pages_covered);
|
|
|
|
to= page - bitmap->pages_covered;
|
|
|
|
/*
|
|
|
|
In run-time situations, from<=to is always false, i.e. we always create
|
|
|
|
one bitmap at a time ('page').
|
|
|
|
*/
|
|
|
|
if ((from <= to) &&
|
|
|
|
_ma_bitmap_create_missing_into_pagecache(share, bitmap, from, to,
|
|
|
|
bitmap->map))
|
|
|
|
goto err;
|
|
|
|
|
|
|
|
share->state.state.data_file_length= (page + 1) * bitmap->block_size;
|
|
|
|
|
|
|
|
DBUG_RETURN(FALSE);
|
|
|
|
err:
|
|
|
|
DBUG_RETURN(TRUE);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
my_bool _ma_apply_redo_bitmap_new_page(MARIA_HA *info,
|
|
|
|
LSN lsn __attribute__ ((unused)),
|
|
|
|
const uchar *header)
|
|
|
|
{
|
|
|
|
MARIA_SHARE *share= info->s;
|
|
|
|
MARIA_FILE_BITMAP *bitmap= &share->bitmap;
|
|
|
|
my_bool error;
|
|
|
|
pgcache_page_no_t from, to, min_from;
|
|
|
|
DBUG_ENTER("_ma_apply_redo_bitmap_new_page");
|
|
|
|
|
|
|
|
from= page_korr(header);
|
|
|
|
to= page_korr(header + PAGE_STORE_SIZE);
|
|
|
|
DBUG_PRINT("info", ("from: %lu to: %lu", (ulong)from, (ulong)to));
|
|
|
|
if ((from > to) ||
|
|
|
|
(from % bitmap->pages_covered) != 0 ||
|
|
|
|
(to % bitmap->pages_covered) != 0)
|
|
|
|
{
|
|
|
|
error= TRUE; /* corrupted log record */
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
min_from= (share->state.state.data_file_length / bitmap->block_size - 1) /
|
|
|
|
bitmap->pages_covered + 1;
|
|
|
|
min_from*= bitmap->pages_covered;
|
|
|
|
if (from < min_from)
|
|
|
|
{
|
|
|
|
DBUG_PRINT("info", ("overwrite bitmap pages from %lu", (ulong)min_from));
|
|
|
|
/*
|
|
|
|
We have to overwrite. It could be that there was a bitmap page in
|
|
|
|
memory, covering a data page which went to disk, then crash: the
|
|
|
|
bitmap page is now full of zeros and is ==min_from, we have to overwrite
|
|
|
|
it with correct checksum.
|
|
|
|
*/
|
|
|
|
}
|
|
|
|
share->state.changed|= STATE_CHANGED;
|
|
|
|
bzero(info->buff, bitmap->block_size);
|
|
|
|
if (!(error=
|
|
|
|
_ma_bitmap_create_missing_into_pagecache(share, bitmap, from, to,
|
|
|
|
info->buff)))
|
|
|
|
share->state.state.data_file_length= (to + 1) * bitmap->block_size;
|
|
|
|
|
|
|
|
err:
|
|
|
|
DBUG_RETURN(error);
|
|
|
|
}
|