mirror of
https://github.com/MariaDB/server.git
synced 2025-01-16 20:12:31 +01:00
2fcff8988a
command-line" and BUG#34062 "Maria table corruption on master". Use 5 bytes (instead of 4) to store page's number in the checkpoint record, to allow bigger table (1PB with maria-block-size=1kB). Help pushbuild not run out of memory by moving the portion of maria-recovery.test which generates lots of data into a -big.test. mysql-test/r/maria-recovery.result: result moved mysql-test/t/maria-recovery.test: piece which generates much data moved to maria-recovery-big.test mysys/my_pread.c: To fix BUG#34062, where a 1.1TB file was generated due to a wrong pwrite offset, it was useful to not lose precision on 'offset' in DBUG_PRINT, so that the crazy value is visible. mysys/my_read.c: To fix BUG#34062, where a 1.1TB file was generated due to a wrong pwrite offset, it was useful to not lose precision on 'offset' in DBUG_PRINT, so that the crazy value is visible. mysys/my_write.c: To fix BUG#34062, where a 1.1TB file was generated due to a wrong pwrite offset, it was useful to not lose precision on 'offset' in DBUG_PRINT, so that the crazy value is visible. storage/maria/ha_maria.cc: When starting a bulk insert, we throw away dirty index pages from the cache. Unique (non disabled) key insertions thus read out-of-date pages from the disk leading to BUG#34062 "Maria table corruption on master": a DELETE in procedure viewer_sp() had deleted all rows of viewer_tbl2 one by one, putting index page 1 into key_del; that page was thrown away at start of INSERT SELECT, then the INSERT SELECT needed a page to insert keys, looked at key_del, found 1, read page 1 from disk, and its out-of-date content was used to set the new value of key_del (crazy value of 1TB), then a later insertion needed another index page, tried to read page at this crazy offset and failed, leading to corruption mark. The fix is to destroy out-of-date pages and make the state consistent with that, i.e. call maria_delete_all_rows(). 
storage/maria/ma_blockrec.c: Special hook for UNDO_BULK_INSERT storage/maria/ma_blockrec.h: special hook for UNDO_BULK_INSERT storage/maria/ma_check.c: Fix for BUG#34114 "maria_chk reports false error when several tables on command-line": if the Nth (on the command line) table was BLOCK_RECORD it would start checks by using the param->record_checksum computed by checks of table N-1. storage/maria/ma_delete_all.c: comment storage/maria/ma_loghandler.c: special hook for UNDO_BULK_INSERT storage/maria/ma_page.c: comment storage/maria/ma_pagecache.c: page number is 5 bytes in checkpoint record now (allows bigger tables) storage/maria/ma_recovery.c: page number is 5 bytes in checkpoint record now storage/maria/ma_recovery_util.c: page number is 5 bytes now storage/maria/ma_write.c: typo mysql-test/r/maria-recovery-big.result: result is correct mysql-test/t/maria-recovery-big-master.opt: usual options for recovery tests mysql-test/t/maria-recovery-big.test: Moving out the big blob test to a -big test (it exhausts memory when using /dev/shm on certain machines)
138 lines
4.4 KiB
C
138 lines
4.4 KiB
C
/* Copyright (C) 2006,2007,2008 MySQL AB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
|
|
|
|
/*
  Q: Why isn't ma_recovery_util.c simply merged into ma_recovery.c ?

  A: Because ma_recovery.c invokes objects from ma_check.c (like
  maria_chk_init()), which causes the following problem:
  if a source file a.c of a program invokes a function defined in
  ma_recovery.c, then a.o depends on ma_recovery.o which depends on
  ma_check.o: the linker thus brings in ma_check.o. That brings in the
  dependencies of ma_check.o, which are definitions of _ma_check_print_info()
  etc; if a.o does not define them then the ones of ha_maria.o are used,
  i.e. ha_maria.o is linked into the program, and this brings the dependencies
  of ha_maria.o on mysqld.o into the program's linking, which thus fails, as
  the program is not linked with mysqld.o.
  Thus, while several functions defined in ma_recovery.c could be useful to
  other files, they cannot be used by them.
  So we are going to gradually move a great share of ma_recovery.c's exported
  functions into the present file, to isolate the problematic components and
  avoid the problem.
*/
|
|
|
|
#include "maria_def.h"
|
|
|
|
HASH all_dirty_pages;
|
|
struct st_dirty_page /* used only in the REDO phase */
|
|
{
|
|
uint64 file_and_page_id;
|
|
LSN rec_lsn;
|
|
};
|
|
/*
|
|
LSN after which dirty pages list does not apply. Can be slightly before
|
|
when ma_checkpoint_execute() started.
|
|
*/
|
|
LSN checkpoint_start= LSN_IMPOSSIBLE;
|
|
|
|
/** @todo looks like duplicate of recovery_message_printed */
|
|
my_bool procent_printed;
|
|
FILE *tracef; /**< trace file for debugging */
|
|
|
|
|
|
/** @brief Prints to a trace file if it is not NULL */
|
|
void tprint(FILE *trace_file __attribute__ ((unused)),
|
|
const char *format __attribute__ ((unused)), ...)
|
|
{
|
|
va_list args;
|
|
va_start(args, format);
|
|
DBUG_PRINT("info", ("%s", format));
|
|
if (trace_file != NULL)
|
|
{
|
|
if (procent_printed)
|
|
{
|
|
procent_printed= 0;
|
|
fputc('\n', trace_file);
|
|
}
|
|
vfprintf(trace_file, format, args);
|
|
}
|
|
va_end(args);
|
|
}
|
|
|
|
|
|
void eprint(FILE *trace_file __attribute__ ((unused)),
|
|
const char *format __attribute__ ((unused)), ...)
|
|
{
|
|
va_list args;
|
|
va_start(args, format);
|
|
DBUG_PRINT("error", ("%s", format));
|
|
if (!trace_file)
|
|
trace_file= stderr;
|
|
|
|
if (procent_printed)
|
|
{
|
|
/* In silent mode, print on another line than the 0% 10% 20% line */
|
|
procent_printed= 0;
|
|
fputc('\n', trace_file);
|
|
}
|
|
vfprintf(trace_file , format, args);
|
|
fputc('\n', trace_file);
|
|
if (trace_file != stderr)
|
|
{
|
|
va_start(args, format);
|
|
my_printv_error(HA_ERR_INITIALIZATION, format, MYF(0), args);
|
|
}
|
|
va_end(args);
|
|
fflush(trace_file);
|
|
}
|
|
|
|
|
|
/**
|
|
Tells if the dirty pages list found in checkpoint record allows to ignore a
|
|
REDO for a certain page.
|
|
|
|
@param shortid short id of the table
|
|
@param lsn REDO record's LSN
|
|
@param page page number
|
|
@param index TRUE if index page, FALSE if data page
|
|
*/
|
|
|
|
my_bool _ma_redo_not_needed_for_page(uint16 shortid, LSN lsn,
|
|
pgcache_page_no_t page,
|
|
my_bool index)
|
|
{
|
|
if (cmp_translog_addr(lsn, checkpoint_start) < 0)
|
|
{
|
|
/*
|
|
64-bit key is formed like this:
|
|
Most significant byte: 0 if data page, 1 if index page
|
|
Next 2 bytes: table's short id
|
|
Next 5 bytes: page number
|
|
*/
|
|
uint64 file_and_page_id=
|
|
(((uint64)((index << 16) | shortid)) << 40) | page;
|
|
struct st_dirty_page *dirty_page= (struct st_dirty_page *)
|
|
hash_search(&all_dirty_pages,
|
|
(uchar *)&file_and_page_id, sizeof(file_and_page_id));
|
|
DBUG_PRINT("info", ("in dirty pages list: %d", dirty_page != NULL));
|
|
if ((dirty_page == NULL) ||
|
|
cmp_translog_addr(lsn, dirty_page->rec_lsn) < 0)
|
|
{
|
|
tprint(tracef, ", ignoring because of dirty_pages list\n");
|
|
return TRUE;
|
|
}
|
|
}
|
|
return FALSE;
|
|
}
|