mirror of
https://github.com/MariaDB/server.git
synced 2025-01-27 09:14:17 +01:00
185591c1c0
The bug was in the test case. The problem was that maria_empty_logs.inc deleted aria log files before the server was properly shutdown. Fixed by waiting for pid file to disappear before starting to delete log files. Other things: - Fixed that translog_purge_at_flush() will not stop deleting files even if one file could not be deleted.
9351 lines
307 KiB
C
9351 lines
307 KiB
C
/* Copyright (C) 2007 MySQL AB & Sanja Belkin. 2010 Monty Program Ab.
|
|
Copyright (c) 2020, MariaDB Corporation.
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; version 2 of the License.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
|
|
|
|
#include "maria_def.h"
|
|
#include "trnman.h"
|
|
#include "ma_blockrec.h" /* for some constants and in-write hooks */
|
|
#include "ma_key_recover.h" /* For some in-write hooks */
|
|
#include "ma_checkpoint.h"
|
|
#include "ma_servicethread.h"
|
|
#include "ma_recovery.h"
|
|
#include "ma_loghandler_lsn.h"
|
|
#include "ma_recovery_util.h"
|
|
|
|
/*
|
|
On Windows, neither my_open() nor mysql_file_sync() work for directories.
|
|
Also there is no need to flush filesystem changes, i.e. to sync()
|
|
directories.
|
|
*/
|
|
#ifdef __WIN__
|
|
#define sync_dir(A,B) 0
|
|
#else
|
|
#define sync_dir(A,B) mysql_file_sync(A,B)
|
|
#endif
|
|
|
|
/**
|
|
@file
|
|
@brief Module which writes and reads to a transaction log
|
|
*/
|
|
|
|
/* 0xFF can never be valid first byte of a chunk */
|
|
#define TRANSLOG_FILLER 0xFF
|
|
|
|
/* number of opened log files in the pagecache (should be at least 2) */
|
|
#define OPENED_FILES_NUM 3
|
|
#define CACHED_FILES_NUM 5
|
|
#define CACHED_FILES_NUM_DIRECT_SEARCH_LIMIT 7
|
|
#if CACHED_FILES_NUM > CACHED_FILES_NUM_DIRECT_SEARCH_LIMIT
|
|
#include <hash.h>
|
|
#include <m_ctype.h>
|
|
#endif
|
|
|
|
/** @brief protects checkpoint_in_progress */
|
|
static mysql_mutex_t LOCK_soft_sync;
|
|
/** @brief for killing the background checkpoint thread */
|
|
static mysql_cond_t COND_soft_sync;
|
|
/** @brief control structure for checkpoint background thread */
|
|
static MA_SERVICE_THREAD_CONTROL soft_sync_control=
|
|
{0, FALSE, FALSE, &LOCK_soft_sync, &COND_soft_sync};
|
|
|
|
uint log_purge_disabled= 0;
|
|
|
|
|
|
/* transaction log file descriptor */
|
|
typedef struct st_translog_file
|
|
{
|
|
uint32 number;
|
|
PAGECACHE_FILE handler;
|
|
my_bool was_recovered;
|
|
my_bool is_sync;
|
|
} TRANSLOG_FILE;
|
|
|
|
/* records buffer size (should be TRANSLOG_PAGE_SIZE * n) */
|
|
#define TRANSLOG_WRITE_BUFFER (1024*1024)
|
|
/*
|
|
pagecache_read/write/inject() use bmove512() on their buffers so those must
|
|
be long-aligned, which we guarantee by using the type below:
|
|
*/
|
|
typedef union
|
|
{
|
|
ulonglong dummy;
|
|
uchar buffer[TRANSLOG_PAGE_SIZE];
|
|
} TRANSLOG_PAGE_SIZE_BUFF;
|
|
|
|
#define MAX_TRUNSLOG_USED_BUFFERS 3
|
|
|
|
typedef struct
|
|
{
|
|
struct st_translog_buffer *buff[MAX_TRUNSLOG_USED_BUFFERS];
|
|
uint8 wrt_ptr;
|
|
uint8 unlck_ptr;
|
|
} TRUNSLOG_USED_BUFFERS;
|
|
|
|
static void
|
|
used_buffs_init(TRUNSLOG_USED_BUFFERS *buffs)
|
|
{
|
|
buffs->unlck_ptr= buffs->wrt_ptr= 0;
|
|
}
|
|
|
|
static void
|
|
used_buffs_add(TRUNSLOG_USED_BUFFERS *buffs,
|
|
struct st_translog_buffer *buff);
|
|
|
|
static void
|
|
used_buffs_register_unlock(TRUNSLOG_USED_BUFFERS *buffs,
|
|
struct st_translog_buffer *buff);
|
|
|
|
static void
|
|
used_buffs_urgent_unlock(TRUNSLOG_USED_BUFFERS *buffs);
|
|
|
|
/* min chunk length */
|
|
#define TRANSLOG_MIN_CHUNK 3
|
|
/*
|
|
Number of buffers used by loghandler
|
|
|
|
Should be at least 4, because one thread can block up to 2 buffers in
|
|
normal circumstances (less than half of one and full other, or just
|
|
switched one and other). But if we meet the end of the file in the middle and
|
|
have to switch buffer it will be 3. + 1 buffer for flushing/writing.
|
|
We have a bigger number here for higher concurrency and to make division
|
|
faster.
|
|
|
|
The number should be power of 2 to be fast.
|
|
*/
|
|
#define TRANSLOG_BUFFERS_NO 8
|
|
/* number of bytes (+ header) which can be unused on first page in sequence */
|
|
#define TRANSLOG_MINCHUNK_CONTENT 1
|
|
/* version of log file */
|
|
#define TRANSLOG_VERSION_ID 10000 /* 1.00.00 */
|
|
|
|
#define TRANSLOG_PAGE_FLAGS 6 /* transaction log page flags offset */
|
|
|
|
/* Maximum length of compressed LSNs (the worst case of whole LSN storing) */
|
|
#define COMPRESSED_LSN_MAX_STORE_SIZE (2 + LSN_STORE_SIZE)
|
|
#define MAX_NUMBER_OF_LSNS_PER_RECORD 2
|
|
|
|
|
|
/*
  Maximum LSN known for buffer B: its own last_lsn when one is set,
  otherwise the last_lsn inherited from the previous buffer.
  Note that B is evaluated more than once.
*/
#define BUFFER_MAX_LSN(B) \
  ((B)->last_lsn != LSN_IMPOSSIBLE ? (B)->last_lsn : (B)->prev_last_lsn)
|
|
|
|
/* log write buffer descriptor */
|
|
struct st_translog_buffer
|
|
{
|
|
/*
|
|
Cache for current log. Comes first to be aligned for bmove512() in
|
|
pagecache_inject()
|
|
*/
|
|
uchar buffer[TRANSLOG_WRITE_BUFFER];
|
|
/*
|
|
Maximum LSN of records which ends in this buffer (or IMPOSSIBLE_LSN
|
|
if no LSNs ends here)
|
|
*/
|
|
LSN last_lsn;
|
|
/* last_lsn of previous buffer or IMPOSSIBLE_LSN if it is very first one */
|
|
LSN prev_last_lsn;
|
|
/* This buffer offset in the file */
|
|
TRANSLOG_ADDRESS offset;
|
|
/*
|
|
Next buffer offset in the file (it is not always offset + size,
|
|
in case of flush by LSN it can be offset + size - TRANSLOG_PAGE_SIZE)
|
|
*/
|
|
TRANSLOG_ADDRESS next_buffer_offset;
|
|
/* Previous buffer offset to detect it flush finish */
|
|
TRANSLOG_ADDRESS prev_buffer_offset;
|
|
/*
|
|
If the buffer was forced to close it save value of its horizon
|
|
otherwise LSN_IMPOSSIBLE
|
|
*/
|
|
TRANSLOG_ADDRESS pre_force_close_horizon;
|
|
/*
|
|
How much is written (or will be written when copy_to_buffer_in_progress
|
|
become 0) to this buffer
|
|
*/
|
|
translog_size_t size;
|
|
/*
|
|
When moving from one log buffer to another, we write the last of the
|
|
previous buffer to file and then move to start using the new log
|
|
buffer. In the case of a part filed last page, this page is not moved
|
|
to the start of the new buffer but instead we set the 'skip_data'
|
|
variable to tell us how much data at the beginning of the buffer is not
|
|
relevant.
|
|
*/
|
|
uint skipped_data;
|
|
/* File handler for this buffer */
|
|
TRANSLOG_FILE *file;
|
|
/* Threads which are waiting for buffer filling/freeing */
|
|
mysql_cond_t waiting_filling_buffer;
|
|
/*
|
|
Number of records which are in copy progress.
|
|
|
|
Controlled via translog_buffer_increase_writers() and
|
|
translog_buffer_decrease_writers().
|
|
|
|
1 Simple case: translog_force_current_buffer_to_finish both called in
|
|
the same procedure.
|
|
|
|
2 Simple case: translog_write_variable_record_1group:
|
|
translog_advance_pointer() increase writer of the buffer and
|
|
translog_buffer_decrease_writers() decrease it.
|
|
|
|
Usual case:
|
|
1) translog_advance_pointer (i.e. reserve place for future writing)
|
|
increase writers for all buffers where place reserved.
|
|
Simpliest case: just all space reserved in one buffer
|
|
complex case: end of the first buffer, all second buffer, beginning
|
|
of the third buffer.
|
|
2) When we finish with writing translog_chaser_page_next() will be
|
|
called and unlock the buffer by decreasing number of writers.
|
|
*/
|
|
uint copy_to_buffer_in_progress;
|
|
/* list of waiting buffer ready threads */
|
|
struct st_my_thread_var *waiting_flush;
|
|
/*
|
|
If true then previous buffer overlap with this one (due to flush of
|
|
loghandler, the last page of that buffer is the same as the first page
|
|
of this buffer) and have to be written first (because contain old
|
|
content of page which present in both buffers)
|
|
*/
|
|
my_bool overlay;
|
|
uint buffer_no;
|
|
/*
|
|
Lock for the buffer.
|
|
|
|
Current buffer also lock the whole handler (if one want lock the handler
|
|
one should lock the current buffer).
|
|
|
|
Buffers are locked only in one direction (with overflow and beginning
|
|
from the first buffer). If we keep lock on buffer N we can lock only
|
|
buffer N+1 (never N-1).
|
|
|
|
One thread do not lock more then 2 buffer in a time, so to make dead
|
|
lock it should be N thread (where N equal number of buffers) takes one
|
|
buffer and try to lock next. But it is impossible because there is only
|
|
2 cases when thread take 2 buffers: 1) one thread finishes current
|
|
buffer (where horizon is) and start next (to which horizon moves). 2)
|
|
flush start from buffer after current (oldest) and go till the current
|
|
crabbing by buffer sequence. And there is only one flush in a moment
|
|
(they are serialised).
|
|
|
|
Because of above and number of buffers equal 5 we can't get dead lock (it is
|
|
impossible to get all 5 buffers locked simultaneously).
|
|
*/
|
|
mysql_mutex_t mutex;
|
|
/*
|
|
Some thread is going to close the buffer and it should be
|
|
done only by that thread
|
|
*/
|
|
my_bool is_closing_buffer;
|
|
/*
|
|
Version of the buffer increases every time buffer the buffer flushed.
|
|
With file and offset it allow detect buffer changes
|
|
*/
|
|
uint8 ver;
|
|
|
|
/*
|
|
When previous buffer sent to disk it set its address here to allow
|
|
to detect when it is done
|
|
(we have to keep it in this buffer to lock buffers only in one direction).
|
|
*/
|
|
TRANSLOG_ADDRESS prev_sent_to_disk;
|
|
mysql_cond_t prev_sent_to_disk_cond;
|
|
};
|
|
|
|
|
|
struct st_buffer_cursor
|
|
{
|
|
TRUNSLOG_USED_BUFFERS buffs;
|
|
/* pointer into the buffer */
|
|
uchar *ptr;
|
|
/* current buffer */
|
|
struct st_translog_buffer *buffer;
|
|
/* How many bytes we wrote on the current page */
|
|
uint16 current_page_fill;
|
|
/*
|
|
How many times we write the page on the disk during flushing process
|
|
(for sector protection).
|
|
*/
|
|
uint16 write_counter;
|
|
/* previous write offset */
|
|
uint16 previous_offset;
|
|
/* Number of current buffer */
|
|
uint8 buffer_no;
|
|
/*
|
|
True if it is just filling buffer after advancing the pointer to
|
|
the horizon.
|
|
*/
|
|
my_bool chaser;
|
|
/*
|
|
Is current page of the cursor already finished (sector protection
|
|
should be applied if it is needed)
|
|
*/
|
|
my_bool protected;
|
|
};
|
|
|
|
|
|
typedef uint8 dirty_buffer_mask_t;
|
|
|
|
struct st_translog_descriptor
|
|
{
|
|
/* *** Parameters of the log handler *** */
|
|
|
|
/* Page cache for the log reads */
|
|
PAGECACHE *pagecache;
|
|
uint flags;
|
|
/* File open flags */
|
|
uint open_flags;
|
|
/* max size of one log size (for new logs creation) */
|
|
uint32 log_file_max_size;
|
|
uint32 server_version;
|
|
/* server ID (used for replication) */
|
|
uint32 server_id;
|
|
/* Loghandler's buffer capacity in case of chunk 2 filling */
|
|
uint32 buffer_capacity_chunk_2;
|
|
/*
|
|
Half of the buffer capacity in case of chunk 2 filling,
|
|
used to decide will we write a record in one group or many.
|
|
It is written to the variable just to avoid devision every
|
|
time we need it.
|
|
*/
|
|
uint32 half_buffer_capacity_chunk_2;
|
|
/* Page overhead calculated by flags (whether CRC is enabled, etc) */
|
|
uint16 page_overhead;
|
|
/*
|
|
Page capacity ("useful load") calculated by flags
|
|
(TRANSLOG_PAGE_SIZE - page_overhead-1)
|
|
*/
|
|
uint16 page_capacity_chunk_2;
|
|
/* Path to the directory where we store log store files */
|
|
char directory[FN_REFLEN];
|
|
|
|
/* *** Current state of the log handler *** */
|
|
/* list of opened files */
|
|
DYNAMIC_ARRAY open_files;
|
|
/* min/max number of file in the array */
|
|
uint32 max_file, min_file;
|
|
/* the opened files list guard */
|
|
mysql_rwlock_t open_files_lock;
|
|
|
|
/*
|
|
File descriptor of the directory where we store log files for syncing
|
|
it.
|
|
*/
|
|
File directory_fd;
|
|
/* buffers for log writing */
|
|
struct st_translog_buffer buffers[TRANSLOG_BUFFERS_NO];
|
|
/* Mask where 1 in position N mean that buffer N is not flushed */
|
|
dirty_buffer_mask_t dirty_buffer_mask;
|
|
/* The above variable protection */
|
|
mysql_mutex_t dirty_buffer_mask_lock;
|
|
/*
|
|
horizon - visible end of the log (here is absolute end of the log:
|
|
position where next chunk can start
|
|
*/
|
|
TRANSLOG_ADDRESS horizon;
|
|
/* horizon buffer cursor */
|
|
struct st_buffer_cursor bc;
|
|
/* maximum LSN of the current (not finished) file */
|
|
LSN max_lsn;
|
|
|
|
/*
|
|
Last flushed LSN (protected by log_flush_lock).
|
|
Pointers in the log ordered like this:
|
|
last_lsn_checked <= flushed <= sent_to_disk <= in_buffers_only <=
|
|
max_lsn <= horizon
|
|
*/
|
|
LSN flushed;
|
|
/* Last LSN sent to the disk (but maybe not written yet) */
|
|
LSN sent_to_disk;
|
|
/* Horizon from which log started after initialization */
|
|
TRANSLOG_ADDRESS log_start;
|
|
TRANSLOG_ADDRESS previous_flush_horizon;
|
|
/* All what is after this address is not sent to disk yet */
|
|
TRANSLOG_ADDRESS in_buffers_only;
|
|
/* protection of sent_to_disk and in_buffers_only */
|
|
mysql_mutex_t sent_to_disk_lock;
|
|
/*
|
|
Protect flushed (see above) and for flush serialization (will
|
|
be removed in v1.5
|
|
*/
|
|
mysql_mutex_t log_flush_lock;
|
|
mysql_cond_t log_flush_cond;
|
|
mysql_cond_t new_goal_cond;
|
|
|
|
/* Protects changing of headers of finished files (max_lsn) */
|
|
mysql_mutex_t file_header_lock;
|
|
|
|
/*
|
|
Sorted array (with protection) of files where we started writing process
|
|
and so we can't give last LSN yet
|
|
*/
|
|
mysql_mutex_t unfinished_files_lock;
|
|
DYNAMIC_ARRAY unfinished_files;
|
|
|
|
/*
|
|
minimum number of still need file calculeted during last
|
|
translog_purge call
|
|
*/
|
|
uint32 min_need_file;
|
|
/* Purger data: minimum file in the log (or 0 if unknown) */
|
|
uint32 min_file_number;
|
|
/* Protect purger from many calls and it's data */
|
|
mysql_mutex_t purger_lock;
|
|
/* last low water mark checked */
|
|
LSN last_lsn_checked;
|
|
/**
|
|
Must be set to 0 under loghandler lock every time a new LSN
|
|
is generated.
|
|
*/
|
|
my_bool is_everything_flushed;
|
|
/* True when flush pass is in progress */
|
|
my_bool flush_in_progress;
|
|
/* The flush number (used to distinguish two flushes goes one by one) */
|
|
volatile int flush_no;
|
|
/* Next flush pass variables */
|
|
TRANSLOG_ADDRESS next_pass_max_lsn;
|
|
pthread_t max_lsn_requester;
|
|
};
|
|
|
|
static struct st_translog_descriptor log_descriptor;
|
|
|
|
ulong log_purge_type= TRANSLOG_PURGE_IMMIDIATE;
|
|
ulong log_file_size= TRANSLOG_FILE_SIZE;
|
|
/* sync() of log files directory mode */
|
|
ulong sync_log_dir= TRANSLOG_SYNC_DIR_NEWFILE;
|
|
ulong maria_group_commit= TRANSLOG_GCOMMIT_NONE;
|
|
ulong maria_group_commit_interval= 0;
|
|
|
|
/* Marker for end of log */
|
|
static uchar end_of_log= 0;
|
|
#define END_OF_LOG &end_of_log
|
|
/**
|
|
Switch for "soft" sync (no real sync() but periodical sync by service
|
|
thread)
|
|
*/
|
|
static volatile my_bool soft_sync= FALSE;
|
|
/**
|
|
Switch for "hard" group commit mode
|
|
*/
|
|
static volatile my_bool hard_group_commit= FALSE;
|
|
/**
|
|
File numbers interval which have to be sync()
|
|
*/
|
|
static uint32 soft_sync_min= 0;
|
|
static uint32 soft_sync_max= 0;
|
|
static uint32 soft_need_sync= 1;
|
|
/**
|
|
stores interval in microseconds
|
|
*/
|
|
static uint32 group_commit_wait= 0;
|
|
|
|
enum enum_translog_status translog_status= TRANSLOG_UNINITED;
|
|
ulonglong translog_syncs= 0; /* Number of sync()s */
|
|
|
|
/* time of last flush */
|
|
static ulonglong flush_start= 0;
|
|
|
|
/* chunk types */
|
|
#define TRANSLOG_CHUNK_LSN 0x00 /* 0 chunk refer as LSN (head or tail */
|
|
#define TRANSLOG_CHUNK_FIXED (1 << 6) /* 1 (pseudo)fixed record (also LSN) */
|
|
#define TRANSLOG_CHUNK_NOHDR (2 << 6) /* 2 no head chunk (till page end) */
|
|
#define TRANSLOG_CHUNK_LNGTH (3 << 6) /* 3 chunk with chunk length */
|
|
#define TRANSLOG_CHUNK_TYPE (3 << 6) /* Mask to get chunk type */
|
|
#define TRANSLOG_REC_TYPE 0x3F /* Mask to get record type */
|
|
#define TRANSLOG_CHUNK_0_CONT 0x3F /* the type to mark chunk 0 continue */
|
|
|
|
/* compressed (relative) LSN constants */
|
|
#define TRANSLOG_CLSN_LEN_BITS 0xC0 /* Mask to get compressed LSN length */
|
|
|
|
|
|
/* an array that maps id of a MARIA_SHARE to this MARIA_SHARE */
|
|
static MARIA_SHARE **id_to_share= NULL;
|
|
|
|
static my_bool translog_page_validator(int res, PAGECACHE_IO_HOOK_ARGS *args);
|
|
|
|
static my_bool translog_get_next_chunk(TRANSLOG_SCANNER_DATA *scanner);
|
|
static uint32 translog_first_file(TRANSLOG_ADDRESS horizon, int is_protected);
|
|
LSN translog_next_LSN(TRANSLOG_ADDRESS addr, TRANSLOG_ADDRESS horizon);
|
|
|
|
|
|
/*
|
|
Initialize log_record_type_descriptors
|
|
*/
|
|
|
|
LOG_DESC log_record_type_descriptor[LOGREC_NUMBER_OF_TYPES];
|
|
|
|
|
|
#ifndef DBUG_OFF
|
|
|
|
#define translog_buffer_lock_assert_owner(B) \
|
|
mysql_mutex_assert_owner(&(B)->mutex)
|
|
#define translog_lock_assert_owner() \
|
|
mysql_mutex_assert_owner(&log_descriptor.bc.buffer->mutex)
|
|
void translog_lock_handler_assert_owner()
|
|
{
|
|
translog_lock_assert_owner();
|
|
}
|
|
|
|
/**
|
|
@brief check the description table validity
|
|
|
|
@param num how many records should be filled
|
|
*/
|
|
|
|
static uint max_allowed_translog_type= 0;
|
|
|
|
void check_translog_description_table(int num)
|
|
{
|
|
int i;
|
|
DBUG_ENTER("check_translog_description_table");
|
|
DBUG_PRINT("enter", ("last record: %d", num));
|
|
DBUG_ASSERT(num > 0);
|
|
/* last is reserved for extending the table */
|
|
DBUG_ASSERT(num < LOGREC_NUMBER_OF_TYPES - 1);
|
|
DBUG_ASSERT(log_record_type_descriptor[0].rclass == LOGRECTYPE_NOT_ALLOWED);
|
|
max_allowed_translog_type= num;
|
|
|
|
for (i= 0; i <= num; i++)
|
|
{
|
|
DBUG_PRINT("info",
|
|
("record type: %d class: %d fixed: %u header: %u LSNs: %u "
|
|
"name: %s",
|
|
i, log_record_type_descriptor[i].rclass,
|
|
(uint)log_record_type_descriptor[i].fixed_length,
|
|
(uint)log_record_type_descriptor[i].read_header_len,
|
|
(uint)log_record_type_descriptor[i].compressed_LSN,
|
|
log_record_type_descriptor[i].name));
|
|
switch (log_record_type_descriptor[i].rclass) {
|
|
case LOGRECTYPE_NOT_ALLOWED:
|
|
DBUG_ASSERT(i == 0);
|
|
break;
|
|
case LOGRECTYPE_VARIABLE_LENGTH:
|
|
DBUG_ASSERT(log_record_type_descriptor[i].fixed_length == 0);
|
|
DBUG_ASSERT((log_record_type_descriptor[i].compressed_LSN == 0) ||
|
|
((log_record_type_descriptor[i].compressed_LSN == 1) &&
|
|
(log_record_type_descriptor[i].read_header_len >=
|
|
LSN_STORE_SIZE)) ||
|
|
((log_record_type_descriptor[i].compressed_LSN == 2) &&
|
|
(log_record_type_descriptor[i].read_header_len >=
|
|
LSN_STORE_SIZE * 2)));
|
|
break;
|
|
case LOGRECTYPE_PSEUDOFIXEDLENGTH:
|
|
DBUG_ASSERT(log_record_type_descriptor[i].fixed_length ==
|
|
log_record_type_descriptor[i].read_header_len);
|
|
DBUG_ASSERT(log_record_type_descriptor[i].compressed_LSN > 0);
|
|
DBUG_ASSERT(log_record_type_descriptor[i].compressed_LSN <= 2);
|
|
break;
|
|
case LOGRECTYPE_FIXEDLENGTH:
|
|
DBUG_ASSERT(log_record_type_descriptor[i].fixed_length ==
|
|
log_record_type_descriptor[i].read_header_len);
|
|
DBUG_ASSERT(log_record_type_descriptor[i].compressed_LSN == 0);
|
|
break;
|
|
default:
|
|
DBUG_ASSERT(0);
|
|
}
|
|
}
|
|
for (i= num + 1; i < LOGREC_NUMBER_OF_TYPES; i++)
|
|
{
|
|
DBUG_ASSERT(log_record_type_descriptor[i].rclass ==
|
|
LOGRECTYPE_NOT_ALLOWED);
|
|
}
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
#else
|
|
/*
  With DBUG_OFF defined the ownership assertions do nothing.  They expand
  to "do {} while (0)" rather than "{}" so that, followed by the usual ';',
  each forms exactly one statement and can be used safely as the body of an
  if that has an else branch.
*/
#define translog_buffer_lock_assert_owner(B) do {} while (0)
#define translog_lock_assert_owner() do {} while (0)
|
|
#endif
|
|
|
|
static LOG_DESC INIT_LOGREC_RESERVED_FOR_CHUNKS23=
|
|
{LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0,
|
|
"reserved", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL };
|
|
|
|
static LOG_DESC INIT_LOGREC_REDO_INSERT_ROW_HEAD=
|
|
{LOGRECTYPE_VARIABLE_LENGTH, 0,
|
|
FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, NULL,
|
|
write_hook_for_redo, NULL, 0,
|
|
"redo_insert_row_head", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
|
|
|
|
static LOG_DESC INIT_LOGREC_REDO_INSERT_ROW_TAIL=
|
|
{LOGRECTYPE_VARIABLE_LENGTH, 0,
|
|
FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, NULL,
|
|
write_hook_for_redo, NULL, 0,
|
|
"redo_insert_row_tail", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
|
|
|
|
static LOG_DESC INIT_LOGREC_REDO_NEW_ROW_HEAD=
|
|
{LOGRECTYPE_VARIABLE_LENGTH, 0,
|
|
FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, NULL,
|
|
write_hook_for_redo, NULL, 0,
|
|
"redo_new_row_head", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
|
|
|
|
static LOG_DESC INIT_LOGREC_REDO_NEW_ROW_TAIL=
|
|
{LOGRECTYPE_VARIABLE_LENGTH, 0,
|
|
FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, NULL,
|
|
write_hook_for_redo, NULL, 0,
|
|
"redo_new_row_tail", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
|
|
|
|
static LOG_DESC INIT_LOGREC_REDO_INSERT_ROW_BLOBS=
|
|
{LOGRECTYPE_VARIABLE_LENGTH, 0, FILEID_STORE_SIZE, NULL,
|
|
write_hook_for_redo, NULL, 0,
|
|
"redo_insert_row_blobs", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
|
|
|
|
static LOG_DESC INIT_LOGREC_REDO_PURGE_ROW_HEAD=
|
|
{LOGRECTYPE_FIXEDLENGTH,
|
|
FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
|
|
FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
|
|
NULL, write_hook_for_redo, NULL, 0,
|
|
"redo_purge_row_head", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
|
|
|
|
static LOG_DESC INIT_LOGREC_REDO_PURGE_ROW_TAIL=
|
|
{LOGRECTYPE_FIXEDLENGTH,
|
|
FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
|
|
FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
|
|
NULL, write_hook_for_redo, NULL, 0,
|
|
"redo_purge_row_tail", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
|
|
|
|
static LOG_DESC INIT_LOGREC_REDO_FREE_BLOCKS=
|
|
{LOGRECTYPE_VARIABLE_LENGTH, 0,
|
|
FILEID_STORE_SIZE + PAGERANGE_STORE_SIZE,
|
|
NULL, write_hook_for_redo, NULL, 0,
|
|
"redo_free_blocks", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
|
|
|
|
static LOG_DESC INIT_LOGREC_REDO_FREE_HEAD_OR_TAIL=
|
|
{LOGRECTYPE_FIXEDLENGTH,
|
|
FILEID_STORE_SIZE + PAGE_STORE_SIZE,
|
|
FILEID_STORE_SIZE + PAGE_STORE_SIZE,
|
|
NULL, write_hook_for_redo, NULL, 0,
|
|
"redo_free_head_or_tail", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
|
|
|
|
/* not yet used; for when we have versioning */
|
|
static LOG_DESC INIT_LOGREC_REDO_DELETE_ROW=
|
|
{LOGRECTYPE_FIXEDLENGTH, 16, 16, NULL, write_hook_for_redo, NULL, 0,
|
|
"redo_delete_row", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
|
|
|
|
/** @todo RECOVERY BUG unused, remove? */
|
|
static LOG_DESC INIT_LOGREC_REDO_UPDATE_ROW_HEAD=
|
|
{LOGRECTYPE_VARIABLE_LENGTH, 0, 9, NULL, write_hook_for_redo, NULL, 0,
|
|
"redo_update_row_head", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
|
|
|
|
static LOG_DESC INIT_LOGREC_REDO_INDEX=
|
|
{LOGRECTYPE_VARIABLE_LENGTH, 0, 9, NULL, write_hook_for_redo, NULL, 0,
|
|
"redo_index", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
|
|
|
|
static LOG_DESC INIT_LOGREC_REDO_INDEX_NEW_PAGE=
|
|
{LOGRECTYPE_VARIABLE_LENGTH, 0,
|
|
FILEID_STORE_SIZE + PAGE_STORE_SIZE * 2 + KEY_NR_STORE_SIZE + 1,
|
|
NULL, write_hook_for_redo, NULL, 0,
|
|
"redo_index_new_page", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
|
|
|
|
static LOG_DESC INIT_LOGREC_REDO_INDEX_FREE_PAGE=
|
|
{LOGRECTYPE_FIXEDLENGTH, FILEID_STORE_SIZE + PAGE_STORE_SIZE * 2,
|
|
FILEID_STORE_SIZE + PAGE_STORE_SIZE * 2,
|
|
NULL, write_hook_for_redo, NULL, 0,
|
|
"redo_index_free_page", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
|
|
|
|
static LOG_DESC INIT_LOGREC_REDO_UNDELETE_ROW=
|
|
{LOGRECTYPE_FIXEDLENGTH, 16, 16, NULL, write_hook_for_redo, NULL, 0,
|
|
"redo_undelete_row", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
|
|
|
|
static LOG_DESC INIT_LOGREC_CLR_END=
|
|
{LOGRECTYPE_VARIABLE_LENGTH, 0, LSN_STORE_SIZE + FILEID_STORE_SIZE +
|
|
CLR_TYPE_STORE_SIZE, NULL, write_hook_for_clr_end, NULL, 1,
|
|
"clr_end", LOGREC_LAST_IN_GROUP, NULL, NULL};
|
|
|
|
static LOG_DESC INIT_LOGREC_PURGE_END=
|
|
{LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, NULL, NULL, NULL, 1,
|
|
"purge_end", LOGREC_LAST_IN_GROUP, NULL, NULL};
|
|
|
|
static LOG_DESC INIT_LOGREC_UNDO_ROW_INSERT=
|
|
{LOGRECTYPE_VARIABLE_LENGTH, 0,
|
|
LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
|
|
NULL, write_hook_for_undo_row_insert, NULL, 1,
|
|
"undo_row_insert", LOGREC_LAST_IN_GROUP, NULL, NULL};
|
|
|
|
static LOG_DESC INIT_LOGREC_UNDO_ROW_DELETE=
|
|
{LOGRECTYPE_VARIABLE_LENGTH, 0,
|
|
LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
|
|
NULL, write_hook_for_undo_row_delete, NULL, 1,
|
|
"undo_row_delete", LOGREC_LAST_IN_GROUP, NULL, NULL};
|
|
|
|
static LOG_DESC INIT_LOGREC_UNDO_ROW_UPDATE=
|
|
{LOGRECTYPE_VARIABLE_LENGTH, 0,
|
|
LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
|
|
NULL, write_hook_for_undo_row_update, NULL, 1,
|
|
"undo_row_update", LOGREC_LAST_IN_GROUP, NULL, NULL};
|
|
|
|
static LOG_DESC INIT_LOGREC_UNDO_KEY_INSERT=
|
|
{LOGRECTYPE_VARIABLE_LENGTH, 0,
|
|
LSN_STORE_SIZE + FILEID_STORE_SIZE + KEY_NR_STORE_SIZE,
|
|
NULL, write_hook_for_undo_key_insert, NULL, 1,
|
|
"undo_key_insert", LOGREC_LAST_IN_GROUP, NULL, NULL};
|
|
|
|
/* This will never be in the log, only in the clr */
|
|
static LOG_DESC INIT_LOGREC_UNDO_KEY_INSERT_WITH_ROOT=
|
|
{LOGRECTYPE_VARIABLE_LENGTH, 0,
|
|
LSN_STORE_SIZE + FILEID_STORE_SIZE + KEY_NR_STORE_SIZE + PAGE_STORE_SIZE,
|
|
NULL, write_hook_for_undo_key, NULL, 1,
|
|
"undo_key_insert_with_root", LOGREC_LAST_IN_GROUP, NULL, NULL};
|
|
|
|
static LOG_DESC INIT_LOGREC_UNDO_KEY_DELETE=
|
|
{LOGRECTYPE_VARIABLE_LENGTH, 0,
|
|
LSN_STORE_SIZE + FILEID_STORE_SIZE + KEY_NR_STORE_SIZE,
|
|
NULL, write_hook_for_undo_key_delete, NULL, 1,
|
|
"undo_key_delete", LOGREC_LAST_IN_GROUP, NULL, NULL};
|
|
|
|
static LOG_DESC INIT_LOGREC_UNDO_KEY_DELETE_WITH_ROOT=
|
|
{LOGRECTYPE_VARIABLE_LENGTH, 0,
|
|
LSN_STORE_SIZE + FILEID_STORE_SIZE + KEY_NR_STORE_SIZE + PAGE_STORE_SIZE,
|
|
NULL, write_hook_for_undo_key_delete, NULL, 1,
|
|
"undo_key_delete_with_root", LOGREC_LAST_IN_GROUP, NULL, NULL};
|
|
|
|
static LOG_DESC INIT_LOGREC_PREPARE=
|
|
{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0,
|
|
"prepare", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
|
|
|
|
static LOG_DESC INIT_LOGREC_PREPARE_WITH_UNDO_PURGE=
|
|
{LOGRECTYPE_VARIABLE_LENGTH, 0, LSN_STORE_SIZE, NULL, NULL, NULL, 1,
|
|
"prepare_with_undo_purge", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
|
|
|
|
static LOG_DESC INIT_LOGREC_COMMIT=
|
|
{LOGRECTYPE_FIXEDLENGTH, 0, 0, NULL,
|
|
write_hook_for_commit, NULL, 0, "commit", LOGREC_IS_GROUP_ITSELF, NULL,
|
|
NULL};
|
|
|
|
static LOG_DESC INIT_LOGREC_COMMIT_WITH_UNDO_PURGE=
|
|
{LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, NULL, write_hook_for_commit, NULL, 1,
|
|
"commit_with_undo_purge", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
|
|
|
|
static LOG_DESC INIT_LOGREC_CHECKPOINT=
|
|
{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0,
|
|
"checkpoint", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
|
|
|
|
static LOG_DESC INIT_LOGREC_REDO_CREATE_TABLE=
|
|
{LOGRECTYPE_VARIABLE_LENGTH, 0, 1 + 2, NULL, NULL, NULL, 0,
|
|
"redo_create_table", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
|
|
|
|
static LOG_DESC INIT_LOGREC_REDO_RENAME_TABLE=
|
|
{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0,
|
|
"redo_rename_table", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
|
|
|
|
static LOG_DESC INIT_LOGREC_REDO_DROP_TABLE=
|
|
{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0,
|
|
"redo_drop_table", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
|
|
|
|
static LOG_DESC INIT_LOGREC_REDO_DELETE_ALL=
|
|
{LOGRECTYPE_FIXEDLENGTH, FILEID_STORE_SIZE, FILEID_STORE_SIZE,
|
|
NULL, write_hook_for_redo_delete_all, NULL, 0,
|
|
"redo_delete_all", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
|
|
|
|
static LOG_DESC INIT_LOGREC_REDO_REPAIR_TABLE=
|
|
{LOGRECTYPE_FIXEDLENGTH, FILEID_STORE_SIZE + 8 + 8, FILEID_STORE_SIZE + 8 + 8,
|
|
NULL, NULL, NULL, 0,
|
|
"redo_repair_table", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
|
|
|
|
static LOG_DESC INIT_LOGREC_FILE_ID=
|
|
{LOGRECTYPE_VARIABLE_LENGTH, 0, 2, NULL, write_hook_for_file_id, NULL, 0,
|
|
"file_id", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
|
|
|
|
static LOG_DESC INIT_LOGREC_LONG_TRANSACTION_ID=
|
|
{LOGRECTYPE_FIXEDLENGTH, 6, 6, NULL, NULL, NULL, 0,
|
|
"long_transaction_id", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
|
|
|
|
static LOG_DESC INIT_LOGREC_INCOMPLETE_LOG=
|
|
{LOGRECTYPE_FIXEDLENGTH, FILEID_STORE_SIZE, FILEID_STORE_SIZE,
|
|
NULL, NULL, NULL, 0,
|
|
"incomplete_log", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
|
|
|
|
static LOG_DESC INIT_LOGREC_INCOMPLETE_GROUP=
|
|
{LOGRECTYPE_FIXEDLENGTH, 0, 0,
|
|
NULL, NULL, NULL, 0,
|
|
"incomplete_group", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
|
|
|
|
static LOG_DESC INIT_LOGREC_UNDO_BULK_INSERT=
|
|
{LOGRECTYPE_VARIABLE_LENGTH, 0,
|
|
LSN_STORE_SIZE + FILEID_STORE_SIZE,
|
|
NULL, write_hook_for_undo_bulk_insert, NULL, 1,
|
|
"undo_bulk_insert", LOGREC_LAST_IN_GROUP, NULL, NULL};
|
|
|
|
static LOG_DESC INIT_LOGREC_REDO_BITMAP_NEW_PAGE=
|
|
{LOGRECTYPE_FIXEDLENGTH, FILEID_STORE_SIZE + PAGE_STORE_SIZE * 2,
|
|
FILEID_STORE_SIZE + PAGE_STORE_SIZE * 2,
|
|
NULL, NULL, NULL, 0,
|
|
"redo_create_bitmap", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
|
|
|
|
static LOG_DESC INIT_LOGREC_IMPORTED_TABLE=
|
|
{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0,
|
|
"imported_table", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
|
|
|
|
static LOG_DESC INIT_LOGREC_DEBUG_INFO=
|
|
{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0,
|
|
"info", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
|
|
|
|
const myf log_write_flags= MY_WME | MY_NABP | MY_WAIT_IF_FULL;
|
|
|
|
void translog_table_init()
|
|
{
|
|
int i;
|
|
log_record_type_descriptor[LOGREC_RESERVED_FOR_CHUNKS23]=
|
|
INIT_LOGREC_RESERVED_FOR_CHUNKS23;
|
|
log_record_type_descriptor[LOGREC_REDO_INSERT_ROW_HEAD]=
|
|
INIT_LOGREC_REDO_INSERT_ROW_HEAD;
|
|
log_record_type_descriptor[LOGREC_REDO_INSERT_ROW_TAIL]=
|
|
INIT_LOGREC_REDO_INSERT_ROW_TAIL;
|
|
log_record_type_descriptor[LOGREC_REDO_NEW_ROW_HEAD]=
|
|
INIT_LOGREC_REDO_NEW_ROW_HEAD;
|
|
log_record_type_descriptor[LOGREC_REDO_NEW_ROW_TAIL]=
|
|
INIT_LOGREC_REDO_NEW_ROW_TAIL;
|
|
log_record_type_descriptor[LOGREC_REDO_INSERT_ROW_BLOBS]=
|
|
INIT_LOGREC_REDO_INSERT_ROW_BLOBS;
|
|
log_record_type_descriptor[LOGREC_REDO_PURGE_ROW_HEAD]=
|
|
INIT_LOGREC_REDO_PURGE_ROW_HEAD;
|
|
log_record_type_descriptor[LOGREC_REDO_PURGE_ROW_TAIL]=
|
|
INIT_LOGREC_REDO_PURGE_ROW_TAIL;
|
|
log_record_type_descriptor[LOGREC_REDO_FREE_BLOCKS]=
|
|
INIT_LOGREC_REDO_FREE_BLOCKS;
|
|
log_record_type_descriptor[LOGREC_REDO_FREE_HEAD_OR_TAIL]=
|
|
INIT_LOGREC_REDO_FREE_HEAD_OR_TAIL;
|
|
log_record_type_descriptor[LOGREC_REDO_DELETE_ROW]=
|
|
INIT_LOGREC_REDO_DELETE_ROW;
|
|
log_record_type_descriptor[LOGREC_REDO_UPDATE_ROW_HEAD]=
|
|
INIT_LOGREC_REDO_UPDATE_ROW_HEAD;
|
|
log_record_type_descriptor[LOGREC_REDO_INDEX]=
|
|
INIT_LOGREC_REDO_INDEX;
|
|
log_record_type_descriptor[LOGREC_REDO_INDEX_NEW_PAGE]=
|
|
INIT_LOGREC_REDO_INDEX_NEW_PAGE;
|
|
log_record_type_descriptor[LOGREC_REDO_INDEX_FREE_PAGE]=
|
|
INIT_LOGREC_REDO_INDEX_FREE_PAGE;
|
|
log_record_type_descriptor[LOGREC_REDO_UNDELETE_ROW]=
|
|
INIT_LOGREC_REDO_UNDELETE_ROW;
|
|
log_record_type_descriptor[LOGREC_CLR_END]=
|
|
INIT_LOGREC_CLR_END;
|
|
log_record_type_descriptor[LOGREC_PURGE_END]=
|
|
INIT_LOGREC_PURGE_END;
|
|
log_record_type_descriptor[LOGREC_UNDO_ROW_INSERT]=
|
|
INIT_LOGREC_UNDO_ROW_INSERT;
|
|
log_record_type_descriptor[LOGREC_UNDO_ROW_DELETE]=
|
|
INIT_LOGREC_UNDO_ROW_DELETE;
|
|
log_record_type_descriptor[LOGREC_UNDO_ROW_UPDATE]=
|
|
INIT_LOGREC_UNDO_ROW_UPDATE;
|
|
log_record_type_descriptor[LOGREC_UNDO_KEY_INSERT]=
|
|
INIT_LOGREC_UNDO_KEY_INSERT;
|
|
log_record_type_descriptor[LOGREC_UNDO_KEY_INSERT_WITH_ROOT]=
|
|
INIT_LOGREC_UNDO_KEY_INSERT_WITH_ROOT;
|
|
log_record_type_descriptor[LOGREC_UNDO_KEY_DELETE]=
|
|
INIT_LOGREC_UNDO_KEY_DELETE;
|
|
log_record_type_descriptor[LOGREC_UNDO_KEY_DELETE_WITH_ROOT]=
|
|
INIT_LOGREC_UNDO_KEY_DELETE_WITH_ROOT;
|
|
log_record_type_descriptor[LOGREC_PREPARE]=
|
|
INIT_LOGREC_PREPARE;
|
|
log_record_type_descriptor[LOGREC_PREPARE_WITH_UNDO_PURGE]=
|
|
INIT_LOGREC_PREPARE_WITH_UNDO_PURGE;
|
|
log_record_type_descriptor[LOGREC_COMMIT]=
|
|
INIT_LOGREC_COMMIT;
|
|
log_record_type_descriptor[LOGREC_COMMIT_WITH_UNDO_PURGE]=
|
|
INIT_LOGREC_COMMIT_WITH_UNDO_PURGE;
|
|
log_record_type_descriptor[LOGREC_CHECKPOINT]=
|
|
INIT_LOGREC_CHECKPOINT;
|
|
log_record_type_descriptor[LOGREC_REDO_CREATE_TABLE]=
|
|
INIT_LOGREC_REDO_CREATE_TABLE;
|
|
log_record_type_descriptor[LOGREC_REDO_RENAME_TABLE]=
|
|
INIT_LOGREC_REDO_RENAME_TABLE;
|
|
log_record_type_descriptor[LOGREC_REDO_DROP_TABLE]=
|
|
INIT_LOGREC_REDO_DROP_TABLE;
|
|
log_record_type_descriptor[LOGREC_REDO_DELETE_ALL]=
|
|
INIT_LOGREC_REDO_DELETE_ALL;
|
|
log_record_type_descriptor[LOGREC_REDO_REPAIR_TABLE]=
|
|
INIT_LOGREC_REDO_REPAIR_TABLE;
|
|
log_record_type_descriptor[LOGREC_FILE_ID]=
|
|
INIT_LOGREC_FILE_ID;
|
|
log_record_type_descriptor[LOGREC_LONG_TRANSACTION_ID]=
|
|
INIT_LOGREC_LONG_TRANSACTION_ID;
|
|
log_record_type_descriptor[LOGREC_INCOMPLETE_LOG]=
|
|
INIT_LOGREC_INCOMPLETE_LOG;
|
|
log_record_type_descriptor[LOGREC_INCOMPLETE_GROUP]=
|
|
INIT_LOGREC_INCOMPLETE_GROUP;
|
|
log_record_type_descriptor[LOGREC_UNDO_BULK_INSERT]=
|
|
INIT_LOGREC_UNDO_BULK_INSERT;
|
|
log_record_type_descriptor[LOGREC_REDO_BITMAP_NEW_PAGE]=
|
|
INIT_LOGREC_REDO_BITMAP_NEW_PAGE;
|
|
log_record_type_descriptor[LOGREC_IMPORTED_TABLE]=
|
|
INIT_LOGREC_IMPORTED_TABLE;
|
|
log_record_type_descriptor[LOGREC_DEBUG_INFO]=
|
|
INIT_LOGREC_DEBUG_INFO;
|
|
|
|
for (i= LOGREC_FIRST_FREE; i < LOGREC_NUMBER_OF_TYPES; i++)
|
|
log_record_type_descriptor[i].rclass= LOGRECTYPE_NOT_ALLOWED;
|
|
#ifndef DBUG_OFF
|
|
check_translog_description_table(LOGREC_FIRST_FREE -1);
|
|
#endif
|
|
}
|
|
|
|
|
|
/* all possible flags page overheads */
|
|
static uint page_overhead[TRANSLOG_FLAGS_NUM];
|
|
|
|
typedef struct st_translog_validator_data
|
|
{
|
|
TRANSLOG_ADDRESS *addr;
|
|
my_bool was_recovered;
|
|
} TRANSLOG_VALIDATOR_DATA;
|
|
|
|
|
|
/*
|
|
Check cursor/buffer consistence
|
|
|
|
SYNOPSIS
|
|
translog_check_cursor
|
|
cursor cursor which will be checked
|
|
*/
|
|
|
|
static void translog_check_cursor(struct st_buffer_cursor *cursor
|
|
__attribute__((unused)))
|
|
{
|
|
DBUG_ASSERT(cursor->chaser ||
|
|
((ulong) (cursor->ptr - cursor->buffer->buffer) ==
|
|
cursor->buffer->size));
|
|
DBUG_ASSERT(cursor->buffer->buffer_no == cursor->buffer_no);
|
|
DBUG_ASSERT((cursor->ptr -cursor->buffer->buffer) %TRANSLOG_PAGE_SIZE ==
|
|
cursor->current_page_fill % TRANSLOG_PAGE_SIZE);
|
|
DBUG_ASSERT(cursor->current_page_fill <= TRANSLOG_PAGE_SIZE);
|
|
}
|
|
|
|
|
|
/**
|
|
@brief switch the loghandler in read only mode in case of write error
|
|
*/
|
|
|
|
void translog_stop_writing()
|
|
{
|
|
DBUG_ENTER("translog_stop_writing");
|
|
DBUG_PRINT("error", ("errno: %d my_errno: %d", errno, my_errno));
|
|
translog_status= (translog_status == TRANSLOG_SHUTDOWN ?
|
|
TRANSLOG_UNINITED :
|
|
TRANSLOG_READONLY);
|
|
log_descriptor.is_everything_flushed= 1;
|
|
log_descriptor.open_flags= O_BINARY | O_RDONLY;
|
|
DBUG_ASSERT(0);
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
|
|
/*
|
|
@brief Get file name of the log by log number
|
|
|
|
@param file_no Number of the log we want to open
|
|
@param path Pointer to buffer where file name will be
|
|
stored (must be FN_REFLEN bytes at least)
|
|
|
|
@return pointer to path
|
|
*/
|
|
|
|
char *translog_filename_by_fileno(uint32 file_no, char *path)
|
|
{
|
|
char buff[11], *end;
|
|
uint length;
|
|
DBUG_ENTER("translog_filename_by_fileno");
|
|
DBUG_ASSERT(file_no <= 0xfffffff);
|
|
|
|
/* log_descriptor.directory is already formatted */
|
|
end= strxmov(path, log_descriptor.directory, "aria_log.0000000", NullS);
|
|
length= (uint) (int10_to_str(file_no, buff, 10) - buff);
|
|
strmov(end - length +1, buff);
|
|
|
|
DBUG_PRINT("info", ("Path: '%s' path: %p", path, path));
|
|
DBUG_RETURN(path);
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Create log file with given number without cache
|
|
|
|
@param file_no Number of the log we want to open
|
|
|
|
retval -1 error
|
|
retval # file descriptor number
|
|
*/
|
|
|
|
static File create_logfile_by_number_no_cache(uint32 file_no)
|
|
{
|
|
File file;
|
|
char path[FN_REFLEN];
|
|
DBUG_ENTER("create_logfile_by_number_no_cache");
|
|
|
|
if (translog_status != TRANSLOG_OK)
|
|
DBUG_RETURN(-1);
|
|
|
|
/* TODO: add O_DIRECT to open flags (when buffer is aligned) */
|
|
if ((file= mysql_file_create(key_file_translog,
|
|
translog_filename_by_fileno(file_no, path),
|
|
0, O_BINARY | O_RDWR | O_CLOEXEC, MYF(MY_WME))) < 0)
|
|
{
|
|
DBUG_PRINT("error", ("Error %d during creating file '%s'", errno, path));
|
|
translog_stop_writing();
|
|
DBUG_RETURN(-1);
|
|
}
|
|
if (sync_log_dir >= TRANSLOG_SYNC_DIR_NEWFILE &&
|
|
sync_dir(log_descriptor.directory_fd, MYF(MY_WME | MY_IGNORE_BADFD)))
|
|
{
|
|
DBUG_PRINT("error", ("Error %d during syncing directory '%s'",
|
|
errno, log_descriptor.directory));
|
|
mysql_file_close(file, MYF(0));
|
|
translog_stop_writing();
|
|
DBUG_RETURN(-1);
|
|
}
|
|
DBUG_PRINT("info", ("File: '%s' handler: %d", path, file));
|
|
DBUG_RETURN(file);
|
|
}
|
|
|
|
/**
|
|
@brief Open (not create) log file with given number without cache
|
|
|
|
@param file_no Number of the log we want to open
|
|
|
|
retval -1 error
|
|
retval # file descriptor number
|
|
*/
|
|
|
|
static File open_logfile_by_number_no_cache(uint32 file_no)
|
|
{
|
|
File file;
|
|
char path[FN_REFLEN];
|
|
DBUG_ENTER("open_logfile_by_number_no_cache");
|
|
|
|
/* TODO: add O_DIRECT to open flags (when buffer is aligned) */
|
|
/* TODO: use mysql_file_create() */
|
|
if ((file= mysql_file_open(key_file_translog,
|
|
translog_filename_by_fileno(file_no, path),
|
|
log_descriptor.open_flags | O_CLOEXEC,
|
|
MYF(MY_WME))) < 0)
|
|
{
|
|
DBUG_PRINT("error", ("Error %d during opening file '%s'", errno, path));
|
|
DBUG_RETURN(-1);
|
|
}
|
|
DBUG_PRINT("info", ("File: '%s' handler: %d", path, file));
|
|
DBUG_RETURN(file);
|
|
}
|
|
|
|
|
|
/**
|
|
@brief get file descriptor by given number using cache
|
|
|
|
@param file_no Number of the log we want to open
|
|
|
|
retval # file descriptor
|
|
retval NULL file is not opened
|
|
*/
|
|
|
|
static TRANSLOG_FILE *get_logfile_by_number(uint32 file_no)
|
|
{
|
|
TRANSLOG_FILE *file;
|
|
DBUG_ENTER("get_logfile_by_number");
|
|
mysql_rwlock_rdlock(&log_descriptor.open_files_lock);
|
|
if (log_descriptor.max_file - file_no >=
|
|
log_descriptor.open_files.elements)
|
|
{
|
|
DBUG_PRINT("info", ("File #%u is not opened", file_no));
|
|
mysql_rwlock_unlock(&log_descriptor.open_files_lock);
|
|
DBUG_RETURN(NULL);
|
|
}
|
|
DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
|
|
log_descriptor.open_files.elements);
|
|
DBUG_ASSERT(log_descriptor.max_file >= file_no);
|
|
DBUG_ASSERT(log_descriptor.min_file <= file_no);
|
|
|
|
file= *dynamic_element(&log_descriptor.open_files,
|
|
log_descriptor.max_file - file_no, TRANSLOG_FILE **);
|
|
mysql_rwlock_unlock(&log_descriptor.open_files_lock);
|
|
DBUG_PRINT("info", ("File %p File no: %u, File handler: %d",
|
|
file, file_no,
|
|
(file ? file->handler.file : -1)));
|
|
DBUG_ASSERT(!file || file->number == file_no);
|
|
DBUG_RETURN(file);
|
|
}
|
|
|
|
|
|
/**
|
|
@brief get current file descriptor
|
|
|
|
retval # file descriptor
|
|
*/
|
|
|
|
static TRANSLOG_FILE *get_current_logfile()
|
|
{
|
|
TRANSLOG_FILE *file;
|
|
DBUG_ENTER("get_current_logfile");
|
|
mysql_rwlock_rdlock(&log_descriptor.open_files_lock);
|
|
DBUG_PRINT("info", ("max_file: %lu min_file: %lu open_files: %lu",
|
|
(ulong) log_descriptor.max_file,
|
|
(ulong) log_descriptor.min_file,
|
|
(ulong) log_descriptor.open_files.elements));
|
|
DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
|
|
log_descriptor.open_files.elements);
|
|
file= *dynamic_element(&log_descriptor.open_files, 0, TRANSLOG_FILE **);
|
|
mysql_rwlock_unlock(&log_descriptor.open_files_lock);
|
|
DBUG_RETURN(file);
|
|
}
|
|
|
|
uchar maria_trans_file_magic[]=
|
|
{ (uchar) 254, (uchar) 254, (uchar) 11, '\001', 'M', 'A', 'R', 'I', 'A',
|
|
'L', 'O', 'G' };
|
|
#define LOG_HEADER_DATA_SIZE (sizeof(maria_trans_file_magic) + \
|
|
8 + 4 + 4 + 4 + 2 + 3 + \
|
|
LSN_STORE_SIZE)
|
|
|
|
|
|
/*
|
|
Write log file page header in the just opened new log file
|
|
|
|
SYNOPSIS
|
|
translog_write_file_header();
|
|
|
|
NOTES
|
|
First page is just a marker page; We don't store any real log data in it.
|
|
|
|
RETURN
|
|
0 OK
|
|
1 ERROR
|
|
*/
|
|
|
|
static my_bool translog_write_file_header()
|
|
{
|
|
TRANSLOG_FILE *file;
|
|
ulonglong timestamp;
|
|
uchar page_buff[TRANSLOG_PAGE_SIZE], *page= page_buff;
|
|
my_bool rc;
|
|
DBUG_ENTER("translog_write_file_header");
|
|
|
|
/* file tag */
|
|
memcpy(page, maria_trans_file_magic, sizeof(maria_trans_file_magic));
|
|
page+= sizeof(maria_trans_file_magic);
|
|
/* timestamp */
|
|
timestamp= my_hrtime().val;
|
|
int8store(page, timestamp);
|
|
page+= 8;
|
|
/* maria version */
|
|
int4store(page, TRANSLOG_VERSION_ID);
|
|
page+= 4;
|
|
/* mysql version (MYSQL_VERSION_ID) */
|
|
int4store(page, log_descriptor.server_version);
|
|
page+= 4;
|
|
/* server ID */
|
|
int4store(page, log_descriptor.server_id);
|
|
page+= 4;
|
|
/* loghandler page_size */
|
|
int2store(page, TRANSLOG_PAGE_SIZE - 1);
|
|
page+= 2;
|
|
/* file number */
|
|
int3store(page, LSN_FILE_NO(log_descriptor.horizon));
|
|
page+= 3;
|
|
lsn_store(page, LSN_IMPOSSIBLE);
|
|
page+= LSN_STORE_SIZE;
|
|
memset(page, TRANSLOG_FILLER, sizeof(page_buff) - (page- page_buff));
|
|
|
|
file= get_current_logfile();
|
|
rc= my_pwrite(file->handler.file, page_buff, sizeof(page_buff), 0,
|
|
log_write_flags) != 0;
|
|
/*
|
|
Dropping the flag in such way can make false alarm: signalling than the
|
|
file in not sync when it is sync, but the situation is quite rare and
|
|
protections with mutexes give much more overhead to the whole engine
|
|
*/
|
|
file->is_sync= 0;
|
|
DBUG_RETURN(rc);
|
|
}
|
|
|
|
/*
|
|
@brief write the new LSN on the given file header
|
|
|
|
@param file The file descriptor
|
|
@param lsn That LSN which should be written
|
|
|
|
@retval 0 OK
|
|
@retval 1 Error
|
|
*/
|
|
|
|
static my_bool translog_max_lsn_to_header(File file, LSN lsn)
|
|
{
|
|
uchar lsn_buff[LSN_STORE_SIZE];
|
|
my_bool rc;
|
|
DBUG_ENTER("translog_max_lsn_to_header");
|
|
DBUG_PRINT("enter", ("File descriptor: %ld "
|
|
"lsn: " LSN_FMT,
|
|
(long) file,
|
|
LSN_IN_PARTS(lsn)));
|
|
|
|
lsn_store(lsn_buff, lsn);
|
|
|
|
rc= (my_pwrite(file, lsn_buff,
|
|
LSN_STORE_SIZE,
|
|
(LOG_HEADER_DATA_SIZE - LSN_STORE_SIZE),
|
|
log_write_flags) != 0 ||
|
|
mysql_file_sync(file, MYF(MY_WME)) != 0);
|
|
/*
|
|
We should not increase counter in case of error above, but it is so
|
|
unlikely that we can ignore this case
|
|
*/
|
|
translog_syncs++;
|
|
DBUG_RETURN(rc);
|
|
}
|
|
|
|
|
|
/*
|
|
@brief Extract hander file information from loghandler file page
|
|
|
|
@param desc header information descriptor to be filled with information
|
|
@param page_buff buffer with the page content
|
|
*/
|
|
|
|
void translog_interpret_file_header(LOGHANDLER_FILE_INFO *desc,
|
|
uchar *page_buff)
|
|
{
|
|
uchar *ptr;
|
|
|
|
ptr= page_buff + sizeof(maria_trans_file_magic);
|
|
desc->timestamp= uint8korr(ptr);
|
|
ptr+= 8;
|
|
desc->maria_version= uint4korr(ptr);
|
|
ptr+= 4;
|
|
desc->mysql_version= uint4korr(ptr);
|
|
ptr+= 4;
|
|
desc->server_id= uint4korr(ptr + 4);
|
|
ptr+= 4;
|
|
desc->page_size= uint2korr(ptr) + 1;
|
|
ptr+= 2;
|
|
desc->file_number= uint3korr(ptr);
|
|
ptr+=3;
|
|
desc->max_lsn= lsn_korr(ptr);
|
|
}
|
|
|
|
|
|
/*
|
|
@brief Read hander file information from loghandler file
|
|
|
|
@param desc header information descriptor to be filled with information
|
|
@param file file descriptor to read
|
|
|
|
@retval 0 OK
|
|
@retval 1 Error
|
|
*/
|
|
|
|
my_bool translog_read_file_header(LOGHANDLER_FILE_INFO *desc, File file)
|
|
{
|
|
uchar page_buff[LOG_HEADER_DATA_SIZE];
|
|
DBUG_ENTER("translog_read_file_header");
|
|
|
|
if (mysql_file_pread(file, page_buff,
|
|
sizeof(page_buff), 0, MYF(MY_FNABP | MY_WME)))
|
|
{
|
|
DBUG_PRINT("info", ("log read fail error: %d", my_errno));
|
|
DBUG_RETURN(1);
|
|
}
|
|
translog_interpret_file_header(desc, page_buff);
|
|
DBUG_PRINT("info", ("timestamp: %llu aria ver: %lu mysql ver: %lu "
|
|
"server id %lu page size %lu file number %lu "
|
|
"max lsn: " LSN_FMT,
|
|
(ulonglong) desc->timestamp,
|
|
(ulong) desc->maria_version,
|
|
(ulong) desc->mysql_version,
|
|
(ulong) desc->server_id,
|
|
desc->page_size, (ulong) desc->file_number,
|
|
LSN_IN_PARTS(desc->max_lsn)));
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
|
|
/*
|
|
@brief set the lsn to the files from_file - to_file if it is greater
|
|
then written in the file
|
|
|
|
@param from_file first file number (min)
|
|
@param to_file last file number (max)
|
|
@param lsn the lsn for writing
|
|
@param is_locked true if current thread locked the log handler
|
|
|
|
@retval 0 OK
|
|
@retval 1 Error
|
|
*/
|
|
|
|
static my_bool translog_set_lsn_for_files(uint32 from_file, uint32 to_file,
|
|
LSN lsn, my_bool is_locked)
|
|
{
|
|
uint32 file;
|
|
DBUG_ENTER("translog_set_lsn_for_files");
|
|
DBUG_PRINT("enter", ("From: %lu to: %lu lsn: " LSN_FMT " locked: %d",
|
|
(ulong) from_file, (ulong) to_file,
|
|
LSN_IN_PARTS(lsn),
|
|
is_locked));
|
|
DBUG_ASSERT(from_file <= to_file);
|
|
DBUG_ASSERT(from_file > 0); /* we have not file 0 */
|
|
|
|
/* Checks the current file (not finished yet file) */
|
|
if (!is_locked)
|
|
translog_lock();
|
|
if (to_file == (uint32) LSN_FILE_NO(log_descriptor.horizon))
|
|
{
|
|
if (likely(cmp_translog_addr(lsn, log_descriptor.max_lsn) > 0))
|
|
log_descriptor.max_lsn= lsn;
|
|
to_file--;
|
|
}
|
|
if (!is_locked)
|
|
translog_unlock();
|
|
|
|
/* Checks finished files if they are */
|
|
mysql_mutex_lock(&log_descriptor.file_header_lock);
|
|
for (file= from_file; file <= to_file; file++)
|
|
{
|
|
LOGHANDLER_FILE_INFO info;
|
|
File fd;
|
|
|
|
fd= open_logfile_by_number_no_cache(file);
|
|
if ((fd < 0) ||
|
|
((translog_read_file_header(&info, fd) ||
|
|
(cmp_translog_addr(lsn, info.max_lsn) > 0 &&
|
|
translog_max_lsn_to_header(fd, lsn))) |
|
|
mysql_file_close(fd, MYF(MY_WME))))
|
|
{
|
|
translog_stop_writing();
|
|
mysql_mutex_unlock(&log_descriptor.file_header_lock);
|
|
DBUG_RETURN(1);
|
|
}
|
|
}
|
|
mysql_mutex_unlock(&log_descriptor.file_header_lock);
|
|
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
|
|
/* descriptor of file in unfinished_files */
|
|
struct st_file_counter
|
|
{
|
|
uint32 file; /* file number */
|
|
uint32 counter; /* counter for started writes */
|
|
};
|
|
|
|
|
|
/*
|
|
@brief mark file "in progress" (for multi-group records)
|
|
|
|
@param file log file number
|
|
*/
|
|
|
|
static void translog_mark_file_unfinished(uint32 file)
|
|
{
|
|
int place, i;
|
|
struct st_file_counter fc, *fc_ptr;
|
|
|
|
DBUG_ENTER("translog_mark_file_unfinished");
|
|
DBUG_PRINT("enter", ("file: %lu", (ulong) file));
|
|
|
|
fc.file= file; fc.counter= 1;
|
|
mysql_mutex_lock(&log_descriptor.unfinished_files_lock);
|
|
|
|
if (log_descriptor.unfinished_files.elements == 0)
|
|
{
|
|
insert_dynamic(&log_descriptor.unfinished_files, (uchar*) &fc);
|
|
DBUG_PRINT("info", ("The first element inserted"));
|
|
goto end;
|
|
}
|
|
|
|
for (place= log_descriptor.unfinished_files.elements - 1;
|
|
place >= 0;
|
|
place--)
|
|
{
|
|
fc_ptr= dynamic_element(&log_descriptor.unfinished_files,
|
|
place, struct st_file_counter *);
|
|
if (fc_ptr->file <= file)
|
|
break;
|
|
}
|
|
|
|
if (place >= 0 && fc_ptr->file == file)
|
|
{
|
|
fc_ptr->counter++;
|
|
DBUG_PRINT("info", ("counter increased"));
|
|
goto end;
|
|
}
|
|
|
|
if (place == (int)log_descriptor.unfinished_files.elements)
|
|
{
|
|
insert_dynamic(&log_descriptor.unfinished_files, (uchar*) &fc);
|
|
DBUG_PRINT("info", ("The last element inserted"));
|
|
goto end;
|
|
}
|
|
/* shift and assign new element */
|
|
insert_dynamic(&log_descriptor.unfinished_files,
|
|
(uchar*)
|
|
dynamic_element(&log_descriptor.unfinished_files,
|
|
log_descriptor.unfinished_files.elements- 1,
|
|
struct st_file_counter *));
|
|
for(i= log_descriptor.unfinished_files.elements - 1; i > place; i--)
|
|
{
|
|
/* we do not use set_dynamic() to avoid unneeded checks */
|
|
memcpy(dynamic_element(&log_descriptor.unfinished_files,
|
|
i, struct st_file_counter *),
|
|
dynamic_element(&log_descriptor.unfinished_files,
|
|
i + 1, struct st_file_counter *),
|
|
sizeof(struct st_file_counter));
|
|
}
|
|
memcpy(dynamic_element(&log_descriptor.unfinished_files,
|
|
place + 1, struct st_file_counter *),
|
|
&fc, sizeof(struct st_file_counter));
|
|
end:
|
|
mysql_mutex_unlock(&log_descriptor.unfinished_files_lock);
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
|
|
/*
|
|
@brief remove file mark "in progress" (for multi-group records)
|
|
|
|
@param file log file number
|
|
*/
|
|
|
|
static void translog_mark_file_finished(uint32 file)
|
|
{
|
|
int i;
|
|
struct st_file_counter *UNINIT_VAR(fc_ptr);
|
|
DBUG_ENTER("translog_mark_file_finished");
|
|
DBUG_PRINT("enter", ("file: %lu", (ulong) file));
|
|
|
|
mysql_mutex_lock(&log_descriptor.unfinished_files_lock);
|
|
|
|
DBUG_ASSERT(log_descriptor.unfinished_files.elements > 0);
|
|
for (i= 0;
|
|
i < (int) log_descriptor.unfinished_files.elements;
|
|
i++)
|
|
{
|
|
fc_ptr= dynamic_element(&log_descriptor.unfinished_files,
|
|
i, struct st_file_counter *);
|
|
if (fc_ptr->file == file)
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
DBUG_ASSERT(i < (int) log_descriptor.unfinished_files.elements);
|
|
|
|
if (! --fc_ptr->counter)
|
|
delete_dynamic_element(&log_descriptor.unfinished_files, i);
|
|
mysql_mutex_unlock(&log_descriptor.unfinished_files_lock);
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
|
|
/*
|
|
@brief get max LSN of the record which parts stored in this file
|
|
|
|
@param file file number
|
|
|
|
@return requested LSN or LSN_IMPOSSIBLE/LSN_ERROR
|
|
@retval LSN_IMPOSSIBLE File is still not finished
|
|
@retval LSN_ERROR Error opening file
|
|
@retval # LSN of the record which parts stored in this file
|
|
*/
|
|
|
|
LSN translog_get_file_max_lsn_stored(uint32 file)
|
|
{
|
|
uint32 limit= FILENO_IMPOSSIBLE;
|
|
DBUG_ENTER("translog_get_file_max_lsn_stored");
|
|
DBUG_PRINT("enter", ("file: %lu", (ulong)file));
|
|
DBUG_ASSERT(translog_status == TRANSLOG_OK ||
|
|
translog_status == TRANSLOG_READONLY);
|
|
|
|
mysql_mutex_lock(&log_descriptor.unfinished_files_lock);
|
|
|
|
/* find file with minimum file number "in progress" */
|
|
if (log_descriptor.unfinished_files.elements > 0)
|
|
{
|
|
struct st_file_counter *fc_ptr;
|
|
fc_ptr= dynamic_element(&log_descriptor.unfinished_files,
|
|
0, struct st_file_counter *);
|
|
limit= fc_ptr->file; /* minimal file number "in progress" */
|
|
}
|
|
mysql_mutex_unlock(&log_descriptor.unfinished_files_lock);
|
|
|
|
/*
|
|
if there is no "in progress file" then unfinished file is in progress
|
|
for sure
|
|
*/
|
|
if (limit == FILENO_IMPOSSIBLE)
|
|
{
|
|
TRANSLOG_ADDRESS horizon= translog_get_horizon();
|
|
limit= LSN_FILE_NO(horizon);
|
|
}
|
|
|
|
if (file >= limit)
|
|
{
|
|
DBUG_PRINT("info", ("The file in in progress"));
|
|
DBUG_RETURN(LSN_IMPOSSIBLE);
|
|
}
|
|
|
|
{
|
|
LOGHANDLER_FILE_INFO info;
|
|
File fd;
|
|
|
|
fd= open_logfile_by_number_no_cache(file);
|
|
if(fd < 0)
|
|
{
|
|
DBUG_PRINT("error", ("Can't open file"));
|
|
DBUG_RETURN(LSN_ERROR);
|
|
}
|
|
|
|
if (translog_read_file_header(&info, fd))
|
|
{
|
|
DBUG_PRINT("error", ("Can't read file header"));
|
|
info.max_lsn= LSN_ERROR;
|
|
}
|
|
|
|
if (mysql_file_close(fd, MYF(MY_WME)))
|
|
{
|
|
DBUG_PRINT("error", ("Can't close file"));
|
|
info.max_lsn= LSN_ERROR;
|
|
}
|
|
|
|
DBUG_PRINT("info", ("Max lsn: " LSN_FMT, LSN_IN_PARTS(info.max_lsn)));
|
|
DBUG_RETURN(info.max_lsn);
|
|
}
|
|
}
|
|
|
|
/*
|
|
Initialize transaction log file buffer
|
|
|
|
SYNOPSIS
|
|
translog_buffer_init()
|
|
buffer The buffer to initialize
|
|
num Number of this buffer
|
|
|
|
RETURN
|
|
0 OK
|
|
1 Error
|
|
*/
|
|
|
|
static my_bool translog_buffer_init(struct st_translog_buffer *buffer, int num)
|
|
{
|
|
DBUG_ENTER("translog_buffer_init");
|
|
buffer->pre_force_close_horizon=
|
|
buffer->prev_last_lsn= buffer->last_lsn=
|
|
LSN_IMPOSSIBLE;
|
|
DBUG_PRINT("info", ("last_lsn and prev_last_lsn set to 0 buffer: %p",
|
|
buffer));
|
|
|
|
buffer->buffer_no= (uint8) num;
|
|
/* This Buffer File */
|
|
buffer->file= NULL;
|
|
buffer->overlay= 0;
|
|
/* cache for current log */
|
|
memset(buffer->buffer, TRANSLOG_FILLER, TRANSLOG_WRITE_BUFFER);
|
|
/* Buffer size */
|
|
buffer->size= 0;
|
|
buffer->skipped_data= 0;
|
|
/* cond of thread which is waiting for buffer filling */
|
|
if (mysql_cond_init(key_TRANSLOG_BUFFER_waiting_filling_buffer,
|
|
&buffer->waiting_filling_buffer, 0))
|
|
DBUG_RETURN(1);
|
|
/* Number of records which are in copy progress */
|
|
buffer->copy_to_buffer_in_progress= 0;
|
|
/* list of waiting buffer ready threads */
|
|
buffer->waiting_flush= 0;
|
|
/*
|
|
Buffers locked by the following mutex. As far as buffers create logical
|
|
circle (after last buffer goes first) it trigger false alarm of deadlock
|
|
detect system, so we remove check of deadlock for this buffers. Indeed
|
|
all mutex locks concentrated around current buffer except flushing
|
|
thread (but it is only one thread). One thread can't take more then
|
|
2 buffer locks at once. So deadlock is impossible here.
|
|
|
|
To prevent false alarm of dead lock detection we switch dead lock
|
|
detection for one buffer in the middle of the buffers chain. Excluding
|
|
only one of eight buffers from deadlock detection hardly can hide other
|
|
possible problems which include this mutexes.
|
|
*/
|
|
|
|
if (mysql_mutex_init(key_TRANSLOG_BUFFER_mutex,
|
|
&buffer->mutex, MY_MUTEX_INIT_FAST) ||
|
|
mysql_cond_init(key_TRANSLOG_BUFFER_prev_sent_to_disk_cond,
|
|
&buffer->prev_sent_to_disk_cond, 0))
|
|
DBUG_RETURN(1);
|
|
mysql_mutex_setflags(&buffer->mutex, MYF_NO_DEADLOCK_DETECTION);
|
|
buffer->is_closing_buffer= 0;
|
|
buffer->prev_sent_to_disk= LSN_IMPOSSIBLE;
|
|
buffer->prev_buffer_offset= LSN_IMPOSSIBLE;
|
|
buffer->ver= 0;
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
|
|
/*
|
|
@brief close transaction log file by descriptor
|
|
|
|
@param file pagegecache file descriptor reference
|
|
|
|
@return Operation status
|
|
@retval 0 OK
|
|
@retval 1 Error
|
|
*/
|
|
|
|
static my_bool translog_close_log_file(TRANSLOG_FILE *file)
|
|
{
|
|
int rc= 0;
|
|
flush_pagecache_blocks(log_descriptor.pagecache, &file->handler,
|
|
FLUSH_RELEASE);
|
|
/*
|
|
Sync file when we close it
|
|
TODO: sync only we have changed the log
|
|
*/
|
|
if (!file->is_sync)
|
|
{
|
|
rc= mysql_file_sync(file->handler.file, MYF(MY_WME));
|
|
translog_syncs++;
|
|
}
|
|
rc|= mysql_file_close(file->handler.file, MYF(MY_WME));
|
|
my_free(file);
|
|
return MY_TEST(rc);
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Initializes TRANSLOG_FILE structure
|
|
|
|
@param file reference on the file to initialize
|
|
@param number file number
|
|
@param is_sync is file synced on disk
|
|
*/
|
|
|
|
static void translog_file_init(TRANSLOG_FILE *file, uint32 number,
|
|
my_bool is_sync)
|
|
{
|
|
pagecache_file_set_null_hooks(&file->handler);
|
|
file->handler.post_read_hook= translog_page_validator;
|
|
file->handler.flush_log_callback= maria_flush_log_for_page_none;
|
|
file->handler.callback_data= (uchar*)file;
|
|
|
|
file->number= number;
|
|
file->was_recovered= 0;
|
|
file->is_sync= is_sync;
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Create and fill header of new file.
|
|
|
|
@note the caller must call it right after it has increased
|
|
log_descriptor.horizon to the new file
|
|
(log_descriptor.horizon+= LSN_ONE_FILE)
|
|
|
|
|
|
@retval 0 OK
|
|
@retval 1 Error
|
|
*/
|
|
|
|
static my_bool translog_create_new_file()
|
|
{
|
|
TRANSLOG_FILE *file= (TRANSLOG_FILE*)my_malloc(PSI_INSTRUMENT_ME, sizeof(TRANSLOG_FILE),
|
|
MYF(0));
|
|
|
|
TRANSLOG_FILE *old= get_current_logfile();
|
|
uint32 file_no= LSN_FILE_NO(log_descriptor.horizon);
|
|
DBUG_ENTER("translog_create_new_file");
|
|
|
|
if (file == NULL)
|
|
goto error;
|
|
|
|
/*
|
|
Writes max_lsn to the file header before finishing it (there is no need
|
|
to lock file header buffer because it is still unfinished file, so only
|
|
one thread can finish the file and nobody interested of LSN of current
|
|
(unfinished) file, because no one can purge it).
|
|
*/
|
|
if (translog_max_lsn_to_header(old->handler.file, log_descriptor.max_lsn))
|
|
goto error;
|
|
|
|
mysql_rwlock_wrlock(&log_descriptor.open_files_lock);
|
|
DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
|
|
log_descriptor.open_files.elements);
|
|
DBUG_ASSERT(file_no == log_descriptor.max_file + 1);
|
|
if (allocate_dynamic(&log_descriptor.open_files,
|
|
log_descriptor.max_file - log_descriptor.min_file + 2))
|
|
goto error_lock;
|
|
|
|
/* this call just expand the array */
|
|
if (insert_dynamic(&log_descriptor.open_files, (uchar*)&file))
|
|
goto error_lock;
|
|
|
|
if ((file->handler.file= create_logfile_by_number_no_cache(file_no)) == -1)
|
|
goto error_lock;
|
|
translog_file_init(file, file_no, 0);
|
|
|
|
log_descriptor.max_file++;
|
|
{
|
|
char *start= (char*) dynamic_element(&log_descriptor.open_files, 0,
|
|
TRANSLOG_FILE**);
|
|
memmove(start + sizeof(TRANSLOG_FILE*), start,
|
|
sizeof(TRANSLOG_FILE*) *
|
|
(log_descriptor.max_file - log_descriptor.min_file + 1 - 1));
|
|
}
|
|
/* can't fail we because we expanded array */
|
|
set_dynamic(&log_descriptor.open_files, (uchar*)&file, 0);
|
|
DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
|
|
log_descriptor.open_files.elements);
|
|
mysql_rwlock_unlock(&log_descriptor.open_files_lock);
|
|
|
|
DBUG_PRINT("info", ("file_no: %lu", (ulong)file_no));
|
|
|
|
if (translog_write_file_header())
|
|
goto error;
|
|
|
|
if (ma_control_file_write_and_force(last_checkpoint_lsn, file_no,
|
|
max_trid_in_control_file,
|
|
recovery_failures))
|
|
goto error;
|
|
|
|
DBUG_RETURN(0);
|
|
|
|
error_lock:
|
|
mysql_rwlock_unlock(&log_descriptor.open_files_lock);
|
|
error:
|
|
translog_stop_writing();
|
|
my_free(file);
|
|
DBUG_RETURN(1);
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Locks the loghandler buffer.
|
|
|
|
@param buffer This buffer which should be locked
|
|
|
|
@note See comment before buffer 'mutex' variable.
|
|
|
|
@retval 0 OK
|
|
@retval 1 Error
|
|
*/
|
|
|
|
static void translog_buffer_lock(struct st_translog_buffer *buffer)
|
|
{
|
|
DBUG_ENTER("translog_buffer_lock");
|
|
DBUG_PRINT("enter",
|
|
("Lock buffer #%u: %p", buffer->buffer_no,
|
|
buffer));
|
|
mysql_mutex_lock(&buffer->mutex);
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
|
|
/*
|
|
Unlock the loghandler buffer
|
|
|
|
SYNOPSIS
|
|
translog_buffer_unlock()
|
|
buffer This buffer which should be unlocked
|
|
*/
|
|
|
|
static void translog_buffer_unlock(struct st_translog_buffer *buffer)
|
|
{
|
|
DBUG_ENTER("translog_buffer_unlock");
|
|
DBUG_PRINT("enter", ("Unlock buffer... #%u (%p)",
|
|
(uint) buffer->buffer_no, buffer));
|
|
|
|
mysql_mutex_unlock(&buffer->mutex);
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
|
|
/*
|
|
Write a header on the page
|
|
|
|
SYNOPSIS
|
|
translog_new_page_header()
|
|
horizon Where to write the page
|
|
cursor Where to write the page
|
|
|
|
NOTE
|
|
- space for page header should be checked before
|
|
*/
|
|
|
|
static uchar translog_sector_random;
|
|
|
|
static void translog_new_page_header(TRANSLOG_ADDRESS *horizon,
|
|
struct st_buffer_cursor *cursor)
|
|
{
|
|
uchar *ptr;
|
|
|
|
DBUG_ENTER("translog_new_page_header");
|
|
DBUG_ASSERT(cursor->ptr);
|
|
|
|
cursor->protected= 0;
|
|
|
|
ptr= cursor->ptr;
|
|
/* Page number */
|
|
int3store(ptr, LSN_OFFSET(*horizon) / TRANSLOG_PAGE_SIZE);
|
|
ptr+= 3;
|
|
/* File number */
|
|
int3store(ptr, LSN_FILE_NO(*horizon));
|
|
ptr+= 3;
|
|
DBUG_ASSERT(TRANSLOG_PAGE_FLAGS == (ptr - cursor->ptr));
|
|
cursor->ptr[TRANSLOG_PAGE_FLAGS]= (uchar) log_descriptor.flags;
|
|
ptr++;
|
|
if (log_descriptor.flags & TRANSLOG_PAGE_CRC)
|
|
{
|
|
#ifndef DBUG_OFF
|
|
DBUG_PRINT("info", ("write 0x11223344 CRC to " LSN_FMT,
|
|
LSN_IN_PARTS(*horizon)));
|
|
/* This will be overwritten by real CRC; This is just for debugging */
|
|
int4store(ptr, 0x11223344);
|
|
#endif
|
|
/* CRC will be put when page is finished */
|
|
ptr+= CRC_SIZE;
|
|
}
|
|
if (log_descriptor.flags & TRANSLOG_SECTOR_PROTECTION)
|
|
{
|
|
/*
|
|
translog_sector_randmo works like "random" values producer because
|
|
it is enough to have such "random" for this purpose and it will
|
|
not interfere with higher level pseudo random value generator
|
|
*/
|
|
ptr[0]= translog_sector_random++;
|
|
ptr+= TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE;
|
|
}
|
|
{
|
|
size_t len= (ptr - cursor->ptr);
|
|
(*horizon)+= len; /* increasing the offset part of the address */
|
|
cursor->current_page_fill= (uint16)len;
|
|
if (!cursor->chaser)
|
|
cursor->buffer->size+= (translog_size_t)len;
|
|
}
|
|
cursor->ptr= ptr;
|
|
DBUG_PRINT("info", ("NewP buffer #%u: %p chaser: %d Size: %lu (%lu) "
|
|
"Horizon: " LSN_FMT,
|
|
(uint) cursor->buffer->buffer_no, cursor->buffer,
|
|
cursor->chaser, (ulong) cursor->buffer->size,
|
|
(ulong) (cursor->ptr - cursor->buffer->buffer),
|
|
LSN_IN_PARTS(*horizon)));
|
|
translog_check_cursor(cursor);
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
|
|
/*
|
|
Put sector protection on the page image
|
|
|
|
SYNOPSIS
|
|
translog_put_sector_protection()
|
|
page reference on the page content
|
|
cursor cursor of the buffer
|
|
|
|
NOTES
|
|
We put a sector protection on all following sectors on the page,
|
|
except the first sector that is protected by page header.
|
|
*/
|
|
|
|
static void translog_put_sector_protection(uchar *page,
|
|
struct st_buffer_cursor *cursor)
|
|
{
|
|
uchar *table= page + log_descriptor.page_overhead -
|
|
TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE;
|
|
uint i, offset;
|
|
uint16 last_protected_sector= ((cursor->previous_offset - 1) /
|
|
DISK_DRIVE_SECTOR_SIZE);
|
|
uint16 start_sector= cursor->previous_offset / DISK_DRIVE_SECTOR_SIZE;
|
|
uint8 value= table[0] + cursor->write_counter;
|
|
DBUG_ENTER("translog_put_sector_protection");
|
|
|
|
if (start_sector == 0)
|
|
{
|
|
/* First sector is protected by file & page numbers in the page header. */
|
|
start_sector= 1;
|
|
}
|
|
|
|
DBUG_PRINT("enter", ("Write counter:%u value:%u offset:%u, "
|
|
"last protected:%u start sector:%u",
|
|
(uint) cursor->write_counter,
|
|
(uint) value,
|
|
(uint) cursor->previous_offset,
|
|
(uint) last_protected_sector, (uint) start_sector));
|
|
if (last_protected_sector == start_sector)
|
|
{
|
|
i= last_protected_sector;
|
|
offset= last_protected_sector * DISK_DRIVE_SECTOR_SIZE;
|
|
/* restore data, because we modified sector which was protected */
|
|
if (offset < cursor->previous_offset)
|
|
page[offset]= table[i];
|
|
}
|
|
for (i= start_sector, offset= start_sector * DISK_DRIVE_SECTOR_SIZE;
|
|
i < TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE;
|
|
i++, (offset+= DISK_DRIVE_SECTOR_SIZE))
|
|
{
|
|
DBUG_PRINT("info", ("sector:%u offset:%u data 0x%x",
|
|
i, offset, (uint) page[offset]));
|
|
table[i]= page[offset];
|
|
page[offset]= value;
|
|
DBUG_PRINT("info", ("sector:%u offset:%u data 0x%x",
|
|
i, offset, (uint) page[offset]));
|
|
}
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
|
|
/*
|
|
Calculate CRC32 of given area
|
|
|
|
SYNOPSIS
|
|
translog_crc()
|
|
area Pointer of the area beginning
|
|
length The Area length
|
|
|
|
RETURN
|
|
CRC32
|
|
*/
|
|
|
|
static uint32 translog_crc(uchar *area, uint length)
|
|
{
|
|
DBUG_ENTER("translog_crc");
|
|
DBUG_RETURN(my_checksum(0L, area, length));
|
|
}
|
|
|
|
|
|
/*
|
|
Finish current page with zeros
|
|
|
|
SYNOPSIS
|
|
translog_finish_page()
|
|
horizon \ horizon & buffer pointers
|
|
cursor /
|
|
*/
|
|
|
|
static void translog_finish_page(TRANSLOG_ADDRESS *horizon,
|
|
struct st_buffer_cursor *cursor)
|
|
{
|
|
uint16 left= TRANSLOG_PAGE_SIZE - cursor->current_page_fill;
|
|
uchar *page= cursor->ptr - cursor->current_page_fill;
|
|
DBUG_ENTER("translog_finish_page");
|
|
DBUG_PRINT("enter", ("Buffer: #%u %p "
|
|
"Buffer addr: " LSN_FMT " "
|
|
"Page addr: " LSN_FMT " "
|
|
"size:%u (%u) Pg:%u left:%u",
|
|
(uint) cursor->buffer_no, cursor->buffer,
|
|
LSN_IN_PARTS(cursor->buffer->offset),
|
|
(uint)LSN_FILE_NO(*horizon),
|
|
(uint)(LSN_OFFSET(*horizon) -
|
|
cursor->current_page_fill),
|
|
(uint) cursor->buffer->size,
|
|
(uint) (cursor->ptr -cursor->buffer->buffer),
|
|
(uint) cursor->current_page_fill, (uint) left));
|
|
DBUG_ASSERT(LSN_FILE_NO(*horizon) == LSN_FILE_NO(cursor->buffer->offset)
|
|
|| translog_status == TRANSLOG_UNINITED);
|
|
if ((LSN_FILE_NO(*horizon) != LSN_FILE_NO(cursor->buffer->offset)))
|
|
DBUG_VOID_RETURN; // everything wrong do not write to awoid more problems
|
|
translog_check_cursor(cursor);
|
|
if (cursor->protected)
|
|
{
|
|
DBUG_PRINT("info", ("Already protected and finished"));
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
cursor->protected= 1;
|
|
|
|
DBUG_ASSERT(left < TRANSLOG_PAGE_SIZE);
|
|
if (left != 0)
|
|
{
|
|
DBUG_PRINT("info", ("left: %u", (uint) left));
|
|
memset(cursor->ptr, TRANSLOG_FILLER, left);
|
|
cursor->ptr+= left;
|
|
(*horizon)+= left; /* offset increasing */
|
|
if (!cursor->chaser)
|
|
cursor->buffer->size+= left;
|
|
/* We are finishing the page so reset the counter */
|
|
cursor->current_page_fill= 0;
|
|
DBUG_PRINT("info", ("Finish Page buffer #%u: %p "
|
|
"chaser: %d Size: %lu (%lu)",
|
|
(uint) cursor->buffer->buffer_no,
|
|
cursor->buffer, cursor->chaser,
|
|
(ulong) cursor->buffer->size,
|
|
(ulong) (cursor->ptr - cursor->buffer->buffer)));
|
|
translog_check_cursor(cursor);
|
|
}
|
|
/*
|
|
When we are finishing the page other thread might not finish the page
|
|
header yet (in case if we started from the middle of the page) so we
|
|
have to read log_descriptor.flags but not the flags from the page.
|
|
*/
|
|
if (log_descriptor.flags & TRANSLOG_SECTOR_PROTECTION)
|
|
{
|
|
translog_put_sector_protection(page, cursor);
|
|
DBUG_PRINT("info", ("drop write_counter"));
|
|
cursor->write_counter= 0;
|
|
cursor->previous_offset= 0;
|
|
}
|
|
if (log_descriptor.flags & TRANSLOG_PAGE_CRC)
|
|
{
|
|
uint32 crc= translog_crc(page + log_descriptor.page_overhead,
|
|
TRANSLOG_PAGE_SIZE -
|
|
log_descriptor.page_overhead);
|
|
DBUG_PRINT("info", ("CRC: %lx", (ulong) crc));
|
|
/* We have page number, file number and flag before crc */
|
|
int4store(page + 3 + 3 + 1, crc);
|
|
}
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
|
|
/*
|
|
@brief Wait until all threads have finished closing this buffer.
|
|
|
|
@param buffer This buffer should be check
|
|
*/
|
|
|
|
static void translog_wait_for_closing(struct st_translog_buffer *buffer)
|
|
{
|
|
DBUG_ENTER("translog_wait_for_closing");
|
|
DBUG_PRINT("enter", ("Buffer #%u %p copies in progress: %u "
|
|
"is closing %u File: %d size: %lu",
|
|
(uint) buffer->buffer_no, buffer,
|
|
(uint) buffer->copy_to_buffer_in_progress,
|
|
(uint) buffer->is_closing_buffer,
|
|
(buffer->file ? buffer->file->handler.file : -1),
|
|
(ulong) buffer->size));
|
|
translog_buffer_lock_assert_owner(buffer);
|
|
|
|
while (buffer->is_closing_buffer)
|
|
{
|
|
DBUG_PRINT("info", ("wait for writers... buffer: #%u %p",
|
|
(uint) buffer->buffer_no, buffer));
|
|
DBUG_ASSERT(buffer->file != NULL);
|
|
mysql_cond_wait(&buffer->waiting_filling_buffer, &buffer->mutex);
|
|
DBUG_PRINT("info", ("wait for writers done buffer: #%u %p",
|
|
(uint) buffer->buffer_no, buffer));
|
|
}
|
|
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
|
|
/*
|
|
@brief Wait until all threads have finished filling this buffer.
|
|
|
|
@param buffer This buffer should be check
|
|
*/
|
|
|
|
static void translog_wait_for_writers(struct st_translog_buffer *buffer)
|
|
{
|
|
DBUG_ENTER("translog_wait_for_writers");
|
|
DBUG_PRINT("enter", ("Buffer #%u %p copies in progress: %u "
|
|
"is closing %u File: %d size: %lu",
|
|
(uint) buffer->buffer_no, buffer,
|
|
(uint) buffer->copy_to_buffer_in_progress,
|
|
(uint) buffer->is_closing_buffer,
|
|
(buffer->file ? buffer->file->handler.file : -1),
|
|
(ulong) buffer->size));
|
|
translog_buffer_lock_assert_owner(buffer);
|
|
|
|
while (buffer->copy_to_buffer_in_progress)
|
|
{
|
|
DBUG_PRINT("info", ("wait for writers... buffer: #%u %p",
|
|
(uint) buffer->buffer_no, buffer));
|
|
DBUG_ASSERT(buffer->file != NULL);
|
|
mysql_cond_wait(&buffer->waiting_filling_buffer, &buffer->mutex);
|
|
DBUG_PRINT("info", ("wait for writers done buffer: #%u %p",
|
|
(uint) buffer->buffer_no, buffer));
|
|
}
|
|
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
|
|
/*
|
|
|
|
Wait for buffer to become free
|
|
|
|
SYNOPSIS
|
|
translog_wait_for_buffer_free()
|
|
buffer The buffer we are waiting for
|
|
|
|
NOTE
|
|
- this buffer should be locked
|
|
*/
|
|
|
|
static void translog_wait_for_buffer_free(struct st_translog_buffer *buffer)
|
|
{
|
|
TRANSLOG_ADDRESS offset= buffer->offset;
|
|
TRANSLOG_FILE *file= buffer->file;
|
|
uint8 ver= buffer->ver;
|
|
DBUG_ENTER("translog_wait_for_buffer_free");
|
|
DBUG_PRINT("enter", ("Buffer #%u %p copies in progress: %u "
|
|
"is closing %u File: %d size: %lu",
|
|
(uint) buffer->buffer_no, buffer,
|
|
(uint) buffer->copy_to_buffer_in_progress,
|
|
(uint) buffer->is_closing_buffer,
|
|
(buffer->file ? buffer->file->handler.file : -1),
|
|
(ulong) buffer->size));
|
|
|
|
translog_wait_for_writers(buffer);
|
|
|
|
if (offset != buffer->offset || file != buffer->file || ver != buffer->ver)
|
|
DBUG_VOID_RETURN; /* the buffer if already freed */
|
|
|
|
while (buffer->file != NULL)
|
|
{
|
|
DBUG_PRINT("info", ("wait for writers... buffer: #%u %p",
|
|
(uint) buffer->buffer_no, buffer));
|
|
mysql_cond_wait(&buffer->waiting_filling_buffer, &buffer->mutex);
|
|
DBUG_PRINT("info", ("wait for writers done. buffer: #%u %p",
|
|
(uint) buffer->buffer_no, buffer));
|
|
}
|
|
DBUG_ASSERT(buffer->copy_to_buffer_in_progress == 0);
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
|
|
/*
|
|
Initialize the cursor for a buffer
|
|
|
|
SYNOPSIS
|
|
translog_cursor_init()
|
|
buffer The buffer
|
|
cursor It's cursor
|
|
buffer_no Number of buffer
|
|
*/
|
|
|
|
static void translog_cursor_init(struct st_buffer_cursor *cursor,
|
|
struct st_translog_buffer *buffer,
|
|
uint8 buffer_no)
|
|
{
|
|
DBUG_ENTER("translog_cursor_init");
|
|
cursor->ptr= buffer->buffer;
|
|
cursor->buffer= buffer;
|
|
cursor->buffer_no= buffer_no;
|
|
cursor->current_page_fill= 0;
|
|
cursor->chaser= (cursor != &log_descriptor.bc);
|
|
cursor->write_counter= 0;
|
|
cursor->previous_offset= 0;
|
|
cursor->protected= 0;
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
|
|
/*
|
|
@brief Initialize buffer for the current file, and a cursor for this buffer.
|
|
|
|
@param buffer The buffer
|
|
@param cursor It's cursor
|
|
@param buffer_no Number of buffer
|
|
*/
|
|
|
|
static void translog_start_buffer(struct st_translog_buffer *buffer,
|
|
struct st_buffer_cursor *cursor,
|
|
uint buffer_no)
|
|
{
|
|
DBUG_ENTER("translog_start_buffer");
|
|
DBUG_PRINT("enter",
|
|
("Assign buffer: #%u (%p) offset: 0x%x(%u)",
|
|
(uint) buffer->buffer_no, buffer,
|
|
(uint) LSN_OFFSET(log_descriptor.horizon),
|
|
(uint) LSN_OFFSET(log_descriptor.horizon)));
|
|
DBUG_ASSERT(buffer_no == buffer->buffer_no);
|
|
buffer->pre_force_close_horizon=
|
|
buffer->prev_last_lsn= buffer->last_lsn= LSN_IMPOSSIBLE;
|
|
DBUG_PRINT("info", ("last_lsn and prev_last_lsn set to 0 buffer: %p",
|
|
buffer));
|
|
buffer->offset= log_descriptor.horizon;
|
|
buffer->next_buffer_offset= LSN_IMPOSSIBLE;
|
|
buffer->file= get_current_logfile();
|
|
buffer->overlay= 0;
|
|
buffer->size= 0;
|
|
buffer->skipped_data= 0;
|
|
translog_cursor_init(cursor, buffer, buffer_no);
|
|
DBUG_PRINT("info", ("file: #%ld (%d) init cursor #%u: %p "
|
|
"chaser: %d Size: %lu (%lu)",
|
|
(long) (buffer->file ? buffer->file->number : 0),
|
|
(buffer->file ? buffer->file->handler.file : -1),
|
|
(uint) cursor->buffer->buffer_no, cursor->buffer,
|
|
cursor->chaser, (ulong) cursor->buffer->size,
|
|
(ulong) (cursor->ptr - cursor->buffer->buffer)));
|
|
translog_check_cursor(cursor);
|
|
mysql_mutex_lock(&log_descriptor.dirty_buffer_mask_lock);
|
|
log_descriptor.dirty_buffer_mask|= (1 << buffer->buffer_no);
|
|
mysql_mutex_unlock(&log_descriptor.dirty_buffer_mask_lock);
|
|
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
|
|
/*
|
|
@brief Switch to the next buffer in a chain.
|
|
|
|
@param horizon \ Pointers on current position in file and buffer
|
|
@param cursor /
|
|
@param new_file Also start new file
|
|
|
|
@note
|
|
- loghandler should be locked
|
|
- after return new and old buffer still are locked
|
|
|
|
@retval 0 OK
|
|
@retval 1 Error
|
|
*/
|
|
|
|
static my_bool translog_buffer_next(TRANSLOG_ADDRESS *horizon,
|
|
struct st_buffer_cursor *cursor,
|
|
my_bool new_file)
|
|
{
|
|
uint old_buffer_no= cursor->buffer_no;
|
|
uint new_buffer_no= (old_buffer_no + 1) % TRANSLOG_BUFFERS_NO;
|
|
struct st_translog_buffer *new_buffer= log_descriptor.buffers + new_buffer_no;
|
|
my_bool chasing= cursor->chaser;
|
|
DBUG_ENTER("translog_buffer_next");
|
|
|
|
DBUG_PRINT("info", ("horizon: " LSN_FMT " chasing: %d",
|
|
LSN_IN_PARTS(log_descriptor.horizon), chasing));
|
|
|
|
DBUG_ASSERT(cmp_translog_addr(log_descriptor.horizon, *horizon) >= 0);
|
|
|
|
translog_finish_page(horizon, cursor);
|
|
|
|
if (!chasing)
|
|
{
|
|
translog_buffer_lock(new_buffer);
|
|
#ifndef DBUG_OFF
|
|
{
|
|
TRANSLOG_ADDRESS offset= new_buffer->offset;
|
|
TRANSLOG_FILE *file= new_buffer->file;
|
|
uint8 ver= new_buffer->ver;
|
|
translog_lock_assert_owner();
|
|
#endif
|
|
translog_wait_for_buffer_free(new_buffer);
|
|
#ifndef DBUG_OFF
|
|
/* We keep the handler locked so nobody can start this new buffer */
|
|
DBUG_ASSERT(offset == new_buffer->offset && new_buffer->file == NULL &&
|
|
(file == NULL ? ver : (uint8)(ver + 1)) == new_buffer->ver);
|
|
}
|
|
#endif
|
|
}
|
|
else
|
|
DBUG_ASSERT(new_buffer->file != NULL);
|
|
|
|
if (new_file)
|
|
{
|
|
/* move the horizon to the next file and its header page */
|
|
(*horizon)+= LSN_ONE_FILE;
|
|
(*horizon)= LSN_REPLACE_OFFSET(*horizon, TRANSLOG_PAGE_SIZE);
|
|
if (!chasing && translog_create_new_file())
|
|
{
|
|
DBUG_RETURN(1);
|
|
}
|
|
}
|
|
|
|
/* prepare next page */
|
|
if (chasing)
|
|
translog_cursor_init(cursor, new_buffer, new_buffer_no);
|
|
else
|
|
{
|
|
translog_lock_assert_owner();
|
|
translog_start_buffer(new_buffer, cursor, new_buffer_no);
|
|
new_buffer->prev_buffer_offset=
|
|
log_descriptor.buffers[old_buffer_no].offset;
|
|
new_buffer->prev_last_lsn=
|
|
BUFFER_MAX_LSN(log_descriptor.buffers + old_buffer_no);
|
|
}
|
|
log_descriptor.buffers[old_buffer_no].next_buffer_offset= new_buffer->offset;
|
|
DBUG_PRINT("info", ("prev_last_lsn set to " LSN_FMT " buffer:%p",
|
|
LSN_IN_PARTS(new_buffer->prev_last_lsn),
|
|
new_buffer));
|
|
translog_new_page_header(horizon, cursor);
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
|
|
/*
|
|
Sets max LSN sent to file, and address from which data is only in the buffer
|
|
|
|
SYNOPSIS
|
|
translog_set_sent_to_disk()
|
|
buffer buffer which we have sent to disk
|
|
|
|
TODO: use atomic operations if possible (64bit architectures?)
|
|
*/
|
|
|
|
static void translog_set_sent_to_disk(struct st_translog_buffer *buffer)
|
|
{
|
|
LSN lsn= buffer->last_lsn;
|
|
TRANSLOG_ADDRESS in_buffers= buffer->next_buffer_offset;
|
|
|
|
DBUG_ENTER("translog_set_sent_to_disk");
|
|
mysql_mutex_lock(&log_descriptor.sent_to_disk_lock);
|
|
DBUG_PRINT("enter", ("lsn: " LSN_FMT " in_buffers: " LSN_FMT " "
|
|
"in_buffers_only: " LSN_FMT " start: " LSN_FMT " "
|
|
"sent_to_disk: " LSN_FMT,
|
|
LSN_IN_PARTS(lsn),
|
|
LSN_IN_PARTS(in_buffers),
|
|
LSN_IN_PARTS(log_descriptor.log_start),
|
|
LSN_IN_PARTS(log_descriptor.in_buffers_only),
|
|
LSN_IN_PARTS(log_descriptor.sent_to_disk)));
|
|
/*
|
|
We write sequentially (first part of following assert) but we rewrite
|
|
the same page in case we started mysql and shut it down immediately
|
|
(second part of the following assert)
|
|
*/
|
|
DBUG_ASSERT(cmp_translog_addr(lsn, log_descriptor.sent_to_disk) >= 0 ||
|
|
cmp_translog_addr(lsn, log_descriptor.log_start) < 0);
|
|
log_descriptor.sent_to_disk= lsn;
|
|
/* LSN_IMPOSSIBLE == 0 => it will work for very first time */
|
|
if (cmp_translog_addr(in_buffers, log_descriptor.in_buffers_only) > 0)
|
|
{
|
|
log_descriptor.in_buffers_only= in_buffers;
|
|
DBUG_PRINT("info", ("set new in_buffers_only"));
|
|
}
|
|
mysql_mutex_unlock(&log_descriptor.sent_to_disk_lock);
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
|
|
/*
|
|
Sets address from which data is only in the buffer
|
|
|
|
SYNOPSIS
|
|
translog_set_only_in_buffers()
|
|
lsn LSN to assign
|
|
in_buffers to assign to in_buffers_only
|
|
*/
|
|
|
|
static void translog_set_only_in_buffers(TRANSLOG_ADDRESS in_buffers)
|
|
{
|
|
DBUG_ENTER("translog_set_only_in_buffers");
|
|
mysql_mutex_lock(&log_descriptor.sent_to_disk_lock);
|
|
DBUG_PRINT("enter", ("in_buffers: " LSN_FMT " "
|
|
"in_buffers_only: " LSN_FMT,
|
|
LSN_IN_PARTS(in_buffers),
|
|
LSN_IN_PARTS(log_descriptor.in_buffers_only)));
|
|
/* LSN_IMPOSSIBLE == 0 => it will work for very first time */
|
|
if (cmp_translog_addr(in_buffers, log_descriptor.in_buffers_only) > 0)
|
|
{
|
|
if (translog_status != TRANSLOG_OK)
|
|
goto end;
|
|
log_descriptor.in_buffers_only= in_buffers;
|
|
DBUG_PRINT("info", ("set new in_buffers_only"));
|
|
}
|
|
end:
|
|
mysql_mutex_unlock(&log_descriptor.sent_to_disk_lock);
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
|
|
/*
|
|
Gets address from which data is only in the buffer
|
|
|
|
SYNOPSIS
|
|
translog_only_in_buffers()
|
|
|
|
RETURN
|
|
address from which data is only in the buffer
|
|
*/
|
|
|
|
static TRANSLOG_ADDRESS translog_only_in_buffers()
|
|
{
|
|
register TRANSLOG_ADDRESS addr;
|
|
DBUG_ENTER("translog_only_in_buffers");
|
|
mysql_mutex_lock(&log_descriptor.sent_to_disk_lock);
|
|
addr= log_descriptor.in_buffers_only;
|
|
mysql_mutex_unlock(&log_descriptor.sent_to_disk_lock);
|
|
DBUG_RETURN(addr);
|
|
}
|
|
|
|
|
|
/*
|
|
Get max LSN sent to file
|
|
|
|
SYNOPSIS
|
|
translog_get_sent_to_disk()
|
|
|
|
RETURN
|
|
max LSN send to file
|
|
*/
|
|
|
|
static LSN translog_get_sent_to_disk()
|
|
{
|
|
register LSN lsn;
|
|
DBUG_ENTER("translog_get_sent_to_disk");
|
|
mysql_mutex_lock(&log_descriptor.sent_to_disk_lock);
|
|
lsn= log_descriptor.sent_to_disk;
|
|
DBUG_PRINT("info", ("sent to disk up to " LSN_FMT, LSN_IN_PARTS(lsn)));
|
|
mysql_mutex_unlock(&log_descriptor.sent_to_disk_lock);
|
|
DBUG_RETURN(lsn);
|
|
}
|
|
|
|
|
|
/*
|
|
Get first chunk address on the given page
|
|
|
|
SYNOPSIS
|
|
translog_get_first_chunk_offset()
|
|
page The page where to find first chunk
|
|
|
|
RETURN
|
|
first chunk offset
|
|
*/
|
|
|
|
static my_bool translog_get_first_chunk_offset(uchar *page)
|
|
{
|
|
DBUG_ENTER("translog_get_first_chunk_offset");
|
|
DBUG_ASSERT(page[TRANSLOG_PAGE_FLAGS] < TRANSLOG_FLAGS_NUM);
|
|
DBUG_RETURN(page_overhead[page[TRANSLOG_PAGE_FLAGS]]);
|
|
}
|
|
|
|
|
|
/*
|
|
Write coded length of record
|
|
|
|
SYNOPSIS
|
|
translog_write_variable_record_1group_code_len
|
|
dst Destination buffer pointer
|
|
length Length which should be coded
|
|
header_len Calculated total header length
|
|
*/
|
|
|
|
static void
|
|
translog_write_variable_record_1group_code_len(uchar *dst,
|
|
translog_size_t length,
|
|
uint16 header_len)
|
|
{
|
|
switch (header_len) {
|
|
case 6: /* (5 + 1) */
|
|
DBUG_ASSERT(length <= 250);
|
|
*dst= (uint8) length;
|
|
return;
|
|
case 8: /* (5 + 3) */
|
|
DBUG_ASSERT(length <= 0xFFFF);
|
|
*dst= 251;
|
|
int2store(dst + 1, length);
|
|
return;
|
|
case 9: /* (5 + 4) */
|
|
DBUG_ASSERT(length <= (ulong) 0xFFFFFF);
|
|
*dst= 252;
|
|
int3store(dst + 1, length);
|
|
return;
|
|
case 10: /* (5 + 5) */
|
|
*dst= 253;
|
|
int4store(dst + 1, length);
|
|
return;
|
|
default:
|
|
DBUG_ASSERT(0);
|
|
}
|
|
return;
|
|
}
|
|
|
|
|
|
/*
|
|
Decode record data length and advance given pointer to the next field
|
|
|
|
SYNOPSIS
|
|
translog_variable_record_1group_decode_len()
|
|
src The pointer to the pointer to the length beginning
|
|
|
|
RETURN
|
|
decoded length
|
|
*/
|
|
|
|
static translog_size_t translog_variable_record_1group_decode_len(uchar **src)
|
|
{
|
|
uint8 first= (uint8) (**src);
|
|
switch (first) {
|
|
case 251:
|
|
(*src)+= 3;
|
|
return (uint2korr((*src) - 2));
|
|
case 252:
|
|
(*src)+= 4;
|
|
return (uint3korr((*src) - 3));
|
|
case 253:
|
|
(*src)+= 5;
|
|
return (uint4korr((*src) - 4));
|
|
case 254:
|
|
case 255:
|
|
DBUG_ASSERT(0); /* reserved for future use */
|
|
return (0);
|
|
default:
|
|
(*src)++;
|
|
return (first);
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
Get total length of this chunk (not only body)
|
|
|
|
SYNOPSIS
|
|
translog_get_total_chunk_length()
|
|
page The page where chunk placed
|
|
offset Offset of the chunk on this place
|
|
|
|
RETURN
|
|
total length of the chunk
|
|
*/
|
|
|
|
static uint16 translog_get_total_chunk_length(uchar *page, uint16 offset)
|
|
{
|
|
DBUG_ENTER("translog_get_total_chunk_length");
|
|
switch (page[offset] & TRANSLOG_CHUNK_TYPE) {
|
|
case TRANSLOG_CHUNK_LSN:
|
|
{
|
|
/* 0 chunk referred as LSN (head or tail) */
|
|
translog_size_t rec_len;
|
|
uchar *start= page + offset;
|
|
uchar *ptr= start + 1 + 2; /* chunk type and short trid */
|
|
uint16 chunk_len, header_len, page_rest;
|
|
DBUG_PRINT("info", ("TRANSLOG_CHUNK_LSN"));
|
|
rec_len= translog_variable_record_1group_decode_len(&ptr);
|
|
chunk_len= uint2korr(ptr);
|
|
header_len= (uint16) (ptr -start) + 2;
|
|
DBUG_PRINT("info", ("rec len: %lu chunk len: %u header len: %u",
|
|
(ulong) rec_len, (uint) chunk_len, (uint) header_len));
|
|
if (chunk_len)
|
|
{
|
|
DBUG_PRINT("info", ("chunk len: %u + %u = %u",
|
|
(uint) header_len, (uint) chunk_len,
|
|
(uint) (chunk_len + header_len)));
|
|
DBUG_RETURN(chunk_len + header_len);
|
|
}
|
|
page_rest= TRANSLOG_PAGE_SIZE - offset;
|
|
DBUG_PRINT("info", ("page_rest %u", (uint) page_rest));
|
|
if (rec_len + header_len < page_rest)
|
|
DBUG_RETURN(rec_len + header_len);
|
|
DBUG_RETURN(page_rest);
|
|
}
|
|
case TRANSLOG_CHUNK_FIXED:
|
|
{
|
|
uchar *ptr;
|
|
uint type= page[offset] & TRANSLOG_REC_TYPE;
|
|
uint length;
|
|
int i;
|
|
/* 1 (pseudo)fixed record (also LSN) */
|
|
DBUG_PRINT("info", ("TRANSLOG_CHUNK_FIXED"));
|
|
DBUG_ASSERT(log_record_type_descriptor[type].rclass ==
|
|
LOGRECTYPE_FIXEDLENGTH ||
|
|
log_record_type_descriptor[type].rclass ==
|
|
LOGRECTYPE_PSEUDOFIXEDLENGTH);
|
|
if (log_record_type_descriptor[type].rclass == LOGRECTYPE_FIXEDLENGTH)
|
|
{
|
|
DBUG_PRINT("info",
|
|
("Fixed length: %u",
|
|
(uint) (log_record_type_descriptor[type].fixed_length + 3)));
|
|
DBUG_RETURN(log_record_type_descriptor[type].fixed_length + 3);
|
|
}
|
|
|
|
ptr= page + offset + 3; /* first compressed LSN */
|
|
length= log_record_type_descriptor[type].fixed_length + 3;
|
|
for (i= 0; i < log_record_type_descriptor[type].compressed_LSN; i++)
|
|
{
|
|
/* first 2 bits is length - 2 */
|
|
uint len= (((uint8) (*ptr)) >> 6) + 2;
|
|
if (ptr[0] == 0 && ((uint8) ptr[1]) == 1)
|
|
len+= LSN_STORE_SIZE; /* case of full LSN storing */
|
|
ptr+= len;
|
|
/* subtract saved bytes */
|
|
length-= (LSN_STORE_SIZE - len);
|
|
}
|
|
DBUG_PRINT("info", ("Pseudo-fixed length: %u", length));
|
|
DBUG_RETURN(length);
|
|
}
|
|
case TRANSLOG_CHUNK_NOHDR:
|
|
/* 2 no header chunk (till page end) */
|
|
DBUG_PRINT("info", ("TRANSLOG_CHUNK_NOHDR length: %u",
|
|
(uint) (TRANSLOG_PAGE_SIZE - offset)));
|
|
DBUG_RETURN(TRANSLOG_PAGE_SIZE - offset);
|
|
case TRANSLOG_CHUNK_LNGTH: /* 3 chunk with chunk length */
|
|
DBUG_PRINT("info", ("TRANSLOG_CHUNK_LNGTH"));
|
|
DBUG_ASSERT(TRANSLOG_PAGE_SIZE - offset >= 3);
|
|
DBUG_PRINT("info", ("length: %u", uint2korr(page + offset + 1) + 3));
|
|
DBUG_RETURN(uint2korr(page + offset + 1) + 3);
|
|
default:
|
|
DBUG_ASSERT(0);
|
|
DBUG_RETURN(0);
|
|
}
|
|
}
|
|
|
|
/*
|
|
@brief Waits previous buffer flush finish
|
|
|
|
@param buffer buffer for check
|
|
|
|
@retval 0 previous buffer flushed and this thread have to flush this one
|
|
@retval 1 previous buffer flushed and this buffer flushed by other thread too
|
|
*/
|
|
|
|
my_bool translog_prev_buffer_flush_wait(struct st_translog_buffer *buffer)
|
|
{
|
|
TRANSLOG_ADDRESS offset= buffer->offset;
|
|
TRANSLOG_FILE *file= buffer->file;
|
|
uint8 ver= buffer->ver;
|
|
DBUG_ENTER("translog_prev_buffer_flush_wait");
|
|
DBUG_PRINT("enter", ("buffer: %p #%u offset: " LSN_FMT " "
|
|
"prev sent: " LSN_FMT " prev offset: " LSN_FMT,
|
|
buffer, (uint) buffer->buffer_no,
|
|
LSN_IN_PARTS(buffer->offset),
|
|
LSN_IN_PARTS(buffer->prev_sent_to_disk),
|
|
LSN_IN_PARTS(buffer->prev_buffer_offset)));
|
|
translog_buffer_lock_assert_owner(buffer);
|
|
if (buffer->prev_buffer_offset != buffer->prev_sent_to_disk)
|
|
{
|
|
do {
|
|
mysql_cond_wait(&buffer->prev_sent_to_disk_cond, &buffer->mutex);
|
|
if (buffer->file != file || buffer->offset != offset ||
|
|
buffer->ver != ver)
|
|
DBUG_RETURN(1); /* some the thread flushed the buffer already */
|
|
} while(buffer->prev_buffer_offset != buffer->prev_sent_to_disk);
|
|
}
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
|
|
/*
|
|
Flush given buffer
|
|
|
|
SYNOPSIS
|
|
translog_buffer_flush()
|
|
buffer This buffer should be flushed
|
|
|
|
RETURN
|
|
0 OK
|
|
1 Error
|
|
*/
|
|
|
|
static my_bool translog_buffer_flush(struct st_translog_buffer *buffer)
|
|
{
|
|
uint32 i, pg;
|
|
TRANSLOG_ADDRESS offset= buffer->offset;
|
|
TRANSLOG_FILE *file= buffer->file;
|
|
uint8 ver= buffer->ver;
|
|
uint skipped_data;
|
|
DBUG_ENTER("translog_buffer_flush");
|
|
DBUG_PRINT("enter",
|
|
("Buffer: #%u %p file: %d offset: " LSN_FMT " size: %lu",
|
|
(uint) buffer->buffer_no, buffer,
|
|
buffer->file->handler.file,
|
|
LSN_IN_PARTS(buffer->offset),
|
|
(ulong) buffer->size));
|
|
translog_buffer_lock_assert_owner(buffer);
|
|
|
|
if (buffer->file == NULL)
|
|
DBUG_RETURN(0);
|
|
|
|
translog_wait_for_writers(buffer);
|
|
|
|
if (buffer->file != file || buffer->offset != offset || buffer->ver != ver)
|
|
DBUG_RETURN(0); /* some the thread flushed the buffer already */
|
|
|
|
if (buffer->is_closing_buffer)
|
|
{
|
|
/* some other flush in progress */
|
|
translog_wait_for_closing(buffer);
|
|
if (buffer->file != file || buffer->offset != offset || buffer->ver != ver)
|
|
DBUG_RETURN(0); /* some the thread flushed the buffer already */
|
|
}
|
|
|
|
if (buffer->overlay && translog_prev_buffer_flush_wait(buffer))
|
|
DBUG_RETURN(0); /* some the thread flushed the buffer already */
|
|
|
|
/*
|
|
Send page by page in the pagecache what we are going to write on the
|
|
disk
|
|
*/
|
|
file= buffer->file;
|
|
skipped_data= buffer->skipped_data;
|
|
DBUG_ASSERT(skipped_data < TRANSLOG_PAGE_SIZE);
|
|
for (i= 0, pg= LSN_OFFSET(buffer->offset) / TRANSLOG_PAGE_SIZE;
|
|
i < buffer->size;
|
|
i+= TRANSLOG_PAGE_SIZE, pg++)
|
|
{
|
|
#ifdef DBUG_TRACE
|
|
TRANSLOG_ADDRESS addr= (buffer->offset + i);
|
|
#endif
|
|
DBUG_PRINT("info", ("send log form %lu till %lu address: " LSN_FMT " "
|
|
"page #: %lu buffer size: %lu buffer: %p",
|
|
(ulong) i, (ulong) (i + TRANSLOG_PAGE_SIZE),
|
|
LSN_IN_PARTS(addr), (ulong) pg, (ulong) buffer->size,
|
|
buffer));
|
|
DBUG_ASSERT(log_descriptor.pagecache->block_size == TRANSLOG_PAGE_SIZE);
|
|
DBUG_ASSERT(i + TRANSLOG_PAGE_SIZE <= buffer->size);
|
|
if (translog_status != TRANSLOG_OK && translog_status != TRANSLOG_SHUTDOWN)
|
|
DBUG_RETURN(1);
|
|
if (pagecache_write_part(log_descriptor.pagecache,
|
|
&file->handler, pg, 3,
|
|
buffer->buffer + i,
|
|
PAGECACHE_PLAIN_PAGE,
|
|
PAGECACHE_LOCK_LEFT_UNLOCKED,
|
|
PAGECACHE_PIN_LEFT_UNPINNED,
|
|
PAGECACHE_WRITE_DONE, 0,
|
|
LSN_IMPOSSIBLE,
|
|
skipped_data,
|
|
TRANSLOG_PAGE_SIZE - skipped_data))
|
|
{
|
|
DBUG_PRINT("error",
|
|
("Can't write page " LSN_FMT " to pagecache, error: %d",
|
|
buffer->file->number,
|
|
(uint)(LSN_OFFSET(buffer->offset)+ i),
|
|
my_errno));
|
|
translog_stop_writing();
|
|
DBUG_RETURN(1);
|
|
}
|
|
skipped_data= 0;
|
|
}
|
|
file->is_sync= 0;
|
|
if (my_pwrite(file->handler.file, buffer->buffer + buffer->skipped_data,
|
|
buffer->size - buffer->skipped_data,
|
|
LSN_OFFSET(buffer->offset) + buffer->skipped_data,
|
|
log_write_flags))
|
|
{
|
|
DBUG_PRINT("error", ("Can't write buffer " LSN_FMT " size %lu "
|
|
"to the disk (%d)",
|
|
(uint) file->handler.file,
|
|
(uint) LSN_OFFSET(buffer->offset),
|
|
(ulong) buffer->size, errno));
|
|
translog_stop_writing();
|
|
DBUG_RETURN(1);
|
|
}
|
|
/*
|
|
Dropping the flag in such way can make false alarm: signalling than the
|
|
file in not sync when it is sync, but the situation is quite rare and
|
|
protections with mutexes give much more overhead to the whole engine
|
|
*/
|
|
file->is_sync= 0;
|
|
|
|
if (LSN_OFFSET(buffer->last_lsn) != 0) /* if buffer->last_lsn is set */
|
|
{
|
|
if (translog_prev_buffer_flush_wait(buffer))
|
|
DBUG_RETURN(0); /* some the thread flushed the buffer already */
|
|
translog_set_sent_to_disk(buffer);
|
|
}
|
|
else
|
|
translog_set_only_in_buffers(buffer->next_buffer_offset);
|
|
|
|
/* say to next buffer that we are finished */
|
|
{
|
|
struct st_translog_buffer *next_buffer=
|
|
log_descriptor.buffers + ((buffer->buffer_no + 1) % TRANSLOG_BUFFERS_NO);
|
|
if (likely(translog_status == TRANSLOG_OK)){
|
|
translog_buffer_lock(next_buffer);
|
|
next_buffer->prev_sent_to_disk= buffer->offset;
|
|
translog_buffer_unlock(next_buffer);
|
|
mysql_cond_broadcast(&next_buffer->prev_sent_to_disk_cond);
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
It is shutdown =>
|
|
1) there is only one thread
|
|
2) mutexes of other buffers can be destroyed => we can't use them
|
|
*/
|
|
next_buffer->prev_sent_to_disk= buffer->offset;
|
|
}
|
|
}
|
|
/* Free buffer */
|
|
buffer->file= NULL;
|
|
buffer->overlay= 0;
|
|
buffer->ver++;
|
|
mysql_mutex_lock(&log_descriptor.dirty_buffer_mask_lock);
|
|
log_descriptor.dirty_buffer_mask&= ~(1 << buffer->buffer_no);
|
|
mysql_mutex_unlock(&log_descriptor.dirty_buffer_mask_lock);
|
|
mysql_cond_broadcast(&buffer->waiting_filling_buffer);
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
|
|
/*
|
|
Recover page with sector protection (wipe out failed chunks)
|
|
|
|
SYNOPSYS
|
|
translog_recover_page_up_to_sector()
|
|
page reference on the page
|
|
offset offset of failed sector
|
|
|
|
RETURN
|
|
0 OK
|
|
1 Error
|
|
*/
|
|
|
|
static my_bool translog_recover_page_up_to_sector(uchar *page, uint16 offset)
|
|
{
|
|
uint16 chunk_offset= translog_get_first_chunk_offset(page), valid_chunk_end;
|
|
DBUG_ENTER("translog_recover_page_up_to_sector");
|
|
DBUG_PRINT("enter", ("offset: %u first chunk: %u",
|
|
(uint) offset, (uint) chunk_offset));
|
|
|
|
while (chunk_offset < offset && page[chunk_offset] != TRANSLOG_FILLER)
|
|
{
|
|
uint16 chunk_length;
|
|
if ((chunk_length=
|
|
translog_get_total_chunk_length(page, chunk_offset)) == 0)
|
|
{
|
|
DBUG_PRINT("error", ("cant get chunk length (offset %u)",
|
|
(uint) chunk_offset));
|
|
DBUG_RETURN(1);
|
|
}
|
|
DBUG_PRINT("info", ("chunk: offset: %u length %u",
|
|
(uint) chunk_offset, (uint) chunk_length));
|
|
if (((ulong) chunk_offset) + ((ulong) chunk_length) > TRANSLOG_PAGE_SIZE)
|
|
{
|
|
DBUG_PRINT("error", ("damaged chunk (offset %u) in trusted area",
|
|
(uint) chunk_offset));
|
|
DBUG_RETURN(1);
|
|
}
|
|
chunk_offset+= chunk_length;
|
|
}
|
|
|
|
valid_chunk_end= chunk_offset;
|
|
/* end of trusted area - sector parsing */
|
|
while (page[chunk_offset] != TRANSLOG_FILLER)
|
|
{
|
|
uint16 chunk_length;
|
|
if ((chunk_length=
|
|
translog_get_total_chunk_length(page, chunk_offset)) == 0)
|
|
break;
|
|
|
|
DBUG_PRINT("info", ("chunk: offset: %u length %u",
|
|
(uint) chunk_offset, (uint) chunk_length));
|
|
if (((ulong) chunk_offset) + ((ulong) chunk_length) >
|
|
(uint) (offset + DISK_DRIVE_SECTOR_SIZE))
|
|
break;
|
|
|
|
chunk_offset+= chunk_length;
|
|
valid_chunk_end= chunk_offset;
|
|
}
|
|
DBUG_PRINT("info", ("valid chunk end offset: %u", (uint) valid_chunk_end));
|
|
|
|
memset(page + valid_chunk_end, TRANSLOG_FILLER,
|
|
TRANSLOG_PAGE_SIZE - valid_chunk_end);
|
|
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Checks and removes sector protection.
|
|
|
|
@param page reference on the page content.
|
|
@param file transaction log descriptor.
|
|
|
|
@retvat 0 OK
|
|
@retval 1 Error
|
|
*/
|
|
|
|
static my_bool
|
|
translog_check_sector_protection(uchar *page, TRANSLOG_FILE *file)
|
|
{
|
|
uint i, offset;
|
|
uchar *table= page + page_overhead[page[TRANSLOG_PAGE_FLAGS]] -
|
|
TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE;
|
|
uint8 current= table[0];
|
|
DBUG_ENTER("translog_check_sector_protection");
|
|
|
|
for (i= 1, offset= DISK_DRIVE_SECTOR_SIZE;
|
|
i < TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE;
|
|
i++, offset+= DISK_DRIVE_SECTOR_SIZE)
|
|
{
|
|
/*
|
|
TODO: add chunk counting for "suspecting" sectors (difference is
|
|
more than 1-2), if difference more then present chunks then it is
|
|
the problem.
|
|
*/
|
|
uint8 test= page[offset];
|
|
DBUG_PRINT("info", ("sector: #%u offset: %u current: %lx "
|
|
"read: 0x%x stored: 0x%x%x",
|
|
i, offset, (ulong) current,
|
|
(uint) uint2korr(page + offset), (uint) table[i],
|
|
(uint) table[i + 1]));
|
|
/*
|
|
3 is minimal possible record length. So we can have "distance"
|
|
between 2 sectors value more then DISK_DRIVE_SECTOR_SIZE / 3
|
|
only if it is old value, i.e. the sector was not written.
|
|
*/
|
|
if (((test < current) &&
|
|
((uint)(0xFFL - current + test) > DISK_DRIVE_SECTOR_SIZE / 3)) ||
|
|
((test >= current) &&
|
|
((uint)(test - current) > DISK_DRIVE_SECTOR_SIZE / 3)))
|
|
{
|
|
if (translog_recover_page_up_to_sector(page, offset))
|
|
DBUG_RETURN(1);
|
|
file->was_recovered= 1;
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
/* Restore value on the page */
|
|
page[offset]= table[i];
|
|
current= test;
|
|
DBUG_PRINT("info", ("sector: #%u offset: %u current: %lx "
|
|
"read: 0x%x stored: 0x%x",
|
|
i, offset, (ulong) current,
|
|
(uint) page[offset], (uint) table[i]));
|
|
}
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Log page validator (read callback)
|
|
|
|
@param page The page data to check
|
|
@param page_no The page number (<offset>/<page length>)
|
|
@param data_ptr Read callback data pointer (pointer to TRANSLOG_FILE)
|
|
|
|
@todo: add turning loghandler to read-only mode after merging with
|
|
that patch.
|
|
|
|
@retval 0 OK
|
|
@retval 1 Error
|
|
*/
|
|
|
|
static my_bool translog_page_validator(int res, PAGECACHE_IO_HOOK_ARGS *args)
|
|
{
|
|
uchar *page= args->page;
|
|
pgcache_page_no_t page_no= args->pageno;
|
|
uint this_page_page_overhead;
|
|
uint flags;
|
|
uchar *page_pos;
|
|
TRANSLOG_FILE *data= (TRANSLOG_FILE *) args->data;
|
|
#ifdef DBUG_TRACE
|
|
pgcache_page_no_t offset= page_no * TRANSLOG_PAGE_SIZE;
|
|
#endif
|
|
DBUG_ENTER("translog_page_validator");
|
|
|
|
data->was_recovered= 0;
|
|
|
|
if (res)
|
|
{
|
|
DBUG_RETURN(1);
|
|
}
|
|
|
|
if ((pgcache_page_no_t) uint3korr(page) != page_no ||
|
|
(uint32) uint3korr(page + 3) != data->number)
|
|
{
|
|
DBUG_PRINT("error", ("Page " LSN_FMT ": "
|
|
"page address written in the page is incorrect: "
|
|
"File %lu instead of %lu or page %lu instead of %lu",
|
|
(uint)data->number, (uint)offset,
|
|
(ulong) uint3korr(page + 3), (ulong) data->number,
|
|
(ulong) uint3korr(page),
|
|
(ulong) page_no));
|
|
DBUG_RETURN(1);
|
|
}
|
|
flags= (uint)(page[TRANSLOG_PAGE_FLAGS]);
|
|
this_page_page_overhead= page_overhead[flags];
|
|
if (flags & ~(TRANSLOG_PAGE_CRC | TRANSLOG_SECTOR_PROTECTION |
|
|
TRANSLOG_RECORD_CRC))
|
|
{
|
|
DBUG_PRINT("error", ("Page " LSN_FMT ": "
|
|
"Garbage in the page flags field detected : %x",
|
|
(uint) data->number, (uint) offset,
|
|
(uint) flags));
|
|
DBUG_RETURN(1);
|
|
}
|
|
page_pos= page + (3 + 3 + 1);
|
|
if (flags & TRANSLOG_PAGE_CRC)
|
|
{
|
|
uint32 crc= translog_crc(page + this_page_page_overhead,
|
|
TRANSLOG_PAGE_SIZE -
|
|
this_page_page_overhead);
|
|
if (crc != uint4korr(page_pos))
|
|
{
|
|
DBUG_PRINT("error", ("Page " LSN_FMT ": "
|
|
"CRC mismatch: calculated: %lx on the page %lx",
|
|
(uint) data->number, (uint) offset,
|
|
(ulong) crc, (ulong) uint4korr(page_pos)));
|
|
DBUG_RETURN(1);
|
|
}
|
|
page_pos+= CRC_SIZE; /* Skip crc */
|
|
}
|
|
if (flags & TRANSLOG_SECTOR_PROTECTION &&
|
|
translog_check_sector_protection(page, data))
|
|
{
|
|
DBUG_RETURN(1);
|
|
}
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Locks the loghandler.
|
|
*/
|
|
|
|
void translog_lock()
|
|
{
|
|
uint8 current_buffer;
|
|
DBUG_ENTER("translog_lock");
|
|
|
|
/*
|
|
Locking the loghandler mean locking current buffer, but it can change
|
|
during locking, so we should check it
|
|
*/
|
|
for (;;)
|
|
{
|
|
/*
|
|
log_descriptor.bc.buffer_no is only one byte so its reading is
|
|
an atomic operation
|
|
*/
|
|
current_buffer= log_descriptor.bc.buffer_no;
|
|
translog_buffer_lock(log_descriptor.buffers + current_buffer);
|
|
if (log_descriptor.bc.buffer_no == current_buffer)
|
|
break;
|
|
translog_buffer_unlock(log_descriptor.buffers + current_buffer);
|
|
}
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
|
|
/*
|
|
Unlock the loghandler
|
|
|
|
SYNOPSIS
|
|
translog_unlock()
|
|
|
|
RETURN
|
|
0 OK
|
|
1 Error
|
|
*/
|
|
|
|
void translog_unlock()
|
|
{
|
|
translog_buffer_unlock(log_descriptor.bc.buffer);
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Get log page by file number and offset of the beginning of the page
|
|
|
|
@param data validator data, which contains the page address
|
|
@param buffer buffer for page placing
|
|
(might not be used in some cache implementations)
|
|
@param direct_link if it is not NULL then caller can accept direct
|
|
link to the page cache
|
|
|
|
@retval NULL Error
|
|
@retval # pointer to the page cache which should be used to read this page
|
|
*/
|
|
|
|
static uchar *translog_get_page(TRANSLOG_VALIDATOR_DATA *data, uchar *buffer,
|
|
PAGECACHE_BLOCK_LINK **direct_link)
|
|
{
|
|
TRANSLOG_ADDRESS addr= *(data->addr), in_buffers;
|
|
uint32 file_no= LSN_FILE_NO(addr);
|
|
TRANSLOG_FILE *file;
|
|
DBUG_ENTER("translog_get_page");
|
|
DBUG_PRINT("enter", ("File: %u Offset: %u(0x%x)",
|
|
file_no,
|
|
(uint) LSN_OFFSET(addr),
|
|
(uint) LSN_OFFSET(addr)));
|
|
|
|
/* it is really page address */
|
|
DBUG_ASSERT(LSN_OFFSET(addr) % TRANSLOG_PAGE_SIZE == 0);
|
|
if (direct_link)
|
|
*direct_link= NULL;
|
|
|
|
restart:
|
|
|
|
in_buffers= translog_only_in_buffers();
|
|
DBUG_PRINT("info", ("in_buffers: " LSN_FMT,
|
|
LSN_IN_PARTS(in_buffers)));
|
|
if (in_buffers != LSN_IMPOSSIBLE &&
|
|
cmp_translog_addr(addr, in_buffers) >= 0)
|
|
{
|
|
translog_lock();
|
|
DBUG_ASSERT(cmp_translog_addr(addr, log_descriptor.horizon) < 0);
|
|
/* recheck with locked loghandler */
|
|
in_buffers= translog_only_in_buffers();
|
|
if (cmp_translog_addr(addr, in_buffers) >= 0)
|
|
{
|
|
uint16 buffer_no= log_descriptor.bc.buffer_no;
|
|
#ifdef DBUG_ASSERT_EXISTS
|
|
uint16 buffer_start= buffer_no;
|
|
#endif
|
|
struct st_translog_buffer *buffer_unlock= log_descriptor.bc.buffer;
|
|
struct st_translog_buffer *curr_buffer= log_descriptor.bc.buffer;
|
|
for (;;)
|
|
{
|
|
/*
|
|
if the page is in the buffer and it is the last version of the
|
|
page (in case of division the page by buffer flush)
|
|
*/
|
|
if (curr_buffer->file != NULL &&
|
|
cmp_translog_addr(addr, curr_buffer->offset) >= 0 &&
|
|
cmp_translog_addr(addr,
|
|
(curr_buffer->next_buffer_offset ?
|
|
curr_buffer->next_buffer_offset:
|
|
curr_buffer->offset + curr_buffer->size)) < 0)
|
|
{
|
|
TRANSLOG_ADDRESS offset= curr_buffer->offset;
|
|
TRANSLOG_FILE *fl= curr_buffer->file;
|
|
uchar *from, *table= NULL;
|
|
int is_last_unfinished_page;
|
|
uint last_protected_sector= 0;
|
|
uint skipped_data= curr_buffer->skipped_data;
|
|
TRANSLOG_FILE file_copy;
|
|
uint8 ver= curr_buffer->ver;
|
|
translog_wait_for_writers(curr_buffer);
|
|
if (offset != curr_buffer->offset || fl != curr_buffer->file ||
|
|
ver != curr_buffer->ver)
|
|
{
|
|
DBUG_ASSERT(buffer_unlock == curr_buffer);
|
|
translog_buffer_unlock(buffer_unlock);
|
|
goto restart;
|
|
}
|
|
DBUG_ASSERT(LSN_FILE_NO(addr) == LSN_FILE_NO(curr_buffer->offset));
|
|
from= curr_buffer->buffer + (addr - curr_buffer->offset);
|
|
if (skipped_data && addr == curr_buffer->offset)
|
|
{
|
|
/*
|
|
We read page part of which is not present in buffer,
|
|
so we should read absent part from file (page cache actually)
|
|
*/
|
|
file= get_logfile_by_number(file_no);
|
|
DBUG_ASSERT(file != NULL);
|
|
/*
|
|
it's ok to not lock the page because:
|
|
- The log handler has it's own page cache.
|
|
- There is only one thread that can access the log
|
|
cache at a time
|
|
*/
|
|
if (!(buffer= pagecache_read(log_descriptor.pagecache,
|
|
&file->handler,
|
|
LSN_OFFSET(addr) / TRANSLOG_PAGE_SIZE,
|
|
3, buffer,
|
|
PAGECACHE_PLAIN_PAGE,
|
|
PAGECACHE_LOCK_LEFT_UNLOCKED,
|
|
NULL)))
|
|
DBUG_RETURN(NULL);
|
|
}
|
|
else
|
|
skipped_data= 0; /* Read after skipped in buffer data */
|
|
/*
|
|
Now we have correct data in buffer up to 'skipped_data'. The
|
|
following memcpy() will move the data from the internal buffer
|
|
that was not yet on disk.
|
|
*/
|
|
memcpy(buffer + skipped_data, from + skipped_data,
|
|
TRANSLOG_PAGE_SIZE - skipped_data);
|
|
/*
|
|
We can use copy then in translog_page_validator() because it
|
|
do not put it permanently somewhere.
|
|
We have to use copy because after releasing log lock we can't
|
|
guaranty that the file still be present (in real life it will be
|
|
present but theoretically possible that it will be released
|
|
already from last files cache);
|
|
*/
|
|
file_copy= *(curr_buffer->file);
|
|
file_copy.handler.callback_data= (uchar*) &file_copy;
|
|
is_last_unfinished_page= ((log_descriptor.bc.buffer ==
|
|
curr_buffer) &&
|
|
(log_descriptor.bc.ptr >= from) &&
|
|
(log_descriptor.bc.ptr <
|
|
from + TRANSLOG_PAGE_SIZE));
|
|
if (is_last_unfinished_page &&
|
|
(buffer[TRANSLOG_PAGE_FLAGS] & TRANSLOG_SECTOR_PROTECTION))
|
|
{
|
|
last_protected_sector= ((log_descriptor.bc.previous_offset - 1) /
|
|
DISK_DRIVE_SECTOR_SIZE);
|
|
table= buffer + log_descriptor.page_overhead -
|
|
TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE;
|
|
}
|
|
|
|
DBUG_ASSERT(buffer_unlock == curr_buffer);
|
|
translog_buffer_unlock(buffer_unlock);
|
|
if (is_last_unfinished_page)
|
|
{
|
|
uint i;
|
|
/*
|
|
This is last unfinished page => we should not check CRC and
|
|
remove only that protection which already installed (no need
|
|
to check it)
|
|
|
|
We do not check the flag of sector protection, because if
|
|
(buffer[TRANSLOG_PAGE_FLAGS] & TRANSLOG_SECTOR_PROTECTION) is
|
|
not set then last_protected_sector will be 0 so following loop
|
|
will be never executed
|
|
*/
|
|
DBUG_PRINT("info", ("This is last unfinished page, "
|
|
"last protected sector %u",
|
|
last_protected_sector));
|
|
for (i= 1; i <= last_protected_sector; i++)
|
|
{
|
|
uint offset= i * DISK_DRIVE_SECTOR_SIZE;
|
|
DBUG_PRINT("info", ("Sector %u: 0x%02x <- 0x%02x",
|
|
i, buffer[offset],
|
|
table[i]));
|
|
buffer[offset]= table[i];
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
This IF should be true because we use in-memory data which
|
|
supposed to be correct.
|
|
*/
|
|
PAGECACHE_IO_HOOK_ARGS args;
|
|
args.page= buffer;
|
|
args.pageno= LSN_OFFSET(addr) / TRANSLOG_PAGE_SIZE;
|
|
args.data= (uchar*) &file_copy;
|
|
if (translog_page_validator(0, &args))
|
|
{
|
|
DBUG_ASSERT(0);
|
|
buffer= NULL;
|
|
}
|
|
}
|
|
DBUG_RETURN(buffer);
|
|
}
|
|
buffer_no= (buffer_no + 1) % TRANSLOG_BUFFERS_NO;
|
|
curr_buffer= log_descriptor.buffers + buffer_no;
|
|
translog_buffer_lock(curr_buffer);
|
|
translog_buffer_unlock(buffer_unlock);
|
|
buffer_unlock= curr_buffer;
|
|
/* we can't make a full circle */
|
|
DBUG_ASSERT(buffer_start != buffer_no);
|
|
}
|
|
}
|
|
translog_unlock();
|
|
}
|
|
file= get_logfile_by_number(file_no);
|
|
DBUG_ASSERT(file != NULL);
|
|
buffer= pagecache_read(log_descriptor.pagecache, &file->handler,
|
|
LSN_OFFSET(addr) / TRANSLOG_PAGE_SIZE,
|
|
3, (direct_link ? NULL : buffer),
|
|
PAGECACHE_PLAIN_PAGE,
|
|
(direct_link ?
|
|
PAGECACHE_LOCK_READ :
|
|
PAGECACHE_LOCK_LEFT_UNLOCKED),
|
|
direct_link);
|
|
DBUG_PRINT("info", ("Direct link is assigned to : %p * %p",
|
|
direct_link,
|
|
(direct_link ? *direct_link : NULL)));
|
|
data->was_recovered= file->was_recovered;
|
|
DBUG_RETURN(buffer);
|
|
}
|
|
|
|
|
|
/**
|
|
@brief free direct log page link
|
|
|
|
@param direct_link the direct log page link to be freed
|
|
|
|
*/
|
|
|
|
static void translog_free_link(PAGECACHE_BLOCK_LINK *direct_link)
|
|
{
|
|
DBUG_ENTER("translog_free_link");
|
|
DBUG_PRINT("info", ("Direct link: %p",
|
|
direct_link));
|
|
if (direct_link)
|
|
pagecache_unlock_by_link(log_descriptor.pagecache, direct_link,
|
|
PAGECACHE_LOCK_READ_UNLOCK, PAGECACHE_UNPIN,
|
|
LSN_IMPOSSIBLE, LSN_IMPOSSIBLE, 0, FALSE);
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Finds last full page of the given log file.
|
|
|
|
@param addr address structure to fill with data, which contain
|
|
file number of the log file
|
|
@param last_page_ok Result of the check whether last page OK.
|
|
(for now only we check only that file length
|
|
divisible on page length).
|
|
@param no_errors suppress messages about non-critical errors
|
|
|
|
@retval 0 OK
|
|
@retval 1 Error
|
|
*/
|
|
|
|
static my_bool translog_get_last_page_addr(TRANSLOG_ADDRESS *addr,
|
|
my_bool *last_page_ok,
|
|
my_bool no_errors)
|
|
{
|
|
char path[FN_REFLEN];
|
|
uint32 rec_offset;
|
|
my_off_t file_size;
|
|
uint32 file_no= LSN_FILE_NO(*addr);
|
|
TRANSLOG_FILE *file;
|
|
#ifdef DBUG_TRACE
|
|
char buff[21];
|
|
#endif
|
|
DBUG_ENTER("translog_get_last_page_addr");
|
|
|
|
if (likely((file= get_logfile_by_number(file_no)) != NULL))
|
|
{
|
|
/*
|
|
This function used only during initialization of loghandler or in
|
|
scanner (which mean we need read that part of the log), so the
|
|
requested log file have to be opened and can't be freed after
|
|
returning pointer on it (file_size).
|
|
*/
|
|
file_size= mysql_file_seek(file->handler.file, 0, SEEK_END, MYF(0));
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
This branch is used only during very early initialization
|
|
when files are not opened.
|
|
*/
|
|
File fd;
|
|
if ((fd= mysql_file_open(key_file_translog,
|
|
translog_filename_by_fileno(file_no, path),
|
|
O_RDONLY | O_CLOEXEC, (no_errors ? MYF(0) : MYF(MY_WME)))) < 0)
|
|
{
|
|
my_errno= errno;
|
|
DBUG_PRINT("error", ("Error %d during opening file #%d",
|
|
errno, file_no));
|
|
DBUG_RETURN(1);
|
|
}
|
|
file_size= mysql_file_seek(fd, 0, SEEK_END, MYF(0));
|
|
mysql_file_close(fd, MYF(0));
|
|
}
|
|
DBUG_PRINT("info", ("File size: %s", llstr(file_size, buff)));
|
|
if (file_size == MY_FILEPOS_ERROR)
|
|
DBUG_RETURN(1);
|
|
DBUG_ASSERT(file_size < 0xffffffffULL);
|
|
if (((uint32)file_size) > TRANSLOG_PAGE_SIZE)
|
|
{
|
|
rec_offset= (((((uint32)file_size) / TRANSLOG_PAGE_SIZE) - 1) *
|
|
TRANSLOG_PAGE_SIZE);
|
|
*last_page_ok= (((uint32)file_size) == rec_offset + TRANSLOG_PAGE_SIZE);
|
|
}
|
|
else
|
|
{
|
|
*last_page_ok= 0;
|
|
rec_offset= 0;
|
|
}
|
|
*addr= MAKE_LSN(file_no, rec_offset);
|
|
DBUG_PRINT("info", ("Last page: 0x%lx ok: %d", (ulong) rec_offset,
|
|
*last_page_ok));
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Get number bytes for record length storing
|
|
|
|
@param length Record length which will be encoded
|
|
|
|
@return 1,3,4,5 - number of bytes to store given length
|
|
*/
|
|
|
|
static uint translog_variable_record_length_bytes(translog_size_t length)
|
|
{
|
|
if (length < 250)
|
|
return 1;
|
|
if (length < 0xFFFF)
|
|
return 3;
|
|
if (length < (ulong) 0xFFFFFF)
|
|
return 4;
|
|
return 5;
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Gets header of this chunk.
|
|
|
|
@param chunk The pointer to the chunk beginning
|
|
|
|
@retval # total length of the chunk
|
|
@retval 0 Error
|
|
*/
|
|
|
|
static uint16 translog_get_chunk_header_length(uchar *chunk)
|
|
{
|
|
DBUG_ENTER("translog_get_chunk_header_length");
|
|
switch (*chunk & TRANSLOG_CHUNK_TYPE) {
|
|
case TRANSLOG_CHUNK_LSN:
|
|
{
|
|
/* 0 chunk referred as LSN (head or tail) */
|
|
translog_size_t rec_len __attribute__((unused));
|
|
uchar *start= chunk;
|
|
uchar *ptr= start + 1 + 2;
|
|
uint16 chunk_len, header_len;
|
|
DBUG_PRINT("info", ("TRANSLOG_CHUNK_LSN"));
|
|
rec_len= translog_variable_record_1group_decode_len(&ptr);
|
|
chunk_len= uint2korr(ptr);
|
|
header_len= (uint16) (ptr - start) +2;
|
|
DBUG_PRINT("info", ("rec len: %lu chunk len: %u header len: %u",
|
|
(ulong) rec_len, (uint) chunk_len, (uint) header_len));
|
|
if (chunk_len)
|
|
{
|
|
/* TODO: fine header end */
|
|
/*
|
|
The last chunk of multi-group record can be base for it header
|
|
calculation (we skip to the first group to read the header) so if we
|
|
stuck here something is wrong.
|
|
*/
|
|
DBUG_ASSERT(0);
|
|
DBUG_RETURN(0); /* Keep compiler happy */
|
|
}
|
|
DBUG_RETURN(header_len);
|
|
}
|
|
case TRANSLOG_CHUNK_FIXED:
|
|
{
|
|
/* 1 (pseudo)fixed record (also LSN) */
|
|
DBUG_PRINT("info", ("TRANSLOG_CHUNK_FIXED = 3"));
|
|
DBUG_RETURN(3);
|
|
}
|
|
case TRANSLOG_CHUNK_NOHDR:
|
|
/* 2 no header chunk (till page end) */
|
|
DBUG_PRINT("info", ("TRANSLOG_CHUNK_NOHDR = 1"));
|
|
DBUG_RETURN(1);
|
|
break;
|
|
case TRANSLOG_CHUNK_LNGTH:
|
|
/* 3 chunk with chunk length */
|
|
DBUG_PRINT("info", ("TRANSLOG_CHUNK_LNGTH = 3"));
|
|
DBUG_RETURN(3);
|
|
break;
|
|
}
|
|
DBUG_ASSERT(0);
|
|
DBUG_RETURN(0); /* Keep compiler happy */
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Truncate the log to the given address. Used during the startup if the
|
|
end of log if corrupted.
|
|
|
|
@param addr new horizon
|
|
|
|
@retval 0 OK
|
|
@retval 1 Error
|
|
*/
|
|
|
|
static my_bool translog_truncate_log(TRANSLOG_ADDRESS addr)
|
|
{
|
|
uchar *page;
|
|
TRANSLOG_ADDRESS current_page;
|
|
uint32 next_page_offset, page_rest;
|
|
uint32 i;
|
|
File fd;
|
|
int rc;
|
|
TRANSLOG_VALIDATOR_DATA data;
|
|
char path[FN_REFLEN];
|
|
uchar page_buff[TRANSLOG_PAGE_SIZE];
|
|
DBUG_ENTER("translog_truncate_log");
|
|
/* TODO: write warning to the client */
|
|
DBUG_PRINT("warning", ("removing all records from " LSN_FMT " "
|
|
"till " LSN_FMT,
|
|
LSN_IN_PARTS(addr),
|
|
LSN_IN_PARTS(log_descriptor.horizon)));
|
|
DBUG_ASSERT(cmp_translog_addr(addr, log_descriptor.horizon) < 0);
|
|
/* remove files between the address and horizon */
|
|
for (i= LSN_FILE_NO(addr) + 1; i <= LSN_FILE_NO(log_descriptor.horizon); i++)
|
|
if (mysql_file_delete(key_file_translog,
|
|
translog_filename_by_fileno(i, path), MYF(MY_WME)))
|
|
{
|
|
translog_unlock();
|
|
DBUG_RETURN(1);
|
|
}
|
|
|
|
/* truncate the last file up to the last page */
|
|
next_page_offset= LSN_OFFSET(addr);
|
|
next_page_offset= (next_page_offset -
|
|
((next_page_offset - 1) % TRANSLOG_PAGE_SIZE + 1) +
|
|
TRANSLOG_PAGE_SIZE);
|
|
page_rest= next_page_offset - LSN_OFFSET(addr);
|
|
memset(page_buff, TRANSLOG_FILLER, page_rest);
|
|
rc= ((fd= open_logfile_by_number_no_cache(LSN_FILE_NO(addr))) < 0 ||
|
|
((mysql_file_chsize(fd, next_page_offset, TRANSLOG_FILLER, MYF(MY_WME)) ||
|
|
(page_rest && my_pwrite(fd, page_buff, page_rest, LSN_OFFSET(addr),
|
|
log_write_flags)) ||
|
|
mysql_file_sync(fd, MYF(MY_WME)))));
|
|
translog_syncs++;
|
|
rc|= (fd > 0 && mysql_file_close(fd, MYF(MY_WME)));
|
|
if (sync_log_dir >= TRANSLOG_SYNC_DIR_ALWAYS)
|
|
{
|
|
rc|= sync_dir(log_descriptor.directory_fd, MYF(MY_WME | MY_IGNORE_BADFD));
|
|
translog_syncs++;
|
|
}
|
|
if (rc)
|
|
DBUG_RETURN(1);
|
|
|
|
/* fix the horizon */
|
|
log_descriptor.horizon= addr;
|
|
/* fix the buffer data */
|
|
current_page= MAKE_LSN(LSN_FILE_NO(addr), (next_page_offset -
|
|
TRANSLOG_PAGE_SIZE));
|
|
data.addr= ¤t_page;
|
|
if ((page= translog_get_page(&data, log_descriptor.buffers->buffer, NULL)) ==
|
|
NULL)
|
|
DBUG_RETURN(1);
|
|
if (page != log_descriptor.buffers->buffer)
|
|
memcpy(log_descriptor.buffers->buffer, page, TRANSLOG_PAGE_SIZE);
|
|
log_descriptor.bc.buffer->offset= current_page;
|
|
log_descriptor.bc.buffer->size= LSN_OFFSET(addr) - LSN_OFFSET(current_page);
|
|
log_descriptor.bc.ptr=
|
|
log_descriptor.buffers->buffer + log_descriptor.bc.buffer->size;
|
|
log_descriptor.bc.current_page_fill= log_descriptor.bc.buffer->size;
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
|
|
/**
|
|
Applies function 'callback' to all files (in a directory) which
|
|
name looks like a log's name (aria_log.[0-9]{7}).
|
|
If 'callback' returns TRUE this interrupts the walk and returns
|
|
TRUE. Otherwise FALSE is returned after processing all log files.
|
|
It cannot just use log_descriptor.directory because that may not yet have
|
|
been initialized.
|
|
|
|
@param directory directory to scan
|
|
@param callback function to apply; is passed directory and base
|
|
name of found file
|
|
*/
|
|
|
|
my_bool translog_walk_filenames(const char *directory,
|
|
my_bool (*callback)(const char *,
|
|
const char *))
|
|
{
|
|
MY_DIR *dirp;
|
|
uint i;
|
|
my_bool rc= FALSE;
|
|
|
|
/* Finds and removes transaction log files */
|
|
if (!(dirp = my_dir(directory, MYF(MY_DONT_SORT))))
|
|
return FALSE;
|
|
|
|
for (i= 0; i < dirp->number_of_files; i++)
|
|
{
|
|
char *file= dirp->dir_entry[i].name;
|
|
if (strncmp(file, "aria_log.", 10) == 0 &&
|
|
file[10] >= '0' && file[10] <= '9' &&
|
|
file[11] >= '0' && file[11] <= '9' &&
|
|
file[12] >= '0' && file[12] <= '9' &&
|
|
file[13] >= '0' && file[13] <= '9' &&
|
|
file[14] >= '0' && file[14] <= '9' &&
|
|
file[15] >= '0' && file[15] <= '9' &&
|
|
file[16] >= '0' && file[16] <= '9' &&
|
|
file[17] >= '0' && file[17] <= '9' &&
|
|
file[18] == '\0' && (*callback)(directory, file))
|
|
{
|
|
rc= TRUE;
|
|
break;
|
|
}
|
|
}
|
|
my_dirend(dirp);
|
|
return rc;
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Fills table of dependence length of page header from page flags
|
|
*/
|
|
|
|
void translog_fill_overhead_table()
|
|
{
|
|
uint i;
|
|
for (i= 0; i < TRANSLOG_FLAGS_NUM; i++)
|
|
{
|
|
page_overhead[i]= 7;
|
|
if (i & TRANSLOG_PAGE_CRC)
|
|
page_overhead[i]+= CRC_SIZE;
|
|
if (i & TRANSLOG_SECTOR_PROTECTION)
|
|
page_overhead[i]+= TRANSLOG_PAGE_SIZE /
|
|
DISK_DRIVE_SECTOR_SIZE;
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
Callback to find first log in directory.
|
|
*/
|
|
|
|
static my_bool translog_callback_search_first(const char *directory
|
|
__attribute__((unused)),
|
|
const char *filename
|
|
__attribute__((unused)))
|
|
{
|
|
return TRUE;
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Checks that chunk is LSN one
|
|
|
|
@param type type of the chunk
|
|
|
|
@retval 1 the chunk is LNS
|
|
@retval 0 the chunk is not LSN
|
|
*/
|
|
|
|
static my_bool translog_is_LSN_chunk(uchar type)
|
|
{
|
|
DBUG_ENTER("translog_is_LSN_chunk");
|
|
DBUG_PRINT("info", ("byte: %x chunk type: %u record type: %u",
|
|
type, type >> 6, type & TRANSLOG_REC_TYPE));
|
|
DBUG_RETURN(((type & TRANSLOG_CHUNK_TYPE) == TRANSLOG_CHUNK_FIXED) ||
|
|
(((type & TRANSLOG_CHUNK_TYPE) == TRANSLOG_CHUNK_LSN) &&
|
|
((type & TRANSLOG_REC_TYPE)) != TRANSLOG_CHUNK_0_CONT));
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Initialize transaction log
|
|
|
|
@param directory Directory where log files are put
|
|
@param log_file_max_size max size of one log file (for new logs creation)
|
|
@param server_version version of MySQL server (MYSQL_VERSION_ID)
|
|
@param server_id server ID (replication & Co)
|
|
@param pagecache Page cache for the log reads
|
|
@param flags flags (TRANSLOG_PAGE_CRC, TRANSLOG_SECTOR_PROTECTION
|
|
TRANSLOG_RECORD_CRC)
|
|
@param readonly Put transaction log in read-only mode
|
|
@param init_table_func function to initialize record descriptors table
|
|
@param no_errors suppress messages about non-critical errors
|
|
|
|
@todo
|
|
Free used resources in case of error.
|
|
|
|
@retval 0 OK
|
|
@retval 1 Error
|
|
*/
|
|
|
|
my_bool translog_init_with_table(const char *directory,
|
|
uint32 log_file_max_size,
|
|
uint32 server_version,
|
|
uint32 server_id, PAGECACHE *pagecache,
|
|
uint flags, my_bool readonly,
|
|
void (*init_table_func)(),
|
|
my_bool no_errors)
|
|
{
|
|
int i;
|
|
int old_log_was_recovered= 0, logs_found= 0;
|
|
uint old_flags= flags;
|
|
uint32 start_file_num= 1;
|
|
TRANSLOG_ADDRESS UNINIT_VAR(sure_page), last_page, last_valid_page,
|
|
checkpoint_lsn;
|
|
my_bool version_changed= 0;
|
|
DBUG_ENTER("translog_init_with_table");
|
|
|
|
translog_syncs= 0;
|
|
flush_start= 0;
|
|
id_to_share= NULL;
|
|
log_purge_disabled= 0;
|
|
|
|
log_descriptor.directory_fd= -1;
|
|
log_descriptor.is_everything_flushed= 1;
|
|
log_descriptor.flush_in_progress= 0;
|
|
log_descriptor.flush_no= 0;
|
|
log_descriptor.next_pass_max_lsn= LSN_IMPOSSIBLE;
|
|
|
|
/* Normally in Aria this this calls translog_table_init() */
|
|
(*init_table_func)();
|
|
compile_time_assert(sizeof(log_descriptor.dirty_buffer_mask) * 8 >=
|
|
TRANSLOG_BUFFERS_NO);
|
|
log_descriptor.dirty_buffer_mask= 0;
|
|
if (readonly)
|
|
log_descriptor.open_flags= O_BINARY | O_RDONLY;
|
|
else
|
|
log_descriptor.open_flags= O_BINARY | O_RDWR;
|
|
if (mysql_mutex_init(key_TRANSLOG_BUFFER_mutex,
|
|
&log_descriptor.sent_to_disk_lock, MY_MUTEX_INIT_FAST) ||
|
|
mysql_mutex_init(key_TRANSLOG_DESCRIPTOR_file_header_lock,
|
|
&log_descriptor.file_header_lock, MY_MUTEX_INIT_FAST) ||
|
|
mysql_mutex_init(key_TRANSLOG_DESCRIPTOR_unfinished_files_lock,
|
|
&log_descriptor.unfinished_files_lock, MY_MUTEX_INIT_FAST) ||
|
|
mysql_mutex_init(key_TRANSLOG_DESCRIPTOR_purger_lock,
|
|
&log_descriptor.purger_lock, MY_MUTEX_INIT_FAST) ||
|
|
mysql_mutex_init(key_TRANSLOG_DESCRIPTOR_log_flush_lock,
|
|
&log_descriptor.log_flush_lock, MY_MUTEX_INIT_FAST) ||
|
|
mysql_mutex_init(key_TRANSLOG_DESCRIPTOR_dirty_buffer_mask_lock,
|
|
&log_descriptor.dirty_buffer_mask_lock, MY_MUTEX_INIT_FAST) ||
|
|
mysql_cond_init(key_TRANSLOG_DESCRIPTOR_log_flush_cond,
|
|
&log_descriptor.log_flush_cond, 0) ||
|
|
mysql_cond_init(key_TRANSLOG_DESCRIPTOR_new_goal_cond,
|
|
&log_descriptor.new_goal_cond, 0) ||
|
|
mysql_rwlock_init(key_TRANSLOG_DESCRIPTOR_open_files_lock,
|
|
&log_descriptor.open_files_lock) ||
|
|
my_init_dynamic_array(PSI_INSTRUMENT_ME, &log_descriptor.open_files,
|
|
sizeof(TRANSLOG_FILE*), 10, 10, MYF(0)) ||
|
|
my_init_dynamic_array(PSI_INSTRUMENT_ME, &log_descriptor.unfinished_files,
|
|
sizeof(struct st_file_counter),
|
|
10, 10, MYF(0)))
|
|
goto err;
|
|
log_descriptor.min_need_file= 0;
|
|
log_descriptor.min_file_number= 0;
|
|
log_descriptor.last_lsn_checked= LSN_IMPOSSIBLE;
|
|
|
|
/* Directory to store files */
|
|
unpack_dirname(log_descriptor.directory, directory);
|
|
#ifndef __WIN__
|
|
if ((log_descriptor.directory_fd= my_open(log_descriptor.directory,
|
|
O_RDONLY, MYF(MY_WME))) < 0)
|
|
{
|
|
my_errno= errno;
|
|
DBUG_PRINT("error", ("Error %d during opening directory '%s'",
|
|
errno, log_descriptor.directory));
|
|
goto err;
|
|
}
|
|
#endif
|
|
log_descriptor.in_buffers_only= LSN_IMPOSSIBLE;
|
|
DBUG_ASSERT(log_file_max_size % TRANSLOG_PAGE_SIZE == 0 &&
|
|
log_file_max_size >= TRANSLOG_MIN_FILE_SIZE);
|
|
/* max size of one log size (for new logs creation) */
|
|
log_file_size= log_descriptor.log_file_max_size=
|
|
log_file_max_size;
|
|
/* server version */
|
|
log_descriptor.server_version= server_version;
|
|
/* server ID */
|
|
log_descriptor.server_id= server_id;
|
|
/* Page cache for the log reads */
|
|
log_descriptor.pagecache= pagecache;
|
|
/* Flags */
|
|
DBUG_ASSERT((flags &
|
|
~(TRANSLOG_PAGE_CRC | TRANSLOG_SECTOR_PROTECTION |
|
|
TRANSLOG_RECORD_CRC)) == 0);
|
|
log_descriptor.flags= flags;
|
|
translog_fill_overhead_table();
|
|
log_descriptor.page_overhead= page_overhead[flags];
|
|
log_descriptor.page_capacity_chunk_2=
|
|
TRANSLOG_PAGE_SIZE - log_descriptor.page_overhead - 1;
|
|
compile_time_assert(TRANSLOG_WRITE_BUFFER % TRANSLOG_PAGE_SIZE == 0);
|
|
log_descriptor.buffer_capacity_chunk_2=
|
|
(TRANSLOG_WRITE_BUFFER / TRANSLOG_PAGE_SIZE) *
|
|
log_descriptor.page_capacity_chunk_2;
|
|
log_descriptor.half_buffer_capacity_chunk_2=
|
|
log_descriptor.buffer_capacity_chunk_2 / 2;
|
|
DBUG_PRINT("info",
|
|
("Overhead: %u pc2: %u bc2: %u, bc2/2: %u",
|
|
log_descriptor.page_overhead,
|
|
log_descriptor.page_capacity_chunk_2,
|
|
log_descriptor.buffer_capacity_chunk_2,
|
|
log_descriptor.half_buffer_capacity_chunk_2));
|
|
|
|
/* Just to init it somehow (hack for bootstrap)*/
|
|
{
|
|
TRANSLOG_FILE *file= 0;
|
|
log_descriptor.min_file = log_descriptor.max_file= 1;
|
|
insert_dynamic(&log_descriptor.open_files, (uchar *)&file);
|
|
translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0);
|
|
pop_dynamic(&log_descriptor.open_files);
|
|
}
|
|
|
|
/* Buffers for log writing */
|
|
for (i= 0; i < TRANSLOG_BUFFERS_NO; i++)
|
|
{
|
|
if (translog_buffer_init(log_descriptor.buffers + i, i))
|
|
goto err;
|
|
DBUG_PRINT("info", ("translog_buffer buffer #%u:%p",
|
|
i, log_descriptor.buffers + i));
|
|
}
|
|
|
|
/*
|
|
last_logno and last_checkpoint_lsn were set in
|
|
ma_control_file_create_or_open()
|
|
*/
|
|
logs_found= (last_logno != FILENO_IMPOSSIBLE);
|
|
|
|
translog_status= (readonly ? TRANSLOG_READONLY : TRANSLOG_OK);
|
|
checkpoint_lsn= last_checkpoint_lsn;
|
|
|
|
if (logs_found)
|
|
{
|
|
my_bool pageok;
|
|
DBUG_PRINT("info", ("log found..."));
|
|
/*
|
|
TODO: scan directory for aria_log.XXXXXXXX files and find
|
|
highest XXXXXXXX & set logs_found
|
|
TODO: check that last checkpoint within present log addresses space
|
|
|
|
find the log end
|
|
*/
|
|
if (LSN_FILE_NO(last_checkpoint_lsn) == FILENO_IMPOSSIBLE)
|
|
{
|
|
DBUG_ASSERT(LSN_OFFSET(last_checkpoint_lsn) == 0);
|
|
/* only last log needs to be checked */
|
|
sure_page= MAKE_LSN(last_logno, TRANSLOG_PAGE_SIZE);
|
|
}
|
|
else
|
|
{
|
|
sure_page= last_checkpoint_lsn;
|
|
DBUG_ASSERT(LSN_OFFSET(sure_page) % TRANSLOG_PAGE_SIZE != 0);
|
|
sure_page-= LSN_OFFSET(sure_page) % TRANSLOG_PAGE_SIZE;
|
|
}
|
|
/* Set horizon to the beginning of the last file first */
|
|
log_descriptor.horizon= last_page= MAKE_LSN(last_logno, 0);
|
|
if (translog_get_last_page_addr(&last_page, &pageok, no_errors))
|
|
{
|
|
if (!translog_walk_filenames(log_descriptor.directory,
|
|
&translog_callback_search_first))
|
|
{
|
|
/*
|
|
Files was deleted, just start from the next log number, so that
|
|
existing tables are in the past.
|
|
*/
|
|
start_file_num= last_logno + 1;
|
|
checkpoint_lsn= LSN_IMPOSSIBLE; /* no log so no checkpoint */
|
|
logs_found= 0;
|
|
}
|
|
else
|
|
goto err;
|
|
}
|
|
else if (LSN_OFFSET(last_page) == 0)
|
|
{
|
|
if (LSN_FILE_NO(last_page) == 1)
|
|
{
|
|
logs_found= 0; /* file #1 has no pages */
|
|
DBUG_PRINT("info", ("log found. But is is empty => no log assumed"));
|
|
}
|
|
else
|
|
{
|
|
last_page-= LSN_ONE_FILE;
|
|
if (translog_get_last_page_addr(&last_page, &pageok, 0))
|
|
goto err;
|
|
}
|
|
}
|
|
if (logs_found)
|
|
{
|
|
uint32 i;
|
|
log_descriptor.min_file= translog_first_file(log_descriptor.horizon, 1);
|
|
log_descriptor.max_file= last_logno;
|
|
/* Open all files */
|
|
if (allocate_dynamic(&log_descriptor.open_files,
|
|
log_descriptor.max_file -
|
|
log_descriptor.min_file + 1))
|
|
goto err;
|
|
for (i = log_descriptor.max_file; i >= log_descriptor.min_file; i--)
|
|
{
|
|
/*
|
|
We can't allocate all file together because they will be freed
|
|
one by one
|
|
*/
|
|
TRANSLOG_FILE *file= (TRANSLOG_FILE *)my_malloc(PSI_INSTRUMENT_ME, sizeof(TRANSLOG_FILE),
|
|
MYF(0));
|
|
|
|
compile_time_assert(MY_FILEPOS_ERROR > 0xffffffffULL);
|
|
if (file == NULL ||
|
|
(file->handler.file=
|
|
open_logfile_by_number_no_cache(i)) < 0 ||
|
|
mysql_file_seek(file->handler.file, 0, SEEK_END, MYF(0)) >=
|
|
0xffffffffULL)
|
|
{
|
|
int j;
|
|
for (j= i - log_descriptor.min_file - 1; j > 0; j--)
|
|
{
|
|
TRANSLOG_FILE *el=
|
|
*dynamic_element(&log_descriptor.open_files, j,
|
|
TRANSLOG_FILE **);
|
|
mysql_file_close(el->handler.file, MYF(MY_WME));
|
|
my_free(el);
|
|
}
|
|
if (file)
|
|
{
|
|
free(file);
|
|
goto err;
|
|
}
|
|
else
|
|
goto err;
|
|
}
|
|
translog_file_init(file, i, 1);
|
|
/* we allocated space so it can't fail */
|
|
insert_dynamic(&log_descriptor.open_files, (uchar *)&file);
|
|
}
|
|
DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
|
|
log_descriptor.open_files.elements);
|
|
}
|
|
}
|
|
else if (readonly)
|
|
{
|
|
/* There is no logs and there is read-only mode => nothing to read */
|
|
DBUG_PRINT("error", ("No logs and read-only mode"));
|
|
goto err;
|
|
}
|
|
|
|
if (logs_found)
|
|
{
|
|
TRANSLOG_ADDRESS current_page= sure_page;
|
|
my_bool pageok;
|
|
|
|
DBUG_PRINT("info", ("The log is really present"));
|
|
if (sure_page > last_page)
|
|
{
|
|
my_printf_error(HA_ERR_GENERIC, "Aria engine: log data error\n"
|
|
"last_log_page: " LSN_FMT " is less than\n"
|
|
"checkpoint page: " LSN_FMT, MYF(0),
|
|
LSN_IN_PARTS(last_page), LSN_IN_PARTS(sure_page));
|
|
goto err;
|
|
}
|
|
|
|
/* TODO: check page size */
|
|
|
|
last_valid_page= LSN_IMPOSSIBLE;
|
|
/*
|
|
Scans and validate pages. We need it to show "outside" only for sure
|
|
valid part of the log. If the log was damaged then fixed we have to
|
|
cut off damaged part before some other process start write something
|
|
in the log.
|
|
*/
|
|
do
|
|
{
|
|
TRANSLOG_ADDRESS current_file_last_page;
|
|
current_file_last_page= current_page;
|
|
if (translog_get_last_page_addr(¤t_file_last_page, &pageok, 0))
|
|
goto err;
|
|
if (!pageok)
|
|
{
|
|
DBUG_PRINT("error", ("File %lu have no complete last page",
|
|
(ulong) LSN_FILE_NO(current_file_last_page)));
|
|
old_log_was_recovered= 1;
|
|
/* This file is not written till the end so it should be last */
|
|
last_page= current_file_last_page;
|
|
/* TODO: issue warning */
|
|
}
|
|
do
|
|
{
|
|
TRANSLOG_VALIDATOR_DATA data;
|
|
TRANSLOG_PAGE_SIZE_BUFF psize_buff;
|
|
uchar *page;
|
|
data.addr= ¤t_page;
|
|
if ((page= translog_get_page(&data, psize_buff.buffer, NULL)) == NULL)
|
|
goto err;
|
|
if (data.was_recovered)
|
|
{
|
|
DBUG_PRINT("error", ("file no: %lu (%d) "
|
|
"rec_offset: 0x%lx (%lu) (%d)",
|
|
(ulong) LSN_FILE_NO(current_page),
|
|
(uint3korr(page + 3) !=
|
|
LSN_FILE_NO(current_page)),
|
|
(ulong) LSN_OFFSET(current_page),
|
|
(ulong) (LSN_OFFSET(current_page) /
|
|
TRANSLOG_PAGE_SIZE),
|
|
(uint3korr(page) !=
|
|
LSN_OFFSET(current_page) /
|
|
TRANSLOG_PAGE_SIZE)));
|
|
old_log_was_recovered= 1;
|
|
break;
|
|
}
|
|
old_flags= page[TRANSLOG_PAGE_FLAGS];
|
|
last_valid_page= current_page;
|
|
current_page+= TRANSLOG_PAGE_SIZE; /* increase offset */
|
|
} while (current_page <= current_file_last_page);
|
|
current_page+= LSN_ONE_FILE;
|
|
current_page= LSN_REPLACE_OFFSET(current_page, TRANSLOG_PAGE_SIZE);
|
|
} while (LSN_FILE_NO(current_page) <= LSN_FILE_NO(last_page) &&
|
|
!old_log_was_recovered);
|
|
if (last_valid_page == LSN_IMPOSSIBLE)
|
|
{
|
|
/* Panic!!! Even page which should be valid is invalid */
|
|
/* TODO: issue error */
|
|
goto err;
|
|
}
|
|
DBUG_PRINT("info", ("Last valid page is in file: %lu "
|
|
"offset: %lu (0x%lx) "
|
|
"Logs found: %d was recovered: %d "
|
|
"flags match: %d",
|
|
(ulong) LSN_FILE_NO(last_valid_page),
|
|
(ulong) LSN_OFFSET(last_valid_page),
|
|
(ulong) LSN_OFFSET(last_valid_page),
|
|
logs_found, old_log_was_recovered,
|
|
(old_flags == flags)));
|
|
|
|
/* TODO: check server ID */
|
|
if (logs_found && !old_log_was_recovered && old_flags == flags)
|
|
{
|
|
TRANSLOG_VALIDATOR_DATA data;
|
|
TRANSLOG_PAGE_SIZE_BUFF psize_buff;
|
|
uchar *page;
|
|
uint16 chunk_offset;
|
|
data.addr= &last_valid_page;
|
|
/* continue old log */
|
|
DBUG_ASSERT(LSN_FILE_NO(last_valid_page)==
|
|
LSN_FILE_NO(log_descriptor.horizon));
|
|
if ((page= translog_get_page(&data, psize_buff.buffer, NULL)) == NULL ||
|
|
(chunk_offset= translog_get_first_chunk_offset(page)) == 0)
|
|
goto err;
|
|
|
|
/* Puts filled part of old page in the buffer */
|
|
log_descriptor.horizon= last_valid_page;
|
|
translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0);
|
|
/*
|
|
Free space if filled with TRANSLOG_FILLER and first uchar of
|
|
real chunk can't be TRANSLOG_FILLER
|
|
*/
|
|
while (chunk_offset < TRANSLOG_PAGE_SIZE &&
|
|
page[chunk_offset] != TRANSLOG_FILLER)
|
|
{
|
|
uint16 chunk_length;
|
|
if ((chunk_length=
|
|
translog_get_total_chunk_length(page, chunk_offset)) == 0)
|
|
goto err;
|
|
DBUG_PRINT("info", ("chunk: offset: %u length: %u",
|
|
(uint) chunk_offset, (uint) chunk_length));
|
|
chunk_offset+= chunk_length;
|
|
|
|
/* chunk can't cross the page border */
|
|
DBUG_ASSERT(chunk_offset <= TRANSLOG_PAGE_SIZE);
|
|
}
|
|
memcpy(log_descriptor.buffers->buffer, page, chunk_offset);
|
|
log_descriptor.bc.buffer->size+= chunk_offset;
|
|
log_descriptor.bc.ptr+= chunk_offset;
|
|
log_descriptor.bc.current_page_fill= chunk_offset;
|
|
log_descriptor.horizon= LSN_REPLACE_OFFSET(log_descriptor.horizon,
|
|
(chunk_offset +
|
|
LSN_OFFSET(last_valid_page)));
|
|
DBUG_PRINT("info", ("Move Page #%u: %p chaser: %d Size: %lu (%lu)",
|
|
(uint) log_descriptor.bc.buffer_no,
|
|
log_descriptor.bc.buffer,
|
|
log_descriptor.bc.chaser,
|
|
(ulong) log_descriptor.bc.buffer->size,
|
|
(ulong) (log_descriptor.bc.ptr - log_descriptor.bc.
|
|
buffer->buffer)));
|
|
translog_check_cursor(&log_descriptor.bc);
|
|
}
|
|
if (!old_log_was_recovered && old_flags == flags)
|
|
{
|
|
LOGHANDLER_FILE_INFO info;
|
|
|
|
/*
|
|
Accessing &log_descriptor.open_files without mutex is safe
|
|
because it is initialization
|
|
*/
|
|
if (translog_read_file_header(&info,
|
|
(*dynamic_element(&log_descriptor.
|
|
open_files,
|
|
0, TRANSLOG_FILE **))->
|
|
handler.file))
|
|
goto err;
|
|
version_changed= (info.maria_version != TRANSLOG_VERSION_ID);
|
|
}
|
|
}
|
|
DBUG_PRINT("info", ("Logs found: %d was recovered: %d",
|
|
logs_found, old_log_was_recovered));
|
|
if (!logs_found)
|
|
{
|
|
TRANSLOG_FILE *file= (TRANSLOG_FILE*)my_malloc(PSI_INSTRUMENT_ME,
|
|
sizeof(TRANSLOG_FILE), MYF(MY_WME));
|
|
DBUG_PRINT("info", ("The log is not found => we will create new log"));
|
|
if (file == NULL)
|
|
goto err;
|
|
/* Start new log system from scratch */
|
|
log_descriptor.horizon= MAKE_LSN(start_file_num,
|
|
TRANSLOG_PAGE_SIZE); /* header page */
|
|
translog_file_init(file, start_file_num, 0);
|
|
if (insert_dynamic(&log_descriptor.open_files, (uchar*)&file))
|
|
{
|
|
my_free(file);
|
|
goto err;
|
|
}
|
|
if ((file->handler.file=
|
|
create_logfile_by_number_no_cache(start_file_num)) == -1)
|
|
goto err;
|
|
log_descriptor.min_file= log_descriptor.max_file= start_file_num;
|
|
if (translog_write_file_header())
|
|
goto err;
|
|
DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
|
|
log_descriptor.open_files.elements);
|
|
|
|
if (ma_control_file_write_and_force(checkpoint_lsn, start_file_num,
|
|
max_trid_in_control_file,
|
|
recovery_failures))
|
|
goto err;
|
|
/* assign buffer 0 */
|
|
translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0);
|
|
translog_new_page_header(&log_descriptor.horizon, &log_descriptor.bc);
|
|
}
|
|
else if ((old_log_was_recovered || old_flags != flags || version_changed) &&
|
|
!readonly)
|
|
{
|
|
/* leave the damaged file untouched */
|
|
log_descriptor.horizon+= LSN_ONE_FILE;
|
|
/* header page */
|
|
log_descriptor.horizon= LSN_REPLACE_OFFSET(log_descriptor.horizon,
|
|
TRANSLOG_PAGE_SIZE);
|
|
if (translog_create_new_file())
|
|
goto err;
|
|
/*
|
|
Buffer system left untouched after recovery => we should init it
|
|
(starting from buffer 0)
|
|
*/
|
|
translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0);
|
|
translog_new_page_header(&log_descriptor.horizon, &log_descriptor.bc);
|
|
}
|
|
|
|
/* all LSNs that are on disk are flushed */
|
|
log_descriptor.log_start= log_descriptor.sent_to_disk=
|
|
log_descriptor.flushed= log_descriptor.horizon;
|
|
log_descriptor.in_buffers_only= log_descriptor.bc.buffer->offset;
|
|
log_descriptor.max_lsn= LSN_IMPOSSIBLE; /* set to 0 */
|
|
/*
|
|
Now 'flushed' is set to 'horizon' value, but 'horizon' is (potentially)
|
|
address of the next LSN and we want indicate that all LSNs that are
|
|
already on the disk are flushed so we need decrease horizon on 1 (we are
|
|
sure that there is no LSN on the disk which is greater then 'flushed'
|
|
and there will not be LSN created that is equal or less then the value
|
|
of the 'flushed').
|
|
*/
|
|
log_descriptor.flushed--; /* offset decreased */
|
|
log_descriptor.sent_to_disk--; /* offset decreased */
|
|
/*
|
|
Log records will refer to a MARIA_SHARE by a unique 2-byte id; set up
|
|
structures for generating 2-byte ids:
|
|
*/
|
|
id_to_share= (MARIA_SHARE **) my_malloc(PSI_INSTRUMENT_ME, SHARE_ID_MAX * sizeof(MARIA_SHARE*),
|
|
MYF(MY_WME | MY_ZEROFILL));
|
|
if (unlikely(!id_to_share))
|
|
goto err;
|
|
id_to_share--; /* min id is 1 */
|
|
|
|
/* Check the last LSN record integrity */
|
|
if (logs_found)
|
|
{
|
|
TRANSLOG_SCANNER_DATA scanner;
|
|
TRANSLOG_ADDRESS page_addr;
|
|
LSN last_lsn= LSN_IMPOSSIBLE;
|
|
/*
|
|
take very last page address and try to find LSN record on it
|
|
if it fail take address of previous page and so on
|
|
*/
|
|
page_addr= (log_descriptor.horizon -
|
|
((log_descriptor.horizon - 1) % TRANSLOG_PAGE_SIZE + 1));
|
|
if (translog_scanner_init(page_addr, 1, &scanner, 1))
|
|
goto err;
|
|
scanner.page_offset= page_overhead[scanner.page[TRANSLOG_PAGE_FLAGS]];
|
|
for (;;)
|
|
{
|
|
uint chunk_1byte;
|
|
chunk_1byte= scanner.page[scanner.page_offset];
|
|
while (!translog_is_LSN_chunk(chunk_1byte) &&
|
|
scanner.page != END_OF_LOG &&
|
|
scanner.page[scanner.page_offset] != TRANSLOG_FILLER &&
|
|
scanner.page_addr == page_addr)
|
|
{
|
|
if (translog_get_next_chunk(&scanner))
|
|
{
|
|
translog_destroy_scanner(&scanner);
|
|
goto err;
|
|
}
|
|
if (scanner.page != END_OF_LOG)
|
|
chunk_1byte= scanner.page[scanner.page_offset];
|
|
}
|
|
if (translog_is_LSN_chunk(chunk_1byte))
|
|
{
|
|
last_lsn= scanner.page_addr + scanner.page_offset;
|
|
if (translog_get_next_chunk(&scanner))
|
|
{
|
|
translog_destroy_scanner(&scanner);
|
|
goto err;
|
|
}
|
|
if (scanner.page == END_OF_LOG)
|
|
break; /* it was the last record */
|
|
chunk_1byte= scanner.page[scanner.page_offset];
|
|
continue; /* try to find other record on this page */
|
|
}
|
|
|
|
if (last_lsn != LSN_IMPOSSIBLE)
|
|
break; /* there is no more records on the page */
|
|
|
|
/* We have to make step back */
|
|
if (unlikely(LSN_OFFSET(page_addr) == TRANSLOG_PAGE_SIZE))
|
|
{
|
|
uint32 file_no= LSN_FILE_NO(page_addr);
|
|
my_bool last_page_ok;
|
|
/* it is beginning of the current file */
|
|
if (unlikely(file_no == 1))
|
|
{
|
|
/*
|
|
It is beginning of the log => there is no LSNs in the log =>
|
|
There is no harm in leaving it "as-is".
|
|
*/
|
|
log_descriptor.previous_flush_horizon= log_descriptor.horizon;
|
|
DBUG_PRINT("info", ("previous_flush_horizon: " LSN_FMT,
|
|
LSN_IN_PARTS(log_descriptor.
|
|
previous_flush_horizon)));
|
|
DBUG_RETURN(0);
|
|
}
|
|
file_no--;
|
|
page_addr= MAKE_LSN(file_no, TRANSLOG_PAGE_SIZE);
|
|
translog_get_last_page_addr(&page_addr, &last_page_ok, 0);
|
|
/* page should be OK as it is not the last file */
|
|
DBUG_ASSERT(last_page_ok);
|
|
}
|
|
else
|
|
{
|
|
page_addr-= TRANSLOG_PAGE_SIZE;
|
|
}
|
|
translog_destroy_scanner(&scanner);
|
|
if (translog_scanner_init(page_addr, 1, &scanner, 1))
|
|
goto err;
|
|
scanner.page_offset= page_overhead[scanner.page[TRANSLOG_PAGE_FLAGS]];
|
|
}
|
|
translog_destroy_scanner(&scanner);
|
|
|
|
/* Now scanner points to the last LSN chunk, lets check it */
|
|
{
|
|
TRANSLOG_HEADER_BUFFER rec;
|
|
translog_size_t rec_len;
|
|
int len;
|
|
uchar buffer[1];
|
|
DBUG_PRINT("info", ("going to check the last found record " LSN_FMT,
|
|
LSN_IN_PARTS(last_lsn)));
|
|
|
|
len=
|
|
translog_read_record_header(last_lsn, &rec);
|
|
if (unlikely (len == RECHEADER_READ_ERROR ||
|
|
len == RECHEADER_READ_EOF))
|
|
{
|
|
DBUG_PRINT("error", ("unexpected end of log or record during "
|
|
"reading record header: " LSN_FMT " len: %d",
|
|
LSN_IN_PARTS(last_lsn), len));
|
|
if (readonly)
|
|
log_descriptor.log_start= log_descriptor.horizon= last_lsn;
|
|
else if (translog_truncate_log(last_lsn))
|
|
{
|
|
translog_free_record_header(&rec);
|
|
goto err;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
DBUG_ASSERT(last_lsn == rec.lsn);
|
|
if (likely(rec.record_length != 0))
|
|
{
|
|
/*
|
|
Reading the last byte of record will trigger scanning all
|
|
record chunks for now
|
|
*/
|
|
rec_len= translog_read_record(rec.lsn, rec.record_length - 1, 1,
|
|
buffer, NULL);
|
|
if (rec_len != 1)
|
|
{
|
|
DBUG_PRINT("error", ("unexpected end of log or record during "
|
|
"reading record body: " LSN_FMT " len: %d",
|
|
LSN_IN_PARTS(rec.lsn),
|
|
len));
|
|
if (readonly)
|
|
log_descriptor.log_start= log_descriptor.horizon= last_lsn;
|
|
|
|
else if (translog_truncate_log(last_lsn))
|
|
{
|
|
translog_free_record_header(&rec);
|
|
goto err;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
translog_free_record_header(&rec);
|
|
}
|
|
}
|
|
log_descriptor.previous_flush_horizon= log_descriptor.horizon;
|
|
DBUG_PRINT("info", ("previous_flush_horizon: " LSN_FMT,
|
|
LSN_IN_PARTS(log_descriptor.previous_flush_horizon)));
|
|
DBUG_RETURN(0);
|
|
err:
|
|
ma_message_no_user(0, "log initialization failed");
|
|
DBUG_RETURN(1);
|
|
}
|
|
|
|
|
|
/*
|
|
@brief Free transaction log file buffer.
|
|
|
|
@param buffer_no The buffer to free
|
|
*/
|
|
|
|
static void translog_buffer_destroy(struct st_translog_buffer *buffer)
|
|
{
|
|
DBUG_ENTER("translog_buffer_destroy");
|
|
DBUG_PRINT("enter",
|
|
("Buffer #%u: %p file: %d offset: " LSN_FMT " size: %lu",
|
|
(uint) buffer->buffer_no, buffer,
|
|
(buffer->file ? buffer->file->handler.file : -1),
|
|
LSN_IN_PARTS(buffer->offset),
|
|
(ulong) buffer->size));
|
|
if (buffer->file != NULL)
|
|
{
|
|
/*
|
|
We ignore errors here, because we can't do something about it
|
|
(it is shutting down)
|
|
|
|
We also have to take the locks even if there can't be any other
|
|
threads running, because translog_buffer_flush()
|
|
requires that we have the buffer locked.
|
|
*/
|
|
translog_buffer_lock(buffer);
|
|
translog_buffer_flush(buffer);
|
|
translog_buffer_unlock(buffer);
|
|
}
|
|
DBUG_PRINT("info", ("Destroy mutex: %p", &buffer->mutex));
|
|
mysql_mutex_destroy(&buffer->mutex);
|
|
mysql_cond_destroy(&buffer->waiting_filling_buffer);
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
|
|
/*
|
|
Free log handler resources
|
|
|
|
SYNOPSIS
|
|
translog_destroy()
|
|
*/
|
|
|
|
void translog_destroy()
|
|
{
|
|
TRANSLOG_FILE **file;
|
|
uint i;
|
|
uint8 current_buffer;
|
|
DBUG_ENTER("translog_destroy");
|
|
|
|
DBUG_ASSERT(translog_status == TRANSLOG_OK ||
|
|
translog_status == TRANSLOG_READONLY);
|
|
translog_lock();
|
|
current_buffer= log_descriptor.bc.buffer_no;
|
|
translog_status= (translog_status == TRANSLOG_READONLY ?
|
|
TRANSLOG_UNINITED :
|
|
TRANSLOG_SHUTDOWN);
|
|
if (log_descriptor.bc.buffer->file != NULL)
|
|
translog_finish_page(&log_descriptor.horizon, &log_descriptor.bc);
|
|
translog_unlock();
|
|
|
|
for (i= 0; i < TRANSLOG_BUFFERS_NO; i++)
|
|
{
|
|
struct st_translog_buffer *buffer= (log_descriptor.buffers +
|
|
((i + current_buffer + 1) %
|
|
TRANSLOG_BUFFERS_NO));
|
|
translog_buffer_destroy(buffer);
|
|
}
|
|
translog_status= TRANSLOG_UNINITED;
|
|
|
|
/* close files */
|
|
while ((file= (TRANSLOG_FILE **)pop_dynamic(&log_descriptor.open_files)))
|
|
translog_close_log_file(*file);
|
|
mysql_mutex_destroy(&log_descriptor.sent_to_disk_lock);
|
|
mysql_mutex_destroy(&log_descriptor.file_header_lock);
|
|
mysql_mutex_destroy(&log_descriptor.unfinished_files_lock);
|
|
mysql_mutex_destroy(&log_descriptor.purger_lock);
|
|
mysql_mutex_destroy(&log_descriptor.log_flush_lock);
|
|
mysql_mutex_destroy(&log_descriptor.dirty_buffer_mask_lock);
|
|
mysql_cond_destroy(&log_descriptor.log_flush_cond);
|
|
mysql_cond_destroy(&log_descriptor.new_goal_cond);
|
|
mysql_rwlock_destroy(&log_descriptor.open_files_lock);
|
|
delete_dynamic(&log_descriptor.open_files);
|
|
delete_dynamic(&log_descriptor.unfinished_files);
|
|
|
|
if (log_descriptor.directory_fd >= 0)
|
|
mysql_file_close(log_descriptor.directory_fd, MYF(MY_WME));
|
|
if (id_to_share != NULL)
|
|
my_free(id_to_share + 1);
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
|
|
/*
|
|
@brief Starts new page.
|
|
|
|
@param horizon \ Position in file and buffer where we are
|
|
@param cursor /
|
|
@param prev_buffer Buffer which should be flushed will be assigned here.
|
|
This is always set (to NULL if nothing to flush).
|
|
|
|
@note We do not want to flush the buffer immediately because we want to
|
|
let caller of this function first advance 'horizon' pointer and unlock the
|
|
loghandler and only then flush the log which can take some time.
|
|
|
|
@retval 0 OK
|
|
@retval 1 Error
|
|
*/
|
|
|
|
static my_bool translog_page_next(TRANSLOG_ADDRESS *horizon,
|
|
struct st_buffer_cursor *cursor,
|
|
struct st_translog_buffer **prev_buffer)
|
|
{
|
|
struct st_translog_buffer *buffer= cursor->buffer;
|
|
DBUG_ENTER("translog_page_next");
|
|
|
|
*prev_buffer= NULL;
|
|
if ((cursor->ptr + TRANSLOG_PAGE_SIZE >
|
|
cursor->buffer->buffer + TRANSLOG_WRITE_BUFFER) ||
|
|
(LSN_OFFSET(*horizon) >
|
|
log_descriptor.log_file_max_size - TRANSLOG_PAGE_SIZE))
|
|
{
|
|
DBUG_PRINT("info", ("Switch to next buffer Buffer Size: %lu (%lu) => %d "
|
|
"File size: %lu max: %lu => %d",
|
|
(ulong) cursor->buffer->size,
|
|
(ulong) (cursor->ptr - cursor->buffer->buffer),
|
|
(cursor->ptr + TRANSLOG_PAGE_SIZE >
|
|
cursor->buffer->buffer + TRANSLOG_WRITE_BUFFER),
|
|
(ulong) LSN_OFFSET(*horizon),
|
|
(ulong) log_descriptor.log_file_max_size,
|
|
(LSN_OFFSET(*horizon) >
|
|
(log_descriptor.log_file_max_size -
|
|
TRANSLOG_PAGE_SIZE))));
|
|
if (translog_buffer_next(horizon, cursor,
|
|
LSN_OFFSET(*horizon) >
|
|
(log_descriptor.log_file_max_size -
|
|
TRANSLOG_PAGE_SIZE)))
|
|
DBUG_RETURN(1);
|
|
*prev_buffer= buffer;
|
|
DBUG_PRINT("info", ("Buffer #%u (%p): have to be flushed",
|
|
(uint) buffer->buffer_no, buffer));
|
|
}
|
|
else
|
|
{
|
|
DBUG_PRINT("info", ("Use the same buffer #%u (%p): "
|
|
"Buffer Size: %lu (%lu)",
|
|
(uint) buffer->buffer_no,
|
|
buffer,
|
|
(ulong) cursor->buffer->size,
|
|
(ulong) (cursor->ptr - cursor->buffer->buffer)));
|
|
translog_finish_page(horizon, cursor);
|
|
translog_new_page_header(horizon, cursor);
|
|
}
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
|
|
/*
|
|
Write data of given length to the current page
|
|
|
|
SYNOPSIS
|
|
translog_write_data_on_page()
|
|
horizon \ Pointers on file and buffer
|
|
cursor /
|
|
length IN length of the chunk
|
|
buffer buffer with data
|
|
|
|
RETURN
|
|
0 OK
|
|
1 Error
|
|
*/
|
|
|
|
static my_bool translog_write_data_on_page(TRANSLOG_ADDRESS *horizon,
|
|
struct st_buffer_cursor *cursor,
|
|
translog_size_t length,
|
|
uchar *buffer)
|
|
{
|
|
DBUG_ENTER("translog_write_data_on_page");
|
|
DBUG_PRINT("enter", ("Chunk length: %lu Page size %u",
|
|
(ulong) length, (uint) cursor->current_page_fill));
|
|
DBUG_ASSERT(length > 0);
|
|
DBUG_ASSERT(length + cursor->current_page_fill <= TRANSLOG_PAGE_SIZE);
|
|
DBUG_ASSERT(length + cursor->ptr <= cursor->buffer->buffer +
|
|
TRANSLOG_WRITE_BUFFER);
|
|
|
|
memcpy(cursor->ptr, buffer, length);
|
|
cursor->ptr+= length;
|
|
(*horizon)+= length; /* adds offset */
|
|
cursor->current_page_fill+= length;
|
|
if (!cursor->chaser)
|
|
cursor->buffer->size+= length;
|
|
DBUG_PRINT("info", ("Write data buffer #%u: %p "
|
|
"chaser: %d Size: %lu (%lu)",
|
|
(uint) cursor->buffer->buffer_no, cursor->buffer,
|
|
cursor->chaser, (ulong) cursor->buffer->size,
|
|
(ulong) (cursor->ptr - cursor->buffer->buffer)));
|
|
translog_check_cursor(cursor);
|
|
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
|
|
/*
|
|
Write data from parts of given length to the current page
|
|
|
|
SYNOPSIS
|
|
translog_write_parts_on_page()
|
|
horizon \ Pointers on file and buffer
|
|
cursor /
|
|
length IN length of the chunk
|
|
parts IN/OUT chunk source
|
|
|
|
RETURN
|
|
0 OK
|
|
1 Error
|
|
*/
|
|
|
|
static my_bool translog_write_parts_on_page(TRANSLOG_ADDRESS *horizon,
|
|
struct st_buffer_cursor *cursor,
|
|
translog_size_t length,
|
|
struct st_translog_parts *parts)
|
|
{
|
|
translog_size_t left= length;
|
|
uint cur= (uint) parts->current;
|
|
DBUG_ENTER("translog_write_parts_on_page");
|
|
DBUG_PRINT("enter", ("Chunk length: %lu parts: %u of %u. Page size: %u "
|
|
"Buffer size: %lu (%lu)",
|
|
(ulong) length,
|
|
(uint) (cur + 1), (uint) parts->elements,
|
|
(uint) cursor->current_page_fill,
|
|
(ulong) cursor->buffer->size,
|
|
(ulong) (cursor->ptr - cursor->buffer->buffer)));
|
|
DBUG_ASSERT(length > 0);
|
|
DBUG_ASSERT(length + cursor->current_page_fill <= TRANSLOG_PAGE_SIZE);
|
|
DBUG_ASSERT(length + cursor->ptr <= cursor->buffer->buffer +
|
|
TRANSLOG_WRITE_BUFFER);
|
|
|
|
do
|
|
{
|
|
translog_size_t len;
|
|
LEX_CUSTRING *part;
|
|
const uchar *buff;
|
|
|
|
DBUG_ASSERT(cur < parts->elements);
|
|
part= parts->parts + cur;
|
|
buff= part->str;
|
|
DBUG_PRINT("info", ("Part: %u Length: %lu left: %lu buff: %p",
|
|
(uint) (cur + 1), (ulong) part->length, (ulong) left,
|
|
buff));
|
|
|
|
if (part->length > left)
|
|
{
|
|
/* we should write less then the current part */
|
|
len= left;
|
|
part->length-= len;
|
|
part->str+= len;
|
|
DBUG_PRINT("info", ("Set new part: %u Length: %lu",
|
|
(uint) (cur + 1), (ulong) part->length));
|
|
}
|
|
else
|
|
{
|
|
len= (translog_size_t) part->length;
|
|
cur++;
|
|
DBUG_PRINT("info", ("moved to next part (len: %lu)", (ulong) len));
|
|
}
|
|
DBUG_PRINT("info", ("copy: %p <- %p %u",
|
|
cursor->ptr, buff, len));
|
|
if (likely(len))
|
|
{
|
|
memcpy(cursor->ptr, buff, len);
|
|
left-= len;
|
|
cursor->ptr+= len;
|
|
}
|
|
} while (left);
|
|
|
|
DBUG_PRINT("info", ("Horizon: " LSN_FMT " Length %u(0x%x)",
|
|
LSN_IN_PARTS(*horizon),
|
|
length, length));
|
|
parts->current= cur;
|
|
(*horizon)+= length; /* offset increasing */
|
|
cursor->current_page_fill+= length;
|
|
if (!cursor->chaser)
|
|
cursor->buffer->size+= length;
|
|
/*
|
|
We do not not updating parts->total_record_length here because it is
|
|
need only before writing record to have total length
|
|
*/
|
|
DBUG_PRINT("info", ("Write parts buffer #%u: %p "
|
|
"chaser: %d Size: %lu (%lu) "
|
|
"Horizon: " LSN_FMT " buff offset: 0x%x",
|
|
(uint) cursor->buffer->buffer_no, cursor->buffer,
|
|
cursor->chaser, (ulong) cursor->buffer->size,
|
|
(ulong) (cursor->ptr - cursor->buffer->buffer),
|
|
LSN_IN_PARTS(*horizon),
|
|
(uint) (LSN_OFFSET(cursor->buffer->offset) +
|
|
cursor->buffer->size)));
|
|
translog_check_cursor(cursor);
|
|
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
|
|
/*
|
|
Put 1 group chunk type 0 header into parts array
|
|
|
|
SYNOPSIS
|
|
translog_write_variable_record_1group_header()
|
|
parts Descriptor of record source parts
|
|
type The log record type
|
|
short_trid Short transaction ID or 0 if it has no sense
|
|
header_length Calculated header length of chunk type 0
|
|
chunk0_header Buffer for the chunk header writing
|
|
*/
|
|
|
|
static void
|
|
translog_write_variable_record_1group_header(struct st_translog_parts *parts,
|
|
enum translog_record_type type,
|
|
SHORT_TRANSACTION_ID short_trid,
|
|
uint16 header_length,
|
|
uchar *chunk0_header)
|
|
{
|
|
LEX_CUSTRING *part;
|
|
DBUG_ASSERT(parts->current != 0); /* first part is left for header */
|
|
part= parts->parts + (--parts->current);
|
|
parts->total_record_length+= (translog_size_t) (part->length= header_length);
|
|
part->str= chunk0_header;
|
|
/* puts chunk type */
|
|
*chunk0_header= (uchar) (type | TRANSLOG_CHUNK_LSN);
|
|
int2store(chunk0_header + 1, short_trid);
|
|
/* puts record length */
|
|
translog_write_variable_record_1group_code_len(chunk0_header + 3,
|
|
parts->record_length,
|
|
header_length);
|
|
/* puts 0 as chunk length which indicate 1 group record */
|
|
int2store(chunk0_header + header_length - 2, 0);
|
|
}
|
|
|
|
|
|
/*
|
|
Increase number of writers for this buffer
|
|
|
|
SYNOPSIS
|
|
translog_buffer_increase_writers()
|
|
buffer target buffer
|
|
*/
|
|
|
|
static inline void
|
|
translog_buffer_increase_writers(struct st_translog_buffer *buffer)
|
|
{
|
|
DBUG_ENTER("translog_buffer_increase_writers");
|
|
translog_buffer_lock_assert_owner(buffer);
|
|
buffer->copy_to_buffer_in_progress++;
|
|
DBUG_PRINT("info", ("copy_to_buffer_in_progress. Buffer #%u %p progress: %d",
|
|
(uint) buffer->buffer_no, buffer,
|
|
buffer->copy_to_buffer_in_progress));
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
|
|
/*
|
|
Decrease number of writers for this buffer
|
|
|
|
SYNOPSIS
|
|
translog_buffer_decrease_writers()
|
|
buffer target buffer
|
|
*/
|
|
|
|
static void translog_buffer_decrease_writers(struct st_translog_buffer *buffer)
|
|
{
|
|
DBUG_ENTER("translog_buffer_decrease_writers");
|
|
translog_buffer_lock_assert_owner(buffer);
|
|
buffer->copy_to_buffer_in_progress--;
|
|
DBUG_PRINT("info",
|
|
("copy_to_buffer_in_progress. Buffer #%u %p progress: %d",
|
|
(uint) buffer->buffer_no, buffer,
|
|
buffer->copy_to_buffer_in_progress));
|
|
if (buffer->copy_to_buffer_in_progress == 0)
|
|
mysql_cond_broadcast(&buffer->waiting_filling_buffer);
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Skip to the next page for chaser (thread which advanced horizon
|
|
pointer and now feeling the buffer)
|
|
|
|
@param horizon \ Pointers on file position and buffer
|
|
@param cursor /
|
|
|
|
@retval 1 OK
|
|
@retval 0 Error
|
|
*/
|
|
|
|
static my_bool translog_chaser_page_next(TRANSLOG_ADDRESS *horizon,
|
|
struct st_buffer_cursor *cursor)
|
|
{
|
|
struct st_translog_buffer *buffer_to_flush;
|
|
my_bool rc;
|
|
DBUG_ENTER("translog_chaser_page_next");
|
|
DBUG_ASSERT(cursor->chaser);
|
|
rc= translog_page_next(horizon, cursor, &buffer_to_flush);
|
|
if (buffer_to_flush != NULL)
|
|
{
|
|
translog_buffer_lock(buffer_to_flush);
|
|
translog_buffer_decrease_writers(buffer_to_flush);
|
|
used_buffs_register_unlock(&cursor->buffs, buffer_to_flush);
|
|
if (!rc)
|
|
rc= translog_buffer_flush(buffer_to_flush);
|
|
translog_buffer_unlock(buffer_to_flush);
|
|
}
|
|
DBUG_RETURN(rc);
|
|
}
|
|
|
|
/*
|
|
Put chunk 2 from new page beginning
|
|
|
|
SYNOPSIS
|
|
translog_write_variable_record_chunk2_page()
|
|
parts Descriptor of record source parts
|
|
horizon \ Pointers on file position and buffer
|
|
cursor /
|
|
|
|
RETURN
|
|
0 OK
|
|
1 Error
|
|
*/
|
|
|
|
static my_bool
|
|
translog_write_variable_record_chunk2_page(struct st_translog_parts *parts,
|
|
TRANSLOG_ADDRESS *horizon,
|
|
struct st_buffer_cursor *cursor)
|
|
{
|
|
uchar chunk2_header[1];
|
|
DBUG_ENTER("translog_write_variable_record_chunk2_page");
|
|
chunk2_header[0]= TRANSLOG_CHUNK_NOHDR;
|
|
|
|
if (translog_chaser_page_next(horizon, cursor))
|
|
DBUG_RETURN(1);
|
|
|
|
/* Puts chunk type */
|
|
translog_write_data_on_page(horizon, cursor, 1, chunk2_header);
|
|
/* Puts chunk body */
|
|
translog_write_parts_on_page(horizon, cursor,
|
|
log_descriptor.page_capacity_chunk_2, parts);
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
|
|
/*
|
|
Put chunk 3 of requested length in the buffer from new page beginning
|
|
|
|
SYNOPSIS
|
|
translog_write_variable_record_chunk3_page()
|
|
parts Descriptor of record source parts
|
|
length Length of this chunk
|
|
horizon \ Pointers on file position and buffer
|
|
cursor /
|
|
|
|
RETURN
|
|
0 OK
|
|
1 Error
|
|
*/
|
|
|
|
static my_bool
|
|
translog_write_variable_record_chunk3_page(struct st_translog_parts *parts,
|
|
uint16 length,
|
|
TRANSLOG_ADDRESS *horizon,
|
|
struct st_buffer_cursor *cursor)
|
|
{
|
|
LEX_CUSTRING *part;
|
|
uchar chunk3_header[1 + 2];
|
|
DBUG_ENTER("translog_write_variable_record_chunk3_page");
|
|
|
|
if (translog_chaser_page_next(horizon, cursor))
|
|
DBUG_RETURN(1);
|
|
|
|
if (length == 0)
|
|
{
|
|
/* It was call to write page header only (no data for chunk 3) */
|
|
DBUG_PRINT("info", ("It is a call to make page header only"));
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
DBUG_ASSERT(parts->current != 0); /* first part is left for header */
|
|
part= parts->parts + (--parts->current);
|
|
parts->total_record_length+= (translog_size_t) (part->length= 1 + 2);
|
|
part->str= chunk3_header;
|
|
/* Puts chunk type */
|
|
*chunk3_header= (uchar) (TRANSLOG_CHUNK_LNGTH);
|
|
/* Puts chunk length */
|
|
int2store(chunk3_header + 1, length);
|
|
|
|
translog_write_parts_on_page(horizon, cursor, length + 1 + 2, parts);
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
/*
|
|
Move log pointer (horizon) on given number pages starting from next page,
|
|
and given offset on the last page
|
|
|
|
SYNOPSIS
|
|
translog_advance_pointer()
|
|
pages Number of full pages starting from the next one
|
|
last_page_data Plus this data on the last page
|
|
|
|
RETURN
|
|
0 OK
|
|
1 Error
|
|
*/
|
|
|
|
static my_bool translog_advance_pointer(int pages, uint16 last_page_data,
|
|
TRUNSLOG_USED_BUFFERS *buffs)
|
|
{
|
|
translog_size_t last_page_offset= (log_descriptor.page_overhead +
|
|
last_page_data);
|
|
translog_size_t offset= (TRANSLOG_PAGE_SIZE -
|
|
log_descriptor.bc.current_page_fill +
|
|
pages * TRANSLOG_PAGE_SIZE + last_page_offset);
|
|
translog_size_t buffer_end_offset, file_end_offset, min_offset;
|
|
DBUG_ENTER("translog_advance_pointer");
|
|
DBUG_PRINT("enter", ("Pointer: " LSN_FMT " + %u + %u pages + %u + %u",
|
|
LSN_IN_PARTS(log_descriptor.horizon),
|
|
(uint) (TRANSLOG_PAGE_SIZE -
|
|
log_descriptor.bc.current_page_fill),
|
|
pages, (uint) log_descriptor.page_overhead,
|
|
(uint) last_page_data));
|
|
translog_lock_assert_owner();
|
|
|
|
used_buffs_init(buffs);
|
|
|
|
if (pages == -1)
|
|
{
|
|
/*
|
|
It is special case when we advance the pointer on the same page.
|
|
It can happened when we write last part of multi-group record.
|
|
*/
|
|
DBUG_ASSERT(last_page_data + log_descriptor.bc.current_page_fill <=
|
|
TRANSLOG_PAGE_SIZE);
|
|
offset= last_page_data;
|
|
last_page_offset= log_descriptor.bc.current_page_fill + last_page_data;
|
|
goto end;
|
|
}
|
|
DBUG_PRINT("info", ("last_page_offset %lu", (ulong) last_page_offset));
|
|
DBUG_ASSERT(last_page_offset <= TRANSLOG_PAGE_SIZE);
|
|
|
|
/*
|
|
The loop will be executed 1-3 times. Usually we advance the
|
|
pointer to fill only the current buffer (if we have more then 1/2 of
|
|
buffer free or 2 buffers (rest of current and all next). In case of
|
|
really huge record end where we write last group with "table of
|
|
content" of all groups and ignore buffer borders we can occupy
|
|
3 buffers.
|
|
*/
|
|
for (;;)
|
|
{
|
|
uint8 new_buffer_no;
|
|
struct st_translog_buffer *new_buffer;
|
|
struct st_translog_buffer *old_buffer;
|
|
buffer_end_offset= TRANSLOG_WRITE_BUFFER - log_descriptor.bc.buffer->size;
|
|
if (likely(log_descriptor.log_file_max_size >=
|
|
LSN_OFFSET(log_descriptor.horizon)))
|
|
file_end_offset= (log_descriptor.log_file_max_size -
|
|
LSN_OFFSET(log_descriptor.horizon));
|
|
else
|
|
{
|
|
/*
|
|
We already have written more then current file limit allow,
|
|
So we will finish this page and start new file
|
|
*/
|
|
file_end_offset= (TRANSLOG_PAGE_SIZE -
|
|
log_descriptor.bc.current_page_fill);
|
|
}
|
|
DBUG_PRINT("info", ("offset: %u buffer_end_offs: %u, "
|
|
"file_end_offs: %u",
|
|
offset, buffer_end_offset,
|
|
file_end_offset));
|
|
DBUG_PRINT("info", ("Buff #%u %u (%p) offset 0x%x + size 0x%x = "
|
|
"0x%x (0x%x)",
|
|
log_descriptor.bc.buffer->buffer_no,
|
|
log_descriptor.bc.buffer_no,
|
|
log_descriptor.bc.buffer,
|
|
(uint) LSN_OFFSET(log_descriptor.bc.buffer->offset),
|
|
log_descriptor.bc.buffer->size,
|
|
(uint) (LSN_OFFSET(log_descriptor.bc.buffer->offset) +
|
|
log_descriptor.bc.buffer->size),
|
|
(uint) LSN_OFFSET(log_descriptor.horizon)));
|
|
DBUG_ASSERT(LSN_OFFSET(log_descriptor.bc.buffer->offset) +
|
|
log_descriptor.bc.buffer->size ==
|
|
LSN_OFFSET(log_descriptor.horizon));
|
|
|
|
if (offset <= buffer_end_offset && offset <= file_end_offset)
|
|
break;
|
|
old_buffer= log_descriptor.bc.buffer;
|
|
new_buffer_no= (log_descriptor.bc.buffer_no + 1) % TRANSLOG_BUFFERS_NO;
|
|
new_buffer= log_descriptor.buffers + new_buffer_no;
|
|
|
|
translog_buffer_lock(new_buffer);
|
|
#ifndef DBUG_OFF
|
|
{
|
|
TRANSLOG_ADDRESS offset= new_buffer->offset;
|
|
TRANSLOG_FILE *file= new_buffer->file;
|
|
uint8 ver= new_buffer->ver;
|
|
translog_lock_assert_owner();
|
|
#endif
|
|
translog_wait_for_buffer_free(new_buffer);
|
|
#ifndef DBUG_OFF
|
|
/* We keep the handler locked so nobody can start this new buffer */
|
|
DBUG_ASSERT((offset == new_buffer->offset && new_buffer->file == NULL &&
|
|
(file == NULL ? ver : (uint8)(ver + 1)) ==
|
|
new_buffer->ver) ||
|
|
translog_status == TRANSLOG_READONLY);
|
|
}
|
|
#endif
|
|
|
|
min_offset= MY_MIN(buffer_end_offset, file_end_offset);
|
|
/* TODO: check is it ptr or size enough */
|
|
log_descriptor.bc.buffer->size+= min_offset;
|
|
log_descriptor.bc.ptr+= min_offset;
|
|
DBUG_PRINT("info", ("NewP buffer #%u: %p chaser: %d Size: %lu (%lu)",
|
|
(uint) log_descriptor.bc.buffer->buffer_no,
|
|
log_descriptor.bc.buffer,
|
|
log_descriptor.bc.chaser,
|
|
(ulong) log_descriptor.bc.buffer->size,
|
|
(ulong) (log_descriptor.bc.ptr -log_descriptor.bc.
|
|
buffer->buffer)));
|
|
DBUG_ASSERT((ulong) (log_descriptor.bc.ptr -
|
|
log_descriptor.bc.buffer->buffer) ==
|
|
log_descriptor.bc.buffer->size);
|
|
DBUG_ASSERT(log_descriptor.bc.buffer->buffer_no ==
|
|
log_descriptor.bc.buffer_no);
|
|
translog_buffer_increase_writers(log_descriptor.bc.buffer);
|
|
// register for case of error
|
|
used_buffs_add(buffs, log_descriptor.bc.buffer);
|
|
|
|
if (file_end_offset <= buffer_end_offset)
|
|
{
|
|
log_descriptor.horizon+= LSN_ONE_FILE;
|
|
log_descriptor.horizon= LSN_REPLACE_OFFSET(log_descriptor.horizon,
|
|
TRANSLOG_PAGE_SIZE);
|
|
DBUG_PRINT("info", ("New file: %lu",
|
|
(ulong) LSN_FILE_NO(log_descriptor.horizon)));
|
|
if (translog_create_new_file())
|
|
{
|
|
struct st_translog_buffer *ob= log_descriptor.bc.buffer;
|
|
translog_buffer_unlock(ob);
|
|
used_buffs_urgent_unlock(buffs);
|
|
translog_buffer_lock(ob);
|
|
DBUG_RETURN(1);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
DBUG_PRINT("info", ("The same file"));
|
|
log_descriptor.horizon+= min_offset; /* offset increasing */
|
|
}
|
|
translog_start_buffer(new_buffer, &log_descriptor.bc, new_buffer_no);
|
|
old_buffer->next_buffer_offset= new_buffer->offset;
|
|
new_buffer->prev_buffer_offset= old_buffer->offset;
|
|
translog_buffer_unlock(old_buffer);
|
|
offset-= min_offset;
|
|
}
|
|
DBUG_PRINT("info", ("drop write_counter"));
|
|
log_descriptor.bc.write_counter= 0;
|
|
log_descriptor.bc.previous_offset= 0;
|
|
end:
|
|
log_descriptor.bc.ptr+= offset;
|
|
log_descriptor.bc.buffer->size+= offset;
|
|
translog_buffer_increase_writers(log_descriptor.bc.buffer);
|
|
used_buffs_add(buffs, log_descriptor.bc.buffer);
|
|
log_descriptor.horizon+= offset; /* offset increasing */
|
|
log_descriptor.bc.current_page_fill= last_page_offset;
|
|
DBUG_PRINT("info", ("NewP buffer #%u: %p chaser: %d Size: %lu (%lu) "
|
|
"offset: %u last page: %u",
|
|
(uint) log_descriptor.bc.buffer->buffer_no,
|
|
log_descriptor.bc.buffer,
|
|
log_descriptor.bc.chaser,
|
|
(ulong) log_descriptor.bc.buffer->size,
|
|
(ulong) (log_descriptor.bc.ptr -
|
|
log_descriptor.bc.buffer->
|
|
buffer), (uint) offset,
|
|
(uint) last_page_offset));
|
|
DBUG_PRINT("info",
|
|
("pointer moved to: " LSN_FMT,
|
|
LSN_IN_PARTS(log_descriptor.horizon)));
|
|
translog_check_cursor(&log_descriptor.bc);
|
|
log_descriptor.bc.protected= 0;
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
static void
|
|
used_buffs_add(TRUNSLOG_USED_BUFFERS *buffs,
|
|
struct st_translog_buffer *buff)
|
|
{
|
|
DBUG_ENTER("used_buffs_add");
|
|
DBUG_PRINT("enter", ("ADD buffs: %p unlk %u (%p) wrt_ptr: %u (%p)"
|
|
" buff %p (%u)",
|
|
buffs,
|
|
buffs->wrt_ptr, buffs->buff[buffs->wrt_ptr],
|
|
buffs->unlck_ptr, buffs->buff[buffs->unlck_ptr],
|
|
buff, buff->buffer_no));
|
|
DBUG_ASSERT(buffs->wrt_ptr < MAX_TRUNSLOG_USED_BUFFERS);
|
|
buffs->buff[buffs->wrt_ptr++]= buff;
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
static void
|
|
used_buffs_register_unlock(TRUNSLOG_USED_BUFFERS *buffs,
|
|
struct st_translog_buffer *buff
|
|
__attribute__((unused)) )
|
|
{
|
|
DBUG_ENTER("used_buffs_register_unlock");
|
|
DBUG_PRINT("enter", ("SUB buffs: %p unlk %u (%p) wrt_ptr: %u (%p)"
|
|
" buff %p (%u)",
|
|
buffs,
|
|
buffs->wrt_ptr, buffs->buff[buffs->wrt_ptr],
|
|
buffs->unlck_ptr, buffs->buff[buffs->unlck_ptr],
|
|
buff, buff->buffer_no));
|
|
DBUG_ASSERT(buffs->buff[buffs->unlck_ptr] == buff);
|
|
buffs->unlck_ptr++;
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
/*
  Error path release of all buffers which are still registered as used

  SYNOPSIS
    used_buffs_urgent_unlock()
    buffs                List of used buffers

  NOTE
    Calls translog_stop_writing() under the loghandler lock, then for each
    entry between unlck_ptr and wrt_ptr takes the buffer lock, decreases the
    writers counter, releases the lock and clears the entry.  The list is
    re-initialized with used_buffs_init() at the end.
*/

static void used_buffs_urgent_unlock(TRUNSLOG_USED_BUFFERS *buffs)
{
  uint pos;
  DBUG_ENTER("used_buffs_urgent_unlock");

  translog_lock();
  translog_stop_writing();
  translog_unlock();

  pos= buffs->unlck_ptr;
  while (pos < buffs->wrt_ptr)
  {
    struct st_translog_buffer *pending= buffs->buff[pos];

    translog_buffer_lock(pending);
    translog_buffer_decrease_writers(pending);
    translog_buffer_unlock(pending);
    buffs->buff[pos]= NULL;
    pos++;
  }

  used_buffs_init(buffs);
  DBUG_VOID_RETURN;
}
|
|
|
|
/*
|
|
Get page rest
|
|
|
|
SYNOPSIS
|
|
translog_get_current_page_rest()
|
|
|
|
NOTE loghandler should be locked
|
|
|
|
RETURN
|
|
number of bytes left on the current page
|
|
*/
|
|
|
|
static uint translog_get_current_page_rest()
|
|
{
|
|
return (TRANSLOG_PAGE_SIZE - log_descriptor.bc.current_page_fill);
|
|
}
|
|
|
|
|
|
/*
|
|
Get buffer rest in full pages
|
|
|
|
SYNOPSIS
|
|
translog_get_current_buffer_rest()
|
|
|
|
NOTE loghandler should be locked
|
|
|
|
RETURN
|
|
number of full pages left on the current buffer
|
|
*/
|
|
|
|
static uint translog_get_current_buffer_rest()
|
|
{
|
|
return (uint)((log_descriptor.bc.buffer->buffer + TRANSLOG_WRITE_BUFFER -
|
|
log_descriptor.bc.ptr) /
|
|
TRANSLOG_PAGE_SIZE);
|
|
}
|
|
|
|
/*
|
|
Calculate possible group size without first (current) page
|
|
|
|
SYNOPSIS
|
|
translog_get_current_group_size()
|
|
|
|
NOTE loghandler should be locked
|
|
|
|
RETURN
|
|
group size without first (current) page
|
|
*/
|
|
|
|
static translog_size_t translog_get_current_group_size()
|
|
{
|
|
/* buffer rest in full pages */
|
|
translog_size_t buffer_rest= translog_get_current_buffer_rest();
|
|
DBUG_ENTER("translog_get_current_group_size");
|
|
DBUG_PRINT("info", ("buffer_rest in pages: %u", buffer_rest));
|
|
|
|
buffer_rest*= log_descriptor.page_capacity_chunk_2;
|
|
/* in case of only half of buffer free we can write this and next buffer */
|
|
if (buffer_rest < log_descriptor.half_buffer_capacity_chunk_2)
|
|
{
|
|
DBUG_PRINT("info", ("buffer_rest: %lu -> add %lu",
|
|
(ulong) buffer_rest,
|
|
(ulong) log_descriptor.buffer_capacity_chunk_2));
|
|
buffer_rest+= log_descriptor.buffer_capacity_chunk_2;
|
|
}
|
|
|
|
DBUG_PRINT("info", ("buffer_rest: %lu", (ulong) buffer_rest));
|
|
|
|
DBUG_RETURN(buffer_rest);
|
|
}
|
|
|
|
|
|
/*
  Store a freshly generated LSN

  SYNOPSIS
    set_lsn()
    lsn                  Where to store the LSN
    value                The LSN value to store

  NOTE
    The loghandler lock must be held (asserted).  As a side effect the
    "everything is flushed" flag of the log descriptor is reset.
*/

static inline void set_lsn(LSN *lsn, LSN value)
{
  DBUG_ENTER("set_lsn");
  translog_lock_assert_owner();

  *lsn= value;

  /* a new LSN means that the log has something which is not flushed yet */
  log_descriptor.is_everything_flushed= 0;

  DBUG_PRINT("info", ("new LSN appeared: " LSN_FMT, LSN_IN_PARTS(value)));
  DBUG_VOID_RETURN;
}
|
|
|
|
|
|
/**
|
|
@brief Write variable record in 1 group.
|
|
|
|
@param lsn LSN of the record will be written here
|
|
@param type the log record type
|
|
@param short_trid Short transaction ID or 0 if it has no sense
|
|
@param parts Descriptor of record source parts
|
|
@param buffer_to_flush Buffer which have to be flushed if it is not 0
|
|
@param header_length Calculated header length of chunk type 0
|
|
@param trn Transaction structure pointer for hooks by
|
|
record log type, for short_id
|
|
@param hook_arg Argument which will be passed to pre-write and
|
|
in-write hooks of this record.
|
|
|
|
@note
|
|
We must have a translog_lock() when entering this function
|
|
We must have buffer_to_flush locked (if not null)
|
|
|
|
@return Operation status
|
|
@retval 0 OK
|
|
@retval 1 Error
|
|
*/
|
|
|
|
static my_bool
|
|
translog_write_variable_record_1group(LSN *lsn,
|
|
enum translog_record_type type,
|
|
MARIA_HA *tbl_info,
|
|
SHORT_TRANSACTION_ID short_trid,
|
|
struct st_translog_parts *parts,
|
|
struct st_translog_buffer
|
|
*buffer_to_flush, uint16 header_length,
|
|
TRN *trn, void *hook_arg)
|
|
{
|
|
TRANSLOG_ADDRESS horizon;
|
|
struct st_buffer_cursor cursor;
|
|
int rc= 0;
|
|
uint i;
|
|
translog_size_t record_rest, full_pages, first_page;
|
|
uint additional_chunk3_page= 0;
|
|
uchar chunk0_header[1 + 2 + 5 + 2];
|
|
DBUG_ENTER("translog_write_variable_record_1group");
|
|
translog_lock_assert_owner();
|
|
if (buffer_to_flush)
|
|
translog_buffer_lock_assert_owner(buffer_to_flush);
|
|
|
|
set_lsn(lsn, horizon= log_descriptor.horizon);
|
|
if (translog_set_lsn_for_files(LSN_FILE_NO(*lsn), LSN_FILE_NO(*lsn),
|
|
*lsn, TRUE) ||
|
|
(log_record_type_descriptor[type].inwrite_hook &&
|
|
(*log_record_type_descriptor[type].inwrite_hook)(type, trn, tbl_info,
|
|
lsn, hook_arg)))
|
|
{
|
|
translog_unlock();
|
|
if (buffer_to_flush != NULL)
|
|
{
|
|
translog_buffer_flush(buffer_to_flush);
|
|
translog_buffer_unlock(buffer_to_flush);
|
|
}
|
|
DBUG_RETURN(1);
|
|
}
|
|
cursor= log_descriptor.bc;
|
|
cursor.chaser= 1;
|
|
|
|
/* Advance pointer to be able unlock the loghandler */
|
|
first_page= translog_get_current_page_rest();
|
|
record_rest= parts->record_length - (first_page - header_length);
|
|
full_pages= record_rest / log_descriptor.page_capacity_chunk_2;
|
|
record_rest= (record_rest % log_descriptor.page_capacity_chunk_2);
|
|
|
|
if (record_rest + 1 == log_descriptor.page_capacity_chunk_2)
|
|
{
|
|
DBUG_PRINT("info", ("2 chunks type 3 is needed"));
|
|
/* We will write 2 chunks type 3 at the end of this group */
|
|
additional_chunk3_page= 1;
|
|
record_rest= 1;
|
|
}
|
|
|
|
DBUG_PRINT("info", ("first_page: %u (%u) full_pages: %u (%lu) "
|
|
"additional: %u (%u) rest %u = %u",
|
|
first_page, first_page - header_length,
|
|
full_pages,
|
|
(ulong) full_pages *
|
|
log_descriptor.page_capacity_chunk_2,
|
|
additional_chunk3_page,
|
|
additional_chunk3_page *
|
|
(log_descriptor.page_capacity_chunk_2 - 1),
|
|
record_rest, parts->record_length));
|
|
/* record_rest + 3 is chunk type 3 overhead + record_rest */
|
|
rc= translog_advance_pointer((int)(full_pages + additional_chunk3_page),
|
|
(record_rest ? record_rest + 3 : 0),
|
|
&cursor.buffs);
|
|
log_descriptor.bc.buffer->last_lsn= *lsn;
|
|
DBUG_PRINT("info", ("last_lsn set to " LSN_FMT " buffer: %p",
|
|
LSN_IN_PARTS(log_descriptor.bc.buffer->last_lsn),
|
|
log_descriptor.bc.buffer));
|
|
|
|
translog_unlock();
|
|
|
|
/*
|
|
Check if we switched buffer and need process it (current buffer is
|
|
unlocked already => we will not delay other threads
|
|
*/
|
|
if (buffer_to_flush != NULL)
|
|
{
|
|
if (!rc)
|
|
rc= translog_buffer_flush(buffer_to_flush);
|
|
translog_buffer_unlock(buffer_to_flush);
|
|
}
|
|
if (rc)
|
|
{
|
|
//translog_advance_pointer decreased writers so it is OK
|
|
DBUG_ASSERT(cursor.buffs.unlck_ptr == cursor.buffs.wrt_ptr);
|
|
DBUG_RETURN(1);
|
|
}
|
|
|
|
translog_write_variable_record_1group_header(parts, type, short_trid,
|
|
header_length, chunk0_header);
|
|
|
|
/* fill the pages */
|
|
translog_write_parts_on_page(&horizon, &cursor, first_page, parts);
|
|
|
|
DBUG_PRINT("info", ("absolute horizon: " LSN_FMT " local: " LSN_FMT,
|
|
LSN_IN_PARTS(log_descriptor.horizon),
|
|
LSN_IN_PARTS(horizon)));
|
|
|
|
for (i= 0; i < full_pages; i++)
|
|
{
|
|
if (translog_write_variable_record_chunk2_page(parts, &horizon, &cursor))
|
|
goto error;
|
|
|
|
DBUG_PRINT("info", ("absolute horizon: " LSN_FMT " local: " LSN_FMT,
|
|
LSN_IN_PARTS(log_descriptor.horizon),
|
|
LSN_IN_PARTS(horizon)));
|
|
}
|
|
|
|
if (additional_chunk3_page)
|
|
{
|
|
if (translog_write_variable_record_chunk3_page(parts,
|
|
log_descriptor.
|
|
page_capacity_chunk_2 - 2,
|
|
&horizon, &cursor))
|
|
goto error;
|
|
DBUG_PRINT("info", ("absolute horizon: " LSN_FMT " local: " LSN_FMT,
|
|
LSN_IN_PARTS(log_descriptor.horizon),
|
|
LSN_IN_PARTS(horizon)));
|
|
DBUG_ASSERT(cursor.current_page_fill == TRANSLOG_PAGE_SIZE);
|
|
}
|
|
|
|
if (translog_write_variable_record_chunk3_page(parts,
|
|
record_rest,
|
|
&horizon, &cursor))
|
|
goto error;
|
|
DBUG_PRINT("info", ("absolute horizon: " LSN_FMT " local: " LSN_FMT,
|
|
(uint) LSN_FILE_NO(log_descriptor.horizon),
|
|
(uint) LSN_OFFSET(log_descriptor.horizon),
|
|
(uint) LSN_FILE_NO(horizon),
|
|
(uint) LSN_OFFSET(horizon)));
|
|
|
|
translog_buffer_lock(cursor.buffer);
|
|
translog_buffer_decrease_writers(cursor.buffer);
|
|
used_buffs_register_unlock(&cursor.buffs, cursor.buffer);
|
|
translog_buffer_unlock(cursor.buffer);
|
|
DBUG_ASSERT(cursor.buffs.unlck_ptr == cursor.buffs.wrt_ptr);
|
|
DBUG_RETURN(0);
|
|
error:
|
|
used_buffs_urgent_unlock(&cursor.buffs);
|
|
DBUG_RETURN(1);
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Write variable record in 1 chunk.
|
|
|
|
@param lsn LSN of the record will be written here
|
|
@param type the log record type
|
|
@param short_trid Short transaction ID or 0 if it has no sense
|
|
@param parts Descriptor of record source parts
|
|
@param buffer_to_flush Buffer which have to be flushed if it is not 0
|
|
@param header_length Calculated header length of chunk type 0
|
|
@param trn Transaction structure pointer for hooks by
|
|
record log type, for short_id
|
|
@param hook_arg Argument which will be passed to pre-write and
|
|
in-write hooks of this record.
|
|
|
|
@note
|
|
We must have a translog_lock() when entering this function
|
|
We must have buffer_to_flush locked (if not null)
|
|
|
|
@return Operation status
|
|
@retval 0 OK
|
|
@retval 1 Error
|
|
*/
|
|
|
|
static my_bool
|
|
translog_write_variable_record_1chunk(LSN *lsn,
|
|
enum translog_record_type type,
|
|
MARIA_HA *tbl_info,
|
|
SHORT_TRANSACTION_ID short_trid,
|
|
struct st_translog_parts *parts,
|
|
struct st_translog_buffer
|
|
*buffer_to_flush, uint16 header_length,
|
|
TRN *trn, void *hook_arg)
|
|
{
|
|
int rc;
|
|
uchar chunk0_header[1 + 2 + 5 + 2];
|
|
DBUG_ENTER("translog_write_variable_record_1chunk");
|
|
translog_lock_assert_owner();
|
|
if (buffer_to_flush)
|
|
translog_buffer_lock_assert_owner(buffer_to_flush);
|
|
|
|
translog_write_variable_record_1group_header(parts, type, short_trid,
|
|
header_length, chunk0_header);
|
|
set_lsn(lsn, log_descriptor.horizon);
|
|
if (translog_set_lsn_for_files(LSN_FILE_NO(*lsn), LSN_FILE_NO(*lsn),
|
|
*lsn, TRUE) ||
|
|
(log_record_type_descriptor[type].inwrite_hook &&
|
|
(*log_record_type_descriptor[type].inwrite_hook)(type, trn, tbl_info,
|
|
lsn, hook_arg)))
|
|
{
|
|
translog_unlock();
|
|
rc= 1;
|
|
goto err;
|
|
}
|
|
|
|
rc= translog_write_parts_on_page(&log_descriptor.horizon,
|
|
&log_descriptor.bc,
|
|
parts->total_record_length, parts);
|
|
log_descriptor.bc.buffer->last_lsn= *lsn;
|
|
DBUG_PRINT("info", ("last_lsn set to " LSN_FMT " buffer: %p",
|
|
LSN_IN_PARTS(log_descriptor.bc.buffer->last_lsn),
|
|
log_descriptor.bc.buffer));
|
|
translog_unlock();
|
|
|
|
/*
|
|
check if we switched buffer and need process it (current buffer is
|
|
unlocked already => we will not delay other threads
|
|
*/
|
|
err:
|
|
if (buffer_to_flush != NULL)
|
|
{
|
|
if (!rc)
|
|
rc= translog_buffer_flush(buffer_to_flush);
|
|
translog_buffer_unlock(buffer_to_flush);
|
|
}
|
|
|
|
DBUG_RETURN(rc);
|
|
}
|
|
|
|
|
|
/*
|
|
@brief Calculates and write LSN difference (compressed LSN).
|
|
|
|
@param base_lsn LSN from which we calculate difference
|
|
@param lsn LSN for codding
|
|
@param dst Result will be written to dst[-pack_length] .. dst[-1]
|
|
|
|
@note To store an LSN in a compact way we will use the following compression:
|
|
If a log record has LSN1, and it contains the LSN2 as a back reference,
|
|
Instead of LSN2 we write LSN1-LSN2, encoded as:
|
|
two bits the number N (see below)
|
|
14 bits
|
|
N bytes
|
|
That is, LSN is encoded in 2..5 bytes, and the number of bytes minus 2
|
|
is stored in the first two bits.
|
|
|
|
@note function made to write the result in backward direction with no
|
|
special sense or tricks both directions are equal in complicity
|
|
|
|
@retval # pointer on coded LSN
|
|
*/
|
|
|
|
static uchar *translog_put_LSN_diff(LSN base_lsn, LSN lsn, uchar *dst)
|
|
{
|
|
uint64 diff;
|
|
DBUG_ENTER("translog_put_LSN_diff");
|
|
DBUG_PRINT("enter", ("Base: " LSN_FMT " val: " LSN_FMT " dst: %p",
|
|
LSN_IN_PARTS(base_lsn), LSN_IN_PARTS(lsn),
|
|
dst));
|
|
DBUG_ASSERT(base_lsn > lsn);
|
|
diff= base_lsn - lsn;
|
|
DBUG_PRINT("info", ("Diff: 0x%llx", (ulonglong) diff));
|
|
if (diff <= 0x3FFF)
|
|
{
|
|
dst-= 2;
|
|
/*
|
|
Note we store this high uchar first to ensure that first uchar has
|
|
0 in the 3 upper bits.
|
|
*/
|
|
dst[0]= (uchar)(diff >> 8);
|
|
dst[1]= (uchar)(diff & 0xFF);
|
|
}
|
|
else if (diff <= 0x3FFFFFL)
|
|
{
|
|
dst-= 3;
|
|
dst[0]= (uchar)(0x40 | (diff >> 16));
|
|
int2store(dst + 1, diff & 0xFFFF);
|
|
}
|
|
else if (diff <= 0x3FFFFFFFL)
|
|
{
|
|
dst-= 4;
|
|
dst[0]= (uchar)(0x80 | (diff >> 24));
|
|
int3store(dst + 1, diff & 0xFFFFFFL);
|
|
}
|
|
else if (diff <= 0x3FFFFFFFFFLL)
|
|
|
|
{
|
|
dst-= 5;
|
|
dst[0]= (uchar)(0xC0 | (diff >> 32));
|
|
int4store(dst + 1, diff & 0xFFFFFFFFL);
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
It is full LSN after special 1 diff (which is impossible
|
|
in real life)
|
|
*/
|
|
dst-= 2 + LSN_STORE_SIZE;
|
|
dst[0]= 0;
|
|
dst[1]= 1;
|
|
lsn_store(dst + 2, lsn);
|
|
}
|
|
DBUG_PRINT("info", ("new dst: %p", dst));
|
|
DBUG_RETURN(dst);
|
|
}
|
|
|
|
|
|
/*
|
|
Get LSN from LSN-difference (compressed LSN)
|
|
|
|
SYNOPSIS
|
|
translog_get_LSN_from_diff()
|
|
base_lsn LSN from which we calculate difference
|
|
src pointer to coded lsn
|
|
dst pointer to buffer where to write 7byte LSN
|
|
|
|
NOTE:
|
|
To store an LSN in a compact way we will use the following compression:
|
|
|
|
If a log record has LSN1, and it contains the lSN2 as a back reference,
|
|
Instead of LSN2 we write LSN1-LSN2, encoded as:
|
|
|
|
two bits the number N (see below)
|
|
14 bits
|
|
N bytes
|
|
|
|
That is, LSN is encoded in 2..5 bytes, and the number of bytes minus 2
|
|
is stored in the first two bits.
|
|
|
|
RETURN
|
|
pointer to buffer after decoded LSN
|
|
*/
|
|
|
|
static uchar *translog_get_LSN_from_diff(LSN base_lsn, uchar *src, uchar *dst)
|
|
{
|
|
LSN lsn;
|
|
uint32 diff;
|
|
uint32 first_byte;
|
|
uint32 file_no, rec_offset;
|
|
uint8 code;
|
|
DBUG_ENTER("translog_get_LSN_from_diff");
|
|
DBUG_PRINT("enter", ("Base: " LSN_FMT " src:%p dst %p",
|
|
LSN_IN_PARTS(base_lsn), src, dst));
|
|
first_byte= *((uint8*) src);
|
|
code= first_byte >> 6; /* Length is in 2 most significant bits */
|
|
first_byte&= 0x3F;
|
|
src++; /* Skip length + encode */
|
|
file_no= LSN_FILE_NO(base_lsn); /* Assume relative */
|
|
DBUG_PRINT("info", ("code: %u first byte: %lu",
|
|
(uint) code, (ulong) first_byte));
|
|
switch (code) {
|
|
case 0:
|
|
if (first_byte == 0 && *((uint8*)src) == 1)
|
|
{
|
|
/*
|
|
It is full LSN after special 1 diff (which is impossible
|
|
in real life)
|
|
*/
|
|
memcpy(dst, src + 1, LSN_STORE_SIZE);
|
|
DBUG_PRINT("info", ("Special case of full LSN, new src:%p",
|
|
src + 1 + LSN_STORE_SIZE));
|
|
DBUG_RETURN(src + 1 + LSN_STORE_SIZE);
|
|
}
|
|
rec_offset= LSN_OFFSET(base_lsn) - ((first_byte << 8) | *((uint8*)src));
|
|
break;
|
|
case 1:
|
|
diff= uint2korr(src);
|
|
rec_offset= LSN_OFFSET(base_lsn) - ((first_byte << 16) | diff);
|
|
break;
|
|
case 2:
|
|
diff= uint3korr(src);
|
|
rec_offset= LSN_OFFSET(base_lsn) - ((first_byte << 24) | diff);
|
|
break;
|
|
case 3:
|
|
{
|
|
ulonglong base_offset= LSN_OFFSET(base_lsn);
|
|
diff= uint4korr(src);
|
|
if (diff > LSN_OFFSET(base_lsn))
|
|
{
|
|
/* take 1 from file offset */
|
|
first_byte++;
|
|
base_offset+= 0x100000000LL;
|
|
}
|
|
file_no= LSN_FILE_NO(base_lsn) - first_byte;
|
|
DBUG_ASSERT(base_offset - diff <= UINT_MAX);
|
|
rec_offset= (uint32)(base_offset - diff);
|
|
break;
|
|
}
|
|
default:
|
|
DBUG_ASSERT(0);
|
|
DBUG_RETURN(NULL);
|
|
}
|
|
lsn= MAKE_LSN(file_no, rec_offset);
|
|
src+= code + 1;
|
|
lsn_store(dst, lsn);
|
|
DBUG_PRINT("info", ("new src:%p", src));
|
|
DBUG_RETURN(src);
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Encodes relative LSNs listed in the parameters.
|
|
|
|
@param parts Parts list with encoded LSN(s)
|
|
@param base_lsn LSN which is base for encoding
|
|
@param lsns number of LSN(s) to encode
|
|
@param compressed_LSNs buffer which can be used for storing compressed LSN(s)
|
|
*/
|
|
|
|
static void translog_relative_LSN_encode(struct st_translog_parts *parts,
|
|
LSN base_lsn,
|
|
uint lsns, uchar *compressed_LSNs)
|
|
{
|
|
LEX_CUSTRING *part;
|
|
uint lsns_len= lsns * LSN_STORE_SIZE;
|
|
uchar buffer_src[MAX_NUMBER_OF_LSNS_PER_RECORD * LSN_STORE_SIZE];
|
|
uchar *buffer= buffer_src;
|
|
const uchar *cbuffer;
|
|
|
|
DBUG_ENTER("translog_relative_LSN_encode");
|
|
|
|
DBUG_ASSERT(parts->current != 0);
|
|
part= parts->parts + parts->current;
|
|
|
|
/* collect all LSN(s) in one chunk if it (they) is (are) divided */
|
|
if (part->length < lsns_len)
|
|
{
|
|
size_t copied= part->length;
|
|
LEX_CUSTRING *next_part;
|
|
DBUG_PRINT("info", ("Using buffer:%p", compressed_LSNs));
|
|
memcpy(buffer, part->str, part->length);
|
|
next_part= parts->parts + parts->current + 1;
|
|
do
|
|
{
|
|
DBUG_ASSERT(next_part < parts->parts + parts->elements);
|
|
if ((next_part->length + copied) < lsns_len)
|
|
{
|
|
memcpy(buffer + copied, next_part->str,
|
|
next_part->length);
|
|
copied+= next_part->length;
|
|
next_part->length= 0; next_part->str= 0;
|
|
/* delete_dynamic_element(&parts->parts, parts->current + 1); */
|
|
next_part++;
|
|
parts->current++;
|
|
part= parts->parts + parts->current;
|
|
}
|
|
else
|
|
{
|
|
size_t len= lsns_len - copied;
|
|
memcpy(buffer + copied, next_part->str, len);
|
|
copied= lsns_len;
|
|
next_part->str+= len;
|
|
next_part->length-= len;
|
|
}
|
|
} while (copied < lsns_len);
|
|
cbuffer= buffer;
|
|
}
|
|
else
|
|
{
|
|
cbuffer= part->str;
|
|
part->str+= lsns_len;
|
|
part->length-= lsns_len;
|
|
parts->current--;
|
|
part= parts->parts + parts->current;
|
|
}
|
|
|
|
{
|
|
/* Compress */
|
|
LSN ref;
|
|
int economy;
|
|
const uchar *src_ptr;
|
|
uchar *dst_ptr= compressed_LSNs + (MAX_NUMBER_OF_LSNS_PER_RECORD *
|
|
COMPRESSED_LSN_MAX_STORE_SIZE);
|
|
/*
|
|
We write the result in backward direction with no special sense or
|
|
tricks both directions are equal in complicity
|
|
*/
|
|
for (src_ptr= cbuffer + lsns_len - LSN_STORE_SIZE;
|
|
src_ptr >= (const uchar*)cbuffer;
|
|
src_ptr-= LSN_STORE_SIZE)
|
|
{
|
|
ref= lsn_korr(src_ptr);
|
|
dst_ptr= translog_put_LSN_diff(base_lsn, ref, dst_ptr);
|
|
}
|
|
part->length= (size_t)((compressed_LSNs +
|
|
(MAX_NUMBER_OF_LSNS_PER_RECORD *
|
|
COMPRESSED_LSN_MAX_STORE_SIZE)) -
|
|
dst_ptr);
|
|
economy= lsns_len - (uint)part->length;
|
|
parts->record_length-= economy;
|
|
DBUG_PRINT("info", ("new length of LSNs: %lu economy: %d",
|
|
(ulong)part->length, economy));
|
|
parts->total_record_length-= economy;
|
|
part->str= dst_ptr;
|
|
}
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Write multi-group variable-size record.
|
|
|
|
@param lsn LSN of the record will be written here
|
|
@param type the log record type
|
|
@param short_trid Short transaction ID or 0 if it has no sense
|
|
@param parts Descriptor of record source parts
|
|
@param buffer_to_flush Buffer which have to be flushed if it is not 0
|
|
@param header_length Header length calculated for 1 group
|
|
@param buffer_rest Beginning from which we plan to write in full pages
|
|
@param trn Transaction structure pointer for hooks by
|
|
record log type, for short_id
|
|
@param hook_arg Argument which will be passed to pre-write and
|
|
in-write hooks of this record.
|
|
|
|
@note
|
|
We must have a translog_lock() when entering this function
|
|
|
|
We must have buffer_to_flush locked (if not null)
|
|
buffer_to_flush should *NOT* be locked when calling this function.
|
|
(This note is here as this is different from most other
|
|
translog_write...() functions which require the buffer to be locked)
|
|
|
|
@return Operation status
|
|
@retval 0 OK
|
|
@retval 1 Error
|
|
*/
|
|
|
|
static my_bool
|
|
translog_write_variable_record_mgroup(LSN *lsn,
|
|
enum translog_record_type type,
|
|
MARIA_HA *tbl_info,
|
|
SHORT_TRANSACTION_ID short_trid,
|
|
struct st_translog_parts *parts,
|
|
struct st_translog_buffer
|
|
*buffer_to_flush,
|
|
uint16 header_length,
|
|
translog_size_t buffer_rest,
|
|
TRN *trn, void *hook_arg)
|
|
{
|
|
TRANSLOG_ADDRESS horizon;
|
|
struct st_buffer_cursor cursor;
|
|
int rc= 0;
|
|
uint i, chunk2_page, full_pages;
|
|
uint curr_group= 0;
|
|
translog_size_t record_rest, first_page, chunk3_pages, chunk0_pages= 1;
|
|
translog_size_t done= 0;
|
|
struct st_translog_group_descriptor group;
|
|
DYNAMIC_ARRAY groups;
|
|
uint16 chunk3_size;
|
|
uint16 page_capacity= log_descriptor.page_capacity_chunk_2 + 1;
|
|
uint16 last_page_capacity;
|
|
my_bool new_page_before_chunk0= 1, first_chunk0= 1;
|
|
uchar chunk0_header[1 + 2 + 5 + 2 + 2], group_desc[7 + 1];
|
|
uchar chunk2_header[1];
|
|
uint header_fixed_part= header_length + 2;
|
|
uint groups_per_page= (page_capacity - header_fixed_part) / (7 + 1);
|
|
uint file_of_the_first_group;
|
|
int pages_to_skip;
|
|
struct st_translog_buffer *buffer_of_last_lsn;
|
|
my_bool external_buffer_to_flush= TRUE;
|
|
DBUG_ENTER("translog_write_variable_record_mgroup");
|
|
translog_lock_assert_owner();
|
|
|
|
used_buffs_init(&cursor.buffs);
|
|
chunk2_header[0]= TRANSLOG_CHUNK_NOHDR;
|
|
|
|
if (my_init_dynamic_array(PSI_INSTRUMENT_ME, &groups,
|
|
sizeof(struct st_translog_group_descriptor),
|
|
10, 10, MYF(0)))
|
|
{
|
|
translog_unlock();
|
|
if (buffer_to_flush != NULL)
|
|
{
|
|
translog_buffer_flush(buffer_to_flush);
|
|
translog_buffer_unlock(buffer_to_flush);
|
|
}
|
|
DBUG_PRINT("error", ("init array failed"));
|
|
DBUG_RETURN(1);
|
|
}
|
|
|
|
first_page= translog_get_current_page_rest();
|
|
record_rest= parts->record_length - (first_page - 1);
|
|
DBUG_PRINT("info", ("Record Rest: %lu", (ulong) record_rest));
|
|
|
|
if (record_rest < buffer_rest)
|
|
{
|
|
/*
|
|
The record (group 1 type) is larger than the free space on the page
|
|
- we need to split it in two. But when we split it in two, the first
|
|
part is big enough to hold all the data of the record (because the
|
|
header of the first part of the split is smaller than the header of
|
|
the record as a whole when it takes only one chunk)
|
|
*/
|
|
DBUG_PRINT("info", ("too many free space because changing header"));
|
|
buffer_rest-= log_descriptor.page_capacity_chunk_2;
|
|
DBUG_ASSERT(record_rest >= buffer_rest);
|
|
}
|
|
|
|
file_of_the_first_group= LSN_FILE_NO(log_descriptor.horizon);
|
|
translog_mark_file_unfinished(file_of_the_first_group);
|
|
do
|
|
{
|
|
DBUG_ASSERT(cursor.buffs.unlck_ptr == cursor.buffs.wrt_ptr);
|
|
group.addr= horizon= log_descriptor.horizon;
|
|
cursor= log_descriptor.bc;
|
|
cursor.chaser= 1;
|
|
if ((full_pages= buffer_rest / log_descriptor.page_capacity_chunk_2) > 255)
|
|
{
|
|
/* sizeof(uint8) == 256 is max number of chunk in multi-chunks group */
|
|
full_pages= 255;
|
|
buffer_rest= full_pages * log_descriptor.page_capacity_chunk_2;
|
|
}
|
|
/*
|
|
group chunks =
|
|
full pages + first page (which actually can be full, too).
|
|
But here we assign number of chunks - 1
|
|
*/
|
|
group.num= full_pages;
|
|
if (insert_dynamic(&groups, (uchar*) &group))
|
|
{
|
|
DBUG_PRINT("error", ("insert into array failed"));
|
|
goto err_unlock;
|
|
}
|
|
|
|
DBUG_PRINT("info", ("chunk: #%u first_page: %u (%u) "
|
|
"full_pages: %lu (%lu) "
|
|
"Left %lu",
|
|
groups.elements,
|
|
first_page, first_page - 1,
|
|
(ulong) full_pages,
|
|
(ulong) (full_pages *
|
|
log_descriptor.page_capacity_chunk_2),
|
|
(ulong)(parts->record_length - (first_page - 1 +
|
|
buffer_rest) -
|
|
done)));
|
|
rc= translog_advance_pointer((int)full_pages, 0, &cursor.buffs);
|
|
|
|
translog_unlock();
|
|
|
|
if (buffer_to_flush != NULL)
|
|
{
|
|
if (!external_buffer_to_flush)
|
|
translog_buffer_decrease_writers(buffer_to_flush);
|
|
if (!rc)
|
|
rc= translog_buffer_flush(buffer_to_flush);
|
|
translog_buffer_unlock(buffer_to_flush);
|
|
buffer_to_flush= NULL;
|
|
}
|
|
external_buffer_to_flush= FALSE;
|
|
|
|
if (rc)
|
|
{
|
|
DBUG_PRINT("error", ("flush of unlock buffer failed"));
|
|
//translog_advance_pointer decreased writers so it is OK
|
|
DBUG_ASSERT(cursor.buffs.unlck_ptr == cursor.buffs.wrt_ptr);
|
|
goto err;
|
|
}
|
|
|
|
translog_write_data_on_page(&horizon, &cursor, 1, chunk2_header);
|
|
translog_write_parts_on_page(&horizon, &cursor, first_page - 1, parts);
|
|
DBUG_PRINT("info", ("absolute horizon: " LSN_FMT " local: " LSN_FMT " "
|
|
"Left %lu",
|
|
LSN_IN_PARTS(log_descriptor.horizon),
|
|
LSN_IN_PARTS(horizon),
|
|
(ulong) (parts->record_length - (first_page - 1) -
|
|
done)));
|
|
|
|
for (i= 0; i < full_pages; i++)
|
|
{
|
|
if (translog_write_variable_record_chunk2_page(parts, &horizon, &cursor))
|
|
goto err;
|
|
|
|
DBUG_PRINT("info", ("absolute horizon: " LSN_FMT " "
|
|
"local: " LSN_FMT " "
|
|
"Left: %lu",
|
|
LSN_IN_PARTS(log_descriptor.horizon),
|
|
LSN_IN_PARTS(horizon),
|
|
(ulong) (parts->record_length - (first_page - 1) -
|
|
i * log_descriptor.page_capacity_chunk_2 -
|
|
done)));
|
|
}
|
|
|
|
done+= (first_page - 1 + buffer_rest);
|
|
|
|
if (translog_chaser_page_next(&horizon, &cursor))
|
|
{
|
|
DBUG_PRINT("error", ("flush of unlock buffer failed"));
|
|
goto err;
|
|
}
|
|
translog_buffer_lock(cursor.buffer);
|
|
translog_buffer_decrease_writers(cursor.buffer);
|
|
used_buffs_register_unlock(&cursor.buffs, cursor.buffer);
|
|
translog_buffer_unlock(cursor.buffer);
|
|
|
|
translog_lock();
|
|
|
|
/* Check that we have place for chunk type 2 */
|
|
first_page= translog_get_current_page_rest();
|
|
if (first_page <= 1)
|
|
{
|
|
if (translog_page_next(&log_descriptor.horizon, &log_descriptor.bc,
|
|
&buffer_to_flush))
|
|
goto err_unlock;
|
|
first_page= translog_get_current_page_rest();
|
|
}
|
|
buffer_rest= translog_get_current_group_size();
|
|
|
|
if (buffer_to_flush)
|
|
used_buffs_register_unlock(&cursor.buffs,
|
|
buffer_to_flush); // will be unlocked
|
|
|
|
} while ((translog_size_t)(first_page + buffer_rest) <
|
|
(translog_size_t)(parts->record_length - done));
|
|
|
|
group.addr= horizon= log_descriptor.horizon;
|
|
cursor= log_descriptor.bc;
|
|
cursor.chaser= 1;
|
|
group.num= 0; /* 0 because it does not matter */
|
|
if (insert_dynamic(&groups, (uchar*) &group))
|
|
{
|
|
DBUG_PRINT("error", ("insert into array failed"));
|
|
goto err_unlock;
|
|
}
|
|
record_rest= parts->record_length - done;
|
|
DBUG_PRINT("info", ("Record rest: %lu", (ulong) record_rest));
|
|
if (first_page > record_rest + 1)
|
|
{
|
|
/*
|
|
We have not so much data to fill all first page
|
|
(no speaking about full pages)
|
|
so it will be:
|
|
<chunk0 <data>>
|
|
or
|
|
<chunk0>...<chunk0><chunk0 <data>>
|
|
or
|
|
<chunk3 <data>><chunk0>...<chunk0><chunk0 <possible data of 1 byte>>
|
|
*/
|
|
chunk2_page= full_pages= 0;
|
|
last_page_capacity= first_page;
|
|
pages_to_skip= -1;
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
We will have:
|
|
<chunk2 <data>>...<chunk2 <data>><chunk0 <data>>
|
|
or
|
|
<chunk2 <data>>...<chunk2 <data>><chunk0>...<chunk0><chunk0 <data>>
|
|
or
|
|
<chunk3 <data>><chunk0>...<chunk0><chunk0 <possible data of 1 byte>>
|
|
*/
|
|
chunk2_page= 1;
|
|
record_rest-= (first_page - 1);
|
|
pages_to_skip= full_pages=
|
|
record_rest / log_descriptor.page_capacity_chunk_2;
|
|
record_rest= (record_rest % log_descriptor.page_capacity_chunk_2);
|
|
last_page_capacity= page_capacity;
|
|
}
|
|
chunk3_size= 0;
|
|
chunk3_pages= 0;
|
|
if (last_page_capacity > record_rest + 1 && record_rest != 0)
|
|
{
|
|
if (last_page_capacity >
|
|
record_rest + header_fixed_part + groups.elements * (7 + 1))
|
|
{
|
|
/* 1 record of type 0 */
|
|
chunk3_pages= 0;
|
|
}
|
|
else
|
|
{
|
|
pages_to_skip++;
|
|
chunk3_pages= 1;
|
|
if (record_rest + 2 == last_page_capacity)
|
|
{
|
|
chunk3_size= record_rest - 1;
|
|
record_rest= 1;
|
|
}
|
|
else
|
|
{
|
|
chunk3_size= record_rest;
|
|
record_rest= 0;
|
|
}
|
|
}
|
|
}
|
|
/*
|
|
A first non-full page will hold type 0 chunk only if it fit in it with
|
|
all its headers
|
|
*/
|
|
while (page_capacity <
|
|
record_rest + header_fixed_part +
|
|
(groups.elements - groups_per_page * (chunk0_pages - 1)) * (7 + 1))
|
|
chunk0_pages++;
|
|
DBUG_PRINT("info", ("chunk0_pages: %u groups %u groups per full page: %u "
|
|
"Group on last page: %u",
|
|
chunk0_pages, groups.elements,
|
|
groups_per_page,
|
|
(groups.elements -
|
|
((page_capacity - header_fixed_part) / (7 + 1)) *
|
|
(chunk0_pages - 1))));
|
|
DBUG_PRINT("info", ("first_page: %u chunk2: %u full_pages: %u (%lu) "
|
|
"chunk3: %u (%u) rest: %u",
|
|
first_page,
|
|
chunk2_page, full_pages,
|
|
(ulong) full_pages *
|
|
log_descriptor.page_capacity_chunk_2,
|
|
chunk3_pages, (uint) chunk3_size, (uint) record_rest));
|
|
|
|
DBUG_ASSERT(cursor.buffs.unlck_ptr == cursor.buffs.wrt_ptr);
|
|
rc= translog_advance_pointer(pages_to_skip + (int)(chunk0_pages - 1),
|
|
record_rest + header_fixed_part +
|
|
(groups.elements -
|
|
((page_capacity -
|
|
header_fixed_part) / (7 + 1)) *
|
|
(chunk0_pages - 1)) * (7 + 1),
|
|
&cursor.buffs);
|
|
buffer_of_last_lsn= log_descriptor.bc.buffer;
|
|
translog_unlock();
|
|
|
|
if (buffer_to_flush != NULL)
|
|
{
|
|
DBUG_ASSERT(!external_buffer_to_flush);
|
|
translog_buffer_decrease_writers(buffer_to_flush);
|
|
if (!rc)
|
|
rc= translog_buffer_flush(buffer_to_flush);
|
|
translog_buffer_unlock(buffer_to_flush);
|
|
buffer_to_flush= NULL;
|
|
}
|
|
if (rc)
|
|
{
|
|
DBUG_PRINT("error", ("flush of unlock buffer failed"));
|
|
goto err;
|
|
}
|
|
|
|
if (rc)
|
|
goto err;
|
|
|
|
if (chunk2_page)
|
|
{
|
|
DBUG_PRINT("info", ("chunk 2 to finish first page"));
|
|
translog_write_data_on_page(&horizon, &cursor, 1, chunk2_header);
|
|
translog_write_parts_on_page(&horizon, &cursor, first_page - 1, parts);
|
|
DBUG_PRINT("info", ("absolute horizon: " LSN_FMT " local: " LSN_FMT " "
|
|
"Left: %lu",
|
|
LSN_IN_PARTS(log_descriptor.horizon),
|
|
LSN_IN_PARTS(horizon),
|
|
(ulong) (parts->record_length - (first_page - 1) -
|
|
done)));
|
|
}
|
|
else if (chunk3_pages)
|
|
{
|
|
uchar chunk3_header[3];
|
|
DBUG_PRINT("info", ("chunk 3"));
|
|
DBUG_ASSERT(full_pages == 0);
|
|
chunk3_pages= 0;
|
|
chunk3_header[0]= TRANSLOG_CHUNK_LNGTH;
|
|
int2store(chunk3_header + 1, chunk3_size);
|
|
translog_write_data_on_page(&horizon, &cursor, 3, chunk3_header);
|
|
translog_write_parts_on_page(&horizon, &cursor, chunk3_size, parts);
|
|
DBUG_PRINT("info", ("absolute horizon: " LSN_FMT " local: " LSN_FMT " "
|
|
"Left: %lu",
|
|
LSN_IN_PARTS(log_descriptor.horizon),
|
|
LSN_IN_PARTS(horizon),
|
|
(ulong) (parts->record_length - chunk3_size - done)));
|
|
}
|
|
else
|
|
{
|
|
DBUG_PRINT("info", ("no new_page_before_chunk0"));
|
|
new_page_before_chunk0= 0;
|
|
}
|
|
|
|
for (i= 0; i < full_pages; i++)
|
|
{
|
|
DBUG_ASSERT(chunk2_page != 0);
|
|
if (translog_write_variable_record_chunk2_page(parts, &horizon, &cursor))
|
|
goto err;
|
|
|
|
DBUG_PRINT("info", ("absolute horizon: " LSN_FMT " local: " LSN_FMT " "
|
|
"Left: %lu",
|
|
LSN_IN_PARTS(log_descriptor.horizon),
|
|
LSN_IN_PARTS(horizon),
|
|
(ulong) (parts->record_length - (first_page - 1) -
|
|
i * log_descriptor.page_capacity_chunk_2 -
|
|
done)));
|
|
}
|
|
|
|
if (chunk3_pages &&
|
|
translog_write_variable_record_chunk3_page(parts,
|
|
chunk3_size,
|
|
&horizon, &cursor))
|
|
goto err;
|
|
DBUG_PRINT("info", ("absolute horizon: " LSN_FMT " local: " LSN_FMT,
|
|
LSN_IN_PARTS(log_descriptor.horizon),
|
|
LSN_IN_PARTS(horizon)));
|
|
|
|
*chunk0_header= (uchar) (type | TRANSLOG_CHUNK_LSN);
|
|
int2store(chunk0_header + 1, short_trid);
|
|
translog_write_variable_record_1group_code_len(chunk0_header + 3,
|
|
parts->record_length,
|
|
header_length);
|
|
do
|
|
{
|
|
int limit;
|
|
if (new_page_before_chunk0 &&
|
|
translog_chaser_page_next(&horizon, &cursor))
|
|
{
|
|
DBUG_PRINT("error", ("flush of unlock buffer failed"));
|
|
goto err;
|
|
}
|
|
new_page_before_chunk0= 1;
|
|
|
|
if (first_chunk0)
|
|
{
|
|
first_chunk0= 0;
|
|
|
|
/*
|
|
We can drop "log_descriptor.is_everything_flushed" earlier when have
|
|
lock on loghandler and assign initial value of "horizon" variable or
|
|
before unlocking loghandler (because we will increase writers
|
|
counter on the buffer and every thread which wanted flush the buffer
|
|
will wait till we finish with it). But IMHO better here take short
|
|
lock and do not bother other threads with waiting.
|
|
*/
|
|
translog_lock();
|
|
set_lsn(lsn, horizon);
|
|
buffer_of_last_lsn->last_lsn= *lsn;
|
|
DBUG_PRINT("info", ("last_lsn set to " LSN_FMT " buffer: %p",
|
|
LSN_IN_PARTS(buffer_of_last_lsn->last_lsn),
|
|
buffer_of_last_lsn));
|
|
if (log_record_type_descriptor[type].inwrite_hook &&
|
|
(*log_record_type_descriptor[type].inwrite_hook) (type, trn,
|
|
tbl_info,
|
|
lsn, hook_arg))
|
|
goto err_unlock;
|
|
translog_unlock();
|
|
}
|
|
|
|
/*
|
|
A first non-full page will hold type 0 chunk only if it fit in it with
|
|
all its headers => the fist page is full or number of groups less then
|
|
possible number of full page.
|
|
*/
|
|
limit= (groups_per_page < groups.elements - curr_group ?
|
|
groups_per_page : groups.elements - curr_group);
|
|
DBUG_PRINT("info", ("Groups: %u curr: %u limit: %u",
|
|
(uint) groups.elements, (uint) curr_group,
|
|
(uint) limit));
|
|
|
|
if (chunk0_pages == 1)
|
|
{
|
|
DBUG_PRINT("info", ("chunk_len: 2 + %u * (7+1) + %u = %u",
|
|
(uint) limit, (uint) record_rest,
|
|
(uint) (2 + limit * (7 + 1) + record_rest)));
|
|
int2store(chunk0_header + header_length - 2,
|
|
2 + limit * (7 + 1) + record_rest);
|
|
}
|
|
else
|
|
{
|
|
DBUG_PRINT("info", ("chunk_len: 2 + %u * (7+1) = %u",
|
|
(uint) limit, (uint) (2 + limit * (7 + 1))));
|
|
int2store(chunk0_header + header_length - 2, 2 + limit * (7 + 1));
|
|
}
|
|
int2store(chunk0_header + header_length, groups.elements - curr_group);
|
|
translog_write_data_on_page(&horizon, &cursor, header_fixed_part,
|
|
chunk0_header);
|
|
for (i= curr_group; i < limit + curr_group; i++)
|
|
{
|
|
struct st_translog_group_descriptor *grp_ptr;
|
|
grp_ptr= dynamic_element(&groups, i,
|
|
struct st_translog_group_descriptor *);
|
|
lsn_store(group_desc, grp_ptr->addr);
|
|
group_desc[7]= grp_ptr->num;
|
|
translog_write_data_on_page(&horizon, &cursor, (7 + 1), group_desc);
|
|
}
|
|
|
|
if (chunk0_pages == 1 && record_rest != 0)
|
|
translog_write_parts_on_page(&horizon, &cursor, record_rest, parts);
|
|
|
|
chunk0_pages--;
|
|
curr_group+= limit;
|
|
/* put special type to indicate that it is not LSN chunk */
|
|
*chunk0_header= (uchar) (TRANSLOG_CHUNK_LSN | TRANSLOG_CHUNK_0_CONT);
|
|
} while (chunk0_pages != 0);
|
|
translog_buffer_lock(cursor.buffer);
|
|
translog_buffer_decrease_writers(cursor.buffer);
|
|
used_buffs_register_unlock(&cursor.buffs, cursor.buffer);
|
|
translog_buffer_unlock(cursor.buffer);
|
|
rc= 0;
|
|
DBUG_ASSERT(cursor.buffs.unlck_ptr == cursor.buffs.wrt_ptr);
|
|
|
|
if (translog_set_lsn_for_files(file_of_the_first_group, LSN_FILE_NO(*lsn),
|
|
*lsn, FALSE))
|
|
goto err;
|
|
|
|
translog_mark_file_finished(file_of_the_first_group);
|
|
|
|
delete_dynamic(&groups);
|
|
DBUG_RETURN(0);
|
|
|
|
err_unlock:
|
|
|
|
translog_unlock();
|
|
|
|
err:
|
|
|
|
if (cursor.buffs.unlck_ptr != cursor.buffs.wrt_ptr)
|
|
used_buffs_urgent_unlock(&cursor.buffs);
|
|
|
|
if (buffer_to_flush != NULL)
|
|
{
|
|
/* This is to prevent locking buffer forever in case of error */
|
|
if (!external_buffer_to_flush)
|
|
translog_buffer_decrease_writers(buffer_to_flush);
|
|
if (!rc)
|
|
rc= translog_buffer_flush(buffer_to_flush);
|
|
translog_buffer_unlock(buffer_to_flush);
|
|
buffer_to_flush= NULL;
|
|
}
|
|
|
|
|
|
translog_mark_file_finished(file_of_the_first_group);
|
|
|
|
delete_dynamic(&groups);
|
|
DBUG_RETURN(1);
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Write the variable length log record.
|
|
|
|
@param lsn LSN of the record will be written here
|
|
@param type the log record type
|
|
@param short_trid Short transaction ID or 0 if it has no sense
|
|
@param parts Descriptor of record source parts
|
|
@param trn Transaction structure pointer for hooks by
|
|
record log type, for short_id
|
|
@param hook_arg Argument which will be passed to pre-write and
|
|
in-write hooks of this record.
|
|
|
|
@return Operation status
|
|
@retval 0 OK
|
|
@retval 1 Error
|
|
*/
|
|
|
|
static my_bool translog_write_variable_record(LSN *lsn,
|
|
enum translog_record_type type,
|
|
MARIA_HA *tbl_info,
|
|
SHORT_TRANSACTION_ID short_trid,
|
|
struct st_translog_parts *parts,
|
|
TRN *trn, void *hook_arg)
|
|
{
|
|
struct st_translog_buffer *buffer_to_flush= NULL;
|
|
uint header_length1= 1 + 2 + 2 +
|
|
translog_variable_record_length_bytes(parts->record_length);
|
|
ulong buffer_rest;
|
|
uint page_rest;
|
|
/* Max number of such LSNs per record is 2 */
|
|
uchar compressed_LSNs[MAX_NUMBER_OF_LSNS_PER_RECORD *
|
|
COMPRESSED_LSN_MAX_STORE_SIZE];
|
|
my_bool res;
|
|
DBUG_ENTER("translog_write_variable_record");
|
|
|
|
translog_lock();
|
|
DBUG_PRINT("info", ("horizon: " LSN_FMT,
|
|
LSN_IN_PARTS(log_descriptor.horizon)));
|
|
page_rest= TRANSLOG_PAGE_SIZE - log_descriptor.bc.current_page_fill;
|
|
DBUG_PRINT("info", ("header length: %u page_rest: %u",
|
|
header_length1, page_rest));
|
|
|
|
/*
|
|
header and part which we should read have to fit in one chunk
|
|
TODO: allow to divide readable header
|
|
*/
|
|
if (page_rest <
|
|
(header_length1 + log_record_type_descriptor[type].read_header_len))
|
|
{
|
|
DBUG_PRINT("info",
|
|
("Next page, size: %u header: %u + %u",
|
|
log_descriptor.bc.current_page_fill,
|
|
header_length1,
|
|
log_record_type_descriptor[type].read_header_len));
|
|
translog_page_next(&log_descriptor.horizon, &log_descriptor.bc,
|
|
&buffer_to_flush);
|
|
/* Chunk 2 header is 1 byte, so full page capacity will be one uchar more */
|
|
page_rest= log_descriptor.page_capacity_chunk_2 + 1;
|
|
DBUG_PRINT("info", ("page_rest: %u", page_rest));
|
|
}
|
|
|
|
/*
|
|
To minimize compressed size we will compress always relative to
|
|
very first chunk address (log_descriptor.horizon for now)
|
|
*/
|
|
if (log_record_type_descriptor[type].compressed_LSN > 0)
|
|
{
|
|
translog_relative_LSN_encode(parts, log_descriptor.horizon,
|
|
log_record_type_descriptor[type].
|
|
compressed_LSN, compressed_LSNs);
|
|
/* recalculate header length after compression */
|
|
header_length1= 1 + 2 + 2 +
|
|
translog_variable_record_length_bytes(parts->record_length);
|
|
DBUG_PRINT("info", ("after compressing LSN(s) header length: %u "
|
|
"record length: %lu",
|
|
header_length1, (ulong)parts->record_length));
|
|
}
|
|
|
|
/* TODO: check space on current page for header + few bytes */
|
|
if (page_rest >= parts->record_length + header_length1)
|
|
{
|
|
/* following function makes translog_unlock(); */
|
|
res= translog_write_variable_record_1chunk(lsn, type, tbl_info,
|
|
short_trid,
|
|
parts, buffer_to_flush,
|
|
header_length1, trn, hook_arg);
|
|
DBUG_RETURN(res);
|
|
}
|
|
|
|
buffer_rest= translog_get_current_group_size();
|
|
|
|
if (buffer_rest >= parts->record_length + header_length1 - page_rest)
|
|
{
|
|
/* following function makes translog_unlock(); */
|
|
res= translog_write_variable_record_1group(lsn, type, tbl_info,
|
|
short_trid,
|
|
parts, buffer_to_flush,
|
|
header_length1, trn, hook_arg);
|
|
DBUG_RETURN(res);
|
|
}
|
|
/* following function makes translog_unlock(); */
|
|
res= translog_write_variable_record_mgroup(lsn, type, tbl_info,
|
|
short_trid,
|
|
parts, buffer_to_flush,
|
|
header_length1,
|
|
buffer_rest, trn, hook_arg);
|
|
DBUG_RETURN(res);
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Write the fixed and pseudo-fixed log record.
|
|
|
|
@param lsn LSN of the record will be written here
|
|
@param type the log record type
|
|
@param short_trid Short transaction ID or 0 if it has no sense
|
|
@param parts Descriptor of record source parts
|
|
@param trn Transaction structure pointer for hooks by
|
|
record log type, for short_id
|
|
@param hook_arg Argument which will be passed to pre-write and
|
|
in-write hooks of this record.
|
|
|
|
@return Operation status
|
|
@retval 0 OK
|
|
@retval 1 Error
|
|
*/
|
|
|
|
static my_bool translog_write_fixed_record(LSN *lsn,
|
|
enum translog_record_type type,
|
|
MARIA_HA *tbl_info,
|
|
SHORT_TRANSACTION_ID short_trid,
|
|
struct st_translog_parts *parts,
|
|
TRN *trn, void *hook_arg)
|
|
{
|
|
struct st_translog_buffer *buffer_to_flush= NULL;
|
|
uchar chunk1_header[1 + 2];
|
|
/* Max number of such LSNs per record is 2 */
|
|
uchar compressed_LSNs[MAX_NUMBER_OF_LSNS_PER_RECORD *
|
|
COMPRESSED_LSN_MAX_STORE_SIZE];
|
|
LEX_CUSTRING *part;
|
|
int rc= 1;
|
|
DBUG_ENTER("translog_write_fixed_record");
|
|
DBUG_ASSERT((log_record_type_descriptor[type].rclass ==
|
|
LOGRECTYPE_FIXEDLENGTH &&
|
|
parts->record_length ==
|
|
log_record_type_descriptor[type].fixed_length) ||
|
|
(log_record_type_descriptor[type].rclass ==
|
|
LOGRECTYPE_PSEUDOFIXEDLENGTH &&
|
|
parts->record_length ==
|
|
log_record_type_descriptor[type].fixed_length));
|
|
|
|
translog_lock();
|
|
DBUG_PRINT("info", ("horizon: " LSN_FMT,
|
|
LSN_IN_PARTS(log_descriptor.horizon)));
|
|
|
|
DBUG_ASSERT(log_descriptor.bc.current_page_fill <= TRANSLOG_PAGE_SIZE);
|
|
DBUG_PRINT("info",
|
|
("Page size: %u record: %u next cond: %d",
|
|
log_descriptor.bc.current_page_fill,
|
|
(parts->record_length +
|
|
log_record_type_descriptor[type].compressed_LSN * 2 + 3),
|
|
((((uint) log_descriptor.bc.current_page_fill) +
|
|
(parts->record_length +
|
|
log_record_type_descriptor[type].compressed_LSN * 2 + 3)) >
|
|
TRANSLOG_PAGE_SIZE)));
|
|
/*
|
|
check that there is enough place on current page.
|
|
NOTE: compressing may increase page LSN size on two bytes for every LSN
|
|
*/
|
|
if ((((uint) log_descriptor.bc.current_page_fill) +
|
|
(parts->record_length +
|
|
log_record_type_descriptor[type].compressed_LSN * 2 + 3)) >
|
|
TRANSLOG_PAGE_SIZE)
|
|
{
|
|
DBUG_PRINT("info", ("Next page"));
|
|
if (translog_page_next(&log_descriptor.horizon, &log_descriptor.bc,
|
|
&buffer_to_flush))
|
|
goto err; /* rc == 1 */
|
|
if (buffer_to_flush)
|
|
translog_buffer_lock_assert_owner(buffer_to_flush);
|
|
}
|
|
|
|
set_lsn(lsn, log_descriptor.horizon);
|
|
if (translog_set_lsn_for_files(LSN_FILE_NO(*lsn), LSN_FILE_NO(*lsn),
|
|
*lsn, TRUE) ||
|
|
(log_record_type_descriptor[type].inwrite_hook &&
|
|
(*log_record_type_descriptor[type].inwrite_hook)(type, trn, tbl_info,
|
|
lsn, hook_arg)))
|
|
goto err;
|
|
|
|
/* compress LSNs */
|
|
if (log_record_type_descriptor[type].rclass ==
|
|
LOGRECTYPE_PSEUDOFIXEDLENGTH)
|
|
{
|
|
DBUG_ASSERT(log_record_type_descriptor[type].compressed_LSN > 0);
|
|
translog_relative_LSN_encode(parts, *lsn,
|
|
log_record_type_descriptor[type].
|
|
compressed_LSN, compressed_LSNs);
|
|
}
|
|
|
|
/*
|
|
Write the whole record at once (we know that there is enough place on
|
|
the destination page)
|
|
*/
|
|
DBUG_ASSERT(parts->current != 0); /* first part is left for header */
|
|
part= parts->parts + (--parts->current);
|
|
parts->total_record_length+= (translog_size_t) (part->length= 1 + 2);
|
|
part->str= chunk1_header;
|
|
*chunk1_header= (uchar) (type | TRANSLOG_CHUNK_FIXED);
|
|
int2store(chunk1_header + 1, short_trid);
|
|
|
|
rc= translog_write_parts_on_page(&log_descriptor.horizon,
|
|
&log_descriptor.bc,
|
|
parts->total_record_length, parts);
|
|
|
|
log_descriptor.bc.buffer->last_lsn= *lsn;
|
|
DBUG_PRINT("info", ("last_lsn set to " LSN_FMT " buffer: %p",
|
|
LSN_IN_PARTS(log_descriptor.bc.buffer->last_lsn),
|
|
log_descriptor.bc.buffer));
|
|
|
|
err:
|
|
translog_unlock();
|
|
|
|
/*
|
|
check if we switched buffer and need process it (current buffer is
|
|
unlocked already => we will not delay other threads
|
|
*/
|
|
if (buffer_to_flush != NULL)
|
|
{
|
|
if (!rc)
|
|
rc= translog_buffer_flush(buffer_to_flush);
|
|
translog_buffer_unlock(buffer_to_flush);
|
|
}
|
|
|
|
DBUG_RETURN(rc);
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Writes the log record
|
|
|
|
If share has no 2-byte-id yet, gives an id to the share and logs
|
|
LOGREC_FILE_ID. If transaction has not logged LOGREC_LONG_TRANSACTION_ID
|
|
yet, logs it.
|
|
|
|
@param lsn LSN of the record will be written here
|
|
@param type the log record type
|
|
@param trn Transaction structure pointer for hooks by
|
|
record log type, for short_id
|
|
@param tbl_info MARIA_HA of table or NULL
|
|
@param rec_len record length or 0 (count it)
|
|
@param part_no number of parts or 0 (count it)
|
|
@param parts_data zero ended (in case of number of parts is 0)
|
|
array of LEX_STRINGs (parts), first
|
|
TRANSLOG_INTERNAL_PARTS positions in the log
|
|
should be unused (need for loghandler)
|
|
@param store_share_id if tbl_info!=NULL then share's id will
|
|
automatically be stored in the two first bytes
|
|
pointed (so pointer is assumed to be !=NULL)
|
|
@param hook_arg argument which will be passed to pre-write and
|
|
in-write hooks of this record.
|
|
|
|
@return Operation status
|
|
@retval 0 OK
|
|
@retval 1 Error
|
|
*/
|
|
|
|
my_bool translog_write_record(LSN *lsn,
|
|
enum translog_record_type type,
|
|
TRN *trn, MARIA_HA *tbl_info,
|
|
translog_size_t rec_len,
|
|
uint part_no,
|
|
LEX_CUSTRING *parts_data,
|
|
uchar *store_share_id,
|
|
void *hook_arg)
|
|
{
|
|
struct st_translog_parts parts;
|
|
LEX_CUSTRING *part;
|
|
int rc;
|
|
uint short_trid= trn->short_id;
|
|
DBUG_ENTER("translog_write_record");
|
|
DBUG_PRINT("enter", ("type: %u (%s) ShortTrID: %u rec_len: %lu",
|
|
(uint) type, log_record_type_descriptor[type].name,
|
|
(uint) short_trid, (ulong) rec_len));
|
|
DBUG_ASSERT(translog_status == TRANSLOG_OK ||
|
|
translog_status == TRANSLOG_READONLY);
|
|
DBUG_ASSERT(type != 0);
|
|
DBUG_SLOW_ASSERT((uint)type <= max_allowed_translog_type);
|
|
if (unlikely(translog_status != TRANSLOG_OK))
|
|
{
|
|
DBUG_PRINT("error", ("Transaction log is write protected"));
|
|
DBUG_RETURN(1);
|
|
}
|
|
|
|
if (tbl_info && type != LOGREC_FILE_ID)
|
|
{
|
|
MARIA_SHARE *share= tbl_info->s;
|
|
DBUG_ASSERT(share->now_transactional);
|
|
if (unlikely(share->id == 0))
|
|
{
|
|
/*
|
|
First log write for this MARIA_SHARE; give it a short id.
|
|
When the lock manager is enabled and needs a short id, it should be
|
|
assigned in the lock manager (because row locks will be taken before
|
|
log records are written; for example SELECT FOR UPDATE takes locks but
|
|
writes no log record.
|
|
*/
|
|
if (unlikely(translog_assign_id_to_share(tbl_info, trn)))
|
|
DBUG_RETURN(1);
|
|
}
|
|
fileid_store(store_share_id, share->id);
|
|
}
|
|
if (unlikely(!(trn->first_undo_lsn & TRANSACTION_LOGGED_LONG_ID)))
|
|
{
|
|
LSN dummy_lsn;
|
|
LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 1];
|
|
uchar log_data[6];
|
|
DBUG_ASSERT(trn->undo_lsn == LSN_IMPOSSIBLE);
|
|
int6store(log_data, trn->trid);
|
|
log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data;
|
|
log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
|
|
trn->first_undo_lsn|= TRANSACTION_LOGGED_LONG_ID; /* no recursion */
|
|
if (unlikely(translog_write_record(&dummy_lsn, LOGREC_LONG_TRANSACTION_ID,
|
|
trn, NULL, sizeof(log_data),
|
|
sizeof(log_array)/sizeof(log_array[0]),
|
|
log_array, NULL, NULL)))
|
|
DBUG_RETURN(1);
|
|
}
|
|
|
|
parts.parts= parts_data;
|
|
|
|
/* count parts if they are not counted by upper level */
|
|
if (part_no == 0)
|
|
{
|
|
for (part_no= TRANSLOG_INTERNAL_PARTS;
|
|
parts_data[part_no].length != 0;
|
|
part_no++);
|
|
}
|
|
parts.elements= part_no;
|
|
parts.current= TRANSLOG_INTERNAL_PARTS;
|
|
|
|
/* clear TRANSLOG_INTERNAL_PARTS */
|
|
compile_time_assert(TRANSLOG_INTERNAL_PARTS != 0);
|
|
parts_data[0].str= 0;
|
|
parts_data[0].length= 0;
|
|
|
|
/* count length of the record */
|
|
if (rec_len == 0)
|
|
{
|
|
for(part= parts_data + TRANSLOG_INTERNAL_PARTS;\
|
|
part < parts_data + part_no;
|
|
part++)
|
|
{
|
|
rec_len+= (translog_size_t) part->length;
|
|
}
|
|
}
|
|
parts.record_length= rec_len;
|
|
|
|
#ifndef DBUG_OFF
|
|
{
|
|
uint i;
|
|
size_t len= 0;
|
|
#ifdef HAVE_valgrind
|
|
ha_checksum checksum= 0;
|
|
#endif
|
|
for (i= TRANSLOG_INTERNAL_PARTS; i < part_no; i++)
|
|
{
|
|
#ifdef HAVE_valgrind
|
|
/* Find unitialized bytes early */
|
|
checksum+= my_checksum(checksum, parts_data[i].str,
|
|
parts_data[i].length);
|
|
#endif
|
|
len+= parts_data[i].length;
|
|
}
|
|
DBUG_ASSERT(len == rec_len);
|
|
}
|
|
#endif
|
|
/*
|
|
Start total_record_length from record_length then overhead will
|
|
be add
|
|
*/
|
|
parts.total_record_length= parts.record_length;
|
|
DBUG_PRINT("info", ("record length: %lu", (ulong) parts.record_length));
|
|
|
|
/* process this parts */
|
|
if (!(rc= (log_record_type_descriptor[type].prewrite_hook &&
|
|
(*log_record_type_descriptor[type].prewrite_hook)(type, trn,
|
|
tbl_info,
|
|
hook_arg))))
|
|
{
|
|
switch (log_record_type_descriptor[type].rclass) {
|
|
case LOGRECTYPE_VARIABLE_LENGTH:
|
|
rc= translog_write_variable_record(lsn, type, tbl_info,
|
|
short_trid, &parts, trn, hook_arg);
|
|
break;
|
|
case LOGRECTYPE_PSEUDOFIXEDLENGTH:
|
|
case LOGRECTYPE_FIXEDLENGTH:
|
|
rc= translog_write_fixed_record(lsn, type, tbl_info,
|
|
short_trid, &parts, trn, hook_arg);
|
|
break;
|
|
case LOGRECTYPE_NOT_ALLOWED:
|
|
default:
|
|
DBUG_ASSERT(0);
|
|
rc= 1;
|
|
}
|
|
}
|
|
|
|
DBUG_PRINT("info", ("LSN: " LSN_FMT, LSN_IN_PARTS(*lsn)));
|
|
DBUG_RETURN(rc);
|
|
}
|
|
|
|
|
|
/*
|
|
Decode compressed (relative) LSN(s)
|
|
|
|
SYNOPSIS
|
|
translog_relative_lsn_decode()
|
|
base_lsn LSN for encoding
|
|
src Decode LSN(s) from here
|
|
dst Put decoded LSNs here
|
|
lsns number of LSN(s)
|
|
|
|
RETURN
|
|
position in sources after decoded LSN(s)
|
|
*/
|
|
|
|
static uchar *translog_relative_LSN_decode(LSN base_lsn,
|
|
uchar *src, uchar *dst, uint lsns)
|
|
{
|
|
uint i;
|
|
for (i= 0; i < lsns; i++, dst+= LSN_STORE_SIZE)
|
|
{
|
|
src= translog_get_LSN_from_diff(base_lsn, src, dst);
|
|
}
|
|
return src;
|
|
}
|
|
|
|
/**
|
|
@brief Get header of fixed/pseudo length record and call hook for
|
|
it processing
|
|
|
|
@param page Pointer to the buffer with page where LSN chunk is
|
|
placed
|
|
@param page_offset Offset of the first chunk in the page
|
|
@param buff Buffer to be filled with header data
|
|
|
|
@return Length of header or operation status
|
|
@retval # number of bytes in TRANSLOG_HEADER_BUFFER::header where
|
|
stored decoded part of the header
|
|
*/
|
|
|
|
static int translog_fixed_length_header(uchar *page,
|
|
translog_size_t page_offset,
|
|
TRANSLOG_HEADER_BUFFER *buff)
|
|
{
|
|
struct st_log_record_type_descriptor *desc=
|
|
log_record_type_descriptor + buff->type;
|
|
uchar *src= page + page_offset + 3;
|
|
uchar *dst= buff->header;
|
|
uchar *start= src;
|
|
int lsns= desc->compressed_LSN;
|
|
uint length= desc->fixed_length;
|
|
DBUG_ENTER("translog_fixed_length_header");
|
|
|
|
buff->record_length= length;
|
|
|
|
if (desc->rclass == LOGRECTYPE_PSEUDOFIXEDLENGTH)
|
|
{
|
|
DBUG_ASSERT(lsns > 0);
|
|
src= translog_relative_LSN_decode(buff->lsn, src, dst, lsns);
|
|
lsns*= LSN_STORE_SIZE;
|
|
dst+= lsns;
|
|
length-= lsns;
|
|
buff->compressed_LSN_economy= (lsns - (int) (src - start));
|
|
}
|
|
else
|
|
buff->compressed_LSN_economy= 0;
|
|
|
|
memcpy(dst, src, length);
|
|
buff->non_header_data_start_offset= (uint16) (page_offset +
|
|
((src + length) -
|
|
(page + page_offset)));
|
|
buff->non_header_data_len= 0;
|
|
DBUG_RETURN(buff->record_length);
|
|
}
|
|
|
|
|
|
/*
|
|
Free resources used by TRANSLOG_HEADER_BUFFER
|
|
|
|
SYNOPSIS
|
|
translog_free_record_header();
|
|
*/
|
|
|
|
void translog_free_record_header(TRANSLOG_HEADER_BUFFER *buff)
|
|
{
|
|
DBUG_ENTER("translog_free_record_header");
|
|
if (buff->groups_no != 0)
|
|
{
|
|
my_free(buff->groups);
|
|
buff->groups_no= 0;
|
|
}
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Returns the current horizon at the end of the current log
|
|
|
|
@return Horizon
|
|
@retval LSN_ERROR error
|
|
@retvar # Horizon
|
|
*/
|
|
|
|
TRANSLOG_ADDRESS translog_get_horizon()
|
|
{
|
|
TRANSLOG_ADDRESS res;
|
|
DBUG_ASSERT(translog_status == TRANSLOG_OK ||
|
|
translog_status == TRANSLOG_READONLY);
|
|
translog_lock();
|
|
res= log_descriptor.horizon;
|
|
translog_unlock();
|
|
return res;
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Returns the current horizon at the end of the current log, caller is
|
|
assumed to already hold the lock
|
|
|
|
@return Horizon
|
|
@retval LSN_ERROR error
|
|
@retvar # Horizon
|
|
*/
|
|
|
|
TRANSLOG_ADDRESS translog_get_horizon_no_lock()
|
|
{
|
|
DBUG_ASSERT(translog_status == TRANSLOG_OK ||
|
|
translog_status == TRANSLOG_READONLY);
|
|
translog_lock_assert_owner();
|
|
return log_descriptor.horizon;
|
|
}
|
|
|
|
|
|
/*
|
|
Set last page in the scanner data structure
|
|
|
|
SYNOPSIS
|
|
translog_scanner_set_last_page()
|
|
scanner Information about current chunk during scanning
|
|
|
|
RETURN
|
|
0 OK
|
|
1 Error
|
|
*/
|
|
|
|
static my_bool translog_scanner_set_last_page(TRANSLOG_SCANNER_DATA *scanner)
|
|
{
|
|
my_bool page_ok;
|
|
if (LSN_FILE_NO(scanner->page_addr) == LSN_FILE_NO(scanner->horizon))
|
|
{
|
|
/* It is last file => we can easy find last page address by horizon */
|
|
uint pagegrest= LSN_OFFSET(scanner->horizon) % TRANSLOG_PAGE_SIZE;
|
|
scanner->last_file_page= (scanner->horizon -
|
|
(pagegrest ? pagegrest : TRANSLOG_PAGE_SIZE));
|
|
return (0);
|
|
}
|
|
scanner->last_file_page= scanner->page_addr;
|
|
return (translog_get_last_page_addr(&scanner->last_file_page, &page_ok, 0));
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Get page from page cache according to requested method
|
|
|
|
@param scanner The scanner data
|
|
|
|
@return operation status
|
|
@retval 0 OK
|
|
@retval 1 Error
|
|
*/
|
|
|
|
static my_bool
|
|
translog_scanner_get_page(TRANSLOG_SCANNER_DATA *scanner)
|
|
{
|
|
TRANSLOG_VALIDATOR_DATA data;
|
|
DBUG_ENTER("translog_scanner_get_page");
|
|
data.addr= &scanner->page_addr;
|
|
data.was_recovered= 0;
|
|
DBUG_RETURN((scanner->page=
|
|
translog_get_page(&data, scanner->buffer,
|
|
(scanner->use_direct_link ?
|
|
&scanner->direct_link :
|
|
NULL))) ==
|
|
NULL);
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Initialize reader scanner.
|
|
|
|
@param lsn LSN with which it have to be inited
|
|
@param fixed_horizon true if it is OK do not read records which was written
|
|
after scanning beginning
|
|
@param scanner scanner which have to be inited
|
|
@param use_direct prefer using direct lings from page handler
|
|
where it is possible.
|
|
|
|
@note If direct link was used translog_destroy_scanner should be
|
|
called after it using
|
|
|
|
@return status of the operation
|
|
@retval 0 OK
|
|
@retval 1 Error
|
|
*/
|
|
|
|
my_bool translog_scanner_init(LSN lsn,
|
|
my_bool fixed_horizon,
|
|
TRANSLOG_SCANNER_DATA *scanner,
|
|
my_bool use_direct)
|
|
{
|
|
DBUG_ENTER("translog_scanner_init");
|
|
DBUG_PRINT("enter", ("Scanner: %p LSN: " LSN_FMT,
|
|
scanner, LSN_IN_PARTS(lsn)));
|
|
DBUG_ASSERT(translog_status == TRANSLOG_OK ||
|
|
translog_status == TRANSLOG_READONLY);
|
|
|
|
scanner->page_offset= LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE;
|
|
|
|
scanner->fixed_horizon= fixed_horizon;
|
|
scanner->use_direct_link= use_direct;
|
|
scanner->direct_link= NULL;
|
|
|
|
scanner->horizon= translog_get_horizon();
|
|
DBUG_PRINT("info", ("horizon: " LSN_FMT, LSN_IN_PARTS(scanner->horizon)));
|
|
|
|
/* lsn < horizon */
|
|
DBUG_ASSERT(lsn <= scanner->horizon);
|
|
|
|
scanner->page_addr= lsn;
|
|
scanner->page_addr-= scanner->page_offset; /*decrease offset */
|
|
|
|
if (translog_scanner_set_last_page(scanner))
|
|
DBUG_RETURN(1);
|
|
|
|
if (translog_scanner_get_page(scanner))
|
|
DBUG_RETURN(1);
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Destroy scanner object;
|
|
|
|
@param scanner The scanner object to destroy
|
|
*/
|
|
|
|
void translog_destroy_scanner(TRANSLOG_SCANNER_DATA *scanner)
|
|
{
|
|
DBUG_ENTER("translog_destroy_scanner");
|
|
DBUG_PRINT("enter", ("Scanner: %p", scanner));
|
|
translog_free_link(scanner->direct_link);
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
|
|
/*
|
|
Checks End of the Log
|
|
|
|
SYNOPSIS
|
|
translog_scanner_eol()
|
|
scanner Information about current chunk during scanning
|
|
|
|
RETURN
|
|
1 End of the Log
|
|
0 OK
|
|
*/
|
|
|
|
static my_bool translog_scanner_eol(TRANSLOG_SCANNER_DATA *scanner)
|
|
{
|
|
DBUG_ENTER("translog_scanner_eol");
|
|
DBUG_PRINT("enter",
|
|
("Horizon: " LSN_FMT " Current: (%u, 0x%x+0x%x=0x%x)",
|
|
LSN_IN_PARTS(scanner->horizon),
|
|
LSN_IN_PARTS(scanner->page_addr),
|
|
(uint) scanner->page_offset,
|
|
(uint) (LSN_OFFSET(scanner->page_addr) + scanner->page_offset)));
|
|
if (scanner->horizon > (scanner->page_addr +
|
|
scanner->page_offset))
|
|
{
|
|
DBUG_PRINT("info", ("Horizon is not reached"));
|
|
DBUG_RETURN(0);
|
|
}
|
|
if (scanner->fixed_horizon)
|
|
{
|
|
DBUG_PRINT("info", ("Horizon is fixed and reached"));
|
|
DBUG_RETURN(1);
|
|
}
|
|
scanner->horizon= translog_get_horizon();
|
|
DBUG_PRINT("info",
|
|
("Horizon is re-read, EOL: %d",
|
|
scanner->horizon <= (scanner->page_addr +
|
|
scanner->page_offset)));
|
|
DBUG_RETURN(scanner->horizon <= (scanner->page_addr +
|
|
scanner->page_offset));
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Cheks End of the Page
|
|
|
|
@param scanner Information about current chunk during scanning
|
|
|
|
@retval 1 End of the Page
|
|
@retval 0 OK
|
|
*/
|
|
|
|
static my_bool translog_scanner_eop(TRANSLOG_SCANNER_DATA *scanner)
|
|
{
|
|
DBUG_ENTER("translog_scanner_eop");
|
|
DBUG_RETURN(scanner->page_offset >= TRANSLOG_PAGE_SIZE ||
|
|
scanner->page[scanner->page_offset] == TRANSLOG_FILLER);
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Checks End of the File (i.e. we are scanning last page, which do not
|
|
mean end of this page)
|
|
|
|
@param scanner Information about current chunk during scanning
|
|
|
|
@retval 1 End of the File
|
|
@retval 0 OK
|
|
*/
|
|
|
|
static my_bool translog_scanner_eof(TRANSLOG_SCANNER_DATA *scanner)
|
|
{
|
|
DBUG_ENTER("translog_scanner_eof");
|
|
DBUG_ASSERT(LSN_FILE_NO(scanner->page_addr) ==
|
|
LSN_FILE_NO(scanner->last_file_page));
|
|
DBUG_PRINT("enter", ("curr Page: 0x%lx last page: 0x%lx "
|
|
"normal EOF: %d",
|
|
(ulong) LSN_OFFSET(scanner->page_addr),
|
|
(ulong) LSN_OFFSET(scanner->last_file_page),
|
|
LSN_OFFSET(scanner->page_addr) ==
|
|
LSN_OFFSET(scanner->last_file_page)));
|
|
/*
|
|
TODO: detect damaged file EOF,
|
|
TODO: issue warning if damaged file EOF detected
|
|
*/
|
|
DBUG_RETURN(scanner->page_addr ==
|
|
scanner->last_file_page);
|
|
}
|
|
|
|
/*
|
|
Move scanner to the next chunk
|
|
|
|
SYNOPSIS
|
|
translog_get_next_chunk()
|
|
scanner Information about current chunk during scanning
|
|
|
|
RETURN
|
|
0 OK
|
|
1 Error
|
|
*/
|
|
|
|
static my_bool
|
|
translog_get_next_chunk(TRANSLOG_SCANNER_DATA *scanner)
|
|
{
|
|
uint16 len;
|
|
DBUG_ENTER("translog_get_next_chunk");
|
|
|
|
if (translog_scanner_eop(scanner))
|
|
len= TRANSLOG_PAGE_SIZE - scanner->page_offset;
|
|
else if ((len= translog_get_total_chunk_length(scanner->page,
|
|
scanner->page_offset)) == 0)
|
|
DBUG_RETURN(1);
|
|
scanner->page_offset+= len;
|
|
|
|
if (translog_scanner_eol(scanner))
|
|
{
|
|
scanner->page= END_OF_LOG;
|
|
scanner->page_offset= 0;
|
|
DBUG_RETURN(0);
|
|
}
|
|
if (translog_scanner_eop(scanner))
|
|
{
|
|
/* before reading next page we should unpin current one if it was pinned */
|
|
translog_free_link(scanner->direct_link);
|
|
if (translog_scanner_eof(scanner))
|
|
{
|
|
DBUG_PRINT("info", ("horizon: " LSN_FMT " pageaddr: " LSN_FMT,
|
|
LSN_IN_PARTS(scanner->horizon),
|
|
LSN_IN_PARTS(scanner->page_addr)));
|
|
/* if it is log end it have to be caught before */
|
|
DBUG_ASSERT(LSN_FILE_NO(scanner->horizon) >
|
|
LSN_FILE_NO(scanner->page_addr));
|
|
scanner->page_addr+= LSN_ONE_FILE;
|
|
scanner->page_addr= LSN_REPLACE_OFFSET(scanner->page_addr,
|
|
TRANSLOG_PAGE_SIZE);
|
|
if (translog_scanner_set_last_page(scanner))
|
|
DBUG_RETURN(1);
|
|
}
|
|
else
|
|
{
|
|
scanner->page_addr+= TRANSLOG_PAGE_SIZE; /* offset increased */
|
|
}
|
|
|
|
if (translog_scanner_get_page(scanner))
|
|
DBUG_RETURN(1);
|
|
|
|
scanner->page_offset= translog_get_first_chunk_offset(scanner->page);
|
|
if (translog_scanner_eol(scanner))
|
|
{
|
|
scanner->page= END_OF_LOG;
|
|
scanner->page_offset= 0;
|
|
DBUG_RETURN(0);
|
|
}
|
|
DBUG_ASSERT(scanner->page[scanner->page_offset] != TRANSLOG_FILLER);
|
|
}
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Get header of variable length record and call hook for it processing
|
|
|
|
@param page Pointer to the buffer with page where LSN chunk is
|
|
placed
|
|
@param page_offset Offset of the first chunk in the page
|
|
@param buff Buffer to be filled with header data
|
|
@param scanner If present should be moved to the header page if
|
|
it differ from LSN page
|
|
|
|
@return Length of header or operation status
|
|
@retval RECHEADER_READ_ERROR error
|
|
@retval RECHEADER_READ_EOF End of the log reached during the read
|
|
@retval # number of bytes in
|
|
TRANSLOG_HEADER_BUFFER::header where
|
|
stored decoded part of the header
|
|
*/
|
|
|
|
static int
|
|
translog_variable_length_header(uchar *page, translog_size_t page_offset,
|
|
TRANSLOG_HEADER_BUFFER *buff,
|
|
TRANSLOG_SCANNER_DATA *scanner)
|
|
{
|
|
struct st_log_record_type_descriptor *desc= (log_record_type_descriptor +
|
|
buff->type);
|
|
uchar *src= page + page_offset + 1 + 2;
|
|
uchar *dst= buff->header;
|
|
LSN base_lsn;
|
|
uint lsns= desc->compressed_LSN;
|
|
uint16 chunk_len;
|
|
uint16 length= desc->read_header_len;
|
|
uint16 buffer_length= length;
|
|
uint16 body_len;
|
|
int rc;
|
|
TRANSLOG_SCANNER_DATA internal_scanner;
|
|
DBUG_ENTER("translog_variable_length_header");
|
|
|
|
buff->record_length= translog_variable_record_1group_decode_len(&src);
|
|
chunk_len= uint2korr(src);
|
|
DBUG_PRINT("info", ("rec len: %lu chunk len: %u length: %u bufflen: %u",
|
|
(ulong) buff->record_length, (uint) chunk_len,
|
|
(uint) length, (uint) buffer_length));
|
|
if (chunk_len == 0)
|
|
{
|
|
uint16 page_rest;
|
|
DBUG_PRINT("info", ("1 group"));
|
|
src+= 2;
|
|
page_rest= (uint16) (TRANSLOG_PAGE_SIZE - (src - page));
|
|
|
|
base_lsn= buff->lsn;
|
|
body_len= MY_MIN(page_rest, buff->record_length);
|
|
}
|
|
else
|
|
{
|
|
uint grp_no, curr;
|
|
uint header_to_skip;
|
|
uint16 page_rest;
|
|
|
|
DBUG_PRINT("info", ("multi-group"));
|
|
grp_no= buff->groups_no= uint2korr(src + 2);
|
|
if (!(buff->groups=
|
|
(TRANSLOG_GROUP*) my_malloc(PSI_INSTRUMENT_ME, sizeof(TRANSLOG_GROUP) * grp_no,
|
|
MYF(0))))
|
|
DBUG_RETURN(RECHEADER_READ_ERROR);
|
|
DBUG_PRINT("info", ("Groups: %u", (uint) grp_no));
|
|
src+= (2 + 2);
|
|
page_rest= (uint16) (TRANSLOG_PAGE_SIZE - (src - page));
|
|
curr= 0;
|
|
header_to_skip= (uint) (src - (page + page_offset));
|
|
buff->chunk0_pages= 0;
|
|
|
|
for (;;)
|
|
{
|
|
uint i, read_length= grp_no;
|
|
|
|
buff->chunk0_pages++;
|
|
if (page_rest < grp_no * (7 + 1))
|
|
read_length= page_rest / (7 + 1);
|
|
DBUG_PRINT("info", ("Read chunk0 page#%u read: %u left: %u "
|
|
"start from: %u",
|
|
buff->chunk0_pages, read_length, grp_no, curr));
|
|
for (i= 0; i < read_length; i++, curr++)
|
|
{
|
|
DBUG_ASSERT(curr < buff->groups_no);
|
|
buff->groups[curr].addr= lsn_korr(src + i * (7 + 1));
|
|
buff->groups[curr].num= src[i * (7 + 1) + 7];
|
|
DBUG_PRINT("info", ("group #%u " LSN_FMT " chunks: %u",
|
|
curr,
|
|
LSN_IN_PARTS(buff->groups[curr].addr),
|
|
(uint) buff->groups[curr].num));
|
|
}
|
|
grp_no-= read_length;
|
|
if (grp_no == 0)
|
|
{
|
|
if (scanner)
|
|
{
|
|
buff->chunk0_data_addr= scanner->page_addr;
|
|
/* offset increased */
|
|
buff->chunk0_data_addr+= (page_offset + header_to_skip +
|
|
read_length * (7 + 1));
|
|
}
|
|
else
|
|
{
|
|
buff->chunk0_data_addr= buff->lsn;
|
|
/* offset increased */
|
|
buff->chunk0_data_addr+= (header_to_skip + read_length * (7 + 1));
|
|
}
|
|
buff->chunk0_data_len= chunk_len - 2 - read_length * (7 + 1);
|
|
DBUG_PRINT("info", ("Data address: " LSN_FMT " len: %u",
|
|
LSN_IN_PARTS(buff->chunk0_data_addr),
|
|
buff->chunk0_data_len));
|
|
break;
|
|
}
|
|
if (scanner == NULL)
|
|
{
|
|
DBUG_PRINT("info", ("use internal scanner for header reading"));
|
|
scanner= &internal_scanner;
|
|
if (translog_scanner_init(buff->lsn, 1, scanner, 0))
|
|
{
|
|
rc= RECHEADER_READ_ERROR;
|
|
goto exit_and_free;
|
|
}
|
|
}
|
|
if (translog_get_next_chunk(scanner))
|
|
{
|
|
if (scanner == &internal_scanner)
|
|
translog_destroy_scanner(scanner);
|
|
rc= RECHEADER_READ_ERROR;
|
|
goto exit_and_free;
|
|
}
|
|
if (scanner->page == END_OF_LOG)
|
|
{
|
|
if (scanner == &internal_scanner)
|
|
translog_destroy_scanner(scanner);
|
|
rc= RECHEADER_READ_EOF;
|
|
goto exit_and_free;
|
|
}
|
|
page= scanner->page;
|
|
page_offset= scanner->page_offset;
|
|
src= page + page_offset + header_to_skip;
|
|
chunk_len= uint2korr(src - 2 - 2);
|
|
DBUG_PRINT("info", ("Chunk len: %u", (uint) chunk_len));
|
|
page_rest= (uint16) (TRANSLOG_PAGE_SIZE - (src - page));
|
|
}
|
|
|
|
if (scanner == NULL)
|
|
{
|
|
DBUG_PRINT("info", ("use internal scanner"));
|
|
scanner= &internal_scanner;
|
|
}
|
|
else
|
|
{
|
|
translog_destroy_scanner(scanner);
|
|
}
|
|
base_lsn= buff->groups[0].addr;
|
|
translog_scanner_init(base_lsn, 1, scanner, scanner == &internal_scanner);
|
|
/* first group chunk is always chunk type 2 */
|
|
page= scanner->page;
|
|
page_offset= scanner->page_offset;
|
|
src= page + page_offset + 1;
|
|
page_rest= (uint16) (TRANSLOG_PAGE_SIZE - (src - page));
|
|
body_len= page_rest;
|
|
if (scanner == &internal_scanner)
|
|
translog_destroy_scanner(scanner);
|
|
}
|
|
if (lsns)
|
|
{
|
|
uchar *start= src;
|
|
src= translog_relative_LSN_decode(base_lsn, src, dst, lsns);
|
|
lsns*= LSN_STORE_SIZE;
|
|
dst+= lsns;
|
|
length-= lsns;
|
|
buff->record_length+= (buff->compressed_LSN_economy=
|
|
(int) (lsns - (src - start)));
|
|
DBUG_PRINT("info", ("lsns: %u length: %u economy: %d new length: %lu",
|
|
lsns / LSN_STORE_SIZE, (uint) length,
|
|
(int) buff->compressed_LSN_economy,
|
|
(ulong) buff->record_length));
|
|
body_len-= (uint16) (src - start);
|
|
}
|
|
else
|
|
buff->compressed_LSN_economy= 0;
|
|
|
|
DBUG_ASSERT(body_len >= length);
|
|
body_len-= length;
|
|
memcpy(dst, src, length);
|
|
buff->non_header_data_start_offset= (uint16) (src + length - page);
|
|
buff->non_header_data_len= body_len;
|
|
DBUG_PRINT("info", ("non_header_data_start_offset: %u len: %u buffer: %u",
|
|
buff->non_header_data_start_offset,
|
|
buff->non_header_data_len, buffer_length));
|
|
DBUG_RETURN(buffer_length);
|
|
|
|
exit_and_free:
|
|
my_free(buff->groups);
|
|
buff->groups_no= 0; /* prevent try to use of buff->groups */
|
|
DBUG_RETURN(rc);
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Read record header from the given buffer
|
|
|
|
@param page page content buffer
|
|
@param page_offset offset of the chunk in the page
|
|
@param buff destination buffer
|
|
@param scanner If this is set the scanner will be moved to the
|
|
record header page (differ from LSN page in case of
|
|
multi-group records)
|
|
|
|
@return Length of header or operation status
|
|
@retval RECHEADER_READ_ERROR error
|
|
@retval # number of bytes in
|
|
TRANSLOG_HEADER_BUFFER::header where
|
|
stored decoded part of the header
|
|
*/
|
|
|
|
int translog_read_record_header_from_buffer(uchar *page,
|
|
uint16 page_offset,
|
|
TRANSLOG_HEADER_BUFFER *buff,
|
|
TRANSLOG_SCANNER_DATA *scanner)
|
|
{
|
|
translog_size_t res;
|
|
DBUG_ENTER("translog_read_record_header_from_buffer");
|
|
DBUG_PRINT("info", ("page byte: 0x%x offset: %u",
|
|
(uint) page[page_offset], (uint) page_offset));
|
|
DBUG_ASSERT(translog_is_LSN_chunk(page[page_offset]));
|
|
DBUG_ASSERT(translog_status == TRANSLOG_OK ||
|
|
translog_status == TRANSLOG_READONLY);
|
|
buff->type= (page[page_offset] & TRANSLOG_REC_TYPE);
|
|
buff->short_trid= uint2korr(page + page_offset + 1);
|
|
DBUG_PRINT("info", ("Type %u, Short TrID %u, LSN " LSN_FMT,
|
|
(uint) buff->type, (uint)buff->short_trid,
|
|
LSN_IN_PARTS(buff->lsn)));
|
|
/* Read required bytes from the header and call hook */
|
|
switch (log_record_type_descriptor[buff->type].rclass) {
|
|
case LOGRECTYPE_VARIABLE_LENGTH:
|
|
res= translog_variable_length_header(page, page_offset, buff,
|
|
scanner);
|
|
break;
|
|
case LOGRECTYPE_PSEUDOFIXEDLENGTH:
|
|
case LOGRECTYPE_FIXEDLENGTH:
|
|
res= translog_fixed_length_header(page, page_offset, buff);
|
|
break;
|
|
default:
|
|
DBUG_ASSERT(0); /* we read some junk (got no LSN) */
|
|
res= RECHEADER_READ_ERROR;
|
|
}
|
|
DBUG_RETURN(res);
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Read record header and some fixed part of a record (the part depend
|
|
on record type).
|
|
|
|
@param lsn log record serial number (address of the record)
|
|
@param buff log record header buffer
|
|
|
|
@note Some type of record can be read completely by this call
|
|
@note "Decoded" header stored in TRANSLOG_HEADER_BUFFER::header (relative
|
|
LSN can be translated to absolute one), some fields can be added (like
|
|
actual header length in the record if the header has variable length)
|
|
|
|
@return Length of header or operation status
|
|
@retval RECHEADER_READ_ERROR error
|
|
@retval # number of bytes in
|
|
TRANSLOG_HEADER_BUFFER::header where
|
|
stored decoded part of the header
|
|
*/
|
|
|
|
int translog_read_record_header(LSN lsn, TRANSLOG_HEADER_BUFFER *buff)
|
|
{
|
|
TRANSLOG_PAGE_SIZE_BUFF psize_buff;
|
|
uchar *page;
|
|
translog_size_t res, page_offset= LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE;
|
|
PAGECACHE_BLOCK_LINK *direct_link;
|
|
TRANSLOG_ADDRESS addr;
|
|
TRANSLOG_VALIDATOR_DATA data;
|
|
DBUG_ENTER("translog_read_record_header");
|
|
DBUG_PRINT("enter", ("LSN: " LSN_FMT, LSN_IN_PARTS(lsn)));
|
|
DBUG_ASSERT(LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE != 0);
|
|
DBUG_ASSERT(translog_status == TRANSLOG_OK ||
|
|
translog_status == TRANSLOG_READONLY);
|
|
|
|
buff->lsn= lsn;
|
|
buff->groups_no= 0;
|
|
data.addr= &addr;
|
|
data.was_recovered= 0;
|
|
addr= lsn;
|
|
addr-= page_offset; /* offset decreasing */
|
|
res= (!(page= translog_get_page(&data, psize_buff.buffer, &direct_link))) ?
|
|
RECHEADER_READ_ERROR :
|
|
translog_read_record_header_from_buffer(page, page_offset, buff, 0);
|
|
translog_free_link(direct_link);
|
|
DBUG_RETURN(res);
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Read record header and some fixed part of a record (the part depend
|
|
on record type).
|
|
|
|
@param scan scanner position to read
|
|
@param buff log record header buffer
|
|
@param move_scanner request to move scanner to the header position
|
|
|
|
@note Some type of record can be read completely by this call
|
|
@note "Decoded" header stored in TRANSLOG_HEADER_BUFFER::header (relative
|
|
LSN can be translated to absolute one), some fields can be added (like
|
|
actual header length in the record if the header has variable length)
|
|
|
|
@return Length of header or operation status
|
|
@retval RECHEADER_READ_ERROR error
|
|
@retval # number of bytes in
|
|
TRANSLOG_HEADER_BUFFER::header where stored
|
|
decoded part of the header
|
|
*/
|
|
|
|
int translog_read_record_header_scan(TRANSLOG_SCANNER_DATA *scanner,
|
|
TRANSLOG_HEADER_BUFFER *buff,
|
|
my_bool move_scanner)
|
|
{
|
|
translog_size_t res;
|
|
DBUG_ENTER("translog_read_record_header_scan");
|
|
DBUG_PRINT("enter", ("Scanner: Cur: " LSN_FMT " Hrz: " LSN_FMT " "
|
|
"Lst: " LSN_FMT " Offset: %u(%x) fixed %d",
|
|
LSN_IN_PARTS(scanner->page_addr),
|
|
LSN_IN_PARTS(scanner->horizon),
|
|
LSN_IN_PARTS(scanner->last_file_page),
|
|
(uint) scanner->page_offset,
|
|
(uint) scanner->page_offset, scanner->fixed_horizon));
|
|
DBUG_ASSERT(translog_status == TRANSLOG_OK ||
|
|
translog_status == TRANSLOG_READONLY);
|
|
buff->groups_no= 0;
|
|
buff->lsn= scanner->page_addr;
|
|
buff->lsn+= scanner->page_offset; /* offset increasing */
|
|
res= translog_read_record_header_from_buffer(scanner->page,
|
|
scanner->page_offset,
|
|
buff,
|
|
(move_scanner ?
|
|
scanner : 0));
|
|
DBUG_RETURN(res);
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Read record header and some fixed part of the next record (the part
|
|
depend on record type).
|
|
|
|
@param scanner data for scanning if lsn is NULL scanner data
|
|
will be used for continue scanning.
|
|
The scanner can be NULL.
|
|
|
|
@param buff log record header buffer
|
|
|
|
@return Length of header or operation status
|
|
@retval RECHEADER_READ_ERROR error
|
|
@retval RECHEADER_READ_EOF EOF
|
|
@retval # number of bytes in
|
|
TRANSLOG_HEADER_BUFFER::header where
|
|
stored decoded part of the header
|
|
*/
|
|
|
|
int translog_read_next_record_header(TRANSLOG_SCANNER_DATA *scanner,
|
|
TRANSLOG_HEADER_BUFFER *buff)
|
|
{
|
|
translog_size_t res;
|
|
|
|
DBUG_ENTER("translog_read_next_record_header");
|
|
buff->groups_no= 0; /* to be sure that we will free it right */
|
|
DBUG_PRINT("enter", ("scanner: %p", scanner));
|
|
DBUG_PRINT("info", ("Scanner: Cur: " LSN_FMT " Hrz: " LSN_FMT " "
|
|
"Lst: " LSN_FMT " Offset: %u(%x) fixed: %d",
|
|
LSN_IN_PARTS(scanner->page_addr),
|
|
LSN_IN_PARTS(scanner->horizon),
|
|
LSN_IN_PARTS(scanner->last_file_page),
|
|
(uint) scanner->page_offset,
|
|
(uint) scanner->page_offset, scanner->fixed_horizon));
|
|
DBUG_ASSERT(translog_status == TRANSLOG_OK ||
|
|
translog_status == TRANSLOG_READONLY);
|
|
|
|
do
|
|
{
|
|
if (translog_get_next_chunk(scanner))
|
|
DBUG_RETURN(RECHEADER_READ_ERROR);
|
|
if (scanner->page == END_OF_LOG)
|
|
{
|
|
DBUG_PRINT("info", ("End of file from the scanner"));
|
|
/* Last record was read */
|
|
buff->lsn= LSN_IMPOSSIBLE;
|
|
DBUG_RETURN(RECHEADER_READ_EOF);
|
|
}
|
|
DBUG_PRINT("info", ("Page: " LSN_FMT " offset: %lu byte: %x",
|
|
LSN_IN_PARTS(scanner->page_addr),
|
|
(ulong) scanner->page_offset,
|
|
(uint) scanner->page[scanner->page_offset]));
|
|
} while (!translog_is_LSN_chunk(scanner->page[scanner->page_offset]) &&
|
|
scanner->page[scanner->page_offset] != TRANSLOG_FILLER);
|
|
|
|
if (scanner->page[scanner->page_offset] == TRANSLOG_FILLER)
|
|
{
|
|
DBUG_PRINT("info", ("End of file"));
|
|
/* Last record was read */
|
|
buff->lsn= LSN_IMPOSSIBLE;
|
|
/* Return 'end of log' marker */
|
|
res= RECHEADER_READ_EOF;
|
|
}
|
|
else
|
|
res= translog_read_record_header_scan(scanner, buff, 0);
|
|
DBUG_RETURN(res);
|
|
}
|
|
|
|
|
|
/*
|
|
Moves record data reader to the next chunk and fill the data reader
|
|
information about that chunk.
|
|
|
|
SYNOPSIS
|
|
translog_record_read_next_chunk()
|
|
data data cursor
|
|
|
|
RETURN
|
|
0 OK
|
|
1 Error
|
|
*/
|
|
|
|
static my_bool translog_record_read_next_chunk(TRANSLOG_READER_DATA *data)
|
|
{
|
|
translog_size_t new_current_offset= data->current_offset + data->chunk_size;
|
|
uint16 chunk_header_len, chunk_len;
|
|
uint8 type;
|
|
DBUG_ENTER("translog_record_read_next_chunk");
|
|
|
|
if (data->eor)
|
|
{
|
|
DBUG_PRINT("info", ("end of the record flag set"));
|
|
DBUG_RETURN(1);
|
|
}
|
|
|
|
if (data->header.groups_no &&
|
|
data->header.groups_no - 1 != data->current_group &&
|
|
data->header.groups[data->current_group].num == data->current_chunk)
|
|
{
|
|
/* Goto next group */
|
|
data->current_group++;
|
|
data->current_chunk= 0;
|
|
DBUG_PRINT("info", ("skip to group: #%u", data->current_group));
|
|
translog_destroy_scanner(&data->scanner);
|
|
translog_scanner_init(data->header.groups[data->current_group].addr,
|
|
1, &data->scanner, 1);
|
|
}
|
|
else
|
|
{
|
|
data->current_chunk++;
|
|
if (translog_get_next_chunk(&data->scanner))
|
|
DBUG_RETURN(1);
|
|
if (data->scanner.page == END_OF_LOG)
|
|
{
|
|
/*
|
|
Actually it should not happened, but we want to quit nicely in case
|
|
of a truncated log
|
|
*/
|
|
DBUG_RETURN(1);
|
|
}
|
|
}
|
|
type= data->scanner.page[data->scanner.page_offset] & TRANSLOG_CHUNK_TYPE;
|
|
|
|
if (type == TRANSLOG_CHUNK_LSN && data->header.groups_no)
|
|
{
|
|
DBUG_PRINT("info",
|
|
("Last chunk: data len: %u offset: %u group: %u of %u",
|
|
data->header.chunk0_data_len, data->scanner.page_offset,
|
|
data->current_group, data->header.groups_no - 1));
|
|
DBUG_ASSERT(data->header.groups_no - 1 == data->current_group);
|
|
DBUG_ASSERT(data->header.lsn ==
|
|
data->scanner.page_addr + data->scanner.page_offset);
|
|
translog_destroy_scanner(&data->scanner);
|
|
translog_scanner_init(data->header.chunk0_data_addr, 1, &data->scanner, 1);
|
|
data->chunk_size= data->header.chunk0_data_len;
|
|
data->body_offset= data->scanner.page_offset;
|
|
data->current_offset= new_current_offset;
|
|
data->eor= 1;
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
if (type == TRANSLOG_CHUNK_LSN || type == TRANSLOG_CHUNK_FIXED)
|
|
{
|
|
data->eor= 1;
|
|
DBUG_RETURN(1); /* End of record */
|
|
}
|
|
|
|
chunk_header_len=
|
|
translog_get_chunk_header_length(data->scanner.page +
|
|
data->scanner.page_offset);
|
|
chunk_len= translog_get_total_chunk_length(data->scanner.page,
|
|
data->scanner.page_offset);
|
|
data->chunk_size= chunk_len - chunk_header_len;
|
|
data->body_offset= data->scanner.page_offset + chunk_header_len;
|
|
data->current_offset= new_current_offset;
|
|
DBUG_PRINT("info", ("grp: %u chunk: %u body_offset: %u chunk_size: %u "
|
|
"current_offset: %lu",
|
|
(uint) data->current_group,
|
|
(uint) data->current_chunk,
|
|
(uint) data->body_offset,
|
|
(uint) data->chunk_size, (ulong) data->current_offset));
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
|
|
/*
|
|
Initialize record reader data from LSN
|
|
|
|
SYNOPSIS
|
|
translog_init_reader_data()
|
|
lsn reference to LSN we should start from
|
|
data reader data to initialize
|
|
|
|
RETURN
|
|
0 OK
|
|
1 Error
|
|
*/
|
|
|
|
static my_bool translog_init_reader_data(LSN lsn,
|
|
TRANSLOG_READER_DATA *data)
|
|
{
|
|
int read_header;
|
|
DBUG_ENTER("translog_init_reader_data");
|
|
if (translog_scanner_init(lsn, 1, &data->scanner, 1) ||
|
|
((read_header=
|
|
translog_read_record_header_scan(&data->scanner, &data->header, 1))
|
|
== RECHEADER_READ_ERROR))
|
|
DBUG_RETURN(1);
|
|
data->read_header= read_header;
|
|
data->body_offset= data->header.non_header_data_start_offset;
|
|
data->chunk_size= data->header.non_header_data_len;
|
|
data->current_offset= data->read_header;
|
|
data->current_group= 0;
|
|
data->current_chunk= 0;
|
|
data->eor= 0;
|
|
DBUG_PRINT("info", ("read_header: %u "
|
|
"body_offset: %u chunk_size: %u current_offset: %lu",
|
|
(uint) data->read_header,
|
|
(uint) data->body_offset,
|
|
(uint) data->chunk_size, (ulong) data->current_offset));
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Destroy reader data object
|
|
*/
|
|
|
|
static void translog_destroy_reader_data(TRANSLOG_READER_DATA *data)
|
|
{
|
|
translog_destroy_scanner(&data->scanner);
|
|
translog_free_record_header(&data->header);
|
|
}
|
|
|
|
|
|
/*
|
|
Read a part of the record.
|
|
|
|
SYNOPSIS
|
|
translog_read_record_header()
|
|
lsn log record serial number (address of the record)
|
|
offset From the beginning of the record beginning (read
|
|
by translog_read_record_header).
|
|
length Length of record part which have to be read.
|
|
buffer Buffer where to read the record part (have to be at
|
|
least 'length' bytes length)
|
|
|
|
RETURN
|
|
length of data actually read
|
|
*/
|
|
|
|
translog_size_t translog_read_record(LSN lsn,
|
|
translog_size_t offset,
|
|
translog_size_t length,
|
|
uchar *buffer,
|
|
TRANSLOG_READER_DATA *data)
|
|
{
|
|
translog_size_t requested_length= length;
|
|
translog_size_t end= offset + length;
|
|
TRANSLOG_READER_DATA internal_data;
|
|
DBUG_ENTER("translog_read_record");
|
|
DBUG_ASSERT(translog_status == TRANSLOG_OK ||
|
|
translog_status == TRANSLOG_READONLY);
|
|
|
|
if (data == NULL)
|
|
{
|
|
DBUG_ASSERT(lsn != LSN_IMPOSSIBLE);
|
|
data= &internal_data;
|
|
}
|
|
if (lsn ||
|
|
(offset < data->current_offset &&
|
|
!(offset < data->read_header && offset + length < data->read_header)))
|
|
{
|
|
if (translog_init_reader_data(lsn, data))
|
|
DBUG_RETURN(0);
|
|
}
|
|
DBUG_PRINT("info", ("Offset: %lu length: %lu "
|
|
"Scanner: Cur: " LSN_FMT " Hrz: " LSN_FMT " "
|
|
"Lst: " LSN_FMT " Offset: %u(%x) fixed: %d",
|
|
(ulong) offset, (ulong) length,
|
|
LSN_IN_PARTS(data->scanner.page_addr),
|
|
LSN_IN_PARTS(data->scanner.horizon),
|
|
LSN_IN_PARTS(data->scanner.last_file_page),
|
|
(uint) data->scanner.page_offset,
|
|
(uint) data->scanner.page_offset,
|
|
data->scanner.fixed_horizon));
|
|
if (offset < data->read_header)
|
|
{
|
|
uint16 len= MY_MIN(data->read_header, end) - offset;
|
|
DBUG_PRINT("info",
|
|
("enter header offset: %lu length: %lu",
|
|
(ulong) offset, (ulong) length));
|
|
memcpy(buffer, data->header.header + offset, len);
|
|
length-= len;
|
|
if (length == 0)
|
|
{
|
|
translog_destroy_reader_data(data);
|
|
DBUG_RETURN(requested_length);
|
|
}
|
|
offset+= len;
|
|
buffer+= len;
|
|
DBUG_PRINT("info",
|
|
("len: %u offset: %lu curr: %lu length: %lu",
|
|
len, (ulong) offset, (ulong) data->current_offset,
|
|
(ulong) length));
|
|
}
|
|
/* TODO: find first page which we should read by offset */
|
|
|
|
/* read the record chunk by chunk */
|
|
for(;;)
|
|
{
|
|
uint page_end= data->current_offset + data->chunk_size;
|
|
DBUG_PRINT("info",
|
|
("enter body offset: %lu curr: %lu "
|
|
"length: %lu page_end: %lu",
|
|
(ulong) offset, (ulong) data->current_offset, (ulong) length,
|
|
(ulong) page_end));
|
|
if (offset < page_end)
|
|
{
|
|
uint len= page_end - offset;
|
|
set_if_smaller(len, length); /* in case we read beyond record's end */
|
|
DBUG_ASSERT(offset >= data->current_offset);
|
|
memcpy(buffer,
|
|
data->scanner.page + data->body_offset +
|
|
(offset - data->current_offset), len);
|
|
length-= len;
|
|
if (length == 0)
|
|
{
|
|
translog_destroy_reader_data(data);
|
|
DBUG_RETURN(requested_length);
|
|
}
|
|
offset+= len;
|
|
buffer+= len;
|
|
DBUG_PRINT("info",
|
|
("len: %u offset: %lu curr: %lu length: %lu",
|
|
len, (ulong) offset, (ulong) data->current_offset,
|
|
(ulong) length));
|
|
}
|
|
if (translog_record_read_next_chunk(data))
|
|
{
|
|
translog_destroy_reader_data(data);
|
|
DBUG_RETURN(requested_length - length);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
@brief Force skipping to the next buffer
|
|
|
|
@todo Do not copy old page content if all page protections are switched off
|
|
(because we do not need calculate something or change old parts of the page)
|
|
*/
|
|
|
|
static void translog_force_current_buffer_to_finish()
|
|
{
|
|
TRANSLOG_ADDRESS new_buff_beginning;
|
|
uint16 old_buffer_no= log_descriptor.bc.buffer_no;
|
|
uint16 new_buffer_no= (old_buffer_no + 1) % TRANSLOG_BUFFERS_NO;
|
|
struct st_translog_buffer *new_buffer= (log_descriptor.buffers +
|
|
new_buffer_no);
|
|
struct st_translog_buffer *old_buffer= log_descriptor.bc.buffer;
|
|
uchar *data= log_descriptor.bc.ptr - log_descriptor.bc.current_page_fill;
|
|
uint16 left= TRANSLOG_PAGE_SIZE - log_descriptor.bc.current_page_fill;
|
|
uint16 UNINIT_VAR(current_page_fill), write_counter, previous_offset;
|
|
DBUG_ENTER("translog_force_current_buffer_to_finish");
|
|
|
|
DBUG_PRINT("enter", ("Buffer #%u %p "
|
|
"Buffer addr: " LSN_FMT " "
|
|
"Page addr: " LSN_FMT " "
|
|
"size: %lu (%lu) Pg: %u left: %u in progress %u",
|
|
(uint) old_buffer_no,
|
|
old_buffer,
|
|
LSN_IN_PARTS(old_buffer->offset),
|
|
LSN_FILE_NO(log_descriptor.horizon),
|
|
(uint)(LSN_OFFSET(log_descriptor.horizon) -
|
|
log_descriptor.bc.current_page_fill),
|
|
(ulong) old_buffer->size,
|
|
(ulong) (log_descriptor.bc.ptr -log_descriptor.bc.
|
|
buffer->buffer),
|
|
(uint) log_descriptor.bc.current_page_fill,
|
|
(uint) left,
|
|
(uint) old_buffer->
|
|
copy_to_buffer_in_progress));
|
|
translog_lock_assert_owner();
|
|
new_buff_beginning= old_buffer->offset;
|
|
new_buff_beginning+= old_buffer->size; /* increase offset */
|
|
|
|
DBUG_ASSERT(log_descriptor.bc.ptr !=NULL);
|
|
DBUG_ASSERT(LSN_FILE_NO(log_descriptor.horizon) ==
|
|
LSN_FILE_NO(old_buffer->offset) ||
|
|
translog_status == TRANSLOG_READONLY );
|
|
translog_check_cursor(&log_descriptor.bc);
|
|
DBUG_ASSERT(left < TRANSLOG_PAGE_SIZE);
|
|
if (left)
|
|
{
|
|
/*
|
|
TODO: if 'left' is so small that can't hold any other record
|
|
then do not move the page
|
|
*/
|
|
DBUG_PRINT("info", ("left: %u", (uint) left));
|
|
|
|
old_buffer->pre_force_close_horizon=
|
|
old_buffer->offset + old_buffer->size;
|
|
/* decrease offset */
|
|
new_buff_beginning-= log_descriptor.bc.current_page_fill;
|
|
current_page_fill= log_descriptor.bc.current_page_fill;
|
|
|
|
memset(log_descriptor.bc.ptr, TRANSLOG_FILLER, left);
|
|
old_buffer->size+= left;
|
|
DBUG_PRINT("info", ("Finish Page buffer #%u: %p "
|
|
"Size: %lu",
|
|
(uint) old_buffer->buffer_no,
|
|
old_buffer,
|
|
(ulong) old_buffer->size));
|
|
DBUG_ASSERT(old_buffer->buffer_no ==
|
|
log_descriptor.bc.buffer_no);
|
|
}
|
|
else
|
|
{
|
|
log_descriptor.bc.current_page_fill= 0;
|
|
}
|
|
|
|
translog_buffer_lock(new_buffer);
|
|
#ifndef DBUG_OFF
|
|
{
|
|
TRANSLOG_ADDRESS offset= new_buffer->offset;
|
|
TRANSLOG_FILE *file= new_buffer->file;
|
|
uint8 ver= new_buffer->ver;
|
|
translog_lock_assert_owner();
|
|
#endif
|
|
translog_wait_for_buffer_free(new_buffer);
|
|
#ifndef DBUG_OFF
|
|
/* We keep the handler locked so nobody can start this new buffer */
|
|
DBUG_ASSERT(offset == new_buffer->offset && new_buffer->file == NULL &&
|
|
(file == NULL ? ver : (uint8)(ver + 1)) == new_buffer->ver);
|
|
}
|
|
#endif
|
|
|
|
write_counter= log_descriptor.bc.write_counter;
|
|
previous_offset= log_descriptor.bc.previous_offset;
|
|
translog_start_buffer(new_buffer, &log_descriptor.bc, new_buffer_no);
|
|
/* Fix buffer offset (which was incorrectly set to horizon) */
|
|
log_descriptor.bc.buffer->offset= new_buff_beginning;
|
|
log_descriptor.bc.write_counter= write_counter;
|
|
log_descriptor.bc.previous_offset= previous_offset;
|
|
new_buffer->prev_last_lsn= BUFFER_MAX_LSN(old_buffer);
|
|
DBUG_PRINT("info", ("prev_last_lsn set to " LSN_FMT " buffer: %p",
|
|
LSN_IN_PARTS(new_buffer->prev_last_lsn),
|
|
new_buffer));
|
|
|
|
/*
|
|
Advances this log pointer, increases writers and let other threads to
|
|
write to the log while we process old page content
|
|
*/
|
|
if (left)
|
|
{
|
|
log_descriptor.bc.ptr+= current_page_fill;
|
|
log_descriptor.bc.buffer->size= log_descriptor.bc.current_page_fill=
|
|
current_page_fill;
|
|
new_buffer->overlay= 1;
|
|
}
|
|
else
|
|
translog_new_page_header(&log_descriptor.horizon, &log_descriptor.bc);
|
|
translog_buffer_increase_writers(new_buffer);
|
|
translog_buffer_unlock(new_buffer);
|
|
|
|
/*
|
|
We have to wait until all writers finish before start changing the
|
|
pages by applying protection and copying the page content in the
|
|
new buffer.
|
|
*/
|
|
#ifndef DBUG_OFF
|
|
{
|
|
TRANSLOG_ADDRESS offset= old_buffer->offset;
|
|
TRANSLOG_FILE *file= old_buffer->file;
|
|
uint8 ver= old_buffer->ver;
|
|
#endif
|
|
/*
|
|
Now only one thread can flush log (buffer can flush many threads but
|
|
log flush log flush where this function is used can do only one thread)
|
|
so no other thread can set is_closing_buffer.
|
|
*/
|
|
DBUG_ASSERT(!old_buffer->is_closing_buffer);
|
|
old_buffer->is_closing_buffer= 1; /* Other flushes will wait */
|
|
DBUG_PRINT("enter", ("Buffer #%u %p is_closing_buffer set",
|
|
(uint) old_buffer->buffer_no, old_buffer));
|
|
translog_wait_for_writers(old_buffer);
|
|
#ifndef DBUG_OFF
|
|
/* We blocked flushing this buffer so the buffer should not changed */
|
|
DBUG_ASSERT(offset == old_buffer->offset && file == old_buffer->file &&
|
|
ver == old_buffer->ver);
|
|
}
|
|
#endif
|
|
|
|
if (log_descriptor.flags & TRANSLOG_SECTOR_PROTECTION)
|
|
{
|
|
translog_put_sector_protection(data, &log_descriptor.bc);
|
|
if (left)
|
|
{
|
|
log_descriptor.bc.write_counter++;
|
|
log_descriptor.bc.previous_offset= current_page_fill;
|
|
}
|
|
else
|
|
{
|
|
DBUG_PRINT("info", ("drop write_counter"));
|
|
log_descriptor.bc.write_counter= 0;
|
|
log_descriptor.bc.previous_offset= 0;
|
|
}
|
|
}
|
|
|
|
if (log_descriptor.flags & TRANSLOG_PAGE_CRC)
|
|
{
|
|
uint32 crc= translog_crc(data + log_descriptor.page_overhead,
|
|
TRANSLOG_PAGE_SIZE -
|
|
log_descriptor.page_overhead);
|
|
DBUG_PRINT("info", ("CRC: 0x%x", crc));
|
|
int4store(data + 3 + 3 + 1, crc);
|
|
}
|
|
old_buffer->is_closing_buffer= 0;
|
|
DBUG_PRINT("enter", ("Buffer #%u %p is_closing_buffer cleared",
|
|
(uint) old_buffer->buffer_no, old_buffer));
|
|
mysql_cond_broadcast(&old_buffer->waiting_filling_buffer);
|
|
|
|
if (left)
|
|
{
|
|
if (log_descriptor.flags &
|
|
(TRANSLOG_PAGE_CRC | TRANSLOG_SECTOR_PROTECTION))
|
|
memcpy(new_buffer->buffer, data, current_page_fill);
|
|
else
|
|
{
|
|
/*
|
|
This page header does not change if we add more data to the page so
|
|
we can not copy it and will not overwrite later
|
|
*/
|
|
new_buffer->skipped_data= current_page_fill;
|
|
TRASH_ALLOC(new_buffer->buffer, current_page_fill);
|
|
DBUG_ASSERT(new_buffer->skipped_data < TRANSLOG_PAGE_SIZE);
|
|
}
|
|
}
|
|
old_buffer->next_buffer_offset= new_buffer->offset;
|
|
translog_buffer_lock(new_buffer);
|
|
new_buffer->prev_buffer_offset= old_buffer->offset;
|
|
translog_buffer_decrease_writers(new_buffer);
|
|
translog_buffer_unlock(new_buffer);
|
|
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Waits while given lsn will be flushed
|
|
|
|
@param lsn log record serial number up to which (inclusive)
|
|
the log has to be flushed
|
|
*/
|
|
|
|
void translog_flush_wait_for_end(LSN lsn)
|
|
{
|
|
DBUG_ENTER("translog_flush_wait_for_end");
|
|
DBUG_PRINT("enter", ("LSN: " LSN_FMT, LSN_IN_PARTS(lsn)));
|
|
mysql_mutex_assert_owner(&log_descriptor.log_flush_lock);
|
|
while (cmp_translog_addr(log_descriptor.flushed, lsn) < 0)
|
|
mysql_cond_wait(&log_descriptor.log_flush_cond,
|
|
&log_descriptor.log_flush_lock);
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Sets goal for the next flush pass and waits for this pass end.
|
|
|
|
@param lsn log record serial number up to which (inclusive)
|
|
the log has to be flushed
|
|
*/
|
|
|
|
void translog_flush_set_new_goal_and_wait(TRANSLOG_ADDRESS lsn)
|
|
{
|
|
int flush_no= log_descriptor.flush_no;
|
|
DBUG_ENTER("translog_flush_set_new_goal_and_wait");
|
|
DBUG_PRINT("enter", ("LSN: " LSN_FMT, LSN_IN_PARTS(lsn)));
|
|
mysql_mutex_assert_owner(&log_descriptor.log_flush_lock);
|
|
if (cmp_translog_addr(lsn, log_descriptor.next_pass_max_lsn) > 0)
|
|
{
|
|
log_descriptor.next_pass_max_lsn= lsn;
|
|
log_descriptor.max_lsn_requester= pthread_self();
|
|
mysql_cond_broadcast(&log_descriptor.new_goal_cond);
|
|
}
|
|
while (flush_no == log_descriptor.flush_no)
|
|
{
|
|
mysql_cond_wait(&log_descriptor.log_flush_cond,
|
|
&log_descriptor.log_flush_lock);
|
|
}
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
|
|
/**
|
|
@brief sync() range of files (inclusive) and directory (by request)
|
|
|
|
@param min min internal file number to flush
|
|
@param max max internal file number to flush
|
|
@param sync_dir need sync directory
|
|
|
|
return Operation status
|
|
@retval 0 OK
|
|
@retval 1 Error
|
|
*/
|
|
|
|
static my_bool translog_sync_files(uint32 min, uint32 max,
|
|
my_bool sync_dir)
|
|
{
|
|
uint fn;
|
|
my_bool rc= 0;
|
|
ulonglong flush_interval;
|
|
DBUG_ENTER("translog_sync_files");
|
|
DBUG_PRINT("info", ("min: %lu max: %lu sync dir: %d",
|
|
(ulong) min, (ulong) max, (int) sync_dir));
|
|
DBUG_ASSERT(min <= max);
|
|
|
|
flush_interval= group_commit_wait;
|
|
if (flush_interval)
|
|
flush_start= microsecond_interval_timer();
|
|
for (fn= min; fn <= max; fn++)
|
|
{
|
|
TRANSLOG_FILE *file= get_logfile_by_number(fn);
|
|
DBUG_ASSERT(file != NULL);
|
|
if (!file->is_sync)
|
|
{
|
|
if (mysql_file_sync(file->handler.file, MYF(MY_WME)))
|
|
{
|
|
rc= 1;
|
|
translog_stop_writing();
|
|
DBUG_RETURN(rc);
|
|
}
|
|
translog_syncs++;
|
|
file->is_sync= 1;
|
|
}
|
|
}
|
|
|
|
if (sync_dir)
|
|
{
|
|
if (!(rc= sync_dir(log_descriptor.directory_fd,
|
|
MYF(MY_WME | MY_IGNORE_BADFD))))
|
|
translog_syncs++;
|
|
}
|
|
|
|
DBUG_RETURN(rc);
|
|
}
|
|
|
|
|
|
/**
|
|
check_skipped_lsn
|
|
|
|
Check if lsn skipped in redo is ok
|
|
*/
|
|
|
|
void check_skipped_lsn(MARIA_HA *info, LSN lsn, my_bool index_file,
|
|
pgcache_page_no_t page)
|
|
{
|
|
if (lsn <= log_descriptor.horizon)
|
|
{
|
|
DBUG_PRINT("info", ("Page is up to date, skipping redo"));
|
|
}
|
|
else
|
|
{
|
|
/* Give error, but don't flood the log */
|
|
if (skipped_lsn_err_count++ < MAX_LSN_ERRORS &&
|
|
! info->s->redo_error_given++)
|
|
{
|
|
eprint(tracef, "Table %s has wrong LSN: " LSN_FMT " on page: %llu",
|
|
(index_file ? info->s->data_file_name.str :
|
|
info->s->index_file_name.str),
|
|
LSN_IN_PARTS(lsn), (ulonglong) page);
|
|
recovery_found_crashed_tables++;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
@brief Flushes buffers with LSNs in them less or equal address <lsn>
|
|
|
|
@param lsn address up to which all LSNs should be flushed,
|
|
can be reset to real last LSN address
|
|
@parem sent_to_disk returns 'sent to disk' position
|
|
@param flush_horizon returns horizon of the flush
|
|
|
|
@note About terminology see comment to translog_flush().
|
|
*/
|
|
|
|
void translog_flush_buffers(TRANSLOG_ADDRESS *lsn,
|
|
TRANSLOG_ADDRESS *sent_to_disk,
|
|
TRANSLOG_ADDRESS *flush_horizon)
|
|
{
|
|
dirty_buffer_mask_t dirty_buffer_mask;
|
|
uint i;
|
|
uint8 UNINIT_VAR(last_buffer_no), start_buffer_no;
|
|
DBUG_ENTER("translog_flush_buffers");
|
|
|
|
/*
|
|
We will recheck information when will lock buffers one by
|
|
one so we can use unprotected read here (this is just for
|
|
speed up buffers processing)
|
|
*/
|
|
dirty_buffer_mask= log_descriptor.dirty_buffer_mask;
|
|
DBUG_PRINT("info", ("Dirty buffer mask: %lx current buffer: %u",
|
|
(ulong) dirty_buffer_mask,
|
|
(uint) log_descriptor.bc.buffer_no));
|
|
for (i= (log_descriptor.bc.buffer_no + 1) % TRANSLOG_BUFFERS_NO;
|
|
i != log_descriptor.bc.buffer_no && !(dirty_buffer_mask & (1 << i));
|
|
i= (i + 1) % TRANSLOG_BUFFERS_NO) {}
|
|
start_buffer_no= i;
|
|
|
|
DBUG_PRINT("info",
|
|
("start from: %u current: %u prev last lsn: " LSN_FMT,
|
|
(uint) start_buffer_no, (uint) log_descriptor.bc.buffer_no,
|
|
LSN_IN_PARTS(log_descriptor.bc.buffer->prev_last_lsn)));
|
|
|
|
/*
|
|
if LSN up to which we have to flush bigger then maximum LSN of previous
|
|
buffer and at least one LSN was saved in the current buffer (last_lsn !=
|
|
LSN_IMPOSSIBLE) then we have to close the current buffer.
|
|
*/
|
|
if (cmp_translog_addr(*lsn, log_descriptor.bc.buffer->prev_last_lsn) > 0 &&
|
|
log_descriptor.bc.buffer->last_lsn != LSN_IMPOSSIBLE)
|
|
{
|
|
struct st_translog_buffer *buffer= log_descriptor.bc.buffer;
|
|
*lsn= log_descriptor.bc.buffer->last_lsn; /* fix lsn if it was horizon */
|
|
DBUG_PRINT("info", ("LSN to flush fixed to last lsn: " LSN_FMT,
|
|
LSN_IN_PARTS(*lsn)));
|
|
last_buffer_no= log_descriptor.bc.buffer_no;
|
|
log_descriptor.is_everything_flushed= 1;
|
|
translog_force_current_buffer_to_finish();
|
|
translog_buffer_unlock(buffer);
|
|
}
|
|
else
|
|
{
|
|
if (log_descriptor.bc.buffer->last_lsn == LSN_IMPOSSIBLE &&
|
|
log_descriptor.bc.buffer->prev_last_lsn == LSN_IMPOSSIBLE)
|
|
{
|
|
DBUG_PRINT("info", ("There is no LSNs yet generated => do nothing"));
|
|
translog_unlock();
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
/* fix lsn if it was horizon */
|
|
*lsn= log_descriptor.bc.buffer->prev_last_lsn;
|
|
DBUG_PRINT("info", ("LSN to flush fixed to prev last lsn: " LSN_FMT,
|
|
LSN_IN_PARTS(*lsn)));
|
|
last_buffer_no= ((log_descriptor.bc.buffer_no + TRANSLOG_BUFFERS_NO -1) %
|
|
TRANSLOG_BUFFERS_NO);
|
|
translog_unlock();
|
|
}
|
|
/* flush buffers */
|
|
*sent_to_disk= translog_get_sent_to_disk();
|
|
if (cmp_translog_addr(*lsn, *sent_to_disk) > 0)
|
|
{
|
|
|
|
DBUG_PRINT("info", ("Start buffer #: %u last buffer #: %u",
|
|
(uint) start_buffer_no, (uint) last_buffer_no));
|
|
last_buffer_no= (last_buffer_no + 1) % TRANSLOG_BUFFERS_NO;
|
|
i= start_buffer_no;
|
|
do
|
|
{
|
|
struct st_translog_buffer *buffer= log_descriptor.buffers + i;
|
|
translog_buffer_lock(buffer);
|
|
DBUG_PRINT("info", ("Check buffer: %p #: %u "
|
|
"prev last LSN: " LSN_FMT " "
|
|
"last LSN: " LSN_FMT " status: %s",
|
|
buffer,
|
|
(uint) i,
|
|
LSN_IN_PARTS(buffer->prev_last_lsn),
|
|
LSN_IN_PARTS(buffer->last_lsn),
|
|
(buffer->file ?
|
|
"dirty" : "closed")));
|
|
if (buffer->prev_last_lsn <= *lsn &&
|
|
buffer->file != NULL)
|
|
{
|
|
DBUG_ASSERT(*flush_horizon <= buffer->offset + buffer->size);
|
|
*flush_horizon= (buffer->pre_force_close_horizon != LSN_IMPOSSIBLE ?
|
|
buffer->pre_force_close_horizon :
|
|
buffer->offset + buffer->size);
|
|
/* pre_force_close_horizon is reset during new buffer start */
|
|
DBUG_PRINT("info", ("flush_horizon: " LSN_FMT,
|
|
LSN_IN_PARTS(*flush_horizon)));
|
|
DBUG_ASSERT(*flush_horizon <= log_descriptor.horizon);
|
|
|
|
translog_buffer_flush(buffer);
|
|
}
|
|
translog_buffer_unlock(buffer);
|
|
i= (i + 1) % TRANSLOG_BUFFERS_NO;
|
|
} while (i != last_buffer_no);
|
|
*sent_to_disk= translog_get_sent_to_disk();
|
|
}
|
|
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
/**
|
|
@brief Flush the log up to given LSN (included)
|
|
|
|
@param lsn log record serial number up to which (inclusive)
|
|
the log has to be flushed
|
|
|
|
@return Operation status
|
|
@retval 0 OK
|
|
@retval 1 Error
|
|
|
|
@note
|
|
|
|
- Non group commit logic: Commits made in passes. Thread which started
|
|
flush first is performing actual flush, other threads sets new goal (LSN)
|
|
of the next pass (if it is maximum) and waits for the pass end or just
|
|
wait for the pass end.
|
|
|
|
- If hard group commit enabled and rate set to zero:
|
|
The first thread sends all changed buffers to disk. This is repeated
|
|
as long as there are new LSNs added. The process can not loop
|
|
forever because we have limited number of threads and they will wait
|
|
for the data to be synced.
|
|
Pseudo code:
|
|
|
|
do
|
|
send changed buffers to disk
|
|
while new_goal
|
|
sync
|
|
|
|
- If hard group commit switched ON and less than rate microseconds has
|
|
passed from last sync, then after buffers have been sent to disk
|
|
wait until rate microseconds has passed since last sync, do sync and return.
|
|
This ensures that if we call sync infrequently we don't do any waits.
|
|
|
|
- If soft group commit enabled everything works as with 'non group commit'
|
|
but the thread doesn't do any real sync(). If rate is not zero the
|
|
sync() will be performed by a service thread with the given rate
|
|
when needed (new LSN appears).
|
|
|
|
@note Terminology:
|
|
'sent to disk' means written to disk but not sync()ed,
|
|
'flushed' mean sent to disk and synced().
|
|
*/
|
|
|
|
my_bool translog_flush(TRANSLOG_ADDRESS lsn)
|
|
{
|
|
struct timespec abstime;
|
|
ulonglong UNINIT_VAR(flush_interval);
|
|
ulonglong time_spent;
|
|
LSN sent_to_disk= LSN_IMPOSSIBLE;
|
|
TRANSLOG_ADDRESS flush_horizon;
|
|
my_bool rc= 0;
|
|
my_bool hgroup_commit_at_start;
|
|
DBUG_ENTER("translog_flush");
|
|
DBUG_PRINT("enter", ("Flush up to LSN: " LSN_FMT, LSN_IN_PARTS(lsn)));
|
|
DBUG_ASSERT(translog_status == TRANSLOG_OK ||
|
|
translog_status == TRANSLOG_READONLY);
|
|
|
|
mysql_mutex_lock(&log_descriptor.log_flush_lock);
|
|
DBUG_PRINT("info", ("Everything is flushed up to " LSN_FMT,
|
|
LSN_IN_PARTS(log_descriptor.flushed)));
|
|
if (cmp_translog_addr(log_descriptor.flushed, lsn) >= 0)
|
|
{
|
|
mysql_mutex_unlock(&log_descriptor.log_flush_lock);
|
|
DBUG_RETURN(0);
|
|
}
|
|
if (log_descriptor.flush_in_progress)
|
|
{
|
|
translog_lock();
|
|
/* fix lsn if it was horizon */
|
|
if (cmp_translog_addr(lsn, log_descriptor.bc.buffer->last_lsn) > 0)
|
|
lsn= BUFFER_MAX_LSN(log_descriptor.bc.buffer);
|
|
translog_unlock();
|
|
translog_flush_set_new_goal_and_wait(lsn);
|
|
if (!pthread_equal(log_descriptor.max_lsn_requester, pthread_self()))
|
|
{
|
|
/*
|
|
translog_flush_wait_for_end() release log_flush_lock while is
|
|
waiting then acquire it again
|
|
*/
|
|
translog_flush_wait_for_end(lsn);
|
|
mysql_mutex_unlock(&log_descriptor.log_flush_lock);
|
|
DBUG_RETURN(0);
|
|
}
|
|
log_descriptor.next_pass_max_lsn= LSN_IMPOSSIBLE;
|
|
}
|
|
log_descriptor.flush_in_progress= 1;
|
|
flush_horizon= log_descriptor.previous_flush_horizon;
|
|
DBUG_PRINT("info", ("flush_in_progress is set, flush_horizon: " LSN_FMT,
|
|
LSN_IN_PARTS(flush_horizon)));
|
|
mysql_mutex_unlock(&log_descriptor.log_flush_lock);
|
|
|
|
hgroup_commit_at_start= hard_group_commit;
|
|
if (hgroup_commit_at_start)
|
|
flush_interval= group_commit_wait;
|
|
|
|
translog_lock();
|
|
if (log_descriptor.is_everything_flushed)
|
|
{
|
|
DBUG_PRINT("info", ("everything is flushed"));
|
|
translog_unlock();
|
|
mysql_mutex_lock(&log_descriptor.log_flush_lock);
|
|
goto out;
|
|
}
|
|
|
|
for (;;)
|
|
{
|
|
/* Following function flushes buffers and makes translog_unlock() */
|
|
translog_flush_buffers(&lsn, &sent_to_disk, &flush_horizon);
|
|
|
|
if (!hgroup_commit_at_start)
|
|
break; /* flush pass is ended */
|
|
|
|
retest:
|
|
/*
|
|
We do not check time here because mysql_mutex_lock rarely takes
|
|
a lot of time so we can sacrifice a bit precision to performance
|
|
(taking into account that microsecond_interval_timer() might be
|
|
expensive call).
|
|
*/
|
|
if (flush_interval == 0)
|
|
break; /* flush pass is ended */
|
|
|
|
mysql_mutex_lock(&log_descriptor.log_flush_lock);
|
|
if (log_descriptor.next_pass_max_lsn == LSN_IMPOSSIBLE)
|
|
{
|
|
if (flush_interval == 0 ||
|
|
(time_spent= (microsecond_interval_timer() - flush_start)) >=
|
|
flush_interval)
|
|
{
|
|
mysql_mutex_unlock(&log_descriptor.log_flush_lock);
|
|
break;
|
|
}
|
|
DBUG_PRINT("info", ("flush waits: %llu interval: %llu spent: %llu",
|
|
flush_interval - time_spent,
|
|
flush_interval, time_spent));
|
|
/* wait time or next goal */
|
|
set_timespec_nsec(abstime, flush_interval - time_spent);
|
|
mysql_cond_timedwait(&log_descriptor.new_goal_cond,
|
|
&log_descriptor.log_flush_lock,
|
|
&abstime);
|
|
mysql_mutex_unlock(&log_descriptor.log_flush_lock);
|
|
DBUG_PRINT("info", ("retest conditions"));
|
|
goto retest;
|
|
}
|
|
|
|
/* take next goal */
|
|
lsn= log_descriptor.next_pass_max_lsn;
|
|
log_descriptor.next_pass_max_lsn= LSN_IMPOSSIBLE;
|
|
/* prevent other thread from continue */
|
|
log_descriptor.max_lsn_requester= pthread_self();
|
|
DBUG_PRINT("info", ("flush took next goal: " LSN_FMT,
|
|
LSN_IN_PARTS(lsn)));
|
|
mysql_mutex_unlock(&log_descriptor.log_flush_lock);
|
|
|
|
/* next flush pass */
|
|
DBUG_PRINT("info", ("next flush pass"));
|
|
translog_lock();
|
|
}
|
|
|
|
/*
|
|
sync() files from previous flush till current one
|
|
*/
|
|
if (!soft_sync || hgroup_commit_at_start)
|
|
{
|
|
if ((rc=
|
|
translog_sync_files(LSN_FILE_NO(log_descriptor.flushed),
|
|
LSN_FILE_NO(lsn),
|
|
sync_log_dir >= TRANSLOG_SYNC_DIR_ALWAYS &&
|
|
(LSN_FILE_NO(log_descriptor.
|
|
previous_flush_horizon) !=
|
|
LSN_FILE_NO(flush_horizon) ||
|
|
(LSN_OFFSET(log_descriptor.
|
|
previous_flush_horizon) /
|
|
TRANSLOG_PAGE_SIZE) !=
|
|
(LSN_OFFSET(flush_horizon) /
|
|
TRANSLOG_PAGE_SIZE)))))
|
|
{
|
|
sent_to_disk= LSN_IMPOSSIBLE;
|
|
mysql_mutex_lock(&log_descriptor.log_flush_lock);
|
|
goto out;
|
|
}
|
|
/* keep values for soft sync() and forced sync() actual */
|
|
{
|
|
uint32 fileno= LSN_FILE_NO(lsn);
|
|
soft_sync_min= fileno;
|
|
soft_sync_max= fileno;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
soft_sync_max= LSN_FILE_NO(lsn);
|
|
soft_need_sync= 1;
|
|
}
|
|
|
|
DBUG_ASSERT(flush_horizon <= log_descriptor.horizon);
|
|
|
|
mysql_mutex_lock(&log_descriptor.log_flush_lock);
|
|
log_descriptor.previous_flush_horizon= flush_horizon;
|
|
out:
|
|
if (sent_to_disk != LSN_IMPOSSIBLE)
|
|
log_descriptor.flushed= sent_to_disk;
|
|
log_descriptor.flush_in_progress= 0;
|
|
log_descriptor.flush_no++;
|
|
DBUG_PRINT("info", ("flush_in_progress is dropped"));
|
|
mysql_mutex_unlock(&log_descriptor.log_flush_lock);
|
|
mysql_cond_broadcast(&log_descriptor.log_flush_cond);
|
|
DBUG_RETURN(rc);
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Gives a 2-byte-id to MARIA_SHARE and logs this fact
|
|
|
|
If a MARIA_SHARE does not yet have a 2-byte-id (unique over all currently
|
|
open MARIA_SHAREs), give it one and record this assignment in the log
|
|
(LOGREC_FILE_ID log record).
|
|
|
|
@param tbl_info table
|
|
@param trn calling transaction
|
|
|
|
@return Operation status
|
|
@retval 0 OK
|
|
@retval 1 Error
|
|
|
|
@note Can be called even if share already has an id (then will do nothing)
|
|
*/
|
|
|
|
int translog_assign_id_to_share(MARIA_HA *tbl_info, TRN *trn)
|
|
{
|
|
uint16 id;
|
|
MARIA_SHARE *share= tbl_info->s;
|
|
/*
|
|
If you give an id to a non-BLOCK_RECORD table, you also need to release
|
|
this id somewhere. Then you can change the assertion.
|
|
*/
|
|
DBUG_ASSERT(share->data_file_type == BLOCK_RECORD);
|
|
/* re-check under mutex to avoid having 2 ids for the same share */
|
|
mysql_mutex_lock(&share->intern_lock);
|
|
if (unlikely(share->id == 0))
|
|
{
|
|
LSN lsn;
|
|
LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 2];
|
|
uchar log_data[FILEID_STORE_SIZE];
|
|
/* Inspired by set_short_trid() of trnman.c */
|
|
uint i= share->kfile.file % SHARE_ID_MAX + 1;
|
|
id= 0;
|
|
do
|
|
{
|
|
for ( ; i <= SHARE_ID_MAX ; i++) /* the range is [1..SHARE_ID_MAX] */
|
|
{
|
|
void *tmp= NULL;
|
|
if (id_to_share[i] == NULL &&
|
|
my_atomic_casptr((void **)&id_to_share[i], &tmp, share))
|
|
{
|
|
id= (uint16) i;
|
|
break;
|
|
}
|
|
}
|
|
i= 1; /* scan the whole array */
|
|
} while (id == 0);
|
|
DBUG_PRINT("info", ("id_to_share: %p -> %u", share, id));
|
|
fileid_store(log_data, id);
|
|
log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data;
|
|
log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
|
|
/*
|
|
open_file_name is an unresolved name (symlinks are not resolved, datadir
|
|
is not realpath-ed, etc) which is good: the log can be moved to another
|
|
directory and continue working.
|
|
*/
|
|
log_array[TRANSLOG_INTERNAL_PARTS + 1].str=
|
|
(uchar *)share->open_file_name.str;
|
|
log_array[TRANSLOG_INTERNAL_PARTS + 1].length=
|
|
share->open_file_name.length + 1;
|
|
/*
|
|
We can't unlock share->intern_lock before the log entry is written to
|
|
ensure no one uses the id before it's logged.
|
|
*/
|
|
if (unlikely(translog_write_record(&lsn, LOGREC_FILE_ID, trn, tbl_info,
|
|
(translog_size_t)
|
|
(sizeof(log_data) +
|
|
log_array[TRANSLOG_INTERNAL_PARTS +
|
|
1].length),
|
|
sizeof(log_array)/sizeof(log_array[0]),
|
|
log_array, NULL, NULL)))
|
|
{
|
|
mysql_mutex_unlock(&share->intern_lock);
|
|
return 1;
|
|
}
|
|
/*
|
|
Now when translog record is done, we can set share->id.
|
|
If we set it before, then translog_write_record may pick up the id
|
|
before it's written to the log.
|
|
*/
|
|
share->id= id;
|
|
share->state.logrec_file_id= lsn;
|
|
}
|
|
mysql_mutex_unlock(&share->intern_lock);
|
|
return 0;
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Recycles a MARIA_SHARE's short id.
|
|
|
|
@param share table
|
|
|
|
@note Must be called only if share has an id (i.e. id != 0)
|
|
*/
|
|
|
|
void translog_deassign_id_from_share(MARIA_SHARE *share)
|
|
{
|
|
DBUG_PRINT("info", ("id_to_share: %p id %u -> 0",
|
|
share, share->id));
|
|
/*
|
|
We don't need any mutex as we are called only when closing the last
|
|
instance of the table or at the end of REPAIR: no writes can be
|
|
happening. But a Checkpoint may be reading share->id, so we require this
|
|
mutex:
|
|
*/
|
|
mysql_mutex_assert_owner(&share->intern_lock);
|
|
my_atomic_storeptr((void **)&id_to_share[share->id], 0);
|
|
share->id= 0;
|
|
/* useless but safety: */
|
|
share->lsn_of_file_id= LSN_IMPOSSIBLE;
|
|
}
|
|
|
|
|
|
void translog_assign_id_to_share_from_recovery(MARIA_SHARE *share,
|
|
uint16 id)
|
|
{
|
|
DBUG_ASSERT(maria_in_recovery && !maria_multi_threaded);
|
|
DBUG_ASSERT(share->data_file_type == BLOCK_RECORD);
|
|
DBUG_ASSERT(share->id == 0);
|
|
DBUG_ASSERT(id_to_share[id] == NULL);
|
|
id_to_share[share->id= id]= share;
|
|
}
|
|
|
|
|
|
/**
|
|
@brief check if such log file exists
|
|
|
|
@param file_no number of the file to test
|
|
|
|
@retval 0 no such file
|
|
@retval 1 there is file with such number
|
|
*/
|
|
|
|
my_bool translog_is_file(uint file_no)
|
|
{
|
|
MY_STAT stat_buff;
|
|
char path[FN_REFLEN];
|
|
return (MY_TEST(mysql_file_stat(key_file_translog,
|
|
translog_filename_by_fileno(file_no, path),
|
|
&stat_buff, MYF(0))));
|
|
}
|
|
|
|
|
|
/**
|
|
@brief returns minimum log file number
|
|
|
|
@param horizon the end of the log
|
|
@param is_protected true if it is under purge_log protection
|
|
|
|
@retval minimum file number
|
|
@retval 0 no files found
|
|
*/
|
|
|
|
static uint32 translog_first_file(TRANSLOG_ADDRESS horizon, int is_protected)
|
|
{
|
|
uint min_file= 1, max_file;
|
|
DBUG_ENTER("translog_first_file");
|
|
if (!is_protected)
|
|
mysql_mutex_lock(&log_descriptor.purger_lock);
|
|
if (log_descriptor.min_file_number)
|
|
{
|
|
min_file= log_descriptor.min_file_number;
|
|
if (translog_is_file(log_descriptor.min_file_number))
|
|
{
|
|
DBUG_PRINT("info", ("cached %lu",
|
|
(ulong) log_descriptor.min_file_number));
|
|
if (!is_protected)
|
|
mysql_mutex_unlock(&log_descriptor.purger_lock);
|
|
DBUG_RETURN(log_descriptor.min_file_number);
|
|
}
|
|
}
|
|
|
|
max_file= LSN_FILE_NO(horizon);
|
|
if (!translog_is_file(max_file))
|
|
{
|
|
if (!is_protected)
|
|
mysql_mutex_unlock(&log_descriptor.purger_lock);
|
|
DBUG_RETURN(max_file); /* For compatibility */
|
|
}
|
|
|
|
/* binary search for last file */
|
|
while (min_file < max_file)
|
|
{
|
|
uint test= (min_file + max_file) / 2;
|
|
DBUG_PRINT("info", ("min_file: %u test: %u max_file: %u",
|
|
min_file, test, max_file));
|
|
if (translog_is_file(test))
|
|
max_file= test;
|
|
else
|
|
min_file= test+1;
|
|
}
|
|
log_descriptor.min_file_number= max_file;
|
|
if (!is_protected)
|
|
mysql_mutex_unlock(&log_descriptor.purger_lock);
|
|
DBUG_PRINT("info", ("first file :%lu", (ulong) max_file));
|
|
DBUG_ASSERT(max_file >= 1);
|
|
DBUG_RETURN(max_file);
|
|
}
|
|
|
|
|
|
/**
|
|
@brief returns the most close LSN higher the given chunk address
|
|
|
|
@param addr the chunk address to start from
|
|
@param horizon the horizon if it is known or LSN_IMPOSSIBLE
|
|
|
|
@retval LSN_ERROR Error
|
|
@retval LSN_IMPOSSIBLE no LSNs after the address
|
|
@retval # LSN of the most close LSN higher the given chunk address
|
|
*/
|
|
|
|
LSN translog_next_LSN(TRANSLOG_ADDRESS addr, TRANSLOG_ADDRESS horizon)
|
|
{
|
|
TRANSLOG_SCANNER_DATA scanner;
|
|
LSN result;
|
|
DBUG_ENTER("translog_next_LSN");
|
|
|
|
if (horizon == LSN_IMPOSSIBLE)
|
|
horizon= translog_get_horizon();
|
|
|
|
if (addr == horizon)
|
|
DBUG_RETURN(LSN_IMPOSSIBLE);
|
|
|
|
translog_scanner_init(addr, 0, &scanner, 1);
|
|
/*
|
|
addr can point not to a chunk beginning but page end so next
|
|
page beginning.
|
|
*/
|
|
if (addr % TRANSLOG_PAGE_SIZE == 0)
|
|
{
|
|
/*
|
|
We are emulating the page end which cased such horizon value to
|
|
trigger translog_scanner_eop().
|
|
|
|
We can't just increase addr on page header overhead because it
|
|
can be file end so we allow translog_get_next_chunk() to skip
|
|
to the next page in correct way
|
|
*/
|
|
scanner.page_addr-= TRANSLOG_PAGE_SIZE;
|
|
scanner.page_offset= TRANSLOG_PAGE_SIZE;
|
|
#ifndef DBUG_OFF
|
|
scanner.page= NULL; /* prevent using incorrect page content */
|
|
#endif
|
|
}
|
|
/* addr can point not to a chunk beginning but to a page end */
|
|
if (translog_scanner_eop(&scanner))
|
|
{
|
|
if (translog_get_next_chunk(&scanner))
|
|
{
|
|
result= LSN_ERROR;
|
|
goto out;
|
|
}
|
|
if (scanner.page == END_OF_LOG)
|
|
{
|
|
result= LSN_IMPOSSIBLE;
|
|
goto out;
|
|
}
|
|
}
|
|
|
|
while (!translog_is_LSN_chunk(scanner.page[scanner.page_offset]) &&
|
|
scanner.page[scanner.page_offset] != TRANSLOG_FILLER)
|
|
{
|
|
if (translog_get_next_chunk(&scanner))
|
|
{
|
|
result= LSN_ERROR;
|
|
goto out;
|
|
}
|
|
if (scanner.page == END_OF_LOG)
|
|
{
|
|
result= LSN_IMPOSSIBLE;
|
|
goto out;
|
|
}
|
|
}
|
|
|
|
if (scanner.page[scanner.page_offset] == TRANSLOG_FILLER)
|
|
result= LSN_IMPOSSIBLE; /* reached page filler */
|
|
else
|
|
result= scanner.page_addr + scanner.page_offset;
|
|
out:
|
|
translog_destroy_scanner(&scanner);
|
|
DBUG_RETURN(result);
|
|
}
|
|
|
|
|
|
/**
|
|
@brief returns the LSN of the first record starting in this log
|
|
|
|
@retval LSN_ERROR Error
|
|
@retval LSN_IMPOSSIBLE no log or the log is empty
|
|
@retval # LSN of the first record
|
|
*/
|
|
|
|
LSN translog_first_lsn_in_log()
|
|
{
|
|
TRANSLOG_ADDRESS addr, horizon= translog_get_horizon();
|
|
TRANSLOG_VALIDATOR_DATA data;
|
|
uint file;
|
|
uint16 chunk_offset;
|
|
uchar *page;
|
|
DBUG_ENTER("translog_first_lsn_in_log");
|
|
DBUG_PRINT("info", ("Horizon: " LSN_FMT, LSN_IN_PARTS(horizon)));
|
|
DBUG_ASSERT(translog_status == TRANSLOG_OK ||
|
|
translog_status == TRANSLOG_READONLY);
|
|
|
|
if (!(file= translog_first_file(horizon, 0)))
|
|
{
|
|
/* log has no records yet */
|
|
DBUG_RETURN(LSN_IMPOSSIBLE);
|
|
}
|
|
|
|
addr= MAKE_LSN(file, TRANSLOG_PAGE_SIZE); /* the first page of the file */
|
|
data.addr= &addr;
|
|
{
|
|
TRANSLOG_PAGE_SIZE_BUFF psize_buff;
|
|
if ((page= translog_get_page(&data, psize_buff.buffer, NULL)) == NULL ||
|
|
(chunk_offset= translog_get_first_chunk_offset(page)) == 0)
|
|
DBUG_RETURN(LSN_ERROR);
|
|
}
|
|
addr+= chunk_offset;
|
|
|
|
DBUG_RETURN(translog_next_LSN(addr, horizon));
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Returns theoretical first LSN if first log is present
|
|
|
|
@retval LSN_ERROR Error
|
|
@retval LSN_IMPOSSIBLE no log
|
|
@retval # LSN of the first record
|
|
*/
|
|
|
|
LSN translog_first_theoretical_lsn()
|
|
{
|
|
TRANSLOG_ADDRESS addr= translog_get_horizon();
|
|
TRANSLOG_PAGE_SIZE_BUFF psize_buff;
|
|
uchar *page;
|
|
TRANSLOG_VALIDATOR_DATA data;
|
|
DBUG_ENTER("translog_first_theoretical_lsn");
|
|
DBUG_PRINT("info", ("Horizon: " LSN_FMT, LSN_IN_PARTS(addr)));
|
|
DBUG_ASSERT(translog_status == TRANSLOG_OK ||
|
|
translog_status == TRANSLOG_READONLY);
|
|
|
|
if (!translog_is_file(1))
|
|
DBUG_RETURN(LSN_IMPOSSIBLE);
|
|
if (addr == MAKE_LSN(1, TRANSLOG_PAGE_SIZE))
|
|
{
|
|
/* log has no records yet */
|
|
DBUG_RETURN(MAKE_LSN(1, TRANSLOG_PAGE_SIZE +
|
|
log_descriptor.page_overhead));
|
|
}
|
|
|
|
addr= MAKE_LSN(1, TRANSLOG_PAGE_SIZE); /* the first page of the file */
|
|
data.addr= &addr;
|
|
if ((page= translog_get_page(&data, psize_buff.buffer, NULL)) == NULL)
|
|
DBUG_RETURN(LSN_ERROR);
|
|
|
|
DBUG_RETURN(MAKE_LSN(1, TRANSLOG_PAGE_SIZE +
|
|
page_overhead[page[TRANSLOG_PAGE_FLAGS]]));
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Checks given low water mark and purge files if it is need
|
|
|
|
@param low the last (minimum) address which is need
|
|
|
|
@retval 0 OK
|
|
@retval 1 Error
|
|
*/
|
|
|
|
my_bool translog_purge(TRANSLOG_ADDRESS low)
|
|
{
|
|
uint32 last_need_file= LSN_FILE_NO(low);
|
|
uint32 min_unsync;
|
|
int soft;
|
|
TRANSLOG_ADDRESS horizon= translog_get_horizon();
|
|
int rc= 0;
|
|
DBUG_ENTER("translog_purge");
|
|
DBUG_PRINT("enter", ("low: " LSN_FMT, LSN_IN_PARTS(low)));
|
|
DBUG_ASSERT(translog_status == TRANSLOG_OK ||
|
|
translog_status == TRANSLOG_READONLY);
|
|
|
|
soft= soft_sync;
|
|
min_unsync= soft_sync_min;
|
|
DBUG_PRINT("info", ("min_unsync: %lu", (ulong) min_unsync));
|
|
if (soft && min_unsync < last_need_file)
|
|
{
|
|
last_need_file= min_unsync;
|
|
DBUG_PRINT("info", ("last_need_file set to %lu", (ulong)last_need_file));
|
|
}
|
|
|
|
mysql_mutex_lock(&log_descriptor.purger_lock);
|
|
DBUG_PRINT("info", ("last_lsn_checked file: %lu:",
|
|
(ulong) log_descriptor.last_lsn_checked));
|
|
if (LSN_FILE_NO(log_descriptor.last_lsn_checked) < last_need_file)
|
|
{
|
|
uint32 i;
|
|
uint32 min_file= translog_first_file(horizon, 1);
|
|
DBUG_ASSERT(min_file != 0); /* log is already started */
|
|
DBUG_PRINT("info", ("min_file: %lu:",(ulong) min_file));
|
|
for(i= min_file; i < last_need_file && rc == 0; i++)
|
|
{
|
|
LSN lsn= translog_get_file_max_lsn_stored(i);
|
|
if (lsn == LSN_IMPOSSIBLE)
|
|
break; /* files are still in writing */
|
|
if (lsn == LSN_ERROR)
|
|
{
|
|
rc= 1;
|
|
break;
|
|
}
|
|
if (cmp_translog_addr(lsn, low) >= 0)
|
|
break;
|
|
|
|
DBUG_PRINT("info", ("purge file %lu", (ulong) i));
|
|
|
|
/* remove file descriptor from the cache */
|
|
/*
|
|
log_descriptor.min_file can be changed only here during execution
|
|
and the function is serialized, so we can access it without problems
|
|
*/
|
|
if (i >= log_descriptor.min_file)
|
|
{
|
|
TRANSLOG_FILE *file;
|
|
mysql_rwlock_wrlock(&log_descriptor.open_files_lock);
|
|
DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
|
|
log_descriptor.open_files.elements);
|
|
DBUG_ASSERT(log_descriptor.min_file == i);
|
|
file= *((TRANSLOG_FILE **)pop_dynamic(&log_descriptor.open_files));
|
|
DBUG_PRINT("info", ("Files : %d", log_descriptor.open_files.elements));
|
|
DBUG_ASSERT(i == file->number);
|
|
log_descriptor.min_file++;
|
|
DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
|
|
log_descriptor.open_files.elements);
|
|
mysql_rwlock_unlock(&log_descriptor.open_files_lock);
|
|
translog_close_log_file(file);
|
|
}
|
|
if (log_purge_type == TRANSLOG_PURGE_IMMIDIATE && ! log_purge_disabled)
|
|
{
|
|
char path[FN_REFLEN], *file_name;
|
|
file_name= translog_filename_by_fileno(i, path);
|
|
rc= MY_TEST(mysql_file_delete(key_file_translog,
|
|
file_name, MYF(MY_WME)));
|
|
}
|
|
}
|
|
if (unlikely(rc == 1))
|
|
log_descriptor.min_need_file= 0; /* impossible value */
|
|
else
|
|
log_descriptor.min_need_file= i;
|
|
}
|
|
|
|
mysql_mutex_unlock(&log_descriptor.purger_lock);
|
|
DBUG_RETURN(rc);
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Purges files by stored min need file in case of
|
|
"one demand" purge type
|
|
|
|
@note This function do real work only if it is "one demand" purge type
|
|
and translog_purge() was called at least once and last time without
|
|
errors
|
|
|
|
@retval 0 OK
|
|
@retval 1 Error
|
|
*/
|
|
|
|
my_bool translog_purge_at_flush()
|
|
{
|
|
uint32 i, min_file;
|
|
int rc= 0;
|
|
DBUG_ENTER("translog_purge_at_flush");
|
|
DBUG_ASSERT(translog_status == TRANSLOG_OK ||
|
|
translog_status == TRANSLOG_READONLY);
|
|
|
|
if (unlikely(translog_status == TRANSLOG_READONLY))
|
|
{
|
|
DBUG_PRINT("info", ("The log is read only => exit"));
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
if (log_purge_type != TRANSLOG_PURGE_ONDEMAND)
|
|
{
|
|
DBUG_PRINT("info", ("It is not \"at_flush\" => exit"));
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
mysql_mutex_lock(&log_descriptor.purger_lock);
|
|
|
|
if (unlikely(log_descriptor.min_need_file == 0 || log_purge_disabled))
|
|
{
|
|
DBUG_PRINT("info", ("No info about min need file => exit"));
|
|
mysql_mutex_unlock(&log_descriptor.purger_lock);
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
min_file= translog_first_file(translog_get_horizon(), 1);
|
|
DBUG_ASSERT(min_file != 0); /* log is already started */
|
|
for(i= min_file; i < log_descriptor.min_need_file ; i++)
|
|
{
|
|
char path[FN_REFLEN], *file_name;
|
|
DBUG_PRINT("info", ("purge file %lu\n", (ulong) i));
|
|
file_name= translog_filename_by_fileno(i, path);
|
|
rc|= MY_TEST(mysql_file_delete(key_file_translog,
|
|
file_name, MYF(MY_WME)));
|
|
DBUG_ASSERT(rc == 0);
|
|
}
|
|
|
|
mysql_mutex_unlock(&log_descriptor.purger_lock);
|
|
DBUG_RETURN(rc);
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Gets min file number
|
|
|
|
@param horizon the end of the log
|
|
|
|
@retval minimum file number
|
|
@retval 0 no files found
|
|
*/
|
|
|
|
uint32 translog_get_first_file(TRANSLOG_ADDRESS horizon)
|
|
{
|
|
return translog_first_file(horizon, 0);
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Gets min file number which is needed
|
|
|
|
@retval minimum file number
|
|
@retval 0 unknown
|
|
*/
|
|
|
|
uint32 translog_get_first_needed_file()
|
|
{
|
|
uint32 file_no;
|
|
mysql_mutex_lock(&log_descriptor.purger_lock);
|
|
file_no= log_descriptor.min_need_file;
|
|
mysql_mutex_unlock(&log_descriptor.purger_lock);
|
|
return file_no;
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Gets transaction log file size
|
|
|
|
@return transaction log file size
|
|
*/
|
|
|
|
uint32 translog_get_file_size()
|
|
{
|
|
uint32 res;
|
|
translog_lock();
|
|
res= log_descriptor.log_file_max_size;
|
|
translog_unlock();
|
|
return (res);
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Sets transaction log file size
|
|
|
|
@return Returns actually set transaction log size
|
|
*/
|
|
|
|
void translog_set_file_size(uint32 size)
|
|
{
|
|
struct st_translog_buffer *old_buffer= NULL;
|
|
DBUG_ENTER("translog_set_file_size");
|
|
translog_lock();
|
|
DBUG_PRINT("enter", ("Size: %lu", (ulong) size));
|
|
DBUG_ASSERT(size % TRANSLOG_PAGE_SIZE == 0);
|
|
DBUG_ASSERT(size >= TRANSLOG_MIN_FILE_SIZE);
|
|
log_descriptor.log_file_max_size= size;
|
|
/* if current file longer then finish it*/
|
|
if (LSN_OFFSET(log_descriptor.horizon) >= log_descriptor.log_file_max_size)
|
|
{
|
|
old_buffer= log_descriptor.bc.buffer;
|
|
translog_buffer_next(&log_descriptor.horizon, &log_descriptor.bc, 1);
|
|
translog_buffer_unlock(old_buffer);
|
|
}
|
|
translog_unlock();
|
|
if (old_buffer)
|
|
{
|
|
translog_buffer_lock(old_buffer);
|
|
translog_buffer_flush(old_buffer);
|
|
translog_buffer_unlock(old_buffer);
|
|
}
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
|
|
/**
|
|
Write debug information to log if we EXTRA_DEBUG is enabled
|
|
*/
|
|
|
|
my_bool translog_log_debug_info(TRN *trn __attribute__((unused)),
|
|
enum translog_debug_info_type type
|
|
__attribute__((unused)),
|
|
uchar *info __attribute__((unused)),
|
|
size_t length __attribute__((unused)))
|
|
{
|
|
#ifdef EXTRA_DEBUG
|
|
LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 2];
|
|
uchar debug_type;
|
|
LSN lsn;
|
|
|
|
if (!trn)
|
|
{
|
|
/*
|
|
We can't log the current transaction because we don't have
|
|
an active transaction. Use a temporary transaction object instead
|
|
*/
|
|
trn= &dummy_transaction_object;
|
|
}
|
|
debug_type= (uchar) type;
|
|
log_array[TRANSLOG_INTERNAL_PARTS + 0].str= &debug_type;
|
|
log_array[TRANSLOG_INTERNAL_PARTS + 0].length= 1;
|
|
log_array[TRANSLOG_INTERNAL_PARTS + 1].str= info;
|
|
log_array[TRANSLOG_INTERNAL_PARTS + 1].length= length;
|
|
return translog_write_record(&lsn, LOGREC_DEBUG_INFO,
|
|
trn, NULL,
|
|
(translog_size_t) (1+ length),
|
|
sizeof(log_array)/sizeof(log_array[0]),
|
|
log_array, NULL, NULL);
|
|
#else
|
|
return 0;
|
|
#endif
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
Sets soft sync mode
|
|
|
|
@param mode TRUE if we need switch soft sync on else off
|
|
*/
|
|
|
|
void translog_soft_sync(my_bool mode)
|
|
{
|
|
soft_sync= mode;
|
|
}
|
|
|
|
|
|
/**
|
|
Sets hard group commit
|
|
|
|
@param mode TRUE if we need switch hard group commit on else off
|
|
*/
|
|
|
|
void translog_hard_group_commit(my_bool mode)
|
|
{
|
|
hard_group_commit= mode;
|
|
}
|
|
|
|
|
|
/**
|
|
@brief forced log sync (used when we are switching modes)
|
|
*/
|
|
|
|
void translog_sync()
|
|
{
|
|
DBUG_ENTER("ma_translog_sync");
|
|
|
|
/* The following is only true if initalization of translog succeded */
|
|
if (log_descriptor.open_files.elements != 0)
|
|
{
|
|
uint32 max= get_current_logfile()->number;
|
|
uint32 min;
|
|
|
|
min= soft_sync_min;
|
|
if (!min)
|
|
min= max;
|
|
|
|
translog_sync_files(min, max, sync_log_dir >= TRANSLOG_SYNC_DIR_ALWAYS);
|
|
}
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
/**
|
|
@brief set rate for group commit
|
|
|
|
@param interval interval to set.
|
|
|
|
@note We use this function with additional variable because have to
|
|
restart service thread with new value which we can't make inside changing
|
|
variable routine (update_maria_group_commit_interval)
|
|
*/
|
|
|
|
void translog_set_group_commit_interval(uint32 interval)
|
|
{
|
|
DBUG_ENTER("translog_set_group_commit_interval");
|
|
group_commit_wait= interval;
|
|
DBUG_PRINT("info", ("wait: %llu",
|
|
(ulonglong)group_commit_wait));
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
|
|
/**
|
|
@brief syncing service thread
|
|
*/
|
|
|
|
static pthread_handler_t
|
|
ma_soft_sync_background( void *arg __attribute__((unused)))
|
|
{
|
|
|
|
my_thread_init();
|
|
{
|
|
DBUG_ENTER("ma_soft_sync_background");
|
|
for(;;)
|
|
{
|
|
ulonglong prev_loop= microsecond_interval_timer();
|
|
ulonglong time, sleep;
|
|
uint32 min, max, sync_request;
|
|
min= soft_sync_min;
|
|
max= soft_sync_max;
|
|
sync_request= soft_need_sync;
|
|
soft_sync_min= max;
|
|
soft_need_sync= 0;
|
|
|
|
sleep= group_commit_wait;
|
|
if (sync_request)
|
|
translog_sync_files(min, max, FALSE);
|
|
time= microsecond_interval_timer() - prev_loop;
|
|
if (time > sleep)
|
|
sleep= 0;
|
|
else
|
|
sleep-= time;
|
|
if (my_service_thread_sleep(&soft_sync_control, sleep))
|
|
break;
|
|
}
|
|
my_thread_end();
|
|
DBUG_RETURN(0);
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Starts syncing thread
|
|
*/
|
|
|
|
int translog_soft_sync_start(void)
|
|
{
|
|
int res= 0;
|
|
uint32 min, max;
|
|
DBUG_ENTER("translog_soft_sync_start");
|
|
|
|
/* check and init variables */
|
|
min= soft_sync_min;
|
|
max= soft_sync_max;
|
|
if (!max)
|
|
soft_sync_max= max= get_current_logfile()->number;
|
|
if (!min)
|
|
soft_sync_min= max;
|
|
soft_need_sync= 1;
|
|
|
|
if (!(res= ma_service_thread_control_init(&soft_sync_control)))
|
|
if ((res= mysql_thread_create(key_thread_soft_sync,
|
|
&soft_sync_control.thread, NULL,
|
|
ma_soft_sync_background, NULL)))
|
|
soft_sync_control.killed= TRUE;
|
|
DBUG_RETURN(res);
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Stops syncing thread
|
|
*/
|
|
|
|
void translog_soft_sync_end(void)
|
|
{
|
|
DBUG_ENTER("translog_soft_sync_end");
|
|
if (soft_sync_control.inited)
|
|
{
|
|
ma_service_thread_control_end(&soft_sync_control);
|
|
}
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Dump information about file header page.
|
|
*/
|
|
|
|
static void dump_header_page(uchar *buff)
|
|
{
|
|
LOGHANDLER_FILE_INFO desc;
|
|
char strbuff[21];
|
|
translog_interpret_file_header(&desc, buff);
|
|
printf(" This can be header page:\n"
|
|
" Timestamp: %s\n"
|
|
" Aria log version: %lu\n"
|
|
" Server version: %lu\n"
|
|
" Server id %lu\n"
|
|
" Page size %lu\n",
|
|
llstr(desc.timestamp, strbuff),
|
|
desc.maria_version,
|
|
desc.mysql_version,
|
|
desc.server_id,
|
|
desc.page_size);
|
|
if (desc.page_size != TRANSLOG_PAGE_SIZE)
|
|
printf(" WARNING: page size is not equal compiled in one %lu!!!\n",
|
|
(ulong) TRANSLOG_PAGE_SIZE);
|
|
printf(" File number %lu\n"
|
|
" Max lsn: " LSN_FMT "\n",
|
|
desc.file_number,
|
|
LSN_IN_PARTS(desc.max_lsn));
|
|
}
|
|
|
|
/*
  Printable names of the record classes; indexed by the 'rclass' member of
  log_record_type_descriptor[] entries. The table is read only, so the
  pointers themselves are const as well.
*/
static const char *const record_class_string[]=
{
  "LOGRECTYPE_NOT_ALLOWED",
  "LOGRECTYPE_VARIABLE_LENGTH",
  "LOGRECTYPE_PSEUDOFIXEDLENGTH",
  "LOGRECTYPE_FIXEDLENGTH"
};
|
|
|
|
|
|
/**
|
|
@brief dump information about transaction log chunk
|
|
|
|
@param buffer reference to the whole page
|
|
@param ptr pointer to the chunk
|
|
|
|
@reval # reference to the next chunk
|
|
@retval NULL can't interpret data
|
|
*/
|
|
|
|
static uchar *dump_chunk(uchar *buffer, uchar *ptr)
|
|
{
|
|
uint length;
|
|
if (*ptr == TRANSLOG_FILLER)
|
|
{
|
|
printf(" Filler till the page end\n");
|
|
for (; ptr < buffer + TRANSLOG_PAGE_SIZE; ptr++)
|
|
{
|
|
if (*ptr != TRANSLOG_FILLER)
|
|
{
|
|
printf(" WARNING: non filler character met before page end "
|
|
"(page + 0x%04x: 0x%02x) (stop interpretation)!!!",
|
|
(uint) (ptr - buffer), (uint) ptr[0]);
|
|
return NULL;
|
|
}
|
|
}
|
|
return ptr;
|
|
}
|
|
if (*ptr == 0 || *ptr == 0xFF)
|
|
{
|
|
printf(" WARNING: chunk can't start from 0x0 "
|
|
"(stop interpretation)!!!\n");
|
|
return NULL;
|
|
}
|
|
switch (ptr[0] & TRANSLOG_CHUNK_TYPE) {
|
|
case TRANSLOG_CHUNK_LSN:
|
|
printf(" LSN chunk type 0 (variable length)\n");
|
|
if (likely((ptr[0] & TRANSLOG_REC_TYPE) != TRANSLOG_CHUNK_0_CONT))
|
|
{
|
|
printf(" Record type %u: %s record class %s compressed LSNs: %u\n",
|
|
ptr[0] & TRANSLOG_REC_TYPE,
|
|
(log_record_type_descriptor[ptr[0] & TRANSLOG_REC_TYPE].name ?
|
|
log_record_type_descriptor[ptr[0] & TRANSLOG_REC_TYPE].name :
|
|
"NULL"),
|
|
record_class_string[log_record_type_descriptor[ptr[0] &
|
|
TRANSLOG_REC_TYPE].
|
|
rclass],
|
|
log_record_type_descriptor[ptr[0] & TRANSLOG_REC_TYPE].
|
|
compressed_LSN);
|
|
if (log_record_type_descriptor[ptr[0] & TRANSLOG_REC_TYPE].rclass !=
|
|
LOGRECTYPE_VARIABLE_LENGTH)
|
|
{
|
|
printf(" WARNING: this record class here can't be used "
|
|
"(stop interpretation)!!!\n");
|
|
break;
|
|
}
|
|
}
|
|
else
|
|
printf(" Continuation of previous chunk 0 header \n");
|
|
printf(" Short transaction id: %u\n", (uint) uint2korr(ptr + 1));
|
|
{
|
|
uchar *hdr_ptr= ptr + 1 + 2; /* chunk type and short trid */
|
|
uint16 chunk_len;
|
|
printf (" Record length: %lu\n",
|
|
(ulong) translog_variable_record_1group_decode_len(&hdr_ptr));
|
|
chunk_len= uint2korr(hdr_ptr);
|
|
if (chunk_len == 0)
|
|
printf (" It is 1 group record (chunk length == 0)\n");
|
|
else
|
|
{
|
|
uint16 groups, i;
|
|
|
|
printf (" Chunk length %u\n", (uint) chunk_len);
|
|
groups= uint2korr(hdr_ptr + 2);
|
|
hdr_ptr+= 4;
|
|
printf (" Number of groups left to the end %u:\n", (uint) groups);
|
|
for(i= 0;
|
|
i < groups && hdr_ptr < buffer + TRANSLOG_PAGE_SIZE;
|
|
i++, hdr_ptr+= LSN_STORE_SIZE + 1)
|
|
{
|
|
TRANSLOG_ADDRESS gpr_addr= lsn_korr(hdr_ptr);
|
|
uint pages= hdr_ptr[LSN_STORE_SIZE];
|
|
printf (" Group +#%u: " LSN_FMT " pages: %u\n",
|
|
(uint) i, LSN_IN_PARTS(gpr_addr), pages);
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
case TRANSLOG_CHUNK_FIXED:
|
|
printf(" LSN chunk type 1 (fixed size)\n");
|
|
printf(" Record type %u: %s record class %s compressed LSNs: %u\n",
|
|
ptr[0] & TRANSLOG_REC_TYPE,
|
|
(log_record_type_descriptor[ptr[0] & TRANSLOG_REC_TYPE].name ?
|
|
log_record_type_descriptor[ptr[0] & TRANSLOG_REC_TYPE].name :
|
|
"NULL"),
|
|
record_class_string[log_record_type_descriptor[ptr[0] &
|
|
TRANSLOG_REC_TYPE].
|
|
rclass],
|
|
log_record_type_descriptor[ptr[0] & TRANSLOG_REC_TYPE].
|
|
compressed_LSN);
|
|
if (log_record_type_descriptor[ptr[0] & TRANSLOG_REC_TYPE].rclass !=
|
|
LOGRECTYPE_PSEUDOFIXEDLENGTH &&
|
|
log_record_type_descriptor[ptr[0] & TRANSLOG_REC_TYPE].rclass !=
|
|
LOGRECTYPE_FIXEDLENGTH)
|
|
{
|
|
printf(" WARNING: this record class here can't be used "
|
|
"(stop interpretation)!!!\n");
|
|
}
|
|
printf(" Short transaction id: %u\n", (uint) uint2korr(ptr + 1));
|
|
break;
|
|
case TRANSLOG_CHUNK_NOHDR:
|
|
printf(" No header chunk type 2(till the end of the page)\n");
|
|
if (ptr[0] & TRANSLOG_REC_TYPE)
|
|
{
|
|
printf(" WARNING: chunk header content record type: 0x%02x "
|
|
"(dtop interpretation)!!!",
|
|
(uint) ptr[0]);
|
|
return NULL;
|
|
}
|
|
break;
|
|
case TRANSLOG_CHUNK_LNGTH:
|
|
printf(" Chunk with length type 3\n");
|
|
if (ptr[0] & TRANSLOG_REC_TYPE)
|
|
{
|
|
printf(" WARNING: chunk header content record type: 0x%02x "
|
|
"(dtop interpretation)!!!",
|
|
(uint) ptr[0]);
|
|
return NULL;
|
|
}
|
|
break;
|
|
}
|
|
{
|
|
intptr offset= ptr - buffer;
|
|
DBUG_ASSERT(offset <= UINT_MAX16);
|
|
length= translog_get_total_chunk_length(buffer, (uint16)offset);
|
|
}
|
|
printf(" Length %u\n", length);
|
|
ptr+= length;
|
|
return ptr;
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Dump information about page with data.
|
|
*/
|
|
|
|
static void dump_datapage(uchar *buffer, File handler)
|
|
{
|
|
uchar *ptr;
|
|
ulong offset;
|
|
uint32 page, file;
|
|
uint header_len;
|
|
printf(" Page: %ld File number: %ld\n",
|
|
(ulong) (page= uint3korr(buffer)),
|
|
(ulong) (file= uint3korr(buffer + 3)));
|
|
if (page == 0)
|
|
printf(" WARNING: page == 0!!!\n");
|
|
if (file == 0)
|
|
printf(" WARNING: file == 0!!!\n");
|
|
offset= page * TRANSLOG_PAGE_SIZE;
|
|
printf(" Flags (0x%x):\n", (uint) buffer[TRANSLOG_PAGE_FLAGS]);
|
|
if (buffer[TRANSLOG_PAGE_FLAGS])
|
|
{
|
|
if (buffer[TRANSLOG_PAGE_FLAGS] & TRANSLOG_PAGE_CRC)
|
|
printf(" Page CRC\n");
|
|
if (buffer[TRANSLOG_PAGE_FLAGS] & TRANSLOG_SECTOR_PROTECTION)
|
|
printf(" Sector protection\n");
|
|
if (buffer[TRANSLOG_PAGE_FLAGS] & TRANSLOG_RECORD_CRC)
|
|
printf(" Record CRC (WARNING: not yet implemented!!!)\n");
|
|
if (buffer[TRANSLOG_PAGE_FLAGS] & ~(TRANSLOG_PAGE_CRC |
|
|
TRANSLOG_SECTOR_PROTECTION |
|
|
TRANSLOG_RECORD_CRC))
|
|
{
|
|
printf(" WARNING: unknown flags (stop interpretation)!!!\n");
|
|
return;
|
|
}
|
|
}
|
|
else
|
|
printf(" No flags\n");
|
|
printf(" Page header length: %u\n",
|
|
(header_len= page_overhead[buffer[TRANSLOG_PAGE_FLAGS]]));
|
|
if (buffer[TRANSLOG_PAGE_FLAGS] & TRANSLOG_RECORD_CRC)
|
|
{
|
|
uint32 crc= uint4korr(buffer + TRANSLOG_PAGE_FLAGS + 1);
|
|
uint32 ccrc;
|
|
printf (" Page CRC 0x%04lx\n", (ulong) crc);
|
|
ccrc= translog_crc(buffer + header_len, TRANSLOG_PAGE_SIZE - header_len);
|
|
if (crc != ccrc)
|
|
printf(" WARNING: calculated CRC: 0x%04lx!!!\n", (ulong) ccrc);
|
|
}
|
|
if (buffer[TRANSLOG_PAGE_FLAGS] & TRANSLOG_SECTOR_PROTECTION)
|
|
{
|
|
TRANSLOG_FILE tfile;
|
|
{
|
|
uchar *table= buffer + header_len -
|
|
TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE;
|
|
uint i;
|
|
printf(" Sector protection current value: 0x%02x\n", (uint) table[0]);
|
|
for (i= 1; i < TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE; i++)
|
|
{
|
|
printf(" Sector protection in sector: 0x%02x saved value 0x%02x\n",
|
|
(uint)buffer[i * DISK_DRIVE_SECTOR_SIZE],
|
|
(uint)table[i]);
|
|
}
|
|
}
|
|
tfile.number= file;
|
|
bzero(&tfile.handler, sizeof(tfile.handler));
|
|
tfile.handler.file= handler;
|
|
tfile.was_recovered= 0;
|
|
tfile.is_sync= 1;
|
|
if (translog_check_sector_protection(buffer, &tfile))
|
|
printf(" WARNING: sector protection found problems!!!\n");
|
|
}
|
|
ptr= buffer + header_len;
|
|
while (ptr && ptr < buffer + TRANSLOG_PAGE_SIZE)
|
|
{
|
|
printf(" Chunk %d %lld:\n",
|
|
file,((longlong) (ptr - buffer)+ offset));
|
|
ptr= dump_chunk(buffer, ptr);
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Dump information about page.
|
|
*/
|
|
|
|
void dump_page(uchar *buffer, File handler)
|
|
{
|
|
if (strncmp((char*)maria_trans_file_magic, (char*)buffer,
|
|
sizeof(maria_trans_file_magic)) == 0)
|
|
{
|
|
dump_header_page(buffer);
|
|
}
|
|
dump_datapage(buffer, handler);
|
|
}
|
|
|
|
|
|
/*
|
|
Handle backup calls
|
|
*/
|
|
|
|
void translog_disable_purge()
|
|
{
|
|
mysql_mutex_lock(&log_descriptor.purger_lock);
|
|
log_purge_disabled++;
|
|
mysql_mutex_unlock(&log_descriptor.purger_lock);
|
|
}
|
|
|
|
void translog_enable_purge()
|
|
{
|
|
mysql_mutex_lock(&log_descriptor.purger_lock);
|
|
log_purge_disabled--;
|
|
mysql_mutex_unlock(&log_descriptor.purger_lock);
|
|
}
|