mirror of
https://github.com/MariaDB/server.git
synced 2025-01-22 06:44:16 +01:00
59eaf292de
Added missing DBUG_xxx_RETURN statements Fixed some usage of not initialized variables (as found by valgrind) Ensure that we don't remove locked tables used as name locks from open table cache until unlock_table_names() are called. This was fixed by having drop_locked_name() returning any table used as a name lock so that we can free it in unlock_table_names() This will allow Tomas to continue with his work to use namelocks to syncronize things. Note: valgrind still produces a lot of warnings about using not initialized code and shows memory loss errors when running the ndb tests BitKeeper/etc/ignore: added mysql-test/r/*.log client/mysqltest.c: Change type of variables to get rid of compiler warnings More debugging Fix memory leak mysql-test/mysql-test-run.sh: Collect warnings about missing DBUG_RETURN statements mysql-test/r/lock_multi.result: Add test of new code mysql-test/r/ndb_condition_pushdown.result: Drop used tables before test mysql-test/t/lock_multi.test: Add test of new code mysql-test/t/ndb_condition_pushdown.test: Drop used tables before test mysql-test/valgrind.supp: Ignore 'safe' warnings from libz (when used with archive) sql/event.cc: More comments Simplify code Fixed memory leak found by valgrind sql/ha_archive.cc: Remove compiler warnings (Wrong handlerton structure and signed/unsigned comparison) sql/ha_berkeley.cc: Fixed compiler warning sql/ha_blackhole.cc: Fixed compiler warning sql/ha_federated.cc: Fixed compiler warning sql/ha_heap.cc: Fixed compiler warning sql/ha_myisam.cc: Fixed compiler warning sql/ha_myisammrg.cc: Fixed compiler warning sql/ha_ndbcluster.cc: Fixed compiler warnings sql/ha_partition.cc: Fixed compiler warning Fixed error noticed by valgrind in ha_partition::rnd_init() sql/handler.cc: Fixed compiler warning sql/handler.h: Fixed compiler warning sql/item.cc: Fixed compiler warning sql/item_xmlfunc.cc: Fixed warning from valgrind when calling memcpy with wrong address sql/lock.cc: More debugging sql/log.cc: Fixed compiler warning Indentation fixes sql/log.h: Fixed compiler warning sql/mysql_priv.h: Changed prototype for 'drop_locked_tables' sql/opt_range.cc: Indentation fix sql/password.c: Removed compiler warnings sql/set_var.cc: Fixed compiler warning sql/slave.cc: Fixed compiler warning sql/sp_head.cc: Fixed compiler warning sql/sql_acl.cc: Fixed compiler warning sql/sql_analyse.cc: Added missing DBUG_RETURN statements sql/sql_base.cc: Removed de-reference of not initialized pointer More comments drop_locked_tables() changed to not delete tables used for name locking Fixed compiler warnings sql/sql_delete.cc: Fixed usage of not initialized variable (deleted could be referenced in some not common error conditions) sql/sql_parse.cc: Added missing DBUG_VOID_RETURN Simplify code sql/sql_partition.cc: Fixed usage of wrong variable (noticed by valgrind) sql/sql_plugin.cc: Removed compiler warning sql/sql_show.cc: Removed compiler warning sql/sql_table.cc: Ensure that we don't remove locked tables used as name locks from open table cache until unlock_table_names() are called. This was fixed by having drop_locked_name() returning any table used as a name lock so that we can free it in unlock_table_names() This will allow Tomas to continue with his work to use namelocks to syncronize things. Fixed wrong test of 'table_type' (path_length could otherwise be accessed uninitialized) Remove compile warnings about not initialized variables. sql/sql_yacc.yy: Ensure that no_write_to_binlog is properly initialized (Was accessed uninitialized by partition code) sql/table.cc: Removed valgrind warnings (not fatal) Removed compiler warnings sql/tztime.cc: Removed valgrind warning storage/ndb/include/ndbapi/NdbIndexStat.hpp: Removed compiler warning
1499 lines
42 KiB
C++
1499 lines
42 KiB
C++
/* Copyright (C) 2003 MySQL AB
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
|
|
|
|
#ifdef USE_PRAGMA_IMPLEMENTATION
|
|
#pragma implementation // gcc: Class implementation
|
|
#endif
|
|
|
|
#include "mysql_priv.h"
|
|
|
|
#include "ha_archive.h"
|
|
#include <my_dir.h>
|
|
|
|
/*
|
|
First, if you want to understand storage engines you should look at
|
|
ha_example.cc and ha_example.h.
|
|
This example was written as a test case for a customer who needed
|
|
a storage engine without indexes that could compress data very well.
|
|
So, welcome to a completely compressed storage engine. This storage
|
|
engine only does inserts. No replace, deletes, or updates. All reads are
|
|
complete table scans. Compression is done through azip (bzip compresses
|
|
better, but only marginally, if someone asks I could add support for
|
|
it too, but beaware that it costs a lot more in CPU time then azip).
|
|
|
|
We keep a file pointer open for each instance of ha_archive for each read
|
|
but for writes we keep one open file handle just for that. We flush it
|
|
only if we have a read occur. azip handles compressing lots of records
|
|
at once much better then doing lots of little records between writes.
|
|
It is possible to not lock on writes but this would then mean we couldn't
|
|
handle bulk inserts as well (that is if someone was trying to read at
|
|
the same time since we would want to flush).
|
|
|
|
A "meta" file is kept alongside the data file. This file serves two purpose.
|
|
The first purpose is to track the number of rows in the table. The second
|
|
purpose is to determine if the table was closed properly or not. When the
|
|
meta file is first opened it is marked as dirty. It is opened when the table
|
|
itself is opened for writing. When the table is closed the new count for rows
|
|
is written to the meta file and the file is marked as clean. If the meta file
|
|
is opened and it is marked as dirty, it is assumed that a crash occured. At
|
|
this point an error occurs and the user is told to rebuild the file.
|
|
A rebuild scans the rows and rewrites the meta file. If corruption is found
|
|
in the data file then the meta file is not repaired.
|
|
|
|
At some point a recovery method for such a drastic case needs to be divised.
|
|
|
|
Locks are row level, and you will get a consistant read.
|
|
|
|
For performance as far as table scans go it is quite fast. I don't have
|
|
good numbers but locally it has out performed both Innodb and MyISAM. For
|
|
Innodb the question will be if the table can be fit into the buffer
|
|
pool. For MyISAM its a question of how much the file system caches the
|
|
MyISAM file. With enough free memory MyISAM is faster. Its only when the OS
|
|
doesn't have enough memory to cache entire table that archive turns out
|
|
to be any faster. For writes it is always a bit slower then MyISAM. It has no
|
|
internal limits though for row length.
|
|
|
|
Examples between MyISAM (packed) and Archive.
|
|
|
|
Table with 76695844 identical rows:
|
|
29680807 a_archive.ARZ
|
|
920350317 a.MYD
|
|
|
|
|
|
Table with 8991478 rows (all of Slashdot's comments):
|
|
1922964506 comment_archive.ARZ
|
|
2944970297 comment_text.MYD
|
|
|
|
|
|
TODO:
|
|
Add bzip optional support.
|
|
Allow users to set compression level.
|
|
Add truncate table command.
|
|
Implement versioning, should be easy.
|
|
Allow for errors, find a way to mark bad rows.
|
|
Talk to the azip guys, come up with a writable format so that updates are doable
|
|
without switching to a block method.
|
|
Add optional feature so that rows can be flushed at interval (which will cause less
|
|
compression but may speed up ordered searches).
|
|
Checkpoint the meta file to allow for faster rebuilds.
|
|
Dirty open (right now the meta file is repaired if a crash occured).
|
|
Option to allow for dirty reads, this would lower the sync calls, which would make
|
|
inserts a lot faster, but would mean highly arbitrary reads.
|
|
|
|
-Brian
|
|
*/
|
|
/*
|
|
Notes on file formats.
|
|
The Meta file is layed out as:
|
|
check - Just an int of 254 to make sure that the the file we are opening was
|
|
never corrupted.
|
|
version - The current version of the file format.
|
|
rows - This is an unsigned long long which is the number of rows in the data
|
|
file.
|
|
check point - Reserved for future use
|
|
auto increment - MAX value for autoincrement
|
|
dirty - Status of the file, whether or not its values are the latest. This
|
|
flag is what causes a repair to occur
|
|
|
|
The data file:
|
|
check - Just an int of 254 to make sure that the the file we are opening was
|
|
never corrupted.
|
|
version - The current version of the file format.
|
|
data - The data is stored in a "row +blobs" format.
|
|
*/
|
|
|
|
/* If the archive storage engine has been inited */
|
|
static bool archive_inited= FALSE;
|
|
/* Variables for archive share methods */
|
|
pthread_mutex_t archive_mutex;
|
|
static HASH archive_open_tables;
|
|
|
|
/* The file extension */
|
|
#define ARZ ".ARZ" // The data file
|
|
#define ARN ".ARN" // Files used during an optimize call
|
|
#define ARM ".ARM" // Meta file
|
|
/*
|
|
uchar + uchar + ulonglong + ulonglong + ulonglong + ulonglong + uchar
|
|
*/
|
|
#define META_BUFFER_SIZE sizeof(uchar) + sizeof(uchar) + sizeof(ulonglong) \
|
|
+ sizeof(ulonglong) + sizeof(ulonglong) + sizeof(ulonglong) + sizeof(uchar)
|
|
|
|
/*
|
|
uchar + uchar
|
|
*/
|
|
#define DATA_BUFFER_SIZE 2 // Size of the data used in the data file
|
|
#define ARCHIVE_CHECK_HEADER 254 // The number we use to determine corruption
|
|
|
|
/* Static declarations for handerton */
|
|
static handler *archive_create_handler(TABLE_SHARE *table);
|
|
/*
|
|
Number of rows that will force a bulk insert.
|
|
*/
|
|
#define ARCHIVE_MIN_ROWS_TO_USE_BULK_INSERT 2
|
|
|
|
|
|
/* dummy handlerton - only to have something to return from archive_db_init */
|
|
handlerton archive_hton = {
|
|
MYSQL_HANDLERTON_INTERFACE_VERSION,
|
|
"ARCHIVE",
|
|
SHOW_OPTION_YES,
|
|
"Archive storage engine",
|
|
DB_TYPE_ARCHIVE_DB,
|
|
archive_db_init,
|
|
0, /* slot */
|
|
0, /* savepoint size. */
|
|
NULL, /* close_connection */
|
|
NULL, /* savepoint */
|
|
NULL, /* rollback to savepoint */
|
|
NULL, /* releas savepoint */
|
|
NULL, /* commit */
|
|
NULL, /* rollback */
|
|
NULL, /* prepare */
|
|
NULL, /* recover */
|
|
NULL, /* commit_by_xid */
|
|
NULL, /* rollback_by_xid */
|
|
NULL, /* create_cursor_read_view */
|
|
NULL, /* set_cursor_read_view */
|
|
NULL, /* close_cursor_read_view */
|
|
archive_create_handler, /* Create a new handler */
|
|
NULL, /* Drop a database */
|
|
archive_db_end, /* Panic call */
|
|
NULL, /* Start Consistent Snapshot */
|
|
NULL, /* Flush logs */
|
|
NULL, /* Show status */
|
|
NULL, /* Partition flags */
|
|
NULL, /* Alter table flags */
|
|
NULL, /* Alter interface */
|
|
NULL, /* fill_files_table */
|
|
HTON_NO_FLAGS,
|
|
NULL, /* binlog_func */
|
|
NULL, /* binlog_log_query */
|
|
NULL /* release_temporary_latches */
|
|
|
|
};
|
|
|
|
static handler *archive_create_handler(TABLE_SHARE *table)
|
|
{
|
|
return new ha_archive(table);
|
|
}
|
|
|
|
/*
|
|
Used for hash table that tracks open tables.
|
|
*/
|
|
static byte* archive_get_key(ARCHIVE_SHARE *share,uint *length,
|
|
my_bool not_used __attribute__((unused)))
|
|
{
|
|
*length=share->table_name_length;
|
|
return (byte*) share->table_name;
|
|
}
|
|
|
|
|
|
/*
|
|
Initialize the archive handler.
|
|
|
|
SYNOPSIS
|
|
archive_db_init()
|
|
void
|
|
|
|
RETURN
|
|
FALSE OK
|
|
TRUE Error
|
|
*/
|
|
|
|
bool archive_db_init()
|
|
{
|
|
DBUG_ENTER("archive_db_init");
|
|
if (pthread_mutex_init(&archive_mutex, MY_MUTEX_INIT_FAST))
|
|
goto error;
|
|
if (hash_init(&archive_open_tables, system_charset_info, 32, 0, 0,
|
|
(hash_get_key) archive_get_key, 0, 0))
|
|
{
|
|
VOID(pthread_mutex_destroy(&archive_mutex));
|
|
}
|
|
else
|
|
{
|
|
archive_inited= TRUE;
|
|
DBUG_RETURN(FALSE);
|
|
}
|
|
error:
|
|
have_archive_db= SHOW_OPTION_DISABLED; // If we couldn't use handler
|
|
DBUG_RETURN(TRUE);
|
|
}
|
|
|
|
/*
|
|
Release the archive handler.
|
|
|
|
SYNOPSIS
|
|
archive_db_end()
|
|
void
|
|
|
|
RETURN
|
|
FALSE OK
|
|
*/
|
|
|
|
int archive_db_end(ha_panic_function type)
|
|
{
|
|
if (archive_inited)
|
|
{
|
|
hash_free(&archive_open_tables);
|
|
VOID(pthread_mutex_destroy(&archive_mutex));
|
|
}
|
|
archive_inited= 0;
|
|
return 0;
|
|
}
|
|
|
|
ha_archive::ha_archive(TABLE_SHARE *table_arg)
|
|
:handler(&archive_hton, table_arg), delayed_insert(0), bulk_insert(0)
|
|
{
|
|
/* Set our original buffer from pre-allocated memory */
|
|
buffer.set((char *)byte_buffer, IO_SIZE, system_charset_info);
|
|
|
|
/* The size of the offset value we will use for position() */
|
|
ref_length = sizeof(my_off_t);
|
|
}
|
|
|
|
/*
|
|
This method reads the header of a datafile and returns whether or not it was successful.
|
|
*/
|
|
int ha_archive::read_data_header(azio_stream *file_to_read)
|
|
{
|
|
uchar data_buffer[DATA_BUFFER_SIZE];
|
|
DBUG_ENTER("ha_archive::read_data_header");
|
|
|
|
if (azrewind(file_to_read) == -1)
|
|
DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE);
|
|
|
|
if (azread(file_to_read, data_buffer, DATA_BUFFER_SIZE) != DATA_BUFFER_SIZE)
|
|
DBUG_RETURN(errno ? errno : -1);
|
|
|
|
DBUG_PRINT("ha_archive::read_data_header", ("Check %u", data_buffer[0]));
|
|
DBUG_PRINT("ha_archive::read_data_header", ("Version %u", data_buffer[1]));
|
|
|
|
if ((data_buffer[0] != (uchar)ARCHIVE_CHECK_HEADER) &&
|
|
(data_buffer[1] != (uchar)ARCHIVE_VERSION))
|
|
DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE);
|
|
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
/*
|
|
This method writes out the header of a datafile and returns whether or not it was successful.
|
|
*/
|
|
int ha_archive::write_data_header(azio_stream *file_to_write)
|
|
{
|
|
uchar data_buffer[DATA_BUFFER_SIZE];
|
|
DBUG_ENTER("ha_archive::write_data_header");
|
|
|
|
data_buffer[0]= (uchar)ARCHIVE_CHECK_HEADER;
|
|
data_buffer[1]= (uchar)ARCHIVE_VERSION;
|
|
|
|
if (azwrite(file_to_write, &data_buffer, DATA_BUFFER_SIZE) !=
|
|
DATA_BUFFER_SIZE)
|
|
goto error;
|
|
DBUG_PRINT("ha_archive::write_data_header", ("Check %u", (uint)data_buffer[0]));
|
|
DBUG_PRINT("ha_archive::write_data_header", ("Version %u", (uint)data_buffer[1]));
|
|
|
|
DBUG_RETURN(0);
|
|
error:
|
|
DBUG_RETURN(errno);
|
|
}
|
|
|
|
/*
|
|
This method reads the header of a meta file and returns whether or not it was successful.
|
|
*rows will contain the current number of rows in the data file upon success.
|
|
*/
|
|
int ha_archive::read_meta_file(File meta_file, ha_rows *rows,
|
|
ulonglong *auto_increment,
|
|
ulonglong *forced_flushes)
|
|
{
|
|
uchar meta_buffer[META_BUFFER_SIZE];
|
|
uchar *ptr= meta_buffer;
|
|
ulonglong check_point;
|
|
|
|
DBUG_ENTER("ha_archive::read_meta_file");
|
|
|
|
VOID(my_seek(meta_file, 0, MY_SEEK_SET, MYF(0)));
|
|
if (my_read(meta_file, (byte*)meta_buffer, META_BUFFER_SIZE, 0) != META_BUFFER_SIZE)
|
|
DBUG_RETURN(-1);
|
|
|
|
/*
|
|
Parse out the meta data, we ignore version at the moment
|
|
*/
|
|
|
|
ptr+= sizeof(uchar)*2; // Move past header
|
|
*rows= (ha_rows)uint8korr(ptr);
|
|
ptr+= sizeof(ulonglong); // Move past rows
|
|
check_point= uint8korr(ptr);
|
|
ptr+= sizeof(ulonglong); // Move past check_point
|
|
*auto_increment= uint8korr(ptr);
|
|
ptr+= sizeof(ulonglong); // Move past auto_increment
|
|
*forced_flushes= uint8korr(ptr);
|
|
ptr+= sizeof(ulonglong); // Move past forced_flush
|
|
|
|
DBUG_PRINT("ha_archive::read_meta_file", ("Check %d", (uint)meta_buffer[0]));
|
|
DBUG_PRINT("ha_archive::read_meta_file", ("Version %d", (uint)meta_buffer[1]));
|
|
DBUG_PRINT("ha_archive::read_meta_file", ("Rows %llu", *rows));
|
|
DBUG_PRINT("ha_archive::read_meta_file", ("Checkpoint %llu", check_point));
|
|
DBUG_PRINT("ha_archive::read_meta_file", ("Auto-Increment %llu", *auto_increment));
|
|
DBUG_PRINT("ha_archive::read_meta_file", ("Forced Flushes %llu", *forced_flushes));
|
|
DBUG_PRINT("ha_archive::read_meta_file", ("Dirty %d", (int)(*ptr)));
|
|
|
|
if ((meta_buffer[0] != (uchar)ARCHIVE_CHECK_HEADER) ||
|
|
((bool)(*ptr)== TRUE))
|
|
DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE);
|
|
|
|
my_sync(meta_file, MYF(MY_WME));
|
|
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
/*
|
|
This method writes out the header of a meta file and returns whether or not it was successful.
|
|
By setting dirty you say whether or not the file represents the actual state of the data file.
|
|
Upon ::open() we set to dirty, and upon ::close() we set to clean.
|
|
*/
|
|
int ha_archive::write_meta_file(File meta_file, ha_rows rows,
|
|
ulonglong auto_increment,
|
|
ulonglong forced_flushes,
|
|
bool dirty)
|
|
{
|
|
uchar meta_buffer[META_BUFFER_SIZE];
|
|
uchar *ptr= meta_buffer;
|
|
ulonglong check_point= 0; //Reserved for the future
|
|
|
|
DBUG_ENTER("ha_archive::write_meta_file");
|
|
|
|
*ptr= (uchar)ARCHIVE_CHECK_HEADER;
|
|
ptr += sizeof(uchar);
|
|
*ptr= (uchar)ARCHIVE_VERSION;
|
|
ptr += sizeof(uchar);
|
|
int8store(ptr, (ulonglong)rows);
|
|
ptr += sizeof(ulonglong);
|
|
int8store(ptr, check_point);
|
|
ptr += sizeof(ulonglong);
|
|
int8store(ptr, auto_increment);
|
|
ptr += sizeof(ulonglong);
|
|
int8store(ptr, forced_flushes);
|
|
ptr += sizeof(ulonglong);
|
|
*ptr= (uchar)dirty;
|
|
DBUG_PRINT("ha_archive::write_meta_file", ("Check %d",
|
|
(uint)ARCHIVE_CHECK_HEADER));
|
|
DBUG_PRINT("ha_archive::write_meta_file", ("Version %d",
|
|
(uint)ARCHIVE_VERSION));
|
|
DBUG_PRINT("ha_archive::write_meta_file", ("Rows %llu", (ulonglong)rows));
|
|
DBUG_PRINT("ha_archive::write_meta_file", ("Checkpoint %llu", check_point));
|
|
DBUG_PRINT("ha_archive::write_meta_file", ("Auto Increment %llu",
|
|
auto_increment));
|
|
DBUG_PRINT("ha_archive::write_meta_file", ("Forced Flushes %llu",
|
|
forced_flushes));
|
|
DBUG_PRINT("ha_archive::write_meta_file", ("Dirty %d", (uint)dirty));
|
|
|
|
VOID(my_seek(meta_file, 0, MY_SEEK_SET, MYF(0)));
|
|
if (my_write(meta_file, (byte *)meta_buffer, META_BUFFER_SIZE, 0) != META_BUFFER_SIZE)
|
|
DBUG_RETURN(-1);
|
|
|
|
my_sync(meta_file, MYF(MY_WME));
|
|
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
|
|
/*
|
|
We create the shared memory space that we will use for the open table.
|
|
No matter what we try to get or create a share. This is so that a repair
|
|
table operation can occur.
|
|
|
|
See ha_example.cc for a longer description.
|
|
*/
|
|
ARCHIVE_SHARE *ha_archive::get_share(const char *table_name,
|
|
TABLE *table, int *rc)
|
|
{
|
|
ARCHIVE_SHARE *share;
|
|
char meta_file_name[FN_REFLEN];
|
|
uint length;
|
|
char *tmp_name;
|
|
DBUG_ENTER("ha_archive::get_share");
|
|
|
|
pthread_mutex_lock(&archive_mutex);
|
|
length=(uint) strlen(table_name);
|
|
|
|
if (!(share=(ARCHIVE_SHARE*) hash_search(&archive_open_tables,
|
|
(byte*) table_name,
|
|
length)))
|
|
{
|
|
if (!my_multi_malloc(MYF(MY_WME | MY_ZEROFILL),
|
|
&share, sizeof(*share),
|
|
&tmp_name, length+1,
|
|
NullS))
|
|
{
|
|
pthread_mutex_unlock(&archive_mutex);
|
|
*rc= HA_ERR_OUT_OF_MEM;
|
|
DBUG_RETURN(NULL);
|
|
}
|
|
|
|
share->use_count= 0;
|
|
share->table_name_length= length;
|
|
share->table_name= tmp_name;
|
|
share->crashed= FALSE;
|
|
fn_format(share->data_file_name,table_name,"",ARZ,MY_REPLACE_EXT|MY_UNPACK_FILENAME);
|
|
fn_format(meta_file_name,table_name,"",ARM,MY_REPLACE_EXT|MY_UNPACK_FILENAME);
|
|
strmov(share->table_name,table_name);
|
|
/*
|
|
We will use this lock for rows.
|
|
*/
|
|
VOID(pthread_mutex_init(&share->mutex,MY_MUTEX_INIT_FAST));
|
|
if ((share->meta_file= my_open(meta_file_name, O_RDWR, MYF(0))) == -1)
|
|
share->crashed= TRUE;
|
|
|
|
/*
|
|
After we read, we set the file to dirty. When we close, we will do the
|
|
opposite. If the meta file will not open we assume it is crashed and
|
|
leave it up to the user to fix.
|
|
*/
|
|
if (read_meta_file(share->meta_file, &share->rows_recorded,
|
|
&share->auto_increment_value,
|
|
&share->forced_flushes))
|
|
share->crashed= TRUE;
|
|
else
|
|
(void)write_meta_file(share->meta_file, share->rows_recorded,
|
|
share->auto_increment_value,
|
|
share->forced_flushes,
|
|
TRUE);
|
|
/*
|
|
It is expensive to open and close the data files and since you can't have
|
|
a gzip file that can be both read and written we keep a writer open
|
|
that is shared amoung all open tables.
|
|
*/
|
|
if (!(azopen(&(share->archive_write), share->data_file_name,
|
|
O_WRONLY|O_APPEND|O_BINARY)))
|
|
{
|
|
DBUG_PRINT("info", ("Could not open archive write file"));
|
|
share->crashed= TRUE;
|
|
}
|
|
VOID(my_hash_insert(&archive_open_tables, (byte*) share));
|
|
thr_lock_init(&share->lock);
|
|
}
|
|
share->use_count++;
|
|
DBUG_PRINT("info", ("archive table %.*s has %d open handles now",
|
|
share->table_name_length, share->table_name,
|
|
share->use_count));
|
|
if (share->crashed)
|
|
*rc= HA_ERR_CRASHED_ON_USAGE;
|
|
pthread_mutex_unlock(&archive_mutex);
|
|
|
|
DBUG_RETURN(share);
|
|
}
|
|
|
|
|
|
/*
|
|
Free the share.
|
|
See ha_example.cc for a description.
|
|
*/
|
|
int ha_archive::free_share(ARCHIVE_SHARE *share)
|
|
{
|
|
int rc= 0;
|
|
DBUG_ENTER("ha_archive::free_share");
|
|
DBUG_PRINT("info", ("archive table %.*s has %d open handles on entrance",
|
|
share->table_name_length, share->table_name,
|
|
share->use_count));
|
|
|
|
pthread_mutex_lock(&archive_mutex);
|
|
if (!--share->use_count)
|
|
{
|
|
hash_delete(&archive_open_tables, (byte*) share);
|
|
thr_lock_delete(&share->lock);
|
|
VOID(pthread_mutex_destroy(&share->mutex));
|
|
/*
|
|
We need to make sure we don't reset the crashed state.
|
|
If we open a crashed file, wee need to close it as crashed unless
|
|
it has been repaired.
|
|
Since we will close the data down after this, we go on and count
|
|
the flush on close;
|
|
*/
|
|
share->forced_flushes++;
|
|
(void)write_meta_file(share->meta_file, share->rows_recorded,
|
|
share->auto_increment_value,
|
|
share->forced_flushes,
|
|
share->crashed ? TRUE :FALSE);
|
|
if (azclose(&(share->archive_write)))
|
|
rc= 1;
|
|
if (my_close(share->meta_file, MYF(0)))
|
|
rc= 1;
|
|
my_free((gptr) share, MYF(0));
|
|
}
|
|
pthread_mutex_unlock(&archive_mutex);
|
|
|
|
DBUG_RETURN(rc);
|
|
}
|
|
|
|
|
|
/*
|
|
We just implement one additional file extension.
|
|
*/
|
|
static const char *ha_archive_exts[] = {
|
|
ARZ,
|
|
ARM,
|
|
NullS
|
|
};
|
|
|
|
const char **ha_archive::bas_ext() const
|
|
{
|
|
return ha_archive_exts;
|
|
}
|
|
|
|
|
|
/*
|
|
When opening a file we:
|
|
Create/get our shared structure.
|
|
Init out lock.
|
|
We open the file we will read from.
|
|
*/
|
|
int ha_archive::open(const char *name, int mode, uint open_options)
|
|
{
|
|
int rc= 0;
|
|
DBUG_ENTER("ha_archive::open");
|
|
|
|
DBUG_PRINT("info", ("archive table was opened for crash %s",
|
|
(open_options & HA_OPEN_FOR_REPAIR) ? "yes" : "no"));
|
|
share= get_share(name, table, &rc);
|
|
|
|
if (rc == HA_ERR_CRASHED_ON_USAGE && !(open_options & HA_OPEN_FOR_REPAIR))
|
|
{
|
|
free_share(share);
|
|
DBUG_RETURN(rc);
|
|
}
|
|
else if (rc == HA_ERR_OUT_OF_MEM)
|
|
{
|
|
DBUG_RETURN(rc);
|
|
}
|
|
|
|
thr_lock_data_init(&share->lock,&lock,NULL);
|
|
|
|
if (!(azopen(&archive, share->data_file_name, O_RDONLY|O_BINARY)))
|
|
{
|
|
if (errno == EROFS || errno == EACCES)
|
|
DBUG_RETURN(my_errno= errno);
|
|
DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE);
|
|
}
|
|
|
|
DBUG_PRINT("info", ("archive table was crashed %s",
|
|
rc == HA_ERR_CRASHED_ON_USAGE ? "yes" : "no"));
|
|
if (rc == HA_ERR_CRASHED_ON_USAGE && open_options & HA_OPEN_FOR_REPAIR)
|
|
{
|
|
DBUG_RETURN(0);
|
|
}
|
|
else
|
|
DBUG_RETURN(rc);
|
|
}
|
|
|
|
|
|
/*
|
|
Closes the file.
|
|
|
|
SYNOPSIS
|
|
close();
|
|
|
|
IMPLEMENTATION:
|
|
|
|
We first close this storage engines file handle to the archive and
|
|
then remove our reference count to the table (and possibly free it
|
|
as well).
|
|
|
|
RETURN
|
|
0 ok
|
|
1 Error
|
|
*/
|
|
|
|
int ha_archive::close(void)
|
|
{
|
|
int rc= 0;
|
|
DBUG_ENTER("ha_archive::close");
|
|
|
|
/* First close stream */
|
|
if (azclose(&archive))
|
|
rc= 1;
|
|
/* then also close share */
|
|
rc|= free_share(share);
|
|
|
|
DBUG_RETURN(rc);
|
|
}
|
|
|
|
|
|
/*
|
|
We create our data file here. The format is pretty simple.
|
|
You can read about the format of the data file above.
|
|
Unlike other storage engines we do not "pack" our data. Since we
|
|
are about to do a general compression, packing would just be a waste of
|
|
CPU time. If the table has blobs they are written after the row in the order
|
|
of creation.
|
|
*/
|
|
|
|
int ha_archive::create(const char *name, TABLE *table_arg,
|
|
HA_CREATE_INFO *create_info)
|
|
{
|
|
File create_file; // We use to create the datafile and the metafile
|
|
char name_buff[FN_REFLEN];
|
|
int error;
|
|
DBUG_ENTER("ha_archive::create");
|
|
|
|
auto_increment_value= (create_info->auto_increment_value ?
|
|
create_info->auto_increment_value -1 :
|
|
(ulonglong) 0);
|
|
|
|
if ((create_file= my_create(fn_format(name_buff,name,"",ARM,
|
|
MY_REPLACE_EXT|MY_UNPACK_FILENAME),0,
|
|
O_RDWR | O_TRUNC,MYF(MY_WME))) < 0)
|
|
{
|
|
error= my_errno;
|
|
goto error;
|
|
}
|
|
|
|
for (uint key= 0; key < table_arg->s->keys; key++)
|
|
{
|
|
KEY *pos= table_arg->key_info+key;
|
|
KEY_PART_INFO *key_part= pos->key_part;
|
|
KEY_PART_INFO *key_part_end= key_part + pos->key_parts;
|
|
|
|
for (; key_part != key_part_end; key_part++)
|
|
{
|
|
Field *field= key_part->field;
|
|
|
|
if (!(field->flags & AUTO_INCREMENT_FLAG))
|
|
{
|
|
error= -1;
|
|
goto error;
|
|
}
|
|
}
|
|
}
|
|
|
|
write_meta_file(create_file, 0, auto_increment_value, 0, FALSE);
|
|
my_close(create_file,MYF(0));
|
|
|
|
/*
|
|
We reuse name_buff since it is available.
|
|
*/
|
|
if ((create_file= my_create(fn_format(name_buff,name,"",ARZ,
|
|
MY_REPLACE_EXT|MY_UNPACK_FILENAME),0,
|
|
O_RDWR | O_TRUNC,MYF(MY_WME))) < 0)
|
|
{
|
|
error= my_errno;
|
|
goto error;
|
|
}
|
|
if (!azdopen(&archive, create_file, O_WRONLY|O_BINARY))
|
|
{
|
|
error= errno;
|
|
goto error2;
|
|
}
|
|
if (write_data_header(&archive))
|
|
{
|
|
error= errno;
|
|
goto error3;
|
|
}
|
|
|
|
if (azclose(&archive))
|
|
{
|
|
error= errno;
|
|
goto error2;
|
|
}
|
|
|
|
DBUG_RETURN(0);
|
|
|
|
error3:
|
|
/* We already have an error, so ignore results of azclose. */
|
|
(void)azclose(&archive);
|
|
error2:
|
|
my_close(create_file, MYF(0));
|
|
delete_table(name);
|
|
error:
|
|
/* Return error number, if we got one */
|
|
DBUG_RETURN(error ? error : -1);
|
|
}
|
|
|
|
/*
|
|
This is where the actual row is written out.
|
|
*/
|
|
int ha_archive::real_write_row(byte *buf, azio_stream *writer)
|
|
{
|
|
my_off_t written;
|
|
uint *ptr, *end;
|
|
DBUG_ENTER("ha_archive::real_write_row");
|
|
|
|
written= azwrite(writer, buf, table->s->reclength);
|
|
DBUG_PRINT("ha_archive::real_write_row", ("Wrote %d bytes expected %d",
|
|
written, table->s->reclength));
|
|
if (!delayed_insert || !bulk_insert)
|
|
share->dirty= TRUE;
|
|
|
|
if (written != (my_off_t)table->s->reclength)
|
|
DBUG_RETURN(errno ? errno : -1);
|
|
/*
|
|
We should probably mark the table as damagaged if the record is written
|
|
but the blob fails.
|
|
*/
|
|
for (ptr= table->s->blob_field, end= ptr + table->s->blob_fields ;
|
|
ptr != end ;
|
|
ptr++)
|
|
{
|
|
char *data_ptr;
|
|
uint32 size= ((Field_blob*) table->field[*ptr])->get_length();
|
|
|
|
if (size)
|
|
{
|
|
((Field_blob*) table->field[*ptr])->get_ptr(&data_ptr);
|
|
written= azwrite(writer, data_ptr, (unsigned)size);
|
|
if (written != (my_off_t)size)
|
|
DBUG_RETURN(errno ? errno : -1);
|
|
}
|
|
}
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
|
|
/*
|
|
Look at ha_archive::open() for an explanation of the row format.
|
|
Here we just write out the row.
|
|
|
|
Wondering about start_bulk_insert()? We don't implement it for
|
|
archive since it optimizes for lots of writes. The only save
|
|
for implementing start_bulk_insert() is that we could skip
|
|
setting dirty to true each time.
|
|
*/
|
|
int ha_archive::write_row(byte *buf)
|
|
{
|
|
int rc;
|
|
byte *read_buf= NULL;
|
|
ulonglong temp_auto;
|
|
DBUG_ENTER("ha_archive::write_row");
|
|
|
|
if (share->crashed)
|
|
DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE);
|
|
|
|
statistic_increment(table->in_use->status_var.ha_write_count, &LOCK_status);
|
|
if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_INSERT)
|
|
table->timestamp_field->set_time();
|
|
pthread_mutex_lock(&share->mutex);
|
|
|
|
if (table->next_number_field)
|
|
{
|
|
KEY *mkey= &table->s->key_info[0]; // We only support one key right now
|
|
update_auto_increment();
|
|
temp_auto= table->next_number_field->val_int();
|
|
|
|
/*
|
|
Bad news, this will cause a search for the unique value which is very
|
|
expensive since we will have to do a table scan which will lock up
|
|
all other writers during this period. This could perhaps be optimized
|
|
in the future.
|
|
*/
|
|
if (temp_auto == share->auto_increment_value &&
|
|
mkey->flags & HA_NOSAME)
|
|
{
|
|
rc= HA_ERR_FOUND_DUPP_KEY;
|
|
goto error;
|
|
}
|
|
|
|
if (temp_auto < share->auto_increment_value &&
|
|
mkey->flags & HA_NOSAME)
|
|
{
|
|
/*
|
|
First we create a buffer that we can use for reading rows, and can pass
|
|
to get_row().
|
|
*/
|
|
if (!(read_buf= (byte*) my_malloc(table->s->reclength, MYF(MY_WME))))
|
|
{
|
|
rc= HA_ERR_OUT_OF_MEM;
|
|
goto error;
|
|
}
|
|
/*
|
|
All of the buffer must be written out or we won't see all of the
|
|
data
|
|
*/
|
|
azflush(&(share->archive_write), Z_SYNC_FLUSH);
|
|
share->forced_flushes++;
|
|
/*
|
|
Set the position of the local read thread to the beginning postion.
|
|
*/
|
|
if (read_data_header(&archive))
|
|
{
|
|
rc= HA_ERR_CRASHED_ON_USAGE;
|
|
goto error;
|
|
}
|
|
|
|
/*
|
|
Now we read and check all of the rows.
|
|
if (!memcmp(table->next_number_field->ptr, mfield->ptr, mfield->max_length()))
|
|
if ((longlong)temp_auto ==
|
|
mfield->val_int((char*)(read_buf + mfield->offset())))
|
|
*/
|
|
Field *mfield= table->next_number_field;
|
|
|
|
while (!(get_row(&archive, read_buf)))
|
|
{
|
|
if (!memcmp(read_buf + mfield->offset(), table->next_number_field->ptr,
|
|
mfield->max_length()))
|
|
{
|
|
rc= HA_ERR_FOUND_DUPP_KEY;
|
|
goto error;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (temp_auto > share->auto_increment_value)
|
|
auto_increment_value= share->auto_increment_value= temp_auto;
|
|
}
|
|
}
|
|
|
|
/*
|
|
Notice that the global auto_increment has been increased.
|
|
In case of a failed row write, we will never try to reuse the value.
|
|
*/
|
|
|
|
share->rows_recorded++;
|
|
rc= real_write_row(buf, &(share->archive_write));
|
|
error:
|
|
pthread_mutex_unlock(&share->mutex);
|
|
if (read_buf)
|
|
my_free((gptr) read_buf, MYF(0));
|
|
|
|
DBUG_RETURN(rc);
|
|
}
|
|
|
|
|
|
ulonglong ha_archive::get_auto_increment()
|
|
{
|
|
return share->auto_increment_value + 1;
|
|
}
|
|
|
|
/* Initialized at each key walk (called multiple times unlike rnd_init()) */
|
|
int ha_archive::index_init(uint keynr, bool sorted)
|
|
{
|
|
DBUG_ENTER("ha_archive::index_init");
|
|
active_index= keynr;
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
|
|
/*
|
|
No indexes, so if we get a request for an index search since we tell
|
|
the optimizer that we have unique indexes, we scan
|
|
*/
|
|
int ha_archive::index_read(byte *buf, const byte *key,
|
|
uint key_len, enum ha_rkey_function find_flag)
|
|
{
|
|
int rc;
|
|
DBUG_ENTER("ha_archive::index_read");
|
|
rc= index_read_idx(buf, active_index, key, key_len, find_flag);
|
|
DBUG_RETURN(rc);
|
|
}
|
|
|
|
|
|
int ha_archive::index_read_idx(byte *buf, uint index, const byte *key,
|
|
uint key_len, enum ha_rkey_function find_flag)
|
|
{
|
|
int rc= 0;
|
|
bool found= 0;
|
|
KEY *mkey= &table->s->key_info[index];
|
|
current_k_offset= mkey->key_part->offset;
|
|
current_key= key;
|
|
current_key_len= key_len;
|
|
|
|
|
|
DBUG_ENTER("ha_archive::index_read_idx");
|
|
|
|
/*
|
|
All of the buffer must be written out or we won't see all of the
|
|
data
|
|
*/
|
|
pthread_mutex_lock(&share->mutex);
|
|
azflush(&(share->archive_write), Z_SYNC_FLUSH);
|
|
share->forced_flushes++;
|
|
pthread_mutex_unlock(&share->mutex);
|
|
|
|
/*
|
|
Set the position of the local read thread to the beginning postion.
|
|
*/
|
|
if (read_data_header(&archive))
|
|
{
|
|
rc= HA_ERR_CRASHED_ON_USAGE;
|
|
goto error;
|
|
}
|
|
|
|
while (!(get_row(&archive, buf)))
|
|
{
|
|
if (!memcmp(current_key, buf + current_k_offset, current_key_len))
|
|
{
|
|
found= 1;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (found)
|
|
DBUG_RETURN(0);
|
|
|
|
error:
|
|
DBUG_RETURN(rc ? rc : HA_ERR_END_OF_FILE);
|
|
}
|
|
|
|
|
|
int ha_archive::index_next(byte * buf)
|
|
{
|
|
bool found= 0;
|
|
|
|
DBUG_ENTER("ha_archive::index_next");
|
|
|
|
while (!(get_row(&archive, buf)))
|
|
{
|
|
if (!memcmp(current_key, buf+current_k_offset, current_key_len))
|
|
{
|
|
found= 1;
|
|
break;
|
|
}
|
|
}
|
|
|
|
DBUG_RETURN(found ? 0 : HA_ERR_END_OF_FILE);
|
|
}
|
|
|
|
/*
|
|
All calls that need to scan the table start with this method. If we are told
|
|
that it is a table scan we rewind the file to the beginning, otherwise
|
|
we assume the position will be set.
|
|
*/
|
|
|
|
int ha_archive::rnd_init(bool scan)
|
|
{
|
|
DBUG_ENTER("ha_archive::rnd_init");
|
|
|
|
if (share->crashed)
|
|
DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE);
|
|
|
|
/* We rewind the file so that we can read from the beginning if scan */
|
|
if (scan)
|
|
{
|
|
scan_rows= share->rows_recorded;
|
|
DBUG_PRINT("info", ("archive will retrieve %llu rows", scan_rows));
|
|
records= 0;
|
|
|
|
/*
|
|
If dirty, we lock, and then reset/flush the data.
|
|
I found that just calling azflush() doesn't always work.
|
|
*/
|
|
if (share->dirty == TRUE)
|
|
{
|
|
pthread_mutex_lock(&share->mutex);
|
|
if (share->dirty == TRUE)
|
|
{
|
|
DBUG_PRINT("info", ("archive flushing out rows for scan"));
|
|
azflush(&(share->archive_write), Z_SYNC_FLUSH);
|
|
share->forced_flushes++;
|
|
share->dirty= FALSE;
|
|
}
|
|
pthread_mutex_unlock(&share->mutex);
|
|
}
|
|
|
|
if (read_data_header(&archive))
|
|
DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE);
|
|
}
|
|
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
|
|
/*
|
|
This is the method that is used to read a row. It assumes that the row is
|
|
positioned where you want it.
|
|
*/
|
|
int ha_archive::get_row(azio_stream *file_to_read, byte *buf)
|
|
{
|
|
int read; // Bytes read, azread() returns int
|
|
uint *ptr, *end;
|
|
char *last;
|
|
size_t total_blob_length= 0;
|
|
DBUG_ENTER("ha_archive::get_row");
|
|
|
|
read= azread(file_to_read, buf, table->s->reclength);
|
|
DBUG_PRINT("ha_archive::get_row", ("Read %d bytes expected %d", read,
|
|
table->s->reclength));
|
|
|
|
if (read == Z_STREAM_ERROR)
|
|
DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE);
|
|
|
|
/* If we read nothing we are at the end of the file */
|
|
if (read == 0)
|
|
DBUG_RETURN(HA_ERR_END_OF_FILE);
|
|
|
|
/*
|
|
If the record is the wrong size, the file is probably damaged, unless
|
|
we are dealing with a delayed insert or a bulk insert.
|
|
*/
|
|
if ((ulong) read != table->s->reclength)
|
|
DBUG_RETURN(HA_ERR_END_OF_FILE);
|
|
|
|
/* Calculate blob length, we use this for our buffer */
|
|
for (ptr= table->s->blob_field, end=ptr + table->s->blob_fields ;
|
|
ptr != end ;
|
|
ptr++)
|
|
{
|
|
if (ha_get_bit_in_read_set(((Field_blob*) table->field[*ptr])->fieldnr))
|
|
total_blob_length += ((Field_blob*) table->field[*ptr])->get_length();
|
|
}
|
|
|
|
/* Adjust our row buffer if we need be */
|
|
buffer.alloc(total_blob_length);
|
|
last= (char *)buffer.ptr();
|
|
|
|
/* Loop through our blobs and read them */
|
|
for (ptr= table->s->blob_field, end=ptr + table->s->blob_fields ;
|
|
ptr != end ;
|
|
ptr++)
|
|
{
|
|
size_t size= ((Field_blob*) table->field[*ptr])->get_length();
|
|
if (size)
|
|
{
|
|
if (ha_get_bit_in_read_set(((Field_blob*) table->field[*ptr])->fieldnr))
|
|
{
|
|
read= azread(file_to_read, last, size);
|
|
if ((size_t) read != size)
|
|
DBUG_RETURN(HA_ERR_END_OF_FILE);
|
|
((Field_blob*) table->field[*ptr])->set_ptr(size, last);
|
|
last += size;
|
|
}
|
|
else
|
|
{
|
|
(void)azseek(file_to_read, size, SEEK_CUR);
|
|
}
|
|
}
|
|
}
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
|
|
/*
|
|
Called during ORDER BY. Its position is either from being called sequentially
|
|
or by having had ha_archive::rnd_pos() called before it is called.
|
|
*/
|
|
|
|
int ha_archive::rnd_next(byte *buf)
|
|
{
|
|
int rc;
|
|
DBUG_ENTER("ha_archive::rnd_next");
|
|
|
|
if (share->crashed)
|
|
DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE);
|
|
|
|
if (!scan_rows)
|
|
DBUG_RETURN(HA_ERR_END_OF_FILE);
|
|
scan_rows--;
|
|
|
|
statistic_increment(table->in_use->status_var.ha_read_rnd_next_count,
|
|
&LOCK_status);
|
|
current_position= aztell(&archive);
|
|
rc= get_row(&archive, buf);
|
|
|
|
|
|
if (rc != HA_ERR_END_OF_FILE)
|
|
records++;
|
|
|
|
DBUG_RETURN(rc);
|
|
}
|
|
|
|
|
|
/*
|
|
Thanks to the table flag HA_REC_NOT_IN_SEQ this will be called after
|
|
each call to ha_archive::rnd_next() if an ordering of the rows is
|
|
needed.
|
|
*/
|
|
|
|
void ha_archive::position(const byte *record)
|
|
{
|
|
DBUG_ENTER("ha_archive::position");
|
|
my_store_ptr(ref, ref_length, current_position);
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
|
|
/*
|
|
This is called after a table scan for each row if the results of the
|
|
scan need to be ordered. It will take *pos and use it to move the
|
|
cursor in the file so that the next row that is called is the
|
|
correctly ordered row.
|
|
*/
|
|
|
|
int ha_archive::rnd_pos(byte * buf, byte *pos)
|
|
{
|
|
DBUG_ENTER("ha_archive::rnd_pos");
|
|
statistic_increment(table->in_use->status_var.ha_read_rnd_next_count,
|
|
&LOCK_status);
|
|
current_position= (my_off_t)my_get_ptr(pos, ref_length);
|
|
(void)azseek(&archive, current_position, SEEK_SET);
|
|
|
|
DBUG_RETURN(get_row(&archive, buf));
|
|
}
|
|
|
|
/*
|
|
This method repairs the meta file. It does this by walking the datafile and
|
|
rewriting the meta file. Currently it does this by calling optimize with
|
|
the extended flag.
|
|
*/
|
|
int ha_archive::repair(THD* thd, HA_CHECK_OPT* check_opt)
|
|
{
|
|
DBUG_ENTER("ha_archive::repair");
|
|
check_opt->flags= T_EXTEND;
|
|
int rc= optimize(thd, check_opt);
|
|
|
|
if (rc)
|
|
DBUG_RETURN(HA_ERR_CRASHED_ON_REPAIR);
|
|
|
|
share->crashed= FALSE;
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
/*
|
|
The table can become fragmented if data was inserted, read, and then
|
|
inserted again. What we do is open up the file and recompress it completely.
|
|
*/
|
|
int ha_archive::optimize(THD* thd, HA_CHECK_OPT* check_opt)
|
|
{
|
|
DBUG_ENTER("ha_archive::optimize");
|
|
int rc;
|
|
azio_stream writer;
|
|
char writer_filename[FN_REFLEN];
|
|
|
|
/* Flush any waiting data */
|
|
azflush(&(share->archive_write), Z_SYNC_FLUSH);
|
|
share->forced_flushes++;
|
|
|
|
/* Lets create a file to contain the new data */
|
|
fn_format(writer_filename, share->table_name, "", ARN,
|
|
MY_REPLACE_EXT|MY_UNPACK_FILENAME);
|
|
|
|
if (!(azopen(&writer, writer_filename, O_CREAT|O_WRONLY|O_TRUNC|O_BINARY)))
|
|
DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE);
|
|
|
|
/*
|
|
An extended rebuild is a lot more effort. We open up each row and re-record it.
|
|
Any dead rows are removed (aka rows that may have been partially recorded).
|
|
*/
|
|
|
|
if (check_opt->flags == T_EXTEND)
|
|
{
|
|
DBUG_PRINT("info", ("archive extended rebuild"));
|
|
byte *buf;
|
|
|
|
/*
|
|
First we create a buffer that we can use for reading rows, and can pass
|
|
to get_row().
|
|
*/
|
|
if (!(buf= (byte*) my_malloc(table->s->reclength, MYF(MY_WME))))
|
|
{
|
|
rc= HA_ERR_OUT_OF_MEM;
|
|
goto error;
|
|
}
|
|
|
|
/*
|
|
Now we will rewind the archive file so that we are positioned at the
|
|
start of the file.
|
|
*/
|
|
rc= read_data_header(&archive);
|
|
|
|
/*
|
|
Assuming now error from rewinding the archive file, we now write out the
|
|
new header for out data file.
|
|
*/
|
|
if (!rc)
|
|
rc= write_data_header(&writer);
|
|
|
|
/*
|
|
On success of writing out the new header, we now fetch each row and
|
|
insert it into the new archive file.
|
|
*/
|
|
if (!rc)
|
|
{
|
|
share->rows_recorded= 0;
|
|
auto_increment_value= share->auto_increment_value= 0;
|
|
while (!(rc= get_row(&archive, buf)))
|
|
{
|
|
real_write_row(buf, &writer);
|
|
if (table->found_next_number_field)
|
|
{
|
|
Field *field= table->found_next_number_field;
|
|
ulonglong auto_value=
|
|
(ulonglong) field->val_int((char*)(buf + field->offset()));
|
|
if (share->auto_increment_value < auto_value)
|
|
auto_increment_value= share->auto_increment_value=
|
|
auto_value;
|
|
}
|
|
share->rows_recorded++;
|
|
}
|
|
}
|
|
DBUG_PRINT("info", ("recovered %llu archive rows", share->rows_recorded));
|
|
|
|
my_free((char*)buf, MYF(0));
|
|
if (rc && rc != HA_ERR_END_OF_FILE)
|
|
goto error;
|
|
}
|
|
else
|
|
{
|
|
DBUG_PRINT("info", ("archive quick rebuild"));
|
|
/*
|
|
The quick method is to just read the data raw, and then compress it directly.
|
|
*/
|
|
int read; // Bytes read, azread() returns int
|
|
char block[IO_SIZE];
|
|
if (azrewind(&archive) == -1)
|
|
{
|
|
rc= HA_ERR_CRASHED_ON_USAGE;
|
|
DBUG_PRINT("info", ("archive HA_ERR_CRASHED_ON_USAGE"));
|
|
goto error;
|
|
}
|
|
|
|
while ((read= azread(&archive, block, IO_SIZE)) > 0)
|
|
azwrite(&writer, block, read);
|
|
}
|
|
|
|
azclose(&writer);
|
|
share->dirty= FALSE;
|
|
share->forced_flushes= 0;
|
|
azclose(&(share->archive_write));
|
|
DBUG_PRINT("info", ("Reopening archive data file"));
|
|
if (!(azopen(&(share->archive_write), share->data_file_name,
|
|
O_WRONLY|O_APPEND|O_BINARY)))
|
|
{
|
|
DBUG_PRINT("info", ("Could not open archive write file"));
|
|
rc= HA_ERR_CRASHED_ON_USAGE;
|
|
goto error;
|
|
}
|
|
|
|
my_rename(writer_filename,share->data_file_name,MYF(0));
|
|
|
|
/*
|
|
Now we need to reopen our read descriptor since it has changed.
|
|
*/
|
|
azclose(&archive);
|
|
if (!(azopen(&archive, share->data_file_name, O_RDONLY|O_BINARY)))
|
|
{
|
|
rc= HA_ERR_CRASHED_ON_USAGE;
|
|
goto error;
|
|
}
|
|
|
|
|
|
DBUG_RETURN(0);
|
|
|
|
error:
|
|
azclose(&writer);
|
|
|
|
DBUG_RETURN(rc);
|
|
}
|
|
|
|
/*
|
|
Below is an example of how to setup row level locking.
|
|
*/
|
|
THR_LOCK_DATA **ha_archive::store_lock(THD *thd,
|
|
THR_LOCK_DATA **to,
|
|
enum thr_lock_type lock_type)
|
|
{
|
|
if (lock_type == TL_WRITE_DELAYED)
|
|
delayed_insert= TRUE;
|
|
else
|
|
delayed_insert= FALSE;
|
|
|
|
if (lock_type != TL_IGNORE && lock.type == TL_UNLOCK)
|
|
{
|
|
/*
|
|
Here is where we get into the guts of a row level lock.
|
|
If TL_UNLOCK is set
|
|
If we are not doing a LOCK TABLE or DISCARD/IMPORT
|
|
TABLESPACE, then allow multiple writers
|
|
*/
|
|
|
|
if ((lock_type >= TL_WRITE_CONCURRENT_INSERT &&
|
|
lock_type <= TL_WRITE) && !thd->in_lock_tables
|
|
&& !thd->tablespace_op)
|
|
lock_type = TL_WRITE_ALLOW_WRITE;
|
|
|
|
/*
|
|
In queries of type INSERT INTO t1 SELECT ... FROM t2 ...
|
|
MySQL would use the lock TL_READ_NO_INSERT on t2, and that
|
|
would conflict with TL_WRITE_ALLOW_WRITE, blocking all inserts
|
|
to t2. Convert the lock to a normal read lock to allow
|
|
concurrent inserts to t2.
|
|
*/
|
|
|
|
if (lock_type == TL_READ_NO_INSERT && !thd->in_lock_tables)
|
|
lock_type = TL_READ;
|
|
|
|
lock.type=lock_type;
|
|
}
|
|
|
|
*to++= &lock;
|
|
|
|
return to;
|
|
}
|
|
|
|
void ha_archive::update_create_info(HA_CREATE_INFO *create_info)
|
|
{
|
|
ha_archive::info(HA_STATUS_AUTO | HA_STATUS_CONST);
|
|
if (!(create_info->used_fields & HA_CREATE_USED_AUTO))
|
|
{
|
|
create_info->auto_increment_value=auto_increment_value;
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
Hints for optimizer, see ha_tina for more information
|
|
*/
|
|
void ha_archive::info(uint flag)
|
|
{
|
|
DBUG_ENTER("ha_archive::info");
|
|
/*
|
|
This should be an accurate number now, though bulk and delayed inserts can
|
|
cause the number to be inaccurate.
|
|
*/
|
|
records= share->rows_recorded;
|
|
deleted= 0;
|
|
/* Costs quite a bit more to get all information */
|
|
if (flag & HA_STATUS_TIME)
|
|
{
|
|
MY_STAT file_stat; // Stat information for the data file
|
|
|
|
VOID(my_stat(share->data_file_name, &file_stat, MYF(MY_WME)));
|
|
|
|
mean_rec_length= table->s->reclength + buffer.alloced_length();
|
|
data_file_length= file_stat.st_size;
|
|
create_time= file_stat.st_ctime;
|
|
update_time= file_stat.st_mtime;
|
|
max_data_file_length= share->rows_recorded * mean_rec_length;
|
|
}
|
|
delete_length= 0;
|
|
index_file_length=0;
|
|
|
|
if (flag & HA_STATUS_AUTO)
|
|
auto_increment_value= share->auto_increment_value;
|
|
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
|
|
/*
|
|
This method tells us that a bulk insert operation is about to occur. We set
|
|
a flag which will keep write_row from saying that its data is dirty. This in
|
|
turn will keep selects from causing a sync to occur.
|
|
Basically, yet another optimizations to keep compression working well.
|
|
*/
|
|
void ha_archive::start_bulk_insert(ha_rows rows)
|
|
{
|
|
DBUG_ENTER("ha_archive::start_bulk_insert");
|
|
if (!rows || rows >= ARCHIVE_MIN_ROWS_TO_USE_BULK_INSERT)
|
|
bulk_insert= TRUE;
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
|
|
/*
|
|
Other side of start_bulk_insert, is end_bulk_insert. Here we turn off the bulk insert
|
|
flag, and set the share dirty so that the next select will call sync for us.
|
|
*/
|
|
int ha_archive::end_bulk_insert()
|
|
{
|
|
DBUG_ENTER("ha_archive::end_bulk_insert");
|
|
bulk_insert= FALSE;
|
|
share->dirty= TRUE;
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
/*
|
|
We cancel a truncate command. The only way to delete an archive table is to drop it.
|
|
This is done for security reasons. In a later version we will enable this by
|
|
allowing the user to select a different row format.
|
|
*/
|
|
int ha_archive::delete_all_rows()
|
|
{
|
|
DBUG_ENTER("ha_archive::delete_all_rows");
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
/*
|
|
We just return state if asked.
|
|
*/
|
|
bool ha_archive::is_crashed() const
|
|
{
|
|
DBUG_ENTER("ha_archive::is_crashed");
|
|
DBUG_RETURN(share->crashed);
|
|
}
|
|
|
|
/*
|
|
Simple scan of the tables to make sure everything is ok.
|
|
*/
|
|
|
|
int ha_archive::check(THD* thd, HA_CHECK_OPT* check_opt)
|
|
{
|
|
int rc= 0;
|
|
byte *buf;
|
|
const char *old_proc_info=thd->proc_info;
|
|
ha_rows count= share->rows_recorded;
|
|
DBUG_ENTER("ha_archive::check");
|
|
|
|
thd->proc_info= "Checking table";
|
|
/* Flush any waiting data */
|
|
azflush(&(share->archive_write), Z_SYNC_FLUSH);
|
|
share->forced_flushes++;
|
|
|
|
/*
|
|
First we create a buffer that we can use for reading rows, and can pass
|
|
to get_row().
|
|
*/
|
|
if (!(buf= (byte*) my_malloc(table->s->reclength, MYF(MY_WME))))
|
|
rc= HA_ERR_OUT_OF_MEM;
|
|
|
|
/*
|
|
Now we will rewind the archive file so that we are positioned at the
|
|
start of the file.
|
|
*/
|
|
if (!rc)
|
|
read_data_header(&archive);
|
|
|
|
if (!rc)
|
|
while (!(rc= get_row(&archive, buf)))
|
|
count--;
|
|
|
|
my_free((char*)buf, MYF(0));
|
|
|
|
thd->proc_info= old_proc_info;
|
|
|
|
if ((rc && rc != HA_ERR_END_OF_FILE) || count)
|
|
{
|
|
share->crashed= FALSE;
|
|
DBUG_RETURN(HA_ADMIN_CORRUPT);
|
|
}
|
|
else
|
|
{
|
|
DBUG_RETURN(HA_ADMIN_OK);
|
|
}
|
|
}
|
|
|
|
/*
|
|
Check and repair the table if needed.
|
|
*/
|
|
bool ha_archive::check_and_repair(THD *thd)
|
|
{
|
|
HA_CHECK_OPT check_opt;
|
|
DBUG_ENTER("ha_archive::check_and_repair");
|
|
|
|
check_opt.init();
|
|
|
|
DBUG_RETURN(repair(thd, &check_opt));
|
|
}
|