2006-04-11 15:45:10 +02:00
|
|
|
/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
|
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
2007-03-02 11:20:23 +01:00
|
|
|
the Free Software Foundation; version 2 of the License.
|
2006-04-11 15:45:10 +02:00
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with this program; if not, write to the Free Software
|
Update FSF address
This commit is based on the work of Michal Schorm, rebased on the
earliest MariaDB version.
The command line used to generate this diff was:
find ./ -type f \
-exec sed -i -e 's/Foundation, Inc., 59 Temple Place, Suite 330, Boston, /Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, /g' {} \; \
-exec sed -i -e 's/Foundation, Inc. 59 Temple Place.* Suite 330, Boston, /Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, /g' {} \; \
-exec sed -i -e 's/MA.*.....-1307.*USA/MA 02110-1335 USA/g' {} \; \
-exec sed -i -e 's/Foundation, Inc., 59 Temple/Foundation, Inc., 51 Franklin/g' {} \; \
-exec sed -i -e 's/Place, Suite 330, Boston, MA.*02111-1307.*USA/Street, Fifth Floor, Boston, MA 02110-1335 USA/g' {} \; \
-exec sed -i -e 's/MA.*.....-1307/MA 02110-1335/g' {} \;
2019-05-10 19:49:46 +02:00
|
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
|
2006-04-11 15:45:10 +02:00
|
|
|
|
|
|
|
/* Pack MARIA file */
|
|
|
|
|
|
|
|
#ifndef USE_MY_FUNC
|
|
|
|
#define USE_MY_FUNC /* We need at least my_malloc */
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#include "maria_def.h"
|
|
|
|
#include <queues.h>
|
|
|
|
#include <my_tree.h>
|
|
|
|
#include "mysys_err.h"
|
|
|
|
#ifdef MSDOS
|
|
|
|
#include <io.h>
|
|
|
|
#endif
|
|
|
|
#ifndef __GNU_LIBRARY__
|
|
|
|
#define __GNU_LIBRARY__ /* Skip warnings in getopt.h */
|
|
|
|
#endif
|
|
|
|
#include <my_getopt.h>
|
|
|
|
#include <assert.h>
|
|
|
|
|
|
|
|
/*
  BITS_SAVED is the width of the bit accumulator ('bitbucket' in
  struct st_file_buffer below): 64 bits when long long is wider than
  32 bits, otherwise 32.
*/
#if SIZEOF_LONG_LONG > 4
#define BITS_SAVED 64
#else
#define BITS_SAVED 32
#endif

#define IS_OFFSET ((uint) 32768)	/* Bit if offset or char in tree */
#define HEAD_LENGTH	32	/* presumably the packed file header size; see write_header() — TODO confirm */
#define ALLOWED_JOIN_DIFF	256	/* Diff allowed to join trees */

#define DATA_TMP_EXT		".TMD"	/* Extension of temporary packed data file */
#define OLD_EXT			".OLD"	/* Extension used for table backups */
#define WRITE_COUNT		MY_HOW_OFTEN_TO_WRITE
|
|
|
|
|
|
|
|
/*
  Buffered file with bit-level output support.
  Used by init_file_buffer()/write_bits()/flush_bits() declared below;
  one global instance ('file_buffer') is shared by the whole program.
*/
struct st_file_buffer {
  File file;			/* File descriptor being written/read */
  uchar *buffer,*pos,*end;	/* Buffer start, current position, end */
  my_off_t pos_in_file;		/* File offset corresponding to buffer start */
  int bits;			/* Bit counter for 'bitbucket'; exact semantics in write_bits() */
  ulonglong bitbucket;		/* Bit accumulator, holds up to BITS_SAVED bits */
};
|
|
|
|
|
|
|
|
struct st_huff_tree;
struct st_huff_element;

/*
  Per-column statistics collected while scanning the source table
  (see get_statistic()).  One HUFF_COUNTS exists per field; several
  fields may later share one Huffman tree (see join_same_trees()).
*/
typedef struct st_huff_counts {
  uint field_length,max_zero_fill;
  uint pack_type;		/* Pack method chosen for this field */
  uint max_end_space,max_pre_space,length_bits,min_space;
  ulong max_length;
  enum en_fieldtype field_type;
  struct st_huff_tree *tree;		/* Tree for field */
  my_off_t counts[256];		/* presumably per-byte-value frequencies — confirm in get_statistic() */
  my_off_t end_space[8];
  my_off_t pre_space[8];
  my_off_t tot_end_space,tot_pre_space,zero_fields,empty_fields,bytes_packed;
  TREE int_tree;        /* Tree for detecting distinct column values. */
  uchar *tree_buff;     /* Column values, 'field_length' each. */
  uchar *tree_pos;      /* Points to end of column values in 'tree_buff'. */
} HUFF_COUNTS;
|
|
|
|
|
|
|
|
typedef struct st_huff_element HUFF_ELEMENT;

/*
  WARNING: It is crucial for the optimizations in calc_packed_length()
  that 'count' is the first element of 'HUFF_ELEMENT'.
*/
struct st_huff_element {
  my_off_t count;		/* Weight (occurrence count) of this element */
  union un_element {
    struct st_nod {		/* Internal node: two child elements */
      HUFF_ELEMENT *left,*right;
    } nod;
    struct st_leaf {
      /* presumably NULL here distinguishes a leaf from a node — confirm */
      HUFF_ELEMENT *null;
      uint element_nr;		/* Number of element */
    } leaf;
  } a;
};
|
|
|
|
|
|
|
|
|
|
|
|
/*
  A Huffman tree built from the counts of one or more columns
  (see make_huff_tree()).
*/
typedef struct st_huff_tree {
  HUFF_ELEMENT *root,*element_buffer;	/* Tree root and node storage */
  HUFF_COUNTS *counts;			/* Statistics this tree was built from */
  uint tree_number;
  uint elements;			/* Number of elements in the tree */
  my_off_t bytes_packed;	/* packed-size estimate; see calc_packed_length() — TODO confirm */
  uint tree_pack_length;
  uint min_chr,max_chr,char_bits,offset_bits,max_offset,height;
  ulonglong *code;			/* Huffman code per element */
  uchar *code_len;			/* Code length in bits per element */
} HUFF_TREE;
|
|
|
|
|
|
|
|
|
|
|
|
/*
  State for reading rows sequentially from one or more source tables
  (see mrg_rrnd()).  With --join several identically laid out tables
  are merged into one packed result table.
*/
typedef struct st_isam_mrg {
  MARIA_HA **file,**current,**end;	/* Source tables and scan cursor */
  uint free_file;	/* Non-zero if the 'file' array was malloc'ed and must be freed */
  uint count;		/* Number of source tables */
  uint min_pack_length; /* These are used by packed data */
  uint max_pack_length;
  uint ref_length;
  uint max_blob_length;
  my_off_t records;
  /* true if at least one source file has at least one disabled index */
  my_bool src_file_has_indexes_disabled;
} PACK_MRG_INFO;
|
|
|
|
|
|
|
|
|
|
|
|
/* Function prototypes */

extern int main(int argc,char * *argv);
static void get_options(int *argc,char ***argv);
static MARIA_HA *open_maria_file(char *name,int mode);
static my_bool open_maria_files(PACK_MRG_INFO *mrg,char **names,uint count);
static int compress(PACK_MRG_INFO *file,char *join_name);

/* Statistics collection */
static HUFF_COUNTS *init_huff_count(MARIA_HA *info,my_off_t records);
static void free_counts_and_tree_and_queue(HUFF_TREE *huff_trees,
					   uint trees,
					   HUFF_COUNTS *huff_counts,
					   uint fields);
static int compare_tree(void* cmp_arg __attribute__((unused)),
			const uchar *s,const uchar *t);
static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts);
static void check_counts(HUFF_COUNTS *huff_counts,uint trees,
			 my_off_t records);
static int test_space_compress(HUFF_COUNTS *huff_counts,my_off_t records,
			       uint max_space_length,my_off_t *space_counts,
			       my_off_t tot_space_count,
			       enum en_fieldtype field_type);

/* Huffman tree construction */
static HUFF_TREE* make_huff_trees(HUFF_COUNTS *huff_counts,uint trees);
static int make_huff_tree(HUFF_TREE *tree,HUFF_COUNTS *huff_counts);
static int compare_huff_elements(void *not_used, uchar *a,uchar *b);
static int save_counts_in_queue(uchar *key,element_count count,
				HUFF_TREE *tree);
static my_off_t calc_packed_length(HUFF_COUNTS *huff_counts,uint flag);
static uint join_same_trees(HUFF_COUNTS *huff_counts,uint trees);
static int make_huff_decode_table(HUFF_TREE *huff_tree,uint trees);
static void make_traverse_code_tree(HUFF_TREE *huff_tree,
				    HUFF_ELEMENT *element,uint size,
				    ulonglong code);

/* Writing the packed file */
static int write_header(PACK_MRG_INFO *isam_file, uint header_length,uint trees,
			my_off_t tot_elements,my_off_t filelength);
static void write_field_info(HUFF_COUNTS *counts, uint fields,uint trees);
static my_off_t write_huff_tree(HUFF_TREE *huff_tree,uint trees);
static uint *make_offset_code_tree(HUFF_TREE *huff_tree,
				   HUFF_ELEMENT *element,
				   uint *offset);
static uint max_bit(uint value);
static int compress_maria_file(PACK_MRG_INFO *file,HUFF_COUNTS *huff_counts);
static char *make_new_name(char *new_name,char *old_name);
static char *make_old_name(char *new_name,char *old_name);

/* Bit-level buffered file output (struct st_file_buffer) */
static void init_file_buffer(File file,pbool read_buffer);
static int flush_buffer(ulong neaded_length);
static void end_file_buffer(void);
static void write_bits(ulonglong value, uint bits);
static void flush_bits(void);
|
WL#3138: Maria - fast "SELECT COUNT(*) FROM t;" and "CHECKSUM TABLE t"
Added argument to maria_end_bulk_insert() to know if the table will be deleted after the operation
Fixed wrong call to strmake
Don't call bulk insert in case of inserting only one row (speed optimization as starting/stopping bulk insert
Allow storing year 2155 in year field
When running with purify/valgrind avoid copying structures over themself
Added hook 'trnnam_end_trans_hook' that is called when transaction ends
Added trn->used_tables that is used to store an entry for each table used by the transaction
Fixed that ndb doesn't crash on duplicate key error when start_bulk_insert/end_bulk_insert are not called
include/maria.h:
Added argument to maria_end_bulk_insert() to know if the table will be deleted after the operation
include/my_tree.h:
Added macro 'reset_free_element()' to be able to ignore calls to the external free function.
Is used to optimize end-bulk-insert in case of failures, in which case we don't want write the remaining keys in the tree
mysql-test/install_test_db.sh:
Upgrade to new mysql_install_db options
mysql-test/r/maria-mvcc.result:
New tests
mysql-test/r/maria.result:
New tests
mysql-test/suite/ndb/r/ndb_auto_increment.result:
Fixed error message now when bulk insert is not always called
mysql-test/suite/ndb/t/ndb_auto_increment.test:
Fixed error message now when bulk insert is not always called
mysql-test/t/maria-mvcc.test:
Added testing of versioning of count(*)
mysql-test/t/maria-page-checksum.test:
Added comment
mysql-test/t/maria.test:
More tests
mysys/hash.c:
Code style change
sql/field.cc:
Allow storing year 2155 in year field
sql/ha_ndbcluster.cc:
Added new argument to end_bulk_insert() to signal if the bulk insert should ignored
sql/ha_ndbcluster.h:
Added new argument to end_bulk_insert() to signal if the bulk insert should ignored
sql/ha_partition.cc:
Added new argument to end_bulk_insert() to signal if the bulk insert should ignored
sql/ha_partition.h:
Added new argument to end_bulk_insert() to signal if the bulk insert should ignored
sql/handler.cc:
Don't call get_dup_key() if there is no table object. This can happen if the handler generates a duplicate key error on commit
sql/handler.h:
Added new argument to end_bulk_insert() to signal if the bulk insert should ignored (ie, the table will be deleted)
sql/item.cc:
Style fix
Removed compiler warning
sql/log_event.cc:
Added new argument to ha_end_bulk_insert()
sql/log_event_old.cc:
Added new argument to ha_end_bulk_insert()
sql/mysqld.cc:
Removed compiler warning
sql/protocol.cc:
Added DBUG
sql/sql_class.cc:
Added DBUG
Fixed wrong call to strmake
sql/sql_insert.cc:
Don't call bulk insert in case of inserting only one row (speed optimization as starting/stopping bulk insert involves a lot of if's)
Added new argument to ha_end_bulk_insert()
sql/sql_load.cc:
Added new argument to ha_end_bulk_insert()
sql/sql_parse.cc:
Style fixes
Avoid goto in common senario
sql/sql_select.cc:
When running with purify/valgrind avoid copying structures over themself. This is not a real bug in itself, but it's a waste of cycles and causes valgrind warnings
sql/sql_select.h:
Avoid copying structures over themself. This is not a real bug in itself, but it's a waste of cycles and causes valgrind warnings
sql/sql_table.cc:
Call HA_EXTRA_PREPARE_FOR_DROP if table created by ALTER TABLE is going to be dropped
Added new argument to ha_end_bulk_insert()
storage/archive/ha_archive.cc:
Added new argument to end_bulk_insert()
storage/archive/ha_archive.h:
Added new argument to end_bulk_insert()
storage/federated/ha_federated.cc:
Added new argument to end_bulk_insert()
storage/federated/ha_federated.h:
Added new argument to end_bulk_insert()
storage/maria/Makefile.am:
Added ma_state.c and ma_state.h
storage/maria/ha_maria.cc:
Versioning of count(*) and checksum
- share->state.state is now assumed to be correct, not handler->state
- Call _ma_setup_live_state() in external lock to get count(*)/checksum versioning. In case of
not versioned and not concurrent insertable table, file->s->state.state contains the correct state information
Other things:
- file->s -> share
- Added DBUG_ASSERT() for unlikely case
- Optimized end_bulk_insert() to not write anything if table is going to be deleted (as in failed alter table)
- Indentation changes in external_lock because of removed 'goto' caused a big conflict even if very little was changed
storage/maria/ha_maria.h:
New argument to end_bulk_insert()
storage/maria/ma_blockrec.c:
Update for versioning of count(*) and checksum
Keep share->state.state.data_file_length up to date (not info->state->data_file_length)
Moved _ma_block_xxxx_status() and maria_versioning() functions to ma_state.c
storage/maria/ma_check.c:
Update and use share->state.state instead of info->state
info->s to share
Update info->state at end of repair
Call _ma_reset_state() to update share->state_history at end of repair
storage/maria/ma_checkpoint.c:
Call _ma_remove_not_visible_states() on checkpoint to clean up not visible state history from tables
storage/maria/ma_close.c:
Remember state history for running transaction even if table is closed
storage/maria/ma_commit.c:
Ensure we always call trnman_commit_trn() even if other calls fails. If we don't do that, the translog and state structures will not be freed
storage/maria/ma_delete.c:
Versioning of count(*) and checksum:
- Always update info->state->checksum and info->state->records
storage/maria/ma_delete_all.c:
Versioning of count(*) and checksum:
- Ensure that share->state.state is updated, as here is where we store the primary information
storage/maria/ma_dynrec.c:
Use lock_key_trees instead of concurrent_insert to check if trees should be locked.
This allows us to lock trees both for concurrent_insert and for index versioning.
storage/maria/ma_extra.c:
Versioning of count(*) and checksum:
- Use share->state.state instead of info->state
- share->concurrent_insert -> share->non_transactional_concurrent_insert
- Don't update share->state.state from info->state if transactional table
Optimization:
- Don't flush io_cache or bitmap if we are using FLUSH_IGNORE_CHANGED
storage/maria/ma_info.c:
Get most state information from current state
storage/maria/ma_init.c:
Add hash table and free function to store states for closed tables
Install hook for transaction commit/rollback to update history state
storage/maria/ma_key_recover.c:
Versioning of count(*) and checksum:
- Use share->state.state instead of info->state
storage/maria/ma_locking.c:
Versioning of count(*) and checksum:
- Call virtual functions (if exists) to restore/update status
- Move _ma_xxx_status() functions to ma_state.c
info->s -> share
storage/maria/ma_open.c:
Versioning of count(*) and checksum:
- For not transactional tables, set info->state to point to new allocated state structure.
- Initialize new info->state_start variable that points to state at start of transaction
- Copy old history states from hash table (maria_stored_states) first time the table is opened
- Split flag share->concurrent_insert to non_transactional_concurrent_insert & lock_key_tree
- For now, only enable versioning of tables without keys (to be fixed in soon!)
- Added new virtual function to restore status in maria_lock_database)
More DBUG
storage/maria/ma_page.c:
Versioning of count(*) and checksum:
- Use share->state.state instead of info->state
- Modify share->state.state.key_file_length under share->intern_lock
storage/maria/ma_range.c:
Versioning of count(*) and checksum:
- Lock trees based on share->lock_key_trees
info->s -> share
storage/maria/ma_recovery.c:
Versioning of count(*) and checksum:
- Use share->state.state instead of info->state
- Update state information on close and when reenabling logging
storage/maria/ma_rkey.c:
Versioning of count(*) and checksum:
- Lock trees based on share->lock_key_trees
storage/maria/ma_rnext.c:
Versioning of count(*) and checksum:
- Lock trees based on share->lock_key_trees
storage/maria/ma_rnext_same.c:
Versioning of count(*) and checksum:
- Lock trees based on share->lock_key_trees
- Only skip rows based on file length if non_transactional_concurrent_insert is set
storage/maria/ma_rprev.c:
Versioning of count(*) and checksum:
- Lock trees based on share->lock_key_trees
storage/maria/ma_rsame.c:
Versioning of count(*) and checksum:
- Lock trees based on share->lock_key_trees
storage/maria/ma_sort.c:
Use share->state.state instead of info->state
Fixed indentation
storage/maria/ma_static.c:
Added maria_stored_state
storage/maria/ma_update.c:
Versioning of count(*) and checksum:
- Always update info->state->checksum and info->state->records
- Remove optimization for index file update as it doesn't work for transactional tables
storage/maria/ma_write.c:
Versioning of count(*) and checksum:
- Always update info->state->checksum and info->state->records
storage/maria/maria_def.h:
Move MARIA_STATUS_INFO to ma_state.h
Changes to MARIA_SHARE:
- Added state_history to store count(*)/checksum states
- Added in_trans as counter if table is used by running transactions
- Split concurrent_insert into lock_key_trees and on_transactional_concurrent_insert.
- Added virtual function lock_restore_status
Changes to MARIA_HA:
- save_state -> state_save
- Added state_start to store state at start of transaction
storage/maria/maria_pack.c:
Versioning of count(*) and checksum:
- Use share->state.state instead of info->state
Indentation fixes
storage/maria/trnman.c:
Added hook 'trnnam_end_trans_hook' that is called when transaction ends
Added trn->used_tables that is used to an entry for all tables used by transaction
More DBUG
Changed return type of trnman_end_trn() to my_bool
Added trnman_get_min_trid() to get minimum trid in use.
Added trnman_exists_active_transactions() to check if there exist a running transaction started between two commit id
storage/maria/trnman.h:
Added 'used_tables'
Moved all pointers into same groups to get better memory alignment
storage/maria/trnman_public.h:
Added prototypes for new functions and variables
Changed return type of trnman_end_trn() to my_bool
storage/myisam/ha_myisam.cc:
Added argument to end_bulk_insert() if operation should be aborted
storage/myisam/ha_myisam.h:
Added argument to end_bulk_insert() if operation should be aborted
storage/maria/ma_state.c:
Functions to handle state of count(*) and checksum
storage/maria/ma_state.h:
Structures and declarations to handle state of count(*) and checksum
2008-05-29 17:33:33 +02:00
|
|
|
/* Saving table state and multi-table (merge) reading helpers */
static int save_state(MARIA_HA *isam_file,PACK_MRG_INFO *mrg,
		      my_off_t new_length, ha_checksum crc);
static int save_state_mrg(File file,PACK_MRG_INFO *isam_file,
			  my_off_t new_length, ha_checksum crc);
static int mrg_close(PACK_MRG_INFO *mrg);
static int mrg_rrnd(PACK_MRG_INFO *info,uchar *buf);
static void mrg_reset(PACK_MRG_INFO *mrg);
#if !defined(DBUG_OFF)
/* Debug-only helpers; presumably inject artificial counts for testing — confirm */
static void fakebigcodes(HUFF_COUNTS *huff_counts, HUFF_COUNTS *end_count);
static int fakecmp(my_off_t **count1, my_off_t **count2);
#endif
|
|
|
|
|
|
|
|
|
|
|
|
/* Global flags, mostly set from the command line in get_one_option() */
static int error_on_write=0,test_only=0,verbose=0,silent=0,
	   write_loop=0,force_pack=0, isamchk_neaded=0;
static int tmpfile_createflag=O_RDWR | O_TRUNC | O_EXCL;	/* O_EXCL dropped by --force */
static my_bool backup, opt_wait;	/* --backup and --wait */
/*
  tree_buff_length is somewhat arbitrary. The bigger it is the better
  the chance to win in terms of compression factor. On the other hand,
  this table becomes part of the compressed file header. And its length
  is coded with 16 bits in the header. Hence the limit is 2**16 - 1.
*/
static uint tree_buff_length= 65536 - MALLOC_OVERHEAD;
static char tmp_dir[FN_REFLEN]={0},*join_table;	/* --tmpdir and --join values */
static my_off_t intervall_length;
static ha_checksum glob_crc;	/* global checksum accumulator — see save_state(); confirm */
static struct st_file_buffer file_buffer;	/* Shared buffered bit-output file */
static QUEUE queue;	/* NOTE(review): presumably the priority queue for Huffman building — confirm */
static HUFF_COUNTS *global_count;
static char zero_string[]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
static const char *load_default_groups[]= { "ariapack",0 };
|
2006-04-11 15:45:10 +02:00
|
|
|
|
|
|
|
/* The main program */
|
|
|
|
|
|
|
|
int main(int argc, char **argv)
|
|
|
|
{
|
|
|
|
int error,ok;
|
|
|
|
PACK_MRG_INFO merge;
|
|
|
|
char **default_argv;
|
|
|
|
MY_INIT(argv[0]);
|
|
|
|
|
2018-03-16 13:35:42 +01:00
|
|
|
load_defaults_or_exit("my", load_default_groups, &argc, &argv);
|
2006-04-11 15:45:10 +02:00
|
|
|
default_argv= argv;
|
|
|
|
get_options(&argc,&argv);
|
|
|
|
maria_init();
|
|
|
|
|
|
|
|
error=ok=isamchk_neaded=0;
|
|
|
|
if (join_table)
|
|
|
|
{ /* Join files into one */
|
2008-01-10 20:21:36 +01:00
|
|
|
if (open_maria_files(&merge,argv,(uint) argc) ||
|
2006-04-11 15:45:10 +02:00
|
|
|
compress(&merge,join_table))
|
|
|
|
error=1;
|
|
|
|
}
|
|
|
|
else while (argc--)
|
|
|
|
{
|
|
|
|
MARIA_HA *isam_file;
|
2008-01-10 20:21:36 +01:00
|
|
|
if (!(isam_file=open_maria_file(*argv++,O_RDWR)))
|
2006-04-11 15:45:10 +02:00
|
|
|
error=1;
|
|
|
|
else
|
|
|
|
{
|
|
|
|
merge.file= &isam_file;
|
|
|
|
merge.current=0;
|
|
|
|
merge.free_file=0;
|
|
|
|
merge.count=1;
|
|
|
|
if (compress(&merge,0))
|
|
|
|
error=1;
|
|
|
|
else
|
|
|
|
ok=1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (ok && isamchk_neaded && !silent)
|
2010-09-12 18:40:01 +02:00
|
|
|
puts("Remember to run aria_chk -rq on compressed tables");
|
2011-04-25 17:22:25 +02:00
|
|
|
fflush(stdout);
|
|
|
|
fflush(stderr);
|
2006-04-11 15:45:10 +02:00
|
|
|
free_defaults(default_argv);
|
|
|
|
maria_end();
|
|
|
|
my_end(verbose ? MY_CHECK_ERROR | MY_GIVE_INFO : MY_CHECK_ERROR);
|
|
|
|
exit(error ? 2 : 0);
|
|
|
|
#ifndef _lint
|
|
|
|
return 0; /* No compiler warning */
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Option ids for long-only options (must be outside the char range) */
enum options_mp {OPT_CHARSETS_DIR_MP=256, OPT_AUTO_CLOSE};

/* Command line options; handled in get_one_option() */
static struct my_option my_long_options[] =
{
#ifdef __NETWARE__
  {"autoclose", OPT_AUTO_CLOSE, "Auto close the screen on exit for Netware.",
   0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
#endif
  {"backup", 'b', "Make a backup of the table as table_name.OLD.",
   &backup, &backup, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
  {"character-sets-dir", OPT_CHARSETS_DIR_MP,
   "Directory where character sets are.", (char**) &charsets_dir,
   (char**) &charsets_dir, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
  {"debug", '#', "Output debug log. Often this is 'd:t:o,filename'.",
   0, 0, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0},
  {"force", 'f',
   "Force packing of table even if it gets bigger or if tempfile exists.",
   0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
  {"join", 'j',
   "Join all given tables into 'new_table_name'. All tables MUST have identical layouts.",
   &join_table, &join_table, 0, GET_STR, REQUIRED_ARG, 0, 0, 0,
   0, 0, 0},
  {"help", '?', "Display this help and exit.",
   0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
  {"silent", 's', "Be more silent.",
   0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
  {"tmpdir", 'T', "Use temporary directory to store temporary table.",
   0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
  {"test", 't', "Don't pack table, only test packing it.",
   0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
  {"verbose", 'v', "Write info about progress and packing result. Use many -v for more verbosity!",
   0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
  {"version", 'V', "Output version information and exit.",
   0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
  {"wait", 'w', "Wait and retry if table is in use.", &opt_wait,
   &opt_wait, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
  { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
};
|
|
|
|
|
|
|
|
|
|
|
|
static void print_version(void)
|
|
|
|
{
|
2011-04-25 17:22:25 +02:00
|
|
|
printf("%s Ver 1.0 for %s on %s\n", my_progname, SYSTEM_TYPE, MACHINE_TYPE);
|
2006-04-11 15:45:10 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Print the program help text, option list and default option values. */
static void usage(void)
{
  print_version();
  puts("Copyright 2002-2008 MySQL AB, 2008-2009 Sun Microsystems, Inc.");
  puts("This software comes with ABSOLUTELY NO WARRANTY. This is free software,");
  puts("and you are welcome to modify and redistribute it under the GPL license\n");

  puts("Pack a Aria-table to take much less space.");
  puts("Keys are not updated, you must run aria_chk -rq on the index (.MAI) file");
  puts("afterwards to update the keys.");
  puts("You should give the .MAI file as the filename argument.");
  puts("To unpack a packed table, run aria_chk -u on the table");

  printf("\nUsage: %s [OPTIONS] filename...\n", my_progname);
  my_print_help(my_long_options);
  print_defaults("my", load_default_groups);
  my_print_variables(my_long_options);
}
|
|
|
|
|
|
|
|
|
|
|
|
static my_bool
|
|
|
|
get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
|
|
|
|
char *argument)
|
|
|
|
{
|
|
|
|
uint length;
|
|
|
|
|
|
|
|
switch(optid) {
|
|
|
|
#ifdef __NETWARE__
|
|
|
|
case OPT_AUTO_CLOSE:
|
|
|
|
setscreenmode(SCR_AUTOCLOSE_ON_EXIT);
|
|
|
|
break;
|
|
|
|
#endif
|
|
|
|
case 'f':
|
|
|
|
force_pack= 1;
|
|
|
|
tmpfile_createflag= O_RDWR | O_TRUNC;
|
|
|
|
break;
|
|
|
|
case 's':
|
|
|
|
write_loop= verbose= 0;
|
|
|
|
silent= 1;
|
|
|
|
break;
|
|
|
|
case 't':
|
|
|
|
test_only= 1;
|
|
|
|
/* Avoid to reset 'verbose' if it was already set > 1. */
|
|
|
|
if (! verbose)
|
|
|
|
verbose= 1;
|
|
|
|
break;
|
|
|
|
case 'T':
|
|
|
|
length= (uint) (strmov(tmp_dir, argument) - tmp_dir);
|
|
|
|
if (length != dirname_length(tmp_dir))
|
|
|
|
{
|
|
|
|
tmp_dir[length]=FN_LIBCHAR;
|
|
|
|
tmp_dir[length+1]=0;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 'v':
|
|
|
|
verbose++; /* Allow for selecting the level of verbosity. */
|
|
|
|
silent= 0;
|
|
|
|
break;
|
|
|
|
case '#':
|
2010-09-12 18:40:01 +02:00
|
|
|
DBUG_PUSH(argument ? argument : "d:t:o,/tmp/aria_pack.trace");
|
2006-04-11 15:45:10 +02:00
|
|
|
break;
|
|
|
|
case 'V':
|
|
|
|
print_version();
|
|
|
|
exit(0);
|
|
|
|
case 'I':
|
|
|
|
case '?':
|
|
|
|
usage();
|
|
|
|
exit(0);
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* reads options */
|
|
|
|
/* Initiates DEBUG - but no debugging here ! */
|
|
|
|
|
|
|
|
static void get_options(int *argc,char ***argv)
|
|
|
|
{
|
|
|
|
int ho_error;
|
|
|
|
|
|
|
|
my_progname= argv[0][0];
|
|
|
|
if (isatty(fileno(stdout)))
|
|
|
|
write_loop=1;
|
|
|
|
|
|
|
|
if ((ho_error=handle_options(argc, argv, my_long_options, get_one_option)))
|
|
|
|
exit(ho_error);
|
|
|
|
|
|
|
|
if (!*argc)
|
|
|
|
{
|
|
|
|
usage();
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
if (join_table)
|
|
|
|
{
|
|
|
|
backup=0; /* Not needed */
|
|
|
|
tmp_dir[0]=0;
|
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2008-01-10 20:21:36 +01:00
|
|
|
static MARIA_HA *open_maria_file(char *name,int mode)
|
2006-04-11 15:45:10 +02:00
|
|
|
{
|
|
|
|
MARIA_HA *isam_file;
|
|
|
|
MARIA_SHARE *share;
|
2008-01-10 20:21:36 +01:00
|
|
|
DBUG_ENTER("open_maria_file");
|
2006-04-11 15:45:10 +02:00
|
|
|
|
2008-01-09 18:51:05 +01:00
|
|
|
if (!(isam_file=maria_open(name, mode, HA_OPEN_IGNORE_MOVED_STATE |
|
2006-04-11 15:45:10 +02:00
|
|
|
(opt_wait ? HA_OPEN_WAIT_IF_LOCKED :
|
|
|
|
HA_OPEN_ABORT_IF_LOCKED))))
|
|
|
|
{
|
2011-04-25 17:22:25 +02:00
|
|
|
fprintf(stderr, "%s gave error %d on open\n", name, my_errno);
|
2006-04-11 15:45:10 +02:00
|
|
|
DBUG_RETURN(0);
|
|
|
|
}
|
|
|
|
share=isam_file->s;
|
|
|
|
if (share->options & HA_OPTION_COMPRESS_RECORD && !join_table)
|
|
|
|
{
|
|
|
|
if (!force_pack)
|
|
|
|
{
|
2011-04-25 17:22:25 +02:00
|
|
|
fprintf(stderr, "%s is already compressed\n", name);
|
|
|
|
maria_close(isam_file);
|
2006-04-11 15:45:10 +02:00
|
|
|
DBUG_RETURN(0);
|
|
|
|
}
|
|
|
|
if (verbose)
|
|
|
|
puts("Recompressing already compressed table");
|
|
|
|
share->options&= ~HA_OPTION_READ_ONLY_DATA; /* We are modifing it */
|
|
|
|
}
|
|
|
|
if (! force_pack && share->state.state.records != 0 &&
|
|
|
|
(share->state.state.records <= 1 ||
|
|
|
|
share->state.state.data_file_length < 1024))
|
|
|
|
{
|
2011-04-25 17:22:25 +02:00
|
|
|
fprintf(stderr, "%s is too small to compress\n", name);
|
|
|
|
maria_close(isam_file);
|
2006-04-11 15:45:10 +02:00
|
|
|
DBUG_RETURN(0);
|
|
|
|
}
|
2011-04-25 17:22:25 +02:00
|
|
|
maria_lock_database(isam_file,F_WRLCK);
|
Added versioning of row data
Will in future changeset (soon) av versioning of status variables (number of rows) and index
Changed some LEX_STRING to LEX_CUSTRING to avoid casts and warnings
Removed some not needed variables (as noticed by Guilhem)
include/maria.h:
Added prototypes for maria_chk_init_for_check(), maria_versioning() and maria_ignore_trids()
include/my_base.h:
Add new error HA_ERR_ROW_NOT_VISIBLE
include/myisamchk.h:
Added variables for checking visibility of rows during maria_chk
include/thr_lock.h:
Changed argument type from int to my_bool for get_status
Added variable allow_multiple_concurrent_insert, to signal if table supports multiple concurrent inserts
mysql-test/r/maria-page-checksum.result:
Added missing drop table
mysql-test/t/maria-page-checksum.test:
Added missing drop table
mysys/my_handler.c:
Added new error messages
mysys/thr_lock.c:
Added support for multiple concurrent inserts, if table handler supports it
sql/sql_yacc.yy:
Added LOCK TABLE table_name WRITE CONCURRENT
This was added (temporarly?) to be able to check versioning with Maria
storage/csv/ha_tina.cc:
Updated parameter for get_status
storage/maria/ha_maria.cc:
Added calls to maria_chk_init_status()
Fixed call to ma_control_file_open()
storage/maria/ma_blockrec.c:
Changed some LEX_STRING to LEX_CUSTRING to avoid casts and warnings
Changed back some 'header' parameters to const char*
Removed some casts
Added support for versioning:
- If info->row_flag & ROW_FLAG_TRANSID is set, store transaction id together with the row
- When reading rows, check if rows are visible. Give error if not
- When scanning table, ignore not visible rows
- Added function parameters to some functions, to be able to call _ma_compact_block_page() with different parameters depending of if the page is a HEAD or TAIL page
- _ma_compact_block_page() deletes transaction id's that are visible by all running transactions
- Added functions for thr_lock() to enable multiple concurrent inserts
- Added helper function 'mysql_versioning()' to enable/disable versioning
- Added helper function maria_ignore_trids(), used by maria_chk and maria_pack to see all rows.
storage/maria/ma_blockrec.h:
Updated parameters for some functions.
Added new functions to read/store state with thr_lock
storage/maria/ma_check.c:
Enable handling of transaction id's in rows
Give a readable error if a table contains a transation id that makes rows not visible
storage/maria/ma_control_file.c:
Added option to not give warning if control file doesn't exists.
storage/maria/ma_control_file.h:
Updated parameter lists for ma_control_file_open()
storage/maria/ma_delete.c:
Removed not used variable (suggestion by Guilhem)
storage/maria/ma_locking.c:
Changed type of argument from int -> my_bool
storage/maria/ma_open.c:
Removed not used variables 'key_write_undo_lsn' and 'key_delete_undo_lsn'
Added new thr_lock interface functions for BLOCK_RECORD to enable multiple concurrent insert
storage/maria/ma_test1.c:
Added option --versioning (-C) to check versioning
storage/maria/ma_test2.c:
Added option -C to check versioning
storage/maria/ma_test_recovery:
Forward argumetns to ma_test_recovery.pl
storage/maria/ma_write.c:
Removed not used variable key_write_undo_lsn
storage/maria/maria_chk.c:
Always read control file (if exist) at start
Initialize checking of tables by calling maria_chk_init_for_check()
In verbose mode and in case of error, print max found transaction id
storage/maria/maria_def.h:
Added Trid to MARIA_ROW to be able to check transaction id for found row
Moved 'base_length' from MARIA_ROW to MARIA_HA to be able to handle different base length (with and without TRANSID) without if's
Added default row_flag to MARIA_HA for the same reason
Changed LEX_STRING -> LEX_CUSTRING to avoid casts in ma_blockrec.c
Removed not needed variables key_write_undo_lsn and key_delete_undo_lsn
Added prototypes for new functions and fixed those that had changed
storage/maria/maria_pack.c:
Ensure we can read all rows from the file, independent of the used transaction id
storage/maria/maria_read_log.c:
Updated arguments to ma_control_file_open()
storage/maria/trnman.c:
If we have only one transaction, fixed that min_read_from contains current transaction
Fixed that trnman_can_read_from() returns that row is readable if it was written by current transaction
storage/maria/unittest/ma_control_file-t.c:
Updated arguments to ma_control_file_open()
storage/maria/unittest/ma_test_all-t:
Added test of versioning
Removed printing of one extra space
storage/maria/unittest/ma_test_loghandler-t.c:
Updated arguments to ma_control_file_open()
storage/maria/unittest/ma_test_loghandler_first_lsn-t.c:
Updated arguments to ma_control_file_open()
storage/maria/unittest/ma_test_loghandler_max_lsn-t.c:
Updated arguments to ma_control_file_open()
storage/maria/unittest/ma_test_loghandler_multigroup-t.c:
Updated arguments to ma_control_file_open()
storage/maria/unittest/ma_test_loghandler_multithread-t.c:
Updated arguments to ma_control_file_open()
storage/maria/unittest/ma_test_loghandler_noflush-t.c:
Updated arguments to ma_control_file_open()
storage/maria/unittest/ma_test_loghandler_nologs-t.c:
Updated arguments to ma_control_file_open()
storage/maria/unittest/ma_test_loghandler_pagecache-t.c:
Updated arguments to ma_control_file_open()
storage/maria/unittest/ma_test_loghandler_purge-t.c:
Updated arguments to ma_control_file_open()
storage/maria/unittest/ma_test_recovery.expected:
Updated file with result from new tests
storage/maria/unittest/ma_test_recovery.pl:
Added options --abort-on-error and --verbose
In case of --verbose, print all excuted shell commands
Added test of versioning
storage/myisam/mi_locking.c:
Updated type of parameter
storage/myisam/myisamdef.h:
Updated type of parameter
mysql-test/r/maria-mvcc.result:
New BitKeeper file ``mysql-test/r/maria-mvcc.result''
mysql-test/t/maria-mvcc.test:
New BitKeeper file ``mysql-test/t/maria-mvcc.test''
2008-04-10 04:26:36 +02:00
|
|
|
maria_ignore_trids(isam_file);
|
2006-04-11 15:45:10 +02:00
|
|
|
DBUG_RETURN(isam_file);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2008-02-19 00:00:58 +01:00
|
|
|
/*
  Open all tables that are to be packed together and verify compatibility.

  @param mrg    Merge info; filled in with the opened table handles.
  @param names  Array of 'count' table names to open read-only.
  @param count  Number of entries in 'names'.

  All tables must have the same record length, the same number of fields
  and pairwise identical column types/lengths, or the open fails.

  @return 0 on success, 1 on failure (any already-opened files are closed).
*/

static my_bool open_maria_files(PACK_MRG_INFO *mrg,char **names,uint count)
{
  uint i,j;
  mrg->count=0;
  mrg->current=0;
  /* MY_FAE makes my_malloc() terminate the program on out-of-memory */
  mrg->file=(MARIA_HA**) my_malloc(sizeof(MARIA_HA*)*count,MYF(MY_FAE));
  mrg->free_file=1;
  mrg->src_file_has_indexes_disabled= 0;
  for (i=0; i < count ; i++)
  {
    if (!(mrg->file[i]=open_maria_file(names[i],O_RDONLY)))
      goto error;
    /* Remember if any source table has some of its indexes disabled */
    mrg->src_file_has_indexes_disabled|=
      ! maria_is_all_keys_active(mrg->file[i]->s->state.key_map,
                                 mrg->file[i]->s->base.keys);
  }
  /*
    Check that files are identical.
    Use 'j+1 < count' rather than 'j < count-1': 'count' is unsigned, so
    with count == 0 the subtraction would wrap to UINT_MAX and the loop
    would dereference mrg->file[0], which was never opened.
  */
  for (j=0 ; j+1 < count ; j++)
  {
    MARIA_COLUMNDEF *m1,*m2,*end;
    if (mrg->file[j]->s->base.reclength != mrg->file[j+1]->s->base.reclength ||
        mrg->file[j]->s->base.fields != mrg->file[j+1]->s->base.fields)
      goto diff_file;
    m1=mrg->file[j]->s->columndef;
    end=m1+mrg->file[j]->s->base.fields;
    m2=mrg->file[j+1]->s->columndef;
    for ( ; m1 != end ; m1++,m2++)
    {
      if (m1->type != m2->type || m1->length != m2->length)
        goto diff_file;
    }
  }
  mrg->count=count;
  return 0;

diff_file:
  fprintf(stderr, "%s: Tables '%s' and '%s' are not identical\n",
          my_progname, names[j], names[j+1]);
error:
  /* 'i' is the index of the first file that was NOT opened */
  while (i--)
    maria_close(mrg->file[i]);
  my_free(mrg->file);
  return 1;
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
  Pack the source table(s) into a compressed data file.

  @param mrg           Opened source table(s); mrg->file[0] is used as the
                       template for column layout and block size.
  @param result_table  If non-NULL, write the result as a new table with
                       this name (join mode); otherwise compress in place.

  Phases: gather value statistics over all rows, build per-column Huffman
  trees (merging equal trees), write the compressed header, field info and
  decode trees, then re-encode all records into the target file.  On
  success the original data file is replaced (or backed up first if the
  global 'backup' flag is set).

  Uses file-scope globals: backup, test_only, tmp_dir, write_loop, verbose,
  file_buffer, queue, glob_crc, maria_pagecache.

  @return 0 on success, -1 on failure (target file is deleted).
*/

static int compress(PACK_MRG_INFO *mrg,char *result_table)
{
  int error;
  File new_file,join_maria_file;
  MARIA_HA *isam_file;
  MARIA_SHARE *share;
  char org_name[FN_REFLEN],new_name[FN_REFLEN],temp_name[FN_REFLEN];
  uint i,header_length,fields,trees,used_trees;
  my_off_t old_length,new_length,tot_elements;
  HUFF_COUNTS *huff_counts;
  HUFF_TREE *huff_trees;
  DBUG_ENTER("compress");

  isam_file=mrg->file[0];                       /* Take this as an example */
  share=isam_file->s;
  new_file=join_maria_file= -1;
  trees=fields=0;
  huff_trees=0;
  huff_counts=0;
  maria_block_size= isam_file->s->block_size;

  /* Create temporary or join file */
  if (backup)
    fn_format(org_name,isam_file->s->open_file_name.str, "",MARIA_NAME_DEXT, 2);
  else
    fn_format(org_name,isam_file->s->open_file_name.str, "",MARIA_NAME_DEXT, 2+4+16);

  if (init_pagecache(maria_pagecache, MARIA_MIN_PAGE_CACHE_SIZE, 0, 0,
                     maria_block_size, 0, MY_WME) == 0)
  {
    fprintf(stderr, "Can't initialize page cache\n");
    goto err;
  }

  if (!test_only && result_table)
  {
    /* Make a new indexfile based on first file in list */
    uint length;
    uchar *buff;
    strmov(org_name,result_table);              /* Fix error messages */
    fn_format(new_name,result_table,"",MARIA_NAME_IEXT,2);
    if ((join_maria_file=my_create(new_name,0,tmpfile_createflag,MYF(MY_WME)))
        < 0)
      goto err;
    /* Copy the index header of the first source table into the new file */
    length=(uint) share->base.keystart;
    if (!(buff= (uchar*) my_malloc(length,MYF(MY_WME))))
      goto err;
    if (my_pread(share->kfile.file, buff, length, 0L, MYF(MY_WME | MY_NABP)) ||
        my_write(join_maria_file,buff,length,
                 MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)))
    {
      my_free(buff);
      goto err;
    }
    my_free(buff);
    fn_format(new_name,result_table,"",MARIA_NAME_DEXT,2);
  }
  else if (!tmp_dir[0])
    make_new_name(new_name,org_name);
  else
    fn_format(new_name,org_name,tmp_dir,DATA_TMP_EXT,1+2+4);
  /* Create the target data file (skipped in --test mode) */
  if (!test_only &&
      (new_file=my_create(new_name,0,tmpfile_createflag,MYF(MY_WME))) < 0)
    goto err;

  /* Start calculating statistics */

  /* Total record count over all source tables */
  mrg->records=0;
  for (i=0 ; i < mrg->count ; i++)
    mrg->records+=mrg->file[i]->s->state.state.records;

  DBUG_PRINT("info", ("Compressing %s: (%lu records)",
                      result_table ? new_name : org_name,
                      (ulong) mrg->records));
  if (write_loop || verbose)
  {
    printf("Compressing %s: (%lu records)\n",
           result_table ? new_name : org_name, (ulong) mrg->records);
  }
  trees=fields=share->base.fields;
  huff_counts=init_huff_count(isam_file,mrg->records);

  /*
    Read the whole data file(s) for statistics.
  */
  DBUG_PRINT("info", ("- Calculating statistics"));
  if (write_loop || verbose)
    printf("- Calculating statistics\n");
  if (get_statistic(mrg,huff_counts))
    goto err;

  /* Sum of used data bytes before compression (for the final percentage) */
  old_length=0;
  for (i=0; i < mrg->count ; i++)
    old_length+= (mrg->file[i]->s->state.state.data_file_length -
                  mrg->file[i]->s->state.state.empty);

  /*
    Create a global priority queue in preparation for making
    temporary Huffman trees.
  */
  if (init_queue(&queue, 256, 0, 0, compare_huff_elements, 0, 0, 0))
    goto err;

  /*
    Check each column if we should use pre-space-compress, end-space-
    compress, empty-field-compress or zero-field-compress.
  */
  check_counts(huff_counts,fields,mrg->records);

  /*
    Build a Huffman tree for each column.
  */
  huff_trees=make_huff_trees(huff_counts,trees);

  /*
    If the packed lengths of combined columns is less then the sum of
    the non-combined columns, then create common Huffman trees for them.
    We do this only for uchar compressed columns, not for distinct values
    compressed columns.
  */
  if ((int) (used_trees=join_same_trees(huff_counts,trees)) < 0)
    goto err;

  /*
    Assign codes to all uchar or column values.
  */
  if (make_huff_decode_table(huff_trees,fields))
    goto err;

  /* Prepare a file buffer. */
  init_file_buffer(new_file,0);

  /*
    Reserve space in the target file for the fixed compressed file header.
  */
  file_buffer.pos_in_file=HEAD_LENGTH;
  if (! test_only)
    my_seek(new_file,file_buffer.pos_in_file,MY_SEEK_SET,MYF(0));

  /*
    Write field infos: field type, pack type, length bits, tree number.
  */
  write_field_info(huff_counts,fields,used_trees);

  /*
    Write decode trees.
  */
  if (!(tot_elements=write_huff_tree(huff_trees,trees)))
    goto err;

  /*
    Calculate the total length of the compression info header.
    This includes the fixed compressed file header, the column compression
    type descriptions, and the decode trees.
  */
  header_length=(uint) file_buffer.pos_in_file+
    (uint) (file_buffer.pos-file_buffer.buffer);

  /*
    Compress the source file into the target file.
  */
  DBUG_PRINT("info", ("- Compressing file"));
  if (write_loop || verbose)
    printf("- Compressing file\n");
  error=compress_maria_file(mrg,huff_counts);
  new_length=file_buffer.pos_in_file;
  if (!error && !test_only)
  {
    uchar buff[MEMMAP_EXTRA_MARGIN];            /* End marginal for memmap */
    bzero(buff,sizeof(buff));
    error=my_write(file_buffer.file,buff,sizeof(buff),
                   MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)) != 0;
  }

  /*
    Write the fixed compressed file header.
  */
  if (!error)
    error=write_header(mrg,header_length,used_trees,tot_elements,
                       new_length);

  /* Flush the file buffer. */
  end_file_buffer();

  /* Display statistics. */
  DBUG_PRINT("info", ("Min record length: %6d Max length: %6d "
                      "Mean total length: %6ld",
                      mrg->min_pack_length, mrg->max_pack_length,
                      (ulong) (mrg->records ? (new_length/mrg->records) : 0)));
  if (verbose && mrg->records)
    printf("Min record length: %6d Max length: %6d "
           "Mean total length: %6ld\n", mrg->min_pack_length,
           mrg->max_pack_length, (ulong) (new_length/mrg->records));

  /* Close source and target file. */
  if (!test_only)
  {
    error|=my_close(new_file,MYF(MY_WME));
    if (!result_table)
    {
      /* Flush cached pages of the old data file before closing it */
      (void) flush_pagecache_blocks(isam_file->s->pagecache, &isam_file->dfile,
                                    FLUSH_RELEASE);
      error|=my_close(isam_file->dfile.file, MYF(MY_WME));
      isam_file->dfile.file= -1;        /* Tell maria_close file is closed */
      isam_file->s->bitmap.file.file= -1;
    }
  }

  /* Cleanup. */
  free_counts_and_tree_and_queue(huff_trees,trees,huff_counts,fields);
  if (! test_only && ! error)
  {
    if (result_table)
    {
      /* Join mode: store the merged state in the new index file */
      error=save_state_mrg(join_maria_file,mrg,new_length,glob_crc);
    }
    else
    {
      if (backup)
      {
        /* Keep the original data file under the backup ("old") name */
        if (my_rename(org_name,make_old_name(temp_name,
                                             isam_file->s->open_file_name.str),
                      MYF(MY_WME)))
          error=1;
        else
        {
          /* Copy (cross-filesystem temp dir) or rename the packed file in */
          if (tmp_dir[0])
            error=my_copy(new_name,org_name,MYF(MY_WME));
          else
            error=my_rename(new_name,org_name,MYF(MY_WME));
          if (!error)
          {
            /* Preserve timestamps of the original file on the new one */
            my_copystat(temp_name,org_name,MYF(MY_COPYTIME));
            if (tmp_dir[0])
              my_delete(new_name,MYF(MY_WME));
          }
        }
      }
      else
      {
        if (tmp_dir[0])
        {
          error=my_copy(new_name,org_name,
                        MYF(MY_WME | MY_HOLD_ORIGINAL_MODES | MY_COPYTIME));
          if (!error)
            my_delete(new_name,MYF(MY_WME));
        }
        else
          error=my_redel(org_name, new_name, 0, MYF(MY_WME | MY_COPYTIME));
      }
      if (! error)
        error=save_state(isam_file,mrg,new_length,glob_crc);
    }
  }
  error|=mrg_close(mrg);
  if (join_maria_file >= 0)
    error|=my_close(join_maria_file,MYF(MY_WME));
  if (error)
  {
    fprintf(stderr, "Aborting: %s is not compressed\n", org_name);
    my_delete(new_name,MYF(MY_WME));
    DBUG_RETURN(-1);
  }
  if (write_loop || verbose)
  {
    /* Report space saving as a percentage of the original data length */
    if (old_length)
      printf("%.4g%% \n",
             (((longlong) (old_length - new_length)) * 100.0 /
              (longlong) old_length));
    else
      puts("Empty file saved in compressed format");
  }
  DBUG_RETURN(0);

 err:
  free_counts_and_tree_and_queue(huff_trees,trees,huff_counts,fields);
  if (new_file >= 0)
    my_close(new_file,MYF(0));
  if (join_maria_file >= 0)
    my_close(join_maria_file,MYF(0));
  mrg_close(mrg);
  end_pagecache(maria_pagecache, 1);
  fprintf(stderr, "Aborted: %s is not compressed\n", org_name);
  DBUG_RETURN(-1);
}
|
|
|
|
|
|
|
|
/* Init a huff_count-struct for each field and init it */
|
|
|
|
|
|
|
|
/*
  Allocate and initialize one HUFF_COUNTS structure per column.

  @param info     Template table; column layout is taken from info->s.
  @param records  Number of records that will be scanned for statistics.

  For every column the field length and type are copied, zero-fill limits
  are set up for short numeric-like fields, and a tree for collecting
  distinct values is initialized.  For non-BLOB/VARCHAR columns a value
  buffer is also allocated (when there are any records to scan).

  @return Zero-filled array of info->s->base.fields elements, or NULL on
          allocation failure.
*/

static HUFF_COUNTS *init_huff_count(MARIA_HA *info,my_off_t records)
{
  uint field;
  HUFF_COUNTS *counts;

  counts= (HUFF_COUNTS*) my_malloc(info->s->base.fields *
                                   sizeof(HUFF_COUNTS),
                                   MYF(MY_ZEROFILL | MY_WME));
  if (counts)
  {
    for (field= 0; field < info->s->base.fields; field++)
    {
      HUFF_COUNTS *cur= counts + field;
      enum en_fieldtype ftype;

      cur->field_length= info->s->columndef[field].length;
      ftype= cur->field_type=
        (enum en_fieldtype) info->s->columndef[field].type;
      /* Interval, constant and zero fields are counted as normal fields */
      if (ftype == FIELD_INTERVALL || ftype == FIELD_CONSTANT ||
          ftype == FIELD_ZERO)
        ftype= FIELD_NORMAL;
      if (cur->field_length <= 8 &&
          (ftype == FIELD_NORMAL || ftype == FIELD_SKIP_ZERO))
        cur->max_zero_fill= cur->field_length;
      /*
        For every column initialize a tree, which is used to detect distinct
        column values. 'int_tree' works together with 'tree_buff' and
        'tree_pos'. It's keys are implemented by pointers into 'tree_buff'.
        This is accomplished by '-1' as the element size.
      */
      init_tree(&cur->int_tree, 0, 0, -1, (qsort_cmp2) compare_tree, NULL,
                NULL, MYF(0));
      if (records && ftype != FIELD_BLOB && ftype != FIELD_VARCHAR)
        cur->tree_pos= cur->tree_buff=
          my_malloc(cur->field_length > 1 ? tree_buff_length : 2,
                    MYF(MY_WME));
    }
  }
  return counts;
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Free memory used by counts and trees */
|
|
|
|
|
|
|
|
static void free_counts_and_tree_and_queue(HUFF_TREE *huff_trees, uint trees,
|
|
|
|
HUFF_COUNTS *huff_counts,
|
|
|
|
uint fields)
|
|
|
|
{
|
|
|
|
register uint i;
|
|
|
|
|
|
|
|
if (huff_trees)
|
|
|
|
{
|
|
|
|
for (i=0 ; i < trees ; i++)
|
|
|
|
{
|
|
|
|
if (huff_trees[i].element_buffer)
|
2011-04-25 17:22:25 +02:00
|
|
|
my_free(huff_trees[i].element_buffer);
|
2006-04-11 15:45:10 +02:00
|
|
|
if (huff_trees[i].code)
|
2011-04-25 17:22:25 +02:00
|
|
|
my_free(huff_trees[i].code);
|
2006-04-11 15:45:10 +02:00
|
|
|
}
|
2011-04-25 17:22:25 +02:00
|
|
|
my_free(huff_trees);
|
2006-04-11 15:45:10 +02:00
|
|
|
}
|
|
|
|
if (huff_counts)
|
|
|
|
{
|
|
|
|
for (i=0 ; i < fields ; i++)
|
|
|
|
{
|
|
|
|
if (huff_counts[i].tree_buff)
|
|
|
|
{
|
2011-04-25 17:22:25 +02:00
|
|
|
my_free(huff_counts[i].tree_buff);
|
2017-05-16 23:34:48 +02:00
|
|
|
delete_tree(&huff_counts[i].int_tree, 0);
|
2006-04-11 15:45:10 +02:00
|
|
|
}
|
|
|
|
}
|
2011-04-25 17:22:25 +02:00
|
|
|
my_free(huff_counts);
|
2006-04-11 15:45:10 +02:00
|
|
|
}
|
|
|
|
delete_queue(&queue); /* This is safe to free */
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Read through old file and gather some statistics */
|
|
|
|
|
|
|
|
/*
  Read every record of all source files once and gather the statistics
  needed for compression: per-column byte-value counts, distinct-value
  trees, leading/trailing-space and zero-fill statistics, the maximum
  total blob length, and the global record checksum (glob_crc).

  mrg          merge handle over all source MARIA files
  huff_counts  per-column statistics to fill in (one per base field)

  RETURN
    0   all records were read (loop ended with HA_ERR_END_OF_FILE)
    1   reading stopped early due to an unexpected read error
*/

static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts)
{
  int error;
  uint length, null_bytes;
  ulong reclength,max_blob_length;
  uchar *record,*pos,*next_pos,*end_pos,*start_pos;
  ha_rows record_count;
  HUFF_COUNTS *count,*end_count;
  TREE_ELEMENT *element;
  ha_checksum(*calc_checksum)(MARIA_HA *, const uchar *);
  DBUG_ENTER("get_statistic");

  reclength= mrg->file[0]->s->base.reclength;
  null_bytes= mrg->file[0]->s->base.null_bytes;
  record=(uchar*) my_safe_alloca(reclength);
  end_count=huff_counts+mrg->file[0]->s->base.fields;
  record_count=0; glob_crc=0;
  max_blob_length=0;

  /* Check how to calculate checksum */
  if (mrg->file[0]->s->data_file_type == STATIC_RECORD)
    calc_checksum= _ma_static_checksum;
  else
    calc_checksum= _ma_checksum;

  mrg_reset(mrg);
  while ((error=mrg_rrnd(mrg,record)) != HA_ERR_END_OF_FILE)
  {
    ulong tot_blob_length=0;
    if (! error)
    {
      /* glob_crc is a checksum over all bytes of all records. */
      glob_crc+= (*calc_checksum)(mrg->file[0],record);

      /* Count the incidence of values separately for every column. */
      for (pos=record + null_bytes, count=huff_counts ;
           count < end_count ;
           count++,
           pos=next_pos)
      {
        next_pos=end_pos=(start_pos=pos)+count->field_length;

        /*
          Put the whole column value in a tree if there is room for it.
          'int_tree' is used to quickly check for duplicate values.
          'tree_buff' collects as many distinct column values as
          possible. If the field length is > 1, it is tree_buff_length,
          else 2 bytes. Each value is 'field_length' bytes big. If there
          are more distinct column values than fit into the buffer, we
          give up with this tree. BLOBs and VARCHARs do not have a
          tree_buff as it can only be used with fixed length columns.
          For the special case of field length == 1, we handle only the
          case that there is only one distinct value in the table(s).
          Otherwise, we can have a maximum of 256 distinct values. This
          is then handled by the normal Huffman tree build.

          Another limit for collecting distinct column values is the
          number of values itself. Since we would need to build a
          Huffman tree for the values, we are limited by the 'IS_OFFSET'
          constant. This constant expresses a bit which is used to
          determine if a tree element holds a final value or an offset
          to a child element. Hence, all values and offsets need to be
          smaller than 'IS_OFFSET'. A tree element is implemented with
          two integer values, one for the left branch and one for the
          right branch. For the extreme case that the first element
          points to the last element, the number of integers in the tree
          must be less or equal to IS_OFFSET. So the number of elements
          must be less or equal to IS_OFFSET / 2.

          WARNING: At first, we insert a pointer into the record buffer
          as the key for the tree. If we got a new distinct value, which
          is really inserted into the tree, instead of being counted
          only, we will copy the column value from the record buffer to
          'tree_buff' and adjust the key pointer of the tree accordingly.
        */
        if (count->tree_buff)
        {
          global_count=count;
          if (!(element=tree_insert(&count->int_tree,pos, 0,
                                    count->int_tree.custom_arg)) ||
              (element->count == 1 &&
               (count->tree_buff + tree_buff_length <
                count->tree_pos + count->field_length)) ||
              (count->int_tree.elements_in_tree > IS_OFFSET / 2) ||
              (count->field_length == 1 &&
               count->int_tree.elements_in_tree > 1))
          {
            /* Too many distinct values: give up on interval coding. */
            delete_tree(&count->int_tree, 0);
            my_free(count->tree_buff);
            count->tree_buff=0;
          }
          else
          {
            /*
              If tree_insert() succeeds, it either creates a new element
              or increments the counter of an existing element.
            */
            if (element->count == 1)
            {
              /* Copy the new column value into 'tree_buff'. */
              memcpy(count->tree_pos,pos,(size_t) count->field_length);
              /* Adjust the key pointer in the tree. */
              tree_set_pointer(element,count->tree_pos);
              /* Point behind the last column value so far. */
              count->tree_pos+=count->field_length;
            }
          }
        }

        /* Save character counters and space-counts and zero-field-counts */
        if (count->field_type == FIELD_NORMAL ||
            count->field_type == FIELD_SKIP_ENDSPACE)
        {
          /* Ignore trailing space. */
          for ( ; end_pos > pos ; end_pos--)
            if (end_pos[-1] != ' ')
              break;
          /* Empty fields are just counted. Go to the next record. */
          if (end_pos == pos)
          {
            count->empty_fields++;
            count->max_zero_fill=0;
            continue;
          }
          /*
            Count the total of all trailing spaces and the number of
            short trailing spaces. Remember the longest trailing space.
          */
          length= (uint) (next_pos-end_pos);
          count->tot_end_space+=length;
          if (length < 8)
            count->end_space[length]++;
          if (count->max_end_space < length)
            count->max_end_space = length;
        }

        if (count->field_type == FIELD_NORMAL ||
            count->field_type == FIELD_SKIP_PRESPACE)
        {
          /* Ignore leading space. */
          for (pos=start_pos; pos < end_pos ; pos++)
            if (pos[0] != ' ')
              break;
          /* Empty fields are just counted. Go to the next record. */
          if (end_pos == pos)
          {
            count->empty_fields++;
            count->max_zero_fill=0;
            continue;
          }
          /*
            Count the total of all leading spaces and the number of
            short leading spaces. Remember the longest leading space.
          */
          length= (uint) (pos-start_pos);
          count->tot_pre_space+=length;
          if (length < 8)
            count->pre_space[length]++;
          if (count->max_pre_space < length)
            count->max_pre_space = length;
        }

        /* Calculate pos, end_pos, and max_length for variable length fields. */
        if (count->field_type == FIELD_BLOB)
        {
          uint field_length=count->field_length -portable_sizeof_char_ptr;
          ulong blob_length= _ma_calc_blob_length(field_length, start_pos);
          memcpy(&pos, start_pos+field_length,sizeof(char*));
          end_pos=pos+blob_length;
          tot_blob_length+=blob_length;
          set_if_bigger(count->max_length,blob_length);
        }
        else if (count->field_type == FIELD_VARCHAR)
        {
          uint pack_length= HA_VARCHAR_PACKLENGTH(count->field_length-1);
          length= (pack_length == 1 ? (uint) *(uchar*) start_pos :
                   uint2korr(start_pos));
          pos= start_pos+pack_length;
          end_pos= pos+length;
          set_if_bigger(count->max_length,length);
        }

        /* Evaluate 'max_zero_fill' for short fields. */
        if (count->field_length <= 8 &&
            (count->field_type == FIELD_NORMAL ||
             count->field_type == FIELD_SKIP_ZERO))
        {
          uint i;
          /* Zero fields are just counted. Go to the next record. */
          if (!memcmp(start_pos, zero_string, count->field_length))
          {
            count->zero_fields++;
            continue;
          }
          /*
            max_zero_fill starts with field_length. It is decreased every
            time a shorter "zero trailer" is found. It is set to zero when
            an empty field is found (see above). This suggests that the
            variable should be called 'min_zero_fill'.
          */
          for (i =0 ; i < count->max_zero_fill && ! end_pos[-1 - (int) i] ;
               i++) ;
          if (i < count->max_zero_fill)
            count->max_zero_fill=i;
        }

        /* Ignore zero fields and check fields. */
        if (count->field_type == FIELD_ZERO ||
            count->field_type == FIELD_CHECK)
          continue;

        /*
          Count the incidence of every uchar value in the
          significant field value.
        */
        for ( ; pos < end_pos ; pos++)
          count->counts[(uchar) *pos]++;

        /* Step to next field. */
      }

      if (tot_blob_length > max_blob_length)
        max_blob_length=tot_blob_length;
      record_count++;
      if (write_loop && record_count % WRITE_COUNT == 0)
      {
        /* Progress indicator; '\r' keeps the output on a single line. */
        printf("%lu\r", (ulong) record_count);
        fflush(stdout);
      }
    }
    else if (error != HA_ERR_RECORD_DELETED)
    {
      fprintf(stderr, "Got error %d while reading rows\n", error);
      break;
    }

    /* Step to next record. */
  }
  if (write_loop)
  {
    printf(" \r");
    fflush(stdout);
  }

  /*
    If --debug=d,fakebigcodes is set, fake the counts to get big Huffman
    codes.
  */
  DBUG_EXECUTE_IF("fakebigcodes", fakebigcodes(huff_counts, end_count););

  DBUG_PRINT("info", ("Found the following number of incidents "
                      "of the uchar codes:"));
  if (verbose >= 2)
    printf("Found the following number of incidents "
           "of the uchar codes:\n");
  for (count= huff_counts ; count < end_count; count++)
  {
    uint idx;
    my_off_t total_count;
    char llbuf[32];

    DBUG_PRINT("info", ("column: %3u", (uint) (count - huff_counts + 1)));
    if (verbose >= 2)
      printf("column: %3u\n", (uint) (count - huff_counts + 1));
    if (count->tree_buff)
    {
      DBUG_PRINT("info", ("number of distinct values: %u",
                          (uint) ((count->tree_pos - count->tree_buff) /
                                  count->field_length)));
      if (verbose >= 2)
        printf("number of distinct values: %u\n",
               (uint) ((count->tree_pos - count->tree_buff) /
                       count->field_length));
    }
    total_count= 0;
    for (idx= 0; idx < 256; idx++)
    {
      if (count->counts[idx])
      {
        total_count+= count->counts[idx];
        DBUG_PRINT("info", ("counts[0x%02x]: %12s", idx,
                            llstr((longlong) count->counts[idx], llbuf)));
        if (verbose >= 2)
          printf("counts[0x%02x]: %12s\n", idx,
                 llstr((longlong) count->counts[idx], llbuf));
      }
    }
    DBUG_PRINT("info", ("total: %12s", llstr((longlong) total_count,
                                             llbuf)));
    if ((verbose >= 2) && total_count)
    {
      printf("total: %12s\n",
             llstr((longlong) total_count, llbuf));
    }
  }

  mrg->records=record_count;
  mrg->max_blob_length=max_blob_length;
  my_safe_afree(record, reclength);
  DBUG_RETURN(error != HA_ERR_END_OF_FILE);
}
|
|
|
|
|
|
|
|
/*
  qsort2-style comparator for priority-queue ordering of Huffman
  elements. 'a' and 'b' point at my_off_t incidence counts; returns
  -1/0/1 for less/equal/greater.
*/

static int compare_huff_elements(void *not_used __attribute__((unused)),
                                 uchar *a, uchar *b)
{
  my_off_t lhs= *((my_off_t*) a);
  my_off_t rhs= *((my_off_t*) b);

  if (lhs < rhs)
    return -1;
  return (lhs == rhs) ? 0 : 1;
}
|
|
|
|
|
|
|
|
/* Check each tree if we should use pre-space-compress, end-space-
|
|
|
|
compress, empty-field-compress or zero-field-compress */
|
|
|
|
|
|
|
|
/*
  Decide for every column which packing method to use (sets field_type
  and pack_type), based on the statistics gathered by get_statistic().
  Compares the estimated compressed sizes of the candidate methods and
  keeps the cheapest one.

  huff_counts  per-column statistics, updated in place
  trees        number of entries in huff_counts
  records      total number of records in the source files
*/

static void check_counts(HUFF_COUNTS *huff_counts, uint trees,
                         my_off_t records)
{
  uint space_fields,fill_zero_fields,field_count[(int) FIELD_enum_val_count];
  my_off_t old_length,new_length,length;
  DBUG_ENTER("check_counts");

  bzero((uchar*) field_count,sizeof(field_count));
  space_fields=fill_zero_fields=0;

  for (; trees-- ; huff_counts++)
  {
    if (huff_counts->field_type == FIELD_BLOB)
    {
      huff_counts->length_bits=max_bit(huff_counts->max_length);
      goto found_pack;
    }
    else if (huff_counts->field_type == FIELD_VARCHAR)
    {
      huff_counts->length_bits=max_bit(huff_counts->max_length);
      goto found_pack;
    }
    else if (huff_counts->field_type == FIELD_CHECK)
    {
      huff_counts->bytes_packed=0;
      huff_counts->counts[0]=0;
      goto found_pack;
    }

    huff_counts->field_type=FIELD_NORMAL;
    huff_counts->pack_type=0;

    /* Check for zero-filled records (in this column), or zero records. */
    if (huff_counts->zero_fields || ! records)
    {
      my_off_t old_space_count;
      /*
        If there are only zero filled records (in this column),
        or no records at all, we are done.
      */
      if (huff_counts->zero_fields == records)
      {
        huff_counts->field_type= FIELD_ZERO;
        huff_counts->bytes_packed=0;
        huff_counts->counts[0]=0;
        goto found_pack;
      }
      /* Remember the number of significant spaces. */
      old_space_count=huff_counts->counts[' '];
      /* Add all leading and trailing spaces. */
      huff_counts->counts[' ']+= (huff_counts->tot_end_space +
                                  huff_counts->tot_pre_space +
                                  huff_counts->empty_fields *
                                  huff_counts->field_length);
      /* Check, what the compressed length of this would be. */
      old_length=calc_packed_length(huff_counts,0)+records/8;
      /* Get the number of zero bytes. */
      length=huff_counts->zero_fields*huff_counts->field_length;
      /* Add it to the counts. */
      huff_counts->counts[0]+=length;
      /* Check, what the compressed length of this would be. */
      new_length=calc_packed_length(huff_counts,0);
      /* If the compression without the zeroes would be shorter, we are done. */
      if (old_length < new_length && huff_counts->field_length > 1)
      {
        huff_counts->field_type=FIELD_SKIP_ZERO;
        huff_counts->counts[0]-=length;
        huff_counts->bytes_packed=old_length- records/8;
        goto found_pack;
      }
      /* Remove the insignificant spaces, but keep the zeroes. */
      huff_counts->counts[' ']=old_space_count;
    }
    /* Check, what the compressed length of this column would be. */
    huff_counts->bytes_packed=calc_packed_length(huff_counts,0);

    /*
      If there are enough empty records (in this column),
      treating them specially may pay off.
    */
    if (huff_counts->empty_fields)
    {
      if (huff_counts->field_length > 2 &&
          huff_counts->empty_fields + (records - huff_counts->empty_fields)*
          (1+max_bit(MY_MAX(huff_counts->max_pre_space,
                            huff_counts->max_end_space))) <
          records * max_bit(huff_counts->field_length))
      {
        huff_counts->pack_type |= PACK_TYPE_SPACE_FIELDS;
      }
      else
      {
        /* Fold empty fields into the space statistics instead. */
        length=huff_counts->empty_fields*huff_counts->field_length;
        if (huff_counts->tot_end_space || ! huff_counts->tot_pre_space)
        {
          huff_counts->tot_end_space+=length;
          huff_counts->max_end_space=huff_counts->field_length;
          if (huff_counts->field_length < 8)
            huff_counts->end_space[huff_counts->field_length]+=
              huff_counts->empty_fields;
        }
        if (huff_counts->tot_pre_space)
        {
          huff_counts->tot_pre_space+=length;
          huff_counts->max_pre_space=huff_counts->field_length;
          if (huff_counts->field_length < 8)
            huff_counts->pre_space[huff_counts->field_length]+=
              huff_counts->empty_fields;
        }
      }
    }

    /*
      If there are enough trailing spaces (in this column),
      treating them specially may pay off.
    */
    if (huff_counts->tot_end_space)
    {
      huff_counts->counts[' ']+=huff_counts->tot_pre_space;
      if (test_space_compress(huff_counts,records,huff_counts->max_end_space,
                              huff_counts->end_space,
                              huff_counts->tot_end_space,FIELD_SKIP_ENDSPACE))
        goto found_pack;
      huff_counts->counts[' ']-=huff_counts->tot_pre_space;
    }

    /*
      If there are enough leading spaces (in this column),
      treating them specially may pay off.
    */
    if (huff_counts->tot_pre_space)
    {
      if (test_space_compress(huff_counts,records,huff_counts->max_pre_space,
                              huff_counts->pre_space,
                              huff_counts->tot_pre_space,FIELD_SKIP_PRESPACE))
        goto found_pack;
    }

  found_pack:			/* Found field-packing */

    /* Test if we can use zero-fill */

    if (huff_counts->max_zero_fill &&
        (huff_counts->field_type == FIELD_NORMAL ||
         huff_counts->field_type == FIELD_SKIP_ZERO))
    {
      huff_counts->counts[0]-=huff_counts->max_zero_fill*
        (huff_counts->field_type == FIELD_SKIP_ZERO ?
         records - huff_counts->zero_fields : records);
      huff_counts->pack_type|=PACK_TYPE_ZERO_FILL;
      huff_counts->bytes_packed=calc_packed_length(huff_counts,0);
    }

    /* Test if intervall-field is better */

    if (huff_counts->tree_buff)
    {
      HUFF_TREE tree;

      DBUG_EXECUTE_IF("forceintervall",
                      huff_counts->bytes_packed= ~ (my_off_t) 0;);
      tree.element_buffer=0;
      if (!make_huff_tree(&tree,huff_counts) &&
          tree.bytes_packed+tree.tree_pack_length < huff_counts->bytes_packed)
      {
        if (tree.elements == 1)
          huff_counts->field_type=FIELD_CONSTANT;
        else
          huff_counts->field_type=FIELD_INTERVALL;
        huff_counts->pack_type=0;
      }
      else
      {
        /* Interval coding does not pay off; drop the distinct-value data. */
        my_free(huff_counts->tree_buff);
        delete_tree(&huff_counts->int_tree, 0);
        huff_counts->tree_buff=0;
      }
      if (tree.element_buffer)
        my_free(tree.element_buffer);
    }
    if (huff_counts->pack_type & PACK_TYPE_SPACE_FIELDS)
      space_fields++;
    if (huff_counts->pack_type & PACK_TYPE_ZERO_FILL)
      fill_zero_fields++;
    field_count[huff_counts->field_type]++;
  }
  DBUG_PRINT("info", ("normal: %3d  empty-space: %3d  "
                      "empty-zero: %3d  empty-fill: %3d",
                      field_count[FIELD_NORMAL],space_fields,
                      field_count[FIELD_SKIP_ZERO],fill_zero_fields));
  DBUG_PRINT("info", ("pre-space: %3d  end-space: %3d  "
                      "intervall-fields: %3d  zero: %3d",
                      field_count[FIELD_SKIP_PRESPACE],
                      field_count[FIELD_SKIP_ENDSPACE],
                      field_count[FIELD_INTERVALL],
                      field_count[FIELD_ZERO]));
  if (verbose)
    printf("\nnormal:    %3d  empty-space:     %3d  "
           "empty-zero:       %3d  empty-fill: %3d\n"
           "pre-space: %3d  end-space:       %3d  "
           "intervall-fields: %3d  zero:       %3d\n",
           field_count[FIELD_NORMAL],space_fields,
           field_count[FIELD_SKIP_ZERO],fill_zero_fields,
           field_count[FIELD_SKIP_PRESPACE],
           field_count[FIELD_SKIP_ENDSPACE],
           field_count[FIELD_INTERVALL],
           field_count[FIELD_ZERO]);
  DBUG_VOID_RETURN;
}
|
|
|
|
|
2007-01-18 20:38:14 +01:00
|
|
|
|
|
|
|
/* Test if we can use space-compression and empty-field-compression */
|
2006-04-11 15:45:10 +02:00
|
|
|
|
|
|
|
/*
  Test whether space compression pays off for one column.

  Compares three strategies: no space packing (baseline), always
  storing a space count, and storing a length flag with a minimum
  space count between 0 and 7. Picks the cheapest and updates
  huff_counts (field_type, min_space, length_bits, pack_type,
  counts[' '], bytes_packed) accordingly.

  RETURN
    0  no space compression selected
    1  space compression selected (huff_counts updated)
*/

static int
test_space_compress(HUFF_COUNTS *huff_counts, my_off_t records,
                    uint max_space_length, my_off_t *space_counts,
                    my_off_t tot_space_count, enum en_fieldtype field_type)
{
  int min_pos;
  uint length_bits,i;
  my_off_t space_count,min_space_count,min_pack,new_length,skip;

  length_bits=max_bit(max_space_length);

  /* Default no end_space-packing */
  space_count=huff_counts->counts[(uint) ' '];
  min_space_count= (huff_counts->counts[(uint) ' ']+= tot_space_count);
  min_pack=calc_packed_length(huff_counts,0);
  min_pos= -2;
  huff_counts->counts[(uint) ' ']=space_count;

  /* Test with always space-count */
  new_length=huff_counts->bytes_packed+length_bits*records/8;
  if (new_length+1 < min_pack)
  {
    min_pos= -1;
    min_pack=new_length;
    min_space_count=space_count;
  }
  /* Test with length-flag */
  for (skip=0L, i=0 ; i < 8 ; i++)
  {
    if (space_counts[i])
    {
      if (i)
        huff_counts->counts[(uint) ' ']+=space_counts[i];
      /*
        NOTE(review): this always accumulates pre_space[i], even when
        called for trailing spaces (space_counts == end_space).
        Presumably it should be space_counts[i] — verify against
        upstream myisampack.c before changing.
      */
      skip+=huff_counts->pre_space[i];
      new_length=calc_packed_length(huff_counts,0)+
        (records+(records-skip)*(1+length_bits))/8;
      if (new_length < min_pack)
      {
        min_pos=(int) i;
        min_pack=new_length;
        min_space_count=huff_counts->counts[(uint) ' '];
      }
    }
  }

  huff_counts->counts[(uint) ' ']=min_space_count;
  huff_counts->bytes_packed=min_pack;
  switch (min_pos) {
  case -2:
    return(0);				/* No space-compress */
  case -1:				/* Always space-count */
    huff_counts->field_type=field_type;
    huff_counts->min_space=0;
    huff_counts->length_bits=max_bit(max_space_length);
    break;
  default:
    huff_counts->field_type=field_type;
    huff_counts->min_space=(uint) min_pos;
    huff_counts->pack_type|=PACK_TYPE_SELECTED;
    huff_counts->length_bits=max_bit(max_space_length);
    break;
  }
  return(1);				/* Using space-compress */
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Make a huff_tree of each huff_count */
|
|
|
|
|
|
|
|
static HUFF_TREE* make_huff_trees(HUFF_COUNTS *huff_counts, uint trees)
{
  HUFF_TREE *huff_tree;
  uint tree;
  DBUG_ENTER("make_huff_trees");

  /* Zero-filled so that element_buffer pointers start out NULL. */
  huff_tree= (HUFF_TREE*) my_malloc(trees*sizeof(HUFF_TREE),
                                    MYF(MY_WME | MY_ZEROFILL));
  if (!huff_tree)
    DBUG_RETURN(0);

  for (tree= 0; tree < trees; tree++)
  {
    if (!make_huff_tree(huff_tree + tree, huff_counts + tree))
      continue;
    /* Building this tree failed: release everything built so far. */
    while (tree--)
      my_free(huff_tree[tree].element_buffer);
    my_free(huff_tree);
    DBUG_RETURN(0);
  }
  DBUG_RETURN(huff_tree);
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
Build a Huffman tree.
|
|
|
|
|
|
|
|
SYNOPSIS
|
|
|
|
make_huff_tree()
|
|
|
|
huff_tree The Huffman tree.
|
|
|
|
huff_counts The counts.
|
|
|
|
|
|
|
|
DESCRIPTION
|
|
|
|
Build a Huffman tree according to huff_counts->counts or
|
|
|
|
huff_counts->tree_buff. tree_buff, if non-NULL contains up to
|
|
|
|
tree_buff_length of distinct column values. In that case, whole
|
|
|
|
values can be Huffman encoded instead of single bytes.
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
0 OK
|
|
|
|
!= 0 Error
|
|
|
|
*/
|
|
|
|
|
|
|
|
static int make_huff_tree(HUFF_TREE *huff_tree, HUFF_COUNTS *huff_counts)
{
  uint i,found,bits_packed,first,last;
  my_off_t bytes_packed;
  HUFF_ELEMENT *a,*b,*new_huff_el;

  first=last=0;
  if (huff_counts->tree_buff)
  {
    /* Calculate the number of distinct values in tree_buff. */
    found= (uint) (huff_counts->tree_pos - huff_counts->tree_buff) /
      huff_counts->field_length;
    first=0; last=found-1;
  }
  else
  {
    /* Count the number of uchar codes found in the column. */
    for (i=found=0 ; i < 256 ; i++)
    {
      if (huff_counts->counts[i])
      {
        if (! found++)
          first=i;
        last=i;
      }
    }
    /* The tree-building loop below needs at least two leaves. */
    if (found < 2)
      found=2;
  }

  /* When using 'tree_buff' we can have more that 256 values. */
  if (queue.max_elements < found)
  {
    delete_queue(&queue);
    if (init_queue(&queue,found, 0, 0, compare_huff_elements, 0, 0, 0))
      return -1;
  }

  /* Allocate or reallocate an element buffer for the Huffman tree. */
  if (!huff_tree->element_buffer)
  {
    if (!(huff_tree->element_buffer=
         (HUFF_ELEMENT*) my_malloc(found*2*sizeof(HUFF_ELEMENT),MYF(MY_WME))))
      return 1;
  }
  else
  {
    /* Keep the old buffer on realloc failure (no leak). */
    HUFF_ELEMENT *temp;
    if (!(temp=
          (HUFF_ELEMENT*) my_realloc((uchar*) huff_tree->element_buffer,
                                     found*2*sizeof(HUFF_ELEMENT),
                                     MYF(MY_WME))))
      return 1;
    huff_tree->element_buffer=temp;
  }

  huff_counts->tree=huff_tree;
  huff_tree->counts=huff_counts;
  huff_tree->min_chr=first;
  huff_tree->max_chr=last;
  huff_tree->char_bits=max_bit(last-first);
  huff_tree->offset_bits=max_bit(found-1)+1;

  if (huff_counts->tree_buff)
  {
    huff_tree->elements=0;
    huff_tree->tree_pack_length=(1+15+16+5+5+
                                 (huff_tree->char_bits+1)*found+
                                 (huff_tree->offset_bits+1)*
                                 (found-2)+7)/8 +
                                   (uint) (huff_tree->counts->tree_pos-
                                           huff_tree->counts->tree_buff);
    /*
      Put a HUFF_ELEMENT into the queue for every distinct column value.

      tree_walk() calls save_counts_in_queue() for every element in
      'int_tree'. This takes elements from the target trees element
      buffer and places references to them into the buffer of the
      priority queue. We insert in column value order, but the order is
      in fact irrelevant here. We will establish the correct order
      later.
    */
    tree_walk(&huff_counts->int_tree,
              (int (*)(void*, element_count,void*)) save_counts_in_queue,
              (uchar*) huff_tree, left_root_right);
  }
  else
  {
    huff_tree->elements=found;
    huff_tree->tree_pack_length=(9+9+5+5+
                                 (huff_tree->char_bits+1)*found+
                                 (huff_tree->offset_bits+1)*
                                 (found-2)+7)/8;
    /*
      Put a HUFF_ELEMENT into the queue for every uchar code found in the column.

      The elements are taken from the target trees element buffer.
      Instead of using queue_insert(), we just place references to the
      elements into the buffer of the priority queue. We insert in byte
      value order, but the order is in fact irrelevant here. We will
      establish the correct order later.
    */
    for (i=first, found=0 ; i <= last ; i++)
    {
      if (huff_counts->counts[i])
      {
        new_huff_el=huff_tree->element_buffer+(found++);
        new_huff_el->count=huff_counts->counts[i];
        new_huff_el->a.leaf.null=0;
        new_huff_el->a.leaf.element_nr=i;
        queue.root[found]=(uchar*) new_huff_el;
      }
    }
    /*
      If there is only a single uchar value in this field in all records,
      add a second element with zero incidence. This is required to enter
      the loop, which builds the Huffman tree.
    */
    while (found < 2)
    {
      new_huff_el=huff_tree->element_buffer+(found++);
      new_huff_el->count=0;
      new_huff_el->a.leaf.null=0;
      if (last)
        new_huff_el->a.leaf.element_nr=huff_tree->min_chr=last-1;
      else
        new_huff_el->a.leaf.element_nr=huff_tree->max_chr=last+1;
      queue.root[found]=(uchar*) new_huff_el;
    }
  }

  /* Make a queue from the queue buffer. */
  queue.elements=found;

  /*
    Make a priority queue from the queue. Construct its index so that we
    have a partially ordered tree.
  */
  queue_fix(&queue);

  /* The Huffman algorithm. */
  bytes_packed=0; bits_packed=0;
  for (i=1 ; i < found ; i++)
  {
    /*
      Pop the top element from the queue (the one with the least incidence).
      Popping from a priority queue includes a re-ordering of the queue,
      to get the next least incidence element to the top.
    */
    a=(HUFF_ELEMENT*) queue_remove_top(&queue);
    /* Copy the next least incidence element */
    b=(HUFF_ELEMENT*) queue_top(&queue);
    /* Get a new element from the element buffer. */
    new_huff_el=huff_tree->element_buffer+found+i;
    /* The new element gets the sum of the two least incidence elements. */
    new_huff_el->count=a->count+b->count;
    /*
      The Huffman algorithm assigns another bit to the code for a byte
      every time that bytes incidence is combined (directly or indirectly)
      to a new element as one of the two least incidence elements.
      This means that one more bit per incidence of that uchar is required
      in the resulting file. So we add the new combined incidence as the
      number of bits by which the result grows.
    */
    bits_packed+=(uint) (new_huff_el->count & 7);
    bytes_packed+=new_huff_el->count/8;
    /* The new element points to its children, lesser in left. */
    new_huff_el->a.nod.left=a;
    new_huff_el->a.nod.right=b;
    /*
      Replace the copied top element by the new element and re-order the
      queue.
    */
    queue_top(&queue)= (uchar*) new_huff_el;
    queue_replace_top(&queue);
  }
  huff_tree->root=(HUFF_ELEMENT*) queue.root[1];
  huff_tree->bytes_packed=bytes_packed+(bits_packed+7)/8;
  return 0;
}
|
|
|
|
|
|
|
|
static int compare_tree(void* cmp_arg __attribute__((unused)),
|
|
|
|
register const uchar *s, register const uchar *t)
|
|
|
|
{
|
|
|
|
uint length;
|
|
|
|
for (length=global_count->field_length; length-- ;)
|
|
|
|
if (*s++ != *t++)
|
|
|
|
return (int) s[-1] - (int) t[-1];
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
Organize distinct column values and their incidences into a priority queue.
|
|
|
|
|
|
|
|
SYNOPSIS
|
|
|
|
save_counts_in_queue()
|
|
|
|
key The column value.
|
|
|
|
count The incidence of this value.
|
|
|
|
tree The Huffman tree to be built later.
|
|
|
|
|
|
|
|
DESCRIPTION
|
|
|
|
We use the element buffer of the targeted tree. The distinct column
|
|
|
|
values are organized in a priority queue first. The Huffman
|
|
|
|
algorithm will later organize the elements into a Huffman tree. For
|
|
|
|
the time being, we just place references to the elements into the
|
|
|
|
queue buffer. The buffer will later be organized into a priority
|
|
|
|
queue.
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
0
|
|
|
|
*/
|
|
|
|
|
2007-07-02 19:45:15 +02:00
|
|
|
static int save_counts_in_queue(uchar *key, element_count count,
|
2006-04-11 15:45:10 +02:00
|
|
|
HUFF_TREE *tree)
|
|
|
|
{
|
|
|
|
HUFF_ELEMENT *new_huff_el;
|
|
|
|
|
|
|
|
new_huff_el=tree->element_buffer+(tree->elements++);
|
|
|
|
new_huff_el->count=count;
|
|
|
|
new_huff_el->a.leaf.null=0;
|
|
|
|
new_huff_el->a.leaf.element_nr= (uint) (key- tree->counts->tree_buff) /
|
|
|
|
tree->counts->field_length;
|
2007-07-02 19:45:15 +02:00
|
|
|
queue.root[tree->elements]=(uchar*) new_huff_el;
|
2006-04-11 15:45:10 +02:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
Calculate length of file if given counts should be used.
|
|
|
|
|
|
|
|
SYNOPSIS
|
|
|
|
calc_packed_length()
|
|
|
|
huff_counts The counts for a column of the table(s).
|
|
|
|
add_tree_lenght If the decode tree length should be added.
|
|
|
|
|
|
|
|
DESCRIPTION
|
|
|
|
    We need to follow the Huffman algorithm until we know how many bits
|
2007-07-02 19:45:15 +02:00
|
|
|
are required for each uchar code. But we do not need the resulting
|
2006-04-11 15:45:10 +02:00
|
|
|
Huffman tree. Hence, we can leave out some steps which are essential
|
|
|
|
in make_huff_tree().
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
Number of bytes required to compress this table column.
|
|
|
|
*/
|
|
|
|
|
|
|
|
static my_off_t calc_packed_length(HUFF_COUNTS *huff_counts,
                                   uint add_tree_lenght)
{
  uint i,found,bits_packed,first,last;
  my_off_t bytes_packed;
  /* Scratch nodes for the Huffman merge steps; never returned to the caller. */
  HUFF_ELEMENT element_buffer[256];
  DBUG_ENTER("calc_packed_length");

  /*
    WARNING: We use a small hack for efficiency: Instead of placing
    references to HUFF_ELEMENTs into the queue, we just insert
    references to the counts of the uchar codes which appeared in this
    table column. During the Huffman algorithm they are successively
    replaced by references to HUFF_ELEMENTs. This works, because
    HUFF_ELEMENTs have the incidence count at their beginning.
    Regardless, whether the queue array contains references to counts of
    type my_off_t or references to HUFF_ELEMENTs which have the count of
    type my_off_t at their beginning, it always points to a count of the
    same type.

    Instead of using queue_insert(), we just copy the references into
    the buffer of the priority queue. We insert in uchar value order, but
    the order is in fact irrelevant here. We will establish the correct
    order later.
  */
  first=last=0;
  for (i=found=0 ; i < 256 ; i++)
  {
    if (huff_counts->counts[i])
    {
      if (! found++)
        first=i;
      last=i;
      /* We start with root[1], which is the queues top element. */
      queue.root[found]=(uchar*) &huff_counts->counts[i];
    }
  }
  if (!found)
    DBUG_RETURN(0);			/* Empty tree */
  /*
    If there is only a single uchar value in this field in all records,
    add a second element with zero incidence. This is required to enter
    the loop, which follows the Huffman algorithm.
  */
  if (found < 2)
    queue.root[++found]=(uchar*) &huff_counts->counts[last ? 0 : 1];

  /* Make a queue from the queue buffer. */
  queue.elements=found;

  bytes_packed=0; bits_packed=0;
  /* Add the length of the coding table, which would become part of the file. */
  if (add_tree_lenght)
    bytes_packed=(8+9+5+5+(max_bit(last-first)+1)*found+
                  (max_bit(found-1)+1+1)*(found-2) +7)/8;

  /*
    Make a priority queue from the queue. Construct its index so that we
    have a partially ordered tree.
  */
  queue_fix(&queue);

  /* The Huffman algorithm. */
  for (i=0 ; i < found-1 ; i++)
  {
    my_off_t *a;
    my_off_t *b;
    HUFF_ELEMENT *new_huff_el;

    /*
      Pop the top element from the queue (the one with the least
      incidence). Popping from a priority queue includes a re-ordering
      of the queue, to get the next least incidence element to the top.
    */
    a= (my_off_t*) queue_remove_top(&queue);
    /* Copy the next least incidence element. */
    b= (my_off_t*) queue_top(&queue);
    /* Create a new element in a local (automatic) buffer. */
    new_huff_el= element_buffer + i;
    /* The new element gets the sum of the two least incidence elements. */
    new_huff_el->count= *a + *b;
    /*
      The Huffman algorithm assigns another bit to the code for a byte
      every time that bytes incidence is combined (directly or indirectly)
      to a new element as one of the two least incidence elements.
      This means that one more bit per incidence of that uchar is required
      in the resulting file. So we add the new combined incidence as the
      number of bits by which the result grows.

      The bit total is kept split as whole bytes (bytes_packed) plus a
      remainder of 0..7 bits (bits_packed) to avoid my_off_t overflow.
    */
    bits_packed+=(uint) (new_huff_el->count & 7);
    bytes_packed+=new_huff_el->count/8;
    /*
      Replace the copied top element by the new element and re-order the
      queue. This successively replaces the references to counts by
      references to HUFF_ELEMENTs.
    */
    queue_top(&queue)= (uchar*) new_huff_el;
    queue_replace_top(&queue);
  }
  /* Round the accumulated remainder bits up to whole bytes. */
  DBUG_RETURN(bytes_packed+(bits_packed+7)/8);
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Remove trees that don't give any compression */
|
|
|
|
|
|
|
|
static uint join_same_trees(HUFF_COUNTS *huff_counts, uint trees)
|
|
|
|
{
|
|
|
|
uint k,tree_number;
|
|
|
|
HUFF_COUNTS count,*i,*j,*last_count;
|
|
|
|
|
|
|
|
last_count=huff_counts+trees;
|
|
|
|
for (tree_number=0, i=huff_counts ; i < last_count ; i++)
|
|
|
|
{
|
|
|
|
if (!i->tree->tree_number)
|
|
|
|
{
|
|
|
|
i->tree->tree_number= ++tree_number;
|
|
|
|
if (i->tree_buff)
|
|
|
|
continue; /* Don't join intervall */
|
|
|
|
for (j=i+1 ; j < last_count ; j++)
|
|
|
|
{
|
|
|
|
if (! j->tree->tree_number && ! j->tree_buff)
|
|
|
|
{
|
|
|
|
for (k=0 ; k < 256 ; k++)
|
|
|
|
count.counts[k]=i->counts[k]+j->counts[k];
|
|
|
|
if (calc_packed_length(&count,1) <=
|
|
|
|
i->tree->bytes_packed + j->tree->bytes_packed+
|
|
|
|
i->tree->tree_pack_length+j->tree->tree_pack_length+
|
|
|
|
ALLOWED_JOIN_DIFF)
|
|
|
|
{
|
2011-04-25 17:22:25 +02:00
|
|
|
memcpy(i->counts,(uchar*) count.counts, sizeof(count.counts[0])*256);
|
|
|
|
my_free(j->tree->element_buffer);
|
2006-04-11 15:45:10 +02:00
|
|
|
j->tree->element_buffer=0;
|
|
|
|
j->tree=i->tree;
|
2007-07-02 19:45:15 +02:00
|
|
|
bmove((uchar*) i->counts,(uchar*) count.counts,
|
2006-04-11 15:45:10 +02:00
|
|
|
sizeof(count.counts[0])*256);
|
|
|
|
if (make_huff_tree(i->tree,i))
|
|
|
|
return (uint) -1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
DBUG_PRINT("info", ("Original trees: %d After join: %d",
|
|
|
|
trees, tree_number));
|
|
|
|
if (verbose)
|
2011-04-25 17:22:25 +02:00
|
|
|
printf("Original trees: %d After join: %d\n", trees, tree_number);
|
2006-04-11 15:45:10 +02:00
|
|
|
return tree_number; /* Return trees left */
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
Fill in huff_tree encode tables.
|
|
|
|
|
|
|
|
SYNOPSIS
|
|
|
|
make_huff_decode_table()
|
|
|
|
huff_tree An array of HUFF_TREE which are to be encoded.
|
|
|
|
trees The number of HUFF_TREE in the array.
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
0 success
|
|
|
|
!= 0 error
|
|
|
|
*/
|
|
|
|
|
|
|
|
static int make_huff_decode_table(HUFF_TREE *huff_tree, uint trees)
{
  for ( ; trees-- ; huff_tree++)
  {
    uint nr_codes;

    /* Trees joined into another column have tree_number == 0. */
    if (huff_tree->tree_number == 0)
      continue;
    /* Interval columns have one code per distinct value, others per byte. */
    nr_codes= huff_tree->counts->tree_buff ? huff_tree->elements : 256;
    /* One allocation holds both the code words and their lengths. */
    huff_tree->code= (ulonglong*)
      my_malloc(nr_codes * (sizeof(ulonglong) + sizeof(uchar)),
                MYF(MY_WME | MY_ZEROFILL));
    if (!huff_tree->code)
      return 1;
    huff_tree->code_len= (uchar*) (huff_tree->code + nr_codes);
    /* Walk the tree and fill code[] / code_len[] for every leaf. */
    make_traverse_code_tree(huff_tree, huff_tree->root,
                            8 * sizeof(ulonglong), 0);
  }
  return 0;
}
|
|
|
|
|
|
|
|
|
|
|
|
static void make_traverse_code_tree(HUFF_TREE *huff_tree,
|
|
|
|
HUFF_ELEMENT *element,
|
|
|
|
uint size, ulonglong code)
|
|
|
|
{
|
|
|
|
uint chr;
|
|
|
|
if (!element->a.leaf.null)
|
|
|
|
{
|
|
|
|
chr=element->a.leaf.element_nr;
|
|
|
|
huff_tree->code_len[chr]= (uchar) (8 * sizeof(ulonglong) - size);
|
|
|
|
huff_tree->code[chr]= (code >> size);
|
|
|
|
if (huff_tree->height < 8 * sizeof(ulonglong) - size)
|
|
|
|
huff_tree->height= 8 * sizeof(ulonglong) - size;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
size--;
|
|
|
|
make_traverse_code_tree(huff_tree,element->a.nod.left,size,code);
|
|
|
|
make_traverse_code_tree(huff_tree, element->a.nod.right, size,
|
|
|
|
code + (((ulonglong) 1) << size));
|
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
Convert a value into binary digits.
|
|
|
|
|
|
|
|
SYNOPSIS
|
|
|
|
bindigits()
|
|
|
|
value The value.
|
|
|
|
length The number of low order bits to convert.
|
|
|
|
|
|
|
|
NOTE
|
|
|
|
The result string is in static storage. It is reused on every call.
|
|
|
|
So you cannot use it twice in one expression.
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
A pointer to a static NUL-terminated string.
|
|
|
|
*/
|
|
|
|
|
|
|
|
static char *bindigits(ulonglong value, uint bits)
{
  static char digits[72];
  char *out= digits;
  uint bit= bits;

  /* 'bits' must leave room for the terminating NUL. */
  DBUG_ASSERT(bit < sizeof(digits));
  /* Emit the requested low-order bits, most significant first. */
  while (bit)
  {
    bit--;
    *out++= (char) ('0' + ((value >> bit) & 1));
  }
  *out= '\0';
  return digits;
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
Convert a value into hexadecimal digits.
|
|
|
|
|
|
|
|
SYNOPSIS
|
|
|
|
hexdigits()
|
|
|
|
value The value.
|
|
|
|
|
|
|
|
NOTE
|
|
|
|
The result string is in static storage. It is reused on every call.
|
|
|
|
So you cannot use it twice in one expression.
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
A pointer to a static NUL-terminated string.
|
|
|
|
*/
|
|
|
|
|
|
|
|
static char *hexdigits(ulonglong value)
{
  static char digits[20];
  char *out= digits;
  uint nibbles= 2 * sizeof(value);	/* Two hex digits per byte. */

  DBUG_ASSERT(nibbles < sizeof(digits));
  /* Emit all nibbles, most significant first, zero-padded. */
  while (nibbles)
  {
    uint nib= (uint) (value >> (4 * (--nibbles))) & 0xf;
    *out++= (char) (nib < 10 ? '0' + nib : 'a' + (nib - 10));
  }
  *out= '\0';
  return digits;
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Write header to new packed data file */
|
|
|
|
|
|
|
|
/*
  Fill in and write the fixed-size header of the packed data file.
  The header is built in the global file_buffer and written at offset 0.
  Returns 0 on success (or in test_only mode), non-zero on write error.
*/
static int write_header(PACK_MRG_INFO *mrg,uint head_length,uint trees,
                        my_off_t tot_elements,my_off_t filelength)
{
  uchar *buff= (uchar*) file_buffer.pos;

  bzero(buff,HEAD_LENGTH);
  /* Header layout: magic, total header length, pack length range, ... */
  memcpy(buff,maria_pack_file_magic,4);
  int4store(buff+4,head_length);
  int4store(buff+8, mrg->min_pack_length);
  int4store(buff+12,mrg->max_pack_length);
  int4store(buff+16,tot_elements);		/* Elements in all trees */
  int4store(buff+20,intervall_length);		/* Distinct-value bytes */
  int2store(buff+24,trees);
  buff[26]=(char) mrg->ref_length;
  /* Save record pointer length */
  buff[27]= (uchar) maria_get_pointer_length((ulonglong) filelength,2);
  if (test_only)
    return 0;
  /* Rewind and overwrite the placeholder header at the file start. */
  my_seek(file_buffer.file,0L,MY_SEEK_SET,MYF(0));
  return my_write(file_buffer.file,(const uchar *) file_buffer.pos,HEAD_LENGTH,
                  MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)) != 0;
}
|
|
|
|
|
|
|
|
/* Write fieldinfo to new packed file */
|
|
|
|
|
|
|
|
/*
  Write the per-column compression description to the packed file:
  for every column its field type, pack-type flags, zero-fill/length
  bits and the (0-based) number of the Huffman tree it uses.
*/
static void write_field_info(HUFF_COUNTS *counts, uint fields, uint trees)
{
  reg1 uint i;
  uint huff_tree_bits;
  /* Bits needed to store a 0-based tree number. */
  huff_tree_bits=max_bit(trees ? trees-1 : 0);

  DBUG_PRINT("info", (" "));
  DBUG_PRINT("info", ("column types:"));
  DBUG_PRINT("info", ("FIELD_NORMAL          0"));
  DBUG_PRINT("info", ("FIELD_SKIP_ENDSPACE   1"));
  DBUG_PRINT("info", ("FIELD_SKIP_PRESPACE   2"));
  DBUG_PRINT("info", ("FIELD_SKIP_ZERO       3"));
  DBUG_PRINT("info", ("FIELD_BLOB            4"));
  DBUG_PRINT("info", ("FIELD_CONSTANT        5"));
  DBUG_PRINT("info", ("FIELD_INTERVALL       6"));
  DBUG_PRINT("info", ("FIELD_ZERO            7"));
  DBUG_PRINT("info", ("FIELD_VARCHAR         8"));
  DBUG_PRINT("info", ("FIELD_CHECK           9"));
  DBUG_PRINT("info", (" "));
  DBUG_PRINT("info", ("pack type as a set of flags:"));
  DBUG_PRINT("info", ("PACK_TYPE_SELECTED      1"));
  DBUG_PRINT("info", ("PACK_TYPE_SPACE_FIELDS  2"));
  DBUG_PRINT("info", ("PACK_TYPE_ZERO_FILL     4"));
  DBUG_PRINT("info", (" "));
  if (verbose >= 2)
  {
    printf("\n");
    printf("column types:\n");
    printf("FIELD_NORMAL          0\n");
    printf("FIELD_SKIP_ENDSPACE   1\n");
    printf("FIELD_SKIP_PRESPACE   2\n");
    printf("FIELD_SKIP_ZERO       3\n");
    printf("FIELD_BLOB            4\n");
    printf("FIELD_CONSTANT        5\n");
    printf("FIELD_INTERVALL       6\n");
    printf("FIELD_ZERO            7\n");
    printf("FIELD_VARCHAR         8\n");
    printf("FIELD_CHECK           9\n");
    printf("\n");
    printf("pack type as a set of flags:\n");
    printf("PACK_TYPE_SELECTED      1\n");
    printf("PACK_TYPE_SPACE_FIELDS  2\n");
    printf("PACK_TYPE_ZERO_FILL     4\n");
    printf("\n");
  }
  for (i=0 ; i++ < fields ; counts++)
  {
    write_bits((ulonglong) (int) counts->field_type, 5);
    write_bits(counts->pack_type,6);
    /* Zero-filled fields store the fill count instead of length bits. */
    if (counts->pack_type & PACK_TYPE_ZERO_FILL)
      write_bits(counts->max_zero_fill,5);
    else
      write_bits(counts->length_bits,5);
    /* Tree numbers are 1-based internally but stored 0-based. */
    write_bits((ulonglong) counts->tree->tree_number - 1, huff_tree_bits);
    DBUG_PRINT("info", ("column: %3u  type: %2u  pack: %2u  zero: %4u  "
                        "lbits: %2u  tree: %2u  length: %4u",
                        i , counts->field_type, counts->pack_type,
                        counts->max_zero_fill, counts->length_bits,
                        counts->tree->tree_number, counts->field_length));
    if (verbose >= 2)
      printf("column: %3u  type: %2u  pack: %2u  zero: %4u  lbits: %2u  "
             "tree: %2u  length: %4u\n", i , counts->field_type,
             counts->pack_type, counts->max_zero_fill, counts->length_bits,
             counts->tree->tree_number, counts->field_length);
  }
  flush_bits();
  return;
}
|
|
|
|
|
|
|
|
/* Write all huff_trees to new datafile. Return tot count of
|
|
|
|
elements in all trees
|
|
|
|
Returns 0 on error */
|
|
|
|
|
|
|
|
static my_off_t write_huff_tree(HUFF_TREE *huff_tree, uint trees)
{
  uint i,int_length;
  uint tree_no;
  uint codes;
  uint errors= 0;				/* Non-zero aborts all writing */
  uint *packed_tree,*offset,length;
  my_off_t elements;

  /* Find the highest number of elements in the trees. */
  for (i=length=0 ; i < trees ; i++)
    if (huff_tree[i].tree_number > 0 && huff_tree[i].elements > length)
      length=huff_tree[i].elements;
  /*
    Allocate a buffer for packing a decode tree. Two numbers per element
    (left child and right child).
  */
  if (!(packed_tree=(uint*) my_alloca(sizeof(uint)*length*2)))
  {
    my_error(EE_OUTOFMEMORY,MYF(ME_BELL),sizeof(uint)*length*2);
    return 0;
  }

  DBUG_PRINT("info", (" "));
  if (verbose >= 2)
    printf("\n");
  tree_no= 0;
  intervall_length=0;
  for (elements=0; trees-- ; huff_tree++)
  {
    /* Skip columns that have been joined with other columns. */
    if (huff_tree->tree_number == 0)
      continue;				/* Deleted tree */
    tree_no++;
    DBUG_PRINT("info", (" "));
    if (verbose >= 3)
      printf("\n");
    /* Count the total number of elements (byte codes or column values). */
    elements+=huff_tree->elements;
    huff_tree->max_offset=2;
    /* Build a tree of offsets and codes for decoding in 'packed_tree'. */
    if (huff_tree->elements <= 1)
      offset=packed_tree;
    else
      offset=make_offset_code_tree(huff_tree,huff_tree->root,packed_tree);

    /* This should be the same as 'length' above. */
    huff_tree->offset_bits=max_bit(huff_tree->max_offset);

    /*
      Since we check this during collecting the distinct column values,
      this should never happen.
    */
    if (huff_tree->max_offset >= IS_OFFSET)
    {						/* This should be impossible */
      fprintf(stderr, "Tree offset got too big: %d, aborted\n",
              huff_tree->max_offset);
      my_afree(packed_tree);
      return 0;
    }

    DBUG_PRINT("info", ("pos: %lu  elements: %u  tree-elements: %lu  "
                        "char_bits: %u\n",
                        (ulong) (file_buffer.pos - file_buffer.buffer),
                        huff_tree->elements, (ulong) (offset - packed_tree),
                        huff_tree->char_bits));
    if (!huff_tree->counts->tree_buff)
    {
      /* We do a uchar compression on this column. Mark with bit 0. */
      write_bits(0,1);
      write_bits(huff_tree->min_chr,8);
      write_bits(huff_tree->elements,9);
      write_bits(huff_tree->char_bits,5);
      write_bits(huff_tree->offset_bits,5);
      int_length=0;
    }
    else
    {
      int_length=(uint) (huff_tree->counts->tree_pos -
                         huff_tree->counts->tree_buff);
      /* We have distinct column values for this column. Mark with bit 1. */
      write_bits(1,1);
      write_bits(huff_tree->elements,15);
      write_bits(int_length,16);
      write_bits(huff_tree->char_bits,5);
      write_bits(huff_tree->offset_bits,5);
      intervall_length+=int_length;
    }
    DBUG_PRINT("info", ("tree: %2u  elements: %4u  char_bits: %2u  "
                        "offset_bits: %2u  %s: %5u  codelen: %2u",
                        tree_no, huff_tree->elements, huff_tree->char_bits,
                        huff_tree->offset_bits, huff_tree->counts->tree_buff ?
                        "bufflen" : "min_chr", huff_tree->counts->tree_buff ?
                        int_length : huff_tree->min_chr, huff_tree->height));
    if (verbose >= 2)
      printf("tree: %2u  elements: %4u  char_bits: %2u  offset_bits: %2u  "
             "%s: %5u  codelen: %2u\n", tree_no, huff_tree->elements,
             huff_tree->char_bits, huff_tree->offset_bits,
             huff_tree->counts->tree_buff ? "bufflen" : "min_chr",
             huff_tree->counts->tree_buff ? int_length :
             huff_tree->min_chr, huff_tree->height);

    /* Check that the code tree length matches the element count. */
    length=(uint) (offset-packed_tree);
    if (length != huff_tree->elements*2-2)
    {
      fprintf(stderr, "error: Huff-tree-length: %d != calc_length: %d\n",
              length, huff_tree->elements * 2 - 2);
      errors++;
      break;
    }

    /* Write the packed decode tree: offsets get an extra marker bit. */
    for (i=0 ; i < length ; i++)
    {
      if (packed_tree[i] & IS_OFFSET)
        write_bits(packed_tree[i] - IS_OFFSET+ (1 << huff_tree->offset_bits),
                   huff_tree->offset_bits+1);
      else
        write_bits(packed_tree[i]-huff_tree->min_chr,huff_tree->char_bits+1);
      DBUG_PRINT("info", ("tree[0x%04x]: %s0x%04x",
                          i, (packed_tree[i] & IS_OFFSET) ?
                          " -> " : "", (packed_tree[i] & IS_OFFSET) ?
                          packed_tree[i] - IS_OFFSET + i : packed_tree[i]));
      if (verbose >= 3)
        printf("tree[0x%04x]: %s0x%04x\n",
               i, (packed_tree[i] & IS_OFFSET) ? " -> " : "",
               (packed_tree[i] & IS_OFFSET) ?
               packed_tree[i] - IS_OFFSET + i : packed_tree[i]);
    }
    flush_bits();

    /*
      Display coding tables and check their correctness.
    */
    codes= huff_tree->counts->tree_buff ? huff_tree->elements : 256;
    for (i= 0; i < codes; i++)
    {
      ulonglong code;
      uint bits;
      uint len;
      uint idx;

      if (! (len= huff_tree->code_len[i]))
        continue;
      DBUG_PRINT("info", ("code[0x%04x]: 0x%s  bits: %2u  bin: %s", i,
                          hexdigits(huff_tree->code[i]), huff_tree->code_len[i],
                          bindigits(huff_tree->code[i],
                                    huff_tree->code_len[i])));
      if (verbose >= 3)
        printf("code[0x%04x]: 0x%s  bits: %2u  bin: %s\n", i,
               hexdigits(huff_tree->code[i]), huff_tree->code_len[i],
               bindigits(huff_tree->code[i], huff_tree->code_len[i]));

      /* Check that the encode table decodes correctly. */
      code= 0;
      bits= 0;
      idx= 0;
      DBUG_EXECUTE_IF("forcechkerr1", len--;);
      DBUG_EXECUTE_IF("forcechkerr2", bits= 8 * sizeof(code););
      DBUG_EXECUTE_IF("forcechkerr3", idx= length;);
      /* Walk the packed tree bit by bit, following this code word. */
      for (;;)
      {
        if (! len)
        {
          fflush(stdout);
          fprintf(stderr, "error: code 0x%s with %u bits not found\n",
                  hexdigits(huff_tree->code[i]), huff_tree->code_len[i]);
          errors++;
          break;
        }
        code<<= 1;
        code|= (huff_tree->code[i] >> (--len)) & 1;
        bits++;
        if (bits > 8 * sizeof(code))
        {
          fflush(stdout);
          fprintf(stderr, "error: Huffman code too long: %u/%u\n",
                  bits, (uint) (8 * sizeof(code)));
          errors++;
          break;
        }
        /* A 1 bit selects the right child (next array slot). */
        idx+= (uint) code & 1;
        if (idx >= length)
        {
          fflush(stdout);
          fprintf(stderr, "error: illegal tree offset: %u/%u\n", idx, length);
          errors++;
          break;
        }
        if (packed_tree[idx] & IS_OFFSET)
          idx+= packed_tree[idx] & ~IS_OFFSET;
        else
          break; /* Hit a leaf. This contains the result value. */
      }
      if (errors)
        break;

      DBUG_EXECUTE_IF("forcechkerr4", packed_tree[idx]++;);
      if (packed_tree[idx] != i)
      {
        fflush(stdout);
        fprintf(stderr, "error: decoded value 0x%04x  should be: 0x%04x\n",
                packed_tree[idx], i);
        errors++;
        break;
      }
    } /*end for (codes)*/
    if (errors)
      break;

    /* Write column values in case of distinct column value compression. */
    if (huff_tree->counts->tree_buff)
    {
      for (i=0 ; i < int_length ; i++)
      {
        write_bits((ulonglong) (uchar) huff_tree->counts->tree_buff[i], 8);
        DBUG_PRINT("info", ("column_values[0x%04x]: 0x%02x",
                            i, (uchar) huff_tree->counts->tree_buff[i]));
        if (verbose >= 3)
          printf("column_values[0x%04x]: 0x%02x\n",
                 i, (uchar) huff_tree->counts->tree_buff[i]);
      }
    }
    flush_bits();
  }
  DBUG_PRINT("info", (" "));
  if (verbose >= 2)
    printf("\n");
  my_afree(packed_tree);
  if (errors)
  {
    fprintf(stderr, "Error: Generated decode trees are corrupt. Stop.\n");
    return 0;
  }
  return elements;
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
  Pack a Huffman tree node into the flat decode array.  Each node takes
  two consecutive slots (left child, right child); a slot holds either a
  leaf's element number or IS_OFFSET plus the relative distance to the
  child node's pair.  Returns the next free slot in the array.
*/
static uint *make_offset_code_tree(HUFF_TREE *huff_tree, HUFF_ELEMENT *element,
                                   uint *offset)
{
  uint *prev_offset;

  prev_offset= offset;
  /*
    'a.leaf.null' takes the same place as 'a.nod.left'. If this is null,
    then there is no left child and, hence no right child either. This
    is a property of a binary tree. An element is either a node with two
    childs, or a leaf without childs.

    The current element is always a node with two childs. Go left first.
  */
  if (!element->a.nod.left->a.leaf.null)
  {
    /* Store the uchar code or the index of the column value. */
    prev_offset[0] =(uint) element->a.nod.left->a.leaf.element_nr;
    offset+=2;
  }
  else
  {
    /*
      Recursively traverse the tree to the left. Mark it as an offset to
      another tree node (in contrast to a uchar code or column value index).
    */
    prev_offset[0]= IS_OFFSET+2;
    offset=make_offset_code_tree(huff_tree,element->a.nod.left,offset+2);
  }

  /* Now, check the right child. */
  if (!element->a.nod.right->a.leaf.null)
  {
    /* Store the uchar code or the index of the column value. */
    prev_offset[1]=element->a.nod.right->a.leaf.element_nr;
    return offset;
  }
  else
  {
    /*
      Recursively traverse the tree to the right. Mark it as an offset to
      another tree node (in contrast to a uchar code or column value index).
    */
    uint temp=(uint) (offset-prev_offset-1);
    prev_offset[1]= IS_OFFSET+ temp;
    /* Remember the biggest offset; it determines offset_bits later. */
    if (huff_tree->max_offset < temp)
      huff_tree->max_offset = temp;
    return make_offset_code_tree(huff_tree,element->a.nod.right,offset);
  }
}
|
|
|
|
|
|
|
|
/* Get number of bits neaded to represent value */
|
|
|
|
|
|
|
|
static uint max_bit(register uint value)
|
|
|
|
{
|
|
|
|
reg2 uint power=1;
|
|
|
|
|
|
|
|
while ((value>>=1))
|
|
|
|
power++;
|
|
|
|
return (power);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2008-01-10 20:21:36 +01:00
|
|
|
/*
  Compress all records of the file(s) described by 'mrg' and write the
  packed rows through the global 'file_buffer'.

  For each record: reserve space for the (row-length, blob-length)
  header, copy the null bits unchanged, then Huffman-encode every
  column according to its field_type, and finally store the real packed
  length in front of the row.

  RETURN
    0   ok
   -1   out of memory for the record buffer
    #   other non-zero: read or write error
*/
static int compress_maria_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts)
{
  int error;
  uint i,max_calc_length,pack_ref_length,min_record_length,max_record_length;
  uint intervall,field_length,max_pack_length,pack_blob_length, null_bytes;
  my_off_t record_count;
  char llbuf[32];
  ulong length,pack_length;
  uchar *record,*pos,*end_pos,*record_pos,*start_pos;
  HUFF_COUNTS *count,*end_count;
  HUFF_TREE *tree;
  MARIA_HA *isam_file=mrg->file[0];
  uint pack_version= (uint) isam_file->s->pack.version;
  DBUG_ENTER("compress_maria_file");

  /* Allocate a buffer for the records (excluding blobs). */
  if (!(record=(uchar*) my_safe_alloca(isam_file->s->base.reclength)))
    /* NOTE(review): plain 'return' after DBUG_ENTER unbalances the DBUG
       call stack in debug builds — consider DBUG_RETURN(-1). */
    return -1;

  end_count=huff_counts+isam_file->s->base.fields;
  min_record_length= (uint) ~0;
  max_record_length=0;
  null_bytes= isam_file->s->base.null_bytes;

  /*
    Calculate the maximum number of bits required to pack the records.
    Remember to understand 'max_zero_fill' as 'min_zero_fill'.
    The tree height determines the maximum number of bits per value.
    Some fields skip leading or trailing spaces or zeroes. The skipped
    number of bytes is encoded by 'length_bits' bits.
    Empty blobs and varchar are encoded with a single 1 bit. Other blobs
    and varchar get a leading 0 bit.
  */
  max_calc_length= null_bytes;
  for (i= 0 ; i < isam_file->s->base.fields ; i++)
  {
    if (!(huff_counts[i].pack_type & PACK_TYPE_ZERO_FILL))
      huff_counts[i].max_zero_fill=0;
    if (huff_counts[i].field_type == FIELD_CONSTANT ||
        huff_counts[i].field_type == FIELD_ZERO ||
        huff_counts[i].field_type == FIELD_CHECK)
      continue;
    if (huff_counts[i].field_type == FIELD_INTERVALL)
      max_calc_length+=huff_counts[i].tree->height;
    else if (huff_counts[i].field_type == FIELD_BLOB ||
             huff_counts[i].field_type == FIELD_VARCHAR)
      max_calc_length+=huff_counts[i].tree->height*huff_counts[i].max_length + huff_counts[i].length_bits +1;
    else
      max_calc_length+=
        (huff_counts[i].field_length - huff_counts[i].max_zero_fill)*
        huff_counts[i].tree->height+huff_counts[i].length_bits;
  }
  /* Convert the bit count to a byte count, rounding up. */
  max_calc_length= (max_calc_length + 7) / 8;
  pack_ref_length= _ma_calc_pack_length(pack_version, max_calc_length);
  record_count=0;
  /* 'max_blob_length' is the max length of all blobs of a record. */
  pack_blob_length= isam_file->s->base.blobs ?
                    _ma_calc_pack_length(pack_version, mrg->max_blob_length) : 0;
  max_pack_length=pack_ref_length+pack_blob_length;

  DBUG_PRINT("fields", ("==="));
  mrg_reset(mrg);
  while ((error=mrg_rrnd(mrg,record)) != HA_ERR_END_OF_FILE)
  {
    ulong tot_blob_length=0;
    if (! error)
    {
      /* Make sure the output buffer can take a worst-case packed row. */
      if (flush_buffer((ulong) max_calc_length + (ulong) max_pack_length +
                       null_bytes))
        break;
      /* Reserve space for the length header; filled in after packing. */
      record_pos= file_buffer.pos;
      file_buffer.pos+= max_pack_length;
      if (null_bytes)
      {
        /* Copy null bits 'as is' */
        memcpy(file_buffer.pos, record, null_bytes);
        file_buffer.pos+= null_bytes;
      }
      for (start_pos=record+null_bytes, count= huff_counts;
           count < end_count ;
           count++)
      {
        end_pos=start_pos+(field_length=count->field_length);
        tree=count->tree;

        DBUG_PRINT("fields", ("column: %3lu type: %2u pack: %2u zero: %4u "
                              "lbits: %2u tree: %2u length: %4u",
                              (ulong) (count - huff_counts + 1),
                              count->field_type,
                              count->pack_type, count->max_zero_fill,
                              count->length_bits, count->tree->tree_number,
                              count->field_length));

        /* Check if the column contains spaces only. */
        if (count->pack_type & PACK_TYPE_SPACE_FIELDS)
        {
          for (pos=start_pos ; *pos == ' ' && pos < end_pos; pos++) ;
          if (pos == end_pos)
          {
            DBUG_PRINT("fields",
                       ("PACK_TYPE_SPACE_FIELDS spaces only, bits: 1"));
            DBUG_PRINT("fields", ("---"));
            /* All-spaces column is encoded as a single 1 bit. */
            write_bits(1,1);
            start_pos=end_pos;
            continue;
          }
          DBUG_PRINT("fields",
                     ("PACK_TYPE_SPACE_FIELDS not only spaces, bits: 1"));
          write_bits(0,1);
        }
        /* Skip the trailing bytes removed by zero-fill packing. */
        end_pos-=count->max_zero_fill;
        field_length-=count->max_zero_fill;

        switch (count->field_type) {
        case FIELD_SKIP_ZERO:
          if (!memcmp(start_pos, zero_string, field_length))
          {
            DBUG_PRINT("fields", ("FIELD_SKIP_ZERO zeroes only, bits: 1"));
            write_bits(1,1);
            start_pos=end_pos;
            break;
          }
          DBUG_PRINT("fields", ("FIELD_SKIP_ZERO not only zeroes, bits: 1"));
          write_bits(0,1);
          /* Fall through */
        case FIELD_NORMAL:
          DBUG_PRINT("fields", ("FIELD_NORMAL %lu bytes",
                                (ulong) (end_pos - start_pos)));
          /* Emit the Huffman code of every byte of the column. */
          for ( ; start_pos < end_pos ; start_pos++)
          {
            DBUG_PRINT("fields",
                       ("value: 0x%02x code: 0x%s bits: %2u bin: %s",
                        (uchar) *start_pos,
                        hexdigits(tree->code[(uchar) *start_pos]),
                        (uint) tree->code_len[(uchar) *start_pos],
                        bindigits(tree->code[(uchar) *start_pos],
                                  (uint) tree->code_len[(uchar) *start_pos])));
            write_bits(tree->code[(uchar) *start_pos],
                       (uint) tree->code_len[(uchar) *start_pos]);
          }
          break;
        case FIELD_SKIP_ENDSPACE:
          /* Find where the trailing spaces start. */
          for (pos=end_pos ; pos > start_pos && pos[-1] == ' ' ; pos--) ;
          length= (ulong) (end_pos - pos);
          if (count->pack_type & PACK_TYPE_SELECTED)
          {
            if (length > count->min_space)
            {
              DBUG_PRINT("fields",
                         ("FIELD_SKIP_ENDSPACE more than min_space, bits: 1"));
              DBUG_PRINT("fields",
                         ("FIELD_SKIP_ENDSPACE skip %lu/%u bytes, bits: %2u",
                          length, field_length, count->length_bits));
              write_bits(1,1);
              write_bits(length,count->length_bits);
            }
            else
            {
              DBUG_PRINT("fields",
                         ("FIELD_SKIP_ENDSPACE not more than min_space, "
                          "bits: 1"));
              write_bits(0,1);
              /* Not worth skipping: encode the spaces like normal bytes. */
              pos=end_pos;
            }
          }
          else
          {
            DBUG_PRINT("fields",
                       ("FIELD_SKIP_ENDSPACE skip %lu/%u bytes, bits: %2u",
                        length, field_length, count->length_bits));
            write_bits(length,count->length_bits);
          }
          /* Encode all significant bytes. */
          DBUG_PRINT("fields", ("FIELD_SKIP_ENDSPACE %lu bytes",
                                (ulong) (pos - start_pos)));
          for ( ; start_pos < pos ; start_pos++)
          {
            DBUG_PRINT("fields",
                       ("value: 0x%02x code: 0x%s bits: %2u bin: %s",
                        (uchar) *start_pos,
                        hexdigits(tree->code[(uchar) *start_pos]),
                        (uint) tree->code_len[(uchar) *start_pos],
                        bindigits(tree->code[(uchar) *start_pos],
                                  (uint) tree->code_len[(uchar) *start_pos])));
            write_bits(tree->code[(uchar) *start_pos],
                       (uint) tree->code_len[(uchar) *start_pos]);
          }
          start_pos=end_pos;
          break;
        case FIELD_SKIP_PRESPACE:
          /* Find where the leading spaces end. */
          for (pos=start_pos ; pos < end_pos && pos[0] == ' ' ; pos++) ;
          length= (ulong) (pos - start_pos);
          if (count->pack_type & PACK_TYPE_SELECTED)
          {
            if (length > count->min_space)
            {
              DBUG_PRINT("fields",
                         ("FIELD_SKIP_PRESPACE more than min_space, bits: 1"));
              DBUG_PRINT("fields",
                         ("FIELD_SKIP_PRESPACE skip %lu/%u bytes, bits: %2u",
                          length, field_length, count->length_bits));
              write_bits(1,1);
              write_bits(length,count->length_bits);
            }
            else
            {
              DBUG_PRINT("fields",
                         ("FIELD_SKIP_PRESPACE not more than min_space, "
                          "bits: 1"));
              /* Not worth skipping: encode the spaces like normal bytes. */
              pos=start_pos;
              write_bits(0,1);
            }
          }
          else
          {
            DBUG_PRINT("fields",
                       ("FIELD_SKIP_PRESPACE skip %lu/%u bytes, bits: %2u",
                        length, field_length, count->length_bits));
            write_bits(length,count->length_bits);
          }
          /* Encode all significant bytes. */
          DBUG_PRINT("fields", ("FIELD_SKIP_PRESPACE %lu bytes",
                                (ulong) (end_pos - start_pos)));
          for (start_pos=pos ; start_pos < end_pos ; start_pos++)
          {
            DBUG_PRINT("fields",
                       ("value: 0x%02x code: 0x%s bits: %2u bin: %s",
                        (uchar) *start_pos,
                        hexdigits(tree->code[(uchar) *start_pos]),
                        (uint) tree->code_len[(uchar) *start_pos],
                        bindigits(tree->code[(uchar) *start_pos],
                                  (uint)tree->code_len[(uchar) *start_pos])));
            write_bits(tree->code[(uchar) *start_pos],
                       (uint) tree->code_len[(uchar) *start_pos]);
          }
          break;
        case FIELD_CONSTANT:
        case FIELD_ZERO:
        case FIELD_CHECK:
          /* These field types contribute no bits to the packed row. */
          DBUG_PRINT("fields", ("FIELD_CONSTANT/ZERO/CHECK"));
          start_pos=end_pos;
          break;
        case FIELD_INTERVALL:
          /* NOTE(review): presumably the tree_search() compare function
             reads the current column through 'global_count' — confirm. */
          global_count=count;
          pos=(uchar*) tree_search(&count->int_tree, start_pos,
                                   count->int_tree.custom_arg);
          /* Encode the index of the value within the interval table. */
          intervall=(uint) (pos - count->tree_buff)/field_length;
          DBUG_PRINT("fields", ("FIELD_INTERVALL"));
          DBUG_PRINT("fields", ("index: %4u code: 0x%s bits: %2u",
                                intervall, hexdigits(tree->code[intervall]),
                                (uint) tree->code_len[intervall]));
          write_bits(tree->code[intervall],(uint) tree->code_len[intervall]);
          start_pos=end_pos;
          break;
        case FIELD_BLOB:
        {
          ulong blob_length= _ma_calc_blob_length(field_length-
                                                  portable_sizeof_char_ptr,
                                                  start_pos);
          /* Empty blobs are encoded with a single 1 bit. */
          if (!blob_length)
          {
            DBUG_PRINT("fields", ("FIELD_BLOB empty, bits: 1"));
            write_bits(1,1);
          }
          else
          {
            uchar *blob,*blob_end;
            DBUG_PRINT("fields", ("FIELD_BLOB not empty, bits: 1"));
            write_bits(0,1);
            /* Write the blob length. */
            DBUG_PRINT("fields", ("FIELD_BLOB %lu bytes, bits: %2u",
                                  blob_length, count->length_bits));
            write_bits(blob_length,count->length_bits);
            /* Fetch the blob pointer stored at the end of the column. */
            memcpy(&blob,end_pos-portable_sizeof_char_ptr, sizeof(char*));
            blob_end=blob+blob_length;
            /* Encode the blob bytes. */
            for ( ; blob < blob_end ; blob++)
            {
              DBUG_PRINT("fields",
                         ("value: 0x%02x code: 0x%s bits: %2u bin: %s",
                          (uchar) *blob, hexdigits(tree->code[(uchar) *blob]),
                          (uint) tree->code_len[(uchar) *blob],
                          bindigits(tree->code[(uchar) *start_pos],
                                    (uint)tree->code_len[(uchar) *start_pos])));
              write_bits(tree->code[(uchar) *blob],
                         (uint) tree->code_len[(uchar) *blob]);
            }
            tot_blob_length+=blob_length;
          }
          start_pos= end_pos;
          break;
        }
        case FIELD_VARCHAR:
        {
          uint var_pack_length= HA_VARCHAR_PACKLENGTH(count->field_length-1);
          ulong col_length= (var_pack_length == 1 ?
                             (uint) *(uchar*) start_pos :
                             uint2korr(start_pos));
          /* Empty varchar are encoded with a single 1 bit. */
          if (!col_length)
          {
            DBUG_PRINT("fields", ("FIELD_VARCHAR empty, bits: 1"));
            write_bits(1,1); /* Empty varchar */
          }
          else
          {
            uchar *end= start_pos + var_pack_length + col_length;
            DBUG_PRINT("fields", ("FIELD_VARCHAR not empty, bits: 1"));
            write_bits(0,1);
            /* Write the varchar length. */
            DBUG_PRINT("fields", ("FIELD_VARCHAR %lu bytes, bits: %2u",
                                  col_length, count->length_bits));
            write_bits(col_length,count->length_bits);
            /* Encode the varchar bytes. */
            for (start_pos+= var_pack_length ; start_pos < end ; start_pos++)
            {
              DBUG_PRINT("fields",
                         ("value: 0x%02x code: 0x%s bits: %2u bin: %s",
                          (uchar) *start_pos,
                          hexdigits(tree->code[(uchar) *start_pos]),
                          (uint) tree->code_len[(uchar) *start_pos],
                          bindigits(tree->code[(uchar) *start_pos],
                                    (uint)tree->code_len[(uchar) *start_pos])));
              write_bits(tree->code[(uchar) *start_pos],
                         (uint) tree->code_len[(uchar) *start_pos]);
            }
          }
          start_pos= end_pos;
          break;
        }
        case FIELD_LAST:
        case FIELD_enum_val_count:
          abort(); /* Impossible */
        }
        start_pos+=count->max_zero_fill;
        DBUG_PRINT("fields", ("---"));
      }
      /* Pad the packed row to a byte boundary. */
      flush_bits();
      length=(ulong) (file_buffer.pos - record_pos) - max_pack_length;
      /* Store the real packed length (and blob length) in the header. */
      pack_length= _ma_save_pack_length(pack_version, record_pos, length);
      if (pack_blob_length)
        pack_length+= _ma_save_pack_length(pack_version,
                                           record_pos + pack_length,
                                           tot_blob_length);
      DBUG_PRINT("fields", ("record: %lu length: %lu blob-length: %lu "
                            "length-bytes: %lu", (ulong) record_count, length,
                            tot_blob_length, pack_length));
      DBUG_PRINT("fields", ("==="));

      /* Correct file buffer if the header was smaller */
      if (pack_length != max_pack_length)
      {
        bmove(record_pos+pack_length,record_pos+max_pack_length,length);
        file_buffer.pos-= (max_pack_length-pack_length);
      }
      if (length < (ulong) min_record_length)
        min_record_length=(uint) length;
      if (length > (ulong) max_record_length)
        max_record_length=(uint) length;
      record_count++;
      if (write_loop && record_count % WRITE_COUNT == 0)
      {
        /* Progress indicator for interactive runs. */
        printf("%lu\r", (ulong) record_count);
        fflush(stdout);
      }
    }
    else if (error != HA_ERR_RECORD_DELETED)
      break;
  }
  if (error == HA_ERR_END_OF_FILE)
    error=0;
  else
  {
    fprintf(stderr, "%s: Got error %d reading records\n", my_progname, error);
  }
  if (verbose >= 2)
    printf("wrote %s records.\n", llstr((longlong) record_count, llbuf));

  my_safe_afree(record, isam_file->s->base.reclength);
  mrg->ref_length=max_pack_length;
  mrg->min_pack_length=max_record_length ? min_record_length : 0;
  mrg->max_pack_length=max_record_length;
  DBUG_RETURN(error || error_on_write || flush_buffer(~(ulong) 0));
}
|
|
|
|
|
|
|
|
|
|
|
|
static char *make_new_name(char *new_name, char *old_name)
|
|
|
|
{
|
|
|
|
return fn_format(new_name,old_name,"",DATA_TMP_EXT,2+4);
|
|
|
|
}
|
|
|
|
|
|
|
|
static char *make_old_name(char *new_name, char *old_name)
|
|
|
|
{
|
|
|
|
return fn_format(new_name,old_name,"",OLD_EXT,2+4);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Routines for the bit-writing buffer */
|
|
|
|
|
|
|
|
/*
  Initialize the global 'file_buffer' for reading from or writing to
  'file'.  For reading, the buffer starts "empty" (pos at end, no bits)
  so the first access refills it; for writing it starts at the
  beginning with a full bit bucket available.
*/
static void init_file_buffer(File file, pbool read_buffer)
{
  size_t buff_size= ALIGN_SIZE(RECORD_CACHE_SIZE);

  file_buffer.file= file;
  /* NOTE(review): the my_malloc() result is not checked here — confirm
     callers cope with file_buffer.buffer == NULL on OOM. */
  file_buffer.buffer= (uchar*) my_malloc(buff_size, MYF(MY_WME));
  /* 'end' is kept 8 bytes below the real end so write_bits() can store
     up to 8 bytes before checking for overflow (see flush_buffer()). */
  file_buffer.end= file_buffer.buffer + buff_size - 8;
  file_buffer.pos_in_file= 0;
  error_on_write= 0;
  if (read_buffer)
  {
    file_buffer.pos= file_buffer.end;
    file_buffer.bits= 0;
  }
  else
  {
    file_buffer.pos= file_buffer.buffer;
    file_buffer.bits= BITS_SAVED;
  }
  file_buffer.bitbucket= 0;
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
  Flush the global 'file_buffer' to disk if it cannot hold another
  'neaded_length' bytes, growing the buffer when even an empty buffer
  would be too small.  Passing ~(ulong) 0 forces an unconditional flush.

  RETURN
    0  ok (possibly nothing to do)
    1  write error or out of memory (error_on_write is set on write error)
*/
static int flush_buffer(ulong neaded_length)
{
  ulong length;

  /*
    file_buffer.end is 8 bytes lower than the real end of the buffer.
    This is done so that the end-of-buffer condition does not need to be
    checked for every uchar (see write_bits()). Consequently,
    file_buffer.pos can become greater than file_buffer.end. The
    algorithms in the other functions ensure that there will never be
    more than 8 bytes written to the buffer without an end-of-buffer
    check. So the buffer cannot be overrun. But we need to check for the
    near-to-buffer-end condition to avoid a negative result, which is
    casted to unsigned and thus becomes giant.
  */
  if ((file_buffer.pos < file_buffer.end) &&
      ((ulong) (file_buffer.end - file_buffer.pos) > neaded_length))
    return 0;
  /* Number of bytes currently buffered and about to be written. */
  length=(ulong) (file_buffer.pos-file_buffer.buffer);
  file_buffer.pos=file_buffer.buffer;
  file_buffer.pos_in_file+=length;
  /* In test mode nothing is written; the buffer is just recycled. */
  if (test_only)
    return 0;
  if (error_on_write|| my_write(file_buffer.file,
                                (const uchar*) file_buffer.buffer,
                                length,
                                MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)))
  {
    error_on_write=1;
    return 1;
  }

  /* Grow the buffer if even an empty one cannot hold 'neaded_length'
     (skipped for the force-flush sentinel ~0). */
  if (neaded_length != ~(ulong) 0 &&
      (ulong) (file_buffer.end-file_buffer.buffer) < neaded_length)
  {
    uchar *tmp;
    neaded_length+=256; /* some margin */
    tmp= (uchar*) my_realloc(file_buffer.buffer, neaded_length,MYF(MY_WME));
    if (!tmp)
      return 1;
    /* pos was reset to the buffer start above, so this keeps offset 0. */
    file_buffer.pos= (tmp + (ulong) (file_buffer.pos - file_buffer.buffer));
    file_buffer.buffer= tmp;
    /* Keep the 8-byte safety margin below the real end. */
    file_buffer.end= (tmp+neaded_length-8);
  }
  return 0;
}
|
|
|
|
|
|
|
|
|
|
|
|
static void end_file_buffer(void)
|
|
|
|
{
|
2011-04-25 17:22:25 +02:00
|
|
|
my_free(file_buffer.buffer);
|
2006-04-11 15:45:10 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/* output `bits` low bits of `value' */
|
|
|
|
|
|
|
|
/* output `bits` low bits of `value' */

/*
  Append the low 'bits' bits of 'value' to the global bit-output buffer.
  Bits accumulate MSB-first in file_buffer.bitbucket; whenever the
  bucket fills, BITS_SAVED bits are emitted as bytes (big-endian) and
  the leftover low bits of 'value' start a new bucket.
*/
static void write_bits(register ulonglong value, register uint bits)
{
  /* 'value' must not have bits set above the 'bits' it claims to hold. */
  DBUG_ASSERT(((bits < 8 * sizeof(value)) && ! (value >> bits)) ||
              (bits == 8 * sizeof(value)));

  /* file_buffer.bits counts free bits remaining in the bucket. */
  if ((file_buffer.bits-= (int) bits) >= 0)
  {
    /* Everything fits: place 'value' just below the bits already there. */
    file_buffer.bitbucket|= value << file_buffer.bits;
  }
  else
  {
    reg3 ulonglong bit_buffer;
    /* 'bits' now holds the number of low bits that did NOT fit. */
    bits= (uint) -file_buffer.bits;
    /* Top off the bucket with the high part of 'value'.
       (Guard against the undefined shift by the full type width.) */
    bit_buffer= (file_buffer.bitbucket |
                 ((bits != 8 * sizeof(value)) ? (value >> bits) : 0));
    /* Emit the full bucket, most significant byte first. */
#if BITS_SAVED == 64
    *file_buffer.pos++= (uchar) (bit_buffer >> 56);
    *file_buffer.pos++= (uchar) (bit_buffer >> 48);
    *file_buffer.pos++= (uchar) (bit_buffer >> 40);
    *file_buffer.pos++= (uchar) (bit_buffer >> 32);
#endif
    *file_buffer.pos++= (uchar) (bit_buffer >> 24);
    *file_buffer.pos++= (uchar) (bit_buffer >> 16);
    *file_buffer.pos++= (uchar) (bit_buffer >> 8);
    *file_buffer.pos++= (uchar) (bit_buffer);

    /* Keep only the leftover low bits of 'value'. */
    if (bits != 8 * sizeof(value))
      value&= (((ulonglong) 1) << bits) - 1;
    if (file_buffer.pos >= file_buffer.end)
      flush_buffer(~ (ulong) 0);
    /* Start a fresh bucket holding the leftover bits at its top. */
    file_buffer.bits=(int) (BITS_SAVED - bits);
    file_buffer.bitbucket= value << (BITS_SAVED - bits);
  }
  return;
}
|
|
|
|
|
|
|
|
/* Flush bits in bit_buffer to buffer */
|
|
|
|
|
|
|
|
/* Flush bits in bit_buffer to buffer */

/*
  Flush the pending bits of file_buffer.bitbucket to the output buffer,
  padding up to the next byte boundary, and reset the bucket to empty.
*/
static void flush_bits(void)
{
  int bits;
  ulonglong bit_buffer;

  /* Free bits rounded down to a byte multiple = bits to discard below. */
  bits= file_buffer.bits & ~7;
  bit_buffer= file_buffer.bitbucket >> bits;
  /* Number of bits to emit: the used bits rounded up to whole bytes. */
  bits= BITS_SAVED - bits;
  while (bits > 0)
  {
    bits-= 8;
    *file_buffer.pos++= (uchar) (bit_buffer >> bits);
  }
  if (file_buffer.pos >= file_buffer.end)
    flush_buffer(~ (ulong) 0);
  /* Bucket is now empty again. */
  file_buffer.bits= BITS_SAVED;
  file_buffer.bitbucket= 0;
}
|
|
|
|
|
|
|
|
|
|
|
|
/****************************************************************************
|
|
|
|
** functions to handle the joined files
|
|
|
|
****************************************************************************/
|
|
|
|
|
2007-01-18 20:38:14 +01:00
|
|
|
static int save_state(MARIA_HA *isam_file,PACK_MRG_INFO *mrg,
|
|
|
|
my_off_t new_length,
|
2006-04-11 15:45:10 +02:00
|
|
|
ha_checksum crc)
|
|
|
|
{
|
|
|
|
MARIA_SHARE *share=isam_file->s;
|
|
|
|
uint options=mi_uint2korr(share->state.header.options);
|
|
|
|
uint key;
|
|
|
|
DBUG_ENTER("save_state");
|
|
|
|
|
|
|
|
options|= HA_OPTION_COMPRESS_RECORD | HA_OPTION_READ_ONLY_DATA;
|
|
|
|
mi_int2store(share->state.header.options,options);
|
2007-04-19 12:18:56 +02:00
|
|
|
/* Save the original file type if we have to undo the packing later */
|
2007-01-18 20:38:14 +01:00
|
|
|
share->state.header.org_data_file_type= share->state.header.data_file_type;
|
|
|
|
share->state.header.data_file_type= COMPRESSED_RECORD;
|
2006-04-11 15:45:10 +02:00
|
|
|
|
|
|
|
share->state.state.data_file_length=new_length;
|
|
|
|
share->state.state.del=0;
|
|
|
|
share->state.state.empty=0;
|
|
|
|
share->state.dellink= HA_OFFSET_ERROR;
|
|
|
|
share->state.split=(ha_rows) mrg->records;
|
|
|
|
share->state.version=(ulong) time((time_t*) 0);
|
2007-12-04 22:23:42 +01:00
|
|
|
if (share->base.born_transactional)
|
|
|
|
share->state.create_rename_lsn= share->state.is_of_horizon=
|
Fix for BUG#37876 "Importing Maria table from other server via binary copy does not work":
- after auto-zerofill (ha_maria::check_and_repair()) kepts its state's LSNs unchanged, which could
be the same as the create_rename_lsn of another pre-existing table, which would break versioning as this LSN
serves as unique identifier in the versioning code (in maria_open()). Even the state pieces which
maria_zerofill() did change were lost (because they didn't go to disk).
- after this fix, if two tables were auto-zerofilled at the same time (by _ma_mark_changed())
they could receive the same create_rename_lsn, which would break versioning again. Fix is to write a log
record each time a table is imported.
- Print state's LSNs (create_rename_lsn, is_of_horizon, skip_redo_lsn) and UUID in maria_chk -dvv.
mysql-test/r/maria-autozerofill.result:
result
mysql-test/t/maria-autozerofill.test:
Test for auto-zerofilling
storage/maria/ha_maria.cc:
The state changes done by auto-zerofilling never reached disk.
storage/maria/ma_check.c:
When zerofilling a table, including its pages' LSNs, new state LSNs are needed next time the table
is imported into a Maria instance.
storage/maria/ma_create.c:
Write LOGREC_IMPORTED_TABLE when importing a table. This is informative and ensures
that the table gets a unique create_rename_lsn even though multiple tables
are imported by concurrent threads (it advances the log's end LSN).
storage/maria/ma_key_recover.c:
comment
storage/maria/ma_locking.c:
instead of using translog_get_horizon() for state's LSNs of imported table,
use the LSN of to-be-written LOGREC_IMPORTED_TABLE.
storage/maria/ma_loghandler.c:
New type of log record
storage/maria/ma_loghandler.h:
New type of log record
storage/maria/ma_loghandler_lsn.h:
New name for constant as can be used not only by maria_chk but auto-zerofill now too.
storage/maria/ma_open.c:
instead of using translog_get_horizon() for state's LSNs of imported table,
use the LSN of to-be-written LOGREC_IMPORTED_TABLE.
storage/maria/ma_recovery.c:
print content of LOGREC_IMPORTED_TABLE in maria_read_log.
storage/maria/maria_chk.c:
print info about LSNs of the table's state, and UUID, when maria_chk -dvv
storage/maria/maria_pack.c:
new name for constant
storage/maria/unittest/ma_test_recovery.pl:
Now that maria_chk -dvv shows state LSNs and UUID those need to be filtered out,
as maria_read_log -a does not use the same as at original run.
2008-07-09 11:02:27 +02:00
|
|
|
share->state.skip_redo_lsn= LSN_NEEDS_NEW_STATE_LSNS;
|
2006-04-11 15:45:10 +02:00
|
|
|
if (! maria_is_all_keys_active(share->state.key_map, share->base.keys))
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
Some indexes are disabled, cannot use current key_file_length value
|
|
|
|
as an estimate of upper bound of index file size. Use packed data file
|
|
|
|
size instead.
|
|
|
|
*/
|
|
|
|
share->state.state.key_file_length= new_length;
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
If there are no disabled indexes, keep key_file_length value from
|
2010-09-12 18:40:01 +02:00
|
|
|
original file so "aria_chk -rq" can use this value (this is necessary
|
2006-04-11 15:45:10 +02:00
|
|
|
because index size cannot be easily calculated for fulltext keys)
|
|
|
|
*/
|
|
|
|
maria_clear_all_keys_active(share->state.key_map);
|
|
|
|
for (key=0 ; key < share->base.keys ; key++)
|
|
|
|
share->state.key_root[key]= HA_OFFSET_ERROR;
|
2007-01-18 20:38:14 +01:00
|
|
|
share->state.key_del= HA_OFFSET_ERROR;
|
WL#3138: Maria - fast "SELECT COUNT(*) FROM t;" and "CHECKSUM TABLE t"
Added argument to maria_end_bulk_insert() to know if the table will be deleted after the operation
Fixed wrong call to strmake
Don't call bulk insert in case of inserting only one row (speed optimization as starting/stopping bulk insert involves a lot of if's)
Allow storing year 2155 in year field
When running with purify/valgrind avoid copying structures over themself
Added hook 'trnnam_end_trans_hook' that is called when transaction ends
Added trn->used_tables that is used to an entry for all tables used by transaction
Fixed that ndb doesn't crash on duplicate key error when start_bulk_insert/end_bulk_insert are not called
include/maria.h:
Added argument to maria_end_bulk_insert() to know if the table will be deleted after the operation
include/my_tree.h:
Added macro 'reset_free_element()' to be able to ignore calls to the external free function.
Is used to optimize end-bulk-insert in case of failures, in which case we don't want write the remaining keys in the tree
mysql-test/install_test_db.sh:
Upgrade to new mysql_install_db options
mysql-test/r/maria-mvcc.result:
New tests
mysql-test/r/maria.result:
New tests
mysql-test/suite/ndb/r/ndb_auto_increment.result:
Fixed error message now when bulk insert is not always called
mysql-test/suite/ndb/t/ndb_auto_increment.test:
Fixed error message now when bulk insert is not always called
mysql-test/t/maria-mvcc.test:
Added testing of versioning of count(*)
mysql-test/t/maria-page-checksum.test:
Added comment
mysql-test/t/maria.test:
More tests
mysys/hash.c:
Code style change
sql/field.cc:
Allow storing year 2155 in year field
sql/ha_ndbcluster.cc:
Added new argument to end_bulk_insert() to signal if the bulk insert should ignored
sql/ha_ndbcluster.h:
Added new argument to end_bulk_insert() to signal if the bulk insert should ignored
sql/ha_partition.cc:
Added new argument to end_bulk_insert() to signal if the bulk insert should ignored
sql/ha_partition.h:
Added new argument to end_bulk_insert() to signal if the bulk insert should ignored
sql/handler.cc:
Don't call get_dup_key() if there is no table object. This can happen if the handler generates a duplicate key error on commit
sql/handler.h:
Added new argument to end_bulk_insert() to signal if the bulk insert should ignored (ie, the table will be deleted)
sql/item.cc:
Style fix
Removed compiler warning
sql/log_event.cc:
Added new argument to ha_end_bulk_insert()
sql/log_event_old.cc:
Added new argument to ha_end_bulk_insert()
sql/mysqld.cc:
Removed compiler warning
sql/protocol.cc:
Added DBUG
sql/sql_class.cc:
Added DBUG
Fixed wrong call to strmake
sql/sql_insert.cc:
Don't call bulk insert in case of inserting only one row (speed optimization as starting/stopping bulk insert involves a lot of if's)
Added new argument to ha_end_bulk_insert()
sql/sql_load.cc:
Added new argument to ha_end_bulk_insert()
sql/sql_parse.cc:
Style fixes
Avoid goto in common scenario
sql/sql_select.cc:
When running with purify/valgrind avoid copying structures over themself. This is not a real bug in itself, but it's a waste of cycles and causes valgrind warnings
sql/sql_select.h:
Avoid copying structures over themself. This is not a real bug in itself, but it's a waste of cycles and causes valgrind warnings
sql/sql_table.cc:
Call HA_EXTRA_PREPARE_FOR_DROP if table created by ALTER TABLE is going to be dropped
Added new argument to ha_end_bulk_insert()
storage/archive/ha_archive.cc:
Added new argument to end_bulk_insert()
storage/archive/ha_archive.h:
Added new argument to end_bulk_insert()
storage/federated/ha_federated.cc:
Added new argument to end_bulk_insert()
storage/federated/ha_federated.h:
Added new argument to end_bulk_insert()
storage/maria/Makefile.am:
Added ma_state.c and ma_state.h
storage/maria/ha_maria.cc:
Versioning of count(*) and checksum
- share->state.state is now assumed to be correct, not handler->state
- Call _ma_setup_live_state() in external lock to get count(*)/checksum versioning. In case of
not versioned and not concurrent insertable table, file->s->state.state contains the correct state information
Other things:
- file->s -> share
- Added DBUG_ASSERT() for unlikely case
- Optimized end_bulk_insert() to not write anything if table is going to be deleted (as in failed alter table)
- Indentation changes in external_lock because of removed 'goto' caused a big conflict even if very little was changed
storage/maria/ha_maria.h:
New argument to end_bulk_insert()
storage/maria/ma_blockrec.c:
Update for versioning of count(*) and checksum
Keep share->state.state.data_file_length up to date (not info->state->data_file_length)
Moved _ma_block_xxxx_status() and maria_versioning() functions to ma_state.c
storage/maria/ma_check.c:
Update and use share->state.state instead of info->state
info->s to share
Update info->state at end of repair
Call _ma_reset_state() to update share->state_history at end of repair
storage/maria/ma_checkpoint.c:
Call _ma_remove_not_visible_states() on checkpoint to clean up not visible state history from tables
storage/maria/ma_close.c:
Remember state history for running transaction even if table is closed
storage/maria/ma_commit.c:
Ensure we always call trnman_commit_trn() even if other calls fails. If we don't do that, the translog and state structures will not be freed
storage/maria/ma_delete.c:
Versioning of count(*) and checksum:
- Always update info->state->checksum and info->state->records
storage/maria/ma_delete_all.c:
Versioning of count(*) and checksum:
- Ensure that share->state.state is updated, as here is where we store the primary information
storage/maria/ma_dynrec.c:
Use lock_key_trees instead of concurrent_insert to check if trees should be locked.
This allows us to lock trees both for concurrent_insert and for index versioning.
storage/maria/ma_extra.c:
Versioning of count(*) and checksum:
- Use share->state.state instead of info->state
- share->concurrent_insert -> share->non_transactional_concurrent_insert
- Don't update share->state.state from info->state if transactional table
Optimization:
- Don't flush io_cache or bitmap if we are using FLUSH_IGNORE_CHANGED
storage/maria/ma_info.c:
Get most state information from current state
storage/maria/ma_init.c:
Add hash table and free function to store states for closed tables
Install hook for transaction commit/rollback to update history state
storage/maria/ma_key_recover.c:
Versioning of count(*) and checksum:
- Use share->state.state instead of info->state
storage/maria/ma_locking.c:
Versioning of count(*) and checksum:
- Call virtual functions (if exists) to restore/update status
- Move _ma_xxx_status() functions to ma_state.c
info->s -> share
storage/maria/ma_open.c:
Versioning of count(*) and checksum:
- For not transactional tables, set info->state to point to new allocated state structure.
- Initialize new info->state_start variable that points to state at start of transaction
- Copy old history states from hash table (maria_stored_states) first time the table is opened
- Split flag share->concurrent_insert to non_transactional_concurrent_insert & lock_key_tree
- For now, only enable versioning of tables without keys (to be fixed in soon!)
- Added new virtual function to restore status in maria_lock_database)
More DBUG
storage/maria/ma_page.c:
Versioning of count(*) and checksum:
- Use share->state.state instead of info->state
- Modify share->state.state.key_file_length under share->intern_lock
storage/maria/ma_range.c:
Versioning of count(*) and checksum:
- Lock trees based on share->lock_key_trees
info->s -> share
storage/maria/ma_recovery.c:
Versioning of count(*) and checksum:
- Use share->state.state instead of info->state
- Update state information on close and when reenabling logging
storage/maria/ma_rkey.c:
Versioning of count(*) and checksum:
- Lock trees based on share->lock_key_trees
storage/maria/ma_rnext.c:
Versioning of count(*) and checksum:
- Lock trees based on share->lock_key_trees
storage/maria/ma_rnext_same.c:
Versioning of count(*) and checksum:
- Lock trees based on share->lock_key_trees
- Only skip rows based on file length if non_transactional_concurrent_insert is set
storage/maria/ma_rprev.c:
Versioning of count(*) and checksum:
- Lock trees based on share->lock_key_trees
storage/maria/ma_rsame.c:
Versioning of count(*) and checksum:
- Lock trees based on share->lock_key_trees
storage/maria/ma_sort.c:
Use share->state.state instead of info->state
Fixed indentation
storage/maria/ma_static.c:
Added maria_stored_state
storage/maria/ma_update.c:
Versioning of count(*) and checksum:
- Always update info->state->checksum and info->state->records
- Remove optimization for index file update as it doesn't work for transactional tables
storage/maria/ma_write.c:
Versioning of count(*) and checksum:
- Always update info->state->checksum and info->state->records
storage/maria/maria_def.h:
Move MARIA_STATUS_INFO to ma_state.h
Changes to MARIA_SHARE:
- Added state_history to store count(*)/checksum states
- Added in_trans as counter if table is used by running transactions
- Split concurrent_insert into lock_key_trees and on_transactional_concurrent_insert.
- Added virtual function lock_restore_status
Changes to MARIA_HA:
- save_state -> state_save
- Added state_start to store state at start of transaction
storage/maria/maria_pack.c:
Versioning of count(*) and checksum:
- Use share->state.state instead of info->state
Indentation fixes
storage/maria/trnman.c:
Added hook 'trnnam_end_trans_hook' that is called when transaction ends
Added trn->used_tables that is used to an entry for all tables used by transaction
More DBUG
Changed return type of trnman_end_trn() to my_bool
Added trnman_get_min_trid() to get minimum trid in use.
Added trnman_exists_active_transactions() to check if there exist a running transaction started between two commit id
storage/maria/trnman.h:
Added 'used_tables'
Moved all pointers into same groups to get better memory alignment
storage/maria/trnman_public.h:
Added prototypes for new functions and variables
Changed return type of trnman_end_trn() to my_bool
storage/myisam/ha_myisam.cc:
Added argument to end_bulk_insert() if operation should be aborted
storage/myisam/ha_myisam.h:
Added argument to end_bulk_insert() if operation should be aborted
storage/maria/ma_state.c:
Functions to handle state of count(*) and checksum
storage/maria/ma_state.h:
Structures and declarations to handle state of count(*) and checksum
2008-05-29 17:33:33 +02:00
|
|
|
share->state.state.checksum= crc; /* Save crc in file */
|
2006-04-11 15:45:10 +02:00
|
|
|
share->changed=1; /* Force write of header */
|
|
|
|
share->state.open_count=0;
|
|
|
|
share->global_changed=0;
|
2011-04-25 17:22:25 +02:00
|
|
|
my_chsize(share->kfile.file, share->base.keystart, 0, MYF(0));
|
2006-04-11 15:45:10 +02:00
|
|
|
if (share->base.keys)
|
|
|
|
isamchk_neaded=1;
|
- WL#3072 Maria Recovery:
Recovery of state.records (the count of records which is stored into
the header of the index file). For that, state.is_of_lsn is introduced;
logic is explained in ma_recovery.c (look for "Recovery of the state").
The net gain is that in case of crash, we now recover state.records,
and it is idempotent (ma_test_recovery tests it).
state.checksum is not recovered yet, mail sent for discussion.
- WL#3071 Maria Checkpoint: preparation for it, by protecting
all modifications of the state in memory or on disk with intern_lock
(with the exception of the really-often-modified state.records,
which is now protected with the log's lock, see ma_recovery.c
(look for "Recovery of the state"). Also, if maria_close() sees that
Checkpoint is looking at this table it will not my_free() the share.
- don't compute row's checksum twice in case of UPDATE (correction
to a bugfix I made yesterday).
storage/maria/ha_maria.cc:
protect state write with intern_lock (against Checkpoint)
storage/maria/ma_blockrec.c:
* don't reset trn->rec_lsn in _ma_unpin_all_pages(), because it
should wait until we have corrected the allocation in the bitmap
(as the REDO can serve to correct the allocation during Recovery);
introducing _ma_finalize_row() for that.
* In a changeset yesterday I moved computation of the checksum
into write_block_record(), to fix a bug in UPDATE. Now I notice
that maria_update() already computes the checksum, it's just that
it puts it into info->cur_row while _ma_update_block_record()
uses info->new_row; so, removing the checksum computation from
write_block_record(), putting it back into allocate_and_write_block_record()
(which is called only by INSERT and UNDO_DELETE), and copying
cur_row->checksum into new_row->checksum in _ma_update_block_record().
storage/maria/ma_check.c:
new prototypes, they will take intern_lock when writing the state;
also take intern_lock when changing share->kfile. In both cases
this is to protect against Checkpoint reading/writing the state or reading
kfile at the same time.
Not updating create_rename_lsn directly at end of write_log_record_for_repair()
as it wouldn't have intern_lock.
storage/maria/ma_close.c:
Checkpoint builds a list of shares (under THR_LOCK_maria), then it
handles each such share (under intern_lock) (doing flushing etc);
if maria_close() freed this share between the two, Checkpoint
would see a bad pointer. To avoid this, when building the list Checkpoint
marks each share, so that maria_close() knows it should not free it
and Checkpoint will free it itself.
Extending the zone covered by intern_lock to protect against
Checkpoint reading kfile, writing state.
storage/maria/ma_create.c:
When we update create_rename_lsn, we also update is_of_lsn to
the same value: it is logical, and allows us to test in maria_open()
that the former is not bigger than the latter (the contrary is a sign
of index header corruption, or severe logging bug which hinders
Recovery, table needs a repair).
_ma_update_create_rename_lsn_on_disk() also writes is_of_lsn;
it now operates under intern_lock (protect against Checkpoint),
a shortcut function is available for cases where acquiring
intern_lock is not needed (table's creation or first open).
storage/maria/ma_delete.c:
if table is transactional, "records" is already decremented
when logging UNDO_ROW_DELETE.
storage/maria/ma_delete_all.c:
comments
storage/maria/ma_extra.c:
Protect modifications of the state, in memory and/or on disk,
with intern_lock, against a concurrent Checkpoint.
When state goes to disk, update it's is_of_lsn (by calling
the new _ma_state_info_write()).
In HA_EXTRA_FORCE_REOPEN, don't set share->changed to 0 (undoing
a change I made a few days ago) and ASK_MONTY
storage/maria/ma_locking.c:
no real code change here.
storage/maria/ma_loghandler.c:
Log-write-hooks for updating "state.records" under log's mutex
when writing/updating/deleting a row or deleting all rows.
storage/maria/ma_loghandler_lsn.h:
merge (make LSN_ERROR and LSN_REPAIRED_BY_MARIA_CHK different)
storage/maria/ma_open.c:
When opening a table verify that is_of_lsn >= create_rename_lsn; if
false the header must be corrupted.
_ma_state_info_write() is split in two: _ma_state_info_write_sub()
which is the old _ma_state_info_write(), and _ma_state_info_write()
which additionally takes intern_lock if requested (to protect
against Checkpoint) and updates is_of_lsn.
_ma_open_keyfile() should change kfile.file under intern_lock
to protect Checkpoint from reading a wrong kfile.file.
storage/maria/ma_recovery.c:
Recovery of state.records: when the REDO phase sees UNDO_ROW_INSERT
which has a LSN > state.is_of_lsn it increments state.records.
Same for UNDO_ROW_DELETE and UNDO_ROW_PURGE.
When closing a table during Recovery, we know its state is at least
as new as the current log record we are looking at, so increase
is_of_lsn to the LSN of the current log record.
storage/maria/ma_rename.c:
update for new behaviour of _ma_update_create_rename_lsn_on_disk().
storage/maria/ma_test1.c:
update to new prototype
storage/maria/ma_test2.c:
update to new prototype (actually prototype was changed days ago,
but compiler does not complain about the extra argument??)
storage/maria/ma_test_recovery.expected:
new result file of ma_test_recovery. Improvements: record
count read from index's header is now always correct.
storage/maria/ma_test_recovery:
"rm" fails if file does not exist. Redirect stderr of script.
storage/maria/ma_write.c:
if table is transactional, "records" is already incremented when
logging UNDO_ROW_INSERT. Comments.
storage/maria/maria_chk.c:
update is_of_lsn too
storage/maria/maria_def.h:
- MARIA_STATE_INFO::is_of_lsn which is used by Recovery. It is stored
into the index file's header.
- Checkpoint can now mark a table as "don't free this", and maria_close()
can reply "ok then you will free it".
- new functions
storage/maria/maria_pack.c:
update for new name
2007-09-07 15:02:30 +02:00
|
|
|
DBUG_RETURN(_ma_state_info_write_sub(share->kfile.file,
|
2009-01-08 09:20:04 +01:00
|
|
|
&share->state,
|
|
|
|
MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET |
|
|
|
|
MA_STATE_INFO_WRITE_FULL_INFO));
|
2006-04-11 15:45:10 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static int save_state_mrg(File file,PACK_MRG_INFO *mrg,my_off_t new_length,
|
|
|
|
ha_checksum crc)
|
|
|
|
{
|
|
|
|
MARIA_STATE_INFO state;
|
|
|
|
MARIA_HA *isam_file=mrg->file[0];
|
|
|
|
uint options;
|
|
|
|
DBUG_ENTER("save_state_mrg");
|
|
|
|
|
|
|
|
state= isam_file->s->state;
|
|
|
|
options= (mi_uint2korr(state.header.options) | HA_OPTION_COMPRESS_RECORD |
|
|
|
|
HA_OPTION_READ_ONLY_DATA);
|
|
|
|
mi_int2store(state.header.options,options);
|
2008-01-23 04:00:17 +01:00
|
|
|
/* Save the original file type of we have to undo the packing later */
|
|
|
|
state.header.org_data_file_type= state.header.data_file_type;
|
|
|
|
state.header.data_file_type= COMPRESSED_RECORD;
|
|
|
|
|
2006-04-11 15:45:10 +02:00
|
|
|
state.state.data_file_length=new_length;
|
|
|
|
state.state.del=0;
|
|
|
|
state.state.empty=0;
|
|
|
|
state.state.records=state.split=(ha_rows) mrg->records;
|
2008-01-23 04:00:17 +01:00
|
|
|
state.create_rename_lsn= state.is_of_horizon= state.skip_redo_lsn=
|
Fix for BUG#37876 "Importing Maria table from other server via binary copy does not work":
- after auto-zerofill (ha_maria::check_and_repair()) kepts its state's LSNs unchanged, which could
be the same as the create_rename_lsn of another pre-existing table, which would break versioning as this LSN
serves as unique identifier in the versioning code (in maria_open()). Even the state pieces which
maria_zerofill() did change were lost (because they didn't go to disk).
- after this fix, if two tables were auto-zerofilled at the same time (by _ma_mark_changed())
they could receive the same create_rename_lsn, which would break versioning again. Fix is to write a log
record each time a table is imported.
- Print state's LSNs (create_rename_lsn, is_of_horizon, skip_redo_lsn) and UUID in maria_chk -dvv.
mysql-test/r/maria-autozerofill.result:
result
mysql-test/t/maria-autozerofill.test:
Test for auto-zerofilling
storage/maria/ha_maria.cc:
The state changes done by auto-zerofilling never reached disk.
storage/maria/ma_check.c:
When zerofilling a table, including its pages' LSNs, new state LSNs are needed next time the table
is imported into a Maria instance.
storage/maria/ma_create.c:
Write LOGREC_IMPORTED_TABLE when importing a table. This is informative and ensures
that the table gets a unique create_rename_lsn even though multiple tables
are imported by concurrent threads (it advances the log's end LSN).
storage/maria/ma_key_recover.c:
comment
storage/maria/ma_locking.c:
instead of using translog_get_horizon() for state's LSNs of imported table,
use the LSN of to-be-written LOGREC_IMPORTED_TABLE.
storage/maria/ma_loghandler.c:
New type of log record
storage/maria/ma_loghandler.h:
New type of log record
storage/maria/ma_loghandler_lsn.h:
New name for constant as can be used not only by maria_chk but auto-zerofill now too.
storage/maria/ma_open.c:
instead of using translog_get_horizon() for state's LSNs of imported table,
use the LSN of to-be-written LOGREC_IMPORTED_TABLE.
storage/maria/ma_recovery.c:
print content of LOGREC_IMPORTED_TABLE in maria_read_log.
storage/maria/maria_chk.c:
print info about LSNs of the table's state, and UUID, when maria_chk -dvv
storage/maria/maria_pack.c:
new name for constant
storage/maria/unittest/ma_test_recovery.pl:
Now that maria_chk -dvv shows state LSNs and UUID those need to be filtered out,
as maria_read_log -a does not use the same as at original run.
2008-07-09 11:02:27 +02:00
|
|
|
LSN_NEEDS_NEW_STATE_LSNS;
|
2008-01-23 04:00:17 +01:00
|
|
|
|
2006-04-11 15:45:10 +02:00
|
|
|
/* See comment above in save_state about key_file_length handling. */
|
|
|
|
if (mrg->src_file_has_indexes_disabled)
|
|
|
|
{
|
|
|
|
isam_file->s->state.state.key_file_length=
|
2013-03-25 23:03:13 +01:00
|
|
|
MY_MAX(isam_file->s->state.state.key_file_length, new_length);
|
2006-04-11 15:45:10 +02:00
|
|
|
}
|
|
|
|
state.dellink= HA_OFFSET_ERROR;
|
|
|
|
state.version=(ulong) time((time_t*) 0);
|
|
|
|
maria_clear_all_keys_active(state.key_map);
|
|
|
|
state.state.checksum=crc;
|
|
|
|
if (isam_file->s->base.keys)
|
|
|
|
isamchk_neaded=1;
|
|
|
|
state.changed=STATE_CHANGED | STATE_NOT_ANALYZED; /* Force check of table */
|
2009-01-08 09:20:04 +01:00
|
|
|
DBUG_RETURN (_ma_state_info_write_sub(file, &state,
|
|
|
|
MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET |
|
|
|
|
MA_STATE_INFO_WRITE_FULL_INFO));
|
2006-04-11 15:45:10 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* reset for mrg_rrnd */
|
|
|
|
|
|
|
|
/*
  Reset the merge cursor so a new mrg_rrnd() pass starts from the first
  table again.  Drops the read cache of the table currently being
  scanned, if there is one.
*/
static void mrg_reset(PACK_MRG_INFO *mrg)
{
  if (!mrg->current)
    return;                                  /* No scan in progress */
  maria_extra(*mrg->current, HA_EXTRA_NO_CACHE, 0);
  mrg->current=0;
}
|
|
|
|
|
2007-07-02 19:45:15 +02:00
|
|
|
/*
  Read the next record sequentially from a set of source tables.

  The tables in info->file[] are scanned one after the other; on the
  first call the scan of the first table is set up, and whenever one
  table is exhausted the scan continues transparently with the next.

  @param info  Merge descriptor holding the table array and cursor.
  @param buf   Record buffer to fill.

  @return  0 on success, HA_ERR_END_OF_FILE when every table has been
           read to the end, another non-zero value on error.
*/
static int mrg_rrnd(PACK_MRG_INFO *info,uchar *buf)
{
  int rc;
  MARIA_HA *table;

  if (info->current)
    table= *info->current;
  else
  {
    /* First call: position on the first table and initialize its scan */
    info->current= info->file;
    info->end= info->current + info->count;
    table= *info->current;
    maria_reset(table);
    maria_extra(table, HA_EXTRA_CACHE, 0);
    if ((rc= maria_scan_init(table)))
      return(rc);
  }

  for (;;)
  {
    rc= maria_scan(table, buf);
    if (rc == 0 || rc != HA_ERR_END_OF_FILE)
      return (rc);                      /* Got a row, or a hard error */

    /* Current table exhausted: shut its scan down, advance to the next */
    maria_scan_end(table);
    maria_extra(table, HA_EXTRA_NO_CACHE, 0);
    if (info->current + 1 == info->end)
      return(HA_ERR_END_OF_FILE);       /* That was the last table */
    table= *(++info->current);
    maria_reset(table);
    maria_extra(table, HA_EXTRA_CACHE, 0);
    if ((rc= maria_scan_init(table)))
      return(rc);
  }
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
  Close every MARIA file that belongs to this pack merge.

  SYNOPSIS
    mrg_close()
    mrg		The merge descriptor holding the open files.

  RETURN
    0	All files were closed successfully.
    #	Bitwise OR of the maria_close() results that failed.
*/

static int mrg_close(PACK_MRG_INFO *mrg)
{
  int result= 0;
  uint idx;
  DBUG_ENTER("mrg_close");

  for (idx= 0; idx < mrg->count; idx++)
    result|= maria_close(mrg->file[idx]);

  /* Release the file array only when this structure owns it. */
  if (mrg->free_file)
    my_free(mrg->file);

  DBUG_RETURN(result);
}
|
|
|
|
|
|
|
|
|
|
|
|
#if !defined(DBUG_OFF)
|
|
|
|
/*
|
|
|
|
Fake the counts to get big Huffman codes.
|
|
|
|
|
|
|
|
SYNOPSIS
|
|
|
|
fakebigcodes()
|
|
|
|
huff_counts A pointer to the counts array.
|
|
|
|
end_count A pointer past the counts array.
|
|
|
|
|
|
|
|
DESCRIPTION
|
|
|
|
|
|
|
|
Huffman coding works by removing the two least frequent values from
|
|
|
|
the list of values and add a new value with the sum of their
|
|
|
|
incidences in a loop until only one value is left. Every time a
|
|
|
|
value is reused for a new value, it gets one more bit for its
|
|
|
|
encoding. Hence, the least frequent values get the longest codes.
|
|
|
|
|
|
|
|
To get a maximum code length for a value, two of the values must
|
|
|
|
have an incidence of 1. As their sum is 2, the next infrequent value
|
|
|
|
must have at least an incidence of 2, then 4, 8, 16 and so on. This
|
|
|
|
means that one needs 2**n bytes (values) for a code length of n
|
|
|
|
bits. However, using more distinct values forces the use of longer
|
|
|
|
codes, or reaching the code length with less total bytes (values).
|
|
|
|
|
|
|
|
To get 64(32)-bit codes, I sort the counts by decreasing incidence.
|
|
|
|
I assign counts of 1 to the two most frequent values, a count of 2
|
|
|
|
for the next one, then 4, 8, and so on until 2**64-1(2**30-1). All
|
2007-07-02 19:45:15 +02:00
|
|
|
the remaining values get 1. That way every possible uchar has an
|
|
|
|
assigned code, though not all codes are used if not all uchar values
|
2006-04-11 15:45:10 +02:00
|
|
|
are present in the column.
|
|
|
|
|
|
|
|
This strategy would work with distinct column values too, but
|
|
|
|
requires that at least 64(32) values are present. To make things
|
|
|
|
easier here, I cancel all distinct column values and force byte
|
|
|
|
compression for all columns.
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
void
|
|
|
|
*/
|
|
|
|
|
|
|
|
static void fakebigcodes(HUFF_COUNTS *huff_counts, HUFF_COUNTS *end_count)
|
|
|
|
{
|
|
|
|
HUFF_COUNTS *count;
|
|
|
|
my_off_t *cur_count_p;
|
|
|
|
my_off_t *end_count_p;
|
|
|
|
my_off_t **cur_sort_p;
|
|
|
|
my_off_t **end_sort_p;
|
|
|
|
my_off_t *sort_counts[256];
|
|
|
|
my_off_t total;
|
|
|
|
DBUG_ENTER("fakebigcodes");
|
|
|
|
|
|
|
|
for (count= huff_counts; count < end_count; count++)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
Remove distinct column values.
|
|
|
|
*/
|
|
|
|
if (huff_counts->tree_buff)
|
|
|
|
{
|
2011-04-25 17:22:25 +02:00
|
|
|
my_free(huff_counts->tree_buff);
|
2017-05-16 23:34:48 +02:00
|
|
|
delete_tree(&huff_counts->int_tree, 0);
|
2006-04-11 15:45:10 +02:00
|
|
|
huff_counts->tree_buff= NULL;
|
|
|
|
DBUG_PRINT("fakebigcodes", ("freed distinct column values"));
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
Sort counts by decreasing incidence.
|
|
|
|
*/
|
|
|
|
cur_count_p= count->counts;
|
|
|
|
end_count_p= cur_count_p + 256;
|
|
|
|
cur_sort_p= sort_counts;
|
|
|
|
while (cur_count_p < end_count_p)
|
|
|
|
*(cur_sort_p++)= cur_count_p++;
|
2008-04-01 16:57:30 +02:00
|
|
|
(void) my_qsort(sort_counts, 256, sizeof(my_off_t*), (qsort_cmp) fakecmp);
|
2006-04-11 15:45:10 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
Assign faked counts.
|
|
|
|
*/
|
|
|
|
cur_sort_p= sort_counts;
|
|
|
|
#if SIZEOF_LONG_LONG > 4
|
|
|
|
end_sort_p= sort_counts + 8 * sizeof(ulonglong) - 1;
|
|
|
|
#else
|
|
|
|
end_sort_p= sort_counts + 8 * sizeof(ulonglong) - 2;
|
|
|
|
#endif
|
|
|
|
/* Most frequent value gets a faked count of 1. */
|
|
|
|
**(cur_sort_p++)= 1;
|
|
|
|
total= 1;
|
|
|
|
while (cur_sort_p < end_sort_p)
|
|
|
|
{
|
|
|
|
**(cur_sort_p++)= total;
|
|
|
|
total<<= 1;
|
|
|
|
}
|
|
|
|
/* Set the last value. */
|
|
|
|
**(cur_sort_p++)= --total;
|
|
|
|
/*
|
|
|
|
Set the remaining counts.
|
|
|
|
*/
|
|
|
|
end_sort_p= sort_counts + 256;
|
|
|
|
while (cur_sort_p < end_sort_p)
|
|
|
|
**(cur_sort_p++)= 1;
|
|
|
|
}
|
|
|
|
DBUG_VOID_RETURN;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
Compare two counts for reverse sorting.
|
|
|
|
|
|
|
|
SYNOPSIS
|
|
|
|
fakecmp()
|
|
|
|
count1 One count.
|
|
|
|
count2 Another count.
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
1 count1 < count2
|
|
|
|
0 count1 == count2
|
|
|
|
-1 count1 > count2
|
|
|
|
*/
|
|
|
|
|
|
|
|
static int fakecmp(my_off_t **count1, my_off_t **count2)
|
|
|
|
{
|
|
|
|
return ((**count1 < **count2) ? 1 :
|
|
|
|
(**count1 > **count2) ? -1 : 0);
|
|
|
|
}
|
|
|
|
#endif
|
2011-04-25 17:22:25 +02:00
|
|
|
|
|
|
|
#include "ma_check_standalone.h"
|
|
|
|
|