mariadb/storage/maria/maria_ftdump.c
Michael Widenius 53d44ad18b Added some fixes that should make MyISAM & Aria REPAIR work with more than 4G records
- If one specifies --force twice to myisamchk and aria_chk, then we will try to finnish the repair even if sort_buffer would be too small.
  This was done by dynamically allocate buffer handler objects as long as memory lasts.
- New option for myisamchk and aria_chk: create-missing-keys
- Changed default size of myisam_sort_buffer_size from 8M to 128M.
- Changed default size of sort_buffer_size in aria_chk from 128M to 256M.
- Increased information in error message about 'sort_buffer_size' beeing to small.
- Print also to 'show warnings' if repair was retried.
- Increased size of internal sort-buffer-readers from 16K to 128K
- Changed printing of 'number of records' to use %ll instead of casting to long
- Changed buffer sizes for myisam and aria to use MY_ALIGN_DOWN() to get same number of bytes allocated on different machines.





include/my_global.h:
  Added MY_ALIGN_DOWN() to get previous alignment (for big memory areas)
include/myisam.h:
  Increased size of types to be able to handle more records
include/myisamchk.h:
  Increased size of types to be able to handle more records
  Added T_FORCE_SORT_MEMORY to force repair to work even if sort_buffer would not be big enough
mysql-test/r/myisam.result:
  Updated result
mysql-test/r/mysqld--help.result:
  Updated result
mysql-test/r/repair.result:
  Updated result
mysql-test/suite/maria/maria.result:
  Added test cases for some fixed bugs in MyISAM to verify that Aria doesn't have them too.
mysql-test/suite/maria/maria.test:
  Added test cases for some fixed bugs in MyISAM to verify that Aria doesn't have them too.
mysql-test/suite/maria/maria3.result:
  Updated result after sort buffer size increase
mysql-test/suite/sys_vars/r/aria_sort_buffer_size_basic.result:
  Updated result after sort buffer size increase
mysql-test/suite/sys_vars/r/myisam_sort_buffer_size_basic.result:
  Updated result after sort buffer size increase
mysql-test/suite/sys_vars/t/myisam_sort_buffer_size_basic.test:
  Updated result after sort buffer size increase
mysql-test/t/myisam.test:
  Fixed error messages to not print system specific data
mysql-test/t/repair.test:
  Fixed error messages to not print system specific data
storage/maria/ha_maria.cc:
  Print also to 'show warnings' if repair was retried
  Changed default size of sort_buffer_size from 128M to 256M (same as in mysqld)
storage/maria/ma_check.c:
  Renamed USE_BUFFER_INIT -> PAGE_BUFFER_INIT
storage/maria/ma_sort.c:
  Increased size of internal sort-buffer-readers from 16K to 128K
  Increased size of types to be able to handle more records
  Added support for T_FORCE_SORT_MEMORY
  Don't allocate too many extra BUFFPEK at a time (they are probably not needed)
  Improved error message for "sort_buffer_size is too small"
  Changed printing of 'number of records' to use %ll instead of casting to long
  Fixed bug where maria_update_key_parts() was called too early.
  Fixed bug in detecting result from read_to_buffer().
  Added 'out of memory' checking when calling 'alloc_dynamic()'.
storage/maria/maria_chk.c:
  Added --create-missing-keys
  If one specifies --force twice then we will try to finnish the repair even if sort_buffer would be too small.
  check_param.sort_buffer_length varialble was used with wrong type.
storage/maria/maria_def.h:
  Increased size of types to be able to handle more records
  Use MY_ALIGN_DOWN() to get same number of bytes allocated on different machines
  Renamed USE_BUFFER_INIT -> PAGE_BUFFER_INIT
storage/maria/maria_ftdump.c:
  Renamed USE_BUFFER_INIT -> PAGE_BUFFER_INIT
storage/maria/maria_read_log.c:
  Use PAGE_BUFFER_INIT for page cache
storage/myisam/ha_myisam.cc:
  Changed default size of myisam_sort_buffer_size from 8M to 128M
storage/myisam/mi_check.c:
  Renamed USE_BUFFER_INIT -> KEY_BUFFER_INIT
storage/myisam/myisam_ftdump.c:
  Renamed USE_BUFFER_INIT -> KEY_BUFFER_INIT
storage/myisam/myisamchk.c:
  Added --create-missing-keys
  If one specifies --force twice then we will try to finnish the repair even if sort_buffer would be too small.
  check_param.sort_buffer_length varialble was used with wrong type.
  Renamed USE_BUFFER_INIT -> KEY_BUFFER_INIT
storage/myisam/myisamdef.h:
  Increased SORT_BUFFER_INIT to 64M
  (speeds up repair a lot and most machines have nowadays a lot of memory)
  Use MY_ALIGN_DOWN() to get same number of bytes allocated on different machines
  Renamed USE_BUFFER_INIT -> KEY_BUFFER_INIT
storage/myisam/sort.c:
  Increased size of internal sort-buffer-readers from 16K to 128K
  Increased size of types to be able to handle more records
  Added support for T_FORCE_SORT_MEMORY
  Don't allocate too many extra BUFFPEK at a time (they are probably not needed)
  Improved error message for "sort_buffer_size is too small"
  Changed printing of 'number of records' to use %ll instead of casting to long
  Fixed bug in detecting result from read_to_buffer().
  Added 'out of memory' checking when calling 'alloc_dynamic()'.
2013-05-10 12:32:34 +03:00

282 lines
7 KiB
C

/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
/* Written by Sergei A. Golubchik, who has a shared copyright to this code
added support for long options (my_getopt) 22.5.2002 by Jani Tolonen */
#include "ma_ftdefs.h"
#include <my_getopt.h>
static void usage();
static void complain(int val);
static my_bool get_one_option(int, const struct my_option *, char *);
static int count=0, stats=0, dump=0, lstats=0;
static my_bool verbose;
static char *query=NULL;
static uint lengths[256];
#define MAX_LEN (HA_FT_MAXBYTELEN+10)
#define HOW_OFTEN_TO_WRITE 10000
static struct my_option my_long_options[] =
{
{"help", 'h', "Display help and exit.",
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
{"help", '?', "Synonym for -h.",
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
{"count", 'c', "Calculate per-word stats (counts and global weights).",
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
{"dump", 'd', "Dump index (incl. data offsets and word weights).",
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
{"length", 'l', "Report length distribution.",
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
{"stats", 's', "Report global stats.",
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
{"verbose", 'v', "Be verbose.",
&verbose, &verbose, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
{ 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
};
int main(int argc,char *argv[])
{
int error=0;
uint keylen, keylen2=0, inx, doc_cnt=0;
float weight= 1.0;
double gws, min_gws=0, avg_gws=0;
MARIA_HA *info;
char buf[MAX_LEN], buf2[MAX_LEN], buf_maxlen[MAX_LEN], buf_min_gws[MAX_LEN];
ulong total=0, maxlen=0, uniq=0, max_doc_cnt=0;
struct { MARIA_HA *info; } aio0, *aio=&aio0; /* for GWS_IN_USE */
MY_INIT(argv[0]);
if ((error= handle_options(&argc, &argv, my_long_options, get_one_option)))
exit(error);
maria_init();
if (count || dump)
verbose=0;
if (!count && !dump && !lstats && !query)
stats=1;
if (verbose)
setbuf(stdout,NULL);
if (argc < 2)
usage();
{
char *end;
inx= (uint) strtoll(argv[1], &end, 10);
if (*end)
usage();
}
init_pagecache(maria_pagecache, PAGE_BUFFER_INIT, 0, 0,
MARIA_KEY_BLOCK_LENGTH, MY_WME);
if (!(info=maria_open(argv[0], O_RDONLY,
HA_OPEN_ABORT_IF_LOCKED|HA_OPEN_FROM_SQL_LAYER)))
{
error=my_errno;
goto err;
}
*buf2=0;
aio->info=info;
if ((inx >= info->s->base.keys) ||
!(info->s->keyinfo[inx].flag & HA_FULLTEXT))
{
printf("Key %d in table %s is not a FULLTEXT key\n", inx,
info->s->open_file_name.str);
goto err;
}
maria_lock_database(info, F_EXTRA_LCK);
info->cur_row.lastpos= HA_OFFSET_ERROR;
info->update|= HA_STATE_PREV_FOUND;
while (!(error=maria_rnext(info,NULL,inx)))
{
FT_WEIGTH subkeys;
keylen=*(info->lastkey_buff);
subkeys.i= ft_sintXkorr(info->lastkey_buff + keylen + 1);
if (subkeys.i >= 0)
weight= subkeys.f;
#ifdef HAVE_SNPRINTF
snprintf(buf,MAX_LEN,"%.*s",(int) keylen,info->lastkey_buff+1);
#else
sprintf(buf,"%.*s",(int) keylen,info->lastkey_buff+1);
#endif
my_casedn_str(default_charset_info,buf);
total++;
lengths[keylen]++;
if (count || stats)
{
if (strcmp(buf, buf2))
{
if (*buf2)
{
uniq++;
avg_gws+=gws=GWS_IN_USE;
if (count)
printf("%9u %20.7f %s\n",doc_cnt,gws,buf2);
if (maxlen<keylen2)
{
maxlen=keylen2;
strmov(buf_maxlen, buf2);
}
if (max_doc_cnt < doc_cnt)
{
max_doc_cnt=doc_cnt;
strmov(buf_min_gws, buf2);
min_gws=gws;
}
}
strmov(buf2, buf);
keylen2=keylen;
doc_cnt=0;
}
doc_cnt+= (subkeys.i >= 0 ? 1 : -subkeys.i);
}
if (dump)
{
if (subkeys.i >= 0)
printf("%9lx %20.7f %s\n", (long) info->cur_row.lastpos,weight,buf);
else
printf("%9lx => %17d %s\n",(long) info->cur_row.lastpos,-subkeys.i,
buf);
}
if (verbose && (total%HOW_OFTEN_TO_WRITE)==0)
printf("%10ld\r",total);
}
maria_lock_database(info, F_UNLCK);
if (count || stats)
{
if (*buf2)
{
uniq++;
avg_gws+=gws=GWS_IN_USE;
if (count)
printf("%9u %20.7f %s\n",doc_cnt,gws,buf2);
if (maxlen<keylen2)
{
maxlen=keylen2;
strmov(buf_maxlen, buf2);
}
if (max_doc_cnt < doc_cnt)
{
max_doc_cnt=doc_cnt;
strmov(buf_min_gws, buf2);
min_gws=gws;
}
}
}
if (stats)
{
count=0;
for (inx=0;inx<256;inx++)
{
count+=lengths[inx];
if ((ulong) count >= total/2)
break;
}
printf("Total rows: %lu\nTotal words: %lu\n"
"Unique words: %lu\nLongest word: %lu chars (%s)\n"
"Median length: %u\n"
"Average global weight: %f\n"
"Most common word: %lu times, weight: %f (%s)\n",
(long) info->state->records, total, uniq, maxlen, buf_maxlen,
inx, avg_gws/uniq, max_doc_cnt, min_gws, buf_min_gws);
}
if (lstats)
{
count=0;
for (inx=0; inx<256; inx++)
{
count+=lengths[inx];
if (count && lengths[inx])
printf("%3u: %10lu %5.2f%% %20lu %4.1f%%\n", inx,
(ulong) lengths[inx],100.0*lengths[inx]/total,(ulong) count,
100.0*count/total);
}
}
err:
if (error && error != HA_ERR_END_OF_FILE)
printf("got error %d\n",my_errno);
if (info)
maria_close(info);
maria_end();
return 0;
}
static my_bool
get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
char *argument __attribute__((unused)))
{
switch(optid) {
case 'd':
dump=1;
complain(count || query);
break;
case 's':
stats=1;
complain(query!=0);
break;
case 'c':
count= 1;
complain(dump || query);
break;
case 'l':
lstats=1;
complain(query!=0);
break;
case '?':
case 'h':
usage();
}
return 0;
}
static void usage()
{
printf("Use: aria_ft_dump <table_name> <index_num>\n");
my_print_help(my_long_options);
my_print_variables(my_long_options);
exit(1);
}
static void complain(int val) /* Kinda assert :-) */
{
if (val)
{
printf("You cannot use these options together!\n");
exit(1);
}
}
#include "ma_check_standalone.h"