2011-06-30 17:37:13 +02:00
|
|
|
/*
|
2011-11-21 19:13:14 +02:00
|
|
|
Copyright (c) 2001, 2010, Oracle and/or its affiliates
|
2001-04-11 13:04:03 +02:00
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
2006-12-23 20:17:15 +01:00
|
|
|
the Free Software Foundation; version 2 of the License.
|
2001-04-11 13:04:03 +02:00
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with this program; if not, write to the Free Software
|
2011-06-30 17:46:53 +02:00
|
|
|
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
|
2001-04-11 13:04:03 +02:00
|
|
|
|
2002-05-24 14:06:58 +03:00
|
|
|
/* Written by Sergei A. Golubchik, who has a shared copyright to this code
|
|
|
|
added support for long options (my_getopt) 22.5.2002 by Jani Tolonen */
|
2001-04-11 13:04:03 +02:00
|
|
|
|
|
|
|
#include "ftdefs.h"
|
2002-05-24 14:06:58 +03:00
|
|
|
#include <my_getopt.h>
|
2001-04-11 13:04:03 +02:00
|
|
|
|
2002-05-24 14:06:58 +03:00
|
|
|
static void usage();
|
2001-04-11 13:04:03 +02:00
|
|
|
static void complain(int val);
|
2004-02-21 11:18:29 +01:00
|
|
|
static my_bool get_one_option(int, const struct my_option *, char *);
|
2001-04-11 13:04:03 +02:00
|
|
|
|
2002-05-24 14:06:58 +03:00
|
|
|
static int count=0, stats=0, dump=0, lstats=0;
|
|
|
|
static my_bool verbose;
|
2001-04-11 13:04:03 +02:00
|
|
|
static char *query=NULL;
|
2001-12-04 14:24:47 +01:00
|
|
|
static uint lengths[256];
|
2001-04-11 13:04:03 +02:00
|
|
|
|
2003-12-04 21:58:28 +01:00
|
|
|
#define MAX_LEN (HA_FT_MAXBYTELEN+10)
|
2001-12-04 14:24:47 +01:00
|
|
|
#define HOW_OFTEN_TO_WRITE 10000
|
2001-04-11 13:04:03 +02:00
|
|
|
|
2002-05-24 14:06:58 +03:00
|
|
|
static struct my_option my_long_options[] =
|
|
|
|
{
|
2006-04-23 19:26:56 -05:00
|
|
|
{"help", 'h', "Display help and exit.",
|
2002-05-24 14:06:58 +03:00
|
|
|
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
|
2006-04-23 19:26:56 -05:00
|
|
|
{"help", '?', "Synonym for -h.",
|
2002-05-24 14:06:58 +03:00
|
|
|
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
|
2003-06-13 10:59:02 +02:00
|
|
|
{"count", 'c', "Calculate per-word stats (counts and global weights).",
|
2002-05-24 14:06:58 +03:00
|
|
|
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
|
2006-04-23 19:26:56 -05:00
|
|
|
{"dump", 'd', "Dump index (incl. data offsets and word weights).",
|
2002-05-24 14:06:58 +03:00
|
|
|
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
|
2006-04-23 19:26:56 -05:00
|
|
|
{"length", 'l', "Report length distribution.",
|
2002-05-24 14:06:58 +03:00
|
|
|
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
|
2006-04-23 19:26:56 -05:00
|
|
|
{"stats", 's', "Report global stats.",
|
2002-05-24 14:06:58 +03:00
|
|
|
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
|
2006-04-23 19:26:56 -05:00
|
|
|
{"verbose", 'v', "Be verbose.",
|
2010-07-24 09:24:44 -03:00
|
|
|
&verbose, &verbose, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
|
2002-05-24 14:06:58 +03:00
|
|
|
{ 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
2001-04-11 13:04:03 +02:00
|
|
|
int main(int argc,char *argv[])
|
|
|
|
{
|
2009-11-30 15:36:06 +02:00
|
|
|
int error=0;
|
2002-01-05 22:51:42 +02:00
|
|
|
uint keylen, keylen2=0, inx, doc_cnt=0;
|
2003-02-07 15:47:24 +02:00
|
|
|
float weight= 1.0;
|
2001-05-31 14:07:17 +03:00
|
|
|
double gws, min_gws=0, avg_gws=0;
|
2001-04-11 13:04:03 +02:00
|
|
|
MI_INFO *info;
|
2002-01-02 21:29:41 +02:00
|
|
|
char buf[MAX_LEN], buf2[MAX_LEN], buf_maxlen[MAX_LEN], buf_min_gws[MAX_LEN];
|
2001-04-11 13:04:03 +02:00
|
|
|
ulong total=0, maxlen=0, uniq=0, max_doc_cnt=0;
|
|
|
|
struct { MI_INFO *info; } aio0, *aio=&aio0; /* for GWS_IN_USE */
|
|
|
|
|
|
|
|
MY_INIT(argv[0]);
|
2004-10-20 01:28:42 +03:00
|
|
|
if ((error= handle_options(&argc, &argv, my_long_options, get_one_option)))
|
2004-02-21 11:18:29 +01:00
|
|
|
exit(error);
|
2001-04-11 13:04:03 +02:00
|
|
|
if (count || dump)
|
|
|
|
verbose=0;
|
2001-12-04 14:24:47 +01:00
|
|
|
if (!count && !dump && !lstats && !query)
|
2001-04-11 13:04:03 +02:00
|
|
|
stats=1;
|
|
|
|
|
|
|
|
if (verbose)
|
|
|
|
setbuf(stdout,NULL);
|
|
|
|
|
2002-06-12 23:54:52 +03:00
|
|
|
if (argc < 2)
|
2002-05-24 14:06:58 +03:00
|
|
|
usage();
|
2001-04-11 13:04:03 +02:00
|
|
|
|
2004-02-21 11:18:29 +01:00
|
|
|
{
|
|
|
|
char *end;
|
2004-05-05 02:59:17 -03:00
|
|
|
inx= (uint) strtoll(argv[1], &end, 10);
|
2004-02-21 11:18:29 +01:00
|
|
|
if (*end)
|
|
|
|
usage();
|
|
|
|
}
|
|
|
|
|
2014-07-19 17:46:08 +03:00
|
|
|
init_key_cache(dflt_key_cache, MI_KEY_BLOCK_LENGTH, KEY_BUFFER_INIT, 0, 0, 0, 0);
|
2003-11-13 14:31:29 +01:00
|
|
|
|
2006-07-21 13:59:59 -07:00
|
|
|
if (!(info=mi_open(argv[0], O_RDONLY,
|
|
|
|
HA_OPEN_ABORT_IF_LOCKED|HA_OPEN_FROM_SQL_LAYER)))
|
2004-02-21 11:18:29 +01:00
|
|
|
{
|
|
|
|
error=my_errno;
|
2001-04-11 13:04:03 +02:00
|
|
|
goto err;
|
2004-02-21 11:18:29 +01:00
|
|
|
}
|
2001-04-11 13:04:03 +02:00
|
|
|
|
|
|
|
*buf2=0;
|
|
|
|
aio->info=info;
|
|
|
|
|
2002-06-12 23:54:52 +03:00
|
|
|
if ((inx >= info->s->base.keys) ||
|
|
|
|
!(info->s->keyinfo[inx].flag & HA_FULLTEXT))
|
2001-04-11 13:04:03 +02:00
|
|
|
{
|
|
|
|
printf("Key %d in table %s is not a FULLTEXT key\n", inx, info->filename);
|
|
|
|
goto err;
|
|
|
|
}
|
2001-12-06 14:10:51 +02:00
|
|
|
|
2004-02-21 11:18:29 +01:00
|
|
|
mi_lock_database(info, F_EXTRA_LCK);
|
|
|
|
|
2005-03-09 12:59:20 +01:00
|
|
|
info->lastpos= HA_OFFSET_ERROR;
|
|
|
|
info->update|= HA_STATE_PREV_FOUND;
|
2001-04-11 13:04:03 +02:00
|
|
|
|
2005-03-09 12:59:20 +01:00
|
|
|
while (!(error=mi_rnext(info,NULL,inx)))
|
2001-04-11 13:04:03 +02:00
|
|
|
{
|
2009-11-30 15:36:06 +02:00
|
|
|
FT_WEIGTH subkeys;
|
2005-03-09 12:59:20 +01:00
|
|
|
keylen=*(info->lastkey);
|
2001-04-11 13:04:03 +02:00
|
|
|
|
2009-11-30 15:36:06 +02:00
|
|
|
subkeys.i =ft_sintXkorr(info->lastkey+keylen+1);
|
|
|
|
if (subkeys.i >= 0)
|
|
|
|
weight= subkeys.f;
|
2001-04-11 13:04:03 +02:00
|
|
|
|
2003-08-12 15:28:36 +02:00
|
|
|
#ifdef HAVE_SNPRINTF
|
2005-03-09 12:59:20 +01:00
|
|
|
snprintf(buf,MAX_LEN,"%.*s",(int) keylen,info->lastkey+1);
|
2003-08-12 15:28:36 +02:00
|
|
|
#else
|
2005-03-09 12:59:20 +01:00
|
|
|
sprintf(buf,"%.*s",(int) keylen,info->lastkey+1);
|
2003-08-12 15:28:36 +02:00
|
|
|
#endif
|
2005-03-09 12:59:20 +01:00
|
|
|
my_casedn_str(default_charset_info,buf);
|
|
|
|
total++;
|
|
|
|
lengths[keylen]++;
|
2001-04-11 13:04:03 +02:00
|
|
|
|
2005-03-09 12:59:20 +01:00
|
|
|
if (count || stats)
|
|
|
|
{
|
|
|
|
if (strcmp(buf, buf2))
|
2001-04-11 13:04:03 +02:00
|
|
|
{
|
2005-03-09 12:59:20 +01:00
|
|
|
if (*buf2)
|
2001-04-11 13:04:03 +02:00
|
|
|
{
|
2005-03-09 12:59:20 +01:00
|
|
|
uniq++;
|
|
|
|
avg_gws+=gws=GWS_IN_USE;
|
|
|
|
if (count)
|
|
|
|
printf("%9u %20.7f %s\n",doc_cnt,gws,buf2);
|
|
|
|
if (maxlen<keylen2)
|
2001-04-11 13:04:03 +02:00
|
|
|
{
|
2005-03-09 12:59:20 +01:00
|
|
|
maxlen=keylen2;
|
|
|
|
strmov(buf_maxlen, buf2);
|
|
|
|
}
|
|
|
|
if (max_doc_cnt < doc_cnt)
|
|
|
|
{
|
|
|
|
max_doc_cnt=doc_cnt;
|
|
|
|
strmov(buf_min_gws, buf2);
|
|
|
|
min_gws=gws;
|
2001-04-11 13:04:03 +02:00
|
|
|
}
|
|
|
|
}
|
2005-03-09 12:59:20 +01:00
|
|
|
strmov(buf2, buf);
|
|
|
|
keylen2=keylen;
|
|
|
|
doc_cnt=0;
|
2001-04-11 13:04:03 +02:00
|
|
|
}
|
2009-11-30 15:36:06 +02:00
|
|
|
doc_cnt+= (subkeys.i >= 0 ? 1 : -subkeys.i);
|
2001-04-11 13:04:03 +02:00
|
|
|
}
|
2005-03-09 12:59:20 +01:00
|
|
|
if (dump)
|
|
|
|
{
|
2009-11-30 15:36:06 +02:00
|
|
|
if (subkeys.i >= 0)
|
2005-03-09 12:59:20 +01:00
|
|
|
printf("%9lx %20.7f %s\n", (long) info->lastpos,weight,buf);
|
|
|
|
else
|
2009-11-30 15:36:06 +02:00
|
|
|
printf("%9lx => %17d %s\n",(long) info->lastpos,-subkeys.i,buf);
|
2005-03-09 12:59:20 +01:00
|
|
|
}
|
|
|
|
if (verbose && (total%HOW_OFTEN_TO_WRITE)==0)
|
|
|
|
printf("%10ld\r",total);
|
|
|
|
}
|
|
|
|
mi_lock_database(info, F_UNLCK);
|
2001-04-11 13:04:03 +02:00
|
|
|
|
2005-03-09 12:59:20 +01:00
|
|
|
if (count || stats)
|
|
|
|
{
|
|
|
|
if (*buf2)
|
2001-12-04 14:24:47 +01:00
|
|
|
{
|
2005-03-09 12:59:20 +01:00
|
|
|
uniq++;
|
|
|
|
avg_gws+=gws=GWS_IN_USE;
|
|
|
|
if (count)
|
|
|
|
printf("%9u %20.7f %s\n",doc_cnt,gws,buf2);
|
|
|
|
if (maxlen<keylen2)
|
2001-12-04 14:24:47 +01:00
|
|
|
{
|
2005-03-09 12:59:20 +01:00
|
|
|
maxlen=keylen2;
|
|
|
|
strmov(buf_maxlen, buf2);
|
2001-12-04 14:24:47 +01:00
|
|
|
}
|
2005-03-09 12:59:20 +01:00
|
|
|
if (max_doc_cnt < doc_cnt)
|
2001-12-04 14:24:47 +01:00
|
|
|
{
|
2005-03-09 12:59:20 +01:00
|
|
|
max_doc_cnt=doc_cnt;
|
|
|
|
strmov(buf_min_gws, buf2);
|
|
|
|
min_gws=gws;
|
2001-12-04 14:24:47 +01:00
|
|
|
}
|
|
|
|
}
|
2001-04-11 13:04:03 +02:00
|
|
|
}
|
|
|
|
|
2005-03-09 12:59:20 +01:00
|
|
|
if (stats)
|
|
|
|
{
|
|
|
|
count=0;
|
|
|
|
for (inx=0;inx<256;inx++)
|
|
|
|
{
|
|
|
|
count+=lengths[inx];
|
|
|
|
if ((ulong) count >= total/2)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
printf("Total rows: %lu\nTotal words: %lu\n"
|
|
|
|
"Unique words: %lu\nLongest word: %lu chars (%s)\n"
|
|
|
|
"Median length: %u\n"
|
|
|
|
"Average global weight: %f\n"
|
|
|
|
"Most common word: %lu times, weight: %f (%s)\n",
|
|
|
|
(long) info->state->records, total, uniq, maxlen, buf_maxlen,
|
|
|
|
inx, avg_gws/uniq, max_doc_cnt, min_gws, buf_min_gws);
|
|
|
|
}
|
|
|
|
if (lstats)
|
|
|
|
{
|
|
|
|
count=0;
|
|
|
|
for (inx=0; inx<256; inx++)
|
|
|
|
{
|
|
|
|
count+=lengths[inx];
|
|
|
|
if (count && lengths[inx])
|
|
|
|
printf("%3u: %10lu %5.2f%% %20lu %4.1f%%\n", inx,
|
|
|
|
(ulong) lengths[inx],100.0*lengths[inx]/total,(ulong) count,
|
|
|
|
100.0*count/total);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2001-04-11 13:04:03 +02:00
|
|
|
err:
|
|
|
|
if (error && error != HA_ERR_END_OF_FILE)
|
|
|
|
printf("got error %d\n",my_errno);
|
|
|
|
if (info)
|
|
|
|
mi_close(info);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2002-05-24 14:06:58 +03:00
|
|
|
|
|
|
|
static my_bool
|
|
|
|
get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
|
|
|
|
char *argument __attribute__((unused)))
|
|
|
|
{
|
|
|
|
switch(optid) {
|
|
|
|
case 'd':
|
2003-01-21 19:24:34 +01:00
|
|
|
dump=1;
|
2002-05-24 14:06:58 +03:00
|
|
|
complain(count || query);
|
|
|
|
break;
|
2003-01-21 19:24:34 +01:00
|
|
|
case 's':
|
|
|
|
stats=1;
|
2002-05-24 14:06:58 +03:00
|
|
|
complain(query!=0);
|
|
|
|
break;
|
2003-01-21 19:24:34 +01:00
|
|
|
case 'c':
|
2002-05-24 14:06:58 +03:00
|
|
|
count= 1;
|
|
|
|
complain(dump || query);
|
|
|
|
break;
|
2003-01-21 19:24:34 +01:00
|
|
|
case 'l':
|
2002-05-24 14:06:58 +03:00
|
|
|
lstats=1;
|
|
|
|
complain(query!=0);
|
|
|
|
break;
|
|
|
|
case '?':
|
|
|
|
case 'h':
|
|
|
|
usage();
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
2001-04-11 13:04:03 +02:00
|
|
|
|
2002-06-12 23:54:52 +03:00
|
|
|
|
2002-05-24 14:06:58 +03:00
|
|
|
static void usage()
|
2001-04-11 13:04:03 +02:00
|
|
|
{
|
2004-03-16 11:42:35 -06:00
|
|
|
printf("Use: myisam_ftdump <table_name> <index_num>\n");
|
2002-05-24 14:06:58 +03:00
|
|
|
my_print_help(my_long_options);
|
|
|
|
my_print_variables(my_long_options);
|
2001-04-11 13:04:03 +02:00
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
2002-05-24 14:06:58 +03:00
|
|
|
|
2001-04-11 13:04:03 +02:00
|
|
|
/*
  Poor man's assertion for option combinations: when 'val' is non-zero,
  report the conflict on stdout and terminate with exit status 1.
  Returns normally when 'val' is zero.
*/
static void complain(int val)
{
  if (!val)
    return;

  fputs("You cannot use these options together!\n", stdout);
  exit(1);
}
|
2009-11-25 16:25:01 +04:00
|
|
|
|
|
|
|
#include "mi_extrafunc.h"
|