mirror of
https://github.com/MariaDB/server.git
synced 2025-07-25 04:34:58 +02:00

Added option 'aria-pagecache-segments', default 1. For values > 1, this splits the aria-pagecache-buffer into the given number of segments, each independent of the others. Having multiple pagecaches improves performance when multiple connections run queries concurrently using different tables. Each pagecache will use aria-pagecache-buffer/segments amount of memory, but at least 128K. Each opened table has its index and data file use the segments in a round-robin fashion. Internal changes: - All programs allocating the maria pagecache themselves should now call multi_init_pagecache() instead of init_pagecache(). - Pagecache statistics are now stored in 'pagecache_stats' instead of maria_pagecache. One must call multi_update_pagecache_stats() to update the statistics. - Added into PAGECACHE_FILE a pointer to the file's pagecache. This was done to ensure that the index and data file are using the same pagecache and it simplified the checkpoint code. I kept pagecache in TABLE_SHARE to minimize the changes. - really_execute_checkpoint() was updated to handle a dynamic number of pagecaches. - pagecache_collect_changed_blocks_with_lsn() was slightly changed to allow it to be called for each pagecache. - Undefined the unused functions maria_assign_pagecache() and maria_change_pagecache(). - ma_pagecaches.c is totally rewritten. It now contains all multi_pagecache functions. Errors found by QA that are fixed: MDEV-36872 UBSAN errors in ma_checkpoint.c MDEV-36874 Behavior upon too small aria_pagecache_buffer_size in case of multiple segments is not very user-friendly MDEV-36914 ma_checkpoint.c(285,9): conversion from '__int64' to 'uint' treated as an error MDEV-36912 sys_vars.sysvars_server_embedded and sys_vars.sysvars_server_notembedded fail on x86
282 lines
7.1 KiB
C
282 lines
7.1 KiB
C
/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */

/* Written by Sergei A. Golubchik, who has a shared copyright to this code
   added support for long options (my_getopt) 22.5.2002 by Jani Tolonen */
|
|
|
|
#include "ma_ftdefs.h"
|
|
#include <my_getopt.h>
|
|
|
|
static void usage();
|
|
static void complain(int val);
|
|
static my_bool get_one_option(const struct my_option *, const char *,
|
|
const char*);
|
|
|
|
static int count=0, stats=0, dump=0, lstats=0;
|
|
static my_bool verbose;
|
|
static char *query=NULL;
|
|
static uint lengths[256];
|
|
|
|
#define MAX_LEN (HA_FT_MAXBYTELEN+10)
|
|
#define HOW_OFTEN_TO_WRITE 10000
|
|
|
|
static struct my_option my_long_options[] =
|
|
{
|
|
{"help", 'h', "Display help and exit.",
|
|
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
|
|
{"help", '?', "Synonym for -h.",
|
|
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
|
|
{"count", 'c', "Calculate per-word stats (counts and global weights).",
|
|
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
|
|
{"dump", 'd', "Dump index (incl. data offsets and word weights).",
|
|
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
|
|
{"length", 'l', "Report length distribution.",
|
|
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
|
|
{"stats", 's', "Report global stats.",
|
|
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
|
|
{"verbose", 'v', "Be verbose.",
|
|
&verbose, &verbose, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
|
|
{ 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
|
|
};
|
|
|
|
|
|
int main(int argc,char *argv[])
|
|
{
|
|
int error=0;
|
|
uint keylen, keylen2=0, inx, doc_cnt=0;
|
|
float weight= 1.0;
|
|
double gws, min_gws=0, avg_gws=0;
|
|
MARIA_HA *info;
|
|
char buf[MAX_LEN], buf2[MAX_LEN], buf_maxlen[MAX_LEN], buf_min_gws[MAX_LEN];
|
|
ulong total=0, maxlen=0, uniq=0, max_doc_cnt=0;
|
|
struct { MARIA_HA *info; } aio0, *aio=&aio0; /* for GWS_IN_USE */
|
|
|
|
MY_INIT(argv[0]);
|
|
if ((error= handle_options(&argc, &argv, my_long_options, get_one_option)))
|
|
exit(error);
|
|
maria_init();
|
|
if (count || dump)
|
|
verbose=0;
|
|
if (!count && !dump && !lstats && !query)
|
|
stats=1;
|
|
|
|
if (verbose)
|
|
setbuf(stdout,NULL);
|
|
|
|
if (argc < 2)
|
|
usage();
|
|
|
|
{
|
|
char *end;
|
|
inx= (uint) strtoll(argv[1], &end, 10);
|
|
if (*end)
|
|
usage();
|
|
}
|
|
|
|
multi_init_pagecache(&maria_pagecaches, 1,
|
|
PAGE_BUFFER_INIT, 0, 0,
|
|
MARIA_KEY_BLOCK_LENGTH, 0, MY_WME);
|
|
|
|
if (!(info=maria_open(argv[0], O_RDONLY,
|
|
HA_OPEN_ABORT_IF_LOCKED|HA_OPEN_FROM_SQL_LAYER, 0)))
|
|
{
|
|
error=my_errno;
|
|
goto err;
|
|
}
|
|
|
|
*buf2=0;
|
|
aio->info=info;
|
|
|
|
if ((inx >= info->s->base.keys) ||
|
|
info->s->keyinfo[inx].key_alg != HA_KEY_ALG_FULLTEXT)
|
|
{
|
|
printf("Key %d in table %s is not a FULLTEXT key\n", inx,
|
|
info->s->open_file_name.str);
|
|
goto err;
|
|
}
|
|
|
|
maria_lock_database(info, F_EXTRA_LCK);
|
|
|
|
info->cur_row.lastpos= HA_OFFSET_ERROR;
|
|
info->update|= HA_STATE_PREV_FOUND;
|
|
|
|
while (!(error=maria_rnext(info,NULL,inx)))
|
|
{
|
|
FT_WEIGTH subkeys;
|
|
keylen=*(info->lastkey_buff);
|
|
|
|
subkeys.i= ft_sintXkorr(info->lastkey_buff + keylen + 1);
|
|
if (subkeys.i >= 0)
|
|
weight= subkeys.f;
|
|
|
|
keylen= (uint) my_ci_casedn(default_charset_info, buf, sizeof(buf) - 1,
|
|
(char *) info->lastkey_buff + 1, keylen);
|
|
buf[keylen]= '\0';
|
|
total++;
|
|
lengths[keylen]++;
|
|
|
|
if (count || stats)
|
|
{
|
|
if (strcmp(buf, buf2))
|
|
{
|
|
if (*buf2)
|
|
{
|
|
uniq++;
|
|
avg_gws+=gws=GWS_IN_USE;
|
|
if (count)
|
|
printf("%9u %20.7f %s\n",doc_cnt,gws,buf2);
|
|
if (maxlen<keylen2)
|
|
{
|
|
maxlen=keylen2;
|
|
strmov(buf_maxlen, buf2);
|
|
}
|
|
if (max_doc_cnt < doc_cnt)
|
|
{
|
|
max_doc_cnt=doc_cnt;
|
|
strmov(buf_min_gws, buf2);
|
|
min_gws=gws;
|
|
}
|
|
}
|
|
strmov(buf2, buf);
|
|
keylen2=keylen;
|
|
doc_cnt=0;
|
|
}
|
|
doc_cnt+= (subkeys.i >= 0 ? 1 : -subkeys.i);
|
|
}
|
|
if (dump)
|
|
{
|
|
if (subkeys.i >= 0)
|
|
printf("%9lx %20.7f %s\n", (long) info->cur_row.lastpos,weight,buf);
|
|
else
|
|
printf("%9lx => %17d %s\n",(long) info->cur_row.lastpos,-subkeys.i,
|
|
buf);
|
|
}
|
|
if (verbose && (total%HOW_OFTEN_TO_WRITE)==0)
|
|
printf("%10ld\r",total);
|
|
}
|
|
maria_lock_database(info, F_UNLCK);
|
|
|
|
if (count || stats)
|
|
{
|
|
if (*buf2)
|
|
{
|
|
uniq++;
|
|
avg_gws+=gws=GWS_IN_USE;
|
|
if (count)
|
|
printf("%9u %20.7f %s\n",doc_cnt,gws,buf2);
|
|
if (maxlen<keylen2)
|
|
{
|
|
maxlen=keylen2;
|
|
strmov(buf_maxlen, buf2);
|
|
}
|
|
if (max_doc_cnt < doc_cnt)
|
|
{
|
|
max_doc_cnt=doc_cnt;
|
|
strmov(buf_min_gws, buf2);
|
|
min_gws=gws;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (stats)
|
|
{
|
|
count=0;
|
|
for (inx=0;inx<256;inx++)
|
|
{
|
|
count+=lengths[inx];
|
|
if ((ulong) count >= total/2)
|
|
break;
|
|
}
|
|
printf("Total rows: %lu\nTotal words: %lu\n"
|
|
"Unique words: %lu\nLongest word: %lu chars (%s)\n"
|
|
"Median length: %u\n"
|
|
"Average global weight: %f\n"
|
|
"Most common word: %lu times, weight: %f (%s)\n",
|
|
(long) info->state->records, total, uniq, maxlen, buf_maxlen,
|
|
inx, avg_gws/uniq, max_doc_cnt, min_gws, buf_min_gws);
|
|
}
|
|
if (lstats)
|
|
{
|
|
count=0;
|
|
for (inx=0; inx<256; inx++)
|
|
{
|
|
count+=lengths[inx];
|
|
if (count && lengths[inx])
|
|
printf("%3u: %10lu %5.2f%% %20lu %4.1f%%\n", inx,
|
|
(ulong) lengths[inx],100.0*lengths[inx]/total,(ulong) count,
|
|
100.0*count/total);
|
|
}
|
|
}
|
|
|
|
err:
|
|
if (error && error != HA_ERR_END_OF_FILE)
|
|
printf("got error %d\n",my_errno);
|
|
if (info)
|
|
maria_close(info);
|
|
maria_end();
|
|
return 0;
|
|
}
|
|
|
|
|
|
static my_bool
|
|
get_one_option(const struct my_option *opt,
|
|
const char *argument __attribute__((unused)),
|
|
const char *filename __attribute__((unused)))
|
|
{
|
|
switch(opt->id) {
|
|
case 'd':
|
|
dump=1;
|
|
complain(count || query);
|
|
break;
|
|
case 's':
|
|
stats=1;
|
|
complain(query!=0);
|
|
break;
|
|
case 'c':
|
|
count= 1;
|
|
complain(dump || query);
|
|
break;
|
|
case 'l':
|
|
lstats=1;
|
|
complain(query!=0);
|
|
break;
|
|
case '?':
|
|
case 'h':
|
|
usage();
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
static void usage()
|
|
{
|
|
printf("Use: aria_ft_dump <table_name> <index_num>\n");
|
|
my_print_help(my_long_options);
|
|
my_print_variables(my_long_options);
|
|
exit(1);
|
|
}
|
|
|
|
|
|
/*
  Kind of an assert for option combinations: when val is non-zero, print
  a message to stdout and terminate with exit status 1; when val is
  zero, do nothing and return.
*/
static void complain(int val)
{
  if (!val)
    return;

  fputs("You cannot use these options together!\n", stdout);
  exit(1);
}
|
|
|
|
#include "ma_check_standalone.h"
|
|
|