mirror of
https://github.com/MariaDB/server.git
synced 2025-01-29 02:05:57 +01:00
--ft_stopword_file command-line option
This commit is contained in:
parent
30a35bcfe4
commit
fc09f13c5d
5 changed files with 77 additions and 23 deletions
|
@ -46,6 +46,7 @@ struct st_ft_info
|
|||
};
|
||||
#endif
|
||||
|
||||
extern const char *ft_stopword_file;
|
||||
extern const char *ft_precompiled_stopwords[];
|
||||
|
||||
extern ulong ft_min_word_len;
|
||||
|
@ -53,7 +54,7 @@ extern ulong ft_max_word_len;
|
|||
extern ulong ft_max_word_len_for_sort;
|
||||
extern const char *ft_boolean_syntax;
|
||||
|
||||
int ft_init_stopwords(const char **);
|
||||
int ft_init_stopwords(void);
|
||||
void ft_free_stopwords(void);
|
||||
|
||||
#define FT_NL 0
|
||||
|
|
|
@ -67,6 +67,7 @@ FT_INFO *ft_init_search(uint mode, void *info, uint keynr,
|
|||
query, query_len, presort);
|
||||
}
|
||||
|
||||
const char *ft_stopword_file = 0;
|
||||
const char *ft_precompiled_stopwords[] = {
|
||||
|
||||
#ifdef COMPILE_STOPWORDS_IN
|
||||
|
|
|
@ -33,27 +33,74 @@ static int FT_STOPWORD_cmp(void* cmp_arg __attribute__((unused)),
|
|||
(uchar *)w2->pos,w2->len,0);
|
||||
}
|
||||
|
||||
int ft_init_stopwords(const char **sws)
|
||||
/*
  Element destructor registered with the stopword tree.

  The word text of an element is released only when both conditions hold:
  the tree reports a free_free action, and ft_stopword_file is set. In every
  other case the element is left untouched.

  w       tree element whose 'pos' text may be released
  action  reason the tree invoked this callback
  arg     unused
*/
static void FT_STOPWORD_free(FT_STOPWORD *w, TREE_FREE action,
                             void *arg __attribute__((unused)))
{
  if (action != free_free)
    return;
  if (!ft_stopword_file)
    return;
  my_free(w->pos, MYF(0));
}
|
||||
|
||||
static int ft_add_stopword(const char *w)
|
||||
{
|
||||
FT_STOPWORD sw;
|
||||
return !w ||
|
||||
(((sw.len= (uint) strlen(sw.pos=w)) >= ft_min_word_len) &&
|
||||
(tree_insert(stopwords3, &sw, 0)==NULL));
|
||||
}
|
||||
|
||||
|
||||
if(!stopwords3)
|
||||
int ft_init_stopwords()
|
||||
{
|
||||
if (!stopwords3)
|
||||
{
|
||||
if(!(stopwords3=(TREE *)my_malloc(sizeof(TREE),MYF(0)))) return -1;
|
||||
init_tree(stopwords3,0,0,sizeof(FT_STOPWORD),(qsort_cmp2)&FT_STOPWORD_cmp,0,
|
||||
NULL, NULL);
|
||||
if (!(stopwords3=(TREE *)my_malloc(sizeof(TREE),MYF(0))))
|
||||
return -1;
|
||||
init_tree(stopwords3,0,0,sizeof(FT_STOPWORD),(qsort_cmp2)&FT_STOPWORD_cmp,
|
||||
0, (tree_element_free)&FT_STOPWORD_free, NULL);
|
||||
}
|
||||
|
||||
if(!sws) return 0;
|
||||
|
||||
for(;*sws;sws++)
|
||||
if (ft_stopword_file)
|
||||
{
|
||||
if( (sw.len= (uint) strlen(sw.pos=*sws)) < ft_min_word_len) continue;
|
||||
if(!tree_insert(stopwords3, &sw, 0))
|
||||
File fd;
|
||||
my_off_t len;
|
||||
byte *buffer, *start, *end;
|
||||
FT_WORD w;
|
||||
int err=-1;
|
||||
|
||||
if (!*ft_stopword_file)
|
||||
return 0;
|
||||
|
||||
if ((fd=my_open(ft_stopword_file, O_RDONLY, MYF(MY_WME))) == -1)
|
||||
return -1;
|
||||
len=my_seek(fd, 0L, MY_SEEK_END, MYF(0));
|
||||
my_seek(fd, 0L, MY_SEEK_SET, MYF(0));
|
||||
if (!(start=buffer=my_malloc(len+1, MYF(MY_WME))))
|
||||
{
|
||||
delete_tree(stopwords3); /* purecov: inspected */
|
||||
return -1; /* purecov: inspected */
|
||||
my_close(fd, MYF(MY_WME));
|
||||
return -1;
|
||||
}
|
||||
len=my_read(fd, buffer, len, MYF(MY_WME));
|
||||
end=start+len;
|
||||
while (ft_simple_get_word(&start, end, &w))
|
||||
{
|
||||
if (ft_add_stopword(my_strdup_with_length(w.pos, w.len, MYF(0))))
|
||||
goto err1;
|
||||
}
|
||||
err=0;
|
||||
err1:
|
||||
my_free(buffer, MYF(0));
|
||||
err0:
|
||||
my_close(fd, MYF(MY_WME));
|
||||
return err;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* compatibility mode: to be removed */
|
||||
char **sws=ft_precompiled_stopwords;
|
||||
|
||||
for (;*sws;sws++)
|
||||
{
|
||||
if (ft_add_stopword(*sws))
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
|
@ -72,7 +119,7 @@ void ft_free_stopwords()
|
|||
{
|
||||
if (stopwords3)
|
||||
{
|
||||
delete_tree(stopwords3); /* purecov: inspected */
|
||||
delete_tree(stopwords3); /* purecov: inspected */
|
||||
my_free((char*) stopwords3,MYF(0));
|
||||
stopwords3=0;
|
||||
}
|
||||
|
|
|
@ -860,7 +860,7 @@ static int myisamchk(MI_CHECK *param, my_string filename)
|
|||
else
|
||||
{
|
||||
if (share->fulltext_index)
|
||||
ft_init_stopwords(ft_precompiled_stopwords); /* SerG */
|
||||
ft_init_stopwords();
|
||||
|
||||
if (!(param->testflag & T_READONLY))
|
||||
lock_type = F_WRLCK; /* table is changed */
|
||||
|
|
|
@ -2072,8 +2072,8 @@ int main(int argc, char **argv)
|
|||
#endif
|
||||
|
||||
if (opt_myisam_log)
|
||||
(void) mi_log( 1 );
|
||||
ft_init_stopwords(ft_precompiled_stopwords);
|
||||
(void) mi_log(1);
|
||||
ft_init_stopwords();
|
||||
|
||||
#ifdef __WIN__
|
||||
if (!opt_console)
|
||||
|
@ -2929,7 +2929,7 @@ enum options {
|
|||
OPT_CONNECT_TIMEOUT, OPT_DELAYED_INSERT_TIMEOUT,
|
||||
OPT_DELAYED_INSERT_LIMIT, OPT_DELAYED_QUEUE_SIZE,
|
||||
OPT_FLUSH_TIME, OPT_FT_MIN_WORD_LEN,
|
||||
OPT_FT_MAX_WORD_LEN, OPT_FT_MAX_WORD_LEN_FOR_SORT,
|
||||
OPT_FT_MAX_WORD_LEN, OPT_FT_MAX_WORD_LEN_FOR_SORT, OPT_FT_STOPWORD_FILE,
|
||||
OPT_INTERACTIVE_TIMEOUT, OPT_JOIN_BUFF_SIZE,
|
||||
OPT_KEY_BUFFER_SIZE, OPT_LONG_QUERY_TIME,
|
||||
OPT_LOWER_CASE_TABLE_NAMES, OPT_MAX_ALLOWED_PACKET,
|
||||
|
@ -3415,7 +3415,8 @@ struct my_option my_long_options[] =
|
|||
(gptr*) &max_system_variables.log_warnings, 0, GET_BOOL, NO_ARG, 0, 0, 0,
|
||||
0, 0, 0},
|
||||
{ "back_log", OPT_BACK_LOG,
|
||||
"The number of outstanding connection requests MySQL can have. This comes into play when the main MySQL thread gets very many connection requests in a very short time.", (gptr*) &back_log, (gptr*) &back_log, 0, GET_ULONG,
|
||||
"The number of outstanding connection requests MySQL can have. This comes into play when the main MySQL thread gets very many connection requests in a very short time.",
|
||||
(gptr*) &back_log, (gptr*) &back_log, 0, GET_ULONG,
|
||||
REQUIRED_ARG, 50, 1, 65535, 0, 1, 0 },
|
||||
#ifdef HAVE_BERKELEY_DB
|
||||
{ "bdb_cache_size", OPT_BDB_CACHE_SIZE,
|
||||
|
@ -3468,9 +3469,13 @@ struct my_option my_long_options[] =
|
|||
(gptr*) &ft_max_word_len, (gptr*) &ft_max_word_len, 0, GET_ULONG,
|
||||
REQUIRED_ARG, HA_FT_MAXLEN, 10, HA_FT_MAXLEN, 0, 1, 0},
|
||||
{ "ft_max_word_len_for_sort", OPT_FT_MAX_WORD_LEN_FOR_SORT,
|
||||
"Undocumented", (gptr*) &ft_max_word_len_for_sort,
|
||||
(gptr*) &ft_max_word_len_for_sort, 0, GET_ULONG, REQUIRED_ARG, 20, 4,
|
||||
HA_FT_MAXLEN, 0, 1, 0},
|
||||
"The maximum length of the word for repair_by_sorting. Longer words are included the slow way. The lower this value, the more words will be put in one sort bucket.",
|
||||
(gptr*) &ft_max_word_len_for_sort, (gptr*) &ft_max_word_len_for_sort, 0, GET_ULONG,
|
||||
REQUIRED_ARG, 20, 4, HA_FT_MAXLEN, 0, 1, 0},
|
||||
{ "ft_stopword_file", OPT_FT_STOPWORD_FILE,
|
||||
"Use stopwords from this file instead of built-in list.",
|
||||
(gptr*) &ft_stopword_file, (gptr*) &ft_stopword_file, 0, GET_STR,
|
||||
REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
|
||||
#ifdef HAVE_INNOBASE_DB
|
||||
{"innodb_mirrored_log_groups", OPT_INNODB_MIRRORED_LOG_GROUPS,
|
||||
"Number of identical copies of log groups we keep for the database. Currently this should be set to 1.",
|
||||
|
|
Loading…
Add table
Reference in a new issue