mirror of
https://github.com/MariaDB/server.git
synced 2025-01-22 06:44:16 +01:00
f2040f48c4
Code cleanup: removed duplicate stopword.
637 lines
9.2 KiB
C
637 lines
9.2 KiB
C
/* Copyright (C) 2000-2005 MySQL AB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
|
|
|
|
/* Written by Sergei A. Golubchik, who has a shared copyright to this code */
|
|
|
|
#include "ftdefs.h"
|
|
|
|
ulong ft_min_word_len=4;
|
|
ulong ft_max_word_len=HA_FT_MAXCHARLEN;
|
|
ulong ft_query_expansion_limit=5;
|
|
char ft_boolean_syntax[]="+ -><()~*:\"\"&|";
|
|
|
|
const HA_KEYSEG ft_keysegs[FT_SEGS]={
|
|
{
|
|
0, /* charset */
|
|
HA_FT_WLEN, /* start */
|
|
0, /* null_pos */
|
|
0, /* Bit pos */
|
|
HA_VAR_LENGTH_PART | HA_PACK_KEY, /* flag */
|
|
HA_FT_MAXBYTELEN, /* length */
|
|
HA_KEYTYPE_VARTEXT2, /* type */
|
|
63, /* language (will be overwritten) */
|
|
0, /* null_bit */
|
|
2, 0, 0 /* bit_start, bit_end, bit_length */
|
|
},
|
|
{
|
|
/*
|
|
Note, this (and the last HA_KEYTYPE_END) segment should NOT
|
|
be packed in any way, otherwise w_search() won't be able to
|
|
update key entry 'in vivo'
|
|
*/
|
|
0, 0, 0, 0, HA_NO_SORT, HA_FT_WLEN, HA_FT_WTYPE, 63, 0, 0, 0, 0
|
|
}
|
|
};
|
|
|
|
const struct _ft_vft _ft_vft_nlq = {
|
|
ft_nlq_read_next, ft_nlq_find_relevance, ft_nlq_close_search,
|
|
ft_nlq_get_relevance, ft_nlq_reinit_search
|
|
};
|
|
const struct _ft_vft _ft_vft_boolean = {
|
|
ft_boolean_read_next, ft_boolean_find_relevance, ft_boolean_close_search,
|
|
ft_boolean_get_relevance, ft_boolean_reinit_search
|
|
};
|
|
|
|
|
|
FT_INFO *ft_init_search(uint flags, void *info, uint keynr,
|
|
uchar *query, uint query_len, CHARSET_INFO *cs,
|
|
uchar *record)
|
|
{
|
|
FT_INFO *res;
|
|
if (flags & FT_BOOL)
|
|
res= ft_init_boolean_search((MI_INFO *)info, keynr, query, query_len,cs);
|
|
else
|
|
res= ft_init_nlq_search((MI_INFO *)info, keynr, query, query_len, flags,
|
|
record);
|
|
return res;
|
|
}
|
|
|
|
/*
  Starts out as a null pointer.
  NOTE(review): presumably the name of an external stopword file that
  replaces the precompiled list; confirm where this is read.
*/
const char *ft_stopword_file= NULL;
|
|
/*
  NULL-terminated array of built-in stopwords.

  The words are compiled in only when COMPILE_STOPWORDS_IN is defined;
  otherwise the array holds just the terminating NULL.
*/
const char *ft_precompiled_stopwords[]=
{
#ifdef COMPILE_STOPWORDS_IN

/*
  This particular stopword list was taken from the SMART distribution
    ftp://ftp.cs.cornell.edu/pub/smart/smart.11.0.tar.Z
  it was slightly modified to my taste, though
*/

  "a's", "able", "about", "above", "according", "accordingly", "across",
  "actually", "after", "afterwards", "again", "against", "ain't", "all",
  "allow", "allows", "almost", "alone", "along", "already", "also",
  "although", "always", "am", "among", "amongst", "an", "and", "another",
  "any", "anybody", "anyhow", "anyone", "anything", "anyway", "anyways",
  "anywhere", "apart", "appear", "appreciate", "appropriate", "are",
  "aren't", "around", "as", "aside", "ask", "asking", "associated", "at",
  "available", "away", "awfully",

  "be", "became", "because", "become", "becomes", "becoming", "been",
  "before", "beforehand", "behind", "being", "believe", "below", "beside",
  "besides", "best", "better", "between", "beyond", "both", "brief", "but",
  "by",

  "c'mon", "c's", "came", "can", "can't", "cannot", "cant", "cause",
  "causes", "certain", "certainly", "changes", "clearly", "co", "com",
  "come", "comes", "concerning", "consequently", "consider", "considering",
  "contain", "containing", "contains", "corresponding", "could", "couldn't",
  "course", "currently",

  "definitely", "described", "despite", "did", "didn't", "different", "do",
  "does", "doesn't", "doing", "don't", "done", "down", "downwards",
  "during",

  "each", "edu", "eg", "eight", "either", "else", "elsewhere", "enough",
  "entirely", "especially", "et", "etc", "even", "ever", "every",
  "everybody", "everyone", "everything", "everywhere", "ex", "exactly",
  "example", "except",

  "far", "few", "fifth", "first", "five", "followed", "following",
  "follows", "for", "former", "formerly", "forth", "four", "from",
  "further", "furthermore",

  "get", "gets", "getting", "given", "gives", "go", "goes", "going", "gone",
  "got", "gotten", "greetings",

  "had", "hadn't", "happens", "hardly", "has", "hasn't", "have", "haven't",
  "having", "he", "he's", "hello", "help", "hence", "her", "here", "here's",
  "hereafter", "hereby", "herein", "hereupon", "hers", "herself", "hi",
  "him", "himself", "his", "hither", "hopefully", "how", "howbeit",
  "however",

  "i'd", "i'll", "i'm", "i've", "ie", "if", "ignored", "immediate", "in",
  "inasmuch", "inc", "indeed", "indicate", "indicated", "indicates",
  "inner", "insofar", "instead", "into", "inward", "is", "isn't", "it",
  "it'd", "it'll", "it's", "its", "itself",

  "just",

  "keep", "keeps", "kept", "know", "knows", "known",

  "last", "lately", "later", "latter", "latterly", "least", "less", "lest",
  "let", "let's", "like", "liked", "likely", "little", "look", "looking",
  "looks", "ltd",

  "mainly", "many", "may", "maybe", "me", "mean", "meanwhile", "merely",
  "might", "more", "moreover", "most", "mostly", "much", "must", "my",
  "myself",

  "name", "namely", "nd", "near", "nearly", "necessary", "need", "needs",
  "neither", "never", "nevertheless", "new", "next", "nine", "no", "nobody",
  "non", "none", "noone", "nor", "normally", "not", "nothing", "novel",
  "now", "nowhere",

  "obviously", "of", "off", "often", "oh", "ok", "okay", "old", "on",
  "once", "one", "ones", "only", "onto", "or", "other", "others",
  "otherwise", "ought", "our", "ours", "ourselves", "out", "outside",
  "over", "overall", "own",

  "particular", "particularly", "per", "perhaps", "placed", "please",
  "plus", "possible", "presumably", "probably", "provides",

  "que", "quite", "qv",

  "rather", "rd", "re", "really", "reasonably", "regarding", "regardless",
  "regards", "relatively", "respectively", "right",

  "said", "same", "saw", "say", "saying", "says", "second", "secondly",
  "see", "seeing", "seem", "seemed", "seeming", "seems", "seen", "self",
  "selves", "sensible", "sent", "serious", "seriously", "seven", "several",
  "shall", "she", "should", "shouldn't", "since", "six", "so", "some",
  "somebody", "somehow", "someone", "something", "sometime", "sometimes",
  "somewhat", "somewhere", "soon", "sorry", "specified", "specify",
  "specifying", "still", "sub", "such", "sup", "sure",

  "t's", "take", "taken", "tell", "tends", "th", "than", "thank", "thanks",
  "thanx", "that", "that's", "thats", "the", "their", "theirs", "them",
  "themselves", "then", "thence", "there", "there's", "thereafter",
  "thereby", "therefore", "therein", "theres", "thereupon", "these", "they",
  "they'd", "they'll", "they're", "they've", "think", "third", "this",
  "thorough", "thoroughly", "those", "though", "three", "through",
  "throughout", "thru", "thus", "to", "together", "too", "took", "toward",
  "towards", "tried", "tries", "truly", "try", "trying", "twice", "two",

  "un", "under", "unfortunately", "unless", "unlikely", "until", "unto",
  "up", "upon", "us", "use", "used", "useful", "uses", "using", "usually",

  "value", "various", "very", "via", "viz", "vs",

  "want", "wants", "was", "wasn't", "way", "we", "we'd", "we'll", "we're",
  "we've", "welcome", "well", "went", "were", "weren't", "what", "what's",
  "whatever", "when", "whence", "whenever", "where", "where's",
  "whereafter", "whereas", "whereby", "wherein", "whereupon", "wherever",
  "whether", "which", "while", "whither", "who", "who's", "whoever",
  "whole", "whom", "whose", "why", "will", "willing", "wish", "with",
  "within", "without", "won't", "wonder", "would", "wouldn't",

  "yes", "yet", "you", "you'd", "you'll", "you're", "you've", "your",
  "yours", "yourself", "yourselves",

  "zero",

#endif

  NULL                                  /* end-of-list marker */
};
|
|
|
|
/*
  Parse callback of the default full-text parser.

  Hands the whole document (param->doc, param->length) straight to the
  param->mysql_parse callback and returns that callback's result.
*/
static int ft_default_parser_parse(MYSQL_FTPARSER_PARAM *param)
{
  return param->mysql_parse(param, param->doc, param->length);
}
|
|
|
|
struct st_mysql_ftparser ft_default_parser=
|
|
{
|
|
MYSQL_FTPARSER_INTERFACE_VERSION, ft_default_parser_parse, 0, 0
|
|
};
|
|
|