From 736e5b0de2281ab75af2cf3d88480aa6db9a18d2 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 9 Oct 2001 14:53:54 +0200 Subject: [PATCH] Boolean search passes _some_ tests sql/ha_myisam.cc: One more abstraction layer added (C++ emulated in C :). include/ft_global.h: boolean search code plugged in mysql-test/r/fulltext_cache.result: boolean search code plugged in mysql-test/r/fulltext_left_join.result: boolean search code plugged in mysql-test/r/fulltext_multi.result: boolean search code plugged in mysql-test/r/fulltext_order_by.result: boolean search code plugged in sql/lex.h: IN BOOLEAN MODE syntax myisam/ft_nlq_search.c: boolean search code plugged in myisam/ftdefs.h: boolean search code plugged in sql/ha_myisam.h: boolean search code plugged in sql/handler.h: boolean search code plugged in include/my_base.h: do_not_sort_keyseg feature for MyISAM include/my_global.h: #define comp(a,b) (((a) < (b)) ? -1 : ((a) > (b)) ? 1 : 0) myisam/ft_boolean_search.c: bugfixing myisam/ft_parser.c: cleanup myisam/ft_static.c: do_not_sort_keyseg feature for MyISAM myisam/mi_search.c: do_not_sort_keyseg feature for MyISAM myisam/mi_write.c: cleanup mysql-test/t/fulltext.test: boolean search tests added BitKeeper/etc/ignore: Added myisam/FT1.MYD myisam/FT1.MYI to the ignore list sql/item_func.cc: boolean search sql/item_func.h: boolean search sql/sql_yacc.yy: boolean search --- .bzrignore | 2 + include/ft_global.h | 33 ++-- include/my_base.h | 1 + include/my_global.h | 3 +- myisam/ft_boolean_search.c | 240 ++++++++++++++----------- myisam/ft_nlq_search.c | 85 +++++++-- myisam/ft_parser.c | 2 +- myisam/ft_static.c | 21 ++- myisam/ftdefs.h | 23 ++- myisam/mi_search.c | 120 +++++++------ myisam/mi_write.c | 10 +- mysql-test/r/fulltext_cache.result | 4 +- mysql-test/r/fulltext_left_join.result | 2 +- mysql-test/r/fulltext_multi.result | 8 +- mysql-test/r/fulltext_order_by.result | 18 +- mysql-test/t/fulltext.test | 13 +- sql/ha_myisam.cc | 2 +- sql/ha_myisam.h | 6 +- sql/handler.h | 8 
+- sql/item_func.cc | 109 ++++++----- sql/item_func.h | 21 +-- sql/lex.h | 1 + sql/sql_yacc.yy | 22 ++- 23 files changed, 466 insertions(+), 288 deletions(-) diff --git a/.bzrignore b/.bzrignore index bfb1710d33d..492a91ee534 100644 --- a/.bzrignore +++ b/.bzrignore @@ -412,3 +412,5 @@ libmysqld/examples/sql_string.cc libmysqld/examples/sql_string.h libmysqld/examples/mysql libmysqld/examples/mysqltest +myisam/FT1.MYD +myisam/FT1.MYI diff --git a/include/ft_global.h b/include/ft_global.h index b6f33ec801b..8588684907d 100644 --- a/include/ft_global.h +++ b/include/ft_global.h @@ -29,17 +29,21 @@ extern "C" { #define FT_QUERY_MAXLEN 1024 #define HA_FT_MAXLEN 254 -typedef struct ft_doc_rec { - my_off_t dpos; - double weight; -} FT_DOC; +typedef struct st_ft_info FT_INFO; +struct _ft_vft { + int (*read_next)(FT_INFO *, char *); + float (*find_relevance)(FT_INFO *, my_off_t); + void (*close_search)(FT_INFO *); + float (*get_relevance)(FT_INFO *); + my_off_t (*get_docid)(FT_INFO *); + void (*reinit_search)(FT_INFO *); +}; -typedef struct st_ft_doclist { - int ndocs; - int curdoc; - void *info; /* actually (MI_INFO *) but don't want to include myisam.h */ - FT_DOC doc[1]; -} FT_DOCLIST; +#ifndef FT_CORE +struct st_ft_info { + struct _ft_vft *please; /* INTERCAL style :-) */ +}; +#endif extern const char *ft_precompiled_stopwords[]; @@ -50,12 +54,9 @@ extern uint ft_max_word_len_for_sort; int ft_init_stopwords(const char **); void ft_free_stopwords(void); -FT_DOCLIST * ft_nlq_init_search(void *, uint, byte *, uint, my_bool); -int ft_nlq_read_next(FT_DOCLIST *, char *); -#define ft_nlq_close_search(handler) my_free(((gptr)(handler)),MYF(0)) -#define ft_nlq_get_relevance(handler) (((FT_DOCLIST *)(handler))->doc[((FT_DOCLIST *)(handler))->curdoc].weight) -#define ft_nlq_get_docid(handler) (((FT_DOCLIST *)(handler))->doc[((FT_DOCLIST *)(handler))->curdoc].dpos) -#define ft_nlq_reinit_search(handler) (((FT_DOCLIST *)(handler))->curdoc=-1) +#define FT_NL 0 +#define FT_BOOL 1 
+FT_INFO *ft_init_search(uint,void *, uint, byte *, uint, my_bool); #ifdef __cplusplus } diff --git a/include/my_base.h b/include/my_base.h index 800e623bc2c..4fdc2c33a39 100644 --- a/include/my_base.h +++ b/include/my_base.h @@ -159,6 +159,7 @@ enum ha_base_keytype { #define HA_BLOB_PART 32 #define HA_SWAP_KEY 64 #define HA_REVERSE_SORT 128 /* Sort key in reverse order */ +#define HA_NO_SORT 256 /* do not bother sorting on this keyseg */ /* optionbits for database */ #define HA_OPTION_PACK_RECORD 1 diff --git a/include/my_global.h b/include/my_global.h index 7cd79b3e078..c6c18a987e2 100644 --- a/include/my_global.h +++ b/include/my_global.h @@ -262,7 +262,7 @@ int __void__; #define LINT_INIT(var) #endif -/* Define som useful general macros */ +/* Define some useful general macros */ #if defined(__cplusplus) && defined(__GNUC__) #define max(a, b) ((a) >? (b)) #define min(a, b) ((a) (b)) ? 1 : 0) #define sgn(a) (((a) < 0) ? -1 : ((a) > 0) ? 1 : 0) #define swap(t,a,b) { register t dummy; dummy = a; a = b; b = dummy; } #define test(a) ((a) ? 1 : 0) diff --git a/myisam/ft_boolean_search.c b/myisam/ft_boolean_search.c index 79fecccaaf6..32d8e7941bb 100644 --- a/myisam/ft_boolean_search.c +++ b/myisam/ft_boolean_search.c @@ -16,6 +16,7 @@ /* Written by Sergei A. 
Golubchik, who has a shared copyright to this code */ +#define FT_CORE #include "ftdefs.h" #include @@ -73,8 +74,9 @@ typedef struct { byte word[1]; } FTB_WORD; -typedef struct st_ftb_handler { - MI_INFO *info; +typedef struct st_ft_info { + struct _ft_vft *please; + MI_INFO *info; uint keynr; int ok; FTB_EXPR *root; @@ -85,10 +87,10 @@ typedef struct st_ftb_handler { int FTB_WORD_cmp(void *v, byte *a, byte *b) { /* ORDER BY docid, ndepth DESC */ - int i=((FTB_WORD *)a)->docid-((FTB_WORD *)b)->docid; + int i=comp(((FTB_WORD *)a)->docid, ((FTB_WORD *)b)->docid); if (!i) - i=((FTB_WORD *)b)->ndepth-((FTB_WORD *)a)->ndepth; - return sgn(i); + i=comp(((FTB_WORD *)b)->ndepth,((FTB_WORD *)a)->ndepth); + return i; } void _ftb_parse_query(FTB *ftb, byte **start, byte *end, @@ -108,70 +110,71 @@ void _ftb_parse_query(FTB *ftb, byte **start, byte *end, if (! ftb->ok) return; - while (res=ftb_get_word(&start,end,&w,¶m)) + param.prev=' '; + while (res=ft_get_word(start,end,&w,¶m)) { byte r=param.plusminus; float weight=(param.pmsign ? nwghts : wghts)[(r>5)?5:((r<-5)?-5:r)]; switch (res) { case FTB_LBR: - ftbe=(FTB_EXPR *)alloc_root(&ftb->mem_root, sizeof(FTB_EXPR)); - ftbe->yesno=param.yesno; - ftbe->weight=weight; - ftbe->up=up; - ftbe->ythresh=0; - ftbe->docid=HA_POS_ERROR; - if (ftbw->yesno > 0) up->ythresh++; + ftbe=(FTB_EXPR *)alloc_root(&ftb->mem_root, sizeof(FTB_EXPR)); + ftbe->yesno=param.yesno; + ftbe->weight=weight; + ftbe->up=up; + ftbe->ythresh=0; + ftbe->docid=HA_POS_ERROR; + if (ftbw->yesno > 0) up->ythresh++; _ftb_parse_query(ftb, start, end, ftbe, depth+1, - (param.yesno<0 ? depth+1 : ndepth)); + (param.yesno<0 ? depth+1 : ndepth)); break; case FTB_RBR: return; case 1: - ftbw=(FTB_WORD *)alloc_root(&ftb->mem_root, + ftbw=(FTB_WORD *)alloc_root(&ftb->mem_root, sizeof(FTB_WORD) + (param.trunc ? 
MI_MAX_KEY_BUFF : w.len+extra)); ftbw->len=w.len + !param.trunc; - ftbw->yesno=param.yesno; - ftbw->trunc=param.trunc; /* 0 or 1 */ - ftbw->weight=weight; - ftbw->up=up; - ftbw->docid=HA_POS_ERROR; - ftbw->ndepth= param.yesno<0 ? depth : ndepth; + ftbw->yesno=param.yesno; + ftbw->trunc=param.trunc; /* 0 or 1 */ + ftbw->weight=weight; + ftbw->up=up; + ftbw->docid=HA_POS_ERROR; + ftbw->ndepth= param.yesno<0 ? depth : ndepth; memcpy(ftbw->word+1, w.pos, w.len); ftbw->word[0]=w.len; - if (ftbw->yesno > 0) up->ythresh++; - /*****************************************/ - r=_mi_search(info, keyinfo, ftbw->word, ftbw->len, - SEARCH_FIND | SEARCH_PREFIX, keyroot); - if (!r) - { - r=_mi_compare_text(default_charset_info, - info->lastkey+ftbw->trunc,ftbw->len, - ftbw->word+ftbw->trunc,ftbw->len,0); - } - if (r) /* not found */ - { - if (ftbw->yesno>0 && ftbw->up->up==0) - { /* this word MUST BE present in every document returned, - so we can abort the search right now */ - ftb->ok=0; - return; - } - } - else - { - memcpy(ftbw->word, info->lastkey, info->lastkey_length); - ftbw->docid=info->lastpos; - queue_insert(& ftb->queue, (byte *)ftbw); - } - /*****************************************/ + if (ftbw->yesno > 0) up->ythresh++; + /*****************************************/ + r=_mi_search(info, keyinfo, ftbw->word, ftbw->len, + SEARCH_FIND | SEARCH_PREFIX, keyroot); + if (!r) + { + r=_mi_compare_text(default_charset_info, + info->lastkey+ftbw->trunc,ftbw->len, + ftbw->word+ftbw->trunc,ftbw->len,0); + } + if (r) /* not found */ + { + if (ftbw->yesno>0 && ftbw->up->up==0) + { /* this word MUST BE present in every document returned, + so we can abort the search right now */ + ftb->ok=0; + return; + } + } + else + { + memcpy(ftbw->word, info->lastkey, info->lastkey_length); + ftbw->docid=info->lastpos; + queue_insert(& ftb->queue, (byte *)ftbw); + } + /*****************************************/ break; } } return; } -FTB * ft_boolean_search_init(MI_INFO *info, uint keynr, byte 
*query, - uint query_len) +FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, byte *query, + uint query_len, my_bool presort __attribute__((unused))) { FTB *ftb; FTB_EXPR *ftbe; @@ -179,11 +182,12 @@ FTB * ft_boolean_search_init(MI_INFO *info, uint keynr, byte *query, if (!(ftb=(FTB *)my_malloc(sizeof(FTB), MYF(MY_WME)))) return 0; + ftb->please=& _ft_vft_boolean; ftb->ok=1; ftb->info=info; ftb->keynr=keynr; - init_alloc_root(&ftb->mem_root, query_len,0); + init_alloc_root(&ftb->mem_root, 1024, 1024); /* hack: instead of init_queue, we'll use reinit queue to be able * to alloc queue with alloc_root() @@ -201,7 +205,7 @@ FTB * ft_boolean_search_init(MI_INFO *info, uint keynr, byte *query, return ftb; } -int ft_boolean_search_next(FTB *ftb, char *record) +int ft_boolean_read_next(FT_INFO *ftb, char *record) { FTB_EXPR *ftbe, *up; FTB_WORD *ftbw; @@ -218,61 +222,64 @@ int ft_boolean_search_next(FTB *ftb, char *record) return my_errno; /* black magic OFF */ - while(ftb->ok && ftb->queue.elements) - { - curdoc=((FTB_WORD *)queue_top(& ftb->queue))->docid; + if (!ftb->queue.elements) + return my_errno=HA_ERR_END_OF_FILE; + while(ftb->ok && + (curdoc=((FTB_WORD *)queue_top(& ftb->queue))->docid) != HA_POS_ERROR) + { while (curdoc==(ftbw=(FTB_WORD *)queue_top(& ftb->queue))->docid) { float weight=ftbw->weight; - uint yn=ftbw->yesno; + int yn=ftbw->yesno; for (ftbe=ftbw->up; ftbe; ftbe=ftbe->up) { - if (ftbe->docid != curdoc) - { - ftbe->cur_weight=ftbe->yesses=ftbe->nos=0; - ftbe->docid=curdoc; - } - if (yn>0) - { - ftbe->cur_weight+=weight; - if (++ftbe->yesses >= ftbe->ythresh && !ftbe->nos) - { + if (ftbe->docid != curdoc) + { + ftbe->cur_weight=ftbe->yesses=ftbe->nos=0; + ftbe->docid=curdoc; + } + if (yn>0) + { + ftbe->cur_weight+=weight; + if (++ftbe->yesses >= ftbe->ythresh && !ftbe->nos) + { yn=ftbe->yesno; - weight=ftbe->cur_weight*ftbe->weight; - } - else - break; - } - else - if (yn<0) - { - /* NOTE: special sort function of queue assures that all yn<0 - 
* events for every particular subexpression will happen - * BEFORE all yn>=0 events. So no already matched expression - * can become not-matched again. - */ - ++ftbe->nos; - break; - } - else + weight=ftbe->cur_weight*ftbe->weight; + } + else + break; + } + else + if (yn<0) + { + /* NOTE: special sort function of queue assures that all yn<0 + * events for every particular subexpression will + * "auto-magically" happen BEFORE all yn>=0 events. So no + * already matched expression can become not-matched again. + */ + ++ftbe->nos; + break; + } + else /* if (yn==0) */ - { - if (ftbe->yesses >= ftbe->ythresh && !ftbe->nos) - { + { + if (ftbe->yesses >= ftbe->ythresh && !ftbe->nos) + { yn=ftbe->yesno; - weight*=ftbe->weight; - } - else - { - ftbe->cur_weight+=weight; - break; - } - } + ftbe->cur_weight=weight; + weight*=ftbe->weight; + } + else + { + ftbe->cur_weight+=weight; + break; + } + } } /* update queue */ r=_mi_search(info, keyinfo, ftbw->word, ftbw->len, - SEARCH_FIND | SEARCH_PREFIX, keyroot); + SEARCH_BIGGER , keyroot); if (!r) { r=_mi_compare_text(default_charset_info, @@ -281,19 +288,19 @@ int ft_boolean_search_next(FTB *ftb, char *record) } if (r) /* not found */ { - queue_remove(& ftb->queue, 0); - if (ftbw->yesno>0 && ftbw->up->up==0) - { /* this word MUST BE present in every document returned, - so we can stop the search right now */ - ftb->ok=0; - } + ftbw->docid=HA_POS_ERROR; + if (ftbw->yesno>0 && ftbw->up->up==0) + { /* this word MUST BE present in every document returned, + so we can stop the search right now */ + ftb->ok=0; + } } else { memcpy(ftbw->word, info->lastkey, info->lastkey_length); ftbw->docid=info->lastpos; - queue_replaced(& ftb->queue); } + queue_replaced(& ftb->queue); } ftbe=ftb->root; @@ -305,8 +312,8 @@ int ft_boolean_search_next(FTB *ftb, char *record) /* info->lastpos=curdoc; */ /* do I need this ? 
*/ if (!(*info->read_record)(info,curdoc,record)) { - info->update|= HA_STATE_AKTIV; /* Record is read */ - return 0; + info->update|= HA_STATE_AKTIV; /* Record is read */ + return 0; } return my_errno; } @@ -314,3 +321,30 @@ int ft_boolean_search_next(FTB *ftb, char *record) return my_errno=HA_ERR_END_OF_FILE; } +float ft_boolean_find_relevance(FT_INFO *ftb, my_off_t docid) +{ + fprintf(stderr, "ft_boolean_find_relevance called!\n"); + return -1.0; /* to be done via str scan */ +} + +void ft_boolean_close_search(FT_INFO *ftb) +{ + free_root(& ftb->mem_root, MYF(0)); + my_free((gptr)ftb,MYF(0)); +} + +float ft_boolean_get_relevance(FT_INFO *ftb) +{ + return ftb->root->cur_weight; +} + +my_off_t ft_boolean_get_docid(FT_INFO *ftb) +{ + return HA_POS_ERROR; +} + +void ft_boolean_reinit_search(FT_INFO *ftb) +{ + fprintf(stderr, "ft_boolean_reinit_search called!\n"); +} + diff --git a/myisam/ft_nlq_search.c b/myisam/ft_nlq_search.c index be11c5a9c4c..f0f878a7f16 100644 --- a/myisam/ft_nlq_search.c +++ b/myisam/ft_nlq_search.c @@ -16,10 +16,24 @@ /* Written by Sergei A. 
Golubchik, who has a shared copyright to this code */ +#define FT_CORE #include "ftdefs.h" /* search with natural language queries */ +typedef struct ft_doc_rec { + my_off_t dpos; + double weight; +} FT_DOC; + +struct st_ft_info { + struct _ft_vft *please; + MI_INFO *info; + int ndocs; + int curdoc; + FT_DOC doc[1]; +}; + typedef struct st_all_in_one { MI_INFO *info; uint keynr; @@ -152,27 +166,27 @@ static int FT_DOC_cmp(FT_DOC *a, FT_DOC *b) return sgn(b->weight - a->weight); } -FT_DOCLIST *ft_nlq_init_search(void *info, uint keynr, byte *query, - uint query_len, my_bool presort) +FT_INFO *ft_init_nlq_search(MI_INFO *info, uint keynr, byte *query, + uint query_len, my_bool presort) { TREE *wtree, allocated_wtree; ALL_IN_ONE aio; FT_DOC *dptr; - FT_DOCLIST *dlist=NULL; - my_off_t saved_lastpos=((MI_INFO *)info)->lastpos; + FT_INFO *dlist=NULL; + my_off_t saved_lastpos=info->lastpos; /* black magic ON */ - if ((int) (keynr = _mi_check_index((MI_INFO *)info,keynr)) < 0) + if ((int) (keynr = _mi_check_index(info,keynr)) < 0) return NULL; - if (_mi_readinfo((MI_INFO *)info,F_RDLCK,1)) + if (_mi_readinfo(info,F_RDLCK,1)) return NULL; /* black magic OFF */ - aio.info=(MI_INFO *)info; + aio.info=info; aio.keynr=keynr; - aio.keybuff=aio.info->lastkey+aio.info->s->base.max_key_length; - aio.keyinfo=aio.info->s->keyinfo+keynr; - aio.key_root=aio.info->s->state.key_root[keynr]; + aio.keybuff=info->lastkey+info->s->base.max_key_length; + aio.keyinfo=info->s->keyinfo+keynr; + aio.key_root=info->s->state.key_root[keynr]; bzero(&allocated_wtree,sizeof(allocated_wtree)); @@ -186,18 +200,19 @@ FT_DOCLIST *ft_nlq_init_search(void *info, uint keynr, byte *query, left_root_right)) goto err2; - dlist=(FT_DOCLIST *)my_malloc(sizeof(FT_DOCLIST)+ + dlist=(FT_INFO *)my_malloc(sizeof(FT_INFO)+ sizeof(FT_DOC)*(aio.dtree.elements_in_tree-1),MYF(0)); if(!dlist) goto err2; + dlist->please=& _ft_vft_nlq; dlist->ndocs=aio.dtree.elements_in_tree; dlist->curdoc=-1; dlist->info=aio.info; 
dptr=dlist->doc; - tree_walk(&aio.dtree, (tree_walk_action)&walk_and_copy, &dptr, - left_root_right); + tree_walk(&aio.dtree, (tree_walk_action)&walk_and_copy, + &dptr, left_root_right); if(presort) qsort(dlist->doc, dlist->ndocs, sizeof(FT_DOC), (qsort_cmp)&FT_DOC_cmp); @@ -207,11 +222,11 @@ err2: delete_tree(&aio.dtree); err: - ((MI_INFO *)info)->lastpos=saved_lastpos; + info->lastpos=saved_lastpos; return dlist; } -int ft_nlq_read_next(FT_DOCLIST *handler, char *record) +int ft_nlq_read_next(FT_INFO *handler, char *record) { MI_INFO *info= (MI_INFO *) handler->info; @@ -232,3 +247,43 @@ int ft_nlq_read_next(FT_DOCLIST *handler, char *record) return my_errno; } +float ft_nlq_find_relevance(FT_INFO *handler, my_off_t docid) +{ + int a,b,c; + FT_DOC *docs=handler->doc; + + // Assuming docs[] is sorted by dpos... + + for (a=0, b=handler->ndocs, c=(a+b)/2; b-a>1; c=(a+b)/2) + { + if (docs[c].dpos > docid) + b=c; + else + a=c; + } + if (docs[a].dpos == docid) + return docs[a].weight; + else + return 0.0; +} + +void ft_nlq_close_search(FT_INFO *handler) +{ + my_free((gptr)handler,MYF(0)); +} + +float ft_nlq_get_relevance(FT_INFO *handler) +{ + return handler->doc[handler->curdoc].weight; +} + +my_off_t ft_nlq_get_docid(FT_INFO *handler) +{ + return handler->doc[handler->curdoc].dpos; +} + +void ft_nlq_reinit_search(FT_INFO *handler) +{ + handler->curdoc=-1; +} + diff --git a/myisam/ft_parser.c b/myisam/ft_parser.c index 83b0956a752..466f1dfe021 100644 --- a/myisam/ft_parser.c +++ b/myisam/ft_parser.c @@ -135,7 +135,7 @@ byte ft_get_word(byte **start, byte *end, FT_WORD *word, FTB_PARAM *param) if (true_word_char(*doc)) break; if (*doc == FTB_LBR || *doc == FTB_RBR) { - param->prev=' '; + /* param->prev=' '; */ *start=doc+1; return *doc; } diff --git a/myisam/ft_static.c b/myisam/ft_static.c index 09afadec23f..494d7638d67 100644 --- a/myisam/ft_static.c +++ b/myisam/ft_static.c @@ -43,10 +43,29 @@ const MI_KEYSEG ft_keysegs[FT_SEGS]={ }, #endif /* EVAL_RUN */ { - 
HA_FT_WTYPE, 7, 0, 0, 0, 0, HA_FT_WLEN, 0, 0, NULL + HA_FT_WTYPE, 7, 0, 0, 0, HA_NO_SORT, HA_FT_WLEN, 0, 0, NULL } }; +const struct _ft_vft _ft_vft_nlq = { + ft_nlq_read_next, ft_nlq_find_relevance, ft_nlq_close_search, + ft_nlq_get_relevance, ft_nlq_get_docid, ft_nlq_reinit_search +}; +const struct _ft_vft _ft_vft_boolean = { + ft_boolean_read_next, ft_boolean_find_relevance, ft_boolean_close_search, + ft_boolean_get_relevance, ft_boolean_get_docid, ft_boolean_reinit_search +}; + +FT_INFO *(*_ft_init_vft[2])(MI_INFO *, uint, byte *, uint, my_bool) = +{ ft_init_nlq_search, ft_init_boolean_search }; + +FT_INFO *ft_init_search(uint mode, void *info, uint keynr, + byte *query, uint query_len, my_bool presort) +{ + return (*_ft_init_vft[mode])((MI_INFO *)info, keynr, + query, query_len, presort); +} + const char *ft_precompiled_stopwords[] = { #ifdef COMPILE_STOPWORDS_IN diff --git a/myisam/ftdefs.h b/myisam/ftdefs.h index 1a017d3c73a..9eedf57c759 100644 --- a/myisam/ftdefs.h +++ b/myisam/ftdefs.h @@ -95,9 +95,6 @@ extern ulong collstat; #define FTB_NEG '~' #define FTB_TRUNC '*' -// #define FTB_MAX_SUBEXPR 255 -// #define FTB_MAX_DEPTH 16 - typedef struct st_ft_word { byte * pos; uint len; @@ -116,7 +113,6 @@ typedef struct st_ftb_param { } FTB_PARAM; int is_stopword(char *word, uint len); -int is_boolean(byte *q, uint len); uint _ft_make_key(MI_INFO *, uint , byte *, FT_WORD *, my_off_t); @@ -127,6 +123,21 @@ TREE * ft_parse(TREE *, byte *, int); FT_WORD * ft_linearize(MI_INFO *, uint, byte *, TREE *); FT_WORD * _mi_ft_parserecord(MI_INFO *, uint , byte *, const byte *); -FT_DOCLIST * ft_nlq_search(MI_INFO *, uint, byte *, uint); -FT_DOCLIST * ft_boolean_search(MI_INFO *, uint, byte *, uint); +const struct _ft_vft _ft_vft_nlq; +FT_INFO *ft_init_nlq_search(MI_INFO *, uint, byte *, uint, my_bool); +int ft_nlq_read_next(FT_INFO *, char *); +float ft_nlq_find_relevance(FT_INFO *, my_off_t ); +void ft_nlq_close_search(FT_INFO *); +float ft_nlq_get_relevance(FT_INFO *); 
+my_off_t ft_nlq_get_docid(FT_INFO *); +void ft_nlq_reinit_search(FT_INFO *); + +const struct _ft_vft _ft_vft_boolean; +FT_INFO *ft_init_boolean_search(MI_INFO *, uint, byte *, uint, my_bool); +int ft_boolean_read_next(FT_INFO *, char *); +float ft_boolean_find_relevance(FT_INFO *, my_off_t ); +void ft_boolean_close_search(FT_INFO *); +float ft_boolean_get_relevance(FT_INFO *); +my_off_t ft_boolean_get_docid(FT_INFO *); +void ft_boolean_reinit_search(FT_INFO *); diff --git a/myisam/mi_search.c b/myisam/mi_search.c index b9895e9d6cd..67db7c7110a 100644 --- a/myisam/mi_search.c +++ b/myisam/mi_search.c @@ -317,7 +317,7 @@ int _mi_prefix_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page, while (page < end) { uint packed= *page & 128; - + vseg=page; if (keyinfo->seg->length >= 127) { @@ -333,7 +333,7 @@ int _mi_prefix_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page, prefix_len=len; else { - prefix_len=suffix_len; + prefix_len=suffix_len; get_key_length(suffix_len,vseg); } } @@ -359,7 +359,7 @@ int _mi_prefix_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page, for (keyseg=keyinfo->seg+1 ; keyseg->type ; keyseg++ ) { - + if (keyseg->flag & HA_NULL_PART) { if (!(*from++)) @@ -726,13 +726,14 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a, for ( ; (int) key_length >0 ; key_length=next_key_length, keyseg++) { uchar *end; + uint piks=! (keyseg->flag & HA_NO_SORT); (*diff_pos)++; /* Handle NULL part */ if (keyseg->null_bit) { key_length--; - if (*a != *b) + if (*a != *b && piks) { flag = (int) *a - (int) *b; return ((keyseg->flag & HA_REVERSE_SORT) ? 
-flag : flag); @@ -758,9 +759,9 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a, get_key_pack_length(b_length,pack_length,b); next_key_length=key_length-b_length-pack_length; - if ((flag=_mi_compare_text(keyseg->charset,a,a_length,b,b_length, - (my_bool) ((nextflag & SEARCH_PREFIX) && - next_key_length <= 0)))) + if (piks && + (flag=_mi_compare_text(keyseg->charset,a,a_length,b,b_length, + (my_bool) ((nextflag & SEARCH_PREFIX) && next_key_length <= 0)))) return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); a+=a_length; b+=b_length; @@ -776,9 +777,9 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a, while (b_length && b[b_length-1] == ' ') b_length--; } - if ((flag=_mi_compare_text(keyseg->charset,a,a_length,b,b_length, - (my_bool) ((nextflag & SEARCH_PREFIX) && - next_key_length <= 0)))) + if (piks && + (flag=_mi_compare_text(keyseg->charset,a,a_length,b,b_length, + (my_bool) ((nextflag & SEARCH_PREFIX) && next_key_length <= 0)))) return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); a=end; b+=length; @@ -792,9 +793,9 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a, get_key_pack_length(b_length,pack_length,b); next_key_length=key_length-b_length-pack_length; - if ((flag=compare_bin(a,a_length,b,b_length, - (my_bool) ((nextflag & SEARCH_PREFIX) && - next_key_length <= 0)))) + if (piks && + (flag=compare_bin(a,a_length,b,b_length, + (my_bool) ((nextflag & SEARCH_PREFIX) && next_key_length <= 0)))) return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); a+=a_length; b+=b_length; @@ -803,9 +804,9 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a, else { uint length=keyseg->length; - if ((flag=compare_bin(a,length,b,length, - (my_bool) ((nextflag & SEARCH_PREFIX) && - next_key_length <= 0)))) + if (piks && + (flag=compare_bin(a,length,b,length, + (my_bool) ((nextflag & SEARCH_PREFIX) && next_key_length <= 0)))) return ((keyseg->flag & HA_REVERSE_SORT) ? 
-flag : flag); a+=length; b+=length; @@ -818,9 +819,9 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a, get_key_pack_length(b_length,pack_length,b); next_key_length=key_length-b_length-pack_length; - if ((flag=_mi_compare_text(keyseg->charset,a,a_length,b,b_length, - (my_bool) ((nextflag & SEARCH_PREFIX) && - next_key_length <= 0)))) + if (piks && + (flag=_mi_compare_text(keyseg->charset,a,a_length,b,b_length, + (my_bool) ((nextflag & SEARCH_PREFIX) && next_key_length <= 0)))) return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); a+=a_length; b+=b_length; @@ -834,9 +835,9 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a, get_key_pack_length(b_length,pack_length,b); next_key_length=key_length-b_length-pack_length; - if ((flag=compare_bin(a,a_length,b,b_length, - (my_bool) ((nextflag & SEARCH_PREFIX) && - next_key_length <= 0)))) + if (piks && + (flag=compare_bin(a,a_length,b,b_length, + (my_bool) ((nextflag & SEARCH_PREFIX) && next_key_length <= 0)))) return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); a+=a_length; b+=b_length; @@ -847,7 +848,7 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a, { int i_1= (int) *((signed char*) a); int i_2= (int) *((signed char*) b); - if ((flag = CMP(i_1,i_2))) + if (piks && (flag = CMP(i_1,i_2))) return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); a= end; b++; @@ -856,7 +857,7 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a, case HA_KEYTYPE_SHORT_INT: s_1= mi_sint2korr(a); s_2= mi_sint2korr(b); - if ((flag = CMP(s_1,s_2))) + if (piks && (flag = CMP(s_1,s_2))) return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); a= end; b+= 2; /* sizeof(short int); */ @@ -866,7 +867,7 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a, uint16 us_1,us_2; us_1= mi_sint2korr(a); us_2= mi_sint2korr(b); - if ((flag = CMP(us_1,us_2))) + if (piks && (flag = CMP(us_1,us_2))) return ((keyseg->flag & HA_REVERSE_SORT) ? 
-flag : flag); a= end; b+=2; /* sizeof(short int); */ @@ -875,7 +876,7 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a, case HA_KEYTYPE_LONG_INT: l_1= mi_sint4korr(a); l_2= mi_sint4korr(b); - if ((flag = CMP(l_1,l_2))) + if (piks && (flag = CMP(l_1,l_2))) return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); a= end; b+= 4; /* sizeof(long int); */ @@ -883,7 +884,7 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a, case HA_KEYTYPE_ULONG_INT: u_1= mi_sint4korr(a); u_2= mi_sint4korr(b); - if ((flag = CMP(u_1,u_2))) + if (piks && (flag = CMP(u_1,u_2))) return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); a= end; b+= 4; /* sizeof(long int); */ @@ -891,7 +892,7 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a, case HA_KEYTYPE_INT24: l_1=mi_sint3korr(a); l_2=mi_sint3korr(b); - if ((flag = CMP(l_1,l_2))) + if (piks && (flag = CMP(l_1,l_2))) return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); a= end; b+= 3; @@ -899,7 +900,7 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a, case HA_KEYTYPE_UINT24: l_1=mi_uint3korr(a); l_2=mi_uint3korr(b); - if ((flag = CMP(l_1,l_2))) + if (piks && (flag = CMP(l_1,l_2))) return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); a= end; b+= 3; @@ -907,7 +908,7 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a, case HA_KEYTYPE_FLOAT: mi_float4get(f_1,a); mi_float4get(f_2,b); - if ((flag = CMP(f_1,f_2))) + if (piks && (flag = CMP(f_1,f_2))) return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); a= end; b+= 4; /* sizeof(float); */ @@ -915,7 +916,7 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a, case HA_KEYTYPE_DOUBLE: mi_float8get(d_1,a); mi_float8get(d_2,b); - if ((flag = CMP(d_1,d_2))) + if (piks && (flag = CMP(d_1,d_2))) return ((keyseg->flag & HA_REVERSE_SORT) ? 
-flag : flag); a= end; b+= 8; /* sizeof(double); */ @@ -945,33 +946,40 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a, for ( ; alength && *a == ' ' ; a++, alength--) ; for ( ; blength && *b == ' ' ; b++, blength--) ; } - - if (*a == '-') + if (piks) { - if (*b != '-') - return -1; - a++; b++; - swap(uchar*,a,b); - swap(int,alength,blength); - swap_flag=1-swap_flag; - alength--; blength--; - end=a+alength; + if (*a == '-') + { + if (*b != '-') + return -1; + a++; b++; + swap(uchar*,a,b); + swap(int,alength,blength); + swap_flag=1-swap_flag; + alength--; blength--; + end=a+alength; + } + else if (*b == '-') + return 1; + while (alength && (*a == '+' || *a == '0')) + { + a++; alength--; + } + while (blength && (*b == '+' || *b == '0')) + { + b++; blength--; + } + if (alength != blength) + return (alength < blength) ? -1 : 1; + while (a < end) + if (*a++ != *b++) + return ((int) a[-1] - (int) b[-1]); } - else if (*b == '-') - return 1; - while (alength && (*a == '+' || *a == '0')) + else { - a++; alength--; + b+=(end-a); + a=end; } - while (blength && (*b == '+' || *b == '0')) - { - b++; blength--; - } - if (alength != blength) - return (alength < blength) ? -1 : 1; - while (a < end) - if (*a++ != *b++) - return ((int) a[-1] - (int) b[-1]); if (swap_flag) /* Restore pointers */ swap(uchar*,a,b); @@ -983,7 +991,7 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a, longlong ll_a,ll_b; ll_a= mi_sint8korr(a); ll_b= mi_sint8korr(b); - if ((flag = CMP(ll_a,ll_b))) + if (piks && (flag = CMP(ll_a,ll_b))) return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); a= end; b+= 8; @@ -994,7 +1002,7 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a, ulonglong ll_a,ll_b; ll_a= mi_uint8korr(a); ll_b= mi_uint8korr(b); - if ((flag = CMP(ll_a,ll_b))) + if (piks && (flag = CMP(ll_a,ll_b))) return ((keyseg->flag & HA_REVERSE_SORT) ? 
-flag : flag); a= end; b+= 8; diff --git a/myisam/mi_write.c b/myisam/mi_write.c index 096d5829fed..322b0755015 100644 --- a/myisam/mi_write.c +++ b/myisam/mi_write.c @@ -774,7 +774,7 @@ static int keys_free(uchar *key, TREE_FREE mode, bulk_insert_param *param) keyinfo=param->info->s->keyinfo+param->keynr; keylen=_mi_keylength(keyinfo, key); memcpy(lastkey, key, keylen); - return _mi_ck_write_btree(param->info,param->keynr,lastkey, + return _mi_ck_write_btree(param->info,param->keynr,lastkey, keylen - param->info->s->rec_reflength); case free_end: if (param->info->s->concurrent_insert) @@ -794,7 +794,7 @@ int _mi_init_bulk_insert(MI_INFO *info) if (info->bulk_insert) return 0; - + for (i=num_keys=0 ; i < share->base.keys ; i++) { if (!(key[i].flag & HA_NOSAME) && share->base.auto_key != i+1 @@ -807,7 +807,7 @@ int _mi_init_bulk_insert(MI_INFO *info) if (!num_keys) return 0; - + info->bulk_insert=(TREE *) my_malloc((sizeof(TREE)*share->base.keys+ sizeof(bulk_insert_param)*num_keys),MYF(0)); @@ -822,13 +822,13 @@ int _mi_init_bulk_insert(MI_INFO *info) { params->info=info; params->keynr=i; - init_tree(& info->bulk_insert[i], 0, + init_tree(& info->bulk_insert[i], 0, myisam_bulk_insert_tree_size / num_keys, 0, (qsort_cmp2)keys_compare, 0, (tree_element_free) keys_free, (void *)params++); } else - info->bulk_insert[i].root=0; + info->bulk_insert[i].root=0; } return 0; diff --git a/mysql-test/r/fulltext_cache.result b/mysql-test/r/fulltext_cache.result index a580437b0ce..0e5dd2060d5 100644 --- a/mysql-test/r/fulltext_cache.result +++ b/mysql-test/r/fulltext_cache.result @@ -1,5 +1,5 @@ q item id x -aaaaaaaaa dsaass de sushi 1 1.92378665219675 +aaaaaaaaa dsaass de sushi 1 1.92378664016724 aaaaaaaaa dsaass de Bolo de Chocolate 2 0 aaaaaaaaa dsaass de Feijoada 3 0 aaaaaaaaa dsaass de Mousse de Chocolate 4 0 @@ -8,7 +8,7 @@ ssde df s fsda sad er um chocolate Snickers 6 0 aaaaaaaaa dsaass de Bife 7 0 aaaaaaaaa dsaass de Pizza de Salmao 8 0 q item id x -aaaaaaaaa dsaass de 
sushi 1 1.92378665219675 +aaaaaaaaa dsaass de sushi 1 1.92378664016724 aaaaaaaaa dsaass de Bolo de Chocolate 2 0 aaaaaaaaa dsaass de Feijoada 3 0 aaaaaaaaa dsaass de Mousse de Chocolate 4 0 diff --git a/mysql-test/r/fulltext_left_join.result b/mysql-test/r/fulltext_left_join.result index 04244e309e5..051d3bf4f84 100644 --- a/mysql-test/r/fulltext_left_join.result +++ b/mysql-test/r/fulltext_left_join.result @@ -1,5 +1,5 @@ match(t1.texte,t1.sujet,t1.motsclefs) against('droit') 0 0 -0.67003110026735 +0.67003107070923 0 diff --git a/mysql-test/r/fulltext_multi.result b/mysql-test/r/fulltext_multi.result index d78d88172b5..f1f890da9de 100644 --- a/mysql-test/r/fulltext_multi.result +++ b/mysql-test/r/fulltext_multi.result @@ -1,12 +1,12 @@ a MATCH b AGAINST ('lala lkjh') -1 0.67003110026735 +1 0.67003107070923 2 0 3 0 a MATCH c AGAINST ('lala lkjh') 1 0 -2 0.67756324121582 +2 0.67756325006485 3 0 a MATCH b,c AGAINST ('lala lkjh') -1 0.64840710366884 -2 0.66266459031789 +1 0.64840710163116 +2 0.66266459226608 3 0 diff --git a/mysql-test/r/fulltext_order_by.result b/mysql-test/r/fulltext_order_by.result index 3ac5285151b..c0db830c08a 100644 --- a/mysql-test/r/fulltext_order_by.result +++ b/mysql-test/r/fulltext_order_by.result @@ -1,19 +1,19 @@ a MATCH (message) AGAINST ('steve') -4 0.90587321329654 -7 0.89568988462614 +4 0.90587323904037 +7 0.89568990468979 a MATCH (message) AGAINST ('steve') -4 0.90587321329654 -7 0.89568988462614 +4 0.90587323904037 +7 0.89568990468979 a MATCH (message) AGAINST ('steve') -7 0.89568988462614 -4 0.90587321329654 +7 0.89568990468979 +4 0.90587323904037 a MATCH (message) AGAINST ('steve') -7 0.89568988462614 +7 0.89568990468979 a rel 1 0 2 0 3 0 5 0 6 0 -7 0.89568988462614 -4 0.90587321329654 +7 0.89568990468979 +4 0.90587323904037 diff --git a/mysql-test/t/fulltext.test b/mysql-test/t/fulltext.test index e9b9bd23398..d0ff789e38f 100644 --- a/mysql-test/t/fulltext.test +++ b/mysql-test/t/fulltext.test @@ -5,10 +5,21 @@ drop table if 
exists t1,t2,t3; CREATE TABLE t1 (a VARCHAR(200), b TEXT, FULLTEXT (a,b)); -INSERT INTO t1 VALUES('MySQL has now support', 'for full-text search'),('Full-text indexes', 'are called collections'),('Only MyISAM tables','support collections'),('Function MATCH ... AGAINST()','is used to do a search'),('Full-text search in MySQL', 'implements vector space model'); +INSERT INTO t1 VALUES('MySQL has now support', 'for full-text search'), + ('Full-text indexes', 'are called collections'), + ('Only MyISAM tables','support collections'), + ('Function MATCH ... AGAINST()','is used to do a search'), + ('Full-text search in MySQL', 'implements vector space model'); select * from t1 where MATCH(a,b) AGAINST ("collections"); select * from t1 where MATCH(a,b) AGAINST ("indexes"); select * from t1 where MATCH(a,b) AGAINST ("indexes collections"); +select * from t1 where MATCH(a,b) AGAINST("support -collections" IN BOOLEAN MODE); +select * from t1 where MATCH(a,b) AGAINST("support collections" IN BOOLEAN MODE); +select * from t1 where MATCH(a,b) AGAINST("support +collections" IN BOOLEAN MODE); +select * from t1 where MATCH(a,b) AGAINST("sear*" IN BOOLEAN MODE); +select * from t1 where MATCH(a,b) AGAINST("+support +collections" IN BOOLEAN MODE); +select * from t1 where MATCH(a,b) AGAINST("+search" IN BOOLEAN MODE); +select * from t1 where MATCH(a,b) AGAINST("+search +(support vector)" IN BOOLEAN MODE); delete from t1 where a like "MySQL%"; drop table t1; diff --git a/sql/ha_myisam.cc b/sql/ha_myisam.cc index 61452c3de3b..94481057743 100644 --- a/sql/ha_myisam.cc +++ b/sql/ha_myisam.cc @@ -1197,7 +1197,7 @@ int ha_myisam::ft_read(byte * buf) thread_safe_increment(ha_read_next_count,&LOCK_status); // why ? - error=ft_nlq_read_next((FT_DOCLIST *) ft_handler,(char*) buf); + error=ft_handler->please->read_next(ft_handler,(char*) buf); table->status=error ? 
STATUS_NOT_FOUND: 0; return error; diff --git a/sql/ha_myisam.h b/sql/ha_myisam.h index a95f0ed71cb..eba2bde7d59 100644 --- a/sql/ha_myisam.h +++ b/sql/ha_myisam.h @@ -76,9 +76,9 @@ class ha_myisam: public handler int index_next_same(byte *buf, const byte *key, uint keylen); int index_end() { ft_handler=NULL; return 0; } int ft_init() - { if(!ft_handler) return 1; ft_nlq_reinit_search(ft_handler); return 0; } - void *ft_init_ext(uint inx,const byte *key, uint keylen, bool presort) - { return ft_nlq_init_search(file,inx,(byte*) key,keylen,presort); } + { if(!ft_handler) return 1; ft_handler->please->reinit_search(ft_handler); return 0; } + FT_INFO *ft_init_ext(uint mode, uint inx,const byte *key, uint keylen, bool presort) + { return ft_init_search(mode, file,inx,(byte*) key,keylen,presort); } int ft_read(byte *buf); int rnd_init(bool scan=1); int rnd_next(byte *buf); diff --git a/sql/handler.h b/sql/handler.h index 16f7ba90eff..530c94a8b43 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -21,6 +21,8 @@ #pragma interface /* gcc class implementation */ #endif +#include <ft_global.h> + #ifndef NO_HASH #define NO_HASH /* Not yet implemented */ #endif @@ -201,7 +203,7 @@ public: time_t check_time; time_t update_time; ulong mean_rec_length; /* physical reclength */ - void *ft_handler; + FT_INFO *ft_handler; bool auto_increment_column_changed; handler(TABLE *table_arg) : table(table_arg),active_index(MAX_REF_PARTS), @@ -247,9 +249,9 @@ public: virtual int index_next_same(byte *buf, const byte *key, uint keylen); virtual int ft_init() { return -1; } - virtual void *ft_init_ext(uint inx,const byte *key, uint keylen, + virtual FT_INFO *ft_init_ext(uint mode,uint inx,const byte *key, uint keylen, bool presort) - { return (void *)NULL; } + { return NULL; } virtual int ft_read(byte *buf) { return -1; } virtual int rnd_init(bool scan=1)=0; virtual int rnd_end() { return 0; } diff --git a/sql/item_func.cc b/sql/item_func.cc index 076194e8b7b..960cadf71e9 100644 --- a/sql/item_func.cc +++
b/sql/item_func.cc @@ -1903,46 +1903,6 @@ err: return 0; } -double Item_func_match_nl::val() -{ - if (ft_handler==NULL) - init_search(1); - - if ((null_value= (ft_handler==NULL))) - return 0.0; - - if (join_key) - { - if (table->file->ft_handler) - return ft_nlq_get_relevance(ft_handler); - - join_key=0; // Magic here ! See ha_myisam::ft_read() - } - - /* we'll have to find ft_relevance manually in ft_handler array */ - - int a,b,c; - FT_DOC *docs=((FT_DOCLIST *)ft_handler)->doc; - my_off_t docid=table->file->row_position(); - - if ((null_value=(docid==HA_OFFSET_ERROR))) - return 0.0; - - // Assuming docs[] is sorted by dpos... - - for (a=0, b=((FT_DOCLIST *)ft_handler)->ndocs, c=(a+b)/2; b-a>1; c=(a+b)/2) - { - if (docs[c].dpos > docid) - b=c; - else - a=c; - } - if (docs[a].dpos == docid) - return docs[a].weight; - else - return 0.0; -} - void Item_func_match::init_search(bool no_order) { if (ft_handler) @@ -2113,6 +2073,75 @@ bool Item_func_match::eq(const Item *item) const return 0; } +#if 0 +double Item_func_match::val() +{ + if (ft_handler==NULL) + init_search(1); + + if ((null_value= (ft_handler==NULL))) + return 0.0; + + if (join_key) + { + if (table->file->ft_handler) + return ft_handler->please->get_relevance(ft_handler); + + join_key=0; + } + + my_off_t docid=table->file->row_position(); + + if ((null_value=(docid==HA_OFFSET_ERROR))) + return 0.0; + else + return ft_handler->please->find_relevance(ft_handler, docid); +} +#endif + +double Item_func_match_nl::val() +{ + if (ft_handler==NULL) + init_search(1); + + if ((null_value= (ft_handler==NULL))) + return 0.0; + + if (join_key) + { + if (table->file->ft_handler) + return ft_handler->please->get_relevance(ft_handler); + + join_key=0; + } + + my_off_t docid=table->file->row_position(); + + if ((null_value=(docid==HA_OFFSET_ERROR))) + return 0.0; + else + return ft_handler->please->find_relevance(ft_handler, docid); +} + +double Item_func_match_bool::val() +{ + if (ft_handler==NULL) + init_search(1); + + 
if ((null_value= (ft_handler==NULL))) + return 0.0; + + if (join_key) + { + if (table->file->ft_handler) + return ft_handler->please->get_relevance(ft_handler); + + join_key=0; + } + + null_value=1; + return -1.0; +} /*************************************************************************** System variables diff --git a/sql/item_func.h b/sql/item_func.h index 19f15eaf9b3..1926fd025f4 100644 --- a/sql/item_func.h +++ b/sql/item_func.h @@ -863,7 +863,7 @@ public: uint key; bool join_key; Item_func_match *master; - void * ft_handler; + FT_INFO * ft_handler; Item_func_match(List<Item> &a, Item *b): Item_real_func(b), fields(a), table(0), join_key(0), master(0), ft_handler(0) {} @@ -871,14 +871,13 @@ public: { if (!master && ft_handler) { - ft_handler_close(); + ft_handler->please->close_search(ft_handler); + ft_handler=0; if(join_key) table->file->ft_handler=0; } } - virtual int ft_handler_init(const byte *key, uint keylen, bool presort) - { return 1; } - virtual int ft_handler_close() { return 1; } + virtual int ft_handler_init(const byte *key, uint keylen, bool presort) =0; enum Functype functype() const { return FT_FUNC; } void update_used_tables() {} bool fix_fields(THD *thd,struct st_table_list *tlist); @@ -896,18 +895,16 @@ public: const char *func_name() const { return "match_nl"; } double val(); int ft_handler_init(const byte *query, uint querylen, bool presort) - { ft_handler=table->file->ft_init_ext(key, query, querylen, presort); } - int ft_handler_close() { ft_nlq_close_search(ft_handler); ft_handler=0; } + { ft_handler=table->file->ft_init_ext(FT_NL,key, query, querylen, presort); } }; -#if 0 + class Item_func_match_bool :public Item_func_match { public: - Item_func_match_nl(List<Item> &a, Item *b): Item_func_match(a,b) {} + Item_func_match_bool(List<Item> &a, Item *b): Item_func_match(a,b) {} const char *func_name() const { return "match_bool"; } double val(); int ft_handler_init(const byte *query, uint querylen, bool presort) - { ft_handler=table->file->ft_init_ext(key,
query, querylen, presort); } - int ft_handler_close() { ft_close_search(ft_handler); ft_handler=0; } + { ft_handler=table->file->ft_init_ext(FT_BOOL,key, query, querylen, presort); } }; -#endif + diff --git a/sql/lex.h b/sql/lex.h index 1d481aa7c85..21639fe4b70 100644 --- a/sql/lex.h +++ b/sql/lex.h @@ -73,6 +73,7 @@ static SYMBOL symbols[] = { { "BINLOG", SYM(BINLOG_SYM),0,0}, { "BLOB", SYM(BLOB_SYM),0,0}, { "BOOL", SYM(BOOL_SYM),0,0}, + { "BOOLEAN", SYM(BOOLEAN_SYM),0,0}, { "BOTH", SYM(BOTH),0,0}, { "BY", SYM(BY),0,0}, { "CASCADE", SYM(CASCADE),0,0}, diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index 08c2d800acf..3983f3b726c 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -23,7 +23,7 @@ #define Lex current_lex #define Select Lex->select #include "mysql_priv.h" -#include "slave.h" +#include "slave.h" #include "sql_acl.h" #include "lex_symbol.h" #include <myisam.h> @@ -149,6 +149,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b,int *yystacksize); %token BINARY %token BIT_SYM %token BOOL_SYM +%token BOOLEAN_SYM %token BOTH %token BY %token CASCADE @@ -499,7 +500,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b,int *yystacksize); using_list %type <item_list> - expr_list udf_expr_list when_list ident_list + expr_list udf_expr_list when_list ident_list ident_list_arg %type <key_type> key_type opt_unique_or_fulltext @@ -1547,12 +1548,12 @@ simple_expr: | '!'
expr %prec NEG { $$= new Item_func_not($2); } | '(' expr ')' { $$= $2; } | '{' ident expr '}' { $$= $3; } - | MATCH '(' ident_list ')' AGAINST '(' expr ')' - { Select->ftfunc_list.push_back( - (Item_func_match *)($$=new Item_func_match_nl(*$3,$7))); } - | MATCH ident_list AGAINST '(' expr ')' - { Select->ftfunc_list.push_back( - (Item_func_match *)($$=new Item_func_match_nl(*$2,$5))); } + | MATCH ident_list_arg AGAINST '(' expr ')' + { Select->ftfunc_list.push_back((Item_func_match *) + $$=new Item_func_match_nl(*$2,$5)); } + | MATCH ident_list_arg AGAINST '(' expr IN_SYM BOOLEAN_SYM MODE_SYM ')' + { Select->ftfunc_list.push_back((Item_func_match *) + $$=new Item_func_match_bool(*$2,$5)); } | BINARY expr %prec NEG { $$= new Item_func_binary($2); } | CASE_SYM opt_expr WHEN_SYM when_list opt_else END { $$= new Item_func_case(* $4, $2, $5 ) } @@ -1798,6 +1799,10 @@ expr_list2: expr { Select->expr_list.head()->push_back($1); } | expr_list2 ',' expr { Select->expr_list.head()->push_back($3); } +ident_list_arg: + ident_list { $$= $1; } + | '(' ident_list ')' { $$= $2; } + ident_list: { Select->expr_list.push_front(new List<Item>); } ident_list2 @@ -2816,6 +2821,7 @@ keyword: | BERKELEY_DB_SYM {} | BIT_SYM {} | BOOL_SYM {} + | BOOLEAN_SYM {} | CHANGED {} | CHECKSUM_SYM {} | CHECK_SYM {}