Boolean search passes _some_ tests

sql/ha_myisam.cc:
  One more abstraction layer added (C++ emulated in C :).
include/ft_global.h:
  boolean search code plugged in
mysql-test/r/fulltext_cache.result:
  boolean search code plugged in
mysql-test/r/fulltext_left_join.result:
  boolean search code plugged in
mysql-test/r/fulltext_multi.result:
  boolean search code plugged in
mysql-test/r/fulltext_order_by.result:
  boolean search code plugged in
sql/lex.h:
  IN BOOLEAN MODE syntax
myisam/ft_nlq_search.c:
  boolean search code plugged in
myisam/ftdefs.h:
  boolean search code plugged in
sql/ha_myisam.h:
  boolean search code plugged in
sql/handler.h:
  boolean search code plugged in
include/my_base.h:
  do_not_sort_keyseg feature for MyISAM
include/my_global.h:
  #define comp(a,b) (((a) < (b)) ? -1 : ((a) > (b)) ? 1 : 0)
myisam/ft_boolean_search.c:
  bugfixing
myisam/ft_parser.c:
  cleanup
myisam/ft_static.c:
  do_not_sort_keyseg feature for MyISAM
myisam/mi_search.c:
  do_not_sort_keyseg feature for MyISAM
myisam/mi_write.c:
  cleanup
mysql-test/t/fulltext.test:
  boolean search tests added
BitKeeper/etc/ignore:
  Added myisam/FT1.MYD myisam/FT1.MYI to the ignore list
sql/item_func.cc:
  boolean search
sql/item_func.h:
  boolean search
sql/sql_yacc.yy:
  boolean search
This commit is contained in:
unknown 2001-10-09 14:53:54 +02:00
parent 734e2a8bca
commit 736e5b0de2
23 changed files with 466 additions and 288 deletions

View file

@ -412,3 +412,5 @@ libmysqld/examples/sql_string.cc
libmysqld/examples/sql_string.h libmysqld/examples/sql_string.h
libmysqld/examples/mysql libmysqld/examples/mysql
libmysqld/examples/mysqltest libmysqld/examples/mysqltest
myisam/FT1.MYD
myisam/FT1.MYI

View file

@ -29,17 +29,21 @@ extern "C" {
#define FT_QUERY_MAXLEN 1024 #define FT_QUERY_MAXLEN 1024
#define HA_FT_MAXLEN 254 #define HA_FT_MAXLEN 254
typedef struct ft_doc_rec { typedef struct st_ft_info FT_INFO;
my_off_t dpos; struct _ft_vft {
double weight; int (*read_next)(FT_INFO *, char *);
} FT_DOC; float (*find_relevance)(FT_INFO *, my_off_t);
void (*close_search)(FT_INFO *);
float (*get_relevance)(FT_INFO *);
my_off_t (*get_docid)(FT_INFO *);
void (*reinit_search)(FT_INFO *);
};
typedef struct st_ft_doclist { #ifndef FT_CORE
int ndocs; struct st_ft_info {
int curdoc; struct _ft_vft *please; /* INTERCAL style :-) */
void *info; /* actually (MI_INFO *) but don't want to include myisam.h */ };
FT_DOC doc[1]; #endif
} FT_DOCLIST;
extern const char *ft_precompiled_stopwords[]; extern const char *ft_precompiled_stopwords[];
@ -50,12 +54,9 @@ extern uint ft_max_word_len_for_sort;
int ft_init_stopwords(const char **); int ft_init_stopwords(const char **);
void ft_free_stopwords(void); void ft_free_stopwords(void);
FT_DOCLIST * ft_nlq_init_search(void *, uint, byte *, uint, my_bool); #define FT_NL 0
int ft_nlq_read_next(FT_DOCLIST *, char *); #define FT_BOOL 1
#define ft_nlq_close_search(handler) my_free(((gptr)(handler)),MYF(0)) FT_INFO *ft_init_search(uint,void *, uint, byte *, uint, my_bool);
#define ft_nlq_get_relevance(handler) (((FT_DOCLIST *)(handler))->doc[((FT_DOCLIST *)(handler))->curdoc].weight)
#define ft_nlq_get_docid(handler) (((FT_DOCLIST *)(handler))->doc[((FT_DOCLIST *)(handler))->curdoc].dpos)
#define ft_nlq_reinit_search(handler) (((FT_DOCLIST *)(handler))->curdoc=-1)
#ifdef __cplusplus #ifdef __cplusplus
} }

View file

@ -159,6 +159,7 @@ enum ha_base_keytype {
#define HA_BLOB_PART 32 #define HA_BLOB_PART 32
#define HA_SWAP_KEY 64 #define HA_SWAP_KEY 64
#define HA_REVERSE_SORT 128 /* Sort key in reverse order */ #define HA_REVERSE_SORT 128 /* Sort key in reverse order */
#define HA_NO_SORT 256 /* do not bother sorting on this keyseg */
/* optionbits for database */ /* optionbits for database */
#define HA_OPTION_PACK_RECORD 1 #define HA_OPTION_PACK_RECORD 1

View file

@ -262,7 +262,7 @@ int __void__;
#define LINT_INIT(var) #define LINT_INIT(var)
#endif #endif
/* Define som useful general macros */ /* Define some useful general macros */
#if defined(__cplusplus) && defined(__GNUC__) #if defined(__cplusplus) && defined(__GNUC__)
#define max(a, b) ((a) >? (b)) #define max(a, b) ((a) >? (b))
#define min(a, b) ((a) <? (b)) #define min(a, b) ((a) <? (b))
@ -276,6 +276,7 @@ typedef unsigned int uint;
typedef unsigned short ushort; typedef unsigned short ushort;
#endif #endif
/* Three-way compare: yields -1 if a<b, 1 if a>b, 0 otherwise.
   Each argument can be evaluated twice, so pass side-effect-free expressions. */
#define comp(a,b) (((a) < (b)) ? -1 : ((a) > (b)) ? 1 : 0)
#define sgn(a) (((a) < 0) ? -1 : ((a) > 0) ? 1 : 0) #define sgn(a) (((a) < 0) ? -1 : ((a) > 0) ? 1 : 0)
#define swap(t,a,b) { register t dummy; dummy = a; a = b; b = dummy; } #define swap(t,a,b) { register t dummy; dummy = a; a = b; b = dummy; }
#define test(a) ((a) ? 1 : 0) #define test(a) ((a) ? 1 : 0)

View file

@ -16,6 +16,7 @@
/* Written by Sergei A. Golubchik, who has a shared copyright to this code */ /* Written by Sergei A. Golubchik, who has a shared copyright to this code */
#define FT_CORE
#include "ftdefs.h" #include "ftdefs.h"
#include <queues.h> #include <queues.h>
@ -73,8 +74,9 @@ typedef struct {
byte word[1]; byte word[1];
} FTB_WORD; } FTB_WORD;
typedef struct st_ftb_handler { typedef struct st_ft_info {
MI_INFO *info; struct _ft_vft *please;
MI_INFO *info;
uint keynr; uint keynr;
int ok; int ok;
FTB_EXPR *root; FTB_EXPR *root;
@ -85,10 +87,10 @@ typedef struct st_ftb_handler {
int FTB_WORD_cmp(void *v, byte *a, byte *b) int FTB_WORD_cmp(void *v, byte *a, byte *b)
{ {
/* ORDER BY docid, ndepth DESC */ /* ORDER BY docid, ndepth DESC */
int i=((FTB_WORD *)a)->docid-((FTB_WORD *)b)->docid; int i=comp(((FTB_WORD *)a)->docid, ((FTB_WORD *)b)->docid);
if (!i) if (!i)
i=((FTB_WORD *)b)->ndepth-((FTB_WORD *)a)->ndepth; i=comp(((FTB_WORD *)b)->ndepth,((FTB_WORD *)a)->ndepth);
return sgn(i); return i;
} }
void _ftb_parse_query(FTB *ftb, byte **start, byte *end, void _ftb_parse_query(FTB *ftb, byte **start, byte *end,
@ -108,70 +110,71 @@ void _ftb_parse_query(FTB *ftb, byte **start, byte *end,
if (! ftb->ok) if (! ftb->ok)
return; return;
while (res=ftb_get_word(&start,end,&w,&param)) param.prev=' ';
while (res=ft_get_word(start,end,&w,&param))
{ {
byte r=param.plusminus; byte r=param.plusminus;
float weight=(param.pmsign ? nwghts : wghts)[(r>5)?5:((r<-5)?-5:r)]; float weight=(param.pmsign ? nwghts : wghts)[(r>5)?5:((r<-5)?-5:r)];
switch (res) { switch (res) {
case FTB_LBR: case FTB_LBR:
ftbe=(FTB_EXPR *)alloc_root(&ftb->mem_root, sizeof(FTB_EXPR)); ftbe=(FTB_EXPR *)alloc_root(&ftb->mem_root, sizeof(FTB_EXPR));
ftbe->yesno=param.yesno; ftbe->yesno=param.yesno;
ftbe->weight=weight; ftbe->weight=weight;
ftbe->up=up; ftbe->up=up;
ftbe->ythresh=0; ftbe->ythresh=0;
ftbe->docid=HA_POS_ERROR; ftbe->docid=HA_POS_ERROR;
if (ftbw->yesno > 0) up->ythresh++; if (ftbw->yesno > 0) up->ythresh++;
_ftb_parse_query(ftb, start, end, ftbe, depth+1, _ftb_parse_query(ftb, start, end, ftbe, depth+1,
(param.yesno<0 ? depth+1 : ndepth)); (param.yesno<0 ? depth+1 : ndepth));
break; break;
case FTB_RBR: case FTB_RBR:
return; return;
case 1: case 1:
ftbw=(FTB_WORD *)alloc_root(&ftb->mem_root, ftbw=(FTB_WORD *)alloc_root(&ftb->mem_root,
sizeof(FTB_WORD) + (param.trunc ? MI_MAX_KEY_BUFF : w.len+extra)); sizeof(FTB_WORD) + (param.trunc ? MI_MAX_KEY_BUFF : w.len+extra));
ftbw->len=w.len + !param.trunc; ftbw->len=w.len + !param.trunc;
ftbw->yesno=param.yesno; ftbw->yesno=param.yesno;
ftbw->trunc=param.trunc; /* 0 or 1 */ ftbw->trunc=param.trunc; /* 0 or 1 */
ftbw->weight=weight; ftbw->weight=weight;
ftbw->up=up; ftbw->up=up;
ftbw->docid=HA_POS_ERROR; ftbw->docid=HA_POS_ERROR;
ftbw->ndepth= param.yesno<0 ? depth : ndepth; ftbw->ndepth= param.yesno<0 ? depth : ndepth;
memcpy(ftbw->word+1, w.pos, w.len); memcpy(ftbw->word+1, w.pos, w.len);
ftbw->word[0]=w.len; ftbw->word[0]=w.len;
if (ftbw->yesno > 0) up->ythresh++; if (ftbw->yesno > 0) up->ythresh++;
/*****************************************/ /*****************************************/
r=_mi_search(info, keyinfo, ftbw->word, ftbw->len, r=_mi_search(info, keyinfo, ftbw->word, ftbw->len,
SEARCH_FIND | SEARCH_PREFIX, keyroot); SEARCH_FIND | SEARCH_PREFIX, keyroot);
if (!r) if (!r)
{ {
r=_mi_compare_text(default_charset_info, r=_mi_compare_text(default_charset_info,
info->lastkey+ftbw->trunc,ftbw->len, info->lastkey+ftbw->trunc,ftbw->len,
ftbw->word+ftbw->trunc,ftbw->len,0); ftbw->word+ftbw->trunc,ftbw->len,0);
} }
if (r) /* not found */ if (r) /* not found */
{ {
if (ftbw->yesno>0 && ftbw->up->up==0) if (ftbw->yesno>0 && ftbw->up->up==0)
{ /* this word MUST BE present in every document returned, { /* this word MUST BE present in every document returned,
so we can abort the search right now */ so we can abort the search right now */
ftb->ok=0; ftb->ok=0;
return; return;
} }
} }
else else
{ {
memcpy(ftbw->word, info->lastkey, info->lastkey_length); memcpy(ftbw->word, info->lastkey, info->lastkey_length);
ftbw->docid=info->lastpos; ftbw->docid=info->lastpos;
queue_insert(& ftb->queue, (byte *)ftbw); queue_insert(& ftb->queue, (byte *)ftbw);
} }
/*****************************************/ /*****************************************/
break; break;
} }
} }
return; return;
} }
FTB * ft_boolean_search_init(MI_INFO *info, uint keynr, byte *query, FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, byte *query,
uint query_len) uint query_len, my_bool presort __attribute__((unused)))
{ {
FTB *ftb; FTB *ftb;
FTB_EXPR *ftbe; FTB_EXPR *ftbe;
@ -179,11 +182,12 @@ FTB * ft_boolean_search_init(MI_INFO *info, uint keynr, byte *query,
if (!(ftb=(FTB *)my_malloc(sizeof(FTB), MYF(MY_WME)))) if (!(ftb=(FTB *)my_malloc(sizeof(FTB), MYF(MY_WME))))
return 0; return 0;
ftb->please=& _ft_vft_boolean;
ftb->ok=1; ftb->ok=1;
ftb->info=info; ftb->info=info;
ftb->keynr=keynr; ftb->keynr=keynr;
init_alloc_root(&ftb->mem_root, query_len,0); init_alloc_root(&ftb->mem_root, 1024, 1024);
/* hack: instead of init_queue, we'll use reinit queue to be able /* hack: instead of init_queue, we'll use reinit queue to be able
* to alloc queue with alloc_root() * to alloc queue with alloc_root()
@ -201,7 +205,7 @@ FTB * ft_boolean_search_init(MI_INFO *info, uint keynr, byte *query,
return ftb; return ftb;
} }
int ft_boolean_search_next(FTB *ftb, char *record) int ft_boolean_read_next(FT_INFO *ftb, char *record)
{ {
FTB_EXPR *ftbe, *up; FTB_EXPR *ftbe, *up;
FTB_WORD *ftbw; FTB_WORD *ftbw;
@ -218,61 +222,64 @@ int ft_boolean_search_next(FTB *ftb, char *record)
return my_errno; return my_errno;
/* black magic OFF */ /* black magic OFF */
while(ftb->ok && ftb->queue.elements) if (!ftb->queue.elements)
{ return my_errno=HA_ERR_END_OF_FILE;
curdoc=((FTB_WORD *)queue_top(& ftb->queue))->docid;
while(ftb->ok &&
(curdoc=((FTB_WORD *)queue_top(& ftb->queue))->docid) != HA_POS_ERROR)
{
while (curdoc==(ftbw=(FTB_WORD *)queue_top(& ftb->queue))->docid) while (curdoc==(ftbw=(FTB_WORD *)queue_top(& ftb->queue))->docid)
{ {
float weight=ftbw->weight; float weight=ftbw->weight;
uint yn=ftbw->yesno; int yn=ftbw->yesno;
for (ftbe=ftbw->up; ftbe; ftbe=ftbe->up) for (ftbe=ftbw->up; ftbe; ftbe=ftbe->up)
{ {
if (ftbe->docid != curdoc) if (ftbe->docid != curdoc)
{ {
ftbe->cur_weight=ftbe->yesses=ftbe->nos=0; ftbe->cur_weight=ftbe->yesses=ftbe->nos=0;
ftbe->docid=curdoc; ftbe->docid=curdoc;
} }
if (yn>0) if (yn>0)
{ {
ftbe->cur_weight+=weight; ftbe->cur_weight+=weight;
if (++ftbe->yesses >= ftbe->ythresh && !ftbe->nos) if (++ftbe->yesses >= ftbe->ythresh && !ftbe->nos)
{ {
yn=ftbe->yesno; yn=ftbe->yesno;
weight=ftbe->cur_weight*ftbe->weight; weight=ftbe->cur_weight*ftbe->weight;
} }
else else
break; break;
} }
else else
if (yn<0) if (yn<0)
{ {
/* NOTE: special sort function of queue assures that all yn<0 /* NOTE: special sort function of queue assures that all yn<0
* events for every particular subexpression will happen * events for every particular subexpression will
* BEFORE all yn>=0 events. So no already matched expression * "auto-magically" happen BEFORE all yn>=0 events. So no
* can become not-matched again. * already matched expression can become not-matched again.
*/ */
++ftbe->nos; ++ftbe->nos;
break; break;
} }
else else
/* if (yn==0) */ /* if (yn==0) */
{ {
if (ftbe->yesses >= ftbe->ythresh && !ftbe->nos) if (ftbe->yesses >= ftbe->ythresh && !ftbe->nos)
{ {
yn=ftbe->yesno; yn=ftbe->yesno;
weight*=ftbe->weight; ftbe->cur_weight=weight;
} weight*=ftbe->weight;
else }
{ else
ftbe->cur_weight+=weight; {
break; ftbe->cur_weight+=weight;
} break;
} }
}
} }
/* update queue */ /* update queue */
r=_mi_search(info, keyinfo, ftbw->word, ftbw->len, r=_mi_search(info, keyinfo, ftbw->word, ftbw->len,
SEARCH_FIND | SEARCH_PREFIX, keyroot); SEARCH_BIGGER , keyroot);
if (!r) if (!r)
{ {
r=_mi_compare_text(default_charset_info, r=_mi_compare_text(default_charset_info,
@ -281,19 +288,19 @@ int ft_boolean_search_next(FTB *ftb, char *record)
} }
if (r) /* not found */ if (r) /* not found */
{ {
queue_remove(& ftb->queue, 0); ftbw->docid=HA_POS_ERROR;
if (ftbw->yesno>0 && ftbw->up->up==0) if (ftbw->yesno>0 && ftbw->up->up==0)
{ /* this word MUST BE present in every document returned, { /* this word MUST BE present in every document returned,
so we can stop the search right now */ so we can stop the search right now */
ftb->ok=0; ftb->ok=0;
} }
} }
else else
{ {
memcpy(ftbw->word, info->lastkey, info->lastkey_length); memcpy(ftbw->word, info->lastkey, info->lastkey_length);
ftbw->docid=info->lastpos; ftbw->docid=info->lastpos;
queue_replaced(& ftb->queue);
} }
queue_replaced(& ftb->queue);
} }
ftbe=ftb->root; ftbe=ftb->root;
@ -305,8 +312,8 @@ int ft_boolean_search_next(FTB *ftb, char *record)
/* info->lastpos=curdoc; */ /* do I need this ? */ /* info->lastpos=curdoc; */ /* do I need this ? */
if (!(*info->read_record)(info,curdoc,record)) if (!(*info->read_record)(info,curdoc,record))
{ {
info->update|= HA_STATE_AKTIV; /* Record is read */ info->update|= HA_STATE_AKTIV; /* Record is read */
return 0; return 0;
} }
return my_errno; return my_errno;
} }
@ -314,3 +321,30 @@ int ft_boolean_search_next(FTB *ftb, char *record)
return my_errno=HA_ERR_END_OF_FILE; return my_errno=HA_ERR_END_OF_FILE;
} }
/*
  Placeholder for the find_relevance method of a boolean search.

  Neither argument is consulted: the call is reported on stderr and the
  fixed value -1.0 is returned.
*/
float ft_boolean_find_relevance(FT_INFO *ftb, my_off_t docid)
{
  /* to be done via str scan */
  fputs("ft_boolean_find_relevance called!\n", stderr);
  return -1.0;
}
/*
  Release a boolean search handle.

  ftb must not be used after this call.
*/
void ft_boolean_close_search(FT_INFO *ftb)
{
/* mem_root is a member of *ftb, so it has to be released before the handle */
free_root(& ftb->mem_root, MYF(0));
my_free((gptr)ftb,MYF(0));
}
/* Return the cur_weight stored on the root expression of the search. */
float ft_boolean_get_relevance(FT_INFO *ftb)
{
  FTB_EXPR *top= ftb->root;

  return top->cur_weight;
}
/* get_docid method of a boolean search: always answers HA_POS_ERROR;
   the handle is not consulted. */
my_off_t ft_boolean_get_docid(FT_INFO *ftb)
{
return HA_POS_ERROR;
}
/*
  Placeholder for the reinit_search method of a boolean search.

  The handle is left untouched; the call is only reported on stderr.
*/
void ft_boolean_reinit_search(FT_INFO *ftb)
{
  fputs("ft_boolean_reinit_search called!\n", stderr);
}

View file

@ -16,10 +16,24 @@
/* Written by Sergei A. Golubchik, who has a shared copyright to this code */ /* Written by Sergei A. Golubchik, who has a shared copyright to this code */
#define FT_CORE
#include "ftdefs.h" #include "ftdefs.h"
/* search with natural language queries */ /* search with natural language queries */
/* One entry of a natural-language result list. */
typedef struct ft_doc_rec {
my_off_t dpos; /* document id; value handed out by ft_nlq_get_docid() */
double weight; /* relevance; value handed out by ft_nlq_get_relevance() */
} FT_DOC;
/*
  Natural-language flavour of FT_INFO (this file is compiled with FT_CORE,
  so it supplies its own definition of the structure).
*/
struct st_ft_info {
struct _ft_vft *please; /* method table; ft_init_nlq_search() stores &_ft_vft_nlq here */
MI_INFO *info; /* table handle the search was started on */
int ndocs; /* number of entries in doc[] */
int curdoc; /* index of the current entry; initialised and reset to -1 */
FT_DOC doc[1]; /* really ndocs entries: the handle is allocated with room for them */
};
typedef struct st_all_in_one { typedef struct st_all_in_one {
MI_INFO *info; MI_INFO *info;
uint keynr; uint keynr;
@ -152,27 +166,27 @@ static int FT_DOC_cmp(FT_DOC *a, FT_DOC *b)
return sgn(b->weight - a->weight); return sgn(b->weight - a->weight);
} }
FT_DOCLIST *ft_nlq_init_search(void *info, uint keynr, byte *query, FT_INFO *ft_init_nlq_search(MI_INFO *info, uint keynr, byte *query,
uint query_len, my_bool presort) uint query_len, my_bool presort)
{ {
TREE *wtree, allocated_wtree; TREE *wtree, allocated_wtree;
ALL_IN_ONE aio; ALL_IN_ONE aio;
FT_DOC *dptr; FT_DOC *dptr;
FT_DOCLIST *dlist=NULL; FT_INFO *dlist=NULL;
my_off_t saved_lastpos=((MI_INFO *)info)->lastpos; my_off_t saved_lastpos=info->lastpos;
/* black magic ON */ /* black magic ON */
if ((int) (keynr = _mi_check_index((MI_INFO *)info,keynr)) < 0) if ((int) (keynr = _mi_check_index(info,keynr)) < 0)
return NULL; return NULL;
if (_mi_readinfo((MI_INFO *)info,F_RDLCK,1)) if (_mi_readinfo(info,F_RDLCK,1))
return NULL; return NULL;
/* black magic OFF */ /* black magic OFF */
aio.info=(MI_INFO *)info; aio.info=info;
aio.keynr=keynr; aio.keynr=keynr;
aio.keybuff=aio.info->lastkey+aio.info->s->base.max_key_length; aio.keybuff=info->lastkey+info->s->base.max_key_length;
aio.keyinfo=aio.info->s->keyinfo+keynr; aio.keyinfo=info->s->keyinfo+keynr;
aio.key_root=aio.info->s->state.key_root[keynr]; aio.key_root=info->s->state.key_root[keynr];
bzero(&allocated_wtree,sizeof(allocated_wtree)); bzero(&allocated_wtree,sizeof(allocated_wtree));
@ -186,18 +200,19 @@ FT_DOCLIST *ft_nlq_init_search(void *info, uint keynr, byte *query,
left_root_right)) left_root_right))
goto err2; goto err2;
dlist=(FT_DOCLIST *)my_malloc(sizeof(FT_DOCLIST)+ dlist=(FT_INFO *)my_malloc(sizeof(FT_INFO)+
sizeof(FT_DOC)*(aio.dtree.elements_in_tree-1),MYF(0)); sizeof(FT_DOC)*(aio.dtree.elements_in_tree-1),MYF(0));
if(!dlist) if(!dlist)
goto err2; goto err2;
dlist->please=& _ft_vft_nlq;
dlist->ndocs=aio.dtree.elements_in_tree; dlist->ndocs=aio.dtree.elements_in_tree;
dlist->curdoc=-1; dlist->curdoc=-1;
dlist->info=aio.info; dlist->info=aio.info;
dptr=dlist->doc; dptr=dlist->doc;
tree_walk(&aio.dtree, (tree_walk_action)&walk_and_copy, &dptr, tree_walk(&aio.dtree, (tree_walk_action)&walk_and_copy,
left_root_right); &dptr, left_root_right);
if(presort) if(presort)
qsort(dlist->doc, dlist->ndocs, sizeof(FT_DOC), (qsort_cmp)&FT_DOC_cmp); qsort(dlist->doc, dlist->ndocs, sizeof(FT_DOC), (qsort_cmp)&FT_DOC_cmp);
@ -207,11 +222,11 @@ err2:
delete_tree(&aio.dtree); delete_tree(&aio.dtree);
err: err:
((MI_INFO *)info)->lastpos=saved_lastpos; info->lastpos=saved_lastpos;
return dlist; return dlist;
} }
int ft_nlq_read_next(FT_DOCLIST *handler, char *record) int ft_nlq_read_next(FT_INFO *handler, char *record)
{ {
MI_INFO *info= (MI_INFO *) handler->info; MI_INFO *info= (MI_INFO *) handler->info;
@ -232,3 +247,43 @@ int ft_nlq_read_next(FT_DOCLIST *handler, char *record)
return my_errno; return my_errno;
} }
/*
  Look up the relevance of one document in a natural-language result list.

  handler   result list built by ft_init_nlq_search()
  docid     document id (FT_DOC.dpos) to look for

  Returns the stored weight of the document, or 0.0 when the document is
  not in the list (including when the list is empty).

  The lookup is a binary search and therefore requires doc[] to be in
  ascending dpos order.
  NOTE(review): ft_init_nlq_search() qsort()s doc[] with FT_DOC_cmp (by
  weight) when presort is set; on such a handle this search is not
  reliable -- confirm that callers only use it with presort == 0.
*/
float ft_nlq_find_relevance(FT_INFO *handler, my_off_t docid)
{
  int lo, hi, mid;
  FT_DOC *docs= handler->doc;

  /*
    An empty list is allocated without room for doc[0], so there is
    nothing that may be inspected.
  */
  if (handler->ndocs <= 0)
    return 0.0;

  /* Invariant: the wanted entry, if present, lies in [lo, hi). */
  lo= 0;
  hi= handler->ndocs;
  while (hi - lo > 1)
  {
    mid= lo + (hi - lo)/2;              /* cannot overflow, unlike (lo+hi)/2 */
    if (docs[mid].dpos > docid)
      hi= mid;
    else
      lo= mid;
  }

  if (docs[lo].dpos == docid)
    return (float) docs[lo].weight;
  return 0.0;
}
/* Release a natural-language search handle. The handle and its doc[] array
   come from one my_malloc() in ft_init_nlq_search(), so one my_free() is enough. */
void ft_nlq_close_search(FT_INFO *handler)
{
my_free((gptr)handler,MYF(0));
}
/* Return the weight of the entry that curdoc currently selects.
   No range check is made on curdoc. */
float ft_nlq_get_relevance(FT_INFO *handler)
{
  FT_DOC *current= &handler->doc[handler->curdoc];

  return current->weight;
}
/* Return the document id (dpos) of the entry that curdoc currently selects.
   No range check is made on curdoc. */
my_off_t ft_nlq_get_docid(FT_INFO *handler)
{
  FT_DOC *current= &handler->doc[handler->curdoc];

  return current->dpos;
}
/* Rewind the result list: curdoc goes back to -1, the value
   ft_init_nlq_search() starts it at. The doc[] contents are kept. */
void ft_nlq_reinit_search(FT_INFO *handler)
{
handler->curdoc=-1;
}

View file

@ -135,7 +135,7 @@ byte ft_get_word(byte **start, byte *end, FT_WORD *word, FTB_PARAM *param)
if (true_word_char(*doc)) break; if (true_word_char(*doc)) break;
if (*doc == FTB_LBR || *doc == FTB_RBR) if (*doc == FTB_LBR || *doc == FTB_RBR)
{ {
param->prev=' '; /* param->prev=' '; */
*start=doc+1; *start=doc+1;
return *doc; return *doc;
} }

View file

@ -43,10 +43,29 @@ const MI_KEYSEG ft_keysegs[FT_SEGS]={
}, },
#endif /* EVAL_RUN */ #endif /* EVAL_RUN */
{ {
HA_FT_WTYPE, 7, 0, 0, 0, 0, HA_FT_WLEN, 0, 0, NULL HA_FT_WTYPE, 7, 0, 0, 0, HA_NO_SORT, HA_FT_WLEN, 0, 0, NULL
} }
}; };
/*
  Method table for natural-language searches. The initializer is positional,
  so the entries must follow the member order of struct _ft_vft:
  read_next, find_relevance, close_search, get_relevance, get_docid,
  reinit_search.
*/
const struct _ft_vft _ft_vft_nlq = {
ft_nlq_read_next, ft_nlq_find_relevance, ft_nlq_close_search,
ft_nlq_get_relevance, ft_nlq_get_docid, ft_nlq_reinit_search
};
/*
  Method table for boolean searches. The initializer is positional, so the
  entries must follow the member order of struct _ft_vft:
  read_next, find_relevance, close_search, get_relevance, get_docid,
  reinit_search.
*/
const struct _ft_vft _ft_vft_boolean = {
ft_boolean_read_next, ft_boolean_find_relevance, ft_boolean_close_search,
ft_boolean_get_relevance, ft_boolean_get_docid, ft_boolean_reinit_search
};
/* Search constructors, indexed by search mode: FT_NL (0), FT_BOOL (1). */
FT_INFO *(*_ft_init_vft[2])(MI_INFO *, uint, byte *, uint, my_bool) =
{ ft_init_nlq_search, ft_init_boolean_search };

/*
  Start a full-text search of the requested kind.

  mode        FT_NL or FT_BOOL; selects the constructor
  info        really a MI_INFO *, passed as void * so that callers do not
              need the MyISAM headers
  keynr       number of the full-text key to search
  query       query text, query_len bytes long
  presort     handed through to the constructor

  Returns the new search handle, or NULL when the constructor fails or
  mode does not name a known search kind.
*/
FT_INFO *ft_init_search(uint mode, void *info, uint keynr,
                        byte *query, uint query_len, my_bool presort)
{
  /* Never index the constructor table with an unknown mode. */
  if (mode >= sizeof(_ft_init_vft)/sizeof(_ft_init_vft[0]))
    return NULL;

  return (*_ft_init_vft[mode])((MI_INFO *)info, keynr,
                               query, query_len, presort);
}
const char *ft_precompiled_stopwords[] = { const char *ft_precompiled_stopwords[] = {
#ifdef COMPILE_STOPWORDS_IN #ifdef COMPILE_STOPWORDS_IN

View file

@ -95,9 +95,6 @@ extern ulong collstat;
#define FTB_NEG '~' #define FTB_NEG '~'
#define FTB_TRUNC '*' #define FTB_TRUNC '*'
// #define FTB_MAX_SUBEXPR 255
// #define FTB_MAX_DEPTH 16
typedef struct st_ft_word { typedef struct st_ft_word {
byte * pos; byte * pos;
uint len; uint len;
@ -116,7 +113,6 @@ typedef struct st_ftb_param {
} FTB_PARAM; } FTB_PARAM;
int is_stopword(char *word, uint len); int is_stopword(char *word, uint len);
int is_boolean(byte *q, uint len);
uint _ft_make_key(MI_INFO *, uint , byte *, FT_WORD *, my_off_t); uint _ft_make_key(MI_INFO *, uint , byte *, FT_WORD *, my_off_t);
@ -127,6 +123,21 @@ TREE * ft_parse(TREE *, byte *, int);
FT_WORD * ft_linearize(MI_INFO *, uint, byte *, TREE *); FT_WORD * ft_linearize(MI_INFO *, uint, byte *, TREE *);
FT_WORD * _mi_ft_parserecord(MI_INFO *, uint , byte *, const byte *); FT_WORD * _mi_ft_parserecord(MI_INFO *, uint , byte *, const byte *);
FT_DOCLIST * ft_nlq_search(MI_INFO *, uint, byte *, uint); const struct _ft_vft _ft_vft_nlq;
FT_DOCLIST * ft_boolean_search(MI_INFO *, uint, byte *, uint); FT_INFO *ft_init_nlq_search(MI_INFO *, uint, byte *, uint, my_bool);
/* Natural-language search methods; these are the functions that
   _ft_vft_nlq is initialised with. */
int ft_nlq_read_next(FT_INFO *, char *);
float ft_nlq_find_relevance(FT_INFO *, my_off_t );
void ft_nlq_close_search(FT_INFO *);
float ft_nlq_get_relevance(FT_INFO *);
my_off_t ft_nlq_get_docid(FT_INFO *);
void ft_nlq_reinit_search(FT_INFO *);
/*
  Boolean search: method table and the functions it is initialised with.

  The table is only declared here ("extern"); its single definition, with
  the initializer, lives in ft_static.c. Without "extern" every file that
  includes this header would carry its own tentative definition of the
  object, which links only where common symbols are merged.
*/
extern const struct _ft_vft _ft_vft_boolean;
FT_INFO *ft_init_boolean_search(MI_INFO *, uint, byte *, uint, my_bool);
int ft_boolean_read_next(FT_INFO *, char *);
float ft_boolean_find_relevance(FT_INFO *, my_off_t );
void ft_boolean_close_search(FT_INFO *);
float ft_boolean_get_relevance(FT_INFO *);
my_off_t ft_boolean_get_docid(FT_INFO *);
void ft_boolean_reinit_search(FT_INFO *);

View file

@ -317,7 +317,7 @@ int _mi_prefix_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page,
while (page < end) while (page < end)
{ {
uint packed= *page & 128; uint packed= *page & 128;
vseg=page; vseg=page;
if (keyinfo->seg->length >= 127) if (keyinfo->seg->length >= 127)
{ {
@ -333,7 +333,7 @@ int _mi_prefix_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page,
prefix_len=len; prefix_len=len;
else else
{ {
prefix_len=suffix_len; prefix_len=suffix_len;
get_key_length(suffix_len,vseg); get_key_length(suffix_len,vseg);
} }
} }
@ -359,7 +359,7 @@ int _mi_prefix_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page,
for (keyseg=keyinfo->seg+1 ; keyseg->type ; keyseg++ ) for (keyseg=keyinfo->seg+1 ; keyseg->type ; keyseg++ )
{ {
if (keyseg->flag & HA_NULL_PART) if (keyseg->flag & HA_NULL_PART)
{ {
if (!(*from++)) if (!(*from++))
@ -726,13 +726,14 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a,
for ( ; (int) key_length >0 ; key_length=next_key_length, keyseg++) for ( ; (int) key_length >0 ; key_length=next_key_length, keyseg++)
{ {
uchar *end; uchar *end;
uint piks=! (keyseg->flag & HA_NO_SORT);
(*diff_pos)++; (*diff_pos)++;
/* Handle NULL part */ /* Handle NULL part */
if (keyseg->null_bit) if (keyseg->null_bit)
{ {
key_length--; key_length--;
if (*a != *b) if (*a != *b && piks)
{ {
flag = (int) *a - (int) *b; flag = (int) *a - (int) *b;
return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
@ -758,9 +759,9 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a,
get_key_pack_length(b_length,pack_length,b); get_key_pack_length(b_length,pack_length,b);
next_key_length=key_length-b_length-pack_length; next_key_length=key_length-b_length-pack_length;
if ((flag=_mi_compare_text(keyseg->charset,a,a_length,b,b_length, if (piks &&
(my_bool) ((nextflag & SEARCH_PREFIX) && (flag=_mi_compare_text(keyseg->charset,a,a_length,b,b_length,
next_key_length <= 0)))) (my_bool) ((nextflag & SEARCH_PREFIX) && next_key_length <= 0))))
return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
a+=a_length; a+=a_length;
b+=b_length; b+=b_length;
@ -776,9 +777,9 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a,
while (b_length && b[b_length-1] == ' ') while (b_length && b[b_length-1] == ' ')
b_length--; b_length--;
} }
if ((flag=_mi_compare_text(keyseg->charset,a,a_length,b,b_length, if (piks &&
(my_bool) ((nextflag & SEARCH_PREFIX) && (flag=_mi_compare_text(keyseg->charset,a,a_length,b,b_length,
next_key_length <= 0)))) (my_bool) ((nextflag & SEARCH_PREFIX) && next_key_length <= 0))))
return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
a=end; a=end;
b+=length; b+=length;
@ -792,9 +793,9 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a,
get_key_pack_length(b_length,pack_length,b); get_key_pack_length(b_length,pack_length,b);
next_key_length=key_length-b_length-pack_length; next_key_length=key_length-b_length-pack_length;
if ((flag=compare_bin(a,a_length,b,b_length, if (piks &&
(my_bool) ((nextflag & SEARCH_PREFIX) && (flag=compare_bin(a,a_length,b,b_length,
next_key_length <= 0)))) (my_bool) ((nextflag & SEARCH_PREFIX) && next_key_length <= 0))))
return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
a+=a_length; a+=a_length;
b+=b_length; b+=b_length;
@ -803,9 +804,9 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a,
else else
{ {
uint length=keyseg->length; uint length=keyseg->length;
if ((flag=compare_bin(a,length,b,length, if (piks &&
(my_bool) ((nextflag & SEARCH_PREFIX) && (flag=compare_bin(a,length,b,length,
next_key_length <= 0)))) (my_bool) ((nextflag & SEARCH_PREFIX) && next_key_length <= 0))))
return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
a+=length; a+=length;
b+=length; b+=length;
@ -818,9 +819,9 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a,
get_key_pack_length(b_length,pack_length,b); get_key_pack_length(b_length,pack_length,b);
next_key_length=key_length-b_length-pack_length; next_key_length=key_length-b_length-pack_length;
if ((flag=_mi_compare_text(keyseg->charset,a,a_length,b,b_length, if (piks &&
(my_bool) ((nextflag & SEARCH_PREFIX) && (flag=_mi_compare_text(keyseg->charset,a,a_length,b,b_length,
next_key_length <= 0)))) (my_bool) ((nextflag & SEARCH_PREFIX) && next_key_length <= 0))))
return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
a+=a_length; a+=a_length;
b+=b_length; b+=b_length;
@ -834,9 +835,9 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a,
get_key_pack_length(b_length,pack_length,b); get_key_pack_length(b_length,pack_length,b);
next_key_length=key_length-b_length-pack_length; next_key_length=key_length-b_length-pack_length;
if ((flag=compare_bin(a,a_length,b,b_length, if (piks &&
(my_bool) ((nextflag & SEARCH_PREFIX) && (flag=compare_bin(a,a_length,b,b_length,
next_key_length <= 0)))) (my_bool) ((nextflag & SEARCH_PREFIX) && next_key_length <= 0))))
return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
a+=a_length; a+=a_length;
b+=b_length; b+=b_length;
@ -847,7 +848,7 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a,
{ {
int i_1= (int) *((signed char*) a); int i_1= (int) *((signed char*) a);
int i_2= (int) *((signed char*) b); int i_2= (int) *((signed char*) b);
if ((flag = CMP(i_1,i_2))) if (piks && (flag = CMP(i_1,i_2)))
return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
a= end; a= end;
b++; b++;
@ -856,7 +857,7 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a,
case HA_KEYTYPE_SHORT_INT: case HA_KEYTYPE_SHORT_INT:
s_1= mi_sint2korr(a); s_1= mi_sint2korr(a);
s_2= mi_sint2korr(b); s_2= mi_sint2korr(b);
if ((flag = CMP(s_1,s_2))) if (piks && (flag = CMP(s_1,s_2)))
return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
a= end; a= end;
b+= 2; /* sizeof(short int); */ b+= 2; /* sizeof(short int); */
@ -866,7 +867,7 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a,
uint16 us_1,us_2; uint16 us_1,us_2;
us_1= mi_sint2korr(a); us_1= mi_sint2korr(a);
us_2= mi_sint2korr(b); us_2= mi_sint2korr(b);
if ((flag = CMP(us_1,us_2))) if (piks && (flag = CMP(us_1,us_2)))
return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
a= end; a= end;
b+=2; /* sizeof(short int); */ b+=2; /* sizeof(short int); */
@ -875,7 +876,7 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a,
case HA_KEYTYPE_LONG_INT: case HA_KEYTYPE_LONG_INT:
l_1= mi_sint4korr(a); l_1= mi_sint4korr(a);
l_2= mi_sint4korr(b); l_2= mi_sint4korr(b);
if ((flag = CMP(l_1,l_2))) if (piks && (flag = CMP(l_1,l_2)))
return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
a= end; a= end;
b+= 4; /* sizeof(long int); */ b+= 4; /* sizeof(long int); */
@ -883,7 +884,7 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a,
case HA_KEYTYPE_ULONG_INT: case HA_KEYTYPE_ULONG_INT:
u_1= mi_sint4korr(a); u_1= mi_sint4korr(a);
u_2= mi_sint4korr(b); u_2= mi_sint4korr(b);
if ((flag = CMP(u_1,u_2))) if (piks && (flag = CMP(u_1,u_2)))
return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
a= end; a= end;
b+= 4; /* sizeof(long int); */ b+= 4; /* sizeof(long int); */
@ -891,7 +892,7 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a,
case HA_KEYTYPE_INT24: case HA_KEYTYPE_INT24:
l_1=mi_sint3korr(a); l_1=mi_sint3korr(a);
l_2=mi_sint3korr(b); l_2=mi_sint3korr(b);
if ((flag = CMP(l_1,l_2))) if (piks && (flag = CMP(l_1,l_2)))
return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
a= end; a= end;
b+= 3; b+= 3;
@ -899,7 +900,7 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a,
case HA_KEYTYPE_UINT24: case HA_KEYTYPE_UINT24:
l_1=mi_uint3korr(a); l_1=mi_uint3korr(a);
l_2=mi_uint3korr(b); l_2=mi_uint3korr(b);
if ((flag = CMP(l_1,l_2))) if (piks && (flag = CMP(l_1,l_2)))
return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
a= end; a= end;
b+= 3; b+= 3;
@ -907,7 +908,7 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a,
case HA_KEYTYPE_FLOAT: case HA_KEYTYPE_FLOAT:
mi_float4get(f_1,a); mi_float4get(f_1,a);
mi_float4get(f_2,b); mi_float4get(f_2,b);
if ((flag = CMP(f_1,f_2))) if (piks && (flag = CMP(f_1,f_2)))
return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
a= end; a= end;
b+= 4; /* sizeof(float); */ b+= 4; /* sizeof(float); */
@ -915,7 +916,7 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a,
case HA_KEYTYPE_DOUBLE: case HA_KEYTYPE_DOUBLE:
mi_float8get(d_1,a); mi_float8get(d_1,a);
mi_float8get(d_2,b); mi_float8get(d_2,b);
if ((flag = CMP(d_1,d_2))) if (piks && (flag = CMP(d_1,d_2)))
return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
a= end; a= end;
b+= 8; /* sizeof(double); */ b+= 8; /* sizeof(double); */
@ -945,33 +946,40 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a,
for ( ; alength && *a == ' ' ; a++, alength--) ; for ( ; alength && *a == ' ' ; a++, alength--) ;
for ( ; blength && *b == ' ' ; b++, blength--) ; for ( ; blength && *b == ' ' ; b++, blength--) ;
} }
if (piks)
if (*a == '-')
{ {
if (*b != '-') if (*a == '-')
return -1; {
a++; b++; if (*b != '-')
swap(uchar*,a,b); return -1;
swap(int,alength,blength); a++; b++;
swap_flag=1-swap_flag; swap(uchar*,a,b);
alength--; blength--; swap(int,alength,blength);
end=a+alength; swap_flag=1-swap_flag;
alength--; blength--;
end=a+alength;
}
else if (*b == '-')
return 1;
while (alength && (*a == '+' || *a == '0'))
{
a++; alength--;
}
while (blength && (*b == '+' || *b == '0'))
{
b++; blength--;
}
if (alength != blength)
return (alength < blength) ? -1 : 1;
while (a < end)
if (*a++ != *b++)
return ((int) a[-1] - (int) b[-1]);
} }
else if (*b == '-') else
return 1;
while (alength && (*a == '+' || *a == '0'))
{ {
a++; alength--; b+=(end-a);
a=end;
} }
while (blength && (*b == '+' || *b == '0'))
{
b++; blength--;
}
if (alength != blength)
return (alength < blength) ? -1 : 1;
while (a < end)
if (*a++ != *b++)
return ((int) a[-1] - (int) b[-1]);
if (swap_flag) /* Restore pointers */ if (swap_flag) /* Restore pointers */
swap(uchar*,a,b); swap(uchar*,a,b);
@ -983,7 +991,7 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a,
longlong ll_a,ll_b; longlong ll_a,ll_b;
ll_a= mi_sint8korr(a); ll_a= mi_sint8korr(a);
ll_b= mi_sint8korr(b); ll_b= mi_sint8korr(b);
if ((flag = CMP(ll_a,ll_b))) if (piks && (flag = CMP(ll_a,ll_b)))
return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
a= end; a= end;
b+= 8; b+= 8;
@ -994,7 +1002,7 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a,
ulonglong ll_a,ll_b; ulonglong ll_a,ll_b;
ll_a= mi_uint8korr(a); ll_a= mi_uint8korr(a);
ll_b= mi_uint8korr(b); ll_b= mi_uint8korr(b);
if ((flag = CMP(ll_a,ll_b))) if (piks && (flag = CMP(ll_a,ll_b)))
return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag); return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
a= end; a= end;
b+= 8; b+= 8;

View file

@ -774,7 +774,7 @@ static int keys_free(uchar *key, TREE_FREE mode, bulk_insert_param *param)
keyinfo=param->info->s->keyinfo+param->keynr; keyinfo=param->info->s->keyinfo+param->keynr;
keylen=_mi_keylength(keyinfo, key); keylen=_mi_keylength(keyinfo, key);
memcpy(lastkey, key, keylen); memcpy(lastkey, key, keylen);
return _mi_ck_write_btree(param->info,param->keynr,lastkey, return _mi_ck_write_btree(param->info,param->keynr,lastkey,
keylen - param->info->s->rec_reflength); keylen - param->info->s->rec_reflength);
case free_end: case free_end:
if (param->info->s->concurrent_insert) if (param->info->s->concurrent_insert)
@ -794,7 +794,7 @@ int _mi_init_bulk_insert(MI_INFO *info)
if (info->bulk_insert) if (info->bulk_insert)
return 0; return 0;
for (i=num_keys=0 ; i < share->base.keys ; i++) for (i=num_keys=0 ; i < share->base.keys ; i++)
{ {
if (!(key[i].flag & HA_NOSAME) && share->base.auto_key != i+1 if (!(key[i].flag & HA_NOSAME) && share->base.auto_key != i+1
@ -807,7 +807,7 @@ int _mi_init_bulk_insert(MI_INFO *info)
if (!num_keys) if (!num_keys)
return 0; return 0;
info->bulk_insert=(TREE *) info->bulk_insert=(TREE *)
my_malloc((sizeof(TREE)*share->base.keys+ my_malloc((sizeof(TREE)*share->base.keys+
sizeof(bulk_insert_param)*num_keys),MYF(0)); sizeof(bulk_insert_param)*num_keys),MYF(0));
@ -822,13 +822,13 @@ int _mi_init_bulk_insert(MI_INFO *info)
{ {
params->info=info; params->info=info;
params->keynr=i; params->keynr=i;
init_tree(& info->bulk_insert[i], 0, init_tree(& info->bulk_insert[i], 0,
myisam_bulk_insert_tree_size / num_keys, 0, myisam_bulk_insert_tree_size / num_keys, 0,
(qsort_cmp2)keys_compare, 0, (qsort_cmp2)keys_compare, 0,
(tree_element_free) keys_free, (void *)params++); (tree_element_free) keys_free, (void *)params++);
} }
else else
info->bulk_insert[i].root=0; info->bulk_insert[i].root=0;
} }
return 0; return 0;

View file

@ -1,5 +1,5 @@
q item id x q item id x
aaaaaaaaa dsaass de sushi 1 1.92378665219675 aaaaaaaaa dsaass de sushi 1 1.92378664016724
aaaaaaaaa dsaass de Bolo de Chocolate 2 0 aaaaaaaaa dsaass de Bolo de Chocolate 2 0
aaaaaaaaa dsaass de Feijoada 3 0 aaaaaaaaa dsaass de Feijoada 3 0
aaaaaaaaa dsaass de Mousse de Chocolate 4 0 aaaaaaaaa dsaass de Mousse de Chocolate 4 0
@ -8,7 +8,7 @@ ssde df s fsda sad er um chocolate Snickers 6 0
aaaaaaaaa dsaass de Bife 7 0 aaaaaaaaa dsaass de Bife 7 0
aaaaaaaaa dsaass de Pizza de Salmao 8 0 aaaaaaaaa dsaass de Pizza de Salmao 8 0
q item id x q item id x
aaaaaaaaa dsaass de sushi 1 1.92378665219675 aaaaaaaaa dsaass de sushi 1 1.92378664016724
aaaaaaaaa dsaass de Bolo de Chocolate 2 0 aaaaaaaaa dsaass de Bolo de Chocolate 2 0
aaaaaaaaa dsaass de Feijoada 3 0 aaaaaaaaa dsaass de Feijoada 3 0
aaaaaaaaa dsaass de Mousse de Chocolate 4 0 aaaaaaaaa dsaass de Mousse de Chocolate 4 0

View file

@ -1,5 +1,5 @@
match(t1.texte,t1.sujet,t1.motsclefs) against('droit') match(t1.texte,t1.sujet,t1.motsclefs) against('droit')
0 0
0 0
0.67003110026735 0.67003107070923
0 0

View file

@ -1,12 +1,12 @@
a MATCH b AGAINST ('lala lkjh') a MATCH b AGAINST ('lala lkjh')
1 0.67003110026735 1 0.67003107070923
2 0 2 0
3 0 3 0
a MATCH c AGAINST ('lala lkjh') a MATCH c AGAINST ('lala lkjh')
1 0 1 0
2 0.67756324121582 2 0.67756325006485
3 0 3 0
a MATCH b,c AGAINST ('lala lkjh') a MATCH b,c AGAINST ('lala lkjh')
1 0.64840710366884 1 0.64840710163116
2 0.66266459031789 2 0.66266459226608
3 0 3 0

View file

@ -1,19 +1,19 @@
a MATCH (message) AGAINST ('steve') a MATCH (message) AGAINST ('steve')
4 0.90587321329654 4 0.90587323904037
7 0.89568988462614 7 0.89568990468979
a MATCH (message) AGAINST ('steve') a MATCH (message) AGAINST ('steve')
4 0.90587321329654 4 0.90587323904037
7 0.89568988462614 7 0.89568990468979
a MATCH (message) AGAINST ('steve') a MATCH (message) AGAINST ('steve')
7 0.89568988462614 7 0.89568990468979
4 0.90587321329654 4 0.90587323904037
a MATCH (message) AGAINST ('steve') a MATCH (message) AGAINST ('steve')
7 0.89568988462614 7 0.89568990468979
a rel a rel
1 0 1 0
2 0 2 0
3 0 3 0
5 0 5 0
6 0 6 0
7 0.89568988462614 7 0.89568990468979
4 0.90587321329654 4 0.90587323904037

View file

@ -5,10 +5,21 @@
drop table if exists t1,t2,t3; drop table if exists t1,t2,t3;
CREATE TABLE t1 (a VARCHAR(200), b TEXT, FULLTEXT (a,b)); CREATE TABLE t1 (a VARCHAR(200), b TEXT, FULLTEXT (a,b));
INSERT INTO t1 VALUES('MySQL has now support', 'for full-text search'),('Full-text indexes', 'are called collections'),('Only MyISAM tables','support collections'),('Function MATCH ... AGAINST()','is used to do a search'),('Full-text search in MySQL', 'implements vector space model'); INSERT INTO t1 VALUES('MySQL has now support', 'for full-text search'),
('Full-text indexes', 'are called collections'),
('Only MyISAM tables','support collections'),
('Function MATCH ... AGAINST()','is used to do a search'),
('Full-text search in MySQL', 'implements vector space model');
select * from t1 where MATCH(a,b) AGAINST ("collections"); select * from t1 where MATCH(a,b) AGAINST ("collections");
select * from t1 where MATCH(a,b) AGAINST ("indexes"); select * from t1 where MATCH(a,b) AGAINST ("indexes");
select * from t1 where MATCH(a,b) AGAINST ("indexes collections"); select * from t1 where MATCH(a,b) AGAINST ("indexes collections");
select * from t1 where MATCH(a,b) AGAINST("support -collections" IN BOOLEAN MODE);
select * from t1 where MATCH(a,b) AGAINST("support collections" IN BOOLEAN MODE);
select * from t1 where MATCH(a,b) AGAINST("support +collections" IN BOOLEAN MODE);
select * from t1 where MATCH(a,b) AGAINST("sear*" IN BOOLEAN MODE);
select * from t1 where MATCH(a,b) AGAINST("+support +collections" IN BOOLEAN MODE);
select * from t1 where MATCH(a,b) AGAINST("+search" IN BOOLEAN MODE);
select * from t1 where MATCH(a,b) AGAINST("+search +(support vector)" IN BOOLEAN MODE);
delete from t1 where a like "MySQL%"; delete from t1 where a like "MySQL%";
drop table t1; drop table t1;

View file

@ -1197,7 +1197,7 @@ int ha_myisam::ft_read(byte * buf)
thread_safe_increment(ha_read_next_count,&LOCK_status); // why ? thread_safe_increment(ha_read_next_count,&LOCK_status); // why ?
error=ft_nlq_read_next((FT_DOCLIST *) ft_handler,(char*) buf); error=ft_handler->please->read_next(ft_handler,(char*) buf);
table->status=error ? STATUS_NOT_FOUND: 0; table->status=error ? STATUS_NOT_FOUND: 0;
return error; return error;

View file

@ -76,9 +76,9 @@ class ha_myisam: public handler
int index_next_same(byte *buf, const byte *key, uint keylen); int index_next_same(byte *buf, const byte *key, uint keylen);
int index_end() { ft_handler=NULL; return 0; } int index_end() { ft_handler=NULL; return 0; }
int ft_init() int ft_init()
{ if(!ft_handler) return 1; ft_nlq_reinit_search(ft_handler); return 0; } { if(!ft_handler) return 1; ft_handler->please->reinit_search(ft_handler); return 0; }
void *ft_init_ext(uint inx,const byte *key, uint keylen, bool presort) FT_INFO *ft_init_ext(uint mode, uint inx,const byte *key, uint keylen, bool presort)
{ return ft_nlq_init_search(file,inx,(byte*) key,keylen,presort); } { return ft_init_search(mode, file,inx,(byte*) key,keylen,presort); }
int ft_read(byte *buf); int ft_read(byte *buf);
int rnd_init(bool scan=1); int rnd_init(bool scan=1);
int rnd_next(byte *buf); int rnd_next(byte *buf);

View file

@ -21,6 +21,8 @@
#pragma interface /* gcc class implementation */ #pragma interface /* gcc class implementation */
#endif #endif
#include <ft_global.h>
#ifndef NO_HASH #ifndef NO_HASH
#define NO_HASH /* Not yet implemented */ #define NO_HASH /* Not yet implemented */
#endif #endif
@ -201,7 +203,7 @@ public:
time_t check_time; time_t check_time;
time_t update_time; time_t update_time;
ulong mean_rec_length; /* physical reclength */ ulong mean_rec_length; /* physical reclength */
void *ft_handler; FT_INFO *ft_handler;
bool auto_increment_column_changed; bool auto_increment_column_changed;
handler(TABLE *table_arg) : table(table_arg),active_index(MAX_REF_PARTS), handler(TABLE *table_arg) : table(table_arg),active_index(MAX_REF_PARTS),
@ -247,9 +249,9 @@ public:
virtual int index_next_same(byte *buf, const byte *key, uint keylen); virtual int index_next_same(byte *buf, const byte *key, uint keylen);
virtual int ft_init() virtual int ft_init()
{ return -1; } { return -1; }
virtual void *ft_init_ext(uint inx,const byte *key, uint keylen, virtual FT_INFO *ft_init_ext(uint mode,uint inx,const byte *key, uint keylen,
bool presort) bool presort)
{ return (void *)NULL; } { return NULL; }
virtual int ft_read(byte *buf) { return -1; } virtual int ft_read(byte *buf) { return -1; }
virtual int rnd_init(bool scan=1)=0; virtual int rnd_init(bool scan=1)=0;
virtual int rnd_end() { return 0; } virtual int rnd_end() { return 0; }

View file

@ -1903,46 +1903,6 @@ err:
return 0; return 0;
} }
double Item_func_match_nl::val()
{
if (ft_handler==NULL)
init_search(1);
if ((null_value= (ft_handler==NULL)))
return 0.0;
if (join_key)
{
if (table->file->ft_handler)
return ft_nlq_get_relevance(ft_handler);
join_key=0; // Magic here ! See ha_myisam::ft_read()
}
/* we'll have to find ft_relevance manually in ft_handler array */
int a,b,c;
FT_DOC *docs=((FT_DOCLIST *)ft_handler)->doc;
my_off_t docid=table->file->row_position();
if ((null_value=(docid==HA_OFFSET_ERROR)))
return 0.0;
// Assuming docs[] is sorted by dpos...
for (a=0, b=((FT_DOCLIST *)ft_handler)->ndocs, c=(a+b)/2; b-a>1; c=(a+b)/2)
{
if (docs[c].dpos > docid)
b=c;
else
a=c;
}
if (docs[a].dpos == docid)
return docs[a].weight;
else
return 0.0;
}
void Item_func_match::init_search(bool no_order) void Item_func_match::init_search(bool no_order)
{ {
if (ft_handler) if (ft_handler)
@ -2113,6 +2073,75 @@ bool Item_func_match::eq(const Item *item) const
return 0; return 0;
} }
#if 0
double Item_func_match::val()
{
if (ft_handler==NULL)
init_search(1);
if ((null_value= (ft_handler==NULL)))
return 0.0;
if (join_key)
{
if (table->file->ft_handler)
return ft_handler->please->get_relevance(ft_handler);
join_key=0;
}
my_off_t docid=table->file->row_position();
if ((null_value=(docid==HA_OFFSET_ERROR)))
return 0.0;
else
return ft_handler->please->find_relevance(ft_handler, docid);
}
#endif
double Item_func_match_nl::val()
{
if (ft_handler==NULL)
init_search(1);
if ((null_value= (ft_handler==NULL)))
return 0.0;
if (join_key)
{
if (table->file->ft_handler)
return ft_handler->please->get_relevance(ft_handler);
join_key=0;
}
my_off_t docid=table->file->row_position();
if ((null_value=(docid==HA_OFFSET_ERROR)))
return 0.0;
else
return ft_handler->please->find_relevance(ft_handler, docid);
}
double Item_func_match_bool::val()
{
if (ft_handler==NULL)
init_search(1);
if ((null_value= (ft_handler==NULL)))
return 0.0;
if (join_key)
{
if (table->file->ft_handler)
return ft_handler->please->get_relevance(ft_handler);
join_key=0;
}
null_value=1;
return -1.0;
}
/*************************************************************************** /***************************************************************************
System variables System variables

View file

@ -863,7 +863,7 @@ public:
uint key; uint key;
bool join_key; bool join_key;
Item_func_match *master; Item_func_match *master;
void * ft_handler; FT_INFO * ft_handler;
Item_func_match(List<Item> &a, Item *b): Item_real_func(b), Item_func_match(List<Item> &a, Item *b): Item_real_func(b),
fields(a), table(0), join_key(0), master(0), ft_handler(0) {} fields(a), table(0), join_key(0), master(0), ft_handler(0) {}
@ -871,14 +871,13 @@ public:
{ {
if (!master && ft_handler) if (!master && ft_handler)
{ {
ft_handler_close(); ft_handler->please->close_search(ft_handler);
ft_handler=0;
if(join_key) if(join_key)
table->file->ft_handler=0; table->file->ft_handler=0;
} }
} }
virtual int ft_handler_init(const byte *key, uint keylen, bool presort) virtual int ft_handler_init(const byte *key, uint keylen, bool presort) =0;
{ return 1; }
virtual int ft_handler_close() { return 1; }
enum Functype functype() const { return FT_FUNC; } enum Functype functype() const { return FT_FUNC; }
void update_used_tables() {} void update_used_tables() {}
bool fix_fields(THD *thd,struct st_table_list *tlist); bool fix_fields(THD *thd,struct st_table_list *tlist);
@ -896,18 +895,16 @@ public:
const char *func_name() const { return "match_nl"; } const char *func_name() const { return "match_nl"; }
double val(); double val();
int ft_handler_init(const byte *query, uint querylen, bool presort) int ft_handler_init(const byte *query, uint querylen, bool presort)
{ ft_handler=table->file->ft_init_ext(key, query, querylen, presort); } { ft_handler=table->file->ft_init_ext(FT_NL,key, query, querylen, presort); }
int ft_handler_close() { ft_nlq_close_search(ft_handler); ft_handler=0; }
}; };
#if 0
class Item_func_match_bool :public Item_func_match class Item_func_match_bool :public Item_func_match
{ {
public: public:
Item_func_match_nl(List<Item> &a, Item *b): Item_func_match(a,b) {} Item_func_match_bool(List<Item> &a, Item *b): Item_func_match(a,b) {}
const char *func_name() const { return "match_bool"; } const char *func_name() const { return "match_bool"; }
double val(); double val();
int ft_handler_init(const byte *query, uint querylen, bool presort) int ft_handler_init(const byte *query, uint querylen, bool presort)
{ ft_handler=table->file->ft_init_ext(key, query, querylen, presort); } { ft_handler=table->file->ft_init_ext(FT_BOOL,key, query, querylen, presort); }
int ft_handler_close() { ft_close_search(ft_handler); ft_handler=0; }
}; };
#endif

View file

@ -73,6 +73,7 @@ static SYMBOL symbols[] = {
{ "BINLOG", SYM(BINLOG_SYM),0,0}, { "BINLOG", SYM(BINLOG_SYM),0,0},
{ "BLOB", SYM(BLOB_SYM),0,0}, { "BLOB", SYM(BLOB_SYM),0,0},
{ "BOOL", SYM(BOOL_SYM),0,0}, { "BOOL", SYM(BOOL_SYM),0,0},
{ "BOOLEAN", SYM(BOOLEAN_SYM),0,0},
{ "BOTH", SYM(BOTH),0,0}, { "BOTH", SYM(BOTH),0,0},
{ "BY", SYM(BY),0,0}, { "BY", SYM(BY),0,0},
{ "CASCADE", SYM(CASCADE),0,0}, { "CASCADE", SYM(CASCADE),0,0},

View file

@ -23,7 +23,7 @@
#define Lex current_lex #define Lex current_lex
#define Select Lex->select #define Select Lex->select
#include "mysql_priv.h" #include "mysql_priv.h"
#include "slave.h" #include "slave.h"
#include "sql_acl.h" #include "sql_acl.h"
#include "lex_symbol.h" #include "lex_symbol.h"
#include <myisam.h> #include <myisam.h>
@ -149,6 +149,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b,int *yystacksize);
%token BINARY %token BINARY
%token BIT_SYM %token BIT_SYM
%token BOOL_SYM %token BOOL_SYM
%token BOOLEAN_SYM
%token BOTH %token BOTH
%token BY %token BY
%token CASCADE %token CASCADE
@ -499,7 +500,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b,int *yystacksize);
using_list using_list
%type <item_list> %type <item_list>
expr_list udf_expr_list when_list ident_list expr_list udf_expr_list when_list ident_list ident_list_arg
%type <key_type> %type <key_type>
key_type opt_unique_or_fulltext key_type opt_unique_or_fulltext
@ -1547,12 +1548,12 @@ simple_expr:
| '!' expr %prec NEG { $$= new Item_func_not($2); } | '!' expr %prec NEG { $$= new Item_func_not($2); }
| '(' expr ')' { $$= $2; } | '(' expr ')' { $$= $2; }
| '{' ident expr '}' { $$= $3; } | '{' ident expr '}' { $$= $3; }
| MATCH '(' ident_list ')' AGAINST '(' expr ')' | MATCH ident_list_arg AGAINST '(' expr ')'
{ Select->ftfunc_list.push_back( { Select->ftfunc_list.push_back((Item_func_match *)
(Item_func_match *)($$=new Item_func_match_nl(*$3,$7))); } $$=new Item_func_match_nl(*$2,$5)); }
| MATCH ident_list AGAINST '(' expr ')' | MATCH ident_list_arg AGAINST '(' expr IN_SYM BOOLEAN_SYM MODE_SYM ')'
{ Select->ftfunc_list.push_back( { Select->ftfunc_list.push_back((Item_func_match *)
(Item_func_match *)($$=new Item_func_match_nl(*$2,$5))); } $$=new Item_func_match_bool(*$2,$5)); }
| BINARY expr %prec NEG { $$= new Item_func_binary($2); } | BINARY expr %prec NEG { $$= new Item_func_binary($2); }
| CASE_SYM opt_expr WHEN_SYM when_list opt_else END | CASE_SYM opt_expr WHEN_SYM when_list opt_else END
{ $$= new Item_func_case(* $4, $2, $5 ) } { $$= new Item_func_case(* $4, $2, $5 ) }
@ -1798,6 +1799,10 @@ expr_list2:
expr { Select->expr_list.head()->push_back($1); } expr { Select->expr_list.head()->push_back($1); }
| expr_list2 ',' expr { Select->expr_list.head()->push_back($3); } | expr_list2 ',' expr { Select->expr_list.head()->push_back($3); }
ident_list_arg:
ident_list { $$= $1; }
| '(' ident_list ')' { $$= $2; }
ident_list: ident_list:
{ Select->expr_list.push_front(new List<Item>); } { Select->expr_list.push_front(new List<Item>); }
ident_list2 ident_list2
@ -2816,6 +2821,7 @@ keyword:
| BERKELEY_DB_SYM {} | BERKELEY_DB_SYM {}
| BIT_SYM {} | BIT_SYM {}
| BOOL_SYM {} | BOOL_SYM {}
| BOOLEAN_SYM {}
| CHANGED {} | CHANGED {}
| CHECKSUM_SYM {} | CHECKSUM_SYM {}
| CHECK_SYM {} | CHECK_SYM {}