From a902712b93b941eb728ee66a36b5c5136c3331e7 Mon Sep 17 00:00:00 2001
From: "serg@serg.mysql.com" <>
Date: Tue, 7 May 2002 18:27:39 +0000
Subject: [PATCH] "duplicated rows" bug

---
 myisam/ft_boolean_search.c   | 55 ++++++++++++++++++++++++++++--------
 mysql-test/r/fulltext.result |  5 ++++
 mysql-test/t/fulltext.test   |  4 +++
 3 files changed, 52 insertions(+), 12 deletions(-)

diff --git a/myisam/ft_boolean_search.c b/myisam/ft_boolean_search.c
index c3883ed0961..a026a8613dc 100644
--- a/myisam/ft_boolean_search.c
+++ b/myisam/ft_boolean_search.c
@@ -77,7 +77,7 @@ typedef struct st_ftb_word {
   my_off_t  docid[2];             /* for index search and for scan */
   uint      ndepth;
   int       len;
-  /* ... there can be docid cache added here. SerG */
+  /* ... docid cache can be added here. SerG */
   byte      word[1];
 } FTB_WORD;
 
@@ -90,6 +90,7 @@ typedef struct st_ft_info {
   uint       with_scan;
   FTB_EXPR  *root;
   QUEUE      queue;
+  TREE       no_dupes;
   FTB_WORD **list;
   MEM_ROOT   mem_root;
 } FTB;
@@ -174,7 +175,12 @@ void _ftb_parse_query(FTB *ftb, byte **start, byte *end,
   return;
 }
 
-void  _ftb_init_index_search(FT_INFO *ftb)
+static int _ftb_no_dupes_cmp(void* not_used, const void *a,const void *b)
+{
+  return CMP_NUM((*((my_off_t*)a)), (*((my_off_t*)b)));
+}
+
+void _ftb_init_index_search(FT_INFO *ftb)
 {
   int i, r;
   FTB_WORD   *ftbw;
@@ -193,16 +199,31 @@ void  _ftb_init_index_search(FT_INFO *ftb)
   {
     ftbw=(FTB_WORD *)(ftb->queue.root[i]);
 
-    if (ftbw->flags&FTB_FLAG_TRUNC &&
-        (ftbw->up->ythresh > test(ftbw->flags&FTB_FLAG_YES)))
-    {
-      /* no need to search for this prefix in the index -
-       * it cannot ADD new matches, and to REMOVE half-matched
-       * rows we do scan anyway */
-      ftbw->docid[0]=HA_POS_ERROR;
-      ftbw->up->yweaks++;
-      continue;
-    }
+    if (ftbw->flags&FTB_FLAG_TRUNC) /* special treatment :(( */
+      if (ftbw->up->ythresh > test(ftbw->flags&FTB_FLAG_YES))
+      {
+        /* no need to search for this prefix in the index -
+         * it cannot ADD new matches, and to REMOVE half-matched
+         * rows we do scan anyway */
+        ftbw->docid[0]=HA_POS_ERROR;
+        ftbw->up->yweaks++;
+        continue;
+      }
+      else
+      {
+        /* We have to index-search for this prefix.
+         * It may cause duplicates, as in the index (sorted by <word,docid>)
+         *   <aaaa,row1>
+         *   <aabb,row2>
+         *   <aacc,row1>
+         * Searching for "aa*" will find row1 twice...
+         */
+        if (!is_tree_inited(& ftb->no_dupes))
+        {
+          init_tree(& ftb->no_dupes,0,0,sizeof(my_off_t),
+              _ftb_no_dupes_cmp,0,0,0);
+        }
+      }
 
     r=_mi_search(info, keyinfo, (uchar*) ftbw->word, ftbw->len,
                               SEARCH_FIND | SEARCH_BIGGER, keyroot);
@@ -250,6 +271,7 @@ FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, byte *query,
     default_charset_info              :
     info->s->keyinfo[keynr].seg->charset);
   ftb->with_scan=0;
+  bzero(& ftb->no_dupes, sizeof(TREE));
 
   init_alloc_root(&ftb->mem_root, 1024, 1024);
 
@@ -438,6 +460,11 @@ int ft_boolean_read_next(FT_INFO *ftb, char *record)
         ftbe->yesses>=(ftbe->ythresh-ftbe->yweaks) && !ftbe->nos)
     {
       /* curdoc matched ! */
+      if (is_tree_inited(& ftb->no_dupes) &&
+          tree_insert(& ftb->no_dupes, &curdoc, 0)->count >1)
+        /* but it managed to get past this line once */
+        continue;
+
       info->lastpos=curdoc;
       info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); /* why is this ? */
 
@@ -523,6 +550,10 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length)
 
 void ft_boolean_close_search(FT_INFO *ftb)
 {
+  if (is_tree_inited(& ftb->no_dupes))
+  {
+    delete_tree(& ftb->no_dupes);
+  }
   free_root(& ftb->mem_root, MYF(0));
   my_free((gptr)ftb,MYF(0));
 }
diff --git a/mysql-test/r/fulltext.result b/mysql-test/r/fulltext.result
index e8d9c9aa708..5ee90c38585 100644
--- a/mysql-test/r/fulltext.result
+++ b/mysql-test/r/fulltext.result
@@ -173,4 +173,9 @@ CREATE TABLE t1 (a int(11), b text, FULLTEXT KEY (b)) TYPE=MyISAM;
 insert into t1 values (1,"I wonder why the fulltext index doesnt work?");
 SELECT * from t1 where MATCH (b) AGAINST ('apples');
 a	b
+insert into t1 values (2,"fullaaa fullzzz");
+select * from t1 where match b against ('full*' in boolean mode);
+a	b
+2	fullaaa fullzzz
+1	I wonder why the fulltext index doesnt work?
 drop table t1;
diff --git a/mysql-test/t/fulltext.test b/mysql-test/t/fulltext.test
index 0334910fe6c..a7a40706e89 100644
--- a/mysql-test/t/fulltext.test
+++ b/mysql-test/t/fulltext.test
@@ -145,4 +145,8 @@ CREATE TABLE t1 (a int(11), b text, FULLTEXT KEY (b)) TYPE=MyISAM;
 insert into t1 values (1,"I wonder why the fulltext index doesnt work?");
 SELECT * from t1 where MATCH (b) AGAINST ('apples');
 
+insert into t1 values (2,"fullaaa fullzzz");
+select * from t1 where match b against ('full*' in boolean mode);
+
 drop table t1;
+