bug#3964 and related issues: FTB problems with charsets where one byte can match many

correct prefix compare with my_strnncoll


include/m_ctype.h:
  6th argument to my_strnncoll to handle prefix comparison
myisam/ft_boolean_search.c:
  bug#3964 and related issues: problems with charsets where one byte can match many
  *correct* prefix compare with my_strnncoll
  *correct* backup of info->lastkey
mysql-test/r/fulltext.result:
  6th argument to my_strnncoll to handle prefix comparison
mysql-test/t/fulltext.test:
  6th argument to my_strnncoll to handle prefix comparison
mysys/my_handler.c:
  6th argument to my_strnncoll to handle prefix comparison
sql/sql_parse.cc:
  cleanup
strings/ctype-big5.c:
  6th argument to my_strnncoll to handle prefix comparison
strings/ctype-bin.c:
  6th argument to my_strnncoll to handle prefix comparison
strings/ctype-czech.c:
  6th argument to my_strnncoll to handle prefix comparison
strings/ctype-gbk.c:
  6th argument to my_strnncoll to handle prefix comparison
strings/ctype-latin1.c:
  6th argument to my_strnncoll to handle prefix comparison
strings/ctype-mb.c:
  6th argument to my_strnncoll to handle prefix comparison
strings/ctype-simple.c:
  6th argument to my_strnncoll to handle prefix comparison
strings/ctype-sjis.c:
  6th argument to my_strnncoll to handle prefix comparison
strings/ctype-tis620.c:
  6th argument to my_strnncoll to handle prefix comparison
strings/ctype-uca.c:
  6th argument to my_strnncoll to handle prefix comparison
strings/ctype-ucs2.c:
  6th argument to my_strnncoll to handle prefix comparison
strings/ctype-utf8.c:
  6th argument to my_strnncoll to handle prefix comparison
strings/ctype-win1250ch.c:
  6th argument to my_strnncoll to handle prefix comparison
This commit is contained in:
unknown 2004-06-10 21:18:57 +02:00
parent 2e8fc0cae2
commit 1e24da548b
19 changed files with 143 additions and 60 deletions

View file

@ -104,7 +104,7 @@ typedef struct my_collation_handler_st
{
/* Collation routines */
int (*strnncoll)(struct charset_info_st *,
const uchar *, uint, const uchar *, uint);
const uchar *, uint, const uchar *, uint, my_bool);
int (*strnncollsp)(struct charset_info_st *,
const uchar *, uint, const uchar *, uint);
int (*strnxfrm)(struct charset_info_st *,
@ -251,7 +251,7 @@ extern CHARSET_INFO my_charset_cp1250_czech_ci;
extern int my_strnxfrm_simple(CHARSET_INFO *, uchar *, uint, const uchar *,
uint);
extern int my_strnncoll_simple(CHARSET_INFO *, const uchar *, uint,
const uchar *, uint);
const uchar *, uint, my_bool);
extern int my_strnncollsp_simple(CHARSET_INFO *, const uchar *, uint,
const uchar *, uint);
@ -385,7 +385,7 @@ extern my_bool my_parse_charset_xml(const char *bug, uint len,
#define my_binary_compare(s) ((s)->state & MY_CS_BINSORT)
#define use_strnxfrm(s) ((s)->state & MY_CS_STRNXFRM)
#define my_strnxfrm(s, a, b, c, d) ((s)->coll->strnxfrm((s), (a), (b), (c), (d)))
#define my_strnncoll(s, a, b, c, d) ((s)->coll->strnncoll((s), (a), (b), (c), (d)))
#define my_strnncoll(s, a, b, c, d) ((s)->coll->strnncoll((s), (a), (b), (c), (d), 0))
#define my_like_range(s, a, b, c, d, e, f, g, h, i, j) \
((s)->coll->like_range((s), (a), (b), (c), (d), (e), (f), (g), (h), (i), (j)))
#define my_wildcmp(cs,s,se,w,we,e,o,m) ((cs)->coll->wildcmp((cs),(s),(se),(w),(we),(e),(o),(m)))

View file

@ -53,10 +53,10 @@ static double _nwghts[11]=
-3.796875000000000};
static double *nwghts=_nwghts+5; /* nwghts[i] = -0.5*1.5**i */
#define FTB_FLAG_TRUNC 1 /* MUST be 1 */
#define FTB_FLAG_TRUNC 1 /* */
#define FTB_FLAG_YES 2 /* no two from these three */
#define FTB_FLAG_NO 4 /* YES, NO, WONLY */
#define FTB_FLAG_WONLY 8 /* should be ever set both */
#define FTB_FLAG_WONLY 8 /* should be _ever_ set both */
typedef struct st_ftb_expr FTB_EXPR;
struct st_ftb_expr
@ -157,6 +157,7 @@ static void _ftb_parse_query(FTB *ftb, byte **start, byte *end,
w.len+extra));
ftbw->len=w.len+1;
ftbw->flags=0;
ftbw->off=0;
if (param.yesno>0) ftbw->flags|=FTB_FLAG_YES;
if (param.yesno<0) ftbw->flags|=FTB_FLAG_NO;
if (param.trunc) ftbw->flags|=FTB_FLAG_TRUNC;
@ -203,23 +204,26 @@ static int _ftb_no_dupes_cmp(void* not_used __attribute__((unused)),
static int _ft2_search(FTB *ftb, FTB_WORD *ftbw, my_bool init_search)
{
int r;
uint off;
int subkeys=1;
my_bool can_go_down;
MI_INFO *info=ftb->info;
uint off, extra=HA_FT_WLEN+info->s->base.rec_reflength;
byte *lastkey_buf=ftbw->word+ftbw->off;
if (ftbw->flags & FTB_FLAG_TRUNC)
lastkey_buf+=ftbw->len;
if (init_search)
{
ftbw->key_root=info->s->state.key_root[ftb->keynr];
ftbw->keyinfo=info->s->keyinfo+ftb->keynr;
ftbw->off=0;
r=_mi_search(info, ftbw->keyinfo, (uchar*) ftbw->word, ftbw->len,
SEARCH_FIND | SEARCH_BIGGER, ftbw->key_root);
}
else
{
r=_mi_search(info, ftbw->keyinfo, (uchar*) ftbw->word+ftbw->off,
r=_mi_search(info, ftbw->keyinfo, (uchar*) lastkey_buf,
USE_WHOLE_KEY, SEARCH_BIGGER, ftbw->key_root);
}
@ -230,7 +234,7 @@ static int _ft2_search(FTB *ftb, FTB_WORD *ftbw, my_bool init_search)
if (can_go_down)
{
/* going down ? */
off=info->lastkey_length-HA_FT_WLEN-info->s->base.rec_reflength;
off=info->lastkey_length-extra;
subkeys=ft_sintXkorr(info->lastkey+off);
}
if (subkeys<0 || info->lastpos < info->state->data_file_length)
@ -243,11 +247,11 @@ static int _ft2_search(FTB *ftb, FTB_WORD *ftbw, my_bool init_search)
if (!r && !ftbw->off)
{
r= mi_compare_text(ftb->charset,
info->lastkey + (ftbw->flags & FTB_FLAG_TRUNC),
ftbw->len - (ftbw->flags & FTB_FLAG_TRUNC),
(uchar*) ftbw->word + (ftbw->flags & FTB_FLAG_TRUNC),
ftbw->len - (ftbw->flags & FTB_FLAG_TRUNC),
0,0);
info->lastkey+1,
info->lastkey_length-extra-1,
(uchar*) ftbw->word+1,
ftbw->len-1,
(my_bool) (ftbw->flags & FTB_FLAG_TRUNC),0);
}
if (r) /* not found */
@ -269,8 +273,7 @@ static int _ft2_search(FTB *ftb, FTB_WORD *ftbw, my_bool init_search)
}
/* going up to the first-level tree to continue search there */
_mi_dpointer(info, (uchar*) (ftbw->word+ftbw->off+HA_FT_WLEN),
ftbw->key_root);
_mi_dpointer(info, (uchar*) (lastkey_buf+HA_FT_WLEN), ftbw->key_root);
ftbw->key_root=info->s->state.key_root[ftb->keynr];
ftbw->keyinfo=info->s->keyinfo+ftb->keynr;
ftbw->off=0;
@ -278,7 +281,10 @@ static int _ft2_search(FTB *ftb, FTB_WORD *ftbw, my_bool init_search)
}
/* matching key found */
memcpy(ftbw->word+ftbw->off, info->lastkey, info->lastkey_length);
memcpy(lastkey_buf, info->lastkey, info->lastkey_length);
if (lastkey_buf == ftbw->word)
ftbw->len=info->lastkey_length-extra;
/* going down ? */
if (subkeys<0)
{
@ -291,7 +297,7 @@ static int _ft2_search(FTB *ftb, FTB_WORD *ftbw, my_bool init_search)
ftbw->keyinfo=& info->s->ft2_keyinfo;
r=_mi_search_first(info, ftbw->keyinfo, ftbw->key_root);
DBUG_ASSERT(r==0); /* found something */
memcpy(ftbw->word+off, info->lastkey, info->lastkey_length);
memcpy(lastkey_buf+off, info->lastkey, info->lastkey_length);
}
ftbw->docid[0]=info->lastpos;
return 0;
@ -356,7 +362,7 @@ static void _ftb_init_index_search(FT_INFO *ftb)
else
reset_tree(& ftb->no_dupes);
}
if (_ft2_search(ftb, ftbw, 1))
return;
}

View file

@ -363,3 +363,14 @@ SELECT t, collation(t) FROM t1 WHERE MATCH t AGAINST ('Osnabrueck');
t collation(t)
aus Osnabrück latin1_german2_ci
DROP TABLE t1;
CREATE TABLE t1 (s varchar(255), FULLTEXT (s)) DEFAULT CHARSET=utf8;
insert into t1 (s) values ('pära para para'),('para para para');
select * from t1 where match(s) against('para' in boolean mode);
s
pära para para
para para para
select * from t1 where match(s) against('par*' in boolean mode);
s
pära para para
para para para
DROP TABLE t1;

View file

@ -279,3 +279,12 @@ SELECT t, collation(t) FROM t1 WHERE MATCH t AGAINST ('Osnabr
SELECT t, collation(t) FROM t1 WHERE MATCH t AGAINST ('Osnabrueck');
DROP TABLE t1;
#
# bug#3964
#
CREATE TABLE t1 (s varchar(255), FULLTEXT (s)) DEFAULT CHARSET=utf8;
insert into t1 (s) values ('pära para para'),('para para para');
select * from t1 where match(s) against('para' in boolean mode);
select * from t1 where match(s) against('par*' in boolean mode);
DROP TABLE t1;

View file

@ -21,13 +21,11 @@ int mi_compare_text(CHARSET_INFO *charset_info, uchar *a, uint a_length,
uchar *b, uint b_length, my_bool part_key,
my_bool skip_end_space)
{
if (part_key && b_length < a_length)
a_length=b_length;
if (skip_end_space)
return charset_info->coll->strnncollsp(charset_info, a, a_length,
b, b_length);
return charset_info->coll->strnncoll(charset_info, a, a_length,
b, b_length);
b, b_length, part_key);
}

View file

@ -1191,7 +1191,7 @@ int mysql_table_dump(THD* thd, char* db, char* tbl_name, int fd)
}
net_flush(&thd->net);
if ((error= table->file->dump(thd,fd)))
my_error(ER_GET_ERRNO, MYF(0));
my_error(ER_GET_ERRNO, MYF(0), error);
err:
close_thread_tables(thd);

View file

@ -251,11 +251,12 @@ static int my_strnncoll_big5_internal(const uchar **a_res,
static int my_strnncoll_big5(CHARSET_INFO *cs __attribute__((unused)),
const uchar *a, uint a_length,
const uchar *b, uint b_length)
const uchar *b, uint b_length,
my_bool b_is_prefix)
{
uint length= min(a_length, b_length);
int res= my_strnncoll_big5_internal(&a, &b, length);
return res ? res : (int) (a_length - b_length);
return res ? res : (int)((b_is_prefix ? length : a_length) - b_length);
}

View file

@ -91,10 +91,20 @@ static uchar bin_char_array[] =
static int my_strnncoll_binary(CHARSET_INFO * cs __attribute__((unused)),
const uchar *s, uint slen,
const uchar *t, uint tlen)
const uchar *t, uint tlen,
my_bool t_is_prefix)
{
int cmp= memcmp(s,t,min(slen,tlen));
return cmp ? cmp : (int) (slen - tlen);
uint len=min(slen,tlen);
int cmp= memcmp(s,t,len);
return cmp ? cmp : (int)((t_is_prefix ? len : slen) - tlen);
}
static int my_strnncollsp_binary(CHARSET_INFO * cs __attribute__((unused)),
const uchar *s, uint slen,
const uchar *t, uint tlen)
{
return my_strnncoll_binary(cs,s,slen,t,tlen,0);
}
@ -334,7 +344,7 @@ skip:
MY_COLLATION_HANDLER my_collation_8bit_bin_handler =
{
my_strnncoll_binary,
my_strnncoll_binary,
my_strnncollsp_binary,
my_strnxfrm_bin,
my_like_range_simple,
my_wildcmp_bin,

View file

@ -242,12 +242,16 @@ while (1) \
static int my_strnncoll_czech(CHARSET_INFO *cs __attribute__((unused)),
const uchar * s1, uint len1,
const uchar * s2, uint len2)
const uchar * s2, uint len2,
my_bool s2_is_prefix)
{
int v1, v2;
const uchar * p1, * p2, * store1, * store2;
int pass1 = 0, pass2 = 0;
if (s2_is_prefix && len1 > len2)
len1=len2;
p1 = s1; p2 = s2;
store1 = s1; store2 = s2;
@ -276,7 +280,7 @@ int my_strnncollsp_czech(CHARSET_INFO * cs,
{
for ( ; slen && s[slen-1] == ' ' ; slen--);
for ( ; tlen && t[tlen-1] == ' ' ; tlen--);
return my_strnncoll_czech(cs,s,slen,t,tlen);
return my_strnncoll_czech(cs,s,slen,t,tlen,0);
}

View file

@ -2614,11 +2614,12 @@ int my_strnncoll_gbk_internal(const uchar **a_res, const uchar **b_res,
int my_strnncoll_gbk(CHARSET_INFO *cs __attribute__((unused)),
const uchar *a, uint a_length,
const uchar *b, uint b_length)
const uchar *b, uint b_length,
my_bool b_is_prefix)
{
uint length= min(a_length, b_length);
int res= my_strnncoll_gbk_internal(&a, &b, length);
return res ? res : (int) (a_length - b_length);
return res ? res : (int) ((b_is_prefix ? length : a_length) - b_length);
}

View file

@ -525,7 +525,8 @@ uchar combo2map[]={
static int my_strnncoll_latin1_de(CHARSET_INFO *cs __attribute__((unused)),
const uchar *a, uint a_length,
const uchar *b, uint b_length)
const uchar *b, uint b_length,
my_bool b_is_prefix)
{
const uchar *a_end= a + a_length;
const uchar *b_end= b + b_length;
@ -558,7 +559,7 @@ static int my_strnncoll_latin1_de(CHARSET_INFO *cs __attribute__((unused)),
A simple test of string lengths won't work -- we test to see
which string ran out first
*/
return ((a < a_end || a_extend) ? 1 :
return ((a < a_end || a_extend) ? (b_is_prefix ? 0 : 1) :
(b < b_end || b_extend) ? -1 : 0);
}

View file

@ -322,7 +322,7 @@ uint my_instr_mb(CHARSET_INFO *cs,
int mblen;
if (!cs->coll->strnncoll(cs, (unsigned char*) b, s_length,
(unsigned char*) s, s_length))
(unsigned char*) s, s_length, 0))
{
if (nmatch)
{
@ -352,10 +352,19 @@ uint my_instr_mb(CHARSET_INFO *cs,
static int my_strnncoll_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
const uchar *s, uint slen,
const uchar *t, uint tlen)
const uchar *t, uint tlen,
my_bool t_is_prefix)
{
int cmp= memcmp(s,t,min(slen,tlen));
return cmp ? cmp : (int) (slen - tlen);
uint len=min(slen,tlen);
int cmp= memcmp(s,t,len);
return cmp ? cmp : (int) ((t_is_prefix ? len : slen) - tlen);
}
static int my_strnncollsp_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
const uchar *s, uint slen,
const uchar *t, uint tlen)
{
return my_strnncoll_mb_bin(cs,s,slen,t,tlen,0);
}
@ -513,7 +522,7 @@ static int my_wildcmp_mb_bin(CHARSET_INFO *cs,
MY_COLLATION_HANDLER my_collation_mb_bin_handler =
{
my_strnncoll_mb_bin,
my_strnncoll_mb_bin,
my_strnncollsp_mb_bin,
my_strnxfrm_mb_bin,
my_like_range_simple,
my_wildcmp_mb_bin,

View file

@ -47,16 +47,19 @@ int my_strnxfrm_simple(CHARSET_INFO * cs,
}
int my_strnncoll_simple(CHARSET_INFO * cs, const uchar *s, uint slen,
const uchar *t, uint tlen)
const uchar *t, uint tlen,
my_bool t_is_prefix)
{
int len = ( slen > tlen ) ? tlen : slen;
uchar *map= cs->sort_order;
if (t_is_prefix && slen > tlen)
slen=tlen;
while (len--)
{
if (map[*s++] != map[*t++])
return ((int) map[s[-1]] - (int) map[t[-1]]);
}
return (int) (slen-tlen);
return (int) (slen - tlen);
}

View file

@ -232,9 +232,12 @@ static int my_strnncoll_sjis_internal(CHARSET_INFO *cs,
static int my_strnncoll_sjis(CHARSET_INFO *cs __attribute__((unused)),
const uchar *a, uint a_length,
const uchar *b, uint b_length)
const uchar *b, uint b_length,
my_bool b_is_prefix)
{
int res= my_strnncoll_sjis_internal(cs, &a, a_length, &b, b_length);
if (b_is_prefix && a_length > b_length)
a_length= b_length;
return res ? res : (int) (a_length - b_length);
}

View file

@ -529,12 +529,16 @@ static uint thai2sortable(uchar *tstr, uint len)
static
int my_strnncoll_tis620(CHARSET_INFO *cs __attribute__((unused)),
const uchar * s1, uint len1,
const uchar * s2, uint len2)
const uchar * s2, uint len2,
my_bool s2_is_prefix)
{
uchar buf[80] ;
uchar *tc1, *tc2;
int i;
if (s2_is_prefix && len1 > len2)
len1= len2;
tc1= buf;
if ((len1 + len2 +2) > (int) sizeof(buf))
tc1= (uchar*) malloc(len1+len2);

View file

@ -6704,7 +6704,8 @@ implicit:
static int my_strnncoll_uca(CHARSET_INFO *cs,
const uchar *s, uint slen,
const uchar *t, uint tlen)
const uchar *t, uint tlen,
my_bool t_is_prefix)
{
my_uca_scanner sscanner;
my_uca_scanner tscanner;
@ -6720,7 +6721,7 @@ static int my_strnncoll_uca(CHARSET_INFO *cs,
t_res= my_uca_scanner_next(&tscanner);
} while ( s_res == t_res && s_res >0);
return ( s_res - t_res );
return (t_is_prefix && t_res < 0) ? 0 : (s_res - t_res);
}

View file

@ -182,7 +182,8 @@ static void my_casedn_str_ucs2(CHARSET_INFO *cs __attribute__((unused)),
static int my_strnncoll_ucs2(CHARSET_INFO *cs,
const uchar *s, uint slen,
const uchar *t, uint tlen)
const uchar *t, uint tlen,
my_bool t_is_prefix)
{
int s_res,t_res;
my_wc_t s_wc,t_wc;
@ -213,7 +214,14 @@ static int my_strnncoll_ucs2(CHARSET_INFO *cs,
s+=s_res;
t+=t_res;
}
return ( (se-s) - (te-t) );
return t_is_prefix ? t-te : ((se-s) - (te-t));
}
static int my_strnncollsp_ucs2(CHARSET_INFO *cs,
const uchar *s, uint slen,
const uchar *t, uint tlen)
{
return my_strnncoll_ucs2(cs,s,slen,t,tlen,0);
}
@ -1224,8 +1232,9 @@ int my_wildcmp_ucs2_bin(CHARSET_INFO *cs,
static
int my_strnncoll_ucs2_bin(CHARSET_INFO *cs,
const uchar *s, uint slen,
const uchar *t, uint tlen)
const uchar *s, uint slen,
const uchar *t, uint tlen,
my_bool t_is_prefix)
{
int s_res,t_res;
my_wc_t s_wc,t_wc;
@ -1250,7 +1259,14 @@ int my_strnncoll_ucs2_bin(CHARSET_INFO *cs,
s+=s_res;
t+=t_res;
}
return ( (se-s) - (te-t) );
return t_is_prefix ? t-te : ((se-s) - (te-t));
}
static int my_strnncollsp_ucs2_bin(CHARSET_INFO *cs,
const uchar *s, uint slen,
const uchar *t, uint tlen)
{
return my_strnncoll_ucs2_bin(cs,s,slen,t,tlen,0);
}
@ -1374,7 +1390,7 @@ my_bool my_like_range_ucs2(CHARSET_INFO *cs,
static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler =
{
my_strnncoll_ucs2,
my_strnncoll_ucs2,
my_strnncollsp_ucs2,
my_strnxfrm_ucs2,
my_like_range_ucs2,
my_wildcmp_ucs2_ci,
@ -1387,7 +1403,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler =
static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler =
{
my_strnncoll_ucs2_bin,
my_strnncoll_ucs2_bin,
my_strnncollsp_ucs2_bin,
my_strnxfrm_ucs2_bin,
my_like_range_simple,
my_wildcmp_ucs2_bin,

View file

@ -1802,7 +1802,8 @@ static void my_casedn_str_utf8(CHARSET_INFO *cs, char * s)
static int my_strnncoll_utf8(CHARSET_INFO *cs,
const uchar *s, uint slen,
const uchar *t, uint tlen)
const uchar *t, uint tlen,
my_bool t_is_prefix)
{
int s_res,t_res;
my_wc_t s_wc,t_wc;
@ -1833,7 +1834,7 @@ static int my_strnncoll_utf8(CHARSET_INFO *cs,
s+=s_res;
t+=t_res;
}
return ( (se-s) - (te-t) );
return t_is_prefix ? t-te : ((se-s) - (te-t));
}

View file

@ -448,20 +448,25 @@ static struct wordvalue doubles[] = {
static int my_strnncoll_win1250ch(CHARSET_INFO *cs __attribute__((unused)),
const uchar * s1, uint len1,
const uchar * s2, uint len2)
const uchar * s2, uint len2,
my_bool s2_is_prefix)
{
int v1, v2;
const uchar * p1, * p2;
int pass1 = 0, pass2 = 0;
int diff;
if (s2_is_prefix && len1 > len2)
len1=len2;
p1 = s1; p2 = s2;
do {
do
{
NEXT_CMP_VALUE(s1, p1, pass1, v1, (int)len1);
NEXT_CMP_VALUE(s2, p2, pass2, v2, (int)len2);
diff = v1 - v2;
if (diff != 0) return diff;
if ((diff = v1 - v2))
return diff;
} while (v1);
return 0;
}
@ -478,7 +483,7 @@ int my_strnncollsp_win1250ch(CHARSET_INFO * cs,
{
for ( ; slen && s[slen-1] == ' ' ; slen--);
for ( ; tlen && t[tlen-1] == ' ' ; tlen--);
return my_strnncoll_win1250ch(cs,s,slen,t,tlen);
return my_strnncoll_win1250ch(cs,s,slen,t,tlen,0);
}