FULLTEXT: correct charset support (UTF8 is supported, UCS2 is not)

code cleanup


include/m_ctype.h:
  my_mbcharlen_8bit() { return 1 }
mysql-test/r/fulltext.result:
  fulltext on UTF
mysql-test/t/fulltext.test:
  fulltext on UTF
sql/item_cmpfunc.h:
  cleanup
sql/sql_table.cc:
  FULLTEXT: UCS2 is not allowed
sql/sql_yacc.yy:
  FULLTEXT: code cleanup
strings/ctype-bin.c:
  my_mbcharlen_8bit() { return 1 }
strings/ctype-latin1.c:
  my_mbcharlen_8bit() { return 1 }
strings/ctype-simple.c:
  my_mbcharlen_8bit() { return 1 }
strings/ctype-tis620.c:
  my_mbcharlen_8bit() { return 1 }
strings/ctype-utf8.c:
  hack (to be fixed properly later): all multi-byte sequences are now treated as isalpha()
This commit is contained in:
unknown 2003-10-20 15:53:48 +02:00
parent 4a253d2af0
commit 228f4a43a3
13 changed files with 121 additions and 86 deletions

View file

@ -310,6 +310,7 @@ int my_wildcmp_8bit(CHARSET_INFO *,
uint my_numchars_8bit(CHARSET_INFO *, const char *b, const char *e);
uint my_charpos_8bit(CHARSET_INFO *, const char *b, const char *e, uint pos);
int my_mbcharlen_8bit(CHARSET_INFO *, uint c);
/* Functions for multibyte charsets */

View file

@ -279,3 +279,24 @@ select * from t1 join t2 using(`t1_id`) where match (t1.name, t2.name) against('
t1_id name t2_id t1_id name
1 data1 1 1 xxfoo
drop table t1,t2;
SET NAMES latin1;
CREATE TABLE t1 (t text character set utf8 not null, fulltext(t));
INSERT t1 VALUES ('Mit freundlichem Grüß'), ('aus Osnabrück');
SET NAMES koi8r;
INSERT t1 VALUES ("üÔÏ ÍÙ - ÏÐÉÌËÉ"),("ïÔÌÅÚØ, ÇÎÉÄÁ!"),
("îÅ ×ÌÅÚÁÊ, ÕÂØÅÔ!"),("É ÂÕÄÅÔ ÐÒÁ×!");
SELECT t, charset(t) FROM t1 WHERE MATCH t AGAINST ('ïðéìëé');
t charset(t)
üÔÏ ÍÙ - ÏÐÉÌËÉ utf8
SELECT t, charset(t) FROM t1 WHERE MATCH t AGAINST ('ðÒá*' IN BOOLEAN MODE);
t charset(t)
É ÂÕÄÅÔ ÐÒÁ×! utf8
SELECT * FROM t1 WHERE MATCH t AGAINST ('ÜÔÏ' IN BOOLEAN MODE);
t
SELECT t, charset(t) FROM t1 WHERE MATCH t AGAINST ('Osnabrück');
t charset(t)
SET NAMES latin1;
SELECT t, charset(t) FROM t1 WHERE MATCH t AGAINST ('Osnabrück');
t charset(t)
aus Osnabrück utf8
DROP TABLE t1;

View file

@ -226,3 +226,21 @@ insert into t2 values (2, 1, 'xxbar');
insert into t2 values (3, 1, 'xxbuz');
select * from t1 join t2 using(`t1_id`) where match (t1.name, t2.name) against('xxfoo' in boolean mode);
drop table t1,t2;
#
# UTF8
#
SET NAMES latin1;
CREATE TABLE t1 (t text character set utf8 not null, fulltext(t));
INSERT t1 VALUES ('Mit freundlichem Grüß'), ('aus Osnabrück');
SET NAMES koi8r;
INSERT t1 VALUES ("üÔÏ ÍÙ - ÏÐÉÌËÉ"),("ïÔÌÅÚØ, ÇÎÉÄÁ!"),
("îÅ ×ÌÅÚÁÊ, ÕÂØÅÔ!"),("É ÂÕÄÅÔ ÐÒÁ×!");
SELECT t, charset(t) FROM t1 WHERE MATCH t AGAINST ('ïðéìëé');
SELECT t, charset(t) FROM t1 WHERE MATCH t AGAINST ('ðÒá*' IN BOOLEAN MODE);
SELECT * FROM t1 WHERE MATCH t AGAINST ('ÜÔÏ' IN BOOLEAN MODE);
SELECT t, charset(t) FROM t1 WHERE MATCH t AGAINST ('Osnabrück');
SET NAMES latin1;
SELECT t, charset(t) FROM t1 WHERE MATCH t AGAINST ('Osnabrück');
DROP TABLE t1;

View file

@ -647,7 +647,6 @@ class Item_func_in :public Item_int_func
~Item_func_in() { delete array; delete in_item; }
optimize_type select_optimize() const
{ return array ? OPTIMIZE_KEY : OPTIMIZE_NONE; }
Item *key_item() const { return args[0]; }
void print(String *str);
enum Functype functype() const { return IN_FUNC; }
const char *func_name() const { return " IN "; }

View file

@ -81,12 +81,12 @@ bool Item_func::agg_arg_collations(DTCollation &c, Item **av, uint count)
}
bool Item_func::agg_arg_collations_for_comparison(DTCollation &c,
bool Item_func::agg_arg_collations_for_comparison(DTCollation &c,
Item **av, uint count)
{
if (agg_arg_collations(c, av, count))
return TRUE;
if (c.derivation == DERIVATION_NONE)
{
my_coll_agg_error(av, count, func_name());
@ -211,7 +211,7 @@ Item_func::fix_fields(THD *thd, TABLE_LIST *tables, Item **ref)
item= *arg;
if (item->maybe_null)
maybe_null=1;
with_sum_func= with_sum_func || item->with_sum_func;
used_tables_cache|= item->used_tables();
not_null_tables_cache|= item->not_null_tables();
@ -2545,9 +2545,13 @@ void Item_func_match::init_search(bool no_order)
DBUG_VOID_RETURN;
if (key == NO_SUCH_KEY)
{
List<Item> fields;
for (uint i=1; i < arg_count; i++)
fields.push_back(args[i]);
concat=new Item_func_concat_ws(new Item_string(" ",1,
default_charset_info),
fields);
cmp_collation.collation), fields);
}
if (master)
{
@ -2559,14 +2563,19 @@ void Item_func_match::init_search(bool no_order)
}
String *ft_tmp= 0;
char tmp1[FT_QUERY_MAXLEN];
String tmp2(tmp1,sizeof(tmp1),default_charset_info);
// MATCH ... AGAINST (NULL) is meaningless, but possible
if (!(ft_tmp=key_item()->val_str(&tmp2)))
if (!(ft_tmp=key_item()->val_str(&value)))
{
ft_tmp= &tmp2;
tmp2.set("",0,default_charset_info);
ft_tmp= &value;
value.set("",0,cmp_collation.collation);
}
if (ft_tmp->charset() != cmp_collation.collation)
{
search_value.copy(ft_tmp->ptr(), ft_tmp->length(), ft_tmp->charset(),
cmp_collation.collation);
ft_tmp= &search_value;
}
ft_handler=table->file->ft_init_ext(mode, key,
@ -2583,7 +2592,6 @@ void Item_func_match::init_search(bool no_order)
bool Item_func_match::fix_fields(THD *thd, TABLE_LIST *tlist, Item **ref)
{
List_iterator<Item> li(fields);
Item *item;
maybe_null=1;
@ -2595,51 +2603,37 @@ bool Item_func_match::fix_fields(THD *thd, TABLE_LIST *tlist, Item **ref)
modifications to find_best and auto_close as complement to auto_init code
above.
*/
if (Item_func::fix_fields(thd, tlist, ref) || !const_item())
if (Item_func::fix_fields(thd, tlist, ref) || !args[0]->const_item())
{
my_error(ER_WRONG_ARGUMENTS,MYF(0),"AGAINST");
return 1;
}
while ((item=li++))
const_item_cache=0;
for (uint i=1 ; i < arg_count ; i++)
{
if (item->fix_fields(thd, tlist, li.ref()) || item->check_cols(1))
return 1;
item=args[i];
if (item->type() == Item::REF_ITEM)
li.replace(item= *((Item_ref *)item)->ref);
if (item->type() != Item::FIELD_ITEM || !item->used_tables())
args[i]= item= *((Item_ref *)item)->ref;
if (item->type() != Item::FIELD_ITEM)
key=NO_SUCH_KEY;
used_tables_cache|=item->used_tables();
}
/* check that all columns come from the same table */
if (my_count_bits(used_tables_cache) != 1)
key=NO_SUCH_KEY;
const_item_cache=0;
table=((Item_field *)fields.head())->field->table;
table->fulltext_searched=1;
record=table->record[0];
if (key == NO_SUCH_KEY && mode != FT_BOOL)
{
my_error(ER_WRONG_ARGUMENTS,MYF(0),"MATCH");
return 1;
}
return 0;
}
bool Item_func_match::walk(Item_processor processor, byte *arg)
{
List_iterator_fast<Item> li(fields);
Item *item;
while ((item= li++))
if (item->walk(processor, arg))
return 1;
return Item_func::walk(processor, arg);
table=((Item_field *)item)->field->table;
table->fulltext_searched=1;
return agg_arg_collations_for_comparison(cmp_collation, args+1, arg_count-1);
}
bool Item_func_match::fix_index()
{
List_iterator_fast<Item> li(fields);
Item_field *item;
uint ft_to_key[MAX_KEY], ft_cnt[MAX_KEY], fts=0, keynr;
uint max_cnt=0, mkeys=0;
@ -2661,8 +2655,9 @@ bool Item_func_match::fix_index()
if (!fts)
goto err;
while ((item=(Item_field*)(li++)))
for (uint i=1; i < arg_count; i++)
{
item=(Item_field*)args[i];
for (keynr=0 ; keynr < fts ; keynr++)
{
KEY *ft_key=&table->key_info[ft_to_key[keynr]];
@ -2696,8 +2691,8 @@ bool Item_func_match::fix_index()
for (keynr=0 ; keynr <= mkeys ; keynr++)
{
// for now, partial keys won't work. SerG
if (max_cnt < fields.elements ||
// partial keys don't work
if (max_cnt < arg_count-1 ||
max_cnt < table->key_info[ft_to_key[keynr]].key_parts)
continue;
@ -2712,8 +2707,7 @@ err:
key=NO_SUCH_KEY;
return 0;
}
my_printf_error(ER_FT_MATCHING_KEY_NOT_FOUND,
ER(ER_FT_MATCHING_KEY_NOT_FOUND),MYF(0));
my_error(ER_FT_MATCHING_KEY_NOT_FOUND,MYF(0));
return 1;
}
@ -2759,7 +2753,8 @@ double Item_func_match::val()
(byte *)a->ptr(), a->length()));
}
else
DBUG_RETURN(ft_handler->please->find_relevance(ft_handler, record, 0));
DBUG_RETURN(ft_handler->please->find_relevance(ft_handler,
table->record[0], 0));
}

View file

@ -962,20 +962,18 @@ public:
class Item_func_match :public Item_real_func
{
public:
List<Item> fields;
String value;
TABLE *table;
Item_func_match *master;
FT_INFO * ft_handler;
Item *concat;
byte *record;
uint key, mode;
bool join_key;
DTCollation cmp_collation;
FT_INFO *ft_handler;
TABLE *table;
Item_func_match *master; // for master-slave optimization
Item *concat; // Item_func_concat_ws
String value; // value of concat
String search_value; // key_item()'s value converted to cmp_collation
Item_func_match(List<Item> &a, Item *b): Item_real_func(b),
fields(a), table(0), master(0), ft_handler(0),
concat(0), key(0), join_key(0)
{}
Item_func_match(List<Item> &a): Item_real_func(a),
table(0), master(0), ft_handler(0), concat(0), key(0), join_key(0) { }
~Item_func_match()
{
if (!master && ft_handler)
@ -999,17 +997,13 @@ public:
bool fix_index();
void init_search(bool no_order);
bool walk(Item_processor processor, byte *arg);
};
class Item_func_match_nl :public Item_func_match
{
public:
Item_func_match_nl(List<Item> &a, Item *b)
:Item_func_match(a,b)
{ mode=FT_NL; }
Item_func_match_nl(List<Item> &a) :Item_func_match(a) { mode=FT_NL; }
const char *func_name() const { return "match_nl"; }
};
@ -1017,9 +1011,7 @@ public:
class Item_func_match_bool :public Item_func_match
{
public:
Item_func_match_bool(List<Item> &a, Item *b)
:Item_func_match(a,b)
{ mode=FT_BOOL; }
Item_func_match_bool(List<Item> &a) :Item_func_match(a) { mode=FT_BOOL; }
const char *func_name() const { return "match_bool"; }
};

View file

@ -733,6 +733,7 @@ int mysql_create_table(THD *thd,const char *db, const char *table_name,
sql_field->sql_type != FIELD_TYPE_VAR_STRING &&
!f_is_blob(sql_field->pack_flag)) ||
sql_field->charset == &my_charset_bin ||
sql_field->charset->state & MY_CS_NONTEXT || // ucs2 doesn't work yet
(ft_key_charset && sql_field->charset != ft_key_charset))
{
my_printf_error(ER_BAD_FT_COLUMN,ER(ER_BAD_FT_COLUMN),MYF(0),

View file

@ -2446,11 +2446,13 @@ simple_expr:
| singlerow_subselect { $$= $1; }
| '{' ident expr '}' { $$= $3; }
| MATCH ident_list_arg AGAINST '(' expr ')'
{ Select->add_ftfunc_to_list((Item_func_match *)
($$=new Item_func_match_nl(*$2,$5))); }
{ $2->push_front($5);
Select->add_ftfunc_to_list((Item_func_match *)
($$=new Item_func_match_nl(*$2))); }
| MATCH ident_list_arg AGAINST '(' expr IN_SYM BOOLEAN_SYM MODE_SYM ')'
{ Select->add_ftfunc_to_list((Item_func_match *)
($$=new Item_func_match_bool(*$2,$5))); }
{ $2->push_front($5);
Select->add_ftfunc_to_list((Item_func_match *)
($$=new Item_func_match_bool(*$2))); }
| ASCII_SYM '(' expr ')' { $$= new Item_func_ascii($3); }
| BINARY expr %prec NEG
{
@ -2458,10 +2460,10 @@ simple_expr:
6, &my_charset_latin1));
}
| CAST_SYM '(' expr AS cast_type ')'
{
$$= create_func_cast($3, $5,
{
$$= create_func_cast($3, $5,
Lex->length ? atoi(Lex->length) : -1,
Lex->charset);
Lex->charset);
}
| CASE_SYM opt_expr WHEN_SYM when_list opt_else END
{ $$= new Item_func_case(* $4, $2, $5 ); }
@ -2962,7 +2964,7 @@ ident_list2:
opt_expr:
/* empty */ { $$= NULL; }
| expr { $$= $1; };
| expr { $$= $1; };
opt_else:
/* empty */ { $$= NULL; }

View file

@ -118,6 +118,12 @@ static int my_strcasecmp_bin(CHARSET_INFO * cs __attribute__((unused)),
return strcmp(s,t);
}
/*
  mbcharlen handler for 8-bit character sets.

  Both arguments are ignored (they are marked unused) and the result is
  always 1.
  NOTE(review): presumably the return value is the byte length of one
  character -- confirm against the mbcharlen contract in m_ctype.h.
*/
int my_mbcharlen_8bit(CHARSET_INFO *cs __attribute__((unused)),
uint c __attribute__((unused)))
{
return 1;
}
static int my_mb_wc_bin(CHARSET_INFO *cs __attribute__((unused)),
my_wc_t *wc,
const unsigned char *str,
@ -264,12 +270,12 @@ static int my_strnxfrm_bin(CHARSET_INFO *cs __attribute__((unused)),
static
uint my_instr_bin(CHARSET_INFO *cs __attribute__((unused)),
const char *big, uint b_length,
const char *big, uint b_length,
const char *small, uint s_length,
my_match_t *match, uint nmatch)
{
register const uchar *str, *search, *end, *search_end;
if (s_length <= b_length)
{
if (!s_length)
@ -282,32 +288,32 @@ uint my_instr_bin(CHARSET_INFO *cs __attribute__((unused)),
}
return 1; /* Empty string is always found */
}
str= (const uchar*) big;
search= (const uchar*) small;
end= (const uchar*) big+b_length-s_length+1;
search_end= (const uchar*) small + s_length;
skipp:
while (str != end)
{
if ( (*str++) == (*search))
{
register const uchar *i,*j;
i= str;
i= str;
j= search+1;
while (j != search_end)
if ((*i++) != (*j++))
goto skipp;
if (nmatch > 0)
{
match[0].beg= 0;
match[0].end= str- (const uchar*)big-1;
match[0].mblen= match[0].end;
if (nmatch > 1)
{
match[1].beg= match[0].end;
@ -338,7 +344,7 @@ MY_COLLATION_HANDLER my_collation_8bit_bin_handler =
static MY_CHARSET_HANDLER my_charset_handler=
{
NULL, /* ismbchar */
NULL, /* mbcharlen */
my_mbcharlen_8bit, /* mbcharlen */
my_numchars_8bit,
my_charpos_8bit,
my_lengthsp_8bit,

View file

@ -170,14 +170,14 @@ int my_wc_mb_latin1(CHARSET_INFO *cs __attribute__((unused)),
{
if (str >= end)
return MY_CS_TOOSMALL;
return ((wc < 256) && ((str[0]=uni_latin1[wc]) || (!wc))) ? 1 : MY_CS_ILUNI;
}
static MY_CHARSET_HANDLER my_charset_handler=
{
NULL,
NULL,
my_mbcharlen_8bit,
my_numchars_8bit,
my_charpos_8bit,
my_lengthsp_8bit,

View file

@ -1093,7 +1093,7 @@ skipp:
MY_CHARSET_HANDLER my_charset_8bit_handler=
{
NULL, /* ismbchar */
NULL, /* mbcharlen */
my_mbcharlen_8bit, /* mbcharlen */
my_numchars_8bit,
my_charpos_8bit,
my_lengthsp_8bit,

View file

@ -717,7 +717,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
static MY_CHARSET_HANDLER my_charset_handler=
{
NULL, /* ismbchar */
NULL, /* mbcharlen */
my_mbcharlen_8bit, /* mbcharlen */
my_numchars_8bit,
my_charpos_8bit,
my_lengthsp_8bit,

View file

@ -1540,10 +1540,10 @@ static uchar ctype_utf8[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0
};
static uchar to_lower_utf8[] = {