mirror of
https://github.com/MariaDB/server.git
synced 2025-01-26 08:44:33 +01:00
528e85a4c0
The problem was that MySQL had no true ctype implementation. As a result, many multibyte punctuation/whitespace characters were treated as word characters. This fix uses the recently added CTYPE table for unicode character sets (WL1386) to detect unicode punctuation/whitespace characters correctly. Note: this is an incompatible change since it changes parser behavior. One will have to use the REPAIR TABLE statement to rebuild fulltext indexes. mysql-test/r/fulltext2.result: Testcase for BUG#19580. mysql-test/t/fulltext2.test: Testcase for BUG#19580. storage/myisam/ft_parser.c: Use WL1386 "CTYPE table for unicode character sets" functionality. storage/myisam/ft_update.c: Use WL1386 "CTYPE table for unicode character sets" functionality. Reverse fix for BUG#16489 "utf8 + fulltext leads to corrupt index file.". It is not needed anymore, since we have a true ctype implementation. storage/myisam/ftdefs.h: Use WL1386 "CTYPE table for unicode character sets" functionality. Rework the true_word_char macro so it accepts ctype instead of charset as its first param. It doesn't use my_isalnum anymore, but instead directly checks ctype. Obsolete word_char macro removed.
233 lines
8.1 KiB
Text
233 lines
8.1 KiB
Text
#
|
||
# test of new fulltext search features
|
||
#
|
||
|
||
#
|
||
# two-level tree
|
||
#
|
||
|
||
# Start from a clean slate: drop any t1 left over from an earlier run.
# Warnings are switched off around the DROP (presumably so that a missing
# table does not add a warning to the recorded output).
--disable_warnings
|
||
DROP TABLE IF EXISTS t1;
|
||
--enable_warnings
|
||
|
||
# MyISAM table with an auto-increment primary key and a FULLTEXT index on
# the single text column a; the statements that follow search column a.
CREATE TABLE t1 (
|
||
i int(10) unsigned not null auto_increment primary key,
|
||
a varchar(255) not null,
|
||
FULLTEXT KEY (a)
|
||
) ENGINE=MyISAM;
|
||
|
||
# two-level entry, second-level tree with depth 2
|
||
# The three insert loops run with the query log disabled (presumably to keep
# the 765 generated INSERT statements out of the recorded output).
--disable_query_log
|
||
# Loop counter $1 counts down from 260: inserts 260 rows holding aaaxxx.
let $1=260;
|
||
while ($1)
|
||
{
|
||
eval insert t1 (a) values ('aaaxxx');
|
||
dec $1;
|
||
}
|
||
|
||
# two-level entry, second-level tree has only one page
|
||
# 255 rows holding aaazzz.
let $1=255;
|
||
while ($1)
|
||
{
|
||
eval insert t1 (a) values ('aaazzz');
|
||
dec $1;
|
||
}
|
||
|
||
# one-level entry (entries)
|
||
# 250 rows holding aaayyy.
let $1=250;
|
||
while ($1)
|
||
{
|
||
eval insert t1 (a) values ('aaayyy');
|
||
dec $1;
|
||
}
|
||
# Statement logging is turned back on for the checks that follow.
--enable_query_log
|
||
|
||
# converting to two-level
|
||
# Rebuild with REPAIR ... QUICK and verify with CHECK, then run OPTIMIZE
# and verify with CHECK a second time.
repair table t1 quick;
|
||
check table t1;
|
||
optimize table t1; # BUG#5327 - mi_sort_index() of 2-level tree
|
||
check table t1;
|
||
|
||
# Count the matches for each of the three loaded words, first with the
# default search mode and then again in boolean mode.
select count(*) from t1 where match a against ('aaaxxx');
|
||
select count(*) from t1 where match a against ('aaayyy');
|
||
select count(*) from t1 where match a against ('aaazzz');
|
||
select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
|
||
select count(*) from t1 where match a against ('aaayyy' in boolean mode);
|
||
select count(*) from t1 where match a against ('aaazzz' in boolean mode);
|
||
# All three words in a single search string, in both modes.
select count(*) from t1 where match a against ('aaaxxx aaayyy aaazzz');
|
||
select count(*) from t1 where match a against ('aaaxxx aaayyy aaazzz' in boolean mode);
|
||
|
||
# Boolean-mode searches with a trailing * on a word prefix: two prefixes
# that fit one loaded word each, then the prefix common to all three words.
select count(*) from t1 where match a against ('aaax*' in boolean mode);
|
||
select count(*) from t1 where match a against ('aaay*' in boolean mode);
|
||
select count(*) from t1 where match a against ('aaa*' in boolean mode);
|
||
|
||
# mi_write:
|
||
|
||
# Add rows to the already-populated index: one more aaaxxx and one more
# aaayyy in a single multi-row INSERT.
insert t1 (a) values ('aaaxxx'),('aaayyy');
|
||
# call to enlarge_root() below
|
||
# Five more aaazzz rows in one statement.
insert t1 (a) values ('aaazzz'),('aaazzz'),('aaazzz'),('aaazzz'),('aaazzz');
|
||
# Re-count each word after the inserts.
select count(*) from t1 where match a against ('aaaxxx');
|
||
select count(*) from t1 where match a against ('aaayyy');
|
||
select count(*) from t1 where match a against ('aaazzz');
|
||
|
||
# mi_delete
|
||
# Add one row carrying a second word (000000) next to aaaxxx, count it,
# delete it through a fulltext match on that second word, and count again.
insert t1 (a) values ('aaaxxx 000000');
|
||
select count(*) from t1 where match a against ('000000');
|
||
delete from t1 where match a against ('000000');
|
||
select count(*) from t1 where match a against ('000000');
|
||
# Count aaaxxx after the row that also contained it has been deleted.
select count(*) from t1 where match a against ('aaaxxx');
|
||
# Delete every row matching aaazzz through the fulltext index, then count
# all three words in boolean mode.
delete from t1 where match a against ('aaazzz');
|
||
select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
|
||
select count(*) from t1 where match a against ('aaayyy' in boolean mode);
|
||
select count(*) from t1 where match a against ('aaazzz' in boolean mode);
|
||
# double-check without index
|
||
# Same three counts using plain equality on a instead of MATCH ... AGAINST.
select count(*) from t1 where a = 'aaaxxx';
|
||
select count(*) from t1 where a = 'aaayyy';
|
||
select count(*) from t1 where a = 'aaazzz';
|
||
|
||
# update
|
||
# Insert a two-word row again, locate it by its second word, and rewrite it
# to aaazzz with an UPDATE whose WHERE clause is a fulltext match.
insert t1 (a) values ('aaaxxx 000000');
|
||
select count(*) from t1 where match a against ('000000');
|
||
update t1 set a='aaazzz' where match a against ('000000');
|
||
select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
|
||
select count(*) from t1 where match a against ('aaazzz' in boolean mode);
|
||
# Bulk rewrites selected by plain equality: every aaaxxx row becomes aaazzz,
# then every aaayyy row becomes aaaxxx.
update t1 set a='aaazzz' where a = 'aaaxxx';
|
||
update t1 set a='aaaxxx' where a = 'aaayyy';
|
||
# Count all three words in boolean mode after the rewrites.
select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
|
||
select count(*) from t1 where match a against ('aaayyy' in boolean mode);
|
||
select count(*) from t1 where match a against ('aaazzz' in boolean mode);
|
||
|
||
# End of the first pass: remove the table.
drop table t1;
|
||
|
||
# Recreate t1 with the same definition as in the first pass: auto-increment
# primary key plus a FULLTEXT index on column a, stored in MyISAM.
CREATE TABLE t1 (
|
||
i int(10) unsigned not null auto_increment primary key,
|
||
a varchar(255) not null,
|
||
FULLTEXT KEY (a)
|
||
) ENGINE=MyISAM;
|
||
|
||
#
|
||
# now same as above but w/o repair table
|
||
# 2-level tree created by mi_write
|
||
#
|
||
|
||
# two-level entry, second-level tree with depth 2
|
||
# Same data volumes as the first pass (260 / 255 / 250 rows), inserted with
# the query log disabled.
--disable_query_log
|
||
# 260 rows holding aaaxxx; $1 is the countdown loop counter.
let $1=260;
|
||
while ($1)
|
||
{
|
||
eval insert t1 (a) values ('aaaxxx');
|
||
dec $1;
|
||
}
|
||
# 255 rows holding aaazzz.
let $1=255;
|
||
while ($1)
|
||
{
|
||
eval insert t1 (a) values ('aaazzz');
|
||
dec $1;
|
||
}
|
||
# 250 rows holding aaayyy.
let $1=250;
|
||
while ($1)
|
||
{
|
||
eval insert t1 (a) values ('aaayyy');
|
||
dec $1;
|
||
}
|
||
# Statement logging back on; note that no REPAIR/OPTIMIZE follows this time.
--enable_query_log
|
||
|
||
# Per-word match counts straight after the inserts, first in the default
# search mode and then in boolean mode.
select count(*) from t1 where match a against ('aaaxxx');
|
||
select count(*) from t1 where match a against ('aaayyy');
|
||
select count(*) from t1 where match a against ('aaazzz');
|
||
select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
|
||
select count(*) from t1 where match a against ('aaayyy' in boolean mode);
|
||
select count(*) from t1 where match a against ('aaazzz' in boolean mode);
|
||
# The three words combined in one search string, in both modes.
select count(*) from t1 where match a against ('aaaxxx aaayyy aaazzz');
|
||
select count(*) from t1 where match a against ('aaaxxx aaayyy aaazzz' in boolean mode);
|
||
|
||
# Boolean-mode prefix searches using a trailing *.
select count(*) from t1 where match a against ('aaax*' in boolean mode);
|
||
select count(*) from t1 where match a against ('aaay*' in boolean mode);
|
||
select count(*) from t1 where match a against ('aaa*' in boolean mode);
|
||
|
||
# mi_write:
|
||
|
||
# Two multi-row inserts into the populated table: one aaaxxx plus one
# aaayyy, then five aaazzz.
insert t1 (a) values ('aaaxxx'),('aaayyy');
|
||
insert t1 (a) values ('aaazzz'),('aaazzz'),('aaazzz'),('aaazzz'),('aaazzz');
|
||
# Re-count each word after the inserts.
select count(*) from t1 where match a against ('aaaxxx');
|
||
select count(*) from t1 where match a against ('aaayyy');
|
||
select count(*) from t1 where match a against ('aaazzz');
|
||
|
||
# mi_delete
|
||
# Insert one two-word row, then remove it via a fulltext match on its second
# word; the count of 000000 is taken before and after the DELETE.
insert t1 (a) values ('aaaxxx 000000');
|
||
select count(*) from t1 where match a against ('000000');
|
||
delete from t1 where match a against ('000000');
|
||
select count(*) from t1 where match a against ('000000');
|
||
# Count aaaxxx once the two-word row is gone.
select count(*) from t1 where match a against ('aaaxxx');
|
||
# Remove all rows matching aaazzz, then count every word in boolean mode.
delete from t1 where match a against ('aaazzz');
|
||
select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
|
||
select count(*) from t1 where match a against ('aaayyy' in boolean mode);
|
||
select count(*) from t1 where match a against ('aaazzz' in boolean mode);
|
||
# double-check without index
|
||
# Equality comparisons on a give counts that do not go through MATCH.
select count(*) from t1 where a = 'aaaxxx';
|
||
select count(*) from t1 where a = 'aaayyy';
|
||
select count(*) from t1 where a = 'aaazzz';
|
||
|
||
# update
|
||
# Insert a two-word row, find it by its second word, and change it to
# aaazzz using an UPDATE filtered by a fulltext match.
insert t1 (a) values ('aaaxxx 000000');
|
||
select count(*) from t1 where match a against ('000000');
|
||
update t1 set a='aaazzz' where match a against ('000000');
|
||
select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
|
||
select count(*) from t1 where match a against ('aaazzz' in boolean mode);
|
||
# Equality-filtered rewrites: aaaxxx rows become aaazzz, after which aaayyy
# rows become aaaxxx.
update t1 set a='aaazzz' where a = 'aaaxxx';
|
||
update t1 set a='aaaxxx' where a = 'aaayyy';
|
||
# Final boolean-mode counts for the three words.
select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
|
||
select count(*) from t1 where match a against ('aaayyy' in boolean mode);
|
||
select count(*) from t1 where match a against ('aaazzz' in boolean mode);
|
||
# End of the second pass: remove the table.
drop table t1;
|
||
|
||
#
|
||
# BUG#11336
|
||
#
|
||
# for uca collation isalnum and strnncollsp don't agree on whether
|
||
# 0xC2A0 is a space (strnncollsp is right, isalnum is wrong).
|
||
#
|
||
# they still don't, the bug was fixed by avoiding strnncollsp
|
||
#
|
||
|
||
# Switch the connection character set to utf8 for the string literals below.
set names utf8;
|
||
# Table with a FULLTEXT index on a text column, using the utf8_swedish_ci
# collation. No ENGINE clause is given, so the server default engine applies.
create table t1(a text,fulltext(a)) collate=utf8_swedish_ci;
|
||
# One multi-row INSERT: a first row containing the word test twice, followed
# by many rows containing the single word test.
# NOTE(review): the BUG#11336 header talks about byte sequence 0xC2A0;
# presumably the first literal is meant to contain it - verify that the
# file encoding has preserved it.
insert into t1 values('test test '),('test'),('test'),('test'),
|
||
('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
|
||
('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
|
||
('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
|
||
('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
|
||
('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
|
||
('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
|
||
('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
|
||
('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
|
||
('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
|
||
('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
|
||
('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
|
||
('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
|
||
('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
|
||
('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
|
||
('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test');
|
||
# Delete exactly one row. There is no WHERE or ORDER BY, so which row goes is
# left to the server (presumably the first inserted row - TODO confirm).
delete from t1 limit 1;
|
||
|
||
#
|
||
# BUG#16489: utf8 + fulltext leads to corrupt index file.
|
||
#
|
||
# Reuse t1 from the previous test (still utf8_swedish_ci, connection still
# utf8): empty it, insert one row, rewrite that row with an UPDATE that has
# no WHERE clause, then search for part of its text in boolean mode.
# NOTE(review): the INSERT and UPDATE literals look identical here;
# presumably one of them originally contained a non-ASCII space - confirm
# against upstream before editing these lines.
truncate table t1;
|
||
insert into t1 values('ab c d');
|
||
update t1 set a='ab c d';
|
||
select * from t1 where match a against('ab c' in boolean mode);
|
||
drop table t1;
|
||
# Switch the connection character set back to latin1.
set names latin1;
|
||
|
||
# End of 4.1 tests
|
||
|
||
#
|
||
# BUG#19580 - FULLTEXT search produces wrong results on UTF-8 columns
|
||
#
|
||
# utf8 connection and a utf8 MyISAM table with a FULLTEXT index on a.
SET NAMES utf8;
|
||
CREATE TABLE t1(a VARCHAR(255), FULLTEXT(a)) ENGINE=MyISAM DEFAULT CHARSET=utf8;
|
||
# Store the word MySQL wrapped in a pair of multibyte quotation marks.
INSERT INTO t1 VALUES('„MySQL“');
|
||
# Search for the same word with the two quotation marks swapped. The row can
# only match if the marks are treated as punctuation and not as part of the
# word (presumably the behaviour BUG#19580 is about - see its title above).
SELECT a FROM t1 WHERE MATCH a AGAINST('“MySQL„' IN BOOLEAN MODE);
|
||
DROP TABLE t1;
|
||
# Switch the connection character set back to latin1.
SET NAMES latin1;
|