mariadb/mysql-test/t/fulltext2.test
svoj@may.pils.ru 53279f1dc4 BUG#19580 - FULLTEXT search produces wrong results on UTF-8 columns
The problem was that MySQL hadn't true ctype implementation. As a
result many multibyte punctuation/whitespace characters were
treated as word characters.

This fix uses recently added CTYPE table for unicode character sets
(WL1386) to detect unicode punctuation/whitespace characters
correctly.

Note: this is incompatible change since it changes parser behavior.
One will have to use REPAIR TABLE statement to rebuild fulltext
indexes.
2006-05-29 16:46:46 +05:00

233 lines
8.1 KiB
Text
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#
# test of new fulltext search features
#
#
# two-level tree
#
--disable_warnings
DROP TABLE IF EXISTS t1;
--enable_warnings
CREATE TABLE t1 (
i int(10) unsigned not null auto_increment primary key,
a varchar(255) not null,
FULLTEXT KEY (a)
) ENGINE=MyISAM;
# two-level entry, second-level tree with depth 2
--disable_query_log
let $1=260;
while ($1)
{
eval insert t1 (a) values ('aaaxxx');
dec $1;
}
# two-level entry, second-level tree has only one page
let $1=255;
while ($1)
{
eval insert t1 (a) values ('aaazzz');
dec $1;
}
# one-level entry (entries)
let $1=250;
while ($1)
{
eval insert t1 (a) values ('aaayyy');
dec $1;
}
--enable_query_log
# converting to two-level
repair table t1 quick;
check table t1;
optimize table t1; # BUG#5327 - mi_sort_index() of 2-level tree
check table t1;
select count(*) from t1 where match a against ('aaaxxx');
select count(*) from t1 where match a against ('aaayyy');
select count(*) from t1 where match a against ('aaazzz');
select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
select count(*) from t1 where match a against ('aaayyy' in boolean mode);
select count(*) from t1 where match a against ('aaazzz' in boolean mode);
select count(*) from t1 where match a against ('aaaxxx aaayyy aaazzz');
select count(*) from t1 where match a against ('aaaxxx aaayyy aaazzz' in boolean mode);
select count(*) from t1 where match a against ('aaax*' in boolean mode);
select count(*) from t1 where match a against ('aaay*' in boolean mode);
select count(*) from t1 where match a against ('aaa*' in boolean mode);
# mi_write:
insert t1 (a) values ('aaaxxx'),('aaayyy');
# call to enlarge_root() below
insert t1 (a) values ('aaazzz'),('aaazzz'),('aaazzz'),('aaazzz'),('aaazzz');
select count(*) from t1 where match a against ('aaaxxx');
select count(*) from t1 where match a against ('aaayyy');
select count(*) from t1 where match a against ('aaazzz');
# mi_delete
insert t1 (a) values ('aaaxxx 000000');
select count(*) from t1 where match a against ('000000');
delete from t1 where match a against ('000000');
select count(*) from t1 where match a against ('000000');
select count(*) from t1 where match a against ('aaaxxx');
delete from t1 where match a against ('aaazzz');
select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
select count(*) from t1 where match a against ('aaayyy' in boolean mode);
select count(*) from t1 where match a against ('aaazzz' in boolean mode);
# double-check without index
select count(*) from t1 where a = 'aaaxxx';
select count(*) from t1 where a = 'aaayyy';
select count(*) from t1 where a = 'aaazzz';
# update
insert t1 (a) values ('aaaxxx 000000');
select count(*) from t1 where match a against ('000000');
update t1 set a='aaazzz' where match a against ('000000');
select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
select count(*) from t1 where match a against ('aaazzz' in boolean mode);
update t1 set a='aaazzz' where a = 'aaaxxx';
update t1 set a='aaaxxx' where a = 'aaayyy';
select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
select count(*) from t1 where match a against ('aaayyy' in boolean mode);
select count(*) from t1 where match a against ('aaazzz' in boolean mode);
drop table t1;
CREATE TABLE t1 (
i int(10) unsigned not null auto_increment primary key,
a varchar(255) not null,
FULLTEXT KEY (a)
) ENGINE=MyISAM;
#
# now same as about but w/o repair table
# 2-level tree created by mi_write
#
# two-level entry, second-level tree with depth 2
--disable_query_log
let $1=260;
while ($1)
{
eval insert t1 (a) values ('aaaxxx');
dec $1;
}
let $1=255;
while ($1)
{
eval insert t1 (a) values ('aaazzz');
dec $1;
}
let $1=250;
while ($1)
{
eval insert t1 (a) values ('aaayyy');
dec $1;
}
--enable_query_log
select count(*) from t1 where match a against ('aaaxxx');
select count(*) from t1 where match a against ('aaayyy');
select count(*) from t1 where match a against ('aaazzz');
select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
select count(*) from t1 where match a against ('aaayyy' in boolean mode);
select count(*) from t1 where match a against ('aaazzz' in boolean mode);
select count(*) from t1 where match a against ('aaaxxx aaayyy aaazzz');
select count(*) from t1 where match a against ('aaaxxx aaayyy aaazzz' in boolean mode);
select count(*) from t1 where match a against ('aaax*' in boolean mode);
select count(*) from t1 where match a against ('aaay*' in boolean mode);
select count(*) from t1 where match a against ('aaa*' in boolean mode);
# mi_write:
insert t1 (a) values ('aaaxxx'),('aaayyy');
insert t1 (a) values ('aaazzz'),('aaazzz'),('aaazzz'),('aaazzz'),('aaazzz');
select count(*) from t1 where match a against ('aaaxxx');
select count(*) from t1 where match a against ('aaayyy');
select count(*) from t1 where match a against ('aaazzz');
# mi_delete
insert t1 (a) values ('aaaxxx 000000');
select count(*) from t1 where match a against ('000000');
delete from t1 where match a against ('000000');
select count(*) from t1 where match a against ('000000');
select count(*) from t1 where match a against ('aaaxxx');
delete from t1 where match a against ('aaazzz');
select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
select count(*) from t1 where match a against ('aaayyy' in boolean mode);
select count(*) from t1 where match a against ('aaazzz' in boolean mode);
# double-check without index
select count(*) from t1 where a = 'aaaxxx';
select count(*) from t1 where a = 'aaayyy';
select count(*) from t1 where a = 'aaazzz';
# update
insert t1 (a) values ('aaaxxx 000000');
select count(*) from t1 where match a against ('000000');
update t1 set a='aaazzz' where match a against ('000000');
select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
select count(*) from t1 where match a against ('aaazzz' in boolean mode);
update t1 set a='aaazzz' where a = 'aaaxxx';
update t1 set a='aaaxxx' where a = 'aaayyy';
select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
select count(*) from t1 where match a against ('aaayyy' in boolean mode);
select count(*) from t1 where match a against ('aaazzz' in boolean mode);
drop table t1;
#
# BUG#11336
#
# for uca collation isalnum and strnncollsp don't agree on whether
# 0xC2A0 is a space (strnncollsp is right, isalnum is wrong).
#
# they still don't, the bug was fixed by avoiding strnncollsp
#
set names utf8;
create table t1(a text,fulltext(a)) collate=utf8_swedish_ci;
insert into t1 values('test test '),('test'),('test'),('test'),
('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test'),
('test'),('test'),('test'),('test'),('test'),('test'),('test'),('test');
delete from t1 limit 1;
#
# BUG#16489: utf8 + fulltext leads to corrupt index file.
#
truncate table t1;
insert into t1 values('ab c d');
update t1 set a='ab c d';
select * from t1 where match a against('ab c' in boolean mode);
drop table t1;
set names latin1;
# End of 4.1 tests
#
# BUG#19580 - FULLTEXT search produces wrong results on UTF-8 columns
#
SET NAMES utf8;
CREATE TABLE t1(a VARCHAR(255), FULLTEXT(a)) ENGINE=MyISAM DEFAULT CHARSET=utf8;
INSERT INTO t1 VALUES('„MySQL“');
SELECT a FROM t1 WHERE MATCH a AGAINST('“MySQL„' IN BOOLEAN MODE);
DROP TABLE t1;
SET NAMES latin1;