This bug in Field_string::cmp resulted in a wrong comparison 
with keys in partial indexes over multi-byte character fields.
For example, if field a is declared as varchar(16) collate utf8_unicode_ci,
then INDEX(a(4)) is such an index.
  
Wrong key comparisons could lead to incorrect result sets if
the selected query execution plan used a range scan over
a partial index on a utf8 character field.
This also caused wrong results in many other cases.
This commit is contained in:
evgen@moonbone.local 2006-06-22 00:29:04 +04:00
parent 5f46cb7e1a
commit 8d4a910a1f
3 changed files with 73 additions and 13 deletions

View file

@ -1124,3 +1124,43 @@ check table t1;
Table Op Msg_type Msg_text
test.t1 check status OK
drop table t1;
SET NAMES utf8;
CREATE TABLE t1 (id int PRIMARY KEY,
a varchar(16) collate utf8_unicode_ci NOT NULL default '',
b int,
f varchar(128) default 'XXX',
INDEX (a(4))
) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
INSERT INTO t1(id, a, b) VALUES
(1, 'cccc', 50), (2, 'cccc', 70), (3, 'cccc', 30),
(4, 'cccc', 30), (5, 'cccc', 20), (6, 'bbbbbb', 40),
(7, 'dddd', 30), (8, 'aaaa', 10), (9, 'aaaa', 50),
(10, 'eeeee', 40), (11, 'bbbbbb', 60);
SELECT id, a, b FROM t1;
id a b
1 cccc 50
2 cccc 70
3 cccc 30
4 cccc 30
5 cccc 20
6 bbbbbb 40
7 dddd 30
8 aaaa 10
9 aaaa 50
10 eeeee 40
11 bbbbbb 60
SELECT id, a, b FROM t1 WHERE a BETWEEN 'aaaa' AND 'bbbbbb';
id a b
8 aaaa 10
9 aaaa 50
6 bbbbbb 40
11 bbbbbb 60
SELECT id, a FROM t1 WHERE a='bbbbbb';
id a
6 bbbbbb
11 bbbbbb
SELECT id, a FROM t1 WHERE a='bbbbbb' ORDER BY b;
id a
6 bbbbbb
11 bbbbbb
DROP TABLE t1;

View file

@ -926,4 +926,30 @@ INSERT INTO t1 VALUES('uUABCDEFGHIGKLMNOPRSTUVWXYZ̈bbbbbbbbbbbbbbbbbbbbbbbbbbbb
check table t1;
drop table t1;
#
# Bug#14896: Comparison with a key in a partial index over mb character field
#
# Use utf8 for the connection so the string literals below match the
# column's character set.
SET NAMES utf8;
# Column a is indexed only by its first 4 characters (INDEX (a(4))),
# while the column itself holds up to 16 utf8 characters.
CREATE TABLE t1 (id int PRIMARY KEY,
a varchar(16) collate utf8_unicode_ci NOT NULL default '',
b int,
f varchar(128) default 'XXX',
INDEX (a(4))
) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
# Mix values that fit the 4-character prefix exactly ('aaaa', 'cccc', 'dddd')
# with values longer than the prefix ('bbbbbb', 'eeeee').
INSERT INTO t1(id, a, b) VALUES
(1, 'cccc', 50), (2, 'cccc', 70), (3, 'cccc', 30),
(4, 'cccc', 30), (5, 'cccc', 20), (6, 'bbbbbb', 40),
(7, 'dddd', 30), (8, 'aaaa', 10), (9, 'aaaa', 50),
(10, 'eeeee', 40), (11, 'bbbbbb', 60);
# Baseline: dump all rows without any condition on a.
SELECT id, a, b FROM t1;
# Range condition on a whose upper bound is longer than the index prefix.
SELECT id, a, b FROM t1 WHERE a BETWEEN 'aaaa' AND 'bbbbbb';
# Equality condition on a value longer than the index prefix.
SELECT id, a FROM t1 WHERE a='bbbbbb';
# Same equality condition, with the result additionally ordered by b.
SELECT id, a FROM t1 WHERE a='bbbbbb' ORDER BY b;
DROP TABLE t1;
# End of 4.1 tests

View file

@ -5072,17 +5072,6 @@ int Field_string::cmp(const char *a_ptr, const char *b_ptr)
{
uint a_len, b_len;
if (field_charset->strxfrm_multiply > 1)
{
/*
We have to remove end space to be able to compare multi-byte-characters
like in latin_de 'ae' and 0xe4
*/
return field_charset->coll->strnncollsp(field_charset,
(const uchar*) a_ptr, field_length,
(const uchar*) b_ptr,
field_length);
}
if (field_charset->mbmaxlen != 1)
{
uint char_len= field_length/field_charset->mbmaxlen;
@ -5091,8 +5080,13 @@ int Field_string::cmp(const char *a_ptr, const char *b_ptr)
}
else
a_len= b_len= field_length;
return my_strnncoll(field_charset,(const uchar*) a_ptr, a_len,
(const uchar*) b_ptr, b_len);
/*
We have to remove end space to be able to compare multi-byte-characters
like in latin_de 'ae' and 0xe4
*/
return field_charset->coll->strnncollsp(field_charset,
(const uchar*) a_ptr, a_len,
(const uchar*) b_ptr, b_len);
}