mirror of
https://github.com/MariaDB/server.git
synced 2025-01-15 19:42:28 +01:00
Bug#57737 Character sets: search fails with like, contraction, index
Problem: LIKE over an indexed column optimized away good results, because my_like_range_utf32/utf16 returned wrong ranges for contractions. Contraction related code was missing in my_like_range_utf32/utf16, but did exist in my_like_range_ucs2/utf8. It was forgotten in utf32/utf16 versions (during mysql-6.0 push/revert mess). Fix: The patch removes individual functions my_like_range_ucs2, my_like_range_utf16, my_like_range_utf32 and introduces a single function my_like_range_generic() instead. The new function handles contractions correctly. It can handle any character set with cs->min_sort_char and cs->max_sort_char represented in Unicode code points. added: @ mysql-test/include/ctype_czech.inc @ mysql-test/include/ctype_like_ignorable.inc @ mysql-test/r/ctype_like_range.result @ mysql-test/t/ctype_like_range.test Adding tests modified: @ include/m_ctype.h - Adding helper functions for contractions. - Prototypes: removing ucs2,utf16,utf32 functions, adding generic function. @ mysql-test/r/ctype_uca.result @ mysql-test/r/ctype_utf16_uca.result @ mysql-test/r/ctype_utf32_uca.result @ mysql-test/t/ctype_uca.test @ mysql-test/t/ctype_utf16_uca.test @ mysql-test/t/ctype_utf32_uca.test - Adding tests. @ strings/ctype-mb.c - Pad function did not put the last character. - Implementing my_like_range_generic() - a universal replacement for three separate functions my_like_range_ucs2(), my_like_range_utf16() and my_like_range_utf32(), with correct contraction handling. @ strings/ctype-ucs2.c - my_fill_mb2 did not put the high byte, as previously it was used to put only characters in ASCII range. Now it puts high byte as well (needed to populate cs->max_sort_char correctly). - Adding DBUG_ASSERT() - Removing character set specific functions: my_like_range_ucs2(), my_like_range_utf16() and my_like_range_utf32(). - Using my_like_range_generic() instead of the old functions. @ strings/ctype-uca.c - Using generic function instead of the old character set specific ones. 
@ sql/item_create.cc @ sql/item_strfunc.cc @ sql/item_strfunc.h - Adding SQL functions LIKE_RANGE_MIN and LIKE_RANGE_MAX, available only in debug build to make sure like_range() works correctly for all character sets and collations.
This commit is contained in:
parent
ce441751ed
commit
e3dee8a7fd
17 changed files with 3001 additions and 344 deletions
|
@ -356,6 +356,32 @@ extern CHARSET_INFO my_charset_utf8mb4_unicode_ci;
|
|||
#define MY_UTF8MB4 "utf8mb4"
|
||||
|
||||
|
||||
/* Helper functions to handle contraction */
|
||||
static inline my_bool
|
||||
my_cs_have_contractions(CHARSET_INFO *cs)
|
||||
{
|
||||
return cs->contractions != NULL;
|
||||
}
|
||||
|
||||
static inline my_bool
|
||||
my_cs_can_be_contraction_head(CHARSET_INFO *cs, my_wc_t wc)
|
||||
{
|
||||
return ((const char *)cs->contractions)[0x40*0x40 + (wc & 0xFF)];
|
||||
}
|
||||
|
||||
static inline my_bool
|
||||
my_cs_can_be_contraction_tail(CHARSET_INFO *cs, my_wc_t wc)
|
||||
{
|
||||
return ((const char *)cs->contractions)[0x40*0x40 + (wc & 0xFF)];
|
||||
}
|
||||
|
||||
static inline uint16*
|
||||
my_cs_contraction2_weight(CHARSET_INFO *cs, my_wc_t wc1, my_wc_t wc2)
|
||||
{
|
||||
return &cs->contractions[(wc1 - 0x40) * 0x40 + wc2 - 0x40];
|
||||
}
|
||||
|
||||
|
||||
/* declarations for simple charsets */
|
||||
extern size_t my_strnxfrm_simple(CHARSET_INFO *, uchar *, size_t,
|
||||
const uchar *, size_t);
|
||||
|
@ -430,6 +456,7 @@ ulonglong my_strntoull10rnd_ucs2(CHARSET_INFO *cs,
|
|||
|
||||
void my_fill_8bit(CHARSET_INFO *cs, char* to, size_t l, int fill);
|
||||
|
||||
/* For 8-bit character set */
|
||||
my_bool my_like_range_simple(CHARSET_INFO *cs,
|
||||
const char *ptr, size_t ptr_length,
|
||||
pbool escape, pbool w_one, pbool w_many,
|
||||
|
@ -437,6 +464,7 @@ my_bool my_like_range_simple(CHARSET_INFO *cs,
|
|||
char *min_str, char *max_str,
|
||||
size_t *min_length, size_t *max_length);
|
||||
|
||||
/* For ASCII-based multi-byte character sets with mbminlen=1 */
|
||||
my_bool my_like_range_mb(CHARSET_INFO *cs,
|
||||
const char *ptr, size_t ptr_length,
|
||||
pbool escape, pbool w_one, pbool w_many,
|
||||
|
@ -444,26 +472,13 @@ my_bool my_like_range_mb(CHARSET_INFO *cs,
|
|||
char *min_str, char *max_str,
|
||||
size_t *min_length, size_t *max_length);
|
||||
|
||||
my_bool my_like_range_ucs2(CHARSET_INFO *cs,
|
||||
const char *ptr, size_t ptr_length,
|
||||
pbool escape, pbool w_one, pbool w_many,
|
||||
size_t res_length,
|
||||
char *min_str, char *max_str,
|
||||
size_t *min_length, size_t *max_length);
|
||||
|
||||
my_bool my_like_range_utf16(CHARSET_INFO *cs,
|
||||
const char *ptr, size_t ptr_length,
|
||||
pbool escape, pbool w_one, pbool w_many,
|
||||
size_t res_length,
|
||||
char *min_str, char *max_str,
|
||||
size_t *min_length, size_t *max_length);
|
||||
|
||||
my_bool my_like_range_utf32(CHARSET_INFO *cs,
|
||||
const char *ptr, size_t ptr_length,
|
||||
pbool escape, pbool w_one, pbool w_many,
|
||||
size_t res_length,
|
||||
char *min_str, char *max_str,
|
||||
size_t *min_length, size_t *max_length);
|
||||
/* For other character sets, with arbitrary mbminlen and mbmaxlen numbers */
|
||||
my_bool my_like_range_generic(CHARSET_INFO *cs,
|
||||
const char *ptr, size_t ptr_length,
|
||||
pbool escape, pbool w_one, pbool w_many,
|
||||
size_t res_length,
|
||||
char *min_str, char *max_str,
|
||||
size_t *min_length, size_t *max_length);
|
||||
|
||||
int my_wildcmp_8bit(CHARSET_INFO *,
|
||||
const char *str,const char *str_end,
|
||||
|
|
12
mysql-test/include/ctype_czech.inc
Normal file
12
mysql-test/include/ctype_czech.inc
Normal file
|
@ -0,0 +1,12 @@
|
|||
SELECT @@collation_connection;
|
||||
--echo #
|
||||
--echo # Bug#57737 Character sets: search fails with like, contraction, index
|
||||
--echo #
|
||||
CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS s1 LIMIT 0;
|
||||
INSERT INTO t1 VALUES ('c'),('ce'),('cé'),('ch');
|
||||
SELECT * FROM t1 WHERE s1 LIKE 'c%';
|
||||
ALTER TABLE t1 ADD KEY s1 (s1);
|
||||
SELECT * FROM t1 WHERE s1 LIKE 'c%';
|
||||
ALTER TABLE t1 DROP KEY s1, ADD KEY(s1(1));
|
||||
SELECT * FROM t1 WHERE s1 LIKE 'ch';
|
||||
DROP TABLE t1;
|
11
mysql-test/include/ctype_like_ignorable.inc
Normal file
11
mysql-test/include/ctype_like_ignorable.inc
Normal file
|
@ -0,0 +1,11 @@
|
|||
SELECT @@collation_connection;
|
||||
--echo #
|
||||
--echo # Bug#57737 Character sets: search fails with like, contraction, index
|
||||
--echo # Part#2 - ignorable characters
|
||||
--echo #
|
||||
CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS s1 LIMIT 0;
|
||||
INSERT INTO t1 VALUES ('a\0\0\0\0\0\t'),('a'),('b'),('c'),('d'),('e');
|
||||
SELECT HEX(s1) FROM t1 WHERE s1 LIKE 'a%';
|
||||
ALTER TABLE t1 ADD KEY s1 (s1);
|
||||
SELECT HEX(s1) FROM t1 WHERE s1 LIKE 'a%';
|
||||
DROP TABLE t1;
|
2310
mysql-test/r/ctype_like_range.result
Normal file
2310
mysql-test/r/ctype_like_range.result
Normal file
File diff suppressed because it is too large
Load diff
|
@ -2888,3 +2888,101 @@ a hex(b) c
|
|||
DROP TABLE t1;
|
||||
set names utf8;
|
||||
End for 5.0 tests
|
||||
#
|
||||
# Start of 5.5 tests
|
||||
#
|
||||
SET collation_connection=utf8_czech_ci;
|
||||
SELECT @@collation_connection;
|
||||
@@collation_connection
|
||||
utf8_czech_ci
|
||||
#
|
||||
# Bug#57737 Character sets: search fails with like, contraction, index
|
||||
#
|
||||
CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS s1 LIMIT 0;
|
||||
INSERT INTO t1 VALUES ('c'),('ce'),('cé'),('ch');
|
||||
SELECT * FROM t1 WHERE s1 LIKE 'c%';
|
||||
s1
|
||||
c
|
||||
ce
|
||||
cé
|
||||
ch
|
||||
ALTER TABLE t1 ADD KEY s1 (s1);
|
||||
SELECT * FROM t1 WHERE s1 LIKE 'c%';
|
||||
s1
|
||||
c
|
||||
ce
|
||||
cé
|
||||
ch
|
||||
ALTER TABLE t1 DROP KEY s1, ADD KEY(s1(1));
|
||||
SELECT * FROM t1 WHERE s1 LIKE 'ch';
|
||||
s1
|
||||
ch
|
||||
DROP TABLE t1;
|
||||
SELECT @@collation_connection;
|
||||
@@collation_connection
|
||||
utf8_czech_ci
|
||||
#
|
||||
# Bug#57737 Character sets: search fails with like, contraction, index
|
||||
# Part#2 - ignorable characters
|
||||
#
|
||||
CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS s1 LIMIT 0;
|
||||
INSERT INTO t1 VALUES ('a\0\0\0\0\0\t'),('a'),('b'),('c'),('d'),('e');
|
||||
SELECT HEX(s1) FROM t1 WHERE s1 LIKE 'a%';
|
||||
HEX(s1)
|
||||
61000000000009
|
||||
61
|
||||
ALTER TABLE t1 ADD KEY s1 (s1);
|
||||
SELECT HEX(s1) FROM t1 WHERE s1 LIKE 'a%';
|
||||
HEX(s1)
|
||||
61000000000009
|
||||
61
|
||||
DROP TABLE t1;
|
||||
SET collation_connection=ucs2_czech_ci;
|
||||
SELECT @@collation_connection;
|
||||
@@collation_connection
|
||||
ucs2_czech_ci
|
||||
#
|
||||
# Bug#57737 Character sets: search fails with like, contraction, index
|
||||
#
|
||||
CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS s1 LIMIT 0;
|
||||
INSERT INTO t1 VALUES ('c'),('ce'),('cé'),('ch');
|
||||
SELECT * FROM t1 WHERE s1 LIKE 'c%';
|
||||
s1
|
||||
c
|
||||
ce
|
||||
cé
|
||||
ch
|
||||
ALTER TABLE t1 ADD KEY s1 (s1);
|
||||
SELECT * FROM t1 WHERE s1 LIKE 'c%';
|
||||
s1
|
||||
c
|
||||
ce
|
||||
cé
|
||||
ch
|
||||
ALTER TABLE t1 DROP KEY s1, ADD KEY(s1(1));
|
||||
SELECT * FROM t1 WHERE s1 LIKE 'ch';
|
||||
s1
|
||||
ch
|
||||
DROP TABLE t1;
|
||||
SELECT @@collation_connection;
|
||||
@@collation_connection
|
||||
ucs2_czech_ci
|
||||
#
|
||||
# Bug#57737 Character sets: search fails with like, contraction, index
|
||||
# Part#2 - ignorable characters
|
||||
#
|
||||
CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS s1 LIMIT 0;
|
||||
INSERT INTO t1 VALUES ('a\0\0\0\0\0\t'),('a'),('b'),('c'),('d'),('e');
|
||||
SELECT HEX(s1) FROM t1 WHERE s1 LIKE 'a%';
|
||||
HEX(s1)
|
||||
0061000000000000000000000009
|
||||
0061
|
||||
ALTER TABLE t1 ADD KEY s1 (s1);
|
||||
SELECT HEX(s1) FROM t1 WHERE s1 LIKE 'a%';
|
||||
HEX(s1)
|
||||
0061000000000000000000000009
|
||||
0061
|
||||
DROP TABLE t1;
|
||||
#
|
||||
# End of 5.5 tests
|
||||
#
|
||||
|
|
|
@ -2368,6 +2368,52 @@ NULL
|
|||
NULL
|
||||
NULL
|
||||
drop table t1;
|
||||
SET collation_connection=utf16_czech_ci;
|
||||
SELECT @@collation_connection;
|
||||
@@collation_connection
|
||||
utf16_czech_ci
|
||||
#
|
||||
# Bug#57737 Character sets: search fails with like, contraction, index
|
||||
#
|
||||
CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS s1 LIMIT 0;
|
||||
INSERT INTO t1 VALUES ('c'),('ce'),('cé'),('ch');
|
||||
SELECT * FROM t1 WHERE s1 LIKE 'c%';
|
||||
s1
|
||||
c
|
||||
ce
|
||||
cé
|
||||
ch
|
||||
ALTER TABLE t1 ADD KEY s1 (s1);
|
||||
SELECT * FROM t1 WHERE s1 LIKE 'c%';
|
||||
s1
|
||||
c
|
||||
ce
|
||||
cé
|
||||
ch
|
||||
ALTER TABLE t1 DROP KEY s1, ADD KEY(s1(1));
|
||||
SELECT * FROM t1 WHERE s1 LIKE 'ch';
|
||||
s1
|
||||
ch
|
||||
DROP TABLE t1;
|
||||
SELECT @@collation_connection;
|
||||
@@collation_connection
|
||||
utf16_czech_ci
|
||||
#
|
||||
# Bug#57737 Character sets: search fails with like, contraction, index
|
||||
# Part#2 - ignorable characters
|
||||
#
|
||||
CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS s1 LIMIT 0;
|
||||
INSERT INTO t1 VALUES ('a\0\0\0\0\0\t'),('a'),('b'),('c'),('d'),('e');
|
||||
SELECT HEX(s1) FROM t1 WHERE s1 LIKE 'a%';
|
||||
HEX(s1)
|
||||
0061000000000000000000000009
|
||||
0061
|
||||
ALTER TABLE t1 ADD KEY s1 (s1);
|
||||
SELECT HEX(s1) FROM t1 WHERE s1 LIKE 'a%';
|
||||
HEX(s1)
|
||||
0061000000000000000000000009
|
||||
0061
|
||||
DROP TABLE t1;
|
||||
#
|
||||
# End of 5.5 tests
|
||||
#
|
||||
|
|
|
@ -2368,6 +2368,52 @@ NULL
|
|||
NULL
|
||||
NULL
|
||||
drop table t1;
|
||||
SET collation_connection=utf32_czech_ci;
|
||||
SELECT @@collation_connection;
|
||||
@@collation_connection
|
||||
utf32_czech_ci
|
||||
#
|
||||
# Bug#57737 Character sets: search fails with like, contraction, index
|
||||
#
|
||||
CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS s1 LIMIT 0;
|
||||
INSERT INTO t1 VALUES ('c'),('ce'),('cé'),('ch');
|
||||
SELECT * FROM t1 WHERE s1 LIKE 'c%';
|
||||
s1
|
||||
c
|
||||
ce
|
||||
cé
|
||||
ch
|
||||
ALTER TABLE t1 ADD KEY s1 (s1);
|
||||
SELECT * FROM t1 WHERE s1 LIKE 'c%';
|
||||
s1
|
||||
c
|
||||
ce
|
||||
cé
|
||||
ch
|
||||
ALTER TABLE t1 DROP KEY s1, ADD KEY(s1(1));
|
||||
SELECT * FROM t1 WHERE s1 LIKE 'ch';
|
||||
s1
|
||||
ch
|
||||
DROP TABLE t1;
|
||||
SELECT @@collation_connection;
|
||||
@@collation_connection
|
||||
utf32_czech_ci
|
||||
#
|
||||
# Bug#57737 Character sets: search fails with like, contraction, index
|
||||
# Part#2 - ignorable characters
|
||||
#
|
||||
CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS s1 LIMIT 0;
|
||||
INSERT INTO t1 VALUES ('a\0\0\0\0\0\t'),('a'),('b'),('c'),('d'),('e');
|
||||
SELECT HEX(s1) FROM t1 WHERE s1 LIKE 'a%';
|
||||
HEX(s1)
|
||||
00000061000000000000000000000000000000000000000000000009
|
||||
00000061
|
||||
ALTER TABLE t1 ADD KEY s1 (s1);
|
||||
SELECT HEX(s1) FROM t1 WHERE s1 LIKE 'a%';
|
||||
HEX(s1)
|
||||
00000061000000000000000000000000000000000000000000000009
|
||||
00000061
|
||||
DROP TABLE t1;
|
||||
#
|
||||
# End of 5.5 tests
|
||||
#
|
||||
|
|
87
mysql-test/t/ctype_like_range.test
Normal file
87
mysql-test/t/ctype_like_range.test
Normal file
|
@ -0,0 +1,87 @@
|
|||
--source include/have_debug.inc
|
||||
--source include/have_ucs2.inc
|
||||
--source include/have_utf16.inc
|
||||
--source include/have_utf32.inc
|
||||
|
||||
--disable_warnings
|
||||
DROP TABLE IF EXISTS t1;
|
||||
DROP VIEW IF EXISTS v1;
|
||||
--enable_warnings
|
||||
|
||||
CREATE TABLE t1 (id INT NOT NULL AUTO_INCREMENT PRIMARY KEY, a VARBINARY(32));
|
||||
INSERT INTO t1 (a) VALUES (''),('_'),('%'),('\_'),('\%'),('\\');
|
||||
INSERT INTO t1 (a) VALUES ('a'),('c');
|
||||
INSERT INTO t1 (a) VALUES ('a_'),('c_');
|
||||
INSERT INTO t1 (a) VALUES ('a%'),('c%');
|
||||
INSERT INTO t1 (a) VALUES ('aa'),('cc'),('ch');
|
||||
INSERT INTO t1 (a) VALUES ('aa_'),('cc_'),('ch_');
|
||||
INSERT INTO t1 (a) VALUES ('aa%'),('cc%'),('ch%');
|
||||
INSERT INTO t1 (a) VALUES ('aaa'),('ccc'),('cch');
|
||||
INSERT INTO t1 (a) VALUES ('aaa_'),('ccc_'),('cch_');
|
||||
INSERT INTO t1 (a) VALUES ('aaa%'),('ccc%'),('cch%');
|
||||
INSERT INTO t1 (a) VALUES ('aaaaaaaaaaaaaaaaaaaa');
|
||||
|
||||
CREATE VIEW v1 AS
|
||||
SELECT id, 'a' AS name, a AS val FROM t1
|
||||
UNION
|
||||
SELECT id, 'mn', HEX(LIKE_RANGE_MIN(a, 16)) AS min FROM t1
|
||||
UNION
|
||||
SELECT id, 'mx', HEX(LIKE_RANGE_MAX(a, 16)) AS max FROM t1
|
||||
UNION
|
||||
SELECT id, 'sp', REPEAT('-', 32) AS sep FROM t1
|
||||
ORDER BY id, name;
|
||||
SELECT * FROM v1;
|
||||
|
||||
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET latin1;
|
||||
SELECT * FROM v1;
|
||||
|
||||
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf8;
|
||||
SELECT * FROM v1;
|
||||
|
||||
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf8 COLLATE utf8_unicode_ci;
|
||||
SELECT * FROM v1;
|
||||
|
||||
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf8 COLLATE utf8_czech_ci;
|
||||
SELECT * FROM v1;
|
||||
|
||||
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf8 COLLATE utf8_danish_ci;
|
||||
SELECT * FROM v1;
|
||||
|
||||
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET ucs2;
|
||||
SELECT * FROM v1;
|
||||
|
||||
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET ucs2 COLLATE ucs2_unicode_ci;
|
||||
SELECT * FROM v1;
|
||||
|
||||
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET ucs2 COLLATE ucs2_czech_ci;
|
||||
SELECT * FROM v1;
|
||||
|
||||
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET ucs2 COLLATE ucs2_danish_ci;
|
||||
SELECT * FROM v1;
|
||||
|
||||
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf16;
|
||||
SELECT * FROM v1;
|
||||
|
||||
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf16 COLLATE utf16_unicode_ci;
|
||||
SELECT * FROM v1;
|
||||
|
||||
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf16 COLLATE utf16_czech_ci;
|
||||
SELECT * FROM v1;
|
||||
|
||||
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf16 COLLATE utf16_danish_ci;
|
||||
SELECT * FROM v1;
|
||||
|
||||
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf32;
|
||||
SELECT * FROM v1;
|
||||
|
||||
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf32 COLLATE utf32_unicode_ci;
|
||||
SELECT * FROM v1;
|
||||
|
||||
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf32 COLLATE utf32_czech_ci;
|
||||
SELECT * FROM v1;
|
||||
|
||||
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf32 COLLATE utf32_danish_ci;
|
||||
SELECT * FROM v1;
|
||||
|
||||
DROP VIEW v1;
|
||||
DROP TABLE t1;
|
|
@ -545,3 +545,19 @@ set collation_connection=ucs2_unicode_ci;
|
|||
set names utf8;
|
||||
|
||||
-- echo End for 5.0 tests
|
||||
|
||||
--echo #
|
||||
--echo # Start of 5.5 tests
|
||||
--echo #
|
||||
#
|
||||
# Test my_like_range and contractions
|
||||
#
|
||||
SET collation_connection=utf8_czech_ci;
|
||||
--source include/ctype_czech.inc
|
||||
--source include/ctype_like_ignorable.inc
|
||||
SET collation_connection=ucs2_czech_ci;
|
||||
--source include/ctype_czech.inc
|
||||
--source include/ctype_like_ignorable.inc
|
||||
--echo #
|
||||
--echo # End of 5.5 tests
|
||||
--echo #
|
||||
|
|
|
@ -284,6 +284,13 @@ DROP TABLE IF EXISTS t1;
|
|||
set collation_connection=utf16_unicode_ci;
|
||||
--source include/ctype_regex.inc
|
||||
|
||||
#
|
||||
# Test my_like_range and contractions
|
||||
#
|
||||
SET collation_connection=utf16_czech_ci;
|
||||
--source include/ctype_czech.inc
|
||||
--source include/ctype_like_ignorable.inc
|
||||
|
||||
|
||||
--echo #
|
||||
--echo # End of 5.5 tests
|
||||
|
|
|
@ -286,6 +286,14 @@ set collation_connection=utf32_unicode_ci;
|
|||
--source include/ctype_regex.inc
|
||||
|
||||
|
||||
#
|
||||
# Test my_like_range and contractions
|
||||
#
|
||||
SET collation_connection=utf32_czech_ci;
|
||||
--source include/ctype_czech.inc
|
||||
--source include/ctype_like_ignorable.inc
|
||||
|
||||
|
||||
--echo #
|
||||
--echo # End of 5.5 tests
|
||||
--echo #
|
||||
|
|
|
@ -1330,6 +1330,34 @@ protected:
|
|||
};
|
||||
|
||||
|
||||
#ifndef DBUG_OFF
|
||||
class Create_func_like_range_min : public Create_func_arg2
|
||||
{
|
||||
public:
|
||||
virtual Item *create(THD *thd, Item *arg1, Item *arg2);
|
||||
|
||||
static Create_func_like_range_min s_singleton;
|
||||
|
||||
protected:
|
||||
Create_func_like_range_min() {}
|
||||
virtual ~Create_func_like_range_min() {}
|
||||
};
|
||||
|
||||
|
||||
class Create_func_like_range_max : public Create_func_arg2
|
||||
{
|
||||
public:
|
||||
virtual Item *create(THD *thd, Item *arg1, Item *arg2);
|
||||
|
||||
static Create_func_like_range_max s_singleton;
|
||||
|
||||
protected:
|
||||
Create_func_like_range_max() {}
|
||||
virtual ~Create_func_like_range_max() {}
|
||||
};
|
||||
#endif
|
||||
|
||||
|
||||
class Create_func_ln : public Create_func_arg1
|
||||
{
|
||||
public:
|
||||
|
@ -3836,6 +3864,26 @@ Create_func_length::create(THD *thd, Item *arg1)
|
|||
}
|
||||
|
||||
|
||||
#ifndef DBUG_OFF
|
||||
Create_func_like_range_min Create_func_like_range_min::s_singleton;
|
||||
|
||||
Item*
|
||||
Create_func_like_range_min::create(THD *thd, Item *arg1, Item *arg2)
|
||||
{
|
||||
return new (thd->mem_root) Item_func_like_range_min(arg1, arg2);
|
||||
}
|
||||
|
||||
|
||||
Create_func_like_range_max Create_func_like_range_max::s_singleton;
|
||||
|
||||
Item*
|
||||
Create_func_like_range_max::create(THD *thd, Item *arg1, Item *arg2)
|
||||
{
|
||||
return new (thd->mem_root) Item_func_like_range_max(arg1, arg2);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
Create_func_ln Create_func_ln::s_singleton;
|
||||
|
||||
Item*
|
||||
|
@ -4924,6 +4972,10 @@ static Native_func_registry func_array[] =
|
|||
{ { C_STRING_WITH_LEN("LCASE") }, BUILDER(Create_func_lcase)},
|
||||
{ { C_STRING_WITH_LEN("LEAST") }, BUILDER(Create_func_least)},
|
||||
{ { C_STRING_WITH_LEN("LENGTH") }, BUILDER(Create_func_length)},
|
||||
#ifndef DBUG_OFF
|
||||
{ { C_STRING_WITH_LEN("LIKE_RANGE_MIN") }, BUILDER(Create_func_like_range_min)},
|
||||
{ { C_STRING_WITH_LEN("LIKE_RANGE_MAX") }, BUILDER(Create_func_like_range_max)},
|
||||
#endif
|
||||
{ { C_STRING_WITH_LEN("LINEFROMTEXT") }, GEOM_BUILDER(Create_func_geometry_from_text)},
|
||||
{ { C_STRING_WITH_LEN("LINEFROMWKB") }, GEOM_BUILDER(Create_func_geometry_from_wkb)},
|
||||
{ { C_STRING_WITH_LEN("LINESTRINGFROMTEXT") }, GEOM_BUILDER(Create_func_geometry_from_text)},
|
||||
|
|
|
@ -3128,6 +3128,41 @@ String *Item_func_unhex::val_str(String *str)
|
|||
}
|
||||
|
||||
|
||||
#ifndef DBUG_OFF
|
||||
/**
  Evaluate LIKE_RANGE_MIN() / LIKE_RANGE_MAX().

  args[0] is the LIKE pattern, args[1] is the size in bytes of the
  range strings to produce. The collation's like_range() handler is
  called with '\\' as the escape character, '_' as "one character" and
  '%' as "many characters"; it fills both min_str and max_str, and the
  one selected by is_min is returned.

  @param str  Buffer that args[0] may use for its value.
  @return     Pointer to min_str or max_str, or 0 (with null_value set)
              if an argument is NULL, the size is negative or greater than
              MAX_BLOB_WIDTH, a buffer cannot be allocated, or like_range()
              reports that the pattern cannot be ranged.
*/
String *Item_func_like_range::val_str(String *str)
{
  DBUG_ASSERT(fixed == 1);

  /* The size argument is evaluated before the pattern */
  const longlong nbytes= args[1]->val_int();
  String *pattern= args[0]->val_str(str);
  CHARSET_INFO *cs= collation.collation;

  const bool bad_args= !pattern ||
                       args[0]->null_value || args[1]->null_value ||
                       nbytes < 0 || nbytes > MAX_BLOB_WIDTH;

  /* Buffers are allocated only after the arguments have been validated */
  if (bad_args || min_str.alloc(nbytes) || max_str.alloc(nbytes))
  {
    null_value= 1;
    return 0;
  }
  null_value= 0;

  size_t min_len, max_len;
  if (cs->coll->like_range(cs, pattern->ptr(), pattern->length(),
                           '\\', '_', '%', nbytes,
                           (char*) min_str.ptr(), (char*) max_str.ptr(),
                           &min_len, &max_len))
  {
    null_value= 1;
    return 0;
  }

  /* Both results are finalized, whichever of them is returned */
  min_str.set_charset(cs);
  max_str.set_charset(cs);
  min_str.length(min_len);
  max_str.length(max_len);

  String *result= is_min ? &min_str : &max_str;
  return result;
}
|
||||
#endif
|
||||
|
||||
|
||||
void Item_func_binary::print(String *str, enum_query_type query_type)
|
||||
{
|
||||
str->append(STRING_WITH_LEN("cast("));
|
||||
|
|
|
@ -657,6 +657,46 @@ public:
|
|||
};
|
||||
|
||||
|
||||
#ifndef DBUG_OFF
|
||||
class Item_func_like_range :public Item_str_func
|
||||
{
|
||||
protected:
|
||||
String min_str;
|
||||
String max_str;
|
||||
const bool is_min;
|
||||
public:
|
||||
Item_func_like_range(Item *a, Item *b, bool is_min_arg)
|
||||
:Item_str_func(a, b), is_min(is_min_arg)
|
||||
{ maybe_null= 1; }
|
||||
String *val_str(String *);
|
||||
void fix_length_and_dec()
|
||||
{
|
||||
collation.set(args[0]->collation);
|
||||
decimals=0;
|
||||
max_length= MAX_BLOB_WIDTH;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
class Item_func_like_range_min :public Item_func_like_range
|
||||
{
|
||||
public:
|
||||
Item_func_like_range_min(Item *a, Item *b)
|
||||
:Item_func_like_range(a, b, true) { }
|
||||
const char *func_name() const { return "like_range_min"; }
|
||||
};
|
||||
|
||||
|
||||
class Item_func_like_range_max :public Item_func_like_range
|
||||
{
|
||||
public:
|
||||
Item_func_like_range_max(Item *a, Item *b)
|
||||
:Item_func_like_range(a, b, false) { }
|
||||
const char *func_name() const { return "like_range_max"; }
|
||||
};
|
||||
#endif
|
||||
|
||||
|
||||
class Item_func_binary :public Item_str_func
|
||||
{
|
||||
public:
|
||||
|
|
|
@ -636,7 +636,7 @@ static void pad_max_char(CHARSET_INFO *cs, char *str, char *end)
|
|||
DBUG_ASSERT(buflen > 0);
|
||||
do
|
||||
{
|
||||
if ((str + buflen) < end)
|
||||
if ((str + buflen) <= end)
|
||||
{
|
||||
/* Enough space for the character */
|
||||
memcpy(str, buf, buflen);
|
||||
|
@ -802,6 +802,192 @@ fill_max_and_min:
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
Calculate min_str and max_str that ranges a LIKE string.
|
||||
Generic function, currently used for ucs2, utf16, utf32,
|
||||
but should be suitable for any other character sets with
|
||||
cs->min_sort_char and cs->max_sort_char represented in
|
||||
Unicode code points.
|
||||
|
||||
@param cs Character set and collation pointer
|
||||
@param ptr Pointer to LIKE pattern.
|
||||
@param ptr_length Length of LIKE pattern.
|
||||
@param escape Escape character pattern, typically '\'.
|
||||
@param w_one 'One character' pattern, typically '_'.
|
||||
@param w_many 'Many characters' pattern, typically '%'.
|
||||
@param res_length Length of min_str and max_str.
|
||||
|
||||
@param[out] min_str Smallest string that ranges LIKE.
|
||||
@param[out] max_str Largest string that ranges LIKE.
|
||||
@param[out] min_len Length of min_str
|
||||
@param[out] max_len Length of max_str
|
||||
|
||||
@return Optimization status.
|
||||
@retval FALSE if LIKE pattern can be optimized
|
||||
@rerval TRUE if LIKE can't be optimized.
|
||||
*/
|
||||
my_bool
|
||||
my_like_range_generic(CHARSET_INFO *cs,
|
||||
const char *ptr, size_t ptr_length,
|
||||
pbool escape, pbool w_one, pbool w_many,
|
||||
size_t res_length,
|
||||
char *min_str,char *max_str,
|
||||
size_t *min_length,size_t *max_length)
|
||||
{
|
||||
const char *end= ptr + ptr_length;
|
||||
const char *min_org= min_str;
|
||||
const char *max_org= max_str;
|
||||
char *min_end= min_str + res_length;
|
||||
char *max_end= max_str + res_length;
|
||||
size_t charlen= res_length / cs->mbmaxlen;
|
||||
size_t res_length_diff;
|
||||
my_bool have_contractions= my_cs_have_contractions(cs);
|
||||
|
||||
for ( ; charlen > 0; charlen--)
|
||||
{
|
||||
my_wc_t wc, wc2;
|
||||
int res;
|
||||
if ((res= cs->cset->mb_wc(cs, &wc, (uchar*) ptr, (uchar*) end)) <= 0)
|
||||
{
|
||||
if (res == MY_CS_ILSEQ) /* Bad sequence */
|
||||
return TRUE; /* min_length and max_length are not important */
|
||||
break; /* End of the string */
|
||||
}
|
||||
ptr+= res;
|
||||
|
||||
if (wc == (my_wc_t) escape)
|
||||
{
|
||||
if ((res= cs->cset->mb_wc(cs, &wc, (uchar*) ptr, (uchar*) end)) <= 0)
|
||||
{
|
||||
if (res == MY_CS_ILSEQ)
|
||||
return TRUE; /* min_length and max_length are not important */
|
||||
/*
|
||||
End of the string: Escape is the last character.
|
||||
Put escape as a normal character.
|
||||
We'll will leave the loop on the next iteration.
|
||||
*/
|
||||
}
|
||||
else
|
||||
ptr+= res;
|
||||
|
||||
/* Put escape character to min_str and max_str */
|
||||
if ((res= cs->cset->wc_mb(cs, wc,
|
||||
(uchar*) min_str, (uchar*) min_end)) <= 0)
|
||||
goto pad_set_lengths; /* No space */
|
||||
min_str+= res;
|
||||
|
||||
if ((res= cs->cset->wc_mb(cs, wc,
|
||||
(uchar*) max_str, (uchar*) max_end)) <= 0)
|
||||
goto pad_set_lengths; /* No space */
|
||||
max_str+= res;
|
||||
continue;
|
||||
}
|
||||
else if (wc == (my_wc_t) w_one)
|
||||
{
|
||||
if ((res= cs->cset->wc_mb(cs, cs->min_sort_char,
|
||||
(uchar*) min_str, (uchar*) min_end)) <= 0)
|
||||
goto pad_set_lengths;
|
||||
min_str+= res;
|
||||
|
||||
if ((res= cs->cset->wc_mb(cs, cs->max_sort_char,
|
||||
(uchar*) max_str, (uchar*) max_end)) <= 0)
|
||||
goto pad_set_lengths;
|
||||
max_str+= res;
|
||||
continue;
|
||||
}
|
||||
else if (wc == (my_wc_t) w_many)
|
||||
{
|
||||
/*
|
||||
Calculate length of keys:
|
||||
a\min\min... is the smallest possible string
|
||||
a\max\max... is the biggest possible string
|
||||
*/
|
||||
*min_length= ((cs->state & MY_CS_BINSORT) ?
|
||||
(size_t) (min_str - min_org) :
|
||||
res_length);
|
||||
*max_length= res_length;
|
||||
goto pad_min_max;
|
||||
}
|
||||
|
||||
if (have_contractions &&
|
||||
my_cs_can_be_contraction_head(cs, wc) &&
|
||||
(res= cs->cset->mb_wc(cs, &wc2, (uchar*) ptr, (uchar*) end)) > 0)
|
||||
{
|
||||
uint16 *weight;
|
||||
if ((wc2 == (my_wc_t) w_one || wc2 == (my_wc_t) w_many))
|
||||
{
|
||||
/* Contraction head followed by a wildcard */
|
||||
*min_length= *max_length= res_length;
|
||||
goto pad_min_max;
|
||||
}
|
||||
|
||||
if (my_cs_can_be_contraction_tail(cs, wc2) &&
|
||||
(weight= my_cs_contraction2_weight(cs, wc, wc2)) && weight[0])
|
||||
{
|
||||
/* Contraction found */
|
||||
if (charlen == 1)
|
||||
{
|
||||
/* contraction does not fit to result */
|
||||
*min_length= *max_length= res_length;
|
||||
goto pad_min_max;
|
||||
}
|
||||
|
||||
ptr+= res;
|
||||
charlen--;
|
||||
|
||||
/* Put contraction head */
|
||||
if ((res= cs->cset->wc_mb(cs, wc,
|
||||
(uchar*) min_str, (uchar*) min_end)) <= 0)
|
||||
goto pad_set_lengths;
|
||||
min_str+= res;
|
||||
|
||||
if ((res= cs->cset->wc_mb(cs, wc,
|
||||
(uchar*) max_str, (uchar*) max_end)) <= 0)
|
||||
goto pad_set_lengths;
|
||||
max_str+= res;
|
||||
wc= wc2; /* Prepare to put contraction tail */
|
||||
}
|
||||
}
|
||||
|
||||
/* Normal character, or contraction tail */
|
||||
if ((res= cs->cset->wc_mb(cs, wc,
|
||||
(uchar*) min_str, (uchar*) min_end)) <= 0)
|
||||
goto pad_set_lengths;
|
||||
min_str+= res;
|
||||
if ((res= cs->cset->wc_mb(cs, wc,
|
||||
(uchar*) max_str, (uchar*) max_end)) <= 0)
|
||||
goto pad_set_lengths;
|
||||
max_str+= res;
|
||||
}
|
||||
|
||||
pad_set_lengths:
|
||||
*min_length= (size_t) (min_str - min_org);
|
||||
*max_length= (size_t) (max_str - max_org);
|
||||
|
||||
pad_min_max:
|
||||
/*
|
||||
Fill up max_str and min_str to res_length.
|
||||
fill() cannot set incomplete characters and
|
||||
requires that "length" argument is divisible to mbminlen.
|
||||
Make sure to call fill() with proper "length" argument.
|
||||
*/
|
||||
res_length_diff= res_length % cs->mbminlen;
|
||||
cs->cset->fill(cs, min_str, min_end - min_str - res_length_diff,
|
||||
cs->min_sort_char);
|
||||
cs->cset->fill(cs, max_str, max_end - max_str - res_length_diff,
|
||||
cs->max_sort_char);
|
||||
|
||||
/* In case of incomplete characters set the remainder to 0x00's */
|
||||
if (res_length_diff)
|
||||
{
|
||||
/* Example: odd res_length for ucs2 */
|
||||
memset(min_end - res_length_diff, 0, res_length_diff);
|
||||
memset(max_end - res_length_diff, 0, res_length_diff);
|
||||
}
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
my_wildcmp_mb_bin(CHARSET_INFO *cs,
|
||||
const char *str,const char *str_end,
|
||||
|
|
|
@ -8127,7 +8127,7 @@ MY_COLLATION_HANDLER my_collation_ucs2_uca_handler =
|
|||
my_strnncollsp_ucs2_uca,
|
||||
my_strnxfrm_ucs2_uca,
|
||||
my_strnxfrmlen_simple,
|
||||
my_like_range_ucs2,
|
||||
my_like_range_generic,
|
||||
my_wildcmp_uca,
|
||||
NULL,
|
||||
my_instr_mb,
|
||||
|
@ -10134,7 +10134,7 @@ MY_COLLATION_HANDLER my_collation_utf32_uca_handler =
|
|||
my_strnncollsp_any_uca,
|
||||
my_strnxfrm_any_uca,
|
||||
my_strnxfrmlen_simple,
|
||||
my_like_range_utf32,
|
||||
my_like_range_generic,
|
||||
my_wildcmp_uca,
|
||||
NULL,
|
||||
my_instr_mb,
|
||||
|
@ -10801,7 +10801,7 @@ MY_COLLATION_HANDLER my_collation_utf16_uca_handler =
|
|||
my_strnncollsp_any_uca,
|
||||
my_strnxfrm_any_uca,
|
||||
my_strnxfrmlen_simple,
|
||||
my_like_range_utf16,
|
||||
my_like_range_generic,
|
||||
my_wildcmp_uca,
|
||||
NULL,
|
||||
my_instr_mb,
|
||||
|
|
|
@ -903,7 +903,8 @@ static void
|
|||
my_fill_mb2(CHARSET_INFO *cs __attribute__((unused)),
|
||||
char *s, size_t l, int fill)
|
||||
{
|
||||
for ( ; l >= 2; s[0]= 0, s[1]= fill, s+= 2, l-= 2);
|
||||
DBUG_ASSERT(fill <= 0xFFFF);
|
||||
for ( ; l >= 2; s[0]= (fill >> 8), s[1]= (fill & 0xFF), s+= 2, l-= 2);
|
||||
}
|
||||
|
||||
|
||||
|
@ -1563,98 +1564,6 @@ my_hash_sort_utf16_bin(CHARSET_INFO *cs __attribute__((unused)),
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
Calculate min_str and max_str that ranges a LIKE string.
|
||||
|
||||
@param ptr Pointer to LIKE pattern.
|
||||
@param ptr_length Length of LIKE pattern.
|
||||
@param escape Escape character in LIKE. (Normally '\').
|
||||
All escape characters should be removed
|
||||
from min_str and max_str.
|
||||
@param res_length Length of min_str and max_str.
|
||||
@param min_str Smallest case sensitive string that ranges LIKE.
|
||||
Should be space padded to res_length.
|
||||
@param max_str Largest case sensitive string that ranges LIKE.
|
||||
Normally padded with the biggest character sort value.
|
||||
|
||||
@return Optimization status.
|
||||
@retval FALSE if LIKE pattern can be optimized
|
||||
@rerval TRUE if LIKE can't be optimized.
|
||||
*/
|
||||
|
||||
my_bool
|
||||
my_like_range_utf16(CHARSET_INFO *cs,
|
||||
const char *ptr, size_t ptr_length,
|
||||
pbool escape, pbool w_one, pbool w_many,
|
||||
size_t res_length,
|
||||
char *min_str,char *max_str,
|
||||
size_t *min_length,size_t *max_length)
|
||||
{
|
||||
const char *end=ptr+ptr_length;
|
||||
char *min_org=min_str;
|
||||
char *min_end=min_str+res_length;
|
||||
size_t charlen= res_length / cs->mbmaxlen;
|
||||
|
||||
for ( ; ptr + 1 < end && min_str + 1 < min_end && charlen > 0
|
||||
; ptr+=2, charlen--)
|
||||
{
|
||||
if (ptr[0] == '\0' && ptr[1] == escape && ptr + 1 < end)
|
||||
{
|
||||
ptr+=2; /* Skip escape */
|
||||
*min_str++= *max_str++ = ptr[0];
|
||||
*min_str++= *max_str++ = ptr[1];
|
||||
continue;
|
||||
}
|
||||
if (ptr[0] == '\0' && ptr[1] == w_one) /* '_' in SQL */
|
||||
{
|
||||
*min_str++= (char) (cs->min_sort_char >> 8);
|
||||
*min_str++= (char) (cs->min_sort_char & 255);
|
||||
*max_str++= (char) (cs->max_sort_char >> 8);
|
||||
*max_str++= (char) (cs->max_sort_char & 255);
|
||||
continue;
|
||||
}
|
||||
if (ptr[0] == '\0' && ptr[1] == w_many) /* '%' in SQL */
|
||||
{
|
||||
/*
|
||||
Calculate length of keys:
|
||||
'a\0\0... is the smallest possible string when we have space expand
|
||||
a\ff\ff... is the biggest possible string
|
||||
*/
|
||||
*min_length= ((cs->state & MY_CS_BINSORT) ? (size_t) (min_str - min_org) :
|
||||
res_length);
|
||||
*max_length= res_length;
|
||||
do {
|
||||
*min_str++ = 0;
|
||||
*min_str++ = 0;
|
||||
*max_str++ = (char) (cs->max_sort_char >> 8);
|
||||
*max_str++ = (char) (cs->max_sort_char & 255);
|
||||
} while (min_str + 1 < min_end);
|
||||
return FALSE;
|
||||
}
|
||||
*min_str++= *max_str++ = ptr[0];
|
||||
*min_str++= *max_str++ = ptr[1];
|
||||
}
|
||||
|
||||
/* Temporary fix for handling w_one at end of string (key compression) */
|
||||
{
|
||||
char *tmp;
|
||||
for (tmp= min_str ; tmp-1 > min_org && tmp[-1] == '\0' && tmp[-2]=='\0';)
|
||||
{
|
||||
*--tmp=' ';
|
||||
*--tmp='\0';
|
||||
}
|
||||
}
|
||||
|
||||
*min_length= *max_length = (size_t) (min_str - min_org);
|
||||
while (min_str + 1 < min_end)
|
||||
{
|
||||
*min_str++ = *max_str++ = '\0';
|
||||
*min_str++ = *max_str++ = ' '; /* Because if key compression */
|
||||
}
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
|
||||
static MY_COLLATION_HANDLER my_collation_utf16_general_ci_handler =
|
||||
{
|
||||
NULL, /* init */
|
||||
|
@ -1662,7 +1571,7 @@ static MY_COLLATION_HANDLER my_collation_utf16_general_ci_handler =
|
|||
my_strnncollsp_utf16,
|
||||
my_strnxfrm_unicode,
|
||||
my_strnxfrmlen_simple,
|
||||
my_like_range_utf16,
|
||||
my_like_range_generic,
|
||||
my_wildcmp_utf16_ci,
|
||||
my_strcasecmp_mb2_or_mb4,
|
||||
my_instr_mb,
|
||||
|
@ -1678,7 +1587,7 @@ static MY_COLLATION_HANDLER my_collation_utf16_bin_handler =
|
|||
my_strnncollsp_utf16_bin,
|
||||
my_strnxfrm_unicode_full_bin,
|
||||
my_strnxfrmlen_unicode_full_bin,
|
||||
my_like_range_utf16,
|
||||
my_like_range_generic,
|
||||
my_wildcmp_utf16_bin,
|
||||
my_strcasecmp_mb2_or_mb4,
|
||||
my_instr_mb,
|
||||
|
@ -2551,113 +2460,6 @@ my_strnncollsp_utf32_bin(CHARSET_INFO *cs __attribute__((unused)),
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
Calculate min_str and max_str that ranges a LIKE string.
|
||||
|
||||
@param ptr Pointer to LIKE pattern.
|
||||
@param ptr_length Length of LIKE pattern.
|
||||
@param escape Escape character in LIKE. (Normally '\').
|
||||
All escape characters should be removed
|
||||
from min_str and max_str.
|
||||
@param res_length Length of min_str and max_str.
|
||||
@param min_str Smallest case sensitive string that ranges LIKE.
|
||||
Should be space padded to res_length.
|
||||
@param max_str Largest case sensitive string that ranges LIKE.
|
||||
Normally padded with the biggest character sort value.
|
||||
|
||||
@return Optimization status.
|
||||
@retval FALSE if LIKE pattern can be optimized
|
||||
@rerval TRUE if LIKE can't be optimized.
|
||||
*/
|
||||
|
||||
my_bool
|
||||
my_like_range_utf32(CHARSET_INFO *cs,
|
||||
const char *ptr, size_t ptr_length,
|
||||
pbool escape, pbool w_one, pbool w_many,
|
||||
size_t res_length,
|
||||
char *min_str,char *max_str,
|
||||
size_t *min_length,size_t *max_length)
|
||||
{
|
||||
const char *end= ptr + ptr_length;
|
||||
char *min_org= min_str;
|
||||
char *min_end= min_str + res_length;
|
||||
char *max_end= max_str + res_length;
|
||||
size_t charlen= res_length / cs->mbmaxlen;
|
||||
|
||||
DBUG_ASSERT((res_length % 4) == 0);
|
||||
|
||||
for ( ; charlen > 0; ptr+= 4, charlen--)
|
||||
{
|
||||
my_wc_t wc;
|
||||
int res;
|
||||
if ((res= my_utf32_uni(cs, &wc, (uchar*) ptr, (uchar*) end)) < 0)
|
||||
{
|
||||
my_fill_utf32(cs, min_str, min_end - min_str, cs->min_sort_char);
|
||||
my_fill_utf32(cs, max_str, min_end - min_str, cs->max_sort_char);
|
||||
/* min_length and max_legnth are not important */
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
if (wc == (my_wc_t) escape)
|
||||
{
|
||||
ptr+= 4; /* Skip escape */
|
||||
if ((res= my_utf32_uni(cs, &wc, (uchar*) ptr, (uchar*) end)) < 0)
|
||||
{
|
||||
my_fill_utf32(cs, min_str, min_end - min_str, cs->min_sort_char);
|
||||
my_fill_utf32(cs, max_str, max_end - min_str, cs->max_sort_char);
|
||||
/* min_length and max_length are not important */
|
||||
return TRUE;
|
||||
}
|
||||
if (my_uni_utf32(cs, wc, (uchar*) min_str, (uchar*) min_end) != 4 ||
|
||||
my_uni_utf32(cs, wc, (uchar*) max_str, (uchar*) max_end) != 4)
|
||||
goto pad_set_lengths;
|
||||
*min_str++= 4;
|
||||
*max_str++= 4;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (wc == (my_wc_t) w_one)
|
||||
{
|
||||
if (my_uni_utf32(cs, cs->min_sort_char, (uchar*) min_str, (uchar*) min_end) != 4 ||
|
||||
my_uni_utf32(cs, cs->max_sort_char, (uchar*) max_str, (uchar*) max_end) != 4)
|
||||
goto pad_set_lengths;
|
||||
min_str+= 4;
|
||||
max_str+= 4;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (wc == (my_wc_t) w_many)
|
||||
{
|
||||
/*
|
||||
Calculate length of keys:
|
||||
'a\0\0... is the smallest possible string when we have space expand
|
||||
a\ff\ff... is the biggest possible string
|
||||
*/
|
||||
*min_length= ((cs->state & MY_CS_BINSORT) ?
|
||||
(size_t) (min_str - min_org) :
|
||||
res_length);
|
||||
*max_length= res_length;
|
||||
goto pad_min_max;
|
||||
}
|
||||
|
||||
/* Normal character */
|
||||
if (my_uni_utf32(cs, wc, (uchar*) min_str, (uchar*) min_end) != 4 ||
|
||||
my_uni_utf32(cs, wc, (uchar*) max_str, (uchar*) max_end) != 4)
|
||||
goto pad_set_lengths;
|
||||
min_str+= 4;
|
||||
max_str+= 4;
|
||||
}
|
||||
|
||||
pad_set_lengths:
|
||||
*min_length= *max_length= (size_t) (min_str - min_org);
|
||||
|
||||
pad_min_max:
|
||||
my_fill_utf32(cs, min_str, min_end - min_str, cs->min_sort_char);
|
||||
my_fill_utf32(cs, max_str, max_end - max_str, cs->max_sort_char);
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
|
||||
static size_t
|
||||
my_scan_utf32(CHARSET_INFO *cs,
|
||||
const char *str, const char *end, int sequence_type)
|
||||
|
@ -2689,7 +2491,7 @@ static MY_COLLATION_HANDLER my_collation_utf32_general_ci_handler =
|
|||
my_strnncollsp_utf32,
|
||||
my_strnxfrm_unicode,
|
||||
my_strnxfrmlen_utf32,
|
||||
my_like_range_utf32,
|
||||
my_like_range_generic,
|
||||
my_wildcmp_utf32_ci,
|
||||
my_strcasecmp_mb2_or_mb4,
|
||||
my_instr_mb,
|
||||
|
@ -2705,7 +2507,7 @@ static MY_COLLATION_HANDLER my_collation_utf32_bin_handler =
|
|||
my_strnncollsp_utf32_bin,
|
||||
my_strnxfrm_unicode_full_bin,
|
||||
my_strnxfrmlen_unicode_full_bin,
|
||||
my_like_range_utf32,
|
||||
my_like_range_generic,
|
||||
my_wildcmp_utf32_bin,
|
||||
my_strcasecmp_mb2_or_mb4,
|
||||
my_instr_mb,
|
||||
|
@ -3252,120 +3054,6 @@ void my_hash_sort_ucs2_bin(CHARSET_INFO *cs __attribute__((unused)),
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
** Calculate min_str and max_str that ranges a LIKE string.
|
||||
** Arguments:
|
||||
** ptr Pointer to LIKE string.
|
||||
** ptr_length Length of LIKE string.
|
||||
** escape Escape character in LIKE. (Normally '\').
|
||||
** All escape characters should be removed from min_str and max_str
|
||||
** res_length Length of min_str and max_str.
|
||||
** min_str Smallest case sensitive string that ranges LIKE.
|
||||
** Should be space padded to res_length.
|
||||
** max_str Largest case sensitive string that ranges LIKE.
|
||||
** Normally padded with the biggest character sort value.
|
||||
**
|
||||
** The function should return 0 if ok and 1 if the LIKE string can't be
|
||||
** optimized !
|
||||
*/
|
||||
|
||||
my_bool my_like_range_ucs2(CHARSET_INFO *cs,
|
||||
const char *ptr, size_t ptr_length,
|
||||
pbool escape, pbool w_one, pbool w_many,
|
||||
size_t res_length,
|
||||
char *min_str,char *max_str,
|
||||
size_t *min_length,size_t *max_length)
|
||||
{
|
||||
const char *end=ptr+ptr_length;
|
||||
char *min_org=min_str;
|
||||
char *min_end=min_str+res_length;
|
||||
size_t charlen= res_length / cs->mbmaxlen;
|
||||
const char *contraction_flags= cs->contractions ?
|
||||
((const char*) cs->contractions) + 0x40*0x40 : NULL;
|
||||
|
||||
for ( ; ptr + 1 < end && min_str + 1 < min_end && charlen > 0
|
||||
; ptr+=2, charlen--)
|
||||
{
|
||||
if (ptr[0] == '\0' && ptr[1] == escape && ptr + 1 < end)
|
||||
{
|
||||
ptr+=2; /* Skip escape */
|
||||
*min_str++= *max_str++ = ptr[0];
|
||||
*min_str++= *max_str++ = ptr[1];
|
||||
continue;
|
||||
}
|
||||
if (ptr[0] == '\0' && ptr[1] == w_one) /* '_' in SQL */
|
||||
{
|
||||
*min_str++= (char) (cs->min_sort_char >> 8);
|
||||
*min_str++= (char) (cs->min_sort_char & 255);
|
||||
*max_str++= (char) (cs->max_sort_char >> 8);
|
||||
*max_str++= (char) (cs->max_sort_char & 255);
|
||||
continue;
|
||||
}
|
||||
if (ptr[0] == '\0' && ptr[1] == w_many) /* '%' in SQL */
|
||||
{
|
||||
fill_max_and_min:
|
||||
/*
|
||||
Calculate length of keys:
|
||||
'a\0\0... is the smallest possible string when we have space expand
|
||||
a\ff\ff... is the biggest possible string
|
||||
*/
|
||||
*min_length= ((cs->state & MY_CS_BINSORT) ? (size_t) (min_str - min_org) :
|
||||
res_length);
|
||||
*max_length= res_length;
|
||||
do {
|
||||
*min_str++ = 0;
|
||||
*min_str++ = 0;
|
||||
*max_str++ = (char) (cs->max_sort_char >> 8);
|
||||
*max_str++ = (char) (cs->max_sort_char & 255);
|
||||
} while (min_str + 1 < min_end);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (contraction_flags && ptr + 3 < end &&
|
||||
ptr[0] == '\0' && contraction_flags[(uchar) ptr[1]])
|
||||
{
|
||||
/* Contraction head found */
|
||||
if (ptr[2] == '\0' && (ptr[3] == w_one || ptr[3] == w_many))
|
||||
{
|
||||
/* Contraction head followed by a wildcard, quit */
|
||||
goto fill_max_and_min;
|
||||
}
|
||||
|
||||
/*
|
||||
Check if the second letter can be contraction part,
|
||||
and if two letters really produce a contraction.
|
||||
*/
|
||||
if (ptr[2] == '\0' && contraction_flags[(uchar) ptr[3]] &&
|
||||
cs->contractions[(ptr[1]-0x40)*0x40 + ptr[3] - 0x40])
|
||||
{
|
||||
/* Contraction found */
|
||||
if (charlen == 1 || min_str + 2 >= min_end)
|
||||
{
|
||||
/* Full contraction doesn't fit, quit */
|
||||
goto fill_max_and_min;
|
||||
}
|
||||
|
||||
/* Put contraction head */
|
||||
*min_str++= *max_str++= *ptr++;
|
||||
*min_str++= *max_str++= *ptr++;
|
||||
charlen--;
|
||||
}
|
||||
}
|
||||
/* Put contraction tail, or a single character */
|
||||
*min_str++= *max_str++ = ptr[0];
|
||||
*min_str++= *max_str++ = ptr[1];
|
||||
}
|
||||
|
||||
*min_length= *max_length = (size_t) (min_str - min_org);
|
||||
while (min_str + 1 < min_end)
|
||||
{
|
||||
*min_str++ = *max_str++ = '\0';
|
||||
*min_str++ = *max_str++ = ' '; /* Because if key compression */
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler =
|
||||
{
|
||||
|
@ -3374,7 +3062,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler =
|
|||
my_strnncollsp_ucs2,
|
||||
my_strnxfrm_unicode,
|
||||
my_strnxfrmlen_simple,
|
||||
my_like_range_ucs2,
|
||||
my_like_range_generic,
|
||||
my_wildcmp_ucs2_ci,
|
||||
my_strcasecmp_mb2_or_mb4,
|
||||
my_instr_mb,
|
||||
|
@ -3390,7 +3078,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler =
|
|||
my_strnncollsp_ucs2_bin,
|
||||
my_strnxfrm_unicode,
|
||||
my_strnxfrmlen_simple,
|
||||
my_like_range_ucs2,
|
||||
my_like_range_generic,
|
||||
my_wildcmp_ucs2_bin,
|
||||
my_strcasecmp_mb2_or_mb4,
|
||||
my_instr_mb,
|
||||
|
|
Loading…
Reference in a new issue