Bug#57737 Character sets: search fails with like, contraction, index

Problem: LIKE over an indexed column optimized away good results,
because my_like_range_utf32/utf16 returned wrong ranges for contractions.
Contraction related code was missing in my_like_range_utf32/utf16,
but did exist in my_like_range_ucs2/utf8.
It was forgotten in utf32/utf16 versions (during mysql-6.0 push/revert mess).

Fix:
The patch removes individual functions my_like_range_ucs2,
my_like_range_utf16, my_like_range_utf32 and introduces a single function
my_like_range_generic() instead. The new function handles contractions
correctly. It can handle any character set with cs->min_sort_char and
cs->max_sort_char represented in Unicode code points.

added:
  @ mysql-test/include/ctype_czech.inc
  @ mysql-test/include/ctype_like_ignorable.inc
  @ mysql-test/r/ctype_like_range.result
  @ mysql-test/t/ctype_like_range.test
  Adding tests


modified:

  @ include/m_ctype.h
  - Adding helper functions for contractions.
  - Prototypes: removing ucs2,utf16,utf32 functions, adding generic function.
  @ mysql-test/r/ctype_uca.result
  @ mysql-test/r/ctype_utf16_uca.result
  @ mysql-test/r/ctype_utf32_uca.result
  @ mysql-test/t/ctype_uca.test
  @ mysql-test/t/ctype_utf16_uca.test
  @ mysql-test/t/ctype_utf32_uca.test
  - Adding tests.

  @ strings/ctype-mb.c
  - Pad function did not put the last character.
  - Implementing my_like_range_generic() - an universal replacement
    for three separate functions
    my_like_range_ucs2(), my_like_range_utf16() and my_like_range_utf32(),
    with correct contraction handling.

  @ strings/ctype-ucs2.c
  - my_fill_mb2 did not put the high byte, as previously
    it was used to put only characters in ASCII range.
    Now it puts high byte as well
    (needed to pupulate cs->max_sort_char correctly).
  - Adding DBUG_ASSERT()
  - Removing character set specific functions:
    my_like_range_ucs2(), my_like_range_utf16() and my_like_range_utf32().
  - Using my_like_range_generic() instead of the old functions.

  @ strings/ctype-uca.c
  - Using generic function instead of the old character set specific ones.

  @ sql/item_create.cc
  @ sql/item_strfunc.cc
  @ sql/item_strfunc.h
  - Adding SQL functions LIKE_RANGE_MIN and LIKE_RANGE_MAX,
    available only in debug build to make sure like_range()
    works correctly for all character sets and collations.
This commit is contained in:
Alexander Barkov 2010-11-26 13:44:39 +03:00
parent ce441751ed
commit e3dee8a7fd
17 changed files with 3001 additions and 344 deletions

View file

@ -356,6 +356,32 @@ extern CHARSET_INFO my_charset_utf8mb4_unicode_ci;
#define MY_UTF8MB4 "utf8mb4"
/* Helper functions to handle contraction */
static inline my_bool
my_cs_have_contractions(CHARSET_INFO *cs)
{
return cs->contractions != NULL;
}
static inline my_bool
my_cs_can_be_contraction_head(CHARSET_INFO *cs, my_wc_t wc)
{
return ((const char *)cs->contractions)[0x40*0x40 + (wc & 0xFF)];
}
static inline my_bool
my_cs_can_be_contraction_tail(CHARSET_INFO *cs, my_wc_t wc)
{
return ((const char *)cs->contractions)[0x40*0x40 + (wc & 0xFF)];
}
static inline uint16*
my_cs_contraction2_weight(CHARSET_INFO *cs, my_wc_t wc1, my_wc_t wc2)
{
return &cs->contractions[(wc1 - 0x40) * 0x40 + wc2 - 0x40];
}
/* declarations for simple charsets */
extern size_t my_strnxfrm_simple(CHARSET_INFO *, uchar *, size_t,
const uchar *, size_t);
@ -430,6 +456,7 @@ ulonglong my_strntoull10rnd_ucs2(CHARSET_INFO *cs,
void my_fill_8bit(CHARSET_INFO *cs, char* to, size_t l, int fill);
/* For 8-bit character set */
my_bool my_like_range_simple(CHARSET_INFO *cs,
const char *ptr, size_t ptr_length,
pbool escape, pbool w_one, pbool w_many,
@ -437,6 +464,7 @@ my_bool my_like_range_simple(CHARSET_INFO *cs,
char *min_str, char *max_str,
size_t *min_length, size_t *max_length);
/* For ASCII-based multi-byte character sets with mbminlen=1 */
my_bool my_like_range_mb(CHARSET_INFO *cs,
const char *ptr, size_t ptr_length,
pbool escape, pbool w_one, pbool w_many,
@ -444,26 +472,13 @@ my_bool my_like_range_mb(CHARSET_INFO *cs,
char *min_str, char *max_str,
size_t *min_length, size_t *max_length);
my_bool my_like_range_ucs2(CHARSET_INFO *cs,
const char *ptr, size_t ptr_length,
pbool escape, pbool w_one, pbool w_many,
size_t res_length,
char *min_str, char *max_str,
size_t *min_length, size_t *max_length);
my_bool my_like_range_utf16(CHARSET_INFO *cs,
const char *ptr, size_t ptr_length,
pbool escape, pbool w_one, pbool w_many,
size_t res_length,
char *min_str, char *max_str,
size_t *min_length, size_t *max_length);
my_bool my_like_range_utf32(CHARSET_INFO *cs,
const char *ptr, size_t ptr_length,
pbool escape, pbool w_one, pbool w_many,
size_t res_length,
char *min_str, char *max_str,
size_t *min_length, size_t *max_length);
/* For other character sets, with arbitrary mbminlen and mbmaxlen numbers */
my_bool my_like_range_generic(CHARSET_INFO *cs,
const char *ptr, size_t ptr_length,
pbool escape, pbool w_one, pbool w_many,
size_t res_length,
char *min_str, char *max_str,
size_t *min_length, size_t *max_length);
int my_wildcmp_8bit(CHARSET_INFO *,
const char *str,const char *str_end,

View file

@ -0,0 +1,12 @@
SELECT @@collation_connection;
--echo #
--echo # Bug#57737 Character sets: search fails with like, contraction, index
--echo #
CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS s1 LIMIT 0;
INSERT INTO t1 VALUES ('c'),('ce'),('cé'),('ch');
SELECT * FROM t1 WHERE s1 LIKE 'c%';
ALTER TABLE t1 ADD KEY s1 (s1);
SELECT * FROM t1 WHERE s1 LIKE 'c%';
ALTER TABLE t1 DROP KEY s1, ADD KEY(s1(1));
SELECT * FROM t1 WHERE s1 LIKE 'ch';
DROP TABLE t1;

View file

@ -0,0 +1,11 @@
SELECT @@collation_connection;
--echo #
--echo # Bug#57737 Character sets: search fails with like, contraction, index
--echo # Part#2 - ignorable characters
--echo #
CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS s1 LIMIT 0;
INSERT INTO t1 VALUES ('a\0\0\0\0\0\t'),('a'),('b'),('c'),('d'),('e');
SELECT HEX(s1) FROM t1 WHERE s1 LIKE 'a%';
ALTER TABLE t1 ADD KEY s1 (s1);
SELECT HEX(s1) FROM t1 WHERE s1 LIKE 'a%';
DROP TABLE t1;

File diff suppressed because it is too large Load diff

View file

@ -2888,3 +2888,101 @@ a hex(b) c
DROP TABLE t1;
set names utf8;
End for 5.0 tests
#
# Start of 5.5 tests
#
SET collation_connection=utf8_czech_ci;
SELECT @@collation_connection;
@@collation_connection
utf8_czech_ci
#
# Bug#57737 Character sets: search fails with like, contraction, index
#
CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS s1 LIMIT 0;
INSERT INTO t1 VALUES ('c'),('ce'),('cé'),('ch');
SELECT * FROM t1 WHERE s1 LIKE 'c%';
s1
c
ce
ch
ALTER TABLE t1 ADD KEY s1 (s1);
SELECT * FROM t1 WHERE s1 LIKE 'c%';
s1
c
ce
ch
ALTER TABLE t1 DROP KEY s1, ADD KEY(s1(1));
SELECT * FROM t1 WHERE s1 LIKE 'ch';
s1
ch
DROP TABLE t1;
SELECT @@collation_connection;
@@collation_connection
utf8_czech_ci
#
# Bug#57737 Character sets: search fails with like, contraction, index
# Part#2 - ignorable characters
#
CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS s1 LIMIT 0;
INSERT INTO t1 VALUES ('a\0\0\0\0\0\t'),('a'),('b'),('c'),('d'),('e');
SELECT HEX(s1) FROM t1 WHERE s1 LIKE 'a%';
HEX(s1)
61000000000009
61
ALTER TABLE t1 ADD KEY s1 (s1);
SELECT HEX(s1) FROM t1 WHERE s1 LIKE 'a%';
HEX(s1)
61000000000009
61
DROP TABLE t1;
SET collation_connection=ucs2_czech_ci;
SELECT @@collation_connection;
@@collation_connection
ucs2_czech_ci
#
# Bug#57737 Character sets: search fails with like, contraction, index
#
CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS s1 LIMIT 0;
INSERT INTO t1 VALUES ('c'),('ce'),('cé'),('ch');
SELECT * FROM t1 WHERE s1 LIKE 'c%';
s1
c
ce
ch
ALTER TABLE t1 ADD KEY s1 (s1);
SELECT * FROM t1 WHERE s1 LIKE 'c%';
s1
c
ce
ch
ALTER TABLE t1 DROP KEY s1, ADD KEY(s1(1));
SELECT * FROM t1 WHERE s1 LIKE 'ch';
s1
ch
DROP TABLE t1;
SELECT @@collation_connection;
@@collation_connection
ucs2_czech_ci
#
# Bug#57737 Character sets: search fails with like, contraction, index
# Part#2 - ignorable characters
#
CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS s1 LIMIT 0;
INSERT INTO t1 VALUES ('a\0\0\0\0\0\t'),('a'),('b'),('c'),('d'),('e');
SELECT HEX(s1) FROM t1 WHERE s1 LIKE 'a%';
HEX(s1)
0061000000000000000000000009
0061
ALTER TABLE t1 ADD KEY s1 (s1);
SELECT HEX(s1) FROM t1 WHERE s1 LIKE 'a%';
HEX(s1)
0061000000000000000000000009
0061
DROP TABLE t1;
#
# End of 5.5 tests
#

View file

@ -2368,6 +2368,52 @@ NULL
NULL
NULL
drop table t1;
SET collation_connection=utf16_czech_ci;
SELECT @@collation_connection;
@@collation_connection
utf16_czech_ci
#
# Bug#57737 Character sets: search fails with like, contraction, index
#
CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS s1 LIMIT 0;
INSERT INTO t1 VALUES ('c'),('ce'),('cé'),('ch');
SELECT * FROM t1 WHERE s1 LIKE 'c%';
s1
c
ce
ch
ALTER TABLE t1 ADD KEY s1 (s1);
SELECT * FROM t1 WHERE s1 LIKE 'c%';
s1
c
ce
ch
ALTER TABLE t1 DROP KEY s1, ADD KEY(s1(1));
SELECT * FROM t1 WHERE s1 LIKE 'ch';
s1
ch
DROP TABLE t1;
SELECT @@collation_connection;
@@collation_connection
utf16_czech_ci
#
# Bug#57737 Character sets: search fails with like, contraction, index
# Part#2 - ignorable characters
#
CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS s1 LIMIT 0;
INSERT INTO t1 VALUES ('a\0\0\0\0\0\t'),('a'),('b'),('c'),('d'),('e');
SELECT HEX(s1) FROM t1 WHERE s1 LIKE 'a%';
HEX(s1)
0061000000000000000000000009
0061
ALTER TABLE t1 ADD KEY s1 (s1);
SELECT HEX(s1) FROM t1 WHERE s1 LIKE 'a%';
HEX(s1)
0061000000000000000000000009
0061
DROP TABLE t1;
#
# End of 5.5 tests
#

View file

@ -2368,6 +2368,52 @@ NULL
NULL
NULL
drop table t1;
SET collation_connection=utf32_czech_ci;
SELECT @@collation_connection;
@@collation_connection
utf32_czech_ci
#
# Bug#57737 Character sets: search fails with like, contraction, index
#
CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS s1 LIMIT 0;
INSERT INTO t1 VALUES ('c'),('ce'),('cé'),('ch');
SELECT * FROM t1 WHERE s1 LIKE 'c%';
s1
c
ce
ch
ALTER TABLE t1 ADD KEY s1 (s1);
SELECT * FROM t1 WHERE s1 LIKE 'c%';
s1
c
ce
ch
ALTER TABLE t1 DROP KEY s1, ADD KEY(s1(1));
SELECT * FROM t1 WHERE s1 LIKE 'ch';
s1
ch
DROP TABLE t1;
SELECT @@collation_connection;
@@collation_connection
utf32_czech_ci
#
# Bug#57737 Character sets: search fails with like, contraction, index
# Part#2 - ignorable characters
#
CREATE TABLE t1 AS SELECT REPEAT(' ', 10) AS s1 LIMIT 0;
INSERT INTO t1 VALUES ('a\0\0\0\0\0\t'),('a'),('b'),('c'),('d'),('e');
SELECT HEX(s1) FROM t1 WHERE s1 LIKE 'a%';
HEX(s1)
00000061000000000000000000000000000000000000000000000009
00000061
ALTER TABLE t1 ADD KEY s1 (s1);
SELECT HEX(s1) FROM t1 WHERE s1 LIKE 'a%';
HEX(s1)
00000061000000000000000000000000000000000000000000000009
00000061
DROP TABLE t1;
#
# End of 5.5 tests
#

View file

@ -0,0 +1,87 @@
--source include/have_debug.inc
--source include/have_ucs2.inc
--source include/have_utf16.inc
--source include/have_utf32.inc
--disable_warnings
DROP TABLE IF EXISTS t1;
DROP VIEW IF EXISTS v1;
--enable_warnings
CREATE TABLE t1 (id INT NOT NULL AUTO_INCREMENT PRIMARY KEY, a VARBINARY(32));
INSERT INTO t1 (a) VALUES (''),('_'),('%'),('\_'),('\%'),('\\');
INSERT INTO t1 (a) VALUES ('a'),('c');
INSERT INTO t1 (a) VALUES ('a_'),('c_');
INSERT INTO t1 (a) VALUES ('a%'),('c%');
INSERT INTO t1 (a) VALUES ('aa'),('cc'),('ch');
INSERT INTO t1 (a) VALUES ('aa_'),('cc_'),('ch_');
INSERT INTO t1 (a) VALUES ('aa%'),('cc%'),('ch%');
INSERT INTO t1 (a) VALUES ('aaa'),('ccc'),('cch');
INSERT INTO t1 (a) VALUES ('aaa_'),('ccc_'),('cch_');
INSERT INTO t1 (a) VALUES ('aaa%'),('ccc%'),('cch%');
INSERT INTO t1 (a) VALUES ('aaaaaaaaaaaaaaaaaaaa');
CREATE VIEW v1 AS
SELECT id, 'a' AS name, a AS val FROM t1
UNION
SELECT id, 'mn', HEX(LIKE_RANGE_MIN(a, 16)) AS min FROM t1
UNION
SELECT id, 'mx', HEX(LIKE_RANGE_MAX(a, 16)) AS max FROM t1
UNION
SELECT id, 'sp', REPEAT('-', 32) AS sep FROM t1
ORDER BY id, name;
SELECT * FROM v1;
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET latin1;
SELECT * FROM v1;
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf8;
SELECT * FROM v1;
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf8 COLLATE utf8_unicode_ci;
SELECT * FROM v1;
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf8 COLLATE utf8_czech_ci;
SELECT * FROM v1;
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf8 COLLATE utf8_danish_ci;
SELECT * FROM v1;
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET ucs2;
SELECT * FROM v1;
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET ucs2 COLLATE ucs2_unicode_ci;
SELECT * FROM v1;
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET ucs2 COLLATE ucs2_czech_ci;
SELECT * FROM v1;
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET ucs2 COLLATE ucs2_danish_ci;
SELECT * FROM v1;
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf16;
SELECT * FROM v1;
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf16 COLLATE utf16_unicode_ci;
SELECT * FROM v1;
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf16 COLLATE utf16_czech_ci;
SELECT * FROM v1;
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf16 COLLATE utf16_danish_ci;
SELECT * FROM v1;
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf32;
SELECT * FROM v1;
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf32 COLLATE utf32_unicode_ci;
SELECT * FROM v1;
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf32 COLLATE utf32_czech_ci;
SELECT * FROM v1;
ALTER TABLE t1 MODIFY a VARCHAR(32) CHARACTER SET utf32 COLLATE utf32_danish_ci;
SELECT * FROM v1;
DROP VIEW v1;
DROP TABLE t1;

View file

@ -545,3 +545,19 @@ set collation_connection=ucs2_unicode_ci;
set names utf8;
-- echo End for 5.0 tests
--echo #
--echo # Start of 5.5 tests
--echo #
#
# Test my_like_range and contractions
#
SET collation_connection=utf8_czech_ci;
--source include/ctype_czech.inc
--source include/ctype_like_ignorable.inc
SET collation_connection=ucs2_czech_ci;
--source include/ctype_czech.inc
--source include/ctype_like_ignorable.inc
--echo #
--echo # End of 5.5 tests
--echo #

View file

@ -284,6 +284,13 @@ DROP TABLE IF EXISTS t1;
set collation_connection=utf16_unicode_ci;
--source include/ctype_regex.inc
#
# Test my_like_range and contractions
#
SET collation_connection=utf16_czech_ci;
--source include/ctype_czech.inc
--source include/ctype_like_ignorable.inc
--echo #
--echo # End of 5.5 tests

View file

@ -286,6 +286,14 @@ set collation_connection=utf32_unicode_ci;
--source include/ctype_regex.inc
#
# Test my_like_range and contractions
#
SET collation_connection=utf32_czech_ci;
--source include/ctype_czech.inc
--source include/ctype_like_ignorable.inc
--echo #
--echo # End of 5.5 tests
--echo #

View file

@ -1330,6 +1330,34 @@ protected:
};
#ifndef DBUG_OFF
class Create_func_like_range_min : public Create_func_arg2
{
public:
virtual Item *create(THD *thd, Item *arg1, Item *arg2);
static Create_func_like_range_min s_singleton;
protected:
Create_func_like_range_min() {}
virtual ~Create_func_like_range_min() {}
};
class Create_func_like_range_max : public Create_func_arg2
{
public:
virtual Item *create(THD *thd, Item *arg1, Item *arg2);
static Create_func_like_range_max s_singleton;
protected:
Create_func_like_range_max() {}
virtual ~Create_func_like_range_max() {}
};
#endif
class Create_func_ln : public Create_func_arg1
{
public:
@ -3836,6 +3864,26 @@ Create_func_length::create(THD *thd, Item *arg1)
}
#ifndef DBUG_OFF
Create_func_like_range_min Create_func_like_range_min::s_singleton;
Item*
Create_func_like_range_min::create(THD *thd, Item *arg1, Item *arg2)
{
return new (thd->mem_root) Item_func_like_range_min(arg1, arg2);
}
Create_func_like_range_max Create_func_like_range_max::s_singleton;
Item*
Create_func_like_range_max::create(THD *thd, Item *arg1, Item *arg2)
{
return new (thd->mem_root) Item_func_like_range_max(arg1, arg2);
}
#endif
Create_func_ln Create_func_ln::s_singleton;
Item*
@ -4924,6 +4972,10 @@ static Native_func_registry func_array[] =
{ { C_STRING_WITH_LEN("LCASE") }, BUILDER(Create_func_lcase)},
{ { C_STRING_WITH_LEN("LEAST") }, BUILDER(Create_func_least)},
{ { C_STRING_WITH_LEN("LENGTH") }, BUILDER(Create_func_length)},
#ifndef DBUG_OFF
{ { C_STRING_WITH_LEN("LIKE_RANGE_MIN") }, BUILDER(Create_func_like_range_min)},
{ { C_STRING_WITH_LEN("LIKE_RANGE_MAX") }, BUILDER(Create_func_like_range_max)},
#endif
{ { C_STRING_WITH_LEN("LINEFROMTEXT") }, GEOM_BUILDER(Create_func_geometry_from_text)},
{ { C_STRING_WITH_LEN("LINEFROMWKB") }, GEOM_BUILDER(Create_func_geometry_from_wkb)},
{ { C_STRING_WITH_LEN("LINESTRINGFROMTEXT") }, GEOM_BUILDER(Create_func_geometry_from_text)},

View file

@ -3128,6 +3128,41 @@ String *Item_func_unhex::val_str(String *str)
}
#ifndef DBUG_OFF
String *Item_func_like_range::val_str(String *str)
{
DBUG_ASSERT(fixed == 1);
longlong nbytes= args[1]->val_int();
String *res= args[0]->val_str(str);
size_t min_len, max_len;
CHARSET_INFO *cs= collation.collation;
if (!res || args[0]->null_value || args[1]->null_value ||
nbytes < 0 || nbytes > MAX_BLOB_WIDTH ||
min_str.alloc(nbytes) || max_str.alloc(nbytes))
goto err;
null_value=0;
if (cs->coll->like_range(cs, res->ptr(), res->length(),
'\\', '_', '%', nbytes,
(char*) min_str.ptr(), (char*) max_str.ptr(),
&min_len, &max_len))
goto err;
min_str.set_charset(collation.collation);
max_str.set_charset(collation.collation);
min_str.length(min_len);
max_str.length(max_len);
return is_min ? &min_str : &max_str;
err:
null_value= 1;
return 0;
}
#endif
void Item_func_binary::print(String *str, enum_query_type query_type)
{
str->append(STRING_WITH_LEN("cast("));

View file

@ -657,6 +657,46 @@ public:
};
#ifndef DBUG_OFF
class Item_func_like_range :public Item_str_func
{
protected:
String min_str;
String max_str;
const bool is_min;
public:
Item_func_like_range(Item *a, Item *b, bool is_min_arg)
:Item_str_func(a, b), is_min(is_min_arg)
{ maybe_null= 1; }
String *val_str(String *);
void fix_length_and_dec()
{
collation.set(args[0]->collation);
decimals=0;
max_length= MAX_BLOB_WIDTH;
}
};
class Item_func_like_range_min :public Item_func_like_range
{
public:
Item_func_like_range_min(Item *a, Item *b)
:Item_func_like_range(a, b, true) { }
const char *func_name() const { return "like_range_min"; }
};
class Item_func_like_range_max :public Item_func_like_range
{
public:
Item_func_like_range_max(Item *a, Item *b)
:Item_func_like_range(a, b, false) { }
const char *func_name() const { return "like_range_max"; }
};
#endif
class Item_func_binary :public Item_str_func
{
public:

View file

@ -636,7 +636,7 @@ static void pad_max_char(CHARSET_INFO *cs, char *str, char *end)
DBUG_ASSERT(buflen > 0);
do
{
if ((str + buflen) < end)
if ((str + buflen) <= end)
{
/* Enough space for the characer */
memcpy(str, buf, buflen);
@ -802,6 +802,192 @@ fill_max_and_min:
}
/**
Calculate min_str and max_str that ranges a LIKE string.
Generic function, currently used for ucs2, utf16, utf32,
but should be suitable for any other character sets with
cs->min_sort_char and cs->max_sort_char represented in
Unicode code points.
@param cs Character set and collation pointer
@param ptr Pointer to LIKE pattern.
@param ptr_length Length of LIKE pattern.
@param escape Escape character pattern, typically '\'.
@param w_one 'One character' pattern, typically '_'.
@param w_many 'Many characters' pattern, typically '%'.
@param res_length Length of min_str and max_str.
@param[out] min_str Smallest string that ranges LIKE.
@param[out] max_str Largest string that ranges LIKE.
@param[out] min_len Length of min_str
@param[out] max_len Length of max_str
@return Optimization status.
@retval FALSE if LIKE pattern can be optimized
@rerval TRUE if LIKE can't be optimized.
*/
my_bool
my_like_range_generic(CHARSET_INFO *cs,
const char *ptr, size_t ptr_length,
pbool escape, pbool w_one, pbool w_many,
size_t res_length,
char *min_str,char *max_str,
size_t *min_length,size_t *max_length)
{
const char *end= ptr + ptr_length;
const char *min_org= min_str;
const char *max_org= max_str;
char *min_end= min_str + res_length;
char *max_end= max_str + res_length;
size_t charlen= res_length / cs->mbmaxlen;
size_t res_length_diff;
my_bool have_contractions= my_cs_have_contractions(cs);
for ( ; charlen > 0; charlen--)
{
my_wc_t wc, wc2;
int res;
if ((res= cs->cset->mb_wc(cs, &wc, (uchar*) ptr, (uchar*) end)) <= 0)
{
if (res == MY_CS_ILSEQ) /* Bad sequence */
return TRUE; /* min_length and max_length are not important */
break; /* End of the string */
}
ptr+= res;
if (wc == (my_wc_t) escape)
{
if ((res= cs->cset->mb_wc(cs, &wc, (uchar*) ptr, (uchar*) end)) <= 0)
{
if (res == MY_CS_ILSEQ)
return TRUE; /* min_length and max_length are not important */
/*
End of the string: Escape is the last character.
Put escape as a normal character.
We'll will leave the loop on the next iteration.
*/
}
else
ptr+= res;
/* Put escape character to min_str and max_str */
if ((res= cs->cset->wc_mb(cs, wc,
(uchar*) min_str, (uchar*) min_end)) <= 0)
goto pad_set_lengths; /* No space */
min_str+= res;
if ((res= cs->cset->wc_mb(cs, wc,
(uchar*) max_str, (uchar*) max_end)) <= 0)
goto pad_set_lengths; /* No space */
max_str+= res;
continue;
}
else if (wc == (my_wc_t) w_one)
{
if ((res= cs->cset->wc_mb(cs, cs->min_sort_char,
(uchar*) min_str, (uchar*) min_end)) <= 0)
goto pad_set_lengths;
min_str+= res;
if ((res= cs->cset->wc_mb(cs, cs->max_sort_char,
(uchar*) max_str, (uchar*) max_end)) <= 0)
goto pad_set_lengths;
max_str+= res;
continue;
}
else if (wc == (my_wc_t) w_many)
{
/*
Calculate length of keys:
a\min\min... is the smallest possible string
a\max\max... is the biggest possible string
*/
*min_length= ((cs->state & MY_CS_BINSORT) ?
(size_t) (min_str - min_org) :
res_length);
*max_length= res_length;
goto pad_min_max;
}
if (have_contractions &&
my_cs_can_be_contraction_head(cs, wc) &&
(res= cs->cset->mb_wc(cs, &wc2, (uchar*) ptr, (uchar*) end)) > 0)
{
uint16 *weight;
if ((wc2 == (my_wc_t) w_one || wc2 == (my_wc_t) w_many))
{
/* Contraction head followed by a wildcard */
*min_length= *max_length= res_length;
goto pad_min_max;
}
if (my_cs_can_be_contraction_tail(cs, wc2) &&
(weight= my_cs_contraction2_weight(cs, wc, wc2)) && weight[0])
{
/* Contraction found */
if (charlen == 1)
{
/* contraction does not fit to result */
*min_length= *max_length= res_length;
goto pad_min_max;
}
ptr+= res;
charlen--;
/* Put contraction head */
if ((res= cs->cset->wc_mb(cs, wc,
(uchar*) min_str, (uchar*) min_end)) <= 0)
goto pad_set_lengths;
min_str+= res;
if ((res= cs->cset->wc_mb(cs, wc,
(uchar*) max_str, (uchar*) max_end)) <= 0)
goto pad_set_lengths;
max_str+= res;
wc= wc2; /* Prepare to put contraction tail */
}
}
/* Normal character, or contraction tail */
if ((res= cs->cset->wc_mb(cs, wc,
(uchar*) min_str, (uchar*) min_end)) <= 0)
goto pad_set_lengths;
min_str+= res;
if ((res= cs->cset->wc_mb(cs, wc,
(uchar*) max_str, (uchar*) max_end)) <= 0)
goto pad_set_lengths;
max_str+= res;
}
pad_set_lengths:
*min_length= (size_t) (min_str - min_org);
*max_length= (size_t) (max_str - max_org);
pad_min_max:
/*
Fill up max_str and min_str to res_length.
fill() cannot set incomplete characters and
requires that "length" argument is divisible to mbminlen.
Make sure to call fill() with proper "length" argument.
*/
res_length_diff= res_length % cs->mbminlen;
cs->cset->fill(cs, min_str, min_end - min_str - res_length_diff,
cs->min_sort_char);
cs->cset->fill(cs, max_str, max_end - max_str - res_length_diff,
cs->max_sort_char);
/* In case of incomplete characters set the remainder to 0x00's */
if (res_length_diff)
{
/* Example: odd res_length for ucs2 */
memset(min_end - res_length_diff, 0, res_length_diff);
memset(max_end - res_length_diff, 0, res_length_diff);
}
return FALSE;
}
int
my_wildcmp_mb_bin(CHARSET_INFO *cs,
const char *str,const char *str_end,

View file

@ -8127,7 +8127,7 @@ MY_COLLATION_HANDLER my_collation_ucs2_uca_handler =
my_strnncollsp_ucs2_uca,
my_strnxfrm_ucs2_uca,
my_strnxfrmlen_simple,
my_like_range_ucs2,
my_like_range_generic,
my_wildcmp_uca,
NULL,
my_instr_mb,
@ -10134,7 +10134,7 @@ MY_COLLATION_HANDLER my_collation_utf32_uca_handler =
my_strnncollsp_any_uca,
my_strnxfrm_any_uca,
my_strnxfrmlen_simple,
my_like_range_utf32,
my_like_range_generic,
my_wildcmp_uca,
NULL,
my_instr_mb,
@ -10801,7 +10801,7 @@ MY_COLLATION_HANDLER my_collation_utf16_uca_handler =
my_strnncollsp_any_uca,
my_strnxfrm_any_uca,
my_strnxfrmlen_simple,
my_like_range_utf16,
my_like_range_generic,
my_wildcmp_uca,
NULL,
my_instr_mb,

View file

@ -903,7 +903,8 @@ static void
my_fill_mb2(CHARSET_INFO *cs __attribute__((unused)),
char *s, size_t l, int fill)
{
for ( ; l >= 2; s[0]= 0, s[1]= fill, s+= 2, l-= 2);
DBUG_ASSERT(fill <= 0xFFFF);
for ( ; l >= 2; s[0]= (fill >> 8), s[1]= (fill & 0xFF), s+= 2, l-= 2);
}
@ -1563,98 +1564,6 @@ my_hash_sort_utf16_bin(CHARSET_INFO *cs __attribute__((unused)),
}
/**
Calculate min_str and max_str that ranges a LIKE string.
@param ptr Pointer to LIKE pattern.
@param ptr_length Length of LIKE pattern.
@param escape Escape character in LIKE. (Normally '\').
All escape characters should be removed
from min_str and max_str.
@param res_length Length of min_str and max_str.
@param min_str Smallest case sensitive string that ranges LIKE.
Should be space padded to res_length.
@param max_str Largest case sensitive string that ranges LIKE.
Normally padded with the biggest character sort value.
@return Optimization status.
@retval FALSE if LIKE pattern can be optimized
@rerval TRUE if LIKE can't be optimized.
*/
my_bool
my_like_range_utf16(CHARSET_INFO *cs,
const char *ptr, size_t ptr_length,
pbool escape, pbool w_one, pbool w_many,
size_t res_length,
char *min_str,char *max_str,
size_t *min_length,size_t *max_length)
{
const char *end=ptr+ptr_length;
char *min_org=min_str;
char *min_end=min_str+res_length;
size_t charlen= res_length / cs->mbmaxlen;
for ( ; ptr + 1 < end && min_str + 1 < min_end && charlen > 0
; ptr+=2, charlen--)
{
if (ptr[0] == '\0' && ptr[1] == escape && ptr + 1 < end)
{
ptr+=2; /* Skip escape */
*min_str++= *max_str++ = ptr[0];
*min_str++= *max_str++ = ptr[1];
continue;
}
if (ptr[0] == '\0' && ptr[1] == w_one) /* '_' in SQL */
{
*min_str++= (char) (cs->min_sort_char >> 8);
*min_str++= (char) (cs->min_sort_char & 255);
*max_str++= (char) (cs->max_sort_char >> 8);
*max_str++= (char) (cs->max_sort_char & 255);
continue;
}
if (ptr[0] == '\0' && ptr[1] == w_many) /* '%' in SQL */
{
/*
Calculate length of keys:
'a\0\0... is the smallest possible string when we have space expand
a\ff\ff... is the biggest possible string
*/
*min_length= ((cs->state & MY_CS_BINSORT) ? (size_t) (min_str - min_org) :
res_length);
*max_length= res_length;
do {
*min_str++ = 0;
*min_str++ = 0;
*max_str++ = (char) (cs->max_sort_char >> 8);
*max_str++ = (char) (cs->max_sort_char & 255);
} while (min_str + 1 < min_end);
return FALSE;
}
*min_str++= *max_str++ = ptr[0];
*min_str++= *max_str++ = ptr[1];
}
/* Temporary fix for handling w_one at end of string (key compression) */
{
char *tmp;
for (tmp= min_str ; tmp-1 > min_org && tmp[-1] == '\0' && tmp[-2]=='\0';)
{
*--tmp=' ';
*--tmp='\0';
}
}
*min_length= *max_length = (size_t) (min_str - min_org);
while (min_str + 1 < min_end)
{
*min_str++ = *max_str++ = '\0';
*min_str++ = *max_str++ = ' '; /* Because if key compression */
}
return FALSE;
}
static MY_COLLATION_HANDLER my_collation_utf16_general_ci_handler =
{
NULL, /* init */
@ -1662,7 +1571,7 @@ static MY_COLLATION_HANDLER my_collation_utf16_general_ci_handler =
my_strnncollsp_utf16,
my_strnxfrm_unicode,
my_strnxfrmlen_simple,
my_like_range_utf16,
my_like_range_generic,
my_wildcmp_utf16_ci,
my_strcasecmp_mb2_or_mb4,
my_instr_mb,
@ -1678,7 +1587,7 @@ static MY_COLLATION_HANDLER my_collation_utf16_bin_handler =
my_strnncollsp_utf16_bin,
my_strnxfrm_unicode_full_bin,
my_strnxfrmlen_unicode_full_bin,
my_like_range_utf16,
my_like_range_generic,
my_wildcmp_utf16_bin,
my_strcasecmp_mb2_or_mb4,
my_instr_mb,
@ -2551,113 +2460,6 @@ my_strnncollsp_utf32_bin(CHARSET_INFO *cs __attribute__((unused)),
}
/**
Calculate min_str and max_str that ranges a LIKE string.
@param ptr Pointer to LIKE pattern.
@param ptr_length Length of LIKE pattern.
@param escape Escape character in LIKE. (Normally '\').
All escape characters should be removed
from min_str and max_str.
@param res_length Length of min_str and max_str.
@param min_str Smallest case sensitive string that ranges LIKE.
Should be space padded to res_length.
@param max_str Largest case sensitive string that ranges LIKE.
Normally padded with the biggest character sort value.
@return Optimization status.
@retval FALSE if LIKE pattern can be optimized
@rerval TRUE if LIKE can't be optimized.
*/
my_bool
my_like_range_utf32(CHARSET_INFO *cs,
const char *ptr, size_t ptr_length,
pbool escape, pbool w_one, pbool w_many,
size_t res_length,
char *min_str,char *max_str,
size_t *min_length,size_t *max_length)
{
const char *end= ptr + ptr_length;
char *min_org= min_str;
char *min_end= min_str + res_length;
char *max_end= max_str + res_length;
size_t charlen= res_length / cs->mbmaxlen;
DBUG_ASSERT((res_length % 4) == 0);
for ( ; charlen > 0; ptr+= 4, charlen--)
{
my_wc_t wc;
int res;
if ((res= my_utf32_uni(cs, &wc, (uchar*) ptr, (uchar*) end)) < 0)
{
my_fill_utf32(cs, min_str, min_end - min_str, cs->min_sort_char);
my_fill_utf32(cs, max_str, min_end - min_str, cs->max_sort_char);
/* min_length and max_legnth are not important */
return TRUE;
}
if (wc == (my_wc_t) escape)
{
ptr+= 4; /* Skip escape */
if ((res= my_utf32_uni(cs, &wc, (uchar*) ptr, (uchar*) end)) < 0)
{
my_fill_utf32(cs, min_str, min_end - min_str, cs->min_sort_char);
my_fill_utf32(cs, max_str, max_end - min_str, cs->max_sort_char);
/* min_length and max_length are not important */
return TRUE;
}
if (my_uni_utf32(cs, wc, (uchar*) min_str, (uchar*) min_end) != 4 ||
my_uni_utf32(cs, wc, (uchar*) max_str, (uchar*) max_end) != 4)
goto pad_set_lengths;
*min_str++= 4;
*max_str++= 4;
continue;
}
if (wc == (my_wc_t) w_one)
{
if (my_uni_utf32(cs, cs->min_sort_char, (uchar*) min_str, (uchar*) min_end) != 4 ||
my_uni_utf32(cs, cs->max_sort_char, (uchar*) max_str, (uchar*) max_end) != 4)
goto pad_set_lengths;
min_str+= 4;
max_str+= 4;
continue;
}
if (wc == (my_wc_t) w_many)
{
/*
Calculate length of keys:
'a\0\0... is the smallest possible string when we have space expand
a\ff\ff... is the biggest possible string
*/
*min_length= ((cs->state & MY_CS_BINSORT) ?
(size_t) (min_str - min_org) :
res_length);
*max_length= res_length;
goto pad_min_max;
}
/* Normal character */
if (my_uni_utf32(cs, wc, (uchar*) min_str, (uchar*) min_end) != 4 ||
my_uni_utf32(cs, wc, (uchar*) max_str, (uchar*) max_end) != 4)
goto pad_set_lengths;
min_str+= 4;
max_str+= 4;
}
pad_set_lengths:
*min_length= *max_length= (size_t) (min_str - min_org);
pad_min_max:
my_fill_utf32(cs, min_str, min_end - min_str, cs->min_sort_char);
my_fill_utf32(cs, max_str, max_end - max_str, cs->max_sort_char);
return FALSE;
}
static size_t
my_scan_utf32(CHARSET_INFO *cs,
const char *str, const char *end, int sequence_type)
@ -2689,7 +2491,7 @@ static MY_COLLATION_HANDLER my_collation_utf32_general_ci_handler =
my_strnncollsp_utf32,
my_strnxfrm_unicode,
my_strnxfrmlen_utf32,
my_like_range_utf32,
my_like_range_generic,
my_wildcmp_utf32_ci,
my_strcasecmp_mb2_or_mb4,
my_instr_mb,
@ -2705,7 +2507,7 @@ static MY_COLLATION_HANDLER my_collation_utf32_bin_handler =
my_strnncollsp_utf32_bin,
my_strnxfrm_unicode_full_bin,
my_strnxfrmlen_unicode_full_bin,
my_like_range_utf32,
my_like_range_generic,
my_wildcmp_utf32_bin,
my_strcasecmp_mb2_or_mb4,
my_instr_mb,
@ -3252,120 +3054,6 @@ void my_hash_sort_ucs2_bin(CHARSET_INFO *cs __attribute__((unused)),
}
}
/*
** Calculate min_str and max_str that ranges a LIKE string.
** Arguments:
** ptr Pointer to LIKE string.
** ptr_length Length of LIKE string.
** escape Escape character in LIKE. (Normally '\').
** All escape characters should be removed from min_str and max_str
** res_length Length of min_str and max_str.
** min_str Smallest case sensitive string that ranges LIKE.
** Should be space padded to res_length.
** max_str Largest case sensitive string that ranges LIKE.
** Normally padded with the biggest character sort value.
**
** The function should return 0 if ok and 1 if the LIKE string can't be
** optimized !
*/
my_bool my_like_range_ucs2(CHARSET_INFO *cs,
const char *ptr, size_t ptr_length,
pbool escape, pbool w_one, pbool w_many,
size_t res_length,
char *min_str,char *max_str,
size_t *min_length,size_t *max_length)
{
const char *end=ptr+ptr_length;
char *min_org=min_str;
char *min_end=min_str+res_length;
size_t charlen= res_length / cs->mbmaxlen;
const char *contraction_flags= cs->contractions ?
((const char*) cs->contractions) + 0x40*0x40 : NULL;
for ( ; ptr + 1 < end && min_str + 1 < min_end && charlen > 0
; ptr+=2, charlen--)
{
if (ptr[0] == '\0' && ptr[1] == escape && ptr + 1 < end)
{
ptr+=2; /* Skip escape */
*min_str++= *max_str++ = ptr[0];
*min_str++= *max_str++ = ptr[1];
continue;
}
if (ptr[0] == '\0' && ptr[1] == w_one) /* '_' in SQL */
{
*min_str++= (char) (cs->min_sort_char >> 8);
*min_str++= (char) (cs->min_sort_char & 255);
*max_str++= (char) (cs->max_sort_char >> 8);
*max_str++= (char) (cs->max_sort_char & 255);
continue;
}
if (ptr[0] == '\0' && ptr[1] == w_many) /* '%' in SQL */
{
fill_max_and_min:
/*
Calculate length of keys:
'a\0\0... is the smallest possible string when we have space expand
a\ff\ff... is the biggest possible string
*/
*min_length= ((cs->state & MY_CS_BINSORT) ? (size_t) (min_str - min_org) :
res_length);
*max_length= res_length;
do {
*min_str++ = 0;
*min_str++ = 0;
*max_str++ = (char) (cs->max_sort_char >> 8);
*max_str++ = (char) (cs->max_sort_char & 255);
} while (min_str + 1 < min_end);
return 0;
}
if (contraction_flags && ptr + 3 < end &&
ptr[0] == '\0' && contraction_flags[(uchar) ptr[1]])
{
/* Contraction head found */
if (ptr[2] == '\0' && (ptr[3] == w_one || ptr[3] == w_many))
{
/* Contraction head followed by a wildcard, quit */
goto fill_max_and_min;
}
/*
Check if the second letter can be contraction part,
and if two letters really produce a contraction.
*/
if (ptr[2] == '\0' && contraction_flags[(uchar) ptr[3]] &&
cs->contractions[(ptr[1]-0x40)*0x40 + ptr[3] - 0x40])
{
/* Contraction found */
if (charlen == 1 || min_str + 2 >= min_end)
{
/* Full contraction doesn't fit, quit */
goto fill_max_and_min;
}
/* Put contraction head */
*min_str++= *max_str++= *ptr++;
*min_str++= *max_str++= *ptr++;
charlen--;
}
}
/* Put contraction tail, or a single character */
*min_str++= *max_str++ = ptr[0];
*min_str++= *max_str++ = ptr[1];
}
*min_length= *max_length = (size_t) (min_str - min_org);
while (min_str + 1 < min_end)
{
*min_str++ = *max_str++ = '\0';
*min_str++ = *max_str++ = ' '; /* Because if key compression */
}
return 0;
}
static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler =
{
@ -3374,7 +3062,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler =
my_strnncollsp_ucs2,
my_strnxfrm_unicode,
my_strnxfrmlen_simple,
my_like_range_ucs2,
my_like_range_generic,
my_wildcmp_ucs2_ci,
my_strcasecmp_mb2_or_mb4,
my_instr_mb,
@ -3390,7 +3078,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler =
my_strnncollsp_ucs2_bin,
my_strnxfrm_unicode,
my_strnxfrmlen_simple,
my_like_range_ucs2,
my_like_range_generic,
my_wildcmp_ucs2_bin,
my_strcasecmp_mb2_or_mb4,
my_instr_mb,