mirror of
https://github.com/MariaDB/server.git
synced 2026-04-29 11:45:32 +02:00
Bug#57737 Character sets: search fails with like, contraction, index
Problem: LIKE over an indexed column optimized away good results,
because my_like_range_utf32/utf16 returned wrong ranges for contractions.
Contraction related code was missing in my_like_range_utf32/utf16,
but did exist in my_like_range_ucs2/utf8.
It was forgotten in utf32/utf16 versions (during mysql-6.0 push/revert mess).
Fix:
The patch removes individual functions my_like_range_ucs2,
my_like_range_utf16, my_like_range_utf32 and introduces a single function
my_like_range_generic() instead. The new function handles contractions
correctly. It can handle any character set with cs->min_sort_char and
cs->max_sort_char represented in Unicode code points.
added:
@ mysql-test/include/ctype_czech.inc
@ mysql-test/include/ctype_like_ignorable.inc
@ mysql-test/r/ctype_like_range.result
@ mysql-test/t/ctype_like_range.test
Adding tests
modified:
@ include/m_ctype.h
- Adding helper functions for contractions.
- Prototypes: removing ucs2,utf16,utf32 functions, adding generic function.
@ mysql-test/r/ctype_uca.result
@ mysql-test/r/ctype_utf16_uca.result
@ mysql-test/r/ctype_utf32_uca.result
@ mysql-test/t/ctype_uca.test
@ mysql-test/t/ctype_utf16_uca.test
@ mysql-test/t/ctype_utf32_uca.test
- Adding tests.
@ strings/ctype-mb.c
- Pad function did not put the last character.
- Implementing my_like_range_generic() - a universal replacement
for three separate functions
my_like_range_ucs2(), my_like_range_utf16() and my_like_range_utf32(),
with correct contraction handling.
@ strings/ctype-ucs2.c
- my_fill_mb2 did not put the high byte, as previously
it was used to put only characters in the ASCII range.
Now it puts the high byte as well
(needed to populate cs->max_sort_char correctly).
- Adding DBUG_ASSERT()
- Removing character set specific functions:
my_like_range_ucs2(), my_like_range_utf16() and my_like_range_utf32().
- Using my_like_range_generic() instead of the old functions.
@ strings/ctype-uca.c
- Using generic function instead of the old character set specific ones.
@ sql/item_create.cc
@ sql/item_strfunc.cc
@ sql/item_strfunc.h
- Adding SQL functions LIKE_RANGE_MIN and LIKE_RANGE_MAX,
available only in debug build to make sure like_range()
works correctly for all character sets and collations.
This commit is contained in:
parent
ce441751ed
commit
e3dee8a7fd
17 changed files with 3001 additions and 344 deletions
|
|
@ -636,7 +636,7 @@ static void pad_max_char(CHARSET_INFO *cs, char *str, char *end)
|
|||
DBUG_ASSERT(buflen > 0);
|
||||
do
|
||||
{
|
||||
if ((str + buflen) < end)
|
||||
if ((str + buflen) <= end)
|
||||
{
|
||||
/* Enough space for the character */
|
||||
memcpy(str, buf, buflen);
|
||||
|
|
@ -802,6 +802,192 @@ fill_max_and_min:
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
Calculate min_str and max_str that ranges a LIKE string.
|
||||
Generic function, currently used for ucs2, utf16, utf32,
|
||||
but should be suitable for any other character sets with
|
||||
cs->min_sort_char and cs->max_sort_char represented in
|
||||
Unicode code points.
|
||||
|
||||
@param cs Character set and collation pointer
|
||||
@param ptr Pointer to LIKE pattern.
|
||||
@param ptr_length Length of LIKE pattern.
|
||||
@param escape Escape character pattern, typically '\'.
|
||||
@param w_one 'One character' pattern, typically '_'.
|
||||
@param w_many 'Many characters' pattern, typically '%'.
|
||||
@param res_length Length of min_str and max_str.
|
||||
|
||||
@param[out] min_str Smallest string that ranges LIKE.
|
||||
@param[out] max_str Largest string that ranges LIKE.
|
||||
@param[out] min_length Length of min_str
|
||||
@param[out] max_length Length of max_str
|
||||
|
||||
@return Optimization status.
|
||||
@retval FALSE if LIKE pattern can be optimized
|
||||
@retval TRUE if LIKE can't be optimized.
|
||||
*/
|
||||
my_bool
|
||||
my_like_range_generic(CHARSET_INFO *cs,
|
||||
const char *ptr, size_t ptr_length,
|
||||
pbool escape, pbool w_one, pbool w_many,
|
||||
size_t res_length,
|
||||
char *min_str,char *max_str,
|
||||
size_t *min_length,size_t *max_length)
|
||||
{
|
||||
const char *end= ptr + ptr_length;
|
||||
const char *min_org= min_str;
|
||||
const char *max_org= max_str;
|
||||
char *min_end= min_str + res_length;
|
||||
char *max_end= max_str + res_length;
|
||||
size_t charlen= res_length / cs->mbmaxlen;
|
||||
size_t res_length_diff;
|
||||
my_bool have_contractions= my_cs_have_contractions(cs);
|
||||
|
||||
for ( ; charlen > 0; charlen--)
|
||||
{
|
||||
my_wc_t wc, wc2;
|
||||
int res;
|
||||
if ((res= cs->cset->mb_wc(cs, &wc, (uchar*) ptr, (uchar*) end)) <= 0)
|
||||
{
|
||||
if (res == MY_CS_ILSEQ) /* Bad sequence */
|
||||
return TRUE; /* min_length and max_length are not important */
|
||||
break; /* End of the string */
|
||||
}
|
||||
ptr+= res;
|
||||
|
||||
if (wc == (my_wc_t) escape)
|
||||
{
|
||||
if ((res= cs->cset->mb_wc(cs, &wc, (uchar*) ptr, (uchar*) end)) <= 0)
|
||||
{
|
||||
if (res == MY_CS_ILSEQ)
|
||||
return TRUE; /* min_length and max_length are not important */
|
||||
/*
|
||||
End of the string: Escape is the last character.
|
||||
Put escape as a normal character.
|
||||
We will leave the loop on the next iteration.
|
||||
*/
|
||||
}
|
||||
else
|
||||
ptr+= res;
|
||||
|
||||
/* Put escape character to min_str and max_str */
|
||||
if ((res= cs->cset->wc_mb(cs, wc,
|
||||
(uchar*) min_str, (uchar*) min_end)) <= 0)
|
||||
goto pad_set_lengths; /* No space */
|
||||
min_str+= res;
|
||||
|
||||
if ((res= cs->cset->wc_mb(cs, wc,
|
||||
(uchar*) max_str, (uchar*) max_end)) <= 0)
|
||||
goto pad_set_lengths; /* No space */
|
||||
max_str+= res;
|
||||
continue;
|
||||
}
|
||||
else if (wc == (my_wc_t) w_one)
|
||||
{
|
||||
if ((res= cs->cset->wc_mb(cs, cs->min_sort_char,
|
||||
(uchar*) min_str, (uchar*) min_end)) <= 0)
|
||||
goto pad_set_lengths;
|
||||
min_str+= res;
|
||||
|
||||
if ((res= cs->cset->wc_mb(cs, cs->max_sort_char,
|
||||
(uchar*) max_str, (uchar*) max_end)) <= 0)
|
||||
goto pad_set_lengths;
|
||||
max_str+= res;
|
||||
continue;
|
||||
}
|
||||
else if (wc == (my_wc_t) w_many)
|
||||
{
|
||||
/*
|
||||
Calculate length of keys:
|
||||
a\min\min... is the smallest possible string
|
||||
a\max\max... is the biggest possible string
|
||||
*/
|
||||
*min_length= ((cs->state & MY_CS_BINSORT) ?
|
||||
(size_t) (min_str - min_org) :
|
||||
res_length);
|
||||
*max_length= res_length;
|
||||
goto pad_min_max;
|
||||
}
|
||||
|
||||
if (have_contractions &&
|
||||
my_cs_can_be_contraction_head(cs, wc) &&
|
||||
(res= cs->cset->mb_wc(cs, &wc2, (uchar*) ptr, (uchar*) end)) > 0)
|
||||
{
|
||||
uint16 *weight;
|
||||
if ((wc2 == (my_wc_t) w_one || wc2 == (my_wc_t) w_many))
|
||||
{
|
||||
/* Contraction head followed by a wildcard */
|
||||
*min_length= *max_length= res_length;
|
||||
goto pad_min_max;
|
||||
}
|
||||
|
||||
if (my_cs_can_be_contraction_tail(cs, wc2) &&
|
||||
(weight= my_cs_contraction2_weight(cs, wc, wc2)) && weight[0])
|
||||
{
|
||||
/* Contraction found */
|
||||
if (charlen == 1)
|
||||
{
|
||||
/* contraction does not fit to result */
|
||||
*min_length= *max_length= res_length;
|
||||
goto pad_min_max;
|
||||
}
|
||||
|
||||
ptr+= res;
|
||||
charlen--;
|
||||
|
||||
/* Put contraction head */
|
||||
if ((res= cs->cset->wc_mb(cs, wc,
|
||||
(uchar*) min_str, (uchar*) min_end)) <= 0)
|
||||
goto pad_set_lengths;
|
||||
min_str+= res;
|
||||
|
||||
if ((res= cs->cset->wc_mb(cs, wc,
|
||||
(uchar*) max_str, (uchar*) max_end)) <= 0)
|
||||
goto pad_set_lengths;
|
||||
max_str+= res;
|
||||
wc= wc2; /* Prepare to put contraction tail */
|
||||
}
|
||||
}
|
||||
|
||||
/* Normal character, or contraction tail */
|
||||
if ((res= cs->cset->wc_mb(cs, wc,
|
||||
(uchar*) min_str, (uchar*) min_end)) <= 0)
|
||||
goto pad_set_lengths;
|
||||
min_str+= res;
|
||||
if ((res= cs->cset->wc_mb(cs, wc,
|
||||
(uchar*) max_str, (uchar*) max_end)) <= 0)
|
||||
goto pad_set_lengths;
|
||||
max_str+= res;
|
||||
}
|
||||
|
||||
pad_set_lengths:
|
||||
*min_length= (size_t) (min_str - min_org);
|
||||
*max_length= (size_t) (max_str - max_org);
|
||||
|
||||
pad_min_max:
|
||||
/*
|
||||
Fill up max_str and min_str to res_length.
|
||||
fill() cannot set incomplete characters and
|
||||
requires that "length" argument is divisible to mbminlen.
|
||||
Make sure to call fill() with proper "length" argument.
|
||||
*/
|
||||
res_length_diff= res_length % cs->mbminlen;
|
||||
cs->cset->fill(cs, min_str, min_end - min_str - res_length_diff,
|
||||
cs->min_sort_char);
|
||||
cs->cset->fill(cs, max_str, max_end - max_str - res_length_diff,
|
||||
cs->max_sort_char);
|
||||
|
||||
/* In case of incomplete characters set the remainder to 0x00's */
|
||||
if (res_length_diff)
|
||||
{
|
||||
/* Example: odd res_length for ucs2 */
|
||||
memset(min_end - res_length_diff, 0, res_length_diff);
|
||||
memset(max_end - res_length_diff, 0, res_length_diff);
|
||||
}
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
my_wildcmp_mb_bin(CHARSET_INFO *cs,
|
||||
const char *str,const char *str_end,
|
||||
|
|
|
|||
|
|
@ -8127,7 +8127,7 @@ MY_COLLATION_HANDLER my_collation_ucs2_uca_handler =
|
|||
my_strnncollsp_ucs2_uca,
|
||||
my_strnxfrm_ucs2_uca,
|
||||
my_strnxfrmlen_simple,
|
||||
my_like_range_ucs2,
|
||||
my_like_range_generic,
|
||||
my_wildcmp_uca,
|
||||
NULL,
|
||||
my_instr_mb,
|
||||
|
|
@ -10134,7 +10134,7 @@ MY_COLLATION_HANDLER my_collation_utf32_uca_handler =
|
|||
my_strnncollsp_any_uca,
|
||||
my_strnxfrm_any_uca,
|
||||
my_strnxfrmlen_simple,
|
||||
my_like_range_utf32,
|
||||
my_like_range_generic,
|
||||
my_wildcmp_uca,
|
||||
NULL,
|
||||
my_instr_mb,
|
||||
|
|
@ -10801,7 +10801,7 @@ MY_COLLATION_HANDLER my_collation_utf16_uca_handler =
|
|||
my_strnncollsp_any_uca,
|
||||
my_strnxfrm_any_uca,
|
||||
my_strnxfrmlen_simple,
|
||||
my_like_range_utf16,
|
||||
my_like_range_generic,
|
||||
my_wildcmp_uca,
|
||||
NULL,
|
||||
my_instr_mb,
|
||||
|
|
|
|||
|
|
@ -903,7 +903,8 @@ static void
|
|||
my_fill_mb2(CHARSET_INFO *cs __attribute__((unused)),
|
||||
char *s, size_t l, int fill)
|
||||
{
|
||||
for ( ; l >= 2; s[0]= 0, s[1]= fill, s+= 2, l-= 2);
|
||||
DBUG_ASSERT(fill <= 0xFFFF);
|
||||
for ( ; l >= 2; s[0]= (fill >> 8), s[1]= (fill & 0xFF), s+= 2, l-= 2);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -1563,98 +1564,6 @@ my_hash_sort_utf16_bin(CHARSET_INFO *cs __attribute__((unused)),
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
Calculate min_str and max_str that ranges a LIKE string.
|
||||
|
||||
@param ptr Pointer to LIKE pattern.
|
||||
@param ptr_length Length of LIKE pattern.
|
||||
@param escape Escape character in LIKE. (Normally '\').
|
||||
All escape characters should be removed
|
||||
from min_str and max_str.
|
||||
@param res_length Length of min_str and max_str.
|
||||
@param min_str Smallest case sensitive string that ranges LIKE.
|
||||
Should be space padded to res_length.
|
||||
@param max_str Largest case sensitive string that ranges LIKE.
|
||||
Normally padded with the biggest character sort value.
|
||||
|
||||
@return Optimization status.
|
||||
@retval FALSE if LIKE pattern can be optimized
|
||||
@rerval TRUE if LIKE can't be optimized.
|
||||
*/
|
||||
|
||||
my_bool
|
||||
my_like_range_utf16(CHARSET_INFO *cs,
|
||||
const char *ptr, size_t ptr_length,
|
||||
pbool escape, pbool w_one, pbool w_many,
|
||||
size_t res_length,
|
||||
char *min_str,char *max_str,
|
||||
size_t *min_length,size_t *max_length)
|
||||
{
|
||||
const char *end=ptr+ptr_length;
|
||||
char *min_org=min_str;
|
||||
char *min_end=min_str+res_length;
|
||||
size_t charlen= res_length / cs->mbmaxlen;
|
||||
|
||||
for ( ; ptr + 1 < end && min_str + 1 < min_end && charlen > 0
|
||||
; ptr+=2, charlen--)
|
||||
{
|
||||
if (ptr[0] == '\0' && ptr[1] == escape && ptr + 1 < end)
|
||||
{
|
||||
ptr+=2; /* Skip escape */
|
||||
*min_str++= *max_str++ = ptr[0];
|
||||
*min_str++= *max_str++ = ptr[1];
|
||||
continue;
|
||||
}
|
||||
if (ptr[0] == '\0' && ptr[1] == w_one) /* '_' in SQL */
|
||||
{
|
||||
*min_str++= (char) (cs->min_sort_char >> 8);
|
||||
*min_str++= (char) (cs->min_sort_char & 255);
|
||||
*max_str++= (char) (cs->max_sort_char >> 8);
|
||||
*max_str++= (char) (cs->max_sort_char & 255);
|
||||
continue;
|
||||
}
|
||||
if (ptr[0] == '\0' && ptr[1] == w_many) /* '%' in SQL */
|
||||
{
|
||||
/*
|
||||
Calculate length of keys:
|
||||
'a\0\0... is the smallest possible string when we have space expand
|
||||
a\ff\ff... is the biggest possible string
|
||||
*/
|
||||
*min_length= ((cs->state & MY_CS_BINSORT) ? (size_t) (min_str - min_org) :
|
||||
res_length);
|
||||
*max_length= res_length;
|
||||
do {
|
||||
*min_str++ = 0;
|
||||
*min_str++ = 0;
|
||||
*max_str++ = (char) (cs->max_sort_char >> 8);
|
||||
*max_str++ = (char) (cs->max_sort_char & 255);
|
||||
} while (min_str + 1 < min_end);
|
||||
return FALSE;
|
||||
}
|
||||
*min_str++= *max_str++ = ptr[0];
|
||||
*min_str++= *max_str++ = ptr[1];
|
||||
}
|
||||
|
||||
/* Temporary fix for handling w_one at end of string (key compression) */
|
||||
{
|
||||
char *tmp;
|
||||
for (tmp= min_str ; tmp-1 > min_org && tmp[-1] == '\0' && tmp[-2]=='\0';)
|
||||
{
|
||||
*--tmp=' ';
|
||||
*--tmp='\0';
|
||||
}
|
||||
}
|
||||
|
||||
*min_length= *max_length = (size_t) (min_str - min_org);
|
||||
while (min_str + 1 < min_end)
|
||||
{
|
||||
*min_str++ = *max_str++ = '\0';
|
||||
*min_str++ = *max_str++ = ' '; /* Because if key compression */
|
||||
}
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
|
||||
static MY_COLLATION_HANDLER my_collation_utf16_general_ci_handler =
|
||||
{
|
||||
NULL, /* init */
|
||||
|
|
@ -1662,7 +1571,7 @@ static MY_COLLATION_HANDLER my_collation_utf16_general_ci_handler =
|
|||
my_strnncollsp_utf16,
|
||||
my_strnxfrm_unicode,
|
||||
my_strnxfrmlen_simple,
|
||||
my_like_range_utf16,
|
||||
my_like_range_generic,
|
||||
my_wildcmp_utf16_ci,
|
||||
my_strcasecmp_mb2_or_mb4,
|
||||
my_instr_mb,
|
||||
|
|
@ -1678,7 +1587,7 @@ static MY_COLLATION_HANDLER my_collation_utf16_bin_handler =
|
|||
my_strnncollsp_utf16_bin,
|
||||
my_strnxfrm_unicode_full_bin,
|
||||
my_strnxfrmlen_unicode_full_bin,
|
||||
my_like_range_utf16,
|
||||
my_like_range_generic,
|
||||
my_wildcmp_utf16_bin,
|
||||
my_strcasecmp_mb2_or_mb4,
|
||||
my_instr_mb,
|
||||
|
|
@ -2551,113 +2460,6 @@ my_strnncollsp_utf32_bin(CHARSET_INFO *cs __attribute__((unused)),
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
Calculate min_str and max_str that ranges a LIKE string.
|
||||
|
||||
@param ptr Pointer to LIKE pattern.
|
||||
@param ptr_length Length of LIKE pattern.
|
||||
@param escape Escape character in LIKE. (Normally '\').
|
||||
All escape characters should be removed
|
||||
from min_str and max_str.
|
||||
@param res_length Length of min_str and max_str.
|
||||
@param min_str Smallest case sensitive string that ranges LIKE.
|
||||
Should be space padded to res_length.
|
||||
@param max_str Largest case sensitive string that ranges LIKE.
|
||||
Normally padded with the biggest character sort value.
|
||||
|
||||
@return Optimization status.
|
||||
@retval FALSE if LIKE pattern can be optimized
|
||||
@rerval TRUE if LIKE can't be optimized.
|
||||
*/
|
||||
|
||||
my_bool
|
||||
my_like_range_utf32(CHARSET_INFO *cs,
|
||||
const char *ptr, size_t ptr_length,
|
||||
pbool escape, pbool w_one, pbool w_many,
|
||||
size_t res_length,
|
||||
char *min_str,char *max_str,
|
||||
size_t *min_length,size_t *max_length)
|
||||
{
|
||||
const char *end= ptr + ptr_length;
|
||||
char *min_org= min_str;
|
||||
char *min_end= min_str + res_length;
|
||||
char *max_end= max_str + res_length;
|
||||
size_t charlen= res_length / cs->mbmaxlen;
|
||||
|
||||
DBUG_ASSERT((res_length % 4) == 0);
|
||||
|
||||
for ( ; charlen > 0; ptr+= 4, charlen--)
|
||||
{
|
||||
my_wc_t wc;
|
||||
int res;
|
||||
if ((res= my_utf32_uni(cs, &wc, (uchar*) ptr, (uchar*) end)) < 0)
|
||||
{
|
||||
my_fill_utf32(cs, min_str, min_end - min_str, cs->min_sort_char);
|
||||
my_fill_utf32(cs, max_str, min_end - min_str, cs->max_sort_char);
|
||||
/* min_length and max_legnth are not important */
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
if (wc == (my_wc_t) escape)
|
||||
{
|
||||
ptr+= 4; /* Skip escape */
|
||||
if ((res= my_utf32_uni(cs, &wc, (uchar*) ptr, (uchar*) end)) < 0)
|
||||
{
|
||||
my_fill_utf32(cs, min_str, min_end - min_str, cs->min_sort_char);
|
||||
my_fill_utf32(cs, max_str, max_end - min_str, cs->max_sort_char);
|
||||
/* min_length and max_length are not important */
|
||||
return TRUE;
|
||||
}
|
||||
if (my_uni_utf32(cs, wc, (uchar*) min_str, (uchar*) min_end) != 4 ||
|
||||
my_uni_utf32(cs, wc, (uchar*) max_str, (uchar*) max_end) != 4)
|
||||
goto pad_set_lengths;
|
||||
*min_str++= 4;
|
||||
*max_str++= 4;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (wc == (my_wc_t) w_one)
|
||||
{
|
||||
if (my_uni_utf32(cs, cs->min_sort_char, (uchar*) min_str, (uchar*) min_end) != 4 ||
|
||||
my_uni_utf32(cs, cs->max_sort_char, (uchar*) max_str, (uchar*) max_end) != 4)
|
||||
goto pad_set_lengths;
|
||||
min_str+= 4;
|
||||
max_str+= 4;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (wc == (my_wc_t) w_many)
|
||||
{
|
||||
/*
|
||||
Calculate length of keys:
|
||||
'a\0\0... is the smallest possible string when we have space expand
|
||||
a\ff\ff... is the biggest possible string
|
||||
*/
|
||||
*min_length= ((cs->state & MY_CS_BINSORT) ?
|
||||
(size_t) (min_str - min_org) :
|
||||
res_length);
|
||||
*max_length= res_length;
|
||||
goto pad_min_max;
|
||||
}
|
||||
|
||||
/* Normal character */
|
||||
if (my_uni_utf32(cs, wc, (uchar*) min_str, (uchar*) min_end) != 4 ||
|
||||
my_uni_utf32(cs, wc, (uchar*) max_str, (uchar*) max_end) != 4)
|
||||
goto pad_set_lengths;
|
||||
min_str+= 4;
|
||||
max_str+= 4;
|
||||
}
|
||||
|
||||
pad_set_lengths:
|
||||
*min_length= *max_length= (size_t) (min_str - min_org);
|
||||
|
||||
pad_min_max:
|
||||
my_fill_utf32(cs, min_str, min_end - min_str, cs->min_sort_char);
|
||||
my_fill_utf32(cs, max_str, max_end - max_str, cs->max_sort_char);
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
|
||||
static size_t
|
||||
my_scan_utf32(CHARSET_INFO *cs,
|
||||
const char *str, const char *end, int sequence_type)
|
||||
|
|
@ -2689,7 +2491,7 @@ static MY_COLLATION_HANDLER my_collation_utf32_general_ci_handler =
|
|||
my_strnncollsp_utf32,
|
||||
my_strnxfrm_unicode,
|
||||
my_strnxfrmlen_utf32,
|
||||
my_like_range_utf32,
|
||||
my_like_range_generic,
|
||||
my_wildcmp_utf32_ci,
|
||||
my_strcasecmp_mb2_or_mb4,
|
||||
my_instr_mb,
|
||||
|
|
@ -2705,7 +2507,7 @@ static MY_COLLATION_HANDLER my_collation_utf32_bin_handler =
|
|||
my_strnncollsp_utf32_bin,
|
||||
my_strnxfrm_unicode_full_bin,
|
||||
my_strnxfrmlen_unicode_full_bin,
|
||||
my_like_range_utf32,
|
||||
my_like_range_generic,
|
||||
my_wildcmp_utf32_bin,
|
||||
my_strcasecmp_mb2_or_mb4,
|
||||
my_instr_mb,
|
||||
|
|
@ -3252,120 +3054,6 @@ void my_hash_sort_ucs2_bin(CHARSET_INFO *cs __attribute__((unused)),
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
** Calculate min_str and max_str that ranges a LIKE string.
|
||||
** Arguments:
|
||||
** ptr Pointer to LIKE string.
|
||||
** ptr_length Length of LIKE string.
|
||||
** escape Escape character in LIKE. (Normally '\').
|
||||
** All escape characters should be removed from min_str and max_str
|
||||
** res_length Length of min_str and max_str.
|
||||
** min_str Smallest case sensitive string that ranges LIKE.
|
||||
** Should be space padded to res_length.
|
||||
** max_str Largest case sensitive string that ranges LIKE.
|
||||
** Normally padded with the biggest character sort value.
|
||||
**
|
||||
** The function should return 0 if ok and 1 if the LIKE string can't be
|
||||
** optimized !
|
||||
*/
|
||||
|
||||
my_bool my_like_range_ucs2(CHARSET_INFO *cs,
|
||||
const char *ptr, size_t ptr_length,
|
||||
pbool escape, pbool w_one, pbool w_many,
|
||||
size_t res_length,
|
||||
char *min_str,char *max_str,
|
||||
size_t *min_length,size_t *max_length)
|
||||
{
|
||||
const char *end=ptr+ptr_length;
|
||||
char *min_org=min_str;
|
||||
char *min_end=min_str+res_length;
|
||||
size_t charlen= res_length / cs->mbmaxlen;
|
||||
const char *contraction_flags= cs->contractions ?
|
||||
((const char*) cs->contractions) + 0x40*0x40 : NULL;
|
||||
|
||||
for ( ; ptr + 1 < end && min_str + 1 < min_end && charlen > 0
|
||||
; ptr+=2, charlen--)
|
||||
{
|
||||
if (ptr[0] == '\0' && ptr[1] == escape && ptr + 1 < end)
|
||||
{
|
||||
ptr+=2; /* Skip escape */
|
||||
*min_str++= *max_str++ = ptr[0];
|
||||
*min_str++= *max_str++ = ptr[1];
|
||||
continue;
|
||||
}
|
||||
if (ptr[0] == '\0' && ptr[1] == w_one) /* '_' in SQL */
|
||||
{
|
||||
*min_str++= (char) (cs->min_sort_char >> 8);
|
||||
*min_str++= (char) (cs->min_sort_char & 255);
|
||||
*max_str++= (char) (cs->max_sort_char >> 8);
|
||||
*max_str++= (char) (cs->max_sort_char & 255);
|
||||
continue;
|
||||
}
|
||||
if (ptr[0] == '\0' && ptr[1] == w_many) /* '%' in SQL */
|
||||
{
|
||||
fill_max_and_min:
|
||||
/*
|
||||
Calculate length of keys:
|
||||
'a\0\0... is the smallest possible string when we have space expand
|
||||
a\ff\ff... is the biggest possible string
|
||||
*/
|
||||
*min_length= ((cs->state & MY_CS_BINSORT) ? (size_t) (min_str - min_org) :
|
||||
res_length);
|
||||
*max_length= res_length;
|
||||
do {
|
||||
*min_str++ = 0;
|
||||
*min_str++ = 0;
|
||||
*max_str++ = (char) (cs->max_sort_char >> 8);
|
||||
*max_str++ = (char) (cs->max_sort_char & 255);
|
||||
} while (min_str + 1 < min_end);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (contraction_flags && ptr + 3 < end &&
|
||||
ptr[0] == '\0' && contraction_flags[(uchar) ptr[1]])
|
||||
{
|
||||
/* Contraction head found */
|
||||
if (ptr[2] == '\0' && (ptr[3] == w_one || ptr[3] == w_many))
|
||||
{
|
||||
/* Contraction head followed by a wildcard, quit */
|
||||
goto fill_max_and_min;
|
||||
}
|
||||
|
||||
/*
|
||||
Check if the second letter can be contraction part,
|
||||
and if two letters really produce a contraction.
|
||||
*/
|
||||
if (ptr[2] == '\0' && contraction_flags[(uchar) ptr[3]] &&
|
||||
cs->contractions[(ptr[1]-0x40)*0x40 + ptr[3] - 0x40])
|
||||
{
|
||||
/* Contraction found */
|
||||
if (charlen == 1 || min_str + 2 >= min_end)
|
||||
{
|
||||
/* Full contraction doesn't fit, quit */
|
||||
goto fill_max_and_min;
|
||||
}
|
||||
|
||||
/* Put contraction head */
|
||||
*min_str++= *max_str++= *ptr++;
|
||||
*min_str++= *max_str++= *ptr++;
|
||||
charlen--;
|
||||
}
|
||||
}
|
||||
/* Put contraction tail, or a single character */
|
||||
*min_str++= *max_str++ = ptr[0];
|
||||
*min_str++= *max_str++ = ptr[1];
|
||||
}
|
||||
|
||||
*min_length= *max_length = (size_t) (min_str - min_org);
|
||||
while (min_str + 1 < min_end)
|
||||
{
|
||||
*min_str++ = *max_str++ = '\0';
|
||||
*min_str++ = *max_str++ = ' '; /* Because if key compression */
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler =
|
||||
{
|
||||
|
|
@ -3374,7 +3062,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler =
|
|||
my_strnncollsp_ucs2,
|
||||
my_strnxfrm_unicode,
|
||||
my_strnxfrmlen_simple,
|
||||
my_like_range_ucs2,
|
||||
my_like_range_generic,
|
||||
my_wildcmp_ucs2_ci,
|
||||
my_strcasecmp_mb2_or_mb4,
|
||||
my_instr_mb,
|
||||
|
|
@ -3390,7 +3078,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler =
|
|||
my_strnncollsp_ucs2_bin,
|
||||
my_strnxfrm_unicode,
|
||||
my_strnxfrmlen_simple,
|
||||
my_like_range_ucs2,
|
||||
my_like_range_generic,
|
||||
my_wildcmp_ucs2_bin,
|
||||
my_strcasecmp_mb2_or_mb4,
|
||||
my_instr_mb,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue