mariadb/strings/ctype-wildcmp.inl
Alexander Barkov 1e889a6e6c MDEV-33621 Unify duplicate code in my_wildcmp_uca_impl() and my_wildcmp_unicode_impl()
This is a refactoring patch; it does not change the behaviour.
The MTR tests are being added only to cover the LIKE predicate better
(these tests should have been added earlier under the terms of MDEV-9711).
This patch does not need its own specific MTR tests.

Moving the duplicate code into a new shared file ctype-wildcmp.inl
and including it from multiple places, to define the following functions:

- my_wildcmp_uca_impl(), in ctype-uca.c

  For utf8mb3, utf8mb4, ucs2, utf16, utf32, using cs->cset->mb_wc().
  For UCA based collations.

- my_wildcmp_mb2_or_mb4_general_ci_impl(), in ctype-ucs2.c:

  For ucs2, utf16, utf32, using cs->cset->mb_wc().
  For general_ci-style collations:
      - xxx_general_ci
      - xxx_general_mysql500_ci
      - xxx_general_nopad_ci

- my_wildcmp_mb2_or_mb4_bin_impl(), in ctype-ucs2.c:

  For ucs2, utf16, utf32, using cs->cset->mb_wc().
  For _bin collations:
      - xxx_bin
      - xxx_nopad_bin

- my_wildcmp_utf8mb3_general_ci_impl(), in ctype-utf8.c

  Optimized for utf8mb3, using my_mb_wc_utf8mb3_quick().

  For general_ci-style collations:
      - utf8mb3_general_ci
      - utf8mb3_general_mysql500_ci
      - utf8mb3_general_nopad_ci

- my_wildcmp_utf8mb4_general_ci_impl(), in ctype-utf8.c

  Optimized for utf8mb4, using my_mb_wc_utf8mb4_quick().

  For general_ci-style collations:
      - utf8mb4_general_ci
      - utf8mb4_general_nopad_ci
2024-03-12 09:33:20 +04:00

177 lines
4.9 KiB
C++

/*
Copyright (c) 2024, MariaDB
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA
*/
/*
  Template parameters. The including file must define all three macros
  before including this file:

    MY_FUNCTION_NAME(x)     - builds the name of the function defined below
                              (it is invoked as MY_FUNCTION_NAME(wildcmp))
    MY_MB_WC(cs, pwc, b, e) - decodes one character from [b, e) into *pwc;
                              a result <= 0 is treated as a failure, a
                              positive result is added to the byte pointer
    MY_CHAR_EQ(cs, a, b)    - non-zero when the two decoded characters are
                              to be treated as equal
*/
#if !defined(MY_FUNCTION_NAME)
#error MY_FUNCTION_NAME is not defined
#endif
#if !defined(MY_MB_WC)
#error MY_MB_WC is not defined
#endif
#if !defined(MY_CHAR_EQ)
#error MY_CHAR_EQ is not defined
#endif
/*
** Compare a string against a wildcard pattern, one decoded character
** at a time.
**
** cs             passed through to MY_MB_WC and MY_CHAR_EQ
** str, str_end   the string to test, as the byte range [str, str_end)
** wildstr,
** wildend        the pattern, as the byte range [wildstr, wildend)
** escape         pattern character that makes the next pattern character
**                literal
** w_one          pattern character that matches exactly one character
** w_many         pattern character that matches any run of characters
** recurse_level  current recursion depth; handed to my_string_stack_guard
**                and incremented on every recursive call
**
** Returns:
**  0 if the string matches the pattern
**  1 if it does not match; also returned when MY_MB_WC fails (<= 0) or
**    when the stack guard reports a problem
** -1 if it does not match and the string ran out while the pattern still
**    needs a character after w_many; a caller searching for a position
**    to match from stops instead of trying further positions
*/
static int
MY_FUNCTION_NAME(wildcmp)(CHARSET_INFO *cs,
const char *str, const char *str_end,
const char *wildstr,const char *wildend,
int escape, int w_one, int w_many,
int recurse_level)
{
/*
  NOTE(review): the values stored in "result" before the recursive call
  below are never read; only the recursion result is tested and returned.
*/
int result= -1; /* Not found, using wildcards */
my_wc_t s_wc, w_wc; /* Current character of the string / of the pattern */
int scan; /* Result of the last MY_MB_WC call: bytes consumed, or <= 0 */
/*
  Optional hook: when installed and it returns non-zero for this depth,
  give up and report "no match". Presumably a stack overrun check -
  confirm against the definition of my_string_stack_guard.
*/
if (my_string_stack_guard && my_string_stack_guard(recurse_level))
return 1;
while (wildstr != wildend)
{
/*
  Step 1: match pattern characters one to one against the string until
  a w_many is seen. This loop is left only by the "break" on w_many or
  by a return.
*/
while (1)
{
my_bool escaped= 0;
if ((scan= MY_MB_WC(cs, &w_wc, (const uchar*) wildstr,
(const uchar*) wildend)) <= 0)
return 1;
/*
  w_many is only peeked at here: wildstr is not advanced, so the
  w_many handling below decodes it again and consumes it.
*/
if (w_wc == (my_wc_t) w_many)
{
result= 1; /* Found an anchor char */
break;
}
wildstr+= scan;
/*
  An escape followed by at least one more pattern character: take that
  next character literally. An escape at the very end of the pattern is
  compared like an ordinary character.
*/
if (w_wc == (my_wc_t) escape && wildstr < wildend)
{
if ((scan= MY_MB_WC(cs, &w_wc, (const uchar*) wildstr,
(const uchar*) wildend)) <= 0)
return 1;
wildstr+= scan;
escaped= 1;
}
/*
  Take the next character of the string. Presumably MY_MB_WC also
  returns <= 0 when str has reached str_end, which makes an exhausted
  string a mismatch here - confirm against the MY_MB_WC definitions.
*/
if ((scan= MY_MB_WC(cs, &s_wc, (const uchar*) str,
(const uchar*) str_end)) <= 0)
return 1;
str+= scan;
/* An unescaped w_one accepts any character; anything else must be equal */
if (!escaped && w_wc == (my_wc_t) w_one)
{
result= 1; /* Found an anchor char */
}
else
{
if (!MY_CHAR_EQ(cs, s_wc, w_wc))
return 1; /* No match */
}
if (wildstr == wildend)
return (str != str_end); /* Match if both are at end */
}
/* Step 2: handle the w_many that stopped the loop above */
if (w_wc == (my_wc_t) w_many)
{ /* Found w_many */
/*
  Skip every w_many that follows, and let every w_one that follows
  consume one character of the string.
*/
for ( ; wildstr != wildend ; )
{
if ((scan= MY_MB_WC(cs, &w_wc, (const uchar*) wildstr,
(const uchar*) wildend)) <= 0)
return 1;
if (w_wc == (my_wc_t) w_many)
{
wildstr+= scan;
continue;
}
if (w_wc == (my_wc_t) w_one)
{
wildstr+= scan;
if ((scan= MY_MB_WC(cs, &s_wc, (const uchar*) str,
(const uchar*) str_end)) <= 0)
return 1;
str+= scan;
continue;
}
break; /* Not a wild character */
}
if (wildstr == wildend)
return 0; /* Ok if w_many is last */
/* The pattern still needs a character but the string has none left */
if (str == str_end)
return -1;
/*
  Read the character that must follow the w_many (resolving an escape
  if one is present). Afterwards w_wc holds that character and wildstr
  points just past it.
*/
if ((scan= MY_MB_WC(cs, &w_wc, (const uchar*) wildstr,
(const uchar*) wildend)) <= 0)
return 1;
wildstr+= scan;
if (w_wc == (my_wc_t) escape)
{
if (wildstr < wildend)
{
if ((scan= MY_MB_WC(cs, &w_wc, (const uchar*) wildstr,
(const uchar*) wildend)) <= 0)
return 1;
wildstr+= scan;
}
}
/*
  Try every position in the string where that character occurs: match
  the rest of the pattern against the rest of the string recursively.
  This loop is left only by a return.
*/
while (1)
{
/* Skip until the first character from wildstr is found */
while (str != str_end)
{
if ((scan= MY_MB_WC(cs, &s_wc, (const uchar*) str,
(const uchar*) str_end)) <= 0)
return 1;
if (MY_CHAR_EQ(cs, s_wc, w_wc))
break;
str+= scan;
}
if (str == str_end)
return -1;
/* "scan" still holds the length of the character found just above */
str+= scan;
result= MY_FUNCTION_NAME(wildcmp)(cs,
str, str_end,
wildstr, wildend,
escape, w_one, w_many,
recurse_level + 1);
/*
  0 is a match and -1 means the search is hopeless: both are returned
  as is. 1 means this position failed, so look for the next occurrence.
*/
if (result <= 0)
return result;
}
}
}
/* The pattern is exhausted: match only if the string is exhausted too */
return (str != str_end ? 1 : 0);
}
/*
  Release the template parameters, so that this file can be included
  again with a different set of definitions.
*/
#undef MY_FUNCTION_NAME
#undef MY_MB_WC
#undef MY_CHAR_EQ