/* Copyright (c) 2000, 2014, Oracle and/or its affiliates.
   Copyright (c) 2009, 2021, MariaDB Corporation.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1335  USA */

/*
  Preprocessor directives must each start their own line.
  NOTE(review): the header name of the second #include was missing in the
  reviewed text; <m_ctype.h> is restored here on the assumption that it is
  the header declaring CHARSET_INFO and my_ismbchar() - confirm.
*/
#include "strings_def.h"
#include <m_ctype.h>
#include "ctype-mb.h"

#ifdef USE_MB


/*
  Upper-case a NUL-terminated string in place.

  Every sequence that my_ismbchar() reports as a multi-byte character is
  skipped unchanged; every other byte is replaced with its cs->to_upper[]
  mapping.

  @param cs   Character set
  @param str  NUL-terminated string, modified in place
  @return     Number of bytes walked (str end - str start)
*/
size_t my_caseup_str_mb(CHARSET_INFO * cs, char *str)
{
  register uint32 l;
  register const uchar *map= cs->to_upper;
  char *str_orig= str;

  while (*str)
  {
    /* Pointing after the '\0' is safe here. */
    if ((l= my_ismbchar(cs, str, str + cs->mbmaxlen)))
      str+= l;
    else
    {
      *str= (char) map[(uchar)*str];
      str++;
    }
  }
  return (size_t) (str - str_orig);
}


/*
  Lower-case a NUL-terminated string in place.

  Same walk as my_caseup_str_mb(), but single bytes are mapped through
  cs->to_lower[].

  @param cs   Character set
  @param str  NUL-terminated string, modified in place
  @return     Number of bytes walked (str end - str start)
*/
size_t my_casedn_str_mb(CHARSET_INFO * cs, char *str)
{
  register uint32 l;
  register const uchar *map= cs->to_lower;
  char *str_orig= str;

  while (*str)
  {
    /* Pointing after the '\0' is safe here. */
    if ((l= my_ismbchar(cs, str, str + cs->mbmaxlen)))
      str+= l;
    else
    {
      *str= (char) map[(uchar)*str];
      str++;
    }
  }
  return (size_t) (str - str_orig);
}


/*
  Look up the case-folding record for a two-byte character.

  @param cs    Character set
  @param page  Index into cs->casefold->page[] (the first byte, as used by
               the caller in this file)
  @param offs  Index inside the selected page (the second byte)
  @return      Pointer to the record, or NULL when the character set has no
               casefold data or the selected page is not populated
*/
static inline const MY_CASEFOLD_CHARACTER*
get_case_info_for_ch(CHARSET_INFO *cs, uint page, uint offs)
{
  const MY_CASEFOLD_CHARACTER *p;
  return cs->casefold && (p= cs->casefold->page[page]) ? &p[offs] : NULL;
}


/*
  Case folding functions for CJK character set.
  Case conversion can optionally reduce string octet length.
For example, in EUCKR,
  _euckr 0xA9A5 == "LATIN LETTER DOTLESS I" (Turkish letter)
is upper-cased to to
  _euckr 0x49 "LATIN CAPITAL LETTER I" ('usual' letter I)
Length is reduced in this example from two bytes to one byte.
*/

/*
  Shared worker for my_caseup_mb() and my_casedn_mb().

  Walks src[0..srclen). For a multi-byte character a case-folding record is
  looked up by its first two bytes; when one exists, the converted code is
  written as two bytes if it is above 0xFF and as one byte otherwise. When
  no record exists the two source bytes are copied. Any other byte is
  mapped through "map".

  "dstlen" is not consulted: nothing in this function bounds writes to dst.

  @param is_upper  Non-zero selects ch->toupper, zero selects ch->tolower
  @return          Number of bytes written to dst
*/
static size_t
my_casefold_mb(CHARSET_INFO *cs,
               const char *src, size_t srclen,
               char *dst, size_t dstlen __attribute__((unused)),
               const uchar *map,
               size_t is_upper)
{
  const char *srcend= src + srclen;
  char *dst0= dst;

  /* The two-byte consumption below relies on this */
  DBUG_ASSERT(cs->mbmaxlen == 2);

  while (src < srcend)
  {
    size_t mblen= my_ismbchar(cs, src, srcend);
    if (mblen)
    {
      const MY_CASEFOLD_CHARACTER *ch;
      if ((ch= get_case_info_for_ch(cs, (uchar) src[0], (uchar) src[1])))
      {
        int code= is_upper ? ch->toupper : ch->tolower;
        src+= 2;
        /* The result may shrink to a single byte */
        if (code > 0xFF)
          *dst++= code >> 8;
        *dst++= code & 0xFF;
      }
      else
      {
        *dst++= *src++;
        *dst++= *src++;
      }
    }
    else
    {
      *dst++= (char) map[(uchar) *src++];
    }
  }
  return (size_t) (dst - dst0);
}


/*
  Lower-case src into dst using cs->to_lower for single bytes.
  Returns the number of bytes written (see my_casefold_mb()).
*/
size_t
my_casedn_mb(CHARSET_INFO * cs, const char *src, size_t srclen,
             char *dst, size_t dstlen)
{
  DBUG_ASSERT(dstlen >= srclen * cs->cset->casedn_multiply(cs));
  /* In-place conversion is only expected when the multiplier is 1 */
  DBUG_ASSERT(src != dst || cs->cset->casedn_multiply(cs) == 1);
  return my_casefold_mb(cs, src, srclen, dst, dstlen, cs->to_lower, 0);
}


/*
  Upper-case src into dst using cs->to_upper for single bytes.
  Returns the number of bytes written (see my_casefold_mb()).
*/
size_t
my_caseup_mb(CHARSET_INFO * cs, const char *src, size_t srclen,
             char *dst, size_t dstlen)
{
  DBUG_ASSERT(dstlen >= srclen * cs->cset->caseup_multiply(cs));
  /* In-place conversion is only expected when the multiplier is 1 */
  DBUG_ASSERT(src != dst || cs->cset->caseup_multiply(cs) == 1);
  return my_casefold_mb(cs, src, srclen, dst, dstlen, cs->to_upper, 1);
}


/*
  my_strcasecmp_mb() returns 0 if strings are equal, non-zero otherwise.

  Both arguments are NUL-terminated. Multi-byte characters of "s" are
  compared byte by byte against "t"; single bytes are compared through
  cs->to_upper[]. A single byte in "s" facing a multi-byte character in
  "t" is a mismatch.
*/

int my_strcasecmp_mb(CHARSET_INFO * cs,const char *s, const char *t)
{
  register uint32 l;
  register const uchar *map=cs->to_upper;

  while (*s && *t)
  {
    /* Pointing after the '\0' is safe here. */
    if ((l=my_ismbchar(cs, s, s + cs->mbmaxlen)))
    {
      while (l--)
        if (*s++ != *t++)
          return 1;
    }
    else if (my_ci_charlen(cs, (const uchar *) t,
                               (const uchar *) t + cs->mbmaxlen) > 1)
      return 1;
    else if (map[(uchar) *s++] != map[(uchar) *t++])
      return 1;
  }
  /* At least one of '*s' and '*t' is zero here. */
  return (*t != *s);
}


/*
** Compare string against string with wildcard
**	 0 if matched
**	-1 if not matched with wildcard
**	 1 if not matched (in the code below only 0 is ever returned for a
**	   match; 1 is returned for a literal mismatch, for leftover input,
**	   and when the stack guard reports a problem)
*/

/*
  Advance A by one character: by the length my_ismbchar() reports, or by
  one byte. Both arguments are expanded more than once.
*/
#define INC_PTR(cs,A,B) A+=(my_ismbchar(cs,A,B) ? my_ismbchar(cs,A,B) : 1)

/* Map one byte through the collation's sort_order[] table */
#define likeconv(s,A) (uchar) (s)->sort_order[(uchar) (A)]

/*
  Recursive worker for my_wildcmp_mb().

  @param str, str_end      Subject string
  @param wildstr, wildend  Pattern
  @param escape            Escape character of the pattern
  @param w_one             "Exactly one character" wildcard
  @param w_many            "Any number of characters" wildcard
  @param recurse_level     Current depth, handed to my_string_stack_guard
  @return                  0 on match, 1 or -1 otherwise
*/
static
int my_wildcmp_mb_impl(CHARSET_INFO *cs,
                       const char *str,const char *str_end,
                       const char *wildstr,const char *wildend,
                       int escape, int w_one, int w_many, int recurse_level)
{
  int result= -1;				/* Not found, using wildcards */

  if (my_string_stack_guard && my_string_stack_guard(recurse_level))
    return 1;
  while (wildstr != wildend)
  {
    /* Literal part of the pattern: compare character by character */
    while (*wildstr != w_many && *wildstr != w_one)
    {
      int l;
      if (*wildstr == escape && wildstr+1 != wildend)
	wildstr++;
      if ((l = my_ismbchar(cs, wildstr, wildend)))
      {
        /* Multi-byte characters must match byte for byte */
	if (str+l > str_end || memcmp(str, wildstr, l) != 0)
	  return 1;
	str += l;
	wildstr += l;
      }
      else
      if (str == str_end || likeconv(cs,*wildstr++) != likeconv(cs,*str++))
	return(1);				/* No match */
      if (wildstr == wildend)
	return (str != str_end);		/* Match if both are at end */
      result=1;					/* Found an anchor char */
    }
    if (*wildstr == w_one)
    {
      do
      {
	if (str == str_end)			/* Skip one char if possible */
	  return (result);
	INC_PTR(cs,str,str_end);
      } while (++wildstr < wildend && *wildstr == w_one);
      if (wildstr == wildend)
	break;
    }
    if (*wildstr == w_many)
    {						/* Found w_many */
      uchar cmp;
      const char* mb = wildstr;
      int mb_len=0;

      wildstr++;
      /* Remove any '%' and '_' from the wild search string */
      for (; wildstr != wildend ; wildstr++)
      {
	if (*wildstr == w_many)
	  continue;
	if (*wildstr == w_one)
	{
	  if (str == str_end)
	    return (-1);
	  INC_PTR(cs,str,str_end);
	  continue;
	}
	break;					/* Not a wild character */
      }
      if (wildstr == wildend)
	return(0);				/* Ok if w_many is last */
      if (str == str_end)
	return -1;

      if ((cmp= *wildstr) == escape && wildstr+1 != wildend)
	cmp= *++wildstr;

      /* Remember the first literal character that follows the wildcard */
      mb=wildstr;
      mb_len= my_ismbchar(cs, wildstr, wildend);
      INC_PTR(cs,wildstr,wildend);		/* This is compared through cmp */
      cmp=likeconv(cs,cmp);
      do
      {
        /* Find the next occurrence of that character in the subject */
        for (;;)
        {
          if (str >= str_end)
            return -1;
          if (mb_len)
          {
            if (str+mb_len <= str_end && memcmp(str, mb, mb_len) == 0)
            {
              str += mb_len;
              break;
            }
          }
          else if (!my_ismbchar(cs, str, str_end) &&
                   likeconv(cs,*str) == cmp)
          {
            str++;
            break;
          }
          INC_PTR(cs,str, str_end);
        }
	{
          /* Try to match the rest of the pattern from here */
	  int tmp=my_wildcmp_mb_impl(cs,str,str_end,wildstr,wildend,escape,w_one,
                                     w_many, recurse_level + 1);
	  if (tmp <= 0)
	    return (tmp);
	}
      } while (str != str_end);
      return(-1);
    }
  }
  return (str != str_end ? 1 : 0);
}

/*
  Public entry point: runs my_wildcmp_mb_impl() with recurse_level 1.
*/
int my_wildcmp_mb(CHARSET_INFO *cs,
                  const char *str,const char *str_end,
                  const char *wildstr,const char *wildend,
                  int escape, int w_one, int w_many)
{
  return my_wildcmp_mb_impl(cs, str, str_end,
                            wildstr, wildend,
                            escape, w_one, w_many, 1);
}


/*
  Count the characters in [pos, end): each step advances by the length
  my_ismbchar() reports, or by one byte when it reports 0.
  Note that "cs" is used despite carrying the "unused" attribute.
*/
size_t my_numchars_mb(CHARSET_INFO *cs __attribute__((unused)),
		      const char *pos, const char *end)
{
  register size_t count= 0;
  while (pos < end)
  {
    uint mb_len;
    pos+= (mb_len= my_ismbchar(cs,pos,end)) ? mb_len : 1;
    count++;
  }
  return count;
}


/*
  Return the byte offset reached after skipping "length" characters from
  "pos". If the string ends first, the result is (end + 2 - pos), i.e. a
  value larger than the byte length of the string.
*/
size_t my_charpos_mb(CHARSET_INFO *cs __attribute__((unused)),
		     const char *pos, const char *end, size_t length)
{
  const char *start= pos;

  while (length && pos < end)
  {
    uint mb_len;
    pos+= (mb_len= my_ismbchar(cs, pos, end)) ? mb_len : 1;
    length--;
  }
  return (size_t) (length ? end+2-start : pos-start);
}


/*
  Append a badly formed piece of string.
  Bad bytes are fixed to '?'.

  @param to        The destination string
  @param to_end    The end of the destination string
  @param from      The source string
  @param from_end  The end of the source string
  @param nchars    Write not more than "nchars" characters.
  @param status    Copying status, must be previously initialized,
                   e.g. using well_formed_char_length() on the original
                   full source string.
*/
static size_t
my_append_fix_badly_formed_tail(CHARSET_INFO *cs,
                                char *to, char *to_end,
                                const char *from, const char *from_end,
                                size_t nchars,
                                MY_STRCOPY_STATUS *status)
{
  char *to0= to;

  for ( ; nchars; nchars--)
  {
    int chlen;
    if ((chlen= my_ci_charlen(cs, (const uchar*) from,
                                  (const uchar *) from_end)) > 0)
    {
      /* Found a valid character */         /* chlen == 1..MBMAXLEN  */
      DBUG_ASSERT(chlen <= (int) cs->mbmaxlen);
      if (to + chlen > to_end)
        goto end;                           /* Does not fit to "to"  */
      memcpy(to, from, (size_t) chlen);
      from+= chlen;
      to+= chlen;
      continue;
    }
    if (chlen == MY_CS_ILSEQ)               /* chlen == 0 */
    {
      DBUG_ASSERT(from < from_end);  /* Shouldn't get MY_CS_ILSEQ if empty */
      goto bad;
    }
    /* Got an incomplete character */       /* chlen == MY_CS_TOOSMALLXXX */
    DBUG_ASSERT(chlen >= MY_CS_TOOSMALL6);
    DBUG_ASSERT(chlen <= MY_CS_TOOSMALL);
    if (from >= from_end)
      break;                                /* End of the source string */
bad:
    /* Bad byte sequence, or incomplete character found */
    /* Only the first error position is recorded */
    if (!status->m_well_formed_error_pos)
      status->m_well_formed_error_pos= from;

    if ((chlen= my_ci_wc_mb(cs, '?', (uchar*) to, (uchar *) to_end)) <= 0)
      break; /* Question mark does not fit into the destination */
    to+= chlen;
    /* One bad source byte is consumed per replacement character */
    from++;
  }
end:
  status->m_source_end_pos= from;
  /* Number of bytes appended to "to" */
  return to - to0;
}


/*
  Copy a string, replacing badly formed bytes with '?'.

  The leading well-formed part, as measured by
  my_ci_well_formed_char_length() over at most MY_MIN(src_length,
  dst_length) bytes and "nchars" characters, is copied with memmove().
  If that call recorded an error position in "status", the remainder is
  processed by my_append_fix_badly_formed_tail().

  @return  Number of bytes written to dst
*/
size_t
my_copy_fix_mb(CHARSET_INFO *cs,
               char *dst, size_t dst_length,
               const char *src, size_t src_length,
               size_t nchars, MY_STRCOPY_STATUS *status)
{
  size_t well_formed_nchars;
  size_t well_formed_length;
  size_t fixed_length;
  size_t min_length= MY_MIN(src_length, dst_length);

  well_formed_nchars= my_ci_well_formed_char_length(cs, src, src + min_length,
                                                    nchars, status);
  DBUG_ASSERT(well_formed_nchars <= nchars);
  well_formed_length= status->m_source_end_pos - src;
  if (well_formed_length)
    memmove(dst, src, well_formed_length);
  if (!status->m_well_formed_error_pos)
    return well_formed_length;

  fixed_length= my_append_fix_badly_formed_tail(cs,
                                                dst + well_formed_length,
                                                dst + dst_length,
                                                src + well_formed_length,
                                                src + src_length,
                                                nchars - well_formed_nchars,
                                                status);
  return well_formed_length + fixed_length;
}


/*
  Search for "s" inside "b", comparing with my_ci_strnncoll() at every
  character start.

  @param match   Optional output array of "nmatch" elements
  @param nmatch  Number of elements available in "match"
  @return        0 if not found, 1 if "s" is empty, 2 if found

  On a hit, match[0] describes the part before the hit (beg 0, end = byte
  offset of the hit, mb_len = number of characters skipped) and, when
  nmatch > 1, match[1] gives the byte range of the hit itself.
*/
uint my_instr_mb(CHARSET_INFO *cs,
                 const char *b, size_t b_length,
                 const char *s, size_t s_length,
                 my_match_t *match, uint nmatch)
{
  register const char *end, *b0;
  int res= 0;

  if (s_length <= b_length)
  {
    if (!s_length)
    {
      if (nmatch)
      {
        match->beg= 0;
        match->end= 0;
        match->mb_len= 0;
      }
      return 1;		/* Empty string is always found */
    }

    b0= b;
    /* One past the last position where "s" can still fit */
    end= b+b_length-s_length+1;

    while (b < end)
    {
      int mb_len;

      if (!my_ci_strnncoll(cs, (const uchar *) b, s_length,
                               (const uchar *) s, s_length, 0))
      {
        if (nmatch)
        {
          match[0].beg= 0;
          match[0].end= (uint) (b-b0);
          match[0].mb_len= res;
          if (nmatch > 1)
          {
            match[1].beg= match[0].end;
            match[1].end= (uint)(match[0].end+s_length);
            match[1].mb_len= 0;	/* Not computed */
          }
        }
        return 2;
      }
      /* Step over one character */
      mb_len= (mb_len= my_ismbchar(cs, b, end)) ? mb_len : 1;
      b+= mb_len;
      b_length-= mb_len;
      res++;
    }
  }
  return 0;
}


/*
  Copy one non-ascii character.
  "dst" must have enough room for the character.
  Note, we don't use sort_order[] in this macro.
  This is correct even for case insensitive collations:
  - basic Latin letters are processed outside this macro;
  - for other characters sort_order[x] is equal to x.

  The switch falls through so that a character of N bytes copies exactly
  N bytes; a result of 0 from my_ismbchar() copies a single byte.
*/
#define my_strnxfrm_mb_non_ascii_char(cs, dst, src, se)                  \
{                                                                        \
  switch (my_ismbchar(cs, (const char *) src, (const char *) se)) {      \
  case 4:                                                                \
    *dst++= *src++;                                                      \
    /* fall through */                                                   \
  case 3:                                                                \
    *dst++= *src++;                                                      \
    /* fall through */                                                   \
  case 2:                                                                \
    *dst++= *src++;                                                      \
    /* fall through */                                                   \
  case 0:                                                                \
    *dst++= *src++; /* byte in range 0x80..0xFF which is not MB head */  \
  }                                                                      \
}


/*
  For character sets with two or three byte multi-byte
  characters having multibyte weights *equal* to their codes:
  cp932, euckr, gb2312, sjis, eucjpms, ujis.
*/
/*
  Write sort weights for src[0..srclen) into [dst, de).

  @param nweights  In/out: decremented once per character processed
  @return          Number of bytes written to dst
*/
size_t
my_strnxfrm_mb_internal(CHARSET_INFO *cs, uchar *dst, uchar *de,
                        uint *nweights, const uchar *src, size_t srclen)
{
  uchar *d0= dst;
  const uchar *se= src + srclen;
  const uchar *sort_order= cs->sort_order;

  /* The copy macro used below handles characters of up to 4 bytes */
  DBUG_ASSERT(cs->mbmaxlen <= 4);

  /*
    If "srclen" is smaller than both "dstlen" and "nweights"
    then we can run a simplified loop -
    without checking "nweights" and "de".
  */
  if (de >= d0 + srclen && *nweights >= srclen)
  {
    if (sort_order)
    {
      /* Optimized version for a case insensitive collation */
      for (; src < se; (*nweights)--)
      {
        if (*src < 128) /* quickly catch ASCII characters */
          *dst++= sort_order[*src++];
        else
          my_strnxfrm_mb_non_ascii_char(cs, dst, src, se);
      }
    }
    else
    {
      /* Optimized version for a case sensitive collation (no sort_order) */
      for (; src < se; (*nweights)--)
      {
        if (*src < 128) /* quickly catch ASCII characters */
          *dst++= *src++;
        else
          my_strnxfrm_mb_non_ascii_char(cs, dst, src, se);
      }
    }
    goto end;
  }

  /*
    A thorough loop, checking all possible limits:
    "se", "nweights" and "de".
  */
  for (; src < se && *nweights && dst < de; (*nweights)--)
  {
    int chlen;
    if (*src < 128 ||
        !(chlen= my_ismbchar(cs, (const char *) src, (const char *) se)))
    {
      /* Single byte character */
      *dst++= sort_order ? sort_order[*src++] : *src++;
    }
    else
    {
      /* Multi-byte character */
      /* Copy only as many bytes as still fit before "de" */
      size_t len= (dst + chlen <= de) ? chlen : de - dst;
      memcpy(dst, src, len);
      dst+= len;
      src+= len;
    }
  }

end:
  return dst - d0;
}


/*
  Produce the sort key: weights from my_strnxfrm_mb_internal(), then
  post-processing by my_strxfrm_pad_desc_and_reverse() with the remaining
  "nweights" and the caller's "flags".
*/
size_t
my_strnxfrm_mb(CHARSET_INFO *cs,
               uchar *dst, size_t dstlen, uint nweights,
               const uchar *src, size_t srclen, uint flags)
{
  uchar *de= dst + dstlen;
  uchar *d0= dst;
  dst= d0 + my_strnxfrm_mb_internal(cs, dst, de, &nweights, src, srclen);
  return my_strxfrm_pad_desc_and_reverse(cs, d0, dst, de, nweights, flags, 0);
}


/*
  Same as my_strnxfrm_mb(), but post-processing is done by
  my_strxfrm_pad_desc_and_reverse_nopad().
*/
size_t
my_strnxfrm_mb_nopad(CHARSET_INFO *cs,
                     uchar *dst, size_t dstlen, uint nweights,
                     const uchar *src, size_t srclen, uint flags)
{
  uchar *de= dst + dstlen;
  uchar *d0= dst;
  dst= d0 + my_strnxfrm_mb_internal(cs, dst, de, &nweights, src, srclen);
  return my_strxfrm_pad_desc_and_reverse_nopad(cs, d0, dst, de, nweights,
                                               flags, 0);
}


/* Binary comparison of two NUL-terminated strings: plain strcmp() */
int
my_strcasecmp_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
                     const char *s, const char *t)
{
  return strcmp(s,t);
}


/*
  Feed every byte of key[0..len) to MY_HASH_ADD, updating *nr1 and *nr2.
  Trailing spaces are hashed like any other byte.
*/
void
my_hash_sort_mb_nopad_bin(CHARSET_INFO *cs __attribute__((unused)),
                          const uchar *key, size_t len,ulong *nr1, ulong *nr2)
{
  /* Work on local copies and store the result back once */
  register ulong m1= *nr1, m2= *nr2;
  const uchar *end= key + len;
  for (; key < end ; key++)
  {
    MY_HASH_ADD(m1, m2, (uint)*key);
  }
  *nr1= m1;
  *nr2= m2;
}


/* Hash key[0..len) with trailing spaces ignored */
void
my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
                    const uchar *key, size_t len,ulong *nr1, ulong *nr2)
{
  /*
     Remove trailing spaces. We have to do this to be able to compare
    'A ' and 'A' as identical
  */
  const uchar *end= skip_trailing_space(key, len);
  my_hash_sort_mb_nopad_bin(cs, key, end - key, nr1, nr2);
}


/*
  Fill dst with up to "nchars" copies of one character.

  The character is encoded once at the start of dst by
  my_ci_native_to_mb(); further copies are made with memcpy() while a
  whole character still fits in dst_size.

  @return  Number of bytes written; 0 if the first character does not fit
           or nchars is 0
*/
static inline size_t
my_repeat_char_native(CHARSET_INFO *cs,
                      uchar *dst, size_t dst_size, size_t nchars,
                      my_wc_t native_code)
{
  uchar *dst0= dst;
  uchar *dstend= dst + dst_size;
  int chlen= my_ci_native_to_mb(cs, native_code, dst, dstend);
  if (chlen < 1 /* Not enough space */ || !nchars)
    return 0;
  for (dst+= chlen, nchars--;
       dst + chlen <= dstend && nchars > 0;
       dst+= chlen, nchars--)
    memcpy(dst, dst0, chlen);
  return dst - dst0;
}


/* Fill dst with up to "nchars" copies of cs->min_sort_char */
size_t my_min_str_mb_simple(CHARSET_INFO *cs,
                            uchar *dst, size_t dst_size, size_t nchars)
{
  return my_repeat_char_native(cs, dst, dst_size, nchars, cs->min_sort_char);
}


/* NOPAD variant: writes nothing and reports a length of 0 */
size_t my_min_str_mb_simple_nopad(CHARSET_INFO *cs,
                                  uchar *dst, size_t dst_size, size_t nchars)
{
  /* For NOPAD collations, the empty string is the smallest possible */
  return 0;
}


/* Fill dst with up to "nchars" copies of cs->max_sort_char */
size_t my_max_str_mb_simple(CHARSET_INFO *cs,
                            uchar *dst, size_t dst_size, size_t nchars)
{
  return my_repeat_char_native(cs, dst, dst_size, nchars, cs->max_sort_char);
}


/*
  Fill the given buffer with 'maximum character' for given charset
  SYNOPSIS
      pad_max_char()
      cs   Character set
      str  Start of buffer to fill
      end  End of buffer to fill

  DESCRIPTION
      Write max key:
      - for non-Unicode character sets:
        just bfill using max_sort_char if max_sort_char is one byte.
        In case when max_sort_char is two bytes, fill with double-byte pairs
        and optionally pad with a single space character.
      - for Unicode character set (utf-8):
        create a buffer with multibyte representation of the max_sort_char
        character, and copy it into max_str in a loop.
*/
static void pad_max_char(CHARSET_INFO *cs, char *str, char *end)
{
  char buf[10];
  /*
    NOTE(review): the int result of my_ci_native_to_mb() is stored in a
    plain "char"; a negative result would be misread where char is
    unsigned - confirm whether "int" was intended.
  */
  char buflen= my_ci_native_to_mb(cs, cs->max_sort_char, (uchar*) buf,
                                  (uchar*) buf + sizeof(buf));

  DBUG_ASSERT(buflen > 0);
  /* The body runs at least once, so the caller must pass str < end */
  do
  {
    if ((str + buflen) <= end)
    {
      /* Enough space for the character */
      memcpy(str, buf, buflen);
      str+= buflen;
    }
    else
    {
      /*
        There is no space for whole multibyte
        character, then add trailing spaces.
      */
      *str++= ' ';
    }
  } while (str < end);
}

/*
** Calculate min_str and max_str that ranges a LIKE string.
** Arguments:
** ptr		Pointer to LIKE string.
** ptr_length	Length of LIKE string.
** escape	Escape character in LIKE.  (Normally '\').
**		All escape characters should be removed from min_str and max_str
** res_length	Length of min_str and max_str.
** min_str	Smallest case sensitive string that ranges LIKE.
**		Should be space padded to res_length.
** max_str	Largest case sensitive string that ranges LIKE.
**		Normally padded with the biggest character sort value.
**
** The function should return 0 if ok and 1 if the LIKE string can't be
** optimized !
**
** In the implementation below every return statement returns 0.
*/

my_bool my_like_range_mb(CHARSET_INFO *cs,
			 const char *ptr,size_t ptr_length,
			 pbool escape, pbool w_one, pbool w_many,
			 size_t res_length,
			 char *min_str,char *max_str,
			 size_t *min_length,size_t *max_length)
{
  uint mb_len;
  const char *end= ptr + ptr_length;
  char *min_org= min_str;
  char *min_end= min_str + res_length;
  char *max_end= max_str + res_length;
  /* Upper bound on the number of characters that fit into res_length */
  size_t maxcharlen= res_length / cs->mbmaxlen;
  const MY_CONTRACTIONS *contractions= my_charset_get_contractions(cs, 0);

  for (; ptr != end && min_str != min_end && maxcharlen ; maxcharlen--)
  {
    /* We assume here that escape, w_one, w_many are one-byte characters */
    if (*ptr == escape && ptr+1 != end)
      ptr++;                                    /* Skip escape */
    else if (*ptr == w_one || *ptr == w_many)   /* '_' and '%' in SQL */
    {
fill_max_and_min:
      /*
        Calculate length of keys:
        'a\0\0... is the smallest possible string when we have space expand
        a\ff\ff... is the biggest possible string
      */
      *min_length= (cs->state & (MY_CS_BINSORT | MY_CS_NOPAD)) ?
                   (size_t) (min_str - min_org) :
                   res_length;
      /* Create min key  */
      do
      {
	*min_str++= (char) cs->min_sort_char;
      } while (min_str != min_end);

      /*
        Write max key: create a buffer with multibyte
        representation of the max_sort_char character,
        and copy it into max_str in a loop.
      */
      *max_length= res_length;
      pad_max_char(cs, max_str, max_end);
      return 0;
    }
    if ((mb_len= my_ismbchar(cs, ptr, end)) > 1)
    {
      /* Multi-byte character: copy it whole, or stop if it does not fit */
      if (ptr+mb_len > end || min_str+mb_len > min_end)
        break;
      while (mb_len--)
       *min_str++= *max_str++= *ptr++;
    }
    else
    {
      /*
        Special case for collations with contractions.
        For example, in Czech, 'ch' is a separate letter
        which is sorted between 'h' and 'i'.
        If the pattern 'abc%', 'c' at the end can mean:
        - letter 'c' itself,
        - beginning of the contraction 'ch'.

        If we simply return this LIKE range:

         'abc\min\min\min' and 'abc\max\max\max'

        then this query: SELECT * FROM t1 WHERE a LIKE 'abc%'
        will only find values starting from 'abc[^h]',
        but won't find values starting from 'abch'.

        We must ignore contraction heads followed by w_one or w_many.
        ('Contraction head' means any letter which can be the first
        letter in a contraction)

        For example, for Czech 'abc%', we will return LIKE range,
        which is equal to LIKE range for 'ab%':

        'ab\min\min\min\min' and 'ab\max\max\max\max'.

      */
      if (contractions && ptr + 1 < end &&
          my_uca_can_be_contraction_head(contractions, (uchar) *ptr))
      {
        /* Ptr[0] is a contraction head. */

        if (ptr[1] == w_one || ptr[1] == w_many)
        {
          /* Contraction head followed by a wildcard, quit. */
          goto fill_max_and_min;
        }

        /*
          Some letters can be both contraction heads and contraction tails.
          For example, in Danish 'aa' is a separate single letter which
          is sorted after 'z'. So 'a' can be both head and tail.

          If ptr[0]+ptr[1] is a contraction,
          then put both letters together.

          If ptr[1] can be a contraction part, but ptr[0]+ptr[1]
          is not a contraction, then we put only ptr[0],
          and continue with ptr[1] on the next loop.
        */
        if (my_uca_can_be_contraction_tail(contractions, (uchar) ptr[1]) &&
            my_uca_contraction2_weight(contractions, (uchar) ptr[0], ptr[1]))
        {
          /* Contraction found */
          if (maxcharlen == 1 || min_str + 1 >= min_end)
          {
            /* Both contraction parts don't fit, quit */
            goto fill_max_and_min;
          }

          /* Put contraction head */
          *min_str++= *max_str++= *ptr++;
          maxcharlen--;
        }
      }
      /* Put contraction tail, or a single character */
      *min_str++= *max_str++= *ptr++;
    }
  }

  /* No wildcard seen: both keys hold the literal, space padded */
  *min_length= *max_length = (size_t) (min_str - min_org);
  while (min_str != min_end)
    *min_str++= *max_str++= ' ';           /* Because of key compression */
  return 0;
}


/**
  Calculate min_str and max_str that ranges a LIKE string.
  Generic function, currently used for ucs2, utf16, utf32,
  but should be suitable for any other character sets with
  cs->min_sort_char and cs->max_sort_char represented in
  Unicode code points.

  @param cs           Character set and collation pointer
  @param ptr          Pointer to LIKE pattern.
  @param ptr_length   Length of LIKE pattern.
  @param escape       Escape character pattern,  typically '\'.
  @param w_one        'One character' pattern,   typically '_'.
  @param w_many       'Many characters' pattern, typically '%'.
  @param res_length   Length of min_str and max_str.

  @param[out] min_str Smallest string that ranges LIKE.
  @param[out] max_str Largest string that ranges LIKE.
  @param[out] min_len Length of min_str
  @param[out] max_len Length of max_str

  @return Optimization status.
  @retval FALSE if LIKE pattern can be optimized
  @rerval TRUE if LIKE can't be optimized.
*/
my_bool
my_like_range_generic(CHARSET_INFO *cs,
                      const char *ptr, size_t ptr_length,
                      pbool escape, pbool w_one, pbool w_many,
                      size_t res_length,
                      char *min_str,char *max_str,
                      size_t *min_length,size_t *max_length)
{
  const char *end= ptr + ptr_length;
  const char *min_org= min_str;
  const char *max_org= max_str;
  char *min_end= min_str + res_length;
  char *max_end= max_str + res_length;
  /* Upper bound on the number of characters that fit into res_length */
  size_t charlen= res_length / cs->mbmaxlen;
  size_t res_length_diff;
  const MY_CONTRACTIONS *contractions= my_charset_get_contractions(cs, 0);

  for ( ; charlen > 0; charlen--)
  {
    my_wc_t wc, wc2;
    int res;
    /* Decode the next pattern character */
    if ((res= my_ci_mb_wc(cs, &wc, (uchar*) ptr, (uchar*) end)) <= 0)
    {
      if (res == MY_CS_ILSEQ) /* Bad sequence */
        return TRUE; /* min_length and max_length are not important */
      break; /* End of the string */
    }
    ptr+= res;

    if (wc == (my_wc_t) escape)
    {
      if ((res= my_ci_mb_wc(cs, &wc, (uchar*) ptr, (uchar*) end)) <= 0)
      {
        if (res == MY_CS_ILSEQ)
          return TRUE; /* min_length and max_length are not important */
        /*
           End of the string: Escape is the last character.
           Put escape as a normal character.
           We will leave the loop on the next iteration.
        */
      }
      else
        ptr+= res;

      /* Put escape character to min_str and max_str  */
      if ((res= my_ci_wc_mb(cs, wc,
                            (uchar*) min_str, (uchar*) min_end)) <= 0)
        goto pad_set_lengths; /* No space */
      min_str+= res;

      if ((res= my_ci_wc_mb(cs, wc,
                            (uchar*) max_str, (uchar*) max_end)) <= 0)
        goto pad_set_lengths; /* No space */
      max_str+= res;
      continue;
    }
    else if (wc == (my_wc_t) w_one)
    {
      /* One arbitrary character: smallest in min_str, largest in max_str */
      if ((res= my_ci_wc_mb(cs, cs->min_sort_char,
                            (uchar*) min_str, (uchar*) min_end)) <= 0)
        goto pad_set_lengths;
      min_str+= res;

      if ((res= my_ci_wc_mb(cs, cs->max_sort_char,
                            (uchar*) max_str, (uchar*) max_end)) <= 0)
        goto pad_set_lengths;
      max_str+= res;
      continue;
    }
    else if (wc == (my_wc_t) w_many)
    {
      /*
        Calculate length of keys:
        a\min\min... is the smallest possible string
        a\max\max... is the biggest possible string
      */
      *min_length= (cs->state & (MY_CS_BINSORT | MY_CS_NOPAD)) ?
                   (size_t) (min_str - min_org) :
                   res_length;
      *max_length= res_length;
      goto pad_min_max;
    }

    if (contractions &&
        my_uca_can_be_contraction_head(contractions, wc) &&
        (res= my_ci_mb_wc(cs, &wc2, (uchar*) ptr, (uchar*) end)) > 0)
    {
      const uint16 *weight;
      if ((wc2 == (my_wc_t) w_one || wc2 == (my_wc_t) w_many))
      {
        /* Contraction head followed by a wildcard */
        *min_length= *max_length= res_length;
        goto pad_min_max;
      }

      if (my_uca_can_be_contraction_tail(contractions, wc2) &&
          (weight= my_uca_contraction2_weight(contractions, wc, wc2)) &&
          weight[0])
      {
        /* Contraction found */
        if (charlen == 1)
        {
          /* contraction does not fit to result */
          *min_length= *max_length= res_length;
          goto pad_min_max;
        }

        /* Consume the tail as well; it counts as one more character */
        ptr+= res;
        charlen--;

        /* Put contraction head */
        if ((res= my_ci_wc_mb(cs, wc,
                              (uchar*) min_str, (uchar*) min_end)) <= 0)
          goto pad_set_lengths;
        min_str+= res;

        if ((res= my_ci_wc_mb(cs, wc,
                              (uchar*) max_str, (uchar*) max_end)) <= 0)
          goto pad_set_lengths;
        max_str+= res;
        wc= wc2; /* Prepare to put contraction tail */
      }
    }

    /* Normal character, or contraction tail */
    if ((res= my_ci_wc_mb(cs, wc,
                          (uchar*) min_str, (uchar*) min_end)) <= 0)
      goto pad_set_lengths;
    min_str+= res;
    if ((res= my_ci_wc_mb(cs, wc,
                          (uchar*) max_str, (uchar*) max_end)) <= 0)
      goto pad_set_lengths;
    max_str+= res;
  }

pad_set_lengths:
  /* Lengths are what has been written so far */
  *min_length= (size_t) (min_str - min_org);
  *max_length= (size_t) (max_str - max_org);

pad_min_max:
  /*
    Fill up max_str and min_str to res_length.
    fill() cannot set incomplete characters and
    requires that "length" argument is divisible to mbminlen.
    Make sure to call fill() with proper "length" argument.
  */
  res_length_diff= res_length % cs->mbminlen;
  my_ci_fill(cs, min_str, min_end - min_str - res_length_diff,
             cs->min_sort_char);
  my_ci_fill(cs, max_str, max_end - max_str - res_length_diff,
             cs->max_sort_char);

  /* In case of incomplete characters set the remainder to 0x00's */
  if (res_length_diff)
  {
    /* Example: odd res_length for ucs2 */
    memset(min_end - res_length_diff, 0, res_length_diff);
    memset(max_end - res_length_diff, 0, res_length_diff);
  }
  return FALSE;
}


/*
  Recursive worker for my_wildcmp_mb_bin().

  Has the same structure as the sort_order-based wildcard matcher, but
  single bytes are compared directly ("*wildstr++ != *str++",
  "*str == cmp") instead of through a mapping table.

  @param str, str_end      Subject string
  @param wildstr, wildend  Pattern
  @param escape            Escape character of the pattern
  @param w_one             "Exactly one character" wildcard
  @param w_many            "Any number of characters" wildcard
  @param recurse_level     Current depth, handed to my_string_stack_guard
  @return                  0 on match, 1 or -1 otherwise
*/
static int
my_wildcmp_mb_bin_impl(CHARSET_INFO *cs,
                       const char *str,const char *str_end,
                       const char *wildstr,const char *wildend,
                       int escape, int w_one, int w_many, int recurse_level)
{
  int result= -1;				/* Not found, using wildcards */

  if (my_string_stack_guard && my_string_stack_guard(recurse_level))
    return 1;
  while (wildstr != wildend)
  {
    /* Literal part of the pattern: compare character by character */
    while (*wildstr != w_many && *wildstr != w_one)
    {
      int l;
      if (*wildstr == escape && wildstr+1 != wildend)
	wildstr++;
      if ((l = my_ismbchar(cs, wildstr, wildend)))
      {
        /* Multi-byte characters must match byte for byte */
	if (str+l > str_end || memcmp(str, wildstr, l) != 0)
	  return 1;
	str += l;
	wildstr += l;
      }
      else
      if (str == str_end || *wildstr++ != *str++)
	return(1);				/* No match */
      if (wildstr == wildend)
	return (str != str_end);		/* Match if both are at end */
      result=1;					/* Found an anchor char */
    }
    if (*wildstr == w_one)
    {
      do
      {
	if (str == str_end)			/* Skip one char if possible */
	  return (result);
	INC_PTR(cs,str,str_end);
      } while (++wildstr < wildend && *wildstr == w_one);
      if (wildstr == wildend)
	break;
    }
    if (*wildstr == w_many)
    {						/* Found w_many */
      int cmp;
      const char* mb = wildstr;
      int mb_len=0;

      wildstr++;
      /* Remove any '%' and '_' from the wild search string */
      for (; wildstr != wildend ; wildstr++)
      {
	if (*wildstr == w_many)
	  continue;
	if (*wildstr == w_one)
	{
	  if (str == str_end)
	    return (-1);
	  INC_PTR(cs,str,str_end);
	  continue;
	}
	break;					/* Not a wild character */
      }
      if (wildstr == wildend)
	return(0);				/* Ok if w_many is last */
      if (str == str_end)
	return -1;

      if ((cmp= *wildstr) == escape && wildstr+1 != wildend)
	cmp= *++wildstr;

      /* Remember the first literal character that follows the wildcard */
      mb=wildstr;
      mb_len= my_ismbchar(cs, wildstr, wildend);
      INC_PTR(cs,wildstr,wildend);		/* This is compared through cmp */
      do
      {
        /* Find the next occurrence of that character in the subject */
        for (;;)
        {
          if (str >= str_end)
            return -1;
          if (mb_len)
          {
            if (str+mb_len <= str_end && memcmp(str, mb, mb_len) == 0)
            {
              str += mb_len;
              break;
            }
          }
          else if (!my_ismbchar(cs, str, str_end) && *str == cmp)
          {
            str++;
            break;
          }
          INC_PTR(cs,str, str_end);
        }
	{
          /* Try to match the rest of the pattern from here */
	  int tmp=my_wildcmp_mb_bin_impl(cs,str,str_end,
                                         wildstr,wildend,escape,
                                         w_one,w_many, recurse_level+1);
	  if (tmp <= 0)
	    return (tmp);
	}
      } while (str != str_end);
      return(-1);
    }
  }
  return (str != str_end ? 1 : 0);
}

/*
  Public entry point: runs my_wildcmp_mb_bin_impl() with recurse_level 1.
*/
int
my_wildcmp_mb_bin(CHARSET_INFO *cs,
                  const char *str,const char *str_end,
                  const char *wildstr,const char *wildend,
                  int escape, int w_one, int w_many)
{
  return my_wildcmp_mb_bin_impl(cs, str, str_end,
                                wildstr, wildend,
                                escape, w_one, w_many, 1);
}


/*
  Data was produced from EastAsianWidth.txt
  using utt11-dump utility.

  Each pgXX table below has 256 entries of 0 or 1.
  NOTE(review): presumably one table per 256-code-point page, indexed by
  the low byte of the code point - confirm against the lookup code.
*/
static const char pg11[256]=
{
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};

static const char pg23[256]=
{
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }; static const char pg2E[256]= { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0 }; static const char pg2F[256]= { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0 }; static const char pg30[256]= { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0, 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1 }; static const char pg31[256]= { 0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
};

/*
  Per-character flags for page 0x32 (code points 0x3200..0x32FF, going by
  the slot this table occupies in utr11_data[]).  The index is the low
  byte of the code point.  my_numcells_mb() adds the flag to its base count
  of one cell, so 1 marks a character counted as two display cells and
  0 a character counted as one.
*/
static const char pg32[256]=
{
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0
};

/* Extra-cell flags for page 0x4D (0x4D00..0x4DFF); same layout as pg32. */
static const char pg4D[256]=
{
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};

/* Extra-cell flags for page 0x9F (0x9F00..0x9FFF); same layout as pg32. */
static const char pg9F[256]=
{
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};

/* Extra-cell flags for page 0xA4 (0xA400..0xA4FF); same layout as pg32. */
static const char pgA4[256]=
{
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};

/* Extra-cell flags for page 0xD7 (0xD700..0xD7FF); same layout as pg32. */
static const char pgD7[256]=
{
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};

/* Extra-cell flags for page 0xFA (0xFA00..0xFAFF); same layout as pg32. */
static const char pgFA[256]=
{
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};

/* Extra-cell flags for page 0xFE (0xFE00..0xFEFF); same layout as pg32. */
static const char pgFE[256]=
{
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};

/* Extra-cell flags for page 0xFF (0xFF00..0xFFFF); same layout as pg32. */
static const char pgFF[256]=
{
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};

/*
  Page directory used by my_numcells_mb(), indexed by the high byte of a
  code point not above 0xFFFF, i.e. (wc >> 8) & 0xFF.

    p    - when not NULL, a 256-entry table holding one flag per low byte
           of the code point;
    page - consulted only when p is NULL: the flag shared by every code
           point of the page.

  A flag of 1 means one extra display cell is counted for the character.
  Eight entries per row, so each row covers eight consecutive pages.

  NOTE(review): the name suggests the data follows Unicode UTR#11
  (East Asian Width); the Unicode version it was generated from is not
  recorded here - confirm before regenerating.
*/
static const struct {int page; const char *p;} utr11_data[256]=
{
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
{0,NULL},{0,pg11},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
{0,NULL},{0,NULL},{0,NULL},{0,pg23},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,pg2E},{0,pg2F},
{0,pg30},{0,pg31},{0,pg32},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{0,pg4D},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{0,pg9F},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{0,pgA4},{0,NULL},{0,NULL},{0,NULL},
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{0,pgD7},
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
{0,NULL},{1,NULL},{0,pgFA},{0,NULL},{0,NULL},{0,NULL},{0,pgFE},{0,pgFF}
};


/*
  Count the display cells occupied by the string [b, e).

  Each character decoded by my_ci_mb_wc() contributes one cell, plus one
  extra cell when either
    - its code point lies in 0x20000..0x3FFFD, or
    - its code point is not above 0xFFFF and utr11_data[] flags it (the
      per-character table of its page when there is one, otherwise the
      page-wide flag).
  When decoding fails, a single byte is skipped and nothing is added to
  the count.

  NOTE(review): the previous comment on the failure branch said a wrong
  sequence "takes 1 display cell", but the code adds no cell for it.
  That behaviour is kept as is - confirm which of the two is intended.

  @param cs  character set used to decode the string
  @param b   first byte of the string
  @param e   one past the last byte of the string
  @return    number of display cells
*/
size_t my_numcells_mb(CHARSET_INFO *cs, const char *b, const char *e)
{
  const char *pos= b;
  size_t cells= 0;

  while (pos < e)
  {
    my_wc_t wc;
    int nbytes= my_ci_mb_wc(cs, &wc, (uchar*) pos, (uchar*) e);

    if (nbytes <= 0)
    {
      /* Undecodable byte: step over it without counting a cell. */
      pos++;
      continue;
    }
    pos+= nbytes;

    if (wc > 0xFFFF)
    {
      /*
        Outside the BMP only 0x20000..0x3FFFD (planes 2 and 3, labelled
        "CJK Ideograph Extension B, C" by the original code) is counted
        as two cells.
      */
      cells+= (wc >= 0x20000 && wc <= 0x3FFFD) ? 2 : 1;
    }
    else
    {
      uint page= (wc >> 8) & 0xFF;
      const char *flags= utr11_data[page].p;

      /* One base cell plus the extra-cell flag of the character. */
      cells+= 1 + (flags ? flags[wc & 0xFF] : utr11_data[page].page);
    }
  }
  return cells;
}


/*
  Decode one character from [s, e) and report its ctype.

  *ctype is set to 0 when decoding fails (result <= 0) or when the code
  point is above 0xFFFF.  Otherwise it is taken from my_uni_ctype[]: the
  per-character array of the code point's page when the page has one,
  else the page default (pctype).

  @param cs     character set used to decode the character
  @param ctype  out: ctype of the decoded character, or 0
  @param s      first byte of the input
  @param e      one past the last byte of the input
  @return       the value returned by my_ci_mb_wc()
*/
int my_mb_ctype_mb(CHARSET_INFO *cs, int *ctype,
                   const uchar *s, const uchar *e)
{
  my_wc_t wc;
  int res= my_ci_mb_wc(cs, &wc, s, e);

  if (res > 0 && wc <= 0xFFFF)
  {
    my_wc_t page= wc >> 8;

    if (my_uni_ctype[page].ctype)
      *ctype= my_uni_ctype[page].ctype[wc & 0xFF];
    else
      *ctype= my_uni_ctype[page].pctype;
  }
  else
  {
    *ctype= 0;
  }
  return res;
}


#endif /* USE_MB */