mirror of
				https://github.com/MariaDB/server.git
				synced 2025-10-31 02:46:29 +01:00 
			
		
		
		
	 10c063f9f0
			
		
	
	
	10c063f9f0
	
	
	
		
			
			Fixing the code adding MySQL _0900_ collations as _uca1400_ aliases
not to perform deep initialization of the corresponding _uca1400_
collations.
Only basic initialization is now performed, which is enough to list
these collations (both _0900_ and _uca1400_) in queries to
INFORMATION_SCHEMA tables COLLATIONS and
COLLATION_CHARACTER_SET_APPLICABILITY,
as well as in SHOW COLLATION statements.
Deep initialization is now performed only when a collation
(either the _0900_ alias or the corresponding  _uca1400_ collation)
is used for the very first time after the server startup.
Refactoring was done to make the code easier to maintain:
- most of the _uca1400_ code was moved from ctype-uca.c
  to a new file ctype-uca1400.c
- most of the _0900_ code was moved from ctype-uca.c
  to a new file ctype-uca0900.c
Change details:
- The original function add_alias_for_collation() added by the patch for
   "MDEV-20912 Add support for utf8mb4_0900_* collations in MariaDB Server"
  was removed from mysys/charset.c, as it had two problems:
  a. it forced deep initialization of the _uca1400_ collations
     when adding _0900_ aliases for them at the server startup
     (the main reported problem)
  b. the collation initialization code in add_alias_for_collation()
     was related more to collations rather than to memory management,
     so /strings should be a better place for it than /mysys.
  The code from add_alias_for_collation() was split into separate functions.
  Cyclic dependency was removed. `#include <my_sys.h>` was removed
  from /strings/ctype-uca.c. Collations are now added using a callback
  function MY_CHARSET_LOADER::add_collation, like it is done for
  user collations defined in Index.xml. The code in /mysys sets
  MY_CHARSET_LOADER::add_collation to add_compiled_collation().
- The function compare_collations() was removed.
  A new virtual function was added into my_collation_handler_st instead:
    my_bool (*eq_collation)(CHARSET_INFO *self, CHARSET_INFO *other);
  because it is the collation handler who knows how to detect equal
  collations by comparing only some of CHARSET_INFO members without
  their deep initialization.
  Three implementations were added:
  - my_ci_eq_collation_uca() for UCA collations, it compares
    _0900_ collations as equal to their corresponding _uca1400_ collations.
  - my_ci_eq_collation_utf8mb4_bin(), it compares
    utf8mb4_nopad_bin and utf8mb4_0900_bin as equal.
  - my_ci_eq_collation_generic() - the default implementation,
    which compares all collations as not equal.
  A C++ wrapper CHARSET_INFO::eq_collations() was added.
  The code in /sql was changed to use the wrapper instead of
  the former calls for the removed function compare_collations().
- A part of add_alias_for_collation() was moved into a new function
  my_ci_alloc(). It allocates a memory for a new charset_info_st
  instance together with the collation name and the comment using a single
  MY_CHARSET_LOADER::once_alloc call, which points to my_once_alloc()
  in the server.
- A part of add_alias_for_collation() was moved into a new function
  my_ci_make_comment_for_alias(). It makes an "Alias for xxx" string,
  e.g. "Alias for utf8mb4_uca1400_swedish_ai_ci" in case of
  utf8mb4_sv_0900_ai_ci.
- A part of the code in create_tailoring() was moved to
  a new function my_uca1400_collation_get_initialized_shared_uca(),
  to reuse the code between _uca1400_ and _0900_ collations.
- A new function my_collation_id_is_mysql_uca0900() was added
  in addition to my_collation_id_is_mysql_uca1400().
- Functions to build collation names were added:
   my_uca0900_collation_build_name()
   my_uca1400_collation_build_name()
- A shared function was added:
  my_bool
  my_uca1400_collation_alloc_and_init(MY_CHARSET_LOADER *loader,
                                      LEX_CSTRING name,
                                      LEX_CSTRING comment,
                                      const uca_collation_def_param_t *param,
                                      uint id)
  It's reused to add _uca1400_ and _0900_ collations, with basic
  initialization (without deep initialization).
- The function add_compiled_collation() changed its return type from
  void to int, to make it compatible with MY_CHARSET_LOADER::add_collation.
- Functions mysql_uca0900_collation_definition_add(),
  mysql_uca0900_utf8mb4_collation_definitions_add(),
  mysql_utf8mb4_0900_bin_add() were added into ctype-uca0900.c.
  They get MY_CHARSET_LOADER as a parameter.
- Functions my_uca1400_collation_definition_add(),
  my_uca1400_collation_definitions_add() were moved from
  charset-def.c to strings/ctype-uca1400.c.
  The latter now accepts MY_CHARSET_LOADER as the first parameter
  instead of initializing a MY_CHARSET_LOADER inside.
- init_compiled_charsets() now initializes a MY_CHARSET_LOADER
  variable and passes it to all functions adding collations:
  - mysql_utf8mb4_0900_collation_definitions_add()
  - mysql_uca0900_utf8mb4_collation_definitions_add()
  - mysql_utf8mb4_0900_bin_add()
- A new structure was added into ctype-uca.h:
  typedef struct uca_collation_def_param
  {
    my_cs_encoding_t cs_id;
    uint tailoring_id;
    uint nopad_flags;
    uint level_flags;
  } uca_collation_def_param_t;
  It simplifies reusing the code for _uca1400_ and _0900_ collations.
- The definition of MY_UCA1400_COLLATION_DEFINITION was
  moved from ctype-uca.c to ctype-uca1400.h, to reuse
  the code for _uca1400_ and _0900_ collations.
- The definitions of "MY_UCA_INFO my_uca_v1400" and
  "MY_UCA_INFO my_uca1400_info_tailored[][]" were moved from
  ctype-uca.c to ctype-uca1400.c.
- The definitions/declarations of:
  - mysql_0900_collation_start,
  - struct mysql_0900_to_mariadb_1400_mapping
  - mysql_0900_to_mariadb_1400_mapping
  - mysql_utf8mb4_0900_collation_definitions_add()
  were moved from ctype-uca.c to ctype-uca0900.c
- Functions
  my_uca1400_make_builtin_collation_id()
  my_uca1400_collation_definition_init()
  my_uca1400_collation_id_uca400_compat()
  my_ci_get_collation_name_uca1400_context()
  were moved from ctype-uca.c to ctype-uca1400.c and ctype-uca1400.h
- A part of my_uca1400_collation_definition_init()
  was moved into my_uca0520_builtin_collation_by_id(),
  to make functions smaller.
		
	
			
		
			
				
	
	
		
			2219 lines
		
	
	
	
		
			52 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			2219 lines
		
	
	
	
		
			52 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /* Copyright (c) 2002, 2013, Oracle and/or its affiliates.
 | |
|    Copyright (c) 2009, 2020, MariaDB Corporation.
 | |
| 
 | |
|    This program is free software; you can redistribute it and/or modify
 | |
|    it under the terms of the GNU General Public License as published by
 | |
|    the Free Software Foundation; version 2 of the License.
 | |
| 
 | |
|    This program is distributed in the hope that it will be useful,
 | |
|    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
|    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | |
|    GNU General Public License for more details.
 | |
| 
 | |
|    You should have received a copy of the GNU General Public License
 | |
|    along with this program; if not, write to the Free Software
 | |
|    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
 | |
| 
 | |
| #include "strings_def.h"
 | |
| #include <m_ctype.h>
 | |
| #include "ctype-simple.h"
 | |
| #include "my_sys.h"  /* Needed for MY_ERRNO_ERANGE */
 | |
| #include <errno.h>
 | |
| 
 | |
| #include "stdarg.h"
 | |
| #include "my_bit.h"
 | |
| 
 | |
| /*
 | |
|   Returns the number of bytes required for strnxfrm().
 | |
| */
 | |
| 
 | |
| size_t my_strnxfrmlen_simple(CHARSET_INFO *cs, size_t len)
 | |
| {
 | |
|   return len * (cs->strxfrm_multiply ? cs->strxfrm_multiply : 1);
 | |
| }
 | |
| 
 | |
| 
 | |
| /*
 | |
|   Converts a string into its sort key.
 | |
|   
 | |
|   SYNOPSIS
 | |
|      my_strnxfrm_xxx()
 | |
|      
 | |
|   IMPLEMENTATION
 | |
|      
 | |
|      The my_strnxfrm_xxx() function transforms a string pointed to by
 | |
|      'src' with length 'srclen' according to the charset+collation 
 | |
|      pair 'cs' and copies the result key into 'dest'.
 | |
|      
 | |
|      Comparing two strings using memcmp() after my_strnxfrm_xxx()
 | |
|      is equal to comparing two original strings with my_strnncollsp_xxx().
 | |
|      
 | |
|      Not more than 'dstlen' bytes are written into 'dst'.
 | |
|      To guarantee that the whole string is transformed, 'dstlen' must be
 | |
|      at least srclen*cs->strxfrm_multiply bytes long. Otherwise,
 | |
|      subsequent memcmp() may return a non-accurate result.
 | |
|      
 | |
|      If the source string is too short to fill whole 'dstlen' bytes,
 | |
|      then the 'dest' string is padded up to 'dstlen', ensuring that:
 | |
|      
 | |
|        "a"  == "a "
 | |
|        "a\0" < "a"
 | |
|        "a\0" < "a "
 | |
|      
 | |
|      my_strnxfrm_simple() is implemented for 8bit charsets and
 | |
|      simple collations with one-to-one string->key transformation.
 | |
|      
 | |
|      See also implementations for various charsets/collations in  
 | |
|      other ctype-xxx.c files.
 | |
|      
 | |
|   RETURN
 | |
|   
 | |
|     Target len 'dstlen'.
 | |
|   
 | |
| */
 | |
| 
 | |
| 
 | |
| size_t my_strnxfrm_simple_internal(CHARSET_INFO * cs,
 | |
|                                    uchar *dst, size_t dstlen, uint *nweights,
 | |
|                                    const uchar *src, size_t srclen)
 | |
| {
 | |
|   const uchar *map= cs->sort_order;
 | |
|   uchar *d0= dst;
 | |
|   uint frmlen;
 | |
|   if ((frmlen= (uint)MY_MIN(dstlen, *nweights)) > srclen)
 | |
|     frmlen= (uint)srclen;
 | |
|   if (dst != src)
 | |
|   {
 | |
|     const uchar *end;
 | |
|     for (end= src + frmlen; src < end;)
 | |
|       *dst++= map[*src++];
 | |
|   }
 | |
|   else
 | |
|   {
 | |
|     const uchar *end;
 | |
|     for (end= dst + frmlen; dst < end; dst++)
 | |
|       *dst= map[(uchar) *dst];
 | |
|   }
 | |
|   *nweights-= frmlen;
 | |
|   return dst - d0;
 | |
| }
 | |
| 
 | |
| 
 | |
| size_t my_strnxfrm_simple(CHARSET_INFO * cs,
 | |
|                           uchar *dst, size_t dstlen, uint nweights,
 | |
|                           const uchar *src, size_t srclen, uint flags)
 | |
| {
 | |
|   uchar *d0= dst;
 | |
|   dst= d0 + my_strnxfrm_simple_internal(cs, dst, dstlen, &nweights,
 | |
|                                         src, srclen);
 | |
|   return my_strxfrm_pad_desc_and_reverse(cs, d0, dst, d0 + dstlen,
 | |
|                                          nweights, flags, 0);
 | |
| }
 | |
| 
 | |
| 
 | |
| size_t my_strnxfrm_simple_nopad(CHARSET_INFO * cs,
 | |
|                                 uchar *dst, size_t dstlen, uint nweights,
 | |
|                                 const uchar *src, size_t srclen, uint flags)
 | |
| {
 | |
|   uchar *d0= dst;
 | |
|   dst= d0 + my_strnxfrm_simple_internal(cs, dst, dstlen, &nweights,
 | |
|                                         src, srclen);
 | |
|   return my_strxfrm_pad_desc_and_reverse_nopad(cs, d0, dst, d0 + dstlen,
 | |
|                                                nweights, flags, 0);
 | |
| }
 | |
| 
 | |
| 
 | |
| int my_strnncoll_simple(CHARSET_INFO * cs, const uchar *s, size_t slen, 
 | |
|                         const uchar *t, size_t tlen,
 | |
|                         my_bool t_is_prefix)
 | |
| {
 | |
|   size_t len = ( slen > tlen ) ? tlen : slen;
 | |
|   const uchar *map= cs->sort_order;
 | |
|   if (t_is_prefix && slen > tlen)
 | |
|     slen=tlen;
 | |
|   while (len--)
 | |
|   {
 | |
|     if (map[*s++] != map[*t++])
 | |
|       return ((int) map[s[-1]] - (int) map[t[-1]]);
 | |
|   }
 | |
|   /*
 | |
|     We can't use (slen - tlen) here as the result may be outside of the
 | |
|     precision of a signed int
 | |
|   */
 | |
|   return slen > tlen ? 1 : slen < tlen ? -1 : 0 ;
 | |
| }
 | |
| 
 | |
| 
 | |
| /*
 | |
|   Compare strings, discarding end space
 | |
| 
 | |
|   SYNOPSIS
 | |
|     my_strnncollsp_simple()
 | |
|     cs			character set handler
 | |
|     a			First string to compare
 | |
|     a_length		Length of 'a'
 | |
|     b			Second string to compare
 | |
|     b_length		Length of 'b'
 | |
| 
 | |
|   IMPLEMENTATION
 | |
|     If one string is shorter as the other, then we space extend the other
 | |
|     so that the strings have equal length.
 | |
| 
 | |
|     This will ensure that the following things hold:
 | |
| 
 | |
|     "a"  == "a "
 | |
|     "a\0" < "a"
 | |
|     "a\0" < "a "
 | |
| 
 | |
|   RETURN
 | |
|     < 0	 a <  b
 | |
|     = 0	 a == b
 | |
|     > 0	 a > b
 | |
| */
 | |
| 
 | |
| int my_strnncollsp_simple(CHARSET_INFO * cs, const uchar *a, size_t a_length, 
 | |
| 			  const uchar *b, size_t b_length)
 | |
| {
 | |
|   const uchar *map= cs->sort_order, *end;
 | |
|   size_t length;
 | |
| 
 | |
|   end= a + (length= MY_MIN(a_length, b_length));
 | |
|   while (a < end)
 | |
|   {
 | |
|     if (map[*a++] != map[*b++])
 | |
|       return ((int) map[a[-1]] - (int) map[b[-1]]);
 | |
|   }
 | |
|   if (a_length != b_length)
 | |
|   {
 | |
|     int swap= 1;
 | |
|     /*
 | |
|       Check the next not space character of the longer key. If it's < ' ',
 | |
|       then it's smaller than the other key.
 | |
|     */
 | |
|     if (a_length < b_length)
 | |
|     {
 | |
|       /* put shorter key in s */
 | |
|       a_length= b_length;
 | |
|       a= b;
 | |
|       swap= -1;                                 /* swap sign of result */
 | |
|     }
 | |
|     for (end= a + a_length-length; a < end ; a++)
 | |
|     {
 | |
|       if (map[*a] != map[' '])
 | |
|         return (map[*a] < map[' ']) ? -swap : swap;
 | |
|     }
 | |
|   }
 | |
|   return 0;
 | |
| }
 | |
| 
 | |
| 
 | |
| static int
 | |
| my_strnncollsp_nchars_simple(CHARSET_INFO * cs,
 | |
|                              const uchar *a, size_t a_length,
 | |
|                              const uchar *b, size_t b_length,
 | |
|                              size_t nchars,
 | |
|                              uint flags)
 | |
| {
 | |
|   set_if_smaller(a_length, nchars);
 | |
|   set_if_smaller(b_length, nchars);
 | |
|   return my_strnncollsp_simple(cs, a, a_length, b, b_length);
 | |
| }
 | |
| 
 | |
| 
 | |
| int my_strnncollsp_simple_nopad(CHARSET_INFO * cs,
 | |
|                                 const uchar *a, size_t a_length,
 | |
|                                 const uchar *b, size_t b_length)
 | |
| {
 | |
|   return my_strnncoll_simple(cs, a, a_length, b, b_length, FALSE);
 | |
| }
 | |
| 
 | |
| 
 | |
| size_t my_caseup_str_8bit(CHARSET_INFO * cs,char *str)
 | |
| {
 | |
|   register const uchar *map= cs->to_upper;
 | |
|   char *str_orig= str;
 | |
|   while ((*str= (char) map[(uchar) *str]) != 0)
 | |
|     str++;
 | |
|   return (size_t) (str - str_orig);
 | |
| }
 | |
| 
 | |
| 
 | |
| size_t my_casedn_str_8bit(CHARSET_INFO * cs,char *str)
 | |
| {
 | |
|   register const uchar *map= cs->to_lower;
 | |
|   char *str_orig= str;
 | |
|   while ((*str= (char) map[(uchar) *str]) != 0)
 | |
|     str++;
 | |
|   return (size_t) (str - str_orig);
 | |
| }
 | |
| 
 | |
| 
 | |
| size_t my_caseup_8bit(CHARSET_INFO * cs, const char *src, size_t srclen,
 | |
|                       char *dst, size_t dstlen __attribute__((unused)))
 | |
| {
 | |
|   const char *end= src + srclen;
 | |
|   register const uchar *map= cs->to_upper;
 | |
|   DBUG_ASSERT(src != NULL); /* Avoid UBSAN nullptr-with-offset */
 | |
|   DBUG_ASSERT(srclen <= dstlen);
 | |
|   for ( ; src != end ; src++)
 | |
|     *dst++= (char) map[(uchar) *src];
 | |
|   return srclen;
 | |
| }
 | |
| 
 | |
| 
 | |
| size_t my_casedn_8bit(CHARSET_INFO * cs, const char *src, size_t srclen,
 | |
|                       char *dst, size_t dstlen __attribute__((unused)))
 | |
| {
 | |
|   const char *end= src + srclen;
 | |
|   register const uchar *map=cs->to_lower;
 | |
|   DBUG_ASSERT(src != NULL); /* Avoid UBSAN nullptr-with-offset */
 | |
|   DBUG_ASSERT(srclen <= dstlen);
 | |
|   for ( ; src != end ; src++)
 | |
|     *dst++= (char) map[(uchar) *src];
 | |
|   return srclen;
 | |
| }
 | |
| 
 | |
| int my_strcasecmp_8bit(CHARSET_INFO * cs,const char *s, const char *t)
 | |
| {
 | |
|   register const uchar *map=cs->to_upper;
 | |
|   while (map[(uchar) *s] == map[(uchar) *t++])
 | |
|     if (!*s++) return 0;
 | |
|   return ((int) map[(uchar) s[0]] - (int) map[(uchar) t[-1]]);
 | |
| }
 | |
| 
 | |
| 
 | |
| int my_charlen_8bit(CHARSET_INFO *cs __attribute__((unused)),
 | |
|                     const uchar *str, const uchar *end)
 | |
| {
 | |
|   return str >= end ? MY_CS_TOOSMALL : 1;
 | |
| }
 | |
| 
 | |
| 
 | |
| int my_mb_wc_8bit(CHARSET_INFO *cs,my_wc_t *wc,
 | |
| 		  const uchar *str,
 | |
| 		  const uchar *end __attribute__((unused)))
 | |
| {
 | |
|   if (str >= end)
 | |
|     return MY_CS_TOOSMALL;
 | |
|   
 | |
|   *wc=cs->tab_to_uni[*str];
 | |
|   return (!wc[0] && str[0]) ? -1 : 1;
 | |
| }
 | |
| 
 | |
| int my_wc_mb_8bit(CHARSET_INFO *cs,my_wc_t wc,
 | |
| 		  uchar *str,
 | |
| 		  uchar *end)
 | |
| {
 | |
|   MY_UNI_IDX *idx;
 | |
| 
 | |
|   if (str >= end)
 | |
|     return MY_CS_TOOSMALL;
 | |
|   
 | |
|   for (idx=cs->tab_from_uni; idx->tab ; idx++)
 | |
|   {
 | |
|     if (idx->from <= wc && idx->to >= wc)
 | |
|     {
 | |
|       str[0]= idx->tab[wc - idx->from];
 | |
|       return (!str[0] && wc) ? MY_CS_ILUNI : 1;
 | |
|     }
 | |
|   }
 | |
|   return MY_CS_ILUNI;
 | |
| }
 | |
| 
 | |
| 
 | |
| /* 
 | |
|    We can't use vsprintf here as it's not guaranteed to return
 | |
|    the length on all operating systems.
 | |
|    This function is also not called in a safe environment, so the
 | |
|    end buffer must be checked.
 | |
| */
 | |
| 
 | |
| size_t my_snprintf_8bit(CHARSET_INFO *cs  __attribute__((unused)),
 | |
|                         char* to, size_t n  __attribute__((unused)),
 | |
| 		     const char* fmt, ...)
 | |
| {
 | |
|   va_list args;
 | |
|   size_t result;
 | |
|   va_start(args,fmt);
 | |
|   result= my_vsnprintf(to, n, fmt, args);
 | |
|   va_end(args);
 | |
|   return result;
 | |
| }
 | |
| 
 | |
| 
 | |
| void my_hash_sort_simple_nopad(CHARSET_INFO *cs,
 | |
| 			       const uchar *key, size_t len,
 | |
| 			       ulong *nr1, ulong *nr2)
 | |
| {
 | |
|   register const uchar *sort_order=cs->sort_order;
 | |
|   const uchar *end= key + len;
 | |
|   register ulong m1= *nr1, m2= *nr2;
 | |
|   DBUG_ASSERT(key); /* Avoid UBSAN nullptr-with-offset */
 | |
|   for (; key < (uchar*) end ; key++)
 | |
|   {
 | |
|     MY_HASH_ADD(m1, m2, (uint) sort_order[(uint) *key]);
 | |
|   }
 | |
|   *nr1= m1;
 | |
|   *nr2= m2;
 | |
| }
 | |
| 
 | |
| 
 | |
| void my_hash_sort_simple(CHARSET_INFO *cs,
 | |
|                          const uchar *key, size_t len,
 | |
|                          ulong *nr1, ulong *nr2)
 | |
| {
 | |
|   register const uchar *sort_order=cs->sort_order;
 | |
|   const uchar *end;
 | |
|   uint16 space_weight= sort_order[' '];
 | |
|   DBUG_ASSERT(key); /* Avoid UBSAN nullptr-with-offset */
 | |
| 
 | |
|   /*
 | |
|     Remove all trailing characters that are equal to space.
 | |
|     We have to do this to be able to compare 'A ' and 'A' as identical.
 | |
| 
 | |
|     If the key is long enough, cut the trailing spaces (0x20) using an
 | |
|     optimized function implemented in skip_trailing_spaces().
 | |
| 
 | |
|     "len > 16" is just some heuristic here.
 | |
|     Calling skip_triling_space() for short values is not desirable,
 | |
|     because its initialization block may be more expensive than the
 | |
|     performance gained.
 | |
|   */
 | |
| 
 | |
|   end= len > 16 ? skip_trailing_space(key, len) : key + len;
 | |
| 
 | |
|   /*
 | |
|     We removed all trailing characters that are binary equal to space 0x20.
 | |
|     Now remove all trailing characters that have weights equal to space.
 | |
|     Some 8bit simple collations may have such characters:
 | |
|     - cp1250_general_ci    0xA0 NO-BREAK SPACE == 0x20 SPACE
 | |
|     - cp1251_ukrainian_ci  0x60 GRAVE ACCENT   == 0x20 SPACE
 | |
|     - koi8u_general_ci     0x60 GRAVE ACCENT   == 0x20 SPACE
 | |
|   */
 | |
| 
 | |
|   for ( ; key < end ; )
 | |
|   {
 | |
|     if (sort_order[*--end] != space_weight)
 | |
|     {
 | |
|       end++;
 | |
|       break;
 | |
|     }
 | |
|   }
 | |
|   my_hash_sort_simple_nopad(cs, key, end - key, nr1, nr2);
 | |
| }
 | |
| 
 | |
| 
 | |
| long my_strntol_8bit(CHARSET_INFO *cs,
 | |
| 		     const char *nptr, size_t l, int base,
 | |
| 		     char **endptr, int *err)
 | |
| {
 | |
|   int negative;
 | |
|   register uint32 cutoff;
 | |
|   register uint cutlim;
 | |
|   register uint32 i;
 | |
|   register const char *s;
 | |
|   register uchar c;
 | |
|   const char *save, *e;
 | |
|   int overflow;
 | |
| 
 | |
|   *err= 0;				/* Initialize error indicator */
 | |
| 
 | |
|   s = nptr;
 | |
|   e = nptr+l;
 | |
|   
 | |
|   for ( ; s<e && my_isspace(cs, *s) ; s++);
 | |
|   
 | |
|   if (s == e)
 | |
|   {
 | |
|     goto noconv;
 | |
|   }
 | |
|   
 | |
|   /* Check for a sign.	*/
 | |
|   if (*s == '-')
 | |
|   {
 | |
|     negative = 1;
 | |
|     ++s;
 | |
|   }
 | |
|   else if (*s == '+')
 | |
|   {
 | |
|     negative = 0;
 | |
|     ++s;
 | |
|   }
 | |
|   else
 | |
|     negative = 0;
 | |
| 
 | |
|   save = s;
 | |
|   cutoff = ((uint32)~0L) / (uint32) base;
 | |
|   cutlim = (uint) (((uint32)~0L) % (uint32) base);
 | |
| 
 | |
|   overflow = 0;
 | |
|   i = 0;
 | |
|   for (c = *s; s != e; c = *++s)
 | |
|   {
 | |
|     if (c>='0' && c<='9')
 | |
|       c -= '0';
 | |
|     else if (c>='A' && c<='Z')
 | |
|       c = c - 'A' + 10;
 | |
|     else if (c>='a' && c<='z')
 | |
|     {
 | |
|       c = c - 'a' + 10;
 | |
|       if (base > 36)
 | |
|         c += 26;
 | |
|     }
 | |
|     else
 | |
|       break;
 | |
|     if (c >= base)
 | |
|       break;
 | |
|     if (i > cutoff || (i == cutoff && c > cutlim))
 | |
|       overflow = 1;
 | |
|     else
 | |
|     {
 | |
|       i *= (uint32) base;
 | |
|       i += c;
 | |
|     }
 | |
|   }
 | |
|   
 | |
|   if (s == save)
 | |
|     goto noconv;
 | |
|   
 | |
|   if (endptr != NULL)
 | |
|     *endptr = (char *) s;
 | |
|   
 | |
|   if (negative)
 | |
|   {
 | |
|     if (i  > (uint32) INT_MIN32)
 | |
|       overflow = 1;
 | |
|   }
 | |
|   else if (i > INT_MAX32)
 | |
|     overflow = 1;
 | |
|   
 | |
|   if (overflow)
 | |
|   {
 | |
|     err[0]= ERANGE;
 | |
|     return negative ? INT_MIN32 : INT_MAX32;
 | |
|   }
 | |
|   
 | |
|   return (negative ? -((long) i) : (long) i);
 | |
| 
 | |
| noconv:
 | |
|   err[0]= EDOM;
 | |
|   if (endptr != NULL)
 | |
|     *endptr = (char *) nptr;
 | |
|   return 0L;
 | |
| }
 | |
| 
 | |
| 
 | |
| ulong my_strntoul_8bit(CHARSET_INFO *cs,
 | |
| 		       const char *nptr, size_t l, int base,
 | |
| 		       char **endptr, int *err)
 | |
| {
 | |
|   int negative;
 | |
|   register uint32 cutoff;
 | |
|   register uint cutlim;
 | |
|   register uint32 i;
 | |
|   register const char *s;
 | |
|   const char *save, *e;
 | |
|   int overflow;
 | |
| 
 | |
|   *err= 0;				/* Initialize error indicator */
 | |
| 
 | |
|   s = nptr;
 | |
|   e = nptr+l;
 | |
|   
 | |
|   for( ; s<e && my_isspace(cs, *s); s++);
 | |
|   
 | |
|   if (s==e)
 | |
|   {
 | |
|     goto noconv;
 | |
|   }
 | |
| 
 | |
|   if (*s == '-')
 | |
|   {
 | |
|     negative = 1;
 | |
|     ++s;
 | |
|   }
 | |
|   else if (*s == '+')
 | |
|   {
 | |
|     negative = 0;
 | |
|     ++s;
 | |
|   }
 | |
|   else
 | |
|     negative = 0;
 | |
| 
 | |
|   save = s;
 | |
|   cutoff = ((uint32)~0L) / (uint32) base;
 | |
|   cutlim = (uint) (((uint32)~0L) % (uint32) base);
 | |
|   overflow = 0;
 | |
|   i = 0;
 | |
|   
 | |
|   for ( ; s != e; ++s)
 | |
|   {
 | |
|     register uchar c= *s;
 | |
|     if (c>='0' && c<='9')
 | |
|       c -= '0';
 | |
|     else if (c>='A' && c<='Z')
 | |
|       c = c - 'A' + 10;
 | |
|     else if (c>='a' && c<='z')
 | |
|     {
 | |
|       c = c - 'a' + 10;
 | |
|       if (base > 36)
 | |
|         c += 26;
 | |
|     }
 | |
|     else
 | |
|       break;
 | |
|     if (c >= base)
 | |
|       break;
 | |
|     if (i > cutoff || (i == cutoff && c > cutlim))
 | |
|       overflow = 1;
 | |
|     else
 | |
|     {
 | |
|       i *= (uint32) base;
 | |
|       i += c;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   if (s == save)
 | |
|     goto noconv;
 | |
| 
 | |
|   if (endptr != NULL)
 | |
|     *endptr = (char *) s;
 | |
| 
 | |
|   if (overflow)
 | |
|   {
 | |
|     err[0]= ERANGE;
 | |
|     return (~(uint32) 0);
 | |
|   }
 | |
|   
 | |
|   return (negative ? -((long) i) : (long) i);
 | |
|   
 | |
| noconv:
 | |
|   err[0]= EDOM;
 | |
|   if (endptr != NULL)
 | |
|     *endptr = (char *) nptr;
 | |
|   return 0L;
 | |
| }
 | |
| 
 | |
| 
 | |
| longlong my_strntoll_8bit(CHARSET_INFO *cs __attribute__((unused)),
 | |
| 			  const char *nptr, size_t l, int base,
 | |
| 			  char **endptr,int *err)
 | |
| {
 | |
|   int negative;
 | |
|   register ulonglong cutoff;
 | |
|   register uint cutlim;
 | |
|   register ulonglong i;
 | |
|   register const char *s, *e;
 | |
|   const char *save;
 | |
|   int overflow;
 | |
| 
 | |
|   *err= 0;				/* Initialize error indicator */
 | |
| 
 | |
|   s = nptr;
 | |
|   e = nptr+l;
 | |
| 
 | |
|   for(; s<e && my_isspace(cs,*s); s++);
 | |
| 
 | |
|   if (s == e)
 | |
|   {
 | |
|     goto noconv;
 | |
|   }
 | |
| 
 | |
|   if (*s == '-')
 | |
|   {
 | |
|     negative = 1;
 | |
|     ++s;
 | |
|   }
 | |
|   else if (*s == '+')
 | |
|   {
 | |
|     negative = 0;
 | |
|     ++s;
 | |
|   }
 | |
|   else
 | |
|     negative = 0;
 | |
| 
 | |
|   save = s;
 | |
| 
 | |
|   cutoff = (~(ulonglong) 0) / (unsigned long int) base;
 | |
|   cutlim = (uint) ((~(ulonglong) 0) % (unsigned long int) base);
 | |
| 
 | |
|   overflow = 0;
 | |
|   i = 0;
 | |
|   for ( ; s != e; s++)
 | |
|   {
 | |
|     register uchar c= *s;
 | |
|     if (c>='0' && c<='9')
 | |
|       c -= '0';
 | |
|     else if (c>='A' && c<='Z')
 | |
|       c = c - 'A' + 10;
 | |
|     else if (c>='a' && c<='z')
 | |
|     {
 | |
|       c = c - 'a' + 10;
 | |
|       if (base > 36)
 | |
|         c += 26;
 | |
|     }
 | |
|     else
 | |
|       break;
 | |
|     if (c >= base)
 | |
|       break;
 | |
|     if (i > cutoff || (i == cutoff && c > cutlim))
 | |
|       overflow = 1;
 | |
|     else
 | |
|     {
 | |
|       i *= (ulonglong) base;
 | |
|       i += c;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   if (s == save)
 | |
|     goto noconv;
 | |
| 
 | |
|   if (endptr != NULL)
 | |
|     *endptr = (char *) s;
 | |
| 
 | |
|   if (negative)
 | |
|   {
 | |
|     if (i >= (ulonglong) LONGLONG_MIN)
 | |
|     {
 | |
|       if (i == (ulonglong) LONGLONG_MIN)
 | |
|         return LONGLONG_MIN;
 | |
|       overflow = 1;
 | |
|     }
 | |
|   }
 | |
|   else if (i > (ulonglong) LONGLONG_MAX)
 | |
|     overflow = 1;
 | |
| 
 | |
|   if (overflow)
 | |
|   {
 | |
|     err[0]= ERANGE;
 | |
|     return negative ? LONGLONG_MIN : LONGLONG_MAX;
 | |
|   }
 | |
| 
 | |
|   return (negative ? -((longlong) i) : (longlong) i);
 | |
| 
 | |
| noconv:
 | |
|   err[0]= EDOM;
 | |
|   if (endptr != NULL)
 | |
|     *endptr = (char *) nptr;
 | |
|   return 0L;
 | |
| }
 | |
| 
 | |
| 
 | |
| ulonglong my_strntoull_8bit(CHARSET_INFO *cs,
 | |
| 			   const char *nptr, size_t l, int base,
 | |
| 			   char **endptr, int *err)
 | |
| {
 | |
|   int negative;
 | |
|   register ulonglong cutoff;
 | |
|   register uint cutlim;
 | |
|   register ulonglong i;
 | |
|   register const char *s, *e;
 | |
|   const char *save;
 | |
|   int overflow;
 | |
| 
 | |
|   *err= 0;				/* Initialize error indicator */
 | |
| 
 | |
|   s = nptr;
 | |
|   e = nptr+l;
 | |
| 
 | |
|   for(; s<e && my_isspace(cs,*s); s++);
 | |
| 
 | |
|   if (s == e)
 | |
|   {
 | |
|     goto noconv;
 | |
|   }
 | |
| 
 | |
|   if (*s == '-')
 | |
|   {
 | |
|     negative = 1;
 | |
|     ++s;
 | |
|   }
 | |
|   else if (*s == '+')
 | |
|   {
 | |
|     negative = 0;
 | |
|     ++s;
 | |
|   }
 | |
|   else
 | |
|     negative = 0;
 | |
| 
 | |
|   save = s;
 | |
| 
 | |
|   cutoff = (~(ulonglong) 0) / (unsigned long int) base;
 | |
|   cutlim = (uint) ((~(ulonglong) 0) % (unsigned long int) base);
 | |
| 
 | |
|   overflow = 0;
 | |
|   i = 0;
 | |
|   for ( ; s != e; s++)
 | |
|   {
 | |
|     register uchar c= *s;
 | |
| 
 | |
|     if (c>='0' && c<='9')
 | |
|       c -= '0';
 | |
|     else if (c>='A' && c<='Z')
 | |
|       c = c - 'A' + 10;
 | |
|     else if (c>='a' && c<='z')
 | |
|     {
 | |
|       c = c - 'a' + 10;
 | |
|       if (base > 36)
 | |
|         c += 26;
 | |
|     }
 | |
|     else
 | |
|       break;
 | |
|     if (c >= base)
 | |
|       break;
 | |
|     if (i > cutoff || (i == cutoff && c > cutlim))
 | |
|       overflow = 1;
 | |
|     else
 | |
|     {
 | |
|       i *= (ulonglong) base;
 | |
|       i += c;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   if (s == save)
 | |
|     goto noconv;
 | |
| 
 | |
|   if (endptr != NULL)
 | |
|     *endptr = (char *) s;
 | |
| 
 | |
|   if (overflow)
 | |
|   {
 | |
|     err[0]= ERANGE;
 | |
|     return (~(ulonglong) 0);
 | |
|   }
 | |
| 
 | |
|   /* Avoid undefinite behavior - negation of LONGLONG_MIN */
 | |
|   return negative && (longlong) i != LONGLONG_MIN ?
 | |
|         -((longlong) i) :
 | |
|          (longlong) i;
 | |
| 
 | |
| noconv:
 | |
|   err[0]= EDOM;
 | |
|   if (endptr != NULL)
 | |
|     *endptr = (char *) nptr;
 | |
|   return 0L;
 | |
| }
 | |
| 
 | |
| 
 | |
| /*
 | |
|   Read double from string
 | |
| 
 | |
|   SYNOPSIS:
 | |
|     my_strntod_8bit()
 | |
|     cs		Character set information
 | |
|     str		String to convert to double
 | |
|     length	Optional length for string.
 | |
|     end		result pointer to end of converted string
 | |
|     err		Error number if failed conversion
 | |
|     
 | |
|   NOTES:
 | |
|     If length is not INT_MAX32 or str[length] != 0 then the given str must
 | |
|     be writeable
 | |
|     If length == INT_MAX32 the str must be \0 terminated.
 | |
| 
 | |
|     It's implemented this way to save a buffer allocation and a memory copy.
 | |
| 
 | |
|   RETURN
 | |
|     Value of number in string
 | |
| */
 | |
| 
 | |
| 
 | |
| double my_strntod_8bit(CHARSET_INFO *cs __attribute__((unused)),
 | |
| 		       char *str, size_t length,
 | |
| 		       char **end, int *err)
 | |
| {
 | |
|   if (length == INT_MAX32)
 | |
|     length= 65535;                          /* Should be big enough */
 | |
|   *end= str + length;
 | |
|   return my_strtod(str, end, err);
 | |
| }
 | |
| 
 | |
| 
 | |
| /*
 | |
|   This is a fast version optimized for the case of radix 10 / -10
 | |
| 
 | |
|   Assume len >= 1
 | |
| */
 | |
| 
 | |
| size_t my_long10_to_str_8bit(CHARSET_INFO *cs __attribute__((unused)),
 | |
|                              char *dst, size_t len, int radix, long int val)
 | |
| {
 | |
|   char buffer[66];
 | |
|   register char *p, *e;
 | |
|   long int new_val;
 | |
|   uint sign=0;
 | |
|   unsigned long int uval = (unsigned long int) val;
 | |
| 
 | |
|   e = p = &buffer[sizeof(buffer)-1];
 | |
|   *p= 0;
 | |
|   
 | |
|   if (radix < 0)
 | |
|   {
 | |
|     if (val < 0)
 | |
|     {
 | |
|       /* Avoid integer overflow in (-val) for LONGLONG_MIN (BUG#31799). */
 | |
|       uval= (unsigned long int)0 - uval;
 | |
|       *dst++= '-';
 | |
|       len--;
 | |
|       sign= 1;
 | |
|     }
 | |
|   }
 | |
|   
 | |
|   new_val = (long) (uval / 10);
 | |
|   *--p    = '0'+ (char) (uval - (unsigned long) new_val * 10);
 | |
|   val     = new_val;
 | |
|   
 | |
|   while (val != 0)
 | |
|   {
 | |
|     new_val=val/10;
 | |
|     *--p = '0' + (char) (val-new_val*10);
 | |
|     val= new_val;
 | |
|   }
 | |
|   
 | |
|   len= MY_MIN(len, (size_t) (e-p));
 | |
|   memcpy(dst, p, len);
 | |
|   return len+sign;
 | |
| }
 | |
| 
 | |
| 
 | |
| size_t my_longlong10_to_str_8bit(CHARSET_INFO *cs __attribute__((unused)),
 | |
|                                  char *dst, size_t len, int radix,
 | |
|                                  longlong val)
 | |
| {
 | |
|   char buffer[65];
 | |
|   register char *p, *e;
 | |
|   long long_val;
 | |
|   uint sign= 0;
 | |
|   ulonglong uval = (ulonglong)val;
 | |
|   
 | |
|   if (radix < 0)
 | |
|   {
 | |
|     if (val < 0)
 | |
|     {
 | |
|       /* Avoid integer overflow in (-val) for LONGLONG_MIN (BUG#31799). */
 | |
|       uval = (ulonglong)0 - uval;
 | |
|       *dst++= '-';
 | |
|       len--;
 | |
|       sign= 1;
 | |
|     }
 | |
|   }
 | |
|   
 | |
|   e = p = &buffer[sizeof(buffer)-1];
 | |
|   *p= 0;
 | |
|   
 | |
|   if (uval == 0)
 | |
|   {
 | |
|     *--p= '0';
 | |
|     len= 1;
 | |
|     goto cnv;
 | |
|   }
 | |
|   
 | |
|   while (uval > (ulonglong) LONG_MAX)
 | |
|   {
 | |
|     ulonglong quo= uval/(uint) 10;
 | |
|     uint rem= (uint) (uval- quo* (uint) 10);
 | |
|     *--p = '0' + rem;
 | |
|     uval= quo;
 | |
|   }
 | |
|   
 | |
|   long_val= (long) uval;
 | |
|   while (long_val != 0)
 | |
|   {
 | |
|     long quo= long_val/10;
 | |
|     *--p = (char) ('0' + (long_val - quo*10));
 | |
|     long_val= quo;
 | |
|   }
 | |
|   
 | |
|   len= MY_MIN(len, (size_t) (e-p));
 | |
| cnv:
 | |
|   memcpy(dst, p, len);
 | |
|   return len+sign;
 | |
| }
 | |
| 
 | |
| 
 | |
| size_t my_min_str_8bit_simple(CHARSET_INFO *cs,
 | |
|                               uchar *dst, size_t dst_size,
 | |
|                               size_t nchars)
 | |
| {
 | |
|   set_if_smaller(dst_size, nchars);
 | |
|   memset(dst, cs->min_sort_char, dst_size);
 | |
|   return dst_size;
 | |
| }
 | |
| 
 | |
| 
 | |
| size_t my_min_str_8bit_simple_nopad(CHARSET_INFO *cs,
 | |
|                                     uchar *dst, size_t dst_size,
 | |
|                                     size_t nchars)
 | |
| {
 | |
|   /* For NOPAD collations, the empty string is always the smallest */
 | |
|   return 0;
 | |
| }
 | |
| 
 | |
| 
 | |
| size_t my_max_str_8bit_simple(CHARSET_INFO *cs,
 | |
|                               uchar *dst, size_t dst_size,
 | |
|                               size_t nchars)
 | |
| {
 | |
|   set_if_smaller(dst_size, nchars);
 | |
|   memset(dst, cs->max_sort_char, dst_size);
 | |
|   return dst_size;
 | |
| }
 | |
| 
 | |
| 
 | |
/*
** Compare string against string with wildcard
**	 0 if matched
**	-1 if not matched with wildcard
**	 1 if not matched (the callers below treat 1 as "no match" too)
*/

#ifdef LIKE_CMP_TOUPPER
/* Bytes are compared after conversion with my_toupper() */
#define likeconv(s,A) (uchar) my_toupper(s,A)
#else
/* Bytes are compared through the sort_order table of the collation */
#define likeconv(s,A) (uchar) (s)->sort_order[(uchar) (A)]
#endif

/* Step to the next character: one byte; cs and B are not used */
#define INC_PTR(cs,A,B) (A)++
 | |
| 
 | |
| 
 | |
/*
  Recursive worker behind my_wildcmp_8bit().

  Matches [str, str_end) against the pattern [wildstr, wildend), where
  'escape' quotes the next pattern byte, 'w_one' matches exactly one byte
  and 'w_many' matches any number of bytes. Ordinary bytes are compared
  through likeconv(). recurse_level is the current recursion depth; it is
  only passed to the optional my_string_stack_guard hook.

  Returns 0 on match, and 1 or -1 when there is no match (see the inline
  comments for which one is returned where).
*/
static
int my_wildcmp_8bit_impl(CHARSET_INFO *cs,
                         const char *str,const char *str_end,
                         const char *wildstr,const char *wildend,
                         int escape, int w_one, int w_many, int recurse_level)
{
  int result= -1;			/* Not found, using wildcards */

  /* If the guard hook is installed and objects to this depth: no match */
  if (my_string_stack_guard && my_string_stack_guard(recurse_level))
    return 1;
  while (wildstr != wildend)
  {
    /* Compare the run of ordinary (non-wildcard) pattern bytes */
    while (*wildstr != w_many && *wildstr != w_one)
    {
      /* An escape that is not the last pattern byte quotes the next byte */
      if (*wildstr == escape && wildstr+1 != wildend)
        wildstr++;

      if (str == str_end || likeconv(cs,*wildstr++) != likeconv(cs,*str++))
        return(1);				/* No match */
      if (wildstr == wildend)
        return(str != str_end);		/* Match if both are at end */
      result=1;					/* Found an anchor char     */
    }
    if (*wildstr == w_one)
    {
      /* Each w_one in a row consumes exactly one subject byte */
      do
      {
        if (str == str_end)			/* Skip one char if possible */
          return(result);
        INC_PTR(cs,str,str_end);
      } while (++wildstr < wildend && *wildstr == w_one);
      if (wildstr == wildend)
        break;
    }
    if (*wildstr == w_many)
    {						/* Found w_many */
      uchar cmp;

      wildstr++;
      /* Remove any '%' and '_' from the wild search string */
      for (; wildstr != wildend ; wildstr++)
      {
        if (*wildstr == w_many)
          continue;
        if (*wildstr == w_one)
        {
          if (str == str_end)
            return(-1);
          INC_PTR(cs,str,str_end);
          continue;
        }
        break;					/* Not a wild character */
      }
      if (wildstr == wildend)
        return(0);				/* Ok if w_many is last */
      if (str == str_end)
        return(-1);

      /* cmp is the first ordinary pattern byte after the wildcards */
      if ((cmp= *wildstr) == escape && wildstr+1 != wildend)
        cmp= *++wildstr;

      INC_PTR(cs,wildstr,wildend);	/* This is compared through cmp */
      cmp=likeconv(cs,cmp);
      do
      {
        /*
          Find the next character in the subject string equal to 'cmp', then
          check recursively my_wildcmp_8bit_impl() for the pattern remainder.
        */
        while (str != str_end && (uchar) likeconv(cs,*str) != cmp)
          str++;
        if (str++ == str_end)
          return(-1); /* 'cmp' was not found in the subject string */
        {
          int tmp=my_wildcmp_8bit_impl(cs,str,str_end,
                                       wildstr,wildend,escape,w_one,
                                       w_many, recurse_level+1);
          if (tmp <= 0)
            return(tmp);
        }
        /*
          The recursion call did not match. But it returned 1, which means
          the pattern remainder has some non-special characters.
          Continue, there is a chance that we'll find another 'cmp'
          at a different position in the subject string.
        */
      } while (str != str_end);
      return(-1);
    }
  }
  /* Pattern is exhausted: it is a match only if the subject is too */
  return(str != str_end ? 1 : 0);
}
 | |
| 
 | |
| int my_wildcmp_8bit(CHARSET_INFO *cs,
 | |
|                     const char *str,const char *str_end,
 | |
|                     const char *wildstr,const char *wildend,
 | |
|                     int escape, int w_one, int w_many)
 | |
| {
 | |
|   return my_wildcmp_8bit_impl(cs, str, str_end,
 | |
|                               wildstr, wildend,
 | |
|                               escape, w_one, w_many, 1);
 | |
| }
 | |
| 
 | |
| 
 | |
| /*
 | |
| ** Calculate min_str and max_str that ranges a LIKE string.
 | |
| ** Arguments:
 | |
| ** ptr		Pointer to LIKE string.
 | |
| ** ptr_length	Length of LIKE string.
 | |
| ** escape	Escape character in LIKE.  (Normally '\').
 | |
| **		All escape characters should be removed from min_str and max_str
 | |
| ** res_length	Length of min_str and max_str.
 | |
| ** min_str	Smallest case sensitive string that ranges LIKE.
 | |
| **		Should be space padded to res_length.
 | |
| ** max_str	Largest case sensitive string that ranges LIKE.
 | |
| **		Normally padded with the biggest character sort value.
 | |
| **
 | |
| ** The function should return 0 if ok and 1 if the LIKE string can't be
 | |
| ** optimized !
 | |
| */
 | |
| 
 | |
| my_bool my_like_range_simple(CHARSET_INFO *cs,
 | |
| 			     const char *ptr, size_t ptr_length,
 | |
| 			     pbool escape, pbool w_one, pbool w_many,
 | |
| 			     size_t res_length,
 | |
| 			     char *min_str,char *max_str,
 | |
| 			     size_t *min_length, size_t *max_length)
 | |
| {
 | |
|   const char *end= ptr + ptr_length;
 | |
|   char *min_org=min_str;
 | |
|   char *min_end=min_str+res_length;
 | |
|   size_t charlen= res_length / cs->mbmaxlen;
 | |
| 
 | |
|   for (; ptr != end && min_str != min_end && charlen > 0 ; ptr++, charlen--)
 | |
|   {
 | |
|     if (*ptr == escape && ptr+1 != end)
 | |
|     {
 | |
|       ptr++;					/* Skip escape */
 | |
|       *min_str++= *max_str++ = *ptr;
 | |
|       continue;
 | |
|     }
 | |
|     if (*ptr == w_one)				/* '_' in SQL */
 | |
|     {
 | |
|       *min_str++='\0';				/* This should be min char */
 | |
|       *max_str++= (char) cs->max_sort_char;
 | |
|       continue;
 | |
|     }
 | |
|     if (*ptr == w_many)				/* '%' in SQL */
 | |
|     {
 | |
|       /* Calculate length of keys */
 | |
|       *min_length= (cs->state & (MY_CS_BINSORT | MY_CS_NOPAD)) ?
 | |
|                     (size_t) (min_str - min_org) :
 | |
|                     res_length;
 | |
|       *max_length= res_length;
 | |
|       do
 | |
|       {
 | |
| 	*min_str++= 0;
 | |
| 	*max_str++= (char) cs->max_sort_char;
 | |
|       } while (min_str != min_end);
 | |
|       return 0;
 | |
|     }
 | |
|     *min_str++= *max_str++ = *ptr;
 | |
|   }
 | |
| 
 | |
|  *min_length= *max_length = (size_t) (min_str - min_org);
 | |
|   while (min_str != min_end)
 | |
|     *min_str++= *max_str++ = ' ';      /* Because if key compression */
 | |
|   return 0;
 | |
| }
 | |
| 
 | |
| 
 | |
| size_t my_scan_8bit(CHARSET_INFO *cs, const char *str, const char *end, int sq)
 | |
| {
 | |
|   const char *str0= str;
 | |
|   switch (sq)
 | |
|   {
 | |
|   case MY_SEQ_INTTAIL:
 | |
|     if (*str == '.')
 | |
|     {
 | |
|       for(str++ ; str != end && *str == '0' ; str++);
 | |
|       return (size_t) (str - str0);
 | |
|     }
 | |
|     return 0;
 | |
| 
 | |
|   case MY_SEQ_SPACES:
 | |
|     for ( ; str < end ; str++)
 | |
|     {
 | |
|       if (!my_isspace(cs,*str))
 | |
|         break;
 | |
|     }
 | |
|     return (size_t) (str - str0);
 | |
|   case MY_SEQ_NONSPACES:
 | |
|     for ( ; str < end ; str++)
 | |
|     {
 | |
|       if (my_isspace(cs, *str))
 | |
|         break;
 | |
|     }
 | |
|     return (size_t) (str - str0);
 | |
|   default:
 | |
|     return 0;
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| void my_fill_8bit(CHARSET_INFO *cs __attribute__((unused)),
 | |
| 		   char *s, size_t l, int fill)
 | |
| {
 | |
|   bfill((uchar*) s,l,fill);
 | |
| }
 | |
| 
 | |
| 
 | |
| size_t my_numchars_8bit(CHARSET_INFO *cs __attribute__((unused)),
 | |
| 		      const char *b, const char *e)
 | |
| {
 | |
|   return (size_t) (e - b);
 | |
| }
 | |
| 
 | |
| 
 | |
| size_t my_numcells_8bit(CHARSET_INFO *cs __attribute__((unused)),
 | |
|                         const char *b, const char *e)
 | |
| {
 | |
|   return (size_t) (e - b);
 | |
| }
 | |
| 
 | |
| 
 | |
| size_t my_charpos_8bit(CHARSET_INFO *cs __attribute__((unused)),
 | |
|                        const char *b  __attribute__((unused)),
 | |
|                        const char *e  __attribute__((unused)),
 | |
|                        size_t pos)
 | |
| {
 | |
|   return pos;
 | |
| }
 | |
| 
 | |
| 
 | |
| size_t
 | |
| my_well_formed_char_length_8bit(CHARSET_INFO *cs __attribute__((unused)),
 | |
|                                 const char *start, const char *end,
 | |
|                                 size_t nchars, MY_STRCOPY_STATUS *status)
 | |
| {
 | |
|   size_t nbytes= (size_t) (end - start);
 | |
|   size_t res= MY_MIN(nbytes, nchars);
 | |
|   status->m_well_formed_error_pos= NULL;
 | |
|   status->m_source_end_pos= start + res;
 | |
|   return res;
 | |
| }
 | |
| 
 | |
| 
 | |
| /*
 | |
|   Copy a 8-bit string. Not more than "nchars" character are copied.
 | |
| */
 | |
| size_t
 | |
| my_copy_8bit(CHARSET_INFO *cs __attribute__((unused)),
 | |
|              char *dst, size_t dst_length,
 | |
|              const char *src, size_t src_length,
 | |
|              size_t nchars, MY_STRCOPY_STATUS *status)
 | |
| {
 | |
|   set_if_smaller(src_length, dst_length);
 | |
|   set_if_smaller(src_length, nchars);
 | |
|   if (src_length)
 | |
|     memmove(dst, src, src_length);
 | |
|   status->m_source_end_pos= src + src_length;
 | |
|   status->m_well_formed_error_pos= NULL;   
 | |
|   return src_length;
 | |
| }
 | |
| 
 | |
| 
 | |
| size_t my_lengthsp_8bit(CHARSET_INFO *cs __attribute__((unused)),
 | |
|                         const char *ptr, size_t length)
 | |
| {
 | |
|   const char *end;
 | |
|   end= (const char *) skip_trailing_space((const uchar *)ptr, length);
 | |
|   return (size_t) (end-ptr);
 | |
| }
 | |
| 
 | |
| 
 | |
| uint my_instr_simple(CHARSET_INFO *cs,
 | |
|                      const char *b, size_t b_length, 
 | |
|                      const char *s, size_t s_length,
 | |
|                      my_match_t *match, uint nmatch)
 | |
| {
 | |
|   register const uchar *str, *search, *end, *search_end;
 | |
|   
 | |
|   if (s_length <= b_length)
 | |
|   {
 | |
|     if (!s_length)
 | |
|     {
 | |
|       if (nmatch)
 | |
|       {
 | |
|         match->beg= 0;
 | |
|         match->end= 0;
 | |
|         match->mb_len= 0;
 | |
|       }
 | |
|       return 1;		/* Empty string is always found */
 | |
|     }
 | |
|     
 | |
|     str= (const uchar*) b;
 | |
|     search= (const uchar*) s;
 | |
|     end= (const uchar*) b+b_length-s_length+1;
 | |
|     search_end= (const uchar*) s + s_length;
 | |
|     
 | |
| skip:
 | |
|     while (str != end)
 | |
|     {
 | |
|       if (cs->sort_order[*str++] == cs->sort_order[*search])
 | |
|       {
 | |
| 	register const uchar *i,*j;
 | |
| 	
 | |
| 	i= str; 
 | |
| 	j= search+1;
 | |
| 	
 | |
| 	while (j != search_end)
 | |
| 	  if (cs->sort_order[*i++] != cs->sort_order[*j++]) 
 | |
|             goto skip;
 | |
|         
 | |
| 	if (nmatch > 0)
 | |
| 	{
 | |
| 	  match[0].beg= 0;
 | |
| 	  match[0].end= (uint) (str- (const uchar*)b-1);
 | |
| 	  match[0].mb_len= match[0].end;
 | |
| 	  
 | |
| 	  if (nmatch > 1)
 | |
| 	  {
 | |
| 	    match[1].beg= match[0].end;
 | |
| 	    match[1].end= (uint)(match[0].end+s_length);
 | |
| 	    match[1].mb_len= match[1].end-match[1].beg;
 | |
| 	  }
 | |
| 	}
 | |
| 	return 2;
 | |
|       }
 | |
|     }
 | |
|   }
 | |
|   return 0;
 | |
| }
 | |
| 
 | |
| 
 | |
/* Per-plane statistics collected by create_fromuni() */
typedef struct
{
  int		nchars;            /* Number of charset codes mapped into the plane */
  struct my_uni_idx_st uidx; /* Code point range (from..to) and its table */
} uni_idx;

/* Number of 8bit codes scanned when a plane table is filled */
#define PLANE_SIZE	0x100
/* Number of planes tracked */
#define PLANE_NUM	0x100
/* Plane of a code point: its bits above the low byte, modulo PLANE_NUM */
#define PLANE_NUMBER(x)	(((x)>>8) % PLANE_NUM)
 | |
| 
 | |
| static int pcmp(const void * f, const void * s)
 | |
| {
 | |
|   const uni_idx *F= (const uni_idx*) f;
 | |
|   const uni_idx *S= (const uni_idx*) s;
 | |
|   int res;
 | |
| 
 | |
|   if (!(res=((S->nchars)-(F->nchars))))
 | |
|     res=((F->uidx.from)-(S->uidx.to));
 | |
|   return res;
 | |
| }
 | |
| 
 | |
/*
  Build cs->tab_from_uni (Unicode -> 8bit code) out of cs->tab_to_uni.

  The 256 codes of the character set are grouped by Unicode plane
  (PLANE_NUMBER of the code point). For every non-empty plane a table
  covering the used code point range of that plane is allocated with
  loader->once_alloc and filled with the 8bit codes. The planes are stored
  most populated first, followed by an all-zero end-of-list entry.

  Returns FALSE on success, TRUE if tab_to_uni is missing or an
  allocation failed.
*/
static my_bool
create_fromuni(struct charset_info_st *cs,
               MY_CHARSET_LOADER *loader)
{
  uni_idx	idx[PLANE_NUM];
  int		i,n;

  /*
    Check that Unicode map is loaded.
    It can be not loaded when the collation is
    listed in Index.xml but not specified
    in the character set specific XML file.
  */
  if (!cs->tab_to_uni)
    return TRUE;

  /* Clear plane statistics */
  bzero(idx,sizeof(idx));

  /* Count number of characters in each plane */
  for (i=0; i< 0x100; i++)
  {
    uint16 wc=cs->tab_to_uni[i];
    int pl= PLANE_NUMBER(wc);

    /* A zero code point counts only for code 0 itself */
    if (wc || !i)
    {
      if (!idx[pl].nchars)
      {
        /* First character seen in this plane starts the range */
        idx[pl].uidx.from=wc;
        idx[pl].uidx.to=wc;
      }else
      {
        /* Widen the range to include wc */
        idx[pl].uidx.from=wc<idx[pl].uidx.from?wc:idx[pl].uidx.from;
        idx[pl].uidx.to=wc>idx[pl].uidx.to?wc:idx[pl].uidx.to;
      }
      idx[pl].nchars++;
    }
  }

  /* Sort planes in descending order */
  qsort(&idx,PLANE_NUM,sizeof(uni_idx),&pcmp);

  /* Allocate and fill the reverse table of each non-empty plane */
  for (i=0; i < PLANE_NUM; i++)
  {
    int ch,numchars;
    uchar *tab;

    /* Skip empty plane */
    if (!idx[i].nchars)
      break;

    numchars=idx[i].uidx.to-idx[i].uidx.from+1;
    if (!(idx[i].uidx.tab= tab= (uchar*)
                                (loader->once_alloc) (numchars *
                                                      sizeof(*idx[i].uidx.tab))))
      return TRUE;

    bzero(tab,numchars*sizeof(*tab));

    /* Code 0 is skipped: a zero table entry means "not mapped" */
    for (ch=1; ch < PLANE_SIZE; ch++)
    {
      uint16 wc=cs->tab_to_uni[ch];
      if (wc >= idx[i].uidx.from && wc <= idx[i].uidx.to && wc)
      {
        int ofs= wc - idx[i].uidx.from;
        if (!tab[ofs] || tab[ofs] > 0x7F) /* Prefer ASCII*/
        {
          /*
            Some character sets can have double encoding. For example,
            in ARMSCII8, the following characters are encoded twice:

            Encoding#1 Encoding#2 Unicode Character Name
            ---------- ---------- ------- --------------
            0x27       0xFF       U+0027  APOSTROPHE
            0x28       0xA5       U+0028  LEFT PARENTHESIS
            0x29       0xA4       U+0029  RIGHT PARENTHESIS
            0x2C       0xAB       U+002C  COMMA
            0x2D       0xAC       U+002D  HYPHEN-MINUS
            0x2E       0xA9       U+002E  FULL STOP

            That is, both 0x27 and 0xFF convert to Unicode U+0027.
            When converting back from Unicode to ARMSCII,
            we prefer the ASCII range, that is we want U+0027
            to convert to 0x27 rather than to 0xFF.
          */
          tab[ofs]= ch;
        }
      }
    }
  }

  /* Allocate the final index: one entry per used plane plus a terminator */
  n=i;
  if (!(cs->tab_from_uni= (MY_UNI_IDX *)
                          (loader->once_alloc)(sizeof(MY_UNI_IDX) * (n + 1))))
    return TRUE;

  /* Copy the range descriptors of the used planes */
  for (i=0; i< n; i++)
    ((struct my_uni_idx_st*)cs->tab_from_uni)[i]= idx[i].uidx;

  /* Set end-of-list marker */
  bzero((char*) &cs->tab_from_uni[i],sizeof(MY_UNI_IDX));
  return FALSE;
}
 | |
| 
 | |
| 
 | |
| /*
 | |
|   Detect if a character set is 8bit,
 | |
|   and it is pure ascii, i.e. doesn't have
 | |
|   characters outside U+0000..U+007F
 | |
|   This functions is shared between "conf_to_src"
 | |
|   and dynamic charsets loader in "mysqld".
 | |
| */
 | |
| static my_bool
 | |
| my_charset_is_8bit_pure_ascii(CHARSET_INFO *cs)
 | |
| {
 | |
|   size_t code;
 | |
|   if (!cs->tab_to_uni)
 | |
|     return 0;
 | |
|   for (code= 0; code < 256; code++)
 | |
|   {
 | |
|     if (cs->tab_to_uni[code] > 0x7F)
 | |
|       return 0;
 | |
|   }
 | |
|   return 1;
 | |
| }
 | |
| 
 | |
| 
 | |
| /*
 | |
|   Shared function between conf_to_src and mysys.
 | |
|   Check if a 8bit character set is compatible with
 | |
|   ascii on the range 0x00..0x7F.
 | |
| */
 | |
| static my_bool
 | |
| my_charset_is_ascii_compatible(CHARSET_INFO *cs)
 | |
| {
 | |
|   uint i;
 | |
|   if (!cs->tab_to_uni)
 | |
|     return 1;
 | |
|   for (i= 0; i < 128; i++)
 | |
|   {
 | |
|     if (cs->tab_to_uni[i] != i)
 | |
|       return 0;
 | |
|   }
 | |
|   return 1;
 | |
| }
 | |
| 
 | |
| 
 | |
| uint my_8bit_charset_flags_from_data(CHARSET_INFO *cs)
 | |
| {
 | |
|   uint flags= 0;
 | |
|   if (my_charset_is_8bit_pure_ascii(cs))
 | |
|     flags|= MY_CS_PUREASCII;
 | |
|   if (!my_charset_is_ascii_compatible(cs))
 | |
|     flags|= MY_CS_NONASCII;
 | |
|   return flags;
 | |
| }
 | |
| 
 | |
| 
 | |
| /*
 | |
|   Check if case sensitive sort order: A < a < B.
 | |
|   We need MY_CS_FLAG for regex library, and for
 | |
|   case sensitivity flag for 5.0 client protocol,
 | |
|   to support isCaseSensitive() method in JDBC driver
 | |
| */
 | |
| uint my_8bit_collation_flags_from_data(CHARSET_INFO *cs)
 | |
| {
 | |
|   uint flags= 0;
 | |
|   if (cs->sort_order && cs->sort_order['A'] < cs->sort_order['a'] &&
 | |
|                         cs->sort_order['a'] < cs->sort_order['B'])
 | |
|     flags|= MY_CS_CSSORT;
 | |
|   return flags;
 | |
| }
 | |
| 
 | |
| 
 | |
| static my_bool
 | |
| my_cset_init_8bit(struct charset_info_st *cs, MY_CHARSET_LOADER *loader)
 | |
| {
 | |
|   cs->state|= my_8bit_charset_flags_from_data(cs);
 | |
|   cs->pad_char= ' ';
 | |
|   if (!cs->to_lower || !cs->to_upper || !cs->m_ctype || !cs->tab_to_uni)
 | |
|     return TRUE;
 | |
|   return create_fromuni(cs, loader);
 | |
| }
 | |
| 
 | |
| static void set_max_sort_char(struct charset_info_st *cs)
 | |
| {
 | |
|   uchar max_char;
 | |
|   uint  i;
 | |
|   
 | |
|   if (!cs->sort_order)
 | |
|     return;
 | |
|   
 | |
|   max_char=cs->sort_order[(uchar) cs->max_sort_char];
 | |
|   for (i= 0; i < 256; i++)
 | |
|   {
 | |
|     if ((uchar) cs->sort_order[i] > max_char)
 | |
|     {
 | |
|       max_char=(uchar) cs->sort_order[i];
 | |
|       cs->max_sort_char= i;
 | |
|     }
 | |
|   }
 | |
| }
 | |
| 
 | |
| static my_bool my_coll_init_simple(struct charset_info_st *cs,
 | |
|                                    MY_CHARSET_LOADER *loader __attribute__((unused)))
 | |
| {
 | |
|   if (!cs->sort_order)
 | |
|     return TRUE;
 | |
|   cs->state|= my_8bit_collation_flags_from_data(cs);
 | |
|   set_max_sort_char(cs);
 | |
|   return FALSE;
 | |
| }
 | |
| 
 | |
| 
 | |
| longlong my_strtoll10_8bit(CHARSET_INFO *cs __attribute__((unused)),
 | |
|                            const char *nptr, char **endptr, int *error)
 | |
| {
 | |
|   return my_strtoll10(nptr, endptr, error);
 | |
| }
 | |
| 
 | |
| 
 | |
| int my_mb_ctype_8bit(CHARSET_INFO *cs, int *ctype,
 | |
|                    const uchar *s, const uchar *e)
 | |
| {
 | |
|   if (s >= e)
 | |
|   {
 | |
|     *ctype= 0;
 | |
|     return MY_CS_TOOSMALL;
 | |
|   }
 | |
|   *ctype= cs->m_ctype[*s + 1];
 | |
|   return 1;
 | |
| }
 | |
| 
 | |
| 
 | |
/*
  Overflow limits used when a decimal digit is appended to a ulonglong
  accumulator: the digit fits if the accumulator is below CUTOFF, or is
  equal to CUTOFF and the digit is not bigger than CUTLIM.
*/
#define CUTOFF  (ULONGLONG_MAX / 10)
#define CUTLIM  (ULONGLONG_MAX % 10)
/* Number of entries in d10[] */
#define DIGITS_IN_ULONGLONG 20

/* Powers of ten: d10[i] is 10 to the power of i */
static ulonglong d10[DIGITS_IN_ULONGLONG]=
{
  1,
  10,
  100,
  1000,
  10000,
  100000,
  1000000,
  10000000,
  100000000,
  1000000000,
  10000000000ULL,
  100000000000ULL,
  1000000000000ULL,
  10000000000000ULL,
  100000000000000ULL,
  1000000000000000ULL,
  10000000000000000ULL,
  100000000000000000ULL,
  1000000000000000000ULL,
  10000000000000000000ULL
};
 | |
| 
 | |
| 
 | |
| /*
 | |
| 
 | |
|   Convert a string to unsigned long long integer value
 | |
|   with rounding.
 | |
|   
 | |
|   SYNOPSIS
 | |
|     my_strntoull10rnd_8bit()
 | |
|       cs              in      pointer to character set
 | |
|       str             in      pointer to the string to be converted
 | |
|       length          in      string length
 | |
|       unsigned_flag   in      whether the number is unsigned
 | |
|       endptr          out     pointer to the stop character
 | |
|       error           out     returned error code
 | |
| 
 | |
|   DESCRIPTION
 | |
|     This function takes the decimal representation of integer number
 | |
|     from string str and converts it to an signed or unsigned
 | |
|     long long integer value.
 | |
|     Space characters and tab are ignored.
 | |
|     A sign character might precede the digit characters.
 | |
|     The number may have any number of pre-zero digits.
 | |
|     The number may have decimal point and exponent.
 | |
|     Rounding is always done in "away from zero" style:
 | |
|       0.5  ->   1
 | |
|      -0.5  ->  -1
 | |
| 
 | |
|     The function stops reading the string str after "length" bytes
 | |
|     or at the first character that is not a part of correct number syntax:
 | |
| 
 | |
|     <signed numeric literal> ::=
 | |
|       [ <sign> ] <exact numeric literal> [ E [ <sign> ] <unsigned integer> ]
 | |
| 
 | |
|     <exact numeric literal> ::=
 | |
|                         <unsigned integer> [ <period> [ <unsigned integer> ] ]
 | |
|                       | <period> <unsigned integer>
 | |
|     <unsigned integer>   ::= <digit>...
 | |
|      
 | |
|   RETURN VALUES
 | |
|     Value of string as a signed/unsigned longlong integer
 | |
| 
 | |
|     endptr cannot be NULL. The function will store the end pointer
 | |
|     to the stop character here.
 | |
| 
 | |
|     The error parameter contains information how things went:
 | |
|     0	     ok
 | |
|     ERANGE   If the value of the converted number is out of range
 | |
|     In this case the return value is:
 | |
|     - ULONGLONG_MAX if unsigned_flag and the number was too big
 | |
|     - 0 if unsigned_flag and the number was negative
 | |
|     - LONGLONG_MAX if no unsigned_flag and the number is too big
 | |
|     - LONGLONG_MIN if no unsigned_flag and the number is too big negative
 | |
|     
 | |
|     EDOM If the string didn't contain any digits.
 | |
|     In this case the return value is 0.
 | |
| */
 | |
| 
 | |
| ulonglong
 | |
| my_strntoull10rnd_8bit(CHARSET_INFO *cs __attribute__((unused)),
 | |
|                        const char *str, size_t length, int unsigned_flag,
 | |
|                        char **endptr, int *error)
 | |
| {
 | |
|   const char *dot, *end9, *beg, *end= str + length;
 | |
|   ulonglong ull;
 | |
|   ulong ul;
 | |
|   uchar ch;
 | |
|   int shift= 0, digits= 0, negative, addon;
 | |
| 
 | |
|   /* Skip leading spaces and tabs */
 | |
|   for ( ; str < end && my_isspace(&my_charset_latin1, *str) ; )
 | |
|     str++;
 | |
| 
 | |
|   if (str >= end)
 | |
|     goto ret_edom;
 | |
| 
 | |
|   if ((negative= (*str == '-')) || *str=='+') /* optional sign */
 | |
|   {
 | |
|     if (++str == end)
 | |
|       goto ret_edom;
 | |
|   }
 | |
| 
 | |
|   beg= str;
 | |
|   end9= (str + 9) > end ? end : (str + 9);
 | |
|   /* Accumulate small number into ulong, for performance purposes */
 | |
|   for (ul= 0 ; str < end9 && (ch= (uchar) (*str - '0')) < 10; str++)
 | |
|   {
 | |
|     ul= ul * 10 + ch;
 | |
|   }
 | |
|   
 | |
|   if (str >= end) /* Small number without dots and expanents */
 | |
|   {
 | |
|     *endptr= (char*) str;
 | |
|     if (negative)
 | |
|     {
 | |
|       if (unsigned_flag)
 | |
|       {
 | |
|         *error= ul ? MY_ERRNO_ERANGE : 0;
 | |
|         return 0;
 | |
|       }
 | |
|       else
 | |
|       {
 | |
|         *error= 0;
 | |
|         return (ulonglong) (longlong) -(long) ul;
 | |
|       }
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|       *error=0;
 | |
|       return (ulonglong) ul;
 | |
|     }
 | |
|   }
 | |
|   
 | |
|   digits= (int) (str - beg);
 | |
| 
 | |
|   /* Continue to accumulate into ulonglong */
 | |
|   for (dot= NULL, ull= ul; str < end; str++)
 | |
|   {
 | |
|     if ((ch= (uchar) (*str - '0')) < 10)
 | |
|     {
 | |
|       if (ull < CUTOFF || (ull == CUTOFF && ch <= CUTLIM))
 | |
|       {
 | |
|         ull= ull * 10 + ch;
 | |
|         digits++;
 | |
|         continue;
 | |
|       }
 | |
|       /*
 | |
|         Adding the next digit would overflow.
 | |
|         Remember the next digit in "addon", for rounding.
 | |
|         Scan all digits with an optional single dot.
 | |
|       */
 | |
|       if (ull == CUTOFF)
 | |
|       {
 | |
|         ull= ULONGLONG_MAX;
 | |
|         addon= 1;
 | |
|         str++;
 | |
|       }
 | |
|       else
 | |
|         addon= (*str >= '5');
 | |
|       if (!dot)
 | |
|       {
 | |
|         for ( ; str < end && (ch= (uchar) (*str - '0')) < 10; shift++, str++);
 | |
|         if (str < end && *str == '.')
 | |
|         {
 | |
|           str++;
 | |
|           for ( ; str < end && (ch= (uchar) (*str - '0')) < 10; str++);
 | |
|         }
 | |
|       }
 | |
|       else
 | |
|       {
 | |
|         shift= (int) (dot - str);
 | |
|         for ( ; str < end && (ch= (uchar) (*str - '0')) < 10; str++);
 | |
|       }
 | |
|       goto exp;
 | |
|     }
 | |
|     
 | |
|     if (*str == '.')
 | |
|     {
 | |
|       if (dot)
 | |
|       {
 | |
|         /* The second dot character */
 | |
|         addon= 0;
 | |
|         goto exp;
 | |
|       }
 | |
|       else
 | |
|       {
 | |
|         dot= str + 1;
 | |
|       }
 | |
|       continue;
 | |
|     }
 | |
|     
 | |
|     /* Unknown character, exit the loop */
 | |
|     break; 
 | |
|   }
 | |
|   shift= dot ? (int)(dot - str) : 0; /* Right shift */
 | |
|   addon= 0;
 | |
| 
 | |
| exp:    /* [ E [ <sign> ] <unsigned integer> ] */
 | |
| 
 | |
|   if (!digits)
 | |
|   {
 | |
|     str= beg;
 | |
|     goto ret_edom;
 | |
|   }
 | |
|   
 | |
|   if (str < end && (*str == 'e' || *str == 'E'))
 | |
|   {
 | |
|     str++;
 | |
|     if (str < end)
 | |
|     {
 | |
|       int negative_exp, exponent;
 | |
|       if ((negative_exp= (*str == '-')) || *str=='+')
 | |
|       {
 | |
|         if (++str == end)
 | |
|         {
 | |
|           str-= 2; /* 'e-' or 'e+' not followed by digits */
 | |
|           goto ret_sign;
 | |
|         }
 | |
|       }
 | |
|       if (shift > 0 && !negative_exp)
 | |
|         goto ret_too_big;
 | |
|       for (exponent= 0 ; str < end && (ch= (uchar) (*str - '0')) < 10; str++)
 | |
|       {
 | |
|         if (negative_exp)
 | |
|         {
 | |
|           if (exponent - shift > DIGITS_IN_ULONGLONG)
 | |
|             goto ret_zero;
 | |
|         }
 | |
|         else
 | |
|         {
 | |
|           if (exponent + shift > DIGITS_IN_ULONGLONG)
 | |
|             goto ret_too_big;
 | |
|         }
 | |
|         exponent= exponent * 10 + ch;
 | |
|       }
 | |
|       shift+= negative_exp ? -exponent : exponent;
 | |
|     }
 | |
|     else
 | |
|       str--; /* 'e' not followed by digits */
 | |
|   }
 | |
|   
 | |
|   if (shift == 0) /* No shift, check addon digit */
 | |
|   {
 | |
|     if (addon)
 | |
|     {
 | |
|       if (ull == ULONGLONG_MAX)
 | |
|         goto ret_too_big;
 | |
|       ull++;
 | |
|     }
 | |
|     goto ret_sign;
 | |
|   }
 | |
| 
 | |
|   if (shift < 0) /* Right shift */
 | |
|   {
 | |
|     ulonglong d, r;
 | |
|     
 | |
|     if (-shift >= DIGITS_IN_ULONGLONG)
 | |
|       goto ret_zero; /* Exponent is a big negative number, return 0 */
 | |
|     
 | |
|     d= d10[-shift];
 | |
|     r= (ull % d) * 2;
 | |
|     ull /= d;
 | |
|     if (r >= d)
 | |
|       ull++;
 | |
|     goto ret_sign;
 | |
|   }
 | |
| 
 | |
|   if (shift > DIGITS_IN_ULONGLONG) /* Huge left shift */
 | |
|   {
 | |
|     if (!ull)
 | |
|       goto ret_sign;
 | |
|     goto ret_too_big;
 | |
|   }
 | |
| 
 | |
|   for ( ; shift > 0; shift--, ull*= 10) /* Left shift */
 | |
|   {
 | |
|     if (ull > CUTOFF)
 | |
|       goto ret_too_big; /* Overflow, number too big */
 | |
|   }
 | |
| 
 | |
| ret_sign:
 | |
|   *endptr= (char*) str;
 | |
| 
 | |
|   if (!unsigned_flag)
 | |
|   {
 | |
|     if (negative)
 | |
|     {
 | |
|       if (ull >= (ulonglong) LONGLONG_MIN)
 | |
|       {
 | |
|         if (ull != (ulonglong) LONGLONG_MIN)
 | |
|           *error= MY_ERRNO_ERANGE;
 | |
|         return (ulonglong) LONGLONG_MIN;
 | |
|       }
 | |
|       *error= 0;
 | |
|       return (ulonglong) -(longlong) ull;
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|       if (ull > (ulonglong) LONGLONG_MAX)
 | |
|       {
 | |
|         *error= MY_ERRNO_ERANGE;
 | |
|         return (ulonglong) LONGLONG_MAX;
 | |
|       }
 | |
|       *error= 0;
 | |
|       return ull;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   /* Unsigned number */
 | |
|   if (negative && ull)
 | |
|   {
 | |
|     *error= MY_ERRNO_ERANGE;
 | |
|     return 0;
 | |
|   }
 | |
|   *error= 0;
 | |
|   return ull;
 | |
| 
 | |
| ret_zero:
 | |
|   *endptr= (char*) str;
 | |
|   *error= 0;
 | |
|   return 0;
 | |
| 
 | |
| ret_edom:
 | |
|   *endptr= (char*) str;
 | |
|   *error= MY_ERRNO_EDOM;
 | |
|   return 0;
 | |
|   
 | |
| ret_too_big:
 | |
|   *endptr= (char*) str;
 | |
|   *error= MY_ERRNO_ERANGE;
 | |
|   return unsigned_flag ?
 | |
|          ULONGLONG_MAX :
 | |
|          negative ? (ulonglong) LONGLONG_MIN : (ulonglong) LONGLONG_MAX;
 | |
| }
 | |
| 
 | |
| 
 | |
| /*
 | |
|   Check if a constant can be propagated
 | |
| 
 | |
|   SYNOPSIS:
 | |
|     my_propagate_simple()
 | |
|     cs		Character set information
 | |
|     str		String to convert to double
 | |
|     length	Optional length for string.
 | |
|     
 | |
|   NOTES:
 | |
|    Takes the string in the given charset and check
 | |
|    if it can be safely propagated in the optimizer.
 | |
|    
 | |
|    create table t1 (
 | |
|      s char(5) character set latin1 collate latin1_german2_ci);
 | |
|    insert into t1 values (0xf6); -- o-umlaut
 | |
|    select * from t1 where length(s)=1 and s='oe';
 | |
| 
 | |
|    The above query should return one row.
 | |
|    We cannot convert this query into:
 | |
|    select * from t1 where length('oe')=1 and s='oe';
 | |
|    
 | |
|    Currently we don't check the constant itself,
 | |
|    and decide not to propagate a constant
 | |
|    just if the collation itself allows tricky things
 | |
|    like expansions and contractions. In the future
 | |
|    we can write a more sophisticated functions to
 | |
|    check the constants. For example, 'oa' can always
 | |
|    be safety propagated in German2 because unlike 
 | |
|    'oe' it does not have any special meaning.
 | |
| 
 | |
|   RETURN
 | |
|     1 if constant can be safely propagated
 | |
|     0 if it is not safe to propagate the constant
 | |
| */
 | |
| 
 | |
| 
 | |
| 
 | |
| my_bool my_propagate_simple(CHARSET_INFO *cs __attribute__((unused)),
 | |
|                             const uchar *str __attribute__((unused)),
 | |
|                             size_t length __attribute__((unused)))
 | |
| {
 | |
|   return 1;
 | |
| }
 | |
| 
 | |
| 
 | |
| my_bool my_propagate_complex(CHARSET_INFO *cs __attribute__((unused)),
 | |
|                              const uchar *str __attribute__((unused)),
 | |
|                              size_t length __attribute__((unused)))
 | |
| {
 | |
|   return 0;
 | |
| }
 | |
| 
 | |
| 
 | |
| void my_ci_set_strength(struct charset_info_st *cs, uint strength)
 | |
| {
 | |
|   DBUG_ASSERT(strength > 0);
 | |
|   DBUG_ASSERT(strength <= MY_STRXFRM_NLEVELS);
 | |
|   cs->levels_for_order= ((1 << strength) - 1);
 | |
| }
 | |
| 
 | |
| 
 | |
| void my_ci_set_level_flags(struct charset_info_st *cs, uint flags)
 | |
| {
 | |
|   DBUG_ASSERT(flags < (1<<MY_STRXFRM_NLEVELS));
 | |
|   cs->levels_for_order= flags;
 | |
| }
 | |
| 
 | |
| /*
 | |
|   Normalize strxfrm flags
 | |
| 
 | |
|   SYNOPSIS:
 | |
|     my_strxfrm_flag_normalize()
 | |
|     cs       - the CHARSET_INFO pointer
 | |
|     flags    - non-normalized flags
 | |
|     
 | |
|   NOTES:
 | |
|     If levels are omitted, then 1-maximum is assumed.
 | |
|     If any level number is greater than the maximum,
 | |
|     it is treated as the maximum.
 | |
| 
 | |
|   RETURN
 | |
|     normalized flags
 | |
| */
 | |
| 
 | |
| uint my_strxfrm_flag_normalize(CHARSET_INFO *cs, uint flags)
 | |
| {
 | |
|   uint maximum= my_bit_log2_uint32(cs->levels_for_order) + 1;
 | |
|   DBUG_ASSERT(maximum >= 1 && maximum <= MY_STRXFRM_NLEVELS);
 | |
|   
 | |
|   /* If levels are omitted, then 1-maximum is assumed*/
 | |
|   if (!(flags & MY_STRXFRM_LEVEL_ALL))
 | |
|   {
 | |
|     static uint def_level_flags[]= {0, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F };
 | |
|     uint flag_pad= flags &
 | |
|                    (MY_STRXFRM_PAD_WITH_SPACE | MY_STRXFRM_PAD_TO_MAXLEN);
 | |
|     flags= def_level_flags[maximum] | flag_pad;
 | |
|   }
 | |
|   else
 | |
|   {
 | |
|     uint i;
 | |
|     uint flag_lev= flags & MY_STRXFRM_LEVEL_ALL;
 | |
|     uint flag_dsc= (flags >> MY_STRXFRM_DESC_SHIFT) & MY_STRXFRM_LEVEL_ALL;
 | |
|     uint flag_rev= (flags >> MY_STRXFRM_REVERSE_SHIFT) & MY_STRXFRM_LEVEL_ALL;
 | |
|     uint flag_pad= flags &
 | |
|                    (MY_STRXFRM_PAD_WITH_SPACE | MY_STRXFRM_PAD_TO_MAXLEN);
 | |
| 
 | |
|     /*
 | |
|       If any level number is greater than the maximum,
 | |
|       it is treated as the maximum.
 | |
|     */
 | |
|     for (maximum--, flags= 0, i= 0; i < MY_STRXFRM_NLEVELS; i++)
 | |
|     {
 | |
|       uint src_bit= 1 << i;
 | |
|       if (flag_lev & src_bit)
 | |
|       {
 | |
|         uint dst_bit= 1 << MY_MIN(i, maximum);
 | |
|         flags|= dst_bit;
 | |
|         flags|= (flag_dsc & dst_bit) << MY_STRXFRM_DESC_SHIFT;
 | |
|         flags|= (flag_rev & dst_bit) << MY_STRXFRM_REVERSE_SHIFT;
 | |
|       }
 | |
|     }
 | |
|     flags|= flag_pad;
 | |
|   }
 | |
|   
 | |
|   return flags;
 | |
| }
 | |
| 
 | |
| 
 | |
| /*
 | |
|   Apply DESC and REVERSE collation rules.
 | |
| 
 | |
|   SYNOPSIS:
 | |
|     my_strxfrm_desc_and_reverse()
 | |
|     str      - pointer to string
 | |
|     strend   - end of string
 | |
|     flags    - flags
 | |
|     level    - which level, starting from 0.
 | |
|     
 | |
|   NOTES:
 | |
|     Apply DESC or REVERSE or both flags.
 | |
|     
 | |
|     If DESC flag is given, then the weights
 | |
|     come out NOTed or negated for that level.
 | |
|     
 | |
|     If REVERSE flags is given, then the weights come out in
 | |
|     reverse order for that level, that is, starting with
 | |
|     the last character and ending with the first character.
 | |
|     
 | |
|     If nether DESC nor REVERSE flags are give,
 | |
|     the string is not changed.
 | |
|     
 | |
| */
 | |
| void
 | |
| my_strxfrm_desc_and_reverse(uchar *str, uchar *strend,
 | |
|                             uint flags, uint level)
 | |
| {
 | |
|   if (flags & (MY_STRXFRM_DESC_LEVEL1 << level))
 | |
|   {
 | |
|     if (flags & (MY_STRXFRM_REVERSE_LEVEL1 << level))
 | |
|     {
 | |
|       for (strend--; str <= strend;)
 | |
|       {
 | |
|         uchar tmp= *str;
 | |
|         *str++= ~*strend;
 | |
|         *strend--= ~tmp;
 | |
|       }
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|       for (; str < strend; str++)
 | |
|         *str= ~*str;
 | |
|     }
 | |
|   }
 | |
|   else if (flags & (MY_STRXFRM_REVERSE_LEVEL1 << level))
 | |
|   {
 | |
|     for (strend--; str < strend;)
 | |
|     {
 | |
|       uchar tmp= *str;
 | |
|       *str++= *strend;
 | |
|       *strend--= tmp;
 | |
|     }
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| size_t
 | |
| my_strxfrm_pad_desc_and_reverse(CHARSET_INFO *cs,
 | |
|                                 uchar *str, uchar *frmend, uchar *strend,
 | |
|                                 uint nweights, uint flags, uint level)
 | |
| {
 | |
|   if (nweights && frmend < strend && (flags & MY_STRXFRM_PAD_WITH_SPACE))
 | |
|   {
 | |
|     uint fill_length= MY_MIN((uint) (strend - frmend), nweights * cs->mbminlen);
 | |
|     my_ci_fill(cs, (char*) frmend, fill_length, cs->pad_char);
 | |
|     frmend+= fill_length;
 | |
|   }
 | |
|   my_strxfrm_desc_and_reverse(str, frmend, flags, level);
 | |
|   if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && frmend < strend)
 | |
|   {
 | |
|     size_t fill_length= strend - frmend;
 | |
|     my_ci_fill(cs, (char*) frmend, fill_length, cs->pad_char);
 | |
|     frmend= strend;
 | |
|   }
 | |
|   return frmend - str;
 | |
| }
 | |
| 
 | |
| 
 | |
| size_t
 | |
| my_strxfrm_pad_desc_and_reverse_nopad(CHARSET_INFO *cs,
 | |
|                                       uchar *str, uchar *frmend, uchar *strend,
 | |
|                                       uint nweights, uint flags, uint level)
 | |
| {
 | |
|   if (nweights && frmend < strend && (flags & MY_STRXFRM_PAD_WITH_SPACE))
 | |
|   {
 | |
|     uint fill_length= MY_MIN((uint) (strend - frmend), nweights * cs->mbminlen);
 | |
|     memset(frmend, 0x00, fill_length);
 | |
|     frmend+= fill_length;
 | |
|   }
 | |
|   my_strxfrm_desc_and_reverse(str, frmend, flags, level);
 | |
|   if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && frmend < strend)
 | |
|   {
 | |
|     size_t fill_length= strend - frmend;
 | |
|     memset(frmend, 0x00, fill_length);
 | |
|     frmend= strend;
 | |
|   }
 | |
|   return frmend - str;
 | |
| }
 | |
| 
 | |
| 
 | |
| MY_CHARSET_HANDLER my_charset_8bit_handler=
 | |
| {
 | |
|     my_cset_init_8bit,
 | |
|     my_numchars_8bit,
 | |
|     my_charpos_8bit,
 | |
|     my_lengthsp_8bit,
 | |
|     my_numcells_8bit,
 | |
|     my_mb_wc_8bit,
 | |
|     my_wc_mb_8bit,
 | |
|     my_mb_ctype_8bit,
 | |
|     my_caseup_str_8bit,
 | |
|     my_casedn_str_8bit,
 | |
|     my_caseup_8bit,
 | |
|     my_casedn_8bit,
 | |
|     my_snprintf_8bit,
 | |
|     my_long10_to_str_8bit,
 | |
|     my_longlong10_to_str_8bit,
 | |
|     my_fill_8bit,
 | |
|     my_strntol_8bit,
 | |
|     my_strntoul_8bit,
 | |
|     my_strntoll_8bit,
 | |
|     my_strntoull_8bit,
 | |
|     my_strntod_8bit,
 | |
|     my_strtoll10_8bit,
 | |
|     my_strntoull10rnd_8bit,
 | |
|     my_scan_8bit,
 | |
|     my_charlen_8bit,
 | |
|     my_well_formed_char_length_8bit,
 | |
|     my_copy_8bit,
 | |
|     my_wc_mb_bin, /* native_to_mb */
 | |
|     my_wc_to_printable_8bit,
 | |
|     my_casefold_multiply_1,
 | |
|     my_casefold_multiply_1
 | |
| };
 | |
| 
 | |
| MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler =
 | |
| {
 | |
|     my_coll_init_simple,	/* init */
 | |
|     my_strnncoll_simple,
 | |
|     my_strnncollsp_simple,
 | |
|     my_strnncollsp_nchars_simple,
 | |
|     my_strnxfrm_simple,
 | |
|     my_strnxfrmlen_simple,
 | |
|     my_like_range_simple,
 | |
|     my_wildcmp_8bit,
 | |
|     my_strcasecmp_8bit,
 | |
|     my_instr_simple,
 | |
|     my_hash_sort_simple,
 | |
|     my_propagate_simple,
 | |
|     my_min_str_8bit_simple,
 | |
|     my_max_str_8bit_simple,
 | |
|     my_ci_get_id_generic,
 | |
|     my_ci_get_collation_name_generic,
 | |
|     my_ci_eq_collation_generic
 | |
| };
 | |
| 
 | |
| 
 | |
| MY_COLLATION_HANDLER my_collation_8bit_simple_nopad_ci_handler =
 | |
| {
 | |
|     my_coll_init_simple,	/* init */
 | |
|     my_strnncoll_simple,
 | |
|     my_strnncollsp_simple_nopad,
 | |
|     my_strnncollsp_nchars_simple,
 | |
|     my_strnxfrm_simple_nopad,
 | |
|     my_strnxfrmlen_simple,
 | |
|     my_like_range_simple,
 | |
|     my_wildcmp_8bit,
 | |
|     my_strcasecmp_8bit,
 | |
|     my_instr_simple,
 | |
|     my_hash_sort_simple_nopad,
 | |
|     my_propagate_simple,
 | |
|     my_min_str_8bit_simple_nopad,
 | |
|     my_max_str_8bit_simple,
 | |
|     my_ci_get_id_generic,
 | |
|     my_ci_get_collation_name_generic,
 | |
|     my_ci_eq_collation_generic
 | |
| };
 |