/* Copyright (C) 2002 MySQL AB This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include "m_string.h" #include "m_ctype.h" #include #include "stdarg.h" #include "assert.h" int my_strnxfrm_simple(CHARSET_INFO * cs, uchar *dest, uint len, const uchar *src, uint srclen) { uchar *map= cs->sort_order; DBUG_ASSERT(len >= srclen); len= min(len,srclen); if (dest != src) { const uchar *end; for ( end=src+len; src < end ; ) *dest++= map[*src++]; return len; } else { const uchar *end; for ( end=dest+len; dest < end ; dest++) *dest= (char) map[(uchar) *dest]; return len; } } int my_strnncoll_simple(CHARSET_INFO * cs, const uchar *s, uint slen, const uchar *t, uint tlen) { int len = ( slen > tlen ) ? tlen : slen; uchar *map= cs->sort_order; while (len--) { if (map[*s++] != map[*t++]) return ((int) map[s[-1]] - (int) map[t[-1]]); } return (int) (slen-tlen); } int my_strnncollsp_simple(CHARSET_INFO * cs, const uchar *s, uint slen, const uchar *t, uint tlen) { uchar *map= cs->sort_order; int len; for ( ; slen && my_isspace(cs, s[slen-1]) ; slen--); for ( ; tlen && my_isspace(cs, t[tlen-1]) ; tlen--); len = ( slen > tlen ) ? tlen : slen; while (len--) { if (map[*s++] != map[*t++]) return ((int) map[s[-1]] - (int) map[t[-1]]); } return (int) (slen-tlen); } void my_caseup_str_8bit(CHARSET_INFO * cs,char *str) { register uchar *map=cs->to_upper; while ((*str = (char) map[(uchar) *str]) != 0) str++; } void my_casedn_str_8bit(CHARSET_INFO * cs,char *str) { register uchar *map=cs->to_lower; while ((*str = (char) map[(uchar)*str]) != 0) str++; } void my_caseup_8bit(CHARSET_INFO * cs, char *str, uint length) { register uchar *map=cs->to_upper; for ( ; length>0 ; length--, str++) *str= (char) map[(uchar)*str]; } void my_casedn_8bit(CHARSET_INFO * cs, char *str, uint length) { register uchar *map=cs->to_lower; for ( ; length>0 ; length--, str++) *str= (char) map[(uchar) *str]; } int my_strcasecmp_8bit(CHARSET_INFO * cs,const char *s, const char *t) { register uchar *map=cs->to_upper; while (map[(uchar) *s] == map[(uchar) *t++]) if (!*s++) return 0; return ((int) map[(uchar) s[0]] - (int) map[(uchar) t[-1]]); } int my_mb_wc_8bit(CHARSET_INFO *cs,my_wc_t *wc, const unsigned char *str, const unsigned char *end __attribute__((unused))) { if (str >= end) return MY_CS_TOOFEW(0); *wc=cs->tab_to_uni[*str]; return (!wc[0] && str[0]) ? MY_CS_ILSEQ : 1; } int my_wc_mb_8bit(CHARSET_INFO *cs,my_wc_t wc, unsigned char *str, unsigned char *end __attribute__((unused))) { MY_UNI_IDX *idx; if (str >= end) return MY_CS_TOOSMALL; for (idx=cs->tab_from_uni; idx->tab ; idx++) { if (idx->from <= wc && idx->to >= wc) { str[0]= idx->tab[wc - idx->from]; return (!str[0] && wc) ? MY_CS_ILUNI : 1; } } return MY_CS_ILUNI; } /* We can't use vsprintf here as it's not guaranteed to return the length on all operating systems. This function is also not called in a safe environment, so the end buffer must be checked. */ int my_snprintf_8bit(CHARSET_INFO *cs __attribute__((unused)), char* to, uint n __attribute__((unused)), const char* fmt, ...) { va_list args; int result; va_start(args,fmt); result= my_vsnprintf(to, n, fmt, args); va_end(args); return result; } void my_hash_sort_simple(CHARSET_INFO *cs, const uchar *key, uint len, ulong *nr1, ulong *nr2) { register uchar *sort_order=cs->sort_order; const uchar *pos = key; key+= len; for (; pos < (uchar*) key ; pos++) { nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) * ((uint) sort_order[(uint) *pos])) + (nr1[0] << 8); nr2[0]+=3; } } long my_strntol_8bit(CHARSET_INFO *cs, const char *nptr, uint l, int base, char **endptr, int *err) { int negative; register ulong cutoff; register unsigned int cutlim; register ulong i; register const char *s; register unsigned char c; const char *save, *e; int overflow; *err= 0; /* Initialize error indicator */ #ifdef NOT_USED if (base < 0 || base == 1 || base > 36) base = 10; #endif s = nptr; e = nptr+l; for ( ; s='0' && c<='9') c -= '0'; else if (c>='A' && c<='F') c = c - 'A' + 10; else if (c>='a' && c<='f') c = c - 'a' + 10; else break; if (c >= base) break; if (i > cutoff || (i == cutoff && c > cutlim)) overflow = 1; else { i *= (ulong) base; i += c; } } if (s == save) goto noconv; if (endptr != NULL) *endptr = (char *) s; if (negative) { if (i > (ulong) LONG_MIN) overflow = 1; } else if (i > (ulong) LONG_MAX) overflow = 1; if (overflow) { err[0]= ERANGE; return negative ? LONG_MIN : LONG_MAX; } return (negative ? -((long) i) : (long) i); noconv: err[0]= EDOM; if (endptr != NULL) *endptr = (char *) nptr; return 0L; } ulong my_strntoul_8bit(CHARSET_INFO *cs, const char *nptr, uint l, int base, char **endptr, int *err) { int negative; register ulong cutoff; register unsigned int cutlim; register ulong i; register const char *s; register unsigned char c; const char *save, *e; int overflow; *err= 0; /* Initialize error indicator */ #ifdef NOT_USED if (base < 0 || base == 1 || base > 36) base = 10; #endif s = nptr; e = nptr+l; for( ; s='0' && c<='9') c -= '0'; else if (c>='A' && c<='F') c = c - 'A' + 10; else if (c>='a' && c<='f') c = c - 'a' + 10; else break; if (c >= base) break; if (i > cutoff || (i == cutoff && c > cutlim)) overflow = 1; else { i *= (ulong) base; i += c; } } if (s == save) goto noconv; if (endptr != NULL) *endptr = (char *) s; if (overflow) { err[0]= ERANGE; return ((ulong)~0L); } return (negative ? -((long) i) : (long) i); noconv: err[0]= EDOM; if (endptr != NULL) *endptr = (char *) nptr; return 0L; } longlong my_strntoll_8bit(CHARSET_INFO *cs __attribute__((unused)), const char *nptr, uint l, int base, char **endptr,int *err) { int negative; register ulonglong cutoff; register unsigned int cutlim; register ulonglong i; register const char *s, *e; register unsigned char c; const char *save; int overflow; *err= 0; /* Initialize error indicator */ #ifdef NOT_USED if (base < 0 || base == 1 || base > 36) base = 10; #endif s = nptr; e = nptr+l; for(; s='0' && c<='9') c -= '0'; else if (c>='A' && c<='F') c = c - 'A' + 10; else if (c>='a' && c<='f') c = c - 'a' + 10; else break; if (c >= base) break; if (i > cutoff || (i == cutoff && c > cutlim)) overflow = 1; else { i *= (ulonglong) base; i += c; } } if (s == save) goto noconv; if (endptr != NULL) *endptr = (char *) s; if (negative) { if (i > (ulonglong) LONGLONG_MIN) overflow = 1; } else if (i > (ulonglong) LONGLONG_MAX) overflow = 1; if (overflow) { err[0]= ERANGE; return negative ? LONGLONG_MIN : LONGLONG_MAX; } return (negative ? -((longlong) i) : (longlong) i); noconv: err[0]= EDOM; if (endptr != NULL) *endptr = (char *) nptr; return 0L; } ulonglong my_strntoull_8bit(CHARSET_INFO *cs, const char *nptr, uint l, int base, char **endptr, int *err) { int negative; register ulonglong cutoff; register unsigned int cutlim; register ulonglong i; register const char *s, *e; register unsigned char c; const char *save; int overflow; *err= 0; /* Initialize error indicator */ #ifdef NOT_USED if (base < 0 || base == 1 || base > 36) base = 10; #endif s = nptr; e = nptr+l; for(; s='0' && c<='9') c -= '0'; else if (c>='A' && c<='F') c = c - 'A' + 10; else if (c>='a' && c<='f') c = c - 'a' + 10; else break; if (c >= base) break; if (i > cutoff || (i == cutoff && c > cutlim)) overflow = 1; else { i *= (ulonglong) base; i += c; } } if (s == save) goto noconv; if (endptr != NULL) *endptr = (char *) s; if (overflow) { err[0]= ERANGE; return (~(ulonglong) 0); } return (negative ? -((longlong) i) : (longlong) i); noconv: err[0]= EDOM; if (endptr != NULL) *endptr = (char *) nptr; return 0L; } /* Read double from string SYNOPSIS: my_strntod_8bit() cs Character set information str String to convert to double length Optional length for string. end result pointer to end of converted string err Error number if failed conversion NOTES: If length is not INT_MAX32 or str[length] != 0 then the given str must be writeable If length == INT_MAX32 the str must be \0 terminated. It's implemented this way to save a buffer allocation and a memory copy. RETURN Value of number in string */ double my_strntod_8bit(CHARSET_INFO *cs __attribute__((unused)), char *str, uint length, char **end, int *err) { char end_char; double result; errno= 0; /* Safety */ /* The following define is to avoid warnings from valgrind as str[length] may not be defined (which is not fatal in real life) */ #ifdef HAVE_purify if (length == INT_MAX32) #else if (length == INT_MAX32 || str[length] == 0) #endif result= strtod(str, end); else { end_char= str[length]; str[length]= 0; result= strtod(str, end); str[length]= end_char; /* Restore end char */ } *err= errno; return result; } /* This is a fast version optimized for the case of radix 10 / -10 Assume len >= 1 */ int my_long10_to_str_8bit(CHARSET_INFO *cs __attribute__((unused)), char *dst, uint len, int radix, long int val) { char buffer[66]; register char *p, *e; long int new_val; uint sign=0; e = p = &buffer[sizeof(buffer)-1]; *p= 0; if (radix < 0) { if (val < 0) { val= -val; *dst++= '-'; len--; sign= 1; } } new_val = (long) ((unsigned long int) val / 10); *--p = '0'+ (char) ((unsigned long int) val - (unsigned long) new_val * 10); val = new_val; while (val != 0) { new_val=val/10; *--p = '0' + (char) (val-new_val*10); val= new_val; } len= min(len, (uint) (e-p)); memcpy(dst, p, len); return (int) len+sign; } int my_longlong10_to_str_8bit(CHARSET_INFO *cs __attribute__((unused)), char *dst, uint len, int radix, longlong val) { char buffer[65]; register char *p, *e; long long_val; uint sign= 0; if (radix < 0) { if (val < 0) { val = -val; *dst++= '-'; len--; sign= 1; } } e = p = &buffer[sizeof(buffer)-1]; *p= 0; if (val == 0) { *--p= '0'; len= 1; goto cnv; } while ((ulonglong) val > (ulonglong) LONG_MAX) { ulonglong quo=(ulonglong) val/(uint) 10; uint rem= (uint) (val- quo* (uint) 10); *--p = '0' + rem; val= quo; } long_val= (long) val; while (long_val != 0) { long quo= long_val/10; *--p = '0' + (long_val - quo*10); long_val= quo; } len= min(len, (uint) (e-p)); cnv: memcpy(dst, p, len); return len+sign; } /* ** Compare string against string with wildcard ** 0 if matched ** -1 if not matched with wildcard ** 1 if matched with wildcard */ #ifdef LIKE_CMP_TOUPPER #define likeconv(s,A) (uchar) my_toupper(s,A) #else #define likeconv(s,A) (uchar) (s)->sort_order[(uchar) (A)] #endif #define INC_PTR(cs,A,B) A++ int my_wildcmp_8bit(CHARSET_INFO *cs, const char *str,const char *str_end, const char *wildstr,const char *wildend, int escape, int w_one, int w_many) { int result= -1; /* Not found, using wildcards */ while (wildstr != wildend) { while (*wildstr != w_many && *wildstr != w_one) { if (*wildstr == escape && wildstr+1 != wildend) wildstr++; if (str == str_end || likeconv(cs,*wildstr++) != likeconv(cs,*str++)) return(1); /* No match */ if (wildstr == wildend) return (str != str_end); /* Match if both are at end */ result=1; /* Found an anchor char */ } if (*wildstr == w_one) { do { if (str == str_end) /* Skip one char if possible */ return (result); INC_PTR(cs,str,str_end); } while (++wildstr < wildend && *wildstr == w_one); if (wildstr == wildend) break; } if (*wildstr == w_many) { /* Found w_many */ uchar cmp; wildstr++; /* Remove any '%' and '_' from the wild search string */ for (; wildstr != wildend ; wildstr++) { if (*wildstr == w_many) continue; if (*wildstr == w_one) { if (str == str_end) return (-1); INC_PTR(cs,str,str_end); continue; } break; /* Not a wild character */ } if (wildstr == wildend) return(0); /* Ok if w_many is last */ if (str == str_end) return -1; if ((cmp= *wildstr) == escape && wildstr+1 != wildend) cmp= *++wildstr; INC_PTR(cs,wildstr,wildend); /* This is compared trough cmp */ cmp=likeconv(cs,cmp); do { while (str != str_end && likeconv(cs,*str) != cmp) str++; if (str++ == str_end) return (-1); { int tmp=my_wildcmp_8bit(cs,str,str_end,wildstr,wildend,escape,w_one,w_many); if (tmp <= 0) return (tmp); } } while (str != str_end && wildstr[0] != w_many); return(-1); } } return (str != str_end ? 1 : 0); } /* ** Calculate min_str and max_str that ranges a LIKE string. ** Arguments: ** ptr Pointer to LIKE string. ** ptr_length Length of LIKE string. ** escape Escape character in LIKE. (Normally '\'). ** All escape characters should be removed from min_str and max_str ** res_length Length of min_str and max_str. ** min_str Smallest case sensitive string that ranges LIKE. ** Should be space padded to res_length. ** max_str Largest case sensitive string that ranges LIKE. ** Normally padded with the biggest character sort value. ** ** The function should return 0 if ok and 1 if the LIKE string can't be ** optimized ! */ my_bool my_like_range_simple(CHARSET_INFO *cs, const char *ptr,uint ptr_length, int escape, int w_one, int w_many, uint res_length, char *min_str,char *max_str, uint *min_length,uint *max_length) { const char *end=ptr+ptr_length; char *min_org=min_str; char *min_end=min_str+res_length; for (; ptr != end && min_str != min_end ; ptr++) { if (*ptr == escape && ptr+1 != end) { ptr++; /* Skip escape */ *min_str++= *max_str++ = *ptr; continue; } if (*ptr == w_one) /* '_' in SQL */ { *min_str++='\0'; /* This should be min char */ *max_str++=cs->max_sort_char; continue; } if (*ptr == w_many) /* '%' in SQL */ { *min_length= (uint) (min_str - min_org); *max_length=res_length; do { *min_str++ = ' '; /* Because if key compression */ *max_str++ = cs->max_sort_char; } while (min_str != min_end); return 0; } *min_str++= *max_str++ = *ptr; } *min_length= *max_length = (uint) (min_str - min_org); /* Temporary fix for handling w_one at end of string (key compression) */ { char *tmp; for (tmp= min_str ; tmp > min_org && tmp[-1] == '\0';) *--tmp=' '; } while (min_str != min_end) *min_str++ = *max_str++ = ' '; /* Because if key compression */ return 0; } ulong my_scan_8bit(CHARSET_INFO *cs, const char *str, const char *end, int sq) { const char *str0= str; switch (sq) { case MY_SEQ_INTTAIL: if (*str == '.') { for(str++ ; str != end && *str == '0' ; str++); return str-str0; } return 0; case MY_SEQ_SPACES: for ( ; str < end ; str++) { if (!my_isspace(cs,*str)) break; } return str-str0; default: return 0; } } void my_fill_8bit(CHARSET_INFO *cs __attribute__((unused)), char *s, uint l, int fill) { bfill(s,l,fill); } uint my_numchars_8bit(CHARSET_INFO *cs __attribute__((unused)), const char *b, const char *e) { return e-b; } uint my_charpos_8bit(CHARSET_INFO *cs __attribute__((unused)), const char *b __attribute__((unused)), const char *e __attribute__((unused)), uint pos) { return pos; } uint my_lengthsp_8bit(CHARSET_INFO *cs __attribute__((unused)), const char *ptr, uint length) { const char *end= ptr+length; while (end > ptr && end[-1] == ' ') end--; return (uint) (end-ptr); } uint my_instr_simple(CHARSET_INFO *cs, const char *b, uint b_length, const char *s, uint s_length, my_match_t *match, uint nmatch) { register const uchar *str, *search, *end, *search_end; if (s_length <= b_length) { if (!s_length) { if (nmatch) { match->beg= 0; match->end= 0; match->mblen= 0; } return 1; /* Empty string is always found */ } str= (const uchar*) b; search= (const uchar*) s; end= (const uchar*) b+b_length-s_length+1; search_end= (const uchar*) s + s_length; skipp: while (str != end) { if (cs->sort_order[*str++] == cs->sort_order[*search]) { register const uchar *i,*j; i= str; j= search+1; while (j != search_end) if (cs->sort_order[*i++] != cs->sort_order[*j++]) goto skipp; if (nmatch > 0) { match[0].beg= 0; match[0].end= str- (const uchar*)b-1; match[0].mblen= match[0].end; if (nmatch > 1) { match[1].beg= match[0].end; match[1].end= match[0].end+s_length; match[1].mblen= match[1].end-match[1].beg; } } return 2; } } } return 0; } MY_CHARSET_HANDLER my_charset_8bit_handler= { NULL, /* ismbchar */ my_mbcharlen_8bit, /* mbcharlen */ my_numchars_8bit, my_charpos_8bit, my_lengthsp_8bit, my_mb_wc_8bit, my_wc_mb_8bit, my_caseup_str_8bit, my_casedn_str_8bit, my_caseup_8bit, my_casedn_8bit, my_snprintf_8bit, my_long10_to_str_8bit, my_longlong10_to_str_8bit, my_fill_8bit, my_strntol_8bit, my_strntoul_8bit, my_strntoll_8bit, my_strntoull_8bit, my_strntod_8bit, my_scan_8bit }; MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler = { my_strnncoll_simple, my_strnncollsp_simple, my_strnxfrm_simple, my_like_range_simple, my_wildcmp_8bit, my_strcasecmp_8bit, my_instr_simple, my_hash_sort_simple };