mariadb/strings/ctype-mb.c
unknown 3c46af6cf4 BTREE-indexes in HEAP tables can now be used to optimize ORDER BY
Don't read character set files if we are using only the default charset. In most cases the user will no longer get a warning about missing character set files
Compare strings with space extend instead of space strip. Now the following comparisons hold:  "a" == "a " and "a\t" < "a". (Bug #3152).
Note: Because of the above fix, one has to do a REPAIR on any table that has an ASCII character < 32 last in a CHAR/VARCHAR/TEXT column.


heap/hp_hash.c:
  Comments and DBUG information
include/my_handler.h:
  Updated prototype for mi_compare_text
myisam/ft_boolean_search.c:
  Updated calls to mi_compare_text
myisam/ft_nlq_search.c:
  Updated calls to mi_compare_text
myisam/ft_parser.c:
  Updated calls to mi_compare_text
myisam/ft_stopwords.c:
  Updated calls to mi_compare_text
myisam/ft_update.c:
  Updated calls to mi_compare_text
myisam/mi_check.c:
  Updated calls to mi_compare_text
myisam/mi_search.c:
  Changed all string comparisons that removed end space to instead extend the shorter string with space
myisam/mi_unique.c:
  Updated calls to mi_compare_text
myisam/mi_write.c:
  Updated calls to mi_compare_text
myisam/myisam_ftdump.c:
  Removed compiler warning
mysql-test/r/ctype_collate.result:
  Fixed wrong result
mysql-test/r/heap_btree.result:
  More tests
mysql-test/t/heap_btree.test:
  more tests
mysys/charset.c:
  Don't read charsets if we are only using default charset
  Don't require 'init_available_charsets' to succeed.
mysys/my_handler.c:
  Compare strings with space extend instead of space strip
mysys/tree.c:
  Fixed code to get better results for range optimizer
sql/field.cc:
  Compare strings with space extend instead of space strip
sql/filesort.cc:
  Compare strings with space extend instead of space strip
sql/ha_heap.cc:
  Created bit map for keys that are using BTREE. This allows the optimizer to use BTREE's for sorting
sql/ha_heap.h:
  Created bit map for keys that are using BTREE. This allows the optimizer to use BTREE's for sorting
strings/ctype-big5.c:
  Compare strings with space extend instead of space strip
strings/ctype-czech.c:
  Indentation cleanup. Should be fixed to use space extend
strings/ctype-gbk.c:
  Compare strings with space extend instead of space strip
strings/ctype-latin1.c:
  Compare strings with space extend instead of space strip
  Added missing my_hash_sort_latin1_de function
strings/ctype-mb.c:
  For binary strings, don't remove end space when comparing
strings/ctype-simple.c:
  Compare strings with space extend instead of space strip
strings/ctype-sjis.c:
  Compare strings with space extend instead of space strip
strings/ctype-tis620.c:
  Added comments that we should fix end space handling
strings/ctype-ucs2.c:
  indentation fixes
strings/ctype-utf8.c:
  Added comments that we should fix end space handling
strings/ctype-win1250ch.c:
  Added comments that we should fix end space handling
2004-03-25 15:05:01 +02:00

526 lines
12 KiB
C

/* Copyright (C) 2000 MySQL AB
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
#include <my_global.h>
#include "m_ctype.h"
#include "m_string.h"
#ifdef USE_MB
/*
  Upper-case a NUL-terminated string in place.

  Bytes belonging to a character that my_ismbchar() reports as multi-byte
  are skipped unchanged; every other byte is replaced by its entry in
  cs->to_upper.
*/
void my_caseup_str_mb(CHARSET_INFO * cs, char *str)
{
  register uchar *upper= cs->to_upper;
  register char *str_end= str + strlen(str); /* BAR TODO: remove strlen() call */
  register uint32 mb_len;

  while (*str)
  {
    mb_len= my_ismbchar(cs, str, str_end);
    if (mb_len)
    {
      /* Multi-byte character: step over it without touching its bytes. */
      str+= mb_len;
      continue;
    }
    *str= (char) upper[(uchar) *str];
    str++;
  }
}
/*
  Lower-case a NUL-terminated string in place.

  Bytes belonging to a character that my_ismbchar() reports as multi-byte
  are skipped unchanged; every other byte is replaced by its entry in
  cs->to_lower.
*/
void my_casedn_str_mb(CHARSET_INFO * cs, char *str)
{
  register uchar *lower= cs->to_lower;
  register char *str_end= str + strlen(str);
  register uint32 mb_len;

  while (*str)
  {
    mb_len= my_ismbchar(cs, str, str_end);
    if (mb_len)
    {
      /* Multi-byte character: step over it without touching its bytes. */
      str+= mb_len;
      continue;
    }
    *str= (char) lower[(uchar) *str];
    str++;
  }
}
/*
  Upper-case the first 'length' bytes of 'str' in place.

  Same conversion as the NUL-terminated variant, but the buffer is bounded
  by an explicit length: multi-byte characters (per my_ismbchar()) are
  skipped, single bytes are mapped through cs->to_upper.
*/
void my_caseup_mb(CHARSET_INFO * cs, char *str, uint length)
{
  register uchar *upper= cs->to_upper;
  register char *str_end= str + length;
  register uint32 mb_len;

  while (str < str_end)
  {
    mb_len= my_ismbchar(cs, str, str_end);
    if (mb_len)
    {
      /* Multi-byte character: step over it without touching its bytes. */
      str+= mb_len;
      continue;
    }
    *str= (char) upper[(uchar) *str];
    str++;
  }
}
/*
  Lower-case the first 'length' bytes of 'str' in place.

  Same conversion as the NUL-terminated variant, but the buffer is bounded
  by an explicit length: multi-byte characters (per my_ismbchar()) are
  skipped, single bytes are mapped through cs->to_lower.
*/
void my_casedn_mb(CHARSET_INFO * cs, char *str, uint length)
{
  register uchar *lower= cs->to_lower;
  register char *str_end= str + length;
  register uint32 mb_len;

  while (str < str_end)
  {
    mb_len= my_ismbchar(cs, str, str_end);
    if (mb_len)
    {
      /* Multi-byte character: step over it without touching its bytes. */
      str+= mb_len;
      continue;
    }
    *str= (char) lower[(uchar) *str];
    str++;
  }
}
/*
  Case-insensitive equality test of two NUL-terminated strings.

  Multi-byte characters in 's' must match 't' byte for byte; single-byte
  characters are compared after mapping both sides through cs->to_upper.

  Returns 0 when the strings are equal, non-zero otherwise. The result is
  not an ordering: a mismatch yields 1, and when 's' is exhausted first the
  next byte of 't' is returned.
*/
int my_strcasecmp_mb(CHARSET_INFO * cs,const char *s, const char *t)
{
  register uchar *upper= cs->to_upper;
  register const char *s_end= s + strlen(s);
  register uint32 mb_len;

  while (s < s_end)
  {
    mb_len= my_ismbchar(cs, s, s_end);
    if (mb_len)
    {
      /* Multi-byte character in s: every byte must be identical in t. */
      for (; mb_len; mb_len--)
      {
        if (*s++ != *t++)
          return 1;
      }
      continue;
    }
    /* s holds a single-byte character here, so t must hold one as well. */
    if (my_mbcharlen(cs, *t) > 1)
      return 1;
    if (upper[(uchar) *s] != upper[(uchar) *t])
      return 1;
    s++;
    t++;
  }
  /* s is exhausted: equal only if t ends here too. */
  return *t;
}
/*
** Compare string against string with wildcard
** 0 if matched
** -1 if not matched; a w_many scan in the caller stops retrying
** 1 if not matched; a w_many scan in the caller retries at the next position
*/
/*
  INC_PTR(cs,A,B): advance pointer A by one character inside the buffer
  that ends at B. When use_mb_flag is set and my_ismbchar() reports a
  multi-byte character at A, A moves by that length; otherwise by one byte.

  The macro reads a variable named use_mb_flag from the scope where it is
  expanded, and it evaluates A (and my_ismbchar) more than once, so A must
  be a plain lvalue without side effects.
*/
#define INC_PTR(cs,A,B) A+=((use_mb_flag && \
my_ismbchar(cs,A,B)) ? my_ismbchar(cs,A,B) : 1)
/*
  likeconv(s,A): map byte A through the sort_order table of charset s, so
  that single-byte characters are compared by their sort weight.
*/
#define likeconv(s,A) (uchar) (s)->sort_order[(uchar) (A)]
/*
  Match the string [str, str_end) against the wildcard pattern
  [wildstr, wildend).

  escape  - pattern byte that makes the following pattern byte literal
  w_one   - pattern byte that matches exactly one character
  w_many  - pattern byte that matches any run of characters

  Literal single-byte characters are compared through likeconv() (the
  charset's sort_order table); multi-byte characters are compared with
  memcmp().

  Returns 0 on a match. A non-match returns 1 or -1: the recursive call
  made while scanning for w_many keeps trying later positions on 1 and
  gives up on any value <= 0.
*/
int my_wildcmp_mb(CHARSET_INFO *cs,
const char *str,const char *str_end,
const char *wildstr,const char *wildend,
int escape, int w_one, int w_many)
{
int result= -1; /* Not found, using wildcards */
bool use_mb_flag=use_mb(cs);
while (wildstr != wildend)
{
/* Consume the run of literal pattern characters. */
while (*wildstr != w_many && *wildstr != w_one)
{
int l;
/* An escape byte (unless it is the last pattern byte) is skipped. */
if (*wildstr == escape && wildstr+1 != wildend)
wildstr++;
if (use_mb_flag &&
(l = my_ismbchar(cs, wildstr, wildend)))
{
/* Multi-byte literal: the string must contain the same bytes. */
if (str+l > str_end || memcmp(str, wildstr, l) != 0)
return 1;
str += l;
wildstr += l;
}
else
if (str == str_end || likeconv(cs,*wildstr++) != likeconv(cs,*str++))
return(1); /* No match */
if (wildstr == wildend)
return (str != str_end); /* Match if both are at end */
result=1; /* Found an anchor char */
}
if (*wildstr == w_one)
{
/* Each consecutive w_one consumes one character of the string. */
do
{
if (str == str_end) /* Skip one char if possible */
return (result);
INC_PTR(cs,str,str_end);
} while (++wildstr < wildend && *wildstr == w_one);
if (wildstr == wildend)
break;
}
if (*wildstr == w_many)
{ /* Found w_many */
uchar cmp;
const char* mb = wildstr;
int mblen=0;
wildstr++;
/* Remove any '%' and '_' from the wild search string */
for (; wildstr != wildend ; wildstr++)
{
if (*wildstr == w_many)
continue;
if (*wildstr == w_one)
{
if (str == str_end)
return (-1);
INC_PTR(cs,str,str_end);
continue;
}
break; /* Not a wild character */
}
if (wildstr == wildend)
return(0); /* Ok if w_many is last */
if (str == str_end)
return -1;
/*
  cmp/mb/mblen describe the first literal character after w_many: cmp is
  its first byte, mb points at it, and mblen is its multi-byte length
  (0 when it is a single byte or use_mb_flag is off).
*/
if ((cmp= *wildstr) == escape && wildstr+1 != wildend)
cmp= *++wildstr;
mb=wildstr;
LINT_INIT(mblen);
if (use_mb_flag)
mblen = my_ismbchar(cs, wildstr, wildend);
INC_PTR(cs,wildstr,wildend); /* This is compared trough cmp */
cmp=likeconv(cs,cmp);
/*
  Find each occurrence of that literal character in the string and try to
  match the remaining pattern after it.
*/
do
{
if (use_mb_flag)
{
for (;;)
{
if (str >= str_end)
return -1;
if (mblen)
{
if (str+mblen <= str_end && memcmp(str, mb, mblen) == 0)
{
str += mblen;
break;
}
}
else if (!my_ismbchar(cs, str, str_end) &&
likeconv(cs,*str) == cmp)
{
str++;
break;
}
INC_PTR(cs,str, str_end);
}
}
else
{
while (str != str_end && likeconv(cs,*str) != cmp)
str++;
if (str++ == str_end) return (-1);
}
{
/* 0 = matched, negative = give up; a positive result retries later. */
int tmp=my_wildcmp_mb(cs,str,str_end,wildstr,wildend,escape,w_one,w_many);
if (tmp <= 0)
return (tmp);
}
} while (str != str_end && wildstr[0] != w_many);
return(-1);
}
}
/* Pattern exhausted: it is a match only if the string is exhausted too. */
return (str != str_end ? 1 : 0);
}
/*
  Count the characters in the byte range [b, e).

  A position where my_ismbchar() reports a multi-byte character counts as
  one character of that length; any other byte counts as one character.
*/
uint my_numchars_mb(CHARSET_INFO *cs __attribute__((unused)),
                    const char *b, const char *e)
{
  register uint32 count= 0;

  while (b < e)
  {
    register uint32 char_len= my_ismbchar(cs, b, e);

    b+= char_len ? char_len : 1;
    count++;
  }
  return count;
}
/*
  Return the byte offset of character number 'pos' within [b, e).

  Characters are stepped over with my_ismbchar() (one byte when it reports
  no multi-byte character). If the range ends before 'pos' characters have
  been skipped, the value (e - b) + 2 is returned, i.e. an offset that lies
  beyond the end of the range.
*/
uint my_charpos_mb(CHARSET_INFO *cs __attribute__((unused)),
                   const char *b, const char *e, uint pos)
{
  const char *start= b;
  uint char_len;

  for (; pos && b < e; pos--)
  {
    char_len= my_ismbchar(cs, b, e);
    b+= char_len ? char_len : 1;
  }
  if (pos)
    return e + 2 - start;               /* Range too short */
  return b - start;
}
/*
  Return the length in bytes of the leading part of [b, e) that consists of
  at most 'pos' characters successfully decoded by cs->cset->mb_wc().

  Decoding stops at the first position where mb_wc() returns a non-positive
  value, or after 'pos' characters, whichever comes first.
*/
uint my_well_formed_len_mb(CHARSET_INFO *cs,
                           const char *b, const char *e, uint pos)
{
  my_wc_t wc;
  int mblen;
  const char *b_start= b;

  while (pos)
  {
    /*
      Stop on zero as well as on negative results: a zero length cannot
      advance b, so continuing would only repeat the same failing decode
      until pos is used up.
    */
    if ((mblen= cs->cset->mb_wc(cs, &wc, b, e)) <= 0)
      break;
    b+= mblen;
    pos--;
  }
  return b - b_start;
}
/*
  Search for the string s (s_length bytes) inside b (b_length bytes),
  stepping through b one character at a time and comparing with the
  collation's strnncoll().

  Returns
    0  s was not found (or is longer than b)
    1  s is empty; match[0] is zeroed when nmatch > 0
    2  s was found; when nmatch > 0, match[0] describes the part of b
       before the hit (end = byte offset, mblen = number of characters
       stepped over) and, when nmatch > 1, match[1] describes the hit
       itself (its mblen is not computed and set to 0)
*/
uint my_instr_mb(CHARSET_INFO *cs,
                 const char *b, uint b_length,
                 const char *s, uint s_length,
                 my_match_t *match, uint nmatch)
{
  register const char *end, *b0;
  const char *b_end;
  int res= 0;

  if (s_length <= b_length)
  {
    if (!s_length)
    {
      if (nmatch)
      {
        match->beg= 0;
        match->end= 0;
        match->mblen= 0;
      }
      return 1;                         /* Empty string is always found */
    }

    b0= b;
    b_end= b + b_length;                /* Real end of the searched string */
    end= b_end - s_length + 1;          /* One past the last start position */

    while (b < end)
    {
      int mblen;

      if (!cs->coll->strnncoll(cs, (unsigned char*) b, s_length,
                               (unsigned char*) s, s_length))
      {
        if (nmatch)
        {
          match[0].beg= 0;
          match[0].end= b-b0;
          match[0].mblen= res;
          if (nmatch > 1)
          {
            match[1].beg= match[0].end;
            match[1].end= match[0].end+s_length;
            match[1].mblen= 0;          /* Not computed */
          }
        }
        return 2;
      }
      /*
        Measure the current character against the real end of b, not
        against 'end'. Bounding by 'end' makes a multi-byte character that
        straddles it look truncated; b would then advance a single byte and
        the next comparison would start in the middle of a character.
      */
      mblen= my_ismbchar(cs, b, b_end);
      if (!mblen)
        mblen= 1;
      b+= mblen;
      res++;
    }
  }
  return 0;
}
/* BINARY collations handlers for MB charsets */
/*
  Binary comparison of two length-delimited strings.

  The common prefix is compared with memcmp(); when it is identical the
  difference of the lengths decides, so the shorter string sorts first.
*/
static int my_strnncoll_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
                               const uchar *s, uint slen,
                               const uchar *t, uint tlen)
{
  int diff= memcmp(s, t, min(slen, tlen));

  if (diff)
    return diff;
  return (int) (slen - tlen);
}
/*
  Produce the sort key of a binary string, which is the string itself.

  At most min(len, srclen) bytes are copied from src to dest; no copy is
  made when dest and src are the same buffer.

  Returns the number of key bytes, min(len, srclen). The clamp is applied
  before the aliasing test so that an in-place call (dest == src) returns
  the same length as a copying call instead of the raw buffer size.
*/
static int my_strnxfrm_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
                              uchar * dest, uint len,
                              const uchar *src,
                              uint srclen)
{
  len= min(len, srclen);
  if (dest != src)
    memcpy(dest, src, len);
  return len;
}
/*
  "Case-insensitive" comparison for binary collations: no case folding is
  applied, the two NUL-terminated strings are simply compared with strcmp().
*/
static int my_strcasecmp_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
                                const char *s, const char *t)
{
  int cmp= strcmp(s, t);

  return cmp;
}
/*
  Fold the 'len' bytes at 'key' into the running hash state *nr1 / *nr2.

  Every byte of the key takes part in the hash; *nr2 grows by 3 for each
  byte processed.
*/
static void my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
                                const uchar *key, uint len,ulong *nr1, ulong *nr2)
{
  const uchar *cur;
  const uchar *key_end= key + len;

  for (cur= key; cur < key_end; cur++)
  {
    *nr1^= (ulong) ((((uint) *nr1 & 63) + *nr2) *
                    ((uint) *cur)) + (*nr1 << 8);
    *nr2+= 3;
  }
}
/*
  Binary (byte-exact) wildcard match of the string [str, str_end) against
  the pattern [wildstr, wildend).

  escape  - pattern byte that makes the following pattern byte literal
  w_one   - pattern byte that matches exactly one character
  w_many  - pattern byte that matches any run of characters

  Literal characters are compared byte for byte; no sort_order mapping is
  applied anywhere, including the part of the pattern that follows w_many.

  Returns 0 on a match. A non-match returns 1 or -1: the recursive call
  made while scanning for w_many keeps trying later positions on 1 and
  gives up on any value <= 0.
*/
static int my_wildcmp_mb_bin(CHARSET_INFO *cs,
                             const char *str,const char *str_end,
                             const char *wildstr,const char *wildend,
                             int escape, int w_one, int w_many)
{
  int result= -1;                       /* Not found, using wildcards */
  bool use_mb_flag=use_mb(cs);

  while (wildstr != wildend)
  {
    /* Consume the run of literal pattern characters. */
    while (*wildstr != w_many && *wildstr != w_one)
    {
      int l;
      /* An escape byte (unless it is the last pattern byte) is skipped. */
      if (*wildstr == escape && wildstr+1 != wildend)
        wildstr++;
      if (use_mb_flag &&
          (l = my_ismbchar(cs, wildstr, wildend)))
      {
        /* Multi-byte literal: the string must contain the same bytes. */
        if (str+l > str_end || memcmp(str, wildstr, l) != 0)
          return 1;
        str += l;
        wildstr += l;
      }
      else
      if (str == str_end || *wildstr++ != *str++)
        return(1);                      /* No match */
      if (wildstr == wildend)
        return (str != str_end);        /* Match if both are at end */
      result=1;                         /* Found an anchor char */
    }
    if (*wildstr == w_one)
    {
      /* Each consecutive w_one consumes one character of the string. */
      do
      {
        if (str == str_end)             /* Skip one char if possible */
          return (result);
        INC_PTR(cs,str,str_end);
      } while (++wildstr < wildend && *wildstr == w_one);
      if (wildstr == wildend)
        break;
    }
    if (*wildstr == w_many)
    {                                   /* Found w_many */
      uchar cmp;
      const char* mb = wildstr;
      int mblen=0;

      wildstr++;
      /* Remove any '%' and '_' from the wild search string */
      for (; wildstr != wildend ; wildstr++)
      {
        if (*wildstr == w_many)
          continue;
        if (*wildstr == w_one)
        {
          if (str == str_end)
            return (-1);
          INC_PTR(cs,str,str_end);
          continue;
        }
        break;                          /* Not a wild character */
      }
      if (wildstr == wildend)
        return(0);                      /* Ok if w_many is last */
      if (str == str_end)
        return -1;

      /*
        cmp/mb/mblen describe the first literal character after w_many:
        cmp is its first byte, mb points at it, and mblen is its multi-byte
        length (0 when it is a single byte or use_mb_flag is off).
      */
      if ((cmp= *wildstr) == escape && wildstr+1 != wildend)
        cmp= *++wildstr;

      mb=wildstr;
      if (use_mb_flag)
        mblen = my_ismbchar(cs, wildstr, wildend);
      INC_PTR(cs,wildstr,wildend);      /* This is compared through cmp */

      /*
        Find each occurrence of that literal character in the string and
        try to match the remaining pattern after it.
      */
      do
      {
        if (use_mb_flag)
        {
          for (;;)
          {
            if (str >= str_end)
              return -1;
            if (mblen)
            {
              if (str+mblen <= str_end && memcmp(str, mb, mblen) == 0)
              {
                str += mblen;
                break;
              }
            }
            /*
              Compare as unsigned bytes: cmp is a uchar, and a plain char
              holding a value >= 0x80 would promote to a negative int and
              never compare equal where char is signed.
            */
            else if (!my_ismbchar(cs, str, str_end) && (uchar) *str == cmp)
            {
              str++;
              break;
            }
            INC_PTR(cs,str, str_end);
          }
        }
        else
        {
          while (str != str_end && (uchar) *str != cmp)
            str++;
          if (str++ == str_end) return (-1);
        }
        {
          /*
            Recurse into the binary matcher itself so the rest of the
            pattern is also compared byte for byte.
            0 = matched, negative = give up; a positive result retries.
          */
          int tmp=my_wildcmp_mb_bin(cs,str,str_end,wildstr,wildend,escape,w_one,w_many);
          if (tmp <= 0)
            return (tmp);
        }
      } while (str != str_end && wildstr[0] != w_many);
      return(-1);
    }
  }
  /* Pattern exhausted: it is a match only if the string is exhausted too. */
  return (str != str_end ? 1 : 0);
}
/*
  Collation handler table that bundles the binary comparison routines of
  this file for multi-byte character sets.

  The initializer is positional, so the order of the entries must follow
  the member order of MY_COLLATION_HANDLER (declared outside this file).
  NOTE(review): the same comparison function is installed in the first two
  slots; the second is presumably the space-padded comparison -- confirm
  against the struct declaration.
*/
MY_COLLATION_HANDLER my_collation_mb_bin_handler =
{
my_strnncoll_mb_bin,
my_strnncoll_mb_bin,
my_strnxfrm_mb_bin,
/* Not defined in this file; shared with other collations. */
my_like_range_simple,
my_wildcmp_mb_bin,
my_strcasecmp_mb_bin,
/* The generic multi-byte substring search is reused as is. */
my_instr_mb,
my_hash_sort_mb_bin
};
#endif