UCS2 charset has been added

This commit is contained in:
unknown 2002-06-17 22:43:40 +05:00
parent 16c2fdffc0
commit 2ec3617476
6 changed files with 384 additions and 2 deletions

View file

@ -94,6 +94,7 @@
#undef HAVE_CHARSET_sjis
#undef HAVE_CHARSET_swe7
#undef HAVE_CHARSET_tis620
#undef HAVE_CHARSET_ucs2
#undef HAVE_CHARSET_ujis
#undef HAVE_CHARSET_usa7
#undef HAVE_CHARSET_utf8

View file

@ -1942,10 +1942,10 @@ AC_DIVERT_PUSH(0)
CHARSETS_AVAILABLE="armscii8 big5 cp1251 cp1257
croat czech danish dec8 dos estonia euc_kr gb2312 gbk
german1 greek hebrew hp8 hungarian koi8_ru koi8_ukr
latin1 latin1_de latin2 latin5 sjis swe7 tis620 ujis
latin1 latin1_de latin2 latin5 sjis swe7 tis620 ucs2 ujis
usa7 utf8 win1250 win1250ch win1251ukr"
CHARSETS_DEPRECATED="win1251"
CHARSETS_COMPLEX="big5 czech euc_kr gb2312 gbk latin1_de sjis tis620 ujis utf8 win1250ch"
CHARSETS_COMPLEX="big5 czech euc_kr gb2312 gbk latin1_de sjis tis620 ucs2 ujis utf8 win1250ch"
DEFAULT_CHARSET=latin1
AC_DIVERT_POP
@ -2067,6 +2067,10 @@ do
tis620)
AC_DEFINE(HAVE_CHARSET_tis620)
;;
ucs2)
AC_DEFINE(HAVE_CHARSET_ucs2)
use_mb="yes"
;;
ujis)
AC_DEFINE(HAVE_CHARSET_ujis)
use_mb="yes"

View file

@ -287,6 +287,40 @@ void my_hash_sort_utf8(struct charset_info_st *cs, const uchar *key, uint len, u
#endif
#ifdef HAVE_CHARSET_ucs2
extern uchar ctype_ucs2[];
extern uchar to_lower_ucs2[];
extern uchar to_upper_ucs2[];
int my_strnncoll_ucs2(CHARSET_INFO *cs,
const uchar *s, uint s_len, const uchar *t, uint t_len);
int my_strnxfrm_ucs2(CHARSET_INFO *cs,
uchar *dest, uint destlen, const uchar *src, uint srclen);
int my_ismbchar_ucs2(CHARSET_INFO *cs, const char *b, const char *e);
my_bool my_ismbhead_ucs2(CHARSET_INFO * cs, uint ch);
int my_mbcharlen_ucs2(CHARSET_INFO *cs, uint c);
void my_caseup_str_ucs2(CHARSET_INFO * cs, char * s);
void my_casedn_str_ucs2(CHARSET_INFO *cs, char * s);
void my_caseup_ucs2(CHARSET_INFO *cs, char *s, uint len);
void my_casedn_ucs2(CHARSET_INFO *cs, char *s, uint len);
int my_strcasecmp_ucs2(CHARSET_INFO *cs, const char *s, const char *t);
int my_strncasecmp_ucs2(CHARSET_INFO *cs, const char *s,const char *t,uint l);
int my_ucs2_uni (CHARSET_INFO *cs, my_wc_t *p, const uchar *s, const uchar *e);
int my_uni_ucs2 (CHARSET_INFO *cs, my_wc_t wc, uchar *b, uchar *e);
uint my_hash_caseup_ucs2(struct charset_info_st *cs, const byte *key, uint len);
void my_hash_sort_ucs2(struct charset_info_st *cs, const uchar *key, uint len, ulong *nr1, ulong *nr2);
#endif
#define _U 01 /* Upper case */
#define _L 02 /* Lower case */
#define _NMR 04 /* Numeral (digit) */

View file

@ -39,3 +39,4 @@ latin1_de 31
armscii8 32
utf8 33
win1250ch 34
ucs2 35

View file

@ -25,6 +25,10 @@
#define HAVE_UNIDATA
#endif
#ifdef HAVE_CHARSET_ucs2
#define HAVE_UNIDATA
#endif
#ifdef HAVE_UNIDATA
static MY_UNICASE_INFO plane00[]={
@ -1999,4 +2003,309 @@ int main()
#endif
#endif /* HAVE_CHARSET_UTF8 */
#ifdef HAVE_CHARSET_ucs2
uchar ctype_ucs2[] = {
0,
32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32,
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
132,132,132,132,132,132,132,132,132,132, 16, 16, 16, 16, 16, 16,
16,129,129,129,129,129,129, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 16, 16, 16,
16,130,130,130,130,130,130, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 16, 16, 16, 16, 32,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
uchar to_lower_ucs2[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
64, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,120,121,122, 91, 92, 93, 94, 95,
96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
};
uchar to_upper_ucs2[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127,
128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
};
int my_ucs2_uni (CHARSET_INFO *cs __attribute__((unused)) ,
my_wc_t * pwc, const uchar *s, const uchar *e)
{
if (s+2 > e) /* Need 2 characters */
return MY_CS_ILSEQ;
*pwc= ((unsigned char)s[0]) * 256 + ((unsigned char)s[1]);
return 2;
}
int my_uni_ucs2 (CHARSET_INFO *cs __attribute__((unused)) ,
my_wc_t wc, uchar *r, uchar *e)
{
if ( r+2 > e )
return MY_CS_TOOSMALL;
r[0]=wc >> 8;
r[1]=wc & 0xFF;
return 2;
}
void my_caseup_ucs2(CHARSET_INFO *cs, char *s, uint slen)
{
my_wc_t wc;
int res;
char *e=s+slen;
while ((s < e) && (res=my_ucs2_uni(cs,&wc, (uchar *)s, (uchar*)e))>0 )
{
int plane = (wc>>8) & 0xFF;
wc = uni_plane[plane] ? uni_plane[plane][wc & 0xFF].toupper : wc;
if (res != my_uni_ucs2(cs,wc,(uchar*)s,(uchar*)e))
break;
s+=res;
}
}
uint my_hash_caseup_ucs2(CHARSET_INFO *cs, const byte *s, uint slen)
{
my_wc_t wc;
register uint nr=1, nr2=4;
int res;
const char *e=s+slen;
while ((s < e) && (res=my_ucs2_uni(cs,&wc, (uchar *)s, (uchar*)e))>0 )
{
int plane = (wc>>8) & 0xFF;
wc = uni_plane[plane] ? uni_plane[plane][wc & 0xFF].toupper : wc;
nr^= (((nr & 63)+nr2)*(wc & 0xFF))+ (nr << 8);
nr2+=3;
nr^= (((nr & 63)+nr2)*(wc >> 8))+ (nr << 8);
nr2+=3;
s+=res;
}
return nr;
}
void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, uint slen, ulong *n1, ulong *n2)
{
my_wc_t wc;
int res;
const uchar *e=s+slen;
while ((s < e) && (res=my_ucs2_uni(cs,&wc, (uchar *)s, (uchar*)e))>0 )
{
int plane = (wc>>8) & 0xFF;
wc = uni_plane[plane] ? uni_plane[plane][wc & 0xFF].sort : wc;
n1[0]^= (((n1[0] & 63)+n2[0])*(wc & 0xFF))+ (n1[0] << 8);
n2[0]+=3;
n1[0]^= (((n1[0] & 63)+n2[0])*(wc >> 8))+ (n1[0] << 8);
n2[0]+=3;
s+=res;
}
}
void my_caseup_str_ucs2(CHARSET_INFO * cs __attribute__((unused)),
char * s __attribute__((unused)))
{
}
void my_casedn_ucs2(CHARSET_INFO *cs, char *s, uint slen)
{
my_wc_t wc;
int res;
char *e=s+slen;
while ((s < e) && (res=my_ucs2_uni(cs, &wc, (uchar*)s, (uchar*)e))>0)
{
int plane = (wc>>8) & 0xFF;
wc = uni_plane[plane] ? uni_plane[plane][wc & 0xFF].tolower : wc;
if (res != my_uni_ucs2(cs, wc, (uchar*)s, (uchar*)e))
{
break;
}
s+=res;
}
}
void my_casedn_str_ucs2(CHARSET_INFO *cs __attribute__((unused)),
char * s __attribute__((unused)))
{
}
int my_strnncoll_ucs2(CHARSET_INFO *cs,
const uchar *s, uint slen, const uchar *t, uint tlen)
{
int s_res,t_res;
my_wc_t s_wc,t_wc;
const uchar *se=s+slen;
const uchar *te=t+tlen;
while ( s < se && t < te )
{
int plane;
s_res=my_ucs2_uni(cs,&s_wc, s, se);
t_res=my_ucs2_uni(cs,&t_wc, t, te);
if ( s_res <= 0 || t_res <= 0 )
{
/* Incorrect string, compare by char value */
return ((int)s[0]-(int)t[0]);
}
plane=(s_wc>>8) & 0xFF;
s_wc = uni_plane[plane] ? uni_plane[plane][s_wc & 0xFF].sort : s_wc;
plane=(t_wc>>8) & 0xFF;
t_wc = uni_plane[plane] ? uni_plane[plane][t_wc & 0xFF].sort : t_wc;
if ( s_wc != t_wc )
{
return ((int) s_wc) - ((int) t_wc);
}
s+=s_res;
t+=t_res;
}
return ( (se-s) - (te-t) );
}
int my_strncasecmp_ucs2(CHARSET_INFO *cs,
const char *s, const char *t, uint len)
{
int s_res,t_res;
my_wc_t s_wc,t_wc;
const char *se=s+len;
const char *te=t+len;
while ( s < se && t < te )
{
int plane;
s_res=my_ucs2_uni(cs,&s_wc, (const uchar*)s, (const uchar*)se);
t_res=my_ucs2_uni(cs,&t_wc, (const uchar*)t, (const uchar*)te);
if ( s_res <= 0 || t_res <= 0 )
{
/* Incorrect string, compare by char value */
return ((int)s[0]-(int)t[0]);
}
plane=(s_wc>>8) & 0xFF;
s_wc = uni_plane[plane] ? uni_plane[plane][s_wc & 0xFF].tolower : s_wc;
plane=(t_wc>>8) & 0xFF;
t_wc = uni_plane[plane] ? uni_plane[plane][t_wc & 0xFF].tolower : t_wc;
if ( s_wc != t_wc )
return ((int) s_wc) - ((int) t_wc);
s+=s_res;
t+=t_res;
}
return ( (se-s) - (te-t) );
}
int my_strcasecmp_ucs2(CHARSET_INFO *cs, const char *s, const char *t)
{
uint s_len=strlen(s);
uint t_len=strlen(t);
uint len = (s_len > t_len) ? s_len : t_len;
return my_strncasecmp_ucs2(cs, s, t, len);
}
int my_strnxfrm_ucs2(CHARSET_INFO *cs,
uchar *dst, uint dstlen, const uchar *src, uint srclen)
{
my_wc_t wc;
int res;
int plane;
uchar *de = dst + dstlen;
const uchar *se = src + srclen;
const uchar *dst_orig = dst;
while( src < se && dst < de )
{
if ((res=my_ucs2_uni(cs,&wc, src, se))<0)
{
break;
}
src+=res;
srclen-=res;
plane=(wc>>8) & 0xFF;
wc = uni_plane[plane] ? uni_plane[plane][wc & 0xFF].sort : wc;
if ((res=my_uni_ucs2(cs,wc,dst,de)) <0)
{
break;
}
dst+=res;
}
return dst - dst_orig;
}
int my_ismbchar_ucs2(CHARSET_INFO *cs __attribute__((unused)),
const char *b __attribute__((unused)),
const char *e __attribute__((unused)))
{
return 2;
}
my_bool my_ismbhead_ucs2(CHARSET_INFO *cs __attribute__((unused)) ,
uint ch __attribute__((unused)))
{
return 1;
}
int my_mbcharlen_ucs2(CHARSET_INFO *cs __attribute__((unused)) ,
uint c __attribute__((unused)))
{
return 2;
}
#endif

View file

@ -3635,6 +3635,39 @@ CHARSET_INFO compiled_charsets[] = {
},
#endif
#ifdef HAVE_CHARSET_ucs2
{
35, /* number */
"ucs2", /* name */
ctype_ucs2, /* ctype */
to_lower_ucs2, /* to_lower */
to_upper_ucs2, /* to_upper */
to_upper_ucs2, /* sort_order */
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
1, /* strxfrm_multiply */
my_strnncoll_ucs2, /* strnncoll */
my_strnxfrm_ucs2, /* strnxfrm */
NULL, /* like_range */
2, /* mbmaxlen */
my_ismbchar_ucs2, /* ismbchar */
my_ismbhead_ucs2, /* ismbhead */
my_mbcharlen_ucs2, /* mbcharlen */
my_ucs2_uni, /* mb_wc */
my_uni_ucs2, /* wc_mb */
my_caseup_str_ucs2,
my_casedn_str_ucs2,
my_caseup_ucs2,
my_casedn_ucs2,
my_strcasecmp_ucs2,
my_strncasecmp_ucs2,
my_hash_caseup_ucs2,/* hash_caseup */
my_hash_sort_ucs2, /* hash_sort */
0
},
#endif
#ifdef HAVE_CHARSET_ujis
{
12, /* number */