UCS-2 aligning 0xAA -> 0x00AA

This commit is contained in:
unknown 2004-01-19 19:16:30 +04:00
parent d89fd8281b
commit 162f1dc5e6
21 changed files with 148 additions and 4 deletions

View file

@ -206,8 +206,9 @@ typedef struct charset_info_st
uchar state_map[256];
uchar ident_map[256];
uint strxfrm_multiply;
uint mbminlen;
uint mbmaxlen;
char max_sort_char; /* For LIKE optimization */
char max_sort_char; /* For LIKE optimization */
MY_CHARSET_HANDLER *cset;
MY_COLLATION_HANDLER *coll;

View file

@ -276,3 +276,51 @@ aardvara
aardvark
aardvarz
DROP TABLE t1;
SELECT HEX(_ucs2 0x0);
HEX(_ucs2 0x0)
0000
SELECT HEX(_ucs2 0x01);
HEX(_ucs2 0x01)
0001
SELECT HEX(_ucs2 0x012);
HEX(_ucs2 0x012)
0012
SELECT HEX(_ucs2 0x0123);
HEX(_ucs2 0x0123)
0123
SELECT HEX(_ucs2 0x01234);
HEX(_ucs2 0x01234)
00001234
SELECT HEX(_ucs2 0x012345);
HEX(_ucs2 0x012345)
00012345
SELECT HEX(_ucs2 0x0123456);
HEX(_ucs2 0x0123456)
00123456
SELECT HEX(_ucs2 0x01234567);
HEX(_ucs2 0x01234567)
01234567
SELECT HEX(_ucs2 0x012345678);
HEX(_ucs2 0x012345678)
000012345678
SELECT HEX(_ucs2 0x0123456789);
HEX(_ucs2 0x0123456789)
000123456789
SELECT HEX(_ucs2 0x0123456789A);
HEX(_ucs2 0x0123456789A)
00123456789A
SELECT HEX(_ucs2 0x0123456789AB);
HEX(_ucs2 0x0123456789AB)
0123456789AB
SELECT HEX(_ucs2 0x0123456789ABC);
HEX(_ucs2 0x0123456789ABC)
0000123456789ABC
SELECT HEX(_ucs2 0x0123456789ABCD);
HEX(_ucs2 0x0123456789ABCD)
000123456789ABCD
SELECT HEX(_ucs2 0x0123456789ABCDE);
HEX(_ucs2 0x0123456789ABCDE)
00123456789ABCDE
SELECT HEX(_ucs2 0x0123456789ABCDEF);
HEX(_ucs2 0x0123456789ABCDEF)
0123456789ABCDEF

View file

@ -197,3 +197,24 @@ DROP TABLE t1;
# END OF Bug 1264 test
#
########################################################
# Bug #2390
# Check alignment
#
SELECT HEX(_ucs2 0x0);
SELECT HEX(_ucs2 0x01);
SELECT HEX(_ucs2 0x012);
SELECT HEX(_ucs2 0x0123);
SELECT HEX(_ucs2 0x01234);
SELECT HEX(_ucs2 0x012345);
SELECT HEX(_ucs2 0x0123456);
SELECT HEX(_ucs2 0x01234567);
SELECT HEX(_ucs2 0x012345678);
SELECT HEX(_ucs2 0x0123456789);
SELECT HEX(_ucs2 0x0123456789A);
SELECT HEX(_ucs2 0x0123456789AB);
SELECT HEX(_ucs2 0x0123456789ABC);
SELECT HEX(_ucs2 0x0123456789ABCD);
SELECT HEX(_ucs2 0x0123456789ABCDE);
SELECT HEX(_ucs2 0x0123456789ABCDEF);

View file

@ -131,7 +131,8 @@ static void simple_cs_init_functions(CHARSET_INFO *cs)
cs->coll= &my_collation_8bit_simple_ci_handler;
cs->cset= &my_charset_8bit_handler;
cs->mbmaxlen = 1;
cs->mbminlen= 1;
cs->mbmaxlen= 1;
}
@ -273,6 +274,7 @@ static int simple_cs_copy_data(CHARSET_INFO *to, CHARSET_INFO *from)
if (create_fromuni(to))
goto err;
}
to->mbminlen= 1;
to->mbmaxlen= 1;
return 0;

View file

@ -477,7 +477,7 @@ public:
CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE)
{
collation.set(cs, dv);
str_value.set(str,length,cs);
str_value.set_or_copy_aligned(str,length,cs);
/*
We have to have a different max_length than 'length' here to
ensure that we get the right length if we do use the item
@ -493,7 +493,7 @@ public:
CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE)
{
collation.set(cs, dv);
str_value.set(str,length,cs);
str_value.set_or_copy_aligned(str,length,cs);
max_length= str_value.numchars()*cs->mbmaxlen;
set_name(name_par,0,cs);
decimals=NOT_FIXED_DEC;

View file

@ -228,6 +228,52 @@ bool String::copy(const char *str,uint32 arg_length, CHARSET_INFO *cs)
return FALSE;
}
/*
** For real multi-byte, ascii incompatible charactser sets,
** like UCS-2, add leading zeros if we have an incomplete character.
** Thus,
** SELECT _ucs2 0xAA
** will automatically be converted into
** SELECT _ucs2 0x00AA
*/
bool String::set_or_copy_aligned(const char *str,uint32 arg_length,
CHARSET_INFO *cs)
{
/* How many bytes are in incomplete character */
uint32 offs= (arg_length % cs->mbminlen);
if (!offs) /* All characters are complete, just copy */
{
set(str, arg_length, cs);
return FALSE;
}
offs= cs->mbmaxlen - offs; /* How many zeros we should prepend */
uint32 aligned_length= arg_length + offs;
if (alloc(aligned_length))
return TRUE;
/*
Probably this condition is not really necessary
because if aligned_length is 0 then offs is 0 too
and we'll return after calling set().
*/
if ((str_length= aligned_length))
{
/*
Note, this is only safe for little-endian UCS-2.
If we add big-endian UCS-2 sometimes, this code
will be more complicated. But it's OK for now.
*/
bzero((char*)Ptr, offs);
memcpy(Ptr + offs, str, arg_length);
}
Ptr[aligned_length]=0;
str_charset=cs;
return FALSE;
}
/* Copy with charset conversion */
bool String::copy(const char *str, uint32 arg_length,

View file

@ -183,6 +183,7 @@ public:
bool copy(); // Alloc string if not alloced
bool copy(const String &s); // Allocate new string
bool copy(const char *s,uint32 arg_length, CHARSET_INFO *cs); // Allocate new string
bool set_or_copy_aligned(const char *s, uint32 arg_length, CHARSET_INFO *cs);
bool copy(const char*s,uint32 arg_length, CHARSET_INFO *csfrom,
CHARSET_INFO *csto);
bool append(const String &s);

View file

@ -6281,6 +6281,7 @@ CHARSET_INFO my_charset_big5_chinese_ci=
"",
"",
1, /* strxfrm_multiply */
1, /* mbminlen */
2, /* mbmaxlen */
0,
&my_charset_big5_handler,
@ -6304,6 +6305,7 @@ CHARSET_INFO my_charset_big5_bin=
"",
"",
1, /* strxfrm_multiply */
1, /* mbminlen */
2, /* mbmaxlen */
0,
&my_charset_big5_handler,

View file

@ -381,6 +381,7 @@ CHARSET_INFO my_charset_bin =
NULL, /* tab_from_uni */
"","",
1, /* strxfrm_multiply */
1, /* mbminlen */
1, /* mbmaxlen */
(char) 255, /* max_sort_char */
&my_charset_handler,

View file

@ -631,6 +631,7 @@ CHARSET_INFO my_charset_latin2_czech_ci =
idx_uni_8859_2, /* tab_from_uni */
"","",
4, /* strxfrm_multiply */
1, /* mbminlen */
1, /* mbmaxlen */
0,
&my_charset_8bit_handler,

View file

@ -8689,6 +8689,7 @@ CHARSET_INFO my_charset_euckr_korean_ci=
"",
"",
1, /* strxfrm_multiply */
1, /* mbminlen */
2, /* mbmaxlen */
0,
&my_charset_handler,
@ -8712,6 +8713,7 @@ CHARSET_INFO my_charset_euckr_bin=
"",
"",
1, /* strxfrm_multiply */
1, /* mbminlen */
2, /* mbmaxlen */
0,
&my_charset_handler,

View file

@ -34,6 +34,7 @@ CHARSET_INFO compiled_charsets[] = {
0,
0,
0,
0,
NULL,
NULL
}

View file

@ -5740,6 +5740,7 @@ CHARSET_INFO my_charset_gb2312_chinese_ci=
"",
"",
1, /* strxfrm_multiply */
1, /* mbminlen */
2, /* mbmaxlen */
0,
&my_charset_handler,
@ -5762,6 +5763,7 @@ CHARSET_INFO my_charset_gb2312_bin=
"",
"",
1, /* strxfrm_multiply */
1, /* mbminlen */
2, /* mbmaxlen */
0,
&my_charset_handler,

View file

@ -9936,6 +9936,7 @@ CHARSET_INFO my_charset_gbk_chinese_ci=
"",
"",
1, /* strxfrm_multiply */
1, /* mbminlen */
2, /* mbmaxlen */
0,
&my_charset_handler,
@ -9958,6 +9959,7 @@ CHARSET_INFO my_charset_gbk_bin=
"",
"",
1, /* strxfrm_multiply */
1, /* mbminlen */
2, /* mbmaxlen */
0,
&my_charset_handler,

View file

@ -215,6 +215,7 @@ CHARSET_INFO my_charset_latin1=
NULL, /* tab_from_uni */
"","",
1, /* strxfrm_multiply */
1, /* mbminlen */
1, /* mbmaxlen */
0,
&my_charset_handler,
@ -410,6 +411,7 @@ CHARSET_INFO my_charset_latin1_german2_ci=
NULL, /* tab_from_uni */
"","",
2, /* strxfrm_multiply */
1, /* mbminlen */
1, /* mbmaxlen */
0,
&my_charset_handler,
@ -433,6 +435,7 @@ CHARSET_INFO my_charset_latin1_bin=
"",
"",
1, /* strxfrm_multiply */
1, /* mbminlen */
1, /* mbmaxlen */
0,
&my_charset_handler,

View file

@ -4525,6 +4525,7 @@ CHARSET_INFO my_charset_sjis_japanese_ci=
"",
"",
1, /* strxfrm_multiply */
1, /* mbminlen */
2, /* mbmaxlen */
0,
&my_charset_handler,
@ -4547,6 +4548,7 @@ CHARSET_INFO my_charset_sjis_bin=
"",
"",
1, /* strxfrm_multiply */
1, /* mbminlen */
2, /* mbmaxlen */
0,
&my_charset_handler,

View file

@ -956,6 +956,7 @@ CHARSET_INFO my_charset_tis620_thai_ci=
"",
"",
4, /* strxfrm_multiply */
1, /* mbminlen */
1, /* mbmaxlen */
0,
&my_charset_handler,
@ -978,6 +979,7 @@ CHARSET_INFO my_charset_tis620_bin=
"",
"",
1, /* strxfrm_multiply */
1, /* mbminlen */
1, /* mbmaxlen */
0,
&my_charset_handler,

View file

@ -1322,6 +1322,7 @@ CHARSET_INFO my_charset_ucs2_general_ci=
"",
"",
1, /* strxfrm_multiply */
2, /* mbminlen */
2, /* mbmaxlen */
0,
&my_charset_ucs2_handler,
@ -1345,6 +1346,7 @@ CHARSET_INFO my_charset_ucs2_bin=
"",
"",
1, /* strxfrm_multiply */
2, /* mbminlen */
2, /* mbmaxlen */
0,
&my_charset_ucs2_handler,

View file

@ -8480,6 +8480,7 @@ CHARSET_INFO my_charset_ujis_japanese_ci=
NULL, /* tab_from_uni */
"","",
1, /* strxfrm_multiply */
1, /* mbminlen */
3, /* mbmaxlen */
0,
&my_charset_handler,
@ -8502,6 +8503,7 @@ CHARSET_INFO my_charset_ujis_bin=
NULL, /* tab_from_uni */
"","",
1, /* strxfrm_multiply */
1, /* mbminlen */
3, /* mbmaxlen */
0,
&my_charset_handler,

View file

@ -2006,6 +2006,7 @@ CHARSET_INFO my_charset_utf8_general_ci=
"",
"",
1, /* strxfrm_multiply */
1, /* mbminlen */
3, /* mbmaxlen */
0,
&my_charset_handler,
@ -2029,6 +2030,7 @@ CHARSET_INFO my_charset_utf8_bin=
"",
"",
1, /* strxfrm_multiply */
1, /* mbminlen */
3, /* mbmaxlen */
0,
&my_charset_handler,

View file

@ -671,6 +671,7 @@ CHARSET_INFO my_charset_cp1250_czech_ci =
idx_uni_cp1250, /* tab_from_uni */
"","",
2, /* strxfrm_multiply */
1, /* mbminlen */
1, /* mbmaxlen */
0,
&my_charset_8bit_handler,