UCS-2 aligning 0xAA -> 0x00AA

This commit is contained in:
unknown 2004-01-19 19:16:30 +04:00
parent d89fd8281b
commit 162f1dc5e6
21 changed files with 148 additions and 4 deletions

View file

@ -206,8 +206,9 @@ typedef struct charset_info_st
uchar state_map[256]; uchar state_map[256];
uchar ident_map[256]; uchar ident_map[256];
uint strxfrm_multiply; uint strxfrm_multiply;
uint mbminlen;
uint mbmaxlen; uint mbmaxlen;
char max_sort_char; /* For LIKE optimization */ char max_sort_char; /* For LIKE optimization */
MY_CHARSET_HANDLER *cset; MY_CHARSET_HANDLER *cset;
MY_COLLATION_HANDLER *coll; MY_COLLATION_HANDLER *coll;

View file

@ -276,3 +276,51 @@ aardvara
aardvark aardvark
aardvarz aardvarz
DROP TABLE t1; DROP TABLE t1;
SELECT HEX(_ucs2 0x0);
HEX(_ucs2 0x0)
0000
SELECT HEX(_ucs2 0x01);
HEX(_ucs2 0x01)
0001
SELECT HEX(_ucs2 0x012);
HEX(_ucs2 0x012)
0012
SELECT HEX(_ucs2 0x0123);
HEX(_ucs2 0x0123)
0123
SELECT HEX(_ucs2 0x01234);
HEX(_ucs2 0x01234)
00001234
SELECT HEX(_ucs2 0x012345);
HEX(_ucs2 0x012345)
00012345
SELECT HEX(_ucs2 0x0123456);
HEX(_ucs2 0x0123456)
00123456
SELECT HEX(_ucs2 0x01234567);
HEX(_ucs2 0x01234567)
01234567
SELECT HEX(_ucs2 0x012345678);
HEX(_ucs2 0x012345678)
000012345678
SELECT HEX(_ucs2 0x0123456789);
HEX(_ucs2 0x0123456789)
000123456789
SELECT HEX(_ucs2 0x0123456789A);
HEX(_ucs2 0x0123456789A)
00123456789A
SELECT HEX(_ucs2 0x0123456789AB);
HEX(_ucs2 0x0123456789AB)
0123456789AB
SELECT HEX(_ucs2 0x0123456789ABC);
HEX(_ucs2 0x0123456789ABC)
0000123456789ABC
SELECT HEX(_ucs2 0x0123456789ABCD);
HEX(_ucs2 0x0123456789ABCD)
000123456789ABCD
SELECT HEX(_ucs2 0x0123456789ABCDE);
HEX(_ucs2 0x0123456789ABCDE)
00123456789ABCDE
SELECT HEX(_ucs2 0x0123456789ABCDEF);
HEX(_ucs2 0x0123456789ABCDEF)
0123456789ABCDEF

View file

@ -197,3 +197,24 @@ DROP TABLE t1;
# END OF Bug 1264 test # END OF Bug 1264 test
# #
######################################################## ########################################################
# Bug #2390
# Check alignment
#
SELECT HEX(_ucs2 0x0);
SELECT HEX(_ucs2 0x01);
SELECT HEX(_ucs2 0x012);
SELECT HEX(_ucs2 0x0123);
SELECT HEX(_ucs2 0x01234);
SELECT HEX(_ucs2 0x012345);
SELECT HEX(_ucs2 0x0123456);
SELECT HEX(_ucs2 0x01234567);
SELECT HEX(_ucs2 0x012345678);
SELECT HEX(_ucs2 0x0123456789);
SELECT HEX(_ucs2 0x0123456789A);
SELECT HEX(_ucs2 0x0123456789AB);
SELECT HEX(_ucs2 0x0123456789ABC);
SELECT HEX(_ucs2 0x0123456789ABCD);
SELECT HEX(_ucs2 0x0123456789ABCDE);
SELECT HEX(_ucs2 0x0123456789ABCDEF);

View file

@ -131,7 +131,8 @@ static void simple_cs_init_functions(CHARSET_INFO *cs)
cs->coll= &my_collation_8bit_simple_ci_handler; cs->coll= &my_collation_8bit_simple_ci_handler;
cs->cset= &my_charset_8bit_handler; cs->cset= &my_charset_8bit_handler;
cs->mbmaxlen = 1; cs->mbminlen= 1;
cs->mbmaxlen= 1;
} }
@ -273,6 +274,7 @@ static int simple_cs_copy_data(CHARSET_INFO *to, CHARSET_INFO *from)
if (create_fromuni(to)) if (create_fromuni(to))
goto err; goto err;
} }
to->mbminlen= 1;
to->mbmaxlen= 1; to->mbmaxlen= 1;
return 0; return 0;

View file

@ -477,7 +477,7 @@ public:
CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE) CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE)
{ {
collation.set(cs, dv); collation.set(cs, dv);
str_value.set(str,length,cs); str_value.set_or_copy_aligned(str,length,cs);
/* /*
We have to have a different max_length than 'length' here to We have to have a different max_length than 'length' here to
ensure that we get the right length if we do use the item ensure that we get the right length if we do use the item
@ -493,7 +493,7 @@ public:
CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE) CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE)
{ {
collation.set(cs, dv); collation.set(cs, dv);
str_value.set(str,length,cs); str_value.set_or_copy_aligned(str,length,cs);
max_length= str_value.numchars()*cs->mbmaxlen; max_length= str_value.numchars()*cs->mbmaxlen;
set_name(name_par,0,cs); set_name(name_par,0,cs);
decimals=NOT_FIXED_DEC; decimals=NOT_FIXED_DEC;

View file

@ -228,6 +228,52 @@ bool String::copy(const char *str,uint32 arg_length, CHARSET_INFO *cs)
return FALSE; return FALSE;
} }
/*
** For real multi-byte, ascii incompatible charactser sets,
** like UCS-2, add leading zeros if we have an incomplete character.
** Thus,
** SELECT _ucs2 0xAA
** will automatically be converted into
** SELECT _ucs2 0x00AA
*/
bool String::set_or_copy_aligned(const char *str,uint32 arg_length,
CHARSET_INFO *cs)
{
/* How many bytes are in incomplete character */
uint32 offs= (arg_length % cs->mbminlen);
if (!offs) /* All characters are complete, just copy */
{
set(str, arg_length, cs);
return FALSE;
}
offs= cs->mbmaxlen - offs; /* How many zeros we should prepend */
uint32 aligned_length= arg_length + offs;
if (alloc(aligned_length))
return TRUE;
/*
Probably this condition is not really necessary
because if aligned_length is 0 then offs is 0 too
and we'll return after calling set().
*/
if ((str_length= aligned_length))
{
/*
Note, this is only safe for little-endian UCS-2.
If we add big-endian UCS-2 sometimes, this code
will be more complicated. But it's OK for now.
*/
bzero((char*)Ptr, offs);
memcpy(Ptr + offs, str, arg_length);
}
Ptr[aligned_length]=0;
str_charset=cs;
return FALSE;
}
/* Copy with charset convertion */ /* Copy with charset convertion */
bool String::copy(const char *str, uint32 arg_length, bool String::copy(const char *str, uint32 arg_length,

View file

@ -183,6 +183,7 @@ public:
bool copy(); // Alloc string if not alloced bool copy(); // Alloc string if not alloced
bool copy(const String &s); // Allocate new string bool copy(const String &s); // Allocate new string
bool copy(const char *s,uint32 arg_length, CHARSET_INFO *cs); // Allocate new string bool copy(const char *s,uint32 arg_length, CHARSET_INFO *cs); // Allocate new string
bool set_or_copy_aligned(const char *s, uint32 arg_length, CHARSET_INFO *cs);
bool copy(const char*s,uint32 arg_length, CHARSET_INFO *csfrom, bool copy(const char*s,uint32 arg_length, CHARSET_INFO *csfrom,
CHARSET_INFO *csto); CHARSET_INFO *csto);
bool append(const String &s); bool append(const String &s);

View file

@ -6281,6 +6281,7 @@ CHARSET_INFO my_charset_big5_chinese_ci=
"", "",
"", "",
1, /* strxfrm_multiply */ 1, /* strxfrm_multiply */
1, /* mbminlen */
2, /* mbmaxlen */ 2, /* mbmaxlen */
0, 0,
&my_charset_big5_handler, &my_charset_big5_handler,
@ -6304,6 +6305,7 @@ CHARSET_INFO my_charset_big5_bin=
"", "",
"", "",
1, /* strxfrm_multiply */ 1, /* strxfrm_multiply */
1, /* mbminlen */
2, /* mbmaxlen */ 2, /* mbmaxlen */
0, 0,
&my_charset_big5_handler, &my_charset_big5_handler,

View file

@ -381,6 +381,7 @@ CHARSET_INFO my_charset_bin =
NULL, /* tab_from_uni */ NULL, /* tab_from_uni */
"","", "","",
1, /* strxfrm_multiply */ 1, /* strxfrm_multiply */
1, /* mbminlen */
1, /* mbmaxlen */ 1, /* mbmaxlen */
(char) 255, /* max_sort_char */ (char) 255, /* max_sort_char */
&my_charset_handler, &my_charset_handler,

View file

@ -631,6 +631,7 @@ CHARSET_INFO my_charset_latin2_czech_ci =
idx_uni_8859_2, /* tab_from_uni */ idx_uni_8859_2, /* tab_from_uni */
"","", "","",
4, /* strxfrm_multiply */ 4, /* strxfrm_multiply */
1, /* mbminlen */
1, /* mbmaxlen */ 1, /* mbmaxlen */
0, 0,
&my_charset_8bit_handler, &my_charset_8bit_handler,

View file

@ -8689,6 +8689,7 @@ CHARSET_INFO my_charset_euckr_korean_ci=
"", "",
"", "",
1, /* strxfrm_multiply */ 1, /* strxfrm_multiply */
1, /* mbminlen */
2, /* mbmaxlen */ 2, /* mbmaxlen */
0, 0,
&my_charset_handler, &my_charset_handler,
@ -8712,6 +8713,7 @@ CHARSET_INFO my_charset_euckr_bin=
"", "",
"", "",
1, /* strxfrm_multiply */ 1, /* strxfrm_multiply */
1, /* mbminlen */
2, /* mbmaxlen */ 2, /* mbmaxlen */
0, 0,
&my_charset_handler, &my_charset_handler,

View file

@ -34,6 +34,7 @@ CHARSET_INFO compiled_charsets[] = {
0, 0,
0, 0,
0, 0,
0,
NULL, NULL,
NULL NULL
} }

View file

@ -5740,6 +5740,7 @@ CHARSET_INFO my_charset_gb2312_chinese_ci=
"", "",
"", "",
1, /* strxfrm_multiply */ 1, /* strxfrm_multiply */
1, /* mbminlen */
2, /* mbmaxlen */ 2, /* mbmaxlen */
0, 0,
&my_charset_handler, &my_charset_handler,
@ -5762,6 +5763,7 @@ CHARSET_INFO my_charset_gb2312_bin=
"", "",
"", "",
1, /* strxfrm_multiply */ 1, /* strxfrm_multiply */
1, /* mbminlen */
2, /* mbmaxlen */ 2, /* mbmaxlen */
0, 0,
&my_charset_handler, &my_charset_handler,

View file

@ -9936,6 +9936,7 @@ CHARSET_INFO my_charset_gbk_chinese_ci=
"", "",
"", "",
1, /* strxfrm_multiply */ 1, /* strxfrm_multiply */
1, /* mbminlen */
2, /* mbmaxlen */ 2, /* mbmaxlen */
0, 0,
&my_charset_handler, &my_charset_handler,
@ -9958,6 +9959,7 @@ CHARSET_INFO my_charset_gbk_bin=
"", "",
"", "",
1, /* strxfrm_multiply */ 1, /* strxfrm_multiply */
1, /* mbminlen */
2, /* mbmaxlen */ 2, /* mbmaxlen */
0, 0,
&my_charset_handler, &my_charset_handler,

View file

@ -215,6 +215,7 @@ CHARSET_INFO my_charset_latin1=
NULL, /* tab_from_uni */ NULL, /* tab_from_uni */
"","", "","",
1, /* strxfrm_multiply */ 1, /* strxfrm_multiply */
1, /* mbminlen */
1, /* mbmaxlen */ 1, /* mbmaxlen */
0, 0,
&my_charset_handler, &my_charset_handler,
@ -410,6 +411,7 @@ CHARSET_INFO my_charset_latin1_german2_ci=
NULL, /* tab_from_uni */ NULL, /* tab_from_uni */
"","", "","",
2, /* strxfrm_multiply */ 2, /* strxfrm_multiply */
1, /* mbminlen */
1, /* mbmaxlen */ 1, /* mbmaxlen */
0, 0,
&my_charset_handler, &my_charset_handler,
@ -433,6 +435,7 @@ CHARSET_INFO my_charset_latin1_bin=
"", "",
"", "",
1, /* strxfrm_multiply */ 1, /* strxfrm_multiply */
1, /* mbminlen */
1, /* mbmaxlen */ 1, /* mbmaxlen */
0, 0,
&my_charset_handler, &my_charset_handler,

View file

@ -4525,6 +4525,7 @@ CHARSET_INFO my_charset_sjis_japanese_ci=
"", "",
"", "",
1, /* strxfrm_multiply */ 1, /* strxfrm_multiply */
1, /* mbminlen */
2, /* mbmaxlen */ 2, /* mbmaxlen */
0, 0,
&my_charset_handler, &my_charset_handler,
@ -4547,6 +4548,7 @@ CHARSET_INFO my_charset_sjis_bin=
"", "",
"", "",
1, /* strxfrm_multiply */ 1, /* strxfrm_multiply */
1, /* mbminlen */
2, /* mbmaxlen */ 2, /* mbmaxlen */
0, 0,
&my_charset_handler, &my_charset_handler,

View file

@ -956,6 +956,7 @@ CHARSET_INFO my_charset_tis620_thai_ci=
"", "",
"", "",
4, /* strxfrm_multiply */ 4, /* strxfrm_multiply */
1, /* mbminlen */
1, /* mbmaxlen */ 1, /* mbmaxlen */
0, 0,
&my_charset_handler, &my_charset_handler,
@ -978,6 +979,7 @@ CHARSET_INFO my_charset_tis620_bin=
"", "",
"", "",
1, /* strxfrm_multiply */ 1, /* strxfrm_multiply */
1, /* mbminlen */
1, /* mbmaxlen */ 1, /* mbmaxlen */
0, 0,
&my_charset_handler, &my_charset_handler,

View file

@ -1322,6 +1322,7 @@ CHARSET_INFO my_charset_ucs2_general_ci=
"", "",
"", "",
1, /* strxfrm_multiply */ 1, /* strxfrm_multiply */
2, /* mbminlen */
2, /* mbmaxlen */ 2, /* mbmaxlen */
0, 0,
&my_charset_ucs2_handler, &my_charset_ucs2_handler,
@ -1345,6 +1346,7 @@ CHARSET_INFO my_charset_ucs2_bin=
"", "",
"", "",
1, /* strxfrm_multiply */ 1, /* strxfrm_multiply */
2, /* mbminlen */
2, /* mbmaxlen */ 2, /* mbmaxlen */
0, 0,
&my_charset_ucs2_handler, &my_charset_ucs2_handler,

View file

@ -8480,6 +8480,7 @@ CHARSET_INFO my_charset_ujis_japanese_ci=
NULL, /* tab_from_uni */ NULL, /* tab_from_uni */
"","", "","",
1, /* strxfrm_multiply */ 1, /* strxfrm_multiply */
1, /* mbminlen */
3, /* mbmaxlen */ 3, /* mbmaxlen */
0, 0,
&my_charset_handler, &my_charset_handler,
@ -8502,6 +8503,7 @@ CHARSET_INFO my_charset_ujis_bin=
NULL, /* tab_from_uni */ NULL, /* tab_from_uni */
"","", "","",
1, /* strxfrm_multiply */ 1, /* strxfrm_multiply */
1, /* mbminlen */
3, /* mbmaxlen */ 3, /* mbmaxlen */
0, 0,
&my_charset_handler, &my_charset_handler,

View file

@ -2006,6 +2006,7 @@ CHARSET_INFO my_charset_utf8_general_ci=
"", "",
"", "",
1, /* strxfrm_multiply */ 1, /* strxfrm_multiply */
1, /* mbminlen */
3, /* mbmaxlen */ 3, /* mbmaxlen */
0, 0,
&my_charset_handler, &my_charset_handler,
@ -2029,6 +2030,7 @@ CHARSET_INFO my_charset_utf8_bin=
"", "",
"", "",
1, /* strxfrm_multiply */ 1, /* strxfrm_multiply */
1, /* mbminlen */
3, /* mbmaxlen */ 3, /* mbmaxlen */
0, 0,
&my_charset_handler, &my_charset_handler,

View file

@ -671,6 +671,7 @@ CHARSET_INFO my_charset_cp1250_czech_ci =
idx_uni_cp1250, /* tab_from_uni */ idx_uni_cp1250, /* tab_from_uni */
"","", "","",
2, /* strxfrm_multiply */ 2, /* strxfrm_multiply */
1, /* mbminlen */
1, /* mbmaxlen */ 1, /* mbmaxlen */
0, 0,
&my_charset_8bit_handler, &my_charset_8bit_handler,