mirror of
https://github.com/MariaDB/server.git
synced 2025-01-26 00:34:18 +01:00
UCS-2 aligning 0xAA -> 0x00AA
This commit is contained in:
parent
d89fd8281b
commit
162f1dc5e6
21 changed files with 148 additions and 4 deletions
|
@ -206,8 +206,9 @@ typedef struct charset_info_st
|
|||
uchar state_map[256];
|
||||
uchar ident_map[256];
|
||||
uint strxfrm_multiply;
|
||||
uint mbminlen;
|
||||
uint mbmaxlen;
|
||||
char max_sort_char; /* For LIKE optimization */
|
||||
char max_sort_char; /* For LIKE optimization */
|
||||
|
||||
MY_CHARSET_HANDLER *cset;
|
||||
MY_COLLATION_HANDLER *coll;
|
||||
|
|
|
@ -276,3 +276,51 @@ aardvara
|
|||
aardvark
|
||||
aardvarz
|
||||
DROP TABLE t1;
|
||||
SELECT HEX(_ucs2 0x0);
|
||||
HEX(_ucs2 0x0)
|
||||
0000
|
||||
SELECT HEX(_ucs2 0x01);
|
||||
HEX(_ucs2 0x01)
|
||||
0001
|
||||
SELECT HEX(_ucs2 0x012);
|
||||
HEX(_ucs2 0x012)
|
||||
0012
|
||||
SELECT HEX(_ucs2 0x0123);
|
||||
HEX(_ucs2 0x0123)
|
||||
0123
|
||||
SELECT HEX(_ucs2 0x01234);
|
||||
HEX(_ucs2 0x01234)
|
||||
00001234
|
||||
SELECT HEX(_ucs2 0x012345);
|
||||
HEX(_ucs2 0x012345)
|
||||
00012345
|
||||
SELECT HEX(_ucs2 0x0123456);
|
||||
HEX(_ucs2 0x0123456)
|
||||
00123456
|
||||
SELECT HEX(_ucs2 0x01234567);
|
||||
HEX(_ucs2 0x01234567)
|
||||
01234567
|
||||
SELECT HEX(_ucs2 0x012345678);
|
||||
HEX(_ucs2 0x012345678)
|
||||
000012345678
|
||||
SELECT HEX(_ucs2 0x0123456789);
|
||||
HEX(_ucs2 0x0123456789)
|
||||
000123456789
|
||||
SELECT HEX(_ucs2 0x0123456789A);
|
||||
HEX(_ucs2 0x0123456789A)
|
||||
00123456789A
|
||||
SELECT HEX(_ucs2 0x0123456789AB);
|
||||
HEX(_ucs2 0x0123456789AB)
|
||||
0123456789AB
|
||||
SELECT HEX(_ucs2 0x0123456789ABC);
|
||||
HEX(_ucs2 0x0123456789ABC)
|
||||
0000123456789ABC
|
||||
SELECT HEX(_ucs2 0x0123456789ABCD);
|
||||
HEX(_ucs2 0x0123456789ABCD)
|
||||
000123456789ABCD
|
||||
SELECT HEX(_ucs2 0x0123456789ABCDE);
|
||||
HEX(_ucs2 0x0123456789ABCDE)
|
||||
00123456789ABCDE
|
||||
SELECT HEX(_ucs2 0x0123456789ABCDEF);
|
||||
HEX(_ucs2 0x0123456789ABCDEF)
|
||||
0123456789ABCDEF
|
||||
|
|
|
@ -197,3 +197,24 @@ DROP TABLE t1;
|
|||
# END OF Bug 1264 test
|
||||
#
|
||||
########################################################
|
||||
|
||||
|
||||
# Bug #2390
|
||||
# Check alignment
|
||||
#
|
||||
SELECT HEX(_ucs2 0x0);
|
||||
SELECT HEX(_ucs2 0x01);
|
||||
SELECT HEX(_ucs2 0x012);
|
||||
SELECT HEX(_ucs2 0x0123);
|
||||
SELECT HEX(_ucs2 0x01234);
|
||||
SELECT HEX(_ucs2 0x012345);
|
||||
SELECT HEX(_ucs2 0x0123456);
|
||||
SELECT HEX(_ucs2 0x01234567);
|
||||
SELECT HEX(_ucs2 0x012345678);
|
||||
SELECT HEX(_ucs2 0x0123456789);
|
||||
SELECT HEX(_ucs2 0x0123456789A);
|
||||
SELECT HEX(_ucs2 0x0123456789AB);
|
||||
SELECT HEX(_ucs2 0x0123456789ABC);
|
||||
SELECT HEX(_ucs2 0x0123456789ABCD);
|
||||
SELECT HEX(_ucs2 0x0123456789ABCDE);
|
||||
SELECT HEX(_ucs2 0x0123456789ABCDEF);
|
||||
|
|
|
@ -131,7 +131,8 @@ static void simple_cs_init_functions(CHARSET_INFO *cs)
|
|||
cs->coll= &my_collation_8bit_simple_ci_handler;
|
||||
|
||||
cs->cset= &my_charset_8bit_handler;
|
||||
cs->mbmaxlen = 1;
|
||||
cs->mbminlen= 1;
|
||||
cs->mbmaxlen= 1;
|
||||
}
|
||||
|
||||
|
||||
|
@ -273,6 +274,7 @@ static int simple_cs_copy_data(CHARSET_INFO *to, CHARSET_INFO *from)
|
|||
if (create_fromuni(to))
|
||||
goto err;
|
||||
}
|
||||
to->mbminlen= 1;
|
||||
to->mbmaxlen= 1;
|
||||
|
||||
return 0;
|
||||
|
|
|
@ -477,7 +477,7 @@ public:
|
|||
CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE)
|
||||
{
|
||||
collation.set(cs, dv);
|
||||
str_value.set(str,length,cs);
|
||||
str_value.set_or_copy_aligned(str,length,cs);
|
||||
/*
|
||||
We have to have a different max_length than 'length' here to
|
||||
ensure that we get the right length if we do use the item
|
||||
|
@ -493,7 +493,7 @@ public:
|
|||
CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE)
|
||||
{
|
||||
collation.set(cs, dv);
|
||||
str_value.set(str,length,cs);
|
||||
str_value.set_or_copy_aligned(str,length,cs);
|
||||
max_length= str_value.numchars()*cs->mbmaxlen;
|
||||
set_name(name_par,0,cs);
|
||||
decimals=NOT_FIXED_DEC;
|
||||
|
|
|
@ -228,6 +228,52 @@ bool String::copy(const char *str,uint32 arg_length, CHARSET_INFO *cs)
|
|||
return FALSE;
|
||||
}
|
||||
|
||||
/*
|
||||
** For real multi-byte, ascii incompatible charactser sets,
|
||||
** like UCS-2, add leading zeros if we have an incomplete character.
|
||||
** Thus,
|
||||
** SELECT _ucs2 0xAA
|
||||
** will automatically be converted into
|
||||
** SELECT _ucs2 0x00AA
|
||||
*/
|
||||
|
||||
bool String::set_or_copy_aligned(const char *str,uint32 arg_length,
|
||||
CHARSET_INFO *cs)
|
||||
{
|
||||
/* How many bytes are in incomplete character */
|
||||
uint32 offs= (arg_length % cs->mbminlen);
|
||||
|
||||
if (!offs) /* All characters are complete, just copy */
|
||||
{
|
||||
set(str, arg_length, cs);
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
offs= cs->mbmaxlen - offs; /* How many zeros we should prepend */
|
||||
uint32 aligned_length= arg_length + offs;
|
||||
if (alloc(aligned_length))
|
||||
return TRUE;
|
||||
|
||||
/*
|
||||
Probably this condition is not really necessary
|
||||
because if aligned_length is 0 then offs is 0 too
|
||||
and we'll return after calling set().
|
||||
*/
|
||||
if ((str_length= aligned_length))
|
||||
{
|
||||
/*
|
||||
Note, this is only safe for little-endian UCS-2.
|
||||
If we add big-endian UCS-2 sometimes, this code
|
||||
will be more complicated. But it's OK for now.
|
||||
*/
|
||||
bzero((char*)Ptr, offs);
|
||||
memcpy(Ptr + offs, str, arg_length);
|
||||
}
|
||||
Ptr[aligned_length]=0;
|
||||
str_charset=cs;
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* Copy with charset conversion */
|
||||
|
||||
bool String::copy(const char *str, uint32 arg_length,
|
||||
|
|
|
@ -183,6 +183,7 @@ public:
|
|||
bool copy(); // Alloc string if not alloced
|
||||
bool copy(const String &s); // Allocate new string
|
||||
bool copy(const char *s,uint32 arg_length, CHARSET_INFO *cs); // Allocate new string
|
||||
bool set_or_copy_aligned(const char *s, uint32 arg_length, CHARSET_INFO *cs);
|
||||
bool copy(const char*s,uint32 arg_length, CHARSET_INFO *csfrom,
|
||||
CHARSET_INFO *csto);
|
||||
bool append(const String &s);
|
||||
|
|
|
@ -6281,6 +6281,7 @@ CHARSET_INFO my_charset_big5_chinese_ci=
|
|||
"",
|
||||
"",
|
||||
1, /* strxfrm_multiply */
|
||||
1, /* mbminlen */
|
||||
2, /* mbmaxlen */
|
||||
0,
|
||||
&my_charset_big5_handler,
|
||||
|
@ -6304,6 +6305,7 @@ CHARSET_INFO my_charset_big5_bin=
|
|||
"",
|
||||
"",
|
||||
1, /* strxfrm_multiply */
|
||||
1, /* mbminlen */
|
||||
2, /* mbmaxlen */
|
||||
0,
|
||||
&my_charset_big5_handler,
|
||||
|
|
|
@ -381,6 +381,7 @@ CHARSET_INFO my_charset_bin =
|
|||
NULL, /* tab_from_uni */
|
||||
"","",
|
||||
1, /* strxfrm_multiply */
|
||||
1, /* mbminlen */
|
||||
1, /* mbmaxlen */
|
||||
(char) 255, /* max_sort_char */
|
||||
&my_charset_handler,
|
||||
|
|
|
@ -631,6 +631,7 @@ CHARSET_INFO my_charset_latin2_czech_ci =
|
|||
idx_uni_8859_2, /* tab_from_uni */
|
||||
"","",
|
||||
4, /* strxfrm_multiply */
|
||||
1, /* mbminlen */
|
||||
1, /* mbmaxlen */
|
||||
0,
|
||||
&my_charset_8bit_handler,
|
||||
|
|
|
@ -8689,6 +8689,7 @@ CHARSET_INFO my_charset_euckr_korean_ci=
|
|||
"",
|
||||
"",
|
||||
1, /* strxfrm_multiply */
|
||||
1, /* mbminlen */
|
||||
2, /* mbmaxlen */
|
||||
0,
|
||||
&my_charset_handler,
|
||||
|
@ -8712,6 +8713,7 @@ CHARSET_INFO my_charset_euckr_bin=
|
|||
"",
|
||||
"",
|
||||
1, /* strxfrm_multiply */
|
||||
1, /* mbminlen */
|
||||
2, /* mbmaxlen */
|
||||
0,
|
||||
&my_charset_handler,
|
||||
|
|
|
@ -34,6 +34,7 @@ CHARSET_INFO compiled_charsets[] = {
|
|||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
NULL,
|
||||
NULL
|
||||
}
|
||||
|
|
|
@ -5740,6 +5740,7 @@ CHARSET_INFO my_charset_gb2312_chinese_ci=
|
|||
"",
|
||||
"",
|
||||
1, /* strxfrm_multiply */
|
||||
1, /* mbminlen */
|
||||
2, /* mbmaxlen */
|
||||
0,
|
||||
&my_charset_handler,
|
||||
|
@ -5762,6 +5763,7 @@ CHARSET_INFO my_charset_gb2312_bin=
|
|||
"",
|
||||
"",
|
||||
1, /* strxfrm_multiply */
|
||||
1, /* mbminlen */
|
||||
2, /* mbmaxlen */
|
||||
0,
|
||||
&my_charset_handler,
|
||||
|
|
|
@ -9936,6 +9936,7 @@ CHARSET_INFO my_charset_gbk_chinese_ci=
|
|||
"",
|
||||
"",
|
||||
1, /* strxfrm_multiply */
|
||||
1, /* mbminlen */
|
||||
2, /* mbmaxlen */
|
||||
0,
|
||||
&my_charset_handler,
|
||||
|
@ -9958,6 +9959,7 @@ CHARSET_INFO my_charset_gbk_bin=
|
|||
"",
|
||||
"",
|
||||
1, /* strxfrm_multiply */
|
||||
1, /* mbminlen */
|
||||
2, /* mbmaxlen */
|
||||
0,
|
||||
&my_charset_handler,
|
||||
|
|
|
@ -215,6 +215,7 @@ CHARSET_INFO my_charset_latin1=
|
|||
NULL, /* tab_from_uni */
|
||||
"","",
|
||||
1, /* strxfrm_multiply */
|
||||
1, /* mbminlen */
|
||||
1, /* mbmaxlen */
|
||||
0,
|
||||
&my_charset_handler,
|
||||
|
@ -410,6 +411,7 @@ CHARSET_INFO my_charset_latin1_german2_ci=
|
|||
NULL, /* tab_from_uni */
|
||||
"","",
|
||||
2, /* strxfrm_multiply */
|
||||
1, /* mbminlen */
|
||||
1, /* mbmaxlen */
|
||||
0,
|
||||
&my_charset_handler,
|
||||
|
@ -433,6 +435,7 @@ CHARSET_INFO my_charset_latin1_bin=
|
|||
"",
|
||||
"",
|
||||
1, /* strxfrm_multiply */
|
||||
1, /* mbminlen */
|
||||
1, /* mbmaxlen */
|
||||
0,
|
||||
&my_charset_handler,
|
||||
|
|
|
@ -4525,6 +4525,7 @@ CHARSET_INFO my_charset_sjis_japanese_ci=
|
|||
"",
|
||||
"",
|
||||
1, /* strxfrm_multiply */
|
||||
1, /* mbminlen */
|
||||
2, /* mbmaxlen */
|
||||
0,
|
||||
&my_charset_handler,
|
||||
|
@ -4547,6 +4548,7 @@ CHARSET_INFO my_charset_sjis_bin=
|
|||
"",
|
||||
"",
|
||||
1, /* strxfrm_multiply */
|
||||
1, /* mbminlen */
|
||||
2, /* mbmaxlen */
|
||||
0,
|
||||
&my_charset_handler,
|
||||
|
|
|
@ -956,6 +956,7 @@ CHARSET_INFO my_charset_tis620_thai_ci=
|
|||
"",
|
||||
"",
|
||||
4, /* strxfrm_multiply */
|
||||
1, /* mbminlen */
|
||||
1, /* mbmaxlen */
|
||||
0,
|
||||
&my_charset_handler,
|
||||
|
@ -978,6 +979,7 @@ CHARSET_INFO my_charset_tis620_bin=
|
|||
"",
|
||||
"",
|
||||
1, /* strxfrm_multiply */
|
||||
1, /* mbminlen */
|
||||
1, /* mbmaxlen */
|
||||
0,
|
||||
&my_charset_handler,
|
||||
|
|
|
@ -1322,6 +1322,7 @@ CHARSET_INFO my_charset_ucs2_general_ci=
|
|||
"",
|
||||
"",
|
||||
1, /* strxfrm_multiply */
|
||||
2, /* mbminlen */
|
||||
2, /* mbmaxlen */
|
||||
0,
|
||||
&my_charset_ucs2_handler,
|
||||
|
@ -1345,6 +1346,7 @@ CHARSET_INFO my_charset_ucs2_bin=
|
|||
"",
|
||||
"",
|
||||
1, /* strxfrm_multiply */
|
||||
2, /* mbminlen */
|
||||
2, /* mbmaxlen */
|
||||
0,
|
||||
&my_charset_ucs2_handler,
|
||||
|
|
|
@ -8480,6 +8480,7 @@ CHARSET_INFO my_charset_ujis_japanese_ci=
|
|||
NULL, /* tab_from_uni */
|
||||
"","",
|
||||
1, /* strxfrm_multiply */
|
||||
1, /* mbminlen */
|
||||
3, /* mbmaxlen */
|
||||
0,
|
||||
&my_charset_handler,
|
||||
|
@ -8502,6 +8503,7 @@ CHARSET_INFO my_charset_ujis_bin=
|
|||
NULL, /* tab_from_uni */
|
||||
"","",
|
||||
1, /* strxfrm_multiply */
|
||||
1, /* mbminlen */
|
||||
3, /* mbmaxlen */
|
||||
0,
|
||||
&my_charset_handler,
|
||||
|
|
|
@ -2006,6 +2006,7 @@ CHARSET_INFO my_charset_utf8_general_ci=
|
|||
"",
|
||||
"",
|
||||
1, /* strxfrm_multiply */
|
||||
1, /* mbminlen */
|
||||
3, /* mbmaxlen */
|
||||
0,
|
||||
&my_charset_handler,
|
||||
|
@ -2029,6 +2030,7 @@ CHARSET_INFO my_charset_utf8_bin=
|
|||
"",
|
||||
"",
|
||||
1, /* strxfrm_multiply */
|
||||
1, /* mbminlen */
|
||||
3, /* mbmaxlen */
|
||||
0,
|
||||
&my_charset_handler,
|
||||
|
|
|
@ -671,6 +671,7 @@ CHARSET_INFO my_charset_cp1250_czech_ci =
|
|||
idx_uni_cp1250, /* tab_from_uni */
|
||||
"","",
|
||||
2, /* strxfrm_multiply */
|
||||
1, /* mbminlen */
|
||||
1, /* mbmaxlen */
|
||||
0,
|
||||
&my_charset_8bit_handler,
|
||||
|
|
Loading…
Add table
Reference in a new issue