mirror of
https://github.com/MariaDB/server.git
synced 2025-01-16 12:02:42 +01:00
Bug#30315 Character sets: insertion of euckr code value 0xa141 fails
Problem: some valid euc-kr characters were rejected because the condition that checks the multi-byte tail did not allow multi-byte characters whose second byte is in the ranges [0x41..0x5A] and [0x61..0x7A]. Fix: allow these byte ranges for multi-byte tails. mysql-test/r/ctype_euckr.result: Adding tests. mysql-test/t/ctype_euckr.test: Adding tests. strings/ctype-euc_kr.c: Fixing the wrong tail character pattern.
This commit is contained in:
parent
5418752d5b
commit
1ff1ff869c
3 changed files with 87 additions and 4 deletions
|
@ -165,3 +165,44 @@ hex(a)
|
|||
A2E6
|
||||
FEF7
|
||||
DROP TABLE t1;
|
||||
create table t1 (s1 varchar(5) character set euckr);
|
||||
insert into t1 values (0xA141);
|
||||
insert into t1 values (0xA15A);
|
||||
insert into t1 values (0xA161);
|
||||
insert into t1 values (0xA17A);
|
||||
insert into t1 values (0xA181);
|
||||
insert into t1 values (0xA1FE);
|
||||
insert into t1 values (0xA140);
|
||||
Warnings:
|
||||
Warning 1366 Incorrect string value: '\xA1@' for column 's1' at row 1
|
||||
insert into t1 values (0xA15B);
|
||||
Warnings:
|
||||
Warning 1366 Incorrect string value: '\xA1[' for column 's1' at row 1
|
||||
insert into t1 values (0xA160);
|
||||
Warnings:
|
||||
Warning 1366 Incorrect string value: '\xA1`' for column 's1' at row 1
|
||||
insert into t1 values (0xA17B);
|
||||
Warnings:
|
||||
Warning 1366 Incorrect string value: '\xA1{' for column 's1' at row 1
|
||||
insert into t1 values (0xA180);
|
||||
Warnings:
|
||||
Warning 1366 Incorrect string value: '\xA1\x80' for column 's1' at row 1
|
||||
insert into t1 values (0xA1FF);
|
||||
Warnings:
|
||||
Warning 1366 Incorrect string value: '\xA1\xFF' for column 's1' at row 1
|
||||
select hex(s1), hex(convert(s1 using utf8)) from t1 order by binary s1;
|
||||
hex(s1) hex(convert(s1 using utf8))
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
A141 ECA2A5
|
||||
A15A ECA381
|
||||
A161 ECA382
|
||||
A17A ECA3A5
|
||||
A181 ECA3A6
|
||||
A1FE EFBFA2
|
||||
drop table t1;
|
||||
End of 5.0 tests
|
||||
|
|
|
@ -31,3 +31,26 @@ SELECT hex(a) FROM t1 ORDER BY a;
|
|||
DROP TABLE t1;
|
||||
|
||||
# End of 4.1 tests
|
||||
|
||||
#
|
||||
#Bug #30315 Character sets: insertion of euckr code value 0xa141 fails
|
||||
#
|
||||
create table t1 (s1 varchar(5) character set euckr);
|
||||
# Insert some valid characters
|
||||
insert into t1 values (0xA141);
|
||||
insert into t1 values (0xA15A);
|
||||
insert into t1 values (0xA161);
|
||||
insert into t1 values (0xA17A);
|
||||
insert into t1 values (0xA181);
|
||||
insert into t1 values (0xA1FE);
|
||||
# Insert some invalid characters
|
||||
insert into t1 values (0xA140);
|
||||
insert into t1 values (0xA15B);
|
||||
insert into t1 values (0xA160);
|
||||
insert into t1 values (0xA17B);
|
||||
insert into t1 values (0xA180);
|
||||
insert into t1 values (0xA1FF);
|
||||
select hex(s1), hex(convert(s1 using utf8)) from t1 order by binary s1;
|
||||
drop table t1;
|
||||
|
||||
--echo End of 5.0 tests
|
||||
|
|
|
@ -179,20 +179,39 @@ static uchar NEAR sort_order_euc_kr[]=
|
|||
|
||||
/* Support for Korean(EUC_KR) characters, by powerm90@tinc.co.kr and mrpark@tinc.co.kr */
|
||||
|
||||
#define iseuc_kr(c) ((0xa1<=(uchar)(c) && (uchar)(c)<=0xfe))
|
||||
/*
|
||||
Unicode mapping is done according to:
|
||||
ftp://ftp.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/KSC/KSC5601.TXT
|
||||
|
||||
Valid multi-byte characters:
|
||||
|
||||
[A1..FE][41..5A,61..7A,81..FE]
|
||||
|
||||
Note, 0x5C is not a valid MB tail,
|
||||
so escape_with_backslash_is_dangerous is not set.
|
||||
*/
|
||||
|
||||
#define iseuc_kr_head(c) ((0xa1<=(uchar)(c) && (uchar)(c)<=0xfe))
|
||||
|
||||
#define iseuc_kr_tail1(c) ((uchar) (c) >= 0x41 && (uchar) (c) <= 0x5A)
|
||||
#define iseuc_kr_tail2(c) ((uchar) (c) >= 0x61 && (uchar) (c) <= 0x7A)
|
||||
#define iseuc_kr_tail3(c) ((uchar) (c) >= 0x81 && (uchar) (c) <= 0xFE)
|
||||
|
||||
#define iseuc_kr_tail(c) (iseuc_kr_tail1(c) || \
|
||||
iseuc_kr_tail2(c) || \
|
||||
iseuc_kr_tail3(c))
|
||||
|
||||
static int ismbchar_euc_kr(CHARSET_INFO *cs __attribute__((unused)),
|
||||
const char* p, const char *e)
|
||||
{
|
||||
return ((*(uchar*)(p)<0x80)? 0:\
|
||||
iseuc_kr(*(p)) && (e)-(p)>1 && iseuc_kr(*((p)+1))? 2:\
|
||||
iseuc_kr_head(*(p)) && (e)-(p)>1 && iseuc_kr_tail(*((p)+1))? 2:\
|
||||
0);
|
||||
}
|
||||
|
||||
static int mbcharlen_euc_kr(CHARSET_INFO *cs __attribute__((unused)),uint c)
|
||||
{
|
||||
return (iseuc_kr(c) ? 2 : 1);
|
||||
return (iseuc_kr_head(c) ? 2 : 1);
|
||||
}
|
||||
|
||||
|
||||
|
@ -8653,7 +8672,7 @@ my_well_formed_len_euckr(CHARSET_INFO *cs __attribute__((unused)),
|
|||
/* Single byte ascii character */
|
||||
b++;
|
||||
}
|
||||
else if (b < emb && iseuc_kr(*b) && iseuc_kr(b[1]))
|
||||
else if (b < emb && iseuc_kr_head(*b) && iseuc_kr_tail(b[1]))
|
||||
{
|
||||
/* Double byte character */
|
||||
b+= 2;
|
||||
|
|
Loading…
Reference in a new issue