mirror of
https://github.com/MariaDB/server.git
synced 2025-01-15 19:42:28 +01:00
MDEV-8214 Asian MB2 charsets: compare broken bytes as "greater than any non-broken character"
This commit is contained in:
parent
d535728165
commit
4f828a1cac
9 changed files with 830 additions and 373 deletions
|
@ -49,6 +49,7 @@
|
|||
#define big5tail(e) ((uchar)(e&0xff))
|
||||
|
||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _big5
|
||||
#define IS_MB1_CHAR(x) ((uchar) (x) < 0x80)
|
||||
#define IS_MB2_CHAR(x,y) (isbig5head(x) && isbig5tail(y))
|
||||
#define DEFINE_ASIAN_ROUTINES
|
||||
#include "ctype-mb.ic"
|
||||
|
@ -849,89 +850,6 @@ static uint16 big5strokexfrm(uint16 i)
|
|||
}
|
||||
|
||||
|
||||
|
||||
static int my_strnncoll_big5_internal(const uchar **a_res,
|
||||
const uchar **b_res, size_t length)
|
||||
{
|
||||
const uchar *a= *a_res, *b= *b_res;
|
||||
|
||||
while (length--)
|
||||
{
|
||||
if ((length > 0) && isbig5code(*a,*(a+1)) && isbig5code(*b, *(b+1)))
|
||||
{
|
||||
if (*a != *b || *(a+1) != *(b+1))
|
||||
return ((int) big5code(*a,*(a+1)) -
|
||||
(int) big5code(*b,*(b+1)));
|
||||
a+= 2;
|
||||
b+= 2;
|
||||
length--;
|
||||
}
|
||||
else if (sort_order_big5[*a++] !=
|
||||
sort_order_big5[*b++])
|
||||
return ((int) sort_order_big5[a[-1]] -
|
||||
(int) sort_order_big5[b[-1]]);
|
||||
}
|
||||
*a_res= a;
|
||||
*b_res= b;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* Compare strings */
|
||||
|
||||
static int my_strnncoll_big5(CHARSET_INFO *cs __attribute__((unused)),
|
||||
const uchar *a, size_t a_length,
|
||||
const uchar *b, size_t b_length,
|
||||
my_bool b_is_prefix)
|
||||
{
|
||||
size_t length= MY_MIN(a_length, b_length);
|
||||
int res= my_strnncoll_big5_internal(&a, &b, length);
|
||||
return res ? res : (int)((b_is_prefix ? length : a_length) - b_length);
|
||||
}
|
||||
|
||||
|
||||
/* compare strings, ignore end space */
|
||||
|
||||
static int my_strnncollsp_big5(CHARSET_INFO * cs __attribute__((unused)),
|
||||
const uchar *a, size_t a_length,
|
||||
const uchar *b, size_t b_length,
|
||||
my_bool diff_if_only_endspace_difference)
|
||||
{
|
||||
size_t length= MY_MIN(a_length, b_length);
|
||||
int res= my_strnncoll_big5_internal(&a, &b, length);
|
||||
|
||||
#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
|
||||
diff_if_only_endspace_difference= 0;
|
||||
#endif
|
||||
|
||||
if (!res && a_length != b_length)
|
||||
{
|
||||
const uchar *end;
|
||||
int swap= 1;
|
||||
if (diff_if_only_endspace_difference)
|
||||
res= 1; /* Assume 'a' is bigger */
|
||||
/*
|
||||
Check the next not space character of the longer key. If it's < ' ',
|
||||
then it's smaller than the other key.
|
||||
*/
|
||||
if (a_length < b_length)
|
||||
{
|
||||
/* put longer key in a */
|
||||
a_length= b_length;
|
||||
a= b;
|
||||
swap= -1; /* swap sign of result */
|
||||
res= -res;
|
||||
}
|
||||
for (end= a + a_length-length; a < end ; a++)
|
||||
{
|
||||
if (*a != ' ')
|
||||
return (*a < ' ') ? -swap : swap;
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
static size_t
|
||||
my_strnxfrm_big5(CHARSET_INFO *cs,
|
||||
uchar *dst, size_t dstlen, uint nweights,
|
||||
|
@ -6853,11 +6771,23 @@ my_mb_wc_big5(CHARSET_INFO *cs __attribute__((unused)),
|
|||
}
|
||||
|
||||
|
||||
static MY_COLLATION_HANDLER my_collation_big5_chinese_ci_handler =
|
||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _big5_chinese_ci
|
||||
#define WEIGHT_MB1(x) (sort_order_big5[(uchar) (x)])
|
||||
#define WEIGHT_MB2(x,y) (big5code(x, y))
|
||||
#include "strcoll.ic"
|
||||
|
||||
|
||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _big5_bin
|
||||
#define WEIGHT_MB1(x) ((uchar) (x))
|
||||
#define WEIGHT_MB2(x,y) (big5code(x, y))
|
||||
#include "strcoll.ic"
|
||||
|
||||
|
||||
static MY_COLLATION_HANDLER my_collation_handler_big5_chinese_ci=
|
||||
{
|
||||
NULL, /* init */
|
||||
my_strnncoll_big5,
|
||||
my_strnncollsp_big5,
|
||||
my_strnncoll_big5_chinese_ci,
|
||||
my_strnncollsp_big5_chinese_ci,
|
||||
my_strnxfrm_big5,
|
||||
my_strnxfrmlen_simple,
|
||||
my_like_range_mb,
|
||||
|
@ -6868,6 +6798,23 @@ static MY_COLLATION_HANDLER my_collation_big5_chinese_ci_handler =
|
|||
my_propagate_simple
|
||||
};
|
||||
|
||||
|
||||
/*
  Collation handler for big5_bin.
  The two comparison slots use the functions generated by including
  "strcoll.ic" with MY_FUNCTION_NAME(x) defined as my_ ## x ## _big5_bin.
*/
static MY_COLLATION_HANDLER my_collation_handler_big5_bin=
|
||||
{
|
||||
NULL, /* init */
|
||||
my_strnncoll_big5_bin, /* strnncoll, generated from strcoll.ic */
|
||||
my_strnncollsp_big5_bin, /* PAD SPACE comparison, generated from strcoll.ic */
|
||||
my_strnxfrm_mb, /* strnxfrm */
|
||||
my_strnxfrmlen_simple,
|
||||
my_like_range_mb, /* like_range */
|
||||
my_wildcmp_mb_bin, /* wildcmp */
|
||||
my_strcasecmp_mb_bin,
|
||||
my_instr_mb,
|
||||
my_hash_sort_mb_bin,
|
||||
my_propagate_simple
|
||||
};
|
||||
|
||||
|
||||
static MY_CHARSET_HANDLER my_charset_big5_handler=
|
||||
{
|
||||
NULL, /* init */
|
||||
|
@ -6931,7 +6878,7 @@ struct charset_info_st my_charset_big5_chinese_ci=
|
|||
1, /* escape_with_backslash_is_dangerous */
|
||||
1, /* levels_for_order */
|
||||
&my_charset_big5_handler,
|
||||
&my_collation_big5_chinese_ci_handler
|
||||
&my_collation_handler_big5_chinese_ci
|
||||
};
|
||||
|
||||
|
||||
|
@ -6964,7 +6911,7 @@ struct charset_info_st my_charset_big5_bin=
|
|||
1, /* escape_with_backslash_is_dangerous */
|
||||
1, /* levels_for_order */
|
||||
&my_charset_big5_handler,
|
||||
&my_collation_mb_bin_handler
|
||||
&my_collation_handler_big5_bin
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -185,6 +185,7 @@ static const uchar sort_order_cp932[]=
|
|||
|
||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _cp932
|
||||
#define IS_8BIT_CHAR(x) iscp932kata(x)
|
||||
#define IS_MB1_CHAR(x) ((uchar) (x) < 0x80 || iscp932kata(x))
|
||||
#define IS_MB2_CHAR(x,y) (iscp932head(x) && iscp932tail(y))
|
||||
#define DEFINE_ASIAN_ROUTINES
|
||||
#include "ctype-mb.ic"
|
||||
|
@ -1717,90 +1718,6 @@ MY_UNICASE_INFO my_caseinfo_cp932=
|
|||
my_caseinfo_pages_cp932
|
||||
};
|
||||
|
||||
static int my_strnncoll_cp932_internal(CHARSET_INFO *cs,
|
||||
const uchar **a_res, size_t a_length,
|
||||
const uchar **b_res, size_t b_length)
|
||||
{
|
||||
const uchar *a= *a_res, *b= *b_res;
|
||||
const uchar *a_end= a + a_length;
|
||||
const uchar *b_end= b + b_length;
|
||||
while (a < a_end && b < b_end)
|
||||
{
|
||||
if (ismbchar_cp932(cs,(char*) a, (char*) a_end) &&
|
||||
ismbchar_cp932(cs,(char*) b, (char*) b_end))
|
||||
{
|
||||
uint a_char= cp932code(*a, *(a+1));
|
||||
uint b_char= cp932code(*b, *(b+1));
|
||||
if (a_char != b_char)
|
||||
return a_char - b_char;
|
||||
a += 2;
|
||||
b += 2;
|
||||
} else
|
||||
{
|
||||
if (sort_order_cp932[(uchar)*a] != sort_order_cp932[(uchar)*b])
|
||||
return sort_order_cp932[(uchar)*a] - sort_order_cp932[(uchar)*b];
|
||||
a++;
|
||||
b++;
|
||||
}
|
||||
}
|
||||
*a_res= a;
|
||||
*b_res= b;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int my_strnncoll_cp932(CHARSET_INFO *cs __attribute__((unused)),
|
||||
const uchar *a, size_t a_length,
|
||||
const uchar *b, size_t b_length,
|
||||
my_bool b_is_prefix)
|
||||
{
|
||||
int res= my_strnncoll_cp932_internal(cs, &a, a_length, &b, b_length);
|
||||
if (b_is_prefix && a_length > b_length)
|
||||
a_length= b_length;
|
||||
return res ? res : (int) (a_length - b_length);
|
||||
}
|
||||
|
||||
|
||||
static int my_strnncollsp_cp932(CHARSET_INFO *cs __attribute__((unused)),
|
||||
const uchar *a, size_t a_length,
|
||||
const uchar *b, size_t b_length,
|
||||
my_bool diff_if_only_endspace_difference
|
||||
__attribute__((unused)))
|
||||
{
|
||||
const uchar *a_end= a + a_length;
|
||||
const uchar *b_end= b + b_length;
|
||||
int res= my_strnncoll_cp932_internal(cs, &a, a_length, &b, b_length);
|
||||
|
||||
#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
|
||||
diff_if_only_endspace_difference= 0;
|
||||
#endif
|
||||
|
||||
if (!res && (a != a_end || b != b_end))
|
||||
{
|
||||
int swap= 1;
|
||||
if (diff_if_only_endspace_difference)
|
||||
res= 1; /* Assume 'a' is bigger */
|
||||
/*
|
||||
Check the next not space character of the longer key. If it's < ' ',
|
||||
then it's smaller than the other key.
|
||||
*/
|
||||
if (a == a_end)
|
||||
{
|
||||
/* put shorter key in a */
|
||||
a_end= b_end;
|
||||
a= b;
|
||||
swap= -1; /* swap sign of result */
|
||||
res= -res;
|
||||
}
|
||||
for (; a < a_end ; a++)
|
||||
{
|
||||
if (*a != (uchar) ' ')
|
||||
return (*a < (uchar) ' ') ? -swap : swap;
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
static const uint16 cp932_to_unicode[65536]=
|
||||
{
|
||||
|
@ -34720,15 +34637,36 @@ size_t my_numcells_cp932(CHARSET_INFO *cs __attribute__((unused)),
|
|||
}
|
||||
|
||||
|
||||
static MY_COLLATION_HANDLER my_collation_ci_handler =
|
||||
/*
|
||||
cp932_japanese_ci and cp932_bin sort character blocks in this order:
|
||||
1. [00..7F] - 7BIT characters (ASCII)
|
||||
2. [81..9F][40..7E,80..FC] - MB2 characters, part1
|
||||
3. [A1..DF] - 8BIT characters (Kana)
|
||||
4. [E0..FC][40..7E,80..FC] - MB2 characters, part2
|
||||
*/
|
||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _cp932_japanese_ci
|
||||
#define WEIGHT_PAD_SPACE (256 * (int) ' ')
|
||||
#define WEIGHT_MB1(x) (256 * (int) sort_order_cp932[(uchar) (x)])
|
||||
#define WEIGHT_MB2(x,y) (cp932code(x, y))
|
||||
#include "strcoll.ic"
|
||||
|
||||
|
||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _cp932_bin
|
||||
#define WEIGHT_PAD_SPACE (256 * (int) ' ')
|
||||
#define WEIGHT_MB1(x) (256 * (int) (uchar) (x))
|
||||
#define WEIGHT_MB2(x,y) (cp932code(x, y))
|
||||
#include "strcoll.ic"
|
||||
|
||||
|
||||
static MY_COLLATION_HANDLER my_collation_handler_cp932_japanese_ci=
|
||||
{
|
||||
NULL, /* init */
|
||||
my_strnncoll_cp932,
|
||||
my_strnncollsp_cp932,
|
||||
NULL, /* init */
|
||||
my_strnncoll_cp932_japanese_ci,
|
||||
my_strnncollsp_cp932_japanese_ci,
|
||||
my_strnxfrm_mb,
|
||||
my_strnxfrmlen_simple,
|
||||
my_like_range_mb,
|
||||
my_wildcmp_mb, /* wildcmp */
|
||||
my_wildcmp_mb,
|
||||
my_strcasecmp_8bit,
|
||||
my_instr_mb,
|
||||
my_hash_sort_simple,
|
||||
|
@ -34736,6 +34674,22 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
|
|||
};
|
||||
|
||||
|
||||
/*
  Collation handler for cp932_bin.
  The two comparison slots use the functions generated by including
  "strcoll.ic" with MY_FUNCTION_NAME(x) defined as my_ ## x ## _cp932_bin.
*/
static MY_COLLATION_HANDLER my_collation_handler_cp932_bin=
|
||||
{
|
||||
NULL, /* init */
|
||||
my_strnncoll_cp932_bin, /* strnncoll, generated from strcoll.ic */
|
||||
my_strnncollsp_cp932_bin, /* PAD SPACE comparison, generated from strcoll.ic */
|
||||
my_strnxfrm_mb, /* strnxfrm */
|
||||
my_strnxfrmlen_simple,
|
||||
my_like_range_mb, /* like_range */
|
||||
my_wildcmp_mb_bin, /* wildcmp */
|
||||
my_strcasecmp_mb_bin,
|
||||
my_instr_mb,
|
||||
my_hash_sort_mb_bin,
|
||||
my_propagate_simple
|
||||
};
|
||||
|
||||
|
||||
static MY_CHARSET_HANDLER my_charset_handler=
|
||||
{
|
||||
NULL, /* init */
|
||||
|
@ -34800,7 +34754,7 @@ struct charset_info_st my_charset_cp932_japanese_ci=
|
|||
1, /* escape_with_backslash_is_dangerous */
|
||||
1, /* levels_for_order */
|
||||
&my_charset_handler,
|
||||
&my_collation_ci_handler
|
||||
&my_collation_handler_cp932_japanese_ci
|
||||
};
|
||||
|
||||
struct charset_info_st my_charset_cp932_bin=
|
||||
|
@ -34832,7 +34786,7 @@ struct charset_info_st my_charset_cp932_bin=
|
|||
1, /* escape_with_backslash_is_dangerous */
|
||||
1, /* levels_for_order */
|
||||
&my_charset_handler,
|
||||
&my_collation_mb_bin_handler
|
||||
&my_collation_handler_cp932_bin
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
|
@ -201,8 +201,10 @@ static const uchar sort_order_euc_kr[]=
|
|||
iseuc_kr_tail2(c) || \
|
||||
iseuc_kr_tail3(c))
|
||||
|
||||
#define euckrcode(c,d) (((uchar)(c) <<8) | (uchar)(d))
|
||||
|
||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _euckr
|
||||
#define IS_MB1_CHAR(x) ((uchar) (x) < 0x80)
|
||||
#define IS_MB2_CHAR(x,y) (iseuc_kr_head(x) && iseuc_kr_tail(y))
|
||||
#define DEFINE_ASIAN_ROUTINES
|
||||
#include "ctype-mb.ic"
|
||||
|
@ -9938,21 +9940,50 @@ my_mb_wc_euc_kr(CHARSET_INFO *cs __attribute__((unused)),
|
|||
}
|
||||
|
||||
|
||||
static MY_COLLATION_HANDLER my_collation_ci_handler =
|
||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _euckr_korean_ci
|
||||
#define WEIGHT_MB1(x) (sort_order_euc_kr[(uchar) (x)])
|
||||
#define WEIGHT_MB2(x,y) (euckrcode(x, y))
|
||||
#include "strcoll.ic"
|
||||
|
||||
|
||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _euckr_bin
|
||||
#define WEIGHT_MB1(x) ((uchar) (x))
|
||||
#define WEIGHT_MB2(x,y) (euckrcode(x, y))
|
||||
#include "strcoll.ic"
|
||||
|
||||
|
||||
static MY_COLLATION_HANDLER my_collation_handler_euckr_korean_ci=
|
||||
{
|
||||
NULL, /* init */
|
||||
my_strnncoll_simple, /* strnncoll */
|
||||
my_strnncollsp_simple,
|
||||
my_strnxfrm_mb, /* strnxfrm */
|
||||
NULL, /* init */
|
||||
my_strnncoll_euckr_korean_ci,
|
||||
my_strnncollsp_euckr_korean_ci,
|
||||
my_strnxfrm_mb,
|
||||
my_strnxfrmlen_simple,
|
||||
my_like_range_mb, /* like_range */
|
||||
my_wildcmp_mb, /* wildcmp */
|
||||
my_like_range_mb,
|
||||
my_wildcmp_mb,
|
||||
my_strcasecmp_mb,
|
||||
my_instr_mb,
|
||||
my_hash_sort_simple,
|
||||
my_propagate_simple
|
||||
};
|
||||
|
||||
|
||||
/*
  Collation handler for euckr_bin.
  The two comparison slots use the functions generated by including
  "strcoll.ic" with MY_FUNCTION_NAME(x) defined as my_ ## x ## _euckr_bin.
*/
static MY_COLLATION_HANDLER my_collation_handler_euckr_bin=
|
||||
{
|
||||
NULL, /* init */
|
||||
my_strnncoll_euckr_bin, /* strnncoll, generated from strcoll.ic */
|
||||
my_strnncollsp_euckr_bin, /* PAD SPACE comparison, generated from strcoll.ic */
|
||||
my_strnxfrm_mb, /* strnxfrm */
|
||||
my_strnxfrmlen_simple,
|
||||
my_like_range_mb, /* like_range */
|
||||
my_wildcmp_mb_bin, /* wildcmp */
|
||||
my_strcasecmp_mb_bin,
|
||||
my_instr_mb,
|
||||
my_hash_sort_mb_bin,
|
||||
my_propagate_simple
|
||||
};
|
||||
|
||||
|
||||
static MY_CHARSET_HANDLER my_charset_handler=
|
||||
{
|
||||
NULL, /* init */
|
||||
|
@ -10017,7 +10048,7 @@ struct charset_info_st my_charset_euckr_korean_ci=
|
|||
0, /* escape_with_backslash_is_dangerous */
|
||||
1, /* levels_for_order */
|
||||
&my_charset_handler,
|
||||
&my_collation_ci_handler
|
||||
&my_collation_handler_euckr_korean_ci
|
||||
};
|
||||
|
||||
|
||||
|
@ -10050,7 +10081,7 @@ struct charset_info_st my_charset_euckr_bin=
|
|||
0, /* escape_with_backslash_is_dangerous */
|
||||
1, /* levels_for_order */
|
||||
&my_charset_handler,
|
||||
&my_collation_mb_bin_handler
|
||||
&my_collation_handler_euckr_bin
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
|
@ -163,9 +163,11 @@ static const uchar sort_order_gb2312[]=
|
|||
|
||||
#define isgb2312head(c) (0xa1<=(uchar)(c) && (uchar)(c)<=0xf7)
|
||||
#define isgb2312tail(c) (0xa1<=(uchar)(c) && (uchar)(c)<=0xfe)
|
||||
#define gb2312code(c,d) (((uchar)(c) <<8) | (uchar)(d))
|
||||
|
||||
|
||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _gb2312
|
||||
#define IS_MB1_CHAR(x) ((uchar) (x) < 0x80)
|
||||
#define IS_MB2_CHAR(x,y) (isgb2312head(x) && isgb2312tail(y))
|
||||
#define DEFINE_ASIAN_ROUTINES
|
||||
#include "ctype-mb.ic"
|
||||
|
@ -6341,11 +6343,23 @@ my_mb_wc_gb2312(CHARSET_INFO *cs __attribute__((unused)),
|
|||
}
|
||||
|
||||
|
||||
static MY_COLLATION_HANDLER my_collation_ci_handler =
|
||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _gb2312_chinese_ci
|
||||
#define WEIGHT_MB1(x) (sort_order_gb2312[(uchar) (x)])
|
||||
#define WEIGHT_MB2(x,y) (gb2312code(x, y))
|
||||
#include "strcoll.ic"
|
||||
|
||||
|
||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _gb2312_bin
|
||||
#define WEIGHT_MB1(x) ((uchar) (x))
|
||||
#define WEIGHT_MB2(x,y) (gb2312code(x, y))
|
||||
#include "strcoll.ic"
|
||||
|
||||
|
||||
static MY_COLLATION_HANDLER my_collation_handler_gb2312_chinese_ci=
|
||||
{
|
||||
NULL, /* init */
|
||||
my_strnncoll_simple, /* strnncoll */
|
||||
my_strnncollsp_simple,
|
||||
NULL, /* init */
|
||||
my_strnncoll_gb2312_chinese_ci,
|
||||
my_strnncollsp_gb2312_chinese_ci,
|
||||
my_strnxfrm_mb, /* strnxfrm */
|
||||
my_strnxfrmlen_simple,
|
||||
my_like_range_mb, /* like_range */
|
||||
|
@ -6356,6 +6370,24 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
|
|||
my_propagate_simple
|
||||
};
|
||||
|
||||
|
||||
/*
  Collation handler for gb2312_bin.
  The two comparison slots use the functions generated by including
  "strcoll.ic" with MY_FUNCTION_NAME(x) defined as my_ ## x ## _gb2312_bin.
*/
static MY_COLLATION_HANDLER my_collation_handler_gb2312_bin=
|
||||
{
|
||||
NULL, /* init */
|
||||
my_strnncoll_gb2312_bin, /* strnncoll, generated from strcoll.ic */
|
||||
my_strnncollsp_gb2312_bin, /* PAD SPACE comparison, generated from strcoll.ic */
|
||||
my_strnxfrm_mb, /* strnxfrm */
|
||||
my_strnxfrmlen_simple,
|
||||
my_like_range_mb, /* like_range */
|
||||
my_wildcmp_mb_bin, /* wildcmp */
|
||||
my_strcasecmp_mb_bin,
|
||||
my_instr_mb,
|
||||
my_hash_sort_mb_bin,
|
||||
my_propagate_simple
|
||||
};
|
||||
|
||||
|
||||
|
||||
static MY_CHARSET_HANDLER my_charset_handler=
|
||||
{
|
||||
NULL, /* init */
|
||||
|
@ -6420,9 +6452,10 @@ struct charset_info_st my_charset_gb2312_chinese_ci=
|
|||
0, /* escape_with_backslash_is_dangerous */
|
||||
1, /* levels_for_order */
|
||||
&my_charset_handler,
|
||||
&my_collation_ci_handler
|
||||
&my_collation_handler_gb2312_chinese_ci
|
||||
};
|
||||
|
||||
|
||||
struct charset_info_st my_charset_gb2312_bin=
|
||||
{
|
||||
86,0,0, /* number */
|
||||
|
@ -6452,7 +6485,7 @@ struct charset_info_st my_charset_gb2312_bin=
|
|||
0, /* escape_with_backslash_is_dangerous */
|
||||
1, /* levels_for_order */
|
||||
&my_charset_handler,
|
||||
&my_collation_mb_bin_handler
|
||||
&my_collation_handler_gb2312_bin
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
|
@ -44,6 +44,7 @@
|
|||
#define gbktail(e) ((uchar)(e&0xff))
|
||||
|
||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _gbk
|
||||
#define IS_MB1_CHAR(x) ((uchar) (x) < 0x80)
|
||||
#define IS_MB2_CHAR(x,y) (isgbkhead(x) && isgbktail(y))
|
||||
#define DEFINE_ASIAN_ROUTINES
|
||||
#include "ctype-mb.ic"
|
||||
|
@ -3450,87 +3451,6 @@ static uint16 gbksortorder(uint16 i)
|
|||
}
|
||||
|
||||
|
||||
int my_strnncoll_gbk_internal(const uchar **a_res, const uchar **b_res,
|
||||
size_t length)
|
||||
{
|
||||
const uchar *a= *a_res, *b= *b_res;
|
||||
uint a_char,b_char;
|
||||
|
||||
while (length--)
|
||||
{
|
||||
if ((length > 0) && isgbkcode(*a,*(a+1)) && isgbkcode(*b, *(b+1)))
|
||||
{
|
||||
a_char= gbkcode(*a,*(a+1));
|
||||
b_char= gbkcode(*b,*(b+1));
|
||||
if (a_char != b_char)
|
||||
return ((int) gbksortorder((uint16) a_char) -
|
||||
(int) gbksortorder((uint16) b_char));
|
||||
a+= 2;
|
||||
b+= 2;
|
||||
length--;
|
||||
}
|
||||
else if (sort_order_gbk[*a++] != sort_order_gbk[*b++])
|
||||
return ((int) sort_order_gbk[a[-1]] -
|
||||
(int) sort_order_gbk[b[-1]]);
|
||||
}
|
||||
*a_res= a;
|
||||
*b_res= b;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
int my_strnncoll_gbk(CHARSET_INFO *cs __attribute__((unused)),
|
||||
const uchar *a, size_t a_length,
|
||||
const uchar *b, size_t b_length,
|
||||
my_bool b_is_prefix)
|
||||
{
|
||||
size_t length= MY_MIN(a_length, b_length);
|
||||
int res= my_strnncoll_gbk_internal(&a, &b, length);
|
||||
return res ? res : (int) ((b_is_prefix ? length : a_length) - b_length);
|
||||
}
|
||||
|
||||
|
||||
static int my_strnncollsp_gbk(CHARSET_INFO * cs __attribute__((unused)),
|
||||
const uchar *a, size_t a_length,
|
||||
const uchar *b, size_t b_length,
|
||||
my_bool diff_if_only_endspace_difference)
|
||||
{
|
||||
size_t length= MY_MIN(a_length, b_length);
|
||||
int res= my_strnncoll_gbk_internal(&a, &b, length);
|
||||
|
||||
#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
|
||||
diff_if_only_endspace_difference= 0;
|
||||
#endif
|
||||
|
||||
if (!res && a_length != b_length)
|
||||
{
|
||||
const uchar *end;
|
||||
int swap= 1;
|
||||
if (diff_if_only_endspace_difference)
|
||||
res= 1; /* Assume 'a' is bigger */
|
||||
/*
|
||||
Check the next not space character of the longer key. If it's < ' ',
|
||||
then it's smaller than the other key.
|
||||
*/
|
||||
if (a_length < b_length)
|
||||
{
|
||||
/* put shorter key in a */
|
||||
a_length= b_length;
|
||||
a= b;
|
||||
swap= -1; /* swap sign of result */
|
||||
res= -res;
|
||||
}
|
||||
for (end= a + a_length-length; a < end ; a++)
|
||||
{
|
||||
if (*a != ' ')
|
||||
return (*a < ' ') ? -swap : swap;
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
static size_t
|
||||
my_strnxfrm_gbk(CHARSET_INFO *cs,
|
||||
uchar *dst, size_t dstlen, uint nweights,
|
||||
|
@ -10735,11 +10655,23 @@ my_mb_wc_gbk(CHARSET_INFO *cs __attribute__((unused)),
|
|||
}
|
||||
|
||||
|
||||
static MY_COLLATION_HANDLER my_collation_ci_handler =
|
||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _gbk_chinese_ci
|
||||
#define WEIGHT_MB1(x) (sort_order_gbk[(uchar) (x)])
|
||||
#define WEIGHT_MB2(x,y) (gbksortorder(gbkcode(x,y)))
|
||||
#include "strcoll.ic"
|
||||
|
||||
|
||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _gbk_bin
|
||||
#define WEIGHT_MB1(x) ((uchar) (x))
|
||||
#define WEIGHT_MB2(x,y) (gbkcode(x,y))
|
||||
#include "strcoll.ic"
|
||||
|
||||
|
||||
static MY_COLLATION_HANDLER my_collation_handler_gbk_chinese_ci=
|
||||
{
|
||||
NULL, /* init */
|
||||
my_strnncoll_gbk,
|
||||
my_strnncollsp_gbk,
|
||||
NULL, /* init */
|
||||
my_strnncoll_gbk_chinese_ci,
|
||||
my_strnncollsp_gbk_chinese_ci,
|
||||
my_strnxfrm_gbk,
|
||||
my_strnxfrmlen_simple,
|
||||
my_like_range_mb,
|
||||
|
@ -10750,6 +10682,24 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
|
|||
my_propagate_simple
|
||||
};
|
||||
|
||||
|
||||
/*
  Collation handler for gbk_bin.
  The two comparison slots use the functions generated by including
  "strcoll.ic" with MY_FUNCTION_NAME(x) defined as my_ ## x ## _gbk_bin.
*/
static MY_COLLATION_HANDLER my_collation_handler_gbk_bin=
|
||||
{
|
||||
NULL, /* init */
|
||||
my_strnncoll_gbk_bin, /* strnncoll, generated from strcoll.ic */
|
||||
my_strnncollsp_gbk_bin, /* PAD SPACE comparison, generated from strcoll.ic */
|
||||
my_strnxfrm_mb, /* strnxfrm */
|
||||
my_strnxfrmlen_simple,
|
||||
my_like_range_mb, /* like_range */
|
||||
my_wildcmp_mb_bin, /* wildcmp */
|
||||
my_strcasecmp_mb_bin,
|
||||
my_instr_mb,
|
||||
my_hash_sort_mb_bin,
|
||||
my_propagate_simple
|
||||
};
|
||||
|
||||
|
||||
|
||||
static MY_CHARSET_HANDLER my_charset_handler=
|
||||
{
|
||||
NULL, /* init */
|
||||
|
@ -10814,7 +10764,7 @@ struct charset_info_st my_charset_gbk_chinese_ci=
|
|||
1, /* escape_with_backslash_is_dangerous */
|
||||
1, /* levels_for_order */
|
||||
&my_charset_handler,
|
||||
&my_collation_ci_handler
|
||||
&my_collation_handler_gbk_chinese_ci
|
||||
};
|
||||
|
||||
struct charset_info_st my_charset_gbk_bin=
|
||||
|
@ -10846,7 +10796,7 @@ struct charset_info_st my_charset_gbk_bin=
|
|||
1, /* escape_with_backslash_is_dangerous */
|
||||
1, /* levels_for_order */
|
||||
&my_charset_handler,
|
||||
&my_collation_mb_bin_handler
|
||||
&my_collation_handler_gbk_bin
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -256,3 +256,5 @@ MY_FUNCTION_NAME(well_formed_char_length)(CHARSET_INFO *cs __attribute__((unused
|
|||
return nchars0 - nchars;
|
||||
}
|
||||
#endif /* DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN */
|
||||
|
||||
#undef MY_FUNCTION_NAME
|
||||
|
|
|
@ -186,6 +186,7 @@ static const uchar sort_order_sjis[]=
|
|||
|
||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _sjis
|
||||
#define IS_8BIT_CHAR(x) issjiskata(x)
|
||||
#define IS_MB1_CHAR(x) ((uchar) (x) < 0x80 || issjiskata(x))
|
||||
#define IS_MB2_CHAR(x,y) (issjishead(x) && issjistail(y))
|
||||
#define DEFINE_ASIAN_ROUTINES
|
||||
#include "ctype-mb.ic"
|
||||
|
@ -1088,90 +1089,6 @@ static MY_UNICASE_INFO my_caseinfo_sjis=
|
|||
};
|
||||
|
||||
|
||||
static int my_strnncoll_sjis_internal(CHARSET_INFO *cs,
|
||||
const uchar **a_res, size_t a_length,
|
||||
const uchar **b_res, size_t b_length)
|
||||
{
|
||||
const uchar *a= *a_res, *b= *b_res;
|
||||
const uchar *a_end= a + a_length;
|
||||
const uchar *b_end= b + b_length;
|
||||
while (a < a_end && b < b_end)
|
||||
{
|
||||
if (ismbchar_sjis(cs,(char*) a, (char*) a_end) &&
|
||||
ismbchar_sjis(cs,(char*) b, (char*) b_end))
|
||||
{
|
||||
uint a_char= sjiscode(*a, *(a+1));
|
||||
uint b_char= sjiscode(*b, *(b+1));
|
||||
if (a_char != b_char)
|
||||
return (int) a_char - (int) b_char;
|
||||
a += 2;
|
||||
b += 2;
|
||||
} else
|
||||
{
|
||||
if (sort_order_sjis[(uchar)*a] != sort_order_sjis[(uchar)*b])
|
||||
return sort_order_sjis[(uchar)*a] - sort_order_sjis[(uchar)*b];
|
||||
a++;
|
||||
b++;
|
||||
}
|
||||
}
|
||||
*a_res= a;
|
||||
*b_res= b;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int my_strnncoll_sjis(CHARSET_INFO *cs __attribute__((unused)),
|
||||
const uchar *a, size_t a_length,
|
||||
const uchar *b, size_t b_length,
|
||||
my_bool b_is_prefix)
|
||||
{
|
||||
int res= my_strnncoll_sjis_internal(cs, &a, a_length, &b, b_length);
|
||||
if (b_is_prefix && a_length > b_length)
|
||||
a_length= b_length;
|
||||
return res ? res : (int) (a_length - b_length);
|
||||
}
|
||||
|
||||
|
||||
static int my_strnncollsp_sjis(CHARSET_INFO *cs __attribute__((unused)),
|
||||
const uchar *a, size_t a_length,
|
||||
const uchar *b, size_t b_length,
|
||||
my_bool diff_if_only_endspace_difference)
|
||||
{
|
||||
const uchar *a_end= a + a_length, *b_end= b + b_length;
|
||||
int res= my_strnncoll_sjis_internal(cs, &a, a_length, &b, b_length);
|
||||
|
||||
#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
|
||||
diff_if_only_endspace_difference= 0;
|
||||
#endif
|
||||
|
||||
if (!res && (a != a_end || b != b_end))
|
||||
{
|
||||
int swap= 1;
|
||||
if (diff_if_only_endspace_difference)
|
||||
res= 1; /* Assume 'a' is bigger */
|
||||
/*
|
||||
Check the next not space character of the longer key. If it's < ' ',
|
||||
then it's smaller than the other key.
|
||||
*/
|
||||
if (a == a_end)
|
||||
{
|
||||
/* put shorter key in a */
|
||||
a_end= b_end;
|
||||
a= b;
|
||||
swap= -1; /* swap sign of result */
|
||||
res= -res;
|
||||
}
|
||||
for (; a < a_end ; a++)
|
||||
{
|
||||
if (*a != ' ')
|
||||
return (*a < ' ') ? -swap : swap;
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* SJIS->Unicode conversion table */
|
||||
static uint16 sjis_to_unicode[65536]=
|
||||
{
|
||||
|
@ -34099,15 +34016,36 @@ size_t my_numcells_sjis(CHARSET_INFO *cs __attribute__((unused)),
|
|||
}
|
||||
|
||||
|
||||
static MY_COLLATION_HANDLER my_collation_ci_handler =
|
||||
/*
|
||||
sjis_japanese_ci and sjis_bin sort character blocks in this order:
|
||||
1. [00..7F] - 7BIT characters (ASCII)
|
||||
2. [81..9F][40..7E,80..FC] - MB2 characters, part1
|
||||
3. [A1..DF] - 8BIT characters (Kana)
|
||||
4. [E0..FC][40..7E,80..FC] - MB2 characters, part2
|
||||
*/
|
||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _sjis_japanese_ci
|
||||
#define WEIGHT_PAD_SPACE (256 * (int) ' ')
|
||||
#define WEIGHT_MB1(x) (256 * (int) sort_order_sjis[(uchar) (x)])
|
||||
#define WEIGHT_MB2(x,y) (sjiscode(x, y))
|
||||
#include "strcoll.ic"
|
||||
|
||||
|
||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _sjis_bin
|
||||
#define WEIGHT_PAD_SPACE (256 * (int) ' ')
|
||||
#define WEIGHT_MB1(x) (256 * (int) (uchar) (x))
|
||||
#define WEIGHT_MB2(x,y) (sjiscode(x, y))
|
||||
#include "strcoll.ic"
|
||||
|
||||
|
||||
static MY_COLLATION_HANDLER my_collation_handler_sjis_japanese_ci=
|
||||
{
|
||||
NULL, /* init */
|
||||
my_strnncoll_sjis,
|
||||
my_strnncollsp_sjis,
|
||||
NULL, /* init */
|
||||
my_strnncoll_sjis_japanese_ci,
|
||||
my_strnncollsp_sjis_japanese_ci,
|
||||
my_strnxfrm_mb,
|
||||
my_strnxfrmlen_simple,
|
||||
my_like_range_mb,
|
||||
my_wildcmp_mb, /* wildcmp */
|
||||
my_wildcmp_mb,
|
||||
my_strcasecmp_8bit,
|
||||
my_instr_mb,
|
||||
my_hash_sort_simple,
|
||||
|
@ -34115,6 +34053,22 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
|
|||
};
|
||||
|
||||
|
||||
/*
  Collation handler for sjis_bin.
  The two comparison slots use the functions generated by including
  "strcoll.ic" with MY_FUNCTION_NAME(x) defined as my_ ## x ## _sjis_bin.
*/
static MY_COLLATION_HANDLER my_collation_handler_sjis_bin=
|
||||
{
|
||||
NULL, /* init */
|
||||
my_strnncoll_sjis_bin, /* strnncoll, generated from strcoll.ic */
|
||||
my_strnncollsp_sjis_bin, /* PAD SPACE comparison, generated from strcoll.ic */
|
||||
my_strnxfrm_mb, /* strnxfrm */
|
||||
my_strnxfrmlen_simple,
|
||||
my_like_range_mb, /* like_range */
|
||||
my_wildcmp_mb_bin, /* wildcmp */
|
||||
my_strcasecmp_mb_bin,
|
||||
my_instr_mb,
|
||||
my_hash_sort_mb_bin,
|
||||
my_propagate_simple
|
||||
};
|
||||
|
||||
|
||||
static MY_CHARSET_HANDLER my_charset_handler=
|
||||
{
|
||||
NULL, /* init */
|
||||
|
@ -34179,7 +34133,7 @@ struct charset_info_st my_charset_sjis_japanese_ci=
|
|||
1, /* escape_with_backslash_is_dangerous */
|
||||
1, /* levels_for_order */
|
||||
&my_charset_handler,
|
||||
&my_collation_ci_handler
|
||||
&my_collation_handler_sjis_japanese_ci
|
||||
};
|
||||
|
||||
struct charset_info_st my_charset_sjis_bin=
|
||||
|
@ -34211,7 +34165,7 @@ struct charset_info_st my_charset_sjis_bin=
|
|||
1, /* escape_with_backslash_is_dangerous */
|
||||
1, /* levels_for_order */
|
||||
&my_charset_handler,
|
||||
&my_collation_mb_bin_handler
|
||||
&my_collation_handler_sjis_bin
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
231
strings/strcoll.ic
Normal file
231
strings/strcoll.ic
Normal file
|
@ -0,0 +1,231 @@
|
|||
/*
|
||||
Copyright (c) 2015, MariaDB Foundation
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; version 2 of the License.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
|
||||
#ifndef MY_FUNCTION_NAME
|
||||
#error MY_FUNCTION_NAME is not defined
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
The weight for automatically padded spaces when comparing strings with
|
||||
the PAD SPACE property.
|
||||
Should normally be equal to the weight of a regular space.
|
||||
*/
|
||||
#ifndef WEIGHT_PAD_SPACE
|
||||
#define WEIGHT_PAD_SPACE (' ')
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
Weight of an illegal byte, must follow these rules:
|
||||
1. Must be greater than weight of any normal character in the collation.
|
||||
2. Two different bad bytes must have different weights and must be
|
||||
compared in their binary order.
|
||||
|
||||
Depends on mbmaxlen of the character set, as well as how the collation
|
||||
sorts various single-byte and multi-byte character blocks.
|
||||
|
||||
The macro below is the default definition, it is suitable for mbmaxlen=2
|
||||
character sets that sort all multi-byte characters after all single-byte
|
||||
characters: big5, euckr, gb2312, gbk.
|
||||
|
||||
All mbmaxlen>2 character sets must provide their own definitions.
|
||||
All collations that have a more complex order (than just MB1 followed by MB2)
|
||||
must also provide their own definitions (see definitions for
|
||||
cp932_japanese_ci and sjis_japanese_ci as examples of a more complex order).
|
||||
*/
|
||||
#ifndef WEIGHT_ILSEQ
|
||||
#define WEIGHT_ILSEQ(x) (0xFF00 + (x))
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
Scan a valid character, or a bad byte, or an auto-padded space
|
||||
from a string and calculate the weight of the scanned sequence.
|
||||
|
||||
@param [OUT] weight - the weight is returned here
|
||||
@param str - the string
|
||||
@param end - the end of the string
|
||||
@return - the number of bytes scanned
|
||||
|
||||
The including source file must define the following macros:
|
||||
IS_MB1_CHAR(x)
|
||||
IS_MB2_CHAR(x,y)
|
||||
WEIGHT_PAD_SPACE
|
||||
WEIGHT_MB1(x)
|
||||
WEIGHT_MB2(x,y)
|
||||
WEIGHT_ILSEQ(x)
|
||||
*/
|
||||
static inline uint
|
||||
MY_FUNCTION_NAME(scan_weight)(int *weight, const uchar *str, const uchar *end)
|
||||
{
|
||||
if (str >= end)
|
||||
{
|
||||
*weight= WEIGHT_PAD_SPACE;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (IS_MB1_CHAR(*str))
|
||||
{
|
||||
*weight= WEIGHT_MB1(*str); /* A valid single byte character*/
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (str + 2 > end) /* The string ended unexpectedly */
|
||||
goto bad; /* Treat as a bad byte */
|
||||
|
||||
if (IS_MB2_CHAR(str[0], str[1]))
|
||||
{
|
||||
*weight= WEIGHT_MB2(str[0], str[1]);
|
||||
return 2; /* A valid two-byte character */
|
||||
}
|
||||
|
||||
bad:
|
||||
*weight= WEIGHT_ILSEQ(str[0]); /* Bad byte */
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
Compare two strings according to the collation,
|
||||
without handling the PAD SPACE property.
|
||||
|
||||
Note, cs->coll->strnncoll() is usually used to compare identifiers.
|
||||
Perhaps we should eventually (in 10.2?) create a new collation
|
||||
my_charset_utf8_general_ci_no_pad and have only one comparison function
|
||||
in MY_COLLATION_HANDLER.
|
||||
|
||||
@param cs - the character set and collation
|
||||
@param a - the left string
|
||||
@param a_length - the length of the left string
|
||||
@param b - the right string
|
||||
@param b_length - the length of the right string
|
||||
@param b_is_prefix - if the caller wants to check if "b" is a prefix of "a"
|
||||
@return - the comparison result
|
||||
*/
|
||||
static int
|
||||
MY_FUNCTION_NAME(strnncoll)(CHARSET_INFO *cs __attribute__((unused)),
|
||||
const uchar *a, size_t a_length,
|
||||
const uchar *b, size_t b_length,
|
||||
my_bool b_is_prefix)
|
||||
{
|
||||
const uchar *a_end= a + a_length;
|
||||
const uchar *b_end= b + b_length;
|
||||
for ( ; ; )
|
||||
{
|
||||
int a_weight, b_weight, res;
|
||||
uint a_wlen= MY_FUNCTION_NAME(scan_weight)(&a_weight, a, a_end);
|
||||
uint b_wlen= MY_FUNCTION_NAME(scan_weight)(&b_weight, b, b_end);
|
||||
/*
|
||||
a_wlen b_wlen Comment
|
||||
------ ------ -------
|
||||
0 0 Strings ended simultaneously, "a" and "b" are equal.
|
||||
0 >0 "a" is a prefix of "b", so "a" is smaller.
|
||||
>0 0 "b" is a prefix of "a", check b_is_prefix.
|
||||
>0 >0 Two weights were scanned, check weight difference.
|
||||
*/
|
||||
if (!a_wlen)
|
||||
return b_wlen ? -b_weight : 0;
|
||||
|
||||
if (!b_wlen)
|
||||
return b_is_prefix ? 0 : a_weight;
|
||||
|
||||
if ((res= (a_weight - b_weight)))
|
||||
return res;
|
||||
/*
|
||||
None of the strings has ended yet.
|
||||
*/
|
||||
DBUG_ASSERT(a < a_end);
|
||||
DBUG_ASSERT(b < b_end);
|
||||
a+= a_wlen;
|
||||
b+= b_wlen;
|
||||
}
|
||||
DBUG_ASSERT(0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
Compare two strings according to the collation, with PAD SPACE handling.
|
||||
|
||||
@param cs - the character set and collation
|
||||
@param a - the left string
|
||||
@param a_length - the length of the left string
|
||||
@param b - the right string
|
||||
@param b_length - the length of the right string
|
||||
@param diff_if_only_endspace_difference - not used in the code.
|
||||
TODO: this should be eventually removed (in 10.2?)
|
||||
@return - the comparison result
|
||||
*/
|
||||
|
||||
static int
|
||||
MY_FUNCTION_NAME(strnncollsp)(CHARSET_INFO *cs __attribute__((unused)),
|
||||
const uchar *a, size_t a_length,
|
||||
const uchar *b, size_t b_length,
|
||||
my_bool diff_if_only_endspace_difference
|
||||
__attribute__((unused)))
|
||||
{
|
||||
const uchar *a_end= a + a_length;
|
||||
const uchar *b_end= b + b_length;
|
||||
for ( ; ; )
|
||||
{
|
||||
int a_weight, b_weight, res;
|
||||
uint a_wlen= MY_FUNCTION_NAME(scan_weight)(&a_weight, a, a_end);
|
||||
uint b_wlen= MY_FUNCTION_NAME(scan_weight)(&b_weight, b, b_end);
|
||||
if ((res= (a_weight - b_weight)))
|
||||
{
|
||||
/*
|
||||
Got two different weights. Each weight can be generated by either of:
|
||||
- a real character
|
||||
- a bad byte sequence or an incomplete byte sequence
|
||||
- an auto-generated trailing space (PAD SPACE)
|
||||
It does not matter how exactly each weight was generated.
|
||||
Just return the weight difference.
|
||||
*/
|
||||
return res;
|
||||
}
|
||||
if (!a_wlen && !b_wlen)
|
||||
{
|
||||
/*
|
||||
Got two auto-generated trailing spaces, i.e.
|
||||
both strings have now ended, so they are equal.
|
||||
*/
|
||||
DBUG_ASSERT(a == a_end);
|
||||
DBUG_ASSERT(b == b_end);
|
||||
return 0;
|
||||
}
|
||||
/*
|
||||
At least one of the strings has not ended yet, continue comparison.
|
||||
*/
|
||||
DBUG_ASSERT(a < a_end || b < b_end);
|
||||
a+= a_wlen;
|
||||
b+= b_wlen;
|
||||
}
|
||||
DBUG_ASSERT(0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
  We usually include this file at least two times from the same source file,
  for the _ci and the _bin collations. Prepare for the second inclusion by
  dropping the function name macro and all weight macros, so that the
  including file can define them again.
  IS_MB1_CHAR and IS_MB2_CHAR are not undefined here.
  NOTE(review): presumably they are shared by both inclusions - confirm.
*/
|
||||
#undef MY_FUNCTION_NAME
|
||||
#undef WEIGHT_ILSEQ
|
||||
#undef WEIGHT_MB1
|
||||
#undef WEIGHT_MB2
|
||||
#undef WEIGHT_PAD_SPACE
|
|
@ -95,11 +95,361 @@ static CHARSET_INFO *charset_list[]=
|
|||
};
|
||||
|
||||
|
||||
/*
  One strnncollsp() test case: two byte strings with explicit lengths and
  the expected comparison result. Test tables are arrays of these records,
  terminated by a record whose "a" is NULL.
*/
typedef struct
|
||||
{
|
||||
const char *a;  /* the left string (NULL terminates a table) */
|
||||
size_t alen;  /* the length of "a" in bytes */
|
||||
const char *b;  /* the right string */
|
||||
size_t blen;  /* the length of "b" in bytes */
|
||||
int res;  /* the expected result; only its sign is checked */
|
||||
} STRNNCOLL_PARAM;
|
||||
|
||||
|
||||
/*
  Expand a string literal into two initializers: the literal itself and its
  length in bytes, not counting the terminating '\0'.
*/
#define CSTR(x) (x),(sizeof(x)-1)
|
||||
|
||||
/*
  Byte sequence types used in the tests:
    8BIT     - an 8-bit byte (>= 0x80) that makes a single byte character
    MB2      - two bytes that make a valid character
    H2       - a byte which is a valid MB2 head byte
    T2       - a byte which is a valid MB2 tail byte
    ILSEQ    - a byte which makes an illegal sequence
    H2+ILSEQ - a sequence that starts with a valid H2 byte,
               but is not followed by a valid T2 byte.

  Charset  H2               T2                      8BIT
  -------  ---------------- ----------------------- --------
  big5     [A1..F9]         [40..7E,A1..FE]
  euckr    [81..FE]         [41..5A,61..7A,81..FE]
  gb2312   [A1..F7]         [A1..FE]
  gbk      [81..FE]         [40..7E,80..FE]

  cp932    [81..9F,E0..FC]  [40..7E,80..FC]         [A1..DF]
  sjis     [81..9F,E0..FC]  [40..7E,80..FC]         [A1..DF]


  Essential byte sequences in various character sets:

  Sequence  big5   cp932      euckr  gb2312    gbk    sjis
  --------  ----   -----      -----  ------    ---    ----
  80        ILSEQ  ILSEQ      ILSEQ  ILSEQ     ILSEQ  ILSEQ
  81        ILSEQ  H2         H2     ILSEQ     H2     H2
  A1        H2     8BIT       H2     H2        H2     8BIT
  A1A1      MB2    8BIT+8BIT  MB2    MB2       MB2    8BIT+8BIT
  E0E0      MB2    MB2        MB2    MB2       MB2    MB2
  F9FE      MB2    H2+ILSEQ   MB2    ILSEQ+T2  MB2    H2+ILSEQ
*/
|
||||
|
||||
|
||||
/*
|
||||
For character sets that have the following byte sequences:
|
||||
80 - ILSEQ
|
||||
81 - ILSEQ or H2
|
||||
F9 - ILSEQ or H2
|
||||
A1A1 - MB2 or 8BIT+8BIT
|
||||
E0E0 - MB2
|
||||
*/
|
||||
STRNNCOLL_PARAM strcoll_mb2_common[]=
|
||||
{
|
||||
/* Compare two good sequences */
|
||||
{CSTR(""), CSTR(""), 0},
|
||||
{CSTR(""), CSTR(" "), 0},
|
||||
{CSTR(""), CSTR("A"), -1},
|
||||
{CSTR(""), CSTR("a"), -1},
|
||||
{CSTR(""), CSTR("\xA1\xA1"), -1},
|
||||
{CSTR(""), CSTR("\xE0\xE0"), -1},
|
||||
|
||||
{CSTR(" "), CSTR(""), 0},
|
||||
{CSTR(" "), CSTR(" "), 0},
|
||||
{CSTR(" "), CSTR("A"), -1},
|
||||
{CSTR(" "), CSTR("a"), -1},
|
||||
{CSTR(" "), CSTR("\xA1\xA1"), -1},
|
||||
{CSTR(" "), CSTR("\xE0\xE0"), -1},
|
||||
|
||||
{CSTR("a"), CSTR(""), 1},
|
||||
{CSTR("a"), CSTR(" "), 1},
|
||||
{CSTR("a"), CSTR("a"), 0},
|
||||
{CSTR("a"), CSTR("\xA1\xA1"), -1},
|
||||
{CSTR("a"), CSTR("\xE0\xE0"), -1},
|
||||
|
||||
{CSTR("\xA1\xA1"), CSTR("\xA1\xA1"), 0},
|
||||
{CSTR("\xA1\xA1"), CSTR("\xE0\xE0"), -1},
|
||||
|
||||
/* Compare a good character to an illegal or an incomplete sequence */
|
||||
{CSTR(""), CSTR("\x80"), -1},
|
||||
{CSTR(""), CSTR("\x81"), -1},
|
||||
{CSTR(""), CSTR("\xF9"), -1},
|
||||
|
||||
{CSTR(" "), CSTR("\x80"), -1},
|
||||
{CSTR(" "), CSTR("\x81"), -1},
|
||||
{CSTR(" "), CSTR("\xF9"), -1},
|
||||
|
||||
{CSTR("a"), CSTR("\x80"), -1},
|
||||
{CSTR("a"), CSTR("\x81"), -1},
|
||||
{CSTR("a"), CSTR("\xF9"), -1},
|
||||
|
||||
{CSTR("\xA1\xA1"), CSTR("\x80"), -1},
|
||||
{CSTR("\xA1\xA1"), CSTR("\x81"), -1},
|
||||
{CSTR("\xA1\xA1"), CSTR("\xF9"), -1},
|
||||
|
||||
{CSTR("\xE0\xE0"), CSTR("\x80"), -1},
|
||||
{CSTR("\xE0\xE0"), CSTR("\x81"), -1},
|
||||
{CSTR("\xE0\xE0"), CSTR("\xF9"), -1},
|
||||
|
||||
/* Compare two bad/incomplete sequences */
|
||||
{CSTR("\x80"), CSTR("\x80"), 0},
|
||||
{CSTR("\x80"), CSTR("\x81"), -1},
|
||||
{CSTR("\x80"), CSTR("\xF9"), -1},
|
||||
{CSTR("\x81"), CSTR("\x81"), 0},
|
||||
{CSTR("\x81"), CSTR("\xF9"), -1},
|
||||
|
||||
{NULL, 0, NULL, 0, 0}
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
For character sets that have good mb2 characters A1A1 and F9FE
|
||||
*/
|
||||
STRNNCOLL_PARAM strcoll_mb2_A1A1_mb2_F9FE[]=
|
||||
{
|
||||
/* Compare two good characters */
|
||||
{CSTR(""), CSTR("\xF9\xFE"), -1},
|
||||
{CSTR(" "), CSTR("\xF9\xFE"), -1},
|
||||
{CSTR("a") , CSTR("\xF9\xFE"), -1},
|
||||
{CSTR("\xA1\xA1"), CSTR("\xF9\xFE"), -1},
|
||||
{CSTR("\xF9\xFE"), CSTR("\xF9\xFE"), 0},
|
||||
|
||||
/* Compare a good character to an illegal or an incomplete sequence */
|
||||
{CSTR(""), CSTR("\xA1"), -1},
|
||||
{CSTR(""), CSTR("\xF9"), -1},
|
||||
{CSTR("a"), CSTR("\xA1"), -1},
|
||||
{CSTR("a"), CSTR("\xF9"), -1},
|
||||
|
||||
{CSTR("\xA1\xA1"), CSTR("\xA1"), -1},
|
||||
{CSTR("\xA1\xA1"), CSTR("\xF9"), -1},
|
||||
|
||||
{CSTR("\xF9\xFE"), CSTR("\x80"), -1},
|
||||
{CSTR("\xF9\xFE"), CSTR("\x81"), -1},
|
||||
{CSTR("\xF9\xFE"), CSTR("\xA1"), -1},
|
||||
{CSTR("\xF9\xFE"), CSTR("\xF9"), -1},
|
||||
|
||||
/* Compare two bad/incomplete sequences */
|
||||
{CSTR("\x80"), CSTR("\xA1"), -1},
|
||||
{CSTR("\x80"), CSTR("\xF9"), -1},
|
||||
|
||||
{NULL, 0, NULL, 0, 0}
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
For character sets that have:
|
||||
A1A1 - a good mb2 character
|
||||
F9FE - a bad sequence
|
||||
*/
|
||||
STRNNCOLL_PARAM strcoll_mb2_A1A1_bad_F9FE[]=
|
||||
{
|
||||
/* Compare a good character to an illegal or an incomplete sequence */
|
||||
{CSTR(""), CSTR("\xF9\xFE"), -1},
|
||||
{CSTR(" "), CSTR("\xF9\xFE"), -1},
|
||||
{CSTR("a") , CSTR("\xF9\xFE"), -1},
|
||||
{CSTR("\xA1\xA1"), CSTR("\xF9\xFE"), -1},
|
||||
|
||||
{CSTR(""), CSTR("\xA1"), -1},
|
||||
{CSTR(""), CSTR("\xF9"), -1},
|
||||
{CSTR("a"), CSTR("\xA1"), -1},
|
||||
{CSTR("a"), CSTR("\xF9"), -1},
|
||||
|
||||
{CSTR("\xA1\xA1"), CSTR("\xA1"), -1},
|
||||
{CSTR("\xA1\xA1"), CSTR("\xF9"), -1},
|
||||
|
||||
/* Compare two bad/incomplete sequences */
|
||||
{CSTR("\xF9\xFE"), CSTR("\x80"), 1},
|
||||
{CSTR("\xF9\xFE"), CSTR("\x81"), 1},
|
||||
{CSTR("\xF9\xFE"), CSTR("\xA1"), 1},
|
||||
{CSTR("\xF9\xFE"), CSTR("\xF9"), 1},
|
||||
{CSTR("\x80"), CSTR("\xA1"), -1},
|
||||
{CSTR("\x80"), CSTR("\xF9"), -1},
|
||||
{CSTR("\xF9\xFE"), CSTR("\xF9\xFE"), 0},
|
||||
|
||||
{NULL, 0, NULL, 0, 0}
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
For character sets that have:
|
||||
80 - ILSEQ or H2
|
||||
81 - ILSEQ or H2
|
||||
A1 - 8BIT
|
||||
F9 - ILSEQ or H2
|
||||
F9FE - a bad sequence (ILSEQ+XX or H2+ILSEQ)
|
||||
*/
|
||||
STRNNCOLL_PARAM strcoll_mb1_A1_bad_F9FE[]=
|
||||
{
|
||||
/* Compare two good characters */
|
||||
{CSTR(""), CSTR("\xA1"), -1},
|
||||
{CSTR("\xA1\xA1"), CSTR("\xA1"), 1},
|
||||
|
||||
/* Compare a good character to an illegal or an incomplete sequence */
|
||||
{CSTR(""), CSTR("\xF9"), -1},
|
||||
{CSTR(""), CSTR("\xF9\xFE"), -1},
|
||||
{CSTR(" "), CSTR("\xF9\xFE"), -1},
|
||||
{CSTR("a"), CSTR("\xF9\xFE"), -1},
|
||||
{CSTR("a"), CSTR("\xA1"), -1},
|
||||
{CSTR("a"), CSTR("\xF9"), -1},
|
||||
|
||||
{CSTR("\xA1\xA1"), CSTR("\xF9"), -1},
|
||||
{CSTR("\xA1\xA1"), CSTR("\xF9\xFE"), -1},
|
||||
|
||||
{CSTR("\xF9\xFE"), CSTR("\x80"), 1},
|
||||
{CSTR("\xF9\xFE"), CSTR("\x81"), 1},
|
||||
{CSTR("\xF9\xFE"), CSTR("\xA1"), 1},
|
||||
{CSTR("\xF9\xFE"), CSTR("\xF9"), 1},
|
||||
|
||||
{CSTR("\x80"), CSTR("\xA1"), 1},
|
||||
|
||||
/* Compare two bad/incomplete sequences */
|
||||
{CSTR("\x80"), CSTR("\xF9"), -1},
|
||||
{CSTR("\xF9\xFE"), CSTR("\xF9\xFE"), 0},
|
||||
|
||||
{NULL, 0, NULL, 0, 0}
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
For character sets (e.g. cp932 and sjis) that have:
|
||||
8181 - a valid MB2 character
|
||||
A1 - a valid 8BIT character
|
||||
E0E0 - a valid MB2 character
|
||||
and sort in this order:
|
||||
8181 < A1 < E0E0
|
||||
*/
|
||||
STRNNCOLL_PARAM strcoll_8181_A1_E0E0[]=
|
||||
{
|
||||
{CSTR("\x81\x81"), CSTR("\xA1"), -1},
|
||||
{CSTR("\x81\x81"), CSTR("\xE0\xE0"), -1},
|
||||
{CSTR("\xA1"), CSTR("\xE0\xE0"), -1},
|
||||
|
||||
{NULL, 0, NULL, 0, 0}
|
||||
};
|
||||
|
||||
|
||||
/*
  Render a byte string as upper-case hexadecimal text, two digits per byte.

  @param dst    - the destination buffer
  @param dstlen - the size of the destination buffer in bytes
  @param src    - the bytes to render
  @param srclen - the number of bytes in "src"

  The result is always '\0'-terminated when dstlen is not zero. If the
  buffer is too small, the output is silently truncated on a byte boundary:
  a byte is rendered only while more than 3 bytes of the buffer remain.
  When dstlen is zero, nothing is written at all.
*/
static void
str2hex(char *dst, size_t dstlen, const char *src, size_t srclen)
{
  size_t used= 0; /* The number of hex digits written so far */
  size_t i;

  if (!dstlen)
    return;       /* There is no room even for the terminating '\0' */

  dst[0]= '\0';
  /*
    Sizes are compared instead of pointers, so no pointer beyond the end
    of the buffer is ever formed. "used" stays below dstlen, so the
    subtraction cannot wrap around.
  */
  for (i= 0; i < srclen && dstlen - used > 3; i++)
  {
    /* Writes two digits and '\0'; the loop condition guarantees the room */
    sprintf(dst + used, "%02X", (unsigned char) src[i]);
    used+= 2;
  }
}
|
||||
|
||||
|
||||
/*
  Tell whether two comparison results mean the same thing, i.e. whether
  they have the same sign: both negative, both positive, or both zero.
  Returns 1 if they do and 0 otherwise.
*/
static int
eqres(int ares, int bres)
{
  /* Reduce each result to -1, 0 or +1 and compare the reduced values */
  int asign= (ares > 0) - (ares < 0);
  int bsign= (bres > 0) - (bres < 0);
  return asign == bsign;
}
|
||||
|
||||
|
||||
static int
|
||||
strcollsp(CHARSET_INFO *cs, const STRNNCOLL_PARAM *param)
|
||||
{
|
||||
int failed= 0;
|
||||
const STRNNCOLL_PARAM *p;
|
||||
diag("%-20s %-10s %-10s %10s %10s", "Collation", "a", "b", "ExpectSign", "Actual");
|
||||
for (p= param; p->a; p++)
|
||||
{
|
||||
char ahex[64], bhex[64];
|
||||
int res= cs->coll->strnncollsp(cs, (uchar *) p->a, p->alen,
|
||||
(uchar *) p->b, p->blen, 0);
|
||||
str2hex(ahex, sizeof(ahex), p->a, p->alen);
|
||||
str2hex(bhex, sizeof(bhex), p->b, p->blen);
|
||||
diag("%-20s %-10s %-10s %10d %10d%s",
|
||||
cs->name, ahex, bhex, p->res, res,
|
||||
eqres(res, p->res) ? "" : " FAILED");
|
||||
if (!eqres(res, p->res))
|
||||
{
|
||||
failed++;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Test in reverse order */
|
||||
res= cs->coll->strnncollsp(cs, (uchar *) p->b, p->blen,
|
||||
(uchar *) p->a, p->alen, 0);
|
||||
if (!eqres(res, -p->res))
|
||||
{
|
||||
diag("Comparison in reverse order failed. Expected %d, got %d",
|
||||
-p->res, res);
|
||||
failed++;
|
||||
}
|
||||
}
|
||||
}
|
||||
return failed;
|
||||
}
|
||||
|
||||
|
||||
/*
  Run the strnncollsp() test tables against the _ci and the _bin collations
  of every Asian MB2 character set that is compiled in. Each character set
  is tested with the common table plus the tables that match its byte
  layout.

  @return - the total number of failed checks
*/
static int
test_strcollsp(void)
{
  int failed= 0;
#ifdef HAVE_CHARSET_big5
  failed+= strcollsp(&my_charset_big5_chinese_ci, strcoll_mb2_common);
  failed+= strcollsp(&my_charset_big5_chinese_ci, strcoll_mb2_A1A1_mb2_F9FE);
  failed+= strcollsp(&my_charset_big5_bin, strcoll_mb2_common);
  failed+= strcollsp(&my_charset_big5_bin, strcoll_mb2_A1A1_mb2_F9FE);
#endif
#ifdef HAVE_CHARSET_cp932
  failed+= strcollsp(&my_charset_cp932_japanese_ci, strcoll_mb2_common);
  failed+= strcollsp(&my_charset_cp932_japanese_ci, strcoll_mb1_A1_bad_F9FE);
  failed+= strcollsp(&my_charset_cp932_bin, strcoll_mb2_common);
  failed+= strcollsp(&my_charset_cp932_bin, strcoll_mb1_A1_bad_F9FE);
  failed+= strcollsp(&my_charset_cp932_japanese_ci, strcoll_8181_A1_E0E0);
  failed+= strcollsp(&my_charset_cp932_bin, strcoll_8181_A1_E0E0);
#endif
#ifdef HAVE_CHARSET_euckr
  failed+= strcollsp(&my_charset_euckr_korean_ci, strcoll_mb2_common);
  failed+= strcollsp(&my_charset_euckr_korean_ci, strcoll_mb2_A1A1_mb2_F9FE);
  failed+= strcollsp(&my_charset_euckr_bin, strcoll_mb2_common);
  failed+= strcollsp(&my_charset_euckr_bin, strcoll_mb2_A1A1_mb2_F9FE);
#endif
#ifdef HAVE_CHARSET_gb2312
  failed+= strcollsp(&my_charset_gb2312_chinese_ci, strcoll_mb2_common);
  failed+= strcollsp(&my_charset_gb2312_chinese_ci, strcoll_mb2_A1A1_bad_F9FE);
  failed+= strcollsp(&my_charset_gb2312_bin, strcoll_mb2_common);
  failed+= strcollsp(&my_charset_gb2312_bin, strcoll_mb2_A1A1_bad_F9FE);
#endif
#ifdef HAVE_CHARSET_gbk
  failed+= strcollsp(&my_charset_gbk_chinese_ci, strcoll_mb2_common);
  failed+= strcollsp(&my_charset_gbk_chinese_ci, strcoll_mb2_A1A1_mb2_F9FE);
  failed+= strcollsp(&my_charset_gbk_bin, strcoll_mb2_common);
  failed+= strcollsp(&my_charset_gbk_bin, strcoll_mb2_A1A1_mb2_F9FE);
#endif
#ifdef HAVE_CHARSET_sjis
  failed+= strcollsp(&my_charset_sjis_japanese_ci, strcoll_mb2_common);
  failed+= strcollsp(&my_charset_sjis_bin, strcoll_mb2_common);
  failed+= strcollsp(&my_charset_sjis_japanese_ci, strcoll_mb1_A1_bad_F9FE);
  failed+= strcollsp(&my_charset_sjis_bin, strcoll_mb1_A1_bad_F9FE);
  failed+= strcollsp(&my_charset_sjis_japanese_ci, strcoll_8181_A1_E0E0);
  failed+= strcollsp(&my_charset_sjis_bin, strcoll_8181_A1_E0E0);
#endif
  return failed;
}
|
||||
|
||||
|
||||
int main()
|
||||
{
|
||||
size_t i, failed= 0;
|
||||
|
||||
plan(1);
|
||||
plan(2);
|
||||
diag("Testing my_like_range_xxx() functions");
|
||||
|
||||
for (i= 0; i < array_elements(charset_list); i++)
|
||||
|
@ -112,5 +462,10 @@ int main()
|
|||
}
|
||||
}
|
||||
ok(failed == 0, "Testing my_like_range_xxx() functions");
|
||||
|
||||
diag("Testing cs->coll->strnncollsp()");
|
||||
failed= test_strcollsp();
|
||||
ok(failed == 0, "Testing cs->coll->strnncollsp()");
|
||||
|
||||
return exit_status();
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue