MDEV-8214 Asian MB2 charsets: compare broken bytes as "greater than any non-broken character"

This commit is contained in:
Alexander Barkov 2015-06-26 13:40:28 +04:00
parent d535728165
commit 4f828a1cac
9 changed files with 830 additions and 373 deletions

View file

@ -49,6 +49,7 @@
#define big5tail(e) ((uchar)(e&0xff))
#define MY_FUNCTION_NAME(x) my_ ## x ## _big5
#define IS_MB1_CHAR(x) ((uchar) (x) < 0x80)
#define IS_MB2_CHAR(x,y) (isbig5head(x) && isbig5tail(y))
#define DEFINE_ASIAN_ROUTINES
#include "ctype-mb.ic"
@ -849,89 +850,6 @@ static uint16 big5strokexfrm(uint16 i)
}
static int my_strnncoll_big5_internal(const uchar **a_res,
const uchar **b_res, size_t length)
{
const uchar *a= *a_res, *b= *b_res;
while (length--)
{
if ((length > 0) && isbig5code(*a,*(a+1)) && isbig5code(*b, *(b+1)))
{
if (*a != *b || *(a+1) != *(b+1))
return ((int) big5code(*a,*(a+1)) -
(int) big5code(*b,*(b+1)));
a+= 2;
b+= 2;
length--;
}
else if (sort_order_big5[*a++] !=
sort_order_big5[*b++])
return ((int) sort_order_big5[a[-1]] -
(int) sort_order_big5[b[-1]]);
}
*a_res= a;
*b_res= b;
return 0;
}
/* Compare strings */
static int my_strnncoll_big5(CHARSET_INFO *cs __attribute__((unused)),
const uchar *a, size_t a_length,
const uchar *b, size_t b_length,
my_bool b_is_prefix)
{
size_t length= MY_MIN(a_length, b_length);
int res= my_strnncoll_big5_internal(&a, &b, length);
return res ? res : (int)((b_is_prefix ? length : a_length) - b_length);
}
/* compare strings, ignore end space */
static int my_strnncollsp_big5(CHARSET_INFO * cs __attribute__((unused)),
const uchar *a, size_t a_length,
const uchar *b, size_t b_length,
my_bool diff_if_only_endspace_difference)
{
size_t length= MY_MIN(a_length, b_length);
int res= my_strnncoll_big5_internal(&a, &b, length);
#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
diff_if_only_endspace_difference= 0;
#endif
if (!res && a_length != b_length)
{
const uchar *end;
int swap= 1;
if (diff_if_only_endspace_difference)
res= 1; /* Assume 'a' is bigger */
/*
Check the next not space character of the longer key. If it's < ' ',
then it's smaller than the other key.
*/
if (a_length < b_length)
{
/* put longer key in a */
a_length= b_length;
a= b;
swap= -1; /* swap sign of result */
res= -res;
}
for (end= a + a_length-length; a < end ; a++)
{
if (*a != ' ')
return (*a < ' ') ? -swap : swap;
}
}
return res;
}
static size_t
my_strnxfrm_big5(CHARSET_INFO *cs,
uchar *dst, size_t dstlen, uint nweights,
@ -6853,11 +6771,23 @@ my_mb_wc_big5(CHARSET_INFO *cs __attribute__((unused)),
}
static MY_COLLATION_HANDLER my_collation_big5_chinese_ci_handler =
#define MY_FUNCTION_NAME(x) my_ ## x ## _big5_chinese_ci
#define WEIGHT_MB1(x) (sort_order_big5[(uchar) (x)])
#define WEIGHT_MB2(x,y) (big5code(x, y))
#include "strcoll.ic"
#define MY_FUNCTION_NAME(x) my_ ## x ## _big5_bin
#define WEIGHT_MB1(x) ((uchar) (x))
#define WEIGHT_MB2(x,y) (big5code(x, y))
#include "strcoll.ic"
static MY_COLLATION_HANDLER my_collation_handler_big5_chinese_ci=
{
NULL, /* init */
my_strnncoll_big5,
my_strnncollsp_big5,
my_strnncoll_big5_chinese_ci,
my_strnncollsp_big5_chinese_ci,
my_strnxfrm_big5,
my_strnxfrmlen_simple,
my_like_range_mb,
@ -6868,6 +6798,23 @@ static MY_COLLATION_HANDLER my_collation_big5_chinese_ci_handler =
my_propagate_simple
};
static MY_COLLATION_HANDLER my_collation_handler_big5_bin=
{
NULL, /* init */
my_strnncoll_big5_bin,
my_strnncollsp_big5_bin,
my_strnxfrm_mb,
my_strnxfrmlen_simple,
my_like_range_mb,
my_wildcmp_mb_bin,
my_strcasecmp_mb_bin,
my_instr_mb,
my_hash_sort_mb_bin,
my_propagate_simple
};
static MY_CHARSET_HANDLER my_charset_big5_handler=
{
NULL, /* init */
@ -6931,7 +6878,7 @@ struct charset_info_st my_charset_big5_chinese_ci=
1, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_big5_handler,
&my_collation_big5_chinese_ci_handler
&my_collation_handler_big5_chinese_ci
};
@ -6964,7 +6911,7 @@ struct charset_info_st my_charset_big5_bin=
1, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_big5_handler,
&my_collation_mb_bin_handler
&my_collation_handler_big5_bin
};

View file

@ -185,6 +185,7 @@ static const uchar sort_order_cp932[]=
#define MY_FUNCTION_NAME(x) my_ ## x ## _cp932
#define IS_8BIT_CHAR(x) iscp932kata(x)
#define IS_MB1_CHAR(x) ((uchar) (x) < 0x80 || iscp932kata(x))
#define IS_MB2_CHAR(x,y) (iscp932head(x) && iscp932tail(y))
#define DEFINE_ASIAN_ROUTINES
#include "ctype-mb.ic"
@ -1717,90 +1718,6 @@ MY_UNICASE_INFO my_caseinfo_cp932=
my_caseinfo_pages_cp932
};
static int my_strnncoll_cp932_internal(CHARSET_INFO *cs,
const uchar **a_res, size_t a_length,
const uchar **b_res, size_t b_length)
{
const uchar *a= *a_res, *b= *b_res;
const uchar *a_end= a + a_length;
const uchar *b_end= b + b_length;
while (a < a_end && b < b_end)
{
if (ismbchar_cp932(cs,(char*) a, (char*) a_end) &&
ismbchar_cp932(cs,(char*) b, (char*) b_end))
{
uint a_char= cp932code(*a, *(a+1));
uint b_char= cp932code(*b, *(b+1));
if (a_char != b_char)
return a_char - b_char;
a += 2;
b += 2;
} else
{
if (sort_order_cp932[(uchar)*a] != sort_order_cp932[(uchar)*b])
return sort_order_cp932[(uchar)*a] - sort_order_cp932[(uchar)*b];
a++;
b++;
}
}
*a_res= a;
*b_res= b;
return 0;
}
static int my_strnncoll_cp932(CHARSET_INFO *cs __attribute__((unused)),
const uchar *a, size_t a_length,
const uchar *b, size_t b_length,
my_bool b_is_prefix)
{
int res= my_strnncoll_cp932_internal(cs, &a, a_length, &b, b_length);
if (b_is_prefix && a_length > b_length)
a_length= b_length;
return res ? res : (int) (a_length - b_length);
}
/*
Compare two cp932 strings, ignoring trailing spaces.

The common part of the two strings is compared by
my_strnncoll_cp932_internal(), which advances "a" and "b" past the
bytes it consumed. If that part is equal and one string still has
unread bytes, the remaining tail of that string is scanned: the first
non-space byte decides the result (a byte lower than ' ' makes the
longer string sort lower, any other byte makes it sort higher).

Note: "cs" is annotated as unused but is forwarded to the internal helper.
Note: diff_if_only_endspace_difference is forced to 0 unless
VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE is defined.

Returns the difference reported by the internal helper if the common
parts differ; otherwise the sign derived from the tail scan; otherwise
"res" (0 when the flag above is 0).
*/
static int my_strnncollsp_cp932(CHARSET_INFO *cs __attribute__((unused)),
const uchar *a, size_t a_length,
const uchar *b, size_t b_length,
my_bool diff_if_only_endspace_difference
__attribute__((unused)))
{
const uchar *a_end= a + a_length;
const uchar *b_end= b + b_length;
int res= my_strnncoll_cp932_internal(cs, &a, a_length, &b, b_length);
#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
diff_if_only_endspace_difference= 0;
#endif
if (!res && (a != a_end || b != b_end))
{
int swap= 1;
if (diff_if_only_endspace_difference)
res= 1; /* Assume 'a' is bigger */
/*
Check the next not space character of the longer key. If it's < ' ',
then it's smaller than the other key.
*/
if (a == a_end)
{
/* "a" is exhausted, so "b" is the longer key: put the longer key in a */
a_end= b_end;
a= b;
swap= -1; /* swap sign of result */
res= -res;
}
/* Scan the unread tail of the longer key for the first non-space byte */
for (; a < a_end ; a++)
{
if (*a != (uchar) ' ')
return (*a < (uchar) ' ') ? -swap : swap;
}
}
return res;
}
static const uint16 cp932_to_unicode[65536]=
{
@ -34720,15 +34637,36 @@ size_t my_numcells_cp932(CHARSET_INFO *cs __attribute__((unused)),
}
static MY_COLLATION_HANDLER my_collation_ci_handler =
/*
cp932_japanese_ci and cp932_bin sort character blocks in this order:
1. [00..7F] - 7BIT characters (ASCII)
2. [81..9F][40..7E,80..FC] - MB2 characters, part1
3. [A1..DF] - 8BIT characters (Kana)
4. [E0..FC][40..7E,80..FC] - MB2 characters, part2
*/
#define MY_FUNCTION_NAME(x) my_ ## x ## _cp932_japanese_ci
#define WEIGHT_PAD_SPACE (256 * (int) ' ')
#define WEIGHT_MB1(x) (256 * (int) sort_order_cp932[(uchar) (x)])
#define WEIGHT_MB2(x,y) (cp932code(x, y))
#include "strcoll.ic"
#define MY_FUNCTION_NAME(x) my_ ## x ## _cp932_bin
#define WEIGHT_PAD_SPACE (256 * (int) ' ')
#define WEIGHT_MB1(x) (256 * (int) (uchar) (x))
#define WEIGHT_MB2(x,y) (cp932code(x, y))
#include "strcoll.ic"
static MY_COLLATION_HANDLER my_collation_handler_cp932_japanese_ci=
{
NULL, /* init */
my_strnncoll_cp932,
my_strnncollsp_cp932,
NULL, /* init */
my_strnncoll_cp932_japanese_ci,
my_strnncollsp_cp932_japanese_ci,
my_strnxfrm_mb,
my_strnxfrmlen_simple,
my_like_range_mb,
my_wildcmp_mb, /* wildcmp */
my_wildcmp_mb,
my_strcasecmp_8bit,
my_instr_mb,
my_hash_sort_simple,
@ -34736,6 +34674,22 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
};
static MY_COLLATION_HANDLER my_collation_handler_cp932_bin=
{
NULL, /* init */
my_strnncoll_cp932_bin,
my_strnncollsp_cp932_bin,
my_strnxfrm_mb,
my_strnxfrmlen_simple,
my_like_range_mb,
my_wildcmp_mb_bin,
my_strcasecmp_mb_bin,
my_instr_mb,
my_hash_sort_mb_bin,
my_propagate_simple
};
static MY_CHARSET_HANDLER my_charset_handler=
{
NULL, /* init */
@ -34800,7 +34754,7 @@ struct charset_info_st my_charset_cp932_japanese_ci=
1, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_handler,
&my_collation_ci_handler
&my_collation_handler_cp932_japanese_ci
};
struct charset_info_st my_charset_cp932_bin=
@ -34832,7 +34786,7 @@ struct charset_info_st my_charset_cp932_bin=
1, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_handler,
&my_collation_mb_bin_handler
&my_collation_handler_cp932_bin
};
#endif

View file

@ -201,8 +201,10 @@ static const uchar sort_order_euc_kr[]=
iseuc_kr_tail2(c) || \
iseuc_kr_tail3(c))
#define euckrcode(c,d) (((uchar)(c) <<8) | (uchar)(d))
#define MY_FUNCTION_NAME(x) my_ ## x ## _euckr
#define IS_MB1_CHAR(x) ((uchar) (x) < 0x80)
#define IS_MB2_CHAR(x,y) (iseuc_kr_head(x) && iseuc_kr_tail(y))
#define DEFINE_ASIAN_ROUTINES
#include "ctype-mb.ic"
@ -9938,21 +9940,50 @@ my_mb_wc_euc_kr(CHARSET_INFO *cs __attribute__((unused)),
}
static MY_COLLATION_HANDLER my_collation_ci_handler =
#define MY_FUNCTION_NAME(x) my_ ## x ## _euckr_korean_ci
#define WEIGHT_MB1(x) (sort_order_euc_kr[(uchar) (x)])
#define WEIGHT_MB2(x,y) (euckrcode(x, y))
#include "strcoll.ic"
#define MY_FUNCTION_NAME(x) my_ ## x ## _euckr_bin
#define WEIGHT_MB1(x) ((uchar) (x))
#define WEIGHT_MB2(x,y) (euckrcode(x, y))
#include "strcoll.ic"
static MY_COLLATION_HANDLER my_collation_handler_euckr_korean_ci=
{
NULL, /* init */
my_strnncoll_simple, /* strnncoll */
my_strnncollsp_simple,
my_strnxfrm_mb, /* strnxfrm */
NULL, /* init */
my_strnncoll_euckr_korean_ci,
my_strnncollsp_euckr_korean_ci,
my_strnxfrm_mb,
my_strnxfrmlen_simple,
my_like_range_mb, /* like_range */
my_wildcmp_mb, /* wildcmp */
my_like_range_mb,
my_wildcmp_mb,
my_strcasecmp_mb,
my_instr_mb,
my_hash_sort_simple,
my_propagate_simple
};
static MY_COLLATION_HANDLER my_collation_handler_euckr_bin=
{
NULL, /* init */
my_strnncoll_euckr_bin,
my_strnncollsp_euckr_bin,
my_strnxfrm_mb,
my_strnxfrmlen_simple,
my_like_range_mb,
my_wildcmp_mb_bin,
my_strcasecmp_mb_bin,
my_instr_mb,
my_hash_sort_mb_bin,
my_propagate_simple
};
static MY_CHARSET_HANDLER my_charset_handler=
{
NULL, /* init */
@ -10017,7 +10048,7 @@ struct charset_info_st my_charset_euckr_korean_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_handler,
&my_collation_ci_handler
&my_collation_handler_euckr_korean_ci
};
@ -10050,7 +10081,7 @@ struct charset_info_st my_charset_euckr_bin=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_handler,
&my_collation_mb_bin_handler
&my_collation_handler_euckr_bin
};
#endif

View file

@ -163,9 +163,11 @@ static const uchar sort_order_gb2312[]=
#define isgb2312head(c) (0xa1<=(uchar)(c) && (uchar)(c)<=0xf7)
#define isgb2312tail(c) (0xa1<=(uchar)(c) && (uchar)(c)<=0xfe)
#define gb2312code(c,d) (((uchar)(c) <<8) | (uchar)(d))
#define MY_FUNCTION_NAME(x) my_ ## x ## _gb2312
#define IS_MB1_CHAR(x) ((uchar) (x) < 0x80)
#define IS_MB2_CHAR(x,y) (isgb2312head(x) && isgb2312tail(y))
#define DEFINE_ASIAN_ROUTINES
#include "ctype-mb.ic"
@ -6341,11 +6343,23 @@ my_mb_wc_gb2312(CHARSET_INFO *cs __attribute__((unused)),
}
static MY_COLLATION_HANDLER my_collation_ci_handler =
#define MY_FUNCTION_NAME(x) my_ ## x ## _gb2312_chinese_ci
#define WEIGHT_MB1(x) (sort_order_gb2312[(uchar) (x)])
#define WEIGHT_MB2(x,y) (gb2312code(x, y))
#include "strcoll.ic"
#define MY_FUNCTION_NAME(x) my_ ## x ## _gb2312_bin
#define WEIGHT_MB1(x) ((uchar) (x))
#define WEIGHT_MB2(x,y) (gb2312code(x, y))
#include "strcoll.ic"
static MY_COLLATION_HANDLER my_collation_handler_gb2312_chinese_ci=
{
NULL, /* init */
my_strnncoll_simple, /* strnncoll */
my_strnncollsp_simple,
NULL, /* init */
my_strnncoll_gb2312_chinese_ci,
my_strnncollsp_gb2312_chinese_ci,
my_strnxfrm_mb, /* strnxfrm */
my_strnxfrmlen_simple,
my_like_range_mb, /* like_range */
@ -6356,6 +6370,24 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
my_propagate_simple
};
static MY_COLLATION_HANDLER my_collation_handler_gb2312_bin=
{
NULL, /* init */
my_strnncoll_gb2312_bin,
my_strnncollsp_gb2312_bin,
my_strnxfrm_mb,
my_strnxfrmlen_simple,
my_like_range_mb,
my_wildcmp_mb_bin,
my_strcasecmp_mb_bin,
my_instr_mb,
my_hash_sort_mb_bin,
my_propagate_simple
};
static MY_CHARSET_HANDLER my_charset_handler=
{
NULL, /* init */
@ -6420,9 +6452,10 @@ struct charset_info_st my_charset_gb2312_chinese_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_handler,
&my_collation_ci_handler
&my_collation_handler_gb2312_chinese_ci
};
struct charset_info_st my_charset_gb2312_bin=
{
86,0,0, /* number */
@ -6452,7 +6485,7 @@ struct charset_info_st my_charset_gb2312_bin=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_handler,
&my_collation_mb_bin_handler
&my_collation_handler_gb2312_bin
};
#endif

View file

@ -44,6 +44,7 @@
#define gbktail(e) ((uchar)(e&0xff))
#define MY_FUNCTION_NAME(x) my_ ## x ## _gbk
#define IS_MB1_CHAR(x) ((uchar) (x) < 0x80)
#define IS_MB2_CHAR(x,y) (isgbkhead(x) && isgbktail(y))
#define DEFINE_ASIAN_ROUTINES
#include "ctype-mb.ic"
@ -3450,87 +3451,6 @@ static uint16 gbksortorder(uint16 i)
}
int my_strnncoll_gbk_internal(const uchar **a_res, const uchar **b_res,
size_t length)
{
const uchar *a= *a_res, *b= *b_res;
uint a_char,b_char;
while (length--)
{
if ((length > 0) && isgbkcode(*a,*(a+1)) && isgbkcode(*b, *(b+1)))
{
a_char= gbkcode(*a,*(a+1));
b_char= gbkcode(*b,*(b+1));
if (a_char != b_char)
return ((int) gbksortorder((uint16) a_char) -
(int) gbksortorder((uint16) b_char));
a+= 2;
b+= 2;
length--;
}
else if (sort_order_gbk[*a++] != sort_order_gbk[*b++])
return ((int) sort_order_gbk[a[-1]] -
(int) sort_order_gbk[b[-1]]);
}
*a_res= a;
*b_res= b;
return 0;
}
int my_strnncoll_gbk(CHARSET_INFO *cs __attribute__((unused)),
const uchar *a, size_t a_length,
const uchar *b, size_t b_length,
my_bool b_is_prefix)
{
size_t length= MY_MIN(a_length, b_length);
int res= my_strnncoll_gbk_internal(&a, &b, length);
return res ? res : (int) ((b_is_prefix ? length : a_length) - b_length);
}
/*
Compare two gbk strings, ignoring trailing spaces.

The first MY_MIN(a_length, b_length) bytes are compared by
my_strnncoll_gbk_internal(), which advances "a" and "b". If that part
is equal and the lengths differ, the tail of the longer string is
scanned: the first non-space byte decides the result (a byte lower than
' ' makes the longer string sort lower, any other byte makes it sort
higher).

Note: diff_if_only_endspace_difference is forced to 0 unless
VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE is defined.

Returns the difference reported by the internal helper if the common
parts differ; otherwise the sign derived from the tail scan; otherwise
"res" (0 when the flag above is 0).
*/
static int my_strnncollsp_gbk(CHARSET_INFO * cs __attribute__((unused)),
const uchar *a, size_t a_length,
const uchar *b, size_t b_length,
my_bool diff_if_only_endspace_difference)
{
size_t length= MY_MIN(a_length, b_length);
int res= my_strnncoll_gbk_internal(&a, &b, length);
#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
diff_if_only_endspace_difference= 0;
#endif
if (!res && a_length != b_length)
{
const uchar *end;
int swap= 1;
if (diff_if_only_endspace_difference)
res= 1; /* Assume 'a' is bigger */
/*
Check the next not space character of the longer key. If it's < ' ',
then it's smaller than the other key.
*/
if (a_length < b_length)
{
/* "b" is the longer key: put the longer key in a */
a_length= b_length;
a= b;
swap= -1; /* swap sign of result */
res= -res;
}
/* "a" already points past the common part; scan only the extra tail */
for (end= a + a_length-length; a < end ; a++)
{
if (*a != ' ')
return (*a < ' ') ? -swap : swap;
}
}
return res;
}
static size_t
my_strnxfrm_gbk(CHARSET_INFO *cs,
uchar *dst, size_t dstlen, uint nweights,
@ -10735,11 +10655,23 @@ my_mb_wc_gbk(CHARSET_INFO *cs __attribute__((unused)),
}
static MY_COLLATION_HANDLER my_collation_ci_handler =
#define MY_FUNCTION_NAME(x) my_ ## x ## _gbk_chinese_ci
#define WEIGHT_MB1(x) (sort_order_gbk[(uchar) (x)])
#define WEIGHT_MB2(x,y) (gbksortorder(gbkcode(x,y)))
#include "strcoll.ic"
#define MY_FUNCTION_NAME(x) my_ ## x ## _gbk_bin
#define WEIGHT_MB1(x) ((uchar) (x))
#define WEIGHT_MB2(x,y) (gbkcode(x,y))
#include "strcoll.ic"
static MY_COLLATION_HANDLER my_collation_handler_gbk_chinese_ci=
{
NULL, /* init */
my_strnncoll_gbk,
my_strnncollsp_gbk,
NULL, /* init */
my_strnncoll_gbk_chinese_ci,
my_strnncollsp_gbk_chinese_ci,
my_strnxfrm_gbk,
my_strnxfrmlen_simple,
my_like_range_mb,
@ -10750,6 +10682,24 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
my_propagate_simple
};
static MY_COLLATION_HANDLER my_collation_handler_gbk_bin=
{
NULL, /* init */
my_strnncoll_gbk_bin,
my_strnncollsp_gbk_bin,
my_strnxfrm_mb,
my_strnxfrmlen_simple,
my_like_range_mb,
my_wildcmp_mb_bin,
my_strcasecmp_mb_bin,
my_instr_mb,
my_hash_sort_mb_bin,
my_propagate_simple
};
static MY_CHARSET_HANDLER my_charset_handler=
{
NULL, /* init */
@ -10814,7 +10764,7 @@ struct charset_info_st my_charset_gbk_chinese_ci=
1, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_handler,
&my_collation_ci_handler
&my_collation_handler_gbk_chinese_ci
};
struct charset_info_st my_charset_gbk_bin=
@ -10846,7 +10796,7 @@ struct charset_info_st my_charset_gbk_bin=
1, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_handler,
&my_collation_mb_bin_handler
&my_collation_handler_gbk_bin
};

View file

@ -256,3 +256,5 @@ MY_FUNCTION_NAME(well_formed_char_length)(CHARSET_INFO *cs __attribute__((unused
return nchars0 - nchars;
}
#endif /* DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN */
#undef MY_FUNCTION_NAME

View file

@ -186,6 +186,7 @@ static const uchar sort_order_sjis[]=
#define MY_FUNCTION_NAME(x) my_ ## x ## _sjis
#define IS_8BIT_CHAR(x) issjiskata(x)
#define IS_MB1_CHAR(x) ((uchar) (x) < 0x80 || issjiskata(x))
#define IS_MB2_CHAR(x,y) (issjishead(x) && issjistail(y))
#define DEFINE_ASIAN_ROUTINES
#include "ctype-mb.ic"
@ -1088,90 +1089,6 @@ static MY_UNICASE_INFO my_caseinfo_sjis=
};
static int my_strnncoll_sjis_internal(CHARSET_INFO *cs,
const uchar **a_res, size_t a_length,
const uchar **b_res, size_t b_length)
{
const uchar *a= *a_res, *b= *b_res;
const uchar *a_end= a + a_length;
const uchar *b_end= b + b_length;
while (a < a_end && b < b_end)
{
if (ismbchar_sjis(cs,(char*) a, (char*) a_end) &&
ismbchar_sjis(cs,(char*) b, (char*) b_end))
{
uint a_char= sjiscode(*a, *(a+1));
uint b_char= sjiscode(*b, *(b+1));
if (a_char != b_char)
return (int) a_char - (int) b_char;
a += 2;
b += 2;
} else
{
if (sort_order_sjis[(uchar)*a] != sort_order_sjis[(uchar)*b])
return sort_order_sjis[(uchar)*a] - sort_order_sjis[(uchar)*b];
a++;
b++;
}
}
*a_res= a;
*b_res= b;
return 0;
}
static int my_strnncoll_sjis(CHARSET_INFO *cs __attribute__((unused)),
const uchar *a, size_t a_length,
const uchar *b, size_t b_length,
my_bool b_is_prefix)
{
int res= my_strnncoll_sjis_internal(cs, &a, a_length, &b, b_length);
if (b_is_prefix && a_length > b_length)
a_length= b_length;
return res ? res : (int) (a_length - b_length);
}
/*
Compare two sjis strings, ignoring trailing spaces.

The common part of the two strings is compared by
my_strnncoll_sjis_internal(), which advances "a" and "b" past the
bytes it consumed. If that part is equal and one string still has
unread bytes, the remaining tail of that string is scanned: the first
non-space byte decides the result (a byte lower than ' ' makes the
longer string sort lower, any other byte makes it sort higher).

Note: "cs" is annotated as unused but is forwarded to the internal helper.
Note: diff_if_only_endspace_difference is forced to 0 unless
VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE is defined.

Returns the difference reported by the internal helper if the common
parts differ; otherwise the sign derived from the tail scan; otherwise
"res" (0 when the flag above is 0).
*/
static int my_strnncollsp_sjis(CHARSET_INFO *cs __attribute__((unused)),
const uchar *a, size_t a_length,
const uchar *b, size_t b_length,
my_bool diff_if_only_endspace_difference)
{
const uchar *a_end= a + a_length, *b_end= b + b_length;
int res= my_strnncoll_sjis_internal(cs, &a, a_length, &b, b_length);
#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
diff_if_only_endspace_difference= 0;
#endif
if (!res && (a != a_end || b != b_end))
{
int swap= 1;
if (diff_if_only_endspace_difference)
res= 1; /* Assume 'a' is bigger */
/*
Check the next not space character of the longer key. If it's < ' ',
then it's smaller than the other key.
*/
if (a == a_end)
{
/* "a" is exhausted, so "b" is the longer key: put the longer key in a */
a_end= b_end;
a= b;
swap= -1; /* swap sign of result */
res= -res;
}
/* Scan the unread tail of the longer key for the first non-space byte */
for (; a < a_end ; a++)
{
if (*a != ' ')
return (*a < ' ') ? -swap : swap;
}
}
return res;
}
/* SJIS->Unicode conversion table */
static uint16 sjis_to_unicode[65536]=
{
@ -34099,15 +34016,36 @@ size_t my_numcells_sjis(CHARSET_INFO *cs __attribute__((unused)),
}
static MY_COLLATION_HANDLER my_collation_ci_handler =
/*
sjis_japanese_ci and sjis_bin sort character blocks in this order:
1. [00..7F] - 7BIT characters (ASCII)
2. [81..9F][40..7E,80..FC] - MB2 characters, part1
3. [A1..DF] - 8BIT characters (Kana)
4. [E0..FC][40..7E,80..FC] - MB2 characters, part2
*/
#define MY_FUNCTION_NAME(x) my_ ## x ## _sjis_japanese_ci
#define WEIGHT_PAD_SPACE (256 * (int) ' ')
#define WEIGHT_MB1(x) (256 * (int) sort_order_sjis[(uchar) (x)])
#define WEIGHT_MB2(x,y) (sjiscode(x, y))
#include "strcoll.ic"
#define MY_FUNCTION_NAME(x) my_ ## x ## _sjis_bin
#define WEIGHT_PAD_SPACE (256 * (int) ' ')
#define WEIGHT_MB1(x) (256 * (int) (uchar) (x))
#define WEIGHT_MB2(x,y) (sjiscode(x, y))
#include "strcoll.ic"
static MY_COLLATION_HANDLER my_collation_handler_sjis_japanese_ci=
{
NULL, /* init */
my_strnncoll_sjis,
my_strnncollsp_sjis,
NULL, /* init */
my_strnncoll_sjis_japanese_ci,
my_strnncollsp_sjis_japanese_ci,
my_strnxfrm_mb,
my_strnxfrmlen_simple,
my_like_range_mb,
my_wildcmp_mb, /* wildcmp */
my_wildcmp_mb,
my_strcasecmp_8bit,
my_instr_mb,
my_hash_sort_simple,
@ -34115,6 +34053,22 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
};
static MY_COLLATION_HANDLER my_collation_handler_sjis_bin=
{
NULL, /* init */
my_strnncoll_sjis_bin,
my_strnncollsp_sjis_bin,
my_strnxfrm_mb,
my_strnxfrmlen_simple,
my_like_range_mb,
my_wildcmp_mb_bin,
my_strcasecmp_mb_bin,
my_instr_mb,
my_hash_sort_mb_bin,
my_propagate_simple
};
static MY_CHARSET_HANDLER my_charset_handler=
{
NULL, /* init */
@ -34179,7 +34133,7 @@ struct charset_info_st my_charset_sjis_japanese_ci=
1, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_handler,
&my_collation_ci_handler
&my_collation_handler_sjis_japanese_ci
};
struct charset_info_st my_charset_sjis_bin=
@ -34211,7 +34165,7 @@ struct charset_info_st my_charset_sjis_bin=
1, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_handler,
&my_collation_mb_bin_handler
&my_collation_handler_sjis_bin
};
#endif

231
strings/strcoll.ic Normal file
View file

@ -0,0 +1,231 @@
/*
Copyright (c) 2015, MariaDB Foundation
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef MY_FUNCTION_NAME
#error MY_FUNCTION_NAME is not defined
#endif
/*
The weight for automatically padded spaces when comparing strings with
the PAD SPACE property.
Should normally be equal to the weight of a regular space.
*/
#ifndef WEIGHT_PAD_SPACE
#define WEIGHT_PAD_SPACE (' ')
#endif
/*
Weight of an illegal byte, must follow these rules:
1. Must be greater than weight of any normal character in the collation.
2. Two different bad bytes must have different weights and must be
compared in their binary order.
Depends on mbmaxlen of the character set, as well as how the collation
sorts various single-byte and multi-byte character blocks.
The macro below is the default definition, it is suitable for mbmaxlen=2
character sets that sort all multi-byte characters after all single-byte
characters: big5, euckr, gb2312, gbk.
All mbmaxlen>2 character sets must provide their own definitions.
All collations that have a more complex order (than just MB1 followed by MB2)
must also provide their own definitions (see definitions for
cp932_japanese_ci and sjis_japanese_ci as examples of a more complex order).
*/
#ifndef WEIGHT_ILSEQ
#define WEIGHT_ILSEQ(x) (0xFF00 + (x))
#endif
/**
  Scan a valid character, or a bad byte, or an auto-padded space
  from a string and calculate the weight of the scanned sequence.

  @param [OUT] weight - the weight is returned here
  @param str          - the string
  @param end          - the end of the string
  @return             - the number of bytes scanned:
                        0 - "str" is at or past "end"; the weight is
                            WEIGHT_PAD_SPACE
                        1 - a valid single byte character, or a bad byte
                            (including a lone byte at the very end of
                            the string that is not a single byte character)
                        2 - a valid two-byte character

  The including source file must define the following macros:
    MY_FUNCTION_NAME(x)
    IS_MB1_CHAR(x)
    IS_MB2_CHAR(x,y)
    WEIGHT_MB1(x)
    WEIGHT_MB2(x,y)
  and may override the defaults provided by this file for:
    WEIGHT_PAD_SPACE
    WEIGHT_ILSEQ(x)
*/
static inline uint
MY_FUNCTION_NAME(scan_weight)(int *weight, const uchar *str, const uchar *end)
{
  if (str >= end)
  {
    /* Nothing left: behave as if the string was padded with a space */
    *weight= WEIGHT_PAD_SPACE;
    return 0;
  }

  if (IS_MB1_CHAR(*str))
  {
    *weight= WEIGHT_MB1(*str);             /* A valid single byte character */
    return 1;
  }

  /*
    Test the remaining length rather than computing "str + 2":
    when only one byte is left, "str + 2" would point beyond
    one-past-the-end of the buffer, which is not a valid pointer value.
  */
  if (end - str >= 2 && IS_MB2_CHAR(str[0], str[1]))
  {
    *weight= WEIGHT_MB2(str[0], str[1]);   /* A valid two-byte character */
    return 2;
  }

  /*
    Either the string ended in the middle of a character,
    or the bytes do not make a valid character: treat as a bad byte.
  */
  *weight= WEIGHT_ILSEQ(str[0]);
  return 1;
}
/**
  Compare two strings according to the collation,
  without handling the PAD SPACE property.

  Note, cs->coll->strnncoll() is usually used to compare identifiers.
  Perhaps we should eventually (in 10.2?) create a new collation
  my_charset_utf8_general_ci_no_pad and have only one comparison function
  in MY_COLLATION_HANDLER.

  @param cs          - the character set and collation
  @param a           - the left string
  @param a_length    - the length of the left string
  @param b           - the right string
  @param b_length    - the length of the right string
  @param b_is_prefix - if the caller wants to check if "b" is a prefix of "a"
  @return            - the comparison result:
                       negative if "a" sorts before "b",
                       zero if they are equal (or if "b" is a prefix of "a"
                       and b_is_prefix is set),
                       positive if "a" sorts after "b"
*/
static int
MY_FUNCTION_NAME(strnncoll)(CHARSET_INFO *cs __attribute__((unused)),
                            const uchar *a, size_t a_length,
                            const uchar *b, size_t b_length,
                            my_bool b_is_prefix)
{
  const uchar *a_end= a + a_length;
  const uchar *b_end= b + b_length;
  for ( ; ; )
  {
    int a_weight, b_weight, res;
    uint a_wlen= MY_FUNCTION_NAME(scan_weight)(&a_weight, a, a_end);
    uint b_wlen= MY_FUNCTION_NAME(scan_weight)(&b_weight, b, b_end);
    /*
      a_wlen  b_wlen Comment
      ------  ------ -------
      0       0      Strings ended simultaneously, "a" and "b" are equal.
      0       >0     "a" is a prefix of "b", so "a" is smaller.
      >0      0      "b" is a prefix of "a", check b_is_prefix.
      >0      >0     Two weights were scanned, check weight difference.

      A scanned weight can be zero (e.g. the 0x00 byte when WEIGHT_MB1(x)
      is just the byte value), so the weight itself must not be used as
      the result in the "prefix" cases: that would report a shorter string
      as equal to a longer one. Return a fixed non-zero sign instead.
    */
    if (!a_wlen)
      return b_wlen ? -1 : 0;

    if (!b_wlen)
      return b_is_prefix ? 0 : +1;

    if ((res= (a_weight - b_weight)))
      return res;
    /*
      None of the strings has ended yet.
    */
    DBUG_ASSERT(a < a_end);
    DBUG_ASSERT(b < b_end);
    a+= a_wlen;
    b+= b_wlen;
  }
  DBUG_ASSERT(0);
  return 0;
}
/**
  PAD SPACE comparison of two strings according to the collation.

  Both strings are walked weight by weight. Once a string is exhausted,
  scan_weight() keeps producing WEIGHT_PAD_SPACE for it (with a scanned
  length of 0), so the shorter string behaves as if it was extended
  with spaces.

  @param cs          - the character set and collation
  @param a           - the left string
  @param a_length    - the length of the left string
  @param b           - the right string
  @param b_length    - the length of the right string
  @param diff_if_only_endspace_difference - not used in the code.
                       TODO: this should be eventually removed (in 10.2?)
  @return            - the difference of the first pair of unequal weights,
                       or 0 if both strings ended without a difference
*/
static int
MY_FUNCTION_NAME(strnncollsp)(CHARSET_INFO *cs __attribute__((unused)),
                              const uchar *a, size_t a_length,
                              const uchar *b, size_t b_length,
                              my_bool diff_if_only_endspace_difference
                              __attribute__((unused)))
{
  const uchar *a_limit= a + a_length;
  const uchar *b_limit= b + b_length;

  while (1)
  {
    int wa, wb;
    uint na= MY_FUNCTION_NAME(scan_weight)(&wa, a, a_limit);
    uint nb= MY_FUNCTION_NAME(scan_weight)(&wb, b, b_limit);

    /*
      The origin of a weight (a real character, a bad or incomplete
      byte sequence, or an automatically padded space) is irrelevant:
      the first pair of different weights decides the result.
    */
    if (wa != wb)
      return wa - wb;

    /* Two padded spaces in a row: both strings are over */
    if (na == 0 && nb == 0)
      break;

    /* At least one string still has data, step over the scanned bytes */
    DBUG_ASSERT(a < a_limit || b < b_limit);
    a+= na;
    b+= nb;
  }

  DBUG_ASSERT(a == a_limit);
  DBUG_ASSERT(b == b_limit);
  return 0;
}
/*
We usually include this file at least two times from the same source file,
for the _ci and the _bin collations. Prepare for the second inclusion.
*/
#undef MY_FUNCTION_NAME
#undef WEIGHT_ILSEQ
#undef WEIGHT_MB1
#undef WEIGHT_MB2
#undef WEIGHT_PAD_SPACE

View file

@ -95,11 +95,361 @@ static CHARSET_INFO *charset_list[]=
};
/*
One comparison test case: two byte strings and the expected result.
The table-driven tests below terminate their arrays with an entry
whose "a" member is NULL.
*/
typedef struct
{
const char *a; /* The left string (not necessarily well-formed) */
size_t alen; /* Length of "a" in bytes */
const char *b; /* The right string (not necessarily well-formed) */
size_t blen; /* Length of "b" in bytes */
int res; /* Expected result; strcollsp() checks only its sign */
} STRNNCOLL_PARAM;
#define CSTR(x) (x),(sizeof(x)-1)
/*
Byte sequence types used in the tests:
8BIT - an 8-bit byte (>=0x80) which makes a single-byte character
MB2 - two bytes that make a valid character
H2 - a byte which is a valid MB2 head byte
T2 - a byte which is a valid MB2 tail byte
ILSEQ - a byte which makes an illegal sequence
H2+ILSEQ - a sequence that starts with a valid H2 byte,
but not followed by a valid T2 byte.
Charset H2 T2 8BIT
------- ---------------- --------------- --------
big5 [A1..F9] [40..7E,A1..FE]
euckr [81..FE] [41..5A,61..7A,81..FE]
gb2312 [A1..F7] [A1..FE]
gbk [81..FE] [40..7E,80..FE]
cp932 [81..9F,E0..FC] [40..7E,80..FC] [A1..DF]
sjis [81..9F,E0..FC] [40..7E,80..FC] [A1..DF]
Essential byte sequences in various character sets:
Sequence big5 cp932 euckr gb2312 gbk sjis
-------- ---- ----- ----- ------ --- ----
80 ILSEQ ILSEQ ILSEQ ILSEQ ILSEQ ILSEQ
81 ILSEQ H2 H2 ILSEQ H2 H2
A1 H2 8BIT H2 H2 H2 8BIT
A1A1 MB2 8BIT+8BIT MB2 MB2 MB2 8BIT+8BIT
E0E0 MB2 MB2 MB2 MB2 MB2 MB2
F9FE MB2 H2+ILSEQ MB2 ILSEQ+T2 MB2 H2+ILSEQ
*/
/*
For character sets that have the following byte sequences:
80 - ILSEQ
81 - ILSEQ or H2
F9 - ILSEQ or H2
A1A1 - MB2 or 8BIT+8BIT
E0E0 - MB2
*/
STRNNCOLL_PARAM strcoll_mb2_common[]=
{
/* Compare two good sequences */
{CSTR(""), CSTR(""), 0},
{CSTR(""), CSTR(" "), 0},
{CSTR(""), CSTR("A"), -1},
{CSTR(""), CSTR("a"), -1},
{CSTR(""), CSTR("\xA1\xA1"), -1},
{CSTR(""), CSTR("\xE0\xE0"), -1},
{CSTR(" "), CSTR(""), 0},
{CSTR(" "), CSTR(" "), 0},
{CSTR(" "), CSTR("A"), -1},
{CSTR(" "), CSTR("a"), -1},
{CSTR(" "), CSTR("\xA1\xA1"), -1},
{CSTR(" "), CSTR("\xE0\xE0"), -1},
{CSTR("a"), CSTR(""), 1},
{CSTR("a"), CSTR(" "), 1},
{CSTR("a"), CSTR("a"), 0},
{CSTR("a"), CSTR("\xA1\xA1"), -1},
{CSTR("a"), CSTR("\xE0\xE0"), -1},
{CSTR("\xA1\xA1"), CSTR("\xA1\xA1"), 0},
{CSTR("\xA1\xA1"), CSTR("\xE0\xE0"), -1},
/* Compare a good character to an illegal or an incomplete sequence */
{CSTR(""), CSTR("\x80"), -1},
{CSTR(""), CSTR("\x81"), -1},
{CSTR(""), CSTR("\xF9"), -1},
{CSTR(" "), CSTR("\x80"), -1},
{CSTR(" "), CSTR("\x81"), -1},
{CSTR(" "), CSTR("\xF9"), -1},
{CSTR("a"), CSTR("\x80"), -1},
{CSTR("a"), CSTR("\x81"), -1},
{CSTR("a"), CSTR("\xF9"), -1},
{CSTR("\xA1\xA1"), CSTR("\x80"), -1},
{CSTR("\xA1\xA1"), CSTR("\x81"), -1},
{CSTR("\xA1\xA1"), CSTR("\xF9"), -1},
{CSTR("\xE0\xE0"), CSTR("\x80"), -1},
{CSTR("\xE0\xE0"), CSTR("\x81"), -1},
{CSTR("\xE0\xE0"), CSTR("\xF9"), -1},
/* Compare two bad/incomplete sequences */
{CSTR("\x80"), CSTR("\x80"), 0},
{CSTR("\x80"), CSTR("\x81"), -1},
{CSTR("\x80"), CSTR("\xF9"), -1},
{CSTR("\x81"), CSTR("\x81"), 0},
{CSTR("\x81"), CSTR("\xF9"), -1},
{NULL, 0, NULL, 0, 0}
};
/*
For character sets that have good mb2 characters A1A1 and F9FE
*/
STRNNCOLL_PARAM strcoll_mb2_A1A1_mb2_F9FE[]=
{
/* Compare two good characters */
{CSTR(""), CSTR("\xF9\xFE"), -1},
{CSTR(" "), CSTR("\xF9\xFE"), -1},
{CSTR("a") , CSTR("\xF9\xFE"), -1},
{CSTR("\xA1\xA1"), CSTR("\xF9\xFE"), -1},
{CSTR("\xF9\xFE"), CSTR("\xF9\xFE"), 0},
/* Compare a good character to an illegal or an incomplete sequence */
{CSTR(""), CSTR("\xA1"), -1},
{CSTR(""), CSTR("\xF9"), -1},
{CSTR("a"), CSTR("\xA1"), -1},
{CSTR("a"), CSTR("\xF9"), -1},
{CSTR("\xA1\xA1"), CSTR("\xA1"), -1},
{CSTR("\xA1\xA1"), CSTR("\xF9"), -1},
{CSTR("\xF9\xFE"), CSTR("\x80"), -1},
{CSTR("\xF9\xFE"), CSTR("\x81"), -1},
{CSTR("\xF9\xFE"), CSTR("\xA1"), -1},
{CSTR("\xF9\xFE"), CSTR("\xF9"), -1},
/* Compare two bad/incomplete sequences */
{CSTR("\x80"), CSTR("\xA1"), -1},
{CSTR("\x80"), CSTR("\xF9"), -1},
{NULL, 0, NULL, 0, 0}
};
/*
For character sets that have:
A1A1 - a good mb2 character
F9FE - a bad sequence
*/
STRNNCOLL_PARAM strcoll_mb2_A1A1_bad_F9FE[]=
{
/* Compare a good character to an illegal or an incomplete sequence */
{CSTR(""), CSTR("\xF9\xFE"), -1},
{CSTR(" "), CSTR("\xF9\xFE"), -1},
{CSTR("a") , CSTR("\xF9\xFE"), -1},
{CSTR("\xA1\xA1"), CSTR("\xF9\xFE"), -1},
{CSTR(""), CSTR("\xA1"), -1},
{CSTR(""), CSTR("\xF9"), -1},
{CSTR("a"), CSTR("\xA1"), -1},
{CSTR("a"), CSTR("\xF9"), -1},
{CSTR("\xA1\xA1"), CSTR("\xA1"), -1},
{CSTR("\xA1\xA1"), CSTR("\xF9"), -1},
/* Compare two bad/incomplete sequences */
{CSTR("\xF9\xFE"), CSTR("\x80"), 1},
{CSTR("\xF9\xFE"), CSTR("\x81"), 1},
{CSTR("\xF9\xFE"), CSTR("\xA1"), 1},
{CSTR("\xF9\xFE"), CSTR("\xF9"), 1},
{CSTR("\x80"), CSTR("\xA1"), -1},
{CSTR("\x80"), CSTR("\xF9"), -1},
{CSTR("\xF9\xFE"), CSTR("\xF9\xFE"), 0},
{NULL, 0, NULL, 0, 0}
};
/*
For character sets that have:
80 - ILSEQ or H2
81 - ILSEQ or H2
A1 - 8BIT
F9 - ILSEQ or H2
F9FE - a bad sequence (ILSEQ+XX or H2+ILSEQ)
*/
STRNNCOLL_PARAM strcoll_mb1_A1_bad_F9FE[]=
{
/* Compare two good characters */
{CSTR(""), CSTR("\xA1"), -1},
{CSTR("\xA1\xA1"), CSTR("\xA1"), 1},
/* Compare a good character to an illegal or an incomplete sequence */
{CSTR(""), CSTR("\xF9"), -1},
{CSTR(""), CSTR("\xF9\xFE"), -1},
{CSTR(" "), CSTR("\xF9\xFE"), -1},
{CSTR("a"), CSTR("\xF9\xFE"), -1},
{CSTR("a"), CSTR("\xA1"), -1},
{CSTR("a"), CSTR("\xF9"), -1},
{CSTR("\xA1\xA1"), CSTR("\xF9"), -1},
{CSTR("\xA1\xA1"), CSTR("\xF9\xFE"), -1},
{CSTR("\xF9\xFE"), CSTR("\x80"), 1},
{CSTR("\xF9\xFE"), CSTR("\x81"), 1},
{CSTR("\xF9\xFE"), CSTR("\xA1"), 1},
{CSTR("\xF9\xFE"), CSTR("\xF9"), 1},
{CSTR("\x80"), CSTR("\xA1"), 1},
/* Compare two bad/incomplete sequences */
{CSTR("\x80"), CSTR("\xF9"), -1},
{CSTR("\xF9\xFE"), CSTR("\xF9\xFE"), 0},
{NULL, 0, NULL, 0, 0}
};
/*
For character sets (e.g. cp932 and sjis) that have:
8181 - a valid MB2 character
A1 - a valid 8BIT character
E0E0 - a valid MB2 character
and sort in this order:
8181 < A1 < E0E0
*/
STRNNCOLL_PARAM strcoll_8181_A1_E0E0[]=
{
{CSTR("\x81\x81"), CSTR("\xA1"), -1},
{CSTR("\x81\x81"), CSTR("\xE0\xE0"), -1},
{CSTR("\xA1"), CSTR("\xE0\xE0"), -1},
{NULL, 0, NULL, 0, 0}
};
/*
  Render a byte string as upper-case hexadecimal digits.

  @param dst     Output buffer. Receives a NUL-terminated string,
                 unless dstlen is 0, in which case it is not touched.
  @param dstlen  Size of the output buffer, in bytes.
  @param src     Input bytes (need not be NUL-terminated).
  @param srclen  Number of input bytes.

  Every input byte yields two hex digits. A byte is converted only
  while more than three bytes of room remain in dst, so output that
  does not fit is silently truncated on a byte boundary and the
  result is always NUL-terminated.
*/
static void
str2hex(char *dst, size_t dstlen, const char *src, size_t srclen)
{
  /*
    Track the remaining room as a count instead of comparing
    "dst + 3" with the buffer end: that pointer could lie more than
    one element past the buffer, which is undefined behaviour.
  */
  size_t room= dstlen;
  size_t i;

  /* A zero-sized buffer cannot even hold the terminating NUL */
  if (dstlen == 0)
    return;

  *dst= '\0';
  for (i= 0; i < srclen && room > 3; i++)
  {
    /* room > 3 guarantees space for two digits and the NUL */
    sprintf(dst, "%02X", (unsigned char) src[i]);
    dst+= 2;
    room-= 2;
  }
}
/*
  Tell whether two comparison results agree in sign.

  Magnitudes are ignored: the results are considered equal when both
  are negative, both are positive, or both are zero.

  @return 1 if the two results fall into the same sign class, 0 otherwise.
*/
static int
eqres(int ares, int bres)
{
  /* Reduce each result to -1, 0 or +1 and compare the reduced values */
  int asign= (ares > 0) - (ares < 0);
  int bsign= (bres > 0) - (bres < 0);
  return asign == bsign;
}
/*
  Run every row of a comparison table through cs->coll->strnncollsp()
  and report the outcome with diag().

  @param cs     Collation under test.
  @param param  Table of {a, b, expected result} rows, terminated by
                a row whose "a" is NULL.

  For each row the strings are compared as (a, b) and the sign of the
  result is checked against the expected one. If that check passes,
  the strings are compared again as (b, a) and the result must have
  the opposite sign. A row counts as at most one failure.

  @return Number of failed rows.
*/
static int
strcollsp(CHARSET_INFO *cs, const STRNNCOLL_PARAM *param)
{
  const STRNNCOLL_PARAM *row;
  int nfailed= 0;

  diag("%-20s %-10s %-10s %10s %10s", "Collation", "a", "b", "ExpectSign", "Actual");
  for (row= param; row->a; row++)
  {
    char ahex[64], bhex[64];
    int forward, backward, forward_ok;

    forward= cs->coll->strnncollsp(cs, (uchar *) row->a, row->alen,
                                   (uchar *) row->b, row->blen, 0);
    forward_ok= eqres(forward, row->res);

    str2hex(ahex, sizeof(ahex), row->a, row->alen);
    str2hex(bhex, sizeof(bhex), row->b, row->blen);
    diag("%-20s %-10s %-10s %10d %10d%s",
         cs->name, ahex, bhex, row->res, forward,
         forward_ok ? "" : " FAILED");

    if (!forward_ok)
    {
      /* The reverse comparison is not attempted for a failed row */
      nfailed++;
      continue;
    }

    /* Test in reverse order: the sign must flip */
    backward= cs->coll->strnncollsp(cs, (uchar *) row->b, row->blen,
                                    (uchar *) row->a, row->alen, 0);
    if (!eqres(backward, -row->res))
    {
      diag("Comparison in reverse order failed. Expected %d, got %d",
           -row->res, backward);
      nfailed++;
    }
  }
  return nfailed;
}
/*
  Run the strnncollsp() comparison tables against every Asian
  collation that is compiled in (each group is guarded by its
  HAVE_CHARSET_xxx macro).

  @return Total number of failed rows over all collation/table pairs.
*/
static int
test_strcollsp()
{
  int total= 0;

/* Run one table against one collation and accumulate its failures */
#define RUN_TABLE(cs, table) (total+= strcollsp(&(cs), (table)))

#ifdef HAVE_CHARSET_big5
  RUN_TABLE(my_charset_big5_chinese_ci, strcoll_mb2_common);
  RUN_TABLE(my_charset_big5_chinese_ci, strcoll_mb2_A1A1_mb2_F9FE);
  RUN_TABLE(my_charset_big5_bin, strcoll_mb2_common);
  RUN_TABLE(my_charset_big5_bin, strcoll_mb2_A1A1_mb2_F9FE);
#endif
#ifdef HAVE_CHARSET_cp932
  RUN_TABLE(my_charset_cp932_japanese_ci, strcoll_mb2_common);
  RUN_TABLE(my_charset_cp932_japanese_ci, strcoll_mb1_A1_bad_F9FE);
  RUN_TABLE(my_charset_cp932_bin, strcoll_mb2_common);
  RUN_TABLE(my_charset_cp932_bin, strcoll_mb1_A1_bad_F9FE);
  RUN_TABLE(my_charset_cp932_japanese_ci, strcoll_8181_A1_E0E0);
  RUN_TABLE(my_charset_cp932_bin, strcoll_8181_A1_E0E0);
#endif
#ifdef HAVE_CHARSET_euckr
  RUN_TABLE(my_charset_euckr_korean_ci, strcoll_mb2_common);
  RUN_TABLE(my_charset_euckr_korean_ci, strcoll_mb2_A1A1_mb2_F9FE);
  RUN_TABLE(my_charset_euckr_bin, strcoll_mb2_common);
  RUN_TABLE(my_charset_euckr_bin, strcoll_mb2_A1A1_mb2_F9FE);
#endif
#ifdef HAVE_CHARSET_gb2312
  RUN_TABLE(my_charset_gb2312_chinese_ci, strcoll_mb2_common);
  RUN_TABLE(my_charset_gb2312_chinese_ci, strcoll_mb2_A1A1_bad_F9FE);
  RUN_TABLE(my_charset_gb2312_bin, strcoll_mb2_common);
  RUN_TABLE(my_charset_gb2312_bin, strcoll_mb2_A1A1_bad_F9FE);
#endif
#ifdef HAVE_CHARSET_gbk
  RUN_TABLE(my_charset_gbk_chinese_ci, strcoll_mb2_common);
  RUN_TABLE(my_charset_gbk_chinese_ci, strcoll_mb2_A1A1_mb2_F9FE);
  RUN_TABLE(my_charset_gbk_bin, strcoll_mb2_common);
  RUN_TABLE(my_charset_gbk_bin, strcoll_mb2_A1A1_mb2_F9FE);
#endif
#ifdef HAVE_CHARSET_sjis
  RUN_TABLE(my_charset_sjis_japanese_ci, strcoll_mb2_common);
  RUN_TABLE(my_charset_sjis_bin, strcoll_mb2_common);
  RUN_TABLE(my_charset_sjis_japanese_ci, strcoll_mb1_A1_bad_F9FE);
  RUN_TABLE(my_charset_sjis_bin, strcoll_mb1_A1_bad_F9FE);
  RUN_TABLE(my_charset_sjis_japanese_ci, strcoll_8181_A1_E0E0);
  RUN_TABLE(my_charset_sjis_bin, strcoll_8181_A1_E0E0);
#endif

#undef RUN_TABLE
  return total;
}
int main()
{
size_t i, failed= 0;
plan(1);
plan(2);
diag("Testing my_like_range_xxx() functions");
for (i= 0; i < array_elements(charset_list); i++)
@ -112,5 +462,10 @@ int main()
}
}
ok(failed == 0, "Testing my_like_range_xxx() functions");
diag("Testing cs->coll->strnncollsp()");
failed= test_strcollsp();
ok(failed == 0, "Testing cs->coll->strnncollsp()");
return exit_status();
}