diff --git a/mysys/charset-def.c b/mysys/charset-def.c index f168556e3f7..92453aa0f59 100644 --- a/mysys/charset-def.c +++ b/mysys/charset-def.c @@ -62,6 +62,9 @@ extern CHARSET_INFO my_charset_utf8_slovak_uca_ci; extern CHARSET_INFO my_charset_utf8_spanish2_uca_ci; extern CHARSET_INFO my_charset_utf8_roman_uca_ci; extern CHARSET_INFO my_charset_utf8_persian_uca_ci; +#ifdef HAVE_CYBOZU_COLLATION +extern CHARSET_INFO my_charset_utf8_general_cs; +#endif #endif #endif /* HAVE_UCA_COLLATIONS */ @@ -156,6 +159,9 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused))) #ifdef HAVE_CHARSET_utf8 add_compiled_collation(&my_charset_utf8_general_ci); add_compiled_collation(&my_charset_utf8_bin); +#ifdef HAVE_CYBOZU_COLLATION + add_compiled_collation(&my_charset_utf8_general_cs); +#endif #ifdef HAVE_UCA_COLLATIONS add_compiled_collation(&my_charset_utf8_general_uca_ci); add_compiled_collation(&my_charset_utf8_icelandic_uca_ci); diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 31bf142c8ca..95de170b99d 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -4075,8 +4075,6 @@ errorconn: NullS); sql_perror(buff); } - my_security_attr_free(sa_event); - my_security_attr_free(sa_mapping); if (handle_client_file_map) CloseHandle(handle_client_file_map); if (handle_client_map) diff --git a/strings/ctype-big5.c b/strings/ctype-big5.c index f5ef82a7600..336b5f0e832 100644 --- a/strings/ctype-big5.c +++ b/strings/ctype-big5.c @@ -6300,11 +6300,7 @@ uint my_well_formed_len_big5(CHARSET_INFO *cs __attribute__((unused)), const char *emb= e - 1; /* Last possible end of an MB character */ while (pos && b < e) { - /* - Cast to int8 for extra safety. "char" can be unsigned - by default on some platforms. 
- */ - if (((int8)b[0]) >= 0) + if ((uchar) b[0] < 128) { /* Single byte ascii character */ b++; diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c index b0e2d1fcb1a..9a54fe5595b 100644 --- a/strings/ctype-sjis.c +++ b/strings/ctype-sjis.c @@ -4591,12 +4591,7 @@ uint my_well_formed_len_sjis(CHARSET_INFO *cs __attribute__((unused)), const char *b0= b; while (pos && b < e) { - /* - Cast to int8 for extra safety. - "char" can be unsigned by default - on some platforms. - */ - if (((int8)b[0]) >= 0) + if ((uchar) b[0] < 128) { /* Single byte ascii character */ b++; diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index cfb169043dc..e8b7a5e9e81 100644 --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -2148,12 +2148,7 @@ int my_strcasecmp_utf8(CHARSET_INFO *cs, const char *s, const char *t) { my_wc_t s_wc,t_wc; - /* - Cast to int8 for extra safety. - char can be unsigned by default - on some platforms. - */ - if (((int8)s[0]) >= 0) + if ((uchar) s[0] < 128) { /* s[0] is between 0 and 127. @@ -2200,7 +2195,7 @@ int my_strcasecmp_utf8(CHARSET_INFO *cs, const char *s, const char *t) /* Do the same for the second string */ - if (((int8)t[0]) >= 0) + if ((uchar) t[0] < 128) { /* Convert single byte character into weight */ t_wc= plane00[(uchar) t[0]].tolower; @@ -2410,6 +2405,172 @@ CHARSET_INFO my_charset_utf8_bin= &my_collation_mb_bin_handler }; +#ifdef HAVE_CYBOZU_COLLATION + +/* + * These functions basically do the same as their originals, except + * that they return 0 only when the two compared Unicode strings are + * strictly the same in a case-sensitive way. See the "save_diff" local + * variable: it keeps the first raw code point difference, used as the result when the sort weights match. 
+ */ + +static int my_strnncoll_utf8_cs(CHARSET_INFO *cs, + const uchar *s, uint slen, + const uchar *t, uint tlen, + my_bool t_is_prefix) +{ + int s_res,t_res; + my_wc_t s_wc,t_wc; + const uchar *se=s+slen; + const uchar *te=t+tlen; + int save_diff = 0; + int diff; + + while ( s < se && t < te ) + { + int plane; + s_res=my_utf8_uni(cs,&s_wc, s, se); + t_res=my_utf8_uni(cs,&t_wc, t, te); + + if ( s_res <= 0 || t_res <= 0 ) + + { + /* Incorrect string, compare by char value */ + return ((int)s[0]-(int)t[0]); + } + + if ( save_diff == 0 ) + { + save_diff = ((int)s_wc) - ((int)t_wc); + } + plane=(s_wc>>8) & 0xFF; + s_wc = uni_plane[plane] ? uni_plane[plane][s_wc & 0xFF].sort : s_wc; + plane=(t_wc>>8) & 0xFF; + t_wc = uni_plane[plane] ? uni_plane[plane][t_wc & 0xFF].sort : t_wc; + if ( s_wc != t_wc ) + { + return ((int) s_wc) - ((int) t_wc); + } + + s+=s_res; + t+=t_res; + } + diff = ( (se-s) - (te-t) ); + return t_is_prefix ? t-te : ((diff == 0) ? save_diff : diff); +} + +static int my_strnncollsp_utf8_cs(CHARSET_INFO *cs, + const uchar *s, uint slen, + const uchar *t, uint tlen) +{ + int s_res,t_res; + my_wc_t s_wc,t_wc; + const uchar *se= s+slen; + const uchar *te= t+tlen; + int save_diff = 0; + + while ( s < se && t < te ) + { + int plane; + s_res=my_utf8_uni(cs,&s_wc, s, se); + t_res=my_utf8_uni(cs,&t_wc, t, te); + + if ( s_res <= 0 || t_res <= 0 ) + { + /* Incorrect string, compare by char value */ + return ((int)s[0]-(int)t[0]); + } + + if ( save_diff == 0 ) + { + save_diff = ((int)s_wc) - ((int)t_wc); + } + plane=(s_wc>>8) & 0xFF; + s_wc = uni_plane[plane] ? uni_plane[plane][s_wc & 0xFF].sort : s_wc; + plane=(t_wc>>8) & 0xFF; + t_wc = uni_plane[plane] ? 
uni_plane[plane][t_wc & 0xFF].sort : t_wc; + if ( s_wc != t_wc ) + { + return ((int) s_wc) - ((int) t_wc); + } + + s+=s_res; + t+=t_res; + } + + slen= se-s; + tlen= te-t; + + if (slen != tlen) + { + int swap= 0; + if (slen < tlen) + { + slen= tlen; + s= t; + se= te; + swap= -1; + } + /* + The following loop uses the fact that in UTF-8 + all multibyte characters are greater than space, + and all multibyte head characters are greater than + space. It means if we meet a character greater + than space, it always means that the longer string + is greater. So we can reuse the same loop from the + 8bit version, without having to process full multibyte + sequences. + */ + for ( ; s < se; s++) + { + if (*s != ' ') + return ((int)*s - (int) ' ') ^ swap; + } + } + return save_diff; +} + +static MY_COLLATION_HANDLER my_collation_cs_handler = +{ + NULL, /* init */ + my_strnncoll_utf8_cs, + my_strnncollsp_utf8_cs, + my_strnxfrm_utf8, + my_like_range_simple, + my_wildcmp_mb, + my_strcasecmp_utf8, + my_instr_mb, + my_hash_sort_utf8 +}; + +CHARSET_INFO my_charset_utf8_general_cs= +{ + 254,0,0, /* number */ + MY_CS_COMPILED|MY_CS_UNICODE, /* state */ + "utf8", /* cs name */ + "utf8_general_cs", /* name */ + "", /* comment */ + NULL, /* tailoring */ + ctype_utf8, /* ctype */ + to_lower_utf8, /* to_lower */ + to_upper_utf8, /* to_upper */ + to_upper_utf8, /* sort_order */ + NULL, /* contractions */ + NULL, /* sort_order_big*/ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + NULL, /* state_map */ + NULL, /* ident_map */ + 1, /* strxfrm_multiply */ + 1, /* mbminlen */ + 3, /* mbmaxlen */ + 0, /* min_sort_char */ + 255, /* max_sort_char */ + &my_charset_utf8_handler, + &my_collation_cs_handler +}; +#endif /* HAVE_CYBOZU_COLLATION */ + #ifdef MY_TEST_UTF8 #include