mirror of
https://github.com/MariaDB/server.git
synced 2025-01-16 03:52:35 +01:00
merge
mysys/charset-def.c: Auto merged BitKeeper/deleted/.del-errmsg.txt~f96b7055cac394e: Auto merged sql/mysqld.cc: Auto merged strings/ctype-big5.c: Auto merged strings/ctype-sjis.c: Auto merged strings/ctype-utf8.c: Auto merged mysql-test/r/type_blob.result: after merge fix
This commit is contained in:
commit
26ff24f0f7
5 changed files with 176 additions and 20 deletions
|
@ -62,6 +62,9 @@ extern CHARSET_INFO my_charset_utf8_slovak_uca_ci;
|
||||||
extern CHARSET_INFO my_charset_utf8_spanish2_uca_ci;
|
extern CHARSET_INFO my_charset_utf8_spanish2_uca_ci;
|
||||||
extern CHARSET_INFO my_charset_utf8_roman_uca_ci;
|
extern CHARSET_INFO my_charset_utf8_roman_uca_ci;
|
||||||
extern CHARSET_INFO my_charset_utf8_persian_uca_ci;
|
extern CHARSET_INFO my_charset_utf8_persian_uca_ci;
|
||||||
|
#ifdef HAVE_CYBOZU_COLLATION
|
||||||
|
extern CHARSET_INFO my_charset_utf8_general_cs;
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif /* HAVE_UCA_COLLATIONS */
|
#endif /* HAVE_UCA_COLLATIONS */
|
||||||
|
@ -156,6 +159,9 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused)))
|
||||||
#ifdef HAVE_CHARSET_utf8
|
#ifdef HAVE_CHARSET_utf8
|
||||||
add_compiled_collation(&my_charset_utf8_general_ci);
|
add_compiled_collation(&my_charset_utf8_general_ci);
|
||||||
add_compiled_collation(&my_charset_utf8_bin);
|
add_compiled_collation(&my_charset_utf8_bin);
|
||||||
|
#ifdef HAVE_CYBOZU_COLLATION
|
||||||
|
add_compiled_collation(&my_charset_utf8_general_cs);
|
||||||
|
#endif
|
||||||
#ifdef HAVE_UCA_COLLATIONS
|
#ifdef HAVE_UCA_COLLATIONS
|
||||||
add_compiled_collation(&my_charset_utf8_general_uca_ci);
|
add_compiled_collation(&my_charset_utf8_general_uca_ci);
|
||||||
add_compiled_collation(&my_charset_utf8_icelandic_uca_ci);
|
add_compiled_collation(&my_charset_utf8_icelandic_uca_ci);
|
||||||
|
|
|
@ -4075,8 +4075,6 @@ errorconn:
|
||||||
NullS);
|
NullS);
|
||||||
sql_perror(buff);
|
sql_perror(buff);
|
||||||
}
|
}
|
||||||
my_security_attr_free(sa_event);
|
|
||||||
my_security_attr_free(sa_mapping);
|
|
||||||
if (handle_client_file_map)
|
if (handle_client_file_map)
|
||||||
CloseHandle(handle_client_file_map);
|
CloseHandle(handle_client_file_map);
|
||||||
if (handle_client_map)
|
if (handle_client_map)
|
||||||
|
|
|
@ -6300,11 +6300,7 @@ uint my_well_formed_len_big5(CHARSET_INFO *cs __attribute__((unused)),
|
||||||
const char *emb= e - 1; /* Last possible end of an MB character */
|
const char *emb= e - 1; /* Last possible end of an MB character */
|
||||||
while (pos && b < e)
|
while (pos && b < e)
|
||||||
{
|
{
|
||||||
/*
|
if ((uchar) b[0] < 128)
|
||||||
Cast to int8 for extra safety. "char" can be unsigned
|
|
||||||
by default on some platforms.
|
|
||||||
*/
|
|
||||||
if (((int8)b[0]) >= 0)
|
|
||||||
{
|
{
|
||||||
/* Single byte ascii character */
|
/* Single byte ascii character */
|
||||||
b++;
|
b++;
|
||||||
|
|
|
@ -4591,12 +4591,7 @@ uint my_well_formed_len_sjis(CHARSET_INFO *cs __attribute__((unused)),
|
||||||
const char *b0= b;
|
const char *b0= b;
|
||||||
while (pos && b < e)
|
while (pos && b < e)
|
||||||
{
|
{
|
||||||
/*
|
if ((uchar) b[0] < 128)
|
||||||
Cast to int8 for extra safety.
|
|
||||||
"char" can be unsigned by default
|
|
||||||
on some platforms.
|
|
||||||
*/
|
|
||||||
if (((int8)b[0]) >= 0)
|
|
||||||
{
|
{
|
||||||
/* Single byte ascii character */
|
/* Single byte ascii character */
|
||||||
b++;
|
b++;
|
||||||
|
|
|
@ -2148,12 +2148,7 @@ int my_strcasecmp_utf8(CHARSET_INFO *cs, const char *s, const char *t)
|
||||||
{
|
{
|
||||||
my_wc_t s_wc,t_wc;
|
my_wc_t s_wc,t_wc;
|
||||||
|
|
||||||
/*
|
if ((uchar) s[0] < 128)
|
||||||
Cast to int8 for extra safety.
|
|
||||||
char can be unsigned by default
|
|
||||||
on some platforms.
|
|
||||||
*/
|
|
||||||
if (((int8)s[0]) >= 0)
|
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
s[0] is between 0 and 127.
|
s[0] is between 0 and 127.
|
||||||
|
@ -2200,7 +2195,7 @@ int my_strcasecmp_utf8(CHARSET_INFO *cs, const char *s, const char *t)
|
||||||
|
|
||||||
/* Do the same for the second string */
|
/* Do the same for the second string */
|
||||||
|
|
||||||
if (((int8)t[0]) >= 0)
|
if ((uchar) t[0] < 128)
|
||||||
{
|
{
|
||||||
/* Convert single byte character into weight */
|
/* Convert single byte character into weight */
|
||||||
t_wc= plane00[(uchar) t[0]].tolower;
|
t_wc= plane00[(uchar) t[0]].tolower;
|
||||||
|
@ -2410,6 +2405,172 @@ CHARSET_INFO my_charset_utf8_bin=
|
||||||
&my_collation_mb_bin_handler
|
&my_collation_mb_bin_handler
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#ifdef HAVE_CYBOZU_COLLATION
|
||||||
|
|
||||||
|
/*
|
||||||
|
* These functions bacically do the same as their original, except
|
||||||
|
* that they return 0 only when two comparing unicode strings are
|
||||||
|
* strictly the same in case-sensitive way. See "save_diff" local
|
||||||
|
* variable to what they actually do.
|
||||||
|
*/
|
||||||
|
|
||||||
|
static int my_strnncoll_utf8_cs(CHARSET_INFO *cs,
|
||||||
|
const uchar *s, uint slen,
|
||||||
|
const uchar *t, uint tlen,
|
||||||
|
my_bool t_is_prefix)
|
||||||
|
{
|
||||||
|
int s_res,t_res;
|
||||||
|
my_wc_t s_wc,t_wc;
|
||||||
|
const uchar *se=s+slen;
|
||||||
|
const uchar *te=t+tlen;
|
||||||
|
int save_diff = 0;
|
||||||
|
int diff;
|
||||||
|
|
||||||
|
while ( s < se && t < te )
|
||||||
|
{
|
||||||
|
int plane;
|
||||||
|
s_res=my_utf8_uni(cs,&s_wc, s, se);
|
||||||
|
t_res=my_utf8_uni(cs,&t_wc, t, te);
|
||||||
|
|
||||||
|
if ( s_res <= 0 || t_res <= 0 )
|
||||||
|
|
||||||
|
{
|
||||||
|
/* Incorrect string, compare by char value */
|
||||||
|
return ((int)s[0]-(int)t[0]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( save_diff == 0 )
|
||||||
|
{
|
||||||
|
save_diff = ((int)s_wc) - ((int)t_wc);
|
||||||
|
}
|
||||||
|
plane=(s_wc>>8) & 0xFF;
|
||||||
|
s_wc = uni_plane[plane] ? uni_plane[plane][s_wc & 0xFF].sort : s_wc;
|
||||||
|
plane=(t_wc>>8) & 0xFF;
|
||||||
|
t_wc = uni_plane[plane] ? uni_plane[plane][t_wc & 0xFF].sort : t_wc;
|
||||||
|
if ( s_wc != t_wc )
|
||||||
|
{
|
||||||
|
return ((int) s_wc) - ((int) t_wc);
|
||||||
|
}
|
||||||
|
|
||||||
|
s+=s_res;
|
||||||
|
t+=t_res;
|
||||||
|
}
|
||||||
|
diff = ( (se-s) - (te-t) );
|
||||||
|
return t_is_prefix ? t-te : ((diff == 0) ? save_diff : diff);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int my_strnncollsp_utf8_cs(CHARSET_INFO *cs,
|
||||||
|
const uchar *s, uint slen,
|
||||||
|
const uchar *t, uint tlen)
|
||||||
|
{
|
||||||
|
int s_res,t_res;
|
||||||
|
my_wc_t s_wc,t_wc;
|
||||||
|
const uchar *se= s+slen;
|
||||||
|
const uchar *te= t+tlen;
|
||||||
|
int save_diff = 0;
|
||||||
|
|
||||||
|
while ( s < se && t < te )
|
||||||
|
{
|
||||||
|
int plane;
|
||||||
|
s_res=my_utf8_uni(cs,&s_wc, s, se);
|
||||||
|
t_res=my_utf8_uni(cs,&t_wc, t, te);
|
||||||
|
|
||||||
|
if ( s_res <= 0 || t_res <= 0 )
|
||||||
|
{
|
||||||
|
/* Incorrect string, compare by char value */
|
||||||
|
return ((int)s[0]-(int)t[0]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( save_diff == 0 )
|
||||||
|
{
|
||||||
|
save_diff = ((int)s_wc) - ((int)t_wc);
|
||||||
|
}
|
||||||
|
plane=(s_wc>>8) & 0xFF;
|
||||||
|
s_wc = uni_plane[plane] ? uni_plane[plane][s_wc & 0xFF].sort : s_wc;
|
||||||
|
plane=(t_wc>>8) & 0xFF;
|
||||||
|
t_wc = uni_plane[plane] ? uni_plane[plane][t_wc & 0xFF].sort : t_wc;
|
||||||
|
if ( s_wc != t_wc )
|
||||||
|
{
|
||||||
|
return ((int) s_wc) - ((int) t_wc);
|
||||||
|
}
|
||||||
|
|
||||||
|
s+=s_res;
|
||||||
|
t+=t_res;
|
||||||
|
}
|
||||||
|
|
||||||
|
slen= se-s;
|
||||||
|
tlen= te-t;
|
||||||
|
|
||||||
|
if (slen != tlen)
|
||||||
|
{
|
||||||
|
int swap= 0;
|
||||||
|
if (slen < tlen)
|
||||||
|
{
|
||||||
|
slen= tlen;
|
||||||
|
s= t;
|
||||||
|
se= te;
|
||||||
|
swap= -1;
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
This following loop uses the fact that in UTF-8
|
||||||
|
all multibyte characters are greater than space,
|
||||||
|
and all multibyte head characters are greater than
|
||||||
|
space. It means if we meet a character greater
|
||||||
|
than space, it always means that the longer string
|
||||||
|
is greater. So we can reuse the same loop from the
|
||||||
|
8bit version, without having to process full multibute
|
||||||
|
sequences.
|
||||||
|
*/
|
||||||
|
for ( ; s < se; s++)
|
||||||
|
{
|
||||||
|
if (*s != ' ')
|
||||||
|
return ((int)*s - (int) ' ') ^ swap;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return save_diff;
|
||||||
|
}
|
||||||
|
|
||||||
|
static MY_COLLATION_HANDLER my_collation_cs_handler =
|
||||||
|
{
|
||||||
|
NULL, /* init */
|
||||||
|
my_strnncoll_utf8_cs,
|
||||||
|
my_strnncollsp_utf8_cs,
|
||||||
|
my_strnxfrm_utf8,
|
||||||
|
my_like_range_simple,
|
||||||
|
my_wildcmp_mb,
|
||||||
|
my_strcasecmp_utf8,
|
||||||
|
my_instr_mb,
|
||||||
|
my_hash_sort_utf8
|
||||||
|
};
|
||||||
|
|
||||||
|
CHARSET_INFO my_charset_utf8_general_cs=
|
||||||
|
{
|
||||||
|
254,0,0, /* number */
|
||||||
|
MY_CS_COMPILED|MY_CS_UNICODE, /* state */
|
||||||
|
"utf8", /* cs name */
|
||||||
|
"utf8_general_cs", /* name */
|
||||||
|
"", /* comment */
|
||||||
|
NULL, /* tailoring */
|
||||||
|
ctype_utf8, /* ctype */
|
||||||
|
to_lower_utf8, /* to_lower */
|
||||||
|
to_upper_utf8, /* to_upper */
|
||||||
|
to_upper_utf8, /* sort_order */
|
||||||
|
NULL, /* contractions */
|
||||||
|
NULL, /* sort_order_big*/
|
||||||
|
NULL, /* tab_to_uni */
|
||||||
|
NULL, /* tab_from_uni */
|
||||||
|
NULL, /* state_map */
|
||||||
|
NULL, /* ident_map */
|
||||||
|
1, /* strxfrm_multiply */
|
||||||
|
1, /* mbminlen */
|
||||||
|
3, /* mbmaxlen */
|
||||||
|
0, /* min_sort_char */
|
||||||
|
255, /* max_sort_char */
|
||||||
|
&my_charset_utf8_handler,
|
||||||
|
&my_collation_cs_handler
|
||||||
|
};
|
||||||
|
#endif /* Cybozu Hack */
|
||||||
|
|
||||||
|
|
||||||
#ifdef MY_TEST_UTF8
|
#ifdef MY_TEST_UTF8
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
Loading…
Reference in a new issue