mirror of
https://github.com/MariaDB/server.git
synced 2026-04-21 07:45:32 +02:00
Bug#28875 Conversion between ASCII and LATIN1 charsets does not function
(Regression, caused by the patch for bug 22646). Problem: when the result type of date_format() was changed from a binary string to a character string, mixing date_format() with an ASCII column in CONCAT() stopped working. Fix: - add a "repertoire" flag to the DTCollation class, to mark items which can return only pure ASCII strings. - allow character set conversion from pure ASCII to other character sets.
This commit is contained in:
parent
a8b2d9c951
commit
4eebfd09c2
20 changed files with 457 additions and 57 deletions
|
|
@ -179,14 +179,16 @@ is_case_sensitive(CHARSET_INFO *cs)
|
|||
cs->sort_order['a'] < cs->sort_order['B']) ? 1 : 0;
|
||||
}
|
||||
|
||||
|
||||
void dispcset(FILE *f,CHARSET_INFO *cs)
|
||||
{
|
||||
fprintf(f,"{\n");
|
||||
fprintf(f," %d,%d,%d,\n",cs->number,0,0);
|
||||
fprintf(f," MY_CS_COMPILED%s%s%s,\n",
|
||||
cs->state & MY_CS_BINSORT ? "|MY_CS_BINSORT" : "",
|
||||
cs->state & MY_CS_PRIMARY ? "|MY_CS_PRIMARY" : "",
|
||||
is_case_sensitive(cs) ? "|MY_CS_CSSORT" : "");
|
||||
fprintf(f," MY_CS_COMPILED%s%s%s%s,\n",
|
||||
cs->state & MY_CS_BINSORT ? "|MY_CS_BINSORT" : "",
|
||||
cs->state & MY_CS_PRIMARY ? "|MY_CS_PRIMARY" : "",
|
||||
is_case_sensitive(cs) ? "|MY_CS_CSSORT" : "",
|
||||
my_charset_is_8bit_pure_ascii(cs) ? "|MY_CS_PUREASCII" : "");
|
||||
|
||||
if (cs->name)
|
||||
{
|
||||
|
|
@ -243,6 +245,28 @@ void dispcset(FILE *f,CHARSET_INFO *cs)
|
|||
}
|
||||
|
||||
|
||||
/*
  Write the GPL copyright banner (as a C comment followed by a blank line)
  to the given stream.

  The text contains no conversion specifiers, so it is emitted verbatim.
  The result of the write is not checked.
*/
static void
fprint_copyright(FILE *file)
{
  static const char banner[]=
    "/* Copyright (C) 2000-2007 MySQL AB\n"
    "\n"
    " This program is free software; you can redistribute it and/or modify\n"
    " it under the terms of the GNU General Public License as published by\n"
    " the Free Software Foundation; version 2 of the License.\n"
    "\n"
    " This program is distributed in the hope that it will be useful,\n"
    " but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
    " MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n"
    " GNU General Public License for more details.\n"
    "\n"
    " You should have received a copy of the GNU General Public License\n"
    " along with this program; if not, write to the Free Software\n"
    " Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */\n"
    "\n";

  fputs(banner, file);
}
|
||||
|
||||
|
||||
int
|
||||
main(int argc, char **argv __attribute__((unused)))
|
||||
{
|
||||
|
|
@ -283,6 +307,7 @@ main(int argc, char **argv __attribute__((unused)))
|
|||
"directory:\n");
|
||||
fprintf(f, " ./conf_to_src ../sql/share/charsets/ > FILE\n");
|
||||
fprintf(f, "*/\n\n");
|
||||
fprint_copyright(f);
|
||||
fprintf(f,"#include <my_global.h>\n");
|
||||
fprintf(f,"#include <m_ctype.h>\n\n");
|
||||
|
||||
|
|
|
|||
|
|
@ -5,7 +5,8 @@
|
|||
To re-generate, run the following in the strings/ directory:
|
||||
./conf_to_src ../sql/share/charsets/ > FILE
|
||||
*/
|
||||
/* Copyright (C) 2000-2003 MySQL AB
|
||||
|
||||
/* Copyright (C) 2000-2007 MySQL AB
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
|
@ -6721,7 +6722,7 @@ CHARSET_INFO compiled_charsets[] = {
|
|||
#ifdef HAVE_CHARSET_ascii
|
||||
{
|
||||
11,0,0,
|
||||
MY_CS_COMPILED|MY_CS_PRIMARY,
|
||||
MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_PUREASCII,
|
||||
"ascii", /* cset name */
|
||||
"ascii_general_ci", /* coll name */
|
||||
"", /* comment */
|
||||
|
|
@ -7810,7 +7811,7 @@ CHARSET_INFO compiled_charsets[] = {
|
|||
#ifdef HAVE_CHARSET_ascii
|
||||
{
|
||||
65,0,0,
|
||||
MY_CS_COMPILED|MY_CS_BINSORT,
|
||||
MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_PUREASCII,
|
||||
"ascii", /* cset name */
|
||||
"ascii_bin", /* coll name */
|
||||
"", /* comment */
|
||||
|
|
|
|||
|
|
@ -306,3 +306,89 @@ my_bool my_parse_charset_xml(const char *buf, uint len,
|
|||
my_xml_parser_free(&p);
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Check repertoire: detect pure ascii strings
|
||||
*/
|
||||
uint
|
||||
my_string_repertoire(CHARSET_INFO *cs, const char *str, ulong length)
|
||||
{
|
||||
const char *strend= str + length;
|
||||
if (cs->mbminlen == 1)
|
||||
{
|
||||
for ( ; str < strend; str++)
|
||||
{
|
||||
if (((uchar) *str) > 0x7F)
|
||||
return MY_REPERTOIRE_UNICODE30;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
my_wc_t wc;
|
||||
int chlen;
|
||||
for (; (chlen= cs->cset->mb_wc(cs, &wc, str, strend)) > 0; str+= chlen)
|
||||
{
|
||||
if (wc > 0x7F)
|
||||
return MY_REPERTOIRE_UNICODE30;
|
||||
}
|
||||
}
|
||||
return MY_REPERTOIRE_ASCII;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Detect whether a character set is ASCII compatible.
|
||||
|
||||
Returns TRUE for:
|
||||
|
||||
- all 8bit character sets whose Unicode mapping of 0x7B is '{'
|
||||
(ignores swe7 which maps 0x7B to "LATIN LETTER A WITH DIAERESIS")
|
||||
|
||||
- all multi-byte character sets having mbminlen == 1
|
||||
(ignores ucs2 whose mbminlen is 2)
|
||||
|
||||
TODO:
|
||||
|
||||
When merging to 5.2, this function should be changed
|
||||
to check a new flag MY_CS_NONASCII,
|
||||
|
||||
return (cs->flag & MY_CS_NONASCII) ? 0 : 1;
|
||||
|
||||
This flag was previously added into 5.2 under terms
|
||||
of WL#3759 "Optimize identifier conversion in client-server protocol"
|
||||
especially to mark character sets not compatible with ASCII.
|
||||
|
||||
We won't backport this flag to 5.0 or 5.1.
|
||||
This function is Ok for 5.0 and 5.1, because we're not going
|
||||
to introduce new tricky character sets between 5.0 and 5.2.
|
||||
*/
|
||||
my_bool
|
||||
my_charset_is_ascii_based(CHARSET_INFO *cs)
|
||||
{
|
||||
return
|
||||
(cs->mbmaxlen == 1 && cs->tab_to_uni && cs->tab_to_uni['{'] == '{') ||
|
||||
(cs->mbminlen == 1 && cs->mbmaxlen > 1);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Detect if a character set is 8bit,
|
||||
and it is pure ascii, i.e. doesn't have
|
||||
characters outside U+0000..U+007F
|
||||
This functions is shared between "conf_to_src"
|
||||
and dynamic charsets loader in "mysqld".
|
||||
*/
|
||||
my_bool
|
||||
my_charset_is_8bit_pure_ascii(CHARSET_INFO *cs)
|
||||
{
|
||||
size_t code;
|
||||
if (!cs->tab_to_uni)
|
||||
return 0;
|
||||
for (code= 0; code < 256; code++)
|
||||
{
|
||||
if (cs->tab_to_uni[code] > 0x7F)
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue