Bug#28875 Conversion between ASCII and LATIN1 charsets does not function

(Regression, caused by a patch for the bug 22646).
Problem: when result type of date_format() was changed from
binary string to character string, mixing date_format()
with an ASCII column in CONCAT() stopped working.
Fix:
- adding "repertoire" flag into DTCollation class,
to mark items which can return only pure ASCII strings.
- allow character set conversion from pure ASCII to other character sets.
This commit is contained in:
bar@mysql.com/bar.myoffice.izhnet.ru 2007-08-03 15:25:23 +05:00
commit 4eebfd09c2
20 changed files with 457 additions and 57 deletions

View file

@ -179,14 +179,16 @@ is_case_sensitive(CHARSET_INFO *cs)
cs->sort_order['a'] < cs->sort_order['B']) ? 1 : 0;
}
void dispcset(FILE *f,CHARSET_INFO *cs)
{
fprintf(f,"{\n");
fprintf(f," %d,%d,%d,\n",cs->number,0,0);
fprintf(f," MY_CS_COMPILED%s%s%s,\n",
cs->state & MY_CS_BINSORT ? "|MY_CS_BINSORT" : "",
cs->state & MY_CS_PRIMARY ? "|MY_CS_PRIMARY" : "",
is_case_sensitive(cs) ? "|MY_CS_CSSORT" : "");
fprintf(f," MY_CS_COMPILED%s%s%s%s,\n",
cs->state & MY_CS_BINSORT ? "|MY_CS_BINSORT" : "",
cs->state & MY_CS_PRIMARY ? "|MY_CS_PRIMARY" : "",
is_case_sensitive(cs) ? "|MY_CS_CSSORT" : "",
my_charset_is_8bit_pure_ascii(cs) ? "|MY_CS_PUREASCII" : "");
if (cs->name)
{
@ -243,6 +245,28 @@ void dispcset(FILE *f,CHARSET_INFO *cs)
}
/*
  Write the MySQL AB GPL (version 2) copyright notice, formatted as a
  C comment and followed by an empty line, to the given stream.

  The notice contains no conversion specifiers, so it is emitted
  verbatim.
*/
static void
fprint_copyright(FILE *file)
{
  static const char notice[]=
    "/* Copyright (C) 2000-2007 MySQL AB\n"
    "\n"
    " This program is free software; you can redistribute it and/or modify\n"
    " it under the terms of the GNU General Public License as published by\n"
    " the Free Software Foundation; version 2 of the License.\n"
    "\n"
    " This program is distributed in the hope that it will be useful,\n"
    " but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
    " MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n"
    " GNU General Public License for more details.\n"
    "\n"
    " You should have received a copy of the GNU General Public License\n"
    " along with this program; if not, write to the Free Software\n"
    " Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */\n"
    "\n";

  fputs(notice, file);
}
int
main(int argc, char **argv __attribute__((unused)))
{
@ -283,6 +307,7 @@ main(int argc, char **argv __attribute__((unused)))
"directory:\n");
fprintf(f, " ./conf_to_src ../sql/share/charsets/ > FILE\n");
fprintf(f, "*/\n\n");
fprint_copyright(f);
fprintf(f,"#include <my_global.h>\n");
fprintf(f,"#include <m_ctype.h>\n\n");

View file

@ -5,7 +5,8 @@
To re-generate, run the following in the strings/ directory:
./conf_to_src ../sql/share/charsets/ > FILE
*/
/* Copyright (C) 2000-2003 MySQL AB
/* Copyright (C) 2000-2007 MySQL AB
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -6721,7 +6722,7 @@ CHARSET_INFO compiled_charsets[] = {
#ifdef HAVE_CHARSET_ascii
{
11,0,0,
MY_CS_COMPILED|MY_CS_PRIMARY,
MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_PUREASCII,
"ascii", /* cset name */
"ascii_general_ci", /* coll name */
"", /* comment */
@ -7810,7 +7811,7 @@ CHARSET_INFO compiled_charsets[] = {
#ifdef HAVE_CHARSET_ascii
{
65,0,0,
MY_CS_COMPILED|MY_CS_BINSORT,
MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_PUREASCII,
"ascii", /* cset name */
"ascii_bin", /* coll name */
"", /* comment */

View file

@ -306,3 +306,89 @@ my_bool my_parse_charset_xml(const char *buf, uint len,
my_xml_parser_free(&p);
return rc;
}
/*
  Determine the repertoire of a string: is it pure ASCII or not?

  SYNOPSIS
    my_string_repertoire()
    cs      character set the string is encoded in
    str     start of the string
    length  length of the string, in bytes

  NOTES
    When cs->mbminlen is 1 the string is examined byte by byte.
    Otherwise it is decoded one character at a time with
    cs->cset->mb_wc(); decoding stops as soon as mb_wc() returns
    a value that is not positive.

  RETURN
    MY_REPERTOIRE_UNICODE30  a byte/character above 0x7F was found
    MY_REPERTOIRE_ASCII      otherwise
*/
uint
my_string_repertoire(CHARSET_INFO *cs, const char *str, ulong length)
{
  const char *end= str + length;

  if (cs->mbminlen != 1)
  {
    my_wc_t ch;
    int nbytes;

    while ((nbytes= cs->cset->mb_wc(cs, &ch, str, end)) > 0)
    {
      if (ch > 0x7F)
        return MY_REPERTOIRE_UNICODE30;
      str+= nbytes;
    }
    return MY_REPERTOIRE_ASCII;
  }

  /* Single-byte minimum length: any byte above 0x7F is non-ASCII. */
  while (str < end)
  {
    if (((uchar) *str) > 0x7F)
      return MY_REPERTOIRE_UNICODE30;
    str++;
  }
  return MY_REPERTOIRE_ASCII;
}
/*
  Tell whether a character set is compatible with ASCII.

  RETURN
    TRUE for:
    - 8bit character sets (mbmaxlen == 1) that have a Unicode mapping
      table in which 0x7B maps to '{'. This excludes swe7, which maps
      0x7B to "LATIN LETTER A WITH DIAERESIS".
    - multi-byte character sets with mbminlen == 1. This excludes
      ucs2, whose mbminlen is 2.
    FALSE otherwise.

  TODO
    When merging to 5.2, change this function to test the new
    MY_CS_NONASCII flag instead:

      return (cs->flag & MY_CS_NONASCII) ? 0 : 1;

    That flag was added to 5.2 under WL#3759 "Optimize identifier
    conversion in client-server protocol", specifically to mark
    character sets that are not compatible with ASCII. It will not be
    backported to 5.0 or 5.1. The check below is sufficient for 5.0
    and 5.1, because no new tricky character sets are going to be
    introduced between 5.0 and 5.2.
*/
my_bool
my_charset_is_ascii_based(CHARSET_INFO *cs)
{
  if (cs->mbmaxlen == 1)
  {
    /* 8bit character set: look at how '{' is mapped to Unicode. */
    return cs->tab_to_uni && cs->tab_to_uni['{'] == '{';
  }

  /* Multi-byte character set: the shortest character must be one byte. */
  return cs->mbminlen == 1 && cs->mbmaxlen > 1;
}
/*
  Tell whether a character set is an 8bit pure ASCII character set,
  i.e. none of its 256 codes maps to a character outside
  U+0000..U+007F.

  This function is shared between "conf_to_src" and the dynamic
  character set loader in "mysqld".

  RETURN
    0  the character set has no Unicode mapping table (tab_to_uni),
       or at least one code maps above U+007F
    1  all 256 codes map into U+0000..U+007F
*/
my_bool
my_charset_is_8bit_pure_ascii(CHARSET_INFO *cs)
{
  size_t idx= 0;

  if (!cs->tab_to_uni)
    return 0;

  while (idx < 256)
  {
    if (cs->tab_to_uni[idx] > 0x7F)
      return 0;
    idx++;
  }
  return 1;
}