Bug#28875 Conversion between ASCII and LATIN1 charsets does not function

(Regression caused by the patch for bug 22646.) Problem: when the result type of date_format() was changed from binary string to character string, mixing date_format() with an ascii column in CONCAT() stopped working. Fix: - add a "repertoire" flag to the DTCollation class, to mark items which can return only pure ASCII strings; - allow character set conversion from pure ASCII to other character sets.
2026-04-21 07:45:32 +02:00 · 2007-08-03 15:25:23 +05:00 · 2007-08-03 15:25:23 +05:00 · 4eebfd09c2
commit 4eebfd09c2
parent a8b2d9c951
20 changed files with 457 additions and 57 deletions
--- a/strings/conf_to_src.c
+++ b/strings/conf_to_src.c
@ -179,14 +179,16 @@ is_case_sensitive(CHARSET_INFO *cs)
         cs->sort_order['a'] < cs->sort_order['B']) ? 1 : 0;
 }

+
 void dispcset(FILE *f,CHARSET_INFO *cs)
 {
  fprintf(f,"{\n");
  fprintf(f,"  %d,%d,%d,\n",cs->number,0,0);
-  fprintf(f,"  MY_CS_COMPILED%s%s%s,\n",
-          cs->state & MY_CS_BINSORT ? "|MY_CS_BINSORT" : "",
-          cs->state & MY_CS_PRIMARY ? "|MY_CS_PRIMARY" : "",
-          is_case_sensitive(cs)     ? "|MY_CS_CSSORT"  : "");
+  fprintf(f,"  MY_CS_COMPILED%s%s%s%s,\n",
+          cs->state & MY_CS_BINSORT         ? "|MY_CS_BINSORT"   : "",
+          cs->state & MY_CS_PRIMARY         ? "|MY_CS_PRIMARY"   : "",
+          is_case_sensitive(cs)             ? "|MY_CS_CSSORT"    : "",
+          my_charset_is_8bit_pure_ascii(cs) ? "|MY_CS_PUREASCII" : "");
  
  if (cs->name)
  {
@ -243,6 +245,28 @@ void dispcset(FILE *f,CHARSET_INFO *cs)
 }


+/*
+  Write the GPL copyright notice, formatted as a C comment
+  and followed by an empty line, to the given stream.
+*/
+static void
+fprint_copyright(FILE *file)
+{
+  /* The text contains no conversion specifications, so it is emitted as is */
+  static const char copyright_banner[]=
+"/* Copyright (C) 2000-2007 MySQL AB\n"
+"\n"
+"   This program is free software; you can redistribute it and/or modify\n"
+"   it under the terms of the GNU General Public License as published by\n"
+"   the Free Software Foundation; version 2 of the License.\n"
+"\n"
+"   This program is distributed in the hope that it will be useful,\n"
+"   but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
+"   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n"
+"   GNU General Public License for more details.\n"
+"\n"
+"   You should have received a copy of the GNU General Public License\n"
+"   along with this program; if not, write to the Free Software\n"
+"   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA */\n"
+"\n";
+
+  fputs(copyright_banner, file);
+}
+
+
 int
 main(int argc, char **argv  __attribute__((unused)))
 {
@ -283,6 +307,7 @@ main(int argc, char **argv  __attribute__((unused)))
          "directory:\n");
  fprintf(f, "    ./conf_to_src ../sql/share/charsets/ > FILE\n");
  fprintf(f, "*/\n\n");
+  fprint_copyright(f);
  fprintf(f,"#include <my_global.h>\n");
  fprintf(f,"#include <m_ctype.h>\n\n");
  
--- a/strings/ctype-extra.c
+++ b/strings/ctype-extra.c
@ -5,7 +5,8 @@
  To re-generate, run the following in the strings/ directory:
    ./conf_to_src ../sql/share/charsets/ > FILE
 */
-/* Copyright (C) 2000-2003 MySQL AB
+
+/* Copyright (C) 2000-2007 MySQL AB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@ -6721,7 +6722,7 @@ CHARSET_INFO compiled_charsets[] = {
 #ifdef HAVE_CHARSET_ascii
 {
  11,0,0,
-  MY_CS_COMPILED|MY_CS_PRIMARY,
+  MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_PUREASCII,
  "ascii",                     /* cset name     */
  "ascii_general_ci",                     /* coll name     */
  "",                       /* comment       */
@ -7810,7 +7811,7 @@ CHARSET_INFO compiled_charsets[] = {
 #ifdef HAVE_CHARSET_ascii
 {
  65,0,0,
-  MY_CS_COMPILED|MY_CS_BINSORT,
+  MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_PUREASCII,
  "ascii",                     /* cset name     */
  "ascii_bin",                     /* coll name     */
  "",                       /* comment       */
--- a/strings/ctype.c
+++ b/strings/ctype.c
@ -306,3 +306,89 @@ my_bool my_parse_charset_xml(const char *buf, uint len,
  my_xml_parser_free(&p);
  return rc;
 }
+
+
+/*
+  Check repertoire: detect pure ascii strings
+
+  SYNOPSIS
+    my_string_repertoire()
+    cs      character set used to interpret the string
+    str     start of the string
+    length  length of the string, in bytes
+
+  RETURN
+    MY_REPERTOIRE_UNICODE30  a byte (when cs->mbminlen is 1) or a decoded
+                             character (otherwise) above 0x7F was found
+    MY_REPERTOIRE_ASCII      no such byte or character was found
+
+  NOTE
+    When cs->mbminlen is not 1, scanning stops at the first position
+    where mb_wc() returns a non-positive value. Anything after that
+    position is not examined and MY_REPERTOIRE_ASCII is returned.
+*/
+uint
+my_string_repertoire(CHARSET_INFO *cs, const char *str, ulong length)
+{
+  const char *strend= str + length;
+  if (cs->mbminlen == 1)
+  {
+    /* Test the bytes directly, without decoding characters */
+    for ( ; str < strend; str++)
+    {
+      if (((uchar) *str) > 0x7F)
+        return MY_REPERTOIRE_UNICODE30;
+    }
+  }
+  else
+  {
+    my_wc_t wc;
+    int chlen;
+    /*
+      Decode character by character. The explicit casts avoid passing
+      "const char *" where mb_wc() expects unsigned char pointers
+      (signature assumed from m_ctype.h - TODO confirm).
+    */
+    for (;
+         (chlen= cs->cset->mb_wc(cs, &wc,
+                                 (const uchar *) str,
+                                 (const uchar *) strend)) > 0;
+         str+= chlen)
+    {
+      if (wc > 0x7F)
+        return MY_REPERTOIRE_UNICODE30;
+    }
+  }
+  return MY_REPERTOIRE_ASCII;
+}
+
+
+/*
+  Detect whether a character set is ASCII compatible.
+
+  Returns TRUE for:
+  
+  - all 8bit character sets whose Unicode mapping of 0x7B is '{'
+    (ignores swe7 which maps 0x7B to "LATIN LETTER A WITH DIAERESIS")
+  
+  - all multi-byte character sets having mbminlen == 1
+    (ignores ucs2 whose mbminlen is 2)
+  
+  TODO:
+  
+  When merging to 5.2, this function should be changed
+  to check a new flag MY_CS_NONASCII, 
+  
+     return (cs->flag & MY_CS_NONASCII) ? 0 : 1;
+  
+  This flag was previously added into 5.2 under terms
+  of WL#3759 "Optimize identifier conversion in client-server protocol"
+  especially to mark character sets not compatible with ASCII.
+  
+  We won't backport this flag to 5.0 or 5.1.
+  This function is Ok for 5.0 and 5.1, because we're not going
+  to introduce new tricky character sets between 5.0 and 5.2.
+*/
+my_bool
+my_charset_is_ascii_based(CHARSET_INFO *cs)
+{
+  /* mbmaxlen == 1: ASCII based when the Unicode mapping of '{' is '{' */
+  if (cs->mbmaxlen == 1)
+    return cs->tab_to_uni && cs->tab_to_uni['{'] == '{';
+
+  /* Otherwise: ASCII based when mbminlen is 1 and mbmaxlen is above 1 */
+  return cs->mbminlen == 1 && cs->mbmaxlen > 1;
+}
+
+
+/*
+  Tell whether a character set is pure ascii, i.e. none of the
+  256 entries of its tab_to_uni table maps to a character
+  outside U+0000..U+007F.
+  A character set without a tab_to_uni table is reported
+  as not pure ascii.
+  This function is shared between "conf_to_src"
+  and the dynamic charsets loader in "mysqld".
+*/
+my_bool
+my_charset_is_8bit_pure_ascii(CHARSET_INFO *cs)
+{
+  size_t pos= 0;
+
+  if (cs->tab_to_uni)
+  {
+    /* Advance until the first mapping above 0x7F, or the end of the table */
+    while (pos < 256 && cs->tab_to_uni[pos] <= 0x7F)
+      pos++;
+    return pos == 256;
+  }
+  return 0;
+}