Merge pippilotta.erinye.com:/shared/home/df/mysql/build/mysql-5.0

into pippilotta.erinye.com:/shared/home/df/mysql/build/mysql-5.0-build
2025-01-29 02:05:57 +01:00 · 2007-08-05 16:37:34 +02:00 · 2007-08-05 16:37:34 +02:00 · fe87e88f52
commit fe87e88f52
parent edc99e12b4 a150ca337f
20 changed files with 431 additions and 56 deletions
--- a/include/m_ctype.h
+++ b/include/m_ctype.h
@ -78,8 +78,14 @@ extern MY_UNICASE_INFO *my_unicase_turkish[256];
 #define MY_CS_READY	256    /* if a charset is initialized    */
 #define MY_CS_AVAILABLE	512    /* If either compiled-in or loaded*/
 #define MY_CS_CSSORT	1024   /* if case sensitive sort order   */	
+#define MY_CS_PUREASCII 2048   /* if a charset is pure ascii     */
 #define MY_CHARSET_UNDEFINED 0

+/* Character repertoire flags */
+#define MY_REPERTOIRE_ASCII      1 /* Pure ASCII            U+0000..U+007F */
+#define MY_REPERTOIRE_EXTENDED   2 /* Extended characters:  U+0080..U+FFFF */
+#define MY_REPERTOIRE_UNICODE30  3 /* ASCII | EXTENDED:     U+0000..U+FFFF */
+

 typedef struct my_uni_idx_st
 {
@ -436,6 +442,11 @@ my_bool my_propagate_simple(CHARSET_INFO *cs, const uchar *str, uint len);
 my_bool my_propagate_complex(CHARSET_INFO *cs, const uchar *str, uint len);


+uint my_string_repertoire(CHARSET_INFO *cs, const char *str, ulong len);
+my_bool my_charset_is_ascii_based(CHARSET_INFO *cs);
+my_bool my_charset_is_8bit_pure_ascii(CHARSET_INFO *cs);
+
+
 #define	_MY_U	01	/* Upper case */
 #define	_MY_L	02	/* Lower case */
 #define	_MY_NMR	04	/* Numeral (digit) */
--- a/mysql-test/r/ctype_ucs.result
+++ b/mysql-test/r/ctype_ucs.result
@ -896,4 +896,30 @@ select hex(convert(s1 using latin1)) from t1;
 hex(convert(s1 using latin1))
 7F
 drop table t1;
+create table t1 (a varchar(15) character set ascii not null, b int);
+insert into t1 values ('a',1);
+select concat(a,if(b<10,_ucs2 0x0061,_ucs2 0x0062)) from t1;
+concat(a,if(b<10,_ucs2 0x0061,_ucs2 0x0062))
+aa
+select concat(a,if(b>10,_ucs2 0x0061,_ucs2 0x0062)) from t1;
+concat(a,if(b>10,_ucs2 0x0061,_ucs2 0x0062))
+ab
+select * from t1 where a=if(b<10,_ucs2 0x0061,_ucs2 0x0062);
+a	b
+a	1
+select * from t1 where a=if(b>10,_ucs2 0x0061,_ucs2 0x0062);
+a	b
+select concat(a,if(b<10,_ucs2 0x00C0,_ucs2 0x0062)) from t1;
+ERROR HY000: Illegal mix of collations (ascii_general_ci,IMPLICIT) and (ucs2_general_ci,COERCIBLE) for operation 'concat'
+select concat(a,if(b>10,_ucs2 0x00C0,_ucs2 0x0062)) from t1;
+ERROR HY000: Illegal mix of collations (ascii_general_ci,IMPLICIT) and (ucs2_general_ci,COERCIBLE) for operation 'concat'
+select concat(a,if(b<10,_ucs2 0x0062,_ucs2 0x00C0)) from t1;
+ERROR HY000: Illegal mix of collations (ascii_general_ci,IMPLICIT) and (ucs2_general_ci,COERCIBLE) for operation 'concat'
+select concat(a,if(b>10,_ucs2 0x0062,_ucs2 0x00C0)) from t1;
+ERROR HY000: Illegal mix of collations (ascii_general_ci,IMPLICIT) and (ucs2_general_ci,COERCIBLE) for operation 'concat'
+select * from t1 where a=if(b<10,_ucs2 0x00C0,_ucs2 0x0062);
+ERROR HY000: Illegal mix of collations (ascii_general_ci,IMPLICIT) and (ucs2_general_ci,COERCIBLE) for operation '='
+select * from t1 where a=if(b<10,_ucs2 0x0062,_ucs2 0x00C0);
+ERROR HY000: Illegal mix of collations (ascii_general_ci,IMPLICIT) and (ucs2_general_ci,COERCIBLE) for operation '='
+drop table t1;
 End of 5.0 tests
--- a/mysql-test/r/ctype_utf8.result
+++ b/mysql-test/r/ctype_utf8.result
@ -1639,6 +1639,42 @@ coercibility(col1)	collation(col1)
 0	utf8_swedish_ci
 drop view v1, v2;
 drop table t1;
+set names utf8;
+create table t1 (a varchar(10) character set latin1, b int);
+insert into t1 values ('a',1);
+select concat(a, if(b>10, N'x', N'y')) from t1;
+concat(a, if(b>10, N'x', N'y'))
+ay
+select concat(a, if(b>10, N'æ', N'ß')) from t1;
+ERROR HY000: Illegal mix of collations (latin1_swedish_ci,IMPLICIT) and (utf8_general_ci,COERCIBLE) for operation 'concat'
+drop table t1;
+set names utf8;
+create table t1 (a varchar(10) character set latin1, b int);
+insert into t1 values ('a',1);
+select concat(a, if(b>10, _utf8'x', _utf8'y')) from t1;
+concat(a, if(b>10, _utf8'x', _utf8'y'))
+ay
+select concat(a, if(b>10, _utf8'æ', _utf8'ß')) from t1;
+ERROR HY000: Illegal mix of collations (latin1_swedish_ci,IMPLICIT) and (utf8_general_ci,COERCIBLE) for operation 'concat'
+drop table t1;
+set names utf8;
+create table t1 (a varchar(10) character set latin1, b int);
+insert into t1 values ('a',1);
+select concat(a, if(b>10, _utf8 0x78, _utf8 0x79)) from t1;
+concat(a, if(b>10, _utf8 0x78, _utf8 0x79))
+ay
+select concat(a, if(b>10, _utf8 0xC3A6, _utf8 0xC3AF)) from t1;
+ERROR HY000: Illegal mix of collations (latin1_swedish_ci,IMPLICIT) and (utf8_general_ci,COERCIBLE) for operation 'concat'
+drop table t1;
+set names utf8;
+create table t1 (a varchar(10) character set latin1, b int);
+insert into t1 values ('a',1);
+select concat(a, if(b>10, 'x' 'x', 'y' 'y')) from t1;
+concat(a, if(b>10, 'x' 'x', 'y' 'y'))
+ayy
+select concat(a, if(b>10, 'x' 'æ', 'y' 'ß')) from t1;
+ERROR HY000: Illegal mix of collations (latin1_swedish_ci,IMPLICIT) and (utf8_general_ci,COERCIBLE) for operation 'concat'
+drop table t1;
 CREATE TABLE t1 (
 colA int(11) NOT NULL,
 colB varchar(255) character set utf8 NOT NULL,
--- a/mysql-test/r/func_time.result
+++ b/mysql-test/r/func_time.result
@ -1246,3 +1246,19 @@ SELECT TIME_FORMAT(SEC_TO_TIME(a),"%H:%i:%s") FROM (SELECT 3020399 AS a UNION SE
 TIME_FORMAT(SEC_TO_TIME(a),"%H:%i:%s")
 838:59:58
 838:59:59
+set names latin1;
+create table t1 (a varchar(15) character set ascii not null);
+insert into t1 values ('070514-000000');
+select concat(a,ifnull(min(date_format(now(), '%Y-%m-%d')),' ull')) from t1;
+concat(a,ifnull(min(date_format(now(), '%Y-%m-%d')),' ull'))
+#
+set names swe7;
+select concat(a,ifnull(min(date_format(now(), '%Y-%m-%d')),' ull')) from t1;
+ERROR HY000: Illegal mix of collations (ascii_general_ci,IMPLICIT) and (swe7_swedish_ci,COERCIBLE) for operation 'concat'
+set names latin1;
+set lc_time_names=fr_FR;
+select concat(a,ifnull(min(date_format(now(), '%Y-%m-%d')),' ull')) from t1;
+ERROR HY000: Illegal mix of collations (ascii_general_ci,IMPLICIT) and (latin1_swedish_ci,COERCIBLE) for operation 'concat'
+set lc_time_names=en_US;
+drop table t1;
+End of 5.0 tests
--- a/mysql-test/t/ctype_ucs.test
+++ b/mysql-test/t/ctype_ucs.test
@ -622,4 +622,33 @@ select hex(s2) from t1;
 select hex(convert(s1 using latin1)) from t1;
 drop table t1;

+#
+# Conversion from UCS2 to ASCII is possible
+# if the UCS2 string consists of only ASCII characters
+#
+create table t1 (a varchar(15) character set ascii not null, b int);
+insert into t1 values ('a',1);
+select concat(a,if(b<10,_ucs2 0x0061,_ucs2 0x0062)) from t1;
+select concat(a,if(b>10,_ucs2 0x0061,_ucs2 0x0062)) from t1;
+select * from t1 where a=if(b<10,_ucs2 0x0061,_ucs2 0x0062);
+select * from t1 where a=if(b>10,_ucs2 0x0061,_ucs2 0x0062);
+
+#
+# Conversion from UCS2 to ASCII is not possible if 
+# the UCS2 string has non-ASCII characters
+#
+--error 1267
+select concat(a,if(b<10,_ucs2 0x00C0,_ucs2 0x0062)) from t1;
+--error 1267
+select concat(a,if(b>10,_ucs2 0x00C0,_ucs2 0x0062)) from t1;
+--error 1267
+select concat(a,if(b<10,_ucs2 0x0062,_ucs2 0x00C0)) from t1;
+--error 1267
+select concat(a,if(b>10,_ucs2 0x0062,_ucs2 0x00C0)) from t1;
+--error 1267
+select * from t1 where a=if(b<10,_ucs2 0x00C0,_ucs2 0x0062);
+--error 1267
+select * from t1 where a=if(b<10,_ucs2 0x0062,_ucs2 0x00C0);
+drop table t1;
+
 --echo End of 5.0 tests
--- a/mysql-test/t/ctype_utf8.test
+++ b/mysql-test/t/ctype_utf8.test
@ -1314,6 +1314,46 @@ select coercibility(col1), collation(col1) from v2;
 drop view v1, v2;
 drop table t1;

+#
+# Check conversion of NCHAR strings to subset (e.g. latin1).
+# Conversion is possible if string repertoire is ASCII.
+# Conversion is not possible if the string has extended characters
+#
+set names utf8;
+create table t1 (a varchar(10) character set latin1, b int);
+insert into t1 values ('a',1);
+select concat(a, if(b>10, N'x', N'y')) from t1;
+--error 1267
+select concat(a, if(b>10, N'æ', N'ß')) from t1;
+drop table t1;
+
+# Conversion tests for character set introducers
+set names utf8;
+create table t1 (a varchar(10) character set latin1, b int);
+insert into t1 values ('a',1);
+select concat(a, if(b>10, _utf8'x', _utf8'y')) from t1;
+--error 1267
+select concat(a, if(b>10, _utf8'æ', _utf8'ß')) from t1;
+drop table t1;
+
+# Conversion tests for introducer + HEX string
+set names utf8;
+create table t1 (a varchar(10) character set latin1, b int);
+insert into t1 values ('a',1);
+select concat(a, if(b>10, _utf8 0x78, _utf8 0x79)) from t1;
+--error 1267
+select concat(a, if(b>10, _utf8 0xC3A6, _utf8 0xC3AF)) from t1;
+drop table t1;
+
+# Conversion tests for "text_literal TEXT_STRING_literal" syntax structure
+set names utf8;
+create table t1 (a varchar(10) character set latin1, b int);
+insert into t1 values ('a',1);
+select concat(a, if(b>10, 'x' 'x', 'y' 'y')) from t1;
+--error 1267
+select concat(a, if(b>10, 'x' 'æ', 'y' 'ß')) from t1;
+drop table t1;
+

 #
 # Bug#19960: Inconsistent results when joining
--- a/mysql-test/t/func_time.test
+++ b/mysql-test/t/func_time.test
@ -752,3 +752,29 @@ DROP TABLE t1;
 # Check if using GROUP BY with TIME_FORMAT() produces correct results

 SELECT TIME_FORMAT(SEC_TO_TIME(a),"%H:%i:%s") FROM (SELECT 3020399 AS a UNION SELECT 3020398 ) x GROUP BY 1;
+
+#
+# Bug#28875 Conversion between ASCII and LATIN1 charsets does not function
+#
+set names latin1;
+create table t1 (a varchar(15) character set ascii not null);
+insert into t1 values ('070514-000000');
+# Conversion of date_format() result to ASCII
+# is safe with the default locale en_US
+--replace_column 1 #
+select concat(a,ifnull(min(date_format(now(), '%Y-%m-%d')),' ull')) from t1;
+# Error for swe7: it is not ASCII compatible
+set names swe7;
+--error 1267
+select concat(a,ifnull(min(date_format(now(), '%Y-%m-%d')),' ull')) from t1;
+set names latin1;
+# Conversion of date_format() result to ASCII
+# is not safe with the non-default locale fr_FR
+# because month and day names can have accented characters
+set lc_time_names=fr_FR;
+--error 1267
+select concat(a,ifnull(min(date_format(now(), '%Y-%m-%d')),' ull')) from t1;
+set lc_time_names=en_US;
+drop table t1;
+
+--echo End of 5.0 tests
--- a/mysys/charset.c
+++ b/mysys/charset.c
@ -277,6 +277,9 @@ static int add_collation(CHARSET_INFO *cs)
        if (sort_order && sort_order['A'] < sort_order['a'] &&
                          sort_order['a'] < sort_order['B'])
          all_charsets[cs->number]->state|= MY_CS_CSSORT; 
+
+        if (my_charset_is_8bit_pure_ascii(all_charsets[cs->number]))
+          all_charsets[cs->number]->state|= MY_CS_PUREASCII;
      }
    }
    else
--- a/sql/item.cc
+++ b/sql/item.cc
@ -1327,6 +1327,25 @@ void Item::split_sum_func2(THD *thd, Item **ref_pointer_array,
 }


+static bool
+left_is_superset(DTCollation *left, DTCollation *right)
+{
+  /*
+    Returns TRUE when conversion towards the collation of "left" is
+    allowed, FALSE otherwise. Both rules below rely on the same
+    derivation comparison, so it is evaluated once up front.
+  */
+  const bool left_deriv_lower= left->derivation < right->derivation;
+  const bool deriv_equal= left->derivation == right->derivation;
+
+  /* Allow convert to Unicode */
+  if (left->collation->state & MY_CS_UNICODE)
+  {
+    if (left_deriv_lower ||
+        (deriv_equal && !(right->collation->state & MY_CS_UNICODE)))
+      return TRUE;
+  }
+
+  /* Allow convert from ASCII */
+  if (right->repertoire == MY_REPERTOIRE_ASCII)
+  {
+    if (left_deriv_lower ||
+        (deriv_equal && left->repertoire != MY_REPERTOIRE_ASCII))
+      return TRUE;
+  }
+
+  /* Disallow conversion otherwise */
+  return FALSE;
+}
+
 /*
   Aggregate two collations together taking
   into account their coercibility (aka derivation):
@ -1391,18 +1410,12 @@ bool DTCollation::aggregate(DTCollation &dt, uint flags)
       ; // Do nothing
    }
    else if ((flags & MY_COLL_ALLOW_SUPERSET_CONV) &&
-             collation->state & MY_CS_UNICODE &&
-             (derivation < dt.derivation ||
-             (derivation == dt.derivation &&
-             !(dt.collation->state & MY_CS_UNICODE))))
+             left_is_superset(this, &dt))
    {
      // Do nothing
    }
    else if ((flags & MY_COLL_ALLOW_SUPERSET_CONV) &&
-             dt.collation->state & MY_CS_UNICODE &&
-             (dt.derivation < derivation ||
-              (dt.derivation == derivation &&
-             !(collation->state & MY_CS_UNICODE))))
+             left_is_superset(&dt, this))
    {
      set(dt);
    }
@ -1421,7 +1434,7 @@ bool DTCollation::aggregate(DTCollation &dt, uint flags)
    else
    {
      // Cannot apply conversion
-      set(0, DERIVATION_NONE);
+      set(0, DERIVATION_NONE, 0);
      return 1;
    }
  }
@ -1443,8 +1456,8 @@ bool DTCollation::aggregate(DTCollation &dt, uint flags)
    {
      if (derivation == DERIVATION_EXPLICIT)
      {
-	set(0, DERIVATION_NONE);
-	return 1;
+        set(0, DERIVATION_NONE, 0);
+        return 1;
      }
      if (collation->state & MY_CS_BINSORT)
        return 0;
@ -1458,6 +1471,7 @@ bool DTCollation::aggregate(DTCollation &dt, uint flags)
      set(bin, DERIVATION_NONE);
    }
  }
+  repertoire|= dt.repertoire;
  return 0;
 }

@ -1597,12 +1611,16 @@ bool agg_item_charsets(DTCollation &coll, const char *fname,
  {
    Item* conv;
    uint32 dummy_offset;
-    if (!String::needs_conversion(0, coll.collation,
-                                  (*arg)->collation.collation,
+    if (!String::needs_conversion(0, (*arg)->collation.collation,
+                                  coll.collation,
                                  &dummy_offset))
      continue;

-    if (!(conv= (*arg)->safe_charset_converter(coll.collation)))
+    if (!(conv= (*arg)->safe_charset_converter(coll.collation)) &&
+        ((*arg)->collation.repertoire == MY_REPERTOIRE_ASCII))
+      conv= new Item_func_conv_charset(*arg, coll.collation, 1);
+
+    if (!conv)
    {
      if (nargs >=2 && nargs <= 3)
      {
--- a/sql/item.h
+++ b/sql/item.h
@ -49,29 +49,50 @@ class DTCollation {
 public:
  CHARSET_INFO     *collation;
  enum Derivation derivation;
+  uint repertoire;
  
+  void set_repertoire_from_charset(CHARSET_INFO *cs)
+  {
+    /* Derive the repertoire from the MY_CS_PUREASCII flag of the charset */
+    if (cs->state & MY_CS_PUREASCII)
+      repertoire= MY_REPERTOIRE_ASCII;
+    else
+      repertoire= MY_REPERTOIRE_UNICODE30;
+  }
  DTCollation()
  {
    collation= &my_charset_bin;
    derivation= DERIVATION_NONE;
+    repertoire= MY_REPERTOIRE_UNICODE30;
  }
  DTCollation(CHARSET_INFO *collation_arg, Derivation derivation_arg)
  {
    collation= collation_arg;
    derivation= derivation_arg;
+    set_repertoire_from_charset(collation_arg);
  }
  void set(DTCollation &dt)
  { 
    collation= dt.collation;
    derivation= dt.derivation;
+    repertoire= dt.repertoire;
  }
  void set(CHARSET_INFO *collation_arg, Derivation derivation_arg)
  {
    collation= collation_arg;
    derivation= derivation_arg;
+    set_repertoire_from_charset(collation_arg);
+  }
+  void set(CHARSET_INFO *collation_arg,
+           Derivation derivation_arg,
+           uint repertoire_arg)
+  {
+    collation= collation_arg;
+    derivation= derivation_arg;
+    repertoire= repertoire_arg;
  }
  void set(CHARSET_INFO *collation_arg)
-  { collation= collation_arg; }
+  {
+    collation= collation_arg;
+    set_repertoire_from_charset(collation_arg);
+  }
  void set(Derivation derivation_arg)
  { derivation= derivation_arg; }
  bool aggregate(DTCollation &dt, uint flags= 0);
@ -1672,10 +1693,11 @@ class Item_string :public Item
 {
 public:
  Item_string(const char *str,uint length,
-  	      CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE)
+              CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE,
+              uint repertoire= MY_REPERTOIRE_UNICODE30)
  {
-    collation.set(cs, dv);
-    str_value.set_or_copy_aligned(str,length,cs);
+    str_value.set_or_copy_aligned(str, length, cs);
+    collation.set(cs, dv, repertoire);
    /*
      We have to have a different max_length than 'length' here to
      ensure that we get the right length if we do use the item
@ -1699,10 +1721,11 @@ public:
    fixed= 1;
  }
  Item_string(const char *name_par, const char *str, uint length,
-	      CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE)
+              CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE,
+              uint repertoire= MY_REPERTOIRE_UNICODE30)
  {
-    collation.set(cs, dv);
-    str_value.set_or_copy_aligned(str,length,cs);
+    str_value.set_or_copy_aligned(str, length, cs);
+    collation.set(cs, dv, repertoire);
    max_length= str_value.numchars()*cs->mbmaxlen;
    set_name(name_par, 0, cs);
    decimals=NOT_FIXED_DEC;
@ -1718,6 +1741,12 @@ public:
    str_value.copy(str_arg, length_arg, collation.collation);
    max_length= str_value.numchars() * collation.collation->mbmaxlen;
  }
+  void set_repertoire_from_value()
+  {
+    /* Scan the current string value and store the detected repertoire */
+    CHARSET_INFO *value_cs= str_value.charset();
+    collation.repertoire= my_string_repertoire(value_cs,
+                                               str_value.ptr(),
+                                               str_value.length());
+  }
  enum Type type() const { return STRING_ITEM; }
  double val_real();
  longlong val_int();
--- a/sql/item_func.cc
+++ b/sql/item_func.cc
@ -3767,7 +3767,7 @@ static user_var_entry *get_variable(HASH *hash, LEX_STRING &name,
    entry->value=0;
    entry->length=0;
    entry->update_query_id=0;
-    entry->collation.set(NULL, DERIVATION_IMPLICIT);
+    entry->collation.set(NULL, DERIVATION_IMPLICIT, 0);
    entry->unsigned_flag= 0;
    /*
      If we are here, we were called from a SET or a query which sets a
--- a/sql/item_strfunc.cc
+++ b/sql/item_strfunc.cc
@ -2673,7 +2673,8 @@ void Item_func_set_collation::fix_length_and_dec()
             colname, args[0]->collation.collation->csname);
    return;
  }
-  collation.set(set_collation, DERIVATION_EXPLICIT);
+  collation.set(set_collation, DERIVATION_EXPLICIT,
+                args[0]->collation.repertoire);
  max_length= args[0]->max_length;
 }

--- a/sql/item_timefunc.cc
+++ b/sql/item_timefunc.cc
@ -1718,7 +1718,11 @@ void Item_func_date_format::fix_length_and_dec()
  Item *arg1= args[1]->this_item();

  decimals=0;
-  collation.set(thd->variables.collation_connection);
+  CHARSET_INFO *cs= thd->variables.collation_connection;
+  uint32 repertoire= arg1->collation.repertoire;
+  if (!thd->variables.lc_time_names->is_ascii)
+    repertoire|= MY_REPERTOIRE_EXTENDED;
+  collation.set(cs, arg1->collation.derivation, repertoire);
  if (arg1->type() == STRING_ITEM)
  {						// Optimize the normal case
    fixed_length=1;
--- a/sql/sql_lex.cc
+++ b/sql/sql_lex.cc
@ -311,10 +311,12 @@ static char *get_text(Lex_input_stream *lip)
  uint found_escape=0;
  CHARSET_INFO *cs= lip->m_thd->charset();

+  lip->tok_bitmap= 0;
  sep= yyGetLast();			// String should end with this
  while (lip->ptr != lip->end_of_query)
  {
-    c = yyGet();
+    c= yyGet();
+    lip->tok_bitmap|= c;
 #ifdef USE_MB
    {
      int l;
@ -605,6 +607,7 @@ int MYSQLlex(void *arg, void *yythd)
 	break;
      }
      yylval->lex_str.length= lip->yytoklen;
+      lex->text_string_is_7bit= (lip->tok_bitmap & 0x80) ? 0 : 1;
      return(NCHAR_STRING);

    case MY_LEX_IDENT_OR_HEX:
@ -926,6 +929,7 @@ int MYSQLlex(void *arg, void *yythd)
 	break;
      }
      yylval->lex_str.length=lip->yytoklen;
+      lex->text_string_is_7bit= (lip->tok_bitmap & 0x80) ? 0 : 1;
      return(TEXT_STRING);

    case MY_LEX_COMMENT:			//  Comment
--- a/sql/sql_lex.h
+++ b/sql/sql_lex.h
@ -957,6 +957,9 @@ public:

  /** Position of ';' in the stream, to delimit multiple queries. */
  const char* found_semicolon;
+  
+  /** Token character bitmaps, to detect 7bit strings. */
+  uchar tok_bitmap;

  /** SQL_MODE = IGNORE_SPACE. */
  bool ignore_space;
@ -994,6 +997,7 @@ typedef struct st_lex : public Query_tables_list
  gptr yacc_yyss,yacc_yyvs;
  THD *thd;
  CHARSET_INFO *charset, *underscore_charset;
+  bool text_string_is_7bit;
  /* store original leaf_tables for INSERT SELECT and PS/SP */
  TABLE_LIST *leaf_tables_insert;
  /* Position (first character index) of SELECT of CREATE VIEW statement */
--- a/sql/sql_string.cc
+++ b/sql/sql_string.cc
@ -263,6 +263,8 @@ bool String::needs_conversion(uint32 arg_length,
      (to_cs == &my_charset_bin) || 
      (to_cs == from_cs) ||
      my_charset_same(from_cs, to_cs) ||
+      (my_charset_is_ascii_based(to_cs) &&
+       my_charset_is_8bit_pure_ascii(from_cs)) ||
      ((from_cs == &my_charset_bin) &&
       (!(*offset=(arg_length % to_cs->mbminlen)))))
    return FALSE;
--- a/sql/sql_yacc.yy
+++ b/sql/sql_yacc.yy
@ -7523,18 +7523,54 @@ opt_load_data_set_spec:
 /* Common definitions */

 text_literal:
-	TEXT_STRING_literal
-	{
-	  THD *thd= YYTHD;
-	  $$ = new Item_string($1.str,$1.length,thd->variables.collation_connection);
-	}
-	| NCHAR_STRING
-	{ $$=  new Item_string($1.str,$1.length,national_charset_info); }
-	| UNDERSCORE_CHARSET TEXT_STRING
-	  { $$ = new Item_string($2.str,$2.length,Lex->underscore_charset); }
-	| text_literal TEXT_STRING_literal
-	  { ((Item_string*) $1)->append($2.str,$2.length); }
-	;
+        TEXT_STRING
+        {
+          LEX_STRING tmp;
+          THD *thd= YYTHD;
+          CHARSET_INFO *cs_con= thd->variables.collation_connection;
+          CHARSET_INFO *cs_cli= thd->variables.character_set_client;
+          uint repertoire= thd->lex->text_string_is_7bit &&
+                             my_charset_is_ascii_based(cs_cli) ?
+                           MY_REPERTOIRE_ASCII : MY_REPERTOIRE_UNICODE30;
+          if (thd->charset_is_collation_connection ||
+              (repertoire == MY_REPERTOIRE_ASCII &&
+               my_charset_is_ascii_based(cs_con)))
+            tmp= $1;
+          else
+            thd->convert_string(&tmp, cs_con, $1.str, $1.length, cs_cli);
+          $$= new Item_string(tmp.str, tmp.length, cs_con,
+                              DERIVATION_COERCIBLE, repertoire);
+        }
+        | NCHAR_STRING
+        {
+          uint repertoire= Lex->text_string_is_7bit ?
+                           MY_REPERTOIRE_ASCII : MY_REPERTOIRE_UNICODE30;
+          DBUG_ASSERT(my_charset_is_ascii_based(national_charset_info));
+          $$= new Item_string($1.str, $1.length, national_charset_info,
+                              DERIVATION_COERCIBLE, repertoire);
+        }
+        | UNDERSCORE_CHARSET TEXT_STRING
+          {
+            /*
+              Literal with a character set introducer: build the item in
+              the introducer's character set, then scan its value to find
+              out whether it is pure ASCII.
+            */
+            $$= new Item_string($2.str, $2.length, Lex->underscore_charset);
+            /*
+              Guard against a NULL item before dereferencing it, the same
+              way the UNDERSCORE_CHARSET HEX_NUM rule does.
+            */
+            if ($$)
+              ((Item_string*) $$)->set_repertoire_from_value();
+          }
+        | text_literal TEXT_STRING_literal
+          {
+            Item_string* item= (Item_string*) $1;
+            item->append($2.str, $2.length);
+            if (!(item->collation.repertoire & MY_REPERTOIRE_EXTENDED))
+            {
+              /*
+                 If the string has been pure ASCII so far,
+                 check the new part.
+              */
+              CHARSET_INFO *cs= YYTHD->variables.collation_connection;
+              item->collation.repertoire|= my_string_repertoire(cs,
+                                                                $2.str,
+                                                                $2.length);
+            }
+          }
+        ;

 text_string:
 	TEXT_STRING_literal
@ -7606,20 +7642,22 @@ literal:
 	| TRUE_SYM	{ $$= new Item_int((char*) "TRUE",1,1); }
 	| HEX_NUM	{ $$ =	new Item_hex_string($1.str, $1.length);}
 	| BIN_NUM	{ $$= new Item_bin_string($1.str, $1.length); }
-	| UNDERSCORE_CHARSET HEX_NUM
-	  {
-	    Item *tmp= new Item_hex_string($2.str, $2.length);
-	    /*
-	      it is OK only emulate fix_fieds, because we need only
+        | UNDERSCORE_CHARSET HEX_NUM
+          {
+            Item *tmp= new Item_hex_string($2.str, $2.length);
+            /*
+              it is OK to only emulate fix_fields, because we need only
              value of constant
-	    */
-	    String *str= tmp ?
-	      tmp->quick_fix_field(), tmp->val_str((String*) 0) :
-	      (String*) 0;
-	    $$= new Item_string(str ? str->ptr() : "",
-				str ? str->length() : 0,
-				Lex->underscore_charset);
-	  }
+            */
+            String *str= tmp ?
+              tmp->quick_fix_field(), tmp->val_str((String*) 0) :
+              (String*) 0;
+            $$= new Item_string(str ? str->ptr() : "",
+                                str ? str->length() : 0,
+                                Lex->underscore_charset);
+            if ($$)
+              ((Item_string *) $$)->set_repertoire_from_value();
+          }
 	| UNDERSCORE_CHARSET BIN_NUM
          {
 	    Item *tmp= new Item_bin_string($2.str, $2.length);
--- a/strings/conf_to_src.c
+++ b/strings/conf_to_src.c
@ -179,14 +179,16 @@ is_case_sensitive(CHARSET_INFO *cs)
         cs->sort_order['a'] < cs->sort_order['B']) ? 1 : 0;
 }

+
 void dispcset(FILE *f,CHARSET_INFO *cs)
 {
  fprintf(f,"{\n");
  fprintf(f,"  %d,%d,%d,\n",cs->number,0,0);
-  fprintf(f,"  MY_CS_COMPILED%s%s%s,\n",
-          cs->state & MY_CS_BINSORT ? "|MY_CS_BINSORT" : "",
-          cs->state & MY_CS_PRIMARY ? "|MY_CS_PRIMARY" : "",
-          is_case_sensitive(cs)     ? "|MY_CS_CSSORT"  : "");
+  fprintf(f,"  MY_CS_COMPILED%s%s%s%s,\n",
+          cs->state & MY_CS_BINSORT         ? "|MY_CS_BINSORT"   : "",
+          cs->state & MY_CS_PRIMARY         ? "|MY_CS_PRIMARY"   : "",
+          is_case_sensitive(cs)             ? "|MY_CS_CSSORT"    : "",
+          my_charset_is_8bit_pure_ascii(cs) ? "|MY_CS_PUREASCII" : "");
  
  if (cs->name)
  {
--- a/strings/ctype-extra.c
+++ b/strings/ctype-extra.c
@ -6722,7 +6722,7 @@ CHARSET_INFO compiled_charsets[] = {
 #ifdef HAVE_CHARSET_ascii
 {
  11,0,0,
-  MY_CS_COMPILED|MY_CS_PRIMARY,
+  MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_PUREASCII,
  "ascii",                     /* cset name     */
  "ascii_general_ci",                     /* coll name     */
  "",                       /* comment       */
@ -7811,7 +7811,7 @@ CHARSET_INFO compiled_charsets[] = {
 #ifdef HAVE_CHARSET_ascii
 {
  65,0,0,
-  MY_CS_COMPILED|MY_CS_BINSORT,
+  MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_PUREASCII,
  "ascii",                     /* cset name     */
  "ascii_bin",                     /* coll name     */
  "",                       /* comment       */
--- a/strings/ctype.c
+++ b/strings/ctype.c
@ -306,3 +306,89 @@ my_bool my_parse_charset_xml(const char *buf, uint len,
  my_xml_parser_free(&p);
  return rc;
 }
+
+
+/*
+  Check repertoire: detect pure ascii strings
+
+  Returns MY_REPERTOIRE_ASCII when no character above 0x7F was found in
+  the first "length" bytes of "str", MY_REPERTOIRE_UNICODE30 otherwise.
+
+  For character sets with mbminlen == 1 the bytes are examined directly.
+  For other character sets the string is decoded with mb_wc(); decoding
+  stops at the first non-positive mb_wc() result, and only the characters
+  decoded up to that point are taken into account.
+*/
+uint
+my_string_repertoire(CHARSET_INFO *cs, const char *str, ulong length)
+{
+  const char *strend= str + length;
+  if (cs->mbminlen == 1)
+  {
+    for ( ; str < strend; str++)
+    {
+      if (((uchar) *str) > 0x7F)
+        return MY_REPERTOIRE_UNICODE30;
+    }
+  }
+  else
+  {
+    my_wc_t wc;
+    int chlen;
+    /*
+      NOTE(review): mb_wc() is expected to take "const uchar *" arguments
+      (confirm against m_ctype.h), hence the explicit casts from "char *".
+    */
+    for (;
+         (chlen= cs->cset->mb_wc(cs, &wc, (const uchar *) str,
+                                 (const uchar *) strend)) > 0;
+         str+= chlen)
+    {
+      if (wc > 0x7F)
+        return MY_REPERTOIRE_UNICODE30;
+    }
+  }
+  return MY_REPERTOIRE_ASCII;
+}
+
+
+/*
+  Detect whether a character set is ASCII compatible.
+
+  Returns TRUE for:
+  
+  - all 8bit character sets whose Unicode mapping of 0x7B is '{'
+    (ignores swe7 which maps 0x7B to "LATIN LETTER A WITH DIAERESIS")
+  
+  - all multi-byte character sets having mbminlen == 1
+    (ignores ucs2 whose mbminlen is 2)
+  
+  TODO:
+  
+  When merging to 5.2, this function should be changed
+  to check a new flag MY_CS_NONASCII, 
+  
+     return (cs->flag & MY_CS_NONASCII) ? 0 : 1;
+  
+  This flag was previously added into 5.2 under terms
+  of WL#3759 "Optimize identifier conversion in client-server protocol"
+  especially to mark character sets not compatible with ASCII.
+  
+  We won't backport this flag to 5.0 or 5.1.
+  This function is Ok for 5.0 and 5.1, because we're not going
+  to introduce new tricky character sets between 5.0 and 5.2.
+*/
+my_bool
+my_charset_is_ascii_based(CHARSET_INFO *cs)
+{
+  /* One byte per character at most: rely on the Unicode mapping of '{' */
+  if (cs->mbmaxlen == 1 && cs->tab_to_uni && cs->tab_to_uni['{'] == '{')
+    return 1;
+  /* Multi-byte character set whose shortest character takes one byte */
+  if (cs->mbminlen == 1 && cs->mbmaxlen > 1)
+    return 1;
+  return 0;
+}
+
+
+/*
+  Detect if a character set is pure ascii, i.e. doesn't have
+  characters outside U+0000..U+007F.
+
+  All 256 entries of cs->tab_to_uni are inspected; the result is 1 only
+  if none of them exceeds 0x7F. A character set without a tab_to_uni
+  table yields 0.
+
+  This function is shared between "conf_to_src"
+  and the dynamic charsets loader in "mysqld".
+*/
+my_bool
+my_charset_is_8bit_pure_ascii(CHARSET_INFO *cs)
+{
+  if (cs->tab_to_uni)
+  {
+    size_t code= 0;
+    while (code < 256)
+    {
+      if (cs->tab_to_uni[code] > 0x7F)
+        return 0;
+      code++;
+    }
+    return 1;
+  }
+  return 0;
+}