MDEV-6661 PI() does not work well in UCS2/UTF16/UTF32 context

MDEV-6666 Malformed result for CONCAT(utf8_column, binary_string). Item_static_string_func::safe_charset_converter() and Item_hex_string::safe_charset_converter() did not properly handle character sets with mbminlen>1, nor conversion from binary to multi-byte character sets. This commit introduces Item::const_charset_converter() so that a number of Item_*::safe_charset_converter() implementations can reuse it.
2025-01-16 03:52:35 +01:00 · 2014-09-01 20:57:32 +04:00 · 2014-09-01 20:57:32 +04:00 · 1427e1db99
commit 1427e1db99
parent 18b307a7d2
16 changed files with 433 additions and 189 deletions
--- a/mysql-test/r/ctype_ucs.result
+++ b/mysql-test/r/ctype_ucs.result
@ -5323,3 +5323,15 @@ DROP TABLE t1;
 #
 # End of 5.6 tests
 #
+#
+# Start of 10.0 tests
+#
+#
+# MDEV-6661 PI() does not work well in UCS2/UTF16/UTF32 context
+#
+SELECT CONCAT(CONVERT('pi=' USING ucs2),PI()) AS PI;
+PI
+pi=3.141593
+#
+# End of 10.0 tests
+#
--- a/mysql-test/r/ctype_utf16.result
+++ b/mysql-test/r/ctype_utf16.result
@ -2078,3 +2078,45 @@ DFFFFFDFFFFF9CFFFF9DFFFF9EFFFF
 #
 # End of 5.6 tests
 #
+#
+# Start of 10.0 tests
+#
+#
+# MDEV-6661 PI() does not work well in UCS2/UTF16/UTF32 context
+#
+SELECT CONCAT(CONVERT('pi=' USING utf16),PI()) AS PI;
+PI
+pi=3.141593
+#
+# MDEV-6666 Malformed result for CONCAT(utf8_column, binary_string)
+#
+SET NAMES utf8mb4;
+CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf16);
+INSERT INTO t1 VALUES ('a');
+SELECT CONCAT(a,0xD800) FROM t1;
+ERROR HY000: Invalid utf16 character string: 'D800'
+SELECT CONCAT(a,0xD800DC00) FROM t1;
+CONCAT(a,0xD800DC00)
+a𐀀
+SELECT CONCAT(a,0x00FF) FROM t1;
+CONCAT(a,0x00FF)
+aÿ
+DROP TABLE t1;
+SELECT CONCAT(_utf16'a' COLLATE utf16_unicode_ci, _binary 0xD800);
+ERROR HY000: Invalid utf16 character string: 'D800'
+PREPARE stmt FROM "SELECT CONCAT(_utf16'a' COLLATE utf16_unicode_ci, ?)";
+SET @arg00=_binary 0xD800;
+EXECUTE stmt USING @arg00;
+ERROR HY000: Invalid utf16 character string: 'D800'
+SET @arg00=_binary 0xD800DC00;
+EXECUTE stmt USING @arg00;
+CONCAT(_utf16'a' COLLATE utf16_unicode_ci, ?)
+a𐀀
+SET @arg00=_binary 0x00FF;
+EXECUTE stmt USING @arg00;
+CONCAT(_utf16'a' COLLATE utf16_unicode_ci, ?)
+aÿ
+DEALLOCATE PREPARE stmt;
+#
+# End of 10.0 tests
+#
--- a/mysql-test/r/ctype_utf32.result
+++ b/mysql-test/r/ctype_utf32.result
@ -2164,3 +2164,45 @@ DFFFFFDFFFFF9CFFFF9DFFFF9EFFFF
 #
 # End of 5.6 tests
 #
+#
+# Start of 10.0 tests
+#
+#
+# MDEV-6661 PI() does not work well in UCS2/UTF16/UTF32 context
+#
+SELECT CONCAT(CONVERT('pi=' USING utf32),PI()) AS PI;
+PI
+pi=3.141593
+#
+# MDEV-6666 Malformed result for CONCAT(utf8_column, binary_string)
+#
+SET NAMES utf8mb4;
+CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf32);
+INSERT INTO t1 VALUES ('a');
+SELECT CONCAT(a,0x20FFFF) FROM t1;
+ERROR HY000: Invalid utf32 character string: '0020FF'
+SELECT CONCAT(a,0x010000) FROM t1;
+CONCAT(a,0x010000)
+a𐀀
+SELECT CONCAT(a,0x00FF) FROM t1;
+CONCAT(a,0x00FF)
+aÿ
+DROP TABLE t1;
+SELECT CONCAT(_utf32'a' COLLATE utf32_unicode_ci, _binary 0x20FFFF);
+ERROR HY000: Invalid utf32 character string: '0020FF'
+PREPARE stmt FROM "SELECT CONCAT(_utf32'a' COLLATE utf32_unicode_ci, ?)";
+SET @arg00=_binary 0x20FFFF;
+EXECUTE stmt USING @arg00;
+ERROR HY000: Invalid utf32 character string: '0020FF'
+SET @arg00=_binary 0x010000;
+EXECUTE stmt USING @arg00;
+CONCAT(_utf32'a' COLLATE utf32_unicode_ci, ?)
+a𐀀
+SET @arg00=_binary 0x00FF;
+EXECUTE stmt USING @arg00;
+CONCAT(_utf32'a' COLLATE utf32_unicode_ci, ?)
+aÿ
+DEALLOCATE PREPARE stmt;
+#
+# End of 10.0 tests
+#
--- a/mysql-test/r/ctype_utf8.result
+++ b/mysql-test/r/ctype_utf8.result
@ -5933,3 +5933,27 @@ set max_sort_length=default;
 #
 # End of 5.6 tests
 #
+#
+# Start of 10.0 tests
+#
+#
+# MDEV-6666 Malformed result for CONCAT(utf8_column, binary_string)
+#
+CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf8);
+INSERT INTO t1 VALUES ('a');
+SELECT CONCAT(a,0xFF) FROM t1;
+ERROR HY000: Invalid utf8 character string: 'FF'
+SELECT CONCAT(a,0xC3BF) FROM t1;
+CONCAT(a,0xC3BF)
+aÿ
+DROP TABLE t1;
+SELECT CONCAT('a' COLLATE utf8_unicode_ci, _binary 0xFF);
+ERROR HY000: Invalid utf8 character string: 'FF'
+PREPARE stmt FROM "SELECT CONCAT('a' COLLATE utf8_unicode_ci, ?)";
+SET @arg00=_binary 0xFF;
+EXECUTE stmt USING @arg00;
+ERROR HY000: Invalid utf8 character string: 'FF'
+DEALLOCATE PREPARE stmt;
+#
+# End of 10.0 tests
+#
--- a/mysql-test/t/ctype_ucs.test
+++ b/mysql-test/t/ctype_ucs.test
@ -891,3 +891,17 @@ DROP TABLE t1;
 --echo #
 --echo # End of 5.6 tests
 --echo #
+
+
+--echo #
+--echo # Start of 10.0 tests
+--echo #
+
+--echo #
+--echo # MDEV-6661 PI() does not work well in UCS2/UTF16/UTF32 context
+--echo #
+SELECT CONCAT(CONVERT('pi=' USING ucs2),PI()) AS PI;
+
+--echo #
+--echo # End of 10.0 tests
+--echo #
--- a/mysql-test/t/ctype_utf16.test
+++ b/mysql-test/t/ctype_utf16.test
@ -816,3 +816,39 @@ set collation_connection=utf16_bin;
 --echo # End of 5.6 tests
 --echo #

+--echo #
+--echo # Start of 10.0 tests
+--echo #
+
+--echo #
+--echo # MDEV-6661 PI() does not work well in UCS2/UTF16/UTF32 context
+--echo #
+SELECT CONCAT(CONVERT('pi=' USING utf16),PI()) AS PI;
+
+--echo #
+--echo # MDEV-6666 Malformed result for CONCAT(utf8_column, binary_string)
+--echo #
+
+SET NAMES utf8mb4;
+CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf16);
+INSERT INTO t1 VALUES ('a');
+--error ER_INVALID_CHARACTER_STRING
+SELECT CONCAT(a,0xD800) FROM t1;
+SELECT CONCAT(a,0xD800DC00) FROM t1;
+SELECT CONCAT(a,0x00FF) FROM t1;
+DROP TABLE t1;
+--error ER_INVALID_CHARACTER_STRING
+SELECT CONCAT(_utf16'a' COLLATE utf16_unicode_ci, _binary 0xD800);
+PREPARE stmt FROM "SELECT CONCAT(_utf16'a' COLLATE utf16_unicode_ci, ?)";
+SET @arg00=_binary 0xD800;
+--error ER_INVALID_CHARACTER_STRING
+EXECUTE stmt USING @arg00;
+SET @arg00=_binary 0xD800DC00;
+EXECUTE stmt USING @arg00;
+SET @arg00=_binary 0x00FF;
+EXECUTE stmt USING @arg00;
+DEALLOCATE PREPARE stmt;
+
+--echo #
+--echo # End of 10.0 tests
+--echo #
--- a/mysql-test/t/ctype_utf32.test
+++ b/mysql-test/t/ctype_utf32.test
@ -1,4 +1,5 @@
 -- source include/have_utf32.inc
+-- source include/have_utf8mb4.inc

 SET TIME_ZONE = '+03:00';

@ -918,3 +919,40 @@ set collation_connection=utf32_bin;
 --echo #


+--echo #
+--echo # Start of 10.0 tests
+--echo #
+
+--echo #
+--echo # MDEV-6661 PI() does not work well in UCS2/UTF16/UTF32 context
+--echo #
+SELECT CONCAT(CONVERT('pi=' USING utf32),PI()) AS PI;
+
+--echo #
+--echo # MDEV-6666 Malformed result for CONCAT(utf8_column, binary_string)
+--echo #
+
+SET NAMES utf8mb4;
+CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf32);
+INSERT INTO t1 VALUES ('a');
+--error ER_INVALID_CHARACTER_STRING
+SELECT CONCAT(a,0x20FFFF) FROM t1;
+SELECT CONCAT(a,0x010000) FROM t1;
+SELECT CONCAT(a,0x00FF) FROM t1;
+DROP TABLE t1;
+--error ER_INVALID_CHARACTER_STRING
+SELECT CONCAT(_utf32'a' COLLATE utf32_unicode_ci, _binary 0x20FFFF);
+PREPARE stmt FROM "SELECT CONCAT(_utf32'a' COLLATE utf32_unicode_ci, ?)";
+SET @arg00=_binary 0x20FFFF;
+--error ER_INVALID_CHARACTER_STRING
+EXECUTE stmt USING @arg00;
+SET @arg00=_binary 0x010000;
+EXECUTE stmt USING @arg00;
+SET @arg00=_binary 0x00FF;
+EXECUTE stmt USING @arg00;
+DEALLOCATE PREPARE stmt;
+
+--echo #
+--echo # End of 10.0 tests
+--echo #
+
--- a/mysql-test/t/ctype_utf8.test
+++ b/mysql-test/t/ctype_utf8.test
@ -1654,3 +1654,28 @@ set max_sort_length=default;
 --echo # End of 5.6 tests
 --echo #

+--echo #
+--echo # Start of 10.0 tests
+--echo #
+
+--echo #
+--echo # MDEV-6666 Malformed result for CONCAT(utf8_column, binary_string)
+--echo #
+
+CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf8);
+INSERT INTO t1 VALUES ('a');
+--error ER_INVALID_CHARACTER_STRING
+SELECT CONCAT(a,0xFF) FROM t1;
+SELECT CONCAT(a,0xC3BF) FROM t1;
+DROP TABLE t1;
+--error ER_INVALID_CHARACTER_STRING
+SELECT CONCAT('a' COLLATE utf8_unicode_ci, _binary 0xFF);
+PREPARE stmt FROM "SELECT CONCAT('a' COLLATE utf8_unicode_ci, ?)";
+SET @arg00=_binary 0xFF;
+--error ER_INVALID_CHARACTER_STRING
+EXECUTE stmt USING @arg00;
+DEALLOCATE PREPARE stmt;
+
+--echo #
+--echo # End of 10.0 tests
+--echo #
--- a/sql/item.cc
+++ b/sql/item.cc
@ -1166,6 +1166,8 @@ bool Item::eq(const Item *item, bool binary_cmp) const

 Item *Item::safe_charset_converter(CHARSET_INFO *tocs)
 {
+  if (!needs_charset_converter(tocs))
+    return this;
  Item_func_conv_charset *conv= new Item_func_conv_charset(this, tocs, 1);
  return conv->safe ? conv : NULL;
 }
@ -1192,77 +1194,55 @@ Item *Item_num::safe_charset_converter(CHARSET_INFO *tocs)
  if (!(tocs->state & MY_CS_NONASCII))
    return this;
  
-  Item_string *conv;
-  uint conv_errors;
-  char buf[64], buf2[64];
-  String tmp(buf, sizeof(buf), &my_charset_bin);
-  String cstr(buf2, sizeof(buf2), &my_charset_bin);
-  String *ostr= val_str(&tmp);
-  char *ptr;
-  cstr.copy(ostr->ptr(), ostr->length(), ostr->charset(), tocs, &conv_errors);
-  if (conv_errors || !(conv= new Item_string(cstr.ptr(), cstr.length(),
-                                             cstr.charset(),
-                                             collation.derivation)))
-  {
-    /*
-      Safe conversion is not possible (or EOM).
-      We could not convert a string into the requested character set
-      without data loss. The target charset does not cover all the
-      characters from the string. Operation cannot be done correctly.
-    */
-    return NULL;
-  }
-  if (!(ptr= current_thd->strmake(cstr.ptr(), cstr.length())))
-    return NULL;
-  conv->str_value.set(ptr, cstr.length(), cstr.charset());
-  /* Ensure that no one is going to change the result string */
-  conv->str_value.mark_as_const();
-  conv->fix_char_length(max_char_length());
+  Item *conv;
+  if ((conv= const_charset_converter(tocs, true)))
+    conv->fix_char_length(max_char_length());
  return conv;
 }


-Item *Item_static_float_func::safe_charset_converter(CHARSET_INFO *tocs)
-{
-  Item_string *conv;
-  char buf[64];
-  String *s, tmp(buf, sizeof(buf), &my_charset_bin);
-  s= val_str(&tmp);
-  if ((conv= new Item_static_string_func(func_name, s->ptr(), s->length(),
-                                         s->charset())))
-  {
-    conv->str_value.copy();
-    conv->str_value.mark_as_const();
-  }
-  return conv;
-}
-
-
-Item *Item_string::safe_charset_converter(CHARSET_INFO *tocs)
-{
-  return charset_converter(tocs, true);
-}
-
-
 /**
-  Convert a string item into the requested character set.
+  Create character set converter for constant items
+  using Item_null, Item_string or Item_static_string_func.

  @param tocs       Character set to to convert the string to.
  @param lossless   Whether data loss is acceptable.
-
-  @return A new item representing the converted string.
+  @param func_name  Function name, or NULL.
+  
+  @return           this, if conversion is not needed,
+                    NULL, if safe conversion is not possible, or
+                    a new item representing the converted constant.
 */
-Item *Item_string::charset_converter(CHARSET_INFO *tocs, bool lossless)
+Item *Item::const_charset_converter(CHARSET_INFO *tocs,
+                                    bool lossless,
+                                    const char *func_name)
 {
-  Item_string *conv;
+  DBUG_ASSERT(const_item());
+  DBUG_ASSERT(fixed);
+  StringBuffer<64>tmp;
+  String *s= val_str(&tmp);
+  if (!s)
+    return new Item_null((char *) func_name, tocs);
+
+  if (!needs_charset_converter(s->length(), tocs))
+  {
+    if (collation.collation == &my_charset_bin && tocs != &my_charset_bin &&
+        !this->check_well_formed_result(s, true))
+      return NULL;
+    return this;
+  }
+
  uint conv_errors;
-  char *ptr;
-  String tmp, cstr, *ostr= val_str(&tmp);
-  cstr.copy(ostr->ptr(), ostr->length(), ostr->charset(), tocs, &conv_errors);
-  conv_errors= lossless && conv_errors;
-  if (conv_errors || !(conv= new Item_string(cstr.ptr(), cstr.length(),
-                                             cstr.charset(),
-                                             collation.derivation)))
+  Item_string *conv= func_name ?
+                     new Item_static_string_func(func_name,
+                                                 s, tocs, &conv_errors,
+                                                 collation.derivation,
+                                                 collation.repertoire) :
+                     new Item_string(s, tocs, &conv_errors,
+                                     collation.derivation,
+                                     collation.repertoire);
+
+  if (!conv || (conv_errors && lossless))
  {
    /*
      Safe conversion is not possible (or EOM).
@ -1272,56 +1252,44 @@ Item *Item_string::charset_converter(CHARSET_INFO *tocs, bool lossless)
    */
    return NULL;
  }
-  if (!(ptr= current_thd->strmake(cstr.ptr(), cstr.length())))
+  if (s->charset() == &my_charset_bin && tocs != &my_charset_bin &&
+      !conv->check_well_formed_result(true))
    return NULL;
-  conv->str_value.set(ptr, cstr.length(), cstr.charset());
-  /* Ensure that no one is going to change the result string */
-  conv->str_value.mark_as_const();
  return conv;
 }

+
 Item *Item_param::safe_charset_converter(CHARSET_INFO *tocs)
 {
+  /*
+    Return "this" if in prepare. result_type may change at execution time,
+    so it's possible that the converter will not be needed at all:
+
+    PREPARE stmt FROM 'SELECT * FROM t1 WHERE field = ?';
+    SET @arg= 1;
+    EXECUTE stmt USING @arg;
+
+    result_type is STRING_RESULT at prepare time,
+    and INT_RESULT at execution time.
+  */
  if (const_item())
  {
    uint cnv_errors;
    String *ostr= val_str(&cnvstr);
+    if (!needs_charset_converter(tocs))
+      return this;
    cnvitem->str_value.copy(ostr->ptr(), ostr->length(),
                            ostr->charset(), tocs, &cnv_errors);
    if (cnv_errors)
       return NULL;
+    if (ostr->charset() == &my_charset_bin && tocs != &my_charset_bin &&
+        !cnvitem->check_well_formed_result(&cnvitem->str_value, true))
+      return NULL;
    cnvitem->str_value.mark_as_const();
    cnvitem->max_length= cnvitem->str_value.numchars() * tocs->mbmaxlen;
    return cnvitem;
  }
-  return Item::safe_charset_converter(tocs);
-}
-
-
-Item *Item_static_string_func::safe_charset_converter(CHARSET_INFO *tocs)
-{
-  Item_string *conv;
-  uint conv_errors;
-  String tmp, cstr, *ostr= val_str(&tmp);
-  cstr.copy(ostr->ptr(), ostr->length(), ostr->charset(), tocs, &conv_errors);
-  if (conv_errors ||
-      !(conv= new Item_static_string_func(func_name,
-                                          cstr.ptr(), cstr.length(),
-                                          cstr.charset(),
-                                          collation.derivation)))
-  {
-    /*
-      Safe conversion is not possible (or EOM).
-      We could not convert a string into the requested character set
-      without data loss. The target charset does not cover all the
-      characters from the string. Operation cannot be done correctly.
-    */
-    return NULL;
-  }
-  conv->str_value.copy();
-  /* Ensure that no one is going to change the result string */
-  conv->str_value.mark_as_const();
-  return conv;
+  return this;
 }


@ -2203,33 +2171,10 @@ bool agg_item_set_converter(DTCollation &coll, const char *fname,

  for (i= 0, arg= args; i < nargs; i++, arg+= item_sep)
  {
-    Item* conv;
-    uint32 dummy_offset;
-    if (!String::needs_conversion(1, (*arg)->collation.collation,
-                                  coll.collation,
-                                  &dummy_offset))
+    Item* conv= (*arg)->safe_charset_converter(coll.collation);
+    if (conv == *arg)
      continue;
-
-    /*
-      No needs to add converter if an "arg" is NUMERIC or DATETIME
-      value (which is pure ASCII) and at the same time target DTCollation
-      is ASCII-compatible. For example, no needs to rewrite:
-        SELECT * FROM t1 WHERE datetime_field = '2010-01-01';
-      to
-        SELECT * FROM t1 WHERE CONVERT(datetime_field USING cs) = '2010-01-01';
-      
-      TODO: avoid conversion of any values with
-      repertoire ASCII and 7bit-ASCII-compatible,
-      not only numeric/datetime origin.
-    */
-    if ((*arg)->collation.derivation == DERIVATION_NUMERIC &&
-        (*arg)->collation.repertoire == MY_REPERTOIRE_ASCII &&
-        !((*arg)->collation.collation->state & MY_CS_NONASCII) &&
-        !(coll.collation->state & MY_CS_NONASCII))
-      continue;
-
-    if (!(conv= (*arg)->safe_charset_converter(coll.collation)) &&
-        ((*arg)->collation.repertoire == MY_REPERTOIRE_ASCII))
+    if (!conv && ((*arg)->collation.repertoire == MY_REPERTOIRE_ASCII))
      conv= new Item_func_conv_charset(*arg, coll.collation, 1);

    if (!conv)
@ -3015,7 +2960,7 @@ String *Item_float::val_str(String *str)
 {
  // following assert is redundant, because fixed=1 assigned in constructor
  DBUG_ASSERT(fixed == 1);
-  str->set_real(value,decimals,&my_charset_bin);
+  str->set_real(value, decimals, &my_charset_numeric);
  return str;
 }

@ -5375,13 +5320,6 @@ bool Item_field::vcol_in_partition_func_processor(uchar *int_arg)
 }


-Item *Item_field::safe_charset_converter(CHARSET_INFO *tocs)
-{
-  no_const_subst= 1;
-  return Item::safe_charset_converter(tocs);
-}
-
-
 void Item_field::cleanup()
 {
  DBUG_ENTER("Item_field::cleanup");
@ -5687,10 +5625,7 @@ String *Item::check_well_formed_result(String *str, bool send_error)
 {
  /* Check whether we got a well-formed string */
  CHARSET_INFO *cs= str->charset();
-  int well_formed_error;
-  uint wlen= cs->cset->well_formed_len(cs,
-                                       str->ptr(), str->ptr() + str->length(),
-                                       str->length(), &well_formed_error);
+  uint wlen= str->well_formed_length();
  if (wlen < str->length())
  {
    THD *thd= current_thd;
@ -6441,19 +6376,6 @@ bool Item_hex_constant::eq(const Item *arg, bool binary_cmp) const
 }


-Item *Item_hex_constant::safe_charset_converter(CHARSET_INFO *tocs)
-{
-  Item_string *conv;
-  String tmp, *str= val_str(&tmp);
-
-  if (!(conv= new Item_string(str->ptr(), str->length(), tocs)))
-    return NULL;
-  conv->str_value.copy();
-  conv->str_value.mark_as_const();
-  return conv;
-}
-
-
 /*
  bin item.
  In string context this is a binary string.
--- a/sql/item.h
+++ b/sql/item.h
@ -1463,6 +1463,48 @@ public:
  virtual Item *expr_cache_insert_transformer(uchar *thd_arg) { return this; }
  virtual bool expr_cache_is_needed(THD *) { return FALSE; }
  virtual Item *safe_charset_converter(CHARSET_INFO *tocs);
+  bool needs_charset_converter(uint32 length, CHARSET_INFO *tocs)
+  {
+    /*
+      This will return "true" if conversion happens:
+      - between two different non-binary character sets
+      - from "binary" to an "unsafe" character set
+        (one that can contain non-well-formed strings)
+      - from "binary" to a UCS2-like character set with mbminlen>1,
+        when prefix left-padding is needed for an incomplete character
+        (e.g. binary 0xFF -> ucs2 0x00FF)
+    */
+    if (!String::needs_conversion_on_storage(length,
+                                             collation.collation, tocs))
+      return false;
+    /*
+      No need to add a converter if this item is a NUMERIC or DATETIME
+      value (which is pure ASCII) and at the same time the target DTCollation
+      is ASCII-compatible. For example, there is no need to rewrite:
+        SELECT * FROM t1 WHERE datetime_field = '2010-01-01';
+      to
+        SELECT * FROM t1 WHERE CONVERT(datetime_field USING cs) = '2010-01-01';
+      
+      TODO: avoid conversion of any values with
+      repertoire ASCII and 7bit-ASCII-compatible,
+      not only numeric/datetime origin.
+    */
+    if (collation.derivation == DERIVATION_NUMERIC &&
+        collation.repertoire == MY_REPERTOIRE_ASCII &&
+        !(collation.collation->state & MY_CS_NONASCII) &&
+        !(tocs->state & MY_CS_NONASCII))
+      return false;
+    return true;
+  }
+  bool needs_charset_converter(CHARSET_INFO *tocs)
+  {
+    // Pass 1 as length to force conversion if tocs->mbminlen>1.
+    return needs_charset_converter(1, tocs);
+  }
+  Item *const_charset_converter(CHARSET_INFO *tocs, bool lossless,
+                                const char *func_name);
+  Item *const_charset_converter(CHARSET_INFO *tocs, bool lossless)
+  { return const_charset_converter(tocs, lossless, NULL); }
  void delete_self()
  {
    cleanup();
@ -2189,7 +2231,6 @@ public:
  Item *replace_equal_field(uchar *arg);
  inline uint32 max_disp_length() { return field->max_display_length(); }
  Item_field *field_for_view_update() { return this; }
-  Item *safe_charset_converter(CHARSET_INFO *tocs);
  int fix_outer_field(THD *thd, Field **field, Item **reference);
  virtual Item *update_value_transformer(uchar *select_arg);
  virtual void print(String *str, enum_query_type query_type);
@ -2213,13 +2254,13 @@ public:
 class Item_null :public Item_basic_constant
 {
 public:
-  Item_null(char *name_par=0)
+  Item_null(char *name_par=0, CHARSET_INFO *cs= &my_charset_bin)
  {
    maybe_null= null_value= TRUE;
    max_length= 0;
    name= name_par ? name_par : (char*) "NULL";
    fixed= 1;
-    collation.set(&my_charset_bin, DERIVATION_IGNORABLE);
+    collation.set(cs, DERIVATION_IGNORABLE);
  }
  enum Type type() const { return NULL_ITEM; }
  bool eq(const Item *item, bool binary_cmp) const;
@ -2594,7 +2635,10 @@ public:
    str->append(func_name);
  }

-  Item *safe_charset_converter(CHARSET_INFO *tocs);
+  Item *safe_charset_converter(CHARSET_INFO *tocs)
+  {
+    return const_charset_converter(tocs, true, func_name);
+  }
 };


@ -2621,6 +2665,19 @@ public:
    // it is constant => can be used without fix_fields (and frequently used)
    fixed= 1;
  }
+  Item_string(const String *str, CHARSET_INFO *tocs, uint *conv_errors,
+              Derivation dv, uint repertoire)
+    :m_cs_specified(false)
+  {
+    if (str_value.copy(str, tocs, conv_errors))
+      str_value.set("", 0, tocs); // EOM ?
+    str_value.mark_as_const();
+    collation.set(tocs, dv, repertoire);
+    fix_char_length(str_value.numchars());
+    set_name(str_value.ptr(), str_value.length(), tocs);
+    decimals= NOT_FIXED_DEC;
+    fixed= 1;
+  }
  /* Just create an item and do not fill string representation */
  Item_string(CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE)
    : m_cs_specified(FALSE)
@ -2678,8 +2735,10 @@ public:
    return new Item_string(name, str_value.ptr(), 
    			   str_value.length(), collation.collation);
  }
-  Item *safe_charset_converter(CHARSET_INFO *tocs);
-  Item *charset_converter(CHARSET_INFO *tocs, bool lossless);
+  Item *safe_charset_converter(CHARSET_INFO *tocs)
+  {
+    return const_charset_converter(tocs, true);
+  }
  inline void append(char *str, uint length)
  {
    str_value.append(str, length);
@ -2728,6 +2787,9 @@ public:
    m_cs_specified= cs_specified;
  }

+  String *check_well_formed_result(bool send_error)
+  { return Item::check_well_formed_result(&str_value, send_error); }
+
 private:
  bool m_cs_specified;
 };
@ -2749,7 +2811,17 @@ public:
                          Derivation dv= DERIVATION_COERCIBLE)
    :Item_string(NullS, str, length, cs, dv), func_name(name_par)
  {}
-  Item *safe_charset_converter(CHARSET_INFO *tocs);
+  Item_static_string_func(const char *name_par,
+                          const String *str,
+                          CHARSET_INFO *tocs, uint *conv_errors,
+                          Derivation dv, uint repertoire)
+    :Item_string(str, tocs, conv_errors, dv, repertoire),
+     func_name(name_par)
+  {}
+  Item *safe_charset_converter(CHARSET_INFO *tocs)
+  {
+    return const_charset_converter(tocs, true, func_name);
+  }

  virtual inline void print(String *str, enum_query_type query_type)
  {
@ -2852,7 +2924,10 @@ public:
  enum Type type() const { return VARBIN_ITEM; }
  enum Item_result result_type () const { return STRING_RESULT; }
  enum_field_types field_type() const { return MYSQL_TYPE_VARCHAR; }
-  virtual Item *safe_charset_converter(CHARSET_INFO *tocs);
+  virtual Item *safe_charset_converter(CHARSET_INFO *tocs)
+  {
+    return const_charset_converter(tocs, true);
+  }
  bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
  bool check_vcol_func_processor(uchar *arg) { return FALSE;}
  bool basic_const_item() const { return 1; }
--- a/sql/item_strfunc.cc
+++ b/sql/item_strfunc.cc
@ -2328,32 +2328,6 @@ void Item_func_decode::crypto_transform(String *res)
 }


-Item *Item_func_sysconst::safe_charset_converter(CHARSET_INFO *tocs)
-{
-  Item_string *conv;
-  uint conv_errors;
-  String tmp, cstr, *ostr= val_str(&tmp);
-  if (null_value)
-  {
-    Item *null_item= new Item_null((char *) fully_qualified_func_name());
-    null_item->collation.set (tocs);
-    return null_item;
-  }
-  cstr.copy(ostr->ptr(), ostr->length(), ostr->charset(), tocs, &conv_errors);
-  if (conv_errors ||
-      !(conv= new Item_static_string_func(fully_qualified_func_name(),
-                                          cstr.ptr(), cstr.length(),
-                                          cstr.charset(),
-                                          collation.derivation)))
-  {
-    return NULL;
-  }
-  conv->str_value.copy();
-  conv->str_value.mark_as_const();
-  return conv;
-}
-
-
 String *Item_func_database::val_str(String *str)
 {
  DBUG_ASSERT(fixed == 1);
--- a/sql/item_strfunc.h
+++ b/sql/item_strfunc.h
@ -542,7 +542,10 @@ class Item_func_sysconst :public Item_str_func
 public:
  Item_func_sysconst()
  { collation.set(system_charset_info,DERIVATION_SYSCONST); }
-  Item *safe_charset_converter(CHARSET_INFO *tocs);
+  Item *safe_charset_converter(CHARSET_INFO *tocs)
+  {
+    return const_charset_converter(tocs, true, fully_qualified_func_name());
+  }
  /*
    Used to create correct Item name in new converted item in
    safe_charset_converter, return string representation of this function
--- a/sql/item_xmlfunc.cc
+++ b/sql/item_xmlfunc.cc
@ -532,6 +532,32 @@ public:
 };


+/**
+  A string whose value may be changed during execution.
+*/
+class Item_string_xml_non_const: public Item_string
+{
+public:
+  Item_string_xml_non_const(const char *str, uint length, CHARSET_INFO *cs)
+    :Item_string(str, length, cs)
+  { }
+  bool const_item() const { return false ; }
+  bool basic_const_item() const { return false; }
+  void set_value(const char *str, uint length, CHARSET_INFO *cs)
+  {
+    str_value.set(str, length, cs);
+  }
+  Item *safe_charset_converter(CHARSET_INFO *tocs)
+  {
+    /*
+      Item_string::safe_charset_converter() does not accept non-constants.
+      Note, conversion is not really needed here anyway.
+    */
+    return this;
+  }
+};
+
+
 class Item_nodeset_to_const_comparator :public Item_bool_func
 {
  String *pxml;
@ -550,7 +576,8 @@ public:
  longlong val_int()
  {
    Item_func *comp= (Item_func*)args[1];
-    Item_string *fake= (Item_string*)(comp->arguments()[0]);
+    Item_string_xml_non_const *fake=
+      (Item_string_xml_non_const*)(comp->arguments()[0]);
    String *res= args[0]->val_nodeset(&tmp_nodeset);
    MY_XPATH_FLT *fltbeg= (MY_XPATH_FLT*) res->ptr();
    MY_XPATH_FLT *fltend= (MY_XPATH_FLT*) (res->ptr() + res->length());
@ -568,8 +595,8 @@ public:
        if ((node->parent == flt->num) &&
            (node->type == MY_XML_NODE_TEXT))
        {
-          fake->str_value.set(node->beg, node->end - node->beg,
-                              collation.collation);
+          fake->set_value(node->beg, node->end - node->beg,
+                          collation.collation);
          if (args[1]->val_int())
            return 1;
        }
@ -956,14 +983,12 @@ static Item *create_comparator(MY_XPATH *xpath,
  {
    /*
     Compare a node set to a scalar value.
-     We just create a fake Item_string() argument,
+     We just create a fake Item_string_xml_non_const() argument,
     which will be filled to the partular value
     in a loop through all of the nodes in the node set.
    */

-    Item_string *fake= new Item_string("", 0, xpath->cs);
-    /* Don't cache fake because its value will be changed during comparison.*/
-    fake->set_used_tables(RAND_TABLE_BIT);
+    Item_string *fake= new Item_string_xml_non_const("", 0, xpath->cs);
    Item_nodeset_func *nodeset;
    Item *scalar, *comp;
    if (a->type() == Item::XPATH_NODESET)
--- a/sql/sql_get_diagnostics.cc
+++ b/sql/sql_get_diagnostics.cc
@ -267,9 +267,11 @@ Condition_information_item::make_utf8_string_item(THD *thd, const String *str)
  CHARSET_INFO *to_cs= &my_charset_utf8_general_ci;
  /* If a charset was not set, assume that no conversion is needed. */
  CHARSET_INFO *from_cs= str->charset() ? str->charset() : to_cs;
-  Item_string *item= new Item_string(str->ptr(), str->length(), from_cs);
+  String tmp(str->ptr(), str->length(), from_cs);
  /* If necessary, convert the string (ignoring errors), then copy it over. */
-  return item ? item->charset_converter(to_cs, false) : NULL;
+  uint conv_errors;
+  return new Item_string(&tmp, to_cs, &conv_errors,
+                         DERIVATION_COERCIBLE, MY_REPERTOIRE_UNICODE30);
 }


--- a/sql/sql_string.h
+++ b/sql/sql_string.h
@ -351,6 +351,10 @@ public:
  bool set_or_copy_aligned(const char *s, uint32 arg_length, CHARSET_INFO *cs);
  bool copy(const char*s,uint32 arg_length, CHARSET_INFO *csfrom,
 	    CHARSET_INFO *csto, uint *errors);
+  bool copy(const String *str, CHARSET_INFO *tocs, uint *errors)
+  {
+    return copy(str->ptr(), str->length(), str->charset(), tocs, errors);
+  }
  void move(String &s)
  {
    free();
@ -517,6 +521,12 @@ public:
  {
    return (s->alloced && Ptr >= s->Ptr && Ptr < s->Ptr + s->str_length);
  }
+  uint well_formed_length() const
+  {
+    int dummy_error;
+    return charset()->cset->well_formed_len(charset(), ptr(), ptr() + length(),
+                                            length(), &dummy_error);
+  }
  bool is_ascii() const
  {
    if (length() == 0)
--- a/sql/sql_yacc.yy
+++ b/sql/sql_yacc.yy
@ -13432,7 +13432,7 @@ literal:
                                    str ? str->length() : 0,
                                    $1);
            if (!item_str ||
-                !item_str->check_well_formed_result(&item_str->str_value, TRUE))
+                !item_str->check_well_formed_result(true))
            {
              MYSQL_YYABORT;
            }
@ -13461,7 +13461,7 @@ literal:
                                    str ? str->length() : 0,
                                    $1);
            if (!item_str ||
-                !item_str->check_well_formed_result(&item_str->str_value, TRUE))
+                !item_str->check_well_formed_result(true))
            {
              MYSQL_YYABORT;
            }