Step#3 MDEV-27896 Wrong result upon COLLATE latin1_bin CHARACTER SET latin1 on the table or the database level

Splitting Lex_exact_charset_extended_collation_attrs_st into small components.

- Adding classes:
  * Lex_exact_charset
  * Lex_context_collation
  * Lex_exact_collation
  * Lex_extended_collation_st
  * Lex_extended_collation
  and moving pieces of the code from methods
  * merge_charset_clause_and_collate_clause()
  * merge_collate_clause_and_collate_clause()
  into smaller methods in the new classes.
  It's easier to read, handle and reuse the code this way.

- Moving static methods find_default_collation() and find_bin_collation()
  from Lex_exact_charset_extended_collation_attrs_st to non-static methods in
  Lex_exact_charset_opt_extended_collate, as now it's a better place for them.

- Using Lex_extended_collation_st in sql_yacc.yy to handle COLLATE clauses,
  to handle both context and exact collations
  (instead of the previous notation with NULL CHARSET_INFO pointer
   meaning DEFAULT, and not-NULL meaning an exact collation).
  This change will also help to add more context (UCA1400) collations soon.
  The old notation with CHARSET_INFO won't be enough.

- Adding LEX::set_names() and reusing it in two places in sql_yacc.yy

- Removing the opt_collate_or_default rule. It was used only
  to handle the CONVERT TO related grammar. Had to add some code duplication,
  but it will be gone in one of the next commits.

This change will also soon help to add
Lex_extended_charset_extended_collation_attrs_st -
a new class to handle table and database level CHARACTER SET and COLLATE
clauses easier.
This commit is contained in:
Alexander Barkov 2022-05-23 11:05:33 +04:00
parent e7f635e2d2
commit 89adedcb9f
6 changed files with 775 additions and 301 deletions

View file

@ -5514,11 +5514,9 @@ public:
{
if (!charset)
return Lex_column_charset_collation_attrs();
return Lex_column_charset_collation_attrs(
charset,
flags & CONTEXT_COLLATION_FLAG ?
Lex_column_charset_collation_attrs_st::TYPE_COLLATE_CONTEXTUALLY_TYPED :
Lex_column_charset_collation_attrs_st::TYPE_CHARACTER_SET);
if (flags & CONTEXT_COLLATION_FLAG)
return Lex_column_charset_collation_attrs(Lex_context_collation(charset));
return Lex_column_charset_collation_attrs(Lex_exact_collation(charset));
}
};

View file

@ -21,49 +21,317 @@
#include "mysqld_error.h"
/*
  Report ER_CONFLICTING_DECLARATIONS for two clauses.

  @param clause1       - Prefix of the first clause, e.g. "COLLATE "
  @param name1         - Name used in the first clause
  @param clause2       - Prefix of the second clause
  @param name2         - Name used in the second clause
  @param reverse_order - When true, the second clause/name pair is passed
                         to my_error() before the first one.
*/
static void
raise_ER_CONFLICTING_DECLARATIONS(const char *clause1,
                                  const char *name1,
                                  const char *clause2,
                                  const char *name2,
                                  bool reverse_order)
{
  if (reverse_order)
  {
    my_error(ER_CONFLICTING_DECLARATIONS, MYF(0),
             clause2, name2, clause1, name1);
    return;
  }
  my_error(ER_CONFLICTING_DECLARATIONS, MYF(0),
           clause1, name1, clause2, name2);
}
/*
  Compare two CHARACTER SET clauses.

  @retval false - Both clauses refer to the same CHARSET_INFO.
  @retval true  - They differ; ER_CONFLICTING_DECLARATIONS was reported.
*/
bool Lex_exact_charset::raise_if_not_equal(const Lex_exact_charset &rhs) const
{
  if (m_ci != rhs.m_ci)
  {
    my_error(ER_CONFLICTING_DECLARATIONS, MYF(0),
             "CHARACTER SET ", m_ci->cs_name.str,
             "CHARACTER SET ", rhs.m_ci->cs_name.str);
    return true;
  }
  return false;
}
bool Lex_exact_charset::
raise_if_not_applicable(const Lex_exact_collation &cl) const
{
return Lex_exact_charset_opt_extended_collate(m_ci, false).
raise_if_not_applicable(cl);
}
/*
  Check that the exact collation "cl" belongs to the same character set
  as m_ci (as decided by my_charset_same()).

  @retval false - The collation is applicable.
  @retval true  - Mismatch; ER_COLLATION_CHARSET_MISMATCH was reported.
*/
bool Lex_exact_charset_opt_extended_collate::
       raise_if_not_applicable(const Lex_exact_collation &cl) const
{
  if (my_charset_same(m_ci, cl.charset_info()))
    return false;
  my_error(ER_COLLATION_CHARSET_MISMATCH, MYF(0),
           cl.charset_info()->coll_name.str, m_ci->cs_name.str);
  return true;
}
/*
  Compare two exact COLLATE clauses.

  @retval false - Both clauses refer to the same CHARSET_INFO.
  @retval true  - They differ; ER_CONFLICTING_DECLARATIONS was reported.
*/
bool
Lex_exact_collation::raise_if_not_equal(const Lex_exact_collation &cl) const
{
  if (m_ci == cl.m_ci)
    return false;
  my_error(ER_CONFLICTING_DECLARATIONS, MYF(0),
           "COLLATE ", m_ci->coll_name.str,
           "COLLATE ", cl.m_ci->coll_name.str);
  return true;
}
/*
  Check whether this exact collation is compatible with a contextual
  collation.

  @param cl            - The contextual collation to check against "this".
  @param reverse_order - Controls the clause order in the error message.
                         Pass true when the contextual collation is on
                         the left side:
                           false: COLLATE latin1_swedish_ci COLLATE DEFAULT
                           true:  COLLATE DEFAULT COLLATE latin1_swedish_ci
  @retval false - No conflict.
  @retval true  - Conflict; ER_CONFLICTING_DECLARATIONS was reported.
*/
bool
Lex_exact_collation::
  raise_if_conflicts_with_context_collation(const Lex_context_collation &cl,
                                            bool reverse_order) const
{
  // COLLATE DEFAULT needs a collation flagged MY_CS_PRIMARY
  if (cl.is_contextually_typed_collate_default())
  {
    if (!(m_ci->state & MY_CS_PRIMARY))
    {
      raise_ER_CONFLICTING_DECLARATIONS("COLLATE ", m_ci->coll_name.str,
                                        "COLLATE ", "DEFAULT", reverse_order);
      return true;
    }
  }

  // BINARY needs a collation flagged MY_CS_BINSORT
  if (cl.is_contextually_typed_binary_style())
  {
    if (!(m_ci->state & MY_CS_BINSORT))
    {
      raise_ER_CONFLICTING_DECLARATIONS("COLLATE ", m_ci->coll_name.str,
                                        "", "BINARY", reverse_order);
      return true;
    }
  }

  return false;
}
/*
  Compare two contextual collations.
  Always returns false; the equality is only checked by a debug assert.
*/
bool
Lex_context_collation::raise_if_not_equal(const Lex_context_collation &cl) const
{
  /*
    So far both sides are always the same context collation:
    - The column grammar supports BINARY only, not COLLATE DEFAULT
    - The DB/Table grammar supports COLLATE DEFAULT only
    Different context collations will become possible here
    when uca1400 collations are added.
  */
  DBUG_ASSERT(m_ci == cl.m_ci);
  return false;
}
/*
Resolve a context collation to the character set (when the former gets known):
CREATE TABLE t1 (a CHAR(10) BINARY) CHARACTER SET latin1;
CREATE DATABASE db1 COLLATE DEFAULT CHARACTER SET latin1;
*/
bool Lex_exact_charset_opt_extended_collate::
merge_context_collation_override(const Lex_context_collation &cl)
{
DBUG_ASSERT(m_ci);
// CHAR(10) BINARY
if (cl.is_contextually_typed_binary_style())
{
CHARSET_INFO *ci= find_bin_collation();
if (!ci)
return true;
m_ci= ci;
m_with_collate= true;
return false;
}
// COLLATE DEFAULT
if (cl.is_contextually_typed_collate_default())
{
CHARSET_INFO *ci= find_default_collation();
DBUG_ASSERT(ci);
if (!ci)
return true;
m_ci= ci;
m_with_collate= true;
return false;
}
/*
A non-binary and non-default contextually typed collation.
We don't have such yet - the parser cannot produce this.
But we have "uca1400_as_ci" coming soon.
*/
DBUG_ASSERT(0);
return false;
}
bool Lex_extended_collation_st::merge_exact_charset(const Lex_exact_charset &cs)
{
switch (m_type) {
case TYPE_EXACT:
{
// COLLATE latin1_swedish_ci .. CHARACTER SET latin1
return cs.raise_if_not_applicable(Lex_exact_collation(m_ci));
}
case TYPE_CONTEXTUALLY_TYPED:
{
// COLLATE DEFAULT .. CHARACTER SET latin1
Lex_exact_charset_opt_extended_collate tmp(cs);
if (tmp.merge_context_collation(Lex_context_collation(m_ci)))
return true;
*this= Lex_extended_collation(tmp.collation());
return false;
}
}
DBUG_ASSERT(0);
return false;
}
bool Lex_extended_collation_st::
merge_exact_collation(const Lex_exact_collation &rhs)
{
switch (m_type) {
case TYPE_EXACT:
/*
EXACT + EXACT
COLLATE latin1_bin .. COLLATE latin1_bin
*/
return Lex_exact_collation(m_ci).raise_if_not_equal(rhs);
case TYPE_CONTEXTUALLY_TYPED:
{
/*
CONTEXT + EXACT
CHAR(10) COLLATE DEFAULT .. COLLATE latin1_swedish_ci
CHAR(10) BINARY .. COLLATE latin1_bin
CHAR(10) COLLATE uca1400_as_ci .. COLLATE latin1_bin - coming soon
*/
if (rhs.raise_if_conflicts_with_context_collation(
Lex_context_collation(m_ci), true))
return true;
*this= Lex_extended_collation(rhs);
return false;
}
}
DBUG_ASSERT(0);
return false;
}
bool Lex_extended_collation_st::
raise_if_conflicts_with_context_collation(const Lex_context_collation &rhs)
const
{
switch (m_type) {
case TYPE_EXACT:
/*
EXACT + CONTEXT
COLLATE latin1_swedish_ci .. COLLATE DEFAULT
*/
return Lex_exact_collation(m_ci).
raise_if_conflicts_with_context_collation(rhs, false);
case TYPE_CONTEXTUALLY_TYPED:
{
/*
CONTEXT + CONTEXT:
CHAR(10) BINARY .. COLLATE DEFAULT - not supported by the parser
CREATE DATABASE db1 COLLATE DEFAULT COLLATE DEFAULT;
*/
return Lex_context_collation(m_ci).raise_if_not_equal(rhs);
}
}
DBUG_ASSERT(0);
return false;
}
/*
Merge two non-empty COLLATE clauses.
*/
bool Lex_extended_collation_st::merge(const Lex_extended_collation_st &rhs)
{
switch (rhs.type()) {
case TYPE_EXACT:
/*
EXACT + EXACT
COLLATE latin1_swedish_ci .. COLLATE latin1_swedish_ci
CONTEXT + EXACT
COLLATE DEFAULT .. COLLATE latin1_swedish_ci
CHAR(10) BINARY .. COLLATE latin1_bin
*/
return merge_exact_collation(Lex_exact_collation(rhs.m_ci));
case TYPE_CONTEXTUALLY_TYPED:
/*
EXACT + CONTEXT
COLLATE latin1_swedish_ci .. COLLATE DEFAULT
CONTEXT + CONTEXT
COLLATE DEFAULT .. COLLATE DEFAULT
CHAR(10) BINARY .. COLLATE DEFAULT
*/
return raise_if_conflicts_with_context_collation(
Lex_context_collation(rhs.m_ci));
}
DBUG_ASSERT(0);
return false;
}
/** find a collation with binary comparison rules
*/
CHARSET_INFO *Lex_exact_charset_extended_collation_attrs_st::
find_bin_collation(CHARSET_INFO *cs)
CHARSET_INFO *Lex_exact_charset_opt_extended_collate::find_bin_collation() const
{
/*
We don't need to handle old_mode=UTF8_IS_UTF8MB3 here,
because "cs" points to a real character set name.
because "m_ci" points to a real character set name.
It can be either "utf8mb3" or "utf8mb4". It cannot be "utf8".
No thd->get_utf8_flag() flag passed to get_charset_by_csname().
*/
DBUG_ASSERT(cs->cs_name.length !=4 || memcmp(cs->cs_name.str, "utf8", 4));
DBUG_ASSERT(m_ci->cs_name.length !=4 || memcmp(m_ci->cs_name.str, "utf8", 4));
/*
CREATE TABLE t1 (a CHAR(10) BINARY)
CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;
Nothing to do, we have the binary collation already.
*/
if (cs->state & MY_CS_BINSORT)
return cs;
if (m_ci->state & MY_CS_BINSORT)
return m_ci;
// CREATE TABLE t1 (a CHAR(10) BINARY) CHARACTER SET utf8mb4;
const LEX_CSTRING &cs_name= cs->cs_name;
if (!(cs= get_charset_by_csname(cs->cs_name.str, MY_CS_BINSORT, MYF(0))))
CHARSET_INFO *cs;
if (!(cs= get_charset_by_csname(m_ci->cs_name.str, MY_CS_BINSORT, MYF(0))))
{
char tmp[65];
strxnmov(tmp, sizeof(tmp)-1, cs_name.str, "_bin", NULL);
strxnmov(tmp, sizeof(tmp)-1, m_ci->cs_name.str, "_bin", NULL);
my_error(ER_UNKNOWN_COLLATION, MYF(0), tmp);
}
return cs;
}
CHARSET_INFO *Lex_exact_charset_extended_collation_attrs_st::
find_default_collation(CHARSET_INFO *cs)
CHARSET_INFO *
Lex_exact_charset_opt_extended_collate::find_default_collation() const
{
// See comments in find_bin_collation()
DBUG_ASSERT(cs->cs_name.length !=4 || memcmp(cs->cs_name.str, "utf8", 4));
DBUG_ASSERT(m_ci->cs_name.length !=4 || memcmp(m_ci->cs_name.str, "utf8", 4));
/*
CREATE TABLE t1 (a CHAR(10) COLLATE DEFAULT) CHARACTER SET utf8mb4;
Nothing to do, we have the default collation already.
*/
if (cs->state & MY_CS_PRIMARY)
return cs;
if (m_ci->state & MY_CS_PRIMARY)
return m_ci;
/*
CREATE TABLE t1 (a CHAR(10) COLLATE DEFAULT)
CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;
@ -71,7 +339,8 @@ CHARSET_INFO *Lex_exact_charset_extended_collation_attrs_st::
Don't need to handle old_mode=UTF8_IS_UTF8MB3 here.
See comments in find_bin_collation.
*/
cs= get_charset_by_csname(cs->cs_name.str, MY_CS_PRIMARY, MYF(MY_WME));
CHARSET_INFO *cs= get_charset_by_csname(m_ci->cs_name.str,
MY_CS_PRIMARY, MYF(MY_WME));
/*
The above should never fail, as we have default collations for
all character sets.
@ -81,21 +350,6 @@ CHARSET_INFO *Lex_exact_charset_extended_collation_attrs_st::
}
/*
  Set an exact collation "cl" after checking with my_charset_same()
  that it matches the character set "cs".

  @retval false - OK, set_collate_exact(cl) was called.
  @retval true  - Mismatch; ER_COLLATION_CHARSET_MISMATCH was reported.
*/
bool Lex_exact_charset_extended_collation_attrs_st::
       set_charset_collate_exact(CHARSET_INFO *cs, CHARSET_INFO *cl)
{
  DBUG_ASSERT(cs != nullptr && cl != nullptr);
  if (my_charset_same(cl, cs))
  {
    set_collate_exact(cl);
    return false;
  }
  my_error(ER_COLLATION_CHARSET_MISMATCH, MYF(0),
           cl->coll_name.str, cs->cs_name.str);
  return true;
}
/*
Resolve an empty or a contextually typed collation according to the
upper level default character set (and optionally a collation), e.g.:
@ -122,103 +376,84 @@ CHARSET_INFO *Lex_exact_charset_extended_collation_attrs_st::
DBUG_ASSERT(m_ci);
return m_ci;
case TYPE_COLLATE_CONTEXTUALLY_TYPED:
break;
{
Lex_exact_charset_opt_extended_collate tmp(def, true);
if (tmp.merge_context_collation_override(Lex_context_collation(m_ci)))
return NULL;
return tmp.collation().charset_info();
}
}
// Contextually typed
DBUG_ASSERT(m_ci);
if (is_contextually_typed_binary_style()) // CHAR(10) BINARY
return find_bin_collation(def);
if (is_contextually_typed_collate_default()) // CHAR(10) COLLATE DEFAULT
return find_default_collation(def);
/*
Non-binary and non-default contextually typed collation.
We don't have such yet - the parser cannot produce this.
But will have soon, e.g. "uca1400_as_ci".
*/
DBUG_ASSERT(0);
return NULL;
}
/*
Merge the CHARACTER SET clause to:
- an empty COLLATE clause
- an explicitly typed collation name
- a contextually typed collation
"this" corresponds to `CHARACTER SET xxx [BINARY]`
"cl" corresponds to the COLLATE clause
*/
bool Lex_exact_charset_extended_collation_attrs_st::
merge_charset_clause_and_collate_clause(
const Lex_exact_charset_extended_collation_attrs_st &cl)
merge_exact_collation(const Lex_exact_collation &cl)
{
if (cl.is_empty()) // No COLLATE clause
return false;
switch (m_type) {
case TYPE_EMPTY:
/*
No CHARACTER SET clause
CHAR(10) NOT NULL COLLATE latin1_bin
CHAR(10) NOT NULL COLLATE DEFAULT
*/
*this= cl;
*this= Lex_exact_charset_extended_collation_attrs(cl);
return false;
case TYPE_CHARACTER_SET:
{
// CHARACTER SET latin1 .. COLLATE latin1_swedish_ci
if (Lex_exact_charset(m_ci).raise_if_not_applicable(cl))
return true;
*this= Lex_exact_charset_extended_collation_attrs(cl);
return false;
}
case TYPE_COLLATE_EXACT:
{
Lex_exact_charset_opt_extended_collate ecs(m_ci, m_type == TYPE_COLLATE_EXACT);
if (ecs.merge_collate_or_error(cl))
return true;
set_collate_exact(ecs.charset_info());
return false;
// [CHARACTER SET latin1] COLLATE latin1_bin .. COLLATE latin1_bin
return Lex_exact_collation(m_ci).raise_if_not_equal(cl);
}
case TYPE_COLLATE_CONTEXTUALLY_TYPED:
break;
}
if (is_contextually_typed_collation())
{
if (cl.is_contextually_typed_collation())
{
/*
CONTEXT + CONTEXT:
CHAR(10) BINARY .. COLLATE DEFAULT - not supported by the parser
CHAR(10) BINARY .. COLLATE uca1400_as_ci - not supported yet
*/
DBUG_ASSERT(0); // Not possible yet
// COLLATE DEFAULT .. COLLATE latin1_swedish_ci
if (cl.raise_if_conflicts_with_context_collation(
Lex_context_collation(m_ci), true))
return true;
*this= Lex_exact_charset_extended_collation_attrs(cl);
return false;
}
}
DBUG_ASSERT(0);
return false;
}
bool Lex_exact_charset_extended_collation_attrs_st::
merge_context_collation(const Lex_context_collation &cl)
{
switch (m_type) {
case TYPE_EMPTY:
/*
CONTEXT + EXPLICIT
CHAR(10) COLLATE DEFAULT .. COLLATE latin1_swedish_ci
CHAR(10) BINARY .. COLLATE latin1_bin
CHAR(10) COLLATE uca1400_as_ci .. COLLATE latin1_bin
No CHARACTER SET clause
CHAR(10) NOT NULL .. COLLATE DEFAULT
*/
if (is_contextually_typed_collate_default() &&
!(cl.charset_info()->state & MY_CS_PRIMARY))
{
my_error(ER_CONFLICTING_DECLARATIONS, MYF(0),
"COLLATE ", "DEFAULT", "COLLATE ",
cl.charset_info()->coll_name.str);
return true;
}
if (is_contextually_typed_binary_style() &&
!(cl.charset_info()->state & MY_CS_BINSORT))
{
my_error(ER_CONFLICTING_DECLARATIONS, MYF(0),
"", "BINARY", "COLLATE ", cl.charset_info()->coll_name.str);
return true;
}
*this= cl;
*this= Lex_exact_charset_extended_collation_attrs(cl);
return false;
case TYPE_CHARACTER_SET:
{
// CHARACTER SET latin1 .. COLLATE DEFAULT
Lex_exact_charset_opt_extended_collate tmp(m_ci, false);
if (tmp.merge_context_collation(cl))
return true;
*this= Lex_exact_charset_extended_collation_attrs(tmp.collation());
return false;
}
case TYPE_COLLATE_EXACT:
// [CHARACTER SET latin1] COLLATE latin1_swedish_ci .. COLLATE DEFAULT
return Lex_exact_collation(m_ci).
raise_if_conflicts_with_context_collation(cl, false);
case TYPE_COLLATE_CONTEXTUALLY_TYPED:
// COLLATE DEFAULT .. COLLATE DEFAULT
return Lex_context_collation(m_ci).raise_if_not_equal(cl);
}
DBUG_ASSERT(0);
@ -227,69 +462,37 @@ bool Lex_exact_charset_extended_collation_attrs_st::
bool Lex_exact_charset_opt_extended_collate::
merge_collate_or_error(
const Lex_exact_charset_extended_collation_attrs_st &cl)
merge_exact_collation(const Lex_exact_collation &cl)
{
DBUG_ASSERT(cl.type() !=
Lex_exact_charset_extended_collation_attrs_st::TYPE_CHARACTER_SET);
// CHARACTER SET latin1 [COLLATE latin1_bin] .. COLLATE latin1_bin
if (m_with_collate)
return Lex_exact_collation(m_ci).raise_if_not_equal(cl);
if (raise_if_not_applicable(cl))
return true;
*this= Lex_exact_charset_opt_extended_collate(cl);
return false;
}
bool Lex_exact_charset_opt_extended_collate::
merge_context_collation(const Lex_context_collation &cl)
{
// CHARACTER SET latin1 [COLLATE latin1_bin] .. COLLATE DEFAULT
if (m_with_collate)
return Lex_exact_collation(m_ci).
raise_if_conflicts_with_context_collation(cl, false);
return merge_context_collation_override(cl);
}
bool Lex_exact_charset_extended_collation_attrs_st::
merge_collation(const Lex_extended_collation_st &cl)
{
switch (cl.type()) {
case Lex_exact_charset_extended_collation_attrs_st::TYPE_EMPTY:
return false;
case Lex_exact_charset_extended_collation_attrs_st::TYPE_CHARACTER_SET:
DBUG_ASSERT(0);
return false;
case Lex_exact_charset_extended_collation_attrs_st::TYPE_COLLATE_EXACT:
/*
EXPLICIT + EXPLICIT
CHAR(10) CHARACTER SET latin1 .. COLLATE latin1_bin
CHAR(10) CHARACTER SET latin1 COLLATE latin1_bin .. COLLATE latin1_bin
CHAR(10) COLLATE latin1_bin .. COLLATE latin1_bin
CHAR(10) COLLATE latin1_bin .. COLLATE latin1_bin
CHAR(10) CHARACTER SET latin1 BINARY .. COLLATE latin1_bin
*/
if (m_with_collate && m_ci != cl.charset_info())
{
my_error(ER_CONFLICTING_DECLARATIONS, MYF(0),
"COLLATE ", m_ci->coll_name.str,
"COLLATE ", cl.charset_info()->coll_name.str);
return true;
}
if (!my_charset_same(m_ci, cl.charset_info()))
{
my_error(ER_COLLATION_CHARSET_MISMATCH, MYF(0),
cl.charset_info()->coll_name.str, m_ci->cs_name.str);
return true;
}
m_ci= cl.charset_info();
m_with_collate= true;
return false;
case Lex_exact_charset_extended_collation_attrs_st::TYPE_COLLATE_CONTEXTUALLY_TYPED:
if (cl.is_contextually_typed_collate_default())
{
/*
SET NAMES latin1 COLLATE DEFAULT;
ALTER TABLE t1 CONVERT TO CHARACTER SET latin1 COLLATE DEFAULT;
*/
CHARSET_INFO *tmp= Lex_exact_charset_extended_collation_attrs_st::find_default_collation(m_ci);
if (!tmp)
return true;
m_ci= tmp;
m_with_collate= true;
return false;
}
else
{
/*
EXPLICIT + CONTEXT
CHAR(10) COLLATE latin1_bin .. COLLATE DEFAULT not possible yet
CHAR(10) COLLATE latin1_bin .. COLLATE uca1400_as_ci
*/
DBUG_ASSERT(0); // Not possible yet
return false;
}
case Lex_extended_collation_st::TYPE_EXACT:
return merge_exact_collation(Lex_exact_collation(cl.charset_info()));
case Lex_extended_collation_st::TYPE_CONTEXTUALLY_TYPED:
return merge_context_collation(Lex_context_collation(cl.charset_info()));
}
DBUG_ASSERT(0);
return false;
@ -297,48 +500,40 @@ bool Lex_exact_charset_opt_extended_collate::
/*
This method is used in the "attribute_list" rule to merge two independent
COLLATE clauses (not belonging to a CHARACTER SET clause).
Mix an unordered combination of CHARACTER SET and COLLATE clauses
(i.e. COLLATE can come before CHARACTER SET).
Merge a CHARACTER SET clause.
@param cs - The "CHARACTER SET exact_charset_name".
*/
bool
Lex_exact_charset_extended_collation_attrs_st::
merge_collate_clause_and_collate_clause(
const Lex_exact_charset_extended_collation_attrs_st &cl)
bool Lex_exact_charset_extended_collation_attrs_st::
merge_exact_charset(const Lex_exact_charset &cs)
{
/*
"BINARY" and "COLLATE DEFAULT" are not possible
in an independent COLLATE clause in a column attribute.
*/
DBUG_ASSERT(!is_contextually_typed_collation());
DBUG_ASSERT(!cl.is_contextually_typed_collation());
if (cl.is_empty())
return false;
DBUG_ASSERT(cs.charset_info());
switch (m_type) {
case TYPE_EMPTY:
*this= cl;
// CHARACTER SET cs
*this= Lex_exact_charset_extended_collation_attrs(cs);
return false;
case TYPE_CHARACTER_SET:
DBUG_ASSERT(0);
return false;
case TYPE_COLLATE_EXACT:
case TYPE_COLLATE_CONTEXTUALLY_TYPED:
break;
}
/*
Two independent explicit collations:
CHAR(10) NOT NULL COLLATE latin1_bin DEFAULT 'a' COLLATE latin1_bin
Note, we should perhaps eventually disallow double COLLATE clauses.
But for now let's just disallow only conflicting ones.
*/
if (charset_info() != cl.charset_info())
{
my_error(ER_CONFLICTING_DECLARATIONS, MYF(0),
"COLLATE ", charset_info()->coll_name.str,
"COLLATE ", cl.charset_info()->coll_name.str);
return true;
case TYPE_CHARACTER_SET:
// CHARACTER SET cs1 .. CHARACTER SET cs2
return Lex_exact_charset(m_ci).raise_if_not_equal(cs);
case TYPE_COLLATE_EXACT:
// COLLATE latin1_bin .. CHARACTER SET cs
return cs.raise_if_not_applicable(Lex_exact_collation(m_ci));
case TYPE_COLLATE_CONTEXTUALLY_TYPED:
// COLLATE DEFAULT .. CHARACTER SET cs
{
Lex_exact_charset_opt_extended_collate tmp(cs);
if (tmp.merge_context_collation(Lex_context_collation(m_ci)))
return true;
*this= Lex_exact_charset_extended_collation_attrs(tmp.collation());
return false;
}
}
DBUG_ASSERT(0);
return false;
}

View file

@ -16,8 +16,202 @@
#ifndef LEX_CHARSET_INCLUDED
#define LEX_CHARSET_INCLUDED
/*
Parse time character set and collation.
An exact character set, e.g:
CHARACTER SET latin1
*/
class Lex_exact_charset
{
  CHARSET_INFO *m_ci;

public:
  // "ci" must be non-NULL and flagged MY_CS_PRIMARY (checked in debug builds)
  explicit Lex_exact_charset(CHARSET_INFO *ci)
   :m_ci(ci)
  {
    DBUG_ASSERT(m_ci);
    DBUG_ASSERT(m_ci->state & MY_CS_PRIMARY);
  }

  CHARSET_INFO *charset_info() const
  {
    return m_ci;
  }

  // CHARACTER SET cs1 .. CHARACTER SET cs2
  bool raise_if_not_equal(const Lex_exact_charset &rhs) const;

  // CHARACTER SET cs .. COLLATE cl
  bool raise_if_not_applicable(const class Lex_exact_collation &cl) const;
};
/*
A contextually typed collation, e.g.:
COLLATE DEFAULT
CHAR(10) BINARY
*/
class Lex_context_collation
{
  CHARSET_INFO *m_ci;

public:
  // "ci" must be non-NULL (checked in debug builds)
  explicit Lex_context_collation(CHARSET_INFO *ci)
   :m_ci(ci)
  {
    DBUG_ASSERT(ci);
  }

  CHARSET_INFO *charset_info() const
  {
    return m_ci;
  }

  // True if this is the COLLATE DEFAULT placeholder
  bool is_contextually_typed_collate_default() const
  {
    return &my_collation_contextually_typed_default == m_ci;
  }

  // True if this is the BINARY placeholder
  bool is_contextually_typed_binary_style() const
  {
    return &my_collation_contextually_typed_binary == m_ci;
  }

  // CONTEXT + CONTEXT
  bool raise_if_not_equal(const Lex_context_collation &cl) const;
};
/*
An exact collation, e.g.
COLLATE latin1_swedish_ci
*/
class Lex_exact_collation
{
  CHARSET_INFO *m_ci;

public:
  // "ci" must be non-NULL (checked in debug builds)
  explicit Lex_exact_collation(CHARSET_INFO *ci)
   :m_ci(ci)
  {
    DBUG_ASSERT(ci);
  }

  CHARSET_INFO *charset_info() const
  {
    return m_ci;
  }

  // EXACT + EXACT
  bool raise_if_not_equal(const Lex_exact_collation &cl) const;

  /*
    EXACT + CONTEXT (reverse_order == false)
    CONTEXT + EXACT (reverse_order == true)
  */
  bool raise_if_conflicts_with_context_collation(const Lex_context_collation &,
                                                 bool reverse_order) const;
};
/*
Parse time COLLATE clause:
COLLATE collation_name
The collation can be either exact or contextual:
COLLATE latin1_bin
COLLATE DEFAULT
*/
class Lex_extended_collation_st
{
public:
  // Tells how m_ci is to be interpreted
  enum Type
  {
    TYPE_EXACT,
    TYPE_CONTEXTUALLY_TYPED
  };

protected:
  CHARSET_INFO *m_ci;
  Type m_type;

public:
  // Set both members; the class has no constructor of its own
  void init(CHARSET_INFO *ci, Type type)
  {
    m_type= type;
    m_ci= ci;
  }

  CHARSET_INFO *charset_info() const
  {
    return m_ci;
  }

  Type type() const
  {
    return m_type;
  }

  // Make this clause mean COLLATE DEFAULT
  void set_collate_default()
  {
    init(&my_collation_contextually_typed_default, TYPE_CONTEXTUALLY_TYPED);
  }

  // this .. COLLATE <context collation>
  bool raise_if_conflicts_with_context_collation(const Lex_context_collation &)
    const;

  // this .. CHARACTER SET cs
  bool merge_exact_charset(const Lex_exact_charset &rhs);

  // this .. COLLATE <exact collation>
  bool merge_exact_collation(const Lex_exact_collation &rhs);

  // this .. COLLATE <exact or context collation>
  bool merge(const Lex_extended_collation_st &rhs);
};
class Lex_extended_collation: public Lex_extended_collation_st
{
public:
Lex_extended_collation(CHARSET_INFO *ci, Type type)
{
init(ci, type);
}
Lex_extended_collation(const Lex_exact_collation &rhs)
{
init(rhs.charset_info(), TYPE_EXACT);
}
};
/*
CHARACTER SET cs_exact [COLLATE cl_exact_or_context]
*/
class Lex_exact_charset_opt_extended_collate
{
CHARSET_INFO *m_ci;
bool m_with_collate;
public:
Lex_exact_charset_opt_extended_collate(CHARSET_INFO *ci, bool with_collate)
:m_ci(ci), m_with_collate(with_collate)
{
DBUG_ASSERT(m_ci);
DBUG_ASSERT((m_ci->state & MY_CS_PRIMARY) || m_with_collate);
}
Lex_exact_charset_opt_extended_collate(const Lex_exact_charset &cs)
:m_ci(cs.charset_info()), m_with_collate(false)
{
DBUG_ASSERT(m_ci);
DBUG_ASSERT(m_ci->state & MY_CS_PRIMARY);
}
Lex_exact_charset_opt_extended_collate(const Lex_exact_collation &cl)
:m_ci(cl.charset_info()), m_with_collate(true)
{
DBUG_ASSERT(m_ci);
}
bool with_collate() const { return m_with_collate; }
CHARSET_INFO *find_bin_collation() const;
CHARSET_INFO *find_default_collation() const;
bool raise_if_not_applicable(const Lex_exact_collation &cl) const;
/*
Add another COLLATE clause (exact or context).
So the full syntax looks like:
CHARACTER SET cs [COLLATE cl] ... COLLATE cl2
*/
bool merge_collation(const Lex_extended_collation_st &cl)
{
switch (cl.type()) {
case Lex_extended_collation_st::TYPE_EXACT:
return merge_exact_collation(Lex_exact_collation(cl.charset_info()));
case Lex_extended_collation_st::TYPE_CONTEXTUALLY_TYPED:
return merge_context_collation(Lex_context_collation(cl.charset_info()));
}
DBUG_ASSERT(0);
return false;
}
/*
Add a context collation:
CHARACTER SET cs [COLLATE cl] ... COLLATE DEFAULT
*/
bool merge_context_collation(const Lex_context_collation &cl);
bool merge_context_collation_override(const Lex_context_collation &cl);
/*
Add an exact collation:
CHARACTER SET cs [COLLATE cl] ... COLLATE latin1_bin
*/
bool merge_exact_collation(const Lex_exact_collation &cl);
Lex_exact_collation collation() const
{
return Lex_exact_collation(m_ci);
}
Lex_exact_charset charset() const
{
if ((m_ci->state & MY_CS_PRIMARY))
return Lex_exact_charset(m_ci);
return Lex_exact_charset(find_default_collation());
}
};
/*
Parse time character set and collation for:
[CHARACTER SET cs_exact] [COLLATE cl_exact_or_context]
Can be:
@ -65,9 +259,18 @@ public:
protected:
CHARSET_INFO *m_ci;
Type m_type;
public:
static CHARSET_INFO *find_bin_collation(CHARSET_INFO *cs);
static CHARSET_INFO *find_default_collation(CHARSET_INFO *cs);
protected:
/*
  Map a Lex_extended_collation_st type to the matching Type of this class.
  An unexpected value hits a debug assert and maps to TYPE_COLLATE_EXACT.
*/
static Type type_from_lex_collation_type(Lex_extended_collation_st::Type type)
{
  if (type == Lex_extended_collation_st::TYPE_EXACT)
    return TYPE_COLLATE_EXACT;
  if (type == Lex_extended_collation_st::TYPE_CONTEXTUALLY_TYPED)
    return TYPE_COLLATE_CONTEXTUALLY_TYPED;
  DBUG_ASSERT(0);
  return TYPE_COLLATE_EXACT;
}
public:
void init()
{
@ -80,6 +283,21 @@ public:
m_ci= cs;
m_type= type;
}
// Initialize from "CHARACTER SET cs"
void init(const Lex_exact_charset &cs)
{
  m_type= TYPE_CHARACTER_SET;
  m_ci= cs.charset_info();
}

// Initialize from an exact "COLLATE cl"
void init(const Lex_exact_collation &cs)
{
  m_type= TYPE_COLLATE_EXACT;
  m_ci= cs.charset_info();
}

/*
  Initialize from "CHARACTER SET cs [COLLATE cl]":
  the collation is used if a COLLATE part is present,
  otherwise the character set.
*/
void init(const Lex_exact_charset_opt_extended_collate &cscl)
{
  if (cscl.with_collate())
    init(cscl.collation());
  else
    init(cscl.charset());
}
bool is_empty() const
{
return m_type == TYPE_EMPTY;
@ -90,23 +308,26 @@ public:
m_ci= cs;
m_type= TYPE_CHARACTER_SET;
}
void set_charset_collate_default(CHARSET_INFO *cs)
bool set_charset_collate_default(CHARSET_INFO *cs)
{
DBUG_ASSERT(cs);
m_ci= cs;
m_type= TYPE_COLLATE_EXACT;
}
bool set_charset_collate_binary(CHARSET_INFO *cs)
{
DBUG_ASSERT(cs);
if (!(cs= find_bin_collation(cs)))
if (!(cs= Lex_exact_charset_opt_extended_collate(cs, true).
find_default_collation()))
return true;
m_ci= cs;
m_type= TYPE_COLLATE_EXACT;
return false;
}
bool set_charset_collate_binary(CHARSET_INFO *cs)
{
DBUG_ASSERT(cs);
if (!(cs= Lex_exact_charset_opt_extended_collate(cs, true).
find_bin_collation()))
return true;
m_ci= cs;
m_type= TYPE_COLLATE_EXACT;
return false;
}
bool set_charset_collate_exact(CHARSET_INFO *cs,
CHARSET_INFO *cl);
void set_collate_default()
{
m_ci= &my_collation_contextually_typed_default;
@ -119,17 +340,7 @@ public:
}
bool is_contextually_typed_collate_default() const
{
return m_ci == &my_collation_contextually_typed_default;
}
bool is_contextually_typed_binary_style() const
{
return m_ci == &my_collation_contextually_typed_binary;
}
void set_collate_exact(CHARSET_INFO *cl)
{
DBUG_ASSERT(cl);
m_ci= cl;
m_type= TYPE_COLLATE_EXACT;
return Lex_context_collation(m_ci).is_contextually_typed_collate_default();
}
CHARSET_INFO *charset_info() const
{
@ -144,44 +355,55 @@ public:
return m_type == TYPE_COLLATE_CONTEXTUALLY_TYPED;
}
CHARSET_INFO *resolved_to_character_set(CHARSET_INFO *cs) const;
bool merge_charset_clause_and_collate_clause(
const Lex_exact_charset_extended_collation_attrs_st &cl);
bool merge_collate_clause_and_collate_clause(
const Lex_exact_charset_extended_collation_attrs_st &cl);
};
/*
CHARACTER SET cs_exact [COLLATE cl_exact_or_context]
*/
class Lex_exact_charset_opt_extended_collate
{
CHARSET_INFO *m_ci;
bool m_with_collate;
public:
Lex_exact_charset_opt_extended_collate(CHARSET_INFO *ci, bool with_collate)
:m_ci(ci), m_with_collate(with_collate)
/*
Merge the column CHARACTER SET clause to:
- an exact collation name
- a contextually typed collation
"this" corresponds to `CHARACTER SET xxx [BINARY]`
"cl" corresponds to the COLLATE clause
*/
bool merge_column_charset_clause_and_collate_clause(
const Lex_exact_charset_extended_collation_attrs_st &cl)
{
DBUG_ASSERT(m_ci);
// Item_func_set_collation uses non-default collations in "ci"
//DBUG_ASSERT(m_ci->default_flag() || m_with_collate);
switch (cl.type()) {
case TYPE_EMPTY:
return false;
case TYPE_COLLATE_EXACT:
return merge_exact_collation(Lex_exact_collation(cl.charset_info()));
case TYPE_COLLATE_CONTEXTUALLY_TYPED:
case TYPE_CHARACTER_SET:
break;
}
DBUG_ASSERT(0);
return false;
}
/*
Add another COLLATE clause (exact or context).
So the full syntax looks like:
CHARACTER SET cs [COLLATE cl] ... COLLATE cl2
This method is used in the "attribute_list" rule to merge two independent
COLLATE clauses (not belonging to a CHARACTER SET clause).
"BINARY" and "COLLATE DEFAULT" are not possible
in an independent COLLATE clause in a column attribute.
*/
bool merge_collate_or_error(
const Lex_exact_charset_extended_collation_attrs_st &cl);
bool merge_opt_collate_or_error(
const Lex_exact_charset_extended_collation_attrs_st &cl)
bool merge_column_collate_clause_and_collate_clause(
const Lex_exact_charset_extended_collation_attrs_st &cl)
{
if (cl.is_empty())
DBUG_ASSERT(m_type != TYPE_COLLATE_CONTEXTUALLY_TYPED);
DBUG_ASSERT(m_type != TYPE_CHARACTER_SET);
switch (cl.type()) {
case TYPE_EMPTY:
return false;
return merge_collate_or_error(cl);
case TYPE_COLLATE_EXACT:
return merge_exact_collation(Lex_exact_collation(cl.charset_info()));
case TYPE_COLLATE_CONTEXTUALLY_TYPED:
case TYPE_CHARACTER_SET:
break;
}
DBUG_ASSERT(0);
return false;
}
CHARSET_INFO *charset_info() const { return m_ci; }
bool with_collate() const { return m_with_collate; }
bool merge_exact_charset(const Lex_exact_charset &cs);
bool merge_exact_collation(const Lex_exact_collation &cl);
bool merge_context_collation(const Lex_context_collation &cl);
bool merge_collation(const Lex_extended_collation_st &cl);
};
@ -197,6 +419,32 @@ public:
{
init(collation, type);
}
// Construct from "CHARACTER SET cs"
explicit
Lex_exact_charset_extended_collation_attrs(const Lex_exact_charset &cs)
{
init(cs.charset_info(), TYPE_CHARACTER_SET);
}
// Construct from an exact "COLLATE cl"
explicit
Lex_exact_charset_extended_collation_attrs(const Lex_exact_collation &cl)
{
init(cl.charset_info(), TYPE_COLLATE_EXACT);
}
// Construct from a contextually typed collation
explicit
Lex_exact_charset_extended_collation_attrs(const Lex_context_collation &cl)
{
init(cl.charset_info(), TYPE_COLLATE_CONTEXTUALLY_TYPED);
}
// Construct from "CHARACTER SET cs [COLLATE cl]"; the type is chosen by init()
explicit
Lex_exact_charset_extended_collation_attrs(
const Lex_exact_charset_opt_extended_collate &cscl)
{
init(cscl);
}
// Construct from a COLLATE clause, mapping its type (exact or contextual)
explicit
Lex_exact_charset_extended_collation_attrs(const Lex_extended_collation_st &cl)
{
init(cl.charset_info(), type_from_lex_collation_type(cl.type()));
}
static Lex_exact_charset_extended_collation_attrs national(bool bin_mod)
{
return bin_mod ?

View file

@ -11874,6 +11874,21 @@ bool LEX::sp_create_set_password_instr(THD *thd,
}
/*
  Create a set_var_collation_client using the collation of "cscl" for all
  three of its arguments, and append it to thd->lex->var_list.

  @param pos          - Passed to sp_create_assignment_lex()
  @param cscl         - The character set with an optional collation
  @param no_lookahead - Passed to sp_create_assignment_instr()
  @retval false - OK
  @retval true  - One of the steps failed
*/
bool LEX::set_names(const char *pos,
                    const Lex_exact_charset_opt_extended_collate &cscl,
                    bool no_lookahead)
{
  if (sp_create_assignment_lex(thd, pos))
    return true;

  CHARSET_INFO *ci= cscl.collation().charset_info();
  set_var_collation_client *var=
    new (thd->mem_root) set_var_collation_client(ci, ci, ci);
  if (unlikely(var == NULL))
    return true;
  if (unlikely(thd->lex->var_list.push_back(var, thd->mem_root)))
    return true;
  if (unlikely(sp_create_assignment_instr(thd, no_lookahead)))
    return true;
  return false;
}
bool LEX::map_data_type(const Lex_ident_sys_st &schema_name,
Lex_field_type_st *type) const
{

View file

@ -3833,6 +3833,9 @@ public:
int case_stmt_action_then();
bool setup_select_in_parentheses();
bool set_names(const char *pos,
const Lex_exact_charset_opt_extended_collate &cs,
bool no_lookahead);
bool set_trigger_new_row(const LEX_CSTRING *name, Item *val);
bool set_trigger_field(const LEX_CSTRING *name1, const LEX_CSTRING *name2,
Item *val);

View file

@ -215,6 +215,7 @@ void _CONCAT_UNDERSCORED(turn_parser_debug_on,yyparse)()
Lex_field_type_st Lex_field_type;
Lex_exact_charset_extended_collation_attrs_st
Lex_exact_charset_extended_collation_attrs;
Lex_extended_collation_st Lex_extended_collation;
Lex_dyncol_type_st Lex_dyncol_type;
Lex_for_loop_st for_loop;
Lex_for_loop_bounds_st for_loop_bounds;
@ -1387,6 +1388,10 @@ bool my_yyoverflow(short **a, YYSTYPE **b, size_t *yystacksize);
attribute_list
field_def
%type <Lex_extended_collation>
collation_name
collation_name_or_default
%type <Lex_dyncol_type> opt_dyncol_type dyncol_type
numeric_dyncol_type temporal_dyncol_type string_dyncol_type
@ -1579,14 +1584,11 @@ bool my_yyoverflow(short **a, YYSTYPE **b, size_t *yystacksize);
text_or_password
%type <charset>
opt_collate_or_default
charset_name
charset_or_alias
charset_name_or_default
old_or_new_charset_name
old_or_new_charset_name_or_default
collation_name
collation_name_or_default
opt_load_data_charset
UNDERSCORE_CHARSET
@ -5532,13 +5534,15 @@ default_collation:
opt_default COLLATE_SYM opt_equal collation_name_or_default
{
HA_CREATE_INFO *cinfo= &Lex->create_info;
/*
  Only a collation of TYPE_EXACT contributes a CHARSET_INFO here;
  for any other type (presumably COLLATE DEFAULT) cl stays NULL.
*/
bool is_exact= $4.type() == Lex_extended_collation_st::TYPE_EXACT;
CHARSET_INFO *cl= is_exact ? $4.charset_info() : NULL;
/*
  If a default character set was already recorded and is not NULL,
  the collation has to be merged with it; a NULL merge result
  aborts parsing.
*/
if (unlikely((cinfo->used_fields & HA_CREATE_USED_DEFAULT_CHARSET) &&
cinfo->default_table_charset && $4 &&
!($4= merge_charset_and_collation(cinfo->default_table_charset,
$4))))
cinfo->default_table_charset && cl &&
!(cl= merge_charset_and_collation(cinfo->default_table_charset,
cl))))
MYSQL_YYABORT;
Lex->create_info.default_table_charset= $4;
Lex->create_info.default_table_charset= cl;
Lex->create_info.used_fields|= HA_CREATE_USED_DEFAULT_CHARSET;
}
;
@ -5791,7 +5795,7 @@ field_type_or_serial:
field_def
{
auto tmp= $1.charset_collation_attrs();
if (tmp.merge_charset_clause_and_collate_clause($3))
if (tmp.merge_column_charset_clause_and_collate_clause($3))
MYSQL_YYABORT;
Lex->last_field->set_charset_collation_attrs(tmp);
}
@ -5831,7 +5835,7 @@ field_def:
| attribute_list compressed_deprecated_column_attribute { $$= $1; }
| attribute_list compressed_deprecated_column_attribute attribute_list
{
if (($$= $1).merge_collate_clause_and_collate_clause($3))
if (($$= $1).merge_column_collate_clause_and_collate_clause($3))
MYSQL_YYABORT;
}
| opt_generated_always AS virtual_column_func
@ -6312,7 +6316,7 @@ opt_precision:
attribute_list:
attribute_list attribute
{
if (($$= $1).merge_collate_clause_and_collate_clause($2))
if (($$= $1).merge_column_collate_clause_and_collate_clause($2))
MYSQL_YYABORT;
}
| attribute
@ -6339,7 +6343,7 @@ attribute:
}
| COLLATE_SYM collation_name
{
$$.set_collate_exact($2);
$$= Lex_exact_charset_extended_collation_attrs($2);
}
| serial_attribute { $$.init(); }
;
@ -6479,20 +6483,17 @@ old_or_new_charset_name_or_default:
/* A collation given by name; the value is an exact collation. */
collation_name:
ident_or_text
{
/*
  Look the collation up by name; a NULL result from
  mysqld_collation_get_by_name() aborts parsing.
*/
if (unlikely(!($$= mysqld_collation_get_by_name($1.str,
CHARSET_INFO *cs;
if (unlikely(!(cs= mysqld_collation_get_by_name($1.str,
thd->get_utf8_flag()))))
MYSQL_YYABORT;
/* Wrap the found collation as an exact (not context) collation */
$$= Lex_extended_collation(Lex_exact_collation(cs));
}
;
/* Optional COLLATE clause; the value is NULL when the clause is absent. */
opt_collate_or_default:
/* empty */ { $$=NULL; }
| COLLATE_SYM collation_name_or_default { $$=$2; }
;
/* Either a named collation or the DEFAULT keyword. */
collation_name_or_default:
collation_name { $$=$1; }
| DEFAULT { $$=NULL; }
| DEFAULT { $$.set_collate_default(); }
;
opt_default:
@ -6535,11 +6536,18 @@ binary:
}
| charset_or_alias COLLATE_SYM collation_name
{
/*
  Character set followed by COLLATE: parsing is aborted when
  combining the collation with the character set reports an error.
*/
if ($$.set_charset_collate_exact($1, $3))
if ($3.merge_exact_charset(Lex_exact_charset($1)))
MYSQL_YYABORT;
$$= Lex_exact_charset_extended_collation_attrs($3);
}
| COLLATE_SYM collation_name
{
/* COLLATE alone: the attributes are built from the collation */
$$= Lex_exact_charset_extended_collation_attrs($2);
}
| COLLATE_SYM DEFAULT
{
$$.set_collate_default();
}
| COLLATE_SYM collation_name { $$.set_collate_exact($2); }
| COLLATE_SYM DEFAULT { $$.set_collate_default(); }
;
opt_bin_mod:
@ -7610,17 +7618,28 @@ alter_list_item:
lex->alter_info.flags|= ALTER_RENAME_INDEX;
}
| CONVERT_SYM TO_SYM charset charset_name_or_default
opt_collate_or_default
{
/*
  A NULL character set (presumably CHARACTER SET DEFAULT) falls
  back to the session's collation_database.
*/
if (!$4)
{
$4= thd->variables.collation_database;
}
$5= $5 ? $5 : $4;
if (unlikely(!my_charset_same($4,$5)))
if (unlikely(Lex->create_info.add_alter_list_item_convert_to_charset($4)))
MYSQL_YYABORT;
Lex->alter_info.flags|= ALTER_CONVERT_TO;
}
| CONVERT_SYM TO_SYM charset charset_name_or_default
COLLATE_SYM collation_name_or_default
{
/* Same fallback to collation_database as in the alternative above */
if (!$4)
{
$4= thd->variables.collation_database;
}
/*
  Only a collation of TYPE_EXACT supplies its own CHARSET_INFO;
  otherwise the character set $4 itself is the conversion target.
*/
bool is_exact= $6.type() == Lex_extended_collation_st::TYPE_EXACT;
CHARSET_INFO *cl= is_exact ? $6.charset_info() : $4;
/* The collation must match the character set */
if (unlikely(!my_charset_same($4,cl)))
my_yyabort_error((ER_COLLATION_CHARSET_MISMATCH, MYF(0),
$5->coll_name.str, $4->cs_name.str));
if (unlikely(Lex->create_info.add_alter_list_item_convert_to_charset($5)))
cl->coll_name.str, $4->cs_name.str));
if (unlikely(Lex->create_info.add_alter_list_item_convert_to_charset(cl)))
MYSQL_YYABORT;
Lex->alter_info.flags|= ALTER_CONVERT_TO;
}
@ -9739,7 +9758,9 @@ string_factor_expr:
primary_expr
| string_factor_expr COLLATE_SYM collation_name
{
/*
  expr COLLATE name: wrap the expression in Item_func_set_collation;
  a failed allocation aborts parsing.
*/
if (unlikely(!($$= new (thd->mem_root) Item_func_set_collation(thd, $1, $3))))
if (unlikely(!($$= new (thd->mem_root)
Item_func_set_collation(thd, $1,
$3.charset_info()))))
MYSQL_YYABORT;
}
;
@ -16581,26 +16602,20 @@ option_value_no_option_type:
thd->parse_error();
MYSQL_YYABORT;
}
| NAMES_SYM charset_name_or_default opt_collate_or_default
| NAMES_SYM charset_name_or_default
{
if (sp_create_assignment_lex(thd, $1.pos()))
/*
  SET NAMES without COLLATE. A NULL character set (presumably
  DEFAULT) resolves to the global character_set_client.
  "yychar == YYEMPTY" is passed as the no_lookahead argument
  of LEX::set_names().
*/
CHARSET_INFO *def= global_system_variables.character_set_client;
Lex_exact_charset_opt_extended_collate tmp($2 ? $2 : def, false);
if (Lex->set_names($1.pos(), tmp, yychar == YYEMPTY))
MYSQL_YYABORT;
LEX *lex= Lex;
CHARSET_INFO *cs2;
CHARSET_INFO *cs3;
cs2= $2 ? $2 : global_system_variables.character_set_client;
cs3= $3 ? $3 : cs2;
if (unlikely(!my_charset_same(cs2, cs3)))
{
my_error(ER_COLLATION_CHARSET_MISMATCH, MYF(0),
cs3->coll_name.str, cs2->cs_name.str);
MYSQL_YYABORT;
}
set_var_collation_client *var;
var= new (thd->mem_root) set_var_collation_client(cs3, cs3, cs3);
if (unlikely(var == NULL) ||
unlikely(lex->var_list.push_back(var, thd->mem_root)) ||
unlikely(sp_create_assignment_instr(thd, yychar == YYEMPTY)))
}
| NAMES_SYM charset_name_or_default
COLLATE_SYM collation_name_or_default
{
/*
  SET NAMES with COLLATE. The character set is resolved as in the
  alternative without COLLATE, then the COLLATE clause is merged
  into it; a merge error aborts parsing before set_names() is
  called.
*/
CHARSET_INFO *def= global_system_variables.character_set_client;
Lex_exact_charset_opt_extended_collate tmp($2 ? $2 : def, false);
if (tmp.merge_collation($4) ||
Lex->set_names($1.pos(), tmp, yychar == YYEMPTY))
MYSQL_YYABORT;
}
| DEFAULT ROLE_SYM grant_role