mariadb/sql/lex_charset.cc
Alexander Barkov e9adc3959e A cleanup for MDEV-27896 Wrong result upon COLLATE latin1_bin CHARACTER SET latin1 on the table or the database level
Changing the error messages in a statement like this:

CREATE DATABASE db1
         COLLATE utf8mb4_bin
         CHARACTER SET utf8mb4
         CHARACTER SET latin1;

from
  COLLATION 'utf8mb4_bin' is not valid for CHARACTER SET 'latin1'

to a more expected:

  Conflicting declarations: 'CHARACTER SET utf8mb4' and 'CHARACTER SET latin1'

In order to do this:
- Adding a new type TYPE_CHARACTER_SET_COLLATE_EXACT into
  Lex_exact_charset_extended_collation_attrs_st

- Removing m_had_charset_exact from its descendant class
  Lex_extended_charset_extended_collation_attrs_st

Additional cleanup:
- Changing methods in Lex_exact_charset_extended_collation_attrs_st
  set_charset(), set_charset_collate_default(), set_charset_collate_binary()
  to get Lex_exact_charset instead CHARSET_INFO as a parameter,
  to guarantee that the argument is only CHARACTER SET and does not have
  any COLLATE clauses yet. This change is not directly related to
  the error message change.
2022-05-25 11:16:38 +04:00

700 lines
20 KiB
C++

/* Copyright (c) 2021, 2022, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
#include "my_global.h"
#include "my_sys.h"
#include "m_ctype.h"
#include "lex_charset.h"
#include "mysqld_error.h"
static void
raise_ER_CONFLICTING_DECLARATIONS(const char *clause1,
const char *name1,
const char *clause2,
const char *name2,
bool reverse_order)
{
if (!reverse_order)
my_error(ER_CONFLICTING_DECLARATIONS, MYF(0),
clause1, name1, clause2, name2);
else
my_error(ER_CONFLICTING_DECLARATIONS, MYF(0),
clause2, name2, clause1, name1);
}
static void
raise_ER_CONFLICTING_DECLARATIONS(const char *clause1,
const char *name1,
const char *name1_part2,
const char *clause2,
const char *name2,
bool reverse_order)
{
char def[MY_CS_NAME_SIZE * 2];
my_snprintf(def, sizeof(def), "%s (%s)", name1, name1_part2);
raise_ER_CONFLICTING_DECLARATIONS(clause1, def,
clause2, name2,
reverse_order);
}
bool Lex_exact_charset::raise_if_not_equal(const Lex_exact_charset &rhs) const
{
if (m_ci == rhs.m_ci)
return false;
my_error(ER_CONFLICTING_DECLARATIONS, MYF(0),
"CHARACTER SET ", m_ci->cs_name.str,
"CHARACTER SET ", rhs.m_ci->cs_name.str);
return true;
}
bool Lex_exact_charset::
raise_if_not_applicable(const Lex_exact_collation &cl) const
{
return Lex_exact_charset_opt_extended_collate(m_ci, false).
raise_if_not_applicable(cl);
}
bool Lex_exact_charset_opt_extended_collate::
raise_if_charsets_differ(const Lex_exact_charset &cs) const
{
if (!my_charset_same(m_ci, cs.charset_info()))
{
my_error(ER_CONFLICTING_DECLARATIONS, MYF(0),
"CHARACTER SET ", m_ci->cs_name.str,
"CHARACTER SET ", cs.charset_info()->cs_name.str);
return true;
}
return false;
}
bool Lex_exact_charset_opt_extended_collate::
raise_if_not_applicable(const Lex_exact_collation &cl) const
{
if (!my_charset_same(m_ci, cl.charset_info()))
{
my_error(ER_COLLATION_CHARSET_MISMATCH, MYF(0),
cl.charset_info()->coll_name.str, m_ci->cs_name.str);
return true;
}
return false;
}
bool
Lex_exact_collation::raise_if_not_equal(const Lex_exact_collation &cl) const
{
if (m_ci != cl.m_ci)
{
my_error(ER_CONFLICTING_DECLARATIONS, MYF(0),
"COLLATE ", m_ci->coll_name.str,
"COLLATE ", cl.m_ci->coll_name.str);
return true;
}
return false;
}
/*
Merge an exact collation and a contexual collation.
@param cl - The contextual collation to merge to "this".
@param reverse_order - If the contextual collation is on the left side
Use reverse_order as follows:
false: COLLATE latin1_swedish_ci COLLATE DEFAULT
true: COLLATE DEFAULT COLLATE latin1_swedish_ci
*/
bool
Lex_exact_collation::
raise_if_conflicts_with_context_collation(const Lex_context_collation &cl,
bool reverse_order) const
{
if (cl.is_contextually_typed_collate_default() &&
!(m_ci->state & MY_CS_PRIMARY))
{
raise_ER_CONFLICTING_DECLARATIONS("COLLATE ", m_ci->coll_name.str,
"COLLATE ", "DEFAULT", reverse_order);
return true;
}
if (cl.is_contextually_typed_binary_style() &&
!(m_ci->state & MY_CS_BINSORT))
{
raise_ER_CONFLICTING_DECLARATIONS("COLLATE ", m_ci->coll_name.str,
"", "BINARY", reverse_order);
return true;
}
return false;
}
bool
Lex_context_collation::raise_if_not_equal(const Lex_context_collation &cl) const
{
/*
Only equal context collations are possible here so far:
- Column grammar only supports BINARY, but does not support COLLATE DEFAULT
- DB/Table grammar only support COLLATE DEFAULT
But we'll have different collations here - uca140 is coming soon.
*/
DBUG_ASSERT(m_ci == cl.m_ci);
return false;
}
/*
Resolve a context collation to the character set (when the former gets known):
CREATE TABLE t1 (a CHAR(10) BINARY) CHARACTER SET latin1;
CREATE DATABASE db1 COLLATE DEFAULT CHARACTER SET latin1;
*/
bool Lex_exact_charset_opt_extended_collate::
merge_context_collation_override(const Lex_context_collation &cl)
{
DBUG_ASSERT(m_ci);
// CHAR(10) BINARY
if (cl.is_contextually_typed_binary_style())
{
CHARSET_INFO *ci= find_bin_collation();
if (!ci)
return true;
m_ci= ci;
m_with_collate= true;
return false;
}
// COLLATE DEFAULT
if (cl.is_contextually_typed_collate_default())
{
CHARSET_INFO *ci= find_default_collation();
DBUG_ASSERT(ci);
if (!ci)
return true;
m_ci= ci;
m_with_collate= true;
return false;
}
/*
A non-binary and non-default contextually typed collation.
We don't have such yet - the parser cannot produce this.
But we have "uca1400_as_ci" coming soon.
*/
DBUG_ASSERT(0);
return false;
}
bool Lex_extended_collation_st::merge_exact_charset(const Lex_exact_charset &cs)
{
switch (m_type) {
case TYPE_EXACT:
{
// COLLATE latin1_swedish_ci .. CHARACTER SET latin1
return cs.raise_if_not_applicable(Lex_exact_collation(m_ci));
}
case TYPE_CONTEXTUALLY_TYPED:
{
// COLLATE DEFAULT .. CHARACTER SET latin1
Lex_exact_charset_opt_extended_collate tmp(cs);
if (tmp.merge_context_collation(Lex_context_collation(m_ci)))
return true;
*this= Lex_extended_collation(tmp.collation());
return false;
}
}
DBUG_ASSERT(0);
return false;
}
bool Lex_extended_collation_st::
merge_exact_collation(const Lex_exact_collation &rhs)
{
switch (m_type) {
case TYPE_EXACT:
/*
EXACT + EXACT
COLLATE latin1_bin .. COLLATE latin1_bin
*/
return Lex_exact_collation(m_ci).raise_if_not_equal(rhs);
case TYPE_CONTEXTUALLY_TYPED:
{
/*
CONTEXT + EXACT
CHAR(10) COLLATE DEFAULT .. COLLATE latin1_swedish_ci
CHAR(10) BINARY .. COLLATE latin1_bin
CHAR(10) COLLATE uca1400_as_ci .. COLLATE latin1_bin - coming soon
*/
if (rhs.raise_if_conflicts_with_context_collation(
Lex_context_collation(m_ci), true))
return true;
*this= Lex_extended_collation(rhs);
return false;
}
}
DBUG_ASSERT(0);
return false;
}
bool Lex_extended_collation_st::
raise_if_conflicts_with_context_collation(const Lex_context_collation &rhs)
const
{
switch (m_type) {
case TYPE_EXACT:
/*
EXACT + CONTEXT
COLLATE latin1_swedish_ci .. COLLATE DEFAULT
*/
return Lex_exact_collation(m_ci).
raise_if_conflicts_with_context_collation(rhs, false);
case TYPE_CONTEXTUALLY_TYPED:
{
/*
CONTEXT + CONTEXT:
CHAR(10) BINARY .. COLLATE DEFAULT - not supported by the parser
CREATE DATABASE db1 COLLATE DEFAULT COLLATE DEFAULT;
*/
return Lex_context_collation(m_ci).raise_if_not_equal(rhs);
}
}
DBUG_ASSERT(0);
return false;
}
/*
Merge two non-empty COLLATE clauses.
*/
bool Lex_extended_collation_st::merge(const Lex_extended_collation_st &rhs)
{
switch (rhs.type()) {
case TYPE_EXACT:
/*
EXACT + EXACT
COLLATE latin1_swedish_ci .. COLLATE latin1_swedish_ci
CONTEXT + EXACT
COLLATE DEFAULT .. COLLATE latin1_swedish_ci
CHAR(10) BINARY .. COLLATE latin1_bin
*/
return merge_exact_collation(Lex_exact_collation(rhs.m_ci));
case TYPE_CONTEXTUALLY_TYPED:
/*
EXACT + CONTEXT
COLLATE latin1_swedish_ci .. COLLATE DEFAULT
CONTEXT + CONTEXT
COLLATE DEFAULT .. COLLATE DEFAULT
CHAR(10) BINARY .. COLLATE DEFAULT
*/
return raise_if_conflicts_with_context_collation(
Lex_context_collation(rhs.m_ci));
}
DBUG_ASSERT(0);
return false;
}
/** find a collation with binary comparison rules
*/
CHARSET_INFO *Lex_exact_charset_opt_extended_collate::find_bin_collation() const
{
/*
We don't need to handle old_mode=UTF8_IS_UTF8MB3 here,
because "m_ci" points to a real character set name.
It can be either "utf8mb3" or "utf8mb4". It cannot be "utf8".
No thd->get_utf8_flag() flag passed to get_charset_by_csname().
*/
DBUG_ASSERT(m_ci->cs_name.length !=4 || memcmp(m_ci->cs_name.str, "utf8", 4));
/*
CREATE TABLE t1 (a CHAR(10) BINARY)
CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;
Nothing to do, we have the binary collation already.
*/
if (m_ci->state & MY_CS_BINSORT)
return m_ci;
// CREATE TABLE t1 (a CHAR(10) BINARY) CHARACTER SET utf8mb4;
CHARSET_INFO *cs;
if (!(cs= get_charset_by_csname(m_ci->cs_name.str, MY_CS_BINSORT, MYF(0))))
{
char tmp[65];
strxnmov(tmp, sizeof(tmp)-1, m_ci->cs_name.str, "_bin", NULL);
my_error(ER_UNKNOWN_COLLATION, MYF(0), tmp);
}
return cs;
}
CHARSET_INFO *
Lex_exact_charset_opt_extended_collate::find_default_collation() const
{
// See comments in find_bin_collation()
DBUG_ASSERT(m_ci->cs_name.length !=4 || memcmp(m_ci->cs_name.str, "utf8", 4));
/*
CREATE TABLE t1 (a CHAR(10) COLLATE DEFAULT) CHARACTER SET utf8mb4;
Nothing to do, we have the default collation already.
*/
if (m_ci->state & MY_CS_PRIMARY)
return m_ci;
/*
CREATE TABLE t1 (a CHAR(10) COLLATE DEFAULT)
CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;
Don't need to handle old_mode=UTF8_IS_UTF8MB3 here.
See comments in find_bin_collation.
*/
CHARSET_INFO *cs= get_charset_by_csname(m_ci->cs_name.str,
MY_CS_PRIMARY, MYF(MY_WME));
/*
The above should never fail, as we have default collations for
all character sets.
*/
DBUG_ASSERT(cs);
return cs;
}
/*
Resolve an empty or a contextually typed collation according to the
upper level default character set (and optionally a collation), e.g.:
CREATE TABLE t1 (a CHAR(10)) CHARACTER SET latin1;
CREATE TABLE t1 (a CHAR(10) BINARY) CHARACTER SET latin1;
CREATE TABLE t1 (a CHAR(10) COLLATE DEFAULT)
CHARACTER SET latin1 COLLATE latin1_bin;
"this" is the COLLATE clause (e.g. of a column)
"def" is the upper level CHARACTER SET clause (e.g. of a table)
*/
CHARSET_INFO *Lex_exact_charset_extended_collation_attrs_st::
resolved_to_character_set(CHARSET_INFO *def) const
{
DBUG_ASSERT(def);
switch (m_type) {
case TYPE_EMPTY:
return def;
case TYPE_CHARACTER_SET:
case TYPE_CHARACTER_SET_COLLATE_EXACT:
case TYPE_COLLATE_EXACT:
DBUG_ASSERT(m_ci);
return m_ci;
case TYPE_COLLATE_CONTEXTUALLY_TYPED:
{
Lex_exact_charset_opt_extended_collate tmp(def, true);
if (tmp.merge_context_collation_override(Lex_context_collation(m_ci)))
return NULL;
return tmp.collation().charset_info();
}
}
DBUG_ASSERT(0);
return NULL;
}
bool Lex_exact_charset_extended_collation_attrs_st::
merge_exact_collation(const Lex_exact_collation &cl)
{
switch (m_type) {
case TYPE_EMPTY:
/*
No CHARACTER SET clause
CHAR(10) NOT NULL COLLATE latin1_bin
*/
*this= Lex_exact_charset_extended_collation_attrs(cl);
return false;
case TYPE_CHARACTER_SET:
{
// CHARACTER SET latin1 .. COLLATE latin1_swedish_ci
Lex_exact_charset_opt_extended_collate tmp(m_ci, false);
if (tmp.merge_exact_collation(cl))
return true;
*this= Lex_exact_charset_extended_collation_attrs(tmp);
return false;
}
case TYPE_CHARACTER_SET_COLLATE_EXACT:
case TYPE_COLLATE_EXACT:
{
// [CHARACTER SET latin1] COLLATE latin1_bin .. COLLATE latin1_bin
return Lex_exact_collation(m_ci).raise_if_not_equal(cl);
}
case TYPE_COLLATE_CONTEXTUALLY_TYPED:
{
// COLLATE DEFAULT .. COLLATE latin1_swedish_ci
if (cl.raise_if_conflicts_with_context_collation(
Lex_context_collation(m_ci), true))
return true;
*this= Lex_exact_charset_extended_collation_attrs(cl);
return false;
}
}
DBUG_ASSERT(0);
return false;
}
bool Lex_exact_charset_extended_collation_attrs_st::
merge_context_collation(const Lex_context_collation &cl)
{
switch (m_type) {
case TYPE_EMPTY:
/*
No CHARACTER SET clause
CHAR(10) NOT NULL .. COLLATE DEFAULT
*/
*this= Lex_exact_charset_extended_collation_attrs(cl);
return false;
case TYPE_CHARACTER_SET:
{
// CHARACTER SET latin1 .. COLLATE DEFAULT
Lex_exact_charset_opt_extended_collate tmp(m_ci, false);
if (tmp.merge_context_collation(cl))
return true;
*this= Lex_exact_charset_extended_collation_attrs(tmp);
return false;
}
case TYPE_CHARACTER_SET_COLLATE_EXACT:
case TYPE_COLLATE_EXACT:
// [CHARACTER SET latin1] COLLATE latin1_swedish_ci .. COLLATE DEFAULT
return Lex_exact_collation(m_ci).
raise_if_conflicts_with_context_collation(cl, false);
case TYPE_COLLATE_CONTEXTUALLY_TYPED:
// COLLATE DEFAULT .. COLLATE DEFAULT
return Lex_context_collation(m_ci).raise_if_not_equal(cl);
}
DBUG_ASSERT(0);
return false;
}
bool Lex_exact_charset_opt_extended_collate::
merge_exact_collation(const Lex_exact_collation &cl)
{
// CHARACTER SET latin1 [COLLATE latin1_bin] .. COLLATE latin1_bin
if (m_with_collate)
return Lex_exact_collation(m_ci).raise_if_not_equal(cl);
if (raise_if_not_applicable(cl))
return true;
*this= Lex_exact_charset_opt_extended_collate(cl);
return false;
}
bool Lex_exact_charset_opt_extended_collate::
merge_context_collation(const Lex_context_collation &cl)
{
// CHARACTER SET latin1 [COLLATE latin1_bin] .. COLLATE DEFAULT
if (m_with_collate)
return Lex_exact_collation(m_ci).
raise_if_conflicts_with_context_collation(cl, false);
return merge_context_collation_override(cl);
}
bool Lex_exact_charset_extended_collation_attrs_st::
merge_collation(const Lex_extended_collation_st &cl)
{
switch (cl.type()) {
case Lex_extended_collation_st::TYPE_EXACT:
return merge_exact_collation(Lex_exact_collation(cl.charset_info()));
case Lex_extended_collation_st::TYPE_CONTEXTUALLY_TYPED:
return merge_context_collation(Lex_context_collation(cl.charset_info()));
}
DBUG_ASSERT(0);
return false;
}
/*
Mix an unordered combination of CHARACTER SET and COLLATE clauses
(i.e. COLLATE can come before CHARACTER SET).
Merge a CHARACTER SET clause.
@param cs - The "CHARACTER SET exact_charset_name".
*/
bool Lex_exact_charset_extended_collation_attrs_st::
merge_exact_charset(const Lex_exact_charset &cs)
{
DBUG_ASSERT(cs.charset_info());
switch (m_type) {
case TYPE_EMPTY:
// CHARACTER SET cs
*this= Lex_exact_charset_extended_collation_attrs(cs);
return false;
case TYPE_CHARACTER_SET:
// CHARACTER SET cs1 .. CHARACTER SET cs2
return Lex_exact_charset(m_ci).raise_if_not_equal(cs);
case TYPE_COLLATE_EXACT:
// COLLATE latin1_bin .. CHARACTER SET cs
if (cs.raise_if_not_applicable(Lex_exact_collation(m_ci)))
return true;
m_type= TYPE_CHARACTER_SET_COLLATE_EXACT;
return false;
case TYPE_CHARACTER_SET_COLLATE_EXACT:
// CHARACTER SET cs1 COLLATE cl .. CHARACTER SET cs2
return Lex_exact_charset_opt_extended_collate(m_ci, true).
raise_if_charsets_differ(cs);
case TYPE_COLLATE_CONTEXTUALLY_TYPED:
// COLLATE DEFAULT .. CHARACTER SET cs
{
Lex_exact_charset_opt_extended_collate tmp(cs);
if (tmp.merge_context_collation(Lex_context_collation(m_ci)))
return true;
*this= Lex_exact_charset_extended_collation_attrs(tmp);
return false;
}
}
DBUG_ASSERT(0);
return false;
}
bool Lex_extended_charset_extended_collation_attrs_st::merge_charset_default()
{
if (m_charset_order == CHARSET_TYPE_EMPTY)
m_charset_order= CHARSET_TYPE_CONTEXT;
Lex_opt_context_charset_st::merge_charset_default();
return false;
}
bool Lex_extended_charset_extended_collation_attrs_st::
merge_exact_charset(const Lex_exact_charset &cs)
{
if (m_charset_order == CHARSET_TYPE_EMPTY)
m_charset_order= CHARSET_TYPE_EXACT;
return Lex_exact_charset_extended_collation_attrs_st::merge_exact_charset(cs);
}
bool Lex_extended_charset_extended_collation_attrs_st::
raise_if_charset_conflicts_with_default(
const Lex_exact_charset_opt_extended_collate &def) const
{
DBUG_ASSERT(m_charset_order != CHARSET_TYPE_EMPTY || is_empty());
if (!my_charset_same(def.collation().charset_info(), m_ci))
{
raise_ER_CONFLICTING_DECLARATIONS("CHARACTER SET ", "DEFAULT",
def.collation().charset_info()->cs_name.str,
"CHARACTER SET ", m_ci->cs_name.str,
m_charset_order == CHARSET_TYPE_EXACT);
return true;
}
return false;
}
CHARSET_INFO *
Lex_extended_charset_extended_collation_attrs_st::
resolved_to_context(const Charset_collation_context &ctx) const
{
if (Lex_opt_context_charset_st::is_empty())
{
// Without CHARACTER SET DEFAULT
return Lex_exact_charset_extended_collation_attrs_st::
resolved_to_character_set(ctx.collate_default().charset_info());
}
// With CHARACTER SET DEFAULT
switch (type()) {
case TYPE_EMPTY:
// CHARACTER SET DEFAULT;
return ctx.charset_default().charset().charset_info();
case TYPE_CHARACTER_SET:
// CHARACTER SET DEFAULT CHARACTER SET cs_exact
if (raise_if_charset_conflicts_with_default(ctx.charset_default()))
{
/*
A possible scenario:
SET character_set_server=utf8mb4;
CREATE DATABASE db1 CHARACTER SET latin1 CHARACTER SET DEFAULT;
*/
return NULL;
}
return m_ci;
case TYPE_CHARACTER_SET_COLLATE_EXACT:
case TYPE_COLLATE_EXACT:
{
/*
CREATE DATABASE db1
COLLATE cl_exact
[ CHARACTER SET cs_exact ]
CHARACTER SET DEFAULT;
*/
if (m_type == TYPE_CHARACTER_SET_COLLATE_EXACT &&
raise_if_charset_conflicts_with_default(ctx.charset_default()))
{
/*
A possible scenario:
SET character_set_server=utf8mb4;
CREATE DATABASE db1
COLLATE latin1_bin
CHARACTER SET latin1
CHARACTER SET DEFAULT;
*/
return NULL;
}
/*
Now check that "COLLATE cl_exact" does not conflict with
CHARACTER SET DEFAULT.
*/
if (ctx.charset_default().
raise_if_not_applicable(Lex_exact_collation(m_ci)))
{
/*
A possible scenario:
SET character_set_server=utf8mb4;
CREATE DATABASE db1
COLLATE latin1_bin
CHARACTER SET DEFAULT;
*/
return NULL;
}
return m_ci;
}
case TYPE_COLLATE_CONTEXTUALLY_TYPED:
/*
Both CHARACTER SET and COLLATE are contextual:
ALTER DATABASE db1 CHARACTER SET DEFAULT COLLATE DEFAULT;
ALTER DATABASE db1 COLLATE DEFAULT CHARACTER SET DEFAULT;
*/
return Lex_exact_charset_extended_collation_attrs_st::
resolved_to_character_set(ctx.charset_default().
collation().charset_info());
}
DBUG_ASSERT(0);
return NULL;
}