mariadb/sql/lex_charset.cc
Alexander Barkov 133446828c MDEV-27009 Add UCA-14.0.0 collations
- Added one neutral and 22 tailored (language specific) collations based on
  Unicode Collation Algorithm version 14.0.0.

  Collations were added for Unicode character sets
  utf8mb3, utf8mb4, ucs2, utf16, utf32.

  Every tailoring was added with four accent and case
  sensitivity flag combinations, e.g:

  * utf8mb4_uca1400_swedish_as_cs
  * utf8mb4_uca1400_swedish_as_ci
  * utf8mb4_uca1400_swedish_ai_cs
  * utf8mb4_uca1400_swedish_ai_ci

  and their _nopad_ variants:

  * utf8mb4_uca1400_swedish_nopad_as_cs
  * utf8mb4_uca1400_swedish_nopad_as_ci
  * utf8mb4_uca1400_swedish_nopad_ai_cs
  * utf8mb4_uca1400_swedish_nopad_ai_ci

- Introducing the concept of contextually typed named collations:

  CREATE DATABASE db1 CHARACTER SET utf8mb4;
  CREATE TABLE db1.t1 (a CHAR(10) COLLATE uca1400_as_ci);

  The idea is that there is no need to specify the character set prefix
  in the new collation names. It's enough to type just the suffix
  "uca1400_as_ci". The character set is taken from the context.

  In the above example script the context character set is utf8mb4.
  So the CREATE TABLE will make a column with the collation
  utf8mb4_uca1400_as_ci.

  Short collation names can be used in any part of the SQL syntax
  where the COLLATE clause is understood.

- New collations are displayed only one time
  (without character set combinations) by these statements:

     SELECT * FROM INFORMATION_SCHEMA.COLLATIONS;
     SHOW COLLATION;

  For example, all these collations:
  - utf8mb3_uca1400_swedish_as_ci
  - utf8mb4_uca1400_swedish_as_ci
  - ucs2_uca1400_swedish_as_ci
  - utf16_uca1400_swedish_as_ci
  - utf32_uca1400_swedish_as_ci
  have just one entry in INFORMATION_SCHEMA.COLLATIONS and SHOW COLLATION,
  with COLLATION_NAME equal to "uca1400_swedish_as_ci", which is the suffix
  without the character set name:

SELECT COLLATION_NAME FROM INFORMATION_SCHEMA.COLLATIONS
WHERE COLLATION_NAME LIKE '%uca1400_swedish_as_ci';

+-----------------------+
| COLLATION_NAME        |
+-----------------------+
| uca1400_swedish_as_ci |
+-----------------------+

  Note, the behaviour of old collations did not change.
  Non-unicode collations (e.g. latin1_swedish_ci) and
  old UCA-4.0.0 collations (e.g. utf8mb4_unicode_ci)
  are still displayed with the character set prefix, as before.

- The structure of the table INFORMATION_SCHEMA.COLLATIONS was changed.

  The NOT NULL constraint was removed from these columns:
  - CHARACTER_SET_NAME
  - ID
  - IS_DEFAULT
  and from the corresponding columns in SHOW COLLATION.

  For example:

SELECT COLLATION_NAME, CHARACTER_SET_NAME, ID, IS_DEFAULT
FROM INFORMATION_SCHEMA.COLLATIONS
WHERE COLLATION_NAME LIKE '%uca1400_swedish_as_ci';
+-----------------------+--------------------+------+------------+
| COLLATION_NAME        | CHARACTER_SET_NAME | ID   | IS_DEFAULT |
+-----------------------+--------------------+------+------------+
| uca1400_swedish_as_ci | NULL               | NULL | NULL       |
+-----------------------+--------------------+------+------------+

  The NULL value in these columns now means that the collation
  is applicable to multiple character sets.
  The behaviour of old collations did not change.
  Make sure your client programs can handle NULL values in these columns.

- The structure of the table
  INFORMATION_SCHEMA.COLLATION_CHARACTER_SET_APPLICABILITY was changed.

  Three new NOT NULL columns were added:
  - FULL_COLLATION_NAME
  - ID
  - IS_DEFAULT

  New collations have multiple entries in COLLATION_CHARACTER_SET_APPLICABILITY.
  The column COLLATION_NAME contains the collation name without the character
  set prefix. The column FULL_COLLATION_NAME contains the collation name with
  the character set prefix.

  Old collations have full collation name in both FULL_COLLATION_NAME and
  COLLATION_NAME.

SELECT COLLATION_NAME, FULL_COLLATION_NAME, CHARACTER_SET_NAME, ID, IS_DEFAULT
FROM INFORMATION_SCHEMA.COLLATION_CHARACTER_SET_APPLICABILITY
WHERE FULL_COLLATION_NAME RLIKE '^(utf8mb4|latin1).*swedish.*ci$';
+-----------------------------+-------------------------------------+--------------------+------+------------+
| COLLATION_NAME              | FULL_COLLATION_NAME                 | CHARACTER_SET_NAME | ID   | IS_DEFAULT |
+-----------------------------+-------------------------------------+--------------------+------+------------+
| latin1_swedish_ci           | latin1_swedish_ci                   | latin1             |    8 | Yes        |
| latin1_swedish_nopad_ci     | latin1_swedish_nopad_ci             | latin1             | 1032 |            |
| utf8mb4_swedish_ci          | utf8mb4_swedish_ci                  | utf8mb4            |  232 |            |
| uca1400_swedish_ai_ci       | utf8mb4_uca1400_swedish_ai_ci       | utf8mb4            | 2368 |            |
| uca1400_swedish_as_ci       | utf8mb4_uca1400_swedish_as_ci       | utf8mb4            | 2370 |            |
| uca1400_swedish_nopad_ai_ci | utf8mb4_uca1400_swedish_nopad_ai_ci | utf8mb4            | 2372 |            |
| uca1400_swedish_nopad_as_ci | utf8mb4_uca1400_swedish_nopad_as_ci | utf8mb4            | 2374 |            |
+-----------------------------+-------------------------------------+--------------------+------+------------+

- Other INFORMATION_SCHEMA queries:

  SELECT COLLATION_NAME FROM INFORMATION_SCHEMA.COLUMNS;
  SELECT COLLATION_NAME FROM INFORMATION_SCHEMA.PARAMETERS;
  SELECT TABLE_COLLATION FROM INFORMATION_SCHEMA.TABLES;
  SELECT DEFAULT_COLLATION_NAME FROM INFORMATION_SCHEMA.SCHEMATA;
  SELECT COLLATION_NAME FROM INFORMATION_SCHEMA.ROUTINES;
  SELECT COLLATION_CONNECTION FROM INFORMATION_SCHEMA.EVENTS;
  SELECT DATABASE_COLLATION FROM INFORMATION_SCHEMA.EVENTS;
  SELECT COLLATION_CONNECTION FROM INFORMATION_SCHEMA.ROUTINES;
  SELECT DATABASE_COLLATION FROM INFORMATION_SCHEMA.ROUTINES;
  SELECT COLLATION_CONNECTION FROM INFORMATION_SCHEMA.TRIGGERS;
  SELECT DATABASE_COLLATION FROM INFORMATION_SCHEMA.TRIGGERS;
  SELECT COLLATION_CONNECTION FROM INFORMATION_SCHEMA.VIEWS;

  display full collation names, including the character set prefix,
  for all collations, including new collations.

  Corresponding SHOW commands also display full collation names
  in collation related columns:

  SHOW CREATE TABLE t1;
  SHOW CREATE DATABASE db1;
  SHOW TABLE STATUS;
  SHOW CREATE FUNCTION f1;
  SHOW CREATE PROCEDURE p1;
  SHOW CREATE EVENT ev1;
  SHOW CREATE TRIGGER tr1;
  SHOW CREATE VIEW;

  These INFORMATION_SCHEMA queries and SHOW statements may change in
  the future, to display short collation names.
2022-08-10 15:04:24 +02:00

775 lines
22 KiB
C++

/* Copyright (c) 2021, 2022, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
#include "my_global.h"
#include "my_sys.h"
#include "m_ctype.h"
#include "lex_charset.h"
#include "mysqld_error.h"
/**
  Report ER_CONFLICTING_DECLARATIONS for a pair of conflicting clauses.

  @param clause1        Keyword text of the first clause (e.g. "COLLATE ")
  @param name1          Name given in the first clause
  @param clause2        Keyword text of the second clause
  @param name2          Name given in the second clause
  @param reverse_order  When true, the second pair is printed before
                        the first pair in the error message
*/
static void
raise_ER_CONFLICTING_DECLARATIONS(const char *clause1,
                                  const char *name1,
                                  const char *clause2,
                                  const char *name2,
                                  bool reverse_order)
{
  if (reverse_order)
  {
    // Swap the two (clause, name) pairs in the message
    my_error(ER_CONFLICTING_DECLARATIONS, MYF(0),
             clause2, name2, clause1, name1);
    return;
  }
  my_error(ER_CONFLICTING_DECLARATIONS, MYF(0),
           clause1, name1, clause2, name2);
}
static void
raise_ER_CONFLICTING_DECLARATIONS(const char *clause1,
const char *name1,
const char *name1_part2,
const char *clause2,
const char *name2,
bool reverse_order)
{
char def[MY_CS_CHARACTER_SET_NAME_SIZE * 2];
my_snprintf(def, sizeof(def), "%s (%s)", name1, name1_part2);
raise_ER_CONFLICTING_DECLARATIONS(clause1, def,
clause2, name2,
reverse_order);
}
/**
  Compare two exact CHARACTER SET clauses.

  The comparison is done by CHARSET_INFO pointer identity.

  @param rhs  The other CHARACTER SET clause
  @retval false  Both clauses refer to the same CHARSET_INFO
  @retval true   They differ; ER_CONFLICTING_DECLARATIONS was raised
*/
bool Lex_exact_charset::raise_if_not_equal(const Lex_exact_charset &rhs) const
{
  if (m_ci != rhs.m_ci)
  {
    my_error(ER_CONFLICTING_DECLARATIONS, MYF(0),
             "CHARACTER SET ", m_ci->cs_name.str,
             "CHARACTER SET ", rhs.m_ci->cs_name.str);
    return true;
  }
  return false;
}
bool Lex_exact_charset::
raise_if_not_applicable(const Lex_exact_collation &cl) const
{
return Lex_exact_charset_opt_extended_collate(m_ci, false).
raise_if_not_applicable(cl);
}
/**
  Check that "cs" names the same character set as this object.

  @param cs  The CHARACTER SET clause to compare with
  @retval false  my_charset_same() reports the character sets as the same
  @retval true   They differ; ER_CONFLICTING_DECLARATIONS was raised
*/
bool Lex_exact_charset_opt_extended_collate::
  raise_if_charsets_differ(const Lex_exact_charset &cs) const
{
  if (my_charset_same(m_ci, cs.charset_info()))
    return false;

  my_error(ER_CONFLICTING_DECLARATIONS, MYF(0),
           "CHARACTER SET ", m_ci->cs_name.str,
           "CHARACTER SET ", cs.charset_info()->cs_name.str);
  return true;
}
/**
  Check that the exact collation "cl" belongs to the character set
  of this object.

  @param cl  The exact collation to test
  @retval false  my_charset_same() reports matching character sets
  @retval true   Mismatch; ER_COLLATION_CHARSET_MISMATCH was raised
*/
bool Lex_exact_charset_opt_extended_collate::
  raise_if_not_applicable(const Lex_exact_collation &cl) const
{
  if (my_charset_same(m_ci, cl.charset_info()))
    return false;

  my_error(ER_COLLATION_CHARSET_MISMATCH, MYF(0),
           cl.charset_info()->coll_name.str, m_ci->cs_name.str);
  return true;
}
/**
  Compare two exact COLLATE clauses by CHARSET_INFO pointer identity.

  @param cl  The other exact collation
  @retval false  Both refer to the same collation
  @retval true   They differ; ER_CONFLICTING_DECLARATIONS was raised
*/
bool
Lex_exact_collation::raise_if_not_equal(const Lex_exact_collation &cl) const
{
  if (m_ci == cl.m_ci)
    return false;

  my_error(ER_CONFLICTING_DECLARATIONS, MYF(0),
           "COLLATE ", m_ci->coll_name.str,
           "COLLATE ", cl.m_ci->coll_name.str);
  return true;
}
/*
Merge an exact collation and a contexual collation.
@param cl - The contextual collation to merge to "this".
@param reverse_order - If the contextual collation is on the left side
Use reverse_order as follows:
false: COLLATE latin1_swedish_ci COLLATE DEFAULT
true: COLLATE DEFAULT COLLATE latin1_swedish_ci
*/
bool
Lex_exact_collation::
raise_if_conflicts_with_context_collation(const Lex_context_collation &cl,
bool reverse_order) const
{
if (cl.is_contextually_typed_collate_default())
{
if (!(m_ci->state & MY_CS_PRIMARY))
{
raise_ER_CONFLICTING_DECLARATIONS("COLLATE ", m_ci->coll_name.str,
"COLLATE ", "DEFAULT", reverse_order);
return true;
}
return false;
}
if (cl.is_contextually_typed_binary_style())
{
if (!(m_ci->state & MY_CS_BINSORT))
{
raise_ER_CONFLICTING_DECLARATIONS("COLLATE ", m_ci->coll_name.str,
"", "BINARY", reverse_order);
return true;
}
return false;
}
DBUG_ASSERT(!strncmp(cl.charset_info()->coll_name.str,
STRING_WITH_LEN("utf8mb4_uca1400_")));
Charset_loader_server loader;
CHARSET_INFO *ci= loader.get_exact_collation_by_context_name(
m_ci,
cl.collation_name_context_suffix().str,
MYF(0));
if (m_ci != ci)
{
raise_ER_CONFLICTING_DECLARATIONS("COLLATE ",
m_ci->coll_name.str,
"COLLATE ",
cl.collation_name_for_show().str,
reverse_order);
return true;
}
return false;
}
/**
  Compare two contextually typed collations by CHARSET_INFO pointer
  identity.

  In the error message a BINARY-style collation is printed without
  the "COLLATE " prefix; other contextual collations are printed with it.

  @param cl  The other contextual collation
  @retval false  Both refer to the same collation
  @retval true   They differ; ER_CONFLICTING_DECLARATIONS was raised
*/
bool
Lex_context_collation::raise_if_not_equal(const Lex_context_collation &cl) const
{
  if (m_ci == cl.m_ci)
    return false;

  const char *lhs_clause= is_contextually_typed_binary_style() ?
                          "" : "COLLATE ";
  const char *rhs_clause= cl.is_contextually_typed_binary_style() ?
                          "" : "COLLATE ";
  my_error(ER_CONFLICTING_DECLARATIONS, MYF(0),
           lhs_clause, collation_name_for_show().str,
           rhs_clause, cl.collation_name_for_show().str);
  return true;
}
/*
Resolve a context collation to the character set (when the former gets known):
CREATE TABLE t1 (a CHAR(10) BINARY) CHARACTER SET latin1;
CREATE DATABASE db1 COLLATE DEFAULT CHARACTER SET latin1;
*/
bool Lex_exact_charset_opt_extended_collate::
merge_context_collation_override(const Lex_context_collation &cl)
{
DBUG_ASSERT(m_ci);
// CHAR(10) BINARY
if (cl.is_contextually_typed_binary_style())
{
CHARSET_INFO *ci= find_bin_collation();
if (!ci)
return true;
m_ci= ci;
m_with_collate= true;
return false;
}
// COLLATE DEFAULT
if (cl.is_contextually_typed_collate_default())
{
CHARSET_INFO *ci= find_default_collation();
DBUG_ASSERT(ci);
if (!ci)
return true;
m_ci= ci;
m_with_collate= true;
return false;
}
DBUG_ASSERT(!strncmp(cl.charset_info()->coll_name.str,
STRING_WITH_LEN("utf8mb4_uca1400_")));
CHARSET_INFO *ci= Charset_loader_server().
get_exact_collation_by_context_name_or_error(m_ci,
cl.charset_info()->coll_name.str + 8, MYF(0));
if (!ci)
return true;
m_ci= ci;
m_with_collate= true;
return false;
}
bool Lex_extended_collation_st::merge_exact_charset(const Lex_exact_charset &cs)
{
switch (m_type) {
case TYPE_EXACT:
{
// COLLATE latin1_swedish_ci .. CHARACTER SET latin1
return cs.raise_if_not_applicable(Lex_exact_collation(m_ci));
}
case TYPE_CONTEXTUALLY_TYPED:
{
// COLLATE DEFAULT .. CHARACTER SET latin1
Lex_exact_charset_opt_extended_collate tmp(cs);
if (tmp.merge_context_collation(Lex_context_collation(m_ci)))
return true;
*this= Lex_extended_collation(tmp.collation());
return false;
}
}
DBUG_ASSERT(0);
return false;
}
bool Lex_extended_collation_st::
merge_exact_collation(const Lex_exact_collation &rhs)
{
switch (m_type) {
case TYPE_EXACT:
/*
EXACT + EXACT
COLLATE latin1_bin .. COLLATE latin1_bin
*/
return Lex_exact_collation(m_ci).raise_if_not_equal(rhs);
case TYPE_CONTEXTUALLY_TYPED:
{
/*
CONTEXT + EXACT
CHAR(10) COLLATE DEFAULT .. COLLATE latin1_swedish_ci
CHAR(10) BINARY .. COLLATE latin1_bin
CHAR(10) COLLATE uca1400_as_ci .. COLLATE latin1_bin
*/
if (rhs.raise_if_conflicts_with_context_collation(
Lex_context_collation(m_ci), true))
return true;
*this= Lex_extended_collation(rhs);
return false;
}
}
DBUG_ASSERT(0);
return false;
}
bool Lex_extended_collation_st::
raise_if_conflicts_with_context_collation(const Lex_context_collation &rhs)
const
{
switch (m_type) {
case TYPE_EXACT:
/*
EXACT + CONTEXT
COLLATE latin1_swedish_ci .. COLLATE DEFAULT
*/
return Lex_exact_collation(m_ci).
raise_if_conflicts_with_context_collation(rhs, false);
case TYPE_CONTEXTUALLY_TYPED:
{
/*
CONTEXT + CONTEXT:
CHAR(10) BINARY .. COLLATE DEFAULT - not supported by the parser
CREATE DATABASE db1 COLLATE DEFAULT COLLATE DEFAULT;
*/
return Lex_context_collation(m_ci).raise_if_not_equal(rhs);
}
}
DBUG_ASSERT(0);
return false;
}
/*
Merge two non-empty COLLATE clauses.
*/
bool Lex_extended_collation_st::merge(const Lex_extended_collation_st &rhs)
{
switch (rhs.type()) {
case TYPE_EXACT:
/*
EXACT + EXACT
COLLATE latin1_swedish_ci .. COLLATE latin1_swedish_ci
CONTEXT + EXACT
COLLATE DEFAULT .. COLLATE latin1_swedish_ci
CHAR(10) BINARY .. COLLATE latin1_bin
*/
return merge_exact_collation(Lex_exact_collation(rhs.m_ci));
case TYPE_CONTEXTUALLY_TYPED:
/*
EXACT + CONTEXT
COLLATE latin1_swedish_ci .. COLLATE DEFAULT
CONTEXT + CONTEXT
COLLATE DEFAULT .. COLLATE DEFAULT
CHAR(10) BINARY .. COLLATE DEFAULT
*/
return raise_if_conflicts_with_context_collation(
Lex_context_collation(rhs.m_ci));
}
DBUG_ASSERT(0);
return false;
}
/**
  Return the name of this contextual collation for use in messages:
  "DEFAULT" for COLLATE DEFAULT, "BINARY" for the BINARY style,
  otherwise whatever collation_name_context_suffix() returns.
*/
LEX_CSTRING Lex_context_collation::collation_name_for_show() const
{
  if (is_contextually_typed_collate_default())
  {
    const LEX_CSTRING default_name= {STRING_WITH_LEN("DEFAULT")};
    return default_name;
  }
  if (is_contextually_typed_binary_style())
  {
    const LEX_CSTRING binary_name= {STRING_WITH_LEN("BINARY")};
    return binary_name;
  }
  return collation_name_context_suffix();
}
bool Lex_extended_collation_st::set_by_name(const char *name, myf my_flags)
{
Charset_loader_server loader;
CHARSET_INFO *cs;
if (!strncasecmp(name, STRING_WITH_LEN("uca1400_")))
{
if (!(cs= loader.get_context_collation_or_error(name, my_flags)))
return true;
*this= Lex_extended_collation(Lex_context_collation(cs));
return false;
}
if (!(cs= loader.get_exact_collation_or_error(name, my_flags)))
return true;
*this= Lex_extended_collation(Lex_exact_collation(cs));
return false;
}
/**
  Find a collation with binary comparison rules for the character set
  of m_ci.

  @return  m_ci itself when it already has MY_CS_BINSORT, otherwise the
           result of get_charset_by_csname() for MY_CS_BINSORT.
  @retval NULL  No such collation; ER_UNKNOWN_COLLATION was raised
*/
CHARSET_INFO *Lex_exact_charset_opt_extended_collate::find_bin_collation() const
{
  /*
    We don't need to handle old_mode=UTF8_IS_UTF8MB3 here,
    because "m_ci" points to a real character set name.
    It can be either "utf8mb3" or "utf8mb4". It cannot be "utf8".
    No thd->get_utf8_flag() flag passed to get_charset_by_csname().
  */
  DBUG_ASSERT(m_ci->cs_name.length !=4 || memcmp(m_ci->cs_name.str, "utf8", 4));

  /*
    CREATE TABLE t1 (a CHAR(10) BINARY)
      CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;
    Nothing to do, we have the binary collation already.
  */
  if (m_ci->state & MY_CS_BINSORT)
    return m_ci;

  // CREATE TABLE t1 (a CHAR(10) BINARY) CHARACTER SET utf8mb4;
  CHARSET_INFO *bin_cs= get_charset_by_csname(m_ci->cs_name.str,
                                              MY_CS_BINSORT, MYF(0));
  if (bin_cs)
    return bin_cs;

  // Build "<charset>_bin" for the error message
  char tmp[65];
  strxnmov(tmp, sizeof(tmp)-1, m_ci->cs_name.str, "_bin", NULL);
  my_error(ER_UNKNOWN_COLLATION, MYF(0), tmp);
  return NULL;
}
/**
  Find the collation flagged MY_CS_PRIMARY for the character set of m_ci.

  @return  m_ci itself when it already has MY_CS_PRIMARY, otherwise the
           result of get_charset_by_csname() for MY_CS_PRIMARY.
*/
CHARSET_INFO *
Lex_exact_charset_opt_extended_collate::find_default_collation() const
{
  // See comments in find_bin_collation()
  DBUG_ASSERT(m_ci->cs_name.length !=4 || memcmp(m_ci->cs_name.str, "utf8", 4));

  const bool already_default= (m_ci->state & MY_CS_PRIMARY) != 0;
  if (already_default)
  {
    /*
      CREATE TABLE t1 (a CHAR(10) COLLATE DEFAULT) CHARACTER SET utf8mb4;
      Nothing to do, we have the default collation already.
    */
    return m_ci;
  }

  /*
    CREATE TABLE t1 (a CHAR(10) COLLATE DEFAULT)
      CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;

    Don't need to handle old_mode=UTF8_IS_UTF8MB3 here.
    See comments in find_bin_collation.
  */
  CHARSET_INFO *primary_cs= get_charset_by_csname(m_ci->cs_name.str,
                                                  MY_CS_PRIMARY, MYF(MY_WME));
  /*
    The above should never fail, as we have default collations for
    all character sets.
  */
  DBUG_ASSERT(primary_cs);
  return primary_cs;
}
/*
  Resolve an empty or a contextually typed collation according to the
  upper level default character set (and optionally a collation), e.g.:
    CREATE TABLE t1 (a CHAR(10)) CHARACTER SET latin1;
    CREATE TABLE t1 (a CHAR(10) BINARY) CHARACTER SET latin1;
    CREATE TABLE t1 (a CHAR(10) COLLATE DEFAULT)
      CHARACTER SET latin1 COLLATE latin1_bin;

  "this" is the COLLATE clause (e.g. of a column)
  "def" is the upper level CHARACTER SET clause (e.g. of a table)

  @return  The resolved collation, or NULL when resolving a contextually
           typed collation failed.
*/
CHARSET_INFO *Lex_exact_charset_extended_collation_attrs_st::
  resolved_to_character_set(CHARSET_INFO *def) const
{
  DBUG_ASSERT(def);

  switch (m_type) {
  case TYPE_EMPTY:
    // Nothing was specified: inherit the upper level default
    return def;
  case TYPE_CHARACTER_SET:
  case TYPE_CHARACTER_SET_COLLATE_EXACT:
  case TYPE_COLLATE_EXACT:
    // Already exact: no resolution needed
    DBUG_ASSERT(m_ci);
    return m_ci;
  case TYPE_COLLATE_CONTEXTUALLY_TYPED:
  {
    // Resolve the contextual collation against the upper level default
    Lex_exact_charset_opt_extended_collate upper(def, true);
    const Lex_context_collation context(m_ci);
    if (upper.merge_context_collation_override(context))
      return NULL;
    return upper.collation().charset_info();
  }
  }
  DBUG_ASSERT(0);
  return NULL;
}
bool Lex_exact_charset_extended_collation_attrs_st::
merge_exact_collation(const Lex_exact_collation &cl)
{
switch (m_type) {
case TYPE_EMPTY:
/*
No CHARACTER SET clause
CHAR(10) NOT NULL COLLATE latin1_bin
*/
*this= Lex_exact_charset_extended_collation_attrs(cl);
return false;
case TYPE_CHARACTER_SET:
{
// CHARACTER SET latin1 .. COLLATE latin1_swedish_ci
Lex_exact_charset_opt_extended_collate tmp(m_ci, false);
if (tmp.merge_exact_collation(cl))
return true;
*this= Lex_exact_charset_extended_collation_attrs(tmp);
return false;
}
case TYPE_CHARACTER_SET_COLLATE_EXACT:
case TYPE_COLLATE_EXACT:
{
// [CHARACTER SET latin1] COLLATE latin1_bin .. COLLATE latin1_bin
return Lex_exact_collation(m_ci).raise_if_not_equal(cl);
}
case TYPE_COLLATE_CONTEXTUALLY_TYPED:
{
// COLLATE DEFAULT .. COLLATE latin1_swedish_ci
if (cl.raise_if_conflicts_with_context_collation(
Lex_context_collation(m_ci), true))
return true;
*this= Lex_exact_charset_extended_collation_attrs(cl);
return false;
}
}
DBUG_ASSERT(0);
return false;
}
bool Lex_exact_charset_extended_collation_attrs_st::
merge_context_collation(const Lex_context_collation &cl)
{
switch (m_type) {
case TYPE_EMPTY:
/*
No CHARACTER SET clause
CHAR(10) NOT NULL .. COLLATE DEFAULT
*/
*this= Lex_exact_charset_extended_collation_attrs(cl);
return false;
case TYPE_CHARACTER_SET:
{
// CHARACTER SET latin1 .. COLLATE DEFAULT
Lex_exact_charset_opt_extended_collate tmp(m_ci, false);
if (tmp.merge_context_collation(cl))
return true;
*this= Lex_exact_charset_extended_collation_attrs(tmp);
return false;
}
case TYPE_CHARACTER_SET_COLLATE_EXACT:
case TYPE_COLLATE_EXACT:
// [CHARACTER SET latin1] COLLATE latin1_swedish_ci .. COLLATE DEFAULT
return Lex_exact_collation(m_ci).
raise_if_conflicts_with_context_collation(cl, false);
case TYPE_COLLATE_CONTEXTUALLY_TYPED:
// COLLATE DEFAULT .. COLLATE DEFAULT
return Lex_context_collation(m_ci).raise_if_not_equal(cl);
}
DBUG_ASSERT(0);
return false;
}
bool Lex_exact_charset_opt_extended_collate::
merge_exact_collation(const Lex_exact_collation &cl)
{
// CHARACTER SET latin1 [COLLATE latin1_bin] .. COLLATE latin1_bin
if (m_with_collate)
return Lex_exact_collation(m_ci).raise_if_not_equal(cl);
return merge_exact_collation_override(cl);
}
/**
  Replace "this" with the exact collation "cl", provided that
  raise_if_not_applicable() accepts it for the current character set.

  @param cl  The exact collation
  @retval false  "this" was replaced by "cl"
  @retval true   "cl" is not applicable (reported by the called check)
*/
bool Lex_exact_charset_opt_extended_collate::
  merge_exact_collation_override(const Lex_exact_collation &cl)
{
  if (!raise_if_not_applicable(cl))
  {
    *this= Lex_exact_charset_opt_extended_collate(cl);
    return false;
  }
  return true;
}
bool Lex_exact_charset_opt_extended_collate::
merge_context_collation(const Lex_context_collation &cl)
{
// CHARACTER SET latin1 [COLLATE latin1_bin] .. COLLATE DEFAULT
if (m_with_collate)
return Lex_exact_collation(m_ci).
raise_if_conflicts_with_context_collation(cl, false);
return merge_context_collation_override(cl);
}
bool Lex_exact_charset_extended_collation_attrs_st::
merge_collation(const Lex_extended_collation_st &cl)
{
switch (cl.type()) {
case Lex_extended_collation_st::TYPE_EXACT:
return merge_exact_collation(Lex_exact_collation(cl.charset_info()));
case Lex_extended_collation_st::TYPE_CONTEXTUALLY_TYPED:
return merge_context_collation(Lex_context_collation(cl.charset_info()));
}
DBUG_ASSERT(0);
return false;
}
/*
Mix an unordered combination of CHARACTER SET and COLLATE clauses
(i.e. COLLATE can come before CHARACTER SET).
Merge a CHARACTER SET clause.
@param cs - The "CHARACTER SET exact_charset_name".
*/
bool Lex_exact_charset_extended_collation_attrs_st::
merge_exact_charset(const Lex_exact_charset &cs)
{
DBUG_ASSERT(cs.charset_info());
switch (m_type) {
case TYPE_EMPTY:
// CHARACTER SET cs
*this= Lex_exact_charset_extended_collation_attrs(cs);
return false;
case TYPE_CHARACTER_SET:
// CHARACTER SET cs1 .. CHARACTER SET cs2
return Lex_exact_charset(m_ci).raise_if_not_equal(cs);
case TYPE_COLLATE_EXACT:
// COLLATE latin1_bin .. CHARACTER SET cs
if (cs.raise_if_not_applicable(Lex_exact_collation(m_ci)))
return true;
m_type= TYPE_CHARACTER_SET_COLLATE_EXACT;
return false;
case TYPE_CHARACTER_SET_COLLATE_EXACT:
// CHARACTER SET cs1 COLLATE cl .. CHARACTER SET cs2
return Lex_exact_charset_opt_extended_collate(m_ci, true).
raise_if_charsets_differ(cs);
case TYPE_COLLATE_CONTEXTUALLY_TYPED:
// COLLATE DEFAULT .. CHARACTER SET cs
{
Lex_exact_charset_opt_extended_collate tmp(cs);
if (tmp.merge_context_collation(Lex_context_collation(m_ci)))
return true;
*this= Lex_exact_charset_extended_collation_attrs(tmp);
return false;
}
}
DBUG_ASSERT(0);
return false;
}
/**
  Merge a CHARACTER SET DEFAULT clause.

  If no CHARACTER SET clause was recorded so far, m_charset_order is set
  to CHARSET_TYPE_CONTEXT. Then the clause is merged by
  Lex_opt_context_charset_st::merge_charset_default().

  @return  Always false
*/
bool Lex_extended_charset_extended_collation_attrs_st::merge_charset_default()
{
  const bool first_charset_clause= (m_charset_order == CHARSET_TYPE_EMPTY);
  if (first_charset_clause)
  {
    m_charset_order= CHARSET_TYPE_CONTEXT;
  }
  Lex_opt_context_charset_st::merge_charset_default();
  return false;
}
/**
  Merge a "CHARACTER SET exact_charset_name" clause.

  If no CHARACTER SET clause was recorded so far, m_charset_order is set
  to CHARSET_TYPE_EXACT. Then the clause is merged by
  Lex_exact_charset_extended_collation_attrs_st::merge_exact_charset().

  @param cs  The exact character set
  @return    The result of the delegated merge (true means an error)
*/
bool Lex_extended_charset_extended_collation_attrs_st::
  merge_exact_charset(const Lex_exact_charset &cs)
{
  const bool first_charset_clause= (m_charset_order == CHARSET_TYPE_EMPTY);
  if (first_charset_clause)
  {
    m_charset_order= CHARSET_TYPE_EXACT;
  }
  const bool failed=
    Lex_exact_charset_extended_collation_attrs_st::merge_exact_charset(cs);
  return failed;
}
/**
  Check that the character set of m_ci is the same as the character set
  that CHARACTER SET DEFAULT resolves to.

  @param def  What CHARACTER SET DEFAULT resolves to
  @retval false  my_charset_same() reports the character sets as the same
  @retval true   They differ; ER_CONFLICTING_DECLARATIONS was raised.
                 The clause order in the message is reversed when
                 m_charset_order is CHARSET_TYPE_EXACT.
*/
bool Lex_extended_charset_extended_collation_attrs_st::
  raise_if_charset_conflicts_with_default(
    const Lex_exact_charset_opt_extended_collate &def) const
{
  DBUG_ASSERT(m_charset_order != CHARSET_TYPE_EMPTY || is_empty());

  if (my_charset_same(def.collation().charset_info(), m_ci))
    return false;

  const bool exact_charset_came_first= (m_charset_order == CHARSET_TYPE_EXACT);
  raise_ER_CONFLICTING_DECLARATIONS("CHARACTER SET ", "DEFAULT",
                                    def.collation().charset_info()->cs_name.str,
                                    "CHARACTER SET ", m_ci->cs_name.str,
                                    exact_charset_came_first);
  return true;
}
/**
  Resolve the accumulated CHARACTER SET / COLLATE attributes, including
  an optional CHARACTER SET DEFAULT clause, against a context.

  @param ctx  The context supplying what COLLATE DEFAULT and
              CHARACTER SET DEFAULT resolve to
  @return     The resolved collation
  @retval NULL  A conflict was detected (reported by the called check),
                or resolving a contextually typed collation failed
*/
CHARSET_INFO *
Lex_extended_charset_extended_collation_attrs_st::
  resolved_to_context(const Charset_collation_context &ctx) const
{
  if (Lex_opt_context_charset_st::is_empty())
  {
    // Without CHARACTER SET DEFAULT
    CHARSET_INFO *upper_default= ctx.collate_default().charset_info();
    return Lex_exact_charset_extended_collation_attrs_st::
             resolved_to_character_set(upper_default);
  }

  // With CHARACTER SET DEFAULT
  switch (type()) {
  case TYPE_EMPTY:
    // CHARACTER SET DEFAULT;
    return ctx.charset_default().charset().charset_info();

  case TYPE_CHARACTER_SET:
  {
    /*
      CHARACTER SET DEFAULT CHARACTER SET cs_exact

      A possible conflict scenario:
        SET character_set_server=utf8mb4;
        CREATE DATABASE db1 CHARACTER SET latin1 CHARACTER SET DEFAULT;
    */
    const bool conflict=
      raise_if_charset_conflicts_with_default(ctx.charset_default());
    if (conflict)
      return NULL;
    return m_ci;
  }

  case TYPE_CHARACTER_SET_COLLATE_EXACT:
  case TYPE_COLLATE_EXACT:
  {
    /*
      CREATE DATABASE db1
        COLLATE cl_exact
        [ CHARACTER SET cs_exact ]
        CHARACTER SET DEFAULT;
    */
    const bool has_exact_charset= (m_type == TYPE_CHARACTER_SET_COLLATE_EXACT);
    if (has_exact_charset &&
        raise_if_charset_conflicts_with_default(ctx.charset_default()))
    {
      /*
        A possible scenario:
          SET character_set_server=utf8mb4;
          CREATE DATABASE db1
            COLLATE latin1_bin
            CHARACTER SET latin1
            CHARACTER SET DEFAULT;
      */
      return NULL;
    }

    /*
      Now check that "COLLATE cl_exact" does not conflict with
      CHARACTER SET DEFAULT.

      A possible conflict scenario:
        SET character_set_server=utf8mb4;
        CREATE DATABASE db1
          COLLATE latin1_bin
          CHARACTER SET DEFAULT;
    */
    const Lex_exact_collation exact(m_ci);
    if (ctx.charset_default().raise_if_not_applicable(exact))
      return NULL;
    return m_ci;
  }

  case TYPE_COLLATE_CONTEXTUALLY_TYPED:
  {
    /*
      Both CHARACTER SET and COLLATE are contextual:
        ALTER DATABASE db1 CHARACTER SET DEFAULT COLLATE DEFAULT;
        ALTER DATABASE db1 COLLATE DEFAULT CHARACTER SET DEFAULT;
    */
    CHARSET_INFO *upper_default=
      ctx.charset_default().collation().charset_info();
    return Lex_exact_charset_extended_collation_attrs_st::
             resolved_to_character_set(upper_default);
  }
  }
  DBUG_ASSERT(0);
  return NULL;
}