mirror of
https://github.com/MariaDB/server.git
synced 2025-01-16 12:02:42 +01:00
133446828c
- Added one neutral and 22 tailored (language-specific) collations based on Unicode Collation Algorithm version 14.0.0. Collations were added for Unicode character sets utf8mb3, utf8mb4, ucs2, utf16, utf32. Every tailoring was added with four accent and case sensitivity flag combinations, e.g.: * utf8mb4_uca1400_swedish_as_cs * utf8mb4_uca1400_swedish_as_ci * utf8mb4_uca1400_swedish_ai_cs * utf8mb4_uca1400_swedish_ai_ci and their _nopad_ variants: * utf8mb4_uca1400_swedish_nopad_as_cs * utf8mb4_uca1400_swedish_nopad_as_ci * utf8mb4_uca1400_swedish_nopad_ai_cs * utf8mb4_uca1400_swedish_nopad_ai_ci - Introduced the concept of contextually typed named collations: CREATE DATABASE db1 CHARACTER SET utf8mb4; CREATE TABLE db1.t1 (a CHAR(10) COLLATE uca1400_as_ci); The idea is that there is no need to specify the character set prefix in the new collation names. It's enough to type just the suffix "uca1400_as_ci". The character set is taken from the context. In the above example script the context character set is utf8mb4. So the CREATE TABLE will make a column with the collation utf8mb4_uca1400_as_ci. Short collation names can be used in any part of the SQL syntax where the COLLATE clause is understood. 
- New collations are displayed only one time (without character set combinations) by these statements: SELECT * FROM INFORMATION_SCHEMA.COLLATIONS; SHOW COLLATION; For example, all these collations: - utf8mb3_uca1400_swedish_as_ci - utf8mb4_uca1400_swedish_as_ci - ucs2_uca1400_swedish_as_ci - utf16_uca1400_swedish_as_ci - utf32_uca1400_swedish_as_ci have just one entry in INFORMATION_SCHEMA.COLLATIONS and SHOW COLLATION, with COLLATION_NAME equal to "uca1400_swedish_as_ci", which is the suffix without the character set name: SELECT COLLATION_NAME FROM INFORMATION_SCHEMA.COLLATIONS WHERE COLLATION_NAME LIKE '%uca1400_swedish_as_ci'; +-----------------------+ | COLLATION_NAME | +-----------------------+ | uca1400_swedish_as_ci | +-----------------------+ Note, the behaviour of old collations did not change. Non-unicode collations (e.g. latin1_swedish_ci) and old UCA-4.0.0 collations (e.g. utf8mb4_unicode_ci) are still displayed with the character set prefix, as before. - The structure of the table INFORMATION_SCHEMA.COLLATIONS was changed. The NOT NULL constraint was removed from these columns: - CHARACTER_SET_NAME - ID - IS_DEFAULT and from the corresponding columns in SHOW COLLATION. For example: SELECT COLLATION_NAME, CHARACTER_SET_NAME, ID, IS_DEFAULT FROM INFORMATION_SCHEMA.COLLATIONS WHERE COLLATION_NAME LIKE '%uca1400_swedish_as_ci'; +-----------------------+--------------------+------+------------+ | COLLATION_NAME | CHARACTER_SET_NAME | ID | IS_DEFAULT | +-----------------------+--------------------+------+------------+ | uca1400_swedish_as_ci | NULL | NULL | NULL | +-----------------------+--------------------+------+------------+ The NULL value in these columns now means that the collation is applicable to multiple character sets. The behaviour of old collations did not change. Make sure your client programs can handle NULL values in these columns. - The structure of the table INFORMATION_SCHEMA.COLLATION_CHARACTER_SET_APPLICABILITY was changed. 
Three new NOT NULL columns were added: - FULL_COLLATION_NAME - ID - IS_DEFAULT New collations have multiple entries in COLLATION_CHARACTER_SET_APPLICABILITY. The column COLLATION_NAME contains the collation name without the character set prefix. The column FULL_COLLATION_NAME contains the collation name with the character set prefix. Old collations have full collation name in both FULL_COLLATION_NAME and COLLATION_NAME. SELECT COLLATION_NAME, FULL_COLLATION_NAME, CHARACTER_SET_NAME, ID, IS_DEFAULT FROM INFORMATION_SCHEMA.COLLATION_CHARACTER_SET_APPLICABILITY WHERE FULL_COLLATION_NAME RLIKE '^(utf8mb4|latin1).*swedish.*ci$'; +-----------------------------+-------------------------------------+--------------------+------+------------+ | COLLATION_NAME | FULL_COLLATION_NAME | CHARACTER_SET_NAME | ID | IS_DEFAULT | +-----------------------------+-------------------------------------+--------------------+------+------------+ | latin1_swedish_ci | latin1_swedish_ci | latin1 | 8 | Yes | | latin1_swedish_nopad_ci | latin1_swedish_nopad_ci | latin1 | 1032 | | | utf8mb4_swedish_ci | utf8mb4_swedish_ci | utf8mb4 | 232 | | | uca1400_swedish_ai_ci | utf8mb4_uca1400_swedish_ai_ci | utf8mb4 | 2368 | | | uca1400_swedish_as_ci | utf8mb4_uca1400_swedish_as_ci | utf8mb4 | 2370 | | | uca1400_swedish_nopad_ai_ci | utf8mb4_uca1400_swedish_nopad_ai_ci | utf8mb4 | 2372 | | | uca1400_swedish_nopad_as_ci | utf8mb4_uca1400_swedish_nopad_as_ci | utf8mb4 | 2374 | | +-----------------------------+-------------------------------------+--------------------+------+------------+ - Other INFORMATION_SCHEMA queries: SELECT COLLATION_NAME FROM INFORMATION_SCHEMA.COLUMNS; SELECT COLLATION_NAME FROM INFORMATION_SCHEMA.PARAMETERS; SELECT TABLE_COLLATION FROM INFORMATION_SCHEMA.TABLES; SELECT DEFAULT_COLLATION_NAME FROM INFORMATION_SCHEMA.SCHEMATA; SELECT COLLATION_NAME FROM INFORMATION_SCHEMA.ROUTINES; SELECT COLLATION_CONNECTION FROM INFORMATION_SCHEMA.EVENTS; SELECT DATABASE_COLLATION FROM 
INFORMATION_SCHEMA.EVENTS; SELECT COLLATION_CONNECTION FROM INFORMATION_SCHEMA.ROUTINES; SELECT DATABASE_COLLATION FROM INFORMATION_SCHEMA.ROUTINES; SELECT COLLATION_CONNECTION FROM INFORMATION_SCHEMA.TRIGGERS; SELECT DATABASE_COLLATION FROM INFORMATION_SCHEMA.TRIGGERS; SELECT COLLATION_CONNECTION FROM INFORMATION_SCHEMA.VIEWS; display full collation names, including the character set prefix, for all collations, including new collations. Corresponding SHOW commands also display full collation names in collation related columns: SHOW CREATE TABLE t1; SHOW CREATE DATABASE db1; SHOW TABLE STATUS; SHOW CREATE FUNCTION f1; SHOW CREATE PROCEDURE p1; SHOW CREATE EVENT ev1; SHOW CREATE TRIGGER tr1; SHOW CREATE VIEW; These INFORMATION_SCHEMA queries and SHOW statements may change in the future, to display short collation names.
775 lines
22 KiB
C++
775 lines
22 KiB
C++
/* Copyright (c) 2021, 2022, MariaDB Corporation.
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; version 2 of the License.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
|
|
|
|
|
|
#include "my_global.h"
|
|
#include "my_sys.h"
|
|
#include "m_ctype.h"
|
|
#include "lex_charset.h"
|
|
#include "mysqld_error.h"
|
|
|
|
|
|
static void
|
|
raise_ER_CONFLICTING_DECLARATIONS(const char *clause1,
|
|
const char *name1,
|
|
const char *clause2,
|
|
const char *name2,
|
|
bool reverse_order)
|
|
{
|
|
if (!reverse_order)
|
|
my_error(ER_CONFLICTING_DECLARATIONS, MYF(0),
|
|
clause1, name1, clause2, name2);
|
|
else
|
|
my_error(ER_CONFLICTING_DECLARATIONS, MYF(0),
|
|
clause2, name2, clause1, name1);
|
|
}
|
|
|
|
|
|
static void
|
|
raise_ER_CONFLICTING_DECLARATIONS(const char *clause1,
|
|
const char *name1,
|
|
const char *name1_part2,
|
|
const char *clause2,
|
|
const char *name2,
|
|
bool reverse_order)
|
|
{
|
|
char def[MY_CS_CHARACTER_SET_NAME_SIZE * 2];
|
|
my_snprintf(def, sizeof(def), "%s (%s)", name1, name1_part2);
|
|
raise_ER_CONFLICTING_DECLARATIONS(clause1, def,
|
|
clause2, name2,
|
|
reverse_order);
|
|
}
|
|
|
|
|
|
bool Lex_exact_charset::raise_if_not_equal(const Lex_exact_charset &rhs) const
|
|
{
|
|
if (m_ci == rhs.m_ci)
|
|
return false;
|
|
my_error(ER_CONFLICTING_DECLARATIONS, MYF(0),
|
|
"CHARACTER SET ", m_ci->cs_name.str,
|
|
"CHARACTER SET ", rhs.m_ci->cs_name.str);
|
|
return true;
|
|
}
|
|
|
|
|
|
bool Lex_exact_charset::
|
|
raise_if_not_applicable(const Lex_exact_collation &cl) const
|
|
{
|
|
return Lex_exact_charset_opt_extended_collate(m_ci, false).
|
|
raise_if_not_applicable(cl);
|
|
}
|
|
|
|
|
|
bool Lex_exact_charset_opt_extended_collate::
|
|
raise_if_charsets_differ(const Lex_exact_charset &cs) const
|
|
{
|
|
if (!my_charset_same(m_ci, cs.charset_info()))
|
|
{
|
|
my_error(ER_CONFLICTING_DECLARATIONS, MYF(0),
|
|
"CHARACTER SET ", m_ci->cs_name.str,
|
|
"CHARACTER SET ", cs.charset_info()->cs_name.str);
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
|
|
bool Lex_exact_charset_opt_extended_collate::
|
|
raise_if_not_applicable(const Lex_exact_collation &cl) const
|
|
{
|
|
if (!my_charset_same(m_ci, cl.charset_info()))
|
|
{
|
|
my_error(ER_COLLATION_CHARSET_MISMATCH, MYF(0),
|
|
cl.charset_info()->coll_name.str, m_ci->cs_name.str);
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
|
|
bool
|
|
Lex_exact_collation::raise_if_not_equal(const Lex_exact_collation &cl) const
|
|
{
|
|
if (m_ci != cl.m_ci)
|
|
{
|
|
my_error(ER_CONFLICTING_DECLARATIONS, MYF(0),
|
|
"COLLATE ", m_ci->coll_name.str,
|
|
"COLLATE ", cl.m_ci->coll_name.str);
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
|
|
/*
|
|
Merge an exact collation and a contexual collation.
|
|
@param cl - The contextual collation to merge to "this".
|
|
@param reverse_order - If the contextual collation is on the left side
|
|
|
|
Use reverse_order as follows:
|
|
false: COLLATE latin1_swedish_ci COLLATE DEFAULT
|
|
true: COLLATE DEFAULT COLLATE latin1_swedish_ci
|
|
*/
|
|
bool
|
|
Lex_exact_collation::
|
|
raise_if_conflicts_with_context_collation(const Lex_context_collation &cl,
|
|
bool reverse_order) const
|
|
{
|
|
if (cl.is_contextually_typed_collate_default())
|
|
{
|
|
if (!(m_ci->state & MY_CS_PRIMARY))
|
|
{
|
|
raise_ER_CONFLICTING_DECLARATIONS("COLLATE ", m_ci->coll_name.str,
|
|
"COLLATE ", "DEFAULT", reverse_order);
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
if (cl.is_contextually_typed_binary_style())
|
|
{
|
|
if (!(m_ci->state & MY_CS_BINSORT))
|
|
{
|
|
raise_ER_CONFLICTING_DECLARATIONS("COLLATE ", m_ci->coll_name.str,
|
|
"", "BINARY", reverse_order);
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
DBUG_ASSERT(!strncmp(cl.charset_info()->coll_name.str,
|
|
STRING_WITH_LEN("utf8mb4_uca1400_")));
|
|
|
|
Charset_loader_server loader;
|
|
CHARSET_INFO *ci= loader.get_exact_collation_by_context_name(
|
|
m_ci,
|
|
cl.collation_name_context_suffix().str,
|
|
MYF(0));
|
|
if (m_ci != ci)
|
|
{
|
|
raise_ER_CONFLICTING_DECLARATIONS("COLLATE ",
|
|
m_ci->coll_name.str,
|
|
"COLLATE ",
|
|
cl.collation_name_for_show().str,
|
|
reverse_order);
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
|
|
bool
|
|
Lex_context_collation::raise_if_not_equal(const Lex_context_collation &cl) const
|
|
{
|
|
/*
|
|
Only equal context collations are possible here so far:
|
|
- Column grammar only supports BINARY, but does not support COLLATE DEFAULT
|
|
- DB/Table grammar only support COLLATE DEFAULT
|
|
*/
|
|
if (m_ci != cl.m_ci)
|
|
{
|
|
my_error(ER_CONFLICTING_DECLARATIONS, MYF(0),
|
|
is_contextually_typed_binary_style() ? "" : "COLLATE ",
|
|
collation_name_for_show().str,
|
|
cl.is_contextually_typed_binary_style() ? "" : "COLLATE ",
|
|
cl.collation_name_for_show().str);
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
|
|
/*
|
|
Resolve a context collation to the character set (when the former gets known):
|
|
CREATE TABLE t1 (a CHAR(10) BINARY) CHARACTER SET latin1;
|
|
CREATE DATABASE db1 COLLATE DEFAULT CHARACTER SET latin1;
|
|
*/
|
|
bool Lex_exact_charset_opt_extended_collate::
|
|
merge_context_collation_override(const Lex_context_collation &cl)
|
|
{
|
|
DBUG_ASSERT(m_ci);
|
|
|
|
// CHAR(10) BINARY
|
|
if (cl.is_contextually_typed_binary_style())
|
|
{
|
|
CHARSET_INFO *ci= find_bin_collation();
|
|
if (!ci)
|
|
return true;
|
|
m_ci= ci;
|
|
m_with_collate= true;
|
|
return false;
|
|
}
|
|
|
|
// COLLATE DEFAULT
|
|
if (cl.is_contextually_typed_collate_default())
|
|
{
|
|
CHARSET_INFO *ci= find_default_collation();
|
|
DBUG_ASSERT(ci);
|
|
if (!ci)
|
|
return true;
|
|
m_ci= ci;
|
|
m_with_collate= true;
|
|
return false;
|
|
}
|
|
|
|
DBUG_ASSERT(!strncmp(cl.charset_info()->coll_name.str,
|
|
STRING_WITH_LEN("utf8mb4_uca1400_")));
|
|
|
|
CHARSET_INFO *ci= Charset_loader_server().
|
|
get_exact_collation_by_context_name_or_error(m_ci,
|
|
cl.charset_info()->coll_name.str + 8, MYF(0));
|
|
if (!ci)
|
|
return true;
|
|
m_ci= ci;
|
|
m_with_collate= true;
|
|
return false;
|
|
}
|
|
|
|
|
|
bool Lex_extended_collation_st::merge_exact_charset(const Lex_exact_charset &cs)
|
|
{
|
|
switch (m_type) {
|
|
case TYPE_EXACT:
|
|
{
|
|
// COLLATE latin1_swedish_ci .. CHARACTER SET latin1
|
|
return cs.raise_if_not_applicable(Lex_exact_collation(m_ci));
|
|
}
|
|
case TYPE_CONTEXTUALLY_TYPED:
|
|
{
|
|
// COLLATE DEFAULT .. CHARACTER SET latin1
|
|
Lex_exact_charset_opt_extended_collate tmp(cs);
|
|
if (tmp.merge_context_collation(Lex_context_collation(m_ci)))
|
|
return true;
|
|
*this= Lex_extended_collation(tmp.collation());
|
|
return false;
|
|
}
|
|
}
|
|
DBUG_ASSERT(0);
|
|
return false;
|
|
}
|
|
|
|
|
|
bool Lex_extended_collation_st::
|
|
merge_exact_collation(const Lex_exact_collation &rhs)
|
|
{
|
|
switch (m_type) {
|
|
|
|
case TYPE_EXACT:
|
|
/*
|
|
EXACT + EXACT
|
|
COLLATE latin1_bin .. COLLATE latin1_bin
|
|
*/
|
|
return Lex_exact_collation(m_ci).raise_if_not_equal(rhs);
|
|
|
|
case TYPE_CONTEXTUALLY_TYPED:
|
|
{
|
|
/*
|
|
CONTEXT + EXACT
|
|
CHAR(10) COLLATE DEFAULT .. COLLATE latin1_swedish_ci
|
|
CHAR(10) BINARY .. COLLATE latin1_bin
|
|
CHAR(10) COLLATE uca1400_as_ci .. COLLATE latin1_bin
|
|
*/
|
|
if (rhs.raise_if_conflicts_with_context_collation(
|
|
Lex_context_collation(m_ci), true))
|
|
return true;
|
|
*this= Lex_extended_collation(rhs);
|
|
return false;
|
|
}
|
|
}
|
|
DBUG_ASSERT(0);
|
|
return false;
|
|
}
|
|
|
|
|
|
bool Lex_extended_collation_st::
|
|
raise_if_conflicts_with_context_collation(const Lex_context_collation &rhs)
|
|
const
|
|
{
|
|
switch (m_type) {
|
|
|
|
case TYPE_EXACT:
|
|
/*
|
|
EXACT + CONTEXT
|
|
COLLATE latin1_swedish_ci .. COLLATE DEFAULT
|
|
*/
|
|
return Lex_exact_collation(m_ci).
|
|
raise_if_conflicts_with_context_collation(rhs, false);
|
|
|
|
case TYPE_CONTEXTUALLY_TYPED:
|
|
{
|
|
/*
|
|
CONTEXT + CONTEXT:
|
|
CHAR(10) BINARY .. COLLATE DEFAULT - not supported by the parser
|
|
CREATE DATABASE db1 COLLATE DEFAULT COLLATE DEFAULT;
|
|
*/
|
|
return Lex_context_collation(m_ci).raise_if_not_equal(rhs);
|
|
}
|
|
}
|
|
DBUG_ASSERT(0);
|
|
return false;
|
|
}
|
|
|
|
|
|
/*
|
|
Merge two non-empty COLLATE clauses.
|
|
*/
|
|
bool Lex_extended_collation_st::merge(const Lex_extended_collation_st &rhs)
|
|
{
|
|
switch (rhs.type()) {
|
|
case TYPE_EXACT:
|
|
/*
|
|
EXACT + EXACT
|
|
COLLATE latin1_swedish_ci .. COLLATE latin1_swedish_ci
|
|
|
|
CONTEXT + EXACT
|
|
COLLATE DEFAULT .. COLLATE latin1_swedish_ci
|
|
CHAR(10) BINARY .. COLLATE latin1_bin
|
|
*/
|
|
return merge_exact_collation(Lex_exact_collation(rhs.m_ci));
|
|
case TYPE_CONTEXTUALLY_TYPED:
|
|
/*
|
|
EXACT + CONTEXT
|
|
COLLATE latin1_swedish_ci .. COLLATE DEFAULT
|
|
|
|
CONTEXT + CONTEXT
|
|
COLLATE DEFAULT .. COLLATE DEFAULT
|
|
CHAR(10) BINARY .. COLLATE DEFAULT
|
|
*/
|
|
return raise_if_conflicts_with_context_collation(
|
|
Lex_context_collation(rhs.m_ci));
|
|
}
|
|
DBUG_ASSERT(0);
|
|
return false;
|
|
}
|
|
|
|
|
|
/*
  Return the name of this contextual collation for use in messages:
  "DEFAULT" for COLLATE DEFAULT, "BINARY" for the BINARY style,
  and the context suffix of the collation name otherwise.
*/
LEX_CSTRING Lex_context_collation::collation_name_for_show() const
{
  if (is_contextually_typed_collate_default())
  {
    const LEX_CSTRING default_name= {STRING_WITH_LEN("DEFAULT")};
    return default_name;
  }

  if (is_contextually_typed_binary_style())
  {
    const LEX_CSTRING binary_name= {STRING_WITH_LEN("BINARY")};
    return binary_name;
  }

  return collation_name_context_suffix();
}
|
|
|
|
|
|
bool Lex_extended_collation_st::set_by_name(const char *name, myf my_flags)
|
|
{
|
|
Charset_loader_server loader;
|
|
CHARSET_INFO *cs;
|
|
|
|
if (!strncasecmp(name, STRING_WITH_LEN("uca1400_")))
|
|
{
|
|
if (!(cs= loader.get_context_collation_or_error(name, my_flags)))
|
|
return true;
|
|
|
|
*this= Lex_extended_collation(Lex_context_collation(cs));
|
|
return false;
|
|
}
|
|
|
|
if (!(cs= loader.get_exact_collation_or_error(name, my_flags)))
|
|
return true;
|
|
|
|
*this= Lex_extended_collation(Lex_exact_collation(cs));
|
|
return false;
|
|
}
|
|
|
|
|
|
/** find a collation with binary comparison rules
|
|
*/
|
|
CHARSET_INFO *Lex_exact_charset_opt_extended_collate::find_bin_collation() const
|
|
{
|
|
/*
|
|
We don't need to handle old_mode=UTF8_IS_UTF8MB3 here,
|
|
because "m_ci" points to a real character set name.
|
|
It can be either "utf8mb3" or "utf8mb4". It cannot be "utf8".
|
|
No thd->get_utf8_flag() flag passed to get_charset_by_csname().
|
|
*/
|
|
DBUG_ASSERT(m_ci->cs_name.length !=4 || memcmp(m_ci->cs_name.str, "utf8", 4));
|
|
/*
|
|
CREATE TABLE t1 (a CHAR(10) BINARY)
|
|
CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;
|
|
Nothing to do, we have the binary collation already.
|
|
*/
|
|
if (m_ci->state & MY_CS_BINSORT)
|
|
return m_ci;
|
|
|
|
// CREATE TABLE t1 (a CHAR(10) BINARY) CHARACTER SET utf8mb4;
|
|
CHARSET_INFO *cs;
|
|
if (!(cs= get_charset_by_csname(m_ci->cs_name.str, MY_CS_BINSORT, MYF(0))))
|
|
{
|
|
char tmp[65];
|
|
strxnmov(tmp, sizeof(tmp)-1, m_ci->cs_name.str, "_bin", NULL);
|
|
my_error(ER_UNKNOWN_COLLATION, MYF(0), tmp);
|
|
}
|
|
return cs;
|
|
}
|
|
|
|
|
|
/*
  Find the default (MY_CS_PRIMARY) collation for the character set of "this".

  @return the default collation; the lookup is not expected to fail
*/
CHARSET_INFO *
Lex_exact_charset_opt_extended_collate::find_default_collation() const
{
  // See comments in find_bin_collation()
  DBUG_ASSERT(m_ci->cs_name.length !=4 || memcmp(m_ci->cs_name.str, "utf8", 4));

  const bool already_default= (m_ci->state & MY_CS_PRIMARY) != 0;
  if (already_default)
  {
    /*
      CREATE TABLE t1 (a CHAR(10) COLLATE DEFAULT) CHARACTER SET utf8mb4;
      Nothing to do, we have the default collation already.
    */
    return m_ci;
  }

  /*
    CREATE TABLE t1 (a CHAR(10) COLLATE DEFAULT)
      CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;

    Don't need to handle old_mode=UTF8_IS_UTF8MB3 here.
    See comments in find_bin_collation.
  */
  CHARSET_INFO *primary= get_charset_by_csname(m_ci->cs_name.str,
                                               MY_CS_PRIMARY, MYF(MY_WME));
  /*
    The above should never fail, as we have default collations for
    all character sets.
  */
  DBUG_ASSERT(primary);
  return primary;
}
|
|
|
|
|
|
/*
|
|
Resolve an empty or a contextually typed collation according to the
|
|
upper level default character set (and optionally a collation), e.g.:
|
|
CREATE TABLE t1 (a CHAR(10)) CHARACTER SET latin1;
|
|
CREATE TABLE t1 (a CHAR(10) BINARY) CHARACTER SET latin1;
|
|
CREATE TABLE t1 (a CHAR(10) COLLATE DEFAULT)
|
|
CHARACTER SET latin1 COLLATE latin1_bin;
|
|
|
|
"this" is the COLLATE clause (e.g. of a column)
|
|
"def" is the upper level CHARACTER SET clause (e.g. of a table)
|
|
*/
|
|
CHARSET_INFO *Lex_exact_charset_extended_collation_attrs_st::
|
|
resolved_to_character_set(CHARSET_INFO *def) const
|
|
{
|
|
DBUG_ASSERT(def);
|
|
|
|
switch (m_type) {
|
|
case TYPE_EMPTY:
|
|
return def;
|
|
case TYPE_CHARACTER_SET:
|
|
case TYPE_CHARACTER_SET_COLLATE_EXACT:
|
|
case TYPE_COLLATE_EXACT:
|
|
DBUG_ASSERT(m_ci);
|
|
return m_ci;
|
|
case TYPE_COLLATE_CONTEXTUALLY_TYPED:
|
|
{
|
|
Lex_exact_charset_opt_extended_collate tmp(def, true);
|
|
if (tmp.merge_context_collation_override(Lex_context_collation(m_ci)))
|
|
return NULL;
|
|
return tmp.collation().charset_info();
|
|
}
|
|
}
|
|
DBUG_ASSERT(0);
|
|
return NULL;
|
|
}
|
|
|
|
|
|
bool Lex_exact_charset_extended_collation_attrs_st::
|
|
merge_exact_collation(const Lex_exact_collation &cl)
|
|
{
|
|
switch (m_type) {
|
|
case TYPE_EMPTY:
|
|
/*
|
|
No CHARACTER SET clause
|
|
CHAR(10) NOT NULL COLLATE latin1_bin
|
|
*/
|
|
*this= Lex_exact_charset_extended_collation_attrs(cl);
|
|
return false;
|
|
case TYPE_CHARACTER_SET:
|
|
{
|
|
// CHARACTER SET latin1 .. COLLATE latin1_swedish_ci
|
|
Lex_exact_charset_opt_extended_collate tmp(m_ci, false);
|
|
if (tmp.merge_exact_collation(cl))
|
|
return true;
|
|
*this= Lex_exact_charset_extended_collation_attrs(tmp);
|
|
return false;
|
|
}
|
|
case TYPE_CHARACTER_SET_COLLATE_EXACT:
|
|
case TYPE_COLLATE_EXACT:
|
|
{
|
|
// [CHARACTER SET latin1] COLLATE latin1_bin .. COLLATE latin1_bin
|
|
return Lex_exact_collation(m_ci).raise_if_not_equal(cl);
|
|
}
|
|
case TYPE_COLLATE_CONTEXTUALLY_TYPED:
|
|
{
|
|
// COLLATE DEFAULT .. COLLATE latin1_swedish_ci
|
|
if (cl.raise_if_conflicts_with_context_collation(
|
|
Lex_context_collation(m_ci), true))
|
|
return true;
|
|
*this= Lex_exact_charset_extended_collation_attrs(cl);
|
|
return false;
|
|
}
|
|
}
|
|
DBUG_ASSERT(0);
|
|
return false;
|
|
}
|
|
|
|
|
|
bool Lex_exact_charset_extended_collation_attrs_st::
|
|
merge_context_collation(const Lex_context_collation &cl)
|
|
{
|
|
switch (m_type) {
|
|
case TYPE_EMPTY:
|
|
/*
|
|
No CHARACTER SET clause
|
|
CHAR(10) NOT NULL .. COLLATE DEFAULT
|
|
*/
|
|
*this= Lex_exact_charset_extended_collation_attrs(cl);
|
|
return false;
|
|
case TYPE_CHARACTER_SET:
|
|
{
|
|
// CHARACTER SET latin1 .. COLLATE DEFAULT
|
|
Lex_exact_charset_opt_extended_collate tmp(m_ci, false);
|
|
if (tmp.merge_context_collation(cl))
|
|
return true;
|
|
*this= Lex_exact_charset_extended_collation_attrs(tmp);
|
|
return false;
|
|
}
|
|
case TYPE_CHARACTER_SET_COLLATE_EXACT:
|
|
case TYPE_COLLATE_EXACT:
|
|
// [CHARACTER SET latin1] COLLATE latin1_swedish_ci .. COLLATE DEFAULT
|
|
return Lex_exact_collation(m_ci).
|
|
raise_if_conflicts_with_context_collation(cl, false);
|
|
case TYPE_COLLATE_CONTEXTUALLY_TYPED:
|
|
// COLLATE DEFAULT .. COLLATE DEFAULT
|
|
return Lex_context_collation(m_ci).raise_if_not_equal(cl);
|
|
}
|
|
|
|
DBUG_ASSERT(0);
|
|
return false;
|
|
}
|
|
|
|
|
|
bool Lex_exact_charset_opt_extended_collate::
|
|
merge_exact_collation(const Lex_exact_collation &cl)
|
|
{
|
|
// CHARACTER SET latin1 [COLLATE latin1_bin] .. COLLATE latin1_bin
|
|
if (m_with_collate)
|
|
return Lex_exact_collation(m_ci).raise_if_not_equal(cl);
|
|
return merge_exact_collation_override(cl);
|
|
}
|
|
|
|
|
|
bool Lex_exact_charset_opt_extended_collate::
|
|
merge_exact_collation_override(const Lex_exact_collation &cl)
|
|
{
|
|
// CHARACTER SET latin1 [COLLATE latin1_bin] .. COLLATE latin1_bin
|
|
if (raise_if_not_applicable(cl))
|
|
return true;
|
|
*this= Lex_exact_charset_opt_extended_collate(cl);
|
|
return false;
|
|
}
|
|
|
|
|
|
bool Lex_exact_charset_opt_extended_collate::
|
|
merge_context_collation(const Lex_context_collation &cl)
|
|
{
|
|
// CHARACTER SET latin1 [COLLATE latin1_bin] .. COLLATE DEFAULT
|
|
if (m_with_collate)
|
|
return Lex_exact_collation(m_ci).
|
|
raise_if_conflicts_with_context_collation(cl, false);
|
|
return merge_context_collation_override(cl);
|
|
}
|
|
|
|
|
|
bool Lex_exact_charset_extended_collation_attrs_st::
|
|
merge_collation(const Lex_extended_collation_st &cl)
|
|
{
|
|
switch (cl.type()) {
|
|
case Lex_extended_collation_st::TYPE_EXACT:
|
|
return merge_exact_collation(Lex_exact_collation(cl.charset_info()));
|
|
case Lex_extended_collation_st::TYPE_CONTEXTUALLY_TYPED:
|
|
return merge_context_collation(Lex_context_collation(cl.charset_info()));
|
|
}
|
|
DBUG_ASSERT(0);
|
|
return false;
|
|
}
|
|
|
|
|
|
/*
|
|
Mix an unordered combination of CHARACTER SET and COLLATE clauses
|
|
(i.e. COLLATE can come before CHARACTER SET).
|
|
Merge a CHARACTER SET clause.
|
|
@param cs - The "CHARACTER SET exact_charset_name".
|
|
*/
|
|
bool Lex_exact_charset_extended_collation_attrs_st::
|
|
merge_exact_charset(const Lex_exact_charset &cs)
|
|
{
|
|
DBUG_ASSERT(cs.charset_info());
|
|
|
|
switch (m_type) {
|
|
case TYPE_EMPTY:
|
|
// CHARACTER SET cs
|
|
*this= Lex_exact_charset_extended_collation_attrs(cs);
|
|
return false;
|
|
|
|
case TYPE_CHARACTER_SET:
|
|
// CHARACTER SET cs1 .. CHARACTER SET cs2
|
|
return Lex_exact_charset(m_ci).raise_if_not_equal(cs);
|
|
|
|
case TYPE_COLLATE_EXACT:
|
|
// COLLATE latin1_bin .. CHARACTER SET cs
|
|
if (cs.raise_if_not_applicable(Lex_exact_collation(m_ci)))
|
|
return true;
|
|
m_type= TYPE_CHARACTER_SET_COLLATE_EXACT;
|
|
return false;
|
|
|
|
case TYPE_CHARACTER_SET_COLLATE_EXACT:
|
|
// CHARACTER SET cs1 COLLATE cl .. CHARACTER SET cs2
|
|
return Lex_exact_charset_opt_extended_collate(m_ci, true).
|
|
raise_if_charsets_differ(cs);
|
|
|
|
case TYPE_COLLATE_CONTEXTUALLY_TYPED:
|
|
// COLLATE DEFAULT .. CHARACTER SET cs
|
|
{
|
|
Lex_exact_charset_opt_extended_collate tmp(cs);
|
|
if (tmp.merge_context_collation(Lex_context_collation(m_ci)))
|
|
return true;
|
|
*this= Lex_exact_charset_extended_collation_attrs(tmp);
|
|
return false;
|
|
}
|
|
}
|
|
DBUG_ASSERT(0);
|
|
return false;
|
|
}
|
|
|
|
|
|
bool Lex_extended_charset_extended_collation_attrs_st::merge_charset_default()
|
|
{
|
|
if (m_charset_order == CHARSET_TYPE_EMPTY)
|
|
m_charset_order= CHARSET_TYPE_CONTEXT;
|
|
Lex_opt_context_charset_st::merge_charset_default();
|
|
return false;
|
|
}
|
|
|
|
|
|
bool Lex_extended_charset_extended_collation_attrs_st::
|
|
merge_exact_charset(const Lex_exact_charset &cs)
|
|
{
|
|
if (m_charset_order == CHARSET_TYPE_EMPTY)
|
|
m_charset_order= CHARSET_TYPE_EXACT;
|
|
return Lex_exact_charset_extended_collation_attrs_st::merge_exact_charset(cs);
|
|
}
|
|
|
|
|
|
bool Lex_extended_charset_extended_collation_attrs_st::
|
|
raise_if_charset_conflicts_with_default(
|
|
const Lex_exact_charset_opt_extended_collate &def) const
|
|
{
|
|
DBUG_ASSERT(m_charset_order != CHARSET_TYPE_EMPTY || is_empty());
|
|
if (!my_charset_same(def.collation().charset_info(), m_ci))
|
|
{
|
|
raise_ER_CONFLICTING_DECLARATIONS("CHARACTER SET ", "DEFAULT",
|
|
def.collation().charset_info()->cs_name.str,
|
|
"CHARACTER SET ", m_ci->cs_name.str,
|
|
m_charset_order == CHARSET_TYPE_EXACT);
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
|
|
/*
  Resolve the collected CHARACTER SET / COLLATE clauses, possibly including
  "CHARACTER SET DEFAULT", in the given context.

  @param ctx - Supplies what COLLATE DEFAULT and CHARACTER SET DEFAULT
               stand for
  @return the resolved collation, or NULL if an error was raised
*/
CHARSET_INFO *
Lex_extended_charset_extended_collation_attrs_st::
  resolved_to_context(const Charset_collation_context &ctx) const
{
  // Without CHARACTER SET DEFAULT
  if (Lex_opt_context_charset_st::is_empty())
    return Lex_exact_charset_extended_collation_attrs_st::
             resolved_to_character_set(ctx.collate_default().charset_info());

  // With CHARACTER SET DEFAULT
  switch (type()) {
  case TYPE_EMPTY:
  {
    // CHARACTER SET DEFAULT;
    return ctx.charset_default().charset().charset_info();
  }

  case TYPE_CHARACTER_SET:
  {
    /*
      CHARACTER SET DEFAULT CHARACTER SET cs_exact

      A possible conflict scenario:
        SET character_set_server=utf8mb4;
        CREATE DATABASE db1 CHARACTER SET latin1 CHARACTER SET DEFAULT;
    */
    if (raise_if_charset_conflicts_with_default(ctx.charset_default()))
      return NULL;
    return m_ci;
  }

  case TYPE_CHARACTER_SET_COLLATE_EXACT:
  case TYPE_COLLATE_EXACT:
  {
    /*
      CREATE DATABASE db1
      COLLATE cl_exact
      [ CHARACTER SET cs_exact ]
      CHARACTER SET DEFAULT;

      First check the optional exact character set.
      A possible conflict scenario:
        SET character_set_server=utf8mb4;
        CREATE DATABASE db1
        COLLATE latin1_bin
        CHARACTER SET latin1
        CHARACTER SET DEFAULT;
    */
    const bool has_exact_charset= m_type == TYPE_CHARACTER_SET_COLLATE_EXACT;
    if (has_exact_charset &&
        raise_if_charset_conflicts_with_default(ctx.charset_default()))
      return NULL;

    /*
      Now check that "COLLATE cl_exact" does not conflict with
      CHARACTER SET DEFAULT.
      A possible conflict scenario:
        SET character_set_server=utf8mb4;
        CREATE DATABASE db1
        COLLATE latin1_bin
        CHARACTER SET DEFAULT;
    */
    const Lex_exact_collation exact(m_ci);
    if (ctx.charset_default().raise_if_not_applicable(exact))
      return NULL;
    return m_ci;
  }

  case TYPE_COLLATE_CONTEXTUALLY_TYPED:
  {
    /*
      Both CHARACTER SET and COLLATE are contextual:
        ALTER DATABASE db1 CHARACTER SET DEFAULT COLLATE DEFAULT;
        ALTER DATABASE db1 COLLATE DEFAULT CHARACTER SET DEFAULT;
    */
    CHARSET_INFO *default_ci= ctx.charset_default().collation().charset_info();
    return Lex_exact_charset_extended_collation_attrs_st::
             resolved_to_character_set(default_ci);
  }
  }

  DBUG_ASSERT(0);
  return NULL;
}
|