2022-02-09 18:21:39 +01:00
|
|
|
/* Copyright (c) 2021, 2022, MariaDB Corporation.
|
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
the Free Software Foundation; version 2 of the License.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with this program; if not, write to the Free Software
|
|
|
|
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
|
|
|
|
|
|
|
|
|
|
|
|
#include "my_global.h"
|
|
|
|
#include "my_sys.h"
|
|
|
|
#include "m_ctype.h"
|
|
|
|
#include "lex_charset.h"
|
|
|
|
#include "mysqld_error.h"
|
|
|
|
|
|
|
|
|
2022-05-23 09:05:33 +02:00
|
|
|
static void
|
|
|
|
raise_ER_CONFLICTING_DECLARATIONS(const char *clause1,
|
|
|
|
const char *name1,
|
|
|
|
const char *clause2,
|
|
|
|
const char *name2,
|
|
|
|
bool reverse_order)
|
|
|
|
{
|
|
|
|
if (!reverse_order)
|
|
|
|
my_error(ER_CONFLICTING_DECLARATIONS, MYF(0),
|
|
|
|
clause1, name1, clause2, name2);
|
|
|
|
else
|
|
|
|
my_error(ER_CONFLICTING_DECLARATIONS, MYF(0),
|
|
|
|
clause2, name2, clause1, name1);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2022-05-17 10:52:23 +02:00
|
|
|
static void
|
|
|
|
raise_ER_CONFLICTING_DECLARATIONS(const char *clause1,
|
|
|
|
const char *name1,
|
|
|
|
const char *name1_part2,
|
|
|
|
const char *clause2,
|
|
|
|
const char *name2,
|
|
|
|
bool reverse_order)
|
|
|
|
{
|
MDEV-27009 Add UCA-14.0.0 collations
- Added one neutral and 22 tailored (language specific) collations based on
Unicode Collation Algorithm version 14.0.0.
Collations were added for Unicode character sets
utf8mb3, utf8mb4, ucs2, utf16, utf32.
Every tailoring was added with four accent and case
sensitivity flag combinations, e.g:
* utf8mb4_uca1400_swedish_as_cs
* utf8mb4_uca1400_swedish_as_ci
* utf8mb4_uca1400_swedish_ai_cs
* utf8mb4_uca1400_swedish_ai_ci
and their _nopad_ variants:
* utf8mb4_uca1400_swedish_nopad_as_cs
* utf8mb4_uca1400_swedish_nopad_as_ci
* utf8mb4_uca1400_swedish_nopad_ai_cs
* utf8mb4_uca1400_swedish_nopad_ai_ci
- Introducing a conception of contextually typed named collations:
CREATE DATABASE db1 CHARACTER SET utf8mb4;
CREATE TABLE db1.t1 (a CHAR(10) COLLATE uca1400_as_ci);
The idea is that there is no a need to specify the character set prefix
in the new collation names. It's enough to type just the suffix
"uca1400_as_ci". The character set is taken from the context.
In the above example script the context character set is utf8mb4.
So the CREATE TABLE will make a column with the collation
utf8mb4_uca1400_as_ci.
Short collations names can be used in any parts of the SQL syntax
where the COLLATE clause is understood.
- New collations are displayed only one time
(without character set combinations) by these statements:
SELECT * FROM INFORMATION_SCHEMA.COLLATIONS;
SHOW COLLATION;
For example, all these collations:
- utf8mb3_uca1400_swedish_as_ci
- utf8mb4_uca1400_swedish_as_ci
- ucs2_uca1400_swedish_as_ci
- utf16_uca1400_swedish_as_ci
- utf32_uca1400_swedish_as_ci
have just one entry in INFORMATION_SCHEMA.COLLATIONS and SHOW COLLATION,
with COLLATION_NAME equal to "uca1400_swedish_as_ci", which is the suffix
without the character set name:
SELECT COLLATION_NAME FROM INFORMATION_SCHEMA.COLLATIONS
WHERE COLLATION_NAME LIKE '%uca1400_swedish_as_ci';
+-----------------------+
| COLLATION_NAME |
+-----------------------+
| uca1400_swedish_as_ci |
+-----------------------+
Note, the behaviour of old collations did not change.
Non-unicode collations (e.g. latin1_swedish_ci) and
old UCA-4.0.0 collations (e.g. utf8mb4_unicode_ci)
are still displayed with the character set prefix, as before.
- The structure of the table INFORMATION_SCHEMA.COLLATIONS was changed.
The NOT NULL constraint was removed from these columns:
- CHARACTER_SET_NAME
- ID
- IS_DEFAULT
and from the corresponding columns in SHOW COLLATION.
For example:
SELECT COLLATION_NAME, CHARACTER_SET_NAME, ID, IS_DEFAULT
FROM INFORMATION_SCHEMA.COLLATIONS
WHERE COLLATION_NAME LIKE '%uca1400_swedish_as_ci';
+-----------------------+--------------------+------+------------+
| COLLATION_NAME | CHARACTER_SET_NAME | ID | IS_DEFAULT |
+-----------------------+--------------------+------+------------+
| uca1400_swedish_as_ci | NULL | NULL | NULL |
+-----------------------+--------------------+------+------------+
The NULL value in these columns now means that the collation
is applicable to multiple character sets.
The behavioir of old collations did not change.
Make sure your client programs can handle NULL values in these columns.
- The structure of the table
INFORMATION_SCHEMA.COLLATION_CHARACTER_SET_APPLICABILITY was changed.
Three new NOT NULL columns were added:
- FULL_COLLATION_NAME
- ID
- IS_DEFAULT
New collations have multiple entries in COLLATION_CHARACTER_SET_APPLICABILITY.
The column COLLATION_NAME contains the collation name without the character
set prefix. The column FULL_COLLATION_NAME contains the collation name with
the character set prefix.
Old collations have full collation name in both FULL_COLLATION_NAME and
COLLATION_NAME.
SELECT COLLATION_NAME, FULL_COLLATION_NAME, CHARACTER_SET_NAME, ID, IS_DEFAULT
FROM INFORMATION_SCHEMA.COLLATION_CHARACTER_SET_APPLICABILITY
WHERE FULL_COLLATION_NAME RLIKE '^(utf8mb4|latin1).*swedish.*ci$';
+-----------------------------+-------------------------------------+--------------------+------+------------+
| COLLATION_NAME | FULL_COLLATION_NAME | CHARACTER_SET_NAME | ID | IS_DEFAULT |
+-----------------------------+-------------------------------------+--------------------+------+------------+
| latin1_swedish_ci | latin1_swedish_ci | latin1 | 8 | Yes |
| latin1_swedish_nopad_ci | latin1_swedish_nopad_ci | latin1 | 1032 | |
| utf8mb4_swedish_ci | utf8mb4_swedish_ci | utf8mb4 | 232 | |
| uca1400_swedish_ai_ci | utf8mb4_uca1400_swedish_ai_ci | utf8mb4 | 2368 | |
| uca1400_swedish_as_ci | utf8mb4_uca1400_swedish_as_ci | utf8mb4 | 2370 | |
| uca1400_swedish_nopad_ai_ci | utf8mb4_uca1400_swedish_nopad_ai_ci | utf8mb4 | 2372 | |
| uca1400_swedish_nopad_as_ci | utf8mb4_uca1400_swedish_nopad_as_ci | utf8mb4 | 2374 | |
+-----------------------------+-------------------------------------+--------------------+------+------------+
- Other INFORMATION_SCHEMA queries:
SELECT COLLATION_NAME FROM INFORMATION_SCHEMA.COLUMNS;
SELECT COLLATION_NAME FROM INFORMATION_SCHEMA.PARAMETERS;
SELECT TABLE_COLLATION FROM INFORMATION_SCHEMA.TABLES;
SELECT DEFAULT_COLLATION_NAME FROM INFORMATION_SCHEMA.SCHEMATA;
SELECT COLLATION_NAME FROM INFORMATION_SCHEMA.ROUTINES;
SELECT COLLATION_CONNECTION FROM INFORMATION_SCHEMA.EVENTS;
SELECT DATABASE_COLLATION FROM INFORMATION_SCHEMA.EVENTS;
SELECT COLLATION_CONNECTION FROM INFORMATION_SCHEMA.ROUTINES;
SELECT DATABASE_COLLATION FROM INFORMATION_SCHEMA.ROUTINES;
SELECT COLLATION_CONNECTION FROM INFORMATION_SCHEMA.TRIGGERS;
SELECT DATABASE_COLLATION FROM INFORMATION_SCHEMA.TRIGGERS;
SELECT COLLATION_CONNECTION FROM INFORMATION_SCHEMA.VIEWS;
display full collation names, including character sets prefix,
for all collations, including new collations.
Corresponding SHOW commands also display full collation names
in collation related columns:
SHOW CREATE TABLE t1;
SHOW CREATE DATABASE db1;
SHOW TABLE STATUS;
SHOW CREATE FUNCTION f1;
SHOW CREATE PROCEDURE p1;
SHOW CREATE EVENT ev1;
SHOW CREATE TRIGGER tr1;
SHOW CREATE VIEW;
These INFORMATION_SCHEMA queries and SHOW statements may change in
the future, to display show collation names.
2021-11-28 13:55:15 +01:00
|
|
|
char def[MY_CS_CHARACTER_SET_NAME_SIZE * 2];
|
2022-05-17 10:52:23 +02:00
|
|
|
my_snprintf(def, sizeof(def), "%s (%s)", name1, name1_part2);
|
|
|
|
raise_ER_CONFLICTING_DECLARATIONS(clause1, def,
|
|
|
|
clause2, name2,
|
|
|
|
reverse_order);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2022-05-23 09:05:33 +02:00
|
|
|
bool Lex_exact_charset::raise_if_not_equal(const Lex_exact_charset &rhs) const
|
|
|
|
{
|
|
|
|
if (m_ci == rhs.m_ci)
|
|
|
|
return false;
|
|
|
|
my_error(ER_CONFLICTING_DECLARATIONS, MYF(0),
|
|
|
|
"CHARACTER SET ", m_ci->cs_name.str,
|
|
|
|
"CHARACTER SET ", rhs.m_ci->cs_name.str);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
bool Lex_exact_charset::
|
|
|
|
raise_if_not_applicable(const Lex_exact_collation &cl) const
|
|
|
|
{
|
|
|
|
return Lex_exact_charset_opt_extended_collate(m_ci, false).
|
|
|
|
raise_if_not_applicable(cl);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2022-05-25 09:07:04 +02:00
|
|
|
bool Lex_exact_charset_opt_extended_collate::
|
|
|
|
raise_if_charsets_differ(const Lex_exact_charset &cs) const
|
|
|
|
{
|
|
|
|
if (!my_charset_same(m_ci, cs.charset_info()))
|
|
|
|
{
|
|
|
|
my_error(ER_CONFLICTING_DECLARATIONS, MYF(0),
|
|
|
|
"CHARACTER SET ", m_ci->cs_name.str,
|
|
|
|
"CHARACTER SET ", cs.charset_info()->cs_name.str);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2022-05-23 09:05:33 +02:00
|
|
|
bool Lex_exact_charset_opt_extended_collate::
|
|
|
|
raise_if_not_applicable(const Lex_exact_collation &cl) const
|
|
|
|
{
|
|
|
|
if (!my_charset_same(m_ci, cl.charset_info()))
|
|
|
|
{
|
|
|
|
my_error(ER_COLLATION_CHARSET_MISMATCH, MYF(0),
|
|
|
|
cl.charset_info()->coll_name.str, m_ci->cs_name.str);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
bool
|
|
|
|
Lex_exact_collation::raise_if_not_equal(const Lex_exact_collation &cl) const
|
|
|
|
{
|
|
|
|
if (m_ci != cl.m_ci)
|
|
|
|
{
|
|
|
|
my_error(ER_CONFLICTING_DECLARATIONS, MYF(0),
|
|
|
|
"COLLATE ", m_ci->coll_name.str,
|
|
|
|
"COLLATE ", cl.m_ci->coll_name.str);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
Merge an exact collation and a contextual collation.
|
|
|
|
@param cl - The contextual collation to merge to "this".
|
|
|
|
@param reverse_order - If the contextual collation is on the left side
|
|
|
|
|
|
|
|
Use reverse_order as follows:
|
|
|
|
false: COLLATE latin1_swedish_ci COLLATE DEFAULT
|
|
|
|
true: COLLATE DEFAULT COLLATE latin1_swedish_ci
|
|
|
|
*/
|
|
|
|
bool
|
|
|
|
Lex_exact_collation::
|
|
|
|
raise_if_conflicts_with_context_collation(const Lex_context_collation &cl,
|
|
|
|
bool reverse_order) const
|
|
|
|
{
|
MDEV-27009 Add UCA-14.0.0 collations
- Added one neutral and 22 tailored (language specific) collations based on
Unicode Collation Algorithm version 14.0.0.
Collations were added for Unicode character sets
utf8mb3, utf8mb4, ucs2, utf16, utf32.
Every tailoring was added with four accent and case
sensitivity flag combinations, e.g:
* utf8mb4_uca1400_swedish_as_cs
* utf8mb4_uca1400_swedish_as_ci
* utf8mb4_uca1400_swedish_ai_cs
* utf8mb4_uca1400_swedish_ai_ci
and their _nopad_ variants:
* utf8mb4_uca1400_swedish_nopad_as_cs
* utf8mb4_uca1400_swedish_nopad_as_ci
* utf8mb4_uca1400_swedish_nopad_ai_cs
* utf8mb4_uca1400_swedish_nopad_ai_ci
- Introducing a conception of contextually typed named collations:
CREATE DATABASE db1 CHARACTER SET utf8mb4;
CREATE TABLE db1.t1 (a CHAR(10) COLLATE uca1400_as_ci);
The idea is that there is no need to specify the character set prefix
in the new collation names. It's enough to type just the suffix
"uca1400_as_ci". The character set is taken from the context.
In the above example script the context character set is utf8mb4.
So the CREATE TABLE will make a column with the collation
utf8mb4_uca1400_as_ci.
Short collations names can be used in any parts of the SQL syntax
where the COLLATE clause is understood.
- New collations are displayed only one time
(without character set combinations) by these statements:
SELECT * FROM INFORMATION_SCHEMA.COLLATIONS;
SHOW COLLATION;
For example, all these collations:
- utf8mb3_uca1400_swedish_as_ci
- utf8mb4_uca1400_swedish_as_ci
- ucs2_uca1400_swedish_as_ci
- utf16_uca1400_swedish_as_ci
- utf32_uca1400_swedish_as_ci
have just one entry in INFORMATION_SCHEMA.COLLATIONS and SHOW COLLATION,
with COLLATION_NAME equal to "uca1400_swedish_as_ci", which is the suffix
without the character set name:
SELECT COLLATION_NAME FROM INFORMATION_SCHEMA.COLLATIONS
WHERE COLLATION_NAME LIKE '%uca1400_swedish_as_ci';
+-----------------------+
| COLLATION_NAME |
+-----------------------+
| uca1400_swedish_as_ci |
+-----------------------+
Note, the behaviour of old collations did not change.
Non-unicode collations (e.g. latin1_swedish_ci) and
old UCA-4.0.0 collations (e.g. utf8mb4_unicode_ci)
are still displayed with the character set prefix, as before.
- The structure of the table INFORMATION_SCHEMA.COLLATIONS was changed.
The NOT NULL constraint was removed from these columns:
- CHARACTER_SET_NAME
- ID
- IS_DEFAULT
and from the corresponding columns in SHOW COLLATION.
For example:
SELECT COLLATION_NAME, CHARACTER_SET_NAME, ID, IS_DEFAULT
FROM INFORMATION_SCHEMA.COLLATIONS
WHERE COLLATION_NAME LIKE '%uca1400_swedish_as_ci';
+-----------------------+--------------------+------+------------+
| COLLATION_NAME | CHARACTER_SET_NAME | ID | IS_DEFAULT |
+-----------------------+--------------------+------+------------+
| uca1400_swedish_as_ci | NULL | NULL | NULL |
+-----------------------+--------------------+------+------------+
The NULL value in these columns now means that the collation
is applicable to multiple character sets.
The behavior of old collations did not change.
Make sure your client programs can handle NULL values in these columns.
- The structure of the table
INFORMATION_SCHEMA.COLLATION_CHARACTER_SET_APPLICABILITY was changed.
Three new NOT NULL columns were added:
- FULL_COLLATION_NAME
- ID
- IS_DEFAULT
New collations have multiple entries in COLLATION_CHARACTER_SET_APPLICABILITY.
The column COLLATION_NAME contains the collation name without the character
set prefix. The column FULL_COLLATION_NAME contains the collation name with
the character set prefix.
Old collations have full collation name in both FULL_COLLATION_NAME and
COLLATION_NAME.
SELECT COLLATION_NAME, FULL_COLLATION_NAME, CHARACTER_SET_NAME, ID, IS_DEFAULT
FROM INFORMATION_SCHEMA.COLLATION_CHARACTER_SET_APPLICABILITY
WHERE FULL_COLLATION_NAME RLIKE '^(utf8mb4|latin1).*swedish.*ci$';
+-----------------------------+-------------------------------------+--------------------+------+------------+
| COLLATION_NAME | FULL_COLLATION_NAME | CHARACTER_SET_NAME | ID | IS_DEFAULT |
+-----------------------------+-------------------------------------+--------------------+------+------------+
| latin1_swedish_ci | latin1_swedish_ci | latin1 | 8 | Yes |
| latin1_swedish_nopad_ci | latin1_swedish_nopad_ci | latin1 | 1032 | |
| utf8mb4_swedish_ci | utf8mb4_swedish_ci | utf8mb4 | 232 | |
| uca1400_swedish_ai_ci | utf8mb4_uca1400_swedish_ai_ci | utf8mb4 | 2368 | |
| uca1400_swedish_as_ci | utf8mb4_uca1400_swedish_as_ci | utf8mb4 | 2370 | |
| uca1400_swedish_nopad_ai_ci | utf8mb4_uca1400_swedish_nopad_ai_ci | utf8mb4 | 2372 | |
| uca1400_swedish_nopad_as_ci | utf8mb4_uca1400_swedish_nopad_as_ci | utf8mb4 | 2374 | |
+-----------------------------+-------------------------------------+--------------------+------+------------+
- Other INFORMATION_SCHEMA queries:
SELECT COLLATION_NAME FROM INFORMATION_SCHEMA.COLUMNS;
SELECT COLLATION_NAME FROM INFORMATION_SCHEMA.PARAMETERS;
SELECT TABLE_COLLATION FROM INFORMATION_SCHEMA.TABLES;
SELECT DEFAULT_COLLATION_NAME FROM INFORMATION_SCHEMA.SCHEMATA;
SELECT COLLATION_NAME FROM INFORMATION_SCHEMA.ROUTINES;
SELECT COLLATION_CONNECTION FROM INFORMATION_SCHEMA.EVENTS;
SELECT DATABASE_COLLATION FROM INFORMATION_SCHEMA.EVENTS;
SELECT COLLATION_CONNECTION FROM INFORMATION_SCHEMA.ROUTINES;
SELECT DATABASE_COLLATION FROM INFORMATION_SCHEMA.ROUTINES;
SELECT COLLATION_CONNECTION FROM INFORMATION_SCHEMA.TRIGGERS;
SELECT DATABASE_COLLATION FROM INFORMATION_SCHEMA.TRIGGERS;
SELECT COLLATION_CONNECTION FROM INFORMATION_SCHEMA.VIEWS;
display full collation names, including character sets prefix,
for all collations, including new collations.
Corresponding SHOW commands also display full collation names
in collation related columns:
SHOW CREATE TABLE t1;
SHOW CREATE DATABASE db1;
SHOW TABLE STATUS;
SHOW CREATE FUNCTION f1;
SHOW CREATE PROCEDURE p1;
SHOW CREATE EVENT ev1;
SHOW CREATE TRIGGER tr1;
SHOW CREATE VIEW;
These INFORMATION_SCHEMA queries and SHOW statements may change in
the future, to display short collation names.
2021-11-28 13:55:15 +01:00
|
|
|
if (cl.is_contextually_typed_collate_default())
|
2022-05-23 09:05:33 +02:00
|
|
|
{
|
MDEV-27009 Add UCA-14.0.0 collations
- Added one neutral and 22 tailored (language specific) collations based on
Unicode Collation Algorithm version 14.0.0.
Collations were added for Unicode character sets
utf8mb3, utf8mb4, ucs2, utf16, utf32.
Every tailoring was added with four accent and case
sensitivity flag combinations, e.g:
* utf8mb4_uca1400_swedish_as_cs
* utf8mb4_uca1400_swedish_as_ci
* utf8mb4_uca1400_swedish_ai_cs
* utf8mb4_uca1400_swedish_ai_ci
and their _nopad_ variants:
* utf8mb4_uca1400_swedish_nopad_as_cs
* utf8mb4_uca1400_swedish_nopad_as_ci
* utf8mb4_uca1400_swedish_nopad_ai_cs
* utf8mb4_uca1400_swedish_nopad_ai_ci
- Introducing a conception of contextually typed named collations:
CREATE DATABASE db1 CHARACTER SET utf8mb4;
CREATE TABLE db1.t1 (a CHAR(10) COLLATE uca1400_as_ci);
The idea is that there is no need to specify the character set prefix
in the new collation names. It's enough to type just the suffix
"uca1400_as_ci". The character set is taken from the context.
In the above example script the context character set is utf8mb4.
So the CREATE TABLE will make a column with the collation
utf8mb4_uca1400_as_ci.
Short collations names can be used in any parts of the SQL syntax
where the COLLATE clause is understood.
- New collations are displayed only one time
(without character set combinations) by these statements:
SELECT * FROM INFORMATION_SCHEMA.COLLATIONS;
SHOW COLLATION;
For example, all these collations:
- utf8mb3_uca1400_swedish_as_ci
- utf8mb4_uca1400_swedish_as_ci
- ucs2_uca1400_swedish_as_ci
- utf16_uca1400_swedish_as_ci
- utf32_uca1400_swedish_as_ci
have just one entry in INFORMATION_SCHEMA.COLLATIONS and SHOW COLLATION,
with COLLATION_NAME equal to "uca1400_swedish_as_ci", which is the suffix
without the character set name:
SELECT COLLATION_NAME FROM INFORMATION_SCHEMA.COLLATIONS
WHERE COLLATION_NAME LIKE '%uca1400_swedish_as_ci';
+-----------------------+
| COLLATION_NAME |
+-----------------------+
| uca1400_swedish_as_ci |
+-----------------------+
Note, the behaviour of old collations did not change.
Non-unicode collations (e.g. latin1_swedish_ci) and
old UCA-4.0.0 collations (e.g. utf8mb4_unicode_ci)
are still displayed with the character set prefix, as before.
- The structure of the table INFORMATION_SCHEMA.COLLATIONS was changed.
The NOT NULL constraint was removed from these columns:
- CHARACTER_SET_NAME
- ID
- IS_DEFAULT
and from the corresponding columns in SHOW COLLATION.
For example:
SELECT COLLATION_NAME, CHARACTER_SET_NAME, ID, IS_DEFAULT
FROM INFORMATION_SCHEMA.COLLATIONS
WHERE COLLATION_NAME LIKE '%uca1400_swedish_as_ci';
+-----------------------+--------------------+------+------------+
| COLLATION_NAME | CHARACTER_SET_NAME | ID | IS_DEFAULT |
+-----------------------+--------------------+------+------------+
| uca1400_swedish_as_ci | NULL | NULL | NULL |
+-----------------------+--------------------+------+------------+
The NULL value in these columns now means that the collation
is applicable to multiple character sets.
The behavior of old collations did not change.
Make sure your client programs can handle NULL values in these columns.
- The structure of the table
INFORMATION_SCHEMA.COLLATION_CHARACTER_SET_APPLICABILITY was changed.
Three new NOT NULL columns were added:
- FULL_COLLATION_NAME
- ID
- IS_DEFAULT
New collations have multiple entries in COLLATION_CHARACTER_SET_APPLICABILITY.
The column COLLATION_NAME contains the collation name without the character
set prefix. The column FULL_COLLATION_NAME contains the collation name with
the character set prefix.
Old collations have full collation name in both FULL_COLLATION_NAME and
COLLATION_NAME.
SELECT COLLATION_NAME, FULL_COLLATION_NAME, CHARACTER_SET_NAME, ID, IS_DEFAULT
FROM INFORMATION_SCHEMA.COLLATION_CHARACTER_SET_APPLICABILITY
WHERE FULL_COLLATION_NAME RLIKE '^(utf8mb4|latin1).*swedish.*ci$';
+-----------------------------+-------------------------------------+--------------------+------+------------+
| COLLATION_NAME | FULL_COLLATION_NAME | CHARACTER_SET_NAME | ID | IS_DEFAULT |
+-----------------------------+-------------------------------------+--------------------+------+------------+
| latin1_swedish_ci | latin1_swedish_ci | latin1 | 8 | Yes |
| latin1_swedish_nopad_ci | latin1_swedish_nopad_ci | latin1 | 1032 | |
| utf8mb4_swedish_ci | utf8mb4_swedish_ci | utf8mb4 | 232 | |
| uca1400_swedish_ai_ci | utf8mb4_uca1400_swedish_ai_ci | utf8mb4 | 2368 | |
| uca1400_swedish_as_ci | utf8mb4_uca1400_swedish_as_ci | utf8mb4 | 2370 | |
| uca1400_swedish_nopad_ai_ci | utf8mb4_uca1400_swedish_nopad_ai_ci | utf8mb4 | 2372 | |
| uca1400_swedish_nopad_as_ci | utf8mb4_uca1400_swedish_nopad_as_ci | utf8mb4 | 2374 | |
+-----------------------------+-------------------------------------+--------------------+------+------------+
- Other INFORMATION_SCHEMA queries:
SELECT COLLATION_NAME FROM INFORMATION_SCHEMA.COLUMNS;
SELECT COLLATION_NAME FROM INFORMATION_SCHEMA.PARAMETERS;
SELECT TABLE_COLLATION FROM INFORMATION_SCHEMA.TABLES;
SELECT DEFAULT_COLLATION_NAME FROM INFORMATION_SCHEMA.SCHEMATA;
SELECT COLLATION_NAME FROM INFORMATION_SCHEMA.ROUTINES;
SELECT COLLATION_CONNECTION FROM INFORMATION_SCHEMA.EVENTS;
SELECT DATABASE_COLLATION FROM INFORMATION_SCHEMA.EVENTS;
SELECT COLLATION_CONNECTION FROM INFORMATION_SCHEMA.ROUTINES;
SELECT DATABASE_COLLATION FROM INFORMATION_SCHEMA.ROUTINES;
SELECT COLLATION_CONNECTION FROM INFORMATION_SCHEMA.TRIGGERS;
SELECT DATABASE_COLLATION FROM INFORMATION_SCHEMA.TRIGGERS;
SELECT COLLATION_CONNECTION FROM INFORMATION_SCHEMA.VIEWS;
display full collation names, including character sets prefix,
for all collations, including new collations.
Corresponding SHOW commands also display full collation names
in collation related columns:
SHOW CREATE TABLE t1;
SHOW CREATE DATABASE db1;
SHOW TABLE STATUS;
SHOW CREATE FUNCTION f1;
SHOW CREATE PROCEDURE p1;
SHOW CREATE EVENT ev1;
SHOW CREATE TRIGGER tr1;
SHOW CREATE VIEW;
These INFORMATION_SCHEMA queries and SHOW statements may change in
the future, to display short collation names.
2021-11-28 13:55:15 +01:00
|
|
|
if (!(m_ci->state & MY_CS_PRIMARY))
|
|
|
|
{
|
|
|
|
raise_ER_CONFLICTING_DECLARATIONS("COLLATE ", m_ci->coll_name.str,
|
|
|
|
"COLLATE ", "DEFAULT", reverse_order);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
2022-05-23 09:05:33 +02:00
|
|
|
}
|
|
|
|
|
MDEV-27009 Add UCA-14.0.0 collations
- Added one neutral and 22 tailored (language specific) collations based on
Unicode Collation Algorithm version 14.0.0.
Collations were added for Unicode character sets
utf8mb3, utf8mb4, ucs2, utf16, utf32.
Every tailoring was added with four accent and case
sensitivity flag combinations, e.g:
* utf8mb4_uca1400_swedish_as_cs
* utf8mb4_uca1400_swedish_as_ci
* utf8mb4_uca1400_swedish_ai_cs
* utf8mb4_uca1400_swedish_ai_ci
and their _nopad_ variants:
* utf8mb4_uca1400_swedish_nopad_as_cs
* utf8mb4_uca1400_swedish_nopad_as_ci
* utf8mb4_uca1400_swedish_nopad_ai_cs
* utf8mb4_uca1400_swedish_nopad_ai_ci
- Introducing a conception of contextually typed named collations:
CREATE DATABASE db1 CHARACTER SET utf8mb4;
CREATE TABLE db1.t1 (a CHAR(10) COLLATE uca1400_as_ci);
The idea is that there is no need to specify the character set prefix
in the new collation names. It's enough to type just the suffix
"uca1400_as_ci". The character set is taken from the context.
In the above example script the context character set is utf8mb4.
So the CREATE TABLE will make a column with the collation
utf8mb4_uca1400_as_ci.
Short collations names can be used in any parts of the SQL syntax
where the COLLATE clause is understood.
- New collations are displayed only one time
(without character set combinations) by these statements:
SELECT * FROM INFORMATION_SCHEMA.COLLATIONS;
SHOW COLLATION;
For example, all these collations:
- utf8mb3_uca1400_swedish_as_ci
- utf8mb4_uca1400_swedish_as_ci
- ucs2_uca1400_swedish_as_ci
- utf16_uca1400_swedish_as_ci
- utf32_uca1400_swedish_as_ci
have just one entry in INFORMATION_SCHEMA.COLLATIONS and SHOW COLLATION,
with COLLATION_NAME equal to "uca1400_swedish_as_ci", which is the suffix
without the character set name:
SELECT COLLATION_NAME FROM INFORMATION_SCHEMA.COLLATIONS
WHERE COLLATION_NAME LIKE '%uca1400_swedish_as_ci';
+-----------------------+
| COLLATION_NAME |
+-----------------------+
| uca1400_swedish_as_ci |
+-----------------------+
Note, the behaviour of old collations did not change.
Non-unicode collations (e.g. latin1_swedish_ci) and
old UCA-4.0.0 collations (e.g. utf8mb4_unicode_ci)
are still displayed with the character set prefix, as before.
- The structure of the table INFORMATION_SCHEMA.COLLATIONS was changed.
The NOT NULL constraint was removed from these columns:
- CHARACTER_SET_NAME
- ID
- IS_DEFAULT
and from the corresponding columns in SHOW COLLATION.
For example:
SELECT COLLATION_NAME, CHARACTER_SET_NAME, ID, IS_DEFAULT
FROM INFORMATION_SCHEMA.COLLATIONS
WHERE COLLATION_NAME LIKE '%uca1400_swedish_as_ci';
+-----------------------+--------------------+------+------------+
| COLLATION_NAME | CHARACTER_SET_NAME | ID | IS_DEFAULT |
+-----------------------+--------------------+------+------------+
| uca1400_swedish_as_ci | NULL | NULL | NULL |
+-----------------------+--------------------+------+------------+
The NULL value in these columns now means that the collation
is applicable to multiple character sets.
The behavior of old collations did not change.
Make sure your client programs can handle NULL values in these columns.
- The structure of the table
INFORMATION_SCHEMA.COLLATION_CHARACTER_SET_APPLICABILITY was changed.
Three new NOT NULL columns were added:
- FULL_COLLATION_NAME
- ID
- IS_DEFAULT
New collations have multiple entries in COLLATION_CHARACTER_SET_APPLICABILITY.
The column COLLATION_NAME contains the collation name without the character
set prefix. The column FULL_COLLATION_NAME contains the collation name with
the character set prefix.
Old collations have full collation name in both FULL_COLLATION_NAME and
COLLATION_NAME.
SELECT COLLATION_NAME, FULL_COLLATION_NAME, CHARACTER_SET_NAME, ID, IS_DEFAULT
FROM INFORMATION_SCHEMA.COLLATION_CHARACTER_SET_APPLICABILITY
WHERE FULL_COLLATION_NAME RLIKE '^(utf8mb4|latin1).*swedish.*ci$';
+-----------------------------+-------------------------------------+--------------------+------+------------+
| COLLATION_NAME | FULL_COLLATION_NAME | CHARACTER_SET_NAME | ID | IS_DEFAULT |
+-----------------------------+-------------------------------------+--------------------+------+------------+
| latin1_swedish_ci | latin1_swedish_ci | latin1 | 8 | Yes |
| latin1_swedish_nopad_ci | latin1_swedish_nopad_ci | latin1 | 1032 | |
| utf8mb4_swedish_ci | utf8mb4_swedish_ci | utf8mb4 | 232 | |
| uca1400_swedish_ai_ci | utf8mb4_uca1400_swedish_ai_ci | utf8mb4 | 2368 | |
| uca1400_swedish_as_ci | utf8mb4_uca1400_swedish_as_ci | utf8mb4 | 2370 | |
| uca1400_swedish_nopad_ai_ci | utf8mb4_uca1400_swedish_nopad_ai_ci | utf8mb4 | 2372 | |
| uca1400_swedish_nopad_as_ci | utf8mb4_uca1400_swedish_nopad_as_ci | utf8mb4 | 2374 | |
+-----------------------------+-------------------------------------+--------------------+------+------------+
- Other INFORMATION_SCHEMA queries:
SELECT COLLATION_NAME FROM INFORMATION_SCHEMA.COLUMNS;
SELECT COLLATION_NAME FROM INFORMATION_SCHEMA.PARAMETERS;
SELECT TABLE_COLLATION FROM INFORMATION_SCHEMA.TABLES;
SELECT DEFAULT_COLLATION_NAME FROM INFORMATION_SCHEMA.SCHEMATA;
SELECT COLLATION_NAME FROM INFORMATION_SCHEMA.ROUTINES;
SELECT COLLATION_CONNECTION FROM INFORMATION_SCHEMA.EVENTS;
SELECT DATABASE_COLLATION FROM INFORMATION_SCHEMA.EVENTS;
SELECT COLLATION_CONNECTION FROM INFORMATION_SCHEMA.ROUTINES;
SELECT DATABASE_COLLATION FROM INFORMATION_SCHEMA.ROUTINES;
SELECT COLLATION_CONNECTION FROM INFORMATION_SCHEMA.TRIGGERS;
SELECT DATABASE_COLLATION FROM INFORMATION_SCHEMA.TRIGGERS;
SELECT COLLATION_CONNECTION FROM INFORMATION_SCHEMA.VIEWS;
display full collation names, including character sets prefix,
for all collations, including new collations.
Corresponding SHOW commands also display full collation names
in collation related columns:
SHOW CREATE TABLE t1;
SHOW CREATE DATABASE db1;
SHOW TABLE STATUS;
SHOW CREATE FUNCTION f1;
SHOW CREATE PROCEDURE p1;
SHOW CREATE EVENT ev1;
SHOW CREATE TRIGGER tr1;
SHOW CREATE VIEW;
These INFORMATION_SCHEMA queries and SHOW statements may change in
the future, to display short collation names.
2021-11-28 13:55:15 +01:00
|
|
|
if (cl.is_contextually_typed_binary_style())
|
2022-05-23 09:05:33 +02:00
|
|
|
{
|
MDEV-27009 Add UCA-14.0.0 collations
- Added one neutral and 22 tailored (language specific) collations based on
Unicode Collation Algorithm version 14.0.0.
Collations were added for Unicode character sets
utf8mb3, utf8mb4, ucs2, utf16, utf32.
Every tailoring was added with four accent and case
sensitivity flag combinations, e.g:
* utf8mb4_uca1400_swedish_as_cs
* utf8mb4_uca1400_swedish_as_ci
* utf8mb4_uca1400_swedish_ai_cs
* utf8mb4_uca1400_swedish_ai_ci
and their _nopad_ variants:
* utf8mb4_uca1400_swedish_nopad_as_cs
* utf8mb4_uca1400_swedish_nopad_as_ci
* utf8mb4_uca1400_swedish_nopad_ai_cs
* utf8mb4_uca1400_swedish_nopad_ai_ci
- Introducing a conception of contextually typed named collations:
CREATE DATABASE db1 CHARACTER SET utf8mb4;
CREATE TABLE db1.t1 (a CHAR(10) COLLATE uca1400_as_ci);
The idea is that there is no need to specify the character set prefix
in the new collation names. It's enough to type just the suffix
"uca1400_as_ci". The character set is taken from the context.
In the above example script the context character set is utf8mb4.
So the CREATE TABLE will make a column with the collation
utf8mb4_uca1400_as_ci.
Short collations names can be used in any parts of the SQL syntax
where the COLLATE clause is understood.
- New collations are displayed only one time
(without character set combinations) by these statements:
SELECT * FROM INFORMATION_SCHEMA.COLLATIONS;
SHOW COLLATION;
For example, all these collations:
- utf8mb3_uca1400_swedish_as_ci
- utf8mb4_uca1400_swedish_as_ci
- ucs2_uca1400_swedish_as_ci
- utf16_uca1400_swedish_as_ci
- utf32_uca1400_swedish_as_ci
have just one entry in INFORMATION_SCHEMA.COLLATIONS and SHOW COLLATION,
with COLLATION_NAME equal to "uca1400_swedish_as_ci", which is the suffix
without the character set name:
SELECT COLLATION_NAME FROM INFORMATION_SCHEMA.COLLATIONS
WHERE COLLATION_NAME LIKE '%uca1400_swedish_as_ci';
+-----------------------+
| COLLATION_NAME |
+-----------------------+
| uca1400_swedish_as_ci |
+-----------------------+
Note, the behaviour of old collations did not change.
Non-unicode collations (e.g. latin1_swedish_ci) and
old UCA-4.0.0 collations (e.g. utf8mb4_unicode_ci)
are still displayed with the character set prefix, as before.
- The structure of the table INFORMATION_SCHEMA.COLLATIONS was changed.
The NOT NULL constraint was removed from these columns:
- CHARACTER_SET_NAME
- ID
- IS_DEFAULT
and from the corresponding columns in SHOW COLLATION.
For example:
SELECT COLLATION_NAME, CHARACTER_SET_NAME, ID, IS_DEFAULT
FROM INFORMATION_SCHEMA.COLLATIONS
WHERE COLLATION_NAME LIKE '%uca1400_swedish_as_ci';
+-----------------------+--------------------+------+------------+
| COLLATION_NAME | CHARACTER_SET_NAME | ID | IS_DEFAULT |
+-----------------------+--------------------+------+------------+
| uca1400_swedish_as_ci | NULL | NULL | NULL |
+-----------------------+--------------------+------+------------+
The NULL value in these columns now means that the collation
is applicable to multiple character sets.
The behaviour of old collations did not change.
Make sure your client programs can handle NULL values in these columns.
- The structure of the table
INFORMATION_SCHEMA.COLLATION_CHARACTER_SET_APPLICABILITY was changed.
Three new NOT NULL columns were added:
- FULL_COLLATION_NAME
- ID
- IS_DEFAULT
New collations have multiple entries in COLLATION_CHARACTER_SET_APPLICABILITY.
The column COLLATION_NAME contains the collation name without the character
set prefix. The column FULL_COLLATION_NAME contains the collation name with
the character set prefix.
Old collations have full collation name in both FULL_COLLATION_NAME and
COLLATION_NAME.
SELECT COLLATION_NAME, FULL_COLLATION_NAME, CHARACTER_SET_NAME, ID, IS_DEFAULT
FROM INFORMATION_SCHEMA.COLLATION_CHARACTER_SET_APPLICABILITY
WHERE FULL_COLLATION_NAME RLIKE '^(utf8mb4|latin1).*swedish.*ci$';
+-----------------------------+-------------------------------------+--------------------+------+------------+
| COLLATION_NAME | FULL_COLLATION_NAME | CHARACTER_SET_NAME | ID | IS_DEFAULT |
+-----------------------------+-------------------------------------+--------------------+------+------------+
| latin1_swedish_ci | latin1_swedish_ci | latin1 | 8 | Yes |
| latin1_swedish_nopad_ci | latin1_swedish_nopad_ci | latin1 | 1032 | |
| utf8mb4_swedish_ci | utf8mb4_swedish_ci | utf8mb4 | 232 | |
| uca1400_swedish_ai_ci | utf8mb4_uca1400_swedish_ai_ci | utf8mb4 | 2368 | |
| uca1400_swedish_as_ci | utf8mb4_uca1400_swedish_as_ci | utf8mb4 | 2370 | |
| uca1400_swedish_nopad_ai_ci | utf8mb4_uca1400_swedish_nopad_ai_ci | utf8mb4 | 2372 | |
| uca1400_swedish_nopad_as_ci | utf8mb4_uca1400_swedish_nopad_as_ci | utf8mb4 | 2374 | |
+-----------------------------+-------------------------------------+--------------------+------+------------+
- Other INFORMATION_SCHEMA queries:
SELECT COLLATION_NAME FROM INFORMATION_SCHEMA.COLUMNS;
SELECT COLLATION_NAME FROM INFORMATION_SCHEMA.PARAMETERS;
SELECT TABLE_COLLATION FROM INFORMATION_SCHEMA.TABLES;
SELECT DEFAULT_COLLATION_NAME FROM INFORMATION_SCHEMA.SCHEMATA;
SELECT COLLATION_NAME FROM INFORMATION_SCHEMA.ROUTINES;
SELECT COLLATION_CONNECTION FROM INFORMATION_SCHEMA.EVENTS;
SELECT DATABASE_COLLATION FROM INFORMATION_SCHEMA.EVENTS;
SELECT COLLATION_CONNECTION FROM INFORMATION_SCHEMA.ROUTINES;
SELECT DATABASE_COLLATION FROM INFORMATION_SCHEMA.ROUTINES;
SELECT COLLATION_CONNECTION FROM INFORMATION_SCHEMA.TRIGGERS;
SELECT DATABASE_COLLATION FROM INFORMATION_SCHEMA.TRIGGERS;
SELECT COLLATION_CONNECTION FROM INFORMATION_SCHEMA.VIEWS;
display full collation names, including the character set prefix,
for all collations, including new collations.
Corresponding SHOW commands also display full collation names
in collation related columns:
SHOW CREATE TABLE t1;
SHOW CREATE DATABASE db1;
SHOW TABLE STATUS;
SHOW CREATE FUNCTION f1;
SHOW CREATE PROCEDURE p1;
SHOW CREATE EVENT ev1;
SHOW CREATE TRIGGER tr1;
SHOW CREATE VIEW;
These INFORMATION_SCHEMA queries and SHOW statements may change in
the future, to display short collation names.
2021-11-28 13:55:15 +01:00
|
|
|
if (!(m_ci->state & MY_CS_BINSORT))
|
|
|
|
{
|
|
|
|
raise_ER_CONFLICTING_DECLARATIONS("COLLATE ", m_ci->coll_name.str,
|
|
|
|
"", "BINARY", reverse_order);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
DBUG_ASSERT(!strncmp(cl.charset_info()->coll_name.str,
|
|
|
|
STRING_WITH_LEN("utf8mb4_uca1400_")));
|
|
|
|
|
|
|
|
Charset_loader_server loader;
|
|
|
|
CHARSET_INFO *ci= loader.get_exact_collation_by_context_name(
|
|
|
|
m_ci,
|
|
|
|
cl.collation_name_context_suffix().str,
|
|
|
|
MYF(0));
|
|
|
|
if (m_ci != ci)
|
|
|
|
{
|
|
|
|
raise_ER_CONFLICTING_DECLARATIONS("COLLATE ",
|
|
|
|
m_ci->coll_name.str,
|
|
|
|
"COLLATE ",
|
|
|
|
cl.collation_name_for_show().str,
|
|
|
|
reverse_order);
|
2022-05-23 09:05:33 +02:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
bool
|
|
|
|
Lex_context_collation::raise_if_not_equal(const Lex_context_collation &cl) const
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
Only equal context collations are possible here so far:
|
|
|
|
- Column grammar only supports BINARY, but does not support COLLATE DEFAULT
|
|
|
|
- DB/Table grammar only support COLLATE DEFAULT
|
|
|
|
*/
|
MDEV-27009 Add UCA-14.0.0 collations
- Added one neutral and 22 tailored (language specific) collations based on
Unicode Collation Algorithm version 14.0.0.
Collations were added for Unicode character sets
utf8mb3, utf8mb4, ucs2, utf16, utf32.
Every tailoring was added with four accent and case
sensitivity flag combinations, e.g:
* utf8mb4_uca1400_swedish_as_cs
* utf8mb4_uca1400_swedish_as_ci
* utf8mb4_uca1400_swedish_ai_cs
* utf8mb4_uca1400_swedish_ai_ci
and their _nopad_ variants:
* utf8mb4_uca1400_swedish_nopad_as_cs
* utf8mb4_uca1400_swedish_nopad_as_ci
* utf8mb4_uca1400_swedish_nopad_ai_cs
* utf8mb4_uca1400_swedish_nopad_ai_ci
- Introducing a conception of contextually typed named collations:
CREATE DATABASE db1 CHARACTER SET utf8mb4;
CREATE TABLE db1.t1 (a CHAR(10) COLLATE uca1400_as_ci);
The idea is that there is no a need to specify the character set prefix
in the new collation names. It's enough to type just the suffix
"uca1400_as_ci". The character set is taken from the context.
In the above example script the context character set is utf8mb4.
So the CREATE TABLE will make a column with the collation
utf8mb4_uca1400_as_ci.
Short collations names can be used in any parts of the SQL syntax
where the COLLATE clause is understood.
- New collations are displayed only one time
(without character set combinations) by these statements:
SELECT * FROM INFORMATION_SCHEMA.COLLATIONS;
SHOW COLLATION;
For example, all these collations:
- utf8mb3_uca1400_swedish_as_ci
- utf8mb4_uca1400_swedish_as_ci
- ucs2_uca1400_swedish_as_ci
- utf16_uca1400_swedish_as_ci
- utf32_uca1400_swedish_as_ci
have just one entry in INFORMATION_SCHEMA.COLLATIONS and SHOW COLLATION,
with COLLATION_NAME equal to "uca1400_swedish_as_ci", which is the suffix
without the character set name:
SELECT COLLATION_NAME FROM INFORMATION_SCHEMA.COLLATIONS
WHERE COLLATION_NAME LIKE '%uca1400_swedish_as_ci';
+-----------------------+
| COLLATION_NAME |
+-----------------------+
| uca1400_swedish_as_ci |
+-----------------------+
Note, the behaviour of old collations did not change.
Non-unicode collations (e.g. latin1_swedish_ci) and
old UCA-4.0.0 collations (e.g. utf8mb4_unicode_ci)
are still displayed with the character set prefix, as before.
- The structure of the table INFORMATION_SCHEMA.COLLATIONS was changed.
The NOT NULL constraint was removed from these columns:
- CHARACTER_SET_NAME
- ID
- IS_DEFAULT
and from the corresponding columns in SHOW COLLATION.
For example:
SELECT COLLATION_NAME, CHARACTER_SET_NAME, ID, IS_DEFAULT
FROM INFORMATION_SCHEMA.COLLATIONS
WHERE COLLATION_NAME LIKE '%uca1400_swedish_as_ci';
+-----------------------+--------------------+------+------------+
| COLLATION_NAME | CHARACTER_SET_NAME | ID | IS_DEFAULT |
+-----------------------+--------------------+------+------------+
| uca1400_swedish_as_ci | NULL | NULL | NULL |
+-----------------------+--------------------+------+------------+
The NULL value in these columns now means that the collation
is applicable to multiple character sets.
The behavioir of old collations did not change.
Make sure your client programs can handle NULL values in these columns.
- The structure of the table
INFORMATION_SCHEMA.COLLATION_CHARACTER_SET_APPLICABILITY was changed.
Three new NOT NULL columns were added:
- FULL_COLLATION_NAME
- ID
- IS_DEFAULT
New collations have multiple entries in COLLATION_CHARACTER_SET_APPLICABILITY.
The column COLLATION_NAME contains the collation name without the character
set prefix. The column FULL_COLLATION_NAME contains the collation name with
the character set prefix.
Old collations have full collation name in both FULL_COLLATION_NAME and
COLLATION_NAME.
SELECT COLLATION_NAME, FULL_COLLATION_NAME, CHARACTER_SET_NAME, ID, IS_DEFAULT
FROM INFORMATION_SCHEMA.COLLATION_CHARACTER_SET_APPLICABILITY
WHERE FULL_COLLATION_NAME RLIKE '^(utf8mb4|latin1).*swedish.*ci$';
+-----------------------------+-------------------------------------+--------------------+------+------------+
| COLLATION_NAME | FULL_COLLATION_NAME | CHARACTER_SET_NAME | ID | IS_DEFAULT |
+-----------------------------+-------------------------------------+--------------------+------+------------+
| latin1_swedish_ci | latin1_swedish_ci | latin1 | 8 | Yes |
| latin1_swedish_nopad_ci | latin1_swedish_nopad_ci | latin1 | 1032 | |
| utf8mb4_swedish_ci | utf8mb4_swedish_ci | utf8mb4 | 232 | |
| uca1400_swedish_ai_ci | utf8mb4_uca1400_swedish_ai_ci | utf8mb4 | 2368 | |
| uca1400_swedish_as_ci | utf8mb4_uca1400_swedish_as_ci | utf8mb4 | 2370 | |
| uca1400_swedish_nopad_ai_ci | utf8mb4_uca1400_swedish_nopad_ai_ci | utf8mb4 | 2372 | |
| uca1400_swedish_nopad_as_ci | utf8mb4_uca1400_swedish_nopad_as_ci | utf8mb4 | 2374 | |
+-----------------------------+-------------------------------------+--------------------+------+------------+
- Other INFORMATION_SCHEMA queries:
SELECT COLLATION_NAME FROM INFORMATION_SCHEMA.COLUMNS;
SELECT COLLATION_NAME FROM INFORMATION_SCHEMA.PARAMETERS;
SELECT TABLE_COLLATION FROM INFORMATION_SCHEMA.TABLES;
SELECT DEFAULT_COLLATION_NAME FROM INFORMATION_SCHEMA.SCHEMATA;
SELECT COLLATION_NAME FROM INFORMATION_SCHEMA.ROUTINES;
SELECT COLLATION_CONNECTION FROM INFORMATION_SCHEMA.EVENTS;
SELECT DATABASE_COLLATION FROM INFORMATION_SCHEMA.EVENTS;
SELECT COLLATION_CONNECTION FROM INFORMATION_SCHEMA.ROUTINES;
SELECT DATABASE_COLLATION FROM INFORMATION_SCHEMA.ROUTINES;
SELECT COLLATION_CONNECTION FROM INFORMATION_SCHEMA.TRIGGERS;
SELECT DATABASE_COLLATION FROM INFORMATION_SCHEMA.TRIGGERS;
SELECT COLLATION_CONNECTION FROM INFORMATION_SCHEMA.VIEWS;
display full collation names, including character sets prefix,
for all collations, including new collations.
Corresponding SHOW commands also display full collation names
in collation related columns:
SHOW CREATE TABLE t1;
SHOW CREATE DATABASE db1;
SHOW TABLE STATUS;
SHOW CREATE FUNCTION f1;
SHOW CREATE PROCEDURE p1;
SHOW CREATE EVENT ev1;
SHOW CREATE TRIGGER tr1;
SHOW CREATE VIEW;
These INFORMATION_SCHEMA queries and SHOW statements may change in
the future, to display show collation names.
2021-11-28 13:55:15 +01:00
|
|
|
if (m_ci != cl.m_ci)
|
|
|
|
{
|
|
|
|
my_error(ER_CONFLICTING_DECLARATIONS, MYF(0),
|
|
|
|
is_contextually_typed_binary_style() ? "" : "COLLATE ",
|
|
|
|
collation_name_for_show().str,
|
|
|
|
cl.is_contextually_typed_binary_style() ? "" : "COLLATE ",
|
|
|
|
cl.collation_name_for_show().str);
|
|
|
|
return true;
|
|
|
|
}
|
2022-05-23 09:05:33 +02:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
Resolve a context collation to the character set (when the former gets known):
|
|
|
|
CREATE TABLE t1 (a CHAR(10) BINARY) CHARACTER SET latin1;
|
|
|
|
CREATE DATABASE db1 COLLATE DEFAULT CHARACTER SET latin1;
|
|
|
|
*/
|
|
|
|
bool Lex_exact_charset_opt_extended_collate::
|
MDEV-30164 System variable for default collations
This patch adds a way to override default collations
(or "character set collations") for desired character sets.
The SQL standard says:
> Each collation known in an SQL-environment is applicable to one
> or more character sets, and for each character set, one or more
> collations are applicable to it, one of which is associated with
> it as its character set collation.
In MariaDB, character set collations has been hard-coded so far,
e.g. utf8mb4_general_ci has been a hard-coded character set collation
for utf8mb4.
This patch allows to override (globally per server, or per session)
character set collations, so for example, uca1400_ai_ci can be set as a
character set collation for Unicode character sets
(instead of compiled xxx_general_ci).
The array of overridden character set collations is stored in a new
(session and global) system variable @@character_set_collations and
can be set as a comma separated list of charset=collation pairs, e.g.:
SET @@character_set_collations='utf8mb3=uca1400_ai_ci,utf8mb4=uca1400_ai_ci';
The variable is empty by default, which mean use the hard-coded
character set collations (e.g. utf8mb4_general_ci for utf8mb4).
The variable can also be set globally by passing to the server startup command
line, and/or in my.cnf.
2022-12-14 15:46:27 +01:00
|
|
|
merge_context_collation_override(Sql_used *used,
|
|
|
|
const Charset_collation_map_st &map,
|
|
|
|
const Lex_context_collation &cl)
|
2022-05-23 09:05:33 +02:00
|
|
|
{
|
|
|
|
DBUG_ASSERT(m_ci);
|
|
|
|
|
|
|
|
// CHAR(10) BINARY
|
|
|
|
if (cl.is_contextually_typed_binary_style())
|
|
|
|
{
|
|
|
|
CHARSET_INFO *ci= find_bin_collation();
|
|
|
|
if (!ci)
|
|
|
|
return true;
|
|
|
|
m_ci= ci;
|
|
|
|
m_with_collate= true;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// COLLATE DEFAULT
|
|
|
|
if (cl.is_contextually_typed_collate_default())
|
|
|
|
{
|
MDEV-30164 System variable for default collations
This patch adds a way to override default collations
(or "character set collations") for desired character sets.
The SQL standard says:
> Each collation known in an SQL-environment is applicable to one
> or more character sets, and for each character set, one or more
> collations are applicable to it, one of which is associated with
> it as its character set collation.
In MariaDB, character set collations has been hard-coded so far,
e.g. utf8mb4_general_ci has been a hard-coded character set collation
for utf8mb4.
This patch allows to override (globally per server, or per session)
character set collations, so for example, uca1400_ai_ci can be set as a
character set collation for Unicode character sets
(instead of compiled xxx_general_ci).
The array of overridden character set collations is stored in a new
(session and global) system variable @@character_set_collations and
can be set as a comma separated list of charset=collation pairs, e.g.:
SET @@character_set_collations='utf8mb3=uca1400_ai_ci,utf8mb4=uca1400_ai_ci';
The variable is empty by default, which mean use the hard-coded
character set collations (e.g. utf8mb4_general_ci for utf8mb4).
The variable can also be set globally by passing to the server startup command
line, and/or in my.cnf.
2022-12-14 15:46:27 +01:00
|
|
|
CHARSET_INFO *ci= find_mapped_default_collation(used, map);
|
2022-05-23 09:05:33 +02:00
|
|
|
DBUG_ASSERT(ci);
|
|
|
|
if (!ci)
|
|
|
|
return true;
|
|
|
|
m_ci= ci;
|
|
|
|
m_with_collate= true;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
MDEV-27009 Add UCA-14.0.0 collations
- Added one neutral and 22 tailored (language specific) collations based on
Unicode Collation Algorithm version 14.0.0.
Collations were added for Unicode character sets
utf8mb3, utf8mb4, ucs2, utf16, utf32.
Every tailoring was added with four accent and case
sensitivity flag combinations, e.g:
* utf8mb4_uca1400_swedish_as_cs
* utf8mb4_uca1400_swedish_as_ci
* utf8mb4_uca1400_swedish_ai_cs
* utf8mb4_uca1400_swedish_ai_ci
and their _nopad_ variants:
* utf8mb4_uca1400_swedish_nopad_as_cs
* utf8mb4_uca1400_swedish_nopad_as_ci
* utf8mb4_uca1400_swedish_nopad_ai_cs
* utf8mb4_uca1400_swedish_nopad_ai_ci
- Introducing a conception of contextually typed named collations:
CREATE DATABASE db1 CHARACTER SET utf8mb4;
CREATE TABLE db1.t1 (a CHAR(10) COLLATE uca1400_as_ci);
The idea is that there is no a need to specify the character set prefix
in the new collation names. It's enough to type just the suffix
"uca1400_as_ci". The character set is taken from the context.
In the above example script the context character set is utf8mb4.
So the CREATE TABLE will make a column with the collation
utf8mb4_uca1400_as_ci.
Short collations names can be used in any parts of the SQL syntax
where the COLLATE clause is understood.
- New collations are displayed only one time
(without character set combinations) by these statements:
SELECT * FROM INFORMATION_SCHEMA.COLLATIONS;
SHOW COLLATION;
For example, all these collations:
- utf8mb3_uca1400_swedish_as_ci
- utf8mb4_uca1400_swedish_as_ci
- ucs2_uca1400_swedish_as_ci
- utf16_uca1400_swedish_as_ci
- utf32_uca1400_swedish_as_ci
have just one entry in INFORMATION_SCHEMA.COLLATIONS and SHOW COLLATION,
with COLLATION_NAME equal to "uca1400_swedish_as_ci", which is the suffix
without the character set name:
SELECT COLLATION_NAME FROM INFORMATION_SCHEMA.COLLATIONS
WHERE COLLATION_NAME LIKE '%uca1400_swedish_as_ci';
+-----------------------+
| COLLATION_NAME |
+-----------------------+
| uca1400_swedish_as_ci |
+-----------------------+
Note, the behaviour of old collations did not change.
Non-unicode collations (e.g. latin1_swedish_ci) and
old UCA-4.0.0 collations (e.g. utf8mb4_unicode_ci)
are still displayed with the character set prefix, as before.
- The structure of the table INFORMATION_SCHEMA.COLLATIONS was changed.
The NOT NULL constraint was removed from these columns:
- CHARACTER_SET_NAME
- ID
- IS_DEFAULT
and from the corresponding columns in SHOW COLLATION.
For example:
SELECT COLLATION_NAME, CHARACTER_SET_NAME, ID, IS_DEFAULT
FROM INFORMATION_SCHEMA.COLLATIONS
WHERE COLLATION_NAME LIKE '%uca1400_swedish_as_ci';
+-----------------------+--------------------+------+------------+
| COLLATION_NAME | CHARACTER_SET_NAME | ID | IS_DEFAULT |
+-----------------------+--------------------+------+------------+
| uca1400_swedish_as_ci | NULL | NULL | NULL |
+-----------------------+--------------------+------+------------+
The NULL value in these columns now means that the collation
is applicable to multiple character sets.
The behavioir of old collations did not change.
Make sure your client programs can handle NULL values in these columns.
- The structure of the table
INFORMATION_SCHEMA.COLLATION_CHARACTER_SET_APPLICABILITY was changed.
Three new NOT NULL columns were added:
- FULL_COLLATION_NAME
- ID
- IS_DEFAULT
New collations have multiple entries in COLLATION_CHARACTER_SET_APPLICABILITY.
The column COLLATION_NAME contains the collation name without the character
set prefix. The column FULL_COLLATION_NAME contains the collation name with
the character set prefix.
Old collations have full collation name in both FULL_COLLATION_NAME and
COLLATION_NAME.
SELECT COLLATION_NAME, FULL_COLLATION_NAME, CHARACTER_SET_NAME, ID, IS_DEFAULT
FROM INFORMATION_SCHEMA.COLLATION_CHARACTER_SET_APPLICABILITY
WHERE FULL_COLLATION_NAME RLIKE '^(utf8mb4|latin1).*swedish.*ci$';
+-----------------------------+-------------------------------------+--------------------+------+------------+
| COLLATION_NAME | FULL_COLLATION_NAME | CHARACTER_SET_NAME | ID | IS_DEFAULT |
+-----------------------------+-------------------------------------+--------------------+------+------------+
| latin1_swedish_ci | latin1_swedish_ci | latin1 | 8 | Yes |
| latin1_swedish_nopad_ci | latin1_swedish_nopad_ci | latin1 | 1032 | |
| utf8mb4_swedish_ci | utf8mb4_swedish_ci | utf8mb4 | 232 | |
| uca1400_swedish_ai_ci | utf8mb4_uca1400_swedish_ai_ci | utf8mb4 | 2368 | |
| uca1400_swedish_as_ci | utf8mb4_uca1400_swedish_as_ci | utf8mb4 | 2370 | |
| uca1400_swedish_nopad_ai_ci | utf8mb4_uca1400_swedish_nopad_ai_ci | utf8mb4 | 2372 | |
| uca1400_swedish_nopad_as_ci | utf8mb4_uca1400_swedish_nopad_as_ci | utf8mb4 | 2374 | |
+-----------------------------+-------------------------------------+--------------------+------+------------+
- Other INFORMATION_SCHEMA queries:
SELECT COLLATION_NAME FROM INFORMATION_SCHEMA.COLUMNS;
SELECT COLLATION_NAME FROM INFORMATION_SCHEMA.PARAMETERS;
SELECT TABLE_COLLATION FROM INFORMATION_SCHEMA.TABLES;
SELECT DEFAULT_COLLATION_NAME FROM INFORMATION_SCHEMA.SCHEMATA;
SELECT COLLATION_NAME FROM INFORMATION_SCHEMA.ROUTINES;
SELECT COLLATION_CONNECTION FROM INFORMATION_SCHEMA.EVENTS;
SELECT DATABASE_COLLATION FROM INFORMATION_SCHEMA.EVENTS;
SELECT COLLATION_CONNECTION FROM INFORMATION_SCHEMA.ROUTINES;
SELECT DATABASE_COLLATION FROM INFORMATION_SCHEMA.ROUTINES;
SELECT COLLATION_CONNECTION FROM INFORMATION_SCHEMA.TRIGGERS;
SELECT DATABASE_COLLATION FROM INFORMATION_SCHEMA.TRIGGERS;
SELECT COLLATION_CONNECTION FROM INFORMATION_SCHEMA.VIEWS;
display full collation names, including character sets prefix,
for all collations, including new collations.
Corresponding SHOW commands also display full collation names
in collation related columns:
SHOW CREATE TABLE t1;
SHOW CREATE DATABASE db1;
SHOW TABLE STATUS;
SHOW CREATE FUNCTION f1;
SHOW CREATE PROCEDURE p1;
SHOW CREATE EVENT ev1;
SHOW CREATE TRIGGER tr1;
SHOW CREATE VIEW;
These INFORMATION_SCHEMA queries and SHOW statements may change in
the future, to display show collation names.
2021-11-28 13:55:15 +01:00
|
|
|
DBUG_ASSERT(!strncmp(cl.charset_info()->coll_name.str,
|
|
|
|
STRING_WITH_LEN("utf8mb4_uca1400_")));
|
|
|
|
|
|
|
|
CHARSET_INFO *ci= Charset_loader_server().
|
|
|
|
get_exact_collation_by_context_name_or_error(m_ci,
|
|
|
|
cl.charset_info()->coll_name.str + 8, MYF(0));
|
|
|
|
if (!ci)
|
|
|
|
return true;
|
|
|
|
m_ci= ci;
|
|
|
|
m_with_collate= true;
|
2022-05-23 09:05:33 +02:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
MDEV-30164 System variable for default collations
This patch adds a way to override default collations
(or "character set collations") for desired character sets.
The SQL standard says:
> Each collation known in an SQL-environment is applicable to one
> or more character sets, and for each character set, one or more
> collations are applicable to it, one of which is associated with
> it as its character set collation.
In MariaDB, character set collations have been hard-coded so far,
e.g. utf8mb4_general_ci has been a hard-coded character set collation
for utf8mb4.
This patch allows to override (globally per server, or per session)
character set collations, so for example, uca1400_ai_ci can be set as a
character set collation for Unicode character sets
(instead of compiled xxx_general_ci).
The array of overridden character set collations is stored in a new
(session and global) system variable @@character_set_collations and
can be set as a comma separated list of charset=collation pairs, e.g.:
SET @@character_set_collations='utf8mb3=uca1400_ai_ci,utf8mb4=uca1400_ai_ci';
The variable is empty by default, which means using the hard-coded
character set collations (e.g. utf8mb4_general_ci for utf8mb4).
The variable can also be set globally by passing to the server startup command
line, and/or in my.cnf.
2022-12-14 15:46:27 +01:00
|
|
|
/*
  Merge this COLLATE clause with a CHARACTER SET clause.

  @param used  Passed through to merge_context_collation().
  @param map   Passed through to merge_context_collation().
  @param cs    The exact character set to merge with.
  @return      false on success, true on failure
               (the value returned by the delegated call).
*/
bool Lex_extended_collation_st::merge_exact_charset(Sql_used *used,
                                    const Charset_collation_map_st &map,
                                    const Lex_exact_charset &cs)
{
  if (m_type == TYPE_EXACT)
  {
    // COLLATE latin1_swedish_ci .. CHARACTER SET latin1
    Lex_exact_collation exact(m_ci);
    return cs.raise_if_not_applicable(exact);
  }

  if (m_type == TYPE_CONTEXTUALLY_TYPED)
  {
    // COLLATE DEFAULT .. CHARACTER SET latin1
    Lex_exact_charset_opt_extended_collate resolved(cs);
    const bool failed= resolved.merge_context_collation(used, map,
                                            Lex_context_collation(m_ci));
    if (failed)
      return true;
    // The context collation is now known exactly: remember it as such
    *this= Lex_extended_collation(resolved.collation());
    return false;
  }

  // Unknown m_type
  DBUG_ASSERT(0);
  return false;
}
|
|
|
|
|
|
|
|
|
|
|
|
bool Lex_extended_collation_st::
|
|
|
|
merge_exact_collation(const Lex_exact_collation &rhs)
|
|
|
|
{
|
|
|
|
switch (m_type) {
|
|
|
|
|
|
|
|
case TYPE_EXACT:
|
|
|
|
/*
|
|
|
|
EXACT + EXACT
|
|
|
|
COLLATE latin1_bin .. COLLATE latin1_bin
|
|
|
|
*/
|
|
|
|
return Lex_exact_collation(m_ci).raise_if_not_equal(rhs);
|
|
|
|
|
|
|
|
case TYPE_CONTEXTUALLY_TYPED:
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
CONTEXT + EXACT
|
|
|
|
CHAR(10) COLLATE DEFAULT .. COLLATE latin1_swedish_ci
|
|
|
|
CHAR(10) BINARY .. COLLATE latin1_bin
|
MDEV-27009 Add UCA-14.0.0 collations
- Added one neutral and 22 tailored (language specific) collations based on
Unicode Collation Algorithm version 14.0.0.
Collations were added for Unicode character sets
utf8mb3, utf8mb4, ucs2, utf16, utf32.
Every tailoring was added with four accent and case
sensitivity flag combinations, e.g:
* utf8mb4_uca1400_swedish_as_cs
* utf8mb4_uca1400_swedish_as_ci
* utf8mb4_uca1400_swedish_ai_cs
* utf8mb4_uca1400_swedish_ai_ci
and their _nopad_ variants:
* utf8mb4_uca1400_swedish_nopad_as_cs
* utf8mb4_uca1400_swedish_nopad_as_ci
* utf8mb4_uca1400_swedish_nopad_ai_cs
* utf8mb4_uca1400_swedish_nopad_ai_ci
- Introducing a conception of contextually typed named collations:
CREATE DATABASE db1 CHARACTER SET utf8mb4;
CREATE TABLE db1.t1 (a CHAR(10) COLLATE uca1400_as_ci);
The idea is that there is no a need to specify the character set prefix
in the new collation names. It's enough to type just the suffix
"uca1400_as_ci". The character set is taken from the context.
In the above example script the context character set is utf8mb4.
So the CREATE TABLE will make a column with the collation
utf8mb4_uca1400_as_ci.
Short collations names can be used in any parts of the SQL syntax
where the COLLATE clause is understood.
- New collations are displayed only one time
(without character set combinations) by these statements:
SELECT * FROM INFORMATION_SCHEMA.COLLATIONS;
SHOW COLLATION;
For example, all these collations:
- utf8mb3_uca1400_swedish_as_ci
- utf8mb4_uca1400_swedish_as_ci
- ucs2_uca1400_swedish_as_ci
- utf16_uca1400_swedish_as_ci
- utf32_uca1400_swedish_as_ci
have just one entry in INFORMATION_SCHEMA.COLLATIONS and SHOW COLLATION,
with COLLATION_NAME equal to "uca1400_swedish_as_ci", which is the suffix
without the character set name:
SELECT COLLATION_NAME FROM INFORMATION_SCHEMA.COLLATIONS
WHERE COLLATION_NAME LIKE '%uca1400_swedish_as_ci';
+-----------------------+
| COLLATION_NAME |
+-----------------------+
| uca1400_swedish_as_ci |
+-----------------------+
Note, the behaviour of old collations did not change.
Non-unicode collations (e.g. latin1_swedish_ci) and
old UCA-4.0.0 collations (e.g. utf8mb4_unicode_ci)
are still displayed with the character set prefix, as before.
- The structure of the table INFORMATION_SCHEMA.COLLATIONS was changed.
The NOT NULL constraint was removed from these columns:
- CHARACTER_SET_NAME
- ID
- IS_DEFAULT
and from the corresponding columns in SHOW COLLATION.
For example:
SELECT COLLATION_NAME, CHARACTER_SET_NAME, ID, IS_DEFAULT
FROM INFORMATION_SCHEMA.COLLATIONS
WHERE COLLATION_NAME LIKE '%uca1400_swedish_as_ci';
+-----------------------+--------------------+------+------------+
| COLLATION_NAME | CHARACTER_SET_NAME | ID | IS_DEFAULT |
+-----------------------+--------------------+------+------------+
| uca1400_swedish_as_ci | NULL | NULL | NULL |
+-----------------------+--------------------+------+------------+
The NULL value in these columns now means that the collation
is applicable to multiple character sets.
The behavioir of old collations did not change.
Make sure your client programs can handle NULL values in these columns.
- The structure of the table
INFORMATION_SCHEMA.COLLATION_CHARACTER_SET_APPLICABILITY was changed.
Three new NOT NULL columns were added:
- FULL_COLLATION_NAME
- ID
- IS_DEFAULT
New collations have multiple entries in COLLATION_CHARACTER_SET_APPLICABILITY.
The column COLLATION_NAME contains the collation name without the character
set prefix. The column FULL_COLLATION_NAME contains the collation name with
the character set prefix.
Old collations have full collation name in both FULL_COLLATION_NAME and
COLLATION_NAME.
SELECT COLLATION_NAME, FULL_COLLATION_NAME, CHARACTER_SET_NAME, ID, IS_DEFAULT
FROM INFORMATION_SCHEMA.COLLATION_CHARACTER_SET_APPLICABILITY
WHERE FULL_COLLATION_NAME RLIKE '^(utf8mb4|latin1).*swedish.*ci$';
+-----------------------------+-------------------------------------+--------------------+------+------------+
| COLLATION_NAME | FULL_COLLATION_NAME | CHARACTER_SET_NAME | ID | IS_DEFAULT |
+-----------------------------+-------------------------------------+--------------------+------+------------+
| latin1_swedish_ci | latin1_swedish_ci | latin1 | 8 | Yes |
| latin1_swedish_nopad_ci | latin1_swedish_nopad_ci | latin1 | 1032 | |
| utf8mb4_swedish_ci | utf8mb4_swedish_ci | utf8mb4 | 232 | |
| uca1400_swedish_ai_ci | utf8mb4_uca1400_swedish_ai_ci | utf8mb4 | 2368 | |
| uca1400_swedish_as_ci | utf8mb4_uca1400_swedish_as_ci | utf8mb4 | 2370 | |
| uca1400_swedish_nopad_ai_ci | utf8mb4_uca1400_swedish_nopad_ai_ci | utf8mb4 | 2372 | |
| uca1400_swedish_nopad_as_ci | utf8mb4_uca1400_swedish_nopad_as_ci | utf8mb4 | 2374 | |
+-----------------------------+-------------------------------------+--------------------+------+------------+
- Other INFORMATION_SCHEMA queries:
SELECT COLLATION_NAME FROM INFORMATION_SCHEMA.COLUMNS;
SELECT COLLATION_NAME FROM INFORMATION_SCHEMA.PARAMETERS;
SELECT TABLE_COLLATION FROM INFORMATION_SCHEMA.TABLES;
SELECT DEFAULT_COLLATION_NAME FROM INFORMATION_SCHEMA.SCHEMATA;
SELECT COLLATION_NAME FROM INFORMATION_SCHEMA.ROUTINES;
SELECT COLLATION_CONNECTION FROM INFORMATION_SCHEMA.EVENTS;
SELECT DATABASE_COLLATION FROM INFORMATION_SCHEMA.EVENTS;
SELECT COLLATION_CONNECTION FROM INFORMATION_SCHEMA.ROUTINES;
SELECT DATABASE_COLLATION FROM INFORMATION_SCHEMA.ROUTINES;
SELECT COLLATION_CONNECTION FROM INFORMATION_SCHEMA.TRIGGERS;
SELECT DATABASE_COLLATION FROM INFORMATION_SCHEMA.TRIGGERS;
SELECT COLLATION_CONNECTION FROM INFORMATION_SCHEMA.VIEWS;
display full collation names, including character sets prefix,
for all collations, including new collations.
Corresponding SHOW commands also display full collation names
in collation related columns:
SHOW CREATE TABLE t1;
SHOW CREATE DATABASE db1;
SHOW TABLE STATUS;
SHOW CREATE FUNCTION f1;
SHOW CREATE PROCEDURE p1;
SHOW CREATE EVENT ev1;
SHOW CREATE TRIGGER tr1;
SHOW CREATE VIEW;
These INFORMATION_SCHEMA queries and SHOW statements may change in
the future, to display show collation names.
2021-11-28 13:55:15 +01:00
|
|
|
CHAR(10) COLLATE uca1400_as_ci .. COLLATE latin1_bin
|
2022-05-23 09:05:33 +02:00
|
|
|
*/
|
|
|
|
if (rhs.raise_if_conflicts_with_context_collation(
|
|
|
|
Lex_context_collation(m_ci), true))
|
|
|
|
return true;
|
|
|
|
*this= Lex_extended_collation(rhs);
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
DBUG_ASSERT(0);
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
bool Lex_extended_collation_st::
|
|
|
|
raise_if_conflicts_with_context_collation(const Lex_context_collation &rhs)
|
|
|
|
const
|
|
|
|
{
|
|
|
|
switch (m_type) {
|
|
|
|
|
|
|
|
case TYPE_EXACT:
|
|
|
|
/*
|
|
|
|
EXACT + CONTEXT
|
|
|
|
COLLATE latin1_swedish_ci .. COLLATE DEFAULT
|
|
|
|
*/
|
|
|
|
return Lex_exact_collation(m_ci).
|
|
|
|
raise_if_conflicts_with_context_collation(rhs, false);
|
|
|
|
|
|
|
|
case TYPE_CONTEXTUALLY_TYPED:
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
CONTEXT + CONTEXT:
|
|
|
|
CHAR(10) BINARY .. COLLATE DEFAULT - not supported by the parser
|
|
|
|
CREATE DATABASE db1 COLLATE DEFAULT COLLATE DEFAULT;
|
|
|
|
*/
|
|
|
|
return Lex_context_collation(m_ci).raise_if_not_equal(rhs);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
DBUG_ASSERT(0);
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
Merge two non-empty COLLATE clauses.
|
|
|
|
*/
|
|
|
|
bool Lex_extended_collation_st::merge(const Lex_extended_collation_st &rhs)
|
|
|
|
{
|
|
|
|
switch (rhs.type()) {
|
|
|
|
case TYPE_EXACT:
|
|
|
|
/*
|
|
|
|
EXACT + EXACT
|
|
|
|
COLLATE latin1_swedish_ci .. COLLATE latin1_swedish_ci
|
|
|
|
|
|
|
|
CONTEXT + EXACT
|
|
|
|
COLLATE DEFAULT .. COLLATE latin1_swedish_ci
|
|
|
|
CHAR(10) BINARY .. COLLATE latin1_bin
|
|
|
|
*/
|
|
|
|
return merge_exact_collation(Lex_exact_collation(rhs.m_ci));
|
|
|
|
case TYPE_CONTEXTUALLY_TYPED:
|
|
|
|
/*
|
|
|
|
EXACT + CONTEXT
|
|
|
|
COLLATE latin1_swedish_ci .. COLLATE DEFAULT
|
|
|
|
|
|
|
|
CONTEXT + CONTEXT
|
|
|
|
COLLATE DEFAULT .. COLLATE DEFAULT
|
|
|
|
CHAR(10) BINARY .. COLLATE DEFAULT
|
|
|
|
*/
|
|
|
|
return raise_if_conflicts_with_context_collation(
|
|
|
|
Lex_context_collation(rhs.m_ci));
|
|
|
|
}
|
|
|
|
DBUG_ASSERT(0);
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
MDEV-27009 Add UCA-14.0.0 collations
- Added one neutral and 22 tailored (language specific) collations based on
Unicode Collation Algorithm version 14.0.0.
Collations were added for Unicode character sets
utf8mb3, utf8mb4, ucs2, utf16, utf32.
Every tailoring was added with four accent and case
sensitivity flag combinations, e.g:
* utf8mb4_uca1400_swedish_as_cs
* utf8mb4_uca1400_swedish_as_ci
* utf8mb4_uca1400_swedish_ai_cs
* utf8mb4_uca1400_swedish_ai_ci
and their _nopad_ variants:
* utf8mb4_uca1400_swedish_nopad_as_cs
* utf8mb4_uca1400_swedish_nopad_as_ci
* utf8mb4_uca1400_swedish_nopad_ai_cs
* utf8mb4_uca1400_swedish_nopad_ai_ci
- Introducing the concept of contextually typed named collations:
CREATE DATABASE db1 CHARACTER SET utf8mb4;
CREATE TABLE db1.t1 (a CHAR(10) COLLATE uca1400_as_ci);
The idea is that there is no need to specify the character set prefix
in the new collation names. It's enough to type just the suffix
"uca1400_as_ci". The character set is taken from the context.
In the above example script the context character set is utf8mb4.
So the CREATE TABLE will make a column with the collation
utf8mb4_uca1400_as_ci.
Short collations names can be used in any parts of the SQL syntax
where the COLLATE clause is understood.
- New collations are displayed only one time
(without character set combinations) by these statements:
SELECT * FROM INFORMATION_SCHEMA.COLLATIONS;
SHOW COLLATION;
For example, all these collations:
- utf8mb3_uca1400_swedish_as_ci
- utf8mb4_uca1400_swedish_as_ci
- ucs2_uca1400_swedish_as_ci
- utf16_uca1400_swedish_as_ci
- utf32_uca1400_swedish_as_ci
have just one entry in INFORMATION_SCHEMA.COLLATIONS and SHOW COLLATION,
with COLLATION_NAME equal to "uca1400_swedish_as_ci", which is the suffix
without the character set name:
SELECT COLLATION_NAME FROM INFORMATION_SCHEMA.COLLATIONS
WHERE COLLATION_NAME LIKE '%uca1400_swedish_as_ci';
+-----------------------+
| COLLATION_NAME |
+-----------------------+
| uca1400_swedish_as_ci |
+-----------------------+
Note, the behaviour of old collations did not change.
Non-unicode collations (e.g. latin1_swedish_ci) and
old UCA-4.0.0 collations (e.g. utf8mb4_unicode_ci)
are still displayed with the character set prefix, as before.
- The structure of the table INFORMATION_SCHEMA.COLLATIONS was changed.
The NOT NULL constraint was removed from these columns:
- CHARACTER_SET_NAME
- ID
- IS_DEFAULT
and from the corresponding columns in SHOW COLLATION.
For example:
SELECT COLLATION_NAME, CHARACTER_SET_NAME, ID, IS_DEFAULT
FROM INFORMATION_SCHEMA.COLLATIONS
WHERE COLLATION_NAME LIKE '%uca1400_swedish_as_ci';
+-----------------------+--------------------+------+------------+
| COLLATION_NAME | CHARACTER_SET_NAME | ID | IS_DEFAULT |
+-----------------------+--------------------+------+------------+
| uca1400_swedish_as_ci | NULL | NULL | NULL |
+-----------------------+--------------------+------+------------+
The NULL value in these columns now means that the collation
is applicable to multiple character sets.
The behaviour of old collations did not change.
Make sure your client programs can handle NULL values in these columns.
- The structure of the table
INFORMATION_SCHEMA.COLLATION_CHARACTER_SET_APPLICABILITY was changed.
Three new NOT NULL columns were added:
- FULL_COLLATION_NAME
- ID
- IS_DEFAULT
New collations have multiple entries in COLLATION_CHARACTER_SET_APPLICABILITY.
The column COLLATION_NAME contains the collation name without the character
set prefix. The column FULL_COLLATION_NAME contains the collation name with
the character set prefix.
Old collations have full collation name in both FULL_COLLATION_NAME and
COLLATION_NAME.
SELECT COLLATION_NAME, FULL_COLLATION_NAME, CHARACTER_SET_NAME, ID, IS_DEFAULT
FROM INFORMATION_SCHEMA.COLLATION_CHARACTER_SET_APPLICABILITY
WHERE FULL_COLLATION_NAME RLIKE '^(utf8mb4|latin1).*swedish.*ci$';
+-----------------------------+-------------------------------------+--------------------+------+------------+
| COLLATION_NAME | FULL_COLLATION_NAME | CHARACTER_SET_NAME | ID | IS_DEFAULT |
+-----------------------------+-------------------------------------+--------------------+------+------------+
| latin1_swedish_ci | latin1_swedish_ci | latin1 | 8 | Yes |
| latin1_swedish_nopad_ci | latin1_swedish_nopad_ci | latin1 | 1032 | |
| utf8mb4_swedish_ci | utf8mb4_swedish_ci | utf8mb4 | 232 | |
| uca1400_swedish_ai_ci | utf8mb4_uca1400_swedish_ai_ci | utf8mb4 | 2368 | |
| uca1400_swedish_as_ci | utf8mb4_uca1400_swedish_as_ci | utf8mb4 | 2370 | |
| uca1400_swedish_nopad_ai_ci | utf8mb4_uca1400_swedish_nopad_ai_ci | utf8mb4 | 2372 | |
| uca1400_swedish_nopad_as_ci | utf8mb4_uca1400_swedish_nopad_as_ci | utf8mb4 | 2374 | |
+-----------------------------+-------------------------------------+--------------------+------+------------+
- Other INFORMATION_SCHEMA queries:
SELECT COLLATION_NAME FROM INFORMATION_SCHEMA.COLUMNS;
SELECT COLLATION_NAME FROM INFORMATION_SCHEMA.PARAMETERS;
SELECT TABLE_COLLATION FROM INFORMATION_SCHEMA.TABLES;
SELECT DEFAULT_COLLATION_NAME FROM INFORMATION_SCHEMA.SCHEMATA;
SELECT COLLATION_NAME FROM INFORMATION_SCHEMA.ROUTINES;
SELECT COLLATION_CONNECTION FROM INFORMATION_SCHEMA.EVENTS;
SELECT DATABASE_COLLATION FROM INFORMATION_SCHEMA.EVENTS;
SELECT COLLATION_CONNECTION FROM INFORMATION_SCHEMA.ROUTINES;
SELECT DATABASE_COLLATION FROM INFORMATION_SCHEMA.ROUTINES;
SELECT COLLATION_CONNECTION FROM INFORMATION_SCHEMA.TRIGGERS;
SELECT DATABASE_COLLATION FROM INFORMATION_SCHEMA.TRIGGERS;
SELECT COLLATION_CONNECTION FROM INFORMATION_SCHEMA.VIEWS;
display full collation names, including character sets prefix,
for all collations, including new collations.
Corresponding SHOW commands also display full collation names
in collation related columns:
SHOW CREATE TABLE t1;
SHOW CREATE DATABASE db1;
SHOW TABLE STATUS;
SHOW CREATE FUNCTION f1;
SHOW CREATE PROCEDURE p1;
SHOW CREATE EVENT ev1;
SHOW CREATE TRIGGER tr1;
SHOW CREATE VIEW;
These INFORMATION_SCHEMA queries and SHOW statements may change in
the future, to display short collation names.
2021-11-28 13:55:15 +01:00
|
|
|
/*
  Return the name to display for this contextually typed collation.

  @return "DEFAULT" when is_contextually_typed_collate_default() holds,
          "BINARY" when is_contextually_typed_binary_style() holds,
          otherwise the value of collation_name_context_suffix().
*/
LEX_CSTRING Lex_context_collation::collation_name_for_show() const
{
  // The DEFAULT check is done first, the BINARY check second.
  if (is_contextually_typed_collate_default())
  {
    LEX_CSTRING default_name= {STRING_WITH_LEN("DEFAULT")};
    return default_name;
  }

  if (is_contextually_typed_binary_style())
  {
    LEX_CSTRING binary_name= {STRING_WITH_LEN("BINARY")};
    return binary_name;
  }

  return collation_name_context_suffix();
}
|
|
|
|
|
|
|
|
|
|
|
|
bool Lex_extended_collation_st::set_by_name(const char *name, myf my_flags)
|
|
|
|
{
|
|
|
|
Charset_loader_server loader;
|
|
|
|
CHARSET_INFO *cs;
|
|
|
|
|
|
|
|
if (!strncasecmp(name, STRING_WITH_LEN("uca1400_")))
|
|
|
|
{
|
|
|
|
if (!(cs= loader.get_context_collation_or_error(name, my_flags)))
|
|
|
|
return true;
|
|
|
|
|
|
|
|
*this= Lex_extended_collation(Lex_context_collation(cs));
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!(cs= loader.get_exact_collation_or_error(name, my_flags)))
|
|
|
|
return true;
|
|
|
|
|
|
|
|
*this= Lex_extended_collation(Lex_exact_collation(cs));
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2022-02-09 18:21:39 +01:00
|
|
|
/** find a collation with binary comparison rules
|
|
|
|
*/
|
2022-05-23 09:05:33 +02:00
|
|
|
CHARSET_INFO *Lex_exact_charset_opt_extended_collate::find_bin_collation() const
|
2022-02-09 18:21:39 +01:00
|
|
|
{
|
|
|
|
/*
|
|
|
|
We don't need to handle old_mode=UTF8_IS_UTF8MB3 here,
|
2022-05-23 09:05:33 +02:00
|
|
|
because "m_ci" points to a real character set name.
|
2022-02-09 18:21:39 +01:00
|
|
|
It can be either "utf8mb3" or "utf8mb4". It cannot be "utf8".
|
|
|
|
No thd->get_utf8_flag() flag passed to get_charset_by_csname().
|
|
|
|
*/
|
2022-05-23 09:05:33 +02:00
|
|
|
DBUG_ASSERT(m_ci->cs_name.length !=4 || memcmp(m_ci->cs_name.str, "utf8", 4));
|
2022-02-09 18:21:39 +01:00
|
|
|
/*
|
|
|
|
CREATE TABLE t1 (a CHAR(10) BINARY)
|
|
|
|
CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;
|
|
|
|
Nothing to do, we have the binary collation already.
|
|
|
|
*/
|
2022-05-23 09:05:33 +02:00
|
|
|
if (m_ci->state & MY_CS_BINSORT)
|
|
|
|
return m_ci;
|
2022-02-09 18:21:39 +01:00
|
|
|
|
|
|
|
// CREATE TABLE t1 (a CHAR(10) BINARY) CHARACTER SET utf8mb4;
|
2022-05-23 09:05:33 +02:00
|
|
|
CHARSET_INFO *cs;
|
|
|
|
if (!(cs= get_charset_by_csname(m_ci->cs_name.str, MY_CS_BINSORT, MYF(0))))
|
2022-02-09 18:21:39 +01:00
|
|
|
{
|
|
|
|
char tmp[65];
|
2022-05-23 09:05:33 +02:00
|
|
|
strxnmov(tmp, sizeof(tmp)-1, m_ci->cs_name.str, "_bin", NULL);
|
2022-02-09 18:21:39 +01:00
|
|
|
my_error(ER_UNKNOWN_COLLATION, MYF(0), tmp);
|
|
|
|
}
|
|
|
|
return cs;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2022-05-23 09:05:33 +02:00
|
|
|
/*
  Find the collation flagged MY_CS_PRIMARY for the character set of m_ci.

  @return m_ci itself if it already has MY_CS_PRIMARY set; otherwise the
          result of get_charset_by_csname() for m_ci's character set name
          with MY_CS_PRIMARY.
*/
CHARSET_INFO *
Lex_exact_charset_opt_extended_collate::find_compiled_default_collation() const
{
  // See comments in find_bin_collation()
  DBUG_ASSERT(m_ci->cs_name.length != 4 ||
              memcmp(m_ci->cs_name.str, "utf8", 4) != 0);

  /*
    CREATE TABLE t1 (a CHAR(10) COLLATE DEFAULT) CHARACTER SET utf8mb4;
    Nothing to do, we have the default collation already.
  */
  const bool already_primary= (m_ci->state & MY_CS_PRIMARY) != 0;
  if (already_primary)
    return m_ci;

  /*
    CREATE TABLE t1 (a CHAR(10) COLLATE DEFAULT)
      CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;

    Don't need to handle old_mode=UTF8_IS_UTF8MB3 here.
    See comments in find_bin_collation.
  */
  CHARSET_INFO *primary_cs= get_charset_by_csname(m_ci->cs_name.str,
                                                  MY_CS_PRIMARY,
                                                  MYF(MY_WME));
  /*
    The above should never fail, as we have default collations for
    all character sets.
  */
  DBUG_ASSERT(primary_cs);
  return primary_cs;
}
|
|
|
|
|
|
|
|
|
MDEV-30164 System variable for default collations
This patch adds a way to override default collations
(or "character set collations") for desired character sets.
The SQL standard says:
> Each collation known in an SQL-environment is applicable to one
> or more character sets, and for each character set, one or more
> collations are applicable to it, one of which is associated with
> it as its character set collation.
In MariaDB, character set collations have been hard-coded so far,
e.g. utf8mb4_general_ci has been a hard-coded character set collation
for utf8mb4.
This patch makes it possible to override (globally per server, or per session)
character set collations, so for example, uca1400_ai_ci can be set as a
character set collation for Unicode character sets
(instead of compiled xxx_general_ci).
The array of overridden character set collations is stored in a new
(session and global) system variable @@character_set_collations and
can be set as a comma separated list of charset=collation pairs, e.g.:
SET @@character_set_collations='utf8mb3=uca1400_ai_ci,utf8mb4=uca1400_ai_ci';
The variable is empty by default, which means that the hard-coded
character set collations (e.g. utf8mb4_general_ci for utf8mb4) are used.
The variable can also be set globally by passing to the server startup command
line, and/or in my.cnf.
2022-12-14 15:46:27 +01:00
|
|
|
/*
  Find the compiled default collation for this character set and pass it
  through the character set collation map.

  @param used  passed through to map.get_collation_for_charset()
  @param map   the character set collation map to consult
  @return      nullptr if find_compiled_default_collation() returns
               nothing, otherwise the collation returned by the map
*/
CHARSET_INFO *
Lex_exact_charset_opt_extended_collate::
  find_mapped_default_collation(Sql_used *used,
                                const Charset_collation_map_st &map) const
{
  CHARSET_INFO *compiled_default= find_compiled_default_collation();
  return compiled_default ?
         map.get_collation_for_charset(used, compiled_default) :
         nullptr;
}
|
|
|
|
|
2022-02-09 18:21:39 +01:00
|
|
|
/*
|
|
|
|
Resolve an empty or a contextually typed collation according to the
|
|
|
|
upper level default character set (and optionally a collation), e.g.:
|
|
|
|
CREATE TABLE t1 (a CHAR(10)) CHARACTER SET latin1;
|
|
|
|
CREATE TABLE t1 (a CHAR(10) BINARY) CHARACTER SET latin1;
|
|
|
|
CREATE TABLE t1 (a CHAR(10) COLLATE DEFAULT)
|
|
|
|
CHARACTER SET latin1 COLLATE latin1_bin;
|
|
|
|
|
|
|
|
"this" is the COLLATE clause (e.g. of a column)
|
|
|
|
"def" is the upper level CHARACTER SET clause (e.g. of a table)
|
|
|
|
*/
|
2022-05-23 06:40:26 +02:00
|
|
|
CHARSET_INFO *Lex_exact_charset_extended_collation_attrs_st::
|
MDEV-30164 System variable for default collations
This patch adds a way to override default collations
(or "character set collations") for desired character sets.
The SQL standard says:
> Each collation known in an SQL-environment is applicable to one
> or more character sets, and for each character set, one or more
> collations are applicable to it, one of which is associated with
> it as its character set collation.
In MariaDB, character set collations has been hard-coded so far,
e.g. utf8mb4_general_ci has been a hard-coded character set collation
for utf8mb4.
This patch allows to override (globally per server, or per session)
character set collations, so for example, uca1400_ai_ci can be set as a
character set collation for Unicode character sets
(instead of compiled xxx_general_ci).
The array of overridden character set collations is stored in a new
(session and global) system variable @@character_set_collations and
can be set as a comma separated list of charset=collation pairs, e.g.:
SET @@character_set_collations='utf8mb3=uca1400_ai_ci,utf8mb4=uca1400_ai_ci';
The variable is empty by default, which mean use the hard-coded
character set collations (e.g. utf8mb4_general_ci for utf8mb4).
The variable can also be set globally by passing to the server startup command
line, and/or in my.cnf.
2022-12-14 15:46:27 +01:00
|
|
|
resolved_to_character_set(Sql_used *used,
|
|
|
|
const Charset_collation_map_st &map,
|
|
|
|
CHARSET_INFO *def) const
|
2022-02-09 18:21:39 +01:00
|
|
|
{
|
|
|
|
DBUG_ASSERT(def);
|
|
|
|
|
|
|
|
switch (m_type) {
|
|
|
|
case TYPE_EMPTY:
|
|
|
|
return def;
|
|
|
|
case TYPE_CHARACTER_SET:
|
MDEV-30164 System variable for default collations
This patch adds a way to override default collations
(or "character set collations") for desired character sets.
The SQL standard says:
> Each collation known in an SQL-environment is applicable to one
> or more character sets, and for each character set, one or more
> collations are applicable to it, one of which is associated with
> it as its character set collation.
In MariaDB, character set collations has been hard-coded so far,
e.g. utf8mb4_general_ci has been a hard-coded character set collation
for utf8mb4.
This patch allows to override (globally per server, or per session)
character set collations, so for example, uca1400_ai_ci can be set as a
character set collation for Unicode character sets
(instead of compiled xxx_general_ci).
The array of overridden character set collations is stored in a new
(session and global) system variable @@character_set_collations and
can be set as a comma separated list of charset=collation pairs, e.g.:
SET @@character_set_collations='utf8mb3=uca1400_ai_ci,utf8mb4=uca1400_ai_ci';
The variable is empty by default, which mean use the hard-coded
character set collations (e.g. utf8mb4_general_ci for utf8mb4).
The variable can also be set globally by passing to the server startup command
line, and/or in my.cnf.
2022-12-14 15:46:27 +01:00
|
|
|
{
|
|
|
|
DBUG_ASSERT(m_ci);
|
|
|
|
return map.get_collation_for_charset(used, m_ci);
|
|
|
|
}
|
2022-05-25 09:07:04 +02:00
|
|
|
case TYPE_CHARACTER_SET_COLLATE_EXACT:
|
2022-02-09 18:21:39 +01:00
|
|
|
case TYPE_COLLATE_EXACT:
|
|
|
|
DBUG_ASSERT(m_ci);
|
|
|
|
return m_ci;
|
|
|
|
case TYPE_COLLATE_CONTEXTUALLY_TYPED:
|
2022-05-23 09:05:33 +02:00
|
|
|
{
|
|
|
|
Lex_exact_charset_opt_extended_collate tmp(def, true);
|
MDEV-30164 System variable for default collations
This patch adds a way to override default collations
(or "character set collations") for desired character sets.
The SQL standard says:
> Each collation known in an SQL-environment is applicable to one
> or more character sets, and for each character set, one or more
> collations are applicable to it, one of which is associated with
> it as its character set collation.
In MariaDB, character set collations has been hard-coded so far,
e.g. utf8mb4_general_ci has been a hard-coded character set collation
for utf8mb4.
This patch allows to override (globally per server, or per session)
character set collations, so for example, uca1400_ai_ci can be set as a
character set collation for Unicode character sets
(instead of compiled xxx_general_ci).
The array of overridden character set collations is stored in a new
(session and global) system variable @@character_set_collations and
can be set as a comma separated list of charset=collation pairs, e.g.:
SET @@character_set_collations='utf8mb3=uca1400_ai_ci,utf8mb4=uca1400_ai_ci';
The variable is empty by default, which mean use the hard-coded
character set collations (e.g. utf8mb4_general_ci for utf8mb4).
The variable can also be set globally by passing to the server startup command
line, and/or in my.cnf.
2022-12-14 15:46:27 +01:00
|
|
|
if (tmp.merge_context_collation_override(used, map, Lex_context_collation(m_ci)))
|
2022-05-23 09:05:33 +02:00
|
|
|
return NULL;
|
|
|
|
return tmp.collation().charset_info();
|
|
|
|
}
|
2022-02-09 18:21:39 +01:00
|
|
|
}
|
|
|
|
DBUG_ASSERT(0);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2022-05-23 06:40:26 +02:00
|
|
|
bool Lex_exact_charset_extended_collation_attrs_st::
|
2022-05-23 09:05:33 +02:00
|
|
|
merge_exact_collation(const Lex_exact_collation &cl)
|
2022-02-09 18:21:39 +01:00
|
|
|
{
|
|
|
|
switch (m_type) {
|
|
|
|
case TYPE_EMPTY:
|
|
|
|
/*
|
|
|
|
No CHARACTER SET clause
|
|
|
|
CHAR(10) NOT NULL COLLATE latin1_bin
|
|
|
|
*/
|
2022-05-23 09:05:33 +02:00
|
|
|
*this= Lex_exact_charset_extended_collation_attrs(cl);
|
2022-02-09 18:21:39 +01:00
|
|
|
return false;
|
|
|
|
case TYPE_CHARACTER_SET:
|
|
|
|
{
|
2022-05-23 09:05:33 +02:00
|
|
|
// CHARACTER SET latin1 .. COLLATE latin1_swedish_ci
|
2022-05-25 09:07:04 +02:00
|
|
|
Lex_exact_charset_opt_extended_collate tmp(m_ci, false);
|
|
|
|
if (tmp.merge_exact_collation(cl))
|
2022-02-09 18:21:39 +01:00
|
|
|
return true;
|
2022-05-25 09:07:04 +02:00
|
|
|
*this= Lex_exact_charset_extended_collation_attrs(tmp);
|
2022-02-09 18:21:39 +01:00
|
|
|
return false;
|
|
|
|
}
|
2022-05-25 09:07:04 +02:00
|
|
|
case TYPE_CHARACTER_SET_COLLATE_EXACT:
|
2022-05-23 09:05:33 +02:00
|
|
|
case TYPE_COLLATE_EXACT:
|
|
|
|
{
|
|
|
|
// [CHARACTER SET latin1] COLLATE latin1_bin .. COLLATE latin1_bin
|
|
|
|
return Lex_exact_collation(m_ci).raise_if_not_equal(cl);
|
|
|
|
}
|
2022-02-09 18:21:39 +01:00
|
|
|
case TYPE_COLLATE_CONTEXTUALLY_TYPED:
|
|
|
|
{
|
2022-05-23 09:05:33 +02:00
|
|
|
// COLLATE DEFAULT .. COLLATE latin1_swedish_ci
|
|
|
|
if (cl.raise_if_conflicts_with_context_collation(
|
|
|
|
Lex_context_collation(m_ci), true))
|
|
|
|
return true;
|
|
|
|
*this= Lex_exact_charset_extended_collation_attrs(cl);
|
2022-02-09 18:21:39 +01:00
|
|
|
return false;
|
|
|
|
}
|
2022-05-23 09:05:33 +02:00
|
|
|
}
|
|
|
|
DBUG_ASSERT(0);
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2022-02-09 18:21:39 +01:00
|
|
|
|
2022-05-23 09:05:33 +02:00
|
|
|
bool Lex_exact_charset_extended_collation_attrs_st::
|
MDEV-30164 System variable for default collations
This patch adds a way to override default collations
(or "character set collations") for desired character sets.
The SQL standard says:
> Each collation known in an SQL-environment is applicable to one
> or more character sets, and for each character set, one or more
> collations are applicable to it, one of which is associated with
> it as its character set collation.
In MariaDB, character set collations has been hard-coded so far,
e.g. utf8mb4_general_ci has been a hard-coded character set collation
for utf8mb4.
This patch allows to override (globally per server, or per session)
character set collations, so for example, uca1400_ai_ci can be set as a
character set collation for Unicode character sets
(instead of compiled xxx_general_ci).
The array of overridden character set collations is stored in a new
(session and global) system variable @@character_set_collations and
can be set as a comma separated list of charset=collation pairs, e.g.:
SET @@character_set_collations='utf8mb3=uca1400_ai_ci,utf8mb4=uca1400_ai_ci';
The variable is empty by default, which mean use the hard-coded
character set collations (e.g. utf8mb4_general_ci for utf8mb4).
The variable can also be set globally by passing to the server startup command
line, and/or in my.cnf.
2022-12-14 15:46:27 +01:00
|
|
|
merge_context_collation(Sql_used *used,
|
|
|
|
const Charset_collation_map_st &map,
|
|
|
|
const Lex_context_collation &cl)
|
2022-05-23 09:05:33 +02:00
|
|
|
{
|
|
|
|
switch (m_type) {
|
|
|
|
case TYPE_EMPTY:
|
2022-02-09 18:21:39 +01:00
|
|
|
/*
|
2022-05-23 09:05:33 +02:00
|
|
|
No CHARACTER SET clause
|
|
|
|
CHAR(10) NOT NULL .. COLLATE DEFAULT
|
2022-02-09 18:21:39 +01:00
|
|
|
*/
|
2022-05-23 09:05:33 +02:00
|
|
|
*this= Lex_exact_charset_extended_collation_attrs(cl);
|
|
|
|
return false;
|
|
|
|
case TYPE_CHARACTER_SET:
|
2022-02-09 18:21:39 +01:00
|
|
|
{
|
2022-05-23 09:05:33 +02:00
|
|
|
// CHARACTER SET latin1 .. COLLATE DEFAULT
|
|
|
|
Lex_exact_charset_opt_extended_collate tmp(m_ci, false);
|
MDEV-30164 System variable for default collations
This patch adds a way to override default collations
(or "character set collations") for desired character sets.
The SQL standard says:
> Each collation known in an SQL-environment is applicable to one
> or more character sets, and for each character set, one or more
> collations are applicable to it, one of which is associated with
> it as its character set collation.
In MariaDB, character set collations has been hard-coded so far,
e.g. utf8mb4_general_ci has been a hard-coded character set collation
for utf8mb4.
This patch allows to override (globally per server, or per session)
character set collations, so for example, uca1400_ai_ci can be set as a
character set collation for Unicode character sets
(instead of compiled xxx_general_ci).
The array of overridden character set collations is stored in a new
(session and global) system variable @@character_set_collations and
can be set as a comma separated list of charset=collation pairs, e.g.:
SET @@character_set_collations='utf8mb3=uca1400_ai_ci,utf8mb4=uca1400_ai_ci';
The variable is empty by default, which mean use the hard-coded
character set collations (e.g. utf8mb4_general_ci for utf8mb4).
The variable can also be set globally by passing to the server startup command
line, and/or in my.cnf.
2022-12-14 15:46:27 +01:00
|
|
|
if (tmp.merge_context_collation(used, map, cl))
|
2022-05-23 09:05:33 +02:00
|
|
|
return true;
|
2022-05-25 09:07:04 +02:00
|
|
|
*this= Lex_exact_charset_extended_collation_attrs(tmp);
|
2022-05-23 09:05:33 +02:00
|
|
|
return false;
|
2022-02-09 18:21:39 +01:00
|
|
|
}
|
2022-05-25 09:07:04 +02:00
|
|
|
case TYPE_CHARACTER_SET_COLLATE_EXACT:
|
2022-05-23 09:05:33 +02:00
|
|
|
case TYPE_COLLATE_EXACT:
|
|
|
|
// [CHARACTER SET latin1] COLLATE latin1_swedish_ci .. COLLATE DEFAULT
|
|
|
|
return Lex_exact_collation(m_ci).
|
|
|
|
raise_if_conflicts_with_context_collation(cl, false);
|
|
|
|
case TYPE_COLLATE_CONTEXTUALLY_TYPED:
|
|
|
|
// COLLATE DEFAULT .. COLLATE DEFAULT
|
|
|
|
return Lex_context_collation(m_ci).raise_if_not_equal(cl);
|
2022-02-09 18:21:39 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
DBUG_ASSERT(0);
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2022-05-23 06:40:26 +02:00
|
|
|
bool Lex_exact_charset_opt_extended_collate::
|
2022-05-23 09:05:33 +02:00
|
|
|
merge_exact_collation(const Lex_exact_collation &cl)
|
2022-02-09 18:21:39 +01:00
|
|
|
{
|
2022-05-23 09:05:33 +02:00
|
|
|
// CHARACTER SET latin1 [COLLATE latin1_bin] .. COLLATE latin1_bin
|
|
|
|
if (m_with_collate)
|
|
|
|
return Lex_exact_collation(m_ci).raise_if_not_equal(cl);
|
MDEV-27009 Add UCA-14.0.0 collations
- Added one neutral and 22 tailored (language specific) collations based on
Unicode Collation Algorithm version 14.0.0.
Collations were added for Unicode character sets
utf8mb3, utf8mb4, ucs2, utf16, utf32.
Every tailoring was added with four accent and case
sensitivity flag combinations, e.g:
* utf8mb4_uca1400_swedish_as_cs
* utf8mb4_uca1400_swedish_as_ci
* utf8mb4_uca1400_swedish_ai_cs
* utf8mb4_uca1400_swedish_ai_ci
and their _nopad_ variants:
* utf8mb4_uca1400_swedish_nopad_as_cs
* utf8mb4_uca1400_swedish_nopad_as_ci
* utf8mb4_uca1400_swedish_nopad_ai_cs
* utf8mb4_uca1400_swedish_nopad_ai_ci
- Introducing a conception of contextually typed named collations:
CREATE DATABASE db1 CHARACTER SET utf8mb4;
CREATE TABLE db1.t1 (a CHAR(10) COLLATE uca1400_as_ci);
The idea is that there is no a need to specify the character set prefix
in the new collation names. It's enough to type just the suffix
"uca1400_as_ci". The character set is taken from the context.
In the above example script the context character set is utf8mb4.
So the CREATE TABLE will make a column with the collation
utf8mb4_uca1400_as_ci.
Short collations names can be used in any parts of the SQL syntax
where the COLLATE clause is understood.
- New collations are displayed only one time
(without character set combinations) by these statements:
SELECT * FROM INFORMATION_SCHEMA.COLLATIONS;
SHOW COLLATION;
For example, all these collations:
- utf8mb3_uca1400_swedish_as_ci
- utf8mb4_uca1400_swedish_as_ci
- ucs2_uca1400_swedish_as_ci
- utf16_uca1400_swedish_as_ci
- utf32_uca1400_swedish_as_ci
have just one entry in INFORMATION_SCHEMA.COLLATIONS and SHOW COLLATION,
with COLLATION_NAME equal to "uca1400_swedish_as_ci", which is the suffix
without the character set name:
SELECT COLLATION_NAME FROM INFORMATION_SCHEMA.COLLATIONS
WHERE COLLATION_NAME LIKE '%uca1400_swedish_as_ci';
+-----------------------+
| COLLATION_NAME |
+-----------------------+
| uca1400_swedish_as_ci |
+-----------------------+
Note, the behaviour of old collations did not change.
Non-unicode collations (e.g. latin1_swedish_ci) and
old UCA-4.0.0 collations (e.g. utf8mb4_unicode_ci)
are still displayed with the character set prefix, as before.
- The structure of the table INFORMATION_SCHEMA.COLLATIONS was changed.
The NOT NULL constraint was removed from these columns:
- CHARACTER_SET_NAME
- ID
- IS_DEFAULT
and from the corresponding columns in SHOW COLLATION.
For example:
SELECT COLLATION_NAME, CHARACTER_SET_NAME, ID, IS_DEFAULT
FROM INFORMATION_SCHEMA.COLLATIONS
WHERE COLLATION_NAME LIKE '%uca1400_swedish_as_ci';
+-----------------------+--------------------+------+------------+
| COLLATION_NAME | CHARACTER_SET_NAME | ID | IS_DEFAULT |
+-----------------------+--------------------+------+------------+
| uca1400_swedish_as_ci | NULL | NULL | NULL |
+-----------------------+--------------------+------+------------+
The NULL value in these columns now means that the collation
is applicable to multiple character sets.
The behavioir of old collations did not change.
Make sure your client programs can handle NULL values in these columns.
- The structure of the table
INFORMATION_SCHEMA.COLLATION_CHARACTER_SET_APPLICABILITY was changed.
Three new NOT NULL columns were added:
- FULL_COLLATION_NAME
- ID
- IS_DEFAULT
New collations have multiple entries in COLLATION_CHARACTER_SET_APPLICABILITY.
The column COLLATION_NAME contains the collation name without the character
set prefix. The column FULL_COLLATION_NAME contains the collation name with
the character set prefix.
Old collations have full collation name in both FULL_COLLATION_NAME and
COLLATION_NAME.
SELECT COLLATION_NAME, FULL_COLLATION_NAME, CHARACTER_SET_NAME, ID, IS_DEFAULT
FROM INFORMATION_SCHEMA.COLLATION_CHARACTER_SET_APPLICABILITY
WHERE FULL_COLLATION_NAME RLIKE '^(utf8mb4|latin1).*swedish.*ci$';
+-----------------------------+-------------------------------------+--------------------+------+------------+
| COLLATION_NAME | FULL_COLLATION_NAME | CHARACTER_SET_NAME | ID | IS_DEFAULT |
+-----------------------------+-------------------------------------+--------------------+------+------------+
| latin1_swedish_ci | latin1_swedish_ci | latin1 | 8 | Yes |
| latin1_swedish_nopad_ci | latin1_swedish_nopad_ci | latin1 | 1032 | |
| utf8mb4_swedish_ci | utf8mb4_swedish_ci | utf8mb4 | 232 | |
| uca1400_swedish_ai_ci | utf8mb4_uca1400_swedish_ai_ci | utf8mb4 | 2368 | |
| uca1400_swedish_as_ci | utf8mb4_uca1400_swedish_as_ci | utf8mb4 | 2370 | |
| uca1400_swedish_nopad_ai_ci | utf8mb4_uca1400_swedish_nopad_ai_ci | utf8mb4 | 2372 | |
| uca1400_swedish_nopad_as_ci | utf8mb4_uca1400_swedish_nopad_as_ci | utf8mb4 | 2374 | |
+-----------------------------+-------------------------------------+--------------------+------+------------+
- Other INFORMATION_SCHEMA queries:
SELECT COLLATION_NAME FROM INFORMATION_SCHEMA.COLUMNS;
SELECT COLLATION_NAME FROM INFORMATION_SCHEMA.PARAMETERS;
SELECT TABLE_COLLATION FROM INFORMATION_SCHEMA.TABLES;
SELECT DEFAULT_COLLATION_NAME FROM INFORMATION_SCHEMA.SCHEMATA;
SELECT COLLATION_NAME FROM INFORMATION_SCHEMA.ROUTINES;
SELECT COLLATION_CONNECTION FROM INFORMATION_SCHEMA.EVENTS;
SELECT DATABASE_COLLATION FROM INFORMATION_SCHEMA.EVENTS;
SELECT COLLATION_CONNECTION FROM INFORMATION_SCHEMA.ROUTINES;
SELECT DATABASE_COLLATION FROM INFORMATION_SCHEMA.ROUTINES;
SELECT COLLATION_CONNECTION FROM INFORMATION_SCHEMA.TRIGGERS;
SELECT DATABASE_COLLATION FROM INFORMATION_SCHEMA.TRIGGERS;
SELECT COLLATION_CONNECTION FROM INFORMATION_SCHEMA.VIEWS;
display full collation names, including character sets prefix,
for all collations, including new collations.
Corresponding SHOW commands also display full collation names
in collation related columns:
SHOW CREATE TABLE t1;
SHOW CREATE DATABASE db1;
SHOW TABLE STATUS;
SHOW CREATE FUNCTION f1;
SHOW CREATE PROCEDURE p1;
SHOW CREATE EVENT ev1;
SHOW CREATE TRIGGER tr1;
SHOW CREATE VIEW;
These INFORMATION_SCHEMA queries and SHOW statements may change in
the future, to display show collation names.
2021-11-28 13:55:15 +01:00
|
|
|
return merge_exact_collation_override(cl);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
bool Lex_exact_charset_opt_extended_collate::
|
|
|
|
merge_exact_collation_override(const Lex_exact_collation &cl)
|
|
|
|
{
|
|
|
|
// CHARACTER SET latin1 [COLLATE latin1_bin] .. COLLATE latin1_bin
|
2022-05-23 09:05:33 +02:00
|
|
|
if (raise_if_not_applicable(cl))
|
|
|
|
return true;
|
|
|
|
*this= Lex_exact_charset_opt_extended_collate(cl);
|
|
|
|
return false;
|
|
|
|
}
|
2022-02-09 18:21:39 +01:00
|
|
|
|
|
|
|
|
2022-05-23 09:05:33 +02:00
|
|
|
bool Lex_exact_charset_opt_extended_collate::
|
MDEV-30164 System variable for default collations
This patch adds a way to override default collations
(or "character set collations") for desired character sets.
The SQL standard says:
> Each collation known in an SQL-environment is applicable to one
> or more character sets, and for each character set, one or more
> collations are applicable to it, one of which is associated with
> it as its character set collation.
In MariaDB, character set collations has been hard-coded so far,
e.g. utf8mb4_general_ci has been a hard-coded character set collation
for utf8mb4.
This patch allows to override (globally per server, or per session)
character set collations, so for example, uca1400_ai_ci can be set as a
character set collation for Unicode character sets
(instead of compiled xxx_general_ci).
The array of overridden character set collations is stored in a new
(session and global) system variable @@character_set_collations and
can be set as a comma separated list of charset=collation pairs, e.g.:
SET @@character_set_collations='utf8mb3=uca1400_ai_ci,utf8mb4=uca1400_ai_ci';
The variable is empty by default, which mean use the hard-coded
character set collations (e.g. utf8mb4_general_ci for utf8mb4).
The variable can also be set globally by passing to the server startup command
line, and/or in my.cnf.
2022-12-14 15:46:27 +01:00
|
|
|
merge_context_collation(Sql_used *used,
|
|
|
|
const Charset_collation_map_st &map,
|
|
|
|
const Lex_context_collation &cl)
|
2022-05-23 09:05:33 +02:00
|
|
|
{
|
|
|
|
// CHARACTER SET latin1 [COLLATE latin1_bin] .. COLLATE DEFAULT
|
|
|
|
if (m_with_collate)
|
|
|
|
return Lex_exact_collation(m_ci).
|
|
|
|
raise_if_conflicts_with_context_collation(cl, false);
|
MDEV-30164 System variable for default collations
This patch adds a way to override default collations
(or "character set collations") for desired character sets.
The SQL standard says:
> Each collation known in an SQL-environment is applicable to one
> or more character sets, and for each character set, one or more
> collations are applicable to it, one of which is associated with
> it as its character set collation.
In MariaDB, character set collations has been hard-coded so far,
e.g. utf8mb4_general_ci has been a hard-coded character set collation
for utf8mb4.
This patch allows to override (globally per server, or per session)
character set collations, so for example, uca1400_ai_ci can be set as a
character set collation for Unicode character sets
(instead of compiled xxx_general_ci).
The array of overridden character set collations is stored in a new
(session and global) system variable @@character_set_collations and
can be set as a comma separated list of charset=collation pairs, e.g.:
SET @@character_set_collations='utf8mb3=uca1400_ai_ci,utf8mb4=uca1400_ai_ci';
The variable is empty by default, which mean use the hard-coded
character set collations (e.g. utf8mb4_general_ci for utf8mb4).
The variable can also be set globally by passing to the server startup command
line, and/or in my.cnf.
2022-12-14 15:46:27 +01:00
|
|
|
return merge_context_collation_override(used, map, cl);
|
2022-05-23 09:05:33 +02:00
|
|
|
}
|
2022-02-09 18:21:39 +01:00
|
|
|
|
2022-05-23 09:05:33 +02:00
|
|
|
|
|
|
|
bool Lex_exact_charset_extended_collation_attrs_st::
|
MDEV-30164 System variable for default collations
This patch adds a way to override default collations
(or "character set collations") for desired character sets.
The SQL standard says:
> Each collation known in an SQL-environment is applicable to one
> or more character sets, and for each character set, one or more
> collations are applicable to it, one of which is associated with
> it as its character set collation.
In MariaDB, character set collations has been hard-coded so far,
e.g. utf8mb4_general_ci has been a hard-coded character set collation
for utf8mb4.
This patch allows to override (globally per server, or per session)
character set collations, so for example, uca1400_ai_ci can be set as a
character set collation for Unicode character sets
(instead of compiled xxx_general_ci).
The array of overridden character set collations is stored in a new
(session and global) system variable @@character_set_collations and
can be set as a comma separated list of charset=collation pairs, e.g.:
SET @@character_set_collations='utf8mb3=uca1400_ai_ci,utf8mb4=uca1400_ai_ci';
The variable is empty by default, which mean use the hard-coded
character set collations (e.g. utf8mb4_general_ci for utf8mb4).
The variable can also be set globally by passing to the server startup command
line, and/or in my.cnf.
2022-12-14 15:46:27 +01:00
|
|
|
merge_collation(Sql_used *used,
|
|
|
|
const Charset_collation_map_st &map,
|
|
|
|
const Lex_extended_collation_st &cl)
|
2022-05-23 09:05:33 +02:00
|
|
|
{
|
|
|
|
switch (cl.type()) {
|
|
|
|
case Lex_extended_collation_st::TYPE_EXACT:
|
|
|
|
return merge_exact_collation(Lex_exact_collation(cl.charset_info()));
|
|
|
|
case Lex_extended_collation_st::TYPE_CONTEXTUALLY_TYPED:
|
MDEV-30164 System variable for default collations
This patch adds a way to override default collations
(or "character set collations") for desired character sets.
The SQL standard says:
> Each collation known in an SQL-environment is applicable to one
> or more character sets, and for each character set, one or more
> collations are applicable to it, one of which is associated with
> it as its character set collation.
In MariaDB, character set collations has been hard-coded so far,
e.g. utf8mb4_general_ci has been a hard-coded character set collation
for utf8mb4.
This patch allows to override (globally per server, or per session)
character set collations, so for example, uca1400_ai_ci can be set as a
character set collation for Unicode character sets
(instead of compiled xxx_general_ci).
The array of overridden character set collations is stored in a new
(session and global) system variable @@character_set_collations and
can be set as a comma separated list of charset=collation pairs, e.g.:
SET @@character_set_collations='utf8mb3=uca1400_ai_ci,utf8mb4=uca1400_ai_ci';
The variable is empty by default, which mean use the hard-coded
character set collations (e.g. utf8mb4_general_ci for utf8mb4).
The variable can also be set globally by passing to the server startup command
line, and/or in my.cnf.
2022-12-14 15:46:27 +01:00
|
|
|
return merge_context_collation(used, map,
|
|
|
|
Lex_context_collation(cl.charset_info()));
|
2022-02-09 18:21:39 +01:00
|
|
|
}
|
|
|
|
DBUG_ASSERT(0);
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
2022-05-23 09:05:33 +02:00
|
|
|
Mix an unordered combination of CHARACTER SET and COLLATE clauses
|
|
|
|
(i.e. COLLATE can come before CHARACTER SET).
|
|
|
|
Merge a CHARACTER SET clause.
|
|
|
|
@param cs - The "CHARACTER SET exact_charset_name".
|
2022-02-09 18:21:39 +01:00
|
|
|
*/
|
2022-05-23 09:05:33 +02:00
|
|
|
bool Lex_exact_charset_extended_collation_attrs_st::
|
MDEV-30164 System variable for default collations
This patch adds a way to override default collations
(or "character set collations") for desired character sets.
The SQL standard says:
> Each collation known in an SQL-environment is applicable to one
> or more character sets, and for each character set, one or more
> collations are applicable to it, one of which is associated with
> it as its character set collation.
In MariaDB, character set collations has been hard-coded so far,
e.g. utf8mb4_general_ci has been a hard-coded character set collation
for utf8mb4.
This patch allows to override (globally per server, or per session)
character set collations, so for example, uca1400_ai_ci can be set as a
character set collation for Unicode character sets
(instead of compiled xxx_general_ci).
The array of overridden character set collations is stored in a new
(session and global) system variable @@character_set_collations and
can be set as a comma separated list of charset=collation pairs, e.g.:
SET @@character_set_collations='utf8mb3=uca1400_ai_ci,utf8mb4=uca1400_ai_ci';
The variable is empty by default, which mean use the hard-coded
character set collations (e.g. utf8mb4_general_ci for utf8mb4).
The variable can also be set globally by passing to the server startup command
line, and/or in my.cnf.
2022-12-14 15:46:27 +01:00
|
|
|
merge_exact_charset(Sql_used *used,
|
|
|
|
const Charset_collation_map_st &map,
|
|
|
|
const Lex_exact_charset &cs)
|
2022-02-09 18:21:39 +01:00
|
|
|
{
|
2022-05-23 09:05:33 +02:00
|
|
|
DBUG_ASSERT(cs.charset_info());
|
2022-02-09 18:21:39 +01:00
|
|
|
|
|
|
|
switch (m_type) {
|
|
|
|
case TYPE_EMPTY:
|
2022-05-23 09:05:33 +02:00
|
|
|
// CHARACTER SET cs
|
|
|
|
*this= Lex_exact_charset_extended_collation_attrs(cs);
|
2022-02-09 18:21:39 +01:00
|
|
|
return false;
|
2022-05-23 09:05:33 +02:00
|
|
|
|
2022-02-09 18:21:39 +01:00
|
|
|
case TYPE_CHARACTER_SET:
|
2022-05-23 09:05:33 +02:00
|
|
|
// CHARACTER SET cs1 .. CHARACTER SET cs2
|
|
|
|
return Lex_exact_charset(m_ci).raise_if_not_equal(cs);
|
|
|
|
|
2022-02-09 18:21:39 +01:00
|
|
|
case TYPE_COLLATE_EXACT:
|
2022-05-23 09:05:33 +02:00
|
|
|
// COLLATE latin1_bin .. CHARACTER SET cs
|
2022-05-25 09:07:04 +02:00
|
|
|
if (cs.raise_if_not_applicable(Lex_exact_collation(m_ci)))
|
|
|
|
return true;
|
|
|
|
m_type= TYPE_CHARACTER_SET_COLLATE_EXACT;
|
|
|
|
return false;
|
|
|
|
|
|
|
|
case TYPE_CHARACTER_SET_COLLATE_EXACT:
|
|
|
|
// CHARACTER SET cs1 COLLATE cl .. CHARACTER SET cs2
|
|
|
|
return Lex_exact_charset_opt_extended_collate(m_ci, true).
|
|
|
|
raise_if_charsets_differ(cs);
|
2022-02-09 18:21:39 +01:00
|
|
|
|
2022-05-23 09:05:33 +02:00
|
|
|
case TYPE_COLLATE_CONTEXTUALLY_TYPED:
|
|
|
|
// COLLATE DEFAULT .. CHARACTER SET cs
|
|
|
|
{
|
|
|
|
Lex_exact_charset_opt_extended_collate tmp(cs);
|
MDEV-30164 System variable for default collations
This patch adds a way to override default collations
(or "character set collations") for desired character sets.
The SQL standard says:
> Each collation known in an SQL-environment is applicable to one
> or more character sets, and for each character set, one or more
> collations are applicable to it, one of which is associated with
> it as its character set collation.
In MariaDB, character set collations has been hard-coded so far,
e.g. utf8mb4_general_ci has been a hard-coded character set collation
for utf8mb4.
This patch allows to override (globally per server, or per session)
character set collations, so for example, uca1400_ai_ci can be set as a
character set collation for Unicode character sets
(instead of compiled xxx_general_ci).
The array of overridden character set collations is stored in a new
(session and global) system variable @@character_set_collations and
can be set as a comma separated list of charset=collation pairs, e.g.:
SET @@character_set_collations='utf8mb3=uca1400_ai_ci,utf8mb4=uca1400_ai_ci';
The variable is empty by default, which mean use the hard-coded
character set collations (e.g. utf8mb4_general_ci for utf8mb4).
The variable can also be set globally by passing to the server startup command
line, and/or in my.cnf.
2022-12-14 15:46:27 +01:00
|
|
|
if (tmp.merge_context_collation(used, map, Lex_context_collation(m_ci)))
|
2022-05-23 09:05:33 +02:00
|
|
|
return true;
|
2022-05-25 09:07:04 +02:00
|
|
|
*this= Lex_exact_charset_extended_collation_attrs(tmp);
|
2022-05-23 09:05:33 +02:00
|
|
|
return false;
|
|
|
|
}
|
2022-02-09 18:21:39 +01:00
|
|
|
}
|
2022-05-23 09:05:33 +02:00
|
|
|
DBUG_ASSERT(0);
|
2022-02-09 18:21:39 +01:00
|
|
|
return false;
|
|
|
|
}
|
2022-05-17 10:52:23 +02:00
|
|
|
|
|
|
|
|
|
|
|
bool Lex_extended_charset_extended_collation_attrs_st::merge_charset_default()
|
|
|
|
{
|
|
|
|
if (m_charset_order == CHARSET_TYPE_EMPTY)
|
|
|
|
m_charset_order= CHARSET_TYPE_CONTEXT;
|
|
|
|
Lex_opt_context_charset_st::merge_charset_default();
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
bool Lex_extended_charset_extended_collation_attrs_st::
|
MDEV-30164 System variable for default collations
This patch adds a way to override default collations
(or "character set collations") for desired character sets.
The SQL standard says:
> Each collation known in an SQL-environment is applicable to one
> or more character sets, and for each character set, one or more
> collations are applicable to it, one of which is associated with
> it as its character set collation.
In MariaDB, character set collations has been hard-coded so far,
e.g. utf8mb4_general_ci has been a hard-coded character set collation
for utf8mb4.
This patch allows to override (globally per server, or per session)
character set collations, so for example, uca1400_ai_ci can be set as a
character set collation for Unicode character sets
(instead of compiled xxx_general_ci).
The array of overridden character set collations is stored in a new
(session and global) system variable @@character_set_collations and
can be set as a comma separated list of charset=collation pairs, e.g.:
SET @@character_set_collations='utf8mb3=uca1400_ai_ci,utf8mb4=uca1400_ai_ci';
The variable is empty by default, which mean use the hard-coded
character set collations (e.g. utf8mb4_general_ci for utf8mb4).
The variable can also be set globally by passing to the server startup command
line, and/or in my.cnf.
2022-12-14 15:46:27 +01:00
|
|
|
merge_exact_charset(Sql_used *used,
|
|
|
|
const Charset_collation_map_st &map,
|
|
|
|
const Lex_exact_charset &cs)
|
2022-05-17 10:52:23 +02:00
|
|
|
{
|
|
|
|
if (m_charset_order == CHARSET_TYPE_EMPTY)
|
|
|
|
m_charset_order= CHARSET_TYPE_EXACT;
|
MDEV-30164 System variable for default collations
This patch adds a way to override default collations
(or "character set collations") for desired character sets.
The SQL standard says:
> Each collation known in an SQL-environment is applicable to one
> or more character sets, and for each character set, one or more
> collations are applicable to it, one of which is associated with
> it as its character set collation.
In MariaDB, character set collations has been hard-coded so far,
e.g. utf8mb4_general_ci has been a hard-coded character set collation
for utf8mb4.
This patch allows to override (globally per server, or per session)
character set collations, so for example, uca1400_ai_ci can be set as a
character set collation for Unicode character sets
(instead of compiled xxx_general_ci).
The array of overridden character set collations is stored in a new
(session and global) system variable @@character_set_collations and
can be set as a comma separated list of charset=collation pairs, e.g.:
SET @@character_set_collations='utf8mb3=uca1400_ai_ci,utf8mb4=uca1400_ai_ci';
The variable is empty by default, which mean use the hard-coded
character set collations (e.g. utf8mb4_general_ci for utf8mb4).
The variable can also be set globally by passing to the server startup command
line, and/or in my.cnf.
2022-12-14 15:46:27 +01:00
|
|
|
return Lex_exact_charset_extended_collation_attrs_st::
|
|
|
|
merge_exact_charset(used, map, cs);
|
2022-05-17 10:52:23 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
bool Lex_extended_charset_extended_collation_attrs_st::
|
|
|
|
raise_if_charset_conflicts_with_default(
|
|
|
|
const Lex_exact_charset_opt_extended_collate &def) const
|
|
|
|
{
|
|
|
|
DBUG_ASSERT(m_charset_order != CHARSET_TYPE_EMPTY || is_empty());
|
|
|
|
if (!my_charset_same(def.collation().charset_info(), m_ci))
|
|
|
|
{
|
|
|
|
raise_ER_CONFLICTING_DECLARATIONS("CHARACTER SET ", "DEFAULT",
|
|
|
|
def.collation().charset_info()->cs_name.str,
|
|
|
|
"CHARACTER SET ", m_ci->cs_name.str,
|
|
|
|
m_charset_order == CHARSET_TYPE_EXACT);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
  Resolve a combination of CHARACTER SET and COLLATE clauses,
  with CHARACTER SET DEFAULT possibly among them, to a CHARSET_INFO,
  using the defaults supplied by "ctx".

  @param used - passed through to resolved_to_character_set()
  @param map  - passed through to resolved_to_character_set()
  @param ctx  - supplies collate_default() and charset_default()
  @return     - the resolved CHARSET_INFO (or whatever
                resolved_to_character_set() returns),
                or NULL if one of the raise_if_xxx() checks
                below returned true.
*/
CHARSET_INFO *
Lex_extended_charset_extended_collation_attrs_st::
  resolved_to_context(Sql_used *used,
                      const Charset_collation_map_st &map,
                      const Charset_collation_context &ctx) const
{
  if (Lex_opt_context_charset_st::is_empty())
  {
    // CHARACTER SET DEFAULT was not specified
    return Lex_exact_charset_extended_collation_attrs_st::
             resolved_to_character_set(used, map,
                                       ctx.collate_default().charset_info());
  }

  // CHARACTER SET DEFAULT was specified
  switch (type()) {
  case TYPE_EMPTY:
    // CHARACTER SET DEFAULT;
    return ctx.charset_default().charset().charset_info();

  case TYPE_CHARACTER_SET:
    /*
      CHARACTER SET DEFAULT CHARACTER SET cs_exact

      The two clauses can conflict, e.g.:
        SET character_set_server=utf8mb4;
        CREATE DATABASE db1 CHARACTER SET latin1 CHARACTER SET DEFAULT;
    */
    if (!raise_if_charset_conflicts_with_default(ctx.charset_default()))
      return m_ci;
    return NULL;

  case TYPE_CHARACTER_SET_COLLATE_EXACT:
  case TYPE_COLLATE_EXACT:
    {
      /*
        CREATE DATABASE db1
          COLLATE cl_exact
          [ CHARACTER SET cs_exact ]
          CHARACTER SET DEFAULT;

        Step 1: if "CHARACTER SET cs_exact" is there, it must not
        conflict with CHARACTER SET DEFAULT. A conflict example:
          SET character_set_server=utf8mb4;
          CREATE DATABASE db1
            COLLATE latin1_bin
            CHARACTER SET latin1
            CHARACTER SET DEFAULT;
      */
      const bool with_exact_charset=
        (m_type == TYPE_CHARACTER_SET_COLLATE_EXACT);
      if (with_exact_charset &&
          raise_if_charset_conflicts_with_default(ctx.charset_default()))
        return NULL;

      /*
        Step 2: "COLLATE cl_exact" must not conflict with
        CHARACTER SET DEFAULT. A conflict example:
          SET character_set_server=utf8mb4;
          CREATE DATABASE db1
            COLLATE latin1_bin
            CHARACTER SET DEFAULT;
      */
      if (!ctx.charset_default().
             raise_if_not_applicable(Lex_exact_collation(m_ci)))
        return m_ci;
      return NULL;
    }

  case TYPE_COLLATE_CONTEXTUALLY_TYPED:
    /*
      CHARACTER SET and COLLATE are both contextual:
        ALTER DATABASE db1 CHARACTER SET DEFAULT COLLATE DEFAULT;
        ALTER DATABASE db1 COLLATE DEFAULT CHARACTER SET DEFAULT;
    */
    return Lex_exact_charset_extended_collation_attrs_st::
             resolved_to_character_set(used, map,
                                       ctx.charset_default().
                                         collation().charset_info());
  }
  // Not reachable for the type() values handled above
  DBUG_ASSERT(0);
  return NULL;
}
|