mirror of
https://github.com/MariaDB/server.git
synced 2025-01-16 03:52:35 +01:00
75f25e4ca7
This patch adds a way to override default collations (or "character set collations") for desired character sets. The SQL standard says: > Each collation known in an SQL-environment is applicable to one > or more character sets, and for each character set, one or more > collations are applicable to it, one of which is associated with > it as its character set collation. In MariaDB, character set collations have been hard-coded so far, e.g. utf8mb4_general_ci has been the hard-coded character set collation for utf8mb4. This patch allows overriding character set collations (globally per server, or per session), so that, for example, uca1400_ai_ci can be set as the character set collation for Unicode character sets (instead of the compiled-in xxx_general_ci). The array of overridden character set collations is stored in a new (session and global) system variable @@character_set_collations and can be set as a comma-separated list of charset=collation pairs, e.g.: SET @@character_set_collations='utf8mb3=uca1400_ai_ci,utf8mb4=uca1400_ai_ci'; The variable is empty by default, which means that the hard-coded character set collations are used (e.g. utf8mb4_general_ci for utf8mb4). The variable can also be set globally by passing it on the server startup command line and/or in my.cnf.
117 lines
3.8 KiB
C++
117 lines
3.8 KiB
C++
/* Copyright (c) 2023, MariaDB Corporation.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1335  USA */
|
|
|
|
|
|
#include "my_global.h"
|
|
#include "my_sys.h"
|
|
#include "lex_charset.h"
|
|
#include "mysqld_error.h"
|
|
#include "charset_collations.h"
|
|
#include "simple_tokenizer.h"
|
|
|
|
bool Charset_collation_map_st::insert_or_replace(
|
|
const Lex_exact_charset &charset,
|
|
const Lex_extended_collation &collation,
|
|
bool error_on_conflicting_duplicate)
|
|
{
|
|
Lex_exact_charset_opt_extended_collate res(charset);
|
|
Sql_used used;
|
|
if (res.merge_collation_override(&used, *this, collation))
|
|
return true;
|
|
|
|
if (error_on_conflicting_duplicate)
|
|
{
|
|
const Elem_st *dup;
|
|
if ((dup= find_elem_by_charset_id(charset.charset_info()->number)) &&
|
|
dup->to() != res.collation().charset_info())
|
|
{
|
|
my_error(ER_CONFLICTING_DECLARATIONS, MYF(0),
|
|
"", dup->to()->coll_name.str,
|
|
"", res.collation().charset_info()->coll_name.str);
|
|
return true;
|
|
}
|
|
}
|
|
return insert_or_replace(Elem(charset.charset_info(),
|
|
res.collation().charset_info()));
|
|
}
|
|
|
|
|
|
bool Charset_collation_map_st::insert_or_replace(
|
|
const LEX_CSTRING &cs_name,
|
|
const LEX_CSTRING &cl_name,
|
|
bool error_on_conflicting_duplicate,
|
|
myf utf8_flag)
|
|
{
|
|
char charset_name_c[MY_CS_CHARACTER_SET_NAME_SIZE + 1/*for '\0'*/];
|
|
strmake(charset_name_c, cs_name.str, cs_name.length);
|
|
CHARSET_INFO *cs= get_charset_by_csname(charset_name_c,
|
|
MY_CS_PRIMARY, utf8_flag);
|
|
if (!cs)
|
|
{
|
|
my_error(ER_UNKNOWN_CHARACTER_SET, MYF(0), charset_name_c);
|
|
return true;
|
|
}
|
|
|
|
char collation_name_c[MY_CS_COLLATION_NAME_SIZE + 1/*for '\0'*/];
|
|
strmake(collation_name_c, cl_name.str, cl_name.length);
|
|
|
|
Lex_exact_collation tmpec(&my_charset_bin);
|
|
Lex_extended_collation tmp(tmpec);
|
|
if (tmp.set_by_name(collation_name_c, utf8_flag))
|
|
return true;
|
|
|
|
return insert_or_replace(Lex_exact_charset(cs), tmp,
|
|
error_on_conflicting_duplicate);
|
|
}
|
|
|
|
|
|
bool Charset_collation_map_st::from_text(const LEX_CSTRING &str, myf utf8_flag)
|
|
{
|
|
init();
|
|
Simple_tokenizer stream(str.str, str.length);
|
|
|
|
/*
|
|
Allow relaxed comma parsing:
|
|
SET @@character_set_collations=
|
|
',,,utf8mb3 = utf8mb3_bin,,latin1 = latin1_bin,,,';
|
|
It makes it easier for the user to edit the value
|
|
using SQL functions CONCAT or REGEXP_REPLACE.
|
|
*/
|
|
for ( ; ; )
|
|
{
|
|
LEX_CSTRING charset_name= stream.get_ident();
|
|
if (charset_name.length)
|
|
{
|
|
if (stream.get_char('='))
|
|
return true;
|
|
LEX_CSTRING collation_name= stream.get_ident();
|
|
if (!collation_name.length)
|
|
return true;
|
|
/*
|
|
Don't allow duplicate conflicting declarations within the same string:
|
|
SET @@var='utf8mb3=utf8mb3_general_ci,utf8mb3=utf8mb3_bin';
|
|
*/
|
|
if (insert_or_replace(charset_name, collation_name,
|
|
true/*err on dup*/, utf8_flag))
|
|
return true;
|
|
}
|
|
if (!stream.get_char(','))
|
|
continue;
|
|
if (stream.eof())
|
|
return false;
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|