mirror of
				https://github.com/MariaDB/server.git
				synced 2025-10-31 19:06:14 +01:00 
			
		
		
		
	 75f25e4ca7
			
		
	
	
	75f25e4ca7
	
	
	
		
			
			This patch adds a way to override default collations (or "character set collations") for desired character sets. The SQL standard says: > Each collation known in an SQL-environment is applicable to one > or more character sets, and for each character set, one or more > collations are applicable to it, one of which is associated with > it as its character set collation. In MariaDB, character set collations have been hard-coded so far, e.g. utf8mb4_general_ci has been a hard-coded character set collation for utf8mb4. This patch allows overriding character set collations (globally per server, or per session), so for example, uca1400_ai_ci can be set as a character set collation for Unicode character sets (instead of the compiled-in xxx_general_ci). The array of overridden character set collations is stored in a new (session and global) system variable @@character_set_collations and can be set as a comma-separated list of charset=collation pairs, e.g.: SET @@character_set_collations='utf8mb3=uca1400_ai_ci,utf8mb4=uca1400_ai_ci'; The variable is empty by default, which means that the hard-coded character set collations are used (e.g. utf8mb4_general_ci for utf8mb4). The variable can also be set globally by passing it on the server startup command line, and/or in my.cnf.
		
			
				
	
	
		
			117 lines
		
	
	
	
		
			3.8 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			117 lines
		
	
	
	
		
			3.8 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| /* Copyright (c) 2023, MariaDB Corporation.
 | |
| 
 | |
|    This program is free software; you can redistribute it and/or modify
 | |
|    it under the terms of the GNU General Public License as published by
 | |
|    the Free Software Foundation; version 2 of the License.
 | |
| 
 | |
|    This program is distributed in the hope that it will be useful,
 | |
|    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
|    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | |
|    GNU General Public License for more details.
 | |
| 
 | |
|    You should have received a copy of the GNU General Public License
 | |
|    along with this program; if not, write to the Free Software
 | |
|    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
 | |
| 
 | |
| 
 | |
| #include "my_global.h"
 | |
| #include "my_sys.h"
 | |
| #include "lex_charset.h"
 | |
| #include "mysqld_error.h"
 | |
| #include "charset_collations.h"
 | |
| #include "simple_tokenizer.h"
 | |
| 
 | |
| bool Charset_collation_map_st::insert_or_replace(
 | |
|                                   const Lex_exact_charset &charset,
 | |
|                                   const Lex_extended_collation &collation,
 | |
|                                   bool error_on_conflicting_duplicate)
 | |
| {
 | |
|   Lex_exact_charset_opt_extended_collate res(charset);
 | |
|   Sql_used used;
 | |
|   if (res.merge_collation_override(&used, *this, collation))
 | |
|     return true;
 | |
| 
 | |
|   if (error_on_conflicting_duplicate)
 | |
|   {
 | |
|     const Elem_st *dup;
 | |
|     if ((dup= find_elem_by_charset_id(charset.charset_info()->number)) &&
 | |
|         dup->to() != res.collation().charset_info())
 | |
|     {
 | |
|       my_error(ER_CONFLICTING_DECLARATIONS, MYF(0),
 | |
|                "", dup->to()->coll_name.str,
 | |
|                "", res.collation().charset_info()->coll_name.str);
 | |
|       return true;
 | |
|     }
 | |
|   }
 | |
|   return insert_or_replace(Elem(charset.charset_info(),
 | |
|                                 res.collation().charset_info()));
 | |
| }
 | |
| 
 | |
| 
 | |
| bool Charset_collation_map_st::insert_or_replace(
 | |
|                                   const LEX_CSTRING &cs_name,
 | |
|                                   const LEX_CSTRING &cl_name,
 | |
|                                   bool error_on_conflicting_duplicate,
 | |
|                                   myf utf8_flag)
 | |
| {
 | |
|   char charset_name_c[MY_CS_CHARACTER_SET_NAME_SIZE + 1/*for '\0'*/];
 | |
|   strmake(charset_name_c, cs_name.str, cs_name.length);
 | |
|   CHARSET_INFO *cs= get_charset_by_csname(charset_name_c,
 | |
|                                           MY_CS_PRIMARY, utf8_flag);
 | |
|   if (!cs)
 | |
|   {
 | |
|     my_error(ER_UNKNOWN_CHARACTER_SET, MYF(0), charset_name_c);
 | |
|     return true;
 | |
|   }
 | |
| 
 | |
|   char collation_name_c[MY_CS_COLLATION_NAME_SIZE + 1/*for '\0'*/];
 | |
|   strmake(collation_name_c, cl_name.str, cl_name.length);
 | |
| 
 | |
|   Lex_exact_collation tmpec(&my_charset_bin);
 | |
|   Lex_extended_collation tmp(tmpec);
 | |
|   if (tmp.set_by_name(collation_name_c, utf8_flag))
 | |
|     return true;
 | |
| 
 | |
|   return insert_or_replace(Lex_exact_charset(cs), tmp,
 | |
|                            error_on_conflicting_duplicate);
 | |
| }
 | |
| 
 | |
| 
 | |
| bool Charset_collation_map_st::from_text(const LEX_CSTRING &str, myf utf8_flag)
 | |
| {
 | |
|   init();
 | |
|   Simple_tokenizer stream(str.str, str.length);
 | |
| 
 | |
|   /*
 | |
|     Allow relaxed comma parsing:
 | |
|     SET @@character_set_collations=
 | |
|        ',,,utf8mb3 = utf8mb3_bin,,latin1 = latin1_bin,,,';
 | |
|     It makes it easier for the user to edit the value
 | |
|     using SQL functions CONCAT or REGEXP_REPLACE.
 | |
|   */
 | |
|   for ( ; ; )
 | |
|   {
 | |
|     LEX_CSTRING charset_name= stream.get_ident();
 | |
|     if (charset_name.length)
 | |
|     {
 | |
|       if (stream.get_char('='))
 | |
|         return true;
 | |
|       LEX_CSTRING collation_name= stream.get_ident();
 | |
|       if (!collation_name.length)
 | |
|         return true;
 | |
|       /*
 | |
|         Don't allow duplicate conflicting declarations within the same string:
 | |
|           SET @@var='utf8mb3=utf8mb3_general_ci,utf8mb3=utf8mb3_bin';
 | |
|       */
 | |
|       if (insert_or_replace(charset_name, collation_name,
 | |
|                             true/*err on dup*/, utf8_flag))
 | |
|         return true;
 | |
|     }
 | |
|     if (!stream.get_char(','))
 | |
|       continue;
 | |
|     if (stream.eof())
 | |
|       return false;
 | |
|     return true;
 | |
|   }
 | |
|   return false;
 | |
| }
 |