mirror of
				https://github.com/MariaDB/server.git
				synced 2025-11-04 04:46:15 +01:00 
			
		
		
		
	This patch adds a way to override default collations (or "character set collations") for desired character sets. The SQL standard says: "Each collation known in an SQL-environment is applicable to one or more character sets, and for each character set, one or more collations are applicable to it, one of which is associated with it as its character set collation." In MariaDB, character set collations have been hard-coded so far, e.g. utf8mb4_general_ci has been the hard-coded character set collation for utf8mb4. This patch allows overriding character set collations (globally per server, or per session), so that, for example, uca1400_ai_ci can be set as the character set collation for Unicode character sets (instead of the compiled-in xxx_general_ci). The array of overridden character set collations is stored in a new (session and global) system variable @@character_set_collations and can be set as a comma-separated list of charset=collation pairs, e.g.: SET @@character_set_collations='utf8mb3=uca1400_ai_ci,utf8mb4=uca1400_ai_ci'; The variable is empty by default, which means that the hard-coded character set collations are used (e.g. utf8mb4_general_ci for utf8mb4). The variable can also be set globally by passing it on the server startup command line and/or in my.cnf.
		
			
				
	
	
		
			85 lines
		
	
	
	
		
			2 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			85 lines
		
	
	
	
		
			2 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
/* Copyright (c) 2023, MariaDB Corporation.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
 | 
						|
 | 
						|
#ifndef SIMPLE_TOKENIZER_INCLUDED
 | 
						|
#define SIMPLE_TOKENIZER_INCLUDED
 | 
						|
 | 
						|
 | 
						|
class Simple_tokenizer
 | 
						|
{
 | 
						|
  const char *m_ptr;
 | 
						|
  const char *m_end;
 | 
						|
public:
 | 
						|
  Simple_tokenizer(const char *str, size_t length)
 | 
						|
   :m_ptr(str), m_end(str + length)
 | 
						|
  { }
 | 
						|
  const char *ptr() const
 | 
						|
  {
 | 
						|
    return m_ptr;
 | 
						|
  }
 | 
						|
  bool eof() const
 | 
						|
  {
 | 
						|
    return m_ptr >= m_end;
 | 
						|
  }
 | 
						|
  void get_spaces()
 | 
						|
  {
 | 
						|
    for ( ; !eof(); m_ptr++)
 | 
						|
    {
 | 
						|
      if (m_ptr[0] != ' ')
 | 
						|
        break;
 | 
						|
    }
 | 
						|
  }
 | 
						|
  bool is_ident_start(char ch) const
 | 
						|
  {
 | 
						|
    return (ch >= 'a' && ch <= 'z') ||
 | 
						|
           (ch >= 'A' && ch <= 'Z') ||
 | 
						|
           ch == '_';
 | 
						|
  }
 | 
						|
  bool is_ident_body(char ch) const
 | 
						|
  {
 | 
						|
    return is_ident_start(ch) ||
 | 
						|
           (ch >= '0' && ch <= '9');
 | 
						|
  }
 | 
						|
  bool is_ident_start() const
 | 
						|
  {
 | 
						|
    return !eof() && is_ident_start(*m_ptr);
 | 
						|
  }
 | 
						|
  bool is_ident_body() const
 | 
						|
  {
 | 
						|
    return !eof() && is_ident_body(*m_ptr);
 | 
						|
  }
 | 
						|
  LEX_CSTRING get_ident()
 | 
						|
  {
 | 
						|
    get_spaces();
 | 
						|
    if (!is_ident_start())
 | 
						|
      return {m_ptr,0};
 | 
						|
    const char *start= m_ptr++;
 | 
						|
    for ( ; is_ident_body(); m_ptr++)
 | 
						|
    { }
 | 
						|
    LEX_CSTRING res= {start, (size_t) (m_ptr - start)};
 | 
						|
    return res;
 | 
						|
  }
 | 
						|
  bool get_char(char ch)
 | 
						|
  {
 | 
						|
    get_spaces();
 | 
						|
    if (eof() || *m_ptr != ch)
 | 
						|
      return true;
 | 
						|
    m_ptr++;
 | 
						|
    return false;
 | 
						|
  }
 | 
						|
};
 | 
						|
 | 
						|
 | 
						|
#endif // SIMPLE_TOKENIZER_INCLUDED
 |