mirror of
				https://github.com/MariaDB/server.git
				synced 2025-11-03 20:36:16 +01:00 
			
		
		
		
	This patch also fixes:
  MDEV-33050 Build-in schemas like oracle_schema are accent insensitive
  MDEV-33084 LASTVAL(t1) and LASTVAL(T1) do not work well with lower-case-table-names=0
  MDEV-33085 Tables T1 and t1 do not work well with ENGINE=CSV and lower-case-table-names=0
  MDEV-33086 SHOW OPEN TABLES IN DB1 -- is case insensitive with lower-case-table-names=0
  MDEV-33088 Cannot create triggers in the database `MYSQL`
  MDEV-33103 LOCK TABLE t1 AS t2 -- alias is not case sensitive with lower-case-table-names=0
  MDEV-33109 DROP DATABASE MYSQL -- does not drop SP with lower-case-table-names=0
  MDEV-33110 HANDLER commands are case insensitive with lower-case-table-names=0
  MDEV-33119 User is case insensitive in INFORMATION_SCHEMA.VIEWS
  MDEV-33120 System log table names are case insensitive with lower-case-table-names=0
- Removing the virtual function strnncoll() from MY_COLLATION_HANDLER
- Adding a wrapper function CHARSET_INFO::streq(), to compare
  two strings for equality. For now it calls strnncoll() internally.
  In the future it will turn into a virtual function.
- Adding new accent sensitive case insensitive collations:
    - utf8mb4_general1400_as_ci
    - utf8mb3_general1400_as_ci
  They implement accent sensitive case insensitive comparison.
  The weight of a character is equal to the code point of its
  upper case variant. These collations use Unicode-14.0.0 casefolding data.
  The result of
     my_charset_utf8mb3_general1400_as_ci.strcoll()
  is very close to the former
     my_charset_utf8mb3_general_ci.strcasecmp()
  There is only a difference in a couple dozen rare characters, because:
    - the switch from "tolower" to "toupper" comparison, to make
      utf8mb3_general1400_as_ci closer to utf8mb3_general_ci
    - the switch from Unicode-3.0.0 to Unicode-14.0.0
  This difference should be tolerable. See the list of affected
  characters in the MDEV description.
  Note, utf8mb4_general1400_as_ci correctly handles non-BMP characters!
  Unlike utf8mb4_general_ci, it does not treat all non-BMP characters
  as equal.
- Adding classes representing names of the file based database objects:
    Lex_ident_db
    Lex_ident_table
    Lex_ident_trigger
  Their comparison collation depends on the underlying
  file system case sensitivity and on --lower-case-table-names
  and can be either my_charset_bin or my_charset_utf8mb3_general1400_as_ci.
- Adding classes representing names of other database objects,
  whose names have case insensitive comparison style,
  using my_charset_utf8mb3_general1400_as_ci:
  Lex_ident_column
  Lex_ident_sys_var
  Lex_ident_user_var
  Lex_ident_sp_var
  Lex_ident_ps
  Lex_ident_i_s_table
  Lex_ident_window
  Lex_ident_func
  Lex_ident_partition
  Lex_ident_with_element
  Lex_ident_rpl_filter
  Lex_ident_master_info
  Lex_ident_host
  Lex_ident_locale
  Lex_ident_plugin
  Lex_ident_engine
  Lex_ident_server
  Lex_ident_savepoint
  Lex_ident_charset
  engine_option_value::Name
- All the mentioned Lex_ident_xxx classes implement a method streq():
  if (ident1.streq(ident2))
     do_equal();
  This method works as a wrapper for CHARSET_INFO::streq().
- Changing a lot of "LEX_CSTRING name" to "Lex_ident_xxx name"
  in class members and in function/method parameters.
- Replacing all calls like
    system_charset_info->coll->strcasecmp(ident1, ident2)
  to
    ident1.streq(ident2)
- Taking advantage of the c++11 user defined literal operator
  for LEX_CSTRING (see m_strings.h) and Lex_ident_xxx (see lex_ident.h)
  data types. Use example:
  const Lex_ident_column primary_key_name= "PRIMARY"_Lex_ident_column;
  is now a shorter version of:
  const Lex_ident_column primary_key_name=
    Lex_ident_column({STRING_WITH_LEN("PRIMARY")});
		
	
			
		
			
				
	
	
		
			185 lines
		
	
	
	
		
			4.7 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			185 lines
		
	
	
	
		
			4.7 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
/*
 | 
						|
   Copyright (c) 2018, MariaDB Corporation.
 | 
						|
 | 
						|
   This program is free software; you can redistribute it and/or modify
 | 
						|
   it under the terms of the GNU General Public License as published by
 | 
						|
   the Free Software Foundation; version 2 of the License.
 | 
						|
 | 
						|
   This program is distributed in the hope that it will be useful,
 | 
						|
   but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
						|
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
						|
   GNU General Public License for more details.
 | 
						|
 | 
						|
   You should have received a copy of the GNU General Public License
 | 
						|
   along with this program; if not, write to the Free Software
 | 
						|
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
 | 
						|
 | 
						|
 | 
						|
#ifndef LEX_STRING_INCLUDED
 | 
						|
#define LEX_STRING_INCLUDED
 | 
						|
 | 
						|
 | 
						|
/*
  LEX_CSTRING is only named here, not defined: the layout of
  struct st_mysql_const_lex_string comes from elsewhere.
  The code below relies on it having the members "str" and "length",
  which it initializes from a const char pointer and a size_t.
*/
typedef struct st_mysql_const_lex_string LEX_CSTRING;
 | 
						|
 | 
						|
 | 
						|
class Lex_cstring : public LEX_CSTRING
 | 
						|
{
 | 
						|
  public:
 | 
						|
  constexpr Lex_cstring()
 | 
						|
   :LEX_CSTRING({NULL, 0})
 | 
						|
  { }
 | 
						|
  constexpr Lex_cstring(const LEX_CSTRING &str)
 | 
						|
   :LEX_CSTRING(str)
 | 
						|
  { }
 | 
						|
  constexpr Lex_cstring(const char *_str, size_t _len)
 | 
						|
   :LEX_CSTRING({_str, _len})
 | 
						|
  { }
 | 
						|
  Lex_cstring(const char *start, const char *end)
 | 
						|
  {
 | 
						|
    DBUG_ASSERT(start <= end);
 | 
						|
    str= start;
 | 
						|
    length= end - start;
 | 
						|
  }
 | 
						|
 | 
						|
  bool bin_eq(const LEX_CSTRING &rhs) const
 | 
						|
  {
 | 
						|
    return length == rhs.length && !memcmp(str, rhs.str, length);
 | 
						|
  }
 | 
						|
 | 
						|
  void set(const char *_str, size_t _len)
 | 
						|
  {
 | 
						|
    str= _str;
 | 
						|
    length= _len;
 | 
						|
  }
 | 
						|
 | 
						|
  /*
 | 
						|
    Trim left white spaces.
 | 
						|
    Assumes that there are no multi-bytes characters
 | 
						|
    that can be considered white-space.
 | 
						|
  */
 | 
						|
  Lex_cstring ltrim_whitespace(CHARSET_INFO *cs) const
 | 
						|
  {
 | 
						|
    DBUG_ASSERT(cs->mbminlen == 1);
 | 
						|
    Lex_cstring str= *this;
 | 
						|
    while (str.length > 0 && my_isspace(cs, str.str[0]))
 | 
						|
    {
 | 
						|
      str.length--;
 | 
						|
      str.str++;
 | 
						|
    }
 | 
						|
    return str;
 | 
						|
  }
 | 
						|
 | 
						|
  /*
 | 
						|
    Trim right white spaces.
 | 
						|
    Assumes that there are no multi-bytes characters
 | 
						|
    that can be considered white-space.
 | 
						|
    Also, assumes that the character set supports backward space parsing.
 | 
						|
  */
 | 
						|
  Lex_cstring rtrim_whitespace(CHARSET_INFO *cs) const
 | 
						|
  {
 | 
						|
    DBUG_ASSERT(cs->mbminlen == 1);
 | 
						|
    Lex_cstring str= *this;
 | 
						|
    while (str.length > 0 && my_isspace(cs, str.str[str.length - 1]))
 | 
						|
    {
 | 
						|
      str.length --;
 | 
						|
    }
 | 
						|
    return str;
 | 
						|
  }
 | 
						|
 | 
						|
  /*
 | 
						|
    Trim all spaces.
 | 
						|
  */
 | 
						|
  Lex_cstring trim_whitespace(CHARSET_INFO *cs) const
 | 
						|
  {
 | 
						|
    return ltrim_whitespace(cs).rtrim_whitespace(cs);
 | 
						|
  }
 | 
						|
 | 
						|
  /*
 | 
						|
    Trim all spaces and return the length of the leading space sequence.
 | 
						|
  */
 | 
						|
  Lex_cstring trim_whitespace(CHARSET_INFO *cs, size_t *prefix_length) const
 | 
						|
  {
 | 
						|
    Lex_cstring tmp= Lex_cstring(*this).ltrim_whitespace(cs);
 | 
						|
    if (prefix_length)
 | 
						|
      *prefix_length= tmp.str - str;
 | 
						|
    return tmp.rtrim_whitespace(cs);
 | 
						|
  }
 | 
						|
 | 
						|
  /*
 | 
						|
    Return the "n" leftmost bytes if this[0] is longer than "n" bytes,
 | 
						|
    or return this[0] itself otherwise.
 | 
						|
  */
 | 
						|
  Lex_cstring left(size_t n) const
 | 
						|
  {
 | 
						|
    return Lex_cstring(str, MY_MIN(length, n));
 | 
						|
  }
 | 
						|
  /*
 | 
						|
    If this[0] is shorter than "pos" bytes, then return an empty string.
 | 
						|
    Otherwise, return a substring of this[0] starting from
 | 
						|
    the byte position "pos" until the end.
 | 
						|
  */
 | 
						|
  Lex_cstring substr(size_t pos) const
 | 
						|
  {
 | 
						|
    return length <= pos ? Lex_cstring(str + length, (size_t) 0) :
 | 
						|
                           Lex_cstring(str + pos, length - pos);
 | 
						|
  }
 | 
						|
  // Check if a prefix of this[0] is equal to "rhs".
 | 
						|
  bool starts_with(const LEX_CSTRING &rhs) const
 | 
						|
  {
 | 
						|
    DBUG_ASSERT(str);
 | 
						|
    DBUG_ASSERT(rhs.str);
 | 
						|
    return length >= rhs.length && !memcmp(str, rhs.str, rhs.length);
 | 
						|
  }
 | 
						|
};
 | 
						|
 | 
						|
 | 
						|
class Lex_cstring_strlen: public Lex_cstring
 | 
						|
{
 | 
						|
public:
 | 
						|
  Lex_cstring_strlen(const char *from)
 | 
						|
   :Lex_cstring(from, from ? strlen(from) : 0)
 | 
						|
  { }
 | 
						|
};
 | 
						|
 | 
						|
 | 
						|
/* Functions to compare if two lex strings are equal */
 | 
						|
 | 
						|
/*
 | 
						|
  Compare two LEX_CSTRINGs and return 0 if equal
 | 
						|
*/
 | 
						|
 | 
						|
static inline bool cmp(const LEX_CSTRING *a, const LEX_CSTRING *b)
 | 
						|
{
 | 
						|
  return a->length != b->length ||
 | 
						|
    (a->length && memcmp(a->str, b->str, a->length));
 | 
						|
}
 | 
						|
static inline bool cmp(const LEX_CSTRING a, const LEX_CSTRING b)
 | 
						|
{
 | 
						|
  return a.length != b.length || (a.length && memcmp(a.str, b.str, a.length));
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
  Compare if two LEX_CSTRING are equal. Assumption is that
 | 
						|
  character set is ASCII (like for plugin names)
 | 
						|
*/
 | 
						|
 | 
						|
static inline bool lex_string_eq(const LEX_CSTRING *a, const LEX_CSTRING *b)
 | 
						|
{
 | 
						|
  if (a->length != b->length)
 | 
						|
    return 0;                                   /* Different */
 | 
						|
  return strcasecmp(a->str, b->str) == 0;
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
  To be used when calling lex_string_eq with STRING_WITH_LEN() as second
 | 
						|
  argument
 | 
						|
*/
 | 
						|
 | 
						|
static inline bool lex_string_eq(const LEX_CSTRING *a, const char *b, size_t b_length)
 | 
						|
{
 | 
						|
  if (a->length != b_length)
 | 
						|
    return 0;                                   /* Different */
 | 
						|
  return strcasecmp(a->str, b) == 0;
 | 
						|
}
 | 
						|
 | 
						|
#endif /* LEX_STRING_INCLUDED */
 |