diff --git a/include/m_ctype.h b/include/m_ctype.h index 33362045284..2f5899704c7 100644 --- a/include/m_ctype.h +++ b/include/m_ctype.h @@ -1932,6 +1932,52 @@ my_strcasecmp_latin1(const char *a, const char *b) int my_wc_mb_utf8mb4_bmp_only(CHARSET_INFO *cs, my_wc_t wc, uchar *r, uchar *e); +#ifdef __cplusplus +/* Read-only accessors for the positions kept in the MY_STRCOPY_STATUS base. */ +class String_copy_status: protected MY_STRCOPY_STATUS +{ +public: + const char *source_end_pos() const + { return m_source_end_pos; } + const char *well_formed_error_pos() const + { return m_well_formed_error_pos; } +}; + +/* Calls cs->well_formed_char_length() at construction, passing itself as status. */ +class Well_formed_prefix_status: public String_copy_status +{ +public: + Well_formed_prefix_status(CHARSET_INFO *cs, + const char *str, const char *end, size_t nchars) + { cs->well_formed_char_length(str, end, nchars, this); } +}; + +/* Also keeps the start pointer, so length() can return m_source_end_pos - m_str. */ +class Well_formed_prefix: public Well_formed_prefix_status +{ + const char *m_str; // The beginning of the string +public: + Well_formed_prefix(CHARSET_INFO *cs, const char *str, const char *end, + size_t nchars) + :Well_formed_prefix_status(cs, str, end, nchars), m_str(str) + { } + Well_formed_prefix(CHARSET_INFO *cs, const char *str, size_t length, + size_t nchars) + :Well_formed_prefix_status(cs, str, str + length, nchars), m_str(str) + { } /* end is computed as str + length */ + Well_formed_prefix(CHARSET_INFO *cs, const char *str, size_t length) + :Well_formed_prefix_status(cs, str, str + length, length), m_str(str) + { } /* length is also passed as nchars */ + Well_formed_prefix(CHARSET_INFO *cs, LEX_CSTRING str, size_t nchars) + :Well_formed_prefix_status(cs, str.str, str.str + str.length, nchars), + m_str(str.str) + { } + size_t length() const { return m_source_end_pos - m_str; } /* pointer difference from m_str */ +}; + +#endif /* __cplusplus */ + + #ifdef __cplusplus } #endif diff --git a/sql/lex_ident_cli.h b/sql/lex_ident_cli.h new file mode 100644 index 00000000000..438354dda4b --- /dev/null +++ b/sql/lex_ident_cli.h @@ -0,0 +1,122 @@ +/* Copyright (c) 2000, 2019, Oracle and/or its affiliates. + Copyright (c) 2010, 2024, MariaDB Corporation. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + + +#ifndef LEX_IDENT_CLI +#define LEX_IDENT_CLI + +#include "my_global.h" +#include "m_ctype.h" + + +/** + A string with metadata. Usually points to a string in the client + character set, but unlike Lex_ident_cli_st (see below) it does not + necessarily point to a query fragment. It can also point to memory + of other kinds (e.g. an additional THD allocated memory buffer + not overlapping with the current query text). + + We'll add more flags here eventually, to know if the string has, e.g.: + - multi-byte characters + - bad byte sequences + - backslash escapes: 'a\nb' + and reuse the original query fragments instead of making the string + copy too early, in Lex_input_stream::get_text(). 
+ This will allow to avoid unnecessary copying, as well as + create more optimal Item types in sql_yacc.yy +*/ +struct Lex_string_with_metadata_st: public LEX_CSTRING +{ +private: + bool m_is_8bit; // True if the string has 8bit characters + char m_quote; // Quote character, or 0 if not quoted +public: + void set_8bit(bool is_8bit) { m_is_8bit= is_8bit; } /* leaves m_quote untouched */ + void set_metadata(bool is_8bit, char quote) /* sets both metadata members */ + { + m_is_8bit= is_8bit; + m_quote= quote; + } + void set(const char *s, size_t len, bool is_8bit, char quote) /* pointer, length and metadata; no copy is made */ + { + str= s; + length= len; + set_metadata(is_8bit, quote); + } + void set(const LEX_CSTRING *s, bool is_8bit, char quote) /* takes str/length from *s, then sets metadata */ + { + ((LEX_CSTRING &)*this)= *s; + set_metadata(is_8bit, quote); + } + bool is_8bit() const { return m_is_8bit; } + bool is_quoted() const { return m_quote != '\0'; } /* true when a non-zero quote character is stored */ + char quote() const { return m_quote; } + // Get string repertoire by the 8-bit flag and the character set + my_repertoire_t repertoire(CHARSET_INFO *cs) const + { + return !m_is_8bit && my_charset_is_ascii_based(cs) ? + MY_REPERTOIRE_ASCII : MY_REPERTOIRE_UNICODE30; + } + // Get string repertoire by the 8-bit flag, for ASCII-based character sets + my_repertoire_t repertoire() const + { + return !m_is_8bit ? MY_REPERTOIRE_ASCII : MY_REPERTOIRE_UNICODE30; + } +}; + + +/* + Used to store identifiers in the client character set. + Points to a query fragment. 
+*/ +struct Lex_ident_cli_st: public Lex_string_with_metadata_st +{ +public: + void set_keyword(const char *s, size_t len) /* stored as not 8-bit and not quoted */ + { + set(s, len, false, '\0'); + } + void set_ident(const char *s, size_t len, bool is_8bit) /* stored as not quoted */ + { + set(s, len, is_8bit, '\0'); + } + void set_ident_quoted(const char *s, size_t len, bool is_8bit, char quote) + { + set(s, len, is_8bit, quote); + } + void set_unquoted(const LEX_CSTRING *s, bool is_8bit) /* takes str/length from *s, quote is 0 */ + { + set(s, is_8bit, '\0'); + } + const char *pos() const { return str - is_quoted(); } /* str, or str - 1 when quoted; presumably the opening quote - confirm */ + const char *end() const { return str + length + is_quoted(); } /* str + length, plus 1 when quoted; presumably past the closing quote - confirm */ +}; + +/* Lex_ident_cli_st with constructors; both store a quote character of 0. */ +class Lex_ident_cli: public Lex_ident_cli_st +{ +public: + Lex_ident_cli(const LEX_CSTRING *s, bool is_8bit) + { + set_unquoted(s, is_8bit); + } + Lex_ident_cli(const char *s, size_t len) /* is_8bit is set to false */ + { + set_ident(s, len, false); + } +}; + +#endif // LEX_IDENT_CLI diff --git a/sql/lex_ident_sys.h b/sql/lex_ident_sys.h new file mode 100644 index 00000000000..e49c3bec3c2 --- /dev/null +++ b/sql/lex_ident_sys.h @@ -0,0 +1,75 @@ +/* Copyright (c) 2000, 2019, Oracle and/or its affiliates. + Copyright (c) 2010, 2024, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + + +#ifndef LEX_IDENT_SYS +#define LEX_IDENT_SYS + +#include "lex_ident_cli.h" +#include "sql_alloc.h" + +extern "C" MYSQL_PLUGIN_IMPORT CHARSET_INFO *system_charset_info; +/* LEX_CSTRING-based identifier; set_valid_utf8() checks it against system_charset_info. */ +struct Lex_ident_sys_st: public LEX_CSTRING, Sql_alloc +{ +public: + bool copy_ident_cli(const THD *thd, const Lex_ident_cli_st *str); + bool copy_keyword(const THD *thd, const Lex_ident_cli_st *str); + bool copy_sys(const THD *thd, const LEX_CSTRING *str); + bool convert(const THD *thd, const LEX_CSTRING *str, CHARSET_INFO *cs); + bool copy_or_convert(const THD *thd, const Lex_ident_cli_st *str, + CHARSET_INFO *cs); + bool is_null() const { return str == NULL; } + bool to_size_number(ulonglong *to) const; + void set_valid_utf8(const LEX_CSTRING *name) /* points at name's buffer; no copy is made */ + { + DBUG_ASSERT(Well_formed_prefix(system_charset_info, name->str, + name->length).length() == name->length); /* the well-formed prefix must cover the whole name */ + str= name->str ; length= name->length; + } +}; + +/* Lex_ident_sys_st with constructors; the default one yields {nullptr, 0}. */ +class Lex_ident_sys: public Lex_ident_sys_st +{ +public: + Lex_ident_sys(const THD *thd, const Lex_ident_cli_st *str) + { + if (copy_ident_cli(thd, str)) /* a true result (presumably failure - confirm) resets to the null value */ + *this= Lex_ident_sys(); + } + Lex_ident_sys() + { + ((LEX_CSTRING &) *this)= {nullptr, 0}; + } + Lex_ident_sys(const char *name, size_t length) + { + LEX_CSTRING tmp= {name, length}; + set_valid_utf8(&tmp); + } + Lex_ident_sys(const THD *thd, const LEX_CSTRING *str) /* thd is not used in this body */ + { + set_valid_utf8(str); + } + Lex_ident_sys & operator=(const Lex_ident_sys_st &name) + { + Lex_ident_sys_st::operator=(name); + return *this; + } +}; + + +#endif // LEX_IDENT_SYS diff --git a/sql/sql_lex.h b/sql/sql_lex.h index 3f4af2fa940..0cee544795c 100644 --- a/sql/sql_lex.h +++ b/sql/sql_lex.h @@ -21,6 +21,7 @@ #ifndef SQL_LEX_INCLUDED #define SQL_LEX_INCLUDED +#include "lex_ident_sys.h" #include "violite.h" /* SSL_type */ #include "sql_trigger.h" 
#include "thr_lock.h" /* thr_lock_type, TL_UNLOCK */ @@ -47,153 +48,6 @@ typedef Bitmap nesting_map; /* YACC and LEX Definitions */ -/** - A string with metadata. Usually points to a string in the client - character set, but unlike Lex_ident_cli_st (see below) it does not - necessarily point to a query fragment. It can also point to memory - of other kinds (e.g. an additional THD allocated memory buffer - not overlapping with the current query text). - - We'll add more flags here eventually, to know if the string has, e.g.: - - multi-byte characters - - bad byte sequences - - backslash escapes: 'a\nb' - and reuse the original query fragments instead of making the string - copy too early, in Lex_input_stream::get_text(). - This will allow to avoid unnecessary copying, as well as - create more optimal Item types in sql_yacc.yy -*/ -struct Lex_string_with_metadata_st: public LEX_CSTRING -{ -private: - bool m_is_8bit; // True if the string has 8bit characters - char m_quote; // Quote character, or 0 if not quoted -public: - void set_8bit(bool is_8bit) { m_is_8bit= is_8bit; } - void set_metadata(bool is_8bit, char quote) - { - m_is_8bit= is_8bit; - m_quote= quote; - } - void set(const char *s, size_t len, bool is_8bit, char quote) - { - str= s; - length= len; - set_metadata(is_8bit, quote); - } - void set(const LEX_CSTRING *s, bool is_8bit, char quote) - { - ((LEX_CSTRING &)*this)= *s; - set_metadata(is_8bit, quote); - } - bool is_8bit() const { return m_is_8bit; } - bool is_quoted() const { return m_quote != '\0'; } - char quote() const { return m_quote; } - // Get string repertoire by the 8-bit flag and the character set - my_repertoire_t repertoire(CHARSET_INFO *cs) const - { - return !m_is_8bit && my_charset_is_ascii_based(cs) ? - MY_REPERTOIRE_ASCII : MY_REPERTOIRE_UNICODE30; - } - // Get string repertoire by the 8-bit flag, for ASCII-based character sets - my_repertoire_t repertoire() const - { - return !m_is_8bit ? 
MY_REPERTOIRE_ASCII : MY_REPERTOIRE_UNICODE30; - } -}; - - -/* - Used to store identifiers in the client character set. - Points to a query fragment. -*/ -struct Lex_ident_cli_st: public Lex_string_with_metadata_st -{ -public: - void set_keyword(const char *s, size_t len) - { - set(s, len, false, '\0'); - } - void set_ident(const char *s, size_t len, bool is_8bit) - { - set(s, len, is_8bit, '\0'); - } - void set_ident_quoted(const char *s, size_t len, bool is_8bit, char quote) - { - set(s, len, is_8bit, quote); - } - void set_unquoted(const LEX_CSTRING *s, bool is_8bit) - { - set(s, is_8bit, '\0'); - } - const char *pos() const { return str - is_quoted(); } - const char *end() const { return str + length + is_quoted(); } -}; - - -class Lex_ident_cli: public Lex_ident_cli_st -{ -public: - Lex_ident_cli(const LEX_CSTRING *s, bool is_8bit) - { - set_unquoted(s, is_8bit); - } - Lex_ident_cli(const char *s, size_t len) - { - set_ident(s, len, false); - } -}; - - -struct Lex_ident_sys_st: public LEX_CSTRING, Sql_alloc -{ -public: - bool copy_ident_cli(const THD *thd, const Lex_ident_cli_st *str); - bool copy_keyword(const THD *thd, const Lex_ident_cli_st *str); - bool copy_sys(const THD *thd, const LEX_CSTRING *str); - bool convert(const THD *thd, const LEX_CSTRING *str, CHARSET_INFO *cs); - bool copy_or_convert(const THD *thd, const Lex_ident_cli_st *str, - CHARSET_INFO *cs); - bool is_null() const { return str == NULL; } - bool to_size_number(ulonglong *to) const; - void set_valid_utf8(const LEX_CSTRING *name) - { - DBUG_ASSERT(Well_formed_prefix(system_charset_info, name->str, - name->length).length() == name->length); - str= name->str ; length= name->length; - } -}; - - -class Lex_ident_sys: public Lex_ident_sys_st -{ -public: - Lex_ident_sys(const THD *thd, const Lex_ident_cli_st *str) - { - if (copy_ident_cli(thd, str)) - ((LEX_CSTRING &) *this)= null_clex_str; - } - Lex_ident_sys() - { - ((LEX_CSTRING &) *this)= null_clex_str; - } - Lex_ident_sys(const char *name, 
size_t length) - { - LEX_CSTRING tmp= {name, length}; - set_valid_utf8(&tmp); - } - Lex_ident_sys(const THD *thd, const LEX_CSTRING *str) - { - set_valid_utf8(str); - } - Lex_ident_sys & operator=(const Lex_ident_sys_st &name) - { - Lex_ident_sys_st::operator=(name); - return *this; - } -}; - - struct Lex_column_list_privilege_st { List *m_columns; diff --git a/sql/sql_string.h b/sql/sql_string.h index 0c14290c312..e815534a33e 100644 --- a/sql/sql_string.h +++ b/sql/sql_string.h @@ -55,48 +55,6 @@ inline uint32 copy_and_convert(char *to, size_t to_length, CHARSET_INFO *to_cs, } -class String_copy_status: protected MY_STRCOPY_STATUS -{ -public: - const char *source_end_pos() const - { return m_source_end_pos; } - const char *well_formed_error_pos() const - { return m_well_formed_error_pos; } -}; - - -class Well_formed_prefix_status: public String_copy_status -{ -public: - Well_formed_prefix_status(CHARSET_INFO *cs, - const char *str, const char *end, size_t nchars) - { cs->well_formed_char_length(str, end, nchars, this); } -}; - - -class Well_formed_prefix: public Well_formed_prefix_status -{ - const char *m_str; // The beginning of the string -public: - Well_formed_prefix(CHARSET_INFO *cs, const char *str, const char *end, - size_t nchars) - :Well_formed_prefix_status(cs, str, end, nchars), m_str(str) - { } - Well_formed_prefix(CHARSET_INFO *cs, const char *str, size_t length, - size_t nchars) - :Well_formed_prefix_status(cs, str, str + length, nchars), m_str(str) - { } - Well_formed_prefix(CHARSET_INFO *cs, const char *str, size_t length) - :Well_formed_prefix_status(cs, str, str + length, length), m_str(str) - { } - Well_formed_prefix(CHARSET_INFO *cs, LEX_CSTRING str, size_t nchars) - :Well_formed_prefix_status(cs, str.str, str.str + str.length, nchars), - m_str(str.str) - { } - size_t length() const { return m_source_end_pos - m_str; } -}; - - class String_copier: public String_copy_status, protected MY_STRCONV_STATUS {