mirror of
				https://github.com/MariaDB/server.git
				synced 2025-10-31 19:06:14 +01:00 
			
		
		
		
	 bd30c796fa
			
		
	
	
	bd30c796fa
	
	
	
		
			
			- Using Lex_ident_sys to scan identifiers, like the SQL parser does.
  This fixes handling of double-quote-delimited and backtick-delimited identifiers,
  as well as handling of non-ASCII identifiers.
  Unescaping and converting from the client character set to the system
  character set is now done using Lex_ident_cli_st and Lex_ident_sys,
  like it's done in the SQL tokenizer/parser.
  Adding helper methods to_ident_cli() and to_ident_sys()
  in Optimizer_hint_parser::Token.
- Fixing the hint parser to report a syntax error on empty identifiers:
    SELECT /*+ BKA(``) */ * FROM t1;
- Moving a part of the code from opt_hints_parser.h to opt_hints_parser.cc
  Moving these method definitions:
  - Optimizer_hint_tokenizer::find_keyword()
  - Optimizer_hint_tokenizer::get_token()
  to avoid huge pieces of the code in the header file.
- A Lex_ident_cli_st cleanup
  Fixing a few Lex_ident_cli_st methods to return Lex_ident_cli_st &
  instead of void, to make them easier to use in the caller code.
- Fixing the hint parser to display the correct line number
  Adding a new data type Lex_comment_st
  (a combination of LEX_CSTRING and a line number)
  Using it in sql_yacc.yy
- Getting rid of redundant dependencies on sql_hints_parser.h
  Moving void LEX::resolve_optimizer_hints() from sql_lex.h to sql_lex.cc
  Adding a class Optimizer_hint_parser_output, deriving from
  Optimizer_hint_parser::Hint_list. Fixing the hint parser to
  return a pointer to an allocated instance of Optimizer_hint_parser_output
  rather than an instance of Optimizer_hint_parser::Hint_list.
  This makes it possible to use a forward declaration of
  Optimizer_hint_parser_output in sql_lex.h and thus avoid dependencies
  on sql_hints_parser.h.
		
	
			
		
			
				
	
	
		
			127 lines
		
	
	
	
		
			3.8 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			127 lines
		
	
	
	
		
			3.8 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| /* Copyright (c) 2000, 2019, Oracle and/or its affiliates.
 | |
|    Copyright (c) 2010, 2024, MariaDB Corporation.
 | |
| 
 | |
|    This program is free software; you can redistribute it and/or modify
 | |
|    it under the terms of the GNU General Public License as published by
 | |
|    the Free Software Foundation; version 2 of the License.
 | |
| 
 | |
|    This program is distributed in the hope that it will be useful,
 | |
|    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
|    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | |
|    GNU General Public License for more details.
 | |
| 
 | |
|    You should have received a copy of the GNU General Public License
 | |
|    along with this program; if not, write to the Free Software
 | |
|    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
 | |
| 
 | |
| 
 | |
| #ifndef LEX_IDENT_CLI
 | |
| #define LEX_IDENT_CLI
 | |
| 
 | |
| #include "my_global.h"
 | |
| #include "m_ctype.h"
 | |
| 
 | |
| 
 | |
| /**
 | |
|   A string with metadata. Usually points to a string in the client
 | |
|   character set, but unlike Lex_ident_cli_st (see below) it does not
 | |
|   necessarily point to a query fragment. It can also point to memory
 | |
|   of other kinds (e.g. an additional THD allocated memory buffer
 | |
|   not overlapping with the current query text).
 | |
| 
 | |
|   We'll add more flags here eventually, to know if the string has, e.g.:
 | |
|   - multi-byte characters
 | |
|   - bad byte sequences
 | |
|   - backslash escapes:   'a\nb'
 | |
|   and reuse the original query fragments instead of making the string
 | |
|   copy too early, in Lex_input_stream::get_text().
 | |
|   This will allow to avoid unnecessary copying, as well as
 | |
|   create more optimal Item types in sql_yacc.yy
 | |
| */
 | |
| struct Lex_string_with_metadata_st: public LEX_CSTRING
 | |
| {
 | |
| private:
 | |
|   bool m_is_8bit; // True if the string has 8bit characters
 | |
|   char m_quote;   // Quote character, or 0 if not quoted
 | |
| public:
 | |
|   void set_8bit(bool is_8bit) { m_is_8bit= is_8bit; }
 | |
|   void set_metadata(bool is_8bit, char quote)
 | |
|   {
 | |
|     m_is_8bit= is_8bit;
 | |
|     m_quote= quote;
 | |
|   }
 | |
|   void set(const char *s, size_t len, bool is_8bit, char quote)
 | |
|   {
 | |
|     str= s;
 | |
|     length= len;
 | |
|     set_metadata(is_8bit, quote);
 | |
|   }
 | |
|   void set(const LEX_CSTRING *s, bool is_8bit, char quote)
 | |
|   {
 | |
|     ((LEX_CSTRING &)*this)= *s;
 | |
|     set_metadata(is_8bit, quote);
 | |
|   }
 | |
|   bool is_8bit() const { return m_is_8bit; }
 | |
|   bool is_quoted() const { return m_quote != '\0'; }
 | |
|   char quote() const { return m_quote; }
 | |
|   // Get string repertoire by the 8-bit flag and the character set
 | |
|   my_repertoire_t repertoire(CHARSET_INFO *cs) const
 | |
|   {
 | |
|     return !m_is_8bit && my_charset_is_ascii_based(cs) ?
 | |
|            MY_REPERTOIRE_ASCII : MY_REPERTOIRE_UNICODE30;
 | |
|   }
 | |
|   // Get string repertoire by the 8-bit flag, for ASCII-based character sets
 | |
|   my_repertoire_t repertoire() const
 | |
|   {
 | |
|     return !m_is_8bit ? MY_REPERTOIRE_ASCII : MY_REPERTOIRE_UNICODE30;
 | |
|   }
 | |
| };
 | |
| 
 | |
| 
 | |
| /*
 | |
|   Used to store identifiers in the client character set.
 | |
|   Points to a query fragment.
 | |
| */
 | |
| struct Lex_ident_cli_st: public Lex_string_with_metadata_st
 | |
| {
 | |
| public:
 | |
|   Lex_ident_cli_st & set_keyword(const char *s, size_t len)
 | |
|   {
 | |
|     set(s, len, false, '\0');
 | |
|     return *this;
 | |
|   }
 | |
|   Lex_ident_cli_st & set_ident(const char *s, size_t len, bool is_8bit)
 | |
|   {
 | |
|     set(s, len, is_8bit, '\0');
 | |
|     return *this;
 | |
|   }
 | |
|   Lex_ident_cli_st & set_ident_quoted(const char *s, size_t len,
 | |
|                                       bool is_8bit, char quote)
 | |
|   {
 | |
|     set(s, len, is_8bit, quote);
 | |
|     return *this;
 | |
|   }
 | |
|   Lex_ident_cli_st & set_unquoted(const LEX_CSTRING *s, bool is_8bit)
 | |
|   {
 | |
|     set(s, is_8bit, '\0');
 | |
|     return *this;
 | |
|   }
 | |
|   const char *pos() const { return str - is_quoted(); }
 | |
|   const char *end() const { return str + length + is_quoted(); }
 | |
| };
 | |
| 
 | |
| 
 | |
| class Lex_ident_cli: public Lex_ident_cli_st
 | |
| {
 | |
| public:
 | |
|   Lex_ident_cli(const LEX_CSTRING *s, bool is_8bit)
 | |
|   {
 | |
|     set_unquoted(s, is_8bit);
 | |
|   }
 | |
|   Lex_ident_cli(const char *s, size_t len)
 | |
|   {
 | |
|     set_ident(s, len, false);
 | |
|   }
 | |
| };
 | |
| 
 | |
| #endif // LEX_IDENT_CLI
 |