mariadb/sql/lex_ident_cli.h
Alexander Barkov 8879474654 MDEV-33281 Implement optimizer hints
- Using Lex_ident_sys to scan identifiers, like the SQL parser does.

  This fixes handling of double-quote-delimited and backtick-delimited identifiers,
  as well as handling of non-ASCII identifiers.

  Unescaping and converting from the client character set to the system
  character set is now done using Lex_ident_cli_st and Lex_ident_sys,
  like it's done in the SQL tokenizer/parser.
  Adding helper methods to_ident_cli() and to_ident_sys()
  in Optimizer_hint_parser::Token.

- Fixing the hint parser to report a syntax error on empty identifiers:
    SELECT /*+ BKA(``) */ * FROM t1;

- Moving a part of the code from opt_hints_parser.h to opt_hints_parser.cc

  Moving these method definitions:
  - Optimizer_hint_tokenizer::find_keyword()
  - Optimizer_hint_tokenizer::get_token()

  to avoid huge pieces of the code in the header file.

- A Lex_ident_cli_st cleanup
  Fixing a few Lex_ident_cli_st methods to return Lex_ident_cli_st &
  instead of void, to make them easier to use in the caller code.

- Fixing the hint parser to display the correct line number

  Adding a new data type Lex_comment_st
  (a combination of LEX_CSTRING and a line number)
  Using it in sql_yacc.yy

- Getting rid of redundant dependencies on sql_hints_parser.h

  Moving void LEX::resolve_optimizer_hints() from sql_lex.h to sql_lex.cc

  Adding a class Optimizer_hint_parser_output, deriving from
  Optimizer_hint_parser::Hint_list. Fixing the hint parser to
  return a pointer to an allocated instance of Optimizer_hint_parser_output
  rather than an instance of Optimizer_hint_parser::Hint_list.
  This allows using a forward declaration of Optimizer_hint_parser_output
  in sql_lex.h, thus avoiding dependencies on sql_hints_parser.h.
2025-03-18 18:28:18 +01:00

127 lines
3.8 KiB
C++

/* Copyright (c) 2000, 2019, Oracle and/or its affiliates.
Copyright (c) 2010, 2024, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
#ifndef LEX_IDENT_CLI
#define LEX_IDENT_CLI
#include "my_global.h"
#include "m_ctype.h"
/**
A string with metadata. Usually points to a string in the client
character set, but unlike Lex_ident_cli_st (see below) it does not
necessarily point to a query fragment. It can also point to memory
of other kinds (e.g. an additional THD allocated memory buffer
not overlapping with the current query text).
We'll add more flags here eventually, to know if the string has, e.g.:
- multi-byte characters
- bad byte sequences
- backslash escapes: 'a\nb'
and reuse the original query fragments instead of making the string
copy too early, in Lex_input_stream::get_text().
This will make it possible to avoid unnecessary copying, as well as
to create more optimal Item types in sql_yacc.yy
*/
struct Lex_string_with_metadata_st: public LEX_CSTRING
{
private:
bool m_is_8bit; // True if the string has 8bit characters
char m_quote; // Quote character, or 0 if not quoted
public:
void set_8bit(bool is_8bit) { m_is_8bit= is_8bit; }
void set_metadata(bool is_8bit, char quote)
{
m_is_8bit= is_8bit;
m_quote= quote;
}
void set(const char *s, size_t len, bool is_8bit, char quote)
{
str= s;
length= len;
set_metadata(is_8bit, quote);
}
void set(const LEX_CSTRING *s, bool is_8bit, char quote)
{
((LEX_CSTRING &)*this)= *s;
set_metadata(is_8bit, quote);
}
bool is_8bit() const { return m_is_8bit; }
bool is_quoted() const { return m_quote != '\0'; }
char quote() const { return m_quote; }
// Get string repertoire by the 8-bit flag and the character set
my_repertoire_t repertoire(CHARSET_INFO *cs) const
{
return !m_is_8bit && my_charset_is_ascii_based(cs) ?
MY_REPERTOIRE_ASCII : MY_REPERTOIRE_UNICODE30;
}
// Get string repertoire by the 8-bit flag, for ASCII-based character sets
my_repertoire_t repertoire() const
{
return !m_is_8bit ? MY_REPERTOIRE_ASCII : MY_REPERTOIRE_UNICODE30;
}
};
/*
Used to store identifiers in the client character set.
Points to a query fragment.
*/
struct Lex_ident_cli_st: public Lex_string_with_metadata_st
{
public:
  /*
    Point to a keyword: no 8-bit characters, no quote character.
    Returns *this, so the call can be used inside an expression.
  */
  Lex_ident_cli_st & set_keyword(const char *s, size_t len)
  {
    return set_ident_quoted(s, len, false, '\0');
  }

  /*
    Point to an unquoted identifier with the given 8-bit flag.
    Returns *this.
  */
  Lex_ident_cli_st & set_ident(const char *s, size_t len, bool is_8bit)
  {
    return set_ident_quoted(s, len, is_8bit, '\0');
  }

  /*
    Point to an identifier, storing both the 8-bit flag
    and the quote character. Returns *this.
  */
  Lex_ident_cli_st & set_ident_quoted(const char *s, size_t len,
                                      bool is_8bit, char quote)
  {
    set(s, len, is_8bit, quote);
    return *this;
  }

  /*
    Copy the LEX_CSTRING part from *s, store the 8-bit flag
    and mark the identifier as not quoted. Returns *this.
  */
  Lex_ident_cli_st & set_unquoted(const LEX_CSTRING *s, bool is_8bit)
  {
    const char no_quote= '\0';
    set(s, is_8bit, no_quote);
    return *this;
  }

  /*
    str itself for an unquoted identifier; one byte before str
    for a quoted one (presumably the opening quote character).
  */
  const char *pos() const
  {
    return is_quoted() ? str - 1 : str;
  }

  /*
    The byte right after the identifier text for an unquoted identifier;
    one byte further for a quoted one (presumably stepping over
    the closing quote character).
  */
  const char *end() const
  {
    const char *text_end= str + length;
    return is_quoted() ? text_end + 1 : text_end;
  }
};
class Lex_ident_cli: public Lex_ident_cli_st
{
public:
Lex_ident_cli(const LEX_CSTRING *s, bool is_8bit)
{
set_unquoted(s, is_8bit);
}
Lex_ident_cli(const char *s, size_t len)
{
set_ident(s, len, false);
}
};
#endif // LEX_IDENT_CLI