mariadb/storage/innobase/include/fts0ast.h
2020-10-22 17:08:49 +03:00

340 lines
10 KiB
C

/*****************************************************************************
Copyright (c) 2007, 2018, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2016, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
*****************************************************************************/
/******************************************************************//**
@file include/fts0ast.h
Abstract syntax tree (AST) routines for the FTS query parser
Created 2007/03/16/03 Sunny Bains
*******************************************************/
#ifndef INNOBASE_FST0AST_H
#define INNOBASE_FST0AST_H
#include "mem0mem.h"
/** The type of an AST node; stored in fts_ast_node_t::type. */
enum fts_ast_type_t {
FTS_AST_OPER, /*!< Operator */
FTS_AST_NUMB, /*!< Number */
FTS_AST_TERM, /*!< Term (or word) */
FTS_AST_TEXT, /*!< Text string */
FTS_AST_PARSER_PHRASE_LIST, /*!< Phrase for plugin parser.
The difference from the text type
is that we tokenize the text into
a term list */
FTS_AST_LIST, /*!< Expression list */
FTS_AST_SUBEXP_LIST /*!< Sub-Expression list */
};
/** The FTS query operators that we support. */
enum fts_ast_oper_t {
FTS_NONE, /*!< No operator */
FTS_IGNORE, /*!< Ignore rows that contain
this word */
FTS_EXIST, /*!< Include rows that contain
this word */
FTS_NEGATE, /*!< Include rows that contain
this word but rank them
lower */
FTS_INCR_RATING, /*!< Increase the rank for this
word */
FTS_DECR_RATING, /*!< Decrease the rank for this
word */
FTS_DISTANCE, /*!< Proximity distance */
FTS_IGNORE_SKIP, /*!< Transient node operator;
signifies that this is an
FTS_IGNORE node, and is ignored
in the first pass of
fts_ast_visit() */
FTS_EXIST_SKIP /*!< Transient node operator;
signifies that this is an
FTS_EXIST node, and is ignored
in the first pass of
fts_ast_visit() */
};
/* Data types used by the FTS parser. fts_ast_node_t, fts_ast_state_t and
fts_ast_string_t are defined later in this header; fts_lexer_t is only
forward-declared here. */
struct fts_lexer_t;
struct fts_ast_node_t;
struct fts_ast_state_t;
struct fts_ast_string_t;
/** Visitor callback type accepted by fts_ast_visit(). It receives an
operator, an AST node and an opaque user argument, and returns a dberr_t.
NOTE(review): presumably the operator is the one in effect for the node
being visited - confirm against the fts_ast_visit() definition. */
typedef dberr_t (*fts_ast_callback)(fts_ast_oper_t, fts_ast_node_t*, void*);
/********************************************************************//**
Parse the string using the lexer setup within state.
@return 0 on OK, 1 on error */
int
fts_parse(
/*======*/
fts_ast_state_t* state); /*!< in: ast state instance.*/
/********************************************************************//**
Create an AST operator node.
@return the new node */
extern
fts_ast_node_t*
fts_ast_create_node_oper(
/*=====================*/
void* arg, /*!< in: ast state (passed as an
untyped pointer) */
fts_ast_oper_t oper); /*!< in: ast operator */
/********************************************************************//**
Create an AST term node; makes a copy of ptr, so the caller keeps
ownership of the string it passes in.
@return the new node */
extern
fts_ast_node_t*
fts_ast_create_node_term(
/*=====================*/
void* arg, /*!< in: ast state (passed as an
untyped pointer) */
const fts_ast_string_t* ptr); /*!< in: term string */
/********************************************************************//**
Create an AST text node.
NOTE(review): unlike fts_ast_create_node_term(), this comment does not
say whether ptr is copied - confirm against the definition.
@return the new node */
extern
fts_ast_node_t*
fts_ast_create_node_text(
/*=====================*/
void* arg, /*!< in: ast state (passed as an
untyped pointer) */
const fts_ast_string_t* ptr); /*!< in: text string */
/********************************************************************//**
Create an AST expression list node.
@return the new node */
extern
fts_ast_node_t*
fts_ast_create_node_list(
/*=====================*/
void* arg, /*!< in: ast state (passed as an
untyped pointer) */
fts_ast_node_t* expr); /*!< in: ast expr */
/********************************************************************//**
Create a sub-expression list node. This function takes ownership of
expr and is responsible for deleting it.
@return the new node */
extern
fts_ast_node_t*
fts_ast_create_node_subexp_list(
/*============================*/
void* arg, /*!< in: ast state instance */
fts_ast_node_t* expr); /*!< in: ast expr instance */
/********************************************************************
Set the wildcard attribute of a term (see fts_ast_term_t::wildcard). */
extern
void
fts_ast_term_set_wildcard(
/*======================*/
fts_ast_node_t* node); /*!< in/out: term node to change */
/********************************************************************
Set the proximity attribute of a text node
(see fts_ast_text_t::distance). */
void
fts_ast_text_set_distance(
/*======================*/
fts_ast_node_t* node, /*!< in/out: text node */
ulint distance); /*!< in: the text proximity
distance */
/********************************************************************//**
Free a fts_ast_node_t instance.
NOTE(review): "next node to free" presumably follows the next_alloc
link of fts_ast_node_t - confirm against the definition.
@return next node to free */
fts_ast_node_t*
fts_ast_free_node(
/*==============*/
fts_ast_node_t* node); /*!< in: node to free */
/********************************************************************
Add a sub-expression to an AST list node.
NOTE(review): the meaning of the returned pointer is not documented
here; presumably it is the list node - confirm against the definition. */
extern
fts_ast_node_t*
fts_ast_add_node(
/*=============*/
fts_ast_node_t* list, /*!< in/out: list node instance */
fts_ast_node_t* node); /*!< in: (sub) expr to add */
/********************************************************************
Print the AST node recursively, i.e. the node together with the nodes
below it. */
extern
void
fts_ast_node_print(
/*===============*/
fts_ast_node_t* node); /*!< in: ast node to print */
/********************************************************************
Free the node and expr allocations tracked by the parser state. */
extern
void
fts_ast_state_free(
/*===============*/
fts_ast_state_t*state); /*!< in/out: state instance
to free */
/** Check whether only union operations are involved in the node.
@param[in] node ast node to check
@return true if the node contains only union operations, else false */
bool
fts_ast_node_check_union(
fts_ast_node_t* node);
/******************************************************************//**
Traverse the AST - in-order traversal.
The result must be checked by the caller (warn_unused_result).
NOTE(review): the nonnull attribute carries no argument list; assuming
MY_ATTRIBUTE maps to the compiler's __attribute__, every pointer
parameter is declared non-NULL - confirm.
@return DB_SUCCESS if all went well */
dberr_t
fts_ast_visit(
/*==========*/
fts_ast_oper_t oper, /*!< in: FTS operator */
fts_ast_node_t* node, /*!< in: instance to traverse*/
fts_ast_callback visitor, /*!< in: callback */
void* arg, /*!< in: callback arg */
bool* has_ignore) /*!< out: whether we encountered
an operator and skipped
processing it; currently only
the FTS_IGNORE operator is
skipped */
MY_ATTRIBUTE((nonnull, warn_unused_result));
/********************************************************************
Create a lexer instance over a query string. The result must not be
discarded (warn_unused_result); release it with fts_lexer_free(). */
fts_lexer_t*
fts_lexer_create(
/*=============*/
ibool boolean_mode, /*!< in: query type */
const byte* query, /*!< in: query string; declared
non-NULL via the attribute below */
ulint query_len) /*!< in: query string len */
MY_ATTRIBUTE((nonnull, malloc, warn_unused_result));
/********************************************************************
Free an fts_lexer_t instance. */
void
fts_lexer_free(
/*===========*/
fts_lexer_t* fts_lexer) /*!< in: lexer instance to
free; declared non-NULL via
the attribute below */
MY_ATTRIBUTE((nonnull));
/**
Create an ast string object with a NUL terminator, so the stored string
occupies one more byte than len.
@param[in] str pointer to string
@param[in] len length of the string, not counting the terminator
@return ast string with NUL-terminator */
fts_ast_string_t*
fts_ast_string_create(
const byte* str,
ulint len);
/**
Free an ast string instance.
@param[in,out] ast_str string to free */
void
fts_ast_string_free(
fts_ast_string_t* ast_str);
/**
Translate an ast string of type FTS_AST_NUMB to unsigned long by strtoul.
@param[in] ast_str string to translate
@param[in] base the base
@return translated number */
ulint
fts_ast_string_to_ul(
const fts_ast_string_t* ast_str,
int base);
/** String of length len.
We always store the string of length len with a terminating '\0',
regardless of whether there is any 0x00 in the string itself, so len
(not the first NUL byte) determines where the string ends. */
struct fts_ast_string_t {
/** Pointer to string. */
byte* str;
/** Length of the string. */
ulint len;
};
/** Query term type; embedded in fts_ast_node_t as the term member. */
struct fts_ast_term_t {
fts_ast_string_t* ptr; /*!< Pointer to term string.*/
ibool wildcard; /*!< TRUE if wild card set.*/
};
/** Query text type; embedded in fts_ast_node_t as the text member. */
struct fts_ast_text_t {
fts_ast_string_t* ptr; /*!< Pointer to text string.*/
ulint distance; /*!< > 0 if proximity distance
set */
};
/** The list of nodes in an expr list, kept as head and tail pointers.
Also used by fts_ast_state_t to hold the list of allocated nodes. */
struct fts_ast_list_t {
fts_ast_node_t* head; /*!< Children list head */
fts_ast_node_t* tail; /*!< Children list tail */
};
/** FTS AST node to store the term, text, operator and sub-expressions.
The text, term, oper and list members are separate fields (not a union).
NOTE(review): presumably `type` tells which of them is meaningful for a
given node - confirm against the node constructors. */
struct fts_ast_node_t {
fts_ast_type_t type; /*!< The type of node */
fts_ast_text_t text; /*!< Text node */
fts_ast_term_t term; /*!< Term node */
fts_ast_oper_t oper; /*!< Operator value */
fts_ast_list_t list; /*!< Expression list */
fts_ast_node_t* next; /*!< Link for expr list */
fts_ast_node_t* next_alloc; /*!< For tracking allocations */
bool visited; /*!< whether this node is
already processed */
/** current transaction */
const trx_t* trx;
/* Used by plugin parser */
fts_ast_node_t* up_node; /*!< Direct up node */
bool go_up; /*!< Flag if go one level up */
};
/** To track state during parsing. An instance is handed to fts_parse()
and released with fts_ast_state_free(). */
struct fts_ast_state_t {
mem_heap_t* heap; /*!< Heap to use for alloc */
fts_ast_node_t* root; /*!< If all goes OK, then this
will point to the root.*/
fts_ast_list_t list; /*!< List of nodes allocated */
fts_lexer_t* lexer; /*!< Lexer callback + arg */
CHARSET_INFO* charset; /*!< charset used for
tokenization */
/* Used by plugin parser */
fts_ast_node_t* cur_node; /*!< Current node into which
we add new node */
int depth; /*!< Depth of parsing state */
};
/******************************************************************//**
Create an AST term node for the plugin parser; makes a copy of ptr.
Unlike fts_ast_create_node_term(), the term is passed as a raw
pointer plus length instead of an fts_ast_string_t.
@return node */
extern
fts_ast_node_t*
fts_ast_create_node_term_for_parser(
/*================================*/
void* arg, /*!< in: ast state */
const char* ptr, /*!< in: term string */
const ulint len); /*!< in: term string length */
/******************************************************************//**
Create an AST phrase list node for the plugin parser
(see FTS_AST_PARSER_PHRASE_LIST).
@return node */
extern
fts_ast_node_t*
fts_ast_create_node_phrase_list(
/*============================*/
void* arg); /*!< in: ast state */
#ifdef UNIV_DEBUG
/** Map an AST node type to a string; declared only in UNIV_DEBUG builds.
NOTE(review): presumably returns a printable name of the enumerator for
diagnostics - confirm against the definition.
@param[in] type ast node type
@return string for the given type */
const char*
fts_ast_node_type_get(fts_ast_type_t type);
#endif /* UNIV_DEBUG */
#endif /* INNOBASE_FST0AST_H */