mirror of
				https://github.com/MariaDB/server.git
				synced 2025-11-03 20:36:16 +01:00 
			
		
		
		
	
		
			
				
	
	
		
			340 lines
		
	
	
	
		
			10 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			340 lines
		
	
	
	
		
			10 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/*****************************************************************************
 | 
						|
 | 
						|
Copyright (c) 2007, 2018, Oracle and/or its affiliates. All Rights Reserved.
 | 
						|
Copyright (c) 2016, 2020, MariaDB Corporation.
 | 
						|
 | 
						|
This program is free software; you can redistribute it and/or modify it under
 | 
						|
the terms of the GNU General Public License as published by the Free Software
 | 
						|
Foundation; version 2 of the License.
 | 
						|
 | 
						|
This program is distributed in the hope that it will be useful, but WITHOUT
 | 
						|
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 | 
						|
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 | 
						|
 | 
						|
You should have received a copy of the GNU General Public License along with
 | 
						|
this program; if not, write to the Free Software Foundation, Inc.,
 | 
						|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
 | 
						|
 | 
						|
*****************************************************************************/
 | 
						|
 | 
						|
/******************************************************************//**
 | 
						|
@file include/fts0ast.h
 | 
						|
The FTS query parser (AST) abstract syntax tree routines
 | 
						|
 | 
						|
Created 2007/03/16/03 Sunny Bains
 | 
						|
*******************************************************/
 | 
						|
 | 
						|
#ifndef INNOBASE_FST0AST_H
 | 
						|
#define INNOBASE_FST0AST_H
 | 
						|
 | 
						|
#include "mem0mem.h"
 | 
						|
 | 
						|
/* The type of AST Node */
 | 
						|
enum fts_ast_type_t {
 | 
						|
	FTS_AST_OPER,				/*!< Operator */
 | 
						|
	FTS_AST_NUMB,				/*!< Number */
 | 
						|
	FTS_AST_TERM,				/*!< Term (or word) */
 | 
						|
	FTS_AST_TEXT,				/*!< Text string */
 | 
						|
	FTS_AST_PARSER_PHRASE_LIST,		/*!< Phase for plugin parser
 | 
						|
						The difference from text type
 | 
						|
						is that we tokenize text into
 | 
						|
						term list */
 | 
						|
	FTS_AST_LIST,				/*!< Expression list */
 | 
						|
	FTS_AST_SUBEXP_LIST			/*!< Sub-Expression list */
 | 
						|
};
 | 
						|
 | 
						|
/* The FTS query operators that we support */
 | 
						|
enum fts_ast_oper_t {
 | 
						|
	FTS_NONE,				/*!< No operator */
 | 
						|
 | 
						|
	FTS_IGNORE,				/*!< Ignore rows that contain
 | 
						|
						this word */
 | 
						|
 | 
						|
	FTS_EXIST,				/*!< Include rows that contain
 | 
						|
						this word */
 | 
						|
 | 
						|
	FTS_NEGATE,				/*!< Include rows that contain
 | 
						|
						this word but rank them
 | 
						|
						lower*/
 | 
						|
 | 
						|
	FTS_INCR_RATING,			/*!< Increase the rank for this
 | 
						|
						word*/
 | 
						|
 | 
						|
	FTS_DECR_RATING,			/*!< Decrease the rank for this
 | 
						|
						word*/
 | 
						|
 | 
						|
	FTS_DISTANCE,				/*!< Proximity distance */
 | 
						|
	FTS_IGNORE_SKIP,			/*!< Transient node operator
 | 
						|
						signifies that this is a
 | 
						|
						FTS_IGNORE node, and ignored in
 | 
						|
						the first pass of
 | 
						|
						fts_ast_visit() */
 | 
						|
	FTS_EXIST_SKIP				/*!< Transient node operator
 | 
						|
						signifies that this ia a
 | 
						|
						FTS_EXIST node, and ignored in
 | 
						|
						the first pass of
 | 
						|
						fts_ast_visit() */
 | 
						|
};
 | 
						|
 | 
						|
/* Data types used by the FTS parser */
 | 
						|
struct fts_lexer_t;
 | 
						|
struct fts_ast_node_t;
 | 
						|
struct fts_ast_state_t;
 | 
						|
struct fts_ast_string_t;
 | 
						|
 | 
						|
typedef dberr_t (*fts_ast_callback)(fts_ast_oper_t, fts_ast_node_t*, void*);
 | 
						|
 | 
						|
/********************************************************************
 | 
						|
Parse the string using the lexer setup within state.*/
 | 
						|
int
 | 
						|
fts_parse(
 | 
						|
/*======*/
 | 
						|
						/* out: 0 on OK, 1 on error */
 | 
						|
	fts_ast_state_t* state);		/*!< in: ast state instance.*/
 | 
						|
 | 
						|
/********************************************************************
 | 
						|
Create an AST operator node */
 | 
						|
extern
 | 
						|
fts_ast_node_t*
 | 
						|
fts_ast_create_node_oper(
 | 
						|
/*=====================*/
 | 
						|
	void*		arg,			/*!< in: ast state */
 | 
						|
	fts_ast_oper_t	oper);			/*!< in: ast operator */
 | 
						|
/********************************************************************
 | 
						|
Create an AST term node, makes a copy of ptr */
 | 
						|
extern
 | 
						|
fts_ast_node_t*
 | 
						|
fts_ast_create_node_term(
 | 
						|
/*=====================*/
 | 
						|
	void*			arg,		/*!< in: ast state */
 | 
						|
	const fts_ast_string_t*	ptr);		/*!< in: term string */
 | 
						|
/********************************************************************
 | 
						|
Create an AST text node */
 | 
						|
extern
 | 
						|
fts_ast_node_t*
 | 
						|
fts_ast_create_node_text(
 | 
						|
/*=====================*/
 | 
						|
	void*			arg,		/*!< in: ast state */
 | 
						|
	const fts_ast_string_t*	ptr);		/*!< in: text string */
 | 
						|
/********************************************************************
 | 
						|
Create an AST expr list node */
 | 
						|
extern
 | 
						|
fts_ast_node_t*
 | 
						|
fts_ast_create_node_list(
 | 
						|
/*=====================*/
 | 
						|
	void*		arg,			/*!< in: ast state */
 | 
						|
	fts_ast_node_t*	expr);			/*!< in: ast expr */
 | 
						|
/********************************************************************
 | 
						|
Create a sub-expression list node. This function takes ownership of
 | 
						|
expr and is responsible for deleting it. */
 | 
						|
extern
 | 
						|
fts_ast_node_t*
 | 
						|
fts_ast_create_node_subexp_list(
 | 
						|
/*============================*/
 | 
						|
						/* out: new node */
 | 
						|
	void*		arg,			/*!< in: ast state instance */
 | 
						|
	fts_ast_node_t*	expr);			/*!< in: ast expr instance */
 | 
						|
/********************************************************************
 | 
						|
Set the wildcard attribute of a term.*/
 | 
						|
extern
 | 
						|
void
 | 
						|
fts_ast_term_set_wildcard(
 | 
						|
/*======================*/
 | 
						|
	fts_ast_node_t*	node);			/*!< in: term to change */
 | 
						|
/********************************************************************
 | 
						|
Set the proximity attribute of a text node. */
 | 
						|
void
 | 
						|
fts_ast_text_set_distance(
 | 
						|
/*======================*/
 | 
						|
	fts_ast_node_t*	node,			/*!< in/out: text node */
 | 
						|
	ulint		distance);		/*!< in: the text proximity
 | 
						|
						distance */
 | 
						|
/********************************************************************//**
 | 
						|
Free a fts_ast_node_t instance.
 | 
						|
@return next node to free */
 | 
						|
fts_ast_node_t*
 | 
						|
fts_ast_free_node(
 | 
						|
/*==============*/
 | 
						|
	fts_ast_node_t*	node);			/*!< in: node to free */
 | 
						|
/********************************************************************
 | 
						|
Add a sub-expression to an AST*/
 | 
						|
extern
 | 
						|
fts_ast_node_t*
 | 
						|
fts_ast_add_node(
 | 
						|
/*=============*/
 | 
						|
	fts_ast_node_t*	list,			/*!< in: list node instance */
 | 
						|
	fts_ast_node_t*	node);			/*!< in: (sub) expr to add */
 | 
						|
/********************************************************************
 | 
						|
Print the AST node recursively.*/
 | 
						|
extern
 | 
						|
void
 | 
						|
fts_ast_node_print(
 | 
						|
/*===============*/
 | 
						|
	fts_ast_node_t*	node);			/*!< in: ast node to print */
 | 
						|
/********************************************************************
 | 
						|
Free node and expr allocations.*/
 | 
						|
extern
 | 
						|
void
 | 
						|
fts_ast_state_free(
 | 
						|
/*===============*/
 | 
						|
	fts_ast_state_t*state);			/*!< in: state instance
 | 
						|
						to free */
 | 
						|
/** Check only union operation involved in the node
 | 
						|
@param[in]	node	ast node to check
 | 
						|
@return true if the node contains only union else false. */
 | 
						|
bool
 | 
						|
fts_ast_node_check_union(
 | 
						|
	fts_ast_node_t*	node);
 | 
						|
 | 
						|
/******************************************************************//**
 | 
						|
Traverse the AST - in-order traversal.
 | 
						|
@return DB_SUCCESS if all went well */
 | 
						|
dberr_t
 | 
						|
fts_ast_visit(
 | 
						|
/*==========*/
 | 
						|
	fts_ast_oper_t		oper,		/*!< in: FTS operator */
 | 
						|
	fts_ast_node_t*		node,		/*!< in: instance to traverse*/
 | 
						|
	fts_ast_callback	visitor,	/*!< in: callback */
 | 
						|
	void*			arg,		/*!< in: callback arg */
 | 
						|
	bool*			has_ignore)	/*!< out: whether we encounter
 | 
						|
						and ignored processing an
 | 
						|
						operator, currently we only
 | 
						|
						ignore FTS_IGNORE operator */
 | 
						|
	MY_ATTRIBUTE((nonnull, warn_unused_result));
 | 
						|
/********************************************************************
 | 
						|
Create a lex instance.*/
 | 
						|
fts_lexer_t*
 | 
						|
fts_lexer_create(
 | 
						|
/*=============*/
 | 
						|
	ibool		boolean_mode,		/*!< in: query type */
 | 
						|
	const byte*	query,			/*!< in: query string */
 | 
						|
	ulint		query_len)		/*!< in: query string len */
 | 
						|
	MY_ATTRIBUTE((nonnull, malloc, warn_unused_result));
 | 
						|
/********************************************************************
 | 
						|
Free an fts_lexer_t instance.*/
 | 
						|
void
 | 
						|
fts_lexer_free(
 | 
						|
/*===========*/
 | 
						|
	fts_lexer_t*	fts_lexer)		/*!< in: lexer instance to
 | 
						|
						free */
 | 
						|
	MY_ATTRIBUTE((nonnull));
 | 
						|
 | 
						|
/**
 | 
						|
Create an ast string object, with NUL-terminator, so the string
 | 
						|
has one more byte than len
 | 
						|
@param[in] str		pointer to string
 | 
						|
@param[in] len		length of the string
 | 
						|
@return ast string with NUL-terminator */
 | 
						|
fts_ast_string_t*
 | 
						|
fts_ast_string_create(
 | 
						|
	const byte*	str,
 | 
						|
	ulint		len);
 | 
						|
 | 
						|
/**
 | 
						|
Free an ast string instance
 | 
						|
@param[in,out] ast_str		string to free */
 | 
						|
void
 | 
						|
fts_ast_string_free(
 | 
						|
	fts_ast_string_t*	ast_str);
 | 
						|
 | 
						|
/**
 | 
						|
Translate ast string of type FTS_AST_NUMB to unsigned long by strtoul
 | 
						|
@param[in] str		string to translate
 | 
						|
@param[in] base		the base
 | 
						|
@return translated number */
 | 
						|
ulint
 | 
						|
fts_ast_string_to_ul(
 | 
						|
	const fts_ast_string_t*	ast_str,
 | 
						|
	int			base);
 | 
						|
 | 
						|
/* String of length len.
 | 
						|
We always store the string of length len with a terminating '\0',
 | 
						|
regardless of there is any 0x00 in the string itself */
 | 
						|
struct fts_ast_string_t {
 | 
						|
	/*!< Pointer to string. */
 | 
						|
	byte*		str;
 | 
						|
 | 
						|
	/*!< Length of the string. */
 | 
						|
	ulint		len;
 | 
						|
};
 | 
						|
 | 
						|
/* Query term type */
 | 
						|
struct fts_ast_term_t {
 | 
						|
	fts_ast_string_t*	ptr;		/*!< Pointer to term string.*/
 | 
						|
	ibool			wildcard;	/*!< TRUE if wild card set.*/
 | 
						|
};
 | 
						|
 | 
						|
/* Query text type */
 | 
						|
struct fts_ast_text_t {
 | 
						|
	fts_ast_string_t*	ptr;		/*!< Pointer to text string.*/
 | 
						|
	ulint			distance;	/*!< > 0 if proximity distance
 | 
						|
						set */
 | 
						|
};
 | 
						|
 | 
						|
/* The list of nodes in an expr list */
 | 
						|
struct fts_ast_list_t {
 | 
						|
	fts_ast_node_t*	head;			/*!< Children list head */
 | 
						|
	fts_ast_node_t*	tail;			/*!< Children list tail */
 | 
						|
};
 | 
						|
 | 
						|
/* FTS AST node to store the term, text, operator and sub-expressions.*/
 | 
						|
struct fts_ast_node_t {
 | 
						|
	fts_ast_type_t	type;			/*!< The type of node */
 | 
						|
	fts_ast_text_t	text;			/*!< Text node */
 | 
						|
	fts_ast_term_t	term;			/*!< Term node */
 | 
						|
	fts_ast_oper_t	oper;			/*!< Operator value */
 | 
						|
	fts_ast_list_t	list;			/*!< Expression list */
 | 
						|
	fts_ast_node_t*	next;			/*!< Link for expr list */
 | 
						|
	fts_ast_node_t*	next_alloc;		/*!< For tracking allocations */
 | 
						|
	bool		visited;		/*!< whether this node is
 | 
						|
						already processed */
 | 
						|
	/** current transaction */
 | 
						|
	const trx_t*	trx;
 | 
						|
	/* Used by plugin parser */
 | 
						|
	fts_ast_node_t* up_node;		/*!< Direct up node */
 | 
						|
	bool		go_up;			/*!< Flag if go one level up */
 | 
						|
};
 | 
						|
 | 
						|
/* To track state during parsing */
 | 
						|
struct fts_ast_state_t {
 | 
						|
	mem_heap_t*	heap;			/*!< Heap to use for alloc */
 | 
						|
	fts_ast_node_t*	root;			/*!< If all goes OK, then this
 | 
						|
						will point to the root.*/
 | 
						|
 | 
						|
	fts_ast_list_t	list;			/*!< List of nodes allocated */
 | 
						|
 | 
						|
	fts_lexer_t*	lexer;			/*!< Lexer callback + arg */
 | 
						|
	CHARSET_INFO*	charset;		/*!< charset used for
 | 
						|
						tokenization */
 | 
						|
	/* Used by plugin parser */
 | 
						|
	fts_ast_node_t*	cur_node;		/*!< Current node into which
 | 
						|
						 we add new node */
 | 
						|
	int		depth;			/*!< Depth of parsing state */
 | 
						|
};
 | 
						|
 | 
						|
/******************************************************************//**
 | 
						|
Create an AST term node, makes a copy of ptr for plugin parser
 | 
						|
@return node */
 | 
						|
extern
 | 
						|
fts_ast_node_t*
 | 
						|
fts_ast_create_node_term_for_parser(
 | 
						|
/*==========i=====================*/
 | 
						|
	void*		arg,			/*!< in: ast state */
 | 
						|
	const char*	ptr,			/*!< in: term string */
 | 
						|
	const ulint	len);			/*!< in: term string length */
 | 
						|
 | 
						|
/******************************************************************//**
 | 
						|
Create an AST phrase list node for plugin parser
 | 
						|
@return node */
 | 
						|
extern
 | 
						|
fts_ast_node_t*
 | 
						|
fts_ast_create_node_phrase_list(
 | 
						|
/*============================*/
 | 
						|
	void*		arg);			/*!< in: ast state */
 | 
						|
 | 
						|
#ifdef UNIV_DEBUG
 | 
						|
const char*
 | 
						|
fts_ast_node_type_get(fts_ast_type_t	type);
 | 
						|
#endif /* UNIV_DEBUG */
 | 
						|
 | 
						|
#endif /* INNOBASE_FSTS0AST_H */
 |