mirror of
				https://github.com/MariaDB/server.git
				synced 2025-11-04 04:46:15 +01:00 
			
		
		
		
	
		
			
				
	
	
		
			283 lines
		
	
	
	
		
			7.8 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			283 lines
		
	
	
	
		
			7.8 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
/*****************************************************************************
 | 
						|
 | 
						|
Copyright (c) 2013, 2015, Oracle and/or its affiliates. All Rights Reserved.
 | 
						|
 | 
						|
This program is free software; you can redistribute it and/or modify it under
 | 
						|
the terms of the GNU General Public License as published by the Free Software
 | 
						|
Foundation; version 2 of the License.
 | 
						|
 | 
						|
This program is distributed in the hope that it will be useful, but WITHOUT
 | 
						|
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 | 
						|
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 | 
						|
 | 
						|
You should have received a copy of the GNU General Public License along with
 | 
						|
this program; if not, write to the Free Software Foundation, Inc.,
 | 
						|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
 | 
						|
 | 
						|
*****************************************************************************/
 | 
						|
 | 
						|
/******************************************************************//**
 | 
						|
@file fts/fts0plugin.cc
 | 
						|
Full Text Search plugin support.
 | 
						|
 | 
						|
Created 2013/06/04 Shaohua Wang
 | 
						|
***********************************************************************/
 | 
						|
 | 
						|
#include "fts0ast.h"
 | 
						|
#include "fts0plugin.h"
 | 
						|
#include "fts0tokenize.h"
 | 
						|
 | 
						|
#include "ft_global.h"
 | 
						|
 | 
						|
/******************************************************************//**
 | 
						|
FTS default parser init
 | 
						|
@return 0 */
 | 
						|
static int fts_default_parser_init(MYSQL_FTPARSER_PARAM*) { return 0; }
 | 
						|
 | 
						|
/******************************************************************//**
 | 
						|
FTS default parser deinit
 | 
						|
@return 0 */
 | 
						|
static int fts_default_parser_deinit(MYSQL_FTPARSER_PARAM*) { return 0; }
 | 
						|
 | 
						|
/******************************************************************//**
 | 
						|
FTS default parser parse from ft_static.c in MYISAM.
 | 
						|
@return 0 if parse successfully, or return non-zero */
 | 
						|
static
 | 
						|
int
 | 
						|
fts_default_parser_parse(
 | 
						|
/*=====================*/
 | 
						|
	MYSQL_FTPARSER_PARAM *param)	/*!< in: plugin parser param */
 | 
						|
{
 | 
						|
	return(param->mysql_parse(param, param->doc, param->length));
 | 
						|
}
 | 
						|
 | 
						|
/* FTS default parser from ft_static.c in MYISAM. */
 | 
						|
struct st_mysql_ftparser fts_default_parser =
 | 
						|
{
 | 
						|
	MYSQL_FTPARSER_INTERFACE_VERSION,
 | 
						|
	fts_default_parser_parse,
 | 
						|
	fts_default_parser_init,
 | 
						|
	fts_default_parser_deinit
 | 
						|
};
 | 
						|
 | 
						|
/******************************************************************//**
 | 
						|
Get a operator node from token boolean info
 | 
						|
@return node */
 | 
						|
static
 | 
						|
fts_ast_node_t*
 | 
						|
fts_query_get_oper_node(
 | 
						|
/*====================*/
 | 
						|
	MYSQL_FTPARSER_BOOLEAN_INFO*	info,	/*!< in: token info */
 | 
						|
	fts_ast_state_t*		state)	/*!< in/out: query parse state*/
 | 
						|
{
 | 
						|
	fts_ast_node_t*	oper_node = NULL;
 | 
						|
 | 
						|
	if (info->yesno > 0) {
 | 
						|
		oper_node = fts_ast_create_node_oper(state, FTS_EXIST);
 | 
						|
	} else if (info->yesno < 0) {
 | 
						|
		oper_node = fts_ast_create_node_oper(state, FTS_IGNORE);
 | 
						|
	} else if (info->weight_adjust > 0) {
 | 
						|
		oper_node = fts_ast_create_node_oper(state, FTS_INCR_RATING);
 | 
						|
	} else if (info->weight_adjust < 0) {
 | 
						|
		oper_node = fts_ast_create_node_oper(state, FTS_DECR_RATING);
 | 
						|
	} else if (info->wasign > 0) {
 | 
						|
		oper_node = fts_ast_create_node_oper(state, FTS_NEGATE);
 | 
						|
	}
 | 
						|
 | 
						|
	return(oper_node);
 | 
						|
}
 | 
						|
 | 
						|
/******************************************************************//**
 | 
						|
FTS plugin parser 'myql_add_word' callback function for query parse.
 | 
						|
Refer to 'st_mysql_ftparser_param' for more detail.
 | 
						|
Note:
 | 
						|
a. Parse logic refers to 'ftb_query_add_word' from ft_boolean_search.c in MYISAM;
 | 
						|
b. Parse node or tree refers to fts0pars.y.
 | 
						|
@return 0 if add successfully, or return non-zero. */
 | 
						|
static
 | 
						|
int
 | 
						|
fts_query_add_word_for_parser(
 | 
						|
/*==========================*/
 | 
						|
	MYSQL_FTPARSER_PARAM*	param,		/*!< in: parser param */
 | 
						|
	const char*			word,		/*!< in: token */
 | 
						|
	int			word_len,	/*!< in: token length */
 | 
						|
	MYSQL_FTPARSER_BOOLEAN_INFO*	info)	/*!< in: token info */
 | 
						|
{
 | 
						|
	fts_ast_state_t* state =
 | 
						|
		static_cast<fts_ast_state_t*>(param->mysql_ftparam);
 | 
						|
	fts_ast_node_t*	cur_node = state->cur_node;
 | 
						|
	fts_ast_node_t*	oper_node = NULL;
 | 
						|
	fts_ast_node_t*	term_node = NULL;
 | 
						|
	fts_ast_node_t*	node = NULL;
 | 
						|
 | 
						|
	switch (info->type) {
 | 
						|
	case FT_TOKEN_STOPWORD:
 | 
						|
		/* We only handler stopword in phrase */
 | 
						|
		if (cur_node->type != FTS_AST_PARSER_PHRASE_LIST) {
 | 
						|
			break;
 | 
						|
		}
 | 
						|
		/* fall through */
 | 
						|
 | 
						|
	case FT_TOKEN_WORD:
 | 
						|
		term_node = fts_ast_create_node_term_for_parser(
 | 
						|
			state, word, ulint(word_len));
 | 
						|
 | 
						|
		if (info->trunc) {
 | 
						|
			fts_ast_term_set_wildcard(term_node);
 | 
						|
		}
 | 
						|
 | 
						|
		if (cur_node->type == FTS_AST_PARSER_PHRASE_LIST) {
 | 
						|
			/* Ignore operator inside phrase */
 | 
						|
			fts_ast_add_node(cur_node, term_node);
 | 
						|
		} else {
 | 
						|
			ut_ad(cur_node->type == FTS_AST_LIST
 | 
						|
			      || cur_node->type == FTS_AST_SUBEXP_LIST);
 | 
						|
			oper_node = fts_query_get_oper_node(info, state);
 | 
						|
 | 
						|
			if (oper_node) {
 | 
						|
				node = fts_ast_create_node_list(state, oper_node);
 | 
						|
				fts_ast_add_node(node, term_node);
 | 
						|
				fts_ast_add_node(cur_node, node);
 | 
						|
			} else {
 | 
						|
				fts_ast_add_node(cur_node, term_node);
 | 
						|
			}
 | 
						|
		}
 | 
						|
 | 
						|
		break;
 | 
						|
 | 
						|
	case FT_TOKEN_LEFT_PAREN:
 | 
						|
		/* Check parse error */
 | 
						|
		if (cur_node->type != FTS_AST_LIST
 | 
						|
		    && cur_node->type != FTS_AST_SUBEXP_LIST) {
 | 
						|
			return(1);
 | 
						|
		}
 | 
						|
 | 
						|
		/* Set operator */
 | 
						|
                oper_node = fts_query_get_oper_node(info, state);
 | 
						|
		if (oper_node != NULL) {
 | 
						|
			node = fts_ast_create_node_list(state, oper_node);
 | 
						|
			fts_ast_add_node(cur_node, node);
 | 
						|
			node->go_up = true;
 | 
						|
			node->up_node = cur_node;
 | 
						|
			cur_node = node;
 | 
						|
		}
 | 
						|
 | 
						|
		if (info->quot) {
 | 
						|
			/* Phrase node */
 | 
						|
			node = fts_ast_create_node_phrase_list(state);
 | 
						|
		} else {
 | 
						|
			/* Subexp list node */
 | 
						|
			node = fts_ast_create_node_subexp_list(state, NULL);
 | 
						|
		}
 | 
						|
 | 
						|
		fts_ast_add_node(cur_node, node);
 | 
						|
 | 
						|
		node->up_node = cur_node;
 | 
						|
		state->cur_node = node;
 | 
						|
		state->depth += 1;
 | 
						|
 | 
						|
		break;
 | 
						|
 | 
						|
	case FT_TOKEN_RIGHT_PAREN:
 | 
						|
		info->quot = 0;
 | 
						|
 | 
						|
		if (cur_node->up_node != NULL) {
 | 
						|
			cur_node = cur_node->up_node;
 | 
						|
 | 
						|
			if (cur_node->go_up) {
 | 
						|
				ut_a(cur_node->up_node
 | 
						|
				     && !(cur_node->up_node->go_up));
 | 
						|
				cur_node = cur_node->up_node;
 | 
						|
			}
 | 
						|
		}
 | 
						|
 | 
						|
		state->cur_node = cur_node;
 | 
						|
 | 
						|
		if (state->depth > 0) {
 | 
						|
			state->depth--;
 | 
						|
		} else {
 | 
						|
			/* Parentheses mismatch */
 | 
						|
			return(1);
 | 
						|
		}
 | 
						|
 | 
						|
		break;
 | 
						|
 | 
						|
	case FT_TOKEN_EOF:
 | 
						|
	default:
 | 
						|
		break;
 | 
						|
	}
 | 
						|
 | 
						|
	return(0);
 | 
						|
}
 | 
						|
 | 
						|
/******************************************************************//**
 | 
						|
FTS plugin parser 'myql_parser' callback function for query parse.
 | 
						|
Refer to 'st_mysql_ftparser_param' for more detail.
 | 
						|
@return 0 if parse successfully */
 | 
						|
static
 | 
						|
int
 | 
						|
fts_parse_query_internal(
 | 
						|
/*=====================*/
 | 
						|
	MYSQL_FTPARSER_PARAM*	param,	/*!< in: parser param */
 | 
						|
	const char*			query,	/*!< in: query string */
 | 
						|
	int			len)	/*!< in: query length */
 | 
						|
{
 | 
						|
	MYSQL_FTPARSER_BOOLEAN_INFO	info;
 | 
						|
	const CHARSET_INFO*		cs = param->cs;
 | 
						|
	uchar**	start = (uchar**)(&query);
 | 
						|
	uchar*	end = (uchar*)(query + len);
 | 
						|
	FT_WORD	w = {NULL, 0, 0};
 | 
						|
 | 
						|
	info.prev = ' ';
 | 
						|
	info.quot = 0;
 | 
						|
	memset(&w, 0, sizeof(w));
 | 
						|
	/* Note: We don't handle simple parser mode here,
 | 
						|
	but user supplied plugin parser should handler it. */
 | 
						|
	while (fts_get_word(cs, start, end, &w, &info)) {
 | 
						|
		int ret = param->mysql_add_word(
 | 
						|
				param,
 | 
						|
				reinterpret_cast<char*>(w.pos),
 | 
						|
				int(w.len), &info);
 | 
						|
		if (ret) {
 | 
						|
			return(ret);
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	return(0);
 | 
						|
}
 | 
						|
 | 
						|
/******************************************************************//**
 | 
						|
fts parse query by plugin parser.
 | 
						|
@return 0 if parse successfully, or return non-zero. */
 | 
						|
int
 | 
						|
fts_parse_by_parser(
 | 
						|
/*================*/
 | 
						|
	ibool			mode,		/*!< in: parse boolean mode */
 | 
						|
	uchar*			query_str,	/*!< in: query string */
 | 
						|
	ulint			query_len,	/*!< in: query string length */
 | 
						|
	st_mysql_ftparser*	parser,		/*!< in: fts plugin parser */
 | 
						|
	fts_ast_state_t*	state)		/*!< in/out: parser state */
 | 
						|
{
 | 
						|
	MYSQL_FTPARSER_PARAM	param;
 | 
						|
	int	ret;
 | 
						|
 | 
						|
	ut_ad(parser);
 | 
						|
 | 
						|
	/* Initial parser param */
 | 
						|
	param.mysql_parse = fts_parse_query_internal;
 | 
						|
	param.mysql_add_word = fts_query_add_word_for_parser;
 | 
						|
	param.mysql_ftparam = static_cast<void*>(state);
 | 
						|
	param.cs = state->charset;
 | 
						|
	param.doc = reinterpret_cast<char*>(query_str);
 | 
						|
	param.length = static_cast<int>(query_len);
 | 
						|
	param.flags = 0;
 | 
						|
	param.mode = mode ?
 | 
						|
		     MYSQL_FTPARSER_FULL_BOOLEAN_INFO :
 | 
						|
		     MYSQL_FTPARSER_SIMPLE_MODE;
 | 
						|
 | 
						|
	PARSER_INIT(parser, ¶m);
 | 
						|
	ret = parser->parse(¶m);
 | 
						|
	PARSER_DEINIT(parser, ¶m);
 | 
						|
 | 
						|
	return(ret | state->depth);
 | 
						|
}
 |