mirror of
				https://github.com/MariaDB/server.git
				synced 2025-10-25 00:48:31 +02:00 
			
		
		
		
	 ab0190101b
			
		
	
	
	ab0190101b
	
	
	
		
			
			Until now, the attribute EXTENDED of CHECK TABLE was ignored by InnoDB, and InnoDB only counted the records in each index according to the current read view. Unless the attribute QUICK was specified, the function btr_validate_index() would be invoked to validate the B-tree structure (the sibling and child links between index pages). The EXTENDED check will not only count all index records according to the current read view, but also ensure that any delete-marked records in the clustered index are waiting for the purge of history, and that all secondary index records point to a version of the clustered index record that is waiting for the purge of history. In other words, no index may contain orphan records. Normal MVCC reads and the non-EXTENDED version of CHECK TABLE would ignore these orphans. Unpurged records merely result in warnings (at most one per index), not errors, and no indexes will be flagged as corrupted due to such garbage. It will remain possible to SELECT data from such indexes or tables (which will skip such records) or to rebuild the table to reclaim some space. We introduce purge_sys.end_view that will be (almost) a copy of purge_sys.view at the end of a batch of purging committed transaction history. It is not an exact copy, because if the size of a purge batch is limited by innodb_purge_batch_size, some records that purge_sys.view would allow to be purged will be left over for subsequent batches. The purge_sys.view is relevant in the purge of committed transaction history, to determine if records are safe to remove. The new purge_sys.end_view is relevant in MVCC operations and in CHECK TABLE ... EXTENDED. It tells which undo log records are safe to access (have not been discarded at the end of a purge batch). purge_sys.clone_oldest_view<true>(): In trx_lists_init_at_db_start(), clone the oldest read view similar to purge_sys_t::clone_end_view() so that CHECK TABLE ... 
EXTENDED will not report bogus failures between InnoDB restart and the completed purge of committed transaction history. purge_sys_t::is_purgeable(): Replaces purge_sys_t::changes_visible() in the case that purge_sys.latch will not be held by the caller. Among other things, this guards access to BLOBs. It is not safe to dereference any BLOBs of a delete-marked purgeable record, because they may have already been freed. purge_sys_t::view_guard::view(): Return a reference to purge_sys.view that will be protected by purge_sys.latch, held by purge_sys_t::view_guard. purge_sys_t::end_view_guard::view(): Return a reference to purge_sys.end_view while it is protected by purge_sys.end_latch. Whenever a thread needs to retrieve an older version of a clustered index record, it will hold a page latch on the clustered index page and potentially also on a secondary index page that points to the clustered index page. If these pages contain purgeable records that would be accessed by a currently running purge batch, the progress of the purge batch would be blocked by the page latches. Hence, it is safe to make a copy of purge_sys.end_view while holding an index page latch, and consult the copy of the view to determine whether a record should already have been purged. btr_validate_index(): Remove a redundant check. row_check_index_match(): Check if a secondary index record and a version of a clustered index record match each other. row_check_index(): Replaces row_scan_index_for_mysql(). Count the records in each index directly, duplicating the relevant logic from row_search_mvcc(). Initialize check_table_extended_view for CHECK ... EXTENDED while holding an index leaf page latch. If we encounter an orphan record, the copy of purge_sys.end_view that we make is safe for visibility checks, and trx_undo_get_undo_rec() will check for the safety to access each undo log record. Should that check fail, we should return DB_MISSING_HISTORY to report a corrupted index. 
The EXTENDED check tries to match each secondary index record with every available clustered index record version, by duplicating the logic of row_vers_build_for_consistent_read() and invoking trx_undo_prev_version_build() directly. Before invoking row_check_index_match() on delete-marked clustered index record versions, we will consult purge_sys.is_purgeable() in order to avoid accessing freed BLOBs. We will always check that the DB_TRX_ID or PAGE_MAX_TRX_ID does not exceed the global maximum. Orphan secondary index records will be flagged only if everything up to PAGE_MAX_TRX_ID has been purged. We warn also about clustered index records whose nonzero DB_TRX_ID should have been reset in purge or rollback. trx_set_rw_mode(): Move an assertion from ReadView::set_creator_trx_id(). trx_undo_prev_version_build(): Remove two debug-only parameters, and return an error code instead of a Boolean. trx_undo_get_undo_rec(): Return a pointer to the undo log record, or nullptr if one cannot be retrieved. Instead of consulting the purge_sys.view, consult the purge_sys.end_view to determine which records can be accessed. trx_undo_get_rec_if_purgeable(): A variant of trx_undo_get_undo_rec() that will consult purge_sys.view instead of purge_sys.end_view. TRX_UNDO_CHECK_PURGEABILITY: A new parameter to trx_undo_prev_version_build(), passed by row_vers_old_has_index_entry() so that purge_sys.view instead of purge_sys.end_view will be consulted to determine whether a secondary index record may be safely purged. row_upd_changes_disowned_external(): Remove. This should be more expensive than briefly latching purge_sys in trx_undo_prev_version_build() (which may make use of transactional memory). row_sel_reset_old_vers_heap(): New function, split from row_sel_build_prev_vers_for_mysql(). row_sel_build_prev_vers_for_mysql(): Reorder some parameters to simplify the call to row_sel_reset_old_vers_heap(). row_search_for_mysql(): Replaced with direct calls to row_search_mvcc(). 
sel_node_get_nth_plan(): Define inline in row0sel.h open_step(): Define at the call site, in simplified form. sel_node_reset_cursor(): Merged with the only caller open_step(). --- ReadViewBase::check_trx_id_sanity(): Remove. Let us handle "future" DB_TRX_ID in a more meaningful way: row_sel_clust_sees(): Return DB_SUCCESS if the record is visible, DB_SUCCESS_LOCKED_REC if it is invisible, and DB_CORRUPTION if the DB_TRX_ID is in the future. row_undo_mod_must_purge(), row_undo_mod_clust(): Silently ignore corrupted DB_TRX_ID. We are in ROLLBACK, and we should have noticed that corruption when we were about to modify the record in the first place (leading us to refuse the operation). row_vers_build_for_consistent_read(): Return DB_CORRUPTION if DB_TRX_ID is in the future. Tested by: Matthias Leich Reviewed by: Vladislav Lesin
		
			
				
	
	
		
			816 lines
		
	
	
	
		
			20 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			816 lines
		
	
	
	
		
			20 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| /*****************************************************************************
 | |
| 
 | |
| Copyright (c) 2007, 2020, Oracle and/or its affiliates. All Rights Reserved.
 | |
| Copyright (c) 2018, 2022, MariaDB Corporation.
 | |
| 
 | |
| This program is free software; you can redistribute it and/or modify it under
 | |
| the terms of the GNU General Public License as published by the Free Software
 | |
| Foundation; version 2 of the License.
 | |
| 
 | |
| This program is distributed in the hope that it will be useful, but WITHOUT
 | |
| ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 | |
| FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 | |
| 
 | |
| You should have received a copy of the GNU General Public License along with
 | |
| this program; if not, write to the Free Software Foundation, Inc.,
 | |
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
 | |
| 
 | |
| *****************************************************************************/
 | |
| 
 | |
| /**************************************************//**
 | |
| @file fts/fts0ast.cc
 | |
| Full Text Search parser helper file.
 | |
| 
 | |
| Created 2007/3/16 Sunny Bains.
 | |
| ***********************************************************************/
 | |
| 
 | |
| #include "row0sel.h"
 | |
| #include "fts0ast.h"
 | |
| #include "fts0pars.h"
 | |
| #include "fts0fts.h"
 | |
| #include "trx0trx.h"
 | |
| 
 | |
/** The passes that fts_ast_visit() makes over the nodes of one list. */
enum fts_ast_visit_pass_t {
	/** First visit pass: process operators excluding
	FTS_EXIST and FTS_IGNORE */
	FTS_PASS_FIRST,
	/** Exist visit pass: process operator FTS_EXIST */
	FTS_PASS_EXIST,
	/** Ignore visit pass: process operator FTS_IGNORE */
	FTS_PASS_IGNORE
};
 | |
| 
 | |
| /******************************************************************//**
 | |
| Create an empty fts_ast_node_t.
 | |
| @return Create a new node */
 | |
| static
 | |
| fts_ast_node_t*
 | |
| fts_ast_node_create(void)
 | |
| /*=====================*/
 | |
| {
 | |
| 	fts_ast_node_t*	node;
 | |
| 
 | |
| 	node = (fts_ast_node_t*) ut_zalloc_nokey(sizeof(*node));
 | |
| 
 | |
| 	return(node);
 | |
| }
 | |
| 
 | |
| /** Track node allocations, in case there is an error during parsing. */
 | |
| static
 | |
| void
 | |
| fts_ast_state_add_node(
 | |
| 	fts_ast_state_t*state,			/*!< in: ast instance */
 | |
| 	fts_ast_node_t*	node)			/*!< in: node to add to ast */
 | |
| {
 | |
| 	if (!state->list.head) {
 | |
| 		ut_a(!state->list.tail);
 | |
| 
 | |
| 		state->list.head = state->list.tail = node;
 | |
| 	} else {
 | |
| 		state->list.tail->next_alloc = node;
 | |
| 		state->list.tail = node;
 | |
| 	}
 | |
| }
 | |
| 
 | |
| /******************************************************************//**
 | |
| Create a operator fts_ast_node_t.
 | |
| @return new node */
 | |
| fts_ast_node_t*
 | |
| fts_ast_create_node_oper(
 | |
| /*=====================*/
 | |
| 	void*		arg,			/*!< in: ast state instance */
 | |
| 	fts_ast_oper_t	oper)			/*!< in: ast operator */
 | |
| {
 | |
| 	fts_ast_node_t*	node = fts_ast_node_create();
 | |
| 
 | |
| 	node->type = FTS_AST_OPER;
 | |
| 	node->oper = oper;
 | |
| 
 | |
| 	fts_ast_state_add_node((fts_ast_state_t*) arg, node);
 | |
| 
 | |
| 	return(node);
 | |
| }
 | |
| 
 | |
| /******************************************************************//**
 | |
| This function takes ownership of the ptr and is responsible
 | |
| for free'ing it
 | |
| @return new node or a node list with tokenized words */
 | |
| fts_ast_node_t*
 | |
| fts_ast_create_node_term(
 | |
| /*=====================*/
 | |
| 	void*			arg,		/*!< in: ast state instance */
 | |
| 	const fts_ast_string_t*	ptr)		/*!< in: ast term string */
 | |
| {
 | |
| 	fts_ast_state_t*	state = static_cast<fts_ast_state_t*>(arg);
 | |
| 	ulint			len = ptr->len;
 | |
| 	ulint			cur_pos = 0;
 | |
| 	fts_ast_node_t*         node = NULL;
 | |
| 	fts_ast_node_t*		node_list = NULL;
 | |
| 	fts_ast_node_t*		first_node = NULL;
 | |
| 
 | |
| 	/* Scan the incoming string and filter out any "non-word" characters */
 | |
| 	while (cur_pos < len) {
 | |
| 		fts_string_t	str;
 | |
| 		ulint		cur_len;
 | |
| 
 | |
| 		cur_len = innobase_mysql_fts_get_token(
 | |
| 			state->charset,
 | |
| 			reinterpret_cast<const byte*>(ptr->str) + cur_pos,
 | |
| 			reinterpret_cast<const byte*>(ptr->str) + len, &str);
 | |
| 
 | |
| 		if (cur_len == 0) {
 | |
| 			break;
 | |
| 		}
 | |
| 
 | |
| 		cur_pos += cur_len;
 | |
| 
 | |
| 		if (str.f_n_char > 0) {
 | |
| 			/* If the subsequent term (after the first one)'s size
 | |
| 			is less than fts_min_token_size or the term is greater
 | |
| 			than fts_max_token_size, we shall ignore that. This is
 | |
| 			to make consistent with MyISAM behavior */
 | |
| 			if ((first_node && (str.f_n_char < fts_min_token_size))
 | |
| 			    || str.f_n_char > fts_max_token_size) {
 | |
| 				continue;
 | |
| 			}
 | |
| 
 | |
| 			node = fts_ast_node_create();
 | |
| 
 | |
| 			node->type = FTS_AST_TERM;
 | |
| 
 | |
| 			node->term.ptr = fts_ast_string_create(
 | |
| 						str.f_str, str.f_len);
 | |
| 
 | |
| 			fts_ast_state_add_node(
 | |
| 				static_cast<fts_ast_state_t*>(arg), node);
 | |
| 
 | |
| 			if (first_node) {
 | |
| 				/* There is more than one word, create
 | |
| 				a list to organize them */
 | |
| 				if (!node_list) {
 | |
| 					node_list = fts_ast_create_node_list(
 | |
| 						static_cast<fts_ast_state_t*>(
 | |
| 							arg),
 | |
| 						 first_node);
 | |
| 				}
 | |
| 
 | |
| 				fts_ast_add_node(node_list, node);
 | |
| 			} else {
 | |
| 				first_node = node;
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	return((node_list != NULL) ? node_list : first_node);
 | |
| }
 | |
| 
 | |
| /******************************************************************//**
 | |
| Create an AST term node, makes a copy of ptr for plugin parser
 | |
| @return node */
 | |
| fts_ast_node_t*
 | |
| fts_ast_create_node_term_for_parser(
 | |
| /*================================*/
 | |
| 	void*		arg,		/*!< in: ast state */
 | |
| 	const char*	ptr,		/*!< in: term string */
 | |
| 	const ulint	len)		/*!< in: term string length */
 | |
| {
 | |
| 	fts_ast_node_t*		node = NULL;
 | |
| 
 | |
| 	/* '%' as first char is forbidden for LIKE in internal SQL parser;
 | |
| 	'%' as last char is reserved for wildcard search;*/
 | |
| 	if (len == 0 || len > FTS_MAX_WORD_LEN
 | |
| 	    || ptr[0] == '%' || ptr[len - 1] == '%') {
 | |
| 		return(NULL);
 | |
| 	}
 | |
| 
 | |
| 	node = fts_ast_node_create();
 | |
| 
 | |
| 	node->type = FTS_AST_TERM;
 | |
| 
 | |
| 	node->term.ptr = fts_ast_string_create(
 | |
| 			reinterpret_cast<const byte*>(ptr), len);
 | |
| 
 | |
| 	fts_ast_state_add_node(static_cast<fts_ast_state_t*>(arg), node);
 | |
| 
 | |
| 	return(node);
 | |
| }
 | |
| 
 | |
| /******************************************************************//**
 | |
| This function takes ownership of the ptr and is responsible
 | |
| for free'ing it.
 | |
| @return new node */
 | |
| fts_ast_node_t*
 | |
| fts_ast_create_node_text(
 | |
| /*=====================*/
 | |
| 	void*			arg,	/*!< in: ast state instance */
 | |
| 	const fts_ast_string_t*	ptr)	/*!< in: ast text string */
 | |
| {
 | |
| 	ulint		len = ptr->len;
 | |
| 	fts_ast_node_t*	node = NULL;
 | |
| 
 | |
| 	/* Once we come here, the string must have at least 2 quotes ""
 | |
| 	around the query string, which could be empty. Also the query
 | |
| 	string may contain 0x00 in it, we don't treat it as null-terminated. */
 | |
| 	ut_ad(len >= 2);
 | |
| 	ut_ad(ptr->str[0] == '\"' && ptr->str[len - 1] == '\"');
 | |
| 
 | |
| 	if (len == 2) {
 | |
| 		/* If the query string contains nothing except quotes,
 | |
| 		it's obviously an invalid query. */
 | |
| 		return(NULL);
 | |
| 	}
 | |
| 
 | |
| 	node = fts_ast_node_create();
 | |
| 
 | |
| 	/*!< We ignore the actual quotes "" */
 | |
| 	len -= 2;
 | |
| 
 | |
| 	node->type = FTS_AST_TEXT;
 | |
| 	/*!< Skip copying the first quote */
 | |
| 	node->text.ptr = fts_ast_string_create(
 | |
| 			reinterpret_cast<const byte*>(ptr->str + 1), len);
 | |
| 	node->text.distance = ULINT_UNDEFINED;
 | |
| 
 | |
| 	fts_ast_state_add_node((fts_ast_state_t*) arg, node);
 | |
| 
 | |
| 	return(node);
 | |
| }
 | |
| 
 | |
| /******************************************************************//**
 | |
| Create an AST phrase list node for plugin parser
 | |
| @return node */
 | |
| fts_ast_node_t*
 | |
| fts_ast_create_node_phrase_list(
 | |
| /*============================*/
 | |
| 	void*		arg)			/*!< in: ast state */
 | |
| {
 | |
| 	fts_ast_node_t*		node = fts_ast_node_create();
 | |
| 
 | |
| 	node->type = FTS_AST_PARSER_PHRASE_LIST;
 | |
| 
 | |
| 	node->text.distance = ULINT_UNDEFINED;
 | |
| 	node->list.head = node->list.tail = NULL;
 | |
| 
 | |
| 	fts_ast_state_add_node(static_cast<fts_ast_state_t*>(arg), node);
 | |
| 
 | |
| 	return(node);
 | |
| }
 | |
| 
 | |
| /******************************************************************//**
 | |
| This function takes ownership of the expr and is responsible
 | |
| for free'ing it.
 | |
| @return new node */
 | |
| fts_ast_node_t*
 | |
| fts_ast_create_node_list(
 | |
| /*=====================*/
 | |
| 	void*		arg,			/*!< in: ast state instance */
 | |
| 	fts_ast_node_t*	expr)			/*!< in: ast expr instance */
 | |
| {
 | |
| 	fts_ast_node_t*	node = fts_ast_node_create();
 | |
| 
 | |
| 	node->type = FTS_AST_LIST;
 | |
| 	node->list.head = node->list.tail = expr;
 | |
| 
 | |
| 	fts_ast_state_add_node((fts_ast_state_t*) arg, node);
 | |
| 
 | |
| 	return(node);
 | |
| }
 | |
| 
 | |
| /******************************************************************//**
 | |
| Create a sub-expression list node. This function takes ownership of
 | |
| expr and is responsible for deleting it.
 | |
| @return new node */
 | |
| fts_ast_node_t*
 | |
| fts_ast_create_node_subexp_list(
 | |
| /*============================*/
 | |
| 	void*		arg,			/*!< in: ast state instance */
 | |
| 	fts_ast_node_t*	expr)			/*!< in: ast expr instance */
 | |
| {
 | |
| 	fts_ast_node_t*	node = fts_ast_node_create();
 | |
| 
 | |
| 	node->type = FTS_AST_SUBEXP_LIST;
 | |
| 	node->list.head = node->list.tail = expr;
 | |
| 
 | |
| 	fts_ast_state_add_node((fts_ast_state_t*) arg, node);
 | |
| 
 | |
| 	return(node);
 | |
| }
 | |
| 
 | |
| /******************************************************************//**
 | |
| Free an expr list node elements. */
 | |
| static
 | |
| void
 | |
| fts_ast_free_list(
 | |
| /*==============*/
 | |
| 	fts_ast_node_t*	node)			/*!< in: ast node to free */
 | |
| {
 | |
| 	ut_a(node->type == FTS_AST_LIST
 | |
| 	     || node->type == FTS_AST_SUBEXP_LIST
 | |
| 	     || node->type == FTS_AST_PARSER_PHRASE_LIST);
 | |
| 
 | |
| 	for (node = node->list.head;
 | |
| 	     node != NULL;
 | |
| 	     node = fts_ast_free_node(node)) {
 | |
| 
 | |
| 		/*!< No op */
 | |
| 	}
 | |
| }
 | |
| 
 | |
| /********************************************************************//**
 | |
| Free a fts_ast_node_t instance.
 | |
| @return next node to free */
 | |
| fts_ast_node_t*
 | |
| fts_ast_free_node(
 | |
| /*==============*/
 | |
| 	fts_ast_node_t*	node)			/*!< in: the node to free */
 | |
| {
 | |
| 	fts_ast_node_t*	next_node;
 | |
| 
 | |
| 	switch (node->type) {
 | |
| 	case FTS_AST_TEXT:
 | |
| 		if (node->text.ptr) {
 | |
| 			fts_ast_string_free(node->text.ptr);
 | |
| 			node->text.ptr = NULL;
 | |
| 		}
 | |
| 		break;
 | |
| 
 | |
| 	case FTS_AST_TERM:
 | |
| 		if (node->term.ptr) {
 | |
| 			fts_ast_string_free(node->term.ptr);
 | |
| 			node->term.ptr = NULL;
 | |
| 		}
 | |
| 		break;
 | |
| 
 | |
| 	case FTS_AST_LIST:
 | |
| 	case FTS_AST_SUBEXP_LIST:
 | |
| 	case FTS_AST_PARSER_PHRASE_LIST:
 | |
| 		fts_ast_free_list(node);
 | |
| 		node->list.head = node->list.tail = NULL;
 | |
| 		break;
 | |
| 
 | |
| 	case FTS_AST_OPER:
 | |
| 		break;
 | |
| 
 | |
| 	default:
 | |
| 		ut_error;
 | |
| 	}
 | |
| 
 | |
| 	/*!< Get next node before freeing the node itself */
 | |
| 	next_node = node->next;
 | |
| 
 | |
| 	ut_free(node);
 | |
| 
 | |
| 	return(next_node);
 | |
| }
 | |
| 
 | |
| /******************************************************************//**
 | |
| This AST takes ownership of the expr and is responsible
 | |
| for free'ing it.
 | |
| @return in param "list" */
 | |
| fts_ast_node_t*
 | |
| fts_ast_add_node(
 | |
| /*=============*/
 | |
| 	fts_ast_node_t*	node,			/*!< in: list instance */
 | |
| 	fts_ast_node_t*	elem)			/*!< in: node to add to list */
 | |
| {
 | |
| 	if (!elem) {
 | |
| 		return(NULL);
 | |
| 	}
 | |
| 
 | |
| 	ut_a(!elem->next);
 | |
| 	ut_a(node->type == FTS_AST_LIST
 | |
| 	     || node->type == FTS_AST_SUBEXP_LIST
 | |
| 	     || node->type == FTS_AST_PARSER_PHRASE_LIST);
 | |
| 
 | |
| 	if (!node->list.head) {
 | |
| 		ut_a(!node->list.tail);
 | |
| 
 | |
| 		node->list.head = node->list.tail = elem;
 | |
| 	} else {
 | |
| 		ut_a(node->list.tail);
 | |
| 
 | |
| 		node->list.tail->next = elem;
 | |
| 		node->list.tail = elem;
 | |
| 	}
 | |
| 
 | |
| 	return(node);
 | |
| }
 | |
| 
 | |
| /******************************************************************//**
 | |
| Set the wildcard attribute of a term. */
 | |
| void
 | |
| fts_ast_term_set_wildcard(
 | |
| /*======================*/
 | |
| 	fts_ast_node_t*	node)			/*!< in/out: set attribute of
 | |
| 						a term node */
 | |
| {
 | |
| 	if (!node) {
 | |
| 		return;
 | |
| 	}
 | |
| 
 | |
| 	/* If it's a node list, the wildcard should be set to the tail node*/
 | |
| 	if (node->type == FTS_AST_LIST)	{
 | |
| 		ut_ad(node->list.tail != NULL);
 | |
| 		node = node->list.tail;
 | |
| 	}
 | |
| 
 | |
| 	ut_a(node->type == FTS_AST_TERM);
 | |
| 	ut_a(!node->term.wildcard);
 | |
| 
 | |
| 	node->term.wildcard = TRUE;
 | |
| }
 | |
| 
 | |
| /******************************************************************//**
 | |
| Set the proximity attribute of a text node. */
 | |
| void
 | |
| fts_ast_text_set_distance(
 | |
| /*======================*/
 | |
| 	fts_ast_node_t*	node,			/*!< in/out: text node */
 | |
| 	ulint		distance)		/*!< in: the text proximity
 | |
| 						distance */
 | |
| {
 | |
| 	if (node == NULL) {
 | |
| 		return;
 | |
| 	}
 | |
| 
 | |
| 	ut_a(node->type == FTS_AST_TEXT);
 | |
| 	ut_a(node->text.distance == ULINT_UNDEFINED);
 | |
| 
 | |
| 	node->text.distance = distance;
 | |
| }
 | |
| 
 | |
| /******************************************************************//**
 | |
| Free node and expr allocations. */
 | |
| void
 | |
| fts_ast_state_free(
 | |
| /*===============*/
 | |
| 	fts_ast_state_t*state)			/*!< in: ast state to free */
 | |
| {
 | |
| 	fts_ast_node_t*	node = state->list.head;
 | |
| 
 | |
| 	/* Free the nodes that were allocated during parsing. */
 | |
| 	while (node) {
 | |
| 		fts_ast_node_t*	next = node->next_alloc;
 | |
| 
 | |
| 		if (node->type == FTS_AST_TEXT && node->text.ptr) {
 | |
| 			fts_ast_string_free(node->text.ptr);
 | |
| 			node->text.ptr = NULL;
 | |
| 		} else if (node->type == FTS_AST_TERM && node->term.ptr) {
 | |
| 			fts_ast_string_free(node->term.ptr);
 | |
| 			node->term.ptr = NULL;
 | |
| 		}
 | |
| 
 | |
| 		ut_free(node);
 | |
| 		node = next;
 | |
| 	}
 | |
| 
 | |
| 	state->root = state->list.head = state->list.tail = NULL;
 | |
| }
 | |
| 
 | |
| /** Print the ast string
 | |
| @param[in] str		string to print */
 | |
| static
 | |
| void
 | |
| fts_ast_string_print(
 | |
| 	const fts_ast_string_t*	ast_str)
 | |
| {
 | |
| 	for (ulint i = 0; i < ast_str->len; ++i) {
 | |
| 		printf("%c", ast_str->str[i]);
 | |
| 	}
 | |
| 
 | |
| 	printf("\n");
 | |
| }
 | |
| 
 | |
| /******************************************************************//**
 | |
| Print an ast node recursively. */
 | |
| static
 | |
| void
 | |
| fts_ast_node_print_recursive(
 | |
| /*=========================*/
 | |
| 	fts_ast_node_t*	node,			/*!< in: ast node to print */
 | |
| 	ulint		level)			/*!< in: recursive level */
 | |
| {
 | |
| 	/* Print alignment blank */
 | |
| 	for (ulint i = 0; i < level; i++) {
 | |
| 		printf("  ");
 | |
| 	}
 | |
| 
 | |
| 	switch (node->type) {
 | |
| 	case FTS_AST_TEXT:
 | |
| 		printf("TEXT: ");
 | |
| 		fts_ast_string_print(node->text.ptr);
 | |
| 		break;
 | |
| 
 | |
| 	case FTS_AST_TERM:
 | |
| 		printf("TERM: ");
 | |
| 		fts_ast_string_print(node->term.ptr);
 | |
| 		break;
 | |
| 
 | |
| 	case FTS_AST_LIST:
 | |
| 		printf("LIST: \n");
 | |
| 
 | |
| 		for (node = node->list.head; node; node = node->next) {
 | |
| 			fts_ast_node_print_recursive(node, level + 1);
 | |
| 		}
 | |
| 		break;
 | |
| 
 | |
| 	case FTS_AST_SUBEXP_LIST:
 | |
| 		printf("SUBEXP_LIST: \n");
 | |
| 
 | |
| 		for (node = node->list.head; node; node = node->next) {
 | |
| 			fts_ast_node_print_recursive(node, level + 1);
 | |
| 		}
 | |
| 		break;
 | |
| 
 | |
| 	case FTS_AST_OPER:
 | |
| 		printf("OPER: %d\n", node->oper);
 | |
| 		break;
 | |
| 
 | |
| 	case FTS_AST_PARSER_PHRASE_LIST:
 | |
| 		printf("PARSER_PHRASE_LIST: \n");
 | |
| 
 | |
| 		for (node = node->list.head; node; node = node->next) {
 | |
| 			fts_ast_node_print_recursive(node, level + 1);
 | |
| 		}
 | |
| 		break;
 | |
| 
 | |
| 	default:
 | |
| 		ut_error;
 | |
| 	}
 | |
| }
 | |
| 
 | |
| /******************************************************************//**
 | |
| Print an ast node */
 | |
| void
 | |
| fts_ast_node_print(
 | |
| /*===============*/
 | |
| 	fts_ast_node_t* node)		/*!< in: ast node to print */
 | |
| {
 | |
| 	fts_ast_node_print_recursive(node, 0);
 | |
| }
 | |
| 
 | |
| /** Check only union operation involved in the node
 | |
| @param[in]	node	ast node to check
 | |
| @return true if the node contains only union else false. */
 | |
| bool
 | |
| fts_ast_node_check_union(
 | |
| 	fts_ast_node_t*	node)
 | |
| {
 | |
| 	if (node->type == FTS_AST_LIST
 | |
| 	    || node->type == FTS_AST_SUBEXP_LIST) {
 | |
| 
 | |
| 		for (node = node->list.head; node; node = node->next) {
 | |
| 			if (!fts_ast_node_check_union(node)) {
 | |
| 				return(false);
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 	} else if (node->type == FTS_AST_PARSER_PHRASE_LIST) {
 | |
| 		/* Phrase search for plugin parser */
 | |
| 		return(false);
 | |
| 	} else if (node->type == FTS_AST_OPER
 | |
| 		   && (node->oper == FTS_IGNORE
 | |
| 		       || node->oper == FTS_EXIST)) {
 | |
| 
 | |
| 		return(false);
 | |
| 	} else if (node->type == FTS_AST_TEXT) {
 | |
| 		/* Distance or phrase search query. */
 | |
| 		return(false);
 | |
| 	}
 | |
| 
 | |
| 	return(true);
 | |
| }
 | |
| 
 | |
/******************************************************************//**
Traverse the AST - in-order traversal, except for the FTS_EXIST and FTS_IGNORE
nodes, which will be ignored in the first pass of each level, and visited in a
second and third pass after all other nodes in the same level are visited.
@return DB_SUCCESS if all went well, DB_INTERRUPTED if the transaction of
the root node was interrupted, or the first error reported by the visitor
callback or by a nested visit */
dberr_t
fts_ast_visit(
/*==========*/
	fts_ast_oper_t		oper,		/*!< in: current operator */
	fts_ast_node_t*		node,		/*!< in: current root node */
	fts_ast_callback	visitor,	/*!< in: callback function */
	void*			arg,		/*!< in: arg for callback */
	bool*			has_ignore)	/*!< out: true, if the operator
						was ignored during processing,
						currently we ignore FTS_EXIST
						and FTS_IGNORE operators */
{
	dberr_t			error = DB_SUCCESS;
	fts_ast_node_t*		oper_node = NULL;
	fts_ast_node_t*		start_node;
	bool			revisit = false;
	bool			will_be_ignored = false;
	fts_ast_visit_pass_t	visit_pass = FTS_PASS_FIRST;
	const trx_t*		trx = node->trx;

	/* Remember the first element: the loop below reuses "node" as
	its cursor, and the revisit passes start over from here. */
	start_node = node->list.head;

	ut_a(node->type == FTS_AST_LIST
	     || node->type == FTS_AST_SUBEXP_LIST);

	/* A *_SKIP operator passed by the caller selects the revisit
	pass to run on this list. */
	if (oper == FTS_EXIST_SKIP) {
		visit_pass = FTS_PASS_EXIST;
	} else if (oper == FTS_IGNORE_SKIP) {
		visit_pass = FTS_PASS_IGNORE;
	}

	/* In the first pass of the tree, at the leaf level of the
	tree, FTS_EXIST and FTS_IGNORE operation will be ignored.
	It will be repeated at the level above the leaf level.

	The basic idea here is that when we encounter FTS_EXIST or
	FTS_IGNORE, we will change the operator node into FTS_EXIST_SKIP
	or FTS_IGNORE_SKIP, and term node & text node with the operators
	is ignored in the first pass. We have two passes during the revisit:
	We process nodes with FTS_EXIST_SKIP in the exist pass, and then
	process nodes with FTS_IGNORE_SKIP in the ignore pass.

	The order should be strictly followed, or we will get wrong results.
	For example, we have a query 'a +b -c d +e -f'.
	first pass: process 'a' and 'd' by union;
	exist pass: process '+b' and '+e' by intersection;
	ignore pass: process '-c' and '-f' by difference. */

	for (node = node->list.head;
	     node && (error == DB_SUCCESS);
	     node = node->next) {

		switch (node->type) {
		case FTS_AST_LIST:
			/* Nested lists are descended into only in the
			first pass. */
			if (visit_pass != FTS_PASS_FIRST) {
				break;
			}

			error = fts_ast_visit(oper, node, visitor,
					      arg, &will_be_ignored);

			/* If will_be_ignored is set to true, then
			we encountered and ignored a FTS_EXIST or FTS_IGNORE
			operator. */
			if (will_be_ignored) {
				revisit = true;
				/* Remember oper for list in case '-abc&def',
				ignored oper is from previous node of list.*/
				node->oper = oper;
			}

			break;

		case FTS_AST_OPER:
			/* The operator applies to the nodes that follow
			it in this list. */
			oper = node->oper;
			oper_node = node;

			/* Change the operator for revisit */
			if (oper == FTS_EXIST) {
				oper_node->oper = FTS_EXIST_SKIP;
			} else if (oper == FTS_IGNORE) {
				oper_node->oper = FTS_IGNORE_SKIP;
			}

			break;

		default:
			/* Each leaf node is passed to the visitor at most
			once across all passes. */
			if (node->visited) {
				continue;
			}

			ut_a(oper == FTS_NONE || !oper_node
			     || oper_node->oper == oper
			     || oper_node->oper == FTS_EXIST_SKIP
			     || oper_node->oper == FTS_IGNORE_SKIP);

			/* Defer the node and tell the caller that a
			revisit is needed. */
			if (oper== FTS_EXIST || oper == FTS_IGNORE) {
				*has_ignore = true;
				continue;
			}

			/* Process leaf node according to its pass.*/
			if (oper == FTS_EXIST_SKIP
			    && visit_pass == FTS_PASS_EXIST) {
				error = visitor(FTS_EXIST, node, arg);
				node->visited = true;
			} else if (oper == FTS_IGNORE_SKIP
				   && visit_pass == FTS_PASS_IGNORE) {
				error = visitor(FTS_IGNORE, node, arg);
				node->visited = true;
			} else if (visit_pass == FTS_PASS_FIRST) {
				error = visitor(oper, node, arg);
				node->visited = true;
			}
		}
	}

	/* An interruption takes precedence over any error collected
	in the loop above. */
	if (trx_is_interrupted(trx)) {
		return DB_INTERRUPTED;
	}

	if (revisit) {
		/* Exist pass processes the skipped FTS_EXIST operation. */
                for (node = start_node;
		     node && error == DB_SUCCESS;
		     node = node->next) {

			if (node->type == FTS_AST_LIST
			    && node->oper != FTS_IGNORE) {
				error = fts_ast_visit(FTS_EXIST_SKIP, node,
					visitor, arg, &will_be_ignored);
			}
		}

		/* Ignore pass processes the skipped FTS_IGNORE operation. */
		for (node = start_node;
		     node && error == DB_SUCCESS;
		     node = node->next) {

			if (node->type == FTS_AST_LIST) {
				error = fts_ast_visit(FTS_IGNORE_SKIP, node,
					visitor, arg, &will_be_ignored);
			}
		}
	}

	return(error);
}
 | |
| 
 | |
| /**
 | |
| Create an ast string object, with NUL-terminator, so the string
 | |
| has one more byte than len
 | |
| @param[in] str		pointer to string
 | |
| @param[in] len		length of the string
 | |
| @return ast string with NUL-terminator */
 | |
| fts_ast_string_t*
 | |
| fts_ast_string_create(
 | |
| 	const byte*	str,
 | |
| 	ulint		len)
 | |
| {
 | |
| 	fts_ast_string_t*	ast_str;
 | |
| 
 | |
| 	ut_ad(len > 0);
 | |
| 
 | |
| 	ast_str = static_cast<fts_ast_string_t*>(
 | |
| 		ut_malloc_nokey(sizeof(fts_ast_string_t)));
 | |
| 
 | |
| 	ast_str->str = static_cast<byte*>(ut_malloc_nokey(len + 1));
 | |
| 
 | |
| 	ast_str->len = len;
 | |
| 	memcpy(ast_str->str, str, len);
 | |
| 	ast_str->str[len] = '\0';
 | |
| 
 | |
| 	return(ast_str);
 | |
| }
 | |
| 
 | |
| /**
 | |
| Free an ast string instance
 | |
| @param[in,out] ast_str		string to free */
 | |
| void
 | |
| fts_ast_string_free(
 | |
| 	fts_ast_string_t*	ast_str)
 | |
| {
 | |
| 	if (ast_str != NULL) {
 | |
| 		ut_free(ast_str->str);
 | |
| 		ut_free(ast_str);
 | |
| 	}
 | |
| }
 | |
| 
 | |
| /**
 | |
| Translate ast string of type FTS_AST_NUMB to unsigned long by strtoul
 | |
| @param[in] str		string to translate
 | |
| @param[in] base		the base
 | |
| @return translated number */
 | |
| ulint
 | |
| fts_ast_string_to_ul(
 | |
| 	const fts_ast_string_t*	ast_str,
 | |
| 	int			base)
 | |
| {
 | |
| 	return(strtoul(reinterpret_cast<const char*>(ast_str->str),
 | |
| 		       NULL, base));
 | |
| }
 | |
| 
 | |
#ifdef UNIV_DEBUG
/** Get the name of an ast node type.
@param[in]	type	ast node type
@return the enumerator name of the type, or "FTS_UNKNOWN" if the value
matches none of the enumerators handled here */
const char*
fts_ast_node_type_get(fts_ast_type_t	type)
{
	const char*	type_name = NULL;

	switch (type) {
	case FTS_AST_OPER:
		type_name = "FTS_AST_OPER";
		break;
	case FTS_AST_NUMB:
		type_name = "FTS_AST_NUMB";
		break;
	case FTS_AST_TERM:
		type_name = "FTS_AST_TERM";
		break;
	case FTS_AST_TEXT:
		type_name = "FTS_AST_TEXT";
		break;
	case FTS_AST_LIST:
		type_name = "FTS_AST_LIST";
		break;
	case FTS_AST_SUBEXP_LIST:
		type_name = "FTS_AST_SUBEXP_LIST";
		break;
	case FTS_AST_PARSER_PHRASE_LIST:
		type_name = "FTS_AST_PARSER_PHRASE_LIST";
		break;
	}

	if (type_name != NULL) {
		return(type_name);
	}

	ut_ad(0);
	return("FTS_UNKNOWN");
}
#endif /* UNIV_DEBUG */
 |