mirror of
				https://github.com/MariaDB/server.git
				synced 2025-10-31 02:46:29 +01:00 
			
		
		
		
	
		
			
				
	
	
		
			222 lines
		
	
	
	
		
			7.6 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			222 lines
		
	
	
	
		
			7.6 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /* Copyright (c) 2005 MySQL AB, 2009 Sun Microsystems, Inc.
 | |
|    Use is subject to license terms.
 | |
| 
 | |
|    This program is free software; you can redistribute it and/or modify
 | |
|    it under the terms of the GNU General Public License as published by
 | |
|    the Free Software Foundation; version 2 of the License.
 | |
| 
 | |
|    This program is distributed in the hope that it will be useful,
 | |
|    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
|    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | |
|    GNU General Public License for more details.
 | |
| 
 | |
|    You should have received a copy of the GNU General Public License
 | |
|    along with this program; if not, write to the Free Software
 | |
|    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
 | |
| 
 | |
| #ifndef _my_plugin_ftparser_h
 | |
| #define _my_plugin_ftparser_h
 | |
| #include "plugin.h"
 | |
| 
 | |
| #ifdef __cplusplus
 | |
| extern "C" {
 | |
| #endif
 | |
| 
 | |
| /*************************************************************************
 | |
|   API for Full-text parser plugin. (MYSQL_FTPARSER_PLUGIN)
 | |
| */
 | |
| 
 | |
| #define MYSQL_FTPARSER_INTERFACE_VERSION 0x0100
 | |
| 
 | |
| /* Parsing modes. Set in  MYSQL_FTPARSER_PARAM::mode */
 | |
| enum enum_ftparser_mode
 | |
| {
 | |
| /*
 | |
|   Fast and simple mode.  This mode is used for indexing, and natural
 | |
|   language queries.
 | |
| 
 | |
|   The parser is expected to return only those words that go into the
 | |
|   index. Stopwords or too short/long words should not be returned. The
 | |
|   'boolean_info' argument of mysql_add_word() does not have to be set.
 | |
| */
 | |
|   MYSQL_FTPARSER_SIMPLE_MODE= 0,
 | |
| 
 | |
| /*
 | |
|   Parse with stopwords mode.  This mode is used in boolean searches for
 | |
|   "phrase matching."
 | |
| 
 | |
|   The parser is not allowed to ignore words in this mode.  Every word
 | |
|   should be returned, including stopwords and words that are too short
 | |
|   or long.  The 'boolean_info' argument of mysql_add_word() does not
 | |
|   have to be set.
 | |
| */
 | |
|   MYSQL_FTPARSER_WITH_STOPWORDS= 1,
 | |
| 
 | |
| /*
 | |
|   Parse in boolean mode.  This mode is used to parse a boolean query string.
 | |
| 
 | |
|   The parser should provide a valid MYSQL_FTPARSER_BOOLEAN_INFO
 | |
|   structure in the 'boolean_info' argument to mysql_add_word().
 | |
|   Usually that means that the parser should recognize boolean operators
 | |
|   in the parsing stream and set appropriate fields in
 | |
|   MYSQL_FTPARSER_BOOLEAN_INFO structure accordingly.  As for
 | |
|   MYSQL_FTPARSER_WITH_STOPWORDS mode, no word should be ignored.
 | |
|   Instead, use FT_TOKEN_STOPWORD for the token type of such a word.
 | |
| */
 | |
|   MYSQL_FTPARSER_FULL_BOOLEAN_INFO= 2
 | |
| };
 | |
| 
 | |
| /*
 | |
|   Token types for boolean mode searching (used for the type member of
 | |
|   MYSQL_FTPARSER_BOOLEAN_INFO struct)
 | |
| 
 | |
|   FT_TOKEN_EOF: End of data.
 | |
|   FT_TOKEN_WORD: Regular word.
 | |
|   FT_TOKEN_LEFT_PAREN: Left parenthesis (start of group/sub-expression).
 | |
|   FT_TOKEN_RIGHT_PAREN: Right parenthesis (end of group/sub-expression).
 | |
|   FT_TOKEN_STOPWORD: Stopword.
 | |
| */
 | |
| 
 | |
| enum enum_ft_token_type
 | |
| {
 | |
|   FT_TOKEN_EOF= 0,
 | |
|   FT_TOKEN_WORD= 1,
 | |
|   FT_TOKEN_LEFT_PAREN= 2,
 | |
|   FT_TOKEN_RIGHT_PAREN= 3,
 | |
|   FT_TOKEN_STOPWORD= 4
 | |
| };
 | |
| 
 | |
| /*
 | |
|   This structure is used in boolean search mode only. It conveys
 | |
|   boolean-mode metadata to the MySQL search engine for every word in
 | |
|   the search query. A valid instance of this structure must be filled
 | |
|   in by the plugin parser and passed as an argument in the call to
 | |
|   mysql_add_word (the callback function in the MYSQL_FTPARSER_PARAM
 | |
|   structure) when a query is parsed in boolean mode.
 | |
| 
 | |
|   type: The token type.  Should be one of the enum_ft_token_type values.
 | |
| 
 | |
|   yesno: Whether the word must be present for a match to occur:
 | |
|     >0 Must be present
 | |
|     <0 Must not be present
 | |
|     0  Neither; the word is optional but its presence increases the relevance
 | |
|   With the default settings of the ft_boolean_syntax system variable,
 | |
|   >0 corresponds to the '+' operator, <0 corresponds to the '-' operator,
 | |
|   and 0 means neither operator was used.
 | |
| 
 | |
|   weight_adjust: A weighting factor that determines how much a match
 | |
|   for the word counts.  Positive values increase, negative - decrease the
 | |
|   relative word's importance in the query.
 | |
| 
 | |
|   wasign: The sign of the word's weight in the query. If it's non-negative
 | |
|   the match for the word will increase document relevance, if it's
 | |
|   negative - decrease (the word becomes a "noise word", the less of it the
 | |
|   better).
 | |
| 
 | |
|   trunc: Corresponds to the '*' operator in the default setting of the
 | |
|   ft_boolean_syntax system variable.
 | |
| */
 | |
| 
 | |
| typedef struct st_mysql_ftparser_boolean_info
 | |
| {
 | |
|   enum enum_ft_token_type type;
 | |
|   int yesno;
 | |
|   int weight_adjust;
 | |
|   char wasign;
 | |
|   char trunc;
 | |
|   /* These are parser state and must be removed. */
 | |
|   char prev;
 | |
|   char *quot;
 | |
| } MYSQL_FTPARSER_BOOLEAN_INFO;
 | |
| 
 | |
| /*
 | |
|   The following flag means that buffer with a string (document, word)
 | |
|   may be overwritten by the caller before the end of the parsing (that is
 | |
|   before st_mysql_ftparser::deinit() call). If one needs the string
 | |
|   to survive between two successive calls of the parsing function, she
 | |
|   needs to save a copy of it. The flag may be set by MySQL before calling
 | |
|   st_mysql_ftparser::parse(), or it may be set by a plugin before calling
 | |
|   st_mysql_ftparser_param::mysql_parse() or
 | |
|   st_mysql_ftparser_param::mysql_add_word().
 | |
| */
 | |
| #define MYSQL_FTFLAGS_NEED_COPY 1
 | |
| 
 | |
| /*
 | |
|   An argument of the full-text parser plugin. This structure is
 | |
|   filled in by MySQL server and passed to the parsing function of the
 | |
|   plugin as an in/out parameter.
 | |
| 
 | |
|   mysql_parse: A pointer to the built-in parser implementation of the
 | |
|   server. It's set by the server and can be used by the parser plugin
 | |
|   to invoke the MySQL default parser.  If plugin's role is to extract
 | |
|   textual data from .doc, .pdf or .xml content, it might extract
 | |
|   plaintext from the content, and then pass the text to the default
 | |
|   MySQL parser to be parsed.
 | |
| 
 | |
|   mysql_add_word: A server callback to add a new word.  When parsing
 | |
|   a document, the server sets this to point at a function that adds
 | |
|   the word to MySQL full-text index.  When parsing a search query,
 | |
|   this function will add the new word to the list of words to search
 | |
|   for.  The boolean_info argument can be NULL for all cases except
 | |
|   when mode is MYSQL_FTPARSER_FULL_BOOLEAN_INFO. A plugin can replace this
 | |
|   callback to post-process every parsed word before passing it to the original
 | |
|   mysql_add_word function.
 | |
| 
 | |
|   ftparser_state: A generic pointer. The plugin can set it to point
 | |
|   to information to be used internally for its own purposes.
 | |
| 
 | |
|   mysql_ftparam: This is set by the server.  It is used by MySQL functions
 | |
|   called via mysql_parse() and mysql_add_word() callback.  The plugin
 | |
|   should not modify it.
 | |
| 
 | |
|   cs: Information about the character set of the document or query string.
 | |
| 
 | |
|   doc: A pointer to the document or query string to be parsed.
 | |
| 
 | |
|   length: Length of the document or query string, in bytes.
 | |
| 
 | |
|   flags: See MYSQL_FTFLAGS_* constants above.
 | |
| 
 | |
|   mode: The parsing mode.  With boolean operators, with stopwords, or
 | |
|   nothing.  See  enum_ftparser_mode above.
 | |
| */
 | |
| 
 | |
| typedef struct st_mysql_ftparser_param
 | |
| {
 | |
|   int (*mysql_parse)(struct st_mysql_ftparser_param *,
 | |
|                      const char *doc, int doc_len);
 | |
|   int (*mysql_add_word)(struct st_mysql_ftparser_param *,
 | |
|                         const char *word, int word_len,
 | |
|                         MYSQL_FTPARSER_BOOLEAN_INFO *boolean_info);
 | |
|   void *ftparser_state;
 | |
|   void *mysql_ftparam;
 | |
|   const struct charset_info_st *cs;
 | |
|   const char *doc;
 | |
|   int length;
 | |
|   unsigned int flags;
 | |
|   enum enum_ftparser_mode mode;
 | |
| } MYSQL_FTPARSER_PARAM;
 | |
| 
 | |
| /*
 | |
|   Full-text parser descriptor.
 | |
| 
 | |
|   interface_version is, e.g., MYSQL_FTPARSER_INTERFACE_VERSION.
 | |
|   The parsing, initialization, and deinitialization functions are
 | |
|   invoked per SQL statement for which the parser is used.
 | |
| */
 | |
| 
 | |
| struct st_mysql_ftparser
 | |
| {
 | |
|   int interface_version;
 | |
|   int (*parse)(MYSQL_FTPARSER_PARAM *param);
 | |
|   int (*init)(MYSQL_FTPARSER_PARAM *param);
 | |
|   int (*deinit)(MYSQL_FTPARSER_PARAM *param);
 | |
| };
 | |
| 
 | |
| 
 | |
| #ifdef __cplusplus
 | |
| }
 | |
| #endif
 | |
| 
 | |
| #endif
 | |
| 
 | 
