Update Mroonga to the latest version on 2014-10-21T04:51:38+0900

This commit is contained in:
Kentoku SHIBA 2014-10-21 04:51:38 +09:00
commit 7f3d555087
484 changed files with 6263 additions and 3314 deletions

View file

@ -116,6 +116,7 @@ typedef enum {
GRN_CAS_ERROR = -70,
GRN_UNSUPPORTED_COMMAND_VERSION = -71,
GRN_NORMALIZER_ERROR = -72,
GRN_TOKEN_FILTER_ERROR = -73,
} grn_rc;
GRN_API grn_rc grn_init(void);
@ -478,7 +479,8 @@ typedef enum {
GRN_PROC_COMMAND,
GRN_PROC_FUNCTION,
GRN_PROC_HOOK,
GRN_PROC_NORMALIZER
GRN_PROC_NORMALIZER,
GRN_PROC_TOKEN_FILTER
} grn_proc_type;
GRN_API grn_obj *grn_proc_create(grn_ctx *ctx,
@ -790,7 +792,8 @@ typedef enum {
GRN_INFO_II_SPLIT_THRESHOLD,
GRN_INFO_SUPPORT_ZLIB,
GRN_INFO_SUPPORT_LZO,
GRN_INFO_NORMALIZER
GRN_INFO_NORMALIZER,
GRN_INFO_TOKEN_FILTERS
} grn_info_type;
GRN_API grn_obj *grn_obj_get_info(grn_ctx *ctx, grn_obj *obj, grn_info_type type, grn_obj *valuebuf);
@ -1692,6 +1695,8 @@ GRN_API grn_obj *grn_expr_append_const_int(grn_ctx *ctx, grn_obj *expr, int i,
grn_operator op, int nargs);
GRN_API grn_rc grn_expr_append_op(grn_ctx *ctx, grn_obj *expr, grn_operator op, int nargs);
GRN_API grn_rc grn_expr_get_keywords(grn_ctx *ctx, grn_obj *expr, grn_obj *keywords);
GRN_API grn_rc grn_expr_syntax_escape(grn_ctx *ctx,
const char *query, int query_size,
const char *target_characters,

View file

@ -2,5 +2,6 @@ groonga_includedir = $(pkgincludedir)/groonga
# Public plugin API headers shipped with Groonga.
# NOTE(review): presumably installed into $(groonga_includedir) via automake's
# <name>_HEADERS / <name>dir pairing -- confirm against the generated Makefile.
groonga_include_HEADERS = \
plugin.h \
tokenizer.h \
token_filter.h \
nfkc.h \
normalizer.h

View file

@ -0,0 +1,71 @@
/* -*- c-basic-offset: 2 -*- */
/*
Copyright(C) 2014 Brazil
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef GRN_PLUGIN_TOKEN_FILTER_H
#define GRN_PLUGIN_TOKEN_FILTER_H
#include <stddef.h>
#include <groonga/tokenizer.h>
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
/*
  grn_token_filter_init_func is the function type of a token filter's `init'
  callback. It receives the context `ctx', a `table' object and the
  tokenization `mode' (a grn_token_mode), and returns an untyped pointer.
  NOTE(review): the returned pointer is presumably the per-filter state that
  is later handed to the `filter' and `fin' callbacks as `user_data' --
  confirm against the implementation.
*/
typedef void *grn_token_filter_init_func(grn_ctx *ctx,
grn_obj *table,
grn_token_mode mode);
/*
  grn_token_filter_filter_func is the function type of a token filter's
  `filter' callback. It receives the context `ctx', two tokens
  (`current_token' and `next_token') and an untyped `user_data' pointer, and
  returns nothing.
  NOTE(review): presumably `current_token' is the input token and
  `next_token' receives the filtered result, with `user_data' being the
  value returned by the `init' callback -- confirm against the
  implementation.
*/
typedef void grn_token_filter_filter_func(grn_ctx *ctx,
grn_token *current_token,
grn_token *next_token,
void *user_data);
/*
  grn_token_filter_fin_func is the function type of a token filter's `fin'
  callback. It receives the context `ctx' and an untyped `user_data'
  pointer, and returns nothing.
  NOTE(review): presumably responsible for releasing whatever the `init'
  callback returned as `user_data' -- confirm against the implementation.
*/
typedef void grn_token_filter_fin_func(grn_ctx *ctx,
void *user_data);
/*
  grn_token_filter_register() registers a token filter plugin to the
  database which is associated with `ctx'.

  `plugin_name_ptr' and `plugin_name_length' specify the plugin name.
  Alphabetic letters ('A'-'Z' and 'a'-'z'), digits ('0'-'9') and an
  underscore ('_') are the characters allowed in a plugin name.

  `init', `filter' and `fin' specify the plugin functions:
    `init' is called for initializing a token_filter for a document or
    query.
    `filter' is called for filtering tokens one by one.
    `fin' is called for finalizing a token_filter.

  grn_token_filter_register() returns GRN_SUCCESS on success, an error
  code on failure.
*/
GRN_PLUGIN_EXPORT grn_rc grn_token_filter_register(grn_ctx *ctx,
const char *plugin_name_ptr,
int plugin_name_length,
grn_token_filter_init_func *init,
grn_token_filter_filter_func *filter,
grn_token_filter_fin_func *fin);
#ifdef __cplusplus
} /* extern "C" */
#endif /* __cplusplus */
#endif /* GRN_PLUGIN_TOKEN_FILTER_H */

View file

@ -29,6 +29,23 @@ extern "C" {
#define GRN_TOKENIZER_TOKENIZED_DELIMITER_UTF8 "\xEF\xBF\xBE"
#define GRN_TOKENIZER_TOKENIZED_DELIMITER_UTF8_LEN 3
/*
  grn_token_mode describes the purpose of a tokenization.

  `GRN_TOKEN_GET`: Tokenization for search.
  `GRN_TOKEN_ADD`: Tokenization for adding tokens to an index.
  `GRN_TOKEN_DEL`: Tokenization for deleting tokens from an index.

  @since 4.0.7
*/
typedef enum {
  GRN_TOKEN_GET = 0,
  GRN_TOKEN_ADD = 1,
  GRN_TOKEN_DEL = 2
} grn_token_mode;
/*
grn_tokenizer_charlen() returns the length (#bytes) of the first character
in the string specified by `str_ptr' and `str_length'. If the starting bytes
@ -84,7 +101,7 @@ struct _grn_tokenizer_query {
grn_encoding encoding;
unsigned int flags;
grn_bool have_tokenized_delimiter;
unsigned int token_mode;
grn_token_mode token_mode;
};
/*
@ -176,6 +193,21 @@ typedef unsigned int grn_tokenizer_status;
#define GRN_TOKENIZER_CONTINUE GRN_TOKENIZER_TOKEN_CONTINUE
#define GRN_TOKENIZER_LAST GRN_TOKENIZER_TOKEN_LAST
/*
  grn_token is declared here by struct tag only; no definition is visible at
  this point, so tokens are handled through the accessor functions below.
*/
typedef struct _grn_token grn_token;
/*
  grn_token_get_data() returns a grn_obj pointer for `token'.
  NOTE(review): presumably the token's text data, owned by the token --
  confirm ownership and lifetime against the implementation.
*/
GRN_PLUGIN_EXPORT grn_obj *grn_token_get_data(grn_ctx *ctx,
grn_token *token);
/*
  grn_token_set_data() takes a string given by `str_ptr' and `str_length'
  for `token' and returns a grn_rc.
  NOTE(review): presumably replaces the token's data with that string;
  whether the string is copied is not visible here -- confirm.
*/
GRN_PLUGIN_EXPORT grn_rc grn_token_set_data(grn_ctx *ctx,
grn_token *token,
const char *str_ptr,
int str_length);
/*
  grn_token_get_status() returns a grn_tokenizer_status for `token'.
  NOTE(review): presumably the status flags currently attached to the
  token -- confirm.
*/
GRN_PLUGIN_EXPORT grn_tokenizer_status grn_token_get_status(grn_ctx *ctx,
grn_token *token);
/*
  grn_token_set_status() takes a grn_tokenizer_status for `token' and
  returns a grn_rc.
  NOTE(review): presumably overwrites the token's status flags with
  `status' -- confirm.
*/
GRN_PLUGIN_EXPORT grn_rc grn_token_set_status(grn_ctx *ctx,
grn_token *token,
grn_tokenizer_status status);
/*
grn_tokenizer_token_push() pushes the next token into `token'. Note that
grn_tokenizer_token_push() does not make a copy of the given string. This