Update Mroonga to the latest version on 2014-09-21T00:33:44+0900

This commit is contained in:
Kentoku SHIBA 2014-09-21 00:33:45 +09:00
commit 0cc855cdc8
2027 changed files with 460307 additions and 0 deletions

View file

@ -0,0 +1,20 @@
# Copyright(C) 2012 Brazil
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License version 2.1 as published by the Free Software Foundation.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
# Install the public headers, but only when MRN_GROONGA_BUNDLED is not set.
# NOTE(review): presumably the flag is set when Groonga is built as the copy
# bundled inside Mroonga, where installing headers is unwanted - confirm.
if(NOT MRN_GROONGA_BUNDLED)
  # The top-level umbrella header.
  install(
    FILES "${CMAKE_CURRENT_SOURCE_DIR}/groonga.h"
    DESTINATION "${GRN_INCLUDE_DIR}")

  # Everything matching *.h below groonga/; the directory name itself is
  # appended to the destination, so headers land in <dest>/groonga/.
  install(
    DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/groonga"
    DESTINATION "${GRN_INCLUDE_DIR}"
    FILES_MATCHING
    PATTERN "*.h")
endif()

View file

@ -0,0 +1,6 @@
# Recurse into groonga/, which has its own Makefile.am for the plugin headers.
SUBDIRS = groonga
# groonga.h is installed into $(pkgincludedir).
pkginclude_HEADERS = groonga.h
# Ship the CMake build description in the distribution tarball as well.
EXTRA_DIST = \
CMakeLists.txt

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,6 @@
# Install directory for the headers listed below: $(pkgincludedir)/groonga.
groonga_includedir = $(pkgincludedir)/groonga
# Headers installed into $(groonga_includedir).
groonga_include_HEADERS = \
plugin.h \
tokenizer.h \
nfkc.h \
normalizer.h

View file

@ -0,0 +1,32 @@
/* -*- c-basic-offset: 2 -*- */
/* Copyright(C) 2009 Brazil
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef GRN_NFKC_H
#define GRN_NFKC_H
#include <groonga.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
  grn_nfkc_char_type() takes a pointer to unsigned bytes and returns a
  grn_char_type (see "groonga.h" for the possible values).
  NOTE(review): presumably `str' points to the first byte of a UTF-8
  encoded character and the result classifies that character; confirm
  against the implementation, including how many bytes may be read.
 */
GRN_API grn_char_type grn_nfkc_char_type(const unsigned char *str);
#ifdef __cplusplus
}
#endif
#endif /* GRN_NFKC_H */

View file

@ -0,0 +1,55 @@
/* -*- c-basic-offset: 2 -*- */
/*
Copyright(C) 2012 Brazil
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef GROONGA_NORMALIER_H
#define GROONGA_NORMALIER_H
#include <stddef.h>
#include <groonga/plugin.h>
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
/*
grn_normalizer_register() registers a normalizer to the database
which is associated with `ctx'. `name_ptr' and `name_length' specify
the normalizer name. `name_length' can be `-1'. `-1' means that
`name_ptr' is NULL-terminated. Alphabetic letters ('A'-'Z' and
'a'-'z'), digits ('0'-'9') and an underscore ('_') are the
characters allowed in the name. `init', `next' and `fin' specify the
normalizer functions.
NOTE(review): this comment used to describe `init', `next' and `fin'
in tokenizer terms (`init' initializes for a document or query,
`next' extracts tokens one by one, `fin' finalizes) because it was
copied from grn_tokenizer_register(). Confirm which of the three
callbacks a normalizer actually uses and what each one must do.
grn_normalizer_register() returns GRN_SUCCESS on success,
an error code on failure. See "groonga.h" for more details of
grn_proc_func and grn_user_data, that is used as an argument of
grn_proc_func.
*/
GRN_PLUGIN_EXPORT grn_rc grn_normalizer_register(grn_ctx *ctx,
const char *name_ptr,
int name_length,
grn_proc_func *init,
grn_proc_func *next,
grn_proc_func *fin);
#ifdef __cplusplus
} /* extern "C" */
#endif /* __cplusplus */
#endif /* GROONGA_NORMALIER_H */

View file

@ -0,0 +1,152 @@
/* -*- c-basic-offset: 2 -*- */
/*
Copyright(C) 2010-2014 Brazil
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef GRN_PLUGIN_H
#define GRN_PLUGIN_H
#include <stddef.h>
#include <groonga.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
  Symbol names of the three plugin entry points. The macros expand to
  the function names declared below, so a plugin can write e.g.
  `grn_rc GRN_PLUGIN_INIT(grn_ctx *ctx) { ... }' to define one.
 */
#define GRN_PLUGIN_INIT grn_plugin_impl_init
#define GRN_PLUGIN_REGISTER grn_plugin_impl_register
#define GRN_PLUGIN_FIN grn_plugin_impl_fin
/*
  GRN_PLUGIN_EXPORT expands to `__declspec(dllexport)' when _WIN32 or
  _WIN64 is defined and to nothing otherwise.
 */
#if defined(_WIN32) || defined(_WIN64)
# define GRN_PLUGIN_EXPORT __declspec(dllexport)
#else /* defined(_WIN32) || defined(_WIN64) */
# define GRN_PLUGIN_EXPORT
#endif /* defined(_WIN32) || defined(_WIN64) */
/*
  Prototypes of the entry points. Each one receives the context and
  returns a grn_rc.
  NOTE(review): presumably they are called by Groonga when the plugin
  is loaded, registered and unloaded respectively; confirm.
 */
GRN_PLUGIN_EXPORT grn_rc GRN_PLUGIN_INIT(grn_ctx *ctx);
GRN_PLUGIN_EXPORT grn_rc GRN_PLUGIN_REGISTER(grn_ctx *ctx);
GRN_PLUGIN_EXPORT grn_rc GRN_PLUGIN_FIN(grn_ctx *ctx);
/*
Don't call these functions directly. Use GRN_PLUGIN_MALLOC(),
GRN_PLUGIN_REALLOC() and GRN_PLUGIN_FREE() instead.
`file', `line' and `func' identify the call site; the macros below
fill them in automatically.
*/
GRN_API void *grn_plugin_malloc(grn_ctx *ctx, size_t size, const char *file,
int line, const char *func);
GRN_API void *grn_plugin_realloc(grn_ctx *ctx, void *ptr, size_t size,
const char *file, int line, const char *func);
GRN_API void grn_plugin_free(grn_ctx *ctx, void *ptr, const char *file,
int line, const char *func);
/*
  Allocation wrappers for plugins. Each macro forwards its arguments to
  the matching grn_plugin_*() function and appends __FILE__, __LINE__
  and __FUNCTION__ of the place where the macro is used.
 */
#define GRN_PLUGIN_MALLOC(ctx, size) \
grn_plugin_malloc((ctx), (size), __FILE__, __LINE__, __FUNCTION__)
#define GRN_PLUGIN_REALLOC(ctx, ptr, size) \
grn_plugin_realloc((ctx), (ptr), (size), __FILE__, __LINE__, __FUNCTION__)
#define GRN_PLUGIN_FREE(ctx, ptr) \
grn_plugin_free((ctx), (ptr), __FILE__, __LINE__, __FUNCTION__)
/*
  GRN_PLUGIN_LOG() forwards `ctx', `level' and the remaining
  (printf-style) arguments unchanged to GRN_LOG().
 */
#define GRN_PLUGIN_LOG(ctx, level, ...) \
GRN_LOG((ctx), (level), __VA_ARGS__)
/*
Don't call grn_plugin_set_error() directly. This function is used in
GRN_PLUGIN_SET_ERROR().
`format' is the 7th parameter and is followed by printf-style
arguments, which is what GRN_ATTRIBUTE_PRINTF(7) declares.
*/
GRN_API void grn_plugin_set_error(grn_ctx *ctx, grn_log_level level,
grn_rc error_code,
const char *file, int line, const char *func,
const char *format, ...) GRN_ATTRIBUTE_PRINTF(7);
/*
Don't call these functions directly. grn_plugin_backtrace() and
grn_plugin_logtrace() are used in GRN_PLUGIN_SET_ERROR().
*/
GRN_API void grn_plugin_backtrace(grn_ctx *ctx);
GRN_API void grn_plugin_logtrace(grn_ctx *ctx, grn_log_level level);
/*
Don't use GRN_PLUGIN_SET_ERROR() directly. This macro is used in
GRN_PLUGIN_ERROR().
The macro performs four steps in order: grn_plugin_set_error() with
the call site's __FILE__, __LINE__ and __FUNCTION__, GRN_LOG() with
the same message, grn_plugin_backtrace() and grn_plugin_logtrace().
`ctx', `level' and the variadic arguments are expanded more than once,
so don't pass expressions that have side effects.
*/
#define GRN_PLUGIN_SET_ERROR(ctx, level, error_code, ...) do { \
grn_plugin_set_error(ctx, level, error_code, \
__FILE__, __LINE__, __FUNCTION__, __VA_ARGS__); \
GRN_LOG(ctx, level, __VA_ARGS__); \
grn_plugin_backtrace(ctx); \
grn_plugin_logtrace(ctx, level); \
} while (0)
/*
  GRN_PLUGIN_ERROR() is GRN_PLUGIN_SET_ERROR() with the log level fixed
  to GRN_LOG_ERROR. The variadic arguments are a printf-style format
  string followed by its arguments.
 */
#define GRN_PLUGIN_ERROR(ctx, error_code, ...) \
GRN_PLUGIN_SET_ERROR(ctx, GRN_LOG_ERROR, error_code, __VA_ARGS__)
/*
  grn_plugin_mutex is an opaque mutex type for plugins; the structure
  is not defined in this header, so use it only through the functions
  below.
 */
typedef struct _grn_plugin_mutex grn_plugin_mutex;
/*
  grn_plugin_mutex_open() returns a new grn_plugin_mutex. Release it
  with grn_plugin_mutex_close().
  NOTE(review): presumably returns NULL on failure; confirm.
 */
GRN_API grn_plugin_mutex *grn_plugin_mutex_open(grn_ctx *ctx);
/*
grn_plugin_mutex_create() is deprecated. Use grn_plugin_mutex_open()
instead.
*/
GRN_API grn_plugin_mutex *grn_plugin_mutex_create(grn_ctx *ctx);
/*
  grn_plugin_mutex_close() releases `mutex'.
 */
GRN_API void grn_plugin_mutex_close(grn_ctx *ctx, grn_plugin_mutex *mutex);
/*
grn_plugin_mutex_destroy() is deprecated. Use grn_plugin_mutex_close()
instead.
*/
GRN_API void grn_plugin_mutex_destroy(grn_ctx *ctx, grn_plugin_mutex *mutex);
/*
  grn_plugin_mutex_lock() and grn_plugin_mutex_unlock() lock and unlock
  `mutex'.
 */
GRN_API void grn_plugin_mutex_lock(grn_ctx *ctx, grn_plugin_mutex *mutex);
GRN_API void grn_plugin_mutex_unlock(grn_ctx *ctx, grn_plugin_mutex *mutex);
/*
  Helpers for use inside a grn_proc_func; `user_data' is the
  grn_user_data argument (see "groonga.h").
  NOTE(review): presumably grn_plugin_proc_alloc() allocates a
  grn_obj of the given `domain' and `flags' that belongs to the running
  proc, and the get_var functions look up a proc variable by name or
  by position; confirm ownership and the meaning of a negative
  `name_size' against the implementation.
 */
GRN_API grn_obj *grn_plugin_proc_alloc(grn_ctx *ctx, grn_user_data *user_data,
grn_id domain, grn_obj_flags flags);
GRN_API grn_obj *grn_plugin_proc_get_var(grn_ctx *ctx, grn_user_data *user_data,
const char *name, int name_size);
GRN_API grn_obj *grn_plugin_proc_get_var_by_offset(grn_ctx *ctx,
grn_user_data *user_data,
unsigned int offset);
/*
  NOTE(review): presumably returns the base directory of the Groonga
  installation on Windows; confirm the return value on other platforms.
 */
GRN_API const char *grn_plugin_win32_base_dir(void);
/*
  grn_plugin_charlen() and grn_plugin_isspace() are the replacements
  for the deprecated grn_tokenizer_charlen() and grn_tokenizer_isspace()
  declared in "groonga/tokenizer.h"; see that header for the meaning of
  the arguments and of the return value.
 */
GRN_API int grn_plugin_charlen(grn_ctx *ctx, const char *str_ptr,
unsigned int str_length, grn_encoding encoding);
GRN_API int grn_plugin_isspace(grn_ctx *ctx, const char *str_ptr,
unsigned int str_length, grn_encoding encoding);
/*
  grn_plugin_expr_var_init() initializes `var' with the given name and
  returns a grn_rc.
  NOTE(review): the exact initial state of `var' is not visible here.
 */
GRN_API grn_rc grn_plugin_expr_var_init(grn_ctx *ctx,
grn_expr_var *var,
const char *name,
int name_size);
/*
  grn_plugin_command_create() takes a command name, the function that
  implements the command and an array of `n_vars' variables, and
  returns a grn_obj pointer.
  NOTE(review): presumably the result is the new command object, or
  NULL on failure; confirm.
 */
GRN_API grn_obj * grn_plugin_command_create(grn_ctx *ctx,
const char *name,
int name_size,
grn_proc_func func,
unsigned int n_vars,
grn_expr_var *vars);
#ifdef __cplusplus
}
#endif
#endif /* GRN_PLUGIN_H */

View file

@ -0,0 +1,225 @@
/* -*- c-basic-offset: 2 -*- */
/*
Copyright(C) 2012 Brazil
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef GRN_PLUGIN_TOKENIZER_H
#define GRN_PLUGIN_TOKENIZER_H
#include <stddef.h>
#include <groonga/plugin.h>
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
/*
  The special "tokenized delimiter" character in UTF-8: the three bytes
  EF BF BE encode U+FFFE. The second macro is its length in bytes.
 */
#define GRN_TOKENIZER_TOKENIZED_DELIMITER_UTF8 "\xEF\xBF\xBE"
#define GRN_TOKENIZER_TOKENIZED_DELIMITER_UTF8_LEN 3
/*
grn_tokenizer_charlen() returns the length (#bytes) of the first character
in the string specified by `str_ptr' and `str_length'. If the starting bytes
are invalid as a character, grn_tokenizer_charlen() returns 0. See
grn_encoding in "groonga.h" for more details of `encoding'.
Deprecated. Use grn_plugin_charlen() in "groonga/plugin.h" instead.
*/
int grn_tokenizer_charlen(grn_ctx *ctx, const char *str_ptr,
unsigned int str_length, grn_encoding encoding);
/*
grn_tokenizer_isspace() returns the length (#bytes) of the first character
in the string specified by `str_ptr' and `str_length' if it is a space
character. Otherwise, grn_tokenizer_isspace() returns 0.
Deprecated. Use grn_plugin_isspace() in "groonga/plugin.h" instead.
*/
int grn_tokenizer_isspace(grn_ctx *ctx, const char *str_ptr,
unsigned int str_length, grn_encoding encoding);
/*
grn_tokenizer_is_tokenized_delimiter() returns whether the first
character in the string specified by `str_ptr' and `str_length' is the
special tokenized delimiter character or not.
NOTE(review): unlike grn_tokenizer_have_tokenized_delimiter() below,
this declaration has no GRN_PLUGIN_EXPORT; confirm whether that is
intentional.
*/
grn_bool grn_tokenizer_is_tokenized_delimiter(grn_ctx *ctx,
const char *str_ptr,
unsigned int str_length,
grn_encoding encoding);
/*
grn_tokenizer_have_tokenized_delimiter() returns whether the string
specified by `str_ptr' and `str_length' contains the special
tokenized delimiter character or not.
*/
GRN_PLUGIN_EXPORT grn_bool grn_tokenizer_have_tokenized_delimiter(grn_ctx *ctx,
const char *str_ptr,
unsigned int str_length,
grn_encoding encoding);
/*
grn_tokenizer_query is a structure for storing a query. See the following
functions. Objects are created by grn_tokenizer_query_open() and
released by grn_tokenizer_query_close().
NOTE(review): the field notes below are inferred from the names and
types only; confirm them against the implementation.
*/
typedef struct _grn_tokenizer_query grn_tokenizer_query;
struct _grn_tokenizer_query {
/* presumably the normalized form of the query */
grn_obj *normalized_query;
/* presumably a buffer that holds a copy of the query */
char *query_buf;
/* presumably the text to be tokenized and its length */
const char *ptr;
unsigned int length;
/* encoding of the text; see grn_encoding in "groonga.h" */
grn_encoding encoding;
unsigned int flags;
/* presumably GRN_TRUE when the text contains the tokenized delimiter */
grn_bool have_tokenized_delimiter;
unsigned int token_mode;
};
/*
grn_tokenizer_query_open() parses `args' and returns a new object of
grn_tokenizer_query. The new object stores information of the query.
grn_tokenizer_query_open() normalizes the query if the target table
requires normalization. grn_tokenizer_query_open() returns NULL if
something goes wrong. Note that grn_tokenizer_query_open() must be called
just once in the function that initializes a tokenizer.
See `GRN_STRING_*' flags for `normalize_flags'.
Release the returned object with grn_tokenizer_query_close().
*/
GRN_PLUGIN_EXPORT grn_tokenizer_query *grn_tokenizer_query_open(grn_ctx *ctx,
int num_args, grn_obj **args,
unsigned int normalize_flags);
/*
grn_tokenizer_query_create() is deprecated. Use grn_tokenizer_query_open()
instead. Note that this variant has no `normalize_flags' parameter.
*/
grn_tokenizer_query *grn_tokenizer_query_create(grn_ctx *ctx,
int num_args, grn_obj **args);
/*
grn_tokenizer_query_close() finalizes an object of grn_tokenizer_query
and then frees memory allocated for that object.
*/
GRN_PLUGIN_EXPORT void grn_tokenizer_query_close(grn_ctx *ctx, grn_tokenizer_query *query);
/*
grn_tokenizer_query_destroy() is deprecated. Use grn_tokenizer_query_close()
instead.
*/
void grn_tokenizer_query_destroy(grn_ctx *ctx, grn_tokenizer_query *query);
/*
grn_tokenizer_token is needed to return tokens. A grn_tokenizer_token object
stores a token to be returned and it must be maintained until a request for
next token or finalization comes.
*/
typedef struct _grn_tokenizer_token grn_tokenizer_token;
struct _grn_tokenizer_token {
/* NOTE(review): presumably the token text set by grn_tokenizer_token_push() */
grn_obj str;
/* NOTE(review): presumably the grn_tokenizer_status given with the token */
grn_obj status;
};
/*
grn_tokenizer_token_init() initializes `token'. Note that an initialized
object must be finalized by grn_tokenizer_token_fin().
*/
GRN_PLUGIN_EXPORT void grn_tokenizer_token_init(grn_ctx *ctx, grn_tokenizer_token *token);
/*
grn_tokenizer_token_fin() finalizes `token' that has been initialized by
grn_tokenizer_token_init().
*/
GRN_PLUGIN_EXPORT void grn_tokenizer_token_fin(grn_ctx *ctx, grn_tokenizer_token *token);
/*
* grn_tokenizer_status is a flag set for tokenizer status codes.
* If a document or query contains no tokens, push an empty string with
* GRN_TOKENIZER_TOKEN_LAST as a token.
* Every flag except GRN_TOKENIZER_TOKEN_CONTINUE is a distinct bit, so
* flags can be combined with bitwise OR.
*/
typedef unsigned int grn_tokenizer_status;
/*
* GRN_TOKENIZER_TOKEN_CONTINUE means that the next token is not the last one.
* Its value is 0, i.e. no flag is set.
*/
#define GRN_TOKENIZER_TOKEN_CONTINUE (0)
/* GRN_TOKENIZER_TOKEN_LAST means that the next token is the last one. */
#define GRN_TOKENIZER_TOKEN_LAST (0x01L<<0)
/* GRN_TOKENIZER_TOKEN_OVERLAP: TODO(review) meaning is not documented yet. */
#define GRN_TOKENIZER_TOKEN_OVERLAP (0x01L<<1)
/* GRN_TOKENIZER_TOKEN_UNMATURED: TODO(review) meaning is not documented yet. */
#define GRN_TOKENIZER_TOKEN_UNMATURED (0x01L<<2)
/* GRN_TOKENIZER_TOKEN_REACH_END: TODO(review) meaning is not documented yet. */
#define GRN_TOKENIZER_TOKEN_REACH_END (0x01L<<3)
/* GRN_TOKENIZER_TOKEN_SKIP means that the token is skipped. */
#define GRN_TOKENIZER_TOKEN_SKIP (0x01L<<4)
/* GRN_TOKENIZER_TOKEN_SKIP_WITH_POSITION means that the token and its position are skipped. */
#define GRN_TOKENIZER_TOKEN_SKIP_WITH_POSITION (0x01L<<5)
/*
* GRN_TOKENIZER_CONTINUE and GRN_TOKENIZER_LAST are deprecated. They
* are just for backward compatibility. Use
* GRN_TOKENIZER_TOKEN_CONTINUE and GRN_TOKENIZER_TOKEN_LAST
* instead.
*/
#define GRN_TOKENIZER_CONTINUE GRN_TOKENIZER_TOKEN_CONTINUE
#define GRN_TOKENIZER_LAST GRN_TOKENIZER_TOKEN_LAST
/*
grn_tokenizer_token_push() pushes the next token into `token'. The token
text is given by `str_ptr' and `str_length' (#bytes). Note that
grn_tokenizer_token_push() does not make a copy of the given string. This
means that you have to keep the memory that holds the string valid.
Also note that the grn_tokenizer_token object must be maintained until the
request for the next token or finalization comes. See grn_tokenizer_status in
this header for more details of `status'.
*/
GRN_PLUGIN_EXPORT void grn_tokenizer_token_push(grn_ctx *ctx, grn_tokenizer_token *token,
const char *str_ptr, unsigned int str_length,
grn_tokenizer_status status);
/*
grn_tokenizer_tokenized_delimiter_next() extracts the next token
from the string specified by `str_ptr' and `str_length' and pushes
the next token into `token'. It returns a pointer to the rest of the
string, i.e. the part after the extracted token. The returned pointer
may be `NULL' when all tokens are extracted.
NOTE(review): the length of the returned rest is not returned;
presumably the caller derives it from the original `str_ptr' and
`str_length'. Confirm.
*/
GRN_PLUGIN_EXPORT const char *grn_tokenizer_tokenized_delimiter_next(grn_ctx *ctx,
grn_tokenizer_token *token,
const char *str_ptr,
unsigned int str_length,
grn_encoding encoding);
/*
grn_tokenizer_register() registers a plugin to the database which is
associated with `ctx'. `plugin_name_ptr' and `plugin_name_length' specify the
plugin name. Alphabetic letters ('A'-'Z' and 'a'-'z'), digits ('0'-'9') and
an underscore ('_') are the characters allowed in the name. `init', `next'
and `fin' specify the plugin functions. `init' is called for initializing a
tokenizer for a document or query. `next' is called for extracting tokens one
by one. `fin' is called for finalizing a tokenizer. grn_tokenizer_register()
returns GRN_SUCCESS on success, an error code on failure. See "groonga.h" for
more details of grn_proc_func and grn_user_data, that is used as an argument
of grn_proc_func.
Note that `plugin_name_length' is unsigned here.
*/
GRN_PLUGIN_EXPORT grn_rc grn_tokenizer_register(grn_ctx *ctx, const char *plugin_name_ptr,
unsigned int plugin_name_length,
grn_proc_func *init, grn_proc_func *next,
grn_proc_func *fin);
#ifdef __cplusplus
} /* extern "C" */
#endif /* __cplusplus */
#endif /* GRN_PLUGIN_TOKENIZER_H */