mirror of
https://github.com/MariaDB/server.git
synced 2025-02-20 20:33:15 +01:00

The easiest way to compile and test the server with UBSAN is to run: ./BUILD/compile-pentium64-ubsan and then run mysql-test-run. After this commit, one should be able to run this without any UBSAN warnings. There are still a few compiler warnings that should be fixed at some point, but these do not expose any real bugs. The 'special' cases where we disable, suppress or circumvent UBSAN are: - ref10 source (as here we intentionally do some shifts that UBSAN complains about). - x86 version of optimized int#korr() methods. UBSAN does not like unaligned memory access of integers. Fixed by using byte_order_generic.h when compiling with UBSAN. - We use a smaller thread stack with ASAN and UBSAN, which forced me to disable a few tests that print the thread stack size. - Verifying class types does not work for shared libraries. I added suppression in mysql-test-run.pl for this case. - Added '#ifdef WITH_UBSAN' when using integer arithmetic where it is safe to have overflows (two cases, in item_func.cc). Things fixed: - Don't left shift signed values (byte_order_generic.h, mysqltest.c, item_sum.cc and many more) - Don't assign non-existing values to enum variables. - Ensure that bool and enum values are properly initialized in constructors. This was needed as UBSAN checks that these types have correct values when one copies an object. (gcalc_tools.h, ha_partition.cc, item_sum.cc, partition_element.h ...) - Ensure we do not call handler functions on unallocated objects or deleted objects. (events.cc, sql_acl.cc). - Fixed bugs in Item_sp::Item_sp() where we did not call the constructor on the Query_arena object. - Fixed several casts of objects to an incompatible class! (Item.cc, Item_buff.cc, item_timefunc.cc, opt_subselect.cc, sql_acl.cc, sql_select.cc ...) - Ensure we do not do integer arithmetic that causes over or underflows. This includes also ++ and -- of integers. (Item_func.cc, Item_strfunc.cc, item_timefunc.cc, sql_base.cc ...) 
- Added JSON_VALUE_UNINITIALIZED to json_value_types and ensure that value_type is initialized to this instead of to -1, which is not a valid enum value for json_value_types. - Ensure we do not call memcpy() when second argument could be null. - Fixed that Item_func_str::make_empty_result() creates an empty string instead of a null string (safer as it ensures we do not do arithmetic on null strings). Other things: - Changed struct st_position to an OBJECT and added an initialization function to it to ensure that we do not copy or use uninitialized members. The change to a class was also motivated by the fact that we used "struct st_position" and POSITION randomly throughout the code, which was confusing. - Notably big rewrite in sql_acl.cc to avoid using deleted objects. - Changed in sql_partition to use '^' instead of '-'. This is safe as the operator is either 0 or 0x8000000000000000ULL. - Added check for select_nr < INT_MAX in JOIN::build_explain() to avoid bug when get_select() could return NULL. - Reordered elements in POSITION for better alignment. - Changed sql_test.cc::print_plan() to use pointers instead of objects. - Fixed bug in find_set() where we could execute '1 << -1'. - Added variable have_sanitizer, used by mtr. (This variable was previously only in 10.5 and up). It can now have one of two values: ASAN or UBSAN. - Moved ~Archive_share() from ha_archive.cc to ha_archive.h and marked it virtual. This was an effort to get UBSAN to work with loaded storage engines. I kept the change as the new place is better. - Added in CONNECT engine COLBLK::SetName(), to get around a wrong cast in tabutil.cpp. - Added HAVE_REPLICATION around usage of rgi_slave, to get embedded server to compile with UBSAN. (Patch from Marko). - Added #ifdef for powerpc64 to avoid a bug in old gcc versions related to integer arithmetic. 
Changes that should not be needed but had to be done to suppress warnings from UBSAN: - Added static_cast<uint16_t> around shift to get rid of a LOT of compiler warnings when using UBSAN. - Had to change some divisions ('/') by power-of-2 integers to shifts to get rid of some compile time warnings. Reviewed by: - Json changes: Alexey Botchkov - Charset changes in ctype-uca.c: Alexander Barkov - InnoDB changes & Embedded server: Marko Mäkelä - sql_acl.cc changes: Vicențiu Ciorbaru - build_explain() changes: Sergey Petrunia
3276 lines
88 KiB
C
3276 lines
88 KiB
C
/* -*- c-basic-offset: 2 -*- */
|
|
/* Copyright(C) 2009-2016 Brazil
|
|
|
|
This library is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU Lesser General Public
|
|
License version 2.1 as published by the Free Software Foundation.
|
|
|
|
This library is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Lesser General Public License for more details.
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
License along with this library; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
|
|
*/
|
|
#include "grn.h"
|
|
#include <limits.h>
|
|
#include <stdarg.h>
|
|
#include <string.h>
|
|
#include "grn_db.h"
|
|
#include "grn_str.h"
|
|
#include "grn_nfkc.h"
|
|
|
|
#ifndef _ISOC99_SOURCE
|
|
#define _ISOC99_SOURCE
|
|
#endif /* _ISOC99_SOURCE */
|
|
#include <math.h>
|
|
|
|
#if defined(HAVE__GMTIME64_S) && defined(__GNUC__)
|
|
# ifdef _WIN64
|
|
# define gmtime_s(tm, time) _gmtime64_s(tm, time)
|
|
# else /* _WIN64 */
|
|
# define gmtime_s(tm, time) _gmtime32_s(tm, time)
|
|
# endif /* _WIN64 */
|
|
#endif /* defined(HAVE__GMTIME64_S) && defined(__GNUC__) */
|
|
|
|
inline static int
|
|
grn_str_charlen_utf8(grn_ctx *ctx, const unsigned char *str, const unsigned char *end)
|
|
{
|
|
/* MEMO: This function allows non-null-terminated string as str. */
|
|
/* But requires the end of string. */
|
|
if (end <= str || !*str) {
|
|
return 0;
|
|
}
|
|
if (*str & 0x80) {
|
|
int i;
|
|
int len;
|
|
GRN_BIT_SCAN_REV(~(((uint) *str) << 24), len);
|
|
len = 31 - len;
|
|
if ((unsigned int)(len - 2) >= 3) { /* (len == 1 || len >= 5) */
|
|
GRN_LOG(ctx, GRN_LOG_WARNING,
|
|
"grn_str_charlen_utf8(): first byte is invalid");
|
|
return 0;
|
|
}
|
|
if (str + len > end) {
|
|
GRN_LOG(ctx, GRN_LOG_WARNING,
|
|
"grn_str_charlen_utf8(): incomplete character");
|
|
return 0;
|
|
}
|
|
for (i = 1; i < len; ++i) {
|
|
if ((str[i] & 0xc0) != 0x80) {
|
|
GRN_LOG(ctx, GRN_LOG_WARNING,
|
|
"grn_str_charlen_utf8(): <%d>th byte is invalid",
|
|
i + 1);
|
|
return 0;
|
|
}
|
|
}
|
|
return len;
|
|
} else {
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
unsigned int
|
|
grn_str_charlen(grn_ctx *ctx, const char *str, grn_encoding encoding)
|
|
{
|
|
/* MEMO: This function requires null-terminated string as str.*/
|
|
unsigned char *p = (unsigned char *) str;
|
|
if (!*p) { return 0; }
|
|
switch (encoding) {
|
|
case GRN_ENC_EUC_JP :
|
|
if (*p & 0x80) {
|
|
if (*(p + 1)) {
|
|
return 2;
|
|
} else {
|
|
/* This is invalid character */
|
|
GRN_LOG(ctx, GRN_LOG_WARNING, "invalid euc-jp string end on grn_str_charlen");
|
|
return 0;
|
|
}
|
|
}
|
|
return 1;
|
|
case GRN_ENC_UTF8 :
|
|
if (*p & 0x80) {
|
|
int b, w;
|
|
size_t size;
|
|
for (b = 0x40, w = 0; b && (*p & b); b >>= 1, w++);
|
|
if (!w) {
|
|
GRN_LOG(ctx, GRN_LOG_WARNING, "invalid utf8 string(1) on grn_str_charlen");
|
|
return 0;
|
|
}
|
|
for (size = 1; w--; size++) {
|
|
if (!*++p || (*p & 0xc0) != 0x80) {
|
|
GRN_LOG(ctx, GRN_LOG_WARNING, "invalid utf8 string(2) on grn_str_charlen");
|
|
return 0;
|
|
}
|
|
}
|
|
return size;
|
|
} else {
|
|
return 1;
|
|
}
|
|
case GRN_ENC_SJIS :
|
|
if (*p & 0x80) {
|
|
/* we regard 0xa0 as JIS X 0201 KANA. adjusted to other tools. */
|
|
if (0xa0 <= *p && *p <= 0xdf) {
|
|
/* hankaku-kana */
|
|
return 1;
|
|
} else if (!(*(p + 1))) {
|
|
/* This is invalid character */
|
|
GRN_LOG(ctx, GRN_LOG_WARNING, "invalid sjis string end on grn_str_charlen");
|
|
return 0;
|
|
} else {
|
|
return 2;
|
|
}
|
|
} else {
|
|
return 1;
|
|
}
|
|
default :
|
|
return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int
|
|
grn_charlen_(grn_ctx *ctx, const char *str, const char *end, grn_encoding encoding)
|
|
{
|
|
/* MEMO: This function allows non-null-terminated string as str. */
|
|
/* But requires the end of string. */
|
|
unsigned char *p = (unsigned char *) str;
|
|
if (p >= (unsigned char *)end) { return 0; }
|
|
switch (encoding) {
|
|
case GRN_ENC_EUC_JP :
|
|
if (*p & 0x80) {
|
|
if ((p + 1) < (unsigned char *)end) {
|
|
return 2;
|
|
} else {
|
|
/* This is invalid character */
|
|
GRN_LOG(ctx, GRN_LOG_WARNING, "invalid euc-jp string end on grn_charlen");
|
|
return 0;
|
|
}
|
|
}
|
|
return 1;
|
|
case GRN_ENC_UTF8 :
|
|
return grn_str_charlen_utf8(ctx, p, (unsigned char *)end);
|
|
case GRN_ENC_SJIS :
|
|
if (*p & 0x80) {
|
|
/* we regard 0xa0 as JIS X 0201 KANA. adjusted to other tools. */
|
|
if (0xa0 <= *p && *p <= 0xdf) {
|
|
/* hankaku-kana */
|
|
return 1;
|
|
} else if (++p >= (unsigned char *)end) {
|
|
/* This is invalid character */
|
|
GRN_LOG(ctx, GRN_LOG_WARNING, "invalid sjis string end on grn_charlen");
|
|
return 0;
|
|
} else {
|
|
return 2;
|
|
}
|
|
} else {
|
|
return 1;
|
|
}
|
|
default :
|
|
return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int
|
|
grn_charlen(grn_ctx *ctx, const char *str, const char *end)
|
|
{
|
|
return grn_charlen_(ctx, str, end, ctx->encoding);
|
|
}
|
|
|
|
/*
 * ASCII replacements for double-byte symbols, indexed by the second byte
 * of the character minus a per-encoding base offset (see the lookups in
 * normalize_euc() and normalize_sjis()). A 0 entry means "no ASCII
 * equivalent". The trailing comment on each row is the index of its
 * first element.
 */
static unsigned char symbol[] = {
  ',',  '.',  0,    ':',  ';',  '?',  '!',  0,     /*  0 */
  0,    0,    '`',  0,    '^',  '~',  '_',  0,     /*  8 */
  0,    0,    0,    0,    0,    0,    0,    0,     /* 16 */
  0,    '-',  '-',  '/',  '\\', 0,    0,    '|',   /* 24 */
  0,    0,    0,    '\'', 0,    '"',  '(',  ')',   /* 32 */
  0,    0,    '[',  ']',  '{',  '}',  0,    0,     /* 40 */
  0,    0,    0,    0,    0,    0,    0,    0,     /* 48 */
  '+',  '-',  0,    0,    0,    '=',  0,    '<',   /* 56 */
  '>',  0,    0,    0,    0,    0,    0,    0,     /* 64 */
  0,    0,    0,    0,    '$',  0,    0,    '%',   /* 72 */
  '#',  '&',  '*',  '@',  0,    0,    0,    0,     /* 80 */
  0,    0,    0,    0                              /* 88 */
};
|
|
|
|
inline static grn_rc
|
|
normalize_euc(grn_ctx *ctx, grn_str *nstr)
|
|
{
|
|
static uint16_t hankana[] = {
|
|
0xa1a1, 0xa1a3, 0xa1d6, 0xa1d7, 0xa1a2, 0xa1a6, 0xa5f2, 0xa5a1, 0xa5a3,
|
|
0xa5a5, 0xa5a7, 0xa5a9, 0xa5e3, 0xa5e5, 0xa5e7, 0xa5c3, 0xa1bc, 0xa5a2,
|
|
0xa5a4, 0xa5a6, 0xa5a8, 0xa5aa, 0xa5ab, 0xa5ad, 0xa5af, 0xa5b1, 0xa5b3,
|
|
0xa5b5, 0xa5b7, 0xa5b9, 0xa5bb, 0xa5bd, 0xa5bf, 0xa5c1, 0xa5c4, 0xa5c6,
|
|
0xa5c8, 0xa5ca, 0xa5cb, 0xa5cc, 0xa5cd, 0xa5ce, 0xa5cf, 0xa5d2, 0xa5d5,
|
|
0xa5d8, 0xa5db, 0xa5de, 0xa5df, 0xa5e0, 0xa5e1, 0xa5e2, 0xa5e4, 0xa5e6,
|
|
0xa5e8, 0xa5e9, 0xa5ea, 0xa5eb, 0xa5ec, 0xa5ed, 0xa5ef, 0xa5f3, 0xa1ab,
|
|
0xa1eb
|
|
};
|
|
static unsigned char dakuten[] = {
|
|
0xf4, 0, 0, 0, 0, 0xac, 0, 0xae, 0, 0xb0, 0, 0xb2, 0, 0xb4, 0, 0xb6, 0,
|
|
0xb8, 0, 0xba, 0, 0xbc, 0, 0xbe, 0, 0xc0, 0, 0xc2, 0, 0, 0xc5, 0, 0xc7,
|
|
0, 0xc9, 0, 0, 0, 0, 0, 0, 0xd0, 0, 0, 0xd3, 0, 0, 0xd6, 0, 0, 0xd9, 0,
|
|
0, 0xdc
|
|
};
|
|
static unsigned char handaku[] = {
|
|
0xd1, 0, 0, 0xd4, 0, 0, 0xd7, 0, 0, 0xda, 0, 0, 0xdd
|
|
};
|
|
int16_t *ch;
|
|
const unsigned char *s, *s_, *e;
|
|
unsigned char *d, *d0, *d_, b;
|
|
uint_least8_t *cp, *ctypes, ctype;
|
|
size_t size = nstr->orig_blen, length = 0;
|
|
int removeblankp = nstr->flags & GRN_STR_REMOVEBLANK;
|
|
if (!(nstr->norm = GRN_MALLOC(size * 2 + 1))) {
|
|
return GRN_NO_MEMORY_AVAILABLE;
|
|
}
|
|
d0 = (unsigned char *) nstr->norm;
|
|
if (nstr->flags & GRN_STR_WITH_CHECKS) {
|
|
if (!(nstr->checks = GRN_MALLOC(size * 2 * sizeof(int16_t) + 1))) {
|
|
GRN_FREE(nstr->norm);
|
|
nstr->norm = NULL;
|
|
return GRN_NO_MEMORY_AVAILABLE;
|
|
}
|
|
}
|
|
ch = nstr->checks;
|
|
if (nstr->flags & GRN_STR_WITH_CTYPES) {
|
|
if (!(nstr->ctypes = GRN_MALLOC(size + 1))) {
|
|
GRN_FREE(nstr->checks);
|
|
GRN_FREE(nstr->norm);
|
|
nstr->checks = NULL;
|
|
nstr->norm = NULL;
|
|
return GRN_NO_MEMORY_AVAILABLE;
|
|
}
|
|
}
|
|
cp = ctypes = nstr->ctypes;
|
|
e = (unsigned char *)nstr->orig + size;
|
|
for (s = s_ = (unsigned char *) nstr->orig, d = d_ = d0; s < e; s++) {
|
|
if ((*s & 0x80)) {
|
|
if (((s + 1) < e) && (*(s + 1) & 0x80)) {
|
|
unsigned char c1 = *s++, c2 = *s, c3 = 0;
|
|
switch (c1 >> 4) {
|
|
case 0x08 :
|
|
if (c1 == 0x8e && 0xa0 <= c2 && c2 <= 0xdf) {
|
|
uint16_t c = hankana[c2 - 0xa0];
|
|
switch (c) {
|
|
case 0xa1ab :
|
|
if (d > d0 + 1 && d[-2] == 0xa5
|
|
&& 0xa6 <= d[-1] && d[-1] <= 0xdb && (b = dakuten[d[-1] - 0xa6])) {
|
|
*(d - 1) = b;
|
|
if (ch) { ch[-1] += 2; s_ += 2; }
|
|
continue;
|
|
} else {
|
|
*d++ = c >> 8; *d = c & 0xff;
|
|
}
|
|
break;
|
|
case 0xa1eb :
|
|
if (d > d0 + 1 && d[-2] == 0xa5
|
|
&& 0xcf <= d[-1] && d[-1] <= 0xdb && (b = handaku[d[-1] - 0xcf])) {
|
|
*(d - 1) = b;
|
|
if (ch) { ch[-1] += 2; s_ += 2; }
|
|
continue;
|
|
} else {
|
|
*d++ = c >> 8; *d = c & 0xff;
|
|
}
|
|
break;
|
|
default :
|
|
*d++ = c >> 8; *d = c & 0xff;
|
|
break;
|
|
}
|
|
ctype = GRN_CHAR_KATAKANA;
|
|
} else {
|
|
*d++ = c1; *d = c2;
|
|
ctype = GRN_CHAR_OTHERS;
|
|
}
|
|
break;
|
|
case 0x09 :
|
|
*d++ = c1; *d = c2;
|
|
ctype = GRN_CHAR_OTHERS;
|
|
break;
|
|
case 0x0a :
|
|
switch (c1 & 0x0f) {
|
|
case 1 :
|
|
switch (c2) {
|
|
case 0xbc :
|
|
*d++ = c1; *d = c2;
|
|
ctype = GRN_CHAR_KATAKANA;
|
|
break;
|
|
case 0xb9 :
|
|
*d++ = c1; *d = c2;
|
|
ctype = GRN_CHAR_KANJI;
|
|
break;
|
|
case 0xa1 :
|
|
if (removeblankp) {
|
|
if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
|
|
continue;
|
|
} else {
|
|
*d = ' ';
|
|
ctype = GRN_STR_BLANK|GRN_CHAR_SYMBOL;
|
|
}
|
|
break;
|
|
default :
|
|
if (c2 >= 0xa4 && (c3 = symbol[c2 - 0xa4])) {
|
|
*d = c3;
|
|
ctype = GRN_CHAR_SYMBOL;
|
|
} else {
|
|
*d++ = c1; *d = c2;
|
|
ctype = GRN_CHAR_OTHERS;
|
|
}
|
|
break;
|
|
}
|
|
break;
|
|
case 2 :
|
|
*d++ = c1; *d = c2;
|
|
ctype = GRN_CHAR_SYMBOL;
|
|
break;
|
|
case 3 :
|
|
c3 = c2 - 0x80;
|
|
if ('a' <= c3 && c3 <= 'z') {
|
|
ctype = GRN_CHAR_ALPHA;
|
|
*d = c3;
|
|
} else if ('A' <= c3 && c3 <= 'Z') {
|
|
ctype = GRN_CHAR_ALPHA;
|
|
*d = c3 + 0x20;
|
|
} else if ('0' <= c3 && c3 <= '9') {
|
|
ctype = GRN_CHAR_DIGIT;
|
|
*d = c3;
|
|
} else {
|
|
ctype = GRN_CHAR_OTHERS;
|
|
*d++ = c1; *d = c2;
|
|
}
|
|
break;
|
|
case 4 :
|
|
*d++ = c1; *d = c2;
|
|
ctype = GRN_CHAR_HIRAGANA;
|
|
break;
|
|
case 5 :
|
|
*d++ = c1; *d = c2;
|
|
ctype = GRN_CHAR_KATAKANA;
|
|
break;
|
|
case 6 :
|
|
case 7 :
|
|
case 8 :
|
|
*d++ = c1; *d = c2;
|
|
ctype = GRN_CHAR_SYMBOL;
|
|
break;
|
|
default :
|
|
*d++ = c1; *d = c2;
|
|
ctype = GRN_CHAR_OTHERS;
|
|
break;
|
|
}
|
|
break;
|
|
default :
|
|
*d++ = c1; *d = c2;
|
|
ctype = GRN_CHAR_KANJI;
|
|
break;
|
|
}
|
|
} else {
|
|
/* skip invalid character */
|
|
continue;
|
|
}
|
|
} else {
|
|
unsigned char c = *s;
|
|
switch (c >> 4) {
|
|
case 0 :
|
|
case 1 :
|
|
/* skip unprintable ascii */
|
|
if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
|
|
continue;
|
|
case 2 :
|
|
if (c == 0x20) {
|
|
if (removeblankp) {
|
|
if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
|
|
continue;
|
|
} else {
|
|
*d = ' ';
|
|
ctype = GRN_STR_BLANK|GRN_CHAR_SYMBOL;
|
|
}
|
|
} else {
|
|
*d = c;
|
|
ctype = GRN_CHAR_SYMBOL;
|
|
}
|
|
break;
|
|
case 3 :
|
|
*d = c;
|
|
ctype = (c <= 0x39) ? GRN_CHAR_DIGIT : GRN_CHAR_SYMBOL;
|
|
break;
|
|
case 4 :
|
|
*d = ('A' <= c) ? c + 0x20 : c;
|
|
ctype = (c == 0x40) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA;
|
|
break;
|
|
case 5 :
|
|
*d = (c <= 'Z') ? c + 0x20 : c;
|
|
ctype = (c <= 0x5a) ? GRN_CHAR_ALPHA : GRN_CHAR_SYMBOL;
|
|
break;
|
|
case 6 :
|
|
*d = c;
|
|
ctype = (c == 0x60) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA;
|
|
break;
|
|
case 7 :
|
|
*d = c;
|
|
ctype = (c <= 0x7a) ? GRN_CHAR_ALPHA : (c == 0x7f ? GRN_CHAR_OTHERS : GRN_CHAR_SYMBOL);
|
|
break;
|
|
default :
|
|
*d = c;
|
|
ctype = GRN_CHAR_OTHERS;
|
|
break;
|
|
}
|
|
}
|
|
d++;
|
|
length++;
|
|
if (cp) { *cp++ = ctype; }
|
|
if (ch) {
|
|
*ch++ = (int16_t)(s + 1 - s_);
|
|
s_ = s + 1;
|
|
while (++d_ < d) { *ch++ = 0; }
|
|
}
|
|
}
|
|
if (cp) { *cp = GRN_CHAR_NULL; }
|
|
*d = '\0';
|
|
nstr->length = length;
|
|
nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm);
|
|
return GRN_SUCCESS;
|
|
}
|
|
|
|
#ifdef GRN_WITH_NFKC
|
|
inline static grn_rc
|
|
normalize_utf8(grn_ctx *ctx, grn_str *nstr)
|
|
{
|
|
int16_t *ch;
|
|
const unsigned char *s, *s_, *s__ = NULL, *p, *p2, *pe, *e;
|
|
unsigned char *d, *d_, *de;
|
|
uint_least8_t *cp;
|
|
size_t length = 0, ls, lp, size = nstr->orig_blen, ds = size * 3;
|
|
int removeblankp = nstr->flags & GRN_STR_REMOVEBLANK;
|
|
if (!(nstr->norm = GRN_MALLOC(ds + 1))) {
|
|
return GRN_NO_MEMORY_AVAILABLE;
|
|
}
|
|
if (nstr->flags & GRN_STR_WITH_CHECKS) {
|
|
if (!(nstr->checks = GRN_MALLOC(ds * sizeof(int16_t) + 1))) {
|
|
GRN_FREE(nstr->norm); nstr->norm = NULL;
|
|
return GRN_NO_MEMORY_AVAILABLE;
|
|
}
|
|
}
|
|
ch = nstr->checks;
|
|
if (nstr->flags & GRN_STR_WITH_CTYPES) {
|
|
if (!(nstr->ctypes = GRN_MALLOC(ds + 1))) {
|
|
if (nstr->checks) { GRN_FREE(nstr->checks); nstr->checks = NULL; }
|
|
GRN_FREE(nstr->norm); nstr->norm = NULL;
|
|
return GRN_NO_MEMORY_AVAILABLE;
|
|
}
|
|
}
|
|
cp = nstr->ctypes;
|
|
d = (unsigned char *)nstr->norm;
|
|
de = d + ds;
|
|
d_ = NULL;
|
|
e = (unsigned char *)nstr->orig + size;
|
|
for (s = s_ = (unsigned char *)nstr->orig; ; s += ls) {
|
|
if (!(ls = grn_str_charlen_utf8(ctx, s, e))) {
|
|
break;
|
|
}
|
|
if ((p = (unsigned char *)grn_nfkc_decompose(s))) {
|
|
pe = p + strlen((char *)p);
|
|
} else {
|
|
p = s;
|
|
pe = p + ls;
|
|
}
|
|
if (d_ && (p2 = (unsigned char *)grn_nfkc_compose(d_, p))) {
|
|
p = p2;
|
|
pe = p + strlen((char *)p);
|
|
if (cp) { cp--; }
|
|
if (ch) {
|
|
ch -= (d - d_);
|
|
s_ = s__;
|
|
}
|
|
d = d_;
|
|
length--;
|
|
}
|
|
for (; ; p += lp) {
|
|
if (!(lp = grn_str_charlen_utf8(ctx, p, pe))) {
|
|
break;
|
|
}
|
|
if ((*p == ' ' && removeblankp) || *p < 0x20 /* skip unprintable ascii */ ) {
|
|
if (cp > nstr->ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
|
|
} else {
|
|
if (de <= d + lp) {
|
|
unsigned char *norm;
|
|
ds += (ds >> 1) + lp;
|
|
if (!(norm = GRN_REALLOC(nstr->norm, ds + 1))) {
|
|
if (nstr->ctypes) { GRN_FREE(nstr->ctypes); nstr->ctypes = NULL; }
|
|
if (nstr->checks) { GRN_FREE(nstr->checks); nstr->checks = NULL; }
|
|
GRN_FREE(nstr->norm); nstr->norm = NULL;
|
|
return GRN_NO_MEMORY_AVAILABLE;
|
|
}
|
|
de = norm + ds;
|
|
d = norm + (d - (unsigned char *)nstr->norm);
|
|
nstr->norm = (char *)norm;
|
|
if (ch) {
|
|
int16_t *checks;
|
|
if (!(checks = GRN_REALLOC(nstr->checks, ds * sizeof(int16_t)+ 1))) {
|
|
if (nstr->ctypes) { GRN_FREE(nstr->ctypes); nstr->ctypes = NULL; }
|
|
GRN_FREE(nstr->checks); nstr->checks = NULL;
|
|
GRN_FREE(nstr->norm); nstr->norm = NULL;
|
|
return GRN_NO_MEMORY_AVAILABLE;
|
|
}
|
|
ch = checks + (ch - nstr->checks);
|
|
nstr->checks = checks;
|
|
}
|
|
if (cp) {
|
|
uint_least8_t *ctypes;
|
|
if (!(ctypes = GRN_REALLOC(nstr->ctypes, ds + 1))) {
|
|
GRN_FREE(nstr->ctypes); nstr->ctypes = NULL;
|
|
if (nstr->checks) { GRN_FREE(nstr->checks); nstr->checks = NULL; }
|
|
GRN_FREE(nstr->norm); nstr->norm = NULL;
|
|
return GRN_NO_MEMORY_AVAILABLE;
|
|
}
|
|
cp = ctypes + (cp - nstr->ctypes);
|
|
nstr->ctypes = ctypes;
|
|
}
|
|
}
|
|
grn_memcpy(d, p, lp);
|
|
d_ = d;
|
|
d += lp;
|
|
length++;
|
|
if (cp) { *cp++ = grn_nfkc_char_type(p); }
|
|
if (ch) {
|
|
size_t i;
|
|
if (s_ == s + ls) {
|
|
*ch++ = -1;
|
|
} else {
|
|
*ch++ = (int16_t)(s + ls - s_);
|
|
s__ = s_;
|
|
s_ = s + ls;
|
|
}
|
|
for (i = lp; i > 1; i--) { *ch++ = 0; }
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if (cp) { *cp = GRN_CHAR_NULL; }
|
|
*d = '\0';
|
|
nstr->length = length;
|
|
nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm);
|
|
return GRN_SUCCESS;
|
|
}
|
|
#endif /* GRN_WITH_NFKC */
|
|
|
|
inline static grn_rc
|
|
normalize_sjis(grn_ctx *ctx, grn_str *nstr)
|
|
{
|
|
static uint16_t hankana[] = {
|
|
0x8140, 0x8142, 0x8175, 0x8176, 0x8141, 0x8145, 0x8392, 0x8340, 0x8342,
|
|
0x8344, 0x8346, 0x8348, 0x8383, 0x8385, 0x8387, 0x8362, 0x815b, 0x8341,
|
|
0x8343, 0x8345, 0x8347, 0x8349, 0x834a, 0x834c, 0x834e, 0x8350, 0x8352,
|
|
0x8354, 0x8356, 0x8358, 0x835a, 0x835c, 0x835e, 0x8360, 0x8363, 0x8365,
|
|
0x8367, 0x8369, 0x836a, 0x836b, 0x836c, 0x836d, 0x836e, 0x8371, 0x8374,
|
|
0x8377, 0x837a, 0x837d, 0x837e, 0x8380, 0x8381, 0x8382, 0x8384, 0x8386,
|
|
0x8388, 0x8389, 0x838a, 0x838b, 0x838c, 0x838d, 0x838f, 0x8393, 0x814a,
|
|
0x814b
|
|
};
|
|
static unsigned char dakuten[] = {
|
|
0x94, 0, 0, 0, 0, 0x4b, 0, 0x4d, 0, 0x4f, 0, 0x51, 0, 0x53, 0, 0x55, 0,
|
|
0x57, 0, 0x59, 0, 0x5b, 0, 0x5d, 0, 0x5f, 0, 0x61, 0, 0, 0x64, 0, 0x66,
|
|
0, 0x68, 0, 0, 0, 0, 0, 0, 0x6f, 0, 0, 0x72, 0, 0, 0x75, 0, 0, 0x78, 0,
|
|
0, 0x7b
|
|
};
|
|
static unsigned char handaku[] = {
|
|
0x70, 0, 0, 0x73, 0, 0, 0x76, 0, 0, 0x79, 0, 0, 0x7c
|
|
};
|
|
int16_t *ch;
|
|
const unsigned char *s, *s_;
|
|
unsigned char *d, *d0, *d_, b, *e;
|
|
uint_least8_t *cp, *ctypes, ctype;
|
|
size_t size = nstr->orig_blen, length = 0;
|
|
int removeblankp = nstr->flags & GRN_STR_REMOVEBLANK;
|
|
if (!(nstr->norm = GRN_MALLOC(size * 2 + 1))) {
|
|
return GRN_NO_MEMORY_AVAILABLE;
|
|
}
|
|
d0 = (unsigned char *) nstr->norm;
|
|
if (nstr->flags & GRN_STR_WITH_CHECKS) {
|
|
if (!(nstr->checks = GRN_MALLOC(size * 2 * sizeof(int16_t) + 1))) {
|
|
GRN_FREE(nstr->norm);
|
|
nstr->norm = NULL;
|
|
return GRN_NO_MEMORY_AVAILABLE;
|
|
}
|
|
}
|
|
ch = nstr->checks;
|
|
if (nstr->flags & GRN_STR_WITH_CTYPES) {
|
|
if (!(nstr->ctypes = GRN_MALLOC(size + 1))) {
|
|
GRN_FREE(nstr->checks);
|
|
GRN_FREE(nstr->norm);
|
|
nstr->checks = NULL;
|
|
nstr->norm = NULL;
|
|
return GRN_NO_MEMORY_AVAILABLE;
|
|
}
|
|
}
|
|
cp = ctypes = nstr->ctypes;
|
|
e = (unsigned char *)nstr->orig + size;
|
|
for (s = s_ = (unsigned char *) nstr->orig, d = d_ = d0; s < e; s++) {
|
|
if ((*s & 0x80)) {
|
|
if (0xa0 <= *s && *s <= 0xdf) {
|
|
uint16_t c = hankana[*s - 0xa0];
|
|
switch (c) {
|
|
case 0x814a :
|
|
if (d > d0 + 1 && d[-2] == 0x83
|
|
&& 0x45 <= d[-1] && d[-1] <= 0x7a && (b = dakuten[d[-1] - 0x45])) {
|
|
*(d - 1) = b;
|
|
if (ch) { ch[-1]++; s_++; }
|
|
continue;
|
|
} else {
|
|
*d++ = c >> 8; *d = c & 0xff;
|
|
}
|
|
break;
|
|
case 0x814b :
|
|
if (d > d0 + 1 && d[-2] == 0x83
|
|
&& 0x6e <= d[-1] && d[-1] <= 0x7a && (b = handaku[d[-1] - 0x6e])) {
|
|
*(d - 1) = b;
|
|
if (ch) { ch[-1]++; s_++; }
|
|
continue;
|
|
} else {
|
|
*d++ = c >> 8; *d = c & 0xff;
|
|
}
|
|
break;
|
|
default :
|
|
*d++ = c >> 8; *d = c & 0xff;
|
|
break;
|
|
}
|
|
ctype = GRN_CHAR_KATAKANA;
|
|
} else {
|
|
if ((s + 1) < e && 0x40 <= *(s + 1) && *(s + 1) <= 0xfc) {
|
|
unsigned char c1 = *s++, c2 = *s, c3 = 0;
|
|
if (0x81 <= c1 && c1 <= 0x87) {
|
|
switch (c1 & 0x0f) {
|
|
case 1 :
|
|
switch (c2) {
|
|
case 0x5b :
|
|
*d++ = c1; *d = c2;
|
|
ctype = GRN_CHAR_KATAKANA;
|
|
break;
|
|
case 0x58 :
|
|
*d++ = c1; *d = c2;
|
|
ctype = GRN_CHAR_KANJI;
|
|
break;
|
|
case 0x40 :
|
|
if (removeblankp) {
|
|
if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
|
|
continue;
|
|
} else {
|
|
*d = ' ';
|
|
ctype = GRN_STR_BLANK|GRN_CHAR_SYMBOL;
|
|
}
|
|
break;
|
|
default :
|
|
if (0x43 <= c2 && c2 <= 0x7e && (c3 = symbol[c2 - 0x43])) {
|
|
*d = c3;
|
|
ctype = GRN_CHAR_SYMBOL;
|
|
} else if (0x7f <= c2 && c2 <= 0x97 && (c3 = symbol[c2 - 0x44])) {
|
|
*d = c3;
|
|
ctype = GRN_CHAR_SYMBOL;
|
|
} else {
|
|
*d++ = c1; *d = c2;
|
|
ctype = GRN_CHAR_OTHERS;
|
|
}
|
|
break;
|
|
}
|
|
break;
|
|
case 2 :
|
|
c3 = c2 - 0x1f;
|
|
if (0x4f <= c2 && c2 <= 0x58) {
|
|
ctype = GRN_CHAR_DIGIT;
|
|
*d = c2 - 0x1f;
|
|
} else if (0x60 <= c2 && c2 <= 0x79) {
|
|
ctype = GRN_CHAR_ALPHA;
|
|
*d = c2 + 0x01;
|
|
} else if (0x81 <= c2 && c2 <= 0x9a) {
|
|
ctype = GRN_CHAR_ALPHA;
|
|
*d = c2 - 0x20;
|
|
} else if (0x9f <= c2 && c2 <= 0xf1) {
|
|
*d++ = c1; *d = c2;
|
|
ctype = GRN_CHAR_HIRAGANA;
|
|
} else {
|
|
*d++ = c1; *d = c2;
|
|
ctype = GRN_CHAR_OTHERS;
|
|
}
|
|
break;
|
|
case 3 :
|
|
if (0x40 <= c2 && c2 <= 0x96) {
|
|
*d++ = c1; *d = c2;
|
|
ctype = GRN_CHAR_KATAKANA;
|
|
} else {
|
|
*d++ = c1; *d = c2;
|
|
ctype = GRN_CHAR_SYMBOL;
|
|
}
|
|
break;
|
|
case 4 :
|
|
case 7 :
|
|
*d++ = c1; *d = c2;
|
|
ctype = GRN_CHAR_SYMBOL;
|
|
break;
|
|
default :
|
|
*d++ = c1; *d = c2;
|
|
ctype = GRN_CHAR_OTHERS;
|
|
break;
|
|
}
|
|
} else {
|
|
*d++ = c1; *d = c2;
|
|
ctype = GRN_CHAR_KANJI;
|
|
}
|
|
} else {
|
|
/* skip invalid character */
|
|
continue;
|
|
}
|
|
}
|
|
} else {
|
|
unsigned char c = *s;
|
|
switch (c >> 4) {
|
|
case 0 :
|
|
case 1 :
|
|
/* skip unprintable ascii */
|
|
if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
|
|
continue;
|
|
case 2 :
|
|
if (c == 0x20) {
|
|
if (removeblankp) {
|
|
if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
|
|
continue;
|
|
} else {
|
|
*d = ' ';
|
|
ctype = GRN_STR_BLANK|GRN_CHAR_SYMBOL;
|
|
}
|
|
} else {
|
|
*d = c;
|
|
ctype = GRN_CHAR_SYMBOL;
|
|
}
|
|
break;
|
|
case 3 :
|
|
*d = c;
|
|
ctype = (c <= 0x39) ? GRN_CHAR_DIGIT : GRN_CHAR_SYMBOL;
|
|
break;
|
|
case 4 :
|
|
*d = ('A' <= c) ? c + 0x20 : c;
|
|
ctype = (c == 0x40) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA;
|
|
break;
|
|
case 5 :
|
|
*d = (c <= 'Z') ? c + 0x20 : c;
|
|
ctype = (c <= 0x5a) ? GRN_CHAR_ALPHA : GRN_CHAR_SYMBOL;
|
|
break;
|
|
case 6 :
|
|
*d = c;
|
|
ctype = (c == 0x60) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA;
|
|
break;
|
|
case 7 :
|
|
*d = c;
|
|
ctype = (c <= 0x7a) ? GRN_CHAR_ALPHA : (c == 0x7f ? GRN_CHAR_OTHERS : GRN_CHAR_SYMBOL);
|
|
break;
|
|
default :
|
|
*d = c;
|
|
ctype = GRN_CHAR_OTHERS;
|
|
break;
|
|
}
|
|
}
|
|
d++;
|
|
length++;
|
|
if (cp) { *cp++ = ctype; }
|
|
if (ch) {
|
|
*ch++ = (int16_t)(s + 1 - s_);
|
|
s_ = s + 1;
|
|
while (++d_ < d) { *ch++ = 0; }
|
|
}
|
|
}
|
|
if (cp) { *cp = GRN_CHAR_NULL; }
|
|
*d = '\0';
|
|
nstr->length = length;
|
|
nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm);
|
|
return GRN_SUCCESS;
|
|
}
|
|
|
|
inline static grn_rc
|
|
normalize_none(grn_ctx *ctx, grn_str *nstr)
|
|
{
|
|
int16_t *ch;
|
|
const unsigned char *s, *s_, *e;
|
|
unsigned char *d, *d0, *d_;
|
|
uint_least8_t *cp, *ctypes, ctype;
|
|
size_t size = nstr->orig_blen, length = 0;
|
|
int removeblankp = nstr->flags & GRN_STR_REMOVEBLANK;
|
|
if (!(nstr->norm = GRN_MALLOC(size + 1))) {
|
|
return GRN_NO_MEMORY_AVAILABLE;
|
|
}
|
|
d0 = (unsigned char *) nstr->norm;
|
|
if (nstr->flags & GRN_STR_WITH_CHECKS) {
|
|
if (!(nstr->checks = GRN_MALLOC(size * sizeof(int16_t) + 1))) {
|
|
GRN_FREE(nstr->norm);
|
|
nstr->norm = NULL;
|
|
return GRN_NO_MEMORY_AVAILABLE;
|
|
}
|
|
}
|
|
ch = nstr->checks;
|
|
if (nstr->flags & GRN_STR_WITH_CTYPES) {
|
|
if (!(nstr->ctypes = GRN_MALLOC(size + 1))) {
|
|
GRN_FREE(nstr->checks);
|
|
GRN_FREE(nstr->norm);
|
|
nstr->checks = NULL;
|
|
nstr->norm = NULL;
|
|
return GRN_NO_MEMORY_AVAILABLE;
|
|
}
|
|
}
|
|
cp = ctypes = nstr->ctypes;
|
|
e = (unsigned char *)nstr->orig + size;
|
|
for (s = s_ = (unsigned char *) nstr->orig, d = d_ = d0; s < e; s++) {
|
|
unsigned char c = *s;
|
|
switch (c >> 4) {
|
|
case 0 :
|
|
case 1 :
|
|
/* skip unprintable ascii */
|
|
if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
|
|
continue;
|
|
case 2 :
|
|
if (c == 0x20) {
|
|
if (removeblankp) {
|
|
if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
|
|
continue;
|
|
} else {
|
|
*d = ' ';
|
|
ctype = GRN_STR_BLANK|GRN_CHAR_SYMBOL;
|
|
}
|
|
} else {
|
|
*d = c;
|
|
ctype = GRN_CHAR_SYMBOL;
|
|
}
|
|
break;
|
|
case 3 :
|
|
*d = c;
|
|
ctype = (c <= 0x39) ? GRN_CHAR_DIGIT : GRN_CHAR_SYMBOL;
|
|
break;
|
|
case 4 :
|
|
*d = ('A' <= c) ? c + 0x20 : c;
|
|
ctype = (c == 0x40) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA;
|
|
break;
|
|
case 5 :
|
|
*d = (c <= 'Z') ? c + 0x20 : c;
|
|
ctype = (c <= 0x5a) ? GRN_CHAR_ALPHA : GRN_CHAR_SYMBOL;
|
|
break;
|
|
case 6 :
|
|
*d = c;
|
|
ctype = (c == 0x60) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA;
|
|
break;
|
|
case 7 :
|
|
*d = c;
|
|
ctype = (c <= 0x7a) ? GRN_CHAR_ALPHA : (c == 0x7f ? GRN_CHAR_OTHERS : GRN_CHAR_SYMBOL);
|
|
break;
|
|
default :
|
|
*d = c;
|
|
ctype = GRN_CHAR_OTHERS;
|
|
break;
|
|
}
|
|
d++;
|
|
length++;
|
|
if (cp) { *cp++ = ctype; }
|
|
if (ch) {
|
|
*ch++ = (int16_t)(s + 1 - s_);
|
|
s_ = s + 1;
|
|
while (++d_ < d) { *ch++ = 0; }
|
|
}
|
|
}
|
|
if (cp) { *cp = GRN_CHAR_NULL; }
|
|
*d = '\0';
|
|
nstr->length = length;
|
|
nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm);
|
|
return GRN_SUCCESS;
|
|
}
|
|
|
|
/* use cp1252 as latin1 */
|
|
inline static grn_rc
|
|
normalize_latin1(grn_ctx *ctx, grn_str *nstr)
|
|
{
|
|
int16_t *ch;
|
|
const unsigned char *s, *s_, *e;
|
|
unsigned char *d, *d0, *d_;
|
|
uint_least8_t *cp, *ctypes, ctype;
|
|
size_t size = nstr->orig_blen, length = 0;
|
|
int removeblankp = nstr->flags & GRN_STR_REMOVEBLANK;
|
|
if (!(nstr->norm = GRN_MALLOC(size + 1))) {
|
|
return GRN_NO_MEMORY_AVAILABLE;
|
|
}
|
|
d0 = (unsigned char *) nstr->norm;
|
|
if (nstr->flags & GRN_STR_WITH_CHECKS) {
|
|
if (!(nstr->checks = GRN_MALLOC(size * sizeof(int16_t) + 1))) {
|
|
GRN_FREE(nstr->norm);
|
|
nstr->norm = NULL;
|
|
return GRN_NO_MEMORY_AVAILABLE;
|
|
}
|
|
}
|
|
ch = nstr->checks;
|
|
if (nstr->flags & GRN_STR_WITH_CTYPES) {
|
|
if (!(nstr->ctypes = GRN_MALLOC(size + 1))) {
|
|
GRN_FREE(nstr->checks);
|
|
GRN_FREE(nstr->norm);
|
|
nstr->checks = NULL;
|
|
nstr->norm = NULL;
|
|
return GRN_NO_MEMORY_AVAILABLE;
|
|
}
|
|
}
|
|
cp = ctypes = nstr->ctypes;
|
|
e = (unsigned char *)nstr->orig + size;
|
|
for (s = s_ = (unsigned char *) nstr->orig, d = d_ = d0; s < e; s++) {
|
|
unsigned char c = *s;
|
|
switch (c >> 4) {
|
|
case 0 :
|
|
case 1 :
|
|
/* skip unprintable ascii */
|
|
if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
|
|
continue;
|
|
case 2 :
|
|
if (c == 0x20) {
|
|
if (removeblankp) {
|
|
if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
|
|
continue;
|
|
} else {
|
|
*d = ' ';
|
|
ctype = GRN_STR_BLANK|GRN_CHAR_SYMBOL;
|
|
}
|
|
} else {
|
|
*d = c;
|
|
ctype = GRN_CHAR_SYMBOL;
|
|
}
|
|
break;
|
|
case 3 :
|
|
*d = c;
|
|
ctype = (c <= 0x39) ? GRN_CHAR_DIGIT : GRN_CHAR_SYMBOL;
|
|
break;
|
|
case 4 :
|
|
*d = ('A' <= c) ? c + 0x20 : c;
|
|
ctype = (c == 0x40) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA;
|
|
break;
|
|
case 5 :
|
|
*d = (c <= 'Z') ? c + 0x20 : c;
|
|
ctype = (c <= 0x5a) ? GRN_CHAR_ALPHA : GRN_CHAR_SYMBOL;
|
|
break;
|
|
case 6 :
|
|
*d = c;
|
|
ctype = (c == 0x60) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA;
|
|
break;
|
|
case 7 :
|
|
*d = c;
|
|
ctype = (c <= 0x7a) ? GRN_CHAR_ALPHA : (c == 0x7f ? GRN_CHAR_OTHERS : GRN_CHAR_SYMBOL);
|
|
break;
|
|
case 8 :
|
|
if (c == 0x8a || c == 0x8c || c == 0x8e) {
|
|
*d = c + 0x10;
|
|
ctype = GRN_CHAR_ALPHA;
|
|
} else {
|
|
*d = c;
|
|
ctype = GRN_CHAR_SYMBOL;
|
|
}
|
|
break;
|
|
case 9 :
|
|
if (c == 0x9a || c == 0x9c || c == 0x9e || c == 0x9f) {
|
|
*d = (c == 0x9f) ? c + 0x60 : c;
|
|
ctype = GRN_CHAR_ALPHA;
|
|
} else {
|
|
*d = c;
|
|
ctype = GRN_CHAR_SYMBOL;
|
|
}
|
|
break;
|
|
case 0x0c :
|
|
*d = c + 0x20;
|
|
ctype = GRN_CHAR_ALPHA;
|
|
break;
|
|
case 0x0d :
|
|
*d = (c == 0xd7 || c == 0xdf) ? c : c + 0x20;
|
|
ctype = (c == 0xd7) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA;
|
|
break;
|
|
case 0x0e :
|
|
*d = c;
|
|
ctype = GRN_CHAR_ALPHA;
|
|
break;
|
|
case 0x0f :
|
|
*d = c;
|
|
ctype = (c == 0xf7) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA;
|
|
break;
|
|
default :
|
|
*d = c;
|
|
ctype = GRN_CHAR_OTHERS;
|
|
break;
|
|
}
|
|
d++;
|
|
length++;
|
|
if (cp) { *cp++ = ctype; }
|
|
if (ch) {
|
|
*ch++ = (int16_t)(s + 1 - s_);
|
|
s_ = s + 1;
|
|
while (++d_ < d) { *ch++ = 0; }
|
|
}
|
|
}
|
|
if (cp) { *cp = GRN_CHAR_NULL; }
|
|
*d = '\0';
|
|
nstr->length = length;
|
|
nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm);
|
|
return GRN_SUCCESS;
|
|
}
|
|
|
|
inline static grn_rc
|
|
normalize_koi8r(grn_ctx *ctx, grn_str *nstr)
|
|
{
|
|
int16_t *ch;
|
|
const unsigned char *s, *s_, *e;
|
|
unsigned char *d, *d0, *d_;
|
|
uint_least8_t *cp, *ctypes, ctype;
|
|
size_t size = strlen(nstr->orig), length = 0;
|
|
int removeblankp = nstr->flags & GRN_STR_REMOVEBLANK;
|
|
if (!(nstr->norm = GRN_MALLOC(size + 1))) {
|
|
return GRN_NO_MEMORY_AVAILABLE;
|
|
}
|
|
d0 = (unsigned char *) nstr->norm;
|
|
if (nstr->flags & GRN_STR_WITH_CHECKS) {
|
|
if (!(nstr->checks = GRN_MALLOC(size * sizeof(int16_t) + 1))) {
|
|
GRN_FREE(nstr->norm);
|
|
nstr->norm = NULL;
|
|
return GRN_NO_MEMORY_AVAILABLE;
|
|
}
|
|
}
|
|
ch = nstr->checks;
|
|
if (nstr->flags & GRN_STR_WITH_CTYPES) {
|
|
if (!(nstr->ctypes = GRN_MALLOC(size + 1))) {
|
|
GRN_FREE(nstr->checks);
|
|
GRN_FREE(nstr->norm);
|
|
nstr->checks = NULL;
|
|
nstr->norm = NULL;
|
|
return GRN_NO_MEMORY_AVAILABLE;
|
|
}
|
|
}
|
|
cp = ctypes = nstr->ctypes;
|
|
e = (unsigned char *)nstr->orig + size;
|
|
for (s = s_ = (unsigned char *) nstr->orig, d = d_ = d0; s < e; s++) {
|
|
unsigned char c = *s;
|
|
switch (c >> 4) {
|
|
case 0 :
|
|
case 1 :
|
|
/* skip unprintable ascii */
|
|
if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
|
|
continue;
|
|
case 2 :
|
|
if (c == 0x20) {
|
|
if (removeblankp) {
|
|
if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
|
|
continue;
|
|
} else {
|
|
*d = ' ';
|
|
ctype = GRN_STR_BLANK|GRN_CHAR_SYMBOL;
|
|
}
|
|
} else {
|
|
*d = c;
|
|
ctype = GRN_CHAR_SYMBOL;
|
|
}
|
|
break;
|
|
case 3 :
|
|
*d = c;
|
|
ctype = (c <= 0x39) ? GRN_CHAR_DIGIT : GRN_CHAR_SYMBOL;
|
|
break;
|
|
case 4 :
|
|
*d = ('A' <= c) ? c + 0x20 : c;
|
|
ctype = (c == 0x40) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA;
|
|
break;
|
|
case 5 :
|
|
*d = (c <= 'Z') ? c + 0x20 : c;
|
|
ctype = (c <= 0x5a) ? GRN_CHAR_ALPHA : GRN_CHAR_SYMBOL;
|
|
break;
|
|
case 6 :
|
|
*d = c;
|
|
ctype = (c == 0x60) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA;
|
|
break;
|
|
case 7 :
|
|
*d = c;
|
|
ctype = (c <= 0x7a) ? GRN_CHAR_ALPHA : (c == 0x7f ? GRN_CHAR_OTHERS : GRN_CHAR_SYMBOL);
|
|
break;
|
|
case 0x0a :
|
|
*d = c;
|
|
ctype = (c == 0xa3) ? GRN_CHAR_ALPHA : GRN_CHAR_OTHERS;
|
|
break;
|
|
case 0x0b :
|
|
if (c == 0xb3) {
|
|
*d = c - 0x10;
|
|
ctype = GRN_CHAR_ALPHA;
|
|
} else {
|
|
*d = c;
|
|
ctype = GRN_CHAR_OTHERS;
|
|
}
|
|
break;
|
|
case 0x0c :
|
|
case 0x0d :
|
|
*d = c;
|
|
ctype = GRN_CHAR_ALPHA;
|
|
break;
|
|
case 0x0e :
|
|
case 0x0f :
|
|
*d = c - 0x20;
|
|
ctype = GRN_CHAR_ALPHA;
|
|
break;
|
|
default :
|
|
*d = c;
|
|
ctype = GRN_CHAR_OTHERS;
|
|
break;
|
|
}
|
|
d++;
|
|
length++;
|
|
if (cp) { *cp++ = ctype; }
|
|
if (ch) {
|
|
*ch++ = (int16_t)(s + 1 - s_);
|
|
s_ = s + 1;
|
|
while (++d_ < d) { *ch++ = 0; }
|
|
}
|
|
}
|
|
if (cp) { *cp = GRN_CHAR_NULL; }
|
|
*d = '\0';
|
|
nstr->length = length;
|
|
nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm);
|
|
return GRN_SUCCESS;
|
|
}
|
|
|
|
static grn_str *
|
|
grn_fakenstr_open(grn_ctx *ctx, const char *str, size_t str_len, grn_encoding encoding, int flags)
|
|
{
|
|
/* TODO: support GRN_STR_REMOVEBLANK flag and ctypes */
|
|
grn_str *nstr;
|
|
if (!(nstr = GRN_MALLOC(sizeof(grn_str)))) {
|
|
GRN_LOG(ctx, GRN_LOG_ALERT, "memory allocation on grn_fakenstr_open failed !");
|
|
return NULL;
|
|
}
|
|
if (!(nstr->norm = GRN_MALLOC(str_len + 1))) {
|
|
GRN_LOG(ctx, GRN_LOG_ALERT, "memory allocation for keyword on grn_snip_add_cond failed !");
|
|
GRN_FREE(nstr);
|
|
return NULL;
|
|
}
|
|
nstr->orig = str;
|
|
nstr->orig_blen = str_len;
|
|
grn_memcpy(nstr->norm, str, str_len);
|
|
nstr->norm[str_len] = '\0';
|
|
nstr->norm_blen = str_len;
|
|
nstr->ctypes = NULL;
|
|
nstr->flags = flags;
|
|
|
|
if (flags & GRN_STR_WITH_CHECKS) {
|
|
int16_t f = 0;
|
|
unsigned char c;
|
|
size_t i;
|
|
if (!(nstr->checks = (int16_t *) GRN_MALLOC(sizeof(int16_t) * str_len))) {
|
|
GRN_FREE(nstr->norm);
|
|
GRN_FREE(nstr);
|
|
return NULL;
|
|
}
|
|
switch (encoding) {
|
|
case GRN_ENC_EUC_JP:
|
|
for (i = 0; i < str_len; i++) {
|
|
if (!f) {
|
|
c = (unsigned char) str[i];
|
|
f = ((c >= 0xa1U && c <= 0xfeU) || c == 0x8eU ? 2 : (c == 0x8fU ? 3 : 1)
|
|
);
|
|
nstr->checks[i] = f;
|
|
} else {
|
|
nstr->checks[i] = 0;
|
|
}
|
|
f--;
|
|
}
|
|
break;
|
|
case GRN_ENC_SJIS:
|
|
for (i = 0; i < str_len; i++) {
|
|
if (!f) {
|
|
c = (unsigned char) str[i];
|
|
f = (c >= 0x81U && ((c <= 0x9fU) || (c >= 0xe0U && c <= 0xfcU)) ? 2 : 1);
|
|
nstr->checks[i] = f;
|
|
} else {
|
|
nstr->checks[i] = 0;
|
|
}
|
|
f--;
|
|
}
|
|
break;
|
|
case GRN_ENC_UTF8:
|
|
for (i = 0; i < str_len; i++) {
|
|
if (!f) {
|
|
c = (unsigned char) str[i];
|
|
f = (c & 0x80U ? (c & 0x20U ? (c & 0x10U ? 4 : 3)
|
|
: 2)
|
|
: 1);
|
|
nstr->checks[i] = f;
|
|
} else {
|
|
nstr->checks[i] = 0;
|
|
}
|
|
f--;
|
|
}
|
|
break;
|
|
default:
|
|
for (i = 0; i < str_len; i++) {
|
|
nstr->checks[i] = 1;
|
|
}
|
|
break;
|
|
}
|
|
} else {
|
|
nstr->checks = NULL;
|
|
}
|
|
return nstr;
|
|
}
|
|
|
|
grn_str *
|
|
grn_str_open_(grn_ctx *ctx, const char *str, unsigned int str_len, int flags, grn_encoding encoding)
|
|
{
|
|
grn_rc rc;
|
|
grn_str *nstr;
|
|
if (!str || !str_len) { return NULL; }
|
|
|
|
if (!(flags & GRN_STR_NORMALIZE)) {
|
|
return grn_fakenstr_open(ctx, str, str_len, encoding, flags);
|
|
}
|
|
|
|
if (!(nstr = GRN_MALLOC(sizeof(grn_str)))) {
|
|
GRN_LOG(ctx, GRN_LOG_ALERT, "memory allocation on grn_str_open failed !");
|
|
return NULL;
|
|
}
|
|
nstr->orig = str;
|
|
nstr->orig_blen = str_len;
|
|
nstr->norm = NULL;
|
|
nstr->norm_blen = 0;
|
|
nstr->checks = NULL;
|
|
nstr->ctypes = NULL;
|
|
nstr->encoding = encoding;
|
|
nstr->flags = flags;
|
|
switch (encoding) {
|
|
case GRN_ENC_EUC_JP :
|
|
rc = normalize_euc(ctx, nstr);
|
|
break;
|
|
case GRN_ENC_UTF8 :
|
|
#ifdef GRN_WITH_NFKC
|
|
rc = normalize_utf8(ctx, nstr);
|
|
#else /* GRN_WITH_NFKC */
|
|
rc = normalize_none(ctx, nstr);
|
|
#endif /* GRN_WITH_NFKC */
|
|
break;
|
|
case GRN_ENC_SJIS :
|
|
rc = normalize_sjis(ctx, nstr);
|
|
break;
|
|
case GRN_ENC_LATIN1 :
|
|
rc = normalize_latin1(ctx, nstr);
|
|
break;
|
|
case GRN_ENC_KOI8R :
|
|
rc = normalize_koi8r(ctx, nstr);
|
|
break;
|
|
default :
|
|
rc = normalize_none(ctx, nstr);
|
|
break;
|
|
}
|
|
if (rc) {
|
|
grn_str_close(ctx, nstr);
|
|
return NULL;
|
|
}
|
|
return nstr;
|
|
}
|
|
|
|
grn_str *
|
|
grn_str_open(grn_ctx *ctx, const char *str, unsigned int str_len, int flags)
|
|
{
|
|
return grn_str_open_(ctx, str, str_len, flags, ctx->encoding);
|
|
}
|
|
|
|
grn_rc
|
|
grn_str_close(grn_ctx *ctx, grn_str *nstr)
|
|
{
|
|
if (nstr) {
|
|
if (nstr->norm) { GRN_FREE(nstr->norm); }
|
|
if (nstr->ctypes) { GRN_FREE(nstr->ctypes); }
|
|
if (nstr->checks) { GRN_FREE(nstr->checks); }
|
|
GRN_FREE(nstr);
|
|
return GRN_SUCCESS;
|
|
} else {
|
|
return GRN_INVALID_ARGUMENT;
|
|
}
|
|
}
|
|
|
|
/* Encoding names; the array index is the numeric grn_encoding value. */
static const char *grn_enc_string[] = {
  "default", "none", "euc_jp", "utf8", "sjis", "latin1", "koi8r"
};
|
|
|
|
const char *
|
|
grn_encoding_to_string(grn_encoding enc)
|
|
{
|
|
if (enc < (sizeof(grn_enc_string) / sizeof(char *))) {
|
|
return grn_enc_string[enc];
|
|
} else {
|
|
return "unknown";
|
|
}
|
|
}
|
|
|
|
grn_encoding
|
|
grn_encoding_parse(const char *str)
|
|
{
|
|
grn_encoding e = GRN_ENC_UTF8;
|
|
int i = sizeof(grn_enc_string) / sizeof(grn_enc_string[0]);
|
|
while (i--) {
|
|
if (!strcmp(str, grn_enc_string[i])) {
|
|
e = (grn_encoding)i;
|
|
}
|
|
}
|
|
return e;
|
|
}
|
|
|
|
size_t
|
|
grn_str_len(grn_ctx *ctx, const char *str, grn_encoding encoding, const char **last)
|
|
{
|
|
size_t len, tlen;
|
|
const char *p = NULL;
|
|
for (len = 0; ; len++) {
|
|
p = str;
|
|
if (!(tlen = grn_str_charlen(ctx, str, encoding))) {
|
|
break;
|
|
}
|
|
str += tlen;
|
|
}
|
|
if (last) { *last = p; }
|
|
return len;
|
|
}
|
|
|
|
int
|
|
grn_isspace(const char *str, grn_encoding encoding)
|
|
{
|
|
const unsigned char *s = (const unsigned char *) str;
|
|
if (!s) { return 0; }
|
|
switch (s[0]) {
|
|
case ' ' :
|
|
case '\f' :
|
|
case '\n' :
|
|
case '\r' :
|
|
case '\t' :
|
|
case '\v' :
|
|
return 1;
|
|
case 0x81 :
|
|
if (encoding == GRN_ENC_SJIS && s[1] == 0x40) { return 2; }
|
|
break;
|
|
case 0xA1 :
|
|
if (encoding == GRN_ENC_EUC_JP && s[1] == 0xA1) { return 2; }
|
|
break;
|
|
case 0xE3 :
|
|
if (encoding == GRN_ENC_UTF8 && s[1] == 0x80 && s[2] == 0x80) { return 3; }
|
|
break;
|
|
default :
|
|
break;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Parse an optionally '-'-prefixed decimal number from [nptr, end) into an
 * int8_t.
 *
 * The value is accumulated as a negative number so that INT8_MIN can be
 * represented. On overflow the result is 0 and *rest points at the digit
 * that did not fit. When only a '-' was seen, *rest is set back to nptr.
 * Otherwise *rest points just past the last digit consumed.
 *
 * The range test is done on the untruncated int result; testing the value
 * after narrowing to int8_t missed wrap-arounds such as "300".
 */
int8_t
grn_atoi8(const char *nptr, const char *end, const char **rest)
{
  const char *p = nptr;
  int8_t v = 0;
  int negative = 0, sign_only = 0;
  if (p < end && *p == '-') {
    p++;
    negative = 1;
    sign_only = 1;
  }
  while (p < end && *p >= '0' && *p <= '9') {
    int t = v * 10 - (*p - '0');
    /* a positive number may not reach INT8_MIN: -INT8_MIN is unrepresentable */
    if (t < INT8_MIN || (!negative && t == INT8_MIN)) { v = 0; break; }
    v = (int8_t)t;
    sign_only = 0;
    p++;
  }
  if (rest) { *rest = sign_only ? nptr : p; }
  return negative ? v : -v;
}
|
|
|
|
/*
 * Parse an unsigned decimal number from [nptr, end) into a uint8_t.
 *
 * On overflow the result is 0 and *rest points at the digit that did not
 * fit; otherwise *rest points just past the last digit consumed.
 *
 * The range test is done on the untruncated result; comparing the narrowed
 * value against the previous one missed wrap-arounds such as "300".
 */
uint8_t
grn_atoui8(const char *nptr, const char *end, const char **rest)
{
  uint8_t v = 0;
  while (nptr < end && *nptr >= '0' && *nptr <= '9') {
    unsigned int t = v * 10u + (unsigned int)(*nptr - '0');
    if (t > UINT8_MAX) { v = 0; break; }
    v = (uint8_t)t;
    nptr++;
  }
  if (rest) { *rest = nptr; }
  return v;
}
|
|
|
|
/*
 * Parse an optionally '-'-prefixed decimal number from [nptr, end) into an
 * int16_t.
 *
 * The value is accumulated as a negative number so that INT16_MIN can be
 * represented. On overflow the result is 0 and *rest points at the digit
 * that did not fit. When only a '-' was seen, *rest is set back to nptr.
 *
 * The range test is done on the untruncated int result; testing the value
 * after narrowing to int16_t missed wrap-arounds such as "90000".
 */
int16_t
grn_atoi16(const char *nptr, const char *end, const char **rest)
{
  const char *p = nptr;
  int16_t v = 0;
  int negative = 0, sign_only = 0;
  if (p < end && *p == '-') {
    p++;
    negative = 1;
    sign_only = 1;
  }
  while (p < end && *p >= '0' && *p <= '9') {
    long t = (long)v * 10 - (*p - '0');
    /* a positive number may not reach INT16_MIN: -INT16_MIN is unrepresentable */
    if (t < INT16_MIN || (!negative && t == INT16_MIN)) { v = 0; break; }
    v = (int16_t)t;
    sign_only = 0;
    p++;
  }
  if (rest) { *rest = sign_only ? nptr : p; }
  return negative ? v : -v;
}
|
|
|
|
/*
 * Parse an unsigned decimal number from [nptr, end) into a uint16_t.
 *
 * On overflow the result is 0 and *rest points at the digit that did not
 * fit; otherwise *rest points just past the last digit consumed.
 *
 * The range test is done on the untruncated result; comparing the narrowed
 * value against the previous one missed wrap-arounds such as "90000".
 */
uint16_t
grn_atoui16(const char *nptr, const char *end, const char **rest)
{
  uint16_t v = 0;
  while (nptr < end && *nptr >= '0' && *nptr <= '9') {
    unsigned long t = v * 10ul + (unsigned long)(*nptr - '0');
    if (t > UINT16_MAX) { v = 0; break; }
    v = (uint16_t)t;
    nptr++;
  }
  if (rest) { *rest = nptr; }
  return v;
}
|
|
|
|
/*
 * Parse an optionally '-'-prefixed decimal number from [nptr, end) into an
 * int.
 *
 * The value is accumulated as a negative number so that INT_MIN can be
 * represented. On overflow the result is 0 and *rest points at the digit
 * that did not fit. When only a '-' was seen, *rest is set back to nptr.
 *
 * Overflow is detected before multiplying: signed overflow is undefined
 * behaviour, so it cannot be reliably detected after the fact.
 */
int
grn_atoi(const char *nptr, const char *end, const char **rest)
{
  const char *p = nptr;
  int v = 0, negative = 0, sign_only = 0;
  if (p < end && *p == '-') {
    p++;
    negative = 1;
    sign_only = 1;
  }
  while (p < end && *p >= '0' && *p <= '9') {
    int digit = *p - '0';
    int t;
    /* v * 10 - digit >= INT_MIN  <=>  v >= (INT_MIN + digit) / 10 */
    if (v < (INT_MIN + digit) / 10) { v = 0; break; }
    t = v * 10 - digit;
    /* a positive number may not reach INT_MIN: -INT_MIN is unrepresentable */
    if (!negative && t == INT_MIN) { v = 0; break; }
    v = t;
    sign_only = 0;
    p++;
  }
  if (rest) { *rest = sign_only ? nptr : p; }
  return negative ? v : -v;
}
|
|
|
|
/*
 * Parse an unsigned decimal number from [nptr, end) into an unsigned int.
 *
 * On overflow the result is 0 and *rest points at the digit that did not
 * fit; otherwise *rest points just past the last digit consumed.
 *
 * Overflow is detected before multiplying; the wrapped product can still be
 * larger than the previous value, so comparing afterwards is not reliable.
 */
unsigned int
grn_atoui(const char *nptr, const char *end, const char **rest)
{
  unsigned int v = 0;
  while (nptr < end && *nptr >= '0' && *nptr <= '9') {
    unsigned int digit = (unsigned int)(*nptr - '0');
    if (v > (UINT_MAX - digit) / 10) { v = 0; break; }
    v = v * 10 + digit;
    nptr++;
  }
  if (rest) { *rest = nptr; }
  return v;
}
|
|
|
|
/*
 * Parse an optionally '-'-prefixed decimal number from [nptr, end) into an
 * int64_t.
 *
 * On overflow the result is 0 and *rest points at the digit that did not
 * fit. When only a '-' was seen, *rest is set back to nptr. Otherwise *rest
 * points just past the last digit consumed.
 *
 * Overflow is detected before multiplying: signed overflow is undefined
 * behaviour and the wrapped value does not always compare as "smaller".
 */
int64_t
grn_atoll(const char *nptr, const char *end, const char **rest)
{
  const char *p = nptr;
  int sign_only = 0;
  int64_t v = 0;
  if (p < end && *p == '-') {
    p++;
    sign_only = 1;
    while (p < end && *p >= '0' && *p <= '9') {
      int digit = *p - '0';
      /* v * 10 - digit >= INT64_MIN  <=>  v >= (INT64_MIN + digit) / 10 */
      if (v < (INT64_MIN + digit) / 10) { v = 0; break; }
      v = v * 10 - digit;
      sign_only = 0;
      p++;
    }
  } else {
    while (p < end && *p >= '0' && *p <= '9') {
      int digit = *p - '0';
      if (v > (INT64_MAX - digit) / 10) { v = 0; break; }
      v = v * 10 + digit;
      p++;
    }
  }
  if (rest) { *rest = sign_only ? nptr : p; }
  return v;
}
|
|
|
|
/*
 * Parse an unsigned decimal number from [nptr, end) into a uint64_t.
 *
 * On overflow the result is 0 and *rest points at the digit that did not
 * fit; otherwise *rest points just past the last digit consumed.
 *
 * Overflow is detected before multiplying; the wrapped product can still be
 * larger than the previous value, so comparing afterwards is not reliable.
 */
uint64_t
grn_atoull(const char *nptr, const char *end, const char **rest)
{
  uint64_t v = 0;
  while (nptr < end && *nptr >= '0' && *nptr <= '9') {
    uint64_t digit = (uint64_t)(*nptr - '0');
    if (v > (UINT64_MAX - digit) / 10) { v = 0; break; }
    v = v * 10 + digit;
    nptr++;
  }
  if (rest) { *rest = nptr; }
  return v;
}
|
|
|
|
/*
 * Parse a hexadecimal number (digits 0-9, a-f, A-F) from [nptr, end) into an
 * unsigned int.
 *
 * A non-hex character makes the result 0 and leaves *rest pointing at it.
 * On overflow the result is 0 and *rest points just past the digit that did
 * not fit. Otherwise *rest equals end.
 *
 * Overflow is detected before shifting in the next digit; comparing the
 * wrapped value against the previous one missed inputs like "1FFFFFFFF".
 */
unsigned int
grn_htoui(const char *nptr, const char *end, const char **rest)
{
  unsigned int v = 0;
  while (nptr < end) {
    const char c = *nptr;
    unsigned int digit;
    if (c >= '0' && c <= '9') {
      digit = (unsigned int)(c - '0');
    } else if (c >= 'a' && c <= 'f') {
      digit = (unsigned int)(c - 'a') + 10;
    } else if (c >= 'A' && c <= 'F') {
      digit = (unsigned int)(c - 'A') + 10;
    } else {
      v = 0;
      break;
    }
    /* the digit is consumed even when it overflows */
    nptr++;
    if (v > (UINT_MAX - digit) / 16) { v = 0; break; }
    v = v * 16 + digit;
  }
  if (rest) { *rest = nptr; }
  return v;
}
|
|
|
|
/*
 * Write the low `len` hexadecimal digits of `i` (upper-case, zero-padded on
 * the left) into p[0..len-1]. No terminator is written.
 *
 * The buffer is addressed by index so that len == 0 is a clean no-op; the
 * previous `p += len - 1` wrapped the unsigned length and formed an
 * out-of-range pointer.
 */
void
grn_itoh(unsigned int i, char *p, unsigned int len)
{
  static const char *hex = "0123456789ABCDEF";
  while (len > 0) {
    p[--len] = hex[i & 0xf];
    i >>= 4;
  }
}
|
|
|
|
grn_rc
|
|
grn_itoa(int i, char *p, char *end, char **rest)
|
|
{
|
|
char *q;
|
|
if (p >= end) { return GRN_INVALID_ARGUMENT; }
|
|
q = p;
|
|
if (i < 0) {
|
|
*p++ = '-';
|
|
q = p;
|
|
if (i == INT_MIN) {
|
|
if (p >= end) { return GRN_INVALID_ARGUMENT; }
|
|
*p++ = (-(i % 10)) + '0';
|
|
i /= 10;
|
|
}
|
|
i = -i;
|
|
}
|
|
do {
|
|
if (p >= end) { return GRN_INVALID_ARGUMENT; }
|
|
*p++ = i % 10 + '0';
|
|
} while ((i /= 10) > 0);
|
|
if (rest) { *rest = p; }
|
|
for (p--; q < p; q++, p--) {
|
|
char t = *q;
|
|
*q = *p;
|
|
*p = t;
|
|
}
|
|
return GRN_SUCCESS;
|
|
}
|
|
|
|
grn_rc
|
|
grn_itoa_padded(int i, char *p, char *end, char ch)
|
|
{
|
|
char *q;
|
|
if (p >= end) { return GRN_INVALID_ARGUMENT; }
|
|
if (i < 0) {
|
|
*p++ = '-';
|
|
if (i == INT_MIN) {
|
|
if (p >= end) { return GRN_INVALID_ARGUMENT; }
|
|
*p++ = (-(i % 10)) + '0';
|
|
i /= 10;
|
|
}
|
|
i = -i;
|
|
}
|
|
q = end - 1;
|
|
do {
|
|
if (q < p) { return GRN_INVALID_ARGUMENT; }
|
|
*q-- = i % 10 + '0';
|
|
} while ((i /= 10) > 0);
|
|
while (q >= p) {
|
|
*q-- = ch;
|
|
}
|
|
return GRN_SUCCESS;
|
|
}
|
|
|
|
grn_rc
|
|
grn_lltoa(int64_t i, char *p, char *end, char **rest)
|
|
{
|
|
char *q;
|
|
if (p >= end) { return GRN_INVALID_ARGUMENT; }
|
|
q = p;
|
|
if (i < 0) {
|
|
*p++ = '-';
|
|
q = p;
|
|
if (i == INT64_MIN) {
|
|
*p++ = (-(i % 10)) + '0';
|
|
i /= 10;
|
|
}
|
|
i = -i;
|
|
}
|
|
do {
|
|
if (p >= end) { return GRN_INVALID_ARGUMENT; }
|
|
*p++ = i % 10 + '0';
|
|
} while ((i /= 10) > 0);
|
|
if (rest) { *rest = p; }
|
|
for (p--; q < p; q++, p--) {
|
|
char t = *q;
|
|
*q = *p;
|
|
*p = t;
|
|
}
|
|
return GRN_SUCCESS;
|
|
}
|
|
|
|
grn_rc
|
|
grn_ulltoa(uint64_t i, char *p, char *end, char **rest)
|
|
{
|
|
char *q;
|
|
if (p >= end) { return GRN_INVALID_ARGUMENT; }
|
|
q = p;
|
|
do {
|
|
if (p >= end) { return GRN_INVALID_ARGUMENT; }
|
|
*p++ = i % 10 + '0';
|
|
} while ((i /= 10) > 0);
|
|
if (rest) { *rest = p; }
|
|
for (p--; q < p; q++, p--) {
|
|
char t = *q;
|
|
*q = *p;
|
|
*p = t;
|
|
}
|
|
return GRN_SUCCESS;
|
|
}
|
|
|
|
#define I2B(i) \
|
|
("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"[(i) & 0x3f])
|
|
|
|
#define B2I(b) \
|
|
(((b) < '+' || 'z' < (b)) ? 0xff : "\x3e\xff\xff\xff\x3f\x34\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d\xff\xff\xff\xff\xff\xff\xff\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\xff\xff\xff\xff\xff\xff\x1a\x1b\x1c\x1d\x1e\x1f\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f\x30\x31\x32\x33"[(b) - '+'])
|
|
|
|
#define MASK 0x34d34d34
|
|
|
|
char *
|
|
grn_itob(grn_id id, char *p)
|
|
{
|
|
id ^= MASK;
|
|
*p++ = I2B(id >> 24);
|
|
*p++ = I2B(id >> 18);
|
|
*p++ = I2B(id >> 12);
|
|
*p++ = I2B(id >> 6);
|
|
*p++ = I2B(id);
|
|
return p;
|
|
}
|
|
|
|
grn_id
|
|
grn_btoi(char *b)
|
|
{
|
|
uint8_t i;
|
|
grn_id id = 0;
|
|
int len = 5;
|
|
while (len--) {
|
|
char c = *b++;
|
|
if ((i = B2I(c)) == 0xff) { return 0; }
|
|
id = (id << 6) + i;
|
|
}
|
|
return id ^ MASK;
|
|
}
|
|
|
|
/* I2B32H: map the low 5 bits of i to a character of "0-9A-V". */
#define I2B32H(i) ("0123456789ABCDEFGHIJKLMNOPQRSTUV"[(i) & 0x1f])

/*
 * Encode `i` as 13 characters of the "0-9A-V" alphabet and return the
 * position just past them. No terminator is written.
 *
 * 2^63 is added first (as unsigned arithmetic) so that INT64_MIN maps to all
 * zeros and INT64_MAX to the largest code. The first character carries the
 * top 4 bits, each following character 5 bits.
 */
char *
grn_lltob32h(int64_t i, char *p)
{
  const uint64_t biased = (uint64_t)i + 0x8000000000000000ULL;
  int shift;
  for (shift = 60; shift >= 0; shift -= 5) {
    *p++ = I2B32H(biased >> shift);
  }
  return p;
}
|
|
|
|
/*
 * Encode the unsigned value `i` as 13 characters and return the position
 * just past them. No terminator is written.
 *
 * `lb` is 0x10 when bit 63 of the input is set. 2^63 is then added (which
 * flips bit 63), the top 4 bits pick the first character and each following
 * character carries 5 bits.
 *
 * NOTE(review): `lb` is added to the character code returned by I2B32H, not
 * to the 5-bit index passed to it, so inputs with bit 63 set start with a
 * character from '@'..'G'. Confirm whether I2B32H(lb + (i >> 60)) was meant
 * before changing it -- existing encoded data may depend on this output.
 */
char *
grn_ulltob32h(uint64_t i, char *p)
{
  const char lb = (i >> 59) & 0x10;
  int shift;
  i += 0x8000000000000000ULL;
  *p++ = lb + I2B32H(i >> 60);
  for (shift = 55; shift >= 0; shift -= 5) {
    *p++ = I2B32H(i >> shift);
  }
  return p;
}
|
|
|
|
grn_rc
|
|
grn_aton(grn_ctx *ctx, const char *p, const char *end, const char **rest,
|
|
grn_obj *res)
|
|
{
|
|
if (*p == '+') {
|
|
p++;
|
|
}
|
|
|
|
switch (*p) {
|
|
case '-' :
|
|
case '0' : case '1' : case '2' : case '3' : case '4' :
|
|
case '5' : case '6' : case '7' : case '8' : case '9' :
|
|
{
|
|
int64_t int64;
|
|
char rest_char;
|
|
int64 = grn_atoll(p, end, rest);
|
|
rest_char = **rest;
|
|
if (end == *rest) {
|
|
if ((int64_t)INT32_MIN <= int64 && int64 <= (int64_t)INT32_MAX) {
|
|
grn_obj_reinit(ctx, res, GRN_DB_INT32, 0);
|
|
GRN_INT32_SET(ctx, res, int64);
|
|
} else if ((int64_t)INT32_MAX < int64 && int64 <= (int64_t)UINT32_MAX) {
|
|
grn_obj_reinit(ctx, res, GRN_DB_UINT32, 0);
|
|
GRN_UINT32_SET(ctx, res, int64);
|
|
} else {
|
|
grn_obj_reinit(ctx, res, GRN_DB_INT64, 0);
|
|
GRN_INT64_SET(ctx, res, int64);
|
|
}
|
|
} else {
|
|
if (*p != '-' && rest_char >= '0' && rest_char <= '9') {
|
|
uint64_t uint64 = grn_atoull(p, end, rest);
|
|
if (end == *rest) {
|
|
grn_obj_reinit(ctx, res, GRN_DB_UINT64, 0);
|
|
GRN_UINT64_SET(ctx, res, uint64);
|
|
}
|
|
}
|
|
if (end != *rest) {
|
|
if (rest_char == '.' || rest_char == 'e' || rest_char == 'E' ||
|
|
(rest_char >= '0' && rest_char <= '9')) {
|
|
char *rest_float;
|
|
double d;
|
|
errno = 0;
|
|
d = strtod(p, &rest_float);
|
|
if (!errno && rest_float == end) {
|
|
grn_obj_reinit(ctx, res, GRN_DB_FLOAT, 0);
|
|
GRN_FLOAT_SET(ctx, res, d);
|
|
*rest = rest_float;
|
|
} else {
|
|
return GRN_INVALID_ARGUMENT;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
default :
|
|
return GRN_INVALID_ARGUMENT;
|
|
}
|
|
|
|
return GRN_SUCCESS;
|
|
}
|
|
|
|
/*
 * Split [str, str + str_len) at `delim` without modifying the input.
 *
 * tokbuf receives the END position of each token: a pointer to every
 * delimiter found and, once the whole input has been scanned, a pointer to
 * str + str_len. At most buf_size entries are written; scanning stops on
 * the delimiter that fills the buffer. *rest (when non-NULL) receives the
 * position where scanning stopped. Returns the number of entries written;
 * nothing is scanned when buf_size <= 0.
 */
int
grn_str_tok(const char *str, size_t str_len, char delim, const char **tokbuf, int buf_size, const char **rest)
{
  int n_tokens = 0;
  if (buf_size > 0) {
    const char *limit = str + str_len;
    while (str != limit) {
      if (*str == delim) {
        tokbuf[n_tokens++] = str;
        if (n_tokens == buf_size) { break; }
      }
      str++;
    }
    /* reaching the end implies there is still room for the final entry */
    if (str == limit) {
      tokbuf[n_tokens++] = str;
    }
  }
  if (rest) { *rest = str; }
  return n_tokens;
}
|
|
|
|
inline static int
|
|
op_getopt_flag(int *flags, const grn_str_getopt_opt *o,
|
|
int argc, char * const argv[], int i, const char *optvalue)
|
|
{
|
|
switch (o->op) {
|
|
case GETOPT_OP_NONE:
|
|
break;
|
|
case GETOPT_OP_ON:
|
|
*flags |= o->flag;
|
|
break;
|
|
case GETOPT_OP_OFF:
|
|
*flags &= ~o->flag;
|
|
break;
|
|
case GETOPT_OP_UPDATE:
|
|
*flags = o->flag;
|
|
break;
|
|
default:
|
|
return i;
|
|
}
|
|
if (o->arg) {
|
|
if (optvalue) {
|
|
*o->arg = (char *)optvalue;
|
|
} else if (++i < argc) {
|
|
*o->arg = argv[i];
|
|
} else {
|
|
return -1;
|
|
}
|
|
}
|
|
return i;
|
|
}
|
|
|
|
int
|
|
grn_str_getopt(int argc, char * const argv[], const grn_str_getopt_opt *opts,
|
|
int *flags)
|
|
{
|
|
int i;
|
|
for (i = 1; i < argc; i++) {
|
|
const char * v = argv[i];
|
|
if (*v == '-') {
|
|
const grn_str_getopt_opt *o;
|
|
int found;
|
|
if (*++v == '-') {
|
|
const char *eq;
|
|
size_t len;
|
|
found = 0;
|
|
v++;
|
|
for (eq = v; *eq != '\0' && *eq != '='; eq++) {}
|
|
len = eq - v;
|
|
for (o = opts; o->opt != '\0' || o->longopt != NULL; o++) {
|
|
if (o->longopt && strlen(o->longopt) == len &&
|
|
!memcmp(v, o->longopt, len)) {
|
|
i = op_getopt_flag(flags, o, argc, argv, i,
|
|
(*eq == '\0' ? NULL : eq + 1));
|
|
if (i < 0) {
|
|
fprintf(stderr, "%s: option '--%s' needs argument.\n", argv[0], o->longopt);
|
|
return -1;
|
|
}
|
|
found = 1;
|
|
break;
|
|
}
|
|
}
|
|
if (!found) { goto exit; }
|
|
} else {
|
|
const char *p;
|
|
for (p = v; *p; p++) {
|
|
found = 0;
|
|
for (o = opts; o->opt != '\0' || o->longopt != NULL; o++) {
|
|
if (o->opt && *p == o->opt) {
|
|
i = op_getopt_flag(flags, o, argc, argv, i, NULL);
|
|
if (i < 0) {
|
|
fprintf(stderr, "%s: option '-%c' needs argument.\n", argv[0], *p);
|
|
return -1;
|
|
}
|
|
found = 1;
|
|
break;
|
|
}
|
|
}
|
|
if (!found) { goto exit; }
|
|
}
|
|
}
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
return i;
|
|
exit:
|
|
fprintf(stderr, "%s: cannot recognize option '%s'.\n", argv[0], argv[i]);
|
|
return -1;
|
|
}
|
|
|
|
#define UNIT_SIZE (1 << 12)
|
|
#define UNIT_MASK (UNIT_SIZE - 1)
|
|
|
|
int grn_bulk_margin_size = 0;
|
|
|
|
grn_rc
|
|
grn_bulk_resize(grn_ctx *ctx, grn_obj *buf, unsigned int newsize)
|
|
{
|
|
char *head;
|
|
unsigned int rounded_newsize;
|
|
newsize += grn_bulk_margin_size + 1;
|
|
if (GRN_BULK_OUTP(buf)) {
|
|
rounded_newsize = (newsize + (UNIT_MASK)) & ~UNIT_MASK;
|
|
if (rounded_newsize < newsize) { return GRN_NOT_ENOUGH_SPACE; }
|
|
newsize = rounded_newsize;
|
|
head = buf->u.b.head - (buf->u.b.head ? grn_bulk_margin_size : 0);
|
|
if (!(head = GRN_REALLOC(head, newsize))) { return GRN_NO_MEMORY_AVAILABLE; }
|
|
buf->u.b.curr = head + grn_bulk_margin_size + GRN_BULK_VSIZE(buf);
|
|
buf->u.b.head = head + grn_bulk_margin_size;
|
|
buf->u.b.tail = head + newsize;
|
|
} else {
|
|
if (newsize > GRN_BULK_BUFSIZE) {
|
|
rounded_newsize = (newsize + (UNIT_MASK)) & ~UNIT_MASK;
|
|
if (rounded_newsize < newsize) { return GRN_NOT_ENOUGH_SPACE; }
|
|
newsize = rounded_newsize;
|
|
if (!(head = GRN_MALLOC(newsize))) { return GRN_NO_MEMORY_AVAILABLE; }
|
|
grn_memcpy(head, GRN_BULK_HEAD(buf), GRN_BULK_VSIZE(buf));
|
|
buf->u.b.curr = head + grn_bulk_margin_size + GRN_BULK_VSIZE(buf);
|
|
buf->u.b.head = head + grn_bulk_margin_size;
|
|
buf->u.b.tail = head + newsize;
|
|
buf->header.impl_flags |= GRN_OBJ_OUTPLACE;
|
|
}
|
|
}
|
|
return GRN_SUCCESS;
|
|
}
|
|
|
|
grn_rc
|
|
grn_bulk_reinit(grn_ctx *ctx, grn_obj *buf, unsigned int size)
|
|
{
|
|
GRN_BULK_REWIND(buf);
|
|
return grn_bulk_resize(ctx, buf, size);
|
|
}
|
|
|
|
grn_rc
|
|
grn_bulk_write(grn_ctx *ctx, grn_obj *buf, const char *str, unsigned int len)
|
|
{
|
|
grn_rc rc = GRN_SUCCESS;
|
|
char *curr;
|
|
if (GRN_BULK_REST(buf) < len) {
|
|
if ((rc = grn_bulk_resize(ctx, buf, GRN_BULK_VSIZE(buf) + len))) { return rc; }
|
|
}
|
|
curr = GRN_BULK_CURR(buf);
|
|
if (str)
|
|
grn_memcpy(curr, str, len);
|
|
GRN_BULK_INCR_LEN(buf, len);
|
|
return rc;
|
|
}
|
|
|
|
grn_rc
|
|
grn_bulk_write_from(grn_ctx *ctx, grn_obj *bulk,
|
|
const char *str, unsigned int from, unsigned int len)
|
|
{
|
|
grn_rc rc = grn_bulk_truncate(ctx, bulk, from);
|
|
if (!rc) { rc = grn_bulk_write(ctx, bulk, str, len); }
|
|
return rc;
|
|
}
|
|
|
|
grn_rc
|
|
grn_bulk_reserve(grn_ctx *ctx, grn_obj *buf, unsigned int len)
|
|
{
|
|
grn_rc rc = GRN_SUCCESS;
|
|
if (GRN_BULK_REST(buf) < len) {
|
|
if ((rc = grn_bulk_resize(ctx, buf, GRN_BULK_VSIZE(buf) + len))) { return rc; }
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
grn_rc
|
|
grn_bulk_space(grn_ctx *ctx, grn_obj *buf, unsigned int len)
|
|
{
|
|
grn_rc rc = grn_bulk_reserve(ctx, buf, len);
|
|
if (!rc) {
|
|
GRN_BULK_INCR_LEN(buf, len);
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
static grn_rc
|
|
grn_bulk_space_clear(grn_ctx *ctx, grn_obj *buf, unsigned int len)
|
|
{
|
|
grn_rc rc = grn_bulk_reserve(ctx, buf, len);
|
|
if (!rc) {
|
|
memset(GRN_BULK_CURR(buf), 0, len);
|
|
GRN_BULK_INCR_LEN(buf, len);
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
grn_rc
|
|
grn_bulk_truncate(grn_ctx *ctx, grn_obj *bulk, unsigned int len)
|
|
{
|
|
if (GRN_BULK_OUTP(bulk)) {
|
|
if ((bulk->u.b.tail - bulk->u.b.head) < len) {
|
|
return grn_bulk_space_clear(ctx, bulk, len);
|
|
} else {
|
|
bulk->u.b.curr = bulk->u.b.head + len;
|
|
}
|
|
} else {
|
|
if (GRN_BULK_BUFSIZE < len) {
|
|
return grn_bulk_space_clear(ctx, bulk, len);
|
|
} else {
|
|
bulk->header.flags &= ~GRN_BULK_BUFSIZE_MAX;
|
|
bulk->header.flags += len;
|
|
}
|
|
}
|
|
return GRN_SUCCESS;
|
|
}
|
|
|
|
grn_rc
|
|
grn_text_itoa(grn_ctx *ctx, grn_obj *buf, int i)
|
|
{
|
|
grn_rc rc = GRN_SUCCESS;
|
|
for (;;) {
|
|
char *curr = GRN_BULK_CURR(buf);
|
|
char *tail = GRN_BULK_TAIL(buf);
|
|
if (grn_itoa(i, curr, tail, &curr)) {
|
|
if ((rc = grn_bulk_resize(ctx, buf, GRN_BULK_WSIZE(buf) + UNIT_SIZE))) { return rc; }
|
|
} else {
|
|
GRN_BULK_SET_CURR(buf, curr);
|
|
break;
|
|
}
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
grn_rc
|
|
grn_text_itoa_padded(grn_ctx *ctx, grn_obj *buf, int i, char ch, unsigned int len)
|
|
{
|
|
grn_rc rc = GRN_SUCCESS;
|
|
char *curr;
|
|
if ((rc = grn_bulk_reserve(ctx, buf, len))) { return rc; }
|
|
curr = GRN_BULK_CURR(buf);
|
|
if (!grn_itoa_padded(i, curr, curr + len, ch)) {
|
|
GRN_BULK_SET_CURR(buf, curr + len);
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
grn_rc
|
|
grn_text_lltoa(grn_ctx *ctx, grn_obj *buf, long long int i)
|
|
{
|
|
grn_rc rc = GRN_SUCCESS;
|
|
for (;;) {
|
|
char *curr = GRN_BULK_CURR(buf);
|
|
char *tail = GRN_BULK_TAIL(buf);
|
|
if (grn_lltoa(i, curr, tail, &curr)) {
|
|
if ((rc = grn_bulk_resize(ctx, buf, GRN_BULK_WSIZE(buf) + UNIT_SIZE))) { return rc; }
|
|
} else {
|
|
GRN_BULK_SET_CURR(buf, curr);
|
|
break;
|
|
}
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
grn_rc
|
|
grn_text_ulltoa(grn_ctx *ctx, grn_obj *buf, unsigned long long int i)
|
|
{
|
|
grn_rc rc = GRN_SUCCESS;
|
|
for (;;) {
|
|
char *curr = GRN_BULK_CURR(buf);
|
|
char *tail = GRN_BULK_TAIL(buf);
|
|
if (grn_ulltoa(i, curr, tail, &curr)) {
|
|
if ((rc = grn_bulk_resize(ctx, buf, GRN_BULK_WSIZE(buf) + UNIT_SIZE))) { return rc; }
|
|
} else {
|
|
GRN_BULK_SET_CURR(buf, curr);
|
|
break;
|
|
}
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
inline static void
|
|
ftoa_(grn_ctx *ctx, grn_obj *buf, double d)
|
|
{
|
|
char *start;
|
|
size_t before_size;
|
|
size_t len;
|
|
#define DIGIT_NUMBER 16
|
|
#define FIRST_BUFFER_SIZE (DIGIT_NUMBER + 4)
|
|
before_size = GRN_BULK_VSIZE(buf);
|
|
grn_bulk_reserve(ctx, buf, FIRST_BUFFER_SIZE);
|
|
grn_text_printf(ctx, buf, "%#.*g", DIGIT_NUMBER, d);
|
|
len = GRN_BULK_VSIZE(buf) - before_size;
|
|
start = GRN_BULK_CURR(buf) - len;
|
|
#undef FIRST_BUFFER_SIZE
|
|
#undef DIGIT_NUMBER
|
|
if (start[len - 1] == '.') {
|
|
GRN_TEXT_PUTC(ctx, buf, '0');
|
|
} else {
|
|
char *p, *q;
|
|
start[len] = '\0';
|
|
if ((p = strchr(start, 'e'))) {
|
|
for (q = p; *(q - 2) != '.' && *(q - 1) == '0'; q--) { len--; }
|
|
grn_memmove(q, p, start + len - q);
|
|
} else {
|
|
for (q = start + len; *(q - 2) != '.' && *(q - 1) == '0'; q--) { len--; }
|
|
}
|
|
grn_bulk_truncate(ctx, buf, before_size + len);
|
|
}
|
|
}
|
|
|
|
grn_rc
|
|
grn_text_ftoa(grn_ctx *ctx, grn_obj *buf, double d)
|
|
{
|
|
grn_rc rc = GRN_SUCCESS;
|
|
if (GRN_BULK_REST(buf) < 32) {
|
|
if ((rc = grn_bulk_resize(ctx, buf, GRN_BULK_VSIZE(buf) + 32))) { return rc; }
|
|
}
|
|
#ifdef HAVE_FPCLASSIFY
|
|
switch (fpclassify(d)) {
|
|
case FP_NAN :
|
|
GRN_TEXT_PUTS(ctx, buf, "#<nan>");
|
|
break;
|
|
case FP_INFINITE :
|
|
GRN_TEXT_PUTS(ctx, buf, d > 0 ? "#i1/0" : "#i-1/0");
|
|
break;
|
|
default :
|
|
ftoa_(ctx, buf, d);
|
|
break;
|
|
}
|
|
#else /* HAVE_FPCLASSIFY */
|
|
if (d == d) {
|
|
if (d != 0 && ((d / 2.0) == d)) {
|
|
GRN_TEXT_PUTS(ctx, buf, d > 0 ? "#i1/0" : "#i-1/0");
|
|
} else {
|
|
ftoa_(ctx, buf, d);
|
|
}
|
|
} else {
|
|
GRN_TEXT_PUTS(ctx, buf, "#<nan>");
|
|
}
|
|
#endif /* HAVE_FPCLASSIFY */
|
|
return rc;
|
|
}
|
|
|
|
grn_rc
|
|
grn_text_itoh(grn_ctx *ctx, grn_obj *buf, int i, unsigned int len)
|
|
{
|
|
grn_rc rc = GRN_SUCCESS;
|
|
if (GRN_BULK_REST(buf) < len) {
|
|
if ((rc = grn_bulk_resize(ctx, buf, GRN_BULK_VSIZE(buf) + len))) { return rc; }
|
|
}
|
|
grn_itoh(i, GRN_BULK_CURR(buf), len);
|
|
GRN_BULK_INCR_LEN(buf, len);
|
|
return rc;
|
|
}
|
|
|
|
grn_rc
|
|
grn_text_itob(grn_ctx *ctx, grn_obj *buf, grn_id id)
|
|
{
|
|
size_t len = 5;
|
|
grn_rc rc = GRN_SUCCESS;
|
|
if (GRN_BULK_REST(buf) < len) {
|
|
if ((rc = grn_bulk_resize(ctx, buf, GRN_BULK_VSIZE(buf) + len))) { return rc; }
|
|
}
|
|
grn_itob(id, GRN_BULK_CURR(buf));
|
|
GRN_BULK_INCR_LEN(buf, len);
|
|
return rc;
|
|
}
|
|
|
|
grn_rc
|
|
grn_text_lltob32h(grn_ctx *ctx, grn_obj *buf, long long int i)
|
|
{
|
|
size_t len = 13;
|
|
grn_rc rc = GRN_SUCCESS;
|
|
if (GRN_BULK_REST(buf) < len) {
|
|
if ((rc = grn_bulk_resize(ctx, buf, GRN_BULK_VSIZE(buf) + len))) { return rc; }
|
|
}
|
|
grn_lltob32h(i, GRN_BULK_CURR(buf));
|
|
GRN_BULK_INCR_LEN(buf, len);
|
|
return rc;
|
|
}
|
|
|
|
grn_rc
|
|
grn_text_esc(grn_ctx *ctx, grn_obj *buf, const char *s, unsigned int len)
|
|
{
|
|
const char *e;
|
|
unsigned int l;
|
|
grn_rc rc = GRN_SUCCESS;
|
|
|
|
GRN_TEXT_PUTC(ctx, buf, '"');
|
|
for (e = s + len; s < e; s += l) {
|
|
if (!(l = grn_charlen(ctx, s, e))) { break; }
|
|
if (l == 1) {
|
|
switch (*s) {
|
|
case '"' :
|
|
grn_bulk_write(ctx, buf, "\\\"", 2);
|
|
break;
|
|
case '\\' :
|
|
grn_bulk_write(ctx, buf, "\\\\", 2);
|
|
break;
|
|
case '\b' :
|
|
grn_bulk_write(ctx, buf, "\\b", 2);
|
|
break;
|
|
case '\f' :
|
|
grn_bulk_write(ctx, buf, "\\f", 2);
|
|
break;
|
|
case '\n' :
|
|
grn_bulk_write(ctx, buf, "\\n", 2);
|
|
break;
|
|
case '\r' :
|
|
grn_bulk_write(ctx, buf, "\\r", 2);
|
|
break;
|
|
case '\t' :
|
|
grn_bulk_write(ctx, buf, "\\t", 2);
|
|
break;
|
|
case '\x00': case '\x01': case '\x02': case '\x03': case '\x04': case '\x05':
|
|
case '\x06': case '\x07': case '\x0b': case '\x0e': case '\x0f': case '\x10':
|
|
case '\x11': case '\x12': case '\x13': case '\x14': case '\x15': case '\x16':
|
|
case '\x17': case '\x18': case '\x19': case '\x1a': case '\x1b': case '\x1c':
|
|
case '\x1d': case '\x1e': case '\x1f': case '\x7f':
|
|
if (!(rc = grn_bulk_write(ctx, buf, "\\u", 2))) {
|
|
if ((rc = grn_text_itoh(ctx, buf, *s, 4))) {
|
|
GRN_BULK_INCR_LEN(buf, -2);
|
|
return rc;
|
|
}
|
|
} else {
|
|
return rc;
|
|
}
|
|
break;
|
|
default :
|
|
GRN_TEXT_PUTC(ctx, buf, *s);
|
|
}
|
|
} else if (l == 3) {
|
|
if (*s == '\xe2' && *(s + 1) == '\x80') {
|
|
switch (*(s + 2)) {
|
|
case '\xa8': /* \u2028 */
|
|
grn_bulk_write(ctx, buf, "\\u2028", 6);
|
|
break;
|
|
case '\xa9': /* \u2029 */
|
|
grn_bulk_write(ctx, buf, "\\u2029", 6);
|
|
break;
|
|
default:
|
|
grn_bulk_write(ctx, buf, s, l);
|
|
}
|
|
} else {
|
|
grn_bulk_write(ctx, buf, s, l);
|
|
}
|
|
} else {
|
|
grn_bulk_write(ctx, buf, s, l);
|
|
}
|
|
}
|
|
GRN_TEXT_PUTC(ctx, buf, '"');
|
|
return rc;
|
|
}
|
|
|
|
grn_rc
|
|
grn_text_escape_xml(grn_ctx *ctx, grn_obj *buf, const char *s, unsigned int len)
|
|
{
|
|
const char *e;
|
|
unsigned int l;
|
|
grn_rc rc = GRN_SUCCESS;
|
|
|
|
for (e = s + len; s < e; s += l) {
|
|
if (!(l = grn_charlen(ctx, s, e))) { break; }
|
|
if (l == 1) {
|
|
switch (*s) {
|
|
case '"' :
|
|
grn_bulk_write(ctx, buf, """, 6);
|
|
break;
|
|
case '<' :
|
|
grn_bulk_write(ctx, buf, "<", 4);
|
|
break;
|
|
case '>' :
|
|
grn_bulk_write(ctx, buf, ">", 4);
|
|
break;
|
|
case '&' :
|
|
grn_bulk_write(ctx, buf, "&", 5);
|
|
break;
|
|
default :
|
|
GRN_TEXT_PUTC(ctx, buf, *s);
|
|
}
|
|
} else {
|
|
grn_bulk_write(ctx, buf, s, l);
|
|
}
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
#define TOK_ESC (0x80)
|
|
|
|
const char *
|
|
grn_text_unesc_tok(grn_ctx *ctx, grn_obj *buf, const char *s, const char *e, char *tok_type)
|
|
{
|
|
const char *p;
|
|
unsigned int len;
|
|
uint8_t stat = GRN_TOK_VOID;
|
|
for (p = s; p < e; p += len) {
|
|
if (!(len = grn_charlen(ctx, p, e))) {
|
|
p = e;
|
|
stat &= ~TOK_ESC;
|
|
goto exit;
|
|
}
|
|
switch (stat) {
|
|
case GRN_TOK_VOID :
|
|
if (*p == ' ') { continue; }
|
|
switch (*p) {
|
|
case '"' :
|
|
stat = GRN_TOK_STRING;
|
|
break;
|
|
case '\'' :
|
|
stat = GRN_TOK_QUOTE;
|
|
break;
|
|
case ')' :
|
|
case '(' :
|
|
GRN_TEXT_PUT(ctx, buf, p, len);
|
|
p += len;
|
|
stat = GRN_TOK_SYMBOL;
|
|
goto exit;
|
|
case '\\' :
|
|
stat = GRN_TOK_SYMBOL|TOK_ESC;
|
|
break;
|
|
default :
|
|
stat = GRN_TOK_SYMBOL;
|
|
GRN_TEXT_PUT(ctx, buf, p, len);
|
|
break;
|
|
}
|
|
break;
|
|
case GRN_TOK_SYMBOL :
|
|
if (*p == ' ') { goto exit; }
|
|
switch (*p) {
|
|
case '\'' :
|
|
case '"' :
|
|
case ')' :
|
|
case '(' :
|
|
goto exit;
|
|
case '\\' :
|
|
stat |= TOK_ESC;
|
|
break;
|
|
default :
|
|
GRN_TEXT_PUT(ctx, buf, p, len);
|
|
break;
|
|
}
|
|
break;
|
|
case GRN_TOK_STRING :
|
|
switch (*p) {
|
|
case '"' :
|
|
p += len;
|
|
goto exit;
|
|
case '\\' :
|
|
stat |= TOK_ESC;
|
|
break;
|
|
default :
|
|
GRN_TEXT_PUT(ctx, buf, p, len);
|
|
break;
|
|
}
|
|
break;
|
|
case GRN_TOK_QUOTE :
|
|
switch (*p) {
|
|
case '\'' :
|
|
p += len;
|
|
goto exit;
|
|
case '\\' :
|
|
stat |= TOK_ESC;
|
|
break;
|
|
default :
|
|
GRN_TEXT_PUT(ctx, buf, p, len);
|
|
break;
|
|
}
|
|
break;
|
|
case GRN_TOK_SYMBOL|TOK_ESC :
|
|
case GRN_TOK_STRING|TOK_ESC :
|
|
case GRN_TOK_QUOTE|TOK_ESC :
|
|
switch (*p) {
|
|
case 'b' :
|
|
GRN_TEXT_PUTC(ctx, buf, '\b');
|
|
break;
|
|
case 'f' :
|
|
GRN_TEXT_PUTC(ctx, buf, '\f');
|
|
break;
|
|
case 'n' :
|
|
GRN_TEXT_PUTC(ctx, buf, '\n');
|
|
break;
|
|
case 'r' :
|
|
GRN_TEXT_PUTC(ctx, buf, '\r');
|
|
break;
|
|
case 't' :
|
|
GRN_TEXT_PUTC(ctx, buf, '\t');
|
|
break;
|
|
default :
|
|
GRN_TEXT_PUT(ctx, buf, p, len);
|
|
break;
|
|
}
|
|
stat &= ~TOK_ESC;
|
|
break;
|
|
}
|
|
}
|
|
exit :
|
|
*tok_type = stat;
|
|
return p;
|
|
}
|
|
|
|
grn_rc
|
|
grn_text_benc(grn_ctx *ctx, grn_obj *buf, unsigned int v)
|
|
{
|
|
grn_rc rc = GRN_SUCCESS;
|
|
uint8_t *p;
|
|
if (GRN_BULK_REST(buf) < 5) {
|
|
if ((rc = grn_bulk_resize(ctx, buf, GRN_BULK_VSIZE(buf) + 5))) { return rc; }
|
|
}
|
|
p = (uint8_t *)GRN_BULK_CURR(buf);
|
|
GRN_B_ENC(v, p);
|
|
GRN_BULK_SET_CURR(buf, (char *)p);
|
|
return rc;
|
|
}
|
|
|
|
/* 0x00 - 0x7f */
|
|
static const int_least8_t urlenc_tbl[] = {
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1
|
|
};
|
|
|
|
grn_rc
|
|
grn_text_urlenc(grn_ctx *ctx, grn_obj *buf, const char *s, unsigned int len)
|
|
{
|
|
const char *e, c = '%';
|
|
for (e = s + len; s < e; s++) {
|
|
if ((signed char)*s < 0 || urlenc_tbl[(int)*s]) {
|
|
if (!grn_bulk_write(ctx, buf, &c, 1)) {
|
|
if (grn_text_itoh(ctx, buf, *s, 2)) {
|
|
GRN_BULK_INCR_LEN(buf, -1);
|
|
}
|
|
}
|
|
} else {
|
|
GRN_TEXT_PUTC(ctx, buf, *s);
|
|
}
|
|
}
|
|
return GRN_SUCCESS;
|
|
}
|
|
|
|
static const char *weekdays[7] = {"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"};
|
|
static const char *months[12] = {
|
|
"Jan", "Feb", "Mar", "Apr", "May", "Jun",
|
|
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"};
|
|
|
|
grn_rc
|
|
grn_text_time2rfc1123(grn_ctx *ctx, grn_obj *bulk, int sec)
|
|
{
|
|
time_t tsec;
|
|
struct tm *t;
|
|
#ifdef HAVE__GMTIME64_S
|
|
struct tm tm;
|
|
tsec = (time_t)sec;
|
|
t = (gmtime_s(&tm, &tsec) == 0) ? &tm : NULL;
|
|
#else /* HAVE__GMTIME64_S */
|
|
# ifdef HAVE_GMTIME_R
|
|
struct tm tm;
|
|
tsec = (time_t)sec;
|
|
t = gmtime_r(&tsec, &tm);
|
|
# else /* HAVE_GMTIME_R */
|
|
tsec = (time_t)sec;
|
|
t = gmtime(&tsec);
|
|
# endif /* HAVE_GMTIME_R */
|
|
#endif /* HAVE__GMTIME64_S */
|
|
if (t) {
|
|
GRN_TEXT_SET(ctx, bulk, weekdays[t->tm_wday], 3);
|
|
GRN_TEXT_PUTS(ctx, bulk, ", ");
|
|
grn_text_itoa_padded(ctx, bulk, t->tm_mday, '0', 2);
|
|
GRN_TEXT_PUTS(ctx, bulk, " ");
|
|
GRN_TEXT_PUT(ctx, bulk, months[t->tm_mon], 3);
|
|
GRN_TEXT_PUTS(ctx, bulk, " ");
|
|
grn_text_itoa(ctx, bulk, t->tm_year + 1900);
|
|
GRN_TEXT_PUTS(ctx, bulk, " ");
|
|
grn_text_itoa_padded(ctx, bulk, t->tm_hour, '0', 2);
|
|
GRN_TEXT_PUTS(ctx, bulk, ":");
|
|
grn_text_itoa_padded(ctx, bulk, t->tm_min, '0', 2);
|
|
GRN_TEXT_PUTS(ctx, bulk, ":");
|
|
grn_text_itoa_padded(ctx, bulk, t->tm_sec, '0', 2);
|
|
GRN_TEXT_PUTS(ctx, bulk, " GMT");
|
|
} else {
|
|
GRN_TEXT_SETS(ctx, bulk, "Mon, 16 Mar 1980 20:40:00 GMT");
|
|
}
|
|
return GRN_SUCCESS;
|
|
}
|
|
|
|
grn_rc
|
|
grn_text_printf(grn_ctx *ctx, grn_obj *bulk, const char *format, ...)
|
|
{
|
|
va_list args;
|
|
|
|
va_start(args, format);
|
|
grn_text_vprintf(ctx, bulk, format, args);
|
|
va_end(args);
|
|
|
|
return GRN_SUCCESS;
|
|
}
|
|
|
|
grn_rc
|
|
grn_text_vprintf(grn_ctx *ctx, grn_obj *bulk, const char *format, va_list args)
|
|
{
|
|
grn_bool is_written = GRN_FALSE;
|
|
int written_size;
|
|
|
|
{
|
|
int rest_size;
|
|
va_list copied_args;
|
|
|
|
rest_size = GRN_BULK_REST(bulk);
|
|
va_copy(copied_args, args);
|
|
written_size = vsnprintf(GRN_BULK_CURR(bulk), rest_size,
|
|
format, copied_args);
|
|
va_end(copied_args);
|
|
|
|
if (0 <= written_size && written_size < rest_size) {
|
|
is_written = GRN_TRUE;
|
|
}
|
|
}
|
|
|
|
if (!is_written) {
|
|
#ifdef WIN32
|
|
# define N_NEW_SIZES 3
|
|
int i;
|
|
int new_sizes[N_NEW_SIZES];
|
|
|
|
new_sizes[0] = GRN_BULK_REST(bulk) + strlen(format) * 2;
|
|
new_sizes[1] = new_sizes[0] + 4096;
|
|
new_sizes[2] = new_sizes[0] + 65536;
|
|
|
|
for (i = 0; i < N_NEW_SIZES; i++) {
|
|
grn_rc rc;
|
|
int new_size = new_sizes[i];
|
|
va_list copied_args;
|
|
|
|
rc = grn_bulk_reserve(ctx, bulk, GRN_BULK_VSIZE(bulk) + new_size);
|
|
if (rc) {
|
|
return rc;
|
|
}
|
|
va_copy(copied_args, args);
|
|
written_size = vsnprintf(GRN_BULK_CURR(bulk), new_size,
|
|
format, copied_args);
|
|
va_end(copied_args);
|
|
if (written_size != -1) {
|
|
break;
|
|
}
|
|
}
|
|
# undef N_NEW_SIZES
|
|
#else /* WIN32 */
|
|
grn_rc rc;
|
|
int required_size = written_size + 1; /* "+ 1" for terminate '\0'. */
|
|
|
|
rc = grn_bulk_reserve(ctx, bulk, GRN_BULK_VSIZE(bulk) + required_size);
|
|
if (rc) {
|
|
return rc;
|
|
}
|
|
written_size = vsnprintf(GRN_BULK_CURR(bulk), required_size,
|
|
format, args);
|
|
#endif /* WIN32 */
|
|
}
|
|
|
|
if (written_size < 0) {
|
|
return GRN_INVALID_ARGUMENT;
|
|
}
|
|
|
|
GRN_BULK_INCR_LEN(bulk, written_size);
|
|
return GRN_SUCCESS;
|
|
}
|
|
|
|
grn_rc
|
|
grn_bulk_fin(grn_ctx *ctx, grn_obj *buf)
|
|
{
|
|
if (!(buf->header.impl_flags & GRN_OBJ_REFER)) {
|
|
if (GRN_BULK_OUTP(buf) && buf->u.b.head) {
|
|
GRN_REALLOC(buf->u.b.head - grn_bulk_margin_size, 0);
|
|
}
|
|
}
|
|
buf->header.flags = 0;
|
|
buf->header.impl_flags &= ~GRN_OBJ_DO_SHALLOW_COPY;
|
|
buf->u.b.head = NULL;
|
|
buf->u.b.curr = NULL;
|
|
buf->u.b.tail = NULL;
|
|
return GRN_SUCCESS;
|
|
}
|
|
|
|
grn_rc
|
|
grn_substring(grn_ctx *ctx, char **str, char **str_end, int start, int end, grn_encoding encoding)
|
|
{
|
|
int i;
|
|
size_t l;
|
|
char *s = *str, *e = *str_end;
|
|
for (i = 0; s < e; i++, s += l) {
|
|
if (i == start) { *str = s; }
|
|
if (!(l = grn_charlen(ctx, s, e))) {
|
|
return GRN_INVALID_ARGUMENT;
|
|
}
|
|
if (i == end) {
|
|
*str_end = s;
|
|
break;
|
|
}
|
|
}
|
|
return GRN_SUCCESS;
|
|
}
|
|
|
|
static void
|
|
grn_text_atoj(grn_ctx *ctx, grn_obj *bulk, grn_obj *obj, grn_id id)
|
|
{
|
|
uint32_t vs;
|
|
grn_obj buf;
|
|
if (obj->header.type == GRN_ACCESSOR) {
|
|
grn_accessor *a = (grn_accessor *)obj;
|
|
GRN_TEXT_INIT(&buf, 0);
|
|
for (;;) {
|
|
GRN_BULK_REWIND(&buf);
|
|
switch (a->action) {
|
|
case GRN_ACCESSOR_GET_ID :
|
|
GRN_UINT32_PUT(ctx, &buf, id);
|
|
buf.header.domain = GRN_DB_UINT32;
|
|
break;
|
|
case GRN_ACCESSOR_GET_KEY :
|
|
grn_table_get_key2(ctx, a->obj, id, &buf);
|
|
buf.header.domain = DB_OBJ(a->obj)->header.domain;
|
|
break;
|
|
case GRN_ACCESSOR_GET_VALUE :
|
|
grn_obj_get_value(ctx, a->obj, id, &buf);
|
|
buf.header.domain = GRN_DB_INT32; /* fix me */
|
|
break;
|
|
case GRN_ACCESSOR_GET_SCORE :
|
|
{
|
|
grn_rset_recinfo *ri = (grn_rset_recinfo *)grn_obj_get_value_(ctx, a->obj, id, &vs);
|
|
int32_t int32_score = ri->score;
|
|
GRN_INT32_PUT(ctx, &buf, int32_score);
|
|
}
|
|
buf.header.domain = GRN_DB_INT32;
|
|
break;
|
|
case GRN_ACCESSOR_GET_NSUBRECS :
|
|
{
|
|
grn_rset_recinfo *ri = (grn_rset_recinfo *)grn_obj_get_value_(ctx, a->obj, id, &vs);
|
|
GRN_INT32_PUT(ctx, &buf, ri->n_subrecs);
|
|
}
|
|
buf.header.domain = GRN_DB_INT32;
|
|
break;
|
|
case GRN_ACCESSOR_GET_COLUMN_VALUE :
|
|
if ((a->obj->header.flags & GRN_OBJ_COLUMN_TYPE_MASK) == GRN_OBJ_COLUMN_VECTOR) {
|
|
if (a->next) {
|
|
grn_id *idp;
|
|
grn_obj_get_value(ctx, a->obj, id, &buf);
|
|
idp = (grn_id *)GRN_BULK_HEAD(&buf);
|
|
GRN_TEXT_PUTC(ctx, bulk, '[');
|
|
for (vs = GRN_BULK_VSIZE(&buf) / sizeof(grn_id); vs--; idp++) {
|
|
grn_text_atoj(ctx, bulk, (grn_obj *)a->next, *idp);
|
|
if (vs) { GRN_TEXT_PUTC(ctx, bulk, ','); }
|
|
}
|
|
GRN_TEXT_PUTC(ctx, bulk, ']');
|
|
} else {
|
|
grn_text_atoj(ctx, bulk, a->obj, id);
|
|
}
|
|
goto exit;
|
|
} else {
|
|
grn_obj_get_value(ctx, a->obj, id, &buf);
|
|
}
|
|
break;
|
|
case GRN_ACCESSOR_GET_DB_OBJ :
|
|
/* todo */
|
|
break;
|
|
case GRN_ACCESSOR_LOOKUP :
|
|
/* todo */
|
|
break;
|
|
case GRN_ACCESSOR_FUNCALL :
|
|
/* todo */
|
|
break;
|
|
}
|
|
if (a->next) {
|
|
a = a->next;
|
|
id = *((grn_id *)GRN_BULK_HEAD(&buf));
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
} else {
|
|
switch (obj->header.type) {
|
|
case GRN_COLUMN_FIX_SIZE :
|
|
GRN_VALUE_FIX_SIZE_INIT(&buf, 0, DB_OBJ(obj)->range);
|
|
break;
|
|
case GRN_COLUMN_VAR_SIZE :
|
|
if ((obj->header.flags & GRN_OBJ_COLUMN_TYPE_MASK) == GRN_OBJ_COLUMN_VECTOR) {
|
|
grn_obj *range = grn_ctx_at(ctx, DB_OBJ(obj)->range);
|
|
if (range->header.flags & GRN_OBJ_KEY_VAR_SIZE) {
|
|
GRN_VALUE_VAR_SIZE_INIT(&buf, GRN_OBJ_VECTOR, DB_OBJ(obj)->range);
|
|
} else {
|
|
GRN_VALUE_FIX_SIZE_INIT(&buf, GRN_OBJ_VECTOR, DB_OBJ(obj)->range);
|
|
}
|
|
} else {
|
|
GRN_VALUE_VAR_SIZE_INIT(&buf, 0, DB_OBJ(obj)->range);
|
|
}
|
|
break;
|
|
case GRN_COLUMN_INDEX :
|
|
GRN_UINT32_INIT(&buf, 0);
|
|
break;
|
|
default:
|
|
GRN_TEXT_INIT(&buf, 0);
|
|
break;
|
|
}
|
|
grn_obj_get_value(ctx, obj, id, &buf);
|
|
}
|
|
grn_text_otoj(ctx, bulk, &buf, NULL);
|
|
exit :
|
|
grn_obj_close(ctx, &buf);
|
|
}
|
|
|
|
grn_rc
|
|
grn_text_otoj(grn_ctx *ctx, grn_obj *bulk, grn_obj *obj, grn_obj_format *format)
|
|
{
|
|
grn_obj buf;
|
|
GRN_TEXT_INIT(&buf, 0);
|
|
switch (obj->header.type) {
|
|
case GRN_BULK :
|
|
switch (obj->header.domain) {
|
|
case GRN_DB_VOID :
|
|
case GRN_DB_SHORT_TEXT :
|
|
case GRN_DB_TEXT :
|
|
case GRN_DB_LONG_TEXT :
|
|
grn_text_esc(ctx, bulk, GRN_BULK_HEAD(obj), GRN_BULK_VSIZE(obj));
|
|
break;
|
|
case GRN_DB_BOOL :
|
|
if (*((unsigned char *)GRN_BULK_HEAD(obj))) {
|
|
GRN_TEXT_PUTS(ctx, bulk, "true");
|
|
} else {
|
|
GRN_TEXT_PUTS(ctx, bulk, "false");
|
|
}
|
|
break;
|
|
case GRN_DB_INT8 :
|
|
grn_text_itoa(ctx, bulk, GRN_BULK_VSIZE(obj) ? GRN_INT8_VALUE(obj) : 0);
|
|
break;
|
|
case GRN_DB_UINT8 :
|
|
grn_text_lltoa(ctx, bulk, GRN_BULK_VSIZE(obj) ? GRN_UINT8_VALUE(obj) : 0);
|
|
break;
|
|
case GRN_DB_INT16 :
|
|
grn_text_itoa(ctx, bulk, GRN_BULK_VSIZE(obj) ? GRN_INT16_VALUE(obj) : 0);
|
|
break;
|
|
case GRN_DB_UINT16 :
|
|
grn_text_lltoa(ctx, bulk, GRN_BULK_VSIZE(obj) ? GRN_UINT16_VALUE(obj) : 0);
|
|
break;
|
|
case GRN_DB_INT32 :
|
|
grn_text_itoa(ctx, bulk, GRN_BULK_VSIZE(obj) ? GRN_INT32_VALUE(obj) : 0);
|
|
break;
|
|
case GRN_DB_UINT32 :
|
|
grn_text_lltoa(ctx, bulk, GRN_BULK_VSIZE(obj) ? GRN_UINT32_VALUE(obj) : 0);
|
|
break;
|
|
case GRN_DB_INT64 :
|
|
grn_text_lltoa(ctx, bulk, GRN_BULK_VSIZE(obj) ? GRN_INT64_VALUE(obj) : 0);
|
|
break;
|
|
case GRN_DB_UINT64 :
|
|
grn_text_ulltoa(ctx, bulk, GRN_BULK_VSIZE(obj) ? GRN_UINT64_VALUE(obj) : 0);
|
|
break;
|
|
case GRN_DB_FLOAT :
|
|
grn_text_ftoa(ctx, bulk, GRN_BULK_VSIZE(obj) ? GRN_FLOAT_VALUE(obj) : 0);
|
|
break;
|
|
case GRN_DB_TIME :
|
|
{
|
|
double dv = *((int64_t *)GRN_BULK_HEAD(obj));
|
|
dv /= 1000000.0;
|
|
grn_text_ftoa(ctx, bulk, dv);
|
|
}
|
|
break;
|
|
case GRN_DB_TOKYO_GEO_POINT :
|
|
case GRN_DB_WGS84_GEO_POINT :
|
|
if (GRN_BULK_VSIZE(obj) == sizeof(grn_geo_point)) {
|
|
grn_geo_point *gp = (grn_geo_point *)GRN_BULK_HEAD(obj);
|
|
GRN_TEXT_PUTC(ctx, bulk, '"');
|
|
grn_text_itoa(ctx, bulk, gp->latitude);
|
|
GRN_TEXT_PUTC(ctx, bulk, 'x');
|
|
grn_text_itoa(ctx, bulk, gp->longitude);
|
|
GRN_TEXT_PUTC(ctx, bulk, '"');
|
|
} else {
|
|
GRN_TEXT_PUTS(ctx, bulk, "\"\"");
|
|
}
|
|
break;
|
|
default :
|
|
if (format) {
|
|
int j;
|
|
int ncolumns = GRN_BULK_VSIZE(&format->columns)/sizeof(grn_obj *);
|
|
grn_id id = GRN_RECORD_VALUE(obj);
|
|
grn_obj **columns = (grn_obj **)GRN_BULK_HEAD(&format->columns);
|
|
if (format->flags & GRN_OBJ_FORMAT_WITH_COLUMN_NAMES) {
|
|
GRN_TEXT_PUTS(ctx, bulk, "[");
|
|
for (j = 0; j < ncolumns; j++) {
|
|
grn_id range_id;
|
|
if (j) { GRN_TEXT_PUTC(ctx, bulk, ','); }
|
|
GRN_TEXT_PUTS(ctx, bulk, "[");
|
|
GRN_BULK_REWIND(&buf);
|
|
grn_column_name_(ctx, columns[j], &buf);
|
|
grn_text_otoj(ctx, bulk, &buf, NULL);
|
|
GRN_TEXT_PUTC(ctx, bulk, ',');
|
|
/* column range */
|
|
range_id = grn_obj_get_range(ctx, columns[j]);
|
|
if (range_id == GRN_ID_NIL) {
|
|
GRN_TEXT_PUTS(ctx, bulk, "null");
|
|
} else {
|
|
int name_len;
|
|
grn_obj *range_obj;
|
|
char name_buf[GRN_TABLE_MAX_KEY_SIZE];
|
|
|
|
range_obj = grn_ctx_at(ctx, range_id);
|
|
name_len = grn_obj_name(ctx, range_obj, name_buf,
|
|
GRN_TABLE_MAX_KEY_SIZE);
|
|
GRN_BULK_REWIND(&buf);
|
|
GRN_TEXT_PUT(ctx, &buf, name_buf, name_len);
|
|
grn_text_otoj(ctx, bulk, &buf, NULL);
|
|
}
|
|
GRN_TEXT_PUTS(ctx, bulk, "]");
|
|
}
|
|
GRN_TEXT_PUTS(ctx, bulk, "],");
|
|
}
|
|
GRN_TEXT_PUTC(ctx, bulk, '[');
|
|
for (j = 0; j < ncolumns; j++) {
|
|
if (j) { GRN_TEXT_PUTC(ctx, bulk, ','); }
|
|
grn_text_atoj(ctx, bulk, columns[j], id);
|
|
}
|
|
GRN_TEXT_PUTC(ctx, bulk, ']');
|
|
} else {
|
|
if (GRN_BULK_VSIZE(obj) == 0) {
|
|
GRN_TEXT_PUTS(ctx, bulk, "null");
|
|
} else {
|
|
grn_obj *table = grn_ctx_at(ctx, obj->header.domain);
|
|
grn_id id = GRN_RECORD_VALUE(obj);
|
|
if (table && table->header.type != GRN_TABLE_NO_KEY) {
|
|
/* todo : temporal patch. grn_table_at() is kinda costful... */
|
|
if (grn_table_at(ctx, table, id)) {
|
|
grn_obj *accessor = grn_obj_column(ctx, table,
|
|
GRN_COLUMN_NAME_KEY,
|
|
GRN_COLUMN_NAME_KEY_LEN);
|
|
if (accessor) {
|
|
grn_obj_get_value(ctx, accessor, id, &buf);
|
|
grn_obj_unlink(ctx, accessor);
|
|
}
|
|
}
|
|
grn_text_otoj(ctx, bulk, &buf, format);
|
|
} else {
|
|
grn_text_lltoa(ctx, bulk, id);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
case GRN_UVECTOR :
|
|
if (format) {
|
|
if (format->flags & GRN_OBJ_FORMAT_WITH_WEIGHT) {
|
|
int i, n;
|
|
grn_obj *domain;
|
|
|
|
n = grn_uvector_size(ctx, obj);
|
|
domain = grn_ctx_at(ctx, obj->header.domain);
|
|
GRN_TEXT_PUTS(ctx, bulk, "{");
|
|
for (i = 0; i < n; i++) {
|
|
grn_id id;
|
|
unsigned int weight;
|
|
|
|
if (i > 0) {
|
|
GRN_TEXT_PUTC(ctx, bulk, ',');
|
|
}
|
|
id = grn_uvector_get_element(ctx, obj, i, &weight);
|
|
if (domain) {
|
|
if (domain->header.type == GRN_TABLE_NO_KEY) {
|
|
GRN_TEXT_PUTC(ctx, bulk, '"');
|
|
grn_text_ulltoa(ctx, bulk, id);
|
|
GRN_TEXT_PUTC(ctx, bulk, '"');
|
|
} else {
|
|
GRN_BULK_REWIND(&buf);
|
|
grn_table_get_key2(ctx, domain, id, &buf);
|
|
grn_text_otoj(ctx, bulk, &buf, NULL);
|
|
}
|
|
} else {
|
|
GRN_TEXT_PUTC(ctx, bulk, '"');
|
|
grn_text_ulltoa(ctx, bulk, id);
|
|
GRN_TEXT_PUTC(ctx, bulk, '"');
|
|
}
|
|
GRN_TEXT_PUTC(ctx, bulk, ':');
|
|
grn_text_ulltoa(ctx, bulk, weight);
|
|
}
|
|
GRN_TEXT_PUTS(ctx, bulk, "}");
|
|
} else {
|
|
/* TODO: Does we still need this code? If we don't need this, we should
|
|
remove this. */
|
|
int i, j;
|
|
grn_id *v = (grn_id *)GRN_BULK_HEAD(obj), *ve = (grn_id *)GRN_BULK_CURR(obj);
|
|
int ncolumns = GRN_BULK_VSIZE(&format->columns) / sizeof(grn_obj *);
|
|
grn_obj **columns = (grn_obj **)GRN_BULK_HEAD(&format->columns);
|
|
GRN_TEXT_PUTS(ctx, bulk, "[[");
|
|
grn_text_itoa(ctx, bulk, ve - v);
|
|
GRN_TEXT_PUTC(ctx, bulk, ']');
|
|
if (v < ve) {
|
|
if (format->flags & GRN_OBJ_FORMAT_WITH_COLUMN_NAMES) {
|
|
GRN_TEXT_PUTS(ctx, bulk, ",[");
|
|
for (j = 0; j < ncolumns; j++) {
|
|
grn_id range_id;
|
|
if (j) { GRN_TEXT_PUTC(ctx, bulk, ','); }
|
|
GRN_TEXT_PUTS(ctx, bulk, "[");
|
|
GRN_BULK_REWIND(&buf);
|
|
grn_column_name_(ctx, columns[j], &buf);
|
|
grn_text_otoj(ctx, bulk, &buf, NULL);
|
|
GRN_TEXT_PUTC(ctx, bulk, ',');
|
|
/* column range */
|
|
range_id = grn_obj_get_range(ctx, columns[j]);
|
|
if (range_id == GRN_ID_NIL) {
|
|
GRN_TEXT_PUTS(ctx, bulk, "null");
|
|
} else {
|
|
int name_len;
|
|
grn_obj *range_obj;
|
|
char name_buf[GRN_TABLE_MAX_KEY_SIZE];
|
|
|
|
range_obj = grn_ctx_at(ctx, range_id);
|
|
name_len = grn_obj_name(ctx, range_obj, name_buf,
|
|
GRN_TABLE_MAX_KEY_SIZE);
|
|
GRN_BULK_REWIND(&buf);
|
|
GRN_TEXT_PUT(ctx, &buf, name_buf, name_len);
|
|
grn_text_otoj(ctx, bulk, &buf, NULL);
|
|
}
|
|
GRN_TEXT_PUTS(ctx, bulk, "]");
|
|
}
|
|
GRN_TEXT_PUTC(ctx, bulk, ']');
|
|
}
|
|
for (i = 0;; i++) {
|
|
GRN_TEXT_PUTS(ctx, bulk, ",[");
|
|
for (j = 0; j < ncolumns; j++) {
|
|
if (j) { GRN_TEXT_PUTC(ctx, bulk, ','); }
|
|
GRN_BULK_REWIND(&buf);
|
|
grn_obj_get_value(ctx, columns[j], *v, &buf);
|
|
grn_text_otoj(ctx, bulk, &buf, NULL);
|
|
}
|
|
GRN_TEXT_PUTC(ctx, bulk, ']');
|
|
v++;
|
|
if (v < ve) {
|
|
GRN_TEXT_PUTC(ctx, bulk, ',');
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
GRN_TEXT_PUTC(ctx, bulk, ']');
|
|
}
|
|
} else {
|
|
grn_obj *range = grn_ctx_at(ctx, obj->header.domain);
|
|
if (range && range->header.type == GRN_TYPE) {
|
|
grn_id value_size = ((struct _grn_type *)range)->obj.range;
|
|
char *v = (char *)GRN_BULK_HEAD(obj),
|
|
*ve = (char *)GRN_BULK_CURR(obj);
|
|
GRN_TEXT_PUTC(ctx, bulk, '[');
|
|
if (v < ve) {
|
|
for (;;) {
|
|
grn_obj value;
|
|
GRN_OBJ_INIT(&value, GRN_BULK, 0, obj->header.domain);
|
|
grn_bulk_write_from(ctx, &value, v, 0, value_size);
|
|
grn_text_otoj(ctx, bulk, &value, NULL);
|
|
|
|
v += value_size;
|
|
if (v < ve) {
|
|
GRN_TEXT_PUTC(ctx, bulk, ',');
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
GRN_TEXT_PUTC(ctx, bulk, ']');
|
|
} else {
|
|
grn_id *v = (grn_id *)GRN_BULK_HEAD(obj),
|
|
*ve = (grn_id *)GRN_BULK_CURR(obj);
|
|
GRN_TEXT_PUTC(ctx, bulk, '[');
|
|
if (v < ve) {
|
|
for (;;) {
|
|
if (range->header.type != GRN_TABLE_NO_KEY) {
|
|
grn_obj key;
|
|
GRN_OBJ_INIT(&key, GRN_BULK, 0, range->header.domain);
|
|
grn_table_get_key2(ctx, range, *v, &key);
|
|
grn_text_otoj(ctx, bulk, &key, NULL);
|
|
GRN_OBJ_FIN(ctx, &key);
|
|
} else {
|
|
grn_text_lltoa(ctx, bulk, *v);
|
|
}
|
|
v++;
|
|
if (v < ve) {
|
|
GRN_TEXT_PUTC(ctx, bulk, ',');
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
GRN_TEXT_PUTC(ctx, bulk, ']');
|
|
}
|
|
}
|
|
break;
|
|
case GRN_VECTOR :
|
|
if (obj->header.domain == GRN_DB_VOID) {
|
|
ERR(GRN_INVALID_ARGUMENT, "invalid obj->header.domain");
|
|
} else {
|
|
unsigned int i, n;
|
|
grn_obj value;
|
|
grn_obj weight;
|
|
grn_bool with_weight;
|
|
|
|
GRN_VOID_INIT(&value);
|
|
GRN_UINT32_INIT(&weight, 0);
|
|
with_weight = (format && format->flags & GRN_OBJ_FORMAT_WITH_WEIGHT);
|
|
n = grn_vector_size(ctx, obj);
|
|
if (with_weight) {
|
|
GRN_TEXT_PUTC(ctx, bulk, '{');
|
|
} else {
|
|
GRN_TEXT_PUTC(ctx, bulk, '[');
|
|
}
|
|
for (i = 0; i < n; i++) {
|
|
const char *_value;
|
|
unsigned int _weight, length;
|
|
grn_id domain;
|
|
if (i) { GRN_TEXT_PUTC(ctx, bulk, ','); }
|
|
|
|
length = grn_vector_get_element(ctx, obj, i,
|
|
&_value, &_weight, &domain);
|
|
if (domain != GRN_DB_VOID) {
|
|
grn_obj_reinit(ctx, &value, domain, 0);
|
|
} else {
|
|
grn_obj_reinit(ctx, &value, obj->header.domain, 0);
|
|
}
|
|
grn_bulk_write(ctx, &value, _value, length);
|
|
grn_text_otoj(ctx, bulk, &value, NULL);
|
|
if (with_weight) {
|
|
GRN_TEXT_PUTC(ctx, bulk, ':');
|
|
GRN_UINT32_SET(ctx, &weight, _weight);
|
|
grn_text_otoj(ctx, bulk, &weight, NULL);
|
|
}
|
|
}
|
|
if (with_weight) {
|
|
GRN_TEXT_PUTC(ctx, bulk, '}');
|
|
} else {
|
|
GRN_TEXT_PUTC(ctx, bulk, ']');
|
|
}
|
|
GRN_OBJ_FIN(ctx, &value);
|
|
GRN_OBJ_FIN(ctx, &weight);
|
|
}
|
|
break;
|
|
case GRN_PVECTOR :
|
|
if (format) {
|
|
ERR(GRN_FUNCTION_NOT_IMPLEMENTED,
|
|
"cannot print GRN_PVECTOR using grn_obj_format");
|
|
} else {
|
|
unsigned int i, n;
|
|
GRN_TEXT_PUTC(ctx, bulk, '[');
|
|
n = GRN_BULK_VSIZE(obj) / sizeof(grn_obj *);
|
|
for (i = 0; i < n; i++) {
|
|
grn_obj *value;
|
|
|
|
if (i) { GRN_TEXT_PUTC(ctx, bulk, ','); }
|
|
value = GRN_PTR_VALUE_AT(obj, i);
|
|
grn_text_otoj(ctx, bulk, value, NULL);
|
|
}
|
|
GRN_TEXT_PUTC(ctx, bulk, ']');
|
|
}
|
|
break;
|
|
case GRN_TABLE_HASH_KEY :
|
|
case GRN_TABLE_PAT_KEY :
|
|
case GRN_TABLE_NO_KEY :
|
|
if (format) {
|
|
int i, j;
|
|
int ncolumns = GRN_BULK_VSIZE(&format->columns)/sizeof(grn_obj *);
|
|
grn_obj **columns = (grn_obj **)GRN_BULK_HEAD(&format->columns);
|
|
grn_table_cursor *tc = grn_table_cursor_open(ctx, obj, NULL, 0, NULL, 0,
|
|
format->offset, format->limit,
|
|
GRN_CURSOR_ASCENDING);
|
|
if (!tc) { ERRCLR(ctx); }
|
|
GRN_TEXT_PUTS(ctx, bulk, "[[");
|
|
grn_text_itoa(ctx, bulk, format->nhits);
|
|
GRN_TEXT_PUTC(ctx, bulk, ']');
|
|
if (format->flags & GRN_OBJ_FORMAT_WITH_COLUMN_NAMES) {
|
|
GRN_TEXT_PUTS(ctx, bulk, ",[");
|
|
for (j = 0; j < ncolumns; j++) {
|
|
grn_id range_id;
|
|
if (j) { GRN_TEXT_PUTC(ctx, bulk, ','); }
|
|
GRN_TEXT_PUTS(ctx, bulk, "[");
|
|
GRN_BULK_REWIND(&buf);
|
|
grn_column_name_(ctx, columns[j], &buf);
|
|
grn_text_otoj(ctx, bulk, &buf, NULL);
|
|
GRN_TEXT_PUTC(ctx, bulk, ',');
|
|
/* column range */
|
|
range_id = grn_obj_get_range(ctx, columns[j]);
|
|
if (range_id == GRN_ID_NIL) {
|
|
GRN_TEXT_PUTS(ctx, bulk, "null");
|
|
} else {
|
|
int name_len;
|
|
grn_obj *range_obj;
|
|
char name_buf[GRN_TABLE_MAX_KEY_SIZE];
|
|
|
|
range_obj = grn_ctx_at(ctx, range_id);
|
|
name_len = grn_obj_name(ctx, range_obj, name_buf,
|
|
GRN_TABLE_MAX_KEY_SIZE);
|
|
GRN_BULK_REWIND(&buf);
|
|
GRN_TEXT_PUT(ctx, &buf, name_buf, name_len);
|
|
grn_text_otoj(ctx, bulk, &buf, NULL);
|
|
}
|
|
GRN_TEXT_PUTS(ctx, bulk, "]");
|
|
}
|
|
GRN_TEXT_PUTC(ctx, bulk, ']');
|
|
}
|
|
if (tc) {
|
|
grn_id id;
|
|
for (i = 0; (id = grn_table_cursor_next(ctx, tc)) != GRN_ID_NIL; i++) {
|
|
GRN_TEXT_PUTS(ctx, bulk, ",[");
|
|
for (j = 0; j < ncolumns; j++) {
|
|
if (j) { GRN_TEXT_PUTC(ctx, bulk, ','); }
|
|
grn_text_atoj(ctx, bulk, columns[j], id);
|
|
}
|
|
GRN_TEXT_PUTC(ctx, bulk, ']');
|
|
}
|
|
grn_table_cursor_close(ctx, tc);
|
|
}
|
|
GRN_TEXT_PUTC(ctx, bulk, ']');
|
|
} else {
|
|
int i;
|
|
grn_id id;
|
|
grn_obj *column = grn_obj_column(ctx, obj,
|
|
GRN_COLUMN_NAME_KEY,
|
|
GRN_COLUMN_NAME_KEY_LEN);
|
|
grn_table_cursor *tc = grn_table_cursor_open(ctx, obj, NULL, 0, NULL, 0,
|
|
0, -1, GRN_CURSOR_ASCENDING);
|
|
GRN_TEXT_PUTC(ctx, bulk, '[');
|
|
if (tc) {
|
|
for (i = 0; (id = grn_table_cursor_next(ctx, tc)) != GRN_ID_NIL; i++) {
|
|
if (i) { GRN_TEXT_PUTC(ctx, bulk, ','); }
|
|
GRN_BULK_REWIND(&buf);
|
|
grn_obj_get_value(ctx, column, id, &buf);
|
|
grn_text_esc(ctx, bulk, GRN_BULK_HEAD(&buf), GRN_BULK_VSIZE(&buf));
|
|
}
|
|
grn_table_cursor_close(ctx, tc);
|
|
}
|
|
GRN_TEXT_PUTC(ctx, bulk, ']');
|
|
grn_obj_unlink(ctx, column);
|
|
}
|
|
break;
|
|
}
|
|
grn_obj_close(ctx, &buf);
|
|
return GRN_SUCCESS;
|
|
}
|
|
|
|
const char *
|
|
grn_text_urldec(grn_ctx *ctx, grn_obj *buf, const char *p, const char *e, char d)
|
|
{
|
|
while (p < e) {
|
|
if (*p == d) {
|
|
p++; break;
|
|
} else if (*p == '%' && p + 3 <= e) {
|
|
const char *r;
|
|
unsigned int c = grn_htoui(p + 1, p + 3, &r);
|
|
if (p + 3 == r) {
|
|
GRN_TEXT_PUTC(ctx, buf, c);
|
|
p += 3;
|
|
} else {
|
|
GRN_LOG(ctx, GRN_LOG_NOTICE, "invalid %% sequence (%c%c)", p[1], p[2]);
|
|
GRN_TEXT_PUTC(ctx, buf, '%');
|
|
p += 1;
|
|
}
|
|
} else {
|
|
GRN_TEXT_PUTC(ctx, buf, *p);
|
|
p++;
|
|
}
|
|
}
|
|
return p;
|
|
}
|
|
|
|
const char *
|
|
grn_text_cgidec(grn_ctx *ctx, grn_obj *buf, const char *p, const char *e,
|
|
const char *delimiters)
|
|
{
|
|
while (p < e) {
|
|
grn_bool found_delimiter = GRN_FALSE;
|
|
const char *delimiter;
|
|
for (delimiter = delimiters; *delimiter; delimiter++) {
|
|
if (*p == *delimiter) {
|
|
found_delimiter = GRN_TRUE;
|
|
break;
|
|
}
|
|
}
|
|
if (found_delimiter) {
|
|
p++;
|
|
break;
|
|
}
|
|
|
|
if (*p == '+') {
|
|
GRN_TEXT_PUTC(ctx, buf, ' ');
|
|
p++;
|
|
} else if (*p == '%' && p + 3 <= e) {
|
|
const char *r;
|
|
unsigned int c = grn_htoui(p + 1, p + 3, &r);
|
|
if (p + 3 == r) {
|
|
GRN_TEXT_PUTC(ctx, buf, c);
|
|
p += 3;
|
|
} else {
|
|
GRN_LOG(ctx, GRN_LOG_NOTICE, "invalid %% sequence (%c%c)", p[1], p[2]);
|
|
GRN_TEXT_PUTC(ctx, buf, '%');
|
|
p += 1;
|
|
}
|
|
} else {
|
|
GRN_TEXT_PUTC(ctx, buf, *p);
|
|
p++;
|
|
}
|
|
}
|
|
return p;
|
|
}
|
|
|
|
void
|
|
grn_str_url_path_normalize(grn_ctx *ctx, const char *path, size_t path_len,
|
|
char *buf, size_t buf_len)
|
|
{
|
|
char *b = buf, *be = buf + buf_len - 1;
|
|
const char *p = path, *pe = path + path_len, *pc;
|
|
|
|
if (buf_len < 2) { return; }
|
|
|
|
while (p < pe) {
|
|
for (pc = p; pc < pe && *pc != '/'; pc++) {}
|
|
if (*p == '.') {
|
|
if (pc == p + 2 && *(p + 1) == '.') {
|
|
/* '..' */
|
|
if (b - buf >= 2) {
|
|
for (b -= 2; *b != '/' && b >= buf; b--) {}
|
|
}
|
|
if (*b == '/') {
|
|
b++;
|
|
ERR(GRN_INVALID_ARGUMENT, "parent path doesn't exist.");
|
|
}
|
|
p = pc + 1;
|
|
continue;
|
|
} else if (pc == p + 1) {
|
|
/* '.' */
|
|
p = pc + 1;
|
|
continue;
|
|
}
|
|
}
|
|
if (be - b >= pc - p) {
|
|
grn_memcpy(b, p, (pc - p));
|
|
b += pc - p;
|
|
p = pc;
|
|
if (p < pe && *pc == '/' && be > b) {
|
|
*b++ = '/';
|
|
p++;
|
|
}
|
|
}
|
|
}
|
|
*b = '\0';
|
|
}
|
|
|
|
grn_bool
|
|
grn_bulk_is_zero(grn_ctx *ctx, grn_obj *obj)
|
|
{
|
|
const char *v = GRN_BULK_HEAD(obj);
|
|
unsigned int s = GRN_BULK_VSIZE(obj);
|
|
for (; s; s--, v++) {
|
|
if (*v) { return GRN_FALSE; }
|
|
}
|
|
return GRN_TRUE;
|
|
}
|
|
|