mariadb/storage/mroonga/vendor/groonga/lib/str.c

3252 lines
87 KiB
C

/* -*- c-basic-offset: 2 -*- */
/* Copyright(C) 2009-2015 Brazil
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "grn.h"
#include <limits.h>
#include <stdio.h>
#include <stdarg.h>
#include <string.h>
#include "grn_db.h"
#include "grn_str.h"
#ifndef _ISOC99_SOURCE
#define _ISOC99_SOURCE
#endif /* _ISOC99_SOURCE */
#include <math.h>
#if defined(HAVE__GMTIME64_S) && defined(__GNUC__)
# ifdef _WIN64
# define gmtime_s(tm, time) _gmtime64_s(tm, time)
# else /* _WIN64 */
# define gmtime_s(tm, time) _gmtime32_s(tm, time)
# endif /* _WIN64 */
#endif /* defined(HAVE__GMTIME64_S) && defined(__GNUC__) */
/* For Visual C++ 2010. Drop the code when we drop Visual C++ 2010 support. */
#if defined(_MSC_VER) && _MSC_VER < 1800
# define va_copy(destination, source) destination = source
#endif
/*
 * Returns the byte length of the UTF-8 character that starts at str, or 0
 * when there is no character or the sequence is malformed.
 *
 * str does not have to be NUL-terminated, but end must point just past the
 * last readable byte.
 *
 * 0 is returned when:
 *   - str is not before end, or *str is a NUL byte (nothing is logged);
 *   - the lead byte announces a length other than 2, 3 or 4;
 *   - the announced sequence would run past end;
 *   - a trailing byte is not of the form 10xxxxxx.
 * The last three cases are reported at GRN_LOG_WARNING level.
 */
inline static int
grn_str_charlen_utf8(grn_ctx *ctx, const unsigned char *str, const unsigned char *end)
{
  if (end <= str || !*str) {
    return 0;
  }
  if (*str & 0x80) {
    int i;
    int len;
    /* Put the lead byte into the top byte of a 32-bit word and invert it,
       so that the leading 1 bits of the lead byte become leading 0 bits.
       GRN_BIT_SCAN_REV is expected to store the index of the highest set
       bit in len.  The cast keeps the shift in unsigned arithmetic:
       shifting the promoted int into its sign bit is undefined. */
    GRN_BIT_SCAN_REV(~((uint32_t)*str << 24), len);
    len = 31 - len;
    if ((unsigned int)(len - 2) >= 3) { /* (len == 1 || len >= 5) */
      GRN_LOG(ctx, GRN_LOG_WARNING,
              "grn_str_charlen_utf8(): first byte is invalid");
      return 0;
    }
    if (str + len > end) {
      GRN_LOG(ctx, GRN_LOG_WARNING,
              "grn_str_charlen_utf8(): incomplete character");
      return 0;
    }
    /* Every byte after the lead byte must be a continuation byte. */
    for (i = 1; i < len; ++i) {
      if ((str[i] & 0xc0) != 0x80) {
        GRN_LOG(ctx, GRN_LOG_WARNING,
                "grn_str_charlen_utf8(): <%d>th byte is invalid",
                i + 1);
        return 0;
      }
    }
    return len;
  } else {
    /* 7-bit ASCII */
    return 1;
  }
}
/*
 * Returns the byte length of the first character of the NUL-terminated
 * string str, interpreted in the given encoding.
 *
 * Returns 0 for an empty string and for a character that is cut short by
 * the terminating NUL or (UTF-8 only) has a malformed lead or trailing
 * byte; those error cases are logged at GRN_LOG_WARNING level.
 * Encodings other than EUC-JP, UTF-8 and Shift_JIS are treated as
 * single-byte.
 */
unsigned int
grn_str_charlen(grn_ctx *ctx, const char *str, grn_encoding encoding)
{
  const unsigned char *head = (const unsigned char *) str;
  const unsigned char lead = head[0];

  if (lead == '\0') {
    return 0;
  }

  if (encoding == GRN_ENC_EUC_JP) {
    if (!(lead & 0x80)) {
      return 1;
    }
    if (head[1]) {
      return 2;
    }
    /* A high-bit byte directly followed by the terminator. */
    GRN_LOG(ctx, GRN_LOG_WARNING, "invalid euc-jp string end on grn_str_charlen");
    return 0;
  }

  if (encoding == GRN_ENC_UTF8) {
    int mask;
    int n_trailing = 0;
    size_t n_bytes;

    if (!(lead & 0x80)) {
      return 1;
    }
    /* Count the 1 bits that follow the top bit of the lead byte; that is
       the number of trailing bytes the lead byte announces. */
    for (mask = 0x40; mask && (lead & mask); mask >>= 1) {
      n_trailing++;
    }
    if (n_trailing == 0) {
      GRN_LOG(ctx, GRN_LOG_WARNING, "invalid utf8 string(1) on grn_str_charlen");
      return 0;
    }
    for (n_bytes = 1; n_trailing > 0; n_trailing--, n_bytes++) {
      const unsigned char follower = head[n_bytes];
      if (follower == '\0' || (follower & 0xc0) != 0x80) {
        GRN_LOG(ctx, GRN_LOG_WARNING, "invalid utf8 string(2) on grn_str_charlen");
        return 0;
      }
    }
    return n_bytes;
  }

  if (encoding == GRN_ENC_SJIS) {
    if (!(lead & 0x80)) {
      return 1;
    }
    /* we regard 0xa0 as JIS X 0201 KANA. adjusted to other tools. */
    if (0xa0 <= lead && lead <= 0xdf) {
      /* hankaku-kana */
      return 1;
    }
    if (head[1]) {
      return 2;
    }
    /* A double-byte lead directly followed by the terminator. */
    GRN_LOG(ctx, GRN_LOG_WARNING, "invalid sjis string end on grn_str_charlen");
    return 0;
  }

  return 1;
}
/*
 * Returns the byte length of the character at str in the given encoding.
 *
 * str does not have to be NUL-terminated; end must point just past the
 * last readable byte.  Returns 0 when str is not before end or when a
 * multi-byte character is cut short by end (logged as a warning).
 * UTF-8 is delegated to grn_str_charlen_utf8(); encodings other than
 * EUC-JP, UTF-8 and Shift_JIS are treated as single-byte.
 */
int
grn_charlen_(grn_ctx *ctx, const char *str, const char *end, grn_encoding encoding)
{
  const unsigned char *cur = (const unsigned char *)str;
  const unsigned char *limit = (const unsigned char *)end;

  if (cur >= limit) {
    return 0;
  }
  if (encoding == GRN_ENC_UTF8) {
    return grn_str_charlen_utf8(ctx, cur, limit);
  }
  if (encoding != GRN_ENC_EUC_JP && encoding != GRN_ENC_SJIS) {
    return 1;
  }

  /* From here on: EUC-JP or Shift_JIS. */
  if (!(*cur & 0x80)) {
    return 1;
  }
  if (encoding == GRN_ENC_SJIS && 0xa0 <= *cur && *cur <= 0xdf) {
    /* we regard 0xa0 as JIS X 0201 KANA. adjusted to other tools. */
    /* hankaku-kana */
    return 1;
  }
  if (cur + 1 < limit) {
    return 2;
  }

  /* The second byte of a double-byte character is missing. */
  if (encoding == GRN_ENC_EUC_JP) {
    GRN_LOG(ctx, GRN_LOG_WARNING, "invalid euc-jp string end on grn_charlen");
  } else {
    GRN_LOG(ctx, GRN_LOG_WARNING, "invalid sjis string end on grn_charlen");
  }
  return 0;
}
/*
 * Returns the byte length of the character at str (bounded by end),
 * using the encoding stored in ctx.  See grn_charlen_() for the rules.
 */
int
grn_charlen(grn_ctx *ctx, const char *str, const char *end)
{
  const grn_encoding ctx_encoding = ctx->encoding;

  return grn_charlen_(ctx, str, end, ctx_encoding);
}
/*
 * ASCII replacements for double-byte symbol characters whose first byte
 * selects the "symbol" row (c1 & 0x0f == 1 in the normalizers below).
 * normalize_euc() indexes it with (c2 - 0xa4); normalize_sjis() indexes it
 * with (c2 - 0x43) for 0x43 <= c2 <= 0x7e and (c2 - 0x44) for
 * 0x7f <= c2 <= 0x97.  A 0 entry means "no ASCII replacement": the
 * original two bytes are kept.
 */
static unsigned char symbol[] = {
',', '.', 0, ':', ';', '?', '!', 0, 0, 0, '`', 0, '^', '~', '_', 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, '-', '-', '/', '\\', 0, 0, '|', 0, 0, 0, '\'', 0,
'"', '(', ')', 0, 0, '[', ']', '{', '}', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
'+', '-', 0, 0, 0, '=', 0, '<', '>', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
'$', 0, 0, '%', '#', '&', '*', '@', 0, 0, 0, 0, 0, 0, 0, 0
};
/*
 * Normalizes the EUC-JP text nstr->orig (nstr->orig_blen bytes) into a
 * newly allocated, NUL-terminated nstr->norm.
 *
 * What the loop does, per input character:
 *   - bytes below 0x20 are dropped; ' ' and the double-byte 0xa1 0xa1 are
 *     dropped too when GRN_STR_REMOVEBLANK is set (otherwise emitted as ' ');
 *   - ASCII upper-case letters are lower-cased;
 *   - 0x8e-prefixed bytes 0xa0..0xdf are replaced through hankana[], and a
 *     following voiced / semi-voiced mark is folded into the previous
 *     output character through dakuten[] / handaku[];
 *   - row 0xa1 symbols with an entry in symbol[] become ASCII;
 *   - row 0xa3 letters and digits become ASCII (letters lower-cased);
 *   - a high-bit byte that is not followed by another high-bit byte is
 *     skipped.
 *
 * Optional outputs: nstr->checks (GRN_STR_WITH_CHECKS) receives, for the
 * first byte of each output character, the number of source bytes consumed
 * since the previous output character, and 0 for the remaining bytes;
 * nstr->ctypes (GRN_STR_WITH_CTYPES) receives one type code per output
 * character, terminated by GRN_CHAR_NULL.
 *
 * Sets nstr->length (output characters) and nstr->norm_blen (output bytes).
 * Returns GRN_NO_MEMORY_AVAILABLE when an allocation fails (buffers that
 * were already allocated are released), GRN_SUCCESS otherwise.
 */
inline static grn_rc
normalize_euc(grn_ctx *ctx, grn_str *nstr)
{
/* Double-byte replacement for the byte c2 that follows 0x8e, indexed by
   (c2 - 0xa0).  The last two entries (0xa1ab, 0xa1eb) are the marks that
   trigger the dakuten / handaku folding below. */
static uint16_t hankana[] = {
0xa1a1, 0xa1a3, 0xa1d6, 0xa1d7, 0xa1a2, 0xa1a6, 0xa5f2, 0xa5a1, 0xa5a3,
0xa5a5, 0xa5a7, 0xa5a9, 0xa5e3, 0xa5e5, 0xa5e7, 0xa5c3, 0xa1bc, 0xa5a2,
0xa5a4, 0xa5a6, 0xa5a8, 0xa5aa, 0xa5ab, 0xa5ad, 0xa5af, 0xa5b1, 0xa5b3,
0xa5b5, 0xa5b7, 0xa5b9, 0xa5bb, 0xa5bd, 0xa5bf, 0xa5c1, 0xa5c4, 0xa5c6,
0xa5c8, 0xa5ca, 0xa5cb, 0xa5cc, 0xa5cd, 0xa5ce, 0xa5cf, 0xa5d2, 0xa5d5,
0xa5d8, 0xa5db, 0xa5de, 0xa5df, 0xa5e0, 0xa5e1, 0xa5e2, 0xa5e4, 0xa5e6,
0xa5e8, 0xa5e9, 0xa5ea, 0xa5eb, 0xa5ec, 0xa5ed, 0xa5ef, 0xa5f3, 0xa1ab,
0xa1eb
};
/* Replacement second byte when 0xa1ab follows an output character
   0xa5 XX, indexed by (XX - 0xa6); 0 means "cannot be combined". */
static unsigned char dakuten[] = {
0xf4, 0, 0, 0, 0, 0xac, 0, 0xae, 0, 0xb0, 0, 0xb2, 0, 0xb4, 0, 0xb6, 0,
0xb8, 0, 0xba, 0, 0xbc, 0, 0xbe, 0, 0xc0, 0, 0xc2, 0, 0, 0xc5, 0, 0xc7,
0, 0xc9, 0, 0, 0, 0, 0, 0, 0xd0, 0, 0, 0xd3, 0, 0, 0xd6, 0, 0, 0xd9, 0,
0, 0xdc
};
/* Same idea for 0xa1eb, indexed by (XX - 0xcf). */
static unsigned char handaku[] = {
0xd1, 0, 0, 0xd4, 0, 0, 0xd7, 0, 0, 0xda, 0, 0, 0xdd
};
/* s/e: read cursor and end of input; s_: start of the source bytes not yet
   accounted for in checks; d: write cursor; d0: start of output;
   d_: output position up to which checks entries have been written;
   ch/cp: write cursors into checks / ctypes (NULL when not requested). */
int16_t *ch;
const unsigned char *s, *s_, *e;
unsigned char *d, *d0, *d_, b;
uint_least8_t *cp, *ctypes, ctype;
size_t size = nstr->orig_blen, length = 0;
int removeblankp = nstr->flags & GRN_STR_REMOVEBLANK;
/* Output buffer is sized at twice the input plus the terminator. */
if (!(nstr->norm = GRN_MALLOC(size * 2 + 1))) {
return GRN_NO_MEMORY_AVAILABLE;
}
d0 = (unsigned char *) nstr->norm;
if (nstr->flags & GRN_STR_WITH_CHECKS) {
if (!(nstr->checks = GRN_MALLOC(size * 2 * sizeof(int16_t) + 1))) {
GRN_FREE(nstr->norm);
nstr->norm = NULL;
return GRN_NO_MEMORY_AVAILABLE;
}
}
ch = nstr->checks;
if (nstr->flags & GRN_STR_WITH_CTYPES) {
if (!(nstr->ctypes = GRN_MALLOC(size + 1))) {
/* NOTE(review): nstr->checks is freed here even when
   GRN_STR_WITH_CHECKS was not requested; normalize_utf8() tests it
   for NULL first.  Confirm GRN_FREE tolerates NULL. */
GRN_FREE(nstr->checks);
GRN_FREE(nstr->norm);
nstr->checks = NULL;
nstr->norm = NULL;
return GRN_NO_MEMORY_AVAILABLE;
}
}
cp = ctypes = nstr->ctypes;
e = (unsigned char *)nstr->orig + size;
/* Each iteration that reaches the bottom of the loop has written exactly
   one character whose last byte is at *d; `continue` means "no output
   character for this input". */
for (s = s_ = (unsigned char *) nstr->orig, d = d_ = d0; s < e; s++) {
if ((*s & 0x80)) {
/* Double-byte character: both bytes must have the high bit set. */
if (((s + 1) < e) && (*(s + 1) & 0x80)) {
unsigned char c1 = *s++, c2 = *s, c3 = 0;
switch (c1 >> 4) {
case 0x08 :
if (c1 == 0x8e && 0xa0 <= c2 && c2 <= 0xdf) {
uint16_t c = hankana[c2 - 0xa0];
switch (c) {
case 0xa1ab :
/* Fold the mark into the previous output character when that
   character is 0xa5 XX and dakuten[] has a combined form. */
if (d > d0 + 1 && d[-2] == 0xa5
&& 0xa6 <= d[-1] && d[-1] <= 0xdb && (b = dakuten[d[-1] - 0xa6])) {
*(d - 1) = b;
/* The two source bytes of the mark are charged to the previous
   checks entry instead of producing a new one. */
if (ch) { ch[-1] += 2; s_ += 2; }
continue;
} else {
*d++ = c >> 8; *d = c & 0xff;
}
break;
case 0xa1eb :
/* Same folding with the handaku[] table. */
if (d > d0 + 1 && d[-2] == 0xa5
&& 0xcf <= d[-1] && d[-1] <= 0xdb && (b = handaku[d[-1] - 0xcf])) {
*(d - 1) = b;
if (ch) { ch[-1] += 2; s_ += 2; }
continue;
} else {
*d++ = c >> 8; *d = c & 0xff;
}
break;
default :
*d++ = c >> 8; *d = c & 0xff;
break;
}
ctype = GRN_CHAR_KATAKANA;
} else {
*d++ = c1; *d = c2;
ctype = GRN_CHAR_OTHERS;
}
break;
case 0x09 :
*d++ = c1; *d = c2;
ctype = GRN_CHAR_OTHERS;
break;
case 0x0a :
/* First byte 0xa0..0xaf: the low nibble selects the handling. */
switch (c1 & 0x0f) {
case 1 :
switch (c2) {
case 0xbc :
*d++ = c1; *d = c2;
ctype = GRN_CHAR_KATAKANA;
break;
case 0xb9 :
*d++ = c1; *d = c2;
ctype = GRN_CHAR_KANJI;
break;
case 0xa1 :
/* Double-byte blank (0xa1 0xa1). */
if (removeblankp) {
if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
continue;
} else {
*d = ' ';
ctype = GRN_STR_BLANK|GRN_CHAR_SYMBOL;
}
break;
default :
/* Replace by the ASCII symbol when symbol[] has one. */
if (c2 >= 0xa4 && (c3 = symbol[c2 - 0xa4])) {
*d = c3;
ctype = GRN_CHAR_SYMBOL;
} else {
*d++ = c1; *d = c2;
ctype = GRN_CHAR_OTHERS;
}
break;
}
break;
case 2 :
*d++ = c1; *d = c2;
ctype = GRN_CHAR_SYMBOL;
break;
case 3 :
/* c2 - 0x80 is compared against the ASCII letter / digit ranges;
   matches are emitted as single-byte ASCII, letters lower-cased. */
c3 = c2 - 0x80;
if ('a' <= c3 && c3 <= 'z') {
ctype = GRN_CHAR_ALPHA;
*d = c3;
} else if ('A' <= c3 && c3 <= 'Z') {
ctype = GRN_CHAR_ALPHA;
*d = c3 + 0x20;
} else if ('0' <= c3 && c3 <= '9') {
ctype = GRN_CHAR_DIGIT;
*d = c3;
} else {
ctype = GRN_CHAR_OTHERS;
*d++ = c1; *d = c2;
}
break;
case 4 :
*d++ = c1; *d = c2;
ctype = GRN_CHAR_HIRAGANA;
break;
case 5 :
*d++ = c1; *d = c2;
ctype = GRN_CHAR_KATAKANA;
break;
case 6 :
case 7 :
case 8 :
*d++ = c1; *d = c2;
ctype = GRN_CHAR_SYMBOL;
break;
default :
*d++ = c1; *d = c2;
ctype = GRN_CHAR_OTHERS;
break;
}
break;
default :
/* First byte 0xb0 and above: copied unchanged and typed as kanji. */
*d++ = c1; *d = c2;
ctype = GRN_CHAR_KANJI;
break;
}
} else {
/* skip invalid character */
continue;
}
} else {
/* Single-byte (7-bit) character, dispatched on its high nibble. */
unsigned char c = *s;
switch (c >> 4) {
case 0 :
case 1 :
/* skip unprintable ascii */
if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
continue;
case 2 :
if (c == 0x20) {
if (removeblankp) {
/* The removed blank is recorded on the previous character's type. */
if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
continue;
} else {
*d = ' ';
ctype = GRN_STR_BLANK|GRN_CHAR_SYMBOL;
}
} else {
*d = c;
ctype = GRN_CHAR_SYMBOL;
}
break;
case 3 :
*d = c;
ctype = (c <= 0x39) ? GRN_CHAR_DIGIT : GRN_CHAR_SYMBOL;
break;
case 4 :
/* '@' (0x40) stays as is; 'A'..'O' are lower-cased. */
*d = ('A' <= c) ? c + 0x20 : c;
ctype = (c == 0x40) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA;
break;
case 5 :
/* 'P'..'Z' are lower-cased; 0x5b..0x5f are symbols. */
*d = (c <= 'Z') ? c + 0x20 : c;
ctype = (c <= 0x5a) ? GRN_CHAR_ALPHA : GRN_CHAR_SYMBOL;
break;
case 6 :
*d = c;
ctype = (c == 0x60) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA;
break;
case 7 :
*d = c;
ctype = (c <= 0x7a) ? GRN_CHAR_ALPHA : (c == 0x7f ? GRN_CHAR_OTHERS : GRN_CHAR_SYMBOL);
break;
default :
*d = c;
ctype = GRN_CHAR_OTHERS;
break;
}
}
/* Commit the character: step past its last byte and record its type
   and, if requested, its checks entries. */
d++;
length++;
if (cp) { *cp++ = ctype; }
if (ch) {
/* Source bytes consumed since the previous output character. */
*ch++ = (int16_t)(s + 1 - s_);
s_ = s + 1;
/* One 0 entry for every further byte of this output character. */
while (++d_ < d) { *ch++ = 0; }
}
}
if (cp) { *cp = GRN_CHAR_NULL; }
*d = '\0';
nstr->length = length;
nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm);
return GRN_SUCCESS;
}
#ifdef GRN_WITH_NFKC
const char *grn_nfkc_map1(const unsigned char *str);
const char *grn_nfkc_map2(const unsigned char *prefix, const unsigned char *suffix);
/*
 * Normalizes the UTF-8 text nstr->orig (nstr->orig_blen bytes) into a
 * newly allocated, NUL-terminated nstr->norm, using the lookup functions
 * grn_nfkc_map1() / grn_nfkc_map2() and grn_nfkc_char_type().
 *
 * For each source character, grn_nfkc_map1() may supply a replacement
 * string (the character itself is used when it returns NULL).  Before the
 * replacement is emitted, grn_nfkc_map2() is asked whether the previously
 * emitted character and the replacement combine; if so the previous
 * character is taken back out of the output and the combined string is
 * emitted instead.  Bytes below 0x20 are dropped, and ' ' is dropped when
 * GRN_STR_REMOVEBLANK is set.
 *
 * Processing stops at the first source position for which
 * grn_str_charlen_utf8() returns 0 (end of input, NUL byte or malformed
 * sequence).
 *
 * The output buffers start at three times the input size and are grown
 * on demand.  nstr->checks / nstr->ctypes are filled when the matching
 * flags are set.  Sets nstr->length and nstr->norm_blen.
 * Returns GRN_NO_MEMORY_AVAILABLE after releasing all buffers when an
 * allocation fails, GRN_SUCCESS otherwise.
 */
inline static grn_rc
normalize_utf8(grn_ctx *ctx, grn_str *nstr)
{
/* s: current source character; s_: start of the source bytes not yet
   accounted for in checks; s__: value of s_ before the last checks entry
   was written (restored when that entry is taken back);
   p/pe: current position / end of the string being emitted;
   d: write cursor; d_: start of the previously emitted character (NULL
   until one exists); de: end of the output buffer. */
int16_t *ch;
const unsigned char *s, *s_, *s__ = NULL, *p, *p2, *pe, *e;
unsigned char *d, *d_, *de;
uint_least8_t *cp;
size_t length = 0, ls, lp, size = nstr->orig_blen, ds = size * 3;
int removeblankp = nstr->flags & GRN_STR_REMOVEBLANK;
if (!(nstr->norm = GRN_MALLOC(ds + 1))) {
return GRN_NO_MEMORY_AVAILABLE;
}
if (nstr->flags & GRN_STR_WITH_CHECKS) {
if (!(nstr->checks = GRN_MALLOC(ds * sizeof(int16_t) + 1))) {
GRN_FREE(nstr->norm); nstr->norm = NULL;
return GRN_NO_MEMORY_AVAILABLE;
}
}
ch = nstr->checks;
if (nstr->flags & GRN_STR_WITH_CTYPES) {
if (!(nstr->ctypes = GRN_MALLOC(ds + 1))) {
if (nstr->checks) { GRN_FREE(nstr->checks); nstr->checks = NULL; }
GRN_FREE(nstr->norm); nstr->norm = NULL;
return GRN_NO_MEMORY_AVAILABLE;
}
}
cp = nstr->ctypes;
d = (unsigned char *)nstr->norm;
de = d + ds;
d_ = NULL;
e = (unsigned char *)nstr->orig + size;
for (s = s_ = (unsigned char *)nstr->orig; ; s += ls) {
/* ls == 0 ends the loop: end of input, NUL byte or malformed UTF-8. */
if (!(ls = grn_str_charlen_utf8(ctx, s, e))) {
break;
}
/* Replacement for this character, or the character itself. */
if ((p = (unsigned char *)grn_nfkc_map1(s))) {
pe = p + strlen((char *)p);
} else {
p = s;
pe = p + ls;
}
/* If the previous output character combines with what is about to be
   emitted, take the previous character back (output bytes, ctype,
   checks entries and length) and emit the combined string instead. */
if (d_ && (p2 = (unsigned char *)grn_nfkc_map2(d_, p))) {
p = p2;
pe = p + strlen((char *)p);
if (cp) { cp--; }
if (ch) {
ch -= (d - d_);
s_ = s__;
}
d = d_;
length--;
}
/* Emit the characters of [p, pe) one at a time. */
for (; ; p += lp) {
if (!(lp = grn_str_charlen_utf8(ctx, p, pe))) {
break;
}
if ((*p == ' ' && removeblankp) || *p < 0x20 /* skip unprintable ascii */ ) {
/* The dropped character is recorded on the previous character's type. */
if (cp > nstr->ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
} else {
/* Grow all output buffers when this character would not fit. */
if (de <= d + lp) {
unsigned char *norm;
ds += (ds >> 1) + lp;
if (!(norm = GRN_REALLOC(nstr->norm, ds + 1))) {
if (nstr->ctypes) { GRN_FREE(nstr->ctypes); nstr->ctypes = NULL; }
if (nstr->checks) { GRN_FREE(nstr->checks); nstr->checks = NULL; }
GRN_FREE(nstr->norm); nstr->norm = NULL;
return GRN_NO_MEMORY_AVAILABLE;
}
/* Re-base the cursors onto the (possibly moved) buffer. */
de = norm + ds;
d = norm + (d - (unsigned char *)nstr->norm);
nstr->norm = (char *)norm;
if (ch) {
int16_t *checks;
if (!(checks = GRN_REALLOC(nstr->checks, ds * sizeof(int16_t)+ 1))) {
if (nstr->ctypes) { GRN_FREE(nstr->ctypes); nstr->ctypes = NULL; }
GRN_FREE(nstr->checks); nstr->checks = NULL;
GRN_FREE(nstr->norm); nstr->norm = NULL;
return GRN_NO_MEMORY_AVAILABLE;
}
ch = checks + (ch - nstr->checks);
nstr->checks = checks;
}
if (cp) {
uint_least8_t *ctypes;
if (!(ctypes = GRN_REALLOC(nstr->ctypes, ds + 1))) {
GRN_FREE(nstr->ctypes); nstr->ctypes = NULL;
if (nstr->checks) { GRN_FREE(nstr->checks); nstr->checks = NULL; }
GRN_FREE(nstr->norm); nstr->norm = NULL;
return GRN_NO_MEMORY_AVAILABLE;
}
cp = ctypes + (cp - nstr->ctypes);
nstr->ctypes = ctypes;
}
}
grn_memcpy(d, p, lp);
/* Remember where this character starts for the next combine attempt. */
d_ = d;
d += lp;
length++;
if (cp) { *cp++ = grn_nfkc_char_type(p); }
if (ch) {
size_t i;
if (s_ == s + ls) {
/* The source character was already charged to an earlier output
   character (one source character produced several outputs). */
*ch++ = -1;
} else {
/* Source bytes consumed since the previous charged character. */
*ch++ = (int16_t)(s + ls - s_);
s__ = s_;
s_ = s + ls;
}
/* 0 for every further byte of this output character. */
for (i = lp; i > 1; i--) { *ch++ = 0; }
}
}
}
}
if (cp) { *cp = GRN_CHAR_NULL; }
*d = '\0';
nstr->length = length;
nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm);
return GRN_SUCCESS;
}
#endif /* GRN_WITH_NFKC */
/*
 * Normalizes the Shift_JIS text nstr->orig (nstr->orig_blen bytes) into a
 * newly allocated, NUL-terminated nstr->norm.
 *
 * What the loop does, per input character:
 *   - bytes below 0x20 are dropped; ' ' and the double-byte 0x81 0x40 are
 *     dropped too when GRN_STR_REMOVEBLANK is set (otherwise emitted as ' ');
 *   - ASCII upper-case letters are lower-cased;
 *   - single bytes 0xa0..0xdf are replaced by the double-byte code from
 *     hankana[], and a following voiced / semi-voiced mark is folded into
 *     the previous output character through dakuten[] / handaku[];
 *   - row 0x81 symbols with an entry in symbol[] become ASCII;
 *   - row 0x82 digits and letters become ASCII (letters lower-cased);
 *   - a lead byte whose next byte is missing or outside 0x40..0xfc is
 *     skipped.
 *
 * nstr->checks / nstr->ctypes are filled when GRN_STR_WITH_CHECKS /
 * GRN_STR_WITH_CTYPES are set.  Sets nstr->length and nstr->norm_blen.
 * Returns GRN_NO_MEMORY_AVAILABLE when an allocation fails (buffers that
 * were already allocated are released), GRN_SUCCESS otherwise.
 */
inline static grn_rc
normalize_sjis(grn_ctx *ctx, grn_str *nstr)
{
/* Double-byte replacement for a single byte 0xa0..0xdf, indexed by
   (byte - 0xa0).  The last two entries (0x814a, 0x814b) are the marks
   that trigger the dakuten / handaku folding below. */
static uint16_t hankana[] = {
0x8140, 0x8142, 0x8175, 0x8176, 0x8141, 0x8145, 0x8392, 0x8340, 0x8342,
0x8344, 0x8346, 0x8348, 0x8383, 0x8385, 0x8387, 0x8362, 0x815b, 0x8341,
0x8343, 0x8345, 0x8347, 0x8349, 0x834a, 0x834c, 0x834e, 0x8350, 0x8352,
0x8354, 0x8356, 0x8358, 0x835a, 0x835c, 0x835e, 0x8360, 0x8363, 0x8365,
0x8367, 0x8369, 0x836a, 0x836b, 0x836c, 0x836d, 0x836e, 0x8371, 0x8374,
0x8377, 0x837a, 0x837d, 0x837e, 0x8380, 0x8381, 0x8382, 0x8384, 0x8386,
0x8388, 0x8389, 0x838a, 0x838b, 0x838c, 0x838d, 0x838f, 0x8393, 0x814a,
0x814b
};
/* Replacement second byte when 0x814a follows an output character
   0x83 XX, indexed by (XX - 0x45); 0 means "cannot be combined". */
static unsigned char dakuten[] = {
0x94, 0, 0, 0, 0, 0x4b, 0, 0x4d, 0, 0x4f, 0, 0x51, 0, 0x53, 0, 0x55, 0,
0x57, 0, 0x59, 0, 0x5b, 0, 0x5d, 0, 0x5f, 0, 0x61, 0, 0, 0x64, 0, 0x66,
0, 0x68, 0, 0, 0, 0, 0, 0, 0x6f, 0, 0, 0x72, 0, 0, 0x75, 0, 0, 0x78, 0,
0, 0x7b
};
/* Same idea for 0x814b, indexed by (XX - 0x6e). */
static unsigned char handaku[] = {
0x70, 0, 0, 0x73, 0, 0, 0x76, 0, 0, 0x79, 0, 0, 0x7c
};
/* s/e: read cursor and end of input; s_: start of the source bytes not yet
   accounted for in checks; d: write cursor; d0: start of output;
   d_: output position up to which checks entries have been written;
   ch/cp: write cursors into checks / ctypes (NULL when not requested). */
int16_t *ch;
const unsigned char *s, *s_;
unsigned char *d, *d0, *d_, b, *e;
uint_least8_t *cp, *ctypes, ctype;
size_t size = nstr->orig_blen, length = 0;
int removeblankp = nstr->flags & GRN_STR_REMOVEBLANK;
/* A single input byte can become two output bytes (hankana[]), hence
   twice the input size plus the terminator. */
if (!(nstr->norm = GRN_MALLOC(size * 2 + 1))) {
return GRN_NO_MEMORY_AVAILABLE;
}
d0 = (unsigned char *) nstr->norm;
if (nstr->flags & GRN_STR_WITH_CHECKS) {
if (!(nstr->checks = GRN_MALLOC(size * 2 * sizeof(int16_t) + 1))) {
GRN_FREE(nstr->norm);
nstr->norm = NULL;
return GRN_NO_MEMORY_AVAILABLE;
}
}
ch = nstr->checks;
if (nstr->flags & GRN_STR_WITH_CTYPES) {
if (!(nstr->ctypes = GRN_MALLOC(size + 1))) {
/* NOTE(review): nstr->checks is freed here even when
   GRN_STR_WITH_CHECKS was not requested; normalize_utf8() tests it
   for NULL first.  Confirm GRN_FREE tolerates NULL. */
GRN_FREE(nstr->checks);
GRN_FREE(nstr->norm);
nstr->checks = NULL;
nstr->norm = NULL;
return GRN_NO_MEMORY_AVAILABLE;
}
}
cp = ctypes = nstr->ctypes;
e = (unsigned char *)nstr->orig + size;
/* Each iteration that reaches the bottom of the loop has written exactly
   one character whose last byte is at *d; `continue` means "no output
   character for this input". */
for (s = s_ = (unsigned char *) nstr->orig, d = d_ = d0; s < e; s++) {
if ((*s & 0x80)) {
if (0xa0 <= *s && *s <= 0xdf) {
/* Single-byte kana: replaced by its double-byte code. */
uint16_t c = hankana[*s - 0xa0];
switch (c) {
case 0x814a :
/* Fold the mark into the previous output character when that
   character is 0x83 XX and dakuten[] has a combined form. */
if (d > d0 + 1 && d[-2] == 0x83
&& 0x45 <= d[-1] && d[-1] <= 0x7a && (b = dakuten[d[-1] - 0x45])) {
*(d - 1) = b;
/* The one source byte of the mark is charged to the previous
   checks entry instead of producing a new one. */
if (ch) { ch[-1]++; s_++; }
continue;
} else {
*d++ = c >> 8; *d = c & 0xff;
}
break;
case 0x814b :
/* Same folding with the handaku[] table. */
if (d > d0 + 1 && d[-2] == 0x83
&& 0x6e <= d[-1] && d[-1] <= 0x7a && (b = handaku[d[-1] - 0x6e])) {
*(d - 1) = b;
if (ch) { ch[-1]++; s_++; }
continue;
} else {
*d++ = c >> 8; *d = c & 0xff;
}
break;
default :
*d++ = c >> 8; *d = c & 0xff;
break;
}
ctype = GRN_CHAR_KATAKANA;
} else {
/* Double-byte character: the second byte must be in 0x40..0xfc. */
if ((s + 1) < e && 0x40 <= *(s + 1) && *(s + 1) <= 0xfc) {
unsigned char c1 = *s++, c2 = *s, c3 = 0;
if (0x81 <= c1 && c1 <= 0x87) {
/* The low nibble of the first byte selects the handling. */
switch (c1 & 0x0f) {
case 1 :
switch (c2) {
case 0x5b :
*d++ = c1; *d = c2;
ctype = GRN_CHAR_KATAKANA;
break;
case 0x58 :
*d++ = c1; *d = c2;
ctype = GRN_CHAR_KANJI;
break;
case 0x40 :
/* Double-byte blank (0x81 0x40). */
if (removeblankp) {
if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
continue;
} else {
*d = ' ';
ctype = GRN_STR_BLANK|GRN_CHAR_SYMBOL;
}
break;
default :
/* Replace by the ASCII symbol when symbol[] has one.  The index
   offset changes at 0x7f, so the two ranges are looked up
   separately. */
if (0x43 <= c2 && c2 <= 0x7e && (c3 = symbol[c2 - 0x43])) {
*d = c3;
ctype = GRN_CHAR_SYMBOL;
} else if (0x7f <= c2 && c2 <= 0x97 && (c3 = symbol[c2 - 0x44])) {
*d = c3;
ctype = GRN_CHAR_SYMBOL;
} else {
*d++ = c1; *d = c2;
ctype = GRN_CHAR_OTHERS;
}
break;
}
break;
case 2 :
/* NOTE(review): c3 is assigned here but not read in this case. */
c3 = c2 - 0x1f;
if (0x4f <= c2 && c2 <= 0x58) {
/* 0x4f..0x58 -> '0'..'9' */
ctype = GRN_CHAR_DIGIT;
*d = c2 - 0x1f;
} else if (0x60 <= c2 && c2 <= 0x79) {
/* 0x60..0x79 -> 'a'..'z' */
ctype = GRN_CHAR_ALPHA;
*d = c2 + 0x01;
} else if (0x81 <= c2 && c2 <= 0x9a) {
/* 0x81..0x9a -> 'a'..'z' */
ctype = GRN_CHAR_ALPHA;
*d = c2 - 0x20;
} else if (0x9f <= c2 && c2 <= 0xf1) {
*d++ = c1; *d = c2;
ctype = GRN_CHAR_HIRAGANA;
} else {
*d++ = c1; *d = c2;
ctype = GRN_CHAR_OTHERS;
}
break;
case 3 :
if (0x40 <= c2 && c2 <= 0x96) {
*d++ = c1; *d = c2;
ctype = GRN_CHAR_KATAKANA;
} else {
*d++ = c1; *d = c2;
ctype = GRN_CHAR_SYMBOL;
}
break;
case 4 :
case 7 :
*d++ = c1; *d = c2;
ctype = GRN_CHAR_SYMBOL;
break;
default :
*d++ = c1; *d = c2;
ctype = GRN_CHAR_OTHERS;
break;
}
} else {
/* First byte outside 0x81..0x87: copied unchanged, typed as kanji. */
*d++ = c1; *d = c2;
ctype = GRN_CHAR_KANJI;
}
} else {
/* skip invalid character */
continue;
}
}
} else {
/* Single-byte (7-bit) character, dispatched on its high nibble. */
unsigned char c = *s;
switch (c >> 4) {
case 0 :
case 1 :
/* skip unprintable ascii */
if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
continue;
case 2 :
if (c == 0x20) {
if (removeblankp) {
/* The removed blank is recorded on the previous character's type. */
if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
continue;
} else {
*d = ' ';
ctype = GRN_STR_BLANK|GRN_CHAR_SYMBOL;
}
} else {
*d = c;
ctype = GRN_CHAR_SYMBOL;
}
break;
case 3 :
*d = c;
ctype = (c <= 0x39) ? GRN_CHAR_DIGIT : GRN_CHAR_SYMBOL;
break;
case 4 :
/* '@' (0x40) stays as is; 'A'..'O' are lower-cased. */
*d = ('A' <= c) ? c + 0x20 : c;
ctype = (c == 0x40) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA;
break;
case 5 :
/* 'P'..'Z' are lower-cased; 0x5b..0x5f are symbols. */
*d = (c <= 'Z') ? c + 0x20 : c;
ctype = (c <= 0x5a) ? GRN_CHAR_ALPHA : GRN_CHAR_SYMBOL;
break;
case 6 :
*d = c;
ctype = (c == 0x60) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA;
break;
case 7 :
*d = c;
ctype = (c <= 0x7a) ? GRN_CHAR_ALPHA : (c == 0x7f ? GRN_CHAR_OTHERS : GRN_CHAR_SYMBOL);
break;
default :
*d = c;
ctype = GRN_CHAR_OTHERS;
break;
}
}
/* Commit the character: step past its last byte and record its type
   and, if requested, its checks entries. */
d++;
length++;
if (cp) { *cp++ = ctype; }
if (ch) {
/* Source bytes consumed since the previous output character. */
*ch++ = (int16_t)(s + 1 - s_);
s_ = s + 1;
/* One 0 entry for every further byte of this output character. */
while (++d_ < d) { *ch++ = 0; }
}
}
if (cp) { *cp = GRN_CHAR_NULL; }
*d = '\0';
nstr->length = length;
nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm);
return GRN_SUCCESS;
}
/*
 * Byte-wise normalizer used when no encoding-specific one applies.
 *
 * Every byte of nstr->orig (nstr->orig_blen bytes) is treated as one
 * character and written to a newly allocated, NUL-terminated nstr->norm:
 *   - bytes below 0x20 are dropped;
 *   - ' ' is dropped when GRN_STR_REMOVEBLANK is set;
 *   - 'A'..'Z' are lower-cased;
 *   - everything else is copied unchanged (bytes >= 0x80 are typed
 *     GRN_CHAR_OTHERS).
 *
 * nstr->checks (GRN_STR_WITH_CHECKS) receives, per output byte, the number
 * of source bytes consumed since the previous output byte.  nstr->ctypes
 * (GRN_STR_WITH_CTYPES) receives one type code per output byte and is
 * terminated by GRN_CHAR_NULL.  Sets nstr->length and nstr->norm_blen.
 *
 * Returns GRN_NO_MEMORY_AVAILABLE when an allocation fails (buffers that
 * were already allocated are released), GRN_SUCCESS otherwise.
 */
inline static grn_rc
normalize_none(grn_ctx *ctx, grn_str *nstr)
{
  int16_t *ch;
  const unsigned char *s, *s_, *e;
  unsigned char *d, *d0, *d_;
  uint_least8_t *cp, *ctypes, ctype;
  size_t size = nstr->orig_blen, length = 0;
  int removeblankp = nstr->flags & GRN_STR_REMOVEBLANK;

  if (!(nstr->norm = GRN_MALLOC(size + 1))) {
    return GRN_NO_MEMORY_AVAILABLE;
  }
  d0 = (unsigned char *) nstr->norm;
  if (nstr->flags & GRN_STR_WITH_CHECKS) {
    if (!(nstr->checks = GRN_MALLOC(size * sizeof(int16_t) + 1))) {
      GRN_FREE(nstr->norm);
      nstr->norm = NULL;
      return GRN_NO_MEMORY_AVAILABLE;
    }
  }
  ch = nstr->checks;
  if (nstr->flags & GRN_STR_WITH_CTYPES) {
    if (!(nstr->ctypes = GRN_MALLOC(size + 1))) {
      /* checks exists only when GRN_STR_WITH_CHECKS was requested, so
         free it conditionally, as normalize_utf8() does. */
      if (nstr->checks) { GRN_FREE(nstr->checks); nstr->checks = NULL; }
      GRN_FREE(nstr->norm);
      nstr->norm = NULL;
      return GRN_NO_MEMORY_AVAILABLE;
    }
  }
  cp = ctypes = nstr->ctypes;
  e = (unsigned char *)nstr->orig + size;
  /* `continue` inside the switch means "no output for this byte". */
  for (s = s_ = (unsigned char *) nstr->orig, d = d_ = d0; s < e; s++) {
    unsigned char c = *s;
    switch (c >> 4) {
    case 0 :
    case 1 :
      /* skip unprintable ascii */
      if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
      continue;
    case 2 :
      if (c == 0x20) {
        if (removeblankp) {
          /* The removed blank is recorded on the previous character. */
          if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
          continue;
        } else {
          *d = ' ';
          ctype = GRN_STR_BLANK|GRN_CHAR_SYMBOL;
        }
      } else {
        *d = c;
        ctype = GRN_CHAR_SYMBOL;
      }
      break;
    case 3 :
      *d = c;
      ctype = (c <= 0x39) ? GRN_CHAR_DIGIT : GRN_CHAR_SYMBOL;
      break;
    case 4 :
      /* '@' (0x40) stays as is; 'A'..'O' are lower-cased. */
      *d = ('A' <= c) ? c + 0x20 : c;
      ctype = (c == 0x40) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA;
      break;
    case 5 :
      /* 'P'..'Z' are lower-cased; 0x5b..0x5f are symbols. */
      *d = (c <= 'Z') ? c + 0x20 : c;
      ctype = (c <= 0x5a) ? GRN_CHAR_ALPHA : GRN_CHAR_SYMBOL;
      break;
    case 6 :
      *d = c;
      ctype = (c == 0x60) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA;
      break;
    case 7 :
      *d = c;
      ctype = (c <= 0x7a) ? GRN_CHAR_ALPHA : (c == 0x7f ? GRN_CHAR_OTHERS : GRN_CHAR_SYMBOL);
      break;
    default :
      *d = c;
      ctype = GRN_CHAR_OTHERS;
      break;
    }
    d++;
    length++;
    if (cp) { *cp++ = ctype; }
    if (ch) {
      /* Source bytes consumed since the previous output byte. */
      *ch++ = (int16_t)(s + 1 - s_);
      s_ = s + 1;
      while (++d_ < d) { *ch++ = 0; }
    }
  }
  if (cp) { *cp = GRN_CHAR_NULL; }
  *d = '\0';
  nstr->length = length;
  nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm);
  return GRN_SUCCESS;
}
/* use cp1252 as latin1 */
/*
 * Byte-wise normalizer for GRN_ENC_LATIN1.
 *
 * Every byte of nstr->orig (nstr->orig_blen bytes) is one character and is
 * written to a newly allocated, NUL-terminated nstr->norm:
 *   - bytes below 0x20 are dropped;
 *   - ' ' is dropped when GRN_STR_REMOVEBLANK is set;
 *   - 'A'..'Z' are lower-cased;
 *   - 0x8a, 0x8c, 0x8e map to 0x9a, 0x9c, 0x9e; 0x9f maps to 0xff;
 *   - 0xc0..0xde map to the byte 0x20 higher, except 0xd7 which is kept
 *     (and typed as a symbol); 0xdf is kept;
 *   - everything else is copied unchanged.
 *
 * nstr->checks / nstr->ctypes are filled when GRN_STR_WITH_CHECKS /
 * GRN_STR_WITH_CTYPES are set.  Sets nstr->length and nstr->norm_blen.
 *
 * Returns GRN_NO_MEMORY_AVAILABLE when an allocation fails (buffers that
 * were already allocated are released), GRN_SUCCESS otherwise.
 */
inline static grn_rc
normalize_latin1(grn_ctx *ctx, grn_str *nstr)
{
  int16_t *ch;
  const unsigned char *s, *s_, *e;
  unsigned char *d, *d0, *d_;
  uint_least8_t *cp, *ctypes, ctype;
  size_t size = nstr->orig_blen, length = 0;
  int removeblankp = nstr->flags & GRN_STR_REMOVEBLANK;

  if (!(nstr->norm = GRN_MALLOC(size + 1))) {
    return GRN_NO_MEMORY_AVAILABLE;
  }
  d0 = (unsigned char *) nstr->norm;
  if (nstr->flags & GRN_STR_WITH_CHECKS) {
    if (!(nstr->checks = GRN_MALLOC(size * sizeof(int16_t) + 1))) {
      GRN_FREE(nstr->norm);
      nstr->norm = NULL;
      return GRN_NO_MEMORY_AVAILABLE;
    }
  }
  ch = nstr->checks;
  if (nstr->flags & GRN_STR_WITH_CTYPES) {
    if (!(nstr->ctypes = GRN_MALLOC(size + 1))) {
      /* checks exists only when GRN_STR_WITH_CHECKS was requested, so
         free it conditionally, as normalize_utf8() does. */
      if (nstr->checks) { GRN_FREE(nstr->checks); nstr->checks = NULL; }
      GRN_FREE(nstr->norm);
      nstr->norm = NULL;
      return GRN_NO_MEMORY_AVAILABLE;
    }
  }
  cp = ctypes = nstr->ctypes;
  e = (unsigned char *)nstr->orig + size;
  /* `continue` inside the switch means "no output for this byte". */
  for (s = s_ = (unsigned char *) nstr->orig, d = d_ = d0; s < e; s++) {
    unsigned char c = *s;
    switch (c >> 4) {
    case 0 :
    case 1 :
      /* skip unprintable ascii */
      if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
      continue;
    case 2 :
      if (c == 0x20) {
        if (removeblankp) {
          /* The removed blank is recorded on the previous character. */
          if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
          continue;
        } else {
          *d = ' ';
          ctype = GRN_STR_BLANK|GRN_CHAR_SYMBOL;
        }
      } else {
        *d = c;
        ctype = GRN_CHAR_SYMBOL;
      }
      break;
    case 3 :
      *d = c;
      ctype = (c <= 0x39) ? GRN_CHAR_DIGIT : GRN_CHAR_SYMBOL;
      break;
    case 4 :
      /* '@' (0x40) stays as is; 'A'..'O' are lower-cased. */
      *d = ('A' <= c) ? c + 0x20 : c;
      ctype = (c == 0x40) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA;
      break;
    case 5 :
      /* 'P'..'Z' are lower-cased; 0x5b..0x5f are symbols. */
      *d = (c <= 'Z') ? c + 0x20 : c;
      ctype = (c <= 0x5a) ? GRN_CHAR_ALPHA : GRN_CHAR_SYMBOL;
      break;
    case 6 :
      *d = c;
      ctype = (c == 0x60) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA;
      break;
    case 7 :
      *d = c;
      ctype = (c <= 0x7a) ? GRN_CHAR_ALPHA : (c == 0x7f ? GRN_CHAR_OTHERS : GRN_CHAR_SYMBOL);
      break;
    case 8 :
      /* 0x8a / 0x8c / 0x8e are mapped to the byte 0x10 higher. */
      if (c == 0x8a || c == 0x8c || c == 0x8e) {
        *d = c + 0x10;
        ctype = GRN_CHAR_ALPHA;
      } else {
        *d = c;
        ctype = GRN_CHAR_SYMBOL;
      }
      break;
    case 9 :
      /* 0x9a / 0x9c / 0x9e are kept; 0x9f is mapped to 0xff. */
      if (c == 0x9a || c == 0x9c || c == 0x9e || c == 0x9f) {
        *d = (c == 0x9f) ? c + 0x60 : c;
        ctype = GRN_CHAR_ALPHA;
      } else {
        *d = c;
        ctype = GRN_CHAR_SYMBOL;
      }
      break;
    case 0x0c :
      *d = c + 0x20;
      ctype = GRN_CHAR_ALPHA;
      break;
    case 0x0d :
      /* 0xd7 and 0xdf have no counterpart 0x20 higher and are kept. */
      *d = (c == 0xd7 || c == 0xdf) ? c : c + 0x20;
      ctype = (c == 0xd7) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA;
      break;
    case 0x0e :
      *d = c;
      ctype = GRN_CHAR_ALPHA;
      break;
    case 0x0f :
      *d = c;
      ctype = (c == 0xf7) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA;
      break;
    default :
      /* 0xa0..0xbf */
      *d = c;
      ctype = GRN_CHAR_OTHERS;
      break;
    }
    d++;
    length++;
    if (cp) { *cp++ = ctype; }
    if (ch) {
      /* Source bytes consumed since the previous output byte. */
      *ch++ = (int16_t)(s + 1 - s_);
      s_ = s + 1;
      while (++d_ < d) { *ch++ = 0; }
    }
  }
  if (cp) { *cp = GRN_CHAR_NULL; }
  *d = '\0';
  nstr->length = length;
  nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm);
  return GRN_SUCCESS;
}
/*
 * Byte-wise normalizer for GRN_ENC_KOI8R.
 *
 * Every byte of nstr->orig (nstr->orig_blen bytes) is one character and is
 * written to a newly allocated, NUL-terminated nstr->norm:
 *   - bytes below 0x20 are dropped;
 *   - ' ' is dropped when GRN_STR_REMOVEBLANK is set;
 *   - 'A'..'Z' are lower-cased;
 *   - 0xb3 maps to 0xa3;
 *   - 0xe0..0xff map to the byte 0x20 lower (0xc0..0xdf);
 *   - everything else is copied unchanged.
 *
 * nstr->checks / nstr->ctypes are filled when GRN_STR_WITH_CHECKS /
 * GRN_STR_WITH_CTYPES are set.  Sets nstr->length and nstr->norm_blen.
 *
 * Returns GRN_NO_MEMORY_AVAILABLE when an allocation fails (buffers that
 * were already allocated are released), GRN_SUCCESS otherwise.
 */
inline static grn_rc
normalize_koi8r(grn_ctx *ctx, grn_str *nstr)
{
  int16_t *ch;
  const unsigned char *s, *s_, *e;
  unsigned char *d, *d0, *d_;
  uint_least8_t *cp, *ctypes, ctype;
  /* Use the caller-supplied byte length like every other normalizer:
     nstr->orig is not required to be NUL-terminated, so strlen() could
     read past the input or disagree with orig_blen. */
  size_t size = nstr->orig_blen, length = 0;
  int removeblankp = nstr->flags & GRN_STR_REMOVEBLANK;

  if (!(nstr->norm = GRN_MALLOC(size + 1))) {
    return GRN_NO_MEMORY_AVAILABLE;
  }
  d0 = (unsigned char *) nstr->norm;
  if (nstr->flags & GRN_STR_WITH_CHECKS) {
    if (!(nstr->checks = GRN_MALLOC(size * sizeof(int16_t) + 1))) {
      GRN_FREE(nstr->norm);
      nstr->norm = NULL;
      return GRN_NO_MEMORY_AVAILABLE;
    }
  }
  ch = nstr->checks;
  if (nstr->flags & GRN_STR_WITH_CTYPES) {
    if (!(nstr->ctypes = GRN_MALLOC(size + 1))) {
      /* checks exists only when GRN_STR_WITH_CHECKS was requested, so
         free it conditionally, as normalize_utf8() does. */
      if (nstr->checks) { GRN_FREE(nstr->checks); nstr->checks = NULL; }
      GRN_FREE(nstr->norm);
      nstr->norm = NULL;
      return GRN_NO_MEMORY_AVAILABLE;
    }
  }
  cp = ctypes = nstr->ctypes;
  e = (unsigned char *)nstr->orig + size;
  /* `continue` inside the switch means "no output for this byte". */
  for (s = s_ = (unsigned char *) nstr->orig, d = d_ = d0; s < e; s++) {
    unsigned char c = *s;
    switch (c >> 4) {
    case 0 :
    case 1 :
      /* skip unprintable ascii */
      if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
      continue;
    case 2 :
      if (c == 0x20) {
        if (removeblankp) {
          /* The removed blank is recorded on the previous character. */
          if (cp > ctypes) { *(cp - 1) |= GRN_STR_BLANK; }
          continue;
        } else {
          *d = ' ';
          ctype = GRN_STR_BLANK|GRN_CHAR_SYMBOL;
        }
      } else {
        *d = c;
        ctype = GRN_CHAR_SYMBOL;
      }
      break;
    case 3 :
      *d = c;
      ctype = (c <= 0x39) ? GRN_CHAR_DIGIT : GRN_CHAR_SYMBOL;
      break;
    case 4 :
      /* '@' (0x40) stays as is; 'A'..'O' are lower-cased. */
      *d = ('A' <= c) ? c + 0x20 : c;
      ctype = (c == 0x40) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA;
      break;
    case 5 :
      /* 'P'..'Z' are lower-cased; 0x5b..0x5f are symbols. */
      *d = (c <= 'Z') ? c + 0x20 : c;
      ctype = (c <= 0x5a) ? GRN_CHAR_ALPHA : GRN_CHAR_SYMBOL;
      break;
    case 6 :
      *d = c;
      ctype = (c == 0x60) ? GRN_CHAR_SYMBOL : GRN_CHAR_ALPHA;
      break;
    case 7 :
      *d = c;
      ctype = (c <= 0x7a) ? GRN_CHAR_ALPHA : (c == 0x7f ? GRN_CHAR_OTHERS : GRN_CHAR_SYMBOL);
      break;
    case 0x0a :
      /* 0xa3 is the only byte of this row typed as a letter. */
      *d = c;
      ctype = (c == 0xa3) ? GRN_CHAR_ALPHA : GRN_CHAR_OTHERS;
      break;
    case 0x0b :
      /* 0xb3 is folded onto 0xa3. */
      if (c == 0xb3) {
        *d = c - 0x10;
        ctype = GRN_CHAR_ALPHA;
      } else {
        *d = c;
        ctype = GRN_CHAR_OTHERS;
      }
      break;
    case 0x0c :
    case 0x0d :
      *d = c;
      ctype = GRN_CHAR_ALPHA;
      break;
    case 0x0e :
    case 0x0f :
      /* Folded onto the 0xc0..0xdf rows. */
      *d = c - 0x20;
      ctype = GRN_CHAR_ALPHA;
      break;
    default :
      /* 0x80..0x9f */
      *d = c;
      ctype = GRN_CHAR_OTHERS;
      break;
    }
    d++;
    length++;
    if (cp) { *cp++ = ctype; }
    if (ch) {
      /* Source bytes consumed since the previous output byte. */
      *ch++ = (int16_t)(s + 1 - s_);
      s_ = s + 1;
      while (++d_ < d) { *ch++ = 0; }
    }
  }
  if (cp) { *cp = GRN_CHAR_NULL; }
  *d = '\0';
  nstr->length = length;
  nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm);
  return GRN_SUCCESS;
}
/*
 * Builds a grn_str for the non-normalizing case: nstr->norm is a
 * NUL-terminated copy of the str_len bytes at str.
 *
 * When GRN_STR_WITH_CHECKS is set, nstr->checks gets one entry per byte:
 * the byte length of the character on its first byte and 0 on its other
 * bytes, where the length is derived from the lead byte alone according
 * to encoding (encodings other than EUC-JP, Shift_JIS and UTF-8 get 1 for
 * every byte).  nstr->ctypes is always NULL.
 *
 * Returns NULL when an allocation fails; nothing is leaked in that case.
 *
 * NOTE(review): nstr->length is not set on this path; confirm no caller
 * reads it for non-normalized strings.
 */
static grn_str *
grn_fakenstr_open(grn_ctx *ctx, const char *str, size_t str_len, grn_encoding encoding, int flags)
{
  /* TODO: support GRN_STR_REMOVEBLANK flag and ctypes */
  grn_str *nstr;
  if (!(nstr = GRN_MALLOC(sizeof(grn_str)))) {
    GRN_LOG(ctx, GRN_LOG_ALERT, "memory allocation on grn_fakenstr_open failed !");
    return NULL;
  }
  if (!(nstr->norm = GRN_MALLOC(str_len + 1))) {
    /* The message used to name grn_snip_add_cond, which is not where the
       allocation happens. */
    GRN_LOG(ctx, GRN_LOG_ALERT, "memory allocation for norm on grn_fakenstr_open failed !");
    GRN_FREE(nstr);
    return NULL;
  }
  nstr->orig = str;
  nstr->orig_blen = str_len;
  grn_memcpy(nstr->norm, str, str_len);
  nstr->norm[str_len] = '\0';
  nstr->norm_blen = str_len;
  nstr->ctypes = NULL;
  /* grn_str_open_() records the encoding on its normalizing path; do the
     same here so the field is never left uninitialized. */
  nstr->encoding = encoding;
  nstr->flags = flags;
  if (flags & GRN_STR_WITH_CHECKS) {
    /* f counts the bytes still belonging to the current character. */
    int16_t f = 0;
    unsigned char c;
    size_t i;
    if (!(nstr->checks = (int16_t *) GRN_MALLOC(sizeof(int16_t) * str_len))) {
      GRN_FREE(nstr->norm);
      GRN_FREE(nstr);
      return NULL;
    }
    switch (encoding) {
    case GRN_ENC_EUC_JP:
      for (i = 0; i < str_len; i++) {
        if (!f) {
          c = (unsigned char) str[i];
          /* 0xa1..0xfe and 0x8e start 2-byte characters, 0x8f starts a
             3-byte character. */
          f = ((c >= 0xa1U && c <= 0xfeU) || c == 0x8eU ? 2 : (c == 0x8fU ? 3 : 1)
            );
          nstr->checks[i] = f;
        } else {
          nstr->checks[i] = 0;
        }
        f--;
      }
      break;
    case GRN_ENC_SJIS:
      for (i = 0; i < str_len; i++) {
        if (!f) {
          c = (unsigned char) str[i];
          /* 0x81..0x9f and 0xe0..0xfc start 2-byte characters. */
          f = (c >= 0x81U && ((c <= 0x9fU) || (c >= 0xe0U && c <= 0xfcU)) ? 2 : 1);
          nstr->checks[i] = f;
        } else {
          nstr->checks[i] = 0;
        }
        f--;
      }
      break;
    case GRN_ENC_UTF8:
      for (i = 0; i < str_len; i++) {
        if (!f) {
          c = (unsigned char) str[i];
          /* Length from the lead byte bits 0x80 / 0x20 / 0x10 only; the
             trailing bytes are not validated. */
          f = (c & 0x80U ? (c & 0x20U ? (c & 0x10U ? 4 : 3)
                            : 2)
               : 1);
          nstr->checks[i] = f;
        } else {
          nstr->checks[i] = 0;
        }
        f--;
      }
      break;
    default:
      for (i = 0; i < str_len; i++) {
        nstr->checks[i] = 1;
      }
      break;
    }
  } else {
    nstr->checks = NULL;
  }
  return nstr;
}
/*
 * Creates a grn_str for the str_len bytes at str in the given encoding.
 *
 * Without GRN_STR_NORMALIZE in flags the work is handed to
 * grn_fakenstr_open().  Otherwise the encoding-specific normalizer is run
 * (UTF-8 uses normalize_utf8() only when GRN_WITH_NFKC is defined and
 * normalize_none() otherwise; unknown encodings use normalize_none()).
 *
 * Returns NULL when str is NULL, str_len is 0, an allocation fails or the
 * normalizer reports an error.  The result is released with
 * grn_str_close().
 */
grn_str *
grn_str_open_(grn_ctx *ctx, const char *str, unsigned int str_len, int flags, grn_encoding encoding)
{
  grn_str *opened;
  grn_rc status;

  if (!str || !str_len) {
    return NULL;
  }
  if ((flags & GRN_STR_NORMALIZE) == 0) {
    return grn_fakenstr_open(ctx, str, str_len, encoding, flags);
  }

  opened = GRN_MALLOC(sizeof(grn_str));
  if (!opened) {
    GRN_LOG(ctx, GRN_LOG_ALERT, "memory allocation on grn_str_open failed !");
    return NULL;
  }

  /* Start from a state in which grn_str_close() is always safe. */
  opened->norm = NULL;
  opened->checks = NULL;
  opened->ctypes = NULL;
  opened->norm_blen = 0;
  opened->orig = str;
  opened->orig_blen = str_len;
  opened->flags = flags;
  opened->encoding = encoding;

  if (encoding == GRN_ENC_EUC_JP) {
    status = normalize_euc(ctx, opened);
  } else if (encoding == GRN_ENC_SJIS) {
    status = normalize_sjis(ctx, opened);
  } else if (encoding == GRN_ENC_LATIN1) {
    status = normalize_latin1(ctx, opened);
  } else if (encoding == GRN_ENC_KOI8R) {
    status = normalize_koi8r(ctx, opened);
#ifdef GRN_WITH_NFKC
  } else if (encoding == GRN_ENC_UTF8) {
    status = normalize_utf8(ctx, opened);
#endif /* GRN_WITH_NFKC */
  } else {
    /* Unknown encodings, and UTF-8 when NFKC support is compiled out. */
    status = normalize_none(ctx, opened);
  }

  if (status) {
    grn_str_close(ctx, opened);
    return NULL;
  }
  return opened;
}
/*
 * Same as grn_str_open_(), using the encoding stored in ctx.
 */
grn_str *
grn_str_open(grn_ctx *ctx, const char *str, unsigned int str_len, int flags)
{
  const grn_encoding ctx_encoding = ctx->encoding;

  return grn_str_open_(ctx, str, str_len, flags, ctx_encoding);
}
/*
 * Releases nstr together with the norm, ctypes and checks buffers it owns
 * (each only when non-NULL).
 *
 * Returns GRN_INVALID_ARGUMENT when nstr is NULL, GRN_SUCCESS otherwise.
 */
grn_rc
grn_str_close(grn_ctx *ctx, grn_str *nstr)
{
  if (!nstr) {
    return GRN_INVALID_ARGUMENT;
  }

  if (nstr->norm) {
    GRN_FREE(nstr->norm);
  }
  if (nstr->ctypes) {
    GRN_FREE(nstr->ctypes);
  }
  if (nstr->checks) {
    GRN_FREE(nstr->checks);
  }
  GRN_FREE(nstr);
  return GRN_SUCCESS;
}
/*
 * Encoding names, indexed by the numeric value of grn_encoding:
 * grn_encoding_to_string() reads grn_enc_string[enc] and
 * grn_encoding_parse() casts the matching index back to grn_encoding.
 * NOTE(review): the order presumably has to follow the grn_encoding
 * enumerators; confirm against the enum declaration.
 */
static const char *grn_enc_string[] = {
"default",
"none",
"euc_jp",
"utf8",
"sjis",
"latin1",
"koi8r"
};
/*
 * Returns the name stored in grn_enc_string[] for enc, or "unknown" when
 * enc is not a valid index into that table.
 */
const char *
grn_encoding_to_string(grn_encoding enc)
{
  const size_t n_names = sizeof(grn_enc_string) / sizeof(char *);

  if (enc >= n_names) {
    return "unknown";
  }
  return grn_enc_string[enc];
}
grn_encoding
grn_encoding_parse(const char *str)
{
grn_encoding e = GRN_ENC_UTF8;
int i = sizeof(grn_enc_string) / sizeof(grn_enc_string[0]);
while (i--) {
if (!strcmp(str, grn_enc_string[i])) {
e = (grn_encoding)i;
}
}
return e;
}
/*
 * Counts the characters of the NUL-terminated string str in the given
 * encoding.
 *
 * Counting stops at the first position for which grn_str_charlen()
 * returns 0 (end of string or invalid character).  When last is not NULL,
 * *last receives that position.
 */
size_t
grn_str_len(grn_ctx *ctx, const char *str, grn_encoding encoding, const char **last)
{
  size_t n_chars = 0;
  size_t step;

  while ((step = grn_str_charlen(ctx, str, encoding)) != 0) {
    str += step;
    n_chars++;
  }
  if (last) {
    *last = str;
  }
  return n_chars;
}
/*
 * Tells whether str starts with a whitespace character.
 *
 * Returns the byte length of that character: 1 for ' ', '\f', '\n', '\r',
 * '\t' and '\v'; 2 for 0x81 0x40 in Shift_JIS and for 0xA1 0xA1 in EUC-JP;
 * 3 for 0xE3 0x80 0x80 in UTF-8.  Returns 0 when str is NULL or does not
 * start with one of these.
 */
int
grn_isspace(const char *str, grn_encoding encoding)
{
  const unsigned char *bytes = (const unsigned char *) str;
  unsigned char lead;

  if (!bytes) {
    return 0;
  }
  lead = bytes[0];

  if (lead == ' ' || lead == '\f' || lead == '\n' ||
      lead == '\r' || lead == '\t' || lead == '\v') {
    return 1;
  }
  /* Multi-byte blanks: later bytes are only read once the earlier ones
     matched, so the terminator is never passed. */
  if (lead == 0x81) {
    return (encoding == GRN_ENC_SJIS && bytes[1] == 0x40) ? 2 : 0;
  }
  if (lead == 0xA1) {
    return (encoding == GRN_ENC_EUC_JP && bytes[1] == 0xA1) ? 2 : 0;
  }
  if (lead == 0xE3) {
    return (encoding == GRN_ENC_UTF8 && bytes[1] == 0x80 && bytes[2] == 0x80) ? 3 : 0;
  }
  return 0;
}
/*
 * Parses an optionally '-'-prefixed decimal number from [nptr, end) into
 * an int8_t.
 *
 * When rest is not NULL, *rest receives the position where parsing
 * stopped.  A lone '-' without digits leaves *rest at nptr.  When the
 * value does not fit into int8_t, 0 is returned and *rest points at the
 * digit that caused the overflow.
 */
int8_t
grn_atoi8(const char *nptr, const char *end, const char **rest)
{
  const char *p = nptr;
  /* Accumulate as a non-positive value in a wider type: INT8_MIN stays
     representable and the range test cannot be fooled by wrap-around
     (the old int8_t arithmetic parsed "300" as 44). */
  int v = 0;
  int negative = 0;
  int sign_only = 0;

  if (p < end && *p == '-') {
    p++;
    negative = 1;
    sign_only = 1;
  }
  while (p < end && *p >= '0' && *p <= '9') {
    int t = v * 10 - (*p - '0');
    /* -128 is valid only for negative input. */
    if (t < INT8_MIN || (!negative && t == INT8_MIN)) { v = 0; break; }
    v = t;
    sign_only = 0;
    p++;
  }
  if (rest) { *rest = sign_only ? nptr : p; }
  return (int8_t)(negative ? v : -v);
}
/*
 * Parses an unsigned decimal number from [nptr, end) into a uint8_t.
 *
 * When rest is not NULL, *rest receives the position where parsing
 * stopped.  When the value does not fit into uint8_t, 0 is returned and
 * *rest points at the digit that caused the overflow.
 */
uint8_t
grn_atoui8(const char *nptr, const char *end, const char **rest)
{
  /* Accumulate in a wider type so the range test is exact (the old
     uint8_t arithmetic parsed "300" as 44). */
  unsigned int v = 0;

  while (nptr < end && *nptr >= '0' && *nptr <= '9') {
    unsigned int t = v * 10 + (unsigned int)(*nptr - '0');
    if (t > UINT8_MAX) { v = 0; break; }
    v = t;
    nptr++;
  }
  if (rest) { *rest = nptr; }
  return (uint8_t)v;
}
/*
 * Parses an optionally '-'-prefixed decimal number from [nptr, end) into
 * an int16_t.
 *
 * When rest is not NULL, *rest receives the position where parsing
 * stopped.  A lone '-' without digits leaves *rest at nptr.  When the
 * value does not fit into int16_t, 0 is returned and *rest points at the
 * digit that caused the overflow.
 */
int16_t
grn_atoi16(const char *nptr, const char *end, const char **rest)
{
  const char *p = nptr;
  /* Accumulate as a non-positive value in a wider type: INT16_MIN stays
     representable and the range test cannot be fooled by wrap-around
     (the old int16_t arithmetic parsed "80000" as 14464). */
  long v = 0;
  int negative = 0;
  int sign_only = 0;

  if (p < end && *p == '-') {
    p++;
    negative = 1;
    sign_only = 1;
  }
  while (p < end && *p >= '0' && *p <= '9') {
    long t = v * 10 - (*p - '0');
    /* -32768 is valid only for negative input. */
    if (t < INT16_MIN || (!negative && t == INT16_MIN)) { v = 0; break; }
    v = t;
    sign_only = 0;
    p++;
  }
  if (rest) { *rest = sign_only ? nptr : p; }
  return (int16_t)(negative ? v : -v);
}
/*
 * Parses an unsigned decimal number from [nptr, end) into a uint16_t.
 *
 * When rest is not NULL, *rest receives the position where parsing
 * stopped.  When the value does not fit into uint16_t, 0 is returned and
 * *rest points at the digit that caused the overflow.
 */
uint16_t
grn_atoui16(const char *nptr, const char *end, const char **rest)
{
  /* Accumulate in a wider type so the range test is exact (the old
     uint16_t arithmetic parsed "80000" as 14464). */
  unsigned long v = 0;

  while (nptr < end && *nptr >= '0' && *nptr <= '9') {
    unsigned long t = v * 10 + (unsigned long)(*nptr - '0');
    if (t > UINT16_MAX) { v = 0; break; }
    v = t;
    nptr++;
  }
  if (rest) { *rest = nptr; }
  return (uint16_t)v;
}
/*
 * Parses an optionally '-'-prefixed decimal number from [nptr, end) into
 * an int.
 *
 * When rest is not NULL, *rest receives the position where parsing
 * stopped.  A lone '-' without digits leaves *rest at nptr.  When the
 * value does not fit into int, 0 is returned and *rest points at the
 * digit that caused the overflow.
 */
int
grn_atoi(const char *nptr, const char *end, const char **rest)
{
  const char *p = nptr;
  /* Accumulate as a non-positive value in a wider type: INT_MIN stays
     representable, no signed overflow (undefined behaviour) can occur,
     and the range test cannot be fooled by wrap-around. */
  long long v = 0;
  int negative = 0;
  int sign_only = 0;

  if (p < end && *p == '-') {
    p++;
    negative = 1;
    sign_only = 1;
  }
  while (p < end && *p >= '0' && *p <= '9') {
    long long t = v * 10 - (*p - '0');
    /* INT_MIN is valid only for negative input. */
    if (t < INT_MIN || (!negative && t == INT_MIN)) { v = 0; break; }
    v = t;
    sign_only = 0;
    p++;
  }
  if (rest) { *rest = sign_only ? nptr : p; }
  return (int)(negative ? v : -v);
}
/*
 * Parses a run of decimal digits from [nptr, end) into an unsigned int.
 *
 * rest: if non-NULL, receives the position where parsing stopped.
 *
 * Returns the parsed value. When the value does not fit in unsigned int,
 * 0 is returned and *rest is left on the digit that caused the overflow.
 */
unsigned int
grn_atoui(const char *nptr, const char *end, const char **rest)
{
  /* Accumulate in 64 bits: a wrapped unsigned int result is not always
     smaller than the previous value (e.g. "30000000000"), so a
     "t < v" test cannot detect every overflow. */
  uint64_t v = 0;
  while (nptr < end && *nptr >= '0' && *nptr <= '9') {
    uint64_t t = v * 10 + (uint64_t)(*nptr - '0');
    if (t > UINT_MAX) { v = 0; break; }
    v = t;
    nptr++;
  }
  if (rest) { *rest = nptr; }
  return (unsigned int)v;
}
/*
 * Parses an optionally '-'-prefixed run of decimal digits from [nptr, end)
 * into an int64_t.
 *
 * rest: if non-NULL, receives the position where parsing stopped. It is set
 *       back to nptr when a '-' is not followed by any digit.
 *
 * Returns the parsed value. When the magnitude exceeds INT64_MAX, 0 is
 * returned and *rest is left on the digit that caused the overflow.
 */
int64_t
grn_atoll(const char *nptr, const char *end, const char **rest)
{
  /* FIXME: INT64_MIN is not supported because the magnitude is accumulated
     as a positive number. */
  const char *p = nptr;
  int negative = 0, sign_only = 0;
  int64_t v = 0;
  if (p < end && *p == '-') {
    p++;
    negative = 1;
    sign_only = 1;
  }
  while (p < end && *p >= '0' && *p <= '9') {
    int digit = *p - '0';
    /* Check before multiplying: signed overflow is undefined behavior and
       a wrapped result is not always smaller than the previous value. */
    if (v > (INT64_MAX - digit) / 10) { v = 0; break; }
    v = v * 10 + digit;
    sign_only = 0;
    p++;
  }
  if (rest) { *rest = sign_only ? nptr : p; }
  return negative ? -v : v;
}
/*
 * Parses a run of decimal digits from [nptr, end) into a uint64_t.
 *
 * rest: if non-NULL, receives the position where parsing stopped.
 *
 * Returns the parsed value. When the value does not fit in uint64_t, 0 is
 * returned and *rest is left on the digit that caused the overflow.
 */
uint64_t
grn_atoull(const char *nptr, const char *end, const char **rest)
{
  uint64_t v = 0;
  while (nptr < end && *nptr >= '0' && *nptr <= '9') {
    uint64_t digit = (uint64_t)(*nptr - '0');
    /* Check before multiplying: a wrapped product can still be larger than
       the previous value (e.g. "30000000000000000000"). */
    if (v > (UINT64_MAX - digit) / 10) { v = 0; break; }
    v = v * 10 + digit;
    nptr++;
  }
  if (rest) { *rest = nptr; }
  return v;
}
/*
 * Parses hexadecimal digits (0-9, a-f, A-F) from [nptr, end) into an
 * unsigned int.
 *
 * rest: if non-NULL, receives the position where parsing stopped.
 *
 * Returns the parsed value when every byte up to end is a hex digit.
 * Returns 0 with *rest on the offending byte when a non-hex byte is met,
 * and 0 with *rest just past the offending digit when the value does not
 * fit in unsigned int.
 */
unsigned int
grn_htoui(const char *nptr, const char *end, const char **rest)
{
  unsigned int v = 0;
  while (nptr < end) {
    unsigned int digit;
    const char c = *nptr;
    if (c >= '0' && c <= '9') {
      digit = (unsigned int)(c - '0');
    } else if (c >= 'a' && c <= 'f') {
      digit = (unsigned int)(c - 'a') + 10;
    } else if (c >= 'A' && c <= 'F') {
      digit = (unsigned int)(c - 'A') + 10;
    } else {
      v = 0;
      break;
    }
    nptr++;
    /* If any of the top four bits is set, shifting would drop it. A
       post-hoc "t < v" comparison misses cases such as "1FFFFFFFF". */
    if (v > (UINT_MAX >> 4)) { v = 0; break; }
    v = (v << 4) | digit;
  }
  if (rest) { *rest = nptr; }
  return v;
}
/*
 * Writes the low-order len hexadecimal digits of i into p[0..len-1] using
 * upper-case letters, most significant digit first. Digits beyond the width
 * of i come out as '0'. No terminator is written.
 */
void
grn_itoh(unsigned int i, char *p, unsigned int len)
{
  static const char digits[] = "0123456789ABCDEF";
  unsigned int pos;
  /* Fill from the rightmost slot towards the left. */
  for (pos = len; pos > 0; pos--) {
    p[pos - 1] = digits[i % 16];
    i /= 16;
  }
}
/*
 * Writes the decimal representation of i into [p, end).
 *
 * rest: if non-NULL, receives the position just past the last character
 *       written. No terminator is written.
 *
 * Returns GRN_SUCCESS, or GRN_INVALID_ARGUMENT when the buffer is too
 * small (the buffer may then hold a partial result).
 */
grn_rc
grn_itoa(int i, char *p, char *end, char **rest)
{
  char *first_digit = p;
  char *lo, *hi;
  if (p >= end) { return GRN_INVALID_ARGUMENT; }
  if (i < 0) {
    *p++ = '-';
    first_digit = p;
    if (i == INT_MIN) {
      /* -INT_MIN is not representable: emit the last digit first. */
      if (p >= end) { return GRN_INVALID_ARGUMENT; }
      *p++ = (-(i % 10)) + '0';
      i /= 10;
    }
    i = -i;
  }
  /* Digits are produced least significant first ... */
  for (;;) {
    if (p >= end) { return GRN_INVALID_ARGUMENT; }
    *p++ = i % 10 + '0';
    i /= 10;
    if (i <= 0) { break; }
  }
  if (rest) { *rest = p; }
  /* ... and then reversed in place. */
  lo = first_digit;
  hi = p - 1;
  while (lo < hi) {
    char swapped = *lo;
    *lo = *hi;
    *hi = swapped;
    lo++;
    hi--;
  }
  return GRN_SUCCESS;
}
/*
 * Writes i right-aligned into the fixed-width field [p, end), filling the
 * unused leading positions with ch. For negative values the '-' sign is
 * placed in the first position and the padding follows it.
 *
 * Returns GRN_SUCCESS, or GRN_INVALID_ARGUMENT when the field is too narrow
 * (the field may then hold a partial result).
 */
grn_rc
grn_itoa_padded(int i, char *p, char *end, char ch)
{
  char *q;
  unsigned int magnitude;
  if (p >= end) { return GRN_INVALID_ARGUMENT; }
  if (i < 0) {
    *p++ = '-';
    /* Negate in unsigned arithmetic so that INT_MIN is handled like any
       other value; emitting its last digit right after the sign would put
       that digit in front of the more significant ones. */
    magnitude = 0U - (unsigned int)i;
  } else {
    magnitude = (unsigned int)i;
  }
  q = end - 1;
  do {
    if (q < p) { return GRN_INVALID_ARGUMENT; }
    *q-- = (char)(magnitude % 10 + '0');
  } while ((magnitude /= 10) > 0);
  while (q >= p) {
    *q-- = ch;
  }
  return GRN_SUCCESS;
}
/*
 * Writes the decimal representation of i into [p, end).
 *
 * rest: if non-NULL, receives the position just past the last character
 *       written. No terminator is written.
 *
 * Returns GRN_SUCCESS, or GRN_INVALID_ARGUMENT when the buffer is too
 * small (the buffer may then hold a partial result).
 */
grn_rc
grn_lltoa(int64_t i, char *p, char *end, char **rest)
{
  char *q;
  if (p >= end) { return GRN_INVALID_ARGUMENT; }
  q = p;
  if (i < 0) {
    *p++ = '-';
    q = p;
    if (i == INT64_MIN) {
      /* -INT64_MIN is not representable: emit the last digit first.
         The bounds check keeps a one-byte buffer from being overrun. */
      if (p >= end) { return GRN_INVALID_ARGUMENT; }
      *p++ = (-(i % 10)) + '0';
      i /= 10;
    }
    i = -i;
  }
  /* Digits are produced least significant first and reversed afterwards. */
  do {
    if (p >= end) { return GRN_INVALID_ARGUMENT; }
    *p++ = i % 10 + '0';
  } while ((i /= 10) > 0);
  if (rest) { *rest = p; }
  for (p--; q < p; q++, p--) {
    char t = *q;
    *q = *p;
    *p = t;
  }
  return GRN_SUCCESS;
}
/*
 * Writes the decimal representation of i into [p, end).
 *
 * rest: if non-NULL, receives the position just past the last character
 *       written. No terminator is written.
 *
 * Returns GRN_SUCCESS, or GRN_INVALID_ARGUMENT when the buffer is too
 * small (the buffer may then hold a partial result).
 */
grn_rc
grn_ulltoa(uint64_t i, char *p, char *end, char **rest)
{
  char *lo = p;
  char *hi;
  if (p >= end) { return GRN_INVALID_ARGUMENT; }
  /* Emit digits least significant first ... */
  for (;;) {
    if (p >= end) { return GRN_INVALID_ARGUMENT; }
    *p++ = i % 10 + '0';
    i /= 10;
    if (i == 0) { break; }
  }
  if (rest) { *rest = p; }
  /* ... then reverse them in place. */
  hi = p - 1;
  while (lo < hi) {
    char swapped = *lo;
    *lo = *hi;
    *hi = swapped;
    lo++;
    hi--;
  }
  return GRN_SUCCESS;
}
/* I2B: maps the low 6 bits of i to a character of the 64-entry alphabet
   "A-Z", "a-z", "0-9", '+', '/'. */
#define I2B(i) \
("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"[(i) & 0x3f])
/* B2I: inverse of I2B. Yields the 6-bit value of character b, or 0xff when
   b lies outside '+'..'z' or is not part of the alphabet. The lookup string
   is indexed by (b - '+'). */
#define B2I(b) \
(((b) < '+' || 'z' < (b)) ? 0xff : "\x3e\xff\xff\xff\x3f\x34\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d\xff\xff\xff\xff\xff\xff\xff\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\xff\xff\xff\xff\xff\xff\x1a\x1b\x1c\x1d\x1e\x1f\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f\x30\x31\x32\x33"[(b) - '+'])
/* MASK: constant XORed into an id by grn_itob() before encoding and by
   grn_btoi() after decoding. */
#define MASK 0x34d34d34
/*
 * Encodes id as five I2B characters at p, after XORing it with MASK.
 * The characters carry bits 29..24, 23..18, 17..12, 11..6 and 5..0 of the
 * masked id, in that order. No terminator is written.
 *
 * Returns the position just past the fifth character.
 */
char *
grn_itob(grn_id id, char *p)
{
  int shift;
  id ^= MASK;
  for (shift = 24; shift >= 0; shift -= 6) {
    *p++ = I2B(id >> shift);
  }
  return p;
}
/*
 * Decodes the five characters at b, as produced by grn_itob(), back into
 * an id.
 *
 * Returns 0 as soon as a character that B2I does not recognize is met;
 * otherwise returns the accumulated 30-bit value XORed with MASK.
 */
grn_id
grn_btoi(char *b)
{
  grn_id id = 0;
  int n;
  for (n = 0; n < 5; n++) {
    char c = b[n];
    uint8_t sextet = B2I(c);
    if (sextet == 0xff) { return 0; }
    id = (id << 6) + sextet;
  }
  return id ^ MASK;
}
/* I2B32H: maps the low 5 bits of i to a character of the 32-entry alphabet
   "0-9", "A-V". */
#define I2B32H(i) ("0123456789ABCDEFGHIJKLMNOPQRSTUV"[(i) & 0x1f])
/*
 * Encodes i as 13 I2B32H characters at p. The value is first offset by
 * 2^63 so that INT64_MIN maps to all zero bits and INT64_MAX to all one
 * bits; the first character carries the top 4 bits and each following
 * character the next 5 bits. No terminator is written.
 *
 * Returns the position just past the 13th character.
 */
char *
grn_lltob32h(int64_t i, char *p)
{
  const uint64_t biased = (uint64_t)i + 0x8000000000000000ULL;
  int shift;
  for (shift = 60; shift >= 0; shift -= 5) {
    *p++ = I2B32H(biased >> shift);
  }
  return p;
}
/*
 * Encodes i as 13 characters at p. i is offset by 2^63 (with wraparound)
 * and then split into a leading 4-bit group and twelve 5-bit groups, each
 * mapped through I2B32H. When the top bit of the original i is set, 0x10 is
 * added to the first output character. No terminator is written.
 *
 * NOTE(review): the 0x10 is added to the character code, not to the
 * alphabet index, so the first character can fall outside the I2B32H
 * alphabet. Confirm whether that is intended before changing it, since
 * stored encodings may depend on it.
 *
 * Returns the position just past the 13th character.
 */
char *
grn_ulltob32h(uint64_t i, char *p)
{
  int shift;
  const char high_bit_offset = (i >> 59) & 0x10;
  i += 0x8000000000000000ULL;
  *p++ = high_bit_offset + I2B32H(i >> 60);
  for (shift = 55; shift >= 0; shift -= 5) {
    *p++ = I2B32H(i >> shift);
  }
  return p;
}
/*
 * Parses a number from [p, end) and stores it in res, choosing the
 * narrowest of Int32, UInt32 and Int64 for integers and Float for input
 * that continues with '.', 'e', 'E' or further digits.
 *
 * rest: if non-NULL, receives the position where parsing stopped.
 *
 * Returns GRN_INVALID_ARGUMENT when the input is empty, does not start with
 * a sign or digit, or cannot be consumed completely as a float. Returns
 * GRN_SUCCESS otherwise; note that res is left untouched when the integer
 * part is followed by any other character, so callers are expected to
 * inspect *rest.
 */
grn_rc
grn_aton(grn_ctx *ctx, const char *p, const char *end, const char **rest,
         grn_obj *res)
{
  const char *int_rest;
  /* Never read *p unless it lies inside [p, end). */
  if (p < end && *p == '+') {
    p++;
  }
  if (p >= end) {
    return GRN_INVALID_ARGUMENT;
  }
  switch (*p) {
  case '-' :
  case '0' : case '1' : case '2' : case '3' : case '4' :
  case '5' : case '6' : case '7' : case '8' : case '9' :
    {
      int64_t int64;
      int64 = grn_atoll(p, end, &int_rest);
      if (rest) { *rest = int_rest; }
      if (end == int_rest) {
        if ((int64_t)INT32_MIN <= int64 && int64 <= (int64_t)INT32_MAX) {
          grn_obj_reinit(ctx, res, GRN_DB_INT32, 0);
          GRN_INT32_SET(ctx, res, int64);
        } else if ((int64_t)INT32_MAX < int64 && int64 <= (int64_t)UINT32_MAX) {
          grn_obj_reinit(ctx, res, GRN_DB_UINT32, 0);
          GRN_UINT32_SET(ctx, res, int64);
        } else {
          grn_obj_reinit(ctx, res, GRN_DB_INT64, 0);
          GRN_INT64_SET(ctx, res, int64);
        }
      } else {
        /* int_rest < end here, so the byte may be read safely. */
        const char rest_char = *int_rest;
        if (rest_char == '.' || rest_char == 'e' || rest_char == 'E' ||
            (rest_char >= '0' && rest_char <= '9')) {
          char *rest_float;
          double d;
          errno = 0;
          /* NOTE(review): strtod() stops at a NUL byte, not at end;
             presumably callers pass NUL-terminated input. Confirm. */
          d = strtod(p, &rest_float);
          if (!errno && rest_float == end) {
            grn_obj_reinit(ctx, res, GRN_DB_FLOAT, 0);
            GRN_FLOAT_SET(ctx, res, d);
            if (rest) { *rest = rest_float; }
          } else {
            return GRN_INVALID_ARGUMENT;
          }
        }
      }
    }
    break;
  default :
    return GRN_INVALID_ARGUMENT;
  }
  return GRN_SUCCESS;
}
/*
 * Splits [str, str + str_len) on delim without modifying the input.
 *
 * Each entry stored in tokbuf points at the END of a token: either the
 * delimiter that terminates it or, for the last token, str + str_len.
 * At most buf_size entries are stored.
 *
 * rest: if non-NULL, receives the position where scanning stopped: the
 *       delimiter that filled tokbuf, or str + str_len when the whole input
 *       was scanned (str itself when buf_size <= 0).
 *
 * Returns the number of entries stored in tokbuf.
 */
int
grn_str_tok(const char *str, size_t str_len, char delim, const char **tokbuf, int buf_size, const char **rest)
{
  int count = 0;
  if (buf_size > 0) {
    const char *limit = str + str_len;
    int full = 0;
    while (str != limit) {
      if (*str == delim) {
        tokbuf[count++] = str;
        if (count == buf_size) {
          full = 1;
          break;
        }
      }
      str++;
    }
    /* The end of input terminates the final token. */
    if (!full) {
      tokbuf[count++] = str;
    }
  }
  if (rest) { *rest = str; }
  return count;
}
/*
 * Applies one matched option o to *flags and, when the option takes an
 * argument, stores that argument through o->arg.
 *
 * optvalue: value attached to the option itself, or NULL to take the next
 *           element of argv instead.
 *
 * Returns the (possibly advanced) argv index, or -1 when a required
 * argument is missing. An unknown o->op returns i without touching o->arg.
 */
inline static int
op_getopt_flag(int *flags, const grn_str_getopt_opt *o,
               int argc, char * const argv[], int i, const char *optvalue)
{
  switch (o->op) {
  case GETOPT_OP_NONE:
    break;
  case GETOPT_OP_ON:
    *flags |= o->flag;
    break;
  case GETOPT_OP_OFF:
    *flags &= ~o->flag;
    break;
  case GETOPT_OP_UPDATE:
    *flags = o->flag;
    break;
  default:
    return i;
  }
  if (!o->arg) {
    return i;
  }
  if (optvalue) {
    *o->arg = (char *)optvalue;
    return i;
  }
  /* The argument is the next command line word, if there is one. */
  if (++i >= argc) {
    return -1;
  }
  *o->arg = argv[i];
  return i;
}
/*
 * Parses leading command line options in argv[1..argc-1] against the table
 * opts, updating *flags and the per-option argument slots through
 * op_getopt_flag().
 *
 * opts is scanned until an entry whose opt is '\0' and whose longopt is
 * NULL. Arguments starting with "--" are matched against longopt (an
 * optional "=value" suffix supplies the option value); arguments starting
 * with a single '-' are treated as a cluster of one-character options.
 * Parsing stops at the first argument that does not start with '-'.
 *
 * Returns the index of the first argument that was not consumed, or -1
 * after printing a message to stderr when an option is unknown or lacks
 * its argument.
 */
int
grn_str_getopt(int argc, char * const argv[], const grn_str_getopt_opt *opts,
int *flags)
{
int i;
for (i = 1; i < argc; i++) {
const char * v = argv[i];
if (*v == '-') {
const grn_str_getopt_opt *o;
int found;
if (*++v == '-') {
/* Long option: "--name" or "--name=value". */
const char *eq;
size_t len;
found = 0;
v++;
/* eq ends up on '=' or on the terminating NUL. */
for (eq = v; *eq != '\0' && *eq != '='; eq++) {}
len = eq - v;
for (o = opts; o->opt != '\0' || o->longopt != NULL; o++) {
if (o->longopt && strlen(o->longopt) == len &&
!memcmp(v, o->longopt, len)) {
/* i may advance here when the value is taken from the next argument. */
i = op_getopt_flag(flags, o, argc, argv, i,
(*eq == '\0' ? NULL : eq + 1));
if (i < 0) {
fprintf(stderr, "%s: option '--%s' needs argument.\n", argv[0], o->longopt);
return -1;
}
found = 1;
break;
}
}
if (!found) { goto exit; }
} else {
/* Short options: every character after '-' is looked up on its own. */
const char *p;
for (p = v; *p; p++) {
found = 0;
for (o = opts; o->opt != '\0' || o->longopt != NULL; o++) {
if (o->opt && *p == o->opt) {
/* Short options never carry an attached value; an argument, if
   needed, is taken from the next element of argv. */
i = op_getopt_flag(flags, o, argc, argv, i, NULL);
if (i < 0) {
fprintf(stderr, "%s: option '-%c' needs argument.\n", argv[0], *p);
return -1;
}
found = 1;
break;
}
}
if (!found) { goto exit; }
}
}
} else {
break;
}
}
return i;
exit:
/* Reached with i still indexing the argument that failed to match. */
fprintf(stderr, "%s: cannot recognize option '%s'.\n", argv[0], argv[i]);
return -1;
}
/* Heap-allocated bulk buffers are sized in multiples of UNIT_SIZE bytes
   (see the rounding in grn_bulk_resize()). */
#define UNIT_SIZE (1 << 12)
#define UNIT_MASK (UNIT_SIZE - 1)
/* Number of bytes grn_bulk_resize() keeps in front of the data area of a
   heap-allocated bulk; grn_bulk_fin() subtracts it again before releasing
   the block. */
int grn_bulk_margin_size = 0;
/*
 * Makes sure buf can hold newsize bytes of data (plus the configured margin
 * and one extra byte).
 *
 * A bulk that already uses heap storage is reallocated to the requested
 * size rounded up to a multiple of UNIT_SIZE. A bulk that still uses its
 * embedded storage is moved to the heap only when the request exceeds
 * GRN_BULK_BUFSIZE; otherwise nothing changes.
 *
 * Returns GRN_SUCCESS, GRN_NOT_ENOUGH_SPACE when rounding the size up
 * overflows, or GRN_NO_MEMORY_AVAILABLE when allocation fails (buf is left
 * unchanged in both failure cases).
 */
grn_rc
grn_bulk_resize(grn_ctx *ctx, grn_obj *buf, unsigned int newsize)
{
  char *head;
  unsigned int rounded_newsize;
  newsize += grn_bulk_margin_size + 1;
  if (GRN_BULK_OUTP(buf)) {
    rounded_newsize = (newsize + (UNIT_MASK)) & ~UNIT_MASK;
    if (rounded_newsize < newsize) { return GRN_NOT_ENOUGH_SPACE; }
    newsize = rounded_newsize;
    /* The allocation starts grn_bulk_margin_size bytes before the data. */
    head = buf->u.b.head - (buf->u.b.head ? grn_bulk_margin_size : 0);
    if (!(head = GRN_REALLOC(head, newsize))) { return GRN_NO_MEMORY_AVAILABLE; }
    buf->u.b.curr = head + grn_bulk_margin_size + GRN_BULK_VSIZE(buf);
    buf->u.b.head = head + grn_bulk_margin_size;
    buf->u.b.tail = head + newsize;
  } else {
    if (newsize > GRN_BULK_BUFSIZE) {
      rounded_newsize = (newsize + (UNIT_MASK)) & ~UNIT_MASK;
      if (rounded_newsize < newsize) { return GRN_NOT_ENOUGH_SPACE; }
      newsize = rounded_newsize;
      if (!(head = GRN_MALLOC(newsize))) { return GRN_NO_MEMORY_AVAILABLE; }
      /* Copy to where u.b.head will point: the data area begins after the
         margin, so copying to the start of the allocation would misplace
         the contents whenever the margin is non-zero. The copy must also
         happen before the u.b fields are written. */
      grn_memcpy(head + grn_bulk_margin_size,
                 GRN_BULK_HEAD(buf), GRN_BULK_VSIZE(buf));
      buf->u.b.curr = head + grn_bulk_margin_size + GRN_BULK_VSIZE(buf);
      buf->u.b.head = head + grn_bulk_margin_size;
      buf->u.b.tail = head + newsize;
      buf->header.impl_flags |= GRN_OBJ_OUTPLACE;
    }
  }
  return GRN_SUCCESS;
}
/*
 * Rewinds buf with GRN_BULK_REWIND() and then makes room for size bytes.
 * Returns the result of grn_bulk_resize().
 */
grn_rc
grn_bulk_reinit(grn_ctx *ctx, grn_obj *buf, unsigned int size)
{
  grn_rc rc;
  GRN_BULK_REWIND(buf);
  rc = grn_bulk_resize(ctx, buf, size);
  return rc;
}
/*
 * Appends len bytes from str to buf, growing buf first when its free space
 * is smaller than len.
 *
 * Returns GRN_SUCCESS, or the error from grn_bulk_resize() (nothing is
 * appended in that case).
 */
grn_rc
grn_bulk_write(grn_ctx *ctx, grn_obj *buf, const char *str, unsigned int len)
{
  grn_rc rc = GRN_SUCCESS;
  char *dest;
  if (GRN_BULK_REST(buf) < len) {
    rc = grn_bulk_resize(ctx, buf, GRN_BULK_VSIZE(buf) + len);
    if (rc) { return rc; }
  }
  dest = GRN_BULK_CURR(buf);
  grn_memcpy(dest, str, len);
  GRN_BULK_INCR_LEN(buf, len);
  return rc;
}
/*
 * Sets the length of bulk to from via grn_bulk_truncate() and then appends
 * len bytes from str.
 *
 * Returns the first error reported by either step, GRN_SUCCESS otherwise.
 */
grn_rc
grn_bulk_write_from(grn_ctx *ctx, grn_obj *bulk,
                    const char *str, unsigned int from, unsigned int len)
{
  grn_rc rc = grn_bulk_truncate(ctx, bulk, from);
  if (rc) {
    return rc;
  }
  return grn_bulk_write(ctx, bulk, str, len);
}
/*
 * Ensures that at least len more bytes can be appended to buf without
 * another resize. The length of buf is not changed.
 *
 * Returns GRN_SUCCESS, or the error from grn_bulk_resize().
 */
grn_rc
grn_bulk_reserve(grn_ctx *ctx, grn_obj *buf, unsigned int len)
{
  grn_rc rc = GRN_SUCCESS;
  if (GRN_BULK_REST(buf) < len) {
    rc = grn_bulk_resize(ctx, buf, GRN_BULK_VSIZE(buf) + len);
  }
  return rc;
}
/*
 * Extends buf by len bytes without initializing them.
 *
 * Returns GRN_SUCCESS, or the error from grn_bulk_reserve() (the length is
 * then left unchanged).
 */
grn_rc
grn_bulk_space(grn_ctx *ctx, grn_obj *buf, unsigned int len)
{
  grn_rc rc = grn_bulk_reserve(ctx, buf, len);
  if (rc) {
    return rc;
  }
  GRN_BULK_INCR_LEN(buf, len);
  return rc;
}
/*
 * Extends buf by len zero-filled bytes.
 *
 * Returns GRN_SUCCESS, or the error from grn_bulk_reserve() (the length is
 * then left unchanged).
 */
static grn_rc
grn_bulk_space_clear(grn_ctx *ctx, grn_obj *buf, unsigned int len)
{
  grn_rc rc = grn_bulk_reserve(ctx, buf, len);
  if (rc) {
    return rc;
  }
  memset(GRN_BULK_CURR(buf), 0, len);
  GRN_BULK_INCR_LEN(buf, len);
  return rc;
}
/*
 * Sets the length of bulk to len when len fits in the storage the bulk
 * currently has (heap block or embedded buffer). No bytes are cleared on
 * that path.
 *
 * When len exceeds the current storage, the call is forwarded to
 * grn_bulk_space_clear(), which appends len zero bytes to the existing
 * contents.
 * NOTE(review): on that path the resulting length is the old length plus
 * len rather than len; confirm whether callers rely on this.
 *
 * Returns GRN_SUCCESS, or the error from grn_bulk_space_clear().
 */
grn_rc
grn_bulk_truncate(grn_ctx *ctx, grn_obj *bulk, unsigned int len)
{
  if (!GRN_BULK_OUTP(bulk)) {
    /* Embedded storage: the length lives in the low bits of header.flags. */
    if (GRN_BULK_BUFSIZE < len) {
      return grn_bulk_space_clear(ctx, bulk, len);
    }
    bulk->header.flags &= ~GRN_BULK_BUFSIZE_MAX;
    bulk->header.flags += len;
    return GRN_SUCCESS;
  }
  /* Heap storage: the length is the distance between head and curr. */
  if ((bulk->u.b.tail - bulk->u.b.head) < len) {
    return grn_bulk_space_clear(ctx, bulk, len);
  }
  bulk->u.b.curr = bulk->u.b.head + len;
  return GRN_SUCCESS;
}
/*
 * Appends the decimal representation of i to buf. Whenever grn_itoa()
 * reports that the free space is too small, buf is grown by UNIT_SIZE and
 * the conversion is retried.
 *
 * Returns GRN_SUCCESS, or the error from grn_bulk_resize().
 */
grn_rc
grn_text_itoa(grn_ctx *ctx, grn_obj *buf, int i)
{
  grn_rc rc = GRN_SUCCESS;
  char *curr = GRN_BULK_CURR(buf);
  while (grn_itoa(i, curr, GRN_BULK_TAIL(buf), &curr)) {
    rc = grn_bulk_resize(ctx, buf, GRN_BULK_WSIZE(buf) + UNIT_SIZE);
    if (rc) { return rc; }
    /* The storage may have moved; start over from the new write position. */
    curr = GRN_BULK_CURR(buf);
  }
  GRN_BULK_SET_CURR(buf, curr);
  return rc;
}
/*
 * Appends i to buf as a field of exactly len characters, right-aligned and
 * padded with ch (see grn_itoa_padded()).
 *
 * Returns GRN_SUCCESS, the error from grn_bulk_reserve(), or the error
 * from grn_itoa_padded() when i does not fit in len characters. In the
 * failure cases the length of buf is left unchanged.
 */
grn_rc
grn_text_itoa_padded(grn_ctx *ctx, grn_obj *buf, int i, char ch, unsigned int len)
{
  grn_rc rc;
  char *curr;
  if ((rc = grn_bulk_reserve(ctx, buf, len))) { return rc; }
  curr = GRN_BULK_CURR(buf);
  /* Report a formatting failure instead of silently appending nothing. */
  rc = grn_itoa_padded(i, curr, curr + len, ch);
  if (!rc) {
    GRN_BULK_SET_CURR(buf, curr + len);
  }
  return rc;
}
/*
 * Appends the decimal representation of i to buf. Whenever grn_lltoa()
 * reports that the free space is too small, buf is grown by UNIT_SIZE and
 * the conversion is retried.
 *
 * Returns GRN_SUCCESS, or the error from grn_bulk_resize().
 */
grn_rc
grn_text_lltoa(grn_ctx *ctx, grn_obj *buf, long long int i)
{
  grn_rc rc = GRN_SUCCESS;
  char *curr = GRN_BULK_CURR(buf);
  while (grn_lltoa(i, curr, GRN_BULK_TAIL(buf), &curr)) {
    rc = grn_bulk_resize(ctx, buf, GRN_BULK_WSIZE(buf) + UNIT_SIZE);
    if (rc) { return rc; }
    /* The storage may have moved; start over from the new write position. */
    curr = GRN_BULK_CURR(buf);
  }
  GRN_BULK_SET_CURR(buf, curr);
  return rc;
}
/*
 * Appends the decimal representation of i to buf. Whenever grn_ulltoa()
 * reports that the free space is too small, buf is grown by UNIT_SIZE and
 * the conversion is retried.
 *
 * Returns GRN_SUCCESS, or the error from grn_bulk_resize().
 */
grn_rc
grn_text_ulltoa(grn_ctx *ctx, grn_obj *buf, unsigned long long int i)
{
  grn_rc rc = GRN_SUCCESS;
  char *curr = GRN_BULK_CURR(buf);
  while (grn_ulltoa(i, curr, GRN_BULK_TAIL(buf), &curr)) {
    rc = grn_bulk_resize(ctx, buf, GRN_BULK_WSIZE(buf) + UNIT_SIZE);
    if (rc) { return rc; }
    /* The storage may have moved; start over from the new write position. */
    curr = GRN_BULK_CURR(buf);
  }
  GRN_BULK_SET_CURR(buf, curr);
  return rc;
}
/*
 * Appends a finite double to buf as text.
 *
 * The value is first printed with "%#.*g" and 15 significant digits. If the
 * printed text ends with '.', a single '0' is appended. Otherwise trailing
 * zeros of the mantissa are removed while keeping at least one digit after
 * the decimal point; when an exponent part ("e...") is present it is moved
 * left over the removed zeros.
 *
 * The return values of grn_bulk_reserve(), grn_text_printf() and
 * grn_bulk_truncate() are not checked here.
 */
inline static void
ftoa_(grn_ctx *ctx, grn_obj *buf, double d)
{
char *start;
size_t before_size;
size_t len;
#define DIGIT_NUMBER 15
#define FIRST_BUFFER_SIZE (DIGIT_NUMBER + 4)
before_size = GRN_BULK_VSIZE(buf);
grn_bulk_reserve(ctx, buf, FIRST_BUFFER_SIZE);
grn_text_printf(ctx, buf, "%#.*g", DIGIT_NUMBER, d);
/* [start, start + len) is the text that was just appended. */
len = GRN_BULK_VSIZE(buf) - before_size;
start = GRN_BULK_CURR(buf) - len;
#undef FIRST_BUFFER_SIZE
#undef DIGIT_NUMBER
if (start[len - 1] == '.') {
GRN_TEXT_PUTC(ctx, buf, '0');
} else {
char *p, *q;
/* Terminate temporarily so that strchr() stays inside the new text.
   NOTE(review): this writes one byte past the current length; presumably
   the spare byte added by grn_bulk_resize() covers it - confirm. */
start[len] = '\0';
if ((p = strchr(start, 'e'))) {
/* Walk back from the 'e' over trailing zeros, stopping so that one digit
   remains after the '.'. */
for (q = p; *(q - 2) != '.' && *(q - 1) == '0'; q--) { len--; }
/* Close the gap by sliding the exponent part to the left. */
grn_memmove(q, p, start + len - q);
} else {
/* No exponent: just shorten the text from the right. */
for (q = start + len; *(q - 2) != '.' && *(q - 1) == '0'; q--) { len--; }
}
grn_bulk_truncate(ctx, buf, before_size + len);
}
}
/*
 * Appends the text form of d to buf: "#<nan>" for NaN, "#i1/0" or "#i-1/0"
 * for positive or negative infinity, and the output of ftoa_() for every
 * other value.
 *
 * Returns GRN_SUCCESS, or the error from the initial grn_bulk_resize().
 * Failures of the later append calls are not reported.
 */
grn_rc
grn_text_ftoa(grn_ctx *ctx, grn_obj *buf, double d)
{
  grn_rc rc = GRN_SUCCESS;
  if (GRN_BULK_REST(buf) < 32) {
    rc = grn_bulk_resize(ctx, buf, GRN_BULK_VSIZE(buf) + 32);
    if (rc) { return rc; }
  }
#ifdef HAVE_FPCLASSIFY
  {
    const int kind = fpclassify(d);
    if (kind == FP_NAN) {
      GRN_TEXT_PUTS(ctx, buf, "#<nan>");
    } else if (kind == FP_INFINITE) {
      GRN_TEXT_PUTS(ctx, buf, d > 0 ? "#i1/0" : "#i-1/0");
    } else {
      ftoa_(ctx, buf, d);
    }
  }
#else /* HAVE_FPCLASSIFY */
  if (d != d) {
    /* Only NaN compares unequal to itself. */
    GRN_TEXT_PUTS(ctx, buf, "#<nan>");
  } else if (d != 0 && ((d / 2.0) == d)) {
    /* A non-zero value that equals its own half is treated as infinite. */
    GRN_TEXT_PUTS(ctx, buf, d > 0 ? "#i1/0" : "#i-1/0");
  } else {
    ftoa_(ctx, buf, d);
  }
#endif /* HAVE_FPCLASSIFY */
  return rc;
}
/*
 * Appends the low-order len hexadecimal digits of i to buf (see
 * grn_itoh()).
 *
 * Returns GRN_SUCCESS, or the error from grn_bulk_resize() (nothing is
 * appended in that case).
 */
grn_rc
grn_text_itoh(grn_ctx *ctx, grn_obj *buf, int i, unsigned int len)
{
  grn_rc rc = GRN_SUCCESS;
  if (GRN_BULK_REST(buf) < len) {
    rc = grn_bulk_resize(ctx, buf, GRN_BULK_VSIZE(buf) + len);
    if (rc) { return rc; }
  }
  grn_itoh(i, GRN_BULK_CURR(buf), len);
  GRN_BULK_INCR_LEN(buf, len);
  return rc;
}
/*
 * Appends the five-character grn_itob() encoding of id to buf.
 *
 * Returns GRN_SUCCESS, or the error from grn_bulk_resize() (nothing is
 * appended in that case).
 */
grn_rc
grn_text_itob(grn_ctx *ctx, grn_obj *buf, grn_id id)
{
  grn_rc rc = GRN_SUCCESS;
  size_t len = 5;
  if (GRN_BULK_REST(buf) < len) {
    rc = grn_bulk_resize(ctx, buf, GRN_BULK_VSIZE(buf) + len);
    if (rc) { return rc; }
  }
  grn_itob(id, GRN_BULK_CURR(buf));
  GRN_BULK_INCR_LEN(buf, len);
  return rc;
}
/*
 * Appends the 13-character grn_lltob32h() encoding of i to buf.
 *
 * Returns GRN_SUCCESS, or the error from grn_bulk_resize() (nothing is
 * appended in that case).
 */
grn_rc
grn_text_lltob32h(grn_ctx *ctx, grn_obj *buf, long long int i)
{
  grn_rc rc = GRN_SUCCESS;
  size_t len = 13;
  if (GRN_BULK_REST(buf) < len) {
    rc = grn_bulk_resize(ctx, buf, GRN_BULK_VSIZE(buf) + len);
    if (rc) { return rc; }
  }
  grn_lltob32h(i, GRN_BULK_CURR(buf));
  GRN_BULK_INCR_LEN(buf, len);
  return rc;
}
/*
 * Appends s[0..len) to buf as a double-quoted, escaped string.
 *
 * Single-byte characters: '"', '\\', backspace, form feed, newline,
 * carriage return and tab get their two-character backslash escapes; the
 * remaining bytes below 0x20 and 0x7f are written as "\u" followed by four
 * hex digits; everything else is copied. The three-byte sequences
 * E2 80 A8 and E2 80 A9 are written as "\u2028" and "\u2029". All other
 * multi-byte characters are copied unchanged. Scanning stops early when
 * grn_charlen() returns 0.
 *
 * Returns GRN_SUCCESS, or the error from writing a "\uXXXX" escape (the
 * closing quote is not written in that case). Failures of the other append
 * calls are not reported.
 */
grn_rc
grn_text_esc(grn_ctx *ctx, grn_obj *buf, const char *s, unsigned int len)
{
  const char *tail = s + len;
  unsigned int char_len;
  grn_rc rc = GRN_SUCCESS;
  GRN_TEXT_PUTC(ctx, buf, '"');
  while (s < tail) {
    char_len = grn_charlen(ctx, s, tail);
    if (!char_len) { break; }
    if (char_len == 1) {
      const char *short_escape = NULL;
      switch (*s) {
      case '"' :
        short_escape = "\\\"";
        break;
      case '\\' :
        short_escape = "\\\\";
        break;
      case '\b' :
        short_escape = "\\b";
        break;
      case '\f' :
        short_escape = "\\f";
        break;
      case '\n' :
        short_escape = "\\n";
        break;
      case '\r' :
        short_escape = "\\r";
        break;
      case '\t' :
        short_escape = "\\t";
        break;
      default :
        break;
      }
      if (short_escape) {
        grn_bulk_write(ctx, buf, short_escape, 2);
      } else if ((unsigned char)*s < 0x20 || *s == '\x7f') {
        /* Remaining control characters. */
        rc = grn_bulk_write(ctx, buf, "\\u", 2);
        if (rc) { return rc; }
        rc = grn_text_itoh(ctx, buf, *s, 4);
        if (rc) {
          /* Take back the "\u" that was already written. */
          GRN_BULK_INCR_LEN(buf, -2);
          return rc;
        }
      } else {
        GRN_TEXT_PUTC(ctx, buf, *s);
      }
    } else if (char_len == 3 &&
               s[0] == '\xe2' && s[1] == '\x80' && s[2] == '\xa8') {
      grn_bulk_write(ctx, buf, "\\u2028", 6);
    } else if (char_len == 3 &&
               s[0] == '\xe2' && s[1] == '\x80' && s[2] == '\xa9') {
      grn_bulk_write(ctx, buf, "\\u2029", 6);
    } else {
      grn_bulk_write(ctx, buf, s, char_len);
    }
    s += char_len;
  }
  GRN_TEXT_PUTC(ctx, buf, '"');
  return rc;
}
/*
 * Appends s[0..len) to buf, replacing '"', '<', '>' and '&' with the
 * entities "&quot;", "&lt;", "&gt;" and "&amp;". All other characters,
 * including multi-byte ones, are copied unchanged. Scanning stops early
 * when grn_charlen() returns 0.
 *
 * Always returns GRN_SUCCESS; failures of the append calls are not
 * reported.
 */
grn_rc
grn_text_escape_xml(grn_ctx *ctx, grn_obj *buf, const char *s, unsigned int len)
{
  const char *tail = s + len;
  unsigned int char_len;
  grn_rc rc = GRN_SUCCESS;
  while (s < tail) {
    char_len = grn_charlen(ctx, s, tail);
    if (!char_len) { break; }
    if (char_len != 1) {
      grn_bulk_write(ctx, buf, s, char_len);
    } else {
      const char *entity = NULL;
      unsigned int entity_len = 0;
      switch (*s) {
      case '"' :
        entity = "&quot;";
        entity_len = 6;
        break;
      case '<' :
        entity = "&lt;";
        entity_len = 4;
        break;
      case '>' :
        entity = "&gt;";
        entity_len = 4;
        break;
      case '&' :
        entity = "&amp;";
        entity_len = 5;
        break;
      default :
        break;
      }
      if (entity) {
        grn_bulk_write(ctx, buf, entity, entity_len);
      } else {
        GRN_TEXT_PUTC(ctx, buf, *s);
      }
    }
    s += char_len;
  }
  return rc;
}
/* Bit ORed into the tokenizer state while the character following a
   backslash is pending. */
#define TOK_ESC (0x80)
/*
 * Reads one token from [s, e), appending its unescaped text to buf.
 *
 * Leading spaces are skipped. A token is one of:
 *  - a single '(' or ')' (reported as GRN_TOK_SYMBOL),
 *  - a run of characters ended by a space, quote or parenthesis
 *    (GRN_TOK_SYMBOL),
 *  - text enclosed in double quotes (GRN_TOK_STRING),
 *  - text enclosed in single quotes (GRN_TOK_QUOTE).
 * Inside any token a backslash escapes the next character; \b, \f, \n, \r
 * and \t produce the corresponding control characters and any other
 * escaped character is copied as is.
 *
 * tok_type receives the final state: GRN_TOK_VOID when no token was found,
 * otherwise the token kind (possibly still carrying TOK_ESC if the input
 * ended right after a backslash).
 *
 * Returns the position where scanning stopped. When grn_charlen() returns
 * 0 the rest of the input is skipped and e is returned.
 */
const char *
grn_text_unesc_tok(grn_ctx *ctx, grn_obj *buf, const char *s, const char *e, char *tok_type)
{
const char *p;
unsigned int len;
uint8_t stat = GRN_TOK_VOID;
for (p = s; p < e; p += len) {
if (!(len = grn_charlen(ctx, p, e))) {
/* Undecodable input: give up on the remainder. */
p = e;
stat &= ~TOK_ESC;
goto exit;
}
switch (stat) {
case GRN_TOK_VOID :
/* Not inside a token yet. */
if (*p == ' ') { continue; }
switch (*p) {
case '"' :
stat = GRN_TOK_STRING;
break;
case '\'' :
stat = GRN_TOK_QUOTE;
break;
case ')' :
case '(' :
/* A parenthesis is a complete token on its own. */
GRN_TEXT_PUT(ctx, buf, p, len);
p += len;
stat = GRN_TOK_SYMBOL;
goto exit;
case '\\' :
stat = GRN_TOK_SYMBOL|TOK_ESC;
break;
default :
stat = GRN_TOK_SYMBOL;
GRN_TEXT_PUT(ctx, buf, p, len);
break;
}
break;
case GRN_TOK_SYMBOL :
/* The terminating character is left unconsumed for the next call. */
if (*p == ' ') { goto exit; }
switch (*p) {
case '\'' :
case '"' :
case ')' :
case '(' :
goto exit;
case '\\' :
stat |= TOK_ESC;
break;
default :
GRN_TEXT_PUT(ctx, buf, p, len);
break;
}
break;
case GRN_TOK_STRING :
switch (*p) {
case '"' :
/* Consume the closing quote. */
p += len;
goto exit;
case '\\' :
stat |= TOK_ESC;
break;
default :
GRN_TEXT_PUT(ctx, buf, p, len);
break;
}
break;
case GRN_TOK_QUOTE :
switch (*p) {
case '\'' :
/* Consume the closing quote. */
p += len;
goto exit;
case '\\' :
stat |= TOK_ESC;
break;
default :
GRN_TEXT_PUT(ctx, buf, p, len);
break;
}
break;
case GRN_TOK_SYMBOL|TOK_ESC :
case GRN_TOK_STRING|TOK_ESC :
case GRN_TOK_QUOTE|TOK_ESC :
/* Character following a backslash. */
switch (*p) {
case 'b' :
GRN_TEXT_PUTC(ctx, buf, '\b');
break;
case 'f' :
GRN_TEXT_PUTC(ctx, buf, '\f');
break;
case 'n' :
GRN_TEXT_PUTC(ctx, buf, '\n');
break;
case 'r' :
GRN_TEXT_PUTC(ctx, buf, '\r');
break;
case 't' :
GRN_TEXT_PUTC(ctx, buf, '\t');
break;
default :
GRN_TEXT_PUT(ctx, buf, p, len);
break;
}
stat &= ~TOK_ESC;
break;
}
}
exit :
*tok_type = stat;
return p;
}
/*
 * Appends v to buf in the variable-length form produced by GRN_B_ENC().
 * Five bytes of free space are secured beforehand.
 *
 * Returns GRN_SUCCESS, or the error from grn_bulk_resize() (nothing is
 * appended in that case).
 */
grn_rc
grn_text_benc(grn_ctx *ctx, grn_obj *buf, unsigned int v)
{
  uint8_t *dest;
  grn_rc rc = GRN_SUCCESS;
  if (GRN_BULK_REST(buf) < 5) {
    rc = grn_bulk_resize(ctx, buf, GRN_BULK_VSIZE(buf) + 5);
    if (rc) { return rc; }
  }
  dest = (uint8_t *)GRN_BULK_CURR(buf);
  /* dest comes back from GRN_B_ENC() as the new write position, which is
     what gets stored below. */
  GRN_B_ENC(v, dest);
  GRN_BULK_SET_CURR(buf, (char *)dest);
  return rc;
}
/*
 * Lookup table for bytes 0x00 - 0x7f, one row per 16 byte values.
 * A non-zero entry marks a byte that grn_text_urlenc() writes as "%XX".
 * The zero entries are '!', '(', ')', '*', '-', '.', the digits and
 * every byte from 0x40 ('@') through 0x7e ('~').
 */
static const int_least8_t urlenc_tbl[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1
};
/*
 * Appends s[0..len) to buf with percent-encoding applied byte by byte:
 * bytes with the high bit set and bytes flagged in urlenc_tbl are written
 * as '%' followed by two upper-case hex digits; all other bytes are copied.
 *
 * Always returns GRN_SUCCESS. If writing the hex digits fails, the '%'
 * already written for that byte is taken back.
 */
grn_rc
grn_text_urlenc(grn_ctx *ctx, grn_obj *buf, const char *s, unsigned int len)
{
  const char percent = '%';
  const char *tail = s + len;
  while (s < tail) {
    if ((signed char)*s < 0 || urlenc_tbl[(int)*s]) {
      if (!grn_bulk_write(ctx, buf, &percent, 1)) {
        if (grn_text_itoh(ctx, buf, *s, 2)) {
          GRN_BULK_INCR_LEN(buf, -1);
        }
      }
    } else {
      GRN_TEXT_PUTC(ctx, buf, *s);
    }
    s++;
  }
  return GRN_SUCCESS;
}
/* Three-letter day names, indexed by struct tm's tm_wday in
   grn_text_time2rfc1123(). */
static const char *weekdays[7] = {"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"};
/* Three-letter month names, indexed by struct tm's tm_mon in
   grn_text_time2rfc1123(). */
static const char *months[12] = {
"Jan", "Feb", "Mar", "Apr", "May", "Jun",
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"};
/*
 * Replaces the contents of bulk with sec (seconds since the epoch, broken
 * down as UTC) formatted as "Www, DD Mon YYYY HH:MM:SS GMT". When the time
 * cannot be broken down, a fixed fallback date string is stored instead.
 *
 * Always returns GRN_SUCCESS; failures of the append calls are not
 * reported.
 */
grn_rc
grn_text_time2rfc1123(grn_ctx *ctx, grn_obj *bulk, int sec)
{
  time_t tsec = (time_t)sec;
  struct tm *t;
#ifdef HAVE__GMTIME64_S
  struct tm tm;
  t = (gmtime_s(&tm, &tsec) == 0) ? &tm : NULL;
#else /* HAVE__GMTIME64_S */
# ifdef HAVE_GMTIME_R
  struct tm tm;
  t = gmtime_r(&tsec, &tm);
# else /* HAVE_GMTIME_R */
  t = gmtime(&tsec);
# endif /* HAVE_GMTIME_R */
#endif /* HAVE__GMTIME64_S */
  if (!t) {
    GRN_TEXT_SETS(ctx, bulk, "Mon, 16 Mar 1980 20:40:00 GMT");
    return GRN_SUCCESS;
  }
  /* Day name and date. */
  GRN_TEXT_SET(ctx, bulk, weekdays[t->tm_wday], 3);
  GRN_TEXT_PUTS(ctx, bulk, ", ");
  grn_text_itoa_padded(ctx, bulk, t->tm_mday, '0', 2);
  GRN_TEXT_PUTS(ctx, bulk, " ");
  GRN_TEXT_PUT(ctx, bulk, months[t->tm_mon], 3);
  GRN_TEXT_PUTS(ctx, bulk, " ");
  grn_text_itoa(ctx, bulk, t->tm_year + 1900);
  GRN_TEXT_PUTS(ctx, bulk, " ");
  /* Time of day. */
  grn_text_itoa_padded(ctx, bulk, t->tm_hour, '0', 2);
  GRN_TEXT_PUTS(ctx, bulk, ":");
  grn_text_itoa_padded(ctx, bulk, t->tm_min, '0', 2);
  GRN_TEXT_PUTS(ctx, bulk, ":");
  grn_text_itoa_padded(ctx, bulk, t->tm_sec, '0', 2);
  GRN_TEXT_PUTS(ctx, bulk, " GMT");
  return GRN_SUCCESS;
}
/*
 * Appends printf-style formatted text to bulk.
 *
 * Returns the result of grn_text_vprintf(), so that allocation and
 * formatting failures are visible to the caller instead of being reported
 * as success.
 */
grn_rc
grn_text_printf(grn_ctx *ctx, grn_obj *bulk, const char *format, ...)
{
  grn_rc rc;
  va_list args;
  va_start(args, format);
  rc = grn_text_vprintf(ctx, bulk, format, args);
  va_end(args);
  return rc;
}
/*
 * Appends printf-style formatted text to bulk, taking the arguments from
 * args.
 *
 * The text is first formatted into the free space bulk already has. When
 * that is not enough, bulk is grown and the formatting is repeated:
 *  - on WIN32, when vsnprintf() returned -1 with errno == ERANGE, up to
 *    three retries with increasing sizes are made;
 *  - elsewhere, the size reported by the first vsnprintf() call (plus one
 *    for the terminator) is reserved and a single retry is made.
 *
 * Returns GRN_SUCCESS, the error from grn_bulk_reserve(), or
 * GRN_INVALID_ARGUMENT when the final vsnprintf() result is negative.
 */
grn_rc
grn_text_vprintf(grn_ctx *ctx, grn_obj *bulk, const char *format, va_list args)
{
grn_bool is_written = GRN_FALSE;
int written_size;
{
int rest_size;
va_list copied_args;
rest_size = GRN_BULK_REST(bulk);
/* Work on a copy so that args can be used again for a retry. */
va_copy(copied_args, args);
written_size = vsnprintf(GRN_BULK_CURR(bulk), rest_size,
format, copied_args);
va_end(copied_args);
/* Strictly smaller: the terminating NUL needs a byte as well. */
if (written_size < rest_size) {
is_written = GRN_TRUE;
}
}
#ifdef WIN32
if (written_size == -1 && errno == ERANGE) {
# define N_NEW_SIZES 3
int i;
int new_sizes[N_NEW_SIZES];
/* The required size is unknown here, so progressively larger guesses
   are tried. */
new_sizes[0] = GRN_BULK_REST(bulk) + strlen(format) * 2;
new_sizes[1] = new_sizes[0] + 4096;
new_sizes[2] = new_sizes[0] + 65536;
for (i = 0; i < N_NEW_SIZES; i++) {
grn_rc rc;
int new_size = new_sizes[i];
va_list copied_args;
rc = grn_bulk_reserve(ctx, bulk, GRN_BULK_VSIZE(bulk) + new_size);
if (rc) {
return rc;
}
va_copy(copied_args, args);
written_size = vsnprintf(GRN_BULK_CURR(bulk), new_size,
format, copied_args);
va_end(copied_args);
if (written_size != -1) {
break;
}
}
# undef N_NEW_SIZES
}
#else /* WIN32 */
if (!is_written) {
grn_rc rc;
int required_size = written_size + 1; /* "+ 1" for terminate '\0'. */
/* NOTE(review): grn_bulk_reserve() takes the number of additional bytes,
   so adding GRN_BULK_VSIZE() reserves more than required - harmless but
   possibly unintended. */
rc = grn_bulk_reserve(ctx, bulk, GRN_BULK_VSIZE(bulk) + required_size);
if (rc) {
return rc;
}
/* Last use of args, so no copy is needed. */
written_size = vsnprintf(GRN_BULK_CURR(bulk), required_size,
format, args);
}
#endif /* WIN32 */
if (written_size < 0) {
return GRN_INVALID_ARGUMENT;
}
/* The terminating NUL is not counted as part of the bulk contents. */
GRN_BULK_INCR_LEN(bulk, written_size);
return GRN_SUCCESS;
}
/*
 * Releases the storage of buf and resets it to an empty state.
 *
 * The heap block is released only when buf is not flagged GRN_OBJ_REFER,
 * uses out-of-place storage and has a non-NULL head. Afterwards
 * header.flags is cleared, GRN_OBJ_DO_SHALLOW_COPY is removed from
 * impl_flags and the head/curr/tail pointers are set to NULL.
 *
 * Always returns GRN_SUCCESS.
 */
grn_rc
grn_bulk_fin(grn_ctx *ctx, grn_obj *buf)
{
  if (!(buf->header.impl_flags & GRN_OBJ_REFER) &&
      GRN_BULK_OUTP(buf) && buf->u.b.head) {
    /* The allocation starts grn_bulk_margin_size bytes before head. */
    GRN_REALLOC(buf->u.b.head - grn_bulk_margin_size, 0);
  }
  buf->u.b.tail = NULL;
  buf->u.b.curr = NULL;
  buf->u.b.head = NULL;
  buf->header.impl_flags &= ~GRN_OBJ_DO_SHALLOW_COPY;
  buf->header.flags = 0;
  return GRN_SUCCESS;
}
/*
 * Narrows the range [*str, *str_end) to the characters with indexes
 * start..end-1, counting characters with grn_charlen().
 *
 * *str is moved to the start-th character and *str_end to the end-th
 * character when those indexes exist; a pointer whose index is never
 * reached is left unchanged. The encoding argument is not used.
 *
 * Returns GRN_SUCCESS, or GRN_INVALID_ARGUMENT when grn_charlen() returns
 * 0 before the end-th character is reached.
 */
grn_rc
grn_substring(grn_ctx *ctx, char **str, char **str_end, int start, int end, grn_encoding encoding)
{
  char *cursor = *str;
  char *limit = *str_end;
  int nth = 0;
  size_t char_len;
  while (cursor < limit) {
    if (nth == start) { *str = cursor; }
    char_len = grn_charlen(ctx, cursor, limit);
    if (!char_len) {
      return GRN_INVALID_ARGUMENT;
    }
    if (nth == end) {
      *str_end = cursor;
      break;
    }
    nth++;
    cursor += char_len;
  }
  return GRN_SUCCESS;
}
/*
 * Fetches the value that obj (an accessor or a column) yields for record
 * id and appends it to bulk through grn_text_otoj().
 *
 * For an accessor, the chain a, a->next, ... is followed: each step reads
 * a value into buf, and when a further step exists the head of buf is
 * taken as the record id for it. A vector column in the chain is expanded
 * into a bracketed, comma-separated list by recursing for every element.
 *
 * For any other object, buf is initialized according to the object's type
 * and the value is read with grn_obj_get_value().
 */
static void
grn_text_atoj(grn_ctx *ctx, grn_obj *bulk, grn_obj *obj, grn_id id)
{
uint32_t vs;
grn_obj buf;
if (obj->header.type == GRN_ACCESSOR) {
grn_accessor *a = (grn_accessor *)obj;
GRN_TEXT_INIT(&buf, 0);
for (;;) {
/* buf is reused for every step of the accessor chain. */
GRN_BULK_REWIND(&buf);
switch (a->action) {
case GRN_ACCESSOR_GET_ID :
GRN_UINT32_PUT(ctx, &buf, id);
buf.header.domain = GRN_DB_UINT32;
break;
case GRN_ACCESSOR_GET_KEY :
grn_table_get_key2(ctx, a->obj, id, &buf);
buf.header.domain = DB_OBJ(a->obj)->header.domain;
break;
case GRN_ACCESSOR_GET_VALUE :
grn_obj_get_value(ctx, a->obj, id, &buf);
buf.header.domain = GRN_DB_INT32; /* fix me */
break;
case GRN_ACCESSOR_GET_SCORE :
{
/* NOTE(review): ri is dereferenced without a NULL check. */
grn_rset_recinfo *ri = (grn_rset_recinfo *)grn_obj_get_value_(ctx, a->obj, id, &vs);
int32_t int32_score = ri->score;
GRN_INT32_PUT(ctx, &buf, int32_score);
}
buf.header.domain = GRN_DB_INT32;
break;
case GRN_ACCESSOR_GET_NSUBRECS :
{
/* NOTE(review): ri is dereferenced without a NULL check. */
grn_rset_recinfo *ri = (grn_rset_recinfo *)grn_obj_get_value_(ctx, a->obj, id, &vs);
GRN_INT32_PUT(ctx, &buf, ri->n_subrecs);
}
buf.header.domain = GRN_DB_INT32;
break;
case GRN_ACCESSOR_GET_COLUMN_VALUE :
if ((a->obj->header.flags & GRN_OBJ_COLUMN_TYPE_MASK) == GRN_OBJ_COLUMN_VECTOR) {
if (a->next) {
/* The vector is read as an array of ids; the rest of the chain is
   applied to every id and the results are written as a list. */
grn_id *idp;
grn_obj_get_value(ctx, a->obj, id, &buf);
idp = (grn_id *)GRN_BULK_HEAD(&buf);
GRN_TEXT_PUTC(ctx, bulk, '[');
for (vs = GRN_BULK_VSIZE(&buf) / sizeof(grn_id); vs--; idp++) {
grn_text_atoj(ctx, bulk, (grn_obj *)a->next, *idp);
if (vs) { GRN_TEXT_PUTC(ctx, bulk, ','); }
}
GRN_TEXT_PUTC(ctx, bulk, ']');
} else {
/* Last step: output the column itself through the non-accessor path. */
grn_text_atoj(ctx, bulk, a->obj, id);
}
/* The output has already been written; skip the final grn_text_otoj(). */
goto exit;
} else {
grn_obj_get_value(ctx, a->obj, id, &buf);
}
break;
case GRN_ACCESSOR_GET_DB_OBJ :
/* todo */
break;
case GRN_ACCESSOR_LOOKUP :
/* todo */
break;
case GRN_ACCESSOR_FUNCALL :
/* todo */
break;
}
if (a->next) {
/* The value just read becomes the record id for the next step. */
a = a->next;
id = *((grn_id *)GRN_BULK_HEAD(&buf));
} else {
break;
}
}
} else {
/* Pick a buffer type that matches the kind of column being read. */
switch (obj->header.type) {
case GRN_COLUMN_FIX_SIZE :
GRN_VALUE_FIX_SIZE_INIT(&buf, 0, DB_OBJ(obj)->range);
break;
case GRN_COLUMN_VAR_SIZE :
if ((obj->header.flags & GRN_OBJ_COLUMN_TYPE_MASK) == GRN_OBJ_COLUMN_VECTOR) {
/* NOTE(review): range is dereferenced without a NULL check. */
grn_obj *range = grn_ctx_at(ctx, DB_OBJ(obj)->range);
if (range->header.flags & GRN_OBJ_KEY_VAR_SIZE) {
GRN_VALUE_VAR_SIZE_INIT(&buf, GRN_OBJ_VECTOR, DB_OBJ(obj)->range);
} else {
GRN_VALUE_FIX_SIZE_INIT(&buf, GRN_OBJ_VECTOR, DB_OBJ(obj)->range);
}
} else {
GRN_VALUE_VAR_SIZE_INIT(&buf, 0, DB_OBJ(obj)->range);
}
break;
case GRN_COLUMN_INDEX :
GRN_UINT32_INIT(&buf, 0);
break;
default:
GRN_TEXT_INIT(&buf, 0);
break;
}
grn_obj_get_value(ctx, obj, id, &buf);
}
grn_text_otoj(ctx, bulk, &buf, NULL);
exit :
grn_obj_close(ctx, &buf);
}
grn_rc
grn_text_otoj(grn_ctx *ctx, grn_obj *bulk, grn_obj *obj, grn_obj_format *format)
{
grn_obj buf;
GRN_TEXT_INIT(&buf, 0);
switch (obj->header.type) {
case GRN_BULK :
switch (obj->header.domain) {
case GRN_DB_VOID :
case GRN_DB_SHORT_TEXT :
case GRN_DB_TEXT :
case GRN_DB_LONG_TEXT :
grn_text_esc(ctx, bulk, GRN_BULK_HEAD(obj), GRN_BULK_VSIZE(obj));
break;
case GRN_DB_BOOL :
if (*((unsigned char *)GRN_BULK_HEAD(obj))) {
GRN_TEXT_PUTS(ctx, bulk, "true");
} else {
GRN_TEXT_PUTS(ctx, bulk, "false");
}
break;
case GRN_DB_INT8 :
grn_text_itoa(ctx, bulk, GRN_BULK_VSIZE(obj) ? GRN_INT8_VALUE(obj) : 0);
break;
case GRN_DB_UINT8 :
grn_text_lltoa(ctx, bulk, GRN_BULK_VSIZE(obj) ? GRN_UINT8_VALUE(obj) : 0);
break;
case GRN_DB_INT16 :
grn_text_itoa(ctx, bulk, GRN_BULK_VSIZE(obj) ? GRN_INT16_VALUE(obj) : 0);
break;
case GRN_DB_UINT16 :
grn_text_lltoa(ctx, bulk, GRN_BULK_VSIZE(obj) ? GRN_UINT16_VALUE(obj) : 0);
break;
case GRN_DB_INT32 :
grn_text_itoa(ctx, bulk, GRN_BULK_VSIZE(obj) ? GRN_INT32_VALUE(obj) : 0);
break;
case GRN_DB_UINT32 :
grn_text_lltoa(ctx, bulk, GRN_BULK_VSIZE(obj) ? GRN_UINT32_VALUE(obj) : 0);
break;
case GRN_DB_INT64 :
grn_text_lltoa(ctx, bulk, GRN_BULK_VSIZE(obj) ? GRN_INT64_VALUE(obj) : 0);
break;
case GRN_DB_UINT64 :
grn_text_ulltoa(ctx, bulk, GRN_BULK_VSIZE(obj) ? GRN_UINT64_VALUE(obj) : 0);
break;
case GRN_DB_FLOAT :
grn_text_ftoa(ctx, bulk, GRN_BULK_VSIZE(obj) ? GRN_FLOAT_VALUE(obj) : 0);
break;
case GRN_DB_TIME :
{
double dv = *((int64_t *)GRN_BULK_HEAD(obj));
dv /= 1000000.0;
grn_text_ftoa(ctx, bulk, dv);
}
break;
case GRN_DB_TOKYO_GEO_POINT :
case GRN_DB_WGS84_GEO_POINT :
if (GRN_BULK_VSIZE(obj) == sizeof(grn_geo_point)) {
grn_geo_point *gp = (grn_geo_point *)GRN_BULK_HEAD(obj);
GRN_TEXT_PUTC(ctx, bulk, '"');
grn_text_itoa(ctx, bulk, gp->latitude);
GRN_TEXT_PUTC(ctx, bulk, 'x');
grn_text_itoa(ctx, bulk, gp->longitude);
GRN_TEXT_PUTC(ctx, bulk, '"');
} else {
GRN_TEXT_PUTS(ctx, bulk, "\"\"");
}
break;
default :
if (format) {
int j;
int ncolumns = GRN_BULK_VSIZE(&format->columns)/sizeof(grn_obj *);
grn_id id = GRN_RECORD_VALUE(obj);
grn_obj **columns = (grn_obj **)GRN_BULK_HEAD(&format->columns);
if (format->flags & GRN_OBJ_FORMAT_WITH_COLUMN_NAMES) {
GRN_TEXT_PUTS(ctx, bulk, "[");
for (j = 0; j < ncolumns; j++) {
grn_id range_id;
if (j) { GRN_TEXT_PUTC(ctx, bulk, ','); }
GRN_TEXT_PUTS(ctx, bulk, "[");
GRN_BULK_REWIND(&buf);
grn_column_name_(ctx, columns[j], &buf);
grn_text_otoj(ctx, bulk, &buf, NULL);
GRN_TEXT_PUTC(ctx, bulk, ',');
/* column range */
range_id = grn_obj_get_range(ctx, columns[j]);
if (range_id == GRN_ID_NIL) {
GRN_TEXT_PUTS(ctx, bulk, "null");
} else {
int name_len;
grn_obj *range_obj;
char name_buf[GRN_TABLE_MAX_KEY_SIZE];
range_obj = grn_ctx_at(ctx, range_id);
name_len = grn_obj_name(ctx, range_obj, name_buf,
GRN_TABLE_MAX_KEY_SIZE);
GRN_BULK_REWIND(&buf);
GRN_TEXT_PUT(ctx, &buf, name_buf, name_len);
grn_text_otoj(ctx, bulk, &buf, NULL);
}
GRN_TEXT_PUTS(ctx, bulk, "]");
}
GRN_TEXT_PUTS(ctx, bulk, "],");
}
GRN_TEXT_PUTC(ctx, bulk, '[');
for (j = 0; j < ncolumns; j++) {
if (j) { GRN_TEXT_PUTC(ctx, bulk, ','); }
grn_text_atoj(ctx, bulk, columns[j], id);
}
GRN_TEXT_PUTC(ctx, bulk, ']');
} else {
if (GRN_BULK_VSIZE(obj) == 0) {
GRN_TEXT_PUTS(ctx, bulk, "null");
} else {
grn_obj *table = grn_ctx_at(ctx, obj->header.domain);
grn_id id = GRN_RECORD_VALUE(obj);
if (table && table->header.type != GRN_TABLE_NO_KEY) {
/* todo : temporal patch. grn_table_at() is kinda costful... */
if (grn_table_at(ctx, table, id)) {
grn_obj *accessor = grn_obj_column(ctx, table,
GRN_COLUMN_NAME_KEY,
GRN_COLUMN_NAME_KEY_LEN);
if (accessor) {
grn_obj_get_value(ctx, accessor, id, &buf);
grn_obj_unlink(ctx, accessor);
}
}
grn_text_otoj(ctx, bulk, &buf, format);
} else {
grn_text_lltoa(ctx, bulk, id);
}
}
}
}
break;
case GRN_UVECTOR :
if (format) {
int i, j;
grn_id *v = (grn_id *)GRN_BULK_HEAD(obj), *ve = (grn_id *)GRN_BULK_CURR(obj);
int ncolumns = GRN_BULK_VSIZE(&format->columns) / sizeof(grn_obj *);
grn_obj **columns = (grn_obj **)GRN_BULK_HEAD(&format->columns);
GRN_TEXT_PUTS(ctx, bulk, "[[");
grn_text_itoa(ctx, bulk, ve - v);
GRN_TEXT_PUTC(ctx, bulk, ']');
if (v < ve) {
if (format->flags & GRN_OBJ_FORMAT_WITH_COLUMN_NAMES) {
GRN_TEXT_PUTS(ctx, bulk, ",[");
for (j = 0; j < ncolumns; j++) {
grn_id range_id;
if (j) { GRN_TEXT_PUTC(ctx, bulk, ','); }
GRN_TEXT_PUTS(ctx, bulk, "[");
GRN_BULK_REWIND(&buf);
grn_column_name_(ctx, columns[j], &buf);
grn_text_otoj(ctx, bulk, &buf, NULL);
GRN_TEXT_PUTC(ctx, bulk, ',');
/* column range */
range_id = grn_obj_get_range(ctx, columns[j]);
if (range_id == GRN_ID_NIL) {
GRN_TEXT_PUTS(ctx, bulk, "null");
} else {
int name_len;
grn_obj *range_obj;
char name_buf[GRN_TABLE_MAX_KEY_SIZE];
range_obj = grn_ctx_at(ctx, range_id);
name_len = grn_obj_name(ctx, range_obj, name_buf,
GRN_TABLE_MAX_KEY_SIZE);
GRN_BULK_REWIND(&buf);
GRN_TEXT_PUT(ctx, &buf, name_buf, name_len);
grn_text_otoj(ctx, bulk, &buf, NULL);
}
GRN_TEXT_PUTS(ctx, bulk, "]");
}
GRN_TEXT_PUTC(ctx, bulk, ']');
}
for (i = 0;; i++) {
GRN_TEXT_PUTS(ctx, bulk, ",[");
for (j = 0; j < ncolumns; j++) {
if (j) { GRN_TEXT_PUTC(ctx, bulk, ','); }
GRN_BULK_REWIND(&buf);
grn_obj_get_value(ctx, columns[j], *v, &buf);
grn_text_otoj(ctx, bulk, &buf, NULL);
}
GRN_TEXT_PUTC(ctx, bulk, ']');
v++;
if (v < ve) {
GRN_TEXT_PUTC(ctx, bulk, ',');
} else {
break;
}
}
}
GRN_TEXT_PUTC(ctx, bulk, ']');
} else {
grn_obj *range = grn_ctx_at(ctx, obj->header.domain);
if (range && range->header.type == GRN_TYPE) {
grn_id value_size = ((struct _grn_type *)range)->obj.range;
char *v = (char *)GRN_BULK_HEAD(obj),
*ve = (char *)GRN_BULK_CURR(obj);
GRN_TEXT_PUTC(ctx, bulk, '[');
if (v < ve) {
for (;;) {
grn_obj value;
GRN_OBJ_INIT(&value, GRN_BULK, 0, obj->header.domain);
grn_bulk_write_from(ctx, &value, v, 0, value_size);
grn_text_otoj(ctx, bulk, &value, NULL);
v += value_size;
if (v < ve) {
GRN_TEXT_PUTC(ctx, bulk, ',');
} else {
break;
}
}
}
GRN_TEXT_PUTC(ctx, bulk, ']');
} else {
grn_id *v = (grn_id *)GRN_BULK_HEAD(obj),
*ve = (grn_id *)GRN_BULK_CURR(obj);
GRN_TEXT_PUTC(ctx, bulk, '[');
if (v < ve) {
for (;;) {
if (range->header.type != GRN_TABLE_NO_KEY) {
grn_obj key;
GRN_OBJ_INIT(&key, GRN_BULK, 0, range->header.domain);
grn_table_get_key2(ctx, range, *v, &key);
grn_text_otoj(ctx, bulk, &key, NULL);
GRN_OBJ_FIN(ctx, &key);
} else {
grn_text_lltoa(ctx, bulk, *v);
}
v++;
if (v < ve) {
GRN_TEXT_PUTC(ctx, bulk, ',');
} else {
break;
}
}
}
GRN_TEXT_PUTC(ctx, bulk, ']');
}
}
break;
case GRN_VECTOR :
if (obj->header.domain == GRN_DB_VOID) {
ERR(GRN_INVALID_ARGUMENT, "invalid obj->header.domain");
} else {
unsigned int i, n;
grn_obj value;
grn_obj weight;
grn_bool with_weight;
GRN_VOID_INIT(&value);
GRN_UINT32_INIT(&weight, 0);
with_weight = (format && format->flags & GRN_OBJ_FORMAT_WITH_WEIGHT);
n = grn_vector_size(ctx, obj);
GRN_TEXT_PUTC(ctx, bulk, '[');
for (i = 0; i < n; i++) {
const char *_value;
unsigned int _weight, length;
grn_id domain;
if (i) { GRN_TEXT_PUTC(ctx, bulk, ','); }
length = grn_vector_get_element(ctx, obj, i,
&_value, &_weight, &domain);
if (domain != GRN_DB_VOID) {
grn_obj_reinit(ctx, &value, domain, 0);
} else {
grn_obj_reinit(ctx, &value, obj->header.domain, 0);
}
if (with_weight) {
GRN_TEXT_PUTC(ctx, bulk, '{');
}
grn_bulk_write(ctx, &value, _value, length);
grn_text_otoj(ctx, bulk, &value, NULL);
if (with_weight) {
GRN_TEXT_PUTC(ctx, bulk, ':');
GRN_UINT32_SET(ctx, &weight, _weight);
grn_text_otoj(ctx, bulk, &weight, NULL);
GRN_TEXT_PUTC(ctx, bulk, '}');
}
}
GRN_TEXT_PUTC(ctx, bulk, ']');
GRN_OBJ_FIN(ctx, &value);
GRN_OBJ_FIN(ctx, &weight);
}
break;
case GRN_PVECTOR :
if (format) {
ERR(GRN_FUNCTION_NOT_IMPLEMENTED,
"cannot print GRN_PVECTOR using grn_obj_format");
} else {
unsigned int i, n;
GRN_TEXT_PUTC(ctx, bulk, '[');
n = GRN_BULK_VSIZE(obj) / sizeof(grn_obj *);
for (i = 0; i < n; i++) {
grn_obj *value;
if (i) { GRN_TEXT_PUTC(ctx, bulk, ','); }
value = GRN_PTR_VALUE_AT(obj, i);
grn_text_otoj(ctx, bulk, value, NULL);
}
GRN_TEXT_PUTC(ctx, bulk, ']');
}
break;
case GRN_TABLE_HASH_KEY :
case GRN_TABLE_PAT_KEY :
case GRN_TABLE_NO_KEY :
if (format) {
int i, j;
int ncolumns = GRN_BULK_VSIZE(&format->columns)/sizeof(grn_obj *);
grn_obj **columns = (grn_obj **)GRN_BULK_HEAD(&format->columns);
grn_table_cursor *tc = grn_table_cursor_open(ctx, obj, NULL, 0, NULL, 0,
format->offset, format->limit,
GRN_CURSOR_ASCENDING);
if (!tc) { ERRCLR(ctx); }
GRN_TEXT_PUTS(ctx, bulk, "[[");
grn_text_itoa(ctx, bulk, format->nhits);
GRN_TEXT_PUTC(ctx, bulk, ']');
if (format->flags & GRN_OBJ_FORMAT_WITH_COLUMN_NAMES) {
GRN_TEXT_PUTS(ctx, bulk, ",[");
for (j = 0; j < ncolumns; j++) {
grn_id range_id;
if (j) { GRN_TEXT_PUTC(ctx, bulk, ','); }
GRN_TEXT_PUTS(ctx, bulk, "[");
GRN_BULK_REWIND(&buf);
grn_column_name_(ctx, columns[j], &buf);
grn_text_otoj(ctx, bulk, &buf, NULL);
GRN_TEXT_PUTC(ctx, bulk, ',');
/* column range */
range_id = grn_obj_get_range(ctx, columns[j]);
if (range_id == GRN_ID_NIL) {
GRN_TEXT_PUTS(ctx, bulk, "null");
} else {
int name_len;
grn_obj *range_obj;
char name_buf[GRN_TABLE_MAX_KEY_SIZE];
range_obj = grn_ctx_at(ctx, range_id);
name_len = grn_obj_name(ctx, range_obj, name_buf,
GRN_TABLE_MAX_KEY_SIZE);
GRN_BULK_REWIND(&buf);
GRN_TEXT_PUT(ctx, &buf, name_buf, name_len);
grn_text_otoj(ctx, bulk, &buf, NULL);
}
GRN_TEXT_PUTS(ctx, bulk, "]");
}
GRN_TEXT_PUTC(ctx, bulk, ']');
}
if (tc) {
grn_id id;
for (i = 0; (id = grn_table_cursor_next(ctx, tc)) != GRN_ID_NIL; i++) {
GRN_TEXT_PUTS(ctx, bulk, ",[");
for (j = 0; j < ncolumns; j++) {
if (j) { GRN_TEXT_PUTC(ctx, bulk, ','); }
grn_text_atoj(ctx, bulk, columns[j], id);
}
GRN_TEXT_PUTC(ctx, bulk, ']');
}
grn_table_cursor_close(ctx, tc);
}
GRN_TEXT_PUTC(ctx, bulk, ']');
} else {
int i;
grn_id id;
grn_obj *column = grn_obj_column(ctx, obj,
GRN_COLUMN_NAME_KEY,
GRN_COLUMN_NAME_KEY_LEN);
grn_table_cursor *tc = grn_table_cursor_open(ctx, obj, NULL, 0, NULL, 0,
0, -1, GRN_CURSOR_ASCENDING);
GRN_TEXT_PUTC(ctx, bulk, '[');
if (tc) {
for (i = 0; (id = grn_table_cursor_next(ctx, tc)) != GRN_ID_NIL; i++) {
if (i) { GRN_TEXT_PUTC(ctx, bulk, ','); }
GRN_BULK_REWIND(&buf);
grn_obj_get_value(ctx, column, id, &buf);
grn_text_esc(ctx, bulk, GRN_BULK_HEAD(&buf), GRN_BULK_VSIZE(&buf));
}
grn_table_cursor_close(ctx, tc);
}
GRN_TEXT_PUTC(ctx, bulk, ']');
grn_obj_unlink(ctx, column);
}
break;
}
grn_obj_close(ctx, &buf);
return GRN_SUCCESS;
}
/*
 * Decodes percent-encoded text from [p, e) and appends the decoded bytes
 * to buf.
 *
 * Decoding stops when the delimiter character d is met (the delimiter is
 * consumed but not appended) or when e is reached.
 *
 * A '%' that has at least two following characters is handed to
 * grn_htoui(); the sequence is accepted only when the end pointer it
 * reports is p + 3. Otherwise a notice is logged and the '%' is copied
 * literally. A '%' with fewer than two following characters is copied
 * as is.
 *
 * Returns the position where decoding stopped: just after the delimiter,
 * or e when no delimiter was found.
 */
const char *
grn_text_urldec(grn_ctx *ctx, grn_obj *buf, const char *p, const char *e, char d)
{
  while (p < e) {
    const char current = *p;
    if (current == d) {
      /* Swallow the delimiter and hand back the position right after it. */
      return p + 1;
    }
    if (current != '%' || e - p < 3) {
      /* Ordinary character, or a '%' too close to the end to be decoded. */
      GRN_TEXT_PUTC(ctx, buf, current);
      p++;
      continue;
    }
    {
      const char *hex_end;
      unsigned int decoded = grn_htoui(p + 1, p + 3, &hex_end);
      if (hex_end == p + 3) {
        GRN_TEXT_PUTC(ctx, buf, decoded);
        p += 3;
      } else {
        GRN_LOG(ctx, GRN_LOG_NOTICE, "invalid %% sequence (%c%c)", p[1], p[2]);
        GRN_TEXT_PUTC(ctx, buf, '%');
        p++;
      }
    }
  }
  return p;
}
/*
 * Decodes CGI style (form/query string) encoded text from [p, e) and
 * appends the decoded bytes to buf.
 *
 * delimiters is a NUL-terminated list of characters. Decoding stops when
 * any of them is met (it is consumed but not appended) or when e is
 * reached. The delimiter check is done first, so a '+' or '%' listed in
 * delimiters is treated as a delimiter.
 *
 * '+' is appended as a space. A '%' that has at least two following
 * characters is handed to grn_htoui(); the sequence is accepted only when
 * the end pointer it reports is p + 3. Otherwise a notice is logged and
 * the '%' is copied literally.
 *
 * Returns the position where decoding stopped: just after the delimiter,
 * or e when no delimiter was found.
 */
const char *
grn_text_cgidec(grn_ctx *ctx, grn_obj *buf, const char *p, const char *e,
                const char *delimiters)
{
  while (p < e) {
    const char current = *p;
    const char *candidate = delimiters;
    /* Walk the delimiter list; stopping before its NUL means a match. */
    while (*candidate && *candidate != current) {
      candidate++;
    }
    if (*candidate) {
      return p + 1;
    }
    if (current == '+') {
      GRN_TEXT_PUTC(ctx, buf, ' ');
      p++;
      continue;
    }
    if (current != '%' || e - p < 3) {
      /* Ordinary character, or a '%' too close to the end to be decoded. */
      GRN_TEXT_PUTC(ctx, buf, current);
      p++;
      continue;
    }
    {
      const char *hex_end;
      unsigned int decoded = grn_htoui(p + 1, p + 3, &hex_end);
      if (hex_end == p + 3) {
        GRN_TEXT_PUTC(ctx, buf, decoded);
        p += 3;
      } else {
        GRN_LOG(ctx, GRN_LOG_NOTICE, "invalid %% sequence (%c%c)", p[1], p[2]);
        GRN_TEXT_PUTC(ctx, buf, '%');
        p++;
      }
    }
  }
  return p;
}
/*
 * Normalizes the URL path `path` (`path_len` bytes; it doesn't need to be
 * NUL-terminated) and stores the result in `buf` as a NUL-terminated
 * string.
 *
 * The path is processed one '/'-separated segment at a time:
 *   - "."  segments are dropped.
 *   - ".." segments drop the most recently stored segment. The start of
 *     `buf` is treated as a segment boundary, so the write position never
 *     goes before `buf`. A ".." that has no stored segment to remove is
 *     ignored.
 *   - any other segment (including an empty one) is copied together with
 *     the '/' that follows it.
 *
 * Errors are reported through ERR() with GRN_INVALID_ARGUMENT:
 *   - when a ".." segment removes a stored segment. This is kept from the
 *     earlier implementation.
 *     NOTE(review): the message says the parent doesn't exist although
 *     the error is raised when a parent was found - confirm the intent.
 *   - when the normalized path doesn't fit in `buf`. In that case the
 *     output is truncated at the last segment that fit.
 *
 * When `buf_len` is 1 only the terminating NUL is stored. When `buf_len`
 * is 0 nothing is written.
 */
void
grn_str_url_path_normalize(grn_ctx *ctx, const char *path, size_t path_len,
                           char *buf, size_t buf_len)
{
  char *b = buf, *be;
  const char *p = path, *pe = path + path_len, *pc;
  if (buf_len < 2) {
    /* No room for any character, but still hand back a valid string. */
    if (buf_len == 1) { *buf = '\0'; }
    return;
  }
  /* The last byte is reserved for the terminating NUL. */
  be = buf + buf_len - 1;
  while (p < pe) {
    size_t segment_len;
    for (pc = p; pc < pe && *pc != '/'; pc++) {}
    segment_len = (size_t)(pc - p);
    if (*p == '.') {
      if (segment_len == 2 && p[1] == '.') {
        /* '..' */
        if (b - buf >= 2) {
          /* Step over the '/' that ends the last stored segment, then walk
             back to the previous '/' without ever leaving buf. */
          for (b -= 2; b > buf && *b != '/'; b--) {}
          if (*b == '/') { b++; }
          ERR(GRN_INVALID_ARGUMENT, "parent path doesn't exist.");
        }
        /* Skip the '/' after the segment only when there is one. */
        p = (pc < pe) ? pc + 1 : pc;
        continue;
      } else if (segment_len == 1) {
        /* '.' */
        p = (pc < pe) ? pc + 1 : pc;
        continue;
      }
    }
    if ((size_t)(be - b) < segment_len) {
      /* Stop here; retrying the same segment would never terminate. */
      ERR(GRN_INVALID_ARGUMENT, "normalized path is too long.");
      break;
    }
    grn_memcpy(b, p, segment_len);
    b += segment_len;
    p = pc;
    if (p < pe) {
      /* The segment scan stopped before pe, so *p is '/'. */
      if (b >= be) {
        ERR(GRN_INVALID_ARGUMENT, "normalized path is too long.");
        break;
      }
      *b++ = '/';
      p++;
    }
  }
  *b = '\0';
}
/*
 * Reads one line from fp and appends it to buf.
 *
 * The line is read in chunks of at most BUFSIZ - 1 characters with
 * fgets() until a chunk that ends with '\n' is appended or fgets()
 * returns NULL. The '\n' is kept in buf. The length of each chunk is
 * taken with strlen(), so a chunk that starts with a NUL character stops
 * reading.
 *
 * Returns GRN_SUCCESS when at least one character was appended,
 * GRN_END_OF_DATA when nothing could be read, or the value returned by
 * grn_bulk_reserve() when the space for the next chunk can't be reserved
 * (characters appended before the failure stay in buf).
 */
grn_rc
grn_text_fgets(grn_ctx *ctx, grn_obj *buf, FILE *fp)
{
  size_t len;
  grn_rc rc = GRN_END_OF_DATA;
  for (;;) {
    /* fgets() must not write into space that couldn't be reserved. */
    grn_rc reserve_rc = grn_bulk_reserve(ctx, buf, BUFSIZ);
    if (reserve_rc != GRN_SUCCESS) {
      rc = reserve_rc;
      break;
    }
    if (!fgets(GRN_BULK_CURR(buf), BUFSIZ, fp)) { break; }
    if (!(len = strlen(GRN_BULK_CURR(buf)))) { break; }
    GRN_BULK_INCR_LEN(buf, len);
    rc = GRN_SUCCESS;
    /* A trailing newline means the whole line has been read. */
    if (GRN_BULK_CURR(buf)[-1] == '\n') { break; }
  }
  return rc;
}
/*
 * Checks whether every byte stored in obj is zero.
 *
 * The GRN_BULK_VSIZE(obj) bytes starting at GRN_BULK_HEAD(obj) are
 * examined. Returns GRN_FALSE at the first non-zero byte and GRN_TRUE
 * otherwise, which includes the case where obj holds no bytes.
 * ctx is not used.
 */
grn_bool
grn_bulk_is_zero(grn_ctx *ctx, grn_obj *obj)
{
  const char *cursor = GRN_BULK_HEAD(obj);
  unsigned int remaining = GRN_BULK_VSIZE(obj);
  while (remaining > 0) {
    if (*cursor != 0) {
      return GRN_FALSE;
    }
    cursor++;
    remaining--;
  }
  return GRN_TRUE;
}