mariadb/storage/mroonga/vendor/groonga/lib/load.c
Monty 0ccdf54b64 Check and remove high stack usage
I checked all stack overflow potential problems found with
gcc -Wstack-usage=16384
and
clang -Wframe-larger-than=16384 -no-inline

Fixes:
Added '#pragma clang diagnostic ignored "-Wframe-larger-than="'
  to a lot of functions where stack usage is large but reasonable.
- Added stack check warnings to BUILD scripts when using clang and debug.

Functions changed to use malloc instead of allocating things on the stack:
- read_bootstrap_query() now allocates line_buffer (20000 bytes) with
  malloc() instead of using stack. This has a small performance impact
  but this is not relevant for bootstrap.
- mroonga grn_select() used 65856 bytes on stack. Changed it to use
  malloc().
- Wsrep_schema::replay_transaction() and
  Wsrep_schema::recover_sr_transactions().
- Connect zipOpen3()

Not fixed:
- mroonga/vendor/groonga/lib/expr.c grn_proc_call() uses
  43712 byte on stack.  However this is not easy to fix as the stack
  used is caused by a lot of code generated by defines.
- Most changes in mroonga/groonga were only adding of pragmas to disable
  stack warnings.
- rocksdb/options/options_helper.cc uses 20288 of stack space.
  (no reason to fix except to get rid of the compiler warning)
- Cases using alloca() where the allocation size is reasonable.
- An issue in libmariadb (reported to connectors).
2024-04-23 14:12:31 +03:00

1235 lines
40 KiB
C

/* -*- c-basic-offset: 2 -*- */
/*
Copyright(C) 2009-2017 Brazil
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
*/
#include "grn_load.h"
#include "grn_ctx_impl.h"
#include "grn_db.h"
#include "grn_util.h"
#include <my_attribute.h>
PRAGMA_DISABLE_CHECK_STACK_FRAME
/*
 * Copy the error state currently held by ctx (message text and return code)
 * into the loader.  Callers in this file invoke it right after ERR() or a
 * failed call, typically before clearing ctx with ERRCLR().
 */
static void
grn_loader_save_error(grn_ctx *ctx, grn_loader *loader)
{
  grn_strcpy(loader->errbuf, GRN_CTX_MSGSIZE, ctx->errbuf);
  loader->rc = ctx->rc;
}
/*
 * Append one value slot to loader->values and make it loader->last.
 *
 * loader->values is used as an array of grn_obj.  When the buffer already
 * extends past the current logical end (values_size), the slot found there
 * is recycled: its domain is reset to GRN_DB_TEXT and its content rewound.
 * Otherwise the buffer is grown by one grn_obj, which is initialized as an
 * empty text bulk.
 *
 * Returns the slot, or NULL when growing the buffer fails (in which case
 * values_size and loader->last are left untouched).
 */
static grn_obj *
values_add(grn_ctx *ctx, grn_loader *loader)
{
  grn_obj *slot;
  uint32_t used_bytes = loader->values_size * sizeof(grn_obj);
  grn_bool recycle = used_bytes < GRN_TEXT_LEN(&loader->values);

  if (!recycle) {
    if (grn_bulk_space(ctx, &loader->values, sizeof(grn_obj))) {
      return NULL;
    }
  }
  /* Look the buffer up only now: growing it may have moved it. */
  slot = (grn_obj *)(GRN_TEXT_VALUE(&loader->values) + used_bytes);
  if (recycle) {
    slot->header.domain = GRN_DB_TEXT;
    GRN_BULK_REWIND(slot);
  } else {
    GRN_TEXT_INIT(slot, 0);
  }
  loader->last = slot;
  loader->values_size++;
  return slot;
}
/*
 * Return the value that follows `value` at the same nesting level.
 *
 * An open-bracket or open-brace marker carries, as its UInt32 payload, the
 * number of value slots it encloses; those slots are skipped as well.
 */
static grn_obj *
values_next(grn_ctx *ctx, grn_obj *value)
{
  uint32_t enclosed = 0;

  if (value->header.domain == GRN_JSON_LOAD_OPEN_BRACKET ||
      value->header.domain == GRN_JSON_LOAD_OPEN_BRACE) {
    enclosed = GRN_UINT32_VALUE(value);
  }
  return value + enclosed + 1;
}
/*
 * Count the values in [head, tail) at the nesting level of head.
 * Nested arrays/objects count as a single value (see values_next()).
 */
static int
values_len(grn_ctx *ctx, grn_obj *head, grn_obj *tail)
{
  int count = 0;
  grn_obj *cursor = head;

  while (cursor < tail) {
    cursor = values_next(ctx, cursor);
    count++;
  }
  return count;
}
/*
 * Look up or add the record identified by `key` in the loader's table.
 *
 * Returns the record id.  GRN_ID_NIL is returned (and the error saved into
 * the loader) when the add fails.  When the record already existed and an
 * `ifexists` expression is set, the expression is evaluated with the record
 * bound to its first variable; 0 is returned if it does not evaluate to
 * true.
 */
static grn_id
loader_add(grn_ctx *ctx, grn_obj *key)
{
  grn_loader *loader = &ctx->impl->loader;
  int added = 0;
  grn_id id;
  grn_obj *record_var;
  grn_obj *verdict;

  id = grn_table_add_by_key(ctx, loader->table, key, &added);
  if (id == GRN_ID_NIL) {
    grn_loader_save_error(ctx, loader);
    return id;
  }
  if (added || !loader->ifexists) {
    return id;
  }

  record_var = grn_expr_get_var_by_offset(ctx, loader->ifexists, 0);
  GRN_RECORD_SET(ctx, record_var, id);
  verdict = grn_expr_exec(ctx, loader->ifexists, 0);
  if (grn_obj_is_true(ctx, verdict)) {
    return id;
  }
  return 0;
}
/*
 * Append the key/weight pairs of a JSON object to `vector`.
 *
 * `value` is the open-brace marker; its UInt32 payload is the number of
 * slots that follow it, read here as alternating key and weight slots.
 * Each weight is cast to UInt32.  On the first cast failure a cast error is
 * reported for `column` and the remaining pairs are not processed.
 */
static void
add_weight_vector(grn_ctx *ctx,
                  grn_obj *column,
                  grn_obj *value,
                  grn_obj *vector)
{
  unsigned int n_slots = GRN_UINT32_VALUE(value);
  unsigned int offset;
  grn_obj weight_buffer;

  GRN_UINT32_INIT(&weight_buffer, 0);
  for (offset = 0; offset < n_slots; offset += 2) {
    grn_obj *key = value + 1 + offset;
    grn_obj *weight = key + 1;

    GRN_BULK_REWIND(&weight_buffer);
    if (grn_obj_cast(ctx, weight, &weight_buffer, GRN_TRUE) != GRN_SUCCESS) {
      grn_obj *range = grn_ctx_at(ctx, weight_buffer.header.domain);
      ERR_CAST(column, range, weight);
      grn_obj_unlink(ctx, range);
      break;
    }
    grn_vector_add_element(ctx,
                           vector,
                           GRN_BULK_HEAD(key),
                           GRN_BULK_VSIZE(key),
                           GRN_UINT32_VALUE(&weight_buffer),
                           key->header.domain);
  }
  GRN_OBJ_FIN(ctx, &weight_buffer);
}
/*
 * Store a JSON array as the value of `column` for record `id`.
 *
 * `vector` is the open-bracket marker; its UInt32 payload (n) is the number
 * of value slots that follow it and `vector + 1` is the first element.
 * The elements are collected into a temporary buffer whose layout depends on
 * the column's range:
 *   - range is a table: record ids are collected (elements of another
 *     domain are cast to the range first);
 *   - range has GRN_OBJ_KEY_VAR_SIZE set: text elements are added as vector
 *     elements, and a nested JSON object is expanded as key/weight pairs;
 *   - otherwise: value_size bytes of each (possibly cast) element are
 *     appended.
 * An element that fails to cast raises a cast error and is left out.  The
 * buffer is then written with grn_obj_set_value(..., GRN_OBJ_SET).
 */
static void
set_vector(grn_ctx *ctx, grn_obj *column, grn_id id, grn_obj *vector)
{
int n = GRN_UINT32_VALUE(vector);
grn_obj buf, *v = vector + 1;
grn_id range_id;
grn_obj *range;
range_id = DB_OBJ(column)->range;
range = grn_ctx_at(ctx, range_id);
if (grn_obj_is_table(ctx, range)) {
/* Reference vector: gather record ids of the range table. */
GRN_RECORD_INIT(&buf, GRN_OBJ_VECTOR, range_id);
while (n--) {
grn_bool cast_failed = GRN_FALSE;
grn_obj record, *element = v;
if (range_id != element->header.domain) {
GRN_RECORD_INIT(&record, 0, range_id);
if (grn_obj_cast(ctx, element, &record, GRN_TRUE)) {
cast_failed = GRN_TRUE;
ERR_CAST(column, range, element);
}
element = &record;
}
if (!cast_failed) {
GRN_UINT32_PUT(ctx, &buf, GRN_RECORD_VALUE(element));
}
/* Only the temporary used for casting needs to be released. */
if (element == &record) { GRN_OBJ_FIN(ctx, element); }
v = values_next(ctx, v);
}
} else {
if (((struct _grn_type *)range)->obj.header.flags & GRN_OBJ_KEY_VAR_SIZE) {
/* Variable-size range: build a text vector. */
GRN_TEXT_INIT(&buf, GRN_OBJ_VECTOR);
while (n--) {
switch (v->header.domain) {
case GRN_DB_TEXT :
{
grn_bool cast_failed = GRN_FALSE;
grn_obj casted_element, *element = v;
if (range_id != element->header.domain) {
GRN_OBJ_INIT(&casted_element, GRN_BULK, 0, range_id);
if (grn_obj_cast(ctx, element, &casted_element, GRN_TRUE)) {
cast_failed = GRN_TRUE;
ERR_CAST(column, range, element);
}
element = &casted_element;
}
if (!cast_failed) {
grn_vector_add_element(ctx, &buf,
GRN_TEXT_VALUE(element),
GRN_TEXT_LEN(element),
0,
element->header.domain);
}
if (element == &casted_element) { GRN_OBJ_FIN(ctx, element); }
break;
}
case GRN_JSON_LOAD_OPEN_BRACE :
add_weight_vector(ctx, column, v, &buf);
/* The slots enclosed by the object are consumed together with it. */
n -= GRN_UINT32_VALUE(v);
break;
default :
ERR(GRN_INVALID_ARGUMENT, "array must contain string or object");
break;
}
v = values_next(ctx, v);
}
} else {
/* Fixed-size range: append value_size bytes per element. */
grn_id value_size = ((grn_db_obj *)range)->range;
GRN_VALUE_FIX_SIZE_INIT(&buf, GRN_OBJ_VECTOR, range_id);
while (n--) {
grn_bool cast_failed = GRN_FALSE;
grn_obj casted_element, *element = v;
if (range_id != element->header.domain) {
GRN_OBJ_INIT(&casted_element, GRN_BULK, 0, range_id);
if (grn_obj_cast(ctx, element, &casted_element, GRN_TRUE)) {
cast_failed = GRN_TRUE;
ERR_CAST(column, range, element);
}
element = &casted_element;
}
if (!cast_failed) {
grn_bulk_write(ctx, &buf, GRN_TEXT_VALUE(element), value_size);
}
if (element == &casted_element) { GRN_OBJ_FIN(ctx, element); }
v = values_next(ctx, v);
}
}
}
grn_obj_set_value(ctx, column, id, &buf, GRN_OBJ_SET);
GRN_OBJ_FIN(ctx, &buf);
}
/*
 * Store a JSON object as the value of `column` for record `id`.
 *
 * Only weight vector columns accept an object value; for any other column
 * GRN_INVALID_ARGUMENT is raised and nothing is written.  Otherwise the
 * object's key/weight pairs are collected into a text vector and stored
 * with GRN_OBJ_SET.
 */
static void
set_weight_vector(grn_ctx *ctx, grn_obj *column, grn_id id, grn_obj *value)
{
  if (grn_obj_is_weight_vector_column(ctx, column)) {
    grn_obj vector;

    GRN_TEXT_INIT(&vector, GRN_OBJ_VECTOR);
    add_weight_vector(ctx, column, value, &vector);
    grn_obj_set_value(ctx, column, id, &vector, GRN_OBJ_SET);
    GRN_OBJ_FIN(ctx, &vector);
  } else {
    char col_name[GRN_TABLE_MAX_KEY_SIZE];
    int col_name_size = grn_obj_name(ctx, column, col_name,
                                     GRN_TABLE_MAX_KEY_SIZE);

    ERR(GRN_INVALID_ARGUMENT,
        "<%.*s>: columns except weight vector column don't support object value",
        col_name_size, col_name);
  }
}
/*
 * Return non-zero when the `size` bytes at `p` spell `name`.
 *
 * `name` is a NUL-terminated string; `p` need not be terminated.  The first
 * byte of `p` is compared against GRN_DB_PSEUDO_COLUMN_PREFIX and the rest
 * against `name` from its second byte on.
 */
static inline int
name_equal(const char *p, unsigned int size, const char *name)
{
  if (size != strlen(name) || p[0] != GRN_DB_PSEUDO_COLUMN_PREFIX) {
    return 0;
  }
  return memcmp(p + 1, name + 1, size - 1) == 0;
}
/*
 * Log (at error level) that setting a column value failed, including the
 * current ctx->errbuf and limited inspections of the record key and of the
 * value that could not be stored.
 */
static void
report_set_column_value_failure(grn_ctx *ctx,
                                grn_obj *key,
                                const char *column_name,
                                unsigned int column_name_size,
                                grn_obj *column_value)
{
  grn_obj key_text;
  grn_obj value_text;

  GRN_TEXT_INIT(&key_text, 0);
  grn_inspect_limited(ctx, &key_text, key);
  GRN_TEXT_INIT(&value_text, 0);
  grn_inspect_limited(ctx, &value_text, column_value);

  GRN_LOG(ctx, GRN_LOG_ERROR,
          "[table][load] failed to set column value: %s: "
          "key: <%.*s>, column: <%.*s>, value: <%.*s>",
          ctx->errbuf,
          (int)GRN_TEXT_LEN(&key_text),
          GRN_TEXT_VALUE(&key_text),
          column_name_size,
          column_name,
          (int)GRN_TEXT_LEN(&value_text),
          GRN_TEXT_VALUE(&value_text));

  GRN_OBJ_FIN(ctx, &value_text);
  GRN_OBJ_FIN(ctx, &key_text);
}
/*
 * Convert a loaded `_id` value to a record id.
 *
 * Values whose domain is already UInt32 or Int32 are read directly; any
 * other value is cast to UInt32.  When the cast fails, GRN_INVALID_ARGUMENT
 * is raised on ctx and GRN_ID_NIL is returned.
 *
 * The dispatch is done on header.domain: GRN_DB_UINT32 / GRN_DB_INT32 are
 * domain ids (the rest of this file compares GRN_DB_* against
 * header.domain), so switching on header.type never selected the direct
 * paths for the value kind they were written for.
 */
static grn_id
parse_id_value(grn_ctx *ctx, grn_obj *value)
{
  switch (value->header.domain) {
  case GRN_DB_UINT32 :
    return GRN_UINT32_VALUE(value);
  case GRN_DB_INT32 :
    return GRN_INT32_VALUE(value);
  default :
    {
      grn_id id = GRN_ID_NIL;
      grn_obj casted_value;
      GRN_UINT32_INIT(&casted_value, 0);
      if (grn_obj_cast(ctx, value, &casted_value, GRN_FALSE) != GRN_SUCCESS) {
        grn_obj inspected;
        GRN_TEXT_INIT(&inspected, 0);
        grn_inspect(ctx, &inspected, value);
        ERR(GRN_INVALID_ARGUMENT,
            "<%s>: failed to cast to <UInt32>: <%.*s>",
            GRN_COLUMN_NAME_ID,
            (int)GRN_TEXT_LEN(&inspected),
            GRN_TEXT_VALUE(&inspected));
        GRN_OBJ_FIN(ctx, &inspected);
      } else {
        id = GRN_UINT32_VALUE(&casted_value);
      }
      GRN_OBJ_FIN(ctx, &casted_value);
      return id;
    }
  }
}
/*
 * Finish a JSON array: called by json_read() when ']' is read.
 *
 * The index of the matching open-bracket marker is popped from
 * loader->level and the number of slots enclosed by the array is stored in
 * the marker.  What happens next depends on the remaining nesting depth:
 *   - deeper than loader->emit_level: the array is a nested value (e.g. a
 *     vector); it is left in loader->values for the enclosing array/object;
 *   - no remaining depth, no table, or broken columns: the values are
 *     discarded;
 *   - columns not decided yet: the array is read as the list of column
 *     names (including at most one of "_id" / "_key");
 *   - otherwise: the array is one record; the target record is looked up
 *     or added and each remaining value is stored in its column.
 * On every path that reaches `exit`, the values of this array are dropped
 * (values_size is reset to `begin`) and ctx's error state is cleared.
 */
static void
bracket_close(grn_ctx *ctx, grn_loader *loader)
{
grn_id id = GRN_ID_NIL;
grn_obj *value, *value_end, *id_value = NULL, *key_value = NULL;
grn_obj *col, **cols; /* Columns except _id and _key. */
uint32_t i, begin;
uint32_t ncols; /* Number of columns except _id and _key. */
uint32_t nvalues; /* Number of values in brackets. */
uint32_t depth;
grn_bool is_record_load = GRN_FALSE;
cols = (grn_obj **)GRN_BULK_HEAD(&loader->columns);
ncols = GRN_BULK_VSIZE(&loader->columns) / sizeof(grn_obj *);
GRN_UINT32_POP(&loader->level, begin);
value = (grn_obj *)GRN_TEXT_VALUE(&loader->values) + begin;
value_end = (grn_obj *)GRN_TEXT_VALUE(&loader->values) + loader->values_size;
GRN_ASSERT(value->header.domain == GRN_JSON_LOAD_OPEN_BRACKET);
/* Record how many slots this array encloses; values_next() relies on it. */
GRN_UINT32_SET(ctx, value, loader->values_size - begin - 1);
value++;
/* depth is in bytes: loader->level holds one uint32_t per open bracket/brace. */
depth = GRN_BULK_VSIZE(&loader->level);
if (depth > sizeof(uint32_t) * loader->emit_level) {
/* Nested array: keep its values for the enclosing array/object. */
return;
}
if (depth == 0 || !loader->table ||
loader->columns_status == GRN_LOADER_COLUMNS_BROKEN) {
goto exit;
}
nvalues = values_len(ctx, value, value_end);
if (loader->columns_status == GRN_LOADER_COLUMNS_UNSET) {
/*
 * Target columns and _id or _key are not specified yet and values are
 * handled as column names and "_id" or "_key".
 */
for (i = 0; i < nvalues; i++) {
const char *col_name;
unsigned int col_name_size;
if (value->header.domain != GRN_DB_TEXT) {
grn_obj buffer;
GRN_TEXT_INIT(&buffer, 0);
grn_inspect(ctx, &buffer, value);
ERR(GRN_INVALID_ARGUMENT,
"column name must be string: <%.*s>",
(int)GRN_TEXT_LEN(&buffer), GRN_TEXT_VALUE(&buffer));
grn_loader_save_error(ctx, loader);
GRN_OBJ_FIN(ctx, &buffer);
loader->columns_status = GRN_LOADER_COLUMNS_BROKEN;
goto exit;
}
col_name = GRN_TEXT_VALUE(value);
col_name_size = GRN_TEXT_LEN(value);
col = grn_obj_column(ctx, loader->table, col_name, col_name_size);
if (!col) {
ERR(GRN_INVALID_ARGUMENT, "nonexistent column: <%.*s>",
col_name_size, col_name);
grn_loader_save_error(ctx, loader);
loader->columns_status = GRN_LOADER_COLUMNS_BROKEN;
goto exit;
}
if (name_equal(col_name, col_name_size, GRN_COLUMN_NAME_ID)) {
/* _id is not kept in loader->columns; only its position is recorded. */
grn_obj_unlink(ctx, col);
if (loader->id_offset != -1 || loader->key_offset != -1) {
/* _id and _key must not appear more than once. */
if (loader->id_offset != -1) {
ERR(GRN_INVALID_ARGUMENT,
"duplicated id and key columns: <%s> at %d and <%s> at %d",
GRN_COLUMN_NAME_ID, i,
GRN_COLUMN_NAME_ID, loader->id_offset);
} else {
ERR(GRN_INVALID_ARGUMENT,
"duplicated id and key columns: <%s> at %d and <%s> at %d",
GRN_COLUMN_NAME_ID, i,
GRN_COLUMN_NAME_KEY, loader->key_offset);
}
grn_loader_save_error(ctx, loader);
loader->columns_status = GRN_LOADER_COLUMNS_BROKEN;
goto exit;
}
loader->id_offset = i;
} else if (name_equal(col_name, col_name_size, GRN_COLUMN_NAME_KEY)) {
/* Same handling as _id, recorded in key_offset. */
grn_obj_unlink(ctx, col);
if (loader->id_offset != -1 || loader->key_offset != -1) {
/* _id and _key must not appear more than once. */
if (loader->id_offset != -1) {
ERR(GRN_INVALID_ARGUMENT,
"duplicated id and key columns: <%s> at %d and <%s> at %d",
GRN_COLUMN_NAME_KEY, i,
GRN_COLUMN_NAME_ID, loader->id_offset);
} else {
ERR(GRN_INVALID_ARGUMENT,
"duplicated id and key columns: <%s> at %d and <%s> at %d",
GRN_COLUMN_NAME_KEY, i,
GRN_COLUMN_NAME_KEY, loader->key_offset);
}
grn_loader_save_error(ctx, loader);
loader->columns_status = GRN_LOADER_COLUMNS_BROKEN;
goto exit;
}
loader->key_offset = i;
} else {
GRN_PTR_PUT(ctx, &loader->columns, col);
}
value++;
}
/* Tables with keys need a way to identify the record. */
switch (loader->table->header.type) {
case GRN_TABLE_HASH_KEY :
case GRN_TABLE_PAT_KEY :
case GRN_TABLE_DAT_KEY :
if (loader->id_offset == -1 && loader->key_offset == -1) {
ERR(GRN_INVALID_ARGUMENT, "missing id or key column");
grn_loader_save_error(ctx, loader);
loader->columns_status = GRN_LOADER_COLUMNS_BROKEN;
goto exit;
}
break;
}
loader->columns_status = GRN_LOADER_COLUMNS_SET;
goto exit;
}
/* From here on the array is a record; `exit` reports its id/errors. */
is_record_load = GRN_TRUE;
/* Target columns and _id or _key are already specified. */
if (!nvalues) {
/*
 * Accept empty arrays because a dump command may output a load command
 * which contains empty arrays for a table with deleted records.
 */
id = grn_table_add(ctx, loader->table, NULL, 0, NULL);
} else {
uint32_t expected_nvalues = ncols;
if (loader->id_offset != -1 || loader->key_offset != -1) {
expected_nvalues++;
}
if (nvalues != expected_nvalues) {
ERR(GRN_INVALID_ARGUMENT,
"unexpected #values: expected:%u, actual:%u",
expected_nvalues, nvalues);
grn_loader_save_error(ctx, loader);
goto exit;
}
if (loader->id_offset != -1) {
id_value = value + loader->id_offset;
id = parse_id_value(ctx, id_value);
/*
 * NOTE(review): unlike brace_close(), a new record is added here even
 * when parse_id_value() has raised an error - confirm this is intended.
 */
if (grn_table_at(ctx, loader->table, id) == GRN_ID_NIL) {
id = grn_table_add(ctx, loader->table, NULL, 0, NULL);
}
} else if (loader->key_offset != -1) {
key_value = value + loader->key_offset;
id = loader_add(ctx, key_value);
} else {
id = grn_table_add(ctx, loader->table, NULL, 0, NULL);
}
}
if (id == GRN_ID_NIL) {
/* Target record is not available. */
goto exit;
}
/* Store each value; cols advances only for values that map to a column. */
for (i = 0; i < nvalues; i++, value = values_next(ctx, value)) {
if ((uint) i == (uint) loader->id_offset ||
(uint) i == (uint) loader->key_offset) {
/* Skip _id and _key, because it's already used to get id. */
continue;
}
col = *cols;
if (value->header.domain == GRN_JSON_LOAD_OPEN_BRACKET) {
set_vector(ctx, col, id, value);
} else if (value->header.domain == GRN_JSON_LOAD_OPEN_BRACE) {
set_weight_vector(ctx, col, id, value);
} else {
grn_obj_set_value(ctx, col, id, value, GRN_OBJ_SET);
}
if (ctx->rc != GRN_SUCCESS) {
/* Remember and log the failure, then continue with the next column. */
char column_name[GRN_TABLE_MAX_KEY_SIZE];
unsigned int column_name_size;
grn_loader_save_error(ctx, loader);
column_name_size = grn_obj_name(ctx, col, column_name,
GRN_TABLE_MAX_KEY_SIZE);
/* key_value is NULL here unless the record was identified by _key. */
report_set_column_value_failure(ctx, key_value,
column_name, column_name_size,
value);
ERRCLR(ctx);
}
cols++;
}
if (loader->each) {
/* Run the `each` expression with the loaded record bound to its variable. */
grn_obj *v = grn_expr_get_var_by_offset(ctx, loader->each, 0);
GRN_RECORD_SET(ctx, v, id);
grn_expr_exec(ctx, loader->each, 0);
}
loader->nrecords++;
exit:
if (is_record_load) {
if (loader->output_ids) {
GRN_UINT32_PUT(ctx, &(loader->ids), id);
}
if (loader->output_errors) {
GRN_INT32_PUT(ctx, &(loader->return_codes), ctx->rc);
grn_vector_add_element(ctx,
&(loader->error_messages),
ctx->errbuf,
strlen(ctx->errbuf),
0,
GRN_DB_TEXT);
}
}
/* Drop the values of this array, including its marker. */
loader->values_size = begin;
ERRCLR(ctx);
}
/*
 * Finish a JSON object: called by json_read() when '}' is read.
 *
 * The index of the matching open-brace marker is popped from loader->level
 * and the number of slots enclosed by the object is stored in the marker.
 * If the remaining nesting is deeper than loader->emit_level, the object is
 * a nested value (e.g. a weight vector) and is left in loader->values.
 * Otherwise the object is loaded as one record:
 *   1. the name/value pairs are scanned for "_id" / "_key";
 *   2. the target record is looked up or added;
 *   3. every other pair is stored in the column of that name (unknown
 *      column names are logged and skipped);
 *   4. the `each` expression, if any, is run for the record.
 * On every path that reaches `exit`, the record id / error are reported
 * when requested, the object's values are dropped and ctx's error state is
 * cleared.
 */
static void
brace_close(grn_ctx *ctx, grn_loader *loader)
{
  grn_id id = GRN_ID_NIL;
  grn_obj *value, *value_begin, *value_end;
  grn_obj *id_value = NULL, *key_value = NULL;
  uint32_t begin;

  GRN_UINT32_POP(&loader->level, begin);
  value_begin = (grn_obj *)GRN_TEXT_VALUE(&loader->values) + begin;
  value_end = (grn_obj *)GRN_TEXT_VALUE(&loader->values) + loader->values_size;
  /*
   * The marker is value_begin; `value` is not assigned yet at this point,
   * so asserting on it would read an uninitialized pointer.
   */
  GRN_ASSERT(value_begin->header.domain == GRN_JSON_LOAD_OPEN_BRACE);
  /* Record how many slots this object encloses; values_next() relies on it. */
  GRN_UINT32_SET(ctx, value_begin, loader->values_size - begin - 1);
  value_begin++;
  if ((size_t) GRN_BULK_VSIZE(&loader->level) > sizeof(uint32_t) * loader->emit_level) {
    /* Nested object: keep its values for the enclosing array/object. */
    return;
  }
  if (!loader->table) {
    goto exit;
  }

  /* Scan values to find _id or _key. */
  for (value = value_begin; value + 1 < value_end;
       value = values_next(ctx, value)) {
    const char *name = GRN_TEXT_VALUE(value);
    unsigned int name_size = GRN_TEXT_LEN(value);
    if (value->header.domain != GRN_DB_TEXT) {
      grn_obj buffer;
      GRN_TEXT_INIT(&buffer, 0);
      grn_inspect(ctx, &buffer, value);
      GRN_LOG(ctx, GRN_LOG_ERROR,
              "column name must be string: <%.*s>",
              (int)GRN_TEXT_LEN(&buffer), GRN_TEXT_VALUE(&buffer));
      GRN_OBJ_FIN(ctx, &buffer);
      goto exit;
    }
    /* Step from the name to its value. */
    value++;
    if (name_equal(name, name_size, GRN_COLUMN_NAME_ID)) {
      if (id_value || key_value) {
        if (loader->table->header.type == GRN_TABLE_NO_KEY) {
          GRN_LOG(ctx, GRN_LOG_ERROR, "duplicated '_id' column");
          goto exit;
        } else {
          GRN_LOG(ctx, GRN_LOG_ERROR,
                  "duplicated key columns: %s and %s",
                  id_value ? GRN_COLUMN_NAME_ID : GRN_COLUMN_NAME_KEY,
                  GRN_COLUMN_NAME_ID);
          goto exit;
        }
      }
      id_value = value;
    } else if (name_equal(name, name_size, GRN_COLUMN_NAME_KEY)) {
      if (id_value || key_value) {
        GRN_LOG(ctx, GRN_LOG_ERROR,
                "duplicated key columns: %s and %s",
                id_value ? GRN_COLUMN_NAME_ID : GRN_COLUMN_NAME_KEY,
                GRN_COLUMN_NAME_KEY);
        goto exit;
      }
      key_value = value;
    }
  }

  switch (loader->table->header.type) {
  case GRN_TABLE_HASH_KEY :
  case GRN_TABLE_PAT_KEY :
  case GRN_TABLE_DAT_KEY :
    /* The target table requires _id or _key. */
    if (!id_value && !key_value) {
      GRN_LOG(ctx, GRN_LOG_ERROR, "neither _key nor _id is assigned");
      goto exit;
    }
    break;
  default :
    /* The target table does not have _key. */
    if (key_value) {
      GRN_LOG(ctx, GRN_LOG_ERROR, "nonexistent key value");
      goto exit;
    }
    break;
  }

  /* Look up or add the target record. */
  if (id_value) {
    id = parse_id_value(ctx, id_value);
    if (grn_table_at(ctx, loader->table, id) == GRN_ID_NIL) {
      /* Do not add a record when parsing the id itself failed. */
      if (ctx->rc == GRN_SUCCESS) {
        id = grn_table_add(ctx, loader->table, NULL, 0, NULL);
      }
    }
  } else if (key_value) {
    id = loader_add(ctx, key_value);
  } else {
    id = grn_table_add(ctx, loader->table, NULL, 0, NULL);
  }
  if (id == GRN_ID_NIL) {
    /* Target record is not available. */
    goto exit;
  }

  /* Store every pair except _id / _key in the column named by the pair. */
  for (value = value_begin; value + 1 < value_end;
       value = values_next(ctx, value)) {
    grn_obj *col;
    const char *name = GRN_TEXT_VALUE(value);
    unsigned int name_size = GRN_TEXT_LEN(value);
    value++;
    if (value == id_value || value == key_value) {
      /* Skip _id and _key, because it's already used to get id. */
      continue;
    }
    col = grn_obj_column(ctx, loader->table, name, name_size);
    if (!col) {
      /* Automatic column creation is disabled: log and skip the value. */
      GRN_LOG(ctx, GRN_LOG_ERROR, "invalid column('%.*s')",
              (int)name_size, name);
    } else {
      if (value->header.domain == GRN_JSON_LOAD_OPEN_BRACKET) {
        set_vector(ctx, col, id, value);
      } else if (value->header.domain == GRN_JSON_LOAD_OPEN_BRACE) {
        set_weight_vector(ctx, col, id, value);
      } else {
        grn_obj_set_value(ctx, col, id, value, GRN_OBJ_SET);
      }
      if (ctx->rc != GRN_SUCCESS) {
        /* Remember and log the failure, then continue with the next pair. */
        grn_loader_save_error(ctx, loader);
        report_set_column_value_failure(ctx, key_value,
                                        name, name_size, value);
        ERRCLR(ctx);
      }
      grn_obj_unlink(ctx, col);
    }
  }

  if (loader->each) {
    /* Run the `each` expression with the loaded record bound to its variable. */
    value = grn_expr_get_var_by_offset(ctx, loader->each, 0);
    GRN_RECORD_SET(ctx, value, id);
    grn_expr_exec(ctx, loader->each, 0);
  }
  loader->nrecords++;

exit:
  if (loader->output_ids) {
    GRN_UINT32_PUT(ctx, &(loader->ids), id);
  }
  if (loader->output_errors) {
    GRN_INT32_PUT(ctx, &(loader->return_codes), ctx->rc);
    grn_vector_add_element(ctx,
                           &(loader->error_messages),
                           ctx->errbuf,
                           strlen(ctx->errbuf),
                           0,
                           GRN_DB_TEXT);
  }
  /* Drop the values of this object, including its marker. */
  loader->values_size = begin;
  ERRCLR(ctx);
}
/*
 * Handle '[' in json_read(): push the index of the next value slot onto
 * loader->level, append a slot tagged GRN_JSON_LOAD_OPEN_BRACKET as the
 * array marker, switch to the TOKEN state and consume the character.
 * Expects `ctx`, `loader` and `str` to be in scope at the expansion site.
 * NOTE(review): the result of values_add() is not checked; if it returns
 * NULL, loader->last still refers to the previous value.
 */
#define JSON_READ_OPEN_BRACKET() do {\
GRN_UINT32_PUT(ctx, &loader->level, loader->values_size);\
values_add(ctx, loader);\
loader->last->header.domain = GRN_JSON_LOAD_OPEN_BRACKET;\
loader->stat = GRN_LOADER_TOKEN;\
str++;\
} while (0)
/*
 * Handle '{' in json_read(): push the index of the next value slot onto
 * loader->level, append a slot tagged GRN_JSON_LOAD_OPEN_BRACE as the
 * object marker, switch to the TOKEN state and consume the character.
 * Expects `ctx`, `loader` and `str` to be in scope at the expansion site.
 * NOTE(review): the result of values_add() is not checked; if it returns
 * NULL, loader->last still refers to the previous value.
 */
#define JSON_READ_OPEN_BRACE() do {\
GRN_UINT32_PUT(ctx, &loader->level, loader->values_size);\
values_add(ctx, loader);\
loader->last->header.domain = GRN_JSON_LOAD_OPEN_BRACE;\
loader->stat = GRN_LOADER_TOKEN;\
str++;\
} while (0)
/*
 * Feed `str_len` bytes of JSON text at `str` to the loader.
 *
 * The parser is a character-driven state machine whose state is kept in
 * loader->stat (and loader->last / loader->unichar), not in locals.
 * Scalars are accumulated into loader->last; ']' and '}' hand the collected
 * values to bracket_close() / brace_close().  Once the state becomes
 * GRN_LOADER_END the rest of the input is skipped.
 */
static void
json_read(grn_ctx *ctx, grn_loader *loader, const char *str, unsigned int str_len)
{
const char *const beg = str;
char c;
int len;
const char *se = str + str_len;
while (str < se) {
c = *str;
switch (loader->stat) {
/* BEGIN: skip whitespace until the opening '[' or '{'. */
case GRN_LOADER_BEGIN :
if ((len = grn_isspace(str, ctx->encoding))) {
str += len;
continue;
}
switch (c) {
case '[' :
JSON_READ_OPEN_BRACKET();
break;
case '{' :
JSON_READ_OPEN_BRACE();
break;
default :
ERR(GRN_INVALID_ARGUMENT,
"JSON must start with '[' or '{': <%.*s>", str_len, beg);
loader->stat = GRN_LOADER_END;
break;
}
break;
/* TOKEN: between values; decide what the next character starts. */
case GRN_LOADER_TOKEN :
if ((len = grn_isspace(str, ctx->encoding))) {
str += len;
continue;
}
switch (c) {
case '"' :
loader->stat = GRN_LOADER_STRING;
values_add(ctx, loader);
str++;
break;
case '[' :
JSON_READ_OPEN_BRACKET();
break;
case '{' :
JSON_READ_OPEN_BRACE();
break;
/* ':' and ',' are consumed without any structural check. */
case ':' :
str++;
break;
case ',' :
str++;
break;
case ']' :
bracket_close(ctx, loader);
/* Parsing ends once no bracket/brace remains open. */
loader->stat = GRN_BULK_VSIZE(&loader->level) ? GRN_LOADER_TOKEN : GRN_LOADER_END;
if (ctx->rc == GRN_CANCEL) {
loader->stat = GRN_LOADER_END;
}
str++;
break;
case '}' :
brace_close(ctx, loader);
loader->stat = GRN_BULK_VSIZE(&loader->level) ? GRN_LOADER_TOKEN : GRN_LOADER_END;
if (ctx->rc == GRN_CANCEL) {
loader->stat = GRN_LOADER_END;
}
str++;
break;
/* Start of a number: the character is consumed by the NUMBER state. */
case '+' : case '-' : case '0' : case '1' : case '2' : case '3' :
case '4' : case '5' : case '6' : case '7' : case '8' : case '9' :
loader->stat = GRN_LOADER_NUMBER;
values_add(ctx, loader);
break;
default :
if (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || ('_' == c)) {
/* Start of a bare word: the character is consumed by the SYMBOL state. */
loader->stat = GRN_LOADER_SYMBOL;
values_add(ctx, loader);
} else {
/* Anything else is logged and skipped. */
if ((len = grn_charlen(ctx, str, se))) {
GRN_LOG(ctx, GRN_LOG_ERROR, "ignored invalid char('%c') at", c);
GRN_LOG(ctx, GRN_LOG_ERROR, "%.*s", (int)(str - beg) + len, beg);
GRN_LOG(ctx, GRN_LOG_ERROR, "%*s", (int)(str - beg) + 1, "^");
str += len;
} else {
GRN_LOG(ctx, GRN_LOG_ERROR, "ignored invalid char(\\x%.2x) after", c);
GRN_LOG(ctx, GRN_LOG_ERROR, "%.*s", (int)(str - beg), beg);
str = se;
}
}
break;
}
break;
/* SYMBOL: accumulate [A-Za-z0-9_]; on any other character finish the word. */
case GRN_LOADER_SYMBOL :
if (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') ||
('0' <= c && c <= '9') || ('_' == c)) {
GRN_TEXT_PUTC(ctx, loader->last, c);
str++;
} else {
/* null / true / false are converted; other words stay as text. */
char *v = GRN_TEXT_VALUE(loader->last);
switch (*v) {
case 'n' :
if (GRN_TEXT_LEN(loader->last) == 4 && !memcmp(v, "null", 4)) {
loader->last->header.domain = GRN_DB_VOID;
GRN_BULK_REWIND(loader->last);
}
break;
case 't' :
if (GRN_TEXT_LEN(loader->last) == 4 && !memcmp(v, "true", 4)) {
loader->last->header.domain = GRN_DB_BOOL;
GRN_BOOL_SET(ctx, loader->last, GRN_TRUE);
}
break;
case 'f' :
if (GRN_TEXT_LEN(loader->last) == 5 && !memcmp(v, "false", 5)) {
loader->last->header.domain = GRN_DB_BOOL;
GRN_BOOL_SET(ctx, loader->last, GRN_FALSE);
}
break;
default :
break;
}
/* The terminating character is not consumed here. */
loader->stat = GRN_BULK_VSIZE(&loader->level) ? GRN_LOADER_TOKEN : GRN_LOADER_END;
}
break;
/* NUMBER: accumulate number characters; on any other character convert. */
case GRN_LOADER_NUMBER :
switch (c) {
case '+' : case '-' : case '.' : case 'e' : case 'E' :
case '0' : case '1' : case '2' : case '3' : case '4' :
case '5' : case '6' : case '7' : case '8' : case '9' :
GRN_TEXT_PUTC(ctx, loader->last, c);
str++;
break;
default :
{
/*
 * Note: this `str` shadows the input cursor and points at the
 * accumulated number text.  Conversion is tried as Int64, then
 * UInt64, then Float; if none consumes the whole text the value is
 * left as text.
 */
const char *cur, *str = GRN_BULK_HEAD(loader->last);
const char *str_end = GRN_BULK_CURR(loader->last);
int64_t i = grn_atoll(str, str_end, &cur);
if (cur == str_end) {
loader->last->header.domain = GRN_DB_INT64;
GRN_INT64_SET(ctx, loader->last, i);
} else if (cur != str) {
uint64_t i = grn_atoull(str, str_end, &cur);
if (cur == str_end) {
loader->last->header.domain = GRN_DB_UINT64;
GRN_UINT64_SET(ctx, loader->last, i);
} else if (cur != str) {
double d;
char *end;
grn_obj buf;
/* strtod() needs a NUL-terminated copy of the text. */
GRN_TEXT_INIT(&buf, 0);
GRN_TEXT_PUT(ctx, &buf, str, GRN_BULK_VSIZE(loader->last));
GRN_TEXT_PUTC(ctx, &buf, '\0');
errno = 0;
d = strtod(GRN_TEXT_VALUE(&buf), &end);
/* Accept only if everything up to the added NUL was parsed. */
if (!errno && end + 1 == GRN_BULK_CURR(&buf)) {
loader->last->header.domain = GRN_DB_FLOAT;
GRN_FLOAT_SET(ctx, loader->last, d);
}
GRN_OBJ_FIN(ctx, &buf);
}
}
}
/* The terminating character is not consumed here. */
loader->stat = GRN_BULK_VSIZE(&loader->level) ? GRN_LOADER_TOKEN : GRN_LOADER_END;
break;
}
break;
/* STRING: inside a double-quoted string. */
case GRN_LOADER_STRING :
switch (c) {
case '\\' :
loader->stat = GRN_LOADER_STRING_ESC;
str++;
break;
case '"' :
str++;
loader->stat = GRN_BULK_VSIZE(&loader->level) ? GRN_LOADER_TOKEN : GRN_LOADER_END;
/*
*(GRN_BULK_CURR(loader->last)) = '\0';
GRN_LOG(ctx, GRN_LOG_ALERT, "read str(%s)", GRN_TEXT_VALUE(loader->last));
*/
break;
default :
/* Copy one whole (possibly multi-byte) character. */
if ((len = grn_charlen(ctx, str, se))) {
GRN_TEXT_PUT(ctx, loader->last, str, len);
str += len;
} else {
GRN_LOG(ctx, GRN_LOG_ERROR, "ignored invalid char(\\x%.2x) after", c);
GRN_LOG(ctx, GRN_LOG_ERROR, "%.*s", (int)(str - beg), beg);
str = se;
}
break;
}
break;
/* STRING_ESC: the character right after a backslash. */
case GRN_LOADER_STRING_ESC :
switch (c) {
case 'b' :
GRN_TEXT_PUTC(ctx, loader->last, '\b');
loader->stat = GRN_LOADER_STRING;
break;
case 'f' :
GRN_TEXT_PUTC(ctx, loader->last, '\f');
loader->stat = GRN_LOADER_STRING;
break;
case 'n' :
GRN_TEXT_PUTC(ctx, loader->last, '\n');
loader->stat = GRN_LOADER_STRING;
break;
case 'r' :
GRN_TEXT_PUTC(ctx, loader->last, '\r');
loader->stat = GRN_LOADER_STRING;
break;
case 't' :
GRN_TEXT_PUTC(ctx, loader->last, '\t');
loader->stat = GRN_LOADER_STRING;
break;
case 'u' :
loader->stat = GRN_LOADER_UNICODE0;
break;
default :
/* Any other escaped character is taken literally. */
GRN_TEXT_PUTC(ctx, loader->last, c);
loader->stat = GRN_LOADER_STRING;
break;
}
str++;
break;
/*
 * UNICODE0..UNICODE3: the four hex digits of a \uXXXX escape, most
 * significant first, accumulated in loader->unichar.  A non-hex character
 * is not reported; it leaves loader->unichar unchanged for that digit.
 */
case GRN_LOADER_UNICODE0 :
switch (c) {
case '0' : case '1' : case '2' : case '3' : case '4' :
case '5' : case '6' : case '7' : case '8' : case '9' :
loader->unichar = (c - '0') * 0x1000;
break;
case 'a' : case 'b' : case 'c' : case 'd' : case 'e' : case 'f' :
loader->unichar = (c - 'a' + 10) * 0x1000;
break;
case 'A' : case 'B' : case 'C' : case 'D' : case 'E' : case 'F' :
loader->unichar = (c - 'A' + 10) * 0x1000;
break;
default :
;// todo : error
}
loader->stat = GRN_LOADER_UNICODE1;
str++;
break;
case GRN_LOADER_UNICODE1 :
switch (c) {
case '0' : case '1' : case '2' : case '3' : case '4' :
case '5' : case '6' : case '7' : case '8' : case '9' :
loader->unichar += (c - '0') * 0x100;
break;
case 'a' : case 'b' : case 'c' : case 'd' : case 'e' : case 'f' :
loader->unichar += (c - 'a' + 10) * 0x100;
break;
case 'A' : case 'B' : case 'C' : case 'D' : case 'E' : case 'F' :
loader->unichar += (c - 'A' + 10) * 0x100;
break;
default :
;// todo : error
}
loader->stat = GRN_LOADER_UNICODE2;
str++;
break;
case GRN_LOADER_UNICODE2 :
switch (c) {
case '0' : case '1' : case '2' : case '3' : case '4' :
case '5' : case '6' : case '7' : case '8' : case '9' :
loader->unichar += (c - '0') * 0x10;
break;
case 'a' : case 'b' : case 'c' : case 'd' : case 'e' : case 'f' :
loader->unichar += (c - 'a' + 10) * 0x10;
break;
case 'A' : case 'B' : case 'C' : case 'D' : case 'E' : case 'F' :
loader->unichar += (c - 'A' + 10) * 0x10;
break;
default :
;// todo : error
}
loader->stat = GRN_LOADER_UNICODE3;
str++;
break;
case GRN_LOADER_UNICODE3 :
switch (c) {
case '0' : case '1' : case '2' : case '3' : case '4' :
case '5' : case '6' : case '7' : case '8' : case '9' :
loader->unichar += (c - '0');
break;
case 'a' : case 'b' : case 'c' : case 'd' : case 'e' : case 'f' :
loader->unichar += (c - 'a' + 10);
break;
case 'A' : case 'B' : case 'C' : case 'D' : case 'E' : case 'F' :
loader->unichar += (c - 'A' + 10);
break;
default :
;// todo : error
}
{
/*
 * Emit the code unit as 1, 2 or 3 bytes of UTF-8.  Each \uXXXX escape
 * is encoded on its own; two escapes are never combined into one
 * character here.
 */
uint32_t u = loader->unichar;
if (u < 0x80) {
GRN_TEXT_PUTC(ctx, loader->last, u);
} else {
if (u < 0x800) {
GRN_TEXT_PUTC(ctx, loader->last, ((u >> 6) & 0x1f) | 0xc0);
} else {
GRN_TEXT_PUTC(ctx, loader->last, (u >> 12) | 0xe0);
GRN_TEXT_PUTC(ctx, loader->last, ((u >> 6) & 0x3f) | 0x80);
}
GRN_TEXT_PUTC(ctx, loader->last, (u & 0x3f) | 0x80);
}
}
loader->stat = GRN_LOADER_STRING;
str++;
break;
/* END: nothing more is expected; skip the rest of the input. */
case GRN_LOADER_END :
str = se;
break;
}
}
}
#undef JSON_READ_OPEN_BRACKET
#undef JSON_READ_OPEN_BRACE
/*
 * grn_loader_parse_columns parses a columns parameter.
 * Columns except _id and _key are appended to loader->columns.
 * If it contains _id or _key, loader->id_offset or loader->key_offset is set
 * to the position of that name within the whole column list.
 *
 * The list is tokenized in batches of up to 256 names.  The recorded
 * offsets are later used as indexes into the values of each loaded row, so
 * they are counted across batches with `offset` rather than taken from the
 * per-batch index, which restarts at 0 for every batch.
 *
 * Returns GRN_SUCCESS, or the error set on ctx when a column does not
 * exist, when _id/_key appear more than once, or when a table with keys is
 * given neither _id nor _key.
 */
static grn_rc
grn_loader_parse_columns(grn_ctx *ctx, grn_loader *loader,
                         const char *str, unsigned int str_size)
{
  const char *ptr = str, *ptr_end = ptr + str_size, *rest;
  const char *tokens[256], *token_end;
  int offset = 0; /* Position of the current name in the whole list. */

  while (ptr < ptr_end) {
    int i, n = grn_tokenize(ptr, ptr_end - ptr, tokens, 256, &rest);
    for (i = 0; i < n; i++, offset++) {
      grn_obj *column;
      /* tokens[i] marks the end of the i-th name of this batch. */
      token_end = tokens[i];
      /* Skip the separators in front of the name. */
      while (ptr < token_end && (' ' == *ptr || ',' == *ptr)) {
        ptr++;
      }
      column = grn_obj_column(ctx, loader->table, ptr, token_end - ptr);
      if (!column) {
        ERR(GRN_INVALID_ARGUMENT, "nonexistent column: <%.*s>",
            (int)(token_end - ptr), ptr);
        return ctx->rc;
      }
      if (name_equal(ptr, token_end - ptr, GRN_COLUMN_NAME_ID)) {
        /* _id is not kept in loader->columns; only its position is. */
        grn_obj_unlink(ctx, column);
        if (loader->id_offset != -1 || loader->key_offset != -1) {
          /* _id and _key must not appear more than once. */
          if (loader->id_offset != -1) {
            ERR(GRN_INVALID_ARGUMENT,
                "duplicated id and key columns: <%s> at %d and <%s> at %d",
                GRN_COLUMN_NAME_ID, offset,
                GRN_COLUMN_NAME_ID, loader->id_offset);
          } else {
            ERR(GRN_INVALID_ARGUMENT,
                "duplicated id and key columns: <%s> at %d and <%s> at %d",
                GRN_COLUMN_NAME_ID, offset,
                GRN_COLUMN_NAME_KEY, loader->key_offset);
          }
          return ctx->rc;
        }
        loader->id_offset = offset;
      } else if (name_equal(ptr, token_end - ptr, GRN_COLUMN_NAME_KEY)) {
        /* Same handling as _id, recorded in key_offset. */
        grn_obj_unlink(ctx, column);
        if (loader->id_offset != -1 || loader->key_offset != -1) {
          /* _id and _key must not appear more than once. */
          if (loader->id_offset != -1) {
            ERR(GRN_INVALID_ARGUMENT,
                "duplicated id and key columns: <%s> at %d and <%s> at %d",
                GRN_COLUMN_NAME_KEY, offset,
                GRN_COLUMN_NAME_ID, loader->id_offset);
          } else {
            ERR(GRN_INVALID_ARGUMENT,
                "duplicated id and key columns: <%s> at %d and <%s> at %d",
                GRN_COLUMN_NAME_KEY, offset,
                GRN_COLUMN_NAME_KEY, loader->key_offset);
          }
          return ctx->rc;
        }
        loader->key_offset = offset;
      } else {
        GRN_PTR_PUT(ctx, &loader->columns, column);
      }
      ptr = token_end;
    }
    /* Continue with whatever the tokenizer left for the next batch. */
    ptr = rest;
  }

  /* Tables with keys need a way to identify the record. */
  switch (loader->table->header.type) {
  case GRN_TABLE_HASH_KEY :
  case GRN_TABLE_PAT_KEY :
  case GRN_TABLE_DAT_KEY :
    if (loader->id_offset == -1 && loader->key_offset == -1) {
      ERR(GRN_INVALID_ARGUMENT, "missing id or key column");
      return ctx->rc;
    }
    break;
  }
  return ctx->rc;
}
/*
 * Address handed to grn_edges_add_communicator() by grn_load_internal().
 * Nothing in this file assigns it.
 */
static grn_com_addr *addr;

/*
 * Run one load request described by `input` on ctx's loader.
 *
 * When input->table is given, the loader is reset and configured from the
 * input (table, optional columns, optional `ifexists` / `each` expressions,
 * output options).  When it is empty, the call continues a load that was
 * configured earlier, and a table must already be set.  The values are then
 * parsed according to the loader's input type; only JSON is implemented.
 * On a configuration error the error is set on ctx and the loader is moved
 * to the END state.
 */
void
grn_load_internal(grn_ctx *ctx, grn_load_input *input)
{
  grn_loader *loader = &ctx->impl->loader;

  loader->emit_level = input->emit_level;
  if (ctx->impl->edge) {
    grn_edge *edge = grn_edges_add_communicator(ctx, addr);
    grn_obj *msg = grn_msg_open(ctx, edge->com, &ctx->impl->edge->send_old);
    /* build msg */
    grn_edge_dispatch(ctx, edge, msg);
  }

  if (input->table.length > 0) {
    /* A new load: start from a clean loader. */
    grn_ctx_loader_clear(ctx);
    loader->input_type = input->type;

    if (grn_db_check_name(ctx, input->table.value, input->table.length)) {
      GRN_DB_CHECK_NAME_ERR("[table][load]",
                            input->table.value,
                            (int)(input->table.length));
      loader->stat = GRN_LOADER_END;
      return;
    }

    loader->table = grn_ctx_get(ctx, input->table.value, input->table.length);
    if (!loader->table) {
      ERR(GRN_INVALID_ARGUMENT,
          "nonexistent table: <%.*s>",
          (int)(input->table.length),
          input->table.value);
      loader->stat = GRN_LOADER_END;
      return;
    }

    if (input->columns.length > 0) {
      grn_rc columns_rc;
      columns_rc = grn_loader_parse_columns(ctx,
                                            loader,
                                            input->columns.value,
                                            input->columns.length);
      if (columns_rc != GRN_SUCCESS) {
        loader->columns_status = GRN_LOADER_COLUMNS_BROKEN;
        loader->stat = GRN_LOADER_END;
        return;
      }
      loader->columns_status = GRN_LOADER_COLUMNS_SET;
    }

    if (input->if_exists.length > 0) {
      grn_obj *record_var;
      GRN_EXPR_CREATE_FOR_QUERY(ctx, loader->table, loader->ifexists,
                                record_var);
      if (loader->ifexists && record_var) {
        grn_expr_parse(ctx,
                       loader->ifexists,
                       input->if_exists.value,
                       input->if_exists.length,
                       NULL, GRN_OP_EQUAL, GRN_OP_AND,
                       GRN_EXPR_SYNTAX_SCRIPT|GRN_EXPR_ALLOW_UPDATE);
      }
    }

    if (input->each.length > 0) {
      grn_obj *record_var;
      GRN_EXPR_CREATE_FOR_QUERY(ctx, loader->table, loader->each, record_var);
      if (loader->each && record_var) {
        grn_expr_parse(ctx,
                       loader->each,
                       input->each.value,
                       input->each.length,
                       NULL, GRN_OP_EQUAL, GRN_OP_AND,
                       GRN_EXPR_SYNTAX_SCRIPT|GRN_EXPR_ALLOW_UPDATE);
      }
    }

    loader->output_ids = input->output_ids;
    loader->output_errors = input->output_errors;
  } else if (!loader->table) {
    /* Continuation of a load that was never started. */
    ERR(GRN_INVALID_ARGUMENT, "mandatory \"table\" parameter is absent");
    loader->stat = GRN_LOADER_END;
    return;
  }

  switch (loader->input_type) {
  case GRN_CONTENT_JSON :
    json_read(ctx, loader, input->values.value, input->values.length);
    break;
  case GRN_CONTENT_NONE :
  case GRN_CONTENT_TSV :
  case GRN_CONTENT_XML :
  case GRN_CONTENT_MSGPACK :
  case GRN_CONTENT_GROONGA_COMMAND_LIST :
    ERR(GRN_FUNCTION_NOT_IMPLEMENTED, "unsupported input_type");
    loader->stat = GRN_LOADER_END;
    // todo
    break;
  }
}
/*
 * Public entry point of the load command.
 *
 * Packs the arguments into a grn_load_input (with output_ids and
 * output_errors disabled and emit_level 1) and runs grn_load_internal().
 *
 * Returns GRN_INVALID_ARGUMENT when ctx is NULL.  When ctx has no impl,
 * the error is set on ctx and its rc returned.  Otherwise returns ctx->rc
 * as left by the load.
 */
grn_rc
grn_load(grn_ctx *ctx, grn_content_type input_type,
         const char *table, unsigned int table_len,
         const char *columns, unsigned int columns_len,
         const char *values, unsigned int values_len,
         const char *ifexists, unsigned int ifexists_len,
         const char *each, unsigned int each_len)
{
  if (!ctx) {
    /* There is no context to record the error on: ERR() would use it. */
    return GRN_INVALID_ARGUMENT;
  }
  if (!ctx->impl) {
    ERR(GRN_INVALID_ARGUMENT, "db not initialized");
    return ctx->rc;
  }
  GRN_API_ENTER;
  {
    grn_load_input input;
    input.type = input_type;
    input.table.value = table;
    input.table.length = table_len;
    input.columns.value = columns;
    input.columns.length = columns_len;
    input.values.value = values;
    input.values.length = values_len;
    input.if_exists.value = ifexists;
    input.if_exists.length = ifexists_len;
    input.each.value = each;
    input.each.length = each_len;
    input.output_ids = GRN_FALSE;
    input.output_errors = GRN_FALSE;
    input.emit_level = 1;
    grn_load_internal(ctx, &input);
  }
  GRN_API_RETURN(ctx->rc);
}
PRAGMA_REENABLE_CHECK_STACK_FRAME