mirror of
https://github.com/MariaDB/server.git
synced 2025-02-05 21:32:18 +01:00
275 lines
7 KiB
C
275 lines
7 KiB
C
/* -*- c-basic-offset: 2; coding: utf-8 -*- */
|
|
/*
|
|
Copyright (C) 2015-2016 Kouhei Sutou <kou@clear-code.com>
|
|
|
|
This library is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU Lesser General Public
|
|
License version 2.1 as published by the Free Software Foundation.
|
|
|
|
This library is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Lesser General Public License for more details.
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
License along with this library; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
|
|
*/
|
|
|
|
/*
|
|
Groonga: ed300a833d44eaefa978b5ecf46a96ef91ae0891
|
|
|
|
CFLAGS: -O2 -g
|
|
% make --quiet -C benchmark run-bench-nfkc
|
|
run-bench-nfkc:
|
|
(total) (average) (median)
|
|
map1 - switch : (0.0060ms) (0.00060000ms) (0.00000000ms)
|
|
map1 - table : (0.00000000ms) (0.00000000ms) (0.00000000ms)
|
|
map2 - switch - no change: (0.0010ms) (0.00010000ms) (0.00000000ms)
|
|
map2 - table - no change: (0.00000000ms) (0.00000000ms) (0.00000000ms)
|
|
map2 - switch - change: (0.0010ms) (0.00010000ms) (0.00000000ms)
|
|
map2 - table - change: (0.0010ms) (0.00010000ms) (0.00000000ms)
|
|
*/
|
|
|
|
#include <errno.h>
#include <limits.h>
#include <stdlib.h>
#include <stdio.h>

#include <glib.h>

#include <groonga.h>

#include "lib/benchmark.h"

#include "../lib/nfkc50.c"
|
|
|
|
#define MAX_UNICODE 0x110000
|
|
#define BUFFER_SIZE 0x100
|
|
|
|
/*
 * Encodes the code point `i` into `buf` as a NUL-terminated UTF-8 byte
 * sequence and returns the number of bytes written, not counting the
 * terminating NUL.
 *
 * The extended (up to 6 bytes) UTF-8 layout is used, so every value up to
 * 0x7fffffff is accepted; `buf` must therefore have room for 7 bytes.
 * Surrogates and values above 0x10ffff are encoded as-is, not rejected.
 * `i == 0` yields a single 0x00 byte (return value 1).
 *
 * Values of 0x80000000 and above do not fit in 6 bytes. For those only the
 * terminating NUL is stored and 0 is returned, rather than emitting
 * continuation bytes that have no lead byte.
 */
static inline int
ucs2utf8(unsigned int i, unsigned char *buf)
{
  unsigned char *p = buf;
  int n_trailing; /* continuation bytes still to emit after the lead byte */

  if (i < 0x80) {
    *p++ = i;
    n_trailing = 0;
  } else if (i < 0x800) {
    *p++ = (i >> 6) | 0xc0;
    n_trailing = 1;
  } else if (i < 0x00010000) {
    *p++ = (i >> 12) | 0xe0;
    n_trailing = 2;
  } else if (i < 0x00200000) {
    *p++ = (i >> 18) | 0xf0;
    n_trailing = 3;
  } else if (i < 0x04000000) {
    *p++ = (i >> 24) | 0xf8;
    n_trailing = 4;
  } else if (i < 0x80000000) {
    *p++ = (i >> 30) | 0xfc;
    n_trailing = 5;
  } else {
    /* Not representable: leave an empty string instead of a bogus one. */
    *p = '\0';
    return 0;
  }

  /* Each continuation byte carries 6 payload bits, most significant first. */
  while (n_trailing > 0) {
    n_trailing--;
    *p++ = ((i >> (6 * n_trailing)) & 0x3f) | 0x80;
  }

  *p = '\0';
  return (int)(p - buf);
}
|
|
|
|
static void
|
|
bench_char_type(gpointer user_data)
|
|
{
|
|
uint64_t code_point;
|
|
char utf8[7];
|
|
|
|
for (code_point = 1; code_point < MAX_UNICODE; code_point++) {
|
|
ucs2utf8(code_point, (unsigned char *)utf8);
|
|
grn_nfkc50_char_type(utf8);
|
|
}
|
|
}
|
|
|
|
static void
|
|
bench_decompose(gpointer user_data)
|
|
{
|
|
uint64_t code_point;
|
|
char utf8[7];
|
|
|
|
for (code_point = 1; code_point < MAX_UNICODE; code_point++) {
|
|
ucs2utf8(code_point, (unsigned char *)utf8);
|
|
grn_nfkc50_decompose(utf8);
|
|
}
|
|
}
|
|
|
|
static void
|
|
bench_compose_no_change(gpointer user_data)
|
|
{
|
|
uint64_t prefix_code_point;
|
|
uint64_t suffix_code_point = 0x61; /* a */
|
|
char prefix_utf8[7];
|
|
char suffix_utf8[7];
|
|
|
|
ucs2utf8(suffix_code_point, (unsigned char *)suffix_utf8);
|
|
for (prefix_code_point = 1;
|
|
prefix_code_point < MAX_UNICODE;
|
|
prefix_code_point++) {
|
|
ucs2utf8(prefix_code_point, (unsigned char *)prefix_utf8);
|
|
grn_nfkc50_compose(prefix_utf8, suffix_utf8);
|
|
}
|
|
}
|
|
|
|
static void
|
|
bench_compose_change(gpointer user_data)
|
|
{
|
|
uint64_t prefix_code_point;
|
|
uint64_t suffix_code_point = 0x11ba;
|
|
char prefix_utf8[7];
|
|
char suffix_utf8[7];
|
|
|
|
ucs2utf8(suffix_code_point, (unsigned char *)suffix_utf8);
|
|
for (prefix_code_point = 1;
|
|
prefix_code_point < MAX_UNICODE;
|
|
prefix_code_point++) {
|
|
ucs2utf8(prefix_code_point, (unsigned char *)prefix_utf8);
|
|
grn_nfkc50_compose(prefix_utf8, suffix_utf8);
|
|
}
|
|
}
|
|
|
|
/*
|
|
static void
|
|
check_char_type(gpointer user_data)
|
|
{
|
|
uint64_t code_point;
|
|
char utf8[7];
|
|
|
|
for (code_point = 1; code_point < MAX_UNICODE; code_point++) {
|
|
grn_char_type a;
|
|
grn_char_type b;
|
|
|
|
ucs2utf8(code_point, (unsigned char *)utf8);
|
|
a = grn_nfkc_char_type(utf8);
|
|
b = grn_nfkc50_char_type(utf8);
|
|
if (a == b) {
|
|
continue;
|
|
}
|
|
printf("%lx: %s: %d != %d\n", code_point, utf8, a, b);
|
|
}
|
|
}
|
|
|
|
static void
|
|
check_decompose(gpointer user_data)
|
|
{
|
|
uint64_t code_point;
|
|
char utf8[7];
|
|
|
|
for (code_point = 1; code_point < MAX_UNICODE; code_point++) {
|
|
const char *a;
|
|
const char *b;
|
|
|
|
ucs2utf8(code_point, (unsigned char *)utf8);
|
|
a = grn_nfkc_decompose(utf8);
|
|
b = grn_nfkc50_decompose(utf8);
|
|
if (a == b) {
|
|
continue;
|
|
}
|
|
if (!a || !b) {
|
|
printf("%lx: %s: %s != %s\n", code_point, utf8, a, b);
|
|
continue;
|
|
}
|
|
if (strcmp(a, b) != 0) {
|
|
printf("%lx: %s: %s != %s\n", code_point, utf8, a, b);
|
|
}
|
|
}
|
|
}
|
|
|
|
static void
|
|
check_compose(gpointer user_data)
|
|
{
|
|
uint64_t prefix_code_point;
|
|
uint64_t suffix_code_point;
|
|
char prefix_utf8[7];
|
|
char suffix_utf8[7];
|
|
|
|
for (prefix_code_point = 1;
|
|
prefix_code_point < MAX_UNICODE;
|
|
prefix_code_point++) {
|
|
ucs2utf8(prefix_code_point, (unsigned char *)prefix_utf8);
|
|
for (suffix_code_point = 1;
|
|
suffix_code_point < MAX_UNICODE;
|
|
suffix_code_point++) {
|
|
const char *a;
|
|
const char *b;
|
|
|
|
ucs2utf8(suffix_code_point, (unsigned char *)suffix_utf8);
|
|
a = grn_nfkc_compose(prefix_utf8, suffix_utf8);
|
|
b = grn_nfkc50_compose(prefix_utf8, suffix_utf8);
|
|
if (a == b) {
|
|
continue;
|
|
}
|
|
if (!a || !b) {
|
|
printf("%lx-%lx: %s-%s: %s != %s\n",
|
|
prefix_code_point, suffix_code_point,
|
|
prefix_utf8, suffix_utf8,
|
|
a, b);
|
|
continue;
|
|
}
|
|
if (strcmp(a, b) != 0) {
|
|
printf("%lx-%lx: %s-%s: %s != %s\n",
|
|
prefix_code_point, suffix_code_point,
|
|
prefix_utf8, suffix_utf8,
|
|
a, b);
|
|
}
|
|
}
|
|
if ((prefix_code_point % 10000) == 0) {
|
|
printf("%" G_GUINT64_FORMAT "\n", prefix_code_point);
|
|
}
|
|
}
|
|
}
|
|
*/
|
|
|
|
int
|
|
main(int argc, gchar **argv)
|
|
{
|
|
grn_rc rc;
|
|
BenchReporter *reporter;
|
|
gint n = 10;
|
|
|
|
rc = grn_init();
|
|
if (rc != GRN_SUCCESS) {
|
|
g_print("failed to initialize Groonga: <%d>: %s\n",
|
|
rc, grn_get_global_error_message());
|
|
return EXIT_FAILURE;
|
|
}
|
|
bench_init(&argc, &argv);
|
|
|
|
reporter = bench_reporter_new();
|
|
|
|
if (g_getenv("N")) {
|
|
n = atoi(g_getenv("N"));
|
|
}
|
|
|
|
#define REGISTER(label, bench_function) \
|
|
bench_reporter_register(reporter, label, n, \
|
|
NULL, \
|
|
bench_function, \
|
|
NULL, \
|
|
NULL)
|
|
REGISTER("char_type ", bench_char_type);
|
|
REGISTER("decompose ", bench_decompose);
|
|
REGISTER("compose - no change", bench_compose_no_change);
|
|
REGISTER("compose - change", bench_compose_change);
|
|
|
|
/*
|
|
REGISTER("check - char_type", check_char_type);
|
|
REGISTER("check - decompose", check_decompose);
|
|
REGISTER("check - compose ", check_compose);
|
|
*/
|
|
#undef REGISTER
|
|
|
|
bench_reporter_run(reporter);
|
|
g_object_unref(reporter);
|
|
|
|
return EXIT_SUCCESS;
|
|
}
|