mariadb/storage/mroonga/lib/mrn_encoding.cpp
Monty a206658b98 Change CHARSET_INFO character set and collation names to LEX_CSTRING
This change removed 68 explicit strlen() calls from the code.

The following renames were done to ensure we don't use the old names
when merging code from earlier releases, as using the new variables
in print functions could result in crashes:
- charset->csname renamed to charset->cs_name
- charset->name renamed to charset->coll_name

Almost all of the changes were mechanical, except:
- Changed to use the new Protocol::store(LEX_CSTRING..) when possible
- Changed to use field->store(LEX_CSTRING*, CHARSET_INFO*) when possible
- Changed to use String->append(LEX_CSTRING&) when possible

Other things:
- There were compiler issues with ensuring that all character set names
  point to the same string: gcc doesn't allow one to use integer constants
  when defining global structures (constant char * pointers work fine).
  To get around this, I declared defines for each character set name
  length.
2021-05-19 22:54:07 +02:00

242 lines
8.2 KiB
C++

/* -*- c-basic-offset: 2 -*- */
/*
Copyright(C) 2013 Kouhei Sutou <kou@clear-code.com>
Copyright(C) 2011-2013 Kentoku SHIBA
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
*/
#include <mrn_err.h>
#include "mrn_encoding.hpp"
namespace mrn {
namespace encoding {
/*
  Cached CHARSET_INFO entries, one per character set name recognized by
  init(). Each pointer is NULL until init() finds a matching entry in
  all_charsets. They are grouped below by the Groonga encoding that
  set_raw() selects for them.
*/

/* Selected as GRN_ENC_UTF8. */
CHARSET_INFO *mrn_charset_utf8 = NULL;
CHARSET_INFO *mrn_charset_utf8mb4 = NULL;
CHARSET_INFO *mrn_charset_ascii = NULL;

/* Selected as GRN_ENC_SJIS. */
CHARSET_INFO *mrn_charset_cp932 = NULL;
CHARSET_INFO *mrn_charset_sjis = NULL;

/* Selected as GRN_ENC_EUC_JP. */
CHARSET_INFO *mrn_charset_eucjpms = NULL;
CHARSET_INFO *mrn_charset_ujis = NULL;

/*
  Selected as GRN_ENC_LATIN1. init() keeps up to two latin1 entries, one
  per distinct cset handler it encounters.
*/
CHARSET_INFO *mrn_charset_latin1_1 = NULL;
CHARSET_INFO *mrn_charset_latin1_2 = NULL;

/* Selected as GRN_ENC_KOI8R. */
CHARSET_INFO *mrn_charset_koi8r = NULL;

/* Selected as GRN_ENC_NONE. */
CHARSET_INFO *mrn_charset_binary = NULL;
void init(void) {
CHARSET_INFO **cs;
MRN_DBUG_ENTER_FUNCTION();
for (cs = all_charsets; cs < all_charsets + MY_ALL_CHARSETS_SIZE; cs++)
{
if (!cs[0])
continue;
if (!strcmp(cs[0]->cs_name.str, "utf8mb3"))
{
DBUG_PRINT("info", ("mroonga: %s is %s [%p]",
cs[0]->coll_name.str, cs[0]->cs_name.str, cs[0]->cset));
if (!mrn_charset_utf8)
mrn_charset_utf8 = cs[0];
else if (mrn_charset_utf8->cset != cs[0]->cset)
DBUG_ASSERT(0);
continue;
}
if (!strcmp(cs[0]->cs_name.str, "utf8mb4"))
{
DBUG_PRINT("info", ("mroonga: %s is %s [%p]",
cs[0]->coll_name.str, cs[0]->cs_name.str, cs[0]->cset));
if (!mrn_charset_utf8mb4)
mrn_charset_utf8mb4 = cs[0];
else if (mrn_charset_utf8mb4->cset != cs[0]->cset)
DBUG_ASSERT(0);
continue;
}
if (!strcmp(cs[0]->cs_name.str, "binary"))
{
DBUG_PRINT("info", ("mroonga: %s is %s [%p]",
cs[0]->coll_name.str, cs[0]->cs_name.str, cs[0]->cset));
if (!mrn_charset_binary)
mrn_charset_binary = cs[0];
else if (mrn_charset_binary->cset != cs[0]->cset)
DBUG_ASSERT(0);
continue;
}
if (!strcmp(cs[0]->cs_name.str, "ascii"))
{
DBUG_PRINT("info", ("mroonga: %s is %s [%p]",
cs[0]->coll_name.str, cs[0]->cs_name.str, cs[0]->cset));
if (!mrn_charset_ascii)
mrn_charset_ascii = cs[0];
else if (mrn_charset_ascii->cset != cs[0]->cset)
DBUG_ASSERT(0);
continue;
}
if (!strcmp(cs[0]->cs_name.str, "latin1"))
{
DBUG_PRINT("info", ("mroonga: %s is %s [%p]",
cs[0]->coll_name.str, cs[0]->cs_name.str, cs[0]->cset));
if (!mrn_charset_latin1_1)
mrn_charset_latin1_1 = cs[0];
else if (mrn_charset_latin1_1->cset != cs[0]->cset)
{
if (!mrn_charset_latin1_2)
mrn_charset_latin1_2 = cs[0];
else if (mrn_charset_latin1_2->cset != cs[0]->cset)
DBUG_ASSERT(0);
}
continue;
}
if (!strcmp(cs[0]->cs_name.str, "cp932"))
{
DBUG_PRINT("info", ("mroonga: %s is %s [%p]",
cs[0]->coll_name.str, cs[0]->cs_name.str, cs[0]->cset));
if (!mrn_charset_cp932)
mrn_charset_cp932 = cs[0];
else if (mrn_charset_cp932->cset != cs[0]->cset)
DBUG_ASSERT(0);
continue;
}
if (!strcmp(cs[0]->cs_name.str, "sjis"))
{
DBUG_PRINT("info", ("mroonga: %s is %s [%p]",
cs[0]->coll_name.str, cs[0]->cs_name.str, cs[0]->cset));
if (!mrn_charset_sjis)
mrn_charset_sjis = cs[0];
else if (mrn_charset_sjis->cset != cs[0]->cset)
DBUG_ASSERT(0);
continue;
}
if (!strcmp(cs[0]->cs_name.str, "eucjpms"))
{
DBUG_PRINT("info", ("mroonga: %s is %s [%p]",
cs[0]->coll_name.str, cs[0]->cs_name.str, cs[0]->cset));
if (!mrn_charset_eucjpms)
mrn_charset_eucjpms = cs[0];
else if (mrn_charset_eucjpms->cset != cs[0]->cset)
DBUG_ASSERT(0);
continue;
}
if (!strcmp(cs[0]->cs_name.str, "ujis"))
{
DBUG_PRINT("info", ("mroonga: %s is %s [%p]",
cs[0]->coll_name.str, cs[0]->cs_name.str, cs[0]->cset));
if (!mrn_charset_ujis)
mrn_charset_ujis = cs[0];
else if (mrn_charset_ujis->cset != cs[0]->cset)
DBUG_ASSERT(0);
continue;
}
if (!strcmp(cs[0]->cs_name.str, "koi8r"))
{
DBUG_PRINT("info", ("mroonga: %s is %s [%p]",
cs[0]->coll_name.str, cs[0]->cs_name.str, cs[0]->cset));
if (!mrn_charset_koi8r)
mrn_charset_koi8r = cs[0];
else if (mrn_charset_koi8r->cset != cs[0]->cset)
DBUG_ASSERT(0);
continue;
}
DBUG_PRINT("info", ("mroonga: %s[%s][%p] is not supported",
cs[0]->coll_name.str, cs[0]->cs_name.str, cs[0]->cset));
}
DBUG_VOID_RETURN;
}
/*
  Selects the Groonga encoding for `charset` on `ctx` via set_raw().

  Returns 0 when set_raw() accepts the charset. Otherwise reports the
  collation and character set names through my_printf_error() and returns
  ER_MRN_CHARSET_NOT_SUPPORT_NUM.
*/
int set(grn_ctx *ctx, const CHARSET_INFO *charset) {
  MRN_DBUG_ENTER_FUNCTION();

  if (set_raw(ctx, charset)) {
    DBUG_RETURN(0);
  }

  /* Placeholders keep the message printable if no charset was supplied. */
  const char *collation_name = charset ? charset->coll_name.str : "<null>";
  const char *charset_name = charset ? charset->cs_name.str : "<null>";

  const int error_code = ER_MRN_CHARSET_NOT_SUPPORT_NUM;
  my_printf_error(error_code,
                  ER_MRN_CHARSET_NOT_SUPPORT_STR,
                  MYF(0),
                  collation_name,
                  charset_name);
  DBUG_RETURN(error_code);
}
bool set_raw(grn_ctx *ctx, const CHARSET_INFO *charset) {
MRN_DBUG_ENTER_FUNCTION();
if (!charset)
{
GRN_CTX_SET_ENCODING(ctx, GRN_ENC_NONE);
DBUG_RETURN(true);
}
if (charset->cset == mrn_charset_utf8->cset)
{
GRN_CTX_SET_ENCODING(ctx, GRN_ENC_UTF8);
DBUG_RETURN(true);
}
if (mrn_charset_utf8mb4 && charset->cset == mrn_charset_utf8mb4->cset)
{
GRN_CTX_SET_ENCODING(ctx, GRN_ENC_UTF8);
DBUG_RETURN(true);
}
if (charset->cset == mrn_charset_cp932->cset)
{
GRN_CTX_SET_ENCODING(ctx, GRN_ENC_SJIS);
DBUG_RETURN(true);
}
if (charset->cset == mrn_charset_eucjpms->cset)
{
GRN_CTX_SET_ENCODING(ctx, GRN_ENC_EUC_JP);
DBUG_RETURN(true);
}
if (charset->cset == mrn_charset_latin1_1->cset)
{
GRN_CTX_SET_ENCODING(ctx, GRN_ENC_LATIN1);
DBUG_RETURN(true);
}
if (charset->cset == mrn_charset_latin1_2->cset)
{
GRN_CTX_SET_ENCODING(ctx, GRN_ENC_LATIN1);
DBUG_RETURN(true);
}
if (charset->cset == mrn_charset_koi8r->cset)
{
GRN_CTX_SET_ENCODING(ctx, GRN_ENC_KOI8R);
DBUG_RETURN(true);
}
if (charset->cset == mrn_charset_binary->cset)
{
GRN_CTX_SET_ENCODING(ctx, GRN_ENC_NONE);
DBUG_RETURN(true);
}
if (charset->cset == mrn_charset_ascii->cset)
{
GRN_CTX_SET_ENCODING(ctx, GRN_ENC_UTF8);
DBUG_RETURN(true);
}
if (charset->cset == mrn_charset_sjis->cset)
{
GRN_CTX_SET_ENCODING(ctx, GRN_ENC_SJIS);
DBUG_RETURN(true);
}
if (charset->cset == mrn_charset_ujis->cset)
{
GRN_CTX_SET_ENCODING(ctx, GRN_ENC_EUC_JP);
DBUG_RETURN(true);
}
GRN_CTX_SET_ENCODING(ctx, GRN_ENC_NONE);
DBUG_RETURN(false);
}
}
}