mirror of
https://github.com/MariaDB/server.git
synced 2025-01-15 19:42:28 +01:00
144 lines
4.9 KiB
C++
144 lines
4.9 KiB
C++
/* -*- c-basic-offset: 2 -*- */
/*
  Copyright(C) 2013 Kouhei Sutou <kou@clear-code.com>

  This library is free software; you can redistribute it and/or
  modify it under the terms of the GNU Lesser General Public
  License as published by the Free Software Foundation; either
  version 2.1 of the License, or (at your option) any later version.

  This library is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public
  License along with this library; if not, write to the Free Software
  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
*/
|
|
|
|
#include "mrn_field_normalizer.hpp"
|
|
#include "mrn_encoding.hpp"
|
|
|
|
// for debug
|
|
#define MRN_CLASS_NAME "mrn::FieldNormalizer"
|
|
|
|
namespace mrn {
|
|
// Remembers the given context, thread and field pointers so that the other
// member functions can use them. Nothing is dereferenced here.
FieldNormalizer::FieldNormalizer(grn_ctx *ctx, THD *thread, Field *field)
  : ctx_(ctx), thread_(thread), field_(field) {}
|
|
|
|
// Defaulted destructor: no explicit cleanup is performed for ctx_, thread_
// or field_.
FieldNormalizer::~FieldNormalizer() = default;
|
|
|
|
bool FieldNormalizer::should_normalize() {
|
|
MRN_DBUG_ENTER_METHOD();
|
|
|
|
DBUG_PRINT("info",
|
|
("mroonga: result_type = %u", field_->result_type()));
|
|
DBUG_PRINT("info",
|
|
("mroonga: charset->name = %s", field_->charset()->coll_name.str));
|
|
DBUG_PRINT("info",
|
|
("mroonga: charset->csname = %s", field_->charset()->cs_name.str));
|
|
DBUG_PRINT("info",
|
|
("mroonga: charset->state = %u", field_->charset()->state));
|
|
bool need_normalize_p;
|
|
if (field_->charset()->state & (MY_CS_BINSORT | MY_CS_CSSORT)) {
|
|
need_normalize_p = false;
|
|
DBUG_PRINT("info",
|
|
("mroonga: should_normalize: false: sort is required"));
|
|
} else {
|
|
if (is_text_type()) {
|
|
need_normalize_p = true;
|
|
DBUG_PRINT("info", ("mroonga: should_normalize: true: text type"));
|
|
} else {
|
|
need_normalize_p = false;
|
|
DBUG_PRINT("info", ("mroonga: should_normalize: false: no text type"));
|
|
}
|
|
}
|
|
|
|
DBUG_RETURN(need_normalize_p);
|
|
}
|
|
|
|
bool FieldNormalizer::is_text_type() {
|
|
MRN_DBUG_ENTER_METHOD();
|
|
bool text_type_p;
|
|
switch (field_->type()) {
|
|
case MYSQL_TYPE_VARCHAR:
|
|
case MYSQL_TYPE_BLOB:
|
|
case MYSQL_TYPE_VAR_STRING:
|
|
text_type_p = true;
|
|
break;
|
|
case MYSQL_TYPE_STRING:
|
|
switch (field_->real_type()) {
|
|
case MYSQL_TYPE_ENUM:
|
|
case MYSQL_TYPE_SET:
|
|
text_type_p = false;
|
|
break;
|
|
default:
|
|
text_type_p = true;
|
|
break;
|
|
}
|
|
break;
|
|
default:
|
|
text_type_p = false;
|
|
break;
|
|
}
|
|
DBUG_RETURN(text_type_p);
|
|
}
|
|
|
|
// Opens a Groonga string for the given bytes using the normalizer chosen by
// find_grn_normalizer().
//
// string        - start of the bytes to normalize.
// string_length - number of bytes in string.
//
// Returns whatever grn_string_open() returns.
// NOTE(review): presumably the caller must release the returned object;
// confirm against the Groonga API.
grn_obj *FieldNormalizer::normalize(const char *string,
                                    unsigned int string_length) {
  MRN_DBUG_ENTER_METHOD();

  grn_obj *normalizer = find_grn_normalizer();
  const int no_flags = 0;

  // Switch ctx_ to the field's charset only while the string is opened,
  // then restore the encoding that was active on entry.
  const grn_encoding saved_encoding = GRN_CTX_GET_ENCODING(ctx_);
  encoding::set_raw(ctx_, field_->charset());
  grn_obj *normalized_string =
    grn_string_open(ctx_, string, string_length, normalizer, no_flags);
  GRN_CTX_SET_ENCODING(ctx_, saved_encoding);

  DBUG_RETURN(normalized_string);
}
|
|
|
|
// Looks up the Groonga normalizer object matching the field's collation.
//
// Six utf8mb3/utf8mb4 collations map to a dedicated "NormalizerMySQL*"
// normalizer. If that normalizer cannot be found in ctx_, a warning is
// pushed to thread_ and "NormalizerAuto" is looked up instead.
// Any other collation goes straight to "NormalizerAuto".
//
// Returns the result of grn_ctx_get() for the normalizer finally chosen.
grn_obj *FieldNormalizer::find_grn_normalizer() {
  MRN_DBUG_ENTER_METHOD();

  // Collation name -> dedicated normalizer name.
  struct CollationNormalizer {
    const char *collation_name;
    const char *normalizer_name;
  };
  static const CollationNormalizer dedicated_normalizers[] = {
    {"utf8mb3_general_ci",     "NormalizerMySQLGeneralCI"},
    {"utf8mb4_general_ci",     "NormalizerMySQLGeneralCI"},
    {"utf8mb3_unicode_ci",     "NormalizerMySQLUnicodeCI"},
    {"utf8mb4_unicode_ci",     "NormalizerMySQLUnicodeCI"},
    {"utf8mb3_unicode_520_ci", "NormalizerMySQLUnicode520CI"},
    {"utf8mb4_unicode_520_ci", "NormalizerMySQLUnicode520CI"},
  };

  const CHARSET_INFO *charset_info = field_->charset();
  const char *const fallback_name = "NormalizerAuto";

  const char *dedicated_name = nullptr;
  for (const auto &entry : dedicated_normalizers) {
    if (strcmp(charset_info->coll_name.str, entry.collation_name) == 0) {
      dedicated_name = entry.normalizer_name;
      break;
    }
  }

  grn_obj *found = nullptr;
  if (dedicated_name) {
    found = grn_ctx_get(ctx_, dedicated_name, -1);
    if (!found) {
      // The dedicated normalizer is not available in this context:
      // warn and let the fallback lookup below take over.
      char error_message[MRN_MESSAGE_BUFFER_SIZE];
      snprintf(error_message, MRN_MESSAGE_BUFFER_SIZE,
               "%s normalizer isn't found for %s. "
               "Install groonga-normalizer-mysql normalizer. "
               "%s is used as fallback.",
               dedicated_name,
               charset_info->coll_name.str,
               fallback_name);
      push_warning(thread_, MRN_SEVERITY_WARNING,
                   HA_ERR_UNSUPPORTED, error_message);
    }
  }

  if (!found) {
    found = grn_ctx_get(ctx_, fallback_name, -1);
  }

  DBUG_RETURN(found);
}
|
|
}
|