mirror of
				https://github.com/MariaDB/server.git
				synced 2025-11-04 04:46:15 +01:00 
			
		
		
		
	
		
			
				
	
	
		
			144 lines
		
	
	
	
		
			4.9 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			144 lines
		
	
	
	
		
			4.9 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
/* -*- c-basic-offset: 2 -*- */
 | 
						|
/*
 | 
						|
  Copyright(C) 2013  Kouhei Sutou <kou@clear-code.com>
 | 
						|
 | 
						|
  This library is free software; you can redistribute it and/or
 | 
						|
  modify it under the terms of the GNU Lesser General Public
 | 
						|
  License as published by the Free Software Foundation; either
 | 
						|
  version 2.1 of the License, or (at your option) any later version.
 | 
						|
 | 
						|
  This library is distributed in the hope that it will be useful,
 | 
						|
  but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
						|
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 | 
						|
  Lesser General Public License for more details.
 | 
						|
 | 
						|
  You should have received a copy of the GNU Lesser General Public
 | 
						|
  License along with this library; if not, write to the Free Software
 | 
						|
  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1335  USA
 | 
						|
*/
 | 
						|
 | 
						|
#include "mrn_field_normalizer.hpp"
 | 
						|
#include "mrn_encoding.hpp"
 | 
						|
 | 
						|
// for debug
 | 
						|
#define MRN_CLASS_NAME "mrn::FieldNormalizer"
 | 
						|
 | 
						|
namespace mrn {
 | 
						|
  // Builds a normalizer helper for one field.
  //
  // The constructor only records the three pointers it is given:
  //   ctx    - Groonga context, later passed to grn_ctx_get() and
  //            grn_string_open().
  //   thread - thread handle, later passed to push_warning().
  //   field  - field whose type and charset drive every decision made by
  //            this class.
  FieldNormalizer::FieldNormalizer(grn_ctx *ctx, THD *thread, Field *field)
    : ctx_(ctx), thread_(thread), field_(field)
  {
  }
 | 
						|
 | 
						|
  // Compiler-generated destructor: no explicit cleanup is written here for
  // the pointers stored by the constructor.
  FieldNormalizer::~FieldNormalizer() = default;
 | 
						|
 | 
						|
  bool FieldNormalizer::should_normalize() {
 | 
						|
    MRN_DBUG_ENTER_METHOD();
 | 
						|
 | 
						|
    DBUG_PRINT("info",
 | 
						|
               ("mroonga: result_type = %u", field_->result_type()));
 | 
						|
    DBUG_PRINT("info",
 | 
						|
               ("mroonga: charset->name = %s", field_->charset()->coll_name.str));
 | 
						|
    DBUG_PRINT("info",
 | 
						|
               ("mroonga: charset->csname = %s", field_->charset()->cs_name.str));
 | 
						|
    DBUG_PRINT("info",
 | 
						|
               ("mroonga: charset->state = %u", field_->charset()->state));
 | 
						|
    bool need_normalize_p;
 | 
						|
    if (field_->charset()->state & (MY_CS_BINSORT | MY_CS_CSSORT)) {
 | 
						|
      need_normalize_p = false;
 | 
						|
      DBUG_PRINT("info",
 | 
						|
                 ("mroonga: should_normalize: false: sort is required"));
 | 
						|
    } else {
 | 
						|
      if (is_text_type()) {
 | 
						|
        need_normalize_p = true;
 | 
						|
        DBUG_PRINT("info", ("mroonga: should_normalize: true: text type"));
 | 
						|
      } else {
 | 
						|
        need_normalize_p = false;
 | 
						|
        DBUG_PRINT("info", ("mroonga: should_normalize: false: no text type"));
 | 
						|
      }
 | 
						|
    }
 | 
						|
 | 
						|
    DBUG_RETURN(need_normalize_p);
 | 
						|
  }
 | 
						|
 | 
						|
  bool FieldNormalizer::is_text_type() {
 | 
						|
    MRN_DBUG_ENTER_METHOD();
 | 
						|
    bool text_type_p;
 | 
						|
    switch (field_->type()) {
 | 
						|
    case MYSQL_TYPE_VARCHAR:
 | 
						|
    case MYSQL_TYPE_BLOB:
 | 
						|
    case MYSQL_TYPE_VAR_STRING:
 | 
						|
      text_type_p = true;
 | 
						|
      break;
 | 
						|
    case MYSQL_TYPE_STRING:
 | 
						|
      switch (field_->real_type()) {
 | 
						|
      case MYSQL_TYPE_ENUM:
 | 
						|
      case MYSQL_TYPE_SET:
 | 
						|
        text_type_p = false;
 | 
						|
        break;
 | 
						|
      default:
 | 
						|
        text_type_p = true;
 | 
						|
        break;
 | 
						|
      }
 | 
						|
      break;
 | 
						|
    default:
 | 
						|
      text_type_p = false;
 | 
						|
      break;
 | 
						|
    }
 | 
						|
    DBUG_RETURN(text_type_p);
 | 
						|
  }
 | 
						|
 | 
						|
  // Opens a Groonga string for the given bytes using the normalizer chosen
  // by find_grn_normalizer().
  //
  //   string        - start of the bytes to normalize.
  //   string_length - number of bytes passed to grn_string_open().
  //
  // The context encoding is switched to the field's charset only for the
  // duration of grn_string_open() and is put back before returning.
  // Returns whatever grn_string_open() returned.
  // NOTE(review): presumably the caller is responsible for releasing the
  // returned object - confirm against callers.
  grn_obj *FieldNormalizer::normalize(const char *string,
                                      unsigned int string_length) {
    MRN_DBUG_ENTER_METHOD();

    grn_obj *const grn_normalizer = find_grn_normalizer();
    const int no_flags = 0;

    // Remember the current encoding so it can be restored afterwards.
    const grn_encoding saved_encoding = GRN_CTX_GET_ENCODING(ctx_);
    encoding::set_raw(ctx_, field_->charset());
    grn_obj *const normalized_string =
      grn_string_open(ctx_, string, string_length, grn_normalizer, no_flags);
    GRN_CTX_SET_ENCODING(ctx_, saved_encoding);

    DBUG_RETURN(normalized_string);
  }
 | 
						|
 | 
						|
  // Picks the Groonga normalizer object matching the field's collation.
  //
  // The utf8mb3/utf8mb4 general_ci, unicode_ci and unicode_520_ci
  // collations map to dedicated "NormalizerMySQL*" normalizers. When such a
  // normalizer is wanted but grn_ctx_get() cannot find it, a warning is
  // pushed to the thread. In that case, and for every other collation,
  // "NormalizerAuto" is looked up instead.
  // Returns the result of the last grn_ctx_get() call made.
  grn_obj *FieldNormalizer::find_grn_normalizer() {
    MRN_DBUG_ENTER_METHOD();

    // Collation name -> dedicated normalizer name, checked in this order.
    struct CollationNormalizer {
      const char *collation_name;
      const char *normalizer_name;
    };
    static const CollationNormalizer known_collations[] = {
      {"utf8mb3_general_ci",     "NormalizerMySQLGeneralCI"},
      {"utf8mb4_general_ci",     "NormalizerMySQLGeneralCI"},
      {"utf8mb3_unicode_ci",     "NormalizerMySQLUnicodeCI"},
      {"utf8mb4_unicode_ci",     "NormalizerMySQLUnicodeCI"},
      {"utf8mb3_unicode_520_ci", "NormalizerMySQLUnicode520CI"},
      {"utf8mb4_unicode_520_ci", "NormalizerMySQLUnicode520CI"}
    };
    const char *const fallback_name = "NormalizerAuto";

    const CHARSET_INFO *charset_info = field_->charset();
    const char *const collation = charset_info->coll_name.str;

    const char *wanted_name = NULL;
    for (const CollationNormalizer &entry : known_collations) {
      if (strcmp(collation, entry.collation_name) == 0) {
        wanted_name = entry.normalizer_name;
        break;
      }
    }

    grn_obj *found = wanted_name ? grn_ctx_get(ctx_, wanted_name, -1) : NULL;
    if (wanted_name && !found) {
      // The dedicated normalizer is not available: warn, then fall back.
      char error_message[MRN_MESSAGE_BUFFER_SIZE];
      snprintf(error_message, MRN_MESSAGE_BUFFER_SIZE,
               "%s normalizer isn't found for %s. "
               "Install groonga-normalizer-mysql normalizer. "
               "%s is used as fallback.",
               wanted_name,
               collation,
               fallback_name);
      push_warning(thread_, MRN_SEVERITY_WARNING,
                   HA_ERR_UNSUPPORTED, error_message);
    }

    if (!found) {
      found = grn_ctx_get(ctx_, fallback_name, -1);
    }

    DBUG_RETURN(found);
  }
 | 
						|
}
 |