mirror of
				https://github.com/MariaDB/server.git
				synced 2025-10-25 00:48:31 +02:00 
			
		
		
		
	
		
			
				
	
	
		
			144 lines
		
	
	
	
		
			4.9 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			144 lines
		
	
	
	
		
			4.9 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| /* -*- c-basic-offset: 2 -*- */
 | |
| /*
 | |
|   Copyright(C) 2013  Kouhei Sutou <kou@clear-code.com>
 | |
| 
 | |
|   This library is free software; you can redistribute it and/or
 | |
|   modify it under the terms of the GNU Lesser General Public
 | |
|   License as published by the Free Software Foundation; either
 | |
|   version 2.1 of the License, or (at your option) any later version.
 | |
| 
 | |
|   This library is distributed in the hope that it will be useful,
 | |
|   but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
|   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 | |
|   Lesser General Public License for more details.
 | |
| 
 | |
|   You should have received a copy of the GNU Lesser General Public
 | |
|   License along with this library; if not, write to the Free Software
 | |
|   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1335  USA
 | |
| */
 | |
| 
 | |
| #include "mrn_field_normalizer.hpp"
 | |
| #include "mrn_encoding.hpp"
 | |
| 
 | |
| // for debug
 | |
| #define MRN_CLASS_NAME "mrn::FieldNormalizer"
 | |
| 
 | |
| namespace mrn {
 | |
|   FieldNormalizer::FieldNormalizer(grn_ctx *ctx, THD *thread, Field *field)
 | |
|     : ctx_(ctx),
 | |
|       thread_(thread),
 | |
|       field_(field) {
 | |
|   }
 | |
| 
 | |
// Defaulted destructor: no explicit cleanup is performed in this file.
|   FieldNormalizer::~FieldNormalizer() = default;
 | |
| 
 | |
|   bool FieldNormalizer::should_normalize() {
 | |
|     MRN_DBUG_ENTER_METHOD();
 | |
| 
 | |
|     DBUG_PRINT("info",
 | |
|                ("mroonga: result_type = %u", field_->result_type()));
 | |
|     DBUG_PRINT("info",
 | |
|                ("mroonga: charset->name = %s", field_->charset()->coll_name.str));
 | |
|     DBUG_PRINT("info",
 | |
|                ("mroonga: charset->csname = %s", field_->charset()->cs_name.str));
 | |
|     DBUG_PRINT("info",
 | |
|                ("mroonga: charset->state = %u", field_->charset()->state));
 | |
|     bool need_normalize_p;
 | |
|     if (field_->charset()->state & (MY_CS_BINSORT | MY_CS_CSSORT)) {
 | |
|       need_normalize_p = false;
 | |
|       DBUG_PRINT("info",
 | |
|                  ("mroonga: should_normalize: false: sort is required"));
 | |
|     } else {
 | |
|       if (is_text_type()) {
 | |
|         need_normalize_p = true;
 | |
|         DBUG_PRINT("info", ("mroonga: should_normalize: true: text type"));
 | |
|       } else {
 | |
|         need_normalize_p = false;
 | |
|         DBUG_PRINT("info", ("mroonga: should_normalize: false: no text type"));
 | |
|       }
 | |
|     }
 | |
| 
 | |
|     DBUG_RETURN(need_normalize_p);
 | |
|   }
 | |
| 
 | |
|   bool FieldNormalizer::is_text_type() {
 | |
|     MRN_DBUG_ENTER_METHOD();
 | |
|     bool text_type_p;
 | |
|     switch (field_->type()) {
 | |
|     case MYSQL_TYPE_VARCHAR:
 | |
|     case MYSQL_TYPE_BLOB:
 | |
|     case MYSQL_TYPE_VAR_STRING:
 | |
|       text_type_p = true;
 | |
|       break;
 | |
|     case MYSQL_TYPE_STRING:
 | |
|       switch (field_->real_type()) {
 | |
|       case MYSQL_TYPE_ENUM:
 | |
|       case MYSQL_TYPE_SET:
 | |
|         text_type_p = false;
 | |
|         break;
 | |
|       default:
 | |
|         text_type_p = true;
 | |
|         break;
 | |
|       }
 | |
|       break;
 | |
|     default:
 | |
|       text_type_p = false;
 | |
|       break;
 | |
|     }
 | |
|     DBUG_RETURN(text_type_p);
 | |
|   }
 | |
| 
 | |
|   grn_obj *FieldNormalizer::normalize(const char *string,
 | |
|                                       unsigned int string_length) {
 | |
|     MRN_DBUG_ENTER_METHOD();
 | |
|     grn_obj *normalizer = find_grn_normalizer();
 | |
|     int flags = 0;
 | |
|     grn_encoding original_encoding = GRN_CTX_GET_ENCODING(ctx_);
 | |
|     encoding::set_raw(ctx_, field_->charset());
 | |
|     grn_obj *grn_string = grn_string_open(ctx_, string, string_length,
 | |
|                                           normalizer, flags);
 | |
|     GRN_CTX_SET_ENCODING(ctx_, original_encoding);
 | |
|     DBUG_RETURN(grn_string);
 | |
|   }
 | |
| 
 | |
|   grn_obj *FieldNormalizer::find_grn_normalizer() {
 | |
|     MRN_DBUG_ENTER_METHOD();
 | |
| 
 | |
|     const CHARSET_INFO *charset_info = field_->charset();
 | |
|     const char *normalizer_name = NULL;
 | |
|     const char *default_normalizer_name = "NormalizerAuto";
 | |
|     if ((strcmp(charset_info->coll_name.str, "utf8mb3_general_ci") == 0) ||
 | |
|         (strcmp(charset_info->coll_name.str, "utf8mb4_general_ci") == 0)) {
 | |
|       normalizer_name = "NormalizerMySQLGeneralCI";
 | |
|     } else if ((strcmp(charset_info->coll_name.str, "utf8mb3_unicode_ci") == 0) ||
 | |
|                (strcmp(charset_info->coll_name.str, "utf8mb4_unicode_ci") == 0)) {
 | |
|       normalizer_name = "NormalizerMySQLUnicodeCI";
 | |
|     } else if ((strcmp(charset_info->coll_name.str, "utf8mb3_unicode_520_ci") == 0) ||
 | |
|                (strcmp(charset_info->coll_name.str, "utf8mb4_unicode_520_ci") == 0)) {
 | |
|       normalizer_name = "NormalizerMySQLUnicode520CI";
 | |
|     }
 | |
| 
 | |
|     grn_obj *normalizer = NULL;
 | |
|     if (normalizer_name) {
 | |
|       normalizer = grn_ctx_get(ctx_, normalizer_name, -1);
 | |
|       if (!normalizer) {
 | |
|         char error_message[MRN_MESSAGE_BUFFER_SIZE];
 | |
|         snprintf(error_message, MRN_MESSAGE_BUFFER_SIZE,
 | |
|                  "%s normalizer isn't found for %s. "
 | |
|                  "Install groonga-normalizer-mysql normalizer. "
 | |
|                  "%s is used as fallback.",
 | |
|                  normalizer_name,
 | |
|                  charset_info->coll_name.str,
 | |
|                  default_normalizer_name);
 | |
|         push_warning(thread_, MRN_SEVERITY_WARNING,
 | |
|                      HA_ERR_UNSUPPORTED, error_message);
 | |
|       }
 | |
|     }
 | |
| 
 | |
|     if (!normalizer) {
 | |
|       normalizer = grn_ctx_get(ctx_, default_normalizer_name, -1);
 | |
|     }
 | |
| 
 | |
|     DBUG_RETURN(normalizer);
 | |
|   }
 | |
| }
 | 
