1 /* -*- c-basic-offset: 2 -*- */ 2 /* 3 Copyright(C) 2013 Kouhei Sutou <kou@clear-code.com> 4 5 This library is free software; you can redistribute it and/or 6 modify it under the terms of the GNU Lesser General Public 7 License as published by the Free Software Foundation; either 8 version 2.1 of the License, or (at your option) any later version. 9 10 This library is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 Lesser General Public License for more details. 14 15 You should have received a copy of the GNU Lesser General Public 16 License along with this library; if not, write to the Free Software 17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA 18 */ 19 20 #include "mrn_field_normalizer.hpp" 21 #include "mrn_encoding.hpp" 22 23 // for debug 24 #define MRN_CLASS_NAME "mrn::FieldNormalizer" 25 26 namespace mrn { FieldNormalizer(grn_ctx * ctx,THD * thread,Field * field)27 FieldNormalizer::FieldNormalizer(grn_ctx *ctx, THD *thread, Field *field) 28 : ctx_(ctx), 29 thread_(thread), 30 field_(field) { 31 } 32 ~FieldNormalizer()33 FieldNormalizer::~FieldNormalizer() { 34 } 35 should_normalize()36 bool FieldNormalizer::should_normalize() { 37 MRN_DBUG_ENTER_METHOD(); 38 39 DBUG_PRINT("info", 40 ("mroonga: result_type = %u", field_->result_type())); 41 DBUG_PRINT("info", 42 ("mroonga: charset->name = %s", field_->charset()->name)); 43 DBUG_PRINT("info", 44 ("mroonga: charset->csname = %s", field_->charset()->csname)); 45 DBUG_PRINT("info", 46 ("mroonga: charset->state = %u", field_->charset()->state)); 47 bool need_normalize_p; 48 if (field_->charset()->state & (MY_CS_BINSORT | MY_CS_CSSORT)) { 49 need_normalize_p = false; 50 DBUG_PRINT("info", 51 ("mroonga: should_normalize: false: sort is required")); 52 } else { 53 if (is_text_type()) { 54 need_normalize_p = true; 55 DBUG_PRINT("info", ("mroonga: should_normalize: true: text type")); 56 } else { 57 need_normalize_p = false; 58 DBUG_PRINT("info", ("mroonga: should_normalize: false: no text type")); 59 } 60 } 61 62 DBUG_RETURN(need_normalize_p); 63 } 64 is_text_type()65 bool FieldNormalizer::is_text_type() { 66 MRN_DBUG_ENTER_METHOD(); 67 bool text_type_p; 68 switch (field_->type()) { 69 case MYSQL_TYPE_VARCHAR: 70 case MYSQL_TYPE_BLOB: 71 case MYSQL_TYPE_VAR_STRING: 72 text_type_p = true; 73 break; 74 case MYSQL_TYPE_STRING: 75 switch (field_->real_type()) { 76 case MYSQL_TYPE_ENUM: 77 case MYSQL_TYPE_SET: 78 text_type_p = false; 79 break; 80 default: 81 text_type_p = true; 82 break; 83 } 84 break; 85 default: 86 text_type_p = false; 87 break; 88 } 89 DBUG_RETURN(text_type_p); 90 } 91 normalize(const char * string,unsigned int string_length)92 grn_obj *FieldNormalizer::normalize(const char *string, 93 unsigned int string_length) { 94 MRN_DBUG_ENTER_METHOD(); 95 grn_obj *normalizer = find_grn_normalizer(); 96 int flags = 0; 97 grn_encoding original_encoding = GRN_CTX_GET_ENCODING(ctx_); 98 encoding::set_raw(ctx_, field_->charset()); 99 grn_obj *grn_string = grn_string_open(ctx_, string, string_length, 100 normalizer, flags); 101 GRN_CTX_SET_ENCODING(ctx_, original_encoding); 102 DBUG_RETURN(grn_string); 103 } 104 find_grn_normalizer()105 grn_obj *FieldNormalizer::find_grn_normalizer() { 106 MRN_DBUG_ENTER_METHOD(); 107 108 const CHARSET_INFO *charset_info = field_->charset(); 109 const char *normalizer_name = NULL; 110 const char *default_normalizer_name = "NormalizerAuto"; 111 if ((strcmp(charset_info->name, "utf8_general_ci") == 0) || 112 (strcmp(charset_info->name, "utf8mb4_general_ci") == 0)) { 113 normalizer_name = "NormalizerMySQLGeneralCI"; 114 } else if ((strcmp(charset_info->name, "utf8_unicode_ci") == 0) || 115 (strcmp(charset_info->name, "utf8mb4_unicode_ci") == 0)) { 116 normalizer_name = "NormalizerMySQLUnicodeCI"; 117 } else if ((strcmp(charset_info->name, "utf8_unicode_520_ci") == 0) || 118 (strcmp(charset_info->name, "utf8mb4_unicode_520_ci") == 0)) { 119 normalizer_name = "NormalizerMySQLUnicode520CI"; 120 } 121 122 grn_obj *normalizer = NULL; 123 if (normalizer_name) { 124 normalizer = grn_ctx_get(ctx_, normalizer_name, -1); 125 if (!normalizer) { 126 char error_message[MRN_MESSAGE_BUFFER_SIZE]; 127 snprintf(error_message, MRN_MESSAGE_BUFFER_SIZE, 128 "%s normalizer isn't found for %s. " 129 "Install groonga-normalizer-mysql normalizer. " 130 "%s is used as fallback.", 131 normalizer_name, 132 charset_info->name, 133 default_normalizer_name); 134 push_warning(thread_, MRN_SEVERITY_WARNING, 135 HA_ERR_UNSUPPORTED, error_message); 136 } 137 } 138 139 if (!normalizer) { 140 normalizer = grn_ctx_get(ctx_, default_normalizer_name, -1); 141 } 142 143 DBUG_RETURN(normalizer); 144 } 145 } 146