1 /* -*- c-basic-offset: 2 -*- */
2 /*
3   Copyright(C) 2013  Kouhei Sutou <kou@clear-code.com>
4 
5   This library is free software; you can redistribute it and/or
6   modify it under the terms of the GNU Lesser General Public
7   License as published by the Free Software Foundation; either
8   version 2.1 of the License, or (at your option) any later version.
9 
10   This library is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13   Lesser General Public License for more details.
14 
15   You should have received a copy of the GNU Lesser General Public
16   License along with this library; if not, write to the Free Software
17   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1335  USA
18 */
19 
20 #include "mrn_field_normalizer.hpp"
21 #include "mrn_encoding.hpp"
22 
23 // for debug
24 #define MRN_CLASS_NAME "mrn::FieldNormalizer"
25 
26 namespace mrn {
FieldNormalizer(grn_ctx * ctx,THD * thread,Field * field)27   FieldNormalizer::FieldNormalizer(grn_ctx *ctx, THD *thread, Field *field)
28     : ctx_(ctx),
29       thread_(thread),
30       field_(field) {
31   }
32 
~FieldNormalizer()33   FieldNormalizer::~FieldNormalizer() {
34   }
35 
should_normalize()36   bool FieldNormalizer::should_normalize() {
37     MRN_DBUG_ENTER_METHOD();
38 
39     DBUG_PRINT("info",
40                ("mroonga: result_type = %u", field_->result_type()));
41     DBUG_PRINT("info",
42                ("mroonga: charset->name = %s", field_->charset()->name));
43     DBUG_PRINT("info",
44                ("mroonga: charset->csname = %s", field_->charset()->csname));
45     DBUG_PRINT("info",
46                ("mroonga: charset->state = %u", field_->charset()->state));
47     bool need_normalize_p;
48     if (field_->charset()->state & (MY_CS_BINSORT | MY_CS_CSSORT)) {
49       need_normalize_p = false;
50       DBUG_PRINT("info",
51                  ("mroonga: should_normalize: false: sort is required"));
52     } else {
53       if (is_text_type()) {
54         need_normalize_p = true;
55         DBUG_PRINT("info", ("mroonga: should_normalize: true: text type"));
56       } else {
57         need_normalize_p = false;
58         DBUG_PRINT("info", ("mroonga: should_normalize: false: no text type"));
59       }
60     }
61 
62     DBUG_RETURN(need_normalize_p);
63   }
64 
is_text_type()65   bool FieldNormalizer::is_text_type() {
66     MRN_DBUG_ENTER_METHOD();
67     bool text_type_p;
68     switch (field_->type()) {
69     case MYSQL_TYPE_VARCHAR:
70     case MYSQL_TYPE_BLOB:
71     case MYSQL_TYPE_VAR_STRING:
72       text_type_p = true;
73       break;
74     case MYSQL_TYPE_STRING:
75       switch (field_->real_type()) {
76       case MYSQL_TYPE_ENUM:
77       case MYSQL_TYPE_SET:
78         text_type_p = false;
79         break;
80       default:
81         text_type_p = true;
82         break;
83       }
84       break;
85     default:
86       text_type_p = false;
87       break;
88     }
89     DBUG_RETURN(text_type_p);
90   }
91 
normalize(const char * string,unsigned int string_length)92   grn_obj *FieldNormalizer::normalize(const char *string,
93                                       unsigned int string_length) {
94     MRN_DBUG_ENTER_METHOD();
95     grn_obj *normalizer = find_grn_normalizer();
96     int flags = 0;
97     grn_encoding original_encoding = GRN_CTX_GET_ENCODING(ctx_);
98     encoding::set_raw(ctx_, field_->charset());
99     grn_obj *grn_string = grn_string_open(ctx_, string, string_length,
100                                           normalizer, flags);
101     GRN_CTX_SET_ENCODING(ctx_, original_encoding);
102     DBUG_RETURN(grn_string);
103   }
104 
find_grn_normalizer()105   grn_obj *FieldNormalizer::find_grn_normalizer() {
106     MRN_DBUG_ENTER_METHOD();
107 
108     const CHARSET_INFO *charset_info = field_->charset();
109     const char *normalizer_name = NULL;
110     const char *default_normalizer_name = "NormalizerAuto";
111     if ((strcmp(charset_info->name, "utf8_general_ci") == 0) ||
112         (strcmp(charset_info->name, "utf8mb4_general_ci") == 0)) {
113       normalizer_name = "NormalizerMySQLGeneralCI";
114     } else if ((strcmp(charset_info->name, "utf8_unicode_ci") == 0) ||
115                (strcmp(charset_info->name, "utf8mb4_unicode_ci") == 0)) {
116       normalizer_name = "NormalizerMySQLUnicodeCI";
117     } else if ((strcmp(charset_info->name, "utf8_unicode_520_ci") == 0) ||
118                (strcmp(charset_info->name, "utf8mb4_unicode_520_ci") == 0)) {
119       normalizer_name = "NormalizerMySQLUnicode520CI";
120     }
121 
122     grn_obj *normalizer = NULL;
123     if (normalizer_name) {
124       normalizer = grn_ctx_get(ctx_, normalizer_name, -1);
125       if (!normalizer) {
126         char error_message[MRN_MESSAGE_BUFFER_SIZE];
127         snprintf(error_message, MRN_MESSAGE_BUFFER_SIZE,
128                  "%s normalizer isn't found for %s. "
129                  "Install groonga-normalizer-mysql normalizer. "
130                  "%s is used as fallback.",
131                  normalizer_name,
132                  charset_info->name,
133                  default_normalizer_name);
134         push_warning(thread_, MRN_SEVERITY_WARNING,
135                      HA_ERR_UNSUPPORTED, error_message);
136       }
137     }
138 
139     if (!normalizer) {
140       normalizer = grn_ctx_get(ctx_, default_normalizer_name, -1);
141     }
142 
143     DBUG_RETURN(normalizer);
144   }
145 }
146