1 /*
2  * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License, version 2.0,
6  * as published by the Free Software Foundation.
7  *
8  * This program is also distributed with certain software (including
9  * but not limited to OpenSSL) that is licensed under separate terms,
10  * as designated in a particular file or component or in included license
11  * documentation.  The authors of MySQL hereby grant you an additional
12  * permission to link the program and your derivative works with the
13  * separately licensed software that they have included with MySQL.
14  *
15  * This program is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU General Public License, version 2.0, for more details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with this program; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
23  */
24 
25 #include "plugin/x/src/index_field.h"
26 
27 #include <algorithm>
28 #include <cstring>
29 #include <limits>
30 
31 #include "plugin/x/src/helper/generate_hash.h"
32 #include "plugin/x/src/query_string_builder.h"
33 #include "plugin/x/src/xpl_error.h"
34 #include "plugin/x/src/xpl_regex.h"
35 #include "plugin/x/src/xpl_resultset.h"
36 
37 namespace xpl {
38 
39 namespace {
40 
// Builds the leading part of a generated index-column name:
//
//   "$ix_" <prefix> [<precision>] ["_" <scale>] "_" [<traits> "_"]
//
// precision and scale are only written when they are greater than zero.
// <traits> is "u" when is_unsigned is set followed by "r" when is_required is
// set; when neither flag is set the traits section (and its trailing '_') is
// omitted entirely.
std::string get_prefix(const char *const prefix, const int32_t precision,
                       const int32_t scale, const bool is_unsigned,
                       const bool is_required) {
  // type part
  std::string name("$ix_");
  name.append(prefix);
  if (precision > 0) name.append(std::to_string(precision));
  if (scale > 0) {
    name.push_back('_');
    name.append(std::to_string(scale));
  }
  name.push_back('_');

  // nothing more to add when the column has no additional traits
  if (!is_unsigned && !is_required) return name;

  // traits part (unsigned, required)
  if (is_unsigned) name.push_back('u');
  if (is_required) name.push_back('r');
  name.push_back('_');
  return name;
}
58 
docpath_hash(const std::string & path)59 std::string docpath_hash(const std::string &path) {
60   return generate_hash(path.size() > 2 ? path.substr(2)  // skip '$.'
61                                        : path);          // hash for '$'
62 }
63 
extract_type_details(const std::string & type_name,int32_t * precision,int32_t * scale,bool * is_unsigned)64 void extract_type_details(const std::string &type_name, int32_t *precision,
65                           int32_t *scale, bool *is_unsigned) {
66   static const Regex re(
67       "\\w+(?:\\(([0-9]+)(?: *, *([0-9]+))?\\))?( +UNSIGNED)?.*");
68   Regex::Group_list groups;
69   if (!re.match_groups(type_name.c_str(), &groups, false) || groups.size() < 4)
70     return;
71   *precision = groups[1].empty() ? -1 : std::stoi(groups[1]);
72   *scale = groups[2].empty() ? -1 : std::stoi(groups[2]);
73   *is_unsigned = !groups[3].empty();
74 }
75 
get_virtual_column_name(const char * prefix,const std::string & type_name,const std::string & path,const bool is_required)76 std::string get_virtual_column_name(const char *prefix,
77                                     const std::string &type_name,
78                                     const std::string &path,
79                                     const bool is_required) {
80   bool is_unsigned{false};
81   int32_t precision{-1}, scale{-1};
82   extract_type_details(type_name, &precision, &scale, &is_unsigned);
83   return get_prefix(prefix, precision, scale, is_unsigned, is_required) +
84          docpath_hash(path);
85 }
86 
87 }  // namespace
88 
add_column_if_necessary(iface::Sql_session * sql_session,const std::string & schema,const std::string & collection,Query_string_builder * qb) const89 ngs::Error_code Index_field::add_column_if_necessary(
90     iface::Sql_session *sql_session, const std::string &schema,
91     const std::string &collection, Query_string_builder *qb) const {
92   ngs::Error_code error;
93   const bool is_field_exists =
94       is_column_exists(sql_session, schema, collection, &error);
95   if (error) return error;
96   if (!is_field_exists) {
97     add_column(qb);
98     qb->put(",");
99   }
100   return ngs::Success();
101 }
102 
is_column_exists(iface::Sql_session * sql_session,const std::string & schema_name,const std::string & table_name,ngs::Error_code * error) const103 bool Index_field::is_column_exists(iface::Sql_session *sql_session,
104                                    const std::string &schema_name,
105                                    const std::string &table_name,
106                                    ngs::Error_code *error) const {
107   Query_string_builder qb;
108   qb.put("SHOW COLUMNS FROM ")
109       .quote_identifier(schema_name)
110       .dot()
111       .quote_identifier(table_name)
112       .put(" WHERE Field = ")
113       .quote_string(m_name);
114 
115   Collect_resultset resultset;
116   *error = sql_session->execute(qb.get().data(), qb.get().length(), &resultset);
117   return resultset.get_row_list().size() > 0;
118 }
119 
add_column(Query_string_builder * qb) const120 void Index_field::add_column(Query_string_builder *qb) const {
121   qb->put(" ADD COLUMN ").quote_identifier(m_name).put(" ");
122   add_type(qb);
123   qb->put(" GENERATED ALWAYS AS (");
124   add_path(qb);
125   qb->put(") ");
126   add_options(qb);
127 }
128 
add_field(Query_string_builder * qb) const129 void Index_field::add_field(Query_string_builder *qb) const {
130   qb->quote_identifier(m_name);
131   add_length(qb);
132 }
133 
add_options(Query_string_builder * qb) const134 void Index_field::add_options(Query_string_builder *qb) const {
135   qb->put(m_is_virtual_allowed ? "VIRTUAL" : "STORED");
136   if (m_is_required) qb->put(" NOT NULL");
137 }
138 
get_type_id(const std::string & type_name)139 Index_field::Type_id Index_field::get_type_id(const std::string &type_name) {
140   static const std::array<const char *const, 22> VALID_TYPES{
141       {"TINYINT", "SMALLINT",  "MEDIUMINT", "INT",     "INTEGER", "BIGINT",
142        "REAL",    "FLOAT",     "DOUBLE",    "DECIMAL", "NUMERIC", "DATE",
143        "TIME",    "TIMESTAMP", "DATETIME",  "YEAR",    "BIT",     "BLOB",
144        "TEXT",    "GEOJSON",   "FULLTEXT",  "CHAR"}};
145   std::string name(type_name);
146   std::transform(name.begin(), name.end(), name.begin(), ::toupper);
147   auto i = std::find_if(VALID_TYPES.begin(), VALID_TYPES.end(),
148                         [&name](const char *const arg) {
149                           return std::strcmp(name.c_str(), arg) == 0;
150                         });
151   return i == VALID_TYPES.end()
152              ? Type_id::k_unsupported
153              : static_cast<Type_id>(std::distance(VALID_TYPES.begin(), i));
154 }
155 
156 ///////////////////////////////////
157 
158 class Index_numeric_field : public Index_field {
159  public:
Index_numeric_field(const char * const prefix,const std::string & type_name,const std::string & path,const bool is_required,const bool is_virtual_allowed)160   Index_numeric_field(const char *const prefix, const std::string &type_name,
161                       const std::string &path, const bool is_required,
162                       const bool is_virtual_allowed)
163       : Index_field(
164             path, is_required,
165             get_virtual_column_name(prefix, type_name, path, is_required),
166             is_virtual_allowed),
167         m_type_name(type_name) {}
168 
169  protected:
add_type(Query_string_builder * qb) const170   void add_type(Query_string_builder *qb) const override {
171     qb->put(m_type_name);
172   }
173 
add_path(Query_string_builder * qb) const174   void add_path(Query_string_builder *qb) const override {
175     qb->put("JSON_EXTRACT(doc, ").quote_string(m_path).put(")");
176   }
177 
178   const std::string m_type_name;
179 };
180 
181 class Index_string_field : public Index_field {
182  public:
Index_string_field(const char * const prefix,const std::string & type_name,const std::string & path,const bool is_required,const bool is_virtual_allowed)183   Index_string_field(const char *const prefix, const std::string &type_name,
184                      const std::string &path, const bool is_required,
185                      const bool is_virtual_allowed)
186       : Index_field(
187             path, is_required,
188             get_virtual_column_name(prefix, type_name, path, is_required),
189             is_virtual_allowed),
190         m_type_name(type_name) {}
191 
192  protected:
add_type(Query_string_builder * qb) const193   void add_type(Query_string_builder *qb) const override {
194     qb->put(m_type_name);
195   }
196 
add_path(Query_string_builder * qb) const197   void add_path(Query_string_builder *qb) const override {
198     qb->put("JSON_UNQUOTE(JSON_EXTRACT(doc, ").quote_string(m_path).put("))");
199   }
200 
201   const std::string m_type_name;
202 };
203 
204 class Index_binary_field : public Index_string_field {
205  public:
Index_binary_field(const char * const prefix,const std::string & type_name,const std::string & length,const std::string & path,const bool is_required,const bool is_virtual_allowed)206   Index_binary_field(const char *const prefix, const std::string &type_name,
207                      const std::string &length, const std::string &path,
208                      const bool is_required, const bool is_virtual_allowed)
209       : Index_string_field(prefix, type_name, path, is_required,
210                            is_virtual_allowed),
211         m_length(length) {}
212 
213  protected:
add_length(Query_string_builder * qb) const214   void add_length(Query_string_builder *qb) const override {
215     qb->put(m_length);
216   }
217   const std::string m_length;
218 };
219 
220 class Index_text_field : public Index_binary_field {
221  public:
Index_text_field(const char * const prefix,const std::string & type_name,const std::string & length,const std::string & path,const bool is_required,const bool is_virtual_allowed)222   Index_text_field(const char *const prefix, const std::string &type_name,
223                    const std::string &length, const std::string &path,
224                    const bool is_required, const bool is_virtual_allowed)
225       : Index_binary_field(prefix, type_name, length, path, is_required,
226                            is_virtual_allowed) {}
227 
228  protected:
add_type(Query_string_builder * qb) const229   void add_type(Query_string_builder *qb) const override {
230     std::string type_name(m_type_name);
231     auto pos = type_name.find(m_length);
232     type_name.erase(pos, m_length.size());
233     qb->put(type_name);
234   }
235 };
236 
237 class Index_geojson_field : public Index_field {
238  public:
Index_geojson_field(const int64_t options,const int64_t srid,const std::string & path,const bool is_required)239   Index_geojson_field(const int64_t options, const int64_t srid,
240                       const std::string &path, const bool is_required)
241       : Index_field(
242             path, is_required,
243             get_prefix("gj", -1, -1, false, is_required) + docpath_hash(path),
244             false),
245         m_options(options),
246         m_srid(srid) {}
247 
248  protected:
add_type(Query_string_builder * qb) const249   void add_type(Query_string_builder *qb) const override {
250     qb->put("GEOMETRY");
251   }
252 
add_path(Query_string_builder * qb) const253   void add_path(Query_string_builder *qb) const override {
254     qb->put("ST_GEOMFROMGEOJSON(JSON_EXTRACT(doc, ")
255         .quote_string(m_path)
256         .put("),")
257         .put(m_options)
258         .put(",")
259         .put(m_srid)
260         .put(")");
261   }
262 
add_options(Query_string_builder * qb) const263   void add_options(Query_string_builder *qb) const override {
264     Index_field::add_options(qb);
265     qb->put(" SRID ").put(m_srid);
266   }
267 
268   const int64_t m_options, m_srid;
269 };
270 
271 class Index_fulltext_field : public Index_field {
272  public:
Index_fulltext_field(const std::string & path,const bool is_required)273   Index_fulltext_field(const std::string &path, const bool is_required)
274       : Index_field(
275             path, is_required,
276             get_prefix("ft", -1, -1, false, is_required) + docpath_hash(path),
277             false) {}
278 
279  protected:
add_type(Query_string_builder * qb) const280   void add_type(Query_string_builder *qb) const override { qb->put("TEXT"); }
281 
add_path(Query_string_builder * qb) const282   void add_path(Query_string_builder *qb) const override {
283     qb->put("JSON_UNQUOTE(JSON_EXTRACT(doc, ").quote_string(m_path).put("))");
284   }
285 };
286 
287 /////////////////////////////////////////////
288 
289 namespace {
290 
// Tells whether arg differs from the largest uint64_t value.
// NOTE(review): that maximum presumably marks an argument the client did not
// supply - confirm against Admin_command_index::Index_field_info.
inline bool is_valid(const uint64_t arg) {
  constexpr uint64_t k_reserved_value = std::numeric_limits<uint64_t>::max();
  return k_reserved_value != arg;
}
294 
295 }  // namespace
296 
create(const bool is_virtual_allowed,const Admin_command_index::Index_field_info & info,ngs::Error_code * error)297 const Index_field *Index_field::create(
298     const bool is_virtual_allowed,
299     const Admin_command_index::Index_field_info &info, ngs::Error_code *error) {
300   if (info.m_path.empty()) {
301     *error = ngs::Error(ER_X_CMD_ARGUMENT_VALUE,
302                         "Argument value for document member is invalid");
303     return nullptr;
304   }
305 
306   static const Regex re{
307       "(BIT)(?:\\([0-9]+\\))?|"
308       "(TINYINT|SMALLINT|MEDIUMINT|INT|INTEGER|BIGINT)"
309       "(?:\\([0-9]+\\))?(?: +UNSIGNED)?|"
310       "(DECIMAL|FLOAT|DOUBLE|REAL|NUMERIC)"
311       "(?:\\([0-9]+(?: *, *[0-9]+)?\\))?(?: +UNSIGNED)?|"
312       "(DATE)|(TIME|TIMESTAMP|DATETIME)(?:\\([0-6]\\))?|(YEAR)(?:\\(4\\))?|"
313       "(BLOB)(?:(\\([0-9]+\\)))?|"
314       "(CHAR|TEXT)(?:(\\([0-9]+\\)))?"
315       "(?: +(?:CHARACTER SET|CHARSET) +\\w+)?(?: +COLLATE +\\w+)?|"
316       "(GEOJSON|FULLTEXT)",
317   };
318 
319   Regex::Group_list re_groups;
320   if (!re.match_groups(info.m_type.c_str(), &re_groups)) {
321     *error = ngs::Error(ER_X_CMD_ARGUMENT_VALUE,
322                         "Invalid or unsupported type specification '%s'",
323                         info.m_type.c_str());
324     return nullptr;
325   }
326 
327   const std::string &type_name = re_groups[1];
328   const std::string &length =
329       re_groups.size() > 2 ? re_groups[2] : std::string();
330 
331   auto type_id = get_type_id(type_name);
332 
333   if (type_id != Type_id::k_geojson &&
334       (is_valid(info.m_options) || is_valid(info.m_srid))) {
335     *error = ngs::Error(ER_X_CMD_ARGUMENT_VALUE,
336                         "Unsupported argument specification for '%s'",
337                         info.m_path.c_str());
338     return nullptr;
339   }
340 
341   switch (type_id) {
342     case Type_id::k_tinyint:
343       return new Index_numeric_field("it", info.m_type, info.m_path,
344                                      info.m_is_required, is_virtual_allowed);
345 
346     case Type_id::k_smallint:
347       return new Index_numeric_field("is", info.m_type, info.m_path,
348                                      info.m_is_required, is_virtual_allowed);
349 
350     case Type_id::k_mediumint:
351       return new Index_numeric_field("im", info.m_type, info.m_path,
352                                      info.m_is_required, is_virtual_allowed);
353 
354     case Type_id::k_int:
355     case Type_id::k_integer:
356       return new Index_numeric_field("i", info.m_type, info.m_path,
357                                      info.m_is_required, is_virtual_allowed);
358 
359     case Type_id::k_bigint:
360       return new Index_numeric_field("ib", info.m_type, info.m_path,
361                                      info.m_is_required, is_virtual_allowed);
362 
363     case Type_id::k_real:
364       return new Index_numeric_field("fr", info.m_type, info.m_path,
365                                      info.m_is_required, is_virtual_allowed);
366 
367     case Type_id::k_float:
368       return new Index_numeric_field("f", info.m_type, info.m_path,
369                                      info.m_is_required, is_virtual_allowed);
370 
371     case Type_id::k_double:
372       return new Index_numeric_field("fd", info.m_type, info.m_path,
373                                      info.m_is_required, is_virtual_allowed);
374 
375     case Type_id::k_decimal:
376       return new Index_numeric_field("xd", info.m_type, info.m_path,
377                                      info.m_is_required, is_virtual_allowed);
378 
379     case Type_id::k_numeric:
380       return new Index_numeric_field("xn", info.m_type, info.m_path,
381                                      info.m_is_required, is_virtual_allowed);
382 
383     case Type_id::k_date:
384       return new Index_string_field("d", info.m_type, info.m_path,
385                                     info.m_is_required, is_virtual_allowed);
386 
387     case Type_id::k_time:
388       return new Index_string_field("dt", info.m_type, info.m_path,
389                                     info.m_is_required, is_virtual_allowed);
390 
391     case Type_id::k_timestamp:
392       return new Index_string_field("ds", info.m_type, info.m_path,
393                                     info.m_is_required, is_virtual_allowed);
394 
395     case Type_id::k_datetime:
396       return new Index_string_field("dd", info.m_type, info.m_path,
397                                     info.m_is_required, is_virtual_allowed);
398 
399     case Type_id::k_year:
400       return new Index_string_field("dy", info.m_type, info.m_path,
401                                     info.m_is_required, is_virtual_allowed);
402 
403     case Type_id::k_bit:
404       return new Index_string_field("t", info.m_type, info.m_path,
405                                     info.m_is_required, is_virtual_allowed);
406 
407     case Type_id::k_blob:
408       return new Index_binary_field("bt", info.m_type, length, info.m_path,
409                                     info.m_is_required, is_virtual_allowed);
410 
411     case Type_id::k_text:
412       return new Index_text_field("t", info.m_type, length, info.m_path,
413                                   info.m_is_required, is_virtual_allowed);
414 
415     case Type_id::k_geojson:
416       return new Index_geojson_field(
417           is_valid(info.m_options) ? info.m_options : 1,
418           is_valid(info.m_srid) ? info.m_srid : 4326, info.m_path,
419           info.m_is_required);
420 
421     case Type_id::k_fulltext:
422       return new Index_fulltext_field(info.m_path, info.m_is_required);
423 
424     case Type_id::k_char:
425       return new Index_string_field("c", info.m_type, info.m_path,
426                                     info.m_is_required, is_virtual_allowed);
427 
428     case Type_id::k_unsupported:
429       *error = ngs::Error(ER_X_CMD_ARGUMENT_VALUE,
430                           "Invalid or unsupported type specification '%s'",
431                           info.m_type.c_str());
432       break;
433   }
434   return nullptr;
435 }
436 
437 }  // namespace xpl
438