1 /*
2 * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License, version 2.0,
6 * as published by the Free Software Foundation.
7 *
8 * This program is also distributed with certain software (including
9 * but not limited to OpenSSL) that is licensed under separate terms,
10 * as designated in a particular file or component or in included license
11 * documentation. The authors of MySQL hereby grant you an additional
12 * permission to link the program and your derivative works with the
13 * separately licensed software that they have included with MySQL.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License, version 2.0, for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
25 #include "plugin/x/src/index_field.h"
26
27 #include <algorithm>
28 #include <cstring>
29 #include <limits>
30
31 #include "plugin/x/src/helper/generate_hash.h"
32 #include "plugin/x/src/query_string_builder.h"
33 #include "plugin/x/src/xpl_error.h"
34 #include "plugin/x/src/xpl_regex.h"
35 #include "plugin/x/src/xpl_resultset.h"
36
37 namespace xpl {
38
39 namespace {
40
/**
  Build the leading part of a generated column name.

  The result has the shape `$ix_<prefix>[<precision>][_<scale>]_[<traits>_]`.
  Precision and scale are appended only when they are greater than zero.
  The traits segment holds "u" when the field is unsigned and "r" when it is
  required (in that order), and is omitted when neither flag is set.

  @param prefix       short type marker placed right after "$ix_"
  @param precision    type precision; ignored unless positive
  @param scale        type scale; ignored unless positive
  @param is_unsigned  appends the "u" trait
  @param is_required  appends the "r" trait

  @return the assembled name prefix
*/
std::string get_prefix(const char *const prefix, const int32_t precision,
                       const int32_t scale, const bool is_unsigned,
                       const bool is_required) {
  // Type segment: fixed marker, type prefix, optional precision and scale.
  std::string name{"$ix_"};
  name.append(prefix);
  if (precision > 0) name.append(std::to_string(precision));
  if (scale > 0) name.append("_").append(std::to_string(scale));
  name.push_back('_');

  // Traits segment (unsigned, required, ...), closed by its own underscore.
  const bool has_traits = is_unsigned || is_required;
  if (is_unsigned) name.push_back('u');
  if (is_required) name.push_back('r');
  if (has_traits) name.push_back('_');

  return name;
}
58
docpath_hash(const std::string & path)59 std::string docpath_hash(const std::string &path) {
60 return generate_hash(path.size() > 2 ? path.substr(2) // skip '$.'
61 : path); // hash for '$'
62 }
63
extract_type_details(const std::string & type_name,int32_t * precision,int32_t * scale,bool * is_unsigned)64 void extract_type_details(const std::string &type_name, int32_t *precision,
65 int32_t *scale, bool *is_unsigned) {
66 static const Regex re(
67 "\\w+(?:\\(([0-9]+)(?: *, *([0-9]+))?\\))?( +UNSIGNED)?.*");
68 Regex::Group_list groups;
69 if (!re.match_groups(type_name.c_str(), &groups, false) || groups.size() < 4)
70 return;
71 *precision = groups[1].empty() ? -1 : std::stoi(groups[1]);
72 *scale = groups[2].empty() ? -1 : std::stoi(groups[2]);
73 *is_unsigned = !groups[3].empty();
74 }
75
get_virtual_column_name(const char * prefix,const std::string & type_name,const std::string & path,const bool is_required)76 std::string get_virtual_column_name(const char *prefix,
77 const std::string &type_name,
78 const std::string &path,
79 const bool is_required) {
80 bool is_unsigned{false};
81 int32_t precision{-1}, scale{-1};
82 extract_type_details(type_name, &precision, &scale, &is_unsigned);
83 return get_prefix(prefix, precision, scale, is_unsigned, is_required) +
84 docpath_hash(path);
85 }
86
87 } // namespace
88
add_column_if_necessary(iface::Sql_session * sql_session,const std::string & schema,const std::string & collection,Query_string_builder * qb) const89 ngs::Error_code Index_field::add_column_if_necessary(
90 iface::Sql_session *sql_session, const std::string &schema,
91 const std::string &collection, Query_string_builder *qb) const {
92 ngs::Error_code error;
93 const bool is_field_exists =
94 is_column_exists(sql_session, schema, collection, &error);
95 if (error) return error;
96 if (!is_field_exists) {
97 add_column(qb);
98 qb->put(",");
99 }
100 return ngs::Success();
101 }
102
is_column_exists(iface::Sql_session * sql_session,const std::string & schema_name,const std::string & table_name,ngs::Error_code * error) const103 bool Index_field::is_column_exists(iface::Sql_session *sql_session,
104 const std::string &schema_name,
105 const std::string &table_name,
106 ngs::Error_code *error) const {
107 Query_string_builder qb;
108 qb.put("SHOW COLUMNS FROM ")
109 .quote_identifier(schema_name)
110 .dot()
111 .quote_identifier(table_name)
112 .put(" WHERE Field = ")
113 .quote_string(m_name);
114
115 Collect_resultset resultset;
116 *error = sql_session->execute(qb.get().data(), qb.get().length(), &resultset);
117 return resultset.get_row_list().size() > 0;
118 }
119
add_column(Query_string_builder * qb) const120 void Index_field::add_column(Query_string_builder *qb) const {
121 qb->put(" ADD COLUMN ").quote_identifier(m_name).put(" ");
122 add_type(qb);
123 qb->put(" GENERATED ALWAYS AS (");
124 add_path(qb);
125 qb->put(") ");
126 add_options(qb);
127 }
128
add_field(Query_string_builder * qb) const129 void Index_field::add_field(Query_string_builder *qb) const {
130 qb->quote_identifier(m_name);
131 add_length(qb);
132 }
133
add_options(Query_string_builder * qb) const134 void Index_field::add_options(Query_string_builder *qb) const {
135 qb->put(m_is_virtual_allowed ? "VIRTUAL" : "STORED");
136 if (m_is_required) qb->put(" NOT NULL");
137 }
138
get_type_id(const std::string & type_name)139 Index_field::Type_id Index_field::get_type_id(const std::string &type_name) {
140 static const std::array<const char *const, 22> VALID_TYPES{
141 {"TINYINT", "SMALLINT", "MEDIUMINT", "INT", "INTEGER", "BIGINT",
142 "REAL", "FLOAT", "DOUBLE", "DECIMAL", "NUMERIC", "DATE",
143 "TIME", "TIMESTAMP", "DATETIME", "YEAR", "BIT", "BLOB",
144 "TEXT", "GEOJSON", "FULLTEXT", "CHAR"}};
145 std::string name(type_name);
146 std::transform(name.begin(), name.end(), name.begin(), ::toupper);
147 auto i = std::find_if(VALID_TYPES.begin(), VALID_TYPES.end(),
148 [&name](const char *const arg) {
149 return std::strcmp(name.c_str(), arg) == 0;
150 });
151 return i == VALID_TYPES.end()
152 ? Type_id::k_unsupported
153 : static_cast<Type_id>(std::distance(VALID_TYPES.begin(), i));
154 }
155
156 ///////////////////////////////////
157
158 class Index_numeric_field : public Index_field {
159 public:
Index_numeric_field(const char * const prefix,const std::string & type_name,const std::string & path,const bool is_required,const bool is_virtual_allowed)160 Index_numeric_field(const char *const prefix, const std::string &type_name,
161 const std::string &path, const bool is_required,
162 const bool is_virtual_allowed)
163 : Index_field(
164 path, is_required,
165 get_virtual_column_name(prefix, type_name, path, is_required),
166 is_virtual_allowed),
167 m_type_name(type_name) {}
168
169 protected:
add_type(Query_string_builder * qb) const170 void add_type(Query_string_builder *qb) const override {
171 qb->put(m_type_name);
172 }
173
add_path(Query_string_builder * qb) const174 void add_path(Query_string_builder *qb) const override {
175 qb->put("JSON_EXTRACT(doc, ").quote_string(m_path).put(")");
176 }
177
178 const std::string m_type_name;
179 };
180
181 class Index_string_field : public Index_field {
182 public:
Index_string_field(const char * const prefix,const std::string & type_name,const std::string & path,const bool is_required,const bool is_virtual_allowed)183 Index_string_field(const char *const prefix, const std::string &type_name,
184 const std::string &path, const bool is_required,
185 const bool is_virtual_allowed)
186 : Index_field(
187 path, is_required,
188 get_virtual_column_name(prefix, type_name, path, is_required),
189 is_virtual_allowed),
190 m_type_name(type_name) {}
191
192 protected:
add_type(Query_string_builder * qb) const193 void add_type(Query_string_builder *qb) const override {
194 qb->put(m_type_name);
195 }
196
add_path(Query_string_builder * qb) const197 void add_path(Query_string_builder *qb) const override {
198 qb->put("JSON_UNQUOTE(JSON_EXTRACT(doc, ").quote_string(m_path).put("))");
199 }
200
201 const std::string m_type_name;
202 };
203
204 class Index_binary_field : public Index_string_field {
205 public:
Index_binary_field(const char * const prefix,const std::string & type_name,const std::string & length,const std::string & path,const bool is_required,const bool is_virtual_allowed)206 Index_binary_field(const char *const prefix, const std::string &type_name,
207 const std::string &length, const std::string &path,
208 const bool is_required, const bool is_virtual_allowed)
209 : Index_string_field(prefix, type_name, path, is_required,
210 is_virtual_allowed),
211 m_length(length) {}
212
213 protected:
add_length(Query_string_builder * qb) const214 void add_length(Query_string_builder *qb) const override {
215 qb->put(m_length);
216 }
217 const std::string m_length;
218 };
219
220 class Index_text_field : public Index_binary_field {
221 public:
Index_text_field(const char * const prefix,const std::string & type_name,const std::string & length,const std::string & path,const bool is_required,const bool is_virtual_allowed)222 Index_text_field(const char *const prefix, const std::string &type_name,
223 const std::string &length, const std::string &path,
224 const bool is_required, const bool is_virtual_allowed)
225 : Index_binary_field(prefix, type_name, length, path, is_required,
226 is_virtual_allowed) {}
227
228 protected:
add_type(Query_string_builder * qb) const229 void add_type(Query_string_builder *qb) const override {
230 std::string type_name(m_type_name);
231 auto pos = type_name.find(m_length);
232 type_name.erase(pos, m_length.size());
233 qb->put(type_name);
234 }
235 };
236
237 class Index_geojson_field : public Index_field {
238 public:
Index_geojson_field(const int64_t options,const int64_t srid,const std::string & path,const bool is_required)239 Index_geojson_field(const int64_t options, const int64_t srid,
240 const std::string &path, const bool is_required)
241 : Index_field(
242 path, is_required,
243 get_prefix("gj", -1, -1, false, is_required) + docpath_hash(path),
244 false),
245 m_options(options),
246 m_srid(srid) {}
247
248 protected:
add_type(Query_string_builder * qb) const249 void add_type(Query_string_builder *qb) const override {
250 qb->put("GEOMETRY");
251 }
252
add_path(Query_string_builder * qb) const253 void add_path(Query_string_builder *qb) const override {
254 qb->put("ST_GEOMFROMGEOJSON(JSON_EXTRACT(doc, ")
255 .quote_string(m_path)
256 .put("),")
257 .put(m_options)
258 .put(",")
259 .put(m_srid)
260 .put(")");
261 }
262
add_options(Query_string_builder * qb) const263 void add_options(Query_string_builder *qb) const override {
264 Index_field::add_options(qb);
265 qb->put(" SRID ").put(m_srid);
266 }
267
268 const int64_t m_options, m_srid;
269 };
270
271 class Index_fulltext_field : public Index_field {
272 public:
Index_fulltext_field(const std::string & path,const bool is_required)273 Index_fulltext_field(const std::string &path, const bool is_required)
274 : Index_field(
275 path, is_required,
276 get_prefix("ft", -1, -1, false, is_required) + docpath_hash(path),
277 false) {}
278
279 protected:
add_type(Query_string_builder * qb) const280 void add_type(Query_string_builder *qb) const override { qb->put("TEXT"); }
281
add_path(Query_string_builder * qb) const282 void add_path(Query_string_builder *qb) const override {
283 qb->put("JSON_UNQUOTE(JSON_EXTRACT(doc, ").quote_string(m_path).put("))");
284 }
285 };
286
287 /////////////////////////////////////////////
288
289 namespace {
290
/**
  Tell whether an unsigned 64-bit argument holds a usable value.

  The largest uint64_t value is treated as the "invalid" marker; every other
  value is valid.

  @param arg  value to inspect

  @return false only for std::numeric_limits<uint64_t>::max()
*/
inline bool is_valid(const uint64_t arg) {
  constexpr uint64_t k_reserved = std::numeric_limits<uint64_t>::max();
  return !(arg == k_reserved);
}
294
295 } // namespace
296
create(const bool is_virtual_allowed,const Admin_command_index::Index_field_info & info,ngs::Error_code * error)297 const Index_field *Index_field::create(
298 const bool is_virtual_allowed,
299 const Admin_command_index::Index_field_info &info, ngs::Error_code *error) {
300 if (info.m_path.empty()) {
301 *error = ngs::Error(ER_X_CMD_ARGUMENT_VALUE,
302 "Argument value for document member is invalid");
303 return nullptr;
304 }
305
306 static const Regex re{
307 "(BIT)(?:\\([0-9]+\\))?|"
308 "(TINYINT|SMALLINT|MEDIUMINT|INT|INTEGER|BIGINT)"
309 "(?:\\([0-9]+\\))?(?: +UNSIGNED)?|"
310 "(DECIMAL|FLOAT|DOUBLE|REAL|NUMERIC)"
311 "(?:\\([0-9]+(?: *, *[0-9]+)?\\))?(?: +UNSIGNED)?|"
312 "(DATE)|(TIME|TIMESTAMP|DATETIME)(?:\\([0-6]\\))?|(YEAR)(?:\\(4\\))?|"
313 "(BLOB)(?:(\\([0-9]+\\)))?|"
314 "(CHAR|TEXT)(?:(\\([0-9]+\\)))?"
315 "(?: +(?:CHARACTER SET|CHARSET) +\\w+)?(?: +COLLATE +\\w+)?|"
316 "(GEOJSON|FULLTEXT)",
317 };
318
319 Regex::Group_list re_groups;
320 if (!re.match_groups(info.m_type.c_str(), &re_groups)) {
321 *error = ngs::Error(ER_X_CMD_ARGUMENT_VALUE,
322 "Invalid or unsupported type specification '%s'",
323 info.m_type.c_str());
324 return nullptr;
325 }
326
327 const std::string &type_name = re_groups[1];
328 const std::string &length =
329 re_groups.size() > 2 ? re_groups[2] : std::string();
330
331 auto type_id = get_type_id(type_name);
332
333 if (type_id != Type_id::k_geojson &&
334 (is_valid(info.m_options) || is_valid(info.m_srid))) {
335 *error = ngs::Error(ER_X_CMD_ARGUMENT_VALUE,
336 "Unsupported argument specification for '%s'",
337 info.m_path.c_str());
338 return nullptr;
339 }
340
341 switch (type_id) {
342 case Type_id::k_tinyint:
343 return new Index_numeric_field("it", info.m_type, info.m_path,
344 info.m_is_required, is_virtual_allowed);
345
346 case Type_id::k_smallint:
347 return new Index_numeric_field("is", info.m_type, info.m_path,
348 info.m_is_required, is_virtual_allowed);
349
350 case Type_id::k_mediumint:
351 return new Index_numeric_field("im", info.m_type, info.m_path,
352 info.m_is_required, is_virtual_allowed);
353
354 case Type_id::k_int:
355 case Type_id::k_integer:
356 return new Index_numeric_field("i", info.m_type, info.m_path,
357 info.m_is_required, is_virtual_allowed);
358
359 case Type_id::k_bigint:
360 return new Index_numeric_field("ib", info.m_type, info.m_path,
361 info.m_is_required, is_virtual_allowed);
362
363 case Type_id::k_real:
364 return new Index_numeric_field("fr", info.m_type, info.m_path,
365 info.m_is_required, is_virtual_allowed);
366
367 case Type_id::k_float:
368 return new Index_numeric_field("f", info.m_type, info.m_path,
369 info.m_is_required, is_virtual_allowed);
370
371 case Type_id::k_double:
372 return new Index_numeric_field("fd", info.m_type, info.m_path,
373 info.m_is_required, is_virtual_allowed);
374
375 case Type_id::k_decimal:
376 return new Index_numeric_field("xd", info.m_type, info.m_path,
377 info.m_is_required, is_virtual_allowed);
378
379 case Type_id::k_numeric:
380 return new Index_numeric_field("xn", info.m_type, info.m_path,
381 info.m_is_required, is_virtual_allowed);
382
383 case Type_id::k_date:
384 return new Index_string_field("d", info.m_type, info.m_path,
385 info.m_is_required, is_virtual_allowed);
386
387 case Type_id::k_time:
388 return new Index_string_field("dt", info.m_type, info.m_path,
389 info.m_is_required, is_virtual_allowed);
390
391 case Type_id::k_timestamp:
392 return new Index_string_field("ds", info.m_type, info.m_path,
393 info.m_is_required, is_virtual_allowed);
394
395 case Type_id::k_datetime:
396 return new Index_string_field("dd", info.m_type, info.m_path,
397 info.m_is_required, is_virtual_allowed);
398
399 case Type_id::k_year:
400 return new Index_string_field("dy", info.m_type, info.m_path,
401 info.m_is_required, is_virtual_allowed);
402
403 case Type_id::k_bit:
404 return new Index_string_field("t", info.m_type, info.m_path,
405 info.m_is_required, is_virtual_allowed);
406
407 case Type_id::k_blob:
408 return new Index_binary_field("bt", info.m_type, length, info.m_path,
409 info.m_is_required, is_virtual_allowed);
410
411 case Type_id::k_text:
412 return new Index_text_field("t", info.m_type, length, info.m_path,
413 info.m_is_required, is_virtual_allowed);
414
415 case Type_id::k_geojson:
416 return new Index_geojson_field(
417 is_valid(info.m_options) ? info.m_options : 1,
418 is_valid(info.m_srid) ? info.m_srid : 4326, info.m_path,
419 info.m_is_required);
420
421 case Type_id::k_fulltext:
422 return new Index_fulltext_field(info.m_path, info.m_is_required);
423
424 case Type_id::k_char:
425 return new Index_string_field("c", info.m_type, info.m_path,
426 info.m_is_required, is_virtual_allowed);
427
428 case Type_id::k_unsupported:
429 *error = ngs::Error(ER_X_CMD_ARGUMENT_VALUE,
430 "Invalid or unsupported type specification '%s'",
431 info.m_type.c_str());
432 break;
433 }
434 return nullptr;
435 }
436
437 } // namespace xpl
438