1/*****************************************************************************
2
3Copyright (c) 2007, 2020, Oracle and/or its affiliates. All Rights Reserved.
4
5This program is free software; you can redistribute it and/or modify it under
6the terms of the GNU General Public License, version 2.0, as published by the
7Free Software Foundation.
8
9This program is also distributed with certain software (including but not
10limited to OpenSSL) that is licensed under separate terms, as designated in a
11particular file or component or in included license documentation. The authors
12of MySQL hereby grant you an additional permission to link the program and
13your derivative works with the separately licensed software that they have
14included with MySQL.
15
16This program is distributed in the hope that it will be useful, but WITHOUT
17ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
19for more details.
20
21You should have received a copy of the GNU General Public License along with
22this program; if not, write to the Free Software Foundation, Inc.,
2351 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
24
25*****************************************************************************/
26
27/** @file include/fts0types.ic
28 Full text search types.
29
30 Created 2007-03-27 Sunny Bains
31 *******************************************************/
32
33#ifndef INNOBASE_FTS0TYPES_IC
34#define INNOBASE_FTS0TYPES_IC
35
36#include "ha_prototypes.h"
37#include "rem0cmp.h"
38
39/** Duplicate a string. */
40UNIV_INLINE
41void fts_string_dup(fts_string_t *dst,       /*!< in: dup to here */
42                    const fts_string_t *src, /*!< in: src string */
43                    mem_heap_t *heap)        /*!< in: heap to use */
44{
45  dst->f_str = (byte *)mem_heap_alloc(heap, src->f_len + 1);
46  memcpy(dst->f_str, src->f_str, src->f_len);
47
48  dst->f_len = src->f_len;
49  dst->f_str[src->f_len] = 0;
50  dst->f_n_char = src->f_n_char;
51}
52
53/** Compare two fts_trx_row_t doc_ids.
54 @return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
55UNIV_INLINE
56int fts_trx_row_doc_id_cmp(const void *p1, /*!< in: id1 */
57                           const void *p2) /*!< in: id2 */
58{
59  const fts_trx_row_t *tr1 = (const fts_trx_row_t *)p1;
60  const fts_trx_row_t *tr2 = (const fts_trx_row_t *)p2;
61
62  return ((int)(tr1->doc_id - tr2->doc_id));
63}
64
65/** Compare two fts_ranking_t doc_ids.
66 @return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
67UNIV_INLINE
68int fts_ranking_doc_id_cmp(const void *p1, /*!< in: id1 */
69                           const void *p2) /*!< in: id2 */
70{
71  const fts_ranking_t *rk1 = (const fts_ranking_t *)p1;
72  const fts_ranking_t *rk2 = (const fts_ranking_t *)p2;
73
74  return ((int)(rk1->doc_id - rk2->doc_id));
75}
76
77/** Compare two fts_update_t doc_ids.
78 @return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
79UNIV_INLINE
80int fts_update_doc_id_cmp(const void *p1, /*!< in: id1 */
81                          const void *p2) /*!< in: id2 */
82{
83  const fts_update_t *up1 = (const fts_update_t *)p1;
84  const fts_update_t *up2 = (const fts_update_t *)p2;
85
86  return ((int)(up1->doc_id - up2->doc_id));
87}
88
89/** Get the first character's code position for FTS index partition */
90extern ulint innobase_strnxfrm(const CHARSET_INFO *cs, /*!< in: Character set */
91                               const uchar *p2,        /*!< in: string */
92                               const ulint len2);      /*!< in: string length */
93
94/** Check if fts index charset is cjk
95@param[in]	cs	charset
96@retval	true	if the charset is cjk
97@retval	false	if not. */
98UNIV_INLINE
99bool fts_is_charset_cjk(const CHARSET_INFO *cs) {
100  if (strcmp(cs->name, "gb2312_chinese_ci") == 0 ||
101      strcmp(cs->name, "gbk_chinese_ci") == 0 ||
102      strcmp(cs->name, "big5_chinese_ci") == 0 ||
103      strcmp(cs->name, "gb18030_chinese_ci") == 0 ||
104      strcmp(cs->name, "ujis_japanese_ci") == 0 ||
105      strcmp(cs->name, "sjis_japanese_ci") == 0 ||
106      strcmp(cs->name, "cp932_japanese_ci") == 0 ||
107      strcmp(cs->name, "eucjpms_japanese_ci") == 0 ||
108      strcmp(cs->name, "euckr_korean_ci") == 0) {
109    return (true);
110  } else {
111    return (false);
112  }
113}
114
115/** Select the FTS auxiliary index for the given character by range.
116@param[in]	cs	charset
117@param[in]	str	string
118@param[in]	len	string length
119@retval	the index to use for the string */
120UNIV_INLINE
121ulint fts_select_index_by_range(const CHARSET_INFO *cs, const byte *str,
122                                ulint len) {
123  ulint selected = 0;
124  ulint value = innobase_strnxfrm(cs, str, len);
125
126  while (fts_index_selector[selected].value != 0) {
127    if (fts_index_selector[selected].value == value) {
128      return (selected);
129
130    } else if (fts_index_selector[selected].value > value) {
131      return (selected > 0 ? selected - 1 : 0);
132    }
133
134    ++selected;
135  }
136
137  ut_ad(selected > 1);
138
139  return (selected - 1);
140}
141
142/** Select the FTS auxiliary index for the given character by hash.
143@param[in]	cs	charset
144@param[in]	str	string
145@param[in]	len	string length
146@retval the index to use for the string */
147UNIV_INLINE
148ulint fts_select_index_by_hash(const CHARSET_INFO *cs, const byte *str,
149                               ulint len) {
150  int char_len;
151
152  ut_ad(!(str == nullptr && len > 0));
153
154  if (str == nullptr || len == 0) {
155    return 0;
156  }
157
158  /* Get the first char */
159  char_len = my_mbcharlen_ptr(cs, reinterpret_cast<const char *>(str),
160                              reinterpret_cast<const char *>(str + len));
161  ut_ad(static_cast<ulint>(char_len) <= len);
162
163  /*
164    Get collation hash code. Force truncation to ulong for legacy reasons;
165    it gives different results for Windows and Linux, but it needs to match
166    on-disk data.
167   */
168  uint64 nr1 = 1;
169  uint64 nr2 = 4;
170  cs->coll->hash_sort(cs, str, char_len, &nr1, &nr2);
171
172  return (static_cast<ulong>(nr1) % FTS_NUM_AUX_INDEX);
173}
174
175/** Select the FTS auxiliary index for the given character.
176@param[in]	cs	charset
177@param[in]	str	string
178@param[in]	len	string length in bytes
179@retval	the index to use for the string */
180UNIV_INLINE
181ulint fts_select_index(const CHARSET_INFO *cs, const byte *str, ulint len) {
182  ulint selected;
183
184  if (fts_is_charset_cjk(cs)) {
185    selected = fts_select_index_by_hash(cs, str, len);
186  } else {
187    selected = fts_select_index_by_range(cs, str, len);
188  }
189
190  return (selected);
191}
192
193/** Return the selected FTS aux index suffix. */
194UNIV_INLINE
195const char *fts_get_suffix(ulint selected) /*!< in: selected index */
196{
197  return (fts_index_selector[selected].suffix);
198}
199
200/** Return the selected FTS aux index suffix in 5.7 compatible format
201@param[in]	selected	selected index
202@return the suffix name */
203UNIV_INLINE
204const char *fts_get_suffix_5_7(ulint selected) {
205  return (fts_index_selector_5_7[selected].suffix);
206}
207
208#endif /* INNOBASE_FTS0TYPES_IC */
209