1/***************************************************************************** 2 3Copyright (c) 2007, 2020, Oracle and/or its affiliates. All Rights Reserved. 4 5This program is free software; you can redistribute it and/or modify it under 6the terms of the GNU General Public License, version 2.0, as published by the 7Free Software Foundation. 8 9This program is also distributed with certain software (including but not 10limited to OpenSSL) that is licensed under separate terms, as designated in a 11particular file or component or in included license documentation. The authors 12of MySQL hereby grant you an additional permission to link the program and 13your derivative works with the separately licensed software that they have 14included with MySQL. 15 16This program is distributed in the hope that it will be useful, but WITHOUT 17ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 18FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0, 19for more details. 20 21You should have received a copy of the GNU General Public License along with 22this program; if not, write to the Free Software Foundation, Inc., 2351 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 24 25*****************************************************************************/ 26 27/** @file include/fts0types.ic 28 Full text search types. 29 30 Created 2007-03-27 Sunny Bains 31 *******************************************************/ 32 33#ifndef INNOBASE_FTS0TYPES_IC 34#define INNOBASE_FTS0TYPES_IC 35 36#include "ha_prototypes.h" 37#include "rem0cmp.h" 38 39/** Duplicate a string. */ 40UNIV_INLINE 41void fts_string_dup(fts_string_t *dst, /*!< in: dup to here */ 42 const fts_string_t *src, /*!< in: src string */ 43 mem_heap_t *heap) /*!< in: heap to use */ 44{ 45 dst->f_str = (byte *)mem_heap_alloc(heap, src->f_len + 1); 46 memcpy(dst->f_str, src->f_str, src->f_len); 47 48 dst->f_len = src->f_len; 49 dst->f_str[src->f_len] = 0; 50 dst->f_n_char = src->f_n_char; 51} 52 53/** Compare two fts_trx_row_t doc_ids. 54 @return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */ 55UNIV_INLINE 56int fts_trx_row_doc_id_cmp(const void *p1, /*!< in: id1 */ 57 const void *p2) /*!< in: id2 */ 58{ 59 const fts_trx_row_t *tr1 = (const fts_trx_row_t *)p1; 60 const fts_trx_row_t *tr2 = (const fts_trx_row_t *)p2; 61 62 return ((int)(tr1->doc_id - tr2->doc_id)); 63} 64 65/** Compare two fts_ranking_t doc_ids. 66 @return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */ 67UNIV_INLINE 68int fts_ranking_doc_id_cmp(const void *p1, /*!< in: id1 */ 69 const void *p2) /*!< in: id2 */ 70{ 71 const fts_ranking_t *rk1 = (const fts_ranking_t *)p1; 72 const fts_ranking_t *rk2 = (const fts_ranking_t *)p2; 73 74 return ((int)(rk1->doc_id - rk2->doc_id)); 75} 76 77/** Compare two fts_update_t doc_ids. 78 @return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */ 79UNIV_INLINE 80int fts_update_doc_id_cmp(const void *p1, /*!< in: id1 */ 81 const void *p2) /*!< in: id2 */ 82{ 83 const fts_update_t *up1 = (const fts_update_t *)p1; 84 const fts_update_t *up2 = (const fts_update_t *)p2; 85 86 return ((int)(up1->doc_id - up2->doc_id)); 87} 88 89/** Get the first character's code position for FTS index partition */ 90extern ulint innobase_strnxfrm(const CHARSET_INFO *cs, /*!< in: Character set */ 91 const uchar *p2, /*!< in: string */ 92 const ulint len2); /*!< in: string length */ 93 94/** Check if fts index charset is cjk 95@param[in] cs charset 96@retval true if the charset is cjk 97@retval false if not. */ 98UNIV_INLINE 99bool fts_is_charset_cjk(const CHARSET_INFO *cs) { 100 if (strcmp(cs->name, "gb2312_chinese_ci") == 0 || 101 strcmp(cs->name, "gbk_chinese_ci") == 0 || 102 strcmp(cs->name, "big5_chinese_ci") == 0 || 103 strcmp(cs->name, "gb18030_chinese_ci") == 0 || 104 strcmp(cs->name, "ujis_japanese_ci") == 0 || 105 strcmp(cs->name, "sjis_japanese_ci") == 0 || 106 strcmp(cs->name, "cp932_japanese_ci") == 0 || 107 strcmp(cs->name, "eucjpms_japanese_ci") == 0 || 108 strcmp(cs->name, "euckr_korean_ci") == 0) { 109 return (true); 110 } else { 111 return (false); 112 } 113} 114 115/** Select the FTS auxiliary index for the given character by range. 116@param[in] cs charset 117@param[in] str string 118@param[in] len string length 119@retval the index to use for the string */ 120UNIV_INLINE 121ulint fts_select_index_by_range(const CHARSET_INFO *cs, const byte *str, 122 ulint len) { 123 ulint selected = 0; 124 ulint value = innobase_strnxfrm(cs, str, len); 125 126 while (fts_index_selector[selected].value != 0) { 127 if (fts_index_selector[selected].value == value) { 128 return (selected); 129 130 } else if (fts_index_selector[selected].value > value) { 131 return (selected > 0 ? selected - 1 : 0); 132 } 133 134 ++selected; 135 } 136 137 ut_ad(selected > 1); 138 139 return (selected - 1); 140} 141 142/** Select the FTS auxiliary index for the given character by hash. 143@param[in] cs charset 144@param[in] str string 145@param[in] len string length 146@retval the index to use for the string */ 147UNIV_INLINE 148ulint fts_select_index_by_hash(const CHARSET_INFO *cs, const byte *str, 149 ulint len) { 150 int char_len; 151 152 ut_ad(!(str == nullptr && len > 0)); 153 154 if (str == nullptr || len == 0) { 155 return 0; 156 } 157 158 /* Get the first char */ 159 char_len = my_mbcharlen_ptr(cs, reinterpret_cast<const char *>(str), 160 reinterpret_cast<const char *>(str + len)); 161 ut_ad(static_cast<ulint>(char_len) <= len); 162 163 /* 164 Get collation hash code. Force truncation to ulong for legacy reasons; 165 it gives different results for Windows and Linux, but it needs to match 166 on-disk data. 167 */ 168 uint64 nr1 = 1; 169 uint64 nr2 = 4; 170 cs->coll->hash_sort(cs, str, char_len, &nr1, &nr2); 171 172 return (static_cast<ulong>(nr1) % FTS_NUM_AUX_INDEX); 173} 174 175/** Select the FTS auxiliary index for the given character. 176@param[in] cs charset 177@param[in] str string 178@param[in] len string length in bytes 179@retval the index to use for the string */ 180UNIV_INLINE 181ulint fts_select_index(const CHARSET_INFO *cs, const byte *str, ulint len) { 182 ulint selected; 183 184 if (fts_is_charset_cjk(cs)) { 185 selected = fts_select_index_by_hash(cs, str, len); 186 } else { 187 selected = fts_select_index_by_range(cs, str, len); 188 } 189 190 return (selected); 191} 192 193/** Return the selected FTS aux index suffix. */ 194UNIV_INLINE 195const char *fts_get_suffix(ulint selected) /*!< in: selected index */ 196{ 197 return (fts_index_selector[selected].suffix); 198} 199 200/** Return the selected FTS aux index suffix in 5.7 compatible format 201@param[in] selected selected index 202@return the suffix name */ 203UNIV_INLINE 204const char *fts_get_suffix_5_7(ulint selected) { 205 return (fts_index_selector_5_7[selected].suffix); 206} 207 208#endif /* INNOBASE_FTS0TYPES_IC */ 209