1 /* Copyright (C) 2000-2015 Lavtech.com corp. All rights reserved. 2 3 This program is free software; you can redistribute it and/or modify 4 it under the terms of the GNU General Public License as published by 5 the Free Software Foundation; either version 2 of the License, or 6 (at your option) any later version. 7 8 This program is distributed in the hope that it will be useful, 9 but WITHOUT ANY WARRANTY; without even the implied warranty of 10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 GNU General Public License for more details. 12 13 You should have received a copy of the GNU General Public License 14 along with this program; if not, write to the Free Software 15 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 */ 17 18 #ifndef __INDEXCACHE_H__ 19 #define __INDEXCACHE_H__ 20 21 #define UDM_INDEXER_THREADS_MAX 64 22 #define INVERTED_INDEX_CACHE_PARTS 32 23 #define INVERTED_INDEX_CACHE_PART_SIZE ((4 * 1024 * 1024) / (INVERTED_INDEX_CACHE_PARTS)) 24 25 struct udm_inverted_index_cache_st; 26 27 typedef struct 28 { 29 udm_timer_t load; 30 udm_timer_t sort; 31 udm_timer_t pack; 32 udm_timer_t send; 33 udm_timer_t send_multi; 34 udm_timer_t conv; 35 udm_timer_t unpack_cached_copy; 36 udm_timer_t parse; 37 udm_timer_t prepare_words; 38 udm_timer_t sort_wordlist; 39 udm_timer_t pairs; 40 unsigned long long bytes_loaded; 41 } UDM_INVERTED_INDEX_STATS; 42 43 44 typedef struct 45 { 46 UDM_UNIDATA *unidata; 47 UDM_ENV *Env; 48 int cnvflags; /* e.g. for StripAccents */ 49 int pair_limit; 50 int save_section_size; 51 int aggregate_section_flags; 52 } UDM_INVERTED_INDEX_CACHE_PARAM; 53 54 typedef struct 55 { 56 UDM_CONV cnv; 57 urlid_t url_id; 58 struct udm_inverted_index_cache_st *cache; 59 } UDM_CONSTWORD_HASH_DATA; 60 61 62 typedef struct 63 { 64 char *ptr; /* 8 */ /* Word followed by encoded positions */ 65 urlid_t url_id; /* 4 */ 66 uint4 length:24; /* 3 */ 67 udm_secno_t secno; /* 1 */ 68 } UDM_INVERTED_INDEX_CACHE_ITEM; 69 70 71 typedef struct 72 { 73 size_t nitems; /* 8 */ 74 size_t mitems; /* 8 */ 75 UDM_INVERTED_INDEX_CACHE_ITEM *Item; /* 8 */ 76 } UDM_INVERTED_INDEX_CACHE_PART; 77 78 79 typedef struct 80 { 81 char *str; /* 8 */ 82 udmhash32_t crc; /* 4 */ 83 urlid_t last_url_id; /* 4 */ 84 udm_pos_t last_url_id_count:23; /* */ 85 int is_stopword:1; /* */ 86 unsigned int length:8; /* */ 87 } UDM_INVERTED_INDEX_WORD; 88 89 typedef struct 90 { 91 size_t nitems; 92 size_t mitems; 93 UDM_INVERTED_INDEX_WORD *Item; 94 } UDM_INVERTED_INDEX_WORD_LIST; 95 96 97 typedef struct 98 { 99 UDM_MEMROOT root; 100 UDM_HASH hash; 101 UDM_INVERTED_INDEX_WORD_LIST list; 102 } UDM_INVERTED_INDEX_WORD_DICTIONARY; 103 104 105 typedef struct 106 { 107 uint4 word_id; /* 4 */ 108 UDM_COORD coord; /* 4 */ 109 } UDM_INVERTED_INDEX_COORD; 110 111 112 typedef struct 113 { 114 size_t nitems; 115 size_t mitems; 116 UDM_INVERTED_INDEX_COORD *Item; 117 } UDM_INVERTED_INDEX_COORD_LIST; 118 119 120 typedef struct udm_inverted_index_cache_st 121 { 122 size_t nitems; 123 size_t mitems; 124 UDM_INVERTED_INDEX_CACHE_PART *Item; 125 UDM_MEMROOT coord_root; 126 UDM_INVERTED_INDEX_CACHE_PARAM param; 127 UDM_INVERTED_INDEX_WORD_DICTIONARY Words; 128 UDM_INVERTED_INDEX_STATS Stats; 129 } UDM_INVERTED_INDEX_CACHE; 130 131 132 void UdmInvertedIndexCachePartSort(UDM_INVERTED_INDEX_CACHE_PART *part); 133 void UdmInvertedIndexCachePartInit(UDM_INVERTED_INDEX_CACHE_PART *part); 134 void UdmInvertedIndexCachePartFree(UDM_INVERTED_INDEX_CACHE_PART *part); 135 udm_rc_t UdmInvertedIndexCacheAdd(UDM_AGENT *A, 136 UDM_CONSTWORD_HASH_DATA *data, 137 UDM_INVERTED_INDEX_CACHE *cache, 138 const UDM_WORD *W, size_t ncoords); 139 udm_rc_t UdmInvertedIndexCacheAllocParts(UDM_AGENT *A, 140 UDM_INVERTED_INDEX_CACHE *cache, 141 size_t n); 142 143 void UdmInvertedIndexCacheInit(UDM_INVERTED_INDEX_CACHE *L, UDM_ENV *Env); 144 void UdmInvertedIndexCacheReset(UDM_INVERTED_INDEX_CACHE *L); 145 void UdmInvertedIndexCacheFree(UDM_INVERTED_INDEX_CACHE *L); 146 udm_rc_t UdmInvertedIndexCacheAddPart(UDM_AGENT *A, UDM_INVERTED_INDEX_CACHE *cache); 147 148 udm_rc_t UdmConstWordListToInvertedIndexCoordList(UDM_INVERTED_INDEX_CACHE *cache, 149 UDM_INVERTED_INDEX_COORD_LIST *CL, 150 UDM_CONSTWORDLIST *CWL); 151 152 udm_rc_t UdmInvertedIndexCoordListInit(UDM_INVERTED_INDEX_COORD_LIST *L, size_t mitems); 153 void UdmInvertedIndexCoordListFree(UDM_INVERTED_INDEX_COORD_LIST *L); 154 void UdmInvertedIndexCoordListSort(UDM_INVERTED_INDEX_COORD_LIST *L); 155 156 udm_rc_t UdmInvertedIndexCoordList2InvertedIndexCache(UDM_AGENT *A, 157 UDM_INVERTED_INDEX_COORD_LIST *CL, 158 UDM_CONSTWORDLIST *CWLWithPos, 159 UDM_CONSTWORD_HASH_DATA *data, 160 UDM_INVERTED_INDEX_CACHE *L); 161 size_t UdmInvertedIndexCacheEstimateUsedMemory(const UDM_INVERTED_INDEX_CACHE *); 162 163 #endif 164