1 /*------------------------------------------------------------------------- 2 * 3 * spell.h 4 * 5 * Declarations for ISpell dictionary 6 * 7 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group 8 * 9 * src/include/tsearch/dicts/spell.h 10 * 11 *------------------------------------------------------------------------- 12 */ 13 14 #ifndef __SPELL_H__ 15 #define __SPELL_H__ 16 17 #include "regex/regex.h" 18 #include "tsearch/dicts/regis.h" 19 #include "tsearch/ts_public.h" 20 21 /* 22 * SPNode and SPNodeData are used to represent prefix tree (Trie) to store 23 * a words list. 24 */ 25 struct SPNode; 26 27 typedef struct 28 { 29 uint32 val:8, 30 isword:1, 31 /* Stores compound flags listed below */ 32 compoundflag:4, 33 /* Reference to an entry of the AffixData field */ 34 affix:19; 35 struct SPNode *node; 36 } SPNodeData; 37 38 /* 39 * Names of FF_ are correlated with Hunspell options in affix file 40 * http://hunspell.sourceforge.net/ 41 */ 42 #define FF_COMPOUNDONLY 0x01 43 #define FF_COMPOUNDBEGIN 0x02 44 #define FF_COMPOUNDMIDDLE 0x04 45 #define FF_COMPOUNDLAST 0x08 46 #define FF_COMPOUNDFLAG ( FF_COMPOUNDBEGIN | FF_COMPOUNDMIDDLE | \ 47 FF_COMPOUNDLAST ) 48 #define FF_COMPOUNDFLAGMASK 0x0f 49 50 typedef struct SPNode 51 { 52 uint32 length; 53 SPNodeData data[FLEXIBLE_ARRAY_MEMBER]; 54 } SPNode; 55 56 #define SPNHDRSZ (offsetof(SPNode,data)) 57 58 /* 59 * Represents an entry in a words list. 60 */ 61 typedef struct spell_struct 62 { 63 union 64 { 65 /* 66 * flag is filled in by NIImportDictionary(). After 67 * NISortDictionary(), d is used instead of flag. 68 */ 69 char *flag; 70 /* d is used in mkSPNode() */ 71 struct 72 { 73 /* Reference to an entry of the AffixData field */ 74 int affix; 75 /* Length of the word */ 76 int len; 77 } d; 78 } p; 79 char word[FLEXIBLE_ARRAY_MEMBER]; 80 } SPELL; 81 82 #define SPELLHDRSZ (offsetof(SPELL, word)) 83 84 /* 85 * If an affix uses a regex, we have to store that separately in a struct 86 * that won't move around when arrays of affixes are enlarged or sorted. 87 * This is so that it can be found to be cleaned up at context destruction. 88 */ 89 typedef struct aff_regex_struct 90 { 91 regex_t regex; 92 MemoryContextCallback mcallback; 93 } aff_regex_struct; 94 95 /* 96 * Represents an entry in an affix list. 97 */ 98 typedef struct aff_struct 99 { 100 char *flag; 101 /* FF_SUFFIX or FF_PREFIX */ 102 uint32 type:1, 103 flagflags:7, 104 issimple:1, 105 isregis:1, 106 replen:14; 107 char *find; 108 char *repl; 109 union 110 { 111 aff_regex_struct *pregex; 112 Regis regis; 113 } reg; 114 } AFFIX; 115 116 /* 117 * affixes use dictionary flags too 118 */ 119 #define FF_COMPOUNDPERMITFLAG 0x10 120 #define FF_COMPOUNDFORBIDFLAG 0x20 121 #define FF_CROSSPRODUCT 0x40 122 123 /* 124 * Don't change the order of these. Initialization sorts by these, 125 * and expects prefixes to come first after sorting. 126 */ 127 #define FF_SUFFIX 1 128 #define FF_PREFIX 0 129 130 /* 131 * AffixNode and AffixNodeData are used to represent prefix tree (Trie) to store 132 * an affix list. 133 */ 134 struct AffixNode; 135 136 typedef struct 137 { 138 uint32 val:8, 139 naff:24; 140 AFFIX **aff; 141 struct AffixNode *node; 142 } AffixNodeData; 143 144 typedef struct AffixNode 145 { 146 uint32 isvoid:1, 147 length:31; 148 AffixNodeData data[FLEXIBLE_ARRAY_MEMBER]; 149 } AffixNode; 150 151 #define ANHRDSZ (offsetof(AffixNode, data)) 152 153 typedef struct 154 { 155 char *affix; 156 int len; 157 bool issuffix; 158 } CMPDAffix; 159 160 /* 161 * Type of encoding affix flags in Hunspell dictionaries 162 */ 163 typedef enum 164 { 165 FM_CHAR, /* one character (like ispell) */ 166 FM_LONG, /* two characters */ 167 FM_NUM /* number, >= 0 and < 65536 */ 168 } FlagMode; 169 170 /* 171 * Structure to store Hunspell options. Flag representation depends on flag 172 * type. These flags are about support of compound words. 173 */ 174 typedef struct CompoundAffixFlag 175 { 176 union 177 { 178 /* Flag name if flagMode is FM_CHAR or FM_LONG */ 179 char *s; 180 /* Flag name if flagMode is FM_NUM */ 181 uint32 i; 182 } flag; 183 /* we don't have a bsearch_arg version, so, copy FlagMode */ 184 FlagMode flagMode; 185 uint32 value; 186 } CompoundAffixFlag; 187 188 #define FLAGNUM_MAXSIZE (1 << 16) 189 190 typedef struct 191 { 192 int maffixes; 193 int naffixes; 194 AFFIX *Affix; 195 196 AffixNode *Suffix; 197 AffixNode *Prefix; 198 199 SPNode *Dictionary; 200 /* Array of sets of affixes */ 201 char **AffixData; 202 int lenAffixData; 203 int nAffixData; 204 bool useFlagAliases; 205 206 CMPDAffix *CompoundAffix; 207 208 bool usecompound; 209 FlagMode flagMode; 210 211 /* 212 * All follow fields are actually needed only for initialization 213 */ 214 215 /* Array of Hunspell options in affix file */ 216 CompoundAffixFlag *CompoundAffixFlags; 217 /* number of entries in CompoundAffixFlags array */ 218 int nCompoundAffixFlag; 219 /* allocated length of CompoundAffixFlags array */ 220 int mCompoundAffixFlag; 221 222 /* 223 * Remaining fields are only used during dictionary construction; they are 224 * set up by NIStartBuild and cleared by NIFinishBuild. 225 */ 226 MemoryContext buildCxt; /* temp context for construction */ 227 228 /* Temporary array of all words in the dict file */ 229 SPELL **Spell; 230 int nspell; /* number of valid entries in Spell array */ 231 int mspell; /* allocated length of Spell array */ 232 233 /* These are used to allocate "compact" data without palloc overhead */ 234 char *firstfree; /* first free address (always maxaligned) */ 235 size_t avail; /* free space remaining at firstfree */ 236 } IspellDict; 237 238 extern TSLexeme *NINormalizeWord(IspellDict *Conf, char *word); 239 240 extern void NIStartBuild(IspellDict *Conf); 241 extern void NIImportAffixes(IspellDict *Conf, const char *filename); 242 extern void NIImportDictionary(IspellDict *Conf, const char *filename); 243 extern void NISortDictionary(IspellDict *Conf); 244 extern void NISortAffixes(IspellDict *Conf); 245 extern void NIFinishBuild(IspellDict *Conf); 246 247 #endif 248