1 /*-------------------------------------------------------------------------
2  *
3  * spell.h
4  *
5  * Declarations for ISpell dictionary
6  *
7  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
8  *
9  * src/include/tsearch/dicts/spell.h
10  *
11  *-------------------------------------------------------------------------
12  */
13 
14 #ifndef __SPELL_H__
15 #define __SPELL_H__
16 
17 #include "regex/regex.h"
18 #include "tsearch/dicts/regis.h"
19 #include "tsearch/ts_public.h"
20 
21 /*
22  * SPNode and SPNodeData are used to represent prefix tree (Trie) to store
23  * a words list.
24  */
25 struct SPNode;
26 
27 typedef struct
28 {
29 	uint32		val:8,
30 				isword:1,
31 	/* Stores compound flags listed below */
32 				compoundflag:4,
33 	/* Reference to an entry of the AffixData field */
34 				affix:19;
35 	struct SPNode *node;
36 } SPNodeData;
37 
/*
 * Compound-word flag bits.
 *
 * The FF_ names are correlated with the Hunspell options found in an affix
 * file; see http://hunspell.sourceforge.net/
 */
#define FF_COMPOUNDONLY			0x01
#define FF_COMPOUNDBEGIN		0x02
#define FF_COMPOUNDMIDDLE		0x04
#define FF_COMPOUNDLAST			0x08

/* Union of the begin/middle/last bits (FF_COMPOUNDONLY is not included) */
#define FF_COMPOUNDFLAG \
	(FF_COMPOUNDBEGIN | FF_COMPOUNDMIDDLE | FF_COMPOUNDLAST)

/* Mask covering all four compound bits defined above */
#define FF_COMPOUNDFLAGMASK		0x0f
49 
50 typedef struct SPNode
51 {
52 	uint32		length;
53 	SPNodeData	data[FLEXIBLE_ARRAY_MEMBER];
54 } SPNode;
55 
56 #define SPNHDRSZ	(offsetof(SPNode,data))
57 
58 /*
59  * Represents an entry in a words list.
60  */
61 typedef struct spell_struct
62 {
63 	union
64 	{
65 		/*
66 		 * flag is filled in by NIImportDictionary(). After
67 		 * NISortDictionary(), d is used instead of flag.
68 		 */
69 		char	   *flag;
70 		/* d is used in mkSPNode() */
71 		struct
72 		{
73 			/* Reference to an entry of the AffixData field */
74 			int			affix;
75 			/* Length of the word */
76 			int			len;
77 		}			d;
78 	}			p;
79 	char		word[FLEXIBLE_ARRAY_MEMBER];
80 } SPELL;
81 
82 #define SPELLHDRSZ	(offsetof(SPELL, word))
83 
84 /*
85  * If an affix uses a regex, we have to store that separately in a struct
86  * that won't move around when arrays of affixes are enlarged or sorted.
87  * This is so that it can be found to be cleaned up at context destruction.
88  */
89 typedef struct aff_regex_struct
90 {
91 	regex_t		regex;
92 	MemoryContextCallback mcallback;
93 } aff_regex_struct;
94 
95 /*
96  * Represents an entry in an affix list.
97  */
98 typedef struct aff_struct
99 {
100 	char	   *flag;
101 	/* FF_SUFFIX or FF_PREFIX */
102 	uint32		type:1,
103 				flagflags:7,
104 				issimple:1,
105 				isregis:1,
106 				replen:14;
107 	char	   *find;
108 	char	   *repl;
109 	union
110 	{
111 		aff_regex_struct *pregex;
112 		Regis		regis;
113 	}			reg;
114 } AFFIX;
115 
/*
 * Dictionary-style flags that are used by affixes as well.
 */
#define FF_COMPOUNDPERMITFLAG	0x10
#define FF_COMPOUNDFORBIDFLAG	0x20
#define FF_CROSSPRODUCT			0x40

/*
 * Affix kinds.  The numeric order matters and must not be changed:
 * initialization sorts by these values and relies on prefixes ending up
 * ahead of suffixes after the sort.
 */
#define FF_SUFFIX				1
#define FF_PREFIX				0
129 
130 /*
131  * AffixNode and AffixNodeData are used to represent prefix tree (Trie) to store
132  * an affix list.
133  */
134 struct AffixNode;
135 
136 typedef struct
137 {
138 	uint32		val:8,
139 				naff:24;
140 	AFFIX	  **aff;
141 	struct AffixNode *node;
142 } AffixNodeData;
143 
144 typedef struct AffixNode
145 {
146 	uint32		isvoid:1,
147 				length:31;
148 	AffixNodeData data[FLEXIBLE_ARRAY_MEMBER];
149 } AffixNode;
150 
151 #define ANHRDSZ		   (offsetof(AffixNode, data))
152 
/*
 * An affix string together with its length and a suffix/prefix marker.
 * NOTE(review): judging by the name, used for compound-word handling --
 * confirm against spell.c.
 */
typedef struct
{
	char	   *affix;			/* the affix string */
	int			len;			/* NOTE(review): presumably its length */
	bool		issuffix;		/* true for a suffix */
} CMPDAffix;
159 
/*
 * How affix flags are encoded in a Hunspell dictionary.
 */
typedef enum
{
	/* a flag is one character (like ispell) */
	FM_CHAR,
	/* a flag is two characters */
	FM_LONG,
	/* a flag is a number, >= 0 and < 65536 */
	FM_NUM
} FlagMode;
169 
170 /*
171  * Structure to store Hunspell options. Flag representation depends on flag
172  * type. These flags are about support of compound words.
173  */
174 typedef struct CompoundAffixFlag
175 {
176 	union
177 	{
178 		/* Flag name if flagMode is FM_CHAR or FM_LONG */
179 		char	   *s;
180 		/* Flag name if flagMode is FM_NUM */
181 		uint32		i;
182 	}			flag;
183 	/* we don't have a bsearch_arg version, so, copy FlagMode */
184 	FlagMode	flagMode;
185 	uint32		value;
186 } CompoundAffixFlag;
187 
/* 65536 (2^16) */
#define FLAGNUM_MAXSIZE		(1 << 16)
189 
190 typedef struct
191 {
192 	int			maffixes;
193 	int			naffixes;
194 	AFFIX	   *Affix;
195 
196 	AffixNode  *Suffix;
197 	AffixNode  *Prefix;
198 
199 	SPNode	   *Dictionary;
200 	/* Array of sets of affixes */
201 	char	  **AffixData;
202 	int			lenAffixData;
203 	int			nAffixData;
204 	bool		useFlagAliases;
205 
206 	CMPDAffix  *CompoundAffix;
207 
208 	bool		usecompound;
209 	FlagMode	flagMode;
210 
211 	/*
212 	 * All follow fields are actually needed only for initialization
213 	 */
214 
215 	/* Array of Hunspell options in affix file */
216 	CompoundAffixFlag *CompoundAffixFlags;
217 	/* number of entries in CompoundAffixFlags array */
218 	int			nCompoundAffixFlag;
219 	/* allocated length of CompoundAffixFlags array */
220 	int			mCompoundAffixFlag;
221 
222 	/*
223 	 * Remaining fields are only used during dictionary construction; they are
224 	 * set up by NIStartBuild and cleared by NIFinishBuild.
225 	 */
226 	MemoryContext buildCxt;		/* temp context for construction */
227 
228 	/* Temporary array of all words in the dict file */
229 	SPELL	  **Spell;
230 	int			nspell;			/* number of valid entries in Spell array */
231 	int			mspell;			/* allocated length of Spell array */
232 
233 	/* These are used to allocate "compact" data without palloc overhead */
234 	char	   *firstfree;		/* first free address (always maxaligned) */
235 	size_t		avail;			/* free space remaining at firstfree */
236 } IspellDict;
237 
238 extern TSLexeme *NINormalizeWord(IspellDict *Conf, char *word);
239 
240 extern void NIStartBuild(IspellDict *Conf);
241 extern void NIImportAffixes(IspellDict *Conf, const char *filename);
242 extern void NIImportDictionary(IspellDict *Conf, const char *filename);
243 extern void NISortDictionary(IspellDict *Conf);
244 extern void NISortAffixes(IspellDict *Conf);
245 extern void NIFinishBuild(IspellDict *Conf);
246 
247 #endif
248