1 /*
2  *  This is a temporary holding place for definitions that don't yet
3  *  have a home.
4  */
5 
6 #ifndef _AF_DEFS_H
7 #define _AF_DEFS_H
8 
9 /* new */
10 
11 #include <stdio.h>
12 #include "err.h"
13 
/*
 * Database file types.  The values are consecutive, starting at 0, and
 * the entries of affntab[] are labelled with these names in the same
 * order.
 */
#define AFFTINFO    (0)
#define AFFTDOCTAB  (1)
#define AFFTUDICT   (2)
#define AFFTUPOST   (3)
#define AFFTUFIELD  (4)
#define AFFTLPOST   (5)
#define AFFTLFIELD  (6)
#define AFFTFDEF    (7)
#define AFFTUWORD   (8)
#define AFFTLWORD   (9)
#define AFFTLOCK    (10)
27 
/*
 * File name suffixes, one per database file type.  The initializer is
 * positional: the entry at index i is the one labelled with the AFFT*
 * constant whose value is i, so the order here must not change.
 */
static char *affntab[] = {
	/* AFFTINFO   */ ".db",
	/* AFFTDOCTAB */ ".dt",
	/* AFFTUDICT  */ ".x0",
	/* AFFTUPOST  */ ".y0",
	/* AFFTUFIELD */ ".z0",
	/* AFFTLPOST  */ ".y1",
	/* AFFTLFIELD */ ".z1",
	/* AFFTFDEF   */ ".fd",
	/* AFFTUWORD  */ ".w0",
	/* AFFTLWORD  */ ".w1",
	/* AFFTLOCK   */ ".lk",
};
/*
 * Alternative suffix table, compiled only when NOTHING is defined
 * (NOTE(review): it looks intentionally disabled -- confirm before
 * enabling).  The ETYMON_DBF_*_EXT names used as initializers are
 * macros that expand to getftfn() calls, not string literals.
 */
#ifdef NOTHING
static char *ftfn[] = {
	ETYMON_DBF_INFO_EXT,       /* AFFTINFO */
	ETYMON_DBF_DOCTABLE_EXT,   /* AFFTDOCTAB */
	ETYMON_DBF_UDICT_EXT,      /* AFFTUDICT */
	ETYMON_DBF_UPOST_EXT,      /* AFFTUPOST */
	ETYMON_DBF_UFIELD_EXT,     /* AFFTUFIELD */
	ETYMON_DBF_LPOST_EXT,      /* AFFTLPOST */
	ETYMON_DBF_LFIELD_EXT,     /* AFFTLFIELD */
	ETYMON_DBF_FDEF_EXT,       /* AFFTFDEF */
	ETYMON_DBF_UWORD_EXT,      /* AFFTUWORD */
	ETYMON_DBF_LWORD_EXT,      /* AFFTLWORD */
	ETYMON_DBF_LOCK_EXT,       /* AFFTLOCK */
};
#endif
56 
/*
 * A FILE pointer for each database file type.  The members are
 * declared in the same order as the AFFT* file type constants.
 */
typedef struct {
	FILE *info, *doctab;
	FILE *udict, *upost, *ufield;
	FILE *lpost, *lfield;
	FILE *fdef;
	FILE *uword, *lword;
	FILE *lock;
} Affile;
70 
71 /* old */
72 
73 #include <stdlib.h>
74 #include <fcntl.h>
75 #include "config.h"
76 
/*
 * Program banner strings, assembled by string literal concatenation;
 * AF_VERSION must therefore expand to a string literal.
 * TODO: move these to where they are used and reduce the number of
 * places that use them.
 */
#define ETYMON_AF_BANNER \
	"Amberfish, Version " AF_VERSION
#define ETYMON_AF_COPYRIGHT \
	"Copyright (C) 1999-2004 Etymon Systems, Inc.  All Rights Reserved."
#define ETYMON_AF_BANNER_STAMP \
	ETYMON_AF_BANNER ".  " ETYMON_AF_COPYRIGHT
82 
/*
 * Index format version.  It is a Uint4 at the beginning of the db info
 * file and is used to determine compatibility; increment it here
 * whenever the index format changes.
 */
#define ETYMON_INDEX_MAGIC (5)
87 
/* Buffer limits; every value below is a maximum char[] size. */

/* an absolute path (ETYMON_MAX_PATH_SIZE is an alias for AFPATHSIZE) */
#define AFPATHSIZE (1024)
#define ETYMON_MAX_PATH_SIZE AFPATHSIZE

/* a key; 11 is big enough to hold the default Uint4 keys */
#define ETYMON_MAX_KEY_SIZE (11)

/* a field name (a single name, not an entire field path) */
#define ETYMON_AF_MAX_FIELDNAME_SIZE (32)

/* an error message */
#define ETYMON_MAX_MSG_SIZE (1024)
101 
/*
 * ETYMON_DBF_* names for the database files.  Each plain name is an
 * alias for the matching AFFT* file type constant.
 */
#define ETYMON_DBF_INFO      AFFTINFO
#define ETYMON_DBF_DOCTABLE  AFFTDOCTAB
#define ETYMON_DBF_UDICT     AFFTUDICT
#define ETYMON_DBF_UPOST     AFFTUPOST
#define ETYMON_DBF_UFIELD    AFFTUFIELD
#define ETYMON_DBF_LPOST     AFFTLPOST
#define ETYMON_DBF_LFIELD    AFFTLFIELD
#define ETYMON_DBF_FDEF      AFFTFDEF
#define ETYMON_DBF_UWORD     AFFTUWORD
#define ETYMON_DBF_LWORD     AFFTLWORD
#define ETYMON_DBF_LOCK      AFFTLOCK

/* Each *_EXT name expands to a getftfn() call with the matching file
   type constant, so it is an expression rather than a literal. */
#define ETYMON_DBF_INFO_EXT      getftfn(AFFTINFO)
#define ETYMON_DBF_DOCTABLE_EXT  getftfn(AFFTDOCTAB)
#define ETYMON_DBF_UDICT_EXT     getftfn(AFFTUDICT)
#define ETYMON_DBF_UPOST_EXT     getftfn(AFFTUPOST)
#define ETYMON_DBF_UFIELD_EXT    getftfn(AFFTUFIELD)
#define ETYMON_DBF_LPOST_EXT     getftfn(AFFTLPOST)
#define ETYMON_DBF_LFIELD_EXT    getftfn(AFFTLFIELD)
#define ETYMON_DBF_FDEF_EXT      getftfn(AFFTFDEF)
#define ETYMON_DBF_UWORD_EXT     getftfn(AFFTUWORD)
#define ETYMON_DBF_LWORD_EXT     getftfn(AFFTLWORD)
#define ETYMON_DBF_LOCK_EXT      getftfn(AFFTLOCK)
124 
/* Permission bits: read and write for the owner, read for group and
   others.  The S_I* names must be in scope wherever this is expanded. */
#define ETYMON_DB_PERM (S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)
126 
127 typedef struct {
128         unsigned char key[ETYMON_MAX_KEY_SIZE]; /* document key */
129         char filename[ETYMON_MAX_PATH_SIZE]; /* document source file name */
130 	etymon_af_off_t begin; /* starting offset of document within the file */
131         etymon_af_off_t end; /* ending offset of document within the file (one byte past end) */
132 	Uint4 parent;  /* doc_id of parent document */
133         Uint1 dclass_id; /* unique id associated with dclass */
134 	Uint1 deleted; /* 1 = marked as deleted; 0 = not deleted */
135 } ETYMON_DOCTABLE;
136 
137 typedef struct {
138 	unsigned char name[ETYMON_AF_MAX_FIELDNAME_SIZE];
139 	Uint2 left;
140 	Uint2 right;
141 } ETYMON_AF_FDEF_DISK;
142 
/* Logging hooks: a single error callback that takes a char pointer and
   an int and returns an int. */
typedef struct {
	int (*error)(char *, int);
} ETYMON_LOG;
146 
/* Maximum number of keys in a page: ETYMON_MAX_KEYS_L sizes the arrays
   of ETYMON_INDEX_PAGE_L, ETYMON_MAX_KEYS_NL those of
   ETYMON_INDEX_PAGE_NL. */
#define ETYMON_MAX_KEYS_L   (5000)
#define ETYMON_MAX_KEYS_NL  (8000)
150 
/* maximum char[] size for an indexable word */
#define ETYMON_MAX_WORD_SIZE (32)

/* maximum char[] size for a parsed token; this must be set to the
   larger of ETYMON_MAX_WORD_SIZE and the field name limit (defined in
   this header as ETYMON_AF_MAX_FIELDNAME_SIZE) */
#define ETYMON_MAX_TOKEN_SIZE (32)
157 
/* maximum char[] size for a single term within a query */
#define ETYMON_MAX_QUERY_TERM_SIZE  (1024)

/* maximum level of nesting in structured fields */
#define ETYMON_MAX_FIELD_NEST       (64)

/* maximum depth (levels) of tree allowed */
#define ETYMON_MAX_PAGE_DEPTH       (4)
166 
/* average key length, per page kind */
#define ETYMON_MEAN_KEY_LEN_L   (8)
#define ETYMON_MEAN_KEY_LEN_NL  (5)

/* size of the key buffer in a page: maximum number of keys times the
   average key length */
#define ETYMON_MAX_KEY_AREA_L   (ETYMON_MAX_KEYS_L * ETYMON_MEAN_KEY_LEN_L)
#define ETYMON_MAX_KEY_AREA_NL  (ETYMON_MAX_KEYS_NL * ETYMON_MEAN_KEY_LEN_NL)
174 
/* stack depth limits (named for an "OP" stack and an "R" stack) */
#define ETYMON_AF_MAX_OP_STACK_DEPTH  (256)
#define ETYMON_AF_MAX_R_STACK_DEPTH   (256)

/* operator codes: OR, AND, and the opening and closing of a group */
#define ETYMON_AF_OP_OR           (1)
#define ETYMON_AF_OP_AND          (2)
#define ETYMON_AF_OP_GROUP_OPEN   (3)
#define ETYMON_AF_OP_GROUP_CLOSE  (4)
182 
183 typedef struct {
184 	Uint2 n; /* number of keys */
185 	Uint4 p[ETYMON_MAX_KEYS_NL + 1]; /* pointers to other pages (offset) */
186 	Uint2 offset[ETYMON_MAX_KEYS_NL + 1]; /* offsets to keys */
187 	unsigned char keys[ETYMON_MAX_KEY_AREA_NL]; /* key buffer */
188 } ETYMON_INDEX_PAGE_NL;
189 
190 typedef struct {
191 	Uint2 n; /* number of keys */
192 	Uint4 prev; /* previous leaf node (offset) */
193 	Uint4 next; /* next left node (offset) */
194 	Uint4 post[ETYMON_MAX_KEYS_L]; /* postings for each key */
195 	Uint4 post_n[ETYMON_MAX_KEYS_L]; /* number of postings for each key */
196 	Uint2 offset[ETYMON_MAX_KEYS_L + 1]; /* offsets to keys */
197 	unsigned char keys[ETYMON_MAX_KEY_AREA_L]; /* key buffer */
198 } ETYMON_INDEX_PAGE_L;
199 
200 typedef struct {
201 	Uint4 pos; /* position of page on disk or 0 if empty slot */
202 	Uint1 is_nl; /* is it a non-leaf (here) or leaf (in the leaf cache) */
203 	ETYMON_INDEX_PAGE_NL nl;
204 } ETYMON_INDEX_PCACHE_NODE;
205 
206 typedef struct {
207 	Uint2 f[ETYMON_MAX_FIELD_NEST];
208 	int next;
209 } ETYMON_INDEX_FCACHE_NODE;
210 
211 typedef struct {
212 	Uint4 wn;
213 	int next;
214 } ETYMON_INDEX_WNCACHE_NODE;
215 
216 typedef struct {
217 	unsigned char word[ETYMON_MAX_WORD_SIZE];
218 	int left;
219 	int right;
220 	int next; /* circular linked list */
221 	Uint2 freq;
222 	Uint4 doc_id;
223 	int fields;
224 	int word_numbers_head;
225 	int word_numbers_tail;
226 } ETYMON_INDEX_WCACHE_NODE;
227 
228 typedef struct {
229 	Uint4 doc_id; /* document id */
230 	Uint2 freq; /* frequency */
231 	Uint4 fields; /* field pointer */
232 	Uint4 fields_n; /* number of fields */
233 	Uint4 word_numbers; /* word numbers pointer */
234 	Uint4 word_numbers_n; /* number of word numbers */
235 	Uint4 next; /* next posting (index) or 0 */
236 } ETYMON_INDEX_UPOST;
237 
238 typedef struct {
239 	Uint4 doc_id; /* document id */
240 	Uint2 freq; /* frequency */
241 	Uint4 fields; /* field pointer */
242 	Uint4 fields_n; /* number of fields */
243 	Uint4 word_numbers; /* word numbers pointer */
244 	Uint4 word_numbers_n; /* number of word numbers */
245 } ETYMON_INDEX_LPOST;
246 
247 typedef struct {
248 	Uint2 fields[ETYMON_MAX_FIELD_NEST];
249 	Uint4 next; /* next field (index) or 0 */
250 } ETYMON_INDEX_UFIELD;
251 
252 typedef struct {
253 	Uint4 wn;
254 	Uint4 next; /* next word number (index) or 0 */
255 } ETYMON_INDEX_UWORD;
256 
257 typedef struct {
258 	Uint2 fields[ETYMON_MAX_FIELD_NEST];
259 } ETYMON_INDEX_LFIELD;
260 
261 typedef struct {
262 	Uint4 wn;
263 } ETYMON_INDEX_LWORD;
264 
265 typedef struct ETYMON_AF_FDEF_MEM_STRUCT {
266 	Uint2 n;
267 	unsigned char name[ETYMON_AF_MAX_FIELDNAME_SIZE];
268 	struct ETYMON_AF_FDEF_MEM_STRUCT* left;
269 	struct ETYMON_AF_FDEF_MEM_STRUCT* right;
270 	struct ETYMON_AF_FDEF_MEM_STRUCT* next;
271 } ETYMON_AF_FDEF_MEM;
272 
273 typedef struct {
274 	char* dbname; /* database name */
275 	int doctable_fd; /* doctable file descriptor */
276 	etymon_af_off_t doctable_next_id; /* next available doctable number */
277 	ETYMON_DOCTABLE doctable; /* doctable buffer to use repeatedly */
278 	ETYMON_INDEX_WCACHE_NODE* wcache; /* binary tree (array) of word cache nodes */
279 	size_t wcache_size;
280 	size_t wcache_count;
281 	int wcache_root;
282 	ETYMON_INDEX_FCACHE_NODE* fcache; /* linked list (array) of field cache nodes */
283 	size_t fcache_size;
284 	size_t fcache_count;
285 	ETYMON_INDEX_WNCACHE_NODE* wncache; /* linked list (array) of word number cache nodes */
286 	size_t wncache_size;
287 	size_t wncache_count;
288 	int udict_fd; /* udict file descriptor */
289 	etymon_af_off_t udict_size; /* current size of udict */
290 	int upost_fd; /* upost file descriptor */
291 	etymon_af_off_t upost_isize; /* current size of upost */
292 	int ufield_fd; /* ufield file descriptor */
293 	etymon_af_off_t ufield_isize; /* current size of ufield */
294 	int uword_fd; /* uword file descriptor */
295 	etymon_af_off_t uword_isize; /* current size of uword */
296 	Uint4 udict_root; /* root of the udict tree (offset) */
297 	ETYMON_INDEX_PCACHE_NODE* pcache_nl; /* non-leaf page cache */
298 	ETYMON_INDEX_PAGE_L pcache_l; /* leaf page cache */
299 	Uint4 pcache_l_write; /* offset position for write caching, or 0 if pcache_l has been flushed */
300 	int pcache_nl_size;
301 	int pcache_count;
302 	ETYMON_INDEX_PAGE_NL overflow_nl; /* overflow non-leaf page */
303 	ETYMON_INDEX_PAGE_L overflow_l; /* overflow leaf page */
304 	ETYMON_INDEX_PAGE_L extra_l; /* extra leaf page */
305 	ETYMON_INDEX_UPOST upost;
306 	ETYMON_INDEX_UFIELD ufield;
307 	ETYMON_INDEX_UWORD uword;
308 	ETYMON_AF_FDEF_MEM* fdef_root; /* pointer to root node of fdef binary tree */
309 	ETYMON_AF_FDEF_MEM* fdef_tail; /* pointer to tail node of fdef threaded list */
310 	Uint2 fdef_count;
311 	int phrase; /* enable phrase searching */
312 	int word_proximity; /* enable word proximity operator */
313 	int stemming; /* enable stemming */
314 	int number_words; /* enable recordings of word number data */
315 	int doc_n; /* total number of (non-deleted) documents in
316 		     database */
317 	int verbose;
318 	int long_words;
319 	int flushmsg;
320 } ETYMON_INDEX_INDEXING_STATE;
321 
322 #endif
323