1 /* 2 * This is a temporary holding place for definitions that don't yet 3 * have a home. 4 */ 5 6 #ifndef _AF_DEFS_H 7 #define _AF_DEFS_H 8 9 /* new */ 10 11 #include <stdio.h> 12 #include "err.h" 13 14 /* database file type */ 15 16 #define AFFTINFO (0) 17 #define AFFTDOCTAB (1) 18 #define AFFTUDICT (2) 19 #define AFFTUPOST (3) 20 #define AFFTUFIELD (4) 21 #define AFFTLPOST (5) 22 #define AFFTLFIELD (6) 23 #define AFFTFDEF (7) 24 #define AFFTUWORD (8) 25 #define AFFTLWORD (9) 26 #define AFFTLOCK (10) 27 28 static char *affntab[] = { 29 ".db", /* AFFTINFO */ 30 ".dt", /* AFFTDOCTAB */ 31 ".x0", /* AFFTUDICT */ 32 ".y0", /* AFFTUPOST */ 33 ".z0", /* AFFTUFIELD */ 34 ".y1", /* AFFTLPOST */ 35 ".z1", /* AFFTLFIELD */ 36 ".fd", /* AFFTFDEF */ 37 ".w0", /* AFFTUWORD */ 38 ".w1", /* AFFTLWORD */ 39 ".lk", /* AFFTLOCK */ 40 }; 41 #ifdef NOTHING 42 static char *ftfn[] = { 43 ETYMON_DBF_INFO_EXT, /* AFFTINFO */ 44 ETYMON_DBF_DOCTAB_EXT, /* AFFTINFO */ 45 ETYMON_DBF_UDICT_EXT, /* AFFTUDICT */ 46 ETYMON_DBF_UPOST_EXT, /* AFFTUPOST */ 47 ETYMON_DBF_UFIELD_EXT, /* AFFTUFIELD */ 48 ETYMON_DBF_LPOST_EXT, /* AFFTLPOST */ 49 ETYMON_DBF_LFIELD_EXT, /* AFFTLFIELD */ 50 ETYMON_DBF_FDEF_EXT, /* AFFTFDEF */ 51 ETYMON_DBF_UWORD_EXT, /* AFFTUWORD */ 52 ETYMON_DBF_LWORD_EXT, /* AFFTLWORD */ 53 ETYMON_DBF_LOCK_EXT, /* AFFTLOCK */ 54 }; 55 #endif 56 57 typedef struct { 58 FILE *info; 59 FILE *doctab; 60 FILE *udict; 61 FILE *upost; 62 FILE *ufield; 63 FILE *lpost; 64 FILE *lfield; 65 FILE *fdef; 66 FILE *uword; 67 FILE *lword; 68 FILE *lock; 69 } Affile; 70 71 /* old */ 72 73 #include <stdlib.h> 74 #include <fcntl.h> 75 #include "config.h" 76 77 /* I'd like to move this to where it is used and reduce the number of 78 times it is used. */ 79 #define ETYMON_AF_BANNER "Amberfish, Version " AF_VERSION 80 #define ETYMON_AF_COPYRIGHT "Copyright (C) 1999-2004 Etymon Systems, Inc. All Rights Reserved." 81 #define ETYMON_AF_BANNER_STAMP ETYMON_AF_BANNER ". " ETYMON_AF_COPYRIGHT 82 83 /* Uint4 at beginning of db info file to indicate index version, used 84 * to determine compatibility; increment it here if the index format 85 * changes */ 86 #define ETYMON_INDEX_MAGIC (5) 87 88 /* maximum char[] size for an absolute path */ 89 #define AFPATHSIZE (1024) 90 #define ETYMON_MAX_PATH_SIZE AFPATHSIZE 91 92 /* maximum char[] key size */ 93 /* 11 is big enough to hold the default Uint4 keys */ 94 #define ETYMON_MAX_KEY_SIZE (11) 95 96 /* maximum char[] size for a field name (not an entire field path) */ 97 #define ETYMON_AF_MAX_FIELDNAME_SIZE (32) 98 99 /* maximum char[] size for an error message */ 100 #define ETYMON_MAX_MSG_SIZE (1024) 101 102 #define ETYMON_DBF_INFO AFFTINFO 103 #define ETYMON_DBF_INFO_EXT getftfn(AFFTINFO) 104 #define ETYMON_DBF_DOCTABLE AFFTDOCTAB 105 #define ETYMON_DBF_DOCTABLE_EXT getftfn(AFFTDOCTAB) 106 #define ETYMON_DBF_UDICT AFFTUDICT 107 #define ETYMON_DBF_UDICT_EXT getftfn(AFFTUDICT) 108 #define ETYMON_DBF_UPOST AFFTUPOST 109 #define ETYMON_DBF_UPOST_EXT getftfn(AFFTUPOST) 110 #define ETYMON_DBF_UFIELD AFFTUFIELD 111 #define ETYMON_DBF_UFIELD_EXT getftfn(AFFTUFIELD) 112 #define ETYMON_DBF_LPOST AFFTLPOST 113 #define ETYMON_DBF_LPOST_EXT getftfn(AFFTLPOST) 114 #define ETYMON_DBF_LFIELD AFFTLFIELD 115 #define ETYMON_DBF_LFIELD_EXT getftfn(AFFTLFIELD) 116 #define ETYMON_DBF_FDEF AFFTFDEF 117 #define ETYMON_DBF_FDEF_EXT getftfn(AFFTFDEF) 118 #define ETYMON_DBF_UWORD AFFTUWORD 119 #define ETYMON_DBF_UWORD_EXT getftfn(AFFTUWORD) 120 #define ETYMON_DBF_LWORD AFFTLWORD 121 #define ETYMON_DBF_LWORD_EXT getftfn(AFFTLWORD) 122 #define ETYMON_DBF_LOCK AFFTLOCK 123 #define ETYMON_DBF_LOCK_EXT getftfn(AFFTLOCK) 124 125 #define ETYMON_DB_PERM (S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH) 126 127 typedef struct { 128 unsigned char key[ETYMON_MAX_KEY_SIZE]; /* document key */ 129 char filename[ETYMON_MAX_PATH_SIZE]; /* document source file name */ 130 etymon_af_off_t begin; /* starting offset of document within the file */ 131 etymon_af_off_t end; /* ending offset of document within the file (one byte past end) */ 132 Uint4 parent; /* doc_id of parent document */ 133 Uint1 dclass_id; /* unique id associated with dclass */ 134 Uint1 deleted; /* 1 = marked as deleted; 0 = not deleted */ 135 } ETYMON_DOCTABLE; 136 137 typedef struct { 138 unsigned char name[ETYMON_AF_MAX_FIELDNAME_SIZE]; 139 Uint2 left; 140 Uint2 right; 141 } ETYMON_AF_FDEF_DISK; 142 143 typedef struct { 144 int (*error)(char*, int); 145 } ETYMON_LOG; 146 147 /* maximum number of keys in a page */ 148 #define ETYMON_MAX_KEYS_L (5000) 149 #define ETYMON_MAX_KEYS_NL (8000) 150 151 /* maximum char[] size for a parsed token */ 152 /* this must be set to the larger of ETYMON_MAX_WORD_SIZE and ETYMON_MAX_FIELDNAME_SIZE */ 153 #define ETYMON_MAX_TOKEN_SIZE (32) 154 155 /* maximum char[] size for an indexable word */ 156 #define ETYMON_MAX_WORD_SIZE (32) 157 158 /* maximum char[] size for a single term within a query */ 159 #define ETYMON_MAX_QUERY_TERM_SIZE (1024) 160 161 /* maximum level of nesting in structured fields */ 162 #define ETYMON_MAX_FIELD_NEST (64) 163 164 /* maxmium depth (levels) of tree allowed */ 165 #define ETYMON_MAX_PAGE_DEPTH (4) 166 167 /* average key length */ 168 #define ETYMON_MEAN_KEY_LEN_L (8) 169 #define ETYMON_MEAN_KEY_LEN_NL (5) 170 171 /* size of key buffer in pages */ 172 #define ETYMON_MAX_KEY_AREA_L (ETYMON_MAX_KEYS_L * ETYMON_MEAN_KEY_LEN_L) 173 #define ETYMON_MAX_KEY_AREA_NL (ETYMON_MAX_KEYS_NL * ETYMON_MEAN_KEY_LEN_NL) 174 175 #define ETYMON_AF_MAX_OP_STACK_DEPTH (256) 176 #define ETYMON_AF_MAX_R_STACK_DEPTH (256) 177 178 #define ETYMON_AF_OP_OR (1) 179 #define ETYMON_AF_OP_AND (2) 180 #define ETYMON_AF_OP_GROUP_OPEN (3) 181 #define ETYMON_AF_OP_GROUP_CLOSE (4) 182 183 typedef struct { 184 Uint2 n; /* number of keys */ 185 Uint4 p[ETYMON_MAX_KEYS_NL + 1]; /* pointers to other pages (offset) */ 186 Uint2 offset[ETYMON_MAX_KEYS_NL + 1]; /* offsets to keys */ 187 unsigned char keys[ETYMON_MAX_KEY_AREA_NL]; /* key buffer */ 188 } ETYMON_INDEX_PAGE_NL; 189 190 typedef struct { 191 Uint2 n; /* number of keys */ 192 Uint4 prev; /* previous leaf node (offset) */ 193 Uint4 next; /* next left node (offset) */ 194 Uint4 post[ETYMON_MAX_KEYS_L]; /* postings for each key */ 195 Uint4 post_n[ETYMON_MAX_KEYS_L]; /* number of postings for each key */ 196 Uint2 offset[ETYMON_MAX_KEYS_L + 1]; /* offsets to keys */ 197 unsigned char keys[ETYMON_MAX_KEY_AREA_L]; /* key buffer */ 198 } ETYMON_INDEX_PAGE_L; 199 200 typedef struct { 201 Uint4 pos; /* position of page on disk or 0 if empty slot */ 202 Uint1 is_nl; /* is it a non-leaf (here) or leaf (in the leaf cache) */ 203 ETYMON_INDEX_PAGE_NL nl; 204 } ETYMON_INDEX_PCACHE_NODE; 205 206 typedef struct { 207 Uint2 f[ETYMON_MAX_FIELD_NEST]; 208 int next; 209 } ETYMON_INDEX_FCACHE_NODE; 210 211 typedef struct { 212 Uint4 wn; 213 int next; 214 } ETYMON_INDEX_WNCACHE_NODE; 215 216 typedef struct { 217 unsigned char word[ETYMON_MAX_WORD_SIZE]; 218 int left; 219 int right; 220 int next; /* circular linked list */ 221 Uint2 freq; 222 Uint4 doc_id; 223 int fields; 224 int word_numbers_head; 225 int word_numbers_tail; 226 } ETYMON_INDEX_WCACHE_NODE; 227 228 typedef struct { 229 Uint4 doc_id; /* document id */ 230 Uint2 freq; /* frequency */ 231 Uint4 fields; /* field pointer */ 232 Uint4 fields_n; /* number of fields */ 233 Uint4 word_numbers; /* word numbers pointer */ 234 Uint4 word_numbers_n; /* number of word numbers */ 235 Uint4 next; /* next posting (index) or 0 */ 236 } ETYMON_INDEX_UPOST; 237 238 typedef struct { 239 Uint4 doc_id; /* document id */ 240 Uint2 freq; /* frequency */ 241 Uint4 fields; /* field pointer */ 242 Uint4 fields_n; /* number of fields */ 243 Uint4 word_numbers; /* word numbers pointer */ 244 Uint4 word_numbers_n; /* number of word numbers */ 245 } ETYMON_INDEX_LPOST; 246 247 typedef struct { 248 Uint2 fields[ETYMON_MAX_FIELD_NEST]; 249 Uint4 next; /* next field (index) or 0 */ 250 } ETYMON_INDEX_UFIELD; 251 252 typedef struct { 253 Uint4 wn; 254 Uint4 next; /* next word number (index) or 0 */ 255 } ETYMON_INDEX_UWORD; 256 257 typedef struct { 258 Uint2 fields[ETYMON_MAX_FIELD_NEST]; 259 } ETYMON_INDEX_LFIELD; 260 261 typedef struct { 262 Uint4 wn; 263 } ETYMON_INDEX_LWORD; 264 265 typedef struct ETYMON_AF_FDEF_MEM_STRUCT { 266 Uint2 n; 267 unsigned char name[ETYMON_AF_MAX_FIELDNAME_SIZE]; 268 struct ETYMON_AF_FDEF_MEM_STRUCT* left; 269 struct ETYMON_AF_FDEF_MEM_STRUCT* right; 270 struct ETYMON_AF_FDEF_MEM_STRUCT* next; 271 } ETYMON_AF_FDEF_MEM; 272 273 typedef struct { 274 char* dbname; /* database name */ 275 int doctable_fd; /* doctable file descriptor */ 276 etymon_af_off_t doctable_next_id; /* next available doctable number */ 277 ETYMON_DOCTABLE doctable; /* doctable buffer to use repeatedly */ 278 ETYMON_INDEX_WCACHE_NODE* wcache; /* binary tree (array) of word cache nodes */ 279 size_t wcache_size; 280 size_t wcache_count; 281 int wcache_root; 282 ETYMON_INDEX_FCACHE_NODE* fcache; /* linked list (array) of field cache nodes */ 283 size_t fcache_size; 284 size_t fcache_count; 285 ETYMON_INDEX_WNCACHE_NODE* wncache; /* linked list (array) of word number cache nodes */ 286 size_t wncache_size; 287 size_t wncache_count; 288 int udict_fd; /* udict file descriptor */ 289 etymon_af_off_t udict_size; /* current size of udict */ 290 int upost_fd; /* upost file descriptor */ 291 etymon_af_off_t upost_isize; /* current size of upost */ 292 int ufield_fd; /* ufield file descriptor */ 293 etymon_af_off_t ufield_isize; /* current size of ufield */ 294 int uword_fd; /* uword file descriptor */ 295 etymon_af_off_t uword_isize; /* current size of uword */ 296 Uint4 udict_root; /* root of the udict tree (offset) */ 297 ETYMON_INDEX_PCACHE_NODE* pcache_nl; /* non-leaf page cache */ 298 ETYMON_INDEX_PAGE_L pcache_l; /* leaf page cache */ 299 Uint4 pcache_l_write; /* offset position for write caching, or 0 if pcache_l has been flushed */ 300 int pcache_nl_size; 301 int pcache_count; 302 ETYMON_INDEX_PAGE_NL overflow_nl; /* overflow non-leaf page */ 303 ETYMON_INDEX_PAGE_L overflow_l; /* overflow leaf page */ 304 ETYMON_INDEX_PAGE_L extra_l; /* extra leaf page */ 305 ETYMON_INDEX_UPOST upost; 306 ETYMON_INDEX_UFIELD ufield; 307 ETYMON_INDEX_UWORD uword; 308 ETYMON_AF_FDEF_MEM* fdef_root; /* pointer to root node of fdef binary tree */ 309 ETYMON_AF_FDEF_MEM* fdef_tail; /* pointer to tail node of fdef threaded list */ 310 Uint2 fdef_count; 311 int phrase; /* enable phrase searching */ 312 int word_proximity; /* enable word proximity operator */ 313 int stemming; /* enable stemming */ 314 int number_words; /* enable recordings of word number data */ 315 int doc_n; /* total number of (non-deleted) documents in 316 database */ 317 int verbose; 318 int long_words; 319 int flushmsg; 320 } ETYMON_INDEX_INDEXING_STATE; 321 322 #endif 323