1 #ifndef _AF_INDEX_H
2 #define _AF_INDEX_H
3 
4 /***** new *****/
5 
6 #include "config.h"
7 
8 typedef struct {
9 	Uint2 dbid;
10 	int memory;  /* maximum amount of memory to use during indexing (MB) */
11 	char **source;  /* list of files to index */
12 	int sourcen;
13 	char *doctype;
14 	Afchar *split;  /* delimiter of multiple documents (if any)
15                            within a file */
16 	int dlevel;  /* maximum number of levels to descend (nested documents) */
17 	int verbose;  /* boolean: verbose output */
18 	int _stdin;  /* boolean: read files to index from standard input */
19 	int _longwords;
20 } Afindex;
21 
/*
 * Companion structure to Afindex.  It currently carries a single int
 * member; NOTE(review): presumably a placeholder for results of an
 * indexing run -- confirm against callers.
 */
typedef struct {
	int _tmp;
} Afindex_r;
25 
26 /***** old *****/
27 
28 #include "defs.h"
29 #include "docbuf.h"
30 
/*
 * Legacy indexing options.  Compiled only when ZZZZZ is defined, so
 * the structure is normally excluded from the build.
 */
#ifdef ZZZZZ
typedef struct {
	ETYMON_LOG log;
	char *dbname; /* database name */
	int memory; /* maximum amount of memory to use during indexing (MB) */
	int dlevel; /* maximum number of levels to descend (nested documents) */
	char *dclass; /* document class */
	char **files; /* list of files to index */
	int files_n; /* presumably the number of entries in files; TODO confirm */
	int files_stdin; /* boolean: read files to index from standard input */
/*	int phrase; // boolean: enable phrase search */
/*	int word_proximity; // boolean: enable word proximity operator */
	char *split; /* delimiter of multiple documents (if any)
	                within a file */
	int verbose; /* boolean: verbose output */
	char *dc_options; /* document class options */
	int long_words;
} ETYMON_INDEX_OPTIONS;
#endif
50 
51 typedef struct {
52 	unsigned char* key; /* document key */
53 	char* filename; /* document source file name */
54 	etymon_af_off_t begin; /* starting offset of document within the file */
55 	etymon_af_off_t end; /* ending offset of document within the file (one byte past end) */
56 	Uint4 parent;  /* doc_id of parent document */
57 	Uint1 dclass_id; /* unique id associated with dclass */
58 	ETYMON_INDEX_INDEXING_STATE* state;
59 } ETYMON_AF_INDEX_ADD_DOC;
60 
61 typedef struct {
62 	Uint4 doc_id; /* unique id associated with dclass */
63 	unsigned char* word; /* word to add to the index, must be unsigned char[ETYMON_MAX_WORD_SIZE] */
64 	Uint2* fields; /* array representing field location, must be Uint2[ETYMON_MAX_FIELD_NEST] */
65 	Uint4 word_number; /* word position, starting with 1 */
66 	ETYMON_INDEX_INDEXING_STATE* state;
67 } ETYMON_AF_INDEX_ADD_WORD;
68 
/*
 * Document-class initialisation record: selects whether files are read
 * through docbuf and carries an untyped state pointer.
 */
typedef struct {
	int use_docbuf;  /* 1: read files via docbuf;
	                    0: don't use docbuf; this will also
	                       disable splitting files */
	void *dc_state;  /* untyped pointer; presumably private state owned
	                    by the document class -- TODO confirm */
} ETYMON_AF_DC_INIT;
75 
76 typedef struct ETYMON_AF_DC_SPLIT_STRUCT {
77 	etymon_af_off_t end;  /* ending offset of document within the
78 				 file (one byte past end) */
79 	struct ETYMON_AF_DC_SPLIT_STRUCT* next;
80 } ETYMON_AF_DC_SPLIT;
81 
82 typedef struct {
83 	ETYMON_DOCBUF* docbuf;
84 	char* filename;
85 	ETYMON_AF_DC_SPLIT* split_list;
86 	int dlevel;  /* maximum number of levels to descend (nested
87 			documents) */
88 	Uint1 dclass_id;
89 	ETYMON_INDEX_INDEXING_STATE* state;
90 	void* dc_state;
91 } ETYMON_AF_DC_INDEX;
92 
93 /* Used by search.cc as well; should it be moved out of index.cc? */
94 int etymon_index_search_keys_nl(unsigned char* word, size_t word_len, ETYMON_INDEX_PAGE_NL* page);
95 
96 /* Used by search.cc as well; should it be moved out of index.cc? */
97 int etymon_index_search_keys_l(unsigned char* word, size_t word_len, ETYMON_INDEX_PAGE_L* page, int* match);
98 
99 int etymon_index_add_files(Afindex *opt);
100 
/* Legacy prototype; declared only when ZZZZZ is defined. */
#ifdef ZZZZZ
int etymon_index_optimize_old(ETYMON_INDEX_OPTIONS *opt);
#endif
104 
105 Uint4 etymon_af_index_add_doc(ETYMON_AF_INDEX_ADD_DOC* opt);
106 
107 int etymon_af_index_add_word(ETYMON_AF_INDEX_ADD_WORD* opt);
108 
109 Uint4 etymon_index_dclass_get_next_doc_id(ETYMON_INDEX_INDEXING_STATE* state);
110 
/*
 * Takes a word and returns an int; presumably non-zero when the word
 * is acceptable for indexing -- TODO confirm in the implementation.
 */
int etymon_index_valid_word(unsigned char *word);
112 
113 #endif
114