1 /* @include embindex **********************************************************
2 **
3 ** B+ Tree Indexing plus Disc Cache.
4 **
5 ** @author Copyright (c) 2003 Alan Bleasby
6 ** @version $Revision: 1.34 $
7 ** @modified $Date: 2012/05/24 16:57:10 $ by $Author: rice $
8 ** @@
9 **
10 ** This library is free software; you can redistribute it and/or
11 ** modify it under the terms of the GNU Lesser General Public
12 ** License as published by the Free Software Foundation; either
13 ** version 2.1 of the License, or (at your option) any later version.
14 **
15 ** This library is distributed in the hope that it will be useful,
16 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 ** Lesser General Public License for more details.
19 **
20 ** You should have received a copy of the GNU Lesser General Public
21 ** License along with this library; if not, write to the Free Software
22 ** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
23 ** MA  02110-1301,  USA.
24 **
25 ******************************************************************************/
26 
27 #ifndef EMBINDEX_H
28 #define EMBINDEX_H
29 
30 
31 
32 /* ========================================================================= */
33 /* ============================= include files ============================= */
34 /* ========================================================================= */
35 
36 #include "ajdefine.h"
37 #include "ajstr.h"
38 #include "ajlist.h"
39 #include "ajindex.h"
40 #include "ajreg.h"
41 
42 AJ_BEGIN_DECLS
43 
44 
45 
46 
47 /* ========================================================================= */
48 /* =============================== constants =============================== */
49 /* ========================================================================= */
50 
51 
52 
53 
54 /* ========================================================================= */
55 /* ============================== public data ============================== */
56 /* ========================================================================= */
57 
58 
59 
60 
61 /* @data EmbPBtreeEntry *******************************************************
62 **
63 ** Index tree entry: database-level index settings plus current entry state
64 **
65 ** @alias EmbOBtreeEntry
66 ** @alias EmbSBtreeEntry
67 **
68 ** @attr dbname [AjPStr] Database name
69 ** @attr dbrs [AjPStr] Index resource definition
70 ** @attr release [AjPStr] Release number
71 ** @attr date [AjPStr] Release date
72 ** @attr dbtype [AjPStr] Database type
73 ** @attr directory [AjPStr] Database directory
74 ** @attr idirectory [AjPStr] Index directory
75 ** @attr idextension [AjPStr] Id index extension
76 ** @attr maxid [AjPStr] Longest id in data
77 ** @attr files [AjPList] List of data filenames
78 ** @attr reffiles [AjPList*] Lists of data reference filenames
79 ** @attr fields [AjPList] EMBOSS index field structures
80 ** @attr id [AjPStr] Entry identifier
81 ** @attr idcache [AjPBtcache] Id cache structure
82 ** @attr pripagecount [ajlong] Cache primary page count
83 ** @attr secpagecount [ajlong] Cache secondary page count
84 ** @attr do_id [AjBool] If true, build id index
85 ** @attr compressed [AjBool] If true, compress id index
86 ** @attr nfiles [ajuint] Data file count
87 ** @attr refcount [ajuint] Number of reference file(s) for each entry
88 ** @attr idlen [ajuint] Maximum id length in index
89 ** @attr idmaxlen [ajuint] Maximum id length in data
90 ** @attr idtruncate [ajuint] Number of ids truncated
91 ** @attr pripagesize [ajuint] Default primary page size
92 ** @attr pricachesize [ajuint] Default primary cache size
93 ** @attr idorder [ajuint] Id index primary order
94 ** @attr idfill [ajuint] Id index primary fill count
95 ** @attr secpagesize [ajuint] Default secondary page size
96 ** @attr seccachesize [ajuint] Default secondary cache size
97 ** @attr idsecorder [ajuint] Id index secondary order
98 ** @attr idsecfill [ajuint] Id index secondary fill count
99 ** @attr fpos [ajlong] Input file position
100 ** @attr reffpos [ajlong*] Input extra (reference) file positions
101 ******************************************************************************/
102 
103 typedef struct EmbSBtreeEntry
104 {
105     AjPStr dbname;
106     AjPStr dbrs;
107     AjPStr release;
108     AjPStr date;
109     AjPStr dbtype;
110 
111     AjPStr directory;
112     AjPStr idirectory;
113     AjPStr idextension;
114     AjPStr maxid;
115 
116     AjPList files;
117     AjPList *reffiles;
118     AjPList fields;
119 
120     AjPStr id;
121     AjPBtcache idcache;
122     ajlong pripagecount;
123     ajlong secpagecount;
124 
125     AjBool do_id;
126     AjBool compressed;
127 
128     ajuint nfiles;
129 
130     ajuint refcount;
131 
132     ajuint idlen;
133     ajuint idmaxlen;
134     ajuint idtruncate;
135 
136     ajuint pripagesize;
137     ajuint pricachesize;
138 
139     ajuint idorder;
140     ajuint idfill;
141 
142     ajuint secpagesize;
143     ajuint seccachesize;
144 
145     ajuint idsecorder;
146     ajuint idsecfill;
147 
148 
149     ajlong fpos;
150     ajlong *reffpos;
151 
152 } EmbOBtreeEntry;
153 #define EmbPBtreeEntry EmbOBtreeEntry* /* macro, not typedef: "const EmbPBtreeEntry x" is a pointer to const; declare one variable per statement */
154 
155 
156 /* @data EmbPBtreeField *******************************************************
157 **
158 ** Index tree field: per-field index cache, settings and keyword data
159 **
160 ** @alias EmbSBtreeField
161 ** @alias EmbOBtreeField
162 **
163 ** @attr cache     [AjPBtcache] Cache structure
164 ** @attr data      [AjPList] Keywords to index
165 ** @attr name      [AjPStr] File basename
166 ** @attr extension [AjPStr] File extension
167 ** @attr maxkey    [AjPStr] Longest keyword found
168 ** @attr freelist  [AjPStr*] Free data elements for reuse
169 ** @attr pripagecount [ajulong] Index primary page count
170 ** @attr secpagecount [ajulong] Index secondary page count
171 ** @attr pripagesize  [ajuint] Index primary page size
172 ** @attr secpagesize  [ajuint] Index secondary page size
173 ** @attr pricachesize [ajuint] Index primary cache size
174 ** @attr seccachesize [ajuint] Index secondary cache size
175 ** @attr order     [ajuint] Primary page order
176 ** @attr fill      [ajuint] Primary page fill count
177 ** @attr secorder  [ajuint] Secondary page order
178 ** @attr secfill   [ajuint] Secondary page fill count
179 ** @attr refcount  [ajuint] Number of reference file(s) per entry
180 ** @attr len       [ajuint] Maximum keyword length in index
181 ** @attr idlen     [ajuint] Maximum id length in index
182 ** @attr maxlen    [ajuint] Maximum keyword length in data
183 ** @attr truncate  [ajuint] Number of keywords truncated
184 ** @attr freecount [ajuint] Free list used
185 ** @attr freesize  [ajuint] Free list size
186 ** @attr secondary [AjBool] Secondary index if true
187 ** @attr compressed [AjBool] Compress index if true
188 ** @attr Padding [char[4]] Padding to alignment boundary
189 ******************************************************************************/
190 
191 typedef struct EmbSBtreeField
192 {
193     AjPBtcache cache;
194     AjPList data;
195     AjPStr name;
196     AjPStr extension;
197     AjPStr maxkey;
198     AjPStr *freelist;
199     ajulong pripagecount;
200     ajulong secpagecount;
201     ajuint pripagesize;
202     ajuint secpagesize;
203     ajuint pricachesize;
204     ajuint seccachesize;
205     ajuint order;
206     ajuint fill;
207     ajuint secorder;
208     ajuint secfill;
209     ajuint refcount;
210     ajuint len;
211     ajuint idlen;
212     ajuint maxlen;
213     ajuint truncate;
214     ajuint freecount;
215     ajuint freesize;
216     AjBool secondary;
217     AjBool compressed;
218     char   Padding[4]; /* NOTE(review): 15 ajuint + 2 AjBool + 4 = 72 bytes if those types are 4 bytes wide - TODO confirm; revisit when members change */
219 } EmbOBtreeField;
220 #define EmbPBtreeField EmbOBtreeField* /* macro, not typedef: "const EmbPBtreeField x" is a pointer to const; declare one variable per statement */
221 
222 /* ========================================================================= */
223 /* =========================== public functions ============================ */
224 /* ========================================================================= */
225 
226 
227 
228 /*
229 ** Prototype definitions
230 */
231 
232 void   embBtreeIndexEntry(EmbPBtreeEntry entry,
233                           ajuint dbno);
234 void   embBtreeIndexField(EmbPBtreeField field,
235                           const EmbPBtreeEntry entry,
236                           ajuint dbno);
237 ajuint embBtreeIndexPrimary(EmbPBtreeField field,
238                             const EmbPBtreeEntry entry,
239                             ajuint dbno);
240 ajuint embBtreeIndexSecondary(EmbPBtreeField field,
241                               const EmbPBtreeEntry entry);
242 /* Format-specific line parsers: const-qualified readline plus a non-const field */
243 void   embBtreeFindEmblAc(const AjPStr readline, EmbPBtreeField field,
244                           AjPStr *Pstr);
245 void   embBtreeParseEmblAc(const AjPStr readline, EmbPBtreeField field);
246 void   embBtreeParseEmblDe(const AjPStr readline, EmbPBtreeField field);
247 void   embBtreeParseEmblKw(const AjPStr readline, EmbPBtreeField field);
248 void   embBtreeParseEmblSv(const AjPStr readline, EmbPBtreeField field);
249 void   embBtreeParseEmblTx(const AjPStr readline, EmbPBtreeField field);
250 void   embBtreeParseFastaAc(const AjPStr readline, EmbPBtreeField field);
251 void   embBtreeParseFastaDe(const AjPStr readline, EmbPBtreeField field);
252 void   embBtreeParseFastaSv(const AjPStr readline, EmbPBtreeField field);
253 void   embBtreeParseGenbankAc(const AjPStr readline, EmbPBtreeField field);
254 void   embBtreeParseGenbankDe(const AjPStr readline, EmbPBtreeField field);
255 void   embBtreeParseGenbankKw(const AjPStr readline, EmbPBtreeField field);
256 void   embBtreeParseGenbankTx(const AjPStr readline, EmbPBtreeField field);
257 /* Parsers that additionally take an AjPRegexp; embBtreeParseEntry targets the entry, the rest a field */
258 void   embBtreeParseEntry(const AjPStr readline, AjPRegexp regexp,
259                           EmbPBtreeEntry entry);
260 void   embBtreeParseField(const AjPStr readline, AjPRegexp regexp,
261                           EmbPBtreeField field);
262 void   embBtreeParseFieldSecond(const AjPStr readline, AjPRegexp regexp,
263                                EmbPBtreeField field);
264 void   embBtreeParseFieldThird(const AjPStr readline, AjPRegexp regexp,
265                                EmbPBtreeField field);
266 void   embBtreeParseFieldTrim(const AjPStr readline, AjPRegexp regexp,
267                               EmbPBtreeField field);
268 void   embBtreeReportEntry(AjPFile outf, const EmbPBtreeEntry entry);
269 void   embBtreeReportField(AjPFile outf, const EmbPBtreeField field);
270 void   embBtreeEmblAC(const AjPStr acline, AjPList aclist);
271 void   embBtreeEmblKW(const AjPStr kwline, AjPList kwlist, ajuint maxlen);
272 void   embBtreeEmblDE(const AjPStr deline, AjPList delist, ajuint maxlen);
273 void   embBtreeEmblSV(const AjPStr idline, AjPList svlist);
274 void   embBtreeEmblTX(const AjPStr kwline, AjPList kwlist, ajuint maxlen);
275 void   embBtreeGenBankAC(const AjPStr acline, AjPList aclist);
276 void   embBtreeGenBankKW(const AjPStr kwline, AjPList kwlist, ajuint maxlen);
277 void   embBtreeGenBankDE(const AjPStr kwline, AjPList kwlist, ajuint maxlen);
278 void   embBtreeGenBankTX(const AjPStr kwline, AjPList kwlist, ajuint maxlen);
279 /* NOTE(review): embBtreeFastaSV takes maxlen but embBtreeEmblSV does not - confirm this is intended */
280 void   embBtreeFastaDE(const AjPStr kwline, AjPList kwlist, ajuint maxlen);
281 void   embBtreeFastaSV(const AjPStr kwline, AjPList kwlist, ajuint maxlen);
282 /* Entry-level API: apart from embBtreeReadDir, these take or return an EmbPBtreeEntry */
283 
284 ajuint  embBtreeReadDir(AjPStr **filelist, const AjPStr fdirectory,
285 		       const AjPStr files, const AjPStr exclude);
286 EmbPBtreeEntry embBtreeEntryNew(ajuint refcount);
287 void           embBtreeEntrySetCompressed(EmbPBtreeEntry entry);
288 ajuint         embBtreeSetFields(EmbPBtreeEntry entry, AjPStr const * fields);
289 void           embBtreeEntryDel(EmbPBtreeEntry *thys);
290 void           embBtreeSetDbInfo(EmbPBtreeEntry entry, const AjPStr name,
291 				 const AjPStr dbrs,
292 		                 const AjPStr date, const AjPStr release,
293 		                 const AjPStr type, const AjPStr directory,
294 		                 const AjPStr idirectory);
295 ajuint          embBtreeGetFiles(EmbPBtreeEntry entry, const AjPStr fdirectory,
296 				const AjPStr files, const AjPStr exclude);
297 AjBool         embBtreeWriteEntryFile(const EmbPBtreeEntry entry);
298 void           embBtreeGetRsInfo(EmbPBtreeEntry entry);
299 AjBool         embBtreeOpenCaches(EmbPBtreeEntry entry);
300 AjBool         embBtreeCloseCaches(EmbPBtreeEntry entry);
301 AjBool         embBtreeDumpParameters(EmbPBtreeEntry entry);
302 /* Field-level API: the New functions return an EmbPBtreeField, Del takes its address */
303 EmbPBtreeField embBtreeFieldNewC(const char* nametxt);
304 EmbPBtreeField embBtreeFieldNewS(const AjPStr name, ajuint refcount);
305 void           embBtreeFieldDel(EmbPBtreeField *Pthis);
306 AjBool         embBtreeFieldGetdataS(EmbPBtreeField field, AjPStr *Pstr);
307 void           embBtreeFieldSetCompressed(EmbPBtreeField field);
308 void           embBtreeFieldSetSecondary(EmbPBtreeField field);
309 void           embBtreeFieldSetIdtype(EmbPBtreeField field);
310 EmbPBtreeField embBtreeGetFieldC(EmbPBtreeEntry entry, const char * nametxt);
311 EmbPBtreeField embBtreeGetFieldS(EmbPBtreeEntry entry, const AjPStr name);
312 
313 void           embIndexExit(void);
314 
315 /*
316 ** End of prototype definitions
317 */
318 
319 #if 0 /* NOTE(review): prototype is compiled out; remove or re-enable once embindex.c has been checked */
320 AjBool         embBtreeProbeCaches(EmbPBtreeEntry entry);
321 #endif
322 
323 
324 AJ_END_DECLS
325 
326 #endif  /* !EMBINDEX_H */
327 
328