1 /* @source embindex ***********************************************************
2 **
3 ** B+ Tree Indexing plus Disc Cache.
4 **
5 ** @author Copyright (c) 2003 Alan Bleasby
6 ** @version $Revision: 1.57 $
7 ** @modified $Date: 2012/12/07 10:24:08 $ by $Author: rice $
8 ** @@
9 **
10 ** This library is free software; you can redistribute it and/or
11 ** modify it under the terms of the GNU Lesser General Public
12 ** License as published by the Free Software Foundation; either
13 ** version 2.1 of the License, or (at your option) any later version.
14 **
15 ** This library is distributed in the hope that it will be useful,
16 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 ** Lesser General Public License for more details.
19 **
20 ** You should have received a copy of the GNU Lesser General Public
21 ** License along with this library; if not, write to the Free Software
22 ** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
23 ** MA  02110-1301,  USA.
24 **
25 ******************************************************************************/
26 
27 
28 #include "ajlib.h"
29 
30 #include "embindex.h"
31 #include "ajlist.h"
32 #include "ajindex.h"
33 #include "ajreg.h"
34 #include "ajarr.h"
35 #include "ajnam.h"
36 
37 #include <errno.h>
38 
/* NOTE(review): neither macro is referenced in the visible part of this
** file; BTENTRYFILE is presumably a file extension - confirm at point of use.
*/
39 #define BTENTRYFILE     ".ent"
40 #define KWLIMIT 12
41 
42 
/* Scratch objects shared by the parsing functions in this file. They are
** file-scope statics that each call overwrites.
*/
43 static AjPStr embindexLine      = NULL;  /* working copy of the record being parsed */
44 static AjPStr embindexToken     = NULL;  /* current token of the AC/KW/TX parsers */
45 static AjPStr embindexTstr      = NULL;  /* digits of the low number of an accession range */
46 static AjPStr embindexPrefix    = NULL;  /* text between the dash and the high number of a range */
47 static AjPStr embindexFormat    = NULL;  /* format string built for expanding a range */
48 static AjPStrTok embindexHandle = NULL;  /* token handle reassigned by each tokenising parser */
49 
/* State shared by the embBtreeIndex* functions */
50 static AjPStr  indexWord = NULL;         /* key currently being indexed */
51 static AjPBtId indexId   = NULL;         /* id object created on first use, then reused */
52 
/* Defined outside the visible part of this file */
53 static AjPFile btreeCreateFile(const AjPStr idirectory, const AjPStr dbname,
54 			       const char *add);
55 
56 
57 
58 
59 /* @func embBtreeIndexEntry ***************************************************
60 **
61 ** Add a term to an index entry cache
62 **
63 ** @param [u] entry [EmbPBtreeEntry] Entry with id
64 ** @param [r] dbno [ajuint] Database number for an identifier index field
65 ** @return [void]
66 **
67 ** @release 6.4.0
68 ** @@
69 ******************************************************************************/
70 
embBtreeIndexEntry(EmbPBtreeEntry entry,ajuint dbno)71 void embBtreeIndexEntry(EmbPBtreeEntry entry,
72                         ajuint dbno)
73 {
74     AjBool dotrunc = ajFalse;
75     ajuint iref;
76 
77     if(!indexId)
78         indexId = ajBtreeIdNew(entry->refcount);
79 
80     if(entry->do_id)
81     {
82         if(ajStrGetLen(entry->id) > entry->idlen)
83         {
84             dotrunc = ajTrue;
85             if(ajStrGetLen(entry->id) > entry->idmaxlen)
86                 ajWarn("id '%S' too long (%u), truncating to idlen %d",
87                        entry->id, ajStrGetLen(entry->id), entry->idlen);
88 
89         }
90 
91         if(ajStrGetLen(entry->id) > entry->idmaxlen)
92         {
93             entry->idmaxlen = ajStrGetLen(entry->id);
94             ajStrAssignS(&entry->maxid, entry->id);
95         }
96 
97         if(dotrunc)
98         {
99             entry->idtruncate++;
100             ajStrTruncateLen(&entry->id,entry->idlen);
101         }
102 
103         ajStrAssignS(&indexId->id,entry->id);
104         indexId->dbno = dbno;
105         indexId->dups = 0;
106         indexId->offset = entry->fpos;
107         indexId->refcount = entry->refcount;
108 
109         if(entry->refcount)
110         {
111             for(iref=0; iref < entry->refcount; iref++)
112                 indexId->refoffsets[iref] = entry->reffpos[iref];
113         }
114 
115         ajBtreeIdentIndex(entry->idcache,indexId);
116     }
117 
118     return;
119 }
120 
121 
122 
123 
124 /* @func embBtreeIndexField ***************************************************
125 **
126 ** Add a term to an index field cache
127 **
128 ** @param [u] field [EmbPBtreeField] Field with list of data
129 ** @param [r] entry [const EmbPBtreeEntry] Entry with id
130 ** @param [r] dbno [ajuint] Database number for an identifier index field
131 ** @return [void]
132 **
133 ** @release 6.4.0
134 ** @@
135 ******************************************************************************/
136 
embBtreeIndexField(EmbPBtreeField field,const EmbPBtreeEntry entry,ajuint dbno)137 void embBtreeIndexField(EmbPBtreeField field,
138                         const EmbPBtreeEntry entry,
139                         ajuint dbno)
140 {
141     AjBool dotrunc = ajFalse;
142 
143     ajuint iref;
144 
145     if(!indexId)
146         indexId = ajBtreeIdNew(field->refcount);
147 
148     while(ajListstrPop(field->data,&indexWord))
149     {
150         if(ajStrGetLen(indexWord) > field->len)
151         {
152             dotrunc = ajTrue;
153             if(ajStrGetLen(indexWord) > field->maxlen)
154                 ajWarn("%S field token '%S' too long (%u), "
155                        "truncating to %Slen %d",
156                        field->name, indexWord, ajStrGetLen(indexWord),
157                        field->name, field->len);
158         }
159 
160         if(ajStrGetLen(indexWord) > field->maxlen)
161         {
162             field->maxlen = ajStrGetLen(indexWord);
163             ajStrAssignS(&field->maxkey, indexWord);
164         }
165 
166         if(dotrunc)
167         {
168             field->truncate++;
169             ajStrTruncateLen(&indexWord,field->len);
170         }
171 
172         if(field->secondary)
173         {
174             ajBtreeKeyIndex(field->cache, indexWord, entry->id);
175         }
176         else
177         {
178             ajStrAssignS(&indexId->id,indexWord);
179             indexId->dbno = dbno;
180             indexId->dups = 0;
181             indexId->offset = entry->fpos;
182 
183             if(entry->refcount)
184             {
185                 for(iref=0; iref < entry->refcount; iref++)
186                     indexId->refoffsets[iref] = entry->reffpos[iref];
187             }
188 
189             ajBtreeIdentIndex(field->cache,indexId);
190         }
191     }
192 
193     return;
194 }
195 
196 
197 
198 
199 /* @func embBtreeIndexPrimary *************************************************
200 **
201 ** Add a term to an index field cache
202 **
203 ** @param [u] field [EmbPBtreeField] Field with list of data
204 ** @param [r] entry [const EmbPBtreeEntry] Entry with id
205 ** @param [r] dbno [ajuint] Database number for an identifier index field
206 ** @return [ajuint] Number of keys added
207 **
208 ** @release 6.5.0
209 ** @@
210 ******************************************************************************/
211 
embBtreeIndexPrimary(EmbPBtreeField field,const EmbPBtreeEntry entry,ajuint dbno)212 ajuint embBtreeIndexPrimary(EmbPBtreeField field,
213                             const EmbPBtreeEntry entry,
214                             ajuint dbno)
215 {
216     ajuint ret = 0;
217     AjBool dotrunc = ajFalse;
218     ajuint iref = 0;
219 
220     if(!indexId)
221         indexId = ajBtreeIdNew(entry->refcount);
222 
223     while(embBtreeFieldGetdataS(field, &indexWord))
224     {
225         if(ajStrGetLen(indexWord) > field->len)
226         {
227             dotrunc = ajTrue;
228             if(ajStrGetLen(indexWord) > field->maxlen)
229                 ajWarn("%S field token '%S' too long (%u), "
230                        "truncating to %Slen %d",
231                        field->name, indexWord,
232                        ajStrGetLen(indexWord),
233                        field->name, field->len);
234         }
235 
236         if(ajStrGetLen(indexWord) > field->maxlen)
237         {
238             field->maxlen = ajStrGetLen(indexWord);
239             ajStrAssignS(&field->maxkey, indexWord);
240         }
241 
242         if(dotrunc)
243         {
244             field->truncate++;
245             ajStrTruncateLen(&indexWord,field->len);
246         }
247 
248         ajStrAssignS(&indexId->id, indexWord);
249         indexId->dbno = dbno;
250         indexId->dups = 0;
251         indexId->offset = entry->fpos;
252         indexId->refcount = entry->refcount;
253 
254         if(entry->refcount)
255         {
256             for(iref=0; iref < entry->refcount; iref++)
257                indexId->refoffsets[iref] = entry->reffpos[iref];
258         }
259 
260         ajBtreeIdentIndex(field->cache, indexId);
261         ret++;
262     }
263 
264     return ret;
265 }
266 
267 
268 
269 
270 /* @func embBtreeIndexSecondary ***********************************************
271 **
272 ** Add a term to an index field cache
273 **
274 ** @param [u] field [EmbPBtreeField] Field with list of data
275 ** @param [r] entry [const EmbPBtreeEntry] Entry identifier
276 ** @return [ajuint] Number of keys added
277 **
278 ** @release 6.5.0
279 ** @@
280 ******************************************************************************/
281 
embBtreeIndexSecondary(EmbPBtreeField field,const EmbPBtreeEntry entry)282 ajuint embBtreeIndexSecondary(EmbPBtreeField field,
283                               const EmbPBtreeEntry entry)
284 {
285     ajuint ret = 0;
286     AjBool dotrunc = ajFalse;
287 
288     while(embBtreeFieldGetdataS(field, &indexWord))
289     {
290         if(ajStrGetLen(indexWord) > field->len)
291         {
292             dotrunc = ajTrue;
293             if(ajStrGetLen(indexWord) > field->maxlen)
294                 ajWarn("%S field token '%S' too long (%u), "
295                        "truncating to %Slen %d",
296                        field->name, indexWord,
297                        ajStrGetLen(indexWord),
298                        field->name, field->len);
299         }
300 
301         if(ajStrGetLen(indexWord) > field->maxlen)
302         {
303             field->maxlen = ajStrGetLen(indexWord);
304             ajStrAssignS(&field->maxkey, indexWord);
305         }
306 
307         if(dotrunc)
308         {
309             field->truncate++;
310             ajStrTruncateLen(&indexWord,field->len);
311         }
312 
313         ajBtreeKeyIndex(field->cache, indexWord, entry->id);
314         ret++;
315     }
316 
317     return ret;
318 }
319 
320 
321 
322 
323 /* @func embBtreeParseEntry ***************************************************
324 **
325 ** Parse an entry ID from an input record
326 **
327 ** @param [r]readline [const AjPStr] INput record
328 ** @param [u] regexp [AjPRegexp] Regular expression to extract tokens
329 ** @param [u] entry [EmbPBtreeEntry] Entry
330 ** @return [void]
331 **
332 ** @release 6.4.0
333 ** @@
334 ******************************************************************************/
335 
embBtreeParseEntry(const AjPStr readline,AjPRegexp regexp,EmbPBtreeEntry entry)336 void embBtreeParseEntry(const AjPStr readline, AjPRegexp regexp,
337                         EmbPBtreeEntry entry)
338 {
339     if(ajRegExec(regexp, readline))
340     {
341         ajRegSubI(regexp, 1, &entry->id);
342     }
343 
344     return;
345 }
346 
347 
348 
349 
350 /* @func embBtreeParseField ***************************************************
351 **
352 ** Parse field tokens from an input record, iterating over a
353 ** regular expression.
354 **
355 ** @param [r]readline [const AjPStr] Input record
356 ** @param [u] regexp [AjPRegexp] Regular expression to extract tokens
357 ** @param [u] field [EmbPBtreeField] Field
358 ** @return [void]
359 **
360 ** @release 6.4.0
361 ** @@
362 ******************************************************************************/
363 
embBtreeParseField(const AjPStr readline,AjPRegexp regexp,EmbPBtreeField field)364 void embBtreeParseField(const AjPStr readline, AjPRegexp regexp,
365                         EmbPBtreeField field)
366 {
367     AjPStr tmpfd = NULL;
368 
369     ajStrAssignS(&embindexLine,readline);
370 
371     while(ajRegExec(regexp, embindexLine))
372     {
373         if(field->freecount)
374             tmpfd = field->freelist[--field->freecount];
375 
376         ajRegSubI(regexp, 1, &tmpfd);
377         ajRegPost(regexp, &embindexLine);
378 
379         if(!ajStrGetLen(tmpfd))
380         {
381             ajStrDel(&tmpfd);
382             continue;
383         }
384 
385         ajListstrPushAppend(field->data,tmpfd);
386         ajDebug("++%S '%S'\n", field->name, tmpfd);
387         tmpfd = NULL;
388     }
389 
390     return;
391 }
392 
393 
394 
395 
396 /* @func embBtreeFieldGetdataS *************************************************
397 **
398 ** Return the next field data value as a word
399 **
400 ** @param [u] field [EmbPBtreeField] Field
401 ** @param [w] Pstr [AjPStr*] Data value field
402 ** @return [AjBool] True if data was found
403 **
404 ** @release 6.5.0
405 ** @@
406 ******************************************************************************/
407 
embBtreeFieldGetdataS(EmbPBtreeField field,AjPStr * Pstr)408 AjBool embBtreeFieldGetdataS(EmbPBtreeField field, AjPStr *Pstr)
409 {
410     AjPStr tmpstr = NULL;
411     ajuint oldfreesize = 0;
412 
413     if(!ajListGetLength(field->data))
414         return ajFalse;
415 
416     ajListPop(field->data,(void **)&tmpstr);
417     ajStrAssignS(Pstr, tmpstr);
418 
419     if(!field->freelist)
420     {
421         field->freecount = 0;
422         field->freesize = 16;
423         AJCNEW(field->freelist, field->freesize);
424     }
425 
426     if(field->freesize == field->freecount)
427     {
428         oldfreesize = field->freesize;
429         field->freesize *= 2;
430         AJCRESIZE0(field->freelist, oldfreesize, field->freesize);
431     }
432 
433     field->freelist[field->freecount++] = tmpstr;
434     tmpstr = NULL;
435 
436     return ajTrue;
437 }
438 
439 
440 
441 
442 /* @func embBtreeParseFieldSecond *********************************************
443 **
444 ** Parse field tokens from an input record using the first and second
445 ** matches to a regular expression.
446 **
447 ** @param [r] readline [const AjPStr] Input record
448 ** @param [u] regexp [AjPRegexp] Regular expression to extract tokens
449 ** @param [u] field [EmbPBtreeField] Field
450 ** @return [void]
451 **
452 ** @release 6.4.0
453 ** @@
454 ******************************************************************************/
455 
embBtreeParseFieldSecond(const AjPStr readline,AjPRegexp regexp,EmbPBtreeField field)456 void embBtreeParseFieldSecond(const AjPStr readline, AjPRegexp regexp,
457                               EmbPBtreeField field)
458 {
459     AjPStr tmpfd = NULL;
460 
461     if(ajRegExec(regexp, readline))
462     {
463         if(field->freecount)
464             tmpfd = field->freelist[--field->freecount];
465 
466         ajRegSubI(regexp, 1, &tmpfd);
467         ajRegSubI(regexp, 1, &tmpfd);
468 
469         if(ajStrGetLen(tmpfd))
470         {
471             ajListstrPushAppend(field->data, tmpfd);
472             ajDebug("++%S '%S'\n", field->name, tmpfd);
473             tmpfd = NULL;
474             if(field->freecount)
475                 tmpfd = field->freelist[--field->freecount];
476         }
477 
478         ajRegSubI(regexp,2, &tmpfd);
479 
480         if(ajStrGetLen(tmpfd))
481         {
482             ajListstrPushAppend(field->data,tmpfd);
483             ajDebug("++%S '%S'\n", field->name, tmpfd);
484             tmpfd = NULL;
485         }
486     }
487 
488     ajStrDel(&tmpfd);
489 
490     return;
491 }
492 
493 
494 
495 
496 /* @func embBtreeParseFieldThird **********************************************
497 **
498 ** Parse field tokens from an input record using the first and third
499 ** matches to a regular expression.
500 **
501 ** @param [r] readline [const AjPStr] Input record
502 ** @param [u] regexp [AjPRegexp] Regular expression to extract tokens
503 ** @param [u] field [EmbPBtreeField] Field
504 ** @return [void]
505 **
506 ** @release 6.4.0
507 ** @@
508 ******************************************************************************/
509 
embBtreeParseFieldThird(const AjPStr readline,AjPRegexp regexp,EmbPBtreeField field)510 void embBtreeParseFieldThird(const AjPStr readline, AjPRegexp regexp,
511                              EmbPBtreeField field)
512 {
513     AjPStr tmpfd = NULL;
514 
515     if(ajRegExec(regexp, readline))
516     {
517         if(field->freecount)
518             tmpfd = field->freelist[--field->freecount];
519 
520         ajRegSubI(regexp, 1, &tmpfd);
521 
522         if(ajStrGetLen(tmpfd))
523         {
524             ajListstrPushAppend(field->data,tmpfd);
525             ajDebug("++%S '%S'\n", field->name, tmpfd);
526             tmpfd = NULL;
527             if(field->freecount)
528                 tmpfd = field->freelist[--field->freecount];
529         }
530 
531         ajRegSubI(regexp, 3, &tmpfd);
532 
533         if(ajStrGetLen(tmpfd))
534         {
535             ajListstrPushAppend(field->data,tmpfd);
536             ajDebug("++%S '%S'\n", field->name, tmpfd);
537             tmpfd = NULL;
538         }
539     }
540 
541     ajStrDel(&tmpfd);
542 
543     return;
544 }
545 
546 
547 
548 
549 /* @func embBtreeParseFieldTrim ***********************************************
550 **
551 ** Parse field tokens from an input record and trim any trailing whitespace,
552 ** iterating over a regular expression.
553 **
554 ** @param [r]readline [const AjPStr] Input record
555 ** @param [u] regexp [AjPRegexp] Regular expression to extract tokens
556 ** @param [u] field [EmbPBtreeField] Field
557 ** @return [void]
558 **
559 ** @release 6.4.0
560 ** @@
561 ******************************************************************************/
562 
embBtreeParseFieldTrim(const AjPStr readline,AjPRegexp regexp,EmbPBtreeField field)563 void embBtreeParseFieldTrim(const AjPStr readline, AjPRegexp regexp,
564                             EmbPBtreeField field)
565 {
566     AjPStr tmpfd = NULL;
567 
568     ajStrAssignS(&embindexLine,readline);
569 
570     while(ajRegExec(regexp, embindexLine))
571     {
572         if(field->freecount)
573             tmpfd = field->freelist[--field->freecount];
574 
575         ajRegSubI(regexp, 1, &tmpfd);
576         ajRegPost(regexp, &embindexLine);
577 
578         ajStrTrimWhiteEnd(&tmpfd);
579 
580         if(!ajStrGetLen(tmpfd))
581         {
582             ajStrDel(&tmpfd);
583             continue;
584         }
585 
586         ajListstrPushAppend(field->data,tmpfd);
587         ajDebug("++%S '%S'\n", field->name, tmpfd);
588         tmpfd = NULL;
589     }
590 
591     ajStrDel(&tmpfd);
592 
593     return;
594 }
595 
596 
597 
598 
599 /* @func embBtreeReportEntry **************************************************
600 **
601 ** Report on indexing of entries
602 **
603 ** @param [u] outf [AjPFile] Output file
604 ** @param [r] entry [const EmbPBtreeEntry] Entry
605 **
606 ** @return [void]
607 **
608 ** @release 6.4.0
609 ******************************************************************************/
610 
embBtreeReportEntry(AjPFile outf,const EmbPBtreeEntry entry)611 void embBtreeReportEntry(AjPFile outf, const EmbPBtreeEntry entry)
612 {
613     if(entry->idtruncate)
614         ajFmtPrintF(outf,
615                     "Entry idlen %u truncated %u IDs. "
616                     "Maximum ID length was %u for '%S'.\n",
617                     entry->idlen, entry->idtruncate,
618                     entry->idmaxlen, entry->maxid);
619     else
620         ajFmtPrintF(outf,
621                     "Entry idlen %u OK. "
622                     "Maximum ID length was %u for '%S'.\n",
623                     entry->idlen,
624                     entry->idmaxlen, entry->maxid);
625 
626     if(entry->idmaxlen > entry->idlen)
627     {
628         ajWarn("Entry idlen %u truncated %u IDs. "
629                "Maximum ID length was %u for '%S'.",
630                entry->idlen, entry->idtruncate,
631                entry->idmaxlen, entry->maxid);
632     }
633 
634     return;
635 }
636 
637 
638 
639 
640 /* @func embBtreeReportField **************************************************
641 **
642 ** Report on indexing of field
643 **
644 ** @param [u] outf [AjPFile] Output file
645 ** @param [r] field [const EmbPBtreeField] Field
646 **
647 ** @return [void]
648 **
649 ** @release 6.4.0
650 ******************************************************************************/
651 
embBtreeReportField(AjPFile outf,const EmbPBtreeField field)652 void embBtreeReportField(AjPFile outf, const EmbPBtreeField field)
653 {
654     if(field->truncate)
655         ajFmtPrintF(outf,
656                     "Field %S %Slen %u truncated %u terms. "
657                     "Maximum %S term length was %u for '%S'.\n",
658                     field->name, field->name, field->len, field->truncate,
659                     field->name, field->maxlen, field->maxkey);
660     else
661         ajFmtPrintF(outf,
662                     "Field %S %Slen %u OK. "
663                     "Maximum %S term length was %u for '%S'.\n",
664                     field->name, field->name, field->len,
665                     field->name, field->maxlen, field->maxkey);
666 
667     if(field->maxlen > field->len)
668     {
669         ajWarn("Field %S %Slen %u truncated %u terms. "
670                "Maximum %S term length was %u for '%S'.",
671                field->name, field->name, field->len, field->truncate,
672                field->name, field->maxlen, field->maxkey);
673     }
674 
675     return;
676 }
677 
678 
679 
680 
681 /* @func embBtreeEmblKW *******************************************************
682 **
683 ** Extract keywords from an EMBL KW line
684 **
685 ** @param [r] kwline [const AjPStr] keyword line
686 ** @param [w] kwlist [AjPList] list of keywords
687 ** @param [r] maxlen [ajuint] max keyword length
688 **
689 ** @return [void]
690 **
691 ** @release 3.0.0
692 ** @@
693 ******************************************************************************/
694 
embBtreeEmblKW(const AjPStr kwline,AjPList kwlist,ajuint maxlen)695 void embBtreeEmblKW(const AjPStr kwline, AjPList kwlist, ajuint maxlen)
696 {
697     AjPStr token     = NULL;
698     AjPStr str       = NULL;
699 
700     ajStrAssignSubS(&embindexLine, kwline, 5, -1);
701 
702     ajStrTokenAssignC(&embindexHandle,embindexLine,"\n\r;");
703 
704     while(ajStrTokenNextParse(embindexHandle,&token))
705     {
706 	ajStrTrimEndC(&token,".");
707 	ajStrTrimWhite(&token);
708 
709 	if(ajStrGetLen(token))
710 	{
711 	    if(maxlen)
712 	    {
713 		if(ajStrGetLen(token) > maxlen)
714 		    ajStrAssignSubS(&str,token,0,maxlen-1);
715 		else
716 		    ajStrAssignS(&str,token);
717 
718 	    }
719 	    else
720 		ajStrAssignS(&str,token);
721 
722 	    ajListstrPush(kwlist, str);
723 	    str = NULL;
724 	}
725     }
726 
727     ajStrDel(&token);
728 
729     return;
730 }
731 
732 
733 
734 
735 /* @func embBtreeEmblTX *******************************************************
736 **
737 ** Extract keywords from an EMBL OC or OS line
738 **
739 ** @param [r] txline [const AjPStr] taxonomy line
740 ** @param [w] txlist [AjPList] list of taxons
741 ** @param [r] maxlen [ajuint] max taxon length
742 **
743 ** @return [void]
744 **
745 ** @release 3.0.0
746 ** @@
747 ******************************************************************************/
748 
embBtreeEmblTX(const AjPStr txline,AjPList txlist,ajuint maxlen)749 void embBtreeEmblTX(const AjPStr txline, AjPList txlist, ajuint maxlen)
750 {
751     AjPStr token     = NULL;
752     AjPStr str       = NULL;
753 
754     ajStrAssignSubS(&embindexLine, txline, 5, -1);
755 
756     ajStrTokenAssignC(&embindexHandle,embindexLine,"\n\r;()");
757 
758     while(ajStrTokenNextParse(embindexHandle,&token))
759     {
760 	ajStrTrimEndC(&token,".");
761 	ajStrTrimEndC(&token," ");
762 	ajStrTrimWhite(&token);
763 
764 	if(ajStrGetLen(token))
765 	{
766 	    if(maxlen)
767 	    {
768 		if(ajStrGetLen(token) > maxlen)
769 		    ajStrAssignSubS(&str,token,0,maxlen-1);
770 		else
771 		    ajStrAssignS(&str,token);
772 
773 	    }
774 	    else
775 		ajStrAssignS(&str,token);
776 
777 	    ajListstrPush(txlist, str);
778 	    str = NULL;
779 	}
780     }
781 
782     ajStrDel(&token);
783 
784     return;
785 }
786 
787 
788 
789 
790 /* @func embBtreeEmblAC *******************************************************
791 **
792 ** Extract accession numbers from an EMBL AC line
793 **
794 ** @param [r] acline[const AjPStr] AC line
795 ** @param [w] aclist [AjPList] list of accession numbers
796 **
797 ** @return [void]
798 **
799 ** @release 3.0.0
800 ** @@
801 ******************************************************************************/
802 
embBtreeEmblAC(const AjPStr acline,AjPList aclist)803 void embBtreeEmblAC(const AjPStr acline, AjPList aclist)
804 {
805     char *p          = NULL;
806     char *q          = NULL;
807     ajuint lo = 0;
808     ajuint hi = 0;
809     ajuint field = 0;
810     ajuint i;
811     AjPStr str = NULL;
812 
813     ajStrAssignSubS(&embindexLine, acline, 5, -1);
814 
815     ajStrTokenAssignC(&embindexHandle,embindexLine,"\n\r;");
816 
817     while(ajStrTokenNextParse(embindexHandle,&embindexToken))
818     {
819 	ajStrTrimWhite(&embindexToken);
820 
821 	if((p=strchr(MAJSTRGETPTR(embindexToken),(int)'-')))
822 	{
823 	    q = p;
824 
825 	    while(isdigit((int)*(--q)));
826 
827 	    ++q;
828 	    ajStrAssignSubC(&embindexTstr,q,0,(ajuint)(p-q-1));
829 	    ajStrToUint(embindexTstr,&lo);
830 	    field = (ajuint) (p-q);
831 	    ajFmtPrintS(&embindexFormat,"%%S%%0%uu",field);
832 
833 	    ++p;
834 	    q = p;
835 
836 	    while(!isdigit((int)*q))
837 		++q;
838 
839 	    sscanf(q,"%u",&hi);
840 	    ajStrAssignSubC(&embindexPrefix,p,0,(ajuint)(q-p-1));
841 
842 	    for(i=lo;i<=hi;++i)
843 	    {
844 		ajFmtPrintS(&str,MAJSTRGETPTR(embindexFormat),
845 			    embindexPrefix,i);
846 		ajListstrPush(aclist, str);
847 		str = NULL;
848 	    }
849 	}
850 	else
851 	{
852 	    ajStrAssignS(&str,embindexToken);
853 	    ajListstrPush(aclist, str);
854 	    str = NULL;
855 	}
856     }
857 
858     return;
859 }
860 
861 
862 
863 
864 /* @func embBtreeEmblSV *******************************************************
865 **
866 ** Extract sequence version from an EMBL new format ID line
867 **
868 ** @param [r] idline[const AjPStr] AC line
869 ** @param [w] svlist [AjPList] list of accession numbers
870 **
871 ** @return [void]
872 **
873 ** @release 4.0.0
874 ** @@
875 ******************************************************************************/
876 
embBtreeEmblSV(const AjPStr idline,AjPList svlist)877 void embBtreeEmblSV(const AjPStr idline, AjPList svlist)
878 {
879     AjPStr token     = NULL;
880     AjPStr str       = NULL;
881     AjPStr idstr      = NULL;
882     AjPStr svstr      = NULL;
883 
884     ajStrAssignSubS(&embindexLine, idline, 5, -1);
885 
886     ajStrTokenAssignC(&embindexHandle,embindexLine," \t\n\r;");
887 
888     if(!ajStrTokenNextParse(embindexHandle,&idstr))
889 	return;
890 
891     if(!ajStrTokenNextParse(embindexHandle,&token))
892 	return;
893 
894     if(!ajStrTokenNextParse(embindexHandle,&svstr))
895 	return;
896 
897     if(!ajStrMatchC(token, "SV"))
898 	return;
899 
900     str = ajStrNewRes(MAJSTRGETLEN(idstr)+MAJSTRGETLEN(svstr)+2);
901 
902     ajFmtPrintS(&str,"%S.%S", idstr, svstr);
903 
904     ajListstrPush(svlist, str);
905     str = NULL;
906 
907     ajStrDel(&idstr);
908     ajStrDel(&svstr);
909     ajStrDel(&token);
910 
911     return;
912 }
913 
914 
915 
916 
917 /* @func embBtreeEmblDE *******************************************************
918 **
919 ** Extract words from an EMBL DE line
920 **
921 ** @param [r] deline[const AjPStr] description line
922 ** @param [w] delist [AjPList] list of descriptions
923 ** @param [r] maxlen [ajuint] max keyword length
924 **
925 ** @return [void]
926 **
927 ** @release 3.0.0
928 ** @@
929 ******************************************************************************/
930 
embBtreeEmblDE(const AjPStr deline,AjPList delist,ajuint maxlen)931 void embBtreeEmblDE(const AjPStr deline, AjPList delist, ajuint maxlen)
932 {
933     AjPStr token     = NULL;
934     AjPStr str       = NULL;
935 
936     ajStrAssignSubS(&embindexLine, deline, 5, -1);
937 
938     ajStrTokenAssignC(&embindexHandle,embindexLine,"\n\r \t()");
939 
940     while(ajStrTokenNextParse(embindexHandle,&token))
941     {
942 	ajStrTrimWhite(&token);
943 	ajStrTrimEndC(&token,".,:'\"");
944 	ajStrTrimStartC(&token,"'\"");
945 
946 	if(ajStrGetLen(token))
947 	{
948 	    if(maxlen)
949 	    {
950 		if(ajStrGetLen(token) > maxlen)
951 		    ajStrAssignSubS(&str,token,0,maxlen-1);
952 		else
953 		    ajStrAssignS(&str,token);
954 
955 	    }
956 	    else
957 		ajStrAssignS(&str,token);
958 
959 	    ajListstrPush(delist, str);
960 	    str = NULL;
961 	}
962     }
963 
964     ajStrDel(&token);
965 
966     return;
967 }
968 
969 
970 
971 
972 /* @func embBtreeParseEmblKw **************************************************
973 **
974 ** Extract keywords from an EMBL KW line
975 **
976 ** @param [r] readline [const AjPStr] keyword line
977 ** @param [u] field [EmbPBtreeField] list of descriptions
978 **
979 ** @return [void]
980 **
981 ** @release 6.5.0
982 ** @@
983 ******************************************************************************/
984 
embBtreeParseEmblKw(const AjPStr readline,EmbPBtreeField field)985 void embBtreeParseEmblKw(const AjPStr readline, EmbPBtreeField field)
986 {
987     AjPStr str       = NULL;
988 
989     ajStrAssignSubS(&embindexLine, readline, 5, -1);
990 
991     ajStrTokenAssignC(&embindexHandle,embindexLine,"\n\r;");
992 
993     while(ajStrTokenNextParse(embindexHandle,&embindexToken))
994     {
995 	ajStrTrimEndC(&embindexToken,".");
996 	ajStrTrimWhite(&embindexToken);
997 
998 	if(ajStrGetLen(embindexToken))
999 	{
1000             if(field->freecount)
1001                 str = field->freelist[--field->freecount];
1002 
1003             ajStrAssignS(&str,embindexToken);
1004 
1005 	    ajListstrPushAppend(field->data, str);
1006 	    str = NULL;
1007 	}
1008     }
1009 
1010     return;
1011 }
1012 
1013 
1014 
1015 
1016 /* @func embBtreeParseEmblTx **************************************************
1017 **
1018 ** Extract keywords from an EMBL OC or OS line
1019 **
1020 ** @param [r] readline [const AjPStr] taxonomy line
1021 ** @param [u] field [EmbPBtreeField] list of descriptions
1022 **
1023 ** @return [void]
1024 **
1025 ** @release 6.5.0
1026 ** @@
1027 ******************************************************************************/
1028 
embBtreeParseEmblTx(const AjPStr readline,EmbPBtreeField field)1029 void embBtreeParseEmblTx(const AjPStr readline, EmbPBtreeField field)
1030 {
1031     AjPStr str = NULL;
1032 
1033     ajStrAssignSubS(&embindexLine, readline, 5, -1);
1034 
1035     ajStrTokenAssignC(&embindexHandle,embindexLine,"\n\r;()");
1036 
1037     while(ajStrTokenNextParse(embindexHandle,&embindexToken))
1038     {
1039 	ajStrTrimEndC(&embindexToken,".");
1040 	ajStrTrimEndC(&embindexToken," ");
1041 	ajStrTrimWhite(&embindexToken);
1042 
1043 	if(ajStrGetLen(embindexToken))
1044 	{
1045             if(field->freecount)
1046                 str = field->freelist[--field->freecount];
1047 
1048             ajStrAssignS(&str,embindexToken);
1049 
1050 	    ajListstrPushAppend(field->data, str);
1051 	    str = NULL;
1052 	}
1053     }
1054 
1055     return;
1056 }
1057 
1058 
1059 
1060 
1061 /* @func embBtreeParseEmblAc **************************************************
1062 **
1063 ** Extract accession numbers from an EMBL AC line
1064 **
1065 ** @param [r] readline[const AjPStr] AC line
1066 ** @param [u] field [EmbPBtreeField] list of descriptions
1067 **
1068 ** @return [void]
1069 **
1070 ** @release 6.5.0
1071 ** @@
1072 ******************************************************************************/
1073 
embBtreeParseEmblAc(const AjPStr readline,EmbPBtreeField field)1074 void embBtreeParseEmblAc(const AjPStr readline, EmbPBtreeField field)
1075 {
1076     char *p          = NULL;
1077     char *q          = NULL;
1078     ajuint lo = 0;
1079     ajuint hi = 0;
1080     ajuint ifield = 0;
1081     ajuint i;
1082     AjPStr str = NULL;
1083 
1084     ajStrAssignSubS(&embindexLine, readline, 5, -1);
1085 
1086     ajStrTokenAssignC(&embindexHandle,embindexLine,"\n\r;");
1087 
1088     while(ajStrTokenNextParse(embindexHandle,&embindexToken))
1089     {
1090 	ajStrTrimWhite(&embindexToken);
1091 
1092         if(field->freecount)
1093             str = field->freelist[--field->freecount];
1094 
1095         /* Check for EMBL accession range */
1096 	if((p=strchr(MAJSTRGETPTR(embindexToken),(int)'-')))
1097 	{
1098 	    q = p;
1099 
1100 	    while(isdigit((int)*(--q)));
1101 
1102 	    ++q;
1103 	    ajStrAssignSubC(&embindexTstr,q,0,(ajuint)(p-q-1));
1104 	    ajStrToUint(embindexTstr,&lo);
1105 	    ifield = (ajuint) (p-q);
1106 	    ajFmtPrintS(&embindexFormat,"%%S%%0%uu",ifield);
1107 
1108 	    ++p;
1109 	    q = p;
1110 
1111 	    while(!isdigit((int)*q))
1112 		++q;
1113 
1114 	    sscanf(q,"%u",&hi);
1115 	    ajStrAssignSubC(&embindexPrefix,p,0,(ajuint)(q-p-1));
1116 
1117 	    for(i=lo;i<=hi;++i)
1118 	    {
1119 		ajFmtPrintS(&str,MAJSTRGETPTR(embindexFormat),
1120 			    embindexPrefix,i);
1121 		ajListstrPushAppend(field->data, str);
1122 		str = NULL;
1123 	    }
1124 	}
1125 	else                    /* simple accession number */
1126 	{
1127 	    ajStrAssignS(&str,embindexToken);
1128 	    ajListstrPushAppend(field->data, str);
1129 	    str = NULL;
1130 	}
1131     }
1132 
1133     return;
1134 }
1135 
1136 
1137 
1138 
1139 /* @func embBtreeFindEmblAc ***************************************************
1140 **
1141 ** Returns first accession number from an EMBL/UniProt AC line
1142 **
1143 ** @param [r] readline[const AjPStr] AC line
1144 ** @param [u] field [EmbPBtreeField] list of descriptions
1145 ** @param [w] Pstr [AjPStr*] First accession
1146 **
1147 ** @return [void]
1148 **
1149 ** @release 6.5.0
1150 ** @@
1151 ******************************************************************************/
1152 
embBtreeFindEmblAc(const AjPStr readline,EmbPBtreeField field,AjPStr * Pstr)1153 void embBtreeFindEmblAc(const AjPStr readline, EmbPBtreeField field,
1154                         AjPStr *Pstr)
1155 {
1156     char *p          = NULL;
1157     char *q          = NULL;
1158     ajuint lo = 0;
1159     ajuint hi = 0;
1160     ajuint ifield = 0;
1161     ajuint i;
1162 
1163     ajStrAssignSubS(&embindexLine, readline, 5, -1);
1164 
1165     ajStrTokenAssignC(&embindexHandle,embindexLine," \t\n\r;");
1166 
1167     while(ajStrTokenNextParse(embindexHandle,&embindexToken))
1168     {
1169 	ajStrTrimWhite(&embindexToken);
1170 
1171         /* Check for EMBL accession range */
1172 	if((p=strchr(MAJSTRGETPTR(embindexToken),(int)'-')))
1173 	{
1174 	    q = p;
1175 
1176 	    while(isdigit((int)*(--q)));
1177 
1178 	    ++q;
1179 	    ajStrAssignSubC(&embindexTstr,q,0,(ajuint)(p-q-1));
1180 	    ajStrToUint(embindexTstr,&lo);
1181 	    ifield = (ajuint) (p-q);
1182 	    ajFmtPrintS(&embindexFormat,"%%S%%0%uu",ifield);
1183 
1184 	    ++p;
1185 	    q = p;
1186 
1187 	    while(!isdigit((int)*q))
1188 		++q;
1189 
1190 	    sscanf(q,"%u",&hi);
1191 	    ajStrAssignSubC(&embindexPrefix,p,0,(ajuint)(q-p-1));
1192 
1193 	    for(i=lo;i<=hi;++i)
1194 	    {
1195 		if(field->freecount)
1196                     *Pstr = field->freelist[--field->freecount];
1197                 ajFmtPrintS(Pstr,MAJSTRGETPTR(embindexFormat),
1198 			    embindexPrefix,i);
1199 		return;
1200 	    }
1201 	}
1202 	else                    /* simple accession number */
1203 	{
1204             if(field->freecount)
1205                 *Pstr = field->freelist[--field->freecount];
1206 	    ajStrAssignS(Pstr,embindexToken);
1207 	    return;
1208 	}
1209     }
1210 
1211     return;
1212 }
1213 
1214 
1215 
1216 
1217 /* @func embBtreeParseEmblSv **************************************************
1218 **
1219 ** Extract sequence version from an EMBL new format ID line
1220 **
1221 ** @param [r] readline[const AjPStr] AC line
1222 ** @param [u] field [EmbPBtreeField] list of descriptions
1223 **
1224 ** @return [void]
1225 **
1226 ** @release 6.5.0
1227 ** @@
1228 ******************************************************************************/
1229 
embBtreeParseEmblSv(const AjPStr readline,EmbPBtreeField field)1230 void embBtreeParseEmblSv(const AjPStr readline, EmbPBtreeField field)
1231 {
1232     AjPStr str = NULL;
1233 
1234     ajStrAssignSubS(&embindexLine, readline, 5, -1);
1235 
1236     ajStrTokenAssignC(&embindexHandle,embindexLine," \t\n\r;");
1237 
1238     if(!ajStrTokenNextParse(embindexHandle,&embindexToken))
1239 	return;
1240 
1241     if(field->freecount)
1242         str = field->freelist[--field->freecount];
1243 
1244     ajStrAssignS(&str, embindexToken);
1245     ajStrAppendK(&str, '.');
1246 
1247     if(!ajStrTokenNextParse(embindexHandle,&embindexToken))
1248 	return;
1249 
1250     if(!ajStrMatchC(embindexToken, "SV"))
1251 	return;
1252 
1253     if(!ajStrTokenNextParse(embindexHandle,&embindexToken))
1254 	return;
1255 
1256     ajStrAppendS(&str, embindexToken);
1257 
1258     ajListstrPushAppend(field->data, str);
1259     str = NULL;
1260 
1261     return;
1262 }
1263 
1264 
1265 
1266 
1267 /* @func embBtreeParseEmblDe **************************************************
1268 **
1269 ** Extract words from an EMBL DE line
1270 **
1271 ** @param [r] readline[const AjPStr] description line
1272 ** @param [u] field [EmbPBtreeField] list of descriptions
1273 **
1274 ** @return [void]
1275 **
1276 ** @release 6.5.0
1277 ** @@
1278 ******************************************************************************/
1279 
embBtreeParseEmblDe(const AjPStr readline,EmbPBtreeField field)1280 void embBtreeParseEmblDe(const AjPStr readline, EmbPBtreeField field)
1281 {
1282     AjPStr str = NULL;
1283 
1284     ajStrAssignSubS(&embindexLine, readline, 5, -1);
1285 
1286     ajStrTokenAssignC(&embindexHandle,embindexLine,"\n\r \t()");
1287 
1288     while(ajStrTokenNextParse(embindexHandle,&embindexToken))
1289     {
1290 	ajStrTrimWhite(&embindexToken);
1291 	ajStrTrimEndC(&embindexToken,".,:;'\"");
1292 	ajStrTrimStartC(&embindexToken,"'\"");
1293 
1294 	if(ajStrGetLen(embindexToken))
1295 	{
1296             if(field->freecount)
1297                 str = field->freelist[--field->freecount];
1298 
1299             ajStrAssignS(&str, embindexToken);
1300 
1301 	    ajListstrPushAppend(field->data, str);
1302 	    str = NULL;
1303 	}
1304     }
1305 
1306     return;
1307 }
1308 
1309 
1310 
1311 
1312 /* @func embBtreeParseGenbankAc ***********************************************
1313 **
1314 ** Extract accession numbers from a GenBank ACCESSION line
1315 **
1316 ** @param [r] readline [const AjPStr] AC line
1317 ** @param [u] field [EmbPBtreeField] Field with list of data
1318 **
1319 ** @return [void]
1320 **
1321 ** @release 6.5.0
1322 ** @@
1323 ******************************************************************************/
1324 
embBtreeParseGenbankAc(const AjPStr readline,EmbPBtreeField field)1325 void embBtreeParseGenbankAc(const AjPStr readline, EmbPBtreeField field)
1326 {
1327     AjPStr str       = NULL;
1328     char *p          = NULL;
1329     char *q          = NULL;
1330     ajuint lo = 0;
1331     ajuint hi = 0;
1332     ajuint ifield = 0;
1333     ajuint i;
1334 
1335     ajStrAssignSubS(&embindexLine, readline, 12, -1);
1336 
1337     ajStrTokenAssignC(&embindexHandle,embindexLine,"\n\r ");
1338 
1339     while(ajStrTokenNextParse(embindexHandle,&embindexToken))
1340     {
1341 	ajStrTrimWhite(&embindexToken);
1342 
1343         /* check for accession number range */
1344 	if((p=strchr(MAJSTRGETPTR(embindexToken),(int)'-')))
1345 	{
1346 	    q = p;
1347 
1348 	    while(isdigit((int)*(--q)));
1349 
1350 	    ++q;
1351 	    ajStrAssignSubC(&embindexTstr,q,0,(ajuint)(p-q-1));
1352 	    ajStrToUint(embindexTstr,&lo);
1353 	    ifield = (ajuint) (p-q);
1354 	    ajFmtPrintS(&embindexFormat,"%%S%%0%uu",ifield);
1355 
1356 	    ++p;
1357 	    q = p;
1358 
1359 	    while(!isdigit((int)*q))
1360 		++q;
1361 
1362 	    sscanf(q,"%u",&hi);
1363 	    ajStrAssignSubC(&embindexPrefix,p,0,(ajuint)(q-p-1));
1364 
1365 	    for(i=lo;i<=hi;++i)
1366 	    {
1367                 if(field->freecount)
1368                     str = field->freelist[--field->freecount];
1369 
1370 		ajFmtPrintS(&str,MAJSTRGETPTR(embindexFormat),embindexPrefix,i);
1371 		ajListstrPushAppend(field->data, str);
1372 		str = NULL;
1373 	    }
1374 	}
1375 	else                    /* simple accession number */
1376 	{
1377             if(field->freecount)
1378                 str = field->freelist[--field->freecount];
1379 
1380 	    ajStrAssignS(&str,embindexToken);
1381 	    ajListstrPushAppend(field->data, str);
1382         }
1383     }
1384 
1385     return;
1386 }
1387 
1388 
1389 
1390 
1391 /* @func embBtreeParseGenbankDe ***********************************************
1392 **
1393 ** Extract keywords from a GenBank DESCRIPTION line
1394 **
1395 ** @param [r] readline [const AjPStr] AC line
1396 ** @param [u] field [EmbPBtreeField] Field with list of data
1397 **
1398 ** @return [void]
1399 **
1400 ** @release 6.5.0
1401 ** @@
1402 ******************************************************************************/
1403 
embBtreeParseGenbankDe(const AjPStr readline,EmbPBtreeField field)1404 void embBtreeParseGenbankDe(const AjPStr readline, EmbPBtreeField field)
1405 {
1406     AjPStr str = NULL;
1407 
1408     ajStrAssignSubS(&embindexLine, readline, 10, -1);
1409 
1410     ajStrTokenAssignC(&embindexHandle,embindexLine,"\n\r \t()");
1411 
1412     while(ajStrTokenNextParse(embindexHandle,&embindexToken))
1413     {
1414 	ajStrTrimEndC(&embindexToken,".");
1415 	ajStrTrimWhite(&embindexToken);
1416 
1417 	if(ajStrGetLen(embindexToken))
1418 	{
1419 	    ajStrAssignS(&str,embindexToken);
1420 
1421 	    ajListstrPushAppend(field->data, str);
1422 	    str = NULL;
1423 	}
1424     }
1425 
1426     return;
1427 }
1428 
1429 
1430 
1431 
1432 /* @func embBtreeParseGenbankKw ***********************************************
1433 **
1434 ** Extract keywords from a GenBank KEYWORDS line
1435 **
1436 ** @param [r] readline [const AjPStr] AC line
1437 ** @param [u] field [EmbPBtreeField] Field with list of data
1438 **
1439 ** @return [void]
1440 **
1441 ** @release 6.5.0
1442 ** @@
1443 ******************************************************************************/
1444 
embBtreeParseGenbankKw(const AjPStr readline,EmbPBtreeField field)1445 void embBtreeParseGenbankKw(const AjPStr readline, EmbPBtreeField field)
1446 {
1447     AjPStr str = NULL;
1448 
1449     ajStrAssignSubS(&embindexLine, readline, 8, -1);
1450 
1451     ajStrTokenAssignC(&embindexHandle,embindexLine,"\n\r;");
1452 
1453     while(ajStrTokenNextParse(embindexHandle,&embindexToken))
1454     {
1455 	ajStrTrimEndC(&embindexToken,".");
1456 	ajStrTrimWhite(&embindexToken);
1457 
1458 	if(ajStrGetLen(embindexToken))
1459 	{
1460             if(field->freecount)
1461                 str = field->freelist[--field->freecount];
1462 
1463 	    ajStrAssignS(&str,embindexToken);
1464 
1465 	    ajListstrPushAppend(field->data, str);
1466 	    str = NULL;
1467 	}
1468     }
1469 
1470     return;
1471 }
1472 
1473 
1474 
1475 
1476 /* @func embBtreeParseGenbankTx ***********************************************
1477 **
1478 ** Extract keywords from a GenBank ORGANISM line
1479 **
1480 ** @param [r] readline [const AjPStr] AC line
1481 ** @param [u] field [EmbPBtreeField] Field with list of data
1482 **
1483 ** @return [void]
1484 **
1485 ** @release 6.5.0
1486 ** @@
1487 ******************************************************************************/
1488 
embBtreeParseGenbankTx(const AjPStr readline,EmbPBtreeField field)1489 void embBtreeParseGenbankTx(const AjPStr readline, EmbPBtreeField field)
1490 {
1491     AjPStr str = NULL;
1492 
1493     ajStrAssignSubS(&embindexLine, readline, 9, -1);
1494 
1495     ajStrTokenAssignC(&embindexHandle,embindexLine,"\n\r;()");
1496 
1497     while(ajStrTokenNextParse(embindexHandle,&embindexToken))
1498     {
1499 	ajStrTrimEndC(&embindexToken,".");
1500 	ajStrTrimEndC(&embindexToken," ");
1501 	ajStrTrimWhite(&embindexToken);
1502 
1503 	if(ajStrGetLen(embindexToken))
1504 	{
1505             if(field->freecount)
1506                 str = field->freelist[--field->freecount];
1507 
1508 	    ajStrAssignS(&str,embindexToken);
1509 
1510 	    ajListstrPushAppend(field->data, str);
1511 	    str = NULL;
1512 	}
1513     }
1514 
1515     return;
1516 }
1517 
1518 
1519 
1520 
1521 /* @func embBtreeGenBankAC ****************************************************
1522 **
1523 ** Extract accession numbers from a GenBank ACCESSION line
1524 **
1525 ** @param [r] acline[const AjPStr] AC line
1526 ** @param [w] aclist [AjPList] list of accession numbers
1527 **
1528 ** @return [void]
1529 **
1530 ** @release 3.0.0
1531 ** @@
1532 ******************************************************************************/
1533 
embBtreeGenBankAC(const AjPStr acline,AjPList aclist)1534 void embBtreeGenBankAC(const AjPStr acline, AjPList aclist)
1535 {
1536     AjPStr token     = NULL;
1537     AjPStr str       = NULL;
1538     AjPStr tstr      = NULL;
1539     AjPStr prefix    = NULL;
1540     AjPStr format    = NULL;
1541     char *p          = NULL;
1542     char *q          = NULL;
1543     ajuint lo = 0;
1544     ajuint hi = 0;
1545     ajuint field = 0;
1546     ajuint i;
1547 
1548     ajStrAssignSubS(&embindexLine, acline, 12, -1);
1549 
1550     ajStrTokenAssignC(&embindexHandle,embindexLine,"\n\r ");
1551 
1552     while(ajStrTokenNextParse(embindexHandle,&token))
1553     {
1554 	ajStrTrimWhite(&token);
1555 
1556 	if((p=strchr(MAJSTRGETPTR(token),(int)'-')))
1557 	{
1558 	    q = p;
1559 
1560 	    while(isdigit((int)*(--q)));
1561 
1562 	    ++q;
1563 	    ajStrAssignSubC(&tstr,q,0,(ajuint)(p-q-1));
1564 	    ajStrToUint(tstr,&lo);
1565 	    field = (ajuint) (p-q);
1566 	    ajFmtPrintS(&format,"%%S%%0%uu",field);
1567 
1568 	    ++p;
1569 	    q = p;
1570 
1571 	    while(!isdigit((int)*q))
1572 		++q;
1573 
1574 	    sscanf(q,"%u",&hi);
1575 	    ajStrAssignSubC(&prefix,p,0,(ajuint)(q-p-1));
1576 
1577 	    for(i=lo;i<=hi;++i)
1578 	    {
1579 		ajFmtPrintS(&str,MAJSTRGETPTR(format),prefix,i);
1580 		ajListstrPushAppend(aclist, str);
1581 		str = NULL;
1582 	    }
1583 	}
1584 	else
1585 	{
1586 	    ajStrAssignS(&str,token);
1587 	    ajListstrPushAppend(aclist, str);
1588 	    str = NULL;
1589 	}
1590     }
1591 
1592     ajStrDel(&tstr);
1593     ajStrDel(&prefix);
1594     ajStrDel(&format);
1595     ajStrDel(&token);
1596 
1597     return;
1598 }
1599 
1600 
1601 
1602 
1603 /* @func embBtreeGenBankKW ****************************************************
1604 **
1605 ** Extract keywords from a GenBank KEYWORDS line
1606 **
1607 ** @param [r] kwline[const AjPStr] keyword line
1608 ** @param [w] kwlist [AjPList] list of keywords
1609 ** @param [r] maxlen [ajuint] max keyword length
1610 **
1611 ** @return [void]
1612 **
1613 ** @release 3.0.0
1614 ** @@
1615 ******************************************************************************/
1616 
embBtreeGenBankKW(const AjPStr kwline,AjPList kwlist,ajuint maxlen)1617 void embBtreeGenBankKW(const AjPStr kwline, AjPList kwlist, ajuint maxlen)
1618 {
1619     AjPStr token     = NULL;
1620     AjPStr str       = NULL;
1621 
1622     ajStrAssignSubS(&embindexLine, kwline, 8, -1);
1623 
1624     ajStrTokenAssignC(&embindexHandle,embindexLine,"\n\r;");
1625 
1626     while(ajStrTokenNextParse(embindexHandle,&token))
1627     {
1628 	ajStrTrimEndC(&token,".");
1629 	ajStrTrimWhite(&token);
1630 
1631 	if(ajStrGetLen(token))
1632 	{
1633 	    if(maxlen)
1634 	    {
1635 		if(ajStrGetLen(token) > maxlen)
1636 		    ajStrAssignSubS(&str,token,0,maxlen-1);
1637 		else
1638 		    ajStrAssignS(&str,token);
1639 
1640 	    }
1641 	    else
1642 		ajStrAssignS(&str,token);
1643 
1644 	    ajListstrPushAppend(kwlist, str);
1645 	    str = NULL;
1646 	}
1647     }
1648 
1649     ajStrDel(&token);
1650 
1651     return;
1652 }
1653 
1654 
1655 
1656 
1657 /* @func embBtreeGenBankDE ****************************************************
1658 **
1659 ** Extract keywords from a GenBank DESCRIPTION line
1660 **
1661 ** @param [r] kwline[const AjPStr] keyword line
1662 ** @param [w] kwlist [AjPList] list of keywords
1663 ** @param [r] maxlen [ajuint] max keyword length
1664 **
1665 ** @return [void]
1666 **
1667 ** @release 3.0.0
1668 ** @@
1669 ******************************************************************************/
1670 
embBtreeGenBankDE(const AjPStr kwline,AjPList kwlist,ajuint maxlen)1671 void embBtreeGenBankDE(const AjPStr kwline, AjPList kwlist, ajuint maxlen)
1672 {
1673     AjPStr token     = NULL;
1674     AjPStr str       = NULL;
1675 
1676     ajStrAssignSubS(&embindexLine, kwline, 10, -1);
1677 
1678     ajStrTokenAssignC(&embindexHandle,embindexLine,"\n\r \t()");
1679 
1680     while(ajStrTokenNextParse(embindexHandle,&token))
1681     {
1682 	ajStrTrimEndC(&token,".");
1683 	ajStrTrimWhite(&token);
1684 
1685 	if(ajStrGetLen(token))
1686 	{
1687 	    if(maxlen)
1688 	    {
1689 		if(ajStrGetLen(token) > maxlen)
1690 		    ajStrAssignSubS(&str,token,0,maxlen-1);
1691 		else
1692 		    ajStrAssignS(&str,token);
1693 
1694 	    }
1695 	    else
1696 		ajStrAssignS(&str,token);
1697 
1698 	    ajListstrPushAppend(kwlist, str);
1699 	    str = NULL;
1700 	}
1701     }
1702 
1703     ajStrDel(&token);
1704 
1705     return;
1706 }
1707 
1708 
1709 
1710 
1711 /* @func embBtreeGenBankTX ****************************************************
1712 **
1713 ** Extract keywords from a GenBank ORGANISM line
1714 **
1715 ** @param [r] kwline[const AjPStr] keyword line
1716 ** @param [w] kwlist [AjPList] list of keywords
1717 ** @param [r] maxlen [ajuint] max keyword length
1718 **
1719 ** @return [void]
1720 **
1721 ** @release 3.0.0
1722 ** @@
1723 ******************************************************************************/
1724 
embBtreeGenBankTX(const AjPStr kwline,AjPList kwlist,ajuint maxlen)1725 void embBtreeGenBankTX(const AjPStr kwline, AjPList kwlist, ajuint maxlen)
1726 {
1727     AjPStr token     = NULL;
1728     AjPStr str       = NULL;
1729 
1730     ajStrAssignSubS(&embindexLine, kwline, 9, -1);
1731 
1732     ajStrTokenAssignC(&embindexHandle,embindexLine,"\n\r;()");
1733 
1734     while(ajStrTokenNextParse(embindexHandle,&token))
1735     {
1736 	ajStrTrimEndC(&token,".");
1737 	ajStrTrimEndC(&token," ");
1738 	ajStrTrimWhite(&token);
1739 
1740 	if(ajStrGetLen(token))
1741 	{
1742 	    if(maxlen)
1743 	    {
1744 		if(ajStrGetLen(token) > maxlen)
1745 		    ajStrAssignSubS(&str,token,0,maxlen-1);
1746 		else
1747 		    ajStrAssignS(&str,token);
1748 
1749 	    }
1750 	    else
1751 		ajStrAssignS(&str,token);
1752 
1753 	    ajListstrPushAppend(kwlist, str);
1754 	    str = NULL;
1755 	}
1756     }
1757 
1758     ajStrDel(&token);
1759 
1760     return;
1761 }
1762 
1763 
1764 
1765 
1766 /* @func embBtreeParseFastaDe *************************************************
1767 **
1768 ** Extract keywords from a Fasta description
1769 **
1770 ** @param [r] readline [const AjPStr] keyword line
1771 ** @param [u] field [EmbPBtreeField] Field with list of data
1772 **
1773 ** @return [void]
1774 **
1775 ** @release 6.5.0
1776 ** @@
1777 ******************************************************************************/
1778 
embBtreeParseFastaDe(const AjPStr readline,EmbPBtreeField field)1779 void embBtreeParseFastaDe(const AjPStr readline, EmbPBtreeField field)
1780 {
1781     AjPStr str = NULL;
1782 
1783     ajStrTokenAssignC(&embindexHandle,readline,"\n\r ");
1784 
1785     while(ajStrTokenNextParse(embindexHandle,&embindexToken))
1786     {
1787 	ajStrTrimEndC(&embindexToken,".");
1788 	ajStrTrimWhite(&embindexToken);
1789 
1790 	if(ajStrGetLen(embindexToken))
1791 	{
1792             if(field->freecount)
1793                 str = field->freelist[--field->freecount];
1794 
1795             ajStrAssignS(&str,embindexToken);
1796 
1797 	    ajListstrPushAppend(field->data, str);
1798 	    str = NULL;
1799 	}
1800     }
1801 
1802     return;
1803 }
1804 
1805 
1806 
1807 
1808 /* @func embBtreeParseFastaAc *************************************************
1809 **
1810 ** Extract sequence version keywords from a Fasta description
1811 **
1812 ** @param [r] readline [const AjPStr] keyword line
1813 ** @param [u] field [EmbPBtreeField] Field with list of data
1814 **
1815 ** @return [void]
1816 **
1817 ** @release 6.5.0
1818 ** @@
1819 ******************************************************************************/
1820 
embBtreeParseFastaAc(const AjPStr readline,EmbPBtreeField field)1821 void embBtreeParseFastaAc(const AjPStr readline, EmbPBtreeField field)
1822 {
1823     AjPStr str = NULL;
1824 
1825     ajStrTokenAssignC(&embindexHandle,readline,"\n\r ");
1826 
1827     while(ajStrTokenNextParse(embindexHandle,&embindexToken))
1828     {
1829 	ajStrTrimEndC(&embindexToken,".");
1830 	ajStrTrimWhite(&embindexToken);
1831 
1832 	if(ajStrGetLen(embindexToken))
1833 	{
1834             if(field->freecount)
1835                 str = field->freelist[--field->freecount];
1836 
1837 	    ajStrAssignS(&str,embindexToken);
1838 
1839 	    ajListstrPushAppend(field->data, str);
1840 	    str = NULL;
1841 	}
1842     }
1843 
1844     return;
1845 }
1846 
1847 
1848 
1849 
1850 /* @func embBtreeFastaDE ******************************************************
1851 **
1852 ** Extract keywords from a Fasta description
1853 **
1854 ** @param [r] kwline[const AjPStr] keyword line
1855 ** @param [w] kwlist [AjPList] list of keywords
1856 ** @param [r] maxlen [ajuint] max keyword length
1857 **
1858 ** @return [void]
1859 **
1860 ** @release 3.0.0
1861 ** @@
1862 ******************************************************************************/
1863 
embBtreeFastaDE(const AjPStr kwline,AjPList kwlist,ajuint maxlen)1864 void embBtreeFastaDE(const AjPStr kwline, AjPList kwlist, ajuint maxlen)
1865 {
1866     AjPStr token     = NULL;
1867     AjPStr str       = NULL;
1868 
1869     ajStrTokenAssignC(&embindexHandle,kwline,"\n\r ");
1870 
1871     while(ajStrTokenNextParse(embindexHandle,&token))
1872     {
1873 	ajStrTrimEndC(&token,".");
1874 	ajStrTrimWhite(&token);
1875 
1876 	if(ajStrGetLen(token))
1877 	{
1878 	    str = ajStrNew();
1879 
1880 	    if(maxlen)
1881 	    {
1882 		if(ajStrGetLen(token) > maxlen)
1883 		    ajStrAssignSubS(&str,token,0,maxlen-1);
1884 		else
1885 		    ajStrAssignS(&str,token);
1886 
1887 	    }
1888 	    else
1889 		ajStrAssignS(&str,token);
1890 
1891 	    ajListstrPushAppend(kwlist, str);
1892 	    str = NULL;
1893 	}
1894     }
1895 
1896     ajStrDel(&token);
1897 
1898     return;
1899 }
1900 
1901 
1902 
1903 
1904 /* @func embBtreeParseFastaSv *************************************************
1905 **
1906 ** Extract sequence version keywords from a Fasta description
1907 **
1908 ** @param [r] readline [const AjPStr] keyword line
1909 ** @param [u] field [EmbPBtreeField] Field with list of data
1910 **
1911 ** @return [void]
1912 **
1913 ** @release 6.5.0
1914 ** @@
1915 ******************************************************************************/
1916 
embBtreeParseFastaSv(const AjPStr readline,EmbPBtreeField field)1917 void embBtreeParseFastaSv(const AjPStr readline, EmbPBtreeField field)
1918 {
1919     AjPStr str = NULL;
1920 
1921     ajStrTokenAssignC(&embindexHandle,readline,"\n\r ");
1922 
1923     while(ajStrTokenNextParse(embindexHandle,&embindexToken))
1924     {
1925 	ajStrTrimEndC(&embindexToken,".");
1926 	ajStrTrimWhite(&embindexToken);
1927 
1928 	if(ajStrGetLen(embindexToken))
1929 	{
1930             if(field->freecount)
1931                 str = field->freelist[--field->freecount];
1932 
1933 	    ajStrAssignS(&str,embindexToken);
1934 
1935 	    ajListstrPushAppend(field->data, str);
1936 	    str = NULL;
1937 	}
1938     }
1939 
1940     return;
1941 }
1942 
1943 
1944 
1945 
1946 /* @func embBtreeFastaSV ******************************************************
1947 **
1948 ** Extract sequence version keywords from a Fasta description
1949 **
1950 ** @param [r] kwline[const AjPStr] sequence version or GI string
1951 ** @param [w] kwlist [AjPList] list of sequence versions
1952 ** @param [r] maxlen [ajuint] max sequence version length
1953 **
1954 ** @return [void]
1955 **
1956 ** @release 6.0.0
1957 ** @@
1958 ******************************************************************************/
1959 
embBtreeFastaSV(const AjPStr kwline,AjPList kwlist,ajuint maxlen)1960 void embBtreeFastaSV(const AjPStr kwline, AjPList kwlist, ajuint maxlen)
1961 {
1962     AjPStr token     = NULL;
1963     AjPStr str       = NULL;
1964 
1965     ajStrTokenAssignC(&embindexHandle,kwline,"\n ");
1966 
1967     while(ajStrTokenNextParse(embindexHandle,&token))
1968     {
1969 	ajStrTrimEndC(&token,".");
1970 	ajStrTrimWhite(&token);
1971 
1972 	if(ajStrGetLen(token))
1973 	{
1974 	    if(maxlen)
1975 	    {
1976 		if(ajStrGetLen(token) > maxlen)
1977                     ajStrAssignSubS(&str,token,0,maxlen-1);
1978 		else
1979 		    ajStrAssignS(&str,token);
1980             }
1981 	    else
1982 		ajStrAssignS(&str,token);
1983 
1984 	    ajListstrPushAppend(kwlist, str);
1985 	    str = NULL;
1986 	}
1987     }
1988 
1989     ajStrDel(&token);
1990 
1991     return;
1992 }
1993 
1994 
1995 
1996 
1997 /* @func embBtreeReadDir ******************************************************
1998 **
1999 ** Read files to index
2000 **
2001 ** @param [w] filelist [AjPStr**] list of files to read
2002 ** @param [r] fdirectory [const AjPStr] Directory to scan
2003 ** @param [r] files [const AjPStr] Filename to search for (or NULL)
2004 ** @param [r] exclude [const AjPStr] list of files to exclude
2005 **
2006 ** @return [ajuint] number of matching files
2007 **
2008 ** @release 2.8.0
2009 ** @@
2010 ******************************************************************************/
2011 
embBtreeReadDir(AjPStr ** filelist,const AjPStr fdirectory,const AjPStr files,const AjPStr exclude)2012 ajuint embBtreeReadDir(AjPStr **filelist, const AjPStr fdirectory,
2013 		      const AjPStr files, const AjPStr exclude)
2014 {
2015     AjPList lfiles = NULL;
2016     ajuint nfiles;
2017     ajuint nremove;
2018     ajuint i;
2019     ajuint j;
2020     AjPStr file    = NULL;
2021     AjPStr *removelist = NULL;
2022 
2023     /* ajDebug("In ajBtreeReadDir\n"); */
2024 
2025     lfiles = ajListNew();
2026     nfiles = ajFilelistAddPathWild(lfiles, fdirectory, files);
2027 
2028     nremove = ajArrCommaList(exclude,&removelist);
2029 
2030     for(i=0;i<nfiles;++i)
2031     {
2032 	ajListPop(lfiles,(void **)&file);
2033 	ajFilenameTrimPath(&file);
2034 
2035 	for(j=0;j<nremove && ! ajStrMatchWildS(file,removelist[j]);++j);
2036 
2037 	if(j == nremove)
2038 	    ajListstrPushAppend(lfiles, file);
2039     }
2040 
2041     nfiles =  (ajuint) ajListToarray(lfiles,(void ***)&(*filelist));
2042     ajListFree(&lfiles);
2043 
2044     for(i=0; i<nremove;++i)
2045 	ajStrDel(&removelist[i]);
2046 
2047     AJFREE(removelist);
2048 
2049     return nfiles;
2050 }
2051 
2052 
2053 
2054 
2055 /* @funcstatic btreeCreateFile ************************************************
2056 **
2057 ** Open B+tree file for writing
2058 **
2059 ** @param [r] idirectory [const AjPStr] Directory for index files
2060 ** @param [r] dbname [const AjPStr] name of database
2061 ** @param [r] add [const char *] type of file
2062 **
2063 ** @return [AjPFile] opened file
2064 **
2065 ** @release 2.9.0
2066 ** @@
2067 ******************************************************************************/
2068 
btreeCreateFile(const AjPStr idirectory,const AjPStr dbname,const char * add)2069 static AjPFile btreeCreateFile(const AjPStr idirectory, const AjPStr dbname,
2070 			       const char *add)
2071 {
2072     AjPStr filename = NULL;
2073     AjPFile fp      = NULL;
2074 
2075     /* ajDebug("In btreeCreateFile\n"); */
2076 
2077     filename = ajStrNew();
2078 
2079     if(!ajStrGetLen(idirectory))
2080         ajFmtPrintS(&filename,"%S%s",dbname,add);
2081     else
2082         ajFmtPrintS(&filename,"%S%s%S%s",idirectory,SLASH_STRING,dbname,add);
2083 
2084     fp =  ajFileNewOutNameS(filename);
2085 
2086     ajStrDel(&filename);
2087 
2088     return fp;
2089 }
2090 
2091 
2092 
2093 
2094 /* @func embBtreeEntryNew *****************************************************
2095 **
2096 ** Construct a database entry object
2097 **
2098 ** @param [r] refcount [ajuint] Number of reference file(s) per entry
2099 ** @return [EmbPBtreeEntry] db entry object pointer
2100 **
2101 ** @release 3.0.0
2102 ** @@
2103 ******************************************************************************/
2104 
embBtreeEntryNew(ajuint refcount)2105 EmbPBtreeEntry embBtreeEntryNew(ajuint refcount)
2106 {
2107     EmbPBtreeEntry thys;
2108     ajuint  iref;
2109 
2110     AJNEW0(thys);
2111 
2112     thys->do_id          = ajFalse;
2113 
2114     thys->dbname  = ajStrNew();
2115     thys->dbrs    = ajStrNew();
2116     thys->date    = ajStrNew();
2117     thys->release = ajStrNew();
2118     thys->dbtype  = ajStrNew();
2119 
2120     thys->directory  = ajStrNew();
2121     thys->idirectory = ajStrNew();
2122     thys->idextension = ajStrNew();
2123     thys->maxid       = ajStrNew();
2124 
2125     thys->files    = ajListNew();
2126 
2127     thys->id = ajStrNew();
2128 
2129     if(refcount)
2130     {
2131         thys->refcount = refcount;
2132 
2133         AJCNEW0(thys->reffpos, refcount);
2134         AJCNEW0(thys->reffiles, refcount);
2135 
2136         for(iref=0; iref < refcount; iref++)
2137             thys->reffiles[iref] = ajListNew();
2138     }
2139 
2140     return thys;
2141 }
2142 
2143 
2144 
2145 
2146 /* @func embBtreeEntryDel *****************************************************
2147 **
2148 ** Delete a database entry object
2149 **
2150 ** @param [d] pthis [EmbPBtreeEntry*] db entry object pointer
2151 ** @return [void]
2152 **
2153 ** @release 3.0.0
2154 ** @@
2155 ******************************************************************************/
2156 
embBtreeEntryDel(EmbPBtreeEntry * pthis)2157 void embBtreeEntryDel(EmbPBtreeEntry* pthis)
2158 {
2159     EmbPBtreeEntry thys;
2160     EmbPBtreeField field;
2161     AjPStr tmpstr = NULL;
2162 
2163     ajuint iref;
2164 
2165     thys = *pthis;
2166 
2167     ajStrDel(&thys->dbname);
2168     ajStrDel(&thys->idextension);
2169     ajStrDel(&thys->maxid);
2170     ajStrDel(&thys->dbrs);
2171     ajStrDel(&thys->date);
2172     ajStrDel(&thys->release);
2173     ajStrDel(&thys->dbtype);
2174 
2175     ajStrDel(&thys->directory);
2176     ajStrDel(&thys->idirectory);
2177 
2178 
2179     while(ajListPop(thys->files,(void **)&tmpstr))
2180 	ajStrDel(&tmpstr);
2181 
2182     ajListFree(&thys->files);
2183 
2184     if(thys->reffiles)
2185     {
2186         for(iref=0; iref < thys->refcount; iref++)
2187         {
2188             while(ajListPop(thys->reffiles[iref],(void **)&tmpstr))
2189                 ajStrDel(&tmpstr);
2190 
2191             ajListFree(&thys->reffiles[iref]);
2192         }
2193 
2194         AJFREE(thys->reffiles);
2195     }
2196 
2197     if(thys->reffpos)
2198         AJFREE(thys->reffpos);
2199 
2200     while(ajListPop(thys->fields,(void **)&field))
2201 	embBtreeFieldDel(&field);
2202 
2203     ajListFree(&thys->fields);
2204 
2205     ajStrDel(&thys->id);
2206 
2207     AJFREE(*pthis);
2208 
2209     return;
2210 }
2211 
2212 
2213 
2214 
2215 /* @func embBtreeEntrySetCompressed *******************************************
2216 **
2217 ** Set database entry to be compressed on writing
2218 **
2219 ** @param [u] entry [EmbPBtreeEntry] Database entry information
2220 **
2221 ** @return [void]
2222 **
2223 ** @release 6.4.0
2224 ** @@
2225 ******************************************************************************/
2226 
embBtreeEntrySetCompressed(EmbPBtreeEntry entry)2227 void embBtreeEntrySetCompressed(EmbPBtreeEntry entry)
2228 {
2229     entry->compressed = ajTrue;
2230 
2231     return;
2232 }
2233 
2234 
2235 
2236 
2237 /* @func embBtreeSetFields ****************************************************
2238 **
2239 ** Set database fields to index
2240 **
2241 ** @param [w] entry [EmbPBtreeEntry] Database entry information
2242 ** @param [r] fields [AjPStr const *] user specified fields
2243 **
2244 ** @return [ajuint] number of fields set
2245 **
2246 ** @release 3.0.0
2247 ** @@
2248 ******************************************************************************/
2249 
embBtreeSetFields(EmbPBtreeEntry entry,AjPStr const * fields)2250 ajuint embBtreeSetFields(EmbPBtreeEntry entry, AjPStr const *fields)
2251 {
2252     ajuint nfields;
2253     EmbPBtreeField field = NULL;
2254 
2255     nfields = 0;
2256 
2257     if(!entry->fields)
2258         entry->fields = ajListNew();
2259 
2260     while(fields[nfields])
2261     {
2262 	if(ajStrMatchCaseC(fields[nfields], "id"))
2263 	    entry->do_id = ajTrue;
2264 
2265 	else
2266         {
2267             field = embBtreeFieldNewS(fields[nfields], entry->refcount);
2268             ajListPushAppend(entry->fields, field);
2269             field = NULL;
2270         }
2271 	++nfields;
2272     }
2273 
2274     return nfields;
2275 }
2276 
2277 
2278 
2279 
2280 /* @func embBtreeSetDbInfo ****************************************************
2281 **
2282 ** Set general database information
2283 **
2284 ** @param [w] entry [EmbPBtreeEntry] Database entry information
2285 ** @param [r] name [const AjPStr] user specified name
2286 ** @param [r] dbrs [const AjPStr] user specified resource
2287 ** @param [r] date [const AjPStr] user specified date
2288 ** @param [r] release [const AjPStr] user specified release
2289 ** @param [r] type [const AjPStr] user specified type
2290 ** @param [r] directory [const AjPStr] user specified directory
2291 ** @param [r] idirectory [const AjPStr] user specified index directory
2292 **
2293 ** @return [void]
2294 **
2295 ** @release 3.0.0
2296 ** @@
2297 ******************************************************************************/
2298 
embBtreeSetDbInfo(EmbPBtreeEntry entry,const AjPStr name,const AjPStr dbrs,const AjPStr date,const AjPStr release,const AjPStr type,const AjPStr directory,const AjPStr idirectory)2299 void embBtreeSetDbInfo(EmbPBtreeEntry entry, const AjPStr name,
2300 		       const AjPStr dbrs,
2301 		       const AjPStr date, const AjPStr release,
2302 		       const AjPStr type, const AjPStr directory,
2303 		       const AjPStr idirectory)
2304 {
2305     ajStrAssignS(&entry->dbname, name);
2306     ajStrAssignC(&entry->idextension, "xid");
2307     ajStrAssignS(&entry->date, date);
2308     ajStrAssignS(&entry->release, release);
2309     ajStrAssignS(&entry->dbtype, type);
2310     ajStrAssignS(&entry->dbrs, dbrs);
2311 
2312     ajStrAssignS(&entry->directory,directory);
2313     ajStrAssignS(&entry->idirectory,idirectory);
2314 
2315     return;
2316 }
2317 
2318 
2319 
2320 
2321 /* @func embBtreeGetFieldC ****************************************************
2322 **
2323 ** Set database fields to index
2324 **
2325 ** @param [w] entry [EmbPBtreeEntry] Database entry information
2326 ** @param [r] nametxt [const char*] Field name
2327 **
2328 ** @return [EmbPBtreeField] Btree index field definition
2329 **
2330 ** @release 6.4.0
2331 ** @@
2332 ******************************************************************************/
2333 
embBtreeGetFieldC(EmbPBtreeEntry entry,const char * nametxt)2334 EmbPBtreeField embBtreeGetFieldC(EmbPBtreeEntry entry, const char * nametxt)
2335 {
2336     EmbPBtreeField ret = NULL;
2337     EmbPBtreeField field = NULL;
2338 
2339     AjIList iter;
2340 
2341     if(!ajListGetLength(entry->fields))
2342         return NULL;
2343 
2344     iter = ajListIterNewread(entry->fields);
2345     while(!ajListIterDone(iter))
2346     {
2347         field = ajListIterGet(iter);
2348         if(ajStrMatchC(field->name, nametxt))
2349         {
2350             ret = field;
2351             break;
2352         }
2353     }
2354 
2355     ajListIterDel(&iter);
2356 
2357     return ret;
2358 }
2359 
2360 
2361 
2362 
2363 /* @func embBtreeGetFieldS ****************************************************
2364 **
2365 ** Set database fields to index
2366 **
2367 ** @param [w] entry [EmbPBtreeEntry] Database entry information
2368 ** @param [r] name [const AjPStr] Field name
2369 **
2370 ** @return [EmbPBtreeField] Btree index field definition
2371 **
2372 ** @release 6.4.0
2373 ** @@
2374 ******************************************************************************/
2375 
embBtreeGetFieldS(EmbPBtreeEntry entry,const AjPStr name)2376 EmbPBtreeField embBtreeGetFieldS(EmbPBtreeEntry entry, const AjPStr name)
2377 {
2378     EmbPBtreeField ret = NULL;
2379     EmbPBtreeField field = NULL;
2380 
2381     AjIList iter;
2382 
2383     if(!ajListGetLength(entry->fields))
2384         return NULL;
2385 
2386     iter = ajListIterNewread(entry->fields);
2387     while(!ajListIterDone(iter))
2388     {
2389         field = ajListIterGet(iter);
2390         if(ajStrMatchS(field->name, name))
2391         {
2392             ret = field;
2393             break;
2394         }
2395     }
2396 
2397     ajListIterDel(&iter);
2398 
2399     return ret;
2400 }
2401 
2402 
2403 
2404 
2405 /* @func embBtreeGetFiles *****************************************************
2406 **
2407 ** Read files to index
2408 **
2409 ** @param [u] entry [EmbPBtreeEntry] list of files to read
2410 ** @param [r] fdirectory [const AjPStr] Directory to scan
2411 ** @param [r] files [const AjPStr] Filename to search for (or NULL)
2412 ** @param [r] exclude [const AjPStr] list of files to exclude
2413 **
2414 ** @return [ajuint] number of matching files
2415 **
2416 ** @release 3.0.0
2417 ** @@
2418 ******************************************************************************/
2419 
embBtreeGetFiles(EmbPBtreeEntry entry,const AjPStr fdirectory,const AjPStr files,const AjPStr exclude)2420 ajuint embBtreeGetFiles(EmbPBtreeEntry entry, const AjPStr fdirectory,
2421 		       const AjPStr files, const AjPStr exclude)
2422 {
2423     ajuint nfiles;
2424     ajuint nremove;
2425     ajuint i;
2426     ajuint j;
2427     AjPStr file    = NULL;
2428     AjPStr *removelist = NULL;
2429     ajuint count = 0;
2430 
2431     /* ajDebug("In embBtreeGetFiles\n"); */
2432 
2433     nfiles = ajFilelistAddPathWild(entry->files, fdirectory,files);
2434 
2435     nremove = ajArrCommaList(exclude,&removelist);
2436 
2437     count = 0;
2438 
2439     for(i=0;i<nfiles;++i)
2440     {
2441 	ajListPop(entry->files,(void **)&file);
2442 	ajFilenameTrimPath(&file);
2443 
2444 	for(j=0;j<nremove && !ajStrMatchWildS(file,removelist[j]);++j);
2445 
2446 	if(j == nremove)
2447 	{
2448 	    ajListstrPushAppend(entry->files, file);
2449 	    ++count;
2450 	}
2451     }
2452 
2453     ajListSort(entry->files, &ajStrVcmp);
2454 
2455     entry->nfiles = count;
2456 
2457     for(i=0; i<nremove;++i)
2458 	ajStrDel(&removelist[i]);
2459 
2460     AJFREE(removelist);
2461 
2462     return count;
2463 }
2464 
2465 
2466 
2467 
2468 /* @func embBtreeWriteEntryFile ***********************************************
2469 **
2470 ** Put files to entry file
2471 **
2472 ** @param [r] entry [const EmbPBtreeEntry] database data
2473 **
2474 ** @return [AjBool] true on success
2475 **
2476 ** @release 3.0.0
2477 ** @@
2478 ******************************************************************************/
2479 
embBtreeWriteEntryFile(const EmbPBtreeEntry entry)2480 AjBool embBtreeWriteEntryFile(const EmbPBtreeEntry entry)
2481 {
2482     AjPFile entfile = NULL;
2483     ajuint i;
2484     ajuint iref;
2485     AjPStr tmpstr = NULL;
2486     AjPStr refstr = NULL;
2487 
2488     /* ajDebug("In embBtreeWriteEntryFile\n"); */
2489 
2490     entfile = btreeCreateFile(entry->idirectory,entry->dbname,BTENTRYFILE);
2491     if(!entfile)
2492     {
2493         ajWarn("Failed to create file '%S' entry->dbname "
2494                "in directory 'entry->idirectory' "
2495                "error:%d '%s'",
2496                errno, strerror(errno));
2497 	return ajFalse;
2498     }
2499 
2500     ajFmtPrintF(entfile,"# Number of files: %u\n",entry->nfiles);
2501     ajFmtPrintF(entfile,"# Release: %S\n",entry->release);
2502     ajFmtPrintF(entfile,"# Date:    %S\n",entry->date);
2503 
2504     if(!entry->refcount)
2505 	ajFmtPrintF(entfile,"Single");
2506     else
2507 	ajFmtPrintF(entfile,"Reference %u", entry->refcount+1);
2508 
2509     ajFmtPrintF(entfile," filename database\n");
2510 
2511     for(i=0;i<entry->nfiles;++i)
2512 	if(!entry->refcount)
2513 	{
2514 	    ajListPop(entry->files,(void **)&tmpstr);
2515 	    ajFmtPrintF(entfile,"%S\n",tmpstr);
2516 	    ajListstrPushAppend(entry->files, tmpstr);
2517 	}
2518 	else
2519 	{
2520 	    ajListPop(entry->files,(void **)&tmpstr);
2521 	    ajFmtPrintF(entfile,"%S",tmpstr);
2522             for(iref=0; iref < entry->refcount; iref++)
2523             {
2524                 ajListPop(entry->reffiles[iref],(void **)&refstr);
2525                 ajFmtPrintF(entfile," %S",refstr);
2526                 ajListstrPushAppend(entry->reffiles[iref], refstr);
2527             }
2528 
2529             ajFmtPrintF(entfile,"\n");
2530 	    ajListstrPushAppend(entry->files, tmpstr);
2531 	}
2532 
2533     ajFileClose(&entfile);
2534 
2535     return ajTrue;
2536 }
2537 
2538 
2539 
2540 
2541 /* @func embBtreeGetRsInfo ****************************************************
2542 **
2543 ** Get resource information for selected database
2544 **
2545 ** @param [u] entry [EmbPBtreeEntry] database data
2546 **
2547 ** @return [void]
2548 **
2549 ** @release 3.0.0
2550 ** @@
2551 ******************************************************************************/
2552 
embBtreeGetRsInfo(EmbPBtreeEntry entry)2553 void embBtreeGetRsInfo(EmbPBtreeEntry entry)
2554 {
2555     AjPStr attrstr = NULL;
2556     AjPStr value = NULL;
2557     ajuint  n = 0;
2558     AjIList iter;
2559     EmbPBtreeField field;
2560 
2561     value = ajStrNew();
2562 
2563     ajStrAssignC(&attrstr, "type");
2564     if(!ajNamRsAttrValueS(entry->dbrs, attrstr, &value))
2565 	ajFatal("Missing resource entry (%S) for indexing",entry->dbrs);
2566 
2567     if(!ajStrMatchCaseC(value,"Index"))
2568 	ajFatal("Incorrect 'type' field for resource (%S)",entry->dbrs);
2569 
2570     entry->pricachesize = BT_CACHESIZE;
2571 
2572     if(ajNamRsAttrValueC(MAJSTRGETPTR(entry->dbrs),"cachesize",&value) ||
2573        ajNamGetValueC("CACHESIZE",&value))
2574     {
2575 	if(ajStrToUint(value,&n))
2576 	    entry->pricachesize = n;
2577 	else
2578 	    ajErr("Bad value for environment variable 'CACHESIZE'");
2579     }
2580     else
2581     {
2582 	ajDebug("CACHESIZE defaults to %d\n", entry->pricachesize);
2583     }
2584 
2585     entry->seccachesize = entry->pricachesize;
2586 
2587     if(ajNamRsAttrValueC(MAJSTRGETPTR(entry->dbrs),"seccachesize",&value) ||
2588        ajNamGetValueC("SECCACHESIZE",&value))
2589     {
2590 	if(ajStrToUint(value,&n))
2591 	    entry->seccachesize = n;
2592 	else
2593 	    ajErr("Bad value for environment variable 'SECCACHESIZE'");
2594     }
2595     else
2596     {
2597 	ajDebug("SECCACHESIZE defaults to %d\n", entry->seccachesize);
2598     }
2599 
2600     entry->pripagesize = BT_PAGESIZE;
2601 
2602     if(ajNamRsAttrValueC(MAJSTRGETPTR(entry->dbrs),"pagesize",&value) ||
2603        ajNamGetValueC("PAGESIZE",&value))
2604     {
2605 	if(ajStrToUint(value,&n))
2606 	    entry->pripagesize = n;
2607 	else
2608 	    ajErr("Bad value for environment variable 'PAGESIZE'");
2609     }
2610     else
2611     {
2612 	ajDebug("PAGESIZE defaults to %d\n", entry->pripagesize);
2613     }
2614 
2615     entry->secpagesize = entry->pripagesize;
2616 
2617     if(ajNamRsAttrValueC(MAJSTRGETPTR(entry->dbrs),"secpagesize",&value) ||
2618        ajNamGetValueC("SECPAGESIZE",&value))
2619     {
2620 	if(ajStrToUint(value,&n))
2621 	    entry->secpagesize = n;
2622 	else
2623 	    ajErr("Bad value for environment variable 'SECPAGESIZE'");
2624     }
2625     else
2626     {
2627 	ajDebug("SECPAGESIZE defaults to %d\n", entry->secpagesize);
2628     }
2629 
2630     entry->idlen = BT_KWLIMIT;
2631     ajStrAssignC(&attrstr, "idlen");
2632 
2633     if(ajNamRsAttrValueS(entry->dbrs,attrstr,&value))
2634     {
2635 	if(ajStrToUint(value,&n))
2636 	    entry->idlen = n;
2637 	else
2638 	    ajErr("Bad value for index resource 'idlen'");
2639     }
2640 
2641     ajStrAssignC(&attrstr, "idpagesize");
2642     if(ajNamRsAttrValueS(entry->dbrs,attrstr,&value))
2643     {
2644         if(ajStrToUint(value,&n))
2645 	    entry->pripagesize = n;
2646 	else
2647 	    ajErr("Bad value for index resource 'idpagesize'");
2648     }
2649 
2650     ajStrAssignC(&attrstr, "idsecpagesize");
2651     if(ajNamRsAttrValueS(entry->dbrs,attrstr,&value))
2652     {
2653         if(ajStrToUint(value,&n))
2654 	    entry->secpagesize = n;
2655 	else
2656 	    ajErr("Bad value for index resource 'idsecpagesize'");
2657     }
2658 
2659     ajStrAssignC(&attrstr, "idcachesize");
2660     if(ajNamRsAttrValueS(entry->dbrs, attrstr, &value))
2661     {
2662         if(ajStrToUint(value,&n))
2663 	    entry->pricachesize = n;
2664 	else
2665 	    ajErr("Bad value for index resource 'idcachesize'");
2666     }
2667 
2668     ajStrAssignC(&attrstr, "idseccachesize");
2669     if(ajNamRsAttrValueS(entry->dbrs, attrstr, &value))
2670     {
2671         if(ajStrToUint(value,&n))
2672 	    entry->seccachesize = n;
2673 	else
2674 	    ajErr("Bad value for index resource 'idseccachesize'");
2675     }
2676 
2677     if(!entry->secpagesize)
2678         entry->secpagesize = entry->pripagesize;
2679 
2680     if(!entry->seccachesize)
2681         entry->seccachesize = entry->pricachesize;
2682 
2683     entry->idorder = (entry->pripagesize - (BT_NODEPREAMBLE + BT_PTRLEN)) /
2684         ((entry->idlen + 1) + BT_IDKEYEXTRA);
2685 
2686     entry->idfill  = (entry->pripagesize - BT_BUCKPREAMBLE) /
2687         ((entry->idlen + 1) + BT_KEYLENENTRY +
2688          BT_DDOFF + entry->refcount*BT_EXTRA);
2689 
2690     entry->idsecorder = (entry->secpagesize - (BT_NODEPREAMBLE + BT_PTRLEN)) /
2691         (BT_OFFKEYLEN + BT_IDKEYEXTRA);
2692 
2693     entry->idsecfill  = (entry->secpagesize - BT_BUCKPREAMBLE) /
2694         (BT_DOFF + entry->refcount*BT_EXTRA);
2695 
2696 /* now process the same values for each index field */
2697 
2698     if(ajListGetLength(entry->fields))
2699     {
2700         iter = ajListIterNewread(entry->fields);
2701 
2702         while(!ajListIterDone(iter))
2703         {
2704             field = ajListIterGet(iter);
2705 
2706             field->idlen = entry->idlen;
2707 
2708             ajFmtPrintS(&attrstr, "%Slen", field->name);
2709             if(!ajNamRsAttrValueS(entry->dbrs,attrstr,&value))
2710                 field->len = ajBtreeFieldGetLenS(field->name);
2711             else
2712             {
2713                 if(ajStrToUint(value,&n))
2714                     field->len = n;
2715                 else
2716                 {
2717                     ajErr("Bad value for index resource '%S'", attrstr);
2718                     field->len = 15;
2719                 }
2720             }
2721 
2722             field->pripagesize = entry->pripagesize;
2723             field->secpagesize = entry->secpagesize;
2724 
2725             ajFmtPrintS(&attrstr, "%Spagesize", field->name);
2726 
2727             if(ajNamRsAttrValueS(entry->dbrs,attrstr,&value))
2728             {
2729                 if(ajStrToUint(value,&n))
2730                 {
2731                     field->pripagesize = n;
2732                     field->secpagesize = n;
2733                 }
2734                 else
2735                 {
2736                     ajErr("Bad value for index resource '%S'", attrstr);
2737                     field->pripagesize = entry->pripagesize;
2738                 }
2739             }
2740 
2741 
2742             ajFmtPrintS(&attrstr, "%Ssecpagesize", field->name);
2743 
2744             if(ajNamRsAttrValueS(entry->dbrs,attrstr,&value))
2745             {
2746                 if(ajStrToUint(value,&n))
2747                 {
2748                     field->secpagesize = n;
2749                 }
2750                 else
2751                 {
2752                     ajErr("Bad value for index resource '%S'", attrstr);
2753                 }
2754             }
2755 
2756             field->pricachesize = entry->pricachesize;
2757             field->seccachesize = entry->seccachesize;
2758 
2759             ajFmtPrintS(&attrstr, "%Scachesize", field->name);
2760 
2761             if(ajNamRsAttrValueS(entry->dbrs,attrstr,&value))
2762             {
2763                 if(ajStrToUint(value,&n))
2764                 {
2765                     field->pricachesize = n;
2766                     field->seccachesize = n;
2767                 }
2768                 else
2769                 {
2770                     ajErr("Bad value for index resource '%Scachesize'",
2771                           field->name);
2772                     field->pricachesize = entry->pricachesize;
2773                 }
2774             }
2775 
2776             ajFmtPrintS(&attrstr, "%Sseccachesize", field->name);
2777 
2778             if(ajNamRsAttrValueS(entry->dbrs,attrstr,&value))
2779             {
2780                 if(ajStrToUint(value,&n))
2781                 {
2782                     field->seccachesize = n;
2783                 }
2784                 else
2785                 {
2786                     ajErr("Bad value for index resource '%S'", attrstr);
2787                 }
2788             }
2789 
2790             field->order =
2791                 (field->pripagesize - (BT_NODEPREAMBLE + BT_PTRLEN)) /
2792                 ((field->len + 1) + BT_IDKEYEXTRA);
2793 
2794             field->fill  =
2795                 (field->pripagesize - BT_BUCKPREAMBLE) /
2796                 ((field->len + 1) + BT_KEYLENENTRY +
2797                  BT_DDOFF + field->refcount*BT_EXTRA);
2798 
2799             if(!field->secondary)
2800             {
2801                 field->secorder =
2802                     (field->secpagesize - (BT_NODEPREAMBLE + BT_PTRLEN)) /
2803                     (BT_OFFKEYLEN + BT_IDKEYEXTRA);
2804                 field->secfill  =
2805                     (field->secpagesize - BT_BUCKPREAMBLE) /
2806                     (BT_DOFF + field->refcount*BT_EXTRA);
2807             }
2808             else
2809             {
2810     /*
2811      *  The secondary tree keys are the IDs of the entries containing
2812      *  the keywords so we use the entry idlen for their size limit
2813      */
2814                 field->secorder =
2815                     (field->secpagesize - (BT_NODEPREAMBLE + BT_PTRLEN)) /
2816                     ((entry->idlen + 1) + BT_IDKEYEXTRA);
2817                 field->secfill  =
2818                     (field->secpagesize - BT_BUCKPREAMBLE) /
2819                     ((entry->idlen + 1) + BT_KEYLENENTRY);
2820             }
2821         }
2822 
2823         ajListIterDel(&iter);
2824     }
2825 
2826     ajStrDel(&attrstr);
2827     ajStrDel(&value);
2828 
2829     return;
2830 }
2831 
2832 
2833 
2834 
2835 /* @func embBtreeOpenCaches ***************************************************
2836 **
2837 ** Open index files for writing
2838 **
2839 ** @param [u] entry [EmbPBtreeEntry] database data
2840 **
2841 ** @return [AjBool] true on success
2842 **
2843 ** @release 3.0.0
2844 ** @@
2845 ******************************************************************************/
2846 
embBtreeOpenCaches(EmbPBtreeEntry entry)2847 AjBool embBtreeOpenCaches(EmbPBtreeEntry entry)
2848 {
2849     ajuint level    = 0;
2850     ajlong count    = 0L;
2851     ajlong countall = 0L;
2852     AjIList iter;
2853     EmbPBtreeField field;
2854 
2855     if(entry->do_id)
2856     {
2857 	entry->idcache = ajBtreeIdcacheNewS(entry->dbname,
2858                                             entry->idextension,
2859                                             entry->idirectory,
2860                                             "wb+",
2861                                             entry->compressed,
2862                                             entry->idlen,
2863                                             entry->refcount,
2864                                             entry->pripagesize,
2865                                             entry->secpagesize,
2866                                             entry->pricachesize,
2867                                             entry->seccachesize,
2868                                             0,
2869                                             0,
2870                                             entry->idorder,
2871                                             entry->idfill,
2872                                             level,
2873                                             entry->idsecorder,
2874                                             entry->idsecfill,
2875                                             count,
2876                                             countall);
2877 	if(!entry->idcache)
2878 	    ajFatal("Cannot open ID index");
2879     }
2880 
2881     if(ajListGetLength(entry->fields))
2882     {
2883         iter = ajListIterNewread(entry->fields);
2884 
2885         while(!ajListIterDone(iter))
2886         {
2887             field = ajListIterGet(iter);
2888 
2889             if(field->secondary)
2890                 field->cache = ajBtreeSeccacheNewS(entry->dbname,
2891                                                    field->extension,
2892                                                    entry->idirectory,
2893                                                    "wb+",
2894                                                    field->compressed,
2895                                                    field->len,
2896                                                    field->idlen,
2897                                                    field->pripagesize,
2898                                                    field->secpagesize,
2899                                                    field->pricachesize,
2900                                                    field->seccachesize,
2901                                                    field->pripagecount,
2902                                                    field->secpagecount,
2903                                                    field->order,
2904                                                    field->fill,
2905                                                    level,
2906                                                    field->secorder,
2907                                                    field->secfill,
2908                                                    count,
2909                                                    countall);
2910             else
2911                 field->cache = ajBtreeIdcacheNewS(entry->dbname,
2912                                                   field->extension,
2913                                                   entry->idirectory,
2914                                                   "wb+",
2915                                                   field->compressed,
2916                                                   field->len,
2917                                                   field->refcount,
2918                                                   field->pripagesize,
2919                                                   field->secpagesize,
2920                                                   field->pricachesize,
2921                                                   field->seccachesize,
2922                                                   field->pripagecount,
2923                                                   field->secpagecount,
2924                                                   field->order,
2925                                                   field->fill,
2926                                                   level,
2927                                                   field->secorder,
2928                                                   field->secfill,
2929                                                   count,
2930                                                   countall);
2931             if(!field->cache)
2932                 ajFatal("Cannot open %S index", field->extension);
2933         }
2934 
2935         ajListIterDel(&iter);
2936     }
2937 
2938     return ajTrue;
2939 }
2940 
2941 
2942 
2943 
2944 /* @func embBtreeCloseCaches **************************************************
2945 **
2946 ** Close index files
2947 **
2948 ** @param [u] entry [EmbPBtreeEntry] database data
2949 **
2950 ** @return [AjBool] true on success
2951 **
2952 ** @release 3.0.0
2953 ** @@
2954 ******************************************************************************/
2955 
embBtreeCloseCaches(EmbPBtreeEntry entry)2956 AjBool embBtreeCloseCaches(EmbPBtreeEntry entry)
2957 {
2958     AjIList iter;
2959     EmbPBtreeField field;
2960 
2961     if(entry->do_id)
2962     {
2963 	ajBtreeCacheDel(&entry->idcache);
2964     }
2965 
2966     if(ajListGetLength(entry->fields))
2967     {
2968         iter = ajListIterNewread(entry->fields);
2969 
2970         while(!ajListIterDone(iter))
2971         {
2972             field = ajListIterGet(iter);
2973 
2974             ajBtreeCacheDel(&field->cache);
2975          }
2976         ajListIterDel(&iter);
2977     }
2978 
2979     return ajTrue;
2980 }
2981 
2982 
2983 
2984 
#if 0
/* @func embBtreeProbeCaches **************************************************
**
** Probe index caches (currently compiled out)
**
** Calls the primary and secondary array probe functions on the identifier
** index cache when identifier indexing is switched on for the entry, and
** on the cache of every index field whose extension is "ac" or "sv".
**
** @param [u] entry [EmbPBtreeEntry] database data
**
** @return [AjBool] true on success
**
** @release 6.0.0
** @@
******************************************************************************/

AjBool embBtreeProbeCaches(EmbPBtreeEntry entry)
{
    EmbPBtreeField field = NULL;
    AjIList fielditer = NULL;

    if(entry->do_id)
    {
        ajBtreeProbePriArray(entry->idcache);
        ajBtreeProbeSecArray(entry->idcache);
    }

    if(!ajListGetLength(entry->fields))
        return ajTrue;

    fielditer = ajListIterNewread(entry->fields);

    while(!ajListIterDone(fielditer))
    {
        field = ajListIterGet(fielditer);

        /* only the "ac" and "sv" indexes are probed */
        if(!ajStrMatchC(field->extension, "ac") &&
           !ajStrMatchC(field->extension, "sv"))
            continue;

        ajBtreeProbePriArray(field->cache);
        ajBtreeProbeSecArray(field->cache);
    }

    ajListIterDel(&fielditer);

    return ajTrue;
}

#endif
3030 
3031 
3032 
3033 
3034 /* @func embBtreeDumpParameters ***********************************************
3035 **
3036 ** Write index parameter files
3037 **
3038 ** @param [u] entry [EmbPBtreeEntry] database data
3039 **
3040 ** @return [AjBool] true on success
3041 **
3042 ** @release 3.0.0
3043 ** @@
3044 ******************************************************************************/
3045 
embBtreeDumpParameters(EmbPBtreeEntry entry)3046 AjBool embBtreeDumpParameters(EmbPBtreeEntry entry)
3047 {
3048     AjIList iter;
3049     EmbPBtreeField field;
3050 
3051     if(entry->do_id)
3052 	ajBtreeWriteParamsS(entry->idcache, entry->dbname,
3053                             entry->idextension, entry->idirectory);
3054 
3055     if(ajListGetLength(entry->fields))
3056     {
3057         iter = ajListIterNewread(entry->fields);
3058 
3059         while(!ajListIterDone(iter))
3060         {
3061             field = ajListIterGet(iter);
3062             ajBtreeWriteParamsS(field->cache, entry->dbname,
3063                                 field->extension, entry->idirectory);
3064         }
3065         ajListIterDel(&iter);
3066     }
3067 
3068     return ajTrue;
3069 }
3070 
3071 
3072 
3073 
3074 /* @func embBtreeFieldNewC ****************************************************
3075 **
3076 ** Constructor for a Btree index field
3077 **
3078 ** @param [r] nametxt [const char*] Name
3079 ** @return [EmbPBtreeField] Btree field
3080 **
3081 ** @release 6.4.0
3082 ******************************************************************************/
3083 
embBtreeFieldNewC(const char * nametxt)3084 EmbPBtreeField embBtreeFieldNewC(const char* nametxt)
3085 {
3086     EmbPBtreeField ret = NULL;
3087 
3088     AJNEW0(ret);
3089 
3090     ajStrAssignC(&ret->name, nametxt);
3091     ajStrAssignS(&ret->extension, ajBtreeFieldGetExtensionC(nametxt));
3092     ret->secondary = ajBtreeFieldGetSecondaryC(nametxt);
3093 
3094     if(!ajStrGetLen(ret->extension))
3095     {
3096         ajStrAssignK(&ret->extension, 'x');
3097         ajStrAppendC(&ret->extension, nametxt);
3098     }
3099 
3100     ret->data = ajListNew();
3101 
3102     return ret;
3103 }
3104 
3105 
3106 
3107 
3108 /* @func embBtreeFieldNewS ****************************************************
3109 **
3110 ** Constructor for a Btree index field
3111 **
3112 ** @param [r] name [const AjPStr] Name
3113 ** @param [r] refcount [ajuint] Number of reference files
3114 ** @return [EmbPBtreeField] Btree field
3115 **
3116 ** @release 6.4.0
3117 ******************************************************************************/
3118 
embBtreeFieldNewS(const AjPStr name,ajuint refcount)3119 EmbPBtreeField embBtreeFieldNewS(const AjPStr name, ajuint refcount)
3120 {
3121     EmbPBtreeField ret = NULL;
3122 
3123     AJNEW0(ret);
3124 
3125     ret->name = ajStrNewS(name);
3126     ret->extension = ajStrNewS(ajBtreeFieldGetExtensionS(name));
3127     ret->secondary = ajBtreeFieldGetSecondaryS(name);
3128 
3129     if(!ajStrGetLen(ret->extension))
3130     {
3131         ajStrAssignK(&ret->extension, 'x');
3132         ajStrAppendS(&ret->extension, name);
3133     }
3134 
3135     ret->maxkey = ajStrNewC("");
3136 
3137     ret->data = ajListNew();
3138 
3139     ret->refcount = refcount;
3140 
3141     return ret;
3142 }
3143 
3144 
3145 
3146 
3147 /* @func embBtreeFieldDel *****************************************************
3148 **
3149 ** Destructor for a Btree index field
3150 **
3151 ** @param [d] Pthis [EmbPBtreeField*] Btree index field object
3152 ** @return [void]
3153 **
3154 ** @release 6.4.0
3155 ******************************************************************************/
3156 
embBtreeFieldDel(EmbPBtreeField * Pthis)3157 void embBtreeFieldDel(EmbPBtreeField *Pthis)
3158 {
3159     EmbPBtreeField  thys;
3160 
3161     if(!Pthis) return;
3162 
3163     thys = *Pthis;
3164 
3165     ajStrDel(&thys->name);
3166     ajStrDel(&thys->extension);
3167     ajStrDel(&thys->maxkey);
3168     ajListstrFree(&thys->data);
3169 
3170     while(thys->freecount)
3171         ajStrDel(&thys->freelist[--thys->freecount]);
3172 
3173     if(thys->freelist)
3174         AJFREE(thys->freelist);
3175 
3176     AJFREE(*Pthis);
3177     *Pthis = NULL;
3178 
3179     return;
3180 }
3181 
3182 
3183 
3184 
3185 
3186 /* @func embBtreeFieldSetCompressed *******************************************
3187 **
3188 ** Set database field to be compressed on writing
3189 **
3190 ** @param [u] field [EmbPBtreeField] Database field information
3191 **
3192 ** @return [void]
3193 **
3194 ** @release 6.4.0
3195 ** @@
3196 ******************************************************************************/
3197 
embBtreeFieldSetCompressed(EmbPBtreeField field)3198 void embBtreeFieldSetCompressed(EmbPBtreeField field)
3199 {
3200     field->compressed = ajTrue;
3201 
3202     return;
3203 }
3204 
3205 
3206 
3207 
3208 /* @func embBtreeFieldSetIdtype ***********************************************
3209 **
3210 ** Set database field to be identifier type (not secondary) on writing
3211 **
3212 ** @param [u] field [EmbPBtreeField] Database field information
3213 **
3214 ** @return [void]
3215 **
3216 ** @release 6.4.0
3217 ** @@
3218 ******************************************************************************/
3219 
embBtreeFieldSetIdtype(EmbPBtreeField field)3220 void embBtreeFieldSetIdtype(EmbPBtreeField field)
3221 {
3222     field->secondary = ajFalse;
3223 
3224     return;
3225 }
3226 
3227 
3228 
3229 
3230 /* @func embBtreeFieldSetSecondary ********************************************
3231 **
3232 ** Set database field to be secondary on writing
3233 **
3234 ** @param [u] field [EmbPBtreeField] Database field information
3235 **
3236 ** @return [void]
3237 **
3238 ** @release 6.4.0
3239 ** @@
3240 ******************************************************************************/
3241 
embBtreeFieldSetSecondary(EmbPBtreeField field)3242 void embBtreeFieldSetSecondary(EmbPBtreeField field)
3243 {
3244     field->secondary = ajTrue;
3245 
3246     return;
3247 }
3248 
3249 
3250 
3251 
3252 /* @func embIndexExit *********************************************************
3253 **
3254 ** Cleanup indexing internals on exit
3255 **
3256 ** @return [void]
3257 **
3258 ** @release 6.0.0
3259 ******************************************************************************/
3260 
embIndexExit(void)3261 void embIndexExit(void)
3262 {
3263     ajStrDel(&embindexLine);
3264     ajStrDel(&embindexToken);
3265     ajStrDel(&embindexTstr);
3266     ajStrDel(&embindexPrefix);
3267     ajStrDel(&embindexFormat);
3268     ajStrTokenDel(&embindexHandle);
3269 
3270     ajStrDel(&indexWord);
3271     ajBtreeIdDel(&indexId);
3272 
3273     return;
3274 }
3275