1 /* @source embindex ***********************************************************
2 **
3 ** B+ Tree Indexing plus Disc Cache.
4 **
5 ** @author Copyright (c) 2003 Alan Bleasby
6 ** @version $Revision: 1.57 $
7 ** @modified $Date: 2012/12/07 10:24:08 $ by $Author: rice $
8 ** @@
9 **
10 ** This library is free software; you can redistribute it and/or
11 ** modify it under the terms of the GNU Lesser General Public
12 ** License as published by the Free Software Foundation; either
13 ** version 2.1 of the License, or (at your option) any later version.
14 **
15 ** This library is distributed in the hope that it will be useful,
16 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 ** Lesser General Public License for more details.
19 **
20 ** You should have received a copy of the GNU Lesser General Public
21 ** License along with this library; if not, write to the Free Software
22 ** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
23 ** MA 02110-1301, USA.
24 **
25 ******************************************************************************/
26
27
28 #include "ajlib.h"
29
30 #include "embindex.h"
31 #include "ajlist.h"
32 #include "ajindex.h"
33 #include "ajreg.h"
34 #include "ajarr.h"
35 #include "ajnam.h"
36
37 #include <errno.h>
38
39 #define BTENTRYFILE ".ent" /* NOTE(review): not referenced in the visible part of this file */
40 #define KWLIMIT 12 /* NOTE(review): not referenced in the visible part of this file */
41
42
43 static AjPStr embindexLine = NULL; /* working copy of the record being parsed */
44 static AjPStr embindexToken = NULL; /* current token returned by the tokeniser */
45 static AjPStr embindexTstr = NULL; /* digits of the low end of an accession range */
46 static AjPStr embindexPrefix = NULL; /* non-digit prefix used when expanding a range */
47 static AjPStr embindexFormat = NULL; /* format string built for range expansion */
48 static AjPStrTok embindexHandle = NULL; /* tokeniser handle reused by the parsers */
49
50 static AjPStr indexWord = NULL; /* term currently being added to an index */
51 static AjPBtId indexId = NULL; /* id object created on first use, then reused */
52
53 static AjPFile btreeCreateFile(const AjPStr idirectory, const AjPStr dbname,
54 const char *add); /* forward declaration; defined outside the visible part */
55
56
57
58
59 /* @func embBtreeIndexEntry ***************************************************
60 **
61 ** Add a term to an index entry cache
62 **
63 ** @param [u] entry [EmbPBtreeEntry] Entry with id
64 ** @param [r] dbno [ajuint] Database number for an identifier index field
65 ** @return [void]
66 **
67 ** @release 6.4.0
68 ** @@
69 ******************************************************************************/
70
embBtreeIndexEntry(EmbPBtreeEntry entry,ajuint dbno)71 void embBtreeIndexEntry(EmbPBtreeEntry entry,
72 ajuint dbno)
73 {
74 AjBool dotrunc = ajFalse;
75 ajuint iref;
76
77 if(!indexId)
78 indexId = ajBtreeIdNew(entry->refcount);
79
80 if(entry->do_id)
81 {
82 if(ajStrGetLen(entry->id) > entry->idlen)
83 {
84 dotrunc = ajTrue;
85 if(ajStrGetLen(entry->id) > entry->idmaxlen)
86 ajWarn("id '%S' too long (%u), truncating to idlen %d",
87 entry->id, ajStrGetLen(entry->id), entry->idlen);
88
89 }
90
91 if(ajStrGetLen(entry->id) > entry->idmaxlen)
92 {
93 entry->idmaxlen = ajStrGetLen(entry->id);
94 ajStrAssignS(&entry->maxid, entry->id);
95 }
96
97 if(dotrunc)
98 {
99 entry->idtruncate++;
100 ajStrTruncateLen(&entry->id,entry->idlen);
101 }
102
103 ajStrAssignS(&indexId->id,entry->id);
104 indexId->dbno = dbno;
105 indexId->dups = 0;
106 indexId->offset = entry->fpos;
107 indexId->refcount = entry->refcount;
108
109 if(entry->refcount)
110 {
111 for(iref=0; iref < entry->refcount; iref++)
112 indexId->refoffsets[iref] = entry->reffpos[iref];
113 }
114
115 ajBtreeIdentIndex(entry->idcache,indexId);
116 }
117
118 return;
119 }
120
121
122
123
124 /* @func embBtreeIndexField ***************************************************
125 **
126 ** Add a term to an index field cache
127 **
128 ** @param [u] field [EmbPBtreeField] Field with list of data
129 ** @param [r] entry [const EmbPBtreeEntry] Entry with id
130 ** @param [r] dbno [ajuint] Database number for an identifier index field
131 ** @return [void]
132 **
133 ** @release 6.4.0
134 ** @@
135 ******************************************************************************/
136
embBtreeIndexField(EmbPBtreeField field,const EmbPBtreeEntry entry,ajuint dbno)137 void embBtreeIndexField(EmbPBtreeField field,
138 const EmbPBtreeEntry entry,
139 ajuint dbno)
140 {
141 AjBool dotrunc = ajFalse;
142
143 ajuint iref;
144
145 if(!indexId)
146 indexId = ajBtreeIdNew(field->refcount);
147
148 while(ajListstrPop(field->data,&indexWord))
149 {
150 if(ajStrGetLen(indexWord) > field->len)
151 {
152 dotrunc = ajTrue;
153 if(ajStrGetLen(indexWord) > field->maxlen)
154 ajWarn("%S field token '%S' too long (%u), "
155 "truncating to %Slen %d",
156 field->name, indexWord, ajStrGetLen(indexWord),
157 field->name, field->len);
158 }
159
160 if(ajStrGetLen(indexWord) > field->maxlen)
161 {
162 field->maxlen = ajStrGetLen(indexWord);
163 ajStrAssignS(&field->maxkey, indexWord);
164 }
165
166 if(dotrunc)
167 {
168 field->truncate++;
169 ajStrTruncateLen(&indexWord,field->len);
170 }
171
172 if(field->secondary)
173 {
174 ajBtreeKeyIndex(field->cache, indexWord, entry->id);
175 }
176 else
177 {
178 ajStrAssignS(&indexId->id,indexWord);
179 indexId->dbno = dbno;
180 indexId->dups = 0;
181 indexId->offset = entry->fpos;
182
183 if(entry->refcount)
184 {
185 for(iref=0; iref < entry->refcount; iref++)
186 indexId->refoffsets[iref] = entry->reffpos[iref];
187 }
188
189 ajBtreeIdentIndex(field->cache,indexId);
190 }
191 }
192
193 return;
194 }
195
196
197
198
199 /* @func embBtreeIndexPrimary *************************************************
200 **
201 ** Add a term to an index field cache
202 **
203 ** @param [u] field [EmbPBtreeField] Field with list of data
204 ** @param [r] entry [const EmbPBtreeEntry] Entry with id
205 ** @param [r] dbno [ajuint] Database number for an identifier index field
206 ** @return [ajuint] Number of keys added
207 **
208 ** @release 6.5.0
209 ** @@
210 ******************************************************************************/
211
embBtreeIndexPrimary(EmbPBtreeField field,const EmbPBtreeEntry entry,ajuint dbno)212 ajuint embBtreeIndexPrimary(EmbPBtreeField field,
213 const EmbPBtreeEntry entry,
214 ajuint dbno)
215 {
216 ajuint ret = 0;
217 AjBool dotrunc = ajFalse;
218 ajuint iref = 0;
219
220 if(!indexId)
221 indexId = ajBtreeIdNew(entry->refcount);
222
223 while(embBtreeFieldGetdataS(field, &indexWord))
224 {
225 if(ajStrGetLen(indexWord) > field->len)
226 {
227 dotrunc = ajTrue;
228 if(ajStrGetLen(indexWord) > field->maxlen)
229 ajWarn("%S field token '%S' too long (%u), "
230 "truncating to %Slen %d",
231 field->name, indexWord,
232 ajStrGetLen(indexWord),
233 field->name, field->len);
234 }
235
236 if(ajStrGetLen(indexWord) > field->maxlen)
237 {
238 field->maxlen = ajStrGetLen(indexWord);
239 ajStrAssignS(&field->maxkey, indexWord);
240 }
241
242 if(dotrunc)
243 {
244 field->truncate++;
245 ajStrTruncateLen(&indexWord,field->len);
246 }
247
248 ajStrAssignS(&indexId->id, indexWord);
249 indexId->dbno = dbno;
250 indexId->dups = 0;
251 indexId->offset = entry->fpos;
252 indexId->refcount = entry->refcount;
253
254 if(entry->refcount)
255 {
256 for(iref=0; iref < entry->refcount; iref++)
257 indexId->refoffsets[iref] = entry->reffpos[iref];
258 }
259
260 ajBtreeIdentIndex(field->cache, indexId);
261 ret++;
262 }
263
264 return ret;
265 }
266
267
268
269
270 /* @func embBtreeIndexSecondary ***********************************************
271 **
272 ** Add a term to an index field cache
273 **
274 ** @param [u] field [EmbPBtreeField] Field with list of data
275 ** @param [r] entry [const EmbPBtreeEntry] Entry identifier
276 ** @return [ajuint] Number of keys added
277 **
278 ** @release 6.5.0
279 ** @@
280 ******************************************************************************/
281
embBtreeIndexSecondary(EmbPBtreeField field,const EmbPBtreeEntry entry)282 ajuint embBtreeIndexSecondary(EmbPBtreeField field,
283 const EmbPBtreeEntry entry)
284 {
285 ajuint ret = 0;
286 AjBool dotrunc = ajFalse;
287
288 while(embBtreeFieldGetdataS(field, &indexWord))
289 {
290 if(ajStrGetLen(indexWord) > field->len)
291 {
292 dotrunc = ajTrue;
293 if(ajStrGetLen(indexWord) > field->maxlen)
294 ajWarn("%S field token '%S' too long (%u), "
295 "truncating to %Slen %d",
296 field->name, indexWord,
297 ajStrGetLen(indexWord),
298 field->name, field->len);
299 }
300
301 if(ajStrGetLen(indexWord) > field->maxlen)
302 {
303 field->maxlen = ajStrGetLen(indexWord);
304 ajStrAssignS(&field->maxkey, indexWord);
305 }
306
307 if(dotrunc)
308 {
309 field->truncate++;
310 ajStrTruncateLen(&indexWord,field->len);
311 }
312
313 ajBtreeKeyIndex(field->cache, indexWord, entry->id);
314 ret++;
315 }
316
317 return ret;
318 }
319
320
321
322
323 /* @func embBtreeParseEntry ***************************************************
324 **
325 ** Parse an entry ID from an input record
326 **
327 ** @param [r]readline [const AjPStr] INput record
328 ** @param [u] regexp [AjPRegexp] Regular expression to extract tokens
329 ** @param [u] entry [EmbPBtreeEntry] Entry
330 ** @return [void]
331 **
332 ** @release 6.4.0
333 ** @@
334 ******************************************************************************/
335
embBtreeParseEntry(const AjPStr readline,AjPRegexp regexp,EmbPBtreeEntry entry)336 void embBtreeParseEntry(const AjPStr readline, AjPRegexp regexp,
337 EmbPBtreeEntry entry)
338 {
339 if(ajRegExec(regexp, readline))
340 {
341 ajRegSubI(regexp, 1, &entry->id);
342 }
343
344 return;
345 }
346
347
348
349
350 /* @func embBtreeParseField ***************************************************
351 **
352 ** Parse field tokens from an input record, iterating over a
353 ** regular expression.
354 **
355 ** @param [r]readline [const AjPStr] Input record
356 ** @param [u] regexp [AjPRegexp] Regular expression to extract tokens
357 ** @param [u] field [EmbPBtreeField] Field
358 ** @return [void]
359 **
360 ** @release 6.4.0
361 ** @@
362 ******************************************************************************/
363
embBtreeParseField(const AjPStr readline,AjPRegexp regexp,EmbPBtreeField field)364 void embBtreeParseField(const AjPStr readline, AjPRegexp regexp,
365 EmbPBtreeField field)
366 {
367 AjPStr tmpfd = NULL;
368
369 ajStrAssignS(&embindexLine,readline);
370
371 while(ajRegExec(regexp, embindexLine))
372 {
373 if(field->freecount)
374 tmpfd = field->freelist[--field->freecount];
375
376 ajRegSubI(regexp, 1, &tmpfd);
377 ajRegPost(regexp, &embindexLine);
378
379 if(!ajStrGetLen(tmpfd))
380 {
381 ajStrDel(&tmpfd);
382 continue;
383 }
384
385 ajListstrPushAppend(field->data,tmpfd);
386 ajDebug("++%S '%S'\n", field->name, tmpfd);
387 tmpfd = NULL;
388 }
389
390 return;
391 }
392
393
394
395
396 /* @func embBtreeFieldGetdataS *************************************************
397 **
398 ** Return the next field data value as a word
399 **
400 ** @param [u] field [EmbPBtreeField] Field
401 ** @param [w] Pstr [AjPStr*] Data value field
402 ** @return [AjBool] True if data was found
403 **
404 ** @release 6.5.0
405 ** @@
406 ******************************************************************************/
407
embBtreeFieldGetdataS(EmbPBtreeField field,AjPStr * Pstr)408 AjBool embBtreeFieldGetdataS(EmbPBtreeField field, AjPStr *Pstr)
409 {
410 AjPStr tmpstr = NULL;
411 ajuint oldfreesize = 0;
412
413 if(!ajListGetLength(field->data))
414 return ajFalse;
415
416 ajListPop(field->data,(void **)&tmpstr);
417 ajStrAssignS(Pstr, tmpstr);
418
419 if(!field->freelist)
420 {
421 field->freecount = 0;
422 field->freesize = 16;
423 AJCNEW(field->freelist, field->freesize);
424 }
425
426 if(field->freesize == field->freecount)
427 {
428 oldfreesize = field->freesize;
429 field->freesize *= 2;
430 AJCRESIZE0(field->freelist, oldfreesize, field->freesize);
431 }
432
433 field->freelist[field->freecount++] = tmpstr;
434 tmpstr = NULL;
435
436 return ajTrue;
437 }
438
439
440
441
442 /* @func embBtreeParseFieldSecond *********************************************
443 **
444 ** Parse field tokens from an input record using the first and second
445 ** matches to a regular expression.
446 **
447 ** @param [r] readline [const AjPStr] Input record
448 ** @param [u] regexp [AjPRegexp] Regular expression to extract tokens
449 ** @param [u] field [EmbPBtreeField] Field
450 ** @return [void]
451 **
452 ** @release 6.4.0
453 ** @@
454 ******************************************************************************/
455
embBtreeParseFieldSecond(const AjPStr readline,AjPRegexp regexp,EmbPBtreeField field)456 void embBtreeParseFieldSecond(const AjPStr readline, AjPRegexp regexp,
457 EmbPBtreeField field)
458 {
459 AjPStr tmpfd = NULL;
460
461 if(ajRegExec(regexp, readline))
462 {
463 if(field->freecount)
464 tmpfd = field->freelist[--field->freecount];
465
466 ajRegSubI(regexp, 1, &tmpfd);
467 ajRegSubI(regexp, 1, &tmpfd);
468
469 if(ajStrGetLen(tmpfd))
470 {
471 ajListstrPushAppend(field->data, tmpfd);
472 ajDebug("++%S '%S'\n", field->name, tmpfd);
473 tmpfd = NULL;
474 if(field->freecount)
475 tmpfd = field->freelist[--field->freecount];
476 }
477
478 ajRegSubI(regexp,2, &tmpfd);
479
480 if(ajStrGetLen(tmpfd))
481 {
482 ajListstrPushAppend(field->data,tmpfd);
483 ajDebug("++%S '%S'\n", field->name, tmpfd);
484 tmpfd = NULL;
485 }
486 }
487
488 ajStrDel(&tmpfd);
489
490 return;
491 }
492
493
494
495
496 /* @func embBtreeParseFieldThird **********************************************
497 **
498 ** Parse field tokens from an input record using the first and third
499 ** matches to a regular expression.
500 **
501 ** @param [r] readline [const AjPStr] Input record
502 ** @param [u] regexp [AjPRegexp] Regular expression to extract tokens
503 ** @param [u] field [EmbPBtreeField] Field
504 ** @return [void]
505 **
506 ** @release 6.4.0
507 ** @@
508 ******************************************************************************/
509
embBtreeParseFieldThird(const AjPStr readline,AjPRegexp regexp,EmbPBtreeField field)510 void embBtreeParseFieldThird(const AjPStr readline, AjPRegexp regexp,
511 EmbPBtreeField field)
512 {
513 AjPStr tmpfd = NULL;
514
515 if(ajRegExec(regexp, readline))
516 {
517 if(field->freecount)
518 tmpfd = field->freelist[--field->freecount];
519
520 ajRegSubI(regexp, 1, &tmpfd);
521
522 if(ajStrGetLen(tmpfd))
523 {
524 ajListstrPushAppend(field->data,tmpfd);
525 ajDebug("++%S '%S'\n", field->name, tmpfd);
526 tmpfd = NULL;
527 if(field->freecount)
528 tmpfd = field->freelist[--field->freecount];
529 }
530
531 ajRegSubI(regexp, 3, &tmpfd);
532
533 if(ajStrGetLen(tmpfd))
534 {
535 ajListstrPushAppend(field->data,tmpfd);
536 ajDebug("++%S '%S'\n", field->name, tmpfd);
537 tmpfd = NULL;
538 }
539 }
540
541 ajStrDel(&tmpfd);
542
543 return;
544 }
545
546
547
548
549 /* @func embBtreeParseFieldTrim ***********************************************
550 **
551 ** Parse field tokens from an input record and trim any trailing whitespace,
552 ** iterating over a regular expression.
553 **
554 ** @param [r]readline [const AjPStr] Input record
555 ** @param [u] regexp [AjPRegexp] Regular expression to extract tokens
556 ** @param [u] field [EmbPBtreeField] Field
557 ** @return [void]
558 **
559 ** @release 6.4.0
560 ** @@
561 ******************************************************************************/
562
embBtreeParseFieldTrim(const AjPStr readline,AjPRegexp regexp,EmbPBtreeField field)563 void embBtreeParseFieldTrim(const AjPStr readline, AjPRegexp regexp,
564 EmbPBtreeField field)
565 {
566 AjPStr tmpfd = NULL;
567
568 ajStrAssignS(&embindexLine,readline);
569
570 while(ajRegExec(regexp, embindexLine))
571 {
572 if(field->freecount)
573 tmpfd = field->freelist[--field->freecount];
574
575 ajRegSubI(regexp, 1, &tmpfd);
576 ajRegPost(regexp, &embindexLine);
577
578 ajStrTrimWhiteEnd(&tmpfd);
579
580 if(!ajStrGetLen(tmpfd))
581 {
582 ajStrDel(&tmpfd);
583 continue;
584 }
585
586 ajListstrPushAppend(field->data,tmpfd);
587 ajDebug("++%S '%S'\n", field->name, tmpfd);
588 tmpfd = NULL;
589 }
590
591 ajStrDel(&tmpfd);
592
593 return;
594 }
595
596
597
598
599 /* @func embBtreeReportEntry **************************************************
600 **
601 ** Report on indexing of entries
602 **
603 ** @param [u] outf [AjPFile] Output file
604 ** @param [r] entry [const EmbPBtreeEntry] Entry
605 **
606 ** @return [void]
607 **
608 ** @release 6.4.0
609 ******************************************************************************/
610
embBtreeReportEntry(AjPFile outf,const EmbPBtreeEntry entry)611 void embBtreeReportEntry(AjPFile outf, const EmbPBtreeEntry entry)
612 {
613 if(entry->idtruncate)
614 ajFmtPrintF(outf,
615 "Entry idlen %u truncated %u IDs. "
616 "Maximum ID length was %u for '%S'.\n",
617 entry->idlen, entry->idtruncate,
618 entry->idmaxlen, entry->maxid);
619 else
620 ajFmtPrintF(outf,
621 "Entry idlen %u OK. "
622 "Maximum ID length was %u for '%S'.\n",
623 entry->idlen,
624 entry->idmaxlen, entry->maxid);
625
626 if(entry->idmaxlen > entry->idlen)
627 {
628 ajWarn("Entry idlen %u truncated %u IDs. "
629 "Maximum ID length was %u for '%S'.",
630 entry->idlen, entry->idtruncate,
631 entry->idmaxlen, entry->maxid);
632 }
633
634 return;
635 }
636
637
638
639
640 /* @func embBtreeReportField **************************************************
641 **
642 ** Report on indexing of field
643 **
644 ** @param [u] outf [AjPFile] Output file
645 ** @param [r] field [const EmbPBtreeField] Field
646 **
647 ** @return [void]
648 **
649 ** @release 6.4.0
650 ******************************************************************************/
651
embBtreeReportField(AjPFile outf,const EmbPBtreeField field)652 void embBtreeReportField(AjPFile outf, const EmbPBtreeField field)
653 {
654 if(field->truncate)
655 ajFmtPrintF(outf,
656 "Field %S %Slen %u truncated %u terms. "
657 "Maximum %S term length was %u for '%S'.\n",
658 field->name, field->name, field->len, field->truncate,
659 field->name, field->maxlen, field->maxkey);
660 else
661 ajFmtPrintF(outf,
662 "Field %S %Slen %u OK. "
663 "Maximum %S term length was %u for '%S'.\n",
664 field->name, field->name, field->len,
665 field->name, field->maxlen, field->maxkey);
666
667 if(field->maxlen > field->len)
668 {
669 ajWarn("Field %S %Slen %u truncated %u terms. "
670 "Maximum %S term length was %u for '%S'.",
671 field->name, field->name, field->len, field->truncate,
672 field->name, field->maxlen, field->maxkey);
673 }
674
675 return;
676 }
677
678
679
680
681 /* @func embBtreeEmblKW *******************************************************
682 **
683 ** Extract keywords from an EMBL KW line
684 **
685 ** @param [r] kwline [const AjPStr] keyword line
686 ** @param [w] kwlist [AjPList] list of keywords
687 ** @param [r] maxlen [ajuint] max keyword length
688 **
689 ** @return [void]
690 **
691 ** @release 3.0.0
692 ** @@
693 ******************************************************************************/
694
embBtreeEmblKW(const AjPStr kwline,AjPList kwlist,ajuint maxlen)695 void embBtreeEmblKW(const AjPStr kwline, AjPList kwlist, ajuint maxlen)
696 {
697 AjPStr token = NULL;
698 AjPStr str = NULL;
699
700 ajStrAssignSubS(&embindexLine, kwline, 5, -1);
701
702 ajStrTokenAssignC(&embindexHandle,embindexLine,"\n\r;");
703
704 while(ajStrTokenNextParse(embindexHandle,&token))
705 {
706 ajStrTrimEndC(&token,".");
707 ajStrTrimWhite(&token);
708
709 if(ajStrGetLen(token))
710 {
711 if(maxlen)
712 {
713 if(ajStrGetLen(token) > maxlen)
714 ajStrAssignSubS(&str,token,0,maxlen-1);
715 else
716 ajStrAssignS(&str,token);
717
718 }
719 else
720 ajStrAssignS(&str,token);
721
722 ajListstrPush(kwlist, str);
723 str = NULL;
724 }
725 }
726
727 ajStrDel(&token);
728
729 return;
730 }
731
732
733
734
735 /* @func embBtreeEmblTX *******************************************************
736 **
737 ** Extract keywords from an EMBL OC or OS line
738 **
739 ** @param [r] txline [const AjPStr] taxonomy line
740 ** @param [w] txlist [AjPList] list of taxons
741 ** @param [r] maxlen [ajuint] max taxon length
742 **
743 ** @return [void]
744 **
745 ** @release 3.0.0
746 ** @@
747 ******************************************************************************/
748
embBtreeEmblTX(const AjPStr txline,AjPList txlist,ajuint maxlen)749 void embBtreeEmblTX(const AjPStr txline, AjPList txlist, ajuint maxlen)
750 {
751 AjPStr token = NULL;
752 AjPStr str = NULL;
753
754 ajStrAssignSubS(&embindexLine, txline, 5, -1);
755
756 ajStrTokenAssignC(&embindexHandle,embindexLine,"\n\r;()");
757
758 while(ajStrTokenNextParse(embindexHandle,&token))
759 {
760 ajStrTrimEndC(&token,".");
761 ajStrTrimEndC(&token," ");
762 ajStrTrimWhite(&token);
763
764 if(ajStrGetLen(token))
765 {
766 if(maxlen)
767 {
768 if(ajStrGetLen(token) > maxlen)
769 ajStrAssignSubS(&str,token,0,maxlen-1);
770 else
771 ajStrAssignS(&str,token);
772
773 }
774 else
775 ajStrAssignS(&str,token);
776
777 ajListstrPush(txlist, str);
778 str = NULL;
779 }
780 }
781
782 ajStrDel(&token);
783
784 return;
785 }
786
787
788
789
790 /* @func embBtreeEmblAC *******************************************************
791 **
792 ** Extract accession numbers from an EMBL AC line
793 **
794 ** @param [r] acline[const AjPStr] AC line
795 ** @param [w] aclist [AjPList] list of accession numbers
796 **
797 ** @return [void]
798 **
799 ** @release 3.0.0
800 ** @@
801 ******************************************************************************/
802
embBtreeEmblAC(const AjPStr acline,AjPList aclist)803 void embBtreeEmblAC(const AjPStr acline, AjPList aclist)
804 {
805 char *p = NULL;
806 char *q = NULL;
807 ajuint lo = 0;
808 ajuint hi = 0;
809 ajuint field = 0;
810 ajuint i;
811 AjPStr str = NULL;
812
813 ajStrAssignSubS(&embindexLine, acline, 5, -1);
814
815 ajStrTokenAssignC(&embindexHandle,embindexLine,"\n\r;");
816
817 while(ajStrTokenNextParse(embindexHandle,&embindexToken))
818 {
819 ajStrTrimWhite(&embindexToken);
820
821 if((p=strchr(MAJSTRGETPTR(embindexToken),(int)'-')))
822 {
823 q = p;
824
825 while(isdigit((int)*(--q)));
826
827 ++q;
828 ajStrAssignSubC(&embindexTstr,q,0,(ajuint)(p-q-1));
829 ajStrToUint(embindexTstr,&lo);
830 field = (ajuint) (p-q);
831 ajFmtPrintS(&embindexFormat,"%%S%%0%uu",field);
832
833 ++p;
834 q = p;
835
836 while(!isdigit((int)*q))
837 ++q;
838
839 sscanf(q,"%u",&hi);
840 ajStrAssignSubC(&embindexPrefix,p,0,(ajuint)(q-p-1));
841
842 for(i=lo;i<=hi;++i)
843 {
844 ajFmtPrintS(&str,MAJSTRGETPTR(embindexFormat),
845 embindexPrefix,i);
846 ajListstrPush(aclist, str);
847 str = NULL;
848 }
849 }
850 else
851 {
852 ajStrAssignS(&str,embindexToken);
853 ajListstrPush(aclist, str);
854 str = NULL;
855 }
856 }
857
858 return;
859 }
860
861
862
863
864 /* @func embBtreeEmblSV *******************************************************
865 **
866 ** Extract sequence version from an EMBL new format ID line
867 **
868 ** @param [r] idline[const AjPStr] AC line
869 ** @param [w] svlist [AjPList] list of accession numbers
870 **
871 ** @return [void]
872 **
873 ** @release 4.0.0
874 ** @@
875 ******************************************************************************/
876
embBtreeEmblSV(const AjPStr idline,AjPList svlist)877 void embBtreeEmblSV(const AjPStr idline, AjPList svlist)
878 {
879 AjPStr token = NULL;
880 AjPStr str = NULL;
881 AjPStr idstr = NULL;
882 AjPStr svstr = NULL;
883
884 ajStrAssignSubS(&embindexLine, idline, 5, -1);
885
886 ajStrTokenAssignC(&embindexHandle,embindexLine," \t\n\r;");
887
888 if(!ajStrTokenNextParse(embindexHandle,&idstr))
889 return;
890
891 if(!ajStrTokenNextParse(embindexHandle,&token))
892 return;
893
894 if(!ajStrTokenNextParse(embindexHandle,&svstr))
895 return;
896
897 if(!ajStrMatchC(token, "SV"))
898 return;
899
900 str = ajStrNewRes(MAJSTRGETLEN(idstr)+MAJSTRGETLEN(svstr)+2);
901
902 ajFmtPrintS(&str,"%S.%S", idstr, svstr);
903
904 ajListstrPush(svlist, str);
905 str = NULL;
906
907 ajStrDel(&idstr);
908 ajStrDel(&svstr);
909 ajStrDel(&token);
910
911 return;
912 }
913
914
915
916
917 /* @func embBtreeEmblDE *******************************************************
918 **
919 ** Extract words from an EMBL DE line
920 **
921 ** @param [r] deline[const AjPStr] description line
922 ** @param [w] delist [AjPList] list of descriptions
923 ** @param [r] maxlen [ajuint] max keyword length
924 **
925 ** @return [void]
926 **
927 ** @release 3.0.0
928 ** @@
929 ******************************************************************************/
930
embBtreeEmblDE(const AjPStr deline,AjPList delist,ajuint maxlen)931 void embBtreeEmblDE(const AjPStr deline, AjPList delist, ajuint maxlen)
932 {
933 AjPStr token = NULL;
934 AjPStr str = NULL;
935
936 ajStrAssignSubS(&embindexLine, deline, 5, -1);
937
938 ajStrTokenAssignC(&embindexHandle,embindexLine,"\n\r \t()");
939
940 while(ajStrTokenNextParse(embindexHandle,&token))
941 {
942 ajStrTrimWhite(&token);
943 ajStrTrimEndC(&token,".,:'\"");
944 ajStrTrimStartC(&token,"'\"");
945
946 if(ajStrGetLen(token))
947 {
948 if(maxlen)
949 {
950 if(ajStrGetLen(token) > maxlen)
951 ajStrAssignSubS(&str,token,0,maxlen-1);
952 else
953 ajStrAssignS(&str,token);
954
955 }
956 else
957 ajStrAssignS(&str,token);
958
959 ajListstrPush(delist, str);
960 str = NULL;
961 }
962 }
963
964 ajStrDel(&token);
965
966 return;
967 }
968
969
970
971
972 /* @func embBtreeParseEmblKw **************************************************
973 **
974 ** Extract keywords from an EMBL KW line
975 **
976 ** @param [r] readline [const AjPStr] keyword line
977 ** @param [u] field [EmbPBtreeField] list of descriptions
978 **
979 ** @return [void]
980 **
981 ** @release 6.5.0
982 ** @@
983 ******************************************************************************/
984
embBtreeParseEmblKw(const AjPStr readline,EmbPBtreeField field)985 void embBtreeParseEmblKw(const AjPStr readline, EmbPBtreeField field)
986 {
987 AjPStr str = NULL;
988
989 ajStrAssignSubS(&embindexLine, readline, 5, -1);
990
991 ajStrTokenAssignC(&embindexHandle,embindexLine,"\n\r;");
992
993 while(ajStrTokenNextParse(embindexHandle,&embindexToken))
994 {
995 ajStrTrimEndC(&embindexToken,".");
996 ajStrTrimWhite(&embindexToken);
997
998 if(ajStrGetLen(embindexToken))
999 {
1000 if(field->freecount)
1001 str = field->freelist[--field->freecount];
1002
1003 ajStrAssignS(&str,embindexToken);
1004
1005 ajListstrPushAppend(field->data, str);
1006 str = NULL;
1007 }
1008 }
1009
1010 return;
1011 }
1012
1013
1014
1015
1016 /* @func embBtreeParseEmblTx **************************************************
1017 **
1018 ** Extract keywords from an EMBL OC or OS line
1019 **
1020 ** @param [r] readline [const AjPStr] taxonomy line
1021 ** @param [u] field [EmbPBtreeField] list of descriptions
1022 **
1023 ** @return [void]
1024 **
1025 ** @release 6.5.0
1026 ** @@
1027 ******************************************************************************/
1028
embBtreeParseEmblTx(const AjPStr readline,EmbPBtreeField field)1029 void embBtreeParseEmblTx(const AjPStr readline, EmbPBtreeField field)
1030 {
1031 AjPStr str = NULL;
1032
1033 ajStrAssignSubS(&embindexLine, readline, 5, -1);
1034
1035 ajStrTokenAssignC(&embindexHandle,embindexLine,"\n\r;()");
1036
1037 while(ajStrTokenNextParse(embindexHandle,&embindexToken))
1038 {
1039 ajStrTrimEndC(&embindexToken,".");
1040 ajStrTrimEndC(&embindexToken," ");
1041 ajStrTrimWhite(&embindexToken);
1042
1043 if(ajStrGetLen(embindexToken))
1044 {
1045 if(field->freecount)
1046 str = field->freelist[--field->freecount];
1047
1048 ajStrAssignS(&str,embindexToken);
1049
1050 ajListstrPushAppend(field->data, str);
1051 str = NULL;
1052 }
1053 }
1054
1055 return;
1056 }
1057
1058
1059
1060
1061 /* @func embBtreeParseEmblAc **************************************************
1062 **
1063 ** Extract accession numbers from an EMBL AC line
1064 **
1065 ** @param [r] readline[const AjPStr] AC line
1066 ** @param [u] field [EmbPBtreeField] list of descriptions
1067 **
1068 ** @return [void]
1069 **
1070 ** @release 6.5.0
1071 ** @@
1072 ******************************************************************************/
1073
embBtreeParseEmblAc(const AjPStr readline,EmbPBtreeField field)1074 void embBtreeParseEmblAc(const AjPStr readline, EmbPBtreeField field)
1075 {
1076 char *p = NULL;
1077 char *q = NULL;
1078 ajuint lo = 0;
1079 ajuint hi = 0;
1080 ajuint ifield = 0;
1081 ajuint i;
1082 AjPStr str = NULL;
1083
1084 ajStrAssignSubS(&embindexLine, readline, 5, -1);
1085
1086 ajStrTokenAssignC(&embindexHandle,embindexLine,"\n\r;");
1087
1088 while(ajStrTokenNextParse(embindexHandle,&embindexToken))
1089 {
1090 ajStrTrimWhite(&embindexToken);
1091
1092 if(field->freecount)
1093 str = field->freelist[--field->freecount];
1094
1095 /* Check for EMBL accession range */
1096 if((p=strchr(MAJSTRGETPTR(embindexToken),(int)'-')))
1097 {
1098 q = p;
1099
1100 while(isdigit((int)*(--q)));
1101
1102 ++q;
1103 ajStrAssignSubC(&embindexTstr,q,0,(ajuint)(p-q-1));
1104 ajStrToUint(embindexTstr,&lo);
1105 ifield = (ajuint) (p-q);
1106 ajFmtPrintS(&embindexFormat,"%%S%%0%uu",ifield);
1107
1108 ++p;
1109 q = p;
1110
1111 while(!isdigit((int)*q))
1112 ++q;
1113
1114 sscanf(q,"%u",&hi);
1115 ajStrAssignSubC(&embindexPrefix,p,0,(ajuint)(q-p-1));
1116
1117 for(i=lo;i<=hi;++i)
1118 {
1119 ajFmtPrintS(&str,MAJSTRGETPTR(embindexFormat),
1120 embindexPrefix,i);
1121 ajListstrPushAppend(field->data, str);
1122 str = NULL;
1123 }
1124 }
1125 else /* simple accession number */
1126 {
1127 ajStrAssignS(&str,embindexToken);
1128 ajListstrPushAppend(field->data, str);
1129 str = NULL;
1130 }
1131 }
1132
1133 return;
1134 }
1135
1136
1137
1138
1139 /* @func embBtreeFindEmblAc ***************************************************
1140 **
1141 ** Returns first accession number from an EMBL/UniProt AC line
1142 **
1143 ** @param [r] readline[const AjPStr] AC line
1144 ** @param [u] field [EmbPBtreeField] list of descriptions
1145 ** @param [w] Pstr [AjPStr*] First accession
1146 **
1147 ** @return [void]
1148 **
1149 ** @release 6.5.0
1150 ** @@
1151 ******************************************************************************/
1152
embBtreeFindEmblAc(const AjPStr readline,EmbPBtreeField field,AjPStr * Pstr)1153 void embBtreeFindEmblAc(const AjPStr readline, EmbPBtreeField field,
1154 AjPStr *Pstr)
1155 {
1156 char *p = NULL;
1157 char *q = NULL;
1158 ajuint lo = 0;
1159 ajuint hi = 0;
1160 ajuint ifield = 0;
1161 ajuint i;
1162
1163 ajStrAssignSubS(&embindexLine, readline, 5, -1);
1164
1165 ajStrTokenAssignC(&embindexHandle,embindexLine," \t\n\r;");
1166
1167 while(ajStrTokenNextParse(embindexHandle,&embindexToken))
1168 {
1169 ajStrTrimWhite(&embindexToken);
1170
1171 /* Check for EMBL accession range */
1172 if((p=strchr(MAJSTRGETPTR(embindexToken),(int)'-')))
1173 {
1174 q = p;
1175
1176 while(isdigit((int)*(--q)));
1177
1178 ++q;
1179 ajStrAssignSubC(&embindexTstr,q,0,(ajuint)(p-q-1));
1180 ajStrToUint(embindexTstr,&lo);
1181 ifield = (ajuint) (p-q);
1182 ajFmtPrintS(&embindexFormat,"%%S%%0%uu",ifield);
1183
1184 ++p;
1185 q = p;
1186
1187 while(!isdigit((int)*q))
1188 ++q;
1189
1190 sscanf(q,"%u",&hi);
1191 ajStrAssignSubC(&embindexPrefix,p,0,(ajuint)(q-p-1));
1192
1193 for(i=lo;i<=hi;++i)
1194 {
1195 if(field->freecount)
1196 *Pstr = field->freelist[--field->freecount];
1197 ajFmtPrintS(Pstr,MAJSTRGETPTR(embindexFormat),
1198 embindexPrefix,i);
1199 return;
1200 }
1201 }
1202 else /* simple accession number */
1203 {
1204 if(field->freecount)
1205 *Pstr = field->freelist[--field->freecount];
1206 ajStrAssignS(Pstr,embindexToken);
1207 return;
1208 }
1209 }
1210
1211 return;
1212 }
1213
1214
1215
1216
1217 /* @func embBtreeParseEmblSv **************************************************
1218 **
1219 ** Extract sequence version from an EMBL new format ID line
1220 **
1221 ** @param [r] readline[const AjPStr] AC line
1222 ** @param [u] field [EmbPBtreeField] list of descriptions
1223 **
1224 ** @return [void]
1225 **
1226 ** @release 6.5.0
1227 ** @@
1228 ******************************************************************************/
1229
embBtreeParseEmblSv(const AjPStr readline,EmbPBtreeField field)1230 void embBtreeParseEmblSv(const AjPStr readline, EmbPBtreeField field)
1231 {
1232 AjPStr str = NULL;
1233
1234 ajStrAssignSubS(&embindexLine, readline, 5, -1);
1235
1236 ajStrTokenAssignC(&embindexHandle,embindexLine," \t\n\r;");
1237
1238 if(!ajStrTokenNextParse(embindexHandle,&embindexToken))
1239 return;
1240
1241 if(field->freecount)
1242 str = field->freelist[--field->freecount];
1243
1244 ajStrAssignS(&str, embindexToken);
1245 ajStrAppendK(&str, '.');
1246
1247 if(!ajStrTokenNextParse(embindexHandle,&embindexToken))
1248 return;
1249
1250 if(!ajStrMatchC(embindexToken, "SV"))
1251 return;
1252
1253 if(!ajStrTokenNextParse(embindexHandle,&embindexToken))
1254 return;
1255
1256 ajStrAppendS(&str, embindexToken);
1257
1258 ajListstrPushAppend(field->data, str);
1259 str = NULL;
1260
1261 return;
1262 }
1263
1264
1265
1266
1267 /* @func embBtreeParseEmblDe **************************************************
1268 **
1269 ** Extract words from an EMBL DE line
1270 **
1271 ** @param [r] readline[const AjPStr] description line
1272 ** @param [u] field [EmbPBtreeField] list of descriptions
1273 **
1274 ** @return [void]
1275 **
1276 ** @release 6.5.0
1277 ** @@
1278 ******************************************************************************/
1279
embBtreeParseEmblDe(const AjPStr readline,EmbPBtreeField field)1280 void embBtreeParseEmblDe(const AjPStr readline, EmbPBtreeField field)
1281 {
1282 AjPStr str = NULL;
1283
1284 ajStrAssignSubS(&embindexLine, readline, 5, -1);
1285
1286 ajStrTokenAssignC(&embindexHandle,embindexLine,"\n\r \t()");
1287
1288 while(ajStrTokenNextParse(embindexHandle,&embindexToken))
1289 {
1290 ajStrTrimWhite(&embindexToken);
1291 ajStrTrimEndC(&embindexToken,".,:;'\"");
1292 ajStrTrimStartC(&embindexToken,"'\"");
1293
1294 if(ajStrGetLen(embindexToken))
1295 {
1296 if(field->freecount)
1297 str = field->freelist[--field->freecount];
1298
1299 ajStrAssignS(&str, embindexToken);
1300
1301 ajListstrPushAppend(field->data, str);
1302 str = NULL;
1303 }
1304 }
1305
1306 return;
1307 }
1308
1309
1310
1311
1312 /* @func embBtreeParseGenbankAc ***********************************************
1313 **
1314 ** Extract accession numbers from a GenBank ACCESSION line
1315 **
1316 ** @param [r] readline [const AjPStr] AC line
1317 ** @param [u] field [EmbPBtreeField] Field with list of data
1318 **
1319 ** @return [void]
1320 **
1321 ** @release 6.5.0
1322 ** @@
1323 ******************************************************************************/
1324
embBtreeParseGenbankAc(const AjPStr readline,EmbPBtreeField field)1325 void embBtreeParseGenbankAc(const AjPStr readline, EmbPBtreeField field)
1326 {
1327 AjPStr str = NULL;
1328 char *p = NULL;
1329 char *q = NULL;
1330 ajuint lo = 0;
1331 ajuint hi = 0;
1332 ajuint ifield = 0;
1333 ajuint i;
1334
1335 ajStrAssignSubS(&embindexLine, readline, 12, -1);
1336
1337 ajStrTokenAssignC(&embindexHandle,embindexLine,"\n\r ");
1338
1339 while(ajStrTokenNextParse(embindexHandle,&embindexToken))
1340 {
1341 ajStrTrimWhite(&embindexToken);
1342
1343 /* check for accession number range */
1344 if((p=strchr(MAJSTRGETPTR(embindexToken),(int)'-')))
1345 {
1346 q = p;
1347
1348 while(isdigit((int)*(--q)));
1349
1350 ++q;
1351 ajStrAssignSubC(&embindexTstr,q,0,(ajuint)(p-q-1));
1352 ajStrToUint(embindexTstr,&lo);
1353 ifield = (ajuint) (p-q);
1354 ajFmtPrintS(&embindexFormat,"%%S%%0%uu",ifield);
1355
1356 ++p;
1357 q = p;
1358
1359 while(!isdigit((int)*q))
1360 ++q;
1361
1362 sscanf(q,"%u",&hi);
1363 ajStrAssignSubC(&embindexPrefix,p,0,(ajuint)(q-p-1));
1364
1365 for(i=lo;i<=hi;++i)
1366 {
1367 if(field->freecount)
1368 str = field->freelist[--field->freecount];
1369
1370 ajFmtPrintS(&str,MAJSTRGETPTR(embindexFormat),embindexPrefix,i);
1371 ajListstrPushAppend(field->data, str);
1372 str = NULL;
1373 }
1374 }
1375 else /* simple accession number */
1376 {
1377 if(field->freecount)
1378 str = field->freelist[--field->freecount];
1379
1380 ajStrAssignS(&str,embindexToken);
1381 ajListstrPushAppend(field->data, str);
1382 }
1383 }
1384
1385 return;
1386 }
1387
1388
1389
1390
1391 /* @func embBtreeParseGenbankDe ***********************************************
1392 **
1393 ** Extract keywords from a GenBank DESCRIPTION line
1394 **
1395 ** @param [r] readline [const AjPStr] AC line
1396 ** @param [u] field [EmbPBtreeField] Field with list of data
1397 **
1398 ** @return [void]
1399 **
1400 ** @release 6.5.0
1401 ** @@
1402 ******************************************************************************/
1403
embBtreeParseGenbankDe(const AjPStr readline,EmbPBtreeField field)1404 void embBtreeParseGenbankDe(const AjPStr readline, EmbPBtreeField field)
1405 {
1406 AjPStr str = NULL;
1407
1408 ajStrAssignSubS(&embindexLine, readline, 10, -1);
1409
1410 ajStrTokenAssignC(&embindexHandle,embindexLine,"\n\r \t()");
1411
1412 while(ajStrTokenNextParse(embindexHandle,&embindexToken))
1413 {
1414 ajStrTrimEndC(&embindexToken,".");
1415 ajStrTrimWhite(&embindexToken);
1416
1417 if(ajStrGetLen(embindexToken))
1418 {
1419 ajStrAssignS(&str,embindexToken);
1420
1421 ajListstrPushAppend(field->data, str);
1422 str = NULL;
1423 }
1424 }
1425
1426 return;
1427 }
1428
1429
1430
1431
1432 /* @func embBtreeParseGenbankKw ***********************************************
1433 **
1434 ** Extract keywords from a GenBank KEYWORDS line
1435 **
1436 ** @param [r] readline [const AjPStr] AC line
1437 ** @param [u] field [EmbPBtreeField] Field with list of data
1438 **
1439 ** @return [void]
1440 **
1441 ** @release 6.5.0
1442 ** @@
1443 ******************************************************************************/
1444
embBtreeParseGenbankKw(const AjPStr readline,EmbPBtreeField field)1445 void embBtreeParseGenbankKw(const AjPStr readline, EmbPBtreeField field)
1446 {
1447 AjPStr str = NULL;
1448
1449 ajStrAssignSubS(&embindexLine, readline, 8, -1);
1450
1451 ajStrTokenAssignC(&embindexHandle,embindexLine,"\n\r;");
1452
1453 while(ajStrTokenNextParse(embindexHandle,&embindexToken))
1454 {
1455 ajStrTrimEndC(&embindexToken,".");
1456 ajStrTrimWhite(&embindexToken);
1457
1458 if(ajStrGetLen(embindexToken))
1459 {
1460 if(field->freecount)
1461 str = field->freelist[--field->freecount];
1462
1463 ajStrAssignS(&str,embindexToken);
1464
1465 ajListstrPushAppend(field->data, str);
1466 str = NULL;
1467 }
1468 }
1469
1470 return;
1471 }
1472
1473
1474
1475
1476 /* @func embBtreeParseGenbankTx ***********************************************
1477 **
1478 ** Extract keywords from a GenBank ORGANISM line
1479 **
1480 ** @param [r] readline [const AjPStr] AC line
1481 ** @param [u] field [EmbPBtreeField] Field with list of data
1482 **
1483 ** @return [void]
1484 **
1485 ** @release 6.5.0
1486 ** @@
1487 ******************************************************************************/
1488
embBtreeParseGenbankTx(const AjPStr readline,EmbPBtreeField field)1489 void embBtreeParseGenbankTx(const AjPStr readline, EmbPBtreeField field)
1490 {
1491 AjPStr str = NULL;
1492
1493 ajStrAssignSubS(&embindexLine, readline, 9, -1);
1494
1495 ajStrTokenAssignC(&embindexHandle,embindexLine,"\n\r;()");
1496
1497 while(ajStrTokenNextParse(embindexHandle,&embindexToken))
1498 {
1499 ajStrTrimEndC(&embindexToken,".");
1500 ajStrTrimEndC(&embindexToken," ");
1501 ajStrTrimWhite(&embindexToken);
1502
1503 if(ajStrGetLen(embindexToken))
1504 {
1505 if(field->freecount)
1506 str = field->freelist[--field->freecount];
1507
1508 ajStrAssignS(&str,embindexToken);
1509
1510 ajListstrPushAppend(field->data, str);
1511 str = NULL;
1512 }
1513 }
1514
1515 return;
1516 }
1517
1518
1519
1520
1521 /* @func embBtreeGenBankAC ****************************************************
1522 **
1523 ** Extract accession numbers from a GenBank ACCESSION line
1524 **
1525 ** @param [r] acline[const AjPStr] AC line
1526 ** @param [w] aclist [AjPList] list of accession numbers
1527 **
1528 ** @return [void]
1529 **
1530 ** @release 3.0.0
1531 ** @@
1532 ******************************************************************************/
1533
embBtreeGenBankAC(const AjPStr acline,AjPList aclist)1534 void embBtreeGenBankAC(const AjPStr acline, AjPList aclist)
1535 {
1536 AjPStr token = NULL;
1537 AjPStr str = NULL;
1538 AjPStr tstr = NULL;
1539 AjPStr prefix = NULL;
1540 AjPStr format = NULL;
1541 char *p = NULL;
1542 char *q = NULL;
1543 ajuint lo = 0;
1544 ajuint hi = 0;
1545 ajuint field = 0;
1546 ajuint i;
1547
1548 ajStrAssignSubS(&embindexLine, acline, 12, -1);
1549
1550 ajStrTokenAssignC(&embindexHandle,embindexLine,"\n\r ");
1551
1552 while(ajStrTokenNextParse(embindexHandle,&token))
1553 {
1554 ajStrTrimWhite(&token);
1555
1556 if((p=strchr(MAJSTRGETPTR(token),(int)'-')))
1557 {
1558 q = p;
1559
1560 while(isdigit((int)*(--q)));
1561
1562 ++q;
1563 ajStrAssignSubC(&tstr,q,0,(ajuint)(p-q-1));
1564 ajStrToUint(tstr,&lo);
1565 field = (ajuint) (p-q);
1566 ajFmtPrintS(&format,"%%S%%0%uu",field);
1567
1568 ++p;
1569 q = p;
1570
1571 while(!isdigit((int)*q))
1572 ++q;
1573
1574 sscanf(q,"%u",&hi);
1575 ajStrAssignSubC(&prefix,p,0,(ajuint)(q-p-1));
1576
1577 for(i=lo;i<=hi;++i)
1578 {
1579 ajFmtPrintS(&str,MAJSTRGETPTR(format),prefix,i);
1580 ajListstrPushAppend(aclist, str);
1581 str = NULL;
1582 }
1583 }
1584 else
1585 {
1586 ajStrAssignS(&str,token);
1587 ajListstrPushAppend(aclist, str);
1588 str = NULL;
1589 }
1590 }
1591
1592 ajStrDel(&tstr);
1593 ajStrDel(&prefix);
1594 ajStrDel(&format);
1595 ajStrDel(&token);
1596
1597 return;
1598 }
1599
1600
1601
1602
1603 /* @func embBtreeGenBankKW ****************************************************
1604 **
1605 ** Extract keywords from a GenBank KEYWORDS line
1606 **
1607 ** @param [r] kwline[const AjPStr] keyword line
1608 ** @param [w] kwlist [AjPList] list of keywords
1609 ** @param [r] maxlen [ajuint] max keyword length
1610 **
1611 ** @return [void]
1612 **
1613 ** @release 3.0.0
1614 ** @@
1615 ******************************************************************************/
1616
embBtreeGenBankKW(const AjPStr kwline,AjPList kwlist,ajuint maxlen)1617 void embBtreeGenBankKW(const AjPStr kwline, AjPList kwlist, ajuint maxlen)
1618 {
1619 AjPStr token = NULL;
1620 AjPStr str = NULL;
1621
1622 ajStrAssignSubS(&embindexLine, kwline, 8, -1);
1623
1624 ajStrTokenAssignC(&embindexHandle,embindexLine,"\n\r;");
1625
1626 while(ajStrTokenNextParse(embindexHandle,&token))
1627 {
1628 ajStrTrimEndC(&token,".");
1629 ajStrTrimWhite(&token);
1630
1631 if(ajStrGetLen(token))
1632 {
1633 if(maxlen)
1634 {
1635 if(ajStrGetLen(token) > maxlen)
1636 ajStrAssignSubS(&str,token,0,maxlen-1);
1637 else
1638 ajStrAssignS(&str,token);
1639
1640 }
1641 else
1642 ajStrAssignS(&str,token);
1643
1644 ajListstrPushAppend(kwlist, str);
1645 str = NULL;
1646 }
1647 }
1648
1649 ajStrDel(&token);
1650
1651 return;
1652 }
1653
1654
1655
1656
1657 /* @func embBtreeGenBankDE ****************************************************
1658 **
1659 ** Extract keywords from a GenBank DESCRIPTION line
1660 **
1661 ** @param [r] kwline[const AjPStr] keyword line
1662 ** @param [w] kwlist [AjPList] list of keywords
1663 ** @param [r] maxlen [ajuint] max keyword length
1664 **
1665 ** @return [void]
1666 **
1667 ** @release 3.0.0
1668 ** @@
1669 ******************************************************************************/
1670
embBtreeGenBankDE(const AjPStr kwline,AjPList kwlist,ajuint maxlen)1671 void embBtreeGenBankDE(const AjPStr kwline, AjPList kwlist, ajuint maxlen)
1672 {
1673 AjPStr token = NULL;
1674 AjPStr str = NULL;
1675
1676 ajStrAssignSubS(&embindexLine, kwline, 10, -1);
1677
1678 ajStrTokenAssignC(&embindexHandle,embindexLine,"\n\r \t()");
1679
1680 while(ajStrTokenNextParse(embindexHandle,&token))
1681 {
1682 ajStrTrimEndC(&token,".");
1683 ajStrTrimWhite(&token);
1684
1685 if(ajStrGetLen(token))
1686 {
1687 if(maxlen)
1688 {
1689 if(ajStrGetLen(token) > maxlen)
1690 ajStrAssignSubS(&str,token,0,maxlen-1);
1691 else
1692 ajStrAssignS(&str,token);
1693
1694 }
1695 else
1696 ajStrAssignS(&str,token);
1697
1698 ajListstrPushAppend(kwlist, str);
1699 str = NULL;
1700 }
1701 }
1702
1703 ajStrDel(&token);
1704
1705 return;
1706 }
1707
1708
1709
1710
1711 /* @func embBtreeGenBankTX ****************************************************
1712 **
1713 ** Extract keywords from a GenBank ORGANISM line
1714 **
1715 ** @param [r] kwline[const AjPStr] keyword line
1716 ** @param [w] kwlist [AjPList] list of keywords
1717 ** @param [r] maxlen [ajuint] max keyword length
1718 **
1719 ** @return [void]
1720 **
1721 ** @release 3.0.0
1722 ** @@
1723 ******************************************************************************/
1724
embBtreeGenBankTX(const AjPStr kwline,AjPList kwlist,ajuint maxlen)1725 void embBtreeGenBankTX(const AjPStr kwline, AjPList kwlist, ajuint maxlen)
1726 {
1727 AjPStr token = NULL;
1728 AjPStr str = NULL;
1729
1730 ajStrAssignSubS(&embindexLine, kwline, 9, -1);
1731
1732 ajStrTokenAssignC(&embindexHandle,embindexLine,"\n\r;()");
1733
1734 while(ajStrTokenNextParse(embindexHandle,&token))
1735 {
1736 ajStrTrimEndC(&token,".");
1737 ajStrTrimEndC(&token," ");
1738 ajStrTrimWhite(&token);
1739
1740 if(ajStrGetLen(token))
1741 {
1742 if(maxlen)
1743 {
1744 if(ajStrGetLen(token) > maxlen)
1745 ajStrAssignSubS(&str,token,0,maxlen-1);
1746 else
1747 ajStrAssignS(&str,token);
1748
1749 }
1750 else
1751 ajStrAssignS(&str,token);
1752
1753 ajListstrPushAppend(kwlist, str);
1754 str = NULL;
1755 }
1756 }
1757
1758 ajStrDel(&token);
1759
1760 return;
1761 }
1762
1763
1764
1765
1766 /* @func embBtreeParseFastaDe *************************************************
1767 **
1768 ** Extract keywords from a Fasta description
1769 **
1770 ** @param [r] readline [const AjPStr] keyword line
1771 ** @param [u] field [EmbPBtreeField] Field with list of data
1772 **
1773 ** @return [void]
1774 **
1775 ** @release 6.5.0
1776 ** @@
1777 ******************************************************************************/
1778
embBtreeParseFastaDe(const AjPStr readline,EmbPBtreeField field)1779 void embBtreeParseFastaDe(const AjPStr readline, EmbPBtreeField field)
1780 {
1781 AjPStr str = NULL;
1782
1783 ajStrTokenAssignC(&embindexHandle,readline,"\n\r ");
1784
1785 while(ajStrTokenNextParse(embindexHandle,&embindexToken))
1786 {
1787 ajStrTrimEndC(&embindexToken,".");
1788 ajStrTrimWhite(&embindexToken);
1789
1790 if(ajStrGetLen(embindexToken))
1791 {
1792 if(field->freecount)
1793 str = field->freelist[--field->freecount];
1794
1795 ajStrAssignS(&str,embindexToken);
1796
1797 ajListstrPushAppend(field->data, str);
1798 str = NULL;
1799 }
1800 }
1801
1802 return;
1803 }
1804
1805
1806
1807
1808 /* @func embBtreeParseFastaAc *************************************************
1809 **
1810 ** Extract sequence version keywords from a Fasta description
1811 **
1812 ** @param [r] readline [const AjPStr] keyword line
1813 ** @param [u] field [EmbPBtreeField] Field with list of data
1814 **
1815 ** @return [void]
1816 **
1817 ** @release 6.5.0
1818 ** @@
1819 ******************************************************************************/
1820
embBtreeParseFastaAc(const AjPStr readline,EmbPBtreeField field)1821 void embBtreeParseFastaAc(const AjPStr readline, EmbPBtreeField field)
1822 {
1823 AjPStr str = NULL;
1824
1825 ajStrTokenAssignC(&embindexHandle,readline,"\n\r ");
1826
1827 while(ajStrTokenNextParse(embindexHandle,&embindexToken))
1828 {
1829 ajStrTrimEndC(&embindexToken,".");
1830 ajStrTrimWhite(&embindexToken);
1831
1832 if(ajStrGetLen(embindexToken))
1833 {
1834 if(field->freecount)
1835 str = field->freelist[--field->freecount];
1836
1837 ajStrAssignS(&str,embindexToken);
1838
1839 ajListstrPushAppend(field->data, str);
1840 str = NULL;
1841 }
1842 }
1843
1844 return;
1845 }
1846
1847
1848
1849
1850 /* @func embBtreeFastaDE ******************************************************
1851 **
1852 ** Extract keywords from a Fasta description
1853 **
1854 ** @param [r] kwline[const AjPStr] keyword line
1855 ** @param [w] kwlist [AjPList] list of keywords
1856 ** @param [r] maxlen [ajuint] max keyword length
1857 **
1858 ** @return [void]
1859 **
1860 ** @release 3.0.0
1861 ** @@
1862 ******************************************************************************/
1863
embBtreeFastaDE(const AjPStr kwline,AjPList kwlist,ajuint maxlen)1864 void embBtreeFastaDE(const AjPStr kwline, AjPList kwlist, ajuint maxlen)
1865 {
1866 AjPStr token = NULL;
1867 AjPStr str = NULL;
1868
1869 ajStrTokenAssignC(&embindexHandle,kwline,"\n\r ");
1870
1871 while(ajStrTokenNextParse(embindexHandle,&token))
1872 {
1873 ajStrTrimEndC(&token,".");
1874 ajStrTrimWhite(&token);
1875
1876 if(ajStrGetLen(token))
1877 {
1878 str = ajStrNew();
1879
1880 if(maxlen)
1881 {
1882 if(ajStrGetLen(token) > maxlen)
1883 ajStrAssignSubS(&str,token,0,maxlen-1);
1884 else
1885 ajStrAssignS(&str,token);
1886
1887 }
1888 else
1889 ajStrAssignS(&str,token);
1890
1891 ajListstrPushAppend(kwlist, str);
1892 str = NULL;
1893 }
1894 }
1895
1896 ajStrDel(&token);
1897
1898 return;
1899 }
1900
1901
1902
1903
1904 /* @func embBtreeParseFastaSv *************************************************
1905 **
1906 ** Extract sequence version keywords from a Fasta description
1907 **
1908 ** @param [r] readline [const AjPStr] keyword line
1909 ** @param [u] field [EmbPBtreeField] Field with list of data
1910 **
1911 ** @return [void]
1912 **
1913 ** @release 6.5.0
1914 ** @@
1915 ******************************************************************************/
1916
embBtreeParseFastaSv(const AjPStr readline,EmbPBtreeField field)1917 void embBtreeParseFastaSv(const AjPStr readline, EmbPBtreeField field)
1918 {
1919 AjPStr str = NULL;
1920
1921 ajStrTokenAssignC(&embindexHandle,readline,"\n\r ");
1922
1923 while(ajStrTokenNextParse(embindexHandle,&embindexToken))
1924 {
1925 ajStrTrimEndC(&embindexToken,".");
1926 ajStrTrimWhite(&embindexToken);
1927
1928 if(ajStrGetLen(embindexToken))
1929 {
1930 if(field->freecount)
1931 str = field->freelist[--field->freecount];
1932
1933 ajStrAssignS(&str,embindexToken);
1934
1935 ajListstrPushAppend(field->data, str);
1936 str = NULL;
1937 }
1938 }
1939
1940 return;
1941 }
1942
1943
1944
1945
1946 /* @func embBtreeFastaSV ******************************************************
1947 **
1948 ** Extract sequence version keywords from a Fasta description
1949 **
1950 ** @param [r] kwline[const AjPStr] sequence version or GI string
1951 ** @param [w] kwlist [AjPList] list of sequence versions
1952 ** @param [r] maxlen [ajuint] max sequence version length
1953 **
1954 ** @return [void]
1955 **
1956 ** @release 6.0.0
1957 ** @@
1958 ******************************************************************************/
1959
embBtreeFastaSV(const AjPStr kwline,AjPList kwlist,ajuint maxlen)1960 void embBtreeFastaSV(const AjPStr kwline, AjPList kwlist, ajuint maxlen)
1961 {
1962 AjPStr token = NULL;
1963 AjPStr str = NULL;
1964
1965 ajStrTokenAssignC(&embindexHandle,kwline,"\n ");
1966
1967 while(ajStrTokenNextParse(embindexHandle,&token))
1968 {
1969 ajStrTrimEndC(&token,".");
1970 ajStrTrimWhite(&token);
1971
1972 if(ajStrGetLen(token))
1973 {
1974 if(maxlen)
1975 {
1976 if(ajStrGetLen(token) > maxlen)
1977 ajStrAssignSubS(&str,token,0,maxlen-1);
1978 else
1979 ajStrAssignS(&str,token);
1980 }
1981 else
1982 ajStrAssignS(&str,token);
1983
1984 ajListstrPushAppend(kwlist, str);
1985 str = NULL;
1986 }
1987 }
1988
1989 ajStrDel(&token);
1990
1991 return;
1992 }
1993
1994
1995
1996
1997 /* @func embBtreeReadDir ******************************************************
1998 **
1999 ** Read files to index
2000 **
2001 ** @param [w] filelist [AjPStr**] list of files to read
2002 ** @param [r] fdirectory [const AjPStr] Directory to scan
2003 ** @param [r] files [const AjPStr] Filename to search for (or NULL)
2004 ** @param [r] exclude [const AjPStr] list of files to exclude
2005 **
2006 ** @return [ajuint] number of matching files
2007 **
2008 ** @release 2.8.0
2009 ** @@
2010 ******************************************************************************/
2011
embBtreeReadDir(AjPStr ** filelist,const AjPStr fdirectory,const AjPStr files,const AjPStr exclude)2012 ajuint embBtreeReadDir(AjPStr **filelist, const AjPStr fdirectory,
2013 const AjPStr files, const AjPStr exclude)
2014 {
2015 AjPList lfiles = NULL;
2016 ajuint nfiles;
2017 ajuint nremove;
2018 ajuint i;
2019 ajuint j;
2020 AjPStr file = NULL;
2021 AjPStr *removelist = NULL;
2022
2023 /* ajDebug("In ajBtreeReadDir\n"); */
2024
2025 lfiles = ajListNew();
2026 nfiles = ajFilelistAddPathWild(lfiles, fdirectory, files);
2027
2028 nremove = ajArrCommaList(exclude,&removelist);
2029
2030 for(i=0;i<nfiles;++i)
2031 {
2032 ajListPop(lfiles,(void **)&file);
2033 ajFilenameTrimPath(&file);
2034
2035 for(j=0;j<nremove && ! ajStrMatchWildS(file,removelist[j]);++j);
2036
2037 if(j == nremove)
2038 ajListstrPushAppend(lfiles, file);
2039 }
2040
2041 nfiles = (ajuint) ajListToarray(lfiles,(void ***)&(*filelist));
2042 ajListFree(&lfiles);
2043
2044 for(i=0; i<nremove;++i)
2045 ajStrDel(&removelist[i]);
2046
2047 AJFREE(removelist);
2048
2049 return nfiles;
2050 }
2051
2052
2053
2054
2055 /* @funcstatic btreeCreateFile ************************************************
2056 **
2057 ** Open B+tree file for writing
2058 **
2059 ** @param [r] idirectory [const AjPStr] Directory for index files
2060 ** @param [r] dbname [const AjPStr] name of database
2061 ** @param [r] add [const char *] type of file
2062 **
2063 ** @return [AjPFile] opened file
2064 **
2065 ** @release 2.9.0
2066 ** @@
2067 ******************************************************************************/
2068
btreeCreateFile(const AjPStr idirectory,const AjPStr dbname,const char * add)2069 static AjPFile btreeCreateFile(const AjPStr idirectory, const AjPStr dbname,
2070 const char *add)
2071 {
2072 AjPStr filename = NULL;
2073 AjPFile fp = NULL;
2074
2075 /* ajDebug("In btreeCreateFile\n"); */
2076
2077 filename = ajStrNew();
2078
2079 if(!ajStrGetLen(idirectory))
2080 ajFmtPrintS(&filename,"%S%s",dbname,add);
2081 else
2082 ajFmtPrintS(&filename,"%S%s%S%s",idirectory,SLASH_STRING,dbname,add);
2083
2084 fp = ajFileNewOutNameS(filename);
2085
2086 ajStrDel(&filename);
2087
2088 return fp;
2089 }
2090
2091
2092
2093
2094 /* @func embBtreeEntryNew *****************************************************
2095 **
2096 ** Construct a database entry object
2097 **
2098 ** @param [r] refcount [ajuint] Number of reference file(s) per entry
2099 ** @return [EmbPBtreeEntry] db entry object pointer
2100 **
2101 ** @release 3.0.0
2102 ** @@
2103 ******************************************************************************/
2104
embBtreeEntryNew(ajuint refcount)2105 EmbPBtreeEntry embBtreeEntryNew(ajuint refcount)
2106 {
2107 EmbPBtreeEntry thys;
2108 ajuint iref;
2109
2110 AJNEW0(thys);
2111
2112 thys->do_id = ajFalse;
2113
2114 thys->dbname = ajStrNew();
2115 thys->dbrs = ajStrNew();
2116 thys->date = ajStrNew();
2117 thys->release = ajStrNew();
2118 thys->dbtype = ajStrNew();
2119
2120 thys->directory = ajStrNew();
2121 thys->idirectory = ajStrNew();
2122 thys->idextension = ajStrNew();
2123 thys->maxid = ajStrNew();
2124
2125 thys->files = ajListNew();
2126
2127 thys->id = ajStrNew();
2128
2129 if(refcount)
2130 {
2131 thys->refcount = refcount;
2132
2133 AJCNEW0(thys->reffpos, refcount);
2134 AJCNEW0(thys->reffiles, refcount);
2135
2136 for(iref=0; iref < refcount; iref++)
2137 thys->reffiles[iref] = ajListNew();
2138 }
2139
2140 return thys;
2141 }
2142
2143
2144
2145
2146 /* @func embBtreeEntryDel *****************************************************
2147 **
2148 ** Delete a database entry object
2149 **
2150 ** @param [d] pthis [EmbPBtreeEntry*] db entry object pointer
2151 ** @return [void]
2152 **
2153 ** @release 3.0.0
2154 ** @@
2155 ******************************************************************************/
2156
embBtreeEntryDel(EmbPBtreeEntry * pthis)2157 void embBtreeEntryDel(EmbPBtreeEntry* pthis)
2158 {
2159 EmbPBtreeEntry thys;
2160 EmbPBtreeField field;
2161 AjPStr tmpstr = NULL;
2162
2163 ajuint iref;
2164
2165 thys = *pthis;
2166
2167 ajStrDel(&thys->dbname);
2168 ajStrDel(&thys->idextension);
2169 ajStrDel(&thys->maxid);
2170 ajStrDel(&thys->dbrs);
2171 ajStrDel(&thys->date);
2172 ajStrDel(&thys->release);
2173 ajStrDel(&thys->dbtype);
2174
2175 ajStrDel(&thys->directory);
2176 ajStrDel(&thys->idirectory);
2177
2178
2179 while(ajListPop(thys->files,(void **)&tmpstr))
2180 ajStrDel(&tmpstr);
2181
2182 ajListFree(&thys->files);
2183
2184 if(thys->reffiles)
2185 {
2186 for(iref=0; iref < thys->refcount; iref++)
2187 {
2188 while(ajListPop(thys->reffiles[iref],(void **)&tmpstr))
2189 ajStrDel(&tmpstr);
2190
2191 ajListFree(&thys->reffiles[iref]);
2192 }
2193
2194 AJFREE(thys->reffiles);
2195 }
2196
2197 if(thys->reffpos)
2198 AJFREE(thys->reffpos);
2199
2200 while(ajListPop(thys->fields,(void **)&field))
2201 embBtreeFieldDel(&field);
2202
2203 ajListFree(&thys->fields);
2204
2205 ajStrDel(&thys->id);
2206
2207 AJFREE(*pthis);
2208
2209 return;
2210 }
2211
2212
2213
2214
2215 /* @func embBtreeEntrySetCompressed *******************************************
2216 **
2217 ** Set database entry to be compressed on writing
2218 **
2219 ** @param [u] entry [EmbPBtreeEntry] Database entry information
2220 **
2221 ** @return [void]
2222 **
2223 ** @release 6.4.0
2224 ** @@
2225 ******************************************************************************/
2226
embBtreeEntrySetCompressed(EmbPBtreeEntry entry)2227 void embBtreeEntrySetCompressed(EmbPBtreeEntry entry)
2228 {
2229 entry->compressed = ajTrue;
2230
2231 return;
2232 }
2233
2234
2235
2236
2237 /* @func embBtreeSetFields ****************************************************
2238 **
2239 ** Set database fields to index
2240 **
2241 ** @param [w] entry [EmbPBtreeEntry] Database entry information
2242 ** @param [r] fields [AjPStr const *] user specified fields
2243 **
2244 ** @return [ajuint] number of fields set
2245 **
2246 ** @release 3.0.0
2247 ** @@
2248 ******************************************************************************/
2249
embBtreeSetFields(EmbPBtreeEntry entry,AjPStr const * fields)2250 ajuint embBtreeSetFields(EmbPBtreeEntry entry, AjPStr const *fields)
2251 {
2252 ajuint nfields;
2253 EmbPBtreeField field = NULL;
2254
2255 nfields = 0;
2256
2257 if(!entry->fields)
2258 entry->fields = ajListNew();
2259
2260 while(fields[nfields])
2261 {
2262 if(ajStrMatchCaseC(fields[nfields], "id"))
2263 entry->do_id = ajTrue;
2264
2265 else
2266 {
2267 field = embBtreeFieldNewS(fields[nfields], entry->refcount);
2268 ajListPushAppend(entry->fields, field);
2269 field = NULL;
2270 }
2271 ++nfields;
2272 }
2273
2274 return nfields;
2275 }
2276
2277
2278
2279
2280 /* @func embBtreeSetDbInfo ****************************************************
2281 **
2282 ** Set general database information
2283 **
2284 ** @param [w] entry [EmbPBtreeEntry] Database entry information
2285 ** @param [r] name [const AjPStr] user specified name
2286 ** @param [r] dbrs [const AjPStr] user specified resource
2287 ** @param [r] date [const AjPStr] user specified date
2288 ** @param [r] release [const AjPStr] user specified release
2289 ** @param [r] type [const AjPStr] user specified type
2290 ** @param [r] directory [const AjPStr] user specified directory
2291 ** @param [r] idirectory [const AjPStr] user specified index directory
2292 **
2293 ** @return [void]
2294 **
2295 ** @release 3.0.0
2296 ** @@
2297 ******************************************************************************/
2298
embBtreeSetDbInfo(EmbPBtreeEntry entry,const AjPStr name,const AjPStr dbrs,const AjPStr date,const AjPStr release,const AjPStr type,const AjPStr directory,const AjPStr idirectory)2299 void embBtreeSetDbInfo(EmbPBtreeEntry entry, const AjPStr name,
2300 const AjPStr dbrs,
2301 const AjPStr date, const AjPStr release,
2302 const AjPStr type, const AjPStr directory,
2303 const AjPStr idirectory)
2304 {
2305 ajStrAssignS(&entry->dbname, name);
2306 ajStrAssignC(&entry->idextension, "xid");
2307 ajStrAssignS(&entry->date, date);
2308 ajStrAssignS(&entry->release, release);
2309 ajStrAssignS(&entry->dbtype, type);
2310 ajStrAssignS(&entry->dbrs, dbrs);
2311
2312 ajStrAssignS(&entry->directory,directory);
2313 ajStrAssignS(&entry->idirectory,idirectory);
2314
2315 return;
2316 }
2317
2318
2319
2320
2321 /* @func embBtreeGetFieldC ****************************************************
2322 **
2323 ** Set database fields to index
2324 **
2325 ** @param [w] entry [EmbPBtreeEntry] Database entry information
2326 ** @param [r] nametxt [const char*] Field name
2327 **
2328 ** @return [EmbPBtreeField] Btree index field definition
2329 **
2330 ** @release 6.4.0
2331 ** @@
2332 ******************************************************************************/
2333
embBtreeGetFieldC(EmbPBtreeEntry entry,const char * nametxt)2334 EmbPBtreeField embBtreeGetFieldC(EmbPBtreeEntry entry, const char * nametxt)
2335 {
2336 EmbPBtreeField ret = NULL;
2337 EmbPBtreeField field = NULL;
2338
2339 AjIList iter;
2340
2341 if(!ajListGetLength(entry->fields))
2342 return NULL;
2343
2344 iter = ajListIterNewread(entry->fields);
2345 while(!ajListIterDone(iter))
2346 {
2347 field = ajListIterGet(iter);
2348 if(ajStrMatchC(field->name, nametxt))
2349 {
2350 ret = field;
2351 break;
2352 }
2353 }
2354
2355 ajListIterDel(&iter);
2356
2357 return ret;
2358 }
2359
2360
2361
2362
2363 /* @func embBtreeGetFieldS ****************************************************
2364 **
2365 ** Set database fields to index
2366 **
2367 ** @param [w] entry [EmbPBtreeEntry] Database entry information
2368 ** @param [r] name [const AjPStr] Field name
2369 **
2370 ** @return [EmbPBtreeField] Btree index field definition
2371 **
2372 ** @release 6.4.0
2373 ** @@
2374 ******************************************************************************/
2375
embBtreeGetFieldS(EmbPBtreeEntry entry,const AjPStr name)2376 EmbPBtreeField embBtreeGetFieldS(EmbPBtreeEntry entry, const AjPStr name)
2377 {
2378 EmbPBtreeField ret = NULL;
2379 EmbPBtreeField field = NULL;
2380
2381 AjIList iter;
2382
2383 if(!ajListGetLength(entry->fields))
2384 return NULL;
2385
2386 iter = ajListIterNewread(entry->fields);
2387 while(!ajListIterDone(iter))
2388 {
2389 field = ajListIterGet(iter);
2390 if(ajStrMatchS(field->name, name))
2391 {
2392 ret = field;
2393 break;
2394 }
2395 }
2396
2397 ajListIterDel(&iter);
2398
2399 return ret;
2400 }
2401
2402
2403
2404
2405 /* @func embBtreeGetFiles *****************************************************
2406 **
2407 ** Read files to index
2408 **
2409 ** @param [u] entry [EmbPBtreeEntry] list of files to read
2410 ** @param [r] fdirectory [const AjPStr] Directory to scan
2411 ** @param [r] files [const AjPStr] Filename to search for (or NULL)
2412 ** @param [r] exclude [const AjPStr] list of files to exclude
2413 **
2414 ** @return [ajuint] number of matching files
2415 **
2416 ** @release 3.0.0
2417 ** @@
2418 ******************************************************************************/
2419
embBtreeGetFiles(EmbPBtreeEntry entry,const AjPStr fdirectory,const AjPStr files,const AjPStr exclude)2420 ajuint embBtreeGetFiles(EmbPBtreeEntry entry, const AjPStr fdirectory,
2421 const AjPStr files, const AjPStr exclude)
2422 {
2423 ajuint nfiles;
2424 ajuint nremove;
2425 ajuint i;
2426 ajuint j;
2427 AjPStr file = NULL;
2428 AjPStr *removelist = NULL;
2429 ajuint count = 0;
2430
2431 /* ajDebug("In embBtreeGetFiles\n"); */
2432
2433 nfiles = ajFilelistAddPathWild(entry->files, fdirectory,files);
2434
2435 nremove = ajArrCommaList(exclude,&removelist);
2436
2437 count = 0;
2438
2439 for(i=0;i<nfiles;++i)
2440 {
2441 ajListPop(entry->files,(void **)&file);
2442 ajFilenameTrimPath(&file);
2443
2444 for(j=0;j<nremove && !ajStrMatchWildS(file,removelist[j]);++j);
2445
2446 if(j == nremove)
2447 {
2448 ajListstrPushAppend(entry->files, file);
2449 ++count;
2450 }
2451 }
2452
2453 ajListSort(entry->files, &ajStrVcmp);
2454
2455 entry->nfiles = count;
2456
2457 for(i=0; i<nremove;++i)
2458 ajStrDel(&removelist[i]);
2459
2460 AJFREE(removelist);
2461
2462 return count;
2463 }
2464
2465
2466
2467
2468 /* @func embBtreeWriteEntryFile ***********************************************
2469 **
2470 ** Put files to entry file
2471 **
2472 ** @param [r] entry [const EmbPBtreeEntry] database data
2473 **
2474 ** @return [AjBool] true on success
2475 **
2476 ** @release 3.0.0
2477 ** @@
2478 ******************************************************************************/
2479
embBtreeWriteEntryFile(const EmbPBtreeEntry entry)2480 AjBool embBtreeWriteEntryFile(const EmbPBtreeEntry entry)
2481 {
2482 AjPFile entfile = NULL;
2483 ajuint i;
2484 ajuint iref;
2485 AjPStr tmpstr = NULL;
2486 AjPStr refstr = NULL;
2487
2488 /* ajDebug("In embBtreeWriteEntryFile\n"); */
2489
2490 entfile = btreeCreateFile(entry->idirectory,entry->dbname,BTENTRYFILE);
2491 if(!entfile)
2492 {
2493 ajWarn("Failed to create file '%S' entry->dbname "
2494 "in directory 'entry->idirectory' "
2495 "error:%d '%s'",
2496 errno, strerror(errno));
2497 return ajFalse;
2498 }
2499
2500 ajFmtPrintF(entfile,"# Number of files: %u\n",entry->nfiles);
2501 ajFmtPrintF(entfile,"# Release: %S\n",entry->release);
2502 ajFmtPrintF(entfile,"# Date: %S\n",entry->date);
2503
2504 if(!entry->refcount)
2505 ajFmtPrintF(entfile,"Single");
2506 else
2507 ajFmtPrintF(entfile,"Reference %u", entry->refcount+1);
2508
2509 ajFmtPrintF(entfile," filename database\n");
2510
2511 for(i=0;i<entry->nfiles;++i)
2512 if(!entry->refcount)
2513 {
2514 ajListPop(entry->files,(void **)&tmpstr);
2515 ajFmtPrintF(entfile,"%S\n",tmpstr);
2516 ajListstrPushAppend(entry->files, tmpstr);
2517 }
2518 else
2519 {
2520 ajListPop(entry->files,(void **)&tmpstr);
2521 ajFmtPrintF(entfile,"%S",tmpstr);
2522 for(iref=0; iref < entry->refcount; iref++)
2523 {
2524 ajListPop(entry->reffiles[iref],(void **)&refstr);
2525 ajFmtPrintF(entfile," %S",refstr);
2526 ajListstrPushAppend(entry->reffiles[iref], refstr);
2527 }
2528
2529 ajFmtPrintF(entfile,"\n");
2530 ajListstrPushAppend(entry->files, tmpstr);
2531 }
2532
2533 ajFileClose(&entfile);
2534
2535 return ajTrue;
2536 }
2537
2538
2539
2540
2541 /* @func embBtreeGetRsInfo ****************************************************
2542 **
2543 ** Get resource information for selected database
2544 **
2545 ** @param [u] entry [EmbPBtreeEntry] database data
2546 **
2547 ** @return [void]
2548 **
2549 ** @release 3.0.0
2550 ** @@
2551 ******************************************************************************/
2552
embBtreeGetRsInfo(EmbPBtreeEntry entry)2553 void embBtreeGetRsInfo(EmbPBtreeEntry entry)
2554 {
2555 AjPStr attrstr = NULL;
2556 AjPStr value = NULL;
2557 ajuint n = 0;
2558 AjIList iter;
2559 EmbPBtreeField field;
2560
2561 value = ajStrNew();
2562
2563 ajStrAssignC(&attrstr, "type");
2564 if(!ajNamRsAttrValueS(entry->dbrs, attrstr, &value))
2565 ajFatal("Missing resource entry (%S) for indexing",entry->dbrs);
2566
2567 if(!ajStrMatchCaseC(value,"Index"))
2568 ajFatal("Incorrect 'type' field for resource (%S)",entry->dbrs);
2569
2570 entry->pricachesize = BT_CACHESIZE;
2571
2572 if(ajNamRsAttrValueC(MAJSTRGETPTR(entry->dbrs),"cachesize",&value) ||
2573 ajNamGetValueC("CACHESIZE",&value))
2574 {
2575 if(ajStrToUint(value,&n))
2576 entry->pricachesize = n;
2577 else
2578 ajErr("Bad value for environment variable 'CACHESIZE'");
2579 }
2580 else
2581 {
2582 ajDebug("CACHESIZE defaults to %d\n", entry->pricachesize);
2583 }
2584
2585 entry->seccachesize = entry->pricachesize;
2586
2587 if(ajNamRsAttrValueC(MAJSTRGETPTR(entry->dbrs),"seccachesize",&value) ||
2588 ajNamGetValueC("SECCACHESIZE",&value))
2589 {
2590 if(ajStrToUint(value,&n))
2591 entry->seccachesize = n;
2592 else
2593 ajErr("Bad value for environment variable 'SECCACHESIZE'");
2594 }
2595 else
2596 {
2597 ajDebug("SECCACHESIZE defaults to %d\n", entry->seccachesize);
2598 }
2599
2600 entry->pripagesize = BT_PAGESIZE;
2601
2602 if(ajNamRsAttrValueC(MAJSTRGETPTR(entry->dbrs),"pagesize",&value) ||
2603 ajNamGetValueC("PAGESIZE",&value))
2604 {
2605 if(ajStrToUint(value,&n))
2606 entry->pripagesize = n;
2607 else
2608 ajErr("Bad value for environment variable 'PAGESIZE'");
2609 }
2610 else
2611 {
2612 ajDebug("PAGESIZE defaults to %d\n", entry->pripagesize);
2613 }
2614
2615 entry->secpagesize = entry->pripagesize;
2616
2617 if(ajNamRsAttrValueC(MAJSTRGETPTR(entry->dbrs),"secpagesize",&value) ||
2618 ajNamGetValueC("SECPAGESIZE",&value))
2619 {
2620 if(ajStrToUint(value,&n))
2621 entry->secpagesize = n;
2622 else
2623 ajErr("Bad value for environment variable 'SECPAGESIZE'");
2624 }
2625 else
2626 {
2627 ajDebug("SECPAGESIZE defaults to %d\n", entry->secpagesize);
2628 }
2629
2630 entry->idlen = BT_KWLIMIT;
2631 ajStrAssignC(&attrstr, "idlen");
2632
2633 if(ajNamRsAttrValueS(entry->dbrs,attrstr,&value))
2634 {
2635 if(ajStrToUint(value,&n))
2636 entry->idlen = n;
2637 else
2638 ajErr("Bad value for index resource 'idlen'");
2639 }
2640
2641 ajStrAssignC(&attrstr, "idpagesize");
2642 if(ajNamRsAttrValueS(entry->dbrs,attrstr,&value))
2643 {
2644 if(ajStrToUint(value,&n))
2645 entry->pripagesize = n;
2646 else
2647 ajErr("Bad value for index resource 'idpagesize'");
2648 }
2649
2650 ajStrAssignC(&attrstr, "idsecpagesize");
2651 if(ajNamRsAttrValueS(entry->dbrs,attrstr,&value))
2652 {
2653 if(ajStrToUint(value,&n))
2654 entry->secpagesize = n;
2655 else
2656 ajErr("Bad value for index resource 'idsecpagesize'");
2657 }
2658
2659 ajStrAssignC(&attrstr, "idcachesize");
2660 if(ajNamRsAttrValueS(entry->dbrs, attrstr, &value))
2661 {
2662 if(ajStrToUint(value,&n))
2663 entry->pricachesize = n;
2664 else
2665 ajErr("Bad value for index resource 'idcachesize'");
2666 }
2667
2668 ajStrAssignC(&attrstr, "idseccachesize");
2669 if(ajNamRsAttrValueS(entry->dbrs, attrstr, &value))
2670 {
2671 if(ajStrToUint(value,&n))
2672 entry->seccachesize = n;
2673 else
2674 ajErr("Bad value for index resource 'idseccachesize'");
2675 }
2676
2677 if(!entry->secpagesize)
2678 entry->secpagesize = entry->pripagesize;
2679
2680 if(!entry->seccachesize)
2681 entry->seccachesize = entry->pricachesize;
2682
2683 entry->idorder = (entry->pripagesize - (BT_NODEPREAMBLE + BT_PTRLEN)) /
2684 ((entry->idlen + 1) + BT_IDKEYEXTRA);
2685
2686 entry->idfill = (entry->pripagesize - BT_BUCKPREAMBLE) /
2687 ((entry->idlen + 1) + BT_KEYLENENTRY +
2688 BT_DDOFF + entry->refcount*BT_EXTRA);
2689
2690 entry->idsecorder = (entry->secpagesize - (BT_NODEPREAMBLE + BT_PTRLEN)) /
2691 (BT_OFFKEYLEN + BT_IDKEYEXTRA);
2692
2693 entry->idsecfill = (entry->secpagesize - BT_BUCKPREAMBLE) /
2694 (BT_DOFF + entry->refcount*BT_EXTRA);
2695
2696 /* now process the same values for each index field */
2697
2698 if(ajListGetLength(entry->fields))
2699 {
2700 iter = ajListIterNewread(entry->fields);
2701
2702 while(!ajListIterDone(iter))
2703 {
2704 field = ajListIterGet(iter);
2705
2706 field->idlen = entry->idlen;
2707
2708 ajFmtPrintS(&attrstr, "%Slen", field->name);
2709 if(!ajNamRsAttrValueS(entry->dbrs,attrstr,&value))
2710 field->len = ajBtreeFieldGetLenS(field->name);
2711 else
2712 {
2713 if(ajStrToUint(value,&n))
2714 field->len = n;
2715 else
2716 {
2717 ajErr("Bad value for index resource '%S'", attrstr);
2718 field->len = 15;
2719 }
2720 }
2721
2722 field->pripagesize = entry->pripagesize;
2723 field->secpagesize = entry->secpagesize;
2724
2725 ajFmtPrintS(&attrstr, "%Spagesize", field->name);
2726
2727 if(ajNamRsAttrValueS(entry->dbrs,attrstr,&value))
2728 {
2729 if(ajStrToUint(value,&n))
2730 {
2731 field->pripagesize = n;
2732 field->secpagesize = n;
2733 }
2734 else
2735 {
2736 ajErr("Bad value for index resource '%S'", attrstr);
2737 field->pripagesize = entry->pripagesize;
2738 }
2739 }
2740
2741
2742 ajFmtPrintS(&attrstr, "%Ssecpagesize", field->name);
2743
2744 if(ajNamRsAttrValueS(entry->dbrs,attrstr,&value))
2745 {
2746 if(ajStrToUint(value,&n))
2747 {
2748 field->secpagesize = n;
2749 }
2750 else
2751 {
2752 ajErr("Bad value for index resource '%S'", attrstr);
2753 }
2754 }
2755
2756 field->pricachesize = entry->pricachesize;
2757 field->seccachesize = entry->seccachesize;
2758
2759 ajFmtPrintS(&attrstr, "%Scachesize", field->name);
2760
2761 if(ajNamRsAttrValueS(entry->dbrs,attrstr,&value))
2762 {
2763 if(ajStrToUint(value,&n))
2764 {
2765 field->pricachesize = n;
2766 field->seccachesize = n;
2767 }
2768 else
2769 {
2770 ajErr("Bad value for index resource '%Scachesize'",
2771 field->name);
2772 field->pricachesize = entry->pricachesize;
2773 }
2774 }
2775
2776 ajFmtPrintS(&attrstr, "%Sseccachesize", field->name);
2777
2778 if(ajNamRsAttrValueS(entry->dbrs,attrstr,&value))
2779 {
2780 if(ajStrToUint(value,&n))
2781 {
2782 field->seccachesize = n;
2783 }
2784 else
2785 {
2786 ajErr("Bad value for index resource '%S'", attrstr);
2787 }
2788 }
2789
2790 field->order =
2791 (field->pripagesize - (BT_NODEPREAMBLE + BT_PTRLEN)) /
2792 ((field->len + 1) + BT_IDKEYEXTRA);
2793
2794 field->fill =
2795 (field->pripagesize - BT_BUCKPREAMBLE) /
2796 ((field->len + 1) + BT_KEYLENENTRY +
2797 BT_DDOFF + field->refcount*BT_EXTRA);
2798
2799 if(!field->secondary)
2800 {
2801 field->secorder =
2802 (field->secpagesize - (BT_NODEPREAMBLE + BT_PTRLEN)) /
2803 (BT_OFFKEYLEN + BT_IDKEYEXTRA);
2804 field->secfill =
2805 (field->secpagesize - BT_BUCKPREAMBLE) /
2806 (BT_DOFF + field->refcount*BT_EXTRA);
2807 }
2808 else
2809 {
2810 /*
2811 * The secondary tree keys are the IDs of the entries containing
2812 * the keywords so we use the entry idlen for their size limit
2813 */
2814 field->secorder =
2815 (field->secpagesize - (BT_NODEPREAMBLE + BT_PTRLEN)) /
2816 ((entry->idlen + 1) + BT_IDKEYEXTRA);
2817 field->secfill =
2818 (field->secpagesize - BT_BUCKPREAMBLE) /
2819 ((entry->idlen + 1) + BT_KEYLENENTRY);
2820 }
2821 }
2822
2823 ajListIterDel(&iter);
2824 }
2825
2826 ajStrDel(&attrstr);
2827 ajStrDel(&value);
2828
2829 return;
2830 }
2831
2832
2833
2834
2835 /* @func embBtreeOpenCaches ***************************************************
2836 **
2837 ** Open index files for writing
2838 **
2839 ** @param [u] entry [EmbPBtreeEntry] database data
2840 **
2841 ** @return [AjBool] true on success
2842 **
2843 ** @release 3.0.0
2844 ** @@
2845 ******************************************************************************/
2846
embBtreeOpenCaches(EmbPBtreeEntry entry)2847 AjBool embBtreeOpenCaches(EmbPBtreeEntry entry)
2848 {
2849 ajuint level = 0;
2850 ajlong count = 0L;
2851 ajlong countall = 0L;
2852 AjIList iter;
2853 EmbPBtreeField field;
2854
2855 if(entry->do_id)
2856 {
2857 entry->idcache = ajBtreeIdcacheNewS(entry->dbname,
2858 entry->idextension,
2859 entry->idirectory,
2860 "wb+",
2861 entry->compressed,
2862 entry->idlen,
2863 entry->refcount,
2864 entry->pripagesize,
2865 entry->secpagesize,
2866 entry->pricachesize,
2867 entry->seccachesize,
2868 0,
2869 0,
2870 entry->idorder,
2871 entry->idfill,
2872 level,
2873 entry->idsecorder,
2874 entry->idsecfill,
2875 count,
2876 countall);
2877 if(!entry->idcache)
2878 ajFatal("Cannot open ID index");
2879 }
2880
2881 if(ajListGetLength(entry->fields))
2882 {
2883 iter = ajListIterNewread(entry->fields);
2884
2885 while(!ajListIterDone(iter))
2886 {
2887 field = ajListIterGet(iter);
2888
2889 if(field->secondary)
2890 field->cache = ajBtreeSeccacheNewS(entry->dbname,
2891 field->extension,
2892 entry->idirectory,
2893 "wb+",
2894 field->compressed,
2895 field->len,
2896 field->idlen,
2897 field->pripagesize,
2898 field->secpagesize,
2899 field->pricachesize,
2900 field->seccachesize,
2901 field->pripagecount,
2902 field->secpagecount,
2903 field->order,
2904 field->fill,
2905 level,
2906 field->secorder,
2907 field->secfill,
2908 count,
2909 countall);
2910 else
2911 field->cache = ajBtreeIdcacheNewS(entry->dbname,
2912 field->extension,
2913 entry->idirectory,
2914 "wb+",
2915 field->compressed,
2916 field->len,
2917 field->refcount,
2918 field->pripagesize,
2919 field->secpagesize,
2920 field->pricachesize,
2921 field->seccachesize,
2922 field->pripagecount,
2923 field->secpagecount,
2924 field->order,
2925 field->fill,
2926 level,
2927 field->secorder,
2928 field->secfill,
2929 count,
2930 countall);
2931 if(!field->cache)
2932 ajFatal("Cannot open %S index", field->extension);
2933 }
2934
2935 ajListIterDel(&iter);
2936 }
2937
2938 return ajTrue;
2939 }
2940
2941
2942
2943
2944 /* @func embBtreeCloseCaches **************************************************
2945 **
2946 ** Close index files
2947 **
2948 ** @param [u] entry [EmbPBtreeEntry] database data
2949 **
2950 ** @return [AjBool] true on success
2951 **
2952 ** @release 3.0.0
2953 ** @@
2954 ******************************************************************************/
2955
embBtreeCloseCaches(EmbPBtreeEntry entry)2956 AjBool embBtreeCloseCaches(EmbPBtreeEntry entry)
2957 {
2958 AjIList iter;
2959 EmbPBtreeField field;
2960
2961 if(entry->do_id)
2962 {
2963 ajBtreeCacheDel(&entry->idcache);
2964 }
2965
2966 if(ajListGetLength(entry->fields))
2967 {
2968 iter = ajListIterNewread(entry->fields);
2969
2970 while(!ajListIterDone(iter))
2971 {
2972 field = ajListIterGet(iter);
2973
2974 ajBtreeCacheDel(&field->cache);
2975 }
2976 ajListIterDel(&iter);
2977 }
2978
2979 return ajTrue;
2980 }
2981
2982
2983
2984
#if 0
/* @func embBtreeProbeCaches **************************************************
**
** Probe index caches
**
** Calls ajBtreeProbePriArray and ajBtreeProbeSecArray on the ID cache when
** entry->do_id is set, and on the cache of every field whose extension is
** "ac" or "sv". This function is currently compiled out.
**
** @param [u] entry [EmbPBtreeEntry] database data
**
** @return [AjBool] true on success
**
** @release 6.0.0
** @@
******************************************************************************/

AjBool embBtreeProbeCaches(EmbPBtreeEntry entry)
{
    AjIList fielditer;
    EmbPBtreeField fld;

    if(entry->do_id)
    {
        ajBtreeProbePriArray(entry->idcache);
        ajBtreeProbeSecArray(entry->idcache);
    }

    if(!ajListGetLength(entry->fields))
        return ajTrue;

    fielditer = ajListIterNewread(entry->fields);

    while(!ajListIterDone(fielditer))
    {
        fld = ajListIterGet(fielditer);

        if(!ajStrMatchC(fld->extension, "ac") &&
           !ajStrMatchC(fld->extension, "sv"))
            continue;

        ajBtreeProbePriArray(fld->cache);
        ajBtreeProbeSecArray(fld->cache);
    }

    ajListIterDel(&fielditer);

    return ajTrue;
}

#endif
3030
3031
3032
3033
3034 /* @func embBtreeDumpParameters ***********************************************
3035 **
3036 ** Write index parameter files
3037 **
3038 ** @param [u] entry [EmbPBtreeEntry] database data
3039 **
3040 ** @return [AjBool] true on success
3041 **
3042 ** @release 3.0.0
3043 ** @@
3044 ******************************************************************************/
3045
embBtreeDumpParameters(EmbPBtreeEntry entry)3046 AjBool embBtreeDumpParameters(EmbPBtreeEntry entry)
3047 {
3048 AjIList iter;
3049 EmbPBtreeField field;
3050
3051 if(entry->do_id)
3052 ajBtreeWriteParamsS(entry->idcache, entry->dbname,
3053 entry->idextension, entry->idirectory);
3054
3055 if(ajListGetLength(entry->fields))
3056 {
3057 iter = ajListIterNewread(entry->fields);
3058
3059 while(!ajListIterDone(iter))
3060 {
3061 field = ajListIterGet(iter);
3062 ajBtreeWriteParamsS(field->cache, entry->dbname,
3063 field->extension, entry->idirectory);
3064 }
3065 ajListIterDel(&iter);
3066 }
3067
3068 return ajTrue;
3069 }
3070
3071
3072
3073
3074 /* @func embBtreeFieldNewC ****************************************************
3075 **
3076 ** Constructor for a Btree index field
3077 **
3078 ** @param [r] nametxt [const char*] Name
3079 ** @return [EmbPBtreeField] Btree field
3080 **
3081 ** @release 6.4.0
3082 ******************************************************************************/
3083
embBtreeFieldNewC(const char * nametxt)3084 EmbPBtreeField embBtreeFieldNewC(const char* nametxt)
3085 {
3086 EmbPBtreeField ret = NULL;
3087
3088 AJNEW0(ret);
3089
3090 ajStrAssignC(&ret->name, nametxt);
3091 ajStrAssignS(&ret->extension, ajBtreeFieldGetExtensionC(nametxt));
3092 ret->secondary = ajBtreeFieldGetSecondaryC(nametxt);
3093
3094 if(!ajStrGetLen(ret->extension))
3095 {
3096 ajStrAssignK(&ret->extension, 'x');
3097 ajStrAppendC(&ret->extension, nametxt);
3098 }
3099
3100 ret->data = ajListNew();
3101
3102 return ret;
3103 }
3104
3105
3106
3107
3108 /* @func embBtreeFieldNewS ****************************************************
3109 **
3110 ** Constructor for a Btree index field
3111 **
3112 ** @param [r] name [const AjPStr] Name
3113 ** @param [r] refcount [ajuint] Number of reference files
3114 ** @return [EmbPBtreeField] Btree field
3115 **
3116 ** @release 6.4.0
3117 ******************************************************************************/
3118
embBtreeFieldNewS(const AjPStr name,ajuint refcount)3119 EmbPBtreeField embBtreeFieldNewS(const AjPStr name, ajuint refcount)
3120 {
3121 EmbPBtreeField ret = NULL;
3122
3123 AJNEW0(ret);
3124
3125 ret->name = ajStrNewS(name);
3126 ret->extension = ajStrNewS(ajBtreeFieldGetExtensionS(name));
3127 ret->secondary = ajBtreeFieldGetSecondaryS(name);
3128
3129 if(!ajStrGetLen(ret->extension))
3130 {
3131 ajStrAssignK(&ret->extension, 'x');
3132 ajStrAppendS(&ret->extension, name);
3133 }
3134
3135 ret->maxkey = ajStrNewC("");
3136
3137 ret->data = ajListNew();
3138
3139 ret->refcount = refcount;
3140
3141 return ret;
3142 }
3143
3144
3145
3146
3147 /* @func embBtreeFieldDel *****************************************************
3148 **
3149 ** Destructor for a Btree index field
3150 **
3151 ** @param [d] Pthis [EmbPBtreeField*] Btree index field object
3152 ** @return [void]
3153 **
3154 ** @release 6.4.0
3155 ******************************************************************************/
3156
embBtreeFieldDel(EmbPBtreeField * Pthis)3157 void embBtreeFieldDel(EmbPBtreeField *Pthis)
3158 {
3159 EmbPBtreeField thys;
3160
3161 if(!Pthis) return;
3162
3163 thys = *Pthis;
3164
3165 ajStrDel(&thys->name);
3166 ajStrDel(&thys->extension);
3167 ajStrDel(&thys->maxkey);
3168 ajListstrFree(&thys->data);
3169
3170 while(thys->freecount)
3171 ajStrDel(&thys->freelist[--thys->freecount]);
3172
3173 if(thys->freelist)
3174 AJFREE(thys->freelist);
3175
3176 AJFREE(*Pthis);
3177 *Pthis = NULL;
3178
3179 return;
3180 }
3181
3182
3183
3184
3185
3186 /* @func embBtreeFieldSetCompressed *******************************************
3187 **
3188 ** Set database field to be compressed on writing
3189 **
3190 ** @param [u] field [EmbPBtreeField] Database field information
3191 **
3192 ** @return [void]
3193 **
3194 ** @release 6.4.0
3195 ** @@
3196 ******************************************************************************/
3197
embBtreeFieldSetCompressed(EmbPBtreeField field)3198 void embBtreeFieldSetCompressed(EmbPBtreeField field)
3199 {
3200 field->compressed = ajTrue;
3201
3202 return;
3203 }
3204
3205
3206
3207
3208 /* @func embBtreeFieldSetIdtype ***********************************************
3209 **
3210 ** Set database field to be identifier type (not secondary) on writing
3211 **
3212 ** @param [u] field [EmbPBtreeField] Database field information
3213 **
3214 ** @return [void]
3215 **
3216 ** @release 6.4.0
3217 ** @@
3218 ******************************************************************************/
3219
embBtreeFieldSetIdtype(EmbPBtreeField field)3220 void embBtreeFieldSetIdtype(EmbPBtreeField field)
3221 {
3222 field->secondary = ajFalse;
3223
3224 return;
3225 }
3226
3227
3228
3229
3230 /* @func embBtreeFieldSetSecondary ********************************************
3231 **
3232 ** Set database field to be secondary on writing
3233 **
3234 ** @param [u] field [EmbPBtreeField] Database field information
3235 **
3236 ** @return [void]
3237 **
3238 ** @release 6.4.0
3239 ** @@
3240 ******************************************************************************/
3241
embBtreeFieldSetSecondary(EmbPBtreeField field)3242 void embBtreeFieldSetSecondary(EmbPBtreeField field)
3243 {
3244 field->secondary = ajTrue;
3245
3246 return;
3247 }
3248
3249
3250
3251
3252 /* @func embIndexExit *********************************************************
3253 **
3254 ** Cleanup indexing internals on exit
3255 **
3256 ** @return [void]
3257 **
3258 ** @release 6.0.0
3259 ******************************************************************************/
3260
embIndexExit(void)3261 void embIndexExit(void)
3262 {
3263 ajStrDel(&embindexLine);
3264 ajStrDel(&embindexToken);
3265 ajStrDel(&embindexTstr);
3266 ajStrDel(&embindexPrefix);
3267 ajStrDel(&embindexFormat);
3268 ajStrTokenDel(&embindexHandle);
3269
3270 ajStrDel(&indexWord);
3271 ajBtreeIdDel(&indexId);
3272
3273 return;
3274 }
3275