1 /* @source ajfeatdb ***********************************************************
2 **
3 ** AJAX feature database access functions
4 **
5 ** These functions control all aspects of AJAX feature database access
6 **
7 ** @author Copyright (C) 2010 Peter Rice
8 ** @version $Revision: 1.46 $
9 ** @modified Sep 2010 pmr first version
10 ** @modified $Date: 2012/12/07 10:20:52 $ by $Author: rice $
11 ** @@
12 **
13 ** This library is free software; you can redistribute it and/or
14 ** modify it under the terms of the GNU Lesser General Public
15 ** License as published by the Free Software Foundation; either
16 ** version 2.1 of the License, or (at your option) any later version.
17 **
18 ** This library is distributed in the hope that it will be useful,
19 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
20 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21 ** Lesser General Public License for more details.
22 **
23 ** You should have received a copy of the GNU Lesser General Public
24 ** License along with this library; if not, write to the Free Software
25 ** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
26 ** MA  02110-1301,  USA.
27 **
28 ******************************************************************************/
29 
30 
31 #include "ajlib.h"
32 
33 #include "ajfeatdb.h"
34 #include "ajfeat.h"
35 #include "ajfeatread.h"
36 #include "ajtextdata.h"
37 
38 #include "ajtagval.h"
39 #include "ajsql.h"
40 #include "ajindex.h"
41 #include "ajhttp.h"
42 #include "ajutil.h"
43 #include "ajnam.h"
44 #include "ajcall.h"
45 #include "ajfileio.h"
46 
47 
48 #include <limits.h>
49 #include <stdarg.h>
50 #include <sys/types.h>
51 #include <errno.h>
52 #include <signal.h>
53 
54 
55 #ifndef WIN32
56 #include <sys/socket.h>
57 #include <netinet/in.h>
58 #include <arpa/inet.h>
59 
60 #include <netdb.h>
61 
62 #include <dirent.h>
63 #include <unistd.h>
64 #else
65 #include <winsock2.h>
66 #include <ws2tcpip.h>
67 #endif
68 
69 
70 
/* Regular expression objects cached at file scope. The code that
** compiles and uses them is not visible in this section of the file. */
static AjPRegexp featCdDivExp = NULL;

static AjPRegexp featRegGcgId = NULL;
static AjPRegexp featRegGcgCont = NULL;
static AjPRegexp featRegGcgId2 = NULL;
static AjPRegexp featRegGcgSplit = NULL;

static AjPRegexp featRegGcgRefId = NULL;

/* Shared name buffer returned by featCdIdxName. It is reallocated
** (featCdMaxNameSize+1 bytes) whenever a longer name field is needed and
** is overwritten on every read, so callers must not keep the pointer
** across calls. */
static char* featCdName = NULL;
static ajuint featCdMaxNameSize = 0;
82 
83 
/* @datastatic FeatPCdDiv *****************************************************
**
** EMBLCD division file record structure
**
** @alias FeatSCdDiv
** @alias FeatOCdDiv
**
** @attr FileName [AjPStr] Filename(s)
** @attr DivCode [ajuint] Division code
** @attr Padding [char[4]] Padding to alignment boundary
** @@
******************************************************************************/

typedef struct FeatSCdDiv
{
    AjPStr FileName;
    ajuint DivCode;
    char Padding[4];
} FeatOCdDiv;

/* NOTE: the pointer alias is a macro rather than a typedef, so
** "FeatPCdDiv a, b;" would declare only a as a pointer. */
#define FeatPCdDiv FeatOCdDiv*
105 
106 
107 
108 
/* @datastatic FeatPCdEntry ***************************************************
**
** EMBLCD entrynam.idx file record structure
**
** featCdIdxQuery allocates one of these for each matching index record,
** copies the division code and offsets from the index line, and appends
** it to the query results list.
**
** @alias FeatSCdEntry
** @alias FeatOCdEntry
**
** @attr div [ajuint] division file record
** @attr annoff [ajuint] data file offset
** @attr seqoff [ajuint] sequence file offset (if any)
** @@
******************************************************************************/

typedef struct FeatSCdEntry
{
    ajuint div;
    ajuint annoff;
    ajuint seqoff;
} FeatOCdEntry;

/* NOTE: the pointer alias is a macro rather than a typedef, so
** "FeatPCdEntry a, b;" would declare only a as a pointer. */
#define FeatPCdEntry FeatOCdEntry*
130 
131 
132 
133 
/* @datastatic FeatPCdFHeader **************************************************
**
** EMBLCD index file header structure, same for all index files.
**
** featCdFileOpen allocates the header, calls featCdReadHeader on the
** owning file object, and then copies NRecords and RecSize into the
** FeatPCdFile for use when seeking to records.
**
** @alias FeatSCdFHeader
** @alias FeatOCdFHeader
**
** @attr FileSize [ajuint] Index file size
** @attr NRecords [ajuint] Index record count
** @attr IdSize [ajuint] Index string length
** @attr RelDay [ajuint] Release date - day
** @attr RelMonth [ajuint] Release date - month
** @attr RelYear [ajuint] Release date - year
** @attr RecSize [short] Record size
** @attr SPadding [short] Padding to alignment boundary
** @attr DbName [char[24]] Database name
** @attr Release [char[12]] Release name/number
** @attr Date [char[4]] Date as three integers.
** @@
******************************************************************************/

typedef struct FeatSCdFHeader
{
    ajuint FileSize;
    ajuint NRecords;
    ajuint IdSize;
    ajuint RelDay;
    ajuint RelMonth;
    ajuint RelYear;
    short RecSize;
    short SPadding;
    char DbName[24];
    char Release[12];
    char Date[4];
} FeatOCdFHeader;

/* NOTE: the pointer alias is a macro rather than a typedef, so
** "FeatPCdFHeader a, b;" would declare only a as a pointer. */
#define FeatPCdFHeader FeatOCdFHeader*
171 
172 
173 
174 
/* @datastatic FeatPCdFile ****************************************************
**
** EMBLCD file data structure
**
** Created by featCdFileOpen and released by featCdFileClose, which also
** closes File and frees Header. featCdFileSeek positions File at byte
** 300 + (record number * RecSize).
**
** @alias FeatSCdFile
** @alias FeatOCdFile
**
** @attr Header [FeatPCdFHeader] Header data
** @attr File [AjPFile] File
** @attr NRecords [ajuint] Number of records
** @attr RecSize [ajuint] Record length (for calculating record offsets)
** @@
******************************************************************************/

typedef struct FeatSCdFile
{
    FeatPCdFHeader Header;
    AjPFile File;
    ajuint NRecords;
    ajuint RecSize;
} FeatOCdFile;

/* NOTE: the pointer alias is a macro rather than a typedef, so
** "FeatPCdFile a, b;" would declare only a as a pointer. */
#define FeatPCdFile FeatOCdFile*
198 
199 
200 
201 
/* @datastatic FeatPCdHit *****************************************************
**
** EMBLCD hit file record structure
**
** @alias FeatSCdHit
** @alias FeatOCdHit
**
** @attr HitList [ajuint*] Array of hits, as record numbers in the
**                         entrynam.idx file
** @attr NHits [ajuint] Number of hits in HitList array
** @attr Padding [char[4]] Padding to alignment boundary
** @@
******************************************************************************/

typedef struct FeatSCdHit
{
    ajuint* HitList;
    ajuint NHits;
    char Padding[4];
} FeatOCdHit;

/* NOTE: the pointer alias is a macro rather than a typedef, so
** "FeatPCdHit a, b;" would declare only a as a pointer. */
#define FeatPCdHit FeatOCdHit*
224 
225 
226 
227 
/* @datastatic FeatPCdIdx *****************************************************
**
** EMBLCD entryname index file record structure
**
** Filled in by featCdIdxLine. featCdIdxQuery matches EntryName against
** the wildcard query and uses DivCode (minus one) to index the query's
** Skip array.
**
** @alias FeatSCdIdx
** @alias FeatOCdIdx
**
** @attr AnnOffset [ajuint] Data file offset (see DivCode)
** @attr SeqOffset [ajuint] Sequence file offset (if any) (see DivCode)
** @attr EntryName [AjPStr] Entry ID - the file is sorted by these
** @attr DivCode [ajushort] Division file record
** @attr Padding [char[6]] Padding to alignment boundary
** @@
******************************************************************************/

typedef struct FeatSCdIdx
{
    ajuint AnnOffset;
    ajuint SeqOffset;
    AjPStr EntryName;
    ajushort DivCode;
    char Padding[6];
} FeatOCdIdx;

/* NOTE: the pointer alias is a macro rather than a typedef, so
** "FeatPCdIdx a, b;" would declare only a as a pointer. */
#define FeatPCdIdx FeatOCdIdx*
253 
254 
255 
256 
/* @datastatic FeatPCdTrg *****************************************************
**
** EMBLCD target (.trg) file record structure
**
** featCdTrgSearch fills one of these through featCdTrgLine and treats a
** record whose NHits is zero as "not found".
**
** @alias FeatSCdTrg
** @alias FeatOCdTrg
**
** @attr FirstHit [ajuint] First hit record in .hit file
** @attr NHits [ajuint] Number of hit records in .hit file
** @attr Target [AjPStr] Indexed target string (the file is sorted by these)
** @@
******************************************************************************/

typedef struct FeatSCdTrg
{
    ajuint FirstHit;
    ajuint NHits;
    AjPStr Target;
} FeatOCdTrg;

/* NOTE: the pointer alias is a macro rather than a typedef, so
** "FeatPCdTrg a, b;" would declare only a as a pointer. */
#define FeatPCdTrg FeatOCdTrg*
278 
279 
280 
281 
/* @datastatic FeatPCdQry *****************************************************
**
** EMBLCD query structure
**
** Stored as the QryData of an AjPQuery. featCdIdxQuery reads ifp and
** idxLine from here and indexes Skip by (division code - 1) to decide
** whether a matching entry is kept.
**
** @alias FeatSCdQry
** @alias FeatOCdQry
**
** @attr divfile [AjPStr] division.lkp
** @attr idxfile [AjPStr] entryname.idx
** @attr datfile [AjPStr] main data reference
** @attr seqfile [AjPStr] sequence
** @attr tblfile [AjPStr] BLAST table
** @attr srcfile [AjPStr] BLAST FASTA source data
** @attr dfp [FeatPCdFile] division.lkp
** @attr ifp [FeatPCdFile] entryname.idx
** @attr trgfp [FeatPCdFile] acnum.trg
** @attr hitfp [FeatPCdFile] acnum.hit
** @attr trgLine [FeatPCdTrg] acnum input line
** @attr name [char*] filename from division.lkp
** @attr nameSize [ajuint] division.lkp filename length
** @attr div [ajuint] current division number
** @attr maxdiv [ajuint] max division number
** @attr type [ajuint] BLAST type
** @attr libr [AjPFile] main data reference or BLAST header
** @attr libs [AjPFile] sequence or BLAST compressed sequence
** @attr libt [AjPFile] blast table
** @attr libf [AjPFile] blast FASTA source data
** @attr idnum [ajuint] current BLAST entry offset
** @attr TopHdr [ajuint] BLAST table headers offset
** @attr TopCmp [ajuint] BLAST table sequence offset
** @attr TopAmb [ajuint] BLAST table ambiguities offset
** @attr TopSrc [ajuint] BLAST table FASTA source offset
** @attr Size [ajuint] BLAST database size
** @attr Skip [AjBool*] skip file(s) in division.lkp
** @attr idxLine [FeatPCdIdx] entryname.idx input line
** @attr Samefile [AjBool] true if the same file is passed to
**                         ajFilebuffReopenFile
** @attr Padding [char[4]] Padding to alignment boundary
** @@
******************************************************************************/

typedef struct FeatSCdQry
{
    AjPStr divfile;
    AjPStr idxfile;
    AjPStr datfile;
    AjPStr seqfile;
    AjPStr tblfile;
    AjPStr srcfile;

    FeatPCdFile dfp;
    FeatPCdFile ifp;
    FeatPCdFile trgfp;
    FeatPCdFile hitfp;
    FeatPCdTrg trgLine;

    char* name;
    ajuint nameSize;
    ajuint div;
    ajuint maxdiv;

    ajuint type;

    AjPFile libr;
    AjPFile libs;
    AjPFile libt;
    AjPFile libf;

    ajuint idnum;
    ajuint TopHdr;
    ajuint TopCmp;
    ajuint TopAmb;
    ajuint TopSrc;
    ajuint Size;

    AjBool* Skip;
    FeatPCdIdx idxLine;
    AjBool Samefile;
    char Padding[4];
} FeatOCdQry;

/* NOTE: the pointer alias is a macro rather than a typedef, so
** "FeatPCdQry a, b;" would declare only a as a pointer. */
#define FeatPCdQry FeatOCdQry*
364 
365 
366 
367 
/* @datastatic FeatPEmbossQry *************************************************
**
** Btree 'emboss' query structure
**
** @alias FeatSEmbossQry
** @alias FeatOEmbossQry
**
** @attr idcache [AjPBtcache] ID cache
** @attr Caches [AjPList] Caches for each query field
** @attr files [AjPStr*] database filenames
** @attr reffiles [AjPStr**] database reference filenames
** @attr Skip [AjBool*] file numbers to exclude
** @attr List [AjPList] List of files
** @attr libs [AjPFile] Primary (database source) file
** @attr libr [AjPFile] Secondary (database bibliographic source) file
** @attr div [ajuint] division number of currently open database file
** @attr refcount [ajuint] number of reference file(s) per entry
** @attr nentries [ajint] number of entries in the filename array(s)
**                        -1 when done
** @attr Samefile [AjBool] true if the same file is passed to
**                         ajFilebuffReopenFile
** @@
******************************************************************************/

typedef struct FeatSEmbossQry
{
    AjPBtcache idcache;
    AjPList Caches;

    AjPStr *files;
    AjPStr **reffiles;
    AjBool *Skip;

    AjPList List;

    AjPFile libs;
    AjPFile libr;

    ajuint div;
    ajuint refcount;
    ajint nentries;

    AjBool Samefile;
} FeatOEmbossQry;

/* NOTE: the pointer alias is a macro rather than a typedef, so
** "FeatPEmbossQry a, b;" would declare only a as a pointer. */
#define FeatPEmbossQry FeatOEmbossQry*
414 
415 
416 
/* Access-method entry points; each is registered by name in the
** feattabAccess table below. */
static AjBool featAccessDas(AjPFeattabin ftabin);
static AjBool featAccessChado(AjPFeattabin fttabin);
static AjBool featAccessEmbossGcg(AjPFeattabin fttabin);
static AjBool featAccessGcg(AjPFeattabin fttabin);

/* featChado* helpers (definitions are outside this section of the file) */
static AjPSqlconnection featChadoConnect(const AjPQuery qry);
static void featChadoChildfeatureQuery(AjPSqlconnection connection,
			               AjPFeattable  feattab,
                                       const AjPStr srcfeature);

static AjBool featChadoQryfeatureQuery(AjPSqlconnection connection, AjPStr sql,
			               AjPFeattable feattab,
			               ajint qrystart, ajint qryend);

static AjPFeature featChadoChildfeatureRow(AjPFeattable fttab, AjPSqlrow line);
static AjPStr featChadoQryfeatureRow(AjPFeattable fttab, AjPSqlrow row,
				     ajint qrystart, ajint qryend);

/* featCd* helpers: EMBL CD-ROM index file access (division.lkp,
** entrynam.idx and the .trg/.hit target files) */
static ajuint      featCdDivNext(AjPQuery qry);
static void        featCdIdxDel(FeatPCdIdx* pthys);
static void        featCdTrgDel(FeatPCdTrg* pthys);

static int         featCdEntryCmp(const void* a, const void* b);
static void        featCdEntryDel(void** pentry, void* cl);
static void        featCdFileClose(FeatPCdFile *thys);
static FeatPCdFile featCdFileOpen(const AjPStr dir, const char* name,
                                  AjPStr* fullname);
static ajint       featCdFileSeek(FeatPCdFile fil, ajuint ipos);
static void        featCdIdxLine(FeatPCdIdx idxLine,  ajuint ipos,
                                 FeatPCdFile fp);
static char*       featCdIdxName(ajuint ipos, FeatPCdFile fp);
static AjBool      featCdIdxQuery(AjPQuery qry, const AjPStr idqry);
static ajuint      featCdIdxSearch(FeatPCdIdx idxLine, const AjPStr entry,
                                   FeatPCdFile fp);
static AjBool      featCdQryClose(AjPQuery qry);
static AjBool      featCdQryEntry(AjPQuery qry);
static AjBool      featCdQryFile(AjPQuery qry);
static AjBool      featCdQryOpen(AjPQuery qry);
static AjBool      featCdQryNext(AjPQuery qry);
static AjBool      featCdQryQuery(AjPQuery qry);
static AjBool      featCdQryReuse(AjPQuery qry);
static AjBool      featCdReadHeader(FeatPCdFile fp);
static AjBool      featCdTrgClose(FeatPCdFile *trgfil, FeatPCdFile *hitfil);
static ajuint      featCdTrgFind(AjPQuery qry, const char* indexname,
                                 const AjPStr qrystring);
static void        featCdTrgLine(FeatPCdTrg trgLine, ajuint ipos,
                                 FeatPCdFile fp);
static char*       featCdTrgName(ajuint ipos, FeatPCdFile fp);
static AjBool      featCdTrgOpen(const AjPStr dir, const char* name,
                                 FeatPCdFile *trgfil, FeatPCdFile *hitfil);
static AjBool      featCdTrgQuery(AjPQuery qry, const AjPStr field,
                                const AjPStr wildqry);
static ajuint      featCdTrgSearch(FeatPCdTrg trgLine, const AjPStr name,
                                 FeatPCdFile fp);

/* featEmbossGcg* helpers (definitions are outside this section) */
static AjBool      featEmbossGcgAll(AjPFeattabin fttabin);
static void        featEmbossGcgLoadBuff(AjPFeattabin fttabin);
static AjBool      featEmbossGcgReadRef(AjPFeattabin fttabin);
static AjBool      featEmbossGcgReadSeq(AjPFeattabin fttabin);

/* featEmboss* query helpers (definitions are outside this section) */
static AjBool      featEmbossOpenCache(AjPQuery qry, const char *ext,
                                       AjPBtcache *cache);
static AjBool      featEmbossQryClose(AjPQuery qry);
static AjBool      featEmbossQryEntry(AjPQuery qry);
static AjBool      featEmbossQryNext(AjPQuery qry);
static AjBool      featEmbossQryOpen(AjPQuery qry);
static AjBool      featEmbossQryOrganisms(AjPQuery qry);
static AjBool      featEmbossQryQuery(AjPQuery qry);
static AjBool      featEmbossQryReuse(AjPQuery qry);

/* featGcg* helpers (definitions are outside this section) */
static AjBool      featGcgAll(AjPFeattabin fttabin);
static void        featGcgBinDecode(AjPStr *pthis, ajuint rdlen);
static void        featGcgLoadBuff(AjPFeattabin fttabin);
static AjBool      featGcgReadRef(AjPFeattabin fttabin);
static AjBool      featGcgReadSeq(AjPFeattabin fttabin);
491 
492 
493 
494 
/* @funclist feattabAccess ****************************************************
**
** Functions to access each database or feature access method
**
** The table is terminated by an entry whose Name is NULL; ajFeatdbInit
** walks the entries up to that sentinel and registers each one by name.
**
******************************************************************************/

static AjOFeattabAccess feattabAccess[] =
{
  /*  Name     AccessFunction  FreeFunction
      Qlink    Description
      Alias    Entry    Query    All      Chunk     Padding */

    {
      "das",   &featAccessDas, NULL,
      "&",     "retrieve features from a DAS server",
      AJFALSE, AJTRUE,  AJTRUE,  AJFALSE, AJFALSE, AJFALSE
    },
    {
      "chado", &featAccessChado, NULL,
      "",      "retrieve features from a CHADO server",
      AJFALSE, AJTRUE,  AJFALSE, AJFALSE, AJFALSE, AJFALSE
    },
    {
      "gcg",   &featAccessGcg, NULL,
      "|&!^=", "emboss dbigcg indexed",
      AJFALSE, AJTRUE,  AJTRUE,  AJTRUE,  AJFALSE, AJFALSE
    },
    {
      "embossgcg", &featAccessEmbossGcg, NULL,
      "|&!^=", "emboss dbxgcg indexed",
      AJFALSE, AJTRUE,  AJTRUE,  AJTRUE,  AJFALSE, AJFALSE
    },
    /* sentinel: NULL Name marks the end of the table */
    {
      NULL, NULL, NULL,
      NULL, NULL,
      AJFALSE, AJFALSE, AJFALSE, AJFALSE, AJFALSE, AJFALSE
    }
};
533 
534 
535 
536 
537 /* @func ajFeatdbInit *********************************************************
538 **
539 ** Initialise feature database internals
540 **
541 ** @return [void]
542 **
543 ** @release 6.4.0
544 ******************************************************************************/
545 
ajFeatdbInit(void)546 void ajFeatdbInit(void)
547 {
548     AjPTable table;
549     ajuint i = 0;
550 
551     table = ajFeattabaccessGetDb();
552 
553     while(feattabAccess[i].Name)
554     {
555         ajCallTableRegister(table, feattabAccess[i].Name,
556                             (void*) &feattabAccess[i]);
557 	i++;
558     }
559 
560     return;
561 }
562 
563 
564 
565 
566 
567 /* @section EMBL CD Database Indexing *****************************************
568 **
569 ** These functions manage the EMBL CD-ROM index access methods.
570 ** These include the "efetch" indexing used at the Sanger Centre
571 ** based on Erik Sonnhammer's indexseqlibs code
572 ** and a direct copy of the database and index files from the
** EMBL CD-ROM distribution.
574 **
575 ** The index files start with a file "division.lkp" which contains
576 ** the list of database filenames and an index number for each.
577 **
578 ** "entrynam.idx" is a sorted index by entry name for each entry
579 ** which points to a file number and a byte offset within the file.
580 **
581 ** "acnum.trg" and "acnum.hit" index accession numbers and link them
582 ** to record numbers in "entrynam.idx"
583 **
584 ** Other index files are not used yet by EMBOSS but could be added
585 ** using the "des" field in queries to search descriptions, and so on.
586 **
587 ******************************************************************************/
588 
589 
590 
591 
592 /* @funcstatic featCdFileOpen *************************************************
593 **
594 ** Opens a named EMBL CD-ROM index file.
595 **
596 ** @param [r] dir [const AjPStr] Directory
597 ** @param [r] name [const char*] File name.
598 ** @param [w] fullname [AjPStr*] Full file name with directory path
599 ** @return [FeatPCdFile] EMBL CD-ROM index file object.
600 **
601 ** @release 6.5.0
602 ** @@
603 ******************************************************************************/
604 
featCdFileOpen(const AjPStr dir,const char * name,AjPStr * fullname)605 static FeatPCdFile featCdFileOpen(const AjPStr dir, const char* name,
606                                   AjPStr* fullname)
607 {
608     FeatPCdFile thys = NULL;
609 
610 
611     AJNEW0(thys);
612 
613     thys->File = ajFileNewInNamePathC(name, dir);
614 
615     if(!thys->File)
616     {
617         AJFREE(thys);
618 
619         return NULL;
620     }
621 
622 
623     AJNEW0(thys->Header);
624 
625     featCdReadHeader(thys);
626     thys->NRecords = thys->Header->NRecords;
627     thys->RecSize = thys->Header->RecSize;
628 
629     ajStrAssignS(fullname, ajFileGetPrintnameS(thys->File));
630 
631     ajDebug("featCdFileOpen '%F' NRecords: %d RecSize: %d\n",
632             thys->File, thys->NRecords, thys->RecSize);
633 
634 
635     return thys;
636 }
637 
638 
639 
640 
641 /* @funcstatic featCdFileSeek *************************************************
642 **
643 ** Sets the file position in an EMBL CD-ROM index file.
644 **
645 ** @param [u] fil [FeatPCdFile] EMBL CD-ROM index file object.
646 ** @param [r] ipos [ajuint] Offset.
647 ** @return [ajint] Return value from the seek operation.
648 **
649 ** @release 6.5.0
650 ** @@
651 ******************************************************************************/
652 
653 
featCdFileSeek(FeatPCdFile fil,ajuint ipos)654 static ajint featCdFileSeek(FeatPCdFile fil, ajuint ipos)
655 {
656     ajint ret;
657     ajuint jpos;
658 
659     jpos = 300 + ipos*fil->RecSize;
660     ret = ajFileSeek(fil->File, jpos, 0);
661 
662     /*
663       ajDebug("featCdFileSeek rec %u pos %u tell %Ld returns %d\n",
664       ipos, jpos, ajFileResetPos(fil->File), ret);
665     */
666 
667     return ret;
668 }
669 
670 
671 
672 
673 /* @funcstatic featCdFileClose ************************************************
674 **
675 ** Closes an EMBL CD-ROM index file.
676 **
677 ** @param [d] pthis [FeatPCdFile*] EMBL CD-ROM index file.
678 ** @return [void]
679 **
680 ** @release 6.5.0
681 ** @@
682 ******************************************************************************/
683 
featCdFileClose(FeatPCdFile * pthis)684 static void featCdFileClose(FeatPCdFile* pthis)
685 {
686     FeatPCdFile thys;
687 
688     thys = *pthis;
689 
690     if(!thys)
691         return;
692 
693     ajDebug("featCdFileClose of %F\n", (*pthis)->File);
694 
695     ajFileClose(&thys->File);
696     AJFREE(thys->Header);
697     AJFREE(*pthis);
698 
699     return;
700 }
701 
702 
703 
704 
705 /* @funcstatic featCdIdxSearch ************************************************
706 **
707 ** Binary search through an EMBL CD-ROM index file for an exact match.
708 **
709 ** @param [u] idxLine [FeatPCdIdx] Index file record.
710 ** @param [r] entry [const AjPStr] Entry name to search for.
711 ** @param [u] fil [FeatPCdFile] EMBL CD-ROM index file.
712 ** @return [ajuint] Record number on success, -1 on failure.
713 **
714 ** @release 6.5.0
715 ** @@
716 ******************************************************************************/
717 
featCdIdxSearch(FeatPCdIdx idxLine,const AjPStr entry,FeatPCdFile fil)718 static ajuint featCdIdxSearch(FeatPCdIdx idxLine, const AjPStr entry,
719                               FeatPCdFile fil)
720 {
721     AjPStr entrystr = NULL;
722     ajint ihi;
723     ajint ilo;
724     ajint ipos = 0;
725     ajint icmp = 0;
726     char *name;
727 
728     ajStrAssignS(&entrystr, entry);
729     ajStrFmtUpper(&entrystr);
730 
731     ajDebug("featCdIdxSearch (entry '%S') records: %d\n",
732             entrystr, fil->NRecords);
733 
734     if(fil->NRecords < 1)
735         return -1;
736 
737     ilo = 0;
738     ihi = fil->NRecords - 1;
739 
740     while(ilo <= ihi)
741     {
742         ipos = (ilo + ihi)/2;
743         name = featCdIdxName(ipos, fil);
744         icmp = ajStrCmpC(entrystr, name);
745         ajDebug("idx test %u '%s' %2d (+/- %u)\n", ipos, name, icmp, ihi-ilo);
746 
747         if(!icmp)
748             break;
749 
750         if(icmp < 0)
751             ihi = ipos-1;
752         else
753             ilo = ipos+1;
754     }
755 
756     ajStrDel(&entrystr);
757 
758     if(icmp)
759         return -1;
760 
761     featCdIdxLine(idxLine, ipos, fil);
762 
763     return ipos;
764 }
765 
766 
767 
768 
769 /* @funcstatic featCdIdxQuery *************************************************
770 **
771 ** Binary search of an EMBL CD-ROM index file for entries matching a
772 ** wildcard entry name.
773 **
774 ** @param [u] qry [AjPQuery] Query object.
775 ** @param [r] idqry [const AjPStr] ID Query
776 ** @return [AjBool] ajTrue on success.
777 **
778 ** @release 6.5.0
779 ** @@
780 ******************************************************************************/
781 
featCdIdxQuery(AjPQuery qry,const AjPStr idqry)782 static AjBool featCdIdxQuery(AjPQuery qry, const AjPStr idqry)
783 {
784     FeatPCdQry qryd;
785 
786     AjPList list;
787     FeatPCdIdx idxLine;
788     FeatPCdFile fil;
789 
790     AjPStr idstr = NULL;
791     AjPStr idpref = NULL;
792     ajint ihi;
793     ajint ilo;
794     ajint ipos = 0;
795     ajint icmp;
796     char *name;
797     ajint i;
798     ajint ilen;
799     ajint jlo;
800     ajint jhi;
801     ajint khi;
802     AjBool first;
803     ajint ifail = 0;
804     ajint iskip = 0;
805 
806     FeatPCdEntry entry;
807 
808     qryd    = qry->QryData;
809     list    = qry->ResultsList;
810     idxLine = qryd->idxLine;
811     fil     = qryd->ifp;
812 
813     ajStrAssignS(&idstr,idqry);
814     ajStrFmtUpper(&idstr);
815     ajStrAssignS(&idpref, idstr);
816 
817     ajStrRemoveWild(&idpref);
818 
819     ajDebug("featCdIdxQuery (wild '%S' prefix '%S')\n",
820             idstr, idpref);
821 
822     jlo = ilo = 0;
823     khi = jhi = ihi = fil->NRecords-1;
824 
825     ilen = ajStrGetLen(idpref);
826     first = ajTrue;
827 
828     if(ilen)
829     {                          /* find first entry with this prefix */
830         while(ilo <= ihi)
831         {
832             ipos = (ilo + ihi)/2;
833             name = featCdIdxName(ipos, fil);
834             name[ilen] = '\0';
835             icmp = ajStrCmpC(idpref, name); /* test prefix */
836             ajDebug("idx test %d '%s' %2d (+/- %d)\n",
837                     ipos, name, icmp, ihi-ilo);
838 
839             if(!icmp)
840             {                        /* hit prefix - test for first */
841                 ajDebug("idx hit %d\n", ipos);
842 
843                 if(first)
844                 {
845                     jhi = ihi;
846                     first = ajFalse;
847                     khi = ipos;
848                 }
849 
850                 jlo = ipos;
851             }
852 
853             if(icmp > 0)
854                 ilo = ipos+1;
855             else
856                 ihi = ipos-1;
857         }
858 
859         if(first)
860         {                         /* failed to find any with prefix */
861             ajStrDel(&idstr);
862             ajStrDel(&idpref);
863 
864             return ajFalse;
865         }
866 
867         ajDebug("first pass: ipos %d jlo %d jhi %d\n", ipos, jlo, jhi);
868 
869         /* now search below for last */
870 
871         ilo = jlo+1;
872         ihi = jhi;
873 
874         while(ilo <= ihi)
875         {
876             ipos = (ilo + ihi)/2;
877             name = featCdIdxName(ipos, fil);
878             name[ilen] = '\0';
879             icmp = ajStrCmpC(idpref, name);
880             ajDebug("idx test %d '%s' %2d (+/- %d)\n",
881                     ipos, name, icmp, ihi-ilo);
882 
883             if(!icmp)
884             {                           /* hit prefix */
885                 ajDebug("idx hit %d\n", ipos);
886                 khi = ipos;
887             }
888 
889             if(icmp < 0)
890                 ihi = ipos-1;
891             else
892                 ilo = ipos+1;
893         }
894 
895         ajDebug("second pass: ipos %d jlo %d khi %d\n",
896                 ipos, jlo, khi);
897 
898         name = featCdIdxName(jlo, fil);
899         ajDebug("first  %d '%s'\n", jlo, name);
900         name = featCdIdxName(khi, fil);
901         ajDebug(" last  %d '%s'\n", khi, name);
902     }
903 
904     for(i=jlo; i < (khi+1); i++)
905     {
906         featCdIdxLine(idxLine, i, fil);
907 
908         if(ajStrMatchWildS(idxLine->EntryName, idstr))
909         {
910             if(!qryd->Skip[idxLine->DivCode-1])
911             {
912                 if(ifail)
913                 {
914                     ajDebug("FAIL: %d entries\n", ifail);
915                     ifail=0;
916                 }
917 
918                 if(iskip)
919                 {
920                     ajDebug("SKIP: %d entries\n", iskip);
921                     iskip=0;
922                 }
923 
924                 ajDebug("  OK: '%S'\n", idxLine->EntryName);
925                 AJNEW0(entry);
926                 entry->div = idxLine->DivCode;
927                 entry->annoff = idxLine->AnnOffset;
928                 entry->seqoff = idxLine->SeqOffset;
929                 ajListPushAppend(list, (void*)entry);
930             }
931             else
932             {
933                 ajDebug("SKIP: '%S' [file %d]\n",
934                         idxLine->EntryName, idxLine->DivCode);
935                 iskip++;
936             }
937         }
938         else
939         {
940             ++ifail;
941             /* ajDebug("FAIL: '%S' '%S'\n", idxLine->EntryName, idstr);*/
942         }
943     }
944 
945     if(ifail)
946     {
947         ajDebug("FAIL: %d entries\n", ifail);
948         ifail=0;
949     }
950 
951     if(iskip)
952     {
953         ajDebug("SKIP: %d entries\n", iskip);
954         ifail=0;
955     }
956 
957     ajStrDel(&idstr);
958     ajStrDel(&idpref);
959 
960     if(ajListGetLength(list))
961         return ajTrue;
962 
963     return ajFalse;
964 }
965 
966 
967 
968 
969 /* @funcstatic featCdTrgSearch ************************************************
970 **
971 ** Binary search of EMBL CD-ROM target file, for example an accession number
972 ** search.
973 **
974 ** @param [u] trgLine [FeatPCdTrg] Target file record.
975 ** @param [r] entry [const AjPStr] Entry name or accession number.
976 ** @param [u] fp [FeatPCdFile] EMBL CD-ROM target file
977 ** @return [ajuint] Record number, or -1 on failure.
978 **
979 ** @release 6.5.0
980 ** @@
981 ******************************************************************************/
982 
featCdTrgSearch(FeatPCdTrg trgLine,const AjPStr entry,FeatPCdFile fp)983 static ajuint featCdTrgSearch(FeatPCdTrg trgLine, const AjPStr entry,
984                               FeatPCdFile fp)
985 {
986     AjPStr entrystr = NULL;
987     ajint ihi;
988     ajint ilo;
989     ajint ipos;
990     ajint icmp;
991     ajint itry;
992     char *name;
993 
994     ajStrAssignS(&entrystr, entry);
995     ajStrFmtUpper(&entrystr);
996 
997     if(fp->NRecords < 1)
998         return -1;
999 
1000     ilo  = 0;
1001     ihi  = fp->NRecords;
1002     ipos = (ilo + ihi)/2;
1003     icmp = -1;
1004     ajDebug("featCdTrgSearch '%S' recSize: %d\n", entry, fp->RecSize);
1005     name = featCdTrgName(ipos, fp);
1006     icmp = ajStrCmpC(entrystr, name);
1007 
1008     ajDebug("trg testa %d '%s' %2d (+/- %d)\n", ipos, name, icmp, ihi-ilo);
1009 
1010     while(icmp)
1011     {
1012         if(icmp < 0)
1013             ihi = ipos;
1014         else
1015             ilo = ipos;
1016 
1017         itry = (ilo + ihi)/2;
1018 
1019         if(itry == ipos)
1020         {
1021             ajDebug("'%S' not found in .trg\n", entrystr);
1022             ajStrDel(&entrystr);
1023 
1024             return -1;
1025         }
1026 
1027         ipos = itry;
1028         name = featCdTrgName(ipos, fp);
1029         icmp = ajStrCmpC(entrystr, name);
1030         ajDebug("trg testb %d '%s' %2d (+/- %d)\n",
1031                 ipos, name, icmp, ihi-ilo);
1032     }
1033 
1034     featCdTrgLine(trgLine, ipos, fp);
1035 
1036     ajStrDel(&entrystr);
1037 
1038     if(!trgLine->NHits)
1039         return -1;
1040 
1041     ajDebug("found in .trg at record %d\n", ipos);
1042 
1043 
1044     return ipos;
1045 }
1046 
1047 
1048 
1049 
1050 /* @funcstatic featCdIdxName **************************************************
1051 **
1052 ** Reads the name from record ipos of an EMBL CD-ROM index file.
1053 ** The name length is known from the index file object.
1054 **
1055 ** @param [r] ipos [ajuint] Record number.
1056 ** @param [u] fil [FeatPCdFile] EMBL CD-ROM index file.
1057 ** @return [char*] Name read from file.
1058 **
1059 ** @release 6.5.0
1060 ** @@
1061 ******************************************************************************/
1062 
featCdIdxName(ajuint ipos,FeatPCdFile fil)1063 static char* featCdIdxName(ajuint ipos, FeatPCdFile fil)
1064 {
1065     ajuint nameSize;
1066 
1067     nameSize = fil->RecSize-10;
1068 
1069     if(featCdMaxNameSize < nameSize)
1070     {
1071         featCdMaxNameSize = nameSize;
1072         if(featCdName)
1073             ajCharDel(&featCdName);
1074         featCdName = ajCharNewRes(featCdMaxNameSize+1);
1075     }
1076 
1077     featCdFileSeek(fil, ipos);
1078     ajReadbinCharTrim(fil->File, nameSize, featCdName);
1079 
1080     return featCdName;
1081 }
1082 
1083 
1084 
1085 
1086 /* @funcstatic featCdIdxLine **************************************************
1087 **
1088 ** Reads a numbered record from an EMBL CD-ROM index file.
1089 **
1090 ** @param [u] idxLine [FeatPCdIdx] Index file record.
1091 ** @param [r] ipos [ajuint] Record number.
1092 ** @param [u] fil [FeatPCdFile] EMBL CD-ROM index file.
1093 ** @return [void]
1094 **
1095 ** @release 6.5.0
1096 ** @@
1097 ******************************************************************************/
1098 
featCdIdxLine(FeatPCdIdx idxLine,ajuint ipos,FeatPCdFile fil)1099 static void featCdIdxLine(FeatPCdIdx idxLine, ajuint ipos, FeatPCdFile fil)
1100 {
1101     ajuint nameSize;
1102 
1103     nameSize = fil->RecSize-10;
1104 
1105     ajDebug("featCdIdxLine nameSize: %u max: %u ipos: %u '%F'\n",
1106             nameSize, featCdMaxNameSize, ipos, fil->File);
1107 
1108     if(featCdMaxNameSize < nameSize)
1109     {
1110         featCdMaxNameSize = nameSize;
1111 
1112         if(featCdName)
1113             ajCharDel(&featCdName);
1114 
1115         featCdName = ajCharNewRes(featCdMaxNameSize+1);
1116     }
1117 
1118     featCdFileSeek(fil, ipos);
1119     ajReadbinCharTrim(fil->File, nameSize, featCdName);
1120 
1121     ajStrAssignC(&idxLine->EntryName,featCdName);
1122 
1123     ajReadbinUint(fil->File, &idxLine->AnnOffset);
1124     ajReadbinUint(fil->File, &idxLine->SeqOffset);
1125     ajReadbinUint2(fil->File, &idxLine->DivCode);
1126 
1127     ajDebug("read ann: %u seq: %u div: %u\n",
1128             idxLine->AnnOffset, idxLine->SeqOffset,
1129             (ajuint) idxLine->DivCode);
1130     return;
1131 }
1132 
1133 
1134 
1135 
1136 /* @funcstatic featCdTrgName **************************************************
1137 **
1138 ** Reads the target name from an EMBL CD-ROM index target file.
1139 **
1140 ** @param [r] ipos [ajuint] Record number.
1141 ** @param [u] fil [FeatPCdFile] EMBL CD-ROM index target file.
1142 ** @return [char*] Name.
1143 **
1144 ** @release 6.5.0
1145 ** @@
1146 ******************************************************************************/
1147 
featCdTrgName(ajuint ipos,FeatPCdFile fil)1148 static char* featCdTrgName(ajuint ipos, FeatPCdFile fil)
1149 {
1150     ajuint nameSize;
1151     ajint i;
1152 
1153     nameSize = fil->RecSize-8;
1154 
1155     if(featCdMaxNameSize < nameSize)
1156     {
1157         featCdMaxNameSize = nameSize;
1158 
1159         if(featCdName)
1160             ajCharDel(&featCdName);
1161 
1162         featCdName = ajCharNewRes(featCdMaxNameSize+1);
1163     }
1164 
1165     featCdFileSeek(fil, ipos);
1166     ajReadbinInt(fil->File, &i);
1167     ajReadbinInt(fil->File, &i);
1168     ajReadbinCharTrim(fil->File, nameSize, featCdName);
1169 
1170     ajDebug("featCdTrgName maxNameSize:%d nameSize:%d name '%s'\n",
1171             featCdMaxNameSize, nameSize, featCdName);
1172 
1173     return featCdName;
1174 }
1175 
1176 
1177 
1178 
1179 /* @funcstatic featCdTrgLine **************************************************
1180 **
1181 ** Reads a line from an EMBL CD-ROM index target file.
1182 **
1183 ** @param [w] trgLine [FeatPCdTrg] Target file record.
1184 ** @param [r] ipos [ajuint] Record number.
1185 ** @param [u] fil [FeatPCdFile] EMBL CD-ROM index target file.
1186 ** @return [void].
1187 **
1188 ** @release 6.5.0
1189 ** @@
1190 ******************************************************************************/
1191 
featCdTrgLine(FeatPCdTrg trgLine,ajuint ipos,FeatPCdFile fil)1192 static void featCdTrgLine(FeatPCdTrg trgLine, ajuint ipos, FeatPCdFile fil)
1193 {
1194     ajuint nameSize;
1195 
1196     nameSize = fil->RecSize-8;
1197 
1198     if(featCdMaxNameSize < nameSize)
1199     {
1200         featCdMaxNameSize = nameSize;
1201 
1202         if(featCdName)
1203             ajCharDel(&featCdName);
1204 
1205         featCdName = ajCharNewRes(featCdMaxNameSize+1);
1206     }
1207 
1208     featCdFileSeek(fil, ipos);
1209 
1210     ajReadbinUint(fil->File, &trgLine->NHits);
1211     ajReadbinUint(fil->File, &trgLine->FirstHit);
1212     ajReadbinCharTrim(fil->File, nameSize, featCdName);
1213 
1214     ajStrAssignC(&trgLine->Target,featCdName);
1215 
1216     ajDebug("featCdTrgLine %d nHits %d firstHit %d target '%S'\n",
1217             ipos, trgLine->NHits, trgLine->FirstHit, trgLine->Target);
1218 
1219     return;
1220 }
1221 
1222 
1223 
1224 
1225 /* @funcstatic featCdReadHeader ***********************************************
1226 **
1227 ** Reads the header of an EMBL CD-ROM index file.
1228 **
1229 ** @param [u] fil [FeatPCdFile] EMBL CD-ROM index file.
1230 ** @return [AjBool] ajTrue on success.
1231 **
1232 ** @release 6.5.0
1233 ** @@
1234 ******************************************************************************/
1235 
featCdReadHeader(FeatPCdFile fil)1236 static AjBool featCdReadHeader(FeatPCdFile fil)
1237 {
1238     ajint i;
1239 
1240     FeatPCdFHeader header;
1241     char date[8]; /* ajReadbinCharTrim needs space for trailing null */
1242 
1243     header = fil->Header;
1244 
1245     ajReadbinUint(fil->File, &header->FileSize);
1246     ajReadbinUint(fil->File, &header->NRecords);
1247     ajReadbinInt2(fil->File, &header->RecSize);
1248 
1249     header->IdSize = header->RecSize - 10;
1250 
1251     ajReadbinCharTrim(fil->File, 20, header->DbName);
1252     ajReadbinCharTrim(fil->File, 10, header->Release);
1253 
1254     ajReadbinCharTrim(fil->File, 4, date);
1255 
1256     for(i=1;i<4;i++)
1257         header->Date[i] = date[i];
1258 
1259     header->RelYear  = header->Date[1];
1260     header->RelMonth = header->Date[2];
1261     header->RelDay   = header->Date[3];
1262 
1263     ajDebug("featCdReadHeader file %F\n", fil->File);
1264     ajDebug("  FileSize: %d NRecords: %hd recsize: %d idsize: %d\n",
1265             header->FileSize, header->NRecords,
1266             header->RecSize, header->IdSize);
1267 
1268     return ajTrue;
1269 }
1270 
1271 
1272 
1273 
1274 /* @funcstatic featCdTrgOpen **************************************************
1275 **
1276 ** Opens an EMBL CD-ROM target file pair.
1277 **
1278 ** @param [r] dir [const AjPStr] Directory.
1279 ** @param [r] name [const char*] File name.
1280 ** @param [w] trgfil [FeatPCdFile*] Target file.
1281 ** @param [w] hitfil [FeatPCdFile*] Hit file.
1282 ** @return [AjBool] ajTrue on success.
1283 **
1284 ** @release 6.5.0
1285 ** @@
1286 ******************************************************************************/
1287 
featCdTrgOpen(const AjPStr dir,const char * name,FeatPCdFile * trgfil,FeatPCdFile * hitfil)1288 static AjBool featCdTrgOpen(const AjPStr dir, const char* name,
1289                             FeatPCdFile* trgfil, FeatPCdFile* hitfil)
1290 {
1291     AjPStr tmpname  = NULL;
1292     AjPStr fullname = NULL;
1293 
1294     ajDebug("featCdTrgOpen dir '%S' name '%s'\n",
1295             dir, name);
1296 
1297     ajFmtPrintS(&tmpname, "%s.trg",name);
1298     *trgfil = featCdFileOpen(dir, ajStrGetPtr(tmpname), &fullname);
1299     ajStrDel(&tmpname);
1300 
1301     if(!*trgfil)
1302         return ajFalse;
1303 
1304     ajFmtPrintS(&tmpname, "%s.hit",name);
1305     *hitfil = featCdFileOpen(dir, ajStrGetPtr(tmpname), &fullname);
1306     ajStrDel(&tmpname);
1307     ajStrDel(&fullname);
1308 
1309     if(!*hitfil)
1310         return ajFalse;
1311 
1312     return ajTrue;
1313 }
1314 
1315 
1316 
1317 
1318 /* @funcstatic featCdTrgClose *************************************************
1319 **
1320 ** Close an EMBL CD-ROM target file pair.
1321 **
1322 ** @param [w] ptrgfil [FeatPCdFile*] Target file.
1323 ** @param [w] phitfil [FeatPCdFile*] Hit file.
1324 ** @return [AjBool] ajTrue on success.
1325 **
1326 ** @release 6.5.0
1327 ** @@
1328 ******************************************************************************/
1329 
featCdTrgClose(FeatPCdFile * ptrgfil,FeatPCdFile * phitfil)1330 static AjBool featCdTrgClose(FeatPCdFile* ptrgfil, FeatPCdFile* phitfil)
1331 {
1332     featCdFileClose(ptrgfil);
1333     featCdFileClose(phitfil);
1334 
1335     return ajTrue;
1336 }
1337 
1338 
1339 
1340 
1341 /* @section GCG Database Indexing *********************************************
1342 **
1343 ** These functions manage the GCG index access methods.
1344 **
1345 ******************************************************************************/
1346 
1347 
1348 
1349 
1350 /* @funcstatic featAccessGcg ***************************************************
1351 **
1352 ** Reads feature(s) from a GCG formatted database, using EMBLCD index
1353 ** files. Returns with the file pointer set to the position in the
1354 ** sequence file and reference files.
1355 **
1356 ** @param [u] fttabin [AjPFeattabin] Feature table input.
1357 ** @return [AjBool] ajTrue on success.
1358 **
1359 ** @release 6.5.0
1360 ** @@
1361 ******************************************************************************/
1362 
featAccessGcg(AjPFeattabin fttabin)1363 static AjBool featAccessGcg(AjPFeattabin fttabin)
1364 {
1365     AjBool retval = ajFalse;
1366     AjPQuery qry;
1367     FeatPCdQry qryd;
1368 
1369     ajDebug("featAccessGcg type %d\n", fttabin->Input->Query->QueryType);
1370 
1371     qry  = fttabin->Input->Query;
1372     qryd = qry->QryData;
1373 
1374     if(qry->QueryType == AJQUERY_ALL)
1375     {
1376         retval = featGcgAll(fttabin);
1377 
1378         return retval;
1379     }
1380 
1381     /* we need to search the index files and return a query */
1382 
1383     if(qry->QryData)
1384     {                                /* reuse unfinished query data */
1385         if(!featCdQryReuse(qry))
1386             return ajFalse;
1387     }
1388     else
1389     {
1390         fttabin->Input->Single = ajTrue;
1391 
1392         if(!featCdQryOpen(qry))
1393         {
1394             ajWarn("Failed to open index for database '%S'",
1395                    qry->DbName);
1396 
1397             return ajFalse;
1398         }
1399 
1400         qryd = qry->QryData;
1401         ajFilebuffDel(&fttabin->Input->Filebuff);
1402         fttabin->Input->Filebuff = ajFilebuffNewNofile();
1403 
1404         /* binary search for the entryname we need */
1405 
1406         if(qry->QueryType == AJQUERY_ENTRY)
1407         {
1408             ajDebug("entry fields: %Lu hasacc:%B\n",
1409                     ajListGetLength(qry->QueryFields), qry->HasAcc);
1410 
1411             if(!featCdQryEntry(qry))
1412                 ajDebug("GCG Entry failed\n");
1413         }
1414 
1415         if(qry->QueryType == AJQUERY_QUERY)
1416         {
1417             ajDebug("query fields: %Lu hasacc:%B\n",
1418                     ajListGetLength(qry->QueryFields), qry->HasAcc);
1419             if(!featCdQryQuery(qry))
1420                 ajDebug("GCG Query failed\n");
1421         }
1422 
1423         AJFREE(qryd->trgLine);
1424     }
1425 
1426     if(ajListGetLength(qry->ResultsList))
1427     {
1428         retval = featCdQryNext(qry);
1429 
1430         if(retval)
1431             featGcgLoadBuff(fttabin);
1432     }
1433 
1434     if(!ajListGetLength(qry->ResultsList))
1435     {
1436         ajFileClose(&qryd->libr);
1437         ajFileClose(&qryd->libs);
1438         featCdQryClose(qry);
1439     }
1440 
1441     if(retval)
1442         ajStrAssignS(&fttabin->Input->Db, qry->DbName);
1443 
1444     return retval;
1445 }
1446 
1447 
1448 
1449 
1450 /* @funcstatic featGcgLoadBuff ************************************************
1451 **
1452 ** Copies text data to a buffered file, and feature data for an
1453 ** AjPFeattabin internal data structure for reading later
1454 **
1455 ** @param [u] fttabin [AjPFeattabin] Feature table input object
1456 ** @return [void]
1457 **
1458 ** @release 6.5.0
1459 ** @@
1460 ******************************************************************************/
1461 
featGcgLoadBuff(AjPFeattabin fttabin)1462 static void featGcgLoadBuff(AjPFeattabin fttabin)
1463 {
1464     AjPQuery qry;
1465     FeatPCdQry qryd;
1466 
1467     qry  = fttabin->Input->Query;
1468     qryd = qry->QryData;
1469 
1470     if(!qry->QryData)
1471         ajFatal("featGcgLoadBuff Query Data not initialised");
1472 
1473     /* copy all the ref data */
1474 
1475     featGcgReadRef(fttabin);
1476 
1477     /* skip the sequence (do we care about the format?) */
1478     featGcgReadSeq(fttabin);
1479 
1480     /* ajFilebuffTraceFull(fttabin->Input->Filebuff, 9999, 100); */
1481 
1482     if(!qryd->libr)
1483     {
1484         ajFileClose(&qryd->libs);
1485         ajDebug("featGcgLoadBuff: closed files\n");
1486     }
1487 
1488     return;
1489 }
1490 
1491 
1492 
1493 
1494 /* @funcstatic featGcgReadRef *************************************************
1495 **
1496 ** Copies text data to a buffered file for reading later
1497 **
1498 ** @param [u] fttabin [AjPFeattabin] Feature table input object
1499 ** @return [AjBool] ajTrue on success
1500 **
1501 ** @release 6.5.0
1502 ** @@
1503 ******************************************************************************/
1504 
featGcgReadRef(AjPFeattabin fttabin)1505 static AjBool featGcgReadRef(AjPFeattabin fttabin)
1506 {
1507     AjPStr line = NULL;
1508     AjPQuery qry;
1509     FeatPCdQry qryd;
1510     ajlong rpos;
1511     AjPStr id       = NULL;
1512     AjPStr idc      = NULL;
1513     AjBool ispir           = ajFalse;
1514     AjBool continued       = ajFalse;
1515     AjBool testcontinue    = ajFalse;
1516     char *p = NULL;
1517 
1518     qry  = fttabin->Input->Query;
1519     qryd = qry->QryData;
1520 
1521     if(!featRegGcgRefId)
1522         featRegGcgRefId =ajRegCompC("^>...([^ \n]+)");
1523 
1524     if(!featRegGcgSplit)
1525         featRegGcgSplit =ajRegCompC("_0+$");
1526 
1527     if(!ajReadline(qryd->libr, &line))  /* end of file */
1528         return ajFalse;
1529 
1530     if(ajStrGetCharFirst(line) != '>')  /* not start of entry */
1531         ajFatal("featGcgReadRef bad entry start:\n'%S'", line);
1532 
1533     if(ajStrGetCharPos(line, 3) == ';') /* PIR entry */
1534         ispir = ajTrue;
1535 
1536     if(ispir)
1537         ajFilebuffLoadS(fttabin->Input->Filebuff, line);
1538 
1539     if(ajRegExec(featRegGcgRefId, line))
1540     {
1541         continued = ajFalse;
1542         ajRegSubI(featRegGcgRefId, 1, &id);
1543 
1544         if(ajRegExec(featRegGcgSplit, id))
1545         {
1546             continued = ajTrue;
1547             p = ajStrGetuniquePtr(&id);
1548             p = strrchr(p,(ajint)'_');
1549             *(++p)='\0';
1550             ajStrSetValid(&id);
1551         }
1552     }
1553     else
1554     {
1555         ajDebug("featGcgReadRef bad ID line\n'%S'\n", line);
1556         ajFatal("featGcgReadRef bad ID line\n'%S'\n", line);
1557     }
1558 
1559     if(!ajReadline(qryd->libr, &line))  /* blank desc line */
1560     {
1561         ajStrDel(&id);
1562 
1563         return ajFalse;
1564     }
1565 
1566     if(ispir)
1567         ajFilebuffLoadS(fttabin->Input->Filebuff, line);
1568 
1569     rpos = ajFileResetPos(qryd->libr);
1570 
1571     while(ajReadline(qryd->libr, &line))
1572     {                                   /* end of file */
1573         if(ajStrGetCharFirst(line) == '>')
1574         {                               /* start of next entry */
1575             /* skip over split entries so it can be used for "all" */
1576 
1577             if(continued)
1578             {
1579                 testcontinue=ajTrue;
1580                 ajRegExec(featRegGcgRefId, line);
1581                 ajRegSubI(featRegGcgRefId, 1, &idc);
1582 
1583                 if(!ajStrPrefixS(idc,id))
1584                 {
1585                     ajFileSeek(qryd->libr, rpos, 0);
1586                     ajStrDel(&line);
1587                     ajStrDel(&id);
1588                     ajStrDel(&idc);
1589 
1590                     return ajTrue;
1591                 }
1592             }
1593             else
1594             {
1595                 ajFileSeek(qryd->libr, rpos, 0);
1596                 ajStrDel(&line);
1597                 ajStrDel(&id);
1598                 ajStrDel(&idc);
1599 
1600                 return ajTrue;
1601             }
1602         }
1603 
1604         rpos = ajFileResetPos(qryd->libr);
1605 
1606         if(!testcontinue)
1607         {
1608             ajStrExchangeCC(&line, ". .", "..");
1609             ajFilebuffLoadS(fttabin->Input->Filebuff, line);
1610         }
1611     }
1612 
1613 
1614     /* at end of file */
1615 
1616     ajFileClose(&qryd->libr);
1617 
1618     ajStrDel(&line);
1619     ajStrDel(&id);
1620     ajStrDel(&idc);
1621 
1622     return ajTrue;
1623 }
1624 
1625 
1626 
1627 
1628 /* @funcstatic featGcgReadSeq *************************************************
1629 **
1630 ** Skips unwanted sequence data so file is at start of next entry.
1631 **
1632 ** @param [u] fttabin [AjPFeattabin] Feature table input object
1633 ** @return [AjBool] ajTrue on success
1634 **
1635 ** @release 6.5.0
1636 ** @@
1637 ******************************************************************************/
1638 
featGcgReadSeq(AjPFeattabin fttabin)1639 static AjBool featGcgReadSeq(AjPFeattabin fttabin)
1640 {
1641     AjPStr line = NULL;
1642     AjPQuery qry;
1643     FeatPCdQry qryd;
1644     AjPStr gcgtype    = NULL;
1645     AjPStr tmpstr     = NULL;
1646     AjPStr dstr       = NULL;
1647     AjPStr id         = NULL;
1648     AjPStr idc        = NULL;
1649     AjPStr contseq    = NULL;
1650 
1651     ajint gcglen;
1652     ajint pos;
1653     ajint rblock;
1654     ajlong spos;
1655     AjBool ispir     = ajFalse;
1656     char *p = NULL;
1657     AjBool continued = ajFalse;
1658 
1659     qry  = fttabin->Input->Query;
1660     qryd = qry->QryData;
1661 
1662     if(!featRegGcgId)
1663     {
1664         featRegGcgId =ajRegCompC("^>...([^ ]+) +([^ ]+) +(Dummy Header|[^ ]+)"
1665                                 " +([^ ]+) +([0-9]+)");
1666         featRegGcgId2=ajRegCompC("^>[PF]1;([^ ]+)");
1667     }
1668 
1669     if(!featRegGcgSplit)
1670         featRegGcgSplit =ajRegCompC("_0+$");
1671 
1672     ajDebug("featGcgReadSeq pos: %Ld\n", ajFileResetPos(qryd->libs));
1673 
1674     if(!ajReadline(qryd->libs, &line))  /* end of file */
1675         return ajFalse;
1676 
1677     ajDebug("test ID line\n'%S'\n", line);
1678 
1679     if(ajRegExec(featRegGcgId, line))
1680     {
1681         continued = ajFalse;
1682         ajRegSubI(featRegGcgId, 3, &gcgtype);
1683         ajRegSubI(featRegGcgId, 5, &tmpstr);
1684         ajRegSubI(featRegGcgId, 1, &id);
1685 
1686         if(ajRegExec(featRegGcgSplit, id))
1687         {
1688             continued = ajTrue;
1689             p = ajStrGetuniquePtr(&id);
1690             p = strrchr(p,(ajint)'_');
1691             *(++p)='\0';
1692             ajStrSetValid(&id);
1693 
1694             if(!contseq)
1695                 contseq = ajStrNew();
1696 
1697             if(!dstr)
1698                 dstr = ajStrNew();
1699         }
1700 
1701         ajStrToInt(tmpstr, &gcglen);
1702     }
1703     else if(ajRegExec(featRegGcgId2, line))
1704     {
1705         ajStrAssignC(&gcgtype, "ASCII");
1706         ajRegSubI(featRegGcgId, 1, &tmpstr);
1707         ispir = ajTrue;
1708     }
1709     else
1710     {
1711         ajDebug("featGcgReadSeq bad ID line\n'%S'\n", line);
1712         ajFatal("featGcgReadSeq bad ID line\n'%S'\n", line);
1713 
1714         return ajFalse;
1715     }
1716 
1717     if(!ajReadline(qryd->libs, &line))  /* desc line */
1718         return ajFalse;
1719 
1720     /*
1721     ** need to pick up the length and type, and read to the end of sequence
1722     ** see fasta code to get a real sequence for this
1723     ** Also need to handle split entries and go find the rest
1724     */
1725 
1726     if(ispir)
1727     {
1728         spos = ajFileResetPos(qryd->libs);
1729 
1730         while(ajReadline(qryd->libs, &line))
1731         {                               /* end of file */
1732             if(ajStrGetCharFirst(line) == '>')
1733             {                           /* start of next entry */
1734                 ajFileSeek(qryd->libs, spos, 0);
1735                 break;
1736             }
1737 
1738             spos = ajFileResetPos(qryd->libs);
1739             ajFilebuffLoadS(fttabin->Input->Filebuff, line);
1740         }
1741     }
1742     else
1743     {
1744         ajStrSetRes(&contseq, gcglen+3);
1745         rblock = gcglen;
1746 
1747         if(ajStrGetCharFirst(gcgtype) == '2')
1748             rblock = (rblock+3)/4;
1749 
1750         if(!ajReadbinBinary(qryd->libs, rblock, 1,
1751                             ajStrGetuniquePtr(&contseq)))
1752             ajFatal("error reading file %F", qryd->libs);
1753 
1754         /* convert 2bit to ascii */
1755         if(ajStrGetCharFirst(gcgtype) == '2')
1756             featGcgBinDecode(&contseq, gcglen);
1757         else if(ajStrGetCharFirst(gcgtype) == 'A')
1758         {
1759             /* are seq chars OK? */
1760             ajStrSetValidLen(&contseq, gcglen);
1761         }
1762         else
1763         {
1764             ajRegSubI(featRegGcgId, 1, &tmpstr);
1765             ajFatal("Unknown GCG entry type '%S', entry name '%S'",
1766                     gcgtype, tmpstr);
1767         }
1768 
1769         if(!ajReadline(qryd->libs, &line)) /* newline at end */
1770             ajFatal("error reading file %F", qryd->libs);
1771         ajStrDel(&contseq);
1772 
1773         if(continued)
1774         {
1775             spos = ajFileResetPos(qryd->libs);
1776 
1777             while(ajReadline(qryd->libs,&line))
1778             {
1779                 ajRegExec(featRegGcgId, line);
1780                 ajRegSubI(featRegGcgId, 5, &tmpstr);
1781                 ajRegSubI(featRegGcgId, 1, &idc);
1782 
1783                 if(!ajStrPrefixS(idc,id))
1784                 {
1785                     ajFileSeek(qryd->libs, spos, 0);
1786                     break;
1787                 }
1788 
1789                 ajStrToInt(tmpstr, &gcglen);
1790 
1791                 if(!ajReadline(qryd->libs, &dstr)) /* desc line */
1792                     return ajFalse;
1793 
1794                 ajStrSetRes(&contseq, gcglen+3);
1795 
1796                 rblock = gcglen;
1797                 if(ajStrGetCharFirst(gcgtype) == '2')
1798                     rblock = (rblock+3)/4;
1799 
1800                 if(!ajReadbinBinary(qryd->libs, rblock, 1,
1801                                     ajStrGetuniquePtr(&contseq)))
1802                     ajFatal("error reading file %F", qryd->libs);
1803 
1804                 /* convert 2bit to ascii */
1805                 if(ajStrGetCharFirst(gcgtype) == '2')
1806                     featGcgBinDecode(&contseq, gcglen);
1807                 else if(ajStrGetCharFirst(gcgtype) == 'A')
1808                 {
1809                     /* are seq chars OK? */
1810                     ajStrSetValidLen(&contseq, gcglen);
1811                 }
1812                 else
1813                 {
1814                     ajRegSubI(featRegGcgId, 1, &tmpstr);
1815                     ajFatal("Unknown GCG entry: name '%S'",
1816                             tmpstr);
1817                 }
1818 
1819                 if(!ajReadline(qryd->libs, &line)) /* newline at end */
1820                     ajFatal("error reading file %F", qryd->libs);
1821 
1822                 if(!featRegGcgCont)
1823                     featRegGcgCont = ajRegCompC("^([^ ]+) +([^ ]+) +([^ ]+) +"
1824                                                "([^ ]+) +([^ ]+) +([^ ]+) "
1825                                                "+([^ ]+) +"
1826                                                "([^ ]+) +([0-9]+)");
1827 
1828                 ajRegExec(featRegGcgCont, dstr);
1829                 ajRegSubI(featRegGcgCont, 9, &tmpstr);
1830                 ajStrToInt(tmpstr, &pos);
1831                 /*seqin->Inseq->Len = pos-1;*/
1832 
1833                 /*ajStrAppendS(&seqin->Inseq,contseq);*/
1834                 spos = ajFileResetPos(qryd->libs);
1835             }
1836         }
1837     }
1838 
1839     ajStrDel(&line);
1840     ajStrDel(&gcgtype);
1841     ajStrDel(&tmpstr);
1842     ajStrDel(&dstr);
1843     ajStrDel(&id);
1844     ajStrDel(&idc);
1845     ajStrDel(&contseq);
1846 
1847     return ajTrue;
1848 }
1849 
1850 
1851 
1852 
1853 /* @funcstatic featGcgBinDecode ***********************************************
1854 **
1855 ** Convert GCG binary to ASCII sequence.
1856 **
1857 ** @param [u] pthis [AjPStr*] Binary string
1858 ** @param [r] sqlen [ajuint] Expected sequence length
1859 ** @return [void]
1860 **
1861 ** @release 6.5.0
1862 ** @@
1863 ******************************************************************************/
1864 
featGcgBinDecode(AjPStr * pthis,ajuint sqlen)1865 static void featGcgBinDecode(AjPStr *pthis, ajuint sqlen)
1866 {
1867     char* seqp;
1868     char* cp;
1869     char* start;
1870     const char* gcgbton="CTAG";
1871     char stmp;
1872     ajint rdlen;
1873 
1874     start = ajStrGetuniquePtr(pthis);
1875     rdlen = (sqlen+3)/4;
1876 
1877     seqp = start + rdlen;
1878     cp = start + 4*rdlen;
1879 
1880     ajDebug("seqp:%x start:%x cp:%x sqlen:%d len:%d size:%d (seqp-start):%d\n",
1881             seqp, start, cp, sqlen,
1882             ajStrGetLen(*pthis), ajStrGetRes(*pthis),
1883             (seqp - start));
1884 
1885     while(seqp > start)
1886     {
1887         stmp = *--seqp;
1888         *--cp = gcgbton[stmp&3];
1889         *--cp = gcgbton[(stmp >>= 2)&3];
1890         *--cp = gcgbton[(stmp >>= 2)&3];
1891         *--cp = gcgbton[(stmp >>= 2)&3];
1892     }
1893 
1894     start[sqlen] = '\0';
1895     ajStrSetValidLen(pthis, sqlen);
1896 
1897     return;
1898 }
1899 
1900 
1901 
1902 
1903 /* @funcstatic featGcgAll *****************************************************
1904 **
1905 ** Opens the first or next GCG file for further reading
1906 **
1907 ** @param [u] fttabin [AjPFeattabin] Feature table input.
1908 ** @return [AjBool] ajTrue on success.
1909 **
1910 ** @release 6.5.0
1911 ** @@
1912 ******************************************************************************/
1913 
featGcgAll(AjPFeattabin fttabin)1914 static AjBool featGcgAll(AjPFeattabin fttabin)
1915 {
1916     AjPQuery qry;
1917     FeatPCdQry qryd;
1918 
1919     qry = fttabin->Input->Query;
1920     qryd = qry->QryData;
1921 
1922     ajDebug("featGcgAll\n");
1923 
1924     if(!qry->QryData)
1925     {
1926         ajDebug("featGcgAll initialising\n");
1927         fttabin->Input->Single = ajTrue;
1928 
1929         if(!featCdQryOpen(qry))
1930         {
1931             ajErr("featGcgAll failed");
1932 
1933             return ajFalse;
1934         }
1935     }
1936 
1937     qryd = qry->QryData;
1938     ajFilebuffDel(&fttabin->Input->Filebuff);
1939     fttabin->Input->Filebuff = ajFilebuffNewNofile();
1940 
1941     if(!qryd->libr)
1942     {
1943         if(!featCdDivNext(qry))
1944         {
1945             featCdQryClose(qry);
1946             ajDebug("featGcgAll finished\n");
1947 
1948             return ajFalse;
1949         }
1950 
1951         if(!featCdQryFile(qry))
1952         {
1953             ajErr("featGcgAll out of data");
1954 
1955             return ajFalse;
1956         }
1957 
1958         ajDebug("featCdQryOpen processing file %2d '%F'\n", qryd->div,
1959                 qryd->libr);
1960         if(qryd->libs)
1961             ajDebug("               sequence file    '%F'\n", qryd->libs);
1962     }
1963 
1964     featGcgLoadBuff(fttabin);
1965 
1966     if(!qry->CaseId)
1967         qry->QryDone = ajTrue;
1968 
1969     return ajTrue;
1970 }
1971 
1972 
1973 
1974 
1975 /* @funcstatic featCdDivNext **************************************************
1976 **
1977 ** Sets the division count to the next included file. We need the division
1978 ** file to be already open.
1979 **
1980 ** @param [u] qry [AjPQuery] query object.
1981 ** @return [ajuint] File number (starting at 1) or zero if all files are done.
1982 **
1983 ** @release 6.5.0
1984 ** @@
1985 ******************************************************************************/
1986 
featCdDivNext(AjPQuery qry)1987 static ajuint featCdDivNext(AjPQuery qry)
1988 {
1989     FeatPCdQry qryd;
1990     AjPStr fullName = NULL;
1991     ajuint i;
1992 
1993     qryd = qry->QryData;
1994 
1995     ajDebug("featCdDivNext div: %d dfp: %x nameSize: %d name '%s'\n",
1996             qryd->div, qryd->maxdiv, qryd->nameSize, qryd->name);
1997 
1998     for(i=qryd->div; i < qryd->maxdiv; i++)
1999         if(!qryd->Skip[i])
2000         {
2001             qryd->div = i+1;
2002             ajDebug("next file is %d '%S'\n", qryd->div, fullName);
2003             return qryd->div;
2004         }
2005         else
2006             ajDebug("skip %d  '%S'\n", (i+1), fullName);
2007 
2008     return 0;
2009 }
2010 
2011 
2012 
2013 
2014 /* @funcstatic featCdQryFile **************************************************
2015 **
2016 ** Opens a specific file number for an EMBLCD index
2017 **
2018 ** @param [u] qry [AjPQuery] Query data
2019 ** @return [AjBool] ajTrue on success
2020 **
2021 ** @release 6.5.0
2022 ** @@
2023 ******************************************************************************/
2024 
featCdQryFile(AjPQuery qry)2025 static AjBool featCdQryFile(AjPQuery qry)
2026 {
2027     FeatPCdQry qryd;
2028     short j;
2029 
2030     if(!featCdDivExp)
2031         featCdDivExp = ajRegCompC("^([^ ]+)( +([^ ]+))?");
2032 
2033     ajDebug("featCdQryFile qry %x\n",qry);
2034     qryd = qry->QryData;
2035     ajDebug("featCdQryFile qryd %x\n",qryd);
2036     ajDebug("featCdQryFile %F\n",qryd->dfp->File);
2037 
2038     featCdFileSeek(qryd->dfp, (qryd->div - 1));
2039 
2040     /* note - we must not use featCdFileReadName - we need spaces for GCG */
2041 
2042     ajReadbinInt2(qryd->dfp->File, &j);
2043 
2044     ajReadbinChar(qryd->dfp->File, qryd->nameSize, qryd->name);
2045     ajDebug("DivCode: %d, code: %2hd '%s'\n",
2046             qryd->div, j, qryd->name);
2047 
2048     /**ajCharFmtLower(qryd->name);**/
2049     if(!ajRegExecC(featCdDivExp, qryd->name))
2050     {
2051         ajErr("index division file error '%S'", qryd->name);
2052 
2053         return ajFalse;
2054     }
2055 
2056     ajRegSubI(featCdDivExp, 1, &qryd->datfile);
2057     ajRegSubI(featCdDivExp, 3, &qryd->seqfile);
2058     ajDebug("File(s) '%S' '%S'\n", qryd->datfile, qryd->seqfile);
2059 
2060     ajFileClose(&qryd->libr);
2061     qryd->libr = ajFileNewInNamePathS(qryd->datfile, qry->Directory);
2062 
2063     if(!qryd->libr)
2064     {
2065         ajErr("Cannot open database file '%S' for database '%S'",
2066 	      qryd->datfile, qry->DbName);
2067 
2068         return ajFalse;
2069     }
2070 
2071     if(ajStrGetLen(qryd->seqfile))
2072     {
2073         ajFileClose(&qryd->libs);
2074         qryd->libs = ajFileNewInNamePathS(qryd->seqfile, qry->Directory);
2075 
2076         if(!qryd->libs)
2077         {
2078             ajErr("Cannot open sequence file '%S' for database '%S'",
2079 		  qryd->seqfile, qry->DbName);
2080 
2081             return ajFalse;
2082         }
2083     }
2084     else
2085         qryd->libs = NULL;
2086 
2087     return ajTrue;
2088 }
2089 
2090 
2091 
2092 
2093 /* @funcstatic featCdTrgQuery *************************************************
2094 **
2095 ** Binary search of an EMBL CD-ROM index file for entries matching a
2096 ** wildcard query.
2097 **
2098 ** Where more than one query field is defined (usually acc and sv) it
2099 ** can test all and append to a single list.
2100 **
2101 ** @param [u] qry [AjPQuery] Query object.
2102 ** @param [r] field [const AjPStr] Query field
2103 ** @param [r] wildqry [const AjPStr] Query string
2104 ** @return [AjBool] ajTrue on success.
2105 **
2106 ** @release 6.5.0
2107 ** @@
2108 ******************************************************************************/
2109 
featCdTrgQuery(AjPQuery qry,const AjPStr field,const AjPStr wildqry)2110 static AjBool featCdTrgQuery(AjPQuery qry, const AjPStr field,
2111                              const AjPStr wildqry)
2112 {
2113     ajint ret=0;
2114 
2115     if(ajStrMatchC(field, "org"))
2116         ret += featCdTrgFind(qry, "taxon", wildqry);
2117 
2118     if(ajStrMatchC(field, "key"))
2119         ret += featCdTrgFind(qry, "keyword", wildqry);
2120 
2121     if(ajStrMatchC(field, "des"))
2122         ret += featCdTrgFind(qry, "des", wildqry);
2123 
2124     if(ajStrMatchC(field, "sv"))
2125         ret += featCdTrgFind(qry, "seqvn", wildqry);
2126 
2127     if(ajStrMatchC(field, "gi"))
2128         ret += featCdTrgFind(qry, "gi", wildqry);
2129 
2130     if(qry->HasAcc && ajStrMatchC(field, "acc"))
2131         ret += featCdTrgFind(qry, "acnum", wildqry);
2132 
2133 
2134     if(ret)
2135         return ajTrue;
2136 
2137     return ajFalse;
2138 }
2139 
2140 
2141 
2142 
/* @funcstatic featCdTrgFind **************************************************
**
** Binary search of an EMBL CD-ROM target (.trg) index for records matching
** a wildcard query, appending an entry to the query results list for each
** hit whose division is not flagged in the Skip array.
**
** The search runs in up to three passes:
** (1a) if the query has a fixed prefix, binary search for a record
**      matching the prefix, moving downwards on each hit;
** (1b) a second binary search moving upwards on each hit;
** (2)  a linear wildcard test of every record in the resulting range
**      (all records when there is no fixed prefix).
**
** @param [u] qry [AjPQuery] Query object.
** @param [r] indexname [const char*] Index name.
** @param [r] queryName [const AjPStr] Query string.
** @return [ajuint] Length of the query results list after the search.
**                  Zero if the target files cannot be opened or no record
**                  matches the fixed prefix.
**
** @release 6.5.0
** @@
******************************************************************************/

static ajuint featCdTrgFind(AjPQuery qry, const char* indexname,
                            const AjPStr queryName)
{
    FeatPCdQry wild;
    AjPList   l;
    FeatPCdTrg trgline;
    FeatPCdIdx idxline;
    FeatPCdFile idxfp;
    FeatPCdFile trgfp;
    FeatPCdFile hitfp;
    AjBool *skip;

    AjPStr fdstr    = NULL;
    AjPStr fdprefix = NULL;

    ajint t;
    ajint b;
    ajint t2;
    ajint b2;
    ajint t3;
    ajint pos = 0;
    ajint prefixlen;
    ajint start;
    ajint end;
    ajint i;
    ajint j;
    ajint k;
    ajint cmp;
    AjBool match;

    AjBool first;
    char   *name;

    FeatPCdEntry entry;


    /* local copies of the query data members used below */
    wild    = qry->QryData;
    l       = qry->ResultsList;
    trgline = wild->trgLine;
    idxline = wild->idxLine;
    idxfp   = wild->ifp;
    trgfp   = wild->trgfp;
    hitfp   = wild->hitfp;
    skip    = wild->Skip;


    /* trgfp and hitfp are passed by address: only the local copies are set */
    if(!featCdTrgOpen(qry->IndexDir, indexname, &trgfp, &hitfp))
        return 0;

    /* fdstr is the original query string, in uppercase */

    /* fdprefix is the fixed (no wildcard) prefix of fdstr */

    ajStrAssignS(&fdstr,queryName);
    ajStrFmtUpper(&fdstr);
    ajStrAssignS(&fdprefix,fdstr);

    ajStrRemoveWild(&fdprefix);

    ajDebug("queryName '%S' fdstr '%S' fdprefix '%S'\n",
            queryName, fdstr, fdprefix);

    /* default range is every record: used as is when there is no prefix */
    b = b2 = 0;
    t = t2 = t3 = trgfp->NRecords - 1;

    prefixlen = ajStrGetLen(fdprefix);
    first = ajTrue;

    if(prefixlen)
    {
        /*
        ** (1a) we have a prefix (no wildcard at the start)
        ** look for the prefix fdprefix
        ** Set range of records that match (will be consecutive of course)
        ** from first match
        */

        while(b<=t)
        {
            pos = (t+b)/2;
            name = featCdTrgName(pos,trgfp);
            /*
            ** NOTE(review): writes into the buffer returned by
            ** featCdTrgName without checking its size against prefixlen
            ** - confirm the buffer is always long enough
            */
            name[prefixlen]='\0';      /* truncate to prefix length */
            cmp = ajStrCmpC(fdprefix,name);
            /*      match = ajStrMatchWildC(fdstr,name);*/
            /* NOTE(review): cmp is an integer but is printed with %B */
            ajDebug(" trg testc %d '%s' '%S' %B (+/- %d)\n",
                    pos,name,fdprefix,cmp, t-b);
            if(!cmp)
            {
                ajDebug(" trg hit %d\n",pos);

                if(first)
                {
                    /* remember the upper bound and position of first hit */
                    first = ajFalse;
                    t2 = t;
                    t3 = pos;
                }

                b2 = pos;
            }

            /* a hit (cmp == 0) moves the top down, so b2 only decreases */
            if(cmp>0)
                b = pos+1;
            else
                t = pos-1;
        }

        if(first)
        {
            /* no record matched the prefix: clean up and report no hits */
            ajStrDel(&fdprefix);
            ajStrDel(&fdstr);
            featCdTrgClose(&trgfp,&hitfp);

            return ajFalse;
        }

        ajDebug("first pass: pos:%d b2:%d t2:%d\n",pos,b2,t2);

        /*
        ** (1b) Process below
        */

        /* search again between b2-1 and the upper bound saved at first hit */
        b = b2-1;
        t = t2;

        while(b<=t)
        {
            pos = (t+b)/2;
            name = featCdTrgName(pos,trgfp);
            name[prefixlen]='\0';
            cmp = ajStrCmpC(fdprefix,name);
            /* match = ajStrMatchWildC(fdstr,name); */
            ajDebug(" trg testd %d '%s' '%S' %B (+/- %d)\n",
                    pos,name,fdprefix,cmp,t-b);

            if(!cmp)
            {
                ajDebug(" trg hit %d\n",pos);
                t3 = pos;
            }

            /* a hit (cmp == 0) moves the bottom up, so t3 only increases */
            if(cmp<0)
                t = pos-1;
            else
                b = pos+1;
        }

        ajDebug("second pass: pos:%d b2:%d t3:%d\n",pos,b2,t3);
        name = featCdTrgName(b2,trgfp);
        ajDebug("first %d '%s'\n",b2,name);
        name = featCdTrgName(t3,trgfp);
        ajDebug("last %d '%s'\n",t3,name);
    }


    /* (2) test every record from b2 to t3 inclusive against the full query */
    start = b2;
    end   = t3;

    for(i=start;i<(end+1);++i)
    {
        name = featCdTrgName(i,trgfp);
        match = ajCharMatchWildC(name, ajStrGetPtr(fdstr));

        ajDebug("third pass: match:%B i:%d name '%s' queryName '%S'\n",
                match, i, name, fdstr);

        if(!match)
            continue;

        /* read the target record, then seek to its first hit record */
        featCdTrgLine(trgline, i, trgfp);
        featCdFileSeek(hitfp,trgline->FirstHit-1);
        ajDebug("Query First: %d Count: %d\n",
                trgline->FirstHit, trgline->NHits);
        pos = trgline->FirstHit;

        for(j=0;j<(ajint)trgline->NHits;++j)
        {
            /* each hit is an entry number; decremented before the lookup */
            ajReadbinInt(hitfp->File, &k);
            --k;
            ajDebug("hitlist[%d] entry = %d\n",j,k);
            featCdIdxLine(idxline,k,idxfp);

            /*
            ** NOTE(review): DivCode-1 indexes skip without a range check
            ** - presumably DivCode is always 1..maxdiv; verify
            */
            if(!skip[idxline->DivCode-1])
            {
                AJNEW0(entry);
                entry->div = idxline->DivCode;
                entry->annoff = idxline->AnnOffset;
                entry->seqoff = idxline->SeqOffset;
                ajListPushAppend(l,(void*)entry);
            }
            else
                ajDebug("SKIP: token '%S' [file %d]\n",
                        queryName,idxline->DivCode);
        }
    }

    featCdTrgClose(&trgfp, &hitfp);


    ajStrDel(&trgline->Target);
    ajStrDel(&fdstr);
    ajStrDel(&fdprefix);

    /* the whole list length is returned, not only the entries added here */
    return (ajuint) ajListGetLength(l);
}
2364 
2365 
2366 
2367 
2368 /* @funcstatic featCdIdxDel ***************************************************
2369 **
2370 ** Destructor for FeatPCdIdx
2371 **
2372 ** @param [d] pthys [FeatPCdIdx*] Cd index object
2373 ** @return [void]
2374 **
2375 ** @release 6.5.0
2376 ******************************************************************************/
2377 
featCdIdxDel(FeatPCdIdx * pthys)2378 static void featCdIdxDel(FeatPCdIdx* pthys)
2379 {
2380     FeatPCdIdx thys = *pthys;
2381 
2382     if(!thys)
2383         return;
2384 
2385     ajStrDel(&thys->EntryName);
2386     AJFREE(*pthys);
2387 
2388     return;
2389 }
2390 
2391 
2392 
2393 
2394 /* @funcstatic featCdTrgDel ***************************************************
2395 **
2396 ** Destructor for FeatPCdTrg
2397 **
2398 ** @param [d] pthys [FeatPCdTrg*] Cd index target object
2399 ** @return [void]
2400 **
2401 ** @release 6.5.0
2402 **
2403 ******************************************************************************/
2404 
featCdTrgDel(FeatPCdTrg * pthys)2405 static void featCdTrgDel(FeatPCdTrg* pthys)
2406 {
2407     FeatPCdTrg thys = *pthys;
2408 
2409     if(!thys)
2410         return;
2411 
2412     ajStrDel(&thys->Target);
2413     AJFREE(*pthys);
2414 
2415     return;
2416 }
2417 
2418 
2419 
2420 
2421 /* @section B+tree GCG Database Indexing *************************************
2422 **
2423 ** These functions manage the EMBOSS B+tree GCG index access methods.
2424 **
2425 ******************************************************************************/
2426 
2427 
2428 
2429 
2430 /* @funcstatic featAccessEmbossGcg ********************************************
2431 **
2432 ** Reads feature(s) from a GCG formatted database, using B+tree index
2433 ** files. Returns with the file pointer set to the position in the
2434 ** reference and sequence file.
2435 **
2436 ** @param [u] fttabin [AjPFeattabin] Feature table input.
2437 ** @return [AjBool] ajTrue on success.
2438 **
2439 ** @release 6.5.0
2440 ** @@
2441 ******************************************************************************/
2442 
featAccessEmbossGcg(AjPFeattabin fttabin)2443 static AjBool featAccessEmbossGcg(AjPFeattabin fttabin)
2444 {
2445     AjBool retval = ajFalse;
2446 
2447     AjPQuery qry;
2448     FeatPEmbossQry qryd = NULL;
2449 
2450 
2451     qry = fttabin->Input->Query;
2452     qryd = qry->QryData;
2453     ajDebug("featAccessEmbossGcg type %d\n", qry->QueryType);
2454 
2455     if(!ajNamDbGetDbalias(qry->DbName, &qry->DbAlias))
2456         ajStrAssignS(&qry->DbAlias, qry->DbName);
2457 
2458     if(qry->QueryType == AJQUERY_ALL)
2459         return featEmbossGcgAll(fttabin);
2460 
2461 
2462     if(!qry->QryData)
2463     {
2464         if(!featEmbossQryOpen(qry))
2465             return ajFalse;
2466 
2467         qryd = qry->QryData;
2468         fttabin->Input->Single = ajTrue;
2469         ajFilebuffDel(&fttabin->Input->Filebuff);
2470         fttabin->Input->Filebuff = ajFilebuffNewNofile();
2471 
2472         if(qry->QueryType == AJQUERY_ENTRY)
2473         {
2474             if(!featEmbossQryEntry(qry))
2475                 ajDebug("embossgcg B+tree Entry failed\n");
2476         }
2477 
2478         if(qry->QueryType == AJQUERY_QUERY)
2479         {
2480             if(!featEmbossQryQuery(qry))
2481                 ajDebug("embossgcg B+tree Query failed\n");
2482         }
2483     }
2484     else
2485     {
2486         if(!featEmbossQryReuse(qry))
2487         {
2488             featEmbossQryClose(qry);
2489             return ajFalse;
2490         }
2491         ajFilebuffClear(fttabin->Input->Filebuff, -1);
2492     }
2493 
2494     if(ajListGetLength(qry->ResultsList))
2495     {
2496         retval = featEmbossQryNext(qry);
2497 
2498         if(retval)
2499         {
2500             featEmbossGcgLoadBuff(fttabin);
2501             ajStrAssignS(&fttabin->Input->Db, qry->DbName);
2502         }
2503     }
2504 
2505     if(!ajListGetLength(qry->ResultsList)) /* could be emptied by code above */
2506     {
2507         featEmbossQryClose(qry);
2508         ajFileClose(&qryd->libs);
2509         ajFileClose(&qryd->libr);
2510     }
2511 
2512     return retval;
2513 }
2514 
2515 
2516 
2517 
2518 /* @funcstatic featEmbossGcgAll ***********************************************
2519 **
2520 ** Opens the first or next GCG file for further reading
2521 **
2522 ** @param [u] fttabin [AjPFeattabin] Feature table input.
2523 ** @return [AjBool] ajTrue on success.
2524 **
2525 ** @release 6.5.0
2526 ** @@
2527 ******************************************************************************/
2528 
featEmbossGcgAll(AjPFeattabin fttabin)2529 static AjBool featEmbossGcgAll(AjPFeattabin fttabin)
2530 {
2531     AjPQuery qry;
2532     FeatPEmbossQry qryd;
2533     static ajint i   = 0;
2534     ajuint iref;
2535     AjPStr name      = NULL;
2536     AjBool ok        = ajFalse;
2537 /*
2538   AjPStrTok handle = NULL;
2539   AjPStr wildname  = NULL;
2540   AjBool found     = ajFalse;
2541 */
2542 
2543     qry = fttabin->Input->Query;
2544     qryd = qry->QryData;
2545 
2546     ajDebug("featEmbossGcgAll\n");
2547 
2548 
2549     if(!qry->QryData)
2550     {
2551         ajDebug("featEmbossGcgAll initialising\n");
2552 
2553         qry->QryData = AJNEW0(qryd);
2554         qryd = qry->QryData;
2555         i = -1;
2556         ajBtreeReadEntriesS(qry->DbAlias,qry->IndexDir,
2557                             qry->Directory,
2558                             &qryd->files,
2559                             &qryd->reffiles,
2560                             &qryd->refcount);
2561 
2562         fttabin->Input->Single = ajTrue;
2563     }
2564 
2565     qryd = qry->QryData;
2566     ajFilebuffDel(&fttabin->Input->Filebuff);
2567     fttabin->Input->Filebuff = ajFilebuffNewNofile();
2568 
2569     if(!qryd->libs)
2570     {
2571         while(!ok && qryd->files[++i])
2572         {
2573             ajStrAssignS(&name,qryd->files[i]);
2574             if(ajFilenameTestInclude(name, qry->Exclude, qry->Filename))
2575                 ok = ajTrue;
2576         }
2577 
2578         ajStrDel(&name);
2579 
2580 /*      if(qry->Exclude)
2581         {
2582         ok = ajFalse;
2583         wildname = ajStrNew();
2584         name     = ajStrNew();
2585         while(!ok)
2586         {
2587         ajStrAssignS(&name,qryd->files[i]);
2588         ajFilenameTrimPath(&name);
2589         handle = ajStrTokenNewC(qry->Exclude," \n");
2590         found = ajFalse;
2591         while(ajStrTokenNextParseC(&handle," \n",&wildname))
2592         if(ajStrMatchWildS(name,wildname))
2593         {
2594         found = ajTrue;
2595         break;
2596         }
2597         ajStrTokenDel(&handle);
2598         if(!found)
2599         ok = ajTrue;
2600         else
2601         {
2602         ++i;
2603         if(!qryd->files[i])
2604         ok = ajTrue;
2605         }
2606         }
2607 
2608         ajStrDel(&wildname);
2609         ajStrDel(&name);
2610         }
2611 */
2612 
2613         if(!qryd->files[i])
2614         {
2615             ajDebug("featEmbossGcgAll finished\n");
2616             i=0;
2617 
2618             while(qryd->files[i])
2619             {
2620                 ajStrDel(&qryd->files[i]);
2621 
2622                 if(qryd->reffiles)
2623                 {
2624                     for(iref=0; iref < qryd->refcount; iref++)
2625                         ajStrDel(&qryd->reffiles[iref][i]);
2626                 }
2627 
2628                 ++i;
2629             }
2630 
2631             AJFREE(qryd->files);
2632             AJFREE(qryd->reffiles);
2633 
2634             AJFREE(qry->QryData);
2635             qry->QryData = NULL;
2636 
2637             return ajFalse;
2638         }
2639 
2640 
2641         qryd->libs = ajFileNewInNameS(qryd->files[i]);
2642 
2643         if(!qryd->libs)
2644         {
2645             ajDebug("featEmbossGcgAll: cannot open sequence file\n");
2646 
2647             return ajFalse;
2648         }
2649 
2650 
2651         if(qryd->reffiles)
2652             qryd->libr = ajFileNewInNameS(qryd->reffiles[0][i]);
2653 
2654         if(!qryd->libr)
2655         {
2656             ajDebug("featEmbossGcgAll: cannot open reference file\n");
2657 
2658             return ajFalse;
2659         }
2660     }
2661 
2662     featEmbossGcgLoadBuff(fttabin);
2663 
2664     if(!qry->CaseId)
2665         qry->QryDone = ajTrue;
2666 
2667     return ajTrue;
2668 }
2669 
2670 
2671 
2672 
2673 /* @funcstatic featEmbossGcgLoadBuff ******************************************
2674 **
2675 ** Copies text data to a buffered file, and skips sequence data.
2676 **
2677 ** @param [u] fttabin [AjPFeattabin] Feature table input object
2678 ** @return [void]
2679 **
2680 ** @release 6.5.0
2681 ** @@
2682 ******************************************************************************/
2683 
featEmbossGcgLoadBuff(AjPFeattabin fttabin)2684 static void featEmbossGcgLoadBuff(AjPFeattabin fttabin)
2685 {
2686     AjPQuery qry;
2687     FeatPEmbossQry qryd;
2688 
2689     qry  = fttabin->Input->Query;
2690     qryd = qry->QryData;
2691 
2692     ajDebug("featEmbossGcgLoadBuff\n");
2693 
2694     if(!qry->QryData)
2695         ajFatal("featEmbossGcgLoadBuff Query Data not initialised");
2696 
2697     /* copy all the ref data */
2698 
2699     featEmbossGcgReadRef(fttabin);
2700 
2701     /* skip the sequence (do we care about the format?) */
2702     featEmbossGcgReadSeq(fttabin);
2703 
2704     /* ajFilebuffTraceFull(fttabin->Input->Filebuff, 9999, 100); */
2705 
2706     if(!qryd->libr)
2707         ajFileClose(&qryd->libs);
2708 
2709     return;
2710 }
2711 
2712 
2713 
2714 
2715 /* @funcstatic featEmbossGcgReadRef *******************************************
2716 **
2717 ** Copies text data to a buffered file for reading later
2718 **
2719 ** @param [u] fttabin [AjPFeattabin] Feature table input object
2720 ** @return [AjBool] ajTrue on success
2721 **
2722 ** @release 6.5.0
2723 ** @@
2724 ******************************************************************************/
2725 
featEmbossGcgReadRef(AjPFeattabin fttabin)2726 static AjBool featEmbossGcgReadRef(AjPFeattabin fttabin)
2727 {
2728     AjPStr line = NULL;
2729     AjPQuery qry;
2730     FeatPEmbossQry qryd;
2731     ajlong rpos;
2732     AjPStr id       = NULL;
2733     AjPStr idc      = NULL;
2734     AjBool ispir           = ajFalse;
2735     AjBool continued       = ajFalse;
2736     AjBool testcontinue    = ajFalse;
2737     char *p = NULL;
2738 
2739     qry  = fttabin->Input->Query;
2740     qryd = qry->QryData;
2741 
2742     if(!featRegGcgRefId)
2743         featRegGcgRefId =ajRegCompC("^>...([^ \n]+)");
2744 
2745     if(!featRegGcgSplit)
2746         featRegGcgSplit =ajRegCompC("_0+$");
2747 
2748     if(!ajReadline(qryd->libr, &line))  /* end of file */
2749         return ajFalse;
2750 
2751     if(ajStrGetCharFirst(line) != '>')  /* not start of entry */
2752         ajFatal("featGcgReadRef bad entry start:\n'%S'", line);
2753 
2754     if(ajStrGetCharPos(line, 3) == ';') /* PIR entry */
2755         ispir = ajTrue;
2756 
2757     if(ispir)
2758         ajFilebuffLoadS(fttabin->Input->Filebuff, line);
2759 
2760 
2761     if(ajRegExec(featRegGcgRefId, line))
2762     {
2763         continued = ajFalse;
2764         ajRegSubI(featRegGcgRefId, 1, &id);
2765 
2766         if(ajRegExec(featRegGcgSplit,id))
2767         {
2768             continued = ajTrue;
2769             p = ajStrGetuniquePtr(&id);
2770             p = strrchr(p,(ajint)'_');
2771             *(++p)='\0';
2772             ajStrSetValid(&id);
2773         }
2774     }
2775     else
2776     {
2777         ajDebug("featEmbossGcgReadRef bad ID line\n'%S'\n", line);
2778         ajFatal("featEmbossGcgReadRef bad ID line\n'%S'\n", line);
2779     }
2780 
2781 
2782 
2783     if(!ajReadline(qryd->libr, &line))  /* blank desc line */
2784 
2785         return ajFalse;
2786 
2787     if(ispir)
2788         ajFilebuffLoadS(fttabin->Input->Filebuff, line);
2789 
2790 
2791     rpos = ajFileResetPos(qryd->libr);
2792 
2793     while(ajReadline(qryd->libr, &line))
2794     {
2795         /* end of file */
2796         if(ajStrGetCharFirst(line) == '>')
2797         {                               /* start of next entry */
2798             /* skip over split entries so it can be used for "all" */
2799 
2800             if(continued)
2801             {
2802                 testcontinue=ajTrue;
2803                 ajRegExec(featRegGcgRefId, line);
2804                 ajRegSubI(featRegGcgRefId, 1, &idc);
2805 
2806                 if(!ajStrPrefixS(idc,id))
2807                 {
2808                     ajFileSeek(qryd->libr, rpos, 0);
2809                     ajStrDel(&line);
2810                     ajStrDel(&id);
2811                     ajStrDel(&idc);
2812 
2813                     return ajTrue;
2814                 }
2815             }
2816             else
2817             {
2818                 ajFileSeek(qryd->libr, rpos, 0);
2819                 ajStrDel(&line);
2820                 ajStrDel(&id);
2821                 ajStrDel(&idc);
2822 
2823                 return ajTrue;
2824             }
2825         }
2826         rpos = ajFileResetPos(qryd->libr);
2827 
2828 
2829         if(!testcontinue)
2830         {
2831             ajStrExchangeCC(&line, ". .", "..");
2832             ajFilebuffLoadS(fttabin->Input->Filebuff, line);
2833         }
2834     }
2835 
2836 
2837     /* at end of file */
2838 
2839     ajFileClose(&qryd->libr);
2840     ajStrDel(&line);
2841     ajStrDel(&id);
2842     ajStrDel(&idc);
2843 
2844     return ajTrue;
2845 }
2846 
2847 
2848 
2849 
2850 /* @funcstatic featEmbossGcgReadSeq *******************************************
2851 **
2852 ** Skips sequence data.
2853 **
2854 ** @param [u] fttabin [AjPFeattabin] feature table input object
2855 ** @return [AjBool] ajTrue on success
2856 **
2857 ** @release 6.5.0
2858 ** @@
2859 ******************************************************************************/
2860 
featEmbossGcgReadSeq(AjPFeattabin fttabin)2861 static AjBool featEmbossGcgReadSeq(AjPFeattabin fttabin)
2862 {
2863     AjPStr line = NULL;
2864     AjPQuery qry;
2865     FeatPEmbossQry qryd;
2866     AjPStr gcgtype    = NULL;
2867     AjPStr tmpstr     = NULL;
2868     AjPStr dstr       = NULL;
2869     AjPStr id         = NULL;
2870     AjPStr idc        = NULL;
2871     AjPStr contseq    = NULL;
2872 
2873     ajint gcglen;
2874     ajint pos;
2875     ajint rblock;
2876     ajlong spos;
2877     AjBool ispir     = ajFalse;
2878     char *p = NULL;
2879     AjBool continued = ajFalse;
2880 
2881 
2882     qry  = fttabin->Input->Query;
2883     qryd = qry->QryData;
2884 
2885     if(!featRegGcgId)
2886     {
2887         featRegGcgId =ajRegCompC("^>...([^ ]+) +(Dummy Header|[^ ]+) +"
2888                                  "([^ ]+) +([^ ]+) +([0-9]+)");
2889         featRegGcgId2=ajRegCompC("^>[PF]1;([^ ]+)");
2890     }
2891 
2892     if(!featRegGcgSplit)
2893         featRegGcgSplit =ajRegCompC("_0+$");
2894 
2895     ajDebug("featEmbossGcgReadSeq pos: %Ld\n", ajFileResetPos(qryd->libs));
2896 
2897     if(!ajReadline(qryd->libs, &line))  /* end of file */
2898         return ajFalse;
2899 
2900     ajDebug("test ID line\n'%S'\n", line);
2901 
2902     if(ajRegExec(featRegGcgId, line))
2903     {
2904         continued = ajFalse;
2905         ajRegSubI(featRegGcgId, 3, &gcgtype);
2906         ajRegSubI(featRegGcgId, 5, &tmpstr);
2907         ajRegSubI(featRegGcgId, 1, &id);
2908 
2909         if(ajRegExec(featRegGcgSplit, id))
2910         {
2911             continued = ajTrue;
2912             p = ajStrGetuniquePtr(&id);
2913             p = strrchr(p,(ajint)'_');
2914             *(++p)='\0';
2915             ajStrSetValid(&id);
2916 
2917             if(!contseq)
2918                 contseq = ajStrNew();
2919 
2920             if(!dstr)
2921                 dstr = ajStrNew();
2922         }
2923 
2924         ajStrToInt(tmpstr, &gcglen);
2925     }
2926     else if(ajRegExec(featRegGcgId2, line))
2927     {
2928         ajStrAssignC(&gcgtype, "ASCII");
2929         ajRegSubI(featRegGcgId, 1, &tmpstr);
2930         ispir = ajTrue;
2931     }
2932     else
2933     {
2934         ajDebug("featEmbossGcgReadSeq bad ID line\n'%S'\n", line);
2935         ajFatal("featEmbossGcgReadSeq bad ID line\n'%S'\n", line);
2936 
2937         return ajFalse;
2938     }
2939 
2940     if(!ajReadline(qryd->libs, &line))  /* desc line */
2941         return ajFalse;
2942 
2943     /*
2944     ** need to pick up the length and type, and read to the end of sequence
2945     ** see fasta code to get a real sequence for this
2946     ** Also need to handle split entries and go find the rest
2947     */
2948 
2949     if(ispir)
2950     {
2951         spos = ajFileResetPos(qryd->libs);
2952 
2953         while(ajReadline(qryd->libs, &line))
2954         {                               /* end of file */
2955             if(ajStrGetCharFirst(line) == '>')
2956             {                           /* start of next entry */
2957                 ajFileSeek(qryd->libs, spos, 0);
2958                 break;
2959             }
2960 
2961             spos = ajFileResetPos(qryd->libs);
2962             ajFilebuffLoadS(fttabin->Input->Filebuff, line);
2963         }
2964     }
2965     else
2966     {
2967         ajStrSetRes(&contseq, gcglen+3);
2968         rblock = gcglen;
2969 
2970         if(ajStrGetCharFirst(gcgtype) == '2')
2971             rblock = (rblock+3)/4;
2972 
2973         if(!ajReadbinBinary(qryd->libs, rblock, 1,
2974                             ajStrGetuniquePtr(&contseq)))
2975             ajFatal("error reading file %F", qryd->libs);
2976 
2977         /* convert 2bit to ascii */
2978         if(ajStrGetCharFirst(gcgtype) == '2')
2979             featGcgBinDecode(&contseq, gcglen);
2980         else if(ajStrGetCharFirst(gcgtype) == 'A')
2981         {
2982             /* are seq chars OK? */
2983             ajStrSetValidLen(&contseq, gcglen);
2984         }
2985         else
2986         {
2987             ajRegSubI(featRegGcgId, 1, &tmpstr);
2988             ajFatal("Unknown GCG entry type '%S', entry name '%S'",
2989                     gcgtype, tmpstr);
2990         }
2991 
2992         if(!ajReadline(qryd->libs, &line)) /* newline at end */
2993             ajFatal("error reading file %F", qryd->libs);
2994 
2995         if(continued)
2996         {
2997             spos = ajFileResetPos(qryd->libs);
2998 
2999             while(ajReadline(qryd->libs,&line))
3000             {
3001                 ajRegExec(featRegGcgId, line);
3002                 ajRegSubI(featRegGcgId, 5, &tmpstr);
3003                 ajRegSubI(featRegGcgId, 1, &idc);
3004 
3005                 if(!ajStrPrefixS(idc,id))
3006                 {
3007                     ajFileSeek(qryd->libs, spos, 0);
3008                     break;
3009                 }
3010 
3011                 ajStrToInt(tmpstr, &gcglen);
3012 
3013                 if(!ajReadline(qryd->libs, &dstr)) /* desc line */
3014                     return ajFalse;
3015 
3016                 ajStrSetRes(&contseq, gcglen+3);
3017 
3018                 rblock = gcglen;
3019                 if(ajStrGetCharFirst(gcgtype) == '2')
3020                     rblock = (rblock+3)/4;
3021 
3022                 if(!ajReadbinBinary(qryd->libs, rblock, 1,
3023                                     ajStrGetuniquePtr(&contseq)))
3024                     ajFatal("error reading file %F", qryd->libs);
3025 
3026                 /* convert 2bit to ascii */
3027                 if(ajStrGetCharFirst(gcgtype) == '2')
3028                     featGcgBinDecode(&contseq, gcglen);
3029                 else if(ajStrGetCharFirst(gcgtype) == 'A')
3030                 {
3031                     /* are seq chars OK? */
3032                     ajStrSetValidLen(&contseq, gcglen);
3033                 }
3034                 else
3035                 {
3036                     ajRegSubI(featRegGcgId, 1, &tmpstr);
3037                     ajFatal("Unknown GCG entry: name '%S'",
3038                             tmpstr);
3039                 }
3040 
3041                 if(!ajReadline(qryd->libs, &line)) /* newline at end */
3042                     ajFatal("error reading file %F", qryd->libs);
3043 
3044                 if(!featRegGcgCont)
3045                     featRegGcgCont = ajRegCompC("^([^ ]+) +([^ ]+) +([^ ]+) +"
3046                                                 "([^ ]+) +([^ ]+) +([^ ]+) "
3047                                                 "+([^ ]+) +"
3048                                                 "([^ ]+) +([0-9]+)");
3049 
3050                 ajRegExec(featRegGcgCont, dstr);
3051                 ajRegSubI(featRegGcgCont, 9, &tmpstr);
3052                 ajStrToInt(tmpstr, &pos);
3053                 /*seqin->Inseq->Len = pos-1;*/
3054 
3055                 /*ajStrAppendS(&seqin->Inseq,contseq);*/
3056                 spos = ajFileResetPos(qryd->libs);
3057             }
3058         }
3059     }
3060 
3061     ajStrDel(&gcgtype);
3062     ajStrDel(&line);
3063     ajStrDel(&tmpstr);
3064     ajStrDel(&dstr);
3065     ajStrDel(&id);
3066     ajStrDel(&idc);
3067     ajStrDel(&contseq);
3068 
3069     return ajTrue;
3070 }
3071 
3072 
3073 
3074 
3075 /* @funcstatic featCdQryReuse *************************************************
3076 **
3077 ** Tests whether Cd index query data can be reused or whether we are finished.
3078 **
3079 ** Clears qryData structure when finished.
3080 **
3081 ** @param [u] qry [AjPQuery] Query data
3082 ** @return [AjBool] ajTrue if we can continue,
3083 **                  ajFalse if all is done.
3084 **
3085 ** @release 6.5.0
3086 ** @@
3087 ******************************************************************************/
3088 
featCdQryReuse(AjPQuery qry)3089 static AjBool featCdQryReuse(AjPQuery qry)
3090 {
3091     FeatPCdQry qryd;
3092 
3093     qryd = qry->QryData;
3094 
3095     if(!qry || !qryd)
3096         return ajFalse;
3097 
3098 
3099     /*    ajDebug("qry->ResultsList  %x\n",qry->ResultsList);*/
3100     if(!qry->ResultsList)
3101     {
3102         ajDebug("query data all finished\n");
3103         AJFREE(qry->QryData);
3104         qryd = NULL;
3105 
3106         return ajFalse;
3107     }
3108     else
3109     {
3110         ajDebug("reusing data from previous call %x\n", qry->QryData);
3111         ajDebug("listlen  %Lu\n", ajListGetLength(qry->ResultsList));
3112         ajDebug("divfile '%S'\n", qryd->divfile);
3113         ajDebug("idxfile '%S'\n", qryd->idxfile);
3114         ajDebug("datfile '%S'\n", qryd->datfile);
3115         ajDebug("seqfile '%S'\n", qryd->seqfile);
3116         ajDebug("name    '%s'\n", qryd->name);
3117         ajDebug("nameSize %d\n",  qryd->nameSize);
3118         ajDebug("div      %d\n",  qryd->div);
3119         ajDebug("maxdiv   %d\n",  qryd->maxdiv);
3120         /*ajListTrace(qry->ResultsList);*/
3121     }
3122 
3123     return ajTrue;
3124 }
3125 
3126 
3127 
3128 
3129 /* @funcstatic featCdQryOpen **************************************************
3130 **
3131 ** Opens everything for a new CD query
3132 **
3133 ** @param [u] qry [AjPQuery] Query data
3134 ** @return [AjBool] ajTrue if we can continue,
3135 **                  ajFalse if all is done.
3136 **
3137 ** @release 6.5.0
3138 ** @@
3139 ******************************************************************************/
3140 
featCdQryOpen(AjPQuery qry)3141 static AjBool featCdQryOpen(AjPQuery qry)
3142 {
3143     FeatPCdQry qryd;
3144 
3145     ajuint i;
3146     short j;
3147     static char *name;
3148     AjPStr fullName = NULL;
3149 
3150     if(!ajStrGetLen(qry->IndexDir))
3151     {
3152         ajDebug("no indexdir defined for database '%S'\n", qry->DbName);
3153         ajErr("no indexdir defined for database '%S'", qry->DbName);
3154 
3155         return ajFalse;
3156     }
3157 
3158     ajDebug("directory '%S' fields: %Lu hasacc:%B\n",
3159             qry->IndexDir, ajListGetLength(qry->QueryFields), qry->HasAcc);
3160 
3161     qry->QryData = AJNEW0(qryd);
3162     AJNEW0(qryd->idxLine);
3163     AJNEW0(qryd->trgLine);
3164     qryd->dfp = featCdFileOpen(qry->IndexDir, "division.lkp", &qryd->divfile);
3165 
3166     if(!qryd->dfp)
3167     {
3168         ajWarn("Cannot open division file '%S' for database '%S'",
3169                qryd->divfile, qry->DbName);
3170 
3171         return ajFalse;
3172     }
3173 
3174 
3175     qryd->nameSize = qryd->dfp->RecSize - 2;
3176     qryd->maxdiv   = qryd->dfp->NRecords;
3177     ajDebug("nameSize: %d maxdiv: %d\n",
3178             qryd->nameSize, qryd->maxdiv);
3179     qryd->name = ajCharNewRes(qryd->nameSize+1);
3180     name = ajCharNewRes(qryd->nameSize+1);
3181     AJCNEW0(qryd->Skip, qryd->maxdiv);
3182     featCdFileSeek(qryd->dfp, 0);
3183 
3184     for(i=0; i < qryd->maxdiv; i++)
3185     {
3186         ajReadbinInt2(qryd->dfp->File, &j);
3187         ajReadbinCharTrim(qryd->dfp->File, qryd->nameSize, name);
3188 
3189         ajStrAssignC(&fullName, name);
3190         ajFilenameReplacePathS(&fullName, qry->Directory);
3191 
3192         if(!ajFilenameTestInclude(fullName, qry->Exclude, qry->Filename))
3193             qryd->Skip[i] = ajTrue;
3194     }
3195 
3196     qryd->ifp = featCdFileOpen(qry->IndexDir, "entrynam.idx", &qryd->idxfile);
3197 
3198     if(!qryd->ifp)
3199     {
3200         ajErr("Cannot open index file '%S' for database '%S'",
3201 	      qryd->idxfile, qry->DbName);
3202 
3203         return ajFalse;
3204     }
3205 
3206     ajStrDel(&fullName);
3207     ajCharDel(&name);
3208 
3209     return ajTrue;
3210 }
3211 
3212 
3213 
3214 
3215 /* @funcstatic featCdQryEntry *************************************************
3216 **
3217 ** Queries for a single entry in an EMBLCD index
3218 **
3219 ** @param [u] qry [AjPQuery] Query data
3220 ** @return [AjBool] ajTrue if we can continue,
3221 **                  ajFalse if all is done.
3222 **
3223 ** @release 6.5.0
3224 ** @@
3225 ******************************************************************************/
3226 
featCdQryEntry(AjPQuery qry)3227 static AjBool featCdQryEntry(AjPQuery qry)
3228 {
3229     FeatPCdEntry entry = NULL;
3230     ajint ipos = -1;
3231     ajint trghit;
3232     FeatPCdQry qryd;
3233     const AjPList fdlist;
3234     AjIList iter;
3235     AjPQueryField fd;
3236     AjPStr qrystr = NULL;
3237     ajuint i;
3238     ajuint ii;
3239     ajint j;
3240 
3241     const char* embossfields[] = {
3242         "id", "acc",   "sv",    "org",   "key",     "des", "gi", NULL
3243     };
3244     const char* emblcdfields[] = {
3245         NULL, "acnum", "seqvn", "taxon", "keyword", "des", "gi", NULL
3246     };
3247 
3248     fdlist = ajQueryGetallFields(qry);
3249 
3250     ajQueryGetQuery(qry, &qrystr);
3251 
3252     ajDebug("featCdQryEntry %S hasacc:%B\n",
3253             qrystr , qry->HasAcc);
3254     ajStrDel(&qrystr);
3255 
3256     qryd = qry->QryData;
3257     iter= ajListIterNewread(fdlist);
3258 
3259     while(!ajListIterDone(iter))
3260     {
3261         fd = ajListIterGet(iter);
3262         ajDebug("query link: %u field '%S' wild '%S'\n",
3263                 fd->Link, fd->Field, fd->Wildquery);
3264 
3265         if((fd->Link == AJQLINK_ELSE) && ajListGetLength(qry->ResultsList))
3266             continue;
3267 
3268         for(i=0; embossfields[i]; i++)
3269         {
3270             ajDebug("test field[%u] '%S' = '%s'\n",
3271                     i, fd->Field, emblcdfields[i]);
3272             if(ajStrMatchC(fd->Field, embossfields[i]))
3273             {
3274                 ajDebug("match field[%u] '%S' = '%s'\n",
3275                         i, fd->Field, emblcdfields[i]);
3276                 if(!emblcdfields[i]) /* ID index */
3277                 {
3278                     ipos = featCdIdxSearch(qryd->idxLine,
3279                                            fd->Wildquery, qryd->ifp);
3280 
3281                     if(ipos >= 0)
3282                     {
3283                         if(!qryd->Skip[qryd->idxLine->DivCode-1])
3284                         {
3285                             AJNEW0(entry);
3286                             entry->div = qryd->idxLine->DivCode;
3287                             entry->annoff = qryd->idxLine->AnnOffset;
3288                             entry->seqoff = qryd->idxLine->SeqOffset;
3289                             ajListPushAppend(qry->ResultsList, (void*)entry);
3290                         }
3291                         else
3292                             ajDebug("SKIP: '%S' [file %d]\n",
3293                                     fd->Wildquery, qryd->idxLine->DivCode);
3294                     }
3295                 }
3296                 else            /* target/hit index */
3297                 {
3298                     if(featCdTrgOpen(qry->IndexDir, emblcdfields[i],
3299                                      &qryd->trgfp, &qryd->hitfp))
3300                     {
3301                         trghit = featCdTrgSearch(qryd->trgLine,
3302                                                  fd->Wildquery, qryd->trgfp);
3303 
3304                         if(trghit >= 0)
3305                         {
3306                             featCdFileSeek(qryd->hitfp,
3307                                            qryd->trgLine->FirstHit-1);
3308                             ajDebug("%s First: %d Count: %d\n",
3309                                     emblcdfields[i],
3310                                     qryd->trgLine->FirstHit,
3311                                     qryd->trgLine->NHits);
3312                             ipos = qryd->trgLine->FirstHit;
3313 
3314                             for(ii = 0; ii < qryd->trgLine->NHits; ii++)
3315                             {
3316                                 ajReadbinInt(qryd->hitfp->File, &j);
3317                                 j--;
3318                                 ajDebug("hitlist[%u] entry = %d\n", ii, j);
3319                                 featCdIdxLine(qryd->idxLine, j, qryd->ifp);
3320 
3321                                 if(!qryd->Skip[qryd->idxLine->DivCode-1])
3322                                 {
3323                                     AJNEW0(entry);
3324                                     entry->div = qryd->idxLine->DivCode;
3325                                     entry->annoff = qryd->idxLine->AnnOffset;
3326                                     entry->seqoff = qryd->idxLine->SeqOffset;
3327                                     ajListPushAppend(qry->ResultsList,
3328                                                      (void*)entry);
3329                                 }
3330                                 else
3331                                     ajDebug("SKIP: %s '%S' [file %d]\n",
3332                                             emblcdfields[i],
3333                                             fd->Wildquery,
3334                                             qryd->idxLine->DivCode);
3335                             }
3336                         }
3337 
3338                         featCdTrgClose(&qryd->trgfp, &qryd->hitfp);
3339                         ajStrDel(&qryd->trgLine->Target);
3340                     }
3341                 }
3342                 break;
3343             }
3344         }
3345         if(!embossfields[i])
3346         {
3347             if(featCdTrgOpen(qry->IndexDir, MAJSTRGETPTR(fd->Field),
3348                              &qryd->trgfp, &qryd->hitfp))
3349             {
3350                 trghit = featCdTrgSearch(qryd->trgLine,
3351                                          fd->Wildquery, qryd->trgfp);
3352 
3353                 if(trghit >= 0)
3354                 {
3355                     featCdFileSeek(qryd->hitfp,
3356                                   qryd->trgLine->FirstHit-1);
3357                     ajDebug("%s First: %d Count: %d\n",
3358                             fd->Field, qryd->trgLine->FirstHit,
3359                             qryd->trgLine->NHits);
3360                     ipos = qryd->trgLine->FirstHit;
3361 
3362                     for(ii = 0; ii < qryd->trgLine->NHits; ii++)
3363                     {
3364                         ajReadbinInt(qryd->hitfp->File, &j);
3365                         j--;
3366                         ajDebug("hitlist[%u] entry = %d\n", ii, j);
3367                         featCdIdxLine(qryd->idxLine, j, qryd->ifp);
3368 
3369                         if(!qryd->Skip[qryd->idxLine->DivCode-1])
3370                         {
3371                             AJNEW0(entry);
3372                             entry->div = qryd->idxLine->DivCode;
3373                             entry->annoff = qryd->idxLine->AnnOffset;
3374                             entry->seqoff = qryd->idxLine->SeqOffset;
3375                             ajListPushAppend(qry->ResultsList, (void*)entry);
3376                         }
3377                         else
3378                             ajDebug("SKIP: %S '%S' [file %d]\n",
3379                                     fd->Field,
3380                                     fd->Wildquery,
3381                                     qryd->idxLine->DivCode);
3382                     }
3383                 }
3384 
3385                 featCdTrgClose(&qryd->trgfp, &qryd->hitfp);
3386                 ajStrDel(&qryd->trgLine->Target);
3387             }
3388         }
3389     }
3390 
3391     ajListIterDel(&iter);
3392     if(ipos < 0)
3393         return ajFalse;
3394 
3395     if(!ajListGetLength(qry->ResultsList))
3396         return ajFalse;
3397 
3398     if(!qry->CaseId)
3399         qry->QryDone = ajTrue;
3400 
3401     return ajTrue;
3402 }
3403 
3404 
3405 
3406 
3407 /* @funcstatic featCdQryQuery *************************************************
3408 **
3409 ** Queries for one or more entries in an EMBLCD index
3410 **
3411 ** @param [u] qry [AjPQuery] Query data
3412 ** @return [AjBool] ajTrue if we can continue,
3413 **                  ajFalse if all is done.
3414 **
3415 ** @release 6.5.0
3416 ** @@
3417 ******************************************************************************/
3418 
featCdQryQuery(AjPQuery qry)3419 static AjBool featCdQryQuery(AjPQuery qry)
3420 {
3421     AjIList iter = NULL;
3422     AjPQueryField field = NULL;
3423 
3424     FeatPCdEntry newhit;
3425     FeatPCdEntry *allhits = NULL;
3426     ajulong** keys = NULL;
3427 
3428     AjPTable newtable = NULL;
3429 
3430     ajuint i;
3431     ajuint lasthits = 0;
3432     ajuint fdhits = 0;
3433 
3434     ajulong *ikey = NULL;
3435 
3436     ajuint ishift = sizeof(ajulong)/2;
3437 
3438     if(!qry->CaseId)
3439         qry->QryDone = ajTrue;
3440 
3441     ajTableSettypeUlong(qry->ResultsTable);
3442     ajTableSetDestroyboth(qry->ResultsTable);
3443 
3444     iter = ajListIterNewread(qry->QueryFields);
3445 
3446     while(!ajListIterDone(iter))
3447     {
3448         field = ajListIterGet(iter);
3449 
3450         if((field->Link == AJQLINK_ELSE) && (lasthits > 0))
3451         {
3452             ajDebug("ELSE: lasthits:%u skip\n", lasthits);
3453             continue;
3454         }
3455 
3456         if(ajStrMatchC(field->Field, "id"))
3457             featCdIdxQuery(qry, field->Wildquery);
3458         else
3459             featCdTrgQuery(qry, field->Field, field->Wildquery);
3460 
3461         fdhits = (ajuint) ajListGetLength(qry->ResultsList);
3462 
3463         ajDebug("featCdQryQuery hits: %u link: %u\n",
3464                 fdhits, field->Link);
3465 
3466         switch(field->Link)
3467         {
3468             case AJQLINK_INIT:
3469                 while(ajListPop(qry->ResultsList, (void**)&newhit))
3470                 {
3471                     AJNEW(ikey);
3472                     *ikey = (((ajulong)newhit->div) << ishift) +
3473                         (ajulong)newhit->annoff;
3474                     ajTablePutClean(qry->ResultsTable, ikey, newhit,
3475                                     &ajMemFree, &ajMemFree);
3476                     ikey = NULL;
3477                 }
3478 
3479                 break;
3480 
3481             case AJQLINK_OR:
3482                 while(ajListPop(qry->ResultsList, (void**)&newhit))
3483                 {
3484                     AJNEW(ikey);
3485                     *ikey = (((ajulong)newhit->div) << ishift) +
3486                         (ajulong)newhit->annoff;
3487                     ajTablePutClean(qry->ResultsTable, ikey, newhit,
3488                                     &ajMemFree, &ajMemFree);
3489                     ikey = NULL;
3490                 }
3491                 break;
3492 
3493             case AJQLINK_AND:
3494                 newtable = ajTableulongNew(fdhits);
3495                 ajTableSetDestroyboth(newtable);
3496                 while(ajListPop(qry->ResultsList, (void**)&newhit))
3497                 {
3498                     AJNEW(ikey);
3499                     *ikey = (((ajulong)newhit->div) << ishift) +
3500                         (ajulong)newhit->annoff;
3501                     ajTablePutClean(newtable, ikey, newhit,
3502                                     &ajMemFree, &ajMemFree);
3503                     ikey = NULL;
3504                 }
3505                 ajTableMergeAnd(qry->ResultsTable, newtable);
3506                 ajTableDelValdel(&newtable, &ajMemFree);
3507                 break;
3508 
3509             case AJQLINK_EOR:
3510             case AJQLINK_ELSE:
3511                 newtable = ajTableulongNew(fdhits);
3512                 ajTableSetDestroyboth(newtable);
3513                 while(ajListPop(qry->ResultsList, (void**)&newhit))
3514                 {
3515                     AJNEW(ikey);
3516                     *ikey = (((ajulong)newhit->div) << ishift) + newhit->annoff;
3517                     ajTablePutClean(newtable, ikey, newhit,
3518                                     &ajMemFree, &ajMemFree);
3519                     ikey = NULL;
3520                 }
3521 
3522                 ajTableMergeEor(qry->ResultsTable, newtable);
3523                 ajTableDelValdel(&newtable, &ajMemFree);
3524                 break;
3525 
3526             case AJQLINK_NOT:
3527                 newtable = ajTableulongNew(fdhits);
3528                 ajTableSetDestroyboth(newtable);
3529                 while(ajListPop(qry->ResultsList, (void**)&newhit))
3530                 {
3531                     AJNEW(ikey);
3532                     *ikey = (((ajulong)newhit->div) << ishift) + newhit->annoff;
3533                     ajTablePutClean(newtable, ikey, newhit,
3534                                     &ajMemFree, &ajMemFree);
3535                     ikey = NULL;
3536                 }
3537                 ajTableMergeNot(qry->ResultsTable, newtable);
3538                 ajTableDelValdel(&newtable, &ajMemFree);
3539                 break;
3540 
3541             default:
3542                 ajErr("Unexpected query link operator number '%u'",
3543                       field->Link);
3544                 break;
3545         }
3546 
3547         lasthits = fdhits;
3548     }
3549 
3550     ajListIterDel(&iter);
3551 
3552     ajTableToarrayKeysValues(qry->ResultsTable, (void***) &keys,
3553                              (void***)&allhits);
3554     for(i=0; allhits[i]; i++)
3555     {
3556         AJFREE(keys[i]);
3557         ajListPushAppend(qry->ResultsList, (void*) allhits[i]);
3558     }
3559     AJFREE(keys);
3560 
3561     ajDebug("ajListSortUnique len:%Lu\n",
3562             ajListGetLength(qry->ResultsList));
3563     ajListSortUnique(qry->ResultsList,
3564                      &featCdEntryCmp, &featCdEntryDel);
3565 
3566     AJFREE(allhits);
3567 
3568     ajDebug("featCdQryQuery clear results table\n");
3569     ajTableClear(qry->ResultsTable);
3570 
3571     if(ajListGetLength(qry->ResultsList))
3572         return ajTrue;
3573 
3574     return ajFalse;
3575 }
3576 
3577 
3578 
3579 
3580 /* @funcstatic featCdEntryCmp *************************************************
3581 **
3582 ** Compares two FeatPEntry objects
3583 **
3584 ** @param [r] pa [const void*] FeatPEntry object
3585 ** @param [r] pb [const void*] FeatPEntry object
3586 ** @return [int] -1 if first entry should sort before second, +1 if the
3587 **         second entry should sort first. 0 if they are identical
3588 **
3589 ** @release 6.5.0
3590 ** @@
3591 ******************************************************************************/
featCdEntryCmp(const void * pa,const void * pb)3592 static int featCdEntryCmp(const void* pa, const void* pb)
3593 {
3594     const FeatPCdEntry a;
3595     const FeatPCdEntry b;
3596 
3597     a = *(FeatPCdEntry const *) pa;
3598     b = *(FeatPCdEntry const *) pb;
3599 
3600     /*
3601       ajDebug("featCdEntryCmp %x %d %d : %x %d %d\n",
3602       a, a->div, a->annoff,
3603       b, b->div, b->annoff);
3604     */
3605     if(a->div != b->div)
3606         return (a->div - b->div);
3607 
3608     return (a->annoff - b->annoff);
3609 }
3610 
3611 
3612 
3613 
/* @funcstatic featCdEntryDel *************************************************
**
** Deletes a FeatPCdEntry object
**
** Used as the delete callback given to ajListSortUnique when duplicate
** hits are removed from a results list.
**
** @param [d] pentry [void**] Address of a FeatPCdEntry object, released
**                            with AJFREE
** @param [r] cl [void*] Standard unused argument, usually NULL.
** @return [void]
**
** @release 6.5.0
** @@
******************************************************************************/
static void featCdEntryDel(void** pentry, void* cl)
{
    AJFREE(*pentry);

    (void) cl;                  /* present only for the callback signature */
}
3633 
3634 
3635 
3636 
3637 /* @funcstatic featCdQryNext **************************************************
3638 **
3639 ** Processes the next query for an EMBLCD index
3640 **
3641 ** @param [u] qry [AjPQuery] Query data
3642 ** @return [AjBool] ajTrue if successful
3643 **
3644 ** @release 6.5.0
3645 ** @@
3646 ******************************************************************************/
3647 
featCdQryNext(AjPQuery qry)3648 static AjBool featCdQryNext(AjPQuery qry)
3649 {
3650     FeatPCdEntry entry;
3651     FeatPCdQry qryd;
3652     void* item;
3653 
3654     qryd = qry->QryData;
3655 
3656     if(!ajListGetLength(qry->ResultsList))
3657         return ajFalse;
3658 
3659     ajDebug("qry->ResultsList (b) length %Lu\n",
3660             ajListGetLength(qry->ResultsList));
3661     /*ajListTrace(qry->ResultsList);*/
3662     ajListPop(qry->ResultsList, &item);
3663     entry = (FeatPCdEntry) item;
3664 
3665     /*
3666       ajDebug("entry: %x div: %d (%d) ann: %d seq: %d\n",
3667       entry, entry->div, qryd->div, entry->annoff, entry->seqoff);
3668     */
3669     qryd->idnum = entry->annoff - 1;
3670 
3671     /*
3672       ajDebug("idnum: %d\n", qryd->idnum);
3673     */
3674     qryd->Samefile = ajTrue;
3675 
3676     if(entry->div != qryd->div)
3677     {
3678         qryd->Samefile = ajFalse;
3679         qryd->div = entry->div;
3680         /*ajDebug("div: %d\n", qryd->div);*/
3681 
3682         if(!featCdQryFile(qry))
3683             return ajFalse;
3684     }
3685 
3686     ajDebug("Offsets(cd) %d %d\n", entry->annoff, entry->seqoff);
3687     ajDebug("libr %x\n", qryd->libr);
3688     ajDebug("libr %F\n", qryd->libr);
3689 
3690     ajFileSeek(qryd->libr, entry->annoff,0);
3691 
3692     if(qryd->libs)
3693         ajFileSeek(qryd->libs, entry->seqoff,0);
3694 
3695     AJFREE(entry);
3696 
3697     if(!qry->CaseId)
3698         qry->QryDone = ajTrue;
3699 
3700     return ajTrue;
3701 }
3702 
3703 
3704 
3705 
3706 /* @funcstatic featCdQryClose *************************************************
3707 **
3708 ** Closes query data for an EMBLCD index
3709 **
3710 ** @param [u] qry [AjPQuery] Query data
3711 ** @return [AjBool] ajTrue if all is done
3712 **
3713 ** @release 6.5.0
3714 ** @@
3715 ******************************************************************************/
3716 
featCdQryClose(AjPQuery qry)3717 static AjBool featCdQryClose(AjPQuery qry)
3718 {
3719     FeatPCdQry qryd = NULL;
3720 
3721     ajDebug("featCdQryClose clean up qryd\n");
3722 
3723     qryd = qry->QryData;
3724 
3725     if(!qryd)
3726         return ajTrue;
3727 
3728     ajCharDel(&qryd->name);
3729     ajStrDel(&qryd->divfile);
3730     ajStrDel(&qryd->idxfile);
3731     ajStrDel(&qryd->datfile);
3732     ajStrDel(&qryd->seqfile);
3733     ajStrDel(&qryd->srcfile);
3734     ajStrDel(&qryd->tblfile);
3735 
3736     featCdIdxDel(&qryd->idxLine);
3737     featCdTrgDel(&qryd->trgLine);
3738 
3739     featCdFileClose(&qryd->ifp);
3740     featCdFileClose(&qryd->dfp);
3741     /* defined in a buffer, cleared there */
3742     /*
3743       ajFileClose(&qryd->libr);
3744       ajFileClose(&qryd->libs);
3745     */
3746     qryd->libr=0;
3747     qryd->libs=0;
3748     AJFREE(qryd->trgLine);
3749     AJFREE(qryd->idxLine);
3750     AJFREE(qryd->Skip);
3751 
3752     /* keep QryData for use at top of loop */
3753 
3754     return ajTrue;
3755 }
3756 
3757 
3758 
3759 
3760 /* @section B+tree Database Indexing *****************************************
3761 **
3762 ** These functions manage the B+tree index access methods.
3763 **
3764 ******************************************************************************/
3765 
3766 
3767 
3768 
3769 /* @funcstatic featEmbossQryReuse *********************************************
3770 **
3771 ** Tests whether the B+tree index query data can be reused or it's finished.
3772 **
3773 ** Clears qryData structure when finished.
3774 **
3775 ** @param [u] qry [AjPQuery] Query data
3776 ** @return [AjBool] ajTrue if reusable,
3777 **                  ajFalse if finished.
3778 **
3779 ** @release 6.5.0
3780 ** @@
3781 ******************************************************************************/
3782 
featEmbossQryReuse(AjPQuery qry)3783 static AjBool featEmbossQryReuse(AjPQuery qry)
3784 {
3785     FeatPEmbossQry qryd;
3786 
3787     qryd = qry->QryData;
3788 
3789     if(!qry || !qryd)
3790         return ajFalse;
3791 
3792 
3793     if(!qry->ResultsList)
3794     {
3795         ajDebug("featEmbossQryReuse: query data all finished\n");
3796 
3797         return ajFalse;
3798     }
3799     else
3800     {
3801         ajDebug("featEmbossQryReuse: reusing data from previous call %x\n",
3802                 qry->QryData);
3803         /*ajListTrace(qry->ResultsList);*/
3804     }
3805 
3806 
3807     qryd->nentries = -1;
3808 
3809 
3810     return ajTrue;
3811 }
3812 
3813 
3814 
3815 
3816 /* @funcstatic featEmbossQryOpen **********************************************
3817 **
3818 ** Open caches (etc) for B+tree search
3819 **
3820 ** @param [u] qry [AjPQuery] Query data
3821 ** @return [AjBool] ajTrue if we can continue,
3822 **                  ajFalse if all is done.
3823 **
3824 ** @release 6.5.0
3825 ** @@
3826 ******************************************************************************/
3827 
featEmbossQryOpen(AjPQuery qry)3828 static AjBool featEmbossQryOpen(AjPQuery qry)
3829 {
3830     FeatPEmbossQry qryd;
3831     ajint i;
3832     AjPStr name     = NULL;
3833     AjIList iter = NULL;
3834     AjPQueryField field = NULL;
3835     AjPBtcache cache = NULL;
3836 
3837     qry->QryData = AJNEW0(qryd);
3838     qryd = qry->QryData;
3839     qryd->div = -1;
3840     qryd->nentries = -1;
3841 
3842     qryd->Caches = ajListNew();
3843 
3844     if(!ajStrGetLen(qry->IndexDir))
3845     {
3846         ajDebug("no indexdir defined for database '%S'\n", qry->DbName);
3847         ajErr("no indexdir defined for database '%S'", qry->DbName);
3848         return ajFalse;
3849     }
3850 
3851     if(!featEmbossOpenCache(qry,"id",&qryd->idcache))
3852         return ajFalse;
3853 
3854     iter = ajListIterNewread(qry->QueryFields);
3855     while(!ajListIterDone(iter))
3856     {
3857         field = ajListIterGet(iter);
3858 
3859         ajStrFmtLower(&field->Wildquery);
3860         if(!featEmbossOpenCache(qry, MAJSTRGETPTR(field->Field), &cache))
3861             return ajFalse;
3862         ajListPushAppend(qryd->Caches, cache);
3863         cache = NULL;
3864     }
3865     ajListIterDel(&iter);
3866 
3867 
3868     ajDebug("directory '%S'fields: %Lu hasacc:%B\n",
3869             qry->IndexDir, ajListGetLength(qry->QueryFields), qry->HasAcc);
3870 
3871 
3872     if(ajStrGetLen(qry->Exclude) && qryd->nentries >= 0)
3873     {
3874         AJCNEW0(qryd->Skip,qryd->nentries);
3875         name     = ajStrNew();
3876 
3877         for(i=0; i < qryd->nentries; ++i)
3878         {
3879             ajStrAssignS(&name,qryd->files[i]);
3880 
3881             if(!ajFilenameTestInclude(name, qry->Exclude, qry->Filename))
3882                 qryd->Skip[i] = ajTrue;
3883         }
3884 
3885         ajStrDel(&name);
3886     }
3887 
3888     return ajTrue;
3889 }
3890 
3891 
3892 
3893 
3894 /* @funcstatic featEmbossOpenCache ********************************************
3895 **
3896 ** Create primary B+tree index cache
3897 **
3898 ** @param [u] qry [AjPQuery] Query data
3899 ** @param [r] ext [const char*] Index file extension
3900 ** @param [w] cache [AjPBtcache*] cache
3901 ** @return [AjBool] True on success
3902 **
3903 ** @release 6.5.0
3904 ** @@
3905 ******************************************************************************/
3906 
featEmbossOpenCache(AjPQuery qry,const char * ext,AjPBtcache * cache)3907 static AjBool featEmbossOpenCache(AjPQuery qry, const char *ext,
3908                                   AjPBtcache *cache)
3909 {
3910     FeatPEmbossQry qryd;
3911     AjPStr indexextname = NULL;
3912 
3913     qryd = qry->QryData;
3914 
3915     indexextname = ajStrNewS(ajBtreeFieldGetExtensionC(ext));
3916 
3917     *cache = ajBtreeCacheNewReadS(qry->DbAlias,indexextname,
3918                                   qry->IndexDir);
3919     ajStrDel(&indexextname);
3920 
3921     if(!*cache)
3922     {
3923         qryd->nentries = -1;
3924 
3925         return ajFalse;
3926     }
3927 
3928     if(qryd->nentries == -1)
3929         qryd->nentries = ajBtreeReadEntriesS(qry->DbAlias,
3930                                              qry->IndexDir,
3931                                              qry->Directory,
3932                                              &qryd->files,
3933                                              &qryd->reffiles,
3934                                              &qryd->refcount);
3935 
3936     return ajTrue;
3937 }
3938 
3939 
3940 
3941 
3942 
3943 /* @funcstatic featEmbossQryEntry *********************************************
3944 **
3945 ** Queries for a single entry in a B+tree index
3946 **
3947 ** @param [u] qry [AjPQuery] Query data
3948 ** @return [AjBool] ajTrue if can continue,
3949 **                  ajFalse if all is done.
3950 **
3951 ** @release 6.5.0
3952 ** @@
3953 ******************************************************************************/
3954 
featEmbossQryEntry(AjPQuery qry)3955 static AjBool featEmbossQryEntry(AjPQuery qry)
3956 {
3957     AjPBtHitref newhit  = NULL;
3958     FeatPEmbossQry qryd;
3959     const AjPList fdlist;
3960     const AjPList cachelist;
3961     AjIList iter;
3962     AjIList icache;
3963     AjPBtcache cache;
3964     AjPQueryField fd;
3965     AjPBtHitref *allhits = NULL;
3966     ajuint i;
3967 
3968     ajDebug("featEmbossQryEntry fields: %Lu hasacc:%B\n",
3969             ajListGetLength(qry->QueryFields), qry->HasAcc);
3970 
3971     qryd = qry->QryData;
3972 
3973     fdlist = ajQueryGetallFields(qry);
3974     cachelist = qryd->Caches;
3975 
3976     iter= ajListIterNewread(fdlist);
3977     icache = ajListIterNewread(cachelist);
3978     while(!ajListIterDone(iter))
3979     {
3980         fd = ajListIterGet(iter);
3981         cache = ajListIterGet(icache);
3982 
3983         ajDebug("qry type:%d field '%S' wild '%S'\n",
3984                 fd->Link, fd->Field, fd->Wildquery);
3985 
3986         if((fd->Link == AJQLINK_ELSE) && ajListGetLength(qry->ResultsList))
3987             continue;
3988 
3989         if(!ajBtreeCacheIsSecondary(cache))
3990         {
3991             ajBtreeIdentFetchHitref(cache,fd->Wildquery,
3992                                     qry->ResultsList);
3993         }
3994     }
3995 
3996     ajListIterDel(&iter);
3997     ajListIterDel(&icache);
3998 
3999     if(ajStrGetLen(qry->Organisms))
4000     {
4001         ajTableSetDestroy(qry->ResultsTable, NULL, &ajBtreeHitrefDelVoid);
4002         ajTableSettypeUser(qry->ResultsTable,
4003                            &ajBtreeHitrefCmp, &ajBtreeHitrefHash);
4004 
4005         while(ajListPop(qry->ResultsList, (void**)&newhit))
4006             ajTablePutClean(qry->ResultsTable, newhit, newhit,
4007                             NULL, &ajBtreeHitrefDelVoid);
4008 
4009          featEmbossQryOrganisms(qry);
4010 
4011          ajTableToarrayValues(qry->ResultsTable, (void***)&allhits);
4012 
4013          for(i=0; allhits[i]; i++)
4014              ajListPushAppend(qry->ResultsList, (void*) allhits[i]);
4015     }
4016 
4017     if(!ajListGetLength(qry->ResultsList))
4018         return ajFalse;
4019 
4020     if(!qry->CaseId)
4021         qry->QryDone = ajTrue;
4022 
4023     return ajTrue;
4024 }
4025 
4026 
4027 
4028 
4029 /* @funcstatic featEmbossQryNext **********************************************
4030 **
4031 ** Processes the next query for a B+tree index
4032 **
4033 ** @param [u] qry [AjPQuery] Query data
4034 ** @return [AjBool] ajTrue if successful
4035 **
4036 ** @release 6.5.0
4037 ** @@
4038 ******************************************************************************/
4039 
featEmbossQryNext(AjPQuery qry)4040 static AjBool featEmbossQryNext(AjPQuery qry)
4041 {
4042     AjPBtHitref entry;
4043     FeatPEmbossQry qryd;
4044     void* item;
4045     AjBool ok = ajFalse;
4046 
4047 
4048     qryd = qry->QryData;
4049 
4050     if(!ajListGetLength(qry->ResultsList))
4051         return ajFalse;
4052 
4053     /*ajListTrace(qry->ResultsList);*/
4054 
4055 
4056     if(!qryd->Skip)
4057     {
4058         ajListPop(qry->ResultsList, &item);
4059         entry = (AjPBtHitref) item;
4060     }
4061     else
4062     {
4063         ok = ajFalse;
4064 
4065         while(!ok)
4066         {
4067             ajListPop(qry->ResultsList, &item);
4068             entry = (AjPBtHitref) item;
4069 
4070             if(!qryd->Skip[entry->dbno])
4071                 ok = ajTrue;
4072             else
4073             {
4074                 ajBtreeHitrefDel(&entry);
4075 
4076                 if(!ajListGetLength(qry->ResultsList))
4077                     return ajFalse;
4078             }
4079         }
4080     }
4081 
4082     qryd->Samefile = ajTrue;
4083 
4084     if(entry->dbno != qryd->div)
4085     {
4086         qryd->Samefile = ajFalse;
4087         qryd->div = entry->dbno;
4088         ajFileClose(&qryd->libs);
4089 
4090         if(qryd->reffiles)
4091             ajFileClose(&qryd->libr);
4092     }
4093 
4094     if(!qryd->libs)
4095     {
4096         qryd->libs = ajFileNewInNameS(qryd->files[entry->dbno]);
4097 
4098         if(!qryd->libs)
4099         {
4100             ajBtreeHitrefDel(&entry);
4101 
4102             return ajFalse;
4103         }
4104     }
4105 
4106     if(qryd->reffiles && !qryd->libr)
4107     {
4108         ajFileClose(&qryd->libr);
4109         qryd->libr = ajFileNewInNameS(qryd->reffiles[0][entry->dbno]);
4110 
4111         if(!qryd->libr)
4112         {
4113             ajBtreeHitrefDel(&entry);
4114 
4115             return ajFalse;
4116         }
4117     }
4118 
4119 
4120     ajFileSeek(qryd->libs, (ajlong) entry->offset, 0);
4121     if(qryd->reffiles)
4122         ajFileSeek(qryd->libr, (ajlong) entry->refoffset, 0);
4123 
4124     ajBtreeHitrefDel(&entry);
4125 
4126     if(!qry->CaseId)
4127         qry->QryDone = ajTrue;
4128 
4129     return ajTrue;
4130 }
4131 
4132 
4133 
4134 
4135 /* @funcstatic featEmbossQryClose *********************************************
4136 **
4137 ** Closes query data for a B+tree index
4138 **
4139 ** @param [u] qry [AjPQuery] Query data
4140 ** @return [AjBool] ajTrue if we can continue,
4141 **                  ajFalse if all is done.
4142 **
4143 ** @release 6.5.0
4144 ** @@
4145 ******************************************************************************/
4146 
featEmbossQryClose(AjPQuery qry)4147 static AjBool featEmbossQryClose(AjPQuery qry)
4148 {
4149     FeatPEmbossQry qryd;
4150     void* item;
4151     ajint i;
4152     ajuint iref;
4153 
4154     if(!qry)
4155         return ajFalse;
4156 
4157     if(!qry->QryData)
4158         return ajFalse;
4159 
4160     ajDebug("featEmbossQryClose clean up qryd\n");
4161 
4162     qryd = qry->QryData;
4163 
4164     while(ajListGetLength(qryd->Caches))
4165     {
4166         ajListPop(qryd->Caches, &item);
4167         ajBtreeCacheDel((AjPBtcache*) &item);
4168     }
4169     ajListFree(&qryd->Caches);
4170     ajBtreeCacheDel(&qryd->idcache);
4171 
4172     ajListFree(&qry->ResultsList);
4173 
4174     if(qryd->Skip)
4175     {
4176         AJFREE(qryd->Skip);
4177         qryd->Skip = NULL;
4178     }
4179 
4180     if(qryd->files)
4181     {
4182         i = 0;
4183 
4184         while(qryd->files[i])
4185         {
4186             ajStrDel(&qryd->files[i]);
4187 
4188             if(qryd->reffiles)
4189             {
4190                 for(iref=0; iref < qryd->refcount; iref++)
4191                     ajStrDel(&qryd->reffiles[iref][i]);
4192             }
4193 
4194             ++i;
4195         }
4196 
4197         AJFREE(qryd->files);
4198     }
4199 
4200     if(qryd->reffiles)
4201     {
4202         for(iref=0; iref < qryd->refcount; iref++)
4203                     AJFREE(qryd->reffiles[iref]);
4204 
4205         AJFREE(qryd->reffiles);
4206     }
4207 
4208     qryd->files = NULL;
4209     qryd->reffiles = NULL;
4210 
4211 
4212     /* keep QryData for use at top of loop */
4213 
4214     return ajTrue;
4215 }
4216 
4217 
4218 
4219 
4220 /* @funcstatic featEmbossQryOrganisms *****************************************
4221 **
4222 ** Restricts results to matches to organism(s) in database
4223 **
4224 ** @param [u] qry [AjPQuery] Query data
4225 ** @return [AjBool] ajTrue on success.
4226 **
4227 ** @release 6.5.0
4228 ** @@
4229 ******************************************************************************/
4230 
featEmbossQryOrganisms(AjPQuery qry)4231 static AjBool featEmbossQryOrganisms(AjPQuery qry)
4232 {
4233     FeatPEmbossQry qryd;
4234     AjPBtcache orgcache;
4235     AjPStr orgqry = NULL;
4236     AjPStrTok orghandle = NULL;
4237     AjPTable orgtable = NULL;
4238     AjPList orglist = NULL;
4239     AjPBtHitref newhit;
4240     ajulong fdhits = 0UL;
4241 
4242     if(!ajStrGetLen(qry->Organisms))
4243         return ajTrue;
4244 
4245     qryd = qry->QryData;
4246 
4247     featEmbossOpenCache(qry, "org", &orgcache);
4248     orglist = ajListNew();
4249     orghandle = ajStrTokenNewC(qry->Organisms, "\t,;|");
4250     while(ajStrTokenNextParse(orghandle, &orgqry))
4251     {
4252         if(ajBtreeCacheIsSecondary(orgcache))
4253         {
4254             if(!qry->Wild)
4255             {
4256                 ajBtreeKeyFetchHitref(orgcache,qryd->idcache,
4257                                       orgqry, orglist);
4258 
4259             }
4260             else
4261             {
4262                ajBtreeKeyFetchwildHitref(orgcache, qryd->idcache,
4263                                          orgqry, orglist);
4264             }
4265         }
4266         else
4267         {
4268             ajBtreeIdentFetchwildHitref(orgcache,
4269                                         orgqry, orglist);
4270         }
4271 
4272         fdhits += ajListGetLength(orglist);
4273         ajDebug("Organisms list orgqry '%S' list '%Lu'", orgqry, fdhits);
4274 
4275     }
4276 
4277     orgtable = ajTableNewFunctionLen(fdhits,
4278 				     &ajBtreeHitrefCmp,
4279                                      &ajBtreeHitrefHash,
4280 				     NULL, &ajBtreeHitrefDelVoid);
4281     while(ajListPop(orglist, (void**)&newhit))
4282         ajTablePutClean(orgtable, newhit, newhit,
4283                         NULL, &ajBtreeHitrefDelVoid);
4284 
4285     ajStrTokenDel(&orghandle);
4286 
4287     ajTableMergeAnd(qry->ResultsTable, orgtable);
4288     ajListFree(&orglist);
4289     ajBtreeCacheDel(&orgcache);
4290     ajTableFree(&orgtable);
4291     ajStrDel(&orgqry);
4292 
4293     return ajTrue;
4294 }
4295 
4296 
4297 
4298 
4299 /* @funcstatic featEmbossQryQuery *********************************************
4300 **
4301 ** Queries for one or more entries in an EMBOSS B+tree index
4302 **
4303 ** @param [u] qry [AjPQuery] Query data
4304 ** @return [AjBool] ajTrue if we can continue,
4305 **                  ajFalse if all is done.
4306 **
4307 ** @release 6.5.0
4308 ** @@
4309 ******************************************************************************/
4310 
featEmbossQryQuery(AjPQuery qry)4311 static AjBool featEmbossQryQuery(AjPQuery qry)
4312 {
4313     FeatPEmbossQry qryd;
4314 
4315     const AjPList fdlist;
4316     const AjPList cachelist;
4317     AjIList iter;
4318     AjIList icache;
4319     AjPBtcache cache;
4320     AjPQueryField fd;
4321 
4322     AjPBtHitref newhit;
4323     AjPBtHitref *allhits = NULL;
4324     AjPTable newtable = NULL;
4325 
4326     ajuint i;
4327     ajulong lasthits = 0UL;
4328     ajulong fdhits = 0UL;
4329 
4330     if(!qry->CaseId)
4331         qry->QryDone = ajTrue;
4332 
4333     qryd = qry->QryData;
4334 
4335     cachelist = qryd->Caches;
4336 
4337     ajTableSetDestroy(qry->ResultsTable, NULL, &ajBtreeHitrefDelVoid);
4338     ajTableSettypeUser(qry->ResultsTable,
4339                        &ajBtreeHitrefCmp, &ajBtreeHitrefHash);
4340 
4341     fdlist = ajQueryGetallFields(qry);
4342 
4343     iter = ajListIterNewread(fdlist);
4344     icache = ajListIterNewread(cachelist);
4345     while(!ajListIterDone(iter))
4346     {
4347         fd = ajListIterGet(iter);
4348         cache = ajListIterGet(icache);
4349 
4350         ajDebug("field '%S' query: '%S'\n", fd->Field, fd->Wildquery);
4351 
4352         if((fd->Link == AJQLINK_ELSE) && (lasthits > 0UL))
4353         {
4354             continue;
4355         }
4356 
4357         /* is this a primary or secondary key (check the cache)? */
4358 
4359         if(ajBtreeCacheIsSecondary(cache))
4360         {
4361             if(!qry->Wild)
4362             {
4363                 ajBtreeKeyFetchHitref(cache, qryd->idcache,
4364                                       fd->Wildquery, qry->ResultsList);
4365             }
4366             else
4367             {
4368                 ajBtreeKeyFetchwildHitref(cache, qryd->idcache,
4369                                           fd->Wildquery, qry->ResultsList);
4370             }
4371         }
4372         else
4373         {
4374             ajBtreeIdentFetchwildHitref(cache,fd->Wildquery,qry->ResultsList);
4375             ajDebug("ajBtreeIdentFetchwild results:%Lu\n",
4376                     ajListGetLength(qry->ResultsList));
4377         }
4378 
4379         fdhits = ajListGetLength(qry->ResultsList);
4380 
4381         switch(fd->Link)
4382         {
4383             case AJQLINK_INIT:
4384                 while(ajListPop(qry->ResultsList, (void**)&newhit))
4385                     ajTablePutClean(qry->ResultsTable, newhit, newhit,
4386                                     NULL, &ajBtreeHitrefDelVoid);
4387                 break;
4388 
4389             case AJQLINK_OR:
4390                 newtable = ajTableNewFunctionLen(fdhits,
4391                                                  &ajBtreeHitrefCmp,
4392                                                  &ajBtreeHitrefHash,
4393                                                  NULL, &ajBtreeHitrefDelVoid);
4394                 while(ajListPop(qry->ResultsList, (void**)&newhit))
4395                     ajTablePutClean(newtable, newhit, newhit,
4396                                     NULL, &ajBtreeHitrefDelVoid);
4397 
4398                 ajTableMergeOr(qry->ResultsTable, newtable);
4399                 ajTableDel(&newtable);
4400                 break;
4401 
4402             case AJQLINK_AND:
4403                 newtable = ajTableNewFunctionLen(fdhits,
4404                                                  &ajBtreeHitrefCmp,
4405                                                  &ajBtreeHitrefHash,
4406                                                  NULL, &ajBtreeHitrefDelVoid);
4407                 while(ajListPop(qry->ResultsList, (void**)&newhit))
4408                     ajTablePutClean(newtable, newhit, newhit,
4409                                     NULL, &ajBtreeHitrefDelVoid);
4410 
4411                 ajTableMergeAnd(qry->ResultsTable, newtable);
4412                 ajTableDel(&newtable);
4413                 break;
4414 
4415             case AJQLINK_EOR:
4416             case AJQLINK_ELSE:
4417                 newtable = ajTableNewFunctionLen(fdhits,
4418                                                  &ajBtreeHitrefCmp,
4419                                                  &ajBtreeHitrefHash,
4420                                                  NULL, ajBtreeHitrefDelVoid);
4421                 while(ajListPop(qry->ResultsList, (void**)&newhit))
4422                     ajTablePutClean(newtable, newhit, newhit,
4423                                     NULL, &ajBtreeHitrefDelVoid);
4424 
4425                 ajTableMergeEor(qry->ResultsTable, newtable);
4426                 ajTableDel(&newtable);
4427                 break;
4428 
4429             case AJQLINK_NOT:
4430                 newtable = ajTableNewFunctionLen(fdhits,
4431                                                  &ajBtreeHitrefCmp,
4432                                                  &ajBtreeHitrefHash,
4433                                                  NULL, &ajBtreeHitrefDelVoid);
4434                 while(ajListPop(qry->ResultsList, (void**)&newhit))
4435                     ajTablePutClean(newtable, newhit, newhit,
4436                                     NULL, &ajBtreeHitrefDelVoid);
4437 
4438                 ajTableMergeNot(qry->ResultsTable, newtable);
4439                 ajTableDel(&newtable);
4440                 break;
4441 
4442             default:
4443                 ajErr("Unexpected query link operator number '%u'",
4444                       fd->Link);
4445                 break;
4446         }
4447 
4448         lasthits = fdhits;
4449     }
4450 
4451     ajListIterDel(&iter);
4452     ajListIterDel(&icache);
4453 
4454     if(ajStrGetLen(qry->Organisms))
4455         featEmbossQryOrganisms(qry);
4456 
4457     ajTableToarrayValues(qry->ResultsTable, (void***)&allhits);
4458     for(i=0; allhits[i]; i++)
4459         ajListPushAppend(qry->ResultsList, (void*) allhits[i]);
4460 
4461     AJFREE(allhits);
4462 
4463     ajTableClear(qry->ResultsTable);
4464 
4465     if(ajListGetLength(qry->ResultsList))
4466         return ajTrue;
4467 
4468     return ajFalse;
4469 }
4470 
4471 
4472 
4473 
4474 /* @section CHADO Indexing ****************************************************
4475 **
4476 ** These functions manage the CHADO database access methods.
4477 **
4478 ******************************************************************************/
4479 
4480 
4481 
4482 
4483 /* @funcstatic featAccessChado ************************************************
4484 **
4485 ** Reads features from CHADO databases
4486 **
4487 ** @param [u] fttabin [AjPFeattabin] Feature input.
4488 ** @return [AjBool] ajTrue on success.
4489 **
4490 ** @release 6.4.0
4491 ** @@
4492 ******************************************************************************/
4493 
featAccessChado(AjPFeattabin fttabin)4494 static AjBool featAccessChado(AjPFeattabin fttabin)
4495 {
4496     AjPQuery qry = NULL;
4497     AjPStr seqid = NULL;
4498 
4499     AjPStr filterseqregions = NULL;
4500 
4501     AjIList fielditer     = NULL;
4502     AjPQueryField field   = NULL;
4503     AjPFeattable  feattab = NULL;
4504 
4505     AjPStr fieldname = NULL;
4506     AjPStr condition = NULL;
4507 
4508     AjPSqlconnection connection = NULL;
4509 
4510     AjBool ret = ajTrue;
4511 
4512 #if !defined(HAVE_MYSQL) && !defined(HAVE_POSTGRESQL)
4513     ajWarn("Cannot use access method chado without mysql or postgresql");
4514     return ajFalse;
4515 #endif
4516 
4517     if(fttabin->Input->Records)
4518 	return ajFalse;
4519 
4520     qry = fttabin->Input->Query;
4521 
4522     ajDebug("featAccesschado: %S fields: %Lu\n",
4523             qry->DbAlias, ajListGetLength(qry->QueryFields));
4524 
4525     filterseqregions = ajStrNew();
4526 
4527     fielditer = ajListIterNewread(qry->QueryFields);
4528 
4529     while(!ajListIterDone(fielditer))
4530     {
4531 	field = ajListIterGet(fielditer);
4532 
4533 	ajStrAssignS(&fieldname, field->Field);
4534 
4535 	ajDebug("field:%S - val:%S\n", field->Field, field->Wildquery);
4536 
4537 	if(ajStrMatchCaseC(field->Field, "id"))
4538 	{
4539 	    ajStrAssignS(&seqid, field->Wildquery);
4540 	    ajStrAssignC(&fieldname, "uniquename");
4541 	}
4542 
4543 	if(ajStrFindAnyC(field->Wildquery,"*?") != -1)
4544 	{
4545 	    ajStrExchangeKK(&field->Wildquery,'*','%');
4546 	    ajStrExchangeKK(&field->Wildquery,'?','_');
4547 	    ajFmtPrintS(&condition," %S LIKE '%S'", fieldname,
4548 	                   field->Wildquery);
4549 	}
4550 	else
4551 	    ajFmtPrintS(&condition,"%S = '%S'", fieldname,
4552 	                   field->Wildquery);
4553 
4554 	if (ajStrGetLen(filterseqregions))
4555 	    ajStrAppendC(&filterseqregions," AND ");
4556 
4557 	ajStrAppendS(&filterseqregions, condition);
4558 
4559 	ajStrDel(&fieldname);
4560 	ajStrDel(&condition);
4561     }
4562 
4563     ajListIterDel(&fielditer);
4564 
4565     ajDebug("dbfilter: %S\n", qry->DbFilter);
4566 
4567     if(ajStrGetLen(qry->DbFilter))
4568         ajFmtPrintAppS(&filterseqregions, " AND %S", qry->DbFilter);
4569 
4570     ajDebug("filter: %S\n", filterseqregions);
4571 
4572     feattab = ajFeattableNew(seqid);
4573     connection = featChadoConnect(qry);
4574 
4575     if(!ajStrGetLen(feattab->Db))
4576 	ajStrAssignS(&feattab->Db, qry->DbName);
4577 
4578     ret = featChadoQryfeatureQuery(connection, filterseqregions, feattab,
4579                                    fttabin->Start, fttabin->End);
4580 
4581     ajSqlconnectionDel(&connection);
4582 
4583     fttabin->Input->TextData = feattab;
4584 
4585     qry->QryDone = ajTrue;
4586 
4587     ajStrDel(&seqid);
4588     ajStrDel(&filterseqregions);
4589 
4590     return ret;
4591 }
4592 
4593 
4594 
4595 
4596 /* @funcstatic featChadoQryfeatureQuery ***************************************
4597 **
4598 ** Queries the connected chado database for the emboss feature query.
4599 ** More queries are made for the child features of the queried feature(s)
4600 ** using function featChadoChildfeatureQuery.
4601 **
4602 ** @param [u] connection [AjPSqlconnection] SQL Database connection
4603 ** @param [u] filter [AjPStr] Filter conditions for the SQL query
4604 ** @param [u] feattab [AjPFeattable] Feature table
4605 ** @param [r] qrystart [ajint] start pos specified as part of the feature query
4606 ** @param [r] qryend [ajint] end pos specified as part of the feature query
4607 ** @return [AjBool] returns ajFalse if no features found
4608 **
4609 ** @release 6.4.0
4610 ** @@
4611 ******************************************************************************/
4612 
featChadoQryfeatureQuery(AjPSqlconnection connection,AjPStr filter,AjPFeattable feattab,ajint qrystart,ajint qryend)4613 static AjBool featChadoQryfeatureQuery(AjPSqlconnection connection,
4614 				       AjPStr filter,
4615 				       AjPFeattable  feattab,
4616 				       ajint qrystart, ajint qryend)
4617 {
4618     AjPSqlstatement statement = NULL;
4619     AjBool debugOn = ajFalse;
4620     ajint i = 0;
4621     AjPStr sql = NULL;
4622 
4623     AjISqlrow iter = NULL;
4624     AjPSqlrow row  = NULL;
4625     AjPStr colstr = NULL;
4626     AjPStr rowstr = NULL;
4627     AjPStr regionuniquename = NULL;
4628 
4629     AjBool ret = ajTrue;
4630 
4631     debugOn = ajDebugOn();
4632     sql = ajStrNew();
4633 
4634     ajFmtPrintS(&sql,
4635 	    "SELECT feature.name, feature.uniquename,\n"
4636 	    " (SELECT COUNT(*) FROM featureloc"
4637 	    " WHERE feature.feature_id = featureloc.srcfeature_id),\n"
4638 	    " srcfeatureloc.srcfeature_id,"
4639 	    " srcfeature.uniquename,"
4640 	    " srcfeatureloc.fmin,"
4641 	    " srcfeatureloc.fmax,"
4642 	    " srcfeatureloc.strand, srcfeatureloc.phase,\n"
4643 	    " feature.seqlen,"
4644 	    " cvterm.name, db.name,"
4645 	    " feature.timelastmodified, feature.is_obsolete\n"
4646 	    " FROM feature\n"
4647 	    " LEFT JOIN cvterm ON cvterm.cvterm_id = feature.type_id\n"
4648 	    " LEFT OUTER JOIN dbxref ON dbxref.dbxref_id = feature.dbxref_id\n"
4649 	    " LEFT OUTER JOIN db USING (db_id)\n"
4650 	    " LEFT OUTER JOIN featureloc srcfeatureloc"
4651 	    " ON feature.feature_id = srcfeatureloc.feature_id\n"
4652 	    " LEFT OUTER JOIN feature srcfeature"
4653 	    " ON srcfeature.feature_id = srcfeatureloc.srcfeature_id\n"
4654 	    " WHERE feature.%S\n",
4655 	    filter
4656 	    );
4657 
4658     ajDebug("SQL, query-feature query:\n%S\n",sql);
4659 
4660     statement = ajSqlstatementNewRun(connection,sql);
4661 
4662     if(!statement)
4663     {
4664 	ajErr("Could not execute SQL statement [%S]", sql);
4665 	ajExitBad();
4666 	return ajFalse;
4667     }
4668 
4669     iter = ajSqlrowiterNew(statement);
4670 
4671     if(!iter)
4672 	ret = ajFalse;
4673 
4674     while(!ajSqlrowiterDone(iter))
4675     {
4676 	row = ajSqlrowiterGet(iter);
4677 
4678 	if(debugOn)
4679 	{
4680 	    while(row->Current < row->Columns)
4681 	    {
4682 		ajSqlcolumnToStr(row,&colstr);
4683 		ajFmtPrintAppS(&rowstr, "\t%S",colstr);
4684 	    }
4685 
4686 	    ajDebug("qryfeature row %d: %S\n", ++i, rowstr);
4687 	    ajStrSetClear(&rowstr);
4688 	}
4689 
4690 	ajSqlcolumnRewind(row);
4691 
4692 	regionuniquename = featChadoQryfeatureRow(feattab, row,
4693 	                                          qrystart, qryend);
4694 	featChadoChildfeatureQuery(connection, feattab, regionuniquename);
4695 	ajStrDel(&regionuniquename);
4696     }
4697 
4698     ajSqlrowiterDel(&iter);
4699     ajSqlstatementDel(&statement);
4700     ajStrDel(&colstr);
4701     ajStrDel(&rowstr);
4702     ajStrDel(&sql);
4703 
4704     return ret;
4705 }
4706 
4707 
4708 
4709 
4710 /* @funcstatic featChadoChildfeatureQuery *************************************
4711 **
4712 ** Query to retrieve features located on the feature specified
4713 ** using the srcfeature parameter
4714 **
4715 ** @param [u] connection [AjPSqlconnection] SQL Database connection
4716 ** @param [u] feattab [AjPFeattable] Feature table
4717 ** @param [r] srcfeature [const AjPStr] Unique name of the sequence region
4718 ** @return [void]
4719 **
4720 ** @release 6.4.0
4721 ** @@
4722 ******************************************************************************/
4723 
featChadoChildfeatureQuery(AjPSqlconnection connection,AjPFeattable feattab,const AjPStr srcfeature)4724 static void featChadoChildfeatureQuery(AjPSqlconnection connection,
4725 			               AjPFeattable  feattab,
4726                                        const AjPStr srcfeature)
4727 {
4728     AjPSqlstatement statement = NULL;
4729     AjBool debugOn = ajFalse;
4730     ajint i = 0;
4731 
4732     AjISqlrow iter = NULL;
4733     AjPSqlrow row  = NULL;
4734     AjPStr colstr  = NULL;
4735     AjPStr rowstr  = NULL;
4736     AjPStr sql     = NULL;
4737     AjPFeature seqregionf = NULL;
4738 
4739     debugOn = ajDebugOn();
4740     sql   = ajStrNew();
4741 
4742     ajListPeekLast(feattab->Features, (void**)&seqregionf);
4743 
4744     /*
4745      * in SQL query below, feature refers to the sequence region being queried
4746      * sfeature refers to the features located within the sequence region,
4747      *
4748      * it is assumed that results are ordered such that features with the same
4749      * uniquename follows each other, we should check whether we need to use
4750      * explicit ORDER BY for any reason...
4751      */
4752 
4753     ajFmtPrintS(&sql,
4754 	    "SELECT sfeature.name, sfeature.uniquename, relation.name,"
4755 	    " object.uniquename, object_type.name, featureloc.fmin,"
4756 	    " featureloc.fmax, featureloc.strand, featureloc.phase,\n"
4757 	    " cvterm.name, db.name, analysis.program, analysisfeature.rawscore,"
4758 	    " sfeature.timelastmodified, sfeature.is_obsolete\n"
4759 	    " FROM feature\n"
4760 	    " JOIN featureloc"
4761 	    " ON feature.feature_id = featureloc.srcfeature_id\n"
4762 	    " JOIN feature sfeature"
4763 	    " ON sfeature.feature_id = featureloc.feature_id\n"
4764 	    " LEFT OUTER JOIN feature_relationship"
4765 	    " ON sfeature.feature_id = feature_relationship.subject_id\n"
4766 	    " LEFT OUTER JOIN cvterm relation"
4767 	    " ON relation.cvterm_id = feature_relationship.type_id\n"
4768 	    " LEFT OUTER JOIN feature object"
4769 	    " ON object.feature_id = feature_relationship.object_id\n"
4770 	    " LEFT OUTER JOIN cvterm object_type"
4771 	    " ON object.type_id = object_type.cvterm_id\n"
4772 	    " JOIN cvterm ON cvterm.cvterm_id = sfeature.type_id\n"
4773 	    " LEFT OUTER JOIN dbxref ON dbxref.dbxref_id = sfeature.dbxref_id\n"
4774 	    " LEFT OUTER JOIN db USING (db_id)\n"
4775 	    " LEFT OUTER JOIN analysisfeature"
4776 	    " ON sfeature.feature_id=analysisfeature.feature_id\n"
4777 	    " LEFT OUTER JOIN analysis USING(analysis_id)\n"
4778 	    " WHERE \n"
4779 	    " feature.uniquename='%S'"
4780 	    " AND"
4781 	    " ((featureloc.fmin>%d AND featureloc.fmin<%d)"
4782 	    " OR"
4783 	    " (featureloc.fmax>=%d AND featureloc.fmax<=%d))\n",
4784 	    srcfeature,
4785 	    seqregionf->Start, seqregionf->End,
4786 	    seqregionf->Start, seqregionf->End);
4787 
4788     ajDebug("SQL features:\n%S\n",sql);
4789 
4790     statement = ajSqlstatementNewRun(connection,sql);
4791 
4792     if(!statement)
4793     {
4794 	ajErr("Could not execute SQL statement [%S]", sql);
4795 	ajExitAbort();
4796     }
4797 
4798     iter = ajSqlrowiterNew(statement);
4799 
4800     while(!ajSqlrowiterDone(iter))
4801     {
4802 	row = ajSqlrowiterGet(iter);
4803 
4804 	if(debugOn)
4805 	{
4806 	    while(row->Current < row->Columns)
4807 	    {
4808 		ajSqlcolumnToStr(row,&colstr);
4809 		ajFmtPrintAppS(&rowstr, "\t%S",colstr);
4810 	    }
4811 
4812 	    ajDebug("row %d: %S\n", ++i, rowstr);
4813 	    ajStrSetClear(&rowstr);
4814 	}
4815 
4816 	ajSqlcolumnRewind(row);
4817 
4818 	featChadoChildfeatureRow(feattab, row);
4819     }
4820 
4821     ajSqlrowiterDel(&iter);
4822     ajSqlstatementDel(&statement);
4823     ajStrDel(&colstr);
4824     ajStrDel(&rowstr);
4825     ajStrDel(&sql);
4826 
4827     return;
4828 }
4829 
4830 
4831 
4832 
4833 /* @funcstatic featChadoChildfeatureRow ***************************************
4834 **
4835 ** Generates an emboss feature obj from a row of chado query results, and
4836 ** appends it to the feature table fttab.
4837 **
4838 ** @param [u] fttab [AjPFeattable] Feature table
4839 ** @param [u] row [AjPSqlrow] Input row
4840 ** @return [AjPFeature] New feature
4841 **
4842 ** @release 6.4.0
4843 ** @@
4844 ******************************************************************************/
4845 
featChadoChildfeatureRow(AjPFeattable fttab,AjPSqlrow row)4846 static AjPFeature featChadoChildfeatureRow(AjPFeattable fttab, AjPSqlrow row)
4847 {
4848     AjPFeature feature = NULL;
4849     AjPFeature prevft  = NULL;
4850 
4851     AjPStr name = NULL;
4852     AjPStr type = NULL;
4853     AjPStr source = NULL;
4854     AjPStr sourcedb = NULL;
4855     AjPStr sourceprogram = NULL;
4856     AjPTagval idtag = NULL;
4857 
4858     ajint start  = 0;
4859     ajint end    = 0;
4860     float score  = 0;
4861     char  strand = '+';
4862     ajint i = 0;
4863     ajint frame = 0;
4864 
4865     AjPStr entryid = NULL;
4866     AjBool updateprevft = ajFalse;
4867     AjPStr alias = NULL;
4868     AjPStr relation = NULL;
4869 
4870     /* The object can also be thought of as parent (containing feature),
4871      * and subject as child (contained feature or subfeature). ref:gmod.org */
4872 
4873     AjPStr object = NULL;
4874     AjPStr objecttype = NULL;
4875     AjPTime timelm = NULL;
4876     AjPStr timelmS = NULL;
4877     AjBool isObsolete = ajFalse;
4878     AjPStr isObsoleteS = NULL;
4879 
4880     if(!ajSqlrowGetColumns(row))
4881 	return NULL;
4882 
4883     timelm = ajTimeNew();
4884 
4885     ajSqlcolumnToStr(row, &name);
4886     ajSqlcolumnToStr(row, &entryid);
4887     ajSqlcolumnToStr(row, &relation);
4888     ajSqlcolumnToStr(row, &object);
4889     ajSqlcolumnToStr(row, &objecttype);
4890     ajSqlcolumnToInt(row, &start);
4891     ajSqlcolumnToInt(row, &end);
4892 
4893     ajSqlcolumnToInt(row, &i);
4894     if( i == 1 )
4895 	strand = '+';
4896     else if( i == -1 )
4897 	strand = '-';
4898     else
4899 	strand = '\0';		/* change to \0 later */
4900 
4901     ajSqlcolumnToInt(row, &frame);
4902 
4903     ajSqlcolumnToStr(row, &type);
4904     ajSqlcolumnToStr(row, &sourcedb);
4905     ajSqlcolumnToStr(row, &sourceprogram);
4906     ajSqlcolumnToFloat(row, &score);
4907     /*ajSqlcolumnToStr(row, &alias);*/
4908     ajSqlcolumnToTime(row, &timelm);
4909     ajSqlcolumnToBool(row, &isObsolete);
4910 
4911     if(ajStrGetLen(sourcedb))
4912 	ajStrAssignS(&source, sourcedb);
4913     else if(ajStrGetLen(sourceprogram))
4914 	ajStrAssignS(&source, sourceprogram);
4915     else
4916 	source = ajStrNewC(".");
4917 
4918     ajListPeekLast(fttab->Features, (void**)&prevft);
4919 
4920     if(++start==(ajint)prevft->Start && end==(ajint)prevft->End)
4921     {
4922 	ajListPeekFirst(prevft->GffTags, (void**)&idtag);
4923 	if(ajStrMatchS(entryid, MAJTAGVALGETVALUE(idtag)))
4924 	{
4925 	    updateprevft = ajTrue;
4926 	    feature = prevft;
4927 	}
4928     }
4929 
4930     if(!updateprevft)
4931     {
4932 	feature = ajFeatNewNuc(fttab,
4933 	                       source,
4934 	                       type,
4935 	                       start,
4936 	                       end,
4937 	                       score,
4938 	                       strand,
4939 	                       frame,
4940 	                       0,0,0, NULL, NULL);
4941 
4942 	ajFeatGfftagAddCS(feature, "ID", entryid);
4943 	ajFeatGfftagAddCS(feature, "Name", name);
4944 	/*ajFeatGfftagAddC(feature, "Alias", alias);*/
4945 
4946 	/* FIXME: it looks we replace all type 'synonyms' with SO:0000110
4947 	 * following tag is a workaround until it is fixed */
4948 	if(ajStrMatchC(feature->Type, "SO:0000110"))
4949 	    ajFeatGfftagAddCS(feature, "type", type);
4950     }
4951 
4952     if(ajStrMatchC(relation, "part_of") ||
4953 	    ajStrMatchC(relation, "proper_part_of") ||
4954 	    ajStrMatchC(relation, "partof") ||
4955 	    ajStrMatchC(relation, "producedby"))
4956     {
4957 	ajFeatGfftagAddCS(feature, "Parent", object);
4958 	ajFeatGfftagAddCS(feature, "parent_type", objecttype);
4959     }
4960     else if(ajStrMatchC(relation, "derives_from"))
4961     {
4962 	ajFeatGfftagAddCS(feature, "Derives_from", object);
4963 	ajFeatGfftagAddCS(feature, "parent_type", objecttype);
4964     }
4965     else if(ajStrGetLen(relation))
4966     {
4967 	ajFeatGfftagAddSS(feature, relation, object);
4968 	ajFeatGfftagAddCS(feature, "object_type", objecttype);
4969     }
4970 
4971 
4972     if(!updateprevft)
4973     {
4974 	if(timelm)
4975 	{
4976 	    ajFmtPrintS(&timelmS,"%D",timelm);
4977 	    ajFeatGfftagAddCS(feature,"timelastmodified", timelmS);
4978 	}
4979 
4980 	if(isObsolete)
4981 	{
4982 	    isObsoleteS = ajStrNewC("true");
4983 	    ajFeatGfftagAddCS(feature,"isObsolete", isObsoleteS);
4984 	    ajStrDel(&isObsoleteS);
4985 	}
4986     }
4987 
4988     ajFeatTrace(feature);
4989 
4990     ajStrDel(&name);
4991     ajStrDel(&entryid);
4992     ajStrDel(&relation);
4993     ajStrDel(&object);
4994     ajStrDel(&objecttype);
4995     ajStrDel(&type);
4996     ajStrDel(&source);
4997     ajStrDel(&sourcedb);
4998     ajStrDel(&sourceprogram);
4999     ajStrDel(&alias);
5000     ajTimeDel(&timelm);
5001     ajStrDel(&timelmS);
5002 
5003     return feature;
5004 }
5005 
5006 
5007 
5008 
5009 /* @funcstatic featChadoQryfeatureRow *****************************************
5010 **
5011 ** Generates a new feature from chado query-feature query result row.
5012 ** Checks whether the query feature is located on another feature (seq region)
5013 **
5014 ** @param [u] fttab    [AjPFeattable] Feature table
5015 ** @param [u] row      [AjPSqlrow]    Input row
5016 ** @param [r] qrystart [ajint]        Query start position
5017 ** @param [r] qryend   [ajint]        Query end position
5018 ** @return [AjPStr] New feature ID
5019 **
5020 ** @release 6.4.0
5021 ** @@
5022 ******************************************************************************/
5023 
featChadoQryfeatureRow(AjPFeattable fttab,AjPSqlrow row,ajint qrystart,ajint qryend)5024 static AjPStr featChadoQryfeatureRow(AjPFeattable fttab, AjPSqlrow row,
5025 	                             ajint qrystart, ajint qryend)
5026 {
5027     AjPFeature gf  = NULL;
5028     AjPStr name   = NULL;
5029 
5030     AjPStr source = NULL;
5031     AjPStr type   = NULL;
5032     ajint start  = 1;
5033     ajint seqlen = 0;
5034     float score  = 0;
5035     char  strand = '+';
5036     ajint loccount  = 0; /* this is to be removed; initially thought
5037                             we can use location count to decide whether
5038                             the query feature is a sequence region,
5039                             however current code decides it by checking whether
5040                             a parent location is defined */
5041     ajint i = 0;
5042     ajint frame = 0;
5043     AjPStr uniquename = NULL;
5044     AjPStr alias = NULL;
5045     AjPStr regionid = NULL;
5046     AjPStr regionuniquename = NULL;
5047     ajint regionstart  = 1;
5048     ajint regionend    = 0;
5049     AjPTime timelm = NULL;
5050     AjPStr timelmS = NULL;
5051     AjBool isObsolete = ajFalse;
5052     AjPStr isObsoleteS = NULL;
5053 
5054     if(!ajSqlrowGetColumns(row))
5055 	return NULL;
5056 
5057     timelm = ajTimeNew();
5058 
5059     ajSqlcolumnToStr(row, &name);
5060     ajSqlcolumnToStr(row, &uniquename);
5061     ajSqlcolumnToInt(row, &loccount);
5062     ajSqlcolumnToStr(row, &regionid);
5063     ajSqlcolumnToStr(row, &regionuniquename);
5064     ajSqlcolumnToInt(row, &regionstart);
5065     ajSqlcolumnToInt(row, &regionend);
5066     ajDebug("location count:%d  parent:%S parent-id:%S"
5067 	    " region start:%d  region end:%d\n",
5068 	    loccount,
5069 	    regionuniquename, regionid, regionstart, regionend);
5070 
5071     ajSqlcolumnToInt(row, &i);
5072     if( i == 1 )
5073 	strand = '+';
5074     else if( i == -1 )
5075 	strand = '-';
5076     else
5077 	strand = '\0';
5078 
5079     ajSqlcolumnToInt(row, &frame);
5080 
5081     ajSqlcolumnToInt(row, &seqlen);
5082 
5083     ajSqlcolumnToStr(row, &type);
5084     ajSqlcolumnToStr(row, &source);
5085     /*ajSqlcolumnToStr(row, &alias);*/
5086     ajSqlcolumnToTime(row, &timelm);
5087     ajSqlcolumnToBool(row, &isObsolete);
5088 
5089     if(!ajStrGetLen(source))
5090 	ajStrAssignS(&source, fttab->Db);
5091 
5092 
5093     if(qryend)
5094     {
5095 	start = qrystart;
5096 	seqlen = qryend;
5097     }
5098 
5099     regionstart++;
5100 
5101     gf = ajFeatNewNuc(fttab, source, type,
5102                       (ajStrGetLen(regionuniquename) ? regionstart : start),
5103                       (ajStrGetLen(regionuniquename) ? regionend   : seqlen),
5104                       score,
5105                       strand,
5106                       frame,
5107                       0,0,0, NULL, NULL);
5108 
5109     ajFeatGfftagAddCS(gf, "ID", uniquename);
5110     ajFeatGfftagAddCS(gf, "Name", name);
5111     /*ajFeatGfftagAddC(gf, "Alias", alias);*/
5112 
5113     if(timelm)
5114     {
5115 	ajFmtPrintS(&timelmS,"%D",timelm);
5116 	ajFeatGfftagAddCS(gf,"timelastmodified", timelmS);
5117     }
5118 
5119     if(isObsolete)
5120     {
5121 	isObsoleteS = ajStrNewC("true");
5122 	ajFeatGfftagAddCS(gf,"isObsolete", isObsoleteS);
5123 	ajStrDel(&isObsoleteS);
5124     }
5125 
5126     ajFeatTrace(gf);
5127 
5128     ajStrDel(&name);
5129     ajStrDel(&type);
5130     ajStrDel(&source);
5131     ajStrDel(&alias);
5132     ajTimeDel(&timelm);
5133     ajStrDel(&timelmS);
5134     ajStrDel(&regionid);
5135 
5136     if(ajStrGetLen(regionuniquename))
5137     {
5138 	if(loccount)
5139 	    ajDebug("Feature '%S' is used as a source feature for %d other"
5140 		    " feature(s) but it is itself located on feature '%S';"
5141 		    " making queries on the parent feature '%S'\n",
5142 		    uniquename, loccount, regionuniquename, regionuniquename);
5143 	ajStrAssignS(&fttab->Seqid, regionuniquename);
5144 
5145 	ajStrDel(&uniquename);
5146 	return regionuniquename;
5147     }
5148 
5149     ajStrDel(&regionuniquename);
5150 
5151     return uniquename;
5152 }
5153 
5154 
5155 
5156 
5157 /* @funcstatic featChadoConnect ***********************************************
5158 **
5159 ** Connects to the chado database required by the query
5160 **
5161 ** @param [r] qry [const AjPQuery] Query object
5162 ** @return [AjPSqlconnection] SQL Database connection
5163 **
5164 ** @release 6.4.0
5165 ** @@
5166 ******************************************************************************/
5167 
featChadoConnect(const AjPQuery qry)5168 static AjPSqlconnection featChadoConnect(const AjPQuery qry)
5169 {
5170     AjESqlconnectionClient client;
5171 
5172     ajint iport = 3306;
5173 
5174     AjPStr url = NULL;
5175     AjPUrlref uo  = NULL;
5176 
5177     AjPStr password   = NULL;
5178     AjPStr socketfile = NULL;
5179 
5180     AjPSqlconnection connection = NULL;
5181 
5182     url = ajStrNew();
5183 
5184     if(!ajNamDbGetUrl(qry->DbName, &url))
5185     {
5186 	ajErr("no URL defined for database %S", qry->DbName);
5187 
5188 	return ajFalse;
5189     }
5190 
5191     uo = ajHttpUrlrefNew();
5192 
5193     ajHttpUrlrefParseC(&uo, ajStrGetPtr(url));
5194     ajHttpUrlrefSplitPort(uo);
5195     ajHttpUrlrefSplitUsername(uo);
5196 
5197     if(ajStrMatchCaseC(uo->Method,"mysql"))
5198         client = ajESqlconnectionClientMySQL;
5199     else if(ajStrMatchCaseC(uo->Method,"postgresql"))
5200     {
5201         client = ajESqlconnectionClientPostgreSQL;
5202         iport = 5432;
5203     }
5204     else
5205         client = ajESqlconnectionClientNULL;
5206 
5207     if(!ajStrGetLen(uo->Port))
5208         ajFmtPrintS(&uo->Port,"%d",iport);
5209 
5210     if(ajStrGetLen(uo->Password))
5211     {
5212         password = ajStrNew();
5213         ajStrAssignS(&password,uo->Password);
5214     }
5215 
5216     connection = ajSqlconnectionNewData(client,uo->Username,password,
5217                                         uo->Host,uo->Port,socketfile,
5218                                         uo->Absolute);
5219 
5220     ajStrDel(&password);
5221 
5222     if(!connection)
5223         ajErr("Could not connect to database server");
5224 
5225     ajStrDel(&url);
5226     ajHttpUrlrefDel(&uo);
5227 
5228     return connection;
5229 }
5230 
5231 
5232 
5233 
5234 /* @section DAS ***************************************************************
5235 **
5236 ** These functions manage the DAS database access methods.
5237 **
5238 ******************************************************************************/
5239 
5240 
5241 
5242 
5243 /* @funcstatic featAccessDas **************************************************
5244 **
5245 ** Feature access method for DAS feature sources
5246 **
5247 ** @param [u] fttabin [AjPFeattabin] Feature input.
5248 ** @return [AjBool] ajTrue on success.
5249 **
5250 ** @release 6.4.0
5251 ** @@
5252 ******************************************************************************/
5253 
featAccessDas(AjPFeattabin fttabin)5254 static AjBool featAccessDas(AjPFeattabin fttabin)
5255 {
5256     AjPStr host         = NULL;
5257     AjPStr path         = NULL;
5258     AjIList iter        = NULL;
5259     AjPQueryField field = NULL;
5260     AjPQuery qry        = NULL;
5261     AjPTextin textin    = NULL;
5262     AjPStr dasqueryurl  = NULL;
5263 
5264     AjBool ret = ajTrue;
5265     ajint port = 80;
5266 
5267     textin = fttabin->Input;
5268 
5269     qry = textin->Query;
5270 
5271     if(qry->QryDone)
5272 	return ajFalse;
5273 
5274     if(!ajHttpQueryUrl(qry, &port, &host, &path))
5275     {
5276 	ajStrDel(&host);
5277 	ajStrDel(&path);
5278 
5279 	return ajFalse;
5280     }
5281 
5282     if(ajStrGetCharLast(path)!='/')
5283 	ajStrAppendK(&path,'/');
5284 
5285     dasqueryurl = ajStrNew();
5286 
5287     iter = ajListIterNewread(qry->QueryFields);
5288 
5289     while(!ajListIterDone(iter))
5290     {
5291 	field = ajListIterGet(iter);
5292 
5293 	if(ajStrMatchCaseC(field->Field, "id"))
5294 	{
5295 	    if(!ajStrGetLen(dasqueryurl))
5296 		ajFmtPrintS(&dasqueryurl,"segment=%S",
5297 		            field->Wildquery);
5298 	    else
5299 		ajFmtPrintS(&dasqueryurl,"%S;segment=%S",
5300 		            dasqueryurl,
5301 		            field->Wildquery);
5302 
5303 	    /* TODO: segment specific start,end positions */
5304 	    if(fttabin->End > 0)
5305 		ajFmtPrintS(&dasqueryurl,"%S:%u,%u",
5306 		            dasqueryurl,fttabin->Start,fttabin->End);
5307 	}
5308 	else {
5309 	    if(!ajStrGetLen(dasqueryurl))
5310 		ajFmtPrintS(&dasqueryurl,"%S=%S",
5311 		            field->Field,
5312 		            field->Wildquery);
5313 	    else
5314 		ajFmtPrintS(&dasqueryurl,"%S;%S=%S",
5315 		            dasqueryurl,
5316 		            field->Field,
5317 		            field->Wildquery);
5318 
5319 	    /* TODO: segment specific start,end positions */
5320 	    if(fttabin->End > 0)
5321 		ajFmtPrintS(&dasqueryurl,"%S:%u,%u",
5322 		            dasqueryurl,fttabin->Start,fttabin->End);
5323 	}
5324 
5325     }
5326 
5327     ajFmtPrintS(&path,"%Sfeatures?%S",path, dasqueryurl);
5328 
5329     ajFilebuffDel(&textin->Filebuff);
5330     textin->Filebuff = ajHttpRead(qry->DbHttpVer, qry->DbName, qry->DbProxy,
5331                                   host, port, path);
5332 
5333     if (textin->Filebuff)
5334 	ajFilebuffHtmlNoheader(textin->Filebuff);
5335     else
5336 	ret = ajFalse;
5337 
5338     qry->QryDone = ajTrue;
5339 
5340     ajStrDel(&host);
5341     ajStrDel(&path);
5342     ajStrDel(&dasqueryurl);
5343 
5344     ajListIterDel(&iter);
5345 
5346     return ret;
5347 }
5348 
5349 
5350 
5351 
5352 /* @func ajFeatdbPrintAccess **************************************************
5353 **
5354 ** Reports the internal data structures
5355 **
5356 ** @param [u] outf [AjPFile] Output file
5357 ** @param [r] full [AjBool] Full report (usually ajFalse)
5358 ** @return [void]
5359 **
5360 ** @release 6.4.0
5361 ** @@
5362 ******************************************************************************/
5363 
void ajFeatdbPrintAccess(AjPFile outf, AjBool full)
{
    ajint imethod;

    /* Table heading */
    ajFmtPrintF(outf, "\n");
    ajFmtPrintF(outf, "# Feature access methods\n");
    ajFmtPrintF(outf, "# Name       Alias Entry Query   All Description\n");
    ajFmtPrintF(outf, "\n");
    ajFmtPrintF(outf, "method {\n");

    /* The access method table ends at the first entry with no name */
    for(imethod = 0; feattabAccess[imethod].Name; imethod++)
    {
        /* Alias entries are listed only in a full report */
        if(!full && feattabAccess[imethod].Alias)
            continue;

        ajFmtPrintF(outf, "  %-10s %5B %5B %5B %5B \"%s\"\n",
                    feattabAccess[imethod].Name,
                    feattabAccess[imethod].Alias,
                    feattabAccess[imethod].Entry,
                    feattabAccess[imethod].Query,
                    feattabAccess[imethod].All,
                    feattabAccess[imethod].Desc);
    }

    ajFmtPrintF(outf, "}\n\n");
}
5385 
5386 
5387 
5388 
5389 /* @func ajFeatdbExit *********************************************************
5390 **
5391 ** Cleans up feature database processing internal memory
5392 **
5393 ** @return [void]
5394 **
5395 ** @release 6.4.0
5396 ** @@
5397 ******************************************************************************/
5398 
ajFeatdbExit(void)5399 void ajFeatdbExit(void)
5400 {
5401     ajRegFree(&featCdDivExp);
5402     ajCharDel(&featCdName);
5403     ajRegFree(&featRegGcgId);
5404     ajRegFree(&featRegGcgCont);
5405     ajRegFree(&featRegGcgId2);
5406     ajRegFree(&featRegGcgSplit);
5407     ajRegFree(&featRegGcgRefId);
5408 
5409     return;
5410 }
5411