1 /* @source ajfeatdb ***********************************************************
2 **
3 ** AJAX feature database access functions
4 **
5 ** These functions control all aspects of AJAX feature database access
6 **
7 ** @author Copyright (C) 2010 Peter Rice
8 ** @version $Revision: 1.46 $
9 ** @modified Sep 2010 pmr first version
10 ** @modified $Date: 2012/12/07 10:20:52 $ by $Author: rice $
11 ** @@
12 **
13 ** This library is free software; you can redistribute it and/or
14 ** modify it under the terms of the GNU Lesser General Public
15 ** License as published by the Free Software Foundation; either
16 ** version 2.1 of the License, or (at your option) any later version.
17 **
18 ** This library is distributed in the hope that it will be useful,
19 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
20 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 ** Lesser General Public License for more details.
22 **
23 ** You should have received a copy of the GNU Lesser General Public
24 ** License along with this library; if not, write to the Free Software
25 ** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
26 ** MA 02110-1301, USA.
27 **
28 ******************************************************************************/
29
30
31 #include "ajlib.h"
32
33 #include "ajfeatdb.h"
34 #include "ajfeat.h"
35 #include "ajfeatread.h"
36 #include "ajtextdata.h"
37
38 #include "ajtagval.h"
39 #include "ajsql.h"
40 #include "ajindex.h"
41 #include "ajhttp.h"
42 #include "ajutil.h"
43 #include "ajnam.h"
44 #include "ajcall.h"
45 #include "ajfileio.h"
46
47
48 #include <limits.h>
49 #include <stdarg.h>
50 #include <sys/types.h>
51 #include <errno.h>
52 #include <signal.h>
53
54
55 #ifndef WIN32
56 #include <sys/socket.h>
57 #include <netinet/in.h>
58 #include <arpa/inet.h>
59
60 #include <netdb.h>
61
62 #include <dirent.h>
63 #include <unistd.h>
64 #else
65 #include <winsock2.h>
66 #include <ws2tcpip.h>
67 #endif
68
69
70
71 static AjPRegexp featCdDivExp = NULL;
72
73 static AjPRegexp featRegGcgId = NULL;
74 static AjPRegexp featRegGcgCont = NULL;
75 static AjPRegexp featRegGcgId2 = NULL;
76 static AjPRegexp featRegGcgSplit = NULL;
77
78 static AjPRegexp featRegGcgRefId = NULL;
79
80 static char* featCdName = NULL;
81 static ajuint featCdMaxNameSize = 0;
82
83
84 /* @datastatic FeatPCdDiv *****************************************************
85 **
86 ** EMBLCD division file record structure
87 **
88 ** @alias FeatSCdDiv
89 ** @alias FeatOCdDiv
90 **
91 ** @attr FileName [AjPStr] Filename(s)
92 ** @attr DivCode [ajuint] Division code
93 ** @attr Padding [char[4]] Padding to alignment boundary
94 ** @@
95 ******************************************************************************/
96
97 typedef struct FeatSCdDiv
98 {
99 AjPStr FileName;
100 ajuint DivCode;
101 char Padding[4];
102 } FeatOCdDiv;
103
104 #define FeatPCdDiv FeatOCdDiv*
105
106
107
108
109 /* @datastatic FeatPCdEntry ***************************************************
110 **
111 ** EMBLCD entrynam.idx file record structure
112 **
113 ** @alias FeatSCdEntry
114 ** @alias FeatOCdEntry
115 **
116 ** @attr div [ajuint] division file record
117 ** @attr annoff [ajuint] data file offset
118 ** @attr seqoff [ajuint] sequence file offset (if any)
119 ** @@
120 ******************************************************************************/
121
122 typedef struct FeatSCdEntry
123 {
124 ajuint div;
125 ajuint annoff;
126 ajuint seqoff;
127 } FeatOCdEntry;
128
129 #define FeatPCdEntry FeatOCdEntry*
130
131
132
133
134 /* @datastatic FeatPCdFHeader **************************************************
135 **
136 ** EMBLCD index file header structure, same for all index files.
137 **
138 ** @alias FeatSCdFHeader
139 ** @alias FeatOCdFHeader
140 **
141 ** @attr FileSize [ajuint] Index file size
142 ** @attr NRecords [ajuint] Index record count
143 ** @attr IdSize [ajuint] Index string length
144 ** @attr RelDay [ajuint] Release date - day
145 ** @attr RelMonth [ajuint] Release date - month
146 ** @attr RelYear [ajuint] Release date - year
147 ** @attr RecSize [short] Record size
148 ** @attr SPadding [short] Padding to alignment boundary
149 ** @attr DbName [char[24]] Database name
150 ** @attr Release [char[12]] Release name/number
151 ** @attr Date [char[4]] Date as three integers.
152 ** @@
153 ******************************************************************************/
154
155 typedef struct FeatSCdFHeader
156 {
157 ajuint FileSize;
158 ajuint NRecords;
159 ajuint IdSize;
160 ajuint RelDay;
161 ajuint RelMonth;
162 ajuint RelYear;
163 short RecSize;
164 short SPadding;
165 char DbName[24];
166 char Release[12];
167 char Date[4];
168 } FeatOCdFHeader;
169
170 #define FeatPCdFHeader FeatOCdFHeader*
171
172
173
174
175 /* @datastatic FeatPCdFile ****************************************************
176 **
177 ** EMBLCD file data structure
178 **
179 ** @alias FeatSCdFile
180 ** @alias FeatOCdFile
181 **
182 ** @attr Header [FeatPCdFHeader] Header data
183 ** @attr File [AjPFile] File
184 ** @attr NRecords [ajuint] Number of records
185 ** @attr RecSize [ajuint] Record length (for calculating record offsets)
186 ** @@
187 ******************************************************************************/
188
189 typedef struct FeatSCdFile
190 {
191 FeatPCdFHeader Header;
192 AjPFile File;
193 ajuint NRecords;
194 ajuint RecSize;
195 } FeatOCdFile;
196
197 #define FeatPCdFile FeatOCdFile*
198
199
200
201
202 /* @datastatic FeatPCdHit *****************************************************
203 **
204 ** EMBLCD hit file record structure
205 **
206 ** @alias FeatSCdHit
207 ** @alias FeatOCdHit
208 **
209 ** @attr HitList [ajuint*] Array of hits, as record numbers in the
210 ** entrynam.idx file
211 ** @attr NHits [ajuint] Number of hits in HitList array
212 ** @attr Padding [char[4]] Padding to alignment boundary
213 ** @@
214 ******************************************************************************/
215
216 typedef struct FeatSCdHit
217 {
218 ajuint* HitList;
219 ajuint NHits;
220 char Padding[4];
221 } FeatOCdHit;
222
223 #define FeatPCdHit FeatOCdHit*
224
225
226
227
228 /* @datastatic FeatPCdIdx *****************************************************
229 **
230 ** EMBLCD entryname index file record structure
231 **
232 ** @alias FeatSCdIdx
233 ** @alias FeatOCdIdx
234 **
235 ** @attr AnnOffset [ajuint] Data file offset (see DivCode)
236 ** @attr SeqOffset [ajuint] Sequence file offset (if any) (see DivCode)
237 ** @attr EntryName [AjPStr] Entry ID - the file is sorted by these
238 ** @attr DivCode [ajushort] Division file record
239 ** @attr Padding [char[6]] Padding to alignment boundary
240 ** @@
241 ******************************************************************************/
242
243 typedef struct FeatSCdIdx
244 {
245 ajuint AnnOffset;
246 ajuint SeqOffset;
247 AjPStr EntryName;
248 ajushort DivCode;
249 char Padding[6];
250 } FeatOCdIdx;
251
252 #define FeatPCdIdx FeatOCdIdx*
253
254
255
256
257 /* @datastatic FeatPCdTrg *****************************************************
258 **
259 ** EMBLCD target (.trg) file record structure
260 **
261 ** @alias FeatSCdTrg
262 ** @alias FeatOCdTrg
263 **
264 ** @attr FirstHit [ajuint] First hit record in .hit file
265 ** @attr NHits [ajuint] Number of hit records in .hit file
266 ** @attr Target [AjPStr] Indexed target string (the file is sorted by these)
267 ** @@
268 ******************************************************************************/
269
270 typedef struct FeatSCdTrg
271 {
272 ajuint FirstHit;
273 ajuint NHits;
274 AjPStr Target;
275 } FeatOCdTrg;
276
277 #define FeatPCdTrg FeatOCdTrg*
278
279
280
281
282 /* @datastatic FeatPCdQry *****************************************************
283 **
284 ** EMBLCD query structure
285 **
286 ** @alias FeatSCdQry
287 ** @alias FeatOCdQry
288 **
289 ** @attr divfile [AjPStr] division.lkp
290 ** @attr idxfile [AjPStr] entryname.idx
291 ** @attr datfile [AjPStr] main data reference
292 ** @attr seqfile [AjPStr] sequence
293 ** @attr tblfile [AjPStr] BLAST table
294 ** @attr srcfile [AjPStr] BLAST FASTA source data
295 ** @attr dfp [FeatPCdFile] division.lkp
296 ** @attr ifp [FeatPCdFile] entryname.idx
297 ** @attr trgfp [FeatPCdFile] acnum.trg
298 ** @attr hitfp [FeatPCdFile] acnum.hit
299 ** @attr trgLine [FeatPCdTrg]acnum input line
300 ** @attr name [char*] filename from division.lkp
301 ** @attr nameSize [ajuint] division.lkp filename length
302 ** @attr div [ajuint] current division number
303 ** @attr maxdiv [ajuint] max division number
304 ** @attr type [ajuint] BLAST type
305 ** @attr libr [AjPFile] main data reference or BLAST header
306 ** @attr libs [AjPFile] sequence or BLAST compressed sequence
307 ** @attr libt [AjPFile] blast table
308 ** @attr libf [AjPFile] blast FASTA source data
309 ** @attr idnum [ajuint] current BLAST entry offset
310 ** @attr TopHdr [ajuint] BLAST table headers offset
311 ** @attr TopCmp [ajuint] BLAST table sequence offset
312 ** @attr TopAmb [ajuint] BLAST table ambiguities offset
313 ** @attr TopSrc [ajuint] BLAST table FASTA source offset
314 ** @attr Size [ajuint] BLAST database size
315 ** @attr Skip [AjBool*] skip file(s) in division.lkp
316 ** @attr idxLine [FeatPCdIdx] entryname.idx input line
317 ** @attr Samefile [AjBool] true if the same file is passed to
318 ** ajFilebuffReopenFile
319 ** @attr Padding [char[4]] Padding to alignment boundary
320 ** @@
321 ******************************************************************************/
322
323 typedef struct FeatSCdQry
324 {
325 AjPStr divfile;
326 AjPStr idxfile;
327 AjPStr datfile;
328 AjPStr seqfile;
329 AjPStr tblfile;
330 AjPStr srcfile;
331
332 FeatPCdFile dfp;
333 FeatPCdFile ifp;
334 FeatPCdFile trgfp;
335 FeatPCdFile hitfp;
336 FeatPCdTrg trgLine;
337
338 char* name;
339 ajuint nameSize;
340 ajuint div;
341 ajuint maxdiv;
342
343 ajuint type;
344
345 AjPFile libr;
346 AjPFile libs;
347 AjPFile libt;
348 AjPFile libf;
349
350 ajuint idnum;
351 ajuint TopHdr;
352 ajuint TopCmp;
353 ajuint TopAmb;
354 ajuint TopSrc;
355 ajuint Size;
356
357 AjBool* Skip;
358 FeatPCdIdx idxLine;
359 AjBool Samefile;
360 char Padding[4];
361 } FeatOCdQry;
362
363 #define FeatPCdQry FeatOCdQry*
364
365
366
367
368 /* @datastatic FeatPEmbossQry *************************************************
369 **
370 ** Btree 'emboss' query structure
371 **
372 ** @alias FeatSEmbossQry
373 ** @alias FeatOEmbossQry
374 **
375 ** @attr idcache [AjPBtcache] ID cache
376 ** @attr Caches [AjPList] Caches for each query field
377 ** @attr files [AjPStr*] database filenames
378 ** @attr reffiles [AjPStr**] database reference filenames
379 ** @attr Skip [AjBool*] files numbers to exclude
380 ** @attr List [AjPList] List of files
381 ** @attr libs [AjPFile] Primary (database source) file
382 ** @attr libr [AjPFile] Secondary (database bibliographic source) file
383 ** @attr div [ajuint] division number of currently open database file
384 ** @attr refcount [ajuint] number of reference file(s) per entry
385 ** @attr nentries [ajint] number of entries in the filename array(s)
386 ** -1 when done
387 ** @attr Samefile [AjBool] true if the same file is passed to
388 ** ajFilebuffReopenFile
389 ** @@
390 ******************************************************************************/
391
392 typedef struct FeatSEmbossQry
393 {
394 AjPBtcache idcache;
395 AjPList Caches;
396
397 AjPStr *files;
398 AjPStr **reffiles;
399 AjBool *Skip;
400
401 AjPList List;
402
403 AjPFile libs;
404 AjPFile libr;
405
406 ajuint div;
407 ajuint refcount;
408 ajint nentries;
409
410 AjBool Samefile;
411 } FeatOEmbossQry;
412
413 #define FeatPEmbossQry FeatOEmbossQry*
414
415
416
417 static AjBool featAccessDas(AjPFeattabin ftabin);
418 static AjBool featAccessChado(AjPFeattabin fttabin);
419 static AjBool featAccessEmbossGcg(AjPFeattabin fttabin);
420 static AjBool featAccessGcg(AjPFeattabin fttabin);
421
422 static AjPSqlconnection featChadoConnect(const AjPQuery qry);
423 static void featChadoChildfeatureQuery(AjPSqlconnection connection,
424 AjPFeattable feattab,
425 const AjPStr srcfeature);
426
427 static AjBool featChadoQryfeatureQuery(AjPSqlconnection connection, AjPStr sql,
428 AjPFeattable feattab,
429 ajint qrystart, ajint qryend);
430
431 static AjPFeature featChadoChildfeatureRow(AjPFeattable fttab, AjPSqlrow line);
432 static AjPStr featChadoQryfeatureRow(AjPFeattable fttab, AjPSqlrow row,
433 ajint qrystart, ajint qryend);
434
435 static ajuint featCdDivNext(AjPQuery qry);
436 static void featCdIdxDel(FeatPCdIdx* pthys);
437 static void featCdTrgDel(FeatPCdTrg* pthys);
438
439 static int featCdEntryCmp(const void* a, const void* b);
440 static void featCdEntryDel(void** pentry, void* cl);
441 static void featCdFileClose(FeatPCdFile *thys);
442 static FeatPCdFile featCdFileOpen(const AjPStr dir, const char* name,
443 AjPStr* fullname);
444 static ajint featCdFileSeek(FeatPCdFile fil, ajuint ipos);
445 static void featCdIdxLine(FeatPCdIdx idxLine, ajuint ipos,
446 FeatPCdFile fp);
447 static char* featCdIdxName(ajuint ipos, FeatPCdFile fp);
448 static AjBool featCdIdxQuery(AjPQuery qry, const AjPStr idqry);
449 static ajuint featCdIdxSearch(FeatPCdIdx idxLine, const AjPStr entry,
450 FeatPCdFile fp);
451 static AjBool featCdQryClose(AjPQuery qry);
452 static AjBool featCdQryEntry(AjPQuery qry);
453 static AjBool featCdQryFile(AjPQuery qry);
454 static AjBool featCdQryOpen(AjPQuery qry);
455 static AjBool featCdQryNext(AjPQuery qry);
456 static AjBool featCdQryQuery(AjPQuery qry);
457 static AjBool featCdQryReuse(AjPQuery qry);
458 static AjBool featCdReadHeader(FeatPCdFile fp);
459 static AjBool featCdTrgClose(FeatPCdFile *trgfil, FeatPCdFile *hitfil);
460 static ajuint featCdTrgFind(AjPQuery qry, const char* indexname,
461 const AjPStr qrystring);
462 static void featCdTrgLine(FeatPCdTrg trgLine, ajuint ipos,
463 FeatPCdFile fp);
464 static char* featCdTrgName(ajuint ipos, FeatPCdFile fp);
465 static AjBool featCdTrgOpen(const AjPStr dir, const char* name,
466 FeatPCdFile *trgfil, FeatPCdFile *hitfil);
467 static AjBool featCdTrgQuery(AjPQuery qry, const AjPStr field,
468 const AjPStr wildqry);
469 static ajuint featCdTrgSearch(FeatPCdTrg trgLine, const AjPStr name,
470 FeatPCdFile fp);
471 static AjBool featEmbossGcgAll(AjPFeattabin fttabin);
472 static void featEmbossGcgLoadBuff(AjPFeattabin fttabin);
473 static AjBool featEmbossGcgReadRef(AjPFeattabin fttabin);
474 static AjBool featEmbossGcgReadSeq(AjPFeattabin fttabin);
475
476 static AjBool featEmbossOpenCache(AjPQuery qry, const char *ext,
477 AjPBtcache *cache);
478 static AjBool featEmbossQryClose(AjPQuery qry);
479 static AjBool featEmbossQryEntry(AjPQuery qry);
480 static AjBool featEmbossQryNext(AjPQuery qry);
481 static AjBool featEmbossQryOpen(AjPQuery qry);
482 static AjBool featEmbossQryOrganisms(AjPQuery qry);
483 static AjBool featEmbossQryQuery(AjPQuery qry);
484 static AjBool featEmbossQryReuse(AjPQuery qry);
485
486 static AjBool featGcgAll(AjPFeattabin fttabin);
487 static void featGcgBinDecode(AjPStr *pthis, ajuint rdlen);
488 static void featGcgLoadBuff(AjPFeattabin fttabin);
489 static AjBool featGcgReadRef(AjPFeattabin fttabin);
490 static AjBool featGcgReadSeq(AjPFeattabin fttabin);
491
492
493
494
495 /* @funclist feattabAccess ****************************************************
496 **
497 ** Functions to access each database or feature access method
498 **
499 ******************************************************************************/
500
501 static AjOFeattabAccess feattabAccess[] =
502 {
503 /* Name AccessFunction FreeFunction
504 Qlink Description
505 Alias Entry Query All Chunk Padding */
506
507 {
508 "das", &featAccessDas, NULL,
509 "&", "retrieve features from a DAS server",
510 AJFALSE, AJTRUE, AJTRUE, AJFALSE, AJFALSE, AJFALSE
511 },
512 {
513 "chado", &featAccessChado, NULL,
514 "", "retrieve features from a CHADO server",
515 AJFALSE, AJTRUE, AJFALSE, AJFALSE, AJFALSE, AJFALSE
516 },
517 {
518 "gcg", &featAccessGcg, NULL,
519 "|&!^=", "emboss dbigcg indexed",
520 AJFALSE, AJTRUE, AJTRUE, AJTRUE, AJFALSE, AJFALSE
521 },
522 {
523 "embossgcg", &featAccessEmbossGcg, NULL,
524 "|&!^=", "emboss dbxgcg indexed",
525 AJFALSE, AJTRUE, AJTRUE, AJTRUE, AJFALSE, AJFALSE
526 },
527 {
528 NULL, NULL, NULL,
529 NULL, NULL,
530 AJFALSE, AJFALSE, AJFALSE, AJFALSE, AJFALSE, AJFALSE
531 }
532 };
533
534
535
536
537 /* @func ajFeatdbInit *********************************************************
538 **
539 ** Initialise feature database internals
540 **
541 ** @return [void]
542 **
543 ** @release 6.4.0
544 ******************************************************************************/
545
ajFeatdbInit(void)546 void ajFeatdbInit(void)
547 {
548 AjPTable table;
549 ajuint i = 0;
550
551 table = ajFeattabaccessGetDb();
552
553 while(feattabAccess[i].Name)
554 {
555 ajCallTableRegister(table, feattabAccess[i].Name,
556 (void*) &feattabAccess[i]);
557 i++;
558 }
559
560 return;
561 }
562
563
564
565
566
567 /* @section EMBL CD Database Indexing *****************************************
568 **
569 ** These functions manage the EMBL CD-ROM index access methods.
570 ** These include the "efetch" indexing used at the Sanger Centre
571 ** based on Erik Sonnhammer's indexseqlibs code
572 ** and a direct copy of the database and index files from the
573 ** EMBL CD-ROM distribution.
574 **
575 ** The index files start with a file "division.lkp" which contains
576 ** the list of database filenames and an index number for each.
577 **
578 ** "entrynam.idx" is a sorted index by entry name for each entry
579 ** which points to a file number and a byte offset within the file.
580 **
581 ** "acnum.trg" and "acnum.hit" index accession numbers and link them
582 ** to record numbers in "entrynam.idx"
583 **
584 ** Other index files are not used yet by EMBOSS but could be added
585 ** using the "des" field in queries to search descriptions, and so on.
586 **
587 ******************************************************************************/
588
589
590
591
592 /* @funcstatic featCdFileOpen *************************************************
593 **
594 ** Opens a named EMBL CD-ROM index file.
595 **
596 ** @param [r] dir [const AjPStr] Directory
597 ** @param [r] name [const char*] File name.
598 ** @param [w] fullname [AjPStr*] Full file name with directory path
599 ** @return [FeatPCdFile] EMBL CD-ROM index file object.
600 **
601 ** @release 6.5.0
602 ** @@
603 ******************************************************************************/
604
featCdFileOpen(const AjPStr dir,const char * name,AjPStr * fullname)605 static FeatPCdFile featCdFileOpen(const AjPStr dir, const char* name,
606 AjPStr* fullname)
607 {
608 FeatPCdFile thys = NULL;
609
610
611 AJNEW0(thys);
612
613 thys->File = ajFileNewInNamePathC(name, dir);
614
615 if(!thys->File)
616 {
617 AJFREE(thys);
618
619 return NULL;
620 }
621
622
623 AJNEW0(thys->Header);
624
625 featCdReadHeader(thys);
626 thys->NRecords = thys->Header->NRecords;
627 thys->RecSize = thys->Header->RecSize;
628
629 ajStrAssignS(fullname, ajFileGetPrintnameS(thys->File));
630
631 ajDebug("featCdFileOpen '%F' NRecords: %d RecSize: %d\n",
632 thys->File, thys->NRecords, thys->RecSize);
633
634
635 return thys;
636 }
637
638
639
640
641 /* @funcstatic featCdFileSeek *************************************************
642 **
643 ** Sets the file position in an EMBL CD-ROM index file.
644 **
645 ** @param [u] fil [FeatPCdFile] EMBL CD-ROM index file object.
646 ** @param [r] ipos [ajuint] Offset.
647 ** @return [ajint] Return value from the seek operation.
648 **
649 ** @release 6.5.0
650 ** @@
651 ******************************************************************************/
652
653
featCdFileSeek(FeatPCdFile fil,ajuint ipos)654 static ajint featCdFileSeek(FeatPCdFile fil, ajuint ipos)
655 {
656 ajint ret;
657 ajuint jpos;
658
659 jpos = 300 + ipos*fil->RecSize;
660 ret = ajFileSeek(fil->File, jpos, 0);
661
662 /*
663 ajDebug("featCdFileSeek rec %u pos %u tell %Ld returns %d\n",
664 ipos, jpos, ajFileResetPos(fil->File), ret);
665 */
666
667 return ret;
668 }
669
670
671
672
673 /* @funcstatic featCdFileClose ************************************************
674 **
675 ** Closes an EMBL CD-ROM index file.
676 **
677 ** @param [d] pthis [FeatPCdFile*] EMBL CD-ROM index file.
678 ** @return [void]
679 **
680 ** @release 6.5.0
681 ** @@
682 ******************************************************************************/
683
featCdFileClose(FeatPCdFile * pthis)684 static void featCdFileClose(FeatPCdFile* pthis)
685 {
686 FeatPCdFile thys;
687
688 thys = *pthis;
689
690 if(!thys)
691 return;
692
693 ajDebug("featCdFileClose of %F\n", (*pthis)->File);
694
695 ajFileClose(&thys->File);
696 AJFREE(thys->Header);
697 AJFREE(*pthis);
698
699 return;
700 }
701
702
703
704
705 /* @funcstatic featCdIdxSearch ************************************************
706 **
707 ** Binary search through an EMBL CD-ROM index file for an exact match.
708 **
709 ** @param [u] idxLine [FeatPCdIdx] Index file record.
710 ** @param [r] entry [const AjPStr] Entry name to search for.
711 ** @param [u] fil [FeatPCdFile] EMBL CD-ROM index file.
712 ** @return [ajuint] Record number on success, -1 on failure.
713 **
714 ** @release 6.5.0
715 ** @@
716 ******************************************************************************/
717
featCdIdxSearch(FeatPCdIdx idxLine,const AjPStr entry,FeatPCdFile fil)718 static ajuint featCdIdxSearch(FeatPCdIdx idxLine, const AjPStr entry,
719 FeatPCdFile fil)
720 {
721 AjPStr entrystr = NULL;
722 ajint ihi;
723 ajint ilo;
724 ajint ipos = 0;
725 ajint icmp = 0;
726 char *name;
727
728 ajStrAssignS(&entrystr, entry);
729 ajStrFmtUpper(&entrystr);
730
731 ajDebug("featCdIdxSearch (entry '%S') records: %d\n",
732 entrystr, fil->NRecords);
733
734 if(fil->NRecords < 1)
735 return -1;
736
737 ilo = 0;
738 ihi = fil->NRecords - 1;
739
740 while(ilo <= ihi)
741 {
742 ipos = (ilo + ihi)/2;
743 name = featCdIdxName(ipos, fil);
744 icmp = ajStrCmpC(entrystr, name);
745 ajDebug("idx test %u '%s' %2d (+/- %u)\n", ipos, name, icmp, ihi-ilo);
746
747 if(!icmp)
748 break;
749
750 if(icmp < 0)
751 ihi = ipos-1;
752 else
753 ilo = ipos+1;
754 }
755
756 ajStrDel(&entrystr);
757
758 if(icmp)
759 return -1;
760
761 featCdIdxLine(idxLine, ipos, fil);
762
763 return ipos;
764 }
765
766
767
768
769 /* @funcstatic featCdIdxQuery *************************************************
770 **
771 ** Binary search of an EMBL CD-ROM index file for entries matching a
772 ** wildcard entry name.
773 **
774 ** @param [u] qry [AjPQuery] Query object.
775 ** @param [r] idqry [const AjPStr] ID Query
776 ** @return [AjBool] ajTrue on success.
777 **
778 ** @release 6.5.0
779 ** @@
780 ******************************************************************************/
781
featCdIdxQuery(AjPQuery qry,const AjPStr idqry)782 static AjBool featCdIdxQuery(AjPQuery qry, const AjPStr idqry)
783 {
784 FeatPCdQry qryd;
785
786 AjPList list;
787 FeatPCdIdx idxLine;
788 FeatPCdFile fil;
789
790 AjPStr idstr = NULL;
791 AjPStr idpref = NULL;
792 ajint ihi;
793 ajint ilo;
794 ajint ipos = 0;
795 ajint icmp;
796 char *name;
797 ajint i;
798 ajint ilen;
799 ajint jlo;
800 ajint jhi;
801 ajint khi;
802 AjBool first;
803 ajint ifail = 0;
804 ajint iskip = 0;
805
806 FeatPCdEntry entry;
807
808 qryd = qry->QryData;
809 list = qry->ResultsList;
810 idxLine = qryd->idxLine;
811 fil = qryd->ifp;
812
813 ajStrAssignS(&idstr,idqry);
814 ajStrFmtUpper(&idstr);
815 ajStrAssignS(&idpref, idstr);
816
817 ajStrRemoveWild(&idpref);
818
819 ajDebug("featCdIdxQuery (wild '%S' prefix '%S')\n",
820 idstr, idpref);
821
822 jlo = ilo = 0;
823 khi = jhi = ihi = fil->NRecords-1;
824
825 ilen = ajStrGetLen(idpref);
826 first = ajTrue;
827
828 if(ilen)
829 { /* find first entry with this prefix */
830 while(ilo <= ihi)
831 {
832 ipos = (ilo + ihi)/2;
833 name = featCdIdxName(ipos, fil);
834 name[ilen] = '\0';
835 icmp = ajStrCmpC(idpref, name); /* test prefix */
836 ajDebug("idx test %d '%s' %2d (+/- %d)\n",
837 ipos, name, icmp, ihi-ilo);
838
839 if(!icmp)
840 { /* hit prefix - test for first */
841 ajDebug("idx hit %d\n", ipos);
842
843 if(first)
844 {
845 jhi = ihi;
846 first = ajFalse;
847 khi = ipos;
848 }
849
850 jlo = ipos;
851 }
852
853 if(icmp > 0)
854 ilo = ipos+1;
855 else
856 ihi = ipos-1;
857 }
858
859 if(first)
860 { /* failed to find any with prefix */
861 ajStrDel(&idstr);
862 ajStrDel(&idpref);
863
864 return ajFalse;
865 }
866
867 ajDebug("first pass: ipos %d jlo %d jhi %d\n", ipos, jlo, jhi);
868
869 /* now search below for last */
870
871 ilo = jlo+1;
872 ihi = jhi;
873
874 while(ilo <= ihi)
875 {
876 ipos = (ilo + ihi)/2;
877 name = featCdIdxName(ipos, fil);
878 name[ilen] = '\0';
879 icmp = ajStrCmpC(idpref, name);
880 ajDebug("idx test %d '%s' %2d (+/- %d)\n",
881 ipos, name, icmp, ihi-ilo);
882
883 if(!icmp)
884 { /* hit prefix */
885 ajDebug("idx hit %d\n", ipos);
886 khi = ipos;
887 }
888
889 if(icmp < 0)
890 ihi = ipos-1;
891 else
892 ilo = ipos+1;
893 }
894
895 ajDebug("second pass: ipos %d jlo %d khi %d\n",
896 ipos, jlo, khi);
897
898 name = featCdIdxName(jlo, fil);
899 ajDebug("first %d '%s'\n", jlo, name);
900 name = featCdIdxName(khi, fil);
901 ajDebug(" last %d '%s'\n", khi, name);
902 }
903
904 for(i=jlo; i < (khi+1); i++)
905 {
906 featCdIdxLine(idxLine, i, fil);
907
908 if(ajStrMatchWildS(idxLine->EntryName, idstr))
909 {
910 if(!qryd->Skip[idxLine->DivCode-1])
911 {
912 if(ifail)
913 {
914 ajDebug("FAIL: %d entries\n", ifail);
915 ifail=0;
916 }
917
918 if(iskip)
919 {
920 ajDebug("SKIP: %d entries\n", iskip);
921 iskip=0;
922 }
923
924 ajDebug(" OK: '%S'\n", idxLine->EntryName);
925 AJNEW0(entry);
926 entry->div = idxLine->DivCode;
927 entry->annoff = idxLine->AnnOffset;
928 entry->seqoff = idxLine->SeqOffset;
929 ajListPushAppend(list, (void*)entry);
930 }
931 else
932 {
933 ajDebug("SKIP: '%S' [file %d]\n",
934 idxLine->EntryName, idxLine->DivCode);
935 iskip++;
936 }
937 }
938 else
939 {
940 ++ifail;
941 /* ajDebug("FAIL: '%S' '%S'\n", idxLine->EntryName, idstr);*/
942 }
943 }
944
945 if(ifail)
946 {
947 ajDebug("FAIL: %d entries\n", ifail);
948 ifail=0;
949 }
950
951 if(iskip)
952 {
953 ajDebug("SKIP: %d entries\n", iskip);
954 ifail=0;
955 }
956
957 ajStrDel(&idstr);
958 ajStrDel(&idpref);
959
960 if(ajListGetLength(list))
961 return ajTrue;
962
963 return ajFalse;
964 }
965
966
967
968
969 /* @funcstatic featCdTrgSearch ************************************************
970 **
971 ** Binary search of EMBL CD-ROM target file, for example an accession number
972 ** search.
973 **
974 ** @param [u] trgLine [FeatPCdTrg] Target file record.
975 ** @param [r] entry [const AjPStr] Entry name or accession number.
976 ** @param [u] fp [FeatPCdFile] EMBL CD-ROM target file
977 ** @return [ajuint] Record number, or -1 on failure.
978 **
979 ** @release 6.5.0
980 ** @@
981 ******************************************************************************/
982
featCdTrgSearch(FeatPCdTrg trgLine,const AjPStr entry,FeatPCdFile fp)983 static ajuint featCdTrgSearch(FeatPCdTrg trgLine, const AjPStr entry,
984 FeatPCdFile fp)
985 {
986 AjPStr entrystr = NULL;
987 ajint ihi;
988 ajint ilo;
989 ajint ipos;
990 ajint icmp;
991 ajint itry;
992 char *name;
993
994 ajStrAssignS(&entrystr, entry);
995 ajStrFmtUpper(&entrystr);
996
997 if(fp->NRecords < 1)
998 return -1;
999
1000 ilo = 0;
1001 ihi = fp->NRecords;
1002 ipos = (ilo + ihi)/2;
1003 icmp = -1;
1004 ajDebug("featCdTrgSearch '%S' recSize: %d\n", entry, fp->RecSize);
1005 name = featCdTrgName(ipos, fp);
1006 icmp = ajStrCmpC(entrystr, name);
1007
1008 ajDebug("trg testa %d '%s' %2d (+/- %d)\n", ipos, name, icmp, ihi-ilo);
1009
1010 while(icmp)
1011 {
1012 if(icmp < 0)
1013 ihi = ipos;
1014 else
1015 ilo = ipos;
1016
1017 itry = (ilo + ihi)/2;
1018
1019 if(itry == ipos)
1020 {
1021 ajDebug("'%S' not found in .trg\n", entrystr);
1022 ajStrDel(&entrystr);
1023
1024 return -1;
1025 }
1026
1027 ipos = itry;
1028 name = featCdTrgName(ipos, fp);
1029 icmp = ajStrCmpC(entrystr, name);
1030 ajDebug("trg testb %d '%s' %2d (+/- %d)\n",
1031 ipos, name, icmp, ihi-ilo);
1032 }
1033
1034 featCdTrgLine(trgLine, ipos, fp);
1035
1036 ajStrDel(&entrystr);
1037
1038 if(!trgLine->NHits)
1039 return -1;
1040
1041 ajDebug("found in .trg at record %d\n", ipos);
1042
1043
1044 return ipos;
1045 }
1046
1047
1048
1049
1050 /* @funcstatic featCdIdxName **************************************************
1051 **
1052 ** Reads the name from record ipos of an EMBL CD-ROM index file.
1053 ** The name length is known from the index file object.
1054 **
1055 ** @param [r] ipos [ajuint] Record number.
1056 ** @param [u] fil [FeatPCdFile] EMBL CD-ROM index file.
1057 ** @return [char*] Name read from file.
1058 **
1059 ** @release 6.5.0
1060 ** @@
1061 ******************************************************************************/
1062
featCdIdxName(ajuint ipos,FeatPCdFile fil)1063 static char* featCdIdxName(ajuint ipos, FeatPCdFile fil)
1064 {
1065 ajuint nameSize;
1066
1067 nameSize = fil->RecSize-10;
1068
1069 if(featCdMaxNameSize < nameSize)
1070 {
1071 featCdMaxNameSize = nameSize;
1072 if(featCdName)
1073 ajCharDel(&featCdName);
1074 featCdName = ajCharNewRes(featCdMaxNameSize+1);
1075 }
1076
1077 featCdFileSeek(fil, ipos);
1078 ajReadbinCharTrim(fil->File, nameSize, featCdName);
1079
1080 return featCdName;
1081 }
1082
1083
1084
1085
1086 /* @funcstatic featCdIdxLine **************************************************
1087 **
1088 ** Reads a numbered record from an EMBL CD-ROM index file.
1089 **
1090 ** @param [u] idxLine [FeatPCdIdx] Index file record.
1091 ** @param [r] ipos [ajuint] Record number.
1092 ** @param [u] fil [FeatPCdFile] EMBL CD-ROM index file.
1093 ** @return [void]
1094 **
1095 ** @release 6.5.0
1096 ** @@
1097 ******************************************************************************/
1098
featCdIdxLine(FeatPCdIdx idxLine,ajuint ipos,FeatPCdFile fil)1099 static void featCdIdxLine(FeatPCdIdx idxLine, ajuint ipos, FeatPCdFile fil)
1100 {
1101 ajuint nameSize;
1102
1103 nameSize = fil->RecSize-10;
1104
1105 ajDebug("featCdIdxLine nameSize: %u max: %u ipos: %u '%F'\n",
1106 nameSize, featCdMaxNameSize, ipos, fil->File);
1107
1108 if(featCdMaxNameSize < nameSize)
1109 {
1110 featCdMaxNameSize = nameSize;
1111
1112 if(featCdName)
1113 ajCharDel(&featCdName);
1114
1115 featCdName = ajCharNewRes(featCdMaxNameSize+1);
1116 }
1117
1118 featCdFileSeek(fil, ipos);
1119 ajReadbinCharTrim(fil->File, nameSize, featCdName);
1120
1121 ajStrAssignC(&idxLine->EntryName,featCdName);
1122
1123 ajReadbinUint(fil->File, &idxLine->AnnOffset);
1124 ajReadbinUint(fil->File, &idxLine->SeqOffset);
1125 ajReadbinUint2(fil->File, &idxLine->DivCode);
1126
1127 ajDebug("read ann: %u seq: %u div: %u\n",
1128 idxLine->AnnOffset, idxLine->SeqOffset,
1129 (ajuint) idxLine->DivCode);
1130 return;
1131 }
1132
1133
1134
1135
1136 /* @funcstatic featCdTrgName **************************************************
1137 **
1138 ** Reads the target name from an EMBL CD-ROM index target file.
1139 **
1140 ** @param [r] ipos [ajuint] Record number.
1141 ** @param [u] fil [FeatPCdFile] EMBL CD-ROM index target file.
1142 ** @return [char*] Name.
1143 **
1144 ** @release 6.5.0
1145 ** @@
1146 ******************************************************************************/
1147
featCdTrgName(ajuint ipos,FeatPCdFile fil)1148 static char* featCdTrgName(ajuint ipos, FeatPCdFile fil)
1149 {
1150 ajuint nameSize;
1151 ajint i;
1152
1153 nameSize = fil->RecSize-8;
1154
1155 if(featCdMaxNameSize < nameSize)
1156 {
1157 featCdMaxNameSize = nameSize;
1158
1159 if(featCdName)
1160 ajCharDel(&featCdName);
1161
1162 featCdName = ajCharNewRes(featCdMaxNameSize+1);
1163 }
1164
1165 featCdFileSeek(fil, ipos);
1166 ajReadbinInt(fil->File, &i);
1167 ajReadbinInt(fil->File, &i);
1168 ajReadbinCharTrim(fil->File, nameSize, featCdName);
1169
1170 ajDebug("featCdTrgName maxNameSize:%d nameSize:%d name '%s'\n",
1171 featCdMaxNameSize, nameSize, featCdName);
1172
1173 return featCdName;
1174 }
1175
1176
1177
1178
1179 /* @funcstatic featCdTrgLine **************************************************
1180 **
1181 ** Reads a line from an EMBL CD-ROM index target file.
1182 **
1183 ** @param [w] trgLine [FeatPCdTrg] Target file record.
1184 ** @param [r] ipos [ajuint] Record number.
1185 ** @param [u] fil [FeatPCdFile] EMBL CD-ROM index target file.
1186 ** @return [void].
1187 **
1188 ** @release 6.5.0
1189 ** @@
1190 ******************************************************************************/
1191
featCdTrgLine(FeatPCdTrg trgLine,ajuint ipos,FeatPCdFile fil)1192 static void featCdTrgLine(FeatPCdTrg trgLine, ajuint ipos, FeatPCdFile fil)
1193 {
1194 ajuint nameSize;
1195
1196 nameSize = fil->RecSize-8;
1197
1198 if(featCdMaxNameSize < nameSize)
1199 {
1200 featCdMaxNameSize = nameSize;
1201
1202 if(featCdName)
1203 ajCharDel(&featCdName);
1204
1205 featCdName = ajCharNewRes(featCdMaxNameSize+1);
1206 }
1207
1208 featCdFileSeek(fil, ipos);
1209
1210 ajReadbinUint(fil->File, &trgLine->NHits);
1211 ajReadbinUint(fil->File, &trgLine->FirstHit);
1212 ajReadbinCharTrim(fil->File, nameSize, featCdName);
1213
1214 ajStrAssignC(&trgLine->Target,featCdName);
1215
1216 ajDebug("featCdTrgLine %d nHits %d firstHit %d target '%S'\n",
1217 ipos, trgLine->NHits, trgLine->FirstHit, trgLine->Target);
1218
1219 return;
1220 }
1221
1222
1223
1224
1225 /* @funcstatic featCdReadHeader ***********************************************
1226 **
1227 ** Reads the header of an EMBL CD-ROM index file.
1228 **
1229 ** @param [u] fil [FeatPCdFile] EMBL CD-ROM index file.
1230 ** @return [AjBool] ajTrue on success.
1231 **
1232 ** @release 6.5.0
1233 ** @@
1234 ******************************************************************************/
1235
featCdReadHeader(FeatPCdFile fil)1236 static AjBool featCdReadHeader(FeatPCdFile fil)
1237 {
1238 ajint i;
1239
1240 FeatPCdFHeader header;
1241 char date[8]; /* ajReadbinCharTrim needs space for trailing null */
1242
1243 header = fil->Header;
1244
1245 ajReadbinUint(fil->File, &header->FileSize);
1246 ajReadbinUint(fil->File, &header->NRecords);
1247 ajReadbinInt2(fil->File, &header->RecSize);
1248
1249 header->IdSize = header->RecSize - 10;
1250
1251 ajReadbinCharTrim(fil->File, 20, header->DbName);
1252 ajReadbinCharTrim(fil->File, 10, header->Release);
1253
1254 ajReadbinCharTrim(fil->File, 4, date);
1255
1256 for(i=1;i<4;i++)
1257 header->Date[i] = date[i];
1258
1259 header->RelYear = header->Date[1];
1260 header->RelMonth = header->Date[2];
1261 header->RelDay = header->Date[3];
1262
1263 ajDebug("featCdReadHeader file %F\n", fil->File);
1264 ajDebug(" FileSize: %d NRecords: %hd recsize: %d idsize: %d\n",
1265 header->FileSize, header->NRecords,
1266 header->RecSize, header->IdSize);
1267
1268 return ajTrue;
1269 }
1270
1271
1272
1273
1274 /* @funcstatic featCdTrgOpen **************************************************
1275 **
1276 ** Opens an EMBL CD-ROM target file pair.
1277 **
1278 ** @param [r] dir [const AjPStr] Directory.
1279 ** @param [r] name [const char*] File name.
1280 ** @param [w] trgfil [FeatPCdFile*] Target file.
1281 ** @param [w] hitfil [FeatPCdFile*] Hit file.
1282 ** @return [AjBool] ajTrue on success.
1283 **
1284 ** @release 6.5.0
1285 ** @@
1286 ******************************************************************************/
1287
featCdTrgOpen(const AjPStr dir,const char * name,FeatPCdFile * trgfil,FeatPCdFile * hitfil)1288 static AjBool featCdTrgOpen(const AjPStr dir, const char* name,
1289 FeatPCdFile* trgfil, FeatPCdFile* hitfil)
1290 {
1291 AjPStr tmpname = NULL;
1292 AjPStr fullname = NULL;
1293
1294 ajDebug("featCdTrgOpen dir '%S' name '%s'\n",
1295 dir, name);
1296
1297 ajFmtPrintS(&tmpname, "%s.trg",name);
1298 *trgfil = featCdFileOpen(dir, ajStrGetPtr(tmpname), &fullname);
1299 ajStrDel(&tmpname);
1300
1301 if(!*trgfil)
1302 return ajFalse;
1303
1304 ajFmtPrintS(&tmpname, "%s.hit",name);
1305 *hitfil = featCdFileOpen(dir, ajStrGetPtr(tmpname), &fullname);
1306 ajStrDel(&tmpname);
1307 ajStrDel(&fullname);
1308
1309 if(!*hitfil)
1310 return ajFalse;
1311
1312 return ajTrue;
1313 }
1314
1315
1316
1317
1318 /* @funcstatic featCdTrgClose *************************************************
1319 **
1320 ** Close an EMBL CD-ROM target file pair.
1321 **
1322 ** @param [w] ptrgfil [FeatPCdFile*] Target file.
1323 ** @param [w] phitfil [FeatPCdFile*] Hit file.
1324 ** @return [AjBool] ajTrue on success.
1325 **
1326 ** @release 6.5.0
1327 ** @@
1328 ******************************************************************************/
1329
featCdTrgClose(FeatPCdFile * ptrgfil,FeatPCdFile * phitfil)1330 static AjBool featCdTrgClose(FeatPCdFile* ptrgfil, FeatPCdFile* phitfil)
1331 {
1332 featCdFileClose(ptrgfil);
1333 featCdFileClose(phitfil);
1334
1335 return ajTrue;
1336 }
1337
1338
1339
1340
1341 /* @section GCG Database Indexing *********************************************
1342 **
1343 ** These functions manage the GCG index access methods.
1344 **
1345 ******************************************************************************/
1346
1347
1348
1349
1350 /* @funcstatic featAccessGcg ***************************************************
1351 **
1352 ** Reads feature(s) from a GCG formatted database, using EMBLCD index
1353 ** files. Returns with the file pointer set to the position in the
1354 ** sequence file and reference files.
1355 **
1356 ** @param [u] fttabin [AjPFeattabin] Feature table input.
1357 ** @return [AjBool] ajTrue on success.
1358 **
1359 ** @release 6.5.0
1360 ** @@
1361 ******************************************************************************/
1362
featAccessGcg(AjPFeattabin fttabin)1363 static AjBool featAccessGcg(AjPFeattabin fttabin)
1364 {
1365 AjBool retval = ajFalse;
1366 AjPQuery qry;
1367 FeatPCdQry qryd;
1368
1369 ajDebug("featAccessGcg type %d\n", fttabin->Input->Query->QueryType);
1370
1371 qry = fttabin->Input->Query;
1372 qryd = qry->QryData;
1373
1374 if(qry->QueryType == AJQUERY_ALL)
1375 {
1376 retval = featGcgAll(fttabin);
1377
1378 return retval;
1379 }
1380
1381 /* we need to search the index files and return a query */
1382
1383 if(qry->QryData)
1384 { /* reuse unfinished query data */
1385 if(!featCdQryReuse(qry))
1386 return ajFalse;
1387 }
1388 else
1389 {
1390 fttabin->Input->Single = ajTrue;
1391
1392 if(!featCdQryOpen(qry))
1393 {
1394 ajWarn("Failed to open index for database '%S'",
1395 qry->DbName);
1396
1397 return ajFalse;
1398 }
1399
1400 qryd = qry->QryData;
1401 ajFilebuffDel(&fttabin->Input->Filebuff);
1402 fttabin->Input->Filebuff = ajFilebuffNewNofile();
1403
1404 /* binary search for the entryname we need */
1405
1406 if(qry->QueryType == AJQUERY_ENTRY)
1407 {
1408 ajDebug("entry fields: %Lu hasacc:%B\n",
1409 ajListGetLength(qry->QueryFields), qry->HasAcc);
1410
1411 if(!featCdQryEntry(qry))
1412 ajDebug("GCG Entry failed\n");
1413 }
1414
1415 if(qry->QueryType == AJQUERY_QUERY)
1416 {
1417 ajDebug("query fields: %Lu hasacc:%B\n",
1418 ajListGetLength(qry->QueryFields), qry->HasAcc);
1419 if(!featCdQryQuery(qry))
1420 ajDebug("GCG Query failed\n");
1421 }
1422
1423 AJFREE(qryd->trgLine);
1424 }
1425
1426 if(ajListGetLength(qry->ResultsList))
1427 {
1428 retval = featCdQryNext(qry);
1429
1430 if(retval)
1431 featGcgLoadBuff(fttabin);
1432 }
1433
1434 if(!ajListGetLength(qry->ResultsList))
1435 {
1436 ajFileClose(&qryd->libr);
1437 ajFileClose(&qryd->libs);
1438 featCdQryClose(qry);
1439 }
1440
1441 if(retval)
1442 ajStrAssignS(&fttabin->Input->Db, qry->DbName);
1443
1444 return retval;
1445 }
1446
1447
1448
1449
1450 /* @funcstatic featGcgLoadBuff ************************************************
1451 **
1452 ** Copies text data to a buffered file, and feature data for an
1453 ** AjPFeattabin internal data structure for reading later
1454 **
1455 ** @param [u] fttabin [AjPFeattabin] Feature table input object
1456 ** @return [void]
1457 **
1458 ** @release 6.5.0
1459 ** @@
1460 ******************************************************************************/
1461
featGcgLoadBuff(AjPFeattabin fttabin)1462 static void featGcgLoadBuff(AjPFeattabin fttabin)
1463 {
1464 AjPQuery qry;
1465 FeatPCdQry qryd;
1466
1467 qry = fttabin->Input->Query;
1468 qryd = qry->QryData;
1469
1470 if(!qry->QryData)
1471 ajFatal("featGcgLoadBuff Query Data not initialised");
1472
1473 /* copy all the ref data */
1474
1475 featGcgReadRef(fttabin);
1476
1477 /* skip the sequence (do we care about the format?) */
1478 featGcgReadSeq(fttabin);
1479
1480 /* ajFilebuffTraceFull(fttabin->Input->Filebuff, 9999, 100); */
1481
1482 if(!qryd->libr)
1483 {
1484 ajFileClose(&qryd->libs);
1485 ajDebug("featGcgLoadBuff: closed files\n");
1486 }
1487
1488 return;
1489 }
1490
1491
1492
1493
1494 /* @funcstatic featGcgReadRef *************************************************
1495 **
1496 ** Copies text data to a buffered file for reading later
1497 **
1498 ** @param [u] fttabin [AjPFeattabin] Feature table input object
1499 ** @return [AjBool] ajTrue on success
1500 **
1501 ** @release 6.5.0
1502 ** @@
1503 ******************************************************************************/
1504
featGcgReadRef(AjPFeattabin fttabin)1505 static AjBool featGcgReadRef(AjPFeattabin fttabin)
1506 {
1507 AjPStr line = NULL;
1508 AjPQuery qry;
1509 FeatPCdQry qryd;
1510 ajlong rpos;
1511 AjPStr id = NULL;
1512 AjPStr idc = NULL;
1513 AjBool ispir = ajFalse;
1514 AjBool continued = ajFalse;
1515 AjBool testcontinue = ajFalse;
1516 char *p = NULL;
1517
1518 qry = fttabin->Input->Query;
1519 qryd = qry->QryData;
1520
1521 if(!featRegGcgRefId)
1522 featRegGcgRefId =ajRegCompC("^>...([^ \n]+)");
1523
1524 if(!featRegGcgSplit)
1525 featRegGcgSplit =ajRegCompC("_0+$");
1526
1527 if(!ajReadline(qryd->libr, &line)) /* end of file */
1528 return ajFalse;
1529
1530 if(ajStrGetCharFirst(line) != '>') /* not start of entry */
1531 ajFatal("featGcgReadRef bad entry start:\n'%S'", line);
1532
1533 if(ajStrGetCharPos(line, 3) == ';') /* PIR entry */
1534 ispir = ajTrue;
1535
1536 if(ispir)
1537 ajFilebuffLoadS(fttabin->Input->Filebuff, line);
1538
1539 if(ajRegExec(featRegGcgRefId, line))
1540 {
1541 continued = ajFalse;
1542 ajRegSubI(featRegGcgRefId, 1, &id);
1543
1544 if(ajRegExec(featRegGcgSplit, id))
1545 {
1546 continued = ajTrue;
1547 p = ajStrGetuniquePtr(&id);
1548 p = strrchr(p,(ajint)'_');
1549 *(++p)='\0';
1550 ajStrSetValid(&id);
1551 }
1552 }
1553 else
1554 {
1555 ajDebug("featGcgReadRef bad ID line\n'%S'\n", line);
1556 ajFatal("featGcgReadRef bad ID line\n'%S'\n", line);
1557 }
1558
1559 if(!ajReadline(qryd->libr, &line)) /* blank desc line */
1560 {
1561 ajStrDel(&id);
1562
1563 return ajFalse;
1564 }
1565
1566 if(ispir)
1567 ajFilebuffLoadS(fttabin->Input->Filebuff, line);
1568
1569 rpos = ajFileResetPos(qryd->libr);
1570
1571 while(ajReadline(qryd->libr, &line))
1572 { /* end of file */
1573 if(ajStrGetCharFirst(line) == '>')
1574 { /* start of next entry */
1575 /* skip over split entries so it can be used for "all" */
1576
1577 if(continued)
1578 {
1579 testcontinue=ajTrue;
1580 ajRegExec(featRegGcgRefId, line);
1581 ajRegSubI(featRegGcgRefId, 1, &idc);
1582
1583 if(!ajStrPrefixS(idc,id))
1584 {
1585 ajFileSeek(qryd->libr, rpos, 0);
1586 ajStrDel(&line);
1587 ajStrDel(&id);
1588 ajStrDel(&idc);
1589
1590 return ajTrue;
1591 }
1592 }
1593 else
1594 {
1595 ajFileSeek(qryd->libr, rpos, 0);
1596 ajStrDel(&line);
1597 ajStrDel(&id);
1598 ajStrDel(&idc);
1599
1600 return ajTrue;
1601 }
1602 }
1603
1604 rpos = ajFileResetPos(qryd->libr);
1605
1606 if(!testcontinue)
1607 {
1608 ajStrExchangeCC(&line, ". .", "..");
1609 ajFilebuffLoadS(fttabin->Input->Filebuff, line);
1610 }
1611 }
1612
1613
1614 /* at end of file */
1615
1616 ajFileClose(&qryd->libr);
1617
1618 ajStrDel(&line);
1619 ajStrDel(&id);
1620 ajStrDel(&idc);
1621
1622 return ajTrue;
1623 }
1624
1625
1626
1627
1628 /* @funcstatic featGcgReadSeq *************************************************
1629 **
1630 ** Skips unwanted sequence data so file is at start of next entry.
1631 **
1632 ** @param [u] fttabin [AjPFeattabin] Feature table input object
1633 ** @return [AjBool] ajTrue on success
1634 **
1635 ** @release 6.5.0
1636 ** @@
1637 ******************************************************************************/
1638
featGcgReadSeq(AjPFeattabin fttabin)1639 static AjBool featGcgReadSeq(AjPFeattabin fttabin)
1640 {
1641 AjPStr line = NULL;
1642 AjPQuery qry;
1643 FeatPCdQry qryd;
1644 AjPStr gcgtype = NULL;
1645 AjPStr tmpstr = NULL;
1646 AjPStr dstr = NULL;
1647 AjPStr id = NULL;
1648 AjPStr idc = NULL;
1649 AjPStr contseq = NULL;
1650
1651 ajint gcglen;
1652 ajint pos;
1653 ajint rblock;
1654 ajlong spos;
1655 AjBool ispir = ajFalse;
1656 char *p = NULL;
1657 AjBool continued = ajFalse;
1658
1659 qry = fttabin->Input->Query;
1660 qryd = qry->QryData;
1661
1662 if(!featRegGcgId)
1663 {
1664 featRegGcgId =ajRegCompC("^>...([^ ]+) +([^ ]+) +(Dummy Header|[^ ]+)"
1665 " +([^ ]+) +([0-9]+)");
1666 featRegGcgId2=ajRegCompC("^>[PF]1;([^ ]+)");
1667 }
1668
1669 if(!featRegGcgSplit)
1670 featRegGcgSplit =ajRegCompC("_0+$");
1671
1672 ajDebug("featGcgReadSeq pos: %Ld\n", ajFileResetPos(qryd->libs));
1673
1674 if(!ajReadline(qryd->libs, &line)) /* end of file */
1675 return ajFalse;
1676
1677 ajDebug("test ID line\n'%S'\n", line);
1678
1679 if(ajRegExec(featRegGcgId, line))
1680 {
1681 continued = ajFalse;
1682 ajRegSubI(featRegGcgId, 3, &gcgtype);
1683 ajRegSubI(featRegGcgId, 5, &tmpstr);
1684 ajRegSubI(featRegGcgId, 1, &id);
1685
1686 if(ajRegExec(featRegGcgSplit, id))
1687 {
1688 continued = ajTrue;
1689 p = ajStrGetuniquePtr(&id);
1690 p = strrchr(p,(ajint)'_');
1691 *(++p)='\0';
1692 ajStrSetValid(&id);
1693
1694 if(!contseq)
1695 contseq = ajStrNew();
1696
1697 if(!dstr)
1698 dstr = ajStrNew();
1699 }
1700
1701 ajStrToInt(tmpstr, &gcglen);
1702 }
1703 else if(ajRegExec(featRegGcgId2, line))
1704 {
1705 ajStrAssignC(&gcgtype, "ASCII");
1706 ajRegSubI(featRegGcgId, 1, &tmpstr);
1707 ispir = ajTrue;
1708 }
1709 else
1710 {
1711 ajDebug("featGcgReadSeq bad ID line\n'%S'\n", line);
1712 ajFatal("featGcgReadSeq bad ID line\n'%S'\n", line);
1713
1714 return ajFalse;
1715 }
1716
1717 if(!ajReadline(qryd->libs, &line)) /* desc line */
1718 return ajFalse;
1719
1720 /*
1721 ** need to pick up the length and type, and read to the end of sequence
1722 ** see fasta code to get a real sequence for this
1723 ** Also need to handle split entries and go find the rest
1724 */
1725
1726 if(ispir)
1727 {
1728 spos = ajFileResetPos(qryd->libs);
1729
1730 while(ajReadline(qryd->libs, &line))
1731 { /* end of file */
1732 if(ajStrGetCharFirst(line) == '>')
1733 { /* start of next entry */
1734 ajFileSeek(qryd->libs, spos, 0);
1735 break;
1736 }
1737
1738 spos = ajFileResetPos(qryd->libs);
1739 ajFilebuffLoadS(fttabin->Input->Filebuff, line);
1740 }
1741 }
1742 else
1743 {
1744 ajStrSetRes(&contseq, gcglen+3);
1745 rblock = gcglen;
1746
1747 if(ajStrGetCharFirst(gcgtype) == '2')
1748 rblock = (rblock+3)/4;
1749
1750 if(!ajReadbinBinary(qryd->libs, rblock, 1,
1751 ajStrGetuniquePtr(&contseq)))
1752 ajFatal("error reading file %F", qryd->libs);
1753
1754 /* convert 2bit to ascii */
1755 if(ajStrGetCharFirst(gcgtype) == '2')
1756 featGcgBinDecode(&contseq, gcglen);
1757 else if(ajStrGetCharFirst(gcgtype) == 'A')
1758 {
1759 /* are seq chars OK? */
1760 ajStrSetValidLen(&contseq, gcglen);
1761 }
1762 else
1763 {
1764 ajRegSubI(featRegGcgId, 1, &tmpstr);
1765 ajFatal("Unknown GCG entry type '%S', entry name '%S'",
1766 gcgtype, tmpstr);
1767 }
1768
1769 if(!ajReadline(qryd->libs, &line)) /* newline at end */
1770 ajFatal("error reading file %F", qryd->libs);
1771 ajStrDel(&contseq);
1772
1773 if(continued)
1774 {
1775 spos = ajFileResetPos(qryd->libs);
1776
1777 while(ajReadline(qryd->libs,&line))
1778 {
1779 ajRegExec(featRegGcgId, line);
1780 ajRegSubI(featRegGcgId, 5, &tmpstr);
1781 ajRegSubI(featRegGcgId, 1, &idc);
1782
1783 if(!ajStrPrefixS(idc,id))
1784 {
1785 ajFileSeek(qryd->libs, spos, 0);
1786 break;
1787 }
1788
1789 ajStrToInt(tmpstr, &gcglen);
1790
1791 if(!ajReadline(qryd->libs, &dstr)) /* desc line */
1792 return ajFalse;
1793
1794 ajStrSetRes(&contseq, gcglen+3);
1795
1796 rblock = gcglen;
1797 if(ajStrGetCharFirst(gcgtype) == '2')
1798 rblock = (rblock+3)/4;
1799
1800 if(!ajReadbinBinary(qryd->libs, rblock, 1,
1801 ajStrGetuniquePtr(&contseq)))
1802 ajFatal("error reading file %F", qryd->libs);
1803
1804 /* convert 2bit to ascii */
1805 if(ajStrGetCharFirst(gcgtype) == '2')
1806 featGcgBinDecode(&contseq, gcglen);
1807 else if(ajStrGetCharFirst(gcgtype) == 'A')
1808 {
1809 /* are seq chars OK? */
1810 ajStrSetValidLen(&contseq, gcglen);
1811 }
1812 else
1813 {
1814 ajRegSubI(featRegGcgId, 1, &tmpstr);
1815 ajFatal("Unknown GCG entry: name '%S'",
1816 tmpstr);
1817 }
1818
1819 if(!ajReadline(qryd->libs, &line)) /* newline at end */
1820 ajFatal("error reading file %F", qryd->libs);
1821
1822 if(!featRegGcgCont)
1823 featRegGcgCont = ajRegCompC("^([^ ]+) +([^ ]+) +([^ ]+) +"
1824 "([^ ]+) +([^ ]+) +([^ ]+) "
1825 "+([^ ]+) +"
1826 "([^ ]+) +([0-9]+)");
1827
1828 ajRegExec(featRegGcgCont, dstr);
1829 ajRegSubI(featRegGcgCont, 9, &tmpstr);
1830 ajStrToInt(tmpstr, &pos);
1831 /*seqin->Inseq->Len = pos-1;*/
1832
1833 /*ajStrAppendS(&seqin->Inseq,contseq);*/
1834 spos = ajFileResetPos(qryd->libs);
1835 }
1836 }
1837 }
1838
1839 ajStrDel(&line);
1840 ajStrDel(&gcgtype);
1841 ajStrDel(&tmpstr);
1842 ajStrDel(&dstr);
1843 ajStrDel(&id);
1844 ajStrDel(&idc);
1845 ajStrDel(&contseq);
1846
1847 return ajTrue;
1848 }
1849
1850
1851
1852
1853 /* @funcstatic featGcgBinDecode ***********************************************
1854 **
1855 ** Convert GCG binary to ASCII sequence.
1856 **
1857 ** @param [u] pthis [AjPStr*] Binary string
1858 ** @param [r] sqlen [ajuint] Expected sequence length
1859 ** @return [void]
1860 **
1861 ** @release 6.5.0
1862 ** @@
1863 ******************************************************************************/
1864
featGcgBinDecode(AjPStr * pthis,ajuint sqlen)1865 static void featGcgBinDecode(AjPStr *pthis, ajuint sqlen)
1866 {
1867 char* seqp;
1868 char* cp;
1869 char* start;
1870 const char* gcgbton="CTAG";
1871 char stmp;
1872 ajint rdlen;
1873
1874 start = ajStrGetuniquePtr(pthis);
1875 rdlen = (sqlen+3)/4;
1876
1877 seqp = start + rdlen;
1878 cp = start + 4*rdlen;
1879
1880 ajDebug("seqp:%x start:%x cp:%x sqlen:%d len:%d size:%d (seqp-start):%d\n",
1881 seqp, start, cp, sqlen,
1882 ajStrGetLen(*pthis), ajStrGetRes(*pthis),
1883 (seqp - start));
1884
1885 while(seqp > start)
1886 {
1887 stmp = *--seqp;
1888 *--cp = gcgbton[stmp&3];
1889 *--cp = gcgbton[(stmp >>= 2)&3];
1890 *--cp = gcgbton[(stmp >>= 2)&3];
1891 *--cp = gcgbton[(stmp >>= 2)&3];
1892 }
1893
1894 start[sqlen] = '\0';
1895 ajStrSetValidLen(pthis, sqlen);
1896
1897 return;
1898 }
1899
1900
1901
1902
1903 /* @funcstatic featGcgAll *****************************************************
1904 **
1905 ** Opens the first or next GCG file for further reading
1906 **
1907 ** @param [u] fttabin [AjPFeattabin] Feature table input.
1908 ** @return [AjBool] ajTrue on success.
1909 **
1910 ** @release 6.5.0
1911 ** @@
1912 ******************************************************************************/
1913
featGcgAll(AjPFeattabin fttabin)1914 static AjBool featGcgAll(AjPFeattabin fttabin)
1915 {
1916 AjPQuery qry;
1917 FeatPCdQry qryd;
1918
1919 qry = fttabin->Input->Query;
1920 qryd = qry->QryData;
1921
1922 ajDebug("featGcgAll\n");
1923
1924 if(!qry->QryData)
1925 {
1926 ajDebug("featGcgAll initialising\n");
1927 fttabin->Input->Single = ajTrue;
1928
1929 if(!featCdQryOpen(qry))
1930 {
1931 ajErr("featGcgAll failed");
1932
1933 return ajFalse;
1934 }
1935 }
1936
1937 qryd = qry->QryData;
1938 ajFilebuffDel(&fttabin->Input->Filebuff);
1939 fttabin->Input->Filebuff = ajFilebuffNewNofile();
1940
1941 if(!qryd->libr)
1942 {
1943 if(!featCdDivNext(qry))
1944 {
1945 featCdQryClose(qry);
1946 ajDebug("featGcgAll finished\n");
1947
1948 return ajFalse;
1949 }
1950
1951 if(!featCdQryFile(qry))
1952 {
1953 ajErr("featGcgAll out of data");
1954
1955 return ajFalse;
1956 }
1957
1958 ajDebug("featCdQryOpen processing file %2d '%F'\n", qryd->div,
1959 qryd->libr);
1960 if(qryd->libs)
1961 ajDebug(" sequence file '%F'\n", qryd->libs);
1962 }
1963
1964 featGcgLoadBuff(fttabin);
1965
1966 if(!qry->CaseId)
1967 qry->QryDone = ajTrue;
1968
1969 return ajTrue;
1970 }
1971
1972
1973
1974
1975 /* @funcstatic featCdDivNext **************************************************
1976 **
1977 ** Sets the division count to the next included file. We need the division
1978 ** file to be already open.
1979 **
1980 ** @param [u] qry [AjPQuery] query object.
1981 ** @return [ajuint] File number (starting at 1) or zero if all files are done.
1982 **
1983 ** @release 6.5.0
1984 ** @@
1985 ******************************************************************************/
1986
featCdDivNext(AjPQuery qry)1987 static ajuint featCdDivNext(AjPQuery qry)
1988 {
1989 FeatPCdQry qryd;
1990 AjPStr fullName = NULL;
1991 ajuint i;
1992
1993 qryd = qry->QryData;
1994
1995 ajDebug("featCdDivNext div: %d dfp: %x nameSize: %d name '%s'\n",
1996 qryd->div, qryd->maxdiv, qryd->nameSize, qryd->name);
1997
1998 for(i=qryd->div; i < qryd->maxdiv; i++)
1999 if(!qryd->Skip[i])
2000 {
2001 qryd->div = i+1;
2002 ajDebug("next file is %d '%S'\n", qryd->div, fullName);
2003 return qryd->div;
2004 }
2005 else
2006 ajDebug("skip %d '%S'\n", (i+1), fullName);
2007
2008 return 0;
2009 }
2010
2011
2012
2013
2014 /* @funcstatic featCdQryFile **************************************************
2015 **
2016 ** Opens a specific file number for an EMBLCD index
2017 **
2018 ** @param [u] qry [AjPQuery] Query data
2019 ** @return [AjBool] ajTrue on success
2020 **
2021 ** @release 6.5.0
2022 ** @@
2023 ******************************************************************************/
2024
featCdQryFile(AjPQuery qry)2025 static AjBool featCdQryFile(AjPQuery qry)
2026 {
2027 FeatPCdQry qryd;
2028 short j;
2029
2030 if(!featCdDivExp)
2031 featCdDivExp = ajRegCompC("^([^ ]+)( +([^ ]+))?");
2032
2033 ajDebug("featCdQryFile qry %x\n",qry);
2034 qryd = qry->QryData;
2035 ajDebug("featCdQryFile qryd %x\n",qryd);
2036 ajDebug("featCdQryFile %F\n",qryd->dfp->File);
2037
2038 featCdFileSeek(qryd->dfp, (qryd->div - 1));
2039
2040 /* note - we must not use featCdFileReadName - we need spaces for GCG */
2041
2042 ajReadbinInt2(qryd->dfp->File, &j);
2043
2044 ajReadbinChar(qryd->dfp->File, qryd->nameSize, qryd->name);
2045 ajDebug("DivCode: %d, code: %2hd '%s'\n",
2046 qryd->div, j, qryd->name);
2047
2048 /**ajCharFmtLower(qryd->name);**/
2049 if(!ajRegExecC(featCdDivExp, qryd->name))
2050 {
2051 ajErr("index division file error '%S'", qryd->name);
2052
2053 return ajFalse;
2054 }
2055
2056 ajRegSubI(featCdDivExp, 1, &qryd->datfile);
2057 ajRegSubI(featCdDivExp, 3, &qryd->seqfile);
2058 ajDebug("File(s) '%S' '%S'\n", qryd->datfile, qryd->seqfile);
2059
2060 ajFileClose(&qryd->libr);
2061 qryd->libr = ajFileNewInNamePathS(qryd->datfile, qry->Directory);
2062
2063 if(!qryd->libr)
2064 {
2065 ajErr("Cannot open database file '%S' for database '%S'",
2066 qryd->datfile, qry->DbName);
2067
2068 return ajFalse;
2069 }
2070
2071 if(ajStrGetLen(qryd->seqfile))
2072 {
2073 ajFileClose(&qryd->libs);
2074 qryd->libs = ajFileNewInNamePathS(qryd->seqfile, qry->Directory);
2075
2076 if(!qryd->libs)
2077 {
2078 ajErr("Cannot open sequence file '%S' for database '%S'",
2079 qryd->seqfile, qry->DbName);
2080
2081 return ajFalse;
2082 }
2083 }
2084 else
2085 qryd->libs = NULL;
2086
2087 return ajTrue;
2088 }
2089
2090
2091
2092
2093 /* @funcstatic featCdTrgQuery *************************************************
2094 **
2095 ** Binary search of an EMBL CD-ROM index file for entries matching a
2096 ** wildcard query.
2097 **
2098 ** Where more than one query field is defined (usually acc and sv) it
2099 ** can test all and append to a single list.
2100 **
2101 ** @param [u] qry [AjPQuery] Query object.
2102 ** @param [r] field [const AjPStr] Query field
2103 ** @param [r] wildqry [const AjPStr] Query string
2104 ** @return [AjBool] ajTrue on success.
2105 **
2106 ** @release 6.5.0
2107 ** @@
2108 ******************************************************************************/
2109
featCdTrgQuery(AjPQuery qry,const AjPStr field,const AjPStr wildqry)2110 static AjBool featCdTrgQuery(AjPQuery qry, const AjPStr field,
2111 const AjPStr wildqry)
2112 {
2113 ajint ret=0;
2114
2115 if(ajStrMatchC(field, "org"))
2116 ret += featCdTrgFind(qry, "taxon", wildqry);
2117
2118 if(ajStrMatchC(field, "key"))
2119 ret += featCdTrgFind(qry, "keyword", wildqry);
2120
2121 if(ajStrMatchC(field, "des"))
2122 ret += featCdTrgFind(qry, "des", wildqry);
2123
2124 if(ajStrMatchC(field, "sv"))
2125 ret += featCdTrgFind(qry, "seqvn", wildqry);
2126
2127 if(ajStrMatchC(field, "gi"))
2128 ret += featCdTrgFind(qry, "gi", wildqry);
2129
2130 if(qry->HasAcc && ajStrMatchC(field, "acc"))
2131 ret += featCdTrgFind(qry, "acnum", wildqry);
2132
2133
2134 if(ret)
2135 return ajTrue;
2136
2137 return ajFalse;
2138 }
2139
2140
2141
2142
2143 /* @funcstatic featCdTrgFind **************************************************
2144 **
2145 ** Binary search of an EMBL CD-ROM index file for entries matching a
2146 ** wildcard query.
2147 **
2148 ** Where more than one query field is defined (usually acc and sv) it
2149 ** can test all and append to a single list.
2150 **
2151 ** @param [u] qry [AjPQuery] Query object.
2152 ** @param [r] indexname [const char*] Index name.
2153 ** @param [r] queryName [const AjPStr] Query string.
2154 ** @return [ajuint] Number of matches found
2155 **
2156 ** @release 6.5.0
2157 ** @@
2158 ******************************************************************************/
2159
featCdTrgFind(AjPQuery qry,const char * indexname,const AjPStr queryName)2160 static ajuint featCdTrgFind(AjPQuery qry, const char* indexname,
2161 const AjPStr queryName)
2162 {
2163 FeatPCdQry wild;
2164 AjPList l;
2165 FeatPCdTrg trgline;
2166 FeatPCdIdx idxline;
2167 FeatPCdFile idxfp;
2168 FeatPCdFile trgfp;
2169 FeatPCdFile hitfp;
2170 AjBool *skip;
2171
2172 AjPStr fdstr = NULL;
2173 AjPStr fdprefix = NULL;
2174
2175 ajint t;
2176 ajint b;
2177 ajint t2;
2178 ajint b2;
2179 ajint t3;
2180 ajint pos = 0;
2181 ajint prefixlen;
2182 ajint start;
2183 ajint end;
2184 ajint i;
2185 ajint j;
2186 ajint k;
2187 ajint cmp;
2188 AjBool match;
2189
2190 AjBool first;
2191 char *name;
2192
2193 FeatPCdEntry entry;
2194
2195
2196 wild = qry->QryData;
2197 l = qry->ResultsList;
2198 trgline = wild->trgLine;
2199 idxline = wild->idxLine;
2200 idxfp = wild->ifp;
2201 trgfp = wild->trgfp;
2202 hitfp = wild->hitfp;
2203 skip = wild->Skip;
2204
2205
2206 if(!featCdTrgOpen(qry->IndexDir, indexname, &trgfp, &hitfp))
2207 return 0;
2208
2209 /* fdstr is the original query string, in uppercase */
2210
2211 /* fdprefix is the fixed (no wildcard) prefix of fdstr */
2212
2213 ajStrAssignS(&fdstr,queryName);
2214 ajStrFmtUpper(&fdstr);
2215 ajStrAssignS(&fdprefix,fdstr);
2216
2217 ajStrRemoveWild(&fdprefix);
2218
2219 ajDebug("queryName '%S' fdstr '%S' fdprefix '%S'\n",
2220 queryName, fdstr, fdprefix);
2221
2222 b = b2 = 0;
2223 t = t2 = t3 = trgfp->NRecords - 1;
2224
2225 prefixlen = ajStrGetLen(fdprefix);
2226 first = ajTrue;
2227
2228 if(prefixlen)
2229 {
2230 /*
2231 ** (1a) we have a prefix (no wildcard at the start)
2232 ** look for the prefix fdprefix
2233 ** Set range of records that match (will be consecutive of course)
2234 ** from first match
2235 */
2236
2237 while(b<=t)
2238 {
2239 pos = (t+b)/2;
2240 name = featCdTrgName(pos,trgfp);
2241 name[prefixlen]='\0'; /* truncate to prefix length */
2242 cmp = ajStrCmpC(fdprefix,name);
2243 /* match = ajStrMatchWildC(fdstr,name);*/
2244 ajDebug(" trg testc %d '%s' '%S' %B (+/- %d)\n",
2245 pos,name,fdprefix,cmp, t-b);
2246 if(!cmp)
2247 {
2248 ajDebug(" trg hit %d\n",pos);
2249
2250 if(first)
2251 {
2252 first = ajFalse;
2253 t2 = t;
2254 t3 = pos;
2255 }
2256
2257 b2 = pos;
2258 }
2259
2260 if(cmp>0)
2261 b = pos+1;
2262 else
2263 t = pos-1;
2264 }
2265
2266 if(first)
2267 {
2268 ajStrDel(&fdprefix);
2269 ajStrDel(&fdstr);
2270 featCdTrgClose(&trgfp,&hitfp);
2271
2272 return ajFalse;
2273 }
2274
2275 ajDebug("first pass: pos:%d b2:%d t2:%d\n",pos,b2,t2);
2276
2277 /*
2278 ** (1b) Process below
2279 */
2280
2281 b = b2-1;
2282 t = t2;
2283
2284 while(b<=t)
2285 {
2286 pos = (t+b)/2;
2287 name = featCdTrgName(pos,trgfp);
2288 name[prefixlen]='\0';
2289 cmp = ajStrCmpC(fdprefix,name);
2290 /* match = ajStrMatchWildC(fdstr,name); */
2291 ajDebug(" trg testd %d '%s' '%S' %B (+/- %d)\n",
2292 pos,name,fdprefix,cmp,t-b);
2293
2294 if(!cmp)
2295 {
2296 ajDebug(" trg hit %d\n",pos);
2297 t3 = pos;
2298 }
2299
2300 if(cmp<0)
2301 t = pos-1;
2302 else
2303 b = pos+1;
2304 }
2305
2306 ajDebug("second pass: pos:%d b2:%d t3:%d\n",pos,b2,t3);
2307 name = featCdTrgName(b2,trgfp);
2308 ajDebug("first %d '%s'\n",b2,name);
2309 name = featCdTrgName(t3,trgfp);
2310 ajDebug("last %d '%s'\n",t3,name);
2311 }
2312
2313
2314 start = b2;
2315 end = t3;
2316
2317 for(i=start;i<(end+1);++i)
2318 {
2319 name = featCdTrgName(i,trgfp);
2320 match = ajCharMatchWildC(name, ajStrGetPtr(fdstr));
2321
2322 ajDebug("third pass: match:%B i:%d name '%s' queryName '%S'\n",
2323 match, i, name, fdstr);
2324
2325 if(!match)
2326 continue;
2327
2328 featCdTrgLine(trgline, i, trgfp);
2329 featCdFileSeek(hitfp,trgline->FirstHit-1);
2330 ajDebug("Query First: %d Count: %d\n",
2331 trgline->FirstHit, trgline->NHits);
2332 pos = trgline->FirstHit;
2333
2334 for(j=0;j<(ajint)trgline->NHits;++j)
2335 {
2336 ajReadbinInt(hitfp->File, &k);
2337 --k;
2338 ajDebug("hitlist[%d] entry = %d\n",j,k);
2339 featCdIdxLine(idxline,k,idxfp);
2340
2341 if(!skip[idxline->DivCode-1])
2342 {
2343 AJNEW0(entry);
2344 entry->div = idxline->DivCode;
2345 entry->annoff = idxline->AnnOffset;
2346 entry->seqoff = idxline->SeqOffset;
2347 ajListPushAppend(l,(void*)entry);
2348 }
2349 else
2350 ajDebug("SKIP: token '%S' [file %d]\n",
2351 queryName,idxline->DivCode);
2352 }
2353 }
2354
2355 featCdTrgClose(&trgfp, &hitfp);
2356
2357
2358 ajStrDel(&trgline->Target);
2359 ajStrDel(&fdstr);
2360 ajStrDel(&fdprefix);
2361
2362 return (ajuint) ajListGetLength(l);
2363 }
2364
2365
2366
2367
2368 /* @funcstatic featCdIdxDel ***************************************************
2369 **
2370 ** Destructor for FeatPCdIdx
2371 **
2372 ** @param [d] pthys [FeatPCdIdx*] Cd index object
2373 ** @return [void]
2374 **
2375 ** @release 6.5.0
2376 ******************************************************************************/
2377
featCdIdxDel(FeatPCdIdx * pthys)2378 static void featCdIdxDel(FeatPCdIdx* pthys)
2379 {
2380 FeatPCdIdx thys = *pthys;
2381
2382 if(!thys)
2383 return;
2384
2385 ajStrDel(&thys->EntryName);
2386 AJFREE(*pthys);
2387
2388 return;
2389 }
2390
2391
2392
2393
2394 /* @funcstatic featCdTrgDel ***************************************************
2395 **
2396 ** Destructor for FeatPCdTrg
2397 **
2398 ** @param [d] pthys [FeatPCdTrg*] Cd index target object
2399 ** @return [void]
2400 **
2401 ** @release 6.5.0
2402 **
2403 ******************************************************************************/
2404
featCdTrgDel(FeatPCdTrg * pthys)2405 static void featCdTrgDel(FeatPCdTrg* pthys)
2406 {
2407 FeatPCdTrg thys = *pthys;
2408
2409 if(!thys)
2410 return;
2411
2412 ajStrDel(&thys->Target);
2413 AJFREE(*pthys);
2414
2415 return;
2416 }
2417
2418
2419
2420
2421 /* @section B+tree GCG Database Indexing *************************************
2422 **
2423 ** These functions manage the EMBOSS B+tree GCG index access methods.
2424 **
2425 ******************************************************************************/
2426
2427
2428
2429
2430 /* @funcstatic featAccessEmbossGcg ********************************************
2431 **
2432 ** Reads feature(s) from a GCG formatted database, using B+tree index
2433 ** files. Returns with the file pointer set to the position in the
2434 ** reference and sequence file.
2435 **
2436 ** @param [u] fttabin [AjPFeattabin] Feature table input.
2437 ** @return [AjBool] ajTrue on success.
2438 **
2439 ** @release 6.5.0
2440 ** @@
2441 ******************************************************************************/
2442
featAccessEmbossGcg(AjPFeattabin fttabin)2443 static AjBool featAccessEmbossGcg(AjPFeattabin fttabin)
2444 {
2445 AjBool retval = ajFalse;
2446
2447 AjPQuery qry;
2448 FeatPEmbossQry qryd = NULL;
2449
2450
2451 qry = fttabin->Input->Query;
2452 qryd = qry->QryData;
2453 ajDebug("featAccessEmbossGcg type %d\n", qry->QueryType);
2454
2455 if(!ajNamDbGetDbalias(qry->DbName, &qry->DbAlias))
2456 ajStrAssignS(&qry->DbAlias, qry->DbName);
2457
2458 if(qry->QueryType == AJQUERY_ALL)
2459 return featEmbossGcgAll(fttabin);
2460
2461
2462 if(!qry->QryData)
2463 {
2464 if(!featEmbossQryOpen(qry))
2465 return ajFalse;
2466
2467 qryd = qry->QryData;
2468 fttabin->Input->Single = ajTrue;
2469 ajFilebuffDel(&fttabin->Input->Filebuff);
2470 fttabin->Input->Filebuff = ajFilebuffNewNofile();
2471
2472 if(qry->QueryType == AJQUERY_ENTRY)
2473 {
2474 if(!featEmbossQryEntry(qry))
2475 ajDebug("embossgcg B+tree Entry failed\n");
2476 }
2477
2478 if(qry->QueryType == AJQUERY_QUERY)
2479 {
2480 if(!featEmbossQryQuery(qry))
2481 ajDebug("embossgcg B+tree Query failed\n");
2482 }
2483 }
2484 else
2485 {
2486 if(!featEmbossQryReuse(qry))
2487 {
2488 featEmbossQryClose(qry);
2489 return ajFalse;
2490 }
2491 ajFilebuffClear(fttabin->Input->Filebuff, -1);
2492 }
2493
2494 if(ajListGetLength(qry->ResultsList))
2495 {
2496 retval = featEmbossQryNext(qry);
2497
2498 if(retval)
2499 {
2500 featEmbossGcgLoadBuff(fttabin);
2501 ajStrAssignS(&fttabin->Input->Db, qry->DbName);
2502 }
2503 }
2504
2505 if(!ajListGetLength(qry->ResultsList)) /* could be emptied by code above */
2506 {
2507 featEmbossQryClose(qry);
2508 ajFileClose(&qryd->libs);
2509 ajFileClose(&qryd->libr);
2510 }
2511
2512 return retval;
2513 }
2514
2515
2516
2517
2518 /* @funcstatic featEmbossGcgAll ***********************************************
2519 **
2520 ** Opens the first or next GCG file for further reading
2521 **
2522 ** @param [u] fttabin [AjPFeattabin] Feature table input.
2523 ** @return [AjBool] ajTrue on success.
2524 **
2525 ** @release 6.5.0
2526 ** @@
2527 ******************************************************************************/
2528
featEmbossGcgAll(AjPFeattabin fttabin)2529 static AjBool featEmbossGcgAll(AjPFeattabin fttabin)
2530 {
2531 AjPQuery qry;
2532 FeatPEmbossQry qryd;
2533 static ajint i = 0;
2534 ajuint iref;
2535 AjPStr name = NULL;
2536 AjBool ok = ajFalse;
2537 /*
2538 AjPStrTok handle = NULL;
2539 AjPStr wildname = NULL;
2540 AjBool found = ajFalse;
2541 */
2542
2543 qry = fttabin->Input->Query;
2544 qryd = qry->QryData;
2545
2546 ajDebug("featEmbossGcgAll\n");
2547
2548
2549 if(!qry->QryData)
2550 {
2551 ajDebug("featEmbossGcgAll initialising\n");
2552
2553 qry->QryData = AJNEW0(qryd);
2554 qryd = qry->QryData;
2555 i = -1;
2556 ajBtreeReadEntriesS(qry->DbAlias,qry->IndexDir,
2557 qry->Directory,
2558 &qryd->files,
2559 &qryd->reffiles,
2560 &qryd->refcount);
2561
2562 fttabin->Input->Single = ajTrue;
2563 }
2564
2565 qryd = qry->QryData;
2566 ajFilebuffDel(&fttabin->Input->Filebuff);
2567 fttabin->Input->Filebuff = ajFilebuffNewNofile();
2568
2569 if(!qryd->libs)
2570 {
2571 while(!ok && qryd->files[++i])
2572 {
2573 ajStrAssignS(&name,qryd->files[i]);
2574 if(ajFilenameTestInclude(name, qry->Exclude, qry->Filename))
2575 ok = ajTrue;
2576 }
2577
2578 ajStrDel(&name);
2579
2580 /* if(qry->Exclude)
2581 {
2582 ok = ajFalse;
2583 wildname = ajStrNew();
2584 name = ajStrNew();
2585 while(!ok)
2586 {
2587 ajStrAssignS(&name,qryd->files[i]);
2588 ajFilenameTrimPath(&name);
2589 handle = ajStrTokenNewC(qry->Exclude," \n");
2590 found = ajFalse;
2591 while(ajStrTokenNextParseC(&handle," \n",&wildname))
2592 if(ajStrMatchWildS(name,wildname))
2593 {
2594 found = ajTrue;
2595 break;
2596 }
2597 ajStrTokenDel(&handle);
2598 if(!found)
2599 ok = ajTrue;
2600 else
2601 {
2602 ++i;
2603 if(!qryd->files[i])
2604 ok = ajTrue;
2605 }
2606 }
2607
2608 ajStrDel(&wildname);
2609 ajStrDel(&name);
2610 }
2611 */
2612
2613 if(!qryd->files[i])
2614 {
2615 ajDebug("featEmbossGcgAll finished\n");
2616 i=0;
2617
2618 while(qryd->files[i])
2619 {
2620 ajStrDel(&qryd->files[i]);
2621
2622 if(qryd->reffiles)
2623 {
2624 for(iref=0; iref < qryd->refcount; iref++)
2625 ajStrDel(&qryd->reffiles[iref][i]);
2626 }
2627
2628 ++i;
2629 }
2630
2631 AJFREE(qryd->files);
2632 AJFREE(qryd->reffiles);
2633
2634 AJFREE(qry->QryData);
2635 qry->QryData = NULL;
2636
2637 return ajFalse;
2638 }
2639
2640
2641 qryd->libs = ajFileNewInNameS(qryd->files[i]);
2642
2643 if(!qryd->libs)
2644 {
2645 ajDebug("featEmbossGcgAll: cannot open sequence file\n");
2646
2647 return ajFalse;
2648 }
2649
2650
2651 if(qryd->reffiles)
2652 qryd->libr = ajFileNewInNameS(qryd->reffiles[0][i]);
2653
2654 if(!qryd->libr)
2655 {
2656 ajDebug("featEmbossGcgAll: cannot open reference file\n");
2657
2658 return ajFalse;
2659 }
2660 }
2661
2662 featEmbossGcgLoadBuff(fttabin);
2663
2664 if(!qry->CaseId)
2665 qry->QryDone = ajTrue;
2666
2667 return ajTrue;
2668 }
2669
2670
2671
2672
2673 /* @funcstatic featEmbossGcgLoadBuff ******************************************
2674 **
2675 ** Copies text data to a buffered file, and skips sequence data.
2676 **
2677 ** @param [u] fttabin [AjPFeattabin] Feature table input object
2678 ** @return [void]
2679 **
2680 ** @release 6.5.0
2681 ** @@
2682 ******************************************************************************/
2683
featEmbossGcgLoadBuff(AjPFeattabin fttabin)2684 static void featEmbossGcgLoadBuff(AjPFeattabin fttabin)
2685 {
2686 AjPQuery qry;
2687 FeatPEmbossQry qryd;
2688
2689 qry = fttabin->Input->Query;
2690 qryd = qry->QryData;
2691
2692 ajDebug("featEmbossGcgLoadBuff\n");
2693
2694 if(!qry->QryData)
2695 ajFatal("featEmbossGcgLoadBuff Query Data not initialised");
2696
2697 /* copy all the ref data */
2698
2699 featEmbossGcgReadRef(fttabin);
2700
2701 /* skip the sequence (do we care about the format?) */
2702 featEmbossGcgReadSeq(fttabin);
2703
2704 /* ajFilebuffTraceFull(fttabin->Input->Filebuff, 9999, 100); */
2705
2706 if(!qryd->libr)
2707 ajFileClose(&qryd->libs);
2708
2709 return;
2710 }
2711
2712
2713
2714
2715 /* @funcstatic featEmbossGcgReadRef *******************************************
2716 **
2717 ** Copies text data to a buffered file for reading later
2718 **
2719 ** @param [u] fttabin [AjPFeattabin] Feature table input object
2720 ** @return [AjBool] ajTrue on success
2721 **
2722 ** @release 6.5.0
2723 ** @@
2724 ******************************************************************************/
2725
featEmbossGcgReadRef(AjPFeattabin fttabin)2726 static AjBool featEmbossGcgReadRef(AjPFeattabin fttabin)
2727 {
2728 AjPStr line = NULL;
2729 AjPQuery qry;
2730 FeatPEmbossQry qryd;
2731 ajlong rpos;
2732 AjPStr id = NULL;
2733 AjPStr idc = NULL;
2734 AjBool ispir = ajFalse;
2735 AjBool continued = ajFalse;
2736 AjBool testcontinue = ajFalse;
2737 char *p = NULL;
2738
2739 qry = fttabin->Input->Query;
2740 qryd = qry->QryData;
2741
2742 if(!featRegGcgRefId)
2743 featRegGcgRefId =ajRegCompC("^>...([^ \n]+)");
2744
2745 if(!featRegGcgSplit)
2746 featRegGcgSplit =ajRegCompC("_0+$");
2747
2748 if(!ajReadline(qryd->libr, &line)) /* end of file */
2749 return ajFalse;
2750
2751 if(ajStrGetCharFirst(line) != '>') /* not start of entry */
2752 ajFatal("featGcgReadRef bad entry start:\n'%S'", line);
2753
2754 if(ajStrGetCharPos(line, 3) == ';') /* PIR entry */
2755 ispir = ajTrue;
2756
2757 if(ispir)
2758 ajFilebuffLoadS(fttabin->Input->Filebuff, line);
2759
2760
2761 if(ajRegExec(featRegGcgRefId, line))
2762 {
2763 continued = ajFalse;
2764 ajRegSubI(featRegGcgRefId, 1, &id);
2765
2766 if(ajRegExec(featRegGcgSplit,id))
2767 {
2768 continued = ajTrue;
2769 p = ajStrGetuniquePtr(&id);
2770 p = strrchr(p,(ajint)'_');
2771 *(++p)='\0';
2772 ajStrSetValid(&id);
2773 }
2774 }
2775 else
2776 {
2777 ajDebug("featEmbossGcgReadRef bad ID line\n'%S'\n", line);
2778 ajFatal("featEmbossGcgReadRef bad ID line\n'%S'\n", line);
2779 }
2780
2781
2782
2783 if(!ajReadline(qryd->libr, &line)) /* blank desc line */
2784
2785 return ajFalse;
2786
2787 if(ispir)
2788 ajFilebuffLoadS(fttabin->Input->Filebuff, line);
2789
2790
2791 rpos = ajFileResetPos(qryd->libr);
2792
2793 while(ajReadline(qryd->libr, &line))
2794 {
2795 /* end of file */
2796 if(ajStrGetCharFirst(line) == '>')
2797 { /* start of next entry */
2798 /* skip over split entries so it can be used for "all" */
2799
2800 if(continued)
2801 {
2802 testcontinue=ajTrue;
2803 ajRegExec(featRegGcgRefId, line);
2804 ajRegSubI(featRegGcgRefId, 1, &idc);
2805
2806 if(!ajStrPrefixS(idc,id))
2807 {
2808 ajFileSeek(qryd->libr, rpos, 0);
2809 ajStrDel(&line);
2810 ajStrDel(&id);
2811 ajStrDel(&idc);
2812
2813 return ajTrue;
2814 }
2815 }
2816 else
2817 {
2818 ajFileSeek(qryd->libr, rpos, 0);
2819 ajStrDel(&line);
2820 ajStrDel(&id);
2821 ajStrDel(&idc);
2822
2823 return ajTrue;
2824 }
2825 }
2826 rpos = ajFileResetPos(qryd->libr);
2827
2828
2829 if(!testcontinue)
2830 {
2831 ajStrExchangeCC(&line, ". .", "..");
2832 ajFilebuffLoadS(fttabin->Input->Filebuff, line);
2833 }
2834 }
2835
2836
2837 /* at end of file */
2838
2839 ajFileClose(&qryd->libr);
2840 ajStrDel(&line);
2841 ajStrDel(&id);
2842 ajStrDel(&idc);
2843
2844 return ajTrue;
2845 }
2846
2847
2848
2849
2850 /* @funcstatic featEmbossGcgReadSeq *******************************************
2851 **
2852 ** Skips sequence data.
2853 **
2854 ** @param [u] fttabin [AjPFeattabin] feature table input object
2855 ** @return [AjBool] ajTrue on success
2856 **
2857 ** @release 6.5.0
2858 ** @@
2859 ******************************************************************************/
2860
featEmbossGcgReadSeq(AjPFeattabin fttabin)2861 static AjBool featEmbossGcgReadSeq(AjPFeattabin fttabin)
2862 {
2863 AjPStr line = NULL;
2864 AjPQuery qry;
2865 FeatPEmbossQry qryd;
2866 AjPStr gcgtype = NULL;
2867 AjPStr tmpstr = NULL;
2868 AjPStr dstr = NULL;
2869 AjPStr id = NULL;
2870 AjPStr idc = NULL;
2871 AjPStr contseq = NULL;
2872
2873 ajint gcglen;
2874 ajint pos;
2875 ajint rblock;
2876 ajlong spos;
2877 AjBool ispir = ajFalse;
2878 char *p = NULL;
2879 AjBool continued = ajFalse;
2880
2881
2882 qry = fttabin->Input->Query;
2883 qryd = qry->QryData;
2884
2885 if(!featRegGcgId)
2886 {
2887 featRegGcgId =ajRegCompC("^>...([^ ]+) +(Dummy Header|[^ ]+) +"
2888 "([^ ]+) +([^ ]+) +([0-9]+)");
2889 featRegGcgId2=ajRegCompC("^>[PF]1;([^ ]+)");
2890 }
2891
2892 if(!featRegGcgSplit)
2893 featRegGcgSplit =ajRegCompC("_0+$");
2894
2895 ajDebug("featEmbossGcgReadSeq pos: %Ld\n", ajFileResetPos(qryd->libs));
2896
2897 if(!ajReadline(qryd->libs, &line)) /* end of file */
2898 return ajFalse;
2899
2900 ajDebug("test ID line\n'%S'\n", line);
2901
2902 if(ajRegExec(featRegGcgId, line))
2903 {
2904 continued = ajFalse;
2905 ajRegSubI(featRegGcgId, 3, &gcgtype);
2906 ajRegSubI(featRegGcgId, 5, &tmpstr);
2907 ajRegSubI(featRegGcgId, 1, &id);
2908
2909 if(ajRegExec(featRegGcgSplit, id))
2910 {
2911 continued = ajTrue;
2912 p = ajStrGetuniquePtr(&id);
2913 p = strrchr(p,(ajint)'_');
2914 *(++p)='\0';
2915 ajStrSetValid(&id);
2916
2917 if(!contseq)
2918 contseq = ajStrNew();
2919
2920 if(!dstr)
2921 dstr = ajStrNew();
2922 }
2923
2924 ajStrToInt(tmpstr, &gcglen);
2925 }
2926 else if(ajRegExec(featRegGcgId2, line))
2927 {
2928 ajStrAssignC(&gcgtype, "ASCII");
2929 ajRegSubI(featRegGcgId, 1, &tmpstr);
2930 ispir = ajTrue;
2931 }
2932 else
2933 {
2934 ajDebug("featEmbossGcgReadSeq bad ID line\n'%S'\n", line);
2935 ajFatal("featEmbossGcgReadSeq bad ID line\n'%S'\n", line);
2936
2937 return ajFalse;
2938 }
2939
2940 if(!ajReadline(qryd->libs, &line)) /* desc line */
2941 return ajFalse;
2942
2943 /*
2944 ** need to pick up the length and type, and read to the end of sequence
2945 ** see fasta code to get a real sequence for this
2946 ** Also need to handle split entries and go find the rest
2947 */
2948
2949 if(ispir)
2950 {
2951 spos = ajFileResetPos(qryd->libs);
2952
2953 while(ajReadline(qryd->libs, &line))
2954 { /* end of file */
2955 if(ajStrGetCharFirst(line) == '>')
2956 { /* start of next entry */
2957 ajFileSeek(qryd->libs, spos, 0);
2958 break;
2959 }
2960
2961 spos = ajFileResetPos(qryd->libs);
2962 ajFilebuffLoadS(fttabin->Input->Filebuff, line);
2963 }
2964 }
2965 else
2966 {
2967 ajStrSetRes(&contseq, gcglen+3);
2968 rblock = gcglen;
2969
2970 if(ajStrGetCharFirst(gcgtype) == '2')
2971 rblock = (rblock+3)/4;
2972
2973 if(!ajReadbinBinary(qryd->libs, rblock, 1,
2974 ajStrGetuniquePtr(&contseq)))
2975 ajFatal("error reading file %F", qryd->libs);
2976
2977 /* convert 2bit to ascii */
2978 if(ajStrGetCharFirst(gcgtype) == '2')
2979 featGcgBinDecode(&contseq, gcglen);
2980 else if(ajStrGetCharFirst(gcgtype) == 'A')
2981 {
2982 /* are seq chars OK? */
2983 ajStrSetValidLen(&contseq, gcglen);
2984 }
2985 else
2986 {
2987 ajRegSubI(featRegGcgId, 1, &tmpstr);
2988 ajFatal("Unknown GCG entry type '%S', entry name '%S'",
2989 gcgtype, tmpstr);
2990 }
2991
2992 if(!ajReadline(qryd->libs, &line)) /* newline at end */
2993 ajFatal("error reading file %F", qryd->libs);
2994
2995 if(continued)
2996 {
2997 spos = ajFileResetPos(qryd->libs);
2998
2999 while(ajReadline(qryd->libs,&line))
3000 {
3001 ajRegExec(featRegGcgId, line);
3002 ajRegSubI(featRegGcgId, 5, &tmpstr);
3003 ajRegSubI(featRegGcgId, 1, &idc);
3004
3005 if(!ajStrPrefixS(idc,id))
3006 {
3007 ajFileSeek(qryd->libs, spos, 0);
3008 break;
3009 }
3010
3011 ajStrToInt(tmpstr, &gcglen);
3012
3013 if(!ajReadline(qryd->libs, &dstr)) /* desc line */
3014 return ajFalse;
3015
3016 ajStrSetRes(&contseq, gcglen+3);
3017
3018 rblock = gcglen;
3019 if(ajStrGetCharFirst(gcgtype) == '2')
3020 rblock = (rblock+3)/4;
3021
3022 if(!ajReadbinBinary(qryd->libs, rblock, 1,
3023 ajStrGetuniquePtr(&contseq)))
3024 ajFatal("error reading file %F", qryd->libs);
3025
3026 /* convert 2bit to ascii */
3027 if(ajStrGetCharFirst(gcgtype) == '2')
3028 featGcgBinDecode(&contseq, gcglen);
3029 else if(ajStrGetCharFirst(gcgtype) == 'A')
3030 {
3031 /* are seq chars OK? */
3032 ajStrSetValidLen(&contseq, gcglen);
3033 }
3034 else
3035 {
3036 ajRegSubI(featRegGcgId, 1, &tmpstr);
3037 ajFatal("Unknown GCG entry: name '%S'",
3038 tmpstr);
3039 }
3040
3041 if(!ajReadline(qryd->libs, &line)) /* newline at end */
3042 ajFatal("error reading file %F", qryd->libs);
3043
3044 if(!featRegGcgCont)
3045 featRegGcgCont = ajRegCompC("^([^ ]+) +([^ ]+) +([^ ]+) +"
3046 "([^ ]+) +([^ ]+) +([^ ]+) "
3047 "+([^ ]+) +"
3048 "([^ ]+) +([0-9]+)");
3049
3050 ajRegExec(featRegGcgCont, dstr);
3051 ajRegSubI(featRegGcgCont, 9, &tmpstr);
3052 ajStrToInt(tmpstr, &pos);
3053 /*seqin->Inseq->Len = pos-1;*/
3054
3055 /*ajStrAppendS(&seqin->Inseq,contseq);*/
3056 spos = ajFileResetPos(qryd->libs);
3057 }
3058 }
3059 }
3060
3061 ajStrDel(&gcgtype);
3062 ajStrDel(&line);
3063 ajStrDel(&tmpstr);
3064 ajStrDel(&dstr);
3065 ajStrDel(&id);
3066 ajStrDel(&idc);
3067 ajStrDel(&contseq);
3068
3069 return ajTrue;
3070 }
3071
3072
3073
3074
3075 /* @funcstatic featCdQryReuse *************************************************
3076 **
3077 ** Tests whether Cd index query data can be reused or whether we are finished.
3078 **
3079 ** Clears qryData structure when finished.
3080 **
3081 ** @param [u] qry [AjPQuery] Query data
3082 ** @return [AjBool] ajTrue if we can continue,
3083 ** ajFalse if all is done.
3084 **
3085 ** @release 6.5.0
3086 ** @@
3087 ******************************************************************************/
3088
featCdQryReuse(AjPQuery qry)3089 static AjBool featCdQryReuse(AjPQuery qry)
3090 {
3091 FeatPCdQry qryd;
3092
3093 qryd = qry->QryData;
3094
3095 if(!qry || !qryd)
3096 return ajFalse;
3097
3098
3099 /* ajDebug("qry->ResultsList %x\n",qry->ResultsList);*/
3100 if(!qry->ResultsList)
3101 {
3102 ajDebug("query data all finished\n");
3103 AJFREE(qry->QryData);
3104 qryd = NULL;
3105
3106 return ajFalse;
3107 }
3108 else
3109 {
3110 ajDebug("reusing data from previous call %x\n", qry->QryData);
3111 ajDebug("listlen %Lu\n", ajListGetLength(qry->ResultsList));
3112 ajDebug("divfile '%S'\n", qryd->divfile);
3113 ajDebug("idxfile '%S'\n", qryd->idxfile);
3114 ajDebug("datfile '%S'\n", qryd->datfile);
3115 ajDebug("seqfile '%S'\n", qryd->seqfile);
3116 ajDebug("name '%s'\n", qryd->name);
3117 ajDebug("nameSize %d\n", qryd->nameSize);
3118 ajDebug("div %d\n", qryd->div);
3119 ajDebug("maxdiv %d\n", qryd->maxdiv);
3120 /*ajListTrace(qry->ResultsList);*/
3121 }
3122
3123 return ajTrue;
3124 }
3125
3126
3127
3128
3129 /* @funcstatic featCdQryOpen **************************************************
3130 **
3131 ** Opens everything for a new CD query
3132 **
3133 ** @param [u] qry [AjPQuery] Query data
3134 ** @return [AjBool] ajTrue if we can continue,
3135 ** ajFalse if all is done.
3136 **
3137 ** @release 6.5.0
3138 ** @@
3139 ******************************************************************************/
3140
featCdQryOpen(AjPQuery qry)3141 static AjBool featCdQryOpen(AjPQuery qry)
3142 {
3143 FeatPCdQry qryd;
3144
3145 ajuint i;
3146 short j;
3147 static char *name;
3148 AjPStr fullName = NULL;
3149
3150 if(!ajStrGetLen(qry->IndexDir))
3151 {
3152 ajDebug("no indexdir defined for database '%S'\n", qry->DbName);
3153 ajErr("no indexdir defined for database '%S'", qry->DbName);
3154
3155 return ajFalse;
3156 }
3157
3158 ajDebug("directory '%S' fields: %Lu hasacc:%B\n",
3159 qry->IndexDir, ajListGetLength(qry->QueryFields), qry->HasAcc);
3160
3161 qry->QryData = AJNEW0(qryd);
3162 AJNEW0(qryd->idxLine);
3163 AJNEW0(qryd->trgLine);
3164 qryd->dfp = featCdFileOpen(qry->IndexDir, "division.lkp", &qryd->divfile);
3165
3166 if(!qryd->dfp)
3167 {
3168 ajWarn("Cannot open division file '%S' for database '%S'",
3169 qryd->divfile, qry->DbName);
3170
3171 return ajFalse;
3172 }
3173
3174
3175 qryd->nameSize = qryd->dfp->RecSize - 2;
3176 qryd->maxdiv = qryd->dfp->NRecords;
3177 ajDebug("nameSize: %d maxdiv: %d\n",
3178 qryd->nameSize, qryd->maxdiv);
3179 qryd->name = ajCharNewRes(qryd->nameSize+1);
3180 name = ajCharNewRes(qryd->nameSize+1);
3181 AJCNEW0(qryd->Skip, qryd->maxdiv);
3182 featCdFileSeek(qryd->dfp, 0);
3183
3184 for(i=0; i < qryd->maxdiv; i++)
3185 {
3186 ajReadbinInt2(qryd->dfp->File, &j);
3187 ajReadbinCharTrim(qryd->dfp->File, qryd->nameSize, name);
3188
3189 ajStrAssignC(&fullName, name);
3190 ajFilenameReplacePathS(&fullName, qry->Directory);
3191
3192 if(!ajFilenameTestInclude(fullName, qry->Exclude, qry->Filename))
3193 qryd->Skip[i] = ajTrue;
3194 }
3195
3196 qryd->ifp = featCdFileOpen(qry->IndexDir, "entrynam.idx", &qryd->idxfile);
3197
3198 if(!qryd->ifp)
3199 {
3200 ajErr("Cannot open index file '%S' for database '%S'",
3201 qryd->idxfile, qry->DbName);
3202
3203 return ajFalse;
3204 }
3205
3206 ajStrDel(&fullName);
3207 ajCharDel(&name);
3208
3209 return ajTrue;
3210 }
3211
3212
3213
3214
3215 /* @funcstatic featCdQryEntry *************************************************
3216 **
3217 ** Queries for a single entry in an EMBLCD index
3218 **
3219 ** @param [u] qry [AjPQuery] Query data
3220 ** @return [AjBool] ajTrue if we can continue,
3221 ** ajFalse if all is done.
3222 **
3223 ** @release 6.5.0
3224 ** @@
3225 ******************************************************************************/
3226
featCdQryEntry(AjPQuery qry)3227 static AjBool featCdQryEntry(AjPQuery qry)
3228 {
3229 FeatPCdEntry entry = NULL;
3230 ajint ipos = -1;
3231 ajint trghit;
3232 FeatPCdQry qryd;
3233 const AjPList fdlist;
3234 AjIList iter;
3235 AjPQueryField fd;
3236 AjPStr qrystr = NULL;
3237 ajuint i;
3238 ajuint ii;
3239 ajint j;
3240
3241 const char* embossfields[] = {
3242 "id", "acc", "sv", "org", "key", "des", "gi", NULL
3243 };
3244 const char* emblcdfields[] = {
3245 NULL, "acnum", "seqvn", "taxon", "keyword", "des", "gi", NULL
3246 };
3247
3248 fdlist = ajQueryGetallFields(qry);
3249
3250 ajQueryGetQuery(qry, &qrystr);
3251
3252 ajDebug("featCdQryEntry %S hasacc:%B\n",
3253 qrystr , qry->HasAcc);
3254 ajStrDel(&qrystr);
3255
3256 qryd = qry->QryData;
3257 iter= ajListIterNewread(fdlist);
3258
3259 while(!ajListIterDone(iter))
3260 {
3261 fd = ajListIterGet(iter);
3262 ajDebug("query link: %u field '%S' wild '%S'\n",
3263 fd->Link, fd->Field, fd->Wildquery);
3264
3265 if((fd->Link == AJQLINK_ELSE) && ajListGetLength(qry->ResultsList))
3266 continue;
3267
3268 for(i=0; embossfields[i]; i++)
3269 {
3270 ajDebug("test field[%u] '%S' = '%s'\n",
3271 i, fd->Field, emblcdfields[i]);
3272 if(ajStrMatchC(fd->Field, embossfields[i]))
3273 {
3274 ajDebug("match field[%u] '%S' = '%s'\n",
3275 i, fd->Field, emblcdfields[i]);
3276 if(!emblcdfields[i]) /* ID index */
3277 {
3278 ipos = featCdIdxSearch(qryd->idxLine,
3279 fd->Wildquery, qryd->ifp);
3280
3281 if(ipos >= 0)
3282 {
3283 if(!qryd->Skip[qryd->idxLine->DivCode-1])
3284 {
3285 AJNEW0(entry);
3286 entry->div = qryd->idxLine->DivCode;
3287 entry->annoff = qryd->idxLine->AnnOffset;
3288 entry->seqoff = qryd->idxLine->SeqOffset;
3289 ajListPushAppend(qry->ResultsList, (void*)entry);
3290 }
3291 else
3292 ajDebug("SKIP: '%S' [file %d]\n",
3293 fd->Wildquery, qryd->idxLine->DivCode);
3294 }
3295 }
3296 else /* target/hit index */
3297 {
3298 if(featCdTrgOpen(qry->IndexDir, emblcdfields[i],
3299 &qryd->trgfp, &qryd->hitfp))
3300 {
3301 trghit = featCdTrgSearch(qryd->trgLine,
3302 fd->Wildquery, qryd->trgfp);
3303
3304 if(trghit >= 0)
3305 {
3306 featCdFileSeek(qryd->hitfp,
3307 qryd->trgLine->FirstHit-1);
3308 ajDebug("%s First: %d Count: %d\n",
3309 emblcdfields[i],
3310 qryd->trgLine->FirstHit,
3311 qryd->trgLine->NHits);
3312 ipos = qryd->trgLine->FirstHit;
3313
3314 for(ii = 0; ii < qryd->trgLine->NHits; ii++)
3315 {
3316 ajReadbinInt(qryd->hitfp->File, &j);
3317 j--;
3318 ajDebug("hitlist[%u] entry = %d\n", ii, j);
3319 featCdIdxLine(qryd->idxLine, j, qryd->ifp);
3320
3321 if(!qryd->Skip[qryd->idxLine->DivCode-1])
3322 {
3323 AJNEW0(entry);
3324 entry->div = qryd->idxLine->DivCode;
3325 entry->annoff = qryd->idxLine->AnnOffset;
3326 entry->seqoff = qryd->idxLine->SeqOffset;
3327 ajListPushAppend(qry->ResultsList,
3328 (void*)entry);
3329 }
3330 else
3331 ajDebug("SKIP: %s '%S' [file %d]\n",
3332 emblcdfields[i],
3333 fd->Wildquery,
3334 qryd->idxLine->DivCode);
3335 }
3336 }
3337
3338 featCdTrgClose(&qryd->trgfp, &qryd->hitfp);
3339 ajStrDel(&qryd->trgLine->Target);
3340 }
3341 }
3342 break;
3343 }
3344 }
3345 if(!embossfields[i])
3346 {
3347 if(featCdTrgOpen(qry->IndexDir, MAJSTRGETPTR(fd->Field),
3348 &qryd->trgfp, &qryd->hitfp))
3349 {
3350 trghit = featCdTrgSearch(qryd->trgLine,
3351 fd->Wildquery, qryd->trgfp);
3352
3353 if(trghit >= 0)
3354 {
3355 featCdFileSeek(qryd->hitfp,
3356 qryd->trgLine->FirstHit-1);
3357 ajDebug("%s First: %d Count: %d\n",
3358 fd->Field, qryd->trgLine->FirstHit,
3359 qryd->trgLine->NHits);
3360 ipos = qryd->trgLine->FirstHit;
3361
3362 for(ii = 0; ii < qryd->trgLine->NHits; ii++)
3363 {
3364 ajReadbinInt(qryd->hitfp->File, &j);
3365 j--;
3366 ajDebug("hitlist[%u] entry = %d\n", ii, j);
3367 featCdIdxLine(qryd->idxLine, j, qryd->ifp);
3368
3369 if(!qryd->Skip[qryd->idxLine->DivCode-1])
3370 {
3371 AJNEW0(entry);
3372 entry->div = qryd->idxLine->DivCode;
3373 entry->annoff = qryd->idxLine->AnnOffset;
3374 entry->seqoff = qryd->idxLine->SeqOffset;
3375 ajListPushAppend(qry->ResultsList, (void*)entry);
3376 }
3377 else
3378 ajDebug("SKIP: %S '%S' [file %d]\n",
3379 fd->Field,
3380 fd->Wildquery,
3381 qryd->idxLine->DivCode);
3382 }
3383 }
3384
3385 featCdTrgClose(&qryd->trgfp, &qryd->hitfp);
3386 ajStrDel(&qryd->trgLine->Target);
3387 }
3388 }
3389 }
3390
3391 ajListIterDel(&iter);
3392 if(ipos < 0)
3393 return ajFalse;
3394
3395 if(!ajListGetLength(qry->ResultsList))
3396 return ajFalse;
3397
3398 if(!qry->CaseId)
3399 qry->QryDone = ajTrue;
3400
3401 return ajTrue;
3402 }
3403
3404
3405
3406
3407 /* @funcstatic featCdQryQuery *************************************************
3408 **
3409 ** Queries for one or more entries in an EMBLCD index
3410 **
3411 ** @param [u] qry [AjPQuery] Query data
3412 ** @return [AjBool] ajTrue if we can continue,
3413 ** ajFalse if all is done.
3414 **
3415 ** @release 6.5.0
3416 ** @@
3417 ******************************************************************************/
3418
featCdQryQuery(AjPQuery qry)3419 static AjBool featCdQryQuery(AjPQuery qry)
3420 {
3421 AjIList iter = NULL;
3422 AjPQueryField field = NULL;
3423
3424 FeatPCdEntry newhit;
3425 FeatPCdEntry *allhits = NULL;
3426 ajulong** keys = NULL;
3427
3428 AjPTable newtable = NULL;
3429
3430 ajuint i;
3431 ajuint lasthits = 0;
3432 ajuint fdhits = 0;
3433
3434 ajulong *ikey = NULL;
3435
3436 ajuint ishift = sizeof(ajulong)/2;
3437
3438 if(!qry->CaseId)
3439 qry->QryDone = ajTrue;
3440
3441 ajTableSettypeUlong(qry->ResultsTable);
3442 ajTableSetDestroyboth(qry->ResultsTable);
3443
3444 iter = ajListIterNewread(qry->QueryFields);
3445
3446 while(!ajListIterDone(iter))
3447 {
3448 field = ajListIterGet(iter);
3449
3450 if((field->Link == AJQLINK_ELSE) && (lasthits > 0))
3451 {
3452 ajDebug("ELSE: lasthits:%u skip\n", lasthits);
3453 continue;
3454 }
3455
3456 if(ajStrMatchC(field->Field, "id"))
3457 featCdIdxQuery(qry, field->Wildquery);
3458 else
3459 featCdTrgQuery(qry, field->Field, field->Wildquery);
3460
3461 fdhits = (ajuint) ajListGetLength(qry->ResultsList);
3462
3463 ajDebug("featCdQryQuery hits: %u link: %u\n",
3464 fdhits, field->Link);
3465
3466 switch(field->Link)
3467 {
3468 case AJQLINK_INIT:
3469 while(ajListPop(qry->ResultsList, (void**)&newhit))
3470 {
3471 AJNEW(ikey);
3472 *ikey = (((ajulong)newhit->div) << ishift) +
3473 (ajulong)newhit->annoff;
3474 ajTablePutClean(qry->ResultsTable, ikey, newhit,
3475 &ajMemFree, &ajMemFree);
3476 ikey = NULL;
3477 }
3478
3479 break;
3480
3481 case AJQLINK_OR:
3482 while(ajListPop(qry->ResultsList, (void**)&newhit))
3483 {
3484 AJNEW(ikey);
3485 *ikey = (((ajulong)newhit->div) << ishift) +
3486 (ajulong)newhit->annoff;
3487 ajTablePutClean(qry->ResultsTable, ikey, newhit,
3488 &ajMemFree, &ajMemFree);
3489 ikey = NULL;
3490 }
3491 break;
3492
3493 case AJQLINK_AND:
3494 newtable = ajTableulongNew(fdhits);
3495 ajTableSetDestroyboth(newtable);
3496 while(ajListPop(qry->ResultsList, (void**)&newhit))
3497 {
3498 AJNEW(ikey);
3499 *ikey = (((ajulong)newhit->div) << ishift) +
3500 (ajulong)newhit->annoff;
3501 ajTablePutClean(newtable, ikey, newhit,
3502 &ajMemFree, &ajMemFree);
3503 ikey = NULL;
3504 }
3505 ajTableMergeAnd(qry->ResultsTable, newtable);
3506 ajTableDelValdel(&newtable, &ajMemFree);
3507 break;
3508
3509 case AJQLINK_EOR:
3510 case AJQLINK_ELSE:
3511 newtable = ajTableulongNew(fdhits);
3512 ajTableSetDestroyboth(newtable);
3513 while(ajListPop(qry->ResultsList, (void**)&newhit))
3514 {
3515 AJNEW(ikey);
3516 *ikey = (((ajulong)newhit->div) << ishift) + newhit->annoff;
3517 ajTablePutClean(newtable, ikey, newhit,
3518 &ajMemFree, &ajMemFree);
3519 ikey = NULL;
3520 }
3521
3522 ajTableMergeEor(qry->ResultsTable, newtable);
3523 ajTableDelValdel(&newtable, &ajMemFree);
3524 break;
3525
3526 case AJQLINK_NOT:
3527 newtable = ajTableulongNew(fdhits);
3528 ajTableSetDestroyboth(newtable);
3529 while(ajListPop(qry->ResultsList, (void**)&newhit))
3530 {
3531 AJNEW(ikey);
3532 *ikey = (((ajulong)newhit->div) << ishift) + newhit->annoff;
3533 ajTablePutClean(newtable, ikey, newhit,
3534 &ajMemFree, &ajMemFree);
3535 ikey = NULL;
3536 }
3537 ajTableMergeNot(qry->ResultsTable, newtable);
3538 ajTableDelValdel(&newtable, &ajMemFree);
3539 break;
3540
3541 default:
3542 ajErr("Unexpected query link operator number '%u'",
3543 field->Link);
3544 break;
3545 }
3546
3547 lasthits = fdhits;
3548 }
3549
3550 ajListIterDel(&iter);
3551
3552 ajTableToarrayKeysValues(qry->ResultsTable, (void***) &keys,
3553 (void***)&allhits);
3554 for(i=0; allhits[i]; i++)
3555 {
3556 AJFREE(keys[i]);
3557 ajListPushAppend(qry->ResultsList, (void*) allhits[i]);
3558 }
3559 AJFREE(keys);
3560
3561 ajDebug("ajListSortUnique len:%Lu\n",
3562 ajListGetLength(qry->ResultsList));
3563 ajListSortUnique(qry->ResultsList,
3564 &featCdEntryCmp, &featCdEntryDel);
3565
3566 AJFREE(allhits);
3567
3568 ajDebug("featCdQryQuery clear results table\n");
3569 ajTableClear(qry->ResultsTable);
3570
3571 if(ajListGetLength(qry->ResultsList))
3572 return ajTrue;
3573
3574 return ajFalse;
3575 }
3576
3577
3578
3579
3580 /* @funcstatic featCdEntryCmp *************************************************
3581 **
3582 ** Compares two FeatPEntry objects
3583 **
3584 ** @param [r] pa [const void*] FeatPEntry object
3585 ** @param [r] pb [const void*] FeatPEntry object
3586 ** @return [int] -1 if first entry should sort before second, +1 if the
3587 ** second entry should sort first. 0 if they are identical
3588 **
3589 ** @release 6.5.0
3590 ** @@
3591 ******************************************************************************/
featCdEntryCmp(const void * pa,const void * pb)3592 static int featCdEntryCmp(const void* pa, const void* pb)
3593 {
3594 const FeatPCdEntry a;
3595 const FeatPCdEntry b;
3596
3597 a = *(FeatPCdEntry const *) pa;
3598 b = *(FeatPCdEntry const *) pb;
3599
3600 /*
3601 ajDebug("featCdEntryCmp %x %d %d : %x %d %d\n",
3602 a, a->div, a->annoff,
3603 b, b->div, b->annoff);
3604 */
3605 if(a->div != b->div)
3606 return (a->div - b->div);
3607
3608 return (a->annoff - b->annoff);
3609 }
3610
3611
3612
3613
/* @funcstatic featCdEntryDel *************************************************
**
** Deletes a FeatPCdEntry object
**
** Has the two-argument form needed to pass it to ajListSortUnique as the
** delete function; the second argument is ignored.
**
** @param [d] pentry [void**] Address of a FeatPCdEntry object
** @param [r] cl [void*] Standard unused argument, usually NULL.
** @return [void]
**
** @release 6.5.0
** @@
******************************************************************************/
static void featCdEntryDel(void** pentry, void* cl)
{
    AJFREE(*pentry);

    (void) cl;                  /* unused */
}
3633
3634
3635
3636
3637 /* @funcstatic featCdQryNext **************************************************
3638 **
3639 ** Processes the next query for an EMBLCD index
3640 **
3641 ** @param [u] qry [AjPQuery] Query data
3642 ** @return [AjBool] ajTrue if successful
3643 **
3644 ** @release 6.5.0
3645 ** @@
3646 ******************************************************************************/
3647
featCdQryNext(AjPQuery qry)3648 static AjBool featCdQryNext(AjPQuery qry)
3649 {
3650 FeatPCdEntry entry;
3651 FeatPCdQry qryd;
3652 void* item;
3653
3654 qryd = qry->QryData;
3655
3656 if(!ajListGetLength(qry->ResultsList))
3657 return ajFalse;
3658
3659 ajDebug("qry->ResultsList (b) length %Lu\n",
3660 ajListGetLength(qry->ResultsList));
3661 /*ajListTrace(qry->ResultsList);*/
3662 ajListPop(qry->ResultsList, &item);
3663 entry = (FeatPCdEntry) item;
3664
3665 /*
3666 ajDebug("entry: %x div: %d (%d) ann: %d seq: %d\n",
3667 entry, entry->div, qryd->div, entry->annoff, entry->seqoff);
3668 */
3669 qryd->idnum = entry->annoff - 1;
3670
3671 /*
3672 ajDebug("idnum: %d\n", qryd->idnum);
3673 */
3674 qryd->Samefile = ajTrue;
3675
3676 if(entry->div != qryd->div)
3677 {
3678 qryd->Samefile = ajFalse;
3679 qryd->div = entry->div;
3680 /*ajDebug("div: %d\n", qryd->div);*/
3681
3682 if(!featCdQryFile(qry))
3683 return ajFalse;
3684 }
3685
3686 ajDebug("Offsets(cd) %d %d\n", entry->annoff, entry->seqoff);
3687 ajDebug("libr %x\n", qryd->libr);
3688 ajDebug("libr %F\n", qryd->libr);
3689
3690 ajFileSeek(qryd->libr, entry->annoff,0);
3691
3692 if(qryd->libs)
3693 ajFileSeek(qryd->libs, entry->seqoff,0);
3694
3695 AJFREE(entry);
3696
3697 if(!qry->CaseId)
3698 qry->QryDone = ajTrue;
3699
3700 return ajTrue;
3701 }
3702
3703
3704
3705
3706 /* @funcstatic featCdQryClose *************************************************
3707 **
3708 ** Closes query data for an EMBLCD index
3709 **
3710 ** @param [u] qry [AjPQuery] Query data
3711 ** @return [AjBool] ajTrue if all is done
3712 **
3713 ** @release 6.5.0
3714 ** @@
3715 ******************************************************************************/
3716
featCdQryClose(AjPQuery qry)3717 static AjBool featCdQryClose(AjPQuery qry)
3718 {
3719 FeatPCdQry qryd = NULL;
3720
3721 ajDebug("featCdQryClose clean up qryd\n");
3722
3723 qryd = qry->QryData;
3724
3725 if(!qryd)
3726 return ajTrue;
3727
3728 ajCharDel(&qryd->name);
3729 ajStrDel(&qryd->divfile);
3730 ajStrDel(&qryd->idxfile);
3731 ajStrDel(&qryd->datfile);
3732 ajStrDel(&qryd->seqfile);
3733 ajStrDel(&qryd->srcfile);
3734 ajStrDel(&qryd->tblfile);
3735
3736 featCdIdxDel(&qryd->idxLine);
3737 featCdTrgDel(&qryd->trgLine);
3738
3739 featCdFileClose(&qryd->ifp);
3740 featCdFileClose(&qryd->dfp);
3741 /* defined in a buffer, cleared there */
3742 /*
3743 ajFileClose(&qryd->libr);
3744 ajFileClose(&qryd->libs);
3745 */
3746 qryd->libr=0;
3747 qryd->libs=0;
3748 AJFREE(qryd->trgLine);
3749 AJFREE(qryd->idxLine);
3750 AJFREE(qryd->Skip);
3751
3752 /* keep QryData for use at top of loop */
3753
3754 return ajTrue;
3755 }
3756
3757
3758
3759
3760 /* @section B+tree Database Indexing *****************************************
3761 **
3762 ** These functions manage the B+tree index access methods.
3763 **
3764 ******************************************************************************/
3765
3766
3767
3768
3769 /* @funcstatic featEmbossQryReuse *********************************************
3770 **
3771 ** Tests whether the B+tree index query data can be reused or it's finished.
3772 **
3773 ** Clears qryData structure when finished.
3774 **
3775 ** @param [u] qry [AjPQuery] Query data
3776 ** @return [AjBool] ajTrue if reusable,
3777 ** ajFalse if finished.
3778 **
3779 ** @release 6.5.0
3780 ** @@
3781 ******************************************************************************/
3782
featEmbossQryReuse(AjPQuery qry)3783 static AjBool featEmbossQryReuse(AjPQuery qry)
3784 {
3785 FeatPEmbossQry qryd;
3786
3787 qryd = qry->QryData;
3788
3789 if(!qry || !qryd)
3790 return ajFalse;
3791
3792
3793 if(!qry->ResultsList)
3794 {
3795 ajDebug("featEmbossQryReuse: query data all finished\n");
3796
3797 return ajFalse;
3798 }
3799 else
3800 {
3801 ajDebug("featEmbossQryReuse: reusing data from previous call %x\n",
3802 qry->QryData);
3803 /*ajListTrace(qry->ResultsList);*/
3804 }
3805
3806
3807 qryd->nentries = -1;
3808
3809
3810 return ajTrue;
3811 }
3812
3813
3814
3815
3816 /* @funcstatic featEmbossQryOpen **********************************************
3817 **
3818 ** Open caches (etc) for B+tree search
3819 **
3820 ** @param [u] qry [AjPQuery] Query data
3821 ** @return [AjBool] ajTrue if we can continue,
3822 ** ajFalse if all is done.
3823 **
3824 ** @release 6.5.0
3825 ** @@
3826 ******************************************************************************/
3827
featEmbossQryOpen(AjPQuery qry)3828 static AjBool featEmbossQryOpen(AjPQuery qry)
3829 {
3830 FeatPEmbossQry qryd;
3831 ajint i;
3832 AjPStr name = NULL;
3833 AjIList iter = NULL;
3834 AjPQueryField field = NULL;
3835 AjPBtcache cache = NULL;
3836
3837 qry->QryData = AJNEW0(qryd);
3838 qryd = qry->QryData;
3839 qryd->div = -1;
3840 qryd->nentries = -1;
3841
3842 qryd->Caches = ajListNew();
3843
3844 if(!ajStrGetLen(qry->IndexDir))
3845 {
3846 ajDebug("no indexdir defined for database '%S'\n", qry->DbName);
3847 ajErr("no indexdir defined for database '%S'", qry->DbName);
3848 return ajFalse;
3849 }
3850
3851 if(!featEmbossOpenCache(qry,"id",&qryd->idcache))
3852 return ajFalse;
3853
3854 iter = ajListIterNewread(qry->QueryFields);
3855 while(!ajListIterDone(iter))
3856 {
3857 field = ajListIterGet(iter);
3858
3859 ajStrFmtLower(&field->Wildquery);
3860 if(!featEmbossOpenCache(qry, MAJSTRGETPTR(field->Field), &cache))
3861 return ajFalse;
3862 ajListPushAppend(qryd->Caches, cache);
3863 cache = NULL;
3864 }
3865 ajListIterDel(&iter);
3866
3867
3868 ajDebug("directory '%S'fields: %Lu hasacc:%B\n",
3869 qry->IndexDir, ajListGetLength(qry->QueryFields), qry->HasAcc);
3870
3871
3872 if(ajStrGetLen(qry->Exclude) && qryd->nentries >= 0)
3873 {
3874 AJCNEW0(qryd->Skip,qryd->nentries);
3875 name = ajStrNew();
3876
3877 for(i=0; i < qryd->nentries; ++i)
3878 {
3879 ajStrAssignS(&name,qryd->files[i]);
3880
3881 if(!ajFilenameTestInclude(name, qry->Exclude, qry->Filename))
3882 qryd->Skip[i] = ajTrue;
3883 }
3884
3885 ajStrDel(&name);
3886 }
3887
3888 return ajTrue;
3889 }
3890
3891
3892
3893
3894 /* @funcstatic featEmbossOpenCache ********************************************
3895 **
3896 ** Create primary B+tree index cache
3897 **
3898 ** @param [u] qry [AjPQuery] Query data
3899 ** @param [r] ext [const char*] Index file extension
3900 ** @param [w] cache [AjPBtcache*] cache
3901 ** @return [AjBool] True on success
3902 **
3903 ** @release 6.5.0
3904 ** @@
3905 ******************************************************************************/
3906
featEmbossOpenCache(AjPQuery qry,const char * ext,AjPBtcache * cache)3907 static AjBool featEmbossOpenCache(AjPQuery qry, const char *ext,
3908 AjPBtcache *cache)
3909 {
3910 FeatPEmbossQry qryd;
3911 AjPStr indexextname = NULL;
3912
3913 qryd = qry->QryData;
3914
3915 indexextname = ajStrNewS(ajBtreeFieldGetExtensionC(ext));
3916
3917 *cache = ajBtreeCacheNewReadS(qry->DbAlias,indexextname,
3918 qry->IndexDir);
3919 ajStrDel(&indexextname);
3920
3921 if(!*cache)
3922 {
3923 qryd->nentries = -1;
3924
3925 return ajFalse;
3926 }
3927
3928 if(qryd->nentries == -1)
3929 qryd->nentries = ajBtreeReadEntriesS(qry->DbAlias,
3930 qry->IndexDir,
3931 qry->Directory,
3932 &qryd->files,
3933 &qryd->reffiles,
3934 &qryd->refcount);
3935
3936 return ajTrue;
3937 }
3938
3939
3940
3941
3942
3943 /* @funcstatic featEmbossQryEntry *********************************************
3944 **
3945 ** Queries for a single entry in a B+tree index
3946 **
3947 ** @param [u] qry [AjPQuery] Query data
3948 ** @return [AjBool] ajTrue if can continue,
3949 ** ajFalse if all is done.
3950 **
3951 ** @release 6.5.0
3952 ** @@
3953 ******************************************************************************/
3954
featEmbossQryEntry(AjPQuery qry)3955 static AjBool featEmbossQryEntry(AjPQuery qry)
3956 {
3957 AjPBtHitref newhit = NULL;
3958 FeatPEmbossQry qryd;
3959 const AjPList fdlist;
3960 const AjPList cachelist;
3961 AjIList iter;
3962 AjIList icache;
3963 AjPBtcache cache;
3964 AjPQueryField fd;
3965 AjPBtHitref *allhits = NULL;
3966 ajuint i;
3967
3968 ajDebug("featEmbossQryEntry fields: %Lu hasacc:%B\n",
3969 ajListGetLength(qry->QueryFields), qry->HasAcc);
3970
3971 qryd = qry->QryData;
3972
3973 fdlist = ajQueryGetallFields(qry);
3974 cachelist = qryd->Caches;
3975
3976 iter= ajListIterNewread(fdlist);
3977 icache = ajListIterNewread(cachelist);
3978 while(!ajListIterDone(iter))
3979 {
3980 fd = ajListIterGet(iter);
3981 cache = ajListIterGet(icache);
3982
3983 ajDebug("qry type:%d field '%S' wild '%S'\n",
3984 fd->Link, fd->Field, fd->Wildquery);
3985
3986 if((fd->Link == AJQLINK_ELSE) && ajListGetLength(qry->ResultsList))
3987 continue;
3988
3989 if(!ajBtreeCacheIsSecondary(cache))
3990 {
3991 ajBtreeIdentFetchHitref(cache,fd->Wildquery,
3992 qry->ResultsList);
3993 }
3994 }
3995
3996 ajListIterDel(&iter);
3997 ajListIterDel(&icache);
3998
3999 if(ajStrGetLen(qry->Organisms))
4000 {
4001 ajTableSetDestroy(qry->ResultsTable, NULL, &ajBtreeHitrefDelVoid);
4002 ajTableSettypeUser(qry->ResultsTable,
4003 &ajBtreeHitrefCmp, &ajBtreeHitrefHash);
4004
4005 while(ajListPop(qry->ResultsList, (void**)&newhit))
4006 ajTablePutClean(qry->ResultsTable, newhit, newhit,
4007 NULL, &ajBtreeHitrefDelVoid);
4008
4009 featEmbossQryOrganisms(qry);
4010
4011 ajTableToarrayValues(qry->ResultsTable, (void***)&allhits);
4012
4013 for(i=0; allhits[i]; i++)
4014 ajListPushAppend(qry->ResultsList, (void*) allhits[i]);
4015 }
4016
4017 if(!ajListGetLength(qry->ResultsList))
4018 return ajFalse;
4019
4020 if(!qry->CaseId)
4021 qry->QryDone = ajTrue;
4022
4023 return ajTrue;
4024 }
4025
4026
4027
4028
4029 /* @funcstatic featEmbossQryNext **********************************************
4030 **
4031 ** Processes the next query for a B+tree index
4032 **
4033 ** @param [u] qry [AjPQuery] Query data
4034 ** @return [AjBool] ajTrue if successful
4035 **
4036 ** @release 6.5.0
4037 ** @@
4038 ******************************************************************************/
4039
featEmbossQryNext(AjPQuery qry)4040 static AjBool featEmbossQryNext(AjPQuery qry)
4041 {
4042 AjPBtHitref entry;
4043 FeatPEmbossQry qryd;
4044 void* item;
4045 AjBool ok = ajFalse;
4046
4047
4048 qryd = qry->QryData;
4049
4050 if(!ajListGetLength(qry->ResultsList))
4051 return ajFalse;
4052
4053 /*ajListTrace(qry->ResultsList);*/
4054
4055
4056 if(!qryd->Skip)
4057 {
4058 ajListPop(qry->ResultsList, &item);
4059 entry = (AjPBtHitref) item;
4060 }
4061 else
4062 {
4063 ok = ajFalse;
4064
4065 while(!ok)
4066 {
4067 ajListPop(qry->ResultsList, &item);
4068 entry = (AjPBtHitref) item;
4069
4070 if(!qryd->Skip[entry->dbno])
4071 ok = ajTrue;
4072 else
4073 {
4074 ajBtreeHitrefDel(&entry);
4075
4076 if(!ajListGetLength(qry->ResultsList))
4077 return ajFalse;
4078 }
4079 }
4080 }
4081
4082 qryd->Samefile = ajTrue;
4083
4084 if(entry->dbno != qryd->div)
4085 {
4086 qryd->Samefile = ajFalse;
4087 qryd->div = entry->dbno;
4088 ajFileClose(&qryd->libs);
4089
4090 if(qryd->reffiles)
4091 ajFileClose(&qryd->libr);
4092 }
4093
4094 if(!qryd->libs)
4095 {
4096 qryd->libs = ajFileNewInNameS(qryd->files[entry->dbno]);
4097
4098 if(!qryd->libs)
4099 {
4100 ajBtreeHitrefDel(&entry);
4101
4102 return ajFalse;
4103 }
4104 }
4105
4106 if(qryd->reffiles && !qryd->libr)
4107 {
4108 ajFileClose(&qryd->libr);
4109 qryd->libr = ajFileNewInNameS(qryd->reffiles[0][entry->dbno]);
4110
4111 if(!qryd->libr)
4112 {
4113 ajBtreeHitrefDel(&entry);
4114
4115 return ajFalse;
4116 }
4117 }
4118
4119
4120 ajFileSeek(qryd->libs, (ajlong) entry->offset, 0);
4121 if(qryd->reffiles)
4122 ajFileSeek(qryd->libr, (ajlong) entry->refoffset, 0);
4123
4124 ajBtreeHitrefDel(&entry);
4125
4126 if(!qry->CaseId)
4127 qry->QryDone = ajTrue;
4128
4129 return ajTrue;
4130 }
4131
4132
4133
4134
4135 /* @funcstatic featEmbossQryClose *********************************************
4136 **
4137 ** Closes query data for a B+tree index
4138 **
4139 ** @param [u] qry [AjPQuery] Query data
4140 ** @return [AjBool] ajTrue if we can continue,
4141 ** ajFalse if all is done.
4142 **
4143 ** @release 6.5.0
4144 ** @@
4145 ******************************************************************************/
4146
featEmbossQryClose(AjPQuery qry)4147 static AjBool featEmbossQryClose(AjPQuery qry)
4148 {
4149 FeatPEmbossQry qryd;
4150 void* item;
4151 ajint i;
4152 ajuint iref;
4153
4154 if(!qry)
4155 return ajFalse;
4156
4157 if(!qry->QryData)
4158 return ajFalse;
4159
4160 ajDebug("featEmbossQryClose clean up qryd\n");
4161
4162 qryd = qry->QryData;
4163
4164 while(ajListGetLength(qryd->Caches))
4165 {
4166 ajListPop(qryd->Caches, &item);
4167 ajBtreeCacheDel((AjPBtcache*) &item);
4168 }
4169 ajListFree(&qryd->Caches);
4170 ajBtreeCacheDel(&qryd->idcache);
4171
4172 ajListFree(&qry->ResultsList);
4173
4174 if(qryd->Skip)
4175 {
4176 AJFREE(qryd->Skip);
4177 qryd->Skip = NULL;
4178 }
4179
4180 if(qryd->files)
4181 {
4182 i = 0;
4183
4184 while(qryd->files[i])
4185 {
4186 ajStrDel(&qryd->files[i]);
4187
4188 if(qryd->reffiles)
4189 {
4190 for(iref=0; iref < qryd->refcount; iref++)
4191 ajStrDel(&qryd->reffiles[iref][i]);
4192 }
4193
4194 ++i;
4195 }
4196
4197 AJFREE(qryd->files);
4198 }
4199
4200 if(qryd->reffiles)
4201 {
4202 for(iref=0; iref < qryd->refcount; iref++)
4203 AJFREE(qryd->reffiles[iref]);
4204
4205 AJFREE(qryd->reffiles);
4206 }
4207
4208 qryd->files = NULL;
4209 qryd->reffiles = NULL;
4210
4211
4212 /* keep QryData for use at top of loop */
4213
4214 return ajTrue;
4215 }
4216
4217
4218
4219
4220 /* @funcstatic featEmbossQryOrganisms *****************************************
4221 **
4222 ** Restricts results to matches to organism(s) in database
4223 **
4224 ** @param [u] qry [AjPQuery] Query data
4225 ** @return [AjBool] ajTrue on success.
4226 **
4227 ** @release 6.5.0
4228 ** @@
4229 ******************************************************************************/
4230
featEmbossQryOrganisms(AjPQuery qry)4231 static AjBool featEmbossQryOrganisms(AjPQuery qry)
4232 {
4233 FeatPEmbossQry qryd;
4234 AjPBtcache orgcache;
4235 AjPStr orgqry = NULL;
4236 AjPStrTok orghandle = NULL;
4237 AjPTable orgtable = NULL;
4238 AjPList orglist = NULL;
4239 AjPBtHitref newhit;
4240 ajulong fdhits = 0UL;
4241
4242 if(!ajStrGetLen(qry->Organisms))
4243 return ajTrue;
4244
4245 qryd = qry->QryData;
4246
4247 featEmbossOpenCache(qry, "org", &orgcache);
4248 orglist = ajListNew();
4249 orghandle = ajStrTokenNewC(qry->Organisms, "\t,;|");
4250 while(ajStrTokenNextParse(orghandle, &orgqry))
4251 {
4252 if(ajBtreeCacheIsSecondary(orgcache))
4253 {
4254 if(!qry->Wild)
4255 {
4256 ajBtreeKeyFetchHitref(orgcache,qryd->idcache,
4257 orgqry, orglist);
4258
4259 }
4260 else
4261 {
4262 ajBtreeKeyFetchwildHitref(orgcache, qryd->idcache,
4263 orgqry, orglist);
4264 }
4265 }
4266 else
4267 {
4268 ajBtreeIdentFetchwildHitref(orgcache,
4269 orgqry, orglist);
4270 }
4271
4272 fdhits += ajListGetLength(orglist);
4273 ajDebug("Organisms list orgqry '%S' list '%Lu'", orgqry, fdhits);
4274
4275 }
4276
4277 orgtable = ajTableNewFunctionLen(fdhits,
4278 &ajBtreeHitrefCmp,
4279 &ajBtreeHitrefHash,
4280 NULL, &ajBtreeHitrefDelVoid);
4281 while(ajListPop(orglist, (void**)&newhit))
4282 ajTablePutClean(orgtable, newhit, newhit,
4283 NULL, &ajBtreeHitrefDelVoid);
4284
4285 ajStrTokenDel(&orghandle);
4286
4287 ajTableMergeAnd(qry->ResultsTable, orgtable);
4288 ajListFree(&orglist);
4289 ajBtreeCacheDel(&orgcache);
4290 ajTableFree(&orgtable);
4291 ajStrDel(&orgqry);
4292
4293 return ajTrue;
4294 }
4295
4296
4297
4298
4299 /* @funcstatic featEmbossQryQuery *********************************************
4300 **
4301 ** Queries for one or more entries in an EMBOSS B+tree index
4302 **
4303 ** @param [u] qry [AjPQuery] Query data
4304 ** @return [AjBool] ajTrue if we can continue,
4305 ** ajFalse if all is done.
4306 **
4307 ** @release 6.5.0
4308 ** @@
4309 ******************************************************************************/
4310
featEmbossQryQuery(AjPQuery qry)4311 static AjBool featEmbossQryQuery(AjPQuery qry)
4312 {
4313 FeatPEmbossQry qryd;
4314
4315 const AjPList fdlist;
4316 const AjPList cachelist;
4317 AjIList iter;
4318 AjIList icache;
4319 AjPBtcache cache;
4320 AjPQueryField fd;
4321
4322 AjPBtHitref newhit;
4323 AjPBtHitref *allhits = NULL;
4324 AjPTable newtable = NULL;
4325
4326 ajuint i;
4327 ajulong lasthits = 0UL;
4328 ajulong fdhits = 0UL;
4329
4330 if(!qry->CaseId)
4331 qry->QryDone = ajTrue;
4332
4333 qryd = qry->QryData;
4334
4335 cachelist = qryd->Caches;
4336
4337 ajTableSetDestroy(qry->ResultsTable, NULL, &ajBtreeHitrefDelVoid);
4338 ajTableSettypeUser(qry->ResultsTable,
4339 &ajBtreeHitrefCmp, &ajBtreeHitrefHash);
4340
4341 fdlist = ajQueryGetallFields(qry);
4342
4343 iter = ajListIterNewread(fdlist);
4344 icache = ajListIterNewread(cachelist);
4345 while(!ajListIterDone(iter))
4346 {
4347 fd = ajListIterGet(iter);
4348 cache = ajListIterGet(icache);
4349
4350 ajDebug("field '%S' query: '%S'\n", fd->Field, fd->Wildquery);
4351
4352 if((fd->Link == AJQLINK_ELSE) && (lasthits > 0UL))
4353 {
4354 continue;
4355 }
4356
4357 /* is this a primary or secondary key (check the cache)? */
4358
4359 if(ajBtreeCacheIsSecondary(cache))
4360 {
4361 if(!qry->Wild)
4362 {
4363 ajBtreeKeyFetchHitref(cache, qryd->idcache,
4364 fd->Wildquery, qry->ResultsList);
4365 }
4366 else
4367 {
4368 ajBtreeKeyFetchwildHitref(cache, qryd->idcache,
4369 fd->Wildquery, qry->ResultsList);
4370 }
4371 }
4372 else
4373 {
4374 ajBtreeIdentFetchwildHitref(cache,fd->Wildquery,qry->ResultsList);
4375 ajDebug("ajBtreeIdentFetchwild results:%Lu\n",
4376 ajListGetLength(qry->ResultsList));
4377 }
4378
4379 fdhits = ajListGetLength(qry->ResultsList);
4380
4381 switch(fd->Link)
4382 {
4383 case AJQLINK_INIT:
4384 while(ajListPop(qry->ResultsList, (void**)&newhit))
4385 ajTablePutClean(qry->ResultsTable, newhit, newhit,
4386 NULL, &ajBtreeHitrefDelVoid);
4387 break;
4388
4389 case AJQLINK_OR:
4390 newtable = ajTableNewFunctionLen(fdhits,
4391 &ajBtreeHitrefCmp,
4392 &ajBtreeHitrefHash,
4393 NULL, &ajBtreeHitrefDelVoid);
4394 while(ajListPop(qry->ResultsList, (void**)&newhit))
4395 ajTablePutClean(newtable, newhit, newhit,
4396 NULL, &ajBtreeHitrefDelVoid);
4397
4398 ajTableMergeOr(qry->ResultsTable, newtable);
4399 ajTableDel(&newtable);
4400 break;
4401
4402 case AJQLINK_AND:
4403 newtable = ajTableNewFunctionLen(fdhits,
4404 &ajBtreeHitrefCmp,
4405 &ajBtreeHitrefHash,
4406 NULL, &ajBtreeHitrefDelVoid);
4407 while(ajListPop(qry->ResultsList, (void**)&newhit))
4408 ajTablePutClean(newtable, newhit, newhit,
4409 NULL, &ajBtreeHitrefDelVoid);
4410
4411 ajTableMergeAnd(qry->ResultsTable, newtable);
4412 ajTableDel(&newtable);
4413 break;
4414
4415 case AJQLINK_EOR:
4416 case AJQLINK_ELSE:
4417 newtable = ajTableNewFunctionLen(fdhits,
4418 &ajBtreeHitrefCmp,
4419 &ajBtreeHitrefHash,
4420 NULL, ajBtreeHitrefDelVoid);
4421 while(ajListPop(qry->ResultsList, (void**)&newhit))
4422 ajTablePutClean(newtable, newhit, newhit,
4423 NULL, &ajBtreeHitrefDelVoid);
4424
4425 ajTableMergeEor(qry->ResultsTable, newtable);
4426 ajTableDel(&newtable);
4427 break;
4428
4429 case AJQLINK_NOT:
4430 newtable = ajTableNewFunctionLen(fdhits,
4431 &ajBtreeHitrefCmp,
4432 &ajBtreeHitrefHash,
4433 NULL, &ajBtreeHitrefDelVoid);
4434 while(ajListPop(qry->ResultsList, (void**)&newhit))
4435 ajTablePutClean(newtable, newhit, newhit,
4436 NULL, &ajBtreeHitrefDelVoid);
4437
4438 ajTableMergeNot(qry->ResultsTable, newtable);
4439 ajTableDel(&newtable);
4440 break;
4441
4442 default:
4443 ajErr("Unexpected query link operator number '%u'",
4444 fd->Link);
4445 break;
4446 }
4447
4448 lasthits = fdhits;
4449 }
4450
4451 ajListIterDel(&iter);
4452 ajListIterDel(&icache);
4453
4454 if(ajStrGetLen(qry->Organisms))
4455 featEmbossQryOrganisms(qry);
4456
4457 ajTableToarrayValues(qry->ResultsTable, (void***)&allhits);
4458 for(i=0; allhits[i]; i++)
4459 ajListPushAppend(qry->ResultsList, (void*) allhits[i]);
4460
4461 AJFREE(allhits);
4462
4463 ajTableClear(qry->ResultsTable);
4464
4465 if(ajListGetLength(qry->ResultsList))
4466 return ajTrue;
4467
4468 return ajFalse;
4469 }
4470
4471
4472
4473
4474 /* @section CHADO Indexing ****************************************************
4475 **
4476 ** These functions manage the CHADO database access methods.
4477 **
4478 ******************************************************************************/
4479
4480
4481
4482
4483 /* @funcstatic featAccessChado ************************************************
4484 **
4485 ** Reads features from CHADO databases
4486 **
4487 ** @param [u] fttabin [AjPFeattabin] Feature input.
4488 ** @return [AjBool] ajTrue on success.
4489 **
4490 ** @release 6.4.0
4491 ** @@
4492 ******************************************************************************/
4493
featAccessChado(AjPFeattabin fttabin)4494 static AjBool featAccessChado(AjPFeattabin fttabin)
4495 {
4496 AjPQuery qry = NULL;
4497 AjPStr seqid = NULL;
4498
4499 AjPStr filterseqregions = NULL;
4500
4501 AjIList fielditer = NULL;
4502 AjPQueryField field = NULL;
4503 AjPFeattable feattab = NULL;
4504
4505 AjPStr fieldname = NULL;
4506 AjPStr condition = NULL;
4507
4508 AjPSqlconnection connection = NULL;
4509
4510 AjBool ret = ajTrue;
4511
4512 #if !defined(HAVE_MYSQL) && !defined(HAVE_POSTGRESQL)
4513 ajWarn("Cannot use access method chado without mysql or postgresql");
4514 return ajFalse;
4515 #endif
4516
4517 if(fttabin->Input->Records)
4518 return ajFalse;
4519
4520 qry = fttabin->Input->Query;
4521
4522 ajDebug("featAccesschado: %S fields: %Lu\n",
4523 qry->DbAlias, ajListGetLength(qry->QueryFields));
4524
4525 filterseqregions = ajStrNew();
4526
4527 fielditer = ajListIterNewread(qry->QueryFields);
4528
4529 while(!ajListIterDone(fielditer))
4530 {
4531 field = ajListIterGet(fielditer);
4532
4533 ajStrAssignS(&fieldname, field->Field);
4534
4535 ajDebug("field:%S - val:%S\n", field->Field, field->Wildquery);
4536
4537 if(ajStrMatchCaseC(field->Field, "id"))
4538 {
4539 ajStrAssignS(&seqid, field->Wildquery);
4540 ajStrAssignC(&fieldname, "uniquename");
4541 }
4542
4543 if(ajStrFindAnyC(field->Wildquery,"*?") != -1)
4544 {
4545 ajStrExchangeKK(&field->Wildquery,'*','%');
4546 ajStrExchangeKK(&field->Wildquery,'?','_');
4547 ajFmtPrintS(&condition," %S LIKE '%S'", fieldname,
4548 field->Wildquery);
4549 }
4550 else
4551 ajFmtPrintS(&condition,"%S = '%S'", fieldname,
4552 field->Wildquery);
4553
4554 if (ajStrGetLen(filterseqregions))
4555 ajStrAppendC(&filterseqregions," AND ");
4556
4557 ajStrAppendS(&filterseqregions, condition);
4558
4559 ajStrDel(&fieldname);
4560 ajStrDel(&condition);
4561 }
4562
4563 ajListIterDel(&fielditer);
4564
4565 ajDebug("dbfilter: %S\n", qry->DbFilter);
4566
4567 if(ajStrGetLen(qry->DbFilter))
4568 ajFmtPrintAppS(&filterseqregions, " AND %S", qry->DbFilter);
4569
4570 ajDebug("filter: %S\n", filterseqregions);
4571
4572 feattab = ajFeattableNew(seqid);
4573 connection = featChadoConnect(qry);
4574
4575 if(!ajStrGetLen(feattab->Db))
4576 ajStrAssignS(&feattab->Db, qry->DbName);
4577
4578 ret = featChadoQryfeatureQuery(connection, filterseqregions, feattab,
4579 fttabin->Start, fttabin->End);
4580
4581 ajSqlconnectionDel(&connection);
4582
4583 fttabin->Input->TextData = feattab;
4584
4585 qry->QryDone = ajTrue;
4586
4587 ajStrDel(&seqid);
4588 ajStrDel(&filterseqregions);
4589
4590 return ret;
4591 }
4592
4593
4594
4595
4596 /* @funcstatic featChadoQryfeatureQuery ***************************************
4597 **
4598 ** Queries the connected chado database for the emboss feature query.
4599 ** More queries are made for the child features of the queried feature(s)
4600 ** using function featChadoChildfeatureQuery.
4601 **
4602 ** @param [u] connection [AjPSqlconnection] SQL Database connection
4603 ** @param [u] filter [AjPStr] Filter conditions for the SQL query
4604 ** @param [u] feattab [AjPFeattable] Feature table
4605 ** @param [r] qrystart [ajint] start pos specified as part of the feature query
4606 ** @param [r] qryend [ajint] end pos specified as part of the feature query
4607 ** @return [AjBool] returns ajFalse if no features found
4608 **
4609 ** @release 6.4.0
4610 ** @@
4611 ******************************************************************************/
4612
featChadoQryfeatureQuery(AjPSqlconnection connection,AjPStr filter,AjPFeattable feattab,ajint qrystart,ajint qryend)4613 static AjBool featChadoQryfeatureQuery(AjPSqlconnection connection,
4614 AjPStr filter,
4615 AjPFeattable feattab,
4616 ajint qrystart, ajint qryend)
4617 {
4618 AjPSqlstatement statement = NULL;
4619 AjBool debugOn = ajFalse;
4620 ajint i = 0;
4621 AjPStr sql = NULL;
4622
4623 AjISqlrow iter = NULL;
4624 AjPSqlrow row = NULL;
4625 AjPStr colstr = NULL;
4626 AjPStr rowstr = NULL;
4627 AjPStr regionuniquename = NULL;
4628
4629 AjBool ret = ajTrue;
4630
4631 debugOn = ajDebugOn();
4632 sql = ajStrNew();
4633
4634 ajFmtPrintS(&sql,
4635 "SELECT feature.name, feature.uniquename,\n"
4636 " (SELECT COUNT(*) FROM featureloc"
4637 " WHERE feature.feature_id = featureloc.srcfeature_id),\n"
4638 " srcfeatureloc.srcfeature_id,"
4639 " srcfeature.uniquename,"
4640 " srcfeatureloc.fmin,"
4641 " srcfeatureloc.fmax,"
4642 " srcfeatureloc.strand, srcfeatureloc.phase,\n"
4643 " feature.seqlen,"
4644 " cvterm.name, db.name,"
4645 " feature.timelastmodified, feature.is_obsolete\n"
4646 " FROM feature\n"
4647 " LEFT JOIN cvterm ON cvterm.cvterm_id = feature.type_id\n"
4648 " LEFT OUTER JOIN dbxref ON dbxref.dbxref_id = feature.dbxref_id\n"
4649 " LEFT OUTER JOIN db USING (db_id)\n"
4650 " LEFT OUTER JOIN featureloc srcfeatureloc"
4651 " ON feature.feature_id = srcfeatureloc.feature_id\n"
4652 " LEFT OUTER JOIN feature srcfeature"
4653 " ON srcfeature.feature_id = srcfeatureloc.srcfeature_id\n"
4654 " WHERE feature.%S\n",
4655 filter
4656 );
4657
4658 ajDebug("SQL, query-feature query:\n%S\n",sql);
4659
4660 statement = ajSqlstatementNewRun(connection,sql);
4661
4662 if(!statement)
4663 {
4664 ajErr("Could not execute SQL statement [%S]", sql);
4665 ajExitBad();
4666 return ajFalse;
4667 }
4668
4669 iter = ajSqlrowiterNew(statement);
4670
4671 if(!iter)
4672 ret = ajFalse;
4673
4674 while(!ajSqlrowiterDone(iter))
4675 {
4676 row = ajSqlrowiterGet(iter);
4677
4678 if(debugOn)
4679 {
4680 while(row->Current < row->Columns)
4681 {
4682 ajSqlcolumnToStr(row,&colstr);
4683 ajFmtPrintAppS(&rowstr, "\t%S",colstr);
4684 }
4685
4686 ajDebug("qryfeature row %d: %S\n", ++i, rowstr);
4687 ajStrSetClear(&rowstr);
4688 }
4689
4690 ajSqlcolumnRewind(row);
4691
4692 regionuniquename = featChadoQryfeatureRow(feattab, row,
4693 qrystart, qryend);
4694 featChadoChildfeatureQuery(connection, feattab, regionuniquename);
4695 ajStrDel(®ionuniquename);
4696 }
4697
4698 ajSqlrowiterDel(&iter);
4699 ajSqlstatementDel(&statement);
4700 ajStrDel(&colstr);
4701 ajStrDel(&rowstr);
4702 ajStrDel(&sql);
4703
4704 return ret;
4705 }
4706
4707
4708
4709
4710 /* @funcstatic featChadoChildfeatureQuery *************************************
4711 **
4712 ** Query to retrieve features located on the feature specified
4713 ** using the srcfeature parameter
4714 **
4715 ** @param [u] connection [AjPSqlconnection] SQL Database connection
4716 ** @param [u] feattab [AjPFeattable] Feature table
4717 ** @param [r] srcfeature [const AjPStr] Unique name of the sequence region
4718 ** @return [void]
4719 **
4720 ** @release 6.4.0
4721 ** @@
4722 ******************************************************************************/
4723
featChadoChildfeatureQuery(AjPSqlconnection connection,AjPFeattable feattab,const AjPStr srcfeature)4724 static void featChadoChildfeatureQuery(AjPSqlconnection connection,
4725 AjPFeattable feattab,
4726 const AjPStr srcfeature)
4727 {
4728 AjPSqlstatement statement = NULL;
4729 AjBool debugOn = ajFalse;
4730 ajint i = 0;
4731
4732 AjISqlrow iter = NULL;
4733 AjPSqlrow row = NULL;
4734 AjPStr colstr = NULL;
4735 AjPStr rowstr = NULL;
4736 AjPStr sql = NULL;
4737 AjPFeature seqregionf = NULL;
4738
4739 debugOn = ajDebugOn();
4740 sql = ajStrNew();
4741
4742 ajListPeekLast(feattab->Features, (void**)&seqregionf);
4743
4744 /*
4745 * in SQL query below, feature refers to the sequence region being queried
4746 * sfeature refers to the features located within the sequence region,
4747 *
4748 * it is assumed that results are ordered such that features with the same
4749 * uniquename follows each other, we should check whether we need to use
4750 * explicit ORDER BY for any reason...
4751 */
4752
4753 ajFmtPrintS(&sql,
4754 "SELECT sfeature.name, sfeature.uniquename, relation.name,"
4755 " object.uniquename, object_type.name, featureloc.fmin,"
4756 " featureloc.fmax, featureloc.strand, featureloc.phase,\n"
4757 " cvterm.name, db.name, analysis.program, analysisfeature.rawscore,"
4758 " sfeature.timelastmodified, sfeature.is_obsolete\n"
4759 " FROM feature\n"
4760 " JOIN featureloc"
4761 " ON feature.feature_id = featureloc.srcfeature_id\n"
4762 " JOIN feature sfeature"
4763 " ON sfeature.feature_id = featureloc.feature_id\n"
4764 " LEFT OUTER JOIN feature_relationship"
4765 " ON sfeature.feature_id = feature_relationship.subject_id\n"
4766 " LEFT OUTER JOIN cvterm relation"
4767 " ON relation.cvterm_id = feature_relationship.type_id\n"
4768 " LEFT OUTER JOIN feature object"
4769 " ON object.feature_id = feature_relationship.object_id\n"
4770 " LEFT OUTER JOIN cvterm object_type"
4771 " ON object.type_id = object_type.cvterm_id\n"
4772 " JOIN cvterm ON cvterm.cvterm_id = sfeature.type_id\n"
4773 " LEFT OUTER JOIN dbxref ON dbxref.dbxref_id = sfeature.dbxref_id\n"
4774 " LEFT OUTER JOIN db USING (db_id)\n"
4775 " LEFT OUTER JOIN analysisfeature"
4776 " ON sfeature.feature_id=analysisfeature.feature_id\n"
4777 " LEFT OUTER JOIN analysis USING(analysis_id)\n"
4778 " WHERE \n"
4779 " feature.uniquename='%S'"
4780 " AND"
4781 " ((featureloc.fmin>%d AND featureloc.fmin<%d)"
4782 " OR"
4783 " (featureloc.fmax>=%d AND featureloc.fmax<=%d))\n",
4784 srcfeature,
4785 seqregionf->Start, seqregionf->End,
4786 seqregionf->Start, seqregionf->End);
4787
4788 ajDebug("SQL features:\n%S\n",sql);
4789
4790 statement = ajSqlstatementNewRun(connection,sql);
4791
4792 if(!statement)
4793 {
4794 ajErr("Could not execute SQL statement [%S]", sql);
4795 ajExitAbort();
4796 }
4797
4798 iter = ajSqlrowiterNew(statement);
4799
4800 while(!ajSqlrowiterDone(iter))
4801 {
4802 row = ajSqlrowiterGet(iter);
4803
4804 if(debugOn)
4805 {
4806 while(row->Current < row->Columns)
4807 {
4808 ajSqlcolumnToStr(row,&colstr);
4809 ajFmtPrintAppS(&rowstr, "\t%S",colstr);
4810 }
4811
4812 ajDebug("row %d: %S\n", ++i, rowstr);
4813 ajStrSetClear(&rowstr);
4814 }
4815
4816 ajSqlcolumnRewind(row);
4817
4818 featChadoChildfeatureRow(feattab, row);
4819 }
4820
4821 ajSqlrowiterDel(&iter);
4822 ajSqlstatementDel(&statement);
4823 ajStrDel(&colstr);
4824 ajStrDel(&rowstr);
4825 ajStrDel(&sql);
4826
4827 return;
4828 }
4829
4830
4831
4832
4833 /* @funcstatic featChadoChildfeatureRow ***************************************
4834 **
4835 ** Generates an emboss feature obj from a row of chado query results, and
4836 ** appends it to the feature table fttab.
4837 **
4838 ** @param [u] fttab [AjPFeattable] Feature table
4839 ** @param [u] row [AjPSqlrow] Input row
4840 ** @return [AjPFeature] New feature
4841 **
4842 ** @release 6.4.0
4843 ** @@
4844 ******************************************************************************/
4845
featChadoChildfeatureRow(AjPFeattable fttab,AjPSqlrow row)4846 static AjPFeature featChadoChildfeatureRow(AjPFeattable fttab, AjPSqlrow row)
4847 {
4848 AjPFeature feature = NULL;
4849 AjPFeature prevft = NULL;
4850
4851 AjPStr name = NULL;
4852 AjPStr type = NULL;
4853 AjPStr source = NULL;
4854 AjPStr sourcedb = NULL;
4855 AjPStr sourceprogram = NULL;
4856 AjPTagval idtag = NULL;
4857
4858 ajint start = 0;
4859 ajint end = 0;
4860 float score = 0;
4861 char strand = '+';
4862 ajint i = 0;
4863 ajint frame = 0;
4864
4865 AjPStr entryid = NULL;
4866 AjBool updateprevft = ajFalse;
4867 AjPStr alias = NULL;
4868 AjPStr relation = NULL;
4869
4870 /* The object can also be thought of as parent (containing feature),
4871 * and subject as child (contained feature or subfeature). ref:gmod.org */
4872
4873 AjPStr object = NULL;
4874 AjPStr objecttype = NULL;
4875 AjPTime timelm = NULL;
4876 AjPStr timelmS = NULL;
4877 AjBool isObsolete = ajFalse;
4878 AjPStr isObsoleteS = NULL;
4879
4880 if(!ajSqlrowGetColumns(row))
4881 return NULL;
4882
4883 timelm = ajTimeNew();
4884
4885 ajSqlcolumnToStr(row, &name);
4886 ajSqlcolumnToStr(row, &entryid);
4887 ajSqlcolumnToStr(row, &relation);
4888 ajSqlcolumnToStr(row, &object);
4889 ajSqlcolumnToStr(row, &objecttype);
4890 ajSqlcolumnToInt(row, &start);
4891 ajSqlcolumnToInt(row, &end);
4892
4893 ajSqlcolumnToInt(row, &i);
4894 if( i == 1 )
4895 strand = '+';
4896 else if( i == -1 )
4897 strand = '-';
4898 else
4899 strand = '\0'; /* change to \0 later */
4900
4901 ajSqlcolumnToInt(row, &frame);
4902
4903 ajSqlcolumnToStr(row, &type);
4904 ajSqlcolumnToStr(row, &sourcedb);
4905 ajSqlcolumnToStr(row, &sourceprogram);
4906 ajSqlcolumnToFloat(row, &score);
4907 /*ajSqlcolumnToStr(row, &alias);*/
4908 ajSqlcolumnToTime(row, &timelm);
4909 ajSqlcolumnToBool(row, &isObsolete);
4910
4911 if(ajStrGetLen(sourcedb))
4912 ajStrAssignS(&source, sourcedb);
4913 else if(ajStrGetLen(sourceprogram))
4914 ajStrAssignS(&source, sourceprogram);
4915 else
4916 source = ajStrNewC(".");
4917
4918 ajListPeekLast(fttab->Features, (void**)&prevft);
4919
4920 if(++start==(ajint)prevft->Start && end==(ajint)prevft->End)
4921 {
4922 ajListPeekFirst(prevft->GffTags, (void**)&idtag);
4923 if(ajStrMatchS(entryid, MAJTAGVALGETVALUE(idtag)))
4924 {
4925 updateprevft = ajTrue;
4926 feature = prevft;
4927 }
4928 }
4929
4930 if(!updateprevft)
4931 {
4932 feature = ajFeatNewNuc(fttab,
4933 source,
4934 type,
4935 start,
4936 end,
4937 score,
4938 strand,
4939 frame,
4940 0,0,0, NULL, NULL);
4941
4942 ajFeatGfftagAddCS(feature, "ID", entryid);
4943 ajFeatGfftagAddCS(feature, "Name", name);
4944 /*ajFeatGfftagAddC(feature, "Alias", alias);*/
4945
4946 /* FIXME: it looks we replace all type 'synonyms' with SO:0000110
4947 * following tag is a workaround until it is fixed */
4948 if(ajStrMatchC(feature->Type, "SO:0000110"))
4949 ajFeatGfftagAddCS(feature, "type", type);
4950 }
4951
4952 if(ajStrMatchC(relation, "part_of") ||
4953 ajStrMatchC(relation, "proper_part_of") ||
4954 ajStrMatchC(relation, "partof") ||
4955 ajStrMatchC(relation, "producedby"))
4956 {
4957 ajFeatGfftagAddCS(feature, "Parent", object);
4958 ajFeatGfftagAddCS(feature, "parent_type", objecttype);
4959 }
4960 else if(ajStrMatchC(relation, "derives_from"))
4961 {
4962 ajFeatGfftagAddCS(feature, "Derives_from", object);
4963 ajFeatGfftagAddCS(feature, "parent_type", objecttype);
4964 }
4965 else if(ajStrGetLen(relation))
4966 {
4967 ajFeatGfftagAddSS(feature, relation, object);
4968 ajFeatGfftagAddCS(feature, "object_type", objecttype);
4969 }
4970
4971
4972 if(!updateprevft)
4973 {
4974 if(timelm)
4975 {
4976 ajFmtPrintS(&timelmS,"%D",timelm);
4977 ajFeatGfftagAddCS(feature,"timelastmodified", timelmS);
4978 }
4979
4980 if(isObsolete)
4981 {
4982 isObsoleteS = ajStrNewC("true");
4983 ajFeatGfftagAddCS(feature,"isObsolete", isObsoleteS);
4984 ajStrDel(&isObsoleteS);
4985 }
4986 }
4987
4988 ajFeatTrace(feature);
4989
4990 ajStrDel(&name);
4991 ajStrDel(&entryid);
4992 ajStrDel(&relation);
4993 ajStrDel(&object);
4994 ajStrDel(&objecttype);
4995 ajStrDel(&type);
4996 ajStrDel(&source);
4997 ajStrDel(&sourcedb);
4998 ajStrDel(&sourceprogram);
4999 ajStrDel(&alias);
5000 ajTimeDel(&timelm);
5001 ajStrDel(&timelmS);
5002
5003 return feature;
5004 }
5005
5006
5007
5008
5009 /* @funcstatic featChadoQryfeatureRow *****************************************
5010 **
5011 ** Generates a new feature from chado query-feature query result row.
5012 ** Checks whether the query feature is located on another feature (seq region)
5013 **
5014 ** @param [u] fttab [AjPFeattable] Feature table
5015 ** @param [u] row [AjPSqlrow] Input row
5016 ** @param [r] qrystart [ajint] Query start position
5017 ** @param [r] qryend [ajint] Query end position
5018 ** @return [AjPStr] New feature ID
5019 **
5020 ** @release 6.4.0
5021 ** @@
5022 ******************************************************************************/
5023
featChadoQryfeatureRow(AjPFeattable fttab,AjPSqlrow row,ajint qrystart,ajint qryend)5024 static AjPStr featChadoQryfeatureRow(AjPFeattable fttab, AjPSqlrow row,
5025 ajint qrystart, ajint qryend)
5026 {
5027 AjPFeature gf = NULL;
5028 AjPStr name = NULL;
5029
5030 AjPStr source = NULL;
5031 AjPStr type = NULL;
5032 ajint start = 1;
5033 ajint seqlen = 0;
5034 float score = 0;
5035 char strand = '+';
5036 ajint loccount = 0; /* this is to be removed; initially thought
5037 we can use location count to decide whether
5038 the query feature is a sequence region,
5039 however current code decides it by checking whether
5040 a parent location is defined */
5041 ajint i = 0;
5042 ajint frame = 0;
5043 AjPStr uniquename = NULL;
5044 AjPStr alias = NULL;
5045 AjPStr regionid = NULL;
5046 AjPStr regionuniquename = NULL;
5047 ajint regionstart = 1;
5048 ajint regionend = 0;
5049 AjPTime timelm = NULL;
5050 AjPStr timelmS = NULL;
5051 AjBool isObsolete = ajFalse;
5052 AjPStr isObsoleteS = NULL;
5053
5054 if(!ajSqlrowGetColumns(row))
5055 return NULL;
5056
5057 timelm = ajTimeNew();
5058
5059 ajSqlcolumnToStr(row, &name);
5060 ajSqlcolumnToStr(row, &uniquename);
5061 ajSqlcolumnToInt(row, &loccount);
5062 ajSqlcolumnToStr(row, ®ionid);
5063 ajSqlcolumnToStr(row, ®ionuniquename);
5064 ajSqlcolumnToInt(row, ®ionstart);
5065 ajSqlcolumnToInt(row, ®ionend);
5066 ajDebug("location count:%d parent:%S parent-id:%S"
5067 " region start:%d region end:%d\n",
5068 loccount,
5069 regionuniquename, regionid, regionstart, regionend);
5070
5071 ajSqlcolumnToInt(row, &i);
5072 if( i == 1 )
5073 strand = '+';
5074 else if( i == -1 )
5075 strand = '-';
5076 else
5077 strand = '\0';
5078
5079 ajSqlcolumnToInt(row, &frame);
5080
5081 ajSqlcolumnToInt(row, &seqlen);
5082
5083 ajSqlcolumnToStr(row, &type);
5084 ajSqlcolumnToStr(row, &source);
5085 /*ajSqlcolumnToStr(row, &alias);*/
5086 ajSqlcolumnToTime(row, &timelm);
5087 ajSqlcolumnToBool(row, &isObsolete);
5088
5089 if(!ajStrGetLen(source))
5090 ajStrAssignS(&source, fttab->Db);
5091
5092
5093 if(qryend)
5094 {
5095 start = qrystart;
5096 seqlen = qryend;
5097 }
5098
5099 regionstart++;
5100
5101 gf = ajFeatNewNuc(fttab, source, type,
5102 (ajStrGetLen(regionuniquename) ? regionstart : start),
5103 (ajStrGetLen(regionuniquename) ? regionend : seqlen),
5104 score,
5105 strand,
5106 frame,
5107 0,0,0, NULL, NULL);
5108
5109 ajFeatGfftagAddCS(gf, "ID", uniquename);
5110 ajFeatGfftagAddCS(gf, "Name", name);
5111 /*ajFeatGfftagAddC(gf, "Alias", alias);*/
5112
5113 if(timelm)
5114 {
5115 ajFmtPrintS(&timelmS,"%D",timelm);
5116 ajFeatGfftagAddCS(gf,"timelastmodified", timelmS);
5117 }
5118
5119 if(isObsolete)
5120 {
5121 isObsoleteS = ajStrNewC("true");
5122 ajFeatGfftagAddCS(gf,"isObsolete", isObsoleteS);
5123 ajStrDel(&isObsoleteS);
5124 }
5125
5126 ajFeatTrace(gf);
5127
5128 ajStrDel(&name);
5129 ajStrDel(&type);
5130 ajStrDel(&source);
5131 ajStrDel(&alias);
5132 ajTimeDel(&timelm);
5133 ajStrDel(&timelmS);
5134 ajStrDel(®ionid);
5135
5136 if(ajStrGetLen(regionuniquename))
5137 {
5138 if(loccount)
5139 ajDebug("Feature '%S' is used as a source feature for %d other"
5140 " feature(s) but it is itself located on feature '%S';"
5141 " making queries on the parent feature '%S'\n",
5142 uniquename, loccount, regionuniquename, regionuniquename);
5143 ajStrAssignS(&fttab->Seqid, regionuniquename);
5144
5145 ajStrDel(&uniquename);
5146 return regionuniquename;
5147 }
5148
5149 ajStrDel(®ionuniquename);
5150
5151 return uniquename;
5152 }
5153
5154
5155
5156
5157 /* @funcstatic featChadoConnect ***********************************************
5158 **
5159 ** Connects to the chado database required by the query
5160 **
5161 ** @param [r] qry [const AjPQuery] Query object
5162 ** @return [AjPSqlconnection] SQL Database connection
5163 **
5164 ** @release 6.4.0
5165 ** @@
5166 ******************************************************************************/
5167
featChadoConnect(const AjPQuery qry)5168 static AjPSqlconnection featChadoConnect(const AjPQuery qry)
5169 {
5170 AjESqlconnectionClient client;
5171
5172 ajint iport = 3306;
5173
5174 AjPStr url = NULL;
5175 AjPUrlref uo = NULL;
5176
5177 AjPStr password = NULL;
5178 AjPStr socketfile = NULL;
5179
5180 AjPSqlconnection connection = NULL;
5181
5182 url = ajStrNew();
5183
5184 if(!ajNamDbGetUrl(qry->DbName, &url))
5185 {
5186 ajErr("no URL defined for database %S", qry->DbName);
5187
5188 return ajFalse;
5189 }
5190
5191 uo = ajHttpUrlrefNew();
5192
5193 ajHttpUrlrefParseC(&uo, ajStrGetPtr(url));
5194 ajHttpUrlrefSplitPort(uo);
5195 ajHttpUrlrefSplitUsername(uo);
5196
5197 if(ajStrMatchCaseC(uo->Method,"mysql"))
5198 client = ajESqlconnectionClientMySQL;
5199 else if(ajStrMatchCaseC(uo->Method,"postgresql"))
5200 {
5201 client = ajESqlconnectionClientPostgreSQL;
5202 iport = 5432;
5203 }
5204 else
5205 client = ajESqlconnectionClientNULL;
5206
5207 if(!ajStrGetLen(uo->Port))
5208 ajFmtPrintS(&uo->Port,"%d",iport);
5209
5210 if(ajStrGetLen(uo->Password))
5211 {
5212 password = ajStrNew();
5213 ajStrAssignS(&password,uo->Password);
5214 }
5215
5216 connection = ajSqlconnectionNewData(client,uo->Username,password,
5217 uo->Host,uo->Port,socketfile,
5218 uo->Absolute);
5219
5220 ajStrDel(&password);
5221
5222 if(!connection)
5223 ajErr("Could not connect to database server");
5224
5225 ajStrDel(&url);
5226 ajHttpUrlrefDel(&uo);
5227
5228 return connection;
5229 }
5230
5231
5232
5233
5234 /* @section DAS ***************************************************************
5235 **
5236 ** These functions manage the DAS database access methods.
5237 **
5238 ******************************************************************************/
5239
5240
5241
5242
5243 /* @funcstatic featAccessDas **************************************************
5244 **
5245 ** Feature access method for DAS feature sources
5246 **
5247 ** @param [u] fttabin [AjPFeattabin] Feature input.
5248 ** @return [AjBool] ajTrue on success.
5249 **
5250 ** @release 6.4.0
5251 ** @@
5252 ******************************************************************************/
5253
featAccessDas(AjPFeattabin fttabin)5254 static AjBool featAccessDas(AjPFeattabin fttabin)
5255 {
5256 AjPStr host = NULL;
5257 AjPStr path = NULL;
5258 AjIList iter = NULL;
5259 AjPQueryField field = NULL;
5260 AjPQuery qry = NULL;
5261 AjPTextin textin = NULL;
5262 AjPStr dasqueryurl = NULL;
5263
5264 AjBool ret = ajTrue;
5265 ajint port = 80;
5266
5267 textin = fttabin->Input;
5268
5269 qry = textin->Query;
5270
5271 if(qry->QryDone)
5272 return ajFalse;
5273
5274 if(!ajHttpQueryUrl(qry, &port, &host, &path))
5275 {
5276 ajStrDel(&host);
5277 ajStrDel(&path);
5278
5279 return ajFalse;
5280 }
5281
5282 if(ajStrGetCharLast(path)!='/')
5283 ajStrAppendK(&path,'/');
5284
5285 dasqueryurl = ajStrNew();
5286
5287 iter = ajListIterNewread(qry->QueryFields);
5288
5289 while(!ajListIterDone(iter))
5290 {
5291 field = ajListIterGet(iter);
5292
5293 if(ajStrMatchCaseC(field->Field, "id"))
5294 {
5295 if(!ajStrGetLen(dasqueryurl))
5296 ajFmtPrintS(&dasqueryurl,"segment=%S",
5297 field->Wildquery);
5298 else
5299 ajFmtPrintS(&dasqueryurl,"%S;segment=%S",
5300 dasqueryurl,
5301 field->Wildquery);
5302
5303 /* TODO: segment specific start,end positions */
5304 if(fttabin->End > 0)
5305 ajFmtPrintS(&dasqueryurl,"%S:%u,%u",
5306 dasqueryurl,fttabin->Start,fttabin->End);
5307 }
5308 else {
5309 if(!ajStrGetLen(dasqueryurl))
5310 ajFmtPrintS(&dasqueryurl,"%S=%S",
5311 field->Field,
5312 field->Wildquery);
5313 else
5314 ajFmtPrintS(&dasqueryurl,"%S;%S=%S",
5315 dasqueryurl,
5316 field->Field,
5317 field->Wildquery);
5318
5319 /* TODO: segment specific start,end positions */
5320 if(fttabin->End > 0)
5321 ajFmtPrintS(&dasqueryurl,"%S:%u,%u",
5322 dasqueryurl,fttabin->Start,fttabin->End);
5323 }
5324
5325 }
5326
5327 ajFmtPrintS(&path,"%Sfeatures?%S",path, dasqueryurl);
5328
5329 ajFilebuffDel(&textin->Filebuff);
5330 textin->Filebuff = ajHttpRead(qry->DbHttpVer, qry->DbName, qry->DbProxy,
5331 host, port, path);
5332
5333 if (textin->Filebuff)
5334 ajFilebuffHtmlNoheader(textin->Filebuff);
5335 else
5336 ret = ajFalse;
5337
5338 qry->QryDone = ajTrue;
5339
5340 ajStrDel(&host);
5341 ajStrDel(&path);
5342 ajStrDel(&dasqueryurl);
5343
5344 ajListIterDel(&iter);
5345
5346 return ret;
5347 }
5348
5349
5350
5351
5352 /* @func ajFeatdbPrintAccess **************************************************
5353 **
5354 ** Reports the internal data structures
5355 **
5356 ** @param [u] outf [AjPFile] Output file
5357 ** @param [r] full [AjBool] Full report (usually ajFalse)
5358 ** @return [void]
5359 **
5360 ** @release 6.4.0
5361 ** @@
5362 ******************************************************************************/
5363
ajFeatdbPrintAccess(AjPFile outf,AjBool full)5364 void ajFeatdbPrintAccess(AjPFile outf, AjBool full)
5365 {
5366 ajint i = 0;
5367
5368 ajFmtPrintF(outf, "\n");
5369 ajFmtPrintF(outf, "# Feature access methods\n");
5370 ajFmtPrintF(outf, "# Name Alias Entry Query All Description\n");
5371 ajFmtPrintF(outf, "\n");
5372 ajFmtPrintF(outf, "method {\n");
5373
5374 for(i=0; feattabAccess[i].Name; i++)
5375 if(full || !feattabAccess[i].Alias)
5376 ajFmtPrintF(outf, " %-10s %5B %5B %5B %5B \"%s\"\n",
5377 feattabAccess[i].Name, feattabAccess[i].Alias,
5378 feattabAccess[i].Entry, feattabAccess[i].Query,
5379 feattabAccess[i].All, feattabAccess[i].Desc);
5380
5381 ajFmtPrintF(outf, "}\n\n");
5382
5383 return;
5384 }
5385
5386
5387
5388
5389 /* @func ajFeatdbExit *********************************************************
5390 **
5391 ** Cleans up feature database processing internal memory
5392 **
5393 ** @return [void]
5394 **
5395 ** @release 6.4.0
5396 ** @@
5397 ******************************************************************************/
5398
ajFeatdbExit(void)5399 void ajFeatdbExit(void)
5400 {
5401 ajRegFree(&featCdDivExp);
5402 ajCharDel(&featCdName);
5403 ajRegFree(&featRegGcgId);
5404 ajRegFree(&featRegGcgCont);
5405 ajRegFree(&featRegGcgId2);
5406 ajRegFree(&featRegGcgSplit);
5407 ajRegFree(&featRegGcgRefId);
5408
5409 return;
5410 }
5411