1 /* @source ajfeatread *********************************************************
2 **
3 ** AJAX feature reading functions
4 **
5 ** These functions control all aspects of AJAX feature reading
6 **
7 ** @author Copyright (C) 1999 Richard Bruskiewich
8 ** @version $Revision: 1.62 $
9 ** @modified 2000 Ian Longden.
10 ** @modified 2001 Peter Rice.
11 ** @modified $Date: 2013/06/29 22:31:59 $ by $Author: rice $
12 ** @@
13 **
14 ** This library is free software; you can redistribute it and/or
15 ** modify it under the terms of the GNU Lesser General Public
16 ** License as published by the Free Software Foundation; either
17 ** version 2.1 of the License, or (at your option) any later version.
18 **
19 ** This library is distributed in the hope that it will be useful,
20 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
21 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 ** Lesser General Public License for more details.
23 **
24 ** You should have received a copy of the GNU Lesser General Public
25 ** License along with this library; if not, write to the Free Software
26 ** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
27 ** MA 02110-1301, USA.
28 **
29 ******************************************************************************/
30
31
32 #include "ajlib.h"
33
34 #include "ajfeatread.h"
35 #include "ajcall.h"
36 #include "ajfeat.h"
37 #include "ajtagval.h"
38 #include "ajreg.h"
39 #include "ajdom.h"
40 #include "ajtextread.h"
41 #include "ajfileio.h"
42 #include "ajquery.h"
43 #include "ajnam.h"
44
45
46 #include <limits.h>
47 #include <math.h>
48 #include <errno.h>
49
50 #ifdef WIN32
51 #define fileno _fileno
52 #endif
53
54 AjPTable feattabDbMethods = NULL;
55
56 static AjPRegexp featinRegUfoFmt = NULL;
57 static AjPRegexp featinRegUfoFile = NULL;
58 static AjPRegexp featRegFlag = NULL;
59 static AjPRegexp featRegMore = NULL;
60 static AjPRegexp featRegGroup = NULL;
61 static AjPRegexp featRegGff3Group = NULL;
62
63 static AjPStr featinUfoTest = NULL;
64 static AjPStr featReadLine = NULL;
65 static AjPStr featinTagNote = NULL;
66 static AjPStr featinTagComm = NULL;
67 static AjPStr featinTagFtid = NULL;
68 static AjPStr featinSourcePir = NULL;
69 static AjPStr featinSourceSwiss = NULL;
70 static AjPStr featinSourceRefseqp = NULL;
71 static AjPStr featinSourceEmbl = NULL;
72
73 static AjBool FeatInitGff2 = AJFALSE;
74 static AjBool FeatInitGff3 = AJFALSE;
75 static AjBool FeatInitEmbl = AJFALSE;
76 static AjBool FeatInitPir = AJFALSE;
77 static AjBool FeatInitSwiss = AJFALSE;
78 static AjBool FeatInitRefseqp = AJFALSE;
79 static AjPStr featProcessLine = NULL;
80
81 static AjPStr featGroup = NULL;
82 static AjPStr featId = NULL;
83 static AjPStr featLabel = NULL;
84
85 static AjPStr featSeqid = NULL;
86 static AjPStr featSource = NULL;
87 static AjPStr featFeature = NULL;
88 static AjPStrTok featGffSplit = NULL;
89 static AjPStrTok featEmblSplit = NULL;
90 static AjPStr featLocStr = NULL;
91 static AjPStr featLocToken = NULL;
92 static AjPStr featLocDb = NULL;
93 static AjPStr featSaveGroupStr = NULL;
94 static ajint featSaveExon = 0;
95 static ajint featSaveGroup = 0;
96 static AjPStr featinFormatTmp = NULL;
97 static AjPStr featinValTmp = NULL;
98 static AjPStr featinTmpStr = NULL;
99
100
101
102 static void featGff3Matchtable(AjPFeattable ftable,
103 AjPTable *idtable, AjPList *childlist);
104 static AjBool featFindInformatC(const char* format, ajint* iformat);
105 static AjBool featFindInformatS(const AjPStr format, ajint* iformat);
106 static AjBool featDelRegEmbl(void);
107 static AjBool featDelRegGff2(void);
108 static AjBool featDelRegGff3(void);
109 static AjBool featDelRegPir(void);
110 static AjBool featDelRegRefseqp(void);
111 static AjBool featDelRegSwiss(void);
112 static AjBool featEmblLoc(const AjPStr loc,
113 AjPStr* begstr, AjBool* between,
114 AjBool* simple, AjPStr* endstr);
115 static AjBool featEmblLocNum(const AjPStr loc,
116 AjBool* bound, ajuint* num);
117 static AjBool featEmblLocRange(const AjPStr loc,
118 ajuint* num1, ajuint* num2);
119 static AjBool featEmblOperIn(const AjPStr loc, AjPStr* opnam,
120 AjPStr* value, AjPStr* rest);
121 static AjBool featEmblOperNone(const AjPStr loc, AjPStr* entryid,
122 AjPStr* value, AjPStr* rest);
123 static AjBool featEmblOperOut(const AjPStr loc,
124 AjPStr* opnam, AjPStr* value);
125 static AjBool featFormatSet(AjPFeattabin featin);
126 static AjPFeature featPirFromLine(AjPFeattable thys,
127 const AjPStr origline);
128 static AjPFeature featSwissFromLine(AjPFeattable thys, const AjPStr line,
129 AjPStr* savefeat, AjPStr* savefrom,
130 AjPStr* saveto, AjPStr* saveline);
131 static AjPFeature featSwissProcess(AjPFeattable thys, const AjPStr feature,
132 const AjPStr fromstr, const AjPStr tostr,
133 const AjPStr source,
134 const AjPStr tags);
135 static AjBool featReadChado(AjPFeattabin feattabin,
136 AjPFeattable ftable);
137 static AjBool featReadDasgff(AjPFeattabin feattabin,
138 AjPFeattable ftable);
139 static AjBool featReadEmbl(AjPFeattabin feattabin,
140 AjPFeattable ftable);
141 static AjBool featReadGenpept(AjPFeattabin feattabin,
142 AjPFeattable ftable);
143 static AjBool featReadRefseq(AjPFeattabin feattabin,
144 AjPFeattable ftable);
145 static AjBool featReadRefseqp(AjPFeattabin feattabin,
146 AjPFeattable ftable);
147 static AjBool featReadGff2(AjPFeattabin feattabin,
148 AjPFeattable ftable);
149 static AjBool featReadGff3(AjPFeattabin feattabin,
150 AjPFeattable ftable);
151 static AjBool featReadGff3old(AjPFeattabin feattabin,
152 AjPFeattable ftable);
153 static AjBool featReadPir(AjPFeattabin feattabin,
154 AjPFeattable ftable);
155 static AjBool featReadSwiss(AjPFeattabin feattabin,
156 AjPFeattable ftable);
157
158 static AjBool featRegInitEmbl(void);
159 static AjBool featRegInitGff2(void);
160 static AjBool featRegInitGff3(void);
161 static AjBool featRegInitPir(void);
162 static AjBool featRegInitRefseqp(void);
163 static AjBool featRegInitSwiss(void);
164
165 static AjPFeature featEmblFromLine(AjPFeattable thys, const AjPStr line,
166 AjPStr* savefeat,
167 AjPStr* saveloc, AjPStr* saveline);
168 static AjPFeature featEmblProcess(AjPFeattable thys, const AjPStr feature,
169 const AjPStr source,
170 AjPStr* loc, AjPStr* tags);
171 static AjPFeature featRefseqpFromLine(AjPFeattable thys, const AjPStr line,
172 AjPStr* savefeat,
173 AjPStr* saveloc, AjPStr* saveline);
174 static AjPFeature featRefseqpProcess(AjPFeattable thys, const AjPStr feature,
175 const AjPStr source,
176 AjPStr* loc, AjPStr* tags);
177 static AjPFeature featGff2FromLine(AjPFeattable thys, const AjPStr line,
178 float version);
179
180 static AjPFeature featGff3FromLine(AjPFeattable thys, const AjPStr line,
181 AjPTable idtable, AjPList childlist);
182 static AjPFeature featGff3oldFromLine(AjPFeattable thys, const AjPStr line);
183
184 static void featGff2ProcessTagval(AjPFeature gf,
185 AjPFeattable table,
186 const AjPStr groupfield,
187 float version);
188
189 static AjPStr featGff3ProcessTagval(AjPFeature gf,
190 AjPFeattable table,
191 const AjPStr groupfield,
192 AjBool *parent);
193 static void featGff3oldProcessTagval(AjPFeature gf,
194 AjPFeattable table,
195 const AjPStr groupfield);
196 static AjBool featEmblTvRest(AjPStr* tags, AjPStr* skip);
197 static AjBool featEmblTvTagVal(AjPStr* tags, AjPStr* tag, AjPStr* value);
198 static void featGff3FlagSet(AjPFeature gf, const AjPStr flags);
199 static void featFlagSet(AjPFeature gf, const AjPStr flags);
200 static void featGroupSet(AjPFeature gf, AjPFeattable table,
201 const AjPStr grouptag);
202 static void featGff3GroupSet(AjPFeature gf, AjPFeattable table,
203 const AjPStr grouptag);
204
205 /* Set each of the regular expressions below, depending on feature format */
206
207 static AjPRegexp GffRegexNumeric = NULL;
208 static AjPRegexp GffRegexblankline = NULL;
209 static AjPRegexp GffRegexversion = NULL;
210 static AjPRegexp GffRegexdate = NULL;
211 static AjPRegexp GffRegexregion = NULL;
212 static AjPRegexp GffRegexcomment = NULL;
213 static AjPRegexp GffRegextype = NULL;
214
215 static AjPRegexp GffRegexTvTagval = NULL;
216
217 static AjPRegexp Gff3RegexNumeric = NULL;
218 static AjPRegexp Gff3Regexblankline = NULL;
219 static AjPRegexp Gff3Regexversion = NULL;
220 static AjPRegexp Gff3Regexdate = NULL;
221 static AjPRegexp Gff3Regexregion = NULL;
222 static AjPRegexp Gff3Regexcomment = NULL;
223 static AjPRegexp Gff3Regexdirective = NULL;
224 static AjPRegexp Gff3Regextype = NULL;
225
226 static AjPRegexp Gff3RegexTvTagval = NULL;
227 static AjPRegexp Gff3oldRegexTvTagval = NULL;
228
229 static AjPRegexp PirRegexAll = NULL;
230 static AjPRegexp PirRegexCom = NULL;
231 static AjPRegexp PirRegexLoc = NULL;
232 static AjPRegexp PirRegexPos = NULL;
233
234 static AjPRegexp SwRegexComment = NULL;
235 static AjPRegexp SwRegexFtid = NULL;
236 static AjPRegexp SwRegexNew = NULL;
237 static AjPRegexp SwRegexNext = NULL;
238
239 static AjPRegexp featTagTrans = NULL;
240
241
242
243 /* @datastatic FeatPListUfo ***************************************************
244 **
245 ** Usa processing list of UFOs from a list file.
246 **
247 ** Includes data from the original UFO (@listfile)
248 **
249 ** @alias FeatSListUfo
250 ** @alias FeatOListUfo
251 **
252 ** @attr Begin [ajint] Begin if defined in original UFO
253 ** @attr End [ajint] End if defined in original UFO
254 ** @attr Rev [AjBool] Reverse if defined in original UFO
255 ** @attr Format [ajuint] Format number from original UFO
256 ** @attr Formatstr [AjPStr] Format name from original UFO
257 ** @attr Ufo [AjPStr] Current UFO
258 ** @attr Fpos [ajulong] Start position offset
259 ** @attr Features [AjBool] if true, process features
260 ** @attr Padding [char[4]] Padding to alignment boundary
261 ** @@
262 ******************************************************************************/
263
264 typedef struct FeatSListUfo
265 {
266 ajint Begin;
267 ajint End;
268 AjBool Rev;
269 ajuint Format;
270 AjPStr Formatstr;
271 AjPStr Ufo;
272 ajulong Fpos;
273 AjBool Features;
274 char Padding[4];
275 } FeatOListUfo;
276
277 #define FeatPListUfo FeatOListUfo*
278
279
280
281
282 /* @datastatic FeatPInformat **************************************************
283 **
284 ** Feature input format definition
285 **
286 ** @alias FeatSInformat
287 ** @alias FeatOInformat
288 **
289 ** @attr Name [const char*] Input format name
290 ** @attr Obo [const char*] Ontology term id from EDAM
291 ** @attr Alias [AjBool] True if name is an alias for an identical definition
292 ** @attr Try [AjBool] If true, try for an unknown input. Duplicate names
293 ** and read-anything formats are set false
294 ** @attr Nucleotide [AjBool] True if suitable for nucleotide data
295 ** @attr Protein [AjBool] True if suitable for protein data
296 ** @attr Used [AjBool] True if already used (initialised)
297 ** @attr Padding [AjBool] Padding to alignment boundary
298 ** @attr Read [AjBool function] Function to read feature data
299 ** @attr InitReg [AjBool function] Function to initialise regular expressions
300 ** @attr DelReg [AjBool function] Function to clean up regular expressions
301 ** @attr Desc [const char*] Description
302 ** @@
303 ******************************************************************************/
304
305 typedef struct FeatSInformat
306 {
307 const char *Name;
308 const char *Obo;
309 AjBool Alias;
310 AjBool Try;
311 AjBool Nucleotide;
312 AjBool Protein;
313 AjBool Used;
314 AjBool Padding;
315 AjBool (*Read) (AjPFeattabin thys, AjPFeattable ftable);
316 AjBool (*InitReg) (void);
317 AjBool (*DelReg) (void);
318 const char *Desc;
319 } FeatOInformat;
320
321 #define FeatPInformat FeatOInformat*
322
323 /* name Dna Protein
324 input-function init-regex-function del-regex-function */
325
326
327
328
329 /* @funclist featinformatDef **************************************************
330 **
331 ** Input feature formats
332 **
333 ** Includes the read function (featRead), and initialising (featRegInit)
334 ** and deletion (featDelReg) of parsing regular expression.
335 **
336 ******************************************************************************/
337
338 static FeatOInformat featinformatDef[] =
339 {
340 /*Name OBO
341 Alias Try Dna Prot Used (initially false) Padding
342 ReadFunction RegInitFunction RegDelFunction
343 Description*/
344 {"unknown", "0000",
345 AJFALSE, AJFALSE, AJFALSE, AJFALSE, AJFALSE, AJFALSE,
346 &featReadGff3, NULL, NULL,
347 "unknown format"},
348 {"embl", "1927",
349 AJFALSE, AJTRUE, AJTRUE, AJFALSE, AJFALSE, AJFALSE,
350 &featReadEmbl, &featRegInitEmbl, &featDelRegEmbl,
351 "embl/genbank/ddbj format"},
352 {"em", "1927",
353 AJTRUE, AJFALSE, AJTRUE, AJFALSE, AJFALSE, AJFALSE,
354 &featReadEmbl, &featRegInitEmbl, &featDelRegEmbl,
355 "embl/genbank/ddbj format"},
356 {"genbank", "1936",
357 AJTRUE, AJFALSE, AJTRUE, AJFALSE, AJFALSE, AJFALSE,
358 &featReadEmbl, &featRegInitEmbl, &featDelRegEmbl,
359 "embl/genbank/ddbj format"},
360 {"gb", "1936",
361 AJTRUE, AJFALSE, AJTRUE, AJFALSE, AJFALSE, AJFALSE,
362 &featReadEmbl, &featRegInitEmbl, &featDelRegEmbl,
363 "embl/genbank/ddbj format"},
364 {"ddbj", "1936",
365 AJTRUE, AJFALSE, AJTRUE, AJFALSE, AJFALSE, AJFALSE,
366 &featReadEmbl, &featRegInitEmbl, &featDelRegEmbl,
367 "embl/genbank/ddbj format"},
368 {"refseq", "1958",
369 AJFALSE, AJTRUE, AJTRUE, AJFALSE, AJFALSE, AJFALSE,
370 &featReadRefseq, & featRegInitEmbl, &featDelRegEmbl,
371 "embl/genbank/ddbj format"},
372 {"refseqp", "0000",
373 AJFALSE, AJTRUE, AJFALSE, AJTRUE, AJFALSE, AJFALSE,
374 &featReadRefseqp, &featRegInitRefseqp, &featDelRegRefseqp,
375 "RefSeq protein format"},
376 {"genpept", "0000",
377 AJFALSE, AJTRUE, AJFALSE, AJTRUE, AJFALSE, AJFALSE,
378 &featReadGenpept, &featRegInitSwiss, &featDelRegSwiss,
379 "genpept format"},
380 {"gff3", "1939",
381 AJFALSE, AJTRUE, AJTRUE, AJTRUE, AJFALSE, AJFALSE,
382 &featReadGff3, &featRegInitGff3, &featDelRegGff3,
383 "GFF version 3"},
384 {"gff3emboss", "1939",
385 AJFALSE, AJFALSE, AJTRUE, AJTRUE, AJFALSE, AJFALSE,
386 &featReadGff3old, &featRegInitGff3, &featDelRegGff3,
387 "GFF version 3 written by EMBOSS before release 6.4.0"},
388 {"gff2", "1938",
389 AJFALSE, AJTRUE, AJTRUE, AJTRUE, AJFALSE, AJFALSE,
390 &featReadGff2, &featRegInitGff2, &featDelRegGff2,
391 "GFF version 1 or 2"},
392 {"gff", "1939",
393 AJTRUE, AJFALSE, AJTRUE, AJTRUE, AJFALSE, AJFALSE,
394 &featReadGff3, &featRegInitGff3, &featDelRegGff3,
395 "GFF version3"},
396 {"swiss", "1963",
397 AJFALSE, AJTRUE, AJFALSE, AJTRUE, AJFALSE, AJFALSE,
398 &featReadSwiss, &featRegInitSwiss, &featDelRegSwiss,
399 "SwissProt format"},
400 {"sw", "1963",
401 AJTRUE, AJFALSE, AJFALSE, AJTRUE, AJFALSE, AJFALSE,
402 &featReadSwiss, &featRegInitSwiss, &featDelRegSwiss,
403 "SwissProt format"},
404 {"uniprot", "2188",
405 AJTRUE, AJFALSE, AJFALSE, AJTRUE, AJFALSE, AJFALSE,
406 &featReadSwiss, &featRegInitSwiss, &featDelRegSwiss,
407 "SwissProt format"},
408 {"swissprot", "0000",
409 AJTRUE, AJTRUE, AJFALSE, AJTRUE, AJFALSE, AJFALSE,
410 &featReadSwiss, &featRegInitSwiss, &featDelRegSwiss,
411 "SwissProt format"},
412 {"pir", "1948",
413 AJFALSE, AJTRUE, AJFALSE, AJTRUE, AJFALSE, AJFALSE,
414 &featReadPir, &featRegInitPir, &featDelRegPir,
415 "PIR format"},
416 {"nbrf", "1948",
417 AJTRUE, AJFALSE, AJFALSE, AJTRUE, AJFALSE, AJFALSE,
418 &featReadPir, &featRegInitPir, &featDelRegPir,
419 "PIR format"},
420 {"dasgff", "1978",
421 AJFALSE, AJTRUE, AJTRUE, AJFALSE, AJFALSE, AJFALSE,
422 &featReadDasgff, NULL, NULL,
423 "DAS versions 1.5 or 1.6"},
424 {"chado", "0000",
425 AJFALSE, AJTRUE, AJTRUE, AJFALSE, AJFALSE, AJFALSE,
426 &featReadChado, NULL, NULL,
427 "CHADO"},
428 {NULL, NULL,
429 AJFALSE, AJFALSE, AJFALSE, AJFALSE, AJFALSE, AJFALSE,
430 NULL, NULL, NULL,
431 NULL}
432 };
433
434
435
436
/* @datastatic FeatPTypein ****************************************************
**
** Maps a feature input type name, as given by the caller, to the
** internal type code stored on the input object.
**
** @alias FeatSTypein
** @alias FeatOTypein
**
** @attr Name [const char*] Specified name
** @attr Value [const char*] Internal type "P" or "N"
** @@
******************************************************************************/

typedef struct FeatSTypein
{
    const char* Name;       /* name accepted from the caller */
    const char* Value;      /* internal type code */
} FeatOTypein;

#define FeatPTypein FeatOTypein*
456
457
458
459
460 static FeatOTypein featinTypes[] =
461 {
462 {"P", "P"},
463 {"protein", "P"},
464 {"protfeatures", "P"},
465 {"N", "N"},
466 {"nucleotide", "N"},
467 {"nucfeatures", "N"},
468 {"any", ""},
469 {"features", ""},
470 {NULL, NULL}
471 };
472
473
474
475
476 static ajuint feattabinReadFmt(AjPFeattabin feattabin,
477 AjPFeattable ftable,
478 ajuint format);
479 static AjBool feattabinRead(AjPFeattabin feattabin,
480 AjPFeattable ftable);
481 static AjBool feattabinformatFind(const AjPStr format,
482 ajint* iformat);
483 static AjBool feattabinFormatSet(AjPFeattabin feattabin,
484 AjPFeattable ftable);
485 static AjBool feattabinListProcess(AjPFeattabin feattabin,
486 AjPFeattable ftable,
487 const AjPStr listfile);
488 static void feattabinListNoComment(AjPStr* text);
489 static void feattabinQryRestore(AjPFeattabin feattabin,
490 const FeatPListUfo node);
491 static void feattabinQrySave(FeatPListUfo node,
492 const AjPFeattabin feattabin);
493 static AjBool feattabinQryProcess(AjPFeattabin feattabin,
494 AjPFeattable ftable);
495 static AjBool feattabinQueryMatch(const AjPQuery thys,
496 const AjPFeattable ftable);
497 static AjBool feattableDefine(AjPFeattable thys,
498 AjPFeattabin feattabin);
499
500
501
502
503 /* @func ajFeattabinNew *******************************************************
504 **
505 ** Constructor for an empty feature table input object
506 **
507 ** @return [AjPFeattabin] Feature table input object
508 ** @category new [AjPFeattabin] Constructor
509 **
510 ** @release 6.4.0
511 ** @@
512 ******************************************************************************/
513
ajFeattabinNew(void)514 AjPFeattabin ajFeattabinNew(void)
515 {
516 AjPFeattabin pthis;
517 AJNEW0(pthis);
518
519 pthis->Input = ajTextinNewDatatype(AJDATATYPE_FEATURES);
520
521 /* ajDebug("ajFeatTabinNew %x\n", pthis);*/
522
523 return pthis;
524 }
525
526
527
528
529 /* @func ajFeattabinNewSS *****************************************************
530 **
531 ** Constructor for an empty feature table input object. The format and
532 ** name are read.
533 **
534 ** @param [r] fmt [const AjPStr] feature format
535 ** @param [r] name [const AjPStr] sequence name
536 ** @param [r] type [const char*] feature type
537 ** @return [AjPFeattabin] Feature table input object
538 ** @category new [AjPFeattabin] Constructor with format, name and type
539 **
540 ** @release 6.4.0
541 ** @@
542 ******************************************************************************/
543
ajFeattabinNewSS(const AjPStr fmt,const AjPStr name,const char * type)544 AjPFeattabin ajFeattabinNewSS(const AjPStr fmt, const AjPStr name,
545 const char* type)
546 {
547 AjPFeattabin pthis;
548 ajint iformat = 0;
549
550 if(!featFindInformatS(fmt, &iformat))
551 return NULL;
552
553 pthis = ajFeattabinNew();
554 ajStrAssignC(&pthis->Formatstr, featinformatDef[pthis->Input->Format].Name);
555 pthis->Input->Format = iformat;
556 ajStrAssignC(&pthis->Type, type);
557 ajStrAssignS(&pthis->Seqname, name);
558 pthis->Input->Filebuff = ajFilebuffNewNofile();
559
560 return pthis;
561 }
562
563
564
565
566 /* @func ajFeattabinNewCSF ****************************************************
567 **
568 ** Constructor for an empty feature table input object. The format and
569 ** name are read. The file buffer is moved to the feature table input
570 ** object and should not be deleted by the calling program.
571 **
572 ** @param [r] fmt [const char*] feature format
573 ** @param [r] name [const AjPStr] sequence name
574 ** @param [r] type [const char*] feature type
575 ** @param [u] buff [AjPFilebuff] Buffer containing feature data
576 ** @return [AjPFeattabin] Feature table input object
577 ** @category new [AjPFeattabin] Constructor with format, name, type
578 ** and input file
579 **
580 ** @release 6.4.0
581 ** @@
582 ******************************************************************************/
583
ajFeattabinNewCSF(const char * fmt,const AjPStr name,const char * type,AjPFilebuff buff)584 AjPFeattabin ajFeattabinNewCSF(const char* fmt, const AjPStr name,
585 const char* type, AjPFilebuff buff)
586 {
587 AjPFeattabin pthis;
588 ajint iformat = 0;
589
590 if(!featFindInformatC(fmt, &iformat))
591 return NULL;
592
593 pthis = ajFeattabinNew();
594 ajStrAssignC(&pthis->Formatstr, featinformatDef[iformat].Name);
595 pthis->Input->Format = iformat;
596 ajStrAssignC(&pthis->Type, type);
597 ajStrAssignS(&pthis->Seqname, name);
598 pthis->Local = ajTrue;
599 pthis->Input->Filebuff = buff;
600
601 return pthis;
602 }
603
604
605
606
607 /* @func ajFeattabinNewSSF ****************************************************
608 **
609 ** Constructor for an empty feature table input object. The format and
610 ** name are read. The file buffer is moved to the feature table input
611 ** object and should not be deleted by the calling program.
612 **
613 ** @param [r] fmt [const AjPStr] feature format
614 ** @param [r] name [const AjPStr] sequence name
615 ** @param [r] type [const char*] feature type
616 ** @param [u] buff [AjPFilebuff] Buffer containing feature data
617 ** @return [AjPFeattabin] Feature table input object
618 ** @category new [AjPFeattabin] Constructor with format, name, type
619 ** and input file
620 **
621 ** @release 6.4.0
622 ** @@
623 ******************************************************************************/
624
ajFeattabinNewSSF(const AjPStr fmt,const AjPStr name,const char * type,AjPFilebuff buff)625 AjPFeattabin ajFeattabinNewSSF(const AjPStr fmt, const AjPStr name,
626 const char* type, AjPFilebuff buff)
627 {
628 AjPFeattabin pthis;
629 ajint iformat = 0;
630
631 if(!featFindInformatS(fmt, &iformat))
632 return NULL;
633
634 pthis = ajFeattabinNew();
635 ajStrAssignC(&pthis->Formatstr, featinformatDef[iformat].Name);
636 pthis->Input->Format = iformat;
637 ajStrAssignC(&pthis->Type, type);
638 ajStrAssignS(&pthis->Seqname, name);
639 pthis->Local = ajTrue;
640 pthis->Input->Filebuff = buff;
641
642 return pthis;
643 }
644
645
646
647
648 /* @func ajFeattabinNewSeqinSS ************************************************
649 **
650 ** Constructor for an empty feature table input object. The format and
651 ** name are read.
652 **
653 ** @param [r] seqin [const AjPSeqin] sequence input object
654 ** @param [r] fmt [const AjPStr] feature format
655 ** @param [r] name [const AjPStr] sequence name
656 ** @param [r] type [const char*] feature type
657 ** @return [AjPFeattabin] Feature table input object
658 ** @category new [AjPFeattabin] Constructor with format, name and type
659 **
660 ** @release 6.4.0
661 ** @@
662 ******************************************************************************/
663
ajFeattabinNewSeqinSS(const AjPSeqin seqin,const AjPStr fmt,const AjPStr name,const char * type)664 AjPFeattabin ajFeattabinNewSeqinSS(const AjPSeqin seqin, const AjPStr fmt,
665 const AjPStr name, const char* type)
666 {
667 AjPFeattabin pthis;
668 ajint iformat = 0;
669
670 if(!featFindInformatS(fmt, &iformat))
671 return NULL;
672
673 pthis = ajFeattabinNew();
674 ajStrAssignC(&pthis->Formatstr, featinformatDef[pthis->Input->Format].Name);
675 pthis->Input->Format = iformat;
676 ajStrAssignC(&pthis->Type, type);
677 ajStrAssignS(&pthis->Seqname, name);
678 pthis->Input->Filebuff = ajFilebuffNewNofile();
679
680
681 pthis->Start = seqin->Begin;
682 pthis->End = seqin->End;
683
684 return pthis;
685 }
686
687
688
689
690 /* @func ajFeattabinNewSeqinSSF ***********************************************
691 **
692 ** Constructor for an empty feature table input object. The format and
693 ** name are read. The file buffer is moved to the feature table input
694 ** object and should not be deleted by the calling program.
695 **
696 ** @param [r] seqin [const AjPSeqin] sequence input object
697 ** @param [r] fmt [const AjPStr] feature format
698 ** @param [r] name [const AjPStr] sequence name
699 ** @param [r] type [const char*] feature type
700 ** @param [u] buff [AjPFilebuff] Buffer containing feature data
701 ** @return [AjPFeattabin] Feature table input object
702 ** @category new [AjPFeattabin] Constructor with format, name, type
703 ** and input file
704 **
705 ** @release 6.6.0
706 ** @@
707 ******************************************************************************/
708
ajFeattabinNewSeqinSSF(const AjPSeqin seqin,const AjPStr fmt,const AjPStr name,const char * type,AjPFilebuff buff)709 AjPFeattabin ajFeattabinNewSeqinSSF(const AjPSeqin seqin, const AjPStr fmt,
710 const AjPStr name, const char* type,
711 AjPFilebuff buff)
712 {
713 AjPFeattabin pthis;
714 ajint iformat = 0;
715
716 if(!featFindInformatS(fmt, &iformat))
717 return NULL;
718
719 pthis = ajFeattabinNew();
720 ajStrAssignC(&pthis->Formatstr, featinformatDef[iformat].Name);
721 pthis->Input->Format = iformat;
722 ajStrAssignC(&pthis->Type, type);
723 ajStrAssignS(&pthis->Seqname, name);
724 pthis->Local = ajTrue;
725 pthis->Input->Filebuff = buff;
726
727 pthis->Start = seqin->Begin;
728 pthis->End = seqin->End;
729
730 return pthis;
731 }
732
733
734
735
736 /* @func ajFeattabinDel *******************************************************
737 **
738 ** Destructor for a feature table input object
739 **
740 ** @param [d] pthis [AjPFeattabin*] Feature table input object
741 ** @return [void]
742 ** @category delete [AjPFeattabin] Destructor
743 **
744 ** @release 6.4.0
745 ** @@
746 ******************************************************************************/
747
ajFeattabinDel(AjPFeattabin * pthis)748 void ajFeattabinDel(AjPFeattabin* pthis)
749 {
750 AjPFeattabin thys;
751
752 thys = *pthis;
753
754 if(!thys)
755 return;
756
757 ajTextinDel(&thys->Input);
758
759 ajStrDel(&thys->Ufo);
760 ajStrDel(&thys->Formatstr);
761 ajStrDel(&thys->Filename);
762 ajStrDel(&thys->Filename);
763 ajStrDel(&thys->Seqid);
764 ajStrDel(&thys->Seqname);
765 ajStrDel(&thys->Type);
766 AJFREE(*pthis);
767
768 return;
769 }
770
771
772
773
774
775 /* @func ajFeattabinClear *****************************************************
776 **
777 ** Clears a feature table input object back to "as new" condition, except
778 ** for the USA list which must be preserved.
779 **
780 ** @param [u] thys [AjPFeattabin] Sequence input
781 ** @return [void]
782 **
783 ** @release 6.4.0
784 ** @@
785 ******************************************************************************/
786
ajFeattabinClear(AjPFeattabin thys)787 void ajFeattabinClear(AjPFeattabin thys)
788 {
789 ajDebug("ajFeattabinClear called Local:%B\n", thys->Local);
790
791 if(!thys)
792 return;
793
794 ajStrSetClear(&thys->Ufo);
795 ajStrSetClear(&thys->Seqname);
796 ajStrSetClear(&thys->Formatstr);
797 ajStrSetClear(&thys->Filename);
798 ajStrSetClear(&thys->Seqid);
799 ajStrSetClear(&thys->Type);
800
801 if(!thys->Local)
802 ajTextinClear(thys->Input);
803 else
804 ajTextinClearNofile(thys->Input);
805
806 thys->Start = 0;
807 thys->End = 0;
808 thys->Rev = ajFalse;
809
810 return;
811 }
812
813
814
815
816 /* @func ajFeattabinSetRange **************************************************
817 **
818 ** Set the begin and end range for a feature table
819 **
820 ** @param [u] thys [AjPFeattabin] Feature table input object
821 ** @param [r] fbegin [ajint] Begin position
822 ** @param [r] fend [ajint] End position
823 ** @return [void]
824 **
825 ** @release 6.4.0
826 ******************************************************************************/
827
ajFeattabinSetRange(AjPFeattabin thys,ajint fbegin,ajint fend)828 void ajFeattabinSetRange(AjPFeattabin thys, ajint fbegin, ajint fend)
829 {
830 if(fbegin)
831 thys->Start = fbegin;
832
833 if(fend)
834 thys->End = fend;
835
836 return;
837 }
838
839
840
841
842 /* @func ajFeattabinSetTypeC **************************************************
843 **
844 ** Sets the type for feature input
845 **
846 ** @param [u] thys [AjPFeattabin] Feature input object
847 ** @param [r] type [const char*] Feature type "nucleotide" "protein"
848 ** @return [AjBool] ajTrue on success
849 **
850 ** @release 6.4.0
851 ** @@
852 ******************************************************************************/
853
ajFeattabinSetTypeC(AjPFeattabin thys,const char * type)854 AjBool ajFeattabinSetTypeC(AjPFeattabin thys, const char* type)
855 {
856 ajint i = 0;
857
858 if(!*type)
859 return ajTrue;
860
861 for(i=0; featinTypes[i].Name; i++)
862 {
863 if(ajCharMatchCaseC(featinTypes[i].Name, type))
864 {
865 if(featinTypes[i].Value)
866 ajStrAssignC(&thys->Type, featinTypes[i].Value);
867
868 return ajTrue;
869 }
870 i++;
871 }
872
873 ajErr("Unrecognized feature input type '%s'", type);
874
875 return ajFalse;
876 }
877
878
879
880
881 /* @func ajFeattabinSetTypeS **************************************************
882 **
883 ** Sets the type for feature input
884 **
885 ** @param [u] thys [AjPFeattabin] Feature input object
886 ** @param [r] type [const AjPStr] Feature type "nucleotide" "protein"
887 ** @return [AjBool] ajTrue on success
888 **
889 ** @release 6.4.0
890 ** @@
891 ******************************************************************************/
892
ajFeattabinSetTypeS(AjPFeattabin thys,const AjPStr type)893 AjBool ajFeattabinSetTypeS(AjPFeattabin thys, const AjPStr type)
894 {
895 return ajFeattabinSetTypeC(thys, ajStrGetPtr(type));
896 }
897
898
899
900
901 /* @func ajFeattabinQryC ******************************************************
902 **
903 ** Resets a feature table input object using a new Universal
904 ** Query Address
905 **
906 ** @param [u] thys [AjPFeattabin] Feature table input object.
907 ** @param [r] txt [const char*] Query
908 ** @return [void]
909 **
910 ** @release 6.4.0
911 ** @@
912 ******************************************************************************/
913
ajFeattabinQryC(AjPFeattabin thys,const char * txt)914 void ajFeattabinQryC(AjPFeattabin thys, const char* txt)
915 {
916 ajFeattabinClear(thys);
917 ajStrAssignC(&thys->Input->Qry, txt);
918
919 return;
920 }
921
922
923
924
925
926 /* @func ajFeattabinQryS ******************************************************
927 **
928 ** Resets a feature table input object using a new Universal
929 ** Query Address
930 **
931 ** @param [u] thys [AjPFeattabin] Feature table input object.
932 ** @param [r] str [const AjPStr] Query
933 ** @return [void]
934 **
935 ** @release 6.4.0
936 ** @@
937 ******************************************************************************/
938
ajFeattabinQryS(AjPFeattabin thys,const AjPStr str)939 void ajFeattabinQryS(AjPFeattabin thys, const AjPStr str)
940 {
941 ajFeattabinClear(thys);
942 ajStrAssignS(&thys->Input->Qry, str);
943
944 return;
945 }
946
947
948
949
950 /* @func ajFeattabinRead ******************************************************
951 **
952 ** Generic interface function for reading in features from a file
953 ** given the file handle.
954 **
955 ** @param [u] feattabin [AjPFeattabin] Feature table input
956 ** @param [u] ftable [AjPFeattable] Feature table containing
957 ** the features read in
958 ** @return [AjBool] True on success
959 **
960 ** @release 6.4.0
961 ** @@
962 ******************************************************************************/
963
ajFeattabinRead(AjPFeattabin feattabin,AjPFeattable ftable)964 AjBool ajFeattabinRead(AjPFeattabin feattabin, AjPFeattable ftable)
965 {
966 AjBool ret = ajFalse;
967 FeatPListUfo node = NULL;
968 AjBool listdata = ajFalse;
969
970 if(feattabin->Input->Filebuff)
971 {
972 /* (a) if file still open, keep reading */
973 ajDebug("ajFeattabinRead: input file '%F' still there, try again\n",
974 feattabin->Input->Filebuff->File);
975 ret = feattabinRead(feattabin, ftable);
976 ajDebug("ajFeattabinRead: open buffer qry: '%S' returns: %B\n",
977 feattabin->Input->Qry, ret);
978 }
979 else
980 {
981 /* (b) if we have a list, try the next query in the list */
982 if(ajListGetLength(feattabin->Input->List))
983 {
984 listdata = ajTrue;
985 ajListPop(feattabin->Input->List, (void**) &node);
986
987 ajDebug("++pop from list '%S'\n", node->Ufo);
988 ajFeattabinQryS(feattabin, node->Ufo);
989 ajDebug("++SAVE FEATTABIN '%S' '%S' %d\n",
990 feattabin->Input->Qry,
991 feattabin->Input->Formatstr, feattabin->Input->Format);
992
993 feattabinQryRestore(feattabin, node);
994
995 ajStrDel(&node->Ufo);
996 ajStrDel(&node->Formatstr);
997 AJFREE(node);
998
999 ajDebug("ajFeattabinRead: open list, try '%S'\n",
1000 feattabin->Input->Qry);
1001
1002 if(!feattabinQryProcess(feattabin, ftable) &&
1003 !ajListGetLength(feattabin->Input->List))
1004 return ajFalse;
1005
1006 ret = feattabinRead(feattabin, ftable);
1007 ajDebug("ajFeattabinRead: list qry: '%S' returns: %B\n",
1008 feattabin->Input->Qry, ret);
1009 }
1010 else
1011 {
1012 ajDebug("ajFeattabinRead: no file yet - test query '%S'\n",
1013 feattabin->Input->Qry);
1014
1015 /* (c) Must be a query - decode it */
1016 if(!feattabinQryProcess(feattabin, ftable) &&
1017 !ajListGetLength(feattabin->Input->List))
1018 return ajFalse;
1019
1020 if(ajListGetLength(feattabin->Input->List)) /* could be new list */
1021 listdata = ajTrue;
1022
1023 ret = feattabinRead(feattabin, ftable);
1024 ajDebug("ajFeattabinRead: new qry: '%S' returns: %B\n",
1025 feattabin->Input->Qry, ret);
1026 }
1027 }
1028
1029 /* Now read whatever we got */
1030
1031 while(!ret && ajListGetLength(feattabin->Input->List))
1032 {
1033 /* Failed, but we have a list still - keep trying it */
1034 if(listdata)
1035 ajErr("Failed to read data data '%S'",
1036 feattabin->Input->Qry);
1037
1038 listdata = ajTrue;
1039 ajListPop(feattabin->Input->List,(void**) &node);
1040 ajDebug("++try again: pop from list '%S'\n", node->Ufo);
1041 ajFeattabinQryS(feattabin, node->Ufo);
1042 ajDebug("++SAVE (AGAIN) FEATTABIN '%S' '%S' %d\n",
1043 feattabin->Input->Qry,
1044 feattabin->Input->Formatstr, feattabin->Input->Format);
1045
1046 feattabinQryRestore(feattabin, node);
1047
1048 ajStrDel(&node->Ufo);
1049 ajStrDel(&node->Formatstr);
1050 AJFREE(node);
1051
1052 if(!feattabinQryProcess(feattabin, ftable))
1053 continue;
1054
1055 ret = feattabinRead(feattabin, ftable);
1056 ajDebug("ajFeattabinRead: list retry qry: '%S' returns: %B\n",
1057 feattabin->Input->Qry, ret);
1058 }
1059
1060 if(!ret)
1061 {
1062 if(listdata)
1063 ajErr("Failed to read data data '%S'",
1064 feattabin->Input->Qry);
1065
1066 return ajFalse;
1067 }
1068
1069
1070 feattableDefine(ftable, feattabin);
1071
1072 return ajTrue;
1073 }
1074
1075
1076
1077
1078 /* @funcstatic feattabinQueryMatch ********************************************
1079 **
1080 ** Compares a feature table item to a query and returns true if they match.
1081 **
1082 ** @param [r] thys [const AjPQuery] query.
1083 ** @param [r] ftable [const AjPFeattable] Feature table data.
1084 ** @return [AjBool] ajTrue if the data matches the query.
1085 **
1086 ** @release 6.4.0
1087 ** @@
1088 ******************************************************************************/
1089
feattabinQueryMatch(const AjPQuery thys,const AjPFeattable ftable)1090 static AjBool feattabinQueryMatch(const AjPQuery thys,
1091 const AjPFeattable ftable)
1092 {
1093 AjBool tested = ajFalse;
1094 AjIList iterfield = NULL;
1095 AjPQueryField field = NULL;
1096 AjBool ok = ajFalse;
1097
1098 ajDebug("feattabinQueryMatch '%S' fields: %Lu Case %B Done %B\n",
1099 ftable->Seqid, ajListGetLength(thys->QueryFields),
1100 thys->CaseId, thys->QryDone);
1101
1102 if(!thys) /* no query to test, that's fine */
1103 return ajTrue;
1104
1105 if(thys->QryDone) /* do we need to test here? */
1106 return ajTrue;
1107
1108 /* test the query field(s) */
1109
1110 iterfield = ajListIterNewread(thys->QueryFields);
1111 while(!ajListIterDone(iterfield))
1112 {
1113 field = ajListIterGet(iterfield);
1114
1115 ajDebug(" field: '%S' Query: '%S'\n",
1116 field->Field, field->Wildquery);
1117 if(ajStrMatchC(field->Field, "id"))
1118 {
1119 ajDebug(" id test: '%S'\n",
1120 ftable->Seqid);
1121 if(thys->CaseId)
1122 {
1123 if(ajStrMatchWildS(ftable->Seqid, field->Wildquery))
1124 {
1125 ajListIterDel(&iterfield);
1126 return ajTrue;
1127 }
1128 }
1129 else
1130 {
1131 if(ajStrMatchWildCaseS(ftable->Seqid, field->Wildquery))
1132 {
1133 ajListIterDel(&iterfield);
1134 return ajTrue;
1135 }
1136 }
1137
1138 ajDebug("id test failed\n");
1139 tested = ajTrue;
1140 ok = ajFalse;
1141 }
1142
1143 if(ajStrMatchC(field->Field, "acc")) /* test id, use trueid */
1144 {
1145 if(ajStrMatchWildCaseS(ftable->Seqid, field->Wildquery))
1146 {
1147 ajListIterDel(&iterfield);
1148 return ajTrue;
1149 }
1150 }
1151
1152 }
1153
1154 ajListIterDel(&iterfield);
1155
1156 if(!tested) /* nothing to test, so accept it anyway */
1157 {
1158 ajDebug(" no tests: assume OK\n");
1159 return ajTrue;
1160 }
1161
1162 ajDebug("result: %B\n", ok);
1163
1164 return ok;
1165 }
1166
1167
1168
1169
1170 /* @funcstatic feattableDefine ************************************************
1171 **
1172 ** Make sure all feature table object attributes are defined
1173 ** using values from the feature table input object if needed
1174 **
1175 ** @param [u] thys [AjPFeattable] feature table data returned.
1176 ** @param [u] feattabin [AjPFeattabin] Feature table input definitions
1177 ** @return [AjBool] ajTrue on success.
1178 **
1179 ** @release 6.4.0
1180 ** @@
1181 ******************************************************************************/
1182
feattableDefine(AjPFeattable thys,AjPFeattabin feattabin)1183 static AjBool feattableDefine(AjPFeattable thys, AjPFeattabin feattabin)
1184 {
1185
1186 /* if values are missing in the data object, we can use defaults
1187 from feattabin or calculate where possible */
1188
1189 /* assign the dbname if defined in the AjPFeattabin object */
1190 if(ajStrGetLen(feattabin->Input->Db))
1191 ajStrAssignS(&thys->Db, feattabin->Input->Db);
1192
1193 return ajTrue;
1194 }
1195
1196
1197
1198
1199
1200 /* @funcstatic feattabinReadFmt ***********************************************
1201 **
1202 ** Tests whether data data can be read using the specified format.
1203 ** Then tests whether the data data matches data data query criteria
1204 ** and checks any specified type. Applies upper and lower case.
1205 **
1206 ** @param [u] feattabin [AjPFeattabin] Feature table input object
1207 ** @param [w] ftable [AjPFeattable] Feature table data object
1208 ** @param [r] format [ajuint] input format code
1209 ** @return [ajuint] 0 if successful.
1210 ** 1 if the query match failed.
1211 ** 2 if the data data type failed
1212 ** 3 if it failed to read any data data
1213 **
1214 ** @release 6.4.0
1215 ** @@
1216 ** This is the only function that calls the appropriate Read function
1217 ** feattabinReadXxxxxx where Xxxxxxx is the supported data data format.
1218 **
1219 ** Some of the feattabinReadXxxxxx functions fail to reset the buffer correctly,
1220 ** which is a very serious problem when cycling through all of them to
1221 ** identify an unknown format. The extra ajFileBuffReset call at the end is
1222 ** intended to address this problem. The individual functions should still
1223 ** reset the buffer in case they are called from elsewhere.
1224 **
1225 ******************************************************************************/
1226
feattabinReadFmt(AjPFeattabin feattabin,AjPFeattable ftable,ajuint format)1227 static ajuint feattabinReadFmt(AjPFeattabin feattabin, AjPFeattable ftable,
1228 ajuint format)
1229 {
1230 ajDebug("++feattabinReadFmt format %d (%s) '%S'\n",
1231 format, featinformatDef[format].Name,
1232 feattabin->Input->Qry);
1233
1234 feattabin->Input->Records = 0;
1235
1236 if(!featinformatDef[format].Used)
1237 {
1238 if(featinformatDef[format].InitReg &&
1239 !(*featinformatDef[format].InitReg)())
1240 {
1241 ajDebug("Initialisation failed for %s\n",
1242 featinformatDef[format].Name);
1243 ajErr("Initialisation failed for feature format %s",
1244 featinformatDef[format].Name);
1245 }
1246 featinformatDef[format].Used = ajTrue;
1247 }
1248
1249 /* Calling funclist featinFormatDef() */
1250 if((*featinformatDef[format].Read)(feattabin, ftable))
1251 {
1252 ajDebug("feattabinReadFmt success with format %d (%s)\n",
1253 format, featinformatDef[format].Name);
1254 ajDebug("id: '%S'\n",
1255 ftable->Seqid);
1256 feattabin->Input->Format = format;
1257 ajStrAssignC(&feattabin->Input->Formatstr,
1258 featinformatDef[format].Name);
1259 ajStrAssignC(&ftable->Formatstr, featinformatDef[format].Name);
1260 ajStrAssignEmptyS(&ftable->Db, feattabin->Input->Db);
1261 ajStrAssignS(&ftable->Filename, feattabin->Input->Filename);
1262
1263 if(feattabinQueryMatch(feattabin->Input->Query, ftable))
1264 {
1265 /* ajFeattabinTrace(feattabin); */
1266
1267 return FMT_OK;
1268 }
1269
1270 ajDebug("query match failed, continuing ...\n");
1271 ajFeattableClear(ftable);
1272
1273 return FMT_NOMATCH;
1274 }
1275 else
1276 {
1277 ajDebug("Testing input buffer: IsBuff: %B Eof: %B\n",
1278 ajFilebuffIsBuffered(feattabin->Input->Filebuff),
1279 ajFilebuffIsEof(feattabin->Input->Filebuff));
1280
1281 if (!ajFilebuffIsBuffered(feattabin->Input->Filebuff) &&
1282 ajFilebuffIsEof(feattabin->Input->Filebuff))
1283 return FMT_EOF;
1284
1285 ajFilebuffReset(feattabin->Input->Filebuff);
1286 ajDebug("Format %d (%s) failed, file buffer reset by feattabinReadFmt\n",
1287 format, featinformatDef[format].Name);
1288 /* ajFilebuffTraceFull(feattabin->Filebuff, 10, 10);*/
1289 }
1290
1291 ajDebug("++feattabinReadFmt failed - nothing read\n");
1292
1293 return FMT_FAIL;
1294 }
1295
1296
1297
1298
1299 /* @funcstatic feattabinRead **************************************************
1300 **
1301 ** Given data in a feature table input structure,
1302 ** tries to read everything needed
1303 ** using the specified format or by trial and error.
1304 **
1305 ** @param [u] feattabin [AjPFeattabin] Feature table input object
1306 ** @param [w] ftable [AjPFeattable] Feature table object
1307 ** @return [AjBool] ajTrue on success
1308 **
1309 ** @release 6.4.0
1310 ** @@
1311 ******************************************************************************/
1312
feattabinRead(AjPFeattabin feattabin,AjPFeattable ftable)1313 static AjBool feattabinRead(AjPFeattabin feattabin, AjPFeattable ftable)
1314 {
1315 ajuint i;
1316 ajuint istat = 0;
1317 ajuint jstat = 0;
1318
1319 AjPFilebuff buff = feattabin->Input->Filebuff;
1320 /* AjBool regfile = ajFalse; */
1321 AjBool ok;
1322
1323 AjPTextAccess textaccess = feattabin->Input->Query->TextAccess;
1324 AjPFeattabAccess feattabaccess = feattabin->Input->Query->Access;
1325
1326 ajFeattableClear(ftable);
1327 ajDebug("feattabinRead: cleared\n");
1328
1329 if(feattabin->Input->Single && feattabin->Input->Count)
1330 {
1331 /*
1332 ** One data data item at a time is read.
1333 ** The first data data item was read by ACD
1334 ** for the following ones we need to reset the AjPFeattabin
1335 **
1336 ** Single is set by the access method
1337 */
1338
1339 ajDebug("feattabinRead: single access - count %d - call access"
1340 " routine again\n",
1341 feattabin->Input->Count);
1342 /* Calling funclist feattabinAccess() */
1343 if(textaccess)
1344 {
1345 if(!(*textaccess->Access)(feattabin->Input))
1346 {
1347 ajDebug("feattabinRead: (*textaccess->Access)(feattabin->Input) "
1348 "*failed*\n");
1349
1350 return ajFalse;
1351 }
1352 }
1353
1354 if(feattabaccess)
1355 {
1356 if(!(*feattabaccess->Access)(feattabin))
1357 {
1358 ajDebug("feattabinRead: (*feattabaccess->Access)(feattabin) "
1359 "*failed*\n");
1360
1361 return ajFalse;
1362 }
1363 }
1364
1365 buff = feattabin->Input->Filebuff;
1366 }
1367
1368 ajDebug("feattabinRead: feattabin format %d '%S'\n",
1369 feattabin->Input->Format,
1370 feattabin->Input->Formatstr);
1371
1372 feattabin->Input->Count++;
1373
1374 if(!feattabin->Input->Filebuff && !feattabin->Input->TextData)
1375 return ajFalse;
1376
1377 ok = ajFilebuffIsBuffered(feattabin->Input->Filebuff);
1378
1379 while(ok)
1380 { /* skip blank lines */
1381 ok = ajBuffreadLine(feattabin->Input->Filebuff, &featReadLine);
1382
1383 if(ok && !ajStrIsWhite(featReadLine))
1384 {
1385 ajFilebuffClear(feattabin->Input->Filebuff,1);
1386 break;
1387 }
1388 }
1389
1390 if(!feattabin->Input->Format)
1391 { /* no format specified, try all defaults */
1392 /*
1393 regfile = ajFileIsFile(ajFilebuffGetFile(feattabin->Input->Filebuff));
1394 */
1395 for(i = 1; featinformatDef[i].Name; i++)
1396 {
1397 if(!featinformatDef[i].Try) /* skip if Try is ajFalse */
1398 continue;
1399
1400 if(!featinformatDef[i].Used)
1401 {
1402 if(featinformatDef[i].InitReg &&
1403 !(*featinformatDef[i].InitReg)())
1404 {
1405 ajDebug("Initialisation failed for %s\n",
1406 featinformatDef[i].Name);
1407 ajErr("Initialisation failed for feature format %s",
1408 featinformatDef[i].Name);
1409 }
1410 featinformatDef[i].Used = ajTrue;
1411 }
1412
1413 ajDebug("feattabinRead:try format %d (%s)\n",
1414 i, featinformatDef[i].Name);
1415
1416 istat = feattabinReadFmt(feattabin, ftable, i);
1417
1418 switch(istat)
1419 {
1420 case FMT_OK:
1421 ajDebug("++feattabinRead OK, set format %d\n",
1422 feattabin->Input->Format);
1423 feattableDefine(ftable, feattabin);
1424
1425 return ajTrue;
1426 case FMT_BADTYPE:
1427 ajDebug("feattabinRead: (a1) "
1428 "feattabinReadFmt stat == BADTYPE *failed*\n");
1429
1430 return ajFalse;
1431 case FMT_FAIL:
1432 ajDebug("feattabinRead: (b1) "
1433 "feattabinReadFmt stat == FAIL *failed*\n");
1434 break; /* we can try next format */
1435 case FMT_NOMATCH:
1436 ajDebug("feattabinRead: (c1) "
1437 "feattabinReadFmt stat==NOMATCH try again\n");
1438 break;
1439 case FMT_EOF:
1440 ajDebug("feattabinRead: (d1) "
1441 "feattabinReadFmt stat == EOF *failed*\n");
1442 return ajFalse; /* EOF and unbuffered */
1443 case FMT_EMPTY:
1444 ajWarn("data data '%S' has zero length, ignored",
1445 ajFeattableGetQryS(ftable));
1446 ajDebug("feattabinRead: (e1) "
1447 "feattabinReadFmt stat==EMPTY try again\n");
1448 break;
1449 default:
1450 ajDebug("unknown code %d from feattabinReadFmt\n", stat);
1451 }
1452
1453 ajFeattableClear(ftable);
1454
1455 if(feattabin->Input->Format)
1456 break; /* we read something */
1457
1458 ajFilebuffTrace(feattabin->Input->Filebuff);
1459 }
1460
1461 if(!feattabin->Input->Format)
1462 { /* all default formats failed, give up */
1463 ajDebug("feattabinRead:all default formats failed, give up\n");
1464
1465 return ajFalse;
1466 }
1467
1468 ajDebug("++feattabinRead set format %d\n",
1469 feattabin->Input->Format);
1470 }
1471 else
1472 { /* one format specified */
1473 ajDebug("feattabinRead: one format specified\n");
1474 ajFilebuffSetUnbuffered(feattabin->Input->Filebuff);
1475
1476 ajDebug("++feattabinRead known format %d\n",
1477 feattabin->Input->Format);
1478 istat = feattabinReadFmt(feattabin, ftable, feattabin->Input->Format);
1479
1480 switch(istat)
1481 {
1482 case FMT_OK:
1483 feattableDefine(ftable, feattabin);
1484
1485 return ajTrue;
1486 case FMT_BADTYPE:
1487 ajDebug("feattabinRead: (a2) "
1488 "feattabinReadFmt stat == BADTYPE *failed*\n");
1489
1490 return ajFalse;
1491
1492 case FMT_FAIL:
1493 ajDebug("feattabinRead: (b2) "
1494 "feattabinReadFmt stat == FAIL *failed*\n");
1495
1496 return ajFalse;
1497
1498 case FMT_NOMATCH:
1499 ajDebug("feattabinRead: (c2) "
1500 "feattabinReadFmt stat == NOMATCH *try again*\n");
1501 break;
1502 case FMT_EOF:
1503 ajDebug("feattabinRead: (d2) "
1504 "feattabinReadFmt stat == EOF *try again*\n");
1505 if(feattabin->Input->Records)
1506 ajErr("Error reading file '%F' with format '%s': "
1507 "end-of-file before end of data "
1508 "(read %u records)",
1509 ajFilebuffGetFile(feattabin->Input->Filebuff),
1510 featinformatDef[feattabin->Input->Format].Name,
1511 feattabin->Input->Records);
1512 break; /* simply end-of-file */
1513 case FMT_EMPTY:
1514 ajWarn("feature table data '%S' has zero length, ignored",
1515 ajFeattableGetQryS(ftable));
1516 ajDebug("feattabinRead: (e2) "
1517 "feattabinReadFmt stat == EMPTY *try again*\n");
1518 break;
1519 default:
1520 ajDebug("unknown code %d from feattabinReadFmt\n", stat);
1521 }
1522
1523 ajFeattableClear(ftable); /* 1 : read, failed to match id/acc/query */
1524 }
1525
1526 /* failed - probably entry/accession query failed. Can we try again? */
1527
1528 ajDebug("feattabinRead failed - try again with format %d '%s' code %d\n",
1529 feattabin->Input->Format,
1530 featinformatDef[feattabin->Input->Format].Name, istat);
1531
1532 ajDebug("Search:%B Chunk:%B Data:%x ajFileBuffEmpty:%B\n",
1533 feattabin->Input->Search, feattabin->Input->ChunkEntries,
1534 feattabin->Input->TextData, ajFilebuffIsEmpty(buff));
1535
1536 if(ajFilebuffIsEmpty(buff) && feattabin->Input->ChunkEntries)
1537 {
1538 if(textaccess && !(*textaccess->Access)(feattabin->Input))
1539 return ajFalse;
1540 else if(feattabaccess && !(*feattabaccess->Access)(feattabin))
1541 return ajFalse;
1542 buff = feattabin->Input->Filebuff;
1543 }
1544
1545
1546 /* need to check end-of-file to avoid repeats */
1547 while(feattabin->Input->Search &&
1548 (feattabin->Input->TextData || !ajFilebuffIsEmpty(buff)))
1549 {
1550 jstat = feattabinReadFmt(feattabin, ftable, feattabin->Input->Format);
1551
1552 switch(jstat)
1553 {
1554 case FMT_OK:
1555 feattableDefine(ftable, feattabin);
1556
1557 return ajTrue;
1558
1559 case FMT_BADTYPE:
1560 ajDebug("feattabinRead: (a3) "
1561 "feattabinReadFmt stat == BADTYPE *failed*\n");
1562
1563 return ajFalse;
1564
1565 case FMT_FAIL:
1566 ajDebug("feattabinRead: (b3) "
1567 "feattabinReadFmt stat == FAIL *failed*\n");
1568
1569 return ajFalse;
1570
1571 case FMT_NOMATCH:
1572 ajDebug("feattabinRead: (c3) "
1573 "feattabinReadFmt stat == NOMATCH *try again*\n");
1574 break;
1575 case FMT_EOF:
1576 ajDebug("feattabinRead: (d3) "
1577 "feattabinReadFmt stat == EOF *failed*\n");
1578
1579 return ajFalse; /* we already tried again */
1580
1581 case FMT_EMPTY:
1582 if(istat != FMT_EMPTY)
1583 ajWarn("assmebly data '%S' has zero length, ignored",
1584 ajFeattableGetQryS(ftable));
1585 ajDebug("feattabinRead: (e3) "
1586 "feattabinReadFmt stat == EMPTY *try again*\n");
1587 break;
1588
1589 default:
1590 ajDebug("unknown code %d from feattabinReadFmt\n", stat);
1591 }
1592
1593 ajFeattableClear(ftable); /* 1 : read, failed to match id/acc/query */
1594 }
1595
1596 if(feattabin->Input->Format)
1597 ajDebug("feattabinRead: *failed* to read data data %S "
1598 "using format %s\n",
1599 feattabin->Input->Qry,
1600 featinformatDef[feattabin->Input->Format].Name);
1601 else
1602 ajDebug("feattabinRead: *failed* to read data data %S "
1603 "using any format\n",
1604 feattabin->Input->Qry);
1605
1606 return ajFalse;
1607 }
1608
1609
1610
1611
1612 /* @func ajFeattableNewRead ***************************************************
1613 **
1614 ** Generic interface function for reading in features from a file
1615 ** given the file handle.
1616 **
1617 ** @param [u] ftin [AjPFeattabin] Specifies the external source (file)
1618 ** of the features to be read in
1619 ** @return [AjPFeattable] Pointer to a new feature table containing
1620 ** the features read in
1621 ** @category new [AjPFeattable] Reads in a feature set in a specified format
1622 **
1623 ** @release 6.2.0
1624 ** @@
1625 ******************************************************************************/
1626
ajFeattableNewRead(AjPFeattabin ftin)1627 AjPFeattable ajFeattableNewRead(AjPFeattabin ftin)
1628 {
1629 AjPTextin textin = ftin->Input;
1630 AjPFilebuff file;
1631 ajint format;
1632 AjBool ok = ajTrue;
1633
1634 AjPFeattable features = NULL;
1635 AjBool result = ajFalse;
1636
1637 if(!ftin)
1638 return NULL;
1639
1640 textin = ftin->Input;
1641 file = textin->Filebuff;
1642
1643 if(!file)
1644 return NULL;
1645
1646 format = ftin->Input->Format;
1647
1648 if(!format)
1649 return NULL;
1650
1651 ajDebug("ajFeattableNewRead format %d '%s' file %x type: '%S'\n",
1652 format, featinformatDef[format].Name, file, ftin->Type);
1653
1654 if(!featinformatDef[format].Used)
1655 {
1656 /* Calling funclist featinformatDef() */
1657 if(featinformatDef[format].InitReg &&
1658 !(*featinformatDef[format].InitReg)())
1659 {
1660 ajDebug("Initialisation failed for %s\n",
1661 featinformatDef[format].Name);
1662 ajErr("Initialisation failed for feature format %s",
1663 featinformatDef[format].Name);
1664 }
1665
1666 featinformatDef[format].Used = ajTrue;
1667 }
1668
1669 features = ajFeattableNew(ftin->Seqname);
1670 features->Start = ftin->Start;
1671 features->End = ftin->End;
1672 features->Rev = ftin->Rev;
1673
1674 while(ok)
1675 { /* skip blank lines */
1676 ok = ajBuffreadLine(textin->Filebuff, &featReadLine);
1677
1678 if(ok && !ajStrIsWhite(featReadLine))
1679 {
1680 ajFilebuffClear(textin->Filebuff,1);
1681 break;
1682 }
1683 }
1684
1685 /* Calling funclist featinformatDef() */
1686 result = (*featinformatDef[format].Read)(ftin, features);
1687
1688 if(result)
1689 {
1690 if(ftin->Circular)
1691 features->Circular = ajTrue;
1692
1693 /* ajFeattableTrace(features); */
1694 ajDebug("ajFeattableNewRead read %d features\n",
1695 ajFeattableGetSize(features));
1696
1697 return features;
1698 }
1699 else
1700 {
1701 ajDebug("ajFeattableNewRead failed, read %d features\n",
1702 ajFeattableGetSize(features));
1703 ajFeattableDel(&(features));
1704 }
1705
1706 return NULL;
1707 }
1708
1709
1710
1711
1712 /* @func ajFeattableNewReadUfo ************************************************
1713 **
1714 ** Parses a UFO, opens an input file, and reads a feature table
1715 **
1716 ** @param [u] featin [AjPFeattabin] Feature input object
1717 ** @param [r] ufo [const AjPStr] UFO feature spec
1718 ** @return [AjPFeattable] Feature table created, (or NULL if failed)
1719 ** @category new [AjPFeattable] Parses a UFO, opens an input file,
1720 ** and reads a feature table
1721 **
1722 ** @release 6.2.0
1723 ** @@
1724 ******************************************************************************/
1725
ajFeattableNewReadUfo(AjPFeattabin featin,const AjPStr ufo)1726 AjPFeattable ajFeattableNewReadUfo(AjPFeattabin featin,
1727 const AjPStr ufo)
1728 {
1729 AjPFeattable ret = NULL;
1730
1731 AjBool fmtstat = ajFalse; /* status returns from regex tests */
1732 AjBool filstat = ajFalse; /* status returns from regex tests */
1733 ajint i;
1734
1735 ajDebug("ajFeattableNewReadUfo ufo: '%S' filebuff: %x\n",
1736 ufo, featin->Input->Filebuff);
1737
1738 if(!featinRegUfoFmt)
1739 featinRegUfoFmt = ajRegCompC("^([A-Za-z0-9][A-Za-z0-9]+):+(.*)$");
1740 /* \1 format */
1741 /* \2 remainder */
1742
1743 if(!featinRegUfoFile)
1744 featinRegUfoFile = ajRegCompC("^(([A-Za-z]:)?[^:]+)$");
1745
1746 /*ajDebug("ajFeattableNewReadUfo UFO '%S'\n", ufo);*/
1747
1748 ajStrAssignS(&featinUfoTest, ufo);
1749
1750 if(ajStrGetLen(ufo))
1751 {
1752 fmtstat = ajRegExec(featinRegUfoFmt, featinUfoTest);
1753 /*ajDebug("feat format regexp: %B\n", fmtstat);*/
1754 }
1755
1756 if(fmtstat)
1757 {
1758 ajRegSubI(featinRegUfoFmt, 1, &featin->Formatstr);
1759 ajStrAssignEmptyC(&featin->Formatstr,
1760 featinformatDef[0].Name); /* unknown */
1761 ajRegSubI(featinRegUfoFmt, 2, &featinUfoTest); /* trim off the format */
1762 /*ajDebug("found feat format %S\n", featin->Formatstr);*/
1763
1764 if(!featFindInformatS(featin->Formatstr, &featin->Input->Format))
1765 ajErr("unknown input feature table format '%S'\n"
1766 "NO Features will be read in", featin->Formatstr);
1767 }
1768 else
1769 {
1770 /*ajDebug("no format specified in UFO");*/
1771 }
1772
1773 featFormatSet(featin);
1774
1775 filstat = ajRegExec(featinRegUfoFile, featinUfoTest);
1776 /*ajDebug("featinRegUfoFile: %B\n", filstat);*/
1777
1778 if(filstat)
1779 ajRegSubI(featinRegUfoFile, 1, &featin->Filename);
1780 else
1781 {
1782 if(ajStrGetLen(featin->Seqname) && ajStrGetLen(featin->Formatstr))
1783 {
1784 ajFmtPrintS(&featinUfoTest, "%S.%S",
1785 featin->Seqname, featin->Formatstr);
1786 ajStrAssignEmptyS(&featin->Filename, featinUfoTest);
1787 /*ajDebug("generate filename '%S'\n", featin->Filename);*/
1788 }
1789 else
1790 {
1791 /*ajDebug("unable to generate filename "
1792 "Featin Seqname '%S' Formatstr '%S'\n",
1793 featin->Seqname, featin->Formatstr);*/
1794 return NULL;
1795 }
1796 }
1797
1798 /* Open the file so that we can try to read it */
1799
1800 /* ajDebug("trying to open '%S'\n", featin->Filename);*/
1801 ajFilebuffDel(&featin->Input->Filebuff);
1802 featin->Input->Filebuff = ajFilebuffNewNameS(featin->Filename);
1803
1804 if(!featin->Input->Filebuff)
1805 return NULL;
1806 /*ajDebug("after opening '%S'\n", featin->Filename);*/
1807
1808
1809 /* OKAY if we have a format specified try this and this ONLY */
1810 if(featin->Input->Format)
1811 ret = ajFeattableNewRead(featin);
1812 /* else loop through all types and try to read features */
1813 else
1814 for(i=1;featinformatDef[i].Name;i++)
1815 {
1816 featin->Input->Format = i;
1817
1818 ret = ajFeattableNewRead(featin);
1819
1820 if(ret)
1821 break;
1822
1823 /* Reset buffer to start */
1824 ajFilebuffReset(featin->Input->Filebuff);
1825
1826 }
1827
1828 ajFilebuffDel(&featin->Input->Filebuff);
1829
1830 return ret;
1831 }
1832
1833
1834
1835
1836 /* @funcstatic featFormatSet **************************************************
1837 **
1838 ** Sets the input format for a feature table using the feature input
1839 ** object's defined format.
1840 **
1841 ** @param [u] featin [AjPFeattabin] Feature table input.
1842 ** @return [AjBool] ajTrue on success.
1843 **
1844 ** @release 2.0.0
1845 ** @@
1846 ******************************************************************************/
1847
featFormatSet(AjPFeattabin featin)1848 static AjBool featFormatSet(AjPFeattabin featin)
1849 {
1850 if(ajStrGetLen(featin->Formatstr))
1851 {
1852 /*ajDebug("... input format value '%S'\n", featin->Formatstr);*/
1853 if(featFindInformatS(featin->Formatstr, &featin->Input->Format))
1854 {
1855 /* we may need to set feature table format too? */
1856
1857 /*
1858 (void) ajStrAssignS(&thys->Formatstr, featin->Formatstr);
1859 thys->Format = featin->Format;
1860 ajDebug("...format OK '%S' = %d\n", featin->Formatstr,
1861 featin->Format);
1862 */
1863 }
1864 else
1865 {
1866 /*ajDebug("...format unknown '%S'\n", featin->Formatstr);*/
1867 }
1868
1869 return ajTrue;
1870 }
1871 else
1872 {
1873 /*ajDebug("...input format not set\n");*/
1874 }
1875
1876
1877 return ajFalse;
1878 }
1879
1880
1881
1882
1883 /* @funcstatic featReadChado **************************************************
1884 **
1885 ** Copies features attached to fttabin object to fttab object.
1886 **
1887 ** @param [u] fttabin [AjPFeattabIn] Feature table input
1888 ** @param [u] fttab [AjPFeattable] Feature table
1889 ** @return [AjBool] ajTrue on success
1890 **
1891 ** @release 6.4.0
1892 ** @@
1893 ******************************************************************************/
1894
featReadChado(AjPFeattabIn fttabin,AjPFeattable fttab)1895 static AjBool featReadChado(AjPFeattabIn fttabin, AjPFeattable fttab)
1896 {
1897
1898 if(fttabin->Input->TextData && fttabin->Input->Records==0)
1899 {
1900 AjPFeattable ft = fttabin->Input->TextData;
1901
1902 ajFeattableSetDefname(fttab, ft->Seqid);
1903
1904 ajListFree(&fttab->Features);
1905 fttab->Features = ft->Features;
1906 ft->Features = NULL;
1907 fttab->Start = ft->Start;
1908 fttab->End = ft->End;
1909 fttab->Len = ft->Len;
1910
1911 ajFeattableDel(&ft);
1912
1913 return ajTrue;
1914 }
1915
1916 return ajFalse;
1917 }
1918
1919
1920
1921
1922 /* @funcstatic featReadDasgff *************************************************
1923 **
1924 ** Reads feature data in DAS 1.5/1.6 features format (XML).
1925 **
1926 ** @param [u] feattabin [AjPFeattabin] Feature table input
1927 ** @param [u] ftable [AjPFeattable] Feature table
1928 ** @return [AjBool] ajTrue on success
1929 **
1930 ** @release 6.4.0
1931 ** @@
1932 ******************************************************************************/
1933
featReadDasgff(AjPFeattabin feattabin,AjPFeattable ftable)1934 static AjBool featReadDasgff(AjPFeattabin feattabin, AjPFeattable ftable)
1935 {
1936 AjPDomDocument doc = NULL;
1937 AjPDomNodeList features = NULL;
1938 AjPDomNodeList segments = NULL;
1939 AjPDomNodeList notes = NULL;
1940 AjPDomNodeList links = NULL;
1941 AjPDomNodeList targets = NULL;
1942 AjPDomNodeList parents = NULL;
1943 AjPDomNodeList parts = NULL;
1944 AjPDomNodeList groups = NULL;
1945
1946 AjPDomNode dasfeature = NULL;
1947 AjPDomNode segment = NULL;
1948
1949 AjPDomElement e = NULL;
1950 AjPFeature feature = NULL;
1951 AjPStr attval = NULL;
1952 AjPStr elmtxt = NULL;
1953 AjPStr type = NULL;
1954 AjPStr label = NULL;
1955
1956 AjBool ret = AJTRUE;
1957
1958 ajint i;
1959 ajint j;
1960 ajuint start;
1961 ajuint end;
1962 AjPFilebuff inf = feattabin->Input->Filebuff;
1963
1964 doc = ajDomImplementationCreateDocument(NULL,NULL,NULL);
1965
1966 if (ajDomReadFilebuff(doc,inf) == -1)
1967 {
1968 ajDomDocumentDestroyNode(doc,&doc);
1969 return AJFALSE;
1970 }
1971
1972
1973 /*
1974 * <SEGMENT> (required; one or more)
1975 * provides information on the reference segment queried.
1976 * The id attribute is required.
1977 * The version attribute (optional) indicates the version of the reference
1978 * object being annotated,
1979 * used for coordinate systems which are not themselves versioned.
1980 *
1981 * The optional label attribute provides a human readable label
1982 * for display purposes.
1983 *
1984 * TODO: here we assume one segment, spec suggests it can be more than one
1985 */
1986
1987 segments = ajDomDocumentGetElementsByTagNameC(doc, "SEGMENT");
1988
1989 if(segments==NULL || ajDomNodeListGetLen(segments) == 0)
1990 {
1991 ajDomDocumentDestroyNodeList(doc,&segments,AJDOMKEEP);
1992 ajDomDocumentDestroyNode(doc,&doc);
1993 ajDebug("featReadDasgff: no <SEGMENT> tag found");
1994 return AJFALSE;
1995 }
1996
1997 segment = ajDomNodeListItem(segments, 0);
1998
1999 attval = ajDomElementGetAttributeC(segment,"id");
2000
2001 if(ajStrGetLen(attval) == 0)
2002 {
2003 ajStrDel(&attval);
2004 ajDomDocumentDestroyNodeList(doc,&segments,AJDOMKEEP);
2005 ajDomDocumentDestroyNode(doc,&doc);
2006 return AJFALSE;
2007 }
2008
2009 ajStrAssignS(&ftable->Seqid,attval);
2010 ajStrDel(&attval);
2011
2012 attval = ajDomElementGetAttributeC(segment,"start");
2013 ajStrToUint(attval, &ftable->Start);
2014 ftable->Start--;
2015 ajStrDel(&attval);
2016
2017 attval = ajDomElementGetAttributeC(segment,"stop");
2018 ajStrToUint(attval, &ftable->End);
2019 ftable->End--;
2020 ajStrDel(&attval);
2021
2022 ftable->Len = ftable->End - ftable->Start;
2023
2024 features = ajDomDocumentGetElementsByTagNameC(doc, "FEATURE");
2025
2026 if(features)
2027 {
2028
2029 for (i = 0; i < ajDomNodeListGetLen(features); i++)
2030 {
2031 dasfeature = ajDomNodeListItem(features, i);
2032
2033
2034 /*
2035 * <START>, <END> (optional; one apiece per feature)
2036 *
2037 * indicates the start and end of the feature in the coordinate
2038 * system of the reference object given in the <SEGMENT> tag.
2039 * If one element is present, the other must be also.
2040 * If start and end are zero, or omitted entirely, it is assumed
2041 * that the feature is an annotation of the reference object
2042 * as a whole rather than a region of sequence.
2043 */
2044
2045 e = ajDomElementGetFirstChildByTagNameC(doc, dasfeature, "START");
2046 elmtxt = ajDomElementGetText(e);
2047 ajStrToUint(elmtxt, &start);
2048 ajDebug(" start = %S\n",elmtxt);
2049
2050 e = ajDomElementGetFirstChildByTagNameC(doc, dasfeature, "END");
2051 elmtxt = ajDomElementGetText(e);
2052 ajDebug(" end = %S\n",elmtxt);
2053 ajStrToUint(elmtxt, &end);
2054
2055 feature = ajFeatNewII(ftable, start, end);
2056
2057
2058 /*
2059 * The id attribute (required) is an identifier for the feature.
2060 * it must be unique to the feature across the data source.
2061 *
2062 * The label attribute (optional) is a suggested label to display
2063 * for the feature. If not present, it is assumed the id attribute
2064 * is suitable for display.
2065 */
2066
2067 attval = ajDomElementGetAttributeC(dasfeature, "id");
2068
2069 if(ajStrGetLen(attval) == 0)
2070 {
2071 ret = AJFALSE;
2072 break;
2073 }
2074
2075 ajDebug(" id = %S\n",attval);
2076 ajFeatGfftagAddCS(feature, "ID", attval);
2077 ajStrDel(&attval);
2078
2079
2080 /* <TYPE> (required; one per feature)
2081 * indicates the type of the annotation.
2082 * attributes are id (required), which is a unique ID
2083 * for the annotation type,
2084 * category (optional, recommended), which provides functional
2085 * grouping to related types, and cvId (optional, recommended)
2086 * which is the ID of a term from a relevant controlled vocabulary
2087 *
2088 * gff3: type of the feature (previously called the "method").
2089 * This is constrained to be either:
2090 * (a) a term from the "lite" sequence ontology, SOFA; or
2091 * (b) a SOFA accession number.
2092 * The latter alternative is distinguished using the syntax
2093 * SO:000000. This field is required.
2094 */
2095
2096 e = ajDomElementGetFirstChildByTagNameC(doc,dasfeature,"TYPE");
2097
2098 if(e == NULL)
2099 {
2100 ret = AJFALSE;
2101 break;
2102 }
2103
2104 attval = ajDomElementGetAttributeC(e, "id");
2105
2106 if(ajStrGetLen(attval) == 0)
2107 {
2108 ajStrDel(&attval);
2109 ajDebug("featReadDasgff: required 'id' attribute of annotation"
2110 " type <TYPE> is empty\n");
2111 /* normally parsing should stop here
2112 * but many DAS sources are missing this requirement */
2113 /* ret = AJFALSE; break; */
2114 }
2115
2116 ajDebug(" type - id = %S\n",attval);
2117 ajStrAssignS(&feature->Type, attval);
2118 ajStrDel(&attval);
2119 attval = ajDomElementGetAttributeC(e, "cvId");
2120
2121 if(ajStrGetLen(attval)>0)
2122 ajFeatGfftagAddCS(feature,"Ontology_term",attval);
2123
2124 ajStrDel(&attval);
2125 attval = ajDomElementGetAttributeC(e, "category");
2126
2127 if(ajStrGetLen(attval)>0)
2128 ajFeatGfftagAddCS(feature,"category",attval);
2129
2130 ajStrDel(&attval);
2131
2132
2133 /*
2134 * <METHOD> - "source" on gff
2135 *
2136 * method used to identify the feature.
2137 * The id attribute is required.
2138 * The cvId (optional, recommended) attribute is an ontology term
2139 * ID from the Evidence Codes Ontology, and as such is a generic
2140 * (potentially less specific) representation of the method.
2141 *
2142 */
2143
2144 e = ajDomElementGetFirstChildByTagNameC(doc,dasfeature,"METHOD");
2145
2146 if(e == NULL)
2147 {
2148 ret = AJFALSE;
2149 break;
2150 }
2151
2152 attval = ajDomElementGetAttributeC(e, "id");
2153 ajDebug(" method id = %S\n",attval);
2154 ajStrAssignRef(&feature->Source, attval);
2155 ajStrDel(&attval);
2156 attval = ajDomElementGetAttributeC(e, "cvId");
2157
2158 if(ajStrGetLen(attval) > 0)
2159 ajFeatSetSource(feature, attval);
2160
2161 ajStrDel(&attval);
2162
2163
2164 /*
2165 * <SCORE> (optional; one per feature)
2166 *
2167 * integer or floating point number indicating the "score" of
2168 * the method used to find the current feature.
2169 * If this field is inapplicable, the contents of the tag can be
2170 * replaced with a - symbol.
2171 * This is the assumed value if the tag is omitted entirely.
2172 */
2173
2174 e = ajDomElementGetFirstChildByTagNameC(doc,dasfeature,"SCORE");
2175
2176 if(e!=NULL)
2177 {
2178 elmtxt = ajDomElementGetText(e);
2179 ajDebug(" score = %S\n",elmtxt);
2180 ajStrToFloat(elmtxt, &feature->Score);
2181 }
2182
2183
2184 /*
2185 * <ORIENTATION> (optional; one per feature)
2186 *
2187 * orientation of the feature relative to the direction of
2188 * transcription. It may be 0 for features that are unrelated to
2189 * transcription, +, for features that are on the sense strand,
2190 * and -, for features on the antisense strand.
2191 * If this tag is omitted, a value of 0 is assumed.
2192 */
2193
2194 e = ajDomElementGetFirstChildByTagNameC(doc,dasfeature,
2195 "ORIENTATION");
2196 if(e != NULL)
2197 {
2198 elmtxt = ajDomElementGetText(e);
2199 ajDebug(" strand = %S\n",elmtxt);
2200 feature->Strand = ajStrGetCharFirst(elmtxt);
2201 }
2202
2203
2204 /*
2205 * <PHASE> (optional; one per feature)
2206 *
2207 * position of the feature relative to open reading frame, if any.
2208 * It may be one of the integers 0, 1 or 2, corresponding to
2209 * each of the three reading frames,
2210 * or - if the feature is unrelated to a reading frame.
2211 * If this tag is omitted, a value of - is assumed.
2212 */
2213
2214 e = ajDomElementGetFirstChildByTagNameC(doc, dasfeature, "PHASE");
2215
2216 if(e != NULL)
2217 {
2218 elmtxt = ajDomElementGetText(e);
2219 ajDebug(" phase = %S\n",elmtxt);
2220
2221 /* TODO: new integer value to represent features
2222 * unrelated to a reading frame */
2223 if ( ajStrGetCharFirst(elmtxt) == '-' )
2224 feature->Frame = 0;
2225
2226 ajStrToInt(elmtxt, &feature->Frame);
2227 }
2228
2229
2230 /*
2231 * <NOTE> (optional; zero or more per feature)
2232 *
2233 * A human-readable note in plain text format only
2234 *
2235 */
2236
2237 notes = ajDomElementGetElementsByTagNameC(dasfeature,"NOTE");
2238
2239 if (notes!=NULL)
2240 {
2241 for(j=0; j<ajDomNodeListGetLen(notes); j++)
2242 {
2243 e = ajDomNodeListItem(notes,j);
2244 elmtxt = ajDomElementGetText(e);
2245
2246 /* the following check covers empty nodes: <NOTE/> */
2247 if(elmtxt != NULL)
2248 ajFeatGfftagAddCS(feature, "Note", elmtxt);
2249 }
2250
2251 ajDomDocumentDestroyNodeList(doc,¬es,AJDOMKEEP);
2252 }
2253
2254
2255 /*
2256 * <LINK> (optional; zero or more per feature)
2257 *
2258 * link to a web page somewhere that provides more information
2259 * about this feature.
2260 * element text is an optional human readable label
2261 * for display purposes.
2262 *
2263 */
2264
2265 links = ajDomElementGetElementsByTagNameC(dasfeature,"LINK");
2266
2267 if (links!=NULL)
2268 {
2269 for(j=0; j<ajDomNodeListGetLen(links); j++)
2270 {
2271 e = ajDomNodeListItem(links,j);
2272 attval = ajDomElementGetAttributeC(e, "href");
2273 ajDebug(" link - href = %S\n",attval);
2274 ajFeatGfftagAddCS(feature, "dasgff_link", attval);
2275 ajStrDel(&attval);
2276 }
2277
2278 ajDomDocumentDestroyNodeList(doc,&links,AJDOMKEEP);
2279 }
2280
2281
2282 /*
2283 * <TARGET> (optional; zero or more per feature)
2284 *
2285 * target sequence in a sequence similarity match.
2286 * The id attribute provides
2287 * the reference ID for the target sequence,
2288 * and the start and stop attributes indicate the segment
2289 * that matched across the target sequence.
2290 * All three attributes are required.
2291 * content of the tag (optional) is a human readable label.
2292 *
2293 * gff mapping: attributes column - Target tag
2294 */
2295
2296 targets = ajDomElementGetElementsByTagNameC(dasfeature,"TARGET");
2297
2298 if(targets!=NULL)
2299 {
2300 for(j=0;j<ajDomNodeListGetLen(targets);j++)
2301 {
2302 e = ajDomNodeListItem(targets,j);
2303
2304 attval = ajDomElementGetAttributeC(e, "start");
2305 ajStrToUint(attval, &start);
2306 ajStrDel(&attval);
2307 attval = ajDomElementGetAttributeC(e, "stop");
2308 ajStrToUint(attval, &end);
2309 ajStrDel(&attval);
2310
2311 attval = ajDomElementGetAttributeC(e, "id");
2312 ajDebug(" target id = %S\n",attval);
2313
2314 if(end)
2315 ajFmtPrintAppS(&attval, " %d %d", start, end);
2316
2317 ajFeatGfftagAddCS(feature,"Target",attval);
2318
2319 ajStrDel(&attval);
2320 }
2321
2322 ajDomDocumentDestroyNodeList(doc,&targets,AJDOMKEEP);
2323 }
2324
2325
2326 /*
2327 * <PARENT>, <PART> (optional; zero or more per feature)
2328 *
2329 * A replacement for the <GROUP> tag,
2330 * these tags identify other features that are parents or children
2331 * of this feature within a hierarchy.
2332 * Each has a single required attribute, id,
2333 * which refers to a separate <FEATURE> tag.
2334 * This mechanism means a parent or child feature need
2335 * only be defined once and may be referred to multiple times.
2336 * This is preferable to the use of <GROUP> tags,
2337 * where a parent must be defined separately for every child.
2338 * It also allows more than two levels of hierarchy to be defined
2339 * (e.g. a gene has parts - transcripts, and transcripts have parts
2340 * - exons).
2341 * In addition, parent features may have start/end positions.
2342 *
2343 */
2344
2345 groups = ajDomElementGetElementsByTagNameC(dasfeature,"GROUP");
2346
2347 if (groups!=NULL)
2348 {
2349 for(j=0; j<ajDomNodeListGetLen(groups); j++)
2350 {
2351 e = ajDomNodeListItem(groups,j);
2352 attval = ajDomElementGetAttributeC(e, "id");
2353
2354 if(ajStrGetLen(attval) == 0)
2355 {
2356 ret = AJFALSE;
2357 ajStrDel(&attval);
2358 break;
2359 }
2360
2361 type = ajDomElementGetAttributeC(e, "type");
2362
2363 if(ajStrGetLen(type))
2364 ajFmtPrintAppS(&attval," type=%S", type);
2365
2366 label = ajDomElementGetAttributeC(e, "label");
2367
2368 if(ajStrGetLen(label))
2369 ajFmtPrintAppS(&attval," label=%S", label);
2370
2371 ajDebug(" group - id = %S\n",attval);
2372 ajFeatGfftagAddCS(feature,"group",attval);
2373
2374 ajStrDel(&type);
2375 ajStrDel(&label);
2376 ajStrDel(&attval);
2377 }
2378
2379 ajDomDocumentDestroyNodeList(doc,&groups,AJDOMKEEP);
2380 }
2381
2382
2383 parents = ajDomElementGetElementsByTagNameC(dasfeature,"PARENT");
2384
2385 if (parents!=NULL)
2386 {
2387 for(j=0; j<ajDomNodeListGetLen(parents); j++)
2388 {
2389 e = ajDomNodeListItem(parents,j);
2390 attval = ajDomElementGetAttributeC(e, "id");
2391
2392 if(ajStrGetLen(attval) == 0)
2393 {
2394 ret = AJFALSE;
2395 break;
2396 }
2397
2398 ajDebug(" parent - id = %S\n",attval);
2399 ajFeatGfftagAddCS(feature,"Parent",attval);
2400 ajStrDel(&attval);
2401 }
2402
2403 ajDomDocumentDestroyNodeList(doc,&parents,AJDOMKEEP);
2404 }
2405
2406 parts = ajDomElementGetElementsByTagNameC(dasfeature,"PART");
2407
2408 if (parts!=NULL)
2409 {
2410 for(j=0; j<ajDomNodeListGetLen(parts); j++)
2411 {
2412 e = ajDomNodeListItem(parts,j);
2413 attval = ajDomElementGetAttributeC(e, "id");
2414
2415 if(ajStrGetLen(attval) == 0)
2416 {
2417 ret = AJFALSE;
2418 break;
2419 }
2420
2421 ajDebug(" part - id = %S\n",attval);
2422 ajFeatTagAddCS(feature,"part",attval);
2423 ajStrDel(&attval);
2424 }
2425
2426 ajDomDocumentDestroyNodeList(doc,&parts,AJDOMKEEP);
2427 }
2428
2429 }
2430
2431 ajDomDocumentDestroyNodeList(doc,&features,AJDOMKEEP);
2432
2433 }
2434
2435 ajDomDocumentDestroyNodeList(doc,&segments,AJDOMKEEP);
2436 ajDomDocumentDestroyNode(doc,&doc);
2437
2438 return ret;
2439 }
2440
2441
2442
2443
2444 /* @funcstatic featReadEmbl ***************************************************
2445 **
2446 ** Reads feature data in EMBL format. Also handles GenBank, RefSeq, RefSeqP
2447 **
2448 ** @param [u] feattabin [AjPFeattabin] Feature table input
2449 ** @param [u] ftable [AjPFeattable] Feature table
2450 ** @return [AjBool] ajTrue on success
2451 **
2452 ** @release 1.0.0
2453 ** @@
2454 ******************************************************************************/
2455
featReadEmbl(AjPFeattabin feattabin,AjPFeattable ftable)2456 static AjBool featReadEmbl(AjPFeattabin feattabin, AjPFeattable ftable)
2457 {
2458 AjBool found = ajFalse;
2459 AjPStr savefeat = NULL;
2460 AjPStr saveline = NULL;
2461 AjPStr saveloc = NULL;
2462
2463 AjPStr token = NULL;
2464 AjPStrTok handle = NULL;
2465
2466 AjBool isGenbank = ajFalse;
2467 ajlong fpos = 0;
2468
2469 AjPFilebuff file = feattabin->Input->Filebuff;
2470
2471 if(!featReadLine)
2472 featReadLine = ajStrNewRes(100);
2473
2474 ajFeattableSetNuc(ftable);
2475
2476 while(ajBuffreadLinePosStore(file, &featReadLine, &fpos,
2477 feattabin->Input->Text,
2478 &ftable->TextPtr))
2479 {
2480 if(ajStrPrefixC(featReadLine, "//"))
2481 {
2482 break;
2483 }
2484 else if(ajStrPrefixC(featReadLine, "ID "))
2485 {
2486 isGenbank = ajFalse;
2487 ajStrTokenAssignC(&handle, featReadLine, " ;\t\n\r");
2488 ajStrTokenNextParse(handle, &token); /* 'ID' */
2489 ajStrTokenNextParse(handle, &ftable->Seqid); /* entry name */
2490 ajStrTokenDel(&handle);
2491 ajStrDel(&token);
2492 }
2493 else if(ajStrPrefixC(featReadLine, "LOCUS "))
2494 {
2495 isGenbank = ajTrue;
2496 } /* if it's an EMBL feature do stuff */
2497 else if(ajStrPrefixC(featReadLine, "FEATURES "))
2498 {
2499 isGenbank = ajTrue;
2500 }
2501 else if(ajStrPrefixC(featReadLine, "FT "))
2502 {
2503 ajStrTrimWhiteEnd(&featReadLine); /* remove newline */
2504
2505 if(featEmblFromLine(ftable, featReadLine,
2506 &savefeat, &saveloc, &saveline))
2507 found = ajTrue;
2508 }
2509
2510 /* if it's a GenBank feature do stuff */
2511 else if(isGenbank && ajStrPrefixC(featReadLine, " "))
2512 {
2513 ajStrTrimWhiteEnd(&featReadLine); /* remove newline */
2514
2515 if(featEmblFromLine(ftable, featReadLine,
2516 &savefeat, &saveloc, &saveline))
2517 found = ajTrue;
2518 }
2519 else if(isGenbank)
2520 {
2521 isGenbank = ajFalse;
2522 }
2523
2524 }
2525
2526 if(featEmblFromLine(ftable, NULL, &savefeat, &saveloc, &saveline))
2527 found = ajTrue;
2528
2529 ajStrDel(&saveloc);
2530 ajStrDel(&saveline);
2531 ajStrDel(&savefeat);
2532
2533 return found;
2534 }
2535
2536
2537
2538
2539 /* @funcstatic featReadGenpept ************************************************
2540 **
2541 ** Reads feature data in GENPEPT format
2542 **
2543 ** @param [u] feattabin [AjPFeattabin] Feature table input
2544 ** @param [u] ftable [AjPFeattable] Feature table
2545 ** @return [AjBool] ajTrue on success
2546 **
2547 ** @release 6.2.0
2548 ** @@
2549 ******************************************************************************/
2550
featReadGenpept(AjPFeattabin feattabin,AjPFeattable ftable)2551 static AjBool featReadGenpept(AjPFeattabin feattabin, AjPFeattable ftable)
2552 {
2553 AjBool found = ajFalse;
2554 AjPStr savefeat = NULL;
2555 AjPStr saveline = NULL;
2556 AjPStr saveloc = NULL;
2557
2558 AjPFilebuff file = feattabin->Input->Filebuff;
2559
2560 if(!featReadLine)
2561 featReadLine = ajStrNewRes(100);
2562
2563 ajFeattableSetProt(ftable);
2564
2565 while(ajBuffreadLine(file, &featReadLine))
2566 {
2567 if(ajStrPrefixC(featReadLine, " "))
2568 {
2569 ajStrTrimWhiteEnd(&featReadLine); /* remove newline */
2570
2571 if(featEmblFromLine(ftable, featReadLine,
2572 &savefeat, &saveloc, &saveline))
2573 found = ajTrue;
2574 }
2575 }
2576
2577 if(featEmblFromLine(ftable, NULL, &savefeat, &saveloc, &saveline))
2578 found = ajTrue;
2579
2580 ajStrDel(&saveloc);
2581 ajStrDel(&saveline);
2582 ajStrDel(&savefeat);
2583
2584 return found;
2585 }
2586
2587
2588
2589
2590 /* @funcstatic featReadRefseq *************************************************
2591 **
2592 ** Reads feature data in REFSEQ format
2593 **
2594 ** @param [u] feattabin [AjPFeattabin] Feature table input
2595 ** @param [u] ftable [AjPFeattable] Feature table
2596 ** @return [AjBool] ajTrue on success
2597 **
2598 ** @release 6.2.0
2599 ** @@
2600 ******************************************************************************/
2601
featReadRefseq(AjPFeattabin feattabin,AjPFeattable ftable)2602 static AjBool featReadRefseq(AjPFeattabin feattabin, AjPFeattable ftable)
2603 {
2604 AjBool found = ajFalse;
2605 AjPStr savefeat = NULL;
2606 AjPStr saveline = NULL;
2607 AjPStr saveloc = NULL;
2608
2609 AjPFilebuff file = feattabin->Input->Filebuff;
2610
2611 if(!featReadLine)
2612 featReadLine = ajStrNewRes(100);
2613
2614 ajFeattableSetNuc(ftable);
2615
2616 while(ajBuffreadLine(file, &featReadLine))
2617 {
2618 if(ajStrPrefixC(featReadLine, " "))
2619 {
2620 ajStrTrimWhiteEnd(&featReadLine); /* remove newline */
2621
2622 if(featEmblFromLine(ftable, featReadLine,
2623 &savefeat, &saveloc, &saveline))
2624 found = ajTrue;
2625 }
2626 }
2627
2628 if(featEmblFromLine(ftable, NULL, &savefeat, &saveloc, &saveline))
2629 found = ajTrue;
2630
2631 ajStrDel(&saveloc);
2632 ajStrDel(&saveline);
2633 ajStrDel(&savefeat);
2634
2635 return found;
2636 }
2637
2638
2639
2640
2641 /* @funcstatic featReadRefseqp ************************************************
2642 **
2643 ** Reads feature data in REFSEQP format
2644 **
2645 ** @param [u] feattabin [AjPFeattabin] Feature table input
2646 ** @param [u] ftable [AjPFeattable] Feature table
2647 ** @return [AjBool] ajTrue on success
2648 **
2649 ** @release 6.2.0
2650 ** @@
2651 ******************************************************************************/
2652
featReadRefseqp(AjPFeattabin feattabin,AjPFeattable ftable)2653 static AjBool featReadRefseqp(AjPFeattabin feattabin, AjPFeattable ftable)
2654 {
2655 AjBool found = ajFalse;
2656 AjPStr savefeat = NULL;
2657 AjPStr saveline = NULL;
2658 AjPStr saveloc = NULL;
2659 AjPFilebuff file = feattabin->Input->Filebuff;
2660
2661 if(!featReadLine)
2662 featReadLine = ajStrNewRes(100);
2663
2664 ajFeattableSetProt(ftable);
2665
2666 while(ajBuffreadLine(file, &featReadLine))
2667 {
2668 if(ajStrPrefixC(featReadLine, " "))
2669 {
2670 ajStrTrimWhiteEnd(&featReadLine); /* remove newline */
2671
2672 if(featRefseqpFromLine(ftable, featReadLine,
2673 &savefeat, &saveloc, &saveline))
2674 found = ajTrue;
2675 }
2676 }
2677
2678 if(featRefseqpFromLine(ftable, NULL, &savefeat, &saveloc, &saveline))
2679 found = ajTrue;
2680
2681 ajStrDel(&saveloc);
2682 ajStrDel(&saveline);
2683 ajStrDel(&savefeat);
2684
2685 return found;
2686 }
2687
2688
2689
2690
2691 /* @funcstatic featReadPir ****************************************************
2692 **
2693 ** Reads feature data in PIR format
2694 **
2695 ** @param [u] feattabin [AjPFeattabin] Feature table input
2696 ** @param [u] ftable [AjPFeattable] Feature table
2697 ** @return [AjBool] ajTrue on success
2698 **
2699 ** @release 2.0.0
2700 ** @@
2701 ******************************************************************************/
2702
featReadPir(AjPFeattabin feattabin,AjPFeattable ftable)2703 static AjBool featReadPir(AjPFeattabin feattabin, AjPFeattable ftable)
2704 {
2705 AjBool found = ajFalse;
2706
2707 AjPFilebuff file = feattabin->Input->Filebuff;
2708
2709 /*ajDebug("featReadPir..........\n");*/
2710
2711 if(!featReadLine)
2712 featReadLine = ajStrNewRes(100);
2713
2714 while(ajBuffreadLine(file, &featReadLine))
2715 {
2716 ajStrTrimWhite(&featReadLine);
2717
2718 /* ajDebug("++ line '%S'\n", line); */
2719
2720 if(ajStrPrefixC(featReadLine, "F;"))
2721 {
2722 if(featPirFromLine(ftable, featReadLine))
2723 found = ajTrue;
2724 }
2725 }
2726
2727 return found;
2728 }
2729
2730
2731
2732
2733 /* @funcstatic featPirFromLine ************************************************
2734 **
2735 ** Read input file line in PIR format
2736 **
2737 ** Format is :-
2738 ** F;position/type: note #comment
2739 **
2740 ** @param [u] thys [AjPFeattable] Feature table
2741 ** @param [r] origline [const AjPStr] Input line
2742 ** @return [AjPFeature] New feature.
2743 **
2744 ** @release 2.0.0
2745 ** @@
2746 ******************************************************************************/
2747
featPirFromLine(AjPFeattable thys,const AjPStr origline)2748 static AjPFeature featPirFromLine(AjPFeattable thys,
2749 const AjPStr origline)
2750 {
2751 AjPStr temp = NULL;
2752 static AjPFeature gf = NULL; /* made static so that it's easy
2753 to add second line of description */
2754 /* AjPFeature gfpos = NULL; */
2755 AjPStr locstr = NULL;
2756 AjPStr typstr = NULL;
2757 AjPStr notestr = NULL;
2758 AjPStr comstr = NULL;
2759 AjPStr exonstr = NULL;
2760 AjPStr posstr = NULL;
2761 ajint i = 0;
2762 AjBool mother = ajTrue;
2763 ajuint Start = 0;
2764 ajuint End = 0;
2765 ajuint Flags = 0;
2766 const AjPStr pirtype = NULL;
2767
2768 /*ajDebug("featPirFromLine..........\n'%S'\n", origline);*/
2769
2770 if(!featinTagNote)
2771 ajStrAssignC(&featinTagNote, "note");
2772
2773 if(!featinTagComm)
2774 ajStrAssignC(&featinTagComm, "comment");
2775
2776 if(!featinSourcePir)
2777 featinSourcePir = ajStrNewC("PIR");
2778
2779 if(!ajRegExec(PirRegexAll, origline))
2780 return NULL;
2781
2782 ajRegSubI(PirRegexAll, 1, &locstr);
2783 ajRegSubI(PirRegexAll, 2, &typstr);
2784 ajRegSubI(PirRegexAll, 3, ¬estr);
2785
2786 /* remove spaces in feature type so we can look it up */
2787
2788 ajStrExchangeCC(&typstr, " ", "_");
2789
2790 pirtype = ajFeattypeGetInternalPir(typstr);
2791 ajStrRemoveWhiteExcess(¬estr);
2792
2793 /* decode the position(s) */
2794
2795 while(ajRegExec(PirRegexLoc, locstr)) /* split at ',' */
2796 {
2797 ajRegSubI(PirRegexLoc, 1, &exonstr);
2798 ajRegPost(PirRegexLoc, &temp);
2799 ajStrAssignS(&locstr, temp);
2800 i = 0;
2801
2802 while(ajRegExec(PirRegexPos, exonstr)) /* split at '-' */
2803 {
2804 ajRegSubI(PirRegexPos, 1, &posstr);
2805
2806 if(!i++)
2807 if(!ajStrToUint(posstr, &Start))
2808 Start = 1;
2809
2810 ajRegPost(PirRegexPos, &temp);
2811 ajStrAssignS(&exonstr, temp);
2812 }
2813
2814 if(!ajStrToUint(posstr, &End))
2815 End = 1;
2816
2817 if(mother)
2818 gf = ajFeatNewProtFlags(thys,
2819 featinSourcePir, /* source sequence */
2820 pirtype,
2821 Start, End,
2822 0.0,
2823 Flags);
2824 if(!mother || ajStrGetLen(locstr))
2825 {
2826 /* gfpos = */ ajFeatNewProtFlagsSub(thys,
2827 gf,
2828 featinSourcePir, /* source sequence */
2829 pirtype,
2830 Start, End,
2831 0.0,
2832 Flags);
2833 if(!gf->Start || gf->Start > Start)
2834 gf->Start = Start;
2835 if(!gf->End || gf->End < End)
2836 gf->End = End;
2837 }
2838
2839 /* for the first feature, process the rest of the tags */
2840
2841 if(mother)
2842 {
2843 if(ajStrGetLen(notestr))
2844 ajFeatTagAddSS(gf, featinTagNote, notestr);
2845
2846 ajRegPost(PirRegexAll, &temp);
2847
2848 while(ajRegExec(PirRegexCom, temp))
2849 {
2850 ajRegSubI(PirRegexCom, 1, &comstr);
2851 ajStrRemoveWhiteExcess(&comstr);
2852 ajFeatTagAddSS(gf, featinTagComm, comstr);
2853 ajRegPost(PirRegexCom, &temp);
2854 }
2855 }
2856
2857 mother = ajFalse;
2858 }
2859
2860 if(mother)
2861 ajFeatWarn("featPirFromLine: Did not understand location '%S'", locstr);
2862
2863 ajStrDel(&temp);
2864 ajStrDel(&locstr);
2865 ajStrDel(&typstr);
2866 ajStrDel(¬estr);
2867 ajStrDel(&comstr);
2868 ajStrDel(&exonstr);
2869 ajStrDel(&posstr);
2870
2871 return gf;
2872 }
2873
2874
2875
2876
2877 /* @funcstatic featReadSwiss **************************************************
2878 **
2879 ** Reads feature data in SwissProt format
2880 **
2881 ** @param [u] feattabin [AjPFeattabin] Feature table input
2882 ** @param [u] ftable [AjPFeattable] Feature table
2883 ** @return [AjBool] ajTrue on success
2884 **
2885 ** @release 1.0.0
2886 ** @@
2887 ******************************************************************************/
2888
featReadSwiss(AjPFeattabin feattabin,AjPFeattable ftable)2889 static AjBool featReadSwiss(AjPFeattabin feattabin, AjPFeattable ftable)
2890 {
2891 AjBool found = ajFalse;
2892 AjPStr savefeat = NULL;
2893 AjPStr saveline = NULL;
2894 AjPStr savefrom = NULL;
2895 AjPStr saveto = NULL;
2896
2897 AjPStrTok handle = NULL;
2898 AjPStr token = NULL;
2899 AjPFilebuff file = feattabin->Input->Filebuff;
2900 AjPStr defname = NULL;
2901 AjBool setname = ajFalse;
2902
2903 /*ajDebug("featReadSwiss..........\n");*/
2904
2905 if(!featReadLine)
2906 featReadLine = ajStrNewRes(100);
2907
2908 while(ajBuffreadLine(file, &featReadLine))
2909 {
2910 ajStrTrimWhite(&featReadLine);
2911
2912 /* ajDebug("++ line '%S'\n", line); */
2913
2914 if(ajStrPrefixC(featReadLine, "//"))
2915 {
2916 break;
2917 }
2918 else if(ajStrPrefixC(featReadLine, "ID "))
2919 {
2920 ajStrTokenAssignC(&handle, featReadLine, " \n\r");
2921 ajStrTokenNextParse(handle, &token); /* 'ID' */
2922 ajStrTokenNextParse(handle, &defname); /* entry name */
2923 if(!setname)
2924 {
2925 ajFeattableSetDefname(ftable, defname);
2926 setname = ajTrue;
2927 }
2928 }
2929 else if(ajStrPrefixC(featReadLine, "AC "))
2930 {
2931 ajStrTokenAssignC(&handle, featReadLine, " \n\r,;");
2932 ajStrTokenNextParse(handle, &token); /* 'AC' */
2933 ajStrTokenNextParse(handle, &defname); /* accession */
2934 if(!setname)
2935 {
2936 ajFeattableSetDefname(ftable, defname);
2937 setname = ajTrue;
2938 }
2939 }
2940 else if(ajStrPrefixC(featReadLine, "FT "))
2941 {
2942 if(featSwissFromLine(ftable, featReadLine,
2943 &savefeat, &savefrom, &saveto, &saveline))
2944 found = ajTrue;
2945 }
2946 }
2947
2948 if(featSwissFromLine(ftable, NULL,
2949 &savefeat, &savefrom, &saveto, &saveline))
2950 found = ajTrue;
2951
2952 if(found)
2953 {
2954 ajFeattableSetProt(ftable);
2955 }
2956
2957 /*ajDebug("featReadSwiss returns %B\n", found);*/
2958
2959 ajStrDel(&savefeat);
2960 ajStrDel(&savefrom);
2961 ajStrDel(&saveto);
2962 ajStrDel(&saveline);
2963 ajStrDel(&token);
2964 ajStrDel(&defname);
2965
2966 ajStrTokenDel(&handle);
2967
2968 return found;
2969 }
2970
2971
2972
2973
2974 /* @funcstatic featGff2ProcessTagval ******************************************
2975 **
2976 ** Parses and adds a tag-value set to the specified AjPFeature;
2977 ** looked at 'parse_group' method in GFF::GeneFeature.pm Perl module
2978 ** for inspiration
2979 **
2980 ** @param [u] gf [AjPFeature] Feature
2981 ** @param [u] table [AjPFeattable] Feature table
2982 ** @param [r] groupfield [const AjPStr] Group field identifier
2983 ** @param [r] version [float] GFF version
2984 ** @return [void]
2985 **
2986 ** @release 6.4.0
2987 ** @@
2988 ******************************************************************************/
2989
featGff2ProcessTagval(AjPFeature gf,AjPFeattable table,const AjPStr groupfield,float version)2990 static void featGff2ProcessTagval(AjPFeature gf, AjPFeattable table,
2991 const AjPStr groupfield, float version)
2992 {
2993 AjPStr TvString = NULL;
2994 AjPStr tmptag = NULL;
2995 AjBool grpset = ajFalse;
2996 AjBool escapedquote = ajFalse;
2997
2998 /*ajDebug("featGff2ProcessTagval version %3.1f '%S'\n",
2999 version, groupfield); */
3000
3001 /* Validate arguments */
3002 if(!ajStrGetLen(groupfield)) /* no tags, must be new */
3003 return;
3004
3005 if(E_FPEQ(version,1.0F,U_FEPS))
3006 {
3007 featGroupSet(gf, table, groupfield);
3008 /*ajDebug("V1.0 group: '%S'\n", groupfield);*/
3009 grpset = ajTrue;
3010
3011 return;
3012 }
3013
3014 /*
3015 * Version 2 or greater: parse groupfield for semicolon ';'
3016 * delimited tag-value structures, taking special care about
3017 * double quoted string context. rbsk first version of code was
3018 * adapted from GFF.pm (th/rbsk), itself inherited from AceParse.pm,
3019 * courtesy of James Gilbert
3020 */
3021
3022 ajStrAssignS(&TvString, groupfield);
3023 if(ajStrFindC(TvString, "\\\"") >= 0)
3024 {
3025 escapedquote = ajTrue;
3026 ajStrExchangeCC(&TvString, "\\\"", "\001");
3027 }
3028
3029 while(ajStrGetLen(TvString))
3030 {
3031 if(ajRegExec(GffRegexTvTagval, TvString))
3032 {
3033 ajRegSubI(GffRegexTvTagval, 1, &tmptag);
3034 ajRegSubI(GffRegexTvTagval, 2, &featinValTmp);
3035 ajStrTrimWhite(&featinValTmp);
3036 /*ajDebug("GffTv '%S' '%S'\n", tmptag, featinValTmp);*/
3037 ajRegPost(GffRegexTvTagval, &TvString);
3038
3039 if(ajStrMatchC(tmptag, "Sequence"))
3040 {
3041 featGroupSet(gf, table, featinValTmp);
3042 grpset = ajTrue;
3043 }
3044 else if(ajStrMatchC(tmptag, "ID"))
3045 {
3046 featGroupSet(gf, table, featinValTmp);
3047 grpset = ajTrue;
3048 }
3049 else if(ajStrMatchC(tmptag, "FeatFlags"))
3050 featFlagSet(gf, featinValTmp);
3051 else if(ajStrMatchC(tmptag, "featflags"))
3052 featFlagSet(gf, featinValTmp);
3053 else
3054 {
3055 /*ajDebug("Before QuoteStrip: '%S'\n", featinValTmp);*/
3056 ajStrQuoteStrip(&featinValTmp);
3057 /*ajDebug(" After QuoteStrip: '%S'\n", featinValTmp);*/
3058 ajFeatTagAddSS(gf,tmptag,featinValTmp);
3059 }
3060 }
3061 else
3062 {
3063 if(escapedquote)
3064 ajStrExchangeCC(&featinValTmp, "\001", "\"");
3065 /*ajDebug("Choked on '%S'\n", TvString);*/
3066 ajFeatWarn("GFF tag parsing failed on '%S' in\n'%S'",
3067 TvString, groupfield);
3068 }
3069 }
3070
3071 if(!grpset)
3072 {
3073 featGroupSet(gf, table, NULL);
3074 grpset = ajTrue;
3075 }
3076
3077 ajStrDel(&TvString);
3078 ajStrDel(&tmptag);
3079
3080 return;
3081 }
3082
3083
3084
3085
3086 /* @funcstatic featGff3ProcessTagval ******************************************
3087 **
3088 ** Parses and adds a tag-value set to the specified AjPFeature
3089 **
3090 ** @param [u] gf [AjPFeature] Feature
3091 ** @param [u] table [AjPFeattable] Feature table
3092 ** @param [r] groupfield [const AjPStr] Group field identifier
3093 ** @param [w] parent [AjBool*] True if any Parent tag is found
3094 ** @return [AjPStr] Identifier if any
3095 **
3096 ** @release 6.0.0
3097 ** @@
3098 ******************************************************************************/
3099
featGff3ProcessTagval(AjPFeature gf,AjPFeattable table,const AjPStr groupfield,AjBool * parent)3100 static AjPStr featGff3ProcessTagval(AjPFeature gf, AjPFeattable table,
3101 const AjPStr groupfield,
3102 AjBool *parent)
3103 {
3104 AjPStr TvString = NULL;
3105 AjPStr tmptag = NULL;
3106 AjBool grpset = ajFalse;
3107 char cp;
3108 AjPStr idstr = NULL;
3109
3110 /*ajDebug("featGff3ProcessTagval '%S'\n",
3111 groupfield); */
3112
3113 *parent = ajFalse;
3114
3115 /* Validate arguments */
3116 if(!ajStrGetLen(groupfield)) /* no tags, must be new */
3117 return idstr;
3118
3119 /*
3120 * parse groupfield for semicolon ';'
3121 * delimited tag-value structures, taking special care about
3122 * double quoted string context.
3123 */
3124
3125 ajStrAssignS(&TvString, groupfield);
3126 while(ajStrGetLen(TvString))
3127 {
3128 if(ajRegExec(Gff3RegexTvTagval, TvString))
3129 {
3130 ajRegSubI(Gff3RegexTvTagval, 1, &tmptag);
3131 ajRegSubI(Gff3RegexTvTagval, 2, &featinValTmp);
3132 ajStrFmtPercentDecode(&featinValTmp);
3133 ajStrTrimWhite(&featinValTmp);
3134 /*ajDebug("Gff3Tv '%S' '%S'\n", tmptag, featinValTmp);*/
3135 ajRegPost(Gff3RegexTvTagval, &TvString);
3136
3137 cp = ajStrGetCharFirst(tmptag);
3138 if(isupper((int)cp))
3139 {
3140 if(ajStrMatchC(tmptag, "ID"))
3141 {
3142 ajStrAssignS(&idstr, featinValTmp);
3143 }
3144 else if(ajStrMatchC(tmptag, "Parent"))
3145 {
3146 *parent = ajTrue;
3147 featGff3GroupSet(gf, table, featinValTmp);
3148 grpset = ajTrue;
3149 }
3150 else if(ajStrMatchC(tmptag, "Is_circular"))
3151 {
3152 ajFeattableSetCircular(table);
3153 }
3154
3155 ajFeatGfftagAddSS(gf,tmptag,featinValTmp);
3156 }
3157
3158 else{
3159 if(ajStrMatchC(tmptag, "featflags"))
3160 {
3161 featGff3FlagSet(gf, featinValTmp);
3162 }
3163 else
3164 {
3165 ajFeatTagAddSS(gf,tmptag,featinValTmp);
3166 }
3167 }
3168 }
3169 else
3170 {
3171 /*ajDebug("GFF3 choked on '%S'\n", TvString);*/
3172 ajFeatWarn("GFF3 tag parsing failed on '%S' in\n'%S'",
3173 TvString, groupfield);
3174 break;
3175 }
3176 }
3177
3178 if(!grpset)
3179 {
3180 featGroupSet(gf, table, NULL);
3181 grpset = ajTrue;
3182 }
3183
3184 ajStrDel(&TvString);
3185 ajStrDel(&tmptag);
3186
3187 return idstr;
3188 }
3189
3190
3191
3192
3193 /* @funcstatic featGff3oldProcessTagval ***************************************
3194 **
3195 ** Parses and adds a tag-value set to the specified AjPFeature fror GFF3 before
3196 ** EMBOSS 6.4.0
3197 **
3198 ** @param [u] gf [AjPFeature] Feature
3199 ** @param [u] table [AjPFeattable] Feature table
3200 ** @param [r] groupfield [const AjPStr] Group field identifier
3201 ** @return [void]
3202 **
3203 ** @release 6.4.0
3204 ** @@
3205 ******************************************************************************/
3206
featGff3oldProcessTagval(AjPFeature gf,AjPFeattable table,const AjPStr groupfield)3207 static void featGff3oldProcessTagval(AjPFeature gf, AjPFeattable table,
3208 const AjPStr groupfield)
3209 {
3210 AjPStr TvString = NULL;
3211 AjPStr tmptag = NULL;
3212 AjBool grpset = ajFalse;
3213 AjBool escapedquote = ajFalse;
3214
3215 /*ajDebug("featGff3oldProcessTagval '%S'\n",
3216 groupfield); */
3217
3218 /* Validate arguments */
3219 if(!ajStrGetLen(groupfield)) /* no tags, must be new */
3220 return;
3221
3222 /*
3223 * parse groupfield for semicolon ';'
3224 * delimited tag-value structures, taking special care about
3225 * double quoted string context.
3226 */
3227
3228 ajStrAssignS(&TvString, groupfield);
3229 if(ajStrFindC(TvString, "\\\"") >= 0)
3230 {
3231 escapedquote = ajTrue;
3232 ajStrExchangeCC(&TvString, "\\\"", "\001");
3233 }
3234
3235 while(ajStrGetLen(TvString))
3236 {
3237 if(ajRegExec(Gff3oldRegexTvTagval, TvString))
3238 {
3239 ajRegSubI(Gff3oldRegexTvTagval, 1, &tmptag);
3240 ajRegSubI(Gff3oldRegexTvTagval, 2, &featinValTmp);
3241 ajStrTrimWhite(&featinValTmp);
3242 /*ajDebug("Gff3Tv '%S' '%S'\n", tmptag, featinValTmp);*/
3243 ajRegPost(Gff3oldRegexTvTagval, &TvString);
3244
3245 if(ajStrMatchC(tmptag, "Sequence"))
3246 {
3247 featGroupSet(gf, table, featinValTmp);
3248 grpset = ajTrue;
3249 }
3250 else if(ajStrMatchC(tmptag, "ID"))
3251 {
3252 featGroupSet(gf, table, featinValTmp);
3253 grpset = ajTrue;
3254 }
3255 else if(ajStrMatchC(tmptag, "FeatFlags"))
3256 featFlagSet(gf, featinValTmp);
3257 else if(ajStrMatchC(tmptag, "featflags"))
3258 featFlagSet(gf, featinValTmp);
3259 else
3260 {
3261 if(escapedquote)
3262 ajStrExchangeCC(&featinValTmp, "\001", "\"");
3263 /*ajDebug("GFF3 before QuoteStrip: '%S'\n", featinValTmp);*/
3264 ajStrQuoteStrip(&featinValTmp);
3265 /*ajDebug(" GFF3 after QuoteStrip: '%S'\n", featinValTmp);*/
3266 ajFeatTagAddSS(gf,tmptag,featinValTmp);
3267 }
3268 }
3269 else
3270 {
3271 /*ajDebug("GFF3 choked on '%S'\n", TvString);*/
3272 ajFeatWarn("GFF3 tag parsing failed on '%S' in\n'%S'",
3273 TvString, groupfield);
3274 }
3275 }
3276
3277 if(!grpset)
3278 {
3279 featGroupSet(gf, table, NULL);
3280 grpset = ajTrue;
3281 }
3282
3283 ajStrDel(&TvString);
3284 ajStrDel(&tmptag);
3285
3286 return;
3287 }
3288
3289
3290
3291
3292 /* @funcstatic featSwissFromLine **********************************************
3293 **
3294 ** Read input file in Swiss format
3295 **
3296 ** Format is :-
3297 ** 0-1 FT
3298 ** 5-12 Keyname
3299 ** 14-19 From
3300 ** 21-26 To
3301 ** 34-74 Description
3302 **
3303 ** @param [u] thys [AjPFeattable] Feature table
3304 ** @param [r] origline [const AjPStr] Input line
3305 ** @param [w] savefeat [AjPStr*] Stored feature type
3306 ** @param [w] savefrom [AjPStr*] Continued from position
3307 ** @param [w] saveto [AjPStr*] Continued to position
3308 ** @param [w] saveline [AjPStr*] Continued tag-value pairs
3309 ** @return [AjPFeature] New feature.
3310 **
3311 ** @release 1.0.0
3312 ** @@
3313 ******************************************************************************/
3314
featSwissFromLine(AjPFeattable thys,const AjPStr origline,AjPStr * savefeat,AjPStr * savefrom,AjPStr * saveto,AjPStr * saveline)3315 static AjPFeature featSwissFromLine(AjPFeattable thys,
3316 const AjPStr origline,
3317 AjPStr* savefeat,
3318 AjPStr* savefrom,
3319 AjPStr* saveto,
3320 AjPStr* saveline)
3321 {
3322 AjPStr temp = NULL;
3323 static AjPFeature gf = NULL; /* made static so that it's easy
3324 to add second line of description */
3325 AjBool newft = ajFalse;
3326
3327 /*ajDebug("featSwissFromLine..........\n'%S'\n", origline);*/
3328
3329 if(!featinSourceSwiss)
3330 featinSourceSwiss = ajStrNewC("SWISSPROT");
3331
3332 if(origline)
3333 newft = ajRegExec(SwRegexNew, origline);
3334
3335
3336 if(newft || !origline) /* process the last feature */
3337 {
3338 /* ajDebug("++ feat+from+to '%S' '%S' '%S'\n+ saveline '%S'\n",
3339 *savefeat, *savefrom, *saveto, *saveline); */
3340
3341 if(ajStrGetLen(*savefrom)) /* finish the current feature */
3342 gf = featSwissProcess(thys, *savefeat, *savefrom, *saveto,
3343 featinSourceSwiss, *saveline);
3344 else /* maybe there were no features */
3345 gf = NULL;
3346
3347 ajStrDel(savefeat);
3348 ajStrDel(savefrom);
3349 ajStrDel(saveto);
3350 ajStrDel(saveline);
3351 }
3352
3353 if(!origline) /* we are only cleaning up */
3354 return gf;
3355
3356 if(newft) /* if new feature initialise for it */
3357 {
3358 ajRegSubI(SwRegexNew, 2, savefeat);
3359 ajRegSubI(SwRegexNew, 3, savefrom);
3360 ajRegSubI(SwRegexNew, 4, saveto);
3361 ajRegSubI(SwRegexNew, 5, saveline);
3362 ajStrTrimWhite(savefeat);
3363 /*ajDebug(" newft type '%S' from '%S' to '%S' rest '%S'\n",
3364 *savefeat, *savefrom, *saveto, *saveline);*/
3365 return gf;
3366 }
3367 else /* more tag-values */
3368 {
3369 if(ajRegExec(SwRegexNext, origline))
3370 {
3371 ajRegSubI(SwRegexNext, 1, &temp);
3372 ajStrAppendC(saveline, " ");
3373 ajStrAppendS(saveline, temp);
3374 }
3375 else
3376 ajFeatWarn("%S: Bad SwissProt feature line:\n%S",
3377 thys->Seqid, origline);
3378 }
3379
3380 ajStrDel(&temp);
3381
3382 return gf;
3383 }
3384
3385
3386
3387
3388 /* @funcstatic featSwissProcess ***********************************************
3389 **
3390 ** Processes one feature location and qualifier tags for SwissProt
3391 **
3392 ** @param [u] thys [AjPFeattable] Feature table
3393 ** @param [r] feature [const AjPStr] Feature type key
3394 ** @param [r] fromstr [const AjPStr] Feature start
3395 ** @param [r] tostr [const AjPStr] Feature end
3396 ** @param [r] source [const AjPStr] Feature table source
3397 ** @param [r] tags [const AjPStr] Feature qualifier tags string
3398 ** @return [AjPFeature] Feature as inserted into the feature table
3399 **
3400 ** @release 2.0.0
3401 ** @@
3402 ******************************************************************************/
3403
featSwissProcess(AjPFeattable thys,const AjPStr feature,const AjPStr fromstr,const AjPStr tostr,const AjPStr source,const AjPStr tags)3404 static AjPFeature featSwissProcess(AjPFeattable thys, const AjPStr feature,
3405 const AjPStr fromstr, const AjPStr tostr,
3406 const AjPStr source,
3407 const AjPStr tags)
3408 {
3409 AjPFeature ret;
3410 ajint Start = 0;
3411 ajint End = 0;
3412 ajint flags = 0;
3413
3414 AjPStr note = NULL;
3415 AjPStr comment = NULL;
3416 AjPStr ftid = NULL;
3417 AjPStr tagstr = NULL;
3418
3419 if(!featinTagNote)
3420 ajStrAssignC(&featinTagNote, "note");
3421
3422 if(!featinTagComm)
3423 ajStrAssignC(&featinTagComm, "comment");
3424
3425 if(!featinTagFtid)
3426 ajStrAssignC(&featinTagFtid, "ftid");
3427
3428 switch(ajStrGetCharFirst(fromstr))
3429 {
3430 case '?':
3431 flags |= AJFEATFLAG_START_UNSURE;
3432 ajStrAssignS(&featinTmpStr, fromstr);
3433 ajStrCutStart(&featinTmpStr, 1);
3434
3435 if(!ajStrToInt(featinTmpStr, &Start))
3436 Start = 0;
3437
3438 break;
3439 case '<':
3440 case '>': /* just to be sure */
3441 flags |= AJFEATFLAG_START_BEFORE_SEQ;
3442 ajStrAssignS(&featinTmpStr, fromstr);
3443 ajStrCutStart(&featinTmpStr, 1);
3444
3445 if(!ajStrToInt(featinTmpStr, &Start))
3446 Start = 0;
3447
3448 break;
3449
3450 default:
3451 if(!ajStrToInt(fromstr, &Start))
3452 Start = 0;
3453 }
3454
3455 switch(ajStrGetCharFirst(tostr))
3456 {
3457 case '?':
3458 flags |= AJFEATFLAG_END_UNSURE;
3459 ajStrAssignS(&featinTmpStr, tostr);
3460 ajStrCutStart(&featinTmpStr, 1);
3461
3462 if(!ajStrToInt(featinTmpStr, &End))
3463 End = 0;
3464
3465 break;
3466 case '<': /* just to be sure */
3467 case '>':
3468 flags |= AJFEATFLAG_END_AFTER_SEQ;
3469 ajStrAssignS(&featinTmpStr, tostr);
3470 ajStrCutStart(&featinTmpStr, 1);
3471
3472 if(!ajStrToInt(featinTmpStr, &End))
3473 End = 0;
3474
3475 break;
3476 default:
3477 if(!ajStrToInt(tostr, &End))
3478 End = 0;
3479 }
3480
3481 ajStrDelStatic(&featinTmpStr);
3482
3483 ret = ajFeatNewProtFlags(thys,
3484 source, /* source sequence */
3485 feature,
3486 Start, End,
3487 0.0,
3488 flags);
3489
3490 ajStrAssignS(&tagstr, tags);
3491 ajStrTrimC(&tagstr, " .");
3492
3493 if(ajRegExec(SwRegexFtid, tagstr))
3494 {
3495 ajRegSubI(SwRegexFtid, 1, ¬e);
3496 ajRegSubI(SwRegexFtid, 2, &ftid);
3497 /*ajDebug("Swiss ftid found\n");*/
3498 /*ajDebug("ftid: '%S'\n",ftid);*/
3499 ajStrAssignS(&tagstr, note);
3500 ajStrTrimC(&tagstr, " .");
3501 }
3502
3503 if(ajRegExec(SwRegexComment, tagstr))
3504 {
3505 ajRegSubI(SwRegexComment, 1, ¬e);
3506 ajRegSubI(SwRegexComment, 2, &comment);
3507 /*ajDebug("Swiss comment found\nNote: '%S'\nComment: '%S'\n",
3508 note, comment);*/
3509 ajStrTrimC(¬e, " .");
3510
3511 if(ajStrGetLen(note))
3512 ajFeatTagAddSS(ret, featinTagNote, note);
3513
3514 if(ajStrGetLen(comment))
3515 ajFeatTagAddSS(ret, featinTagComm, comment);
3516 }
3517 else
3518 {
3519 /*ajDebug("Simple swiss note: '%S'\n", tagstr);*/
3520 if(ajStrGetLen(tagstr))
3521 ajFeatTagAddSS(ret, featinTagNote, tagstr);
3522 }
3523
3524 if(ajStrGetLen(ftid))
3525 ajFeatTagAddSS(ret, featinTagFtid, ftid);
3526
3527 ajStrDel(¬e);
3528 ajStrDel(&comment);
3529 ajStrDel(&ftid);
3530 ajStrDel(&tagstr);
3531
3532 return ret;
3533 }
3534
3535
3536
3537
3538 /* @funcstatic featEmblFromLine ***********************************************
3539 **
3540 ** Converts an input EMBL format line into a feature.
3541 ** Starts a new feature by processing any existing feature data.
3542 ** Creates or appends the type, location and tag-value pairs.
3543 ** With a NULL as the input line, simply processes the type, location
3544 ** and tag-values.
3545 **
3546 ** @param [u] thys [AjPFeattable] Feature table
3547 ** @param [r] origline [const AjPStr] Input line (NULL to process last
3548 ** feature at end of input)
3549 ** @param [w] savefeat [AjPStr*] Stored feature type
3550 ** @param [w] saveloc [AjPStr*] Continued location
3551 ** @param [w] saveline [AjPStr*] Continued tag-value pairs
3552 ** @return [AjPFeature] New feature
3553 **
3554 ** @release 1.0.0
3555 ** @@
3556 ******************************************************************************/
3557
featEmblFromLine(AjPFeattable thys,const AjPStr origline,AjPStr * savefeat,AjPStr * saveloc,AjPStr * saveline)3558 static AjPFeature featEmblFromLine(AjPFeattable thys,
3559 const AjPStr origline,
3560 AjPStr* savefeat,
3561 AjPStr* saveloc,
3562 AjPStr* saveline)
3563 {
3564 static AjPFeature gf = NULL; /* so tag-values can be added LATER */
3565 AjPStr temp = NULL;
3566 AjBool newft = ajFalse;
3567 AjBool doft = ajFalse;
3568
3569 if(!featinSourceEmbl)
3570 featinSourceEmbl = ajStrNewC("EMBL");
3571
3572 if(origline)
3573 {
3574 /* As BufferFile can't be edited */
3575 ajStrAssignS(&featProcessLine,origline);
3576 /* chop first 5 characters */
3577 ajStrCutStart(&featProcessLine, 5);
3578
3579 /* look for the feature key */
3580 if(ajStrGetCharFirst(featProcessLine) != ' ')
3581 {
3582 newft = ajTrue;
3583
3584 if(ajStrGetLen(*saveloc))
3585 doft = ajTrue;
3586 }
3587 }
3588 else
3589 {
3590 ajStrAssignClear(&featProcessLine);
3591 newft = ajFalse; /* no new data, just process */
3592
3593 if(ajStrGetLen(*saveloc))
3594 doft = ajTrue;
3595 }
3596
3597 /* ajDebug("+ newft: %B doft: %B\n+ line '%S'\n",
3598 newft, doft, featProcessLine); */
3599
3600 if(doft) /* process the last feature */
3601 {
3602 /* ajDebug("++ saveloc '%S'\n+ saveline '%S'\n",
3603 *saveloc, *saveline); */
3604
3605 gf = featEmblProcess(thys, *savefeat, featinSourceEmbl,
3606 saveloc, saveline);
3607
3608 ajStrDelStatic(saveloc);
3609 ajStrDelStatic(saveline);
3610 }
3611
3612 if(!origline) /* we are only cleaning up */
3613 {
3614 if(doft)
3615 return gf;
3616 else
3617 return NULL;
3618 }
3619
3620 ajStrRemoveWhiteExcess(&featProcessLine);
3621
3622 if(newft) /* if new feature initialise for it */
3623 {
3624 ajStrTokenAssignC(&featEmblSplit, featProcessLine, " ");
3625 ajStrTokenNextParse(featEmblSplit, savefeat);
3626
3627 if(ajStrTokenNextParseC(featEmblSplit, " /", saveloc))
3628 ajStrTokenRestParse(featEmblSplit, saveline);
3629 else
3630 ajStrAssignResC(saveline, 512, ""); /* location only */
3631
3632 ajStrTokenDel(&featEmblSplit);
3633
3634 return gf;
3635 }
3636 else if(!ajStrGetLen(*saveline)) /* no tag-values yet, more location? */
3637 {
3638 if(ajStrGetCharFirst(featProcessLine) != '/')
3639 {
3640 ajStrTokenAssignC(&featEmblSplit, featProcessLine, " ");
3641 ajStrTokenNextParse(featEmblSplit, &temp);
3642
3643 if(ajStrGetLen(temp))
3644 ajStrAppendS(saveloc, temp);
3645
3646 ajStrTokenRestParse(featEmblSplit, &temp);
3647
3648 if(ajStrGetLen(temp))
3649 ajStrAppendS(saveline, temp);
3650
3651 ajStrDel(&temp);
3652 ajStrTokenDel(&featEmblSplit);
3653
3654 return gf;
3655 }
3656 }
3657
3658 /* tag-values continued */
3659 ajStrAppendK(saveline, ' ');
3660 ajStrAppendS(saveline, featProcessLine);
3661
3662 return gf;
3663 }
3664
3665
3666
3667
3668 /* @funcstatic featEmblProcess ************************************************
3669 **
3670 ** Processes one feature location and qualifier tags for EMBL
3671 **
3672 ** @param [u] thys [AjPFeattable] Feature table
3673 ** @param [r] feature [const AjPStr] Feature type key
3674 ** @param [r] source [const AjPStr] Feature table source
3675 ** @param [w] loc [AjPStr*] Feature location
3676 ** @param [w] tags [AjPStr*] Feature qualifier tags string
3677 ** @return [AjPFeature] Feature as inserted into the feature table
3678 **
3679 ** @release 2.0.0
3680 ** @@
3681 ******************************************************************************/
3682
featEmblProcess(AjPFeattable thys,const AjPStr feature,const AjPStr source,AjPStr * loc,AjPStr * tags)3683 static AjPFeature featEmblProcess(AjPFeattable thys, const AjPStr feature,
3684 const AjPStr source,
3685 AjPStr* loc, AjPStr* tags)
3686 {
3687 AjPFeature ret = NULL;
3688 AjPFeature gf = NULL;
3689 /* AjPFeature gfpos = NULL; */
3690 AjPStr tag = NULL;
3691 AjPStr val = NULL;
3692 AjPStr opnam = NULL;
3693 AjPStr opval = NULL;
3694 AjPStr prestr = NULL;
3695 AjBool Fwd = ajTrue;
3696 AjBool LocFwd = ajTrue;
3697 AjPStr begstr = NULL;
3698 AjPStr delstr = NULL;
3699 AjPStr endstr = NULL;
3700 AjPStr locstr = NULL;
3701 AjPStr rest = NULL;
3702 AjPStr tagname = NULL;
3703 AjPStr tagrest = NULL;
3704 AjBool Simple = ajFalse; /* Simple - single position (see also label) */
3705 AjBool BegBound = ajFalse;
3706 AjBool EndBound = ajFalse;
3707 ajuint BegNum = 0;
3708 ajuint EndNum = 0;
3709 ajuint Beg2 = 0;
3710 ajuint End2 = 0;
3711 ajuint tmpbeg = 0;
3712 ajuint tmpend = 0;
3713 ajuint Flags = 0;
3714 ajuint tmpflags = 0;
3715 AjBool Between = ajFalse;
3716 AjBool Join = ajFalse;
3717 AjBool Order = ajFalse;
3718 char Strand = '+';
3719 AjBool Parent = ajTrue;
3720 ajint Frame = 0;
3721 float Score = 0.0;
3722 AjBool HasOper = ajFalse;
3723 AjBool RemoteId = ajFalse;
3724 AjBool IsLabel = ajFalse; /* uses obsolete label */
3725 ajint Exon = 0;
3726 ajlong ipos;
3727 ajint itags = 0;
3728 const AjPStr tmpft = NULL;
3729 const AjPStr tmpfeatid = NULL;
3730 ajuint noparentflags = 0;
3731
3732 noparentflags = ~(AJFEATFLAG_POINT | AJFEATFLAG_BETWEEN_SEQ |
3733 AJFEATFLAG_REMOTEID | AJFEATFLAG_LABEL |
3734 AJFEATFLAG_START_TWO | AJFEATFLAG_END_TWO |
3735 AJFEATFLAG_START_BEFORE_SEQ | AJFEATFLAG_END_AFTER_SEQ);
3736
3737 ajStrRemoveWhite(loc); /* no white space needed */
3738 ajStrRemoveWhiteExcess(tags); /* single spaces only */
3739
3740 /*ajDebug("Clean location '%S'\n", *loc);*/
3741 /*ajDebug("Clean tags '%S'\n", *tags);*/
3742
3743 ajStrAssignS(&opval, *loc);
3744 ipos = ajStrFindAnyK(opval, ','); /* multiple locations */
3745
3746 if(ipos >= 0)
3747 {
3748 /* ajDebug("Multiple locations, test operator(s)\n"); */
3749 while(ajStrGetLen(opval) &&
3750 featEmblOperOut(opval, &opnam, &featinTmpStr))
3751 {
3752 if(!ajStrHasParentheses(featinTmpStr))
3753 break;
3754
3755 /* ajDebug("OperOut %S( '%S' )\n", opnam, featinTmpStr); */
3756 if(ajStrMatchCaseC(opnam, "complement"))
3757 Fwd = !Fwd;
3758
3759 else if(ajStrMatchCaseC(opnam, "one_of"))
3760 Order = ajTrue;
3761
3762 else if(ajStrMatchCaseC(opnam, "join"))
3763 Join = ajTrue;
3764
3765 else if(ajStrMatchCaseC(opnam, "order"))
3766 Order = ajTrue;
3767
3768 else if(ajStrMatchCaseC(opnam, "group"))
3769 Order = ajTrue;
3770
3771 else
3772 ajFeatWarn("%S: unrecognised operator '%S()' in '%S'",
3773 thys->Seqid, opnam, opval);
3774
3775 ajStrAssignS(&opval, featinTmpStr);
3776 }
3777 }
3778
3779 while(ajStrGetLen(opval))
3780 {
3781 LocFwd = Fwd;
3782 BegBound = ajFalse;
3783 EndBound = ajFalse;
3784 Simple = ajFalse;
3785 Between = ajFalse;
3786 BegNum = EndNum = Beg2 = End2 = 0;
3787 HasOper = ajFalse;
3788 RemoteId = ajFalse;
3789 IsLabel = ajFalse;
3790
3791 ajStrDelStatic(&featId);
3792 ajStrDelStatic(&featLabel);
3793
3794 /* check for complement() */
3795 /* set locstr as the whole (or rest) of the location */
3796
3797 if(featEmblOperIn(opval, &opnam, &locstr, &featinTmpStr))
3798 {
3799 /* ajDebug("OperIn %S( '%S' )\n", opnam, locstr); */
3800 if(ajStrMatchCaseC(opnam, "complement"))
3801 LocFwd = !LocFwd;
3802
3803 ajStrAssignS(&opval, featinTmpStr);
3804 /* ajDebug("rest: '%S'\n", opval); */
3805 HasOper = ajTrue;
3806 }
3807 else
3808 {
3809 ajStrAssignS(&locstr, opval);
3810 /* ajDebug("OperIn simple '%S'\n", locstr); */
3811 }
3812
3813 if(featEmblOperNone(locstr,
3814 &featId, &featinTmpStr, &rest)) /* one exon */
3815 {
3816 /* ajDebug("OperNone '%S' \n", featinTmpStr); */
3817 if(ajStrGetLen(featId))
3818 {
3819 /* ajDebug("External entryid '%S'\n", featId); */
3820 RemoteId = ajTrue;
3821 }
3822 if(!featEmblLoc(featinTmpStr, &begstr, &Between, &Simple, &endstr))
3823 {
3824 ajStrAssignS(&begstr, featinTmpStr);
3825 ajStrAssignS(&endstr, begstr);
3826 Simple = ajTrue;
3827 ajDebug("Bad feature numeric location '%S' in '%S' - "
3828 "test later for label",
3829 begstr, locstr);
3830 }
3831
3832 ajStrAssignS(&featinTmpStr, rest);
3833
3834 if(!HasOper)
3835 ajStrAssignS(&opval, featinTmpStr);
3836
3837 if(featEmblLocNum(begstr, &BegBound, &BegNum))
3838 {
3839 Beg2 = 0;
3840 /* ajDebug("Begin '%S' %d Bound: %B\n",
3841 begstr, BegNum, BegBound); */
3842 }
3843 else if(featEmblLocRange(begstr, &BegNum, &Beg2))
3844 {
3845 BegBound = ajFalse;
3846 /* ajDebug("Begin range (%d . %d)\n", BegNum, Beg2); */
3847 }
3848 else
3849 {
3850 /* ajDebug("Begin is a label '%S'\n", begstr); */
3851 IsLabel = ajTrue;
3852 Simple = ajTrue;
3853 ajStrAssignS(&featLabel, begstr);
3854 ajFeatWarn("%S: Simple feature location '%S' in '%S'",
3855 thys->Seqid, begstr, locstr);
3856 }
3857
3858 if(featEmblLocNum(endstr, &EndBound, &EndNum))
3859 {
3860 End2 = 0;
3861 /* ajDebug(" End '%S' %d Bound: %B\n",
3862 endstr, EndNum, EndBound); */
3863 }
3864 else if(featEmblLocRange(endstr, &End2, &EndNum))
3865 {
3866 EndBound = ajFalse;
3867 /* ajDebug(" End range (%d . %d)\n", End2, EndNum); */
3868 }
3869 else
3870 {
3871 IsLabel = ajTrue;
3872 Simple = ajTrue;
3873 ajStrAssignS(&featLabel, endstr);
3874 /* ajDebug(" End is a label '%S'\n", endstr); */
3875 ajErr("%S: Simple feature end '%S' in '%S'",
3876 thys->Seqid, begstr, locstr);
3877 }
3878 }
3879 else
3880 {
3881 ajErr("Unable to parse location:\n'%S'", opval);
3882 }
3883 /* location has been read in, now store it */
3884
3885 Flags = 0;
3886 if(LocFwd)
3887 Strand = '+';
3888 else
3889 Strand = '-';
3890
3891 if(Simple)
3892 Flags |= AJFEATFLAG_POINT;
3893 if(Between)
3894 Flags |= AJFEATFLAG_BETWEEN_SEQ;
3895 if(End2)
3896 Flags |= AJFEATFLAG_END_TWO;
3897 if(Beg2)
3898 Flags |= AJFEATFLAG_START_TWO;
3899 if(BegBound)
3900 Flags |= AJFEATFLAG_START_BEFORE_SEQ;
3901 if(EndBound)
3902 Flags |= AJFEATFLAG_END_AFTER_SEQ;
3903 if(RemoteId)
3904 Flags |= AJFEATFLAG_REMOTEID;
3905 if(IsLabel)
3906 Flags |= AJFEATFLAG_LABEL;
3907 if(IsLabel)
3908 ajFeatWarn("%S: Feature location with label '%S'",
3909 thys->Seqid, locstr);
3910 if(Join || Order)
3911 Flags |= AJFEATFLAG_MULTIPLE;
3912 if(Order)
3913 Flags |= AJFEATFLAG_ORDER;
3914
3915 if(Parent)
3916 {
3917 if(!Fwd)
3918 Flags |= AJFEATFLAG_COMPLEMENT_MAIN;
3919 }
3920
3921 /* ajDebug("Calling featFeatNew, Flags: %x\n", Flags); */
3922 tmpft = ajFeattypeGetInternalNuc(feature);
3923
3924 if(Parent)
3925 {
3926 if(ajStrGetLen(opval) && RemoteId)
3927 {
3928 tmpbeg = 0;
3929 tmpend = 0;
3930 tmpflags = Flags & noparentflags;
3931 tmpfeatid = NULL;
3932 }
3933 else
3934 {
3935 tmpbeg = BegNum;
3936 tmpend = EndNum;
3937 tmpflags = Flags;
3938 tmpfeatid = featId;
3939 }
3940
3941 gf = ajFeatNewNucFlags(thys,
3942 source, /* source sequence */
3943 tmpft,
3944 tmpbeg, tmpend,
3945 Score,
3946 Strand,
3947 Frame,
3948 Exon, Beg2, End2,
3949 tmpfeatid, featLabel, tmpflags);
3950 /* gfpos = gf; */
3951 }
3952
3953 if(!Parent || ajStrGetLen(opval))
3954 {
3955 Exon++;
3956 /*gfpos = */ ajFeatNewNucFlagsSub(thys,
3957 gf,
3958 source, /* source sequence */
3959 tmpft,
3960 BegNum, EndNum,
3961 Score,
3962 Strand,
3963 Frame,
3964 Exon, Beg2, End2,
3965 featId, featLabel, Flags);
3966 if(!RemoteId)
3967 {
3968 if(Fwd)
3969 {
3970 if(!gf->Start || gf->Start > BegNum)
3971 gf->Start = BegNum;
3972 if(!gf->End || gf->End < EndNum)
3973 gf->End = EndNum;
3974 }
3975 else
3976 {
3977 if(!gf->End || gf->End > BegNum)
3978 gf->End = BegNum;
3979 if(!gf->Start || gf->Start < EndNum)
3980 gf->Start = EndNum;
3981 }
3982 }
3983 }
3984
3985 if(Parent)
3986 {
3987 ret = gf;
3988 Parent = ajFalse;
3989 Exon = 1;
3990 }
3991 }
3992
3993 while(ajStrGetLen(*tags))
3994 {
3995 itags++;
3996
3997 if(featEmblTvTagVal(tags, &tag, &val))
3998 {
3999 ajStrQuoteStrip(&val);
4000
4001 if(ajStrMatchC(tag, "codon_start"))
4002 {
4003 ajStrToInt(val, &Frame);
4004 gf->Frame = Frame;
4005 }
4006 if(ajStrMatchC(tag, "note") && ajStrGetCharFirst(val) == '*')
4007 {
4008 ajDebug("special note '%S'\n", val);
4009 ajStrCutStart(&val, 1);
4010 ajStrExtractFirst(val, &tagrest, &tagname);
4011
4012 if(ajStrMatchC(tagname, "Type"))
4013 {
4014 ajStrExtractFirst(tagrest, &tagname, &gf->Type);
4015 }
4016 else
4017 {
4018 ajFeatGfftagAddSS(gf, tagname, tagrest);
4019 }
4020 continue;
4021
4022 }
4023 if(!ajFeatTagAddSS(ret, tag, val))
4024 ajFeatWarn("%S: Bad value '%S' for tag '/%S'",
4025 thys->Seqid, val, tag);
4026
4027 }
4028 else if(featEmblTvRest(tags, &featinTmpStr))
4029 {
4030 /* anything non-whitespace up to '/' is bad */
4031 ajFeatWarn("Bad feature syntax %S: skipping '%S'",
4032 thys->Seqid, featinTmpStr);
4033 }
4034 else
4035 {
4036 ajFeatWarn("Bad feature syntax %S: giving up at '%S'",
4037 thys->Seqid, *tags);
4038 ajStrAssignClear(tags);
4039 }
4040
4041 }
4042
4043 ajDebug("featEmblProcess found %d feature tags\n", itags);
4044
4045 ajStrDelStatic(&featinTmpStr);
4046 ajStrDel(&prestr);
4047 ajStrDel(&val);
4048 ajStrDel(&tag);
4049 ajStrDel(&begstr);
4050 ajStrDel(&delstr);
4051 ajStrDel(&opnam);
4052 ajStrDel(&opval);
4053 ajStrDel(&locstr);
4054 ajStrDel(&endstr);
4055 ajStrDel(&rest);
4056 ajStrDel(&tagname);
4057 ajStrDel(&tagrest);
4058
4059 return ret;
4060 }
4061
4062
4063
4064
4065 /* @funcstatic featEmblOperOut ************************************************
4066 **
4067 ** Process operator and contents up to matching right parenthesis at end
4068 **
4069 ** Return operator and content of parentheses
4070 **
4071 ** Ignores internal operators
4072 ** For example:
4073 **
4074 ** join(complement(12..15),complement(1..3))
4075 ** returns "join" and "complement(12..15),complement(1..3)"
4076 **
4077 ** complement(12..15),complement(1..3)
4078 ** fails because both complement() operators cover only part of the string
4079 ** and will be processed as separate locations.
4080 **
4081 ** @param [r] loc [const AjPStr] Feature location
4082 ** @param [w] opnam [AjPStr*] Operator name
4083 ** @param [w] value [AjPStr*] Operator value
4084 ** @return [AjBool] ajTrue if an operator was found
4085 **
4086 ** @release 3.0.0
4087 ******************************************************************************/
4088
featEmblOperOut(const AjPStr loc,AjPStr * opnam,AjPStr * value)4089 static AjBool featEmblOperOut(const AjPStr loc, AjPStr* opnam, AjPStr* value)
4090 {
4091 ajint left=0;
4092 ajint right=0;
4093 ajuint ipos=0;
4094 ajint bracepos = 0;
4095 const char* cp = ajStrGetPtr(loc);
4096
4097 if(*cp == '(') return ajFalse; /* starts with '(' */
4098 if(!isalpha((ajint)*cp)) return ajFalse; /* starts with location */
4099 if(!islower((ajint)*cp)) return ajFalse; /* starts with location */
4100
4101 while(*cp)
4102 {
4103 ipos ++;
4104
4105 switch(*cp++)
4106 {
4107 case ',':
4108 if(!left) return ajFalse; /* loc,loc,oper(loc),loc */
4109 break;
4110 case '(':
4111 if(!left) bracepos = ipos-1;
4112 left++;
4113 break;
4114 case ')':
4115 right++;
4116 if(right == left)
4117 {
4118 if(ipos != ajStrGetLen(loc)) /* partial operator */
4119 return ajFalse;
4120 }
4121 else if(right > left) return ajFalse;
4122 break;
4123 default:
4124 break;
4125 }
4126 }
4127
4128 if(!left)
4129 return ajFalse; /* no parentheses */
4130
4131 if(right != left)
4132 return ajFalse; /* unmatched '(' */
4133
4134
4135 ajStrAssignSubS(opnam, loc, 0, bracepos-1);
4136 ajStrAssignSubS(value, loc, bracepos+1, ipos-2);
4137
4138 return ajTrue;
4139 }
4140
4141
4142
4143
4144 /* @funcstatic featEmblOperIn *************************************************
4145 **
4146 ** Process operator and contents up to matching right parenthesis at end
4147 **
4148 ** Return operator and content of parentheses
4149 **
4150 ** For operators with single locations, though the location can include ()
4151 ** For example:
4152 **
4153 ** complement(12..15)
4154 **
4155 ** complement((11.12)..(14.15))
4156 **
4157 ** @param [r] loc [const AjPStr] Feature location
4158 ** @param [w] opnam [AjPStr*] Operator name
4159 ** @param [w] value [AjPStr*] Operator value
4160 ** @param [w] rest [AjPStr*] Remainder of location
4161 ** @return [AjBool] ajTrue if an operator was found
4162 **
4163 ** @release 3.0.0
4164 ******************************************************************************/
4165
featEmblOperIn(const AjPStr loc,AjPStr * opnam,AjPStr * value,AjPStr * rest)4166 static AjBool featEmblOperIn(const AjPStr loc, AjPStr* opnam, AjPStr* value,
4167 AjPStr* rest)
4168 {
4169 ajint left=0;
4170 ajint right=0;
4171 ajint depth;
4172 ajuint ipos=0;
4173 ajint bracepos = 0;
4174 const char* cp = ajStrGetPtr(loc);
4175
4176 if(*cp == '(')
4177 return ajFalse; /* starts with '(' */
4178
4179 while(*cp)
4180 {
4181 if(*cp == ',')
4182 break;
4183
4184 ipos ++;
4185
4186 switch(*cp++)
4187 {
4188 case '(':
4189 if(!left)
4190 bracepos = ipos-1;
4191
4192 left++;
4193
4194 if(left>3)
4195 return ajFalse;
4196
4197 depth = left - right;
4198
4199 if(depth > 2)
4200 return ajFalse;
4201
4202 break;
4203 case ')':
4204 right++;
4205
4206 if(right>3)
4207 return ajFalse;
4208
4209 if(right > left)
4210 return ajFalse;
4211
4212 break;
4213 default:
4214 break;
4215 }
4216 }
4217
4218 if(!left)
4219 return ajFalse; /* no parentheses */
4220
4221 if(right != left)
4222 return ajFalse; /* unmatched '(' */
4223
4224
4225 ajStrAssignSubS(opnam, loc, 0, bracepos-1);
4226 ajStrAssignSubS(value, loc, bracepos+1, ipos-2);
4227
4228 if(ipos < ajStrGetLen(loc))
4229 ajStrAssignSubS(rest, loc, ipos+1, -1);
4230 else
4231 ajStrAssignClear(rest);
4232
4233 return ajTrue;
4234 }
4235
4236
4237
4238
4239 /* @funcstatic featEmblOperNone ***********************************************
4240 **
4241 ** Process anything up to ':' as an entry ID (a feature in another entry)
4242 ** Anything up to ',' is this location
4243 **
4244 ** @param [r] loc [const AjPStr] Feature location
4245 ** @param [w] entryid [AjPStr*] Entry ID if found, else empty
4246 ** @param [w] value [AjPStr*] Location with entryID removed
4247 ** @param [w] rest [AjPStr*] Remainder of location
4248 ** @return [AjBool] ajTrue if a match was found
4249 ** ajFalse means an error occurred
4250 **
4251 ** @release 3.0.0
4252 ******************************************************************************/
4253
featEmblOperNone(const AjPStr loc,AjPStr * entryid,AjPStr * value,AjPStr * rest)4254 static AjBool featEmblOperNone(const AjPStr loc, AjPStr* entryid,
4255 AjPStr* value, AjPStr* rest)
4256 {
4257 ajuint ipos=0;
4258 ajuint idpos = 0;
4259 const char* cp = ajStrGetPtr(loc);
4260
4261 while(*cp)
4262 {
4263 if(*cp == ',')
4264 break;
4265
4266 ipos ++;
4267
4268 switch(*cp++)
4269 {
4270 case ':':
4271 idpos = ipos;
4272 break;
4273 default:
4274 break;
4275 }
4276 }
4277
4278 if(idpos)
4279 {
4280 ajStrAssignSubS(entryid, loc, 0, idpos-2);
4281 ajStrAssignSubS(value, loc, idpos, ipos-1);
4282 }
4283 else
4284 {
4285 ajStrAssignClear(entryid);
4286 ajStrAssignSubS(value, loc, 0, ipos-1);
4287 }
4288
4289 if(ipos < ajStrGetLen(loc))
4290 ajStrAssignSubS(rest, loc, ipos+1, -1);
4291 else
4292 ajStrAssignClear(rest);
4293
4294 return ajTrue;
4295 }
4296
4297
4298
4299
4300 /* @funcstatic featEmblLoc ****************************************************
4301 **
4302 ** Process simple feature location in the forms begin..end or begin^end
4303 **
4304 ** @param [r] loc [const AjPStr] Feature location
4305 ** @param [w] begstr [AjPStr*] Start position
4306 ** @param [w] between [AjBool*] ajTrue for '^' as a position between 2 bases
4307 ** @param [w] simple [AjBool*] ajTrue for single base location
4308 ** @param [w] endstr [AjPStr*] End position
4309 ** @return [AjBool] ajTrue if a match was found
4310 ** ajFalse means an error occurred
4311 **
4312 ** @release 3.0.0
4313 ******************************************************************************/
4314
featEmblLoc(const AjPStr loc,AjPStr * begstr,AjBool * between,AjBool * simple,AjPStr * endstr)4315 static AjBool featEmblLoc(const AjPStr loc, AjPStr* begstr, AjBool* between,
4316 AjBool* simple, AjPStr* endstr)
4317 {
4318 ajint ipos=0;
4319 ajint bracket=0;
4320 AjBool numeric=ajTrue;
4321 AjBool dot = ajFalse;
4322 AjBool end = ajFalse;
4323 ajint iend = 0;
4324 ajint ibeg = 0;
4325 const char* cp = ajStrGetPtr(loc);
4326
4327 *between = ajFalse;
4328 *simple = ajFalse;
4329 ajStrAssignClear(begstr);
4330 ajStrAssignClear(endstr);
4331
4332 while(*cp)
4333 {
4334 ipos ++;
4335
4336 switch(*cp)
4337 {
4338 case '.':
4339 if(bracket)
4340 break;
4341
4342 if(dot)
4343 {
4344 if(end)
4345 return ajFalse;
4346
4347 end = ajTrue;
4348 ibeg = ipos - 2;
4349 iend = ipos - 1;
4350 }
4351 else
4352 dot = ajTrue;
4353 break;
4354 case '^':
4355 ibeg = ipos - 1;
4356 iend = ipos - 1;
4357 end = ajTrue;
4358 *between = ajTrue;
4359 break;
4360 case '(':
4361 bracket++;
4362 numeric = ajFalse;
4363 break;
4364 case ')':
4365 if(!bracket)
4366 return ajFalse;
4367 bracket--;
4368 break;
4369 default:
4370 if(!isdigit((ajint) *cp))
4371 numeric = ajFalse;
4372
4373 break;
4374 }
4375
4376 cp++;
4377 }
4378
4379 if(dot && !end)
4380 return ajFalse;
4381
4382 if(end)
4383 {
4384 ajStrAssignSubS(begstr, loc, 0, ibeg-1);
4385 ajStrAssignSubS(endstr, loc, iend+1, ipos-1);
4386 }
4387 else /* simple location e.g. 43 */
4388 {
4389 *simple = ajTrue;
4390 ajStrAssignSubS(begstr, loc, 0, -1);
4391
4392 if(numeric)
4393 ajStrAssignSubS(endstr, loc, 0, -1);
4394 else
4395 ajStrAssignClear(endstr);
4396 }
4397
4398 return ajTrue;
4399 }
4400
4401
4402
4403
4404 /* @funcstatic featEmblLocNum *************************************************
4405 **
4406 ** Process a single base position with a possible less than or greater than
4407 ** character in front to mark a position beyond the bounds of the sequence.
4408 **
4409 ** @param [r] loc [const AjPStr] Feature location
4410 ** @param [w] bound [AjBool*] ajTrue if less than or greater than specified
4411 ** @param [w] num [ajuint*] Base position
4412 ** @return [AjBool] ajTrue if a match was found
4413 ** ajFalse means an error occurred
4414 **
4415 ** @release 3.0.0
4416 ******************************************************************************/
4417
featEmblLocNum(const AjPStr loc,AjBool * bound,ajuint * num)4418 static AjBool featEmblLocNum(const AjPStr loc, AjBool* bound, ajuint* num)
4419 {
4420 const char* cp = ajStrGetPtr(loc);
4421
4422 *bound = ajFalse;
4423 *num = 0;
4424
4425 if((*cp == '<') || (*cp == '>'))
4426 {
4427 *bound = ajTrue;
4428 cp++;
4429 }
4430
4431 while(*cp)
4432 {
4433 if(!isdigit((ajint)*cp))
4434 return ajFalse;
4435
4436 *num = 10* (*num) + (*cp - '0');
4437 cp++;
4438 }
4439
4440 return ajTrue;
4441 }
4442
4443
4444
4445
4446 /* @funcstatic featEmblLocRange ***********************************************
4447 **
4448 ** Process a range position (12.14) meaning any base from 12 to 14 as the
4449 ** start or end of a feature location.
4450 **
4451 ** @param [r] loc [const AjPStr] Feature location
4452 ** @param [w] num1 [ajuint*] First base position
4453 ** @param [w] num2 [ajuint*] Last base position
4454 ** @return [AjBool] ajTrue if a match was found
4455 ** ajFalse means an error occurred
4456 **
4457 ** @release 3.0.0
4458 ******************************************************************************/
4459
featEmblLocRange(const AjPStr loc,ajuint * num1,ajuint * num2)4460 static AjBool featEmblLocRange(const AjPStr loc, ajuint* num1, ajuint* num2)
4461 {
4462 ajuint ipos=0;
4463 AjBool dot = ajFalse;
4464 const char* cp = ajStrGetPtr(loc);
4465
4466 if(*cp != '(')
4467 return ajFalse;
4468
4469 if(ajStrGetCharLast(loc) != ')')
4470 return ajFalse;
4471
4472 *num1 = 0;
4473 *num2 = 0;
4474
4475 while(*cp)
4476 {
4477 ipos ++;
4478
4479 switch(*cp)
4480 {
4481 case '.':
4482 if(dot)
4483 return ajFalse;
4484
4485 dot = ajTrue;
4486 break;
4487 case '(':
4488 if(ipos != 1)
4489 return ajFalse;
4490
4491 break;
4492 case ')':
4493 if(ipos != ajStrGetLen(loc))
4494 return ajFalse;
4495
4496 break;
4497 default:
4498 if(!isdigit((ajint)*cp))
4499 return ajFalse;
4500
4501 if(dot)
4502 *num2 = 10* (*num2) + (*cp - '0');
4503 else
4504 *num1 = 10* (*num1) + (*cp - '0');
4505
4506 break;
4507 }
4508 cp++;
4509 }
4510
4511 if(!dot)
4512 return ajFalse;
4513
4514 return ajTrue;
4515 }
4516
4517
4518
4519
4520 /* @funcstatic featEmblTvTagVal ***********************************************
4521 **
4522 ** Tests for a feature tag value next in the input string
4523 **
4524 ** @param [u] tags [AjPStr*] Feature tag-values string, returned with
4525 ** current tag-value removed
4526 ** @param [w] name [AjPStr*] Feature tag name
4527 ** @param [w] value [AjPStr*] Feature tag value
4528 ** @return [AjBool] ajTrue if a match was found
4529 ** ajFalse means an error occurred
4530 **
4531 ** @release 3.0.0
4532 ******************************************************************************/
4533
featEmblTvTagVal(AjPStr * tags,AjPStr * name,AjPStr * value)4534 static AjBool featEmblTvTagVal(AjPStr* tags, AjPStr* name, AjPStr* value)
4535 {
4536 const char* cp;
4537 const char* cq;
4538 ajint i;
4539 AjPStr testtags = NULL;
4540 AjPStr testname = NULL;
4541 AjPStr testvalue = NULL;
4542 AjPStr tmptag = NULL;
4543 static ajint depth = 0;
4544
4545 tmptag = ajStrNewS(*tags);
4546 cp = ajStrGetPtr(tmptag);
4547 ajStrAssignClear(value);
4548
4549 while((*cp == ' '))
4550 cp++;
4551
4552 if(*cp++ != '/')
4553 {
4554 ajStrDel(&tmptag);
4555 return ajFalse;
4556 }
4557
4558 cq = cp;
4559 i=0;
4560
4561 while(isalpha((ajint)*cp) || (*cp == '_'))
4562 {
4563 i++;
4564 cp++;
4565 }
4566
4567 ajStrAssignLenC(name, cq, i);
4568
4569 while(*cp == ' ')
4570 cp++;
4571
4572 if(!ajStrGetLen(*name))
4573 {
4574 ajStrDel(&tmptag);
4575
4576 return ajFalse;
4577 }
4578
4579 switch(*cp)
4580 {
4581 case '\0': /* /name is end of input */
4582 ajStrAssignClear(tags);
4583 ajStrAssignClear(value);
4584 ajStrDel(&tmptag);
4585
4586 return ajTrue;
4587 case '/': /* /name then next tag, no value */
4588 ajStrAssignC(tags, cp);
4589 ajStrAssignClear(value);
4590 ajStrDel(&tmptag);
4591
4592 return ajTrue;
4593 case '=': /* /name=value */
4594 break;
4595 default: /* anything else is bad */
4596 ajStrDel(&tmptag);
4597
4598 return ajFalse;
4599 }
4600
4601 cp++;
4602
4603 if(*cp == '"') /* /name="... */
4604 {
4605 cq = cp;
4606 cp++;
4607 i=0;
4608
4609 while(*cp)
4610 {
4611 i++;
4612
4613 if(*cp == '"')
4614 {
4615 ajStrAppendLenC(value, cq, i);
4616 i = 0;
4617 cp++;
4618
4619 if(!*cp || (*cp != '"')) /* all done */
4620 {
4621 while(*cp == ' ')
4622 cp++;
4623
4624 if(*cp)
4625 ajStrAssignC(tags, cp);
4626 else
4627 ajStrAssignClear(tags);
4628
4629 ajStrDel(&testtags);
4630 ajStrDel(&testname);
4631 ajStrDel(&testvalue);
4632 ajStrDel(&tmptag);
4633
4634 return ajTrue;
4635 }
4636 else /* "" but is it really internal */
4637 {
4638 if(depth)
4639 {
4640 ajStrDel(&testtags);
4641 ajStrDel(&testname);
4642 ajStrDel(&testvalue);
4643 ajStrDel(&tmptag);
4644
4645 return ajTrue;
4646 }
4647
4648 depth++;
4649 ajStrAssignC(&testtags, cp);
4650
4651 if(featEmblTvTagVal(&testtags, &testname, &testvalue))
4652 { /* looks like an extra closing quote */
4653 depth--;
4654 ajStrDel(&testtags);
4655 ajStrDel(&testname);
4656 ajStrDel(&testvalue);
4657 cp++;
4658
4659 while(*cp == ' ')
4660 cp++;
4661
4662 ajStrAssignC(tags, cp);
4663 ajStrDel(&tmptag);
4664
4665 return ajTrue;
4666 }
4667 else /* really an internal " */
4668 {
4669 depth--;
4670 cq = cp-1;
4671 i = 1;
4672 }
4673 }
4674 }
4675 cp++;
4676 }
4677 ajStrAssignClear(tags);
4678 }
4679 else
4680 {
4681 cq = cp;
4682 i=0;
4683
4684 while(*cp && (*cp != ' ') && (*cp != '/'))
4685 {
4686 cp++;
4687 i++;
4688 }
4689
4690 ajStrAssignLenC(value, cq, i);
4691 ajStrAssignC(tags, cp);
4692 ajStrDel(&tmptag);
4693
4694 return ajTrue;
4695 }
4696
4697 ajStrDel(&testtags);
4698 ajStrDel(&testname);
4699 ajStrDel(&testvalue);
4700 ajStrDel(&tmptag);
4701
4702 return ajFalse;
4703
4704 }
4705
4706
4707
4708
4709 /* @funcstatic featEmblTvRest *************************************************
4710 **
4711 ** Process bad tag-value text up to next qualifier
4712 **
4713 ** @param [u] tags [AjPStr*] Feature tag-values string, returned with
4714 ** skipped text removed
4715 ** @param [w] skip [AjPStr*] Skipped text
4716 ** @return [AjBool] ajTrue if a match was found
4717 ** ajFalse means an error occurred
4718 **
4719 ** @release 3.0.0
4720 ******************************************************************************/
4721
featEmblTvRest(AjPStr * tags,AjPStr * skip)4722 static AjBool featEmblTvRest(AjPStr* tags, AjPStr* skip)
4723 {
4724 AjPStr testtags = NULL;
4725 AjPStr testname = NULL;
4726 AjPStr testvalue = NULL;
4727 AjBool ok = ajFalse;
4728 const char* cp = ajStrGetPtr(*tags);
4729
4730 ajStrAssignClear(skip);
4731
4732 while(*cp)
4733 {
4734 if(*cp == '/')
4735 {
4736 ajStrAssignC(&testtags,cp);
4737 ok = featEmblTvTagVal(&testtags, &testname, &testvalue);
4738 ajStrDel(&testtags);
4739 ajStrDel(&testname);
4740 ajStrDel(&testvalue);
4741
4742 if(ok)
4743 {
4744 ajStrAssignC(tags, cp);
4745
4746 return ajTrue;
4747 }
4748
4749 ajStrAppendK(skip, *cp);
4750 }
4751 else
4752 {
4753 ajStrAppendK(skip, *cp);
4754 }
4755 cp++;
4756 }
4757
4758 ajStrAssignClear(tags);
4759
4760 return ajTrue;
4761 }
4762
4763
4764
4765
4766 /* @funcstatic featGff2FromLine ***********************************************
4767 **
4768 ** Converts an input GFF format line into a feature
4769 **
4770 ** @param [u] thys [AjPFeattable] Feature table
4771 ** @param [r] line [const AjPStr] Input line
4772 ** @param [r] version [float] GFF version (1.0 for old format behaviour)
4773 ** @return [AjPFeature] New feature
4774 **
4775 ** @release 6.4.0
4776 ** @@
4777 ******************************************************************************/
4778
featGff2FromLine(AjPFeattable thys,const AjPStr line,float version)4779 static AjPFeature featGff2FromLine(AjPFeattable thys, const AjPStr line,
4780 float version)
4781 {
4782 AjPFeature gf = NULL;
4783 AjPStr token = NULL;
4784 ajint Start = 0;
4785 ajint End = 0;
4786 float fscore = 0.0;
4787 ajint itemp;
4788
4789 char strand;
4790 ajint frame;
4791
4792 if(!ajStrGetLen(line))
4793 return NULL;
4794
4795 ajStrTokenAssignC(&featGffSplit, line, "\t");
4796
4797 if(!ajStrTokenNextParse(featGffSplit, &featSeqid)) /* seqname */
4798 goto Error;
4799
4800 if(!ajStrTokenNextParse(featGffSplit, &featSource)) /* source */
4801 goto Error;
4802
4803 if(!ajStrTokenNextParse(featGffSplit, &featFeature)) /* feature */
4804 goto Error;
4805
4806 if(!ajStrTokenNextParse(featGffSplit, &token)) /* start */
4807 goto Error;
4808 if(!ajStrToInt(token, &Start))
4809 Start = 0;
4810
4811 if(!ajStrTokenNextParse(featGffSplit, &token)) /* end */
4812 goto Error;
4813
4814 if(!ajStrToInt(token, &End))
4815 End = 0;
4816
4817 if(!ajStrTokenNextParse(featGffSplit, &token)) /* score */
4818 goto Error;
4819
4820 if(!ajStrToFloat(token, &fscore))
4821 fscore = 0.0;
4822
4823 if(!ajStrTokenNextParse(featGffSplit, &token)) /* strand */
4824 goto Error;
4825
4826 if(!ajStrCmpC(token,"+"))
4827 strand = '+';
4828 else if(!ajStrCmpC(token,"-"))
4829 strand = '-';
4830 else
4831 strand = '\0'; /* change to \0 later */
4832
4833 if(!ajStrTokenNextParse(featGffSplit, &token)) /* frame */
4834 goto Error;
4835
4836 if(!ajStrCmpC(token,"0"))
4837 frame = 1;
4838 else if(!ajStrCmpC(token,"1"))
4839 frame = 2;
4840 else if(!ajStrCmpC(token,"2"))
4841 frame = 3;
4842 else
4843 frame = 0;
4844
4845 if(strand == '-')
4846 {
4847 if(Start < End)
4848 {
4849 itemp = Start;
4850 Start = End;
4851 End = itemp;
4852 }
4853 }
4854
4855 /* feature object construction
4856 and group tag */
4857 if(ajStrMatchC(thys->Type, "P"))
4858 gf = ajFeatNewProt(thys,
4859 featSource,
4860 featFeature,
4861 Start, End,
4862 fscore);
4863 else
4864 gf = ajFeatNewNuc(thys,
4865 featSource,
4866 featFeature,
4867 Start, End,
4868 fscore,
4869 strand,
4870 frame,
4871 0,0,0, NULL, NULL);
4872 if(ajStrTokenRestParse(featGffSplit, &featGroup))
4873 featGff2ProcessTagval(gf, thys, featGroup, version);
4874
4875 ajStrDel(&token);
4876
4877 return gf;
4878
4879 Error:
4880
4881 ajStrTokenDel(&featGffSplit);
4882
4883 ajStrDelStatic(&featSeqid);
4884 ajStrDelStatic(&featSource);
4885 ajStrDelStatic(&featFeature);
4886 ajStrDel(&token);
4887
4888 return gf;
4889 }
4890
4891
4892
4893
4894 /* @funcstatic featGff3FromLine ***********************************************
4895 **
4896 ** Converts an input GFF3 format line into a feature
4897 **
4898 ** @param [u] thys [AjPFeattable] Feature table
4899 ** @param [r] line [const AjPStr] Input line
4900 ** @param [u] idtable [AjPTable] Table of identifiers and features
4901 ** @param [u] childlist [AjPList] List of child features
4902 ** @return [AjPFeature] New feature
4903 **
4904 ** @release 6.0.0
4905 ** @@
4906 ******************************************************************************/
4907
featGff3FromLine(AjPFeattable thys,const AjPStr line,AjPTable idtable,AjPList childlist)4908 static AjPFeature featGff3FromLine(AjPFeattable thys, const AjPStr line,
4909 AjPTable idtable, AjPList childlist)
4910 {
4911 AjPFeature gf = NULL;
4912 AjPFeature gftop = NULL;
4913 AjPFeature gfknown = NULL;
4914 AjPStr token = NULL;
4915 ajint Start = 0;
4916 ajint End = 0;
4917 float fscore = 0.0;
4918
4919 char strand;
4920 ajint frame;
4921
4922 AjPStr idstr = NULL;
4923 AjPStr idtag = NULL;
4924 AjBool hasparent = ajFalse;
4925 AjIList tagiter = NULL;
4926 AjPTagval tagval = NULL;
4927
4928 if(!ajStrGetLen(line))
4929 return NULL;
4930
4931 ajStrTokenAssignC(&featGffSplit, line, "\t");
4932
4933 if(!ajStrTokenNextParse(featGffSplit, &featSeqid)) /* seqname */
4934 goto Error;
4935
4936 if(!ajStrTokenNextParse(featGffSplit, &featSource)) /* source */
4937 goto Error;
4938
4939 if(!ajStrTokenNextParse(featGffSplit, &featFeature)) /* feature */
4940 goto Error;
4941
4942 if(!ajStrTokenNextParse(featGffSplit, &token)) /* start */
4943 goto Error;
4944
4945 if(!ajStrToInt(token, &Start))
4946 Start = 0;
4947
4948 if(!ajStrTokenNextParse(featGffSplit, &token)) /* end */
4949 goto Error;
4950
4951 if(!ajStrToInt(token, &End))
4952 End = 0;
4953
4954 if(!ajStrTokenNextParse(featGffSplit, &token)) /* score */
4955 goto Error;
4956
4957 if(!ajStrToFloat(token, &fscore))
4958 fscore = 0.0;
4959
4960 if(!ajStrTokenNextParse(featGffSplit, &token)) /* strand */
4961 goto Error;
4962
4963 if(!ajStrCmpC(token,"+"))
4964 strand = '+';
4965 else if(!ajStrCmpC(token,"-"))
4966 strand = '-';
4967 else
4968 strand = '\0'; /* change to \0 later */
4969
4970 if(!ajStrTokenNextParse(featGffSplit, &token)) /* frame */
4971 goto Error;
4972
4973 if(!ajStrCmpC(token,"0"))
4974 frame = 1;
4975 else if(!ajStrCmpC(token,"1"))
4976 frame = 2;
4977 else if(!ajStrCmpC(token,"2"))
4978 frame = 3;
4979 else
4980 frame = 0;
4981
4982 if(!ajStrGetLen(thys->Seqid))
4983 ajStrAssignS(&thys->Seqid, featSeqid);
4984
4985 /* feature object construction
4986 and group tag */
4987
4988 if(ajStrMatchC(thys->Type, "P"))
4989 gf = ajFeatNewProt(thys,
4990 featSource,
4991 featFeature,
4992 Start, End,
4993 fscore);
4994 else
4995 gf = ajFeatNewNuc(thys,
4996 featSource,
4997 featFeature,
4998 Start, End,
4999 fscore,
5000 strand,
5001 frame,
5002 0,0,0, NULL, NULL);
5003
5004 if(!ajStrMatchS(featSeqid, thys->Seqid))
5005 {
5006 ajStrAssignS(&gf->Remote, featSeqid);
5007 gf->Flags |= AJFEATFLAG_REMOTEID;
5008 }
5009
5010 if(ajStrTokenRestParse(featGffSplit, &featGroup))
5011 {
5012 idstr = featGff3ProcessTagval(gf, thys, featGroup, &hasparent);
5013 if(idstr)
5014 {
5015 gfknown = ajTableFetchmodS(idtable, idstr);
5016 if(gfknown)
5017 {
5018 if(ajListGetLength(gfknown->Subfeatures))
5019 {
5020 gftop = gfknown;
5021 ajDebug("known gff3 ID add '%S' %Lu '%S' %u..%u "
5022 "match '%S' %u..%u\n",
5023 idstr, ajListGetLength(gftop->Subfeatures),
5024 gf->Type, gf->Start, gf->End,
5025 gfknown->Type, gfknown->Start, gfknown->End);
5026 ajStrDel(&idstr);
5027 }
5028 else
5029 {
5030 ajDebug("known gff3 ID new top '%S' '%S' %u..%u %p "
5031 "match '%S' %u..%u %p\n",
5032 idstr, gf->Type, gf->Start, gf->End, gf,
5033 gfknown->Type, gfknown->Start, gfknown->End,
5034 gfknown);
5035
5036 gftop = gfknown;
5037 gfknown = ajFeatNewFeat(gftop);
5038 gftop->Flags |= AJFEATFLAG_GENERATED;
5039 gftop->Flags |= AJFEATFLAG_MULTIPLE;
5040 if(!gftop->Subfeatures)
5041 gftop->Subfeatures = ajListNew();
5042 idtag = ajStrNewS(idstr);
5043 ajStrAppendC(&idtag, ".emboss");
5044 ajFeatGfftagAddCS(gftop, "ID", idtag);
5045 ajStrDel(&idtag);
5046 ajListPushAppend(gftop->Subfeatures, gfknown);
5047 ajTablePut(idtable, idstr, gftop);
5048 }
5049
5050 if(gf->Start < gftop->Start)
5051 gftop->Start = gf->Start;
5052 if(gf->End > gftop->End)
5053 gftop->End = gf->End;
5054 ajListPopLast(thys->Features, (void**) &gf);
5055 ajListPushAppend(gftop->Subfeatures, gf);
5056
5057 tagiter = ajListIterNewread(gf->GffTags);
5058
5059 while(!ajListIterDone(tagiter))
5060 {
5061 tagval = ajListIterGet(tagiter);
5062
5063 if(ajStrMatchC(MAJTAGVALGETTAG(tagval), "ID"))
5064 continue;
5065 if(ajStrMatchC(MAJTAGVALGETTAG(tagval), "Parent"))
5066 continue;
5067 ajFeatGfftagAddSS(gftop, MAJTAGVALGETTAG(tagval),
5068 MAJTAGVALGETVALUE(tagval));
5069 }
5070
5071 ajListIterDel(&tagiter);
5072 }
5073 else
5074 {
5075 ajTablePut(idtable, idstr, gf);
5076 if(hasparent)
5077 {
5078 ajListPopLast(thys->Features, (void**) &gf);
5079 ajListPushAppend(childlist, gf);
5080 }
5081 }
5082 }
5083 else if(hasparent)
5084 {
5085 ajListPopLast(thys->Features, (void**) &gf);
5086 ajListPushAppend(childlist, gf);
5087 }
5088 }
5089
5090 ajStrDel(&token);
5091
5092 return gf;
5093
5094 Error:
5095
5096 ajStrTokenDel(&featGffSplit);
5097
5098 ajStrDelStatic(&featSeqid);
5099 ajStrDelStatic(&featSource);
5100 ajStrDelStatic(&featFeature);
5101 ajStrDel(&token);
5102
5103 return gf;
5104 }
5105
5106
5107
5108
5109 /* @funcstatic featGff3oldFromLine ********************************************
5110 **
5111 ** Converts an input GFF3 format line into a feature for EMBOSS before 6.4.0
5112 **
5113 ** @param [u] thys [AjPFeattable] Feature table
5114 ** @param [r] line [const AjPStr] Input line
5115 ** @return [AjPFeature] New feature
5116 **
5117 ** @release 6.4.0
5118 ** @@
5119 ******************************************************************************/
5120
featGff3oldFromLine(AjPFeattable thys,const AjPStr line)5121 static AjPFeature featGff3oldFromLine(AjPFeattable thys, const AjPStr line)
5122 {
5123 AjPFeature gf = NULL;
5124 AjPStr token = NULL;
5125 ajint Start = 0;
5126 ajint End = 0;
5127 float fscore = 0.0;
5128
5129 char strand;
5130 ajint frame;
5131
5132 if(!ajStrGetLen(line))
5133 return NULL;
5134
5135 ajStrTokenAssignC(&featGffSplit, line, "\t");
5136
5137 if(!ajStrTokenNextParse(featGffSplit, &featSeqid)) /* seqname */
5138 goto Error;
5139
5140 if(!ajStrTokenNextParse(featGffSplit, &featSource)) /* source */
5141 goto Error;
5142
5143 if(!ajStrTokenNextParse(featGffSplit, &featFeature)) /* feature */
5144 goto Error;
5145
5146 if(!ajStrTokenNextParse(featGffSplit, &token)) /* start */
5147 goto Error;
5148
5149 if(!ajStrToInt(token, &Start))
5150 Start = 0;
5151
5152 if(!ajStrTokenNextParse(featGffSplit, &token)) /* end */
5153 goto Error;
5154
5155 if(!ajStrToInt(token, &End))
5156 End = 0;
5157
5158 if(!ajStrTokenNextParse(featGffSplit, &token)) /* score */
5159 goto Error;
5160
5161 if(!ajStrToFloat(token, &fscore))
5162 fscore = 0.0;
5163
5164 if(!ajStrTokenNextParse(featGffSplit, &token)) /* strand */
5165 goto Error;
5166
5167 if(!ajStrCmpC(token,"+"))
5168 strand = '+';
5169 else if(!ajStrCmpC(token,"-"))
5170 strand = '-';
5171 else
5172 strand = '\0'; /* change to \0 later */
5173
5174 if(!ajStrTokenNextParse(featGffSplit, &token)) /* frame */
5175 goto Error;
5176
5177 if(!ajStrCmpC(token,"0"))
5178 frame = 1;
5179 else if(!ajStrCmpC(token,"1"))
5180 frame = 2;
5181 else if(!ajStrCmpC(token,"2"))
5182 frame = 3;
5183 else
5184 frame = 0;
5185
5186 /* feature object construction
5187 and group tag */
5188
5189 if(ajStrMatchC(thys->Type, "P"))
5190 gf = ajFeatNewProt(thys,
5191 featSource,
5192 featFeature,
5193 Start, End,
5194 fscore);
5195 else
5196 gf = ajFeatNewNuc(thys,
5197 featSource,
5198 featFeature,
5199 Start, End,
5200 fscore,
5201 strand,
5202 frame,
5203 0,0,0, NULL, NULL);
5204
5205 if(ajStrTokenRestParse(featGffSplit, &featGroup))
5206 featGff3oldProcessTagval(gf, thys, featGroup);
5207
5208 ajStrDel(&token);
5209
5210 return gf;
5211
5212 Error:
5213
5214 ajStrTokenDel(&featGffSplit);
5215
5216 ajStrDelStatic(&featSeqid);
5217 ajStrDelStatic(&featSource);
5218 ajStrDelStatic(&featFeature);
5219 ajStrDel(&token);
5220
5221 return gf;
5222 }
5223
5224
5225
5226
5227 /* @funcstatic featReadGff2 ***************************************************
5228 **
5229 ** Read input file in GFF 2 format
5230 **
5231 ** @param [u] feattabin [AjPFeattabin] Feature table input
5232 ** @param [u] ftable [AjPFeattable] Feature table
5233 ** @return [AjBool] ajTrue on success
5234 **
5235 ** @release 6.4.0
5236 ** @@
5237 ******************************************************************************/
5238
featReadGff2(AjPFeattabin feattabin,AjPFeattable ftable)5239 static AjBool featReadGff2(AjPFeattabin feattabin, AjPFeattable ftable)
5240 {
5241 AjPStr line = NULL;
5242 AjPStr verstr = NULL;
5243 AjPStr start = NULL;
5244 AjPStr end = NULL;
5245 AjPStr type = NULL;
5246
5247 AjBool found = ajFalse;
5248 float version = 2.0;
5249
5250 AjPFilebuff file = feattabin->Input->Filebuff;
5251
5252 /* ajDebug("featReadGff..........\n"); */
5253
5254 while(ajBuffreadLine(file, &line))
5255 {
5256 ajStrTrimWhite(&line);
5257
5258 /* Header information */
5259
5260 if(ajRegExec(GffRegexblankline, line))
5261 version = 2.0;
5262 else if(ajRegExec(GffRegexversion,line))
5263 {
5264 verstr = ajStrNew();
5265 ajRegSubI(GffRegexversion, 1, &verstr);
5266 ajStrToFloat(verstr, &version);
5267 ajStrDel(&verstr);
5268 }
5269 /*
5270 else if(ajRegExec(GffRegexdate,line))
5271 {
5272 AjPStr year = NULL;
5273 AjPStr month = NULL;
5274 AjPStr day = NULL;
5275 ajint nYear, nMonth, nDay;
5276 ajRegSubI(GffRegexdate, 1, &year);
5277 ajRegSubI(GffRegexdate, 2, &month);
5278 ajRegSubI(GffRegexdate, 3, &day);
5279 ajStrToInt(year, &nYear);
5280 ajStrToInt(month, &nMonth);
5281 ajStrToInt(day, &nDay);
5282 ajStrDel(&year);
5283 ajStrDel(&month);
5284 ajStrDel(&day);
5285 }
5286 */
5287 else if(ajRegExec(GffRegexregion,line))
5288 {
5289 start = ajStrNew();
5290 end = ajStrNew();
5291 ajRegSubI(GffRegexregion, 1, &ftable->Seqid);
5292 ajRegSubI(GffRegexregion, 2, &start);
5293 ajRegSubI(GffRegexregion, 3, &end);
5294 ajStrToUint(start, &(ftable->Start));
5295 ajStrToUint(end, &(ftable->End));
5296 ajStrDel(&start);
5297 ajStrDel(&end);
5298 }
5299 else if(ajRegExec(GffRegextype,line))
5300 {
5301 ajRegSubI(GffRegextype, 1, &type);
5302
5303 if(ajStrMatchCaseC(type, "Protein"))
5304 ajFeattableSetProt(ftable);
5305 else
5306 ajFeattableSetNuc(ftable);
5307
5308 ajStrDel(&type);
5309 ajRegSubI(GffRegextype, 3, &ftable->Seqid);
5310 }
5311 else if(ajRegExec(GffRegexcomment,line))
5312 version = 2.0; /* ignore for now... could store them in
5313 ajFeattable for future reference though?...*/
5314 /* the real feature stuff */
5315 else /* must be a real feature at last !! */
5316 if(featGff2FromLine(ftable, line, version)) /* for ajFeattableAdd */
5317 found = ajTrue;
5318
5319 }
5320 ajStrDel(&line);
5321
5322 return found;
5323 }
5324
5325
5326
5327
5328 /* @funcstatic featReadGff3 ***************************************************
5329 **
5330 ** Read input file in GFF3 format
5331 **
5332 ** @param [u] feattabin [AjPFeattabin] Feature table input
5333 ** @param [u] ftable [AjPFeattable] Feature table
5334 ** @return [AjBool] ajTrue on success
5335 **
5336 ** @release 6.0.0
5337 ** @@
5338 ******************************************************************************/
5339
featReadGff3(AjPFeattabin feattabin,AjPFeattable ftable)5340 static AjBool featReadGff3(AjPFeattabin feattabin, AjPFeattable ftable)
5341 {
5342 AjPStr line = NULL;
5343 AjPStr verstr = NULL;
5344 AjPStr start = NULL;
5345 AjPStr end = NULL;
5346 ajlong fpos = 0;
5347
5348 AjBool found = ajFalse;
5349 AjBool oldemboss = ajFalse;
5350 float version = 3.0;
5351 char cp;
5352 AjPFilebuff file = feattabin->Input->Filebuff;
5353 AjBool haveversion = ajFalse;
5354
5355 AjPTable idtable = NULL;
5356 AjPList childlist = NULL;
5357
5358 idtable = ajTablestrNew(1000);
5359 childlist = ajListNew();
5360
5361 /* ajDebug("featReadGff3..........\n"); */
5362
5363 while(ajBuffreadLinePosStore(file, &line, &fpos,
5364 feattabin->Input->Text,
5365 &ftable->TextPtr))
5366 {
5367 ajStrTrimWhite(&line);
5368
5369 /* Header information */
5370
5371 if(ajRegExec(Gff3Regexdirective,line))
5372 {
5373 if(ajRegExec(Gff3Regexversion,line))
5374 {
5375 if(haveversion) /* starting another feature table */
5376 {
5377 ajFilebuffClearStore(file, 1, line,
5378 feattabin->Input->Text,
5379 &ftable->TextPtr);
5380
5381 featGff3Matchtable(ftable, &idtable, &childlist);
5382
5383 ajStrDel(&line);
5384
5385 return found;
5386 }
5387
5388 ajRegSubI(Gff3Regexversion, 1, &verstr);
5389 ajStrToFloat(verstr, &version);
5390 ajStrDel(&verstr);
5391
5392 haveversion = ajTrue;
5393 if(version < 3.0)
5394 {
5395 ajStrDel(&line);
5396 ajTableDel(&idtable);
5397 ajListFree(&childlist);
5398
5399 return ajFalse;
5400 }
5401 }
5402 else if(ajRegExec(Gff3Regexregion,line))
5403 {
5404 start = ajStrNew();
5405 end = ajStrNew();
5406 ajRegSubI(Gff3Regexregion, 1, &ftable->Seqid);
5407 ajRegSubI(Gff3Regexregion, 2, &start);
5408 ajRegSubI(Gff3Regexregion, 3, &end);
5409 ajStrToUint(start, &(ftable->Start));
5410 ajStrToUint(end, &(ftable->End));
5411 ajStrDel(&start);
5412 ajStrDel(&end);
5413 }
5414 }
5415 else if(ajRegExec(Gff3Regexcomment,line))
5416 {
5417 if(ajStrPrefixC(line, "#!Source-version EMBOSS 6."))
5418 {
5419 cp = ajStrGetCharPos(line, 26);
5420 if(cp >= '0'&& cp <= '3')
5421 oldemboss = ajTrue;
5422 }
5423 if(ajStrPrefixC(line, "#!Type Protein"))
5424 {
5425 ajFeattableSetProt(ftable);
5426 }
5427 }
5428 /* the real feature stuff */
5429 else /* must be a real feature at last !! */
5430 {
5431 if(oldemboss)
5432 {
5433 if(featGff3oldFromLine(ftable, line)) /* does ajFeattableAdd */
5434 found = ajTrue;
5435 }
5436 else
5437 {
5438 if(featGff3FromLine(ftable, line, idtable, childlist))
5439 found = ajTrue;
5440 }
5441 }
5442 }
5443
5444 featGff3Matchtable(ftable, &idtable, &childlist);
5445
5446 ajStrDel(&line);
5447
5448 return found;
5449 }
5450
5451
5452
5453
5454 /* @funcstatic featGff3Matchtable *********************************************
5455 **
5456 ** Match GFF3 features with parent values to their parent features
5457 **
5458 ** @param [u] ftable [AjPFeattable] Feature table
5459 ** @param [d] idtable [AjPTable*] Pointer to table of features by identifier
5460 ** @param [d] childlist [AjPList*] Pointer to list of child features
5461 ** @return [void]
5462 **
5463 ** @release 6.4.0
5464 ** @@
5465 ******************************************************************************/
5466
featGff3Matchtable(AjPFeattable ftable,AjPTable * idtable,AjPList * childlist)5467 static void featGff3Matchtable(AjPFeattable ftable,
5468 AjPTable *idtable, AjPList *childlist)
5469 {
5470 AjPFeature gf = NULL;
5471 AjPFeature gfid = NULL;
5472 const AjPStr idparent = NULL;
5473
5474 ajDebug("featGff3Matchtable feats %Lu idtable: %Lu childlist: %Lu\n",
5475 ajListGetLength(ftable->Features),
5476 ajTableGetLength(*idtable), ajListGetLength(*childlist));
5477
5478 while(ajListGetLength(*childlist))
5479 {
5480 ajListPop(*childlist, (void**) &gf);
5481 ajListDrop(ftable->Features, gf);
5482 idparent = ajFeatGetParent(gf);
5483 if(idparent)
5484 {
5485 ajDebug(" parent '%S' '%S' %u..%u\n",
5486 idparent, gf->Type, gf->Start, gf->End);
5487 gfid = ajTableFetchmodV(*idtable, idparent);
5488 if(gfid)
5489 {
5490 ajDebug(" parent '%S' matching\n",
5491 idparent);
5492 if(ajStrMatchS(gf->Type, gfid->Type))
5493 gfid->Flags |= AJFEATFLAG_MULTIPLE;
5494 ajListPushAppend(gfid->Subfeatures, gf);
5495 }
5496 else
5497 {
5498 ajDebug(" ++ parent '%S' no matching id\n",
5499 idparent);
5500 ajFeatWarn("featGff3Matchtable GFF3 Parent '%S' no matching id",
5501 idparent);
5502 ajListPushAppend(ftable->Features, gf);
5503 }
5504 }
5505 else
5506 {
5507 ajDebug(" ++ child with no parent %S %u..%u\n",
5508 gf->Type, gf->Start, gf->End);
5509 ajFeatWarn("Child feature with no parent %S %u..%u",
5510 gf->Type, gf->Start, gf->End);
5511 ajListPushAppend(ftable->Features, gf);
5512 }
5513 }
5514
5515 ajDebug("featGff3Matchtable done feats %Lu\n",
5516 ajListGetLength(ftable->Features));
5517
5518 ajTableDel(idtable);
5519 ajListFree(childlist);
5520
5521 return;
5522 }
5523
5524
5525
5526
5527 /* @funcstatic featReadGff3old ************************************************
5528 **
5529 ** Read input file in GFF3 format from EMBOSS releases before 6.4.0
5530 **
5531 ** @param [u] feattabin [AjPFeattabin] Feature table input
5532 ** @param [u] ftable [AjPFeattable] Feature table
5533 ** @return [AjBool] ajTrue on success
5534 **
5535 ** @release 6.4.0
5536 ** @@
5537 ******************************************************************************/
5538
featReadGff3old(AjPFeattabin feattabin,AjPFeattable ftable)5539 static AjBool featReadGff3old(AjPFeattabin feattabin, AjPFeattable ftable)
5540 {
5541 AjPStr line = NULL;
5542 AjPStr verstr = NULL;
5543 AjPStr start = NULL;
5544 AjPStr end = NULL;
5545
5546 AjBool found = ajFalse;
5547 float version = 3.0;
5548
5549 AjPFilebuff file = feattabin->Input->Filebuff;
5550
5551 /* ajDebug("featReadGff3..........\n"); */
5552
5553 while(ajBuffreadLine(file, &line))
5554 {
5555 ajStrTrimWhite(&line);
5556
5557 /* Header information */
5558
5559 if(ajRegExec(Gff3Regexblankline, line))
5560 version = 3.0;
5561 else if(ajRegExec(Gff3Regexversion,line))
5562 {
5563 verstr = ajStrNew();
5564 ajRegSubI(Gff3Regexversion, 1, &verstr);
5565 ajStrToFloat(verstr, &version);
5566 ajStrDel(&verstr);
5567
5568 if(version < 3.0)
5569 {
5570 ajStrDel(&line);
5571
5572 return ajFalse;
5573 }
5574 }
5575 else if(ajRegExec(Gff3Regexregion,line))
5576 {
5577 start = ajStrNew();
5578 end = ajStrNew();
5579 ajRegSubI(Gff3Regexregion, 1, &ftable->Seqid);
5580 ajRegSubI(Gff3Regexregion, 2, &start);
5581 ajRegSubI(Gff3Regexregion, 3, &end);
5582 ajStrToUint(start, &(ftable->Start));
5583 ajStrToUint(end, &(ftable->End));
5584 ajStrDel(&start);
5585 ajStrDel(&end);
5586 }
5587 /* the real feature stuff */
5588 else /* must be a real feature at last !! */
5589 if(featGff3oldFromLine(ftable, line)) /* does ajFeattableAdd */
5590 found = ajTrue;
5591
5592 }
5593 ajStrDel(&line);
5594
5595 return found;
5596 }
5597
5598
5599
5600
5601 /* @funcstatic featRefseqpFromLine ********************************************
5602 **
5603 ** Converts an input RefSeq protein format line into a feature.
5604 ** Starts a new feature by processing any existing feature data.
5605 ** Creates or appends the type, location and tag-value pairs.
5606 ** With a NULL as the input line, simply processes the type, location
5607 ** and tag-values.
5608 **
5609 ** @param [u] thys [AjPFeattable] Feature table
5610 ** @param [r] origline [const AjPStr] Input line (NULL to process last
5611 ** feature at end of input)
5612 ** @param [w] savefeat [AjPStr*] Stored feature type
5613 ** @param [w] saveloc [AjPStr*] Continued location
5614 ** @param [w] saveline [AjPStr*] Continued tag-value pairs
5615 ** @return [AjPFeature] New feature
5616 **
5617 ** @release 6.2.0
5618 ** @@
5619 ******************************************************************************/
5620
featRefseqpFromLine(AjPFeattable thys,const AjPStr origline,AjPStr * savefeat,AjPStr * saveloc,AjPStr * saveline)5621 static AjPFeature featRefseqpFromLine(AjPFeattable thys,
5622 const AjPStr origline,
5623 AjPStr* savefeat,
5624 AjPStr* saveloc,
5625 AjPStr* saveline)
5626 {
5627 static AjPFeature gf = NULL; /* so tag-values can be added LATER */
5628 AjPStr temp = NULL;
5629 AjBool newft = ajFalse;
5630 AjBool doft = ajFalse;
5631
5632 if(!featinSourceRefseqp)
5633 featinSourceRefseqp = ajStrNewC("REFSEQP");
5634
5635 /* ajDebug("featRefseqpFromLine '%S'\n", origline); */
5636
5637 if(origline)
5638 {
5639 /* As BufferFile can't be edited */
5640 ajStrAssignS(&featProcessLine,origline);
5641 /* chop first 5 characters */
5642 ajStrCutStart(&featProcessLine, 5);
5643
5644 /* look for the feature key */
5645 if(ajStrGetCharFirst(featProcessLine) != ' ')
5646 {
5647 newft = ajTrue;
5648
5649 if(ajStrGetLen(*saveloc))
5650 doft = ajTrue;
5651 }
5652 }
5653 else
5654 {
5655 ajStrAssignClear(&featProcessLine);
5656 newft = ajFalse; /* no new data, just process */
5657
5658 if(ajStrGetLen(*saveloc))
5659 doft = ajTrue;
5660 }
5661
5662 /*
5663 ajDebug("+ newft: %B doft: %B\n+ line '%S'\n",
5664 newft, doft, line);
5665 */
5666
5667 if(doft) /* process the last feature */
5668 {
5669 /*
5670 ** ajDebug("++ saveloc '%S'\n+ saveline '%S'\n",
5671 ** *saveloc, *saveline);
5672 */
5673
5674 gf = featRefseqpProcess(thys, *savefeat, featinSourceRefseqp,
5675 saveloc, saveline);
5676
5677 ajStrDelStatic(saveloc);
5678 ajStrDelStatic(saveline);
5679 }
5680
5681 if(!origline) /* we are only cleaning up */
5682 return gf;
5683
5684 ajStrRemoveWhiteExcess(&featProcessLine);
5685
5686 if(newft) /* if new feature initialise for it */
5687 {
5688 ajStrTokenAssignC(&featEmblSplit, featProcessLine, " ");
5689 ajStrTokenNextParse(featEmblSplit, savefeat);
5690
5691 if(ajStrTokenNextParseC(featEmblSplit, " /", saveloc))
5692 ajStrTokenRestParse(featEmblSplit, saveline);
5693 else
5694 ajStrAssignResC(saveline, 512, ""); /* location only */
5695
5696 ajStrTokenDel(&featEmblSplit);
5697
5698 return gf;
5699 }
5700 else if(!ajStrGetLen(*saveline)) /* no tag-values yet, more location? */
5701 {
5702 if(ajStrGetCharFirst(featProcessLine) != '/')
5703 {
5704 ajStrTokenAssignC(&featEmblSplit, featProcessLine, " ");
5705 ajStrTokenNextParse(featEmblSplit, &temp);
5706
5707 if(ajStrGetLen(temp))
5708 ajStrAppendS(saveloc, temp);
5709
5710 ajStrTokenRestParse(featEmblSplit, &temp);
5711
5712 if(ajStrGetLen(temp))
5713 ajStrAppendS(saveline, temp);
5714
5715 ajStrDel(&temp);
5716 ajStrTokenDel(&featEmblSplit);
5717
5718 return gf;
5719 }
5720 }
5721
5722 /* tag-values continued */
5723 ajStrAppendK(saveline, ' ');
5724 ajStrAppendS(saveline, featProcessLine);
5725
5726 return gf;
5727 }
5728
5729
5730
5731
5732 /* @funcstatic featRefseqpProcess *********************************************
5733 **
5734 ** Processes one feature location and qualifier tags for RefSeq protein
5735 **
5736 ** @param [u] thys [AjPFeattable] Feature table
5737 ** @param [r] feature [const AjPStr] Feature type key
5738 ** @param [r] source [const AjPStr] Feature table source
5739 ** @param [w] loc [AjPStr*] Feature location
5740 ** @param [w] tags [AjPStr*] Feature qualifier tags string
5741 ** @return [AjPFeature] Feature as inserted into the feature table
5742 **
5743 ** @release 6.2.0
5744 ** @@
5745 ******************************************************************************/
5746
featRefseqpProcess(AjPFeattable thys,const AjPStr feature,const AjPStr source,AjPStr * loc,AjPStr * tags)5747 static AjPFeature featRefseqpProcess(AjPFeattable thys, const AjPStr feature,
5748 const AjPStr source,
5749 AjPStr* loc, AjPStr* tags)
5750 {
5751 AjPFeature ret = NULL;
5752 AjPFeature gf = NULL;
5753 AjPStr tag = NULL;
5754 AjPStr val = NULL;
5755 AjPStr opnam = NULL;
5756 AjPStr opval = NULL;
5757 AjPStr prestr = NULL;
5758 AjBool Fwd = ajTrue;
5759 AjBool LocFwd = ajTrue;
5760 AjPStr begstr = NULL;
5761 AjPStr delstr = NULL;
5762 AjPStr endstr = NULL;
5763 AjPStr locstr = NULL;
5764 AjPStr rest = NULL;
5765 AjBool Simple = ajFalse; /* Simple - single position (see also label) */
5766 AjBool BegBound = ajFalse;
5767 AjBool EndBound = ajFalse;
5768 ajuint BegNum = 0;
5769 ajuint EndNum = 0;
5770 ajuint Beg2 = 0;
5771 ajuint End2 = 0;
5772 ajuint tmpbeg = 0;
5773 ajuint tmpend = 0;
5774 ajuint tmpflags = 0;
5775 AjBool Between = ajFalse;
5776 AjBool Join = ajFalse;
5777 AjBool Order = ajFalse;
5778 ajint Flags;
5779 ajint ExonFlags;
5780 AjBool Parent = ajTrue;
5781 ajint Frame = 0;
5782 float Score = 0.0;
5783 AjBool HasOper = ajFalse;
5784 AjBool RemoteId = ajFalse;
5785 AjBool IsLabel = ajFalse; /* uses obsolete label */
5786 ajint Exon = 0;
5787 ajlong ipos;
5788 ajint itags = 0;
5789 const AjPStr tmpft = NULL;
5790 ajuint noparentflags = 0;
5791
5792 noparentflags = ~(AJFEATFLAG_POINT | AJFEATFLAG_BETWEEN_SEQ |
5793 AJFEATFLAG_REMOTEID | AJFEATFLAG_LABEL |
5794 AJFEATFLAG_START_TWO | AJFEATFLAG_END_TWO |
5795 AJFEATFLAG_START_BEFORE_SEQ | AJFEATFLAG_END_AFTER_SEQ);
5796
5797 ajStrRemoveWhite(loc); /* no white space needed */
5798 ajStrRemoveWhiteExcess(tags); /* single spaces only */
5799
5800 /*ajDebug("Clean location '%S'\n", *loc);*/
5801 /*ajDebug("Clean tags '%S'\n", *tags);*/
5802
5803 ajStrAssignS(&opval, *loc);
5804 ipos = ajStrFindAnyK(opval, ','); /* multiple locations */
5805
5806 if(ipos >= 0)
5807 {
5808 /* ajDebug("Multiple locations, test operator(s)\n"); */
5809 while(ajStrGetLen(opval) &&
5810 featEmblOperOut(opval, &opnam, &featinTmpStr))
5811 {
5812 if(!ajStrHasParentheses(featinTmpStr))
5813 break;
5814
5815 /* ajDebug("OperOut %S( '%S' )\n", opnam, featinTmpStr); */
5816 if(ajStrMatchCaseC(opnam, "complement"))
5817 Fwd = !Fwd;
5818
5819 else if(ajStrMatchCaseC(opnam, "one_of"))
5820 Order = ajTrue;
5821
5822 else if(ajStrMatchCaseC(opnam, "join"))
5823 Join = ajTrue;
5824
5825 else if(ajStrMatchCaseC(opnam, "order"))
5826 Order = ajTrue;
5827
5828 else if(ajStrMatchCaseC(opnam, "group"))
5829 Order = ajTrue;
5830
5831 else
5832 ajFeatWarn("%S: unrecognised operator '%S()' in '%S'",
5833 thys->Seqid, opnam, opval);
5834
5835 ajStrAssignS(&opval, featinTmpStr);
5836 }
5837 }
5838
5839 while(ajStrGetLen(opval))
5840 {
5841 LocFwd = Fwd;
5842 BegBound = ajFalse;
5843 EndBound = ajFalse;
5844 Simple = ajFalse;
5845 Between = ajFalse;
5846 BegNum = EndNum = Beg2 = End2 = 0;
5847 HasOper = ajFalse;
5848 RemoteId = ajFalse;
5849 IsLabel = ajFalse;
5850
5851 ajStrDelStatic(&featId);
5852 ajStrDelStatic(&featLabel);
5853
5854 /* check for complement() */
5855 /* set locstr as the whole (or rest) of the location */
5856
5857 if(featEmblOperIn(opval, &opnam, &locstr, &featinTmpStr))
5858 {
5859 /* ajDebug("OperIn %S( '%S' )\n", opnam, locstr); */
5860 if(ajStrMatchCaseC(opnam, "complement"))
5861 LocFwd = !LocFwd;
5862
5863 ajStrAssignS(&opval, featinTmpStr);
5864 /* ajDebug("rest: '%S'\n", opval); */
5865 HasOper = ajTrue;
5866 }
5867 else
5868 {
5869 ajStrAssignS(&locstr, opval);
5870 /* ajDebug("OperIn simple '%S'\n", locstr); */
5871 }
5872
5873 if(featEmblOperNone(locstr,
5874 &featId, &featinTmpStr, &rest)) /* one exon */
5875 {
5876 /* ajDebug("OperNone '%S' \n", featinTmpStr); */
5877 if(ajStrGetLen(featId))
5878 {
5879 /* ajDebug("External entryid '%S'\n", featId); */
5880 RemoteId = ajTrue;
5881 }
5882 if(!featEmblLoc(featinTmpStr, &begstr, &Between, &Simple, &endstr))
5883 {
5884 ajStrAssignS(&begstr, featinTmpStr);
5885 ajStrAssignS(&endstr, begstr);
5886 Simple = ajTrue;
5887 ajDebug("Bad feature numeric location '%S' in '%S' - "
5888 "test later for label",
5889 begstr, locstr);
5890 }
5891
5892 ajStrAssignS(&featinTmpStr, rest);
5893
5894 if(!HasOper)
5895 ajStrAssignS(&opval, featinTmpStr);
5896
5897 if(featEmblLocNum(begstr, &BegBound, &BegNum))
5898 {
5899 Beg2 = 0;
5900 /* ajDebug("Begin '%S' %d Bound: %B\n",
5901 begstr, BegNum, BegBound); */
5902 }
5903 else if(featEmblLocRange(begstr, &BegNum, &Beg2))
5904 {
5905 BegBound = ajFalse;
5906 /* ajDebug("Begin range (%d . %d)\n", BegNum, Beg2); */
5907 }
5908 else
5909 {
5910 /* ajDebug("Begin is a label '%S'\n", begstr); */
5911 IsLabel = ajTrue;
5912 Simple = ajTrue;
5913 ajStrAssignS(&featLabel, begstr);
5914 ajFeatWarn("%S: Simple feature location '%S' in '%S'",
5915 thys->Seqid, begstr, locstr);
5916 }
5917
5918 if(featEmblLocNum(endstr, &EndBound, &EndNum))
5919 {
5920 End2 = 0;
5921 /* ajDebug(" End '%S' %d Bound: %B\n",
5922 endstr, EndNum, EndBound); */
5923 }
5924 else if(featEmblLocRange(endstr, &End2, &EndNum))
5925 {
5926 EndBound = ajFalse;
5927 /* ajDebug(" End range (%d . %d)\n", End2, EndNum); */
5928 }
5929 else
5930 {
5931 IsLabel = ajTrue;
5932 Simple = ajTrue;
5933 ajStrAssignS(&featLabel, endstr);
5934 /* ajDebug(" End is a label '%S'\n", endstr); */
5935 ajErr("%S: Simple feature end '%S' in '%S'",
5936 thys->Seqid, begstr, locstr);
5937 }
5938 }
5939 else
5940 {
5941 ajErr("Unable to parse location:\n'%S'", opval);
5942 }
5943 /* location has been read in, now store it */
5944
5945 Flags = ExonFlags = 0;
5946
5947 if(Simple)
5948 Flags |= AJFEATFLAG_POINT;
5949 if(Between)
5950 Flags |= AJFEATFLAG_BETWEEN_SEQ;
5951 if(End2)
5952 Flags |= AJFEATFLAG_END_TWO;
5953 if(Beg2)
5954 Flags |= AJFEATFLAG_START_TWO;
5955 if(BegBound)
5956 Flags |= AJFEATFLAG_START_BEFORE_SEQ;
5957 if(EndBound)
5958 Flags |= AJFEATFLAG_END_AFTER_SEQ;
5959 if(RemoteId)
5960 Flags |= AJFEATFLAG_REMOTEID;
5961 if(IsLabel)
5962 Flags |= AJFEATFLAG_LABEL;
5963 if(IsLabel)
5964 ajFeatWarn("%S: Feature location with label '%S'",
5965 thys->Seqid, locstr);
5966 if(Join || Order)
5967 Flags |= AJFEATFLAG_MULTIPLE;
5968 if(Order)
5969 Flags |= AJFEATFLAG_ORDER;
5970
5971 ExonFlags = Flags;
5972
5973 if(Parent)
5974 {
5975 if(!Fwd)
5976 Flags |= AJFEATFLAG_COMPLEMENT_MAIN;
5977 }
5978
5979 /* ajDebug("Calling featFeatNew, Flags: %x\n", Flags); */
5980 tmpft = ajFeattypeGetInternalRefseqp(feature);
5981
5982 if(Parent)
5983 {
5984 if(ajStrGetLen(opval) && RemoteId)
5985 {
5986 tmpbeg = 0;
5987 tmpend = 0;
5988 tmpflags = Flags & noparentflags;
5989 }
5990 else
5991 {
5992 tmpbeg = BegNum;
5993 tmpend = EndNum;
5994 tmpflags = Flags;
5995 }
5996
5997 gf = ajFeatNewProtFlags(thys,
5998 source, /* source sequence */
5999 tmpft,
6000 tmpbeg, tmpend,
6001 Score,
6002 tmpflags);
6003
6004 /* gfpos = gf; */
6005 }
6006
6007 if(!Parent || ajStrGetLen(opval))
6008 {
6009 Exon++;
6010 /*gfpos = */ ajFeatNewProtFlagsSub(thys,
6011 gf,
6012 source, /* source sequence */
6013 tmpft,
6014 BegNum, EndNum,
6015 Score,
6016 Flags);
6017 if(!RemoteId)
6018 {
6019 if(Fwd)
6020 {
6021 if(!gf->Start || gf->Start > BegNum)
6022 gf->Start = BegNum;
6023 if(!gf->End || gf->End < EndNum)
6024 gf->End = EndNum;
6025 }
6026 else
6027 {
6028 if(!gf->End || gf->End > BegNum)
6029 gf->End = BegNum;
6030 if(!gf->Start || gf->Start < EndNum)
6031 gf->Start = EndNum;
6032 }
6033 }
6034 }
6035
6036 if(Parent)
6037 {
6038 ret = gf;
6039 Parent = ajFalse;
6040 Exon = 1;
6041 }
6042 }
6043
6044 while(ajStrGetLen(*tags))
6045 {
6046 itags++;
6047
6048 if(featEmblTvTagVal(tags, &tag, &val))
6049 {
6050 ajStrQuoteStrip(&val);
6051
6052 if(!ajFeatTagAddSS(ret, tag, val))
6053 ajFeatWarn("%S: Bad value '%S' for tag '/%S'",
6054 thys->Seqid, val, tag);
6055
6056 if(ajStrMatchC(tag, "codon_start"))
6057 {
6058 ajStrToInt(val, &Frame);
6059 gf->Frame = Frame;
6060 }
6061 }
6062 else if(featEmblTvRest(tags, &featinTmpStr))
6063 {
6064 /* anything non-whitespace up to '/' is bad */
6065 ajFeatWarn("Bad feature syntax %S: skipping '%S'",
6066 thys->Seqid, featinTmpStr);
6067 }
6068 else
6069 {
6070 ajFeatWarn("Bad feature syntax %S: giving up at '%S'",
6071 thys->Seqid, *tags);
6072 ajStrAssignClear(tags);
6073 }
6074
6075 }
6076
6077 ajDebug("featRefseqpProcess found %d feature tags\n", itags);
6078
6079 ajStrDelStatic(&featinTmpStr);
6080 ajStrDel(&prestr);
6081 ajStrDel(&val);
6082 ajStrDel(&tag);
6083 ajStrDel(&begstr);
6084 ajStrDel(&delstr);
6085 ajStrDel(&opnam);
6086 ajStrDel(&opval);
6087 ajStrDel(&locstr);
6088 ajStrDel(&endstr);
6089 ajStrDel(&rest);
6090
6091 return ret;
6092 }
6093
6094
6095
6096
6097 /* @funcstatic featRegInitEmbl ************************************************
6098 **
6099 ** Initialise regular expressions and data structures for
6100 ** EMBL/GenBank/DDBJ format
6101 **
6102 ** @return [AjBool] ajTrue if successful
6103 **
6104 ** @release 1.0.0
6105 ** @@
6106 ******************************************************************************/
6107
featRegInitEmbl(void)6108 static AjBool featRegInitEmbl(void)
6109 {
6110 if(FeatInitEmbl)
6111 return ajTrue;
6112
6113 ajFeatVocabInit("embl");
6114
6115 FeatInitEmbl = ajTrue;
6116
6117 return ajTrue;
6118 }
6119
6120
6121
6122
6123 /* @funcstatic featRegInitRefseqp *********************************************
6124 **
6125 ** Initialise regular expressions and data structures for
6126 ** RefSeq protein format
6127 **
6128 ** @return [AjBool] ajTrue if successful
6129 **
6130 ** @release 6.2.0
6131 ** @@
6132 ******************************************************************************/
6133
featRegInitRefseqp(void)6134 static AjBool featRegInitRefseqp(void)
6135 {
6136 if(FeatInitRefseqp)
6137 return ajTrue;
6138
6139 ajFeatVocabInit("refseqp");
6140
6141 FeatInitRefseqp = ajTrue;
6142
6143 return ajTrue;
6144 }
6145
6146
6147
6148
6149 /* @funcstatic featRegInitSwiss ***********************************************
6150 **
6151 ** Initialise regular expressions and data structures for
6152 ** SwissProt format
6153 **
6154 ** @return [AjBool] ajTrue if successful
6155 **
6156 ** @release 1.0.0
6157 ** @@
6158 ******************************************************************************/
6159
featRegInitSwiss(void)6160 static AjBool featRegInitSwiss(void)
6161 {
6162 if(FeatInitSwiss)
6163 return ajTrue;
6164
6165 ajFeatVocabInit("swiss");
6166
6167 if(!SwRegexNew)
6168 SwRegexNew = ajRegCompC("^FT (([^ ]+) +([?<]?[0-9]+|[?]) +"
6169 "([?>]?[0-9]+|[?]) *)(.*)$");
6170 if(!SwRegexNext)
6171 SwRegexNext = ajRegCompC("^FT +(.*)$");
6172
6173 if(!SwRegexComment)
6174 SwRegexComment = ajRegCompC("^(.*)[(]([^)]+)[)]$");
6175
6176 if(!SwRegexFtid)
6177 SwRegexFtid = ajRegCompC("^(.*)/FTId=([^ .]+)$");
6178
6179 FeatInitSwiss = ajTrue;
6180
6181 return ajTrue;
6182 }
6183
6184
6185
6186
6187 /* @funcstatic featRegInitPir *************************************************
6188 **
6189 ** Initialise regular expressions and data structures for ajFeat in
6190 ** PIR format
6191 **
6192 ** @return [AjBool] ajTrue if successful
6193 **
6194 ** @release 2.0.0
6195 ** @@
6196 ******************************************************************************/
6197
featRegInitPir(void)6198 static AjBool featRegInitPir(void)
6199 {
6200 if(FeatInitPir)
6201 return ajTrue;
6202
6203 ajFeatVocabInit("pir");
6204
6205 if(!PirRegexAll)
6206 PirRegexAll = ajRegCompC("^F;([^/]+)/([^:]+):([^#]*)");
6207
6208 if(!PirRegexCom)
6209 PirRegexCom = ajRegCompC("^#([^#]*)");
6210
6211 if(!PirRegexLoc)
6212 PirRegexLoc = ajRegCompC("^([^,]+),?");
6213
6214 if(!PirRegexPos)
6215 PirRegexPos = ajRegCompC("^([^-]+)-?");
6216
6217 FeatInitPir = ajTrue;
6218
6219 return ajTrue;
6220 }
6221
6222
6223
6224
6225 /* @funcstatic featRegInitGff2 ************************************************
6226 **
6227 ** Initialise regular expressions and data structures for ajFeat GFF format
6228 **
6229 ** @return [AjBool] ajTrue if successful
6230 **
6231 ** @release 6.4.0
6232 ** @@
6233 ******************************************************************************/
6234
featRegInitGff2(void)6235 static AjBool featRegInitGff2(void)
6236 {
6237 /* Setup any global static runtime resources here
6238 for example, regular expression compilation calls */
6239
6240 if(FeatInitGff2)
6241 return ajTrue;
6242
6243 ajFeatVocabInit("gff2");
6244 ajFeatVocabInit("gff2protein");
6245
6246 /*ajDebug("featRegInitGff2 Compiling regexps\n");*/
6247
6248 GffRegexNumeric = ajRegCompC("^[\\+-]?[0-9]+\\.?[0-9]*$");
6249 GffRegexblankline = ajRegCompC("^[ ]*$");
6250 GffRegexversion = ajRegCompC("^##gff-version[ ]+([0-9]+)");
6251 GffRegexdate = ajRegCompC("^##date[ ]+([0-9][0-9][0-9][0-9])-"
6252 "([0-9][0-9]?)-([0-9][0-9]?)");
6253 GffRegexregion = ajRegCompC("^##sequence-region[ ]+([0-9a-zA-Z]+)"
6254 "[ ]+([\\+-]?[0-9]+)[ ]+([\\+-]?[0-9]+)");
6255 GffRegexcomment = ajRegCompC("^#[ ]*(.*)");
6256 GffRegextype = ajRegCompC("^##[Tt]ype +(\\S+)( +(\\S+))?");
6257
6258 GffRegexTvTagval = ajRegCompC(" *([^ =]+)[ =](\"[^\"]*\"|"
6259 "[^;]+)(;|$)"); /* "tag name */
6260
6261 FeatInitGff2 = ajTrue;
6262
6263 return ajTrue;
6264 }
6265
6266
6267
6268
6269 /* @funcstatic featRegInitGff3 ************************************************
6270 **
6271 ** Initialise regular expressions and data structures for ajFeat GFF3 format
6272 **
6273 ** @return [AjBool] ajTrue if successful
6274 **
6275 ** @release 6.0.0
6276 ** @@
6277 ******************************************************************************/
6278
featRegInitGff3(void)6279 static AjBool featRegInitGff3(void)
6280 {
6281 /* Setup any global static runtime resources here
6282 for example, regular expression compilation calls */
6283
6284 if(FeatInitGff3)
6285 return ajTrue;
6286
6287 ajFeatVocabInit("gff3");
6288 ajFeatVocabInit("gff3protein");
6289
6290 /*ajDebug("featRegInitGff3 Compiling regexps\n");*/
6291
6292 Gff3RegexNumeric = ajRegCompC("^[\\+-]?[0-9]+\\.?[0-9]*$");
6293 Gff3Regexblankline = ajRegCompC("^[ ]*$");
6294 Gff3Regexversion = ajRegCompC("^##gff-version[ ]+([0-9]+)");
6295 Gff3Regexregion = ajRegCompC("^##sequence-region[ ]+([0-9a-zA-Z]+)"
6296 "[ ]+([\\+-]?[0-9]+)[ ]+([\\+-]?[0-9]+)");
6297 Gff3Regexdirective = ajRegCompC("^##(.*)");
6298 Gff3Regexcomment = ajRegCompC("^#(.*)");
6299 Gff3RegexTvTagval = ajRegCompC(" *([^ =]+)[ =]([^;]+)*(;|$)"); /* "tag name */
6300 Gff3oldRegexTvTagval = ajRegCompC(" *([^ =]+)[ =](\"[^\"]*\"|"
6301 "[^;]+)(;|$)"); /* "tag name */
6302
6303 FeatInitGff3 = ajTrue;
6304
6305 if(!featRegInitGff2())
6306 return ajFalse;
6307
6308 return ajTrue;
6309 }
6310
6311
6312
6313
6314 /* @funcstatic featDelRegEmbl *************************************************
6315 **
6316 ** Cleanup and exit routines. Free and destroy regular expressions
6317 **
6318 ** @return [AjBool] ajFalse if unsuccessful
6319 **
6320 ** @release 1.0.0
6321 ** @@
6322 ******************************************************************************/
6323
featDelRegEmbl(void)6324 static AjBool featDelRegEmbl(void)
6325 {
6326 if(!FeatInitEmbl)
6327 return ajTrue;
6328
6329 FeatInitEmbl = ajFalse;
6330
6331 return ajTrue;
6332 }
6333
6334
6335
6336
6337 /* @funcstatic featDelRegPir **************************************************
6338 **
6339 ** Cleanup and exit routines. Free and destroy regular expressions
6340 **
6341 ** @return [AjBool] ajFalse if unsuccessful
6342 **
6343 ** @release 2.0.0
6344 ** @@
6345 ******************************************************************************/
6346
featDelRegPir(void)6347 static AjBool featDelRegPir(void)
6348 {
6349 if(!FeatInitPir)
6350 return ajTrue;
6351
6352 ajRegFree(&PirRegexAll);
6353 ajRegFree(&PirRegexCom);
6354 ajRegFree(&PirRegexLoc);
6355 ajRegFree(&PirRegexPos);
6356
6357 FeatInitPir = ajFalse;
6358
6359 return ajTrue;
6360 }
6361
6362
6363
6364
6365 /* @funcstatic featDelRegRefseqp **********************************************
6366 **
6367 ** Cleanup and exit routines. Free and destroy regular expressions
6368 **
6369 ** @return [AjBool] ajFalse if unsuccessful
6370 **
6371 ** @release 6.2.0
6372 ** @@
6373 ******************************************************************************/
6374
featDelRegRefseqp(void)6375 static AjBool featDelRegRefseqp(void)
6376 {
6377 if(!FeatInitRefseqp)
6378 return ajTrue;
6379
6380 FeatInitRefseqp = ajFalse;
6381
6382 return ajTrue;
6383 }
6384
6385
6386
6387
6388 /* @funcstatic featDelRegSwiss ************************************************
6389 **
6390 ** Cleanup and exit routines. Free and destroy regular expressions
6391 **
6392 ** @return [AjBool] ajFalse if unsuccessful
6393 **
6394 ** @release 1.0.0
6395 ** @@
6396 ******************************************************************************/
6397
featDelRegSwiss(void)6398 static AjBool featDelRegSwiss(void)
6399 {
6400 if(!FeatInitSwiss)
6401 return ajTrue;
6402
6403 ajRegFree(&SwRegexComment);
6404 ajRegFree(&SwRegexFtid);
6405 ajRegFree(&SwRegexNew);
6406 ajRegFree(&SwRegexNext);
6407
6408 FeatInitSwiss = ajFalse;
6409
6410 return ajTrue;
6411 }
6412
6413
6414
6415
6416 /* @funcstatic featDelRegGff2 *************************************************
6417 **
6418 ** Cleanup and exit routines. Free and destroy regular expressions
6419 **
6420 ** @return [AjBool] ajFalse if unsuccessful
6421 **
6422 ** @release 6.4.0
6423 ** @@
6424 ******************************************************************************/
6425
featDelRegGff2(void)6426 static AjBool featDelRegGff2(void)
6427 {
6428 if(!FeatInitGff2)
6429 return ajTrue;
6430
6431 /* Clean-up any global static runtime resources here
6432 for example, regular expression pattern variables */
6433
6434 ajRegFree(&GffRegexNumeric);
6435 ajRegFree(&GffRegexblankline);
6436 ajRegFree(&GffRegexversion);
6437 ajRegFree(&GffRegexdate);
6438 ajRegFree(&GffRegexregion);
6439 ajRegFree(&GffRegexcomment);
6440 ajRegFree(&GffRegextype);
6441 ajRegFree(&GffRegexTvTagval);
6442
6443 FeatInitGff2 = ajFalse;
6444
6445 return ajTrue;
6446 }
6447
6448
6449
6450
6451 /* @funcstatic featDelRegGff3 *************************************************
6452 **
6453 ** Cleanup and exit routines. Free and destroy regular expressions
6454 **
6455 ** @return [AjBool] ajFalse if unsuccessful
6456 **
6457 ** @release 6.0.0
6458 ** @@
6459 ******************************************************************************/
6460
featDelRegGff3(void)6461 static AjBool featDelRegGff3(void)
6462 {
6463 if(!FeatInitGff3)
6464 return ajTrue;
6465
6466 /* Clean-up any global static runtime resources here
6467 for example, regular expression pattern variables */
6468
6469 ajRegFree(&Gff3RegexNumeric);
6470 ajRegFree(&Gff3Regexblankline);
6471 ajRegFree(&Gff3Regexversion);
6472 ajRegFree(&Gff3Regexdate);
6473 ajRegFree(&Gff3Regexregion);
6474 ajRegFree(&Gff3Regexcomment);
6475 ajRegFree(&Gff3Regexdirective);
6476 ajRegFree(&Gff3Regextype);
6477 ajRegFree(&Gff3RegexTvTagval);
6478 ajRegFree(&Gff3oldRegexTvTagval);
6479
6480 FeatInitGff3 = ajFalse;
6481
6482 featDelRegGff2();
6483
6484 return ajTrue;
6485 }
6486
6487
6488
6489
6490 /* @func ajFeatreadExit *******************************************************
6491 **
6492 ** Cleans up feature table input internal memory
6493 **
6494 ** @return [void]
6495 **
6496 ** @release 6.4.0
6497 ** @@
6498 ******************************************************************************/
6499
ajFeatreadExit(void)6500 void ajFeatreadExit(void)
6501 {
6502 ajint i;
6503
6504 for(i=1;featinformatDef[i].Name;i++)
6505 {
6506 if(featinformatDef[i].Used)
6507 {
6508 /* Calling funclist featinformatDef() */
6509 if(featinformatDef[i].DelReg &&
6510 !(*featinformatDef[i].DelReg)())
6511 {
6512 /*ajDebug("No DelReg yet for %s\n",featinformatDef[i].Name);*/
6513 ajErr("No DelReg yet for %s\n",featinformatDef[i].Name);
6514 }
6515 }
6516 }
6517
6518 ajRegFree(&featRegFlag);
6519 ajRegFree(&featRegMore);
6520 ajRegFree(&featRegGroup);
6521 ajRegFree(&featRegGff3Group);
6522
6523 ajStrDel(&featinFormatTmp);
6524 ajStrDel(&featinValTmp);
6525 ajStrDel(&featinUfoTest);
6526 ajStrDel(&featReadLine);
6527 ajStrDel(&featProcessLine);
6528 ajStrDel(&featinTmpStr);
6529 ajStrDel(&featGroup);
6530 ajStrDel(&featSeqid);
6531 ajStrDel(&featSource);
6532 ajStrDel(&featFeature);
6533
6534 ajStrDel(&featinTagNote);
6535 ajStrDel(&featinTagComm);
6536 ajStrDel(&featinTagFtid);
6537
6538 ajStrDel(&featinSourcePir);
6539 ajStrDel(&featinSourceSwiss);
6540 ajStrDel(&featinSourceEmbl);
6541 ajStrDel(&featinSourceRefseqp);
6542 ajStrDel(&featId);
6543 ajStrDel(&featLabel);
6544
6545 ajStrDel(&featLocStr);
6546 ajStrDel(&featLocToken);
6547 ajStrDel(&featLocDb);
6548 ajStrDel(&featSaveGroupStr);
6549
6550 ajStrTokenDel(&featGffSplit);
6551 ajStrTokenDel(&featEmblSplit);
6552
6553 ajRegFree(&featinRegUfoFmt);
6554 ajRegFree(&featinRegUfoFile);
6555
6556 ajRegFree(&featTagTrans);
6557
6558 ajTableDel(&feattabDbMethods);
6559
6560 return;
6561 }
6562
6563
6564
6565
6566
6567 /* @section Internals *********************************************************
6568 **
6569 ** Functions to return internal values
6570 **
6571 ** @nam3rule Type Internals for feature table datatype
6572 ** @nam4rule Get Return a value
6573 ** @nam5rule Fields Known query fields for ajFeatRead
6574 ** @nam5rule Qlinks Known query link operators for ajFeatRead
6575 **
6576 ** @valrule * [const char*] Internal value
6577 **
6578 ** @fcategory misc
6579 **
6580 ******************************************************************************/
6581
6582
6583
6584
/* @func ajFeattabinTypeGetFields *********************************************
**
** Returns the list of known query field names for ajFeatRead
**
** The names are returned as a single space-separated string.
**
** @return [const char*] List of field names
**
** @release 6.4.0
** @@
******************************************************************************/

const char* ajFeattabinTypeGetFields(void)
{
    static const char fields[] = "id acc";

    return fields;
}
6599
6600
6601
6602
/* @func ajFeattabinTypeGetQlinks *********************************************
**
** Returns the list of known query link operators for ajFeatRead
**
** @return [const char*] List of query link operators
**
** @release 6.4.0
** @@
******************************************************************************/

const char* ajFeattabinTypeGetQlinks(void)
{
    static const char qlinks[] = "|";

    return qlinks;
}
6617
6618
6619
6620
6621 /* @funcstatic featFindInformatC **********************************************
6622 **
6623 ** Looks for the specified format(s) in the internal definitions and
6624 ** returns the index.
6625 **
6626 ** Given a single format, sets iformat.
6627 **
6628 ** @param [r] format [const char*] Format required.
6629 ** @param [w] iformat [ajint*] Index
6630 ** @return [AjBool] ajTrue on success.
6631 **
6632 ** @release 6.4.0
6633 ** @@
6634 ******************************************************************************/
6635
featFindInformatC(const char * format,ajint * iformat)6636 static AjBool featFindInformatC(const char* format, ajint* iformat)
6637 {
6638 ajint i = 0;
6639
6640 /*ajDebug("featFindInformatC '%s'\n", format);*/
6641 if(!*format)
6642 return ajFalse;
6643
6644 ajStrAssignC(&featinFormatTmp, format);
6645 ajStrFmtLower(&featinFormatTmp);
6646
6647 for(i=0; featinformatDef[i].Name; i++)
6648 {
6649 /*ajDebug("test %d '%s' \n", i, featinformatDef[i].Name);*/
6650 if(ajStrMatchC(featinFormatTmp,
6651 featinformatDef[i].Name))
6652 {
6653 *iformat = i;
6654 (void) ajStrDelStatic(&featinFormatTmp);
6655 /*ajDebug("found '%s' at %d\n", featinformatDef[i].Name, i);*/
6656 return ajTrue;
6657 }
6658 }
6659
6660 ajErr("Unknown input feat format '%s'", format);
6661
6662 ajStrDelStatic(&featinFormatTmp);
6663
6664 return ajFalse;
6665 }
6666
6667
6668
6669
6670 /* @funcstatic featFindInformatS **********************************************
6671 **
6672 ** Looks for the specified format(s) in the internal definitions and
6673 ** returns the index.
6674 **
6675 ** Given a single format, sets iformat.
6676 **
6677 ** @param [r] format [const AjPStr] Format required.
6678 ** @param [w] iformat [ajint*] Index
6679 ** @return [AjBool] ajTrue on success.
6680 **
6681 ** @release 6.4.0
6682 ** @@
6683 ******************************************************************************/
6684
featFindInformatS(const AjPStr format,ajint * iformat)6685 static AjBool featFindInformatS(const AjPStr format, ajint* iformat)
6686 {
6687 return featFindInformatC(ajStrGetPtr(format), iformat);
6688 }
6689
6690
6691
6692
6693 /* @func ajFeatinPrintFormat **************************************************
6694 **
6695 ** Reports the internal data structures for input feature formats
6696 **
6697 ** @param [u] outf [AjPFile] Output file
6698 ** @param [r] full [AjBool] Full report (usually ajFalse)
6699 ** @return [void]
6700 **
6701 ** @release 6.4.0
6702 ** @@
6703 ******************************************************************************/
6704
ajFeatinPrintFormat(AjPFile outf,AjBool full)6705 void ajFeatinPrintFormat(AjPFile outf, AjBool full)
6706 {
6707 ajint i = 0;
6708
6709 ajFmtPrintF(outf, "\n");
6710 ajFmtPrintF(outf, "# Feature input formats\n");
6711 ajFmtPrintF(outf, "# Name Format name (or alias)\n");
6712 ajFmtPrintF(outf, "# Alias Name is an alias\n");
6713 ajFmtPrintF(outf, "# Nuc Valid for nucleotide sequences\n");
6714 ajFmtPrintF(outf, "# Pro Valid for protein sequences\n");
6715 ajFmtPrintF(outf, "# Name Alias Nuc Pro "
6716 "Description\n");
6717 ajFmtPrintF(outf, "Informat {\n");
6718
6719 for(i=0; featinformatDef[i].Name; i++)
6720 {
6721 if(full || !featinformatDef[i].Alias)
6722 ajFmtPrintF(outf, " %-12s %5B %5B %5B \"%s\"\n",
6723 featinformatDef[i].Name,
6724 featinformatDef[i].Alias,
6725 featinformatDef[i].Nucleotide,
6726 featinformatDef[i].Protein,
6727 featinformatDef[i].Desc);
6728 }
6729
6730 ajFmtPrintF(outf, "}\n\n");
6731
6732 return;
6733 }
6734
6735
6736
6737
6738 /* @func ajFeatinPrinthtmlFormat **********************************************
6739 **
6740 ** Reports the internal data structures for input feature formats
6741 **
6742 ** @param [u] outf [AjPFile] Output file
6743 ** @return [void]
6744 **
6745 ** @release 6.4.0
6746 ** @@
6747 ******************************************************************************/
6748
ajFeatinPrinthtmlFormat(AjPFile outf)6749 void ajFeatinPrinthtmlFormat(AjPFile outf)
6750 {
6751 ajint i = 0;
6752
6753 ajFmtPrintF(outf, "<table border=3>");
6754 ajFmtPrintF(outf, "<tr><th>Features Input Format</th><th>Alias</th>\n");
6755 ajFmtPrintF(outf, "<th>Nuc</th><th>Pro</th>\n");
6756 ajFmtPrintF(outf, "<th>Description</th></tr>\n");
6757
6758 for(i=0; featinformatDef[i].Name; i++)
6759 {
6760 if(!featinformatDef[i].Alias)
6761 ajFmtPrintF(outf, "<tr><td>\n%-12s\n</td><td>%5B\n</td>"
6762 "<td>%5B\n</td><td>%5B\n</td><td>\"%s\"</td></tr>\n",
6763 featinformatDef[i].Name,
6764 featinformatDef[i].Alias,
6765 featinformatDef[i].Nucleotide,
6766 featinformatDef[i].Protein,
6767 featinformatDef[i].Desc);
6768 }
6769
6770 ajFmtPrintF(outf, "</table>\n");
6771
6772 return;
6773 }
6774
6775
6776
6777
6778 /* @func ajFeatinPrintbookFormat **********************************************
6779 **
6780 ** Reports the input feature format internals in docbook text format
6781 **
6782 ** @param [u] outf [AjPFile] Output file
6783 ** @return [void]
6784 **
6785 ** @release 6.4.0
6786 ** @@
6787 ******************************************************************************/
6788
ajFeatinPrintbookFormat(AjPFile outf)6789 void ajFeatinPrintbookFormat(AjPFile outf)
6790 {
6791 ajint i = 0;
6792 ajint j = 0;
6793 AjPStr namestr = NULL;
6794
6795 AjPList fmtlist;
6796 AjPStr* names;
6797
6798 fmtlist = ajListstrNew();
6799
6800 ajFmtPrintF(outf, "<para>The supported feature formats are summarised "
6801 "in the table below. The columns are as follows: "
6802 "<emphasis>Output format</emphasis> (format name), "
6803 "<emphasis>Nuc</emphasis> (\"true\" indicates nucleotide "
6804 "sequence data may be represented), <emphasis>Pro</emphasis> "
6805 "(\"true\" indicates protein sequence data may be "
6806 "represented) and <emphasis>Description</emphasis> "
6807 "(short description of the format).</para>\n\n");
6808
6809 ajFmtPrintF(outf, "<table frame=\"box\" rules=\"cols\">\n");
6810 ajFmtPrintF(outf, " <caption>Input feature formats</caption>\n");
6811 ajFmtPrintF(outf, " <thead>\n");
6812 ajFmtPrintF(outf, " <tr align=\"center\">\n");
6813 ajFmtPrintF(outf, " <th>Output Format</th>\n");
6814 ajFmtPrintF(outf, " <th>Nuc</th>\n");
6815 ajFmtPrintF(outf, " <th>Pro</th>\n");
6816 ajFmtPrintF(outf, " <th>Description</th>\n");
6817 ajFmtPrintF(outf, " </tr>\n");
6818 ajFmtPrintF(outf, " </thead>\n");
6819 ajFmtPrintF(outf, " <tbody>\n");
6820
6821 for(i=1; featinformatDef[i].Name; i++)
6822 {
6823 if(!featinformatDef[i].Alias)
6824 {
6825 namestr = ajStrNewC(featinformatDef[i].Name);
6826 ajListPush(fmtlist, namestr);
6827 namestr = NULL;
6828 }
6829 }
6830
6831 ajListSort(fmtlist, &ajStrVcmp);
6832 ajListstrToarray(fmtlist, &names);
6833
6834 for(i=0; names[i]; i++)
6835 {
6836 for(j=0; featinformatDef[j].Name; j++)
6837 {
6838 if(ajStrMatchC(names[i],featinformatDef[j].Name))
6839 {
6840 ajFmtPrintF(outf, " <tr>\n");
6841 ajFmtPrintF(outf, " <td>%s</td>\n",
6842 featinformatDef[j].Name);
6843 ajFmtPrintF(outf, " <td>%B</td>\n",
6844 featinformatDef[j].Nucleotide);
6845 ajFmtPrintF(outf, " <td>%B</td>\n",
6846 featinformatDef[j].Protein);
6847 ajFmtPrintF(outf, " <td>%s</td>\n",
6848 featinformatDef[j].Desc);
6849 ajFmtPrintF(outf, " </tr>\n");
6850 }
6851 }
6852 }
6853
6854 ajFmtPrintF(outf, " </tbody>\n");
6855 ajFmtPrintF(outf, "</table>\n");
6856 ajStrDel(&namestr);
6857
6858 names = NULL;
6859 ajListstrFreeData(&fmtlist);
6860
6861 return;
6862 }
6863
6864
6865
6866
6867 /* @func ajFeatinPrintwikiFormat **********************************************
6868 **
6869 ** Reports the input feature format internals in wiki text format
6870 **
6871 ** @param [u] outf [AjPFile] Output file
6872 ** @return [void]
6873 **
6874 ** @release 6.4.0
6875 ** @@
6876 ******************************************************************************/
6877
ajFeatinPrintwikiFormat(AjPFile outf)6878 void ajFeatinPrintwikiFormat(AjPFile outf)
6879 {
6880 ajint i = 0;
6881 ajint j = 0;
6882 AjPStr namestr = NULL;
6883
6884 ajFmtPrintF(outf, "{| class=\"wikitable sortable\" border=\"2\"\n");
6885 ajFmtPrintF(outf, "|-\n");
6886 ajFmtPrintF(outf, "!Format!!Nuc!!Pro!!"
6887 "class=\"unsortable\"|Description\n");
6888
6889 for(i=1; featinformatDef[i].Name; i++)
6890 {
6891 if(!featinformatDef[i].Alias)
6892 {
6893 ajFmtPrintF(outf, "|-\n");
6894 ajStrAssignC(&namestr, featinformatDef[i].Name);
6895
6896
6897 for(j=i+1; featinformatDef[j].Name; j++)
6898 {
6899 if(featinformatDef[j].Read == featinformatDef[i].Read)
6900 {
6901 ajFmtPrintAppS(&namestr, "<br>%s",
6902 featinformatDef[j].Name);
6903 if(!featinformatDef[j].Alias)
6904 {
6905 ajWarn("Feature input format '%s' same as '%s' "
6906 "but not alias",
6907 featinformatDef[j].Name,
6908 featinformatDef[i].Name);
6909 }
6910 }
6911 }
6912 ajFmtPrintF(outf, "|%S||%B||%B||%s\n",
6913 namestr,
6914 featinformatDef[i].Nucleotide,
6915 featinformatDef[i].Protein,
6916 featinformatDef[i].Desc);
6917 }
6918 }
6919
6920 ajFmtPrintF(outf, "|}\n\n");
6921
6922 ajStrDel(&namestr);
6923
6924 return;
6925 }
6926
6927
6928
6929
6930 /* #datasection [AjPFeattaball] Feature Input Stream **************************
6931 **
6932 ** Function is for manipulating feature input stream objects
6933 **
6934 ** #nam2rule Feattaball Feature input stream objects
6935 **
6936 ******************************************************************************/
6937
6938
6939
6940
/* #section Feature Input Stream Constructors *********************************
6942 **
6943 ** All constructors return a new feature input stream object by pointer.
6944 ** It is the responsibility of the user to first destroy any previous
6945 ** feature input object. The target pointer does not need to be
6946 ** initialised to NULL, but it is good programming practice to do so
6947 ** anyway.
6948 **
6949 ** #fdata [AjPFeattaball]
6950 **
6951 ** #nam3rule New Constructor
6952 **
6953 ** #valrule * [AjPFeattaball] Feature input stream object
6954 **
6955 ** #fcategory new
6956 **
6957 ******************************************************************************/
6958
6959
6960
6961
6962 /* @func ajFeattaballNew ******************************************************
6963 **
6964 ** Creates a new feature input stream object.
6965 **
6966 ** @return [AjPFeattaball] New feature input stream object.
6967 **
6968 ** @release 6.4.0
6969 ** @@
6970 ******************************************************************************/
6971
ajFeattaballNew(void)6972 AjPFeattaball ajFeattaballNew(void)
6973 {
6974 AjPFeattaball pthis;
6975
6976 AJNEW0(pthis);
6977
6978 pthis->Feattabin = ajFeattabinNew();
6979 pthis->Feattable = ajFeattableNew(NULL);
6980
6981 return pthis;
6982 }
6983
6984
6985
6986
6987
6988 /* ==================================================================== */
6989 /* ========================== destructors ============================= */
6990 /* ==================================================================== */
6991
6992
6993
6994
6995 /* #section Feature Input Stream Destructors **********************************
6996 **
6997 ** Destruction destroys all internal data structures and frees the
6998 ** memory allocated for the feature input stream object.
6999 **
7000 ** #fdata [AjPFeattaball]
7001 **
7002 ** #nam3rule Del Destructor
7003 **
7004 ** #argrule Del pthis [AjPFeattaball*] Feature input stream
7005 **
7006 ** #valrule * [void]
7007 **
7008 ** #fcategory delete
7009 **
7010 ******************************************************************************/
7011
7012
7013
7014
7015 /* @func ajFeattaballDel ******************************************************
7016 **
7017 ** Deletes a feature input stream object.
7018 **
7019 ** @param [d] pthis [AjPFeattaball*] Feature input stream
7020 ** @return [void]
7021 **
7022 ** @release 6.4.0
7023 ** @@
7024 ******************************************************************************/
7025
ajFeattaballDel(AjPFeattaball * pthis)7026 void ajFeattaballDel(AjPFeattaball* pthis)
7027 {
7028 AjPFeattaball thys;
7029
7030 if(!pthis)
7031 return;
7032
7033 thys = *pthis;
7034
7035 if(!thys)
7036 return;
7037
7038 ajFeattabinDel(&thys->Feattabin);
7039 if(!thys->Returned)
7040 ajFeattableDel(&thys->Feattable);
7041
7042 AJFREE(*pthis);
7043
7044 return;
7045 }
7046
7047
7048
7049
7050 /* ==================================================================== */
7051 /* =========================== Modifiers ============================== */
7052 /* ==================================================================== */
7053
7054
7055
7056
7057 /* #section feature input stream modifiers ************************************
7058 **
7059 ** These functions use the contents of a feature input stream object and
7060 ** update them.
7061 **
7062 ** #fdata [AjPFeattaball]
7063 **
7064 ** #nam3rule Clear Clear all values
7065 **
7066 ** #argrule * thys [AjPFeattaball] Feature input stream object
7067 **
7068 ** #valrule * [void]
7069 **
7070 ** #fcategory modify
7071 **
7072 ******************************************************************************/
7073
7074
7075
7076
7077 /* @func ajFeattaballClear ****************************************************
7078 **
7079 ** Clears a feature input stream object back to "as new" condition,
7080 ** except for the query list which must be preserved.
7081 **
7082 ** @param [w] thys [AjPFeattaball] Feature input stream
7083 ** @return [void]
7084 **
7085 ** @release 6.4.0
7086 ** @@
7087 ******************************************************************************/
7088
ajFeattaballClear(AjPFeattaball thys)7089 void ajFeattaballClear(AjPFeattaball thys)
7090 {
7091
7092 ajDebug("ajFeattaballClear called\n");
7093
7094 if(!thys)
7095 return;
7096
7097 ajFeattabinClear(thys->Feattabin);
7098
7099 if(!thys->Returned)
7100 ajFeattableClear(thys->Feattable);
7101
7102 return;
7103 }
7104
7105
7106
7107
7108 /* #section Feature input stream casts ****************************************
7109 **
7110 ** These functions return the contents of a feature input stream object
7111 **
7112 ** #fdata [AjPFeattaball]
7113 **
7114 ** #nam3rule Get Get feature input stream values
7115 ** #nam3rule Getfeattab Get feature table values
7116 ** #nam4rule Id Get identifier of current term
7117 **
7118 ** #argrule * thys [const AjPFeattaball] Feature input stream object
7119 **
7120 ** #valrule * [const AjPStr] String value
7121 **
7122 ** #fcategory cast
7123 **
7124 ******************************************************************************/
7125
7126
7127
7128
7129 /* @func ajFeattaballGetfeattableId *******************************************
7130 **
7131 ** Returns the identifier of the current feature table in an input stream
7132 **
7133 ** @param [r] thys [const AjPFeattaball] Feature input stream
7134 ** @return [const AjPStr] Identifier
7135 **
7136 ** @release 6.4.0
7137 ** @@
7138 ******************************************************************************/
7139
ajFeattaballGetfeattableId(const AjPFeattaball thys)7140 const AjPStr ajFeattaballGetfeattableId(const AjPFeattaball thys)
7141 {
7142 if(!thys)
7143 return NULL;
7144
7145 ajDebug("ajFeattaballGetfeattableId called\n");
7146
7147 return ajFeattableGetName(thys->Feattable);
7148 }
7149
7150
7151
7152
7153 /* @func ajFeattaballNext *****************************************************
7154 **
7155 ** Parse a feature query into format, access, file and entry
7156 **
7157 ** Split at delimiters. Check for the first part as a valid format
7158 ** Check for the remaining first part as a database name or as a file
7159 ** that can be opened.
7160 ** Anything left is an entryname spec.
7161 **
7162 ** Return the results in the AjPFeattable object but leave the file open for
7163 ** future calls.
7164 **
7165 ** @param [w] thys [AjPFeattaball] Feature input stream
7166 ** @param [u] Pfeattable [AjPFeattable*] Feature table returned
7167 ** @return [AjBool] ajTrue on success.
7168 **
7169 ** @release 6.4.0
7170 ** @@
7171 ******************************************************************************/
7172
ajFeattaballNext(AjPFeattaball thys,AjPFeattable * Pfeattable)7173 AjBool ajFeattaballNext(AjPFeattaball thys, AjPFeattable *Pfeattable)
7174 {
7175 if(!thys->Count)
7176 {
7177 thys->Count = 1;
7178
7179 thys->Totfeattables++;
7180
7181 *Pfeattable= thys->Feattable;
7182 thys->Returned = ajTrue;
7183
7184 return ajTrue;
7185 }
7186
7187
7188 if(ajFeattabinRead(thys->Feattabin, thys->Feattable))
7189 {
7190 thys->Count++;
7191
7192 thys->Totfeattables++;
7193
7194 *Pfeattable = thys->Feattable;
7195 thys->Returned = ajTrue;
7196
7197 ajDebug("ajFeattaballNext success\n");
7198
7199 return ajTrue;
7200 }
7201
7202 ajDebug("ajFeattaballNext failed\n");
7203
7204 ajFeattaballClear(thys);
7205
7206 return ajFalse;
7207 }
7208
7209
7210
7211
7212 /* #section Cast **************************************************************
7213 **
7214 ** Return a reference to the call table
7215 **
7216 **
7217 ******************************************************************************/
7218
7219
7220
7221
7222 /* @func ajFeattabaccessGetDb *************************************************
7223 **
7224 ** returns the table in which feature table database access details
7225 ** are registered
7226 **
7227 ** @return [AjPTable] Access functions hash table
7228 **
7229 ** @release 6.4.0
7230 ** @@
7231 ******************************************************************************/
7232
ajFeattabaccessGetDb(void)7233 AjPTable ajFeattabaccessGetDb(void)
7234 {
7235 if(!feattabDbMethods)
7236 feattabDbMethods = ajCallTableNew();
7237
7238 return feattabDbMethods;
7239 }
7240
7241
7242
7243
7244
7245 /* @func ajFeattabaccessMethodGetQlinks ***************************************
7246 **
7247 ** Tests for a named method for feature table term reading and returns the
7248 ** known query link operators
7249 **
7250 ** @param [r] method [const AjPStr] Method required.
7251 ** @return [const char*] Known link operators
7252 **
7253 ** @release 6.4.0
7254 ** @@
7255 ******************************************************************************/
7256
ajFeattabaccessMethodGetQlinks(const AjPStr method)7257 const char* ajFeattabaccessMethodGetQlinks(const AjPStr method)
7258 {
7259 AjPFeattabAccess methoddata;
7260
7261 methoddata = ajCallTableGetS(feattabDbMethods, method);
7262 if(!methoddata)
7263 return NULL;
7264
7265 return methoddata->Qlink;
7266 }
7267
7268
7269
7270
7271 /* @func ajFeattabaccessMethodGetScope ****************************************
7272 **
7273 ** Tests for a named method for feature table term reading and returns
7274 ** the scope (entry, query or all).
7275 *
7276 ** @param [r] method [const AjPStr] Method required.
7277 ** @return [ajuint] Scope flags
7278 **
7279 ** @release 6.4.0
7280 ** @@
7281 ******************************************************************************/
7282
ajFeattabaccessMethodGetScope(const AjPStr method)7283 ajuint ajFeattabaccessMethodGetScope(const AjPStr method)
7284 {
7285 AjPFeattabAccess methoddata;
7286 ajuint ret = 0;
7287
7288 methoddata = ajCallTableGetS(feattabDbMethods, method);
7289 if(!methoddata)
7290 return 0;
7291
7292 if(methoddata->Entry)
7293 ret |= AJMETHOD_ENTRY;
7294 if(methoddata->Query)
7295 ret |= AJMETHOD_QUERY;
7296 if(methoddata->All)
7297 ret |= AJMETHOD_ALL;
7298
7299 return ret;
7300 }
7301
7302
7303
7304
7305 /* @func ajFeattabaccessMethodTest ********************************************
7306 **
7307 ** Tests for a named method for feature table reading.
7308 **
7309 ** @param [r] method [const AjPStr] Method required.
7310 ** @return [AjBool] ajTrue on success.
7311 **
7312 ** @release 6.4.0
7313 ** @@
7314 ******************************************************************************/
7315
ajFeattabaccessMethodTest(const AjPStr method)7316 AjBool ajFeattabaccessMethodTest(const AjPStr method)
7317 {
7318 if(ajCallTableGetS(feattabDbMethods, method))
7319 return ajTrue;
7320
7321 return ajFalse;
7322 }
7323
7324
7325
7326
7327 /* @funcstatic feattabinQryRestore ********************************************
7328 **
7329 ** Restores a feature input specification from a FeatPListUfo node
7330 **
7331 ** @param [w] feattabin [AjPFeattabin] feature table input object
7332 ** @param [r] node [const FeatPListUfo] Feature list node
7333 ** @return [void]
7334 **
7335 ** @release 6.4.0
7336 ******************************************************************************/
7337
feattabinQryRestore(AjPFeattabin feattabin,const FeatPListUfo node)7338 static void feattabinQryRestore(AjPFeattabin feattabin,
7339 const FeatPListUfo node)
7340 {
7341 feattabin->Input->Format = node->Format;
7342 feattabin->Input->Fpos = node->Fpos;
7343 ajStrAssignS(&feattabin->Input->Formatstr, node->Formatstr);
7344
7345 return;
7346 }
7347
7348
7349
7350
7351 /* @funcstatic feattabinQrySave ***********************************************
7352 **
7353 ** Saves a data input specification in a FeatPListUfo node
7354 **
7355 ** @param [w] node [FeatPListUfo] Feature list node
7356 ** @param [r] feattabin [const AjPFeattabin] Feature table input object
7357 ** @return [void]
7358 **
7359 ** @release 6.4.0
7360 ******************************************************************************/
7361
feattabinQrySave(FeatPListUfo node,const AjPFeattabin feattabin)7362 static void feattabinQrySave(FeatPListUfo node,
7363 const AjPFeattabin feattabin)
7364 {
7365 node->Format = feattabin->Input->Format;
7366 node->Fpos = feattabin->Input->Fpos;
7367 ajStrAssignS(&node->Formatstr, feattabin->Input->Formatstr);
7368
7369 return;
7370 }
7371
7372
7373
7374
7375 /* @funcstatic feattabinQryProcess ********************************************
7376 **
7377 ** Converts a feature table query into an open file.
7378 **
7379 ** Tests for "format::" and sets this if it is found
7380 **
7381 ** Then tests for "list:" or "@" and processes as a list file
7382 ** using feattabinListProcess which in turn invokes feattabinQryProcess
7383 ** until a valid query is found.
7384 **
7385 ** Then tests for dbname:query and opens the file (at the correct position
7386 ** if the database definition defines it)
7387 **
7388 ** If there is no database, looks for file:query and opens the file.
7389 ** In this case the file position is not known and data data reading
7390 ** will have to scan for the entry/entries we need.
7391 **
7392 ** @param [u] feattabin [AjPFeattabin] feature table input structure.
7393 ** @param [u] ftable [AjPFeattable] Feature table data to be read.
7394 ** The format will be replaced
7395 ** if defined in the query string.
7396 ** @return [AjBool] ajTrue on success.
7397 **
7398 ** @release 6.4.0
7399 ** @@
7400 ******************************************************************************/
7401
feattabinQryProcess(AjPFeattabin feattabin,AjPFeattable ftable)7402 static AjBool feattabinQryProcess(AjPFeattabin feattabin,
7403 AjPFeattable ftable)
7404 {
7405 AjBool ret = ajTrue;
7406 AjPStr qrystr = NULL;
7407 AjBool featmethod = ajFalse;
7408 const AjPStr fmtstr = NULL;
7409 AjPTextin textin;
7410 AjPQuery qry;
7411 AjPFeattabAccess feataccess = NULL;
7412
7413 textin = feattabin->Input;
7414 qry = textin->Query;
7415
7416 /* pick up the original query string */
7417 qrystr = ajStrNewS(textin->Qry);
7418
7419 ajDebug("feattabinQryProcess '%S'\n", qrystr);
7420
7421 /* look for a format:: prefix */
7422 fmtstr = ajQuerystrParseFormat(&qrystr, textin, feattabinformatFind);
7423 ajDebug("feattabinQryProcess ... fmtstr '%S' '%S'\n", fmtstr, qrystr);
7424
7425 /* (seq/feat) look for a [range] suffix */
7426 ajQuerystrParseRange(&qrystr, &feattabin->Start, &feattabin->End,
7427 &feattabin->Rev);
7428 ajDebug("feattabinQryProcess ... range %d..%d rev:%B '%S'\n",
7429 feattabin->Start, feattabin->End, feattabin->Rev, qrystr);
7430
7431 /* look for a list:: or (at):: listfile of queries - process and return */
7432 if(ajQuerystrParseListfile(&qrystr))
7433 {
7434 ajDebug("feattabinQryProcess ... listfile '%S'\n", qrystr);
7435 ret = feattabinListProcess(feattabin, ftable, qrystr);
7436 ajStrDel(&qrystr);
7437 return ret;
7438 }
7439
7440 /* try general text access methods (file, asis, text database access */
7441 ajDebug("feattabinQryProcess ... no listfile '%S'\n", qrystr);
7442 if(!ajQuerystrParseRead(&qrystr, textin, feattabinformatFind, &featmethod))
7443 {
7444 ajStrDel(&qrystr);
7445 return ajFalse;
7446 }
7447
7448 feattabinFormatSet(feattabin, ftable);
7449
7450 ajDebug("feattabinQryProcess ... read nontext: %B '%S'\n",
7451 featmethod, qrystr);
7452 ajStrDel(&qrystr);
7453
7454 /* we found a non-text method */
7455 if(featmethod)
7456 {
7457 ajDebug("feattabinQryProcess ... call method '%S'\n", qry->Method);
7458 ajDebug("feattabinQryProcess ... textin format %d '%S'\n",
7459 textin->Format, textin->Formatstr);
7460 ajDebug("feattabinQryProcess ... query format '%S'\n",
7461 qry->Formatstr);
7462 qry->Access = ajCallTableGetS(feattabDbMethods,qry->Method);
7463 feataccess = qry->Access;
7464 return (*feataccess->Access)(feattabin);
7465 }
7466
7467 ajDebug("feattabinQryProcess text method '%S' success\n", qry->Method);
7468
7469 return ajTrue;
7470 }
7471
7472
7473
7474
7475
7476 /* #datasection [AjPList] Query field list ************************************
7477 **
7478 ** Query fields lists are handled internally. Only static functions
7479 ** should appear here
7480 **
7481 ******************************************************************************/
7482
7483
7484
7485
7486 /* @funcstatic feattabinListProcess *******************************************
7487 **
7488 ** Processes a file of queries.
7489 ** This function is called by, and calls, feattabinQryProcess. There is
7490 ** a depth check to avoid infinite loops, for example where a list file
7491 ** refers to itself.
7492 **
7493 ** This function produces a list (AjPList) of queries with all list references
7494 ** expanded into lists of queries.
7495 **
7496 ** Because queries in a list can have their own format
7497 ** the prior settings are stored with each query in the list node so that they
7498 ** can be restored after.
7499 **
7500 ** @param [u] feattabin [AjPFeattabin] Feature table input
7501 ** @param [u] ftable [AjPFeattable] Feature table data
7502 ** @param [r] listfile [const AjPStr] Name of list file.,
7503 ** @return [AjBool] ajTrue on success.
7504 **
7505 ** @release 6.4.0
7506 ** @@
7507 ******************************************************************************/
7508
feattabinListProcess(AjPFeattabin feattabin,AjPFeattable ftable,const AjPStr listfile)7509 static AjBool feattabinListProcess(AjPFeattabin feattabin,
7510 AjPFeattable ftable,
7511 const AjPStr listfile)
7512 {
7513 AjPList list = NULL;
7514 AjPFile file = NULL;
7515 AjPStr token = NULL;
7516 AjPStr rest = NULL;
7517 AjBool ret = ajFalse;
7518 FeatPListUfo node = NULL;
7519
7520 ajuint recnum = 0;
7521 static ajint depth = 0;
7522 static ajint MAXDEPTH = 16;
7523
7524 depth++;
7525 ajDebug("++feattabinListProcess %S depth %d\n",
7526 listfile, depth);
7527
7528 if(depth > MAXDEPTH)
7529 ajFatal("Query list too deep");
7530
7531 if(!feattabin->Input->List)
7532 feattabin->Input->List = ajListNew();
7533
7534 list = ajListNew();
7535
7536 file = ajFileNewInNameS(listfile);
7537
7538 if(!file)
7539 {
7540 ajErr("Failed to open list file '%S'", listfile);
7541 depth--;
7542
7543 return ret;
7544 }
7545
7546 while(ajReadlineTrim(file, &featReadLine))
7547 {
7548 ++recnum;
7549 feattabinListNoComment(&featReadLine);
7550 if(ajStrExtractWord(featReadLine, &rest, &token))
7551 {
7552 if(ajStrGetLen(rest))
7553 {
7554 ajErr("Bad record %u in list file '%S'\n'%S'",
7555 recnum, listfile, featReadLine);
7556 }
7557 else if(ajStrGetLen(token))
7558 {
7559 ajDebug("++Add to list: '%S'\n", token);
7560 AJNEW0(node);
7561 ajStrAssignS(&node->Ufo, token);
7562 feattabinQrySave(node, feattabin);
7563 ajListPushAppend(list, node);
7564 }
7565 }
7566 }
7567
7568 ajFileClose(&file);
7569 ajStrDel(&token);
7570 ajStrDel(&rest);
7571
7572 ajDebug("Trace feattabin->Input->List\n");
7573 ajQuerylistTrace(feattabin->Input->List);
7574 ajDebug("Trace new list\n");
7575 ajQuerylistTrace(list);
7576 ajListPushlist(feattabin->Input->List, &list);
7577
7578 ajDebug("Trace combined feattabin->Input->List\n");
7579 ajQuerylistTrace(feattabin->Input->List);
7580
7581 /*
7582 ** now try the first item on the list
7583 ** this can descend recursively if it is also a list
7584 ** which is why we check the depth above
7585 */
7586
7587 if(ajListPop(feattabin->Input->List, (void**) &node))
7588 {
7589 ajDebug("++pop first item '%S'\n", node->Ufo);
7590 ajFeattabinQryS(feattabin, node->Ufo);
7591 feattabinQryRestore(feattabin, node);
7592 ajStrDel(&node->Ufo);
7593 ajStrDel(&node->Formatstr);
7594 AJFREE(node);
7595 ajDebug("descending with query '%S'\n", feattabin->Input->Qry);
7596 ret = feattabinQryProcess(feattabin, ftable);
7597 }
7598
7599 depth--;
7600 ajDebug("++feattabinListProcess depth: %d returns: %B\n", depth, ret);
7601
7602 return ret;
7603 }
7604
7605
7606
7607
7608 /* @funcstatic feattabinListNoComment *****************************************
7609 **
7610 ** Strips comments from a character string (a line from a list file).
7611 ** Comments are blank lines or any text following a "#" character.
7612 **
7613 ** @param [u] text [AjPStr*] Line of text from input file.
7614 ** @return [void]
7615 **
7616 ** @release 6.4.0
7617 ** @@
7618 ******************************************************************************/
7619
feattabinListNoComment(AjPStr * text)7620 static void feattabinListNoComment(AjPStr* text)
7621 {
7622 ajuint i;
7623 char *cp;
7624
7625 i = ajStrGetLen(*text);
7626
7627 if(!i) /* empty string */
7628 return;
7629
7630 MAJSTRGETUNIQUESTR(text);
7631
7632 cp = strchr(ajStrGetPtr(*text), '#');
7633
7634 if(cp)
7635 { /* comment found */
7636 *cp = '\0';
7637 ajStrSetValid(text);
7638 }
7639
7640 return;
7641 }
7642
7643
7644
7645
7646 /* @funcstatic feattabinFormatSet *********************************************
7647 **
7648 ** Sets the input format for data data using the data data
7649 ** input object's defined format
7650 **
7651 ** @param [u] feattabin [AjPFeattabin] Dfeature table input.
7652 ** @param [u] ftable [AjPFeattable] Feature table data
7653 ** @return [AjBool] ajTrue on success.
7654 **
7655 ** @release 6.4.0
7656 ** @@
7657 ******************************************************************************/
7658
feattabinFormatSet(AjPFeattabin feattabin,AjPFeattable ftable)7659 static AjBool feattabinFormatSet(AjPFeattabin feattabin, AjPFeattable ftable)
7660 {
7661
7662 if(ajStrGetLen(feattabin->Input->Formatstr))
7663 {
7664 ajDebug("... input format value '%S'\n",
7665 feattabin->Input->Formatstr);
7666
7667 if(feattabinformatFind(feattabin->Input->Formatstr,
7668 &feattabin->Input->Format))
7669 {
7670 ajStrAssignS(&ftable->Formatstr,
7671 feattabin->Input->Formatstr);
7672 ftable->Format = feattabin->Input->Format;
7673 ajDebug("...format OK '%S' = %d\n",
7674 feattabin->Input->Formatstr,
7675 feattabin->Input->Format);
7676 }
7677 else
7678 ajDebug("...format unknown '%S'\n",
7679 feattabin->Input->Formatstr);
7680
7681 return ajTrue;
7682 }
7683 else
7684 ajDebug("...input format not set\n");
7685
7686
7687 return ajFalse;
7688 }
7689
7690
7691
7692
7693 /* @funcstatic featGff3FlagSet ************************************************
7694 **
7695 ** Sets the flags for a GFF3 feature.
7696 **
7697 ** @param [u] gf [AjPFeature] Feature
7698 ** @param [r] flagstr [const AjPStr] Flags as a hexadecimal value
7699 ** @return [void]
7700 **
7701 ** @release 6.4.0
7702 ** @@
7703 ******************************************************************************/
7704
featGff3FlagSet(AjPFeature gf,const AjPStr flagstr)7705 static void featGff3FlagSet(AjPFeature gf, const AjPStr flagstr)
7706 {
7707 AjPStr savstr = NULL;
7708 AjPStr typstr = NULL;
7709 AjPStr valstr = NULL;
7710 ajint flags = 0;
7711 ajint num = 0;
7712
7713 if(!featRegFlag)
7714 featRegFlag = ajRegCompC("[ \"]*(0x[0-9a-f]+)");
7715
7716 if(!featRegMore)
7717 featRegMore = ajRegCompC("[,]*([^:]+):([^,]+)");
7718
7719 /*ajDebug("featGff3FlagSet '%S'\n", flagstr);*/
7720 ajStrAssignS(&savstr, flagstr);
7721
7722 if(ajRegExec(featRegFlag, savstr))
7723 {
7724 ajRegSubI(featRegFlag, 1, &featinTmpStr);
7725
7726 if(ajStrToHex(featinTmpStr, &flags))
7727 {
7728 if(flags & AJFEATFLAG_GROUP)
7729 flags |= AJFEATFLAG_ORDER;
7730
7731 if(flags & AJFEATFLAG_ONEOF)
7732 flags |= AJFEATFLAG_ORDER;
7733
7734 gf->Flags |= flags;
7735 }
7736
7737 /*ajDebug("flags: %x", gf->Flags);*/
7738 ajRegPost(featRegFlag, &featinTmpStr);
7739 ajStrAssignS(&savstr, featinTmpStr);
7740 }
7741
7742 while(ajRegExec(featRegMore, savstr))
7743 {
7744 ajRegSubI(featRegMore, 1, &typstr);
7745 ajRegSubI(featRegMore, 2, &valstr);
7746
7747 /*ajDebug("flag type '%S' val '%S'\n", typstr, valstr);*/
7748
7749 if(ajStrMatchCaseC(typstr, "start_before"))
7750 {
7751 if(ajStrMatchC(valstr, "true"))
7752 gf->Flags |= AJFEATFLAG_START_BEFORE_SEQ;
7753 }
7754 else if(ajStrMatchCaseC(typstr, "end_after"))
7755 {
7756 if(ajStrMatchC(valstr, "true"))
7757 gf->Flags |= AJFEATFLAG_END_AFTER_SEQ;
7758 }
7759 else if(ajStrMatchCaseC(typstr, "between"))
7760 {
7761 if(ajStrMatchC(valstr, "true"))
7762 gf->Flags |= AJFEATFLAG_BETWEEN_SEQ;
7763 }
7764 else if(ajStrMatchCaseC(typstr, "start2"))
7765 {
7766 if(ajStrToInt(valstr, &num))
7767 {
7768 gf->Start2 = num;
7769 gf->Flags |= AJFEATFLAG_START_TWO;
7770 }
7771 }
7772 else if(ajStrMatchCaseC(typstr, "end2"))
7773 {
7774 if(ajStrToInt(valstr, &num))
7775 {
7776 gf->End2 = num;
7777 gf->Flags |= AJFEATFLAG_END_TWO;
7778 }
7779 }
7780 else if(ajStrMatchCaseC(typstr, "label"))
7781 {
7782 ajFeatWarn("GFF3 label '%S' used", valstr);
7783 ajStrAssignS(&gf->Label, valstr);
7784 }
7785 else if(ajStrMatchCaseC(typstr, "start_unsure"))
7786 {
7787 if(ajStrMatchC(valstr, "true"))
7788 gf->Flags |= AJFEATFLAG_START_UNSURE;
7789 }
7790 else if(ajStrMatchCaseC(typstr, "end_unsure"))
7791 {
7792 if(ajStrMatchC(valstr, "true"))
7793 gf->Flags |= AJFEATFLAG_END_UNSURE;
7794 }
7795 else if(ajStrMatchCaseC(typstr, "type"))
7796 {
7797 ajStrAssignS(&gf->Type, ajFeattypeGetInternal(valstr));
7798
7799 }
7800 else
7801 ajFeatWarn("Unknown GFF3 featflags type '%S:%S'", typstr, valstr);
7802
7803 ajRegPost(featRegMore, &featinTmpStr);
7804 ajStrAssignS(&savstr, featinTmpStr);
7805 }
7806
7807 ajStrDel(&savstr);
7808 ajStrDel(&typstr);
7809 ajStrDel(&valstr);
7810
7811 return;
7812 }
7813
7814
7815
7816
7817 /* @funcstatic featFlagSet ****************************************************
7818 **
7819 ** Sets the flags for a feature.
7820 **
7821 ** @param [u] gf [AjPFeature] Feature
7822 ** @param [r] flagstr [const AjPStr] Flags as a hexadecimal value
7823 ** @return [void]
7824 **
7825 ** @release 2.0.0
7826 ** @@
7827 ******************************************************************************/
7828
featFlagSet(AjPFeature gf,const AjPStr flagstr)7829 static void featFlagSet(AjPFeature gf, const AjPStr flagstr)
7830 {
7831 AjPStr savstr = NULL;
7832 AjPStr typstr = NULL;
7833 AjPStr valstr = NULL;
7834 ajint flags = 0;
7835 ajint num = 0;
7836
7837 if(!featRegFlag)
7838 featRegFlag = ajRegCompC("[ \"]*(0x[0-9a-f]+)");
7839
7840 if(!featRegMore)
7841 featRegMore = ajRegCompC("[ \"]*([^:]+):([^: \"]+)");
7842
7843 /*ajDebug("featFlagSet '%S'\n", flagstr);*/
7844 ajStrAssignS(&savstr, flagstr);
7845
7846 if(ajRegExec(featRegFlag, savstr))
7847 {
7848 ajRegSubI(featRegFlag, 1, &featinTmpStr);
7849
7850 if(ajStrToHex(featinTmpStr, &flags))
7851 {
7852 if(flags & AJFEATFLAG_GROUP)
7853 flags |= AJFEATFLAG_ORDER;
7854
7855 if(flags & AJFEATFLAG_ONEOF)
7856 flags |= AJFEATFLAG_ORDER;
7857
7858 gf->Flags = flags;
7859 }
7860
7861 /*ajDebug("flags: %x", gf->Flags);*/
7862 ajRegPost(featRegFlag, &featinTmpStr);
7863 ajStrAssignS(&savstr, featinTmpStr);
7864 }
7865
7866 while(ajRegExec(featRegMore, savstr))
7867 {
7868 ajRegSubI(featRegMore, 1, &typstr);
7869 ajRegSubI(featRegMore, 2, &valstr);
7870
7871 /*ajDebug("flag type '%S' val '%S'\n", typstr, valstr);*/
7872
7873 if(ajStrMatchCaseC(typstr, "start2"))
7874 {
7875 if(ajStrToInt(valstr, &num))
7876 gf->Start2 = num;
7877 }
7878 else if(ajStrMatchCaseC(typstr, "end2"))
7879 {
7880 if(ajStrToInt(valstr, &num))
7881 gf->End2 = num;
7882 }
7883 else if(ajStrMatchCaseC(typstr, "remoteid"))
7884 ajStrAssignS(&gf->Remote, valstr);
7885 else if(ajStrMatchCaseC(typstr, "label"))
7886 {
7887 ajFeatWarn("GFF label '%S' used", valstr);
7888 ajStrAssignS(&gf->Label, valstr);
7889 }
7890 else
7891 ajFeatWarn("Unknown GFF FeatFlags type '%S:%S'", typstr, valstr);
7892
7893 ajRegPost(featRegMore, &featinTmpStr);
7894 ajStrAssignS(&savstr, featinTmpStr);
7895 }
7896
7897 ajStrDel(&savstr);
7898 ajStrDel(&typstr);
7899 ajStrDel(&valstr);
7900
7901 return;
7902 }
7903
7904
7905
7906
7907 /* @funcstatic featGroupSet ***************************************************
7908 **
7909 ** Sets the group tag for a feature.
7910 **
7911 ** @param [u] gf [AjPFeature] Feature
7912 ** @param [u] table [AjPFeattable] Feature table
7913 ** @param [r] grouptag [const AjPStr] Group field identifier
7914 ** @return [void]
7915 **
7916 ** @release 2.0.0
7917 ** @@
7918 ******************************************************************************/
7919
featGroupSet(AjPFeature gf,AjPFeattable table,const AjPStr grouptag)7920 static void featGroupSet(AjPFeature gf, AjPFeattable table,
7921 const AjPStr grouptag)
7922 {
7923 AjPStr namstr = NULL;
7924 AjPStr grpstr = NULL;
7925 ajint grpnum;
7926
7927 if(!featRegGroup)
7928 featRegGroup = ajRegCompC("^\"(([^.]*)[.])?([0-9]+)");
7929
7930 if(ajStrGetLen(grouptag) && ajStrMatchCaseS(grouptag, featSaveGroupStr))
7931 {
7932 gf->Group = featSaveGroup;
7933 gf->Exon = ++featSaveExon;
7934
7935 return;
7936 }
7937
7938
7939 if(ajStrGetLen(grouptag) && ajRegExec(featRegGroup, grouptag))
7940 {
7941 ajStrAssignS(&featSaveGroupStr, grouptag);
7942 ajRegSubI(featRegGroup, 2, &namstr);
7943 ajRegSubI(featRegGroup, 3, &grpstr);
7944
7945 /*ajDebug("featGroupSet '%S' name: '%S' group: '%S'\n",
7946 grouptag, namstr, grpstr);*/
7947
7948 if(ajStrToInt(grpstr, &grpnum)) /* true, if the regex worked */
7949 {
7950 gf->Group = grpnum;
7951 featSaveGroup = grpnum;
7952 }
7953 else
7954 gf->Group = ++(table->Groups);
7955
7956 if(ajStrGetLen(namstr))
7957 {
7958 if(!ajStrMatchCaseS(namstr, table->Seqid))
7959 {
7960 ajDebug("GFF group field '%S' table '%S'\n",
7961 grouptag, table->Seqid);
7962 ajFeatWarn("GFF group field '%S' for table '%S'",
7963 grouptag, table->Seqid);
7964 }
7965 }
7966 }
7967 else /* regex failed, make something up */
7968 {
7969 ajStrAssignS(&grpstr, grouptag);
7970 gf->Group = ++(table->Groups);
7971 featSaveGroup = gf->Group;
7972 gf->Exon = 0;
7973 featSaveExon = 0;
7974 }
7975 ajStrDel(&namstr);
7976 ajStrDel(&grpstr);
7977
7978 return;
7979 }
7980
7981
7982
7983
7984 /* @funcstatic featGff3GroupSet ***********************************************
7985 **
7986 ** Sets the group tag for a feature.
7987 **
7988 ** @param [u] gf [AjPFeature] Feature
7989 ** @param [u] table [AjPFeattable] Feature table
7990 ** @param [r] grouptag [const AjPStr] Group field identifier
7991 ** @return [void]
7992 **
7993 ** @release 6.4.0
7994 ** @@
7995 ******************************************************************************/
7996
featGff3GroupSet(AjPFeature gf,AjPFeattable table,const AjPStr grouptag)7997 static void featGff3GroupSet(AjPFeature gf, AjPFeattable table,
7998 const AjPStr grouptag)
7999 {
8000 AjPStr namstr = NULL;
8001 AjPStr grpstr = NULL;
8002 ajint grpnum;
8003
8004 if(!featRegGff3Group)
8005 featRegGff3Group = ajRegCompC("^(([^.]*)[.])?([0-9]+)");
8006
8007 if(ajStrGetLen(grouptag) && ajStrMatchCaseS(grouptag, featSaveGroupStr))
8008 {
8009 gf->Group = featSaveGroup;
8010 gf->Exon = ++featSaveExon;
8011
8012 return;
8013 }
8014
8015
8016 if(ajStrGetLen(grouptag) && ajRegExec(featRegGff3Group, grouptag))
8017 {
8018 ajStrAssignS(&featSaveGroupStr, grouptag);
8019 ajRegSubI(featRegGff3Group, 2, &namstr);
8020 ajRegSubI(featRegGff3Group, 3, &grpstr);
8021
8022 /*ajDebug("featGroupSet '%S' name: '%S' group: '%S'\n",
8023 grouptag, namstr, grpstr);*/
8024
8025 if(ajStrToInt(grpstr, &grpnum)) /* true, if the regex worked */
8026 {
8027 gf->Group = grpnum;
8028 featSaveGroup = grpnum;
8029 }
8030 else
8031 gf->Group = ++(table->Groups);
8032
8033 /* TODO: this validation should be updated and uncommented back
8034 if(ajStrGetLen(namstr))
8035 {
8036 if(!ajStrMatchCaseS(namstr, table->Seqid))
8037 {
8038 ajDebug("GFF group field '%S' table '%S'\n",
8039 grouptag, table->Seqid);
8040 ajFeatWarn("GFF group field '%S' for table '%S'",
8041 grouptag, table->Seqid);
8042 }
8043 }
8044 */
8045 }
8046 else /* regex failed, make something up */
8047 {
8048 ajStrAssignS(&grpstr, grouptag);
8049 gf->Group = ++(table->Groups);
8050 featSaveGroup = gf->Group;
8051 gf->Exon = 0;
8052 featSaveExon = 0;
8053 }
8054 ajStrDel(&namstr);
8055 ajStrDel(&grpstr);
8056
8057 return;
8058 }
8059
8060
8061
8062
8063 /* #datasection [none] Input formats ******************************************
8064 **
8065 ** Input formats internals
8066 **
8067 ** #nam2rule Featinformat Data data input format specific
8068 **
8069 ******************************************************************************/
8070
8071
8072
8073
8074 /* #section cast **************************************************************
8075 **
8076 ** Values for input formats
8077 **
8078 ** #fdata [none]
8079 **
8080 ** #nam3rule Find Return index to named format
8081 ** #nam3rule Term Test format EDAM term
8082 ** #nam3rule Test Test format value
8083 **
8084 ** #argrule Find format [const AjPStr] Format name
8085 ** #argrule Term term [const AjPStr] Format EDAM term
8086 ** #argrule Test format [const AjPStr] Format name
8087 ** #argrule Find iformat [ajint*] Index matching format name
8088 **
8089 ** #valrule * [AjBool] True if found
8090 **
8091 ** #fcategory cast
8092 **
8093 ******************************************************************************/
8094
8095
8096
8097
8098 /* @funcstatic feattabinformatFind ********************************************
8099 **
8100 ** Looks for the specified format(s) in the internal definitions and
8101 ** returns the index.
8102 **
8103 ** Sets iformat as the recognised format, and returns ajTrue.
8104 **
8105 ** @param [r] format [const AjPStr] Format required.
8106 ** @param [w] iformat [ajint*] Index
8107 ** @return [AjBool] ajTrue on success.
8108 **
8109 ** @release 6.4.0
8110 ** @@
8111 ******************************************************************************/
8112
feattabinformatFind(const AjPStr format,ajint * iformat)8113 static AjBool feattabinformatFind(const AjPStr format, ajint* iformat)
8114 {
8115 AjPStr tmpformat = NULL;
8116 ajuint i = 0;
8117
8118 /* ajDebug("feattabinformatFind '%S'\n", format); */
8119 if(!ajStrGetLen(format))
8120 return ajFalse;
8121
8122 ajStrAssignS(&tmpformat, format);
8123 ajStrFmtLower(&tmpformat);
8124
8125 for(i=0; featinformatDef[i].Name; i++)
8126 {
8127 /* ajDebug("test %d '%s' '%s' '%s'\n",
8128 i, featinformatDef[i].Name,
8129 featinformatDef[i].Obo,
8130 featinformatDef[i].Desc); */
8131 if(ajStrMatchC(tmpformat, featinformatDef[i].Name) ||
8132 ajStrMatchC(format, featinformatDef[i].Obo))
8133 {
8134 *iformat = i;
8135 ajStrDel(&tmpformat);
8136 /* ajDebug("found '%s' at %d\n", featinformatDef[i].Name, i); */
8137 return ajTrue;
8138 }
8139 }
8140
8141 ajErr("Unknown input format '%S'", format);
8142
8143 ajStrDel(&tmpformat);
8144
8145 return ajFalse;
8146 }
8147
8148
8149
8150
8151 /* @func ajFeattabinformatTerm ************************************************
8152 **
8153 ** Tests whether a feature table data input format term is known
8154 **
8155 ** @param [r] term [const AjPStr] Format term EDAM ID
8156 ** @return [AjBool] ajTrue if term was accepted
8157 **
8158 ** @release 6.4.0
8159 ** @@
8160 ******************************************************************************/
8161
ajFeattabinformatTerm(const AjPStr term)8162 AjBool ajFeattabinformatTerm(const AjPStr term)
8163 {
8164 ajuint i;
8165
8166 for(i=0; featinformatDef[i].Name; i++)
8167 if(ajStrMatchC(term, featinformatDef[i].Obo))
8168 return ajTrue;
8169
8170 return ajFalse;
8171 }
8172
8173
8174
8175
8176 /* @func ajFeattabinformatTest ************************************************
8177 **
8178 ** Tests whether a named feature table data input format is known
8179 **
8180 ** @param [r] format [const AjPStr] Format
8181 ** @return [AjBool] ajTrue if format was accepted
8182 **
8183 ** @release 6.4.0
8184 ** @@
8185 ******************************************************************************/
8186
ajFeattabinformatTest(const AjPStr format)8187 AjBool ajFeattabinformatTest(const AjPStr format)
8188 {
8189 ajuint i;
8190
8191 for(i=0; featinformatDef[i].Name; i++)
8192 {
8193 if(ajStrMatchCaseC(format, featinformatDef[i].Name))
8194 return ajTrue;
8195 if(ajStrMatchC(format, featinformatDef[i].Obo))
8196 return ajTrue;
8197 }
8198
8199 return ajFalse;
8200 }
8201
8202
8203
8204
8205 #ifdef AJ_COMPILE_DEPRECATED_BOOK
8206 #endif
8207
8208
8209
8210
8211 #ifdef AJ_COMPILE_DEPRECATED
8212 /* @obsolete ajFeattabInNew
8213 ** @rename ajFeattabinNew
8214 */
ajFeattabInNew(void)8215 __deprecated AjPFeattabIn ajFeattabInNew(void)
8216 {
8217 return ajFeattabinNew();
8218 }
8219
8220
8221
8222
8223 /* @obsolete ajFeattabInNewSS
8224 ** @rename ajFeattabinNewSS
8225 */
ajFeattabInNewSS(const AjPStr fmt,const AjPStr name,const char * type)8226 __deprecated AjPFeattabIn ajFeattabInNewSS(const AjPStr fmt, const AjPStr name,
8227 const char* type)
8228 {
8229 return ajFeattabinNewSS(fmt, name, type);
8230 }
8231
8232
8233
8234
8235 /* @obsolete ajFeattabInNewCSF
8236 ** @rename ajFeattabinNewCSF
8237 */
ajFeattabInNewCSF(const char * fmt,const AjPStr name,const char * type,AjPFilebuff buff)8238 __deprecated AjPFeattabIn ajFeattabInNewCSF(const char* fmt, const AjPStr name,
8239 const char* type, AjPFilebuff buff)
8240 {
8241 return ajFeattabinNewCSF(fmt, name, type, buff);
8242 }
8243
8244
8245
8246
8247 /* @obsolete ajFeattabInNewSSF
8248 ** @rename ajFeattabinNewSSF
8249 */
ajFeattabInNewSSF(const AjPStr fmt,const AjPStr name,const char * type,AjPFilebuff buff)8250 __deprecated AjPFeattabIn ajFeattabInNewSSF(const AjPStr fmt, const AjPStr name,
8251 const char* type, AjPFilebuff buff)
8252 {
8253 return ajFeattabinNewSSF(fmt, name, type, buff);
8254 }
8255
8256
8257
8258
8259 /* @obsolete ajFeattabInDel
8260 ** @rename ajFeattabinDel
8261 */
ajFeattabInDel(AjPFeattabIn * pthis)8262 __deprecated void ajFeattabInDel(AjPFeattabIn* pthis)
8263 {
8264 ajFeattabinDel(pthis);
8265 return;
8266 }
8267
8268
8269
8270
8271 /* @obsolete ajFeattabInClear
8272 ** @rename ajFeattabinClear
8273 */
ajFeattabInClear(AjPFeattabIn thys)8274 __deprecated void ajFeattabInClear(AjPFeattabIn thys)
8275 {
8276 ajFeattabinClear(thys);
8277 return;
8278 }
8279
8280
8281
8282
8283 /* @obsolete ajFeattabInSetTypeC
8284 ** @rename ajFeattabinSetTypeC
8285 */
ajFeattabInSetTypeC(AjPFeattabIn thys,const char * type)8286 __deprecated AjBool ajFeattabInSetTypeC(AjPFeattabIn thys, const char* type)
8287 {
8288 return ajFeattabinSetTypeC(thys, type);
8289 }
8290
8291
8292
8293
8294 /* @obsolete ajFeattabInSetType
8295 ** @rename ajFeattabinSetTypeS
8296 */
ajFeattabInSetType(AjPFeattabIn thys,const AjPStr type)8297 __deprecated AjBool ajFeattabInSetType(AjPFeattabIn thys, const AjPStr type)
8298 {
8299 return ajFeattabinSetTypeC(thys, ajStrGetPtr(type));
8300 }
8301
8302
8303
8304
8305
8306 /* @obsolete ajFeatRead
8307 ** @rename ajFeattableNewRead
8308 */
8309
ajFeatRead(AjPFeattabIn ftin)8310 __deprecated AjPFeattable ajFeatRead(AjPFeattabIn ftin)
8311 {
8312 return ajFeattableNewRead(ftin);
8313 }
8314
8315
8316
8317
8318 /* @obsolete ajFeatUfoRead
8319 ** @rename ajFeattableNewReadUfo
8320 */
8321
ajFeatUfoRead(AjPFeattabIn featin,const AjPStr ufo)8322 __deprecated AjPFeattable ajFeatUfoRead(AjPFeattabIn featin,
8323 const AjPStr ufo)
8324 {
8325
8326 return ajFeattableNewReadUfo (featin, ufo);
8327 }
8328 #endif
8329