1 /* @source showseq application
2 **
3 ** Display a sequence with translations, features and other bits
4 **
5 ** @author Copyright (C) Gary Williams (gwilliam@hgmp.mrc.ac.uk)
6 ** 14 Sept 1999 - GWW - written
7 ** @@
8 **
9 ** This program is free software; you can redistribute it and/or
10 ** modify it under the terms of the GNU General Public License
11 ** as published by the Free Software Foundation; either version 2
12 ** of the License, or (at your option) any later version.
13 **
14 ** This program is distributed in the hope that it will be useful,
15 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
16 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 ** GNU General Public License for more details.
18 **
19 ** You should have received a copy of the GNU General Public License
20 ** along with this program; if not, write to the Free Software
21 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
22 ******************************************************************************/
23 
24 #include "emboss.h"
25 
26 
27 
28 
29 static void showseq_FormatShow(EmbPShow ss,
30 			       const AjPStr format, const AjPTrn trnTable,
31 			       const AjPRange translaterange,
32 			       const AjPRange revtranslaterange,
33 			       const AjPRange uppercase,
34 			       const AjPRange highlight, AjBool threeletter,
35 			       AjBool numberseq, const AjPFeattable feat,
36 			       ajint orfminsize, const AjPList restrictlist,
37 			       AjBool plasmid, AjBool flat,
38 			       const AjPRange annotation);
39 
40 static void showseq_read_equiv(AjPFile equfile, AjPTable table);
41 static void showseq_read_file_of_enzyme_names(AjPStr *enzymes);
42 static AjBool showseq_MatchFeature(const AjPFeature gf,
43 				   AjPFeature newgf,
44 				   const AjPStr matchsource,
45 				   const AjPStr matchtype,
46 				   ajint matchsense, AjBool testscore,
47                                    float minscore,
48 				   float maxscore, const AjPStr matchtag,
49 				   const AjPStr matchvalue, AjBool *tagsmatch,
50 				   AjBool stricttags);
51 static AjBool showseq_MatchPatternTags(const AjPFeature gf,
52 				       AjPFeature newgf,
53 				       const AjPStr tpattern,
54 				       const AjPStr vpattern,
55 				       AjBool stricttags);
56 static void showseq_FeatureFilter(const AjPFeattable featab,
57 				  AjPFeattable newfeatab,
58 				  const AjPStr matchsource,
59 				  const AjPStr matchtype,
60 				  ajint matchsense, AjBool testscore,
61                                   float minscore,
62 				  float maxscore, const AjPStr matchtag,
63 				  const AjPStr matchvalue, AjBool stricttags);
64 static AjPFeature showseq_FeatCopy(const AjPFeature orig);
65 
66 
67 #define ENZDATA "REBASE/embossre.enz"
68 #define EQUDATA "REBASE/embossre.equ"
69 #define EQUGUESS 3500     /* Estimate of number of equivalent names */
70 
71 
72 
73 
74 /* @prog showseq **************************************************************
75 **
76 ** Display a sequence with features, translation etc
77 **
78 ******************************************************************************/
79 
main(int argc,char ** argv)80 int main(int argc, char **argv)
81 {
82 
83     ajint begin;
84     ajint end;
85     AjPSeqall seqall;
86     AjPSeq seq;
87     EmbPShow ss;
88     AjPFile outfile;
89     AjPStr formatname;
90     AjPStr *thinglist;
91     AjPStr tablename;
92     ajint table = 0;
93     AjPRange translaterange;
94     AjPRange revtranslaterange;
95     AjPRange uppercase;
96     AjPRange highlight;
97     AjBool threeletter;
98     AjBool numberseq;
99     AjBool nameseq;
100     ajint width;
101     ajint length;
102     ajint margin;
103     AjBool description;
104     ajint offset;
105     AjBool html;
106     AjBool stricttags;
107 
108     AjPStr descriptionline;
109     AjPFeattable feattab;
110     AjPFeattable newfeattab;/* feature table copy, unwanted features removed */
111     ajint orfminsize;
112     AjBool flat;
113     AjPTrn trnTable;
114     AjPRange annotation;
115 
116     /* holds ACD or constructed format for output */
117     AjPStr format;
118     ajint i;
119 
120     /* stuff lifted from Alan's 'restrict.c' */
121     AjPStr enzymes = NULL;
122     ajint mincuts;
123     ajint maxcuts;
124     ajint sitelen;
125     AjBool single;
126     AjBool blunt;
127     AjBool sticky;
128     AjBool ambiguity;
129     AjBool plasmid;
130     AjBool commercial;
131     AjBool limit;
132     AjBool methyl;
133     AjPFile enzfile  = NULL;
134     AjPFile equfile  = NULL;
135     AjPFile methfile = NULL;
136     AjPTable retable = NULL;
137     ajint hits;
138     AjPList restrictlist = NULL;
139 
140     /* feature filter criteria */
141     AjPStr matchsource = NULL;
142     AjPStr matchtype   = NULL;
143     ajint matchsense;
144     float minscore;
145     float maxscore;
146     AjPStr matchtag   = NULL;
147     AjPStr matchvalue = NULL;
148     AjBool testscore = AJFALSE;
149 
150 
151     embInit("showseq", argc, argv);
152 
153     seqall         = ajAcdGetSeqall("sequence");
154     outfile        = ajAcdGetOutfile("outfile");
155     formatname     = ajAcdGetListSingle("format");
156     thinglist      = ajAcdGetList("things");
157     tablename      = ajAcdGetListSingle("table");
158     translaterange = ajAcdGetRange("translate");
159     revtranslaterange = ajAcdGetRange("revtranslate");
160     uppercase      = ajAcdGetRange("uppercase");
161     highlight      = ajAcdGetRange("highlight");
162     annotation     = ajAcdGetRange("annotation");
163     threeletter    = ajAcdGetBoolean("threeletter");
164     numberseq      = ajAcdGetBoolean("number");
165     width          = ajAcdGetInt("width");
166     length         = ajAcdGetInt("length");
167     margin         = ajAcdGetInt("margin");
168     nameseq        = ajAcdGetBoolean("name");
169     description    = ajAcdGetBoolean("description");
170     offset         = ajAcdGetInt("offset");
171     html           = ajAcdGetBoolean("html");
172     orfminsize     = ajAcdGetInt("orfminsize");
173     flat           = ajAcdGetBoolean("flatreformat");
174 
175     /*  restriction enzyme stuff */
176     mincuts    = ajAcdGetInt("mincuts");
177     maxcuts    = ajAcdGetInt("maxcuts");
178     sitelen    = ajAcdGetInt("sitelen");
179     single     = ajAcdGetBoolean("single");
180     blunt      = ajAcdGetBoolean("blunt");
181     sticky     = ajAcdGetBoolean("sticky");
182     ambiguity  = ajAcdGetBoolean("ambiguity");
183     plasmid    = ajAcdGetBoolean("plasmid");
184     commercial = ajAcdGetBoolean("commercial");
185     limit      = ajAcdGetBoolean("limit");
186     methyl     = ajAcdGetBoolean("methylation");
187     enzymes    = ajAcdGetString("enzymes");
188     methfile   = ajAcdGetDatafile("mfile");
189 
190     /* feature filter criteria */
191     matchsource = ajAcdGetString("sourcematch");
192     matchtype   = ajAcdGetString("typematch");
193     matchsense  = ajAcdGetInt("sensematch");
194     minscore    = ajAcdGetFloat("minscore");
195     maxscore    = ajAcdGetFloat("maxscore");
196     matchtag    = ajAcdGetString("tagmatch");
197     matchvalue  = ajAcdGetString("valuematch");
198     stricttags  = ajAcdGetBoolean("stricttags");
199 
200     testscore = (minscore || maxscore);
201     if(minscore && !maxscore)
202         if(minscore > maxscore)
203             maxscore = minscore;
204     if(!minscore && maxscore)
205         if(minscore > maxscore)
206             minscore = maxscore;
207 
208     format = ajStrNew();
209 
210     /* read the local file of enzymes names */
211     showseq_read_file_of_enzyme_names(&enzymes);
212 
213     /* check that the translate range is ordered */
214     if(!ajRangeIsOrdered(translaterange))
215 	ajWarn("Translation ranges are not in ascending, "
216 		"non-overlapping order.");
217 
218 
219     /* get the format to use */
220     if(ajStrMatchC(formatname, "0"))
221 	for(i=0; thinglist[i]; i++)
222 	{
223 	    ajStrAppendS(&format, thinglist[i]);
224 	    ajStrAppendC(&format, " ");
225 	}
226     else if(ajStrMatchC(formatname, "1"))
227 	ajStrAssignC(&format, "S A ");
228     else if(ajStrMatchC(formatname, "2"))
229 	ajStrAssignC(&format, "B N T S A F ");
230     else if(ajStrMatchC(formatname, "3"))
231 	ajStrAssignC(&format, "B N T S A ");
232     else if(ajStrMatchC(formatname, "4"))
233 	ajStrAssignC(&format, "B N T S B 1 A F ");
234     else if(ajStrMatchC(formatname, "5"))
235 	ajStrAssignC(&format, "B N T S B 1 2 3 A F ");
236     else if(ajStrMatchC(formatname, "6"))
237 	ajStrAssignC(&format, "B N T S B 1 2 3 T -3 -2 -1 A F ");
238     else if(ajStrMatchC(formatname, "7"))
239 	ajStrAssignC(&format, "B R S N T C -R B 1 2 3 T -3 -2 -1 A ");
240     else if(ajStrMatchC(formatname, "8"))
241 	ajStrAssignC(&format, "B 1 2 3 N T R S T C -R T -3 -2 -1 A F ");
242     else
243 	ajFatal("Invalid format type: %S", formatname);
244 
245 
246     /* make the format upper case */
247     ajStrFmtUpper(&format);
248 
249     /* create the translation table */
250     trnTable = ajTrnNewI(table);
251 
252     /*
253     **  most of this is lifted from the program 'restrict.c' by Alan
254     **  Bleasby
255     */
256     if(ajStrFindC(format, "R") != -1)
257     {
258         if(single)
259             maxcuts = mincuts = 1;
260         retable = ajTablestrNew(EQUGUESS);
261         enzfile = ajDatafileNewInNameC(ENZDATA);
262 
263         if(!enzfile)
264             ajFatal("Cannot locate enzyme file. Run REBASEEXTRACT");
265 
266         if(limit)
267         {
268             equfile = ajDatafileNewInNameC(EQUDATA);
269             if(!equfile)
270                 limit = ajFalse;
271             else
272             {
273                 showseq_read_equiv(equfile, retable);
274                 ajFileClose(&equfile);
275             }
276         }
277     }
278 
279 
280     while(ajSeqallNext(seqall, &seq))
281     {
282 	begin = ajSeqGetBegin(seq)-1;
283 	end   = ajSeqGetEnd(seq)-1;
284 
285 	restrictlist = ajListNew();
286 
287 	/* do the name and description */
288 	if(nameseq)
289 	{
290 	    if(html)
291 		ajFmtPrintF(outfile, "<H2>%S</H2>\n",
292 			    ajSeqGetNameS(seq));
293 	    else
294 		ajFmtPrintF(outfile, "%S\n", ajSeqGetNameS(seq));
295 	}
296 
297 	if(description)
298 	{
299 	    /*
300 	    **  wrap the description line at the width of the sequence
301 	    **  plus margin
302 	    */
303 	    if(html)
304 		ajFmtPrintF(outfile, "<H3>%S</H3>\n",
305 			    ajSeqGetDescS(seq));
306 	    else
307 	    {
308 		descriptionline = ajStrNew();
309 		ajStrAssignS(&descriptionline, ajSeqGetDescS(seq));
310 		ajStrFmtWrap(&descriptionline, width+margin);
311 		ajFmtPrintF(outfile, "%S\n", descriptionline);
312 		ajStrDel(&descriptionline);
313 	    }
314 	}
315 
316 
317 	/* get the feature table of the sequence */
318 	feattab = ajSeqGetFeatCopy(seq);
319 
320 	/* new feature table to hold the filetered features */
321         newfeattab = ajFeattableNew(NULL);
322 	ajDebug("created newfeattab %x\n", newfeattab);
323 
324         /* only copy features in the table that match our criteria */
325         showseq_FeatureFilter(feattab, newfeattab, matchsource, matchtype,
326 			      matchsense, testscore,
327                               minscore, maxscore, matchtag,
328 			      matchvalue, stricttags);
329 
330 
331 	/*
332 	**  most of this is lifted from the program 'restrict.c' by Alan
333 	**  Bleasby
334 	*/
335 	if(ajStrFindC(format, "R") != -1)
336 	{
337 	    ajFileSeek(enzfile, 0L, 0);
338 	    hits = embPatRestrictMatch(seq, 1, ajSeqGetLen(seq),
339 				       enzfile, methfile, enzymes,
340 				       sitelen, plasmid, ambiguity, mincuts,
341 				       maxcuts, blunt, sticky, commercial,
342 				       methyl, restrictlist);
343 	    if(hits)
344 	    {
345 		/* this bit is lifted from printHits */
346 		embPatRestrictRestrict(restrictlist, hits, !limit,
347 				       ajFalse);
348 		if(limit)
349 		    embPatRestrictPreferred(restrictlist,retable);
350 	    }
351 
352 	}
353 
354 
355 
356 	/* make the Show Object */
357 	ss = embShowNew(seq, begin, end, width, length, margin, html, offset);
358 
359 	/* get the number of the genetic code used */
360 	ajStrToInt(tablename, &table);
361 
362 	if(html)
363 	    ajFmtPrintF(outfile, "<PRE>");
364 
365 	showseq_FormatShow(ss, format, trnTable,
366                            translaterange, revtranslaterange,
367 			   uppercase, highlight, threeletter,
368 			   numberseq, newfeattab, orfminsize,
369 			   restrictlist, plasmid, flat, annotation);
370 
371 	embShowPrint(outfile, ss);
372 
373 	embShowDel(&ss);
374 
375 	ajListFree(&restrictlist);
376 	ajFeattableDel(&newfeattab);
377 	ajFeattableDel(&feattab);
378 
379 	/* add a newline at the end of the sequence */
380 	ajFmtPrintF(outfile, "\n");
381 
382 	if(html)
383 	    ajFmtPrintF(outfile, "</PRE>\n");
384     }
385 
386 
387     ajStrDel(&format);
388     ajFileClose(&outfile);
389     ajTrnDel(&trnTable);
390     ajSeqallDel(&seqall);
391     ajSeqDel(&seq);
392     ajStrDelarray(&thinglist);
393     ajStrDel(&formatname);
394     ajStrDel(&tablename);
395     ajStrDel(&enzymes);
396     ajStrDel(&matchsource);
397     ajStrDel(&matchtype);
398     ajStrDel(&matchtag);
399     ajStrDel(&matchvalue);
400     ajRangeDel(&translaterange);
401     ajRangeDel(&revtranslaterange);
402     ajRangeDel(&uppercase);
403     ajRangeDel(&highlight);
404     ajRangeDel(&annotation);
405     ajFileClose(&enzfile);
406     ajFileClose(&methfile);
407 
408     embExit();
409 
410     return 0;
411 }
412 
413 
414 
415 
416 /* @funcstatic showseq_FormatShow *********************************************
417 **
418 ** Set up the EmbPShow object, according to the required format
419 **
420 ** @param [u] ss [EmbPShow] Show sequence object
421 ** @param [r] format [const AjPStr] format codes for the required
422 **                       things to display
423 ** @param [r] trnTable [const AjPTrn] genetic code translation table
424 ** @param [r] translaterange [const AjPRange] ranges to translate
425 ** @param [r] revtranslaterange [const AjPRange] ranges to translate in reverse
426 ** @param [r] uppercase [const AjPRange] ranges to uppercase
427 ** @param [r] highlight [const AjPRange] ranges to colour in HTML
428 ** @param [r] threeletter [AjBool] use 3-letter code
429 ** @param [r] numberseq [AjBool] put numbers on sequences
430 ** @param [r] feat [const AjPFeattable] sequence's feature table
431 **                                 NULL after - pointer stored internally
432 ** @param [r] orfminsize [ajint] minimum size of ORFs to display
433 **                              (0 for no ORFs)
434 ** @param [r] restrictlist [const AjPList] restriction enzyme site list
435 **                            (or NULL) NULL after - pointer stored internally
436 ** @param [r] plasmid [AjBool] Circular (plasmid) sequence
437 ** @param [r] flat [AjBool] show restriction sites in flat format
438 ** @param [r] annotation [const AjPRange] ranges to annotate
439 ** @return [void]
440 ** @@
441 ******************************************************************************/
442 
showseq_FormatShow(EmbPShow ss,const AjPStr format,const AjPTrn trnTable,const AjPRange translaterange,const AjPRange revtranslaterange,const AjPRange uppercase,const AjPRange highlight,AjBool threeletter,AjBool numberseq,const AjPFeattable feat,ajint orfminsize,const AjPList restrictlist,AjBool plasmid,AjBool flat,const AjPRange annotation)443 static void showseq_FormatShow(EmbPShow ss,
444 			       const AjPStr format, const AjPTrn trnTable,
445 			       const AjPRange translaterange,
446 			       const AjPRange revtranslaterange,
447 			       const AjPRange uppercase,
448 			       const AjPRange highlight,  AjBool threeletter,
449 			       AjBool numberseq, const AjPFeattable feat,
450 			       ajint orfminsize, const AjPList restrictlist,
451 			       AjBool plasmid, AjBool flat,
452 			       const AjPRange annotation)
453 {
454     AjPStrTok tok;
455     char white[] = " \t\n\r";
456     char whiteplus[] = " \t,.!@#$%^&*()_+|~`\\={}[]:;\"'<>,.?/";
457     AjPStr code = NULL;
458 
459     /* start token to parse format */
460     tok = ajStrTokenNewC(format,  white);
461     while(ajStrTokenNextParseC(tok, whiteplus, &code))
462     {
463 	ajStrFmtUpper(&code);
464 
465 	if(!ajStrCmpC(code, "S"))
466 	    embShowAddSeq(ss, numberseq, threeletter, uppercase,
467 			  highlight);
468 	else if(!ajStrCmpC(code, "B"))
469 	    embShowAddBlank(ss);
470 	else if(!ajStrCmpC(code, "1"))
471 	    embShowAddTran(ss, trnTable, 1, threeletter, numberseq,
472 			   translaterange, orfminsize,
473 			   AJFALSE, AJFALSE, AJFALSE, AJFALSE);
474 	else if(!ajStrCmpC(code, "2"))
475 	    embShowAddTran(ss, trnTable, 2, threeletter, numberseq,
476 			   translaterange, orfminsize,
477 			   AJFALSE, AJFALSE, AJFALSE, AJFALSE);
478 	else if(!ajStrCmpC(code, "3"))
479 	    embShowAddTran(ss, trnTable, 3, threeletter, numberseq,
480 			   translaterange, orfminsize,
481 			   AJFALSE, AJFALSE, AJFALSE, AJFALSE);
482 	else if(!ajStrCmpC(code, "-1"))
483 	    embShowAddTran(ss, trnTable, -1, threeletter, numberseq,
484 			   revtranslaterange, orfminsize,
485 			   AJFALSE, AJFALSE, AJFALSE, AJFALSE);
486 	else if(!ajStrCmpC(code, "-2"))
487 	    embShowAddTran(ss, trnTable, -2, threeletter, numberseq,
488 			   revtranslaterange, orfminsize,
489 			   AJFALSE, AJFALSE, AJFALSE, AJFALSE);
490 	else if(!ajStrCmpC(code, "-3"))
491 	    embShowAddTran(ss, trnTable, -3, threeletter, numberseq,
492 			   revtranslaterange, orfminsize,
493 			   AJFALSE, AJFALSE, AJFALSE, AJFALSE);
494 	else if(!ajStrCmpC(code, "T"))
495 	    embShowAddTicks(ss);
496 	else if(!ajStrCmpC(code, "N"))
497 	    embShowAddTicknum(ss);
498 	else if(!ajStrCmpC(code, "C"))
499 	    embShowAddComp(ss, numberseq);
500 	else if(!ajStrCmpC(code, "F"))
501 	    embShowAddFT(ss, feat);
502 	else if(!ajStrCmpC(code, "R"))
503 	    embShowAddRE(ss, 1, restrictlist, plasmid, flat);
504 	else if(!ajStrCmpC(code, "-R"))
505 	    embShowAddRE(ss, -1, restrictlist, plasmid, flat);
506 	else if(!ajStrCmpC(code, "A"))
507 	    embShowAddNote(ss, annotation);
508 	else
509 	    ajFatal("Formatting code not recognised: '%S'", code);
510     }
511 
512     ajStrDel(&code);
513     ajStrTokenDel(&tok);
514 
515     return;
516 }
517 
518 
519 
520 
521 /* @funcstatic showseq_read_equiv *********************************************
522 **
523 ** reads the equivalents file.
524 **
525 ** @param [u] equfile [AjPFile] file to read
526 ** @param [w] table [AjPTable] table to write to.
527 ** @return [void]
528 ** @@
529 ******************************************************************************/
530 
showseq_read_equiv(AjPFile equfile,AjPTable table)531 static void showseq_read_equiv(AjPFile equfile, AjPTable table)
532 {
533     AjPStr line;
534     AjPStr key;
535     AjPStr value;
536 
537     const char *p;
538 
539     line = ajStrNew();
540 
541     while(ajReadlineTrim(equfile,&line))
542     {
543         p = ajStrGetPtr(line);
544 
545         if(!*p || *p=='#' || *p=='!')
546             continue;
547 
548         p = ajSysFuncStrtok(p," \t\n");
549         key = ajStrNewC(p);
550         p = ajSysFuncStrtok(NULL," \t\n");
551         value = ajStrNewC(p);
552         ajTablePut(table,(void *)key, (void *)value);
553     }
554 
555     return;
556 }
557 
558 
559 
560 
561 /* @funcstatic showseq_read_file_of_enzyme_names ******************************
562 **
563 ** If the list of enzymes starts with a '@' if opens that file, reads in
564 ** the list of enzyme names and replaces the input string with the enzyme names
565 **
566 ** @param [u] enzymes [AjPStr*] names of enzymes to search for or 'all'
567 **                              or '@file'
568 ** @return [void]
569 ** @@
570 ******************************************************************************/
571 
showseq_read_file_of_enzyme_names(AjPStr * enzymes)572 static void showseq_read_file_of_enzyme_names(AjPStr *enzymes)
573 {
574     AjPFile file = NULL;
575     AjPStr line;
576     const char *p = NULL;
577 
578     if(ajStrFindC(*enzymes, "@") == 0)
579     {
580 	ajStrTrimC(enzymes, "@");	/* remove the @ */
581 	file = ajFileNewInNameS(*enzymes);
582 	if(file == NULL)
583 	    ajFatal("Cannot open the file of enzyme names: '%S'", enzymes);
584 
585 	/* blank off the enzyme file name and replace with the enzyme names */
586 	ajStrSetClear(enzymes);
587 	line = ajStrNew();
588 	while(ajReadlineTrim(file, &line))
589 	{
590 	    p = ajStrGetPtr(line);
591 
592 	    if(!*p || *p == '#' || *p == '!')
593 		continue;
594 
595 	    ajStrAppendS(enzymes, line);
596 	    ajStrAppendC(enzymes, ",");
597 	}
598 	ajStrDel(&line);
599 
600 	ajFileClose(&file);
601     }
602 
603     return;
604 }
605 
606 
607 
608 
609 /* @funcstatic showseq_FeatureFilter ******************************************
610 **
611 ** Removes unwanted features from a feature table
612 **
613 ** @param [r] featab [const AjPFeattable] Feature table to filter
614 ** @param [u] newfeatab [AjPFeattable] Retured table of filtered features
615 ** @param [r] matchsource [const AjPStr] Required Source pattern
616 ** @param [r] matchtype [const AjPStr] Required Type pattern
617 ** @param [r] matchsense [ajint] Required Sense pattern +1,0,-1
618 **                               (or other value$
619 ** @param [r] testscore [AjBool] Filter by score values
620 ** @param [r] minscore [float] Min required Score pattern
621 ** @param [r] maxscore [float] Max required Score pattern
622 ** @param [r] matchtag [const AjPStr] Required Tag pattern
623 ** @param [r] matchvalue [const AjPStr] Required Value pattern
624 ** @param [r] stricttags [AjBool] If false then only display tag/values
625 **                                that match the criteria
626 ** @return [void]
627 ** @@
628 ******************************************************************************/
629 
showseq_FeatureFilter(const AjPFeattable featab,AjPFeattable newfeatab,const AjPStr matchsource,const AjPStr matchtype,ajint matchsense,AjBool testscore,float minscore,float maxscore,const AjPStr matchtag,const AjPStr matchvalue,AjBool stricttags)630 static void showseq_FeatureFilter(const AjPFeattable featab,
631 				  AjPFeattable newfeatab,
632 				  const AjPStr matchsource,
633 				  const AjPStr matchtype,
634 				  ajint matchsense, AjBool testscore,
635                                   float minscore,
636 				  float maxscore, const AjPStr matchtag,
637 				  const AjPStr matchvalue, AjBool stricttags)
638 {
639 
640     AjIList iter = NULL;
641     AjPFeature gf = NULL;
642     AjPFeature newgf = NULL;
643     AjBool tagsmatch;
644 
645     tagsmatch = ajFalse;
646 
647     /* foreach feature in the feature table */
648     if(featab)
649     {
650 	iter = ajListIterNewread(featab->Features);
651 	while(!ajListIterDone(iter))
652 	{
653 	    gf = (AjPFeature)ajListIterGet(iter);
654             newgf = showseq_FeatCopy(gf); /* copy of gf that we can add */
655 	    /* required tags to */
656 	    if(showseq_MatchFeature(gf, newgf, matchsource, matchtype,
657 				    matchsense, testscore,
658                                     minscore, maxscore, matchtag,
659 				    matchvalue, &tagsmatch, stricttags))
660 	    	 /*
661 		 ** There's a match, so add the copy of gf
662 		 ** to the new feature table
663 		 */
664                 ajFeattableAdd(newfeatab, newgf);
665 	    else
666 	    	ajFeatDel(&newgf);
667 	}
668 	ajListIterDel(&iter);
669     }
670 
671     return;
672 }
673 
674 
675 
676 
677 /* @funcstatic showseq_MatchFeature *******************************************
678 **
679 ** Test if a feature matches a set of criteria
680 **
681 ** @param [r] gf [const AjPFeature] Feature to test
682 ** @param [u] newgf [AjPFeature] Copy of feature with filtered
683 **                                    Tag/Value list
684 ** @param [r] source [const AjPStr] Required Source pattern
685 ** @param [r] type [const AjPStr] Required Type pattern
686 ** @param [r] sense [ajint] Required Sense pattern +1,0,-1 (or other value$
687 ** @param [r] testscore [AjBool] Filter by score values
688 ** @param [r] minscore [float] Min required Score pattern
689 ** @param [r] maxscore [float] Max required Score pattern
690 ** @param [r] tag [const AjPStr] Required Tag pattern
691 ** @param [r] value [const AjPStr] Required Value pattern
692 ** @param [u] tagsmatch [AjBool *] true if a join has matching tag/values
693 ** @param [r] stricttags [AjBool] If false then only display tag/values
694 **                                that match the criteria
695 ** @return [AjBool] True if feature matches criteria
696 ** @@
697 ******************************************************************************/
698 
showseq_MatchFeature(const AjPFeature gf,AjPFeature newgf,const AjPStr source,const AjPStr type,ajint sense,AjBool testscore,float minscore,float maxscore,const AjPStr tag,const AjPStr value,AjBool * tagsmatch,AjBool stricttags)699 static AjBool showseq_MatchFeature(const AjPFeature gf, AjPFeature newgf,
700 				   const AjPStr source, const AjPStr type,
701 				   ajint sense,
702 				   AjBool testscore,
703                                    float minscore, float maxscore,
704 				   const AjPStr tag, const AjPStr value,
705 				   AjBool *tagsmatch, AjBool stricttags)
706 {
707     AjPStrTok tokens = NULL;
708     AjPStr key = NULL;
709     AjBool val = ajFalse;
710 
711     /*
712     ** is this a child of a join() ?
713     ** if it is a child, then we use the previous result of MatchPatternTags
714     */
715     if(!ajFeatIsMultiple(gf))
716 	*tagsmatch = showseq_MatchPatternTags(gf, newgf, tag, value,
717 					      stricttags);
718 
719     /* ignore remote IDs */
720     if(!ajFeatIsLocal(gf))
721 	return ajFalse;
722 
723      /* check source, type, sense, score, tags, values
724      ** Match anything:
725      **      for strings, '*'
726      **      for sense, 0
727      **      for score, maxscore <= minscore
728      */
729     if(!embMiscMatchPatternDelimC(ajFeatGetSource(gf), source,",;|") ||
730        (ajFeatGetStrand(gf) == '+' && sense == -1) ||
731        (ajFeatGetStrand(gf) == '-' && sense == +1) ||
732        (testscore && ajFeatGetScore(gf) < minscore) ||
733        (testscore && ajFeatGetScore(gf) > maxscore) ||
734        !*tagsmatch)
735 	return ajFalse;
736 
737     if(ajStrGetLen(type))
738     {
739         val = ajFalse;
740         tokens = ajStrTokenNewC(type, " \t\n\r,;|");
741 
742         while (ajStrTokenNextParse(tokens, &key))
743         {
744             if (ajFeatTypeMatchWildS(gf, key))
745             {
746                 val = ajTrue;
747                 break;
748             }
749         }
750 
751         ajStrTokenDel( &tokens);
752         ajStrDel(&key);
753         if(!val)
754             return ajFalse;
755     }
756 
757     return ajTrue;
758 }
759 
760 
761 
762 
763 /* @funcstatic showseq_MatchPatternTags ***************************************
764 **
765 ** Checks for a match of the tagpattern and valuepattern to at least one
766 ** tag=value pair
767 **
768 ** @param [r] gf [const AjPFeature] Feature to process
769 ** @param [u] newgf [AjPFeature] Copy of feature returned
770 **                               with filtered Tag/Value list
771 ** @param [r] tpattern [const AjPStr] tags pattern to match with
772 ** @param [r] vpattern [const AjPStr] values pattern to match with
773 ** @param [r] stricttags [AjBool] If false then only display tag/values
774 **                                that match the criteria
775 **
776 ** @return [AjBool] ajTrue = found a match
777 ** @@
778 ******************************************************************************/
779 
showseq_MatchPatternTags(const AjPFeature gf,AjPFeature newgf,const AjPStr tpattern,const AjPStr vpattern,AjBool stricttags)780 static AjBool showseq_MatchPatternTags(const AjPFeature gf,
781 				       AjPFeature newgf,
782 				       const AjPStr tpattern,
783 				       const AjPStr vpattern,
784 				       AjBool stricttags)
785 {
786     AjIList titer;                      /* iterator for feat */
787     AjPStr tagnam = NULL;	        /* tag name from tag structure */
788     AjPStr tagval = NULL;       	/* tag value from tag structure */
789     AjBool val = ajFalse;               /* returned value */
790     AjBool tval;                        /* tags result */
791     AjBool vval;                        /* value result */
792 
793     /*
794     **  If there are no tags to match, but the patterns are
795     **  both '*', then allow this as a match
796     */
797     if(ajListGetLength(gf->Tags) == 0 &&
798         !ajStrCmpC(tpattern, "*") &&
799         !ajStrCmpC(vpattern, "*"))
800         return ajTrue;
801 
802 
803     /* iterate through the tags and test for match to patterns */
804     titer = ajFeatTagIter(gf);
805     while(ajFeatTagval(titer, &tagnam, &tagval))
806     {
807         tval = embMiscMatchPatternDelimC(tagnam, tpattern,",;|");
808         /*
809         ** If tag has no value then
810         **   If vpattern is '*' the value pattern is a match
811         ** Else check vpattern
812         */
813         if(!ajStrGetLen(tagval))
814 	{
815             if(!ajStrCmpC(vpattern, "*"))
816             	vval = ajTrue;
817             else
818 		vval = ajFalse;
819         }
820 	else
821             /*
822             ** The value can be one or more words and the vpattern could
823             ** be the whole phrase, so test not only each word in vpattern
824 	    ** against the value, but also test to see if there is a match
825 	    ** of the whole of vpattern without spitting it up into words.
826             */
827             vval = (ajStrMatchS(tagval, vpattern) ||
828 		    embMiscMatchPatternDelimC(tagval, vpattern,",;|"));
829 
830 
831         if(tval && vval)
832 	{
833             val = ajTrue;
834 	    /*
835 	    ** if strict tags then only want to see the tags
836 	    ** that match the criteria
837 	    */
838 	    if(stricttags)
839 	    	ajFeatTagAddSS(newgf, tagnam, tagval);
840         }
841 
842         /* if not strict tags then need to see all the tags */
843         if(!stricttags)
844             ajFeatTagAddSS(newgf, tagnam, tagval);
845     }
846     ajListIterDel(&titer);
847 
848     ajStrDel(&tagnam);
849     ajStrDel(&tagval);
850 
851     return val;
852 }
853 
854 
855 
856 
857 /* @funcstatic showseq_FeatCopy ***********************************************
858 **
859 ** Makes a copy of a feature, except for the Tags list which is added later.
860 ** This is mostly copied from the original in ajFeatCopy,
861 ** but without the Tags stuff.
862 **
863 ** @param [r]   orig  [const AjPFeature]  Original feature
864 ** @return [AjPFeature] New copy of  feature
865 ** @@
866 ******************************************************************************/
867 
showseq_FeatCopy(const AjPFeature orig)868 static AjPFeature showseq_FeatCopy(const AjPFeature orig)
869 {
870 
871     AjPFeature ret;
872 
873     AJNEW0(ret);
874 
875     ret->Tags = ajListNew();
876 
877     ajStrAssignS(&ret->Source, orig->Source);
878     ajStrAssignS(&ret->Type, orig->Type);
879     ajStrAssignS(&ret->Remote, orig->Remote);
880     ajStrAssignS(&ret->Label, orig->Label);
881 
882     ret->Protein = orig->Protein;
883     ret->Start   = orig->Start;
884     ret->End     = orig->End;
885     ret->Start2  = orig->Start2;
886     ret->End2    = orig->End2;
887     ret->Score   = orig->Score;
888     ret->Strand  = orig->Strand;
889     ret->Frame   = orig->Frame;
890     ret->Flags   = orig->Flags;
891     ret->Group   = orig->Group;
892     ret->Exon    = orig->Exon;
893 
894     return ret;
895 }
896