1 /* @source embpatlist *********************************************************
2 **
3 ** Functions for patternlist handling.
4 **
5 ** @author Copyright (C) 2004 Henrikki Almusa, Medicel Oy,Finland
6 ** @version $Revision: 1.23 $
7 ** @modified $Date: 2013/06/29 22:38:59 $ by $Author: rice $
8 ** @@
9 **
10 ** This library is free software; you can redistribute it and/or
11 ** modify it under the terms of the GNU Lesser General Public
12 ** License as published by the Free Software Foundation; either
13 ** version 2.1 of the License, or (at your option) any later version.
14 **
15 ** This library is distributed in the hope that it will be useful,
16 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 ** Lesser General Public License for more details.
19 **
20 ** You should have received a copy of the GNU Lesser General Public
21 ** License along with this library; if not, write to the Free Software
22 ** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
23 ** MA  02110-1301,  USA.
24 **
25 ******************************************************************************/
26 
27 #include "embpatlist.h"
28 #include "embpat.h"
29 #include "embmat.h"
30 
31 #include "ajlib.h"
32 #include "ajpat.h"
33 #include "ajseq.h"
34 #include "ajfeat.h"
35 
36 
37 
38 static AjPStr featMotifNuc = NULL;
39 static AjPStr featMotifProt = NULL;
40 
41 
42 
43 
44 /* @func embPatlistSeqSearch **************************************************
45 **
46 ** The main search function of patterns. It compiles the patterns and searches
47 ** with them. If the pattern fails to compile, it is removed from list.
48 **
49 ** @param [w] ftable [AjPFeattable] Table of found features
50 ** @param [r] seq [const AjPSeq] Sequence to search
51 ** @param [u] plist [AjPPatlistSeq] List of patterns to search with
52 ** @param [r] reverse [AjBool] Search reverse sequence as well
53 ** @return [void]
54 **
55 ** @release 4.0.0
56 ** @@
57 ******************************************************************************/
58 
embPatlistSeqSearch(AjPFeattable ftable,const AjPSeq seq,AjPPatlistSeq plist,AjBool reverse)59 void embPatlistSeqSearch(AjPFeattable ftable, const AjPSeq seq,
60 			 AjPPatlistSeq plist, AjBool reverse)
61 {
62     AjPPatternSeq patseq = NULL;
63     AjPPatComp compPat;
64 
65     ajDebug ("embPatlistSeqSearch: Searching '%S' for %d patterns\n",
66 	     ajSeqGetNameS(seq), ajPatlistSeqGetSize(plist));
67 
68     while (ajPatlistSeqGetNext(plist,&patseq))
69     {
70         compPat = ajPatternSeqGetCompiled(patseq);
71 
72 	if (!compPat && !embPatternSeqCompile(patseq))
73         {
74             ajPatlistSeqRemoveCurrent(plist);
75             continue;
76         }
77 
78         embPatternSeqSearch(ftable,seq,patseq,reverse);
79         ajDebug("end loop\n");
80     }
81 
82     ajPatlistSeqRewind(plist);
83 
84     return;
85 }
86 
87 
88 
89 
90 /* @func embPatlistSeqSearchAll ***********************************************
91 **
92 ** The main search function of patterns. It compiles the patterns and searches
93 ** with them. If the pattern fails to compile, it is removed from list.
94 **
95 ** This version of the function calls the DFA version of the regular
96 ** expression search which returns all matches.
97 **
98 ** @param [w] ftable [AjPFeattable] Table of found features
99 ** @param [r] seq [const AjPSeq] Sequence to search
100 ** @param [u] plist [AjPPatlistSeq] List of patterns to search with
101 ** @param [r] reverse [AjBool] Search reverse sequence as well
102 ** @return [void]
103 **
104 ** @release 4.0.0
105 ** @@
106 ******************************************************************************/
107 
embPatlistSeqSearchAll(AjPFeattable ftable,const AjPSeq seq,AjPPatlistSeq plist,AjBool reverse)108 void embPatlistSeqSearchAll(AjPFeattable ftable, const AjPSeq seq,
109                             AjPPatlistSeq plist, AjBool reverse)
110 {
111     AjPPatternSeq patseq = NULL;
112     AjPPatComp compPat;
113 
114     ajDebug ("embPatlistSeqSearchAll: Searching '%S' for %d patterns\n",
115 	     ajSeqGetNameS(seq), ajPatlistSeqGetSize(plist));
116 
117     while (ajPatlistSeqGetNext(plist,&patseq))
118     {
119         compPat = ajPatternSeqGetCompiled(patseq);
120 
121 	if (!compPat && !embPatternSeqCompile(patseq))
122         {
123             ajPatlistSeqRemoveCurrent(plist);
124             continue;
125         }
126 
127         embPatternSeqSearchAll(ftable,seq,patseq,reverse);
128         ajDebug("end loop\n");
129     }
130 
131     ajPatlistSeqRewind(plist);
132 
133     return;
134 }
135 
136 
137 
138 
139 /* @func embPatlistRegexSearch ************************************************
140 **
141 ** The main search function of patterns. It compiles the patterns and searches
142 ** with them. If the pattern fails to compile, it is removed from list.
143 **
144 ** @param [w] ftable [AjPFeattable] Table of found features
145 ** @param [r] seq [const AjPSeq] Sequence to search
146 ** @param [u] plist [AjPPatlistRegex] List of patterns to search with
147 ** @param [r] reverse [AjBool] Search reverse sequence as well
148 ** @return [void]
149 **
150 ** @release 4.0.0
151 ** @@
152 ******************************************************************************/
153 
embPatlistRegexSearch(AjPFeattable ftable,const AjPSeq seq,AjPPatlistRegex plist,AjBool reverse)154 void embPatlistRegexSearch(AjPFeattable ftable, const AjPSeq seq,
155                            AjPPatlistRegex plist, AjBool reverse)
156 {
157     AjPPatternRegex patreg = NULL;
158     AjPRegexp compPat;
159     AjPStr tmp = NULL;
160 
161     ajStrAssignS(&tmp,ajSeqGetNameS(seq));
162     ajDebug ("embPatlistRegexSearch: Searching '%S' for patterns\n",tmp);
163 
164     while (ajPatlistRegexGetNext(plist,&patreg))
165     {
166         compPat = ajPatternRegexGetCompiled(patreg);
167 
168 	if (!compPat)
169         {
170             ajPatlistRegexRemoveCurrent(plist);
171             continue;
172         }
173 
174         embPatternRegexSearch(ftable,seq,patreg,reverse);
175         ajDebug("end loop\n");
176     }
177 
178     /* ajDebug ("embPatlistRegexSearch: Done search '%S'\n",tmp); */
179 
180     ajPatlistRegexRewind(plist);
181 
182     ajStrDel(&tmp);
183 
184     return;
185 }
186 
187 
188 
189 
190 /* @func embPatlistRegexSearchAll *********************************************
191 **
192 ** The main search function of patterns. It compiles the patterns and searches
193 ** with them. If the pattern fails to compile, it is removed from list.
194 **
195 ** This version of the function calls the DFA version of the regular
196 ** expression search which returns all matches.
197 **
198 ** @param [w] ftable [AjPFeattable] Table of found features
199 ** @param [r] seq [const AjPSeq] Sequence to search
200 ** @param [u] plist [AjPPatlistRegex] List of patterns to search with
201 ** @param [r] reverse [AjBool] Search reverse sequence as well
202 ** @return [void]
203 **
204 ** @release 4.0.0
205 ** @@
206 ******************************************************************************/
207 
embPatlistRegexSearchAll(AjPFeattable ftable,const AjPSeq seq,AjPPatlistRegex plist,AjBool reverse)208 void embPatlistRegexSearchAll(AjPFeattable ftable, const AjPSeq seq,
209                               AjPPatlistRegex plist, AjBool reverse)
210 {
211     AjPPatternRegex patreg = NULL;
212     AjPRegexp compPat;
213     AjPStr tmp = NULL;
214 
215     ajStrAssignS(&tmp,ajSeqGetNameS(seq));
216     ajDebug ("embPatlistRegexSearch: Searching '%S' for patterns\n",tmp);
217 
218     while (ajPatlistRegexGetNext(plist,&patreg))
219     {
220         compPat = ajPatternRegexGetCompiled(patreg);
221 
222 	if (!compPat)
223         {
224             ajPatlistRegexRemoveCurrent(plist);
225             continue;
226         }
227 
228         embPatternRegexSearchAll(ftable,seq,patreg,reverse);
229         ajDebug("end loop\n");
230     }
231 
232     /* ajDebug ("embPatlistRegexSearchAll: Done search '%S'\n",tmp); */
233 
234     ajPatlistRegexRewind(plist);
235 
236     ajStrDel(&tmp);
237 
238     return;
239 }
240 
241 
242 
243 
244 /* @func embPatternRegexSearch ************************************************
245 **
246 ** The search function for a single regular expression pattern.
247 **
248 ** @param [w] ftable [AjPFeattable] Table of found features
249 ** @param [r] seq [const AjPSeq] Sequence to search
250 ** @param [r] pat [const AjPPatternRegex] Pattern to search with
251 ** @param [r] reverse [AjBool] Sequence has been reversed
252 ** @return [void]
253 **
254 ** @release 4.0.0
255 ** @@
256 ******************************************************************************/
257 
embPatternRegexSearch(AjPFeattable ftable,const AjPSeq seq,const AjPPatternRegex pat,AjBool reverse)258 void embPatternRegexSearch (AjPFeattable ftable, const AjPSeq seq,
259 			    const AjPPatternRegex pat, AjBool reverse)
260 {
261     ajint pos=0;
262     ajint off;
263     ajint len;
264     AjPFeature sf    = NULL;
265     AjPStr substr    = NULL;
266     AjPStr seqstr    = NULL;
267     AjPStr tmpstr = NULL;
268     AjPStr tmp       = ajStrNew();
269     AjPRegexp patexp = ajPatternRegexGetCompiled(pat);
270     ajint adj;
271     AjBool isreversed;
272     AjPSeq revseq;
273     ajint seqlen;
274 
275     seqlen = ajSeqGetLen(seq);
276     if(!seqlen)
277         return;
278 
279     isreversed = ajSeqIsReversedTrue(seq);
280 
281     if(isreversed)
282 	seqlen += ajSeqGetOffset(seq);
283 
284     pos = ajSeqGetBeginTrue(seq);
285     adj = ajSeqGetEndTrue(seq);
286 
287     if(!ajStrGetLen(featMotifProt))
288         ajStrAssignC(&featMotifProt, "SO:0001067");
289 
290     if(!ajStrGetLen(featMotifNuc))
291         ajStrAssignC(&featMotifNuc, "SO:0000714");
292 
293     /*ajDebug("embPatternRegexSearch pos: %d adj: %d reverse: %B\n",
294       pos, adj, reverse, isreversed);*/
295     /*ajDebug("seqlen:%d len: %d offset: %d offend: %d begin: %d end: %d\n",
296 	   seqlen , ajSeqGetLen(seq), ajSeqGetOffset(seq),
297 	   ajSeqGetOffend(seq), ajSeqGetBegin(seq), ajSeqGetEnd(seq));*/
298 
299     if (reverse)
300     {
301         revseq = ajSeqNewSeq(seq);
302         ajStrAssignSubS(&seqstr, ajSeqGetSeqS(revseq), pos-1, adj-1);
303         ajSeqstrReverse(&seqstr);
304     }
305 
306     ajStrAssignSubS(&seqstr, ajSeqGetSeqS(seq), pos-1, adj-1);
307 
308     ajStrFmtUpper(&seqstr);
309 
310     while(ajStrGetLen(seqstr) && ajRegExec(patexp, seqstr))
311     {
312 	off = ajRegOffset(patexp);
313 	len = ajRegLenI(patexp, 0);
314 
315 	if(off || len)
316 	{
317 	    ajRegSubI(patexp, 0, &substr);
318 	    ajRegPost(patexp, &tmp);
319 	    ajStrAssignS(&seqstr, substr);
320             ajStrAppendS(&seqstr, tmp);
321 	    pos += off;
322 
323 	    /*ajDebug("match pos: %d adj: %d len: %d off:%d\n",
324               pos, adj, len, off);*/
325             if (reverse)
326                 sf = ajFeatNew(ftable, NULL, featMotifNuc,
327                                    adj - pos - len + 2,
328                                    adj - pos + 1,
329                                    0.0, '-', 0);
330 	    else
331             {
332                 if(ajSeqIsProt(seq) || ajFeattableIsProt(ftable))
333                     sf = ajFeatNewProt(ftable, NULL, featMotifProt,
334                                        pos, pos + len - 1,
335                                        0.0);
336                 else
337                     sf = ajFeatNew(ftable, NULL, featMotifNuc,
338                                    pos, pos + len - 1,
339                                    0.0, '.', 0);
340             }
341 
342 	    if(isreversed)
343 		ajFeatReverse(sf, seqlen);
344 
345 	    ajFmtPrintS(&tmpstr,"*pat %S:%S",
346                         ajPatternRegexGetName(pat),
347                         ajPatternRegexGetPattern(pat));
348 	    ajFeatTagAddSS(sf,NULL,tmpstr);
349 	    pos += 1;
350 	    ajStrCutStart(&seqstr, 1);
351 	}
352 	else
353 	{
354 	    pos++;
355 	    ajStrCutStart(&seqstr, 1);
356 	}
357     }
358 
359     ajStrDel(&tmpstr);
360     ajStrDel(&tmp);
361     ajStrDel(&substr);
362     ajStrDel(&seqstr);
363 
364     if(reverse)
365 	ajSeqDel(&revseq);
366 
367     return;
368 }
369 
370 
371 
372 
373 /* @func embPatternRegexSearchAll *********************************************
374 **
375 ** The search function to find all matches for a single
376 ** regular expression pattern.
377 **
378 ** @param [w] ftable [AjPFeattable] Table of found features
379 ** @param [r] seq [const AjPSeq] Sequence to search
380 ** @param [r] pat [const AjPPatternRegex] Pattern to search with
381 ** @param [r] reverse [AjBool] Sequence has been reversed
382 ** @return [void]
383 **
384 ** @release 4.0.0
385 ** @@
386 ******************************************************************************/
387 
embPatternRegexSearchAll(AjPFeattable ftable,const AjPSeq seq,const AjPPatternRegex pat,AjBool reverse)388 void embPatternRegexSearchAll(AjPFeattable ftable, const AjPSeq seq,
389                               const AjPPatternRegex pat, AjBool reverse)
390 {
391     ajint pos=0;
392     ajint off;
393     ajint len;
394     AjPFeature sf    = NULL;
395     AjPStr substr    = NULL;
396     AjPStr seqstr    = NULL;
397     AjPStr savestr    = NULL;
398     AjPStr tmpstr = NULL;
399     AjPStr tmp       = ajStrNew();
400     AjPRegexp patexp = ajPatternRegexGetCompiled(pat);
401     ajint adj;
402     AjBool isreversed;
403     AjPSeq revseq;
404     ajint seqlen;
405     ajuint j;
406     ajuint k;
407 
408     seqlen = ajSeqGetLen(seq);
409     if(!seqlen)
410         return;
411 
412     isreversed = ajSeqIsReversedTrue(seq);
413 
414     if(isreversed)
415 	seqlen += ajSeqGetOffset(seq);
416 
417     pos = ajSeqGetBeginTrue(seq);
418     adj = ajSeqGetEndTrue(seq);
419 
420     if(!ajStrGetLen(featMotifProt))
421         ajStrAssignC(&featMotifProt, "SO:0001067");
422 
423     if(!ajStrGetLen(featMotifNuc))
424         ajStrAssignC(&featMotifNuc, "SO:0000714");
425 
426     /*ajDebug("embPatternRegexSearch pos: %d adj: %d reverse: %B\n",
427       pos, adj, reverse, isreversed);*/
428     /*ajDebug("seqlen:%d len: %d offset: %d offend: %d begin: %d end: %d\n",
429 	   seqlen , ajSeqGetLen(seq), ajSeqGetOffset(seq),
430 	   ajSeqGetOffend(seq), ajSeqGetBegin(seq), ajSeqGetEnd(seq));*/
431 
432     if (reverse)
433     {
434         revseq = ajSeqNewSeq(seq);
435         ajStrAssignSubS(&seqstr, ajSeqGetSeqS(revseq), pos-1, adj-1);
436         ajSeqstrReverse(&seqstr);
437     }
438 
439     ajStrAssignSubS(&seqstr, ajSeqGetSeqS(seq), pos-1, adj-1);
440 
441     ajStrFmtUpper(&seqstr);
442 
443     while(ajStrGetLen(seqstr) && ajRegExecall(patexp, seqstr))
444     {
445         j = ajRegGetMatches(patexp);
446 
447         /*ajDebug("found %d matches in last %u at pos %d off %d\n",
448                 j, ajStrGetLen(seqstr), pos, ajRegOffsetI(patexp,0));*/
449 
450         for(k=0; k < j; k++)
451         {
452             off = ajRegOffsetI(patexp,k);
453             len = ajRegLenI(patexp, k);
454 
455             if(!k)
456                 pos += off;
457 
458             ajRegSubI(patexp, k, &substr);
459             ajRegPost(patexp, &tmp);
460             if(!k)
461             {
462                 ajStrAssignS(&savestr, substr);
463                 ajStrAppendS(&savestr, tmp);
464             }
465 
466             /*ajDebug("match pos: %d adj: %d len: %d off:%d\n",
467               pos, adj, len, off);*/
468             if (reverse)
469                 sf = ajFeatNew(ftable, NULL, featMotifNuc,
470                                adj - pos - len + 2,
471                                adj - pos + 1,
472                                0.0, '-', 0);
473             else
474             {
475                 if(ajSeqIsProt(seq) || ajFeattableIsProt(ftable))
476                     sf = ajFeatNewProt(ftable, NULL, featMotifProt,
477                                        pos, pos + len - 1,
478                                        0.0);
479                 else
480                     sf = ajFeatNew(ftable, NULL, featMotifNuc,
481                                    pos, pos + len - 1,
482                                    0.0, '.', 0);
483             }
484 
485             if(isreversed)
486                 ajFeatReverse(sf, seqlen);
487 
488             ajFmtPrintS(&tmpstr,"*pat %S:%S",
489                         ajPatternRegexGetName(pat),
490                         ajPatternRegexGetPattern(pat));
491             ajFeatTagAddSS(sf,NULL,tmpstr);
492         }
493 
494         pos++;
495 
496         ajStrAssignS(&seqstr, savestr);
497         ajStrCutStart(&seqstr, 1);
498     }
499 
500     ajStrDel(&tmpstr);
501     ajStrDel(&tmp);
502     ajStrDel(&substr);
503     ajStrDel(&seqstr);
504     ajStrDel(&savestr);
505 
506     if(reverse)
507 	ajSeqDel(&revseq);
508 
509     return;
510 }
511 
512 
513 
514 
515 /* @func embPatternSeqSearch **************************************************
516 **
517 ** The search function for a single sequence pattern.
518 **
519 ** @param [w] ftable [AjPFeattable] Table of found features
520 ** @param [r] seq [const AjPSeq] Sequence to search
521 ** @param [r] pat [const AjPPatternSeq] Pattern to search with
522 ** @param [r] reverse [AjBool] Search reverse sequence as well
523 ** @return [void]
524 **
525 ** @release 4.0.0
526 ** @@
527 ******************************************************************************/
528 
embPatternSeqSearch(AjPFeattable ftable,const AjPSeq seq,const AjPPatternSeq pat,AjBool reverse)529 void embPatternSeqSearch (AjPFeattable ftable, const AjPSeq seq,
530 			  const AjPPatternSeq pat, AjBool reverse)
531 {
532     const void *tidy;
533     ajuint hits;
534     ajuint i;
535     AjPPatComp pattern;
536     EmbPMatMatch m = NULL;
537     AjPFeature sf  = NULL;
538     AjPSeq revseq  = NULL;
539     AjPList list   = ajListNew();
540     AjPStr seqstr  = ajStrNew();
541     AjPStr seqname = ajStrNew();
542     AjPStr tmp     = ajStrNew();
543     ajint adj;
544     ajint begin;
545     AjBool isreversed;
546     ajint seqlen;
547 
548     seqlen = ajSeqGetLen(seq);
549     if(!seqlen)
550         return;
551 
552     isreversed = ajSeqIsReversedTrue(seq);
553 
554     if(isreversed)
555 	seqlen += ajSeqGetOffset(seq);
556 
557     begin = ajSeqGetBeginTrue(seq);
558     adj = ajSeqGetEndTrue(seq);
559 
560     if(!ajStrGetLen(featMotifProt))
561         ajStrAssignC(&featMotifProt, "SO:0001067");
562 
563     if(!ajStrGetLen(featMotifNuc))
564         ajStrAssignC(&featMotifNuc, "SO:0000714");
565 
566     ajStrAssignS(&seqname,ajSeqGetNameS(seq));
567     pattern = ajPatternSeqGetCompiled(pat);
568 
569     if (reverse)
570     {
571         revseq = ajSeqNewSeq(seq);
572         ajStrAssignSubS(&seqstr, ajSeqGetSeqS(revseq),
573 			begin-1,adj-1);
574         ajSeqstrReverse(&seqstr);
575     }
576     else
577         ajStrAssignSubS(&seqstr, ajSeqGetSeqS(seq),
578 			begin-1,adj-1);
579 
580     ajStrFmtUpper(&seqstr);
581     /*ajDebug("seqlen:%d len: %d offset: %d offend: %d begin: %d end: %d\n"
582 	   "'%S'\n",
583 	   seqlen , ajSeqGetLen(seq), ajSeqGetOffset(seq),
584 	   ajSeqGetOffend(seq), ajSeqGetBegin(seq), ajSeqGetEnd(seq),
585 	   seqstr);*/
586 
587     ajDebug("embPatternSeqSearch '%S' protein: %B reverse: %B\n",
588 	    pattern->pattern, pat->Protein, reverse);
589     embPatFuzzSearchII(pattern,begin,seqname,seqstr,list,
590                        ajPatternSeqGetMismatch(pat),&hits,&tidy);
591 
592     ajDebug ("embPatternSeqSearch: found %d hits\n",hits);
593 
594     if(!reverse)
595 	ajListReverse(list);
596 
597     for(i=0;i<hits;++i)
598     {
599         ajListPop(list,(void **)&m);
600 
601  	if (reverse)
602 	    sf = ajFeatNew(ftable, NULL, featMotifNuc,
603                            adj - m->start - m->len + begin + 1,
604                            adj - m->start + begin,
605                            0.0, '-', 0);
606 	else
607         {
608 	    if(ajSeqIsProt(seq) || ajFeattableIsProt(ftable))
609                 sf = ajFeatNewProt(ftable, NULL, featMotifProt,
610                                    m->start,
611                                    m->start + m->len - 1,
612                                    0.0);
613             else
614                 sf = ajFeatNew(ftable, NULL, featMotifNuc,
615                                m->start,
616                                m->start + m->len - 1,
617                                0.0, '.', 0);
618         }
619 
620 	if(isreversed)
621 	    ajFeatReverse(sf, seqlen);
622 
623 	/*
624 	ajUser("isrev: %B reverse: %B begin: %d adj: %d "
625 	       "start: %d len: %d seqlen: %d %d..%d '%c'\n",
626 	       isreversed, reverse, begin, adj, m->start, m->len, seqlen,
627 	       sf->Start, sf->End, sf->Strand);
628 	*/
629 
630 	ajFeatSetScore(sf, (float) (m->len - m->mm));
631 
632         ajFmtPrintS(&tmp, "*pat %S:%S",
633                     ajPatternSeqGetName(pat),
634                     ajPatternSeqGetPattern(pat));
635         ajFeatTagAddSS(sf,NULL,tmp);
636 
637         if(m->mm)
638         {
639             ajFmtPrintS(&tmp, "*mismatch %d", m->mm);
640             ajFeatTagAddSS(sf, NULL, tmp);
641         }
642 
643         embMatMatchDel(&m);
644     }
645 
646     ajStrDel(&seqname);
647     ajStrDel(&seqstr);
648     ajStrDel(&tmp);
649     ajListFree(&list);
650 
651     if (reverse)
652         ajSeqDel(&revseq);
653 
654     return;
655 }
656 
657 
658 
659 
660 /* @func embPatternSeqSearchAll ***********************************************
661 **
662 ** The search function for a single sequence pattern using the DFA
663 ** regular expression search which finds all matches.
664 **
665 ** @param [w] ftable [AjPFeattable] Table of found features
666 ** @param [r] seq [const AjPSeq] Sequence to search
667 ** @param [r] pat [const AjPPatternSeq] Pattern to search with
668 ** @param [r] reverse [AjBool] Search reverse sequence as well
669 ** @return [void]
670 **
671 ** @release 4.0.0
672 ** @@
673 ******************************************************************************/
674 
embPatternSeqSearchAll(AjPFeattable ftable,const AjPSeq seq,const AjPPatternSeq pat,AjBool reverse)675 void embPatternSeqSearchAll(AjPFeattable ftable, const AjPSeq seq,
676                             const AjPPatternSeq pat, AjBool reverse)
677 {
678     const void *tidy;
679     ajuint hits;
680     ajuint i;
681     AjPPatComp pattern;
682     EmbPMatMatch m = NULL;
683     AjPFeature sf  = NULL;
684     AjPSeq revseq  = NULL;
685     AjPList list   = ajListNew();
686     AjPStr seqstr  = ajStrNew();
687     AjPStr seqname = ajStrNew();
688     AjPStr tmp     = ajStrNew();
689     ajint adj;
690     ajint begin;
691     AjBool isreversed;
692     ajint seqlen;
693 
694     seqlen = ajSeqGetLen(seq);
695     if(!seqlen)
696         return;
697 
698     isreversed = ajSeqIsReversedTrue(seq);
699 
700     if(isreversed)
701 	seqlen += ajSeqGetOffset(seq);
702 
703     begin = ajSeqGetBeginTrue(seq);
704     adj = ajSeqGetEndTrue(seq);
705 
706     if(!ajStrGetLen(featMotifProt))
707         ajStrAssignC(&featMotifProt, "SO:0001067");
708 
709     if(!ajStrGetLen(featMotifNuc))
710         ajStrAssignC(&featMotifNuc, "SO:0000714");
711 
712     ajStrAssignS(&seqname,ajSeqGetNameS(seq));
713     pattern = ajPatternSeqGetCompiled(pat);
714 
715     if (reverse)
716     {
717         revseq = ajSeqNewSeq(seq);
718         ajStrAssignSubS(&seqstr, ajSeqGetSeqS(revseq),
719 			begin-1,adj-1);
720         ajSeqstrReverse(&seqstr);
721     }
722     else
723         ajStrAssignSubS(&seqstr, ajSeqGetSeqS(seq),
724 			begin-1,adj-1);
725 
726     ajStrFmtUpper(&seqstr);
727     /*ajDebug("seqlen:%d len: %d offset: %d offend: %d begin: %d end: %d\n"
728 	   "'%S'\n",
729 	   seqlen , ajSeqGetLen(seq), ajSeqGetOffset(seq),
730 	   ajSeqGetOffend(seq), ajSeqGetBegin(seq), ajSeqGetEnd(seq),
731 	   seqstr);*/
732 
733     ajDebug("embPatternSeqSearchAll '%S' protein: %B reverse: %B\n",
734 	    pattern->pattern, pat->Protein, reverse);
735     embPatFuzzSearchAllII(pattern,begin,seqname,seqstr,list,
736                           ajPatternSeqGetMismatch(pat),&hits,&tidy);
737 
738     ajDebug ("embPatternSeqSearchAll: found %d hits\n",hits);
739 
740     if(!reverse)
741 	ajListReverse(list);
742 
743     for(i=0;i<hits;++i)
744     {
745         ajListPop(list,(void **)&m);
746 
747  	if (reverse)
748 	    sf = ajFeatNew(ftable, NULL, featMotifNuc,
749                            adj - m->start - m->len + begin + 1,
750                            adj - m->start + begin,
751                            0.0, '-', 0);
752 	else
753         {
754 	    if(ajSeqIsProt(seq) || ajFeattableIsProt(ftable))
755                 sf = ajFeatNewProt(ftable, NULL, featMotifProt,
756                                    m->start,
757                                    m->start + m->len - 1,
758                                    0.0);
759             else
760                 sf = ajFeatNew(ftable, NULL, featMotifNuc,
761                                m->start,
762                                m->start + m->len - 1,
763                                0.0, '.', 0);
764         }
765 
766 	if(isreversed)
767 	    ajFeatReverse(sf, seqlen);
768 
769 	/*
770 	ajUser("isrev: %B reverse: %B begin: %d adj: %d "
771 	       "start: %d len: %d seqlen: %d %d..%d '%c'\n",
772 	       isreversed, reverse, begin, adj, m->start, m->len, seqlen,
773 	       sf->Start, sf->End, sf->Strand);
774 	*/
775 
776 	ajFeatSetScore(sf, (float) (m->len - m->mm));
777 
778         ajFmtPrintS(&tmp, "*pat %S:%S",
779                     ajPatternSeqGetName(pat),
780                     ajPatternSeqGetPattern(pat));
781         ajFeatTagAddSS(sf,NULL,tmp);
782 
783         if(m->mm)
784         {
785             ajFmtPrintS(&tmp, "*mismatch %d", m->mm);
786             ajFeatTagAddSS(sf, NULL, tmp);
787         }
788 
789         embMatMatchDel(&m);
790     }
791 
792     ajStrDel(&seqname);
793     ajStrDel(&seqstr);
794     ajStrDel(&tmp);
795     ajListFree(&list);
796 
797     if (reverse)
798         ajSeqDel(&revseq);
799 
800     return;
801 }
802 
803 
804 
805 
806 /* @func embPatternSeqCompile *************************************************
807 **
808 ** Adds compiled pattern into AjPPattern.
809 **
810 ** @param [w] pat [AjPPatternSeq] Pattern for compiling
811 ** @return [AjBool] True, if compilation succeeded
812 **
813 ** @release 4.0.0
814 ** @@
815 ******************************************************************************/
embPatternSeqCompile(AjPPatternSeq pat)816 AjBool embPatternSeqCompile (AjPPatternSeq pat)
817 {
818     AjPPatComp embpat;
819     AjBool embType;
820     AjPStr pattern = NULL;
821 
822     ajStrAssignS(&pattern,ajPatternSeqGetPattern(pat));
823     ajStrFmtUpper(&pattern);
824     ajDebug("embPatlistSeqCompile: name %S, pattern %S\n",
825             ajPatternSeqGetName(pat),pattern);
826 
827     embpat = ajPatCompNew();
828 
829     if (ajPatternSeqGetProtein(pat))
830 	embType=ajTrue;
831     else
832 	embType=ajFalse;
833 
834     if (!embPatGetTypeII(embpat,pattern,
835 			 ajPatternSeqGetMismatch(pat),embType))
836     {
837 	ajDebug("embPatlistSeqCompile: Illegal pattern %S: '%S'\n",
838 		ajPatternSeqGetName(pat),ajPatternSeqGetPattern(pat));
839 	ajPatCompDel(&embpat);
840 	ajStrDel(&pattern);
841 
842 	return ajFalse;
843     }
844 
845     embPatCompileII(embpat,ajPatternSeqGetMismatch(pat));
846     ajPatternSeqSetCompiled(pat,embpat);
847     ajStrDel(&pattern);
848 
849     return ajTrue;
850 }
851 
852 
853 
854 
855 /* @func embPatlistExit *******************************************************
856 **
857 ** Cleanup pattern list indexing internals on exit
858 **
859 ** @return [void]
860 **
861 ** @release 6.1.0
862 ******************************************************************************/
863 
embPatlistExit(void)864 void embPatlistExit(void)
865 {
866     ajStrDel(&featMotifNuc);
867     ajStrDel(&featMotifProt);
868 
869     return;
870 }
871