1 /* @source embpatlist *********************************************************
2 **
3 ** Functions for patternlist handling.
4 **
5 ** @author Copyright (C) 2004 Henrikki Almusa, Medicel Oy,Finland
6 ** @version $Revision: 1.23 $
7 ** @modified $Date: 2013/06/29 22:38:59 $ by $Author: rice $
8 ** @@
9 **
10 ** This library is free software; you can redistribute it and/or
11 ** modify it under the terms of the GNU Lesser General Public
12 ** License as published by the Free Software Foundation; either
13 ** version 2.1 of the License, or (at your option) any later version.
14 **
15 ** This library is distributed in the hope that it will be useful,
16 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 ** Lesser General Public License for more details.
19 **
20 ** You should have received a copy of the GNU Lesser General Public
21 ** License along with this library; if not, write to the Free Software
22 ** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
23 ** MA 02110-1301, USA.
24 **
25 ******************************************************************************/
26
27 #include "embpatlist.h"
28 #include "embpat.h"
29 #include "embmat.h"
30
31 #include "ajlib.h"
32 #include "ajpat.h"
33 #include "ajseq.h"
34 #include "ajfeat.h"
35
36
37
38 static AjPStr featMotifNuc = NULL;
39 static AjPStr featMotifProt = NULL;
40
41
42
43
44 /* @func embPatlistSeqSearch **************************************************
45 **
46 ** The main search function of patterns. It compiles the patterns and searches
47 ** with them. If the pattern fails to compile, it is removed from list.
48 **
49 ** @param [w] ftable [AjPFeattable] Table of found features
50 ** @param [r] seq [const AjPSeq] Sequence to search
51 ** @param [u] plist [AjPPatlistSeq] List of patterns to search with
52 ** @param [r] reverse [AjBool] Search reverse sequence as well
53 ** @return [void]
54 **
55 ** @release 4.0.0
56 ** @@
57 ******************************************************************************/
58
embPatlistSeqSearch(AjPFeattable ftable,const AjPSeq seq,AjPPatlistSeq plist,AjBool reverse)59 void embPatlistSeqSearch(AjPFeattable ftable, const AjPSeq seq,
60 AjPPatlistSeq plist, AjBool reverse)
61 {
62 AjPPatternSeq patseq = NULL;
63 AjPPatComp compPat;
64
65 ajDebug ("embPatlistSeqSearch: Searching '%S' for %d patterns\n",
66 ajSeqGetNameS(seq), ajPatlistSeqGetSize(plist));
67
68 while (ajPatlistSeqGetNext(plist,&patseq))
69 {
70 compPat = ajPatternSeqGetCompiled(patseq);
71
72 if (!compPat && !embPatternSeqCompile(patseq))
73 {
74 ajPatlistSeqRemoveCurrent(plist);
75 continue;
76 }
77
78 embPatternSeqSearch(ftable,seq,patseq,reverse);
79 ajDebug("end loop\n");
80 }
81
82 ajPatlistSeqRewind(plist);
83
84 return;
85 }
86
87
88
89
90 /* @func embPatlistSeqSearchAll ***********************************************
91 **
92 ** The main search function of patterns. It compiles the patterns and searches
93 ** with them. If the pattern fails to compile, it is removed from list.
94 **
95 ** This version of the function calls the DFA version of the regular
96 ** expression search which returns all matches.
97 **
98 ** @param [w] ftable [AjPFeattable] Table of found features
99 ** @param [r] seq [const AjPSeq] Sequence to search
100 ** @param [u] plist [AjPPatlistSeq] List of patterns to search with
101 ** @param [r] reverse [AjBool] Search reverse sequence as well
102 ** @return [void]
103 **
104 ** @release 4.0.0
105 ** @@
106 ******************************************************************************/
107
embPatlistSeqSearchAll(AjPFeattable ftable,const AjPSeq seq,AjPPatlistSeq plist,AjBool reverse)108 void embPatlistSeqSearchAll(AjPFeattable ftable, const AjPSeq seq,
109 AjPPatlistSeq plist, AjBool reverse)
110 {
111 AjPPatternSeq patseq = NULL;
112 AjPPatComp compPat;
113
114 ajDebug ("embPatlistSeqSearchAll: Searching '%S' for %d patterns\n",
115 ajSeqGetNameS(seq), ajPatlistSeqGetSize(plist));
116
117 while (ajPatlistSeqGetNext(plist,&patseq))
118 {
119 compPat = ajPatternSeqGetCompiled(patseq);
120
121 if (!compPat && !embPatternSeqCompile(patseq))
122 {
123 ajPatlistSeqRemoveCurrent(plist);
124 continue;
125 }
126
127 embPatternSeqSearchAll(ftable,seq,patseq,reverse);
128 ajDebug("end loop\n");
129 }
130
131 ajPatlistSeqRewind(plist);
132
133 return;
134 }
135
136
137
138
139 /* @func embPatlistRegexSearch ************************************************
140 **
141 ** The main search function of patterns. It compiles the patterns and searches
142 ** with them. If the pattern fails to compile, it is removed from list.
143 **
144 ** @param [w] ftable [AjPFeattable] Table of found features
145 ** @param [r] seq [const AjPSeq] Sequence to search
146 ** @param [u] plist [AjPPatlistRegex] List of patterns to search with
147 ** @param [r] reverse [AjBool] Search reverse sequence as well
148 ** @return [void]
149 **
150 ** @release 4.0.0
151 ** @@
152 ******************************************************************************/
153
embPatlistRegexSearch(AjPFeattable ftable,const AjPSeq seq,AjPPatlistRegex plist,AjBool reverse)154 void embPatlistRegexSearch(AjPFeattable ftable, const AjPSeq seq,
155 AjPPatlistRegex plist, AjBool reverse)
156 {
157 AjPPatternRegex patreg = NULL;
158 AjPRegexp compPat;
159 AjPStr tmp = NULL;
160
161 ajStrAssignS(&tmp,ajSeqGetNameS(seq));
162 ajDebug ("embPatlistRegexSearch: Searching '%S' for patterns\n",tmp);
163
164 while (ajPatlistRegexGetNext(plist,&patreg))
165 {
166 compPat = ajPatternRegexGetCompiled(patreg);
167
168 if (!compPat)
169 {
170 ajPatlistRegexRemoveCurrent(plist);
171 continue;
172 }
173
174 embPatternRegexSearch(ftable,seq,patreg,reverse);
175 ajDebug("end loop\n");
176 }
177
178 /* ajDebug ("embPatlistRegexSearch: Done search '%S'\n",tmp); */
179
180 ajPatlistRegexRewind(plist);
181
182 ajStrDel(&tmp);
183
184 return;
185 }
186
187
188
189
190 /* @func embPatlistRegexSearchAll *********************************************
191 **
192 ** The main search function of patterns. It compiles the patterns and searches
193 ** with them. If the pattern fails to compile, it is removed from list.
194 **
195 ** This version of the function calls the DFA version of the regular
196 ** expression search which returns all matches.
197 **
198 ** @param [w] ftable [AjPFeattable] Table of found features
199 ** @param [r] seq [const AjPSeq] Sequence to search
200 ** @param [u] plist [AjPPatlistRegex] List of patterns to search with
201 ** @param [r] reverse [AjBool] Search reverse sequence as well
202 ** @return [void]
203 **
204 ** @release 4.0.0
205 ** @@
206 ******************************************************************************/
207
embPatlistRegexSearchAll(AjPFeattable ftable,const AjPSeq seq,AjPPatlistRegex plist,AjBool reverse)208 void embPatlistRegexSearchAll(AjPFeattable ftable, const AjPSeq seq,
209 AjPPatlistRegex plist, AjBool reverse)
210 {
211 AjPPatternRegex patreg = NULL;
212 AjPRegexp compPat;
213 AjPStr tmp = NULL;
214
215 ajStrAssignS(&tmp,ajSeqGetNameS(seq));
216 ajDebug ("embPatlistRegexSearch: Searching '%S' for patterns\n",tmp);
217
218 while (ajPatlistRegexGetNext(plist,&patreg))
219 {
220 compPat = ajPatternRegexGetCompiled(patreg);
221
222 if (!compPat)
223 {
224 ajPatlistRegexRemoveCurrent(plist);
225 continue;
226 }
227
228 embPatternRegexSearchAll(ftable,seq,patreg,reverse);
229 ajDebug("end loop\n");
230 }
231
232 /* ajDebug ("embPatlistRegexSearchAll: Done search '%S'\n",tmp); */
233
234 ajPatlistRegexRewind(plist);
235
236 ajStrDel(&tmp);
237
238 return;
239 }
240
241
242
243
244 /* @func embPatternRegexSearch ************************************************
245 **
246 ** The search function for a single regular expression pattern.
247 **
248 ** @param [w] ftable [AjPFeattable] Table of found features
249 ** @param [r] seq [const AjPSeq] Sequence to search
250 ** @param [r] pat [const AjPPatternRegex] Pattern to search with
251 ** @param [r] reverse [AjBool] Sequence has been reversed
252 ** @return [void]
253 **
254 ** @release 4.0.0
255 ** @@
256 ******************************************************************************/
257
embPatternRegexSearch(AjPFeattable ftable,const AjPSeq seq,const AjPPatternRegex pat,AjBool reverse)258 void embPatternRegexSearch (AjPFeattable ftable, const AjPSeq seq,
259 const AjPPatternRegex pat, AjBool reverse)
260 {
261 ajint pos=0;
262 ajint off;
263 ajint len;
264 AjPFeature sf = NULL;
265 AjPStr substr = NULL;
266 AjPStr seqstr = NULL;
267 AjPStr tmpstr = NULL;
268 AjPStr tmp = ajStrNew();
269 AjPRegexp patexp = ajPatternRegexGetCompiled(pat);
270 ajint adj;
271 AjBool isreversed;
272 AjPSeq revseq;
273 ajint seqlen;
274
275 seqlen = ajSeqGetLen(seq);
276 if(!seqlen)
277 return;
278
279 isreversed = ajSeqIsReversedTrue(seq);
280
281 if(isreversed)
282 seqlen += ajSeqGetOffset(seq);
283
284 pos = ajSeqGetBeginTrue(seq);
285 adj = ajSeqGetEndTrue(seq);
286
287 if(!ajStrGetLen(featMotifProt))
288 ajStrAssignC(&featMotifProt, "SO:0001067");
289
290 if(!ajStrGetLen(featMotifNuc))
291 ajStrAssignC(&featMotifNuc, "SO:0000714");
292
293 /*ajDebug("embPatternRegexSearch pos: %d adj: %d reverse: %B\n",
294 pos, adj, reverse, isreversed);*/
295 /*ajDebug("seqlen:%d len: %d offset: %d offend: %d begin: %d end: %d\n",
296 seqlen , ajSeqGetLen(seq), ajSeqGetOffset(seq),
297 ajSeqGetOffend(seq), ajSeqGetBegin(seq), ajSeqGetEnd(seq));*/
298
299 if (reverse)
300 {
301 revseq = ajSeqNewSeq(seq);
302 ajStrAssignSubS(&seqstr, ajSeqGetSeqS(revseq), pos-1, adj-1);
303 ajSeqstrReverse(&seqstr);
304 }
305
306 ajStrAssignSubS(&seqstr, ajSeqGetSeqS(seq), pos-1, adj-1);
307
308 ajStrFmtUpper(&seqstr);
309
310 while(ajStrGetLen(seqstr) && ajRegExec(patexp, seqstr))
311 {
312 off = ajRegOffset(patexp);
313 len = ajRegLenI(patexp, 0);
314
315 if(off || len)
316 {
317 ajRegSubI(patexp, 0, &substr);
318 ajRegPost(patexp, &tmp);
319 ajStrAssignS(&seqstr, substr);
320 ajStrAppendS(&seqstr, tmp);
321 pos += off;
322
323 /*ajDebug("match pos: %d adj: %d len: %d off:%d\n",
324 pos, adj, len, off);*/
325 if (reverse)
326 sf = ajFeatNew(ftable, NULL, featMotifNuc,
327 adj - pos - len + 2,
328 adj - pos + 1,
329 0.0, '-', 0);
330 else
331 {
332 if(ajSeqIsProt(seq) || ajFeattableIsProt(ftable))
333 sf = ajFeatNewProt(ftable, NULL, featMotifProt,
334 pos, pos + len - 1,
335 0.0);
336 else
337 sf = ajFeatNew(ftable, NULL, featMotifNuc,
338 pos, pos + len - 1,
339 0.0, '.', 0);
340 }
341
342 if(isreversed)
343 ajFeatReverse(sf, seqlen);
344
345 ajFmtPrintS(&tmpstr,"*pat %S:%S",
346 ajPatternRegexGetName(pat),
347 ajPatternRegexGetPattern(pat));
348 ajFeatTagAddSS(sf,NULL,tmpstr);
349 pos += 1;
350 ajStrCutStart(&seqstr, 1);
351 }
352 else
353 {
354 pos++;
355 ajStrCutStart(&seqstr, 1);
356 }
357 }
358
359 ajStrDel(&tmpstr);
360 ajStrDel(&tmp);
361 ajStrDel(&substr);
362 ajStrDel(&seqstr);
363
364 if(reverse)
365 ajSeqDel(&revseq);
366
367 return;
368 }
369
370
371
372
373 /* @func embPatternRegexSearchAll *********************************************
374 **
375 ** The search function to find all matches for a single
376 ** regular expression pattern.
377 **
378 ** @param [w] ftable [AjPFeattable] Table of found features
379 ** @param [r] seq [const AjPSeq] Sequence to search
380 ** @param [r] pat [const AjPPatternRegex] Pattern to search with
381 ** @param [r] reverse [AjBool] Sequence has been reversed
382 ** @return [void]
383 **
384 ** @release 4.0.0
385 ** @@
386 ******************************************************************************/
387
embPatternRegexSearchAll(AjPFeattable ftable,const AjPSeq seq,const AjPPatternRegex pat,AjBool reverse)388 void embPatternRegexSearchAll(AjPFeattable ftable, const AjPSeq seq,
389 const AjPPatternRegex pat, AjBool reverse)
390 {
391 ajint pos=0;
392 ajint off;
393 ajint len;
394 AjPFeature sf = NULL;
395 AjPStr substr = NULL;
396 AjPStr seqstr = NULL;
397 AjPStr savestr = NULL;
398 AjPStr tmpstr = NULL;
399 AjPStr tmp = ajStrNew();
400 AjPRegexp patexp = ajPatternRegexGetCompiled(pat);
401 ajint adj;
402 AjBool isreversed;
403 AjPSeq revseq;
404 ajint seqlen;
405 ajuint j;
406 ajuint k;
407
408 seqlen = ajSeqGetLen(seq);
409 if(!seqlen)
410 return;
411
412 isreversed = ajSeqIsReversedTrue(seq);
413
414 if(isreversed)
415 seqlen += ajSeqGetOffset(seq);
416
417 pos = ajSeqGetBeginTrue(seq);
418 adj = ajSeqGetEndTrue(seq);
419
420 if(!ajStrGetLen(featMotifProt))
421 ajStrAssignC(&featMotifProt, "SO:0001067");
422
423 if(!ajStrGetLen(featMotifNuc))
424 ajStrAssignC(&featMotifNuc, "SO:0000714");
425
426 /*ajDebug("embPatternRegexSearch pos: %d adj: %d reverse: %B\n",
427 pos, adj, reverse, isreversed);*/
428 /*ajDebug("seqlen:%d len: %d offset: %d offend: %d begin: %d end: %d\n",
429 seqlen , ajSeqGetLen(seq), ajSeqGetOffset(seq),
430 ajSeqGetOffend(seq), ajSeqGetBegin(seq), ajSeqGetEnd(seq));*/
431
432 if (reverse)
433 {
434 revseq = ajSeqNewSeq(seq);
435 ajStrAssignSubS(&seqstr, ajSeqGetSeqS(revseq), pos-1, adj-1);
436 ajSeqstrReverse(&seqstr);
437 }
438
439 ajStrAssignSubS(&seqstr, ajSeqGetSeqS(seq), pos-1, adj-1);
440
441 ajStrFmtUpper(&seqstr);
442
443 while(ajStrGetLen(seqstr) && ajRegExecall(patexp, seqstr))
444 {
445 j = ajRegGetMatches(patexp);
446
447 /*ajDebug("found %d matches in last %u at pos %d off %d\n",
448 j, ajStrGetLen(seqstr), pos, ajRegOffsetI(patexp,0));*/
449
450 for(k=0; k < j; k++)
451 {
452 off = ajRegOffsetI(patexp,k);
453 len = ajRegLenI(patexp, k);
454
455 if(!k)
456 pos += off;
457
458 ajRegSubI(patexp, k, &substr);
459 ajRegPost(patexp, &tmp);
460 if(!k)
461 {
462 ajStrAssignS(&savestr, substr);
463 ajStrAppendS(&savestr, tmp);
464 }
465
466 /*ajDebug("match pos: %d adj: %d len: %d off:%d\n",
467 pos, adj, len, off);*/
468 if (reverse)
469 sf = ajFeatNew(ftable, NULL, featMotifNuc,
470 adj - pos - len + 2,
471 adj - pos + 1,
472 0.0, '-', 0);
473 else
474 {
475 if(ajSeqIsProt(seq) || ajFeattableIsProt(ftable))
476 sf = ajFeatNewProt(ftable, NULL, featMotifProt,
477 pos, pos + len - 1,
478 0.0);
479 else
480 sf = ajFeatNew(ftable, NULL, featMotifNuc,
481 pos, pos + len - 1,
482 0.0, '.', 0);
483 }
484
485 if(isreversed)
486 ajFeatReverse(sf, seqlen);
487
488 ajFmtPrintS(&tmpstr,"*pat %S:%S",
489 ajPatternRegexGetName(pat),
490 ajPatternRegexGetPattern(pat));
491 ajFeatTagAddSS(sf,NULL,tmpstr);
492 }
493
494 pos++;
495
496 ajStrAssignS(&seqstr, savestr);
497 ajStrCutStart(&seqstr, 1);
498 }
499
500 ajStrDel(&tmpstr);
501 ajStrDel(&tmp);
502 ajStrDel(&substr);
503 ajStrDel(&seqstr);
504 ajStrDel(&savestr);
505
506 if(reverse)
507 ajSeqDel(&revseq);
508
509 return;
510 }
511
512
513
514
515 /* @func embPatternSeqSearch **************************************************
516 **
517 ** The search function for a single sequence pattern.
518 **
519 ** @param [w] ftable [AjPFeattable] Table of found features
520 ** @param [r] seq [const AjPSeq] Sequence to search
521 ** @param [r] pat [const AjPPatternSeq] Pattern to search with
522 ** @param [r] reverse [AjBool] Search reverse sequence as well
523 ** @return [void]
524 **
525 ** @release 4.0.0
526 ** @@
527 ******************************************************************************/
528
embPatternSeqSearch(AjPFeattable ftable,const AjPSeq seq,const AjPPatternSeq pat,AjBool reverse)529 void embPatternSeqSearch (AjPFeattable ftable, const AjPSeq seq,
530 const AjPPatternSeq pat, AjBool reverse)
531 {
532 const void *tidy;
533 ajuint hits;
534 ajuint i;
535 AjPPatComp pattern;
536 EmbPMatMatch m = NULL;
537 AjPFeature sf = NULL;
538 AjPSeq revseq = NULL;
539 AjPList list = ajListNew();
540 AjPStr seqstr = ajStrNew();
541 AjPStr seqname = ajStrNew();
542 AjPStr tmp = ajStrNew();
543 ajint adj;
544 ajint begin;
545 AjBool isreversed;
546 ajint seqlen;
547
548 seqlen = ajSeqGetLen(seq);
549 if(!seqlen)
550 return;
551
552 isreversed = ajSeqIsReversedTrue(seq);
553
554 if(isreversed)
555 seqlen += ajSeqGetOffset(seq);
556
557 begin = ajSeqGetBeginTrue(seq);
558 adj = ajSeqGetEndTrue(seq);
559
560 if(!ajStrGetLen(featMotifProt))
561 ajStrAssignC(&featMotifProt, "SO:0001067");
562
563 if(!ajStrGetLen(featMotifNuc))
564 ajStrAssignC(&featMotifNuc, "SO:0000714");
565
566 ajStrAssignS(&seqname,ajSeqGetNameS(seq));
567 pattern = ajPatternSeqGetCompiled(pat);
568
569 if (reverse)
570 {
571 revseq = ajSeqNewSeq(seq);
572 ajStrAssignSubS(&seqstr, ajSeqGetSeqS(revseq),
573 begin-1,adj-1);
574 ajSeqstrReverse(&seqstr);
575 }
576 else
577 ajStrAssignSubS(&seqstr, ajSeqGetSeqS(seq),
578 begin-1,adj-1);
579
580 ajStrFmtUpper(&seqstr);
581 /*ajDebug("seqlen:%d len: %d offset: %d offend: %d begin: %d end: %d\n"
582 "'%S'\n",
583 seqlen , ajSeqGetLen(seq), ajSeqGetOffset(seq),
584 ajSeqGetOffend(seq), ajSeqGetBegin(seq), ajSeqGetEnd(seq),
585 seqstr);*/
586
587 ajDebug("embPatternSeqSearch '%S' protein: %B reverse: %B\n",
588 pattern->pattern, pat->Protein, reverse);
589 embPatFuzzSearchII(pattern,begin,seqname,seqstr,list,
590 ajPatternSeqGetMismatch(pat),&hits,&tidy);
591
592 ajDebug ("embPatternSeqSearch: found %d hits\n",hits);
593
594 if(!reverse)
595 ajListReverse(list);
596
597 for(i=0;i<hits;++i)
598 {
599 ajListPop(list,(void **)&m);
600
601 if (reverse)
602 sf = ajFeatNew(ftable, NULL, featMotifNuc,
603 adj - m->start - m->len + begin + 1,
604 adj - m->start + begin,
605 0.0, '-', 0);
606 else
607 {
608 if(ajSeqIsProt(seq) || ajFeattableIsProt(ftable))
609 sf = ajFeatNewProt(ftable, NULL, featMotifProt,
610 m->start,
611 m->start + m->len - 1,
612 0.0);
613 else
614 sf = ajFeatNew(ftable, NULL, featMotifNuc,
615 m->start,
616 m->start + m->len - 1,
617 0.0, '.', 0);
618 }
619
620 if(isreversed)
621 ajFeatReverse(sf, seqlen);
622
623 /*
624 ajUser("isrev: %B reverse: %B begin: %d adj: %d "
625 "start: %d len: %d seqlen: %d %d..%d '%c'\n",
626 isreversed, reverse, begin, adj, m->start, m->len, seqlen,
627 sf->Start, sf->End, sf->Strand);
628 */
629
630 ajFeatSetScore(sf, (float) (m->len - m->mm));
631
632 ajFmtPrintS(&tmp, "*pat %S:%S",
633 ajPatternSeqGetName(pat),
634 ajPatternSeqGetPattern(pat));
635 ajFeatTagAddSS(sf,NULL,tmp);
636
637 if(m->mm)
638 {
639 ajFmtPrintS(&tmp, "*mismatch %d", m->mm);
640 ajFeatTagAddSS(sf, NULL, tmp);
641 }
642
643 embMatMatchDel(&m);
644 }
645
646 ajStrDel(&seqname);
647 ajStrDel(&seqstr);
648 ajStrDel(&tmp);
649 ajListFree(&list);
650
651 if (reverse)
652 ajSeqDel(&revseq);
653
654 return;
655 }
656
657
658
659
660 /* @func embPatternSeqSearchAll ***********************************************
661 **
662 ** The search function for a single sequence pattern using the DFA
663 ** regular expression search which finds all matches.
664 **
665 ** @param [w] ftable [AjPFeattable] Table of found features
666 ** @param [r] seq [const AjPSeq] Sequence to search
667 ** @param [r] pat [const AjPPatternSeq] Pattern to search with
668 ** @param [r] reverse [AjBool] Search reverse sequence as well
669 ** @return [void]
670 **
671 ** @release 4.0.0
672 ** @@
673 ******************************************************************************/
674
embPatternSeqSearchAll(AjPFeattable ftable,const AjPSeq seq,const AjPPatternSeq pat,AjBool reverse)675 void embPatternSeqSearchAll(AjPFeattable ftable, const AjPSeq seq,
676 const AjPPatternSeq pat, AjBool reverse)
677 {
678 const void *tidy;
679 ajuint hits;
680 ajuint i;
681 AjPPatComp pattern;
682 EmbPMatMatch m = NULL;
683 AjPFeature sf = NULL;
684 AjPSeq revseq = NULL;
685 AjPList list = ajListNew();
686 AjPStr seqstr = ajStrNew();
687 AjPStr seqname = ajStrNew();
688 AjPStr tmp = ajStrNew();
689 ajint adj;
690 ajint begin;
691 AjBool isreversed;
692 ajint seqlen;
693
694 seqlen = ajSeqGetLen(seq);
695 if(!seqlen)
696 return;
697
698 isreversed = ajSeqIsReversedTrue(seq);
699
700 if(isreversed)
701 seqlen += ajSeqGetOffset(seq);
702
703 begin = ajSeqGetBeginTrue(seq);
704 adj = ajSeqGetEndTrue(seq);
705
706 if(!ajStrGetLen(featMotifProt))
707 ajStrAssignC(&featMotifProt, "SO:0001067");
708
709 if(!ajStrGetLen(featMotifNuc))
710 ajStrAssignC(&featMotifNuc, "SO:0000714");
711
712 ajStrAssignS(&seqname,ajSeqGetNameS(seq));
713 pattern = ajPatternSeqGetCompiled(pat);
714
715 if (reverse)
716 {
717 revseq = ajSeqNewSeq(seq);
718 ajStrAssignSubS(&seqstr, ajSeqGetSeqS(revseq),
719 begin-1,adj-1);
720 ajSeqstrReverse(&seqstr);
721 }
722 else
723 ajStrAssignSubS(&seqstr, ajSeqGetSeqS(seq),
724 begin-1,adj-1);
725
726 ajStrFmtUpper(&seqstr);
727 /*ajDebug("seqlen:%d len: %d offset: %d offend: %d begin: %d end: %d\n"
728 "'%S'\n",
729 seqlen , ajSeqGetLen(seq), ajSeqGetOffset(seq),
730 ajSeqGetOffend(seq), ajSeqGetBegin(seq), ajSeqGetEnd(seq),
731 seqstr);*/
732
733 ajDebug("embPatternSeqSearchAll '%S' protein: %B reverse: %B\n",
734 pattern->pattern, pat->Protein, reverse);
735 embPatFuzzSearchAllII(pattern,begin,seqname,seqstr,list,
736 ajPatternSeqGetMismatch(pat),&hits,&tidy);
737
738 ajDebug ("embPatternSeqSearchAll: found %d hits\n",hits);
739
740 if(!reverse)
741 ajListReverse(list);
742
743 for(i=0;i<hits;++i)
744 {
745 ajListPop(list,(void **)&m);
746
747 if (reverse)
748 sf = ajFeatNew(ftable, NULL, featMotifNuc,
749 adj - m->start - m->len + begin + 1,
750 adj - m->start + begin,
751 0.0, '-', 0);
752 else
753 {
754 if(ajSeqIsProt(seq) || ajFeattableIsProt(ftable))
755 sf = ajFeatNewProt(ftable, NULL, featMotifProt,
756 m->start,
757 m->start + m->len - 1,
758 0.0);
759 else
760 sf = ajFeatNew(ftable, NULL, featMotifNuc,
761 m->start,
762 m->start + m->len - 1,
763 0.0, '.', 0);
764 }
765
766 if(isreversed)
767 ajFeatReverse(sf, seqlen);
768
769 /*
770 ajUser("isrev: %B reverse: %B begin: %d adj: %d "
771 "start: %d len: %d seqlen: %d %d..%d '%c'\n",
772 isreversed, reverse, begin, adj, m->start, m->len, seqlen,
773 sf->Start, sf->End, sf->Strand);
774 */
775
776 ajFeatSetScore(sf, (float) (m->len - m->mm));
777
778 ajFmtPrintS(&tmp, "*pat %S:%S",
779 ajPatternSeqGetName(pat),
780 ajPatternSeqGetPattern(pat));
781 ajFeatTagAddSS(sf,NULL,tmp);
782
783 if(m->mm)
784 {
785 ajFmtPrintS(&tmp, "*mismatch %d", m->mm);
786 ajFeatTagAddSS(sf, NULL, tmp);
787 }
788
789 embMatMatchDel(&m);
790 }
791
792 ajStrDel(&seqname);
793 ajStrDel(&seqstr);
794 ajStrDel(&tmp);
795 ajListFree(&list);
796
797 if (reverse)
798 ajSeqDel(&revseq);
799
800 return;
801 }
802
803
804
805
806 /* @func embPatternSeqCompile *************************************************
807 **
808 ** Adds compiled pattern into AjPPattern.
809 **
810 ** @param [w] pat [AjPPatternSeq] Pattern for compiling
811 ** @return [AjBool] True, if compilation succeeded
812 **
813 ** @release 4.0.0
814 ** @@
815 ******************************************************************************/
embPatternSeqCompile(AjPPatternSeq pat)816 AjBool embPatternSeqCompile (AjPPatternSeq pat)
817 {
818 AjPPatComp embpat;
819 AjBool embType;
820 AjPStr pattern = NULL;
821
822 ajStrAssignS(&pattern,ajPatternSeqGetPattern(pat));
823 ajStrFmtUpper(&pattern);
824 ajDebug("embPatlistSeqCompile: name %S, pattern %S\n",
825 ajPatternSeqGetName(pat),pattern);
826
827 embpat = ajPatCompNew();
828
829 if (ajPatternSeqGetProtein(pat))
830 embType=ajTrue;
831 else
832 embType=ajFalse;
833
834 if (!embPatGetTypeII(embpat,pattern,
835 ajPatternSeqGetMismatch(pat),embType))
836 {
837 ajDebug("embPatlistSeqCompile: Illegal pattern %S: '%S'\n",
838 ajPatternSeqGetName(pat),ajPatternSeqGetPattern(pat));
839 ajPatCompDel(&embpat);
840 ajStrDel(&pattern);
841
842 return ajFalse;
843 }
844
845 embPatCompileII(embpat,ajPatternSeqGetMismatch(pat));
846 ajPatternSeqSetCompiled(pat,embpat);
847 ajStrDel(&pattern);
848
849 return ajTrue;
850 }
851
852
853
854
855 /* @func embPatlistExit *******************************************************
856 **
857 ** Cleanup pattern list indexing internals on exit
858 **
859 ** @return [void]
860 **
861 ** @release 6.1.0
862 ******************************************************************************/
863
embPatlistExit(void)864 void embPatlistExit(void)
865 {
866 ajStrDel(&featMotifNuc);
867 ajStrDel(&featMotifProt);
868
869 return;
870 }
871