1 /* @source emowse application
2 **
3 ** Finds proteins matching mass spectrometry data
4 **
5 ** @author Copyright (C) Alan Bleasby (ableasby@hgmp.mrc.ac.uk)
6 ** @@
7 **
8 ** This program is free software; you can redistribute it and/or
9 ** modify it under the terms of the GNU General Public License
10 ** as published by the Free Software Foundation; either version 2
11 ** of the License, or (at your option) any later version.
12 **
13 ** This program is distributed in the hope that it will be useful,
14 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
15 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 ** GNU General Public License for more details.
17 **
18 ** You should have received a copy of the GNU General Public License
19 ** along with this program; if not, write to the Free Software
20 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21 ******************************************************************************/
22
23 #include "emboss.h"
24 #include <math.h>
25 #include <string.h>
26 #include <errno.h>
27
28
29
30
31 #define FGUESS 128000 /* Uncritical guess of freq data size */
32 #define MILLION (double)1000000.
33 #define MAXLIST 50
34
35
36
37
38 /* @datastatic EmbPMdata ******************************************************
39 **
40 ** EMowse data
41 **
42 ** @alias EmbSMdata
43 ** @alias EmbOMdata
44 **
45 ** @attr mwt [double] Molecular weight
46 ** @attr sdata [AjPStr] string data
47 ******************************************************************************/
48
49 typedef struct EmbSMdata
50 {
51 double mwt;
52 AjPStr sdata;
53 } EmbOMdata;
54 #define EmbPMdata EmbOMdata*
55
56
57
58
59 /* @datastatic PHits **********************************************************
60 **
61 ** EMowse hits
62 **
63 ** @alias SHits
64 ** @alias OHits
65 **
66 ** @attr seq [AjPStr] Sequence
67 ** @attr name [AjPStr] Name
68 ** @attr desc [AjPStr] Description
69 ** @attr found [AjPInt] Found data
70 ** @attr score [double] Score
71 ** @attr mwt [double] Molecular weight
72 ** @attr frags [EmbPMolFrag*] Fragment data
73 ** @attr nf [ajint] Number of fragments
74 ** @attr Padding [char[4]] Padding to alignment boundary
75 ******************************************************************************/
76
77 typedef struct SHits
78 {
79 AjPStr seq;
80 AjPStr name;
81 AjPStr desc;
82 AjPInt found;
83 double score;
84 double mwt;
85 EmbPMolFrag* frags;
86 ajint nf;
87 char Padding[4];
88 } OHits;
89 #define PHits OHits*
90
91
92
93
94 static void emowse_read_freqs(AjPFile finf, AjPDouble *freqs);
95 static AjBool emowse_molwt_outofrange(double thys, double given, double range);
96 static ajint emowse_read_data(AjPFile inf, EmbPMdata** data);
97 static ajint emowse_sort_data(const void *a, const void *b);
98 static ajint emowse_hit_sort(const void *a, const void *b);
99 static void emowse_match(EmbPMdata const * data, ajint dno, AjPList flist,
100 ajint nfrags, double tol,
101 const AjPSeq seq, AjPList hlist,
102 double partials, double cmw, ajint enz,
103 const AjPDouble freqs);
104
105 static ajint emowse_seq_comp(ajint bidx, ajint thys, const AjPSeq seq,
106 EmbPMdata const *data, EmbPMolFrag const *frags);
107 static ajint emowse_get_index(double actmw, double maxmw, double minmw,
108 EmbPMolFrag const *frags,
109 ajint fno, double *bestmw,
110 ajint *myindex, ajint thys, const AjPSeq seq,
111 EmbPMdata const *data);
112
113 static ajint emowse_seq_search(const AjPStr substr, AjPStr* s);
114 static AjBool emowse_msearch(const char *seq, const char *pat, AjBool term);
115 static void emowse_mreverse(char *s);
116 static ajint emowse_get_orc(AjPStr *orc, const char *s, ajint pos);
117 static AjBool emowse_comp_search(const AjPStr substr, const char *s);
118 static void emowse_print_hits(AjPFile outf, AjPList hlist, ajint dno,
119 EmbPMdata const * data);
120
121
122
123
124 /* @prog emowse ***************************************************************
125 **
126 ** Protein identification by mass spectrometry
127 **
128 ******************************************************************************/
129
main(int argc,char ** argv)130 int main(int argc, char **argv)
131 {
132 AjPSeq seq = NULL;
133 AjPSeqall seqall = NULL;
134 AjPFile outf = NULL;
135 AjPFile mwinf = NULL;
136 AjPFile ffile = NULL;
137 AjPStr enzyme = NULL;
138 ajint smolwt;
139 ajint range;
140 float tol;
141 float partials;
142 AjPDouble freqs = NULL;
143 ajint begin;
144 ajint end;
145 double smw;
146 ajint rno;
147 ajint i;
148
149 AjPList flist = NULL;
150 EmbPMdata *data = NULL;
151 ajint dno;
152 ajint nfrags;
153 AjPList hlist = NULL;
154 AjPFile mfptr = NULL;
155
156 EmbPPropMolwt *mwdata = NULL;
157 AjBool mono;
158
159 embInit("emowse", argc, argv);
160
161 seqall = ajAcdGetSeqall("sequence");
162 mwinf = ajAcdGetInfile("infile");
163 enzyme = ajAcdGetListSingle("enzyme");
164 smolwt = ajAcdGetInt("weight");
165 range = ajAcdGetInt("pcrange");
166 ffile = ajAcdGetDatafile("frequencies");
167 tol = ajAcdGetFloat("tolerance");
168 partials = ajAcdGetFloat("partials");
169 outf = ajAcdGetOutfile("outfile");
170 mfptr = ajAcdGetDatafile("mwdata");
171 mono = ajAcdGetBoolean("mono");
172
173 mwdata = embPropEmolwtRead(mfptr);
174
175 freqs = ajDoubleNewRes(FGUESS);
176 emowse_read_freqs(ffile, &freqs);
177 ajFileClose(&ffile);
178
179 if(ajFmtScanS(enzyme,"%d",&rno)!=1)
180 ajFatal("Illegal enzyme entry [%S]",enzyme);
181
182
183 if(!(dno = emowse_read_data(mwinf,&data)))
184 ajFatal("No molecular weights in the file");
185 ajFileClose(&mwinf);
186
187
188 hlist = ajListNew();
189
190
191 while(ajSeqallNext(seqall,&seq))
192 {
193 begin = ajSeqallGetseqBegin(seqall);
194 end = ajSeqallGetseqEnd(seqall);
195
196
197 smw = embPropCalcMolwt(ajSeqGetSeqC(seq),--begin,--end,mwdata,mono);
198 if(smolwt)
199 if(emowse_molwt_outofrange(smw,(double)smolwt,(double)range))
200 continue;
201
202 flist = ajListNew();
203 nfrags = embMolGetFrags(ajSeqGetSeqS(seq),rno,mwdata,mono,&flist);
204
205 emowse_match(data,dno,flist,nfrags,(double)tol,seq,hlist,
206 (double)partials,
207 smw,rno,freqs);
208
209 ajListFree(&flist);
210 }
211
212
213 emowse_print_hits(outf,hlist,dno,data);
214 for(i=0;i<dno;i++)
215 {
216 ajStrDel(&data[i]->sdata);
217 AJFREE(data[i]);
218 }
219 AJFREE(data);
220
221 ajListFree(&hlist);
222
223 ajFileClose(&mfptr);
224
225 ajSeqallDel(&seqall);
226 ajSeqDel(&seq);
227 ajFileClose(&outf);
228 ajStrDel(&enzyme);
229
230 ajDoubleDel(&freqs);
231 ajListFree(&flist);
232
233 embPropMolwtDel(&mwdata);
234
235 embExit();
236
237 return 0;
238 }
239
240
241
242
243 /* @funcstatic emowse_read_freqs **********************************************
244 **
245 ** Undocumented.
246 **
247 ** @param [u] finf [AjPFile] Undocumented
248 ** @param [w] freqs [AjPDouble*] Undocumented
249 ** @return [void]
250 ** @@
251 ******************************************************************************/
252
emowse_read_freqs(AjPFile finf,AjPDouble * freqs)253 static void emowse_read_freqs(AjPFile finf, AjPDouble *freqs)
254 {
255 ajint c = 0;
256 AjPStr str = NULL;
257 double f = 0.0;
258
259 str = ajStrNew();
260
261 while(ajReadlineTrim(finf,&str))
262 {
263 if(!ajStrToDouble(str, &f))
264 ajWarn("Bad record in frequencies file '%F': '%S'\n",
265 finf, str);
266
267 ajDoublePut(freqs,c,f);
268
269 ++c;
270 }
271
272 ajStrDel(&str);
273
274 return;
275 }
276
277
278
279
280 /* @funcstatic emowse_molwt_outofrange ****************************************
281 **
282 ** Undocumented.
283 **
284 ** @param [r] thys [double] Undocumented
285 ** @param [r] given [double] Undocumented
286 ** @param [r] range [double] Undocumented
287 ** @return [AjBool] Undocumented
288 ** @@
289 ******************************************************************************/
290
emowse_molwt_outofrange(double thys,double given,double range)291 static AjBool emowse_molwt_outofrange(double thys, double given, double range)
292 {
293 double diff;
294
295 diff = given * range / (double)100.;
296
297 if(thys>=given-diff && thys<=given+diff)
298 return ajFalse;
299
300 return ajTrue;
301 }
302
303
304
305
306 /* @funcstatic emowse_read_data ***********************************************
307 **
308 ** Undocumented.
309 **
310 ** @param [u] inf [AjPFile] Undocumented
311 ** @param [w] data [EmbPMdata**] Undocumented
312 ** @return [ajint] Undocumented
313 ** @@
314 ******************************************************************************/
315
emowse_read_data(AjPFile inf,EmbPMdata ** data)316 static ajint emowse_read_data(AjPFile inf, EmbPMdata** data)
317 {
318 AjPStr str = NULL;
319 double v;
320 AjPStrTok token = NULL;
321 EmbPMdata ptr = NULL;
322 AjPList l;
323 ajint n;
324
325 str = ajStrNew();
326 l = ajListNew();
327
328 while(ajReadlineTrim(inf,&str))
329 if(sscanf(ajStrGetPtr(str),"%lf",&v)==1)
330 {
331 AJNEW0(ptr);
332 ptr->mwt = v;
333 ptr->sdata=ajStrNew();
334 ajStrRemoveWhiteExcess(&str);
335 token = ajStrTokenNewC(str," \t\r\n");
336 ajStrTokenNextParseC(token," \t\r\n",&ptr->sdata);
337 ajStrTokenNextParseC(token," \t\r\n",&ptr->sdata);
338 ajStrTokenDel(&token);
339 ajListPush(l,(void *)ptr);
340 }
341
342 ajListSort(l, &emowse_sort_data);
343 n = (ajuint) ajListToarray(l,(void ***)data);
344 ajListFree(&l);
345 ajStrDel(&str);
346 ajStrTokenDel(&token);
347
348 return n;
349 }
350
351
352
353
354 /* @funcstatic emowse_sort_data ***********************************************
355 **
356 ** Undocumented.
357 **
358 ** @param [r] a [const void*] Undocumented
359 ** @param [r] b [const void*] Undocumented
360 ** @return [ajint] Undocumented
361 ** @@
362 ******************************************************************************/
363
emowse_sort_data(const void * a,const void * b)364 static ajint emowse_sort_data(const void *a, const void *b)
365 {
366 return (ajint)((*(EmbPMdata const *)a)->mwt -
367 (*(EmbPMdata const *)b)->mwt);
368 }
369
370
371
372
373 /* @funcstatic emowse_hit_sort ************************************************
374 **
375 ** Undocumented.
376 **
377 ** @param [r] a [const void*] Undocumented
378 ** @param [r] b [const void*] Undocumented
379 ** @return [ajint] Undocumented
380 ** @@
381 ******************************************************************************/
382
emowse_hit_sort(const void * a,const void * b)383 static ajint emowse_hit_sort(const void *a, const void *b)
384 {
385 double x;
386 double y;
387
388 x = (*(PHits const *)a)->score;
389 y = (*(PHits const *)b)->score;
390
391 if(x==y)
392 return 0;
393 else if(x<y)
394 return -1;
395
396 return 1;
397 }
398
399
400
401
402 /* @funcstatic emowse_match ***************************************************
403 **
404 ** Undocumented.
405 **
406 ** @param [r] data [EmbPMdata const *] Undocumented
407 ** @param [r] dno [ajint] Undocumented
408 ** @param [u] flist [AjPList] Undocumented
409 ** @param [r] nfrags [ajint] Undocumented
410 ** @param [r] tol [double] Undocumented
411 ** @param [r] seq [const AjPSeq] Undocumented
412 ** @param [u] hlist [AjPList] Undocumented
413 ** @param [r] partials [double] Undocumented
414 ** @param [r] cmw [double] Undocumented
415 ** @param [r] rno [ajint] Undocumented
416 ** @param [r] freqs [const AjPDouble] Undocumented
417 ** @@
418 ******************************************************************************/
419
emowse_match(EmbPMdata const * data,ajint dno,AjPList flist,ajint nfrags,double tol,const AjPSeq seq,AjPList hlist,double partials,double cmw,ajint rno,const AjPDouble freqs)420 static void emowse_match(EmbPMdata const * data, ajint dno, AjPList flist,
421 ajint nfrags, double tol,
422 const AjPSeq seq, AjPList hlist,
423 double partials, double cmw, ajint rno,
424 const AjPDouble freqs)
425 {
426 double actmw;
427 double minmw;
428 double maxmw;
429 double smw;
430 ajint ft;
431 double qtol;
432 double f;
433 double sumf;
434 double bestmw = 0.;
435 static double min = (double)0.;
436 static ajint n = 0;
437
438 ajint i;
439 ajint j;
440 ajint nd;
441
442 ajint x;
443 ajint myindex;
444 AjBool ispart = ajFalse;
445 ajint isumf;
446 AjPInt found = NULL;
447 PHits hits = NULL;
448 EmbPMolFrag *frags = NULL;
449
450
451 ajListReverse(flist);
452 ajListToarray(flist,(void ***)&frags);
453
454
455 sumf = (double)1.;
456
457 smw = cmw;
458
459
460 cmw /= (double)10000.;
461 if(cmw>(double)79.)
462 cmw = (double)79.;
463
464
465 found = ajIntNew();
466
467
468 for(i=0;i<dno;++i)
469 {
470 actmw = data[i]->mwt;
471 qtol = actmw / (double)100.;
472 minmw = actmw - (tol*qtol);
473 if(minmw<(double)0.)
474 minmw = (double)0.;
475 maxmw = actmw + (tol*qtol);
476
477 x = emowse_get_index(actmw,maxmw,minmw,frags,nfrags,&bestmw,&myindex,
478 i,seq,data);
479
480 if(bestmw > MILLION)
481 {
482 bestmw -= MILLION;
483 ispart = ajTrue;
484 }
485 else
486 ispart = ajFalse;
487
488 if(x == -2)
489 {
490 for(j=0;j<dno;++j)
491 ajIntPut(&found,j,-1);
492 break;
493 }
494
495
496 if(x<0)
497 {
498 ajIntPut(&found,i,-1);
499 continue;
500 }
501 ajIntPut(&found,i,x);
502
503 if(ispart)
504 bestmw *= partials;
505
506 if(ispart && bestmw < (double)101.)
507 f = (double).99;
508 else
509 {
510 f = bestmw / (double)100.;
511 if((ajint)f >= 200)
512 f = (double) 199.;
513
514 ft = (rno-1)*16000 + (ajint)cmw*200 + (ajint)f;
515 f = ajDoubleGet(freqs,ft);
516 if(!f)
517 continue;
518 }
519
520 sumf *= f;
521 if(sumf < (double)1.0e-31)
522 sumf = (double)1.0e-31;
523 }
524
525
526 isumf = (ajint)sumf;
527
528 sumf = (double)1. / sumf;
529 sumf *= (double)50000. / (cmw*(double)10000.);
530
531
532 if(n<MAXLIST && isumf!=1)
533 {
534 AJNEW0(hits);
535 hits->seq = ajSeqGetSeqCopyS(seq);
536 hits->desc = ajStrNewC(ajStrGetPtr(ajSeqGetDescS(seq)));
537 hits->found = found;
538 hits->score = sumf;
539 hits->mwt = smw;
540 hits->name = ajStrNewC(ajSeqGetNameC(seq));
541 hits->frags = frags;
542 hits->nf = nfrags;
543 ajListPush(hlist,(void *)hits);
544 ajListSort(hlist, &emowse_hit_sort);
545 ajListPop(hlist,(void **)&hits);
546 min = hits->score;
547 ajListPush(hlist,(void *)hits);
548 ++n;
549 }
550 else if(sumf>min && isumf!=1)
551 {
552 ajListPop(hlist,(void **)&hits);
553 ajStrDel(&hits->seq);
554 ajStrDel(&hits->name);
555 ajStrDel(&hits->desc);
556 ajIntDel(&hits->found);
557 nd = hits->nf;
558 for(i=0;i<nd;++i)
559 AJFREE(hits->frags[i]);
560
561 AJFREE(hits->frags);
562
563 AJFREE(hits);
564
565 AJNEW0(hits);
566 hits->seq = ajSeqGetSeqCopyS(seq);
567 hits->desc = ajStrNewC(ajStrGetPtr(ajSeqGetDescS(seq)));
568 hits->found = found;
569 hits->name = ajStrNewC(ajSeqGetNameC(seq));
570 hits->score = sumf;
571 hits->mwt = smw;
572 hits->frags = frags;
573 hits->nf = nfrags;
574
575 ajListPush(hlist,(void *)hits);
576 ajListSort(hlist, &emowse_hit_sort);
577 ajListPop(hlist,(void **)&hits);
578 min = hits->score;
579 ajListPush(hlist,(void *)hits);
580 }
581 else
582 {
583 ajIntDel(&found);
584 for(i=0;i<nfrags;++i)
585 AJFREE(frags[i]);
586 AJFREE(frags);
587 }
588
589 return;
590 }
591
592
593
594
595 /* @funcstatic emowse_get_index ***********************************************
596 **
597 ** Undocumented.
598 **
599 ** @param [r] actmw [double] Undocumented
600 ** @param [r] maxmw [double] Undocumented
601 ** @param [r] minmw [double] Undocumented
602 ** @param [r] frags [EmbPMolFrag const *] Undocumented
603 ** @param [r] fno [ajint] Undocumented
604 ** @param [w] bestmw [double*] Undocumented
605 ** @param [w] myindex [ajint*] Undocumented
606 ** @param [r] thys [ajint] Undocumented
607 ** @param [r] seq [const AjPSeq] Undocumented
608 ** @param [r] data [EmbPMdata const *] Undocumented
609 ** @return [ajint] Undocumented
610 ** @@
611 ******************************************************************************/
612
emowse_get_index(double actmw,double maxmw,double minmw,EmbPMolFrag const * frags,ajint fno,double * bestmw,ajint * myindex,ajint thys,const AjPSeq seq,EmbPMdata const * data)613 static ajint emowse_get_index(double actmw, double maxmw, double minmw,
614 EmbPMolFrag const *frags,
615 ajint fno, double *bestmw,
616 ajint *myindex, ajint thys, const AjPSeq seq,
617 EmbPMdata const *data)
618 {
619 double mw1;
620 double mw2;
621 double best;
622 ajint cnt;
623 ajint fl;
624 ajint bidx = 0;
625
626
627 cnt = fl = 0;
628 best = (double)-1.0;
629
630 /*
631 ** This assumes the lowest array entry has been sorted to be the highest
632 ** mw
633 */
634
635 while(cnt<fno)
636 {
637 mw1 = frags[cnt]->mwt;
638
639 if(mw1>MILLION || mw1>maxmw)
640 {
641 ++cnt;
642 continue;
643 }
644
645 if(mw1<minmw)
646 break;
647
648
649 best = mw1;
650 bidx = cnt;
651
652
653 while(cnt+1<fno)
654 {
655 mw2 = frags[cnt+1]->mwt;
656
657 if(mw2>MILLION)
658 {
659 ++cnt;
660 continue;
661
662 }
663
664 if(abs((int)(mw2-actmw))>abs((int)(mw1-actmw)))
665 break;
666 else
667 {
668 mw1 = best = mw2;
669 bidx = cnt;
670 ++cnt;
671 }
672 }
673
674 break;
675 }
676
677 fl = (cnt==fno) ? 1 : 0;
678
679 if(best != (double)-1.)
680 {
681 if(!emowse_seq_comp(bidx,thys,seq,data,frags))
682 return -1;
683 *bestmw = best;
684 *myindex = bidx;
685 return bidx;
686 }
687
688 cnt = 0;
689 while(cnt<fno)
690 {
691 mw1 = frags[cnt]->mwt;
692 if(mw1<MILLION)
693 {
694 ++cnt;
695 continue;
696 }
697 mw1 -= MILLION;
698
699 if(mw1<minmw)
700 break;
701
702 if(mw1>maxmw)
703 {
704 ++cnt;
705 continue;
706 }
707
708 best = mw1;
709 bidx = cnt;
710 while(cnt+1>=fno)
711 {
712 mw2 = frags[cnt+1]->mwt;
713
714 if(mw2<MILLION)
715 {
716 ++cnt;
717 continue;
718 }
719 mw2 -= MILLION;
720
721 if(fabs(mw2-actmw)>fabs(mw1-actmw))
722 break;
723 else
724 {
725 mw1 = best = mw2;
726 bidx = cnt++;
727 }
728 }
729
730 break;
731 }
732
733 if(cnt==fno && fl)
734 return -2;
735
736 if(best == (double)-1.)
737 return -1;
738
739 if(!emowse_seq_comp(bidx,thys,seq,data,frags))
740 return -1;
741
742 *bestmw = best + MILLION;
743 *myindex = bidx + (ajint)MILLION;
744
745 return *myindex;
746 }
747
748
749
750
751 /* @funcstatic emowse_seq_comp ************************************************
752 **
753 ** Undocumented.
754 **
755 ** @param [r] bidx [ajint] Undocumented
756 ** @param [r] thys [ajint] Undocumented
757 ** @param [r] seq [const AjPSeq] Undocumented
758 ** @param [r] data [EmbPMdata const *] Undocumented
759 ** @param [r] frags [EmbPMolFrag const *] Undocumented
760 ** @return [ajint] Undocumented
761 ** @@
762 ******************************************************************************/
763
emowse_seq_comp(ajint bidx,ajint thys,const AjPSeq seq,EmbPMdata const * data,EmbPMolFrag const * frags)764 static ajint emowse_seq_comp(ajint bidx, ajint thys, const AjPSeq seq,
765 EmbPMdata const *data, EmbPMolFrag const *frags)
766 {
767 ajint beg;
768 ajint end;
769 ajint len;
770 AjPStr result = NULL;
771 const AjPStr str = NULL;
772 AjPStr substr = NULL;
773 AjPStrTok token = NULL;
774 const char *p;
775
776
777 if(!ajStrGetLen(data[thys]->sdata))
778 return 1;
779
780 beg = frags[bidx]->begin - 1;
781 end = frags[bidx]->end - 1;
782
783 str = ajSeqGetSeqS(seq);
784
785 result = ajStrNew();
786 substr = ajStrNew();
787 ajStrAssignSubS(&substr,str,beg,end);
788 ajStrFmtUpper(&substr);
789
790 token = ajStrTokenNewC(data[thys]->sdata," \r\t\n");
791
792 while(ajStrTokenNextParseC(token," \r\t\n",&result))
793 {
794 len = ajStrGetLen(result);
795 ajStrFmtUpper(&result);
796 p = ajStrGetPtr(result);
797
798 if(p[len-1]!=')')
799 ajFatal("Missing ')' in subline %S",substr);
800
801 if(ajStrPrefixC(result,"SEQ("))
802 {
803 ajStrAssignC(&result,p+4);
804 ajStrPasteCountK(&result,5,'\0',1);
805 if(!emowse_seq_search(substr,&result))
806 return 0;
807 }
808 else if(ajStrPrefixC(result,"COMP("))
809 {
810 ajStrAssignC(&result,p+5);
811 ajStrPasteCountK(&result, 5, '\0', 1);
812 if(!emowse_comp_search(substr,ajStrGetPtr(result)))
813 return 0;
814 }
815 else
816 ajFatal("Unknown Query type [%S]",result);
817
818 }
819 ajStrTokenDel(&token);
820
821
822 ajStrDel(&substr);
823 ajStrDel(&result);
824
825 return 1;
826 }
827
828
829
830
831 /* @funcstatic emowse_mreverse ************************************************
832 **
833 ** Undocumented.
834 **
835 ** @param [u] s [char*] Undocumented
836 ** @@
837 ******************************************************************************/
838
emowse_mreverse(char * s)839 static void emowse_mreverse(char *s)
840 {
841 ajint i;
842 ajint len;
843 char *p;
844 AjPStr rev;
845 size_t stlen;
846
847 rev = ajStrNewC(s);
848 ajStrReverse(&rev);
849 p = ajStrGetuniquePtr(&rev);
850
851 stlen = strlen(s);
852 len = (ajint) stlen;
853
854 for(i=0;i<len;++i)
855 {
856 if(p[i]==']')
857 {
858 p[i]='[';
859 continue;
860 }
861
862 if(p[i]=='[')
863 p[i]=']';
864 }
865
866 strcpy(s,ajStrGetPtr(rev));
867
868 ajStrDel(&rev);
869
870 return;
871 }
872
873
874
875
876 /* @funcstatic emowse_seq_search **********************************************
877 **
878 ** Undocumented.
879 **
880 ** @param [r] substr [const AjPStr] Undocumented
881 ** @param [u] str [AjPStr*] Undocumented
882 ** @return [AjBool] Undocumented
883 ** @@
884 ******************************************************************************/
885
emowse_seq_search(const AjPStr substr,AjPStr * str)886 static AjBool emowse_seq_search(const AjPStr substr, AjPStr *str)
887 {
888 const char *p;
889 char *q;
890 char* s;
891 static AjPStr t = NULL;
892
893 if(!t)
894 t = ajStrNew();
895
896 p = ajStrGetPtr(substr);
897 ajStrAssignC(&t,p);
898 q = ajStrGetuniquePtr(&t);
899 s = ajStrGetuniquePtr(str);
900
901 if(!strncmp(s,"B-",2))
902 {
903 if(!emowse_msearch(q,s+2,ajFalse))
904 return ajFalse;
905 }
906 else if(!strncmp(s,"N-",2))
907 {
908 if(!emowse_msearch(q,s+2,ajTrue))
909 return ajFalse;
910 }
911 else if(!strncmp(s,"C-",2))
912 {
913 emowse_mreverse(s+2);
914 emowse_mreverse(q);
915
916 if(!emowse_msearch(q,s+2,ajTrue))
917 return ajFalse;
918 }
919 else if(!strncmp(s,"*-",2))
920 {
921 if(!emowse_msearch(q,s+2,ajFalse))
922 return ajTrue;
923 emowse_mreverse(s+2);
924
925 if(!emowse_msearch(q,s+2,ajFalse))
926 return ajFalse;
927 }
928
929 return ajTrue;
930 }
931
932
933
934
935 /* @funcstatic emowse_msearch *************************************************
936 **
937 ** Undocumented.
938 **
939 ** @param [r] seq [const char*] Undocumented
940 ** @param [r] pat [const char*] Undocumented
941 ** @param [r] term [AjBool] Undocumented
942 ** @return [AjBool] Undocumented
943 ** @@
944 ******************************************************************************/
945
emowse_msearch(const char * seq,const char * pat,AjBool term)946 static AjBool emowse_msearch(const char *seq, const char *pat, AjBool term)
947 {
948 AjPStr orc = NULL;
949
950 ajint qpos;
951 ajint fpos;
952
953 AjBool fl;
954 ajint i;
955 ajint t;
956 ajint ofpos;
957 const char *p;
958
959 qpos = ofpos = fpos = 0;
960 fl = ajFalse;
961
962
963 orc = ajStrNew();
964
965 while(pat[qpos])
966 {
967 if((term && fl) || !seq[fpos])
968 {
969 ajStrDel(&orc);
970 return ajFalse;
971 }
972
973
974 if(pat[qpos]=='X')
975 {
976 ++qpos;
977 ++fpos;
978 continue;
979 }
980
981 if(pat[qpos]=='[')
982 {
983 ++qpos;
984 while(pat[qpos]!=']')
985 {
986 if(!pat[qpos])
987 ajFatal("Missing ']' in term %s",pat);
988
989 ajStrAppendK(&orc,pat[qpos++]);
990 }
991
992 t = ajStrGetLen(orc);
993 p = ajStrGetPtr(orc);
994 for(i=0;i<t;++i)
995 if(p[i]==seq[fpos])
996 break;
997
998 if(t==i)
999 {
1000 fpos = ++ofpos;
1001 qpos=0;
1002 fl=ajTrue;
1003 continue;
1004 }
1005 else
1006 {
1007 ++fpos;
1008 ++qpos;
1009 continue;
1010 }
1011 }
1012
1013 if(pat[qpos]==seq[fpos])
1014 {
1015 ++fpos;
1016 ++qpos;
1017 continue;
1018 }
1019 else
1020 {
1021 fpos = ++ofpos;
1022 qpos = 0;
1023 fl = ajTrue;
1024 }
1025 }
1026
1027
1028 ajStrDel(&orc);
1029
1030 return ajTrue;
1031 }
1032
1033
1034
1035
1036 /* @funcstatic emowse_comp_search *********************************************
1037 **
1038 ** Undocumented.
1039 **
1040 ** @param [r] substr [const AjPStr] Undocumented
1041 ** @param [r] s [const char*] Undocumented
1042 ** @return [AjBool] Undocumented
1043 ** @@
1044 ******************************************************************************/
1045
emowse_comp_search(const AjPStr substr,const char * s)1046 static AjBool emowse_comp_search(const AjPStr substr, const char *s)
1047 {
1048 AjPInt arr;
1049 ajint i;
1050 ajint len;
1051 ajint v;
1052 ajint n;
1053 ajint w;
1054
1055 ajint qpos;
1056 char c;
1057 AjPStr orc;
1058 const char *r;
1059
1060
1061 const char *p;
1062 AjPStr t;
1063
1064 p = ajStrGetPtr(substr);
1065 len = ajStrGetLen(substr);
1066
1067 arr = ajIntNewRes(256);
1068
1069 for(i=0;i<256;++i)
1070 ajIntPut(&arr,i,0);
1071
1072 for(i=0;i<len;++i)
1073 {
1074 v = ajIntGet(arr,(ajint)p[i]);
1075 ajIntPut(&arr,(ajint)p[i],v+1);
1076 }
1077
1078
1079 t = ajStrNewC(s);
1080 ajStrRemoveWhite(&t);
1081
1082 p = ajStrGetPtr(t);
1083 qpos = 0;
1084 orc = ajStrNew();
1085
1086 while((c=p[qpos]))
1087 {
1088 if(c=='*')
1089 {
1090 n = emowse_get_orc(&orc,p,qpos);
1091 r = ajStrGetPtr(orc);
1092 qpos += (n+3);
1093 for(i=0;i<n;++i)
1094 if(ajIntGet(arr,r[i]))
1095 break;
1096
1097 if(i==n)
1098 {
1099 ajStrDel(&t);
1100 ajStrDel(&orc);
1101 ajIntDel(&arr);
1102 return ajFalse;
1103 }
1104
1105 continue;
1106 }
1107
1108 i = qpos;
1109 while((c=p[i])>='0' && c<='9')
1110 ++i;
1111
1112 if(i==qpos || p[i]!='[')
1113 ajFatal("Bad integer [%s]",p);
1114
1115 if(sscanf(p+qpos,"%d",&v)!=1)
1116 ajFatal("Bad integer [%s]",p);
1117
1118 qpos = --i;
1119 ajStrSetClear(&orc);
1120 n = emowse_get_orc(&orc,p,qpos);
1121 r = ajStrGetPtr(orc);
1122 qpos += (n+3);
1123 w = 0;
1124
1125 for(i=0;i<n;++i)
1126 w += ajIntGet(arr,(ajint)r[i]);
1127
1128 if(w!=v)
1129 {
1130 ajStrDel(&t);
1131 ajStrDel(&orc);
1132 ajIntDel(&arr);
1133 return ajFalse;
1134 }
1135 }
1136
1137
1138 ajStrDel(&orc);
1139 ajStrDel(&t);
1140 ajIntDel(&arr);
1141
1142 return ajTrue;
1143 }
1144
1145
1146
1147
1148 /* @funcstatic emowse_get_orc *************************************************
1149 **
1150 ** Undocumented.
1151 **
1152 ** @param [u] orc [AjPStr*] Undocumented
1153 ** @param [r] s [const char*] Undocumented
1154 ** @param [r] pos [ajint] Undocumented
1155 ** @return [ajint] Undocumented
1156 ** @@
1157 ******************************************************************************/
1158
emowse_get_orc(AjPStr * orc,const char * s,ajint pos)1159 static ajint emowse_get_orc(AjPStr *orc, const char *s, ajint pos)
1160 {
1161 ajint i;
1162
1163 i = 0;
1164 if(s[++pos]!='[')
1165 ajFatal("Bad query given [%s]",s);
1166 ++pos;
1167
1168 while(s[pos]!=']')
1169 {
1170 if(!s[pos])
1171 ajFatal("Unterminated square brackets [%s]",s);
1172 ajStrAppendK(orc,s[pos++]);
1173 ++i;
1174 }
1175
1176 return i;
1177 }
1178
1179
1180
1181
1182 /* @funcstatic emowse_print_hits **********************************************
1183 **
1184 ** Undocumented.
1185 **
1186 ** @param [u] outf [AjPFile] Undocumented
1187 ** @param [u] hlist [AjPList] Undocumented
1188 ** @param [r] dno [ajint] Undocumented
1189 ** @param [r] data [EmbPMdata const *] Undocumented
1190 ** @@
1191 ******************************************************************************/
1192
emowse_print_hits(AjPFile outf,AjPList hlist,ajint dno,EmbPMdata const * data)1193 static void emowse_print_hits(AjPFile outf, AjPList hlist, ajint dno,
1194 EmbPMdata const * data)
1195 {
1196 PHits hits = NULL;
1197 AjIList iter = NULL;
1198 ajint c;
1199 ajint i;
1200 ajint j;
1201 ajint pvt;
1202 double conf;
1203 AjBool partial;
1204 AjPFloat nmarray = NULL;
1205 ajint nmn;
1206 ajint len;
1207 ajint v;
1208 AjPStr substr = NULL;
1209
1210
1211 nmarray = ajFloatNew();
1212 substr = ajStrNew();
1213
1214 ajListReverse(hlist);
1215
1216
1217 ajFmtPrintF(outf,"\nUsing data fragments of:\n");
1218
1219 for(i=0;i<dno;++i)
1220 ajFmtPrintF(outf," %7.1f %S\n",data[i]->mwt,data[i]->sdata);
1221
1222 ajFmtPrintF(outf,"\n");
1223
1224 iter = ajListIterNewread(hlist);
1225 c = 0;
1226 while(!ajListIterDone(iter))
1227 {
1228 hits = (PHits) ajListIterGet(iter);
1229 ajFmtPrintF(outf,"%-3d %-13S%-62.62S\n",++c,hits->name,hits->desc);
1230 }
1231 ajListIterDel(&iter);
1232
1233
1234 iter = ajListIterNewread(hlist);
1235 c = 0;
1236 while(!ajListIterDone(iter))
1237 {
1238 hits = (PHits) ajListIterGet(iter);
1239
1240 for(i=pvt=0;i<dno;++i)
1241 if(ajIntGet(hits->found,i) > -1)
1242 ++pvt;
1243 conf = (double)pvt / (double)dno;
1244
1245
1246 ajFmtPrintF(outf,"\n %-3d: %-12S %.3e %-8.1f %-6.3f\n",++c,
1247 hits->name,hits->score,hits->mwt,conf);
1248 ajFmtPrintF(outf," %S\n",hits->desc);
1249 ajFmtPrintF(outf," Mw Start End Seq\n");
1250
1251 for(i=nmn=0;i<dno;++i)
1252 {
1253 partial = (ajIntGet(hits->found,i)>1000000) ? ajTrue : ajFalse;
1254
1255 if((v=ajIntGet(hits->found,i))>-1)
1256 {
1257 if(v>=(ajint)MILLION)
1258 v -= (ajint)MILLION;
1259
1260 ajStrAssignSubC(&substr,ajStrGetPtr(hits->seq),
1261 hits->frags[v]->begin-1,
1262 hits->frags[v]->end-1);
1263
1264 len = ajStrGetLen(substr);
1265 ajFmtPrintF(outf," ");
1266
1267 if(partial)
1268 ajFmtPrintF(outf,"*");
1269 else
1270 ajFmtPrintF(outf," ");
1271
1272 if(hits->frags[v]->mwt > MILLION)
1273 hits->frags[v]->mwt -= MILLION;
1274
1275 ajFmtPrintF(outf,"%-6.1f %-6d %-6d %-45.45S",
1276 hits->frags[v]->mwt,hits->frags[v]->begin,
1277 hits->frags[v]->end,substr);
1278 if(len>45)
1279 ajFmtPrintF(outf,"...");
1280 ajFmtPrintF(outf,"\n");
1281 }
1282 else
1283 ajFloatPut(&nmarray,nmn++,(float)data[i]->mwt);
1284 }
1285 if(nmn)
1286 {
1287 ajFmtPrintF(outf," No Match ");
1288 for(i=0,j=0;i<nmn;++i,++j)
1289 {
1290 ajFmtPrintF(outf,"%-6.1f ",ajFloatGet(nmarray,i));
1291
1292 if(j==6 && i!=nmn-1)
1293 {
1294 ajFmtPrintF(outf,"\n ");
1295 j = 0;
1296 }
1297 }
1298 ajFmtPrintF(outf,"\n");
1299 }
1300
1301 ajStrDel(&hits->name);
1302 ajStrDel(&hits->seq);
1303 ajStrDel(&hits->desc);
1304 ajIntDel(&hits->found);
1305 len = hits->nf;
1306
1307 for(i=0;i<len;++i)
1308 AJFREE(hits->frags[i]);
1309 AJFREE(hits->frags);
1310 AJFREE(hits);
1311 }
1312
1313 ajListIterDel(&iter);
1314 ajStrDel(&substr);
1315 ajFloatDel(&nmarray);
1316
1317
1318 return;
1319 }
1320