1 /* @source emowse application
2 **
3 ** Finds proteins matching mass spectrometry data
4 **
5 ** @author Copyright (C) Alan Bleasby (ableasby@hgmp.mrc.ac.uk)
6 ** @@
7 **
8 ** This program is free software; you can redistribute it and/or
9 ** modify it under the terms of the GNU General Public License
10 ** as published by the Free Software Foundation; either version 2
11 ** of the License, or (at your option) any later version.
12 **
13 ** This program is distributed in the hope that it will be useful,
14 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
15 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 ** GNU General Public License for more details.
17 **
18 ** You should have received a copy of the GNU General Public License
19 ** along with this program; if not, write to the Free Software
20 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
21 ******************************************************************************/
22 
23 #include "emboss.h"
24 #include <math.h>
25 #include <string.h>
26 #include <errno.h>
27 
28 
29 
30 
31 #define FGUESS 128000		/* Uncritical guess of freq data size */
32 #define MILLION (double)1000000.
33 #define MAXLIST 50
34 
35 
36 
37 
38 /* @datastatic EmbPMdata ******************************************************
39 **
40 ** EMowse data
41 **
42 ** @alias EmbSMdata
43 ** @alias EmbOMdata
44 **
45 ** @attr mwt [double] Molecular weight
46 ** @attr sdata [AjPStr] string data
47 ******************************************************************************/
48 
49 typedef struct EmbSMdata
50 {
51     double mwt;
52     AjPStr sdata;
53 } EmbOMdata;
54 #define EmbPMdata EmbOMdata*
55 
56 
57 
58 
59 /* @datastatic PHits **********************************************************
60 **
61 ** EMowse hits
62 **
63 ** @alias SHits
64 ** @alias OHits
65 **
66 ** @attr seq [AjPStr] Sequence
67 ** @attr name [AjPStr] Name
68 ** @attr desc [AjPStr] Description
69 ** @attr found [AjPInt] Found data
70 ** @attr score [double] Score
71 ** @attr mwt [double] Molecular weight
72 ** @attr frags [EmbPMolFrag*] Fragment data
73 ** @attr nf [ajint] Number of fragments
74 ** @attr Padding [char[4]] Padding to alignment boundary
75 ******************************************************************************/
76 
77 typedef struct SHits
78 {
79     AjPStr seq;
80     AjPStr name;
81     AjPStr desc;
82     AjPInt found;
83     double score;
84     double mwt;
85     EmbPMolFrag* frags;
86     ajint    nf;
87     char Padding[4];
88 } OHits;
89 #define PHits OHits*
90 
91 
92 
93 
94 static void emowse_read_freqs(AjPFile finf, AjPDouble *freqs);
95 static AjBool emowse_molwt_outofrange(double thys, double given, double range);
96 static ajint emowse_read_data(AjPFile inf, EmbPMdata** data);
97 static ajint emowse_sort_data(const void *a, const void *b);
98 static ajint emowse_hit_sort(const void *a, const void *b);
99 static void emowse_match(EmbPMdata const * data, ajint dno, AjPList flist,
100 			 ajint nfrags, double tol,
101 			 const AjPSeq seq, AjPList hlist,
102 			 double partials, double cmw, ajint enz,
103 			 const AjPDouble freqs);
104 
105 static ajint emowse_seq_comp(ajint bidx, ajint thys, const AjPSeq seq,
106 			     EmbPMdata const *data, EmbPMolFrag const *frags);
107 static ajint emowse_get_index(double actmw, double maxmw, double minmw,
108 			      EmbPMolFrag const *frags,
109 			      ajint fno, double *bestmw,
110 			      ajint *myindex, ajint thys, const AjPSeq seq,
111 			      EmbPMdata const *data);
112 
113 static ajint emowse_seq_search(const AjPStr substr, AjPStr* s);
114 static AjBool emowse_msearch(const char *seq, const char *pat, AjBool term);
115 static void emowse_mreverse(char *s);
116 static ajint emowse_get_orc(AjPStr *orc, const char *s, ajint pos);
117 static AjBool emowse_comp_search(const AjPStr substr, const char *s);
118 static void emowse_print_hits(AjPFile outf, AjPList hlist, ajint dno,
119 			      EmbPMdata const * data);
120 
121 
122 
123 
124 /* @prog emowse ***************************************************************
125 **
126 ** Protein identification by mass spectrometry
127 **
128 ******************************************************************************/
129 
main(int argc,char ** argv)130 int main(int argc, char **argv)
131 {
132     AjPSeq seq = NULL;
133     AjPSeqall seqall = NULL;
134     AjPFile outf = NULL;
135     AjPFile mwinf = NULL;
136     AjPFile ffile = NULL;
137     AjPStr enzyme = NULL;
138     ajint smolwt;
139     ajint range;
140     float tol;
141     float partials;
142     AjPDouble freqs = NULL;
143     ajint begin;
144     ajint end;
145     double smw;
146     ajint rno;
147     ajint i;
148 
149     AjPList flist = NULL;
150     EmbPMdata *data = NULL;
151     ajint dno;
152     ajint nfrags;
153     AjPList hlist = NULL;
154     AjPFile mfptr = NULL;
155 
156     EmbPPropMolwt *mwdata = NULL;
157     AjBool mono;
158 
159     embInit("emowse", argc, argv);
160 
161     seqall   = ajAcdGetSeqall("sequence");
162     mwinf    = ajAcdGetInfile("infile");
163     enzyme   = ajAcdGetListSingle("enzyme");
164     smolwt   = ajAcdGetInt("weight");
165     range    = ajAcdGetInt("pcrange");
166     ffile    = ajAcdGetDatafile("frequencies");
167     tol      = ajAcdGetFloat("tolerance");
168     partials = ajAcdGetFloat("partials");
169     outf     = ajAcdGetOutfile("outfile");
170     mfptr    = ajAcdGetDatafile("mwdata");
171     mono     = ajAcdGetBoolean("mono");
172 
173     mwdata = embPropEmolwtRead(mfptr);
174 
175     freqs = ajDoubleNewRes(FGUESS);
176     emowse_read_freqs(ffile, &freqs);
177     ajFileClose(&ffile);
178 
179     if(ajFmtScanS(enzyme,"%d",&rno)!=1)
180 	ajFatal("Illegal enzyme entry [%S]",enzyme);
181 
182 
183     if(!(dno = emowse_read_data(mwinf,&data)))
184 	ajFatal("No molecular weights in the file");
185     ajFileClose(&mwinf);
186 
187 
188     hlist = ajListNew();
189 
190 
191     while(ajSeqallNext(seqall,&seq))
192     {
193 	begin = ajSeqallGetseqBegin(seqall);
194 	end   = ajSeqallGetseqEnd(seqall);
195 
196 
197 	smw = embPropCalcMolwt(ajSeqGetSeqC(seq),--begin,--end,mwdata,mono);
198 	if(smolwt)
199 	    if(emowse_molwt_outofrange(smw,(double)smolwt,(double)range))
200 		continue;
201 
202 	flist  = ajListNew();
203 	nfrags = embMolGetFrags(ajSeqGetSeqS(seq),rno,mwdata,mono,&flist);
204 
205 	emowse_match(data,dno,flist,nfrags,(double)tol,seq,hlist,
206 		     (double)partials,
207 		     smw,rno,freqs);
208 
209 	ajListFree(&flist);
210     }
211 
212 
213     emowse_print_hits(outf,hlist,dno,data);
214     for(i=0;i<dno;i++)
215     {
216 	ajStrDel(&data[i]->sdata);
217 	AJFREE(data[i]);
218     }
219     AJFREE(data);
220 
221     ajListFree(&hlist);
222 
223     ajFileClose(&mfptr);
224 
225     ajSeqallDel(&seqall);
226     ajSeqDel(&seq);
227     ajFileClose(&outf);
228     ajStrDel(&enzyme);
229 
230     ajDoubleDel(&freqs);
231     ajListFree(&flist);
232 
233     embPropMolwtDel(&mwdata);
234 
235     embExit();
236 
237     return 0;
238 }
239 
240 
241 
242 
243 /* @funcstatic emowse_read_freqs **********************************************
244 **
245 ** Undocumented.
246 **
247 ** @param [u] finf [AjPFile] Undocumented
248 ** @param [w] freqs [AjPDouble*] Undocumented
249 ** @return [void]
250 ** @@
251 ******************************************************************************/
252 
emowse_read_freqs(AjPFile finf,AjPDouble * freqs)253 static void emowse_read_freqs(AjPFile finf, AjPDouble *freqs)
254 {
255     ajint c = 0;
256     AjPStr  str = NULL;
257     double f = 0.0;
258 
259     str = ajStrNew();
260 
261     while(ajReadlineTrim(finf,&str))
262     {
263         if(!ajStrToDouble(str, &f))
264 	    ajWarn("Bad record in frequencies file '%F': '%S'\n",
265 		   finf, str);
266 
267         ajDoublePut(freqs,c,f);
268 
269 	++c;
270     }
271 
272     ajStrDel(&str);
273 
274     return;
275 }
276 
277 
278 
279 
280 /* @funcstatic emowse_molwt_outofrange ****************************************
281 **
282 ** Undocumented.
283 **
284 ** @param [r] thys [double] Undocumented
285 ** @param [r] given [double] Undocumented
286 ** @param [r] range [double] Undocumented
287 ** @return [AjBool] Undocumented
288 ** @@
289 ******************************************************************************/
290 
emowse_molwt_outofrange(double thys,double given,double range)291 static AjBool emowse_molwt_outofrange(double thys, double given, double range)
292 {
293     double diff;
294 
295     diff = given * range / (double)100.;
296 
297     if(thys>=given-diff && thys<=given+diff)
298 	return ajFalse;
299 
300     return ajTrue;
301 }
302 
303 
304 
305 
306 /* @funcstatic emowse_read_data ***********************************************
307 **
308 ** Undocumented.
309 **
310 ** @param [u] inf [AjPFile] Undocumented
311 ** @param [w] data [EmbPMdata**] Undocumented
312 ** @return [ajint] Undocumented
313 ** @@
314 ******************************************************************************/
315 
emowse_read_data(AjPFile inf,EmbPMdata ** data)316 static ajint emowse_read_data(AjPFile inf, EmbPMdata** data)
317 {
318     AjPStr str = NULL;
319     double v;
320     AjPStrTok token = NULL;
321     EmbPMdata ptr   = NULL;
322     AjPList l;
323     ajint n;
324 
325     str = ajStrNew();
326     l   = ajListNew();
327 
328     while(ajReadlineTrim(inf,&str))
329 	if(sscanf(ajStrGetPtr(str),"%lf",&v)==1)
330 	{
331 	    AJNEW0(ptr);
332 	    ptr->mwt = v;
333 	    ptr->sdata=ajStrNew();
334 	    ajStrRemoveWhiteExcess(&str);
335 	    token = ajStrTokenNewC(str," \t\r\n");
336 	    ajStrTokenNextParseC(token," \t\r\n",&ptr->sdata);
337 	    ajStrTokenNextParseC(token," \t\r\n",&ptr->sdata);
338 	    ajStrTokenDel(&token);
339 	    ajListPush(l,(void *)ptr);
340 	}
341 
342     ajListSort(l, &emowse_sort_data);
343     n = (ajuint) ajListToarray(l,(void ***)data);
344     ajListFree(&l);
345     ajStrDel(&str);
346     ajStrTokenDel(&token);
347 
348     return n;
349 }
350 
351 
352 
353 
354 /* @funcstatic emowse_sort_data ***********************************************
355 **
356 ** Undocumented.
357 **
358 ** @param [r] a [const void*] Undocumented
359 ** @param [r] b [const void*] Undocumented
360 ** @return [ajint] Undocumented
361 ** @@
362 ******************************************************************************/
363 
emowse_sort_data(const void * a,const void * b)364 static ajint emowse_sort_data(const void *a, const void *b)
365 {
366     return (ajint)((*(EmbPMdata const *)a)->mwt -
367 		   (*(EmbPMdata const *)b)->mwt);
368 }
369 
370 
371 
372 
373 /* @funcstatic emowse_hit_sort ************************************************
374 **
375 ** Undocumented.
376 **
377 ** @param [r] a [const void*] Undocumented
378 ** @param [r] b [const void*] Undocumented
379 ** @return [ajint] Undocumented
380 ** @@
381 ******************************************************************************/
382 
emowse_hit_sort(const void * a,const void * b)383 static ajint emowse_hit_sort(const void *a, const void *b)
384 {
385     double x;
386     double y;
387 
388     x = (*(PHits const *)a)->score;
389     y = (*(PHits const *)b)->score;
390 
391     if(x==y)
392 	return 0;
393     else if(x<y)
394 	return -1;
395 
396     return 1;
397 }
398 
399 
400 
401 
402 /* @funcstatic emowse_match ***************************************************
403 **
404 ** Undocumented.
405 **
406 ** @param [r] data [EmbPMdata const *] Undocumented
407 ** @param [r] dno [ajint] Undocumented
408 ** @param [u] flist [AjPList] Undocumented
409 ** @param [r] nfrags [ajint] Undocumented
410 ** @param [r] tol [double] Undocumented
411 ** @param [r] seq [const AjPSeq] Undocumented
412 ** @param [u] hlist [AjPList] Undocumented
413 ** @param [r] partials [double] Undocumented
414 ** @param [r] cmw [double] Undocumented
415 ** @param [r] rno [ajint] Undocumented
416 ** @param [r] freqs [const AjPDouble] Undocumented
417 ** @@
418 ******************************************************************************/
419 
emowse_match(EmbPMdata const * data,ajint dno,AjPList flist,ajint nfrags,double tol,const AjPSeq seq,AjPList hlist,double partials,double cmw,ajint rno,const AjPDouble freqs)420 static void emowse_match(EmbPMdata const * data, ajint dno, AjPList flist,
421 			 ajint nfrags, double tol,
422 			 const AjPSeq seq, AjPList hlist,
423 			 double partials, double cmw, ajint rno,
424 			 const AjPDouble freqs)
425 {
426     double actmw;
427     double minmw;
428     double maxmw;
429     double smw;
430     ajint ft;
431     double qtol;
432     double f;
433     double sumf;
434     double bestmw = 0.;
435     static double min = (double)0.;
436     static ajint n = 0;
437 
438     ajint i;
439     ajint j;
440     ajint nd;
441 
442     ajint x;
443     ajint myindex;
444     AjBool ispart = ajFalse;
445     ajint isumf;
446     AjPInt found = NULL;
447     PHits hits   = NULL;
448     EmbPMolFrag *frags = NULL;
449 
450 
451     ajListReverse(flist);
452     ajListToarray(flist,(void ***)&frags);
453 
454 
455     sumf = (double)1.;
456 
457     smw = cmw;
458 
459 
460     cmw /= (double)10000.;
461     if(cmw>(double)79.)
462 	cmw = (double)79.;
463 
464 
465     found = ajIntNew();
466 
467 
468     for(i=0;i<dno;++i)
469     {
470 	actmw = data[i]->mwt;
471 	qtol  = actmw / (double)100.;
472 	minmw = actmw - (tol*qtol);
473 	if(minmw<(double)0.)
474 	    minmw = (double)0.;
475 	maxmw = actmw + (tol*qtol);
476 
477 	x = emowse_get_index(actmw,maxmw,minmw,frags,nfrags,&bestmw,&myindex,
478 			     i,seq,data);
479 
480 	if(bestmw > MILLION)
481 	{
482 	    bestmw -= MILLION;
483 	    ispart = ajTrue;
484 	}
485 	else
486 	    ispart = ajFalse;
487 
488 	if(x == -2)
489 	{
490 	    for(j=0;j<dno;++j)
491 		ajIntPut(&found,j,-1);
492 	    break;
493 	}
494 
495 
496 	if(x<0)
497 	{
498 	    ajIntPut(&found,i,-1);
499 	    continue;
500 	}
501 	ajIntPut(&found,i,x);
502 
503 	if(ispart)
504 	    bestmw *= partials;
505 
506 	if(ispart && bestmw < (double)101.)
507 	    f = (double).99;
508 	else
509 	{
510 	    f = bestmw / (double)100.;
511 	    if((ajint)f >= 200)
512 		f = (double) 199.;
513 
514 	    ft = (rno-1)*16000 + (ajint)cmw*200 + (ajint)f;
515 	    f  = ajDoubleGet(freqs,ft);
516 	    if(!f)
517 		continue;
518 	}
519 
520 	sumf *= f;
521 	if(sumf < (double)1.0e-31)
522 	    sumf = (double)1.0e-31;
523     }
524 
525 
526     isumf = (ajint)sumf;
527 
528     sumf = (double)1. / sumf;
529     sumf *= (double)50000. / (cmw*(double)10000.);
530 
531 
532     if(n<MAXLIST && isumf!=1)
533     {
534 	AJNEW0(hits);
535 	hits->seq   = ajSeqGetSeqCopyS(seq);
536 	hits->desc  = ajStrNewC(ajStrGetPtr(ajSeqGetDescS(seq)));
537 	hits->found = found;
538 	hits->score = sumf;
539 	hits->mwt   = smw;
540 	hits->name  = ajStrNewC(ajSeqGetNameC(seq));
541 	hits->frags = frags;
542 	hits->nf = nfrags;
543 	ajListPush(hlist,(void *)hits);
544 	ajListSort(hlist, &emowse_hit_sort);
545 	ajListPop(hlist,(void **)&hits);
546 	min = hits->score;
547 	ajListPush(hlist,(void *)hits);
548 	++n;
549     }
550     else if(sumf>min && isumf!=1)
551     {
552 	ajListPop(hlist,(void **)&hits);
553 	ajStrDel(&hits->seq);
554 	ajStrDel(&hits->name);
555 	ajStrDel(&hits->desc);
556 	ajIntDel(&hits->found);
557         nd = hits->nf;
558 	for(i=0;i<nd;++i)
559 	    AJFREE(hits->frags[i]);
560 
561 	AJFREE(hits->frags);
562 
563 	AJFREE(hits);
564 
565 	AJNEW0(hits);
566 	hits->seq   = ajSeqGetSeqCopyS(seq);
567 	hits->desc  = ajStrNewC(ajStrGetPtr(ajSeqGetDescS(seq)));
568 	hits->found = found;
569 	hits->name  = ajStrNewC(ajSeqGetNameC(seq));
570 	hits->score = sumf;
571 	hits->mwt   = smw;
572 	hits->frags = frags;
573         hits->nf    = nfrags;
574 
575 	ajListPush(hlist,(void *)hits);
576 	ajListSort(hlist, &emowse_hit_sort);
577 	ajListPop(hlist,(void **)&hits);
578 	min = hits->score;
579 	ajListPush(hlist,(void *)hits);
580     }
581     else
582     {
583 	ajIntDel(&found);
584 	for(i=0;i<nfrags;++i)
585 	    AJFREE(frags[i]);
586 	AJFREE(frags);
587     }
588 
589     return;
590 }
591 
592 
593 
594 
595 /* @funcstatic emowse_get_index ***********************************************
596 **
597 ** Undocumented.
598 **
599 ** @param [r] actmw [double] Undocumented
600 ** @param [r] maxmw [double] Undocumented
601 ** @param [r] minmw [double] Undocumented
602 ** @param [r] frags [EmbPMolFrag const *] Undocumented
603 ** @param [r] fno [ajint] Undocumented
604 ** @param [w] bestmw [double*] Undocumented
605 ** @param [w] myindex [ajint*] Undocumented
606 ** @param [r] thys [ajint] Undocumented
607 ** @param [r] seq [const AjPSeq] Undocumented
608 ** @param [r] data [EmbPMdata const *] Undocumented
609 ** @return [ajint] Undocumented
610 ** @@
611 ******************************************************************************/
612 
emowse_get_index(double actmw,double maxmw,double minmw,EmbPMolFrag const * frags,ajint fno,double * bestmw,ajint * myindex,ajint thys,const AjPSeq seq,EmbPMdata const * data)613 static ajint emowse_get_index(double actmw, double maxmw, double minmw,
614 			      EmbPMolFrag const *frags,
615 			      ajint fno, double *bestmw,
616 			      ajint *myindex, ajint thys, const AjPSeq seq,
617 			      EmbPMdata const *data)
618 {
619     double mw1;
620     double mw2;
621     double best;
622     ajint cnt;
623     ajint fl;
624     ajint bidx = 0;
625 
626 
627     cnt = fl = 0;
628     best = (double)-1.0;
629 
630     /*
631     **  This assumes the lowest array entry has been sorted to be the highest
632     **  mw
633     */
634 
635     while(cnt<fno)
636     {
637 	mw1 = frags[cnt]->mwt;
638 
639 	if(mw1>MILLION || mw1>maxmw)
640 	{
641 	    ++cnt;
642 	    continue;
643 	}
644 
645 	if(mw1<minmw)
646 	    break;
647 
648 
649 	best = mw1;
650 	bidx = cnt;
651 
652 
653 	while(cnt+1<fno)
654 	{
655 	    mw2 = frags[cnt+1]->mwt;
656 
657 	    if(mw2>MILLION)
658 	    {
659 		++cnt;
660 		continue;
661 
662 	    }
663 
664 	    if(abs((int)(mw2-actmw))>abs((int)(mw1-actmw)))
665 		break;
666 	    else
667 	    {
668 		mw1  = best = mw2;
669 		bidx = cnt;
670 		++cnt;
671 	    }
672 	}
673 
674 	break;
675     }
676 
677     fl = (cnt==fno) ? 1 : 0;
678 
679     if(best != (double)-1.)
680     {
681 	if(!emowse_seq_comp(bidx,thys,seq,data,frags))
682 	    return -1;
683 	*bestmw = best;
684 	*myindex  = bidx;
685 	return bidx;
686     }
687 
688     cnt = 0;
689     while(cnt<fno)
690     {
691 	mw1 = frags[cnt]->mwt;
692 	if(mw1<MILLION)
693 	{
694 	    ++cnt;
695 	    continue;
696 	}
697 	mw1 -= MILLION;
698 
699 	if(mw1<minmw)
700 	    break;
701 
702 	if(mw1>maxmw)
703 	{
704 	    ++cnt;
705 	    continue;
706 	}
707 
708 	best = mw1;
709 	bidx = cnt;
710 	while(cnt+1>=fno)
711 	{
712 	    mw2 = frags[cnt+1]->mwt;
713 
714 	    if(mw2<MILLION)
715 	    {
716 		++cnt;
717 		continue;
718 	    }
719 	    mw2 -= MILLION;
720 
721 	    if(fabs(mw2-actmw)>fabs(mw1-actmw))
722 		break;
723 	    else
724 	    {
725 		mw1 = best = mw2;
726 		bidx = cnt++;
727 	    }
728 	}
729 
730 	break;
731     }
732 
733     if(cnt==fno && fl)
734 	return -2;
735 
736     if(best == (double)-1.)
737 	return -1;
738 
739     if(!emowse_seq_comp(bidx,thys,seq,data,frags))
740 	return -1;
741 
742     *bestmw = best + MILLION;
743     *myindex  = bidx + (ajint)MILLION;
744 
745     return *myindex;
746 }
747 
748 
749 
750 
751 /* @funcstatic emowse_seq_comp ************************************************
752 **
753 ** Undocumented.
754 **
755 ** @param [r] bidx [ajint] Undocumented
756 ** @param [r] thys [ajint] Undocumented
757 ** @param [r] seq [const AjPSeq] Undocumented
758 ** @param [r] data [EmbPMdata const *] Undocumented
759 ** @param [r] frags [EmbPMolFrag const *] Undocumented
760 ** @return [ajint] Undocumented
761 ** @@
762 ******************************************************************************/
763 
emowse_seq_comp(ajint bidx,ajint thys,const AjPSeq seq,EmbPMdata const * data,EmbPMolFrag const * frags)764 static ajint emowse_seq_comp(ajint bidx, ajint thys, const AjPSeq seq,
765 			     EmbPMdata const *data, EmbPMolFrag const *frags)
766 {
767     ajint beg;
768     ajint end;
769     ajint len;
770     AjPStr result   = NULL;
771     const AjPStr str      = NULL;
772     AjPStr substr   = NULL;
773     AjPStrTok token = NULL;
774     const char *p;
775 
776 
777     if(!ajStrGetLen(data[thys]->sdata))
778 	return 1;
779 
780     beg = frags[bidx]->begin - 1;
781     end = frags[bidx]->end   - 1;
782 
783     str = ajSeqGetSeqS(seq);
784 
785     result = ajStrNew();
786     substr = ajStrNew();
787     ajStrAssignSubS(&substr,str,beg,end);
788     ajStrFmtUpper(&substr);
789 
790     token = ajStrTokenNewC(data[thys]->sdata," \r\t\n");
791 
792     while(ajStrTokenNextParseC(token," \r\t\n",&result))
793     {
794 	len = ajStrGetLen(result);
795 	ajStrFmtUpper(&result);
796 	p = ajStrGetPtr(result);
797 
798 	if(p[len-1]!=')')
799 	    ajFatal("Missing ')' in subline %S",substr);
800 
801 	if(ajStrPrefixC(result,"SEQ("))
802 	{
803 	    ajStrAssignC(&result,p+4);
804 	    ajStrPasteCountK(&result,5,'\0',1);
805 	    if(!emowse_seq_search(substr,&result))
806 		return 0;
807 	}
808 	else if(ajStrPrefixC(result,"COMP("))
809 	{
810 	    ajStrAssignC(&result,p+5);
811 	    ajStrPasteCountK(&result, 5, '\0', 1);
812 	    if(!emowse_comp_search(substr,ajStrGetPtr(result)))
813 		return 0;
814 	}
815 	else
816 	    ajFatal("Unknown Query type [%S]",result);
817 
818     }
819     ajStrTokenDel(&token);
820 
821 
822     ajStrDel(&substr);
823     ajStrDel(&result);
824 
825     return 1;
826 }
827 
828 
829 
830 
831 /* @funcstatic emowse_mreverse ************************************************
832 **
833 ** Undocumented.
834 **
835 ** @param [u] s [char*] Undocumented
836 ** @@
837 ******************************************************************************/
838 
emowse_mreverse(char * s)839 static void emowse_mreverse(char *s)
840 {
841     ajint i;
842     ajint len;
843     char *p;
844     AjPStr rev;
845     size_t stlen;
846 
847     rev = ajStrNewC(s);
848     ajStrReverse(&rev);
849     p = ajStrGetuniquePtr(&rev);
850 
851     stlen = strlen(s);
852     len   = (ajint) stlen;
853 
854     for(i=0;i<len;++i)
855     {
856 	if(p[i]==']')
857 	{
858 	    p[i]='[';
859 	    continue;
860 	}
861 
862 	if(p[i]=='[')
863 	    p[i]=']';
864     }
865 
866     strcpy(s,ajStrGetPtr(rev));
867 
868     ajStrDel(&rev);
869 
870     return;
871 }
872 
873 
874 
875 
876 /* @funcstatic emowse_seq_search **********************************************
877 **
878 ** Undocumented.
879 **
880 ** @param [r] substr [const AjPStr] Undocumented
881 ** @param [u] str [AjPStr*] Undocumented
882 ** @return [AjBool] Undocumented
883 ** @@
884 ******************************************************************************/
885 
emowse_seq_search(const AjPStr substr,AjPStr * str)886 static AjBool emowse_seq_search(const AjPStr substr, AjPStr *str)
887 {
888     const char *p;
889     char *q;
890     char* s;
891     static AjPStr t = NULL;
892 
893     if(!t)
894 	t = ajStrNew();
895 
896     p = ajStrGetPtr(substr);
897     ajStrAssignC(&t,p);
898     q = ajStrGetuniquePtr(&t);
899     s = ajStrGetuniquePtr(str);
900 
901     if(!strncmp(s,"B-",2))
902     {
903 	if(!emowse_msearch(q,s+2,ajFalse))
904 	    return ajFalse;
905     }
906     else if(!strncmp(s,"N-",2))
907     {
908 	if(!emowse_msearch(q,s+2,ajTrue))
909 	    return ajFalse;
910     }
911     else if(!strncmp(s,"C-",2))
912     {
913 	emowse_mreverse(s+2);
914 	emowse_mreverse(q);
915 
916 	if(!emowse_msearch(q,s+2,ajTrue))
917 	    return ajFalse;
918     }
919     else if(!strncmp(s,"*-",2))
920     {
921 	if(!emowse_msearch(q,s+2,ajFalse))
922 	    return ajTrue;
923 	emowse_mreverse(s+2);
924 
925 	if(!emowse_msearch(q,s+2,ajFalse))
926 	    return ajFalse;
927     }
928 
929     return ajTrue;
930 }
931 
932 
933 
934 
935 /* @funcstatic emowse_msearch *************************************************
936 **
937 ** Undocumented.
938 **
939 ** @param [r] seq [const char*] Undocumented
940 ** @param [r] pat [const char*] Undocumented
941 ** @param [r] term [AjBool] Undocumented
942 ** @return [AjBool] Undocumented
943 ** @@
944 ******************************************************************************/
945 
emowse_msearch(const char * seq,const char * pat,AjBool term)946 static AjBool emowse_msearch(const char *seq, const char *pat, AjBool term)
947 {
948     AjPStr orc = NULL;
949 
950     ajint qpos;
951     ajint fpos;
952 
953     AjBool fl;
954     ajint i;
955     ajint t;
956     ajint ofpos;
957     const char *p;
958 
959     qpos = ofpos = fpos = 0;
960     fl = ajFalse;
961 
962 
963     orc = ajStrNew();
964 
965     while(pat[qpos])
966     {
967 	if((term && fl) || !seq[fpos])
968 	{
969 	    ajStrDel(&orc);
970 	    return ajFalse;
971 	}
972 
973 
974 	if(pat[qpos]=='X')
975 	{
976 	    ++qpos;
977 	    ++fpos;
978 	    continue;
979 	}
980 
981 	if(pat[qpos]=='[')
982 	{
983 	    ++qpos;
984 	    while(pat[qpos]!=']')
985 	    {
986 		if(!pat[qpos])
987 		    ajFatal("Missing ']' in term %s",pat);
988 
989 		ajStrAppendK(&orc,pat[qpos++]);
990 	    }
991 
992 	    t = ajStrGetLen(orc);
993 	    p = ajStrGetPtr(orc);
994 	    for(i=0;i<t;++i)
995 		if(p[i]==seq[fpos])
996 		    break;
997 
998 	    if(t==i)
999 	    {
1000 		fpos = ++ofpos;
1001 		qpos=0;
1002 		fl=ajTrue;
1003 		continue;
1004 	    }
1005 	    else
1006 	    {
1007 		++fpos;
1008 		++qpos;
1009 		continue;
1010 	    }
1011 	}
1012 
1013 	if(pat[qpos]==seq[fpos])
1014 	{
1015 	    ++fpos;
1016 	    ++qpos;
1017 	    continue;
1018 	}
1019 	else
1020 	{
1021 	    fpos = ++ofpos;
1022 	    qpos = 0;
1023 	    fl   = ajTrue;
1024 	}
1025     }
1026 
1027 
1028     ajStrDel(&orc);
1029 
1030     return ajTrue;
1031 }
1032 
1033 
1034 
1035 
1036 /* @funcstatic emowse_comp_search *********************************************
1037 **
1038 ** Undocumented.
1039 **
1040 ** @param [r] substr [const AjPStr] Undocumented
1041 ** @param [r] s [const char*] Undocumented
1042 ** @return [AjBool] Undocumented
1043 ** @@
1044 ******************************************************************************/
1045 
emowse_comp_search(const AjPStr substr,const char * s)1046 static AjBool emowse_comp_search(const AjPStr substr, const char *s)
1047 {
1048     AjPInt arr;
1049     ajint i;
1050     ajint len;
1051     ajint v;
1052     ajint n;
1053     ajint w;
1054 
1055     ajint qpos;
1056     char c;
1057     AjPStr orc;
1058     const char *r;
1059 
1060 
1061     const char *p;
1062     AjPStr t;
1063 
1064     p   = ajStrGetPtr(substr);
1065     len = ajStrGetLen(substr);
1066 
1067     arr = ajIntNewRes(256);
1068 
1069     for(i=0;i<256;++i)
1070 	ajIntPut(&arr,i,0);
1071 
1072     for(i=0;i<len;++i)
1073     {
1074 	v = ajIntGet(arr,(ajint)p[i]);
1075 	ajIntPut(&arr,(ajint)p[i],v+1);
1076     }
1077 
1078 
1079     t = ajStrNewC(s);
1080     ajStrRemoveWhite(&t);
1081 
1082     p    = ajStrGetPtr(t);
1083     qpos = 0;
1084     orc  = ajStrNew();
1085 
1086     while((c=p[qpos]))
1087     {
1088 	if(c=='*')
1089 	{
1090 	    n = emowse_get_orc(&orc,p,qpos);
1091 	    r = ajStrGetPtr(orc);
1092 	    qpos += (n+3);
1093 	    for(i=0;i<n;++i)
1094 		if(ajIntGet(arr,r[i]))
1095 		    break;
1096 
1097 	    if(i==n)
1098 	    {
1099 		ajStrDel(&t);
1100 		ajStrDel(&orc);
1101 		ajIntDel(&arr);
1102 		return ajFalse;
1103 	    }
1104 
1105 	    continue;
1106 	}
1107 
1108 	i = qpos;
1109 	while((c=p[i])>='0' && c<='9')
1110 	    ++i;
1111 
1112 	if(i==qpos || p[i]!='[')
1113 	    ajFatal("Bad integer [%s]",p);
1114 
1115 	if(sscanf(p+qpos,"%d",&v)!=1)
1116 	    ajFatal("Bad integer [%s]",p);
1117 
1118 	qpos = --i;
1119 	ajStrSetClear(&orc);
1120 	n = emowse_get_orc(&orc,p,qpos);
1121 	r = ajStrGetPtr(orc);
1122 	qpos += (n+3);
1123 	w = 0;
1124 
1125 	for(i=0;i<n;++i)
1126 	    w += ajIntGet(arr,(ajint)r[i]);
1127 
1128 	if(w!=v)
1129 	{
1130 	    ajStrDel(&t);
1131 	    ajStrDel(&orc);
1132 	    ajIntDel(&arr);
1133 	    return ajFalse;
1134 	}
1135     }
1136 
1137 
1138     ajStrDel(&orc);
1139     ajStrDel(&t);
1140     ajIntDel(&arr);
1141 
1142     return ajTrue;
1143 }
1144 
1145 
1146 
1147 
1148 /* @funcstatic emowse_get_orc *************************************************
1149 **
1150 ** Undocumented.
1151 **
1152 ** @param [u] orc [AjPStr*] Undocumented
1153 ** @param [r] s [const char*] Undocumented
1154 ** @param [r] pos [ajint] Undocumented
1155 ** @return [ajint] Undocumented
1156 ** @@
1157 ******************************************************************************/
1158 
emowse_get_orc(AjPStr * orc,const char * s,ajint pos)1159 static ajint emowse_get_orc(AjPStr *orc, const char *s, ajint pos)
1160 {
1161     ajint i;
1162 
1163     i = 0;
1164     if(s[++pos]!='[')
1165 	ajFatal("Bad query given [%s]",s);
1166     ++pos;
1167 
1168     while(s[pos]!=']')
1169     {
1170 	if(!s[pos])
1171 	    ajFatal("Unterminated square brackets [%s]",s);
1172 	ajStrAppendK(orc,s[pos++]);
1173 	++i;
1174     }
1175 
1176     return i;
1177 }
1178 
1179 
1180 
1181 
1182 /* @funcstatic emowse_print_hits **********************************************
1183 **
1184 ** Undocumented.
1185 **
1186 ** @param [u] outf [AjPFile] Undocumented
1187 ** @param [u] hlist [AjPList] Undocumented
1188 ** @param [r] dno [ajint] Undocumented
1189 ** @param [r] data [EmbPMdata const *] Undocumented
1190 ** @@
1191 ******************************************************************************/
1192 
emowse_print_hits(AjPFile outf,AjPList hlist,ajint dno,EmbPMdata const * data)1193 static void emowse_print_hits(AjPFile outf, AjPList hlist, ajint dno,
1194 			      EmbPMdata const * data)
1195 {
1196     PHits hits   = NULL;
1197     AjIList iter = NULL;
1198     ajint c;
1199     ajint i;
1200     ajint j;
1201     ajint pvt;
1202     double conf;
1203     AjBool partial;
1204     AjPFloat nmarray = NULL;
1205     ajint nmn;
1206     ajint len;
1207     ajint v;
1208     AjPStr substr = NULL;
1209 
1210 
1211     nmarray = ajFloatNew();
1212     substr  = ajStrNew();
1213 
1214     ajListReverse(hlist);
1215 
1216 
1217     ajFmtPrintF(outf,"\nUsing data fragments of:\n");
1218 
1219     for(i=0;i<dno;++i)
1220 	ajFmtPrintF(outf,"         %7.1f  %S\n",data[i]->mwt,data[i]->sdata);
1221 
1222     ajFmtPrintF(outf,"\n");
1223 
1224     iter = ajListIterNewread(hlist);
1225     c    = 0;
1226     while(!ajListIterDone(iter))
1227     {
1228 	hits = (PHits) ajListIterGet(iter);
1229 	ajFmtPrintF(outf,"%-3d %-13S%-62.62S\n",++c,hits->name,hits->desc);
1230     }
1231     ajListIterDel(&iter);
1232 
1233 
1234     iter = ajListIterNewread(hlist);
1235     c    = 0;
1236     while(!ajListIterDone(iter))
1237     {
1238 	hits = (PHits) ajListIterGet(iter);
1239 
1240 	for(i=pvt=0;i<dno;++i)
1241 	    if(ajIntGet(hits->found,i) > -1)
1242 		++pvt;
1243 	conf = (double)pvt / (double)dno;
1244 
1245 
1246 	ajFmtPrintF(outf,"\n    %-3d: %-12S   %.3e %-8.1f   %-6.3f\n",++c,
1247 		    hits->name,hits->score,hits->mwt,conf);
1248 	ajFmtPrintF(outf,"         %S\n",hits->desc);
1249 	ajFmtPrintF(outf,"         Mw     Start  End    Seq\n");
1250 
1251 	for(i=nmn=0;i<dno;++i)
1252 	{
1253 	    partial = (ajIntGet(hits->found,i)>1000000) ? ajTrue : ajFalse;
1254 
1255 	    if((v=ajIntGet(hits->found,i))>-1)
1256 	    {
1257 		if(v>=(ajint)MILLION)
1258 		    v -= (ajint)MILLION;
1259 
1260 		ajStrAssignSubC(&substr,ajStrGetPtr(hits->seq),
1261 			     hits->frags[v]->begin-1,
1262 			     hits->frags[v]->end-1);
1263 
1264 		len = ajStrGetLen(substr);
1265 		ajFmtPrintF(outf,"        ");
1266 
1267 		if(partial)
1268 		    ajFmtPrintF(outf,"*");
1269 		else
1270 		    ajFmtPrintF(outf," ");
1271 
1272 		if(hits->frags[v]->mwt > MILLION)
1273 		    hits->frags[v]->mwt -= MILLION;
1274 
1275 		ajFmtPrintF(outf,"%-6.1f %-6d %-6d %-45.45S",
1276 			    hits->frags[v]->mwt,hits->frags[v]->begin,
1277 			    hits->frags[v]->end,substr);
1278 		if(len>45)
1279 		    ajFmtPrintF(outf,"...");
1280 		ajFmtPrintF(outf,"\n");
1281 	    }
1282 	    else
1283 		ajFloatPut(&nmarray,nmn++,(float)data[i]->mwt);
1284 	}
1285 	if(nmn)
1286 	{
1287 	    ajFmtPrintF(outf,"         No Match      ");
1288 	    for(i=0,j=0;i<nmn;++i,++j)
1289 	    {
1290 		ajFmtPrintF(outf,"%-6.1f ",ajFloatGet(nmarray,i));
1291 
1292 		if(j==6 && i!=nmn-1)
1293 		{
1294 		    ajFmtPrintF(outf,"\n                       ");
1295 		    j = 0;
1296 		}
1297 	    }
1298 	    ajFmtPrintF(outf,"\n");
1299 	}
1300 
1301 	ajStrDel(&hits->name);
1302 	ajStrDel(&hits->seq);
1303 	ajStrDel(&hits->desc);
1304 	ajIntDel(&hits->found);
1305 	len = hits->nf;
1306 
1307 	for(i=0;i<len;++i)
1308 	    AJFREE(hits->frags[i]);
1309 	AJFREE(hits->frags);
1310 	AJFREE(hits);
1311     }
1312 
1313     ajListIterDel(&iter);
1314     ajStrDel(&substr);
1315     ajFloatDel(&nmarray);
1316 
1317 
1318     return;
1319 }
1320