1 /* @source helixturnhelix application
2 **
3 ** Reports nucleic acid binding domains
4 ** @author Copyright (C) Alan Bleasby (ableasby@hgmp.mrc.ac.uk)
5 ** @@
6 **
7 ** Original program "HELIXTURNHELIX" by Peter Rice (EGCG 1990)
8 ** This program uses the method of Dodd and Egan (1987) J. Mol. Biol.
9 ** 194:557-564 to determine the significance of possible helix-turn-helix
10 ** matches in protein sequences
11 **
12 ** This program is free software; you can redistribute it and/or
13 ** modify it under the terms of the GNU General Public License
14 ** as published by the Free Software Foundation; either version 2
15 ** of the License, or (at your option) any later version.
16 **
17 ** This program is distributed in the hope that it will be useful,
18 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20 ** GNU General Public License for more details.
21 **
22 ** You should have received a copy of the GNU General Public License
23 ** along with this program; if not, write to the Free Software
24 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
25 ******************************************************************************/
26 
27 #include "emboss.h"
28 #include <math.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <ctype.h>
32 
33 
34 
35 
36 #define HTHFILE "Ehth.dat"
37 #define HTH87FILE "Ehth87.dat"
38 
39 
40 
41 
42 typedef struct DNAB DNAB;
43 struct DNAB
44 {
45     AjPStr name;
46     AjPStr seq;
47     ajint pos;
48     float sd;
49     ajint wt;
50     char Padding[4];
51 };
52 
53 
54 
55 static ajint helixturnhelix_readNab(AjPInt2d *matrix,AjBool eightyseven);
56 static void helixturnhelix_print_hits(AjPList ajb,
57 				      ajint n, ajint lastcol,
58 				      AjBool eightyseven, AjPFile outf);
59 static void helixturnhelix_report_hits(AjPList ajb, ajint lastcol,
60 				       AjPFeattable TabRpt);
61 
62 
63 
64 
65 /* @prog helixturnhelix *******************************************************
66 **
67 ** Report nucleic acid binding motifs
68 **
69 ******************************************************************************/
70 
main(int argc,char ** argv)71 int main(int argc, char **argv)
72 {
73     AjPSeqall seqall;
74     AjPSeq    seq  = NULL;
75     AjPFile   outf = NULL;
76     AjPReport report = NULL;
77     AjPList   ajb = NULL;
78     AjPStr    strand = NULL;
79     AjPStr    substr = NULL;
80     AjBool    eightyseven = ajFalse;
81     float     mean;
82     float     sd;
83     float     minsd;
84     static DNAB *lp;
85 
86     AjPInt2d matrix = NULL;
87     AjPStr tmpStr   = NULL;
88     AjPFeattable TabRpt = NULL;
89 
90     ajint begin;
91     ajint end;
92     ajint len;
93 
94     const char *p;
95     char *q;
96 
97     ajint i;
98     ajint j;
99     ajint cols;
100     ajint lastcol;
101 
102     ajint n = 0;
103 
104     ajint sp;
105     ajint se;
106     ajint weight;
107 
108     float minscore;
109     float thissd;
110 
111     embInit("helixturnhelix",argc,argv);
112 
113     seqall = ajAcdGetSeqall("sequence");
114     report = ajAcdGetReport("outfile");
115     mean   = ajAcdGetFloat("mean");
116     sd     = ajAcdGetFloat("sdvalue");
117     minsd  = ajAcdGetFloat("minsd");
118 
119     /* obsolete. Can be uncommented in acd file and here to reuse */
120 
121     /* outf      = ajAcdGetOutfile("originalfile"); */
122 
123     substr = ajStrNew();
124     matrix = ajInt2dNew();
125 
126     eightyseven = ajAcdGetBoolean("eightyseven");
127 
128     cols = helixturnhelix_readNab(&matrix,eightyseven);
129     ajDebug("cols = %d\n",cols);
130 
131     lastcol = cols-3;
132 
133     minscore = mean + (minsd*sd);
134 
135     ajb=ajListNew();
136 
137     ajFmtPrintAppS(&tmpStr,"Hits above +%.2f SD (%.2f)", minsd, minscore);
138     ajReportSetHeaderS(report, tmpStr);
139 
140     while(ajSeqallNext(seqall, &seq))
141     {
142 	n = 0;
143 	begin = ajSeqallGetseqBegin(seqall);
144 	end   = ajSeqallGetseqEnd(seqall);
145 
146 
147 	strand = ajSeqGetSeqCopyS(seq);
148 	ajStrFmtUpper(&strand);
149 
150 	ajStrAssignSubC(&substr,ajStrGetPtr(strand),begin-1,end-1);
151 	len = ajStrGetLen(substr);
152 
153 	TabRpt = ajFeattableNewSeq(seq);
154 
155 	q = ajStrGetuniquePtr(&substr);
156 	for(i=0;i<len;++i,++q)
157 	    *q = (char) ajBasecodeToInt(*q);
158 
159 	p = ajStrGetPtr(substr);
160 
161 	se = (len-lastcol)+1;
162 	for(i=0;i<se;++i)
163 	{
164 	    weight = 0;
165 	    for(j=0;j<lastcol;++j)
166 		weight+=ajInt2dGet(matrix,(ajint)*(p+i+j),j);
167 	    thissd=((float)weight-mean)/sd;
168 	    if(thissd>minsd)
169 	    {
170 		AJNEW(lp);
171 		lp->name = ajStrNewC(ajSeqGetNameC(seq));
172 		lp->seq  = ajStrNew();
173 		sp = begin - 1 + i;
174 		lp->pos = sp+1;
175 		ajStrAssignSubC(&lp->seq,ajStrGetPtr(strand),sp,sp+lastcol-1);
176 		lp->sd = thissd;
177 		lp->wt = weight;
178 		ajListPush(ajb,(void *)lp);
179 		++n;
180 	    }
181 	}
182 	helixturnhelix_report_hits(ajb, lastcol, TabRpt);
183 
184 	ajReportWrite(report, TabRpt, seq);
185 	ajFeattableDel(&TabRpt);
186 	ajStrDel(&strand);
187     }
188     ajReportSetSeqstats(report, seqall);
189 
190     if(!n)
191     {
192 	if(outf)
193 	    ajFmtPrintF(outf,"\nNo hits above +%.2f SD (%.2f)\n",
194 			minsd,minscore);
195     }
196     else
197         if(outf)
198 	{
199 	    ajFmtPrintF(outf, "\nHELIXTURNHELIX: Nucleic Acid Binding "
200 			"Domain search\n\n");
201 	    ajFmtPrintF(outf,"\nHits above +%.2f SD (%.2f)\n",minsd,minscore);
202 	    helixturnhelix_print_hits(ajb, n, lastcol,
203 				      eightyseven, outf);
204 	}
205 
206     ajInt2dDel(&matrix);
207 
208     ajSeqDel(&seq);
209     ajStrDel(&substr);
210     ajListFree(&ajb);
211 
212     if(outf)
213 	ajFileClose(&outf);
214 
215     ajReportClose(report);
216 
217     ajReportDel(&report);
218     ajSeqallDel(&seqall);
219     ajFeattableDel(&TabRpt);
220     ajStrDel(&strand);
221     ajStrDel(&tmpStr);
222 
223     embExit();
224 
225     return 0;
226 }
227 
228 
229 
230 
231 /* @funcstatic helixturnhelix_readNab *****************************************
232 **
233 ** Undocumented.
234 **
235 ** @param [w] matrix [AjPInt2d*] Undocumented
236 ** @param [r] eightyseven [AjBool] Undocumented
237 ** @return [ajint] Undocumented
238 ** @@
239 ******************************************************************************/
240 
241 
helixturnhelix_readNab(AjPInt2d * matrix,AjBool eightyseven)242 static ajint helixturnhelix_readNab(AjPInt2d *matrix,AjBool eightyseven)
243 {
244     AjPFile mfptr = NULL;
245     AjPStr  line  = NULL;
246     AjPStr  delim = NULL;
247     AjBool  pass;
248 
249     const char *p;
250     const char *q;
251 
252     ajint xcols = 0;
253     ajint cols  = 0;
254 
255     float sample;
256     float expected;
257     float pee;
258     float exptot;
259     ajint rt;
260 
261     ajuint i;
262     ajuint j;
263     ajuint maxi;
264     ajuint maxj;
265     ajint c = 0;
266     ajint v;
267 
268     ajuint d1;
269     ajuint d2;
270 
271     ajint **mat;
272 
273     if(eightyseven)
274 	mfptr = ajDatafileNewInNameC(HTH87FILE);
275     else
276 	mfptr = ajDatafileNewInNameC(HTHFILE);
277     if(!mfptr)
278 	ajFatal("HTH file not found\n");
279 
280     line  = ajStrNew();
281     delim = ajStrNewC(" :\t\n");
282 
283     pass = ajTrue;
284 
285     while(ajReadline(mfptr, &line))
286     {
287 	p = ajStrGetPtr(line);
288 
289 	if(*p=='#' || *p=='!' || *p=='\n')
290 	    continue;
291 
292 	if(ajStrPrefixC(line,"Sample:"))
293 	{
294 	    if(sscanf(p,"%*s%f",&sample)!=1)
295 		ajFatal("No sample size given");
296 	    continue;
297 	}
298 
299 	while((*p!='\n') && (*p<'A' || *p>'Z'))
300 	    ++p;
301 
302 	cols = ajStrParseCountC(line,ajStrGetPtr(delim));
303 
304 	if(pass)
305 	{
306 	    pass  = ajFalse;
307 	    xcols = cols;
308 	}
309 	else
310 	    if(xcols!=cols)
311 		ajFatal("Assymetric table");
312 
313 	d1 = ajBasecodeToInt((char)toupper((ajint)*p));
314 
315 	q = ajStrGetPtr(line);
316 	c = 0;
317 	q = ajSysFuncStrtok(q,ajStrGetPtr(delim));
318 
319 	while((q=ajSysFuncStrtok(NULL,ajStrGetPtr(delim))))
320 	{
321 	    sscanf(q,"%d",&v);
322 	    ajInt2dPut(matrix,d1,c++,v);
323 	}
324 
325 	if(c>1)
326 	    maxi = c-2;
327 	else
328 	    maxi=0;
329 	for(i=0,rt=0;i<maxi;++i)
330 	    rt += ajInt2dGet(*matrix,d1,i);
331 
332 	if(rt!=ajInt2dGet(*matrix,d1,c-2))
333 	    ajFatal("Row didn't match total");
334     }
335 
336 
337     mat = ajInt2dInt(*matrix);
338     ajInt2dLen(*matrix,&d1,&d2);
339 
340 
341     for(j=0;j<d2-2;++j)
342     {
343 	rt = 0;
344 
345 	for(i=0;i<d1;++i)
346 	{
347 	    if(!mat[i][d2-1])
348 		continue;
349 	    rt += mat[i][j];
350 	}
351 
352 	if(rt!=(ajint)sample)
353 	    ajFatal("Column doesn't match sample size");
354     }
355 
356     exptot = 0.0;
357     for(i=0;i<d1;++i)
358     {
359 	if(!mat[i][d2-1])
360 	    continue;
361 
362 	expected = (float)mat[i][c-1];
363 	expected = (float) ((double)expected * 0.0001);
364 	exptot += expected;
365 
366 	if(c>1)
367 	    maxj = c-2;
368 	else
369 	    maxj=0;
370 	for(j=0;j<maxj;++j)
371 	{
372 	    if(!mat[i][j])
373 		pee=((float)1.0<(float)1.0/((sample+(float)1.0)*expected)) ?
374 		    (float)1.0 : (float)1.0/((sample+(float)1.0)*expected);
375 	    else
376 		pee = ((float)mat[i][j])/(sample*expected);
377 	    mat[i][j]=(ajint)((double)100.0*log(pee));
378 	}
379     }
380 
381     if((float)fabs((double)(1.0-exptot)) > 0.05)
382 	ajFatal("Expected column total != 1.0");
383 
384     for(i=0;i<d1;++i)
385 	for(j=0;j<d2;++j)
386 	    ajInt2dPut(matrix,i,j,mat[i][j]);
387 
388     for(i=0;i<d1;++i)
389 	AJFREE(mat[i]);
390     AJFREE(mat);
391 
392     ajStrDel(&line);
393     ajStrDel(&delim);
394     ajFileClose(&mfptr);
395 
396     return cols;
397 }
398 
399 
400 
401 
402 /* @funcstatic helixturnhelix_print_hits **************************************
403 **
404 ** Undocumented.
405 **
406 ** @param [u] ajb [AjPList] Undocumented
407 ** @param [r] n [ajint] Undocumented
408 ** @param [r] lastcol [ajint] Undocumented
409 ** @param [r] eightyseven [AjBool] Undocumented
410 ** @param [u] outf [AjPFile] Undocumented
411 ** @@
412 ******************************************************************************/
413 
414 
helixturnhelix_print_hits(AjPList ajb,ajint n,ajint lastcol,AjBool eightyseven,AjPFile outf)415 static void helixturnhelix_print_hits(AjPList ajb, ajint n,
416 				      ajint lastcol,
417 				      AjBool eightyseven, AjPFile outf)
418 {
419     DNAB     **lp;
420 
421     AjPUint hp    = NULL;
422     AjPFloat hsd = NULL;
423 
424     ajint   i;
425 
426     AJCNEW(lp, n);
427 
428     hp  = ajUintNew();
429     hsd = ajFloatNew();
430 
431     for(i=0;i<n;++i)
432     {
433 	if(!ajListPop(ajb,(void **)&lp[i]))
434 	    ajFatal("List ended prematurely");
435 	ajUintPut(&hp,i,i);
436 	ajFloatPut(&hsd,i,lp[i]->sd);
437     }
438     ajSortFloatDecI(ajFloatFloat(hsd),ajUintUint(hp),n);
439     ajFloatDel(&hsd);
440 
441     for(i=0;i<n;++i)
442     {
443 	ajFmtPrintF(outf,"\nScore %d (+%.2f SD) in %s at residue %d\n",
444 		   lp[ajUintGet(hp,i)]->wt,lp[ajUintGet(hp,i)]->sd,
445 		    ajStrGetPtr(lp[ajUintGet(hp,i)]->name),
446 		   lp[ajUintGet(hp,i)]->pos);
447 	ajFmtPrintF(outf,"\n Sequence:  %s\n",
448 		    ajStrGetPtr(lp[ajUintGet(hp,i)]->seq));
449 
450 	if(eightyseven)
451 	{
452 	    ajFmtPrintF(outf,"            |                  |\n");
453 	    ajFmtPrintF(outf,"%13d                  %d\n",
454 			lp[ajUintGet(hp,i)]->pos,
455 			lp[ajUintGet(hp,i)]->pos+lastcol-1);
456 	}
457 	else
458 	{
459 	    ajFmtPrintF(outf,"            |                    |\n");
460 	    ajFmtPrintF(outf,"%13d                    %d\n",
461 			lp[ajUintGet(hp,i)]->pos,
462 			lp[ajUintGet(hp,i)]->pos+lastcol-1);
463 	}
464     }
465 
466 
467     for(i=0;i<n;++i)
468     {
469 	ajStrDel(&lp[i]->name);
470 	ajStrDel(&lp[i]->seq);
471     }
472     AJFREE(lp);
473     ajUintDel(&hp);
474 
475     return;
476 }
477 
478 
479 
480 
481 /* @funcstatic helixturnhelix_report_hits *************************************
482 **
483 ** Undocumented.
484 **
485 ** @param [u] ajb [AjPList] List of hits - which are deleted at the end
486 ** @param [r] lastcol [ajint] Undocumented
487 ** @param [u] TabRpt [AjPFeattable] Undocumented
488 ** @return [void]
489 ** @@
490 ******************************************************************************/
491 
helixturnhelix_report_hits(AjPList ajb,ajint lastcol,AjPFeattable TabRpt)492 static void helixturnhelix_report_hits(AjPList ajb,
493 				       ajint lastcol, AjPFeattable TabRpt)
494 {
495     DNAB     **lp = NULL;
496 
497     AjPUint   hp  = NULL;
498     AjPFloat hsd = NULL;
499 
500     ajint n;
501     ajint i;
502     AjPFeature gf = NULL;
503 
504     AjPStr tmpStr = NULL;
505     AjPStr fthit = NULL;
506     struct DNAB *dnab;
507 
508     if(!fthit)
509 	ajStrAssignC(&fthit, "SO:0001081");
510 
511     hp  = ajUintNew();
512     hsd = ajFloatNew();
513 
514     n = (ajuint) ajListToarray(ajb, (void***) &lp);
515 
516     if(!n)
517     {
518 	ajUintDel(&hp);
519 	ajFloatDel(&hsd);
520 	return;
521     }
522 
523     for(i=0;i<n;++i)
524     {
525 	ajUintPut(&hp,i,i);
526 	ajFloatPut(&hsd,i,lp[i]->sd);
527     }
528     ajSortFloatDecI(ajFloatFloat(hsd),ajUintUint(hp),n);
529     ajFloatDel(&hsd);
530 
531     for(i=0;i<n;++i)
532     {
533         gf = ajFeatNewProt(TabRpt, NULL, fthit,
534 			   lp[ajUintGet(hp,i)]->pos,
535 			   lp[ajUintGet(hp,i)]->pos+lastcol-1,
536 			   (float) lp[ajUintGet(hp,i)]->wt);
537 	ajFmtPrintS(&tmpStr, "*pos %d", lp[ajUintGet(hp,i)]->pos);
538 	ajFeatTagAddSS(gf, NULL, tmpStr);
539 	ajFmtPrintS(&tmpStr, "*sd %.2f", lp[ajUintGet(hp,i)]->sd);
540 	ajFeatTagAddSS(gf, NULL, tmpStr);
541 
542     }
543 
544     while(ajListPop(ajb,(void **)&dnab))
545     {
546 	ajStrDel(&dnab->name);
547 	ajStrDel(&dnab->seq);
548 	AJFREE(dnab);
549     }
550 
551 
552     AJFREE(lp);
553     ajUintDel(&hp);
554 
555     ajStrDel(&fthit);
556     ajStrDel(&tmpStr);
557 
558     return;
559 }
560