1 /* @source helixturnhelix application
2 **
3 ** Reports nucleic acid binding domains
4 ** @author Copyright (C) Alan Bleasby (ableasby@hgmp.mrc.ac.uk)
5 ** @@
6 **
7 ** Original program "HELIXTURNHELIX" by Peter Rice (EGCG 1990)
8 ** This program uses the method of Dodd and Egan (1987) J. Mol. Biol.
9 ** 194:557-564 to determine the significance of possible helix-turn-helix
10 ** matches in protein sequences
11 **
12 ** This program is free software; you can redistribute it and/or
13 ** modify it under the terms of the GNU General Public License
14 ** as published by the Free Software Foundation; either version 2
15 ** of the License, or (at your option) any later version.
16 **
17 ** This program is distributed in the hope that it will be useful,
18 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ** GNU General Public License for more details.
21 **
22 ** You should have received a copy of the GNU General Public License
23 ** along with this program; if not, write to the Free Software
24 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
25 ******************************************************************************/
26
27 #include "emboss.h"
28 #include <math.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <ctype.h>
32
33
34
35
36 #define HTHFILE "Ehth.dat"
37 #define HTH87FILE "Ehth87.dat"
38
39
40
41
42 typedef struct DNAB DNAB;
43 struct DNAB
44 {
45 AjPStr name;
46 AjPStr seq;
47 ajint pos;
48 float sd;
49 ajint wt;
50 char Padding[4];
51 };
52
53
54
55 static ajint helixturnhelix_readNab(AjPInt2d *matrix,AjBool eightyseven);
56 static void helixturnhelix_print_hits(AjPList ajb,
57 ajint n, ajint lastcol,
58 AjBool eightyseven, AjPFile outf);
59 static void helixturnhelix_report_hits(AjPList ajb, ajint lastcol,
60 AjPFeattable TabRpt);
61
62
63
64
65 /* @prog helixturnhelix *******************************************************
66 **
67 ** Report nucleic acid binding motifs
68 **
69 ******************************************************************************/
70
main(int argc,char ** argv)71 int main(int argc, char **argv)
72 {
73 AjPSeqall seqall;
74 AjPSeq seq = NULL;
75 AjPFile outf = NULL;
76 AjPReport report = NULL;
77 AjPList ajb = NULL;
78 AjPStr strand = NULL;
79 AjPStr substr = NULL;
80 AjBool eightyseven = ajFalse;
81 float mean;
82 float sd;
83 float minsd;
84 static DNAB *lp;
85
86 AjPInt2d matrix = NULL;
87 AjPStr tmpStr = NULL;
88 AjPFeattable TabRpt = NULL;
89
90 ajint begin;
91 ajint end;
92 ajint len;
93
94 const char *p;
95 char *q;
96
97 ajint i;
98 ajint j;
99 ajint cols;
100 ajint lastcol;
101
102 ajint n = 0;
103
104 ajint sp;
105 ajint se;
106 ajint weight;
107
108 float minscore;
109 float thissd;
110
111 embInit("helixturnhelix",argc,argv);
112
113 seqall = ajAcdGetSeqall("sequence");
114 report = ajAcdGetReport("outfile");
115 mean = ajAcdGetFloat("mean");
116 sd = ajAcdGetFloat("sdvalue");
117 minsd = ajAcdGetFloat("minsd");
118
119 /* obsolete. Can be uncommented in acd file and here to reuse */
120
121 /* outf = ajAcdGetOutfile("originalfile"); */
122
123 substr = ajStrNew();
124 matrix = ajInt2dNew();
125
126 eightyseven = ajAcdGetBoolean("eightyseven");
127
128 cols = helixturnhelix_readNab(&matrix,eightyseven);
129 ajDebug("cols = %d\n",cols);
130
131 lastcol = cols-3;
132
133 minscore = mean + (minsd*sd);
134
135 ajb=ajListNew();
136
137 ajFmtPrintAppS(&tmpStr,"Hits above +%.2f SD (%.2f)", minsd, minscore);
138 ajReportSetHeaderS(report, tmpStr);
139
140 while(ajSeqallNext(seqall, &seq))
141 {
142 n = 0;
143 begin = ajSeqallGetseqBegin(seqall);
144 end = ajSeqallGetseqEnd(seqall);
145
146
147 strand = ajSeqGetSeqCopyS(seq);
148 ajStrFmtUpper(&strand);
149
150 ajStrAssignSubC(&substr,ajStrGetPtr(strand),begin-1,end-1);
151 len = ajStrGetLen(substr);
152
153 TabRpt = ajFeattableNewSeq(seq);
154
155 q = ajStrGetuniquePtr(&substr);
156 for(i=0;i<len;++i,++q)
157 *q = (char) ajBasecodeToInt(*q);
158
159 p = ajStrGetPtr(substr);
160
161 se = (len-lastcol)+1;
162 for(i=0;i<se;++i)
163 {
164 weight = 0;
165 for(j=0;j<lastcol;++j)
166 weight+=ajInt2dGet(matrix,(ajint)*(p+i+j),j);
167 thissd=((float)weight-mean)/sd;
168 if(thissd>minsd)
169 {
170 AJNEW(lp);
171 lp->name = ajStrNewC(ajSeqGetNameC(seq));
172 lp->seq = ajStrNew();
173 sp = begin - 1 + i;
174 lp->pos = sp+1;
175 ajStrAssignSubC(&lp->seq,ajStrGetPtr(strand),sp,sp+lastcol-1);
176 lp->sd = thissd;
177 lp->wt = weight;
178 ajListPush(ajb,(void *)lp);
179 ++n;
180 }
181 }
182 helixturnhelix_report_hits(ajb, lastcol, TabRpt);
183
184 ajReportWrite(report, TabRpt, seq);
185 ajFeattableDel(&TabRpt);
186 ajStrDel(&strand);
187 }
188 ajReportSetSeqstats(report, seqall);
189
190 if(!n)
191 {
192 if(outf)
193 ajFmtPrintF(outf,"\nNo hits above +%.2f SD (%.2f)\n",
194 minsd,minscore);
195 }
196 else
197 if(outf)
198 {
199 ajFmtPrintF(outf, "\nHELIXTURNHELIX: Nucleic Acid Binding "
200 "Domain search\n\n");
201 ajFmtPrintF(outf,"\nHits above +%.2f SD (%.2f)\n",minsd,minscore);
202 helixturnhelix_print_hits(ajb, n, lastcol,
203 eightyseven, outf);
204 }
205
206 ajInt2dDel(&matrix);
207
208 ajSeqDel(&seq);
209 ajStrDel(&substr);
210 ajListFree(&ajb);
211
212 if(outf)
213 ajFileClose(&outf);
214
215 ajReportClose(report);
216
217 ajReportDel(&report);
218 ajSeqallDel(&seqall);
219 ajFeattableDel(&TabRpt);
220 ajStrDel(&strand);
221 ajStrDel(&tmpStr);
222
223 embExit();
224
225 return 0;
226 }
227
228
229
230
231 /* @funcstatic helixturnhelix_readNab *****************************************
232 **
233 ** Undocumented.
234 **
235 ** @param [w] matrix [AjPInt2d*] Undocumented
236 ** @param [r] eightyseven [AjBool] Undocumented
237 ** @return [ajint] Undocumented
238 ** @@
239 ******************************************************************************/
240
241
helixturnhelix_readNab(AjPInt2d * matrix,AjBool eightyseven)242 static ajint helixturnhelix_readNab(AjPInt2d *matrix,AjBool eightyseven)
243 {
244 AjPFile mfptr = NULL;
245 AjPStr line = NULL;
246 AjPStr delim = NULL;
247 AjBool pass;
248
249 const char *p;
250 const char *q;
251
252 ajint xcols = 0;
253 ajint cols = 0;
254
255 float sample;
256 float expected;
257 float pee;
258 float exptot;
259 ajint rt;
260
261 ajuint i;
262 ajuint j;
263 ajuint maxi;
264 ajuint maxj;
265 ajint c = 0;
266 ajint v;
267
268 ajuint d1;
269 ajuint d2;
270
271 ajint **mat;
272
273 if(eightyseven)
274 mfptr = ajDatafileNewInNameC(HTH87FILE);
275 else
276 mfptr = ajDatafileNewInNameC(HTHFILE);
277 if(!mfptr)
278 ajFatal("HTH file not found\n");
279
280 line = ajStrNew();
281 delim = ajStrNewC(" :\t\n");
282
283 pass = ajTrue;
284
285 while(ajReadline(mfptr, &line))
286 {
287 p = ajStrGetPtr(line);
288
289 if(*p=='#' || *p=='!' || *p=='\n')
290 continue;
291
292 if(ajStrPrefixC(line,"Sample:"))
293 {
294 if(sscanf(p,"%*s%f",&sample)!=1)
295 ajFatal("No sample size given");
296 continue;
297 }
298
299 while((*p!='\n') && (*p<'A' || *p>'Z'))
300 ++p;
301
302 cols = ajStrParseCountC(line,ajStrGetPtr(delim));
303
304 if(pass)
305 {
306 pass = ajFalse;
307 xcols = cols;
308 }
309 else
310 if(xcols!=cols)
311 ajFatal("Assymetric table");
312
313 d1 = ajBasecodeToInt((char)toupper((ajint)*p));
314
315 q = ajStrGetPtr(line);
316 c = 0;
317 q = ajSysFuncStrtok(q,ajStrGetPtr(delim));
318
319 while((q=ajSysFuncStrtok(NULL,ajStrGetPtr(delim))))
320 {
321 sscanf(q,"%d",&v);
322 ajInt2dPut(matrix,d1,c++,v);
323 }
324
325 if(c>1)
326 maxi = c-2;
327 else
328 maxi=0;
329 for(i=0,rt=0;i<maxi;++i)
330 rt += ajInt2dGet(*matrix,d1,i);
331
332 if(rt!=ajInt2dGet(*matrix,d1,c-2))
333 ajFatal("Row didn't match total");
334 }
335
336
337 mat = ajInt2dInt(*matrix);
338 ajInt2dLen(*matrix,&d1,&d2);
339
340
341 for(j=0;j<d2-2;++j)
342 {
343 rt = 0;
344
345 for(i=0;i<d1;++i)
346 {
347 if(!mat[i][d2-1])
348 continue;
349 rt += mat[i][j];
350 }
351
352 if(rt!=(ajint)sample)
353 ajFatal("Column doesn't match sample size");
354 }
355
356 exptot = 0.0;
357 for(i=0;i<d1;++i)
358 {
359 if(!mat[i][d2-1])
360 continue;
361
362 expected = (float)mat[i][c-1];
363 expected = (float) ((double)expected * 0.0001);
364 exptot += expected;
365
366 if(c>1)
367 maxj = c-2;
368 else
369 maxj=0;
370 for(j=0;j<maxj;++j)
371 {
372 if(!mat[i][j])
373 pee=((float)1.0<(float)1.0/((sample+(float)1.0)*expected)) ?
374 (float)1.0 : (float)1.0/((sample+(float)1.0)*expected);
375 else
376 pee = ((float)mat[i][j])/(sample*expected);
377 mat[i][j]=(ajint)((double)100.0*log(pee));
378 }
379 }
380
381 if((float)fabs((double)(1.0-exptot)) > 0.05)
382 ajFatal("Expected column total != 1.0");
383
384 for(i=0;i<d1;++i)
385 for(j=0;j<d2;++j)
386 ajInt2dPut(matrix,i,j,mat[i][j]);
387
388 for(i=0;i<d1;++i)
389 AJFREE(mat[i]);
390 AJFREE(mat);
391
392 ajStrDel(&line);
393 ajStrDel(&delim);
394 ajFileClose(&mfptr);
395
396 return cols;
397 }
398
399
400
401
402 /* @funcstatic helixturnhelix_print_hits **************************************
403 **
404 ** Undocumented.
405 **
406 ** @param [u] ajb [AjPList] Undocumented
407 ** @param [r] n [ajint] Undocumented
408 ** @param [r] lastcol [ajint] Undocumented
409 ** @param [r] eightyseven [AjBool] Undocumented
410 ** @param [u] outf [AjPFile] Undocumented
411 ** @@
412 ******************************************************************************/
413
414
helixturnhelix_print_hits(AjPList ajb,ajint n,ajint lastcol,AjBool eightyseven,AjPFile outf)415 static void helixturnhelix_print_hits(AjPList ajb, ajint n,
416 ajint lastcol,
417 AjBool eightyseven, AjPFile outf)
418 {
419 DNAB **lp;
420
421 AjPUint hp = NULL;
422 AjPFloat hsd = NULL;
423
424 ajint i;
425
426 AJCNEW(lp, n);
427
428 hp = ajUintNew();
429 hsd = ajFloatNew();
430
431 for(i=0;i<n;++i)
432 {
433 if(!ajListPop(ajb,(void **)&lp[i]))
434 ajFatal("List ended prematurely");
435 ajUintPut(&hp,i,i);
436 ajFloatPut(&hsd,i,lp[i]->sd);
437 }
438 ajSortFloatDecI(ajFloatFloat(hsd),ajUintUint(hp),n);
439 ajFloatDel(&hsd);
440
441 for(i=0;i<n;++i)
442 {
443 ajFmtPrintF(outf,"\nScore %d (+%.2f SD) in %s at residue %d\n",
444 lp[ajUintGet(hp,i)]->wt,lp[ajUintGet(hp,i)]->sd,
445 ajStrGetPtr(lp[ajUintGet(hp,i)]->name),
446 lp[ajUintGet(hp,i)]->pos);
447 ajFmtPrintF(outf,"\n Sequence: %s\n",
448 ajStrGetPtr(lp[ajUintGet(hp,i)]->seq));
449
450 if(eightyseven)
451 {
452 ajFmtPrintF(outf," | |\n");
453 ajFmtPrintF(outf,"%13d %d\n",
454 lp[ajUintGet(hp,i)]->pos,
455 lp[ajUintGet(hp,i)]->pos+lastcol-1);
456 }
457 else
458 {
459 ajFmtPrintF(outf," | |\n");
460 ajFmtPrintF(outf,"%13d %d\n",
461 lp[ajUintGet(hp,i)]->pos,
462 lp[ajUintGet(hp,i)]->pos+lastcol-1);
463 }
464 }
465
466
467 for(i=0;i<n;++i)
468 {
469 ajStrDel(&lp[i]->name);
470 ajStrDel(&lp[i]->seq);
471 }
472 AJFREE(lp);
473 ajUintDel(&hp);
474
475 return;
476 }
477
478
479
480
481 /* @funcstatic helixturnhelix_report_hits *************************************
482 **
483 ** Undocumented.
484 **
485 ** @param [u] ajb [AjPList] List of hits - which are deleted at the end
486 ** @param [r] lastcol [ajint] Undocumented
487 ** @param [u] TabRpt [AjPFeattable] Undocumented
488 ** @return [void]
489 ** @@
490 ******************************************************************************/
491
helixturnhelix_report_hits(AjPList ajb,ajint lastcol,AjPFeattable TabRpt)492 static void helixturnhelix_report_hits(AjPList ajb,
493 ajint lastcol, AjPFeattable TabRpt)
494 {
495 DNAB **lp = NULL;
496
497 AjPUint hp = NULL;
498 AjPFloat hsd = NULL;
499
500 ajint n;
501 ajint i;
502 AjPFeature gf = NULL;
503
504 AjPStr tmpStr = NULL;
505 AjPStr fthit = NULL;
506 struct DNAB *dnab;
507
508 if(!fthit)
509 ajStrAssignC(&fthit, "SO:0001081");
510
511 hp = ajUintNew();
512 hsd = ajFloatNew();
513
514 n = (ajuint) ajListToarray(ajb, (void***) &lp);
515
516 if(!n)
517 {
518 ajUintDel(&hp);
519 ajFloatDel(&hsd);
520 return;
521 }
522
523 for(i=0;i<n;++i)
524 {
525 ajUintPut(&hp,i,i);
526 ajFloatPut(&hsd,i,lp[i]->sd);
527 }
528 ajSortFloatDecI(ajFloatFloat(hsd),ajUintUint(hp),n);
529 ajFloatDel(&hsd);
530
531 for(i=0;i<n;++i)
532 {
533 gf = ajFeatNewProt(TabRpt, NULL, fthit,
534 lp[ajUintGet(hp,i)]->pos,
535 lp[ajUintGet(hp,i)]->pos+lastcol-1,
536 (float) lp[ajUintGet(hp,i)]->wt);
537 ajFmtPrintS(&tmpStr, "*pos %d", lp[ajUintGet(hp,i)]->pos);
538 ajFeatTagAddSS(gf, NULL, tmpStr);
539 ajFmtPrintS(&tmpStr, "*sd %.2f", lp[ajUintGet(hp,i)]->sd);
540 ajFeatTagAddSS(gf, NULL, tmpStr);
541
542 }
543
544 while(ajListPop(ajb,(void **)&dnab))
545 {
546 ajStrDel(&dnab->name);
547 ajStrDel(&dnab->seq);
548 AJFREE(dnab);
549 }
550
551
552 AJFREE(lp);
553 ajUintDel(&hp);
554
555 ajStrDel(&fthit);
556 ajStrDel(&tmpStr);
557
558 return;
559 }
560