1 /* score_main.c
2 * Fri Feb 18 10:31:48 1994
3 *
4 * main() for scoring test sequences with a model.
5 * Also, can print out alignments of model to sequence so
6 * that the pairwise assignments can be seen.
7 */
8
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <time.h>
13 #include <math.h>
14
15 #ifdef NEED_GETOPTH
16 #include <getopt.h>
17 #endif
18
19 #include "structs.h"
20 #include "funcs.h"
21 #include "squid.h"
22 #include "version.h"
23
24 #ifdef MEMDEBUG
25 #include "dbmalloc.h"
26 #endif
27
28 #define OPTIONS "ag:ms"
29
30 static char usage[] = "\
31 Usage: coves [-options] <CM file> <seqfile>\n\
32 where options are:\n\
33 -a : show all pairs, not just Watson-Crick\n\
34 -g <gcfrac> : set expected background GC composition (default 0.5)\n\
35 -m : mountain representation of structural alignment\n\
36 -s : secondary structure string representation of \n\
37 structural alignment\n";
38
39 static char banner[] = "\
40 coves - scoring and structure prediction of RNA sequences\n\
41 using a covariance model";
42
43 int
main(int argc,char ** argv)44 main(int argc, char **argv)
45 {
46 char *seq; /* a sequence to score */
47 SQINFO sqinfo; /* info about seq */
48 char *seqfile; /* sequence file */
49 int fmt; /* format of sequence file */
50 SQFILE *dbfp; /* open sequence file for reading */
51 char *cmfile; /* file containing covariance model */
52 struct cm_s *cm; /* model */
53 double score; /* score of alignment */
54 struct trace_s *tr; /* traceback of alignment */
55 struct align_s *ali; /* alignment of seq to model */
56 char *aseq; /* "aligned" sequence string */
57 char *khstruct; /* secondary structure string */
58 char buffer[61]; /* output buffer for structures */
59 int len; /* length of aseq, khstruct */
60 int apos; /* position in aseq, khstruct */
61 double rfreq[ALPHASIZE]; /* expected background symbol frequencies */
62 struct istate_s *icm; /* integer log odds model */
63 int statenum; /* # of states in icm */
64
65 int do_khstructure; /* TRUE if we print a structure string */
66 int do_mountain; /* TRUE if we show a mountain structure */
67 int watsoncrick; /* TRUE if only canonical pairs are indicated */
68 double gcfrac; /* expected background GC fraction */
69
70 #ifdef MEMDEBUG
71 unsigned long histid1, histid2, orig_size, curr_size;
72 #endif
73
74 int optc;
75 extern char *optarg; /* for getopt() */
76 extern int optind; /* for getopt() */
77
78 /***********************************************
79 * Parse command line
80 ***********************************************/
81
82 do_khstructure = FALSE;
83 do_mountain = FALSE;
84 watsoncrick = TRUE;
85 gcfrac = 0.5;
86
87 while ((optc = getopt(argc, argv, OPTIONS)) != -1)
88 switch (optc) {
89
90 case 'a': watsoncrick = FALSE; break;
91 case 'g': gcfrac = (double) atof(optarg); break;
92 case 'm': do_mountain = TRUE; break;
93 case 's': do_khstructure = TRUE; break;
94
95 case 'h':
96 printf("%s\n version %s (%s)\n%s\n", banner, RELEASE, RELEASEDATE, usage);
97 exit(0);
98
99 default: Die("unrecognized option %c\n", optc);
100 }
101
102 if (argc - optind != 2)
103 Die("%s\n", usage);
104
105 cmfile = argv[argc-2];
106 seqfile = argv[argc-1];
107
108 if (! SeqfileFormat(seqfile, &fmt, NULL))
109 Die("Failed to determine format of sequence database %s", seqfile);
110
111 if ((dbfp = SeqfileOpen(seqfile, fmt, NULL)) == NULL)
112 Die("Failed to open sequence file %s for reading", seqfile);
113
114 if (! ReadCM(cmfile, &cm))
115 Die("Failed to read model from file %s", cmfile);
116
117 rfreq[1] = rfreq[2] = gcfrac / 2.;
118 rfreq[0] = rfreq[3] = (1. - gcfrac) / 2.;
119
120 if (! RearrangeCM(cm, rfreq, &icm, &statenum))
121 Die("failed to make integer log odds model");
122
123 /***********************************************
124 * Print banner
125 ***********************************************/
126
127 puts(banner);
128 printf(" version %s, %s\n\n", RELEASE, RELEASEDATE);
129
130 printf("---------------------------------------------------\n");
131 printf("Database to search/score: %s\n", seqfile);
132 printf("Model: %s\n", cmfile);
133 printf("GC%% of background model: %.0f%%\n", (gcfrac*100.));
134 printf("---------------------------------------------------\n");
135 puts("");
136
137 /***********************************************
138 * Score each sequence
139 ***********************************************/
140
141 #ifdef MEMDEBUG
142 orig_size = malloc_size(&histid1);
143 #endif
144
145 while (ReadSeq(dbfp, fmt, &seq, &sqinfo))
146 {
147 char *prepseq;
148 prepseq = Strdup(seq);
149 PrepareSequence(prepseq);
150
151 if (! ViterbiAlign(icm, statenum, prepseq, &score, &tr))
152 Die("ViterbiAlign() failed on sequence %s", sqinfo.name);
153 free(prepseq);
154
155 printf("%6.2f bits : %s\n", score, sqinfo.name);
156
157 if (do_khstructure || do_mountain)
158 {
159 if (! Trace2ali(seq, tr, watsoncrick, &ali)) Die("Trace2ali failed");
160
161 if (do_khstructure)
162 {
163 if (! Align2kh(ali, &aseq, &khstruct)) Die("Align2kh failed\n");
164
165 /* Print out the sequence and structure
166 */
167 len = strlen(aseq);
168 buffer[60] = '\0';
169 for (apos = 1; apos <= len; apos += 60)
170 {
171 strncpy(buffer, aseq + apos - 1, 60);
172 printf(" %10s %s\n", sqinfo.name, buffer);
173 strncpy(buffer, khstruct + apos - 1, 60);
174 printf(" %10s %s\n", sqinfo.name, buffer);
175 puts("");
176 }
177 free(aseq);
178 free(khstruct);
179 }
180
181 if (do_mountain)
182 {
183 PrintAliLandscape(stdout, cm, ali);
184 puts("");
185 }
186
187 Free_align(ali);
188 }
189
190 FreeSequence(seq, &sqinfo);
191
192 #ifdef MEMDEBUG
193 curr_size = malloc_size(&histid2);
194 if (curr_size != orig_size)
195 {
196 Warn("malloc-debug: current size %ul, starting size %ul\n", curr_size, orig_size);
197 malloc_list(2,histid1, histid2);
198 }
199 #endif
200
201 }
202
203 SeqfileClose(dbfp);
204 FreeCM(cm);
205 free(icm);
206 return 0;
207 }
208
209
210