1 #include "sywise20.h"
2 #include "version.h"
3 #include "genestats.h"
4 #include "geneutil.h"
5 #include "standardout.h"
6
7
8 char * program_name = "sywise";
9
10 char * codon_table = "codon.table";
11
show_version(FILE * ofp)12 void show_version(FILE * ofp)
13 {
14 fprintf(ofp,"%s\nVersion: %s\nReleased: %s\nCompiled: %s\n",program_name,VERSION_NUMBER,RELEASE_DAY,COMPILE_DATE);
15 fprintf(ofp,"\nThis program is freely distributed under a Gnu Public License\n");
16 fprintf(ofp,"The source code is copyright (c) EMBL and others\n");
17 fprintf(ofp,"There is no warranty, implied or otherwise on the performance of this program\n");
18 fprintf(ofp,"For more information read the GNULICENSE file in the distribution\n\n");
19 fprintf(ofp,"Credits: Ewan Birney <birney@ebi.ac.uk>\n");
20 exit(63);
21 }
22
show_help(FILE * ofp)23 void show_help(FILE * ofp)
24 {
25 fprintf(ofp,"%s pairwise-alignment-as-fasta\n",program_name);
26
27 show_help_GeneModelParam(ofp);
28
29 show_help_ShowGenomicRegionOptions(ofp);
30
31 show_help_DPRunImpl(ofp);
32
33 show_help_StandardOutputOptions(ofp);
34
35 show_standard_options(ofp);
36 }
37
make_PairBaseModelScore(void)38 PairBaseModelScore * make_PairBaseModelScore(void)
39 {
40 PairBaseModelScore * out;
41 PairBaseModel * m;
42
43 m = simple_PairBaseModel(0.6,0.4,0.9);
44
45 out= new_PairBaseModelScore(m);
46
47 free_PairBaseModel(m);
48
49 /*
50 out = zero_PairBaseModelScore();
51 */
52
53 show_PairBaseModelScore(out,stdout);
54 return out;
55 }
56
57 CodonTable * ct;
58
59
make_PairBaseCodonModelScore(CompProb * cp)60 PairBaseCodonModelScore * make_PairBaseCodonModelScore(CompProb * cp)
61 {
62 PairBaseCodonModelScore * out;
63 PairBaseCodonModel * m;
64 FILE * ifp;
65
66
67 if( ct == NULL ) {
68 fatal("Could not read codon table");
69 }
70
71 assert(cp);
72
73 /*
74 m = very_simple_PairBaseCodonModel(0.8,1.0/20.0,0.2,0.2,ct);
75 */
76
77 m = make_flat_PairBaseCodonModel(cp,0.1,0.01,ct);
78
79
80
81 diagonal_tweak_PairBaseCodonModel(m,2.0,16.0,1.0/4.0);
82
83
84 out = new_PairBaseCodonModelScore(m);
85
86 /* flatten_diagonal_PairBaseCodonModelScore(out,ct); */
87
88 free_PairBaseCodonModel(m);
89
90 /* show_PairBaseCodonModelScore(out,ct,stdout); */
91
92 return out;
93 }
94
95
show_score_sequence(AlnBlock * alb,PairBaseSeq * pbs,PairBaseModelScore * m,FILE * ofp)96 void show_score_sequence(AlnBlock * alb,PairBaseSeq * pbs,PairBaseModelScore * m,FILE * ofp)
97 {
98 AlnColumn * alc;
99 char seq1[20];
100 char seq2[20];
101 int match_score = 0;
102
103
104 for(alc=alb->start;alc != NULL; alc=alc->next) {
105 if( strcmp(alc->alu[1]->text_label,"CODON") == 0 ) {
106 seq1[0] = char_for_base(anchor_base_from_pairbase(pbs->seq[alc->alu[1]->start+1]));
107 seq1[1] = char_for_base(anchor_base_from_pairbase(pbs->seq[alc->alu[1]->start+2]));
108 seq1[2] = char_for_base(anchor_base_from_pairbase(pbs->seq[alc->alu[1]->start+3]));
109
110 seq2[0] = char_for_base(informant_base_from_pairbase(pbs->seq[alc->alu[1]->start+1]));
111 seq2[1] = char_for_base(informant_base_from_pairbase(pbs->seq[alc->alu[1]->start+2]));
112 seq2[2] = char_for_base(informant_base_from_pairbase(pbs->seq[alc->alu[1]->start+3]));
113
114 seq1[3] = '\0';
115 seq2[3] = '\0';
116
117 match_score = m->base[pbs->seq[alc->alu[1]->start+1]];
118 match_score += m->base[pbs->seq[alc->alu[1]->start+2]];
119 match_score += m->base[pbs->seq[alc->alu[1]->start+3]];
120
121 fprintf(ofp,"%s %5d %5d %s [%c] vs %s [%c] %-.2f %-.2f\n",alc->alu[1]->text_label,alc->alu[0]->start+1,alc->alu[1]->start+1,seq1,aminoacid_from_seq(ct,seq1),seq2,aminoacid_from_seq(ct,seq2),Score2Bits(alc->alu[0]->score[0]),Score2Bits(match_score));
122 }
123 if( strstr(alc->alu[1]->text_label,"SS") != NULL ) {
124
125 seq1[0] = char_for_base(anchor_base_from_pairbase(pbs->seq[alc->alu[1]->start-3]));
126 seq1[1] = char_for_base(anchor_base_from_pairbase(pbs->seq[alc->alu[1]->start-2]));
127 seq1[2] = char_for_base(anchor_base_from_pairbase(pbs->seq[alc->alu[1]->start-1]));
128 seq1[3] = char_for_base(anchor_base_from_pairbase(pbs->seq[alc->alu[1]->start]));
129 seq1[4] = char_for_base(anchor_base_from_pairbase(pbs->seq[alc->alu[1]->start+1]));
130 seq1[5] = char_for_base(anchor_base_from_pairbase(pbs->seq[alc->alu[1]->start+2]));
131 seq1[6] = char_for_base(anchor_base_from_pairbase(pbs->seq[alc->alu[1]->start+3]));
132 seq1[7] = char_for_base(anchor_base_from_pairbase(pbs->seq[alc->alu[1]->start+4]));
133 seq1[8] = char_for_base(anchor_base_from_pairbase(pbs->seq[alc->alu[1]->start+5]));
134 seq1[9] = char_for_base(anchor_base_from_pairbase(pbs->seq[alc->alu[1]->start+6]));
135 seq1[10] = char_for_base(anchor_base_from_pairbase(pbs->seq[alc->alu[1]->start+7]));
136
137 seq2[0] = char_for_base(informant_base_from_pairbase(pbs->seq[alc->alu[1]->start-3]));
138 seq2[1] = char_for_base(informant_base_from_pairbase(pbs->seq[alc->alu[1]->start-2]));
139 seq2[2] = char_for_base(informant_base_from_pairbase(pbs->seq[alc->alu[1]->start-1]));
140 seq2[3] = char_for_base(informant_base_from_pairbase(pbs->seq[alc->alu[1]->start]));
141 seq2[4] = char_for_base(informant_base_from_pairbase(pbs->seq[alc->alu[1]->start+1]));
142 seq2[5] = char_for_base(informant_base_from_pairbase(pbs->seq[alc->alu[1]->start+2]));
143 seq2[6] = char_for_base(informant_base_from_pairbase(pbs->seq[alc->alu[1]->start+3]));
144 seq2[7] = char_for_base(informant_base_from_pairbase(pbs->seq[alc->alu[1]->start+4]));
145 seq2[8] = char_for_base(informant_base_from_pairbase(pbs->seq[alc->alu[1]->start+5]));
146 seq2[9] = char_for_base(informant_base_from_pairbase(pbs->seq[alc->alu[1]->start+6]));
147 seq2[10] = char_for_base(informant_base_from_pairbase(pbs->seq[alc->alu[1]->start+7]));
148
149 seq1[11] = '\0';
150 seq2[11] = '\0';
151
152
153 fprintf(ofp,"%12s %5d %5d %.2f\n",alc->alu[1]->text_label,alc->alu[0]->start+1,alc->alu[1]->start+1,Score2Bits(alc->alu[0]->score[0]));
154 fprintf(ofp," %s\n %s\n",seq1,seq2);
155 }
156 }
157
158 }
159
160
id(int i)161 double id(int i)
162 {
163 return (double)i;
164 }
165
main(int argc,char ** argv)166 int main(int argc,char ** argv)
167 {
168 int i;
169
170 DPRunImpl * dpri = NULL;
171 GeneModelParam * gmp = NULL;
172 GeneModel * gm = NULL;
173
174 FILE * ifp;
175 SeqAlign * al;
176 PairBaseSeq * pbs;
177
178 ComplexSequenceEval * splice5;
179 ComplexSequenceEval * splice3;
180 ComplexSequence * cseq;
181
182
183 CompMat * score_mat;
184 CompProb * comp_prob;
185 RandomModel * rm;
186
187 PairBaseCodonModelScore * codon_score;
188 PairBaseModelScore* nonc_score;
189
190 PairBaseCodonModelScore * start;
191 PairBaseCodonModelScore * stop;
192
193
194 SyExonScore * exonscore;
195
196 PackAln * pal;
197 AlnBlock * alb;
198
199 Genomic * genomic;
200 GenomicRegion * gr;
201 GenomicRegion * gr2;
202 Protein * trans;
203
204 StandardOutputOptions * std_opt;
205 ShowGenomicRegionOptions * sgro;
206
207 char * dump_packaln = NULL;
208 char * read_packaln = NULL;
209 FILE * packifp = NULL;
210
211 boolean show_trans = 1;
212 boolean show_gene_raw = 0;
213
214
215
216 ct = read_CodonTable_file(codon_table);
217 /*
218 score_mat = read_Blast_file_CompMat("blosum62.bla");
219 comp_prob = CompProb_from_halfbit(score_mat);
220 */
221 rm = default_RandomModel();
222
223 comp_prob = read_Blast_file_CompProb("wag85");
224
225 fold_column_RandomModel_CompProb(comp_prob,rm);
226
227 dpri = new_DPRunImpl_from_argv(&argc,argv);
228 if( dpri == NULL ) {
229 fatal("Unable to build DPRun implementation. Bad arguments");
230 }
231
232 gmp = new_GeneModelParam_from_argv(&argc,argv);
233
234 std_opt = new_StandardOutputOptions_from_argv(&argc,argv);
235 sgro = new_ShowGenomicRegionOptions_from_argv(&argc,argv);
236
237
238 dump_packaln = strip_out_assigned_argument(&argc,argv,"dump");
239 read_packaln = strip_out_assigned_argument(&argc,argv,"recover");
240
241 strip_out_standard_options(&argc,argv,show_help,show_version);
242 if( argc != 2 ) {
243 show_help(stdout);
244 exit(12);
245 }
246
247
248 if((gm=GeneModel_from_GeneModelParam(gmp)) == NULL ) {
249 fatal("Could not build gene model");
250 }
251
252 codon_score = make_PairBaseCodonModelScore(comp_prob);
253 nonc_score = make_PairBaseModelScore();
254
255 splice5 = ComplexSequenceEval_from_pwmDNAScore_splice(gm->splice5score);
256 splice3 = ComplexSequenceEval_from_pwmDNAScore_splice(gm->splice3score);
257
258 if((ifp = openfile(argv[1],"r")) == NULL ) {
259 fatal("Could not open file %s",argv[1]);
260 }
261
262 al = read_fasta_SeqAlign(ifp);
263
264 assert(al);
265 assert(al->len == 2);
266 assert(al->seq[0]->len > 0);
267 assert(al->seq[1]->len > 0);
268
269 /* write_fasta_SeqAlign(al,stdout);*/
270
271
272 pbs = new_PairBaseSeq_SeqAlign(al);
273
274 if( read_packaln == NULL ) {
275 cseq = ComplexSequence_from_PairBaseSeq(pbs,splice5,splice3);
276 }
277
278 start = make_start_PairBaseCodonModelScore(ct);
279 stop = make_stop_PairBaseCodonModelScore(ct);
280
281
282 /* show_PairBaseCodonModelScore(stop,ct,stdout); */
283
284 /*
285 for(i=0;i<pbs->anchor->len;i++) {
286 printf("%3d %c For %-6d %-6d %c Rev %-6d %-6d\n",i,pbs->anchor->seq[i],
287 CSEQ_PAIR_5SS(cseq,i),CSEQ_PAIR_3SS(cseq,i),
288 char_complement_base(pbs->anchor->seq[i]),
289 CSEQ_REV_PAIR_5SS(cseq,i),CSEQ_REV_PAIR_3SS(cseq,i));
290 }
291 */
292
293
294 /* show_ComplexSequence(cseq,stdout);
295
296 */
297
298
299 exonscore = SyExonScore_flat_model(100,150,0.1,1.0);
300 /*
301 for(i=0;i<cseq->length;i++) {
302 fprintf(stdout,"%d PairSeq is %d score %d\n",i,CSEQ_PAIR_PAIRBASE(cseq,i),nonc_score->base[CSEQ_PAIR_PAIRBASE(cseq,i)]);
303 }
304 exit(0);
305 */
306
307 if( read_packaln != NULL ) {
308 packifp = openfile(read_packaln,"r");
309 if( packifp == NULL ) {
310 fatal("File %s is unopenable - ignoring dump command",dump_packaln);
311 } else {
312 pal = read_simple_PackAln(packifp);
313 }
314 } else {
315 pal = PackAln_bestmemory_SyWise20(exonscore,cseq,codon_score,nonc_score,start,stop,Probability2Score(1.0/100.0),Probability2Score(1.0/10000.0),Probability2Score(1.0/10.0),NULL,dpri);
316 }
317
318 alb = convert_PackAln_to_AlnBlock_SyWise20(pal);
319
320
321 if( dump_packaln != NULL ) {
322 packifp = openfile(dump_packaln,"w");
323 if( packifp == NULL ) {
324 warn("File %s is unopenable - ignoring dump command",dump_packaln);
325 } else {
326 show_simple_PackAln(pal,packifp);
327 }
328 }
329
330 show_score_sequence(alb,pbs,nonc_score,stdout);
331 /*
332 show_StandardOutputOptions(std_opt,alb,pal,"//",stdout);
333 */
334 genomic = Genomic_from_Sequence(al->seq[0]);
335 gr = new_GenomicRegion(genomic);
336 gr2 = new_GenomicRegion(genomic);
337
338 add_Genes_to_GenomicRegion_new(gr,alb);
339
340
341 show_GenomicRegionOptions(sgro,gr,ct,"//",stdout);
342
343 return 0;
344 }
345
346
347
348
349