1 #include "sywise20.h"
2 #include "version.h"
3 #include "genestats.h"
4 #include "geneutil.h"
5 #include "standardout.h"
6 
7 
8 char * program_name = "sywise";
9 
10 char * codon_table = "codon.table";
11 
show_version(FILE * ofp)12 void show_version(FILE * ofp)
13 {
14   fprintf(ofp,"%s\nVersion: %s\nReleased: %s\nCompiled: %s\n",program_name,VERSION_NUMBER,RELEASE_DAY,COMPILE_DATE);
15   fprintf(ofp,"\nThis program is freely distributed under a Gnu Public License\n");
16   fprintf(ofp,"The source code is copyright (c) EMBL and others\n");
17   fprintf(ofp,"There is no warranty, implied or otherwise on the performance of this program\n");
18   fprintf(ofp,"For more information read the GNULICENSE file in the distribution\n\n");
19   fprintf(ofp,"Credits: Ewan Birney <birney@ebi.ac.uk>\n");
20   exit(63);
21 }
22 
show_help(FILE * ofp)23 void show_help(FILE * ofp)
24 {
25   fprintf(ofp,"%s pairwise-alignment-as-fasta\n",program_name);
26 
27   show_help_GeneModelParam(ofp);
28 
29   show_help_ShowGenomicRegionOptions(ofp);
30 
31   show_help_DPRunImpl(ofp);
32 
33   show_help_StandardOutputOptions(ofp);
34 
35   show_standard_options(ofp);
36 }
37 
make_PairBaseModelScore(void)38 PairBaseModelScore * make_PairBaseModelScore(void)
39 {
40   PairBaseModelScore * out;
41   PairBaseModel * m;
42 
43   m = simple_PairBaseModel(0.6,0.4,0.9);
44 
45   out= new_PairBaseModelScore(m);
46 
47   free_PairBaseModel(m);
48 
49 /*
50   out = zero_PairBaseModelScore();
51 */
52 
53   show_PairBaseModelScore(out,stdout);
54   return out;
55 }
56 
57 CodonTable * ct;
58 
59 
make_PairBaseCodonModelScore(CompProb * cp)60 PairBaseCodonModelScore * make_PairBaseCodonModelScore(CompProb * cp)
61 {
62   PairBaseCodonModelScore * out;
63   PairBaseCodonModel * m;
64   FILE * ifp;
65 
66 
67   if( ct == NULL ) {
68     fatal("Could not read codon table");
69   }
70 
71   assert(cp);
72 
73 /*
74   m = very_simple_PairBaseCodonModel(0.8,1.0/20.0,0.2,0.2,ct);
75 */
76 
77   m = make_flat_PairBaseCodonModel(cp,0.1,0.01,ct);
78 
79 
80 
81   diagonal_tweak_PairBaseCodonModel(m,2.0,16.0,1.0/4.0);
82 
83 
84   out = new_PairBaseCodonModelScore(m);
85 
86 /*  flatten_diagonal_PairBaseCodonModelScore(out,ct); */
87 
88   free_PairBaseCodonModel(m);
89 
90   /*  show_PairBaseCodonModelScore(out,ct,stdout); */
91 
92   return out;
93 }
94 
95 
show_score_sequence(AlnBlock * alb,PairBaseSeq * pbs,PairBaseModelScore * m,FILE * ofp)96 void show_score_sequence(AlnBlock * alb,PairBaseSeq * pbs,PairBaseModelScore * m,FILE * ofp)
97 {
98   AlnColumn * alc;
99   char seq1[20];
100   char seq2[20];
101   int match_score = 0;
102 
103 
104   for(alc=alb->start;alc != NULL; alc=alc->next) {
105     if( strcmp(alc->alu[1]->text_label,"CODON") == 0 ) {
106       seq1[0] = char_for_base(anchor_base_from_pairbase(pbs->seq[alc->alu[1]->start+1]));
107       seq1[1] = char_for_base(anchor_base_from_pairbase(pbs->seq[alc->alu[1]->start+2]));
108       seq1[2] = char_for_base(anchor_base_from_pairbase(pbs->seq[alc->alu[1]->start+3]));
109 
110       seq2[0] = char_for_base(informant_base_from_pairbase(pbs->seq[alc->alu[1]->start+1]));
111       seq2[1] = char_for_base(informant_base_from_pairbase(pbs->seq[alc->alu[1]->start+2]));
112       seq2[2] = char_for_base(informant_base_from_pairbase(pbs->seq[alc->alu[1]->start+3]));
113 
114       seq1[3] = '\0';
115       seq2[3] = '\0';
116 
117       match_score = m->base[pbs->seq[alc->alu[1]->start+1]];
118       match_score += m->base[pbs->seq[alc->alu[1]->start+2]];
119       match_score += m->base[pbs->seq[alc->alu[1]->start+3]];
120 
121       fprintf(ofp,"%s %5d %5d %s [%c] vs %s [%c] %-.2f %-.2f\n",alc->alu[1]->text_label,alc->alu[0]->start+1,alc->alu[1]->start+1,seq1,aminoacid_from_seq(ct,seq1),seq2,aminoacid_from_seq(ct,seq2),Score2Bits(alc->alu[0]->score[0]),Score2Bits(match_score));
122     }
123     if( strstr(alc->alu[1]->text_label,"SS") != NULL ) {
124 
125       seq1[0] = char_for_base(anchor_base_from_pairbase(pbs->seq[alc->alu[1]->start-3]));
126       seq1[1] = char_for_base(anchor_base_from_pairbase(pbs->seq[alc->alu[1]->start-2]));
127       seq1[2] = char_for_base(anchor_base_from_pairbase(pbs->seq[alc->alu[1]->start-1]));
128       seq1[3] = char_for_base(anchor_base_from_pairbase(pbs->seq[alc->alu[1]->start]));
129       seq1[4] = char_for_base(anchor_base_from_pairbase(pbs->seq[alc->alu[1]->start+1]));
130       seq1[5] = char_for_base(anchor_base_from_pairbase(pbs->seq[alc->alu[1]->start+2]));
131       seq1[6] = char_for_base(anchor_base_from_pairbase(pbs->seq[alc->alu[1]->start+3]));
132       seq1[7] = char_for_base(anchor_base_from_pairbase(pbs->seq[alc->alu[1]->start+4]));
133       seq1[8] = char_for_base(anchor_base_from_pairbase(pbs->seq[alc->alu[1]->start+5]));
134       seq1[9] = char_for_base(anchor_base_from_pairbase(pbs->seq[alc->alu[1]->start+6]));
135       seq1[10] = char_for_base(anchor_base_from_pairbase(pbs->seq[alc->alu[1]->start+7]));
136 
137       seq2[0] = char_for_base(informant_base_from_pairbase(pbs->seq[alc->alu[1]->start-3]));
138       seq2[1] = char_for_base(informant_base_from_pairbase(pbs->seq[alc->alu[1]->start-2]));
139       seq2[2] = char_for_base(informant_base_from_pairbase(pbs->seq[alc->alu[1]->start-1]));
140       seq2[3] = char_for_base(informant_base_from_pairbase(pbs->seq[alc->alu[1]->start]));
141       seq2[4] = char_for_base(informant_base_from_pairbase(pbs->seq[alc->alu[1]->start+1]));
142       seq2[5] = char_for_base(informant_base_from_pairbase(pbs->seq[alc->alu[1]->start+2]));
143       seq2[6] = char_for_base(informant_base_from_pairbase(pbs->seq[alc->alu[1]->start+3]));
144       seq2[7] = char_for_base(informant_base_from_pairbase(pbs->seq[alc->alu[1]->start+4]));
145       seq2[8] = char_for_base(informant_base_from_pairbase(pbs->seq[alc->alu[1]->start+5]));
146       seq2[9] = char_for_base(informant_base_from_pairbase(pbs->seq[alc->alu[1]->start+6]));
147       seq2[10] = char_for_base(informant_base_from_pairbase(pbs->seq[alc->alu[1]->start+7]));
148 
149       seq1[11] = '\0';
150       seq2[11] = '\0';
151 
152 
153       fprintf(ofp,"%12s %5d %5d %.2f\n",alc->alu[1]->text_label,alc->alu[0]->start+1,alc->alu[1]->start+1,Score2Bits(alc->alu[0]->score[0]));
154       fprintf(ofp,"     %s\n     %s\n",seq1,seq2);
155     }
156   }
157 
158 }
159 
160 
id(int i)161 double id(int i)
162 {
163   return (double)i;
164 }
165 
main(int argc,char ** argv)166 int main(int argc,char ** argv)
167 {
168   int i;
169 
170   DPRunImpl * dpri = NULL;
171   GeneModelParam * gmp = NULL;
172   GeneModel * gm = NULL;
173 
174   FILE * ifp;
175   SeqAlign   * al;
176   PairBaseSeq * pbs;
177 
178   ComplexSequenceEval * splice5;
179   ComplexSequenceEval * splice3;
180   ComplexSequence * cseq;
181 
182 
183   CompMat * score_mat;
184   CompProb * comp_prob;
185   RandomModel * rm;
186 
187   PairBaseCodonModelScore * codon_score;
188   PairBaseModelScore* nonc_score;
189 
190   PairBaseCodonModelScore * start;
191   PairBaseCodonModelScore * stop;
192 
193 
194   SyExonScore * exonscore;
195 
196   PackAln * pal;
197   AlnBlock * alb;
198 
199   Genomic * genomic;
200   GenomicRegion * gr;
201   GenomicRegion * gr2;
202   Protein * trans;
203 
204   StandardOutputOptions * std_opt;
205   ShowGenomicRegionOptions * sgro;
206 
207   char * dump_packaln = NULL;
208   char * read_packaln = NULL;
209   FILE * packifp = NULL;
210 
211   boolean show_trans    = 1;
212   boolean show_gene_raw = 0;
213 
214 
215 
216   ct = read_CodonTable_file(codon_table);
217 /*
218   score_mat = read_Blast_file_CompMat("blosum62.bla");
219   comp_prob = CompProb_from_halfbit(score_mat);
220 */
221   rm = default_RandomModel();
222 
223   comp_prob = read_Blast_file_CompProb("wag85");
224 
225   fold_column_RandomModel_CompProb(comp_prob,rm);
226 
227   dpri = new_DPRunImpl_from_argv(&argc,argv);
228   if( dpri == NULL ) {
229     fatal("Unable to build DPRun implementation. Bad arguments");
230   }
231 
232   gmp = new_GeneModelParam_from_argv(&argc,argv);
233 
234   std_opt = new_StandardOutputOptions_from_argv(&argc,argv);
235   sgro = new_ShowGenomicRegionOptions_from_argv(&argc,argv);
236 
237 
238   dump_packaln = strip_out_assigned_argument(&argc,argv,"dump");
239   read_packaln = strip_out_assigned_argument(&argc,argv,"recover");
240 
241   strip_out_standard_options(&argc,argv,show_help,show_version);
242   if( argc != 2 ) {
243     show_help(stdout);
244     exit(12);
245   }
246 
247 
248   if((gm=GeneModel_from_GeneModelParam(gmp)) == NULL ) {
249     fatal("Could not build gene model");
250   }
251 
252   codon_score = make_PairBaseCodonModelScore(comp_prob);
253   nonc_score  = make_PairBaseModelScore();
254 
255   splice5 = ComplexSequenceEval_from_pwmDNAScore_splice(gm->splice5score);
256   splice3 = ComplexSequenceEval_from_pwmDNAScore_splice(gm->splice3score);
257 
258   if((ifp = openfile(argv[1],"r")) == NULL ) {
259     fatal("Could not open file %s",argv[1]);
260   }
261 
262   al = read_fasta_SeqAlign(ifp);
263 
264   assert(al);
265   assert(al->len == 2);
266   assert(al->seq[0]->len > 0);
267   assert(al->seq[1]->len > 0);
268 
269 /*  write_fasta_SeqAlign(al,stdout);*/
270 
271 
272   pbs = new_PairBaseSeq_SeqAlign(al);
273 
274   if( read_packaln == NULL ) {
275     cseq = ComplexSequence_from_PairBaseSeq(pbs,splice5,splice3);
276   }
277 
278   start = make_start_PairBaseCodonModelScore(ct);
279   stop  = make_stop_PairBaseCodonModelScore(ct);
280 
281 
282 /*  show_PairBaseCodonModelScore(stop,ct,stdout); */
283 
284 /*
285   for(i=0;i<pbs->anchor->len;i++) {
286     printf("%3d  %c For %-6d %-6d %c Rev %-6d %-6d\n",i,pbs->anchor->seq[i],
287 	   CSEQ_PAIR_5SS(cseq,i),CSEQ_PAIR_3SS(cseq,i),
288 	   char_complement_base(pbs->anchor->seq[i]),
289 	   CSEQ_REV_PAIR_5SS(cseq,i),CSEQ_REV_PAIR_3SS(cseq,i));
290   }
291 */
292 
293 
294   /*  show_ComplexSequence(cseq,stdout);
295 
296   */
297 
298 
299   exonscore = SyExonScore_flat_model(100,150,0.1,1.0);
300   /*
301   for(i=0;i<cseq->length;i++) {
302     fprintf(stdout,"%d PairSeq is %d score %d\n",i,CSEQ_PAIR_PAIRBASE(cseq,i),nonc_score->base[CSEQ_PAIR_PAIRBASE(cseq,i)]);
303   }
304   exit(0);
305   */
306 
307   if( read_packaln != NULL ) {
308     packifp = openfile(read_packaln,"r");
309     if( packifp == NULL ) {
310       fatal("File %s is unopenable - ignoring dump command",dump_packaln);
311     } else {
312       pal = read_simple_PackAln(packifp);
313     }
314   } else {
315     pal = PackAln_bestmemory_SyWise20(exonscore,cseq,codon_score,nonc_score,start,stop,Probability2Score(1.0/100.0),Probability2Score(1.0/10000.0),Probability2Score(1.0/10.0),NULL,dpri);
316   }
317 
318   alb = convert_PackAln_to_AlnBlock_SyWise20(pal);
319 
320 
321   if( dump_packaln != NULL ) {
322     packifp = openfile(dump_packaln,"w");
323     if( packifp == NULL ) {
324       warn("File %s is unopenable - ignoring dump command",dump_packaln);
325     } else {
326       show_simple_PackAln(pal,packifp);
327     }
328   }
329 
330   show_score_sequence(alb,pbs,nonc_score,stdout);
331 /*
332   show_StandardOutputOptions(std_opt,alb,pal,"//",stdout);
333 */
334   genomic = Genomic_from_Sequence(al->seq[0]);
335   gr = new_GenomicRegion(genomic);
336   gr2 = new_GenomicRegion(genomic);
337 
338   add_Genes_to_GenomicRegion_new(gr,alb);
339 
340 
341   show_GenomicRegionOptions(sgro,gr,ct,"//",stdout);
342 
343   return 0;
344 }
345 
346 
347 
348 
349