1 #include "seaview.h"
2 #include "pdf_or_ps.h"
3 #include "svg.h"
4 #include <ctype.h>
5 #include <time.h>
6 #ifndef WIN32
7 #include <unistd.h>
8 #endif
9
10 /* included functions */
11 int read_mase_seqs_header(const char *masefname, char ***pseq, char ***pseqname,
12 char ***pcomments, char **pheader, char **err_message);
13 int one_more_seq_found(int count1, char ***pseq, char ***pseqname, char ***pcomments);
14 int read_fasta_align(const char *fname, char ***pseq, char ***pseqname,
15 char ***pcomments, char **pheader, char **err_message, int spaces_in_names);
16 int read_phylip_align(const char *fname, char ***pseq, char ***pseqname,
17 char ***pcomments, char **pheader, char **err_message);
18 int read_clustal_align(const char *fname, char ***pseq, char ***pseqname,
19 char ***pcomments, char **pheader, char **err_message);
20 int read_msf_align(const char *fname, char ***pseq, char ***pseqname,
21 char ***pcomments, char **pheader, char **err_message);
22 int is_a_protein_seq(char *seq);
23 int save_fasta_file(const char *fname, char **seq, char **comments,
24 char **seqname, int totseqs, int *eachlength, region *region_used,
25 int *sel_seqs, int tot_sel_seqs, int spaces_in_names, int pad_to_max_length);
26 int save_phylip_file(const char *fname, char **seq,
27 char **seqname, int totseqs, int *eachlength, region *region_used,
28 int *sel_seqs, int tot_sel_seqs, int phylipwidnames);
29 int output_next_res_from_region(char *seq, int lenseq,
30 list_segments **segment, int *current, FILE *out, int total,
31 int use_dots);
32 void save_regions(list_regions *regions, FILE *out);
33 int save_mase_file(const char *fname, char **seq, char **comments,
34 char *header, char **seqname, int totseqs, int *eachlength,
35 list_regions *regions, region *region_used, int numb_species_sets,
36 int **list_species_sets, char **name_species_sets,
37 int *sel_seqs, int tot_sel_seqs, int tot_comment_lines,
38 char **comment_name, char **comment_line,
39 int tot_trees, char **trees, const Fl_Menu_Item *menu_tree_items);
40 int save_clustal_file(const char *fname, char **seq,
41 char **seqname, int totseqs, int *eachlength, region *region_used,
42 int *sel_seqs, int tot_sel_seqs);
43 int calc_gcg_check(list_segments *psegment, char *seq);
44 int save_msf_file(const char *fname, char **seq,
45 char **seqname, int totseqs, int *eachlength, region *region_used,
46 int protein, int *sel_seqs, int tot_sel_seqs);
47 char *save_alignment_or_region(const char *fname, char **seq, char **comments,
48 char *header, char **seqname, int totseqs, int *eachlength,
49 list_regions *regions, region *region_used, known_format format,
50 int numb_species_sets, int **list_species_sets,
51 char **name_species_sets, int *sel_seqs, int tot_sel_seqs, int protein,
52 int tot_comment_lines, char **comment_name, char **comment_line, int phylipwidnames,
53 int tot_trees, char **trees, const Fl_Menu_Item *menu_tree_items, int spaces_in_fasta_names);
54 char *get_full_path(const char *fname);
55 static void save_species_sets(int numb_species_sets, int **list_species_sets,
56 char **name_species_sets, int totseqs, FILE *out);
57 void save_comment_lines(int tot_comment_lines, char **names, char **lines,
58 FILE *out);
59 known_format what_format(const char *filename);
60 char* seaview_file_chooser_save_as(const char* message, const char* fname, SEA_VIEW *view, known_format* new_format);
61 const char *extract_dirname(const char *pathname);
62 int printout(SEA_VIEW *view, const char *filename,
63 int fontsize, int block_size, Fl_Paged_Device::Page_Format pageformat, int vary_only, int ref0,
64 int pdfkindvalue, Fl_Paged_Device::Page_Layout layout, int svg_width = 0);
65 static void color_pdf_display(SEA_VIEW *view, int (*calc_color_function)( int ), char *oneline,
66 int widnames, double x, double y, int fontsize, double char_width, double descender,int num, int current);
67 static void color_svg_display(SEA_VIEW *view, int (*calc_color_function)( int ), char *oneline,
68 int widnames, double x, double y, int fontsize, double char_width);
69 static int calc_vary_lines(int *vary_pos, int widpos);
70 static void out_vary_pos(int *vary_pos, int widnames, int widpos, int nl, FILE *textfile, double x, double y);
71 SEA_VIEW* read_alignment_file(const char *infile);
72 SEA_VIEW *cmdline_read_input_alignment(int argc, char **argv);
73 void format_conversion(int argc, char **argv);
74 char *process_output_options(int argc, char **argv, known_format& out_format, bool& std_output);
75 #ifndef NO_PDF
76 void printout_cmdline(int argc, char **argv);
77 #endif
78
79
80 /* external */
81 extern char *f_format_names[];
82 extern char *f_format_exts[];
83 extern int nbr_formats;
84 extern float argval(int argc, char *argv[], const char *arg, float defval);
85 extern int calc_max_seq_length(int seq_length, int tot_seqs);
86 extern int max_protcolors;
87 extern char def_stdcolorgroups[];
88 extern int def_protcolors_rgb[];
89 char *get_res_value(const char *name, const char *def_value);
90 extern int prep_custom_colors(int *colors, char *customcolors, int max_colors);
91 extern color_choice prep_aa_color_code(char *list_std, char *list_alt,
92 int maxprotcolors, int *numb_stdprotcolors, int *numb_altprotcolors);
93 extern void load_resources(const char *progname);
94 extern void allonge_seqs(char **seq, int totseqs, int maxlen, int *eachlength,
95 int tot_comment_lines, char **comment_line, char **pregion_line);
96 extern int int_res_value(const char *name, int def_value);
97 extern const char *progname;
98 #if defined(__APPLE__)
99 extern const char *MG_GetBundleResourcesDir(void);
100 #else
101 extern char *get_prog_dir(void);
102 #endif
103
104
105 extern int save_nexus_file(const char *fname, int ntaxa, int protein,
106 char **seqs, char **taxnames, char **notes, char *header,
107 int num_species_sets, int **list_species_sets,
108 char **name_species_sets,
109 list_regions *charsets,
110 int tot_comment_lines, char **comment_name, char **comment_line,
111 region *region_used, int *sel_seqs, int tot_sel_seqs, int *eachlength,
112 int tot_trees, char **trees, const Fl_Menu_Item *items);
113 extern char *my_fgets(char *s, int n, FILE *f);
114 extern char *argname(int argc, char *argv[], const char *arg);
115 extern int isarg(int argc, char *argv[], const char *arg);
116 extern char *create_tmp_filename(void);
117 extern void delete_tmp_filename(const char *base_fname);
118 extern char *translate_with_gaps(char *seq, int gc);
119 extern int get_ncbi_gc_from_comment(char *comment);
120 extern int create_gblocks_mask(SEA_VIEW *view, region *myregion, int no_gui, int b5_val, int b4_val, int b3_val, int b2_val);
121 extern char *back_translate_with_gaps(char *prot, char *dna);
122 extern void del_gap_only_sites(SEA_VIEW *view);
123 extern void save_bootstrap_replicates(const char *fname, int replicates, SEA_VIEW *view);
124 extern "C" {
125 int get_acnuc_gc_number(int ncbi_gc);
126 int get_ncbi_gc_number(int ncbi_gc);
127 }
128
129
read_mase_seqs_header(const char * masefname,char *** pseq,char *** pseqname,char *** pcomments,char ** pheader,char ** err_message)130 int read_mase_seqs_header(const char *masefname, char ***pseq, char ***pseqname,
131 char ***pcomments, char **pheader, char **err_message)
132 {
133 #define MAXLENSEQ 10000 /* unite d'allocation de memoire */
134 #define lline 2000
135 FILE *masef;
136 char line[lline], *i, *base, *header = NULL, *provseq = NULL, *p;
137 int l, lenseqs, lpre, lseq, l2, totseqs = -1, want_header, curr_max_header;
138 static char ret_message[200];
139 char **seq, **seqname, **comments;
140
141 *ret_message = 0;
142 *err_message = ret_message;
143 if( (masef=fopen(masefname,"r")) == NULL) {
144 sprintf(ret_message,"File not found:%s",masefname);
145 return 0;
146 }
147 want_header = (pheader != NULL);
148
149 if(fgets(line, lline, masef)==NULL)goto fini;
150 if(strchr(line, '\n') == NULL) {
151 strcpy(ret_message,"Not a mase file!");
152 goto fini;
153 }
154 if(strncmp(line,";;",2)==0) {
155 if(want_header) {
156 if( (header=(char *)malloc(MAXLENCOM+1)) ==
157 NULL)goto nomem;
158 curr_max_header = MAXLENCOM;
159 strcpy(header,line);
160 lpre=strlen(line);
161 }
162 do {
163 if( fgets(line,lline,masef)==NULL ) goto fini;
164 if(strncmp(line,";;",2)!=0) break;
165 if(header != NULL) {
166 lseq=strlen(line);
167 if(lpre+lseq > curr_max_header) {
168 curr_max_header += MAXLENCOM;
169 if( (p=(char *)malloc(curr_max_header+1))
170 == NULL ) goto nomem;
171 memcpy(p, header, lpre);
172 free(header);
173 header = p;
174 }
175 memcpy(header+lpre,line, lseq);
176 lpre += lseq;
177 }
178 }
179 while (1);
180 if( want_header ) {
181 header[lpre] = 0;
182 header=(char *)realloc(header,lpre+1);
183 }
184 }
185 if(*line != ';' ) {
186 strcpy(ret_message,"Not a mase file!");
187 goto fini;
188 }
189
190 lenseqs=MAXLENSEQ;
191 if( (provseq=(char *)malloc(lenseqs+1)) ==NULL)goto nomem;
192
193 i=line;
194 while(i!=NULL){
195 totseqs = one_more_seq_found(totseqs, &seq, &seqname, &comments);
196 if(totseqs == -1) goto nomem;
197 if(comments!=NULL) {
198 if( (comments[totseqs]=(char *)malloc(MAXLENCOM+1)) ==
199 NULL)goto nomem;
200 strcpy(comments[totseqs],line);
201 lpre=strlen(line); l=MAXLENCOM;
202 while(*fgets(line,lline,masef)==';') {
203 lseq=strlen(line);
204 if(lpre+lseq <= l) {
205 strcpy(comments[totseqs]+lpre,line);
206 lpre += lseq;
207 }
208 else l=lpre-1;
209 }
210 if(lpre<MAXLENCOM)
211 comments[totseqs]=(char *)realloc(comments[totseqs],lpre+1);
212 }
213 else while(*fgets(line,lline,masef)==';');
214 l = strlen(line);
215 while((line[l-1] == ' ' || line[l-1] == '\n') && l>0 ) l--; line[l] = 0;
216 if( (seqname[totseqs]=(char *)malloc(l+1)) == NULL)goto nomem;
217 strcpy(seqname[totseqs],line);
218 lseq = 0; /* what is already put in provseq */
219 while( (i=fgets(line,lline,masef))!= NULL && *i != ';' ) {
220 l2 = strlen(line);
221 if( line[l2 - 1] == '\n' ) l2--;
222 while(l2>0 && line[l2-1]==' ')l2--;
223 if(lseq + l2 > lenseqs) {
224 char *temp;
225 lenseqs += MAXLENSEQ;
226 temp = (char *)malloc(lenseqs+1);
227 if(temp == NULL) goto nomem;
228 memcpy(temp, provseq, lseq);
229 free(provseq);
230 provseq = temp;
231 }
232 memcpy(provseq+lseq, line, l2);
233 lseq += l2;
234 }
235 provseq[lseq]='\0';
236 seq[totseqs] = (char *)malloc(lseq+1);
237 if(seq[totseqs] == NULL) goto nomem;
238 /* ignore space or non printable characters */
239 base=provseq - 1; p = seq[totseqs] - 1;
240 while ( *(++base) != 0) {
241 if(isprint(*base) && ! isspace(*base) ) {
242 // *(++p) = toupper(*base);
243 *(++p) = *base;
244 }
245 }
246 *(++p) = 0;
247 }
248 seq = (char **)realloc(seq, (totseqs + 1)*sizeof(char *));
249 seqname = (char **)realloc(seqname, (totseqs + 1)*sizeof(char *));
250 comments = (char **)realloc(comments, (totseqs + 1)*sizeof(char *));
251 *pseq = seq; *pseqname = seqname; *pcomments = comments;
252 fini:
253 fclose(masef);
254 if(want_header) *pheader = header;
255 if(provseq != NULL) free(provseq);
256 return totseqs+1;
257 nomem:
258 sprintf(ret_message,"Error: Not enough memory!");
259 totseqs = -1;
260 goto fini;
261 }
262
263
parse_trees_from_header(char * header,SEA_VIEW * view)264 void parse_trees_from_header(char *header, SEA_VIEW *view)
265 {
266 char *new_header, *old_header, *fin_new_header, *p, *q;
267 int l_header, l;
268 if(header == NULL) return;
269 old_header = header;
270 l_header=strlen(header);
271 if( (new_header = (char *)malloc(l_header+1)) == NULL) out_of_memory();
272 fin_new_header = new_header;
273 *new_header = 0;
274 while (*header!= 0) {
275 if(strncmp(header,";;$",3) == 0) {
276 p = header + 3;
277 while(*p == ' ') p++;
278 q = strchr(p, '\n');
279 *q = 0;
280 view->menu_trees->add(p, trees_callback, NULL, 0);
281 int rank = view->menu_trees->vlength();
282 (view->menu_trees->vitem(rank - 1))->labelfont(FL_HELVETICA_ITALIC);
283 *q = '\n';
284 p = q + 1;
285 l = 1;
286 while(TRUE) {
287 q = strchr(p, '\n');
288 if(strncmp(q+1, ";;", 2) != 0) break;
289 if (*(q+3) == '$' && *(q+4) != '\n') break;
290 if (strncmp(q+1, ";;@ of species =", 16) == 0) break;
291 if (strncmp(q+1, ";;# of segments=", 16) == 0) break;
292 p = q + 1;
293 l++;
294 }
295 if(view->tot_trees == 0) view->trees = (char **)malloc(sizeof(char *));
296 else view->trees = (char **)realloc(view->trees, (view->tot_trees + 1) * sizeof(char *));
297 view->trees[view->tot_trees] = (char *)malloc(q - header + 1);
298 p = view->trees[view->tot_trees];
299 for(int i = 0; i < l; i++) {
300 header = strchr(header, '\n') + 1;
301 q = (char *)memccpy(p, header + 2, '\n', l_header);
302 p += (q - p - 1);
303 }
304 *p = 0;
305 view->tot_trees++;
306 }
307 else {
308 p=(char *)memccpy(fin_new_header, header, '\n', l_header);
309 fin_new_header += (p - fin_new_header);
310 }
311 header = strchr(header,'\n') + 1;
312 }
313 *fin_new_header = 0;
314 strcpy(old_header, new_header);
315 free(new_header);
316 }
317
318
/*
 * Make room for one more sequence in the three parallel arrays
 * (*pseq, *pseqname, *pcomments) and return the index of the new slot.
 *
 * count1 is the index of the last used slot, or -1 to start a new series:
 * in that case fresh arrays of 100 slots are allocated and the previous
 * contents of *pseq, *pseqname, *pcomments are ignored. When the arrays are
 * full their capacity is tripled. The capacity is kept in a static variable,
 * so only one series can be built at a time.
 *
 * Returns count1 + 1, or -1 if memory is exhausted. On failure the caller's
 * pointers still designate valid blocks (each one is updated as soon as its
 * reallocation succeeds) and the recorded capacity is left unchanged.
 */
int one_more_seq_found(int count1, char ***pseq, char ***pseqname, char ***pcomments)
{
  static int max_count;
  char **seq, **seqname, **comments;

  if(count1 == -1) max_count = 0;

  if(count1 + 1 < max_count) return count1 + 1;

  if(max_count == 0) {
    const int initial = 100;
    seq = (char **)malloc(initial * sizeof(char *));
    seqname = (char **)malloc(initial * sizeof(char *));
    comments = (char **)malloc(initial * sizeof(char *));
    if(seq == NULL || seqname == NULL || comments == NULL) {
      free(seq); free(seqname); free(comments);
      return -1;
    }
    max_count = initial;
    *pseq = seq; *pseqname = seqname; *pcomments = comments;
  }
  else {
    int new_max = 3 * max_count;
    seq = (char **)realloc(*pseq, new_max * sizeof(char *));
    if(seq == NULL) return -1;
    *pseq = seq;
    seqname = (char **)realloc(*pseqname, new_max * sizeof(char *));
    if(seqname == NULL) return -1;
    *pseqname = seqname;
    comments = (char **)realloc(*pcomments, new_max * sizeof(char *));
    if(comments == NULL) return -1;
    *pcomments = comments;
    max_count = new_max;
  }
  return count1 + 1;
}
352
353
read_fasta_align(const char * fname,char *** pseq,char *** pseqname,char *** pcomments,char ** pheader,char ** err_message,int spaces_in_names)354 int read_fasta_align(const char *fname, char ***pseq, char ***pseqname,
355 char ***pcomments, char **pheader, char **err_message, int spaces_in_names)
356 {
357 FILE *in;
358 int totseqs, lseq, l2, l, lenseqs;
359 char line[500], *p, *i, c, *q, *r;
360 static char ret_message[200];
361 char **seq, **seqname, **comments, *tmpseq = NULL;
362
363 *ret_message = 0;
364 *err_message = ret_message;
365 if( (in=fopen(fname,"r")) == NULL) {
366 sprintf(ret_message,"File not found:%s", fname);
367 return 0;
368 }
369
370 /* calcul du nombre de sequences dans le fichier */
371 totseqs = 0;
372 while(fgets(line, sizeof(line), in) != NULL) {
373 if(*line == '>') totseqs++;
374 }
375 rewind(in);
376 seq = (char **)malloc(totseqs * sizeof(char *));
377 if(seq == NULL) goto nomem;
378 comments = (char **)malloc(totseqs * sizeof(char *));
379 if(comments == NULL) goto nomem;
380 seqname = (char **)malloc(totseqs * sizeof(char *));
381 if(seqname == NULL) goto nomem;
382 *pseq = seq; *pcomments = comments; *pseqname = seqname;
383
384 lenseqs = MAXLENSEQ;
385 tmpseq = (char *)malloc(lenseqs + 1);
386 if(tmpseq == NULL) goto nomem;
387 totseqs = -1;
388 i = fgets(line, sizeof(line), in);
389 if(line[0] != '>') {
390 strcpy(ret_message,"File not in Fasta format!");
391 totseqs = -1; goto fini;
392 }
393 while( i != NULL ){
394 /* finish reading very long title line */
395 c = line[strlen(line) - 1];
396 while(c != '\n' && c != '\r' && c != EOF) c = getc(in);
397 q = line + strlen(line) - 1;
398 while(q > line + 1 && (*q == '\n' || *q == '\r')) *(q--) = 0;
399 totseqs++;
400 p = line + 1;
401 while (*p == ' ') p++;
402 if(spaces_in_names) {
403 while(*p && *p != '\n') p++;
404 while(*(p-1) == ' ') p--;
405 }
406 else {
407 while(*p && *p != ' ' && *p != '\n') p++;
408 }
409 r = line + 1;
410 while (*r == ' ') r++;
411 l = p - r;
412 if( (seqname[totseqs] = (char *)malloc(l+1)) == NULL)goto nomem;
413 memcpy(seqname[totseqs], r, l); seqname[totseqs][l] = 0;
414 /* use rest of title line, if any, as comment */
415 while(*p == ' ') p++;
416 l = q - p + 1;
417 if( l > 0) {
418 comments[totseqs] = (char *)malloc(l + 3);
419 if(comments[totseqs] != NULL) {
420 strcpy(comments[totseqs], ";");
421 strcpy(comments[totseqs] + 1, p);
422 strcpy(comments[totseqs] + l + 1, "\n");
423 }
424 }
425 else comments[totseqs] = NULL;
426 lseq = 0;
427 while( (i=fgets(line, sizeof(line), in))!= NULL && *i != '>' ) {
428 l2 = strlen(line);
429 if( line[l2 - 1] == '\n' ) l2--;
430 while(l2>0 && line[l2-1]==' ')l2--;
431 if(lseq + l2 > lenseqs) {
432 lenseqs += MAXLENSEQ;
433 tmpseq= (char *)realloc(tmpseq, lenseqs + 1);
434 if(tmpseq == NULL) goto nomem;
435 }
436 /* copy seq data excluding spaces (because of gblocks) */
437 p = tmpseq+lseq;
438 q = line;
439 while (q < line + l2) {
440 if(*q != ' ') *(p++) = *q;
441 q++;
442 }
443 lseq += p - (tmpseq+lseq);
444 }
445 tmpseq[lseq]='\0';
446 seq[totseqs] = (char *)malloc(lseq + 1);
447 if(seq[totseqs] == NULL) goto nomem;
448 memcpy(seq[totseqs], tmpseq, lseq + 1);
449 }
450 fini:
451 fclose(in);
452 if(tmpseq != NULL) free(tmpseq);
453 *pheader = NULL;
454 return totseqs+1;
455 nomem:
456 sprintf(ret_message,"Error: Not enough memory!");
457 totseqs = -1;
458 goto fini;
459 }
460
461
/*
 * Read an alignment stored in PHYLIP format (sequential or interleaved).
 *
 * fname       : path of the file to read.
 * pseq, pseqname, pcomments : receive the arrays of sequences, names and
 *               comments (comments are all NULL).
 * pheader     : set to NULL whenever the file could be opened.
 * err_message : receives a pointer to a static message buffer, empty when
 *               no error was detected.
 *
 * The first line gives the number of sequences and their length. In the
 * first block a name ends at the first space, or after 10 characters when
 * the line holds no space. Spaces inside sequence data are dropped; LF,
 * CR and CRLF line ends are accepted.
 * Returns the number of sequences, or 0 on error; on error everything that
 * was allocated is freed and the three output arrays are set to NULL.
 */
int read_phylip_align(const char *fname, char ***pseq, char ***pseqname,
    char ***pcomments, char **pheader, char **err_message)
{
  FILE *in;
  char *p, *q;
  int c;
  static char line[300];
  char **seq = 0, **comments = 0, **seqname = 0;
  int totseqs, lenseqs, i, l, nseqs = 0;
  size_t namelen;
  static char ret_message[200];
  *ret_message = 0;
  *err_message = ret_message;
  in = fopen(fname, "r");
  if(in == NULL) {
    /* precision keeps a long path from overflowing ret_message */
    sprintf(ret_message, "File not found:%.180s", fname);
    return 0;
  }
  if(fgets(line, sizeof(line), in) == NULL ||
      sscanf(line, "%d%d", &totseqs, &lenseqs) != 2 ||
      totseqs <= 0 || lenseqs <= 0) {
    sprintf(ret_message, "Not a PHYLIP file");
    totseqs = 0;
    goto fini;
  }
  nseqs = totseqs;
  /* calloc: every slot starts as NULL so the error path can free safely */
  seq = (char **)calloc(nseqs, sizeof(char *));
  if(seq == NULL) goto nomem;
  seqname = (char **)calloc(nseqs, sizeof(char *));
  if(seqname == NULL) goto nomem;
  comments = (char **)calloc(nseqs, sizeof(char *));
  if(comments == NULL) goto nomem;
  for(i = 0; i < totseqs; i++) {
    if( (seq[i] = (char *)malloc(lenseqs + 1)) == NULL) goto nomem;
    comments[i] = NULL;
  }
  /* first block: one line per sequence, starting with its name */
  for(i = 0; i < totseqs; i++) {
    if(fgets(line, sizeof(line), in) == NULL) goto badfile;
    p = strstr(line, " ");
    if(p == NULL) {
      /* no space: the name is the first 10 characters (or the whole line) */
      namelen = strcspn(line, "\r\n");
      p = line + (namelen < 10 ? namelen : 10);
    }
    if( (seqname[i] = (char *)malloc(p - line + 1)) == NULL) goto nomem;
    memcpy(seqname[i], line, p - line); seqname[i][p - line] = 0;
    q = seq[i];
    while(*p != 0 && *p != '\n' && *p != '\r') {
      if(*p != ' ') {
        if(q - seq[i] >= lenseqs) goto badfile;
        *(q++) = *p;
      }
      p++;
    }
    c = *p;
    if(c == '\n') continue;
    if(c == '\r') {
      /* CR at the very end of the buffer: its LF may still be in the stream */
      if(p[1] == 0) {
        c = fgetc(in);
        if(c != '\n' && c != EOF) ungetc(c, in);
      }
      continue;
    }
    /* the line was longer than the buffer: read its remainder by character */
    for(;;) {
      c = fgetc(in);
      if(c == EOF) goto badfile;
      if(c == '\n' || c == '\r') break;
      if(c != ' ') {
        if(q - seq[i] >= lenseqs) goto badfile;
        *(q++) = (char)c;
      }
    }
    if(c == '\r') { c = fgetc(in); if(c != '\n') ungetc(c, in); }
  }
  /* following blocks: a separator line, then one data line per sequence */
  l = q - seq[totseqs - 1];
  while(l < lenseqs) {
    do c = fgetc(in); while(c != '\n' && c != '\r' && c != EOF);
    if(c == EOF) goto badfile;
    if(c == '\r') { c = fgetc(in); if(c != '\n') ungetc(c, in); }
    for(i = 0; i < totseqs; i++) {
      q = seq[i] + l;
      for(;;) {
        c = fgetc(in);
        if(c == EOF) goto badfile;
        if(c == '\n' || c == '\r') break;
        if(c != ' ') {
          if(q - seq[i] >= lenseqs) goto badfile;
          *(q++) = (char)c;
        }
      }
      if(c == '\r') { c = fgetc(in); if(c != '\n') ungetc(c, in); }
    }
    l = q - seq[totseqs - 1];
  }
  for(i = 0; i < totseqs; i++) seq[i][l] = 0;
fini:
  *pheader = NULL;
  fclose(in);
  *pseq = seq; *pseqname = seqname; *pcomments = comments;
  return totseqs;
nomem:
  sprintf(ret_message, "Not enough memory!");
  goto fail;
badfile:
  sprintf(ret_message, "Bad file format");
fail:
  /* release what was built so far and hand back empty results */
  for(i = 0; i < nseqs; i++) {
    if(seq != NULL) free(seq[i]);
    if(seqname != NULL) free(seqname[i]);
  }
  free(seq); free(seqname); free(comments);
  seq = seqname = comments = NULL;
  totseqs = 0;
  goto fini;
}
556
557
read_clustal_align(const char * fname,char *** pseq,char *** pseqname,char *** pcomments,char ** pheader,char ** err_message)558 int read_clustal_align(const char *fname, char ***pseq, char ***pseqname,
559 char ***pcomments, char **pheader, char **err_message)
560 {
561 FILE *in;
562 char line[200], *p;
563 int i, l, curr_spec, first=TRUE, curr_len, next_len, tot_spec, curr_max_len,
564 carac, wid_name;
565 static char ret_message[200];
566 char **seq, **comments, **seqname = NULL;
567
568 *ret_message = 0;
569 *err_message = ret_message;
570 in=fopen(fname,"r");
571 if(in==NULL) {
572 sprintf(ret_message,"File not found:%s",fname);
573 return 0;
574 }
575 fgets(line,sizeof(line),in);
576 if(strncmp(line,"CLUSTAL",7) != 0) { /* skip 1st line with CLUSTAL in it */
577 strcpy(ret_message,"File not in CLUSTAL format!");
578 tot_spec = -1; goto fini;
579 }
580 /* skip next empty lines */
581 do {
582 carac = getc(in);
583 if(carac == ' ') {
584 fgets(line,sizeof(line),in);
585 carac = getc(in);
586 }
587 }
588 while(carac == '\n' || carac == '\r');
589 ungetc(carac, in); /* back to start of 1st non-empty line */
590 tot_spec = curr_spec = -1; curr_len = next_len = 0;
591 while( fgets(line, sizeof(line), in) != NULL ) {
592 if(*line == '\n' || *line == ' ') {
593 curr_spec = -1;
594 curr_len = next_len;
595 first = FALSE;
596 continue;
597 }
598 else if(tot_spec >= 0 && curr_spec == -1 &&
599 strncmp(line, seqname[0], strlen(seqname[0]) ) != 0) {
600 break;
601 }
602 else {
603 if(first) {
604 curr_spec = one_more_seq_found(curr_spec, &seq, &seqname, &comments);
605 if(curr_spec == -1) goto nomem;
606 }
607 else curr_spec++;
608 }
609 if(first && curr_spec == 0) {
610 /* calcul long partie nom: enlever tout ce qui n'est pas espace en fin */
611 p = line + strlen(line) - 2;
612 while(*p == ' ' || isdigit(*p) ) p--;
613 while (*p != ' ') p--;
614 wid_name = p - line + 1;
615 }
616 if(first) {
617 seqname[curr_spec] = (char *)malloc(wid_name+1);
618 if(seqname[curr_spec]==NULL) {
619 goto nomem;
620 }
621 memcpy(seqname[curr_spec], line, wid_name);
622 p = seqname[curr_spec] + wid_name - 1;
623 while(*p==' ') p--; *(p+1)=0;
624 if(curr_spec > tot_spec) tot_spec = curr_spec;
625 seq[curr_spec] = (char *)malloc(CLU_BLOCK_LEN+1);
626 curr_max_len = CLU_BLOCK_LEN;
627 if(seq[curr_spec]==NULL) {
628 goto nomem;
629 }
630 comments[curr_spec] = NULL;
631 }
632 if(curr_spec == 0) {
633 l = strlen(line) - 1;
634 p = line + l - 1;
635 while(*p == ' ' || isdigit(*p) ) { p--; l--; }
636 l -= wid_name;
637 if(curr_len + l > curr_max_len) {
638 curr_max_len += CLU_BLOCK_LEN;
639 for(i=0; i<=tot_spec; i++) {
640 p = (char *)malloc(curr_max_len+1);
641 if(p == NULL) goto nomem;
642 memcpy(p, seq[i], curr_len);
643 free(seq[i]);
644 seq[i] = p;
645 }
646
647 }
648 next_len = curr_len + l;
649 }
650 memcpy(seq[curr_spec]+curr_len, line + wid_name, l);
651 }
652 for(i=0; i<=tot_spec; i++) seq[i][next_len] = 0;
653 seq = (char **)realloc(seq, (tot_spec + 1)*sizeof(char *));
654 seqname = (char **)realloc(seqname, (tot_spec + 1)*sizeof(char *));
655 comments = (char **)realloc(comments, (tot_spec + 1)*sizeof(char *));
656 *pseq = seq; *pseqname = seqname; *pcomments = comments;
657 fini:
658 *pheader = NULL;
659 fclose(in);
660 return tot_spec + 1;
661 nomem:
662 sprintf(ret_message,"Error: Not enough memory!");
663 tot_spec = -1;
664 goto fini;
665 }
666
667
/*
 * Read an alignment stored in MSF format.
 *
 * fname       : path of the file to read.
 * pseq, pseqname, pcomments : receive the arrays of sequences, names and
 *               comments (comments are all NULL).
 * pheader     : set to NULL whenever the file could be opened.
 * err_message : receives a pointer to a static message buffer, empty when
 *               no error was detected.
 *
 * The "Name: ... Len: n" lines found between the "MSF: " line and the "//"
 * line give the names and the room reserved for each sequence. The data
 * lines that follow are matched to the names in order; '.' is stored as '-'
 * and spaces are dropped.
 * Returns the number of sequences, or 0 on error (file not found, malformed
 * or truncated file, sequence data longer than announced, out of memory).
 */
int read_msf_align(const char *fname, char ***pseq, char ***pseqname,
    char ***pcomments, char **pheader, char **err_message)
{
  FILE *in;
  char line[100], *p, *q;
  int l, curr_spec, curr_len, tot_spec, wid_1_line, wid_block, counted;
  int *capacity = NULL; /* capacity[i]: residues seq[i] can hold */
  static char ret_message[200];
  char **seq, **seqname, **comments;

  *ret_message = 0;
  *err_message = ret_message;
  in = fopen(fname, "r");
  if(in == NULL) {
    /* precision keeps a long path from overflowing ret_message */
    sprintf(ret_message, "File not found:%.180s", fname);
    return 0;
  }

  /* count the sequences in the file */
  counted = 0;
  while(fgets(line, sizeof(line), in) != NULL) {
    if(strncmp(line, "//", 2) == 0) break;
    if(strstr(line, "Name: ") != NULL) counted++;
  }
  rewind(in);
  tot_spec = -1;
  l = (counted > 0 ? counted : 1); /* never ask malloc for 0 bytes */
  seq = (char **)malloc(l * sizeof(char *));
  if(seq == NULL) goto nomem;
  comments = (char **)malloc(l * sizeof(char *));
  if(comments == NULL) goto nomem;
  seqname = (char **)malloc(l * sizeof(char *));
  if(seqname == NULL) goto nomem;
  capacity = (int *)malloc(l * sizeof(int));
  if(capacity == NULL) goto nomem;
  *pseq = seq; *pcomments = comments; *pseqname = seqname;

  p = NULL;
  while(fgets(line, sizeof(line), in) != NULL) {
    if( (p = strstr(line, "MSF: ")) != NULL) break;
  }
  if(p == NULL || counted == 0) goto badformat;
  /* read the names and lengths, up to the "//" line */
  for(;;) {
    if(fgets(line, sizeof(line), in) == NULL) goto badformat; /* no "//" line */
    if(strncmp(line, "//", 2) == 0) break;
    if( (p = strstr(line, "Name:")) == NULL) continue;
    q = strstr(p, " Len: ");
    if(q == NULL || sscanf(q + 5, "%d", &l) != 1 || l < 0) goto badformat;
    if(tot_spec + 1 >= counted) goto badformat; /* more names than counted */
    tot_spec++;
    seqname[tot_spec] = NULL;
    comments[tot_spec] = NULL;
    seq[tot_spec] = (char *)malloc(l + 1);
    if(seq[tot_spec] == NULL) goto nomem;
    seq[tot_spec][0] = 0;
    capacity[tot_spec] = l;
    /* the name lies between "Name:" and " Len: ", surrounding spaces removed */
    p += 5; while(*p == ' ') p++;
    while(q > p && *q == ' ') q--;
    l = q - p + 1;
    if(l <= 0) goto badformat; /* empty name */
    seqname[tot_spec] = (char *)malloc(l + 1);
    if(seqname[tot_spec] == NULL) goto nomem;
    memcpy(seqname[tot_spec], p, l); seqname[tot_spec][l] = 0;
  }
  if(tot_spec < 0) goto badformat;
  /* read the data lines, block after block */
  curr_spec = 0; curr_len = 0; wid_block = 0;
  while(fgets(line, sizeof(line), in) != NULL) {
    p = line; while(*p == ' ') p++;
    l = strlen(seqname[curr_spec]);
    if(strncmp(p, seqname[curr_spec], l) != 0) continue;
    p += l; while(*p == ' ') p++;
    if(curr_len > capacity[curr_spec]) goto badformat;
    q = seq[curr_spec] + curr_len;
    for( ; *p != '\n' && *p != '\r' && *p != 0; p++) {
      if(*p == ' ') continue;
      if(q - seq[curr_spec] >= capacity[curr_spec]) goto badformat;
      *(q++) = (*p == '.' ? '-' : *p);
    }
    *q = 0;
    wid_1_line = q - (seq[curr_spec] + curr_len);
    wid_block = (wid_1_line > wid_block ? wid_1_line : wid_block);
    if(curr_spec == tot_spec) {
      /* block complete: the next one starts after its widest line */
      curr_len += wid_block;
      curr_spec = 0;
      wid_block = 0;
    }
    else curr_spec++;
  }
fini:
  *pheader = NULL;
  fclose(in);
  free(capacity);
  return tot_spec + 1;
badformat:
  strcpy(ret_message, "File not in MSF format!");
  tot_spec = -1;
  goto fini;
nomem:
  sprintf(ret_message, "Error: Not enough memory!");
  tot_spec = -1;
  goto fini;
}
758
759
int is_a_protein_seq(char *seq)
/* Returns TRUE if seq looks like a protein sequence, that is when at most
   80% of its residues are one of A, C, G, T, U, R, Y (case-insensitive).
   '-', '?' and N/n are not counted as residues.
   Returns FALSE when seq holds no countable residue at all.
*/
{
  static const char dna[] = "ACGTURY";
  int total = 0, length = 0;
  for( ; *seq != 0; seq++) {
    /* toupper() is only defined for unsigned char values and EOF */
    int c = toupper((unsigned char)*seq);
    if(c == '-' || c == '?' || c == 'N') continue;
    if(strchr(dna, c) != NULL) total++;
    length++;
  }
  if(length == 0) return 0; /* nothing to judge from: avoid dividing 0 by 0 */
  return ( (float)(total) / length ) <= 0.8;
}
774
775
is_a_protein_alignment(SEA_VIEW * view)776 int is_a_protein_alignment(SEA_VIEW *view)
777 /* returns TRUE if alignment looks like protein data (less than 80% ACGTU) */
778 {
779 char *seq;
780 for (int i = 0; i < view->tot_seqs; i++) {
781 seq = view->sequence[i];
782 while (*seq == '-') seq++;
783 if (*seq != 0) return is_a_protein_seq(view->sequence[i]);
784 }
785 return false;
786 }
787
save_phylip_file(const char * fname,char ** seq,char ** seqname,int totseqs,int * eachlength,region * region_used,int * sel_seqs,int tot_sel_seqs,int phylipwidnames)788 int save_phylip_file(const char *fname, char **seq,
789 char **seqname, int totseqs, int *eachlength, region *region_used,
790 int *sel_seqs, int tot_sel_seqs, int phylipwidnames)
791 /* sauver des sequences ou des regions au format phylip
792 region_used pointe vers la region a sauver
793 si region_used == NULL, toutes les sequences sont sauvees
794 rend 0 si ok,
795 1 si erreur d'ecriture dans le fichier
796 2 si tentative de depasser la longueur d'une sequence
797 */
798 {
799 const int widphylin = 60;
800 FILE *out;
801 int lenseqs, i, j, retval = 1, current, save_current, err, vtotseqs, lu, maxlname;
802 list_segments *psegment, all_sequence, *curr_segment;
803 region maregion;
804 char c;
805
806 if(totseqs == 0) return 0;
807 if( (out = fopen(fname,"w")) == NULL) return 1;
808 if(region_used == NULL) { /* on veut tout sauver */
809 tot_sel_seqs = 0;
810 all_sequence.debut = 1;
811 all_sequence.fin = eachlength[0];
812 for(i = 1; i < totseqs; i++) /* calcul long max des seqs */
813 if( eachlength[i] > all_sequence.fin )
814 all_sequence.fin = eachlength[i];
815 all_sequence.next = NULL;
816 maregion.list = &all_sequence;
817 region_used = &maregion;
818 }
819 /* calcul longueur des regions */
820 lenseqs = 0;
821 psegment = region_used->list;
822 while(psegment != NULL) {
823 lenseqs += psegment->fin - psegment->debut + 1;
824 psegment = psegment->next;
825 }
826 /* longest seq name */
827 vtotseqs = 0;
828 maxlname = 10; // 10 is the minimum name length
829 for(i=0; i < totseqs; i++) {
830 if(tot_sel_seqs == 0 || sel_seqs[i]) {
831 ++vtotseqs;
832 if( (j = strlen(seqname[i])) > maxlname) maxlname = j;
833 }
834 }
835 if(maxlname > phylipwidnames) maxlname = phylipwidnames;
836 fprintf(out,"%d %d\n", vtotseqs, lenseqs);
837 for(i=0; i < totseqs; i++) {
838 if(tot_sel_seqs != 0 && ! sel_seqs[i]) continue;
839 psegment = region_used->list; current = 0;
840 for(j = 0; j < maxlname; j++) {
841 if( (c = seqname[i][j]) == 0) break;
842 putc(c == ' ' ? '_' : c, out);
843 }
844 while(j <= maxlname) { putc( ' ', out ); j++; }
845 lu = 0;
846 while(lu < widphylin && psegment != NULL) {
847 putc( ' ', out );
848 err = output_next_res_from_region(seq[i], eachlength[i], &psegment,
849 ¤t, out, 10, FALSE);
850 lu += err;
851 }
852 putc('\n', out);
853 if(ferror(out)) goto fin;
854 }
855 while( psegment != NULL ) {
856 putc('\n',out);
857 curr_segment = psegment; save_current = current;
858 for(i=0; i < totseqs; i++) {
859 if(tot_sel_seqs != 0 && ! sel_seqs[i]) continue;
860 psegment = curr_segment; current = save_current;
861 for(j = 0; j <= maxlname; j++) putc( ' ', out );
862 lu = 0;
863 while(lu < widphylin && psegment != NULL) {
864 putc( ' ', out );
865 err = output_next_res_from_region(seq[i], eachlength[i],
866 &psegment, ¤t, out, 10, FALSE);
867 lu += err;
868 }
869 putc('\n', out);
870 if(ferror(out)) goto fin;
871 }
872 }
873 retval = 0;
874 fin:
875 fclose(out);
876 return retval;
877 }
878
879
output_next_res_from_region(char * seq,int lenseq,list_segments ** segment,int * current,FILE * out,int total,int use_dots)880 int output_next_res_from_region(char *seq, int lenseq,
881 list_segments **segment, int *current, FILE *out, int total,
882 int use_dots)
883 /* ecrire dans le fichier out les total residus a partir de la position courante
884 dans une liste de regions. lenseq est la longueur de la sequence seq.
885 La position courante est determinee par le segment courant (*segment, qui peut
886 etre modifie par la fonction) et par la position (from 0) dans celui-ci (*current
887 qui est modifie par la fonction pour etre pret pour l'appel suivant).
888 Si le segment demande va au dela de la fin de la seq, des - sont ecrits.
889 Rend le nombre de residus effectivement ecrits, qui est tjrs celui demande.
890 Doit etre appelle la premiere fois avec *current = 0
891 */
892 {
893 int debut, fin, vfin, ecrit = 0, nombre;
894 char *p;
895 static char line[500];
896 if( *segment == NULL)
897 return 0;
898 do {
899 debut = (*segment)->debut; fin = (*segment)->fin;
900 vfin = fin; if(fin > lenseq) vfin = lenseq;
901 nombre = total;
902 if( nombre - 1 + *current + debut > vfin)
903 nombre = vfin + 1 - *current - debut;
904 if(nombre > 0) {
905 memcpy(line, seq + *current + debut - 1, nombre);
906 line[nombre] = 0;
907 if(use_dots) {
908 p = line;
909 while( (p = strchr(p, '-')) != NULL) *p = '.';
910 }
911 fwrite(line, 1, nombre, out);
912 ecrit += nombre; total -= nombre; (*current) += nombre;
913 }
914 if( fin > lenseq && total > 0 ) {
915 nombre = total;
916 if( nombre - 1 + *current + debut > fin)
917 nombre = fin + 1 - *current - debut;
918 ecrit += nombre; (*current) += nombre; total -= nombre;
919 while(nombre-- > 0) putc('-', out);
920 }
921 if( *current + debut > fin) {
922 *segment = (*segment)->next;
923 if(*segment == NULL) break;
924 *current = 0;
925 }
926 }
927 while(total > 0);
928 return ecrit;
929 }
930
931
save_regions(list_regions * regions,FILE * out)932 void save_regions(list_regions *regions, FILE *out)
933 {
934 int total;
935 unsigned l_line;
936 list_segments *segment;
937 char line[80];
938 do {
939 total = 0;
940 segment = regions->element->list;
941 while(segment != NULL) {
942 total++;
943 segment = segment->next;
944 }
945 if(total == 0) continue;
946 fprintf(out,";;# of segments=%d %s\n",total,regions->element->name);
947 strcpy(line, ";;"); l_line=2;
948 segment = regions->element->list;
949 while(segment != NULL) {
950 if(l_line + 12 >= sizeof(line)-1) {
951 fputs(line,out); putc('\n',out);
952 strcpy(line,";;"); l_line=2;
953 }
954 sprintf(line+l_line," %d,%d", segment->debut, segment->fin);
955 l_line += strlen(line+l_line);
956 segment= segment->next;
957 }
958 fputs(line,out); putc('\n',out);
959 }
960 while( regions = regions->next, regions != NULL );
961 }
962
963
save_mase_file(const char * fname,char ** seq,char ** comments,char * header,char ** seqname,int totseqs,int * eachlength,list_regions * regions,region * region_used,int numb_species_sets,int ** list_species_sets,char ** name_species_sets,int * sel_seqs,int tot_sel_seqs,int tot_comment_lines,char ** comment_name,char ** comment_line,int tot_trees,char ** trees,const Fl_Menu_Item * menu_tree_items)964 int save_mase_file(const char *fname, char **seq, char **comments,
965 char *header, char **seqname, int totseqs, int *eachlength,
966 list_regions *regions, region *region_used, int numb_species_sets,
967 int **list_species_sets, char **name_species_sets,
968 int *sel_seqs, int tot_sel_seqs, int tot_comment_lines,
969 char **comment_name, char **comment_line,
970 int tot_trees, char **trees, const Fl_Menu_Item *menu_tree_items)
971 /* sauver un alignement au format mase
972 regions: l'ensemble des regions a ecrire (si on sauve tout l'alignement)
973 NULL si on ne sauve que des regions
974 region_used: pointeur vers la region a sauver
975 et on ne sauve que les seqs selectionnees s'il y en a,
976 ou NULL pour sauver tout l'alignement
977 rend 0 si OK,
978 1 si erreur ecriture du fichier
979 2 si depassement de longueur d'une sequence (avec region seulement)
980 */
981 {
982 FILE *out;
983 int num, retval = 1, current, ecrit, maxlength;
984 time_t heure;
985 list_segments *psegment, all_sequence;
986 region maregion;
987
988 if(totseqs == 0) return 0;
989 out=fopen(fname,"w");
990 if(out == NULL) return 1;
991
992 maxlength = 0;
993
994 time(&heure);
995 fprintf(out,";; saved by seaview on %s",ctime(&heure));
996 if(region_used == NULL) { /* on veut tout sauver */
997 all_sequence.debut = 1;
998 all_sequence.next = NULL;
999 maregion.list = &all_sequence;
1000 maregion.name = NULL;
1001 region_used = &maregion;
1002 tot_sel_seqs = 0;
1003 maxlength = eachlength[0];
1004 for(num=1; num<totseqs; num++) {
1005 if(maxlength < eachlength[num]) maxlength = eachlength[num];
1006 }
1007 }
1008 else
1009 fprintf(out,";; region choice only: %s\n",region_used->name);
1010 if(ferror(out)) goto fin;
1011 if(header != NULL && *header != 0) {
1012 fputs(header,out);
1013 if(ferror(out)) goto fin;
1014 }
1015 if(regions != NULL) {
1016 save_regions(regions, out);
1017 if(ferror(out)) goto fin;
1018 }
1019 if(tot_comment_lines > 0) {
1020 save_comment_lines(tot_comment_lines, comment_name, comment_line, out);
1021 if(ferror(out)) goto fin;
1022 }
1023 if(numb_species_sets != 0) { /* sauver les species sets */
1024 save_species_sets(numb_species_sets, list_species_sets,
1025 name_species_sets, totseqs, out);
1026 if(ferror(out)) goto fin;
1027 }
1028 if(tot_trees > 0) { /* write trees out */
1029 for(num = 0; num < tot_trees; num++) {
1030 fprintf(out, ";;$ %s\n", menu_tree_items[num].label());
1031 char *tree = strdup(trees[num]);
1032 char *p;
1033 while ((p=strchr(tree, '\n')) != NULL) *p = ' ';
1034 int l = strlen(tree);
1035 for (p = tree; p < tree + l; p += 80) {
1036 while (*p == '$') { // avoid begin line with ";;$..." that marks a new tree
1037 fputs(";;$\n", out);
1038 p++;
1039 }
1040 fprintf(out, ";;%.80s\n", p);
1041 }
1042 free(tree);
1043 }
1044 }
1045 for(num=0; num<totseqs; num++) {
1046 if( tot_sel_seqs != 0 && ! sel_seqs[num] ) continue;
1047 current = 0; psegment = region_used->list;
1048 all_sequence.fin = eachlength[num];
1049 if(comments != NULL && comments[num] != NULL)
1050 fputs(comments[num], out);
1051 else fputs(";no comment\n", out);
1052 if(ferror(out)) goto fin;
1053 fprintf(out,"%s\n",seqname[num]);
1054 if(ferror(out)) goto fin;
1055 do {
1056 ecrit = output_next_res_from_region(seq[num], eachlength[num],
1057 &psegment, ¤t, out, 60, FALSE);
1058 if(ferror(out)) goto fin;
1059 if( ecrit > 0) putc('\n', out);
1060 else if(ecrit == -1) {retval = 2; goto fin; }
1061 }
1062 while(ecrit != 0);
1063 if( (ecrit = maxlength - eachlength[num]) > 0) {
1064 int n;
1065 for(n = 1; n <= ecrit; n++) {
1066 putc('-', out); if(n % 60 == 0) putc('\n', out);
1067 }
1068 putc('\n', out);
1069 }
1070 if(ferror(out)) goto fin;
1071 }
1072 retval = 0;
1073 fin:
1074 if( fclose(out) != 0 ) retval = 1;
1075 return retval;
1076 }
1077
1078
save_fasta_file(const char * fname,char ** seq,char ** comments,char ** seqname,int totseqs,int * eachlength,region * region_used,int * sel_seqs,int tot_sel_seqs,int spaces_in_names,int pad_to_max_length)1079 int save_fasta_file(const char *fname, char **seq, char **comments,
1080 char **seqname, int totseqs, int *eachlength, region *region_used,
1081 int *sel_seqs, int tot_sel_seqs, int spaces_in_names, int pad_to_max_length)
1082 /* sauver des sequences ou des regions au format fasta
1083 region_used pointe vers la liste des regions a sauver
1084 si region_used == NULL, toutes les sequences sont sauvees
1085 si pad_to_max_length == TRUE && region_used == NULL, les seqs sont allong�es a leur longueur max
1086 rend 0 si ok, 1 si erreur d'ecriture dans le fichier
1087 2 si tentative de depasser la longueur d'une sequence
1088 */
1089 {
1090 FILE *out;
1091 int num, retval = 1, current, ecrit, save_full = (region_used == NULL);
1092 list_segments *psegment, all_sequence;
1093 region maregion;
1094 char *p;
1095
1096 if(totseqs == 0) return 0;
1097 if( (out = fopen(fname,"w")) == NULL) return 1;
1098 if(region_used == NULL) { /* on veut tout sauver */
1099 tot_sel_seqs = 0;
1100 all_sequence.debut = 1;
1101 all_sequence.fin = eachlength[0];
1102 for(num = 1; num < totseqs; num++)
1103 if( eachlength[num] < all_sequence.fin )
1104 all_sequence.fin = eachlength[num];
1105 all_sequence.next = NULL;
1106 maregion.list = &all_sequence;
1107 region_used = &maregion;
1108 }
1109 for(num=0; num<totseqs; num++) {
1110 if( tot_sel_seqs != 0 && ! sel_seqs[num] ) continue;
1111 current = 0; psegment = region_used->list;
1112 if (!save_full || !pad_to_max_length) all_sequence.fin = eachlength[num];
1113 fputc('>', out);
1114 p = seqname[num];
1115 if(spaces_in_names) {
1116 fputs(p, out);
1117 }
1118 else {
1119 while(*p != 0) { fputc( *p == ' ' ? '_' : *p, out); p++; }
1120 if(comments != NULL && comments[num] != NULL) {
1121 putc(' ', out);
1122 p = comments[num] + 1;
1123 while(*p != '\n' && *p != 0) putc(*(p++), out);
1124 int gc = get_ncbi_gc_from_comment(comments[num]);
1125 char *q = strstr(comments[num], "/transl_table=");
1126 if (gc > 1 && q != NULL && q >= p) {
1127 fprintf(out, " /transl_table=%d", gc);
1128 }
1129 }
1130 }
1131 putc('\n', out);
1132 if(ferror(out)) goto fin;
1133 do {
1134 ecrit = output_next_res_from_region(seq[num], eachlength[num],
1135 &psegment, ¤t, out, 60, FALSE);
1136 if( ecrit > 0) putc('\n', out);
1137 else if(ecrit == -1) {retval = 2; goto fin; }
1138 }
1139 while(ecrit != 0);
1140 if(ferror(out)) goto fin;
1141 }
1142 retval = 0;
1143 fin:
1144 if( fclose(out) != 0 ) return 1;
1145 return retval;
1146 }
1147
1148
1149
save_clustal_file(const char * fname,char ** seq,char ** seqname,int totseqs,int * eachlength,region * region_used,int * sel_seqs,int tot_sel_seqs)1150 int save_clustal_file(const char *fname, char **seq,
1151 char **seqname, int totseqs, int *eachlength, region *region_used,
1152 int *sel_seqs, int tot_sel_seqs)
1153 /* sauver des sequences ou des regions au format clustal
1154 region_used pointe vers la liste des regions a sauver
1155 si region_used == NULL, toutes les sequences sont sauvees
1156 rend 0 si ok, 1 si erreur d'ecriture dans le fichier
1157 2 si tentative de depasser la longueur d'une sequence
1158 */
1159 {
1160 const int widcluslin = 60;
1161 FILE *out;
1162 int i, j, retval = 1, current, save_current, err, l, lmax;
1163 list_segments *psegment, all_sequence, *curr_segment;
1164 region maregion;
1165
1166 if(totseqs == 0) return 0;
1167 if( (out = fopen(fname,"w")) == NULL) return 1;
1168 if(region_used == NULL) { /* on veut tout sauver */
1169 tot_sel_seqs = 0;
1170 all_sequence.debut = 1;
1171 all_sequence.fin = eachlength[0];
1172 for(i = 1; i < totseqs; i++)
1173 if( eachlength[i] > all_sequence.fin )
1174 all_sequence.fin = eachlength[i];
1175 all_sequence.next = NULL;
1176 maregion.list = &all_sequence;
1177 region_used = &maregion;
1178 }
1179 lmax = 0;
1180 for(i=0; i < totseqs; i++) {
1181 if( tot_sel_seqs != 0 && ! sel_seqs[i] ) continue;
1182 l = strlen(seqname[i]);
1183 if(l > lmax) lmax = l;
1184 }
1185 lmax += 2;
1186
1187 fprintf(out,"CLUSTAL W (1.7) multiple sequence alignment\n\n\n");
1188 current = 0; psegment = region_used->list;
1189 while( psegment != NULL ) {
1190 curr_segment = psegment; save_current = current;
1191 for(i=0; i < totseqs; i++) {
1192 if( tot_sel_seqs != 0 && ! sel_seqs[i] ) continue;
1193 psegment = curr_segment; current = save_current;
1194 /* remplacer espaces internes par _ */
1195 for(j = 0; j < lmax; j++) {
1196 if(seqname[i][j] == 0) break;
1197 putc( (seqname[i][j] == ' ' ? '_' : seqname[i][j] ),
1198 out);
1199 }
1200 while( j < lmax) {
1201 putc(' ', out); j++;
1202 }
1203 err= output_next_res_from_region(seq[i], eachlength[i],
1204 &psegment, ¤t, out, widcluslin, FALSE);
1205 putc('\n', out);
1206 if(err == -1) {retval = 2; goto fin; }
1207 if(ferror(out)) goto fin;
1208 }
1209 fprintf(out, "\n\n");
1210 }
1211 retval = 0;
1212 fin:
1213 if( fclose(out) != 0 ) return 1;
1214 return retval;
1215 }
1216
1217
calc_gcg_check(list_segments * psegment,char * seq)1218 int calc_gcg_check(list_segments *psegment, char *seq)
1219 {
1220 int i, debut, fin, residue, pos = 0;
1221 long check = 0;
1222 while(psegment != NULL) {
1223 debut = psegment->debut; fin = psegment->fin;
1224 for( i=debut; i<= fin; i++) {
1225 residue = toupper(seq[i - 1]);
1226 if(residue == '-') residue = '.';
1227 check += (( (pos++) % 57)+1) * residue;
1228 }
1229 psegment = psegment->next;
1230 }
1231 return (check % 10000);
1232 }
1233
1234
save_msf_file(const char * fname,char ** seq,char ** seqname,int totseqs,int * eachlength,region * region_used,int protein,int * sel_seqs,int tot_sel_seqs)1235 int save_msf_file(const char *fname, char **seq,
1236 char **seqname, int totseqs, int *eachlength, region *region_used,
1237 int protein, int *sel_seqs, int tot_sel_seqs)
1238 /* sauver des sequences ou des regions au format MSF
1239 region_used pointe vers la liste des regions a sauver
1240 si region_used == NULL, toutes les sequences sont sauvees
1241 rend 0 si ok, 1 si erreur d'ecriture dans le fichier
1242 2 si tentative de depasser la longueur d'une sequence
1243 */
1244 {
1245 FILE *out;
1246 int i, j, k, retval = 1, current, save_current, err, lenseqs, gen_check,
1247 *check_val, curr_len, toprint, save_complete, fromseq, new_current;
1248 list_segments *psegment, all_sequence, *curr_segment, *new_segment;
1249 region maregion;
1250
1251 if(totseqs == 0) return 0;
1252 if( (out = fopen(fname,"w")) == NULL) return 1;
1253 save_complete = (region_used == NULL);
1254 if(save_complete) { /* on veut tout sauver */
1255 tot_sel_seqs = 0;
1256 all_sequence.debut = 1;
1257 all_sequence.fin = 0;
1258 for(i = 0; i < totseqs; i++) {
1259 if( eachlength[i] > all_sequence.fin )
1260 all_sequence.fin = eachlength[i];
1261 }
1262 lenseqs = all_sequence.fin;
1263 all_sequence.next = NULL;
1264 maregion.list = &all_sequence;
1265 region_used = &maregion;
1266 }
1267 else {
1268 /* calcul longueur des regions */
1269 lenseqs = 0;
1270 psegment = region_used->list;
1271 while(psegment != NULL) {
1272 lenseqs += psegment->fin - psegment->debut + 1;
1273 psegment = psegment->next;
1274 }
1275 }
1276 for(i = 0, k = 0; i < totseqs; i++) /* nbre de seqs editees */
1277 if( tot_sel_seqs == 0 || sel_seqs[i] ) k++;
1278 check_val = (int *)malloc( k * sizeof(int) );
1279 if(check_val == NULL) {
1280 fclose(out);
1281 return 1; /* pas tres precis */
1282 }
1283 gen_check = 0;
1284 for(i = 0, j = 0; i < totseqs; i++) {
1285 if( tot_sel_seqs != 0 && ! sel_seqs[i] ) continue;
1286 check_val[j] = calc_gcg_check(region_used->list, seq[i]);
1287 gen_check += check_val[j++];
1288 }
1289 gen_check = gen_check % 10000;
1290 fprintf(out, "!!%2s_MULTIPLE_ALIGNMENT 1.0\n %s", (protein?"AA":"NA"), extract_filename(fname) );
1291 fprintf(out," MSF: %d Type: %c Check:%6d .. \n\n",
1292 lenseqs, (protein ? 'P' : 'N'), gen_check);
1293 for(i = 0 , j = 0; i < totseqs; i++) {
1294 if( tot_sel_seqs != 0 && ! sel_seqs[i] ) continue;
1295 fprintf(out, " Name: %-15.15s Len:%5d Check:%6d Weight: 1.00\n",
1296 seqname[i], lenseqs, check_val[j++]);
1297 }
1298 fprintf(out,"\n//\n\n\n");
1299 new_current = 0; new_segment = region_used->list; curr_len = 0;
1300 while( new_segment != NULL && curr_len < lenseqs) {
1301 curr_segment = new_segment; save_current = new_current;
1302 fprintf(out, "\n");
1303 for(i=0; i < totseqs; i++) {
1304 if( tot_sel_seqs != 0 && ! sel_seqs[i] ) continue;
1305 psegment = curr_segment; current = save_current;
1306 for(j = 0; j < MSF_WID_NAME; j++) {
1307 if(seqname[i][j] == 0) break;
1308 putc(seqname[i][j],out);
1309 }
1310 while( j < MSF_WID_NAME + 1) {
1311 putc(' ', out); j++;
1312 }
1313 for(k = curr_len; k < curr_len + 50 && k < lenseqs; k += 10) {
1314 toprint = 10;
1315 if(k + toprint > lenseqs) toprint = lenseqs - k;
1316 fromseq = toprint;
1317 if(save_complete && k + fromseq > eachlength[i])
1318 fromseq = eachlength[i] - k;
1319 if(fromseq < 0) fromseq = 0;
1320 if(fromseq > 0) {
1321 err= output_next_res_from_region(
1322 seq[i], eachlength[i], &psegment,
1323 ¤t, out, fromseq, TRUE);
1324 if(ferror(out)) goto fin;
1325 if(err == -1) {retval = 2; goto fin; }
1326 }
1327 while(fromseq < toprint) {
1328 putc('.', out); fromseq++;
1329 }
1330 putc(' ', out);
1331 }
1332 putc('\n', out);
1333 if( (!save_complete) || eachlength[i] == lenseqs) {
1334 new_current = current;
1335 new_segment = psegment;
1336 }
1337 if(ferror(out)) goto fin;
1338 }
1339 curr_len += 50;
1340 fprintf(out, "\n");
1341 }
1342 retval = 0;
1343 fin:
1344 if( fclose(out) != 0 ) retval = 1;
1345 free(check_val);
1346 return retval;
1347 }
1348
1349
save_alignment_or_region(const char * fname,char ** seq,char ** comments,char * header,char ** seqname,int totseqs,int * eachlength,list_regions * regions,region * region_used,known_format format,int numb_species_sets,int ** list_species_sets,char ** name_species_sets,int * sel_seqs,int tot_sel_seqs,int protein,int tot_comment_lines,char ** comment_name,char ** comment_line,int phylipwidnames,int tot_trees,char ** trees,const Fl_Menu_Item * items,int spaces_in_fasta_names)1350 char *save_alignment_or_region(const char *fname, char **seq, char **comments,
1351 char *header, char **seqname, int totseqs, int *eachlength,
1352 list_regions *regions, region *region_used, known_format format,
1353 int numb_species_sets, int **list_species_sets,
1354 char **name_species_sets, int *sel_seqs, int tot_sel_seqs, int protein,
1355 int tot_comment_lines, char **comment_name, char **comment_line, int phylipwidnames,
1356 int tot_trees, char **trees, const Fl_Menu_Item *items, int spaces_in_fasta_names)
1357 /* sauver des sequences ou des regions au format de fichier format
1358 region_used pointe vers la liste des regions a sauver
1359 si region_used == NULL, toutes les sequences entieres sont sauvees
1360 rend NULL si ok
1361 un message d'erreur sinon.
1362 */
1363 {
1364 int err=0;
1365 static char err_message[200];
1366
1367 if(format == MASE_FORMAT)
1368 err = save_mase_file(fname, seq, comments,
1369 header, seqname, totseqs, eachlength,
1370 regions, region_used, numb_species_sets, list_species_sets,
1371 name_species_sets, sel_seqs, tot_sel_seqs,
1372 tot_comment_lines, comment_name, comment_line,
1373 tot_trees, trees, items);
1374 else if(format == NEXUS_FORMAT) {
1375 err = save_nexus_file(fname, totseqs, protein,
1376 seq, seqname, comments, header,
1377 numb_species_sets, list_species_sets, name_species_sets,
1378 regions, tot_comment_lines, comment_name, comment_line,
1379 region_used, sel_seqs, tot_sel_seqs, eachlength, tot_trees, trees, items);
1380 }
1381 else if(format == PHYLIP_FORMAT)
1382 err = save_phylip_file(fname, seq,
1383 seqname, totseqs, eachlength, region_used,
1384 sel_seqs, tot_sel_seqs, phylipwidnames);
1385 else if(format == CLUSTAL_FORMAT)
1386 err = save_clustal_file(fname, seq,
1387 seqname, totseqs, eachlength, region_used,
1388 sel_seqs, tot_sel_seqs);
1389 else if(format == MSF_FORMAT)
1390 err = save_msf_file(fname, seq,
1391 seqname, totseqs, eachlength, region_used, protein,
1392 sel_seqs, tot_sel_seqs);
1393 else if(format == FASTA_FORMAT)
1394 err = save_fasta_file(fname, seq, comments,
1395 seqname, totseqs, eachlength, region_used,
1396 sel_seqs, tot_sel_seqs, spaces_in_fasta_names);
1397 if(err == 0)
1398 return NULL;
1399 else if(err == 1)
1400 sprintf(err_message,"Error while writing to file %s",fname);
1401 else if(err == 2)
1402 strcpy(err_message,
1403 "Error: region goes beyond the end of one sequence");
1404 return err_message;
1405 }
1406
1407
#if !(defined(WIN32) || defined(__APPLE__))
/* directory part of the path given to inform_prog_dir, with its final '/';
   empty when unknown */
static char seaview_prog_dir[200] = "";

void inform_prog_dir(const char *arg0)
/* Memorize the directory part of arg0, i.e. everything up to and including
   its last '/'. The memorized directory is the empty string when arg0
   contains no '/' or when the directory part is too long to be stored.
*/
{
  const char *slash = strrchr(arg0, '/');
  size_t len = (slash != NULL) ? (size_t)(slash - arg0) + 1 : 0;

  /* a directory that does not fit is treated as unknown rather than
     truncated to the name of some other directory */
  if (len >= sizeof(seaview_prog_dir)) len = 0;
  memcpy(seaview_prog_dir, arg0, len);
  seaview_prog_dir[len] = 0;
}

char *get_prog_dir(void)
/* Return the directory memorized by inform_prog_dir (possibly empty).
   The returned string is a static buffer: it must not be freed.
*/
{
  return seaview_prog_dir;
}
#endif
1425
1426
get_full_path(const char * fname)1427 char *get_full_path(const char *fname)
1428 /* to get full pathname to file fname searching for its name, for it in the prog dir
1429 and then for it through all path directories
1430 returns NULL if not found
1431 */
1432 {
1433 #define Mxdir 600
1434 #ifdef WIN32
1435 #define PATH_SEPAR ';'
1436 #define DIR_SEPAR '\\'
1437 #else
1438 #define PATH_SEPAR ':'
1439 #define DIR_SEPAR '/'
1440 #endif
1441 static char dir[Mxdir+1];
1442 char *path, *deb, *fin;
1443 FILE *fich;
1444 int lf, ltot;
1445
1446 strcpy(dir, fname);
1447 if(strchr(fname, DIR_SEPAR) != NULL) {// if fname is a pathname
1448 fich = fopen(dir, "r"); /* try first explicit filename */
1449 goto way_out; // and don't search more
1450 }
1451 #if defined(__APPLE__)
1452 sprintf(dir, "%s/%s", MG_GetBundleResourcesDir(), fname);
1453 fich = fopen(dir, "r");
1454 if(fich != NULL) goto way_out;
1455 #else
1456 /* try dir where program was launched */
1457 deb = get_prog_dir();
1458 if(deb != NULL && *deb != 0) {
1459 strcpy(dir, deb);
1460 strcat(dir, fname);
1461 fich = fopen(dir, "r");
1462 if(fich != NULL) goto way_out;
1463 }
1464 #endif
1465 path = getenv("PATH"); // get the list of path directories, separated by : or ;
1466 if (path == NULL ) return NULL;
1467 lf = strlen(fname);
1468 deb = path;
1469 do {
1470 fin = strchr(deb,PATH_SEPAR);
1471 if(fin != NULL)
1472 { ltot = fin-deb; if(ltot > 0) strncpy(dir,deb,ltot); }
1473 else
1474 { strcpy(dir,deb); ltot=strlen(dir); }
1475 /* now one directory is in string dir */
1476 if( ltot > 0 && ltot + lf + 1 <= Mxdir)
1477 {
1478 dir[ltot] = DIR_SEPAR;
1479 strcpy(dir+ltot+1,fname); /* now dir is appended with filename */
1480 fich = fopen(dir,"r");
1481 if( fich != NULL) break;
1482 }
1483 else fich = NULL;
1484 deb = fin+1;
1485 }
1486 while (fin != NULL);
1487 way_out:
1488 if(fich == NULL) return NULL;
1489 fclose(fich);
1490 #ifndef WIN32
1491 if(*dir != '/') {
1492 if(strncmp(dir, "./", 2) == 0) memmove(dir, dir + 2, strlen(dir) - 1);
1493 char *p, *q;
1494 char *cdir = (char *)malloc(PATH_MAX);
1495 p = getcwd(cdir, PATH_MAX);
1496 q = (char *)malloc(strlen(p) + 1 + strlen(dir) + 1);
1497 sprintf(q, "%s/%s", p, dir);
1498 strcpy(dir, q);
1499 free(q);
1500 free(cdir);
1501 }
1502 #endif
1503 return dir;
1504 #undef Mxdir
1505 }
1506
1507
static void save_species_sets(int numb_species_sets, int **list_species_sets,
  char **name_species_sets, int totseqs, FILE *out)
/* Write the species sets to file out, as header comment lines.
   list_species_sets[s][i] is non-zero when sequence of rank i (from 0)
   belongs to set s. For each set with at least one member, writes
      ;;@ of species = <count> <set name>
   then the comma-separated ranks (from 1) of its members on lines starting
   with ";;", with a line break after every 15 ranks followed by a comma.
   Sets without members are skipped.
*/
{
  int set, rank, members, pending, on_line;
  const int *flags;

  for (set = 0; set < numb_species_sets; set++) {
    flags = list_species_sets[set];
    members = 0;
    for (rank = 0; rank < totseqs; rank++) {
      if (flags[rank]) members++;
    }
    if (members > 0) {
      fprintf(out,";;@ of species = %d %s\n;;", members,
        name_species_sets[set]);
      /* pending = member whose rank is still to be written; it is written
         only once the next member is known, so that the last rank of the
         set can be written without a trailing comma */
      pending = 0;
      while (pending < totseqs && !flags[pending]) pending++;
      on_line = 0;
      for (rank = pending + 1; rank < totseqs; rank++) {
        if (!flags[rank]) continue;
        fprintf(out," %d,", pending + 1);
        pending = rank;
        if (++on_line >= 15) {
          fputs("\n;;", out);
          on_line = 0;
        }
      }
      fprintf(out," %d\n", pending + 1);
    }
  }
}
1536
1537
void save_comment_lines(int tot_comment_lines, char **names, char **lines,
  FILE *out)
/* Write the comment lines to file out, as header comment lines.
   For each non-empty lines[i], writes
      ;;|<names[i]>
   then the text of lines[i] in pieces of at most 60 characters, each on its
   own line starting with ";;", then a closing line ";;||".
   Empty comment lines are skipped.
*/
{
  int idx, length, start;
  const char *text;

  for (idx = 0; idx < tot_comment_lines; idx++) {
    text = lines[idx];
    length = strlen(text);
    if (length != 0) {
      fprintf(out, ";;|%s\n", names[idx]);
      start = 0;
      while (start < length) {
        fprintf(out, ";;%.60s\n", text + start);
        start += 60;
      }
      fputs(";;||\n", out);
    }
  }
}
1551
1552
what_format(const char * filename)1553 known_format what_format(const char *filename)
1554 /*
1555 returns an alignment format (>= 0)
1556 -1 unknown format
1557 -2 a Newick tree
1558 */
1559 {
1560 FILE *in;
1561 char line[100], *p;
1562 int format = -1;
1563 int nseq, lseq;
1564
1565 in = fl_fopen(filename, "r");
1566 if(in == NULL) return (known_format)-1;
1567 p = fgets(line, sizeof(line), in);
1568 if( p == NULL) { fclose(in); return (known_format)-1; }
1569 while(*p) { *p = toupper(*p); p++; }
1570 if(*line == ';') format = MASE_FORMAT;
1571 else if(*line == '>') format = FASTA_FORMAT;
1572 else if(*line == '(' || *line == '[') format = -2;
1573 else if(strncmp(line, "CLUSTAL", 7) == 0) format = CLUSTAL_FORMAT;
1574 else if(strncmp(line, "#NEXUS", 6) == 0) format = NEXUS_FORMAT;
1575 else {
1576 nseq = lseq = -1;
1577 sscanf(line, "%d%d", &nseq, &lseq);
1578 if(nseq != -1 && lseq != -1) format = PHYLIP_FORMAT;
1579 else {
1580 /* try MSF format */
1581 do {
1582 p = fgets(line, sizeof(line), in);
1583 if(p != NULL && strstr(p, " MSF: ") != NULL) format = MSF_FORMAT;
1584 }
1585 while(p != NULL && strncmp(p, "//", 2) != 0 );
1586 }
1587 }
1588 fclose(in);
1589 return (known_format)format;
1590 }
1591
1592
char *my_fgets(char *s, int n, FILE *f)
/* Same job as fgets, but accepting any line terminator: "\n", "\r", "\r\n"
   or "\n\r". Reads at most n - 1 characters from f into s, stopping after a
   line terminator, which is always stored as a single '\n'. s is always
   NUL-terminated.
   Returns s, or NULL if nothing was stored (end of file, or n < 2).
*/
{
  char *dest = s;
  int c, peek;

  for (n--; n > 0; n--) {
    c = getc(f);
    if (c == EOF) break;
    if (c != '\r' && c != '\n') {
      *dest++ = c;
      continue;
    }
    /* end of line */
    *dest++ = '\n';
    peek = getc(f);
    /* the other terminator character right after this one belongs to the
       same line end and is dropped; anything else is given back */
    if (peek != EOF && peek != (c == '\r' ? '\n' : '\r'))
      ungetc(peek, f);
    break;
  }
  *dest = 0;
  return (dest == s ? NULL : s);
}
1617
1618
seaview_file_chooser_save_as(const char * message,const char * fname,SEA_VIEW * view,known_format * new_format)1619 char *seaview_file_chooser_save_as(const char* message, const char* fname, SEA_VIEW *view, known_format* new_format)
1620 {
1621 #ifndef MICRO
1622 const char *prev_label = Fl_File_Chooser::show_label;
1623 Fl_File_Chooser::show_label = "Format";
1624 #endif
1625
1626 char *p, types_list[500] = "";
1627 Fl_Native_File_Chooser *chooser = new Fl_Native_File_Chooser();
1628 #ifndef MICRO
1629 Fl_File_Chooser::show_label = prev_label;
1630 #endif
1631
1632 chooser->type(Fl_Native_File_Chooser::BROWSE_SAVE_FILE);
1633 chooser->options(Fl_Native_File_Chooser::SAVEAS_CONFIRM | chooser->options());
1634 chooser->options(Fl_Native_File_Chooser::USE_FILTER_EXT | chooser->options());
1635 chooser->title(message);
1636 chooser->directory(extract_dirname(fname));
1637 chooser->preset_file(extract_filename(fname));
1638
1639 p = types_list;
1640 for(int f = 0; f < nbr_formats; f++) {
1641 sprintf(p, "%s\t*.%s\n", f_format_names[f], f_format_exts[f]);
1642 p += strlen(p);
1643 }
1644 chooser->filter(types_list);
1645 chooser->filter_value(view->format_for_save);
1646 char *filename = run_and_close_native_file_chooser(chooser, TRUE);
1647 if (filename && new_format) *new_format = (known_format)chooser->filter_value();
1648 delete chooser;
1649 return filename;
1650 }
1651
1652
const char *extract_dirname(const char *pathname)
/* Return the directory part of pathname: everything before its last
   directory separator ('\\' on WIN32, '/' elsewhere), without the separator.
   The result is the empty string when pathname contains no separator, and
   also when the directory part is too long for the internal buffer.
   The result is in a static buffer, overwritten by the next call.
*/
{
  static char dirname[300];
  const char *sep;
  size_t len;

#if defined(WIN32)
  sep = strrchr(pathname, '\\');
#else
  sep = strrchr(pathname, '/');
#endif
  len = (sep != NULL) ? (size_t)(sep - pathname) : 0;
  /* a directory that does not fit is reported as "none" rather than
     truncated to the name of some other directory */
  if (len >= sizeof(dirname)) len = 0;
  memcpy(dirname, pathname, len);
  dirname[len] = 0;
  return dirname;
}
1670
printout(SEA_VIEW * view,const char * filename,int fontsize,int block_size,Fl_Paged_Device::Page_Format pageformat,int vary_only,int ref0,int pdfkindvalue,Fl_Paged_Device::Page_Layout layout,int svg_width)1671 int printout(SEA_VIEW *view, const char *filename,
1672 int fontsize, int block_size, Fl_Paged_Device::Page_Format pageformat, int vary_only, int ref0,
1673 int pdfkindvalue, Fl_Paged_Device::Page_Layout layout, int svg_width)
1674 {
1675 int num, i, j, k, current, max_seq_length, fin, curr_lines, widnames,
1676 res_per_line, nl, firstpage, lines_per_page, use_pdf, use_svg, top_margin;
1677 Fl_Surface_Device *surface;
1678 FILE *textfile = NULL;
1679 time_t heure;
1680 static char unnamed[] = "<unnamed>";
1681 static char num_line[200];
1682 int lettre, char_per_line;
1683 short *vary_need = NULL;
1684 int *vary_pos; /* rang ds alignement de la colonne imprim�e */
1685 char *p, oneline[500];
1686 int (*calc_color_function)(int);
1687 double char_width = fontsize/2, descender, margin = 25;
1688 if(view->tot_seqs == 0) return 0;
1689 if(view->protein) calc_color_function = get_color_for_aa;
1690 else calc_color_function = get_color_for_base;
1691 use_pdf = (pdfkindvalue == PDF_COLOR || pdfkindvalue == PDF_BW);
1692 use_svg = (pdfkindvalue == SVG);
1693 max_seq_length = 0; widnames = 0;
1694 for (i=0; i < view->tot_seqs; i++) {
1695 if (view->each_length[i] > max_seq_length) max_seq_length = view->each_length[i];
1696 if ( ( fin=strlen(view->seqname[i]) ) > widnames) widnames = fin;
1697 }
1698 widnames += 2;
1699 if (use_pdf) {
1700 surface = new PDF_or_PS_File_Device();
1701 if (((PDF_or_PS_File_Device*)surface)->begin_document(filename, pageformat, layout)) {
1702 delete surface;
1703 return 0;
1704 }
1705 }
1706 else if(use_svg) {
1707 FILE *out = fl_fopen(filename, "w");
1708 if (out == NULL) exit(1);
1709 #ifndef NO_PDF
1710 SVG_PDF_width_Graphics_Driver *pdf_d = new SVG_PDF_width_Graphics_Driver(NULL);
1711 pdf_d->font(FL_COURIER, fontsize);
1712 char_width = pdf_d->width("X", 1);
1713 delete pdf_d;
1714 #endif
1715 margin = char_width;
1716 char_per_line = (int)( (svg_width - 2*margin) / char_width + 0.5);
1717 fin = (char_per_line - widnames + 1) / (block_size + 1);
1718 if (fin < 1) { /* garde fou */
1719 fin = 1; block_size = char_per_line - widnames;
1720 }
1721 res_per_line = fin * block_size;
1722 int nl = (max_seq_length + res_per_line - 1) /res_per_line;
1723 int totl = nl * (view->tot_seqs + 2) + 2;
1724 surface = new SVG_File_Surface(svg_width, totl * fontsize, out, true);
1725 surface->set_current();
1726 fl_color(210,210,210); // draw grey background
1727 fl_rectf(0,0,svg_width, totl * fontsize);
1728 }
1729 else {
1730 textfile = fopen(filename, "w");
1731 if(textfile == NULL) return TRUE;
1732 }
1733 #ifndef NO_PDF
1734 jmp_buf* jbuf;
1735 if (use_pdf) ((PDF_or_PS_File_Device*)surface)->surface_try(&jbuf);
1736 if ( (!use_pdf) || (setjmp(*jbuf) == 0)) {
1737 #endif
1738 if (use_pdf) {
1739 surface->driver()->font(FL_COURIER, fontsize);
1740 char_width = fl_width("X");
1741 int pwidth, pheight;
1742 ((PDF_or_PS_File_Device*)surface)->printable_rect(&pwidth, &pheight);
1743 int l, r, t, b;
1744 ((PDF_or_PS_File_Device*)surface)->margins(&l, &t, &r, &b);
1745 margin -= l;
1746 char_per_line = (int)((pwidth - 2*margin) / char_width + 0.5);
1747 lines_per_page = (int)((pheight - 2*margin) / fontsize + 0.5);
1748 descender = fl_descent();
1749 top_margin = margin;
1750 }
1751 else if (use_svg) {
1752 surface->driver()->font(FL_COURIER, fontsize);
1753 lines_per_page = 10000000;
1754 top_margin = fontsize;
1755 }
1756 else char_per_line = 90;
1757 firstpage = TRUE;
1758
1759 if(ref0 < 0) vary_only = FALSE;
1760 time(&heure);
1761 sprintf(oneline,"Alignment: %s", view->masename == NULL ? unnamed : PREPARE_LABEL(view->masename) );
1762 if (use_pdf) {
1763 ((PDF_or_PS_File_Device*)surface)->start_page();
1764 ((PDF_or_PS_File_Device*)surface)->origin(0, fontsize);
1765 fl_draw(oneline, margin, top_margin);
1766 }
1767 else if(!use_svg) {fputs(oneline, textfile); fputs("\n", textfile);}
1768 curr_lines = 1;
1769 if(vary_only) {
1770 const char fixed[] = "Displaying variable sites only.";
1771 if (use_pdf || use_svg) fl_draw(fixed, margin, top_margin + curr_lines * fontsize);
1772 else {fputs(fixed, textfile); fputs("\n", textfile);}
1773 ++curr_lines;
1774 }
1775 if (use_pdf) {
1776 sprintf(oneline,"Seaview [blocks=%d fontsize=%d %s%s] on %s",
1777 block_size, fontsize, pageformat == Fl_Paged_Device::A4 ? "A4" : "LETTER",
1778 layout == Fl_Paged_Device::LANDSCAPE ? "-landscape" : "", ctime(&heure));
1779 p = strchr(oneline, '\n'); if (p) *p = 0;
1780 fl_draw(oneline, margin, top_margin + curr_lines * fontsize);
1781 curr_lines += 2;
1782 }
1783 else if(use_svg) {
1784 sprintf(oneline,"created by Seaview on %s", ctime(&heure));
1785 p = strchr(oneline, '\n'); if (p) *p = 0;
1786 fl_color(FL_BLACK);
1787 fl_draw(oneline, margin, top_margin + curr_lines * fontsize);
1788 curr_lines += 1;
1789 }
1790 else {
1791 fputs("Seaview text-only output\n", textfile);
1792 curr_lines += 2;
1793 }
1794 if(vary_only) {
1795 vary_need = (short *)calloc(max_seq_length, sizeof(short));
1796 if(vary_need == NULL) return TRUE;
1797 vary_pos = (int *)calloc(char_per_line, sizeof(int));
1798 if(vary_pos == NULL) return TRUE;
1799 for(i = 0; i < max_seq_length; i++) {
1800 for(num = 0; num < view->tot_seqs; num++) {
1801 if( toupper(view->sequence[num][i]) != toupper(view->sequence[ref0][i]) ) {
1802 vary_need[i] = TRUE;
1803 break;
1804 }
1805 }
1806 }
1807 }
1808 /* nombre max de blocks qui tiennent sur une ligne de cpl chars */
1809 fin = (char_per_line - widnames + 1) / (block_size + 1);
1810 if(fin < 1) { /* garde fou */
1811 fin = 1; block_size = char_per_line - widnames;
1812 }
1813 res_per_line = fin * block_size;
1814 current = 0;
1815 while( current < max_seq_length ) {
1816 nl = 1;
1817 if(vary_only) {
1818 memset(vary_pos, 0, res_per_line * sizeof(int) );
1819 i = -1; j = 0; k = 0;
1820 while( j < res_per_line) {
1821 if(current + i >= max_seq_length) break;
1822 if( !vary_need[current + ++i] ) continue;
1823 j++;
1824 vary_pos[k++] = current + i + 1;
1825 if( j % block_size == 0) k++;
1826 }
1827 nl = calc_vary_lines(vary_pos, k);
1828 }
1829 if( use_pdf && (!firstpage) && (curr_lines + view->tot_seqs + nl > lines_per_page)) {
1830 ((PDF_or_PS_File_Device*)surface)->end_page();
1831 ((PDF_or_PS_File_Device*)surface)->start_page();
1832 ((PDF_or_PS_File_Device*)surface)->origin(0, fontsize);
1833 surface->driver()->font(FL_COURIER, fontsize);
1834 curr_lines = 0;
1835 }
1836 if(vary_only) {
1837 out_vary_pos(vary_pos, widnames, k, nl, textfile, margin, top_margin + curr_lines * fontsize);
1838 curr_lines += nl;
1839 }
1840 else {
1841 sprintf(num_line, "%d", current + 1);
1842 fin = strlen(num_line);
1843 memmove(num_line + widnames - fin + 1, num_line, fin+1);
1844 if(fin <= widnames) memset(num_line, ' ', widnames - fin + 1);
1845 if( use_pdf || use_svg) fl_draw(num_line, margin, top_margin + curr_lines * fontsize);
1846 else {fputs(num_line, textfile);fputs("\n",textfile);}
1847 ++curr_lines;
1848 }
1849 for(num=0; num < view->tot_seqs; num++) {
1850 k = 0;
1851 for(j = 0; j < widnames; j++) {
1852 if(view->seqname[num][j] == 0) break;
1853 oneline[k++] = view->seqname[num][j];
1854 }
1855 while( j < widnames) {
1856 j++;
1857 oneline[k++] = ' ';
1858 }
1859 if(vary_only) {
1860 i = -1; j = 0;
1861 while( j < res_per_line) {
1862 if(current + i >= max_seq_length) break;
1863 if( !vary_need[current + ++i] ) continue;
1864 j++;
1865 if(current + i < view->each_length[num]) {
1866 if(num != ref0) lettre = ( toupper(view->sequence[num][current+i]) ==
1867 toupper(view->sequence[ref0][current+i]) ? '.' : view->sequence[num][current+i] );
1868 else lettre = view->sequence[ref0][current+i];
1869 oneline[k++] = lettre;
1870 }
1871 if( j % block_size == 0) oneline[k++] = ' ';
1872 }
1873 if(num == view->tot_seqs - 1) current = current + i + 1;
1874 }
1875
1876 else {
1877 fin = res_per_line;
1878 if(current+fin > view->each_length[num])
1879 fin = view->each_length[num] - current;
1880 if(ref0 != -1 && num != ref0) {
1881 /* ecriture par reference a seq ref0 */
1882 for(i=0; i<fin; i++) {
1883 lettre = ( toupper(view->sequence[num][current+i]) ==
1884 toupper(view->sequence[ref0][current+i]) ? '.' : view->sequence[num][current+i] );
1885 oneline[k++] = lettre;
1886 if( i < fin-1 && (i+1)%block_size == 0)
1887 oneline[k++] = ' ';
1888 }
1889 }
1890 else { /* ecriture normale de seq */
1891 for(i=0; i<fin; i++) {
1892 oneline[k++] = view->sequence[num][current+i];
1893 if( i < fin-1 && (i+1)%block_size == 0)
1894 oneline[k++] = ' ';
1895 }
1896 }
1897 }
1898 oneline[k] = 0;
1899 if(!view->allow_lower) majuscules(oneline + widnames);
1900 if(use_pdf && (curr_lines >= lines_per_page)) {
1901 ((PDF_or_PS_File_Device*)surface)->end_page();
1902 ((PDF_or_PS_File_Device*)surface)->start_page();
1903 ((PDF_or_PS_File_Device*)surface)->origin(0, fontsize);
1904 surface->driver()->font(FL_COURIER, fontsize);
1905 curr_lines = 0;
1906 }
1907 if(!use_pdf && !use_svg) {
1908 fputs(oneline, textfile); fputs("\n", textfile);
1909 }
1910 else if(pdfkindvalue == PDF_BW) {
1911 fl_draw(oneline, margin, top_margin + curr_lines * fontsize);
1912 }
1913 else {
1914 if (use_pdf) color_pdf_display(view, calc_color_function, oneline, widnames, margin,
1915 top_margin + curr_lines * fontsize,
1916 fontsize, char_width, descender, num, current);
1917 else color_svg_display(view, calc_color_function, oneline, widnames, margin,
1918 top_margin + curr_lines * fontsize, fontsize, char_width);
1919 }
1920 ++curr_lines;
1921 firstpage = FALSE;
1922 }
1923 if(!use_pdf && !use_svg) {
1924 fputs("\n", textfile);
1925 }
1926 else if(curr_lines + 1 <= lines_per_page) {
1927 ++curr_lines;
1928 }
1929 if( ! vary_only ) current += res_per_line;
1930 }
1931 if(use_pdf) {
1932 ((PDF_or_PS_File_Device*)surface)->end_page();
1933 ((PDF_or_PS_File_Device*)surface)->end_job();
1934 }
1935 else if(use_svg) {
1936 fl_color(FL_GRAY);
1937 fl_line_style(0, 3);
1938 fl_rect(1,1, ((SVG_File_Surface*)surface)->width()-2, ((SVG_File_Surface*)surface)->height()-2);
1939 ((SVG_File_Surface*)surface)->end();
1940 }
1941 else fclose(textfile);
1942 #ifndef NO_PDF
1943 } /* end of PDF_TRY */
1944 if (use_pdf && ((PDF_or_PS_File_Device*)surface)->surface_catch()) {
1945 ((PDF_or_PS_File_Device*)surface)->error_catch();
1946 }
1947 #endif
1948 if (use_pdf || use_svg) delete surface;
1949 return FALSE;
1950 }
1951
1952
color_pdf_display(SEA_VIEW * view,int (* calc_color_function)(int),char * oneline,int widnames,double x,double y,int fontsize,double char_width,double descender,int num,int current)1953 static void color_pdf_display(SEA_VIEW *view, int (*calc_color_function)( int ), char *oneline,
1954 int widnames, double x, double y, int fontsize, double char_width, double descender,
1955 int num, int current)
1956 {
1957 double xx;
1958 int c, l, count = 0;
1959 char *p, **clines;
1960
1961 clines = (char **)malloc(sizeof(char *) * view->numb_gc); if(clines==NULL) return;
1962 l = strlen(oneline);
1963 for(c = 1; c < view->numb_gc; c++) {
1964 clines[c] = (char *)malloc(l + 1); if(clines[c] == NULL) return;
1965 memset(clines[c], ' ', l); clines[c][l] = 0;
1966 }
1967 for(p = oneline + widnames; *p != 0; p++) {
1968 if(*p == ' ') continue;
1969 if(view->curr_colors != view->codoncolors) c = calc_color_function(*p);
1970 else c = view->col_rank[num][current + count++];
1971 if(c > 0) clines[c][p - oneline] = 'X';
1972 }
1973 for(c = 1; c < view->numb_gc; c++) {
1974 if(strchr(clines[c], 'X') == NULL) continue;
1975 fl_color(view->curr_colors[c]);
1976 for(xx = x + widnames*char_width, p = clines[c] + widnames; *p != 0; p++, xx += char_width) {
1977 if(*p == ' ') continue;
1978 fl_rectf(xx, y - fontsize + descender, char_width+1, fontsize);
1979 }
1980 }
1981 fl_color(FL_BLACK);
1982 fl_draw(oneline, x, y);
1983 for(c = 1; c < view->numb_gc; c++) free(clines[c]);
1984 free(clines);
1985 }
1986
color_svg_display(SEA_VIEW * view,int (* calc_color_function)(int),char * oneline,int widnames,double x,double y,int fontsize,double char_width)1987 static void color_svg_display(SEA_VIEW *view, int (*calc_color_function)( int ), char *oneline,
1988 int widnames, double x, double y, int fontsize, double char_width)
1989 {
1990 double xx;
1991 int c, l;
1992 char *p;
1993
1994 l = strlen(oneline);
1995 char *aux = (char*)malloc(l+1);
1996 fl_font(FL_COURIER_BOLD, fontsize);
1997 xx = x + widnames*char_width;
1998 for (c = 0; c < view->numb_gc; c++) {
1999 memset(aux, ' ', l-widnames);
2000 for (p = oneline+widnames; p < oneline+l; p++) {
2001 if (calc_color_function(*p) == c) aux[p-(oneline+widnames)] = *p;
2002 }
2003 fl_color(view->curr_colors[c]);
2004 fl_draw(aux, l - widnames, xx, y);
2005 }
2006 fl_font(FL_COURIER, fontsize);
2007 fl_color(FL_BLACK);
2008 fl_draw(oneline, widnames, x, y);
2009 }
2010
/*
 * Return the number of header lines needed to write, vertically and one
 * decimal digit per line, the largest value among vary_pos[0..widpos-1].
 *
 * Values <= 0 do not contribute; the result is at least 1 (also for an empty
 * array). The count is not capped, so positions of 1,000,000 and above get
 * as many lines as they have digits.
 */
static int calc_vary_lines(int *vary_pos, int widpos)
{
  int maxi = 0, num, nl = 1;

  for (num = 0; num < widpos; num++) {
    if (vary_pos[num] > maxi) maxi = vary_pos[num];
  }
  /* one line per decimal digit of the largest position */
  while (maxi >= 10) {
    maxi /= 10;
    nl++;
  }
  return nl;
}
2031
2032
out_vary_pos(int * vary_pos,int widnames,int widpos,int nl,FILE * textfile,double x,double y)2033 static void out_vary_pos(int *vary_pos, int widnames, int widpos, int nl, FILE *textfile, double x, double y)
2034 {
2035 int num, l, k, echelle, digit, val;
2036 static char chiffre[] = "0123456789";
2037 char oneline[300];
2038
2039 echelle = 1; k = 0;
2040 for(l = 2; l <= nl; l++) echelle *= 10;
2041 for(l = nl; l > 0; l--) {
2042 for(num = 0; num < widnames; num++) oneline[k++] = ' ';
2043 for(num = 0; num < widpos; num++) {
2044 val = vary_pos[num];
2045 if(val < echelle)
2046 oneline[k++] = ' ';
2047 else {
2048 digit = (val / echelle) % 10 ;
2049 oneline[k++] = *(chiffre + digit);
2050 }
2051 }
2052 oneline[k] = 0;
2053 if(textfile == NULL) {
2054 fl_draw(oneline, x, y);
2055 y += fl_height();
2056 }
2057 else {fputs(oneline, textfile); fputs("\n",textfile); }
2058 k = 0;
2059 echelle /= 10;
2060 }
2061 }
2062
2063
read_alignment_file(const char * infile)2064 SEA_VIEW* read_alignment_file(const char *infile )
2065 {
2066 char *err_message;
2067 int i;
2068 SEA_VIEW* view = new SEA_VIEW;
2069 memset(view, 0, sizeof(SEA_VIEW));
2070 view->menubar = new Fl_Menu_Bar(0,0, 30, 30);
2071 view->menu_trees = new vlength_menu(view->menubar, "Trees", NULL, 0);
2072
2073 #ifndef R_OK
2074 #define R_OK 04
2075 #endif
2076 if (fl_access(infile, R_OK)) {
2077 fprintf(stderr, "File %s\nis not readable or does not exist", infile);
2078 return NULL;
2079 }
2080 known_format file_format = what_format(infile);
2081 if (file_format < 0) {
2082 fprintf(stderr, "File %s\nis not of a format readable by seaview", infile);
2083 return NULL;
2084 }
2085 if (file_format == MASE_FORMAT) {
2086 view->tot_seqs = read_mase_seqs_header(infile, &view->sequence,
2087 &view->seqname, &view->comments, &view->header,
2088 &err_message);
2089 /* interpreter les regions du header du fichier mase */
2090 view->regions = parse_regions_from_header(view->header);
2091 /* interpreter les species sets du fichier mase */
2092 view->numb_species_sets = parse_species_sets_from_header(view->header,
2093 view->list_species_sets, view->name_species_sets, view->tot_seqs);
2094 /* interpreter les trees du fichier mase */
2095 parse_trees_from_header(view->header, view);
2096 /* interpreter les comment lines du header */
2097 view->tot_comment_lines = parse_comment_lines_from_header(view->header,
2098 &(view->comment_line), &(view->comment_name),
2099 &(view->comment_length) , &(view->max_seq_length));
2100 }
2101 else if(file_format == FASTA_FORMAT)
2102 view->tot_seqs = read_fasta_align(infile, &view->sequence,
2103 &view->seqname, &view->comments, &view->header, &err_message, view->spaces_in_fasta_names);
2104 else if(file_format == PHYLIP_FORMAT)
2105 view->tot_seqs = read_phylip_align(infile, &view->sequence,
2106 &view->seqname, &view->comments, &view->header, &err_message);
2107 else if(file_format == CLUSTAL_FORMAT)
2108 view->tot_seqs = read_clustal_align(infile, &view->sequence,
2109 &view->seqname, &view->comments, &view->header, &err_message);
2110 else if(file_format == MSF_FORMAT)
2111 view->tot_seqs = read_msf_align(infile, &view->sequence,
2112 &view->seqname, &view->comments, &view->header, &err_message);
2113 else if(file_format == NEXUS_FORMAT) {
2114 int **list_sp = NULL; char **name_sp = NULL; int i;
2115 view->tot_seqs = read_nexus_align((char*)infile, &view->sequence,
2116 &view->seqname, &view->comments, &view->header,
2117 &err_message, &view->regions, &view->numb_species_sets,
2118 &list_sp, &name_sp, &view->tot_comment_lines,
2119 &view->comment_name, &view->comment_line,
2120 &view->comment_length, &view->protein, view);
2121 for(i= 0; i < view->numb_species_sets; i++) {
2122 view->list_species_sets[i] = list_sp[i];
2123 view->name_species_sets[i] = name_sp[i];
2124 }
2125 if(list_sp != NULL) free(list_sp);
2126 if(name_sp != NULL) free(name_sp);
2127 }
2128 if (view->tot_seqs == 0) return NULL;
2129 if (file_format != NEXUS_FORMAT) view->protein = is_a_protein_alignment(view);
2130 view->each_length = new int[view->tot_seqs];
2131 for (i = 0; i < view->tot_seqs; i++) {
2132 view->each_length[i] = strlen(view->sequence[i]);
2133 }
2134 view->seq_length = 0;
2135 for (i = 0; i < view->tot_seqs; i++) {
2136 if (view->each_length[i] > view->seq_length) view->seq_length = view->each_length[i];
2137 }
2138 view->masename = strdup(infile);
2139 view->format_for_save = file_format;
2140 return view;
2141 }
2142
cmdline_read_input_alignment(int argc,char ** argv)2143 SEA_VIEW *cmdline_read_input_alignment(int argc, char **argv)
2144 {
2145 char *fname, line[500];
2146 fname = argv[argc-1];
2147 if (strcmp(fname, "-") == 0) {
2148 fname = create_tmp_filename();
2149 FILE *out = fopen(fname, "w");
2150 while (fgets(line, sizeof(line), stdin) != NULL) {
2151 fputs(line, out);
2152 }
2153 fclose(out);
2154 }
2155 SEA_VIEW *view = read_alignment_file(fname);
2156 if (strcmp(argv[argc-1], "-") == 0) delete_tmp_filename(fname);
2157 if (!view) {
2158 fputs("\n", stderr);
2159 exit(1);
2160 }
2161 view->max_seq_length = calc_max_seq_length(view->seq_length, view->tot_seqs);
2162 allonge_seqs(view->sequence, view->tot_seqs, view->max_seq_length, view->each_length,
2163 view->tot_comment_lines, view->comment_line, NULL);
2164 return view;
2165 }
2166
format_conversion(int argc,char ** argv)2167 void format_conversion(int argc, char **argv)
2168 {
2169 char *err_message, *p;
2170 int i, j, gc;
2171 bool std_output = false;
2172 bool save_fragment = false;
2173 bool bootstrap = false;
2174 region *myregion;
2175
2176 SEA_VIEW *view = cmdline_read_input_alignment(argc, argv);
2177 if (view == NULL) {
2178 fprintf(stderr, "No sequence found in %s", argv[argc-1]);
2179 exit(1);
2180 }
2181 known_format out_format = view->format_for_save;
2182 load_resources(progname);
2183 view->phylipwidnames = int_res_value("phylipwidnames", 30);
2184 char *outfile = process_output_options(argc, argv, out_format, std_output);
2185
2186 if ( !view->protein && isarg(argc, argv, "-translate")) {
2187 bool no_terminal_stop = isarg(argc, argv, "-no_terminal_stop");
2188 for (i = 0; i < view->tot_seqs; i++) {
2189 gc = (view->comments != NULL ? get_ncbi_gc_from_comment(view->comments[i]) : 1);
2190 p = translate_with_gaps(view->sequence[i], get_acnuc_gc_number(gc));
2191 free(view->sequence[i]);
2192 view->sequence[i] = p;
2193 if (no_terminal_stop) {
2194 char *q = p + strlen(p) - 1;
2195 while (q > p && *q == '-') q--;
2196 if (q >= p && *q == '*') *q = '-';
2197 }
2198 view->each_length[i] = strlen(p);
2199 view->regions = NULL;
2200 }
2201 view->protein = true;
2202 }
2203
2204 while ( (p = argname(argc, argv, "-def_species_group")) != NULL) {
2205 p = strtok(p, ",");
2206 view->name_species_sets[view->numb_species_sets] = strdup(p);
2207 view->list_species_sets[view->numb_species_sets] = (int*)calloc(view->tot_seqs, sizeof(int));
2208 while (true) {
2209 p = strtok(NULL, ",");
2210 if (!p) break;
2211 if (strchr(p, '-') == NULL) {
2212 sscanf(p, "%d", &i);
2213 view->list_species_sets[view->numb_species_sets][i-1] = 1;
2214 }
2215 else {
2216 sscanf(p, "%d-%d", &i, &j);
2217 if (i <= j) {
2218 while (i <= j) view->list_species_sets[view->numb_species_sets][i++ - 1] = 1;
2219 }
2220 }
2221 }
2222 view->numb_species_sets++;
2223 for (i = 0; i < argc; i++) {
2224 if (strcmp(argv[i], "-def_species_group") == 0) {
2225 *argv[i] = '+';
2226 break;
2227 }
2228 }
2229 }
2230
2231 while ( (p = argname(argc, argv, "-def_site_selection")) != NULL) {
2232 int from, to;
2233 list_segments *current_seg;
2234 region *site_selection = (region*)calloc(1, sizeof(region));
2235 list_regions *last_reg = (list_regions*)calloc(1, sizeof(list_regions));
2236 last_reg->element = site_selection;
2237 if (!view->regions) {
2238 view->regions = last_reg;
2239 }
2240 else {
2241 list_regions *current_reg;
2242 current_reg = view->regions;
2243 while (current_reg->next) current_reg = current_reg->next;
2244 current_reg->next = last_reg;
2245 }
2246 p = strtok(p, ",");
2247 site_selection->name = strdup(p);
2248 while (true) {
2249 p = strtok(NULL, ",");
2250 if (!p) break;
2251 if (strchr(p, '-') == NULL) {
2252 sscanf(p, "%d", &from);
2253 to = from; }
2254 else {
2255 sscanf(p, "%d-%d", &from, &to);
2256 if (from > to) continue;
2257 }
2258 if (to > view->seq_length) to = view->seq_length;
2259 if (from > view->seq_length) continue;
2260 list_segments *seg = (list_segments*)malloc(sizeof(list_segments));
2261 seg->debut = from;
2262 seg->fin = to;
2263 seg->next = NULL;
2264 if (!site_selection->list) {
2265 site_selection->list = seg;
2266 }
2267 else {
2268 current_seg->next = seg;
2269 }
2270 current_seg = seg;
2271 }
2272 for (i = 0; i < argc; i++) {
2273 if (strcmp(argv[i], "-def_site_selection") == 0) {
2274 *argv[i] = '+';
2275 break;
2276 }
2277 }
2278 }
2279
2280 if (isarg(argc, argv, "-gblocks")) { // gblocks method
2281 myregion = (region*)malloc(sizeof(region));
2282 myregion->name = strdup("Gblocks");
2283 view->region_line = (char*)malloc(view->seq_length + 1);
2284 create_gblocks_mask(view, myregion, true, isarg(argc, argv, "-b5"), isarg(argc, argv, "-b4"),
2285 isarg(argc, argv, "-b3"), isarg(argc, argv, "-b2"));
2286 list_regions *r = view->regions, *pre = NULL;
2287 while (r) {
2288 if (strcmp(r->element->name, myregion->name) == 0) break;
2289 pre = r;
2290 r = r->next;
2291 }
2292 if (r) {
2293 free(r->element->name);
2294 r->element = myregion;
2295 }
2296 else {
2297 list_regions *elt = (list_regions*)malloc(sizeof(list_regions));
2298 elt->element = myregion;
2299 elt->next = NULL;
2300 if (pre) pre->next = elt;
2301 else view->regions = elt;
2302 }
2303 }
2304
2305 if ( (p = argname(argc, argv, "-sites")) ) {
2306 save_fragment = true;
2307 list_regions *elt = view->regions;
2308 while (elt) {
2309 if (strcmp(elt->element->name, p) == 0) break;
2310 elt = elt->next;
2311 }
2312 if (elt) view->active_region = elt->element;
2313 }
2314
2315 if ( (p = argname(argc, argv, "-species")) ) {
2316 save_fragment = true;
2317 for (i = 0; i < view->numb_species_sets; i++) {
2318 if (strcmp(view->name_species_sets[i], p) == 0) break;
2319 }
2320 if (i < view->numb_species_sets) {
2321 view->sel_seqs = view->list_species_sets[i];
2322 view->tot_sel_seqs = 0;
2323 for (i = 0; i < view->tot_seqs; i++) if (view->sel_seqs[i]) view->tot_sel_seqs++;
2324 }
2325 }
2326 if (isarg(argc, argv, "-del_gap_only_sites") && !isarg(argc, argv, "-sites")) del_gap_only_sites(view);
2327
2328 if ( (p = argname(argc, argv, "-bootstrap")) ) {
2329 int replicates = -1;
2330 sscanf(p, "%d", &replicates);
2331 if (replicates == -1) replicates = 1;
2332 bootstrap = true;
2333 save_fragment = false;
2334 save_bootstrap_replicates(outfile, replicates, view);
2335 }
2336
2337
2338 if (save_fragment) {
2339 myregion = view->active_region;
2340 if (myregion == NULL) {
2341 myregion = (region *)malloc(sizeof(region));
2342 myregion->list = (list_segments *)malloc(sizeof(list_segments));
2343 myregion->list->debut = 1;
2344 myregion->list->fin = view->seq_length;
2345 myregion->list->next = NULL;
2346 myregion->name = (char *)"all";
2347 }
2348 err_message = save_alignment_or_region(outfile, view->sequence, view->comments,
2349 view->header, view->seqname, view->tot_seqs, view->each_length,
2350 NULL, myregion, out_format,
2351 0, NULL, NULL, view->sel_seqs, view->tot_sel_seqs,
2352 view->protein, 0, NULL, NULL, view->phylipwidnames,
2353 view->tot_trees, view->trees,
2354 view->menu_trees->vitem(0), view->spaces_in_fasta_names);
2355 }
2356 else if (!bootstrap) {
2357 err_message = save_alignment_or_region(outfile, view->sequence, view->comments,
2358 view->header, view->seqname, view->tot_seqs, view->each_length,
2359 view->regions, NULL, out_format,
2360 view->numb_species_sets, view->list_species_sets,
2361 view->name_species_sets, NULL, 0, view->protein,
2362 view->tot_comment_lines, view->comment_name,
2363 view->comment_line, view->phylipwidnames,
2364 view->tot_trees, view->trees,
2365 view->menu_trees->vitem(0), view->spaces_in_fasta_names);
2366 }
2367 if (err_message) {
2368 fprintf(stderr, "%s\n", err_message);
2369 if (std_output) delete_tmp_filename(outfile);
2370 exit(1);
2371 }
2372 if (std_output) {
2373 char line[200];
2374 FILE *in = fopen(outfile, "r");
2375 while (fgets(line, sizeof(line), in) != NULL) {
2376 fputs(line, stdout);
2377 }
2378 fclose(in);
2379 fflush(stdout);
2380 fl_unlink(outfile);
2381 }
2382 exit(0);
2383 }
2384
2385
process_output_options(int argc,char ** argv,known_format & out_format,bool & std_output)2386 char *process_output_options(int argc, char **argv, known_format& out_format, bool& std_output)
2387 {
2388 int i;
2389 char upper_format[20], *p;
2390 std_output = false;
2391 char *outformatname = argname(argc, argv, "-output_format");
2392 if (outformatname) {
2393 majuscules(outformatname);
2394 for (i = 0; i < nbr_formats; i++) {
2395 strcpy(upper_format, f_format_names[i]);
2396 majuscules(upper_format);
2397 if (strcmp(upper_format, outformatname)== 0) break;
2398 }
2399 if (i >= nbr_formats) {
2400 fprintf(stderr, "'%s' is not of a format known by seaview", outformatname);
2401 exit(1);
2402 }
2403 out_format = (known_format)i;
2404 }
2405 char *outfile = argname(argc, argv, "-o");
2406 if (outfile == NULL) {
2407 outfile = new char[strlen(argv[argc-1]) + 12];
2408 strcpy((char*)outfile, argv[argc-1]);
2409 p = strrchr(outfile, '.');
2410 if (!p) { p = (char*)outfile + strlen(outfile); *p = '.'; }
2411 strcpy(p+1, f_format_exts[out_format]);
2412 if (strcmp(outfile, argv[argc-1]) == 0) {
2413 strcpy(p, "_2.");
2414 strcpy(p+3, f_format_exts[out_format]);
2415 }
2416 }
2417 else if (strcmp(outfile, "-") == 0) {
2418 outfile = strdup(create_tmp_filename());
2419 std_output = true;
2420 }
2421 return outfile;
2422 }
2423
2424
2425 #ifndef NO_PDF
printout_cmdline(int argc,char ** argv)2426 void printout_cmdline(int argc, char **argv)
2427 {
2428 bool std_output = false;
2429 SEA_VIEW *view = cmdline_read_input_alignment(argc, argv);
2430 if (view == NULL) {
2431 fprintf(stderr, "No sequence found in %s", argv[argc-1]);
2432 exit(1);
2433 }
2434 load_resources(progname);
2435 //protein colors
2436 static char stdcolorgroups[50], customprotcolors[300];
2437 strcpy(stdcolorgroups, get_res_value("stdcolorgroups", def_stdcolorgroups));
2438 strcpy(customprotcolors, get_res_value("protcolors", ""));
2439 int *protcolors = (int *)malloc(max_protcolors * sizeof(int));
2440 int *def_protcolors = (int *)malloc(max_protcolors * sizeof(int));
2441 for(int i=0; i<max_protcolors; i++) def_protcolors[i] =
2442 fl_rgb_color(def_protcolors_rgb[3*i], def_protcolors_rgb[3*i+1], def_protcolors_rgb[3*i+2]);
2443
2444 memcpy(protcolors, def_protcolors, max_protcolors * sizeof(int) );
2445 int cur_protcolors = prep_custom_colors(protcolors, customprotcolors, max_protcolors);
2446 static int dnacolors[] = { FL_BLACK, FL_RED, FL_DARK_GREEN, FL_YELLOW, FL_BLUE };
2447 /* process resource-read stdcolorgroups and altcolorgroups */
2448 prep_aa_color_code(stdcolorgroups, (char*)"",
2449 cur_protcolors, &view->numb_stdprotcolors, &view->numb_altprotcolors);
2450
2451 view->max_seq_length = calc_max_seq_length(view->seq_length, view->tot_seqs);
2452 if (view->protein) {
2453 view->numb_gc = view->numb_stdprotcolors;
2454 view->curr_colors = view->stdprotcolors = protcolors;
2455 protcolors[0] = FL_BLACK;
2456 }
2457 else {
2458 view->numb_gc = view->numb_dnacolors = 5;
2459 view->curr_colors = view->dnacolors = dnacolors;
2460 }
2461 known_format out_format = view->format_for_save;
2462 char *outfile = process_output_options(argc, argv, out_format, std_output);
2463 int fontsize = (int)argval(argc, argv, "-fontsize", 10);
2464 int blocksize = (int)argval(argc, argv, "-blocksize", 10);
2465 #ifdef NO_PDF
2466 int use_svg = false;
2467 int svg_width = 0;
2468 #else
2469 int use_svg = isarg(argc, argv, "-svg");
2470 int svg_width = (int)argval(argc, argv, "-svg", 1000);
2471 #endif
2472 int landscape = isarg(argc, argv, "-landscape");
2473 int letter = isarg(argc, argv, "-letter");
2474 printout(view, outfile,
2475 fontsize, blocksize,
2476 letter ? Fl_Paged_Device::LETTER : Fl_Paged_Device::A4,
2477 false, -1,
2478 use_svg ? SVG : PDF_COLOR,
2479 landscape ? Fl_Paged_Device::LANDSCAPE : Fl_Paged_Device::PORTRAIT,
2480 svg_width);
2481 if (std_output) {
2482 char line[200];
2483 FILE *in = fopen(outfile, "r");
2484 if (in) {
2485 while (fgets(line, sizeof(line), in) != NULL) {
2486 fputs(line, stdout);
2487 }
2488 fclose(in);
2489 fflush(stdout);
2490 }
2491 delete_tmp_filename(outfile);
2492 }
2493 exit(0);
2494 }
2495 #endif // ! NO_PDF
2496