1 #include "seaview.h"
2 #include "pdf_or_ps.h"
3 #include "svg.h"
4 #include <ctype.h>
5 #include <time.h>
6 #ifndef WIN32
7 #include <unistd.h>
8 #endif
9 
10 /* included functions */
11 int read_mase_seqs_header(const char *masefname, char ***pseq, char ***pseqname,
12 		char ***pcomments, char **pheader, char **err_message);
13 int one_more_seq_found(int count1, char ***pseq, char ***pseqname, char ***pcomments);
14 int read_fasta_align(const char *fname, char ***pseq, char ***pseqname,
15 		char ***pcomments, char **pheader, char **err_message, int spaces_in_names);
16 int read_phylip_align(const char *fname, char ***pseq, char ***pseqname,
17 		char ***pcomments, char **pheader, char **err_message);
18 int read_clustal_align(const char *fname, char ***pseq, char ***pseqname,
19 		char ***pcomments, char **pheader, char **err_message);
20 int read_msf_align(const char *fname, char ***pseq, char ***pseqname,
21 		char ***pcomments, char **pheader, char **err_message);
22 int is_a_protein_seq(char *seq);
23 int save_fasta_file(const char *fname, char **seq, char **comments,
24 	char **seqname, int totseqs, int *eachlength, region *region_used,
25 	int *sel_seqs, int tot_sel_seqs, int spaces_in_names, int pad_to_max_length);
26 int save_phylip_file(const char *fname, char **seq,
27 	char **seqname, int totseqs, int *eachlength, region *region_used,
28 	int *sel_seqs, int tot_sel_seqs, int phylipwidnames);
29 int output_next_res_from_region(char *seq, int lenseq,
30 	list_segments **segment, int *current, FILE *out, int total,
31 	int use_dots);
32 void save_regions(list_regions *regions, FILE *out);
33 int save_mase_file(const char *fname, char **seq, char **comments,
34 	char *header, char **seqname, int totseqs, int *eachlength,
35 	list_regions *regions, region *region_used, int numb_species_sets,
36 	int **list_species_sets, char **name_species_sets,
37 	int *sel_seqs, int tot_sel_seqs, int tot_comment_lines,
38 	char **comment_name, char **comment_line,
39 	int tot_trees, char **trees, const Fl_Menu_Item *menu_tree_items);
40 int save_clustal_file(const char *fname, char **seq,
41 	char **seqname, int totseqs, int *eachlength, region *region_used,
42 	int *sel_seqs, int tot_sel_seqs);
43 int calc_gcg_check(list_segments *psegment, char *seq);
44 int save_msf_file(const char *fname, char **seq,
45 	char **seqname, int totseqs, int *eachlength, region *region_used,
46 	int protein, int *sel_seqs, int tot_sel_seqs);
47 char *save_alignment_or_region(const char *fname, char **seq, char **comments,
48 	char *header, char **seqname, int totseqs, int *eachlength,
49 	list_regions *regions, region *region_used, known_format format,
50 	int numb_species_sets, int **list_species_sets,
51 	char **name_species_sets, int *sel_seqs, int tot_sel_seqs, int protein,
52 	int tot_comment_lines, char **comment_name, char **comment_line, int phylipwidnames,
53 	int tot_trees, char **trees, const Fl_Menu_Item *menu_tree_items, int spaces_in_fasta_names);
54 char *get_full_path(const char *fname);
55 static void save_species_sets(int numb_species_sets, int **list_species_sets,
56 	char **name_species_sets, int totseqs, FILE *out);
57 void save_comment_lines(int tot_comment_lines, char **names, char **lines,
58 	FILE *out);
59 known_format what_format(const char *filename);
60 char* seaview_file_chooser_save_as(const char* message, const char* fname, SEA_VIEW *view, known_format* new_format);
61 const char *extract_dirname(const char *pathname);
62 int printout(SEA_VIEW *view, const char *filename,
63 	     int fontsize, int block_size, Fl_Paged_Device::Page_Format pageformat, int vary_only, int ref0,
64 	     int pdfkindvalue, Fl_Paged_Device::Page_Layout layout, int svg_width = 0);
65 static void color_pdf_display(SEA_VIEW *view, int (*calc_color_function)( int ), char *oneline,
66 			      int widnames, double x, double y, int fontsize, double char_width, double descender,int num, int current);
67 static void color_svg_display(SEA_VIEW *view, int (*calc_color_function)( int ), char *oneline,
68 			      int widnames, double x, double y, int fontsize, double char_width);
69 static int calc_vary_lines(int *vary_pos, int widpos);
70 static void out_vary_pos(int *vary_pos, int widnames, int widpos, int nl, FILE *textfile, double x, double y);
71 SEA_VIEW* read_alignment_file(const char *infile);
72 SEA_VIEW *cmdline_read_input_alignment(int argc, char **argv);
73 void format_conversion(int argc, char **argv);
74 char *process_output_options(int argc, char **argv, known_format& out_format, bool& std_output);
75 #ifndef NO_PDF
76 void printout_cmdline(int argc, char **argv);
77 #endif
78 
79 
80 /* external */
81 extern char *f_format_names[];
82 extern char *f_format_exts[];
83 extern int nbr_formats;
84 extern float argval(int argc, char *argv[], const char *arg, float defval);
85 extern int calc_max_seq_length(int seq_length, int tot_seqs);
86 extern int max_protcolors;
87 extern char def_stdcolorgroups[];
88 extern int def_protcolors_rgb[];
89 char *get_res_value(const char *name, const char *def_value);
90 extern int prep_custom_colors(int *colors, char *customcolors, int max_colors);
91 extern color_choice prep_aa_color_code(char *list_std, char *list_alt,
92 				       int maxprotcolors, int *numb_stdprotcolors, int *numb_altprotcolors);
93 extern void load_resources(const char *progname);
94 extern void allonge_seqs(char **seq, int totseqs, int maxlen, int *eachlength,
95 			 int tot_comment_lines, char **comment_line, char **pregion_line);
96 extern int int_res_value(const char *name, int def_value);
97 extern const char *progname;
98 #if defined(__APPLE__)
99 extern const char *MG_GetBundleResourcesDir(void);
100 #else
101 extern char *get_prog_dir(void);
102 #endif
103 
104 
105 extern int save_nexus_file(const char *fname, int ntaxa, int protein,
106 	char **seqs, char **taxnames, char **notes, char *header,
107 	int num_species_sets, int **list_species_sets,
108 	char **name_species_sets,
109 	list_regions *charsets,
110 	int tot_comment_lines, char **comment_name, char **comment_line,
111 	region *region_used, int *sel_seqs, int tot_sel_seqs, int *eachlength,
112 	int tot_trees, char **trees, const Fl_Menu_Item *items);
113 extern char *my_fgets(char *s, int n, FILE *f);
114 extern char *argname(int argc, char *argv[], const char *arg);
115 extern int isarg(int argc, char *argv[], const char *arg);
116 extern char *create_tmp_filename(void);
117 extern void delete_tmp_filename(const char *base_fname);
118 extern char *translate_with_gaps(char *seq, int gc);
119 extern int get_ncbi_gc_from_comment(char *comment);
120 extern int create_gblocks_mask(SEA_VIEW *view, region *myregion, int no_gui, int b5_val, int b4_val, int b3_val, int b2_val);
121 extern char *back_translate_with_gaps(char *prot, char *dna);
122 extern void del_gap_only_sites(SEA_VIEW *view);
123 extern void save_bootstrap_replicates(const char *fname, int replicates, SEA_VIEW *view);
124 extern "C" {
125   int get_acnuc_gc_number(int ncbi_gc);
126   int get_ncbi_gc_number(int ncbi_gc);
127 }
128 
129 
read_mase_seqs_header(const char * masefname,char *** pseq,char *** pseqname,char *** pcomments,char ** pheader,char ** err_message)130 int read_mase_seqs_header(const char *masefname, char ***pseq, char ***pseqname,
131 		char ***pcomments, char **pheader, char **err_message)
132 {
133 #define MAXLENSEQ 10000 /* unite d'allocation de memoire */
134 #define lline  2000
135 FILE *masef;
136 char line[lline], *i, *base, *header = NULL, *provseq = NULL, *p;
137 int  l, lenseqs, lpre, lseq, l2, totseqs = -1, want_header, curr_max_header;
138 static char ret_message[200];
139 char **seq, **seqname, **comments;
140 
141 *ret_message = 0;
142 *err_message = ret_message;
143 if( (masef=fopen(masefname,"r")) == NULL) {
144 	sprintf(ret_message,"File not found:%s",masefname);
145 	return 0;
146 	}
147 want_header = (pheader != NULL);
148 
149 if(fgets(line, lline, masef)==NULL)goto fini;
150 if(strchr(line, '\n') == NULL) {
151 	strcpy(ret_message,"Not a mase file!");
152 	goto fini;
153 	}
154 if(strncmp(line,";;",2)==0) {
155 	if(want_header) {
156 		if( (header=(char *)malloc(MAXLENCOM+1)) ==
157 							 NULL)goto nomem;
158 		curr_max_header = MAXLENCOM;
159 		strcpy(header,line);
160 		lpre=strlen(line);
161 		}
162 	do	{
163 		if( fgets(line,lline,masef)==NULL ) goto fini;
164 		if(strncmp(line,";;",2)!=0) break;
165 		if(header != NULL) {
166 			lseq=strlen(line);
167 			if(lpre+lseq > curr_max_header) {
168 				curr_max_header += MAXLENCOM;
169 				if( (p=(char *)malloc(curr_max_header+1))
170 					== NULL ) goto nomem;
171 				memcpy(p, header, lpre);
172 				free(header);
173 				header = p;
174 				}
175 			memcpy(header+lpre,line, lseq);
176 			lpre += lseq;
177 			}
178 		}
179 	while (1);
180 	if( want_header ) {
181 		header[lpre] = 0;
182 		header=(char *)realloc(header,lpre+1);
183 		}
184 	}
185 if(*line != ';' ) {
186 	strcpy(ret_message,"Not a mase file!");
187 	goto fini;
188 	}
189 
190 lenseqs=MAXLENSEQ;
191 if( (provseq=(char *)malloc(lenseqs+1)) ==NULL)goto nomem;
192 
193 i=line;
194 while(i!=NULL){
195 	totseqs = one_more_seq_found(totseqs, &seq, &seqname, &comments);
196 	if(totseqs == -1) goto nomem;
197 	if(comments!=NULL) {
198 		if( (comments[totseqs]=(char *)malloc(MAXLENCOM+1)) ==
199 							 NULL)goto nomem;
200 		strcpy(comments[totseqs],line);
201 		lpre=strlen(line); l=MAXLENCOM;
202 		while(*fgets(line,lline,masef)==';') {
203 			lseq=strlen(line);
204 			if(lpre+lseq <= l) {
205 				strcpy(comments[totseqs]+lpre,line);
206 				lpre += lseq;
207 				}
208 			else l=lpre-1;
209 			}
210 		if(lpre<MAXLENCOM)
211 		   comments[totseqs]=(char *)realloc(comments[totseqs],lpre+1);
212 		}
213 	else	while(*fgets(line,lline,masef)==';');
214 	l = strlen(line);
215 	while((line[l-1] == ' ' || line[l-1] == '\n') && l>0 ) l--; line[l] = 0;
216 	if( (seqname[totseqs]=(char *)malloc(l+1)) == NULL)goto nomem;
217 	strcpy(seqname[totseqs],line);
218 	lseq = 0; /* what is already put in provseq */
219 	while( (i=fgets(line,lline,masef))!= NULL && *i != ';' ) {
220 	    	l2 = strlen(line);
221 		if( line[l2 - 1] == '\n' ) l2--;
222 	   	while(l2>0 && line[l2-1]==' ')l2--;
223 		if(lseq + l2 > lenseqs) {
224 			char *temp;
225 			lenseqs += MAXLENSEQ;
226 			temp = (char *)malloc(lenseqs+1);
227 			if(temp == NULL) goto nomem;
228 			memcpy(temp, provseq, lseq);
229 			free(provseq);
230 			provseq = temp;
231 			}
232 	   	memcpy(provseq+lseq, line, l2);
233 		lseq += l2;
234 		}
235 	provseq[lseq]='\0';
236 	seq[totseqs] = (char *)malloc(lseq+1);
237 	if(seq[totseqs] == NULL) goto nomem;
238 /* ignore space or non printable characters */
239 	base=provseq - 1; p = seq[totseqs] - 1;
240 	while ( *(++base) != 0) {
241 		if(isprint(*base) && ! isspace(*base) ) {
242 //			*(++p) = toupper(*base);
243 			*(++p) = *base;
244 			}
245 		}
246 	*(++p) = 0;
247 	}
248 seq = (char **)realloc(seq, (totseqs + 1)*sizeof(char *));
249 seqname = (char **)realloc(seqname, (totseqs + 1)*sizeof(char *));
250 comments = (char **)realloc(comments, (totseqs + 1)*sizeof(char *));
251 *pseq = seq; *pseqname = seqname; *pcomments = comments;
252 fini:
253 fclose(masef);
254 if(want_header) *pheader = header;
255 if(provseq != NULL) free(provseq);
256 return totseqs+1;
257 nomem:
258 sprintf(ret_message,"Error: Not enough memory!");
259 totseqs = -1;
260 goto fini;
261 }
262 
263 
/*
 * Extract the trees stored in a header and remove them from it.
 *
 * The header is scanned line by line. A line starting with ";;$" opens a tree
 * entry: the text after ";;$" (leading spaces skipped) is added as an item of
 * view->menu_trees, and the following ";;" lines are concatenated, without
 * their ";;" prefix and without newlines, into a new string appended to
 * view->trees (view->tot_trees is incremented). All other lines are kept.
 * The kept lines are finally copied back over the caller's header buffer,
 * so header is modified in place. Does nothing when header is NULL.
 *
 * NOTE(review): every strchr(..., '\n') result is used without a NULL check,
 * so the code appears to require that each header line, including the last
 * one, ends with '\n' and that a ";;$" line is followed by at least one more
 * line -- confirm against the code that builds the header.
 */
void parse_trees_from_header(char *header, SEA_VIEW *view)
{
	char *new_header, *old_header, *fin_new_header, *p, *q;
	int l_header, l;
	if(header == NULL) return;
	old_header = header;
	l_header=strlen(header);
	/* the filtered header can never be longer than the original */
	if( (new_header = (char *)malloc(l_header+1)) == NULL) out_of_memory();
	fin_new_header = new_header; /* write position at the end of new_header */
	*new_header = 0;
	while (*header!= 0) {
		if(strncmp(header,";;$",3) == 0) {
			/* tree title line: temporarily terminate it to use it as menu label */
			p = header + 3;
			while(*p == ' ') p++;
			q = strchr(p, '\n');
			*q = 0;
		  view->menu_trees->add(p, trees_callback, NULL, 0);
		  int rank = view->menu_trees->vlength();
		  (view->menu_trees->vitem(rank - 1))->labelfont(FL_HELVETICA_ITALIC);
			*q = '\n';
			p = q + 1;
			/* count in l the lines that hold this tree; q ends on the newline
			   terminating the last of them */
			l = 1;
			while(TRUE) {
				q = strchr(p, '\n');
				/* stop when the next line is not a ";;" line ... */
				if(strncmp(q+1, ";;", 2) != 0) break;
				/* ... or is a ";;$" line with text after the '$' (presumably the
				   title of the next tree) ... */
				if (*(q+3) == '$' && *(q+4) != '\n') break;
				/* ... or is one of these two other header record kinds */
				if (strncmp(q+1, ";;@ of species =", 16) == 0) break;
				if (strncmp(q+1, ";;# of segments=", 16) == 0) break;
				p = q + 1;
				l++;
				}
			/* grow the array of trees by one slot */
			if(view->tot_trees == 0) view->trees = (char **)malloc(sizeof(char *));
			else view->trees = (char **)realloc(view->trees, (view->tot_trees + 1) * sizeof(char *));
			view->trees[view->tot_trees] = (char *)malloc(q - header + 1);
			p = view->trees[view->tot_trees];
			for(int i = 0; i < l; i++) {
				/* move to the next line and copy it without its ";;" prefix */
				header = strchr(header, '\n') + 1;
				q = (char *)memccpy(p, header + 2, '\n', l_header);
				/* step back onto the copied '\n' so the next copy overwrites it */
				p += (q - p - 1);
				}
			*p = 0;
			view->tot_trees++;
		}
		else	{
			/* ordinary header line: keep it, newline included */
			p=(char *)memccpy(fin_new_header, header, '\n', l_header);
			fin_new_header += (p - fin_new_header);
		}
		header = strchr(header,'\n') + 1;
	}
	*fin_new_header = 0;
	/* replace the caller's header by its tree-free version */
	strcpy(old_header, new_header);
	free(new_header);
}
317 
318 
/*
 * Make room for one more sequence in the three parallel arrays
 * (*pseq, *pseqname, *pcomments) and return the rank of the new slot.
 *
 * count1 : rank of the last sequence stored so far; -1 starts a new series,
 *          in which case the arrays are freshly allocated (100 slots).
 * When the current capacity is exhausted, it is tripled with realloc.
 * The capacity is kept in a static variable, so only one series can be
 * built at a time.
 *
 * Returns count1 + 1, or -1 if memory could not be obtained. On failure of
 * the initial allocation nothing is leaked and the caller's pointers are
 * untouched; on failure while growing, the caller's pointers stay valid
 * (any array that was successfully moved has been written back).
 */
int one_more_seq_found(int count1, char ***pseq, char ***pseqname, char ***pcomments)
{
static int max_count;
char **seq, **seqname, **comments;
int new_max;

if(count1 == -1) max_count = 0;

if(count1 + 1 < max_count) return count1 + 1;

count1++;
if(max_count == 0) {
	new_max = 100;
	seq = (char **)malloc(new_max * sizeof(char *));
	seqname = (char **)malloc(new_max * sizeof(char *));
	comments = (char **)malloc(new_max * sizeof(char *));
	if(seq == NULL || seqname == NULL || comments == NULL) {
		free(seq); free(seqname); free(comments);
		return -1;
		}
	*pseq = seq; *pseqname = seqname; *pcomments = comments;
	max_count = new_max;
	}
else {
	new_max = 3 * max_count;
	/* write each array back as soon as it is grown: realloc may have moved it */
	seq = (char **)realloc(*pseq, new_max * sizeof(char *));
	if(seq == NULL) return -1;
	*pseq = seq;
	seqname = (char **)realloc(*pseqname, new_max * sizeof(char *));
	if(seqname == NULL) return -1;
	*pseqname = seqname;
	comments = (char **)realloc(*pcomments, new_max * sizeof(char *));
	if(comments == NULL) return -1;
	*pcomments = comments;
	max_count = new_max;
	}

return count1;
}
352 
353 
read_fasta_align(const char * fname,char *** pseq,char *** pseqname,char *** pcomments,char ** pheader,char ** err_message,int spaces_in_names)354 int read_fasta_align(const char *fname, char ***pseq, char ***pseqname,
355 		char ***pcomments, char **pheader, char **err_message, int spaces_in_names)
356 {
357 FILE *in;
358 int totseqs, lseq, l2, l, lenseqs;
359 char line[500], *p, *i, c, *q, *r;
360 static char ret_message[200];
361 char **seq, **seqname, **comments, *tmpseq = NULL;
362 
363 *ret_message = 0;
364 *err_message = ret_message;
365 if( (in=fopen(fname,"r")) == NULL) {
366 	sprintf(ret_message,"File not found:%s", fname);
367 	return 0;
368 	}
369 
370 /* calcul du nombre de sequences dans le fichier */
371 totseqs = 0;
372 while(fgets(line, sizeof(line), in) != NULL) {
373 	if(*line == '>') totseqs++;
374 	}
375 rewind(in);
376 seq = (char **)malloc(totseqs * sizeof(char *));
377 if(seq == NULL) goto nomem;
378 comments = (char **)malloc(totseqs * sizeof(char *));
379 if(comments == NULL) goto nomem;
380 seqname = (char **)malloc(totseqs * sizeof(char *));
381 if(seqname == NULL) goto nomem;
382 *pseq = seq; *pcomments = comments; *pseqname = seqname;
383 
384 lenseqs = MAXLENSEQ;
385 tmpseq = (char *)malloc(lenseqs + 1);
386 if(tmpseq == NULL) goto nomem;
387 totseqs = -1;
388 i = fgets(line, sizeof(line), in);
389 if(line[0] != '>') {
390 	strcpy(ret_message,"File not in Fasta format!");
391 	totseqs = -1; goto fini;
392 	}
393 while( i != NULL ){
394 	/* finish reading very long title line */
395 	c = line[strlen(line) - 1];
396 	while(c != '\n' && c != '\r' && c != EOF) c = getc(in);
397 	q = line + strlen(line) - 1;
398 	while(q > line + 1 && (*q == '\n' || *q == '\r')) *(q--) = 0;
399 	totseqs++;
400 	p = line + 1;
401   while (*p == ' ') p++;
402 	if(spaces_in_names) {
403 	  while(*p && *p != '\n') p++;
404 	  while(*(p-1) == ' ') p--;
405 	  }
406 	else {
407 	  while(*p && *p != ' ' && *p != '\n') p++;
408 	  }
409   r = line + 1;
410   while (*r == ' ') r++;
411 	l = p - r;
412 	if( (seqname[totseqs] = (char *)malloc(l+1)) == NULL)goto nomem;
413 	memcpy(seqname[totseqs], r, l); seqname[totseqs][l] = 0;
414 	/* use rest of title line, if any, as comment */
415 	while(*p == ' ') p++;
416 	l = q - p + 1;
417 	if( l > 0) {
418 		comments[totseqs] = (char *)malloc(l + 3);
419 		if(comments[totseqs] != NULL) {
420 			strcpy(comments[totseqs], ";");
421 			strcpy(comments[totseqs] + 1, p);
422 			strcpy(comments[totseqs] + l + 1, "\n");
423 			}
424 		}
425 	else comments[totseqs] = NULL;
426 	lseq = 0;
427 	while( (i=fgets(line, sizeof(line), in))!= NULL && *i != '>' ) {
428 		l2 = strlen(line);
429 		if( line[l2 - 1] == '\n' ) l2--;
430 	   	while(l2>0 && line[l2-1]==' ')l2--;
431 		if(lseq + l2 > lenseqs) {
432 			lenseqs += MAXLENSEQ;
433 			tmpseq= (char *)realloc(tmpseq, lenseqs + 1);
434 			if(tmpseq == NULL) goto nomem;
435 			}
436 /* copy seq data excluding spaces (because of gblocks) */
437 		p = tmpseq+lseq;
438 		q = line;
439 		while (q < line + l2) {
440 			if(*q != ' ') *(p++) = *q;
441 			q++;
442 			}
443 		lseq += p - (tmpseq+lseq);
444 		}
445 	tmpseq[lseq]='\0';
446 	seq[totseqs] = (char *)malloc(lseq + 1);
447 	if(seq[totseqs] == NULL) goto nomem;
448 	memcpy(seq[totseqs], tmpseq, lseq + 1);
449 	}
450 fini:
451 fclose(in);
452 if(tmpseq != NULL) free(tmpseq);
453 *pheader = NULL;
454 return totseqs+1;
455 nomem:
456 sprintf(ret_message,"Error: Not enough memory!");
457 totseqs = -1;
458 goto fini;
459 }
460 
461 
read_phylip_align(const char * fname,char *** pseq,char *** pseqname,char *** pcomments,char ** pheader,char ** err_message)462 int read_phylip_align(const char *fname, char ***pseq, char ***pseqname,
463 		char ***pcomments, char **pheader, char **err_message)
464 {
465 FILE *in;
466 char *p, *q;
467 int c;
468 static char line[300];
469 char **seq=0, **comments=0, **seqname=0;
470 int totseqs, lenseqs, i, l;
471 static char ret_message[200];
472 *ret_message = 0;
473 *err_message = ret_message;
474 in=fopen(fname,"r");
475 if(in==NULL) {
476 	sprintf(ret_message,"File not found:%s",fname);
477 	return 0;
478 	}
479 fgets(line,sizeof(line),in);
480 if( sscanf(line, "%d%d", &totseqs, &lenseqs) != 2) {
481 		sprintf(ret_message,"Not a PHYLIP file");
482 		totseqs = 0;
483 		goto fini;
484 		}
485 seq = (char **)malloc(totseqs * sizeof(char *));
486 if(seq == NULL) goto nomem;
487 seqname = (char **)malloc(totseqs * sizeof(char *));
488 if(seqname == NULL) goto nomem;
489 comments = (char **)malloc(totseqs * sizeof(char *));
490 if(comments == NULL) goto nomem;
491 for(i=0; i<totseqs; i++) {
492 	if( (seq[i] = (char *)malloc(lenseqs+1) ) == NULL ) goto nomem;
493 	comments[i] = NULL;
494 	}
495 for(i=0; i<totseqs; i++) {
496   fgets(line,sizeof(line),in);
497   p = strstr(line, " "); if(p == NULL) p = line + 10;
498   if( (seqname[i] = (char *)malloc(p - line + 1) ) == NULL ) goto nomem;
499   memcpy(seqname[i], line, p - line); seqname[i][p - line] = 0;
500   q = seq[i];
501   while(*p != 0 && *p != '\n') {
502 	  if(*p != ' ') {
503 		if(q - seq[i] >= lenseqs) goto badfile;
504 		*(q++) = *p;
505 		}
506 	  p++;
507 	  }
508   c = *p;
509   if(c == '\n') continue;
510   while(TRUE) {
511 	c = fgetc(in);
512 	if(c == EOF) goto badfile;
513 	if(c == '\n' || c == '\r') break;
514 	if(c != ' ') {
515 	  if(q - seq[i] >= lenseqs) goto badfile;
516 	  *(q++) = (char)c;
517 	  }
518 	}
519   if(c == '\r') {c = fgetc(in); if(c != '\n') ungetc(c, in); }
520   }
521 l = q - seq[totseqs - 1];
522 while( l < lenseqs) {
523   do c = fgetc(in); while(c != '\n' && c != '\r' && c != EOF);
524   if(c == EOF) goto badfile;
525   if(c == '\r') {c = fgetc(in); if(c != '\n') ungetc(c, in); }
526   for(i=0; i<totseqs; i++) {
527 	q = seq[i] + l;
528 	while(TRUE) {
529 	  c = fgetc(in);
530 	  if(c == EOF) goto badfile;
531 	  if(c == '\n' || c == '\r') break;
532 	  if(c != ' ') {
533 		if(q - seq[i] >= lenseqs) goto badfile;
534 		*(q++) = (char)c;
535 		}
536 	  }
537 	if(c == '\r') {c = fgetc(in); if(c != '\n') ungetc(c, in); }
538 	}
539   l = q - seq[totseqs - 1];
540   }
541 for(i=0; i<totseqs; i++) seq[i][l] = 0;
542 fini:
543 *pheader = NULL;
544 fclose(in);
545 *pseq = seq; *pseqname = seqname; *pcomments = comments;
546 return totseqs;
547 nomem:
548 	sprintf(ret_message,"Not enough memory!");
549 	totseqs = 0;
550 	goto fini;
551 badfile:
552 	sprintf(ret_message,"Bad file format");
553 	totseqs = 0;
554 	goto fini;
555 }
556 
557 
read_clustal_align(const char * fname,char *** pseq,char *** pseqname,char *** pcomments,char ** pheader,char ** err_message)558 int read_clustal_align(const char *fname, char ***pseq, char ***pseqname,
559 		char ***pcomments, char **pheader, char **err_message)
560 {
561 FILE *in;
562 char line[200], *p;
563 int i, l, curr_spec, first=TRUE, curr_len, next_len, tot_spec, curr_max_len,
564 	carac, wid_name;
565 static char ret_message[200];
566 char **seq, **comments, **seqname = NULL;
567 
568 *ret_message = 0;
569 *err_message = ret_message;
570 in=fopen(fname,"r");
571 if(in==NULL) {
572 	sprintf(ret_message,"File not found:%s",fname);
573 	return 0;
574 	}
575 fgets(line,sizeof(line),in);
576 if(strncmp(line,"CLUSTAL",7) != 0) { /* skip 1st line with CLUSTAL in it */
577 	strcpy(ret_message,"File not in CLUSTAL format!");
578 	tot_spec = -1; goto fini;
579 	}
580 /* skip next empty lines */
581 do	{
582 	carac = getc(in);
583 	if(carac == ' ') {
584 		fgets(line,sizeof(line),in);
585 		carac = getc(in);
586 		}
587 	}
588 while(carac == '\n' || carac == '\r');
589 ungetc(carac, in); /* back to start of 1st non-empty line */
590 tot_spec = curr_spec = -1; curr_len = next_len = 0;
591 while( fgets(line, sizeof(line), in) != NULL ) {
592 	if(*line == '\n' || *line == ' ') {
593 		curr_spec = -1;
594 		curr_len = next_len;
595 		first = FALSE;
596 		continue;
597 		}
598 	else if(tot_spec >= 0 && curr_spec == -1 &&
599 		strncmp(line, seqname[0], strlen(seqname[0]) ) != 0) {
600 		break;
601 		}
602 	else {
603 		if(first) {
604 			curr_spec = one_more_seq_found(curr_spec, &seq, &seqname, &comments);
605 			if(curr_spec == -1) goto nomem;
606 			}
607 		else	curr_spec++;
608 		}
609 	if(first && curr_spec == 0) {
610 /* calcul long partie nom: enlever tout ce qui n'est pas espace en fin */
611 		p = line + strlen(line) - 2;
612 		while(*p == ' ' || isdigit(*p) ) p--;
613 		while (*p != ' ') p--;
614 		wid_name = p - line + 1;
615 		}
616 	if(first) {
617 		seqname[curr_spec] = (char *)malloc(wid_name+1);
618 		if(seqname[curr_spec]==NULL) {
619 			goto nomem;
620 			}
621 		memcpy(seqname[curr_spec], line, wid_name);
622 		p = seqname[curr_spec] + wid_name - 1;
623 		while(*p==' ') p--; *(p+1)=0;
624 		if(curr_spec > tot_spec) tot_spec = curr_spec;
625 		seq[curr_spec] = (char *)malloc(CLU_BLOCK_LEN+1);
626 		curr_max_len = CLU_BLOCK_LEN;
627 		if(seq[curr_spec]==NULL) {
628 			goto nomem;
629 			}
630 		comments[curr_spec] = NULL;
631 		}
632 	if(curr_spec == 0) {
633 		l = strlen(line) - 1;
634 		p = line + l - 1;
635 		while(*p == ' ' || isdigit(*p) ) { p--; l--; }
636 		l -= wid_name;
637 		if(curr_len + l > curr_max_len) {
638 			curr_max_len += CLU_BLOCK_LEN;
639 			for(i=0; i<=tot_spec; i++) {
640 				p = (char *)malloc(curr_max_len+1);
641 				if(p == NULL) goto nomem;
642 				memcpy(p, seq[i], curr_len);
643 				free(seq[i]);
644 				seq[i] = p;
645 				}
646 
647 			}
648 		next_len = curr_len + l;
649 		}
650 	memcpy(seq[curr_spec]+curr_len, line + wid_name, l);
651 	}
652 for(i=0; i<=tot_spec; i++) seq[i][next_len] = 0;
653 seq = (char **)realloc(seq, (tot_spec + 1)*sizeof(char *));
654 seqname = (char **)realloc(seqname, (tot_spec + 1)*sizeof(char *));
655 comments = (char **)realloc(comments, (tot_spec + 1)*sizeof(char *));
656 *pseq = seq; *pseqname = seqname; *pcomments = comments;
657 fini:
658 *pheader = NULL;
659 fclose(in);
660 return tot_spec + 1;
661 nomem:
662 sprintf(ret_message,"Error: Not enough memory!");
663 tot_spec = -1;
664 goto fini;
665 }
666 
667 
/*
 * Read an MSF-format alignment.
 *
 * The file is expected to contain a line with "MSF: ", then one
 * "Name: <name> ... Len: <n> ..." line per sequence, a line starting with
 * "//", and then blocks of "<name> <data>" lines in the same order as the
 * Name lines. Spaces in data are skipped and '.' is converted to '-'.
 * Data exceeding the length declared on the Name line, a Name line without
 * a usable " Len: " field, or a file ending before "//" are format errors.
 *
 * pseq, pseqname, pcomments : receive the allocated arrays (comments are all
 *               NULL).
 * pheader     : always set to NULL.
 * err_message : receives a pointer to a static message buffer.
 *
 * Returns the number of sequences, or 0 on failure.
 */
int read_msf_align(const char *fname, char ***pseq, char ***pseqname,
		char ***pcomments, char **pheader, char **err_message)
{
FILE *in;
char line[100], *p, *q;
int l, curr_spec, maxwidname=0, curr_len, tot_spec, wid_1_line, wid_block, nalloc;
int *declared_len = NULL; /* capacity (from "Len:") of each seq[] buffer */
static char ret_message[200];
char **seq, **seqname, **comments;

*ret_message = 0;
*err_message = ret_message;
in=fopen(fname,"r");
if(in==NULL) {
	/* the precision keeps the message inside ret_message */
	sprintf(ret_message,"File not found:%.180s",fname);
	return 0;
	}

/* first pass: count the sequences in the file */
tot_spec = 0;
while(fgets(line, sizeof(line), in) != NULL) {
	if(strncmp(line, "//", 2) == 0) break;
	if(strstr(line, "Name: ") != NULL) tot_spec++;
	}
rewind(in);
nalloc = tot_spec;
if(nalloc == 0) goto badfile;
seq = (char **)malloc(nalloc * sizeof(char *));
if(seq == NULL) goto nomem;
comments = (char **)malloc(nalloc * sizeof(char *));
if(comments == NULL) goto nomem;
seqname = (char **)malloc(nalloc * sizeof(char *));
if(seqname == NULL) goto nomem;
declared_len = (int *)malloc(nalloc * sizeof(int));
if(declared_len == NULL) goto nomem;
*pseq = seq; *pcomments = comments; *pseqname = seqname;

p = NULL;
while( fgets(line,sizeof(line),in) != NULL) {
	if( (p = strstr(line, "MSF: ")) != NULL) break;
	}
if(p == NULL) goto badfile;
/* read the Name lines up to the "//" line */
tot_spec = -1;
do	{
	if(fgets(line,sizeof(line),in) == NULL) goto badfile; /* no "//" line */
	if( (p = strstr(line, "Name:") ) == NULL) continue;
	q = strstr(p, " Len: ");
	if(q == NULL || sscanf(q + 5, "%d", &l) != 1 || l < 0) goto badfile;
	/* never store more entries than were counted in the first pass */
	if(tot_spec + 1 >= nalloc) goto badfile;
	tot_spec++;
	seq[tot_spec] = (char *)malloc(l + 1);
	if(seq[tot_spec]==NULL) goto nomem;
	seq[tot_spec][0] = 0;
	declared_len[tot_spec] = l;
	/* the name lies between "Name:" and " Len: ", spaces trimmed */
	p += 5; while(*p == ' ') p++;
	while(*q == ' ') q--;
	l = q - p + 1;
	if(l < 1) goto badfile; /* empty name */
	seqname[tot_spec] = (char *)malloc(l + 1);
	if(seqname[tot_spec]==NULL) goto nomem;
	memcpy(seqname[tot_spec], p, l); seqname[tot_spec][l] = 0;
	if(l > maxwidname) maxwidname = l;
	comments[tot_spec] = NULL;
	}
while(strncmp(line, "//", 2) != 0);
if(tot_spec < 0) goto badfile;
/* read the data blocks; lines not starting with the expected name are skipped */
curr_spec = 0; curr_len = 0; wid_block = 0;
while( fgets(line, sizeof(line), in) != NULL ) {
	p = line; while(*p == ' ') p++;
	l = strlen(seqname[curr_spec]);
	if(strncmp(p, seqname[curr_spec], l) != 0) continue;
	p += l; while(*p == ' ') p++; p--;
	if(curr_len > declared_len[curr_spec]) goto badfile;
	q = seq[curr_spec] + curr_len;
	while( *(++p) != '\n' && *p != 0) {
		if( *p == ' ') continue;
		if(*p == '.') *p = '-';
		/* do not write past the length announced on the Name line */
		if(q - seq[curr_spec] >= declared_len[curr_spec]) goto badfile;
		*(q++) = *p;
		}
	*q = 0;
	wid_1_line = q - (seq[curr_spec] + curr_len);
	wid_block = (wid_1_line > wid_block ? wid_1_line : wid_block);
	if(curr_spec == tot_spec) {
		/* end of block: advance by the widest line of the block */
		curr_len += wid_block;
		curr_spec = 0;
		wid_block = 0;
		}
	else	curr_spec++;
	}
fini:
*pheader = NULL;
fclose(in);
free(declared_len);
return tot_spec + 1;
nomem:
sprintf(ret_message,"Error: Not enough memory!");
tot_spec = -1;
goto fini;
badfile:
strcpy(ret_message,"File not in MSF format!");
tot_spec = -1;
goto fini;
}
758 
759 
/*
 * Tell whether seq looks like a protein sequence.
 *
 * Characters '-', '?' and 'N'/'n' are ignored. Among the remaining ones, the
 * fraction belonging to "ACGTURY" (case-insensitive) is computed; the
 * sequence is considered a protein when that fraction is at most 0.8.
 *
 * Returns 1 for protein, 0 otherwise; 0 also when no character is counted
 * (empty or gap-only sequence).
 */
int is_a_protein_seq(char *seq)
{
static const char dna[]="ACGTURY";
int total=0, length=0, c;
for( ; *seq != 0; seq++) {
	/* cast: toupper() is undefined for negative char values */
	c = toupper((unsigned char)*seq);
	if(c == '-' || c == '?' || c == 'N') continue;
	if( strchr(dna, c) != NULL ) total++;
	length++;
	}
if(length == 0) return 0; /* nothing to judge from; avoids 0/0 */
return ( (float)(total) / length ) <= 0.8 ;
}
774 
775 
is_a_protein_alignment(SEA_VIEW * view)776 int is_a_protein_alignment(SEA_VIEW *view)
777 /* returns TRUE if alignment looks like protein data (less than 80% ACGTU) */
778 {
779   char *seq;
780   for (int i = 0; i < view->tot_seqs; i++) {
781     seq = view->sequence[i];
782     while (*seq == '-') seq++;
783     if (*seq != 0) return is_a_protein_seq(view->sequence[i]);
784   }
785   return false;
786 }
787 
save_phylip_file(const char * fname,char ** seq,char ** seqname,int totseqs,int * eachlength,region * region_used,int * sel_seqs,int tot_sel_seqs,int phylipwidnames)788 int save_phylip_file(const char *fname, char **seq,
789 	char **seqname, int totseqs, int *eachlength, region *region_used,
790 	int *sel_seqs, int tot_sel_seqs, int phylipwidnames)
791 /* sauver des sequences ou des regions au format phylip
792 region_used pointe vers la region a sauver
793 si region_used == NULL, toutes les sequences sont sauvees
794 rend 0 si ok,
795 	1 si erreur d'ecriture dans le fichier
796 	2 si tentative de depasser la longueur d'une sequence
797 */
798 {
799 const int widphylin = 60;
800 FILE *out;
801 int lenseqs, i, j, retval = 1, current, save_current, err, vtotseqs, lu, maxlname;
802 list_segments *psegment, all_sequence, *curr_segment;
803 region maregion;
804 char c;
805 
806 if(totseqs == 0) return 0;
807 if( (out = fopen(fname,"w")) == NULL) return 1;
808 if(region_used == NULL) { /* on veut tout sauver */
809 	tot_sel_seqs = 0;
810 	all_sequence.debut = 1;
811 	all_sequence.fin = eachlength[0];
812 	for(i = 1; i < totseqs; i++) /* calcul long max des seqs */
813 		if( eachlength[i] > all_sequence.fin )
814 			 all_sequence.fin = eachlength[i];
815 	all_sequence.next = NULL;
816 	maregion.list = &all_sequence;
817 	region_used = &maregion;
818 	}
819 /* calcul longueur des regions */
820 lenseqs = 0;
821 psegment = region_used->list;
822 while(psegment != NULL) {
823 	lenseqs += psegment->fin - psegment->debut + 1;
824 	psegment = psegment->next;
825 	}
826 /* longest seq name */
827 vtotseqs = 0;
828 maxlname = 10; // 10 is the minimum name length
829 for(i=0; i < totseqs; i++) {
830 	if(tot_sel_seqs == 0 || sel_seqs[i]) {
831 		++vtotseqs;
832 		if( (j = strlen(seqname[i])) > maxlname) maxlname = j;
833 		}
834 	}
835 if(maxlname > phylipwidnames) maxlname = phylipwidnames;
836 fprintf(out,"%d   %d\n", vtotseqs, lenseqs);
837 for(i=0; i < totseqs; i++) {
838 	if(tot_sel_seqs != 0 && ! sel_seqs[i]) continue;
839 	psegment = region_used->list; current = 0;
840 	for(j = 0; j < maxlname; j++) {
841 		if( (c = seqname[i][j]) == 0) break;
842 		putc(c == ' ' ? '_' : c, out);
843 		}
844 	while(j <= maxlname) { putc( ' ', out ); j++; }
845 	lu = 0;
846 	while(lu < widphylin && psegment != NULL) {
847 		putc( ' ', out );
848 		err = output_next_res_from_region(seq[i], eachlength[i], &psegment,
849 			&current, out, 10, FALSE);
850 		lu += err;
851 		}
852 	putc('\n', out);
853 	if(ferror(out)) goto fin;
854 	}
855 while( psegment != NULL ) {
856 	putc('\n',out);
857 	curr_segment = psegment; save_current = current;
858 	for(i=0; i < totseqs; i++) {
859 		if(tot_sel_seqs != 0 && ! sel_seqs[i]) continue;
860 		psegment = curr_segment; current = save_current;
861 		for(j = 0; j <= maxlname; j++) putc( ' ', out );
862 		lu = 0;
863 		while(lu < widphylin && psegment != NULL) {
864 			putc( ' ', out );
865 			err = output_next_res_from_region(seq[i], eachlength[i],
866 				&psegment, &current, out, 10, FALSE);
867 			lu += err;
868 			}
869 		putc('\n', out);
870 		if(ferror(out)) goto fin;
871 		}
872 	}
873 retval = 0;
874 fin:
875 fclose(out);
876 return retval;
877 }
878 
879 
output_next_res_from_region(char * seq,int lenseq,list_segments ** segment,int * current,FILE * out,int total,int use_dots)880 int output_next_res_from_region(char *seq, int lenseq,
881 	list_segments **segment, int *current, FILE *out, int total,
882 	int use_dots)
883 /* ecrire dans le fichier out les total residus a partir de la position courante
884 dans une liste de regions. lenseq est la longueur de la sequence seq.
885 La position courante est determinee par le segment courant (*segment, qui peut
886 etre modifie par la fonction) et par la position (from 0) dans celui-ci (*current
887 qui est modifie par la fonction pour etre pret pour l'appel suivant).
888 Si le segment demande va au dela de la fin de la seq, des - sont ecrits.
889 Rend le nombre de residus effectivement ecrits, qui est tjrs celui demande.
890 Doit etre appelle la premiere fois avec *current = 0
891 */
892 {
893 int debut, fin, vfin, ecrit = 0, nombre;
894 char *p;
895 static char line[500];
896 if( *segment == NULL)
897 	return 0;
898 do	{
899 	debut = (*segment)->debut; fin = (*segment)->fin;
900 	vfin = fin; if(fin > lenseq) vfin = lenseq;
901 	nombre = total;
902 	if( nombre - 1 + *current + debut > vfin)
903 		nombre = vfin + 1 - *current - debut;
904 	if(nombre > 0) {
905 		memcpy(line, seq + *current + debut - 1, nombre);
906 		line[nombre] = 0;
907 		if(use_dots) {
908 			p = line;
909 			while( (p = strchr(p, '-')) != NULL) *p = '.';
910 			}
911 		fwrite(line, 1, nombre, out);
912 		ecrit += nombre; total -= nombre; (*current) += nombre;
913 		}
914 	if( fin > lenseq && total > 0 ) {
915 		nombre = total;
916 		if( nombre - 1 + *current + debut > fin)
917 			nombre = fin + 1 - *current - debut;
918 		ecrit += nombre; (*current) += nombre; total -= nombre;
919 		while(nombre-- > 0) putc('-', out);
920 		}
921 	if( *current + debut > fin) {
922 		*segment = (*segment)->next;
923 		if(*segment == NULL) break;
924 		*current = 0;
925 		}
926 	}
927 while(total > 0);
928 return ecrit;
929 }
930 
931 
save_regions(list_regions * regions,FILE * out)932 void save_regions(list_regions *regions, FILE *out)
933 {
934   int total;
935   unsigned l_line;
936 list_segments *segment;
937 char line[80];
938 do	{
939 	total = 0;
940 	segment = regions->element->list;
941 	while(segment != NULL) {
942 		total++;
943 		segment = segment->next;
944 		}
945 	if(total == 0) continue;
946 	fprintf(out,";;# of segments=%d %s\n",total,regions->element->name);
947 	strcpy(line, ";;"); l_line=2;
948 	segment = regions->element->list;
949 	while(segment != NULL) {
950 		if(l_line + 12 >= sizeof(line)-1) {
951 			fputs(line,out); putc('\n',out);
952 			strcpy(line,";;"); l_line=2;
953 			}
954 		sprintf(line+l_line," %d,%d", segment->debut, segment->fin);
955 		l_line += strlen(line+l_line);
956 		segment= segment->next;
957 		}
958 	fputs(line,out); putc('\n',out);
959 	}
960 while(	regions = regions->next, regions != NULL );
961 }
962 
963 
save_mase_file(const char * fname,char ** seq,char ** comments,char * header,char ** seqname,int totseqs,int * eachlength,list_regions * regions,region * region_used,int numb_species_sets,int ** list_species_sets,char ** name_species_sets,int * sel_seqs,int tot_sel_seqs,int tot_comment_lines,char ** comment_name,char ** comment_line,int tot_trees,char ** trees,const Fl_Menu_Item * menu_tree_items)964 int save_mase_file(const char *fname, char **seq, char **comments,
965 	char *header, char **seqname, int totseqs, int *eachlength,
966 	list_regions *regions, region *region_used, int numb_species_sets,
967 	int **list_species_sets, char **name_species_sets,
968 	int *sel_seqs, int tot_sel_seqs, int tot_comment_lines,
969 	char **comment_name, char **comment_line,
970 	int tot_trees, char **trees, const Fl_Menu_Item *menu_tree_items)
971 /* sauver un alignement au format mase
972 regions: l'ensemble des regions a ecrire (si on sauve tout l'alignement)
973 	NULL si on ne sauve que des regions
974 region_used: pointeur vers la region a sauver
975 	     et on ne sauve que les seqs selectionnees s'il y en a,
976 	ou NULL pour sauver tout l'alignement
977 rend 0 si OK,
978 	1 si erreur ecriture du fichier
979 	2 si depassement de longueur d'une sequence (avec region seulement)
980 */
981 {
982 FILE *out;
983 int num, retval = 1, current, ecrit, maxlength;
984 time_t heure;
985 list_segments *psegment, all_sequence;
986 region maregion;
987 
988 if(totseqs == 0) return 0;
989 out=fopen(fname,"w");
990 if(out == NULL) return 1;
991 
992 maxlength = 0;
993 
994 time(&heure);
995 fprintf(out,";; saved by seaview on %s",ctime(&heure));
996 if(region_used == NULL) { /* on veut tout sauver */
997 	all_sequence.debut = 1;
998 	all_sequence.next = NULL;
999 	maregion.list = &all_sequence;
1000 	maregion.name = NULL;
1001 	region_used = &maregion;
1002 	tot_sel_seqs = 0;
1003 	maxlength = eachlength[0];
1004 	for(num=1; num<totseqs; num++) {
1005 		if(maxlength < eachlength[num]) maxlength = eachlength[num];
1006 		}
1007 	}
1008 else
1009 	fprintf(out,";; region choice only: %s\n",region_used->name);
1010 if(ferror(out)) goto fin;
1011 if(header != NULL && *header != 0) {
1012 	fputs(header,out);
1013 	if(ferror(out)) goto fin;
1014 	}
1015 if(regions != NULL) {
1016 	save_regions(regions, out);
1017 	if(ferror(out)) goto fin;
1018 	}
1019 if(tot_comment_lines > 0) {
1020 	save_comment_lines(tot_comment_lines, comment_name, comment_line, out);
1021 	if(ferror(out)) goto fin;
1022 	}
1023 if(numb_species_sets != 0) { /* sauver les species sets */
1024 	save_species_sets(numb_species_sets, list_species_sets,
1025 		name_species_sets, totseqs, out);
1026 	if(ferror(out)) goto fin;
1027 	}
1028 if(tot_trees > 0) { /* write trees out */
1029 	for(num = 0; num < tot_trees; num++) {
1030 		fprintf(out, ";;$ %s\n", menu_tree_items[num].label());
1031     char *tree = strdup(trees[num]);
1032     char *p;
1033     while ((p=strchr(tree, '\n')) != NULL) *p = ' ';
1034     int l = strlen(tree);
1035 		for (p = tree; p < tree + l; p += 80) {
1036 			while (*p == '$') { // avoid begin line with ";;$..." that marks a new tree
1037 			  fputs(";;$\n", out);
1038 			  p++;
1039 			  }
1040 			fprintf(out, ";;%.80s\n", p);
1041 			}
1042     free(tree);
1043 		}
1044 	}
1045 for(num=0; num<totseqs; num++) {
1046 	if( tot_sel_seqs != 0 && ! sel_seqs[num] ) continue;
1047 	current = 0; psegment = region_used->list;
1048 	all_sequence.fin = eachlength[num];
1049 	if(comments != NULL && comments[num] != NULL)
1050 		fputs(comments[num], out);
1051 	else fputs(";no comment\n", out);
1052 	if(ferror(out)) goto fin;
1053 	fprintf(out,"%s\n",seqname[num]);
1054 	if(ferror(out)) goto fin;
1055 	do	{
1056 		ecrit = output_next_res_from_region(seq[num], eachlength[num],
1057 			&psegment, &current, out, 60, FALSE);
1058  		if(ferror(out)) goto fin;
1059 		if( ecrit > 0) putc('\n', out);
1060 		else if(ecrit == -1) {retval = 2; goto fin; }
1061 		}
1062 	while(ecrit != 0);
1063 	if( (ecrit = maxlength - eachlength[num]) > 0) {
1064 		int n;
1065 		for(n = 1; n <= ecrit; n++) {
1066 			putc('-', out); if(n % 60 == 0) putc('\n', out);
1067 			}
1068 		putc('\n', out);
1069 		}
1070 	if(ferror(out)) goto fin;
1071 	}
1072 retval = 0;
1073 fin:
1074 if( fclose(out) != 0 ) retval = 1;
1075 return retval;
1076 }
1077 
1078 
save_fasta_file(const char * fname,char ** seq,char ** comments,char ** seqname,int totseqs,int * eachlength,region * region_used,int * sel_seqs,int tot_sel_seqs,int spaces_in_names,int pad_to_max_length)1079 int save_fasta_file(const char *fname, char **seq, char **comments,
1080 	char **seqname, int totseqs, int *eachlength, region *region_used,
1081 	int *sel_seqs, int tot_sel_seqs, int spaces_in_names, int pad_to_max_length)
1082 /* sauver des sequences ou des regions au format fasta
1083 region_used pointe vers la liste des regions a sauver
1084 si region_used == NULL, toutes les sequences sont sauvees
1085 si pad_to_max_length == TRUE && region_used == NULL, les seqs sont allong�es a leur longueur max
1086 rend 0 si ok, 1 si erreur d'ecriture dans le fichier
1087 	2 si tentative de depasser la longueur d'une sequence
1088 */
1089 {
1090 FILE *out;
1091 int num, retval = 1, current, ecrit, save_full = (region_used == NULL);
1092 list_segments *psegment, all_sequence;
1093 region maregion;
1094 char *p;
1095 
1096 if(totseqs == 0) return 0;
1097 if( (out = fopen(fname,"w")) == NULL) return 1;
1098 if(region_used == NULL) { /* on veut tout sauver */
1099 	tot_sel_seqs = 0;
1100 	all_sequence.debut = 1;
1101 	all_sequence.fin = eachlength[0];
1102 	for(num = 1; num < totseqs; num++)
1103 		if( eachlength[num] < all_sequence.fin )
1104 			 all_sequence.fin = eachlength[num];
1105 	all_sequence.next = NULL;
1106 	maregion.list = &all_sequence;
1107 	region_used = &maregion;
1108 	}
1109 for(num=0; num<totseqs; num++) {
1110 	if( tot_sel_seqs != 0 && ! sel_seqs[num] ) continue;
1111 	current = 0; psegment = region_used->list;
1112 	if (!save_full || !pad_to_max_length) all_sequence.fin = eachlength[num];
1113 	fputc('>', out);
1114 	p = seqname[num];
1115 	if(spaces_in_names) {
1116 	  fputs(p, out);
1117 	  }
1118 	else {
1119 	  while(*p != 0) { fputc( *p == ' ' ? '_' : *p, out); p++; }
1120 	  if(comments != NULL && comments[num] != NULL) {
1121 		  putc(' ', out);
1122 		  p = comments[num] + 1;
1123 		  while(*p != '\n' && *p != 0) putc(*(p++), out);
1124             int gc = get_ncbi_gc_from_comment(comments[num]);
1125             char *q = strstr(comments[num], "/transl_table=");
1126             if (gc > 1 && q != NULL && q >= p) {
1127               fprintf(out, " /transl_table=%d", gc);
1128             }
1129 		  }
1130 	  }
1131 	putc('\n', out);
1132 	if(ferror(out)) goto fin;
1133 	do	{
1134 		ecrit = output_next_res_from_region(seq[num], eachlength[num],
1135 			&psegment, &current, out, 60, FALSE);
1136 		if( ecrit > 0) putc('\n', out);
1137 		else if(ecrit == -1) {retval = 2; goto fin; }
1138 		}
1139 	while(ecrit != 0);
1140 	if(ferror(out)) goto fin;
1141 	}
1142 retval = 0;
1143 fin:
1144 if( fclose(out) != 0 ) return 1;
1145 return retval;
1146 }
1147 
1148 
1149 
save_clustal_file(const char * fname,char ** seq,char ** seqname,int totseqs,int * eachlength,region * region_used,int * sel_seqs,int tot_sel_seqs)1150 int save_clustal_file(const char *fname, char **seq,
1151 	char **seqname, int totseqs, int *eachlength, region *region_used,
1152 	int *sel_seqs, int tot_sel_seqs)
1153 /* sauver des sequences ou des regions au format clustal
1154 region_used pointe vers la liste des regions a sauver
1155 si region_used == NULL, toutes les sequences sont sauvees
1156 rend 0 si ok, 1 si erreur d'ecriture dans le fichier
1157 	2 si tentative de depasser la longueur d'une sequence
1158 */
1159 {
1160 const int widcluslin = 60;
1161 FILE *out;
1162 int i, j, retval = 1, current, save_current, err, l, lmax;
1163 list_segments *psegment, all_sequence, *curr_segment;
1164 region maregion;
1165 
1166 if(totseqs == 0) return 0;
1167 if( (out = fopen(fname,"w")) == NULL) return 1;
1168 if(region_used == NULL) { /* on veut tout sauver */
1169 	tot_sel_seqs = 0;
1170 	all_sequence.debut = 1;
1171 	all_sequence.fin = eachlength[0];
1172 	for(i = 1; i < totseqs; i++)
1173 		if( eachlength[i] > all_sequence.fin )
1174 			 all_sequence.fin = eachlength[i];
1175 	all_sequence.next = NULL;
1176 	maregion.list = &all_sequence;
1177 	region_used = &maregion;
1178 	}
1179 lmax = 0;
1180 for(i=0; i < totseqs; i++) {
1181 	if( tot_sel_seqs != 0 && ! sel_seqs[i] ) continue;
1182 	l = strlen(seqname[i]);
1183 	if(l > lmax) lmax = l;
1184 	}
1185 lmax += 2;
1186 
1187 fprintf(out,"CLUSTAL W (1.7) multiple sequence alignment\n\n\n");
1188 current = 0; psegment = region_used->list;
1189 while( psegment != NULL ) {
1190 	curr_segment = psegment; save_current = current;
1191 	for(i=0; i < totseqs; i++) {
1192 		if( tot_sel_seqs != 0 && ! sel_seqs[i] ) continue;
1193 		psegment = curr_segment; current = save_current;
1194 /* remplacer espaces internes par _ */
1195 		for(j = 0; j < lmax; j++) {
1196 			if(seqname[i][j] == 0) break;
1197 			putc( (seqname[i][j] == ' ' ? '_' : seqname[i][j] ),
1198 				out);
1199 			}
1200 		while( j < lmax) {
1201 			putc(' ', out); j++;
1202 			}
1203 		err= output_next_res_from_region(seq[i], eachlength[i],
1204 			&psegment, &current, out, widcluslin, FALSE);
1205 		putc('\n', out);
1206 		if(err == -1) {retval = 2; goto fin; }
1207 		if(ferror(out)) goto fin;
1208 		}
1209 	fprintf(out, "\n\n");
1210 	}
1211 retval = 0;
1212 fin:
1213 if( fclose(out) != 0 ) return 1;
1214 return retval;
1215 }
1216 
1217 
calc_gcg_check(list_segments * psegment,char * seq)1218 int calc_gcg_check(list_segments *psegment, char *seq)
1219 {
1220 int  i, debut, fin, residue, pos = 0;
1221 long check = 0;
1222 while(psegment != NULL) {
1223 	debut = psegment->debut; fin = psegment->fin;
1224         for( i=debut; i<= fin; i++) {
1225 		residue = toupper(seq[i - 1]);
1226 		if(residue == '-') residue = '.';
1227                 check += (( (pos++) % 57)+1) * residue;
1228 		}
1229 	psegment = psegment->next;
1230 	}
1231 return (check % 10000);
1232 }
1233 
1234 
int save_msf_file(const char *fname, char **seq,
	char **seqname, int totseqs, int *eachlength, region *region_used,
	int protein, int *sel_seqs, int tot_sel_seqs)
/* Save sequences or regions in MSF format.
region_used: the region (list of segments) to save; only the selected
	sequences are saved if there are any (tot_sel_seqs != 0).
	If region_used == NULL, all sequences are saved.
protein: true for protein sequences (written as "AA" / type 'P'),
	false for nucleotide sequences ("NA" / type 'N').
Returns 0 if OK (also when totseqs == 0, in which case no file is created),
	1 if the file could not be opened or written, or if memory allocation failed,
	2 if output_next_res_from_region() reports -1.
*/
{
FILE *out;
int i, j, k, retval = 1, current, save_current, err, lenseqs, gen_check,
	*check_val, curr_len, toprint, save_complete, fromseq, new_current;
list_segments *psegment, all_sequence, *curr_segment, *new_segment;
region maregion;

if(totseqs == 0) return 0;
if( (out = fopen(fname,"w")) == NULL) return 1;
save_complete = (region_used == NULL);
if(save_complete) { /* save everything */
	tot_sel_seqs = 0;
	all_sequence.debut = 1;
	all_sequence.fin = 0;
	/* one segment running from 1 to the length of the longest sequence */
	for(i = 0; i < totseqs; i++) {
		if( eachlength[i] > all_sequence.fin )
			 all_sequence.fin = eachlength[i];
		}
	lenseqs = all_sequence.fin;
	all_sequence.next = NULL;
	maregion.list = &all_sequence;
	region_used = &maregion;
	}
else	{
	/* compute the total length of the region's segments */
	lenseqs = 0;
	psegment = region_used->list;
	while(psegment != NULL) {
		lenseqs += psegment->fin - psegment->debut + 1;
		psegment = psegment->next;
		}
	}
for(i = 0, k = 0; i < totseqs; i++) /* number of sequences to be written */
	if( tot_sel_seqs == 0 || sel_seqs[i] ) k++;
check_val = (int *)malloc( k * sizeof(int) );
if(check_val == NULL) {
	fclose(out);
	return 1; /* not a very accurate error code */
	}
/* one checksum per written sequence, and their sum modulo 10000 for the file */
gen_check = 0;
for(i = 0, j = 0; i < totseqs; i++) {
	if( tot_sel_seqs != 0 && ! sel_seqs[i] ) continue;
	check_val[j] = calc_gcg_check(region_used->list, seq[i]);
	gen_check += check_val[j++];
	}
gen_check = gen_check % 10000;
/* file header, then one "Name:" line per written sequence */
  fprintf(out, "!!%2s_MULTIPLE_ALIGNMENT 1.0\n %s", (protein?"AA":"NA"), extract_filename(fname) );
fprintf(out,"   MSF: %d  Type: %c    Check:%6d   .. \n\n",
	lenseqs, (protein ? 'P' : 'N'), gen_check);
for(i = 0 , j = 0; i < totseqs; i++) {
	if( tot_sel_seqs != 0 && ! sel_seqs[i] ) continue;
	fprintf(out, " Name: %-15.15s   Len:%5d  Check:%6d  Weight:  1.00\n",
		seqname[i], lenseqs, check_val[j++]);
	}
fprintf(out,"\n//\n\n\n");
/* alignment body: blocks of 50 columns; new_segment/new_current hold the
   position in the segment list where the next block starts */
new_current = 0; new_segment = region_used->list; curr_len = 0;
while( new_segment != NULL && curr_len < lenseqs) {
	curr_segment = new_segment; save_current = new_current;
	fprintf(out, "\n");
	for(i=0; i < totseqs; i++) {
		if( tot_sel_seqs != 0 && ! sel_seqs[i] ) continue;
		/* every sequence restarts from the beginning of the block */
		psegment = curr_segment; current = save_current;
		/* name, cut at MSF_WID_NAME chars and padded with spaces to MSF_WID_NAME + 1 */
		for(j = 0; j < MSF_WID_NAME; j++) {
			if(seqname[i][j] == 0) break;
			putc(seqname[i][j],out);
			}
		while( j < MSF_WID_NAME + 1) {
			putc(' ', out); j++;
			}
		/* up to 5 groups of 10 residues, each followed by a space */
		for(k = curr_len; k < curr_len + 50 && k < lenseqs; k += 10) {
			toprint = 10;
			if(k + toprint > lenseqs) toprint = lenseqs - k;
			/* fromseq: how many of the toprint columns come from the sequence;
			   when saving everything, a sequence shorter than lenseqs
			   supplies fewer and the rest is filled with '.' below */
			fromseq = toprint;
			if(save_complete && k + fromseq > eachlength[i])
					fromseq = eachlength[i] - k;
			if(fromseq < 0) fromseq = 0;
			if(fromseq > 0) {
				err= output_next_res_from_region(
					seq[i], eachlength[i], &psegment,
					&current, out, fromseq, TRUE);
 				if(ferror(out)) goto fin;
				if(err == -1) {retval = 2; goto fin; }
				}
			while(fromseq < toprint) {
				putc('.', out); fromseq++;
				}
			putc(' ', out);
			}
		putc('\n', out);
		/* remember where this block ended; when saving everything only a
		   sequence of full length lenseqs is used for that, since shorter
		   ones read fewer residues in the blocks past their end */
		if( (!save_complete) || eachlength[i] == lenseqs) {
			new_current = current;
			new_segment = psegment;
			}
		if(ferror(out)) goto fin;
		}
	curr_len += 50;
	fprintf(out, "\n");
	}
retval = 0;
fin:
if( fclose(out) != 0 ) retval = 1;
free(check_val);
return retval;
}
1348 
1349 
save_alignment_or_region(const char * fname,char ** seq,char ** comments,char * header,char ** seqname,int totseqs,int * eachlength,list_regions * regions,region * region_used,known_format format,int numb_species_sets,int ** list_species_sets,char ** name_species_sets,int * sel_seqs,int tot_sel_seqs,int protein,int tot_comment_lines,char ** comment_name,char ** comment_line,int phylipwidnames,int tot_trees,char ** trees,const Fl_Menu_Item * items,int spaces_in_fasta_names)1350 char *save_alignment_or_region(const char *fname, char **seq, char **comments,
1351 	char *header, char **seqname, int totseqs, int *eachlength,
1352 	list_regions *regions, region *region_used, known_format format,
1353 	int numb_species_sets, int **list_species_sets,
1354 	char **name_species_sets, int *sel_seqs, int tot_sel_seqs, int protein,
1355 	int tot_comment_lines, char **comment_name, char **comment_line, int phylipwidnames,
1356 	int tot_trees, char **trees, const Fl_Menu_Item *items, int spaces_in_fasta_names)
1357 /* sauver des sequences ou des regions au format de fichier format
1358 region_used pointe vers la liste des regions a sauver
1359 si region_used == NULL, toutes les sequences entieres sont sauvees
1360 rend NULL si ok
1361 	un message d'erreur sinon.
1362 */
1363 {
1364 int err=0;
1365 static char err_message[200];
1366 
1367 if(format == MASE_FORMAT)
1368 	err = save_mase_file(fname, seq, comments,
1369 		header, seqname, totseqs, eachlength,
1370 		regions, region_used, numb_species_sets, list_species_sets,
1371 		name_species_sets, sel_seqs, tot_sel_seqs,
1372 		tot_comment_lines, comment_name, comment_line,
1373 		tot_trees, trees, items);
1374 else if(format == NEXUS_FORMAT) {
1375 	err = save_nexus_file(fname, totseqs, protein,
1376 		seq, seqname, comments, header,
1377 		numb_species_sets, list_species_sets, name_species_sets,
1378 		regions, tot_comment_lines, comment_name, comment_line,
1379 		region_used, sel_seqs, tot_sel_seqs, eachlength, tot_trees, trees, items);
1380 	}
1381 else if(format == PHYLIP_FORMAT)
1382 	err = save_phylip_file(fname, seq,
1383 		seqname, totseqs, eachlength, region_used,
1384 		sel_seqs, tot_sel_seqs, phylipwidnames);
1385 else if(format == CLUSTAL_FORMAT)
1386 	err = save_clustal_file(fname, seq,
1387 		seqname, totseqs, eachlength, region_used,
1388 		sel_seqs, tot_sel_seqs);
1389 else if(format == MSF_FORMAT)
1390 	err = save_msf_file(fname, seq,
1391 		seqname, totseqs, eachlength, region_used, protein,
1392 		sel_seqs, tot_sel_seqs);
1393 else if(format == FASTA_FORMAT)
1394 	err = save_fasta_file(fname, seq, comments,
1395 		seqname, totseqs, eachlength, region_used,
1396 		sel_seqs, tot_sel_seqs, spaces_in_fasta_names);
1397 if(err == 0)
1398 	return NULL;
1399 else if(err == 1)
1400 	sprintf(err_message,"Error while writing to file %s",fname);
1401 else if(err == 2)
1402 	strcpy(err_message,
1403 		"Error: region goes beyond the end of one sequence");
1404 return err_message;
1405 }
1406 
1407 
#if !(defined(WIN32) || defined(__APPLE__))
/* directory part (with its final '/') of the path given to inform_prog_dir() */
static char seaview_prog_dir[200] = "";

void inform_prog_dir(const char *arg0)
/* Record the directory part of arg0, up to and including its last '/'.
The recorded directory is empty when arg0 contains no '/', or when the
directory part is too long for the internal buffer: a truncated directory
name would point somewhere else, so none is better.
*/
{
const char *slash = strrchr(arg0, '/');
size_t ldir = (slash != NULL ? (size_t)(slash - arg0) + 1 : 0);

if(ldir > 0 && ldir < sizeof(seaview_prog_dir)) {
	memcpy(seaview_prog_dir, arg0, ldir);
	seaview_prog_dir[ldir] = 0;
	}
else seaview_prog_dir[0] = 0;
}

char *get_prog_dir(void)
/* Return the directory recorded by inform_prog_dir() ("" if none). */
{
return seaview_prog_dir;
}
#endif
1425 
1426 
get_full_path(const char * fname)1427 char *get_full_path(const char *fname)
1428 /* to get full pathname to file fname searching for its name, for it in the prog dir
1429 and then for it through all path directories
1430 returns NULL if not found
1431 */
1432 {
1433 #define Mxdir 600
1434 #ifdef WIN32
1435 #define PATH_SEPAR ';'
1436 #define DIR_SEPAR '\\'
1437 #else
1438 #define PATH_SEPAR ':'
1439 #define DIR_SEPAR '/'
1440 #endif
1441 static char dir[Mxdir+1];
1442 char *path, *deb, *fin;
1443 FILE *fich;
1444 int lf, ltot;
1445 
1446 strcpy(dir, fname);
1447 if(strchr(fname, DIR_SEPAR) != NULL) {// if fname is a pathname
1448 	fich = fopen(dir, "r"); /* try first explicit filename */
1449 	goto way_out; // and don't search more
1450 	}
1451 #if defined(__APPLE__)
1452 sprintf(dir, "%s/%s", MG_GetBundleResourcesDir(), fname);
1453 fich = fopen(dir, "r");
1454 if(fich != NULL) goto way_out;
1455 #else
1456 /* try dir where program was launched */
1457 deb = get_prog_dir();
1458 if(deb != NULL && *deb != 0) {
1459 	strcpy(dir, deb);
1460 	strcat(dir, fname);
1461 	fich = fopen(dir, "r");
1462 	if(fich != NULL) goto way_out;
1463 	}
1464 #endif
1465 path = getenv("PATH"); // get the list of path directories, separated by : or ;
1466 if (path == NULL ) return NULL;
1467 lf = strlen(fname);
1468 deb = path;
1469 do      {
1470 		fin = strchr(deb,PATH_SEPAR);
1471 		if(fin != NULL)
1472 				{ ltot = fin-deb; if(ltot > 0) strncpy(dir,deb,ltot);  }
1473 		else
1474 				{ strcpy(dir,deb); ltot=strlen(dir); }
1475 		/* now one directory is in string dir */
1476 		if( ltot > 0 && ltot + lf + 1 <= Mxdir)
1477 				{
1478 				dir[ltot] = DIR_SEPAR;
1479 				strcpy(dir+ltot+1,fname); /* now dir is appended with filename */
1480 				fich = fopen(dir,"r");
1481 				if( fich != NULL) break;
1482 				}
1483 		else fich = NULL;
1484 		deb = fin+1;
1485 		}
1486 while (fin != NULL);
1487 way_out:
1488 	if(fich == NULL) return NULL;
1489 	fclose(fich);
1490 #ifndef WIN32
1491 	if(*dir != '/') {
1492 		if(strncmp(dir, "./", 2) == 0) memmove(dir, dir + 2, strlen(dir) - 1);
1493 		char *p, *q;
1494 		char *cdir = (char *)malloc(PATH_MAX);
1495 		p = getcwd(cdir, PATH_MAX);
1496 		q = (char *)malloc(strlen(p) + 1 + strlen(dir) + 1);
1497 		sprintf(q, "%s/%s", p, dir);
1498 		strcpy(dir, q);
1499 		free(q);
1500 		free(cdir);
1501 		}
1502 #endif
1503 	return dir;
1504 #undef Mxdir
1505 }
1506 
1507 
static void save_species_sets(int numb_species_sets, int **list_species_sets,
	char **name_species_sets, int totseqs, FILE *out)
/* Write to out, in mase header syntax, each species set that has at least
one member:
	;;@ of species = <count> <set name>
	;; <rank>, <rank>, ... <rank>
list_species_sets[s][i] is true when sequence i (from 0) belongs to set s;
ranks are written counted from 1, and a new ";;" line is started after every
15 ranks followed by a comma.
*/
{
int set_rank;
for(set_rank = 0; set_rank < numb_species_sets; set_rank++) {
	const int *members = list_species_sets[set_rank];
	int rank, pending, on_line, count = 0;

	for(rank = 0; rank < totseqs; rank++) {
		if(members[rank]) count++;
		}
	if(count == 0) continue; /* empty sets are not saved */
	fprintf(out,";;@ of species = %d %s\n;;", count, name_species_sets[set_rank]);
	/* pending: a member already seen but not written yet; it is written
	   with a comma when a later member shows up, without one at the end */
	pending = 0;
	while(pending < totseqs && !members[pending]) pending++;
	on_line = 0;
	for(rank = pending + 1; rank < totseqs; rank++) {
		if(!members[rank]) continue;
		fprintf(out," %d,", pending + 1);
		pending = rank;
		if(++on_line >= 15) {
			fprintf(out, "\n;;");
			on_line = 0;
			}
		}
	fprintf(out," %d\n", pending + 1);
	}
}
1536 
1537 
void save_comment_lines(int tot_comment_lines, char **names, char **lines,
	FILE *out)
/* Write to out, in mase header syntax, each comment line that is not empty:
	;;|<name>
	;;<text, in pieces of at most 60 characters, one per line>
	;;||
*/
{
int rank;

for(rank = 0; rank < tot_comment_lines; rank++) {
	const char *text = lines[rank];
	const int length = (int)strlen(text);
	int start;

	if(length == 0) continue; /* empty comment lines are not saved */
	fprintf(out, ";;|%s\n", names[rank]);
	start = 0;
	while(start < length) {
		fprintf(out, ";;%.60s\n", text + start);
		start += 60;
		}
	fputs(";;||\n", out);
	}
}
1551 
1552 
what_format(const char * filename)1553 known_format what_format(const char *filename)
1554 /*
1555  returns an alignment format (>= 0)
1556          -1 unknown format
1557          -2 a Newick tree
1558  */
1559 {
1560 FILE *in;
1561 char line[100], *p;
1562 int format = -1;
1563 int nseq, lseq;
1564 
1565 in = fl_fopen(filename, "r");
1566 if(in == NULL) return (known_format)-1;
1567 p = fgets(line, sizeof(line), in);
1568 if( p == NULL) { fclose(in); return (known_format)-1; }
1569 while(*p) { *p = toupper(*p); p++; }
1570 if(*line == ';') format = MASE_FORMAT;
1571 else if(*line == '>') format = FASTA_FORMAT;
1572 else if(*line == '(' || *line == '[') format = -2;
1573 else if(strncmp(line, "CLUSTAL", 7) == 0) format = CLUSTAL_FORMAT;
1574 else if(strncmp(line, "#NEXUS", 6) == 0) format = NEXUS_FORMAT;
1575 else {
1576 	nseq = lseq = -1;
1577 	sscanf(line, "%d%d", &nseq, &lseq);
1578 	if(nseq != -1 && lseq != -1) format = PHYLIP_FORMAT;
1579 	else {
1580 		/* try MSF format */
1581 		do 	{
1582 			p = fgets(line, sizeof(line), in);
1583 			if(p != NULL && strstr(p, " MSF: ") !=  NULL) format = MSF_FORMAT;
1584 			}
1585 		while(p != NULL  && strncmp(p, "//", 2) != 0 );
1586 		}
1587 	}
1588 fclose(in);
1589 return (known_format)format;
1590 }
1591 
1592 
char *my_fgets(char *s, int n, FILE *f)
/* Like fgets(), but accepts any of \n, \r, \r\n and \n\r as the end of a line.
At most n-1 characters are read from f into s, which is always
NUL-terminated; a line end, whichever form it has, is stored as a single '\n'.
Returns s, or NULL when nothing at all could be stored (end of file reached
at once, or n <= 1).
*/
{
char *dest = s;
int room;

for(room = n - 1; room > 0; room--) {
	int c = getc(f);
	if(c == EOF) break;
	if(c != '\r' && c != '\n') {
		*dest++ = c;
		continue;
		}
	/* end of line: swallow the second character of a \r\n or \n\r pair */
	*dest++ = '\n';
	int peek = getc(f);
	int partner = (c == '\r' ? '\n' : '\r');
	if(peek != EOF && peek != partner) ungetc(peek, f);
	break;
	}
*dest = 0;
if(dest == s) return NULL;
return s;
}
1617 
1618 
seaview_file_chooser_save_as(const char * message,const char * fname,SEA_VIEW * view,known_format * new_format)1619 char *seaview_file_chooser_save_as(const char* message, const char* fname, SEA_VIEW *view, known_format* new_format)
1620 {
1621 #ifndef MICRO
1622   const char *prev_label = Fl_File_Chooser::show_label;
1623   Fl_File_Chooser::show_label = "Format";
1624 #endif
1625 
1626 char *p, types_list[500] = "";
1627 Fl_Native_File_Chooser *chooser = new Fl_Native_File_Chooser();
1628 #ifndef MICRO
1629   Fl_File_Chooser::show_label = prev_label;
1630 #endif
1631 
1632 chooser->type(Fl_Native_File_Chooser::BROWSE_SAVE_FILE);
1633 chooser->options(Fl_Native_File_Chooser::SAVEAS_CONFIRM | chooser->options());
1634 chooser->options(Fl_Native_File_Chooser::USE_FILTER_EXT | chooser->options());
1635 chooser->title(message);
1636 chooser->directory(extract_dirname(fname));
1637 chooser->preset_file(extract_filename(fname));
1638 
1639 p = types_list;
1640 for(int f = 0; f < nbr_formats; f++) {
1641 	sprintf(p, "%s\t*.%s\n", f_format_names[f], f_format_exts[f]);
1642 	p += strlen(p);
1643 	}
1644 chooser->filter(types_list);
1645 chooser->filter_value(view->format_for_save);
1646 char *filename = run_and_close_native_file_chooser(chooser, TRUE);
1647 if (filename && new_format) *new_format = (known_format)chooser->filter_value();
1648 delete chooser;
1649 return filename;
1650 }
1651 
1652 
const char *extract_dirname(const char *pathname)
/* Return the directory part of pathname: everything before its last
directory separator ('\\' under WIN32, '/' elsewhere), separator excluded.
Returns "" when pathname contains no separator, when the separator is its
first character, or when the directory part is too long for the internal
buffer (a truncated directory name would designate another directory).
The result points to a static buffer overwritten by the next call.
*/
{
	static char dirname[4096];
	const char *p;
	size_t ldir;

#if defined(WIN32)
	p = strrchr(pathname,'\\');
#else
	p = strrchr(pathname,'/');
#endif
	ldir = (p == NULL ? 0 : (size_t)(p - pathname));
	if(ldir >= sizeof(dirname)) ldir = 0; /* does not fit: report no directory */
	memcpy(dirname, pathname, ldir);
	dirname[ldir] = 0;
	return dirname;
}
1670 
printout(SEA_VIEW * view,const char * filename,int fontsize,int block_size,Fl_Paged_Device::Page_Format pageformat,int vary_only,int ref0,int pdfkindvalue,Fl_Paged_Device::Page_Layout layout,int svg_width)1671 int printout(SEA_VIEW *view, const char *filename,
1672 	     int fontsize, int block_size, Fl_Paged_Device::Page_Format pageformat, int vary_only, int ref0,
1673 	     int pdfkindvalue, Fl_Paged_Device::Page_Layout layout, int svg_width)
1674 {
1675   int num, i, j, k, current, max_seq_length, fin, curr_lines, widnames,
1676   res_per_line, nl, firstpage, lines_per_page, use_pdf, use_svg, top_margin;
1677   Fl_Surface_Device *surface;
1678   FILE *textfile = NULL;
1679   time_t heure;
1680   static char unnamed[] = "<unnamed>";
1681   static char num_line[200];
1682   int lettre, char_per_line;
1683   short *vary_need = NULL;
1684   int *vary_pos; /* rang ds alignement de la colonne imprim�e */
1685   char *p, oneline[500];
1686   int (*calc_color_function)(int);
1687   double	char_width = fontsize/2, descender, margin = 25;
1688   if(view->tot_seqs == 0) return 0;
1689   if(view->protein) calc_color_function = get_color_for_aa;
1690   else  calc_color_function = get_color_for_base;
1691   use_pdf = (pdfkindvalue == PDF_COLOR || pdfkindvalue == PDF_BW);
1692   use_svg = (pdfkindvalue == SVG);
1693   max_seq_length = 0; widnames = 0;
1694   for (i=0; i < view->tot_seqs; i++) {
1695     if (view->each_length[i] > max_seq_length) max_seq_length = view->each_length[i];
1696     if ( ( fin=strlen(view->seqname[i]) ) > widnames) widnames = fin;
1697   }
1698   widnames += 2;
1699   if (use_pdf) {
1700     surface = new PDF_or_PS_File_Device();
1701     if (((PDF_or_PS_File_Device*)surface)->begin_document(filename, pageformat, layout)) {
1702       delete surface;
1703       return 0;
1704     }
1705   }
1706   else if(use_svg) {
1707     FILE *out = fl_fopen(filename, "w");
1708     if (out == NULL) exit(1);
1709 #ifndef NO_PDF
1710     SVG_PDF_width_Graphics_Driver *pdf_d = new SVG_PDF_width_Graphics_Driver(NULL);
1711     pdf_d->font(FL_COURIER, fontsize);
1712     char_width = pdf_d->width("X", 1);
1713     delete pdf_d;
1714 #endif
1715     margin = char_width;
1716     char_per_line = (int)( (svg_width - 2*margin) / char_width + 0.5);
1717     fin = (char_per_line - widnames + 1) / (block_size + 1);
1718     if (fin < 1) { /* garde fou */
1719       fin = 1; block_size = char_per_line - widnames;
1720     }
1721     res_per_line = fin * block_size;
1722     int nl = (max_seq_length + res_per_line - 1) /res_per_line;
1723     int totl = nl * (view->tot_seqs + 2) + 2;
1724     surface = new SVG_File_Surface(svg_width, totl * fontsize, out, true);
1725     surface->set_current();
1726     fl_color(210,210,210); // draw grey background
1727     fl_rectf(0,0,svg_width, totl * fontsize);
1728     }
1729   else {
1730     textfile = fopen(filename, "w");
1731     if(textfile == NULL) return TRUE;
1732   }
1733 #ifndef NO_PDF
1734   jmp_buf* jbuf;
1735   if (use_pdf) ((PDF_or_PS_File_Device*)surface)->surface_try(&jbuf);
1736   if ( (!use_pdf) || (setjmp(*jbuf) == 0)) {
1737 #endif
1738     if (use_pdf) {
1739       surface->driver()->font(FL_COURIER, fontsize);
1740       char_width = fl_width("X");
1741       int pwidth, pheight;
1742       ((PDF_or_PS_File_Device*)surface)->printable_rect(&pwidth, &pheight);
1743       int l, r, t, b;
1744       ((PDF_or_PS_File_Device*)surface)->margins(&l, &t, &r, &b);
1745       margin -= l;
1746       char_per_line = (int)((pwidth - 2*margin) / char_width + 0.5);
1747       lines_per_page = (int)((pheight - 2*margin) / fontsize + 0.5);
1748       descender = fl_descent();
1749       top_margin = margin;
1750     }
1751     else if (use_svg) {
1752       surface->driver()->font(FL_COURIER, fontsize);
1753       lines_per_page = 10000000;
1754       top_margin = fontsize;
1755       }
1756     else char_per_line = 90;
1757     firstpage = TRUE;
1758 
1759     if(ref0 < 0) vary_only = FALSE;
1760     time(&heure);
1761     sprintf(oneline,"Alignment: %s", view->masename == NULL ? unnamed : PREPARE_LABEL(view->masename) );
1762     if (use_pdf) {
1763       ((PDF_or_PS_File_Device*)surface)->start_page();
1764       ((PDF_or_PS_File_Device*)surface)->origin(0, fontsize);
1765       fl_draw(oneline, margin, top_margin);
1766     }
1767     else if(!use_svg) {fputs(oneline, textfile); fputs("\n", textfile);}
1768     curr_lines = 1;
1769     if(vary_only) {
1770       const char fixed[] = "Displaying variable sites only.";
1771       if (use_pdf || use_svg) fl_draw(fixed, margin, top_margin + curr_lines * fontsize);
1772       else {fputs(fixed, textfile); fputs("\n", textfile);}
1773       ++curr_lines;
1774     }
1775     if (use_pdf) {
1776       sprintf(oneline,"Seaview [blocks=%d fontsize=%d %s%s] on %s",
1777 	    block_size, fontsize, pageformat == Fl_Paged_Device::A4 ? "A4" : "LETTER",
1778 	    layout == Fl_Paged_Device::LANDSCAPE ? "-landscape" : "", ctime(&heure));
1779       p = strchr(oneline, '\n'); if (p) *p = 0;
1780       fl_draw(oneline, margin, top_margin + curr_lines * fontsize);
1781       curr_lines += 2;
1782     }
1783     else if(use_svg) {
1784       sprintf(oneline,"created by Seaview on %s", ctime(&heure));
1785       p = strchr(oneline, '\n'); if (p) *p = 0;
1786       fl_color(FL_BLACK);
1787       fl_draw(oneline, margin, top_margin + curr_lines * fontsize);
1788       curr_lines += 1;
1789     }
1790     else {
1791       fputs("Seaview text-only output\n", textfile);
1792       curr_lines += 2;
1793     }
1794     if(vary_only) {
1795       vary_need = (short *)calloc(max_seq_length, sizeof(short));
1796       if(vary_need == NULL) return TRUE;
1797       vary_pos = (int *)calloc(char_per_line, sizeof(int));
1798       if(vary_pos == NULL) return TRUE;
1799       for(i = 0; i < max_seq_length; i++) {
1800 	for(num = 0; num < view->tot_seqs; num++) {
1801 	  if( toupper(view->sequence[num][i]) != toupper(view->sequence[ref0][i]) ) {
1802 	    vary_need[i] = TRUE;
1803 	    break;
1804 	  }
1805 	}
1806       }
1807     }
1808     /* nombre max de blocks qui tiennent sur une ligne de cpl chars */
1809     fin = (char_per_line - widnames + 1) / (block_size + 1);
1810     if(fin < 1) { /* garde fou */
1811       fin = 1; block_size = char_per_line - widnames;
1812     }
1813     res_per_line = fin * block_size;
1814     current = 0;
1815     while( current < max_seq_length ) {
1816       nl = 1;
1817       if(vary_only) {
1818 	memset(vary_pos, 0, res_per_line * sizeof(int) );
1819 	i = -1; j = 0; k = 0;
1820 	while( j < res_per_line) {
1821 	  if(current + i >= max_seq_length) break;
1822 	  if( !vary_need[current + ++i] ) continue;
1823 	  j++;
1824 	  vary_pos[k++] = current + i + 1;
1825 	  if( j % block_size == 0) k++;
1826 	}
1827 	nl = calc_vary_lines(vary_pos,  k);
1828       }
1829       if( use_pdf && (!firstpage) && (curr_lines + view->tot_seqs + nl > lines_per_page)) {
1830 	((PDF_or_PS_File_Device*)surface)->end_page();
1831 	((PDF_or_PS_File_Device*)surface)->start_page();
1832 	((PDF_or_PS_File_Device*)surface)->origin(0, fontsize);
1833 	surface->driver()->font(FL_COURIER, fontsize);
1834 	curr_lines = 0;
1835       }
1836       if(vary_only) {
1837 	out_vary_pos(vary_pos, widnames, k, nl, textfile, margin, top_margin + curr_lines * fontsize);
1838 	curr_lines += nl;
1839       }
1840       else	{
1841 	sprintf(num_line, "%d", current + 1);
1842 	fin = strlen(num_line);
1843 	memmove(num_line + widnames - fin + 1, num_line, fin+1);
1844 	if(fin <= widnames) memset(num_line, ' ', widnames - fin + 1);
1845 	if( use_pdf || use_svg) fl_draw(num_line, margin, top_margin + curr_lines * fontsize);
1846 	else {fputs(num_line, textfile);fputs("\n",textfile);}
1847 	++curr_lines;
1848       }
1849       for(num=0; num < view->tot_seqs; num++) {
1850 	k = 0;
1851 	for(j = 0; j < widnames; j++) {
1852 	  if(view->seqname[num][j] == 0) break;
1853 	  oneline[k++] = view->seqname[num][j];
1854 	}
1855 	while( j < widnames) {
1856 	  j++;
1857 	  oneline[k++] = ' ';
1858 	}
1859 	if(vary_only) {
1860 	  i = -1; j = 0;
1861 	  while( j < res_per_line) {
1862 	    if(current + i >= max_seq_length) break;
1863 	    if( !vary_need[current + ++i] ) continue;
1864 	    j++;
1865 	    if(current + i < view->each_length[num]) {
1866 	      if(num != ref0) lettre = ( toupper(view->sequence[num][current+i]) ==
1867 					toupper(view->sequence[ref0][current+i]) ? '.' : view->sequence[num][current+i] );
1868 	      else lettre = view->sequence[ref0][current+i];
1869 	      oneline[k++] = lettre;
1870 	    }
1871 	    if( j % block_size == 0) oneline[k++] = ' ';
1872 	  }
1873 	  if(num == view->tot_seqs - 1) current = current + i + 1;
1874 	}
1875 
1876 	else	{
1877 	  fin = res_per_line;
1878 	  if(current+fin > view->each_length[num])
1879 	    fin = view->each_length[num] - current;
1880 	  if(ref0 != -1 && num != ref0) {
1881 	    /* ecriture par reference a seq ref0 */
1882 	    for(i=0; i<fin; i++) {
1883 	      lettre = ( toupper(view->sequence[num][current+i]) ==
1884 			toupper(view->sequence[ref0][current+i]) ? '.' : view->sequence[num][current+i] );
1885 	      oneline[k++] = lettre;
1886 	      if( i < fin-1 && (i+1)%block_size == 0)
1887 		oneline[k++] = ' ';
1888 	    }
1889 	  }
1890 	  else	{ /* ecriture normale de seq */
1891 	    for(i=0; i<fin; i++) {
1892 	      oneline[k++] = view->sequence[num][current+i];
1893 	      if( i < fin-1 && (i+1)%block_size == 0)
1894 		oneline[k++] = ' ';
1895 	    }
1896 	  }
1897 	}
1898 	oneline[k] = 0;
1899 	if(!view->allow_lower) majuscules(oneline + widnames);
1900 	if(use_pdf && (curr_lines >= lines_per_page)) {
1901 	  ((PDF_or_PS_File_Device*)surface)->end_page();
1902 	  ((PDF_or_PS_File_Device*)surface)->start_page();
1903 	  ((PDF_or_PS_File_Device*)surface)->origin(0, fontsize);
1904 	  surface->driver()->font(FL_COURIER, fontsize);
1905 	  curr_lines = 0;
1906 	}
1907 	if(!use_pdf && !use_svg) {
1908 	  fputs(oneline, textfile); fputs("\n", textfile);
1909 	}
1910 	else if(pdfkindvalue == PDF_BW) {
1911 	  fl_draw(oneline, margin, top_margin + curr_lines * fontsize);
1912 	}
1913 	else {
1914 	  if (use_pdf) color_pdf_display(view, calc_color_function, oneline, widnames, margin,
1915 			    top_margin + curr_lines * fontsize,
1916 			    fontsize, char_width, descender, num, current);
1917 	  else color_svg_display(view, calc_color_function, oneline, widnames, margin,
1918 				 top_margin + curr_lines * fontsize, fontsize, char_width);
1919 	}
1920 	++curr_lines;
1921 	firstpage = FALSE;
1922       }
1923       if(!use_pdf && !use_svg) {
1924 	fputs("\n", textfile);
1925       }
1926       else if(curr_lines + 1 <= lines_per_page) {
1927 	++curr_lines;
1928       }
1929       if( ! vary_only ) current += res_per_line;
1930     }
1931     if(use_pdf) {
1932       ((PDF_or_PS_File_Device*)surface)->end_page();
1933       ((PDF_or_PS_File_Device*)surface)->end_job();
1934     }
1935     else if(use_svg) {
1936       fl_color(FL_GRAY);
1937       fl_line_style(0, 3);
1938       fl_rect(1,1, ((SVG_File_Surface*)surface)->width()-2, ((SVG_File_Surface*)surface)->height()-2);
1939       ((SVG_File_Surface*)surface)->end();
1940       }
1941     else fclose(textfile);
1942 #ifndef NO_PDF
1943   } /* end of PDF_TRY */
1944   if (use_pdf && ((PDF_or_PS_File_Device*)surface)->surface_catch()) {
1945     ((PDF_or_PS_File_Device*)surface)->error_catch();
1946   }
1947 #endif
1948   if (use_pdf || use_svg) delete surface;
1949   return FALSE;
1950 }
1951 
1952 
color_pdf_display(SEA_VIEW * view,int (* calc_color_function)(int),char * oneline,int widnames,double x,double y,int fontsize,double char_width,double descender,int num,int current)1953 static void color_pdf_display(SEA_VIEW *view, int (*calc_color_function)( int ), char *oneline,
1954 			      int widnames, double x, double y, int fontsize, double char_width, double descender,
1955 			      int num, int current)
1956 {
1957   double  xx;
1958   int c, l, count = 0;
1959   char *p, **clines;
1960 
1961   clines = (char **)malloc(sizeof(char *) * view->numb_gc); if(clines==NULL) return;
1962   l = strlen(oneline);
1963   for(c = 1; c < view->numb_gc; c++) {
1964     clines[c] = (char *)malloc(l + 1); if(clines[c] == NULL) return;
1965     memset(clines[c], ' ', l); clines[c][l] = 0;
1966   }
1967   for(p = oneline + widnames; *p != 0; p++) {
1968     if(*p == ' ') continue;
1969     if(view->curr_colors != view->codoncolors) c = calc_color_function(*p);
1970     else c = view->col_rank[num][current + count++];
1971     if(c > 0) clines[c][p - oneline] = 'X';
1972   }
1973   for(c = 1; c < view->numb_gc; c++) {
1974     if(strchr(clines[c], 'X') == NULL) continue;
1975     fl_color(view->curr_colors[c]);
1976     for(xx = x + widnames*char_width, p = clines[c] + widnames; *p != 0; p++, xx += char_width) {
1977       if(*p == ' ') continue;
1978       fl_rectf(xx, y - fontsize + descender, char_width+1, fontsize);
1979     }
1980   }
1981   fl_color(FL_BLACK);
1982   fl_draw(oneline, x, y);
1983   for(c = 1; c < view->numb_gc; c++) free(clines[c]);
1984   free(clines);
1985 }
1986 
color_svg_display(SEA_VIEW * view,int (* calc_color_function)(int),char * oneline,int widnames,double x,double y,int fontsize,double char_width)1987 static void color_svg_display(SEA_VIEW *view, int (*calc_color_function)( int ), char *oneline,
1988 			      int widnames, double x, double y, int fontsize, double char_width)
1989 {
1990   double  xx;
1991   int c, l;
1992   char *p;
1993 
1994   l = strlen(oneline);
1995   char *aux = (char*)malloc(l+1);
1996   fl_font(FL_COURIER_BOLD, fontsize);
1997   xx = x + widnames*char_width;
1998   for (c = 0; c < view->numb_gc; c++) {
1999     memset(aux, ' ', l-widnames);
2000     for (p = oneline+widnames; p < oneline+l; p++) {
2001       if (calc_color_function(*p) == c) aux[p-(oneline+widnames)] = *p;
2002       }
2003     fl_color(view->curr_colors[c]);
2004     fl_draw(aux, l - widnames, xx, y);
2005   }
2006   fl_font(FL_COURIER, fontsize);
2007   fl_color(FL_BLACK);
2008   fl_draw(oneline, widnames, x, y);
2009 }
2010 
/* Returns the number of text lines needed to write, vertically, the largest
   value found in vary_pos[0 .. widpos-1], i.e. its number of decimal digits.
   Returns 1 when widpos is 0 or when no value is larger than 9.
   The count is not capped, so positions of 1,000,000 and above get the
   extra line(s) their leading digits need. */
static int calc_vary_lines(int *vary_pos, int widpos)
{
  int maxi = 0, num, nl = 1;

  for(num = 0; num < widpos; num++)
    if(vary_pos[num] > maxi) maxi = vary_pos[num];
  /* one more line per additional decimal digit */
  while(maxi >= 10) {
    maxi /= 10;
    nl++;
  }
  return nl;
}
2031 
2032 
out_vary_pos(int * vary_pos,int widnames,int widpos,int nl,FILE * textfile,double x,double y)2033 static void out_vary_pos(int *vary_pos, int widnames, int widpos, int nl, FILE *textfile, double x, double y)
2034 {
2035   int num, l, k, echelle, digit, val;
2036   static char chiffre[] = "0123456789";
2037   char oneline[300];
2038 
2039   echelle = 1; k = 0;
2040   for(l = 2; l <= nl; l++) echelle *= 10;
2041   for(l = nl; l > 0; l--) {
2042     for(num = 0; num < widnames; num++) oneline[k++] = ' ';
2043     for(num = 0; num < widpos; num++) {
2044       val = vary_pos[num];
2045       if(val < echelle)
2046 	oneline[k++] = ' ';
2047       else	{
2048 	digit = (val / echelle) % 10 ;
2049 	oneline[k++] = *(chiffre + digit);
2050       }
2051     }
2052     oneline[k] = 0;
2053     if(textfile == NULL) {
2054       fl_draw(oneline, x, y);
2055       y += fl_height();
2056     }
2057     else {fputs(oneline, textfile); fputs("\n",textfile); }
2058     k = 0;
2059     echelle /= 10;
2060   }
2061 }
2062 
2063 
read_alignment_file(const char * infile)2064 SEA_VIEW* read_alignment_file(const char *infile )
2065 {
2066   char *err_message;
2067   int i;
2068   SEA_VIEW* view = new SEA_VIEW;
2069   memset(view, 0, sizeof(SEA_VIEW));
2070   view->menubar = new Fl_Menu_Bar(0,0, 30, 30);
2071   view->menu_trees = new vlength_menu(view->menubar, "Trees", NULL, 0);
2072 
2073 #ifndef R_OK
2074 #define R_OK 04
2075 #endif
2076   if (fl_access(infile, R_OK)) {
2077     fprintf(stderr, "File %s\nis not readable or does not exist", infile);
2078     return NULL;
2079     }
2080   known_format file_format = what_format(infile);
2081   if (file_format < 0) {
2082     fprintf(stderr, "File %s\nis not of a format readable by seaview", infile);
2083     return NULL;
2084   }
2085   if (file_format == MASE_FORMAT) {
2086     view->tot_seqs = read_mase_seqs_header(infile, &view->sequence,
2087 					   &view->seqname, &view->comments, &view->header,
2088 					   &err_message);
2089     /* interpreter les regions du header du fichier mase */
2090     view->regions = parse_regions_from_header(view->header);
2091     /* interpreter les species sets du fichier mase */
2092     view->numb_species_sets = parse_species_sets_from_header(view->header,
2093 							     view->list_species_sets, view->name_species_sets, view->tot_seqs);
2094     /* interpreter les trees du fichier mase */
2095     parse_trees_from_header(view->header, view);
2096     /* interpreter les comment lines du header */
2097     view->tot_comment_lines = parse_comment_lines_from_header(view->header,
2098 							      &(view->comment_line), &(view->comment_name),
2099 							      &(view->comment_length) , &(view->max_seq_length));
2100   }
2101   else if(file_format == FASTA_FORMAT)
2102     view->tot_seqs = read_fasta_align(infile, &view->sequence,
2103 				      &view->seqname, &view->comments, &view->header, &err_message, view->spaces_in_fasta_names);
2104   else if(file_format == PHYLIP_FORMAT)
2105     view->tot_seqs = read_phylip_align(infile, &view->sequence,
2106 				       &view->seqname, &view->comments, &view->header, &err_message);
2107   else if(file_format == CLUSTAL_FORMAT)
2108     view->tot_seqs = read_clustal_align(infile, &view->sequence,
2109 					&view->seqname, &view->comments, &view->header, &err_message);
2110   else if(file_format == MSF_FORMAT)
2111     view->tot_seqs = read_msf_align(infile, &view->sequence,
2112 				    &view->seqname, &view->comments, &view->header, &err_message);
2113   else if(file_format == NEXUS_FORMAT) {
2114     int **list_sp = NULL; char **name_sp = NULL; int i;
2115     view->tot_seqs = read_nexus_align((char*)infile, &view->sequence,
2116 				      &view->seqname, &view->comments, &view->header,
2117 				      &err_message, &view->regions, &view->numb_species_sets,
2118 				      &list_sp, &name_sp, &view->tot_comment_lines,
2119 				      &view->comment_name, &view->comment_line,
2120 				      &view->comment_length, &view->protein, view);
2121     for(i= 0; i < view->numb_species_sets; i++) {
2122       view->list_species_sets[i] = list_sp[i];
2123       view->name_species_sets[i] = name_sp[i];
2124     }
2125     if(list_sp != NULL) free(list_sp);
2126     if(name_sp != NULL) free(name_sp);
2127   }
2128   if (view->tot_seqs == 0) return NULL;
2129   if (file_format != NEXUS_FORMAT) view->protein = is_a_protein_alignment(view);
2130   view->each_length = new int[view->tot_seqs];
2131   for (i = 0; i < view->tot_seqs; i++) {
2132     view->each_length[i] = strlen(view->sequence[i]);
2133   }
2134   view->seq_length = 0;
2135   for (i = 0; i < view->tot_seqs; i++) {
2136     if (view->each_length[i] > view->seq_length) view->seq_length = view->each_length[i];
2137   }
2138   view->masename = strdup(infile);
2139   view->format_for_save = file_format;
2140   return view;
2141 }
2142 
cmdline_read_input_alignment(int argc,char ** argv)2143 SEA_VIEW *cmdline_read_input_alignment(int argc, char **argv)
2144 {
2145   char *fname, line[500];
2146   fname = argv[argc-1];
2147   if (strcmp(fname, "-") == 0) {
2148     fname = create_tmp_filename();
2149     FILE *out = fopen(fname, "w");
2150     while (fgets(line, sizeof(line), stdin) != NULL) {
2151       fputs(line, out);
2152     }
2153     fclose(out);
2154   }
2155   SEA_VIEW *view = read_alignment_file(fname);
2156   if (strcmp(argv[argc-1], "-") == 0) delete_tmp_filename(fname);
2157   if (!view) {
2158     fputs("\n", stderr);
2159     exit(1);
2160   }
2161   view->max_seq_length = calc_max_seq_length(view->seq_length, view->tot_seqs);
2162   allonge_seqs(view->sequence, view->tot_seqs, view->max_seq_length, view->each_length,
2163 	       view->tot_comment_lines, view->comment_line, NULL);
2164   return view;
2165 }
2166 
format_conversion(int argc,char ** argv)2167 void format_conversion(int argc, char **argv)
2168 {
2169   char *err_message, *p;
2170   int i, j, gc;
2171   bool std_output = false;
2172   bool save_fragment = false;
2173   bool bootstrap = false;
2174   region *myregion;
2175 
2176   SEA_VIEW *view = cmdline_read_input_alignment(argc, argv);
2177   if (view == NULL) {
2178     fprintf(stderr, "No sequence found in %s", argv[argc-1]);
2179     exit(1);
2180   }
2181   known_format out_format = view->format_for_save;
2182   load_resources(progname);
2183   view->phylipwidnames = int_res_value("phylipwidnames", 30);
2184   char *outfile = process_output_options(argc, argv, out_format, std_output);
2185 
2186   if ( !view->protein && isarg(argc, argv, "-translate")) {
2187     bool no_terminal_stop = isarg(argc, argv, "-no_terminal_stop");
2188     for (i = 0; i < view->tot_seqs; i++) {
2189       gc = (view->comments != NULL ? get_ncbi_gc_from_comment(view->comments[i]) : 1);
2190       p = translate_with_gaps(view->sequence[i], get_acnuc_gc_number(gc));
2191       free(view->sequence[i]);
2192       view->sequence[i] = p;
2193       if (no_terminal_stop) {
2194         char *q = p + strlen(p) - 1;
2195         while (q > p && *q == '-') q--;
2196         if (q >= p && *q == '*') *q = '-';
2197       }
2198       view->each_length[i] = strlen(p);
2199       view->regions = NULL;
2200       }
2201     view->protein = true;
2202     }
2203 
2204   while ( (p = argname(argc, argv, "-def_species_group")) != NULL) {
2205     p = strtok(p, ",");
2206     view->name_species_sets[view->numb_species_sets] = strdup(p);
2207     view->list_species_sets[view->numb_species_sets] = (int*)calloc(view->tot_seqs, sizeof(int));
2208     while (true) {
2209       p = strtok(NULL, ",");
2210       if (!p) break;
2211       if (strchr(p, '-') == NULL) {
2212 	sscanf(p, "%d", &i);
2213 	view->list_species_sets[view->numb_species_sets][i-1] = 1;
2214       }
2215       else {
2216 	sscanf(p, "%d-%d", &i, &j);
2217 	if (i <= j) {
2218 	  while (i <= j) view->list_species_sets[view->numb_species_sets][i++ - 1] = 1;
2219 	  }
2220       }
2221     }
2222     view->numb_species_sets++;
2223     for (i = 0; i < argc; i++) {
2224       if (strcmp(argv[i], "-def_species_group") == 0) {
2225 	*argv[i] = '+';
2226 	break;
2227       }
2228     }
2229   }
2230 
2231   while ( (p = argname(argc, argv, "-def_site_selection")) != NULL) {
2232     int from, to;
2233     list_segments *current_seg;
2234     region *site_selection = (region*)calloc(1, sizeof(region));
2235     list_regions *last_reg = (list_regions*)calloc(1, sizeof(list_regions));
2236     last_reg->element = site_selection;
2237     if (!view->regions) {
2238       view->regions = last_reg;
2239       }
2240     else {
2241       list_regions *current_reg;
2242       current_reg = view->regions;
2243       while (current_reg->next) current_reg = current_reg->next;
2244       current_reg->next = last_reg;
2245       }
2246     p = strtok(p, ",");
2247     site_selection->name = strdup(p);
2248     while (true) {
2249       p = strtok(NULL, ",");
2250       if (!p) break;
2251       if (strchr(p, '-') == NULL) {
2252 	sscanf(p, "%d", &from);
2253 	to = from;      }
2254       else {
2255 	sscanf(p, "%d-%d", &from, &to);
2256 	if (from > to) continue;
2257       }
2258       if (to > view->seq_length) to = view->seq_length;
2259       if (from > view->seq_length) continue;
2260       list_segments *seg = (list_segments*)malloc(sizeof(list_segments));
2261       seg->debut = from;
2262       seg->fin = to;
2263       seg->next = NULL;
2264       if (!site_selection->list) {
2265 	site_selection->list = seg;
2266 	}
2267       else {
2268 	current_seg->next = seg;
2269 	}
2270       current_seg = seg;
2271     }
2272     for (i = 0; i < argc; i++) {
2273       if (strcmp(argv[i], "-def_site_selection") == 0) {
2274 	*argv[i] = '+';
2275 	break;
2276       }
2277     }
2278   }
2279 
2280   if (isarg(argc, argv, "-gblocks")) { // gblocks method
2281     myregion = (region*)malloc(sizeof(region));
2282     myregion->name = strdup("Gblocks");
2283     view->region_line = (char*)malloc(view->seq_length + 1);
2284     create_gblocks_mask(view, myregion, true, isarg(argc, argv, "-b5"), isarg(argc, argv, "-b4"),
2285 			isarg(argc, argv, "-b3"), isarg(argc, argv, "-b2"));
2286     list_regions *r = view->regions, *pre = NULL;
2287     while (r) {
2288       if (strcmp(r->element->name, myregion->name) == 0) break;
2289       pre = r;
2290       r = r->next;
2291     }
2292     if (r) {
2293       free(r->element->name);
2294       r->element = myregion;
2295     }
2296     else {
2297       list_regions *elt = (list_regions*)malloc(sizeof(list_regions));
2298       elt->element = myregion;
2299       elt->next = NULL;
2300       if (pre) pre->next = elt;
2301       else view->regions = elt;
2302     }
2303   }
2304 
2305   if ( (p = argname(argc, argv, "-sites")) ) {
2306     save_fragment = true;
2307     list_regions *elt = view->regions;
2308     while (elt) {
2309       if (strcmp(elt->element->name, p) == 0) break;
2310       elt = elt->next;
2311     }
2312     if (elt) view->active_region = elt->element;
2313   }
2314 
2315   if ( (p = argname(argc, argv, "-species")) ) {
2316     save_fragment = true;
2317     for (i = 0; i < view->numb_species_sets; i++) {
2318       if (strcmp(view->name_species_sets[i], p) == 0) break;
2319     }
2320     if (i < view->numb_species_sets) {
2321       view->sel_seqs = view->list_species_sets[i];
2322       view->tot_sel_seqs = 0;
2323       for (i = 0; i < view->tot_seqs; i++) if (view->sel_seqs[i]) view->tot_sel_seqs++;
2324     }
2325   }
2326   if (isarg(argc, argv, "-del_gap_only_sites") && !isarg(argc, argv, "-sites")) del_gap_only_sites(view);
2327 
2328   if ( (p = argname(argc, argv, "-bootstrap")) ) {
2329     int replicates = -1;
2330     sscanf(p, "%d", &replicates);
2331     if (replicates == -1) replicates = 1;
2332     bootstrap = true;
2333     save_fragment = false;
2334     save_bootstrap_replicates(outfile, replicates, view);
2335   }
2336 
2337 
2338   if (save_fragment) {
2339     myregion = view->active_region;
2340     if (myregion == NULL) {
2341       myregion = (region *)malloc(sizeof(region));
2342       myregion->list = (list_segments *)malloc(sizeof(list_segments));
2343       myregion->list->debut = 1;
2344       myregion->list->fin = view->seq_length;
2345       myregion->list->next = NULL;
2346       myregion->name = (char *)"all";
2347     }
2348     err_message = save_alignment_or_region(outfile, view->sequence, view->comments,
2349 					   view->header, view->seqname, view->tot_seqs, view->each_length,
2350 					   NULL, myregion, out_format,
2351 					   0, NULL, NULL, view->sel_seqs, view->tot_sel_seqs,
2352 					   view->protein, 0, NULL, NULL, view->phylipwidnames,
2353 					   view->tot_trees, view->trees,
2354 					   view->menu_trees->vitem(0), view->spaces_in_fasta_names);
2355   }
2356   else if (!bootstrap) {
2357     err_message = save_alignment_or_region(outfile, view->sequence, view->comments,
2358 				 view->header, view->seqname, view->tot_seqs, view->each_length,
2359 				 view->regions, NULL, out_format,
2360 				 view->numb_species_sets, view->list_species_sets,
2361 				 view->name_species_sets, NULL, 0, view->protein,
2362 				 view->tot_comment_lines, view->comment_name,
2363 				 view->comment_line, view->phylipwidnames,
2364 				 view->tot_trees, view->trees,
2365 				 view->menu_trees->vitem(0), view->spaces_in_fasta_names);
2366   }
2367   if (err_message) {
2368     fprintf(stderr, "%s\n", err_message);
2369     if (std_output) delete_tmp_filename(outfile);
2370     exit(1);
2371     }
2372   if (std_output) {
2373     char line[200];
2374     FILE *in = fopen(outfile, "r");
2375     while (fgets(line, sizeof(line), in) != NULL) {
2376       fputs(line, stdout);
2377     }
2378     fclose(in);
2379     fflush(stdout);
2380     fl_unlink(outfile);
2381     }
2382   exit(0);
2383 }
2384 
2385 
process_output_options(int argc,char ** argv,known_format & out_format,bool & std_output)2386 char *process_output_options(int argc, char **argv, known_format& out_format, bool& std_output)
2387 {
2388   int i;
2389   char upper_format[20], *p;
2390   std_output = false;
2391   char *outformatname = argname(argc, argv, "-output_format");
2392   if (outformatname) {
2393     majuscules(outformatname);
2394     for (i = 0; i < nbr_formats; i++) {
2395       strcpy(upper_format, f_format_names[i]);
2396       majuscules(upper_format);
2397       if (strcmp(upper_format, outformatname)== 0) break;
2398     }
2399     if (i >= nbr_formats) {
2400       fprintf(stderr, "'%s' is not of a format known by seaview", outformatname);
2401       exit(1);
2402     }
2403     out_format = (known_format)i;
2404   }
2405   char *outfile = argname(argc, argv, "-o");
2406   if (outfile == NULL) {
2407     outfile = new char[strlen(argv[argc-1]) + 12];
2408     strcpy((char*)outfile, argv[argc-1]);
2409     p = strrchr(outfile, '.');
2410     if (!p) { p = (char*)outfile + strlen(outfile); *p = '.'; }
2411     strcpy(p+1, f_format_exts[out_format]);
2412     if (strcmp(outfile, argv[argc-1]) == 0) {
2413       strcpy(p, "_2.");
2414       strcpy(p+3, f_format_exts[out_format]);
2415     }
2416   }
2417   else if (strcmp(outfile, "-") == 0) {
2418     outfile = strdup(create_tmp_filename());
2419     std_output = true;
2420   }
2421   return outfile;
2422 }
2423 
2424 
2425 #ifndef NO_PDF
printout_cmdline(int argc,char ** argv)2426 void printout_cmdline(int argc, char **argv)
2427 {
2428   bool std_output = false;
2429   SEA_VIEW *view = cmdline_read_input_alignment(argc, argv);
2430   if (view == NULL) {
2431     fprintf(stderr, "No sequence found in %s", argv[argc-1]);
2432     exit(1);
2433   }
2434   load_resources(progname);
2435   //protein colors
2436   static char stdcolorgroups[50], customprotcolors[300];
2437   strcpy(stdcolorgroups, get_res_value("stdcolorgroups", def_stdcolorgroups));
2438   strcpy(customprotcolors, get_res_value("protcolors", ""));
2439   int *protcolors = (int *)malloc(max_protcolors * sizeof(int));
2440   int *def_protcolors = (int *)malloc(max_protcolors * sizeof(int));
2441   for(int i=0; i<max_protcolors; i++) def_protcolors[i] =
2442     fl_rgb_color(def_protcolors_rgb[3*i], def_protcolors_rgb[3*i+1], def_protcolors_rgb[3*i+2]);
2443 
2444   memcpy(protcolors, def_protcolors, max_protcolors * sizeof(int) );
2445   int cur_protcolors = prep_custom_colors(protcolors, customprotcolors, max_protcolors);
2446   static int dnacolors[] = { FL_BLACK, FL_RED, FL_DARK_GREEN, FL_YELLOW, FL_BLUE };
2447   /* process resource-read stdcolorgroups and altcolorgroups */
2448   prep_aa_color_code(stdcolorgroups, (char*)"",
2449 					 cur_protcolors, &view->numb_stdprotcolors, &view->numb_altprotcolors);
2450 
2451   view->max_seq_length = calc_max_seq_length(view->seq_length, view->tot_seqs);
2452   if (view->protein) {
2453     view->numb_gc = view->numb_stdprotcolors;
2454     view->curr_colors = view->stdprotcolors = protcolors;
2455     protcolors[0] = FL_BLACK;
2456     }
2457   else {
2458     view->numb_gc = view->numb_dnacolors = 5;
2459     view->curr_colors = view->dnacolors = dnacolors;
2460     }
2461   known_format out_format = view->format_for_save;
2462   char *outfile = process_output_options(argc, argv, out_format, std_output);
2463   int fontsize = (int)argval(argc, argv, "-fontsize", 10);
2464   int blocksize = (int)argval(argc, argv, "-blocksize", 10);
2465 #ifdef NO_PDF
2466   int use_svg = false;
2467   int svg_width = 0;
2468 #else
2469   int use_svg = isarg(argc, argv, "-svg");
2470   int svg_width = (int)argval(argc, argv, "-svg", 1000);
2471 #endif
2472   int landscape = isarg(argc, argv, "-landscape");
2473   int letter = isarg(argc, argv, "-letter");
2474   printout(view, outfile,
2475 	    fontsize,  blocksize,
2476 	   letter ? Fl_Paged_Device::LETTER : Fl_Paged_Device::A4,
2477 	   false, -1,
2478 	   use_svg ? SVG : PDF_COLOR,
2479 	   landscape ? Fl_Paged_Device::LANDSCAPE : Fl_Paged_Device::PORTRAIT,
2480 	   svg_width);
2481   if (std_output) {
2482     char line[200];
2483     FILE *in = fopen(outfile, "r");
2484     if (in) {
2485       while (fgets(line, sizeof(line), in) != NULL) {
2486 	fputs(line, stdout);
2487       }
2488       fclose(in);
2489       fflush(stdout);
2490     }
2491     delete_tmp_filename(outfile);
2492   }
2493   exit(0);
2494 }
2495 #endif // ! NO_PDF
2496