1 
2 #include <stdlib.h>
3 #include <stdio.h>
4 #include <string.h>
5 #include <ctype.h>
6 #if defined(WIN32)
7 #include <Windows.h>
8 #elif defined(unix) || defined(__APPLE__)
9 #define unixlike
10 #include <termios.h>
11 #endif
12 
13 
14 
/* included functions */
int trim_key(char *name); /* remove trailing spaces */
void padtosize(char *pname, char *name, int length); /* copy name into pname, space-padded to length */
char *get_code_descr(int code); /* label of a genetic code */
int calc_codon_number(char *codon); /* codon -> 0..63, or 64 if unknown base */
char codaa(char *codon, int code); /* translate a codon to a one-letter amino acid */
int get_ncbi_gc_number(int gc); /* internal code number -> NCBI number */
int get_acnuc_gc_number(int ncbi_gc); /* NCBI number -> internal code number (0 if not found) */
int strcmptrail(char *s1, int l1, char *s2, int l2); /* compare, ignoring trailing spaces */
void majuscules(char *name); /* convert to upper case in place */
void compact(char *chaine); /* remove all spaces in place */
void *mycalloc(int nbr, size_t taille); /* calloc that exits the program on failure */
char complementer_base(char nucl); /* complement of one nucleotide */
void complementer_seq(char *deb_ch, int l); /* reverse-complement a sequence in place */
char init_codon_to_aa(char *codon, int gc); /* translation of a codon in initiation position */
char stop_codon_to_aa(char *codon, int gc); /* translation of a codon in termination position */
int notrail2(char *chaine, int len); /* length without trailing spaces */
int prepch(char *chaine, char **posmot); /* split an '@'-wildcard template into words */
int compch(char *cible, int lcible, char **posmot, int nbrmots); /* match a target against a prepared template */
int chg_acnuc(char *acnucvar, char *gcgacnucvar); /* set environment variables acnuc and gcgacnuc */
void gets_no_echo(char *password, size_t lpw); /* read a line from stdin, echoing '*' */
36 
37 
int trim_key(char *name) /* remove trailing spaces */
/*
name	NUL-terminated string, trimmed in place (every trailing space
	is overwritten with 0)
return value	length of the trimmed string
*/
{
    size_t len = strlen(name);

    /* Work with an index rather than a pointer so that no address before
       the start of the buffer is ever formed when the string is empty or
       made only of spaces. */
    while (len > 0 && name[len - 1] == ' ') {
        name[--len] = 0;
    }
    return (int)len;
}
46 
47 
void padtosize(char *pname, char *name, int length)
/*
Copy at most length characters of name into pname, fill the remainder with
spaces so that pname holds exactly length characters, and terminate it.
pname	destination, must have room for length + 1 characters
name	source string (reading stops at its NUL or after length characters)
length	width of the padded result
*/
{
    int pos = 0;

    /* copy the useful part of name */
    while (pos < length && name[pos] != 0) {
        pname[pos] = name[pos];
        pos++;
    }
    /* blank-fill up to the requested width */
    while (pos < length) {
        pname[pos++] = ' ';
    }
    pname[length] = 0;
}
55 
56 
#define TOTCODES 27  /* total number of genetic codes defined, code 0 included */
int totcodes=TOTCODES;

/* one-letter amino acid symbols; table values are 1-based ranks in this string */
char aminoacids[]="RLSTPAGVKNQHEDYCFIMW*X";

struct genetic_code_libel { /* definition of one genetic code */
	char libel[61]; /* label describing how the code differs from the standard code */
	char* target; /* description of the organisms/organelles using the code */
	int code[65]; /* codon -> amino acid table */
	int ncbi_gc; /* NCBI number of the same code */
	int codon_init[64]; /* initiation or stop codon -> amino acid table */
	};

/*
 Codons are numbered from 1 to 64 in alphabetical order (A, C, G, T/U);
 number 65 is given to any codon containing a base outside AcCcGgTtUu.
 The arrays below are indexed with that number minus one, i.e. with the
 0..64 value computed by calc_codon_number().
 Amino acids are numbered according to their rank in variable aminoacids,
 from 1 to 20, plus 21 (*) for stop and 22 (X) for unknown.
 Table codon_init gives 19 (=M) for initiation codons and 21 (=*) for stop codons.
 In some genetic codes, the same codon corresponds to an aa in table code
 and to a stop in table codon_init.

 Entries 20 to 23 (NCBI codes 27 to 30) translate CUG as Leu, in agreement
 with the NCBI genetic code tables; only entry 19 (Pachysolen tannophilus,
 NCBI code 26) reads CUG as Ala.
*/

/* initialisation of all genetic codes */
struct genetic_code_libel genetic_code[TOTCODES] =
{

{ /* 0: standard */
	{"Standard genetic code"},
	"Standard",
/*ANN*/	{9,10,9,10,4,4,4,4,1,3,1,3,18,18,19,18,
/*CNN*/	11,12,11,12,5,5,5,5,1,1,1,1,2,2,2,2,
/*GNN*/	13,14,13,14,6,6,6,6,7,7,7,7,8,8,8,8,
/*TNN*/	21,15,21,15,3,3,3,3,21,16,20,16,2,17,2,17,22},
/*ncbi*/1,
/*init*/{0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,0, /* AUG */
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,0, /* CUG */
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	21,0,21,0,0,0,0,0,21,0,0,0,0,0,19,0} /* UUG */
}
,
{ /* 1: yeast mt */
	{"CUN=T  AUA=M  UGA=W"},
	"Yeast Mitochondrial",
	{9,10,9,10,4,4,4,4,1,3,1,3,19,18,19,18,
	11,12,11,12,5,5,5,5,1,1,1,1,4,4,4,4,
	13,14,13,14,6,6,6,6,7,7,7,7,8,8,8,8,
	21,15,21,15,3,3,3,3,20,16,20,16,2,17,2,17,22},
	3,
	{0,0,0,0,0,0,0,0,0,0,0,0,19,0,19,0, /* AUA, AUG */
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	21,0,21,0,0,0,0,0,0,0,0,0,0,0,0,0}
}
,
{ /* 2: mitochondrial code of vertebrates */
	{"AGR=*  AUA=M  UGA=W"},
	"Vertebrate Mitochondrial",
     {9,10,9,10,4,4,4,4,21,3,21,3,19,18,19,18,11,12,11,12,
     5,5,5,5,1,1,1,1,2,2,2,2,13,14,13,14,6,6,6,6,7,7,7,7,8,8,8,8,21,15,
     21,15,3,3,3,3,20,16,20,16,2,17,2,17,22},
	2,
	{0,0,0,0,0,0,0,0,21,0,21,0,19,19,19,19, /* AUN */
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,0, /* GUG */
	21,0,21,0,0,0,0,0,0,0,0,0,0,0,0,0}
}
,
{ /* 3: mitochondrial code of filamentous fungi */
	{"UGA=W"},
	"Mold Mitochondrial; Protozoan Mitochondrial; Coelenterate Mitochondrial; Mycoplasma; Spiroplasma",
     {9,10,9,10,4,4,4,4,1,3,1,3,18,18,19,18,11,12,11,12,5,5,5,
     5,1,1,1,1,2,2,2,2,13,14,13,14,6,6,6,6,7,7,7,7,8,8,8,8,21,15,21,
     15,3,3,3,3,20,16,20,16,2,17,2,17,22},
	4,
	{0,0,0,0,0,0,0,0,0,0,0,0,19,19,19,19, /* AUN */
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,0, /* CUG */
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,0, /* GUG */
	21,0,21,0,0,0,0,0,0,0,0,0,19,0,19,0} /* UUR */
}
,
{ /* 4: mitochondrial code of insects and platyhelminthes */
	{"AUA=M  UGA=W  AGR=S"},
	"Invertebrate Mitochondrial",
     {9,10,9,10,4,4,4,4,3,3,3,3,19,18,19,18,11,12,11,12,5,5,5,
     5,1,1,1,1,2,2,2,2,13,14,13,14,6,6,6,6,7,7,7,7,8,8,8,8,21,15,21,
     15,3,3,3,3,20,16,20,16,2,17,2,17,22},
	5,
	{0,0,0,0,0,0,0,0,0,0,0,0,19,19,19,19, /* AUN */
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,0, /* GUG */
	21,0,21,0,0,0,0,0,0,0,0,0,0,0,19,0} /* UUG */
}
,
{ /* 5: nuclear code of Candida cylindracea (see Nature 341:164) */
	{"CUG=S"},
	"Alternative Yeast Nuclear",
     	{9,10,9,10,4,4,4,4,1,3,1,3,18,18,19,18,
	11,12,11,12,5,5,5,5,1,1,1,1,2,2,3,2,
	13,14,13,14,6,6,6,6,7,7,7,7,8,8,8,8,
	21,15,21,15,3,3,3,3,21,16,20,16,2,17,2,17,22},
	12,
	{0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,0, /* AUG */
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,0, /* CUG */
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	21,0,21,0,0,0,0,0,21,0,0,0,0,0,0,0}
}
,
{ /* 6: nuclear code of Ciliata: UAR = Gln = Q */
	{"UAR=Q"},
	"Ciliate Nuclear; Dasycladacean Nuclear; Hexamita Nuclear",
     {9,10,9,10,4,4,4,4,1,3,1,3,18,18,19,18,11,12,11,12,5,5,5,
     5,1,1,1,1,2,2,2,2,13,14,13,14,6,6,6,6,7,7,7,7,8,8,8,8,11,15,11,
     15,3,3,3,3,21,16,20,16,2,17,2,17,22},
	6,
	{0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,0, /* AUG */
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,21,0,0,0,0,0,0,0}
}
,
{ /* 7: nuclear code of Euplotes */
	{"UGA=C"},
	"Euplotid Nuclear",
     {9,10,9,10,4,4,4,4,1,3,1,3,18,18,19,18,11,12,11,12,5,5,5,
     5,1,1,1,1,2,2,2,2,13,14,13,14,6,6,6,6,7,7,7,7,8,8,8,8,21,15,21,
     15,3,3,3,3,16,16,20,16,2,17,2,17,22},
	10,
	{0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,0, /* AUG */
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	21,0,21,0,0,0,0,0,0,0,0,0,0,0,0,0}
}
,
{ /* 8: mitochondrial code of echinoderms */
	{"UGA=W  AGR=S  AAA=N"},
	"Echinoderm Mitochondrial; Flatworm Mitochondrial",
     	{10,10,9,10,4,4,4,4,3,3,3,3,18,18,19,18,
	11,12,11,12,5,5,5,5,1,1,1,1,2,2,2,2,
	13,14,13,14,6,6,6,6,7,7,7,7,8,8,8,8,
	21,15,21,15,3,3,3,3,20,16,20,16,2,17,2,17,22},
	9,
	{0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,0, /* AUG */
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,0, /* GUG */
	21,0,21,0,0,0,0,0,0,0,0,0,0,0,0,0}
}
,
{ /* 9: mitochondrial code of Ascidiacea */
	{"UGA=W  AGR=G  AUA=M"},
	"Ascidian Mitochondrial",
     	{9,10,9,10,4,4,4,4,7,3,7,3,19,18,19,18,
	11,12,11,12,5,5,5,5,1,1,1,1,2,2,2,2,
	13,14,13,14,6,6,6,6,7,7,7,7,8,8,8,8,
	21,15,21,15,3,3,3,3,20,16,20,16,2,17,2,17,22},
	13,
	{0,0,0,0,0,0,0,0,0,0,0,0,19,0,19,0, /* AUA,AUG */
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,0, /* GUG */
	21,0,21,0,0,0,0,0,0,0,0,0,0,0,19,0} /* UUG */
}
,
{ /* 10: mitochondrial code of platyhelminthes */
	{"UGA=W  AGR=S  UAA=Y AAA=N"},
	"Alternative Flatworm Mitochondrial",
	{10,10,9,10,4,4,4,4,3,3,3,3,18,18,19,18,
	11,12,11,12,5,5,5,5,1,1,1,1,2,2,2,2,
	13,14,13,14,6,6,6,6,7,7,7,7,8,8,8,8,
	15,15,21,15,3,3,3,3,20,16,20,16,2,17,2,17,22},
	14,
	{0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,0, /* AUG */
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,21,0,0,0,0,0,0,0,0,0,0,0,0,0}
}
,
{ /* 11: nuclear code of Blepharisma */
	{"UAG=Q"},
	"Blepharisma Macronuclear",
/*ANN*/	{9,10,9,10,4,4,4,4,1,3,1,3,18,18,19,18,
/*CNN*/	11,12,11,12,5,5,5,5,1,1,1,1,2,2,2,2,
/*GNN*/	13,14,13,14,6,6,6,6,7,7,7,7,8,8,8,8,
/*TNN*/	21,15,11,15,3,3,3,3,21,16,20,16,2,17,2,17,22},
	15,
	{0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,0, /* AUG */
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	21,0,0,0,0,0,0,0,21,0,0,0,0,0,0,0}
}
,
{ /* 12: nuclear code of bacteria: differs only for initiation codons */
	{"NUG=AUN=M when initiation codon"},
	"Bacterial, Archaeal and Plant Plastid",
/*ANN*/	{9,10,9,10,4,4,4,4,1,3,1,3,18,18,19,18,
/*CNN*/	11,12,11,12,5,5,5,5,1,1,1,1,2,2,2,2,
/*GNN*/	13,14,13,14,6,6,6,6,7,7,7,7,8,8,8,8,
/*TNN*/	21,15,21,15,3,3,3,3,21,16,20,16,2,17,2,17,22},
	11,
	{0,0,0,0,0,0,0,0,0,0,0,0,19,19,19,19, /* AUN */
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,0, /* CUG */
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,0, /* GUG */
	21,0,21,0,0,0,0,0,21,0,0,0,0,0,19,0} /* UUG */
}
,
{ /* 13: Chlorophycean Mitochondrial */
	{"UAG=Leu"},
	"Chlorophycean Mitochondrial",
/*ANN*/	{9,10,9,10,4,4,4,4,1,3,1,3,18,18,19,18,
/*CNN*/	11,12,11,12,5,5,5,5,1,1,1,1,2,2,2,2,
/*GNN*/	13,14,13,14,6,6,6,6,7,7,7,7,8,8,8,8,
/*TNN*/	21,15,2,15,3,3,3,3,21,16,20,16,2,17,2,17,22},
/*ncbi*/16,
/*init*/{0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,0, /* AUG */
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	21,0,0,0,0,0,0,0,21,0,0,0,0,0,0,0}
}
,
{ /* 14: mitochondrial code of trematodes */
	{"AUA=M  UGA=W  AGR=S AAA=N"},
	"Trematode Mitochondrial",
     {10,10,9,10,4,4,4,4,3,3,3,3,19,18,19,18,11,12,11,12,5,5,5,
     5,1,1,1,1,2,2,2,2,13,14,13,14,6,6,6,6,7,7,7,7,8,8,8,8,21,15,21,
     15,3,3,3,3,20,16,20,16,2,17,2,17,22},
	21,
	{0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,0, /* AUG */
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,0, /* GUG */
	21,0,21,0,0,0,0,0,0,0,0,0,0,0,0,0}
}
,
{ /* 15: TAG-Leu,TCA-stop */
	{"UAG=L UCA=*"},
	"Scenedesmus obliquus mitochondrial",
/*ANN*/	{9,10,9,10,4,4,4,4,1,3,1,3,18,18,19,18,
/*CNN*/	11,12,11,12,5,5,5,5,1,1,1,1,2,2,2,2,
/*GNN*/	13,14,13,14,6,6,6,6,7,7,7,7,8,8,8,8,
/*TNN*/	21,15,2,15,21,3,3,3,21,16,20,16,2,17,2,17,22},
/*ncbi*/22,
/*init*/{0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,0, /* AUG */
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	21,0,0,0,21,0,0,0,21,0,0,0,0,0,0,0}
}
,
{ /* 16: Thraustochytrium-mt */
	{"UUA=*"},
	"Thraustochytrium mitochondrial",
/*ANN*/	{9,10,9,10,4,4,4,4,1,3,1,3,18,18,19,18,
/*CNN*/	11,12,11,12,5,5,5,5,1,1,1,1,2,2,2,2,
/*GNN*/	13,14,13,14,6,6,6,6,7,7,7,7,8,8,8,8,
/*TNN*/	21,15,21,15,3,3,3,3,21,16,20,16,21,17,2,17,22},
/*ncbi*/23,
/*init*/{0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,19, /* AUG AUU */
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,0, /* GUG */
	21,0,21,0,0,0,0,0,21,0,0,0,21,0,0,0}
}
  ,
  { /* 17: mitochondrial code of Pterobranchia */
    {"UGA=W  AGA=S  AGG=K"},
    "Pterobranchia Mitochondrial",
/*ANN*/	{9,10,9,10,4,4,4,4,3,3,9,3,18,18,19,18,
/*CNN*/	11,12,11,12,5,5,5,5,1,1,1,1,2,2,2,2,
/*GNN*/	13,14,13,14,6,6,6,6,7,7,7,7,8,8,8,8,
/*TNN*/	21,15,21,15,3,3,3,3,20,16,20,16,2,17,2,17,22},
/*ncbi*/24,
/*init*/{0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,0, /* AUG */
         0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,0, /* CUG */
         0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,0, /* GUG */
         21,0,21,0,0,0,0,0,0,0,0,0,0,0,19,0} /* UUG */
  }
  ,
  { /* 18: Candidate Division SR1 and Gracilibacteria */
    {"UGA=G"},
    "Candidate Division SR1 and Gracilibacteria",
    /*ANN*/	{9,10,9,10,4,4,4,4,1,3,1,3,18,18,19,18,
    /*CNN*/	11,12,11,12,5,5,5,5,1,1,1,1,2,2,2,2,
    /*GNN*/	13,14,13,14,6,6,6,6,7,7,7,7,8,8,8,8,
    /*TNN*/	21,15,21,15,3,3,3,3,7,16,20,16,2,17,2,17,22},
    /*ncbi*/25,
    /*init*/{0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,0, /* AUG */
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,0, /* GUG */
      21,0,21,0,0,0,0,0,0,0,0,0,0,0,19,0} /* UUG */
  }
  ,
  { /* 19: Pachysolen tannophilus */
    {"CUG=A"},
    "Pachysolen tannophilus Nuclear",
/*ANN*/	{9,10,9,10,4,4,4,4,1,3,1,3,18,18,19,18,
/*CNN*/	11,12,11,12,5,5,5,5,1,1,1,1,2,2,6,2,
/*GNN*/	13,14,13,14,6,6,6,6,7,7,7,7,8,8,8,8,
/*TNN*/	21,15,21,15,3,3,3,3,21,16,20,16,2,17,2,17,22},
/*ncbi*/26,
/*init*/{0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,0, /* AUG */
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,0, /* CUG */
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	21,0,21,0,0,0,0,0,21,0,0,0,0,0,0,0}
  }
  ,
    { /* 20: Karyorelict Nuclear */
    {"UAR=Q, UGA=W"},
    "Karyorelict Nuclear",
/*ANN*/	{9,10,9,10,4,4,4,4,1,3,1,3,18,18,19,18,
/*CNN*/	11,12,11,12,5,5,5,5,1,1,1,1,2,2,2,2,
/*GNN*/	13,14,13,14,6,6,6,6,7,7,7,7,8,8,8,8,
/*TNN*/	11,15,11,15,3,3,3,3,20,16,20,16,2,17,2,17,22},
/*ncbi*/27,
/*init*/{0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,0, /* AUG */
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,21,0,0,0,0,0,0,0}
  }
  ,
    { /* 21: Condylostoma Nuclear */
    {"UAR=Q, UGA=W"},
    "Condylostoma Nuclear",
/*ANN*/	{9,10,9,10,4,4,4,4,1,3,1,3,18,18,19,18,
/*CNN*/	11,12,11,12,5,5,5,5,1,1,1,1,2,2,2,2,
/*GNN*/	13,14,13,14,6,6,6,6,7,7,7,7,8,8,8,8,
/*TNN*/	11,15,11,15,3,3,3,3,20,16,20,16,2,17,2,17,22},
/*ncbi*/28,
/*init*/{0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,0, /* AUG */
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	21,0,21,0,0,0,0,0,21,0,0,0,0,0,0,0}
  }
  ,
    { /* 22: Mesodinium Nuclear */
    {"UAR=Y"},
    "Mesodinium Nuclear",
/*ANN*/	{9,10,9,10,4,4,4,4,1,3,1,3,18,18,19,18,
/*CNN*/	11,12,11,12,5,5,5,5,1,1,1,1,2,2,2,2,
/*GNN*/	13,14,13,14,6,6,6,6,7,7,7,7,8,8,8,8,
/*TNN*/	15,15,15,15,3,3,3,3,21,16,20,16,2,17,2,17,22},
/*ncbi*/29,
/*init*/{0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,0, /* AUG */
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,21,0,0,0,0,0,0,0}
  }
  ,
    { /* 23: Peritrich Nuclear */
    {"UAR=E"},
    "Peritrich Nuclear",
/*ANN*/	{9,10,9,10,4,4,4,4,1,3,1,3,18,18,19,18,
/*CNN*/	11,12,11,12,5,5,5,5,1,1,1,1,2,2,2,2,
/*GNN*/	13,14,13,14,6,6,6,6,7,7,7,7,8,8,8,8,
/*TNN*/	13,15,13,15,3,3,3,3,21,16,20,16,2,17,2,17,22},
/*ncbi*/30,
/*init*/{0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,0, /* AUG */
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,21,0,0,0,0,0,0,0}
  }
  ,
    { /* 24: Blastocrithidia Nuclear */
    {"UAR=E, UGA=W"},
    "Blastocrithidia Nuclear",
/*ANN*/	{9,10,9,10,4,4,4,4,1,3,1,3,18,18,19,18,
/*CNN*/	11,12,11,12,5,5,5,5,1,1,1,1,2,2,2,2,
/*GNN*/	13,14,13,14,6,6,6,6,7,7,7,7,8,8,8,8,
/*TNN*/	13,15,13,15,3,3,3,3,20,16,20,16,2,17,2,17,22},
/*ncbi*/31,
/*init*/{0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,0, /* AUG */
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	21,0,21,0,0,0,0,0,0,0,0,0,0,0,0,0}
  }
  ,
    { /* 25: Balanophoraceae Plastid */
    {"UAG=W"},
    "Balanophoraceae Plastid",
/*ANN*/	{9,10,9,10,4,4,4,4,1,3,1,3,18,18,19,18,
/*CNN*/	11,12,11,12,5,5,5,5,1,1,1,1,2,2,2,2,
/*GNN*/	13,14,13,14,6,6,6,6,7,7,7,7,8,8,8,8,
/*TNN*/	21,15,20,15,3,3,3,3,21,16,20,16,2,17,2,17,22},
/*ncbi*/32,
/*init*/{0,0,0,0,0,0,0,0,0,0,0,0,19,19,19,19, /* AUN */
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,0, /* CUG */
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,0, /* GUG */
	21,0,0,0,0,0,0,0,21,0,0,0,0,0,19,0} /* UUG */
  }
  ,
    { /* 26: Cephalodiscidae Mitochondria */
    {"UAA=Y, UGA=W, AGA=S, AGG=K"},
    "Cephalodiscidae Mitochondrial",
/*ANN*/	{9,10,9,10,4,4,4,4,3,3,9,3,18,18,19,18,
/*CNN*/	11,12,11,12,5,5,5,5,1,1,1,1,2,2,2,2,
/*GNN*/	13,14,13,14,6,6,6,6,7,7,7,7,8,8,8,8,
/*TNN*/	15,15,21,15,3,3,3,3,20,16,20,16,2,17,2,17,22},
/*ncbi*/33,
/*init*/{0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,0, /* AUG */
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,0, /* CUG */
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,0, /* GUG */
	0,0,21,0,0,0,0,0,0,0,0,0,0,0,19,0} /* UUG */
  }

/*       1         2
1234567890123456789012
RLSTPAGVKNQHEDYCFIMW*X
*/

};
462 
463 
get_code_descr(int code)464 char *get_code_descr(int code)
465 /*
466 get a 60-letter (or less) description of a variant genetic code
467 return value	pointer to the description, not to be altered!
468 */
469 {
470 if(code>=0 && code<totcodes)
471 	return genetic_code[code].libel ;
472 else
473 	return "Unknown genetic code. Standard code is used.";
474 }
475 
476 
get_code_target(int code)477 char *get_code_target(int code)
478 /*
479 get a description of target organisms of a variant genetic code
480 return value	pointer to the description, not to be altered!
481 */
482 {
483 if(code >= 0 && code < totcodes)
484 	return genetic_code[code].target ;
485 else
486 	return "Unknown genetic code. Standard code is used.";
487 }
488 
489 
int calc_codon_number(char *codon)
/*
Compute the rank of a codon in alphabetical order of its bases.
codon	the 3 bases of the codon; upper or lower case, T and U equivalent
return value	0..63 (base values A=0, C=1, G=2, T/U=3, first base most
		significant), or 64 when one of the 3 positions holds anything
		else than AaCcGgTtUu, including the end of the string
*/
{
    static const char nucleotides[] = "AaCcGgTtUu";
    static const int nucnum[5] = {0,1,2,3,3};
    int num = 0;
    int i;

    for (i = 0; i < 3; i++) {
        const char *p;

        /* strchr() also matches the terminating NUL of nucleotides, which
           would index past the end of nucnum: a codon shorter than
           3 characters is therefore rejected explicitly. */
        if (codon[i] == 0) {
            return 64;
        }
        p = strchr(nucleotides, codon[i]);
        if (p == NULL) {
            return 64;
        }
        /* two letters (upper/lower case) per entry of nucnum */
        num = num * 4 + nucnum[(p - nucleotides) / 2];
    }
    return num;
}
511 
512 
codaa(char * codon,int code)513 char codaa(char *codon, int code)
514 /*
515 amino acid translation:
516 codon	a 3-base string
517 code	the genetic code to be used
518 return value	the amino acid as 1 character
519 */
520 {
521 struct genetic_code_libel *pdata;
522 int num;
523 
524 num = calc_codon_number(codon);
525 if(code < 0 || code >= totcodes)code = 0;/*use regular code if unknown number */
526 pdata = &genetic_code[code]; /* ici ecriture plus compacte mal compilee sur PC*/
527 return aminoacids[ pdata->code[num] - 1 ];
528 }
529 
530 
get_ncbi_gc_number(int gc)531 int get_ncbi_gc_number(int gc)
532 { /* from acnuc to ncbi genetic code number */
533 return genetic_code[gc].ncbi_gc;
534 }
535 
536 
get_acnuc_gc_number(int ncbi_gc)537 int get_acnuc_gc_number(int ncbi_gc)
538 { /* from ncbi to acnuc genetic code number (returns 0 if not found) */
539 int num;
540 
541 for( num = 0; num < totcodes; num++ )
542 	if(genetic_code[num].ncbi_gc == ncbi_gc) return num;
543 return 0;
544 }
545 
546 
strcmptrail(char * s1,int l1,char * s2,int l2)547 int strcmptrail(char *s1, int l1, char *s2, int l2)
548 /*
549 compare strings s1 and s2 of length l1 and l2 as done by strcmp
550 but ignores all trailing spaces
551 */
552 {
553 char *fin;
554 int l, flag=1;
555 
556 if(l1 > 0) {
557 	if( (fin = (char *)memchr(s1, 0, l1) ) != NULL) l1 = (int)(fin - s1);
558 	}
559 if(l2 > 0) {
560 	if( (fin = (char *)memchr(s2, 0, l2) ) != NULL) l2 = (int)(fin - s2);
561 	}
562 
563 if(l2 > l1) {
564 	flag = -1;
565 	fin=s1; s1=s2; s2=fin;
566 	l=l1; l1=l2; l2=l;
567 	}
568 l = l2;
569 fin = s2 + l;
570 while(s2 < fin) {
571 	if( *s1 != *s2 ) return (*s1 - *s2)*flag;
572 	s1++; s2++;
573 	}
574 fin= s1+l1-l2;
575 while(s1 < fin)	{
576 	if( *s1 != ' ') return flag;
577 	s1++;
578 	}
579 return 0;
580 }
581 
582 
void majuscules(char *name)
/*
Convert a NUL-terminated string to upper case, in place.
name	the string to convert
*/
{
    char *p;

    for (p = name; *p != 0; p++) {
        /* toupper() is only defined for EOF and unsigned char values:
           a plain char above 127 may be negative and must be converted. */
        *p = (char)toupper((unsigned char)*p);
    }
}
588 
589 
void compact(char *chaine)
/*
Remove every space character from a NUL-terminated string, in place.
chaine	the string to compact; left untouched (not even rewritten) when it
	contains no space
*/
{
    char *src, *dst;

    /* Single pass: dst trails src and receives the characters to keep.
       Nothing before the start of the buffer is ever read, so empty and
       all-space strings are safe. */
    dst = chaine;
    for (src = chaine; *src != 0; src++) {
        if (*src != ' ') {
            if (dst != src) *dst = *src;
            dst++;
        }
    }
    if (dst != src) *dst = 0;
}
605 
606 
607 
void *mycalloc(int nbre, size_t size)
/*
Allocate zero-filled memory with calloc and never return NULL.
nbre	number of elements
size	size of one element
return value	the allocated memory; when calloc fails a message is written
		to stderr and the program exits with status 1
*/
{
    void *mem = calloc((unsigned)nbre, size);

    if (mem != NULL) {
        return mem;
    }
    fprintf(stderr,"Error: problem allocating memory.\n");
    exit(1);
}
619 
620 
char complementer_base(char nucl)
/*
Complement of one nucleotide.
nucl	a base, upper or lower case
return value	the complementary base, always in lower case: a<->t, c<->g,
		u->a, r<->y; 'n' for any other character
*/
{
    /* bases[i] is complemented into partners[i] */
    static const char bases[]    = "aAcCgGuUtTrRyY";
    static const char partners[] = "ttggccaaaayyrr";
    const char *found;

    if (nucl == 0) {
        return 'n'; /* strchr would match the terminating NUL of bases */
    }
    found = strchr(bases, nucl);
    if (found == NULL) {
        return 'n';
    }
    return partners[found - bases];
}
648 
649 
/*   ~~~~~~~~~~~~ reverse-complement a sequence ~~~~~~~~~~~
 * receives the address of the start of a character array and its length,
 * and reverses and complements that sequence in place.
 * Each base is complemented by complementer_base(), so the result is in
 * lower case and characters that function does not know become 'n'.
 * Nothing is done when l <= 0.
 * -------------------------------------------------------------------- */

void complementer_seq(char *deb_ch, int l)
{
    int left, right;

    /* With l == 0 the former loop bound (l-1)/2 evaluated to 0, so one
       iteration ran and touched deb_ch[0] and deb_ch[-1]. */
    if (l <= 0) {
        return;
    }

    /* Exchange the complemented ends, moving inwards; when l is odd the
       middle base meets itself and is simply complemented. */
    for (left = 0, right = l - 1; left <= right; left++, right--) {
        char from_left = complementer_base(deb_ch[left]);
        char from_right = complementer_base(deb_ch[right]);

        deb_ch[left] = from_right;
        deb_ch[right] = from_left;
    }
}
673 
674 
init_codon_to_aa(char * codon,int gc)675 char init_codon_to_aa(char *codon, int gc)
676 {
677 int num, aa;
678 struct genetic_code_libel *pdata;
679 
680 num = calc_codon_number(codon);
681 if(num >= 64) return 'X';
682 /* use regular code if unknown number */
683 if(gc < 0 || gc >= totcodes) gc = 0;
684 pdata = &genetic_code[gc];
685 aa = pdata->codon_init[num];
686 /* if not listed in expected init codons */
687 if(aa == 0 || aa == 21) aa = pdata->code[num];
688 return aminoacids[aa - 1];
689 }
690 
691 
stop_codon_to_aa(char * codon,int gc)692 char stop_codon_to_aa(char *codon, int gc)
693 {
694 int num, aa;
695 struct genetic_code_libel *pdata;
696 
697 num = calc_codon_number(codon);
698 if(num >= 64) return 'X';
699 /* use regular code if unknown number */
700 if(gc < 0 || gc >= totcodes) gc = 0;
701 pdata = &genetic_code[gc];
702 aa = pdata->code[num];
703 if (aa != 21) {
704 	aa = pdata->codon_init[num];
705 	/* if not listed in expected stop codons */
706 	if (aa != 21) aa = pdata->code[num];
707 }
708 return aminoacids[aa - 1];
709 }
710 
711 
int notrail2(char *chaine, int len)
/*
Length of a character field once its trailing spaces are ignored.
chaine	the characters (not modified, no NUL needed)
len	number of characters to consider
return value	number of characters up to and including the last one that
		is not a space; len itself when len <= 0
*/
{
    int kept;

    for (kept = len; kept > 0; kept--) {
        if (chaine[kept - 1] != ' ') {
            break;
        }
    }
    return kept;
}
718 
719 
int prepch(char *chaine, char **posmot)
{
/*
chaine: template to search for, containing @ wildcards. It is modified
	(trailing spaces are cut and the first @ of each run of wildcards is
	replaced by NUL so that each word is NUL-terminated) and must not be
	changed afterwards.
posmot: array of pointers to char, filled on return: one element per word
	of the template (pointing into chaine) and one NULL element per run
	of wildcards, in template order.
return value: number of elements stored in posmot; 0, with nothing
	modified, when chaine holds no wildcard.
*/
    const char joker = '@';
    char *cur, *last;
    int count = 0;

    if (strchr(chaine, '@') == NULL) {
        return 0;
    }

    /* cut trailing spaces */
    last = chaine + strlen(chaine) - 1;
    while (last >= chaine && *last == ' ') {
        last--;
    }
    last[1] = 0;

    for (cur = chaine; *cur != 0; cur++) {
        if (*cur == joker) {
            posmot[count++] = NULL;
            *cur = 0; /* terminates the word before it */
            while (cur[1] == joker) cur++; /* a run counts once */
        }
        else {
            posmot[count++] = cur;
            /* stop on the last character of the word */
            while (cur[1] != joker && cur[1] != 0) cur++;
        }
    }
    return count;
}
752 
753 
/* Bounded search: first occurrence of the nlen characters of needle within
   the haylen characters starting at hay, or NULL if there is none. */
static const char *compch_find(const char *hay, int haylen,
                               const char *needle, int nlen)
{
    int i;

    for (i = 0; i + nlen <= haylen; i++) {
        if (memcmp(hay + i, needle, nlen) == 0) {
            return hay + i;
        }
    }
    return NULL;
}

int compch(char *cible, int lcible, char **posmot, int nbrmots)
{
/*
cible: string to test for presence of the template
lcible: length of cible, which need not be terminated by \0; trailing
	spaces are ignored. Any length is accepted: the target is examined in
	place, without being copied to a fixed-size buffer.
posmot: array built by prepch
nbrmots: value returned by prepch
return value: 1 iff the template matches cible, 0 otherwise (also 0 when
	nbrmots <= 0, i.e. when there is no prepared template)

The words of the template must be found in order and without overlapping:
the first word, if the template does not start with @, at the very start of
cible; the last word, if the template does not end with @, at the very end
of cible and after everything matched before it.
*/
    const char *cur, *end, *hit;
    int num = 0, l, total;

    if (nbrmots <= 0) {
        return 0;
    }
    if (lcible < 0) {
        lcible = 0;
    }
    while (lcible > 0 && cible[lcible - 1] == ' ') {
        lcible--;
    }
    cur = cible;
    end = cible + lcible;

    /* index of the last element handled by the search loop below */
    if (posmot[nbrmots - 1] == NULL)
        total = nbrmots - 1;
    else
        total = nbrmots - 2;

    if (posmot[0] != NULL) { /* comparison with the initial word */
        l = (int)strlen(posmot[0]);
        if (l > end - cur || memcmp(cur, posmot[0], l) != 0) return 0;
        cur += l;
        num++;
    }
    while (num < total) { /* search for internal words */
        num++; /* step from the wildcard to the word that follows it */
        l = (int)strlen(posmot[num]);
        hit = compch_find(cur, (int)(end - cur), posmot[num], l);
        if (hit == NULL) return 0;
        cur = hit + l;
        num++;
    }
    if (total == nbrmots - 1) return 1; /* template ends with @ */

    /* test whether cible ends with the last word of the template; the word
       must fit in what remains after the part already matched */
    l = (int)strlen(posmot[nbrmots - 1]);
    if (l > end - cur) return 0;
    return memcmp(end - l, posmot[nbrmots - 1], l) == 0;
}
799 
800 
/* Build "name=value" in store (size bytes, which must outlive the call
   because putenv keeps the pointer) and hand it to putenv.
   Returns non-zero if the definition does not fit or putenv fails; store is
   left untouched when the definition does not fit. */
static int chg_acnuc_define(char *store, size_t size, const char *name,
                            const char *value)
{
    char draft[200];
    int n;

    n = snprintf(draft, sizeof draft, "%s=%s", name, value);
    if (n < 0 || (size_t)n >= sizeof draft || (size_t)n >= size) {
        return 1;
    }
    memcpy(store, draft, (size_t)n + 1);
    return putenv(store) != 0;
}

int chg_acnuc(char *acnucvar, char *gcgacnucvar)
/*
Changing the values of variables  acnuc  and  gcgacnuc :
acnucvar	the new value of acnuc (may be a variable, may be acnuc itself)
gcgacnucvar	the new value of gcgacnuc (may be a variable,
		may be gcgacnuc itself)
returns TRUE if error, FALSE if ok

An argument equal to the name of the variable itself leaves that variable
unchanged. Otherwise, if the argument is the name of an existing
environment variable, the value of that variable is used; if not, the
argument itself is the new value.
A definition that does not fit in the 200-character internal buffer is
reported as an error instead of overflowing the buffer.
*/
{
    /* static: the environment keeps pointing to these buffers */
    static char newacnuc[200], newgcgacnuc[200];
    char *point;

    if (strcmp(acnucvar, "acnuc") != 0) {
        point = getenv(acnucvar);
        if (point != NULL) acnucvar = point;
        if (chg_acnuc_define(newacnuc, sizeof newacnuc, "acnuc", acnucvar))
            return 1;
    }

    if (strcmp(gcgacnucvar, "gcgacnuc") != 0) {
        point = getenv(gcgacnucvar);
        if (point != NULL) gcgacnucvar = point;
        if (chg_acnuc_define(newgcgacnuc, sizeof newgcgacnuc, "gcgacnuc",
                             gcgacnucvar))
            return 1;
    }
    return 0;
}
830 
831 
void gets_no_echo(char *password, size_t lpw)
/*
Read a password from standard input, echoing a '*' for each character.
password	receives the characters typed followed by a NUL; it must
		have room for lpw + 1 characters
lpw		maximum number of characters stored; reading stops as soon
		as that many have been stored
Reading also stops at end of line ('\n' or '\r'), at end of file or on a
read error. Backspace and DEL erase the previous character, if any.
When standard input is a terminal (unix-like systems) or a console (WIN32),
echo and line buffering are switched off while reading and restored
afterwards.
*/
{
char *p;
int c; /* int, not char: must hold every character value and EOF */
#ifdef unixlike
struct termios initialrsettings, newrsettings;
int err;
#elif defined(WIN32)
DWORD mode, savemode, lu;
HANDLE hconsole;
int err;
char wc;
#endif

#ifdef unixlike
err = tcgetattr( fileno(stdin), &initialrsettings );
if(err == 0) {
	newrsettings = initialrsettings;
	newrsettings.c_lflag &= ~ECHO;
	newrsettings.c_lflag &= ~ICANON;
	tcsetattr( fileno(stdin), TCSAFLUSH, &newrsettings );
	}
#elif defined(WIN32)
hconsole = GetStdHandle(STD_INPUT_HANDLE);
err = GetConsoleMode(hconsole, &savemode);
if(err != 0) {
	mode = savemode;
	mode &= ~ENABLE_ECHO_INPUT;
	mode &= ~ENABLE_LINE_INPUT;
	SetConsoleMode(hconsole, mode);
	FlushConsoleInputBuffer(hconsole); /* necessary */
	}
#endif
p = password;
/* The limit is tested before reading, so that nothing is read or stored
   when lpw is 0. */
while((size_t)(p - password) < lpw) {
#ifdef WIN32
	/* stop on failure instead of using an unset character */
	if( !ReadConsole(hconsole, &wc, 1, &lu, NULL) || lu != 1 ) break;
	c = (unsigned char)wc;
#else
	c = getchar();
	if(c == EOF) break;
#endif
	if(c == '\n' || c == '\r') break;
	if(c != '\b' && c != '\x7F' ) {
		*(p++) = (char)c;
		putchar('*');
		}
	else if(p > password) {
		/* erase the last character and its '*' on screen */
		p--;
		putchar('\b');putchar(' ');putchar('\b');
		}
	}
*p = 0;
#ifdef unixlike
if(err == 0) {
	tcsetattr( fileno(stdin), TCSANOW, &initialrsettings );
	putchar('\n');
	}
#elif defined(WIN32)
if(err != 0) {
	SetConsoleMode(hconsole, savemode);
	putchar('\n');
	}
#endif
return;
}
897 
898 
899