1
2 #include <stdlib.h>
3 #include <stdio.h>
4 #include <string.h>
5 #include <ctype.h>
6 #if defined(WIN32)
7 #include <Windows.h>
8 #elif defined(unix) || defined(__APPLE__)
9 #define unixlike
10 #include <termios.h>
11 #endif
12
13
14
15 /* included functions */
/* prototypes of the functions defined in this file */
int trim_key(char *name); /* remove trailing spaces */
void padtosize(char *pname, char *name, int length);
char *get_code_descr(int code);
char *get_code_target(int code);
int calc_codon_number(char *codon);
char codaa(char *codon, int code);
int get_ncbi_gc_number(int gc);
int get_acnuc_gc_number(int ncbi_gc);
int strcmptrail(char *s1, int l1, char *s2, int l2);
void majuscules(char *name);
void compact(char *chaine);
void *mycalloc(int nbre, size_t size);
char complementer_base(char nucl);
void complementer_seq(char *deb_ch, int l);
char init_codon_to_aa(char *codon, int gc);
char stop_codon_to_aa(char *codon, int gc);
int notrail2(char *chaine, int len);
int prepch(char *chaine, char **posmot);
int compch(char *cible, int lcible, char **posmot, int nbrmots);
int chg_acnuc(char *acnucvar, char *gcgacnucvar);
void gets_no_echo(char *password, size_t lpw);
36
37
/*
 Remove the trailing spaces of a NUL-terminated string, in place.
   name          the string to trim (modified)
   return value  the length of the string after trimming
 The scan is index-based so that no pointer before the start of the
 string is ever formed, even for an empty or all-blank string.
*/
int trim_key(char *name)
{
    size_t len = strlen(name);

    while (len > 0 && name[len - 1] == ' ')
        name[--len] = 0;
    return (int)len;
}
46
47
/*
 Copy name into pname, truncated or padded with spaces to exactly
 `length` characters, and terminate it with a NUL.
   pname   destination, must have room for length + 1 characters
   name    NUL-terminated source string
   length  number of characters wanted before the final NUL
*/
void padtosize(char *pname, char *name, int length)
{
    int filled = 0;

    /* copy at most `length` characters of the source */
    while (filled < length && name[filled] != 0) {
        pname[filled] = name[filled];
        filled++;
    }
    /* complete with blanks up to the requested size */
    while (filled < length)
        pname[filled++] = ' ';
    pname[length] = 0;
}
55
56
#define TOTCODES 27 /* total number of genetic codes defined, code 0 included */
int totcodes=TOTCODES;

/* one-letter amino acid symbols; position p (counted from 1) is amino acid number p */
char aminoacids[]="RLSTPAGVKNQHEDYCFIMW*X";

struct genetic_code_libel { /* definition of one genetic code */
    char libel[61];      /* name of the code: its differences relative to the standard code */
    char* target;        /* organisms / compartments using this code */
    int code[65];        /* codon -> amino acid table (entry 64 is for unknown codons) */
    int ncbi_gc;         /* NCBI number of the same code */
    int codon_init[64];  /* initiation or stop codon -> amino acid table */
    };

/*
Codons are numbered from 1 to 64 in alphabetical order (A, C, G, T/U);
number 65 is given to any codon containing a base outside AaCcGgTtUu.
Amino acids are numbered according to their order in variable aminoacids,
from 1 to 20, plus 21 (*) for stop and 22 (X) for unknown.
Table codon_init gives 19 (=M) for initiation codons and 21 (=*) for stop codons.
In some genetic codes, the same codon corresponds to an aa in table code
and to a stop in table codon_init.

         1         2
1234567890123456789012
RLSTPAGVKNQHEDYCFIMW*X

In the tables below each line holds the 16 codons sharing the same first
base; values are grouped by second base, in the order A, C, G, T.
*/

/* initialisation of all genetic codes */
struct genetic_code_libel genetic_code[TOTCODES] =
{

{ /* 0: standard */
    {"Standard genetic code"},
    "Standard",
    /*ANN*/ {9,10,9,10, 4,4,4,4, 1,3,1,3, 18,18,19,18,
    /*CNN*/  11,12,11,12, 5,5,5,5, 1,1,1,1, 2,2,2,2,
    /*GNN*/  13,14,13,14, 6,6,6,6, 7,7,7,7, 8,8,8,8,
    /*TNN*/  21,15,21,15, 3,3,3,3, 21,16,20,16, 2,17,2,17, 22},
    /*ncbi*/1,
    /*init*/{0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,19,0, /* AUG */
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,19,0, /* CUG */
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
             21,0,21,0, 0,0,0,0, 21,0,0,0, 0,0,19,0} /* UUG */
}
,
{ /* 1: yeast mt */
    {"CUN=T  AUA=M  UGA=W"},
    "Yeast Mitochondrial",
    /*ANN*/ {9,10,9,10, 4,4,4,4, 1,3,1,3, 19,18,19,18,
    /*CNN*/  11,12,11,12, 5,5,5,5, 1,1,1,1, 4,4,4,4,
    /*GNN*/  13,14,13,14, 6,6,6,6, 7,7,7,7, 8,8,8,8,
    /*TNN*/  21,15,21,15, 3,3,3,3, 20,16,20,16, 2,17,2,17, 22},
    /*ncbi*/3,
    /*init*/{0,0,0,0, 0,0,0,0, 0,0,0,0, 19,0,19,0, /* AUA, AUG */
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
             21,0,21,0, 0,0,0,0, 0,0,0,0, 0,0,0,0}
}
,
{ /* 2: mitochondrial code of vertebrates */
    {"AGR=*  AUA=M  UGA=W"},
    "Vertebrate Mitochondrial",
    /*ANN*/ {9,10,9,10, 4,4,4,4, 21,3,21,3, 19,18,19,18,
    /*CNN*/  11,12,11,12, 5,5,5,5, 1,1,1,1, 2,2,2,2,
    /*GNN*/  13,14,13,14, 6,6,6,6, 7,7,7,7, 8,8,8,8,
    /*TNN*/  21,15,21,15, 3,3,3,3, 20,16,20,16, 2,17,2,17, 22},
    /*ncbi*/2,
    /*init*/{0,0,0,0, 0,0,0,0, 21,0,21,0, 19,19,19,19, /* AUN */
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,19,0, /* GUG */
             21,0,21,0, 0,0,0,0, 0,0,0,0, 0,0,0,0}
}
,
{ /* 3: mitochondrial code of filamentous fungi */
    {"UGA=W"},
    "Mold Mitochondrial; Protozoan Mitochondrial; Coelenterate Mitochondrial; Mycoplasma; Spiroplasma",
    /*ANN*/ {9,10,9,10, 4,4,4,4, 1,3,1,3, 18,18,19,18,
    /*CNN*/  11,12,11,12, 5,5,5,5, 1,1,1,1, 2,2,2,2,
    /*GNN*/  13,14,13,14, 6,6,6,6, 7,7,7,7, 8,8,8,8,
    /*TNN*/  21,15,21,15, 3,3,3,3, 20,16,20,16, 2,17,2,17, 22},
    /*ncbi*/4,
    /*init*/{0,0,0,0, 0,0,0,0, 0,0,0,0, 19,19,19,19, /* AUN */
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,19,0, /* CUG */
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,19,0, /* GUG */
             21,0,21,0, 0,0,0,0, 0,0,0,0, 19,0,19,0} /* UUR */
}
,
{ /* 4: mitochondrial code of insects and platyhelminthes */
    {"AUA=M  UGA=W  AGR=S"},
    "Invertebrate Mitochondrial",
    /*ANN*/ {9,10,9,10, 4,4,4,4, 3,3,3,3, 19,18,19,18,
    /*CNN*/  11,12,11,12, 5,5,5,5, 1,1,1,1, 2,2,2,2,
    /*GNN*/  13,14,13,14, 6,6,6,6, 7,7,7,7, 8,8,8,8,
    /*TNN*/  21,15,21,15, 3,3,3,3, 20,16,20,16, 2,17,2,17, 22},
    /*ncbi*/5,
    /*init*/{0,0,0,0, 0,0,0,0, 0,0,0,0, 19,19,19,19, /* AUN */
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,19,0, /* GUG */
             21,0,21,0, 0,0,0,0, 0,0,0,0, 0,0,19,0} /* UUG */
}
,
{ /* 5: Nuclear code of Candida cylindracea (see nature 341:164) */
    {"CUG=S"},
    "Alternative Yeast Nuclear",
    /*ANN*/ {9,10,9,10, 4,4,4,4, 1,3,1,3, 18,18,19,18,
    /*CNN*/  11,12,11,12, 5,5,5,5, 1,1,1,1, 2,2,3,2,
    /*GNN*/  13,14,13,14, 6,6,6,6, 7,7,7,7, 8,8,8,8,
    /*TNN*/  21,15,21,15, 3,3,3,3, 21,16,20,16, 2,17,2,17, 22},
    /*ncbi*/12,
    /*init*/{0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,19,0, /* AUG */
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,19,0, /* CUG */
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
             21,0,21,0, 0,0,0,0, 21,0,0,0, 0,0,0,0}
}
,
{ /* 6: nuclear code of Ciliata: UAR = Gln = Q */
    {"UAR=Q"},
    "Ciliate Nuclear; Dasycladacean Nuclear; Hexamita Nuclear",
    /*ANN*/ {9,10,9,10, 4,4,4,4, 1,3,1,3, 18,18,19,18,
    /*CNN*/  11,12,11,12, 5,5,5,5, 1,1,1,1, 2,2,2,2,
    /*GNN*/  13,14,13,14, 6,6,6,6, 7,7,7,7, 8,8,8,8,
    /*TNN*/  11,15,11,15, 3,3,3,3, 21,16,20,16, 2,17,2,17, 22},
    /*ncbi*/6,
    /*init*/{0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,19,0, /* AUG */
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
             0,0,0,0, 0,0,0,0, 21,0,0,0, 0,0,0,0}
}
,
{ /* 7: nuclear code of Euplotes */
    {"UGA=C"},
    "Euplotid Nuclear",
    /*ANN*/ {9,10,9,10, 4,4,4,4, 1,3,1,3, 18,18,19,18,
    /*CNN*/  11,12,11,12, 5,5,5,5, 1,1,1,1, 2,2,2,2,
    /*GNN*/  13,14,13,14, 6,6,6,6, 7,7,7,7, 8,8,8,8,
    /*TNN*/  21,15,21,15, 3,3,3,3, 16,16,20,16, 2,17,2,17, 22},
    /*ncbi*/10,
    /*init*/{0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,19,0, /* AUG */
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
             21,0,21,0, 0,0,0,0, 0,0,0,0, 0,0,0,0}
}
,
{ /* 8: mitochondrial code of echinoderms */
    {"UGA=W  AGR=S  AAA=N"},
    "Echinoderm Mitochondrial; Flatworm Mitochondrial",
    /*ANN*/ {10,10,9,10, 4,4,4,4, 3,3,3,3, 18,18,19,18,
    /*CNN*/  11,12,11,12, 5,5,5,5, 1,1,1,1, 2,2,2,2,
    /*GNN*/  13,14,13,14, 6,6,6,6, 7,7,7,7, 8,8,8,8,
    /*TNN*/  21,15,21,15, 3,3,3,3, 20,16,20,16, 2,17,2,17, 22},
    /*ncbi*/9,
    /*init*/{0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,19,0, /* AUG */
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,19,0, /* GUG */
             21,0,21,0, 0,0,0,0, 0,0,0,0, 0,0,0,0}
}
,
{ /* 9: mitochondrial code of Ascidiacea */
    {"UGA=W  AGR=G  AUA=M"},
    "Ascidian Mitochondrial",
    /*ANN*/ {9,10,9,10, 4,4,4,4, 7,3,7,3, 19,18,19,18,
    /*CNN*/  11,12,11,12, 5,5,5,5, 1,1,1,1, 2,2,2,2,
    /*GNN*/  13,14,13,14, 6,6,6,6, 7,7,7,7, 8,8,8,8,
    /*TNN*/  21,15,21,15, 3,3,3,3, 20,16,20,16, 2,17,2,17, 22},
    /*ncbi*/13,
    /*init*/{0,0,0,0, 0,0,0,0, 0,0,0,0, 19,0,19,0, /* AUA,AUG */
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,19,0, /* GUG */
             21,0,21,0, 0,0,0,0, 0,0,0,0, 0,0,19,0} /* UUG */
}
,
{ /* 10: mitochondrial code of platyhelminthes */
    {"UGA=W  AGR=S  UAA=Y AAA=N"},
    "Alternative Flatworm Mitochondrial",
    /*ANN*/ {10,10,9,10, 4,4,4,4, 3,3,3,3, 18,18,19,18,
    /*CNN*/  11,12,11,12, 5,5,5,5, 1,1,1,1, 2,2,2,2,
    /*GNN*/  13,14,13,14, 6,6,6,6, 7,7,7,7, 8,8,8,8,
    /*TNN*/  15,15,21,15, 3,3,3,3, 20,16,20,16, 2,17,2,17, 22},
    /*ncbi*/14,
    /*init*/{0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,19,0, /* AUG */
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
             0,0,21,0, 0,0,0,0, 0,0,0,0, 0,0,0,0}
}
,
{ /* 11: nuclear code of Blepharisma */
    {"UAG=Q"},
    "Blepharisma Macronuclear",
    /*ANN*/ {9,10,9,10, 4,4,4,4, 1,3,1,3, 18,18,19,18,
    /*CNN*/  11,12,11,12, 5,5,5,5, 1,1,1,1, 2,2,2,2,
    /*GNN*/  13,14,13,14, 6,6,6,6, 7,7,7,7, 8,8,8,8,
    /*TNN*/  21,15,11,15, 3,3,3,3, 21,16,20,16, 2,17,2,17, 22},
    /*ncbi*/15,
    /*init*/{0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,19,0, /* AUG */
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
             21,0,0,0, 0,0,0,0, 21,0,0,0, 0,0,0,0}
}
,
{ /* 12: nuclear code of bacteria: differs only for initiation codons */
    {"NUG=AUN=M when initiation codon"},
    "Bacterial, Archaeal and Plant Plastid",
    /*ANN*/ {9,10,9,10, 4,4,4,4, 1,3,1,3, 18,18,19,18,
    /*CNN*/  11,12,11,12, 5,5,5,5, 1,1,1,1, 2,2,2,2,
    /*GNN*/  13,14,13,14, 6,6,6,6, 7,7,7,7, 8,8,8,8,
    /*TNN*/  21,15,21,15, 3,3,3,3, 21,16,20,16, 2,17,2,17, 22},
    /*ncbi*/11,
    /*init*/{0,0,0,0, 0,0,0,0, 0,0,0,0, 19,19,19,19, /* AUN */
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,19,0, /* CUG */
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,19,0, /* GUG */
             21,0,21,0, 0,0,0,0, 21,0,0,0, 0,0,19,0} /* UUG */
}
,
{ /* 13: Chlorophycean Mitochondrial */
    {"UAG=Leu"},
    "Chlorophycean Mitochondrial",
    /*ANN*/ {9,10,9,10, 4,4,4,4, 1,3,1,3, 18,18,19,18,
    /*CNN*/  11,12,11,12, 5,5,5,5, 1,1,1,1, 2,2,2,2,
    /*GNN*/  13,14,13,14, 6,6,6,6, 7,7,7,7, 8,8,8,8,
    /*TNN*/  21,15,2,15, 3,3,3,3, 21,16,20,16, 2,17,2,17, 22},
    /*ncbi*/16,
    /*init*/{0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,19,0, /* AUG */
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
             21,0,0,0, 0,0,0,0, 21,0,0,0, 0,0,0,0}
}
,
{ /* 14: mitochondrial code of trematodes */
    {"AUA=M  UGA=W  AGR=S AAA=N"},
    "Trematode Mitochondrial",
    /*ANN*/ {10,10,9,10, 4,4,4,4, 3,3,3,3, 19,18,19,18,
    /*CNN*/  11,12,11,12, 5,5,5,5, 1,1,1,1, 2,2,2,2,
    /*GNN*/  13,14,13,14, 6,6,6,6, 7,7,7,7, 8,8,8,8,
    /*TNN*/  21,15,21,15, 3,3,3,3, 20,16,20,16, 2,17,2,17, 22},
    /*ncbi*/21,
    /*init*/{0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,19,0, /* AUG */
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,19,0, /* GUG */
             21,0,21,0, 0,0,0,0, 0,0,0,0, 0,0,0,0}
}
,
{ /* 15: TAG-Leu,TCA-stop */
    {"UAG=L UCA=*"},
    "Scenedesmus obliquus mitochondrial",
    /*ANN*/ {9,10,9,10, 4,4,4,4, 1,3,1,3, 18,18,19,18,
    /*CNN*/  11,12,11,12, 5,5,5,5, 1,1,1,1, 2,2,2,2,
    /*GNN*/  13,14,13,14, 6,6,6,6, 7,7,7,7, 8,8,8,8,
    /*TNN*/  21,15,2,15, 21,3,3,3, 21,16,20,16, 2,17,2,17, 22},
    /*ncbi*/22,
    /*init*/{0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,19,0, /* AUG */
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
             21,0,0,0, 21,0,0,0, 21,0,0,0, 0,0,0,0}
}
,
{ /* 16: Thraustochytrium-mt */
    {"UUA=*"},
    "Thraustochytrium mitochondrial",
    /*ANN*/ {9,10,9,10, 4,4,4,4, 1,3,1,3, 18,18,19,18,
    /*CNN*/  11,12,11,12, 5,5,5,5, 1,1,1,1, 2,2,2,2,
    /*GNN*/  13,14,13,14, 6,6,6,6, 7,7,7,7, 8,8,8,8,
    /*TNN*/  21,15,21,15, 3,3,3,3, 21,16,20,16, 21,17,2,17, 22},
    /*ncbi*/23,
    /*init*/{0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,19,19, /* AUG AUU */
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,19,0, /* GUG */
             21,0,21,0, 0,0,0,0, 21,0,0,0, 21,0,0,0}
}
,
{ /* 17: mitochondrial code of Pterobranchia */
    {"UGA=W  AGA=S  AGG=K"},
    "Pterobranchia Mitochondrial",
    /*ANN*/ {9,10,9,10, 4,4,4,4, 3,3,9,3, 18,18,19,18,
    /*CNN*/  11,12,11,12, 5,5,5,5, 1,1,1,1, 2,2,2,2,
    /*GNN*/  13,14,13,14, 6,6,6,6, 7,7,7,7, 8,8,8,8,
    /*TNN*/  21,15,21,15, 3,3,3,3, 20,16,20,16, 2,17,2,17, 22},
    /*ncbi*/24,
    /*init*/{0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,19,0, /* AUG */
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,19,0, /* CUG */
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,19,0, /* GUG */
             21,0,21,0, 0,0,0,0, 0,0,0,0, 0,0,19,0} /* UUG */
}
,
{ /* 18: Candidate Division SR1 and Gracilibacteria */
    {"UGA=G"},
    "Candidate Division SR1 and Gracilibacteria",
    /*ANN*/ {9,10,9,10, 4,4,4,4, 1,3,1,3, 18,18,19,18,
    /*CNN*/  11,12,11,12, 5,5,5,5, 1,1,1,1, 2,2,2,2,
    /*GNN*/  13,14,13,14, 6,6,6,6, 7,7,7,7, 8,8,8,8,
    /*TNN*/  21,15,21,15, 3,3,3,3, 7,16,20,16, 2,17,2,17, 22},
    /*ncbi*/25,
    /*init*/{0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,19,0, /* AUG */
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,19,0, /* GUG */
             21,0,21,0, 0,0,0,0, 0,0,0,0, 0,0,19,0} /* UUG */
}
,
{ /* 19: Pachysolen tannophilus */
    {"CUG=A"},
    "Pachysolen tannophilus Nuclear",
    /*ANN*/ {9,10,9,10, 4,4,4,4, 1,3,1,3, 18,18,19,18,
    /*CNN*/  11,12,11,12, 5,5,5,5, 1,1,1,1, 2,2,6,2,
    /*GNN*/  13,14,13,14, 6,6,6,6, 7,7,7,7, 8,8,8,8,
    /*TNN*/  21,15,21,15, 3,3,3,3, 21,16,20,16, 2,17,2,17, 22},
    /*ncbi*/26,
    /*init*/{0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,19,0, /* AUG */
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,19,0, /* CUG */
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
             21,0,21,0, 0,0,0,0, 21,0,0,0, 0,0,0,0}
}
,
{ /* 20: Karyorelict Nuclear (CUG is Leu as in the standard code, see NCBI table 27) */
    {"UAR=Q, UGA=W"},
    "Karyorelict Nuclear",
    /*ANN*/ {9,10,9,10, 4,4,4,4, 1,3,1,3, 18,18,19,18,
    /*CNN*/  11,12,11,12, 5,5,5,5, 1,1,1,1, 2,2,2,2,
    /*GNN*/  13,14,13,14, 6,6,6,6, 7,7,7,7, 8,8,8,8,
    /*TNN*/  11,15,11,15, 3,3,3,3, 20,16,20,16, 2,17,2,17, 22},
    /*ncbi*/27,
    /*init*/{0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,19,0, /* AUG */
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
             0,0,0,0, 0,0,0,0, 21,0,0,0, 0,0,0,0}
}
,
{ /* 21: Condylostoma Nuclear (CUG is Leu as in the standard code, see NCBI table 28) */
    {"UAR=Q, UGA=W"},
    "Condylostoma Nuclear",
    /*ANN*/ {9,10,9,10, 4,4,4,4, 1,3,1,3, 18,18,19,18,
    /*CNN*/  11,12,11,12, 5,5,5,5, 1,1,1,1, 2,2,2,2,
    /*GNN*/  13,14,13,14, 6,6,6,6, 7,7,7,7, 8,8,8,8,
    /*TNN*/  11,15,11,15, 3,3,3,3, 20,16,20,16, 2,17,2,17, 22},
    /*ncbi*/28,
    /*init*/{0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,19,0, /* AUG */
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
             21,0,21,0, 0,0,0,0, 21,0,0,0, 0,0,0,0}
}
,
{ /* 22: Mesodinium Nuclear (CUG is Leu as in the standard code, see NCBI table 29) */
    {"UAR=Y"},
    "Mesodinium Nuclear",
    /*ANN*/ {9,10,9,10, 4,4,4,4, 1,3,1,3, 18,18,19,18,
    /*CNN*/  11,12,11,12, 5,5,5,5, 1,1,1,1, 2,2,2,2,
    /*GNN*/  13,14,13,14, 6,6,6,6, 7,7,7,7, 8,8,8,8,
    /*TNN*/  15,15,15,15, 3,3,3,3, 21,16,20,16, 2,17,2,17, 22},
    /*ncbi*/29,
    /*init*/{0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,19,0, /* AUG */
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
             0,0,0,0, 0,0,0,0, 21,0,0,0, 0,0,0,0}
}
,
{ /* 23: Peritrich Nuclear (CUG is Leu as in the standard code, see NCBI table 30) */
    {"UAR=E"},
    "Peritrich Nuclear",
    /*ANN*/ {9,10,9,10, 4,4,4,4, 1,3,1,3, 18,18,19,18,
    /*CNN*/  11,12,11,12, 5,5,5,5, 1,1,1,1, 2,2,2,2,
    /*GNN*/  13,14,13,14, 6,6,6,6, 7,7,7,7, 8,8,8,8,
    /*TNN*/  13,15,13,15, 3,3,3,3, 21,16,20,16, 2,17,2,17, 22},
    /*ncbi*/30,
    /*init*/{0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,19,0, /* AUG */
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
             0,0,0,0, 0,0,0,0, 21,0,0,0, 0,0,0,0}
}
,
{ /* 24: Blastocrithidia Nuclear */
    {"UAR=E, UGA=W"},
    "Blastocrithidia Nuclear",
    /*ANN*/ {9,10,9,10, 4,4,4,4, 1,3,1,3, 18,18,19,18,
    /*CNN*/  11,12,11,12, 5,5,5,5, 1,1,1,1, 2,2,2,2,
    /*GNN*/  13,14,13,14, 6,6,6,6, 7,7,7,7, 8,8,8,8,
    /*TNN*/  13,15,13,15, 3,3,3,3, 20,16,20,16, 2,17,2,17, 22},
    /*ncbi*/31,
    /*init*/{0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,19,0, /* AUG */
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
             21,0,21,0, 0,0,0,0, 0,0,0,0, 0,0,0,0}
}
,
{ /* 25: Balanophoraceae Plastid */
    {"UAG=W"},
    "Balanophoraceae Plastid",
    /*ANN*/ {9,10,9,10, 4,4,4,4, 1,3,1,3, 18,18,19,18,
    /*CNN*/  11,12,11,12, 5,5,5,5, 1,1,1,1, 2,2,2,2,
    /*GNN*/  13,14,13,14, 6,6,6,6, 7,7,7,7, 8,8,8,8,
    /*TNN*/  21,15,20,15, 3,3,3,3, 21,16,20,16, 2,17,2,17, 22},
    /*ncbi*/32,
    /*init*/{0,0,0,0, 0,0,0,0, 0,0,0,0, 19,19,19,19, /* AUN */
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,19,0, /* CUG */
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,19,0, /* GUG */
             21,0,0,0, 0,0,0,0, 21,0,0,0, 0,0,19,0} /* UUG */
}
,
{ /* 26: Cephalodiscidae Mitochondria */
    {"UAA=Y, UGA=W, AGA=S, AGG=K"},
    "Cephalodiscidae Mitochondrial",
    /*ANN*/ {9,10,9,10, 4,4,4,4, 3,3,9,3, 18,18,19,18,
    /*CNN*/  11,12,11,12, 5,5,5,5, 1,1,1,1, 2,2,2,2,
    /*GNN*/  13,14,13,14, 6,6,6,6, 7,7,7,7, 8,8,8,8,
    /*TNN*/  15,15,21,15, 3,3,3,3, 20,16,20,16, 2,17,2,17, 22},
    /*ncbi*/33,
    /*init*/{0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,19,0, /* AUG */
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,19,0, /* CUG */
             0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,19,0, /* GUG */
             0,0,21,0, 0,0,0,0, 0,0,0,0, 0,0,19,0} /* UUG */
}

};
462
463
get_code_descr(int code)464 char *get_code_descr(int code)
465 /*
466 get a 60-letter (or less) description of a variant genetic code
467 return value pointer to the description, not to be altered!
468 */
469 {
470 if(code>=0 && code<totcodes)
471 return genetic_code[code].libel ;
472 else
473 return "Unknown genetic code. Standard code is used.";
474 }
475
476
get_code_target(int code)477 char *get_code_target(int code)
478 /*
479 get a description of target organisms of a variant genetic code
480 return value pointer to the description, not to be altered!
481 */
482 {
483 if(code >= 0 && code < totcodes)
484 return genetic_code[code].target ;
485 else
486 return "Unknown genetic code. Standard code is used.";
487 }
488
489
/*
 Compute the rank of a codon in alphabetical order (A < C < G < T=U).
   codon         string whose first 3 characters are read; upper and
                 lower case are equivalent, U is equivalent to T
   return value  0..63 for a codon made of A, C, G, T/U only,
                 64 if any of the 3 characters is anything else,
                 including the end of a string shorter than 3 characters
*/
int calc_codon_number(char *codon)
{
    static const char nucleotides[] = "AaCcGgTtUu";
    static const int nucnum[5] = {0, 1, 2, 3, 3};
    const char *p;
    int num = 0, i;

    for (i = 0; i < 3; i++) {
        /* strchr() would match the terminating NUL of nucleotides, so a
           premature end of codon has to be rejected explicitly */
        if (codon[i] == 0) return 64;
        p = strchr(nucleotides, codon[i]);
        if (p == NULL) return 64;
        num = num * 4 + nucnum[(p - nucleotides) / 2];
    }
    return num;
}
511
512
codaa(char * codon,int code)513 char codaa(char *codon, int code)
514 /*
515 amino acid translation:
516 codon a 3-base string
517 code the genetic code to be used
518 return value the amino acid as 1 character
519 */
520 {
521 struct genetic_code_libel *pdata;
522 int num;
523
524 num = calc_codon_number(codon);
525 if(code < 0 || code >= totcodes)code = 0;/*use regular code if unknown number */
526 pdata = &genetic_code[code]; /* ici ecriture plus compacte mal compilee sur PC*/
527 return aminoacids[ pdata->code[num] - 1 ];
528 }
529
530
get_ncbi_gc_number(int gc)531 int get_ncbi_gc_number(int gc)
532 { /* from acnuc to ncbi genetic code number */
533 return genetic_code[gc].ncbi_gc;
534 }
535
536
get_acnuc_gc_number(int ncbi_gc)537 int get_acnuc_gc_number(int ncbi_gc)
538 { /* from ncbi to acnuc genetic code number (returns 0 if not found) */
539 int num;
540
541 for( num = 0; num < totcodes; num++ )
542 if(genetic_code[num].ncbi_gc == ncbi_gc) return num;
543 return 0;
544 }
545
546
/*
 Compare strings s1 and s2 of lengths l1 and l2 as done by strcmp,
 but ignoring all trailing spaces.
   s1, l1        first string and its maximal length; the string ends
                 earlier if a NUL is found within the l1 first characters
   s2, l2        same for the second string
   return value  0 if equal; otherwise the difference of the first pair
                 of differing characters, or +1 / -1 when one string only
                 extends the other with something else than spaces
*/
int strcmptrail(char *s1, int l1, char *s2, int l2)
{
    char *nul, *longer, *shorter, *tail;
    int long_len, short_len, sign, i, extra;

    /* effective lengths stop at an embedded NUL */
    if (l1 > 0 && (nul = (char *)memchr(s1, 0, l1)) != NULL)
        l1 = (int)(nul - s1);
    if (l2 > 0 && (nul = (char *)memchr(s2, 0, l2)) != NULL)
        l2 = (int)(nul - s2);

    /* work with (longer, shorter); sign remembers whether they were swapped */
    if (l2 > l1) {
        sign = -1;
        longer = s2; long_len = l2;
        shorter = s1; short_len = l1;
    }
    else {
        sign = 1;
        longer = s1; long_len = l1;
        shorter = s2; short_len = l2;
    }

    /* common part */
    i = 0;
    while (i < short_len) {
        if (longer[i] != shorter[i])
            return (longer[i] - shorter[i]) * sign;
        i++;
    }

    /* what remains of the longer string must be blank */
    tail = longer + i;
    extra = long_len - short_len;
    for (i = 0; i < extra; i++) {
        if (tail[i] != ' ') return sign;
    }
    return 0;
}
581
582
/*
 Convert a NUL-terminated string to upper case, in place.
   name  the string to convert (modified)
 Each character goes through unsigned char before toupper(), which is
 undefined for negative values other than EOF.
*/
void majuscules(char *name)
{
    for (; *name != 0; name++)
        *name = (char)toupper((unsigned char)*name);
}
588
589
/*
 Remove every space character from a NUL-terminated string, in place.
   chaine  the string to compact (modified)
 Done in a single pass that never reads before the start of the string,
 so empty and all-blank strings are handled safely.
*/
void compact(char *chaine)
{
    char *src, *dst = chaine;

    for (src = chaine; *src != 0; src++) {
        if (*src != ' ') *dst++ = *src;
    }
    *dst = 0;
}
605
606
607
/*
 Allocate zero-filled memory, or terminate the program.
   nbre          number of elements
   size          size of one element
   return value  pointer to the allocated memory, never NULL: when the
                 allocation fails a message is written to stderr and the
                 program exits with status 1
*/
void *mycalloc(int nbre, size_t size)
{
    void *block = calloc((unsigned)nbre, size);

    if (block != NULL)
        return block;
    fprintf(stderr,"Error: problem allocating memory.\n");
    exit(1);
}
619
620
/*
 Complement one nucleotide.
   nucl          the base, upper or lower case; U is treated like T
   return value  the complementary base in lower case (a<->t, c<->g,
                 r<->y), or 'n' for any other character
*/
char complementer_base(char nucl)
{
    /* bases[i] is complemented into complements[i] */
    static const char bases[]       = "aAcCgGuUtTrRyY";
    static const char complements[] = "ttggccaaaayyrr";
    const char *hit = NULL;

    /* strchr() would match the final NUL of bases: exclude it */
    if (nucl != 0)
        hit = strchr(bases, nucl);
    if (hit == NULL)
        return('n');
    return complements[hit - bases];
}
648
649
/* ~~~~~~~~~~~~ reverse-complement a sequence, in place ~~~~~~~~~~~~
 * receives the address of the start of a character array and its length,
 * reverses the order of the bases and replaces each of them by its
 * complement as computed by complementer_base()
 *   deb_ch  start of the sequence (modified; no final NUL is needed)
 *   l       number of bases; nothing is done if l <= 0
 * -------------------------------------------------------------------- */

void complementer_seq(char *deb_ch, int l)
{
    int left, right;
    char compl_left;

    if (l <= 0) return; /* empty sequence: deb_ch[-1] must not be touched */

    /* walk from both ends towards the middle; when l is odd the central
       base meets itself and is simply complemented */
    for (left = 0, right = l - 1; left <= right; left++, right--) {
        compl_left = complementer_base(deb_ch[left]);
        deb_ch[left] = complementer_base(deb_ch[right]);
        deb_ch[right] = compl_left;
    }
}
673
674
init_codon_to_aa(char * codon,int gc)675 char init_codon_to_aa(char *codon, int gc)
676 {
677 int num, aa;
678 struct genetic_code_libel *pdata;
679
680 num = calc_codon_number(codon);
681 if(num >= 64) return 'X';
682 /* use regular code if unknown number */
683 if(gc < 0 || gc >= totcodes) gc = 0;
684 pdata = &genetic_code[gc];
685 aa = pdata->codon_init[num];
686 /* if not listed in expected init codons */
687 if(aa == 0 || aa == 21) aa = pdata->code[num];
688 return aminoacids[aa - 1];
689 }
690
691
stop_codon_to_aa(char * codon,int gc)692 char stop_codon_to_aa(char *codon, int gc)
693 {
694 int num, aa;
695 struct genetic_code_libel *pdata;
696
697 num = calc_codon_number(codon);
698 if(num >= 64) return 'X';
699 /* use regular code if unknown number */
700 if(gc < 0 || gc >= totcodes) gc = 0;
701 pdata = &genetic_code[gc];
702 aa = pdata->code[num];
703 if (aa != 21) {
704 aa = pdata->codon_init[num];
705 /* if not listed in expected stop codons */
706 if (aa != 21) aa = pdata->code[num];
707 }
708 return aminoacids[aa - 1];
709 }
710
711
/*
 Length of a character array once its trailing spaces are ignored.
   chaine        the characters (not modified, no final NUL needed)
   len           number of characters to consider
   return value  number of characters up to and including the last one
                 that is not a space; 0 if there is none
*/
int notrail2(char *chaine, int len)
{
    int kept;

    for (kept = len; kept > 0; kept--) {
        if (chaine[kept - 1] != ' ') break;
    }
    return kept;
}
718
719
/*
 Prepare a template containing @ wildcards for later use by compch().
   chaine        the template to search for; it is cut in place into
                 NUL-terminated words (trailing spaces are removed, the
                 first @ of each run of wildcards is replaced by a NUL)
                 and must not be modified afterwards
   posmot        array of pointers filled on return: one NULL per run of
                 wildcards and, for each word between wildcards, a pointer
                 to that word inside chaine
   return value  number of pointers stored in posmot;
                 0, with nothing changed, if chaine contains no wildcard
*/
int prepch(char *chaine, char **posmot)
{
    static char wildcard='@';
    char *cursor;
    int count = 0;

    if(strchr(chaine,'@')==NULL) return 0;

    /* cut trailing spaces; chaine holds at least one '@' so the scan
       stops inside the string */
    cursor = chaine + strlen(chaine);
    while (cursor > chaine && cursor[-1] == ' ') cursor--;
    *cursor = 0;

    cursor = chaine;
    while (*cursor != 0) {
        if (*cursor == wildcard) {
            /* a run of wildcards counts once and terminates the word before it */
            posmot[count++] = NULL;
            *cursor++ = 0;
            while (*cursor == wildcard) cursor++;
        }
        else {
            /* a word extends up to the next wildcard or the end */
            posmot[count++] = cursor++;
            while (*cursor != wildcard && *cursor != 0) cursor++;
        }
    }
    return count;
}
752
753
/*
 Test whether a string matches a template prepared by prepch().
   cible         the string to test for presence of the template
   lcible        length of cible, which need not be NUL-terminated;
                 trailing spaces are ignored. Any length is accepted:
                 targets longer than 150 characters are processed in a
                 temporarily allocated buffer
   posmot        array built by prepch()
   nbrmots       value returned by prepch()
   return value  1 if and only if the template is present in cible,
                 0 if absent (also when nbrmots <= 0, i.e. when prepch()
                 found no wildcard and did not fill posmot)
*/
int compch(char *cible, int lcible, char **posmot, int nbrmots)
{
    char fixedbuf[151];
    char *vcible = fixedbuf; /* NUL-terminated copy of the trimmed target */
    char *cursor, *found;
    int num = 0, total, result = 0;
    size_t l;

    if (nbrmots <= 0) return 0;

    /* ignore trailing spaces of the target */
    if (lcible < 0) lcible = 0;
    while (lcible > 0 && cible[lcible - 1] == ' ') lcible--;

    if ((size_t)lcible >= sizeof fixedbuf) {
        vcible = (char *)malloc((size_t)lcible + 1);
        if (vcible == NULL) {
            fprintf(stderr,"Error: problem allocating memory.\n");
            exit(1);
        }
    }
    memcpy(vcible, cible, (size_t)lcible);
    vcible[lcible] = 0;
    cursor = vcible;

    /* total = rank in posmot of the last wildcard */
    if (posmot[nbrmots-1] == NULL)
        total = nbrmots - 1;
    else
        total = nbrmots - 2;

    if (posmot[0] != NULL) { /* comparison with the initial word */
        l = strlen(posmot[0]);
        if (strncmp(cursor, posmot[0], l) != 0) goto done;
        cursor += l;
        num++;
    }
    while (num < total) { /* search of the internal words, in order */
        num++;
        found = strstr(cursor, posmot[num]);
        if (found == NULL) goto done;
        cursor = found + strlen(posmot[num]);
        num++;
    }
    if (total == nbrmots - 1) { /* template ends with @ */
        result = 1;
        goto done;
    }
    /* test whether the target ends with the last word of the template;
       that word must fit in what is left after the words already matched */
    l = strlen(posmot[nbrmots-1]);
    if (l <= (size_t)((vcible + lcible) - cursor) &&
        strcmp(vcible + lcible - l, posmot[nbrmots-1]) == 0)
        result = 1;

done:
    if (vcible != fixedbuf) free(vcible);
    return result;
}
799
800
/*
 Store "name=value" in a static buffer and give it to putenv().
   buffer, size  the buffer, which must stay valid after return, and its size
   name          name of the environment variable
   value         its new value
   return value  0 if ok; 1 if the definition does not fit in the buffer
                 (the buffer is then left untouched) or if putenv() fails
*/
static int chg_acnuc_putenv(char *buffer, size_t size, const char *name,
    const char *value)
{
    size_t lname = strlen(name), lvalue = strlen(value);

    if (lname + 1 >= size || lvalue >= size - lname - 1) return 1;
    memcpy(buffer, name, lname);
    buffer[lname] = '=';
    memcpy(buffer + lname + 1, value, lvalue + 1);
    return putenv(buffer) ? 1 : 0;
}


/*
 Changing the values of variables acnuc and gcgacnuc :
   acnucvar      the new value of acnuc (may be a variable, may be acnuc itself)
   gcgacnucvar   the new value of gcgacnuc (may be a variable,
                 may be gcgacnuc itself)
 An argument that is the name of an existing environment variable is
 replaced by the value of that variable; an argument equal to the name of
 the variable it would set leaves that variable unchanged.
 returns TRUE if error (value too long for the 200-character buffers, or
 putenv failure), FALSE if ok
*/
int chg_acnuc(char *acnucvar, char *gcgacnucvar)
{
    /* static: the environment keeps pointing to these buffers */
    static char newacnuc[200], newgcgacnuc[200];
    char *point;

    if( strcmp(acnucvar,"acnuc") != 0 ) {
        point=getenv(acnucvar);
        if(point!=NULL) acnucvar=point;
        if( chg_acnuc_putenv(newacnuc, sizeof newacnuc, "acnuc", acnucvar) )
            return 1;
    }

    if( strcmp(gcgacnucvar,"gcgacnuc") != 0 ) {
        point=getenv(gcgacnucvar);
        if(point!=NULL) gcgacnucvar=point;
        if( chg_acnuc_putenv(newgcgacnuc, sizeof newgcgacnuc, "gcgacnuc",
                gcgacnucvar) )
            return 1;
    }
    return 0;
}
830
831
/*
 Read a password from the standard input without echoing it.
 When the input is a terminal (unix-like systems) or a console (Windows),
 echo and line buffering are switched off while reading and restored
 afterwards; a '*' is written to stdout for each character accepted, and
 backspace / delete erase the last accepted character.
   password  receives the NUL-terminated password; must have room for
             lpw + 1 characters
   lpw       maximal number of characters stored before the final NUL
 Reading stops at end of line, at end of file, on a read error, or as soon
 as lpw characters have been stored.
*/
void gets_no_echo(char *password, size_t lpw)
{
    char *p;
    int c; /* int, not char, so that EOF is distinct from every character */
#ifdef unixlike
    struct termios initialrsettings, newrsettings;
    int err;
#elif defined(WIN32)
    DWORD mode, savemode, lu;
    HANDLE hconsole;
    int err;
    char cread;
#endif

#ifdef unixlike
    err = tcgetattr( fileno(stdin), &initialrsettings );
    if(err == 0) {
        newrsettings = initialrsettings;
        newrsettings.c_lflag &= ~ECHO;
        newrsettings.c_lflag &= ~ICANON;
        tcsetattr( fileno(stdin), TCSAFLUSH, &newrsettings );
    }
#elif defined(WIN32)
    hconsole = GetStdHandle(STD_INPUT_HANDLE);
    err = GetConsoleMode(hconsole, &savemode);
    if(err != 0) {
        mode = savemode;
        mode &= ~ENABLE_ECHO_INPUT;
        mode &= ~ENABLE_LINE_INPUT;
        SetConsoleMode(hconsole, mode);
        FlushConsoleInputBuffer(hconsole); /* necessary */
    }
#endif
    p = password;
    /* the room left is tested before each read so that nothing is read
       or stored when lpw is 0 */
    while((size_t)(p - password) < lpw) {
#ifdef WIN32
        if( !ReadConsole(hconsole, &cread, 1, &lu, NULL) || lu == 0 ) break;
        c = (unsigned char)cread;
#else
        c = getchar();
#endif
        if(c == EOF || c == '\n' || c == '\r') break;
        if(c != '\b' && c != '\x7F' ) {
            *(p++) = (char)c;
            putchar('*');
        }
        else if(p > password) {
            /* erase the last character and its '*' on screen */
            p--;
            putchar('\b');putchar(' ');putchar('\b');
        }
    }
    *p = 0;
#ifdef unixlike
    if(err == 0) {
        tcsetattr( fileno(stdin), TCSANOW, &initialrsettings );
        putchar('\n');
    }
#elif defined(WIN32)
    if(err != 0) {
        SetConsoleMode(hconsole, savemode);
        putchar('\n');
    }
#endif
    return;
}
897
898
899