1 /* @source garnier application
2 **
3 ** Secondary structure prediction
4 ** @author Copyright (C) Rodrigo Lopez
5 ** @@
6 **
7 ** The sequence is taken from a ABI trace file and written to a
8 ** sequence file.
9 **
10 ** This program is free software; you can redistribute it and/or
11 ** modify it under the terms of the GNU General Public License
12 ** as published by the Free Software Foundation; either version 2
13 ** of the License, or (at your option) any later version.
14 **
15 ** This program is distributed in the hope that it will be useful,
16 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ** GNU General Public License for more details.
19 **
20 ** You should have received a copy of the GNU General Public License
21 ** along with this program; if not, write to the Free Software
22 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
23 ******************************************************************************/
24
25 #include "emboss.h"
26 #include <math.h>
27 #include <stdlib.h>
28
29
30
31
32 /* Bill Pearson's include files */
33
34 char amino[] = "GAVLISTDENQKHRFYWCMP";
35
36 ajint helix[20][17]=
37 {
38 { -5,-10,-15,-20,-30,-40,-50,-60,-86,-60,-50,-40,-30,-20,-15,-10, -5},
39 { 5, 10, 15, 20, 30, 40, 50, 60, 65, 60, 50, 40, 30, 20, 15, 10, 5},
40 { 0, 0, 0, 0, 0, 0, 0, 5, 10, 14, 10, 5, 0, 0, 0, 0, 0},
41 { 0, 5, 10, 15, 20, 25, 28, 30, 32, 30, 28, 25, 20, 15, 10, 5, 0},
42 { 5, 10, 15, 20, 25, 20, 15, 10, 6, 0,-10,-15,-20,-25,-20,-10, -5},
43 { 0, -5,-10,-15,-20,-25,-30,-35,-39,-35,-30,-25,-20,-15,-10, -5, 0},
44 { 0, 0, 0, -5,-10,-15,-20,-25,-26,-25,-20,-15,-10, -5, 0, 0, 0},
45 { 0, -5,-10,-15,-20,-15,-10, 0, 5, 10, 15, 20, 20, 20, 15, 10, 5},
46 { 0, 0, 0, 0, 10, 20, 60, 70, 78, 78, 78, 78, 78, 70, 60, 40, 20},
47 { 0, 0, 0, 0,-10,-20,-30,-40,-51,-40,-30,-20,-10, 0, 0, 0, 0},
48 { 0, 0, 0, 0, 5, 10, 20, 20, 10,-10,-20,-20,-10, -5, 0, 0, 0},
49 { 20, 40, 50, 55, 60, 60, 50, 30, 23, 10, 5, 0, 0, 0, 0, 0, 0},
50 { 10, 20, 30, 40, 50, 50, 50, 30, 12,-20,-10, 0, 0, 0, 0, 0, 0},
51 { 0, 0, 0, 0, 0, 0, 0, 0, -9,-15,-20,-30,-40,-50,-50,-30,-10},
52 { 0, 0, 0, 0, 0, 5, 10, 15, 16, 15, 10, 5, 0, 0, 0, 0, 0},
53 { -5,-10,-15,-20,-25,-30,-35,-40,-45,-40,-35,-30,-25,-20,-15,-10, -5},
54 { -10,-20,-40,-50,-50,-10, 0, 10, 12, 10, 0,-10,-50,-50,-40,-20,-10},
55 { 0, 0, 0, 0, 0, 0, -5,-10,-13,-10, -5, 0, 0, 0, 0, 0, 0},
56 { 10, 20, 25, 30, 35, 40, 45, 50, 53, 50, 45, 40, 35, 30, 25, 20, 10},
57 { -10,-20,-40,-60,-80,-100,-120,-140,-77,-60,-30,-20,-10, 0, 0, 0, 0}
58 };
59
60
61
62
63 ajint extend [20][17]=
64 {
65 { 10, 20, 30, 40, 40, 20, 0,-20,-42,-20, 0, 20, 40, 40, 30, 20,-10},
66 { 0, 0, 0, 0, -5,-10,-15,-20,-23,-20,-15,-10, -5, 0, 0, 0, 0},
67 { 0, 0,-10,-20, 0, 20, 40, 60, 68, 60, 40, 20, 0,-20,-10, 0, 0},
68 { 0, 0, 0, 0, 0, 5, 10, 20, 23, 20, 10, 5, 0, 0, 0, 0, 0},
69 { 0,-10,-20,-10, 0, 20, 40, 60, 67, 60, 40, 20, 0,-10,-20,-10, 0},
70 { 0, 10, 20, 10, 0, -5,-10,-15,-17,-15,-10, -5, 0, 10, 20, 10, 0},
71 { 5, 10, 15, 20, 15, 15, 10, 10, 13, 10, 10, 15, 15, 20, 15, 10, 5},
72 { 0, 5, 10, 15, 20, 0,-20,-30,-44,-30,-20, 0, 0, 0, 0, 0, 0},
73 {-10,-15,-20,-25,-30,-35,-40,-45,-50,-55,-60,-60,-50,-40,-30,-20,-10},
74 { 10, 30, 50, 30, 20, 0,-15,-30,-41,-30,-15, 0, 20, 50, 30, 50, 10},
75 { 0, 0, 0, 0, 0, -5,-10, 0, 12, 20, 30, 40, 50, 50, 40, 30, 15},
76 { -5,-10,-15,-20,-30,-40,-50,-40,-33,-20,-10, 0, 10, 10, 0, 0, 0},
77 {-10,-20,-40,-20,-10, 0,-10,-20,-25,-35,-30,-25,-20,-15,-10, -5, 0},
78 { 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0},
79 { 0, 0, 0, 0, 0, 5, 10, 20, 26, 10,-10,-30,-60,-65,-60,-40,-20},
80 { 0, 5, 10, 15, 20, 25, 30, 35, 40, 35, 30, 25, 20, 15, 10, 5, 0},
81 { 0, 0, 0, 0, 0,-10,-10,-10,-10,-10,-10,-15,-20,-25,-30,-20,-10},
82 { 0, 0, 0, 0, 0, 10, 20, 30, 44, 30, 20, 10, 0, 0, 0, 0, 0},
83 {-10,-20,-30,-40,-40,-30, 0, 10, 23, 10, 0,-30,-40,-40,-30,-20,-10},
84 { 10, 20, 30, 30, 20, 10, 0,-10,-18,-20,-10, 10, 30, 40, 30, 20, 10}
85 };
86
87
88
89
90 ajint turns[20][17]=
91 {
92 { 0, 0, 0, 0, 10, 30, 55, 55, 57, 40, 0, 0, 0, 0, 0, 0, 0},
93 { 0, 0, 0,-10,-20,-30,-40,-50,-50,-40,-30,-20,-10, 0, 0, 0, 0},
94 { 0, 0, 0, 0,-10,-20,-30,-40,-60,-40,-30,-20,-10, 0, 0, 0, 0},
95 { 0, 0, 0,-10,-20,-30,-40,-50,-56,-20,-10, 0, 0, 0, 0, 0, 0},
96 { 0, 0, 0, 0, 0,-10,-20,-30,-46,-40,-10, 0, 0, 20, 30, 20, 10},
97 { 0,-10,-20,-20, 10, 15, 20, 25, 26, 25, 20, 15, 10, 0, 0, 0, 0},
98 { 0, 10, 20, 20, 20, 15, 18, 5, 3, 5, 10, 15, 20, 20, 20, 10, 0},
99 { 0, 0, 0, 0, 0, 0, 5, 10, 31, 10, 5, 0, 0, 0, 0, 0, 0},
100 { 0, -5,-10,-15,-20,-30,-40,-45,-47,-20, 0, 10, 5, 0, 0, 0, 0},
101 { 0, 0, 0, 10, 20, 30, 35, 40, 42, 40, 35, 30, 20, 10, 5, 0, 0},
102 { 10, 20, 30, 25, 20, 15, 10, 5, 4, 20, 30, 40, 50, 60, 50, 40, 20},
103 {-10,-20,-30,-40,-25,-10, 0, 10, 10, 10, 0,-20,-30,-20,-10, -5, 0},
104 { 0, 0, 0, 0, 0, 0, 0, 0, -3, 0, 10, 20, 30, 20, 10, 0, 0},
105 { 0, 0, 0, 0, 0, 0, 0, 10, 21, 30, 40, 30, 20, 10, 0, 0, 0},
106 { 0, 0, 0, 0, 0, -5,-10,-15,-18,-15, 0, 15, 30, 25, 20, 10, 0},
107 { 0, 0, 0, 5, 15, 15, 20, 25, 29, 25, 20, 15, 15, 5, 0, 0, 0},
108 { 0, 0, 0, 10, 20, 30, 40, 80, 36,-30, 30, 40, 50, 60, 70, 40, 20},
109 { 20, 40, 50, 60, 60, 55, 50, 45, 44, 40, 35, 30, 25, 20, 15, 10, 5},
110 { -5,-15,-20,-25,-30,-35,-40,-45,-48,-45,-40,-35,-30,-25,-20,-15, -5},
111 { 10, 20, 30, 40, 50, 70, 10,-90, 36, 90, 10, 0, 0, 0, 0, 0, 0}
112 };
113
114
115
116
117 ajint coil[20][17]=
118 {
119 { 0, 0, 0, 0, 10, 30, 40, 45, 49, 45, 40, 30, 10, 0, 0, 0, 0},
120 { 0, 0, 0, 0, -5,-10,-20,-25,-25,-25,-20,-15,-10, -5, 0, 0, 0},
121 { 0, 0, 0, 0,-10,-20,-25,-30,-35,-30,-25,-20,-10, 0, 0, 0, 0},
122 { 0, 0, 0,-10,-20,-30,-40,-30,-20,-20,-10, 0, 0, 0, 0, 0, 0},
123 { 0, 0, 0, 0, 0,-10,-20,-30,-33,-30,-10, 0, 10, 20, 30, 20, 0},
124 { 0,-10,-20,-20, 10, 15, 20, 25, 50, 25, 20, 15, 10, 0, 0, 0, 0},
125 { 0, 10, 20, 30, 20, 15, 10, 15, 17, 15, 10, 15, 20, 30, 20, 10, 0},
126 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
127 { 0, 0, 10, 20, 40, 20, 0,-10,-44,-40,-20,-10, 0, 0, 0, 0, 0},
128 { 0, 0, 0, 10, 20, 30, 35, 40, 46, 40, 35, 30, 20, 10, 0, 0, 0},
129 { 10, 20, 30, 25, 20, 15, 10, 0, -5, 20, 30, 40, 50, 60, 50, 40, 20},
130 {-10,-20,-30,-40,-25,-20,-10, -8, -8, 0, 0,-20,-30,-20,-10, -5, 0},
131 { 0, 0, 0, 0, 0, 0, 0, 10, 16, 15, 10, 10, 10, 10, 5, 0, 0},
132 { 0, 0, 0, 0, 0, 0, 0, 0,-12, 0, 20, 30, 20, 10, 0, 0, 0},
133 { 0, 0, 0, 0, 0, -5,-10,-20,-41,-20, 0, 15, 30, 25, 20, 10, 0},
134 { 0, 0, 0, 0, 0, 0, 0, 0, -6, 0, 0, 0, 0, 0, 0, 0, 0},
135 { 0, 0, 0, 10, 20, 30, 40, 20, 12, 20, 30, 40, 50, 60, 70, 40, 20},
136 { 0, 0, 0, 0, 0, 0,-10,-30,-47,-30,-10, 0, 0, 0, 0, 0, 0},
137 { 0, -5,-10,-15,-20,-25,-30,-40,-41,-40,-30,-25,-20,-15,-10, -5, 0},
138 { 0, 0, 10, 20, 30, 40, 50, 55, 58, 50, 10, 0, 0, 0, 0, 0, 0}
139 };
140
141
142
143 #define NA 124
144 #define EL 125
145 #define ES 126
146
147 /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 */
148 ajint aascii[]=
149 {
150 EL,NA,NA,NA,NA,NA,NA,NA,NA,NA,EL,NA,NA,EL,NA,NA,
151 NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,
152 NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,
153 NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,
154 ES, 0,20, 4, 3, 6,13, 7, 8, 9,NA,11,10,12, 2,NA,
155 14, 5, 1,15,16,NA,19,17,22,18,21,NA,NA,NA,NA,NA,
156 NA, 0,20, 4, 3, 6,13, 7, 8, 9,NA,11,10,12, 2,NA,
157 14, 5, 1,15,16,NA,19,17,22,18,21,NA,NA,NA,NA,NA
158 };
159
160 ajint *sascii;
161 #define AAMASK 127
162
163 ajint nascii[]=
164 {
165 /* 0 1 2 3 5 6 7 8 9 10 11 12 13 14 15 15
166 ** @ A B C D E F G H I J K L M N O
167 ** P Q R S T U V W X Y Z
168 */
169 EL,NA,NA,NA,NA,NA,NA,NA,NA,NA,EL,NA,NA,EL,NA,NA,
170 NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,
171 NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,16,NA,NA,
172 NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,
173 ES, 0,14, 1,11,NA,NA, 2,12,NA,NA,10,NA, 7,15,NA,
174 5, 6, 5, 9, 3, 4,13, 8,15, 6,NA,NA,NA,NA,NA,NA,
175 NA, 0,14, 1,11,NA,NA, 2,12,NA,NA,10,NA, 7,15,NA,
176 5, 6, 5, 9, 3, 4,13, 8,15, 6,NA,NA,NA,NA,NA,NA
177 };
178
179
180
181
182 ajint gdelval = -12;
183 ajint del_set = 0;
184
185 ajint ggapval = -2;
186 ajint gap_set = 0;
187
188 ajint gshift = -30;
189 ajint shift_set = 0;
190
191 #define EOSEQ 31
192 #define MAXSQ 32
193
194 char qsqnam[] = {"aa"};
195 char sqnam[] = {"aa"};
196 char sqtype[] = {"protein"};
197
198 char *sq;
199 char aa[MAXSQ] = {"ARNDCQEGHILKMFPSTWYVBZX"};
200
201 ajint naa = 23;
202 ajint nsq;
203
204 ajint haa[MAXSQ] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,2,6,0};
205 ajint *hsq;
206
207 ajint apam250[450] =
208 {
209 2,
210 -2, 6,
211 0, 0, 2,
212 0,-1, 2, 4,
213 -2,-4,-4,-5,12,
214 0, 1, 1, 2,-5, 4,
215 0,-1, 1, 3,-5, 2, 4,
216 1,-3, 0, 1,-3,-1, 0, 5,
217 -1, 2, 2, 1,-3, 3, 1,-2, 6,
218 -1,-2,-2,-2,-2,-2,-2,-3,-2, 5,
219 -2,-3,-3,-4,-6,-2,-3,-4,-2, 2, 6,
220 -1, 3, 1, 0,-5, 1, 0,-2, 0,-2,-3, 5,
221 -1, 0,-2,-3,-5,-1,-2,-3,-2, 2, 4, 0, 6,
222 -4,-4,-4,-6,-4,-5,-5,-5,-2, 1, 2,-5, 0, 9,
223 1, 0,-1,-1,-3, 0,-1,-1, 0,-2,-3,-1,-2,-5, 6,
224 1, 0, 1, 0, 0,-1, 0, 1,-1,-1,-3, 0,-2,-3, 1, 2,
225 1,-1, 0, 0,-2,-1, 0, 0,-1, 0,-2, 0,-1,-3, 0, 1, 3,
226 -6, 2,-4,-7,-8,-5,-7,-7,-3,-5,-2,-3,-4, 0,-6,-2,-5,17,
227 -3,-4,-2,-4, 0,-4,-4,-5, 0,-1,-1,-4,-2, 7,-5,-3,-3, 0,10,
228 0,-2,-2,-2,-2,-2,-2,-1,-2, 4, 2,-2, 2,-1,-1,-1, 0,-6,-2, 4,
229 0,-1, 2, 3,-4, 1, 2, 0, 1,-2,-3, 1,-2,-5,-1, 0, 0,-5,-3,-2, 2,
230 0, 0, 1, 3,-5, 3, 3,-1, 2,-2,-3, 0,-2,-5, 0, 0,-1,-6,-4,-2, 2, 3,
231 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
232 };
233
234
235
236
237 ajint abl50[] =
238 {
239 5,
240 -2, 7,
241 -1,-1, 7,
242 -2,-2, 2, 8,
243 -1,-4,-2,-4,13,
244 -1, 1, 0, 0,-3, 7,
245 -1, 0, 0, 2,-3, 2, 6,
246 0,-3, 0,-1,-3,-2,-3, 8,
247 -2, 0, 1,-1,-3, 1, 0,-2,10,
248 -1,-4,-3,-4,-2,-3,-4,-4,-4, 5,
249 -2,-3,-4,-4,-2,-2,-3,-4,-3, 2, 5,
250 -1, 3, 0,-1,-3, 2, 1,-2, 0,-3,-3, 6,
251 -1,-2,-2,-4,-2, 0,-2,-3,-1, 2, 3,-2, 7,
252 -3,-3,-4,-5,-2,-4,-3,-4,-1, 0, 1,-4, 0, 8,
253 -1,-3,-2,-1,-4,-1,-1,-2,-2,-3,-4,-1,-3,-4,10,
254 1,-1, 1, 0,-1, 0,-1, 0,-1,-3,-3, 0,-2,-3,-1, 5,
255 0,-1, 0,-1,-1,-1,-1,-2,-2,-1,-1,-1,-1,-2,-1, 2, 5,
256 -3,-3,-4,-5,-5,-1,-3,-3,-3,-3,-2,-3,-1, 1,-4,-4,-3,15,
257 -2,-1,-2,-3,-3,-1,-2,-3, 2,-1,-1,-2, 0, 4,-3,-2,-2, 2, 8,
258 0,-3,-3,-4,-1,-3,-3,-4,-4, 4, 1,-3, 1,-1,-3,-2, 0,-3,-1, 5,
259 -2,-1, 4, 5,-3, 0, 1,-1, 0,-4,-4, 0,-3,-4,-2, 0, 0,-5,-3,-4, 5,
260 -1, 0, 0, 1,-3, 4, 5,-2, 0,-3,-3, 1,-1,-4,-1, 0,-1,-2,-2,-3, 2, 5,
261 -1,-1,-1,-1,-2,-1,-1,-2,-1,-1,-1,-1,-1,-2,-2,-1, 0,-3,-1,-1,-1,-1,-1
262 };
263
264
265
266
267 ajint abl62[] =
268 {
269 4,
270 -1, 5,
271 -2, 0, 6,
272 -2,-2, 1, 6,
273 0,-3,-3,-3, 9,
274 -1, 1, 0, 0,-3, 5,
275 -1, 0, 0, 2,-4, 2, 5,
276 0,-2, 0,-1,-3,-2,-2, 6,
277 -2, 0, 1,-1,-3, 0, 0,-2, 8,
278 -1,-3,-3,-3,-1,-3,-3,-4,-3, 4,
279 -1,-2,-3,-4,-1,-2,-3,-4,-3, 2, 4,
280 -1, 2, 0,-1,-3, 1, 1,-2,-1,-3,-2, 5,
281 -1,-1,-2,-3,-1, 0,-2,-3,-2, 1, 2,-1, 5,
282 -2,-3,-3,-3,-2,-3,-3,-3,-1, 0, 0,-3, 0, 6,
283 -1,-2,-2,-1,-3,-1,-1,-2,-2,-3,-3,-1,-2,-4, 7,
284 1,-1, 1, 0,-1, 0, 0, 0,-1,-2,-2, 0,-1,-2,-1, 4,
285 0,-1, 0,-1,-1,-1,-1,-2,-2,-1,-1,-1,-1,-2,-1, 1, 5,
286 -3,-3,-4,-4,-2,-2,-3,-2,-2,-3,-2,-3,-1, 1,-4,-3,-2,11,
287 -2,-2,-2,-3,-2,-1,-2,-3, 2,-1,-1,-2,-1, 3,-3,-2,-2, 2, 7,
288 0,-3,-3,-3,-1,-2,-2,-3,-3, 3, 1,-2, 1,-1,-2,-2, 0,-3,-1, 4,
289 -2,-1, 3, 4,-3, 0, 1,-1, 0,-3,-4, 0,-3,-3,-2, 0,-1,-4,-3,-3, 4,
290 -1, 0, 0, 1,-3, 3, 4,-2, 0,-3,-3, 1,-1,-3,-1, 0,-1,-3,-2,-2, 1, 4,
291 0,-1,-1,-1,-2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-2, 0, 0,-2,-1,-1,-1,-1,-1
292 };
293
294 /* DNA alphabet
295 **
296 ** A, C, G, T U
297 ** R, Y
298 ** M (A or C) 6
299 ** W (A or T) 7
300 ** S (C or G) 8
301 ** K (G or T) 9
302 ** D (not C) 10
303 ** H (not G) 11
304 ** V (not T) 12
305 ** B (not A) 13
306 ** N X 14
307 */
308
309 char nt[MAXSQ]={"ACGTURYMWSKDHVBNX"};
310
311 ajint nnt = 17;
312
313 ajint hnt[MAXSQ] = {0,1,2,3,3,0,1,0,0,1,2,0,0,0,1,0,0};
314
315 ajint npam[450] =
316 {
317 /* A C G T U R Y M W S K D H V B N X */
318 5, /* A */
319 -4, 5, /* C */
320 -4,-4, 5, /* G */
321 -4,-4,-4, 5, /* T */
322 -4,-4,-4, 5, 5, /* U */
323 2,-1, 2,-1,-1, 2, /* R (A G)*/
324 -1, 2,-1, 2, 2,-2, 2, /* Y (C T)*/
325 2, 2,-1,-1,-1,-1,-1, 2, /* M (A C)*/
326 2,-1,-1, 2, 2, 1, 1, 1, 2, /* W (A T)*/
327 -1, 2, 2,-1,-1, 1, 1, 1,-1, 2, /* S (C G)*/
328 -1,-1, 2, 2, 2, 1, 1,-1, 1, 1, 2, /* K (G T)*/
329 1,-2, 1, 1, 1, 1,-1,-1, 1,-1, 1, 1, /* D (!C) */
330 1, 1,-2, 1, 1,-1, 1, 1, 1,-1,-1,-1, 1, /* H (!G) */
331 1, 1, 1,-2,-1, 1,-1, 1,-1, 1,-1,-1,-1, 1, /* V (!T) */
332 -2, 1, 1, 1, 1,-1, 1,-1,-1, 1, 1,-1,-1,-1, 1, /* B (!A) */
333 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, /* N */
334 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1}; /* X */
335 /* A C G T U R Y M W S K D H V B N */
336
337
338
339
340 ajint *pam;
341 ajint pam2[MAXSQ][MAXSQ];
342 ajint pamh1[MAXSQ]; /* used for kfact replacement */
343
344
345
346
347 static void garnier_report(AjPReport report, AjPFeattable TabRpt,
348 ajint from, ajint to, char *seq,
349 ajint begin, ajint Idc);
350 static void garnier_do(AjPFile outf, ajint s, ajint len,
351 char *seq, const char *name,
352 ajint Idc);
353 static void garnier_makemap (const char *input, ajint *map, ajint n);
354
355
356
357
358 /* @prog garnier **************************************************************
359 **
360 ** Predicts protein secondary structure
361 **
362 ******************************************************************************/
363
main(int argc,char ** argv)364 int main(int argc, char **argv)
365 {
366 AjPSeqall seqall;
367 AjPSeq seq = NULL;
368 AjPFile outf = NULL; /* no longer used */
369 AjPReport report = NULL;
370 AjPFeattable TabRpt = NULL;
371 AjPStr strand = NULL;
372 AjPStr substr = NULL;
373
374 ajint begin;
375 ajint end;
376 ajint len;
377 ajint Idc;
378
379 embInit("garnier",argc,argv);
380
381 seqall = ajAcdGetSeqall("sequence");
382 Idc = ajAcdGetInt("idc");
383 report = ajAcdGetReport("outfile");
384
385
386 substr = ajStrNew();
387
388
389 while(ajSeqallNext(seqall, &seq))
390 {
391 begin = ajSeqallGetseqBegin(seqall);
392 end = ajSeqallGetseqEnd(seqall);
393
394 TabRpt = ajFeattableNewSeq(seq);
395
396 strand = ajSeqGetSeqCopyS(seq);
397 ajStrFmtUpper(&strand);
398
399 ajStrAssignSubC(&substr,ajStrGetPtr(strand),begin-1,end-1);
400 len = ajStrGetLen(substr);
401
402 garnier_report(report, TabRpt, 1, len,
403 ajStrGetuniquePtr(&substr),begin-1,Idc);
404 if(outf)
405 {
406 ajStrAssignSubC(&substr,ajStrGetPtr(strand),begin-1,end-1);
407 garnier_do(outf,0,len,
408 ajStrGetuniquePtr(&substr),ajSeqGetNameC(seq),Idc);
409 }
410 ajStrDel(&strand);
411
412 ajReportWrite(report, TabRpt, seq);
413 ajFeattableDel(&TabRpt);
414 }
415 ajReportSetSeqstats(report, seqall);
416
417 ajSeqDel(&seq);
418 ajStrDel(&substr);
419
420 ajFileClose(&outf);
421 ajReportClose(report);
422 ajReportDel(&report);
423
424 ajSeqallDel(&seqall);
425 ajSeqDel(&seq);
426
427 embExit();
428
429 return 0;
430 }
431
432
433
434
435 /* @funcstatic garnier_do *****************************************************
436 **
437 ** Undocumented.
438 **
439 ** @param [u] outf [AjPFile] Undocumented
440 ** @param [r] from [ajint] Undocumented
441 ** @param [r] to [ajint] Undocumented
442 ** @param [u] seq [char*] Undocumented
443 ** @param [r] name [const char*] Undocumented
444 ** @param [r] Idc [ajint] Undocumented
445 ** @@
446 ******************************************************************************/
447
448
garnier_do(AjPFile outf,ajint from,ajint to,char * seq,const char * name,ajint Idc)449 static void garnier_do(AjPFile outf, ajint from, ajint to, char *seq,
450 const char *name, ajint Idc)
451 {
452 const char *refstr="\n Please cite:\n"
453 " Garnier, Osguthorpe and Robson (1978)"
454 " J. Mol. Biol. 120:97-120\n";
455
456 ajint i;
457 ajint end;
458 ajint amap[20];
459 ajint nna = 20;
460 ajint parr[4];
461 char carr[] = "HETC";
462 char *type;
463 ajint iarr[4];
464 ajint dharr[]=
465 {
466 0,158,-75,-100
467 };
468
469 ajint dsarr[]=
470 {
471 0,50,-88,-88
472 };
473
474 ajint n0;
475 ajint j;
476 ajint k;
477 ajint l0;
478 ajint l1;
479 ajint idc;
480 ajint dcs;
481 ajint dch;
482 ajint lastk;
483 float fn0;
484
485 idc = Idc;
486 end = to-from+1;
487 n0 = end;
488
489
490 type = AJALLOC0(n0*sizeof(char));
491
492 sascii = aascii;
493
494 if(idc<=0)
495 dcs = dch = 0;
496 else if(idc <4)
497 {
498 dch = dharr[idc];
499 dcs = 0;
500 }
501 else if(idc<=6)
502 {
503 dch = 0;
504 dcs = dsarr[idc-3];
505 }
506 else
507 dcs = dch = 0;
508
509 garnier_makemap(amino,amap,nna);
510
511
512
513 --n0;
514
515 for(i=0; i<n0; i++)
516 seq[i] = amap[aascii[(ajint)seq[i]]];
517
518
519 for(k=0;k<4;++k)
520 iarr[k]=0;
521
522 lastk = 0;
523 for(i=0; i<n0; i++)
524 {
525 parr[0]=helix[(ajint)seq[i]][8];
526 parr[1]=extend[(ajint)seq[i]][8];
527 parr[2]=turns[(ajint)seq[i]][8];
528 parr[3]=coil[(ajint)seq[i]][8];
529
530 for(j=1; j<9; j++)
531 {
532 if((i-j)>=0)
533 {
534 parr[0] += helix[(ajint)seq[i-j]][8+j];
535 parr[1] += extend[(ajint)seq[i-j]][8+j];
536 parr[2] += turns[(ajint)seq[i-j]][8+j];
537 parr[3] += coil[(ajint)seq[i-j]][8+j];
538 }
539
540 if((i+j)<n0)
541 {
542 parr[0] += helix[(ajint)seq[i+j]][8-j];
543 parr[1] += extend[(ajint)seq[i+j]][8-j];
544 parr[2] += turns[(ajint)seq[i+j]][8-j];
545 parr[3] += coil[(ajint)seq[i+j]][8-j];
546 }
547 }
548 parr[0] -= dch;
549 parr[1] -= dcs;
550
551 k = 0;
552
553 for(j=1; j<4; j++)
554 if(parr[j]>parr[k])
555 k=j;
556
557 if(parr[lastk]>=parr[k])
558 k=lastk;
559
560 lastk = k;
561 type[i] = carr[k];
562 iarr[k]++;
563 }
564
565 ajFmtPrintF(outf," GARNIER plot of %s, %3d aa; DCH = %d, DCS = %d\n",
566 name,n0,dch,dcs);
567 ajFmtPrintF(outf,"%s\n",refstr);
568
569
570 l1 = n0/60 + 1;
571 for(l0=0; l0<l1; l0++)
572 {
573 ajFmtPrintF(outf," ");
574 for(i=l0*60+9; i<n0 && i<(l0+1)*60; i+=10)
575 ajFmtPrintF(outf," .%5d",i+1);
576 ajFmtPrintF(outf,"\n ");
577 for(i=l0*60; i<n0 && i<(l0+1)*60; i++)
578 ajFmtPrintF(outf,"%c",amino[(ajint)seq[i]]);
579 ajFmtPrintF(outf,"\n helix ");
580 for(i=l0*60; i<n0 && i<(l0+1)*60; i++)
581 ajFmtPrintF(outf,"%c",(type[i]=='H')?'H':' ');
582 ajFmtPrintF(outf,"\n sheet ");
583 for(i=l0*60; i<n0 && i<(l0+1)*60; i++)
584 ajFmtPrintF(outf,"%c",(type[i]=='E')?'E':' ');
585 ajFmtPrintF(outf,"\n turns ");
586 for(i=l0*60; i<n0 && i<(l0+1)*60; i++)
587 ajFmtPrintF(outf,"%c",(type[i]=='T')?'T':' ');
588 ajFmtPrintF(outf,"\n coil ");
589 for(i=l0*60; i<n0 && i<(l0+1)*60; i++)
590 ajFmtPrintF(outf,"%c",(type[i]=='C')?'C':' ');
591 ajFmtPrintF(outf,"\n\n");
592 }
593 ajFmtPrintF(outf," Residue totals: H:%3d E:%3d T:%3d C:%3d\n",
594 iarr[0],iarr[1],iarr[2],iarr[3]);
595
596 fn0 = (float)(n0-16)/(float)100.0;
597 ajFmtPrintF(outf," percent: H: %4.1f E: %4.1f T: %4.1f C: %4.1f\n",
598 (float)iarr[0]/fn0,(float)iarr[1]/fn0,(float)iarr[2]/fn0,
599 (float)iarr[3]/fn0);
600 ajFmtPrintF(outf,"-----------------------------------------------"
601 "---------------------\n\n");
602
603 AJFREE(type);
604
605 return;
606 }
607
608
609
610
611 /* @funcstatic garnier_report *************************************************
612 **
613 ** Undocumented.
614 **
615 ** @param [u] report [AjPReport] Undocumented
616 ** @param [u] TabRpt [AjPFeattable] Undocumented
617 ** @param [r] from [ajint] Undocumented
618 ** @param [r] to [ajint] Undocumented
619 ** @param [w] seq [char*] Undocumented
620 ** @param [r] begin [ajint] Undocumented
621 ** @param [r] Idc [ajint] Undocumented
622 ** @@
623 ******************************************************************************/
624
garnier_report(AjPReport report,AjPFeattable TabRpt,ajint from,ajint to,char * seq,ajint begin,ajint Idc)625 static void garnier_report(AjPReport report, AjPFeattable TabRpt,
626 ajint from, ajint to, char *seq,
627 ajint begin, ajint Idc)
628 {
629 const char *refstr="\n Please cite:\n"
630 " Garnier, Osguthorpe and Robson (1978)"
631 " J. Mol. Biol. 120:97-120\n";
632
633 ajint i;
634 ajint end;
635 ajint amap[20];
636 ajint nna = 20;
637 ajint parr[4];
638 char carr[] = "HETC";
639 char* type;
640 ajint iarr[4];
641 ajint dharr[]=
642 {
643 0,158,-75,-100
644 };
645
646 ajint dsarr[]=
647 {
648 0,50,-88,-88
649 };
650
651 ajint n0;
652 ajint j;
653 ajint k;
654 ajint l0;
655 ajint idc;
656 ajint dcs;
657 ajint dch;
658 ajint lastk;
659 float fn0;
660 AjPStr tmpStr = NULL;
661 AjPStr strHelix = NULL;
662 AjPStr strExtend = NULL;
663 AjPStr strTurns = NULL;
664 AjPStr strCoil = NULL;
665 AjPFeature gf = NULL;
666 char testch = ' ';
667 ajint ito;
668
669 if(!strHelix)
670 {
671 ajStrAssignC(&strHelix, "SO:0001117"); /* alpha_helix */
672 ajStrAssignC(&strExtend, "SO:0001111"); /* beta_strand */
673 ajStrAssignC(&strTurns, "SO:0001128"); /* turn */
674 ajStrAssignC(&strCoil, "SO:0100012"); /* coil (unstructured) */
675 }
676
677 idc = Idc;
678 end = to-from+1;
679 n0 = end;
680
681 type = AJALLOC0(n0*sizeof(char));
682
683 sascii = aascii;
684
685 if(idc<=0)
686 dcs = dch = 0;
687 else if(idc <4)
688 {
689 dch = dharr[idc];
690 dcs = 0;
691 }
692 else if(idc<=6)
693 {
694 dch = 0;
695 dcs = dsarr[idc-3];
696 }
697 else
698 dcs = dch = 0;
699
700 garnier_makemap(amino,amap,nna);
701
702
703 /* copy from garnier.c original */
704
705 for(i=0; i<n0; i++)
706 seq[i] = amap[aascii[(ajint)seq[i]]];
707
708
709 for(k=0;k<4;++k)
710 iarr[k] = 0;
711
712 lastk = 0;
713 for(i=0; i<n0; i++)
714 {
715 parr[0] = helix[(ajint)seq[i]][8];
716 parr[1] = extend[(ajint)seq[i]][8];
717 parr[2] = turns[(ajint)seq[i]][8];
718 parr[3] = coil[(ajint)seq[i]][8];
719
720 for(j=1; j<9; j++)
721 {
722 if((i-j)>=0)
723 {
724 parr[0] += helix[(ajint)seq[i-j]][8+j];
725 parr[1] += extend[(ajint)seq[i-j]][8+j];
726 parr[2] += turns[(ajint)seq[i-j]][8+j];
727 parr[3] += coil[(ajint)seq[i-j]][8+j];
728 }
729
730 if((i+j)<n0)
731 {
732 parr[0] += helix[(ajint)seq[i+j]][8-j];
733 parr[1] += extend[(ajint)seq[i+j]][8-j];
734 parr[2] += turns[(ajint)seq[i+j]][8-j];
735 parr[3] += coil[(ajint)seq[i+j]][8-j];
736 }
737 }
738 parr[0] -= dch;
739 parr[1] -= dcs;
740
741 k = 0;
742
743 for(j=1; j<4; j++)
744 if(parr[j]>parr[k])
745 k = j;
746
747 if(parr[lastk]>=parr[k])
748 k = lastk;
749
750 lastk = k;
751 type[i] = carr[k];
752 iarr[k]++;
753 }
754
755 ajStrAssignC(&tmpStr, "");
756 ajFmtPrintAppS(&tmpStr,
757 "DCH = %d, DCS = %d\n",
758 dch,dcs);
759 ajFmtPrintAppS(&tmpStr, "%s\n",refstr);
760
761 ajReportSetHeaderS(report, tmpStr);
762
763
764 testch = ' ';
765 l0 = begin+1;
766
767 for(i=0; i<=n0; i++)
768 if(i==n0 || type[i] != testch)
769 {
770 if(i)
771 {
772 ito = i + begin;
773 switch(testch)
774 {
775 case 'H':
776 gf = ajFeatNewProt(TabRpt, NULL, strHelix, l0, ito, 0.0);
777 ajFmtPrintS(&tmpStr, "*garnier H");
778 ajFeatTagAddSS(gf, NULL, tmpStr);
779 break;
780 case 'E':
781 gf = ajFeatNewProt(TabRpt, NULL, strExtend, l0, ito, 0.0);
782 ajFmtPrintS(&tmpStr, "*garnier E");
783 ajFeatTagAddSS(gf, NULL, tmpStr);
784 break;
785 case 'T':
786 gf = ajFeatNewProt(TabRpt, NULL, strTurns, l0, ito, 0.0);
787 ajFmtPrintS(&tmpStr, "*garnier T");
788 ajFeatTagAddSS(gf, NULL, tmpStr);
789 break;
790 case 'C':
791 gf = ajFeatNewProt(TabRpt, NULL, strCoil, l0, ito, 0.0);
792 ajFmtPrintS(&tmpStr, "*garnier C");
793 ajFeatTagAddSS(gf, NULL, tmpStr);
794 break;
795 default:
796 break;
797 }
798
799 l0 = i+begin+1;
800 }
801
802 if(i < n0)
803 testch = type[i];
804 }
805
806
807
808 ajStrAssignC(&tmpStr, "");
809
810 ajFmtPrintAppS(&tmpStr,
811 " Residue totals: H:%3d E:%3d T:%3d C:%3d\n",
812 iarr[0],iarr[1],iarr[2],iarr[3]);
813 fn0 = (float)(n0-16)/(float)100.0;
814 ajFmtPrintAppS(&tmpStr,
815 " percent: H: %4.1f E: %4.1f T: %4.1f C: %4.1f\n",
816 (float)iarr[0]/fn0,(float)iarr[1]/fn0,(float)iarr[2]/fn0,
817 (float)iarr[3]/fn0);
818
819 ajReportSetTailS(report, tmpStr);
820
821 AJFREE(type);
822 ajStrDel(&tmpStr);
823 ajStrDel(&strExtend);
824 ajStrDel(&strHelix);
825 ajStrDel(&strTurns);
826 ajStrDel(&strCoil);
827
828 return;
829 }
830
831
832
833
834 /* @funcstatic garnier_makemap ************************************************
835 **
836 ** Undocumented.
837 **
838 ** @param [r] input [const char*] Undocumented
839 ** @param [w] map [ajint*] Undocumented
840 ** @param [r] n [ajint] Undocumented
841 ** @@
842 ******************************************************************************/
843
garnier_makemap(const char * input,ajint * map,ajint n)844 static void garnier_makemap(const char *input, ajint *map, ajint n)
845 {
846 ajint i;
847
848 for(i=0;i<n;i++)
849 map[aascii[(ajint)input[i]]]=i;
850
851 return;
852 }
853