1 static char rcsid[] = "$Id: intron.c 204388 2017-03-18 00:03:34Z twu $";
2 #ifdef HAVE_CONFIG_H
3 #include <config.h>
4 #endif
5
6 #include "intron.h"
7 #include "sense.h"
8 #include <stdlib.h> /* For abort() */
9
10
/* debug(x) expands to its argument when DEBUG is defined at compile time
   and to nothing otherwise, so diagnostic statements wrapped in it are
   dropped from builds without DEBUG. */
#if defined(DEBUG)
# define debug(x) x
#else
# define debug(x)
#endif
16
17
18 int
19 Intron_type (char left1, char left2, char right2, char right1,
20 char left1_alt, char left2_alt, char right2_alt, char right1_alt,
21 int cdna_direction
22 #ifdef INTRON_HELP
23 , IIT_T splicesites_iit, int *splicesites_divint_crosstable,
24 int donor_typeint, int acceptor_typeint, Chrnum_T chrnum,
25 Chrpos_T leftgenomepos, Chrpos_T rightgenomepos,
26 Univcoord_T chroffset, Univcoord_T chrhigh, bool watsonp,
27 #endif
28 ) {
29 int introntype, leftdi, rightdi;
30
31 if ((left1 == 'G' || left1_alt == 'G') && (left2 == 'T' || left2_alt == 'T')) {
32 leftdi = LEFT_GT;
33 } else if ((left1 == 'G' || left1_alt == 'G') && (left2 == 'C' || left2_alt == 'C')) {
34 leftdi = LEFT_GC;
35 } else if ((left1 == 'A' || left1_alt == 'A') && (left2 == 'T' || left2_alt == 'T')) {
36 leftdi = LEFT_AT;
37 #ifndef PMAP
38 } else if ((left1 == 'C' || left1_alt == 'A') && (left2 == 'T' || left2_alt == 'T')) {
39 leftdi = LEFT_CT;
40 #endif
41
42 #ifdef INTRON_HELP
43 /* Not tested */
44 } else if (splicesites_iit == NULL) {
45 debug(printf("splicesites_iit is NULL\n"));
46 return NONINTRON;
47 } else if (cdna_direction > 0) {
48 if (watsonp) {
49 debug(printf("cdna_direction %d, watsonp %d, looking for donor at %u..%u, sign +1\n",
50 cdna_direction,watsonp,chroffset+leftgenomepos,chroffset+leftgenomepos+1U));
51 if (IIT_exists_with_divno_typed_signed(splicesites_iit,splicesites_divint_crosstable[chrnum],
52 chroffset+leftgenomepos,chroffset+leftgenomepos+1U,
53 donor_typeint,/*sign*/+1) == true) {
54 leftdi = LEFT_GT;
55 } else {
56 return NONINTRON;
57 }
58 } else {
59 debug(printf("cdna_direction %d, watsonp %d, looking for donor at %u..%u, sign -1\n",
60 cdna_direction,watsonp,chrhigh-leftgenomepos,chrhigh-leftgenomepos+1U));
61 if (IIT_exists_with_divno_typed_signed(splicesites_iit,splicesites_divint_crosstable[chrnum],
62 chrhigh-leftgenomepos,chrhigh-leftgenomepos+1U,
63 donor_typeint,/*sign*/-1) == true) {
64 leftdi = LEFT_GT;
65 } else {
66 return NONINTRON;
67 }
68 }
69 } else if (cdna_direction < 0) {
70 if (watsonp) {
71 debug(printf("cdna_direction %d, watsonp %d, looking for acceptor at %u..%u, sign -1\n",
72 cdna_direction,watsonp,chroffset+leftgenomepos,chroffset+leftgenomepos+1U));
73 if (IIT_exists_with_divno_typed_signed(splicesites_iit,splicesites_divint_crosstable[chrnum],
74 chroffset+leftgenomepos,chroffset+leftgenomepos+1U,
75 acceptor_typeint,/*sign*/-1) == true) {
76 leftdi = LEFT_CT;
77 } else {
78 return NONINTRON;
79 }
80 } else {
81 debug(printf("cdna_direction %d, watsonp %d, looking for acceptor at %u..%u, sign +1\n",
82 cdna_direction,watsonp,chrhigh-leftgenomepos,chrhigh-leftgenomepos+1U));
83 if (IIT_exists_with_divno_typed_signed(splicesites_iit,splicesites_divint_crosstable[chrnum],
84 chrhigh-leftgenomepos,chrhigh-leftgenomepos+1U,
85 acceptor_typeint,/*sign*/+1) == true) {
86 leftdi = LEFT_CT;
87 } else {
88 return NONINTRON;
89 }
90 }
91 #endif
92
93 } else {
94 return NONINTRON;
95 }
96
97 if ((right2 == 'A' || right2_alt == 'A') && (right1 == 'G' || right1_alt == 'G')) {
98 rightdi = RIGHT_AG;
99 } else if ((right2 == 'A' || right2_alt == 'A') && (right1 == 'C' || right1_alt == 'C')) {
100 rightdi = RIGHT_AC;
101 #ifndef PMAP
102 } else if ((right2 == 'G' || right2_alt == 'G') && (right1 == 'C' || right1_alt == 'C')) {
103 rightdi = RIGHT_GC;
104 } else if ((right2 == 'A' || right2_alt == 'A') && (right1 == 'T' || right1_alt == 'T')) {
105 rightdi = RIGHT_AT;
106 #endif
107
108 #ifdef INTRON_HELP
109 /* Not tested */
110 } else if (splicesites_iit == NULL) {
111 return NONINTRON;
112 } else if (cdna_direction > 0) {
113 if (watsonp) {
114 if (IIT_exists_with_divno_typed_signed(splicesites_iit,splicesites_divint_crosstable[chrnum],
115 chroffset+rightgenomepos,chroffset+rightgenomepos+1U,
116 acceptor_typeint,/*sign*/+1) == true) {
117 rightdi = RIGHT_AG;
118 } else {
119 return NONINTRON;
120 }
121 } else {
122 if (IIT_exists_with_divno_typed_signed(splicesites_iit,splicesites_divint_crosstable[chrnum],
123 chrhigh-rightgenomepos,chrhigh-rightgenomepos+1U,
124 acceptor_typeint,/*sign*/-1) == true) {
125 rightdi = RIGHT_AG;
126 } else {
127 return NONINTRON;
128 }
129 }
130 } else if (cdna_direction < 0) {
131 if (watsonp) {
132 if (IIT_exists_with_divno_typed_signed(splicesites_iit,splicesites_divint_crosstable[chrnum],
133 chroffset+rightgenomepos,chroffset+rightgenomepos+1U,
134 donor_typeint,/*sign*/-1) == true) {
135 rightdi = RIGHT_AC;
136 } else {
137 return NONINTRON;
138 }
139 } else {
140 if (IIT_exists_with_divno_typed_signed(splicesites_iit,splicesites_divint_crosstable[chrnum],
141 chrhigh-rightgenomepos,chrhigh-rightgenomepos+1U,
142 donor_typeint,/*sign*/-1) == true) {
143 rightdi = RIGHT_AC;
144 } else {
145 return NONINTRON;
146 }
147
148 }
149 #endif
150
151 } else {
152 return NONINTRON;
153 }
154
155
156 if ((introntype = leftdi & rightdi) == 0x00) {
157 return NONINTRON;
158 } else if (cdna_direction > 0) {
159 if (introntype < 0x08) {
160 return NONINTRON;
161 } else {
162 return introntype;
163 }
164 } else if (cdna_direction < 0) {
165 if (introntype > 0x04) {
166 return NONINTRON;
167 } else {
168 return introntype;
169 }
170 } else {
171 /* Should happen only from Stage3_merge_local_splice and Indel_resolve_middle_deletion */
172 /* return NONINTRON; */
173 return introntype; /* Needed for guess */
174 }
175 }
176
177
178 int
Intron_sensedir(int introntype)179 Intron_sensedir (int introntype) {
180 switch (introntype) {
181 case GTAG_FWD: case GCAG_FWD: case ATAC_FWD: return SENSE_FORWARD;
182 #ifndef PMAP
183 case GTAG_REV: case GCAG_REV: case ATAC_REV: return SENSE_ANTI;
184 #endif
185 default: return SENSE_NULL;
186 }
187 }
188
189 int
Intron_canonical_sensedir(int introntype)190 Intron_canonical_sensedir (int introntype) {
191 switch (introntype) {
192 case GTAG_FWD: case GCAG_FWD: return SENSE_FORWARD;
193 #ifndef PMAP
194 case GTAG_REV: case GCAG_REV: return SENSE_ANTI;
195 #endif
196 default: return SENSE_NULL;
197 }
198 }
199
200
201 int
Intron_level(int introntype)202 Intron_level (int introntype) {
203 switch (introntype) {
204 case GTAG_FWD: return 3;
205 case GCAG_FWD: return 2;
206 case ATAC_FWD: return 1;
207 #ifndef PMAP
208 case GTAG_REV: return 3;
209 case GCAG_REV: return 2;
210 case ATAC_REV: return 1;
211 #endif
212 default: return 0;
213 }
214 }
215
216 char *
Intron_type_string(int introntype)217 Intron_type_string (int introntype) {
218 switch (introntype) {
219 case GTAG_FWD: return "GT-AG, fwd";
220 case GCAG_FWD: return "GC-AG, fwd";
221 case ATAC_FWD: return "AT-AC, fwd";
222 #ifndef PMAP
223 case GTAG_REV: return "GT-AG, rev";
224 case GCAG_REV: return "GC-AG, rev";
225 case ATAC_REV: return "AT-AC, rev";
226 #endif
227 default: return "nonintron";
228 }
229 }
230
231 char *
Intron_left_dinucl_string(int dinucl)232 Intron_left_dinucl_string (int dinucl) {
233 switch (dinucl) {
234 case LEFT_GT: return "GT-";
235 case LEFT_GC: return "GC-";
236 case LEFT_AT: return "AT-";
237 #ifndef PMAP
238 case LEFT_CT: return "CT-";
239 #endif
240 default: return "XX-";
241 }
242 }
243
244 char *
Intron_right_dinucl_string(int dinucl)245 Intron_right_dinucl_string (int dinucl) {
246 switch (dinucl) {
247 case RIGHT_AG: return "-AG";
248 case RIGHT_AC: return "-AC";
249 #ifndef PMAP
250 case RIGHT_GC: return "-GC";
251 case RIGHT_AT: return "-AT";
252 #endif
253 default: return "-XX";
254 }
255 }
256
257
/* True only when the four characters spell GT on the donor side and AG on
   the acceptor side.  Comparison is exact (uppercase only). */
bool
Intron_canonical_fwd_p (char donor1, char donor2, char acceptor2, char acceptor1) {
  if (donor1 != 'G' || donor2 != 'T') {
    return false;
  }
  if (acceptor2 != 'A' || acceptor1 != 'G') {
    return false;
  }
  return true;
}
267
/* True only when the four characters spell CT followed by AC, i.e. the
   GT-AG pattern as it reads on the opposite strand.  Comparison is exact
   (uppercase only). */
bool
Intron_canonical_rev_p (char donor1, char donor2, char acceptor2, char acceptor1) {
  return (donor1 == 'C' && donor2 == 'T' && acceptor2 == 'A' && acceptor1 == 'C')
    ? true
    : false;
}
277
/* True only when the four characters spell GC on the donor side and AG on
   the acceptor side.  Comparison is exact (uppercase only). */
bool
Intron_gcag_fwd_p (char donor1, char donor2, char acceptor2, char acceptor1) {
  int donor_matches = (donor1 == 'G' && donor2 == 'C');
  int acceptor_matches = (acceptor2 == 'A' && acceptor1 == 'G');

  if (donor_matches && acceptor_matches) {
    return true;
  }
  return false;
}
287
/* True only when the four characters spell AT on the donor side and AC on
   the acceptor side.  Comparison is exact (uppercase only). */
bool
Intron_atac_fwd_p (char donor1, char donor2, char acceptor2, char acceptor1) {
  if (donor1 != 'A' || donor2 != 'T') {
    return false;
  }
  if (acceptor2 != 'A' || acceptor1 != 'C') {
    return false;
  }
  return true;
}
297
/* True only when the four characters spell CT followed by GC, i.e. the
   GC-AG pattern as it reads on the opposite strand.  Comparison is exact
   (uppercase only). */
bool
Intron_gcag_rev_p (char donor1, char donor2, char acceptor2, char acceptor1) {
  return (donor1 == 'C' && donor2 == 'T' && acceptor2 == 'G' && acceptor1 == 'C')
    ? true
    : false;
}
307
/* True only when the four characters spell GT followed by AT, i.e. the
   AT-AC pattern as it reads on the opposite strand.  Comparison is exact
   (uppercase only). */
bool
Intron_atac_rev_p (char donor1, char donor2, char acceptor2, char acceptor1) {
  int left_matches = (donor1 == 'G' && donor2 == 'T');
  int right_matches = (acceptor2 == 'A' && acceptor1 == 'T');

  if (left_matches && right_matches) {
    return true;
  }
  return false;
}
317
318