1 static char rcsid[] = "$Id: intron.c 204388 2017-03-18 00:03:34Z twu $";
2 #ifdef HAVE_CONFIG_H
3 #include <config.h>
4 #endif
5 
6 #include "intron.h"
7 #include "sense.h"
8 #include <stdlib.h>		/* For abort() */
9 
10 
11 #ifdef DEBUG
12 #define debug(x) x
13 #else
14 #define debug(x)
15 #endif
16 
17 
18 int
19 Intron_type (char left1, char left2, char right2, char right1,
20 	     char left1_alt, char left2_alt, char right2_alt, char right1_alt,
21 	     int cdna_direction
22 #ifdef INTRON_HELP
23 	     , IIT_T splicesites_iit, int *splicesites_divint_crosstable,
24 	     int donor_typeint, int acceptor_typeint, Chrnum_T chrnum,
25 	     Chrpos_T leftgenomepos, Chrpos_T rightgenomepos,
26 	     Univcoord_T chroffset, Univcoord_T chrhigh, bool watsonp,
27 #endif
28 	     ) {
29   int introntype, leftdi, rightdi;
30 
31   if ((left1 == 'G' || left1_alt == 'G') && (left2 == 'T' || left2_alt == 'T')) {
32     leftdi = LEFT_GT;
33   } else if ((left1 == 'G' || left1_alt == 'G') && (left2 == 'C' || left2_alt == 'C')) {
34     leftdi = LEFT_GC;
35   } else if ((left1 == 'A' || left1_alt == 'A') && (left2 == 'T' || left2_alt == 'T')) {
36     leftdi = LEFT_AT;
37 #ifndef PMAP
38   } else if ((left1 == 'C' || left1_alt == 'A') && (left2 == 'T' || left2_alt == 'T')) {
39     leftdi = LEFT_CT;
40 #endif
41 
42 #ifdef INTRON_HELP
43     /* Not tested */
44   } else if (splicesites_iit == NULL) {
45     debug(printf("splicesites_iit is NULL\n"));
46     return NONINTRON;
47   } else if (cdna_direction > 0) {
48     if (watsonp) {
49       debug(printf("cdna_direction %d, watsonp %d, looking for donor at %u..%u, sign +1\n",
50 		   cdna_direction,watsonp,chroffset+leftgenomepos,chroffset+leftgenomepos+1U));
51       if (IIT_exists_with_divno_typed_signed(splicesites_iit,splicesites_divint_crosstable[chrnum],
52 					     chroffset+leftgenomepos,chroffset+leftgenomepos+1U,
53 					     donor_typeint,/*sign*/+1) == true) {
54 	leftdi = LEFT_GT;
55       } else {
56 	return NONINTRON;
57       }
58     } else {
59       debug(printf("cdna_direction %d, watsonp %d, looking for donor at %u..%u, sign -1\n",
60 		   cdna_direction,watsonp,chrhigh-leftgenomepos,chrhigh-leftgenomepos+1U));
61       if (IIT_exists_with_divno_typed_signed(splicesites_iit,splicesites_divint_crosstable[chrnum],
62 					     chrhigh-leftgenomepos,chrhigh-leftgenomepos+1U,
63 					     donor_typeint,/*sign*/-1) == true) {
64 	leftdi = LEFT_GT;
65       } else {
66 	return NONINTRON;
67       }
68     }
69   } else if (cdna_direction < 0) {
70     if (watsonp) {
71       debug(printf("cdna_direction %d, watsonp %d, looking for acceptor at %u..%u, sign -1\n",
72 		   cdna_direction,watsonp,chroffset+leftgenomepos,chroffset+leftgenomepos+1U));
73       if (IIT_exists_with_divno_typed_signed(splicesites_iit,splicesites_divint_crosstable[chrnum],
74 					     chroffset+leftgenomepos,chroffset+leftgenomepos+1U,
75 					     acceptor_typeint,/*sign*/-1) == true) {
76 	leftdi = LEFT_CT;
77       } else {
78 	return NONINTRON;
79       }
80     } else {
81       debug(printf("cdna_direction %d, watsonp %d, looking for acceptor at %u..%u, sign +1\n",
82 		   cdna_direction,watsonp,chrhigh-leftgenomepos,chrhigh-leftgenomepos+1U));
83       if (IIT_exists_with_divno_typed_signed(splicesites_iit,splicesites_divint_crosstable[chrnum],
84 					     chrhigh-leftgenomepos,chrhigh-leftgenomepos+1U,
85 					     acceptor_typeint,/*sign*/+1) == true) {
86 	leftdi = LEFT_CT;
87       } else {
88 	return NONINTRON;
89       }
90     }
91 #endif
92 
93   } else {
94     return NONINTRON;
95   }
96 
97   if ((right2 == 'A' || right2_alt == 'A') && (right1 == 'G' || right1_alt == 'G')) {
98     rightdi = RIGHT_AG;
99   } else if ((right2 == 'A' || right2_alt == 'A') && (right1 == 'C' || right1_alt == 'C')) {
100     rightdi = RIGHT_AC;
101 #ifndef PMAP
102   } else if ((right2 == 'G' || right2_alt == 'G') && (right1 == 'C' || right1_alt == 'C')) {
103     rightdi = RIGHT_GC;
104   } else if ((right2 == 'A' || right2_alt == 'A') && (right1 == 'T' || right1_alt == 'T')) {
105     rightdi = RIGHT_AT;
106 #endif
107 
108 #ifdef INTRON_HELP
109     /* Not tested */
110   } else if (splicesites_iit == NULL) {
111     return NONINTRON;
112   } else if (cdna_direction > 0) {
113     if (watsonp) {
114       if (IIT_exists_with_divno_typed_signed(splicesites_iit,splicesites_divint_crosstable[chrnum],
115 					     chroffset+rightgenomepos,chroffset+rightgenomepos+1U,
116 					     acceptor_typeint,/*sign*/+1) == true) {
117 	rightdi = RIGHT_AG;
118       } else {
119 	return NONINTRON;
120       }
121     } else {
122       if (IIT_exists_with_divno_typed_signed(splicesites_iit,splicesites_divint_crosstable[chrnum],
123 					     chrhigh-rightgenomepos,chrhigh-rightgenomepos+1U,
124 					     acceptor_typeint,/*sign*/-1) == true) {
125 	rightdi = RIGHT_AG;
126       } else {
127 	return NONINTRON;
128       }
129     }
130   } else if (cdna_direction < 0) {
131     if (watsonp) {
132       if (IIT_exists_with_divno_typed_signed(splicesites_iit,splicesites_divint_crosstable[chrnum],
133 					     chroffset+rightgenomepos,chroffset+rightgenomepos+1U,
134 					     donor_typeint,/*sign*/-1) == true) {
135 	rightdi = RIGHT_AC;
136       } else {
137 	return NONINTRON;
138       }
139     } else {
140       if (IIT_exists_with_divno_typed_signed(splicesites_iit,splicesites_divint_crosstable[chrnum],
141 					     chrhigh-rightgenomepos,chrhigh-rightgenomepos+1U,
142 					     donor_typeint,/*sign*/-1) == true) {
143 	rightdi = RIGHT_AC;
144       } else {
145 	return NONINTRON;
146       }
147 
148     }
149 #endif
150 
151   } else {
152     return NONINTRON;
153   }
154 
155 
156   if ((introntype = leftdi & rightdi) == 0x00) {
157     return NONINTRON;
158   } else if (cdna_direction > 0) {
159     if (introntype < 0x08) {
160       return NONINTRON;
161     } else {
162       return introntype;
163     }
164   } else if (cdna_direction < 0) {
165     if (introntype > 0x04) {
166       return NONINTRON;
167     } else {
168       return introntype;
169     }
170   } else {
171     /* Should happen only from Stage3_merge_local_splice and Indel_resolve_middle_deletion */
172     /* return NONINTRON; */
173     return introntype;		/* Needed for guess */
174   }
175 }
176 
177 
178 int
Intron_sensedir(int introntype)179 Intron_sensedir (int introntype) {
180   switch (introntype) {
181   case GTAG_FWD: case GCAG_FWD: case ATAC_FWD: return SENSE_FORWARD;
182 #ifndef PMAP
183   case GTAG_REV: case GCAG_REV: case ATAC_REV: return SENSE_ANTI;
184 #endif
185   default: return SENSE_NULL;
186   }
187 }
188 
189 int
Intron_canonical_sensedir(int introntype)190 Intron_canonical_sensedir (int introntype) {
191   switch (introntype) {
192   case GTAG_FWD: case GCAG_FWD: return SENSE_FORWARD;
193 #ifndef PMAP
194   case GTAG_REV: case GCAG_REV: return SENSE_ANTI;
195 #endif
196   default: return SENSE_NULL;
197   }
198 }
199 
200 
201 int
Intron_level(int introntype)202 Intron_level (int introntype) {
203   switch (introntype) {
204   case GTAG_FWD: return 3;
205   case GCAG_FWD: return 2;
206   case ATAC_FWD: return 1;
207 #ifndef PMAP
208   case GTAG_REV: return 3;
209   case GCAG_REV: return 2;
210   case ATAC_REV: return 1;
211 #endif
212   default: return 0;
213   }
214 }
215 
216 char *
Intron_type_string(int introntype)217 Intron_type_string (int introntype) {
218   switch (introntype) {
219   case GTAG_FWD: return "GT-AG, fwd";
220   case GCAG_FWD: return "GC-AG, fwd";
221   case ATAC_FWD: return "AT-AC, fwd";
222 #ifndef PMAP
223   case GTAG_REV: return "GT-AG, rev";
224   case GCAG_REV: return "GC-AG, rev";
225   case ATAC_REV: return "AT-AC, rev";
226 #endif
227   default: return "nonintron";
228   }
229 }
230 
231 char *
Intron_left_dinucl_string(int dinucl)232 Intron_left_dinucl_string (int dinucl) {
233   switch (dinucl) {
234   case LEFT_GT: return "GT-";
235   case LEFT_GC: return "GC-";
236   case LEFT_AT: return "AT-";
237 #ifndef PMAP
238   case LEFT_CT: return "CT-";
239 #endif
240   default: return "XX-";
241   }
242 }
243 
244 char *
Intron_right_dinucl_string(int dinucl)245 Intron_right_dinucl_string (int dinucl) {
246   switch (dinucl) {
247   case RIGHT_AG: return "-AG";
248   case RIGHT_AC: return "-AC";
249 #ifndef PMAP
250   case RIGHT_GC: return "-GC";
251   case RIGHT_AT: return "-AT";
252 #endif
253   default: return "-XX";
254   }
255 }
256 
257 
/* True exactly when the donor dinucleotide is GT and the acceptor
   dinucleotide is AG. */
bool
Intron_canonical_fwd_p (char donor1, char donor2, char acceptor2, char acceptor1) {
  return (donor1 == 'G' && donor2 == 'T' &&
	  acceptor2 == 'A' && acceptor1 == 'G') ? true : false;
}
267 
/* True exactly when the first dinucleotide is CT and the second is AC,
   i.e. the GT-AG pattern as it reads on the opposite strand. */
bool
Intron_canonical_rev_p (char donor1, char donor2, char acceptor2, char acceptor1) {
  return (donor1 == 'C' && donor2 == 'T' &&
	  acceptor2 == 'A' && acceptor1 == 'C') ? true : false;
}
277 
/* True exactly when the donor dinucleotide is GC and the acceptor
   dinucleotide is AG. */
bool
Intron_gcag_fwd_p (char donor1, char donor2, char acceptor2, char acceptor1) {
  return (donor1 == 'G' && donor2 == 'C' &&
	  acceptor2 == 'A' && acceptor1 == 'G') ? true : false;
}
287 
/* True exactly when the donor dinucleotide is AT and the acceptor
   dinucleotide is AC. */
bool
Intron_atac_fwd_p (char donor1, char donor2, char acceptor2, char acceptor1) {
  return (donor1 == 'A' && donor2 == 'T' &&
	  acceptor2 == 'A' && acceptor1 == 'C') ? true : false;
}
297 
/* True exactly when the first dinucleotide is CT and the second is GC,
   i.e. the GC-AG pattern as it reads on the opposite strand. */
bool
Intron_gcag_rev_p (char donor1, char donor2, char acceptor2, char acceptor1) {
  return (donor1 == 'C' && donor2 == 'T' &&
	  acceptor2 == 'G' && acceptor1 == 'C') ? true : false;
}
307 
/* True exactly when the first dinucleotide is GT and the second is AT,
   i.e. the AT-AC pattern as it reads on the opposite strand. */
bool
Intron_atac_rev_p (char donor1, char donor2, char acceptor2, char acceptor1) {
  return (donor1 == 'G' && donor2 == 'T' &&
	  acceptor2 == 'A' && acceptor1 == 'T') ? true : false;
}
317 
318