1 static char rcsid[] = "$Id: junction.c 222800 2020-06-03 21:56:52Z twu $";
2 #ifdef HAVE_CONFIG_H
3 #include <config.h>
4 #endif
5
6 #include "junction.h"
7 #include "mem.h"
8 #include "assert.h"
9 #include "complement.h"
10 #include "maxent_hr.h"
11 #include "sense.h"
12
13
14 #define MIN_INTRONLEN 30
15
16
17 #ifdef DEBUG
18 #define debug(x) x
19 #else
20 #define debug(x)
21 #endif
22
23 /* Debugging procedures */
24 #ifdef DEBUG1
25 #define debug1(x) x
26 #else
27 #define debug1(x)
28 #endif
29
30
31
32 #define T Junction_T
33 struct T {
34 Junctiontype_T type;
35 int nindels; /* Should be positive */
36 Univcoord_T deletionpos;
37
38 Chrpos_T splice_distance;
39 int sensedir;
40 double donor_prob;
41 double acceptor_prob;
42 };
43
44
45 #ifdef DEBUG1
46 void
Junction_print(T this)47 Junction_print (T this) {
48 if (this == NULL) {
49 printf("No junction\n");
50 } else if (this->type == INS_JUNCTION) {
51 printf("Insertion of %d\n",this->nindels);
52 } else if (this->type == DEL_JUNCTION) {
53 printf("Deletion of %d at %llu\n",this->nindels,(unsigned long long) this->deletionpos);
54 } else if (this->type == SPLICE_JUNCTION) {
55 if (this->splice_distance == 0) {
56 printf("Splice ambiguous with sense %d, prob %f and %f\n",
57 this->sensedir,this->donor_prob,this->acceptor_prob);
58 } else {
59 printf("Splice with sense %d of %u, prob %f and %f\n",
60 this->sensedir,this->splice_distance,this->donor_prob,this->acceptor_prob);
61 }
62 }
63 return;
64 }
65 #endif
66
67 #ifdef DEBUG1
68 void
Junction_print_list(List_T list)69 Junction_print_list (List_T list) {
70 T this;
71 List_T p;
72
73 for (p = list; p != NULL; p = List_next(p)) {
74 this = (T) List_head(p);
75 if (this == NULL) {
76 printf("None,");
77 } else if (this->type == INS_JUNCTION) {
78 printf("Ins:%d,",this->nindels);
79 } else if (this->type == DEL_JUNCTION) {
80 printf("Del:%d,",this->nindels);
81 } else if (this->type == SPLICE_JUNCTION) {
82 if (this->splice_distance == 0) {
83 printf("Amb:%f-%f,",this->donor_prob,this->acceptor_prob);
84 } else {
85 printf("Splice:%u,",this->splice_distance);
86 }
87 }
88 }
89
90 return;
91 }
92 #endif
93
94 void
Junction_free(T * old)95 Junction_free (T *old) {
96 FREE(*old);
97 return;
98 }
99
100 void
Junction_list_gc(List_T * list)101 Junction_list_gc (List_T *list) {
102 List_T p;
103 T old;
104
105 for (p = *list; p != NULL; p = List_next(p)) {
106 old = (T) List_head(p);
107 Junction_free(&old);
108 }
109 /* List_free(&(*list)); -- Allocated by Listpool_push */
110 return;
111 }
112
113 T
Junction_new_insertion(int nindels)114 Junction_new_insertion (int nindels) {
115 T new = (T) MALLOC(sizeof(*new));
116
117 assert(nindels > 0);
118
119 new->type = INS_JUNCTION;
120 new->nindels = nindels;
121 new->deletionpos = 0;
122
123 new->splice_distance = 0;
124 new->sensedir = 0;
125 new->donor_prob = 0.0;
126 new->acceptor_prob = 0.0;
127
128 return new;
129 }
130
131 T
Junction_new_deletion(int nindels,Univcoord_T deletionpos)132 Junction_new_deletion (int nindels, Univcoord_T deletionpos) {
133 T new = (T) MALLOC(sizeof(*new));
134
135 assert(nindels > 0);
136
137 new->type = DEL_JUNCTION;
138 new->nindels = nindels;
139 new->deletionpos = deletionpos;
140
141 new->splice_distance = 0;
142 new->sensedir = 0;
143 new->donor_prob = 0.0;
144 new->acceptor_prob = 0.0;
145
146 return new;
147 }
148
149 T
Junction_new_splice(Chrpos_T splice_distance,int sensedir,double donor_prob,double acceptor_prob)150 Junction_new_splice (Chrpos_T splice_distance, int sensedir, double donor_prob, double acceptor_prob) {
151 T new = (T) MALLOC(sizeof(*new));
152
153 /* A zero splice distance here means a distant splice */
154 assert((int) splice_distance >= 0);
155
156 new->type = SPLICE_JUNCTION;
157 new->nindels = 0;
158 new->deletionpos = 0;
159
160 new->splice_distance = splice_distance;
161 new->sensedir = sensedir;
162 new->donor_prob = donor_prob;
163 new->acceptor_prob = acceptor_prob;
164
165 return new;
166 }
167
168
169 T
Junction_new_ambig_splice(int sensedir,double donor_prob,double acceptor_prob)170 Junction_new_ambig_splice (int sensedir, double donor_prob, double acceptor_prob) {
171 T new = (T) MALLOC(sizeof(*new));
172
173 new->type = SPLICE_JUNCTION;
174 new->nindels = 0;
175 new->deletionpos = 0;
176
177 new->splice_distance = 0; /* A zero splice distance is created for ambiguous splices */
178 new->sensedir = sensedir;
179 new->donor_prob = donor_prob;
180 new->acceptor_prob = acceptor_prob;
181
182 return new;
183 }
184
185
186 T
Junction_new_chimera(double donor_prob,double acceptor_prob)187 Junction_new_chimera (double donor_prob, double acceptor_prob) {
188 T new = (T) MALLOC(sizeof(*new));
189
190 new->type = CHIMERA_JUNCTION;
191 new->nindels = 0;
192 new->deletionpos = 0;
193
194 new->splice_distance = 0;
195 new->sensedir = 0;
196 new->donor_prob = donor_prob;
197 new->acceptor_prob = acceptor_prob;
198
199 return new;
200 }
201
202
203 T
Junction_new_generic(Univcoord_T left1,Univcoord_T left2,int querypos1,int querypos2,Univcoord_T chroffset,bool plusp,int sensedir)204 Junction_new_generic (Univcoord_T left1, Univcoord_T left2, int querypos1, int querypos2,
205 Univcoord_T chroffset, bool plusp, int sensedir) {
206 int nindels;
207 double donor_prob, acceptor_prob;
208 Chrpos_T splice_distance;
209
210 debug(printf("Entered Junction_new_generic with left %u, querypos %d to left %u, querypos %d, plusp %d, sensedir %d\n",
211 left1,querypos1,left2,querypos2,plusp,sensedir));
212
213 if (left1 + querypos1 > left2 + querypos2) {
214 nindels = (left1 + querypos1) - (left2 + querypos2);
215 debug(printf("Returning an insertion with %d indels\n",nindels));
216 return Junction_new_insertion(nindels);
217
218 } else if (left1 + querypos1 == left2 + querypos2) {
219 debug(printf("Returning NULL\n"));
220 return (Junction_T) NULL;
221
222 } else if ((nindels = (left2 + querypos2) - (left1 + querypos1)) < MIN_INTRONLEN) {
223 debug(printf("Returning a deletion with %d indels\n",nindels));
224 return Junction_new_deletion(nindels,/*deletionpos*/left1+querypos1);
225
226 } else {
227 splice_distance = left2 - left1;
228 if (sensedir == SENSE_FORWARD) {
229 if (plusp == true) {
230 donor_prob = Maxent_hr_donor_prob(left1 + querypos1,chroffset);
231 acceptor_prob = Maxent_hr_acceptor_prob(left2 + querypos2,chroffset);
232 debug(printf("Returning a splice with prob %f + %f\n",donor_prob,acceptor_prob));
233 return Junction_new_splice(splice_distance,SENSE_FORWARD,donor_prob,acceptor_prob);
234 } else {
235 donor_prob = Maxent_hr_antidonor_prob(left2 + querypos2,chroffset);
236 acceptor_prob = Maxent_hr_antiacceptor_prob(left1 + querypos1,chroffset);
237 debug(printf("Returning a splice with prob %f + %f\n",donor_prob,acceptor_prob));
238 return Junction_new_splice(splice_distance,SENSE_FORWARD,donor_prob,acceptor_prob);
239 }
240
241 } else {
242 if (plusp == true) {
243 donor_prob = Maxent_hr_antidonor_prob(left2 + querypos2,chroffset);
244 acceptor_prob = Maxent_hr_antiacceptor_prob(left1 + querypos1,chroffset);
245 debug(printf("Returning a splice with prob %f + %f\n",donor_prob,acceptor_prob));
246 return Junction_new_splice(splice_distance,SENSE_ANTI,donor_prob,acceptor_prob);
247 } else {
248 donor_prob = Maxent_hr_donor_prob(left1 + querypos1,chroffset);
249 acceptor_prob = Maxent_hr_acceptor_prob(left2 + querypos2,chroffset);
250 debug(printf("Returning a splice with prob %f + %f\n",donor_prob,acceptor_prob));
251 return Junction_new_splice(splice_distance,SENSE_ANTI,donor_prob,acceptor_prob);
252 }
253 }
254 }
255 }
256
257
258 T
Junction_copy(T old)259 Junction_copy (T old) {
260 T new = (T) MALLOC(sizeof(*new));
261
262 new->type = old->type;
263 new->nindels = old->nindels;
264 new->deletionpos = old->deletionpos;
265
266 new->splice_distance = old->splice_distance;
267 new->sensedir = old->sensedir;
268 new->donor_prob = old->donor_prob;
269 new->acceptor_prob = old->acceptor_prob;
270
271 return new;
272 }
273
274
275 List_T
Junction_copy_list(List_T old,Listpool_T listpool)276 Junction_copy_list (List_T old, Listpool_T listpool) {
277 List_T new = NULL, p;
278
279 for (p = old; p != NULL; p = List_next(p)) {
280 new = Listpool_push(new,listpool,(void *) Junction_copy((T) List_head(p)));
281 }
282 return List_reverse(new);
283 }
284
285
286 Junctiontype_T
Junction_type(T this)287 Junction_type (T this) {
288 return this->type;
289 }
290
291 char *
Junction_typestring(T this)292 Junction_typestring (T this) {
293 switch (this->type) {
294 case NO_JUNCTION: return "None";
295 case INS_JUNCTION: return "Insertion";
296 case DEL_JUNCTION: return "Deletion";
297 case SPLICE_JUNCTION: return "Splice";
298 case CHIMERA_JUNCTION: return "Chimera";
299 case AMB_JUNCTION: return "Amb";
300 case END_JUNCTION: return "End";
301 }
302 return (char *) NULL;
303 }
304
305 double
Junction_prob(T this)306 Junction_prob (T this) {
307 return this->donor_prob + this->acceptor_prob;
308 }
309
310 int
Junction_sensedir(T this)311 Junction_sensedir (T this) {
312 return this->sensedir;
313 }
314
315 double
Junction_donor_prob(T this)316 Junction_donor_prob (T this) {
317 return this->donor_prob;
318 }
319
320 double
Junction_acceptor_prob(T this)321 Junction_acceptor_prob (T this) {
322 return this->acceptor_prob;
323 }
324
325 double
Junction_splice_score(T this)326 Junction_splice_score (T this) {
327 return this->donor_prob + this->acceptor_prob;
328 }
329
330 int
Junction_nindels(T this)331 Junction_nindels (T this) {
332 return this->nindels;
333 }
334
335 int
Junction_adj(T this)336 Junction_adj (T this) {
337 if (this->type == DEL_JUNCTION) {
338 return +this->nindels;
339 } else if (this->type == INS_JUNCTION) {
340 return -this->nindels;
341 } else {
342 return 0;
343 }
344 }
345
346 int
Junction_ninserts(T this)347 Junction_ninserts (T this) {
348 if (this->type == INS_JUNCTION) {
349 return this->nindels;
350 } else {
351 return 0;
352 }
353 }
354
355 int
Junction_total_ninserts(List_T list)356 Junction_total_ninserts (List_T list) {
357 int ninserts = 0;
358 T this;
359 List_T p;
360
361 for (p = list; p != NULL; p = List_next(p)) {
362 this = (T) List_head(p);
363 if (this->type == INS_JUNCTION) {
364 ninserts += this->nindels;
365 }
366 }
367
368 return ninserts;
369 }
370
371
372
373 static char complCode[128] = COMPLEMENT_LC;
374
375 static char *
make_complement_inplace(char * sequence,unsigned int length)376 make_complement_inplace (char *sequence, unsigned int length) {
377 char temp;
378 unsigned int i, j;
379
380 for (i = 0, j = length-1; i < length/2; i++, j--) {
381 temp = complCode[(int) sequence[i]];
382 sequence[i] = complCode[(int) sequence[j]];
383 sequence[j] = temp;
384 }
385 if (i == j) {
386 sequence[i] = complCode[(int) sequence[i]];
387 }
388
389 return sequence;
390 }
391
392
393 Univcoord_T
Junction_deletionpos(T this)394 Junction_deletionpos (T this) {
395 return this->deletionpos;
396 }
397
398 void
Junction_set_deletionpos(T this,Univcoord_T deletionpos)399 Junction_set_deletionpos (T this, Univcoord_T deletionpos) {
400 this->deletionpos = deletionpos;
401 return;
402 }
403
404 char *
Junction_deletion_string(T this,Genome_T genome,bool plusp)405 Junction_deletion_string (T this, Genome_T genome, bool plusp) {
406 char *deletion_string;
407
408 /* printf("Entered Junction_deletion_string with plusp %d\n",plusp); */
409 /* printf("deletionpos = %u\n",this->deletionpos); */
410
411 deletion_string = (char *) MALLOC((this->nindels+1)*sizeof(char));
412 Genome_fill_buffer_simple(genome,this->deletionpos,this->nindels,deletion_string);
413 if (plusp == false) {
414 make_complement_inplace(deletion_string,this->nindels);
415 }
416
417 /* printf("string = %s\n",deletion_string); */
418 return deletion_string;
419 }
420
421
422 Chrpos_T
Junction_splice_distance(T this)423 Junction_splice_distance (T this) {
424 return this->splice_distance;
425 }
426
427 void
Junction_set_unambiguous(T this,Chrpos_T distance,double donor_prob,double acceptor_prob)428 Junction_set_unambiguous (T this, Chrpos_T distance, double donor_prob, double acceptor_prob) {
429 assert(distance != 0);
430 this->splice_distance = distance;
431 this->donor_prob = donor_prob;
432 this->acceptor_prob = acceptor_prob;
433
434 return;
435 }
436
437 void
Junction_set_ambiguous(T this)438 Junction_set_ambiguous (T this) {
439 this->splice_distance = 0;
440
441 return;
442 }
443
444
445