1 static char rcsid[] = "$Id: junction.c 222800 2020-06-03 21:56:52Z twu $";
2 #ifdef HAVE_CONFIG_H
3 #include <config.h>
4 #endif
5 
6 #include "junction.h"
7 #include "mem.h"
8 #include "assert.h"
9 #include "complement.h"
10 #include "maxent_hr.h"
11 #include "sense.h"
12 
13 
14 #define MIN_INTRONLEN 30
15 
16 
17 #ifdef DEBUG
18 #define debug(x) x
19 #else
20 #define debug(x)
21 #endif
22 
23 /* Debugging procedures */
24 #ifdef DEBUG1
25 #define debug1(x) x
26 #else
27 #define debug1(x)
28 #endif
29 
30 
31 
32 #define T Junction_T
33 struct T {
34   Junctiontype_T type;
35   int nindels;			/* Should be positive */
36   Univcoord_T deletionpos;
37 
38   Chrpos_T splice_distance;
39   int sensedir;
40   double donor_prob;
41   double acceptor_prob;
42 };
43 
44 
45 #ifdef DEBUG1
46 void
Junction_print(T this)47 Junction_print (T this) {
48   if (this == NULL) {
49     printf("No junction\n");
50   } else if (this->type == INS_JUNCTION) {
51     printf("Insertion of %d\n",this->nindels);
52   } else if (this->type == DEL_JUNCTION) {
53     printf("Deletion of %d at %llu\n",this->nindels,(unsigned long long) this->deletionpos);
54   } else if (this->type == SPLICE_JUNCTION) {
55     if (this->splice_distance == 0) {
56       printf("Splice ambiguous with sense %d, prob %f and %f\n",
57 	     this->sensedir,this->donor_prob,this->acceptor_prob);
58     } else {
59       printf("Splice with sense %d of %u, prob %f and %f\n",
60 	     this->sensedir,this->splice_distance,this->donor_prob,this->acceptor_prob);
61     }
62   }
63   return;
64 }
65 #endif
66 
67 #ifdef DEBUG1
68 void
Junction_print_list(List_T list)69 Junction_print_list (List_T list) {
70   T this;
71   List_T p;
72 
73   for (p = list; p != NULL; p = List_next(p)) {
74     this = (T) List_head(p);
75     if (this == NULL) {
76       printf("None,");
77     } else if (this->type == INS_JUNCTION) {
78       printf("Ins:%d,",this->nindels);
79     } else if (this->type == DEL_JUNCTION) {
80       printf("Del:%d,",this->nindels);
81     } else if (this->type == SPLICE_JUNCTION) {
82       if (this->splice_distance == 0) {
83 	printf("Amb:%f-%f,",this->donor_prob,this->acceptor_prob);
84       } else {
85 	printf("Splice:%u,",this->splice_distance);
86       }
87     }
88   }
89 
90   return;
91 }
92 #endif
93 
94 void
Junction_free(T * old)95 Junction_free (T *old) {
96   FREE(*old);
97   return;
98 }
99 
100 void
Junction_list_gc(List_T * list)101 Junction_list_gc (List_T *list) {
102   List_T p;
103   T old;
104 
105   for (p = *list; p != NULL; p = List_next(p)) {
106     old = (T) List_head(p);
107     Junction_free(&old);
108   }
109   /* List_free(&(*list)); -- Allocated by Listpool_push */
110   return;
111 }
112 
113 T
Junction_new_insertion(int nindels)114 Junction_new_insertion (int nindels) {
115   T new = (T) MALLOC(sizeof(*new));
116 
117   assert(nindels > 0);
118 
119   new->type = INS_JUNCTION;
120   new->nindels = nindels;
121   new->deletionpos = 0;
122 
123   new->splice_distance = 0;
124   new->sensedir = 0;
125   new->donor_prob = 0.0;
126   new->acceptor_prob = 0.0;
127 
128   return new;
129 }
130 
131 T
Junction_new_deletion(int nindels,Univcoord_T deletionpos)132 Junction_new_deletion (int nindels, Univcoord_T deletionpos) {
133   T new = (T) MALLOC(sizeof(*new));
134 
135   assert(nindels > 0);
136 
137   new->type = DEL_JUNCTION;
138   new->nindels = nindels;
139   new->deletionpos = deletionpos;
140 
141   new->splice_distance = 0;
142   new->sensedir = 0;
143   new->donor_prob = 0.0;
144   new->acceptor_prob = 0.0;
145 
146   return new;
147 }
148 
149 T
Junction_new_splice(Chrpos_T splice_distance,int sensedir,double donor_prob,double acceptor_prob)150 Junction_new_splice (Chrpos_T splice_distance, int sensedir, double donor_prob, double acceptor_prob) {
151   T new = (T) MALLOC(sizeof(*new));
152 
153   /* A zero splice distance here means a distant splice */
154   assert((int) splice_distance >= 0);
155 
156   new->type = SPLICE_JUNCTION;
157   new->nindels = 0;
158   new->deletionpos = 0;
159 
160   new->splice_distance = splice_distance;
161   new->sensedir = sensedir;
162   new->donor_prob = donor_prob;
163   new->acceptor_prob = acceptor_prob;
164 
165   return new;
166 }
167 
168 
169 T
Junction_new_ambig_splice(int sensedir,double donor_prob,double acceptor_prob)170 Junction_new_ambig_splice (int sensedir, double donor_prob, double acceptor_prob) {
171   T new = (T) MALLOC(sizeof(*new));
172 
173   new->type = SPLICE_JUNCTION;
174   new->nindels = 0;
175   new->deletionpos = 0;
176 
177   new->splice_distance = 0;    /* A zero splice distance is created for ambiguous splices */
178   new->sensedir = sensedir;
179   new->donor_prob = donor_prob;
180   new->acceptor_prob = acceptor_prob;
181 
182   return new;
183 }
184 
185 
186 T
Junction_new_chimera(double donor_prob,double acceptor_prob)187 Junction_new_chimera (double donor_prob, double acceptor_prob) {
188   T new = (T) MALLOC(sizeof(*new));
189 
190   new->type = CHIMERA_JUNCTION;
191   new->nindels = 0;
192   new->deletionpos = 0;
193 
194   new->splice_distance = 0;
195   new->sensedir = 0;
196   new->donor_prob = donor_prob;
197   new->acceptor_prob = acceptor_prob;
198 
199   return new;
200 }
201 
202 
203 T
Junction_new_generic(Univcoord_T left1,Univcoord_T left2,int querypos1,int querypos2,Univcoord_T chroffset,bool plusp,int sensedir)204 Junction_new_generic (Univcoord_T left1, Univcoord_T left2, int querypos1, int querypos2,
205 		      Univcoord_T chroffset, bool plusp, int sensedir) {
206   int nindels;
207   double donor_prob, acceptor_prob;
208   Chrpos_T splice_distance;
209 
210   debug(printf("Entered Junction_new_generic with left %u, querypos %d to left %u, querypos %d, plusp %d, sensedir %d\n",
211 	       left1,querypos1,left2,querypos2,plusp,sensedir));
212 
213   if (left1 + querypos1 > left2 + querypos2) {
214     nindels = (left1 + querypos1) - (left2 + querypos2);
215     debug(printf("Returning an insertion with %d indels\n",nindels));
216     return Junction_new_insertion(nindels);
217 
218   } else if (left1 + querypos1 == left2 + querypos2) {
219     debug(printf("Returning NULL\n"));
220     return (Junction_T) NULL;
221 
222   } else if ((nindels = (left2 + querypos2) - (left1 + querypos1)) < MIN_INTRONLEN) {
223     debug(printf("Returning a deletion with %d indels\n",nindels));
224     return Junction_new_deletion(nindels,/*deletionpos*/left1+querypos1);
225 
226   } else {
227     splice_distance = left2 - left1;
228     if (sensedir == SENSE_FORWARD) {
229       if (plusp == true) {
230 	donor_prob = Maxent_hr_donor_prob(left1 + querypos1,chroffset);
231 	acceptor_prob = Maxent_hr_acceptor_prob(left2 + querypos2,chroffset);
232 	debug(printf("Returning a splice with prob %f + %f\n",donor_prob,acceptor_prob));
233 	return Junction_new_splice(splice_distance,SENSE_FORWARD,donor_prob,acceptor_prob);
234       } else {
235 	donor_prob = Maxent_hr_antidonor_prob(left2 + querypos2,chroffset);
236 	acceptor_prob = Maxent_hr_antiacceptor_prob(left1 + querypos1,chroffset);
237 	debug(printf("Returning a splice with prob %f + %f\n",donor_prob,acceptor_prob));
238 	return Junction_new_splice(splice_distance,SENSE_FORWARD,donor_prob,acceptor_prob);
239       }
240 
241     } else {
242       if (plusp == true) {
243 	donor_prob = Maxent_hr_antidonor_prob(left2 + querypos2,chroffset);
244 	acceptor_prob = Maxent_hr_antiacceptor_prob(left1 + querypos1,chroffset);
245 	debug(printf("Returning a splice with prob %f + %f\n",donor_prob,acceptor_prob));
246 	return Junction_new_splice(splice_distance,SENSE_ANTI,donor_prob,acceptor_prob);
247       } else {
248 	donor_prob = Maxent_hr_donor_prob(left1 + querypos1,chroffset);
249 	acceptor_prob = Maxent_hr_acceptor_prob(left2 + querypos2,chroffset);
250 	debug(printf("Returning a splice with prob %f + %f\n",donor_prob,acceptor_prob));
251 	return Junction_new_splice(splice_distance,SENSE_ANTI,donor_prob,acceptor_prob);
252       }
253     }
254   }
255 }
256 
257 
258 T
Junction_copy(T old)259 Junction_copy (T old) {
260   T new = (T) MALLOC(sizeof(*new));
261 
262   new->type = old->type;
263   new->nindels = old->nindels;
264   new->deletionpos = old->deletionpos;
265 
266   new->splice_distance = old->splice_distance;
267   new->sensedir = old->sensedir;
268   new->donor_prob = old->donor_prob;
269   new->acceptor_prob = old->acceptor_prob;
270 
271   return new;
272 }
273 
274 
275 List_T
Junction_copy_list(List_T old,Listpool_T listpool)276 Junction_copy_list (List_T old, Listpool_T listpool) {
277   List_T new = NULL, p;
278 
279   for (p = old; p != NULL; p = List_next(p)) {
280     new = Listpool_push(new,listpool,(void *) Junction_copy((T) List_head(p)));
281   }
282   return List_reverse(new);
283 }
284 
285 
286 Junctiontype_T
Junction_type(T this)287 Junction_type (T this) {
288   return this->type;
289 }
290 
291 char *
Junction_typestring(T this)292 Junction_typestring (T this) {
293   switch (this->type) {
294   case NO_JUNCTION: return "None";
295   case INS_JUNCTION: return "Insertion";
296   case DEL_JUNCTION: return "Deletion";
297   case SPLICE_JUNCTION: return "Splice";
298   case CHIMERA_JUNCTION: return "Chimera";
299   case AMB_JUNCTION: return "Amb";
300   case END_JUNCTION: return "End";
301   }
302   return (char *) NULL;
303 }
304 
305 double
Junction_prob(T this)306 Junction_prob (T this) {
307   return this->donor_prob + this->acceptor_prob;
308 }
309 
310 int
Junction_sensedir(T this)311 Junction_sensedir (T this) {
312   return this->sensedir;
313 }
314 
315 double
Junction_donor_prob(T this)316 Junction_donor_prob (T this) {
317   return this->donor_prob;
318 }
319 
320 double
Junction_acceptor_prob(T this)321 Junction_acceptor_prob (T this) {
322   return this->acceptor_prob;
323 }
324 
325 double
Junction_splice_score(T this)326 Junction_splice_score (T this) {
327   return this->donor_prob + this->acceptor_prob;
328 }
329 
330 int
Junction_nindels(T this)331 Junction_nindels (T this) {
332   return this->nindels;
333 }
334 
335 int
Junction_adj(T this)336 Junction_adj (T this) {
337   if (this->type == DEL_JUNCTION) {
338     return +this->nindels;
339   } else if (this->type == INS_JUNCTION) {
340     return -this->nindels;
341   } else {
342     return 0;
343   }
344 }
345 
346 int
Junction_ninserts(T this)347 Junction_ninserts (T this) {
348   if (this->type == INS_JUNCTION) {
349     return this->nindels;
350   } else {
351     return 0;
352   }
353 }
354 
355 int
Junction_total_ninserts(List_T list)356 Junction_total_ninserts (List_T list) {
357   int ninserts = 0;
358   T this;
359   List_T p;
360 
361   for (p = list; p != NULL; p = List_next(p)) {
362     this = (T) List_head(p);
363     if (this->type == INS_JUNCTION) {
364       ninserts += this->nindels;
365     }
366   }
367 
368   return ninserts;
369 }
370 
371 
372 
373 static char complCode[128] = COMPLEMENT_LC;
374 
375 static char *
make_complement_inplace(char * sequence,unsigned int length)376 make_complement_inplace (char *sequence, unsigned int length) {
377   char temp;
378   unsigned int i, j;
379 
380   for (i = 0, j = length-1; i < length/2; i++, j--) {
381     temp = complCode[(int) sequence[i]];
382     sequence[i] = complCode[(int) sequence[j]];
383     sequence[j] = temp;
384   }
385   if (i == j) {
386     sequence[i] = complCode[(int) sequence[i]];
387   }
388 
389   return sequence;
390 }
391 
392 
393 Univcoord_T
Junction_deletionpos(T this)394 Junction_deletionpos (T this) {
395   return this->deletionpos;
396 }
397 
398 void
Junction_set_deletionpos(T this,Univcoord_T deletionpos)399 Junction_set_deletionpos (T this, Univcoord_T deletionpos) {
400   this->deletionpos = deletionpos;
401   return;
402 }
403 
404 char *
Junction_deletion_string(T this,Genome_T genome,bool plusp)405 Junction_deletion_string (T this, Genome_T genome, bool plusp) {
406   char *deletion_string;
407 
408   /* printf("Entered Junction_deletion_string with plusp %d\n",plusp); */
409   /* printf("deletionpos = %u\n",this->deletionpos); */
410 
411   deletion_string = (char *) MALLOC((this->nindels+1)*sizeof(char));
412   Genome_fill_buffer_simple(genome,this->deletionpos,this->nindels,deletion_string);
413   if (plusp == false) {
414     make_complement_inplace(deletion_string,this->nindels);
415   }
416 
417   /* printf("string = %s\n",deletion_string); */
418   return deletion_string;
419 }
420 
421 
422 Chrpos_T
Junction_splice_distance(T this)423 Junction_splice_distance (T this) {
424   return this->splice_distance;
425 }
426 
427 void
Junction_set_unambiguous(T this,Chrpos_T distance,double donor_prob,double acceptor_prob)428 Junction_set_unambiguous (T this, Chrpos_T distance, double donor_prob, double acceptor_prob) {
429   assert(distance != 0);
430   this->splice_distance = distance;
431   this->donor_prob = donor_prob;
432   this->acceptor_prob = acceptor_prob;
433 
434   return;
435 }
436 
437 void
Junction_set_ambiguous(T this)438 Junction_set_ambiguous (T this) {
439   this->splice_distance = 0;
440 
441   return;
442 }
443 
444 
445