1 
2 /******************************************************************************
3  *
4  *  This file is part of canu, a software program that assembles whole-genome
5  *  sequencing reads into contigs.
6  *
7  *  This software is based on:
8  *    'Celera Assembler' r4587 (http://wgs-assembler.sourceforge.net)
9  *    the 'kmer package' r1994 (http://kmer.sourceforge.net)
10  *
11  *  Except as indicated otherwise, this is a 'United States Government Work',
12  *  and is released in the public domain.
13  *
14  *  File 'README.licenses' in the root directory of this distribution
15  *  contains full conditions and disclaimers.
16  */
17 
18 #include "overlapInCore.H"
19 
20 //  Output the overlap between strings  S_ID  and  T_ID  which
21 //  have lengths  S_Len  and  T_Len , respectively.
22 //  The overlap information is in  (* olap) .
23 //  S_Dir  indicates the orientation of  S .
24 //  T is always forward.
25 
26 void
Output_Overlap(uint32 S_ID,int S_Len,Direction_t S_Dir,uint32 T_ID,int T_Len,Olap_Info_t * olap,Work_Area_t * WA)27 Output_Overlap(uint32 S_ID, int S_Len, Direction_t S_Dir,
28                uint32 T_ID, int T_Len, Olap_Info_t *olap,
29                Work_Area_t *WA) {
30 
31   ovOverlap  *ovs = WA->overlaps + WA->overlapsLen++;
32 
33   //  Overlap is good for UTG only.
34 
35   ovs->dat.ovl.forUTG = true;
36   ovs->dat.ovl.forOBT = false;
37   ovs->dat.ovl.forDUP = false;
38 
39   //  Set the span of the alignment.
40 
41   ovs->dat.ovl.span  = (olap->s_hi - olap->s_lo);
42   ovs->dat.ovl.span += (olap->t_hi - olap->t_lo);
43   ovs->dat.ovl.span += (olap->delta_ct);
44   assert((ovs->dat.ovl.span % 2) == 0);
45   ovs->dat.ovl.span /= 2;
46 
47   assert (S_ID < T_ID);
48 
49   int32  S_Right_Hang = S_Len - olap->s_hi - 1;
50   int32  T_Right_Hang = T_Len - olap->t_hi - 1;
51 
52   bool Sleft;
53 
54   char    orient = 0;
55   int32   ahg    = 0;
56   int32   bhg    = 0;
57 
58   //fprintf(stderr, "S %d %d = %d len %d T %d %d = %d len %d delta_ct %d delta %d\n",
59   //        olap->s_lo, olap->s_hi, olap->s_hi - olap->s_lo, S_Len,
60   //        olap->t_lo, olap->t_hi, olap->t_hi - olap->t_lo, T_Len,
61   //        olap->delta_ct, olap->delta[0]);
62 
63   assert(olap->s_lo < olap->s_hi);
64   assert(olap->t_lo < olap->t_hi);
65 
66   //  Ensure this is a dovetail or contained overlap.
67   assert((olap->s_lo == 0)         || (olap->t_lo == 0));
68   assert((olap->s_hi == S_Len - 1) || (olap->t_hi == T_Len - 1));
69 
70 
71   if ((olap->s_lo > olap->t_lo) ||
72       ((olap->s_lo == olap->t_lo) && (S_Right_Hang > T_Right_Hang))) {
73     assert(olap->t_lo == 0);
74     assert((olap->s_hi == S_Len - 1) || (olap->t_hi == T_Len - 1));
75     Sleft = true;
76   } else {
77     assert(olap->s_lo == 0);
78     assert((olap->s_hi == S_Len - 1) || (olap->t_hi == T_Len - 1));
79     Sleft = false;
80   }
81 
82   if (Sleft) {
83     ovs->a_iid = S_ID;
84     ovs->b_iid = T_ID;
85   } else {
86     ovs->a_iid = T_ID;
87     ovs->b_iid = S_ID;
88   }
89 
90   //if (Sleft) {
91   //  if (S_Right_Hang >= T_Right_Hang)
92   //    overlap_type = AS_CONTAINMENT;
93   //  else
94   //    overlap_type =  AS_DOVETAIL;
95   //} else {
96   //  if (T_Right_Hang >= S_Right_Hang)
97   //    overlap_type = AS_CONTAINMENT;
98   //  else
99   //    overlap_type =  AS_DOVETAIL;
100   //}
101 
102   if (Sleft) {
103     orient  = (S_Dir == FORWARD) ? 'N' : 'O';
104     ahg     = olap->s_lo;
105     bhg     = T_Right_Hang - S_Right_Hang;
106 
107   } else {
108     orient  = (S_Dir == FORWARD) ? 'N' : 'I';
109     ahg     = olap->t_lo;
110     bhg     = S_Right_Hang - T_Right_Hang;
111   }
112 
113   //  CMM: Regularize the reverse orientated containment overlaps to a common orientation.
114   //
115   //  This catches the case where a reverse orient S (T is always fowrard) is placed
116   //  in the A position; we flip the overlap to make S be forward and T be reverse.
117   //
118   if ((orient == 'O') && (S_Right_Hang >= T_Right_Hang)) {
119     orient  = 'I';
120     ahg     = -(T_Right_Hang - S_Right_Hang);
121     bhg     = -(olap->s_lo);
122   }
123 
124   ovs->erate(olap->quality);
125 
126   switch (orient) {
127     case 'N':
128       ovs->a_hang(ahg);
129       ovs->b_hang(bhg);
130       ovs->dat.ovl.flipped  = false;
131       break;
132 
133     case 'I':
134       ovs->a_hang(ahg);
135       ovs->b_hang(bhg);
136       ovs->dat.ovl.flipped  = true;
137       break;
138 
139     case 'O':
140       ovs->a_hang(-bhg);
141       ovs->b_hang(-ahg);
142       ovs->dat.ovl.flipped  = true;
143       break;
144 
145     case 'A':  //  Never reached.
146       ovs->a_hang(-bhg);
147       ovs->b_hang(-ahg);
148       ovs->dat.ovl.flipped  = false;
149       break;
150   }
151 
152 #if  OUTPUT_OVERLAP_DELTAS
153   signed char deltas[2 * AS_READ_MAX_NORMAL_LEN];
154   signed char *deltaCursor = deltas;
155 
156   if (Sleft == false)
157     for (i = 0;  i < olap->delta_ct;  i ++)
158       olap->delta [i] *= -1;
159 
160   for (int i = 0;  i < olap->delta_ct;  i ++) {
161     for (int j = abs (olap->delta [i]);  j > 0;  j -= AS_LONGEST_DELTA) {
162       if (j > AS_LONGEST_DELTA)
163         *deltaCursor++ = AS_LONG_DELTA_CODE;
164       else
165         *deltaCursor++ = j * Sign (olap->delta [i]);
166     }
167   }
168 
169   *deltaCursor = AS_ENDOF_DELTA_CODE;
170 #endif
171 
172 
173   WA->Total_Overlaps ++;
174 
175   if (bhg <= 0)
176     WA->Contained_Overlap_Ct ++;
177   else
178     WA->Dovetail_Overlap_Ct ++;
179 
180   //  Write overlaps if we've saved too many.
181   //  They're also written at the end of the thread.
182 
183   if (WA->overlapsLen >= WA->overlapsMax)
184 #pragma omp critical
185     {
186       for (int32 zz=0; zz<WA->overlapsLen; zz++)
187         Out_BOF->writeOverlap(WA->overlaps + zz);
188 
189       WA->overlapsLen = 0;
190     }
191 }
192 
193 
194 
195 void
Output_Partial_Overlap(uint32 s_id,uint32 t_id,Direction_t dir,const Olap_Info_t * olap,int s_len,int t_len,Work_Area_t * WA)196 Output_Partial_Overlap(uint32 s_id,
197                        uint32 t_id,
198                        Direction_t dir,
199                        const Olap_Info_t *olap,
200                        int s_len,
201                        int t_len,
202                        Work_Area_t  *WA) {
203 
204   Total_Overlaps++;
205 
206   ovOverlap  *ovl = WA->overlaps + WA->overlapsLen++;
207 
208   assert(s_id < t_id);
209 
210   ovl->a_iid = s_id;
211   ovl->b_iid = t_id;
212 
213   //  Overlap is good for OBT or DUP.  It will be refined more during the store build.
214 
215   ovl->dat.ovl.forUTG = false;
216   ovl->dat.ovl.forOBT = true;
217   ovl->dat.ovl.forDUP = true;
218 
219   //  Set the span of the alignment.
220 
221   ovl->dat.ovl.span  = (olap->s_hi - olap->s_lo);
222   ovl->dat.ovl.span += (olap->t_hi - olap->t_lo);
223   ovl->dat.ovl.span += (olap->delta_ct);
224   assert((ovl->dat.ovl.span % 2) == 0);
225   ovl->dat.ovl.span /= 2;
226 
227   // Convert to canonical form with s forward and use space-based
228   // coordinates
229 
230   if (dir == FORWARD) {
231     ovl->dat.ovl.ahg5 =         (olap->s_lo);
232     ovl->dat.ovl.ahg3 = s_len - (olap->s_hi + 1);
233     ovl->dat.ovl.bhg5 =         (olap->t_lo);
234     ovl->dat.ovl.bhg3 = t_len - (olap->t_hi + 1);
235 
236     ovl->dat.ovl.flipped = false;
237 
238     //assert(a < b);
239     //assert(c < d);
240 
241   } else {
242     ovl->dat.ovl.ahg5 = s_len - (olap->s_hi + 1);
243     ovl->dat.ovl.ahg3 =         (olap->s_lo);
244     ovl->dat.ovl.bhg5 = t_len - (olap->t_hi + 1);
245     ovl->dat.ovl.bhg3 =         (olap->t_lo);
246 
247     ovl->dat.ovl.flipped = true;
248 
249     //assert(a < b);
250     //assert(c > d);  //  Reverse!
251   }
252 
253   ovl->erate(olap->quality);
254 
255   //  We also flush the file at the end of a thread
256 
257   if (WA->overlapsLen >= WA->overlapsMax) {
258 #pragma omp critical
259     for (int32 zz=0; zz<WA->overlapsLen; zz++)
260       Out_BOF->writeOverlap(WA->overlaps + zz);
261 
262     WA->overlapsLen = 0;
263   }
264 }
265 
266