1 package org.broadinstitute.hellbender.utils.read;
2 
3 import com.google.common.collect.Lists;
4 import htsjdk.samtools.Cigar;
5 import htsjdk.samtools.CigarElement;
6 import htsjdk.samtools.CigarOperator;
7 import org.broadinstitute.gatk.nativebindings.smithwaterman.SWOverhangStrategy;
8 import org.broadinstitute.gatk.nativebindings.smithwaterman.SWParameters;
9 import org.broadinstitute.hellbender.utils.Tail;
10 import org.broadinstitute.hellbender.utils.Utils;
11 import org.broadinstitute.hellbender.utils.smithwaterman.SmithWatermanAligner;
12 import org.broadinstitute.hellbender.utils.smithwaterman.SmithWatermanAlignment;
13 
14 import java.util.ArrayList;
15 import java.util.Collections;
16 import java.util.List;
17 
18 public final class CigarUtils {
19 
20     // used in the bubble state machine to apply Smith-Waterman to the bubble sequence
21     // these values were chosen via optimization against the NA12878 knowledge base
22     public static final SWParameters NEW_SW_PARAMETERS = new SWParameters(200, -150, -260, -11);
23 
24     // In Mutect2 and HaplotypeCaller reads are realigned to their *best* haplotypes, which is very different from a generic alignment.
25     // The {@code NEW_SW_PARAMETERS} penalize a substitution error more than an indel up to a length of 9 bases!
26     // Suppose, for example, that a read has a single substitution error, say C -> T, on its last base.  Those parameters
27     // would prefer to extend a deletion until the next T on the reference is found in order to avoid the substitution, which is absurd.
28     // Since these parameters are for aligning a read to the biological sequence we believe it comes from, the parameters
29     // we choose should correspond to sequencer error.  They *do not* have anything to do with the prevalence of true variation!
30     public static final SWParameters ALIGNMENT_TO_BEST_HAPLOTYPE_SW_PARAMETERS = new SWParameters(10, -15, -30, -5);
31 
32     private static final String SW_PAD = "NNNNNNNNNN";
33 
CigarUtils()34     private CigarUtils(){}
35 
36     /**
37      * Inverts the order of the operators in the cigar.
38      * Eg 10M1D20M -> 20M1D10M
39      */
invertCigar(final Cigar cigar)40     public static Cigar invertCigar (final Cigar cigar) {
41         Utils.nonNull(cigar);
42         final List<CigarElement>  els = new ArrayList<>(cigar.getCigarElements());
43         Collections.reverse(els);
44         return new Cigar(els);
45     }
46 
47     /**
48      * Compute the number of reference bases between the start (inclusive) and end (exclusive) cigar elements.
49      * Reference bases are counted as the number of positions w.r.t. the reference spanned by an alignment to that reference.
50      * For example original position = 10. cigar: 2M3I2D1M. If you remove the 2M the new starting position is 12.
51      * If you remove the 2M3I it is still 12. If you remove 2M3I2D (not reasonable cigar), you will get position 14.
52      */
countRefBasesAndClips(final List<CigarElement> elems, final int cigarStartIndex, final int cigarEndIndex)53     public static int countRefBasesAndClips(final List<CigarElement> elems, final int cigarStartIndex, final int cigarEndIndex){
54         return countRefBasesAndMaybeAlsoClips(elems, cigarStartIndex, cigarEndIndex, true, true);
55     }
56 
countRefBasesAndSoftClips(final List<CigarElement> elems, final int cigarStartIndex, final int cigarEndIndex)57     public static int countRefBasesAndSoftClips(final List<CigarElement> elems, final int cigarStartIndex, final int cigarEndIndex){
58         return countRefBasesAndMaybeAlsoClips(elems, cigarStartIndex, cigarEndIndex, true, false);
59     }
60 
countRefBasesAndMaybeAlsoClips(final List<CigarElement> elems, final int cigarStartIndex, final int cigarEndIndex, final boolean includeSoftClips, final boolean includeHardClips)61     private static int countRefBasesAndMaybeAlsoClips(final List<CigarElement> elems, final int cigarStartIndex, final int cigarEndIndex, final boolean includeSoftClips, final boolean includeHardClips) {
62         Utils.nonNull(elems);
63         Utils.validateArg(cigarStartIndex >= 0 && cigarEndIndex <= elems.size() && cigarStartIndex <= cigarEndIndex, () -> "invalid index:" + 0 + " -" + elems.size());
64 
65         int result = 0;
66         for (final CigarElement elem : elems.subList(cigarStartIndex, cigarEndIndex)) {
67             final CigarOperator op = elem.getOperator();
68             if (op.consumesReferenceBases() || (includeSoftClips && op == CigarOperator.SOFT_CLIP) || (includeHardClips && op == CigarOperator.HARD_CLIP)) {
69                 result += elem.getLength();
70             }
71         }
72 
73         return result;
74     }
75 
76     /**
77      * Removes all clipping and padding operators from the cigar.
78      */
removeClipsAndPadding(final Cigar cigar)79     public static Cigar removeClipsAndPadding(final Cigar cigar) {
80         Utils.nonNull(cigar, "cigar is null");
81         final List<CigarElement> elements = new ArrayList<>(cigar.numCigarElements());
82         for ( final CigarElement ce : cigar.getCigarElements() ) {
83             if ( !isClipOrPaddingOperator(ce.getOperator()) ) {
84                 elements.add(ce);
85             }
86         }
87         return new Cigar(elements);
88     }
89 
isClipOrPaddingOperator(final CigarOperator op)90     private static boolean isClipOrPaddingOperator(final CigarOperator op) {
91         return op == CigarOperator.S || op == CigarOperator.H || op == CigarOperator.P;
92     }
93 
94     /**
95      * Returns whether the list has any N operators.
96      */
containsNOperator(final List<CigarElement> cigarElements)97     public static boolean containsNOperator(final List<CigarElement> cigarElements) {
98         return Utils.nonNull(cigarElements).stream().anyMatch(el -> el.getOperator() == CigarOperator.N);
99     }
100 
101     /**
102      * A good Cigar object obeys the following rules:
103      *  - is valid as per SAM spec {@link Cigar#isValid(String, long)}.
104      *  - has no consecutive I/D elements
105      *  - does not start or end with deletions (with or without preceding clips).
106      */
isGood(final Cigar c)107     public static boolean isGood(final Cigar c) {
108         Utils.nonNull(c, "cigar is null");
109 
110         //Note: the string comes from the SAMRecord so it must be a wellformed CIGAR (that is, in "\*|([0-9]+[MIDNSHPX=])+" as per SAM spec).
111         //We don't have to check that
112         if (c.isValid(null, -1) != null){  //if it's invalid, then it's not good
113             return false;
114         }
115         final List<CigarElement> elems = c.getCigarElements();
116         return !(hasConsecutiveIndels(elems) || startsOrEndsWithDeletionIgnoringClips(elems));
117     }
118 
119     /**
120      * Checks if cigar has consecutive I/D elements.
121      */
hasConsecutiveIndels(final List<CigarElement> elems)122     private static boolean hasConsecutiveIndels(final List<CigarElement> elems) {
123         boolean prevIndel = false;
124         for (final CigarElement elem : elems) {
125             final CigarOperator op = elem.getOperator();
126             final boolean isIndel = (op == CigarOperator.INSERTION || op == CigarOperator.DELETION);
127             if (prevIndel && isIndel) {
128                 return true;
129             }
130             prevIndel = isIndel;
131         }
132         return false;
133     }
134 
135     /**
136      * Checks if cigar starts with a deletion (ignoring any clips at the beginning).
137      */
startsOrEndsWithDeletionIgnoringClips(final List<CigarElement> elems)138     private static boolean startsOrEndsWithDeletionIgnoringClips(final List<CigarElement> elems) {
139 
140         for (final boolean leftSide : new boolean[] {true, false}) {
141             for (final CigarElement elem : leftSide ? elems : Lists.reverse(elems)) {
142                 final CigarOperator op = elem.getOperator();
143                 if (op == CigarOperator.DELETION) { //first non-clipping is deletion
144                     return true;
145                 } else if (!op.isClipping()) {  // first non-clipping is non deletion
146                     break;
147                 }
148             }
149         }
150 
151         return false;
152     }
153 
154     /**
155      * Calculate the cigar elements for this path against the reference sequence.
156      *
157      * This assumes that the reference and alt sequence are haplotypes derived from a de Bruijn graph or SeqGraph and have the same
158      * ref source and ref sink vertices.  That is, the alt sequence start and end are assumed anchored to the reference start and end, which
159      * occur at the ends of the padded assembly region.  Hence, unlike read alignment, there is no concept of a start or end coordinate here.
160      * Furthermore, it is important to note that in the rare case that the alt cigar begins or ends with a deletion, we must keep the leading
161      * or trailing deletion in order to maintain the original reference span of the alt haplotype.  This can occur, for example, when the ref
162      * haplotype starts with N repeats of a long sequence and the alt haplotype starts with N-1 repeats.
163      *
164      * @param aligner
165      * @param refSeq the reference sequence that all of the bases in this path should align to
166      * @return a Cigar mapping this path to refSeq, or null if no reasonable alignment could be found
167      */
calculateCigar(final byte[] refSeq, final byte[] altSeq, final SmithWatermanAligner aligner, final SWOverhangStrategy strategy)168     public static Cigar calculateCigar(final byte[] refSeq, final byte[] altSeq, final SmithWatermanAligner aligner, final SWOverhangStrategy strategy) {
169         Utils.nonNull(refSeq, "refSeq");
170         Utils.nonNull(altSeq, "altSeq");
171         if ( altSeq.length == 0 ) {
172             // horrible edge case from the unit tests, where this path has no bases
173             return new Cigar(Collections.singletonList(new CigarElement(refSeq.length, CigarOperator.D)));
174         }
175 
176         //Note: this is a performance optimization.
177         // If two strings are equal (a O(n) check) then it's trivial to get CIGAR for them.
178         // Furthermore, if their lengths are equal and their element-by-element comparison yields two or fewer mismatches
179         // it's also a trivial M-only CIGAR, because in order to have equal length one would need at least one insertion and
180         // one deletion, in which case two substitutions is a better alignment.
181         if (altSeq.length == refSeq.length){
182             int mismatchCount = 0;
183             for (int n = 0; n < refSeq.length && mismatchCount <= 2; n++) {
184                 mismatchCount += (altSeq[n] == refSeq[n] ? 0 : 1);
185             }
186             if (mismatchCount <= 2) {
187                 final Cigar matching = new Cigar();
188                 matching.add(new CigarElement(refSeq.length, CigarOperator.MATCH_OR_MISMATCH));
189                 return matching;
190             }
191         }
192 
193         final Cigar nonStandard;
194 
195         final String paddedRef = SW_PAD + new String(refSeq) + SW_PAD;
196         final String paddedPath = SW_PAD + new String(altSeq) + SW_PAD;
197         final SmithWatermanAlignment alignment = aligner.align(paddedRef.getBytes(), paddedPath.getBytes(), NEW_SW_PARAMETERS, strategy);
198 
199         if ( isSWFailure(alignment) ) {
200             return null;
201         }
202 
203         // cut off the padding bases
204         final int baseStart = SW_PAD.length();
205         final int baseEnd = paddedPath.length() - SW_PAD.length() - 1; // -1 because it's inclusive
206         final CigarBuilder.Result trimmedCigarAndDeletionsRemoved = AlignmentUtils.trimCigarByBases(alignment.getCigar(), baseStart, baseEnd);
207 
208         nonStandard = trimmedCigarAndDeletionsRemoved.getCigar();
209 
210         // leading deletion removed by cigar trimming shift the alignment start to the right
211         final int trimmedLeadingDeletions = trimmedCigarAndDeletionsRemoved.getLeadingDeletionBasesRemoved();
212 
213         // trailing deletions should be kept in order to left-align
214         final int trimmedTrailingDeletions = trimmedCigarAndDeletionsRemoved.getTrailingDeletionBasesRemoved();
215 
216         if ( trimmedTrailingDeletions > 0  ) {
217             nonStandard.add(new CigarElement(trimmedTrailingDeletions, CigarOperator.D));
218         }
219 
220         final CigarBuilder.Result leftAlignmentResult = AlignmentUtils.leftAlignIndels(nonStandard, refSeq, altSeq, trimmedLeadingDeletions);
221 
222         // we must account for possible leading deletions removed when trimming the padding and when left-aligning
223         // trailing deletions removed when trimming have already been restored for left-alignment, but left-alingment may have removed them again.
224         final int totalLeadingDeletionsRemoved = trimmedLeadingDeletions + leftAlignmentResult.getLeadingDeletionBasesRemoved();
225         final int totalTrailingDeletionsRemoved = leftAlignmentResult.getTrailingDeletionBasesRemoved();
226 
227         if (totalLeadingDeletionsRemoved == 0 && totalTrailingDeletionsRemoved == 0) {
228             return leftAlignmentResult.getCigar();
229         } else {
230             final List<CigarElement> resultElements = new ArrayList<>();
231             if (totalLeadingDeletionsRemoved > 0) {
232                 resultElements.add(new CigarElement(totalLeadingDeletionsRemoved, CigarOperator.D));
233             }
234             resultElements.addAll(leftAlignmentResult.getCigar().getCigarElements());
235             if (totalTrailingDeletionsRemoved > 0) {
236                 resultElements.add(new CigarElement(totalTrailingDeletionsRemoved, CigarOperator.D));
237             }
238             return new Cigar(resultElements);
239         }
240     }
241 
242     /**
243      * Make sure that the SW didn't fail in some terrible way, and throw exception if it did
244      */
isSWFailure(final SmithWatermanAlignment alignment)245     private static boolean isSWFailure(final SmithWatermanAlignment alignment) {
246         // check that the alignment starts at the first base, which it should given the padding
247         if ( alignment.getAlignmentOffset() > 0 ) {
248             return true;
249         }
250 
251         // check that we aren't getting any S operators (which would be very bad downstream)
252         for ( final CigarElement ce : alignment.getCigar().getCigarElements() ) {
253             if ( ce.getOperator() == CigarOperator.S )
254                 return true;
255             // soft clips at the end of the alignment are really insertions
256         }
257 
258         return false;
259     }
260 
261     /**
262      * Returns the length of the original read considering all clippings based on this cigar.
263      * <p>
264      *     The result of applying this method on a empty cigar is zero.
265      * </p>
266      * @param cigar the input cigar.
267      * @throws IllegalArgumentException if the input {@code cigar} is {@code null}.
268      * @return 0 or greater.
269      */
countUnclippedReadBases(final Cigar cigar)270     public static int countUnclippedReadBases(final Cigar cigar) {
271         Utils.nonNull(cigar, "the input cigar cannot be null");
272         return cigar.getCigarElements().stream()
273                 .filter(ce -> {
274                     final CigarOperator operator = ce.getOperator();
275                     return operator.isClipping() || operator.consumesReadBases();
276                 })
277                 .mapToInt(CigarElement::getLength)
278                 .sum();
279     }
280 
281     /**
282      * Count the number of soft- or hard- clipped bases from either the left or right end of a cigar
283      */
countClippedBases(final Cigar cigar, final Tail tail, final CigarOperator typeOfClip)284     public static int countClippedBases(final Cigar cigar, final Tail tail, final CigarOperator typeOfClip) {
285         Utils.validateArg(typeOfClip.isClipping(), "typeOfClip must be a clipping operator");
286 
287         final int size = cigar.numCigarElements();
288         if (size < 2) {
289             Utils.validateArg(size == 1 && !cigar.getFirstCigarElement().getOperator().isClipping(), "cigar is empty or completely clipped.");
290             return 0;
291         }
292 
293         int result = 0;
294 
295         for (int n = 0; n < size; n++) {
296             final int index = (tail == Tail.LEFT ? n : size - n - 1);
297             final CigarElement element = cigar.getCigarElement(index);
298             if (!element.getOperator().isClipping()) {
299                 return result;
300             } else if (element.getOperator() == typeOfClip) {
301                 result += element.getLength();
302             }
303         }
304 
305         throw new IllegalArgumentException("Input cigar " + cigar + " is completely clipped.");
306     }
307 
308     /**
309      * Count the number clipped bases (both soft and hard) from either the left or right end of a cigar
310      */
countClippedBases(final Cigar cigar, final Tail tail)311     public static int countClippedBases(final Cigar cigar, final Tail tail) {
312         return countClippedBases(cigar, tail, CigarOperator.SOFT_CLIP) + countClippedBases(cigar, tail, CigarOperator.HARD_CLIP);
313     }
314 
315     /**
316      * Count the number of soft- and hard-clipped bases over both ends of a cigar
317      */
countClippedBases(final Cigar cigar, final CigarOperator clippingType)318     public static int countClippedBases(final Cigar cigar, final CigarOperator clippingType) {
319         return countClippedBases(cigar, Tail.LEFT, clippingType) + countClippedBases(cigar, Tail.RIGHT, clippingType);
320     }
321 
322     /**
323      * Count the number of clipped bases (both soft and hard) over both ends of a cigar
324      */
countClippedBases(final Cigar cigar)325     public static int countClippedBases(final Cigar cigar) {
326         return countClippedBases(cigar, Tail.LEFT) + countClippedBases(cigar, Tail.RIGHT);
327     }
328 
countAlignedBases(final Cigar cigar )329     public static int countAlignedBases(final Cigar cigar ) {
330         return Utils.nonNull(cigar).getCigarElements().stream()
331                 .filter(cigarElement -> cigarElement.getOperator().isAlignment())
332                 .mapToInt(CigarElement::getLength)
333                 .sum();
334     }
335 
336     /**
337      * replace soft clips (S) with match (M) operators, normalizing the result by all the transformations of the {@link CigarBuilder} class:
338      * merging consecutive identical operators and removing zero-length elements.  For example 10S10M -> 20M and 10S10M10I10I -> 20M20I.
339      */
revertSoftClips(final Cigar originalCigar)340     public static Cigar revertSoftClips(final Cigar originalCigar) {
341         final CigarBuilder builder = new CigarBuilder();
342         for (final CigarElement element : originalCigar.getCigarElements()) {
343             if (element.getOperator() == CigarOperator.SOFT_CLIP) {
344                 builder.add(new CigarElement(element.getLength(), CigarOperator.MATCH_OR_MISMATCH));
345             } else {
346                 builder.add(element);
347             }
348         }
349 
350         return builder.make();
351     }
352 
353     /**
354      * Given a cigar string, soft clip up to leftClipEnd and soft clip starting at rightClipBegin
355      * @param start initial index to clip within read bases, inclusive
356      * @param stop final index to clip within read bases exclusive
357      * @param clippingOperator      type of clipping -- must be either hard clip or soft clip
358      */
clipCigar(final Cigar cigar, final int start, final int stop, CigarOperator clippingOperator)359     public static Cigar clipCigar(final Cigar cigar, final int start, final int stop, CigarOperator clippingOperator) {
360         Utils.validateArg(clippingOperator.isClipping(), "Not a clipping operator");
361         final boolean clipLeft = start == 0;
362 
363         final CigarBuilder newCigar = new CigarBuilder();
364 
365         int elementStart = 0;
366         for (final CigarElement element : cigar.getCigarElements()) {
367             final CigarOperator operator = element.getOperator();
368             // copy hard clips
369             if (operator == CigarOperator.HARD_CLIP) {
370                 newCigar.add(new CigarElement(element.getLength(), element.getOperator()));
371                 continue;
372             }
373             final int elementEnd = elementStart + (operator.consumesReadBases() ? element.getLength() : 0);
374 
375             // element precedes start or follows end of clip, copy it to new cigar
376             if (elementEnd <= start || elementStart >= stop) {
377                 // edge case: deletions at edge of clipping are meaningless and we skip them
378                 if (operator.consumesReadBases() || (elementStart != start && elementStart != stop)) {
379                     newCigar.add(new CigarElement(element.getLength(), operator));
380                 }
381             } else {    // otherwise, some or all of the element is soft-clipped
382                 final int unclippedLength = clipLeft ? elementEnd - stop : start - elementStart;
383                 final int clippedLength = element.getLength() - unclippedLength;
384 
385                 if (unclippedLength <= 0) { // totally clipped
386                     if (operator.consumesReadBases()) {
387                         newCigar.add(new CigarElement(element.getLength(), clippingOperator));
388                     }
389                 } else if (clipLeft) {
390                     newCigar.add(new CigarElement(clippedLength, clippingOperator));
391                     newCigar.add(new CigarElement(unclippedLength, operator));
392                 } else {
393                     newCigar.add(new CigarElement(unclippedLength, operator));
394                     newCigar.add(new CigarElement(clippedLength, clippingOperator));
395                 }
396             }
397             elementStart = elementEnd;
398         }
399 
400         return newCigar.make();
401     }
402 
403     /**
404      * How many bases to the right does a read's alignment start shift given its cigar and the number of left soft clips
405      */
alignmentStartShift(final Cigar cigar, final int numClipped)406     public static int alignmentStartShift(final Cigar cigar, final int numClipped) {
407         int refBasesClipped = 0;
408 
409         int elementStart = 0;   // this and elementEnd are indices in the read's bases
410         for (final CigarElement element : cigar.getCigarElements()) {
411             final CigarOperator operator = element.getOperator();
412             // hard clips consume neither read bases nor reference bases and are irrelevant
413             if (operator == CigarOperator.HARD_CLIP) {
414                 continue;
415             }
416             final int elementEnd = elementStart + (operator.consumesReadBases() ? element.getLength() : 0);
417 
418             if (elementEnd <= numClipped) {  // totally within clipped span -- this includes deletions immediately following clipping
419                 refBasesClipped += operator.consumesReferenceBases() ? element.getLength() : 0;
420             } else if (elementStart < numClipped) { // clip in middle of element, which means the element necessarily consumes read bases
421                 final int clippedLength = numClipped - elementStart;
422                 refBasesClipped += operator.consumesReferenceBases() ? clippedLength : 0;
423                 break;
424             }
425             elementStart = elementEnd;
426         }
427         return refBasesClipped;
428     }
429 
430     /**
431      * Computes the corresponding distance needs to be walked on the read, given the Cigar and distance walked on the reference.
432      * @param cigar   cigar along the 5-3 direction of read (when read is mapped to reverse strand, bwa mem output cigar should be inverted)
433      * @param start      start position (1-based) on the read (note it should not count the hard clipped bases, as usual)
434      * @param refDist                 distance to walk on the reference
435      * @param backward              whether to walk backwards along the read or not
436      * @return                          corresponding walk distance on read (always positive)
437      * @throws IllegalArgumentException if input cigar contains padding operation or 'N', or
438      *                                  either of {@code start} or distance is non-positive, or
439      *                                  {@code start} is larger than read length, or
440      *                                  requested reference walk distance is longer than the total read bases in cigar, or
441      *                                  computed read walk distance would "walk off" the read
442      */
computeAssociatedDistOnRead(final Cigar cigar, final int start, final int refDist, final boolean backward)443     public static int computeAssociatedDistOnRead(final Cigar cigar, final int start, final int refDist, final boolean backward) {
444 
445         Utils.validateArg(refDist > 0 && start > 0, () -> "start " + start + " or distance " + refDist + " is non-positive.");
446 
447         final List<CigarElement> elements = backward ? Lists.reverse(cigar.getCigarElements()) : cigar.getCigarElements();
448 
449         final int readLength = elements.stream().mapToInt(ce -> ce.getOperator().consumesReadBases() ? ce.getLength() : 0).sum();
450         final int readBasesToSkip = backward ? readLength - start : start - 1;
451 
452         int readBasesConsumed = 0;
453         int refBasesConsumed = 0;
454 
455         for (final CigarElement element : elements){
456             final int readBasesConsumedBeforeElement = readBasesConsumed;
457 
458             readBasesConsumed += element.getOperator().consumesReadBases() ? element.getLength() : 0;
459             // skip cigar elements that end before the read start or start after the reference end
460             if (readBasesConsumed <= readBasesToSkip) {
461                 continue;
462             }
463 
464             refBasesConsumed += element.getOperator().consumesReferenceBases() ? element.getLength() - Math.max(readBasesToSkip - readBasesConsumedBeforeElement, 0) : 0;
465             if (refBasesConsumed >= refDist) {
466                 final int excessRefBasesInElement = Math.max(refBasesConsumed - refDist, 0);
467                 return readBasesConsumed - readBasesToSkip - (element.getOperator().consumesReadBases() ? excessRefBasesInElement : 0);
468             }
469         }
470 
471         throw new IllegalArgumentException("Cigar " + cigar + "does not contain at least " + refDist + " reference bases past red start " + start + ".");
472     }
473 
474     /**
475      * Convert the 'I' CigarElement, if it is at either end (terminal) of the input cigar, to a corresponding 'S' operator.
476      * Note that we allow CIGAR of the format '10H10S10I10M', but disallows the format if after the conversion the cigar turns into a giant clip,
477      * e.g. '10H10S10I10S10H' is not allowed (if allowed, it becomes a giant clip of '10H30S10H' which is non-sense).
478      *
479      * @return a pair of number of clipped (hard and soft, including the ones from the converted terminal 'I') bases at the front and back of the
480      *         input {@code cigarAlongInput5to3Direction}.
481      *
482      * @throws IllegalArgumentException when the checks as described above fail.
483      */
convertTerminalInsertionToSoftClip(final Cigar cigar)484     public static Cigar convertTerminalInsertionToSoftClip(final Cigar cigar) {
485 
486         if (cigar.numCigarElements() < 2 ) {
487             return cigar;
488         }
489 
490         final CigarBuilder builder = new CigarBuilder();
491         for (int n = 0; n < cigar.numCigarElements(); n++) {
492             final CigarElement element = cigar.getCigarElement(n);
493             if (element.getOperator() != CigarOperator.INSERTION) { // not an insertion
494                 builder.add(element);
495             } else if (n == 0 || n == cigar.numCigarElements() - 1) {   // terminal insertion with no clipping -- convert to soft clip
496                 builder.add(new CigarElement(element.getLength(), CigarOperator.SOFT_CLIP));
497             } else if (cigar.getCigarElement(n-1).getOperator().isClipping() || cigar.getCigarElement(n+1).getOperator().isClipping()) {    // insertion preceding or following clip
498                 builder.add(new CigarElement(element.getLength(), CigarOperator.SOFT_CLIP));
499             } else {    // interior insertion
500                 builder.add(element);
501             }
502         }
503 
504         return builder.make();
505     }
506 }
507