1 package org.broadinstitute.hellbender.utils.read; 2 3 import com.google.common.collect.Lists; 4 import htsjdk.samtools.Cigar; 5 import htsjdk.samtools.CigarElement; 6 import htsjdk.samtools.CigarOperator; 7 import org.broadinstitute.gatk.nativebindings.smithwaterman.SWOverhangStrategy; 8 import org.broadinstitute.gatk.nativebindings.smithwaterman.SWParameters; 9 import org.broadinstitute.hellbender.utils.Tail; 10 import org.broadinstitute.hellbender.utils.Utils; 11 import org.broadinstitute.hellbender.utils.smithwaterman.SmithWatermanAligner; 12 import org.broadinstitute.hellbender.utils.smithwaterman.SmithWatermanAlignment; 13 14 import java.util.ArrayList; 15 import java.util.Collections; 16 import java.util.List; 17 18 public final class CigarUtils { 19 20 // used in the bubble state machine to apply Smith-Waterman to the bubble sequence 21 // these values were chosen via optimization against the NA12878 knowledge base 22 public static final SWParameters NEW_SW_PARAMETERS = new SWParameters(200, -150, -260, -11); 23 24 // In Mutect2 and HaplotypeCaller reads are realigned to their *best* haplotypes, which is very different from a generic alignment. 25 // The {@code NEW_SW_PARAMETERS} penalize a substitution error more than an indel up to a length of 9 bases! 26 // Suppose, for example, that a read has a single substitution error, say C -> T, on its last base. Those parameters 27 // would prefer to extend a deletion until the next T on the reference is found in order to avoid the substitution, which is absurd. 28 // Since these parameters are for aligning a read to the biological sequence we believe it comes from, the parameters 29 // we choose should correspond to sequencer error. They *do not* have anything to do with the prevalence of true variation! 30 public static final SWParameters ALIGNMENT_TO_BEST_HAPLOTYPE_SW_PARAMETERS = new SWParameters(10, -15, -30, -5); 31 32 private static final String SW_PAD = "NNNNNNNNNN"; 33 CigarUtils()34 private CigarUtils(){} 35 36 /** 37 * Inverts the order of the operators in the cigar. 38 * Eg 10M1D20M -> 20M1D10M 39 */ invertCigar(final Cigar cigar)40 public static Cigar invertCigar (final Cigar cigar) { 41 Utils.nonNull(cigar); 42 final List<CigarElement> els = new ArrayList<>(cigar.getCigarElements()); 43 Collections.reverse(els); 44 return new Cigar(els); 45 } 46 47 /** 48 * Compute the number of reference bases between the start (inclusive) and end (exclusive) cigar elements. 49 * Reference bases are counted as the number of positions w.r.t. the reference spanned by an alignment to that reference. 50 * For example original position = 10. cigar: 2M3I2D1M. If you remove the 2M the new starting position is 12. 51 * If you remove the 2M3I it is still 12. If you remove 2M3I2D (not reasonable cigar), you will get position 14. 52 */ countRefBasesAndClips(final List<CigarElement> elems, final int cigarStartIndex, final int cigarEndIndex)53 public static int countRefBasesAndClips(final List<CigarElement> elems, final int cigarStartIndex, final int cigarEndIndex){ 54 return countRefBasesAndMaybeAlsoClips(elems, cigarStartIndex, cigarEndIndex, true, true); 55 } 56 countRefBasesAndSoftClips(final List<CigarElement> elems, final int cigarStartIndex, final int cigarEndIndex)57 public static int countRefBasesAndSoftClips(final List<CigarElement> elems, final int cigarStartIndex, final int cigarEndIndex){ 58 return countRefBasesAndMaybeAlsoClips(elems, cigarStartIndex, cigarEndIndex, true, false); 59 } 60 countRefBasesAndMaybeAlsoClips(final List<CigarElement> elems, final int cigarStartIndex, final int cigarEndIndex, final boolean includeSoftClips, final boolean includeHardClips)61 private static int countRefBasesAndMaybeAlsoClips(final List<CigarElement> elems, final int cigarStartIndex, final int cigarEndIndex, final boolean includeSoftClips, final boolean includeHardClips) { 62 Utils.nonNull(elems); 63 Utils.validateArg(cigarStartIndex >= 0 && cigarEndIndex <= elems.size() && cigarStartIndex <= cigarEndIndex, () -> "invalid index:" + 0 + " -" + elems.size()); 64 65 int result = 0; 66 for (final CigarElement elem : elems.subList(cigarStartIndex, cigarEndIndex)) { 67 final CigarOperator op = elem.getOperator(); 68 if (op.consumesReferenceBases() || (includeSoftClips && op == CigarOperator.SOFT_CLIP) || (includeHardClips && op == CigarOperator.HARD_CLIP)) { 69 result += elem.getLength(); 70 } 71 } 72 73 return result; 74 } 75 76 /** 77 * Removes all clipping and padding operators from the cigar. 78 */ removeClipsAndPadding(final Cigar cigar)79 public static Cigar removeClipsAndPadding(final Cigar cigar) { 80 Utils.nonNull(cigar, "cigar is null"); 81 final List<CigarElement> elements = new ArrayList<>(cigar.numCigarElements()); 82 for ( final CigarElement ce : cigar.getCigarElements() ) { 83 if ( !isClipOrPaddingOperator(ce.getOperator()) ) { 84 elements.add(ce); 85 } 86 } 87 return new Cigar(elements); 88 } 89 isClipOrPaddingOperator(final CigarOperator op)90 private static boolean isClipOrPaddingOperator(final CigarOperator op) { 91 return op == CigarOperator.S || op == CigarOperator.H || op == CigarOperator.P; 92 } 93 94 /** 95 * Returns whether the list has any N operators. 96 */ containsNOperator(final List<CigarElement> cigarElements)97 public static boolean containsNOperator(final List<CigarElement> cigarElements) { 98 return Utils.nonNull(cigarElements).stream().anyMatch(el -> el.getOperator() == CigarOperator.N); 99 } 100 101 /** 102 * A good Cigar object obeys the following rules: 103 * - is valid as per SAM spec {@link Cigar#isValid(String, long)}. 104 * - has no consecutive I/D elements 105 * - does not start or end with deletions (with or without preceding clips). 106 */ isGood(final Cigar c)107 public static boolean isGood(final Cigar c) { 108 Utils.nonNull(c, "cigar is null"); 109 110 //Note: the string comes from the SAMRecord so it must be a wellformed CIGAR (that is, in "\*|([0-9]+[MIDNSHPX=])+" as per SAM spec). 111 //We don't have to check that 112 if (c.isValid(null, -1) != null){ //if it's invalid, then it's not good 113 return false; 114 } 115 final List<CigarElement> elems = c.getCigarElements(); 116 return !(hasConsecutiveIndels(elems) || startsOrEndsWithDeletionIgnoringClips(elems)); 117 } 118 119 /** 120 * Checks if cigar has consecutive I/D elements. 121 */ hasConsecutiveIndels(final List<CigarElement> elems)122 private static boolean hasConsecutiveIndels(final List<CigarElement> elems) { 123 boolean prevIndel = false; 124 for (final CigarElement elem : elems) { 125 final CigarOperator op = elem.getOperator(); 126 final boolean isIndel = (op == CigarOperator.INSERTION || op == CigarOperator.DELETION); 127 if (prevIndel && isIndel) { 128 return true; 129 } 130 prevIndel = isIndel; 131 } 132 return false; 133 } 134 135 /** 136 * Checks if cigar starts with a deletion (ignoring any clips at the beginning). 137 */ startsOrEndsWithDeletionIgnoringClips(final List<CigarElement> elems)138 private static boolean startsOrEndsWithDeletionIgnoringClips(final List<CigarElement> elems) { 139 140 for (final boolean leftSide : new boolean[] {true, false}) { 141 for (final CigarElement elem : leftSide ? elems : Lists.reverse(elems)) { 142 final CigarOperator op = elem.getOperator(); 143 if (op == CigarOperator.DELETION) { //first non-clipping is deletion 144 return true; 145 } else if (!op.isClipping()) { // first non-clipping is non deletion 146 break; 147 } 148 } 149 } 150 151 return false; 152 } 153 154 /** 155 * Calculate the cigar elements for this path against the reference sequence. 156 * 157 * This assumes that the reference and alt sequence are haplotypes derived from a de Bruijn graph or SeqGraph and have the same 158 * ref source and ref sink vertices. That is, the alt sequence start and end are assumed anchored to the reference start and end, which 159 * occur at the ends of the padded assembly region. Hence, unlike read alignment, there is no concept of a start or end coordinate here. 160 * Furthermore, it is important to note that in the rare case that the alt cigar begins or ends with a deletion, we must keep the leading 161 * or trailing deletion in order to maintain the original reference span of the alt haplotype. This can occur, for example, when the ref 162 * haplotype starts with N repeats of a long sequence and the alt haplotype starts with N-1 repeats. 163 * 164 * @param aligner 165 * @param refSeq the reference sequence that all of the bases in this path should align to 166 * @return a Cigar mapping this path to refSeq, or null if no reasonable alignment could be found 167 */ calculateCigar(final byte[] refSeq, final byte[] altSeq, final SmithWatermanAligner aligner, final SWOverhangStrategy strategy)168 public static Cigar calculateCigar(final byte[] refSeq, final byte[] altSeq, final SmithWatermanAligner aligner, final SWOverhangStrategy strategy) { 169 Utils.nonNull(refSeq, "refSeq"); 170 Utils.nonNull(altSeq, "altSeq"); 171 if ( altSeq.length == 0 ) { 172 // horrible edge case from the unit tests, where this path has no bases 173 return new Cigar(Collections.singletonList(new CigarElement(refSeq.length, CigarOperator.D))); 174 } 175 176 //Note: this is a performance optimization. 177 // If two strings are equal (a O(n) check) then it's trivial to get CIGAR for them. 178 // Furthermore, if their lengths are equal and their element-by-element comparison yields two or fewer mismatches 179 // it's also a trivial M-only CIGAR, because in order to have equal length one would need at least one insertion and 180 // one deletion, in which case two substitutions is a better alignment. 181 if (altSeq.length == refSeq.length){ 182 int mismatchCount = 0; 183 for (int n = 0; n < refSeq.length && mismatchCount <= 2; n++) { 184 mismatchCount += (altSeq[n] == refSeq[n] ? 0 : 1); 185 } 186 if (mismatchCount <= 2) { 187 final Cigar matching = new Cigar(); 188 matching.add(new CigarElement(refSeq.length, CigarOperator.MATCH_OR_MISMATCH)); 189 return matching; 190 } 191 } 192 193 final Cigar nonStandard; 194 195 final String paddedRef = SW_PAD + new String(refSeq) + SW_PAD; 196 final String paddedPath = SW_PAD + new String(altSeq) + SW_PAD; 197 final SmithWatermanAlignment alignment = aligner.align(paddedRef.getBytes(), paddedPath.getBytes(), NEW_SW_PARAMETERS, strategy); 198 199 if ( isSWFailure(alignment) ) { 200 return null; 201 } 202 203 // cut off the padding bases 204 final int baseStart = SW_PAD.length(); 205 final int baseEnd = paddedPath.length() - SW_PAD.length() - 1; // -1 because it's inclusive 206 final CigarBuilder.Result trimmedCigarAndDeletionsRemoved = AlignmentUtils.trimCigarByBases(alignment.getCigar(), baseStart, baseEnd); 207 208 nonStandard = trimmedCigarAndDeletionsRemoved.getCigar(); 209 210 // leading deletion removed by cigar trimming shift the alignment start to the right 211 final int trimmedLeadingDeletions = trimmedCigarAndDeletionsRemoved.getLeadingDeletionBasesRemoved(); 212 213 // trailing deletions should be kept in order to left-align 214 final int trimmedTrailingDeletions = trimmedCigarAndDeletionsRemoved.getTrailingDeletionBasesRemoved(); 215 216 if ( trimmedTrailingDeletions > 0 ) { 217 nonStandard.add(new CigarElement(trimmedTrailingDeletions, CigarOperator.D)); 218 } 219 220 final CigarBuilder.Result leftAlignmentResult = AlignmentUtils.leftAlignIndels(nonStandard, refSeq, altSeq, trimmedLeadingDeletions); 221 222 // we must account for possible leading deletions removed when trimming the padding and when left-aligning 223 // trailing deletions removed when trimming have already been restored for left-alignment, but left-alingment may have removed them again. 224 final int totalLeadingDeletionsRemoved = trimmedLeadingDeletions + leftAlignmentResult.getLeadingDeletionBasesRemoved(); 225 final int totalTrailingDeletionsRemoved = leftAlignmentResult.getTrailingDeletionBasesRemoved(); 226 227 if (totalLeadingDeletionsRemoved == 0 && totalTrailingDeletionsRemoved == 0) { 228 return leftAlignmentResult.getCigar(); 229 } else { 230 final List<CigarElement> resultElements = new ArrayList<>(); 231 if (totalLeadingDeletionsRemoved > 0) { 232 resultElements.add(new CigarElement(totalLeadingDeletionsRemoved, CigarOperator.D)); 233 } 234 resultElements.addAll(leftAlignmentResult.getCigar().getCigarElements()); 235 if (totalTrailingDeletionsRemoved > 0) { 236 resultElements.add(new CigarElement(totalTrailingDeletionsRemoved, CigarOperator.D)); 237 } 238 return new Cigar(resultElements); 239 } 240 } 241 242 /** 243 * Make sure that the SW didn't fail in some terrible way, and throw exception if it did 244 */ isSWFailure(final SmithWatermanAlignment alignment)245 private static boolean isSWFailure(final SmithWatermanAlignment alignment) { 246 // check that the alignment starts at the first base, which it should given the padding 247 if ( alignment.getAlignmentOffset() > 0 ) { 248 return true; 249 } 250 251 // check that we aren't getting any S operators (which would be very bad downstream) 252 for ( final CigarElement ce : alignment.getCigar().getCigarElements() ) { 253 if ( ce.getOperator() == CigarOperator.S ) 254 return true; 255 // soft clips at the end of the alignment are really insertions 256 } 257 258 return false; 259 } 260 261 /** 262 * Returns the length of the original read considering all clippings based on this cigar. 263 * <p> 264 * The result of applying this method on a empty cigar is zero. 265 * </p> 266 * @param cigar the input cigar. 267 * @throws IllegalArgumentException if the input {@code cigar} is {@code null}. 268 * @return 0 or greater. 269 */ countUnclippedReadBases(final Cigar cigar)270 public static int countUnclippedReadBases(final Cigar cigar) { 271 Utils.nonNull(cigar, "the input cigar cannot be null"); 272 return cigar.getCigarElements().stream() 273 .filter(ce -> { 274 final CigarOperator operator = ce.getOperator(); 275 return operator.isClipping() || operator.consumesReadBases(); 276 }) 277 .mapToInt(CigarElement::getLength) 278 .sum(); 279 } 280 281 /** 282 * Count the number of soft- or hard- clipped bases from either the left or right end of a cigar 283 */ countClippedBases(final Cigar cigar, final Tail tail, final CigarOperator typeOfClip)284 public static int countClippedBases(final Cigar cigar, final Tail tail, final CigarOperator typeOfClip) { 285 Utils.validateArg(typeOfClip.isClipping(), "typeOfClip must be a clipping operator"); 286 287 final int size = cigar.numCigarElements(); 288 if (size < 2) { 289 Utils.validateArg(size == 1 && !cigar.getFirstCigarElement().getOperator().isClipping(), "cigar is empty or completely clipped."); 290 return 0; 291 } 292 293 int result = 0; 294 295 for (int n = 0; n < size; n++) { 296 final int index = (tail == Tail.LEFT ? n : size - n - 1); 297 final CigarElement element = cigar.getCigarElement(index); 298 if (!element.getOperator().isClipping()) { 299 return result; 300 } else if (element.getOperator() == typeOfClip) { 301 result += element.getLength(); 302 } 303 } 304 305 throw new IllegalArgumentException("Input cigar " + cigar + " is completely clipped."); 306 } 307 308 /** 309 * Count the number clipped bases (both soft and hard) from either the left or right end of a cigar 310 */ countClippedBases(final Cigar cigar, final Tail tail)311 public static int countClippedBases(final Cigar cigar, final Tail tail) { 312 return countClippedBases(cigar, tail, CigarOperator.SOFT_CLIP) + countClippedBases(cigar, tail, CigarOperator.HARD_CLIP); 313 } 314 315 /** 316 * Count the number of soft- and hard-clipped bases over both ends of a cigar 317 */ countClippedBases(final Cigar cigar, final CigarOperator clippingType)318 public static int countClippedBases(final Cigar cigar, final CigarOperator clippingType) { 319 return countClippedBases(cigar, Tail.LEFT, clippingType) + countClippedBases(cigar, Tail.RIGHT, clippingType); 320 } 321 322 /** 323 * Count the number of clipped bases (both soft and hard) over both ends of a cigar 324 */ countClippedBases(final Cigar cigar)325 public static int countClippedBases(final Cigar cigar) { 326 return countClippedBases(cigar, Tail.LEFT) + countClippedBases(cigar, Tail.RIGHT); 327 } 328 countAlignedBases(final Cigar cigar )329 public static int countAlignedBases(final Cigar cigar ) { 330 return Utils.nonNull(cigar).getCigarElements().stream() 331 .filter(cigarElement -> cigarElement.getOperator().isAlignment()) 332 .mapToInt(CigarElement::getLength) 333 .sum(); 334 } 335 336 /** 337 * replace soft clips (S) with match (M) operators, normalizing the result by all the transformations of the {@link CigarBuilder} class: 338 * merging consecutive identical operators and removing zero-length elements. For example 10S10M -> 20M and 10S10M10I10I -> 20M20I. 339 */ revertSoftClips(final Cigar originalCigar)340 public static Cigar revertSoftClips(final Cigar originalCigar) { 341 final CigarBuilder builder = new CigarBuilder(); 342 for (final CigarElement element : originalCigar.getCigarElements()) { 343 if (element.getOperator() == CigarOperator.SOFT_CLIP) { 344 builder.add(new CigarElement(element.getLength(), CigarOperator.MATCH_OR_MISMATCH)); 345 } else { 346 builder.add(element); 347 } 348 } 349 350 return builder.make(); 351 } 352 353 /** 354 * Given a cigar string, soft clip up to leftClipEnd and soft clip starting at rightClipBegin 355 * @param start initial index to clip within read bases, inclusive 356 * @param stop final index to clip within read bases exclusive 357 * @param clippingOperator type of clipping -- must be either hard clip or soft clip 358 */ clipCigar(final Cigar cigar, final int start, final int stop, CigarOperator clippingOperator)359 public static Cigar clipCigar(final Cigar cigar, final int start, final int stop, CigarOperator clippingOperator) { 360 Utils.validateArg(clippingOperator.isClipping(), "Not a clipping operator"); 361 final boolean clipLeft = start == 0; 362 363 final CigarBuilder newCigar = new CigarBuilder(); 364 365 int elementStart = 0; 366 for (final CigarElement element : cigar.getCigarElements()) { 367 final CigarOperator operator = element.getOperator(); 368 // copy hard clips 369 if (operator == CigarOperator.HARD_CLIP) { 370 newCigar.add(new CigarElement(element.getLength(), element.getOperator())); 371 continue; 372 } 373 final int elementEnd = elementStart + (operator.consumesReadBases() ? element.getLength() : 0); 374 375 // element precedes start or follows end of clip, copy it to new cigar 376 if (elementEnd <= start || elementStart >= stop) { 377 // edge case: deletions at edge of clipping are meaningless and we skip them 378 if (operator.consumesReadBases() || (elementStart != start && elementStart != stop)) { 379 newCigar.add(new CigarElement(element.getLength(), operator)); 380 } 381 } else { // otherwise, some or all of the element is soft-clipped 382 final int unclippedLength = clipLeft ? elementEnd - stop : start - elementStart; 383 final int clippedLength = element.getLength() - unclippedLength; 384 385 if (unclippedLength <= 0) { // totally clipped 386 if (operator.consumesReadBases()) { 387 newCigar.add(new CigarElement(element.getLength(), clippingOperator)); 388 } 389 } else if (clipLeft) { 390 newCigar.add(new CigarElement(clippedLength, clippingOperator)); 391 newCigar.add(new CigarElement(unclippedLength, operator)); 392 } else { 393 newCigar.add(new CigarElement(unclippedLength, operator)); 394 newCigar.add(new CigarElement(clippedLength, clippingOperator)); 395 } 396 } 397 elementStart = elementEnd; 398 } 399 400 return newCigar.make(); 401 } 402 403 /** 404 * How many bases to the right does a read's alignment start shift given its cigar and the number of left soft clips 405 */ alignmentStartShift(final Cigar cigar, final int numClipped)406 public static int alignmentStartShift(final Cigar cigar, final int numClipped) { 407 int refBasesClipped = 0; 408 409 int elementStart = 0; // this and elementEnd are indices in the read's bases 410 for (final CigarElement element : cigar.getCigarElements()) { 411 final CigarOperator operator = element.getOperator(); 412 // hard clips consume neither read bases nor reference bases and are irrelevant 413 if (operator == CigarOperator.HARD_CLIP) { 414 continue; 415 } 416 final int elementEnd = elementStart + (operator.consumesReadBases() ? element.getLength() : 0); 417 418 if (elementEnd <= numClipped) { // totally within clipped span -- this includes deletions immediately following clipping 419 refBasesClipped += operator.consumesReferenceBases() ? element.getLength() : 0; 420 } else if (elementStart < numClipped) { // clip in middle of element, which means the element necessarily consumes read bases 421 final int clippedLength = numClipped - elementStart; 422 refBasesClipped += operator.consumesReferenceBases() ? clippedLength : 0; 423 break; 424 } 425 elementStart = elementEnd; 426 } 427 return refBasesClipped; 428 } 429 430 /** 431 * Computes the corresponding distance needs to be walked on the read, given the Cigar and distance walked on the reference. 432 * @param cigar cigar along the 5-3 direction of read (when read is mapped to reverse strand, bwa mem output cigar should be inverted) 433 * @param start start position (1-based) on the read (note it should not count the hard clipped bases, as usual) 434 * @param refDist distance to walk on the reference 435 * @param backward whether to walk backwards along the read or not 436 * @return corresponding walk distance on read (always positive) 437 * @throws IllegalArgumentException if input cigar contains padding operation or 'N', or 438 * either of {@code start} or distance is non-positive, or 439 * {@code start} is larger than read length, or 440 * requested reference walk distance is longer than the total read bases in cigar, or 441 * computed read walk distance would "walk off" the read 442 */ computeAssociatedDistOnRead(final Cigar cigar, final int start, final int refDist, final boolean backward)443 public static int computeAssociatedDistOnRead(final Cigar cigar, final int start, final int refDist, final boolean backward) { 444 445 Utils.validateArg(refDist > 0 && start > 0, () -> "start " + start + " or distance " + refDist + " is non-positive."); 446 447 final List<CigarElement> elements = backward ? Lists.reverse(cigar.getCigarElements()) : cigar.getCigarElements(); 448 449 final int readLength = elements.stream().mapToInt(ce -> ce.getOperator().consumesReadBases() ? ce.getLength() : 0).sum(); 450 final int readBasesToSkip = backward ? readLength - start : start - 1; 451 452 int readBasesConsumed = 0; 453 int refBasesConsumed = 0; 454 455 for (final CigarElement element : elements){ 456 final int readBasesConsumedBeforeElement = readBasesConsumed; 457 458 readBasesConsumed += element.getOperator().consumesReadBases() ? element.getLength() : 0; 459 // skip cigar elements that end before the read start or start after the reference end 460 if (readBasesConsumed <= readBasesToSkip) { 461 continue; 462 } 463 464 refBasesConsumed += element.getOperator().consumesReferenceBases() ? element.getLength() - Math.max(readBasesToSkip - readBasesConsumedBeforeElement, 0) : 0; 465 if (refBasesConsumed >= refDist) { 466 final int excessRefBasesInElement = Math.max(refBasesConsumed - refDist, 0); 467 return readBasesConsumed - readBasesToSkip - (element.getOperator().consumesReadBases() ? excessRefBasesInElement : 0); 468 } 469 } 470 471 throw new IllegalArgumentException("Cigar " + cigar + "does not contain at least " + refDist + " reference bases past red start " + start + "."); 472 } 473 474 /** 475 * Convert the 'I' CigarElement, if it is at either end (terminal) of the input cigar, to a corresponding 'S' operator. 476 * Note that we allow CIGAR of the format '10H10S10I10M', but disallows the format if after the conversion the cigar turns into a giant clip, 477 * e.g. '10H10S10I10S10H' is not allowed (if allowed, it becomes a giant clip of '10H30S10H' which is non-sense). 478 * 479 * @return a pair of number of clipped (hard and soft, including the ones from the converted terminal 'I') bases at the front and back of the 480 * input {@code cigarAlongInput5to3Direction}. 481 * 482 * @throws IllegalArgumentException when the checks as described above fail. 483 */ convertTerminalInsertionToSoftClip(final Cigar cigar)484 public static Cigar convertTerminalInsertionToSoftClip(final Cigar cigar) { 485 486 if (cigar.numCigarElements() < 2 ) { 487 return cigar; 488 } 489 490 final CigarBuilder builder = new CigarBuilder(); 491 for (int n = 0; n < cigar.numCigarElements(); n++) { 492 final CigarElement element = cigar.getCigarElement(n); 493 if (element.getOperator() != CigarOperator.INSERTION) { // not an insertion 494 builder.add(element); 495 } else if (n == 0 || n == cigar.numCigarElements() - 1) { // terminal insertion with no clipping -- convert to soft clip 496 builder.add(new CigarElement(element.getLength(), CigarOperator.SOFT_CLIP)); 497 } else if (cigar.getCigarElement(n-1).getOperator().isClipping() || cigar.getCigarElement(n+1).getOperator().isClipping()) { // insertion preceding or following clip 498 builder.add(new CigarElement(element.getLength(), CigarOperator.SOFT_CLIP)); 499 } else { // interior insertion 500 builder.add(element); 501 } 502 } 503 504 return builder.make(); 505 } 506 } 507