1 package org.broadinstitute.hellbender.utils.fragments; 2 3 import htsjdk.samtools.Cigar; 4 import htsjdk.samtools.SAMFileHeader; 5 import htsjdk.samtools.SAMReadGroupRecord; 6 import org.apache.commons.lang3.tuple.ImmutablePair; 7 import org.broadinstitute.barclay.argparser.Hidden; 8 import org.broadinstitute.hellbender.utils.Utils; 9 import org.broadinstitute.hellbender.utils.read.ArtificialReadUtils; 10 import org.broadinstitute.hellbender.utils.read.CigarUtils; 11 import org.broadinstitute.hellbender.utils.read.GATKRead; 12 import org.broadinstitute.hellbender.GATKBaseTest; 13 import org.testng.Assert; 14 import org.testng.annotations.DataProvider; 15 import org.testng.annotations.Test; 16 17 import java.util.*; 18 19 public class FragmentUtilsUnitTest extends GATKBaseTest { 20 21 private static final byte HIGH_QUALITY = 30; 22 private static final byte OVERLAPPING_QUALITY = 20; 23 makeOverlappingRead(final String leftFlank, final int leftQual, final String overlapBases, final byte[] overlapQuals, final String rightFlank, final int rightQual, final int alignmentStart, final int leftSoftclip, final int rightSoftclip)24 private GATKRead makeOverlappingRead(final String leftFlank, final int leftQual, final String overlapBases, 25 final byte[] overlapQuals, final String rightFlank, final int rightQual, 26 final int alignmentStart, final int leftSoftclip, final int rightSoftclip) { 27 final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader(); 28 header.addReadGroup(new SAMReadGroupRecord("RG1")); 29 final String bases = leftFlank + overlapBases + rightFlank; 30 final int readLength = bases.length(); 31 final GATKRead read = ArtificialReadUtils.createArtificialRead(header, "myRead", 0, alignmentStart + leftSoftclip, readLength); 32 final byte[] leftQuals = Utils.dupBytes((byte) leftQual, leftFlank.length()); 33 final byte[] rightQuals = Utils.dupBytes((byte) rightQual, rightFlank.length()); 34 final byte[] quals = Utils.concat(leftQuals, overlapQuals, rightQuals); 35 read.setCigar((leftSoftclip != 0 ? leftSoftclip + "S" : "") + (readLength - rightSoftclip - leftSoftclip) + "M" + (rightSoftclip != 0 ? rightSoftclip + "S" : "")); 36 read.setBases(bases.getBytes()); 37 read.setBaseQualities(quals); 38 read.setReadGroup("RG1"); 39 read.setMappingQuality(60); 40 return read; 41 } 42 43 @DataProvider(name = "AdjustFragmentsTest") createAdjustFragmentsTest()44 public Object[][] createAdjustFragmentsTest() throws Exception { 45 List<Object[]> tests = new ArrayList<>(); 46 47 final String leftFlank = "CCC"; 48 final String rightFlank = "AAA"; 49 final String allOverlappingBases = "ACGTACGTGGAACCTTAG"; 50 for (int overlapSize = 1; overlapSize < allOverlappingBases.length(); overlapSize++) { 51 final String overlappingBases = allOverlappingBases.substring(0, overlapSize); 52 final byte[] overlappingBaseQuals = new byte[overlapSize]; 53 for (int i = 0; i < overlapSize; i++) { 54 overlappingBaseQuals[i] = HIGH_QUALITY; 55 } 56 final GATKRead read1 = makeOverlappingRead(leftFlank, HIGH_QUALITY, overlappingBases, overlappingBaseQuals, "", HIGH_QUALITY, 1, 0, 0); 57 final GATKRead read2 = makeOverlappingRead("", HIGH_QUALITY, overlappingBases, overlappingBaseQuals, rightFlank, HIGH_QUALITY, leftFlank.length() + 1, 0, 0); 58 tests.add(new Object[]{read1, read2, overlapSize}); 59 } 60 61 return tests.toArray(new Object[][]{}); 62 } 63 64 @Test(dataProvider = "AdjustFragmentsTest") testAdjustingTwoReads(final GATKRead read1, final GATKRead read2, final int overlapSize)65 public void testAdjustingTwoReads(final GATKRead read1, final GATKRead read2, final int overlapSize) { 66 FragmentUtils.adjustQualsOfOverlappingPairedFragments(ImmutablePair.of(read1, read2), true, OptionalInt.empty(), OptionalInt.empty()); 67 68 for (int i = 0; i < read1.getLength() - overlapSize; i++) { 69 Assert.assertEquals(read1.getBaseQualities()[i], HIGH_QUALITY); 70 } 71 for (int i = read1.getLength() - overlapSize; i < read1.getLength(); i++) { 72 Assert.assertEquals(read1.getBaseQualities()[i], OVERLAPPING_QUALITY); 73 } 74 75 for (int i = 0; i < overlapSize; i++) { 76 Assert.assertEquals(read2.getBaseQualities()[i], OVERLAPPING_QUALITY); 77 } 78 for (int i = overlapSize; i < read2.getLength(); i++) { 79 Assert.assertEquals(read2.getBaseQualities()[i], HIGH_QUALITY); 80 } 81 } 82 83 84 // Generate a bunch of reads with softclips that do not overlap with the other read. 85 @DataProvider(name = "AdjustFragmentsTestSoftClipsNotOverlapping") createAdjustFragmentsTestSoftClips()86 public Object[][] createAdjustFragmentsTestSoftClips() throws Exception { 87 List<Object[]> tests = new ArrayList<>(); 88 89 final String leftFlank = "CCC"; 90 final String rightFlank = "AAA"; 91 final String allOverlappingBases = "ACGTACGTGGAACCTTAG"; 92 for (int overlapSize = 1; overlapSize < allOverlappingBases.length(); overlapSize++) { 93 for (int leftSoftclip = 0; leftSoftclip <= leftFlank.length(); leftSoftclip++) { 94 for (int rightSoftclip = 0; rightSoftclip <= rightFlank.length(); rightSoftclip++) { 95 final String overlappingBases = allOverlappingBases.substring(0, overlapSize); 96 final byte[] overlappingBaseQuals = new byte[overlapSize]; 97 for (int i = 0; i < overlapSize; i++) { 98 overlappingBaseQuals[i] = HIGH_QUALITY; 99 } 100 final GATKRead read1 = makeOverlappingRead(leftFlank, HIGH_QUALITY, overlappingBases, overlappingBaseQuals, "", HIGH_QUALITY, 1, leftSoftclip, 0); 101 final GATKRead read2 = makeOverlappingRead("", HIGH_QUALITY, overlappingBases, overlappingBaseQuals, rightFlank, HIGH_QUALITY, leftFlank.length() + 1, 0, rightSoftclip); 102 tests.add(new Object[]{read1, read2, overlapSize}); 103 } 104 } 105 } 106 return tests.toArray(new Object[][]{}); 107 } 108 109 // Assert that despite the softclips that the reads are being properly 110 @Test(dataProvider = "AdjustFragmentsTestSoftClipsNotOverlapping") testAdjustingTwoReadsWithSoftClipping(final GATKRead read1, final GATKRead read2, final int overlapSize)111 public void testAdjustingTwoReadsWithSoftClipping(final GATKRead read1, final GATKRead read2, final int overlapSize) { 112 FragmentUtils.adjustQualsOfOverlappingPairedFragments(ImmutablePair.of(read1, read2), true, OptionalInt.empty(), OptionalInt.empty()); 113 114 for (int i = 0; i < read1.getLength() - overlapSize; i++) { 115 Assert.assertEquals(read1.getBaseQualities()[i], HIGH_QUALITY); 116 } 117 for (int i = read1.getLength() - overlapSize; i < read1.getLength(); i++) { 118 Assert.assertEquals(read1.getBaseQualities()[i], OVERLAPPING_QUALITY); 119 } 120 121 for (int i = 0; i < overlapSize; i++) { 122 Assert.assertEquals(read2.getBaseQualities()[i], OVERLAPPING_QUALITY); 123 } 124 for (int i = overlapSize; i < read2.getLength(); i++) { 125 Assert.assertEquals(read2.getBaseQualities()[i], HIGH_QUALITY); 126 } 127 } 128 129 130 // Generate a bunch of reads with softclips that are overlapping with the other read (and allow for reads with no overlap at all except for softclips) 131 @DataProvider(name = "AdjustFragmentsTestSoftClipsInOverlapRegion") createAdjustFragmentsTestSoftClipsInOverlapRegion()132 public Object[][] createAdjustFragmentsTestSoftClipsInOverlapRegion() throws Exception { 133 List<Object[]> tests = new ArrayList<>(); 134 135 final String leftFlank = "CCC"; 136 final String rightFlank = "AAA"; 137 final String allOverlappingBases = "ACGTACGTGGAACCTTAG"; 138 for (int overlapSize = 1; overlapSize < allOverlappingBases.length(); overlapSize++) { 139 for (int leftSoftclip = 0; leftSoftclip <= overlapSize; leftSoftclip++) { 140 for (int rightSoftclip = 0; rightSoftclip <= overlapSize; rightSoftclip++) { 141 final String overlappingBases = allOverlappingBases.substring(0, overlapSize); 142 final byte[] overlappingBaseQuals = new byte[overlapSize]; 143 for (int i = 0; i < overlapSize; i++) { 144 overlappingBaseQuals[i] = HIGH_QUALITY; 145 } 146 // Flipped so that the softclips occur in the overlapping region instead 147 final GATKRead read1 = makeOverlappingRead(leftFlank, HIGH_QUALITY, overlappingBases, overlappingBaseQuals, "", HIGH_QUALITY, 1, 0, rightSoftclip); 148 final GATKRead read2 = makeOverlappingRead("", HIGH_QUALITY, overlappingBases, overlappingBaseQuals, rightFlank, HIGH_QUALITY, leftFlank.length() + 1, leftSoftclip, 0); 149 tests.add(new Object[]{read1, rightSoftclip, read2, leftSoftclip, overlapSize}); 150 } 151 } 152 } 153 return tests.toArray(new Object[][]{}); 154 } 155 156 // Assert that despite reads only overlaping eachother in softlcipped bases that the overlapping code gracefully does nothing rather than failing 157 @Test(dataProvider = "AdjustFragmentsTestSoftClipsInOverlapRegion") testAdjustingTwoReadsWithSoftClippingOverlappingEachother(final GATKRead read1, final int read1Softclips, final GATKRead read2, final int read2Softclips, final int overlapSize)158 public void testAdjustingTwoReadsWithSoftClippingOverlappingEachother(final GATKRead read1, final int read1Softclips, final GATKRead read2, final int read2Softclips, final int overlapSize) { 159 FragmentUtils.adjustQualsOfOverlappingPairedFragments(ImmutablePair.of(read1, read2), true, OptionalInt.empty(), OptionalInt.empty()); 160 161 // Untouched bases in R1 that don't overlap 162 for (int i = 0; i < read1.getLength() - overlapSize; i++) { 163 Assert.assertEquals(read1.getBaseQualities()[i], HIGH_QUALITY); 164 } 165 // Bases in R1 that overlap with R2 (before R1 softclips and sans R2 softclips) 166 for (int i = read1.getLength() - overlapSize + read2Softclips; i < read1.getLength() - read1Softclips; i++) { 167 Assert.assertEquals(read1.getBaseQualities()[i], OVERLAPPING_QUALITY); 168 } 169 // Softclipped bases from R1 that did not get adjusted 170 for (int i = read1.getLength() - read1Softclips; i < read1.getLength(); i++) { 171 Assert.assertEquals(read1.getBaseQualities()[i], HIGH_QUALITY); 172 } 173 174 175 // Softclipped bases from R2 that did not get adjusted 176 for (int i = 0; i < read2Softclips; i++) { 177 Assert.assertEquals(read2.getBaseQualities()[i], HIGH_QUALITY); 178 } 179 // Bases in R2 that overlap with R1 (after R2 softclips adjusted for R1 softclips) 180 for (int i = read2Softclips; i < overlapSize - read1Softclips; i++) { 181 Assert.assertEquals(read2.getBaseQualities()[i], OVERLAPPING_QUALITY); 182 } 183 // Bases in R2 that did not overlap at all with R1 184 for (int i = overlapSize; i < read2.getLength(); i++) { 185 Assert.assertEquals(read2.getBaseQualities()[i], HIGH_QUALITY); 186 } 187 } 188 189 @Test 190 // This test asserts that that a read with an indel at its end will will have the correct bases examined for qualities testLeadingIndelBehaviorForOverlappingReads()191 public void testLeadingIndelBehaviorForOverlappingReads() { 192 final String leftFlank = "CCC"; 193 final String rightFlank = "AAA"; 194 final String allOverlappingBases = "ACGT"; 195 196 final String overlappingBases = allOverlappingBases.substring(0, 4); 197 final byte[] overlappingBaseQuals = new byte[]{HIGH_QUALITY, HIGH_QUALITY, HIGH_QUALITY, HIGH_QUALITY}; 198 199 // Flipped so that the softclips occur in the overlapping region instead 200 final GATKRead read1 = makeOverlappingRead(leftFlank, HIGH_QUALITY, overlappingBases, overlappingBaseQuals, "", HIGH_QUALITY, 1, 0, 0); 201 final GATKRead read2 = makeOverlappingRead("T", HIGH_QUALITY, overlappingBases, overlappingBaseQuals, rightFlank, HIGH_QUALITY, leftFlank.length() + 1, 0, 0); 202 203 // Add a leading indel to the second cigar (which could happen in HaplotypeCaller due to the clipping operations that happen in the asssembly region) 204 read2.setCigar("1I7M"); 205 206 FragmentUtils.adjustQualsOfOverlappingPairedFragments(ImmutablePair.of(read1, read2), true, OptionalInt.empty(), OptionalInt.empty()); 207 208 //Expected Qualities for reads: 209 final byte[] read1Expected = new byte[]{HIGH_QUALITY, HIGH_QUALITY, HIGH_QUALITY, OVERLAPPING_QUALITY, OVERLAPPING_QUALITY, OVERLAPPING_QUALITY, OVERLAPPING_QUALITY}; 210 final byte[] read2Expected = new byte[]{HIGH_QUALITY, OVERLAPPING_QUALITY, OVERLAPPING_QUALITY, OVERLAPPING_QUALITY, OVERLAPPING_QUALITY, HIGH_QUALITY, HIGH_QUALITY, HIGH_QUALITY}; 211 212 Assert.assertEquals(read1.getBaseQualities(), read1Expected); 213 Assert.assertEquals(read2.getBaseQualities(), read2Expected); 214 } 215 216 @Test 217 // This test exists to document the current indel behavior, this should be changed to reflect whatever approach is chosen when https://github.com/broadinstitute/gatk/issues/6890 is addressed testIndelBehaviorForOverlappingReads()218 public void testIndelBehaviorForOverlappingReads() { 219 final String leftFlank = "CCC"; 220 final String rightFlank = "AAA"; 221 final String read1OverlappingRegion = "ACGTTG"; 222 final String read2OverlappingRegion = "ACGAATTG"; // 2 A bases inserted in the matched region on read 2 223 224 final byte[] read1OverlappingBaseQuals = new byte[]{HIGH_QUALITY, HIGH_QUALITY, HIGH_QUALITY, HIGH_QUALITY, HIGH_QUALITY, HIGH_QUALITY}; 225 final byte[] read2OverlappingBaseQuals = new byte[]{HIGH_QUALITY, HIGH_QUALITY, HIGH_QUALITY, HIGH_QUALITY, HIGH_QUALITY, HIGH_QUALITY, HIGH_QUALITY, HIGH_QUALITY, HIGH_QUALITY}; 226 227 // Flipped so that the softclips occur in the overlapping region instead 228 final GATKRead read1 = makeOverlappingRead(leftFlank, HIGH_QUALITY, read1OverlappingRegion, read1OverlappingBaseQuals, "", HIGH_QUALITY, 1, 0, 0); 229 final GATKRead read2 = makeOverlappingRead("", HIGH_QUALITY, read2OverlappingRegion, read2OverlappingBaseQuals, rightFlank, HIGH_QUALITY, leftFlank.length() + 1, 0, 0); 230 231 // add a cigar to read 2 reflecting the 2 inserted bases 232 read2.setCigar("3M2I6M"); 233 234 FragmentUtils.adjustQualsOfOverlappingPairedFragments(ImmutablePair.of(read1, read2), true, OptionalInt.empty(), OptionalInt.empty()); 235 236 //Expected Qualities for reads: 237 // NOTE: these merely reflect the current flawed behavior where the cigar is not taken into account when evaluating matched bases. 238 final byte[] read1Expected = new byte[]{HIGH_QUALITY, HIGH_QUALITY, HIGH_QUALITY, OVERLAPPING_QUALITY, OVERLAPPING_QUALITY, OVERLAPPING_QUALITY, 0, 0, 0}; 239 final byte[] read2Expected = new byte[]{OVERLAPPING_QUALITY, OVERLAPPING_QUALITY, OVERLAPPING_QUALITY, 0, 0, 0, HIGH_QUALITY, HIGH_QUALITY, HIGH_QUALITY, HIGH_QUALITY, HIGH_QUALITY, HIGH_QUALITY}; 240 // NOTE: how the TTG bases at positions 7-9 on the reference are zeroed out on read 1 but on read 2 only position 7 (and not 8 or 9) are zeroed out due to the 2 base insertion in read 2. 241 242 Assert.assertEquals(read1.getBaseQualities(), read1Expected); 243 Assert.assertEquals(read2.getBaseQualities(), read2Expected); 244 } 245 246 } 247