1 package org.broadinstitute.hellbender.utils.fragments;
2 
3 import htsjdk.samtools.Cigar;
4 import htsjdk.samtools.SAMFileHeader;
5 import htsjdk.samtools.SAMReadGroupRecord;
6 import org.apache.commons.lang3.tuple.ImmutablePair;
7 import org.broadinstitute.barclay.argparser.Hidden;
8 import org.broadinstitute.hellbender.utils.Utils;
9 import org.broadinstitute.hellbender.utils.read.ArtificialReadUtils;
10 import org.broadinstitute.hellbender.utils.read.CigarUtils;
11 import org.broadinstitute.hellbender.utils.read.GATKRead;
12 import org.broadinstitute.hellbender.GATKBaseTest;
13 import org.testng.Assert;
14 import org.testng.annotations.DataProvider;
15 import org.testng.annotations.Test;
16 
17 import java.util.*;
18 
19 public class FragmentUtilsUnitTest extends GATKBaseTest {
20 
21     private static final byte HIGH_QUALITY = 30;
22     private static final byte OVERLAPPING_QUALITY = 20;
23 
makeOverlappingRead(final String leftFlank, final int leftQual, final String overlapBases, final byte[] overlapQuals, final String rightFlank, final int rightQual, final int alignmentStart, final int leftSoftclip, final int rightSoftclip)24     private GATKRead makeOverlappingRead(final String leftFlank, final int leftQual, final String overlapBases,
25                                          final byte[] overlapQuals, final String rightFlank, final int rightQual,
26                                          final int alignmentStart, final int leftSoftclip, final int rightSoftclip) {
27         final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader();
28         header.addReadGroup(new SAMReadGroupRecord("RG1"));
29         final String bases = leftFlank + overlapBases + rightFlank;
30         final int readLength = bases.length();
31         final GATKRead read = ArtificialReadUtils.createArtificialRead(header, "myRead", 0, alignmentStart + leftSoftclip, readLength);
32         final byte[] leftQuals = Utils.dupBytes((byte) leftQual, leftFlank.length());
33         final byte[] rightQuals = Utils.dupBytes((byte) rightQual, rightFlank.length());
34         final byte[] quals = Utils.concat(leftQuals, overlapQuals, rightQuals);
35         read.setCigar((leftSoftclip != 0 ? leftSoftclip + "S" : "") + (readLength - rightSoftclip - leftSoftclip) + "M" + (rightSoftclip != 0 ? rightSoftclip + "S" : ""));
36         read.setBases(bases.getBytes());
37         read.setBaseQualities(quals);
38         read.setReadGroup("RG1");
39         read.setMappingQuality(60);
40         return read;
41     }
42 
43     @DataProvider(name = "AdjustFragmentsTest")
createAdjustFragmentsTest()44     public Object[][] createAdjustFragmentsTest() throws Exception {
45         List<Object[]> tests = new ArrayList<>();
46 
47         final String leftFlank = "CCC";
48         final String rightFlank = "AAA";
49         final String allOverlappingBases = "ACGTACGTGGAACCTTAG";
50         for (int overlapSize = 1; overlapSize < allOverlappingBases.length(); overlapSize++) {
51             final String overlappingBases = allOverlappingBases.substring(0, overlapSize);
52             final byte[] overlappingBaseQuals = new byte[overlapSize];
53             for (int i = 0; i < overlapSize; i++) {
54                 overlappingBaseQuals[i] = HIGH_QUALITY;
55             }
56             final GATKRead read1 = makeOverlappingRead(leftFlank, HIGH_QUALITY, overlappingBases, overlappingBaseQuals, "", HIGH_QUALITY, 1, 0, 0);
57             final GATKRead read2 = makeOverlappingRead("", HIGH_QUALITY, overlappingBases, overlappingBaseQuals, rightFlank, HIGH_QUALITY, leftFlank.length() + 1, 0, 0);
58             tests.add(new Object[]{read1, read2, overlapSize});
59         }
60 
61         return tests.toArray(new Object[][]{});
62     }
63 
64     @Test(dataProvider = "AdjustFragmentsTest")
testAdjustingTwoReads(final GATKRead read1, final GATKRead read2, final int overlapSize)65     public void testAdjustingTwoReads(final GATKRead read1, final GATKRead read2, final int overlapSize) {
66         FragmentUtils.adjustQualsOfOverlappingPairedFragments(ImmutablePair.of(read1, read2), true, OptionalInt.empty(), OptionalInt.empty());
67 
68         for (int i = 0; i < read1.getLength() - overlapSize; i++) {
69             Assert.assertEquals(read1.getBaseQualities()[i], HIGH_QUALITY);
70         }
71         for (int i = read1.getLength() - overlapSize; i < read1.getLength(); i++) {
72             Assert.assertEquals(read1.getBaseQualities()[i], OVERLAPPING_QUALITY);
73         }
74 
75         for (int i = 0; i < overlapSize; i++) {
76             Assert.assertEquals(read2.getBaseQualities()[i], OVERLAPPING_QUALITY);
77         }
78         for (int i = overlapSize; i < read2.getLength(); i++) {
79             Assert.assertEquals(read2.getBaseQualities()[i], HIGH_QUALITY);
80         }
81     }
82 
83 
84     // Generate a bunch of reads with softclips that do not overlap with the other read.
85     @DataProvider(name = "AdjustFragmentsTestSoftClipsNotOverlapping")
createAdjustFragmentsTestSoftClips()86     public Object[][] createAdjustFragmentsTestSoftClips() throws Exception {
87         List<Object[]> tests = new ArrayList<>();
88 
89         final String leftFlank = "CCC";
90         final String rightFlank = "AAA";
91         final String allOverlappingBases = "ACGTACGTGGAACCTTAG";
92         for (int overlapSize = 1; overlapSize < allOverlappingBases.length(); overlapSize++) {
93             for (int leftSoftclip = 0; leftSoftclip <= leftFlank.length(); leftSoftclip++) {
94                 for (int rightSoftclip = 0; rightSoftclip <= rightFlank.length(); rightSoftclip++) {
95                     final String overlappingBases = allOverlappingBases.substring(0, overlapSize);
96                     final byte[] overlappingBaseQuals = new byte[overlapSize];
97                     for (int i = 0; i < overlapSize; i++) {
98                         overlappingBaseQuals[i] = HIGH_QUALITY;
99                     }
100                     final GATKRead read1 = makeOverlappingRead(leftFlank, HIGH_QUALITY, overlappingBases, overlappingBaseQuals, "", HIGH_QUALITY, 1, leftSoftclip, 0);
101                     final GATKRead read2 = makeOverlappingRead("", HIGH_QUALITY, overlappingBases, overlappingBaseQuals, rightFlank, HIGH_QUALITY, leftFlank.length() + 1, 0, rightSoftclip);
102                     tests.add(new Object[]{read1, read2, overlapSize});
103                 }
104             }
105         }
106         return tests.toArray(new Object[][]{});
107     }
108 
109     // Assert that despite the softclips that the reads are being properly
110     @Test(dataProvider = "AdjustFragmentsTestSoftClipsNotOverlapping")
testAdjustingTwoReadsWithSoftClipping(final GATKRead read1, final GATKRead read2, final int overlapSize)111     public void testAdjustingTwoReadsWithSoftClipping(final GATKRead read1, final GATKRead read2, final int overlapSize) {
112         FragmentUtils.adjustQualsOfOverlappingPairedFragments(ImmutablePair.of(read1, read2), true, OptionalInt.empty(), OptionalInt.empty());
113 
114         for (int i = 0; i < read1.getLength() - overlapSize; i++) {
115             Assert.assertEquals(read1.getBaseQualities()[i], HIGH_QUALITY);
116         }
117         for (int i = read1.getLength() - overlapSize; i < read1.getLength(); i++) {
118             Assert.assertEquals(read1.getBaseQualities()[i], OVERLAPPING_QUALITY);
119         }
120 
121         for (int i = 0; i < overlapSize; i++) {
122             Assert.assertEquals(read2.getBaseQualities()[i], OVERLAPPING_QUALITY);
123         }
124         for (int i = overlapSize; i < read2.getLength(); i++) {
125             Assert.assertEquals(read2.getBaseQualities()[i], HIGH_QUALITY);
126         }
127     }
128 
129 
130     // Generate a bunch of reads with softclips that are overlapping with the other read (and allow for reads with no overlap at all except for softclips)
131     @DataProvider(name = "AdjustFragmentsTestSoftClipsInOverlapRegion")
createAdjustFragmentsTestSoftClipsInOverlapRegion()132     public Object[][] createAdjustFragmentsTestSoftClipsInOverlapRegion() throws Exception {
133         List<Object[]> tests = new ArrayList<>();
134 
135         final String leftFlank = "CCC";
136         final String rightFlank = "AAA";
137         final String allOverlappingBases = "ACGTACGTGGAACCTTAG";
138         for (int overlapSize = 1; overlapSize < allOverlappingBases.length(); overlapSize++) {
139             for (int leftSoftclip = 0; leftSoftclip <= overlapSize; leftSoftclip++) {
140                 for (int rightSoftclip = 0; rightSoftclip <= overlapSize; rightSoftclip++) {
141                     final String overlappingBases = allOverlappingBases.substring(0, overlapSize);
142                     final byte[] overlappingBaseQuals = new byte[overlapSize];
143                     for (int i = 0; i < overlapSize; i++) {
144                         overlappingBaseQuals[i] = HIGH_QUALITY;
145                     }
146                     // Flipped so that the softclips occur in the overlapping region instead
147                     final GATKRead read1 = makeOverlappingRead(leftFlank, HIGH_QUALITY, overlappingBases, overlappingBaseQuals, "", HIGH_QUALITY, 1, 0, rightSoftclip);
148                     final GATKRead read2 = makeOverlappingRead("", HIGH_QUALITY, overlappingBases, overlappingBaseQuals, rightFlank, HIGH_QUALITY, leftFlank.length() + 1, leftSoftclip, 0);
149                     tests.add(new Object[]{read1, rightSoftclip, read2, leftSoftclip, overlapSize});
150                 }
151             }
152         }
153         return tests.toArray(new Object[][]{});
154     }
155 
156     // Assert that despite reads only overlaping eachother in softlcipped bases that the overlapping code gracefully does nothing rather than failing
157     @Test(dataProvider = "AdjustFragmentsTestSoftClipsInOverlapRegion")
testAdjustingTwoReadsWithSoftClippingOverlappingEachother(final GATKRead read1, final int read1Softclips, final GATKRead read2, final int read2Softclips, final int overlapSize)158     public void testAdjustingTwoReadsWithSoftClippingOverlappingEachother(final GATKRead read1, final int read1Softclips, final GATKRead read2, final int read2Softclips, final int overlapSize) {
159         FragmentUtils.adjustQualsOfOverlappingPairedFragments(ImmutablePair.of(read1, read2), true, OptionalInt.empty(), OptionalInt.empty());
160 
161         // Untouched bases in R1 that don't overlap
162         for (int i = 0; i < read1.getLength() - overlapSize; i++) {
163             Assert.assertEquals(read1.getBaseQualities()[i], HIGH_QUALITY);
164         }
165         // Bases in R1 that overlap with R2 (before R1 softclips and sans R2 softclips)
166         for (int i = read1.getLength() - overlapSize + read2Softclips; i < read1.getLength() - read1Softclips; i++) {
167             Assert.assertEquals(read1.getBaseQualities()[i], OVERLAPPING_QUALITY);
168         }
169         // Softclipped bases from R1 that did not get adjusted
170         for (int i = read1.getLength() - read1Softclips; i < read1.getLength(); i++) {
171             Assert.assertEquals(read1.getBaseQualities()[i], HIGH_QUALITY);
172         }
173 
174 
175         // Softclipped bases from R2 that did not get adjusted
176         for (int i = 0; i < read2Softclips; i++) {
177             Assert.assertEquals(read2.getBaseQualities()[i], HIGH_QUALITY);
178         }
179         // Bases in R2 that overlap with R1 (after R2 softclips adjusted for R1 softclips)
180         for (int i = read2Softclips; i < overlapSize - read1Softclips; i++) {
181             Assert.assertEquals(read2.getBaseQualities()[i], OVERLAPPING_QUALITY);
182         }
183         // Bases in R2 that did not overlap at all with R1
184         for (int i = overlapSize; i < read2.getLength(); i++) {
185             Assert.assertEquals(read2.getBaseQualities()[i], HIGH_QUALITY);
186         }
187     }
188 
189     @Test
190     // This test asserts that that a read with an indel at its end will will have the correct bases examined for qualities
testLeadingIndelBehaviorForOverlappingReads()191     public void testLeadingIndelBehaviorForOverlappingReads() {
192         final String leftFlank = "CCC";
193         final String rightFlank = "AAA";
194         final String allOverlappingBases = "ACGT";
195 
196         final String overlappingBases = allOverlappingBases.substring(0, 4);
197         final byte[] overlappingBaseQuals = new byte[]{HIGH_QUALITY, HIGH_QUALITY, HIGH_QUALITY, HIGH_QUALITY};
198 
199         // Flipped so that the softclips occur in the overlapping region instead
200         final GATKRead read1 = makeOverlappingRead(leftFlank, HIGH_QUALITY, overlappingBases, overlappingBaseQuals, "", HIGH_QUALITY, 1, 0, 0);
201         final GATKRead read2 = makeOverlappingRead("T", HIGH_QUALITY, overlappingBases, overlappingBaseQuals, rightFlank, HIGH_QUALITY, leftFlank.length() + 1, 0, 0);
202 
203         // Add a leading indel to the second cigar (which could happen in HaplotypeCaller due to the clipping operations that happen in the asssembly region)
204         read2.setCigar("1I7M");
205 
206         FragmentUtils.adjustQualsOfOverlappingPairedFragments(ImmutablePair.of(read1, read2), true, OptionalInt.empty(), OptionalInt.empty());
207 
208         //Expected Qualities for reads:
209         final byte[] read1Expected = new byte[]{HIGH_QUALITY, HIGH_QUALITY, HIGH_QUALITY, OVERLAPPING_QUALITY, OVERLAPPING_QUALITY, OVERLAPPING_QUALITY, OVERLAPPING_QUALITY};
210         final byte[] read2Expected = new byte[]{HIGH_QUALITY, OVERLAPPING_QUALITY, OVERLAPPING_QUALITY, OVERLAPPING_QUALITY, OVERLAPPING_QUALITY, HIGH_QUALITY, HIGH_QUALITY, HIGH_QUALITY};
211 
212         Assert.assertEquals(read1.getBaseQualities(), read1Expected);
213         Assert.assertEquals(read2.getBaseQualities(), read2Expected);
214     }
215 
216     @Test
217     // This test exists to document the current indel behavior, this should be changed to reflect whatever approach is chosen when https://github.com/broadinstitute/gatk/issues/6890 is addressed
testIndelBehaviorForOverlappingReads()218     public void testIndelBehaviorForOverlappingReads() {
219         final String leftFlank = "CCC";
220         final String rightFlank = "AAA";
221         final String read1OverlappingRegion = "ACGTTG";
222         final String read2OverlappingRegion = "ACGAATTG"; // 2 A bases inserted in the matched region on read 2
223 
224         final byte[] read1OverlappingBaseQuals = new byte[]{HIGH_QUALITY, HIGH_QUALITY, HIGH_QUALITY, HIGH_QUALITY, HIGH_QUALITY, HIGH_QUALITY};
225         final byte[] read2OverlappingBaseQuals = new byte[]{HIGH_QUALITY, HIGH_QUALITY, HIGH_QUALITY, HIGH_QUALITY, HIGH_QUALITY, HIGH_QUALITY, HIGH_QUALITY, HIGH_QUALITY, HIGH_QUALITY};
226 
227         // Flipped so that the softclips occur in the overlapping region instead
228         final GATKRead read1 = makeOverlappingRead(leftFlank, HIGH_QUALITY, read1OverlappingRegion, read1OverlappingBaseQuals, "", HIGH_QUALITY, 1, 0, 0);
229         final GATKRead read2 = makeOverlappingRead("", HIGH_QUALITY, read2OverlappingRegion, read2OverlappingBaseQuals, rightFlank, HIGH_QUALITY, leftFlank.length() + 1, 0, 0);
230 
231         // add a cigar to read 2 reflecting the 2 inserted bases
232         read2.setCigar("3M2I6M");
233 
234         FragmentUtils.adjustQualsOfOverlappingPairedFragments(ImmutablePair.of(read1, read2), true, OptionalInt.empty(), OptionalInt.empty());
235 
236         //Expected Qualities for reads:
237         // NOTE: these merely reflect the current flawed behavior where the cigar is not taken into account when evaluating matched bases.
238         final byte[] read1Expected = new byte[]{HIGH_QUALITY, HIGH_QUALITY, HIGH_QUALITY, OVERLAPPING_QUALITY, OVERLAPPING_QUALITY, OVERLAPPING_QUALITY, 0, 0, 0};
239         final byte[] read2Expected = new byte[]{OVERLAPPING_QUALITY, OVERLAPPING_QUALITY, OVERLAPPING_QUALITY, 0, 0, 0, HIGH_QUALITY, HIGH_QUALITY, HIGH_QUALITY, HIGH_QUALITY, HIGH_QUALITY, HIGH_QUALITY};
240         // NOTE: how the TTG bases at positions 7-9 on the reference are zeroed out on read 1 but on read 2 only position 7 (and not 8 or 9) are zeroed out due to the 2 base insertion in read 2.
241 
242         Assert.assertEquals(read1.getBaseQualities(), read1Expected);
243         Assert.assertEquals(read2.getBaseQualities(), read2Expected);
244     }
245 
246 }
247