1 package org.broadinstitute.hellbender.utils.haplotype;
2 
3 import htsjdk.samtools.SAMFileHeader;
4 import htsjdk.samtools.SamReader;
5 import htsjdk.samtools.SamReaderFactory;
6 import htsjdk.samtools.SAMRecord;
7 import htsjdk.samtools.TextCigarCodec;
8 import htsjdk.samtools.ValidationStringency;
9 import htsjdk.samtools.util.IOUtil;
10 import htsjdk.samtools.util.Locatable;
11 import htsjdk.samtools.SamFiles;
12 
13 import java.nio.file.Path;
14 import org.broadinstitute.hellbender.GATKBaseTest;
15 import org.broadinstitute.hellbender.testutils.IntegrationTestSpec;
16 import org.broadinstitute.hellbender.utils.genotyper.*;
17 import org.broadinstitute.hellbender.utils.read.GATKRead;
18 import org.broadinstitute.hellbender.utils.read.ArtificialReadUtils;
19 import org.broadinstitute.hellbender.utils.SimpleInterval;
20 import org.broadinstitute.hellbender.testutils.SamAssertionUtils;
21 import org.testng.Assert;
22 
23 import org.testng.annotations.DataProvider;
24 import org.testng.annotations.Test;
25 
26 import java.io.File;
27 import java.io.IOException;
28 import java.util.*;
29 
30 
31 public class HaplotypeBAMWriterUnitTest extends GATKBaseTest {
32     private final SAMFileHeader samHeader = ArtificialReadUtils.createArtificialSamHeaderWithPrograms(20, 1, 1000, 3);
33     private final String expectedFilePath = getToolTestDataDir() + "/expected/";
34 
35     @DataProvider(name="ReadsLikelikhoodData")
makeReadsLikelikhoodData()36     public Object[][] makeReadsLikelikhoodData() {
37         final String haplotypeBaseSignature = "ACTGAAGGTTCC";
38         final Haplotype haplotype = makeHaplotype(haplotypeBaseSignature);
39         final int sampleCounts[] = {2, 2};
40         final Locatable loc = new SimpleInterval("20", 10, 20);
41         final AlleleLikelihoods<GATKRead, Haplotype> readLikelihoods = generateReadLikelihoods(sampleCounts);
42 
43         return new Object[][]{
44                 { haplotypeBaseSignature, Collections.singletonList(haplotype), loc, readLikelihoods }
45         };
46     }
47 
48     @Test(dataProvider = "ReadsLikelikhoodData")
testWriteToSAMFile( @uppressWarningsR) final String haplotypeBaseSignature, final List<Haplotype> haplotypes, final Locatable genomeLoc, final AlleleLikelihoods<GATKRead, Haplotype> readLikelihoods )49     public void testWriteToSAMFile
50         (
51             @SuppressWarnings("unused") final String haplotypeBaseSignature,
52             final List<Haplotype> haplotypes,
53             final Locatable genomeLoc,
54             final AlleleLikelihoods<GATKRead, Haplotype> readLikelihoods
55         ) throws IOException
56     {
57         // create output SAM file
58         final Path outPath = testWriteToFile(".sam", haplotypes, genomeLoc, readLikelihoods, false, false);
59         final File expectedFile = new File(expectedFilePath, "testSAM.sam");
60         IntegrationTestSpec.assertEqualTextFiles(outPath.toFile(), expectedFile);
61     }
62 
63     @Test(dataProvider = "ReadsLikelikhoodData")
testWriteToBAMFile( @uppressWarningsR) final String haplotypeBaseSignature, final List<Haplotype> haplotypes, final Locatable genomeLoc, final AlleleLikelihoods<GATKRead, Haplotype> readLikelihoods )64     public void testWriteToBAMFile
65         (
66             @SuppressWarnings("unused") final String haplotypeBaseSignature,
67             final List<Haplotype> haplotypes,
68             final Locatable genomeLoc,
69             final AlleleLikelihoods<GATKRead, Haplotype> readLikelihoods
70         ) throws IOException
71     {
72         // create output BAM file
73         final Path outPath = testWriteToFile(".bam", haplotypes, genomeLoc, readLikelihoods, false, false);
74         final File expectedFile = new File(expectedFilePath, "testBAM.bam");
75         SamAssertionUtils.assertEqualBamFiles(outPath.toFile(), expectedFile, false, ValidationStringency.DEFAULT_STRINGENCY);
76     }
77 
78     @Test(dataProvider = "ReadsLikelikhoodData")
testWriteToBAMFileWithIndex( @uppressWarningsR) final String haplotypeBaseSignature, final List<Haplotype> haplotypes, final Locatable genomeLoc, final AlleleLikelihoods<GATKRead, Haplotype> readLikelihoods )79     public void testWriteToBAMFileWithIndex
80             (
81                     @SuppressWarnings("unused") final String haplotypeBaseSignature,
82                     final List<Haplotype> haplotypes,
83                     final Locatable genomeLoc,
84                     final AlleleLikelihoods<GATKRead, Haplotype> readLikelihoods
85             ) throws IOException
86     {
87         // create output BAM file
88         final Path outPath = testWriteToFile(".bam", haplotypes, genomeLoc, readLikelihoods, true, false);
89         final File expectedFile = new File(expectedFilePath, "testBAM.bam");
90         SamAssertionUtils.assertEqualBamFiles(outPath.toFile(), expectedFile, false, ValidationStringency.DEFAULT_STRINGENCY);
91     }
92 
93     @Test(dataProvider = "ReadsLikelikhoodData")
testWriteToBAMFileWithMD5( @uppressWarningsR) final String haplotypeBaseSignature, final List<Haplotype> haplotypes, final Locatable genomeLoc, final AlleleLikelihoods<GATKRead, Haplotype> readLikelihoods )94     public void testWriteToBAMFileWithMD5
95             (
96                     @SuppressWarnings("unused") final String haplotypeBaseSignature,
97                     final List<Haplotype> haplotypes,
98                     final Locatable genomeLoc,
99                     final AlleleLikelihoods<GATKRead, Haplotype> readLikelihoods
100             ) throws IOException
101     {
102         // create output BAM file
103         final Path outPath = testWriteToFile(".bam", haplotypes, genomeLoc, readLikelihoods, false, true);
104         final File expectedFile = new File(expectedFilePath, "testBAM.bam");
105         SamAssertionUtils.assertEqualBamFiles(outPath.toFile(), expectedFile, false, ValidationStringency.DEFAULT_STRINGENCY);
106     }
107 
testWriteToFile( final String outputFileExtension, final List<Haplotype> haplotypes, final Locatable genomeLoc, final AlleleLikelihoods<GATKRead, Haplotype> readLikelihoods, final boolean createIndex, final boolean createMD5 )108     private Path testWriteToFile
109         (
110             final String outputFileExtension,
111             final List<Haplotype> haplotypes,
112             final Locatable genomeLoc,
113             final AlleleLikelihoods<GATKRead, Haplotype> readLikelihoods,
114             final boolean createIndex,
115             final boolean createMD5
116         ) throws IOException
117     {
118         final Path outPath = GATKBaseTest.createTempFile("haplotypeBamWriterTest", outputFileExtension).toPath();
119         final SAMFileDestination fileDest = new SAMFileDestination(outPath, createIndex, createMD5, samHeader, "TestHaplotypeRG");
120 
121         try (final HaplotypeBAMWriter haplotypeBAMWriter = new HaplotypeBAMWriter(HaplotypeBAMWriter.WriterType.ALL_POSSIBLE_HAPLOTYPES, fileDest)) {
122             haplotypeBAMWriter.writeReadsAlignedToHaplotypes(
123                     haplotypes,
124                     genomeLoc,
125                     haplotypes,
126                     new HashSet<>(), // called haplotypes
127                     readLikelihoods);
128         }
129 
130         Assert.assertEquals(getReadCounts(outPath), 5);
131 
132         final File expectedMD5File = new File(outPath.toFile().getAbsolutePath() + ".md5");
133         Assert.assertEquals(expectedMD5File.exists(), createMD5);
134         if (createIndex) {
135             Assert.assertNotNull(SamFiles.findIndex(outPath));
136         } else {
137             Assert.assertNull(SamFiles.findIndex(outPath));
138         }
139         return outPath;
140     }
141 
142     @Test(dataProvider = "ReadsLikelikhoodData")
testCreateSAMFromHeader( @uppressWarningsR) final String haplotypeBaseSignature, final List<Haplotype> haplotypes, final Locatable genomeLoc, final AlleleLikelihoods<GATKRead, Haplotype> readLikelihoods )143     public void testCreateSAMFromHeader(
144             @SuppressWarnings("unused")  final String haplotypeBaseSignature,
145             final List<Haplotype> haplotypes,
146             final Locatable genomeLoc,
147             final AlleleLikelihoods<GATKRead, Haplotype> readLikelihoods
148     ) throws IOException {
149         final Path outputPath = GATKBaseTest.createTempFile("fromHeaderSAM", ".sam").toPath();
150 
151         try (HaplotypeBAMWriter haplotypeBAMWriter = new HaplotypeBAMWriter(HaplotypeBAMWriter.WriterType.ALL_POSSIBLE_HAPLOTYPES, outputPath, false, false, samHeader)) {
152             haplotypeBAMWriter.writeReadsAlignedToHaplotypes(
153                     haplotypes,
154                     genomeLoc,
155                     haplotypes,
156                     new HashSet<>(), // called haplotypes
157                     readLikelihoods);
158         }
159 
160         final File expectedFile = new File(expectedFilePath, "fromHeaderSAM.sam");
161         IntegrationTestSpec.assertEqualTextFiles(outputPath.toFile(), expectedFile);
162     }
163 
164     @Test(dataProvider = "ReadsLikelikhoodData")
testAllHaplotypeWriter( final String haplotypeBaseSignature, final List<Haplotype> haplotypes, final Locatable genomeLoc, final AlleleLikelihoods<GATKRead, Haplotype> readLikelihoods )165     public void testAllHaplotypeWriter
166         (
167             final String haplotypeBaseSignature,
168             final List<Haplotype> haplotypes,
169             final Locatable genomeLoc,
170             final AlleleLikelihoods<GATKRead, Haplotype> readLikelihoods
171         )
172     {
173         final MockValidatingDestination mockDest = new MockValidatingDestination(haplotypeBaseSignature);
174         try (final HaplotypeBAMWriter haplotypeBAMWriter = new HaplotypeBAMWriter(HaplotypeBAMWriter.WriterType.ALL_POSSIBLE_HAPLOTYPES, mockDest)) {
175             haplotypeBAMWriter.writeReadsAlignedToHaplotypes(
176                     haplotypes,
177                     genomeLoc,
178                     haplotypes,
179                     new HashSet<>(), // called haplotypes
180                     readLikelihoods);
181         }
182 
183         Assert.assertTrue(mockDest.foundBases);
184         Assert.assertTrue(mockDest.readCount == 5); // 4 samples + 1 haplotype
185     }
186 
187     @Test(dataProvider = "ReadsLikelikhoodData")
testCalledHaplotypeWriter( final String haplotypeBaseSignature, final List<Haplotype> haplotypes, final Locatable genomeLoc, final AlleleLikelihoods<GATKRead, Haplotype> readLikelihoods )188     public void testCalledHaplotypeWriter
189         (
190             final String haplotypeBaseSignature,
191             final List<Haplotype> haplotypes,
192             final Locatable genomeLoc,
193             final AlleleLikelihoods<GATKRead, Haplotype> readLikelihoods
194         )
195     {
196         final MockValidatingDestination mockDest = new MockValidatingDestination(haplotypeBaseSignature);
197 
198         Set<Haplotype> calledHaplotypes = new LinkedHashSet<>(1);
199         calledHaplotypes.addAll(haplotypes);
200 
201         try (final HaplotypeBAMWriter haplotypeBAMWriter = new HaplotypeBAMWriter(HaplotypeBAMWriter.WriterType.CALLED_HAPLOTYPES, mockDest)) {
202             haplotypeBAMWriter.writeReadsAlignedToHaplotypes(
203                     haplotypes,
204                     genomeLoc,
205                     haplotypes,
206                     calledHaplotypes,
207                     readLikelihoods);
208         }
209 
210         Assert.assertTrue(mockDest.foundBases);
211         Assert.assertTrue(mockDest.readCount==5); // 4 samples + 1 haplotype
212     }
213 
214     @Test(dataProvider = "ReadsLikelikhoodData")
testNoCalledHaplotypes( final String haplotypeBaseSignature, final List<Haplotype> haplotypes, final Locatable genomeLoc, final AlleleLikelihoods<GATKRead, Haplotype> readLikelihoods )215     public void testNoCalledHaplotypes
216         (
217             final String haplotypeBaseSignature,
218             final List<Haplotype> haplotypes,
219             final Locatable genomeLoc,
220             final AlleleLikelihoods<GATKRead, Haplotype> readLikelihoods
221         )
222     {
223         final MockValidatingDestination mockDest = new MockValidatingDestination(haplotypeBaseSignature);
224 
225         try (final HaplotypeBAMWriter haplotypeBAMWriter = new HaplotypeBAMWriter(HaplotypeBAMWriter.WriterType.CALLED_HAPLOTYPES, mockDest)) {
226             haplotypeBAMWriter.writeReadsAlignedToHaplotypes(
227                     haplotypes,
228                     genomeLoc,
229                     haplotypes,
230                     new LinkedHashSet<>(),
231                     readLikelihoods);
232         }
233 
234         Assert.assertTrue(mockDest.readCount == 0); // no called haplotypes, no reads
235     }
236 
237     @Test(dataProvider = "ReadsLikelikhoodData")
testDontWriteHaplotypes( final String haplotypeBaseSignature, final List<Haplotype> haplotypes, final Locatable genomeLoc, final AlleleLikelihoods<GATKRead, Haplotype> readLikelihoods )238     public void testDontWriteHaplotypes
239         (
240             final String haplotypeBaseSignature,
241             final List<Haplotype> haplotypes,
242             final Locatable genomeLoc,
243             final AlleleLikelihoods<GATKRead, Haplotype> readLikelihoods
244         )
245     {
246         final MockValidatingDestination mockDest = new MockValidatingDestination(haplotypeBaseSignature);
247 
248         try (final HaplotypeBAMWriter haplotypeBAMWriter = new HaplotypeBAMWriter(HaplotypeBAMWriter.WriterType.ALL_POSSIBLE_HAPLOTYPES, mockDest)) {
249             haplotypeBAMWriter.setWriteHaplotypes(false);
250 
251             haplotypeBAMWriter.writeReadsAlignedToHaplotypes(
252                     haplotypes,
253                     genomeLoc,
254                     haplotypes,
255                     new HashSet<>(), // called haplotypes
256                     readLikelihoods);
257         }
258 
259         Assert.assertFalse(mockDest.foundBases);
260         Assert.assertTrue(mockDest.readCount == 4); // 4 samples + 0 haplotypes
261     }
262 
makeHaplotype(final String bases)263     private Haplotype makeHaplotype(final String bases) {
264         final String cigar = bases.length() + "M";
265         final Haplotype hap = new Haplotype(bases.getBytes());
266         hap.setCigar(TextCigarCodec.decode(cigar));
267         return hap;
268     }
269 
270     private class MockValidatingDestination extends HaplotypeBAMDestination {
271         private final String expectedBaseSignature;  // bases expected for the synthesized haplotype read
272         public int readCount = 0;           // number of reads written to this destination
273         public boolean foundBases = false;  // true we've seen a read that contains the expectedBaseSignature
274 
MockValidatingDestination(String baseSignature)275         private MockValidatingDestination(String baseSignature) {
276             super(samHeader, "testGroupID");
277             expectedBaseSignature = baseSignature;
278         }
279 
280         @Override
close()281         void close() {}
282 
283         @Override
add(GATKRead read)284         public void add(GATKRead read) {
285             readCount++;
286 
287             if (read.getBasesString().equals(expectedBaseSignature)) { // found haplotype base signature
288                 foundBases = true;
289             }
290         }
291     }
292 
generateReadLikelihoods(final int[] readCount)293     private AlleleLikelihoods<GATKRead, Haplotype> generateReadLikelihoods(final int[] readCount) {
294         final AlleleList<Haplotype> haplotypeList = generateHaplotypeList();
295         final SampleList sampleList = generateSampleList(readCount.length);
296         final Map<String,List<GATKRead>> readSamples = new LinkedHashMap<>(readCount.length);
297 
298         for (int i = 0; i < readCount.length; i++) {
299             readSamples.put(sampleList.getSample(i), generateReadsList(i, readCount[i]));
300         }
301 
302         return new AlleleLikelihoods<>(sampleList, haplotypeList, readSamples);
303     }
304 
generateHaplotypeList()305     private AlleleList<Haplotype> generateHaplotypeList() {
306         final Haplotype[] haps = {
307                 makeHaplotype("AAAA"),
308                 makeHaplotype("AAAG")
309         };
310         return new IndexedAlleleList<>(haps);
311     }
312 
generateSampleList(final int sampleCount)313     private SampleList generateSampleList(final int sampleCount) {
314         if (sampleCount < 0) {
315             throw new IllegalArgumentException("the number of sample cannot be negative");
316         }
317         final List<String> result = new ArrayList<>(sampleCount);
318         for (int i = 0; i < sampleCount; i++)
319             result.add("SAMPLE_" + i);
320         return new IndexedSampleList(result);
321     }
322 
generateReadsList(final int sampleIndex, final int readCount)323     private List<GATKRead> generateReadsList(final int sampleIndex, final int readCount) {
324         final List<GATKRead> reads = new ArrayList<>(readCount);
325         int readIndex = 0;
326         for (int j = 0; j < readCount; j++)
327             reads.add(ArtificialReadUtils.createArtificialRead(samHeader, "READ_" + sampleIndex + "_" + (readIndex++), 1, 1, 100));
328         return reads;
329     }
330 
getReadCounts(final Path result)331     private int getReadCounts(final Path result) throws IOException {
332         IOUtil.assertFileIsReadable(result);
333 
334         int count = 0;
335         try (final SamReader in = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT).open(result)) {
336             for (@SuppressWarnings("unused") final SAMRecord rec : in) {
337                 count++;
338             }
339         }
340         return count;
341     }
342 
343 }
344