1 package org.broadinstitute.hellbender.utils.haplotype; 2 3 import htsjdk.samtools.SAMFileHeader; 4 import htsjdk.samtools.SamReader; 5 import htsjdk.samtools.SamReaderFactory; 6 import htsjdk.samtools.SAMRecord; 7 import htsjdk.samtools.TextCigarCodec; 8 import htsjdk.samtools.ValidationStringency; 9 import htsjdk.samtools.util.IOUtil; 10 import htsjdk.samtools.util.Locatable; 11 import htsjdk.samtools.SamFiles; 12 13 import java.nio.file.Path; 14 import org.broadinstitute.hellbender.GATKBaseTest; 15 import org.broadinstitute.hellbender.testutils.IntegrationTestSpec; 16 import org.broadinstitute.hellbender.utils.genotyper.*; 17 import org.broadinstitute.hellbender.utils.read.GATKRead; 18 import org.broadinstitute.hellbender.utils.read.ArtificialReadUtils; 19 import org.broadinstitute.hellbender.utils.SimpleInterval; 20 import org.broadinstitute.hellbender.testutils.SamAssertionUtils; 21 import org.testng.Assert; 22 23 import org.testng.annotations.DataProvider; 24 import org.testng.annotations.Test; 25 26 import java.io.File; 27 import java.io.IOException; 28 import java.util.*; 29 30 31 public class HaplotypeBAMWriterUnitTest extends GATKBaseTest { 32 private final SAMFileHeader samHeader = ArtificialReadUtils.createArtificialSamHeaderWithPrograms(20, 1, 1000, 3); 33 private final String expectedFilePath = getToolTestDataDir() + "/expected/"; 34 35 @DataProvider(name="ReadsLikelikhoodData") makeReadsLikelikhoodData()36 public Object[][] makeReadsLikelikhoodData() { 37 final String haplotypeBaseSignature = "ACTGAAGGTTCC"; 38 final Haplotype haplotype = makeHaplotype(haplotypeBaseSignature); 39 final int sampleCounts[] = {2, 2}; 40 final Locatable loc = new SimpleInterval("20", 10, 20); 41 final AlleleLikelihoods<GATKRead, Haplotype> readLikelihoods = generateReadLikelihoods(sampleCounts); 42 43 return new Object[][]{ 44 { haplotypeBaseSignature, Collections.singletonList(haplotype), loc, readLikelihoods } 45 }; 46 } 47 48 @Test(dataProvider = "ReadsLikelikhoodData") testWriteToSAMFile( @uppressWarningsR) final String haplotypeBaseSignature, final List<Haplotype> haplotypes, final Locatable genomeLoc, final AlleleLikelihoods<GATKRead, Haplotype> readLikelihoods )49 public void testWriteToSAMFile 50 ( 51 @SuppressWarnings("unused") final String haplotypeBaseSignature, 52 final List<Haplotype> haplotypes, 53 final Locatable genomeLoc, 54 final AlleleLikelihoods<GATKRead, Haplotype> readLikelihoods 55 ) throws IOException 56 { 57 // create output SAM file 58 final Path outPath = testWriteToFile(".sam", haplotypes, genomeLoc, readLikelihoods, false, false); 59 final File expectedFile = new File(expectedFilePath, "testSAM.sam"); 60 IntegrationTestSpec.assertEqualTextFiles(outPath.toFile(), expectedFile); 61 } 62 63 @Test(dataProvider = "ReadsLikelikhoodData") testWriteToBAMFile( @uppressWarningsR) final String haplotypeBaseSignature, final List<Haplotype> haplotypes, final Locatable genomeLoc, final AlleleLikelihoods<GATKRead, Haplotype> readLikelihoods )64 public void testWriteToBAMFile 65 ( 66 @SuppressWarnings("unused") final String haplotypeBaseSignature, 67 final List<Haplotype> haplotypes, 68 final Locatable genomeLoc, 69 final AlleleLikelihoods<GATKRead, Haplotype> readLikelihoods 70 ) throws IOException 71 { 72 // create output BAM file 73 final Path outPath = testWriteToFile(".bam", haplotypes, genomeLoc, readLikelihoods, false, false); 74 final File expectedFile = new File(expectedFilePath, "testBAM.bam"); 75 SamAssertionUtils.assertEqualBamFiles(outPath.toFile(), expectedFile, false, ValidationStringency.DEFAULT_STRINGENCY); 76 } 77 78 @Test(dataProvider = "ReadsLikelikhoodData") testWriteToBAMFileWithIndex( @uppressWarningsR) final String haplotypeBaseSignature, final List<Haplotype> haplotypes, final Locatable genomeLoc, final AlleleLikelihoods<GATKRead, Haplotype> readLikelihoods )79 public void testWriteToBAMFileWithIndex 80 ( 81 @SuppressWarnings("unused") final String haplotypeBaseSignature, 82 final List<Haplotype> haplotypes, 83 final Locatable genomeLoc, 84 final AlleleLikelihoods<GATKRead, Haplotype> readLikelihoods 85 ) throws IOException 86 { 87 // create output BAM file 88 final Path outPath = testWriteToFile(".bam", haplotypes, genomeLoc, readLikelihoods, true, false); 89 final File expectedFile = new File(expectedFilePath, "testBAM.bam"); 90 SamAssertionUtils.assertEqualBamFiles(outPath.toFile(), expectedFile, false, ValidationStringency.DEFAULT_STRINGENCY); 91 } 92 93 @Test(dataProvider = "ReadsLikelikhoodData") testWriteToBAMFileWithMD5( @uppressWarningsR) final String haplotypeBaseSignature, final List<Haplotype> haplotypes, final Locatable genomeLoc, final AlleleLikelihoods<GATKRead, Haplotype> readLikelihoods )94 public void testWriteToBAMFileWithMD5 95 ( 96 @SuppressWarnings("unused") final String haplotypeBaseSignature, 97 final List<Haplotype> haplotypes, 98 final Locatable genomeLoc, 99 final AlleleLikelihoods<GATKRead, Haplotype> readLikelihoods 100 ) throws IOException 101 { 102 // create output BAM file 103 final Path outPath = testWriteToFile(".bam", haplotypes, genomeLoc, readLikelihoods, false, true); 104 final File expectedFile = new File(expectedFilePath, "testBAM.bam"); 105 SamAssertionUtils.assertEqualBamFiles(outPath.toFile(), expectedFile, false, ValidationStringency.DEFAULT_STRINGENCY); 106 } 107 testWriteToFile( final String outputFileExtension, final List<Haplotype> haplotypes, final Locatable genomeLoc, final AlleleLikelihoods<GATKRead, Haplotype> readLikelihoods, final boolean createIndex, final boolean createMD5 )108 private Path testWriteToFile 109 ( 110 final String outputFileExtension, 111 final List<Haplotype> haplotypes, 112 final Locatable genomeLoc, 113 final AlleleLikelihoods<GATKRead, Haplotype> readLikelihoods, 114 final boolean createIndex, 115 final boolean createMD5 116 ) throws IOException 117 { 118 final Path outPath = GATKBaseTest.createTempFile("haplotypeBamWriterTest", outputFileExtension).toPath(); 119 final SAMFileDestination fileDest = new SAMFileDestination(outPath, createIndex, createMD5, samHeader, "TestHaplotypeRG"); 120 121 try (final HaplotypeBAMWriter haplotypeBAMWriter = new HaplotypeBAMWriter(HaplotypeBAMWriter.WriterType.ALL_POSSIBLE_HAPLOTYPES, fileDest)) { 122 haplotypeBAMWriter.writeReadsAlignedToHaplotypes( 123 haplotypes, 124 genomeLoc, 125 haplotypes, 126 new HashSet<>(), // called haplotypes 127 readLikelihoods); 128 } 129 130 Assert.assertEquals(getReadCounts(outPath), 5); 131 132 final File expectedMD5File = new File(outPath.toFile().getAbsolutePath() + ".md5"); 133 Assert.assertEquals(expectedMD5File.exists(), createMD5); 134 if (createIndex) { 135 Assert.assertNotNull(SamFiles.findIndex(outPath)); 136 } else { 137 Assert.assertNull(SamFiles.findIndex(outPath)); 138 } 139 return outPath; 140 } 141 142 @Test(dataProvider = "ReadsLikelikhoodData") testCreateSAMFromHeader( @uppressWarningsR) final String haplotypeBaseSignature, final List<Haplotype> haplotypes, final Locatable genomeLoc, final AlleleLikelihoods<GATKRead, Haplotype> readLikelihoods )143 public void testCreateSAMFromHeader( 144 @SuppressWarnings("unused") final String haplotypeBaseSignature, 145 final List<Haplotype> haplotypes, 146 final Locatable genomeLoc, 147 final AlleleLikelihoods<GATKRead, Haplotype> readLikelihoods 148 ) throws IOException { 149 final Path outputPath = GATKBaseTest.createTempFile("fromHeaderSAM", ".sam").toPath(); 150 151 try (HaplotypeBAMWriter haplotypeBAMWriter = new HaplotypeBAMWriter(HaplotypeBAMWriter.WriterType.ALL_POSSIBLE_HAPLOTYPES, outputPath, false, false, samHeader)) { 152 haplotypeBAMWriter.writeReadsAlignedToHaplotypes( 153 haplotypes, 154 genomeLoc, 155 haplotypes, 156 new HashSet<>(), // called haplotypes 157 readLikelihoods); 158 } 159 160 final File expectedFile = new File(expectedFilePath, "fromHeaderSAM.sam"); 161 IntegrationTestSpec.assertEqualTextFiles(outputPath.toFile(), expectedFile); 162 } 163 164 @Test(dataProvider = "ReadsLikelikhoodData") testAllHaplotypeWriter( final String haplotypeBaseSignature, final List<Haplotype> haplotypes, final Locatable genomeLoc, final AlleleLikelihoods<GATKRead, Haplotype> readLikelihoods )165 public void testAllHaplotypeWriter 166 ( 167 final String haplotypeBaseSignature, 168 final List<Haplotype> haplotypes, 169 final Locatable genomeLoc, 170 final AlleleLikelihoods<GATKRead, Haplotype> readLikelihoods 171 ) 172 { 173 final MockValidatingDestination mockDest = new MockValidatingDestination(haplotypeBaseSignature); 174 try (final HaplotypeBAMWriter haplotypeBAMWriter = new HaplotypeBAMWriter(HaplotypeBAMWriter.WriterType.ALL_POSSIBLE_HAPLOTYPES, mockDest)) { 175 haplotypeBAMWriter.writeReadsAlignedToHaplotypes( 176 haplotypes, 177 genomeLoc, 178 haplotypes, 179 new HashSet<>(), // called haplotypes 180 readLikelihoods); 181 } 182 183 Assert.assertTrue(mockDest.foundBases); 184 Assert.assertTrue(mockDest.readCount == 5); // 4 samples + 1 haplotype 185 } 186 187 @Test(dataProvider = "ReadsLikelikhoodData") testCalledHaplotypeWriter( final String haplotypeBaseSignature, final List<Haplotype> haplotypes, final Locatable genomeLoc, final AlleleLikelihoods<GATKRead, Haplotype> readLikelihoods )188 public void testCalledHaplotypeWriter 189 ( 190 final String haplotypeBaseSignature, 191 final List<Haplotype> haplotypes, 192 final Locatable genomeLoc, 193 final AlleleLikelihoods<GATKRead, Haplotype> readLikelihoods 194 ) 195 { 196 final MockValidatingDestination mockDest = new MockValidatingDestination(haplotypeBaseSignature); 197 198 Set<Haplotype> calledHaplotypes = new LinkedHashSet<>(1); 199 calledHaplotypes.addAll(haplotypes); 200 201 try (final HaplotypeBAMWriter haplotypeBAMWriter = new HaplotypeBAMWriter(HaplotypeBAMWriter.WriterType.CALLED_HAPLOTYPES, mockDest)) { 202 haplotypeBAMWriter.writeReadsAlignedToHaplotypes( 203 haplotypes, 204 genomeLoc, 205 haplotypes, 206 calledHaplotypes, 207 readLikelihoods); 208 } 209 210 Assert.assertTrue(mockDest.foundBases); 211 Assert.assertTrue(mockDest.readCount==5); // 4 samples + 1 haplotype 212 } 213 214 @Test(dataProvider = "ReadsLikelikhoodData") testNoCalledHaplotypes( final String haplotypeBaseSignature, final List<Haplotype> haplotypes, final Locatable genomeLoc, final AlleleLikelihoods<GATKRead, Haplotype> readLikelihoods )215 public void testNoCalledHaplotypes 216 ( 217 final String haplotypeBaseSignature, 218 final List<Haplotype> haplotypes, 219 final Locatable genomeLoc, 220 final AlleleLikelihoods<GATKRead, Haplotype> readLikelihoods 221 ) 222 { 223 final MockValidatingDestination mockDest = new MockValidatingDestination(haplotypeBaseSignature); 224 225 try (final HaplotypeBAMWriter haplotypeBAMWriter = new HaplotypeBAMWriter(HaplotypeBAMWriter.WriterType.CALLED_HAPLOTYPES, mockDest)) { 226 haplotypeBAMWriter.writeReadsAlignedToHaplotypes( 227 haplotypes, 228 genomeLoc, 229 haplotypes, 230 new LinkedHashSet<>(), 231 readLikelihoods); 232 } 233 234 Assert.assertTrue(mockDest.readCount == 0); // no called haplotypes, no reads 235 } 236 237 @Test(dataProvider = "ReadsLikelikhoodData") testDontWriteHaplotypes( final String haplotypeBaseSignature, final List<Haplotype> haplotypes, final Locatable genomeLoc, final AlleleLikelihoods<GATKRead, Haplotype> readLikelihoods )238 public void testDontWriteHaplotypes 239 ( 240 final String haplotypeBaseSignature, 241 final List<Haplotype> haplotypes, 242 final Locatable genomeLoc, 243 final AlleleLikelihoods<GATKRead, Haplotype> readLikelihoods 244 ) 245 { 246 final MockValidatingDestination mockDest = new MockValidatingDestination(haplotypeBaseSignature); 247 248 try (final HaplotypeBAMWriter haplotypeBAMWriter = new HaplotypeBAMWriter(HaplotypeBAMWriter.WriterType.ALL_POSSIBLE_HAPLOTYPES, mockDest)) { 249 haplotypeBAMWriter.setWriteHaplotypes(false); 250 251 haplotypeBAMWriter.writeReadsAlignedToHaplotypes( 252 haplotypes, 253 genomeLoc, 254 haplotypes, 255 new HashSet<>(), // called haplotypes 256 readLikelihoods); 257 } 258 259 Assert.assertFalse(mockDest.foundBases); 260 Assert.assertTrue(mockDest.readCount == 4); // 4 samples + 0 haplotypes 261 } 262 makeHaplotype(final String bases)263 private Haplotype makeHaplotype(final String bases) { 264 final String cigar = bases.length() + "M"; 265 final Haplotype hap = new Haplotype(bases.getBytes()); 266 hap.setCigar(TextCigarCodec.decode(cigar)); 267 return hap; 268 } 269 270 private class MockValidatingDestination extends HaplotypeBAMDestination { 271 private final String expectedBaseSignature; // bases expected for the synthesized haplotype read 272 public int readCount = 0; // number of reads written to this destination 273 public boolean foundBases = false; // true we've seen a read that contains the expectedBaseSignature 274 MockValidatingDestination(String baseSignature)275 private MockValidatingDestination(String baseSignature) { 276 super(samHeader, "testGroupID"); 277 expectedBaseSignature = baseSignature; 278 } 279 280 @Override close()281 void close() {} 282 283 @Override add(GATKRead read)284 public void add(GATKRead read) { 285 readCount++; 286 287 if (read.getBasesString().equals(expectedBaseSignature)) { // found haplotype base signature 288 foundBases = true; 289 } 290 } 291 } 292 generateReadLikelihoods(final int[] readCount)293 private AlleleLikelihoods<GATKRead, Haplotype> generateReadLikelihoods(final int[] readCount) { 294 final AlleleList<Haplotype> haplotypeList = generateHaplotypeList(); 295 final SampleList sampleList = generateSampleList(readCount.length); 296 final Map<String,List<GATKRead>> readSamples = new LinkedHashMap<>(readCount.length); 297 298 for (int i = 0; i < readCount.length; i++) { 299 readSamples.put(sampleList.getSample(i), generateReadsList(i, readCount[i])); 300 } 301 302 return new AlleleLikelihoods<>(sampleList, haplotypeList, readSamples); 303 } 304 generateHaplotypeList()305 private AlleleList<Haplotype> generateHaplotypeList() { 306 final Haplotype[] haps = { 307 makeHaplotype("AAAA"), 308 makeHaplotype("AAAG") 309 }; 310 return new IndexedAlleleList<>(haps); 311 } 312 generateSampleList(final int sampleCount)313 private SampleList generateSampleList(final int sampleCount) { 314 if (sampleCount < 0) { 315 throw new IllegalArgumentException("the number of sample cannot be negative"); 316 } 317 final List<String> result = new ArrayList<>(sampleCount); 318 for (int i = 0; i < sampleCount; i++) 319 result.add("SAMPLE_" + i); 320 return new IndexedSampleList(result); 321 } 322 generateReadsList(final int sampleIndex, final int readCount)323 private List<GATKRead> generateReadsList(final int sampleIndex, final int readCount) { 324 final List<GATKRead> reads = new ArrayList<>(readCount); 325 int readIndex = 0; 326 for (int j = 0; j < readCount; j++) 327 reads.add(ArtificialReadUtils.createArtificialRead(samHeader, "READ_" + sampleIndex + "_" + (readIndex++), 1, 1, 100)); 328 return reads; 329 } 330 getReadCounts(final Path result)331 private int getReadCounts(final Path result) throws IOException { 332 IOUtil.assertFileIsReadable(result); 333 334 int count = 0; 335 try (final SamReader in = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT).open(result)) { 336 for (@SuppressWarnings("unused") final SAMRecord rec : in) { 337 count++; 338 } 339 } 340 return count; 341 } 342 343 } 344