package org.broadinstitute.hellbender.tools.walkers.bqsr;

import com.google.common.jimfs.Configuration;
import com.google.common.jimfs.Jimfs;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import java.nio.file.FileSystem;
import java.nio.file.Path;
import org.apache.commons.lang.StringUtils;
import org.broadinstitute.barclay.argparser.CommandLineException;
import org.broadinstitute.hellbender.CommandLineProgramTest;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.GATKBaseTest;
import org.broadinstitute.hellbender.utils.gcs.BucketUtils;
import org.broadinstitute.hellbender.testutils.IntegrationTestSpec;
import org.broadinstitute.hellbender.testutils.SamAssertionUtils;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Stream;

/**
 * Integration tests for the {@link ApplyBQSR} tool: runs the tool from the command line against
 * the BAM/CRAM fixtures under the {@code BQSR} test-data directory and compares the output with
 * pre-computed expected files, and checks that invalid inputs/argument combinations fail.
 */
public final class ApplyBQSRIntegrationTest extends CommandLineProgramTest {

    /** Name of the recalibration table used by all data-provider driven tests. */
    private static final String RECAL_TABLE_NAME = "HiSeq.20mb.1RG.table.gz";

    /** Name of the SAM program record looked up in {@link #testAddingPG()}. */
    private static final String PROGRAM_RECORD_ID = "GATK ApplyBQSR";

    /**
     * One data-provider case: an input file, an optional reference, the extension of the output
     * to create, optional extra command-line arguments and the file holding the expected output.
     */
    private static class ABQSRTest {
        final String bam;
        final String reference;       // may be null (no -R argument is passed)
        final String outputExtension;
        final String[] args;          // may be null (no extra arguments)
        final String expectedFile;

        private ABQSRTest(String bam, String reference, String outputExtension, String[] args, String expectedFile) {
            this.bam = bam;
            this.reference = reference;
            this.outputExtension = outputExtension;
            this.args = args;
            this.expectedFile = expectedFile;
        }

        /**
         * @return a label for this case; the extra arguments are space separated so that the
         *         individual flags and values stay readable in test reports.
         */
        @Override
        public String toString() {
            return String.format("ApplyBQSR(args='%s')", args == null ? "" : StringUtils.join(args, " "));
        }
    }

    @Override
    public String getTestedClassName() {
        return ApplyBQSR.class.getSimpleName();
    }

    final String resourceDir = getTestDataDir() + "/" + "BQSR" + "/";
    // NOTE(review): the field is named hg18 but points at a "g1k_v37" fasta -- confirm before renaming.
    final String hg18Reference = publicTestDir + "human_g1k_v37.chr17_1Mb.fasta";
    final String hiSeqBam = resourceDir + "HiSeq.1mb.1RG.2k_lines.alternate.bam";
    final String hiSeqCram = resourceDir + "HiSeq.1mb.1RG.2k_lines.alternate.cram";
    final String hiSeqBamAligned = resourceDir + "HiSeq.1mb.1RG.2k_lines.alternate_allaligned.bam";
    final String hiSeqCramAligned = resourceDir + "HiSeq.1mb.1RG.2k_lines.alternate_allaligned.cram";

    /**
     * @return the full matrix of cases: both BAM inputs with each quantization option, plus two CRAM cases.
     */
    @DataProvider(name = "ApplyBQSRTest")
    public Object[][] createABQSRTestData() {
        final List<Object[]> tests = new ArrayList<>();

        //Note: these outputs were created using GATK3
        tests.add(new Object[]{new ABQSRTest(hiSeqBam, null, ".bam", null, resourceDir + "expected.HiSeq.1mb.1RG.2k_lines.alternate.recalibrated.DIQ.bam")});
        tests.add(new Object[]{new ABQSRTest(hiSeqBam, null, ".bam", new String[] {"-OQ"}, resourceDir + "expected.HiSeq.1mb.1RG.2k_lines.alternate.recalibrated.DIQ.OQ.bam")});
        tests.add(new Object[]{new ABQSRTest(hiSeqBam, null, ".bam", new String[] {"--quantize-quals", "-1"}, resourceDir + "expected.HiSeq.1mb.1RG.2k_lines.alternate.recalibrated.DIQ.qq-1.bam")});
        tests.add(new Object[]{new ABQSRTest(hiSeqBam, null, ".bam", new String[] {"--quantize-quals", "6"}, resourceDir + "expected.HiSeq.1mb.1RG.2k_lines.alternate.recalibrated.DIQ.qq6.bam")});
        tests.add(new Object[]{new ABQSRTest(hiSeqBam, null, ".bam", new String[] {"--static-quantized-quals", "10", "--static-quantized-quals", "20", "--static-quantized-quals", "30"}, resourceDir + "expected.HiSeq.1mb.1RG.2k_lines.alternate.recalibrated.DIQ.SQQ102030.bam")});
        tests.add(new Object[]{new ABQSRTest(hiSeqBam, null, ".bam", new String[] {"--static-quantized-quals", "10", "--static-quantized-quals", "20", "--static-quantized-quals", "30", "--round-down-quantized"}, resourceDir + "expected.HiSeq.1mb.1RG.2k_lines.alternate.recalibrated.DIQ.SQQ102030RDQ.bam")});

        tests.add(new Object[]{new ABQSRTest(hiSeqBamAligned, null, ".bam", null, resourceDir + "expected.HiSeq.1mb.1RG.2k_lines.alternate_allaligned.recalibrated.DIQ.bam")});
        tests.add(new Object[]{new ABQSRTest(hiSeqBamAligned, null, ".bam", new String[] {"-OQ"}, resourceDir + "expected.HiSeq.1mb.1RG.2k_lines.alternate_allaligned.recalibrated.DIQ.OQ.bam")});
        tests.add(new Object[]{new ABQSRTest(hiSeqBamAligned, null, ".bam", new String[] {"--quantize-quals", "-1"}, resourceDir + "expected.HiSeq.1mb.1RG.2k_lines.alternate_allaligned.recalibrated.DIQ.qq-1.bam")});
        tests.add(new Object[]{new ABQSRTest(hiSeqBamAligned, null, ".bam", new String[] {"--quantize-quals", "6"}, resourceDir + "expected.HiSeq.1mb.1RG.2k_lines.alternate_allaligned.recalibrated.DIQ.qq6.bam")});
        tests.add(new Object[]{new ABQSRTest(hiSeqBamAligned, null, ".bam", new String[] {"--static-quantized-quals", "10", "--static-quantized-quals", "20", "--static-quantized-quals", "30"}, resourceDir + "expected.HiSeq.1mb.1RG.2k_lines.alternate_allaligned.recalibrated.DIQ.SQQ102030.bam")});
        tests.add(new Object[]{new ABQSRTest(hiSeqBamAligned, null, ".bam", new String[] {"--static-quantized-quals", "10", "--static-quantized-quals", "20", "--static-quantized-quals", "30", "--round-down-quantized"}, resourceDir + "expected.HiSeq.1mb.1RG.2k_lines.alternate_allaligned.recalibrated.DIQ.SQQ102030RDQ.bam")});

        //CRAM - input and output crams generated by direct conversion of the corresponding BAM test files with samtools 1.3
        tests.add(new Object[]{new ABQSRTest(hiSeqCram, hg18Reference, ".cram", new String[] {"--" + StandardArgumentDefinitions.DISABLE_SEQUENCE_DICT_VALIDATION_NAME, "true"}, resourceDir + "expected.HiSeq.1mb.1RG.2k_lines.alternate.recalibrated.DIQ.cram")});
        tests.add(new Object[]{new ABQSRTest(hiSeqCramAligned, hg18Reference, ".cram", new String[] {"--quantize-quals", "6", "--" + StandardArgumentDefinitions.DISABLE_SEQUENCE_DICT_VALIDATION_NAME, "true"}, resourceDir + "expected.HiSeq.1mb.1RG.2k_lines.alternate_allaligned.recalibrated.DIQ.qq6.cram")});

        return tests.toArray(new Object[][]{});
    }

    /**
     * @return a single default-argument BAM case, used by the in-memory file system test.
     */
    @DataProvider(name = "MiniApplyBQSRTest")
    public Object[][] createMiniABQSRTestData() {
        final List<Object[]> tests = new ArrayList<>();

        //Note: these outputs were created using GATK3
        tests.add(new Object[]{new ABQSRTest(hiSeqBam, null, ".bam", null, resourceDir + "expected.HiSeq.1mb.1RG.2k_lines.alternate.recalibrated.DIQ.bam")});

        return tests.toArray(new Object[][]{});
    }

    /**
     * Builds the ApplyBQSR command line shared by the File, Path and Cloud tests:
     * input, recalibration table, output, then the optional reference and the optional
     * per-case extra arguments, in that order.
     *
     * @param params the data-provider case being run
     * @param output the value to pass to {@code -O} (a local path, a URI or a bucket path)
     * @return a new, mutable argument list
     */
    private List<String> buildArgs(final ABQSRTest params, final String output) {
        final List<String> args = new ArrayList<>();

        args.add("-I");
        args.add(new File(params.bam).getAbsolutePath());
        args.add("--" + StandardArgumentDefinitions.BQSR_TABLE_LONG_NAME);
        args.add(new File(resourceDir + RECAL_TABLE_NAME).getAbsolutePath());
        args.add("-O");
        args.add(output);
        if (params.reference != null) {
            args.add("-R");
            args.add(new File(params.reference).getAbsolutePath());
        }
        if (params.args != null) {
            Stream.of(params.args).forEach(args::add);
        }
        return args;
    }

    /**
     * Runs ApplyBQSR writing to a local temp file and compares the result with the expected file.
     * The tool is run and the output is checked for every case, whether or not the case
     * supplies a reference.
     */
    @Test(dataProvider = "ApplyBQSRTest")
    public void testApplyBQSRFile(ABQSRTest params) throws IOException {
        final File outFile = GATKBaseTest.createTempFile("applyBQSRTest", params.outputExtension);
        final File refFile = params.reference == null ? null : new File(params.reference);

        runCommandLine(buildArgs(params, outFile.getAbsolutePath()));

        SamAssertionUtils.assertSamsEqual(outFile, new File(params.expectedFile), refFile);
    }

    /**
     * Runs ApplyBQSR writing to a path on an in-memory (Jimfs) file system, addressed by URI,
     * and compares the result with the expected file.
     */
    @Test(dataProvider = "MiniApplyBQSRTest")
    public void testApplyBQSRPath(ABQSRTest params) throws IOException {
        try (FileSystem jimfs = Jimfs.newFileSystem(Configuration.unix())) {
            final Path outPath = jimfs.getPath("applyBQSRTest" + params.outputExtension);
            final Path refPath = params.reference == null ? null : new File(params.reference).toPath();

            runCommandLine(buildArgs(params, outPath.toUri().toString()));

            SamAssertionUtils.assertSamsEqual(outPath, new File(params.expectedFile).toPath(), refPath);
        }
    }

    /**
     * Runs ApplyBQSR writing to a temporary location under the GCP test staging area and
     * compares the result with the expected file. Only runs in the {@code bucket} group.
     */
    @Test(dataProvider = "ApplyBQSRTest", groups = {"bucket"})
    public void testApplyBQSRCloud(ABQSRTest params) throws IOException {
        // getTempFilePath also deletes the file on exit.
        final String outString = BucketUtils.getTempFilePath(getGCPTestStaging() + "tmp/testApplyBQSRCloud", params.outputExtension);
        final Path outPath = BucketUtils.getPathOnGcs(outString);
        final Path refPath = params.reference == null ? null : new File(params.reference).toPath();

        runCommandLine(buildArgs(params, outString));

        SamAssertionUtils.assertSamsEqual(outPath, new File(params.expectedFile).toPath(), refPath);
    }

    /** A recalibration table that lacks the read group is expected to raise {@link IllegalStateException}. */
    @Test
    public void testMissingReadGroup() throws IOException {
        final IntegrationTestSpec spec = new IntegrationTestSpec(
                " -I " + hiSeqBamAligned +
                        " --" + StandardArgumentDefinitions.BQSR_TABLE_LONG_NAME + " " + resourceDir + "HiSeq.20mb.1RG.table.missingRG.gz" +
                        " -O /dev/null", 0,
                IllegalStateException.class);
        spec.executeTest("testMissingReadGroup", this);
    }

    /** An empty recalibration file is expected to raise {@link UserException}. */
    @Test
    public void testemptyBqsrRecalFile() throws IOException {
        final IntegrationTestSpec spec = new IntegrationTestSpec(
                " -I " + hiSeqBamAligned +
                        " --" + StandardArgumentDefinitions.BQSR_TABLE_LONG_NAME + " " + createTempFile("emptyBqsrRecal", "").toString() +
                        " -O /dev/null", 0,
                UserException.class);
        spec.executeTest("testemptyBqsrRecalFile", this);
    }

    /** Runs with the "highMaxCycle" table; no expected output files are compared. */
    @Test
    public void testPRNoFailWithHighMaxCycle() throws IOException {
        final IntegrationTestSpec spec = new IntegrationTestSpec(
                " -I " + hiSeqBamAligned +
                        " --" + StandardArgumentDefinitions.BQSR_TABLE_LONG_NAME + " " + resourceDir + "HiSeq.1mb.1RG.highMaxCycle.table.gz" +
                        " -O /dev/null",
                Arrays.<String>asList());
        spec.executeTest("testPRNoFailWithHighMaxCycle", this);      //this just checks that the tool does not blow up
    }

    /** Runs with {@code --help} on the command line; no expected output files are compared. */
    @Test
    public void testHelp() throws IOException {
        final IntegrationTestSpec spec = new IntegrationTestSpec(
                " -I " + hiSeqBamAligned +
                        " --help --" + StandardArgumentDefinitions.BQSR_TABLE_LONG_NAME + " " + resourceDir + "HiSeq.1mb.1RG.highMaxCycle.table.gz" +
                        " -O /dev/null",
                Arrays.<String>asList());
        spec.executeTest("testHelp", this);      //this just checks that the tool does not blow up
    }

    /** Running with the "lowMaxCycle" table is expected to raise {@link UserException}. */
    @Test
    public void testPRFailWithLowMaxCycle() throws IOException {
        final IntegrationTestSpec spec = new IntegrationTestSpec(
                " -I " + hiSeqBamAligned +
                        " --" + StandardArgumentDefinitions.BQSR_TABLE_LONG_NAME + " " + resourceDir + "HiSeq.1mb.1RG.lowMaxCycle.table.gz" +
                        " -O /dev/null",
                0,
                UserException.class);
        spec.executeTest("testPRFailWithLowMaxCycle", this);
    }

    @Test
    public void testPRWithConflictingArguments_qqAndSQQ() throws IOException {
        // --quantize-quals and --static-quantized-quals shouldn't be able to be run in the same command
        final IntegrationTestSpec spec = new IntegrationTestSpec(
                " -I " + hiSeqBam +
                        " --" + StandardArgumentDefinitions.BQSR_TABLE_LONG_NAME + " " + resourceDir + "HiSeq.20mb.1RG.table.gz" +
                        " --static-quantized-quals 9 --quantize-quals 4 " +
                        " -O /dev/null",
                0,
                CommandLineException.class);
        spec.executeTest("testPRWithConflictingArguments_qqAndSQQ", this);
    }

    @Test
    public void testPRWithConflictingArguments_qqAndRDQ() throws IOException {
        // --quantize-quals and --round-down-quantized shouldn't be able to be run in the same command
        final IntegrationTestSpec spec = new IntegrationTestSpec(
                " -I " + hiSeqBam +
                        " --" + StandardArgumentDefinitions.BQSR_TABLE_LONG_NAME + " " + resourceDir + "HiSeq.20mb.1RG.table.gz" +
                        " --round-down-quantized --quantize-quals 4 " +
                        " -O /dev/null",
                0,
                CommandLineException.class);
        spec.executeTest("testPRWithConflictingArguments_qqAndRDQ", this);
    }

    /** Checks that the output for the "read consumes zero reference bases" input equals the input itself. */
    @Test
    public void testOverfiltering() throws IOException {
        final File zeroRefBasesReadBam = new File(resourceDir, "NA12878.oq.read_consumes_zero_ref_bases.bam");
        final File outFile = GATKBaseTest.createTempFile("testReadThatConsumesNoReferenceBases", ".bam");
        final String[] args = new String[] {
                "--input", zeroRefBasesReadBam.getAbsolutePath(),
                "--" + StandardArgumentDefinitions.BQSR_TABLE_LONG_NAME, resourceDir + "NA12878.oq.gatk4.recal.gz",
                "--use-original-qualities",
                "--output", outFile.getAbsolutePath()
        };
        runCommandLine(args);
        //The expected output is actually the same as inputs for this read
        SamAssertionUtils.assertSamsEqual(outFile, zeroRefBasesReadBam);
    }

    /**
     * Checks that requesting an output program record adds a "GATK ApplyBQSR" record to the
     * output header while the input header has none.
     */
    @Test
    public void testAddingPG() throws IOException {
        final File inFile = new File(resourceDir, "NA12878.oq.read_consumes_zero_ref_bases.bam");
        final File outFile = GATKBaseTest.createTempFile("testAddingPG", ".bam");
        final String[] args = new String[] {
                "--input", inFile.getAbsolutePath(),
                "--" + StandardArgumentDefinitions.BQSR_TABLE_LONG_NAME, resourceDir + "NA12878.oq.gatk4.recal.gz",
                "--use-original-qualities",
                "--" + StandardArgumentDefinitions.ADD_OUTPUT_SAM_PROGRAM_RECORD,
                "--output", outFile.getAbsolutePath()
        };
        runCommandLine(args);

        //The expected output is actually the same as inputs for this read (this ignores the PGs header)
        SamAssertionUtils.assertSamsEqual(outFile, inFile);

        // Readers are closed explicitly so the test does not leak file handles.
        //input has no GATK ApplyBQSR in headers
        try (final SamReader inReader = SamReaderFactory.makeDefault().open(inFile)) {
            Assert.assertNull(inReader.getFileHeader().getProgramRecord(PROGRAM_RECORD_ID));
        }

        //output has a GATK ApplyBQSR in headers
        try (final SamReader outReader = SamReaderFactory.makeDefault().open(outFile)) {
            Assert.assertNotNull(outReader.getFileHeader().getProgramRecord(PROGRAM_RECORD_ID));
        }
    }
}