1 package org.broadinstitute.hellbender.tools.walkers.bqsr;
2 
3 import com.google.common.jimfs.Configuration;
4 import com.google.common.jimfs.Jimfs;
5 import htsjdk.samtools.SamReaderFactory;
6 import java.nio.file.FileSystem;
7 import java.nio.file.Path;
8 import org.apache.commons.lang.StringUtils;
9 import org.broadinstitute.barclay.argparser.CommandLineException;
10 import org.broadinstitute.hellbender.CommandLineProgramTest;
11 import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
12 import org.broadinstitute.hellbender.exceptions.UserException;
13 import org.broadinstitute.hellbender.GATKBaseTest;
14 import org.broadinstitute.hellbender.utils.gcs.BucketUtils;
15 import org.broadinstitute.hellbender.testutils.IntegrationTestSpec;
16 import org.broadinstitute.hellbender.testutils.SamAssertionUtils;
17 import org.testng.Assert;
18 import org.testng.annotations.DataProvider;
19 import org.testng.annotations.Test;
20 
21 import java.io.File;
22 import java.io.IOException;
23 import java.util.ArrayList;
24 import java.util.Arrays;
25 import java.util.List;
26 import java.util.stream.Stream;
27 
28 public final class ApplyBQSRIntegrationTest extends CommandLineProgramTest {
29     private static class ABQSRTest {
30         final String bam;
31         final String reference;
32         final String outputExtension;
33         final String args[];
34         final String expectedFile;
35 
ABQSRTest(String bam, String reference, String outputExtension, String args[], String expectedFile)36         private ABQSRTest(String bam, String reference, String outputExtension, String args[], String expectedFile) {
37             this.bam= bam;
38             this.reference = reference;
39             this.outputExtension = outputExtension;
40             this.args = args;
41             this.expectedFile = expectedFile;
42         }
43 
44         @Override
toString()45         public String toString() {
46             return String.format("ApplyBQSR(args='%s')", args == null ? "" : StringUtils.join(args));
47         }
48     }
49 
50     @Override
getTestedClassName()51     public String getTestedClassName() {
52         return ApplyBQSR.class.getSimpleName();
53     }
54 
55     final String resourceDir = getTestDataDir() + "/" + "BQSR" + "/";
56     final String hg18Reference = publicTestDir + "human_g1k_v37.chr17_1Mb.fasta";
57     final String hiSeqBam = resourceDir + "HiSeq.1mb.1RG.2k_lines.alternate.bam";
58     final String hiSeqCram = resourceDir + "HiSeq.1mb.1RG.2k_lines.alternate.cram";
59     final String hiSeqBamAligned = resourceDir + "HiSeq.1mb.1RG.2k_lines.alternate_allaligned.bam";
60     final String hiSeqCramAligned = resourceDir + "HiSeq.1mb.1RG.2k_lines.alternate_allaligned.cram";
61 
62     @DataProvider(name = "ApplyBQSRTest")
createABQSRTestData()63     public Object[][] createABQSRTestData() {
64         List<Object[]> tests = new ArrayList<>();
65 
66         //Note: these outputs were created using GATK3
67         tests.add(new Object[]{new ABQSRTest(hiSeqBam, null, ".bam", null, resourceDir + "expected.HiSeq.1mb.1RG.2k_lines.alternate.recalibrated.DIQ.bam")});
68         tests.add(new Object[]{new ABQSRTest(hiSeqBam, null, ".bam", new String[] {"-OQ"}, resourceDir + "expected.HiSeq.1mb.1RG.2k_lines.alternate.recalibrated.DIQ.OQ.bam")});
69         tests.add(new Object[]{new ABQSRTest(hiSeqBam, null, ".bam", new String[] {"--quantize-quals", "-1"}, resourceDir + "expected.HiSeq.1mb.1RG.2k_lines.alternate.recalibrated.DIQ.qq-1.bam")});
70         tests.add(new Object[]{new ABQSRTest(hiSeqBam, null, ".bam", new String[] {"--quantize-quals", "6"}, resourceDir + "expected.HiSeq.1mb.1RG.2k_lines.alternate.recalibrated.DIQ.qq6.bam")});
71         tests.add(new Object[]{new ABQSRTest(hiSeqBam, null, ".bam", new String[] {"--static-quantized-quals", "10", "--static-quantized-quals", "20", "--static-quantized-quals", "30"}, resourceDir + "expected.HiSeq.1mb.1RG.2k_lines.alternate.recalibrated.DIQ.SQQ102030.bam")});
72         tests.add(new Object[]{new ABQSRTest(hiSeqBam, null, ".bam", new String[] {"--static-quantized-quals", "10", "--static-quantized-quals", "20", "--static-quantized-quals", "30", "--round-down-quantized"}, resourceDir + "expected.HiSeq.1mb.1RG.2k_lines.alternate.recalibrated.DIQ.SQQ102030RDQ.bam")});
73 
74         tests.add(new Object[]{new ABQSRTest(hiSeqBamAligned, null, ".bam", null, resourceDir + "expected.HiSeq.1mb.1RG.2k_lines.alternate_allaligned.recalibrated.DIQ.bam")});
75         tests.add(new Object[]{new ABQSRTest(hiSeqBamAligned, null, ".bam", new String[] {"-OQ"}, resourceDir + "expected.HiSeq.1mb.1RG.2k_lines.alternate_allaligned.recalibrated.DIQ.OQ.bam")});
76         tests.add(new Object[]{new ABQSRTest(hiSeqBamAligned, null, ".bam", new String[] {"--quantize-quals", "-1"}, resourceDir + "expected.HiSeq.1mb.1RG.2k_lines.alternate_allaligned.recalibrated.DIQ.qq-1.bam")});
77         tests.add(new Object[]{new ABQSRTest(hiSeqBamAligned, null, ".bam", new String[] {"--quantize-quals", "6"}, resourceDir + "expected.HiSeq.1mb.1RG.2k_lines.alternate_allaligned.recalibrated.DIQ.qq6.bam")});
78         tests.add(new Object[]{new ABQSRTest(hiSeqBamAligned, null, ".bam", new String[] {"--static-quantized-quals", "10", "--static-quantized-quals", "20", "--static-quantized-quals", "30"}, resourceDir + "expected.HiSeq.1mb.1RG.2k_lines.alternate_allaligned.recalibrated.DIQ.SQQ102030.bam")});
79         tests.add(new Object[]{new ABQSRTest(hiSeqBamAligned, null, ".bam", new String[] {"--static-quantized-quals", "10", "--static-quantized-quals", "20", "--static-quantized-quals", "30", "--round-down-quantized"}, resourceDir + "expected.HiSeq.1mb.1RG.2k_lines.alternate_allaligned.recalibrated.DIQ.SQQ102030RDQ.bam")});
80 
81         //CRAM - input and output crams generated by direct conversion of the corresponding BAM test files with samtools 1.3
82         tests.add(new Object[]{new ABQSRTest(hiSeqCram, hg18Reference, ".cram", new String[] {"--" + StandardArgumentDefinitions.DISABLE_SEQUENCE_DICT_VALIDATION_NAME, "true"}, resourceDir + "expected.HiSeq.1mb.1RG.2k_lines.alternate.recalibrated.DIQ.cram")});
83         tests.add(new Object[]{new ABQSRTest(hiSeqCramAligned, hg18Reference, ".cram", new String[] {"--quantize-quals", "6", "--" + StandardArgumentDefinitions.DISABLE_SEQUENCE_DICT_VALIDATION_NAME, "true"}, resourceDir + "expected.HiSeq.1mb.1RG.2k_lines.alternate_allaligned.recalibrated.DIQ.qq6.cram")});
84 
85         return tests.toArray(new Object[][]{});
86     }
87 
88     @DataProvider(name = "MiniApplyBQSRTest")
createMiniABQSRTestData()89     public Object[][] createMiniABQSRTestData() {
90         List<Object[]> tests = new ArrayList<>();
91 
92         //Note: these outputs were created using GATK3
93         tests.add(new Object[]{new ABQSRTest(hiSeqBam, null, ".bam", null, resourceDir + "expected.HiSeq.1mb.1RG.2k_lines.alternate.recalibrated.DIQ.bam")});
94 
95         return tests.toArray(new Object[][]{});
96     }
97 
98     @Test(dataProvider = "ApplyBQSRTest")
testApplyBQSRFile(ABQSRTest params)99     public void testApplyBQSRFile(ABQSRTest params) throws IOException {
100         File outFile = GATKBaseTest.createTempFile("applyBQSRTest", params.outputExtension);
101         final ArrayList<String> args = new ArrayList<>();
102         File refFile = null;
103 
104         args.add("-I");
105         args.add(new File(params.bam).getAbsolutePath());
106         args.add("--" + StandardArgumentDefinitions.BQSR_TABLE_LONG_NAME);
107         args.add(new File(resourceDir + "HiSeq.20mb.1RG.table.gz").getAbsolutePath());
108         args.add("-O");
109         args.add(outFile.getAbsolutePath());
110         if (params.reference != null) {
111             refFile = new File(params.reference);
112             args.add("-R");
113             args.add(refFile.getAbsolutePath());
114             if (params.args != null) {
115                 Stream.of(params.args).forEach(arg -> args.add(arg));
116             }
117 
118             runCommandLine(args);
119 
120             SamAssertionUtils.assertSamsEqual(outFile, new File(params.expectedFile), refFile);
121         }
122     }
123 
124     @Test(dataProvider = "MiniApplyBQSRTest")
testApplyBQSRPath(ABQSRTest params)125     public void testApplyBQSRPath(ABQSRTest params) throws IOException {
126         try (FileSystem jimfs = Jimfs.newFileSystem(Configuration.unix())) {
127             final Path outPath = jimfs.getPath("applyBQSRTest"+params.outputExtension);
128 
129             final ArrayList<String> args = new ArrayList<>();
130             Path refPath = null;
131 
132             args.add("-I");
133             args.add(new File(params.bam).getAbsolutePath());
134             args.add("--" + StandardArgumentDefinitions.BQSR_TABLE_LONG_NAME);
135             args.add(new File(resourceDir + "HiSeq.20mb.1RG.table.gz").getAbsolutePath());
136             args.add("-O"); args.add(outPath.toUri().toString());
137             if (params.reference != null) {
138                 File refFile = new File(params.reference);
139                 args.add("-R"); args.add(refFile.getAbsolutePath());
140                 refPath = refFile.toPath();
141             }
142             if (params.args != null) {
143                 Stream.of(params.args).forEach(arg -> args.add(arg));
144             }
145 
146             runCommandLine(args);
147 
148             SamAssertionUtils.assertSamsEqual(outPath, new File(params.expectedFile).toPath(), refPath);
149         }
150     }
151 
152     @Test(dataProvider = "ApplyBQSRTest", groups={"bucket"})
testApplyBQSRCloud(ABQSRTest params)153     public void testApplyBQSRCloud(ABQSRTest params) throws IOException {
154         // getTempFilePath also deletes the file on exit.
155         final String outString = BucketUtils.getTempFilePath(getGCPTestStaging() + "tmp/testApplyBQSRCloud",  params.outputExtension);
156         final Path outPath = BucketUtils.getPathOnGcs(outString);
157         final ArrayList<String> args = new ArrayList<>();
158         Path refPath = null;
159 
160         args.add("-I");
161         args.add(new File(params.bam).getAbsolutePath());
162         args.add("--" + StandardArgumentDefinitions.BQSR_TABLE_LONG_NAME);
163         args.add(new File(resourceDir + "HiSeq.20mb.1RG.table.gz").getAbsolutePath());
164         args.add("-O");
165         args.add(outString);
166         if (params.reference != null) {
167             File refFile = new File(params.reference);
168             args.add("-R");
169             args.add(refFile.getAbsolutePath());
170             refPath = refFile.toPath();
171         }
172         if (params.args != null) {
173             Stream.of(params.args).forEach(arg -> args.add(arg));
174         }
175 
176         runCommandLine(args);
177 
178         SamAssertionUtils.assertSamsEqual(outPath, new File(params.expectedFile).toPath(), refPath);
179     }
180 
181     @Test
testMissingReadGroup()182     public void testMissingReadGroup() throws IOException {
183         IntegrationTestSpec spec = new IntegrationTestSpec(
184                 " -I " + hiSeqBamAligned +
185                         " --" + StandardArgumentDefinitions.BQSR_TABLE_LONG_NAME + " " + resourceDir + "HiSeq.20mb.1RG.table.missingRG.gz" +
186                         " -O /dev/null", 0,
187                 IllegalStateException.class);
188         spec.executeTest("testMissingReadGroup", this);
189     }
190 
191     @Test
testemptyBqsrRecalFile()192     public void testemptyBqsrRecalFile() throws IOException {
193         IntegrationTestSpec spec = new IntegrationTestSpec(
194                 " -I " + hiSeqBamAligned +
195                         " --" + StandardArgumentDefinitions.BQSR_TABLE_LONG_NAME + " " + createTempFile("emptyBqsrRecal", "").toString() +
196                         " -O /dev/null", 0,
197                 UserException.class);
198         spec.executeTest("testemptyBqsrRecalFile", this);
199     }
200 
201     @Test
testPRNoFailWithHighMaxCycle()202     public void testPRNoFailWithHighMaxCycle() throws IOException {
203         IntegrationTestSpec spec = new IntegrationTestSpec(
204                         " -I " + hiSeqBamAligned +
205                         " --" + StandardArgumentDefinitions.BQSR_TABLE_LONG_NAME + " " + resourceDir + "HiSeq.1mb.1RG.highMaxCycle.table.gz" +
206                         " -O /dev/null",
207                 Arrays.<String>asList());
208         spec.executeTest("testPRNoFailWithHighMaxCycle", this);      //this just checks that the tool does not blow up
209     }
210 
211 
212     @Test
testHelp()213     public void testHelp() throws IOException {
214         IntegrationTestSpec spec = new IntegrationTestSpec(
215                 " -I " + hiSeqBamAligned +
216                         " --help --" + StandardArgumentDefinitions.BQSR_TABLE_LONG_NAME + " " + resourceDir + "HiSeq.1mb.1RG.highMaxCycle.table.gz" +
217                         " -O /dev/null",
218                 Arrays.<String>asList());
219         spec.executeTest("testHelp", this);      //this just checks that the tool does not blow up
220     }
221 
222     @Test
testPRFailWithLowMaxCycle()223     public void testPRFailWithLowMaxCycle() throws IOException {
224         IntegrationTestSpec spec = new IntegrationTestSpec(
225                         " -I " + hiSeqBamAligned +
226                         " --"  + StandardArgumentDefinitions.BQSR_TABLE_LONG_NAME+ " " + resourceDir + "HiSeq.1mb.1RG.lowMaxCycle.table.gz" +
227                         " -O /dev/null",
228                 0,
229                 UserException.class);
230         spec.executeTest("testPRFailWithLowMaxCycle", this);
231     }
232 
233     @Test
testPRWithConflictingArguments_qqAndSQQ()234     public void testPRWithConflictingArguments_qqAndSQQ() throws IOException {
235         // --quantize-quals and --static-quantized-quals shouldn't be able to be run in the same command
236         final IntegrationTestSpec spec = new IntegrationTestSpec(
237                 " -I " + hiSeqBam +
238                         " --" + StandardArgumentDefinitions.BQSR_TABLE_LONG_NAME + " " + resourceDir + "HiSeq.20mb.1RG.table.gz" +
239                         " --static-quantized-quals 9 --quantize-quals 4 " +
240                         " -O /dev/null",
241                 0,
242                 CommandLineException.class);
243         spec.executeTest("testPRWithConflictingArguments_qqAndSQQ", this);
244     }
245 
246     @Test
testPRWithConflictingArguments_qqAndRDQ()247     public void testPRWithConflictingArguments_qqAndRDQ() throws IOException {
248         // --quantize-quals and --static-quantized-quals shouldn't be able to be run in the same command
249         final IntegrationTestSpec spec = new IntegrationTestSpec(
250                 " -I " + hiSeqBam +
251                         " --" + StandardArgumentDefinitions.BQSR_TABLE_LONG_NAME + " "  + resourceDir + "HiSeq.20mb.1RG.table.gz" +
252                         " --round-down-quantized --quantize-quals 4 " +
253                         " -O /dev/null",
254                 0,
255                 CommandLineException.class);
256         spec.executeTest("testPRWithConflictingArguments_qqAndSQQ", this);
257     }
258 
259     @Test
testOverfiltering()260     public void testOverfiltering() throws IOException {
261         final File zeroRefBasesReadBam = new File(resourceDir, "NA12878.oq.read_consumes_zero_ref_bases.bam");
262         final File outFile = GATKBaseTest.createTempFile("testReadThatConsumesNoReferenceBases", ".bam");
263         final String[] args = new String[] {
264                 "--input", zeroRefBasesReadBam.getAbsolutePath(),
265                 "--" + StandardArgumentDefinitions.BQSR_TABLE_LONG_NAME, resourceDir + "NA12878.oq.gatk4.recal.gz",
266                 "--use-original-qualities",
267                 "--output", outFile.getAbsolutePath()
268         };
269         runCommandLine(args);
270         //The expected output is actually the same as inputs for this read
271         SamAssertionUtils.assertSamsEqual(outFile, zeroRefBasesReadBam);
272     }
273 
274     @Test
testAddingPG()275     public void testAddingPG() throws IOException {
276         final File inFile = new File(resourceDir, "NA12878.oq.read_consumes_zero_ref_bases.bam");
277         final File outFile = GATKBaseTest.createTempFile("testAddingPG", ".bam");
278         final String[] args = new String[] {
279                 "--input", inFile.getAbsolutePath(),
280                 "--" + StandardArgumentDefinitions.BQSR_TABLE_LONG_NAME, resourceDir + "NA12878.oq.gatk4.recal.gz",
281                 "--use-original-qualities",
282                 "--" + StandardArgumentDefinitions.ADD_OUTPUT_SAM_PROGRAM_RECORD,
283                 "--output", outFile.getAbsolutePath()
284         };
285         runCommandLine(args);
286 
287         //The expected output is actually the same as inputs for this read (this ignores the PGs header)
288         SamAssertionUtils.assertSamsEqual(outFile, inFile);
289 
290         //input has no GATK ApplyBQSR in headers
291         Assert.assertNull(SamReaderFactory.makeDefault().open(inFile).getFileHeader().getProgramRecord("GATK ApplyBQSR"));
292 
293         //output has a GATK ApplyBQSR in headers
294         Assert.assertNotNull(SamReaderFactory.makeDefault().open(outFile).getFileHeader().getProgramRecord("GATK ApplyBQSR"));
295     }
296 }
297