1 package org.broadinstitute.hellbender.tools.walkers.variantutils;
2 
3 import java.util.Comparator;
4 import java.util.List;
5 
6 import htsjdk.variant.variantcontext.VariantContext;
7 import org.broadinstitute.barclay.argparser.CommandLineException;
8 import org.broadinstitute.hellbender.GATKBaseTest;
9 import org.broadinstitute.hellbender.Main;
10 import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
11 import org.broadinstitute.hellbender.testutils.ArgumentsBuilder;
12 import org.broadinstitute.hellbender.testutils.VariantContextTestUtils;
13 import org.broadinstitute.hellbender.utils.gcs.BucketUtils;
14 import org.broadinstitute.hellbender.utils.io.IOUtils;
15 import org.testng.Assert;
16 import org.testng.annotations.DataProvider;
17 import org.testng.annotations.Test;
18 
19 import org.broadinstitute.hellbender.CommandLineProgramTest;
20 import org.broadinstitute.hellbender.exceptions.UserException;
21 import org.broadinstitute.hellbender.testutils.IntegrationTestSpec;
22 import shaded.cloud_nio.com.google.common.collect.Comparators;
23 
24 import java.io.File;
25 import java.io.IOException;
26 import java.util.Collections;
27 
28 public class SelectVariantsIntegrationTest extends CommandLineProgramTest {
29 
baseTestString(String args, String testFile)30     private static String baseTestString(String args, String testFile) {
31         return " --variant " + testFile
32                     + " -O %s "
33                     + " --" + StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE +" false "
34                     + args;
35     }
36 
37     @Test
testSampleSelection()38     public void testSampleSelection() throws IOException {
39         final String testFile = getToolTestDataDir() + "vcfexample2.vcf";
40 
41         final IntegrationTestSpec spec = new IntegrationTestSpec(
42                 " -R " + hg19MiniReference
43                         + " --variant " + testFile
44                         + " -sn NA11918 "
45                         + " --suppress-reference-path " // suppress reference file path in output for test differencing
46                         + " -O %s "
47                         + " --" + StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE +" false",
48                 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_SimpleSelection.vcf")
49         );
50 
51         spec.executeTest("testSampleSelection--" + testFile, this);
52     }
53 
54     @Test
testExpressionSelection()55     public void testExpressionSelection() throws IOException {
56         final String testFile = getToolTestDataDir() + "filteringDepthInFormat.vcf";
57 
58         final IntegrationTestSpec spec = new IntegrationTestSpec(
59                 " -R " + hg19MiniReference
60                         + " --variant " + testFile
61                         + " -select 'DP < 7' "
62                         + " --suppress-reference-path " // suppress reference file path in output for test differencing
63                         + " -O %s  --" + StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE +" false",
64                 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_SimpleExpressionSelection.vcf")
65         );
66 
67         spec.executeTest("testSimpleExpressionSelection--" + testFile, this);
68     }
69 
70     @Test
testRepeatedLineSelectionAndExludeFiltered()71     public void testRepeatedLineSelectionAndExludeFiltered() throws IOException {
72         final String testFile = getToolTestDataDir() + "test.dup.vcf";
73 
74         final IntegrationTestSpec spec = new IntegrationTestSpec(
75                 baseTestString(" -sn A -sn B -sn C -exclude-filtered ", testFile),
76                 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_RepeatedLineSelection.vcf")
77         );
78 
79         spec.executeTest("testRepeatedLineSelection--" + testFile, this);
80     }
81 
82     @Test
testComplexSelection()83     public void testComplexSelection()  throws IOException {
84         final String testFile = getToolTestDataDir() + "vcfexample2.vcf";
85         final String samplesFile = getToolTestDataDir() + "samples.args";
86 
87         final IntegrationTestSpec spec = new IntegrationTestSpec(
88                 baseTestString(" -sn NA11894 -se 'NA069*' -sn " + samplesFile + " -select 'RMSMAPQ < 170.0'", testFile),
89                 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_ComplexSelection.vcf")
90         );
91 
92         spec.executeTest("testComplexSelection--" + testFile, this);
93     }
94 
95     /**
96      * When input variants are untrimmed, they can be trimmed by select variants, which may change their order.
97      * This test confirms that this case is handled correctly, and the resulting variants are ouput correctly sorted.
98      */
99     @Test
testUntrimmedVariants()100     public void testUntrimmedVariants() throws IOException {
101         final File testFile = new File(getToolTestDataDir() + "untrimmed.vcf");
102         final File output = File.createTempFile("test_untrimmed", ".vcf");
103         final ArgumentsBuilder args = new ArgumentsBuilder()
104                 .addVCF(testFile)
105                 .addOutput(output)
106                 .add(StandardArgumentDefinitions.SAMPLE_NAME_SHORT_NAME, "SAMPLE_01");
107 
108         runCommandLine(args);
109 
110         final List<VariantContext> vcs = VariantContextTestUtils.readEntireVCFIntoMemory(output.getPath()).getRight();
111 
112         Assert.assertTrue(Comparators.isInOrder(vcs, Comparator.comparingInt(VariantContext::getStart)));
113     }
114 
115     @Test
testUntrimmedVariantsWithSetFilteredGtToNocall()116     public void testUntrimmedVariantsWithSetFilteredGtToNocall() throws IOException {
117         final File testFile = new File(getToolTestDataDir() + "untrimmed.vcf");
118         final File output = File.createTempFile("test_untrimmed", ".vcf");
119         final ArgumentsBuilder args = new ArgumentsBuilder()
120                 .addVCF(testFile)
121                 .addOutput(output)
122                 .add(StandardArgumentDefinitions.SAMPLE_NAME_SHORT_NAME, "SAMPLE_01")
123                 .addFlag("set-filtered-gt-to-nocall");
124 
125         runCommandLine(args);
126 
127         final List<VariantContext> vcs = VariantContextTestUtils.readEntireVCFIntoMemory(output.getPath()).getRight();
128 
129         Assert.assertTrue(Comparators.isInOrder(vcs, Comparator.comparingInt(VariantContext::getStart)));
130     }
131 
132     @Test
testComplexSelectionWithNonExistingSamples()133     public void testComplexSelectionWithNonExistingSamples()  throws IOException {
134         final String testFile = getToolTestDataDir() + "vcfexample2.vcf";
135         final String samplesFile = getToolTestDataDir() + "samples.args";
136 
137         final IntegrationTestSpec spec = new IntegrationTestSpec(
138                 baseTestString(" --allow-nonoverlapping-command-line-samples  -select 'RMSMAPQ < 170.0' -sn Z -sn " // non existent samples on command line
139                         + samplesFile, testFile),
140                 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_ComplexSelectionWithNonExistingSamples.vcf")
141         );
142         spec.executeTest("testComplexSelectionWithNonExistingSamples--" + testFile, this);
143     }
144 
145     @Test
testNonExistentSampleFile()146     public void testNonExistentSampleFile() throws IOException {
147         final String testFile = getToolTestDataDir() + "vcfexample2.vcf";
148         final File nonExistentFile = GATKBaseTest.getSafeNonExistentFile("nonexistentSamples.args");
149 
150         final IntegrationTestSpec spec = new IntegrationTestSpec(
151                 baseTestString(" -sn A -sn Z -sn Q -sn " + nonExistentFile, testFile),
152                 1,
153                 CommandLineException.class
154         );
155         spec.executeTest("testNonExistentSampleFile--" + testFile, this);
156     }
157 
158     @Test
testNonExistingFieldSelection()159     public void testNonExistingFieldSelection()  throws IOException {
160         final String testFile = getToolTestDataDir() + "vcfexample2.vcf";
161 
162         final IntegrationTestSpec spec = new IntegrationTestSpec(
163                 baseTestString(" --exclude-non-variants -select 'foo!=0 || RMSMAPQ < 170.0' ", testFile),
164                 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_NonExistingSelection.vcf")
165         );
166 
167         spec.executeTest("testNonExistingSelection--" + testFile, this);
168     }
169 
170     /**
171      * Test excluding samples from file and sample name
172      */
173     @Test
testSampleExclusionFromFileAndSeparateSample()174     public void testSampleExclusionFromFileAndSeparateSample()  throws IOException {
175         final String testFile = getToolTestDataDir() + "vcfexample2.vcf";
176         final String samplesFile = getToolTestDataDir() + "samples.args";
177 
178         final IntegrationTestSpec spec = new IntegrationTestSpec(
179                 baseTestString(" -xl-sn NA11894 -xl-sn " + samplesFile, testFile),
180                 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_SampleExclusionFromFileAndSeparateSample.vcf")
181         );
182 
183         spec.executeTest("testSampleExclusionFromFileAndSeparateSample--" + testFile, this);
184     }
185 
186     /**
187      * Test excluding samples from file
188      */
189     @Test
testSampleExclusionJustFromFile()190     public void testSampleExclusionJustFromFile()  throws IOException {
191         final String testFile = getToolTestDataDir() + "vcfexample2.vcf";
192         final String samplesFile = getToolTestDataDir() + "samples.args";
193 
194         final IntegrationTestSpec spec = new IntegrationTestSpec(
195                 baseTestString(" -xl-sn " + samplesFile, testFile),
196                 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_SampleExclusionJustFromFile.vcf")
197         );
198 
199         spec.executeTest("testSampleExclusionJustFromFile--" + testFile, this);
200     }
201 
202     /**
203      * Test excluding samples from expression
204      */
205     @Test
testSampleExclusionJustFromExpression()206     public void testSampleExclusionJustFromExpression()  throws IOException {
207         final String testFile = getToolTestDataDir() + "vcfexample2.vcf";
208 
209         final IntegrationTestSpec spec = new IntegrationTestSpec(
210                 baseTestString(" -xl-se 'NA069*' ", testFile),
211                 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_SampleExclusionJustFromExpression.vcf")
212         );
213 
214         spec.executeTest("testSampleExclusionJustFromExpression--" + testFile, this);
215     }
216 
217     /**
218      * Test excluding samples from negation expression
219      */
220     @Test
testSampleExclusionJustFromNegationExpression()221     public void testSampleExclusionJustFromNegationExpression()  throws IOException {
222         final String testFile = getToolTestDataDir() + "vcfexample2.vcf";
223 
224         final IntegrationTestSpec spec = new IntegrationTestSpec(
225                 baseTestString(" -se 'NA[0-9]{4}[^1-9]' ", testFile),
226                 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_SampleExclusionJustFromRegexExpression.vcf")
227         );
228 
229         spec.executeTest("testSampleExclusionJustFromRegexExpression--" + testFile, this);
230     }
231 
232     /**
233      * Test including samples that are not in the VCF
234      */
235 
236     @Test
testSampleInclusionWithNonexistingSamples()237     public void testSampleInclusionWithNonexistingSamples()  throws IOException {
238         final String testFile = getToolTestDataDir() + "vcfexample2.vcf";
239         final String samplesFile = getToolTestDataDir() + "samples.args";
240 
241         final IntegrationTestSpec spec = new IntegrationTestSpec(
242                 baseTestString(" -sn A -sn Z -sn Q -sn " + samplesFile, testFile),
243                 1,
244                 UserException.BadInput.class
245         );
246 
247         spec.executeTest("testSampleInclusionWithNonexistingSamples--" + testFile, this);
248     }
249 
250     @Test
testDiscordance()251     public void testDiscordance() throws IOException {
252         final String testFile = getToolTestDataDir() + "vcfexample2.vcf";
253         final String discordanceFile = getToolTestDataDir() + "vcfexample2DiscordanceConcordance.vcf";
254 
255         final IntegrationTestSpec spec = new IntegrationTestSpec(
256                 baseTestString(" -sn NA11992 " // not present in discordance track
257                                 + " -disc " + discordanceFile, testFile),
258                 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_Discordance.vcf")
259         );
260 
261         spec.executeTest("testDiscordance--" + testFile, this);
262     }
263 
264     @Test
testConcordance()265     public void testConcordance()  throws IOException {
266         final String testFile = getToolTestDataDir() + "vcfexample2.vcf";
267         final String concordanceFile = getToolTestDataDir() + "vcfexample2DiscordanceConcordance.vcf";
268 
269         final IntegrationTestSpec spec = new IntegrationTestSpec(
270                 baseTestString(" -sn NA11894 -conc " + concordanceFile  + " --lenient ", testFile),
271                 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_Concordance.vcf")
272         );
273 
274         spec.executeTest("testConcordance--" + testFile, this);
275     }
276 
277     /**
278      * Test including variant types.
279      */
280     @Test
testVariantTypeSelection()281     public void testVariantTypeSelection() throws IOException {
282         final String testFile = getToolTestDataDir() + "complexExample1.vcf";
283 
284         final IntegrationTestSpec spec = new IntegrationTestSpec(
285                 baseTestString(" --restrict-alleles-to MULTIALLELIC --select-type-to-include MIXED ",testFile),
286                 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_VariantTypeSelection.vcf")
287         );
288 
289         spec.executeTest("testVariantTypeSelection--" + testFile, this);
290     }
291 
292     /**
293      * Test excluding indels that are larger than the specified size
294      */
295     @Test
testMaxIndelLengthSelection()296     public void testMaxIndelLengthSelection() throws IOException {
297         final String testFile = getToolTestDataDir() + "complexExample1.vcf";
298 
299         final IntegrationTestSpec spec = new IntegrationTestSpec(
300                 baseTestString(" --select-type-to-include INDEL --max-indel-size 2 ", testFile),
301                 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_MaxIndelLengthSelection.vcf")
302         );
303 
304         spec.executeTest("testMaxIndelLengthSelection--" + testFile, this);
305     }
306 
307     /**
308      * Test excluding indels that are smaller than the specified size
309      */
310     @Test
testMinIndelLengthSelection()311     public void testMinIndelLengthSelection() throws IOException {
312         final String testFile = getToolTestDataDir() + "complexExample1.vcf";
313 
314         final IntegrationTestSpec spec = new IntegrationTestSpec(
315                 baseTestString(" --select-type-to-include INDEL --min-indel-size 2 ", testFile),
316                 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_MinIndelLengthSelection.vcf")
317         );
318 
319         spec.executeTest("testMinIndelLengthSelection--" + testFile, this);
320     }
321 
322     @Test
testRemoveMLE()323     public void testRemoveMLE() throws IOException {
324         final String testFile = getToolTestDataDir() + "vcfexample.withMLE.vcf";
325 
326         final IntegrationTestSpec spec = new IntegrationTestSpec(
327                 baseTestString(" -sn NA12892 ", testFile),
328                 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_RemoveMLE.vcf")
329         );
330 
331         spec.executeTest("testRemoveMLE--" + testFile, this);
332     }
333 
334     @Test
testKeepOriginalAC()335     public void testKeepOriginalAC() throws IOException {
336         final String testFile = getToolTestDataDir() + "vcfexample.loseAlleleInSelection.vcf";
337 
338         final IntegrationTestSpec spec = new IntegrationTestSpec(
339                 baseTestString(" --keep-original-ac -sn NA12892 ", testFile),
340                 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_KeepOriginalAC.vcf")
341         );
342 
343         spec.executeTest("testKeepOriginalAC--" + testFile, this);
344     }
345 
346     @Test
testKeepOriginalACAndENV()347     public void testKeepOriginalACAndENV() throws IOException {
348         final String testFile = getToolTestDataDir() + "vcfexample.loseAlleleInSelection.vcf";
349 
350         final IntegrationTestSpec spec = new IntegrationTestSpec(
351                 baseTestString(" --keep-original-ac -sn NA12892 --exclude-non-variants --remove-unused-alternates ", testFile),
352                 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_KeepOriginalACAndENV.vcf")
353         );
354 
355         spec.executeTest("testKeepOriginalACAndENV--" + testFile, this);
356     }
357 
358     @Test
testKeepOriginalDP()359     public void testKeepOriginalDP() throws IOException {
360         final String testFile = getToolTestDataDir() + "CEUtrioTest.vcf";
361 
362         final IntegrationTestSpec spec = new IntegrationTestSpec(
363                 baseTestString(" --keep-original-dp -sn NA12892 ", testFile),
364                 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_KeepOriginalDP.vcf")
365         );
366 
367         spec.executeTest("testKeepOriginalDP--" + testFile, this);
368     }
369 
370     @Test
testMultipleRecordsAtOnePosition()371     public void testMultipleRecordsAtOnePosition() throws IOException {
372         final String testFile = getToolTestDataDir() + "selectVariants.onePosition.vcf";
373 
374         final IntegrationTestSpec spec = new IntegrationTestSpec(
375                 baseTestString(" -select 'KG_FREQ < 0.5' ", testFile),
376                 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_MultipleRecordsAtOnePosition.vcf")
377         );
378 
379         spec.executeTest("testMultipleRecordsAtOnePosition--" + testFile, this);
380     }
381 
382     @Test
testNoGTs()383     public void testNoGTs() throws IOException {
384         final String testFile = getToolTestDataDir() + "vcf4.1.example.vcf";
385 
386         final IntegrationTestSpec spec = new IntegrationTestSpec (
387                 " --variant " + testFile + " -O %s  --" + StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE +" false",
388                 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_NoGTs.vcf")
389         );
390 
391         spec.executeTest("testNoGTs--" + testFile, this);
392     }
393 
394     @Test
testRemoveSingleSpanDelAlleleNoSpanDel()395     public void testRemoveSingleSpanDelAlleleNoSpanDel() throws IOException {
396         final String testFile = getToolTestDataDir() + "spanning_deletion.vcf";
397         final String sampleName = "NA1";
398 
399         final IntegrationTestSpec spec = new IntegrationTestSpec(
400                 baseTestString(" -sn " + sampleName + " --remove-unused-alternates --exclude-non-variants", testFile),
401                 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_RemoveSingleSpanDelAlleleNoSpanDel.vcf")
402         );
403         spec.executeTest("test encounter no instance of '*' as only ALT allele and ensure line is removed when only monomorphic allele exists" + testFile, this);
404     }
405 
406     @Test
testRemoveSingleSpanDelAlleleExNonVar()407     public void testRemoveSingleSpanDelAlleleExNonVar() throws IOException {
408         final String testFile = getToolTestDataDir() + "spanning_deletion.vcf";
409         final String sampleName = "NA2";
410 
411         final IntegrationTestSpec spec = new IntegrationTestSpec(
412                 baseTestString(" -sn " + sampleName + " --remove-unused-alternates", testFile),
413                 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_RemoveSingleSpanDelAlleleExNoVar.vcf")
414         );
415         spec.executeTest("test will not remove variant line where '*' is only ALT allele because --exclude-non-variants not called --" + testFile, this);
416     }
417 
418     @Test
testRemoveSingleSpanDelAllele()419     public void testRemoveSingleSpanDelAllele() throws IOException {
420         final String testFile = getToolTestDataDir() + "spanning_deletion.vcf";
421         final String sampleName = "NA2";
422 
423         final IntegrationTestSpec spec = new IntegrationTestSpec(
424                 baseTestString(" -sn " + sampleName + " --exclude-non-variants --remove-unused-alternates", testFile),
425                 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_RemoveSingleSpanDelAllele.vcf")
426         );
427         spec.executeTest("test removes variant line where '*' is only ALT allele --" + testFile, this);
428     }
429 
430     @Test
testSelectFromMultiAllelic()431     public void testSelectFromMultiAllelic() throws IOException {
432         final String testFile = getToolTestDataDir() + "multi-allelic.bi-allelicInGIH.vcf";
433         final String sampleName = getToolTestDataDir() + "GIH.samples.args";
434 
435         final IntegrationTestSpec spec = new IntegrationTestSpec(
436                 baseTestString(" -sn " + sampleName + " --exclude-non-variants --remove-unused-alternates", testFile),
437                 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_MultiAllelicExcludeNonVar.vcf")
438         );
439         spec.executeTest("test select from multi allelic with exclude-non-variants --" + testFile, this);
440     }
441 
442     @Test
testMultiAllelicAnnotationOrdering()443     public void testMultiAllelicAnnotationOrdering() throws IOException {
444         final String testFile = getToolTestDataDir() + "multi-allelic-ordering.vcf";
445 
446         final IntegrationTestSpec spec = new IntegrationTestSpec(
447                 baseTestString(" -sn SAMPLE-CC -sn SAMPLE-CT -sn SAMPLE-CA --exclude-non-variants", testFile),
448                 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_MultiAllelicAnnotationOrdering.vcf")
449         );
450         spec.executeTest("test multi allelic annotation ordering --" + testFile, this);
451     }
452 
453     @Test
testFileWithoutInfoLineInHeader()454     public void testFileWithoutInfoLineInHeader() throws IOException {
455         testFileWithoutInfoLineInHeader("testSelectVariants_FileWithoutInfoLineInHeader", IllegalStateException.class);
456     }
457 
458     @Test
testFileWithoutInfoLineInHeaderWithOverride()459     public void testFileWithoutInfoLineInHeaderWithOverride() throws IOException {
460         testFileWithoutInfoLineInHeader("testSelectVariants_FileWithoutInfoLineInHeaderWithOverride", null);
461     }
462 
testFileWithoutInfoLineInHeader(final String name, final Class<? extends Exception> expectedException)463     private void testFileWithoutInfoLineInHeader(final String name, final Class<? extends Exception> expectedException) throws IOException {
464         final String testFile = getToolTestDataDir() + "missingHeaderLine.vcf";
465         final String outFile = getToolTestDataDir() + "expected/" + name + ".vcf";
466 
467         final String cmd = baseTestString(" -sn NA12892 " + (expectedException == null ? " --lenient" : ""), testFile);
468 
469         IntegrationTestSpec spec =
470                 expectedException != null
471                         ? new IntegrationTestSpec(cmd, 1, expectedException)
472                         : new IntegrationTestSpec(cmd, Collections.singletonList(outFile));
473 
474         spec.executeTest(name, this);
475     }
476 
477     @Test
testInvalidJexl()478     public void testInvalidJexl() throws IOException {
479         final String testFile = getToolTestDataDir() + "ac0.vcf";
480 
481         // NOTE: JexlEngine singleton construction in VariantContextUtils sets silent to false.
482         // However VariantFiltration.initialize() sets setSilent(true) on the shared instance.
483         // Just in case this test runs after a VariantFiltration in the same VM, always set silent back to false.
484         htsjdk.variant.variantcontext.VariantContextUtils.engine.get().setSilent(false);
485 
486         final IntegrationTestSpec spec = new IntegrationTestSpec(
487                 baseTestString(" -select 'vc.getGenotype(\"FAKE_SAMPLE\").isHomRef()' ", testFile),
488                 1,
489                 UserException.class);
490         spec.executeTest("InvalidJexl", this);
491     }
492 
493     @Test
testAlleleTrimming()494     public void testAlleleTrimming() throws IOException {
495         final String testFile = getToolTestDataDir() + "forHardLeftAlignVariantsTest.vcf";
496 
497         final IntegrationTestSpec spec = new IntegrationTestSpec(
498                 baseTestString(" -sn NA12878 --exclude-non-variants --remove-unused-alternates ", testFile),
499                 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_AlleleTrimming.vcf"));
500         spec.executeTest("testAlleleTrimming", this);
501     }
502 
503     @DataProvider(name="unusedAlleleTrimmingProvider")
unusedAlleleTrimmingProvider()504     public Object[][] unusedAlleleTrimmingProvider() {
505         final String expectedPath = getToolTestDataDir() + "expected/";
506         return new Object[][] {
507                 {
508                         getToolTestDataDir() + "forHardLeftAlignVariantsTest.vcf",
509                         "--remove-unused-alternates",
510                         expectedPath + "testSelectVariants_UnusedAlleleHardLeftTrim.vcf"
511                 },
512                 {
513                         getToolTestDataDir() + "forHardLeftAlignVariantsTest.vcf",
514                         null,
515                         expectedPath + "testSelectVariants_UnusedAlleleHardLeft.vcf"
516                 },
517                 {
518                         getToolTestDataDir() + "multi-allelic-ordering.vcf",
519                         "-sn SAMPLE-CC -sn SAMPLE-CT",
520                         expectedPath + "testSelectVariants_UnusedAlleleCCCT.vcf"
521                 },
522                 {
523                         getToolTestDataDir() + "multi-allelic-ordering.vcf",
524                         "-sn SAMPLE-CC -sn SAMPLE-CT --exclude-non-variants",
525                         expectedPath + "testSelectVariants_UnusedAlleleCCCTEnv.vcf"
526                 },
527                 {
528                         getToolTestDataDir() + "multi-allelic-ordering.vcf",
529                         "-sn SAMPLE-CC -sn SAMPLE-CT --remove-unused-alternates",
530                         expectedPath + "testSelectVariants_UnusedAlleleCCCTTrim.vcf"
531                 },
532                 {
533                         getToolTestDataDir() + "multi-allelic-ordering.vcf",
534                         "-sn SAMPLE-CC -sn SAMPLE-CT --exclude-non-variants --remove-unused-alternates",
535                         expectedPath + "testSelectVariants_UnusedAlleleCCCTTrimAltEnv.vcf"
536                 }
537         };
538     }
539 
540     @Test(dataProvider="unusedAlleleTrimmingProvider")
testUnusedAlleleTrimming(final String vcf, final String extraArgs, final String expectedOutput)541     public void testUnusedAlleleTrimming(final String vcf, final String extraArgs, final String expectedOutput) throws IOException {
542         final IntegrationTestSpec spec = new IntegrationTestSpec(
543                 baseTestString(extraArgs == null ? "" : extraArgs, vcf),
544                 Collections.singletonList(expectedOutput)
545         );
546 
547         spec.executeTest(
548                 String.format("testUnusedAlleleTrimming: (%s,%s)", new File(vcf).getName(), extraArgs == null ? "(none)" : extraArgs),
549                 this);
550     }
551 
552     /**
553      *  Test with an empty VCF file
554      */
555     @Test
testEmptyVcfException()556     public void testEmptyVcfException() throws IOException {
557         final String testFile = getToolTestDataDir() + "reallyEmpty.vcf";
558 
559         final IntegrationTestSpec spec = new IntegrationTestSpec(
560                 baseTestString("", testFile),
561                 1,
562                 UserException.NoSuitableCodecs.class
563         );
564 
565         spec.executeTest("testEmptyVcfException--" + testFile, this);
566     }
567 
568     /**
569      * Test with a VCF file that is not a file
570      */
571     @Test
testNotFileVcfException()572     public void testNotFileVcfException() throws IOException {
573         final String testFile = getToolTestDataDir();
574 
575         final IntegrationTestSpec spec = new IntegrationTestSpec(
576                 baseTestString("", testFile),
577                 1,
578                 UserException.CouldNotReadInputFile.class
579         );
580 
581         spec.executeTest("testNotFileVcfException--" + testFile, this);
582     }
583 
584     /**
585      * Test with a VCF file that does not exist
586      */
587     @Test
testMissingVcfException()588     public void testMissingVcfException() throws IOException {
589         final String testFile = "test.vcf";
590 
591         final IntegrationTestSpec spec = new IntegrationTestSpec(
592                 baseTestString("", testFile),
593                 1,
594                 UserException.CouldNotReadInputFile.class
595         );
596 
597         spec.executeTest("testMissingVcfException--" + testFile, this);
598     }
599 
600     /**
601      * Test inverting the variant selection criteria by the -invertSelect argument
602      */
603     @Test
testInvertSelection()604     public void testInvertSelection()  throws IOException {
605         final String testFile = getToolTestDataDir() + "vcfexample2.vcf";
606         final String samplesFile = getToolTestDataDir() + "samples.args";
607 
608         final IntegrationTestSpec spec = new IntegrationTestSpec(
609                 baseTestString(" -sn NA11894 -sn " + samplesFile +
610                                     " -select 'RMSMAPQ < 170.0' --invert-select ", testFile),
611                 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_InvertSelection.vcf")
612         );
613 
614         spec.executeTest("testInvertSelection--" + testFile, this);
615     }
616 
617     /**
618      * Test inverting the variant selection criteria by inverting the JEXL expression logic following -select
619      */
620     @Test
testInvertJexlSelection()621     public void testInvertJexlSelection()  throws IOException {
622         final String testFile = getToolTestDataDir() + "vcfexample2.vcf";
623         final String samplesFile = getToolTestDataDir() + "samples.args";
624 
625         final IntegrationTestSpec spec = new IntegrationTestSpec(
626                 baseTestString(" -sn NA11894 -sn " + samplesFile +
627                         " -select 'RMSMAPQ > 170.0' ", testFile),
628                 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_InvertJexlSelection.vcf")
629         );
630 
631         spec.executeTest("testInvertJexlSelection--" + testFile, this);
632     }
633 
634     /**
635      * Test selecting variants with rsIDs from a .list file
636      */
637     @Test
testKeepSelectionIDFromFile()638     public void testKeepSelectionIDFromFile() throws IOException {
639         final String testFile = getToolTestDataDir() + "complexExample1.vcf";
640         final String idFile = getToolTestDataDir() + "complexExample1.vcf.id.args";
641 
642         final IntegrationTestSpec spec = new IntegrationTestSpec(
643                 baseTestString(" -ids " + idFile, testFile),
644                 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_KeepSelectionID.vcf")
645         );
646 
647         spec.executeTest("testKeepSelectionIDFile--" + testFile, this);
648     }
649 
650     /**
651      * Test selecting variants with literal rsIDs
652      */
653     @Test
testKeepSelectionIDLiteral()654     public void testKeepSelectionIDLiteral() throws IOException {
655         final String testFile = getToolTestDataDir() + "complexExample1.vcf";
656 
657         final IntegrationTestSpec spec = new IntegrationTestSpec(
658                 baseTestString(" -ids testid1", testFile),
659                 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_KeepSelectionID.vcf")
660         );
661 
662         spec.executeTest("testKeepSelectionIDLiteral--" + testFile, this);
663     }
664 
665     /**
666      * Test excluding variants with rsIDs from a file
667      */
668     @Test
testExcludeSelectionIDFromFile()669     public void testExcludeSelectionIDFromFile() throws IOException {
670         final String testFile = getToolTestDataDir() + "complexExample1.vcf";
671         final String idFile = getToolTestDataDir() + "complexExample1.vcf.id.args";
672 
673         final IntegrationTestSpec spec = new IntegrationTestSpec(
674                 baseTestString(" -xl-ids " + idFile, testFile),
675                 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_ExcludeSelectionID.vcf")
676         );
677 
678         spec.executeTest("testExcludeSelectionIDFile--" + testFile, this);
679     }
680 
681     /**
682      * Test excluding variants with literal rsIDs
683      */
684     @Test
testExcludeSelectionIDLiteral()685     public void testExcludeSelectionIDLiteral() throws IOException {
686         final String testFile = getToolTestDataDir() + "complexExample1.vcf";
687 
688         final IntegrationTestSpec spec = new IntegrationTestSpec(
689                 baseTestString(" -xl-ids testid1", testFile),
690                 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_ExcludeSelectionID.vcf")
691         );
692 
693         spec.executeTest("testExcludeSelectionIDLiteral--" + testFile, this);
694     }
695 
696     /**
697      * Test excluding variant types
698      */
699     @Test
testExcludeSelectionType()700     public void testExcludeSelectionType() throws IOException {
701         final String testFile = getToolTestDataDir() + "complexExample1.vcf";
702 
703         final IntegrationTestSpec spec = new IntegrationTestSpec(
704                 baseTestString(" --select-type-to-exclude SNP ", testFile),
705                 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_ExcludeSelectionType.vcf")
706         );
707 
708         spec.executeTest("testExcludeSelectionType--" + testFile, this);
709     }
710 
711     @Test
testMendelianViolationSelection()712     public void testMendelianViolationSelection() throws IOException {
713         final String testFile = getToolTestDataDir() + "CEUtrioTest.vcf";
714         final String pedFile = getToolTestDataDir() + "CEUtrio.ped";
715 
716         final IntegrationTestSpec spec = new IntegrationTestSpec(
717                 baseTestString(" -ped " + pedFile + " --mendelian-violation --mendelian-violation-qual-threshold 0 ", testFile),
718                 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_MendelianViolationSelection.vcf")
719         );
720 
721         spec.executeTest("testMendelianViolationSelection--" + testFile, this);
722     }
723 
724     @Test
testInvertMendelianViolationSelection()725     public void testInvertMendelianViolationSelection() throws IOException {
726         final String testFile = getToolTestDataDir() + "CEUtrioTest.vcf";
727         final String pedFile = getToolTestDataDir() + "CEUtrio.ped";
728 
729         final IntegrationTestSpec spec = new IntegrationTestSpec(
730                 baseTestString(" --mendelian-violation --mendelian-violation-qual-threshold 0 --invert-mendelian-violation -ped " + pedFile, testFile),
731                 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_InvertMendelianViolationSelection.vcf")
732         );
733 
734         spec.executeTest("testInvertMendelianViolationSelection--" + testFile, this);
735     }
736 
737     @Test
testMaxFilteredGenotypesSelection()738     public void testMaxFilteredGenotypesSelection() throws IOException {
739         final String testFile = getToolTestDataDir() + "filteredSamples.vcf";
740 
741         final IntegrationTestSpec spec = new IntegrationTestSpec(
742                 baseTestString(" --max-filtered-genotypes 1 ", testFile),
743                 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_MaxFilteredGenotypesSelection.vcf")
744         );
745 
746         spec.executeTest("testMaxFilteredGenotypesSelection--" + testFile, this);
747     }
748 
749     @Test
testMinFilteredGenotypesSelection()750     public void testMinFilteredGenotypesSelection() throws IOException {
751         final String testFile = getToolTestDataDir() + "filteredSamples.vcf";
752 
753         final IntegrationTestSpec spec = new IntegrationTestSpec(
754                 baseTestString(" --min-filtered-genotypes 2 ", testFile),
755                 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_MinFilteredGenotypesSelection.vcf")
756         );
757 
758         spec.executeTest("testMinFilteredGenotypesSelection--" + testFile, this);
759     }
760 
761     @Test
testMaxFractionFilteredGenotypesSelection()762     public void testMaxFractionFilteredGenotypesSelection() throws IOException {
763         final String testFile = getToolTestDataDir() + "filteredSamples.vcf";
764 
765         final IntegrationTestSpec spec = new IntegrationTestSpec(
766                 baseTestString(" --max-fraction-filtered-genotypes 0.4 ", testFile),
767                 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_MaxFractionFilteredGenotypesSelection.vcf")
768         );
769 
770         spec.executeTest("testMaxFractionFilteredGenotypesSelection--" + testFile, this);
771     }
772 
773     @Test
testMinFractionFilteredGenotypesSelection()774     public void testMinFractionFilteredGenotypesSelection() throws IOException {
775         final String testFile = getToolTestDataDir() + "filteredSamples.vcf";
776 
777         final  IntegrationTestSpec spec = new IntegrationTestSpec(
778                 baseTestString(" --min-fraction-filtered-genotypes 0.6 ", testFile),
779                 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_MinFractionFilteredGenotypesSelection.vcf")
780         );
781 
782         spec.executeTest("testMinFractionFilteredGenotypesSelection--" + testFile, this);
783     }
784 
785     @Test
testSetFilteredGtoNocall()786     public void testSetFilteredGtoNocall() throws IOException {
787         final String testFile = getToolTestDataDir() + "filteredSamples.vcf";
788 
789         final IntegrationTestSpec spec = new IntegrationTestSpec(
790                 baseTestString(" --set-filtered-gt-to-nocall ", testFile),
791                 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_SetFilteredGtoNocall.vcf")
792         );
793 
794         spec.executeTest("testSetFilteredGtoNocall--" + testFile, this);
795     }
796 
797     @Test
testMaxNoCall1()798     public void testMaxNoCall1() throws IOException {
799         final String testFile = getToolTestDataDir() + "vcfexample.forNoCallFiltering.vcf";
800 
801         final IntegrationTestSpec spec = new IntegrationTestSpec(
802                 baseTestString(" --max-nocall-number 1", testFile),
803                 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_maxNOCALLnumber1.vcf")
804         );
805 
806         spec.executeTest("testMaxNoCall1--" + testFile, this);
807     }
808 
809     @Test
testMaxNoCall0_25()810     public void testMaxNoCall0_25() throws IOException {
811         final String testFile = getToolTestDataDir() + "vcfexample.forNoCallFiltering.vcf";
812 
813         final IntegrationTestSpec spec = new IntegrationTestSpec(
814                 baseTestString(" --max-nocall-fraction 0.25", testFile),
815                 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_maxNOCALLnumber1.vcf")
816         );
817 
818         spec.executeTest("testMaxNoCall0_25--" + testFile, this);
819     }
820 
821     @Test
testMaxNoCall2()822     public void testMaxNoCall2() throws IOException {
823         final String testFile = getToolTestDataDir() + "vcfexample.forNoCallFiltering.vcf";
824 
825         final IntegrationTestSpec spec = new IntegrationTestSpec(
826                 baseTestString(" --max-nocall-number 2", testFile),
827                 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_maxNOCALLnumber2.vcf")
828         );
829 
830         spec.executeTest("testMaxNoCall2--" + testFile, this);
831     }
832 
833     @Test
testMaxNoCall0_5()834     public void testMaxNoCall0_5() throws IOException {
835         final String testFile = getToolTestDataDir() + "vcfexample.forNoCallFiltering.vcf";
836 
837         final IntegrationTestSpec spec = new IntegrationTestSpec(
838                 baseTestString(" --max-nocall-fraction 0.5", testFile),
839                 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_maxNOCALLnumber2.vcf")
840         );
841 
842         spec.executeTest("testMaxNoCall0_5--" + testFile, this);
843     }
844 
845     @Test
testHaploid()846     public void testHaploid() throws IOException {
847         final String testFile = getToolTestDataDir() + "haploid-multisample.vcf";
848 
849         final IntegrationTestSpec spec = new IntegrationTestSpec(
850                 baseTestString(" -sn HG00610 -select 'DP > 7' --remove-unused-alternates ", testFile),
851                 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_Haploid.vcf")
852         );
853 
854         spec.executeTest("testHaploid--" + testFile, this);
855     }
856 
857     @Test
testTetraploid()858     public void testTetraploid() throws IOException {
859         final String testFile = getToolTestDataDir() + "tetraploid-multisample.vcf";
860 
861         final IntegrationTestSpec spec = new IntegrationTestSpec(
862                 baseTestString(" -sn NA18486 -select 'DP > 57' --remove-unused-alternates ", testFile),
863                 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_Tetraploid.vcf")
864         );
865 
866         spec.executeTest("testTetraploid--" + testFile, this);
867     }
868 
869     @Test
testTetraDiploid()870     public void testTetraDiploid() throws IOException {
871         final String testFile = getToolTestDataDir() + "tetra-diploid.vcf";
872 
873         final IntegrationTestSpec spec = new IntegrationTestSpec(
874                 baseTestString(" -sn NA12878 -select 'DP > 48' --remove-unused-alternates ", testFile),
875                 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_TetraDiploid.vcf")
876         );
877 
878         spec.executeTest("testTetraDiploid--" + testFile, this);
879     }
880 
881     @Test
testSACSimpleDiploid()882     public void testSACSimpleDiploid() throws IOException {
883         final String testFile = getToolTestDataDir() + "261_S01_raw_variants_gvcf.vcf";
884 
885         final IntegrationTestSpec spec = new IntegrationTestSpec(
886                 baseTestString(" --remove-unused-alternates", testFile),
887                 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_SimpleDiploid.vcf")
888         );
889 
890         spec.executeTest("testSACSimpleDiploid" + testFile, this);
891     }
892 
893     @Test
testSACDiploid()894     public void testSACDiploid() throws IOException {
895         final String testFile = getToolTestDataDir() + "diploid-multisample-sac.g.vcf";
896 
897         final IntegrationTestSpec spec = new IntegrationTestSpec(
898                 baseTestString(" -sn NA12891 --remove-unused-alternates", testFile),
899                 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_SACDiploid.vcf")
900         );
901 
902         spec.executeTest("testSACDiploid" + testFile, this);
903     }
904 
905     @Test
testSACNonDiploid()906     public void testSACNonDiploid() throws IOException {
907         final String testFile = getToolTestDataDir() + "tetraploid-multisample-sac.g.vcf";
908 
909         final IntegrationTestSpec spec = new IntegrationTestSpec(
910                 baseTestString(" -sn NA12891 --remove-unused-alternates", testFile),
911                 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_SACNonDiploid.vcf")
912         );
913 
914         spec.executeTest("testSACNonDiploid" + testFile, this);
915     }
916 
917     @Test
testSetFilteredGtoNocallUpdateInfo()918     public void testSetFilteredGtoNocallUpdateInfo() throws IOException {
919         final String testFile = getToolTestDataDir() + "selectVariantsInfoField.vcf";
920 
921         final IntegrationTestSpec spec = new IntegrationTestSpec(
922                 baseTestString(" --set-filtered-gt-to-nocall --remove-unused-alternates --exclude-non-variants", testFile),
923                 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_SetFilteredGtoNocallUpdateInfo.vcf")
924         );
925 
926         spec.executeTest("testSetFilteredGtoNocallUpdateInfo--" + testFile, this);
927     }
928 
929     @DataProvider(name = "dropAnnotationsDataProvider")
dropAnnotationsDataProvider()930     Object[][] dropAnnotationsDataProvider() {
931         return new Object[][]{
932                 {"-DA FisherStrand -DA OnOffGenotype -DGA RD -sn NA11894", "testSelectVariants_DropAnnotations.vcf", "standard"},
933                 {"-DA FisherStrand -DA OnOffGenotype -DGA RD -sn NA11894 -DA NotAnAnnotation -DGA AlsoNotAnAnnotation", "testSelectVariants_DropAnnotations.vcf", "unused_annotations"},
934                 {"-DA FisherStrand -DA OnOffGenotype -DGA RD -sn NA11894 -select 'FisherStrand > 10.0'", "testSelectVariants_DropAnnotationsSelectFisherStrand.vcf", "select_on_dropped_annotation"},
935                 {"-DA FisherStrand -DA OnOffGenotype -DGA RD -sn NA11894 -select 'RMSMAPQ > 175.0'", "testSelectVariants_DropAnnotationsSelectRMSMAPQ.vcf", "select_on_kept_annotation"},
936                 {"-DA FisherStrand -DA OnOffGenotype -DGA RD -sn NA11894 -select 'vc.getGenotype(\"NA11894\").getExtendedAttribute(\"RD\")>6'", "testSelectVariants_DropAnnotationsSelectRD.vcf", "select_on_dropped_genotype_annotation"},
937                 {"-DA FisherStrand -DA OnOffGenotype -DGA RD -sn NA11894 -select 'vc.getGenotype(\"NA11894\").getGQ()==1'", "testSelectVariants_DropAnnotationsSelectGQ.vcf", "select_on_kept_genotype_annotation"}
938         };
939     }
940 
941     @Test(dataProvider = "dropAnnotationsDataProvider")
testDropAnnotations(String args, String expectedFile, String testName)942     public void testDropAnnotations(String args, String expectedFile, String testName) throws IOException {
943         final String testFile = getToolTestDataDir() + "vcfexample2.vcf";
944 
945         final IntegrationTestSpec spec = new IntegrationTestSpec(
946                 baseTestString(args, testFile),
947                 Collections.singletonList(getToolTestDataDir() + "expected/" + expectedFile)
948         );
949         spec.executeTest("testDropAnnotations--" + testName, this);
950     }
951 
952     @Test(groups = "bucket")
testSampleSelectionOnNio()953     public void testSampleSelectionOnNio() throws IOException {
954         final String testFile = getToolTestDataDir() + "vcfexample2.vcf";
955 
956         final String out = BucketUtils.getTempFilePath(
957             getGCPTestStaging() +"testSelectVariants_SimpleSelection", ".vcf");
958 
959         final String[] args = new String[]{
960             "SelectVariants",
961             "-R", hg19MiniReference
962             , "--variant", testFile
963             , "-sn", "NA11918"
964             , "--suppress-reference-path" // suppress reference file path in output for test differencing
965             , "-O", out
966             , "--" + StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE, "false"};
967 
968         final String expectedFile = getToolTestDataDir() + "expected/" + "testSelectVariants_SimpleSelection.vcf";
969 
970         new Main().instanceMain(args);
971 
972         IntegrationTestSpec.assertEqualTextFiles(IOUtils.getPath(out), IOUtils.getPath(expectedFile), null);
973     }
974 
975     // the input test file is a somatic VCF with several many-allelic sites and no PLs.  This tests that the tool does not attempt
976     // to create a PL-to-alleles cache, which would cause the tool to freeze.  See https://github.com/broadinstitute/gatk/issues/6291
977     @Test
testManyAllelicWithoutPLsDoesntFreeze()978     public void testManyAllelicWithoutPLsDoesntFreeze() {
979         final File input = new File(getToolTestDataDir(), "many-allelic-somatic.vcf");
980         final File output = createTempFile("output", ".vcf");
981         final ArgumentsBuilder args = new ArgumentsBuilder()
982                 .addVCF(input)
983                 .addReference(b37Reference)
984                 .addOutput(output);
985         runCommandLine(args);
986     }
987 }
988