1 package org.broadinstitute.hellbender.tools.walkers.variantutils; 2 3 import java.util.Comparator; 4 import java.util.List; 5 6 import htsjdk.variant.variantcontext.VariantContext; 7 import org.broadinstitute.barclay.argparser.CommandLineException; 8 import org.broadinstitute.hellbender.GATKBaseTest; 9 import org.broadinstitute.hellbender.Main; 10 import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; 11 import org.broadinstitute.hellbender.testutils.ArgumentsBuilder; 12 import org.broadinstitute.hellbender.testutils.VariantContextTestUtils; 13 import org.broadinstitute.hellbender.utils.gcs.BucketUtils; 14 import org.broadinstitute.hellbender.utils.io.IOUtils; 15 import org.testng.Assert; 16 import org.testng.annotations.DataProvider; 17 import org.testng.annotations.Test; 18 19 import org.broadinstitute.hellbender.CommandLineProgramTest; 20 import org.broadinstitute.hellbender.exceptions.UserException; 21 import org.broadinstitute.hellbender.testutils.IntegrationTestSpec; 22 import shaded.cloud_nio.com.google.common.collect.Comparators; 23 24 import java.io.File; 25 import java.io.IOException; 26 import java.util.Collections; 27 28 public class SelectVariantsIntegrationTest extends CommandLineProgramTest { 29 baseTestString(String args, String testFile)30 private static String baseTestString(String args, String testFile) { 31 return " --variant " + testFile 32 + " -O %s " 33 + " --" + StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE +" false " 34 + args; 35 } 36 37 @Test testSampleSelection()38 public void testSampleSelection() throws IOException { 39 final String testFile = getToolTestDataDir() + "vcfexample2.vcf"; 40 41 final IntegrationTestSpec spec = new IntegrationTestSpec( 42 " -R " + hg19MiniReference 43 + " --variant " + testFile 44 + " -sn NA11918 " 45 + " --suppress-reference-path " // suppress reference file path in output for test differencing 46 + " -O %s " 47 + " --" + StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE +" false", 48 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_SimpleSelection.vcf") 49 ); 50 51 spec.executeTest("testSampleSelection--" + testFile, this); 52 } 53 54 @Test testExpressionSelection()55 public void testExpressionSelection() throws IOException { 56 final String testFile = getToolTestDataDir() + "filteringDepthInFormat.vcf"; 57 58 final IntegrationTestSpec spec = new IntegrationTestSpec( 59 " -R " + hg19MiniReference 60 + " --variant " + testFile 61 + " -select 'DP < 7' " 62 + " --suppress-reference-path " // suppress reference file path in output for test differencing 63 + " -O %s --" + StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE +" false", 64 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_SimpleExpressionSelection.vcf") 65 ); 66 67 spec.executeTest("testSimpleExpressionSelection--" + testFile, this); 68 } 69 70 @Test testRepeatedLineSelectionAndExludeFiltered()71 public void testRepeatedLineSelectionAndExludeFiltered() throws IOException { 72 final String testFile = getToolTestDataDir() + "test.dup.vcf"; 73 74 final IntegrationTestSpec spec = new IntegrationTestSpec( 75 baseTestString(" -sn A -sn B -sn C -exclude-filtered ", testFile), 76 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_RepeatedLineSelection.vcf") 77 ); 78 79 spec.executeTest("testRepeatedLineSelection--" + testFile, this); 80 } 81 82 @Test testComplexSelection()83 public void testComplexSelection() throws IOException { 84 final String testFile = getToolTestDataDir() + "vcfexample2.vcf"; 85 final String samplesFile = getToolTestDataDir() + "samples.args"; 86 87 final IntegrationTestSpec spec = new IntegrationTestSpec( 88 baseTestString(" -sn NA11894 -se 'NA069*' -sn " + samplesFile + " -select 'RMSMAPQ < 170.0'", testFile), 89 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_ComplexSelection.vcf") 90 ); 91 92 spec.executeTest("testComplexSelection--" + testFile, this); 93 } 94 95 /** 96 * When input variants are untrimmed, they can be trimmed by select variants, which may change their order. 97 * This test confirms that this case is handled correctly, and the resulting variants are ouput correctly sorted. 98 */ 99 @Test testUntrimmedVariants()100 public void testUntrimmedVariants() throws IOException { 101 final File testFile = new File(getToolTestDataDir() + "untrimmed.vcf"); 102 final File output = File.createTempFile("test_untrimmed", ".vcf"); 103 final ArgumentsBuilder args = new ArgumentsBuilder() 104 .addVCF(testFile) 105 .addOutput(output) 106 .add(StandardArgumentDefinitions.SAMPLE_NAME_SHORT_NAME, "SAMPLE_01"); 107 108 runCommandLine(args); 109 110 final List<VariantContext> vcs = VariantContextTestUtils.readEntireVCFIntoMemory(output.getPath()).getRight(); 111 112 Assert.assertTrue(Comparators.isInOrder(vcs, Comparator.comparingInt(VariantContext::getStart))); 113 } 114 115 @Test testUntrimmedVariantsWithSetFilteredGtToNocall()116 public void testUntrimmedVariantsWithSetFilteredGtToNocall() throws IOException { 117 final File testFile = new File(getToolTestDataDir() + "untrimmed.vcf"); 118 final File output = File.createTempFile("test_untrimmed", ".vcf"); 119 final ArgumentsBuilder args = new ArgumentsBuilder() 120 .addVCF(testFile) 121 .addOutput(output) 122 .add(StandardArgumentDefinitions.SAMPLE_NAME_SHORT_NAME, "SAMPLE_01") 123 .addFlag("set-filtered-gt-to-nocall"); 124 125 runCommandLine(args); 126 127 final List<VariantContext> vcs = VariantContextTestUtils.readEntireVCFIntoMemory(output.getPath()).getRight(); 128 129 Assert.assertTrue(Comparators.isInOrder(vcs, Comparator.comparingInt(VariantContext::getStart))); 130 } 131 132 @Test testComplexSelectionWithNonExistingSamples()133 public void testComplexSelectionWithNonExistingSamples() throws IOException { 134 final String testFile = getToolTestDataDir() + "vcfexample2.vcf"; 135 final String samplesFile = getToolTestDataDir() + "samples.args"; 136 137 final IntegrationTestSpec spec = new IntegrationTestSpec( 138 baseTestString(" --allow-nonoverlapping-command-line-samples -select 'RMSMAPQ < 170.0' -sn Z -sn " // non existent samples on command line 139 + samplesFile, testFile), 140 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_ComplexSelectionWithNonExistingSamples.vcf") 141 ); 142 spec.executeTest("testComplexSelectionWithNonExistingSamples--" + testFile, this); 143 } 144 145 @Test testNonExistentSampleFile()146 public void testNonExistentSampleFile() throws IOException { 147 final String testFile = getToolTestDataDir() + "vcfexample2.vcf"; 148 final File nonExistentFile = GATKBaseTest.getSafeNonExistentFile("nonexistentSamples.args"); 149 150 final IntegrationTestSpec spec = new IntegrationTestSpec( 151 baseTestString(" -sn A -sn Z -sn Q -sn " + nonExistentFile, testFile), 152 1, 153 CommandLineException.class 154 ); 155 spec.executeTest("testNonExistentSampleFile--" + testFile, this); 156 } 157 158 @Test testNonExistingFieldSelection()159 public void testNonExistingFieldSelection() throws IOException { 160 final String testFile = getToolTestDataDir() + "vcfexample2.vcf"; 161 162 final IntegrationTestSpec spec = new IntegrationTestSpec( 163 baseTestString(" --exclude-non-variants -select 'foo!=0 || RMSMAPQ < 170.0' ", testFile), 164 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_NonExistingSelection.vcf") 165 ); 166 167 spec.executeTest("testNonExistingSelection--" + testFile, this); 168 } 169 170 /** 171 * Test excluding samples from file and sample name 172 */ 173 @Test testSampleExclusionFromFileAndSeparateSample()174 public void testSampleExclusionFromFileAndSeparateSample() throws IOException { 175 final String testFile = getToolTestDataDir() + "vcfexample2.vcf"; 176 final String samplesFile = getToolTestDataDir() + "samples.args"; 177 178 final IntegrationTestSpec spec = new IntegrationTestSpec( 179 baseTestString(" -xl-sn NA11894 -xl-sn " + samplesFile, testFile), 180 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_SampleExclusionFromFileAndSeparateSample.vcf") 181 ); 182 183 spec.executeTest("testSampleExclusionFromFileAndSeparateSample--" + testFile, this); 184 } 185 186 /** 187 * Test excluding samples from file 188 */ 189 @Test testSampleExclusionJustFromFile()190 public void testSampleExclusionJustFromFile() throws IOException { 191 final String testFile = getToolTestDataDir() + "vcfexample2.vcf"; 192 final String samplesFile = getToolTestDataDir() + "samples.args"; 193 194 final IntegrationTestSpec spec = new IntegrationTestSpec( 195 baseTestString(" -xl-sn " + samplesFile, testFile), 196 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_SampleExclusionJustFromFile.vcf") 197 ); 198 199 spec.executeTest("testSampleExclusionJustFromFile--" + testFile, this); 200 } 201 202 /** 203 * Test excluding samples from expression 204 */ 205 @Test testSampleExclusionJustFromExpression()206 public void testSampleExclusionJustFromExpression() throws IOException { 207 final String testFile = getToolTestDataDir() + "vcfexample2.vcf"; 208 209 final IntegrationTestSpec spec = new IntegrationTestSpec( 210 baseTestString(" -xl-se 'NA069*' ", testFile), 211 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_SampleExclusionJustFromExpression.vcf") 212 ); 213 214 spec.executeTest("testSampleExclusionJustFromExpression--" + testFile, this); 215 } 216 217 /** 218 * Test excluding samples from negation expression 219 */ 220 @Test testSampleExclusionJustFromNegationExpression()221 public void testSampleExclusionJustFromNegationExpression() throws IOException { 222 final String testFile = getToolTestDataDir() + "vcfexample2.vcf"; 223 224 final IntegrationTestSpec spec = new IntegrationTestSpec( 225 baseTestString(" -se 'NA[0-9]{4}[^1-9]' ", testFile), 226 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_SampleExclusionJustFromRegexExpression.vcf") 227 ); 228 229 spec.executeTest("testSampleExclusionJustFromRegexExpression--" + testFile, this); 230 } 231 232 /** 233 * Test including samples that are not in the VCF 234 */ 235 236 @Test testSampleInclusionWithNonexistingSamples()237 public void testSampleInclusionWithNonexistingSamples() throws IOException { 238 final String testFile = getToolTestDataDir() + "vcfexample2.vcf"; 239 final String samplesFile = getToolTestDataDir() + "samples.args"; 240 241 final IntegrationTestSpec spec = new IntegrationTestSpec( 242 baseTestString(" -sn A -sn Z -sn Q -sn " + samplesFile, testFile), 243 1, 244 UserException.BadInput.class 245 ); 246 247 spec.executeTest("testSampleInclusionWithNonexistingSamples--" + testFile, this); 248 } 249 250 @Test testDiscordance()251 public void testDiscordance() throws IOException { 252 final String testFile = getToolTestDataDir() + "vcfexample2.vcf"; 253 final String discordanceFile = getToolTestDataDir() + "vcfexample2DiscordanceConcordance.vcf"; 254 255 final IntegrationTestSpec spec = new IntegrationTestSpec( 256 baseTestString(" -sn NA11992 " // not present in discordance track 257 + " -disc " + discordanceFile, testFile), 258 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_Discordance.vcf") 259 ); 260 261 spec.executeTest("testDiscordance--" + testFile, this); 262 } 263 264 @Test testConcordance()265 public void testConcordance() throws IOException { 266 final String testFile = getToolTestDataDir() + "vcfexample2.vcf"; 267 final String concordanceFile = getToolTestDataDir() + "vcfexample2DiscordanceConcordance.vcf"; 268 269 final IntegrationTestSpec spec = new IntegrationTestSpec( 270 baseTestString(" -sn NA11894 -conc " + concordanceFile + " --lenient ", testFile), 271 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_Concordance.vcf") 272 ); 273 274 spec.executeTest("testConcordance--" + testFile, this); 275 } 276 277 /** 278 * Test including variant types. 279 */ 280 @Test testVariantTypeSelection()281 public void testVariantTypeSelection() throws IOException { 282 final String testFile = getToolTestDataDir() + "complexExample1.vcf"; 283 284 final IntegrationTestSpec spec = new IntegrationTestSpec( 285 baseTestString(" --restrict-alleles-to MULTIALLELIC --select-type-to-include MIXED ",testFile), 286 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_VariantTypeSelection.vcf") 287 ); 288 289 spec.executeTest("testVariantTypeSelection--" + testFile, this); 290 } 291 292 /** 293 * Test excluding indels that are larger than the specified size 294 */ 295 @Test testMaxIndelLengthSelection()296 public void testMaxIndelLengthSelection() throws IOException { 297 final String testFile = getToolTestDataDir() + "complexExample1.vcf"; 298 299 final IntegrationTestSpec spec = new IntegrationTestSpec( 300 baseTestString(" --select-type-to-include INDEL --max-indel-size 2 ", testFile), 301 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_MaxIndelLengthSelection.vcf") 302 ); 303 304 spec.executeTest("testMaxIndelLengthSelection--" + testFile, this); 305 } 306 307 /** 308 * Test excluding indels that are smaller than the specified size 309 */ 310 @Test testMinIndelLengthSelection()311 public void testMinIndelLengthSelection() throws IOException { 312 final String testFile = getToolTestDataDir() + "complexExample1.vcf"; 313 314 final IntegrationTestSpec spec = new IntegrationTestSpec( 315 baseTestString(" --select-type-to-include INDEL --min-indel-size 2 ", testFile), 316 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_MinIndelLengthSelection.vcf") 317 ); 318 319 spec.executeTest("testMinIndelLengthSelection--" + testFile, this); 320 } 321 322 @Test testRemoveMLE()323 public void testRemoveMLE() throws IOException { 324 final String testFile = getToolTestDataDir() + "vcfexample.withMLE.vcf"; 325 326 final IntegrationTestSpec spec = new IntegrationTestSpec( 327 baseTestString(" -sn NA12892 ", testFile), 328 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_RemoveMLE.vcf") 329 ); 330 331 spec.executeTest("testRemoveMLE--" + testFile, this); 332 } 333 334 @Test testKeepOriginalAC()335 public void testKeepOriginalAC() throws IOException { 336 final String testFile = getToolTestDataDir() + "vcfexample.loseAlleleInSelection.vcf"; 337 338 final IntegrationTestSpec spec = new IntegrationTestSpec( 339 baseTestString(" --keep-original-ac -sn NA12892 ", testFile), 340 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_KeepOriginalAC.vcf") 341 ); 342 343 spec.executeTest("testKeepOriginalAC--" + testFile, this); 344 } 345 346 @Test testKeepOriginalACAndENV()347 public void testKeepOriginalACAndENV() throws IOException { 348 final String testFile = getToolTestDataDir() + "vcfexample.loseAlleleInSelection.vcf"; 349 350 final IntegrationTestSpec spec = new IntegrationTestSpec( 351 baseTestString(" --keep-original-ac -sn NA12892 --exclude-non-variants --remove-unused-alternates ", testFile), 352 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_KeepOriginalACAndENV.vcf") 353 ); 354 355 spec.executeTest("testKeepOriginalACAndENV--" + testFile, this); 356 } 357 358 @Test testKeepOriginalDP()359 public void testKeepOriginalDP() throws IOException { 360 final String testFile = getToolTestDataDir() + "CEUtrioTest.vcf"; 361 362 final IntegrationTestSpec spec = new IntegrationTestSpec( 363 baseTestString(" --keep-original-dp -sn NA12892 ", testFile), 364 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_KeepOriginalDP.vcf") 365 ); 366 367 spec.executeTest("testKeepOriginalDP--" + testFile, this); 368 } 369 370 @Test testMultipleRecordsAtOnePosition()371 public void testMultipleRecordsAtOnePosition() throws IOException { 372 final String testFile = getToolTestDataDir() + "selectVariants.onePosition.vcf"; 373 374 final IntegrationTestSpec spec = new IntegrationTestSpec( 375 baseTestString(" -select 'KG_FREQ < 0.5' ", testFile), 376 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_MultipleRecordsAtOnePosition.vcf") 377 ); 378 379 spec.executeTest("testMultipleRecordsAtOnePosition--" + testFile, this); 380 } 381 382 @Test testNoGTs()383 public void testNoGTs() throws IOException { 384 final String testFile = getToolTestDataDir() + "vcf4.1.example.vcf"; 385 386 final IntegrationTestSpec spec = new IntegrationTestSpec ( 387 " --variant " + testFile + " -O %s --" + StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE +" false", 388 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_NoGTs.vcf") 389 ); 390 391 spec.executeTest("testNoGTs--" + testFile, this); 392 } 393 394 @Test testRemoveSingleSpanDelAlleleNoSpanDel()395 public void testRemoveSingleSpanDelAlleleNoSpanDel() throws IOException { 396 final String testFile = getToolTestDataDir() + "spanning_deletion.vcf"; 397 final String sampleName = "NA1"; 398 399 final IntegrationTestSpec spec = new IntegrationTestSpec( 400 baseTestString(" -sn " + sampleName + " --remove-unused-alternates --exclude-non-variants", testFile), 401 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_RemoveSingleSpanDelAlleleNoSpanDel.vcf") 402 ); 403 spec.executeTest("test encounter no instance of '*' as only ALT allele and ensure line is removed when only monomorphic allele exists" + testFile, this); 404 } 405 406 @Test testRemoveSingleSpanDelAlleleExNonVar()407 public void testRemoveSingleSpanDelAlleleExNonVar() throws IOException { 408 final String testFile = getToolTestDataDir() + "spanning_deletion.vcf"; 409 final String sampleName = "NA2"; 410 411 final IntegrationTestSpec spec = new IntegrationTestSpec( 412 baseTestString(" -sn " + sampleName + " --remove-unused-alternates", testFile), 413 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_RemoveSingleSpanDelAlleleExNoVar.vcf") 414 ); 415 spec.executeTest("test will not remove variant line where '*' is only ALT allele because --exclude-non-variants not called --" + testFile, this); 416 } 417 418 @Test testRemoveSingleSpanDelAllele()419 public void testRemoveSingleSpanDelAllele() throws IOException { 420 final String testFile = getToolTestDataDir() + "spanning_deletion.vcf"; 421 final String sampleName = "NA2"; 422 423 final IntegrationTestSpec spec = new IntegrationTestSpec( 424 baseTestString(" -sn " + sampleName + " --exclude-non-variants --remove-unused-alternates", testFile), 425 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_RemoveSingleSpanDelAllele.vcf") 426 ); 427 spec.executeTest("test removes variant line where '*' is only ALT allele --" + testFile, this); 428 } 429 430 @Test testSelectFromMultiAllelic()431 public void testSelectFromMultiAllelic() throws IOException { 432 final String testFile = getToolTestDataDir() + "multi-allelic.bi-allelicInGIH.vcf"; 433 final String sampleName = getToolTestDataDir() + "GIH.samples.args"; 434 435 final IntegrationTestSpec spec = new IntegrationTestSpec( 436 baseTestString(" -sn " + sampleName + " --exclude-non-variants --remove-unused-alternates", testFile), 437 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_MultiAllelicExcludeNonVar.vcf") 438 ); 439 spec.executeTest("test select from multi allelic with exclude-non-variants --" + testFile, this); 440 } 441 442 @Test testMultiAllelicAnnotationOrdering()443 public void testMultiAllelicAnnotationOrdering() throws IOException { 444 final String testFile = getToolTestDataDir() + "multi-allelic-ordering.vcf"; 445 446 final IntegrationTestSpec spec = new IntegrationTestSpec( 447 baseTestString(" -sn SAMPLE-CC -sn SAMPLE-CT -sn SAMPLE-CA --exclude-non-variants", testFile), 448 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_MultiAllelicAnnotationOrdering.vcf") 449 ); 450 spec.executeTest("test multi allelic annotation ordering --" + testFile, this); 451 } 452 453 @Test testFileWithoutInfoLineInHeader()454 public void testFileWithoutInfoLineInHeader() throws IOException { 455 testFileWithoutInfoLineInHeader("testSelectVariants_FileWithoutInfoLineInHeader", IllegalStateException.class); 456 } 457 458 @Test testFileWithoutInfoLineInHeaderWithOverride()459 public void testFileWithoutInfoLineInHeaderWithOverride() throws IOException { 460 testFileWithoutInfoLineInHeader("testSelectVariants_FileWithoutInfoLineInHeaderWithOverride", null); 461 } 462 testFileWithoutInfoLineInHeader(final String name, final Class<? extends Exception> expectedException)463 private void testFileWithoutInfoLineInHeader(final String name, final Class<? extends Exception> expectedException) throws IOException { 464 final String testFile = getToolTestDataDir() + "missingHeaderLine.vcf"; 465 final String outFile = getToolTestDataDir() + "expected/" + name + ".vcf"; 466 467 final String cmd = baseTestString(" -sn NA12892 " + (expectedException == null ? " --lenient" : ""), testFile); 468 469 IntegrationTestSpec spec = 470 expectedException != null 471 ? new IntegrationTestSpec(cmd, 1, expectedException) 472 : new IntegrationTestSpec(cmd, Collections.singletonList(outFile)); 473 474 spec.executeTest(name, this); 475 } 476 477 @Test testInvalidJexl()478 public void testInvalidJexl() throws IOException { 479 final String testFile = getToolTestDataDir() + "ac0.vcf"; 480 481 // NOTE: JexlEngine singleton construction in VariantContextUtils sets silent to false. 482 // However VariantFiltration.initialize() sets setSilent(true) on the shared instance. 483 // Just in case this test runs after a VariantFiltration in the same VM, always set silent back to false. 484 htsjdk.variant.variantcontext.VariantContextUtils.engine.get().setSilent(false); 485 486 final IntegrationTestSpec spec = new IntegrationTestSpec( 487 baseTestString(" -select 'vc.getGenotype(\"FAKE_SAMPLE\").isHomRef()' ", testFile), 488 1, 489 UserException.class); 490 spec.executeTest("InvalidJexl", this); 491 } 492 493 @Test testAlleleTrimming()494 public void testAlleleTrimming() throws IOException { 495 final String testFile = getToolTestDataDir() + "forHardLeftAlignVariantsTest.vcf"; 496 497 final IntegrationTestSpec spec = new IntegrationTestSpec( 498 baseTestString(" -sn NA12878 --exclude-non-variants --remove-unused-alternates ", testFile), 499 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_AlleleTrimming.vcf")); 500 spec.executeTest("testAlleleTrimming", this); 501 } 502 503 @DataProvider(name="unusedAlleleTrimmingProvider") unusedAlleleTrimmingProvider()504 public Object[][] unusedAlleleTrimmingProvider() { 505 final String expectedPath = getToolTestDataDir() + "expected/"; 506 return new Object[][] { 507 { 508 getToolTestDataDir() + "forHardLeftAlignVariantsTest.vcf", 509 "--remove-unused-alternates", 510 expectedPath + "testSelectVariants_UnusedAlleleHardLeftTrim.vcf" 511 }, 512 { 513 getToolTestDataDir() + "forHardLeftAlignVariantsTest.vcf", 514 null, 515 expectedPath + "testSelectVariants_UnusedAlleleHardLeft.vcf" 516 }, 517 { 518 getToolTestDataDir() + "multi-allelic-ordering.vcf", 519 "-sn SAMPLE-CC -sn SAMPLE-CT", 520 expectedPath + "testSelectVariants_UnusedAlleleCCCT.vcf" 521 }, 522 { 523 getToolTestDataDir() + "multi-allelic-ordering.vcf", 524 "-sn SAMPLE-CC -sn SAMPLE-CT --exclude-non-variants", 525 expectedPath + "testSelectVariants_UnusedAlleleCCCTEnv.vcf" 526 }, 527 { 528 getToolTestDataDir() + "multi-allelic-ordering.vcf", 529 "-sn SAMPLE-CC -sn SAMPLE-CT --remove-unused-alternates", 530 expectedPath + "testSelectVariants_UnusedAlleleCCCTTrim.vcf" 531 }, 532 { 533 getToolTestDataDir() + "multi-allelic-ordering.vcf", 534 "-sn SAMPLE-CC -sn SAMPLE-CT --exclude-non-variants --remove-unused-alternates", 535 expectedPath + "testSelectVariants_UnusedAlleleCCCTTrimAltEnv.vcf" 536 } 537 }; 538 } 539 540 @Test(dataProvider="unusedAlleleTrimmingProvider") testUnusedAlleleTrimming(final String vcf, final String extraArgs, final String expectedOutput)541 public void testUnusedAlleleTrimming(final String vcf, final String extraArgs, final String expectedOutput) throws IOException { 542 final IntegrationTestSpec spec = new IntegrationTestSpec( 543 baseTestString(extraArgs == null ? "" : extraArgs, vcf), 544 Collections.singletonList(expectedOutput) 545 ); 546 547 spec.executeTest( 548 String.format("testUnusedAlleleTrimming: (%s,%s)", new File(vcf).getName(), extraArgs == null ? "(none)" : extraArgs), 549 this); 550 } 551 552 /** 553 * Test with an empty VCF file 554 */ 555 @Test testEmptyVcfException()556 public void testEmptyVcfException() throws IOException { 557 final String testFile = getToolTestDataDir() + "reallyEmpty.vcf"; 558 559 final IntegrationTestSpec spec = new IntegrationTestSpec( 560 baseTestString("", testFile), 561 1, 562 UserException.NoSuitableCodecs.class 563 ); 564 565 spec.executeTest("testEmptyVcfException--" + testFile, this); 566 } 567 568 /** 569 * Test with a VCF file that is not a file 570 */ 571 @Test testNotFileVcfException()572 public void testNotFileVcfException() throws IOException { 573 final String testFile = getToolTestDataDir(); 574 575 final IntegrationTestSpec spec = new IntegrationTestSpec( 576 baseTestString("", testFile), 577 1, 578 UserException.CouldNotReadInputFile.class 579 ); 580 581 spec.executeTest("testNotFileVcfException--" + testFile, this); 582 } 583 584 /** 585 * Test with a VCF file that does not exist 586 */ 587 @Test testMissingVcfException()588 public void testMissingVcfException() throws IOException { 589 final String testFile = "test.vcf"; 590 591 final IntegrationTestSpec spec = new IntegrationTestSpec( 592 baseTestString("", testFile), 593 1, 594 UserException.CouldNotReadInputFile.class 595 ); 596 597 spec.executeTest("testMissingVcfException--" + testFile, this); 598 } 599 600 /** 601 * Test inverting the variant selection criteria by the -invertSelect argument 602 */ 603 @Test testInvertSelection()604 public void testInvertSelection() throws IOException { 605 final String testFile = getToolTestDataDir() + "vcfexample2.vcf"; 606 final String samplesFile = getToolTestDataDir() + "samples.args"; 607 608 final IntegrationTestSpec spec = new IntegrationTestSpec( 609 baseTestString(" -sn NA11894 -sn " + samplesFile + 610 " -select 'RMSMAPQ < 170.0' --invert-select ", testFile), 611 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_InvertSelection.vcf") 612 ); 613 614 spec.executeTest("testInvertSelection--" + testFile, this); 615 } 616 617 /** 618 * Test inverting the variant selection criteria by inverting the JEXL expression logic following -select 619 */ 620 @Test testInvertJexlSelection()621 public void testInvertJexlSelection() throws IOException { 622 final String testFile = getToolTestDataDir() + "vcfexample2.vcf"; 623 final String samplesFile = getToolTestDataDir() + "samples.args"; 624 625 final IntegrationTestSpec spec = new IntegrationTestSpec( 626 baseTestString(" -sn NA11894 -sn " + samplesFile + 627 " -select 'RMSMAPQ > 170.0' ", testFile), 628 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_InvertJexlSelection.vcf") 629 ); 630 631 spec.executeTest("testInvertJexlSelection--" + testFile, this); 632 } 633 634 /** 635 * Test selecting variants with rsIDs from a .list file 636 */ 637 @Test testKeepSelectionIDFromFile()638 public void testKeepSelectionIDFromFile() throws IOException { 639 final String testFile = getToolTestDataDir() + "complexExample1.vcf"; 640 final String idFile = getToolTestDataDir() + "complexExample1.vcf.id.args"; 641 642 final IntegrationTestSpec spec = new IntegrationTestSpec( 643 baseTestString(" -ids " + idFile, testFile), 644 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_KeepSelectionID.vcf") 645 ); 646 647 spec.executeTest("testKeepSelectionIDFile--" + testFile, this); 648 } 649 650 /** 651 * Test selecting variants with literal rsIDs 652 */ 653 @Test testKeepSelectionIDLiteral()654 public void testKeepSelectionIDLiteral() throws IOException { 655 final String testFile = getToolTestDataDir() + "complexExample1.vcf"; 656 657 final IntegrationTestSpec spec = new IntegrationTestSpec( 658 baseTestString(" -ids testid1", testFile), 659 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_KeepSelectionID.vcf") 660 ); 661 662 spec.executeTest("testKeepSelectionIDLiteral--" + testFile, this); 663 } 664 665 /** 666 * Test excluding variants with rsIDs from a file 667 */ 668 @Test testExcludeSelectionIDFromFile()669 public void testExcludeSelectionIDFromFile() throws IOException { 670 final String testFile = getToolTestDataDir() + "complexExample1.vcf"; 671 final String idFile = getToolTestDataDir() + "complexExample1.vcf.id.args"; 672 673 final IntegrationTestSpec spec = new IntegrationTestSpec( 674 baseTestString(" -xl-ids " + idFile, testFile), 675 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_ExcludeSelectionID.vcf") 676 ); 677 678 spec.executeTest("testExcludeSelectionIDFile--" + testFile, this); 679 } 680 681 /** 682 * Test excluding variants with literal rsIDs 683 */ 684 @Test testExcludeSelectionIDLiteral()685 public void testExcludeSelectionIDLiteral() throws IOException { 686 final String testFile = getToolTestDataDir() + "complexExample1.vcf"; 687 688 final IntegrationTestSpec spec = new IntegrationTestSpec( 689 baseTestString(" -xl-ids testid1", testFile), 690 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_ExcludeSelectionID.vcf") 691 ); 692 693 spec.executeTest("testExcludeSelectionIDLiteral--" + testFile, this); 694 } 695 696 /** 697 * Test excluding variant types 698 */ 699 @Test testExcludeSelectionType()700 public void testExcludeSelectionType() throws IOException { 701 final String testFile = getToolTestDataDir() + "complexExample1.vcf"; 702 703 final IntegrationTestSpec spec = new IntegrationTestSpec( 704 baseTestString(" --select-type-to-exclude SNP ", testFile), 705 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_ExcludeSelectionType.vcf") 706 ); 707 708 spec.executeTest("testExcludeSelectionType--" + testFile, this); 709 } 710 711 @Test testMendelianViolationSelection()712 public void testMendelianViolationSelection() throws IOException { 713 final String testFile = getToolTestDataDir() + "CEUtrioTest.vcf"; 714 final String pedFile = getToolTestDataDir() + "CEUtrio.ped"; 715 716 final IntegrationTestSpec spec = new IntegrationTestSpec( 717 baseTestString(" -ped " + pedFile + " --mendelian-violation --mendelian-violation-qual-threshold 0 ", testFile), 718 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_MendelianViolationSelection.vcf") 719 ); 720 721 spec.executeTest("testMendelianViolationSelection--" + testFile, this); 722 } 723 724 @Test testInvertMendelianViolationSelection()725 public void testInvertMendelianViolationSelection() throws IOException { 726 final String testFile = getToolTestDataDir() + "CEUtrioTest.vcf"; 727 final String pedFile = getToolTestDataDir() + "CEUtrio.ped"; 728 729 final IntegrationTestSpec spec = new IntegrationTestSpec( 730 baseTestString(" --mendelian-violation --mendelian-violation-qual-threshold 0 --invert-mendelian-violation -ped " + pedFile, testFile), 731 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_InvertMendelianViolationSelection.vcf") 732 ); 733 734 spec.executeTest("testInvertMendelianViolationSelection--" + testFile, this); 735 } 736 737 @Test testMaxFilteredGenotypesSelection()738 public void testMaxFilteredGenotypesSelection() throws IOException { 739 final String testFile = getToolTestDataDir() + "filteredSamples.vcf"; 740 741 final IntegrationTestSpec spec = new IntegrationTestSpec( 742 baseTestString(" --max-filtered-genotypes 1 ", testFile), 743 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_MaxFilteredGenotypesSelection.vcf") 744 ); 745 746 spec.executeTest("testMaxFilteredGenotypesSelection--" + testFile, this); 747 } 748 749 @Test testMinFilteredGenotypesSelection()750 public void testMinFilteredGenotypesSelection() throws IOException { 751 final String testFile = getToolTestDataDir() + "filteredSamples.vcf"; 752 753 final IntegrationTestSpec spec = new IntegrationTestSpec( 754 baseTestString(" --min-filtered-genotypes 2 ", testFile), 755 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_MinFilteredGenotypesSelection.vcf") 756 ); 757 758 spec.executeTest("testMinFilteredGenotypesSelection--" + testFile, this); 759 } 760 761 @Test testMaxFractionFilteredGenotypesSelection()762 public void testMaxFractionFilteredGenotypesSelection() throws IOException { 763 final String testFile = getToolTestDataDir() + "filteredSamples.vcf"; 764 765 final IntegrationTestSpec spec = new IntegrationTestSpec( 766 baseTestString(" --max-fraction-filtered-genotypes 0.4 ", testFile), 767 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_MaxFractionFilteredGenotypesSelection.vcf") 768 ); 769 770 spec.executeTest("testMaxFractionFilteredGenotypesSelection--" + testFile, this); 771 } 772 773 @Test testMinFractionFilteredGenotypesSelection()774 public void testMinFractionFilteredGenotypesSelection() throws IOException { 775 final String testFile = getToolTestDataDir() + "filteredSamples.vcf"; 776 777 final IntegrationTestSpec spec = new IntegrationTestSpec( 778 baseTestString(" --min-fraction-filtered-genotypes 0.6 ", testFile), 779 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_MinFractionFilteredGenotypesSelection.vcf") 780 ); 781 782 spec.executeTest("testMinFractionFilteredGenotypesSelection--" + testFile, this); 783 } 784 785 @Test testSetFilteredGtoNocall()786 public void testSetFilteredGtoNocall() throws IOException { 787 final String testFile = getToolTestDataDir() + "filteredSamples.vcf"; 788 789 final IntegrationTestSpec spec = new IntegrationTestSpec( 790 baseTestString(" --set-filtered-gt-to-nocall ", testFile), 791 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_SetFilteredGtoNocall.vcf") 792 ); 793 794 spec.executeTest("testSetFilteredGtoNocall--" + testFile, this); 795 } 796 797 @Test testMaxNoCall1()798 public void testMaxNoCall1() throws IOException { 799 final String testFile = getToolTestDataDir() + "vcfexample.forNoCallFiltering.vcf"; 800 801 final IntegrationTestSpec spec = new IntegrationTestSpec( 802 baseTestString(" --max-nocall-number 1", testFile), 803 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_maxNOCALLnumber1.vcf") 804 ); 805 806 spec.executeTest("testMaxNoCall1--" + testFile, this); 807 } 808 809 @Test testMaxNoCall0_25()810 public void testMaxNoCall0_25() throws IOException { 811 final String testFile = getToolTestDataDir() + "vcfexample.forNoCallFiltering.vcf"; 812 813 final IntegrationTestSpec spec = new IntegrationTestSpec( 814 baseTestString(" --max-nocall-fraction 0.25", testFile), 815 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_maxNOCALLnumber1.vcf") 816 ); 817 818 spec.executeTest("testMaxNoCall0_25--" + testFile, this); 819 } 820 821 @Test testMaxNoCall2()822 public void testMaxNoCall2() throws IOException { 823 final String testFile = getToolTestDataDir() + "vcfexample.forNoCallFiltering.vcf"; 824 825 final IntegrationTestSpec spec = new IntegrationTestSpec( 826 baseTestString(" --max-nocall-number 2", testFile), 827 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_maxNOCALLnumber2.vcf") 828 ); 829 830 spec.executeTest("testMaxNoCall2--" + testFile, this); 831 } 832 833 @Test testMaxNoCall0_5()834 public void testMaxNoCall0_5() throws IOException { 835 final String testFile = getToolTestDataDir() + "vcfexample.forNoCallFiltering.vcf"; 836 837 final IntegrationTestSpec spec = new IntegrationTestSpec( 838 baseTestString(" --max-nocall-fraction 0.5", testFile), 839 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_maxNOCALLnumber2.vcf") 840 ); 841 842 spec.executeTest("testMaxNoCall0_5--" + testFile, this); 843 } 844 845 @Test testHaploid()846 public void testHaploid() throws IOException { 847 final String testFile = getToolTestDataDir() + "haploid-multisample.vcf"; 848 849 final IntegrationTestSpec spec = new IntegrationTestSpec( 850 baseTestString(" -sn HG00610 -select 'DP > 7' --remove-unused-alternates ", testFile), 851 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_Haploid.vcf") 852 ); 853 854 spec.executeTest("testHaploid--" + testFile, this); 855 } 856 857 @Test testTetraploid()858 public void testTetraploid() throws IOException { 859 final String testFile = getToolTestDataDir() + "tetraploid-multisample.vcf"; 860 861 final IntegrationTestSpec spec = new IntegrationTestSpec( 862 baseTestString(" -sn NA18486 -select 'DP > 57' --remove-unused-alternates ", testFile), 863 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_Tetraploid.vcf") 864 ); 865 866 spec.executeTest("testTetraploid--" + testFile, this); 867 } 868 869 @Test testTetraDiploid()870 public void testTetraDiploid() throws IOException { 871 final String testFile = getToolTestDataDir() + "tetra-diploid.vcf"; 872 873 final IntegrationTestSpec spec = new IntegrationTestSpec( 874 baseTestString(" -sn NA12878 -select 'DP > 48' --remove-unused-alternates ", testFile), 875 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_TetraDiploid.vcf") 876 ); 877 878 spec.executeTest("testTetraDiploid--" + testFile, this); 879 } 880 881 @Test testSACSimpleDiploid()882 public void testSACSimpleDiploid() throws IOException { 883 final String testFile = getToolTestDataDir() + "261_S01_raw_variants_gvcf.vcf"; 884 885 final IntegrationTestSpec spec = new IntegrationTestSpec( 886 baseTestString(" --remove-unused-alternates", testFile), 887 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_SimpleDiploid.vcf") 888 ); 889 890 spec.executeTest("testSACSimpleDiploid" + testFile, this); 891 } 892 893 @Test testSACDiploid()894 public void testSACDiploid() throws IOException { 895 final String testFile = getToolTestDataDir() + "diploid-multisample-sac.g.vcf"; 896 897 final IntegrationTestSpec spec = new IntegrationTestSpec( 898 baseTestString(" -sn NA12891 --remove-unused-alternates", testFile), 899 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_SACDiploid.vcf") 900 ); 901 902 spec.executeTest("testSACDiploid" + testFile, this); 903 } 904 905 @Test testSACNonDiploid()906 public void testSACNonDiploid() throws IOException { 907 final String testFile = getToolTestDataDir() + "tetraploid-multisample-sac.g.vcf"; 908 909 final IntegrationTestSpec spec = new IntegrationTestSpec( 910 baseTestString(" -sn NA12891 --remove-unused-alternates", testFile), 911 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_SACNonDiploid.vcf") 912 ); 913 914 spec.executeTest("testSACNonDiploid" + testFile, this); 915 } 916 917 @Test testSetFilteredGtoNocallUpdateInfo()918 public void testSetFilteredGtoNocallUpdateInfo() throws IOException { 919 final String testFile = getToolTestDataDir() + "selectVariantsInfoField.vcf"; 920 921 final IntegrationTestSpec spec = new IntegrationTestSpec( 922 baseTestString(" --set-filtered-gt-to-nocall --remove-unused-alternates --exclude-non-variants", testFile), 923 Collections.singletonList(getToolTestDataDir() + "expected/" + "testSelectVariants_SetFilteredGtoNocallUpdateInfo.vcf") 924 ); 925 926 spec.executeTest("testSetFilteredGtoNocallUpdateInfo--" + testFile, this); 927 } 928 929 @DataProvider(name = "dropAnnotationsDataProvider") dropAnnotationsDataProvider()930 Object[][] dropAnnotationsDataProvider() { 931 return new Object[][]{ 932 {"-DA FisherStrand -DA OnOffGenotype -DGA RD -sn NA11894", "testSelectVariants_DropAnnotations.vcf", "standard"}, 933 {"-DA FisherStrand -DA OnOffGenotype -DGA RD -sn NA11894 -DA NotAnAnnotation -DGA AlsoNotAnAnnotation", "testSelectVariants_DropAnnotations.vcf", "unused_annotations"}, 934 {"-DA FisherStrand -DA OnOffGenotype -DGA RD -sn NA11894 -select 'FisherStrand > 10.0'", "testSelectVariants_DropAnnotationsSelectFisherStrand.vcf", "select_on_dropped_annotation"}, 935 {"-DA FisherStrand -DA OnOffGenotype -DGA RD -sn NA11894 -select 'RMSMAPQ > 175.0'", "testSelectVariants_DropAnnotationsSelectRMSMAPQ.vcf", "select_on_kept_annotation"}, 936 {"-DA FisherStrand -DA OnOffGenotype -DGA RD -sn NA11894 -select 'vc.getGenotype(\"NA11894\").getExtendedAttribute(\"RD\")>6'", "testSelectVariants_DropAnnotationsSelectRD.vcf", "select_on_dropped_genotype_annotation"}, 937 {"-DA FisherStrand -DA OnOffGenotype -DGA RD -sn NA11894 -select 'vc.getGenotype(\"NA11894\").getGQ()==1'", "testSelectVariants_DropAnnotationsSelectGQ.vcf", "select_on_kept_genotype_annotation"} 938 }; 939 } 940 941 @Test(dataProvider = "dropAnnotationsDataProvider") testDropAnnotations(String args, String expectedFile, String testName)942 public void testDropAnnotations(String args, String expectedFile, String testName) throws IOException { 943 final String testFile = getToolTestDataDir() + "vcfexample2.vcf"; 944 945 final IntegrationTestSpec spec = new IntegrationTestSpec( 946 baseTestString(args, testFile), 947 Collections.singletonList(getToolTestDataDir() + "expected/" + expectedFile) 948 ); 949 spec.executeTest("testDropAnnotations--" + testName, this); 950 } 951 952 @Test(groups = "bucket") testSampleSelectionOnNio()953 public void testSampleSelectionOnNio() throws IOException { 954 final String testFile = getToolTestDataDir() + "vcfexample2.vcf"; 955 956 final String out = BucketUtils.getTempFilePath( 957 getGCPTestStaging() +"testSelectVariants_SimpleSelection", ".vcf"); 958 959 final String[] args = new String[]{ 960 "SelectVariants", 961 "-R", hg19MiniReference 962 , "--variant", testFile 963 , "-sn", "NA11918" 964 , "--suppress-reference-path" // suppress reference file path in output for test differencing 965 , "-O", out 966 , "--" + StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE, "false"}; 967 968 final String expectedFile = getToolTestDataDir() + "expected/" + "testSelectVariants_SimpleSelection.vcf"; 969 970 new Main().instanceMain(args); 971 972 IntegrationTestSpec.assertEqualTextFiles(IOUtils.getPath(out), IOUtils.getPath(expectedFile), null); 973 } 974 975 // the input test file is a somatic VCF with several many-allelic sites and no PLs. This tests that the tool does not attempt 976 // to create a PL-to-alleles cache, which would cause the tool to freeze. See https://github.com/broadinstitute/gatk/issues/6291 977 @Test testManyAllelicWithoutPLsDoesntFreeze()978 public void testManyAllelicWithoutPLsDoesntFreeze() { 979 final File input = new File(getToolTestDataDir(), "many-allelic-somatic.vcf"); 980 final File output = createTempFile("output", ".vcf"); 981 final ArgumentsBuilder args = new ArgumentsBuilder() 982 .addVCF(input) 983 .addReference(b37Reference) 984 .addOutput(output); 985 runCommandLine(args); 986 } 987 } 988