1 package org.broadinstitute.hellbender.tools.funcotator; 2 3 import htsjdk.variant.variantcontext.VariantContext; 4 import htsjdk.variant.vcf.VCFHeader; 5 import org.apache.commons.lang3.tuple.Pair; 6 import org.broadinstitute.hellbender.CommandLineProgramTest; 7 import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; 8 import org.broadinstitute.hellbender.testutils.ArgumentsBuilder; 9 import org.broadinstitute.hellbender.testutils.VariantContextTestUtils; 10 import org.broadinstitute.hellbender.tools.funcotator.filtrationRules.AutosomalRecessiveConstants; 11 import org.broadinstitute.hellbender.tools.funcotator.filtrationRules.ClinVarFilter; 12 import org.broadinstitute.hellbender.tools.funcotator.filtrationRules.LmmFilter; 13 import org.broadinstitute.hellbender.tools.funcotator.filtrationRules.LofFilter; 14 import org.testng.Assert; 15 import org.testng.annotations.DataProvider; 16 import org.testng.annotations.Test; 17 18 import java.io.File; 19 import java.nio.file.Path; 20 import java.util.*; 21 22 public class FilterFuncotationsIntegrationTest extends CommandLineProgramTest { 23 24 private static final Path TEST_DATA_DIR = getTestDataDir().toPath().resolve("FilterFuncotations"); 25 26 private static final Set<String> ALL_FILTERS = new HashSet<>(Arrays.asList( 27 ClinVarFilter.CLINSIG_INFO_VALUE, LofFilter.CLINSIG_INFO_VALUE, LmmFilter.CLINSIG_INFO_VALUE, AutosomalRecessiveConstants.AR_INFO_VALUE)); 28 29 @DataProvider(name = "uniformVcfProvider") uniformVcfProvider()30 public Object[][] uniformVcfProvider() { 31 return new Object[][]{ 32 {"all.vcf", FilterFuncotations.Reference.hg38, FilterFuncotations.AlleleFrequencyDataSource.exac, Collections.emptySet(), ALL_FILTERS}, 33 {"all_gnomad.vcf", FilterFuncotations.Reference.hg38, FilterFuncotations.AlleleFrequencyDataSource.gnomad, Collections.emptySet(), ALL_FILTERS}, 34 {"ar_homvar.vcf", FilterFuncotations.Reference.hg38, FilterFuncotations.AlleleFrequencyDataSource.exac, Collections.emptySet(), Collections.singleton(AutosomalRecessiveConstants.AR_INFO_VALUE)}, 35 {"ar_hetvar.vcf", FilterFuncotations.Reference.hg38, FilterFuncotations.AlleleFrequencyDataSource.exac, Collections.singleton(FilterFuncotationsConstants.NOT_CLINSIG_FILTER), Collections.singleton(FilterFuncotationsConstants.CLINSIG_INFO_NOT_SIGNIFICANT)}, 36 {"ar_compound_het.vcf", FilterFuncotations.Reference.hg38, FilterFuncotations.AlleleFrequencyDataSource.exac, Collections.emptySet(), Collections.singleton(AutosomalRecessiveConstants.AR_INFO_VALUE)}, 37 {"clinvar.vcf", FilterFuncotations.Reference.hg19, FilterFuncotations.AlleleFrequencyDataSource.exac, Collections.emptySet(), Collections.singleton(ClinVarFilter.CLINSIG_INFO_VALUE)}, 38 {"clinvar_gnomad.vcf", FilterFuncotations.Reference.hg19, FilterFuncotations.AlleleFrequencyDataSource.gnomad, Collections.emptySet(), Collections.singleton(ClinVarFilter.CLINSIG_INFO_VALUE)}, 39 {"gnomad_af_failing_cases.vcf", FilterFuncotations.Reference.hg19, FilterFuncotations.AlleleFrequencyDataSource.gnomad, Collections.singleton(FilterFuncotationsConstants.NOT_CLINSIG_FILTER), 40 Collections.singleton(FilterFuncotationsConstants.CLINSIG_INFO_NOT_SIGNIFICANT)}, 41 {"gnomad_af_passing_cases.vcf", FilterFuncotations.Reference.hg19, FilterFuncotations.AlleleFrequencyDataSource.gnomad, Collections.emptySet(), Collections.singleton(LofFilter.CLINSIG_INFO_VALUE)}, 42 {"lmm.vcf", FilterFuncotations.Reference.hg38, FilterFuncotations.AlleleFrequencyDataSource.exac, Collections.emptySet(), Collections.singleton(LmmFilter.CLINSIG_INFO_VALUE)}, 43 {"lof.vcf", FilterFuncotations.Reference.b37, FilterFuncotations.AlleleFrequencyDataSource.exac, Collections.emptySet(), Collections.singleton(LofFilter.CLINSIG_INFO_VALUE)}, 44 {"lof_gnomad.vcf", FilterFuncotations.Reference.b37, FilterFuncotations.AlleleFrequencyDataSource.gnomad, Collections.emptySet(), Collections.singleton(LofFilter.CLINSIG_INFO_VALUE)}, 45 {"multi-allelic.vcf", FilterFuncotations.Reference.hg38, FilterFuncotations.AlleleFrequencyDataSource.exac, Collections.emptySet(), ALL_FILTERS}, 46 {"multi-allelic_gnomad.vcf", FilterFuncotations.Reference.hg38, FilterFuncotations.AlleleFrequencyDataSource.gnomad, Collections.emptySet(), ALL_FILTERS}, 47 {"multi-transcript.vcf", FilterFuncotations.Reference.hg38, FilterFuncotations.AlleleFrequencyDataSource.exac, Collections.emptySet(), ALL_FILTERS}, 48 {"multi-transcript_gnomad.vcf", FilterFuncotations.Reference.hg38, FilterFuncotations.AlleleFrequencyDataSource.gnomad, Collections.emptySet(), ALL_FILTERS}, 49 {"none.vcf", FilterFuncotations.Reference.hg38, FilterFuncotations.AlleleFrequencyDataSource.exac, Collections.singleton(FilterFuncotationsConstants.NOT_CLINSIG_FILTER), 50 Collections.singleton(FilterFuncotationsConstants.CLINSIG_INFO_NOT_SIGNIFICANT)}, 51 {"none_gnomad.vcf", FilterFuncotations.Reference.hg38, FilterFuncotations.AlleleFrequencyDataSource.gnomad, Collections.singleton(FilterFuncotationsConstants.NOT_CLINSIG_FILTER), 52 Collections.singleton(FilterFuncotationsConstants.CLINSIG_INFO_NOT_SIGNIFICANT)} 53 }; 54 } 55 56 @Test(dataProvider = "uniformVcfProvider") testFilterUniform(final String vcfName, final FilterFuncotations.Reference ref, final FilterFuncotations.AlleleFrequencyDataSource afDataSource, final Set<String> expectedFilters, final Set<String> expectedAnnotations)57 public void testFilterUniform(final String vcfName, 58 final FilterFuncotations.Reference ref, 59 final FilterFuncotations.AlleleFrequencyDataSource afDataSource, 60 final Set<String> expectedFilters, 61 final Set<String> expectedAnnotations) { 62 63 final File tmpOut = createTempFile(vcfName + ".filtered", ".vcf"); 64 65 final ArgumentsBuilder args = new ArgumentsBuilder() 66 .add(StandardArgumentDefinitions.VARIANT_SHORT_NAME, TEST_DATA_DIR.resolve(vcfName).toFile()) 67 .add("ref-version", ref.name()) 68 .add("allele-frequency-data-source", afDataSource.name()) 69 .addOutput(tmpOut); 70 71 runCommandLine(args.getArgsArray()); 72 73 final Pair<VCFHeader, List<VariantContext>> vcf = VariantContextTestUtils.readEntireVCFIntoMemory(tmpOut.toString()); 74 vcf.getRight().forEach(variant -> { 75 Assert.assertEquals(variant.getFilters(), expectedFilters); 76 77 final List<String> clinsigAnnotations = variant.getCommonInfo() 78 .getAttributeAsStringList(FilterFuncotationsConstants.CLINSIG_INFO_KEY, ""); 79 Assert.assertEquals(new HashSet<>(clinsigAnnotations), expectedAnnotations); 80 }); 81 } 82 } 83