1 package org.broadinstitute.hellbender.tools.funcotator;
2 
3 import htsjdk.variant.variantcontext.VariantContext;
4 import htsjdk.variant.vcf.VCFHeader;
5 import org.apache.commons.lang3.tuple.Pair;
6 import org.broadinstitute.hellbender.CommandLineProgramTest;
7 import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
8 import org.broadinstitute.hellbender.testutils.ArgumentsBuilder;
9 import org.broadinstitute.hellbender.testutils.VariantContextTestUtils;
10 import org.broadinstitute.hellbender.tools.funcotator.filtrationRules.AutosomalRecessiveConstants;
11 import org.broadinstitute.hellbender.tools.funcotator.filtrationRules.ClinVarFilter;
12 import org.broadinstitute.hellbender.tools.funcotator.filtrationRules.LmmFilter;
13 import org.broadinstitute.hellbender.tools.funcotator.filtrationRules.LofFilter;
14 import org.testng.Assert;
15 import org.testng.annotations.DataProvider;
16 import org.testng.annotations.Test;
17 
18 import java.io.File;
19 import java.nio.file.Path;
20 import java.util.*;
21 
22 public class FilterFuncotationsIntegrationTest extends CommandLineProgramTest {
23 
24     private static final Path TEST_DATA_DIR = getTestDataDir().toPath().resolve("FilterFuncotations");
25 
26     private static final Set<String> ALL_FILTERS = new HashSet<>(Arrays.asList(
27             ClinVarFilter.CLINSIG_INFO_VALUE, LofFilter.CLINSIG_INFO_VALUE, LmmFilter.CLINSIG_INFO_VALUE, AutosomalRecessiveConstants.AR_INFO_VALUE));
28 
29     @DataProvider(name = "uniformVcfProvider")
uniformVcfProvider()30     public Object[][] uniformVcfProvider() {
31         return new Object[][]{
32                 {"all.vcf", FilterFuncotations.Reference.hg38, FilterFuncotations.AlleleFrequencyDataSource.exac, Collections.emptySet(), ALL_FILTERS},
33                 {"all_gnomad.vcf", FilterFuncotations.Reference.hg38, FilterFuncotations.AlleleFrequencyDataSource.gnomad, Collections.emptySet(), ALL_FILTERS},
34                 {"ar_homvar.vcf", FilterFuncotations.Reference.hg38, FilterFuncotations.AlleleFrequencyDataSource.exac, Collections.emptySet(), Collections.singleton(AutosomalRecessiveConstants.AR_INFO_VALUE)},
35                 {"ar_hetvar.vcf", FilterFuncotations.Reference.hg38, FilterFuncotations.AlleleFrequencyDataSource.exac, Collections.singleton(FilterFuncotationsConstants.NOT_CLINSIG_FILTER), Collections.singleton(FilterFuncotationsConstants.CLINSIG_INFO_NOT_SIGNIFICANT)},
36                 {"ar_compound_het.vcf", FilterFuncotations.Reference.hg38, FilterFuncotations.AlleleFrequencyDataSource.exac, Collections.emptySet(), Collections.singleton(AutosomalRecessiveConstants.AR_INFO_VALUE)},
37                 {"clinvar.vcf", FilterFuncotations.Reference.hg19, FilterFuncotations.AlleleFrequencyDataSource.exac, Collections.emptySet(), Collections.singleton(ClinVarFilter.CLINSIG_INFO_VALUE)},
38                 {"clinvar_gnomad.vcf", FilterFuncotations.Reference.hg19, FilterFuncotations.AlleleFrequencyDataSource.gnomad, Collections.emptySet(), Collections.singleton(ClinVarFilter.CLINSIG_INFO_VALUE)},
39                 {"gnomad_af_failing_cases.vcf", FilterFuncotations.Reference.hg19, FilterFuncotations.AlleleFrequencyDataSource.gnomad, Collections.singleton(FilterFuncotationsConstants.NOT_CLINSIG_FILTER),
40                         Collections.singleton(FilterFuncotationsConstants.CLINSIG_INFO_NOT_SIGNIFICANT)},
41                 {"gnomad_af_passing_cases.vcf", FilterFuncotations.Reference.hg19, FilterFuncotations.AlleleFrequencyDataSource.gnomad, Collections.emptySet(), Collections.singleton(LofFilter.CLINSIG_INFO_VALUE)},
42                 {"lmm.vcf", FilterFuncotations.Reference.hg38, FilterFuncotations.AlleleFrequencyDataSource.exac, Collections.emptySet(), Collections.singleton(LmmFilter.CLINSIG_INFO_VALUE)},
43                 {"lof.vcf", FilterFuncotations.Reference.b37, FilterFuncotations.AlleleFrequencyDataSource.exac, Collections.emptySet(), Collections.singleton(LofFilter.CLINSIG_INFO_VALUE)},
44                 {"lof_gnomad.vcf", FilterFuncotations.Reference.b37, FilterFuncotations.AlleleFrequencyDataSource.gnomad, Collections.emptySet(), Collections.singleton(LofFilter.CLINSIG_INFO_VALUE)},
45                 {"multi-allelic.vcf", FilterFuncotations.Reference.hg38, FilterFuncotations.AlleleFrequencyDataSource.exac, Collections.emptySet(), ALL_FILTERS},
46                 {"multi-allelic_gnomad.vcf", FilterFuncotations.Reference.hg38, FilterFuncotations.AlleleFrequencyDataSource.gnomad, Collections.emptySet(), ALL_FILTERS},
47                 {"multi-transcript.vcf", FilterFuncotations.Reference.hg38, FilterFuncotations.AlleleFrequencyDataSource.exac, Collections.emptySet(), ALL_FILTERS},
48                 {"multi-transcript_gnomad.vcf", FilterFuncotations.Reference.hg38, FilterFuncotations.AlleleFrequencyDataSource.gnomad, Collections.emptySet(), ALL_FILTERS},
49                 {"none.vcf", FilterFuncotations.Reference.hg38, FilterFuncotations.AlleleFrequencyDataSource.exac, Collections.singleton(FilterFuncotationsConstants.NOT_CLINSIG_FILTER),
50                         Collections.singleton(FilterFuncotationsConstants.CLINSIG_INFO_NOT_SIGNIFICANT)},
51                 {"none_gnomad.vcf", FilterFuncotations.Reference.hg38, FilterFuncotations.AlleleFrequencyDataSource.gnomad, Collections.singleton(FilterFuncotationsConstants.NOT_CLINSIG_FILTER),
52                         Collections.singleton(FilterFuncotationsConstants.CLINSIG_INFO_NOT_SIGNIFICANT)}
53         };
54     }
55 
56     @Test(dataProvider = "uniformVcfProvider")
testFilterUniform(final String vcfName, final FilterFuncotations.Reference ref, final FilterFuncotations.AlleleFrequencyDataSource afDataSource, final Set<String> expectedFilters, final Set<String> expectedAnnotations)57     public void testFilterUniform(final String vcfName,
58                                   final FilterFuncotations.Reference ref,
59                                   final FilterFuncotations.AlleleFrequencyDataSource afDataSource,
60                                   final Set<String> expectedFilters,
61                                   final Set<String> expectedAnnotations) {
62 
63         final File tmpOut = createTempFile(vcfName + ".filtered", ".vcf");
64 
65         final ArgumentsBuilder args = new ArgumentsBuilder()
66                 .add(StandardArgumentDefinitions.VARIANT_SHORT_NAME, TEST_DATA_DIR.resolve(vcfName).toFile())
67                 .add("ref-version", ref.name())
68                 .add("allele-frequency-data-source", afDataSource.name())
69                 .addOutput(tmpOut);
70 
71         runCommandLine(args.getArgsArray());
72 
73         final Pair<VCFHeader, List<VariantContext>> vcf = VariantContextTestUtils.readEntireVCFIntoMemory(tmpOut.toString());
74         vcf.getRight().forEach(variant -> {
75             Assert.assertEquals(variant.getFilters(), expectedFilters);
76 
77             final List<String> clinsigAnnotations = variant.getCommonInfo()
78                     .getAttributeAsStringList(FilterFuncotationsConstants.CLINSIG_INFO_KEY, "");
79             Assert.assertEquals(new HashSet<>(clinsigAnnotations), expectedAnnotations);
80         });
81     }
82 }
83