1 package org.broadinstitute.hellbender.tools.walkers.annotator;
2 
3 import com.google.common.annotations.VisibleForTesting;
4 import htsjdk.variant.variantcontext.Allele;
5 import htsjdk.variant.variantcontext.VariantContext;
6 import org.apache.logging.log4j.LogManager;
7 import org.apache.logging.log4j.Logger;
8 import org.broadinstitute.barclay.help.DocumentedFeature;
9 import org.broadinstitute.hellbender.engine.GATKPath;
10 import org.broadinstitute.hellbender.engine.ReferenceContext;
11 import org.broadinstitute.hellbender.utils.Utils;
12 import org.broadinstitute.hellbender.utils.genotyper.AlleleLikelihoods;
13 import org.broadinstitute.hellbender.utils.help.HelpConstants;
14 import org.broadinstitute.hellbender.utils.read.GATKRead;
15 import org.broadinstitute.hellbender.utils.samples.MendelianViolation;
16 import org.broadinstitute.hellbender.utils.samples.Trio;
17 import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
18 
19 import java.util.*;
20 
21 /**
22  * Existence of a de novo mutation in at least one of the given families
23  *
24  * <p>This annotation uses the genotype information from individuals in family trios to identify possible de novo mutations and the sample(s) in which they occur. This works best if the genotypes have been processed according to the <a href="https://www.broadinstitute.org/gatk/guide/article?id=4723">Genotype Refinement workflow</a>.</p>
25  *
26  * <h3>Caveats</h3>
27  * <ul>
28  *     <li>The calculation assumes that the organism is diploid.</li>
29  *     <li>This annotation requires a valid pedigree file.</li>
30  *     <li>Only reports possible de novos for children whose genotypes have not been tagged as filtered (which is most appropriate if parent likelihoods
31  * have already been factored in using PhaseByTransmission).</li>
32  *     <li>When multiple trios are present, the annotation is simply the maximum of the likelihood ratios, rather than the strict 1-Prod(1-p_i) calculation, as this can scale poorly for uncertain sites and many trios.</li>
33  *     <li>This annotation can only be used from the Variant Annotator. If you attempt to use it from the UnifiedGenotyper, the run will fail with an error message to that effect. If you attempt to use it from the HaplotypeCaller, the run will complete successfully but the annotation will not be added to any variants.</li>
34  * </ul>
35  */
36 @DocumentedFeature(groupName=HelpConstants.DOC_CAT_ANNOTATORS, groupSummary=HelpConstants.DOC_CAT_ANNOTATORS_SUMMARY, summary="Existence of a de novo mutation in at least one of the given families (hiConfDeNovo, loConfDeNovo)")
37 public final class PossibleDeNovo extends PedigreeAnnotation {
38     protected final Logger warning = LogManager.getLogger(this.getClass());
39     private final MendelianViolation mendelianViolation;
40     private Set<Trio> trios;
41 
42     @VisibleForTesting
PossibleDeNovo(final Set<Trio> trios, final double minGenotypeQualityP)43     public PossibleDeNovo(final Set<Trio> trios, final double minGenotypeQualityP) {
44         super((Set<String>) null);
45         this.trios = Collections.unmodifiableSet(new LinkedHashSet<>(trios));
46         mendelianViolation = new MendelianViolation(minGenotypeQualityP);
47     }
48 
PossibleDeNovo(final GATKPath pedigreeFile)49     public PossibleDeNovo(final GATKPath pedigreeFile){
50         super(pedigreeFile);
51         mendelianViolation = new MendelianViolation(DEFAULT_MIN_GENOTYPE_QUALITY_P);
52     }
53 
PossibleDeNovo()54     public PossibleDeNovo(){
55         super((Set<String>) null);
56         mendelianViolation = new MendelianViolation(DEFAULT_MIN_GENOTYPE_QUALITY_P);
57     }
58 
59     @Override
validateArguments(Collection<String> founderIds, GATKPath pedigreeFile)60     void validateArguments(Collection<String> founderIds, GATKPath pedigreeFile) {
61         if (pedigreeFile == null) {
62             if ((founderIds != null && !founderIds.isEmpty())) {
63                 warning.warn("PossibleDenovo annotation will not be calculated, must provide a valid PED file (-ped). Founder-id arguments cannot be used for this annotation");
64             } else {
65                 warning.warn("PossibleDenovo Annotation will not be calculated, must provide a valid PED file (-ped) from the command line.");
66             }
67         } else {
68             if ((founderIds != null && !founderIds.isEmpty())) {
69                 warning.warn("PossibleDenovo annotation does not take founder-id arguments, trio information will be extracted only from the provided PED file");
70             }
71         }
72     }
73 
74     // Static thresholds for the denovo calculation
75     public final static double DEFAULT_MIN_GENOTYPE_QUALITY_P = 0; // TODO should this be exposed as a command line argument?
76     private static final int hi_GQ_threshold = 20; //WARNING - If you change this value, update the description in GATKVCFHeaderLines
77     private static final int lo_GQ_threshold = 10; //WARNING - If you change this value, update the description in GATKVCFHeaderLines
78     private static final double percentOfSamplesCutoff = 0.001; //for many, many samples use 0.1% of samples as allele frequency threshold for de novos
79     private static final int flatNumberOfSamplesCutoff = 4;
80 
initializeAndGetTrios()81     private Set<Trio> initializeAndGetTrios() {
82         if (trios == null) {
83             trios = getTrios();
84         }
85         return trios;
86     }
87 
88     @Override
getKeyNames()89     public List<String> getKeyNames() {
90         return Arrays.asList(
91             GATKVCFConstants.HI_CONF_DENOVO_KEY,
92             GATKVCFConstants.LO_CONF_DENOVO_KEY);
93     }
94 
95     @Override
annotate(final ReferenceContext ref, final VariantContext vc, final AlleleLikelihoods<GATKRead, Allele> likelihoods)96     public Map<String, Object> annotate(final ReferenceContext ref,
97                                         final VariantContext vc,
98                                         final AlleleLikelihoods<GATKRead, Allele> likelihoods) {
99         Utils.nonNull(vc);
100         Set<Trio> trioSet = initializeAndGetTrios();
101         if (trioSet.isEmpty()){
102             return Collections.emptyMap();
103         }
104         final List<String> highConfDeNovoChildren = new ArrayList<>();
105         final List<String> lowConfDeNovoChildren = new ArrayList<>();
106         for (final Trio trio : trioSet) {
107             if (vc.isBiallelic() &&
108                 PossibleDeNovo.contextHasTrioLikelihoods(vc, trio) &&
109                 mendelianViolation.isViolation(trio.getMother(), trio.getFather(), trio.getChild(), vc) &&
110                 mendelianViolation.getParentsRefRefChildHet() > 0) {
111 
112                 final int childGQ = vc.getGenotype(trio.getChildID()).getGQ();
113                 final int momGQ   = vc.getGenotype(trio.getMaternalID()).getGQ();
114                 final int dadGQ   = vc.getGenotype(trio.getPaternalID()).getGQ();
115 
116                 if (childGQ >= hi_GQ_threshold && momGQ >= hi_GQ_threshold && dadGQ >= hi_GQ_threshold) {
117                     highConfDeNovoChildren.add(trio.getChildID());
118                 } else if (childGQ >= lo_GQ_threshold && momGQ > 0 && dadGQ > 0) {
119                     lowConfDeNovoChildren.add(trio.getChildID());
120                 }
121             }
122         }
123 
124         final double percentNumberOfSamplesCutoff = vc.getNSamples()*percentOfSamplesCutoff;
125         final double AFcutoff = Math.max(flatNumberOfSamplesCutoff, percentNumberOfSamplesCutoff);
126         final int deNovoAlleleCount = vc.getCalledChrCount(vc.getAlternateAllele(0)); //we assume we're biallelic above so use the first alt
127 
128         final Map<String,Object> attributeMap = new LinkedHashMap<>(2);
129         if ( !highConfDeNovoChildren.isEmpty()  && deNovoAlleleCount < AFcutoff ) {
130             attributeMap.put(GATKVCFConstants.HI_CONF_DENOVO_KEY, highConfDeNovoChildren);
131         }
132         if ( !lowConfDeNovoChildren.isEmpty()  && deNovoAlleleCount < AFcutoff ) {
133             attributeMap.put(GATKVCFConstants.LO_CONF_DENOVO_KEY, lowConfDeNovoChildren);
134         }
135         return attributeMap;
136     }
137 
contextHasTrioLikelihoods(final VariantContext vc, final Trio trio)138     private static boolean contextHasTrioLikelihoods(final VariantContext vc, final Trio trio) {
139         final String mom = trio.getMaternalID();
140         final String dad = trio.getPaternalID();
141         final String kid = trio.getChildID();
142 
143         return   (!mom.isEmpty() && vc.hasGenotype(mom) && vc.getGenotype(mom).hasLikelihoods())
144               && (!dad.isEmpty() && vc.hasGenotype(dad) && vc.getGenotype(dad).hasLikelihoods())
145               && (!kid.isEmpty() && vc.hasGenotype(kid) && vc.getGenotype(kid).hasLikelihoods());
146     }
147 
148 }
149