1 package org.broadinstitute.hellbender.tools.walkers.annotator;
2 
3 import htsjdk.variant.variantcontext.*;
4 import htsjdk.variant.vcf.VCFFormatHeaderLine;
5 import org.broadinstitute.barclay.help.DocumentedFeature;
6 import org.broadinstitute.hellbender.engine.ReferenceContext;
7 import org.broadinstitute.hellbender.utils.MathUtils;
8 import org.broadinstitute.hellbender.utils.Utils;
9 import org.broadinstitute.hellbender.utils.genotyper.AlleleLikelihoods;
10 import org.broadinstitute.hellbender.utils.help.HelpConstants;
11 import org.broadinstitute.hellbender.utils.read.GATKRead;
12 import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
13 import org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines;
14 
15 import java.util.*;
16 
17 /**
18  * Variant allele fraction for each sample.
19  *
20  * <p>This annotation describes the proportion of the sample's reads that support the variant allele(s). It uses only reads that are actually considered informative by HaplotypeCaller (HC) or Mutect2, using pre-read likelihoods that are produced internally by HC/Mutect2.</p>
21  * <p>In this context, an informative read is defined as one that allows the allele it carries to be easily distinguished. In contrast, a read might be considered uninformative if, for example, it only partially overlaps a short tandem repeat and it is not clear whether the read contains the reference allele or an extra repeat.</p>
22  *
23  * <p>See the method documentation on <a href="http://www.broadinstitute.org/gatk/guide/article?id=4721">using coverage information</a> for important interpretation details.</p>
24  *
25  * <h3>Caveats</h3>
26  * <ul>
27  *     <li>If a genotype is already annotated with allele fraction (as by the SomaticGenotypingEngine if `--get-af-from-ad` is not specified), the value will not be recalculated.</li>
28  * </ul>
29  *
30  * <h3>Related annotations</h3>
31  * <ul>
32  *     <li><b><a href="https://www.broadinstitute.org/gatk/guide/tooldocs/org_broadinstitute_gatk_tools_walkers_annotator_DepthPerAlleleBySample.php">DepthPerAlleleBySample</a></b> calculates depth of coverage for each allele per sample (AD).</li>
33  * </ul>
34  *
35  */
36 @DocumentedFeature(groupName= HelpConstants.DOC_CAT_ANNOTATORS, groupSummary=HelpConstants.DOC_CAT_ANNOTATORS_SUMMARY, summary="Variant allele fraction for a genotype")
37 
38 public final class AlleleFraction extends GenotypeAnnotation {
39     @Override
annotate(final ReferenceContext ref, final VariantContext vc, final Genotype g, final GenotypeBuilder gb, final AlleleLikelihoods<GATKRead, Allele> likelihoods)40     public void annotate(final ReferenceContext ref,
41                          final VariantContext vc,
42                          final Genotype g,
43                          final GenotypeBuilder gb,
44                          final AlleleLikelihoods<GATKRead, Allele> likelihoods) {
45         Utils.nonNull(gb, "gb is null");
46         Utils.nonNull(vc, "vc is null");
47 
48         final GenotypesContext genotypes = vc.getGenotypes();
49         if ( g == null || !g.isCalled() || g.hasExtendedAttribute(getKeyNames().get(0))) {  //don't overwrite AF based on Bayesian estimate if it already exists
50             return;
51         }
52        if ( g.hasAD() ) {
53             final int[] AD = g.getAD();
54             final double[] allAlleleFractions = MathUtils.normalizeSumToOne(Arrays.stream(AD).mapToDouble(x -> x).toArray());
55             gb.attribute(getKeyNames().get(0), Arrays.copyOfRange(allAlleleFractions, 1, allAlleleFractions.length)); //omit the first entry of the array corresponding to the reference
56         }
57         // if there is no AD value calculate it now using likelihoods
58         else if (likelihoods != null) {
59             DepthPerAlleleBySample adCalc = new DepthPerAlleleBySample();
60             final int[] AD = adCalc.annotateWithLikelihoods(vc, g, new LinkedHashSet<>(vc.getAlleles()), likelihoods);
61             final double[] allAlleleFractions = MathUtils.normalizeSumToOne(Arrays.stream(AD).mapToDouble(x -> x*1.0).toArray());
62             gb.attribute(getKeyNames().get(0), Arrays.copyOfRange(allAlleleFractions, 1, allAlleleFractions.length)); //omit the first entry of the array corresponding to the reference
63         }
64     }
65 
66     @Override
getKeyNames()67     public List<String> getKeyNames() { return Collections.singletonList(GATKVCFConstants.ALLELE_FRACTION_KEY);}
68 
69     @Override
getDescriptions()70     public List<VCFFormatHeaderLine> getDescriptions() {
71         return Collections.singletonList(GATKVCFHeaderLines.getFormatLine(getKeyNames().get(0)));
72     }
73 }
74