1 package org.broadinstitute.hellbender.tools.walkers.annotator; 2 3 import htsjdk.variant.variantcontext.*; 4 import htsjdk.variant.vcf.VCFFormatHeaderLine; 5 import org.broadinstitute.barclay.help.DocumentedFeature; 6 import org.broadinstitute.hellbender.engine.ReferenceContext; 7 import org.broadinstitute.hellbender.utils.MathUtils; 8 import org.broadinstitute.hellbender.utils.Utils; 9 import org.broadinstitute.hellbender.utils.genotyper.AlleleLikelihoods; 10 import org.broadinstitute.hellbender.utils.help.HelpConstants; 11 import org.broadinstitute.hellbender.utils.read.GATKRead; 12 import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; 13 import org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines; 14 15 import java.util.*; 16 17 /** 18 * Variant allele fraction for each sample. 19 * 20 * <p>This annotation describes the proportion of the sample's reads that support the variant allele(s). It uses only reads that are actually considered informative by HaplotypeCaller (HC) or Mutect2, using pre-read likelihoods that are produced internally by HC/Mutect2.</p> 21 * <p>In this context, an informative read is defined as one that allows the allele it carries to be easily distinguished. In contrast, a read might be considered uninformative if, for example, it only partially overlaps a short tandem repeat and it is not clear whether the read contains the reference allele or an extra repeat.</p> 22 * 23 * <p>See the method documentation on <a href="http://www.broadinstitute.org/gatk/guide/article?id=4721">using coverage information</a> for important interpretation details.</p> 24 * 25 * <h3>Caveats</h3> 26 * <ul> 27 * <li>If a genotype is already annotated with allele fraction (as by the SomaticGenotypingEngine if `--get-af-from-ad` is not specified), the value will not be recalculated.</li> 28 * </ul> 29 * 30 * <h3>Related annotations</h3> 31 * <ul> 32 * <li><b><a href="https://www.broadinstitute.org/gatk/guide/tooldocs/org_broadinstitute_gatk_tools_walkers_annotator_DepthPerAlleleBySample.php">DepthPerAlleleBySample</a></b> calculates depth of coverage for each allele per sample (AD).</li> 33 * </ul> 34 * 35 */ 36 @DocumentedFeature(groupName= HelpConstants.DOC_CAT_ANNOTATORS, groupSummary=HelpConstants.DOC_CAT_ANNOTATORS_SUMMARY, summary="Variant allele fraction for a genotype") 37 38 public final class AlleleFraction extends GenotypeAnnotation { 39 @Override annotate(final ReferenceContext ref, final VariantContext vc, final Genotype g, final GenotypeBuilder gb, final AlleleLikelihoods<GATKRead, Allele> likelihoods)40 public void annotate(final ReferenceContext ref, 41 final VariantContext vc, 42 final Genotype g, 43 final GenotypeBuilder gb, 44 final AlleleLikelihoods<GATKRead, Allele> likelihoods) { 45 Utils.nonNull(gb, "gb is null"); 46 Utils.nonNull(vc, "vc is null"); 47 48 final GenotypesContext genotypes = vc.getGenotypes(); 49 if ( g == null || !g.isCalled() || g.hasExtendedAttribute(getKeyNames().get(0))) { //don't overwrite AF based on Bayesian estimate if it already exists 50 return; 51 } 52 if ( g.hasAD() ) { 53 final int[] AD = g.getAD(); 54 final double[] allAlleleFractions = MathUtils.normalizeSumToOne(Arrays.stream(AD).mapToDouble(x -> x).toArray()); 55 gb.attribute(getKeyNames().get(0), Arrays.copyOfRange(allAlleleFractions, 1, allAlleleFractions.length)); //omit the first entry of the array corresponding to the reference 56 } 57 // if there is no AD value calculate it now using likelihoods 58 else if (likelihoods != null) { 59 DepthPerAlleleBySample adCalc = new DepthPerAlleleBySample(); 60 final int[] AD = adCalc.annotateWithLikelihoods(vc, g, new LinkedHashSet<>(vc.getAlleles()), likelihoods); 61 final double[] allAlleleFractions = MathUtils.normalizeSumToOne(Arrays.stream(AD).mapToDouble(x -> x*1.0).toArray()); 62 gb.attribute(getKeyNames().get(0), Arrays.copyOfRange(allAlleleFractions, 1, allAlleleFractions.length)); //omit the first entry of the array corresponding to the reference 63 } 64 } 65 66 @Override getKeyNames()67 public List<String> getKeyNames() { return Collections.singletonList(GATKVCFConstants.ALLELE_FRACTION_KEY);} 68 69 @Override getDescriptions()70 public List<VCFFormatHeaderLine> getDescriptions() { 71 return Collections.singletonList(GATKVCFHeaderLines.getFormatLine(getKeyNames().get(0))); 72 } 73 } 74