package org.broadinstitute.hellbender.tools.walkers.annotator;

import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.VariantContext;
import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.genotyper.AlleleLikelihoods;
import org.broadinstitute.hellbender.utils.help.HelpConstants;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;

import java.util.*;

/**
 * Summarize genotype statistics from all samples at the site level
 *
 * <p>This annotation collects several genotype-level statistics from all samples and summarizes them in the INFO field.
 * The following statistics are emitted by this class:</p>
 * <ul>
 *     <li>Number of no-called samples (as reported by {@code VariantContext.getNoCallCount()})</li>
 *     <li>Mean of all GQ values (only when at least one genotype has a GQ)</li>
 *     <li>Standard deviation of all GQ values (only when at least two genotypes have a GQ)</li>
 * </ul>
 * <h3>Note</h3>
 * <p>These summaries can all be recomputed from the genotypes on the fly but it is a lot faster to add them here as INFO field annotations.</p>
 */
@DocumentedFeature(groupName=HelpConstants.DOC_CAT_ANNOTATORS, groupSummary=HelpConstants.DOC_CAT_ANNOTATORS_SUMMARY, summary="Summary of genotype statistics from all samples (NCC, GQ_MEAN, GQ_STDDEV)")
public final class GenotypeSummaries extends InfoFieldAnnotation {

    /**
     * Builds the site-level genotype summary for a variant.
     *
     * @param ref         not used by this annotation
     * @param vc          the variant whose genotypes are summarized; must not be null
     * @param likelihoods not used by this annotation
     * @return an empty map when {@code vc} has no genotypes; otherwise an insertion-ordered map holding the
     *         no-call count and, when enough genotypes carry a GQ, the GQ mean and standard deviation
     *         rendered as strings with two decimal places
     */
    @Override
    public Map<String, Object> annotate(final ReferenceContext ref,
                                        final VariantContext vc,
                                        final AlleleLikelihoods<GATKRead, Allele> likelihoods) {
        Utils.nonNull(vc);
        if ( ! vc.hasGenotypes() ) {
            return Collections.emptyMap();
        }

        final Map<String, Object> returnMap = new LinkedHashMap<>();
        returnMap.put(GATKVCFConstants.NOCALL_CHROM_KEY, vc.getNoCallCount());

        // Accumulate GQ over the genotypes that actually carry one; the others are skipped entirely.
        final DescriptiveStatistics stats = new DescriptiveStatistics();
        for ( final Genotype g : vc.getGenotypes() ) {
            if ( g.hasGQ() ) {
                stats.addValue(g.getGQ());
            }
        }

        final long gqCount = stats.getN();
        if ( gqCount > 0L ) {
            returnMap.put(GATKVCFConstants.GQ_MEAN_KEY, formatStatistic(stats.getMean()));
            // A spread is only reported when there is more than one observation.
            if ( gqCount > 1L ) {
                returnMap.put(GATKVCFConstants.GQ_STDEV_KEY, formatStatistic(stats.getStandardDeviation()));
            }
        }

        return returnMap;
    }

    /**
     * Renders a statistic with two decimal places.
     *
     * <p>The locale is pinned to {@link Locale#ROOT} so that the decimal separator is always '.',
     * regardless of the JVM default locale; a locale-specific ',' would be misread as a value
     * separator inside a VCF INFO field.</p>
     */
    private static String formatStatistic(final double value) {
        return String.format(Locale.ROOT, "%.2f", value);
    }

    /**
     * @return the INFO keys this annotation may write, in the order they are added to the result map
     */
    @Override
    public List<String> getKeyNames() {
        return Arrays.asList(
                GATKVCFConstants.NOCALL_CHROM_KEY,
                GATKVCFConstants.GQ_MEAN_KEY,
                GATKVCFConstants.GQ_STDEV_KEY);
    }
}