1 package org.broadinstitute.hellbender.tools.walkers.annotator;
2 
3 import htsjdk.variant.variantcontext.Allele;
4 import htsjdk.variant.variantcontext.Genotype;
5 import htsjdk.variant.variantcontext.VariantContext;
6 import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
7 import org.broadinstitute.barclay.help.DocumentedFeature;
8 import org.broadinstitute.hellbender.engine.ReferenceContext;
9 import org.broadinstitute.hellbender.utils.Utils;
10 import org.broadinstitute.hellbender.utils.genotyper.AlleleLikelihoods;
11 import org.broadinstitute.hellbender.utils.help.HelpConstants;
12 import org.broadinstitute.hellbender.utils.read.GATKRead;
13 import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
14 
15 import java.util.*;
16 
17 /**
18  * Summarize genotype statistics from all samples at the site level
19  *
20  * <p>This annotation collects several genotype-level statistics from all samples and summarizes them in the INFO field. The following statistics are collected:</p>
21  * <ul>
22  *     <li>Number of called chromosomes (should amount to ploidy * called samples)</li>
23  *     <li>Number of no-called samples</li>
24  *     <li>p-value from Hardy-Weinberg Equilibrium test</li>
25  *     <li>Mean of all GQ values</li>
26  *     <li>Standard deviation of all GQ values</li>
27  * </ul>
28  * <h3>Note</h3>
29  * <p>These summaries can all be recomputed from the genotypes on the fly but it is a lot faster to add them here as INFO field annotations.</p>
30  */
31 @DocumentedFeature(groupName=HelpConstants.DOC_CAT_ANNOTATORS, groupSummary=HelpConstants.DOC_CAT_ANNOTATORS_SUMMARY, summary="Summary of genotype statistics from all samples (NCC, GQ_MEAN, GQ_STDDEV)")
32 public final class GenotypeSummaries extends InfoFieldAnnotation {
33 
34     @Override
annotate(final ReferenceContext ref, final VariantContext vc, final AlleleLikelihoods<GATKRead, Allele> likelihoods)35     public Map<String, Object> annotate(final ReferenceContext ref,
36                                         final VariantContext vc,
37                                         final AlleleLikelihoods<GATKRead, Allele> likelihoods) {
38         Utils.nonNull(vc);
39         if ( ! vc.hasGenotypes() ) {
40             return Collections.emptyMap();
41         }
42 
43         final Map<String,Object> returnMap = new LinkedHashMap<>();
44         returnMap.put(GATKVCFConstants.NOCALL_CHROM_KEY, vc.getNoCallCount());
45 
46         final DescriptiveStatistics stats = new DescriptiveStatistics();
47         for( final Genotype g : vc.getGenotypes() ) {
48             if( g.hasGQ() ) {
49                 stats.addValue(g.getGQ());
50             }
51         }
52         if( stats.getN() > 0L ) {
53             returnMap.put(GATKVCFConstants.GQ_MEAN_KEY, String.format("%.2f", stats.getMean()));
54             if( stats.getN() > 1L ) {
55                 returnMap.put(GATKVCFConstants.GQ_STDEV_KEY, String.format("%.2f", stats.getStandardDeviation()));
56             }
57         }
58 
59         return returnMap;
60     }
61 
62     @Override
getKeyNames()63     public List<String> getKeyNames() {
64         return Arrays.asList(
65                 GATKVCFConstants.NOCALL_CHROM_KEY,
66                 GATKVCFConstants.GQ_MEAN_KEY,
67                 GATKVCFConstants.GQ_STDEV_KEY);
68     }
69 }
70