1 package org.broadinstitute.hellbender.tools.walkers.annotator;
2 
3 import htsjdk.variant.variantcontext.Allele;
4 import htsjdk.variant.variantcontext.VariantContext;
5 import org.broadinstitute.barclay.help.DocumentedFeature;
6 import org.broadinstitute.hellbender.utils.Utils;
7 import org.broadinstitute.hellbender.utils.genotyper.AlleleLikelihoods;
8 import org.broadinstitute.hellbender.utils.help.HelpConstants;
9 import org.broadinstitute.hellbender.utils.read.GATKRead;
10 import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
11 
12 import java.util.Collections;
13 import java.util.List;
14 import java.util.OptionalDouble;
15 
16 /**
17  * Rank Sum Test of per-read likelihoods of REF versus ALT reads
18  *
19  * <p>This variant-level annotation compares the likelihoods of reads to their best haplotype match, between reads that support the reference allele and those that support the alternate allele. The ideal result is a value close to zero, which indicates there is little to no difference.  A negative value indicates that the reads supporting the alternate allele have lower likelihoods to their best haplotype match than those supporting the reference allele. Conversely, a positive value indicates that the reads supporting the alternate allele have higher likelihoods to their best haplotype match than those supporting the reference allele. Finding a statistically significant difference either way suggests that the sequencing and/or mapping process may have been biased or affected by an artifact.</p>
20  *
21  * <h3>Statistical notes</h3>
22  * <p>The value output for this annotation is the u-based z-approximation from the Mann-Whitney-Wilcoxon Rank Sum Test for per-read likelihoods to the best haplotype match (likelihoods of reads supporting REF vs. likelihoods of reads supporting ALT). See the <a href="http://www.broadinstitute.org/gatk/guide/article?id=4732">method document on statistical tests</a> for a more detailed explanation of the ranksum test.</p>
23  *
24  * <h3>Caveat</h3>
 * <p>The likelihood rank sum test cannot be calculated for sites without a mixture of reads showing both the reference and alternate alleles.</p>
26  *
27  */
28 @DocumentedFeature(groupName=HelpConstants.DOC_CAT_ANNOTATORS, groupSummary=HelpConstants.DOC_CAT_ANNOTATORS_SUMMARY, summary="Rank sum test of per-read likelihoods of REF versus ALT reads (LikelihoodRankSum)")
29 public final class LikelihoodRankSumTest extends RankSumTest {
30 
31     @Override
getKeyNames()32     public List<String> getKeyNames() { return Collections.singletonList(GATKVCFConstants.LIKELIHOOD_RANK_SUM_KEY); }
33 
34     @Override
getElementForRead(final GATKRead read, final VariantContext vc, final AlleleLikelihoods<GATKRead, Allele>.BestAllele bestAllele)35     protected OptionalDouble getElementForRead(final GATKRead read, final VariantContext vc, final AlleleLikelihoods<GATKRead, Allele>.BestAllele bestAllele) {
36         Utils.nonNull(read, "read is null");
37         Utils.nonNull(bestAllele, "mostLikelyAllele is null");
38         if ( ! bestAllele.isInformative() ) {
39             throw new IllegalStateException("Should never see a non-informative allele for read " + read + " BestAllele " + bestAllele);
40         }
41         return OptionalDouble.of(bestAllele.likelihood);
42     }
43 
44     @Override
getElementForRead(final GATKRead read, final VariantContext vc)45     protected OptionalDouble getElementForRead(final GATKRead read, final VariantContext vc) {
46         // todo its possible this should throw, as This method should never have been called as getElementForRead(read,refloc,mostLikelyAllele) was overriden
47         return OptionalDouble.empty();
48     }
49 
50 }
51