1 package org.broadinstitute.hellbender.tools.walkers.annotator;
2 
3 import com.google.common.primitives.Doubles;
4 import htsjdk.variant.variantcontext.Allele;
5 import htsjdk.variant.variantcontext.GenotypesContext;
6 import htsjdk.variant.variantcontext.VariantContext;
7 import org.broadinstitute.hellbender.engine.ReferenceContext;
8 import org.broadinstitute.hellbender.utils.MannWhitneyU;
9 import org.broadinstitute.hellbender.utils.QualityUtils;
10 import org.broadinstitute.hellbender.utils.Utils;
11 import org.broadinstitute.hellbender.utils.genotyper.AlleleLikelihoods;
12 import org.broadinstitute.hellbender.utils.read.GATKRead;
13 
14 import java.util.*;
15 
16 
17 /**
18  * Abstract root for all RankSum based annotations
19  */
20 public abstract class RankSumTest extends InfoFieldAnnotation implements Annotation {
21     protected final static double INVALID_ELEMENT_FROM_READ = Double.NEGATIVE_INFINITY;
22 
23     @Override
annotate(final ReferenceContext ref, final VariantContext vc, final AlleleLikelihoods<GATKRead, Allele> likelihoods)24     public Map<String, Object> annotate(final ReferenceContext ref,
25                                         final VariantContext vc,
26                                         final AlleleLikelihoods<GATKRead, Allele> likelihoods) {
27         Utils.nonNull(vc, "vc is null");
28 
29         final GenotypesContext genotypes = vc.getGenotypes();
30         if (genotypes == null || genotypes.isEmpty()) {
31             return Collections.emptyMap();
32         }
33 
34         final List<Double> refQuals = new ArrayList<>();
35         final List<Double> altQuals = new ArrayList<>();
36 
37         if( likelihoods != null) {
38             fillQualsFromLikelihood(vc, likelihoods, refQuals, altQuals);
39         }
40 
41 
42         if ( refQuals.isEmpty() && altQuals.isEmpty() ) {
43             return Collections.emptyMap();
44         }
45 
46         final MannWhitneyU mannWhitneyU = new MannWhitneyU();
47 
48         // we are testing that set1 (the alt bases) have lower quality scores than set2 (the ref bases)
49         final MannWhitneyU.Result result = mannWhitneyU.test(Doubles.toArray(altQuals), Doubles.toArray(refQuals), MannWhitneyU.TestType.FIRST_DOMINATES);
50         final double zScore = result.getZ();
51 
52         if (Double.isNaN(zScore)) {
53             return Collections.emptyMap();
54         } else {
55             return Collections.singletonMap(getKeyNames().get(0), String.format("%.3f", zScore));
56         }
57     }
58 
fillQualsFromLikelihood(VariantContext vc, AlleleLikelihoods<GATKRead, Allele> likelihoods, List<Double> refQuals, List<Double> altQuals)59     protected void fillQualsFromLikelihood(VariantContext vc, AlleleLikelihoods<GATKRead, Allele> likelihoods, List<Double> refQuals, List<Double> altQuals) {
60         for (final AlleleLikelihoods<GATKRead, Allele>.BestAllele bestAllele : likelihoods.bestAllelesBreakingTies()) {
61             final GATKRead read = bestAllele.evidence;
62             final Allele allele = bestAllele.allele;
63             if (bestAllele.isInformative() && isUsableRead(read, vc)) {
64                 final OptionalDouble value = getElementForRead(read, vc, bestAllele);
65                 // Bypass read if the clipping goal is not reached or the refloc is inside a spanning deletion
66                 if (value.isPresent() && value.getAsDouble() != INVALID_ELEMENT_FROM_READ) {
67                     if (allele.isReference()) {
68                         refQuals.add(value.getAsDouble());
69                     } else if (vc.hasAllele(allele)) {
70                         altQuals.add(value.getAsDouble());
71                     }
72                 }
73             }
74         }
75     }
76 
77     /**
78      * Get the element for the given read at the given reference position
79      *
80      * @param read     the read
81      * @param vc       the variant to be annotated
82      * @param bestAllele the most likely allele for this read
83      * @return a Double representing the element to be used in the rank sum test, or null if it should not be used
84      */
getElementForRead(final GATKRead read, final VariantContext vc, final AlleleLikelihoods<GATKRead, Allele>.BestAllele bestAllele)85     protected OptionalDouble getElementForRead(final GATKRead read, final VariantContext vc, final AlleleLikelihoods<GATKRead, Allele>.BestAllele bestAllele) {
86         return getElementForRead(read, vc);
87     }
88 
89     /**
90      * Get the element for the given read at the given reference position
91      *
92      * @param read     the read
93      * @param vc       the variant to be annotated
94      * @return an OptionalDouble representing the element to be used in the rank sum test, empty if it should not be used
95      */
getElementForRead(final GATKRead read, final VariantContext vc)96     protected abstract OptionalDouble getElementForRead(final GATKRead read, final VariantContext vc);
97 
98     /**
99      * Can the read be used in comparative tests between ref / alt bases?
100      *
101      * @param read   the read to consider
102      * @param vc    the variant to be annotated
103      * @return true if this read is meaningful for comparison, false otherwise
104      */
isUsableRead(final GATKRead read, final VariantContext vc)105     protected boolean isUsableRead(final GATKRead read, final VariantContext vc) {
106         Utils.nonNull(read);
107         return read.getMappingQuality() != 0 && read.getMappingQuality() != QualityUtils.MAPPING_QUALITY_UNAVAILABLE;
108     }
109 
110 }
111