1 package org.broadinstitute.hellbender.tools.walkers.annotator; 2 3 import com.google.common.primitives.Doubles; 4 import htsjdk.variant.variantcontext.Allele; 5 import htsjdk.variant.variantcontext.GenotypesContext; 6 import htsjdk.variant.variantcontext.VariantContext; 7 import org.broadinstitute.hellbender.engine.ReferenceContext; 8 import org.broadinstitute.hellbender.utils.MannWhitneyU; 9 import org.broadinstitute.hellbender.utils.QualityUtils; 10 import org.broadinstitute.hellbender.utils.Utils; 11 import org.broadinstitute.hellbender.utils.genotyper.AlleleLikelihoods; 12 import org.broadinstitute.hellbender.utils.read.GATKRead; 13 14 import java.util.*; 15 16 17 /** 18 * Abstract root for all RankSum based annotations 19 */ 20 public abstract class RankSumTest extends InfoFieldAnnotation implements Annotation { 21 protected final static double INVALID_ELEMENT_FROM_READ = Double.NEGATIVE_INFINITY; 22 23 @Override annotate(final ReferenceContext ref, final VariantContext vc, final AlleleLikelihoods<GATKRead, Allele> likelihoods)24 public Map<String, Object> annotate(final ReferenceContext ref, 25 final VariantContext vc, 26 final AlleleLikelihoods<GATKRead, Allele> likelihoods) { 27 Utils.nonNull(vc, "vc is null"); 28 29 final GenotypesContext genotypes = vc.getGenotypes(); 30 if (genotypes == null || genotypes.isEmpty()) { 31 return Collections.emptyMap(); 32 } 33 34 final List<Double> refQuals = new ArrayList<>(); 35 final List<Double> altQuals = new ArrayList<>(); 36 37 if( likelihoods != null) { 38 fillQualsFromLikelihood(vc, likelihoods, refQuals, altQuals); 39 } 40 41 42 if ( refQuals.isEmpty() && altQuals.isEmpty() ) { 43 return Collections.emptyMap(); 44 } 45 46 final MannWhitneyU mannWhitneyU = new MannWhitneyU(); 47 48 // we are testing that set1 (the alt bases) have lower quality scores than set2 (the ref bases) 49 final MannWhitneyU.Result result = mannWhitneyU.test(Doubles.toArray(altQuals), Doubles.toArray(refQuals), MannWhitneyU.TestType.FIRST_DOMINATES); 50 final double zScore = result.getZ(); 51 52 if (Double.isNaN(zScore)) { 53 return Collections.emptyMap(); 54 } else { 55 return Collections.singletonMap(getKeyNames().get(0), String.format("%.3f", zScore)); 56 } 57 } 58 fillQualsFromLikelihood(VariantContext vc, AlleleLikelihoods<GATKRead, Allele> likelihoods, List<Double> refQuals, List<Double> altQuals)59 protected void fillQualsFromLikelihood(VariantContext vc, AlleleLikelihoods<GATKRead, Allele> likelihoods, List<Double> refQuals, List<Double> altQuals) { 60 for (final AlleleLikelihoods<GATKRead, Allele>.BestAllele bestAllele : likelihoods.bestAllelesBreakingTies()) { 61 final GATKRead read = bestAllele.evidence; 62 final Allele allele = bestAllele.allele; 63 if (bestAllele.isInformative() && isUsableRead(read, vc)) { 64 final OptionalDouble value = getElementForRead(read, vc, bestAllele); 65 // Bypass read if the clipping goal is not reached or the refloc is inside a spanning deletion 66 if (value.isPresent() && value.getAsDouble() != INVALID_ELEMENT_FROM_READ) { 67 if (allele.isReference()) { 68 refQuals.add(value.getAsDouble()); 69 } else if (vc.hasAllele(allele)) { 70 altQuals.add(value.getAsDouble()); 71 } 72 } 73 } 74 } 75 } 76 77 /** 78 * Get the element for the given read at the given reference position 79 * 80 * @param read the read 81 * @param vc the variant to be annotated 82 * @param bestAllele the most likely allele for this read 83 * @return a Double representing the element to be used in the rank sum test, or null if it should not be used 84 */ getElementForRead(final GATKRead read, final VariantContext vc, final AlleleLikelihoods<GATKRead, Allele>.BestAllele bestAllele)85 protected OptionalDouble getElementForRead(final GATKRead read, final VariantContext vc, final AlleleLikelihoods<GATKRead, Allele>.BestAllele bestAllele) { 86 return getElementForRead(read, vc); 87 } 88 89 /** 90 * Get the element for the given read at the given reference position 91 * 92 * @param read the read 93 * @param vc the variant to be annotated 94 * @return an OptionalDouble representing the element to be used in the rank sum test, empty if it should not be used 95 */ getElementForRead(final GATKRead read, final VariantContext vc)96 protected abstract OptionalDouble getElementForRead(final GATKRead read, final VariantContext vc); 97 98 /** 99 * Can the read be used in comparative tests between ref / alt bases? 100 * 101 * @param read the read to consider 102 * @param vc the variant to be annotated 103 * @return true if this read is meaningful for comparison, false otherwise 104 */ isUsableRead(final GATKRead read, final VariantContext vc)105 protected boolean isUsableRead(final GATKRead read, final VariantContext vc) { 106 Utils.nonNull(read); 107 return read.getMappingQuality() != 0 && read.getMappingQuality() != QualityUtils.MAPPING_QUALITY_UNAVAILABLE; 108 } 109 110 } 111