1 package org.broadinstitute.hellbender.tools.walkers.annotator;
2 
3 import htsjdk.variant.variantcontext.VariantContext;
4 import org.broadinstitute.barclay.help.DocumentedFeature;
5 import org.broadinstitute.hellbender.utils.Utils;
6 import org.broadinstitute.hellbender.utils.help.HelpConstants;
7 import org.broadinstitute.hellbender.utils.read.GATKRead;
8 import org.broadinstitute.hellbender.utils.read.ReadUtils;
9 import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
10 
11 import java.util.Collections;
12 import java.util.List;
13 import java.util.Optional;
14 import java.util.OptionalDouble;
15 
16 
17 /**
18  * Rank Sum Test of REF versus ALT base quality scores
19  *
 * <p>This variant-level annotation compares the base qualities of the data supporting the reference allele with those supporting the alternate allele. The ideal result is a value close to zero, which indicates there is little to no difference. A negative value indicates that the bases supporting the alternate allele have lower quality scores than those supporting the reference allele. Conversely, a positive value indicates that the bases supporting the alternate allele have higher quality scores than those supporting the reference allele. Finding a statistically significant difference either way suggests that the sequencing process may have been biased or affected by an artifact.</p>
21  *
22  * <h3>Statistical notes</h3>
23  * <p>The value output for this annotation is the u-based z-approximation from the Mann-Whitney-Wilcoxon Rank Sum Test for base qualities (bases supporting REF vs. bases supporting ALT). See the <a href="http://www.broadinstitute.org/gatk/guide/article?id=4732">method document on statistical tests</a> for a more detailed explanation of the ranksum test.</p>
24  *
25  * <h3>Caveat</h3>
 * <p>The base quality rank sum test cannot be calculated for sites without a mixture of reads showing both the reference and alternate alleles.</p>
27  *
28  */
29 @DocumentedFeature(groupName= HelpConstants.DOC_CAT_ANNOTATORS, groupSummary=HelpConstants.DOC_CAT_ANNOTATORS_SUMMARY, summary="Rank sum test of REF versus ALT base quality scores (BaseQRankSum)")
30 public final class BaseQualityRankSumTest extends RankSumTest implements StandardAnnotation {
31 
32     @Override
getKeyNames()33     public List<String> getKeyNames() { return Collections.singletonList(GATKVCFConstants.BASE_QUAL_RANK_SUM_KEY); }
34 
35     @Override
getElementForRead(final GATKRead read, final VariantContext vc)36     protected OptionalDouble getElementForRead(final GATKRead read, final VariantContext vc) {
37         return getReadBaseQuality(read, vc);
38     }
39 
getReadBaseQuality(final GATKRead read, final VariantContext vc)40     public static OptionalDouble getReadBaseQuality(final GATKRead read, final VariantContext vc) {
41         Utils.nonNull(read);
42         final Optional<Byte> readBaseQuality = ReadUtils.getReadBaseQualityAtReferenceCoordinate(read, vc.getStart());
43         return readBaseQuality.isPresent() ? OptionalDouble.of(readBaseQuality.get()) : OptionalDouble.empty();
44     }
45 }
46