1 package org.broadinstitute.hellbender.tools.walkers.annotator;
2 
3 import com.google.common.primitives.Ints;
4 import htsjdk.samtools.Cigar;
5 import htsjdk.samtools.CigarElement;
6 import htsjdk.samtools.CigarOperator;
7 import htsjdk.variant.variantcontext.VariantContext;
8 import org.broadinstitute.barclay.help.DocumentedFeature;
9 import org.broadinstitute.hellbender.utils.MathUtils;
10 import org.broadinstitute.hellbender.utils.Utils;
11 import org.broadinstitute.hellbender.utils.help.HelpConstants;
12 import org.broadinstitute.hellbender.utils.read.AlignmentUtils;
13 import org.broadinstitute.hellbender.utils.read.GATKRead;
14 import org.broadinstitute.hellbender.utils.read.ReadUtils;
15 import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
16 
17 import java.util.List;
18 import java.util.OptionalDouble;
19 import java.util.OptionalInt;
20 
21 /**
22  * Median distance of variant starts from ends of reads supporting each alt allele.
23  *
24  * </p>The output is an array containing, for each alt allele, the median distance of the variant start from the closest read end over all reads that best match that allele.</p>
25  * </p>For example, for variant context with ref allele A and alt allele C the read position for alt-supporting read GGGGCTT is 2 because the A to C
26  * substitution is 2 bases from the right end of the read, which is less than its distance from the left end.
27  * For variant context with ref allele AG and alt allele A (deletion) the read position of alt-supporting read ATTTTT is 0.
28  * For variant context with ref allele A and alt allele AG (insertion) the read position of alt-supporting read TTTTAG is 1.</p>
29  * <p>The annotation considers only the read's bases themselves and not the position they map to with respect to the reference.  For example,
30  * suppose a substitution is preceded by 80 matching bases and followed by 10 matching bases, a 10-base deletion, and 10 more matching bases.  Its distance from the end of the read
31  * is 20 bases, not 30 bases, because the deleted bases belong to the reference, not the read.  Similarly soft-clipped bases are counted in the distance.</p>
32  * <p>This annotation is useful for filtering alignment artifacts.</p>
33  */
34 @DocumentedFeature(groupName=HelpConstants.DOC_CAT_ANNOTATORS, groupSummary=HelpConstants.DOC_CAT_ANNOTATORS_SUMMARY, summary="Median distance of variant starts from ends of reads supporting each allele (MPOS)")
35 public class ReadPosition extends PerAlleleAnnotation implements StandardMutectAnnotation {
36 
37     // we don't want a GGA mode allele with no reads to prejudice us against a site so we assign a non-suspicious value
38     private static final int VALUE_FOR_NO_READS = 50;
39 
40     @Override
aggregate(final List<Integer> values)41     protected int aggregate(final List<Integer> values) {
42         return values.isEmpty() ? VALUE_FOR_NO_READS : MathUtils.median(Ints.toArray(values));
43     }
44 
45     @Override
getVcfKey()46     protected String getVcfKey() { return GATKVCFConstants.MEDIAN_READ_POSITON_KEY; }
47 
48     @Override
getDescription()49     protected String getDescription() { return "median distance from end of read"; }
50 
51     @Override
getValueForRead(final GATKRead read, final VariantContext vc)52     protected OptionalInt getValueForRead(final GATKRead read, final VariantContext vc) {
53         if (vc.getStart() < read.getStart() || read.getEnd() < vc.getStart()) {
54             return OptionalInt.empty();
55         }
56         final OptionalDouble valueAsDouble = ReadPosRankSumTest.getReadPosition(read, vc);
57         return valueAsDouble.isPresent() ? OptionalInt.of((int) valueAsDouble.getAsDouble()) : OptionalInt.empty();
58     }
59 }
60