1 package org.broadinstitute.hellbender.tools.walkers.annotator;
2 
3 import htsjdk.variant.variantcontext.Allele;
4 import htsjdk.variant.variantcontext.VariantContext;
5 import htsjdk.variant.vcf.VCFHeaderLineType;
6 import htsjdk.variant.vcf.VCFInfoHeaderLine;
7 import org.apache.commons.lang.StringUtils;
8 import org.broadinstitute.barclay.help.DocumentedFeature;
9 import org.broadinstitute.hellbender.engine.ReferenceContext;
10 import org.broadinstitute.hellbender.utils.Utils;
11 import org.broadinstitute.hellbender.utils.genotyper.AlleleLikelihoods;
12 import org.broadinstitute.hellbender.utils.help.HelpConstants;
13 import org.broadinstitute.hellbender.utils.logging.OneShotLogger;
14 import org.broadinstitute.hellbender.utils.read.GATKRead;
15 
16 import java.util.Arrays;
17 import java.util.Collections;
18 import java.util.List;
19 import java.util.Map;
20 
21 /**
22  * Local reference context at a variant position.
23  *
24  * </p>The annotation gives ten reference bases each to the left and right of the variant start and the start base for a total of 21 reference bases.
25  * Start position is defined as one base before indels.  For example, the reference context AAAAAAAAAACTTTTTTTTTT would apply to a SNV variant
26  * context with ref allele C and alt allele G as well as to a deletion variant context with ref allele CT and alt allele C.</p>
27  *
28  */
29 @DocumentedFeature(groupName=HelpConstants.DOC_CAT_ANNOTATORS, groupSummary=HelpConstants.DOC_CAT_ANNOTATORS_SUMMARY, summary="Annotate with local reference bases (REF_BASES)")
30 public class ReferenceBases extends InfoFieldAnnotation {
31     public static final String REFERENCE_BASES_KEY = "REF_BASES";
32 
33     private int NUM_BASES_ON_EITHER_SIDE = 10;
34     private int REFERENCE_CONTEXT_LENGTH = 2*NUM_BASES_ON_EITHER_SIDE + 1;
35 
36     protected final OneShotLogger warning = new OneShotLogger(this.getClass());
37 
38     @Override
getKeyNames()39     public List<String> getKeyNames() { return Collections.singletonList(REFERENCE_BASES_KEY); }
40 
41     @Override
annotate(final ReferenceContext ref, final VariantContext vc, final AlleleLikelihoods<GATKRead, Allele> likelihoods)42     public Map<String, Object> annotate(final ReferenceContext ref,
43                                         final VariantContext vc,
44                                         final AlleleLikelihoods<GATKRead, Allele> likelihoods) {
45         if (ref==null)  {
46             warning.warn("REF_BASES requires the reference to annotate, none was provided");
47             return Collections.emptyMap();
48         }
49         final int basesToDiscardInFront = Math.max(vc.getStart() - ref.getWindow().getStart() - NUM_BASES_ON_EITHER_SIDE, 0);
50         final String allBases = new String(ref.getBases());
51         final int endIndex = Math.min(basesToDiscardInFront + 2 * NUM_BASES_ON_EITHER_SIDE + 1, allBases.length());
52         String localBases = allBases.substring(basesToDiscardInFront, endIndex);
53         if (localBases.length() < REFERENCE_CONTEXT_LENGTH) {
54             localBases = String.join("", localBases, StringUtils.repeat("N", REFERENCE_CONTEXT_LENGTH - localBases.length()));
55         }
56 
57         return Collections.singletonMap(REFERENCE_BASES_KEY, localBases );
58     }
59 
60     @Override
getDescriptions()61     public List<VCFInfoHeaderLine> getDescriptions() {
62         return Arrays.asList(new VCFInfoHeaderLine(ReferenceBases.REFERENCE_BASES_KEY, 1, VCFHeaderLineType.String, "local reference bases."));
63     }
64 
getNMiddleBases(final String bases, final int n)65     public static String getNMiddleBases(final String bases, final int n){
66         Utils.validateArg(bases.length() >= n, "bases must have n or more bases. bases = " + bases);
67         Utils.validateArg( bases.length() % 2 == 1, "the length of bases must be an odd number");
68         Utils.validateArg( n % 2 == 1, "n must be odd");
69 
70         final int numBasesOnEachSide = n/2;
71         final int middleIndex = bases.length()/2;
72         return bases.substring(middleIndex - numBasesOnEachSide, middleIndex + numBasesOnEachSide + 1);
73 
74     }
75 }
76