1 package org.broadinstitute.hellbender.tools.walkers.annotator; 2 3 import htsjdk.variant.variantcontext.Allele; 4 import htsjdk.variant.variantcontext.VariantContext; 5 import htsjdk.variant.vcf.VCFHeaderLineType; 6 import htsjdk.variant.vcf.VCFInfoHeaderLine; 7 import org.apache.commons.lang.StringUtils; 8 import org.broadinstitute.barclay.help.DocumentedFeature; 9 import org.broadinstitute.hellbender.engine.ReferenceContext; 10 import org.broadinstitute.hellbender.utils.Utils; 11 import org.broadinstitute.hellbender.utils.genotyper.AlleleLikelihoods; 12 import org.broadinstitute.hellbender.utils.help.HelpConstants; 13 import org.broadinstitute.hellbender.utils.logging.OneShotLogger; 14 import org.broadinstitute.hellbender.utils.read.GATKRead; 15 16 import java.util.Arrays; 17 import java.util.Collections; 18 import java.util.List; 19 import java.util.Map; 20 21 /** 22 * Local reference context at a variant position. 23 * 24 * </p>The annotation gives ten reference bases each to the left and right of the variant start and the start base for a total of 21 reference bases. 25 * Start position is defined as one base before indels. For example, the reference context AAAAAAAAAACTTTTTTTTTT would apply to a SNV variant 26 * context with ref allele C and alt allele G as well as to a deletion variant context with ref allele CT and alt allele C.</p> 27 * 28 */ 29 @DocumentedFeature(groupName=HelpConstants.DOC_CAT_ANNOTATORS, groupSummary=HelpConstants.DOC_CAT_ANNOTATORS_SUMMARY, summary="Annotate with local reference bases (REF_BASES)") 30 public class ReferenceBases extends InfoFieldAnnotation { 31 public static final String REFERENCE_BASES_KEY = "REF_BASES"; 32 33 private int NUM_BASES_ON_EITHER_SIDE = 10; 34 private int REFERENCE_CONTEXT_LENGTH = 2*NUM_BASES_ON_EITHER_SIDE + 1; 35 36 protected final OneShotLogger warning = new OneShotLogger(this.getClass()); 37 38 @Override getKeyNames()39 public List<String> getKeyNames() { return Collections.singletonList(REFERENCE_BASES_KEY); } 40 41 @Override annotate(final ReferenceContext ref, final VariantContext vc, final AlleleLikelihoods<GATKRead, Allele> likelihoods)42 public Map<String, Object> annotate(final ReferenceContext ref, 43 final VariantContext vc, 44 final AlleleLikelihoods<GATKRead, Allele> likelihoods) { 45 if (ref==null) { 46 warning.warn("REF_BASES requires the reference to annotate, none was provided"); 47 return Collections.emptyMap(); 48 } 49 final int basesToDiscardInFront = Math.max(vc.getStart() - ref.getWindow().getStart() - NUM_BASES_ON_EITHER_SIDE, 0); 50 final String allBases = new String(ref.getBases()); 51 final int endIndex = Math.min(basesToDiscardInFront + 2 * NUM_BASES_ON_EITHER_SIDE + 1, allBases.length()); 52 String localBases = allBases.substring(basesToDiscardInFront, endIndex); 53 if (localBases.length() < REFERENCE_CONTEXT_LENGTH) { 54 localBases = String.join("", localBases, StringUtils.repeat("N", REFERENCE_CONTEXT_LENGTH - localBases.length())); 55 } 56 57 return Collections.singletonMap(REFERENCE_BASES_KEY, localBases ); 58 } 59 60 @Override getDescriptions()61 public List<VCFInfoHeaderLine> getDescriptions() { 62 return Arrays.asList(new VCFInfoHeaderLine(ReferenceBases.REFERENCE_BASES_KEY, 1, VCFHeaderLineType.String, "local reference bases.")); 63 } 64 getNMiddleBases(final String bases, final int n)65 public static String getNMiddleBases(final String bases, final int n){ 66 Utils.validateArg(bases.length() >= n, "bases must have n or more bases. bases = " + bases); 67 Utils.validateArg( bases.length() % 2 == 1, "the length of bases must be an odd number"); 68 Utils.validateArg( n % 2 == 1, "n must be odd"); 69 70 final int numBasesOnEachSide = n/2; 71 final int middleIndex = bases.length()/2; 72 return bases.substring(middleIndex - numBasesOnEachSide, middleIndex + numBasesOnEachSide + 1); 73 74 } 75 } 76