1 package org.broadinstitute.hellbender.tools.walkers.annotator; 2 3 import com.google.common.collect.ImmutableMap; 4 import htsjdk.variant.variantcontext.Allele; 5 import htsjdk.variant.variantcontext.VariantContext; 6 import htsjdk.variant.vcf.VCFInfoHeaderLine; 7 import org.broadinstitute.barclay.help.DocumentedFeature; 8 import org.broadinstitute.hellbender.engine.ReferenceContext; 9 import org.broadinstitute.hellbender.tools.AddOriginalAlignmentTags; 10 import org.broadinstitute.hellbender.tools.walkers.mutect.filtering.Mutect2FilteringEngine; 11 import org.broadinstitute.hellbender.utils.*; 12 import org.broadinstitute.hellbender.utils.genotyper.AlleleLikelihoods; 13 import org.broadinstitute.hellbender.utils.help.HelpConstants; 14 import org.broadinstitute.hellbender.utils.logging.OneShotLogger; 15 import org.broadinstitute.hellbender.utils.read.GATKRead; 16 import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; 17 import org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines; 18 19 import java.util.Collection; 20 import java.util.Collections; 21 import java.util.List; 22 import java.util.Map; 23 24 /** 25 * Original Alignment annotation counts the number of alt reads where the original alignment contig doesn't match the current alignment contig 26 * 27 * <p>If reads were realigned to multiple references (for example the full human reference followed by just the 28 * mitochondrial contig) and the original alignment tag was recorded before realigning, then we can count the number of alt 29 * reads that have been realigned from other contigs to this one. This can be useful for downstream filtering if an alt 30 * allele has all or most of its support originally from a different contig. In the mitochondrial case this can be useful 31 * for filtering known NuMTs that are present in other contigs in the reference. </p> 32 * 33 * <h3>Caveat</h3> 34 * <p>This annotation can only be calculated if an OA tag is present in the bam and can only be run by Mutect2.</p> 35 * 36 */ 37 @DocumentedFeature(groupName= HelpConstants.DOC_CAT_ANNOTATORS, groupSummary=HelpConstants.DOC_CAT_ANNOTATORS_SUMMARY, summary="Number of alt reads with an OA tag that doesn't match the current alignment contig.") 38 public class OriginalAlignment extends InfoFieldAnnotation { 39 protected final OneShotLogger warning = new OneShotLogger(this.getClass()); 40 public static final String KEY = GATKVCFConstants.ORIGINAL_CONTIG_MISMATCH_KEY; 41 42 @Override annotate(ReferenceContext ref, VariantContext vc, AlleleLikelihoods<GATKRead, Allele> likelihoods)43 public Map<String, Object> annotate(ReferenceContext ref, VariantContext vc, AlleleLikelihoods<GATKRead, Allele> likelihoods) { 44 Utils.nonNull(vc); 45 Utils.nonNull(likelihoods); 46 47 final double[] lods = Mutect2FilteringEngine.getTumorLogOdds(vc); 48 if (lods==null) { 49 warning.warn(String.format("One or more variant contexts is missing the 'TLOD' annotation, %s will not be computed for these VariantContexts", GATKVCFConstants.ORIGINAL_CONTIG_MISMATCH_KEY)); 50 return Collections.emptyMap(); 51 } 52 final int indexOfMaxLod = MathUtils.maxElementIndex(lods); 53 final Allele altAlelle = vc.getAlternateAllele(indexOfMaxLod); 54 final Collection<AlleleLikelihoods<GATKRead, Allele>.BestAllele> bestAlleles = likelihoods.bestAllelesBreakingTies(); 55 final String currentContig = ref.getInterval().getContig(); 56 57 final long nonChrMAlt = bestAlleles.stream() 58 .filter(ba -> ba.evidence.hasAttribute(AddOriginalAlignmentTags.OA_TAG_NAME) && ba.isInformative() && ba.allele.equals(altAlelle) && 59 !AddOriginalAlignmentTags.getOAContig(ba.evidence).equals(currentContig)) 60 .count(); 61 return ImmutableMap.of(GATKVCFConstants.ORIGINAL_CONTIG_MISMATCH_KEY, nonChrMAlt); 62 } 63 64 @Override getDescriptions()65 public List<VCFInfoHeaderLine> getDescriptions() { 66 return Collections.singletonList(GATKVCFHeaderLines.getInfoLine(KEY)); 67 } 68 69 @Override getKeyNames()70 public List<String> getKeyNames() { 71 return Collections.singletonList(KEY); 72 } 73 } 74