package org.broadinstitute.hellbender.tools.walkers.annotator;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableMap;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFInfoHeaderLine;
import org.apache.commons.lang3.tuple.Pair;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.genotyper.AlleleLikelihoods;
import org.broadinstitute.hellbender.utils.help.HelpConstants;
import org.broadinstitute.hellbender.utils.read.AlignmentUtils;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.read.ReadUtils;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
import org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines;

import java.util.*;
import java.util.stream.IntStream;


/**
 * Apply a read-based annotation that reports the number of Ns seen at a given site. This is intended for use on consensus called data.
 */
@DocumentedFeature(groupName= HelpConstants.DOC_CAT_ANNOTATORS, groupSummary=HelpConstants.DOC_CAT_ANNOTATORS_SUMMARY, summary="Number of Ns at the pileup")
public class CountNs extends InfoFieldAnnotation {
    /**
     * Counts, across every sample in {@code likelihoods}, the evidence reads whose base at the
     * variant's start position is {@code 'N'}, and reports that total under
     * {@link GATKVCFConstants#N_COUNT_KEY}.
     *
     * @param ref         reference context at the site; not consulted by this annotation
     * @param vc          variant being annotated; must not be null (checked with {@code Utils.nonNull})
     * @param likelihoods per-sample read evidence; may be null
     * @return a single-entry map from {@code N_COUNT_KEY} to the count (as a {@code Long}),
     *         or an empty map when {@code likelihoods} is null
     */
    @Override
    public Map<String, Object> annotate(final ReferenceContext ref,
                                        final VariantContext vc,
                                        final AlleleLikelihoods<GATKRead, Allele> likelihoods) {
        Utils.nonNull(vc);
        if ( likelihoods == null ) {
            return Collections.emptyMap();
        }

        // Pool the evidence of all samples and count the reads showing an N at the site.
        final long nCount = IntStream.range(0, likelihoods.numberOfSamples())
                .mapToObj(sampleIndex -> likelihoods.sampleEvidence(sampleIndex))
                .flatMap(Collection::stream)
                .filter(read -> doesReadHaveN(read, vc))
                .count();

        return ImmutableMap.of(GATKVCFConstants.N_COUNT_KEY, nCount);
    }

    /**
     * @return the INFO header line registered for {@link GATKVCFConstants#N_COUNT_KEY}
     */
    @Override
    public List<VCFInfoHeaderLine> getDescriptions() {
        return Collections.singletonList(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.N_COUNT_KEY));
    }

    /**
     * @return the single INFO key written by this annotation, {@link GATKVCFConstants#N_COUNT_KEY}
     */
    @Override
    public List<String> getKeyNames() {
        return Collections.singletonList(GATKVCFConstants.N_COUNT_KEY);
    }

    /**
     * Tests whether {@code read} carries an {@code 'N'} base at the start position of {@code vc}.
     *
     * @param read read to inspect
     * @param vc   variant whose start coordinate is looked up in the read
     * @return {@code true} when {@code ReadUtils.getReadBaseAtReferenceCoordinate} yields a base and
     *         that base equals {@code 'N'}; {@code false} when no base is returned or it is any other value
     */
    @VisibleForTesting
    static Boolean doesReadHaveN(final GATKRead read, final VariantContext vc) {
        // NOTE(review): an empty Optional presumably means the read has no aligned base at this
        // coordinate (e.g. deletion or no overlap) -- confirm against ReadUtils.
        final Optional<Byte> readBase = ReadUtils.getReadBaseAtReferenceCoordinate(read, vc.getStart());
        return readBase.isPresent() && readBase.get() == 'N';
    }
}