1 package org.broadinstitute.hellbender.tools.walkers.annotator;
2 
3 import com.google.common.annotations.VisibleForTesting;
4 import com.google.common.collect.ImmutableMap;
5 import htsjdk.variant.variantcontext.Allele;
6 import htsjdk.variant.variantcontext.VariantContext;
7 import htsjdk.variant.vcf.VCFInfoHeaderLine;
8 import org.apache.commons.lang3.tuple.Pair;
9 import org.broadinstitute.barclay.help.DocumentedFeature;
10 import org.broadinstitute.hellbender.engine.ReferenceContext;
11 import org.broadinstitute.hellbender.utils.Utils;
12 import org.broadinstitute.hellbender.utils.genotyper.AlleleLikelihoods;
13 import org.broadinstitute.hellbender.utils.help.HelpConstants;
14 import org.broadinstitute.hellbender.utils.read.AlignmentUtils;
15 import org.broadinstitute.hellbender.utils.read.GATKRead;
16 import org.broadinstitute.hellbender.utils.read.ReadUtils;
17 import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
18 import org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines;
19 
20 import java.util.*;
21 import java.util.stream.IntStream;
22 
23 
24 /**
25  * Apply a read-based annotation that reports the number of Ns seen at a given site. This is intended for use on consensus called data.
26  */
27 @DocumentedFeature(groupName= HelpConstants.DOC_CAT_ANNOTATORS, groupSummary=HelpConstants.DOC_CAT_ANNOTATORS_SUMMARY, summary="Number of Ns at the pileup")
28 public class CountNs extends InfoFieldAnnotation {
29     /**
30      * Calculate annotations for each allele based on given VariantContext and likelihoods for a given genotype's sample
31      * and add the annotations to the GenotypeBuilder.  By default annotations are only calculated for alt alleles but
32      * implementations may override the {@code includeRefAllele()} method.  See parent class docs in {@link GenotypeAnnotation}.
33      */
34 
annotate(final ReferenceContext ref, final VariantContext vc, final AlleleLikelihoods<GATKRead, Allele> likelihoods)35     public Map<String, Object> annotate(final ReferenceContext ref,
36                          final VariantContext vc,
37                          final AlleleLikelihoods<GATKRead, Allele> likelihoods) {
38         Utils.nonNull(vc);
39         if ( likelihoods == null ) {
40             return Collections.emptyMap();
41         }
42         long Count = IntStream.range(0, likelihoods.numberOfSamples()).boxed()
43                 .flatMap(n -> likelihoods.sampleEvidence(n).stream())
44                 .filter(read -> doesReadHaveN(read, vc)).count();
45 
46         return ImmutableMap.of(GATKVCFConstants.N_COUNT_KEY, Count);
47     }
48 
49     @Override
getDescriptions()50     public List<VCFInfoHeaderLine> getDescriptions() {
51         return Collections.singletonList(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.N_COUNT_KEY));
52     }
53 
54     @Override
getKeyNames()55     public List<String> getKeyNames() { return Arrays.asList(GATKVCFConstants.N_COUNT_KEY); }
56 
57     @VisibleForTesting
doesReadHaveN(final GATKRead read, final VariantContext vc)58     static Boolean doesReadHaveN(final GATKRead read, final VariantContext vc) {
59         final Optional<Byte> readBase = ReadUtils.getReadBaseAtReferenceCoordinate(read, vc.getStart());
60         return readBase.isPresent() && readBase.get() == 'N';
61     }
62 }
63