1 package org.broadinstitute.hellbender.tools.walkers.annotator;
2 
3 import com.google.common.annotations.VisibleForTesting;
4 import htsjdk.variant.variantcontext.Allele;
5 import htsjdk.variant.variantcontext.VariantContext;
6 import htsjdk.variant.vcf.VCFConstants;
7 import htsjdk.variant.vcf.VCFInfoHeaderLine;
8 import htsjdk.variant.vcf.VCFStandardHeaderLines;
9 import org.broadinstitute.barclay.help.DocumentedFeature;
10 import org.broadinstitute.hellbender.engine.ReferenceContext;
11 import org.broadinstitute.hellbender.utils.Utils;
12 import org.broadinstitute.hellbender.utils.genotyper.AlleleLikelihoods;
13 import org.broadinstitute.hellbender.utils.help.HelpConstants;
14 import org.broadinstitute.hellbender.utils.read.GATKRead;
15 
16 import java.util.Collections;
17 import java.util.List;
18 import java.util.Map;
19 import java.util.stream.IntStream;
20 
21 
22 /**
23  * Count of all reads with MAPQ = 0 across all samples
24  *
25  * <p>This anotation gives you the count of all reads that have MAPQ = 0 across all samples. The count of reads with MAPQ0 can be used for quality control; high counts typically indicate regions where it is difficult to make confident calls.</p>
26  *
27  * <h3>Caveat</h3>
28  * <p>It is not useful to apply this annotation with HaplotypeCaller because HC filters out all reads with MQ0 upfront, so the annotation will always return a value of 0.</p>
29  *
30  * <h3>Related annotations</h3>
31  * <ul>
32  *     <li><b><a href="https://www.broadinstitute.org/gatk/guide/tooldocs/org_broadinstitute_gatk_tools_walkers_annotator_MappingQualityZeroBySample.php">MappingQualityZeroBySample</a></b> gives the count of reads with MAPQ=0 for each individual sample.</li>
33  *     <li><b><a href="https://www.broadinstitute.org/gatk/guide/tooldocs/org_broadinstitute_gatk_tools_walkers_annotator_LowMQ.php">LowMQ</a></b> gives the proportion of reads with low mapping quality (MAPQ below 10, including 0).</li>
34  * </ul>
35  *
36  */
37 @DocumentedFeature(groupName=HelpConstants.DOC_CAT_ANNOTATORS, groupSummary=HelpConstants.DOC_CAT_ANNOTATORS_SUMMARY, summary="Count of all reads with MAPQ = 0 across all samples (MQ0)")
38 public final class MappingQualityZero extends InfoFieldAnnotation {
39 
40     @Override
annotate(final ReferenceContext ref, final VariantContext vc, final AlleleLikelihoods<GATKRead, Allele> likelihoods)41     public Map<String, Object> annotate(final ReferenceContext ref,
42                                         final VariantContext vc,
43                                         final AlleleLikelihoods<GATKRead, Allele> likelihoods) {
44         Utils.nonNull(vc);
45         if (!vc.isVariant() || likelihoods == null){
46             return Collections.emptyMap();
47         }
48         //NOTE: unlike other annotations, this one returns 0 if likelihoods are empty
49         final long mq0 = IntStream.range(0, likelihoods.numberOfSamples()).boxed()
50                 .flatMap(s -> likelihoods.sampleEvidence(s).stream())
51                 .filter(r -> r.getMappingQuality() == 0)
52                 .count();
53 
54         return Collections.singletonMap(getKeyNames().get(0), formattedValue(mq0));
55     }
56 
57     @VisibleForTesting
formattedValue(long mq0)58     static String formattedValue(long mq0) {
59         return String.format("%d", mq0);
60     }
61 
62     @Override
getKeyNames()63     public List<String> getKeyNames() { return Collections.singletonList(VCFConstants.MAPPING_QUALITY_ZERO_KEY); }
64 
65     @Override
getDescriptions()66     public List<VCFInfoHeaderLine> getDescriptions() {
67         return Collections.singletonList(VCFStandardHeaderLines.getInfoLine(getKeyNames().get(0)));
68     }
69 }
70