1 package org.broadinstitute.hellbender.tools.walkers.annotator; 2 3 import com.google.common.annotations.VisibleForTesting; 4 import htsjdk.variant.variantcontext.Allele; 5 import htsjdk.variant.variantcontext.VariantContext; 6 import htsjdk.variant.vcf.VCFConstants; 7 import htsjdk.variant.vcf.VCFInfoHeaderLine; 8 import htsjdk.variant.vcf.VCFStandardHeaderLines; 9 import org.broadinstitute.barclay.help.DocumentedFeature; 10 import org.broadinstitute.hellbender.engine.ReferenceContext; 11 import org.broadinstitute.hellbender.utils.Utils; 12 import org.broadinstitute.hellbender.utils.genotyper.AlleleLikelihoods; 13 import org.broadinstitute.hellbender.utils.help.HelpConstants; 14 import org.broadinstitute.hellbender.utils.read.GATKRead; 15 16 import java.util.Collections; 17 import java.util.List; 18 import java.util.Map; 19 import java.util.stream.IntStream; 20 21 22 /** 23 * Count of all reads with MAPQ = 0 across all samples 24 * 25 * <p>This anotation gives you the count of all reads that have MAPQ = 0 across all samples. The count of reads with MAPQ0 can be used for quality control; high counts typically indicate regions where it is difficult to make confident calls.</p> 26 * 27 * <h3>Caveat</h3> 28 * <p>It is not useful to apply this annotation with HaplotypeCaller because HC filters out all reads with MQ0 upfront, so the annotation will always return a value of 0.</p> 29 * 30 * <h3>Related annotations</h3> 31 * <ul> 32 * <li><b><a href="https://www.broadinstitute.org/gatk/guide/tooldocs/org_broadinstitute_gatk_tools_walkers_annotator_MappingQualityZeroBySample.php">MappingQualityZeroBySample</a></b> gives the count of reads with MAPQ=0 for each individual sample.</li> 33 * <li><b><a href="https://www.broadinstitute.org/gatk/guide/tooldocs/org_broadinstitute_gatk_tools_walkers_annotator_LowMQ.php">LowMQ</a></b> gives the proportion of reads with low mapping quality (MAPQ below 10, including 0).</li> 34 * </ul> 35 * 36 */ 37 @DocumentedFeature(groupName=HelpConstants.DOC_CAT_ANNOTATORS, groupSummary=HelpConstants.DOC_CAT_ANNOTATORS_SUMMARY, summary="Count of all reads with MAPQ = 0 across all samples (MQ0)") 38 public final class MappingQualityZero extends InfoFieldAnnotation { 39 40 @Override annotate(final ReferenceContext ref, final VariantContext vc, final AlleleLikelihoods<GATKRead, Allele> likelihoods)41 public Map<String, Object> annotate(final ReferenceContext ref, 42 final VariantContext vc, 43 final AlleleLikelihoods<GATKRead, Allele> likelihoods) { 44 Utils.nonNull(vc); 45 if (!vc.isVariant() || likelihoods == null){ 46 return Collections.emptyMap(); 47 } 48 //NOTE: unlike other annotations, this one returns 0 if likelihoods are empty 49 final long mq0 = IntStream.range(0, likelihoods.numberOfSamples()).boxed() 50 .flatMap(s -> likelihoods.sampleEvidence(s).stream()) 51 .filter(r -> r.getMappingQuality() == 0) 52 .count(); 53 54 return Collections.singletonMap(getKeyNames().get(0), formattedValue(mq0)); 55 } 56 57 @VisibleForTesting formattedValue(long mq0)58 static String formattedValue(long mq0) { 59 return String.format("%d", mq0); 60 } 61 62 @Override getKeyNames()63 public List<String> getKeyNames() { return Collections.singletonList(VCFConstants.MAPPING_QUALITY_ZERO_KEY); } 64 65 @Override getDescriptions()66 public List<VCFInfoHeaderLine> getDescriptions() { 67 return Collections.singletonList(VCFStandardHeaderLines.getInfoLine(getKeyNames().get(0))); 68 } 69 } 70